Skip to content
Snippets Groups Projects
Commit e087df27 authored by Henrik tom Wörden
Browse files

working

parent bc11d2e3
No related branches found
No related tags found
3 merge requests: !71 REL: RElease v0.2.0, !56 F refactor, !5 FIX: use identifiable instead of record
Pipeline #29360 passed
......@@ -535,10 +535,10 @@ class Crawler(object):
# Entity instead of ID and not cached locally
if (isinstance(p.value, list)):
for el in p.value:
if (isinstance(el, db.Entity) and el.id is None
if (isinstance(el, db.Entity)
and self.get_from_remote_missing_cache(el) is not None):
return True
if (isinstance(p.value, db.Entity) and p.value.id is None
if (isinstance(p.value, db.Entity)
and self.get_from_remote_missing_cache(p.value) is not None):
# might be checked when reference is resolved
return True
......@@ -706,13 +706,9 @@ class Crawler(object):
# flat contains Entities which could not yet be checked against the remote server
while resolved_references and len(flat) > 0:
resolved_references = False
print("LSIT")
for ii, el in enumerate(flat):
print(ii, el.id, el.parents[0].name if len(el.parents) > 0 else "")
for i in reversed(range(len(flat))):
record = flat[i]
print(i, record.id, record.parents[0].name if len(record.parents) > 0 else "")
# TODO remove if the exception is never raised
if (record.id is not None or record in to_be_inserted):
......@@ -720,7 +716,6 @@ class Crawler(object):
"are removed from the list")
# Check whether this record is a duplicate that can be removed
elif self.get_from_any_cache(record) is not None:
print("duplicate")
# We merge the two in order to prevent loss of information
newrecord = self.get_from_any_cache(record)
merge_entities(newrecord, record)
......@@ -728,23 +723,21 @@ class Crawler(object):
old=record, new=newrecord, entities=flat+to_be_updated+to_be_inserted)
del flat[i]
resolved_references = True
# can we check whether the record(identifiable) exists on the remote server
# can we check whether the record(identifiable) exists on the remote server?
elif not self.references_entity_without_id(
self.identifiableAdapter.get_identifiable(record)):
print("checked")
# TODO: remove deepcopy?
identified_record = (
self.identifiableAdapter.retrieve_identified_record_for_record(
deepcopy(record)))
if identified_record is None:
print("not found")
# identifiable does not exist remotely -> record needs to be inserted
to_be_inserted.append(record)
self.add_to_remote_missing_cache(record)
del flat[i]
else:
print("found")
# side effect
record.id = identified_record.id
# Copy over checksum and size too if it is a file
......@@ -756,26 +749,17 @@ class Crawler(object):
self.add_to_remote_existing_cache(record)
del flat[i]
resolved_references = True
elif self.has_missing_object_in_references(record):
# is it impossible to check this record because an identifiable references a
# missing record?
elif self.has_missing_object_in_references(
self.identifiableAdapter.get_identifiable(record)):
to_be_inserted.append(record)
self.add_to_remote_missing_cache(record)
del flat[i]
resolved_references = True
else:
print("nothing")
print(record)
for record in flat:
# TODO: (for review)
# This was the old version, but also for this case the
# check for identifiables has to be done.
# to_be_inserted.append(record)
# self.add_to_remote_existing_cache(record)
# del flat[i]
# TODO: (for review)
# If the following replacement is not done, the cache will
# be invalid as soon as references are resolved.
# replace references by versions from cache:
self.replace_references_with_cached(record)
if len(flat) > 0:
......
......@@ -509,15 +509,18 @@ def test_no_uncached_entity_object_in_references(crawler):
}))
# one reference with id -> check
assert not crawler.has_missing_object_in_references(
db.Record(name="C").add_parent("C").add_property('d', 123))
assert not crawler.has_missing_object_in_references(db.Record(name="C")
.add_parent("C").add_property('d', 123))
# one ref with Entity with id -> check
assert not crawler.has_missing_object_in_references(db.Record(name="C")
.add_property('d', db.Record(id=123)))
.add_parent("C")
.add_property('d', db.Record(id=123)
.add_parent("C")))
# one ref with id one with Entity with id (mixed) -> check
assert not crawler.has_missing_object_in_references(db.Record(name="C").add_parent("D")
.add_property('d', 123)
.add_property('b', db.Record(id=123)))
.add_property('b', db.Record(id=123)
.add_parent("C")))
# entity to be referenced in the following
a = db.Record(name="C").add_parent("C").add_property("d", 12311)
# one ref with id one with Entity without id (but not identifying) -> fail
......@@ -539,8 +542,9 @@ def test_no_uncached_entity_object_in_references(crawler):
def test_references_entities_without_ids(crawler, ident):
assert not crawler.references_entity_without_id(
db.Record().add_parent("Person").add_property('last_name', 123).add_property('first_name', 123))
assert not crawler.references_entity_without_id(db.Record().add_parent("Person")
.add_property('last_name', 123)
.add_property('first_name', 123))
# id and rec with id
assert not crawler.references_entity_without_id(db.Record().add_parent("Person")
.add_property('first_name', 123)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment