Skip to content
Snippets Groups Projects
Commit 3bebbf91 authored by Henrik tom Wörden
Browse files

WIP passing tests

parent 0adf4e16
No related branches found
No related tags found
2 merge requests: !91 "Release 0.3", !67 "MAINT: introduce an identifiable class"
Pipeline #30447 failed
...@@ -495,7 +495,7 @@ class Crawler(object): ...@@ -495,7 +495,7 @@ class Crawler(object):
Returns False otherwise. Returns False otherwise.
""" """
if ident is None: if ident is None:
return True raise ValueError("Identifiable has to be given as argument")
for pname, pvalue in ident.properties.items(): for pname, pvalue in ident.properties.items():
if isinstance(pvalue, list): if isinstance(pvalue, list):
for el in pvalue: for el in pvalue:
...@@ -536,17 +536,19 @@ class Crawler(object): ...@@ -536,17 +536,19 @@ class Crawler(object):
whether it exists remotely and it was not found. whether it exists remotely and it was not found.
""" """
if ident is None: if ident is None:
return True raise ValueError("Identifiable has to be given as argument")
for pname, pvalue in ident.properties.items(): for pname, pvalue in ident.properties.items():
# if (is_reference(p) # if (is_reference(p)
# Entity instead of ID and not cached locally # Entity instead of ID and not cached locally
if (isinstance(pvalue, list)): if (isinstance(pvalue, list)):
for el in pvalue: for el in pvalue:
if (isinstance(el, db.Entity) if (isinstance(el, db.Entity)
and self.get_from_remote_missing_cache(el) is not None): and self.get_from_remote_missing_cache(
self.identifiableAdapter.get_identifiable(el)) is not None):
return True return True
if (isinstance(pvalue, db.Entity) if (isinstance(pvalue, db.Entity)
and self.get_from_remote_missing_cache(pvalue) is not None): and self.get_from_remote_missing_cache(
self.identifiableAdapter.get_identifiable(pvalue)) is not None):
# might be checked when reference is resolved # might be checked when reference is resolved
return True return True
return False return False
...@@ -562,7 +564,8 @@ class Crawler(object): ...@@ -562,7 +564,8 @@ class Crawler(object):
lst = [] lst = []
for el in p.value: for el in p.value:
if (isinstance(el, db.Entity) and el.id is None): if (isinstance(el, db.Entity) and el.id is None):
cached = self.get_from_any_cache(el) cached = self.get_from_any_cache(
self.identifiableAdapter.get_identifiable(el))
if cached is None: if cached is None:
raise RuntimeError("Not in cache.") raise RuntimeError("Not in cache.")
if not check_identical(cached, el, True): if not check_identical(cached, el, True):
...@@ -576,7 +579,8 @@ class Crawler(object): ...@@ -576,7 +579,8 @@ class Crawler(object):
lst.append(el) lst.append(el)
p.value = lst p.value = lst
if (isinstance(p.value, db.Entity) and p.value.id is None): if (isinstance(p.value, db.Entity) and p.value.id is None):
cached = self.get_from_any_cache(p.value) cached = self.get_from_any_cache(
self.identifiableAdapter.get_identifiable(p.value))
if cached is None: if cached is None:
raise RuntimeError("Not in cache.") raise RuntimeError("Not in cache.")
if not check_identical(cached, p.value, True): if not check_identical(cached, p.value, True):
...@@ -587,15 +591,12 @@ class Crawler(object): ...@@ -587,15 +591,12 @@ class Crawler(object):
raise RuntimeError("Not identical.") raise RuntimeError("Not identical.")
p.value = cached p.value = cached
def get_from_remote_missing_cache(self, record: db.Record): def get_from_remote_missing_cache(self, identifiable: Identifiable):
""" """
returns the identifiable if an identifiable with the same values already exists locally returns the identifiable if an identifiable with the same values already exists locally
(Each identifiable that is not found on the remote server, is 'cached' locally to prevent (Each identifiable that is not found on the remote server, is 'cached' locally to prevent
that the same identifiable exists twice) that the same identifiable exists twice)
""" """
if self.identifiableAdapter is None:
raise RuntimeError("Should not happen.")
identifiable = self.identifiableAdapter.get_identifiable(record)
if identifiable is None: if identifiable is None:
# TODO: check whether the same idea as below works here # TODO: check whether the same idea as below works here
identifiable = record identifiable = record
...@@ -606,15 +607,12 @@ class Crawler(object): ...@@ -606,15 +607,12 @@ class Crawler(object):
else: else:
return None return None
def get_from_any_cache(self, record: db.Record): def get_from_any_cache(self, identifiable: Identifiable):
""" """
returns the identifiable if an identifiable with the same values already exists locally returns the identifiable if an identifiable with the same values already exists locally
(Each identifiable that is not found on the remote server, is 'cached' locally to prevent (Each identifiable that is not found on the remote server, is 'cached' locally to prevent
that the same identifiable exists twice) that the same identifiable exists twice)
""" """
if self.identifiableAdapter is None:
raise RuntimeError("Should not happen.")
identifiable = self.identifiableAdapter.get_identifiable(record)
if identifiable is None: if identifiable is None:
return None return None
...@@ -713,15 +711,16 @@ class Crawler(object): ...@@ -713,15 +711,16 @@ class Crawler(object):
for i in reversed(range(len(flat))): for i in reversed(range(len(flat))):
record = flat[i] record = flat[i]
identifiable = self.identifiableAdapter.get_identifiable(record)
# TODO remove if the exception is never raised # TODO remove if the exception is never raised
if (record.id is not None or record in to_be_inserted): if (record.id is not None or record in to_be_inserted):
raise RuntimeError("This should not be reached since treated elements" raise RuntimeError("This should not be reached since treated elements"
"are removed from the list") "are removed from the list")
# Check whether this record is a duplicate that can be removed # Check whether this record is a duplicate that can be removed
elif self.get_from_any_cache(record) is not None: elif self.get_from_any_cache(identifiable) is not None:
# We merge the two in order to prevent loss of information # We merge the two in order to prevent loss of information
newrecord = self.get_from_any_cache(record) newrecord = self.get_from_any_cache(identifiable)
merge_entities(newrecord, record) merge_entities(newrecord, record)
Crawler.bend_references_to_new_object( Crawler.bend_references_to_new_object(
old=record, new=newrecord, entities=flat + to_be_updated + to_be_inserted) old=record, new=newrecord, entities=flat + to_be_updated + to_be_inserted)
...@@ -730,9 +729,7 @@ class Crawler(object): ...@@ -730,9 +729,7 @@ class Crawler(object):
resolved_references = True resolved_references = True
# can we check whether the record(identifiable) exists on the remote server? # can we check whether the record(identifiable) exists on the remote server?
elif not self.has_reference_value_without_id( elif not self.has_reference_value_without_id(identifiable):
# TODO move get_identifiable above if else?
self.identifiableAdapter.get_identifiable(record)):
# TODO: remove deepcopy? # TODO: remove deepcopy?
identified_record = ( identified_record = (
self.identifiableAdapter.retrieve_identified_record_for_record( self.identifiableAdapter.retrieve_identified_record_for_record(
...@@ -757,8 +754,7 @@ class Crawler(object): ...@@ -757,8 +754,7 @@ class Crawler(object):
# is it impossible to check this record because an identifiable references a # is it impossible to check this record because an identifiable references a
# missing record? # missing record?
elif self.has_missing_object_in_references( elif self.has_missing_object_in_references(identifiable):
self.identifiableAdapter.get_identifiable(record)):
to_be_inserted.append(record) to_be_inserted.append(record)
self.add_to_remote_missing_cache(record) self.add_to_remote_missing_cache(record)
del flat[i] del flat[i]
......
This diff is collapsed.
...@@ -362,17 +362,18 @@ def test_split_into_inserts_and_updates_trivial(crawler): ...@@ -362,17 +362,18 @@ def test_split_into_inserts_and_updates_trivial(crawler):
def test_split_into_inserts_and_updates_single(crawler_mocked_identifiable_retrieve): def test_split_into_inserts_and_updates_single(crawler_mocked_identifiable_retrieve):
crawler = crawler_mocked_identifiable_retrieve crawler = crawler_mocked_identifiable_retrieve
identlist = [Identifiable(name="A", record_type="C"), Identifiable(name="B", record_type="C")]
entlist = [db.Record(name="A").add_parent( entlist = [db.Record(name="A").add_parent(
"C"), db.Record(name="B").add_parent("C")] "C"), db.Record(name="B").add_parent("C")]
assert crawler.get_from_any_cache(entlist[0]) is None assert crawler.get_from_any_cache(identlist[0]) is None
assert crawler.get_from_any_cache(entlist[1]) is None assert crawler.get_from_any_cache(identlist[1]) is None
#assert not crawler.has_reference_value_without_id(entlist[0]) assert not crawler.has_reference_value_without_id(identlist[0])
#assert not crawler.has_reference_value_without_id(entlist[1]) assert not crawler.has_reference_value_without_id(identlist[1])
assert crawler.identifiableAdapter.retrieve_identified_record_for_record( assert crawler.identifiableAdapter.retrieve_identified_record_for_record(
entlist[0]).id == 1111 identlist[0]).id == 1111
assert crawler.identifiableAdapter.retrieve_identified_record_for_record( assert crawler.identifiableAdapter.retrieve_identified_record_for_record(
entlist[1]) is None identlist[1]) is None
insert, update = crawler.split_into_inserts_and_updates(deepcopy(entlist)) insert, update = crawler.split_into_inserts_and_updates(deepcopy(entlist))
assert len(insert) == 1 assert len(insert) == 1
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment