diff --git a/.gitignore b/.gitignore index 67d5d78d3d3d86a0b9d601a3d9ccc9354f472e2b..2282d256281f1c0df0575389d9479ab2e028622e 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,6 @@ src/newcrawler.egg-info/ +__pycache__ unittests/provenance.yml .coverage TAGS -src/.coverage \ No newline at end of file +src/.coverage diff --git a/src/newcrawler/crawl.py b/src/newcrawler/crawl.py index bd47eba82a4cd3c741d696baec6cf75b3b713026..89efc089cf73fe6844fb2a20bbea0730d869af46 100644 --- a/src/newcrawler/crawl.py +++ b/src/newcrawler/crawl.py @@ -442,6 +442,7 @@ class Crawler(object): identified_records = [self.identifiableAdapter.retrieve_identifiable(record) for record in to_be_updated] + self.remove_unnecessary_updates(to_be_updated, identified_records) self.execute_inserts_in_list(to_be_inserted) diff --git a/src/newcrawler/identifiable_adapters.py b/src/newcrawler/identifiable_adapters.py index 89b8d4e19c0f28cd51085f1b131e37a1f17b0ae4..5bddc64cf61f14c9fbb77b7f96afdcc65d0a33ee 100644 --- a/src/newcrawler/identifiable_adapters.py +++ b/src/newcrawler/identifiable_adapters.py @@ -148,17 +148,10 @@ class IdentifiableAdapter(object): # case A: in the registered identifiable # case B: in the identifiable + if record.get_property(prop.name) is None: + raise NotImplementedError() record_prop = record.get_property(prop.name) newval = record_prop.value - if isinstance(record_prop.value, db.Entity): - newval = self.resolve_reference(record_prop.value) - elif isinstance(record_prop.value, list): - newval = list() - for element in record_prop.value: - if isinstance(element, db.Entity): - newval.append(self.resolve_reference(element)) - else: - newval.append(element) record_prop_new = db.Property(name=record_prop.name, id=record_prop.id, description=record_prop.description, @@ -287,16 +280,13 @@ class LocalStorageIdentifiableAdapter(IdentifiableAdapter): # a) prop_record.value has a registered identifiable: # in this case, fetch the identifiable and set the value accordingly if isinstance(prop.value, db.Entity): # lists are not checked here - registered = self.get_registered_identifiable(prop.value) - - if registered is None: - raise NotImplementedError("Non-identifiable references cannot" - " be used as properties in identifiables.") - - raise RuntimeError("The identifiable which is used as property" - " here has to be inserted first.") + otherid = prop_record.value + if isinstance(prop_record.value, db.Entity): + otherid = prop_record.value.id + if prop.value.id != otherid: + return False - if prop.value != prop_record.value: + elif prop.value != prop_record.value: return False return True diff --git a/unittests/test_tool.py b/unittests/test_tool.py index 51774720e1ce6bda53575c0dfc98c04856fedf7a..e26b0b0ce6a8b22121ee29f1a1c021a025644ac9 100755 --- a/unittests/test_tool.py +++ b/unittests/test_tool.py @@ -18,6 +18,13 @@ import pytest from pytest import raises +def basic_ident_lookup(rec, idents): + if rec.parents[0].name in idents: + return idents[rec.parents[0].name] + else: + return None + + def rfp(*pathcomponents): """ Return full path. @@ -238,8 +245,15 @@ def test_crawler_update_list(crawler, ident): assert len(comp[0]["properties"]) == 0 assert len(comp[1]["properties"]) == 0 - insl, updl = crawler.synchronize() + insl, updl = crawler.split_into_inserts_and_updates(crawler.updateList) + assert len(insl) == 0 + assert len(updl) == 18 + identified_records = [crawler.identifiableAdapter.retrieve_identifiable(record) for record + in updl] + for el in updl: + crawler.replace_entities_by_ids(el) + Crawler.remove_unnecessary_updates(updl, identified_records) assert len(updl) == 0 @@ -430,41 +444,43 @@ def test_split_into_inserts_and_updates_with_copy_attr(mock_retrieve): def test_all_references_are_existing_already(crawler): - def base_mocked_lookup(rec, known): - if rec.name in known: - return known[rec.name] - else: - return None + registered_identifiables = { + "C": db.Record().add_parent("C").add_property("a"), + "D": db.Record().add_parent("D").add_property("a").add_property("b")} crawler.identifiableAdapter.get_registered_identifiable = Mock(side_effect=partial( - base_mocked_lookup, known={"A": db.Record(name="A").add_parent("C"), - "B": db.Record(name="B").add_parent("C")})) + basic_ident_lookup, idents=registered_identifiables)) - assert crawler.all_references_are_existing_already(db.Record().add_property('a', 123)) - assert crawler.all_references_are_existing_already(db.Record() + assert crawler.all_references_are_existing_already( + db.Record().add_parent("C").add_property('a', 123)) + assert crawler.all_references_are_existing_already(db.Record().add_parent("C") .add_property('a', db.Record(id=123))) - assert crawler.all_references_are_existing_already(db.Record() + assert crawler.all_references_are_existing_already(db.Record().add_parent("D") .add_property('a', 123) .add_property('b', db.Record(id=123))) - assert not crawler.all_references_are_existing_already(db.Record() + a = db.Record(name="A").add_parent("C").add_property("a", 12311) + assert not crawler.all_references_are_existing_already(db.Record().add_parent("D") .add_property('a', 123) - .add_property('b', db.Record(name="A") - .add_parent("C"))) - a = db.Record(name="A").add_parent("C") + .add_property('b', a)) crawler.add_identified_record_to_local_cache(a) - assert crawler.all_references_are_existing_already(db.Record() + assert crawler.all_references_are_existing_already(db.Record().add_parent("D") .add_property('a', 123) .add_property('b', a)) def test_can_be_checked_externally(crawler): - assert crawler.can_be_checked_externally(db.Record().add_property('a', 123)) - assert crawler.can_be_checked_externally(db.Record() + registered_identifiables = { + "C": db.Record().add_parent("C").add_property("a"), + "D": db.Record().add_parent("D").add_property("a").add_property("b")} + crawler.identifiableAdapter.get_registered_identifiable = Mock(side_effect=partial( + basic_ident_lookup, idents=registered_identifiables)) + assert crawler.can_be_checked_externally(db.Record().add_parent("C").add_property('a', 123)) + assert crawler.can_be_checked_externally(db.Record().add_parent("C") .add_property('a', db.Record(id=123))) - assert crawler.can_be_checked_externally(db.Record() + assert crawler.can_be_checked_externally(db.Record().add_parent("D") .add_property('a', 123) .add_property('b', db.Record(id=123))) - assert not crawler.can_be_checked_externally(db.Record() + assert not crawler.can_be_checked_externally(db.Record().add_parent("D") .add_property('a', 123) .add_property('b', db.Record()))