diff --git a/src/newcrawler/crawl.py b/src/newcrawler/crawl.py
index 8ec76c2292850f819d613f5feccdd0529db436bd..a0670eb149f82d348a515df4cc143f91e24e3e00 100644
--- a/src/newcrawler/crawl.py
+++ b/src/newcrawler/crawl.py
@@ -319,7 +319,8 @@ class Crawler(object):
                         # side effect
                         record.id = identified_record.id
                         to_be_updated.append(record)
-                        self.add_identified_record_to_local_cache(record)
+                        # TODO think this through
+                        # self.add_identified_record_to_local_cache(record)
                         del flat[i]
                     resolved_references = True
 
@@ -334,13 +335,28 @@ class Crawler(object):
 
         return to_be_inserted, to_be_updated
 
+    def replace_entities_by_ids(self, rec: db.Record):
+        """
+        Replace Entity objects in the property values of `rec` by their ids.
+
+        Entities without an id are kept as they are; otherwise the reference
+        would silently be overwritten with None.
+        """
+        for el in rec.properties:
+            if isinstance(el.value, db.Entity) and el.value.id is not None:
+                el.value = el.value.id
+            elif isinstance(el.value, list):
+                for index, val in enumerate(el.value):
+                    if isinstance(val, db.Entity) and val.id is not None:
+                        el.value[index] = val.id
+
     def remove_unnecessary_updates(self, updateList: list[db.Record]):
         """
         checks whether all relevant attributes (especially Property values) are equal
         """
         for i in reversed(range(len(updateList))):
             record = updateList[i]
-            identifiable = self.identifiableAdapter.get_identifiable(record)
+            identifiable = self.identifiableAdapter.retrieve_identifiable(record)
             comp = compare_entities(record, identifiable)
             identical = True
 
@@ -392,6 +408,9 @@ class Crawler(object):
         to_be_inserted, to_be_updated = self.split_into_inserts_and_updates(updateList)
 
         # remove unnecessary updates from list
+        for el in to_be_updated:
+            self.replace_entities_by_ids(el)
+
         self.remove_unnecessary_updates(to_be_updated)
 
         # TODO
diff --git a/unittests/test_tool.py b/unittests/test_tool.py
index fd71207723232f483bade70254b67305c5309486..23912ff133fdb7dceb1805907d24a57578fc63ee 100755
--- a/unittests/test_tool.py
+++ b/unittests/test_tool.py
@@ -514,3 +514,14 @@ def test_can_be_checked_externally(crawler):
     assert not crawler.can_be_checked_externally(db.Record()
                                                  .add_property('a', 123)
                                                  .add_property('b', db.Record()))
+
+
+def test_replace_entities_by_ids(crawler):
+    a = (db.Record().add_parent("B").add_property("A", 12345)
+         .add_property("B", db.Record(id=12345))
+         .add_property("C", [db.Record(id=12345), 233324]))
+
+    crawler.replace_entities_by_ids(a)
+    assert a.get_property("A").value == 12345
+    assert a.get_property("B").value == 12345
+    assert a.get_property("C").value == [12345, 233324]