diff --git a/src/caoscrawler/crawl.py b/src/caoscrawler/crawl.py index d08eb790b3acf5c1345f59d7784061603561a772..69de43ce3033f6e68370ed2e66a01905fb220d0b 100644 --- a/src/caoscrawler/crawl.py +++ b/src/caoscrawler/crawl.py @@ -531,10 +531,12 @@ class Crawler(object): # TODO: move inside if block? self.create_flat_list([p.value], flat) - def all_references_are_existing_already(self, record: db.Record): + def no_uncached_entity_object_in_references(self, record: db.Record): """ - returns true if all references either have IDs or were checked remotely and not found (i.e. - they exist in the local cache) + returns False if any property value is a db.Entity object that does not have an ID and also + is not contained in the local cache. For example, if it was checked before whether such a + db.Entity object existed remotely and it was not found, it would have been added to the + local cache. """ for p in record.properties: # if (is_reference(p) @@ -716,7 +718,7 @@ class Crawler(object): resolved_references = True # e.g. 
references an identifiable that does not exist remotely - elif self.all_references_are_existing_already(record): + elif self.no_uncached_entity_object_in_references(record): # TODO: (for review) # This was the old version, but also for this case the diff --git a/unittests/test_tool.py b/unittests/test_tool.py index 67ecc240c7ddf73b41c9b7e247f74d54b5fbae15..65220cb65b5a9d7f0d5e7a03b7315ad2c4a9c193 100755 --- a/unittests/test_tool.py +++ b/unittests/test_tool.py @@ -509,31 +509,31 @@ def test_all_references_are_existing_already(crawler): })) # one reference with id -> check - assert crawler.all_references_are_existing_already( + assert crawler.no_uncached_entity_object_in_references( db.Record(name="C").add_parent("C").add_property('d', 123)) # one ref with Entity with id -> check - assert crawler.all_references_are_existing_already(db.Record(name="C") - .add_property('d', db.Record(id=123))) + assert crawler.no_uncached_entity_object_in_references(db.Record(name="C") + .add_property('d', db.Record(id=123))) # one ref with id one with Entity with id (mixed) -> check - assert crawler.all_references_are_existing_already(db.Record(name="C").add_parent("D") - .add_property('d', 123) - .add_property('b', db.Record(id=123))) + assert crawler.no_uncached_entity_object_in_references(db.Record(name="C").add_parent("D") + .add_property('d', 123) + .add_property('b', db.Record(id=123))) # entity to be referenced in the following a = db.Record(name="C").add_parent("C").add_property("d", 12311) # one ref with id one with Entity without id (but not identifying) -> fail - assert not crawler.all_references_are_existing_already(db.Record(name="C").add_parent("C") - .add_property('d', 123) - .add_property('e', a)) + assert not crawler.no_uncached_entity_object_in_references(db.Record(name="C").add_parent("C") + .add_property('d', 123) + .add_property('e', a)) # one ref with id one with Entity without id (mixed) -> fail - assert not 
crawler.all_references_are_existing_already(db.Record(name="D").add_parent("D") - .add_property('d', 123) - .add_property('e', a)) + assert not crawler.no_uncached_entity_object_in_references(db.Record(name="D").add_parent("D") + .add_property('d', 123) + .add_property('e', a)) crawler.add_identified_record_to_local_cache(a) # one ref with id one with Entity without id but in cache -> check - assert crawler.all_references_are_existing_already(db.Record(name="D").add_parent("D") - .add_property('d', 123) - .add_property('e', a)) + assert crawler.no_uncached_entity_object_in_references(db.Record(name="D").add_parent("D") + .add_property('d', 123) + .add_property('e', a)) # if this ever fails, the mock up may be removed crawler.identifiableAdapter.get_registered_identifiable.assert_called()