diff --git a/src/caoscrawler/crawl.py b/src/caoscrawler/crawl.py index 75c53b61318167981dbbb330ef25018d2a5b107b..e1cb20097c9d1a8292860b824f79f5414d355c97 100644 --- a/src/caoscrawler/crawl.py +++ b/src/caoscrawler/crawl.py @@ -485,14 +485,27 @@ class Crawler(object): return self._synchronize(self.crawled_data, commit_changes, unique_names=unique_names) - def has_reference_value_without_id(self, ident: Identifiable): + def _has_reference_value_without_id(self, ident: Identifiable) -> bool: """ - Returns True if there is at least one valuee in the properties dict of `ident` which: + Returns True if there is at least one value in the properties attribute of ``ident`` which: + a) is a reference property AND - b) where the value is set to a db.Entity (instead of an ID) AND - c) where the ID of the value (the db.Entity object in b)) is not set (to an integer) + b) where the value is set to a + :external+caosdb-pylib:py:class:`db.Entity <caosdb.common.models.Entity>` + (instead of an ID) AND + c) where the ID of the value (the + :external+caosdb-pylib:py:class:`db.Entity <caosdb.common.models.Entity>` object in b)) + is not set (to an integer) - Returns False otherwise. + Returns + ------- + bool + True if there is a value without id (see above) + + Raises + ------ + ValueError + If no Identifiable is given. """ if ident is None: raise ValueError("Identifiable has to be given as argument") @@ -528,7 +541,7 @@ class Crawler(object): flat.append(p.value) Crawler.create_flat_list([p.value], flat) - def has_missing_object_in_references(self, ident: Identifiable): + def _has_missing_object_in_references(self, ident: Identifiable): """ returns False if any value in the properties attribute is a db.Entity object that is contained in the `remote_missing_cache`. If ident has such an object in @@ -591,7 +604,7 @@ class Crawler(object): def get_from_remote_missing_cache(self, identifiable: Identifiable): """ - returns the identifiable if an identifiable with the same values already exists locally + returns the identified record if an identifiable with the same values already exists locally (Each identifiable that is not found on the remote server, is 'cached' locally to prevent that the same identifiable exists twice) """ @@ -725,7 +738,7 @@ class Crawler(object): resolved_references = True # can we check whether the record(identifiable) exists on the remote server? - elif not self.has_reference_value_without_id(identifiable): + elif not self._has_reference_value_without_id(identifiable): # TODO: remove deepcopy? identified_record = ( self.identifiableAdapter.retrieve_identified_record_for_record( @@ -750,7 +763,7 @@ class Crawler(object): # is it impossible to check this record because an identifiable references a # missing record? - elif self.has_missing_object_in_references(identifiable): + elif self._has_missing_object_in_references(identifiable): to_be_inserted.append(record) self.add_to_remote_missing_cache(record) del flat[i] diff --git a/unittests/test_tool.py b/unittests/test_tool.py index 6344c22308d765478cac5e459b3a7754c73e4e51..8911923576a10ad81846da3a31254d26d4b2e12b 100755 --- a/unittests/test_tool.py +++ b/unittests/test_tool.py @@ -368,8 +368,8 @@ def test_split_into_inserts_and_updates_single(crawler_mocked_identifiable_retri assert crawler.get_from_any_cache(identlist[0]) is None assert crawler.get_from_any_cache(identlist[1]) is None - assert not crawler.has_reference_value_without_id(identlist[0]) - assert not crawler.has_reference_value_without_id(identlist[1]) + assert not crawler._has_reference_value_without_id(identlist[0]) + assert not crawler._has_reference_value_without_id(identlist[1]) assert crawler.identifiableAdapter.retrieve_identified_record_for_record( identlist[0]).id == 1111 assert crawler.identifiableAdapter.retrieve_identified_record_for_record( @@ -486,57 +486,57 @@ def test_has_missing_object_in_references(crawler): })) # one reference with id -> check - assert not crawler.has_missing_object_in_references(db.Record(name="C") - .add_parent("RTC").add_property('d', 123)) + assert not crawler._has_missing_object_in_references(db.Record(name="C") + .add_parent("RTC").add_property('d', 123)) # one ref with Entity with id -> check - assert not crawler.has_missing_object_in_references(db.Record(name="C") - .add_parent("RTC") - .add_property('d', db.Record(id=123) - .add_parent("C"))) + assert not crawler._has_missing_object_in_references(db.Record(name="C") + .add_parent("RTC") + .add_property('d', db.Record(id=123) + .add_parent("C"))) # one ref with id one with Entity with id (mixed) -> check - assert not crawler.has_missing_object_in_references(db.Record(name="C").add_parent("RTD") - .add_property('d', 123) - .add_property('b', db.Record(id=123) - .add_parent("RTC"))) + assert not crawler._has_missing_object_in_references(db.Record(name="C").add_parent("RTD") + .add_property('d', 123) + .add_property('b', db.Record(id=123) + .add_parent("RTC"))) # entity to be referenced in the following a = db.Record(name="C").add_parent("C").add_property("d", 12311) # one ref with id one with Entity without id (but not identifying) -> fail - assert not crawler.has_missing_object_in_references(db.Record(name="C").add_parent("RTC") - .add_property('d', 123) - .add_property('e', a)) + assert not crawler._has_missing_object_in_references(db.Record(name="C").add_parent("RTC") + .add_property('d', 123) + .add_property('e', a)) # one ref with id one with Entity without id (mixed) -> fail - assert not crawler.has_missing_object_in_references(db.Record(name="D").add_parent("RTD") - .add_property('d', 123) - .add_property('e', a)) + assert not crawler._has_missing_object_in_references(db.Record(name="D").add_parent("RTD") + .add_property('d', 123) + .add_property('e', a)) crawler.add_to_remote_missing_cache(a) # one ref with id one with Entity without id but in cache -> check - assert crawler.has_missing_object_in_references(db.Record(name="D").add_parent("RTD") - .add_property('d', 123) - .add_property('e', a)) + assert crawler._has_missing_object_in_references(db.Record(name="D").add_parent("RTD") + .add_property('d', 123) + .add_property('e', a)) # if this ever fails, the mock up may be removed crawler.identifiableAdapter.get_registered_identifiable.assert_called() @pytest.mark.xfail() def test_references_entities_without_ids(crawler, ident): - assert not crawler.has_reference_value_without_id(db.Record().add_parent("Person") - .add_property('last_name', 123) - .add_property('first_name', 123)) + assert not crawler._has_reference_value_without_id(db.Record().add_parent("Person") + .add_property('last_name', 123) + .add_property('first_name', 123)) # id and rec with id - assert not crawler.has_reference_value_without_id(db.Record().add_parent("Person") - .add_property('first_name', 123) - .add_property('last_name', db.Record(id=123))) + assert not crawler._has_reference_value_without_id(db.Record().add_parent("Person") + .add_property('first_name', 123) + .add_property('last_name', db.Record(id=123))) # id and rec with id and one unneeded prop - assert crawler.has_reference_value_without_id(db.Record().add_parent("Person") - .add_property('first_name', 123) - .add_property('stuff', db.Record()) - .add_property('last_name', db.Record(id=123))) + assert crawler._has_reference_value_without_id(db.Record().add_parent("Person") + .add_property('first_name', 123) + .add_property('stuff', db.Record()) + .add_property('last_name', db.Record(id=123))) # one identifying prop is missing - assert crawler.has_reference_value_without_id(db.Record().add_parent("Person") - .add_property('first_name', 123) - .add_property('last_name', db.Record())) + assert crawler._has_reference_value_without_id(db.Record().add_parent("Person") + .add_property('first_name', 123) + .add_property('last_name', db.Record())) def test_replace_entities_with_ids(crawler):