From b60ece3ae3fa88662e484a312bf9ef4db308670b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20tom=20W=C3=B6rden?= <h.tomwoerden@indiscale.com> Date: Sun, 13 Nov 2022 15:33:06 +0100 Subject: [PATCH] doc and make private --- src/caoscrawler/crawl.py | 31 ++++++++++++------ unittests/test_tool.py | 68 ++++++++++++++++++++-------------------- 2 files changed, 56 insertions(+), 43 deletions(-) diff --git a/src/caoscrawler/crawl.py b/src/caoscrawler/crawl.py index 75c53b61..e1cb2009 100644 --- a/src/caoscrawler/crawl.py +++ b/src/caoscrawler/crawl.py @@ -485,14 +485,27 @@ class Crawler(object): return self._synchronize(self.crawled_data, commit_changes, unique_names=unique_names) - def has_reference_value_without_id(self, ident: Identifiable): + def _has_reference_value_without_id(self, ident: Identifiable) -> bool: """ - Returns True if there is at least one valuee in the properties dict of `ident` which: + Returns True if there is at least one value in the properties attribute of ``ident`` which: + a) is a reference property AND - b) where the value is set to a db.Entity (instead of an ID) AND - c) where the ID of the value (the db.Entity object in b)) is not set (to an integer) + b) where the value is set to a + :external+caosdb-pylib:py:class:`db.Entity <caosdb.common.models.Entity>` + (instead of an ID) AND + c) where the ID of the value (the + :external+caosdb-pylib:py:class:`db.Entity <caosdb.common.models.Entity>` object in b)) + is not set (to an integer) - Returns False otherwise. + Returns + ------- + bool + True if there is a value without id (see above) + + Raises + ------ + ValueError + If no Identifiable is given. """ if ident is None: raise ValueError("Identifiable has to be given as argument") @@ -528,7 +541,7 @@ class Crawler(object): flat.append(p.value) Crawler.create_flat_list([p.value], flat) - def has_missing_object_in_references(self, ident: Identifiable): + def _has_missing_object_in_references(self, ident: Identifiable): """ returns False if any value in the properties attribute is a db.Entity object that is contained in the `remote_missing_cache`. If ident has such an object in @@ -591,7 +604,7 @@ class Crawler(object): def get_from_remote_missing_cache(self, identifiable: Identifiable): """ - returns the identifiable if an identifiable with the same values already exists locally + returns the identified record if an identifiable with the same values already exists locally (Each identifiable that is not found on the remote server, is 'cached' locally to prevent that the same identifiable exists twice) """ @@ -725,7 +738,7 @@ class Crawler(object): resolved_references = True # can we check whether the record(identifiable) exists on the remote server? - elif not self.has_reference_value_without_id(identifiable): + elif not self._has_reference_value_without_id(identifiable): # TODO: remove deepcopy? identified_record = ( self.identifiableAdapter.retrieve_identified_record_for_record( @@ -750,7 +763,7 @@ class Crawler(object): # is it impossible to check this record because an identifiable references a # missing record? - elif self.has_missing_object_in_references(identifiable): + elif self._has_missing_object_in_references(identifiable): to_be_inserted.append(record) self.add_to_remote_missing_cache(record) del flat[i] diff --git a/unittests/test_tool.py b/unittests/test_tool.py index 6344c223..89119235 100755 --- a/unittests/test_tool.py +++ b/unittests/test_tool.py @@ -368,8 +368,8 @@ def test_split_into_inserts_and_updates_single(crawler_mocked_identifiable_retri assert crawler.get_from_any_cache(identlist[0]) is None assert crawler.get_from_any_cache(identlist[1]) is None - assert not crawler.has_reference_value_without_id(identlist[0]) - assert not crawler.has_reference_value_without_id(identlist[1]) + assert not crawler._has_reference_value_without_id(identlist[0]) + assert not crawler._has_reference_value_without_id(identlist[1]) assert crawler.identifiableAdapter.retrieve_identified_record_for_record( identlist[0]).id == 1111 assert crawler.identifiableAdapter.retrieve_identified_record_for_record( @@ -486,57 +486,57 @@ def test_has_missing_object_in_references(crawler): })) # one reference with id -> check - assert not crawler.has_missing_object_in_references(db.Record(name="C") - .add_parent("RTC").add_property('d', 123)) + assert not crawler._has_missing_object_in_references(db.Record(name="C") + .add_parent("RTC").add_property('d', 123)) # one ref with Entity with id -> check - assert not crawler.has_missing_object_in_references(db.Record(name="C") - .add_parent("RTC") - .add_property('d', db.Record(id=123) - .add_parent("C"))) + assert not crawler._has_missing_object_in_references(db.Record(name="C") + .add_parent("RTC") + .add_property('d', db.Record(id=123) + .add_parent("C"))) # one ref with id one with Entity with id (mixed) -> check - assert not crawler.has_missing_object_in_references(db.Record(name="C").add_parent("RTD") - .add_property('d', 123) - .add_property('b', db.Record(id=123) - .add_parent("RTC"))) + assert not crawler._has_missing_object_in_references(db.Record(name="C").add_parent("RTD") + .add_property('d', 123) + .add_property('b', db.Record(id=123) + .add_parent("RTC"))) # entity to be referenced in the following a = db.Record(name="C").add_parent("C").add_property("d", 12311) # one ref with id one with Entity without id (but not identifying) -> fail - assert not crawler.has_missing_object_in_references(db.Record(name="C").add_parent("RTC") - .add_property('d', 123) - .add_property('e', a)) + assert not crawler._has_missing_object_in_references(db.Record(name="C").add_parent("RTC") + .add_property('d', 123) + .add_property('e', a)) # one ref with id one with Entity without id (mixed) -> fail - assert not crawler.has_missing_object_in_references(db.Record(name="D").add_parent("RTD") - .add_property('d', 123) - .add_property('e', a)) + assert not crawler._has_missing_object_in_references(db.Record(name="D").add_parent("RTD") + .add_property('d', 123) + .add_property('e', a)) crawler.add_to_remote_missing_cache(a) # one ref with id one with Entity without id but in cache -> check - assert crawler.has_missing_object_in_references(db.Record(name="D").add_parent("RTD") - .add_property('d', 123) - .add_property('e', a)) + assert crawler._has_missing_object_in_references(db.Record(name="D").add_parent("RTD") + .add_property('d', 123) + .add_property('e', a)) # if this ever fails, the mock up may be removed crawler.identifiableAdapter.get_registered_identifiable.assert_called() @pytest.mark.xfail() def test_references_entities_without_ids(crawler, ident): - assert not crawler.has_reference_value_without_id(db.Record().add_parent("Person") - .add_property('last_name', 123) - .add_property('first_name', 123)) + assert not crawler._has_reference_value_without_id(db.Record().add_parent("Person") + .add_property('last_name', 123) + .add_property('first_name', 123)) # id and rec with id - assert not crawler.has_reference_value_without_id(db.Record().add_parent("Person") - .add_property('first_name', 123) - .add_property('last_name', db.Record(id=123))) + assert not crawler._has_reference_value_without_id(db.Record().add_parent("Person") + .add_property('first_name', 123) + .add_property('last_name', db.Record(id=123))) # id and rec with id and one unneeded prop - assert crawler.has_reference_value_without_id(db.Record().add_parent("Person") - .add_property('first_name', 123) - .add_property('stuff', db.Record()) - .add_property('last_name', db.Record(id=123))) + assert crawler._has_reference_value_without_id(db.Record().add_parent("Person") + .add_property('first_name', 123) + .add_property('stuff', db.Record()) + .add_property('last_name', db.Record(id=123))) # one identifying prop is missing - assert crawler.has_reference_value_without_id(db.Record().add_parent("Person") - .add_property('first_name', 123) - .add_property('last_name', db.Record())) + assert crawler._has_reference_value_without_id(db.Record().add_parent("Person") + .add_property('first_name', 123) + .add_property('last_name', db.Record())) def test_replace_entities_with_ids(crawler): -- GitLab