diff --git a/integrationtests/test_realworld_example.py b/integrationtests/test_realworld_example.py index b2afe974ebc7f48fc3bb127cecb5d6def8ae87c7..342a3d13b2b143545bfe39720a55402251083d16 100644 --- a/integrationtests/test_realworld_example.py +++ b/integrationtests/test_realworld_example.py @@ -125,10 +125,6 @@ def test_dataset( "start_datetime='2022-02-10T16:36:48+01:00'") == 1 -@pytest.mark.xfail( - reason="Reference properties are not updated correctly. " - "See https://gitlab.indiscale.com/caosdb/src/caosdb-crawler/-/issues/12." -) def test_event_update(clear_database, usemodel): identifiable_path = os.path.join(DATADIR, "identifiables.yml") diff --git a/src/caoscrawler/converters.py b/src/caoscrawler/converters.py index b8b9bd2ce7bff206d1233953f05c795a45a5b4ca..0b81581080230361a4f0ac1cde74e55e4b158329 100644 --- a/src/caoscrawler/converters.py +++ b/src/caoscrawler/converters.py @@ -113,7 +113,7 @@ def create_records(values: GeneralStore, records: RecordStore, def_records: dict): # list of keys to identify, which variables have been set by which paths: - # these are tuples: + # the items are tuples: # 0: record name # 1: property name keys_modified = [] diff --git a/src/caoscrawler/crawl.py b/src/caoscrawler/crawl.py index b0f576a2c73342cc1301ff0f27b74bb519768541..4c096d192681764595e2db7398a2cef2814069fb 100644 --- a/src/caoscrawler/crawl.py +++ b/src/caoscrawler/crawl.py @@ -684,15 +684,16 @@ class Crawler(object): return to_be_inserted, to_be_updated - # TODO: replace _by_ with _with_ - def replace_entities_by_ids(self, rec: db.Record): + def replace_entities_with_ids(self, rec: db.Record): for el in rec.properties: if isinstance(el.value, db.Entity): - el.value = el.value.id + if el.value.id is not None: + el.value = el.value.id elif isinstance(el.value, list): for index, val in enumerate(el.value): if isinstance(val, db.Entity): - el.value[index] = val.id + if val.id is not None: + el.value[index] = val.id @staticmethod def remove_unnecessary_updates(updateList: list[db.Record], @@ -765,16 +766,17 @@ class Crawler(object): if self.identifiableAdapter is None: raise RuntimeError("Should not happen.") - to_be_inserted, to_be_updated = self.split_into_inserts_and_updates( - updateList) + to_be_inserted, to_be_updated = self.split_into_inserts_and_updates(updateList) - # remove unnecessary updates from list # TODO: refactoring of typo for el in to_be_updated: - self.replace_entities_by_ids(el) + # all entity objects are replaced by their IDs except for the not yet inserted ones + self.replace_entities_with_ids(el) - identified_records = [self.identifiableAdapter.retrieve_identified_record_for_record(record) for record - in to_be_updated] + identified_records = [ + self.identifiableAdapter.retrieve_identified_record_for_record(record) + for record in to_be_updated] + # remove unnecessary updates from list by comparing the target records to the existing ones self.remove_unnecessary_updates(to_be_updated, identified_records) if commit_changes: diff --git a/src/caoscrawler/identifiable_adapters.py b/src/caoscrawler/identifiable_adapters.py index 47fd5324a4803c67d7c9f99448378e7b5f9241bd..ce3a29b7484de83a17f27c27f3373ab91df70c9a 100644 --- a/src/caoscrawler/identifiable_adapters.py +++ b/src/caoscrawler/identifiable_adapters.py @@ -245,6 +245,7 @@ class IdentifiableAdapter(metaclass=ABCMeta): pass # TODO: remove side effect + # TODO: use ID if record has one? def retrieve_identified_record_for_record(self, record: db.Record): """ This function combines all functionality of the IdentifierAdapter by diff --git a/unittests/test_tool.py b/unittests/test_tool.py index 1e7f10069c49ce6cab71da5f469e28b69158b4b5..61fc4f48150e12b44330efa882baf19fa8edd08a 100755 --- a/unittests/test_tool.py +++ b/unittests/test_tool.py @@ -512,12 +512,12 @@ def test_can_be_checked_externally(crawler): .add_property('b', db.Record())) -def test_replace_entities_by_ids(crawler): +def test_replace_entities_with_ids(crawler): a = (db.Record().add_parent("B").add_property("A", 12345) .add_property("B", db.Record(id=12345)) .add_property("C", [db.Record(id=12345), 233324])) - crawler.replace_entities_by_ids(a) + crawler.replace_entities_with_ids(a) assert a.get_property("A").value == 12345 assert a.get_property("B").value == 12345 assert a.get_property("C").value == [12345, 233324]