From 439a72c5f4c7d0e6ff7776fb95c6b4dc0a30c383 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Henrik=20tom=20W=C3=B6rden?= <h.tomwoerden@indiscale.com>
Date: Tue, 24 Jan 2023 15:20:55 +0100
Subject: [PATCH] FIX: remove test and set from create_flat_list

---
 src/caoscrawler/crawl.py | 20 ++++++++++--------
 unittests/test_issues.py | 45 ----------------------------------------
 2 files changed, 11 insertions(+), 54 deletions(-)

diff --git a/src/caoscrawler/crawl.py b/src/caoscrawler/crawl.py
index 00be826e..a0aa1f06 100644
--- a/src/caoscrawler/crawl.py
+++ b/src/caoscrawler/crawl.py
@@ -555,17 +555,20 @@ class Crawler(object):
         return False
 
     @staticmethod
-    def create_flat_list(ent_list: list[db.Entity], flat: Optional[set[db.Entity]] = None):
+    def create_flat_list(ent_list: list[db.Entity], flat: Optional[list[db.Entity]] = None):
         """
         Recursively adds entities and all their properties contained in ent_list to
-        the output set flat.
+        the output list flat.
 
         TODO: This function will be moved to pylib as it is also needed by the
               high level API.
         """
+        # Note: A set would be useful here, but we do not want a random order.
         if flat is None:
-            flat = set()
-        flat.update(ent_list)
+            flat = list()
+        for el in ent_list:
+            if el not in flat:
+                flat.append(el)
         for ent in ent_list:
             for p in ent.properties:
                 # For lists append each element that is of type Entity to flat:
@@ -573,13 +576,13 @@ class Crawler(object):
                     for el in p.value:
                         if isinstance(el, db.Entity):
                             if el not in flat:
-                                flat.add(el)
+                                flat.append(el)
                                 Crawler.create_flat_list([el], flat)
                 elif isinstance(p.value, db.Entity):
                     if p.value not in flat:
-                        flat.add(p.value)
+                        flat.append(p.value)
                         Crawler.create_flat_list([p.value], flat)
-        return list(flat)
+        return flat
 
     def _has_missing_object_in_references(self, ident: Identifiable, referencing_entities: list):
         """
@@ -749,8 +752,7 @@ class Crawler(object):
     def split_into_inserts_and_updates(self, ent_list: list[db.Entity]):
         to_be_inserted: list[db.Entity] = []
         to_be_updated: list[db.Entity] = []
-        flat = set()
-        flat = Crawler.create_flat_list(ent_list, flat)
+        flat = Crawler.create_flat_list(ent_list)
 
         # TODO: can the following be removed at some point
         for ent in flat:
diff --git a/unittests/test_issues.py b/unittests/test_issues.py
index 6b7b0d52..a1724e5a 100644
--- a/unittests/test_issues.py
+++ b/unittests/test_issues.py
@@ -70,51 +70,6 @@ def test_issue_10():
     assert float(records[0].get_property("float_prop").value) == 4.0
 
 
-def test_issue_39():
-    """Test for merge conflicts in
-    `crawl.Crawler.split_into_inserts_and_updates` (see
-    https://gitlab.com/caosdb/caosdb-crawler/-/issues/39).
-
-    """
-
-    crawler = Crawler(debug=True)
-
-    # For trying and failing to retrieve remotely identified records
-    def _fake_retrieve(*args, **kwargs):
-        return None
-
-    ident = CaosDBIdentifiableAdapter()
-    # identifiable property is just name for both Record Types
-    ident.register_identifiable("RT_A", db.RecordType().add_parent(
-        name="RT_A").add_property(name="name"))
-    ident.register_identifiable("RT_B", db.RecordType().add_parent(
-        name="RT_B").add_property(name="name"))
-    # overwrite retrieve
-    ident.retrieve_identified_record_for_identifiable = _fake_retrieve
-    crawler.identifiableadapter = ident
-
-    # a1 (has id) references b1 (has no id)
-    a1 = db.Record(name="A", id=101).add_parent(name="RT_A")
-    b1 = db.Record(name="B").add_parent(name="RT_B")
-    a1.add_property(name="RT_B", value=b1)
-
-    # a2 (no id) references b2 (has id)
-    a2 = db.Record(name="A").add_parent(name="RT_A")
-    b2 = db.Record(name="B", id=102).add_parent(name="RT_B")
-    a2.add_property(name="RT_B", value=b2)
-
-    flat_list = [b1, a1, a2, b2]
-
-    # the two records with ids exist remotely
-    crawler.add_to_remote_existing_cache(a1,
-                                         Identifiable(name="A", record_id=101, record_type="RT_A"))
-    crawler.add_to_remote_existing_cache(b2,
-                                         Identifiable(name="B", record_id=102, record_type="RT_B"))
-
-    # this would result in a merge conflict before
-    ins, ups = crawler.split_into_inserts_and_updates(flat_list)
-
-
 @mark.xfail(reason="FIX: https://gitlab.com/caosdb/caosdb-crawler/-/issues/47")
 def test_list_datatypes():
     crawler_definition = {
-- 
GitLab
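A minimal sketch, not part of the patch, of the behaviour the crawl.py hunks establish: create_flat_list now deduplicates into a list, so the entities it returns follow traversal order rather than the arbitrary iteration order of a set. It assumes caosdb (imported as db, as in the tests above) and caoscrawler are importable; the record and RecordType names are illustrative only.

import caosdb as db

from caoscrawler.crawl import Crawler

# Two illustrative records: "a" references "b" via a reference property.
b = db.Record(name="B").add_parent(name="RT_B")
a = db.Record(name="A").add_parent(name="RT_A")
a.add_property(name="RT_B", value=b)

flat = Crawler.create_flat_list([a])

# "a" is appended first because it is in ent_list; "b" is appended while the
# recursion walks a's reference property, so the order is deterministic.
assert flat[0] is a and flat[1] is b

Because split_into_inserts_and_updates iterates over this flat list, a deterministic order keeps the split reproducible across runs, which is the point of the "we do not want a random order" note added in the patch.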