diff --git a/CHANGELOG.md b/CHANGELOG.md index 538f456f98ccec3143fd480345e0a2c59b21987e..263e58dbed69ee6ffb0f576e467f5f0329bc1a48 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -30,6 +30,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - DictTextElement and TextElement were merged into TextElement. The "match" keyword is now invalid for TextElements. - JSONFileConverter creates another level of StructureElements (see "How to upgrade" in the docs) +- create_flat_list function now returns the flat list and also adds the entities + contained in the given list directly ### Deprecated ### diff --git a/src/caoscrawler/crawl.py b/src/caoscrawler/crawl.py index cde198f26ef1083946cc7be51dd98c6e961ae2f6..6cf025a024e8cc392a7175421d47fb69059302a4 100644 --- a/src/caoscrawler/crawl.py +++ b/src/caoscrawler/crawl.py @@ -555,14 +555,20 @@ class Crawler(object): return False @staticmethod - def create_flat_list(ent_list: list[db.Entity], flat: list[db.Entity]): + def create_flat_list(ent_list: list[db.Entity], flat: Optional[list[db.Entity]] = None): """ - Recursively adds all properties contained in entities from ent_list to - the output list flat. Each element will only be added once to the list. + Recursively adds entities and all their properties contained in ent_list to + the output list flat. TODO: This function will be moved to pylib as it is also needed by the high level API. """ + # Note: A set would be useful here, but we do not want a random order. 
+ if flat is None: + flat = list() + for el in ent_list: + if el not in flat: + flat.append(el) for ent in ent_list: for p in ent.properties: # For lists append each element that is of type Entity to flat: @@ -576,6 +582,7 @@ class Crawler(object): if p.value not in flat: flat.append(p.value) Crawler.create_flat_list([p.value], flat) + return flat def _has_missing_object_in_references(self, ident: Identifiable, referencing_entities: list): """ @@ -745,9 +752,7 @@ class Crawler(object): def split_into_inserts_and_updates(self, ent_list: list[db.Entity]): to_be_inserted: list[db.Entity] = [] to_be_updated: list[db.Entity] = [] - flat = list(ent_list) - # assure all entities are direct members TODO Can this be removed at some point?Check only? - Crawler.create_flat_list(ent_list, flat) + flat = Crawler.create_flat_list(ent_list) # TODO: can the following be removed at some point for ent in flat: diff --git a/unittests/test_issues.py b/unittests/test_issues.py index 6b7b0d52ce5f4a1cfe5e4ac189d72eafd1454db7..a1724e5a989190977a7ec0d86846fc2b7433ab5d 100644 --- a/unittests/test_issues.py +++ b/unittests/test_issues.py @@ -70,51 +70,6 @@ def test_issue_10(): assert float(records[0].get_property("float_prop").value) == 4.0 -def test_issue_39(): - """Test for merge conflicts in - `crawl.Crawler.split_into_inserts_and_updates` (see - https://gitlab.com/caosdb/caosdb-crawler/-/issues/39). 
- - """ - - crawler = Crawler(debug=True) - - # For trying and failing to retrieve remotely identified records - def _fake_retrieve(*args, **kwargs): - return None - - ident = CaosDBIdentifiableAdapter() - # identifiable property is just name for both Record Types - ident.register_identifiable("RT_A", db.RecordType().add_parent( - name="RT_A").add_property(name="name")) - ident.register_identifiable("RT_B", db.RecordType().add_parent( - name="RT_B").add_property(name="name")) - # overwrite retrieve - ident.retrieve_identified_record_for_identifiable = _fake_retrieve - crawler.identifiableadapter = ident - - # a1 (has id) references b1 (has no id) - a1 = db.Record(name="A", id=101).add_parent(name="RT_A") - b1 = db.Record(name="B").add_parent(name="RT_B") - a1.add_property(name="RT_B", value=b1) - - # a2 (no id) references b2 (has id) - a2 = db.Record(name="A").add_parent(name="RT_A") - b2 = db.Record(name="B", id=102).add_parent(name="RT_B") - a2.add_property(name="RT_B", value=b2) - - flat_list = [b1, a1, a2, b2] - - # the two records with ids exist remotely - crawler.add_to_remote_existing_cache(a1, - Identifiable(name="A", record_id=101, record_type="RT_A")) - crawler.add_to_remote_existing_cache(b2, - Identifiable(name="B", record_id=102, record_type="RT_B")) - - # this would result in a merge conflict before - ins, ups = crawler.split_into_inserts_and_updates(flat_list) - - @mark.xfail(reason="FIX: https://gitlab.com/caosdb/caosdb-crawler/-/issues/47") def test_list_datatypes(): crawler_definition = { diff --git a/unittests/test_tool.py b/unittests/test_tool.py index 214a654359b8bc222fd9820f8a67cc673d5d1bc8..6a828532c1de9796008a6e51c21811f83b85657a 100755 --- a/unittests/test_tool.py +++ b/unittests/test_tool.py @@ -735,8 +735,22 @@ def test_create_reference_mapping(): def test_create_flat_list(): a = db.Record() + b = db.Record() a.add_property(name="a", value=a) - Crawler.create_flat_list([a], []) + a.add_property(name="b", value=b) + flat = 
Crawler.create_flat_list([a]) + assert len(flat) == 2 + assert a in flat + assert b in flat + c = db.Record() + c.add_property(name="a", value=a) + # This would cause recursion if it is not dealt with properly. + a.add_property(name="c", value=c) + flat = Crawler.create_flat_list([c]) + assert len(flat) == 3 + assert a in flat + assert b in flat + assert c in flat @pytest.fixture