From 439a72c5f4c7d0e6ff7776fb95c6b4dc0a30c383 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Henrik=20tom=20W=C3=B6rden?= <h.tomwoerden@indiscale.com>
Date: Tue, 24 Jan 2023 15:20:55 +0100
Subject: [PATCH] FIX: remove test_issue_39 and replace set with list in create_flat_list

---
 src/caoscrawler/crawl.py | 20 ++++++++++--------
 unittests/test_issues.py | 45 ----------------------------------------
 2 files changed, 11 insertions(+), 54 deletions(-)

diff --git a/src/caoscrawler/crawl.py b/src/caoscrawler/crawl.py
index 00be826e..a0aa1f06 100644
--- a/src/caoscrawler/crawl.py
+++ b/src/caoscrawler/crawl.py
@@ -555,17 +555,20 @@ class Crawler(object):
         return False
 
     @staticmethod
-    def create_flat_list(ent_list: list[db.Entity], flat: Optional[set[db.Entity]] = None):
+    def create_flat_list(ent_list: list[db.Entity], flat: Optional[list[db.Entity]] = None):
         """
         Recursively adds entities and all their properties contained in ent_list to
-        the output set flat.
+        the output list flat.
 
         TODO: This function will be moved to pylib as it is also needed by the
               high level API.
         """
+        # Note: A set would be useful here, but its arbitrary iteration order is unwanted.
         if flat is None:
-            flat = set()
-        flat.update(ent_list)
+            flat = list()
+        for el in ent_list:
+            if el not in flat:
+                flat.append(el)
         for ent in ent_list:
             for p in ent.properties:
                 # For lists append each element that is of type Entity to flat:
@@ -573,13 +576,13 @@ class Crawler(object):
                     for el in p.value:
                         if isinstance(el, db.Entity):
                             if el not in flat:
-                                flat.add(el)
+                                flat.append(el)
                                 Crawler.create_flat_list([el], flat)
                 elif isinstance(p.value, db.Entity):
                     if p.value not in flat:
-                        flat.add(p.value)
+                        flat.append(p.value)
                         Crawler.create_flat_list([p.value], flat)
-        return list(flat)
+        return flat
 
     def _has_missing_object_in_references(self, ident: Identifiable, referencing_entities: list):
         """
@@ -749,8 +752,7 @@ class Crawler(object):
     def split_into_inserts_and_updates(self, ent_list: list[db.Entity]):
         to_be_inserted: list[db.Entity] = []
         to_be_updated: list[db.Entity] = []
-        flat = set()
-        flat = Crawler.create_flat_list(ent_list, flat)
+        flat = Crawler.create_flat_list(ent_list)
 
         # TODO: can the following be removed at some point
         for ent in flat:
diff --git a/unittests/test_issues.py b/unittests/test_issues.py
index 6b7b0d52..a1724e5a 100644
--- a/unittests/test_issues.py
+++ b/unittests/test_issues.py
@@ -70,51 +70,6 @@ def test_issue_10():
     assert float(records[0].get_property("float_prop").value) == 4.0
 
 
-def test_issue_39():
-    """Test for merge conflicts in
-    `crawl.Crawler.split_into_inserts_and_updates` (see
-    https://gitlab.com/caosdb/caosdb-crawler/-/issues/39).
-
-    """
-
-    crawler = Crawler(debug=True)
-
-    # For trying and failing to retrieve remotely identified records
-    def _fake_retrieve(*args, **kwargs):
-        return None
-
-    ident = CaosDBIdentifiableAdapter()
-    # identifiable property is just name for both Record Types
-    ident.register_identifiable("RT_A", db.RecordType().add_parent(
-        name="RT_A").add_property(name="name"))
-    ident.register_identifiable("RT_B", db.RecordType().add_parent(
-        name="RT_B").add_property(name="name"))
-    # overwrite retrieve
-    ident.retrieve_identified_record_for_identifiable = _fake_retrieve
-    crawler.identifiableadapter = ident
-
-    # a1 (has id) references b1 (has no id)
-    a1 = db.Record(name="A", id=101).add_parent(name="RT_A")
-    b1 = db.Record(name="B").add_parent(name="RT_B")
-    a1.add_property(name="RT_B", value=b1)
-
-    # a2 (no id) references b2 (has id)
-    a2 = db.Record(name="A").add_parent(name="RT_A")
-    b2 = db.Record(name="B", id=102).add_parent(name="RT_B")
-    a2.add_property(name="RT_B", value=b2)
-
-    flat_list = [b1, a1, a2, b2]
-
-    # the two records with ids exist remotely
-    crawler.add_to_remote_existing_cache(a1,
-                                         Identifiable(name="A", record_id=101, record_type="RT_A"))
-    crawler.add_to_remote_existing_cache(b2,
-                                         Identifiable(name="B", record_id=102, record_type="RT_B"))
-
-    # this would result in a merge conflict before
-    ins, ups = crawler.split_into_inserts_and_updates(flat_list)
-
-
 @mark.xfail(reason="FIX: https://gitlab.com/caosdb/caosdb-crawler/-/issues/47")
 def test_list_datatypes():
     crawler_definition = {
-- 
GitLab