From 06bc27daa2d634f5813675aab8b7e3a0c8cbcc26 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Henrik=20tom=20W=C3=B6rden?= <h.tomwoerden@indiscale.com>
Date: Fri, 10 May 2024 11:18:57 +0200
Subject: [PATCH] SyncGraph: track nonidentifiable nodes, update look-ups on merge

---
 src/caoscrawler/identifiable_adapters.py |  3 +--
 src/caoscrawler/sync_graph.py            | 20 +++++++++++++-------
 src/caoscrawler/sync_node.py             |  2 ++
 unittests/test_sync_graph.py             |  6 +++---
 4 files changed, 19 insertions(+), 12 deletions(-)

diff --git a/src/caoscrawler/identifiable_adapters.py b/src/caoscrawler/identifiable_adapters.py
index 1b8525db..7f54f396 100644
--- a/src/caoscrawler/identifiable_adapters.py
+++ b/src/caoscrawler/identifiable_adapters.py
@@ -309,8 +309,7 @@ startswith: bool, optional
                         raise MissingReferencingEntityError(
                             f"Could not find referencing entities of type(s): {prop.value}\n"
                             f"for registered identifiable:\n{registered_identifiable}\n"
-                            f"There were {len(identifiable_backrefs)
-                                          } referencing entities to choose from.\n"
+                            f"There were {len(identifiable_backrefs)} referencing entities to choose from.\n"
                             f"This error can also occur in case of merge conflicts in the referencing entities."
                         )
                     elif len([e.id for e in identifiable_backrefs if el.id is None]) > 0:
diff --git a/src/caoscrawler/sync_graph.py b/src/caoscrawler/sync_graph.py
index 26ef7f82..6d2f9336 100644
--- a/src/caoscrawler/sync_graph.py
+++ b/src/caoscrawler/sync_graph.py
@@ -113,6 +113,7 @@ class SyncGraph():
         self._identifiable_look_up: Dict[str, SyncNode] = {}
         self._missing: Dict[int, SyncNode] = {}
         self._existing: Dict[int, SyncNode] = {}
+        self._nonidentifiable: Dict[int, SyncNode] = {}
         # entities that are missing get negative IDs to allow identifiable creation
         self._remote_missing_counter = -1
 
@@ -408,6 +409,7 @@ class SyncGraph():
         for node in list(self.nodes[::-1]):
             if "nonidentifiable" in [p.name for p in node.registered_identifiable.properties]:
                 self.unchecked.remove(node)
+                self._nonidentifiable[id(node)] = node
 
     def _merge_into(self, source: SyncNode, target: SyncNode):
         """ FIXME tries to merge record into newrecord
@@ -465,23 +467,27 @@ class SyncGraph():
         self.nodes.remove(source)
         if source in self.unchecked:
             self.unchecked.remove(source)
-        else:
-            self.unchecked.remove(target)
-        assert id(source) not in self._missing
-        assert id(source) not in self._existing
+        # update look-ups so id, path and identifiable entries point to the merged target
         if target.id is not None:
             self._id_look_up[target.id] = target
         if target.path is not None:
             self._path_look_up[target.path] = target
+        if target.identifiable is not None:
+            self._identifiable_look_up[target.identifiable.get_representation()] = target
+
+        if ((id(source) in self._existing and id(target) in self._missing)
+                or (id(target) in self._existing and id(source) in self._missing)):
+            raise RuntimeError("Trying to merge missing and existing")
+
 
-        # due to the merge it might now be possible to create an identifiable
-        if self._identifiable_is_needed(target):
-            self._set_identifiable_of_node(target)
         if id(source) in self._missing and id(target) not in self._missing:
             self._mark_missing(target)
         if id(source) in self._existing and id(target) not in self._existing:
             self._mark_existing(target)
 
+        # due to the merge it might now be possible to create an identifiable
+        if self._identifiable_is_needed(target):
+            self._set_identifiable_of_node(target)
         # This is one of three cases that affect other nodes:
         # - mark existing
         # - mark missing
diff --git a/src/caoscrawler/sync_node.py b/src/caoscrawler/sync_node.py
index 492839a9..1daf358f 100644
--- a/src/caoscrawler/sync_node.py
+++ b/src/caoscrawler/sync_node.py
@@ -74,6 +74,8 @@ class SyncNode():
         if other.identifiable is not None and self.identifiable is not None:
             assert (other.identifiable.get_representation() ==
                     self.identifiable.get_representation())
+        if other.identifiable:
+            self.identifiable = other.identifiable
         for attr in ["id", "path", "file", "role", "path", "name", "description"]:
             if other.__getattribute__(attr) is not None:
                 if self.__getattribute__(attr) is None:
diff --git a/unittests/test_sync_graph.py b/unittests/test_sync_graph.py
index 7927c705..1c34daf0 100644
--- a/unittests/test_sync_graph.py
+++ b/unittests/test_sync_graph.py
@@ -332,7 +332,7 @@ def test_set_id_of_node(simple_adapter):
     st.set_id_of_node(st.unchecked[0], 101)
     assert len(st.nodes) == 1
     assert len(st.unchecked) == 0
-    assert st.nodes[0].properties[0].name == "a"
+    assert st.nodes[0].properties[0].name == "RT2"
 
     # setting the id to None should lead to depending nodes marked as missing
     ent_list = [
@@ -410,9 +410,9 @@ def test_set_id_of_node(simple_adapter):
         st.export_record_lists()
     ent_list = [
         db.Record().add_parent("RT3").add_property('a', value=1)
-        .add_property('b', value=db.Record().add_parent("RT5")),
+        .add_property('b', value=db.Record(name='b').add_parent("RT5")),
         db.Record().add_parent("RT3").add_property('a', value=1)
-        .add_property('b', value=db.Record().add_parent("RT5")),
+        .add_property('b', value=db.Record(name='a').add_parent("RT5")),
     ]
 
     st = SyncGraph(ent_list, simple_adapter)
-- 
GitLab