From d0bc3645440834a02d7d2b4046f1c3f31a338ec1 Mon Sep 17 00:00:00 2001
From: Alexander Schlemmer <alexander@mail-schlemmer.de>
Date: Thu, 16 Dec 2021 11:52:33 +0100
Subject: [PATCH] FIX: properties of identifiables are now resolved correctly

---
 src/newcrawler/crawl.py                 | 25 ++++++++++++--
 src/newcrawler/identifiable_adapters.py | 44 ++++++++++++++++++-------
 unittests/test_tool.py                  |  6 +++-
 3 files changed, 59 insertions(+), 16 deletions(-)

diff --git a/src/newcrawler/crawl.py b/src/newcrawler/crawl.py
index 6b31f31a..c9e85187 100644
--- a/src/newcrawler/crawl.py
+++ b/src/newcrawler/crawl.py
@@ -106,7 +106,6 @@ class Crawler(object):
         self.generalStore = generalStore
         if generalStore is None:
             self.generalStore = GeneralStore()
-            
 
         self.identifiableAdapter = identifiableAdapter
         if identifiableAdapter is None:
@@ -175,7 +174,6 @@ class Crawler(object):
             self.debug_converters = self.global_converters + local_converters
 
         return self.updateList
-        
 
     def synchronize(self, updateList: list[db.Record]):
         """
@@ -207,6 +205,12 @@ class Crawler(object):
         # Walk backwards through list, so that deletion is possible:
         for i in reversed(range(len(updateList))):
             record = updateList[i]
+
+            # Resolve references first. The adapter rewrites the properties
+            # of the record in place, so this has to happen before the
+            # identifiable is retrieved; otherwise the lookup would still
+            # see the unresolved referenced entities.
+            self.identifiableAdapter.resolve_references(record)
             identifiable = self.identifiableAdapter.retrieve_identifiable(record)
 
             # if there is no identifiable, move record from update list to insert list:
@@ -227,17 +231,32 @@ class Crawler(object):
             identical = True
             for j in range(2):
                 # TODO: should be implemented elsewhere
-                for label in ("properties", "parents"):
+                for label in ("parents", ):
                     if len(comp[j][label]) > 0:
                         identical = False
                         break
                 if not identical:
                     break
+            for key in comp[0]["properties"]:
+                first = comp[0]["properties"][key]
+                second = comp[1]["properties"].get(key, {})
+                for attribute in ("datatype", "importance"):
+                    # a set attribute that differs requires an update
+                    if (first.get(attribute) is not None and
+                            first[attribute] != second.get(attribute)):
+                        identical = False
+                        break
+
+                if "value" in first:
+                    identical = False
+                if not identical:
+                    break
 
             if identical:
                 del updateList[i]
                 continue
             else:
+                # differences were found: keep the record in updateList
                 pass
 
         return (insertList, updateList)
diff --git a/src/newcrawler/identifiable_adapters.py b/src/newcrawler/identifiable_adapters.py
index f306a27c..5f4ad70a 100644
--- a/src/newcrawler/identifiable_adapters.py
+++ b/src/newcrawler/identifiable_adapters.py
@@ -95,6 +95,9 @@ class IdentifiableAdapter(object):
         """
         pass
 
+    @abstractmethod
+    def resolve_references(self, record: db.Record):
+        """Resolve references in the properties of record; see subclasses."""
 
     def get_identifiable(self, record: db.Record):
         registered_identifiable = self.get_registered_identifiable(record)
@@ -241,20 +244,10 @@ class LocalStorageIdentifiableAdapter(IdentifiableAdapter):
                     raise NotImplementedError("Non-identifiable references cannot"
                                               " be used as properties in identifiables.")
 
-                value_identifiable = self.retrieve_identified_record(prop.value)
-                if value_identifiable is None:
-                    raise RuntimeError("The identifiable which is used as property"
-                                       " here has to be inserted first.")
-
-                if value_identifiable.id is None:
-                    raise RuntimeError("The entity has not been assigned an ID.")
-
-                prop.value = value_identifiable.id
+                raise RuntimeError("The identifiable which is used as property"
+                                   " here has to be inserted first.")
                     
-            
             if prop.value != prop_record.value:
-                if prop.name == "project":
-                    breakpoint()
                 return False
         return True
 
@@ -268,3 +261,30 @@ class LocalStorageIdentifiableAdapter(IdentifiableAdapter):
         if len(candidates) == 0:
             return None
         return candidates[0]
+
+    def _resolve_reference(self, value: db.Entity):
+        """Return the ID of the record retrieve_identified_record finds."""
+        if self.get_registered_identifiable(value) is None:
+            raise NotImplementedError("Non-identifiable references cannot"
+                                      " be used as properties in identifiables.")
+
+        stored = self.retrieve_identified_record(value)
+        if stored is None:
+            raise RuntimeError("The identifiable which is used as property"
+                               " here has to be inserted first.")
+        if stored.id is None:
+            raise RuntimeError("The entity has not been assigned an ID.")
+
+        # hand back only the ID; resolve_references stores it as the value
+        return stored.id
+
+    def resolve_references(self, record: db.Record):
+        """Replace entity-valued properties of record by the entities' IDs."""
+        for prop in record.properties:
+            if isinstance(prop.value, db.Entity):
+                prop.value = self._resolve_reference(prop.value)
+            elif isinstance(prop.value, list):
+                # list-valued property: resolve its elements in place
+                for index, element in enumerate(prop.value):
+                    if isinstance(element, db.Entity):
+                        prop.value[index] = self._resolve_reference(element)
diff --git a/unittests/test_tool.py b/unittests/test_tool.py
index 38280773..e5664461 100755
--- a/unittests/test_tool.py
+++ b/unittests/test_tool.py
@@ -19,6 +19,7 @@ from pytest import raises
 # - DONE: provenance in structure elements and converters for properties of records
 # - TODO: list whether information from structure elements and converters was used
 
+
 def rfp(*pathcomponents):
     """
     Return full path.
@@ -26,12 +27,14 @@ def rfp(*pathcomponents):
     """
     return join(dirname(__file__), *pathcomponents)
 
+
 def dircheckstr(*pathcomponents):
     """
     Return the debug tree identifier for a given path.
     """
     return "newcrawler.structure_elements.Directory: " + basename(join(*pathcomponents)) + ", " + rfp("test_directories", "examples_article", *pathcomponents)
 
+
 def test_crawler():
     crawler = Crawler(debug=True)
     crawler.crawl_directory(rfp("test_directories", "examples_article"),
@@ -291,16 +294,17 @@ def test_crawler_update_list():
     assert len(comp[1]["properties"]) == 0
 
     insl, updl = crawler.synchronize(crawler.updateList)
+    # everything is already in sync: nothing left to insert or update
     assert len(insl) == 0
     assert len(updl) == 0
 
+
 def test_identifiable_adapter():
     query = IdentifiableAdapter.create_query_for_identifiable(
         db.Record().add_parent("Person")
         .add_property("first_name", value="A")
         .add_property("last_name", value="B"))
     assert query.lower() == "find record person with 'first_name'='a' and 'last_name'='b' "
-    
 
 
 def test_provenance_debug_data():
-- 
GitLab