From 33e7f66f7e9417cdf6850d59c1d2e3aa9105dd91 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Henrik=20tom=20W=C3=B6rden?= <h.tomwoerden@indiscale.com>
Date: Sun, 23 Oct 2022 14:53:40 +0200
Subject: [PATCH] WIP: some fixes

---
 src/caoscrawler/crawl.py                 | 20 +++++--------
 src/caoscrawler/identifiable_adapters.py | 37 +++++++++++++++---------
 unittests/test_tool.py                   | 28 ++++++++++++++----
 3 files changed, 54 insertions(+), 31 deletions(-)

diff --git a/src/caoscrawler/crawl.py b/src/caoscrawler/crawl.py
index 72caf81a..f28eb5e8 100644
--- a/src/caoscrawler/crawl.py
+++ b/src/caoscrawler/crawl.py
@@ -697,8 +697,8 @@ class Crawler(object):
         Create a dictionary of dictionaries of the form:
         Dict[int, Dict[str, db.Entity]]
 
-        - The integer index is the id of the value object.
-        - The string is the name of the first parent of the value object.
+        - The integer index is the Python id of the value object.
+        - The string is the name of the first parent of the referencing object.
 
         Each value objects is taken from the values of all properties from the list flat.
 
@@ -715,9 +715,7 @@ class Crawler(object):
                     if isinstance(v, db.Entity):
                         if id(v) not in references:
                             references[id(v)] = {}
-                        # TODO: Shouldn't the parent's name always be the same for
-                        #       a single id?
-                        references[id(v)][v.parents[0].name] = ent
+                        references[id(v)][ent.parents[0].name] = ent
         return references
 
     def split_into_inserts_and_updates(self, ent_list: List[db.Entity]):
@@ -734,8 +732,7 @@ class Crawler(object):
             if ent.role == "Record" and len(ent.parents) == 0:
                 raise RuntimeError("Records must have a parent.")
 
-        # TODO: where is this used:
-        references = self.create_reference_mapping(flat)
+        referencing_entities = self.create_reference_mapping(flat)
 
         resolved_references = True
         # flat contains Entities which could not yet be checked against the remote server
@@ -763,10 +760,9 @@ class Crawler(object):
                 # can we check whether the record(identifiable) exists on the remote server?
                 elif not self.has_reference_value_without_id(
                         self.identifiableAdapter.get_identifiable(record)):
-                    # TODO: remove deepcopy?
                     identified_record = (
                         self.identifiableAdapter.retrieve_identified_record_for_record(
-                            deepcopy(record)))
+                            record, referencing_entities=referencing_entities))
                     if identified_record is None:
                         # identifiable does not exist remotely -> record needs to be inserted
                         to_be_inserted.append(record)
@@ -801,7 +797,7 @@ class Crawler(object):
             raise RuntimeError(
                 "Could not resolve all Entity references. Circular Dependency?")
 
-        return to_be_inserted, to_be_updated
+        return to_be_inserted, to_be_updated, referencing_entities
 
     def replace_entities_with_ids(self, rec: db.Record):
         for el in rec.properties:
@@ -949,7 +945,7 @@ class Crawler(object):
         if self.identifiableAdapter is None:
             raise RuntimeError("Should not happen.")
 
-        to_be_inserted, to_be_updated = self.split_into_inserts_and_updates(
+        to_be_inserted, to_be_updated, referencing_entities = self.split_into_inserts_and_updates(
             target_data)
 
         # TODO: refactoring of typo
@@ -959,7 +955,7 @@ class Crawler(object):
 
         identified_records = [
             self.identifiableAdapter.retrieve_identified_record_for_record(
-                record)
+                record, referencing_entities)
             for record in to_be_updated]
         # remove unnecessary updates from list by comparing the target records to the existing ones
         self.remove_unnecessary_updates(to_be_updated, identified_records)
diff --git a/src/caoscrawler/identifiable_adapters.py b/src/caoscrawler/identifiable_adapters.py
index 4c0b17fa..7e2d9864 100644
--- a/src/caoscrawler/identifiable_adapters.py
+++ b/src/caoscrawler/identifiable_adapters.py
@@ -100,6 +100,10 @@ class IdentifiableAdapter(metaclass=ABCMeta):
                 "Multiple parents for identifiables not supported.")
 
         query_string = "FIND Record " + ident.get_parents()[0].name
+        if ident.get_property("is_referenced_by") is not None:
+            query_string += (" WHICH IS REFERENCED BY "
+                             + ident.get_property("is_referenced_by").value + " AND")
+
         query_string += " WITH "
 
         if ident.name is None and len(ident.get_properties()) == 0:
@@ -118,7 +122,9 @@ class IdentifiableAdapter(metaclass=ABCMeta):
     def create_property_query(entity: db.Entity):
         query_string = ""
         for p in entity.get_properties():
-            if p.value is None:
+            if p.name == "is_referenced_by":
+                continue
+            elif p.value is None:
                 query_string += "'" + p.name + "' IS NULL AND "
             elif isinstance(p.value, list):
                 for v in p.value:
@@ -156,7 +162,7 @@ class IdentifiableAdapter(metaclass=ABCMeta):
     # -------------------------
     # TODO: this uses an active database connection which is discouraged here!
     @staticmethod
-    def get_recordTypeName(rt_id):
+    def get_recordtype_name(rt_id):
         return db.RecordType(rt_id).retrieve().name
     # -------------------------
 
@@ -213,10 +219,11 @@ class IdentifiableAdapter(metaclass=ABCMeta):
             # case A: in the registered identifiable
             # case B: in the identifiable
 
-            if prop.name == "is_referenced_by":
-                rtname = self.get_recordTypeName(prop.value)
-                if rtname in referencing_entities:
-                    record.add_property("is_referenced_by", referencing_entities[rtname])
+            if prop.name.lower() == "is_referenced_by":
+                rtname = self.get_recordtype_name(prop.value)
+                if (id(record) in referencing_entities
+                        and rtname in referencing_entities[id(record)]):
+                    identifiable.add_property("is_referenced_by", referencing_entities[rtname])
                     continue
             record_prop = record.get_property(prop.name)
             if record_prop is None:
@@ -247,7 +254,8 @@ class IdentifiableAdapter(metaclass=ABCMeta):
         return identifiable
 
     @abstractmethod
-    def retrieve_identified_record_for_identifiable(self, identifiable: db.Record):
+    def retrieve_identified_record_for_identifiable(self, identifiable: db.Record,
+                                                    referencing_entities=None):
         """
         Retrieve identifiable record for a given identifiable.
 
@@ -260,7 +268,7 @@ class IdentifiableAdapter(metaclass=ABCMeta):
 
     # TODO: remove side effect
     # TODO: use ID if record has one?
-    def retrieve_identified_record_for_record(self, record: db.Record):
+    def retrieve_identified_record_for_record(self, record: db.Record, referencing_entities=None):
         """
         This function combines all functionality of the IdentifierAdapter by
         returning the identifiable after having checked for an appropriate
@@ -269,7 +277,7 @@ class IdentifiableAdapter(metaclass=ABCMeta):
         In case there was no appropriate registered identifiable or no identifiable could
         be found return value is None.
         """
-        identifiable = self.get_identifiable(record)
+        identifiable = self.get_identifiable(record, referencing_entities=referencing_entities)
 
         if identifiable is None:
             return None
@@ -277,7 +285,8 @@ class IdentifiableAdapter(metaclass=ABCMeta):
         if identifiable.role == "File":
             return self.get_file(identifiable)
 
-        return self.retrieve_identified_record_for_identifiable(identifiable)
+        return self.retrieve_identified_record_for_identifiable(
+            identifiable, referencing_entities=referencing_entities)
 
 
 class LocalStorageIdentifiableAdapter(IdentifiableAdapter):
@@ -388,7 +397,8 @@ class LocalStorageIdentifiableAdapter(IdentifiableAdapter):
                 return False
         return True
 
-    def retrieve_identified_record_for_identifiable(self, identifiable: db.Record):
+    def retrieve_identified_record_for_identifiable(self, identifiable:
+                                                    db.Record, referencing_entities=None):
         candidates = []
         for record in self._records:
             if self.check_record(record, identifiable):
@@ -473,8 +483,9 @@ class CaosDBIdentifiableAdapter(IdentifiableAdapter):
             return record
         return record.id
 
-    def retrieve_identified_record_for_identifiable(self, identifiable: db.Record):
-        query_string = self.create_query_for_identifiable(identifiable)
+    def retrieve_identified_record_for_identifiable(self, identifiable: db.Record,
+                                                    referencing_entities=None):
+        query_string = self.create_query_for_identifiable(identifiable, referencing_entities)
         candidates = db.execute_query(query_string)
         if len(candidates) > 1:
             raise RuntimeError(
diff --git a/unittests/test_tool.py b/unittests/test_tool.py
index f08f8061..51ea835d 100755
--- a/unittests/test_tool.py
+++ b/unittests/test_tool.py
@@ -290,6 +290,12 @@ def test_identifiable_adapter():
         .add_property("first_name", value="A")
         .add_property("last_name", value="B"))
     assert query.lower() == "find record person with 'first_name'='a' and 'last_name'='b' "
+    query = IdentifiableAdapter.create_query_for_identifiable(
+        db.Record().add_parent("Person")
+        .add_property("is_referenced_by", value="14433")
+        .add_property("last_name", value="B"))
+    assert query.lower() == ("find record person which is referenced by 14433 and with "
+                             "'last_name'='b' ")
 
 
 def test_remove_unnecessary_updates():
@@ -359,7 +365,7 @@ def test_provenance_debug_data(crawler):
     assert check_key_count("Person") == 14
 
 
-def basic_retrieve_by_name_mock_up(rec, known):
+def basic_retrieve_by_name_mock_up(rec, referencing_entities=None, known=None):
     """ returns a stored Record if rec.name is an existing key, None otherwise """
     if rec.name in known:
         return known[rec.name]
@@ -399,7 +405,7 @@ def test_split_into_inserts_and_updates_single(crawler_mocked_identifiable_retri
     assert crawler.identifiableAdapter.retrieve_identified_record_for_record(
         entlist[1]) is None
 
-    insert, update = crawler.split_into_inserts_and_updates(deepcopy(entlist))
+    insert, update, _ = crawler.split_into_inserts_and_updates(deepcopy(entlist))
     assert len(insert) == 1
     assert insert[0].name == "B"
     assert len(update) == 1
@@ -417,7 +423,7 @@ def test_split_into_inserts_and_updates_with_duplicate(crawler_mocked_identifiab
     # This is identical to a and should be removed
     c = db.Record(name="A").add_parent("C")
     entlist = [a, b, c]
-    insert, update = crawler.split_into_inserts_and_updates(deepcopy(entlist))
+    insert, update, _ = crawler.split_into_inserts_and_updates(deepcopy(entlist))
     assert len(insert) == 1
     assert insert[0].name == "B"
     assert len(update) == 1
@@ -434,7 +440,7 @@ def test_split_into_inserts_and_updates_with_ref(crawler_mocked_identifiable_ret
     b = db.Record(name="B").add_parent("C")
     b.add_property("A", a)
     entlist = [a, b]
-    insert, update = crawler.split_into_inserts_and_updates(entlist)
+    insert, update, _ = crawler.split_into_inserts_and_updates(entlist)
     assert len(insert) == 1
     assert insert[0].name == "B"
     assert len(update) == 1
@@ -469,7 +475,7 @@ def test_split_into_inserts_and_updates_with_complex(crawler_mocked_identifiable
     b.add_property("A", f)
     b.add_property("A", a)
     entlist = [a, b, g]
-    insert, update = crawler.split_into_inserts_and_updates(entlist)
+    insert, update, _ = crawler.split_into_inserts_and_updates(entlist)
     assert len(insert) == 3
     assert "B" in [el.name for el in insert]
     assert len(update) == 1
@@ -489,7 +495,7 @@ def test_split_into_inserts_and_updates_with_copy_attr(crawler_mocked_identifiab
     b = db.Record(name="A").add_parent("C")
     b.add_property("bar", 2)
     entlist = [a, b]
-    insert, update = crawler.split_into_inserts_and_updates(entlist)
+    insert, update, _ = crawler.split_into_inserts_and_updates(entlist)
 
     assert update[0].get_property("bar").value == 2
     assert update[0].get_property("foo").value == 1
@@ -713,3 +719,13 @@ def test_security_mode(updateCacheMock, upmock, insmock, ident):
     reset_mocks([updateCacheMock, insmock, upmock])
     # restore original ident
     ident._records = deepcopy(records_backup)
+
+
+def test_create_reference_mapping():
+    a = db.Record().add_parent("A")
+    b = db.Record().add_parent("B").add_property('a', a)
+    ref = Crawler.create_reference_mapping([a, b])
+    assert id(a) in ref
+    assert id(b) not in ref
+    assert "B" in ref[id(a)]
+    assert ref[id(a)]["B"] is b
-- 
GitLab