From be7f26b1b7ef278812bcd61c9bab0bfaf2d6a415 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Henrik=20tom=20W=C3=B6rden?= <h.tomwoerden@indiscale.com>
Date: Tue, 28 Dec 2021 18:36:56 +0100
Subject: [PATCH] WIP

---
 .gitignore                              |  3 +-
 src/newcrawler/crawl.py                 |  1 +
 src/newcrawler/identifiable_adapters.py | 26 ++++--------
 unittests/test_tool.py                  | 56 ++++++++++++++++---------
 4 files changed, 47 insertions(+), 39 deletions(-)

diff --git a/.gitignore b/.gitignore
index 67d5d78d..2282d256 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,5 +1,6 @@
 src/newcrawler.egg-info/
+__pycache__
 unittests/provenance.yml
 .coverage
 TAGS
-src/.coverage
\ No newline at end of file
+src/.coverage
diff --git a/src/newcrawler/crawl.py b/src/newcrawler/crawl.py
index bd47eba8..89efc089 100644
--- a/src/newcrawler/crawl.py
+++ b/src/newcrawler/crawl.py
@@ -442,6 +442,7 @@ class Crawler(object):
 
         identified_records = [self.identifiableAdapter.retrieve_identifiable(record) for record
                               in to_be_updated]
+
         self.remove_unnecessary_updates(to_be_updated, identified_records)
 
         self.execute_inserts_in_list(to_be_inserted)
diff --git a/src/newcrawler/identifiable_adapters.py b/src/newcrawler/identifiable_adapters.py
index 89b8d4e1..5bddc64c 100644
--- a/src/newcrawler/identifiable_adapters.py
+++ b/src/newcrawler/identifiable_adapters.py
@@ -148,17 +148,10 @@ class IdentifiableAdapter(object):
             # case A: in the registered identifiable
             # case B: in the identifiable
 
+            if record.get_property(prop.name) is None:
+                raise NotImplementedError()
             record_prop = record.get_property(prop.name)
             newval = record_prop.value
-            if isinstance(record_prop.value, db.Entity):
-                newval = self.resolve_reference(record_prop.value)
-            elif isinstance(record_prop.value, list):
-                newval = list()
-                for element in record_prop.value:
-                    if isinstance(element, db.Entity):
-                        newval.append(self.resolve_reference(element))
-                    else:
-                        newval.append(element)
             record_prop_new = db.Property(name=record_prop.name,
                                           id=record_prop.id,
                                           description=record_prop.description,
@@ -287,16 +280,13 @@ class LocalStorageIdentifiableAdapter(IdentifiableAdapter):
             # a) prop_record.value has a registered identifiable:
             #      in this case, fetch the identifiable and set the value accordingly
             if isinstance(prop.value, db.Entity):  # lists are not checked here
-                registered = self.get_registered_identifiable(prop.value)
-
-                if registered is None:
-                    raise NotImplementedError("Non-identifiable references cannot"
-                                              " be used as properties in identifiables.")
-
-                raise RuntimeError("The identifiable which is used as property"
-                                   " here has to be inserted first.")
+                otherid = prop_record.value
+                if isinstance(prop_record.value, db.Entity):
+                    otherid = prop_record.value.id
+                if prop.value.id != otherid:
+                    return False
 
-            if prop.value != prop_record.value:
+            elif prop.value != prop_record.value:
                 return False
         return True
 
diff --git a/unittests/test_tool.py b/unittests/test_tool.py
index 51774720..e26b0b0c 100755
--- a/unittests/test_tool.py
+++ b/unittests/test_tool.py
@@ -18,6 +18,13 @@ import pytest
 from pytest import raises
 
 
+def basic_ident_lookup(rec, idents):
+    """Return the ``idents`` entry for the name of ``rec``'s first parent, or None."""
+    # Only the first parent is consulted; any further parents are ignored.
+    parent_name = rec.parents[0].name
+    return idents[parent_name] if parent_name in idents else None
+
+
 def rfp(*pathcomponents):
     """
     Return full path.
@@ -238,8 +245,15 @@ def test_crawler_update_list(crawler, ident):
     assert len(comp[0]["properties"]) == 0
     assert len(comp[1]["properties"]) == 0
 
-    insl, updl = crawler.synchronize()
+    insl, updl = crawler.split_into_inserts_and_updates(crawler.updateList)
+
     assert len(insl) == 0
+    assert len(updl) == 18
+    identified_records = [crawler.identifiableAdapter.retrieve_identifiable(record) for record
+                          in updl]
+    for el in updl:
+        crawler.replace_entities_by_ids(el)
+    Crawler.remove_unnecessary_updates(updl, identified_records)
     assert len(updl) == 0
 
 
@@ -430,41 +444,43 @@ def test_split_into_inserts_and_updates_with_copy_attr(mock_retrieve):
 
 
 def test_all_references_are_existing_already(crawler):
-    def base_mocked_lookup(rec, known):
-        if rec.name in known:
-            return known[rec.name]
-        else:
-            return None
+    registered_identifiables = {
+        "C": db.Record().add_parent("C").add_property("a"),
+        "D": db.Record().add_parent("D").add_property("a").add_property("b")}
     crawler.identifiableAdapter.get_registered_identifiable = Mock(side_effect=partial(
-        base_mocked_lookup, known={"A": db.Record(name="A").add_parent("C"),
-                                   "B": db.Record(name="B").add_parent("C")}))
+        basic_ident_lookup, idents=registered_identifiables))
 
-    assert crawler.all_references_are_existing_already(db.Record().add_property('a', 123))
-    assert crawler.all_references_are_existing_already(db.Record()
+    assert crawler.all_references_are_existing_already(
+        db.Record().add_parent("C").add_property('a', 123))
+    assert crawler.all_references_are_existing_already(db.Record().add_parent("C")
                                                        .add_property('a', db.Record(id=123)))
-    assert crawler.all_references_are_existing_already(db.Record()
+    assert crawler.all_references_are_existing_already(db.Record().add_parent("D")
                                                        .add_property('a', 123)
                                                        .add_property('b', db.Record(id=123)))
-    assert not crawler.all_references_are_existing_already(db.Record()
+    a = db.Record(name="A").add_parent("C").add_property("a", 12311)
+    assert not crawler.all_references_are_existing_already(db.Record().add_parent("D")
                                                            .add_property('a', 123)
-                                                           .add_property('b', db.Record(name="A")
-                                                                         .add_parent("C")))
-    a = db.Record(name="A").add_parent("C")
+                                                           .add_property('b', a))
     crawler.add_identified_record_to_local_cache(a)
-    assert crawler.all_references_are_existing_already(db.Record()
+    assert crawler.all_references_are_existing_already(db.Record().add_parent("D")
                                                        .add_property('a', 123)
                                                        .add_property('b', a))
 
 
 def test_can_be_checked_externally(crawler):
-    assert crawler.can_be_checked_externally(db.Record().add_property('a', 123))
-    assert crawler.can_be_checked_externally(db.Record()
+    registered_identifiables = {
+        "C": db.Record().add_parent("C").add_property("a"),
+        "D": db.Record().add_parent("D").add_property("a").add_property("b")}
+    crawler.identifiableAdapter.get_registered_identifiable = Mock(side_effect=partial(
+        basic_ident_lookup, idents=registered_identifiables))
+    assert crawler.can_be_checked_externally(db.Record().add_parent("C").add_property('a', 123))
+    assert crawler.can_be_checked_externally(db.Record().add_parent("C")
                                              .add_property('a', db.Record(id=123)))
-    assert crawler.can_be_checked_externally(db.Record()
+    assert crawler.can_be_checked_externally(db.Record().add_parent("D")
                                              .add_property('a', 123)
                                              .add_property('b', db.Record(id=123)))
 
-    assert not crawler.can_be_checked_externally(db.Record()
+    assert not crawler.can_be_checked_externally(db.Record().add_parent("D")
                                                  .add_property('a', 123)
                                                  .add_property('b', db.Record()))
 
-- 
GitLab