From 3093cc1eab7c9d6197553f6a1b2577696198271b Mon Sep 17 00:00:00 2001
From: Alexander Schlemmer <alexander@mail-schlemmer.de>
Date: Thu, 16 Dec 2021 10:50:14 +0100
Subject: [PATCH] FIX: identifiables used as references in properties of
 identifiables are now retrieved

---
 provenance.yml                          | 82 -------------------------
 src/newcrawler/crawl.py                 |  2 +-
 src/newcrawler/identifiable_adapters.py | 31 ++++++++--
 3 files changed, 28 insertions(+), 87 deletions(-)
 delete mode 100644 provenance.yml

diff --git a/provenance.yml b/provenance.yml
deleted file mode 100644
index 20eb0021..00000000
--- a/provenance.yml
+++ /dev/null
@@ -1,82 +0,0 @@
-!!python/object/apply:collections.defaultdict
-args:
-- !!python/name:newcrawler.crawl.%3Clambda%3E ''
-dictitems:
-  Measurement:
-    date: !!python/tuple
-    - - SimulationData
-      - 2020_climate-model-predict
-      - '2020-02-01'
-    - - SimulationData
-      - project_dir
-      - measurement
-    description: !!python/tuple
-    - - SimulationData
-      - 2020_climate-model-predict
-      - '2020-02-01'
-      - README.md
-      - description
-    - - SimulationData
-      - project_dir
-      - measurement
-      - README
-      - description
-    identifier: !!python/tuple
-    - - SimulationData
-      - 2020_climate-model-predict
-      - '2020-02-01'
-    - - SimulationData
-      - project_dir
-      - measurement
-    project: !!python/tuple
-    - - SimulationData
-      - 2020_climate-model-predict
-      - '2020-02-01'
-    - - SimulationData
-      - project_dir
-      - measurement
-    responsible: !!python/tuple
-    - - SimulationData
-      - 2020_climate-model-predict
-      - '2020-02-01'
-      - README.md
-      - responsible
-    - - SimulationData
-      - project_dir
-      - measurement
-      - README
-      - responsible_single
-  Person:
-    first_name: !!python/tuple
-    - - SimulationData
-      - 2020_climate-model-predict
-      - '2020-02-01'
-      - README.md
-      - responsible
-    - - SimulationData
-      - project_dir
-      - measurement
-      - README
-      - responsible_single
-    last_name: !!python/tuple
-    - - SimulationData
-      - 2020_climate-model-predict
-      - '2020-02-01'
-      - README.md
-      - responsible
-    - - SimulationData
-      - project_dir
-      - measurement
-      - README
-      - responsible_single
-  Project:
-    date: !!python/tuple
-    - - SimulationData
-      - 2020_climate-model-predict
-    - - SimulationData
-      - project_dir
-    identifier: !!python/tuple
-    - - SimulationData
-      - 2020_climate-model-predict
-    - - SimulationData
-      - project_dir
diff --git a/src/newcrawler/crawl.py b/src/newcrawler/crawl.py
index f4c15fee..6b31f31a 100644
--- a/src/newcrawler/crawl.py
+++ b/src/newcrawler/crawl.py
@@ -226,7 +226,7 @@ class Crawler(object):
             comp = compare_entities(record, identifiable)
             identical = True
             for j in range(2):
-            # TODO: should be implemented elsewhere
+                # TODO: should be implemented elsewhere
                 for label in ("properties", "parents"):
                     if len(comp[j][label]) > 0:
                         identical = False
diff --git a/src/newcrawler/identifiable_adapters.py b/src/newcrawler/identifiable_adapters.py
index 00c8597a..f306a27c 100644
--- a/src/newcrawler/identifiable_adapters.py
+++ b/src/newcrawler/identifiable_adapters.py
@@ -27,6 +27,7 @@ import caosdb as db
 from abc import abstractmethod
 from .utils import get_value, has_parent
 
+
 class IdentifiableAdapter(object):
     """
     Base class for identifiable adapters.
@@ -40,8 +41,8 @@ class IdentifiableAdapter(object):
     - Identifiable is the concrete identifiable, e.g. the Record based on
       the registered identifiable with all the values filled in.
 
-    - Identified record is the result of retrieving a record based on the identifiable
-      from the database.
+    - Identified record is the result of retrieving a record based on the
+      identifiable from the database.
 
     General question to clarify:
     Do we want to support multiple identifiables per RecordType?
@@ -212,6 +213,10 @@ class LocalStorageIdentifiableAdapter(IdentifiableAdapter):
 
     def check_record(self, record: db.Record, identifiable: db.Record):
         """
+        Check whether a record from the local storage (named "record") is
+        the identified record for an identifiable that was created by
+        a run of the crawler.
+
         Naming of the parameters could be confusing:
         record is the record from the local database to check against.
         identifiable is the record that was created during the crawler run.
@@ -222,13 +227,31 @@ class LocalStorageIdentifiableAdapter(IdentifiableAdapter):
             return False
         for prop in identifiable.properties:
             prop_record = record.get_property(prop.name)
+            if prop_record is None:
+                return False
+
             # if prop is an entity, it needs to be resolved first.
             # there are two different cases:
             # a) prop_record.value has a registered identifiable:
             #      in this case, fetch the identifiable and set the value accordingly
+            if isinstance(prop.value, db.Entity):
+                registered = self.get_registered_identifiable(prop.value)
+
+                if registered is None:
+                    raise NotImplementedError("Non-identifiable references cannot"
+                                              " be used as properties in identifiables.")
+
+                value_identifiable = self.retrieve_identified_record(prop.value)
+                if value_identifiable is None:
+                    raise RuntimeError("The identifiable which is used as property"
+                                       " here has to be inserted first.")
+
+                if value_identifiable.id is None:
+                    raise RuntimeError("The entity has not been assigned an ID.")
+
+                prop.value = value_identifiable.id
+                    
             
-            if prop_record is None:
-                return False
             if prop.value != prop_record.value:
                 if prop.name == "project":
                     breakpoint()
-- 
GitLab