From 18c02891a64b5208097b01f6eff00e171fc718bb Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Henrik=20tom=20W=C3=B6rden?= <h.tomwoerden@indiscale.com>
Date: Tue, 6 Jul 2021 14:25:57 +0000
Subject: [PATCH] Treat dependencies among identifiables

---
 CHANGELOG.md                                  |  3 +
 .../2010_TestProject/2019-02-04/README.md     |  5 ++
 integrationtests/filldb.sh                    |  1 +
 integrationtests/insert_some.py               | 28 ++++++++++
 integrationtests/test.sh                      |  2 +-
 integrationtests/test_crawler_with_cfoods.py  |  8 +++
 src/caosadvancedtools/crawler.py              | 55 +++++++++++--------
 .../scifolder/experiment_cfood.py             |  2 +-
 8 files changed, 78 insertions(+), 26 deletions(-)
 create mode 100644 integrationtests/extroot/ExperimentalData/2010_TestProject/2019-02-04/README.md
 create mode 100644 integrationtests/insert_some.py

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 5c1ccfd7..d07084cc 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -30,6 +30,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ### Changed ###
 
+- identifiables of a single CFood are now treated one after the other. This
+  allows them to depend on each other, provided they are listed in the
+  correct order (referenced identifiables first).
 - identifiables must have at least one property or a name
 * `caosadvancedtools.serverside.helper.init_data_model` also checks the role
   and data type of entities.
diff --git a/integrationtests/extroot/ExperimentalData/2010_TestProject/2019-02-04/README.md b/integrationtests/extroot/ExperimentalData/2010_TestProject/2019-02-04/README.md
new file mode 100644
index 00000000..7de3bd15
--- /dev/null
+++ b/integrationtests/extroot/ExperimentalData/2010_TestProject/2019-02-04/README.md
@@ -0,0 +1,5 @@
+---
+responsible:
+- Tom Wood
+description: Something.
+...
diff --git a/integrationtests/filldb.sh b/integrationtests/filldb.sh
index 98d22347..9f55365e 100755
--- a/integrationtests/filldb.sh
+++ b/integrationtests/filldb.sh
@@ -7,4 +7,5 @@ python3 -m caosadvancedtools.loadFiles /opt/caosdb/mnt/extroot/SimulationData
 python3 -m caosadvancedtools.loadFiles /opt/caosdb/mnt/extroot/Publications
 python3 -m caosadvancedtools.loadFiles /opt/caosdb/mnt/extroot/Software
 python3 insert_model.py 
+python3 insert_some.py
 python3 crawl.py /
diff --git a/integrationtests/insert_some.py b/integrationtests/insert_some.py
new file mode 100644
index 00000000..cf16a45d
--- /dev/null
+++ b/integrationtests/insert_some.py
@@ -0,0 +1,28 @@
+#!/usr/bin/env python3
+import caosdb as db
+from caosadvancedtools.scifolder.experiment_cfood import dm
+
+# This inserts a Project, a Person and an Experiment that references both.
+# If identifiables could not depend on each other, it should not be possible
+# to find all of them: e.g. the experiment identifiable would not reference the correct project Record
+project = db.Record(name='2010_TestProject')
+project.add_parent(name=dm.Project)
+project.insert()
+
+pers = db.Record()
+pers.add_parent("Person")
+pers.add_property("lastname", "Wood")
+pers.add_property("firstname", "Tom")
+pers.insert()
+
+experiment = db.Record()
+experiment.add_parent(name=dm.Experiment)
+experiment.description = "Something."
+experiment.add_property(
+    name=dm.date, value='2019-02-04')
+experiment.add_property(name=dm.Project, value=project)
+experiment.add_property(
+        name="identifier", value="empty_identifier")
+experiment.add_property(
+        name="responsible", value=pers)
+experiment.insert(flags={"force-missing-obligatory": "ignore"})
diff --git a/integrationtests/test.sh b/integrationtests/test.sh
index a56b7584..80a2afe3 100755
--- a/integrationtests/test.sh
+++ b/integrationtests/test.sh
@@ -3,6 +3,7 @@ OUT=/tmp/crawler.output
 ls 
 cat pycaosdb.ini
 rm -rf cache.db
+set -e
 echo "Clearing database"
 python3 clear_database.py
 echo "Testing crawler without cfoods"
@@ -29,7 +30,6 @@ echo "run crawler"
 # rename the moved file
 mv extroot/DataAnalysis/2010_TestProject/2019-02-03_something/README.xlsx_back extroot/DataAnalysis/2010_TestProject/2019-02-03_something/README.xlsx
 # check whether there was something UNAUTHORIZED
-set -e
 grep "There where unauthorized changes" $OUT
 # get the id of the run which is the last field of the output string
 RUN_ID=$(grep "run id:" $OUT | awk '{ print $NF }')
diff --git a/integrationtests/test_crawler_with_cfoods.py b/integrationtests/test_crawler_with_cfoods.py
index c39c3fc6..7dc19240 100755
--- a/integrationtests/test_crawler_with_cfoods.py
+++ b/integrationtests/test_crawler_with_cfoods.py
@@ -34,6 +34,14 @@ def get_entity_with_id(eid):
 
 class CrawlerTest(unittest.TestCase):
     def test_experiment(self):
+
+        ########################
+        # # dummy for dependency test experiment # #
+        ########################
+        exp = db.execute_query(
+            "FIND Experiment with date=2019-02-04 and identifier=empty_identifier",
+            unique=True)
+
         ########################
         # # first experiment # #
         ########################
diff --git a/src/caosadvancedtools/crawler.py b/src/caosadvancedtools/crawler.py
index 747c533d..05471e94 100644
--- a/src/caosadvancedtools/crawler.py
+++ b/src/caosadvancedtools/crawler.py
@@ -576,44 +576,51 @@ carefully and if the changes are ok, click on the following link:
         # looking for matching entities in CaosDB when there is no valid id
         # i.e. there was none set from a cache
 
+        existing = []
+        inserted = []
+
         for ent in identifiables:
             if ent.id is None or ent.id < 0:
                 logger.debug("Looking for: {}".format(
                     ent.id if ent.id is not None else ent.name))
-                existing = Crawler.find_existing(ent)
+                found = Crawler.find_existing(ent)
 
-                if existing is not None:
-                    ent.id = existing.id
+                if found is not None:
+                    ent.id = found.id
             else:
                 logger.debug("Id is known of: {}".format(ent))
 
-        # insert missing, i.e. those which are not valid
-        missing_identifiables = db.Container()
-        missing_identifiables.extend([ent for ent in identifiables
-                                      if ent.id is None or ent.id < 0])
-        # TODO the following should not be necessary. Fix it
-
-        for ent in missing_identifiables:
-            ent.id = None
+            # insert missing, i.e. those which are not valid
+            if ent.id is None or ent.id < 0:
+                missing = ent
+                ent.id = None
+            else:
+                missing = None
+                existing.append(ent)
 
-        if len(missing_identifiables) > 0:
-            info = "Going to insert the following entities:\n"
+            if missing:
+                try:
+                    guard.safe_insert(missing, unique=False,
+                                      flags={"force-missing-obligatory": "ignore"})
+                    inserted.append(ent)
+                except Exception as e:
+                    DataModelProblems.evaluate_exception(e)
+        if len(existing) > 0:
+            info = "Identified the following existing entities:\n"
 
-            for ent in missing_identifiables:
+            for ent in existing:
                 info += str(ent)+"\n"
             logger.debug(info)
+        else:
+            logger.debug("Did not identify any existing entities")
+        if len(inserted) > 0:
+            info = "Inserted the following entities:\n"
 
-        if len(missing_identifiables) == 0:
-            logger.debug("No new entities to be inserted.")
+            for ent in inserted:
+                info += str(ent)+"\n"
+            logger.debug(info)
         else:
-            try:
-                logger.info(
-                    "Inserting {} Records...".format(
-                        len(missing_identifiables)))
-                guard.safe_insert(missing_identifiables, unique=False,
-                                  flags={"force-missing-obligatory": "ignore"})
-            except Exception as e:
-                DataModelProblems.evaluate_exception(e)
+            logger.debug("Did not insert any new entities")
 
         logger.debug("Retrieving entities from CaosDB...")
         identifiables.retrieve(unique=True, raise_exception_on_error=False)
diff --git a/src/caosadvancedtools/scifolder/experiment_cfood.py b/src/caosadvancedtools/scifolder/experiment_cfood.py
index 0eccd18d..38606b5f 100644
--- a/src/caosadvancedtools/scifolder/experiment_cfood.py
+++ b/src/caosadvancedtools/scifolder/experiment_cfood.py
@@ -78,7 +78,7 @@ class ExperimentCFood(AbstractFileCFood, WithREADME):
         self.experiment, self.project = (
             ExperimentCFood.create_identifiable_experiment(self.match))
 
-        self.identifiables.extend([self.experiment, self.project])
+        self.identifiables.extend([self.project, self.experiment])
         self.people = parse_responsibles(self.header)
         self.identifiables.extend(self.people)
 
-- 
GitLab