From 54d2b7a9538d30ae0fc72e92525f1412331a949e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Henrik=20tom=20W=C3=B6rden?= <henrik@trineo.org>
Date: Sun, 4 Aug 2019 14:25:04 +0200
Subject: [PATCH] ENH: new cfood for projects, new tests

Caution: CFoods no longer take the regex match on creation; they now
take the CaosDB File being crawled instead.
---
 .docker/Dockerfile                          |  4 +--
 integrationtests/full_test/crawl.py         |  4 +--
 integrationtests/full_test/filldb.sh        |  1 -
 integrationtests/full_test/insert_record.py |  5 ----
 src/caosadvancedtools/cfood.py              | 12 ++++-----
 src/caosadvancedtools/crawler.py            | 30 ++++++++++++---------
 src/caosadvancedtools/utils.py              |  7 +++--
 7 files changed, 33 insertions(+), 30 deletions(-)
 delete mode 100644 integrationtests/full_test/insert_record.py

diff --git a/.docker/Dockerfile b/.docker/Dockerfile
index 4ee23ca6..50361883 100644
--- a/.docker/Dockerfile
+++ b/.docker/Dockerfile
@@ -7,9 +7,9 @@ RUN git clone https://gitlab.gwdg.de/bmp-caosdb/caosdb-pylib.git && \
    cd caosdb-pylib && pip3 install .
 RUN git clone https://gitlab.gwdg.de/bmp-caosdb/caosdb-models.git && \
    cd caosdb-models && pip3 install .
-ADD https://gitlab.com/api/v4/projects/13601752/repository/branches/master \
+ADD https://gitlab.com/api/v4/projects/13601752/repository/branches/project_cfood \
    scifolder_version.json
-RUN git clone -b master \
+RUN git clone -b project_cfood \
     https://gitlab.com/henrik_indiscale/scifolder.git && \
     cd scifolder && pip3 install .
 COPY . /git
diff --git a/integrationtests/full_test/crawl.py b/integrationtests/full_test/crawl.py
index d5f31789..d60e920f 100755
--- a/integrationtests/full_test/crawl.py
+++ b/integrationtests/full_test/crawl.py
@@ -30,7 +30,7 @@ import caosdb as db
 
 from caosadvancedtools.crawler import Crawler
 from scifolder import (AnalysisCFood, ExperimentCFood, PublicationCFood,
-                       SimulationCFood)
+                       SimulationCFood, ProjectCFood)
 
 
 def get_parser():
@@ -55,6 +55,6 @@ if __name__ == "__main__":
     print("Query done...")
     config = db.configuration.get_config()
     c = Crawler(use_cache=True, access=access,
-                food=[AnalysisCFood, ExperimentCFood,
+                food=[ProjectCFood, AnalysisCFood, ExperimentCFood,
                       PublicationCFood, SimulationCFood, ])
     c.crawl(files)
diff --git a/integrationtests/full_test/filldb.sh b/integrationtests/full_test/filldb.sh
index 906e499c..da1abc70 100755
--- a/integrationtests/full_test/filldb.sh
+++ b/integrationtests/full_test/filldb.sh
@@ -6,5 +6,4 @@ python3 -m caosadvancedtools.loadFiles /opt/caosdb/mnt/extroot/DataAnalysis
 python3 -m caosadvancedtools.loadFiles /opt/caosdb/mnt/extroot/SimulationData
 python3 -m caosadvancedtools.loadFiles /opt/caosdb/mnt/extroot/Publications
 python3 insert_model.py 
-python3 insert_record.py
 python3 crawl.py /
diff --git a/integrationtests/full_test/insert_record.py b/integrationtests/full_test/insert_record.py
deleted file mode 100644
index 5927b058..00000000
--- a/integrationtests/full_test/insert_record.py
+++ /dev/null
@@ -1,5 +0,0 @@
-import caosdb as db
-
-r = db.Record(name="TestProject")
-r.add_parent(name="Project")
-r.insert()
diff --git a/src/caosadvancedtools/cfood.py b/src/caosadvancedtools/cfood.py
index ac830012..5a52b958 100644
--- a/src/caosadvancedtools/cfood.py
+++ b/src/caosadvancedtools/cfood.py
@@ -66,21 +66,21 @@ class AbstractCFood(object):
     # function match()
     _pattern = None
 
-    def __init__(self, match, access=lambda x: x):
+    def __init__(self, crawled_file, access=lambda x: x):
         """ Abstract base class for Crawler food (CFood).
 
         Parameters
         ----------
-        match : match object of a regular expression match
-                the result from matching a path against the pattern of this
-                class
+        crawled_file : The file that the crawler is currently matching. Its
+                       path should match against the pattern of this class
 
         access : callable, optional
                  A function that takes a CaosDB path and returns a local path
         """
         self.access = access
-        self.crawled_file = match.string
-        self.match = match
+        self.crawled_file = crawled_file
+        self.crawled_path = crawled_file.path
+        self.match = type(self).match(crawled_file.path)
 
     @staticmethod
     def get_re():
diff --git a/src/caosadvancedtools/crawler.py b/src/caosadvancedtools/crawler.py
index 78376724..2fe94484 100644
--- a/src/caosadvancedtools/crawler.py
+++ b/src/caosadvancedtools/crawler.py
@@ -75,22 +75,23 @@ class Crawler(object):
             #    continue
 
             for Cfood in self.food:
-                match = Cfood.match(crawled_file.path)
+                if Cfood.match(crawled_file.path) is not None:
+                    try:
+                        cfood = Cfood(crawled_file, access=self.access)
+                        identifiables = cfood.create_identifiables()
 
-                if match is not None:
-                    cfood = Cfood(match, access=self.access)
-                    identifiables = cfood.create_identifiables()
+                        if self.use_cache:
+                            hashes = self.cache.update_ids_from_cache(
+                                identifiables)
 
-                    if self.use_cache:
-                        hashes = self.cache.update_ids_from_cache(
-                            identifiables)
+                        self.find_or_insert_identifiables(identifiables)
 
-                    self.find_or_insert_identifiables(identifiables)
+                        if self.use_cache:
+                            self.cache.insert_list(hashes, identifiables)
 
-                    if self.use_cache:
-                        self.cache.insert_list(hashes, identifiables)
-
-                    cfood.update_identifiables()
+                        cfood.update_identifiables()
+                    except Exception as e:
+                        print(e)
 
     @staticmethod
     def find_or_insert_identifiables(identifiables):
@@ -116,6 +117,11 @@ class Crawler(object):
         missing_identifiables = db.Container()
         missing_identifiables.extend([ent for ent in identifiables
                                       if ent.id is None or ent.id < 0])
+        # TODO: resetting the ids below should not be necessary; fix the cause.
+
+        for ent in missing_identifiables:
+            ent.id = None
+
         missing_identifiables.insert()
         identifiables.retrieve(unique=True, raise_exception_on_error=False)
 
diff --git a/src/caosadvancedtools/utils.py b/src/caosadvancedtools/utils.py
index 64f828f9..91143871 100644
--- a/src/caosadvancedtools/utils.py
+++ b/src/caosadvancedtools/utils.py
@@ -81,9 +81,12 @@ def find_file_included_by(glob):
     """
 
     query_string = "FIND file which is stored at {}".format(glob)
-    print(query_string)
 
-    return db.execute_query(query_string)
+    try:
+        return db.execute_query(query_string)
+    except Exception as e:
+        print(e)
+        return []
 
 
 def assure_absolute_path_in_glob(glob, prefix):
-- 
GitLab