From c2bfc19f145e55f4a3b087baf0d5f860605daaa1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Henrik=20tom=20W=C3=B6rden?= <henrik@trineo.org>
Date: Sat, 18 May 2019 11:34:15 +0200
Subject: [PATCH] fixes and debug output

---
 src/caosadvancedtools/cfood.py          | 32 +++++++++++++++++++++----
 src/caosadvancedtools/read_md_header.py | 27 +++++++++++++++++----
 2 files changed, 51 insertions(+), 8 deletions(-)

diff --git a/src/caosadvancedtools/cfood.py b/src/caosadvancedtools/cfood.py
index 69d7e7d2..94ac85a0 100644
--- a/src/caosadvancedtools/cfood.py
+++ b/src/caosadvancedtools/cfood.py
@@ -37,6 +37,13 @@ from caosdb.exceptions import TransactionError
 ENTITIES = {}
 
 
+def get_value(prop):
+    """Return prop.value, or its id as a string if it is a db.Entity."""
+    if isinstance(prop.value, db.Entity):
+        return str(prop.value.id)
+    return prop.value
+
+
 def get_entity(name):
     if name not in ENTITIES:
         ent = db.Entity(name=name)
@@ -69,14 +76,18 @@ class AbstractCFood(object):
                 continue
             existing = None
 
+            print("Looking for \n", identifiable)
+
             if self.use_cache:
                 identifiable_cache = Cache()
                 identifier = Cache.hash_entity(identifiable)
+                print(identifier)
                 cached_id = identifiable_cache.check_existing(identifier)
 
                 if cached_id is not None:
                     existing = db.execute_query("FIND {}".format(cached_id),
                                                 unique=True)
+                    print("Found Entity in cache; Id:", cached_id)
 
             # Nothing in cache or cache not used. Check in CaosDB
 
@@ -91,8 +102,13 @@ class AbstractCFood(object):
             else:
                 entities[key] = existing
 
-        if self.use_cache and cached_id is None:
-            identifiable_cache.insert(identifier, entities[key].id)
+            print("Got\n", identifiable)
+
+            if self.use_cache:
+                print("cid", cached_id)
+
+            if self.use_cache and cached_id is None:
+                identifiable_cache.insert(identifier, entities[key].id)
 
         self.update_identifiables(entities, crawled_file, match)
 
@@ -127,8 +143,11 @@ class AbstractCFood(object):
     @staticmethod
     def find_existing(entity):
         query_string = "FIND Record " + entity.get_parents()[0].name
-        query_string += " with " + "and".join(["'" + p.name + "'='" + p.value + "'"
-                                               for p in entity.get_properties()])
+        # str(): get_value returns non-Entity values unchanged, so numbers
+        # etc. would raise a TypeError when concatenated into the query.
+        query_string += " with " + " and ".join(
+            "'" + p.name + "'='" + str(get_value(p)) + "'"
+            for p in entity.get_properties())
         print(query_string)
         q = db.Query(query_string)
         try:
@@ -136,6 +155,11 @@ class AbstractCFood(object):
         except TransactionError as er:
             r = None
 
+        if r is not None:
+            print("Found Entity with id:", r.id)
+        else:
+            print("Did not find an existing entity")
+
         return r
 
 
diff --git a/src/caosadvancedtools/read_md_header.py b/src/caosadvancedtools/read_md_header.py
index 8d5d3e4f..78c01b3c 100644
--- a/src/caosadvancedtools/read_md_header.py
+++ b/src/caosadvancedtools/read_md_header.py
@@ -31,6 +31,8 @@ import re
 import caosdb as db
 import yaml
 
+from .cfood import AbstractCFood, get_entity
+
 TEMPLATEHEADER = """
 ---
 title: {title}
@@ -210,15 +212,32 @@ def parse_responsibles(header):
     return people
 
 
-def reference_data_files(entity, header):
-    for glob in header["data"]:
+def reference_data_files(entity, header, prefix=""):
+    referenced_files = []  # results of all file queries, None excluded
+    for glob in get_data_glob(header):
+        if not glob.startswith("/"):  # no leading "/": joined onto prefix
+            glob = os.path.normpath(os.path.join(prefix, glob))
         for dfile in find_file_for_fileglob(glob):
-            entity.add_property("data", dfile)
+            if dfile is not None:
+                referenced_files.append(dfile)
+    AbstractCFood.set_property(entity, get_entity("Data"), referenced_files)
+
+
+def get_data_glob(header):
+    """Return the globs listed under header["data"] as a flat list."""
+    # A dict entry contributes its "filename"; anything else is kept as is.
+    return [
+        item["filename"]
+        if isinstance(item, dict) and "filename" in item else item
+        for item in header["data"]
+    ]
 
 
 def find_file_for_fileglob(glob):
     # TODO this probalby needs to be extended for different kinds of glob
-    res = db.execute_query("FIND file which is stored at {}".format(glob))
+    query_string = "FIND file which is stored at {}".format(glob)
+    print(query_string)  # debug output: the query about to be executed
+    return db.execute_query(query_string)
 
 
 def find_records_referencing_files(glob):
-- 
GitLab