diff --git a/src/caosadvancedtools/cfood.py b/src/caosadvancedtools/cfood.py index 69d7e7d2561f358df0b4ad09bc3ff1d7aab1af89..94ac85a0b75db50351cb725425be3b030744e1a7 100644 --- a/src/caosadvancedtools/cfood.py +++ b/src/caosadvancedtools/cfood.py @@ -37,6 +37,13 @@ from caosdb.exceptions import TransactionError ENTITIES = {} +def get_value(prop): + if not isinstance(prop.value, db.Entity): + return prop.value + else: + return str(prop.value.id) + + def get_entity(name): if name not in ENTITIES: ent = db.Entity(name=name) @@ -69,14 +76,18 @@ class AbstractCFood(object): continue existing = None + print("Looking for \n", identifiable) + if self.use_cache: identifiable_cache = Cache() identifier = Cache.hash_entity(identifiable) + print(identifier) cached_id = identifiable_cache.check_existing(identifier) if cached_id is not None: existing = db.execute_query("FIND {}".format(cached_id), unique=True) + print("Found Entity in cache; Id:", cached_id) # Nothing in cache or cache not used. Check in CaosDB @@ -91,8 +102,13 @@ class AbstractCFood(object): else: entities[key] = existing - if self.use_cache and cached_id is None: - identifiable_cache.insert(identifier, entities[key].id) + print("Got\n", identifiable) + + if self.use_cache: + print("cid", cached_id) + + if self.use_cache and cached_id is None: + identifiable_cache.insert(identifier, entities[key].id) self.update_identifiables(entities, crawled_file, match) @@ -127,8 +143,11 @@ class AbstractCFood(object): @staticmethod def find_existing(entity): query_string = "FIND Record " + entity.get_parents()[0].name - query_string += " with " + "and".join(["'" + p.name + "'='" + p.value + "'" - for p in entity.get_properties()]) + #import IPython + # IPython.embed() + query_string += " with " + " and ".join( + ["'" + p.name + "'='" + + get_value(p) + "'" for p in entity.get_properties()]) print(query_string) q = db.Query(query_string) try: @@ -136,6 +155,11 @@ class AbstractCFood(object): except TransactionError as er: r = None + if r is not None: + print("Found Entity with id:", r.id) + else: + print("Did not find an existing entity") + return r diff --git a/src/caosadvancedtools/read_md_header.py b/src/caosadvancedtools/read_md_header.py index 8d5d3e4fe9f9826c72aaa264dff2d5cfb2fbff2e..78c01b3c0b7529957b759210f1110674b0edeff1 100644 --- a/src/caosadvancedtools/read_md_header.py +++ b/src/caosadvancedtools/read_md_header.py @@ -31,6 +31,8 @@ import re import caosdb as db import yaml +from .cfood import AbstractCFood, get_entity + TEMPLATEHEADER = """ --- title: {title} @@ -210,15 +212,32 @@ def parse_responsibles(header): return people -def reference_data_files(entity, header): - for glob in header["data"]: +def reference_data_files(entity, header, prefix=""): + referenced_files = [] + for glob in get_data_glob(header): + if not glob.startswith("/"): + glob = os.path.normpath(os.path.join(prefix, glob)) for dfile in find_file_for_fileglob(glob): - entity.add_property("data", dfile) + if dfile is not None: + referenced_files.append(dfile) + AbstractCFood.set_property(entity, get_entity("Data"), referenced_files) + + +def get_data_glob(header): + globs = [] + for el in header["data"]: + if isinstance(el, dict) and "filename" in el: + globs.append(el["filename"]) + else: + globs.append(el) + return globs def find_file_for_fileglob(glob): # TODO this probalby needs to be extended for different kinds of glob - res = db.execute_query("FIND file which is stored at {}".format(glob)) + query_string = "FIND file which is stored at {}".format(glob) + print(query_string) + return db.execute_query(query_string) def find_records_referencing_files(glob):