From 54d2b7a9538d30ae0fc72e92525f1412331a949e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20tom=20W=C3=B6rden?= <henrik@trineo.org> Date: Sun, 4 Aug 2019 14:25:04 +0200 Subject: [PATCH] ENH: new cfood for projects, new tests Caution: CFoods no longer take the match on creation but the CaosDB File instead --- .docker/Dockerfile | 4 +-- integrationtests/full_test/crawl.py | 4 +-- integrationtests/full_test/filldb.sh | 1 - integrationtests/full_test/insert_record.py | 5 ---- src/caosadvancedtools/cfood.py | 12 ++++----- src/caosadvancedtools/crawler.py | 30 ++++++++++++--------- src/caosadvancedtools/utils.py | 7 +++-- 7 files changed, 33 insertions(+), 30 deletions(-) delete mode 100644 integrationtests/full_test/insert_record.py diff --git a/.docker/Dockerfile b/.docker/Dockerfile index 4ee23ca6..50361883 100644 --- a/.docker/Dockerfile +++ b/.docker/Dockerfile @@ -7,9 +7,9 @@ RUN git clone https://gitlab.gwdg.de/bmp-caosdb/caosdb-pylib.git && \ cd caosdb-pylib && pip3 install . RUN git clone https://gitlab.gwdg.de/bmp-caosdb/caosdb-models.git && \ cd caosdb-models && pip3 install . -ADD https://gitlab.com/api/v4/projects/13601752/repository/branches/master \ +ADD https://gitlab.com/api/v4/projects/13601752/repository/branches/project_cfood \ scifolder_version.json -RUN git clone -b master \ +RUN git clone -b project_cfood \ https://gitlab.com/henrik_indiscale/scifolder.git && \ cd scifolder && pip3 install . COPY . 
/git diff --git a/integrationtests/full_test/crawl.py b/integrationtests/full_test/crawl.py index d5f31789..d60e920f 100755 --- a/integrationtests/full_test/crawl.py +++ b/integrationtests/full_test/crawl.py @@ -30,7 +30,7 @@ import caosdb as db from caosadvancedtools.crawler import Crawler from scifolder import (AnalysisCFood, ExperimentCFood, PublicationCFood, - SimulationCFood) + SimulationCFood, ProjectCFood) def get_parser(): @@ -55,6 +55,6 @@ if __name__ == "__main__": print("Query done...") config = db.configuration.get_config() c = Crawler(use_cache=True, access=access, - food=[AnalysisCFood, ExperimentCFood, + food=[ProjectCFood, AnalysisCFood, ExperimentCFood, PublicationCFood, SimulationCFood, ]) c.crawl(files) diff --git a/integrationtests/full_test/filldb.sh b/integrationtests/full_test/filldb.sh index 906e499c..da1abc70 100755 --- a/integrationtests/full_test/filldb.sh +++ b/integrationtests/full_test/filldb.sh @@ -6,5 +6,4 @@ python3 -m caosadvancedtools.loadFiles /opt/caosdb/mnt/extroot/DataAnalysis python3 -m caosadvancedtools.loadFiles /opt/caosdb/mnt/extroot/SimulationData python3 -m caosadvancedtools.loadFiles /opt/caosdb/mnt/extroot/Publications python3 insert_model.py -python3 insert_record.py python3 crawl.py / diff --git a/integrationtests/full_test/insert_record.py b/integrationtests/full_test/insert_record.py deleted file mode 100644 index 5927b058..00000000 --- a/integrationtests/full_test/insert_record.py +++ /dev/null @@ -1,5 +0,0 @@ -import caosdb as db - -r = db.Record(name="TestProject") -r.add_parent(name="Project") -r.insert() diff --git a/src/caosadvancedtools/cfood.py b/src/caosadvancedtools/cfood.py index ac830012..5a52b958 100644 --- a/src/caosadvancedtools/cfood.py +++ b/src/caosadvancedtools/cfood.py @@ -66,21 +66,21 @@ class AbstractCFood(object): # function match() _pattern = None - def __init__(self, match, access=lambda x: x): + def __init__(self, crawled_file, access=lambda x: x): """ Abstract base class for Crawler food 
(CFood). Parameters ---------- - match : match object of a regular expression match - the result from matching a path against the pattern of this - class + crawled_file : The file that the crawler is currently matching. Its + path should match against the pattern of this class access : callable, optional A function that takes a CaosDB path and returns a local path """ self.access = access - self.crawled_file = match.string - self.match = match + self.crawled_file = crawled_file + self.crawled_path = crawled_file.path + self.match = type(self).match(crawled_file.path) @staticmethod def get_re(): diff --git a/src/caosadvancedtools/crawler.py b/src/caosadvancedtools/crawler.py index 78376724..2fe94484 100644 --- a/src/caosadvancedtools/crawler.py +++ b/src/caosadvancedtools/crawler.py @@ -75,22 +75,23 @@ class Crawler(object): # continue for Cfood in self.food: - match = Cfood.match(crawled_file.path) + if Cfood.match(crawled_file.path) is not None: + try: + cfood = Cfood(crawled_file, access=self.access) + identifiables = cfood.create_identifiables() - if match is not None: - cfood = Cfood(match, access=self.access) - identifiables = cfood.create_identifiables() + if self.use_cache: + hashes = self.cache.update_ids_from_cache( + identifiables) - if self.use_cache: - hashes = self.cache.update_ids_from_cache( - identifiables) + self.find_or_insert_identifiables(identifiables) - self.find_or_insert_identifiables(identifiables) + if self.use_cache: + self.cache.insert_list(hashes, identifiables) - if self.use_cache: - self.cache.insert_list(hashes, identifiables) - - cfood.update_identifiables() + cfood.update_identifiables() + except Exception as e: + print(e) @staticmethod def find_or_insert_identifiables(identifiables): @@ -116,6 +117,11 @@ class Crawler(object): missing_identifiables = db.Container() missing_identifiables.extend([ent for ent in identifiables if ent.id is None or ent.id < 0]) + # TODO the following should not be necessary. 
Fix it + + for ent in missing_identifiables: + ent.id = None + missing_identifiables.insert() identifiables.retrieve(unique=True, raise_exception_on_error=False) diff --git a/src/caosadvancedtools/utils.py b/src/caosadvancedtools/utils.py index 64f828f9..91143871 100644 --- a/src/caosadvancedtools/utils.py +++ b/src/caosadvancedtools/utils.py @@ -81,9 +81,12 @@ def find_file_included_by(glob): """ query_string = "FIND file which is stored at {}".format(glob) - print(query_string) - return db.execute_query(query_string) + try: + return db.execute_query(query_string) + except Exception as e: + print(e) + return [] def assure_absolute_path_in_glob(glob, prefix): -- GitLab