diff --git a/.docker/Dockerfile b/.docker/Dockerfile index 4ee23ca6d04b1eec51aecd9b3b0b45c2d5fa4b76..50361883b20acdd2d0ff3cfae2b7d189fa8b1229 100644 --- a/.docker/Dockerfile +++ b/.docker/Dockerfile @@ -7,9 +7,9 @@ RUN git clone https://gitlab.gwdg.de/bmp-caosdb/caosdb-pylib.git && \ cd caosdb-pylib && pip3 install . RUN git clone https://gitlab.gwdg.de/bmp-caosdb/caosdb-models.git && \ cd caosdb-models && pip3 install . -ADD https://gitlab.com/api/v4/projects/13601752/repository/branches/master \ +ADD https://gitlab.com/api/v4/projects/13601752/repository/branches/project_cfood \ scifolder_version.json -RUN git clone -b master \ +RUN git clone -b project_cfood \ https://gitlab.com/henrik_indiscale/scifolder.git && \ cd scifolder && pip3 install . COPY . /git diff --git a/integrationtests/full_test/crawl.py b/integrationtests/full_test/crawl.py index d5f31789bc2f6760699491345cd53324fa56146e..d60e920f47a65db53e4b2b737cafa0d9aed29a95 100755 --- a/integrationtests/full_test/crawl.py +++ b/integrationtests/full_test/crawl.py @@ -30,7 +30,7 @@ import caosdb as db from caosadvancedtools.crawler import Crawler from scifolder import (AnalysisCFood, ExperimentCFood, PublicationCFood, - SimulationCFood) + SimulationCFood, ProjectCFood) def get_parser(): @@ -55,6 +55,6 @@ if __name__ == "__main__": print("Query done...") config = db.configuration.get_config() c = Crawler(use_cache=True, access=access, - food=[AnalysisCFood, ExperimentCFood, + food=[ProjectCFood, AnalysisCFood, ExperimentCFood, PublicationCFood, SimulationCFood, ]) c.crawl(files) diff --git a/integrationtests/full_test/filldb.sh b/integrationtests/full_test/filldb.sh index 906e499c33f2607135713500b3e36cb0d2930dcf..da1abc7076b8f65dfc41277fd2b41dd9a5f65996 100755 --- a/integrationtests/full_test/filldb.sh +++ b/integrationtests/full_test/filldb.sh @@ -6,5 +6,4 @@ python3 -m caosadvancedtools.loadFiles /opt/caosdb/mnt/extroot/DataAnalysis python3 -m caosadvancedtools.loadFiles /opt/caosdb/mnt/extroot/SimulationData python3 -m caosadvancedtools.loadFiles /opt/caosdb/mnt/extroot/Publications python3 insert_model.py -python3 insert_record.py python3 crawl.py / diff --git a/integrationtests/full_test/insert_record.py b/integrationtests/full_test/insert_record.py deleted file mode 100644 index 5927b05813ffa85d2dff3045f98e7624acfb0f94..0000000000000000000000000000000000000000 --- a/integrationtests/full_test/insert_record.py +++ /dev/null @@ -1,5 +0,0 @@ -import caosdb as db - -r = db.Record(name="TestProject") -r.add_parent(name="Project") -r.insert() diff --git a/src/caosadvancedtools/cfood.py b/src/caosadvancedtools/cfood.py index ac830012e950b756df0eafce5703eaf90040e126..5a52b9588e92af5e0e5c68dbc5efa6903b3376c7 100644 --- a/src/caosadvancedtools/cfood.py +++ b/src/caosadvancedtools/cfood.py @@ -66,21 +66,21 @@ class AbstractCFood(object): # function match() _pattern = None - def __init__(self, match, access=lambda x: x): + def __init__(self, crawled_file, access=lambda x: x): """ Abstract base class for Crawler food (CFood). Parameters ---------- - match : match object of a regular expression match - the result from matching a path against the pattern of this - class + crawled_file : The file that the crawler is currently matching. Its + path should match against the pattern of this class access : callable, optional A function that takes a CaosDB path and returns a local path """ self.access = access - self.crawled_file = match.string - self.match = match + self.crawled_file = crawled_file + self.crawled_path = crawled_file.path + self.match = type(self).match(crawled_file.path) @staticmethod def get_re(): diff --git a/src/caosadvancedtools/crawler.py b/src/caosadvancedtools/crawler.py index 783767247221e9ede556868a1941c5bea00328ba..2fe94484e7e4dc6e28cca18db359da188efba5f4 100644 --- a/src/caosadvancedtools/crawler.py +++ b/src/caosadvancedtools/crawler.py @@ -75,22 +75,23 @@ class Crawler(object): # continue for Cfood in self.food: - match = Cfood.match(crawled_file.path) + if Cfood.match(crawled_file.path) is not None: + try: + cfood = Cfood(crawled_file, access=self.access) + identifiables = cfood.create_identifiables() - if match is not None: - cfood = Cfood(match, access=self.access) - identifiables = cfood.create_identifiables() + if self.use_cache: + hashes = self.cache.update_ids_from_cache( + identifiables) - if self.use_cache: - hashes = self.cache.update_ids_from_cache( - identifiables) + self.find_or_insert_identifiables(identifiables) - self.find_or_insert_identifiables(identifiables) + if self.use_cache: + self.cache.insert_list(hashes, identifiables) - if self.use_cache: - self.cache.insert_list(hashes, identifiables) - - cfood.update_identifiables() + cfood.update_identifiables() + except Exception as e: + print(e) @staticmethod def find_or_insert_identifiables(identifiables): @@ -116,6 +117,11 @@ class Crawler(object): missing_identifiables = db.Container() missing_identifiables.extend([ent for ent in identifiables if ent.id is None or ent.id < 0]) + # TODO the following should not be necessary. Fix it + + for ent in missing_identifiables: + ent.id = None + missing_identifiables.insert() identifiables.retrieve(unique=True, raise_exception_on_error=False) diff --git a/src/caosadvancedtools/utils.py b/src/caosadvancedtools/utils.py index 64f828f94a09ef22daef3606490580e7fb8f0c52..91143871c0617021feec79c8419fcaeae04da127 100644 --- a/src/caosadvancedtools/utils.py +++ b/src/caosadvancedtools/utils.py @@ -81,9 +81,12 @@ def find_file_included_by(glob): """ query_string = "FIND file which is stored at {}".format(glob) - print(query_string) - return db.execute_query(query_string) + try: + return db.execute_query(query_string) + except Exception as e: + print(e) + return [] def assure_absolute_path_in_glob(glob, prefix):