From 5eeb0924f045a83561a05a60d59343715909eedc Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Henrik=20tom=20W=C3=B6rden?= <henrik@trineo.org>
Date: Mon, 16 Dec 2019 17:46:50 +0100
Subject: [PATCH] ENH: separated collecting information from initialisation

---
 src/caosadvancedtools/cfood.py   | 32 ++++++++++++++++++++++++--------
 src/caosadvancedtools/crawler.py |  9 ++++++++-
 src/caosadvancedtools/utils.py   |  7 +------
 unittests/test_cfood.py          |  8 +++-----
 4 files changed, 36 insertions(+), 20 deletions(-)

diff --git a/src/caosadvancedtools/cfood.py b/src/caosadvancedtools/cfood.py
index 8526be5d..b7f5518e 100644
--- a/src/caosadvancedtools/cfood.py
+++ b/src/caosadvancedtools/cfood.py
@@ -82,6 +82,19 @@ class AbstractCFood(object):
         self.identifiables = db.Container()
         self.verbosity = verbosity
         self.attached_ones = []
+        self.attached_filenames = []
+
+    def collect_information(self):
+        """ The CFood collects information for further processing.
+
+        Often CFoods need information from files or even from the database in
+        order to make processing decision. It is intended that this function is
+        called after match. Thus match can be used without connecting to the
+        database.
+
+        To be overwritten by subclasses
+        """
+        pass
 
     @staticmethod
     def get_re():
@@ -175,16 +188,19 @@ class AbstractCFood(object):
 
     def looking_for(self, crawled_file):
         """
-        returns True if crawled_file can be added this CFood.
-
-        to be overwritten by subclasses.
-        Sometimes files belong to the CFood of created by another file. This
-        function can be used to define what files shall be 'attached'. E.g. the
-        data from an experiment should always contain a labnotes scan. Then a
-        subclass CFood for the experiment could be 'looking_for' this scan
-        using this function.
+        returns True if crawled_file can be added to this CFood.
+
+        Typically a CFood exists for a file and defines how to deal with the
+        file. However, sometimes additional files "belong" to a CFood. E.g. an
+        experiment CFood might match against a README file but labnotes.txt
+        also shall be treated by the cfood (and not a special cfood created for
+        labnotes.txt)
+        This function can be used to define what files shall be 'attached'.
         """
 
+        if crawled_file in self.attached_filenames:
+            return True
+
         return False
 
     @staticmethod
diff --git a/src/caosadvancedtools/crawler.py b/src/caosadvancedtools/crawler.py
index 1c559b60..b0b7785b 100644
--- a/src/caosadvancedtools/crawler.py
+++ b/src/caosadvancedtools/crawler.py
@@ -114,6 +114,13 @@ class Crawler(object):
                         errors_occured = True
                         tbs.append(e)
 
+        if self.verbosity >= INFO:
+            print("-"*60)
+            print("CFoods are collecting information...")
+
+        for cfood in cfoods:
+            cfood.collect_information()
+
         if self.verbosity >= INFO:
             print("-"*60)
             print("Trying to attach files to created CFoods")
@@ -149,7 +156,7 @@ class Crawler(object):
     def crawl(self, files, interactive=True, security_level=RETRIEVE):
         guard.set_level(level=security_level)
 
-        files = sorted(files, key=lambda x: x.path)
+        files = sorted([f.path for f in files])
 
         cfoods, matches, tbs, errors_occured = self.match(files)
 
diff --git a/src/caosadvancedtools/utils.py b/src/caosadvancedtools/utils.py
index 843c14ad..9ca503b0 100644
--- a/src/caosadvancedtools/utils.py
+++ b/src/caosadvancedtools/utils.py
@@ -82,12 +82,7 @@ def find_file_included_by(glob):
 
     query_string = "FIND file which is stored at {}".format(glob)
 
-    try:
-        return db.execute_query(query_string)
-    except Exception as e:
-        print(e)
-
-    return []
+    return db.execute_query(query_string)
 
 
 def assure_absolute_path_in_glob(glob, prefix):
diff --git a/unittests/test_cfood.py b/unittests/test_cfood.py
index b3af5929..9122856d 100644
--- a/unittests/test_cfood.py
+++ b/unittests/test_cfood.py
@@ -76,10 +76,8 @@ class InsertionTest(unittest.TestCase):
 
 class ExampleTest(unittest.TestCase):
     def test(self):
-        dummy_file = db.File(path="/data/rabbit/2019-03-03/README.md")
-        print(dummy_file.path)
-        print(ExampleCFood.get_re())
-        cf = ExampleCFood(crawled_file=dummy_file)
-        self.assertIsNotNone(ExampleCFood.match(dummy_file.path))
+        path = "/data/rabbit/2019-03-03/README.md"
+        cf = ExampleCFood(crawled_file=path)
+        self.assertIsNotNone(ExampleCFood.match(path))
         self.assertEqual(cf.match.group('species'), 'rabbit')
         self.assertEqual(cf.match.group('date'), '2019-03-03')
-- 
GitLab