diff --git a/src/caosadvancedtools/cfood.py b/src/caosadvancedtools/cfood.py index 8526be5d800ef3c0fea13d2fd80200e4e989ba55..b7f5518ed1a566390bbd06f1fb1c6b82b5f002b7 100644 --- a/src/caosadvancedtools/cfood.py +++ b/src/caosadvancedtools/cfood.py @@ -82,6 +82,19 @@ class AbstractCFood(object): self.identifiables = db.Container() self.verbosity = verbosity self.attached_ones = [] + self.attached_filenames = [] + + def collect_information(self): + """ The CFood collects information for further processing. + + Often CFoods need information from files or even from the database in + order to make processing decision. It is intended that this function is + called after match. Thus match can be used without connecting to the + database. + + To be overwritten by subclasses + """ + pass @staticmethod def get_re(): @@ -175,16 +188,19 @@ class AbstractCFood(object): def looking_for(self, crawled_file): """ - returns True if crawled_file can be added this CFood. - - to be overwritten by subclasses. - Sometimes files belong to the CFood of created by another file. This - function can be used to define what files shall be 'attached'. E.g. the - data from an experiment should always contain a labnotes scan. Then a - subclass CFood for the experiment could be 'looking_for' this scan - using this function. + returns True if crawled_file can be added to this CFood. + + Typically a CFood exists for a file and defines how to deal with the + file. However, sometimes additional files "belong" to a CFood. E.g. an + experiment CFood might match against a README file but labnotes.txt + also shall be treated by the cfood (and not a special cfood created for + labnotes.txt) + This function can be used to define what files shall be 'attached'. """ + if crawled_file in self.attached_filenames: + return True + return False @staticmethod diff --git a/src/caosadvancedtools/crawler.py b/src/caosadvancedtools/crawler.py index 1c559b60181856a07b4a75b9705aa4e50813aff6..b0b7785b640933136eedcf9689c79451f7ea4d83 100644 --- a/src/caosadvancedtools/crawler.py +++ b/src/caosadvancedtools/crawler.py @@ -114,6 +114,13 @@ class Crawler(object): errors_occured = True tbs.append(e) + if self.verbosity >= INFO: + print("-"*60) + print("CFoods are collecting information...") + + for cfood in cfoods: + cfood.collect_information() + if self.verbosity >= INFO: print("-"*60) print("Trying to attach files to created CFoods") @@ -149,7 +156,7 @@ class Crawler(object): def crawl(self, files, interactive=True, security_level=RETRIEVE): guard.set_level(level=security_level) - files = sorted(files, key=lambda x: x.path) + files = sorted([f.path for f in files]) cfoods, matches, tbs, errors_occured = self.match(files) diff --git a/src/caosadvancedtools/utils.py b/src/caosadvancedtools/utils.py index 843c14ad2f5c9072829c455e7fb955ea6f726d52..9ca503b0f551bbe05911302c0d90b0cc48445d74 100644 --- a/src/caosadvancedtools/utils.py +++ b/src/caosadvancedtools/utils.py @@ -82,12 +82,7 @@ def find_file_included_by(glob): query_string = "FIND file which is stored at {}".format(glob) - try: - return db.execute_query(query_string) - except Exception as e: - print(e) - - return [] + return db.execute_query(query_string) def assure_absolute_path_in_glob(glob, prefix): diff --git a/unittests/test_cfood.py b/unittests/test_cfood.py index b3af59297f9ce4198789f1c7540fb08e2976eb1c..9122856dcdee795c4ec5cd14ee1591df3dbff3af 100644 --- a/unittests/test_cfood.py +++ b/unittests/test_cfood.py @@ -76,10 +76,8 @@ class InsertionTest(unittest.TestCase): class ExampleTest(unittest.TestCase): def test(self): - dummy_file = db.File(path="/data/rabbit/2019-03-03/README.md") - print(dummy_file.path) - print(ExampleCFood.get_re()) - cf = ExampleCFood(crawled_file=dummy_file) - self.assertIsNotNone(ExampleCFood.match(dummy_file.path)) + path = "/data/rabbit/2019-03-03/README.md" + cf = ExampleCFood(crawled_file=path) + self.assertIsNotNone(ExampleCFood.match(path)) self.assertEqual(cf.match.group('species'), 'rabbit') self.assertEqual(cf.match.group('date'), '2019-03-03')