From 68167ebd2525d96714b2b86a75afba47772d9ec7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20tom=20W=C3=B6rden?= <henrik@trineo.org> Date: Sun, 11 Aug 2019 12:57:28 +0200 Subject: [PATCH] MAINT: do first all the matches and then identifiables --- src/caosadvancedtools/crawler.py | 40 +++++++++++++++++++------------- 1 file changed, 24 insertions(+), 16 deletions(-) diff --git a/src/caosadvancedtools/crawler.py b/src/caosadvancedtools/crawler.py index 4d548d21..74c13732 100644 --- a/src/caosadvancedtools/crawler.py +++ b/src/caosadvancedtools/crawler.py @@ -69,6 +69,8 @@ class Crawler(object): self.cache = Cache() def crawl(self, files): + cfoods = [] + for crawled_file in sorted(files, key=lambda x: x.path): # if crawled_file.size == 0: # crawled_file.add_message( @@ -92,22 +94,8 @@ class Crawler(object): if self.verbose: print("{} matched.".format(Cfood.__name__)) try: - cfood = Cfood(crawled_file, access=self.access, - verbose=self.verbose) - cfood.create_identifiables() - - if self.use_cache: - hashes = self.cache.update_ids_from_cache( - cfood.identifiables) - - self.find_or_insert_identifiables(cfood.identifiables, - self.verbose) - - if self.use_cache: - self.cache.insert_list(hashes, cfood.identifiables) - - cfood.update_identifiables() - cfood.push_identifiables_to_CaosDB() + cfoods.append(Cfood(crawled_file, access=self.access, + verbose=self.verbose)) except Exception as e: traceback.print_exc() print(e) @@ -118,6 +106,26 @@ class Crawler(object): if self.verbose and matches > 1: print("Attention: More than one matching cfood!") + for cfood in cfoods: + try: + cfood.create_identifiables() + + if self.use_cache: + hashes = self.cache.update_ids_from_cache( + cfood.identifiables) + + self.find_or_insert_identifiables(cfood.identifiables, + self.verbose) + + if self.use_cache: + self.cache.insert_list(hashes, cfood.identifiables) + + cfood.update_identifiables() + cfood.push_identifiables_to_CaosDB() + except Exception as e: + traceback.print_exc() + print(e) + @staticmethod def find_or_insert_identifiables(identifiables, verbose=True): """ Sets the ids of identifiables (that do not have already an id from the -- GitLab