diff --git a/src/caosadvancedtools/crawler.py b/src/caosadvancedtools/crawler.py index 4d548d213d70ca63f0f5ef963da7ae8435b9e883..74c13732a0e266e29a7e6b1b3747833af7844f59 100644 --- a/src/caosadvancedtools/crawler.py +++ b/src/caosadvancedtools/crawler.py @@ -69,6 +69,8 @@ class Crawler(object): self.cache = Cache() def crawl(self, files): + cfoods = [] + for crawled_file in sorted(files, key=lambda x: x.path): # if crawled_file.size == 0: # crawled_file.add_message( @@ -92,22 +94,8 @@ class Crawler(object): if self.verbose: print("{} matched.".format(Cfood.__name__)) try: - cfood = Cfood(crawled_file, access=self.access, - verbose=self.verbose) - cfood.create_identifiables() - - if self.use_cache: - hashes = self.cache.update_ids_from_cache( - cfood.identifiables) - - self.find_or_insert_identifiables(cfood.identifiables, - self.verbose) - - if self.use_cache: - self.cache.insert_list(hashes, cfood.identifiables) - - cfood.update_identifiables() - cfood.push_identifiables_to_CaosDB() + cfoods.append(Cfood(crawled_file, access=self.access, + verbose=self.verbose)) except Exception as e: traceback.print_exc() print(e) @@ -118,6 +106,26 @@ class Crawler(object): if self.verbose and matches > 1: print("Attention: More than one matching cfood!") + for cfood in cfoods: + try: + cfood.create_identifiables() + + if self.use_cache: + hashes = self.cache.update_ids_from_cache( + cfood.identifiables) + + self.find_or_insert_identifiables(cfood.identifiables, + self.verbose) + + if self.use_cache: + self.cache.insert_list(hashes, cfood.identifiables) + + cfood.update_identifiables() + cfood.push_identifiables_to_CaosDB() + except Exception as e: + traceback.print_exc() + print(e) + @staticmethod def find_or_insert_identifiables(identifiables, verbose=True): """ Sets the ids of identifiables (that do not have already an id from the