Skip to content
Snippets Groups Projects
Commit 68167ebd authored by Henrik tom Wörden
Browse files

MAINT: first collect all matching cfoods, then process their identifiables

parent 4220fc87
No related branches found
No related tags found
No related merge requests found
......@@ -69,6 +69,8 @@ class Crawler(object):
self.cache = Cache()
def crawl(self, files):
cfoods = []
for crawled_file in sorted(files, key=lambda x: x.path):
# if crawled_file.size == 0:
# crawled_file.add_message(
......@@ -92,22 +94,8 @@ class Crawler(object):
if self.verbose:
print("{} matched.".format(Cfood.__name__))
try:
cfood = Cfood(crawled_file, access=self.access,
verbose=self.verbose)
cfood.create_identifiables()
if self.use_cache:
hashes = self.cache.update_ids_from_cache(
cfood.identifiables)
self.find_or_insert_identifiables(cfood.identifiables,
self.verbose)
if self.use_cache:
self.cache.insert_list(hashes, cfood.identifiables)
cfood.update_identifiables()
cfood.push_identifiables_to_CaosDB()
cfoods.append(Cfood(crawled_file, access=self.access,
verbose=self.verbose))
except Exception as e:
traceback.print_exc()
print(e)
......@@ -118,6 +106,26 @@ class Crawler(object):
if self.verbose and matches > 1:
print("Attention: More than one matching cfood!")
for cfood in cfoods:
try:
cfood.create_identifiables()
if self.use_cache:
hashes = self.cache.update_ids_from_cache(
cfood.identifiables)
self.find_or_insert_identifiables(cfood.identifiables,
self.verbose)
if self.use_cache:
self.cache.insert_list(hashes, cfood.identifiables)
cfood.update_identifiables()
cfood.push_identifiables_to_CaosDB()
except Exception as e:
traceback.print_exc()
print(e)
@staticmethod
def find_or_insert_identifiables(identifiables, verbose=True):
""" Sets the ids of identifiables (that do not have already an id from the
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment