From 7af972b0ff72038cf91d239af60507b1d0ebb05c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Henrik=20tom=20W=C3=B6rden?= <h.tomwoerden@indiscale.com>
Date: Mon, 16 Dec 2019 11:06:20 +0000
Subject: [PATCH] Separate matching of files from the remaining tasks

---
 src/caosadvancedtools/cfood.py   |  6 ++---
 src/caosadvancedtools/crawler.py | 38 ++++++++++++++++++--------------
 2 files changed, 25 insertions(+), 19 deletions(-)

diff --git a/src/caosadvancedtools/cfood.py b/src/caosadvancedtools/cfood.py
index c7a98594..8526be5d 100644
--- a/src/caosadvancedtools/cfood.py
+++ b/src/caosadvancedtools/cfood.py
@@ -75,9 +75,9 @@ class AbstractCFood(object):
                  A function that takes a CaosDB path and returns a local path
         """
         self.access = access
-        self.crawled_file = crawled_file
-        self.crawled_path = crawled_file.path
-        self.match = type(self).match(crawled_file.path)
+        self.crawled_file = None
+        self.crawled_path = crawled_file
+        self.match = type(self).match(crawled_file)
         self.to_be_updated = db.Container()
         self.identifiables = db.Container()
         self.verbosity = verbosity
diff --git a/src/caosadvancedtools/crawler.py b/src/caosadvancedtools/crawler.py
index 8a7698b8..1c559b60 100644
--- a/src/caosadvancedtools/crawler.py
+++ b/src/caosadvancedtools/crawler.py
@@ -74,14 +74,11 @@ class Crawler(object):
         if self.use_cache:
             self.cache = Cache()
 
-    def crawl(self, files, interactive=True, security_level=RETRIEVE):
+    def match(self, files):
         errors_occured = False
         tbs = []
-        guard.set_level(level=security_level)
-
-        files = sorted(files, key=lambda x: x.path)
         cfoods = []
-        matches = {f.path: 0 for f in files}
+        matches = {f: 0 for f in files}
 
         if self.verbosity >= INFO:
             print("-"*60)
@@ -93,15 +90,15 @@ class Crawler(object):
 
             for crawled_file in files:
                 if self.verbosity >= DEBUG:
-                    print("Matching {}...".format(crawled_file.path))
+                    print("Matching {}...".format(crawled_file))
 
-                if Cfood.match(crawled_file.path) is not None:
-                    matches[crawled_file.path] += 1
+                if Cfood.match(crawled_file) is not None:
+                    matches[crawled_file] += 1
 
                     if self.verbosity >= VERBOSE:
                         print("{} matched\n{}.".format(
                             Cfood.__class__.__name__,
-                            crawled_file.path))
+                            crawled_file))
                     try:
                         cfood = Cfood.cook(crawled_file, access=self.access,
                                            verbosity=self.verbosity)
@@ -127,25 +124,34 @@ class Crawler(object):
 
             for crawled_file in files:
                 if self.verbosity >= DEBUG:
-                    print("Matching {}...".format(crawled_file.path))
+                    print("Matching {}...".format(crawled_file))
 
                 if cfood.looking_for(crawled_file):
                     if self.verbosity >= VERBOSE:
                         print("{} matched\n{}.".format(
                             Cfood.__class__.__name__,
-                            crawled_file.path))
+                            crawled_file))
                     cfood.attach(crawled_file)
-                    matches[crawled_file.path] += 1
+                    matches[crawled_file] += 1
 
         if self.verbosity >= INFO:
             for crawled_file in files:
-                if matches[crawled_file.path] == 0:
+                if matches[crawled_file] == 0:
                     print("ATTENTION: No matching cfood!")
-                    print("Tried to match {}".format(crawled_file.path))
+                    print("Tried to match {}".format(crawled_file))
 
-                if matches[crawled_file.path] > 1:
+                if matches[crawled_file] > 1:
                     print("Attention: More than one matching cfood!")
-                    print("Tried to match {}".format(crawled_file.path))
+                    print("Tried to match {}".format(crawled_file))
+
+        return cfoods, matches, tbs, errors_occured
+
+    def crawl(self, files, interactive=True, security_level=RETRIEVE):
+        guard.set_level(level=security_level)
+        # match() treats each entry as a path string, so pass the paths.
+        files = sorted(files, key=lambda x: x.path)
+        paths = [f.path for f in files]
+        cfoods, matches, tbs, errors_occured = self.match(paths)
 
         if interactive and "y" != input("Do you want to continue? (y)"):
             return
-- 
GitLab