From 9556acccc8a864f2becab1c773969c196120e17e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Henrik=20tom=20W=C3=B6rden?= <h.tomwoerden@indiscale.com>
Date: Fri, 10 Jul 2020 14:15:38 +0000
Subject: [PATCH] ENH: Don't let the crawler query all files

---
 src/caosadvancedtools/cfood.py   | 17 +++++++++++++++--
 src/caosadvancedtools/crawler.py |  3 ++-
 unittests/test_cfood.py          | 17 +++++++++++++----
 3 files changed, 30 insertions(+), 7 deletions(-)

diff --git a/src/caosadvancedtools/cfood.py b/src/caosadvancedtools/cfood.py
index 89d879f8..3ec1b6ed 100644
--- a/src/caosadvancedtools/cfood.py
+++ b/src/caosadvancedtools/cfood.py
@@ -51,6 +51,7 @@ ENTITIES = {}
 PROPERTIES = {}
 RECORDS = {}
 RECORDTYPES = {}
+FILES = {}
 
 logger = logging.getLogger(__name__)
 
@@ -244,11 +245,20 @@ class AbstractCFood(object, metaclass=ABCMeta):
             entity.add_property(prop, value)
 
 
+def add_files(filemap):
+    """Merge ``filemap`` (path -> file entity) into the module file cache."""
+    FILES.update(filemap)
+
+
 def get_entity_for_path(path):
+    if path in FILES:
+        return FILES[path]
     try:
         q = "FIND FILE WHICH IS STORED AT '{}'".format(path)
+        logger.debug(q)
+        FILES[path] = db.execute_query(q, unique=True)
 
-        return db.execute_query(q, unique=True)
+        return FILES[path]
     except EntityDoesNotExistError:
         path_prefix = "**"
 
@@ -257,7 +267,9 @@ def get_entity_for_path(path):
         q = "FIND FILE WHICH IS STORED AT '{}{}'".format(path_prefix, path)
         logger.debug(q)
 
-        return db.execute_query(q, unique=True)
+        FILES[path] = db.execute_query(q, unique=True)
+
+        return FILES[path]
 
 
 class AbstractFileCFood(AbstractCFood):
@@ -488,6 +500,7 @@ def assure_has_property(entity, name, value, to_be_updated=None,
 
             else:
                 to_be_updated.append(entity)
+
             return
 
     properties = entity.get_properties()
diff --git a/src/caosadvancedtools/crawler.py b/src/caosadvancedtools/crawler.py
index 84a63569..df675042 100644
--- a/src/caosadvancedtools/crawler.py
+++ b/src/caosadvancedtools/crawler.py
@@ -50,7 +50,7 @@ import caosdb as db
 from caosdb.exceptions import TransactionError
 
 from .cache import Cache, UpdateCache, get_pretty_xml
-from .cfood import RowCFood, get_ids_for_entities_with_names
+from .cfood import RowCFood, add_files, get_ids_for_entities_with_names
 from .datainconsistency import DataInconsistencyError
 from .datamodel_problems import DataModelProblems
 from .guard import RETRIEVE, ProhibitedException
@@ -611,6 +611,7 @@ class FileCrawler(Crawler):
         """
         super().__init__(**kwargs)
         self.files = files
+        add_files({fi.path: fi for fi in files})
 
     def iteritems(self):
         for idx, p in enumerate(sorted([f.path for f in self.files])):
diff --git a/unittests/test_cfood.py b/unittests/test_cfood.py
index f51d515a..802497a8 100644
--- a/unittests/test_cfood.py
+++ b/unittests/test_cfood.py
@@ -25,10 +25,11 @@ import re
 import unittest
 
 import caosdb as db
-from caosadvancedtools.cfood import (AbstractCFood, AbstractFileCFood,
-                                     CMeal, assure_has_parent,
-                                     assure_has_property,
-                                     assure_object_is_in_list)
+from caosadvancedtools.cfood import (AbstractCFood, AbstractFileCFood, CMeal,
+                                     assure_has_parent, assure_has_property,
+                                     assure_object_is_in_list,
+                                     get_entity_for_path)
+from caosadvancedtools.crawler import FileCrawler
 from caosadvancedtools.example_cfood import ExampleCFood
 
 PATTERN = "h.*"
@@ -143,6 +144,7 @@ class InsertionTest(unittest.TestCase):
             "Test bool": ("BOOLEAN", True),
             "Test string": ("STRING", "bla")
         }
+
         for name, ty_val in types_and_values.items():
             entity.add_property(name=name, datatype=ty_val[0],
                                 value=ty_val[1])
@@ -183,3 +185,10 @@ class MealTest(unittest.TestCase):
         self.assertTrue(c.looking_for("/this/other"))
         # class should still match other prefixes
         self.assertTrue(ExampleCFoodMeal.match_item("/that/file"))
+
+
+class FileCacheTest(unittest.TestCase):
+    def test(self):
+        self.assertRaises(Exception, get_entity_for_path, "/lol")
+        FileCrawler(cfood_types=[], files=[db.File(path="/lol")])
+        get_entity_for_path("/lol")
-- 
GitLab