From 23445e6eb3e8a7900e355997f6221f9e2b2eabea Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Henrik=20tom=20W=C3=B6rden?= <henrik@trineo.org>
Date: Mon, 12 Aug 2019 13:06:31 +0200
Subject: [PATCH] ENH: output can now be controlled more specifically via
 verbosity levels

---
 src/caosadvancedtools/cfood.py   | 53 ++++++++++++------
 src/caosadvancedtools/crawler.py | 92 +++++++++++++++++++++-----------
 2 files changed, 97 insertions(+), 48 deletions(-)

diff --git a/src/caosadvancedtools/cfood.py b/src/caosadvancedtools/cfood.py
index 2afe4b30..e3ef4dec 100644
--- a/src/caosadvancedtools/cfood.py
+++ b/src/caosadvancedtools/cfood.py
@@ -38,6 +38,8 @@ import re
 
 import caosdb as db
 
+from .verbosity import INFO, VERBOSE
+
 ENTITIES = {}
 
 
@@ -60,7 +62,7 @@ class AbstractCFood(object):
     # function match()
     _pattern = None
 
-    def __init__(self, crawled_file, access=lambda x: x, verbose=True):
+    def __init__(self, crawled_file, access=lambda x: x, verbosity=INFO):
         """ Abstract base class for Crawler food (CFood).
 
         Parameters
@@ -77,7 +79,7 @@ class AbstractCFood(object):
         self.match = type(self).match(crawled_file.path)
         self.to_be_updated = db.Container()
         self.identifiables = db.Container()
-        self.verbose = verbose
+        self.verbosity = verbosity
         self.attached_ones = []
 
     @staticmethod
@@ -127,7 +129,16 @@ class AbstractCFood(object):
 
         if len(self.to_be_updated) == 0:
             return
+
         get_ids_for_entities_with_names(self.to_be_updated)
+
+        if self.verbosity >= INFO:
+            print("/"*60)
+            print("UPDATE: updating the following entities")
+
+            for el in self.to_be_updated:
+                print(el.name if el.name is not None else el.id)
+            print("/"*60)
         self.to_be_updated.update()
 
     def attach(self, crawled_file):
@@ -173,7 +184,7 @@ class AbstractCFood(object):
 
 
 def assure_object_is_in_list(obj, containing_object, property_name,
-                             to_be_updated, verbose=True):
+                             to_be_updated, verbosity=INFO):
     """
     Checks whether `obj` is one of the values in the list property
     `property_name` of the supplied entity  containing_object`.
@@ -212,15 +223,17 @@ def assure_object_is_in_list(obj, containing_object, property_name,
 
                 break
 
-        if verbose:
-            if contained:
+        if contained:
+            if verbosity >= VERBOSE:
                 print("{} is in {} of entity {}".format(
                     o, property_name, containing_object.id))
-            else:
-                print("Appending {} to {} of entity {}".format(
-                    o, property_name, containing_object.id))
 
-        if not contained:
+        else:
+            if verbosity >= INFO:
+                print("/"*60)
+                print("UPDATE: Appending {} to {} of entity {}".format(
+                    o, property_name, containing_object.id))
+                print("/"*60)
             current_list.append(o)
             update = True
 
@@ -228,7 +241,7 @@ def assure_object_is_in_list(obj, containing_object, property_name,
         to_be_updated.append(containing_object)
 
 
-def assure_has_parent(entity, parent, to_be_updated=None, verbose=True):
+def assure_has_parent(entity, parent, to_be_updated=None, verbosity=INFO):
     """
     Checks whether `entity` has a parent with name `parent`.
 
@@ -247,13 +260,16 @@ def assure_has_parent(entity, parent, to_be_updated=None, verbose=True):
             break
 
     if contained:
-        if verbose:
+        if verbosity >= VERBOSE:
             print("entity {} has parent {}".format(entity.id, parent))
 
         return
 
-    if verbose:
-        print("Adding  parent {} to entity {}".format(parent, entity.id))
+    if verbosity >= INFO:
+        print("/"*60)
+        print("UPDATE: Adding  parent {} to entity {}".format(parent,
+                                                              entity.id))
+        print("/"*60)
     entity.add_parent(parent)
 
     if to_be_updated is None:
@@ -262,7 +278,8 @@ def assure_has_parent(entity, parent, to_be_updated=None, verbose=True):
         to_be_updated.append(entity)
 
 
-def assure_has_property(entity, name, value, to_be_updated=None, verbose=True):
+def assure_has_property(entity, name, value, to_be_updated=None,
+                        verbosity=INFO):
     """
     Checks whether `entity` has a property `name` with the value `value`.
 
@@ -282,17 +299,19 @@ def assure_has_property(entity, name, value, to_be_updated=None, verbose=True):
             break
 
     if contained:
-        if verbose:
+        if verbosity >= VERBOSE:
             print("entity {} has property  {} with value {}".format(
                 entity.id,
                 name, value))
 
         return
 
-    if verbose:
-        print("Adding  property {} with value {} to entity {}".format(
+    if verbosity >= INFO:
+        print("/"*60)
+        print("UPDATE: Adding  property {} with value {} to entity {}".format(
             name,
             value, entity.id))
+        print("/"*60)
     entity.add_property(name=name, value=value)
 
     if to_be_updated is None:
diff --git a/src/caosadvancedtools/crawler.py b/src/caosadvancedtools/crawler.py
index 2cc85ec7..04b941d9 100644
--- a/src/caosadvancedtools/crawler.py
+++ b/src/caosadvancedtools/crawler.py
@@ -42,10 +42,12 @@ import caosdb as db
 from caosdb.exceptions import TransactionError
 
 from .cache import Cache
+from .verbosity import DEBUG, INFO, VERBOSE
 
 
 class Crawler(object):
-    def __init__(self, food, access=lambda x: x, use_cache=False, verbose=True):
+    def __init__(self, food, access=lambda x: x, use_cache=False,
+                 verbosity=INFO):
         """
         Parameters
         ----------
@@ -63,15 +65,23 @@ class Crawler(object):
         self.access = access
         self.report = db.Container()
         self.use_cache = use_cache
-        self.verbose = verbose
+        self.verbosity = verbosity
 
         if self.use_cache:
             self.cache = Cache()
 
     def crawl(self, files):
+        files = sorted(files, key=lambda x: x.path)
         cfoods = []
+        matches = {f.path: 0 for f in files}
 
-        for crawled_file in sorted(files, key=lambda x: x.path):
+        if self.verbosity >= INFO:
+            print("-"*60)
+            print("Matching files against CFoods")
+
+        for crawled_file in files:
+            if self.verbosity >= VERBOSE:
+                print("Matching {}...".format(crawled_file.path))
             # if crawled_file.size == 0:
             #    crawled_file.add_message(
             #        type="Warning", description="This file is empty. Shouldn't we delete it?")
@@ -79,37 +89,47 @@ class Crawler(object):
 
             #    continue
 
-            if self.verbose:
-                msg = "Matching {}".format(crawled_file.path)
-                print("="*len(msg))
-                print(msg)
-                print("="*len(msg))
-
-            matches = 0
-
             for Cfood in self.food:
                 if Cfood.match(crawled_file.path) is not None:
-                    matches += 1
+                    matches[crawled_file.path] += 1
 
-                    if self.verbose:
+                    if self.verbosity >= VERBOSE:
                         print("{} matched.".format(Cfood.__name__))
                     try:
                         cfoods.append(Cfood(crawled_file, access=self.access,
-                                            verbose=self.verbose))
+                                            verbosity=self.verbosity))
                     except Exception as e:
                         traceback.print_exc()
                         print(e)
 
-            if self.verbose and matches == 0:
-                print("ATTENTION: No matching cfood!")
+        if self.verbosity >= INFO:
+            print("-"*60)
+            print("Trying to attach files to created CFoods")
 
-            if self.verbose and matches > 1:
-                print("Attention: More than one matching cfood!")
+        for crawled_file in files:
+            if self.verbosity >= VERBOSE:
+                print("Matching {}...".format(crawled_file.path))
 
-        for crawled_file in sorted(files, key=lambda x: x.path):
             for cfood in cfoods:
                 if cfood.looking_for(crawled_file):
+                    if self.verbosity >= VERBOSE:
+                        print("{} matched.".format(cfood.__class__.__name__))
                     cfood.attach(crawled_file)
+                    matches[crawled_file.path] += 1
+
+        if self.verbosity >= INFO:
+            for crawled_file in files:
+                if matches[crawled_file.path] == 0:
+                    print("ATTENTION: No matching cfood!")
+                    print("Tried to match {}".format(crawled_file.path))
+
+                if matches[crawled_file.path] > 1:
+                    print("Attention: More than one matching cfood!")
+                    print("Tried to match {}".format(crawled_file.path))
+
+        if self.verbosity >= INFO:
+            print("-"*60)
+            print("Creating and updating Identifiables")
 
         for cfood in cfoods:
             try:
@@ -120,7 +140,7 @@ class Crawler(object):
                         cfood.identifiables)
 
                 self.find_or_insert_identifiables(cfood.identifiables,
-                                                  self.verbose)
+                                                  self.verbosity)
 
                 if self.use_cache:
                     self.cache.insert_list(hashes, cfood.identifiables)
@@ -131,8 +151,13 @@ class Crawler(object):
                 traceback.print_exc()
                 print(e)
 
+        if self.verbosity >= INFO:
+            print("-"*60)
+            print("Crawler terminated successfully!")
+            print("-"*60)
+
     @staticmethod
-    def find_or_insert_identifiables(identifiables, verbose=True):
+    def find_or_insert_identifiables(identifiables, verbosity=INFO):
         """ Sets the ids of identifiables (that do not have already an id from the
         cache) based on searching CaosDB and retrieves those entities.
         The remaining entities (those which can not be retrieved) have no
@@ -141,20 +166,20 @@ class Crawler(object):
         # looking for matching entities in CaosDB when there is no valid id
         # i.e. there was none set from a cache
 
-        if verbose:
+        if verbosity >= VERBOSE:
             print("-----------------------------------------------------")
 
         for ent in identifiables:
             if ent.id is None or ent.id < 0:
-                if verbose:
+                if verbosity >= VERBOSE:
                     print("Looking for:")
                     print(ent)
-                existing = Crawler.find_existing(ent)
+                existing = Crawler.find_existing(ent, verbosity=verbosity)
 
                 if existing is not None:
                     ent.id = existing.id
             else:
-                if verbose:
+                if verbosity >= DEBUG:
                     print("Id is known of:")
                     print(ent)
 
@@ -167,22 +192,26 @@ class Crawler(object):
         for ent in missing_identifiables:
             ent.id = None
 
-        if verbose:
+        if verbosity >= INFO and len(missing_identifiables) > 0:
             print("Going to insert the following entities:")
 
             for ent in missing_identifiables:
                 print(ent)
+
+        if verbosity >= VERBOSE and len(missing_identifiables) == 0:
+            print("No new entities to be inserted.")
+
         missing_identifiables.insert()
 
-        if verbose:
-            print("Updating entities from CaosDB...")
+        if verbosity >= VERBOSE:
+            print("Retrieving entities from CaosDB...")
         identifiables.retrieve(unique=True, raise_exception_on_error=False)
 
-        if verbose:
+        if verbosity >= VERBOSE:
             print("-----------------------------------------------------")
 
     @staticmethod
-    def find_existing(entity):
+    def find_existing(entity, verbosity=INFO):
         """searches for an entity that matches the identifiable in CaosDB
 
         Characteristics of the identifiable like, properties, name or id are
@@ -205,7 +234,8 @@ class Crawler(object):
         else:
             query_string = "FIND '{}'".format(entity.name)
 
-        print(query_string)
+        if verbosity >= VERBOSE:
+            print(query_string)
         q = db.Query(query_string)
         # the identifiable should identify an object uniquely. Thus the query
         # is using the unique keyword
-- 
GitLab