From 19043df5508dfcf51b77aca0a3cf856999f82ee0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Henrik=20tom=20W=C3=B6rden?= <h.tomwoerden@indiscale.com>
Date: Mon, 6 Jul 2020 15:58:54 +0000
Subject: [PATCH] Various Enhancements

---
 .docker/Dockerfile                         |  2 +-
 src/caosadvancedtools/crawler.py           | 31 +++++++++++++++++-----
 src/caosadvancedtools/datainconsistency.py | 29 ++++++++++++++++++++
 3 files changed, 54 insertions(+), 8 deletions(-)
 create mode 100644 src/caosadvancedtools/datainconsistency.py

diff --git a/.docker/Dockerfile b/.docker/Dockerfile
index d8c97578..9bbf0f56 100644
--- a/.docker/Dockerfile
+++ b/.docker/Dockerfile
@@ -7,7 +7,7 @@ RUN apt-get update && \
 	 python3-requests \
 	 python3-pandas \
 	 python3-html2text \
-     tox \
+	 tox \
 	 git \
 	 openjdk-11-jdk-headless \
 	 python-autopep8 \
diff --git a/src/caosadvancedtools/crawler.py b/src/caosadvancedtools/crawler.py
index 0aa95940..48191558 100644
--- a/src/caosadvancedtools/crawler.py
+++ b/src/caosadvancedtools/crawler.py
@@ -51,6 +51,7 @@ from caosdb.exceptions import TransactionError
 
 from .cache import Cache, UpdateCache, get_pretty_xml
 from .cfood import RowCFood, get_ids_for_entities_with_names
+from .datainconsistency import DataInconsistencyError
 from .datamodel_problems import DataModelProblems
 from .guard import RETRIEVE, ProhibitedException
 from .guard import global_guard as guard
@@ -88,7 +89,8 @@ class UnknownCache(object):
 
 class Crawler(object):
     def __init__(self, cfood_types, use_cache=False,
-                 abort_on_exception=True, interactive=True, hideKnown=False):
+                 abort_on_exception=True, interactive=True, hideKnown=False,
+                 debug_file=None, cache_file=None):
         """
         Parameters
         ----------
@@ -102,6 +104,10 @@ class Crawler(object):
         interactive : boolean, optional
                       If true, questions will be posed during execution of the
                       crawl function.
+        debug_file : a file where debug output is saved. The path will be
+                     printed when a critical error occurs.
+        cache_file : a file where the cached identifiables are stored. See
+                     cache.py
 
         """
 
@@ -110,11 +116,12 @@ class Crawler(object):
         self.report = db.Container()
         self.use_cache = use_cache
         self.hideKnown = hideKnown
+        self.debug_file = debug_file
         self.abort_on_exception = abort_on_exception
         self.update_cache = UpdateCache()
 
         if self.use_cache:
-            self.cache = Cache()
+            self.cache = Cache(db_file=cache_file)
 
     def iteritems(self):
         """ generates items to be crawled with an index"""
@@ -192,10 +199,12 @@ class Crawler(object):
                         logger.debug("{} matched\n{}.".format(
                                 Cfood.__name__,
                                 item))
+                    except DataInconsistencyError:
+                        pass
                     except Exception as e:
                         logger.debug("Failed during execution of {}!".format(
                             Cfood.__name__))
-                        # traceback.print_exc()
+                        logger.debug(traceback.format_exc())
                         logger.debug(e)
 
                         if self.abort_on_exception:
@@ -287,10 +296,12 @@ class Crawler(object):
 
                 cfood.update_identifiables()
                 self.push_identifiables_to_CaosDB(cfood)
+            except DataInconsistencyError:
+                pass
             except Exception as e:
                 logger.info("Failed during execution of {}!".format(
                     cfood.__class__.__name__))
-                # traceback.print_exc()
+                logger.debug(traceback.format_exc())
                 logger.debug(e)
 
                 if self.abort_on_exception:
@@ -329,8 +340,13 @@ ____________________\n""".format(i+1, len(pending_changes)) + str(el[3]))
             logger.error(err_msg)
             logger.error('Crawler finished with Datamodel Errors')
         elif errors_occured:
-            logger.error("There were fatal errors during execution, please "
-                         "contact the system administrator!")
+            # Parentheses join both literals; a bare second line is a no-op.
+            msg = ("There were fatal errors during execution, please "
+                   "contact the system administrator!")
+            if self.debug_file:
+                msg += "\nPlease provide the following path:\n{}".format(
+                    self.debug_file)
+            logger.error(msg)
             logger.error("Crawler terminated with failures!")
             logger.debug(tbs)
         else:
@@ -511,7 +527,8 @@ carefully and if the changes are ok, click on the following link:
 
         for ent in identifiables:
             if ent.id is None or ent.id < 0:
-                logger.debug("Looking for: {}".format(ent))
+                logger.debug("Looking for: {}".format(
+                    ent.id if ent.id is not None else ent.name))
                 existing = Crawler.find_existing(ent)
 
                 if existing is not None:
diff --git a/src/caosadvancedtools/datainconsistency.py b/src/caosadvancedtools/datainconsistency.py
new file mode 100644
index 00000000..3af8b5a2
--- /dev/null
+++ b/src/caosadvancedtools/datainconsistency.py
@@ -0,0 +1,29 @@
+#!/usr/bin/env python
+# encoding: utf-8
+#
+# This file is a part of the CaosDB Project.
+#
+# Copyright (C) 2020 Indiscale GmbH <info@indiscale.com>
+# Copyright (C) 2020 Henrik tom Wörden <h.tomwoerden@indiscale.com>
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as
+# published by the Free Software Foundation, either version 3 of the
+# License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+
+"""Implements an error to be used when there is a problem with the data to be
+read. I.e. something that users of CaosDB need to fix.
+
+"""
+
+
+class DataInconsistencyError(ValueError):
+    """Raised when the data to be read is faulty and users must fix it."""
-- 
GitLab