diff --git a/.docker/Dockerfile b/.docker/Dockerfile index d8c975783da47cc975fd071dfef604223f4e4fe5..9bbf0f56556e1a70f3971aedd3c8ee01e481afe2 100644 --- a/.docker/Dockerfile +++ b/.docker/Dockerfile @@ -7,7 +7,7 @@ RUN apt-get update && \ python3-requests \ python3-pandas \ python3-html2text \ - tox \ + tox \ git \ openjdk-11-jdk-headless \ python-autopep8 \ diff --git a/src/caosadvancedtools/crawler.py b/src/caosadvancedtools/crawler.py index 0aa959405c0cc1dfe1b7701dc99a72a4b040b7f2..48191558100e62cd29ddff3d16afc128e5b3a240 100644 --- a/src/caosadvancedtools/crawler.py +++ b/src/caosadvancedtools/crawler.py @@ -51,6 +51,7 @@ from caosdb.exceptions import TransactionError from .cache import Cache, UpdateCache, get_pretty_xml from .cfood import RowCFood, get_ids_for_entities_with_names +from .datainconsistency import DataInconsistencyError from .datamodel_problems import DataModelProblems from .guard import RETRIEVE, ProhibitedException from .guard import global_guard as guard @@ -88,7 +89,8 @@ class UnknownCache(object): class Crawler(object): def __init__(self, cfood_types, use_cache=False, - abort_on_exception=True, interactive=True, hideKnown=False): + abort_on_exception=True, interactive=True, hideKnown=False, + debug_file=None, cache_file=None): """ Parameters ---------- @@ -102,6 +104,10 @@ class Crawler(object): interactive : boolean, optional If true, questions will be posed during execution of the crawl function. + debug_file : a file where debug output is saved. The path will be + printed when a critical error occurred. + cache_file : a file where the cached identifiables are stored. 
See + cache.py """ @@ -110,11 +116,12 @@ class Crawler(object): self.report = db.Container() self.use_cache = use_cache self.hideKnown = hideKnown + self.debug_file = debug_file self.abort_on_exception = abort_on_exception self.update_cache = UpdateCache() if self.use_cache: - self.cache = Cache() + self.cache = Cache(db_file=cache_file) def iteritems(self): """ generates items to be crawled with an index""" @@ -192,10 +199,12 @@ class Crawler(object): logger.debug("{} matched\n{}.".format( Cfood.__name__, item)) + except DataInconsistencyError: + pass except Exception as e: logger.debug("Failed during execution of {}!".format( Cfood.__name__)) - # traceback.print_exc() + logger.debug(traceback.format_exc()) logger.debug(e) if self.abort_on_exception: @@ -287,10 +296,12 @@ class Crawler(object): cfood.update_identifiables() self.push_identifiables_to_CaosDB(cfood) + except DataInconsistencyError: + pass except Exception as e: logger.info("Failed during execution of {}!".format( cfood.__class__.__name__)) - # traceback.print_exc() + logger.debug(traceback.format_exc()) logger.debug(e) if self.abort_on_exception: @@ -329,8 +340,13 @@ ____________________\n""".format(i+1, len(pending_changes)) + str(el[3])) logger.error(err_msg) logger.error('Crawler finished with Datamodel Errors') elif errors_occured: - logger.error("There were fatal errors during execution, please " - "contact the system administrator!") + msg = ("There were fatal errors during execution, please " + "contact the system administrator!") 
+ + if self.debug_file: + msg += "\nPlease provide the following path:\n{}".format( + self.debug_file) + logger.error(msg) logger.error("Crawler terminated with failures!") logger.debug(tbs) else: @@ -511,7 +527,8 @@ carefully and if the changes are ok, click on the following link: for ent in identifiables: if ent.id is None or ent.id < 0: - logger.debug("Looking for: {}".format(ent)) + logger.debug("Looking for: {}".format( + ent.id if ent.id is not None else ent.name)) existing = Crawler.find_existing(ent) if existing is not None: diff --git a/src/caosadvancedtools/datainconsistency.py b/src/caosadvancedtools/datainconsistency.py new file mode 100644 index 0000000000000000000000000000000000000000..3af8b5a2c0b73746185306a94b987a0800acb899 --- /dev/null +++ b/src/caosadvancedtools/datainconsistency.py @@ -0,0 +1,29 @@ +#!/usr/bin/env python +# encoding: utf-8 +# +# This file is a part of the CaosDB Project. +# +# Copyright (C) 2020 Indiscale GmbH <info@indiscale.com> +# Copyright (C) 2020 Henrik tom Wörden <h.tomwoerden@indiscale.com> +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see <https://www.gnu.org/licenses/>. + +"""Implements an error to be used when there is a problem with the data to be +read, i.e. something that users of CaosDB need to fix. + +""" + + +class DataInconsistencyError(ValueError): + pass