From 50fdfcef2061705966acbe26f7645a36bceb77af Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20tom=20W=C3=B6rden?= <h.tomwoerden@indiscale.com> Date: Mon, 3 Aug 2020 14:54:23 +0000 Subject: [PATCH] ENH: Add function for logging setup and shared resource --- .gitignore | 1 + CHANGELOG.md | 3 + integrationtests/crawl.py | 4 +- integrationtests/test.sh | 6 +- src/caosadvancedtools/__init__.py | 6 -- src/caosadvancedtools/cfood.py | 2 +- src/caosadvancedtools/crawler.py | 12 +++- src/caosadvancedtools/serverside/helper.py | 36 ++++++++++++ src/caosadvancedtools/serverside/logging.py | 64 +++++++++++++++++++++ src/caosadvancedtools/suppressKnown.py | 10 ++++ src/caosadvancedtools/table_importer.py | 21 +++++-- src/caosadvancedtools/utils.py | 2 +- unittests/test_table_importer.py | 9 +-- 13 files changed, 154 insertions(+), 22 deletions(-) create mode 100644 src/caosadvancedtools/serverside/logging.py diff --git a/.gitignore b/.gitignore index a8771915..b1d651a7 100644 --- a/.gitignore +++ b/.gitignore @@ -6,3 +6,4 @@ __pycache__ *.egg-info .docker/cert src/caosadvancedtools/version.py +version.py diff --git a/CHANGELOG.md b/CHANGELOG.md index e4c3fda8..22aa4590 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - New class to collect possible problems with the data model - New class for checking and importing tables +- Function to get a file path to a shared resource directory +- Function to setup logging appropriate for server side scripts with webui + output ### Changed ### diff --git a/integrationtests/crawl.py b/integrationtests/crawl.py index aaab4bb3..e439510f 100755 --- a/integrationtests/crawl.py +++ b/integrationtests/crawl.py @@ -25,6 +25,7 @@ import argparse import logging +import sys from argparse import RawTextHelpFormatter import caosdb as db @@ -65,6 +66,7 @@ def access(path): if __name__ == "__main__": logger = logging.getLogger("caosadvancedtools") + logger.addHandler(logging.StreamHandler(sys.stdout)) conlogger = logging.getLogger("connection") conlogger.setLevel(level=logging.ERROR) logger.setLevel(level=logging.DEBUG) @@ -83,7 +85,7 @@ if __name__ == "__main__": logger.info("Query done...") config = db.configuration.get_config() c = FileCrawler(files=files, use_cache=True, - interactive=False, hideKnown=True, + interactive=False, hideKnown=False, cfood_types=[ProjectCFood, ExperimentCFood, AnalysisCFood, PublicationCFood, SimulationCFood, diff --git a/integrationtests/test.sh b/integrationtests/test.sh index 9712e0e7..e6baabf8 100755 --- a/integrationtests/test.sh +++ b/integrationtests/test.sh @@ -16,10 +16,10 @@ pushd extroot egrep -liRZ 'A description of another example' . | xargs -0 -l sed -i -e 's/A description of another example/A description of this example/g' popd echo "run crawler" -./crawl.py / > $OUT +./crawl.py / &> $OUT # check whether there was something UNAUTHORIZED set -e -grep "UNAUTHORIZED UPDATE" $OUT +grep "There where unauthorized changes" $OUT # get the id of the run which is the last field of the output string RUN_ID=$(grep "run id:" $OUT | awk '{ print $NF }') echo $RUN_ID @@ -27,7 +27,7 @@ echo "run crawler again" echo "./crawl.py -a $RUN_ID /" ./crawl.py -a $RUN_ID / > $OUT set +e -if grep "UNAUTHORIZED UPDATE" $OUT +if grep "There where unauthorized changes" $OUT then exit 1 fi diff --git a/src/caosadvancedtools/__init__.py b/src/caosadvancedtools/__init__.py index f96b7578..e69de29b 100644 --- a/src/caosadvancedtools/__init__.py +++ b/src/caosadvancedtools/__init__.py @@ -1,6 +0,0 @@ -import logging -import sys - -logger = logging.getLogger("caosadvancedtools") -logger.setLevel(level=logging.INFO) -logger.addHandler(logging.StreamHandler(sys.stdout)) diff --git a/src/caosadvancedtools/cfood.py b/src/caosadvancedtools/cfood.py index 057273e7..3ec1b6ed 100644 --- a/src/caosadvancedtools/cfood.py +++ b/src/caosadvancedtools/cfood.py @@ -53,7 +53,7 @@ RECORDS = {} RECORDTYPES = {} FILES = {} -logger = logging.getLogger("caosadvanedtools") +logger = logging.getLogger(__name__) def get_entity(name): diff --git a/src/caosadvancedtools/crawler.py b/src/caosadvancedtools/crawler.py index 50da7a23..7e69ed33 100644 --- a/src/caosadvancedtools/crawler.py +++ b/src/caosadvancedtools/crawler.py @@ -57,7 +57,7 @@ from .guard import RETRIEVE, ProhibitedException from .guard import global_guard as guard from .suppressKnown import SuppressKnown -logger = logging.getLogger("caosadvancedtools") +logger = logging.getLogger(__name__) def separated(text): @@ -97,7 +97,15 @@ class Crawler(object): self.abort_on_exception = abort_on_exception self.update_cache = UpdateCache() self.filterKnown = SuppressKnown() - logger.addFilter(self.filterKnown) + advancedtoolslogger = logging.getLogger("caosadvancedtools") + + # TODO this seems to be a bad idea. What if the handler was not added + # yet? What if there is another stream handler, which shall not be + # filtered? + + for hdl in advancedtoolslogger.handlers: + if hdl.__class__.__name__ == "StreamHandler": + hdl.addFilter(self.filterKnown) if hideKnown is False: for cat in ["matches", "inconsistency"]: diff --git a/src/caosadvancedtools/serverside/helper.py b/src/caosadvancedtools/serverside/helper.py index a09e6cfe..67c8256a 100644 --- a/src/caosadvancedtools/serverside/helper.py +++ b/src/caosadvancedtools/serverside/helper.py @@ -23,6 +23,8 @@ from __future__ import absolute_import import argparse import datetime import json +import logging +import os import sys import caosdb as db @@ -44,6 +46,7 @@ def wrap_bootstrap_alert(text, kind): alert : str A HTML str of a Bootstrap DIV.alert """ + return ('<div class="alert alert-{kind} alert-dismissible" ' 'role="alert">{text}</div>').format(kind=kind, text=text) @@ -232,6 +235,7 @@ def get_data(filename, default=None): def get_timestamp(): """Return a ISO 8601 compliante timestamp (second precision)""" + return datetime.datetime.utcnow().isoformat(timespec='seconds') @@ -276,3 +280,35 @@ def parse_arguments(args): p = get_argument_parser() return p.parse_args(args) + + +def get_shared_filename(filename): + """ + prefix a filename with a path to a shared resource directory + + + Parameters + ---------- + filename : str + Filename to be prefixed; e.g. `log.txt`. + + Returns + ------- + tuple + (filename, filepath), where `filename` is the name that can be shared + with users, such that they can retrieve the file from the shared + directory. `filepath` is the path that can be used in a script to + actually store the file; e.g. with open(filepath, 'w') as fi... + """ + + if "SHARED_DIR" not in os.environ: + raise RuntimeError( + "The environment variable 'SHARED_DIR' should be " + "set. Cannot identifiy the directory for the shared resource") + + directory = os.environ["SHARED_DIR"] + randname = os.path.basename(os.path.abspath(directory)) + filepath = os.path.abspath(os.path.join(directory, filename)) + filename = os.path.join(randname, filename) + + return filename, filepath diff --git a/src/caosadvancedtools/serverside/logging.py b/src/caosadvancedtools/serverside/logging.py new file mode 100644 index 00000000..33f5bae1 --- /dev/null +++ b/src/caosadvancedtools/serverside/logging.py @@ -0,0 +1,64 @@ +# encoding: utf-8 +# +# Copyright (C) 2020 IndiScale GmbH <info@indiscale.com> +# Copyright (C) 2020 Henrik tom Wörden <h.tomwoerden@indiscale.com> +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see <https://www.gnu.org/licenses/>. +# +# +from __future__ import absolute_import + +import logging +import os +import sys +import tempfile +from datetime import datetime + +from ..webui_formatter import WebUI_Formatter +from .helper import get_shared_filename + + +def configure_server_side_logging(loggername="caosadvancedtools"): + """ + Set logging up to save one plain debugging log file, one plain info log + file (for users) and a stdout stream with messages wrapped in html elements + + returns the path to the file with debugging output + """ + logger = logging.getLogger(loggername) + logger.setLevel(level=logging.DEBUG) + + filename, filepath = get_shared_filename("log.txt") + + # this is a log file with INFO level for the user + user_file_handler = logging.FileHandler(filename=filepath) + user_file_handler.setLevel(logging.INFO) + logger.addHandler(user_file_handler) + + # The output shall be printed in the webui. Thus wrap it in html elements. + formatter = WebUI_Formatter(full_file="/Shared/{}".format(filename)) + web_handler = logging.StreamHandler(stream=sys.stdout) + web_handler.setFormatter(formatter) + web_handler.setLevel(logging.INFO) + logger.addHandler(web_handler) + + # one log file with debug level output + debug_file = os.path.join(tempfile.gettempdir(), + "{}_{}.log".format(__name__, + datetime.now().isoformat())) + debug_handler = logging.FileHandler(filename=debug_file) + debug_handler.setLevel(logging.DEBUG) + logger.addHandler(debug_handler) + + return debug_file diff --git a/src/caosadvancedtools/suppressKnown.py b/src/caosadvancedtools/suppressKnown.py index 61b01605..c15f0e06 100644 --- a/src/caosadvancedtools/suppressKnown.py +++ b/src/caosadvancedtools/suppressKnown.py @@ -71,6 +71,16 @@ class SuppressKnown(logging.Filter): return sha256((txt+str(identifier)).encode("utf-8")).hexdigest() def filter(self, record): + """ + Return whether the record shall be logged. + + + If either identifier of category is missing 1 is returned (logging + enabled). If the record has both attributes, it is checked whether the + combination was shown before (was_tagged). If so 0 is returned. + Otherwise the combination is saved and 1 is returned + """ + if not hasattr(record, "identifier"): return 1 diff --git a/src/caosadvancedtools/table_importer.py b/src/caosadvancedtools/table_importer.py index 468cdba3..eeb91d6d 100755 --- a/src/caosadvancedtools/table_importer.py +++ b/src/caosadvancedtools/table_importer.py @@ -36,10 +36,10 @@ from xlrd import XLRDError from .datainconsistency import DataInconsistencyError from .suppressKnown import SuppressKnown -logger = logging.getLogger("caosadvancedtools") +logger = logging.getLogger(__name__) -def name_converter(name): +def assure_name_format(name): """ checks whether a string can be interpreted as 'LastName, FirstName' """ @@ -74,7 +74,6 @@ class TSVImporter(object): class XLSImporter(object): - def __init__(self, converters, obligatory_columns=None, unique_keys=None): """ converters: dict with column names as keys and converter functions as @@ -86,6 +85,9 @@ class XLSImporter(object): necessary. obligatory_columns: list of column names, optional each listed column must not have missing values + unique_columns : list of column names that in + combination must be unique; i.e. each row has a + unique combination of values in those columns. """ self.sup = SuppressKnown() self.required_columns = list(converters.keys()) @@ -97,7 +99,7 @@ class XLSImporter(object): """ converts an xls file into a Pandas DataFrame. - The converters of the XLS_Importer object are used. + The converters of the XLSImporter object are used. Raises: DataInconsistencyError """ @@ -126,6 +128,15 @@ class XLSImporter(object): 'category': "inconsistency"}) raise DataInconsistencyError(*e.args) + try: + df = xls_file.parse(converters=self.converters) + except Exception as e: + logger.warning( + "Cannot parse {}.".format(filename), + extra={'identifier': str(filename), + 'category': "inconsistency"}) + raise DataInconsistencyError(*e.args) + self.check_columns(df, filename=filename) df = self.check_missing(df, filename=filename) @@ -167,8 +178,10 @@ class XLSImporter(object): for unique_columns in self.unique_keys: subtable = df[list(unique_columns)] + for index, row in subtable.iterrows(): element = tuple(row) + if element in uniques: errmsg = ( "The {}. row contains the values '{}'.\nThis value " diff --git a/src/caosadvancedtools/utils.py b/src/caosadvancedtools/utils.py index b897d76a..4e85c9df 100644 --- a/src/caosadvancedtools/utils.py +++ b/src/caosadvancedtools/utils.py @@ -29,7 +29,7 @@ import caosdb as db def set_log_level(level): - logger = logging.getLogger("caosadvancedtools") + logger = logging.getLogger(__name__) logger.setLevel(level=logging.DEBUG) diff --git a/unittests/test_table_importer.py b/unittests/test_table_importer.py index b848a3ce..b662493b 100644 --- a/unittests/test_table_importer.py +++ b/unittests/test_table_importer.py @@ -23,7 +23,7 @@ from tempfile import NamedTemporaryFile import numpy as np import pandas as pd from caosadvancedtools.datainconsistency import DataInconsistencyError -from caosadvancedtools.table_importer import (XLSImporter, name_converter, +from caosadvancedtools.table_importer import (XLSImporter, assure_name_format, yes_no_converter) @@ -40,9 +40,10 @@ class ConverterTest(unittest.TestCase): self.assertRaises(ValueError, yes_no_converter, "True") self.assertRaises(ValueError, yes_no_converter, "true") - def test_name_converter(self): - self.assertEqual(name_converter("Müstermann, Max"), "Müstermann, Max") - self.assertRaises(ValueError, name_converter, "Max Mustermann") + def test_assure_name_format(self): + self.assertEqual(assure_name_format("Müstermann, Max"), + "Müstermann, Max") + self.assertRaises(ValueError, assure_name_format, "Max Mustermann") class XLSImporterTest(unittest.TestCase): -- GitLab