diff --git a/.gitignore b/.gitignore index a87719155a81c2d1398130f803674726ec831865..b1d651a7852d96ddb67170e80c1522f24573112b 100644 --- a/.gitignore +++ b/.gitignore @@ -6,3 +6,4 @@ __pycache__ *.egg-info .docker/cert src/caosadvancedtools/version.py +version.py diff --git a/CHANGELOG.md b/CHANGELOG.md index e4c3fda811f838a6f83090e271f5ac5bddda9fa7..22aa4590e690ad2be9a8bc564c7c29f5069e9872 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - New class to collect possible problems with the data model - New class for checking and importing tables +- Function to get a file path to a shared resource directory +- Function to setup logging appropriate for server side scripts with webui + output ### Changed ### diff --git a/integrationtests/crawl.py b/integrationtests/crawl.py index aaab4bb313dabc900170d48f40b30eb7bd361a8e..e439510fd7308888e9071476905f486c82769dc5 100755 --- a/integrationtests/crawl.py +++ b/integrationtests/crawl.py @@ -25,6 +25,7 @@ import argparse import logging +import sys from argparse import RawTextHelpFormatter import caosdb as db @@ -65,6 +66,7 @@ def access(path): if __name__ == "__main__": logger = logging.getLogger("caosadvancedtools") + logger.addHandler(logging.StreamHandler(sys.stdout)) conlogger = logging.getLogger("connection") conlogger.setLevel(level=logging.ERROR) logger.setLevel(level=logging.DEBUG) @@ -83,7 +85,7 @@ if __name__ == "__main__": logger.info("Query done...") config = db.configuration.get_config() c = FileCrawler(files=files, use_cache=True, - interactive=False, hideKnown=True, + interactive=False, hideKnown=False, cfood_types=[ProjectCFood, ExperimentCFood, AnalysisCFood, PublicationCFood, SimulationCFood, diff --git a/integrationtests/test.sh b/integrationtests/test.sh index 9712e0e7db654c1d7a508e740b936800363e4b8e..e6baabf8feefa216648840ffb96517c2a7b1f8e3 100755 --- a/integrationtests/test.sh +++ b/integrationtests/test.sh @@ -16,10 +16,10 @@ pushd extroot egrep -liRZ 'A description of another example' . | xargs -0 -l sed -i -e 's/A description of another example/A description of this example/g' popd echo "run crawler" -./crawl.py / > $OUT +./crawl.py / &> $OUT # check whether there was something UNAUTHORIZED set -e -grep "UNAUTHORIZED UPDATE" $OUT +grep "There where unauthorized changes" $OUT # get the id of the run which is the last field of the output string RUN_ID=$(grep "run id:" $OUT | awk '{ print $NF }') echo $RUN_ID @@ -27,7 +27,7 @@ echo "run crawler again" echo "./crawl.py -a $RUN_ID /" ./crawl.py -a $RUN_ID / > $OUT set +e -if grep "UNAUTHORIZED UPDATE" $OUT +if grep "There where unauthorized changes" $OUT then exit 1 fi diff --git a/src/caosadvancedtools/__init__.py b/src/caosadvancedtools/__init__.py index f96b7578f308ba2d3ee381f7a32216f1a8e02e35..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 100644 --- a/src/caosadvancedtools/__init__.py +++ b/src/caosadvancedtools/__init__.py @@ -1,6 +0,0 @@ -import logging -import sys - -logger = logging.getLogger("caosadvancedtools") -logger.setLevel(level=logging.INFO) -logger.addHandler(logging.StreamHandler(sys.stdout)) diff --git a/src/caosadvancedtools/cfood.py b/src/caosadvancedtools/cfood.py index 057273e75d092e7f3437b3f55765382f4ab2bdec..3ec1b6ed7ef5263db8349419225ca3ebf20f52fa 100644 --- a/src/caosadvancedtools/cfood.py +++ b/src/caosadvancedtools/cfood.py @@ -53,7 +53,7 @@ RECORDS = {} RECORDTYPES = {} FILES = {} -logger = logging.getLogger("caosadvanedtools") +logger = logging.getLogger(__name__) def get_entity(name): diff --git a/src/caosadvancedtools/crawler.py b/src/caosadvancedtools/crawler.py index 50da7a2356fa41978a43a8357323f47a873e80ae..7e69ed33541d9e04b31b7298631edc671a5f896e 100644 --- a/src/caosadvancedtools/crawler.py +++ b/src/caosadvancedtools/crawler.py @@ -57,7 +57,7 @@ from .guard import RETRIEVE, ProhibitedException from .guard import global_guard as guard from .suppressKnown import SuppressKnown -logger = logging.getLogger("caosadvancedtools") +logger = logging.getLogger(__name__) def separated(text): @@ -97,7 +97,15 @@ class Crawler(object): self.abort_on_exception = abort_on_exception self.update_cache = UpdateCache() self.filterKnown = SuppressKnown() - logger.addFilter(self.filterKnown) + advancedtoolslogger = logging.getLogger("caosadvancedtools") + + # TODO this seems to be a bad idea. What if the handler was not added + # yet? What if there is another stream handler, which shall not be + # filtered? + + for hdl in advancedtoolslogger.handlers: + if hdl.__class__.__name__ == "StreamHandler": + hdl.addFilter(self.filterKnown) if hideKnown is False: for cat in ["matches", "inconsistency"]: diff --git a/src/caosadvancedtools/serverside/helper.py b/src/caosadvancedtools/serverside/helper.py index a09e6cfe867211e27fde8675d83d6b21bd14d456..67c8256a94391ce6dbc77a63812833d61f475e66 100644 --- a/src/caosadvancedtools/serverside/helper.py +++ b/src/caosadvancedtools/serverside/helper.py @@ -23,6 +23,8 @@ from __future__ import absolute_import import argparse import datetime import json +import logging +import os import sys import caosdb as db @@ -44,6 +46,7 @@ def wrap_bootstrap_alert(text, kind): alert : str A HTML str of a Bootstrap DIV.alert """ + return ('<div class="alert alert-{kind} alert-dismissible" ' 'role="alert">{text}</div>').format(kind=kind, text=text) @@ -232,6 +235,7 @@ def get_data(filename, default=None): def get_timestamp(): """Return a ISO 8601 compliante timestamp (second precision)""" + return datetime.datetime.utcnow().isoformat(timespec='seconds') @@ -276,3 +280,35 @@ def parse_arguments(args): p = get_argument_parser() return p.parse_args(args) + + +def get_shared_filename(filename): + """ + prefix a filename with a path to a shared resource directory + + + Parameters + ---------- + filename : str + Filename to be prefixed; e.g. `log.txt`. + + Returns + ------- + tuple + (filename, filepath), where `filename` is the name that can be shared + with users, such that they can retrieve the file from the shared + directory. `filepath` is the path that can be used in a script to + actually store the file; e.g. with open(filepath, 'w') as fi... + """ + + if "SHARED_DIR" not in os.environ: + raise RuntimeError( + "The environment variable 'SHARED_DIR' should be " + "set. Cannot identifiy the directory for the shared resource") + + directory = os.environ["SHARED_DIR"] + randname = os.path.basename(os.path.abspath(directory)) + filepath = os.path.abspath(os.path.join(directory, filename)) + filename = os.path.join(randname, filename) + + return filename, filepath diff --git a/src/caosadvancedtools/serverside/logging.py b/src/caosadvancedtools/serverside/logging.py new file mode 100644 index 0000000000000000000000000000000000000000..33f5bae1a91b4bd49db32248125561fdb2bd69e5 --- /dev/null +++ b/src/caosadvancedtools/serverside/logging.py @@ -0,0 +1,64 @@ +# encoding: utf-8 +# +# Copyright (C) 2020 IndiScale GmbH <info@indiscale.com> +# Copyright (C) 2020 Henrik tom Wörden <h.tomwoerden@indiscale.com> +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see <https://www.gnu.org/licenses/>. +# +# +from __future__ import absolute_import + +import logging +import os +import sys +import tempfile +from datetime import datetime + +from ..webui_formatter import WebUI_Formatter +from .helper import get_shared_filename + + +def configure_server_side_logging(loggername="caosadvancedtools"): + """ + Set logging up to save one plain debugging log file, one plain info log + file (for users) and a stdout stream with messages wrapped in html elements + + returns the path to the file with debugging output + """ + logger = logging.getLogger(loggername) + logger.setLevel(level=logging.DEBUG) + + filename, filepath = get_shared_filename("log.txt") + + # this is a log file with INFO level for the user + user_file_handler = logging.FileHandler(filename=filepath) + user_file_handler.setLevel(logging.INFO) + logger.addHandler(user_file_handler) + + # The output shall be printed in the webui. Thus wrap it in html elements. + formatter = WebUI_Formatter(full_file="/Shared/{}".format(filename)) + web_handler = logging.StreamHandler(stream=sys.stdout) + web_handler.setFormatter(formatter) + web_handler.setLevel(logging.INFO) + logger.addHandler(web_handler) + + # one log file with debug level output + debug_file = os.path.join(tempfile.gettempdir(), + "{}_{}.log".format(__name__, + datetime.now().isoformat())) + debug_handler = logging.FileHandler(filename=debug_file) + debug_handler.setLevel(logging.DEBUG) + logger.addHandler(debug_handler) + + return debug_file diff --git a/src/caosadvancedtools/suppressKnown.py b/src/caosadvancedtools/suppressKnown.py index 61b016052a89db3d157f5f6e8d0562f88ee22131..c15f0e06fa7d126937497aeb877dd5d2991b6ff7 100644 --- a/src/caosadvancedtools/suppressKnown.py +++ b/src/caosadvancedtools/suppressKnown.py @@ -71,6 +71,16 @@ class SuppressKnown(logging.Filter): return sha256((txt+str(identifier)).encode("utf-8")).hexdigest() def filter(self, record): + """ + Return whether the record shall be logged. + + + If either identifier of category is missing 1 is returned (logging + enabled). If the record has both attributes, it is checked whether the + combination was shown before (was_tagged). If so 0 is returned. + Otherwise the combination is saved and 1 is returned + """ + if not hasattr(record, "identifier"): return 1 diff --git a/src/caosadvancedtools/table_importer.py b/src/caosadvancedtools/table_importer.py index 468cdba3bbe23ec7507aa394ddf0cfe7e0be0e1a..eeb91d6dcff364abd34b6407c0daa736c5813979 100755 --- a/src/caosadvancedtools/table_importer.py +++ b/src/caosadvancedtools/table_importer.py @@ -36,10 +36,10 @@ from xlrd import XLRDError from .datainconsistency import DataInconsistencyError from .suppressKnown import SuppressKnown -logger = logging.getLogger("caosadvancedtools") +logger = logging.getLogger(__name__) -def name_converter(name): +def assure_name_format(name): """ checks whether a string can be interpreted as 'LastName, FirstName' """ @@ -74,7 +74,6 @@ class TSVImporter(object): class XLSImporter(object): - def __init__(self, converters, obligatory_columns=None, unique_keys=None): """ converters: dict with column names as keys and converter functions as @@ -86,6 +85,9 @@ class XLSImporter(object): necessary. obligatory_columns: list of column names, optional each listed column must not have missing values + unique_columns : list of column names that in + combination must be unique; i.e. each row has a + unique combination of values in those columns. """ self.sup = SuppressKnown() self.required_columns = list(converters.keys()) @@ -97,7 +99,7 @@ class XLSImporter(object): """ converts an xls file into a Pandas DataFrame. - The converters of the XLS_Importer object are used. + The converters of the XLSImporter object are used. Raises: DataInconsistencyError """ @@ -126,6 +128,15 @@ class XLSImporter(object): 'category': "inconsistency"}) raise DataInconsistencyError(*e.args) + try: + df = xls_file.parse(converters=self.converters) + except Exception as e: + logger.warning( + "Cannot parse {}.".format(filename), + extra={'identifier': str(filename), + 'category': "inconsistency"}) + raise DataInconsistencyError(*e.args) + self.check_columns(df, filename=filename) df = self.check_missing(df, filename=filename) @@ -167,8 +178,10 @@ class XLSImporter(object): for unique_columns in self.unique_keys: subtable = df[list(unique_columns)] + for index, row in subtable.iterrows(): element = tuple(row) + if element in uniques: errmsg = ( "The {}. row contains the values '{}'.\nThis value " diff --git a/src/caosadvancedtools/utils.py b/src/caosadvancedtools/utils.py index b897d76ad53de8fa38ddc457e9c532fd3c274e58..4e85c9dffedbf1076f8406777ed51d6c7313dda5 100644 --- a/src/caosadvancedtools/utils.py +++ b/src/caosadvancedtools/utils.py @@ -29,7 +29,7 @@ import caosdb as db def set_log_level(level): - logger = logging.getLogger("caosadvancedtools") + logger = logging.getLogger(__name__) logger.setLevel(level=logging.DEBUG) diff --git a/unittests/test_table_importer.py b/unittests/test_table_importer.py index b848a3ce796a126fb55e03efd2afba3c6c53f603..b662493b32a38ffa0c416b44ac914bbf540ae34c 100644 --- a/unittests/test_table_importer.py +++ b/unittests/test_table_importer.py @@ -23,7 +23,7 @@ from tempfile import NamedTemporaryFile import numpy as np import pandas as pd from caosadvancedtools.datainconsistency import DataInconsistencyError -from caosadvancedtools.table_importer import (XLSImporter, name_converter, +from caosadvancedtools.table_importer import (XLSImporter, assure_name_format, yes_no_converter) @@ -40,9 +40,10 @@ class ConverterTest(unittest.TestCase): self.assertRaises(ValueError, yes_no_converter, "True") self.assertRaises(ValueError, yes_no_converter, "true") - def test_name_converter(self): - self.assertEqual(name_converter("Müstermann, Max"), "Müstermann, Max") - self.assertRaises(ValueError, name_converter, "Max Mustermann") + def test_assure_name_format(self): + self.assertEqual(assure_name_format("Müstermann, Max"), + "Müstermann, Max") + self.assertRaises(ValueError, assure_name_format, "Max Mustermann") class XLSImporterTest(unittest.TestCase):