Skip to content
Snippets Groups Projects
Commit f348c56e authored by Florian Spreckelsen's avatar Florian Spreckelsen
Browse files

Merge branch 'logging-setup' into 'dev'

Logging setup

See merge request caosdb/caosdb-advanced-user-tools!38
parents c0f5098b 50fdfcef
No related branches found
No related tags found
1 merge request!22Release 0.3
...@@ -6,3 +6,4 @@ __pycache__ ...@@ -6,3 +6,4 @@ __pycache__
*.egg-info *.egg-info
.docker/cert .docker/cert
src/caosadvancedtools/version.py src/caosadvancedtools/version.py
version.py
...@@ -10,6 +10,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ...@@ -10,6 +10,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- New class to collect possible problems with the data model - New class to collect possible problems with the data model
- New class for checking and importing tables - New class for checking and importing tables
- Function to get a file path to a shared resource directory
- Function to set up logging appropriate for server-side scripts with webui
output
### Changed ### ### Changed ###
......
...@@ -25,6 +25,7 @@ ...@@ -25,6 +25,7 @@
import argparse import argparse
import logging import logging
import sys
from argparse import RawTextHelpFormatter from argparse import RawTextHelpFormatter
import caosdb as db import caosdb as db
...@@ -65,6 +66,7 @@ def access(path): ...@@ -65,6 +66,7 @@ def access(path):
if __name__ == "__main__": if __name__ == "__main__":
logger = logging.getLogger("caosadvancedtools") logger = logging.getLogger("caosadvancedtools")
logger.addHandler(logging.StreamHandler(sys.stdout))
conlogger = logging.getLogger("connection") conlogger = logging.getLogger("connection")
conlogger.setLevel(level=logging.ERROR) conlogger.setLevel(level=logging.ERROR)
logger.setLevel(level=logging.DEBUG) logger.setLevel(level=logging.DEBUG)
...@@ -83,7 +85,7 @@ if __name__ == "__main__": ...@@ -83,7 +85,7 @@ if __name__ == "__main__":
logger.info("Query done...") logger.info("Query done...")
config = db.configuration.get_config() config = db.configuration.get_config()
c = FileCrawler(files=files, use_cache=True, c = FileCrawler(files=files, use_cache=True,
interactive=False, hideKnown=True, interactive=False, hideKnown=False,
cfood_types=[ProjectCFood, cfood_types=[ProjectCFood,
ExperimentCFood, AnalysisCFood, ExperimentCFood, AnalysisCFood,
PublicationCFood, SimulationCFood, PublicationCFood, SimulationCFood,
......
...@@ -16,10 +16,10 @@ pushd extroot ...@@ -16,10 +16,10 @@ pushd extroot
egrep -liRZ 'A description of another example' . | xargs -0 -l sed -i -e 's/A description of another example/A description of this example/g' egrep -liRZ 'A description of another example' . | xargs -0 -l sed -i -e 's/A description of another example/A description of this example/g'
popd popd
echo "run crawler" echo "run crawler"
./crawl.py / > $OUT ./crawl.py / &> $OUT
# check whether there was something UNAUTHORIZED # check whether there was something UNAUTHORIZED
set -e set -e
grep "UNAUTHORIZED UPDATE" $OUT grep "There where unauthorized changes" $OUT
# get the id of the run which is the last field of the output string # get the id of the run which is the last field of the output string
RUN_ID=$(grep "run id:" $OUT | awk '{ print $NF }') RUN_ID=$(grep "run id:" $OUT | awk '{ print $NF }')
echo $RUN_ID echo $RUN_ID
...@@ -27,7 +27,7 @@ echo "run crawler again" ...@@ -27,7 +27,7 @@ echo "run crawler again"
echo "./crawl.py -a $RUN_ID /" echo "./crawl.py -a $RUN_ID /"
./crawl.py -a $RUN_ID / > $OUT ./crawl.py -a $RUN_ID / > $OUT
set +e set +e
if grep "UNAUTHORIZED UPDATE" $OUT if grep "There where unauthorized changes" $OUT
then then
exit 1 exit 1
fi fi
......
import logging
import sys

# Send all "caosadvancedtools" log output of level INFO and above to stdout.
logger = logging.getLogger("caosadvancedtools")
logger.addHandler(logging.StreamHandler(stream=sys.stdout))
logger.setLevel(logging.INFO)
...@@ -53,7 +53,7 @@ RECORDS = {} ...@@ -53,7 +53,7 @@ RECORDS = {}
RECORDTYPES = {} RECORDTYPES = {}
FILES = {} FILES = {}
logger = logging.getLogger("caosadvanedtools") logger = logging.getLogger(__name__)
def get_entity(name): def get_entity(name):
......
...@@ -57,7 +57,7 @@ from .guard import RETRIEVE, ProhibitedException ...@@ -57,7 +57,7 @@ from .guard import RETRIEVE, ProhibitedException
from .guard import global_guard as guard from .guard import global_guard as guard
from .suppressKnown import SuppressKnown from .suppressKnown import SuppressKnown
logger = logging.getLogger("caosadvancedtools") logger = logging.getLogger(__name__)
def separated(text): def separated(text):
...@@ -97,7 +97,15 @@ class Crawler(object): ...@@ -97,7 +97,15 @@ class Crawler(object):
self.abort_on_exception = abort_on_exception self.abort_on_exception = abort_on_exception
self.update_cache = UpdateCache() self.update_cache = UpdateCache()
self.filterKnown = SuppressKnown() self.filterKnown = SuppressKnown()
logger.addFilter(self.filterKnown) advancedtoolslogger = logging.getLogger("caosadvancedtools")
# TODO this seems to be a bad idea. What if the handler was not added
# yet? What if there is another stream handler, which shall not be
# filtered?
for hdl in advancedtoolslogger.handlers:
if hdl.__class__.__name__ == "StreamHandler":
hdl.addFilter(self.filterKnown)
if hideKnown is False: if hideKnown is False:
for cat in ["matches", "inconsistency"]: for cat in ["matches", "inconsistency"]:
......
...@@ -23,6 +23,8 @@ from __future__ import absolute_import ...@@ -23,6 +23,8 @@ from __future__ import absolute_import
import argparse import argparse
import datetime import datetime
import json import json
import logging
import os
import sys import sys
import caosdb as db import caosdb as db
...@@ -44,6 +46,7 @@ def wrap_bootstrap_alert(text, kind): ...@@ -44,6 +46,7 @@ def wrap_bootstrap_alert(text, kind):
alert : str alert : str
A HTML str of a Bootstrap DIV.alert A HTML str of a Bootstrap DIV.alert
""" """
return ('<div class="alert alert-{kind} alert-dismissible" ' return ('<div class="alert alert-{kind} alert-dismissible" '
'role="alert">{text}</div>').format(kind=kind, text=text) 'role="alert">{text}</div>').format(kind=kind, text=text)
...@@ -232,6 +235,7 @@ def get_data(filename, default=None): ...@@ -232,6 +235,7 @@ def get_data(filename, default=None):
def get_timestamp(): def get_timestamp():
"""Return a ISO 8601 compliante timestamp (second precision)""" """Return a ISO 8601 compliante timestamp (second precision)"""
return datetime.datetime.utcnow().isoformat(timespec='seconds') return datetime.datetime.utcnow().isoformat(timespec='seconds')
...@@ -276,3 +280,35 @@ def parse_arguments(args): ...@@ -276,3 +280,35 @@ def parse_arguments(args):
p = get_argument_parser() p = get_argument_parser()
return p.parse_args(args) return p.parse_args(args)
def get_shared_filename(filename):
    """Prefix a filename with a path to a shared resource directory.

    The shared directory is taken from the ``SHARED_DIR`` environment
    variable, which must be set before calling this function.

    Parameters
    ----------
    filename : str
        Filename to be prefixed; e.g. `log.txt`.

    Returns
    -------
    tuple
        (filename, filepath), where `filename` is the name that can be shared
        with users, such that they can retrieve the file from the shared
        directory. `filepath` is the path that can be used in a script to
        actually store the file; e.g. with open(filepath, 'w') as fi...

    Raises
    ------
    RuntimeError
        If the ``SHARED_DIR`` environment variable is not set.
    """
    if "SHARED_DIR" not in os.environ:
        raise RuntimeError(
            "The environment variable 'SHARED_DIR' should be "
            "set. Cannot identify the directory for the shared resource")
    directory = os.environ["SHARED_DIR"]
    # The last component of the shared directory is the (random) name users
    # need in order to retrieve the file from the shared resource URL.
    randname = os.path.basename(os.path.abspath(directory))
    filepath = os.path.abspath(os.path.join(directory, filename))
    filename = os.path.join(randname, filename)

    return filename, filepath
# encoding: utf-8
#
# Copyright (C) 2020 IndiScale GmbH <info@indiscale.com>
# Copyright (C) 2020 Henrik tom Wörden <h.tomwoerden@indiscale.com>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
#
#
from __future__ import absolute_import
import logging
import os
import sys
import tempfile
from datetime import datetime
from ..webui_formatter import WebUI_Formatter
from .helper import get_shared_filename
def configure_server_side_logging(loggername="caosadvancedtools"):
    """Set up logging suitable for server side scripts with webui output.

    Three handlers are attached to the logger named `loggername`:
    a plain INFO-level log file in the shared resource directory (for
    users), a stdout stream whose messages are wrapped in html elements
    (for display in the webui), and a DEBUG-level plain log file in the
    temp directory.

    Returns the path to the file with debugging output.
    """
    logger = logging.getLogger(loggername)
    logger.setLevel(level=logging.DEBUG)

    filename, filepath = get_shared_filename("log.txt")

    # Plain log file with INFO level output for the user.
    info_handler = logging.FileHandler(filename=filepath)
    info_handler.setLevel(logging.INFO)
    logger.addHandler(info_handler)

    # Output printed to stdout shows up in the webui, so wrap those
    # messages in html elements.
    html_handler = logging.StreamHandler(stream=sys.stdout)
    html_handler.setFormatter(
        WebUI_Formatter(full_file="/Shared/{}".format(filename)))
    html_handler.setLevel(logging.INFO)
    logger.addHandler(html_handler)

    # One time-stamped log file in the temp directory with DEBUG level
    # output.
    debug_file = os.path.join(
        tempfile.gettempdir(),
        "{}_{}.log".format(__name__, datetime.now().isoformat()))
    debug_handler = logging.FileHandler(filename=debug_file)
    debug_handler.setLevel(logging.DEBUG)
    logger.addHandler(debug_handler)

    return debug_file
...@@ -71,6 +71,16 @@ class SuppressKnown(logging.Filter): ...@@ -71,6 +71,16 @@ class SuppressKnown(logging.Filter):
return sha256((txt+str(identifier)).encode("utf-8")).hexdigest() return sha256((txt+str(identifier)).encode("utf-8")).hexdigest()
def filter(self, record): def filter(self, record):
"""
Return whether the record shall be logged.
If either identifier or category is missing, 1 is returned (logging
enabled). If the record has both attributes, it is checked whether the
combination was shown before (was_tagged). If so, 0 is returned.
Otherwise the combination is saved and 1 is returned.
"""
if not hasattr(record, "identifier"): if not hasattr(record, "identifier"):
return 1 return 1
......
...@@ -36,10 +36,10 @@ from xlrd import XLRDError ...@@ -36,10 +36,10 @@ from xlrd import XLRDError
from .datainconsistency import DataInconsistencyError from .datainconsistency import DataInconsistencyError
from .suppressKnown import SuppressKnown from .suppressKnown import SuppressKnown
logger = logging.getLogger("caosadvancedtools") logger = logging.getLogger(__name__)
def name_converter(name): def assure_name_format(name):
""" """
checks whether a string can be interpreted as 'LastName, FirstName' checks whether a string can be interpreted as 'LastName, FirstName'
""" """
...@@ -74,7 +74,6 @@ class TSVImporter(object): ...@@ -74,7 +74,6 @@ class TSVImporter(object):
class XLSImporter(object): class XLSImporter(object):
def __init__(self, converters, obligatory_columns=None, unique_keys=None): def __init__(self, converters, obligatory_columns=None, unique_keys=None):
""" """
converters: dict with column names as keys and converter functions as converters: dict with column names as keys and converter functions as
...@@ -86,6 +85,9 @@ class XLSImporter(object): ...@@ -86,6 +85,9 @@ class XLSImporter(object):
necessary. necessary.
obligatory_columns: list of column names, optional obligatory_columns: list of column names, optional
each listed column must not have missing values each listed column must not have missing values
unique_columns : list of column names that in
combination must be unique; i.e. each row has a
unique combination of values in those columns.
""" """
self.sup = SuppressKnown() self.sup = SuppressKnown()
self.required_columns = list(converters.keys()) self.required_columns = list(converters.keys())
...@@ -97,7 +99,7 @@ class XLSImporter(object): ...@@ -97,7 +99,7 @@ class XLSImporter(object):
""" """
converts an xls file into a Pandas DataFrame. converts an xls file into a Pandas DataFrame.
The converters of the XLS_Importer object are used. The converters of the XLSImporter object are used.
Raises: DataInconsistencyError Raises: DataInconsistencyError
""" """
...@@ -126,6 +128,15 @@ class XLSImporter(object): ...@@ -126,6 +128,15 @@ class XLSImporter(object):
'category': "inconsistency"}) 'category': "inconsistency"})
raise DataInconsistencyError(*e.args) raise DataInconsistencyError(*e.args)
try:
df = xls_file.parse(converters=self.converters)
except Exception as e:
logger.warning(
"Cannot parse {}.".format(filename),
extra={'identifier': str(filename),
'category': "inconsistency"})
raise DataInconsistencyError(*e.args)
self.check_columns(df, filename=filename) self.check_columns(df, filename=filename)
df = self.check_missing(df, filename=filename) df = self.check_missing(df, filename=filename)
...@@ -167,8 +178,10 @@ class XLSImporter(object): ...@@ -167,8 +178,10 @@ class XLSImporter(object):
for unique_columns in self.unique_keys: for unique_columns in self.unique_keys:
subtable = df[list(unique_columns)] subtable = df[list(unique_columns)]
for index, row in subtable.iterrows(): for index, row in subtable.iterrows():
element = tuple(row) element = tuple(row)
if element in uniques: if element in uniques:
errmsg = ( errmsg = (
"The {}. row contains the values '{}'.\nThis value " "The {}. row contains the values '{}'.\nThis value "
......
...@@ -29,7 +29,7 @@ import caosdb as db ...@@ -29,7 +29,7 @@ import caosdb as db
def set_log_level(level): def set_log_level(level):
logger = logging.getLogger("caosadvancedtools") logger = logging.getLogger(__name__)
logger.setLevel(level=logging.DEBUG) logger.setLevel(level=logging.DEBUG)
......
...@@ -23,7 +23,7 @@ from tempfile import NamedTemporaryFile ...@@ -23,7 +23,7 @@ from tempfile import NamedTemporaryFile
import numpy as np import numpy as np
import pandas as pd import pandas as pd
from caosadvancedtools.datainconsistency import DataInconsistencyError from caosadvancedtools.datainconsistency import DataInconsistencyError
from caosadvancedtools.table_importer import (XLSImporter, name_converter, from caosadvancedtools.table_importer import (XLSImporter, assure_name_format,
yes_no_converter) yes_no_converter)
...@@ -40,9 +40,10 @@ class ConverterTest(unittest.TestCase): ...@@ -40,9 +40,10 @@ class ConverterTest(unittest.TestCase):
self.assertRaises(ValueError, yes_no_converter, "True") self.assertRaises(ValueError, yes_no_converter, "True")
self.assertRaises(ValueError, yes_no_converter, "true") self.assertRaises(ValueError, yes_no_converter, "true")
def test_name_converter(self): def test_assure_name_format(self):
self.assertEqual(name_converter("Müstermann, Max"), "Müstermann, Max") self.assertEqual(assure_name_format("Müstermann, Max"),
self.assertRaises(ValueError, name_converter, "Max Mustermann") "Müstermann, Max")
self.assertRaises(ValueError, assure_name_format, "Max Mustermann")
class XLSImporterTest(unittest.TestCase): class XLSImporterTest(unittest.TestCase):
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment