From a548a7fdb3baccf0950b1cbc1483b606d48e0856 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20tom=20W=C3=B6rden?= <h.tomwoerden@indiscale.com> Date: Fri, 10 Jul 2020 15:03:17 +0000 Subject: [PATCH] Allow suppressing logging --- src/caosadvancedtools/crawler.py | 50 ++--------- src/caosadvancedtools/suppressable.py | 88 +++++++++++++++++++ unittests/test_suppressable.py | 120 ++++++++++++++++++++++++++ 3 files changed, 217 insertions(+), 41 deletions(-) create mode 100644 src/caosadvancedtools/suppressable.py create mode 100644 unittests/test_suppressable.py diff --git a/src/caosadvancedtools/crawler.py b/src/caosadvancedtools/crawler.py index df675042..4e88de13 100644 --- a/src/caosadvancedtools/crawler.py +++ b/src/caosadvancedtools/crawler.py @@ -55,6 +55,7 @@ from .datainconsistency import DataInconsistencyError from .datamodel_problems import DataModelProblems from .guard import RETRIEVE, ProhibitedException from .guard import global_guard as guard +from .suppressable import Suppressable logger = logging.getLogger(__name__) @@ -63,30 +64,6 @@ def separated(text): return "-"*60 + "\n" + text -class UnknownCache(object): - def __init__(self, interactive=False, load=False): - - if interactive and "y" == input( - "\nDo you want to load filenames that previously were not " - "matched by any CFood?\nIn that case, they will not show up " - "again. (y)"): - load = True - - if load and os.path.exists("known_cache.db"): - with open("known_cache.db") as fi: - self.filenames = [el.strip("\n") for el in fi.readlines()] - else: - self.filenames = [] - - def save(self): - with open("known_cache.db", "w") as fi: - for name in self.filenames: - fi.write(name + "\n") - - def add(self, el): - self.filenames.append(el) - - class Crawler(object): def __init__(self, cfood_types, use_cache=False, abort_on_exception=True, interactive=True, hideKnown=False, @@ -120,6 +97,12 @@ class Crawler(object): self.abort_on_exception = abort_on_exception self.update_cache = UpdateCache() + self.sup = Suppressable(logger=logger) + + if hideKnown is False: + for cat in ["matches", "inconsistency"]: + self.sup.reset(cat) + if self.use_cache: self.cache = Cache(db_file=cache_file) @@ -236,20 +219,12 @@ class Crawler(object): return cfoods, tbs, errors_occured def check_matches(self, matches): - # possibly load previously encountered "Missing matches" and - # "Multiple matches" - ucache = UnknownCache(interactive=self.interactive, load=self.hideKnown) - for idx, item in self.iteritems(): if len(matches[idx]) == 0: msg = ("The crawler has no matching rules for and is thus " "ignoring:\n{}".format(item)) - if item in ucache.filenames: - logger.debug(msg) - else: - logger.warning(msg) - ucache.add(item) + self.sup.warning(msg, identifier=item, category="matches") if len(matches[idx]) > 1: msg = ("Attention: More than one matching cfood!\n" @@ -257,14 +232,7 @@ class Crawler(object): + "\tRecordTypes:\t" + ", ".join( matches[idx])+"\n") - if item in ucache.filenames: - logger.debug(msg) - else: - logger.warning(msg) - ucache.add(item) - - # Save the encountered prblem matches - ucache.save() + self.sup.warning(msg, identifier=item, category="matches") def cached_find_identifiables(self, identifiables): if self.use_cache: diff --git a/src/caosadvancedtools/suppressable.py b/src/caosadvancedtools/suppressable.py new file mode 100644 index 00000000..f0ef9f63 --- /dev/null +++ b/src/caosadvancedtools/suppressable.py @@ -0,0 +1,88 @@ + + +import logging +import os +import sqlite3 +from hashlib import sha256 + + +class Suppressable(object): + def __init__(self, db_file=None, logger=None): + if db_file: + self.db_file = db_file + else: + self.db_file = "/tmp/caosadvanced_suppressed_cache.db" + + if logger: + self.logger = logger + else: + self.logger = logging.getLogger() + + if not os.path.exists(self.db_file): + self.create_cache() + + def create_cache(self): + conn = sqlite3.connect(self.db_file) + c = conn.cursor() + c.execute('''CREATE TABLE messages (digest text primary key, category text)''') + conn.commit() + conn.close() + + def tag_msg(self, txt, identifier, category): + digest = self.hash(txt, identifier) + conn = sqlite3.connect(self.db_file) + c = conn.cursor() + c.execute('''INSERT INTO messages VALUES (?,?)''', (digest, category)) + conn.commit() + conn.close() + + def reset(self, category): + conn = sqlite3.connect(self.db_file) + c = conn.cursor() + c.execute('''DELETE FROM messages WHERE category=?''', + (category,)) + conn.commit() + + def was_tagged(self, digest): + conn = sqlite3.connect(self.db_file) + c = conn.cursor() + c.execute('''Select * FROM messages WHERE digest=?''', + (digest,)) + res = c.fetchone() + conn.commit() + conn.close() + + if res is None: + return False + else: + return True + + def warning(self, txt, identifier, category): + self.msg(txt, identifier, "warning", category) + + def debug(self, txt, identifier, category): + self.msg(txt, identifier, "debug", category) + + def info(self, txt, identifier, category): + self.msg(txt, identifier, "info", category) + + def error(self, txt, identifier, category): + self.msg(txt, identifier, "error", category) + + def hash(self, txt, identifier): + return sha256((txt+str(identifier)).encode("utf-8")).hexdigest() + + def msg(self, txt, identifier, kind, category): + if self.was_tagged(self.hash(txt, identifier)): + return + + if kind == "debug": + self.logger.debug(txt) + elif kind == "info": + self.logger.info(txt) + elif kind == "warning": + self.logger.warning(txt) + elif kind == "error": + self.logger.error(txt) + + self.tag_msg(txt, identifier, category) diff --git a/unittests/test_suppressable.py b/unittests/test_suppressable.py new file mode 100644 index 00000000..8c27b11c --- /dev/null +++ b/unittests/test_suppressable.py @@ -0,0 +1,120 @@ +#!/usr/bin/env python +# encoding: utf-8 +# +# ** header v3.0 +# This file is a part of the CaosDB Project. +# +# Copyright (C) 2020 IndiScale GmbH +# Copyright (C) 2020 Henrik tom Wörden +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see <https://www.gnu.org/licenses/>. +# +# ** end header + +import logging +import os +import unittest +from tempfile import NamedTemporaryFile + +from caosadvancedtools.suppressable import Suppressable + + +class SupTestBasic(unittest.TestCase): + def setUp(self): + self.db_file = "/tmp/test_suppress_msg_db_file.db" + self.basic = Suppressable(db_file=self.db_file) + + def test_msg(self): + # testing basic setup + self.basic.info("hi", 5, "test") + digest = self.basic.hash("hi", 5) + assert self.basic.was_tagged(digest) + self.basic.info("hi", 5, "test") + + def tearDown(self): + os.remove(self.db_file) + + +class SupTestAdvanced(SupTestBasic): + def setUp(self): + self.db_file = "/tmp/test_suppress_msg_db_file.db" + self.basic = Suppressable(db_file=self.db_file) + + def test_logger(self): + """ + The logging output is directed to a file which is then checked whether + the output is as expected. + """ + logfile = NamedTemporaryFile() + logger = logging.getLogger() + logger.addHandler(logging.FileHandler(logfile.name)) + logger.setLevel(logging.DEBUG) + + suppressable = Suppressable(db_file=self.db_file, logger=logger) + + suppressable.info("hi", 5, "test") + with open(logfile.name) as lf: + log = lf.read() + # assert that the log was written + assert "hi" in log + # there should be one line so far + assert log.count("\n") == 1 + + # the following is unchanged and should be suppressed + suppressable.info("hi", 5, "test") + with open(logfile.name) as lf: + log = lf.read() + # one line with one hi + assert log.count("hi") == 1 + assert log.count("\n") == 1 + + # the following is a new message and should thus not be suppressed + suppressable.info("ha", 5, "new") + with open(logfile.name) as lf: + log = lf.read() + assert log.count("ha") == 1 + assert log.count("\n") == 2 + + # the following has a identifier and should thus not be suppressed + suppressable.info("hi", 6, "test") + with open(logfile.name) as lf: + log = lf.read() + assert log.count("hi") == 2 + assert log.count("\n") == 3 + + # the following should be suppressed again + suppressable.info("ha", 5, "new") + with open(logfile.name) as lf: + log = lf.read() + assert log.count("ha") == 1 + assert log.count("hi") == 2 + assert log.count("\n") == 3 + + # resetting test category; hi should be removed + suppressable.reset("test") + + # hi should not be suppressed + suppressable.info("hi", 5, "test") + with open(logfile.name) as lf: + log = lf.read() + assert log.count("hi") == 3 + assert log.count("\n") == 4 + + # the following should be suppressed still + suppressable.info("ha", 5, "new") + with open(logfile.name) as lf: + log = lf.read() + assert log.count("ha") == 1 + assert log.count("hi") == 3 + assert log.count("\n") == 4 -- GitLab