diff --git a/Makefile b/Makefile
index e49e7b86e3b94f3b5d0ea98618df30568b9a4ffb..0c586c1e43f75d0b54eea2b625f25d6a9d6318a9 100644
--- a/Makefile
+++ b/Makefile
@@ -41,5 +41,5 @@ style:
 .PHONY: style
 
 lint:
-	pylint --unsafe-load-any-extension=y -d all -e E,F --ignore=swagger_client src/caosadvancedtools
+	pylint --unsafe-load-any-extension=y -d R,C --ignore=swagger_client src/caosadvancedtools
 .PHONY: lint
diff --git a/pylintrc b/pylintrc
index 625f83ce950841f7a239538123ef7b5812fc5c5f..f781e2ed9a3a83e8da2152f0e81c4051c1256474 100644
--- a/pylintrc
+++ b/pylintrc
@@ -17,3 +17,8 @@
 init-hook=
   import sys; sys.path.extend(["src/caosadvancedtools"]);
   import astroid; astroid.context.InferenceContext.max_inferred = 500;
+[MESSAGES CONTROL]
+disable=
+  fixme,
+  logging-format-interpolation,
+  logging-not-lazy,
diff --git a/src/caosadvancedtools/crawler.py b/src/caosadvancedtools/crawler.py
index 9d0502e38e83db9f1cb97c11aa8496dca5a28fc9..6655919e9f5e3f05c1d017954e78d5b91e594446 100644
--- a/src/caosadvancedtools/crawler.py
+++ b/src/caosadvancedtools/crawler.py
@@ -41,14 +41,14 @@ match. This occurs in basically three steps:
 
 import logging
 import os
-import subprocess
 import traceback
 import uuid
 from datetime import datetime
 from sqlite3 import IntegrityError
+from xml.sax.saxutils import escape
 
 import linkahead as db
-from linkahead.exceptions import BadQueryError
+from linkahead.exceptions import BadQueryError, TransactionError
 
 from .cache import IdentifiableCache, UpdateCache, get_pretty_xml
 from .cfood import RowCFood, add_files, get_ids_for_entities_with_names
@@ -67,7 +67,7 @@ def separated(text):
     return "-"*60 + "\n" + text
 
 
-def apply_list_of_updates(to_be_updated, update_flags={},
+def apply_list_of_updates(to_be_updated, update_flags=None,
                           update_cache=None, run_id=None):
     """Updates the `to_be_updated` Container, i.e., pushes the changes to
     CaosDB after removing possible duplicates. If a cache is provided, unauthorized
@@ -87,6 +87,8 @@ def apply_list_of_updates(to_be_updated, update_flags={},
         Id with which the pending updates are cached. Only meaningful if
         `update_cache` is provided. Default is None.
     """
+    if update_flags is None:
+        update_flags = {}
 
     if len(to_be_updated) == 0:
         return
@@ -132,7 +134,7 @@
             )
             logger.debug(traceback.format_exc())
             logger.debug(e)
-        except Exception as e:
+        except TransactionError as e:
             DataModelProblems.evaluate_exception(e)
 
 
@@ -220,14 +222,16 @@ class Crawler(object):
         new_cont = db.Container.from_xml(new)
         ids = []
         tmp = db.Container()
-        update_incomplete = False
+        # TODO what was this for?
+        # update_incomplete = False
         # remove duplicate entities
         for el in new_cont:
             if el.id not in ids:
                 ids.append(el.id)
                 tmp.append(el)
             else:
-                update_incomplete = True
+                pass
+                # update_incomplete = True
         new_cont = tmp
         if new_cont[0].version:  # pylint: disable=no-member
             valids = db.Container()
@@ -238,7 +242,7 @@
                 if ent.version == remote_ent.version:  # pylint: disable=no-member
                     valids.append(ent)
                 else:
-                    update_incomplete = True
+                    # update_incomplete = True
                     nonvalids.append(remote_ent)
             valids.update(unique=False)
             logger.info("Successfully updated {} records!".format(
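Background on the `apply_list_of_updates` change above: a Python default argument is evaluated once, when the `def` statement runs, so a mutable default such as `update_flags={}` is shared by every call that omits the argument. The `None` sentinel plus an in-body check gives each call a fresh dict. A minimal, self-contained sketch of the pitfall (the function names here are illustrative only, not part of the crawler API):

```python
def broken(key, flags={}):   # the {} is created once, at definition time
    flags[key] = True        # mutates the single shared dict
    return flags

def fixed(key, flags=None):  # sentinel default
    if flags is None:
        flags = {}           # a fresh dict on every call
    flags[key] = True
    return flags

print(broken("a"))  # {'a': True}
print(broken("b"))  # {'a': True, 'b': True}  <- state leaked from the first call
print(fixed("a"))   # {'a': True}
print(fixed("b"))   # {'b': True}
```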
@@ -317,10 +321,10 @@ class Crawler(object):
                 logger.debug(e)
             # TODO: Generally: in which cases should exceptions be raised? When is
             # errors_occured set to True? The expected behavior must be documented.
-            except Exception as e:
+            except TransactionError as e:
                 try:
                     DataModelProblems.evaluate_exception(e)
-                except BaseException:
+                except BaseException:  # pylint: disable=broad-exception-caught
                     pass
                 logger.debug("Failed during execution of {}!".format(
                     Cfood.__name__))
@@ -349,13 +353,12 @@ class Crawler(object):
                 logger.info("Cannot access {}. However, it might be needed for"
                             " the correct execution".format(e.filename))
                 remove_cfoods.append(cfood)
-            except Exception as e:
+            except TransactionError as e:
                 try:
                     DataModelProblems.evaluate_exception(e)
-                except BaseException:
+                except BaseException:  # pylint: disable=broad-exception-caught
                     pass
-                logger.debug("Failed during execution of {}!".format(
-                    Cfood.__name__))
+                logger.debug("Failed during execution of {}!".format(cfood.__class__.__name__))
                 logger.debug(traceback.format_exc())
                 logger.debug(e)
                 remove_cfoods.append(cfood)
@@ -415,8 +418,8 @@ class Crawler(object):
         self.cache.insert_list(hashes, identifiables)
 
     def crawl(self, security_level=RETRIEVE, path=None):
-        self.run_id = uuid.uuid1()
-        logger.info("Run Id: " + str(self.run_id))
+        run_id = uuid.uuid1()
+        logger.info("Run Id: " + str(run_id))
         guard.set_level(level=security_level)
 
         logger.info("Scanning the objects to be treated...")
@@ -435,17 +438,17 @@ class Crawler(object):
                     cfood.to_be_updated,
                     cfood.update_flags,
                     update_cache=self.update_cache,
-                    run_id=self.run_id)
+                    run_id=run_id)
             except FileNotFoundError as e:
                 logger.info("Cannot access {}. However, it might be needed for"
                             " the correct execution".format(e.filename))
             except DataInconsistencyError as e:
                 logger.debug(traceback.format_exc())
                 logger.debug(e)
-            except Exception as e:
+            except TransactionError as e:
                 try:
                     DataModelProblems.evaluate_exception(e)
-                except Exception:
+                except Exception:  # pylint: disable=broad-exception-caught
                     pass
                 logger.info("Failed during execution of {}!".format(
                     cfood.__class__.__name__))
@@ -457,7 +460,7 @@ class Crawler(object):
                 errors_occured = True
                 tbs.append(e)
 
-        pending_changes = self.update_cache.get_updates(self.run_id)
+        pending_changes = self.update_cache.get_updates(run_id)
 
         if pending_changes:
             # Sending an Email with a link to a form to authorize updates is
@@ -465,7 +468,7 @@ class Crawler(object):
             if "SHARED_DIR" in os.environ:
                 filename = Crawler.save_form([el[3]
-                                              for el in pending_changes], path, self.run_id)
+                                              for el in pending_changes], path, run_id)
                 Crawler.send_mail([el[3] for el in pending_changes], filename)
 
             for i, el in enumerate(pending_changes):
@@ -477,7 +480,7 @@ ____________________\n""".format(i+1, len(pending_changes)) + str(el[3]))
             logger.info("There were unauthorized changes (see above). An "
                         "email was sent to the curator.\n"
                         "You can authorize the updates by invoking the crawler"
-                        " with the run id: {rid}\n".format(rid=self.run_id))
+                        " with the run id: {rid}\n".format(rid=run_id))
 
         if len(DataModelProblems.missing) > 0:
             err_msg = ("There were problems with one or more RecordType or "
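The recurring `except Exception` → `except TransactionError` edits in the hunks above narrow the crawler's error handling: only transaction failures reported by the server are routed through `DataModelProblems.evaluate_exception`, while unrelated programming errors now propagate instead of being silently swallowed. A rough sketch of the resulting pattern, assuming a stand-in exception class (the real one is `linkahead.exceptions.TransactionError`) and a dummy `evaluate_exception` in place of the `DataModelProblems` hook:

```python
import logging
import traceback

logger = logging.getLogger(__name__)


class TransactionError(Exception):
    """Stand-in for linkahead.exceptions.TransactionError."""


def evaluate_exception(e):
    """Dummy stand-in for DataModelProblems.evaluate_exception."""
    logger.debug("evaluating %s", e)


def run_step(step):
    try:
        step()
    except FileNotFoundError as e:
        # Expected and recoverable: log it and move on.
        logger.info("Cannot access %s.", e.filename)
    except TransactionError as e:
        # Server-side failure: try to classify it, but never let the
        # diagnostics themselves crash the crawler.
        try:
            evaluate_exception(e)
        except Exception:  # pylint: disable=broad-exception-caught
            pass
        logger.debug(traceback.format_exc())
    # Anything else (e.g. a TypeError caused by a bug) now propagates
    # to the caller instead of being swallowed here.
```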
@@ -488,8 +491,9 @@ ____________________\n""".format(i+1, len(pending_changes)) + str(el[3]))
             logger.error(err_msg)
             logger.error('Crawler finished with Datamodel Errors')
         elif errors_occured:
-            msg = "There were fatal errors during execution, please "
-            "contact the system administrator!"
+            msg = ("There were fatal errors during execution, please "
+                   "contact the system administrator!"
+                   )
 
             if self.debug_file:
                 msg += "\nPlease provide the following path:\n{}".format(
@@ -516,7 +520,6 @@ ____________________\n""".format(i+1, len(pending_changes)) + str(el[3]))
             path: the path defining the subtree that is crawled
 
         """
-        from xml.sax.saxutils import escape
         # TODO move path related stuff to sss_helper
         form = """
@@ -593,7 +596,7 @@ ____________________\n""".format(i+1, len(pending_changes)) + str(el[3]))
         randname = os.path.basename(os.path.abspath(directory))
         filepath = os.path.abspath(os.path.join(directory, filename))
         filename = os.path.join(randname, filename)
-        with open(filepath, "w") as f:
+        with open(filepath, "w", encoding="utf8") as f:
             f.write(form)
 
         return filename
@@ -673,7 +676,7 @@ carefully and if the changes are ok, click on the following link:
                 guard.safe_insert(missing, unique=False,
                                   flags={"force-missing-obligatory": "ignore"})
                 inserted.append(ent)
-            except Exception as e:
+            except TransactionError as e:
                 DataModelProblems.evaluate_exception(e)
         if len(existing) > 0:
             info = "Identified the following existing entities:\n"
@@ -774,14 +777,13 @@ class FileCrawler(Crawler):
             path if path.endswith("/") else path + "/") + "**'"
         q_info = "Sending the following query: '" + query_str + "'\n"
         files = db.execute_query(query_str)
-        logger.info(
-            q_info + "Found {} files that need to be processed.".format(
-                len(files)))
+        logger.info(q_info + f"Found {len(files)} files that need to be processed.")
 
         return files
 
 
 class TableCrawler(Crawler):
+    """Crawler for table-like data."""
 
     def __init__(self, table, unique_cols, recordtype, **kwargs):
         """
@@ -796,6 +798,7 @@
         # TODO I do not like this yet, but I do not see a better way so far.
         class ThisRowCF(RowCFood):
+            """CFood for rows."""
 
             def __init__(self, item):
                 super().__init__(item, unique_cols, recordtype)
@@ -821,7 +824,6 @@ def get_value(prop):
     if isinstance(prop.value, db.Entity):
         return prop.value.id
 
-    elif isinstance(prop.value, datetime):
+    if isinstance(prop.value, datetime):
         return prop.value.isoformat()
-    else:
-        return prop.value
+    return prop.value
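The `msg` fix in the `elif errors_occured:` branch above repairs a classic Python pitfall: adjacent string literals are only concatenated when they belong to a single expression. Split across two statements, the second literal is parsed as a bare, useless expression statement, so the administrator hint was silently dropped. A quick demonstration:

```python
# Broken: only the first literal is assigned; the second line is an
# expression statement that does nothing.
msg = "There were fatal errors during execution, please "
"contact the system administrator!"
assert msg == "There were fatal errors during execution, please "

# Fixed: parentheses make both literals part of one expression, so
# they are concatenated at compile time.
msg = ("There were fatal errors during execution, please "
       "contact the system administrator!")
assert msg.endswith("contact the system administrator!")
```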