From c6c305c529cb4981936290499538c825e50cc2b8 Mon Sep 17 00:00:00 2001
From: Daniel Hornung <d.hornung@indiscale.com>
Date: Fri, 22 Mar 2024 14:49:35 +0100
Subject: [PATCH] FIX: Linting.
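
Enable pylint warnings in the Makefile lint target by disabling only
the refactoring and convention message classes, and silence fixme and
logging-format messages via pylintrc.

In crawler.py: replace broad `except Exception` handlers around
DataModelProblems.evaluate_exception with `except TransactionError`,
avoid a mutable default argument in apply_list_of_updates, comment out
the unused `update_incomplete` bookkeeping, make `run_id` a local
variable in crawl, move the xml.sax.saxutils import to module level,
pass an explicit encoding when writing the form file, and add missing
docstrings.  Note that exceptions other than TransactionError now
propagate out of the narrowed handlers instead of being swallowed.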

---
 Makefile                         |  2 +-
 pylintrc                         |  5 +++
 src/caosadvancedtools/crawler.py | 66 ++++++++++++++++----------------
 3 files changed, 40 insertions(+), 33 deletions(-)
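
Reviewer note (below the fold, ignored by git am): the new signature of
apply_list_of_updates avoids Python's mutable-default-argument pitfall.
A minimal sketch of the problem with a hypothetical function (names are
illustrative only, not taken from this module):

    # The default dict is created once, at function definition time,
    # and is then shared by every call that omits the argument.
    def add_flag(flags={}):
        flags["seen"] = True
        return flags

    a = add_flag()
    b = add_flag()
    assert a is b  # the same dict object; state leaks between calls

    # The pattern used in this patch: default to None and create a
    # fresh dict inside the function body.
    def add_flag_fixed(flags=None):
        if flags is None:
            flags = {}
        flags["seen"] = True
        return flags

    assert add_flag_fixed() is not add_flag_fixed()  # fresh dict per call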

diff --git a/Makefile b/Makefile
index e49e7b86..0c586c1e 100644
--- a/Makefile
+++ b/Makefile
@@ -41,5 +41,5 @@ style:
 .PHONY: style
 
 lint:
-	pylint --unsafe-load-any-extension=y -d all -e E,F --ignore=swagger_client src/caosadvancedtools
+	pylint --unsafe-load-any-extension=y -d R,C --ignore=swagger_client src/caosadvancedtools
 .PHONY: lint
diff --git a/pylintrc b/pylintrc
index 625f83ce..f781e2ed 100644
--- a/pylintrc
+++ b/pylintrc
@@ -17,3 +17,8 @@ init-hook=
   import sys; sys.path.extend(["src/caosadvancedtools"]);
   import astroid; astroid.context.InferenceContext.max_inferred = 500;
 
+[MESSAGES CONTROL]
+disable=
+  fixme,
+  logging-format-interpolation,
+  logging-not-lazy,
diff --git a/src/caosadvancedtools/crawler.py b/src/caosadvancedtools/crawler.py
index 9d0502e3..6655919e 100644
--- a/src/caosadvancedtools/crawler.py
+++ b/src/caosadvancedtools/crawler.py
@@ -41,14 +41,14 @@ match. This occurs in basically three steps:
 
 import logging
 import os
-import subprocess
 import traceback
 import uuid
 from datetime import datetime
 from sqlite3 import IntegrityError
+from xml.sax.saxutils import escape
 
 import linkahead as db
-from linkahead.exceptions import BadQueryError
+from linkahead.exceptions import BadQueryError, TransactionError
 
 from .cache import IdentifiableCache, UpdateCache, get_pretty_xml
 from .cfood import RowCFood, add_files, get_ids_for_entities_with_names
@@ -67,7 +67,7 @@ def separated(text):
     return "-"*60 + "\n" + text
 
 
-def apply_list_of_updates(to_be_updated, update_flags={},
+def apply_list_of_updates(to_be_updated, update_flags=None,
                           update_cache=None, run_id=None):
     """Updates the `to_be_updated` Container, i.e., pushes the changes to CaosDB
     after removing possible duplicates. If a cache is provided, unauthorized
@@ -87,6 +87,8 @@ def apply_list_of_updates(to_be_updated, update_flags={},
         Id with which the pending updates are cached. Only meaningful if
         `update_cache` is provided. Default is None.
     """
+    if update_flags is None:
+        update_flags = {}
 
     if len(to_be_updated) == 0:
         return
@@ -132,7 +134,7 @@ def apply_list_of_updates(to_be_updated, update_flags={},
             )
             logger.debug(traceback.format_exc())
             logger.debug(e)
-    except Exception as e:
+    except TransactionError as e:
         DataModelProblems.evaluate_exception(e)
 
 
@@ -220,14 +222,16 @@ class Crawler(object):
             new_cont = db.Container.from_xml(new)
             ids = []
             tmp = db.Container()
-            update_incomplete = False
+            # TODO what was this for?
+            # update_incomplete = False
             # remove duplicate entities
             for el in new_cont:
                 if el.id not in ids:
                     ids.append(el.id)
                     tmp.append(el)
                 else:
-                    update_incomplete = True
+                    pass
+                    # update_incomplete = True
             new_cont = tmp
             if new_cont[0].version:  # pylint: disable=no-member
                 valids = db.Container()
@@ -238,7 +242,7 @@ class Crawler(object):
                     if ent.version == remote_ent.version:  # pylint: disable=no-member
                         valids.append(ent)
                     else:
-                        update_incomplete = True
+                        # update_incomplete = True
                         nonvalids.append(remote_ent)
                 valids.update(unique=False)
                 logger.info("Successfully updated {} records!".format(
@@ -317,10 +321,10 @@ class Crawler(object):
                         logger.debug(e)
                         # TODO: Generally: in which cases should exceptions be raised? When is
                         # errors_occured set to True? The expected behavior must be documented.
-                    except Exception as e:
+                    except TransactionError as e:
                         try:
                             DataModelProblems.evaluate_exception(e)
-                        except BaseException:
+                        except BaseException:  # pylint: disable=broad-exception-caught
                             pass
                         logger.debug("Failed during execution of {}!".format(
                             Cfood.__name__))
@@ -349,13 +353,12 @@ class Crawler(object):
                 logger.info("Cannot access {}. However, it might be needed for"
                             " the correct execution".format(e.filename))
                 remove_cfoods.append(cfood)
-            except Exception as e:
+            except TransactionError as e:
                 try:
                     DataModelProblems.evaluate_exception(e)
-                except BaseException:
+                except BaseException:  # pylint: disable=broad-exception-caught
                     pass
-                logger.debug("Failed during execution of {}!".format(
-                    Cfood.__name__))
+                logger.debug("Failed during execution of {}!".format(cfood.__class__.__name__))
                 logger.debug(traceback.format_exc())
                 logger.debug(e)
                 remove_cfoods.append(cfood)
@@ -415,8 +418,8 @@ class Crawler(object):
             self.cache.insert_list(hashes, identifiables)
 
     def crawl(self, security_level=RETRIEVE, path=None):
-        self.run_id = uuid.uuid1()
-        logger.info("Run Id: " + str(self.run_id))
+        run_id = uuid.uuid1()
+        logger.info("Run Id: " + str(run_id))
         guard.set_level(level=security_level)
 
         logger.info("Scanning the objects to be treated...")
@@ -435,17 +438,17 @@ class Crawler(object):
                     cfood.to_be_updated,
                     cfood.update_flags,
                     update_cache=self.update_cache,
-                    run_id=self.run_id)
+                    run_id=run_id)
             except FileNotFoundError as e:
                 logger.info("Cannot access {}. However, it might be needed for"
                             " the correct execution".format(e.filename))
             except DataInconsistencyError as e:
                 logger.debug(traceback.format_exc())
                 logger.debug(e)
-            except Exception as e:
+            except TransactionError as e:
                 try:
                     DataModelProblems.evaluate_exception(e)
-                except Exception:
+                except Exception:  # pylint: disable=broad-exception-caught
                     pass
                 logger.info("Failed during execution of {}!".format(
                     cfood.__class__.__name__))
@@ -457,7 +460,7 @@ class Crawler(object):
                 errors_occured = True
                 tbs.append(e)
 
-        pending_changes = self.update_cache.get_updates(self.run_id)
+        pending_changes = self.update_cache.get_updates(run_id)
 
         if pending_changes:
             # Sending an Email with a link to a form to authorize updates is
@@ -465,7 +468,7 @@ class Crawler(object):
 
             if "SHARED_DIR" in os.environ:
                 filename = Crawler.save_form([el[3]
-                                              for el in pending_changes], path, self.run_id)
+                                              for el in pending_changes], path, run_id)
                 Crawler.send_mail([el[3] for el in pending_changes], filename)
 
             for i, el in enumerate(pending_changes):
@@ -477,7 +480,7 @@ ____________________\n""".format(i+1, len(pending_changes)) + str(el[3]))
             logger.info("There where unauthorized changes (see above). An "
                         "email was sent to the curator.\n"
                         "You can authorize the updates by invoking the crawler"
-                        " with the run id: {rid}\n".format(rid=self.run_id))
+                        " with the run id: {rid}\n".format(rid=run_id))
 
         if len(DataModelProblems.missing) > 0:
             err_msg = ("There were problems with one or more RecordType or "
@@ -488,8 +491,8 @@ ____________________\n""".format(i+1, len(pending_changes)) + str(el[3]))
             logger.error(err_msg)
             logger.error('Crawler finished with Datamodel Errors')
         elif errors_occured:
-            msg = "There were fatal errors during execution, please "
-            "contact the system administrator!"
+            msg = ("There were fatal errors during execution, please "
+                   "contact the system administrator!")
 
             if self.debug_file:
                 msg += "\nPlease provide the following path:\n{}".format(
@@ -516,7 +520,6 @@ ____________________\n""".format(i+1, len(pending_changes)) + str(el[3]))
         path: the path defining the subtree that is crawled
 
         """
-        from xml.sax.saxutils import escape
 
         # TODO move path related stuff to sss_helper
         form = """
@@ -593,7 +596,7 @@ ____________________\n""".format(i+1, len(pending_changes)) + str(el[3]))
         randname = os.path.basename(os.path.abspath(directory))
         filepath = os.path.abspath(os.path.join(directory, filename))
         filename = os.path.join(randname, filename)
-        with open(filepath, "w") as f:
+        with open(filepath, "w", encoding="utf8") as f:
             f.write(form)
         return filename
 
@@ -673,7 +676,7 @@ carefully and if the changes are ok, click on the following link:
                     guard.safe_insert(missing, unique=False,
                                       flags={"force-missing-obligatory": "ignore"})
                     inserted.append(ent)
-                except Exception as e:
+                except TransactionError as e:
                     DataModelProblems.evaluate_exception(e)
         if len(existing) > 0:
             info = "Identified the following existing entities:\n"
@@ -774,14 +777,13 @@ class FileCrawler(Crawler):
             path if path.endswith("/") else path + "/") + "**'"
         q_info = "Sending the following query: '" + query_str + "'\n"
         files = db.execute_query(query_str)
-        logger.info(
-            q_info + "Found {} files that need to be processed.".format(
-                len(files)))
+        logger.info(q_info + f"Found {len(files)} files that need to be processed.")
 
         return files
 
 
 class TableCrawler(Crawler):
+    """Crawler for table like data."""
 
     def __init__(self, table, unique_cols, recordtype, **kwargs):
         """
@@ -796,6 +798,7 @@ class TableCrawler(Crawler):
 
         # TODO I do not like this yet, but I do not see a better way so far.
         class ThisRowCF(RowCFood):
+            """CFood for rows."""
             def __init__(self, item):
                 super().__init__(item, unique_cols, recordtype)
 
@@ -821,7 +824,6 @@ def get_value(prop):
 
     if isinstance(prop.value, db.Entity):
         return prop.value.id
-    elif isinstance(prop.value, datetime):
+    if isinstance(prop.value, datetime):
         return prop.value.isoformat()
-    else:
-        return prop.value
+    return prop.value
-- 
GitLab