From f547fa391a9a63b79764e29b82c4203ff7206289 Mon Sep 17 00:00:00 2001
From: Alexander Schlemmer <alexander@mail-schlemmer.de>
Date: Wed, 8 Mar 2023 14:20:39 +0100
Subject: [PATCH] MAINT: make utility and converter registry functions top
 level functions without references to self

---
 src/caoscrawler/scanner.py | 66 +++++++++++++++++++++-----------------
 1 file changed, 36 insertions(+), 30 deletions(-)

diff --git a/src/caoscrawler/scanner.py b/src/caoscrawler/scanner.py
index 66519aee..f2c05ab1 100644
--- a/src/caoscrawler/scanner.py
+++ b/src/caoscrawler/scanner.py
@@ -70,7 +70,7 @@ from .debug.debug_tree import (DebugTreeStructureElement,
 logger = logging.getLogger(__name__)
 
 
-def load_definition(self, crawler_definition_path: str):
+def load_definition(crawler_definition_path: str):
     """
     Load a cfood from a crawler definition defined by
     crawler definition path and validate it using cfood-schema.yml.
@@ -80,12 +80,12 @@ def load_definition(self, crawler_definition_path: str):
     with open(crawler_definition_path, "r") as f:
         crawler_definitions = list(yaml.safe_load_all(f))
 
-    crawler_definition = self._load_definition_from_yaml_dict(
+    crawler_definition = _load_definition_from_yaml_dict(
         crawler_definitions)
 
-    return self._resolve_validator_paths(crawler_definition, crawler_definition_path)
+    return _resolve_validator_paths(crawler_definition, crawler_definition_path)
 
-def _load_definition_from_yaml_dict(self, crawler_definitions: list[dict]):
+def _load_definition_from_yaml_dict(crawler_definitions: list[dict]):
     """Load crawler definitions from a list of (yaml) dicts `crawler_definitions` which
     contains either one or two documents.
 
@@ -131,7 +131,8 @@ def _load_definition_from_yaml_dict(self, crawler_definitions: list[dict]):
 
     return crawler_definition
 
-def _resolve_validator_paths(self, definition: dict, definition_path: str):
+
+def _resolve_validator_paths(definition: dict, definition_path: str):
     """Resolve path to validation files with respect to the file in which
     the crawler was defined.
 
@@ -151,7 +152,7 @@ def _resolve_validator_paths(self, definition: dict, definition_path: str):
                         f"Couldn't find validation file {definition[key]}")
         elif isinstance(value, dict):
             # Recursively resolve all validators
-            definition[key] = self._resolve_validator_paths(value, definition_path)
+            definition[key] = _resolve_validator_paths(value, definition_path)
 
     return definition
 
@@ -194,6 +195,35 @@ def create_converter_registry(definition: dict):
     return converter_registry
 
 
+def initialize_converters(crawler_definition: dict, converter_registry: dict):
+    """
+    takes the cfood as dict (`crawler_definition`) and creates the converter objects that
+    are defined on the highest level. Child Converters will in turn be created during the
+    initialization of the Converters.
+    """
+    converters = []
+
+    for key, value in crawler_definition.items():
+        # Definitions and Converters are reserved keywords
+        # on the top level of the yaml file.
+        # TODO: there should also be a top level keyword for the actual
+        #       CFood to avoid confusion between top level keywords
+        #       and the CFood.
+        if key == "Definitions":
+            continue
+        elif key == "Converters":
+            continue
+        converters.append(Converter.converter_factory(
+            value, key, converter_registry))
+
+    return converters
+
+
+# --------------------------------------------------------------------------------
+# Main scanning functions:
+# --------------------------------------------------------------------------------
+
+
 def crawl_directory(self, dirname: str, crawler_definition_path: str,
                     restricted_path: Optional[list[str]] = None):
     """ Crawl a single directory.
@@ -229,31 +259,7 @@ def crawl_directory(self, dirname: str, crawler_definition_path: str,
                         converter_registry,
                         restricted_path=restricted_path
                         )
-
     
-def initialize_converters(crawler_definition: dict, converter_registry: dict):
-    """
-    takes the cfood as dict (`crawler_definition`) and creates the converter objects that
-    are defined on the highest level. Child Converters will in turn be created during the
-    initialization of the Converters.
-    """
-    converters = []
-
-    for key, value in crawler_definition.items():
-        # Definitions and Converters are reserved keywords
-        # on the top level of the yaml file.
-        # TODO: there should also be a top level keyword for the actual
-        #       CFood to avoid confusion between top level keywords
-        #       and the CFood.
-        if key == "Definitions":
-            continue
-        elif key == "Converters":
-            continue
-        converters.append(Converter.converter_factory(
-            value, key, converter_registry))
-
-    return converters
-
 
 def start_crawling(self, items: Union[list[StructureElement], StructureElement],
                    crawler_definition: dict,
-- 
GitLab