From 3c32b1f0222621b7bfd173b5a6f4c7b195974bea Mon Sep 17 00:00:00 2001
From: Florian Spreckelsen <f.spreckelsen@indiscale.com>
Date: Mon, 24 Mar 2025 14:58:49 +0100
Subject: [PATCH] FEAT(scanner): Auto-generate converter_registry and
 registered_transformer_functions

---
 CHANGELOG.md               |  6 ++++++
 src/caoscrawler/scanner.py | 15 ++++++++++++++-
 2 files changed, 20 insertions(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 33fdff70..6d15568e 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -11,11 +11,17 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ### Changed ###
 
+- `scanner.scan_structure_elements` now auto-generates the
+  `converter_registry` and the `registered_transformer_functions` from
+  the `crawler_definition`, each one only if it is not given. Therefore,
+  the `converter_registry` argument is now optional.
+
 ### Deprecated ###
 
 ### Removed ###
 
 ### Fixed ###
+
 - A RecordType with multiple Parents no longer causes an error during
   collection of identifiables
 
diff --git a/src/caoscrawler/scanner.py b/src/caoscrawler/scanner.py
index af1f4173..cb8dfdf2 100644
--- a/src/caoscrawler/scanner.py
+++ b/src/caoscrawler/scanner.py
@@ -486,7 +486,7 @@ def scan_directory(dirname: Union[str, list[str]], crawler_definition_path: str,
 
 def scan_structure_elements(items: Union[list[StructureElement], StructureElement],
                             crawler_definition: dict,
-                            converter_registry: dict,
+                            converter_registry: Optional[dict] = None,
                             restricted_path: Optional[list[str]] = None,
                             debug_tree: Optional[DebugTree] = None,
                             registered_transformer_functions: Optional[dict] = None) -> (
@@ -508,6 +508,15 @@ def scan_structure_elements(items: Union[list[StructureElement], StructureElemen
          Traverse the data tree only along the given path. When the end of the
          given path is reached, traverse the full tree as normal. See docstring
          of 'scanner' for more details.
+    converter_registry: dict, optional
+         Optional dictionary containing the converter definitions
+         needed for the crawler definition. If none is given, it will
+         be generated from the `crawler_definition`. Default is None.
+    registered_transformer_functions: dict, optional
+         Optional dictionary containing the transformer function
+         definitions needed for the crawler definition. If none is
+         given, it will be generated from the
+         `crawler_definition`. Default is None.
 
     Returns
     -------
@@ -519,6 +528,10 @@ def scan_structure_elements(items: Union[list[StructureElement], StructureElemen
     if not isinstance(items, list):
         items = [items]
 
+    if converter_registry is None:
+        converter_registry = create_converter_registry(crawler_definition)
+    if registered_transformer_functions is None:
+        registered_transformer_functions = create_transformer_registry(crawler_definition)
     # TODO: needs to be covered somewhere else
     # self.run_id = uuid.uuid1()
     converters = initialize_converters(crawler_definition, converter_registry)
-- 
GitLab