diff --git a/CHANGELOG.md b/CHANGELOG.md index 33fdff70f9af8d1c2174dc0ec297b08762fdeb63..6d15568e4ecff8c9028d063b47419ab2a8adbd4f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,11 +11,17 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Changed ### +- `scanner.scan_structure_elements` now auto-generates the + `converter_registry` and the `registered_transformer_functions` from + the `crawler_definition` if none are given. Therefore, the + `converter_registry` argument is now optional. + ### Deprecated ### ### Removed ### ### Fixed ### + - A RecordType with multiple Parents no longer causes an error during collection of identifiables diff --git a/src/caoscrawler/scanner.py b/src/caoscrawler/scanner.py index af1f4173e95827606a02979ddd6d7fcd9f133271..cb8dfdf2bf65dba9fd206366722ad69ecfc251e4 100644 --- a/src/caoscrawler/scanner.py +++ b/src/caoscrawler/scanner.py @@ -486,7 +486,7 @@ def scan_directory(dirname: Union[str, list[str]], crawler_definition_path: str, def scan_structure_elements(items: Union[list[StructureElement], StructureElement], crawler_definition: dict, - converter_registry: dict, + converter_registry: Optional[dict] = None, restricted_path: Optional[list[str]] = None, debug_tree: Optional[DebugTree] = None, registered_transformer_functions: Optional[dict] = None) -> ( @@ -508,6 +508,15 @@ def scan_structure_elements(items: Union[list[StructureElement], StructureElemen Traverse the data tree only along the given path. When the end of the given path is reached, traverse the full tree as normal. See docstring of 'scanner' for more details. + converter_registry: dict, optional + Optional dictionary containing the converter definitions + needed for the crawler definition. If none is given, it will + be generated from the `crawler_definition`. Default is None. + registered_transformer_functions: dict, optional + Optional dictionary containing the transformer function + definitions needed for the crawler definition. If none is + given, it will be generated from the + `crawler_definition`. Default is None. Returns ------- @@ -519,6 +528,10 @@ def scan_structure_elements(items: Union[list[StructureElement], StructureElemen if not isinstance(items, list): items = [items] + if converter_registry is None: + converter_registry = create_converter_registry(crawler_definition) + if registered_transformer_functions is None: + registered_transformer_functions = create_transformer_registry(crawler_definition) # TODO: needs to be covered somewhere else # self.run_id = uuid.uuid1() converters = initialize_converters(crawler_definition, converter_registry)