diff --git a/src/caoscrawler/converters.py b/src/caoscrawler/converters.py index aecbda9c18dd506d62abfd6397ef9bad9e0823ae..51dc5615ee097e44c03e60df06239f89f8399f9a 100644 --- a/src/caoscrawler/converters.py +++ b/src/caoscrawler/converters.py @@ -23,6 +23,14 @@ # ** end header # +""" +TODO place +- The key is the name of the function to be looked up in the dictionary +of registered transformer functions. +- The value is a dictionary with key, value-assignments which will be +passed to the transformer function. +""" + from __future__ import annotations import datetime @@ -376,8 +384,7 @@ class Converter(object, metaclass=ABCMeta): raise RuntimeError("Condition does not match.") values.update(m) - def apply_transformers(self, values: GeneralStore, - transformer_functions: dict): + def apply_transformers(self, values: GeneralStore, transformer_functions: dict): """ Check if transformers are defined using the "transform" keyword. Then apply the transformers to the variables defined in GeneralStore "values". @@ -390,6 +397,11 @@ class Converter(object, metaclass=ABCMeta): transformer_functions: dict A dictionary of registered functions that can be used within this transformer block. + The keys of the dict are the function keys and the values the callable functions of the + form: + + def func(in_value: Any, in_parameters: dict) -> Any: + pass """ if "transform" in self.definition: @@ -421,23 +433,11 @@ class Converter(object, metaclass=ABCMeta): " of the function!") tr_func_key = list(tr_func_el.keys())[0] tr_func_params = tr_func_el[tr_func_key] - # These functions are a list of functions that need to be registered - # in the dictionary of registered transformer_functions. - # Each function is a dictionary: - # - The key is the name of the function to be looked up in the dictionary - # of registered transformer functions. - # - The value is a dictionary with key, value-assignments which will be - # passed to the transformer function. 
- # The transformer function needs to be of the form: - # - # def func(in_value: Any, in_parameters: dict) -> Any: - # pass - # if tr_func_key not in transformer_functions: raise RuntimeError("Unknown transformer function: {}".format(tr_func_key)) # Retrieve the function from the dictionary: - tr_func = transformer_functions[tr_func_key]["function"] + tr_func = transformer_functions[tr_func_key] # Call the function: out_value = tr_func(in_value, tr_func_params) # The next in_value is the current out_value: diff --git a/src/caoscrawler/scanner.py b/src/caoscrawler/scanner.py index fd82a896d88d6470cfe0883fcf85523892e01ae3..5b1d9e511fea0183a81e14c0077a9284efe8ed02 100644 --- a/src/caoscrawler/scanner.py +++ b/src/caoscrawler/scanner.py @@ -199,12 +199,13 @@ def create_transformer_registry(definition: dict): # Defaults for the transformer registry: with open(str(files('caoscrawler').joinpath('default_transformers.yml')), "r") as f: - transformer_registry: dict[str, dict[str, str]] = yaml.safe_load(f) + transformer_def: dict[str, dict[str, str]] = yaml.safe_load(f) + registry = {} # More transformers from definition file: if "Transformers" in definition: for key, entry in definition["Transformers"].items(): - transformer_registry[key] = { + transformer_def[key] = { "function": entry["function"], "package": entry["package"] } @@ -212,8 +213,8 @@ def create_transformer_registry(definition: dict): # Load modules and associate classes: - for key, value in transformer_registry.items(): + for key, value in transformer_def.items(): module = importlib.import_module(value["package"]) - value["function"] = getattr(module, value["function"]) - return transformer_registry + registry[key] = getattr(module, value["function"]) + return registry def initialize_converters(crawler_definition: dict, converter_registry: dict): @@ -284,12 +285,10 @@ def scanner(items: list[StructureElement], Each function is a dictionary: - The key is the name of the function to be looked up in the dictionary of registered transformer functions.
- - The value is a dictionary with key, value-assignments which will be - passed to the transformer function. - The transformer function needs to be of the form: + - The value is the function which needs to be of the form: - def func(in_value: Any, in_parameters: dict) -> Any: - pass + def func(in_value: Any, in_parameters: dict) -> Any: + pass """ # This path_found variable stores wether the path given by restricted_path was found in the diff --git a/unittests/test_converters.py b/unittests/test_converters.py index 3ee20dd8b0ded7ed66c24be6facfa12a6a7c7ef1..35851a7aaf5ec5b9991b951b3f4ea100eccdf77b 100644 --- a/unittests/test_converters.py +++ b/unittests/test_converters.py @@ -379,10 +379,7 @@ def test_apply_transformers(converter_registry): values["a"] = "a|b|c" # transformer_functions = create_transformer_registry(crawler_definition) - transformer_functions = { - "split": { - "function": split, - "package": "caoscrawler.transformer_functions"}} + transformer_functions = {"split": split} conv = ListElementConverter(definition=cfood_def, name='test', converter_registry=converter_registry)