diff --git a/src/caoscrawler/converters.py b/src/caoscrawler/converters.py index eb41325bd798126c4d06d92f6ea398eaa1ebbb72..b9070c121133fa8cb406d89f425f4d9674a0b2cf 100644 --- a/src/caoscrawler/converters.py +++ b/src/caoscrawler/converters.py @@ -23,7 +23,6 @@ # ** end header # - from __future__ import annotations import datetime @@ -33,6 +32,7 @@ import os import re import warnings from abc import ABCMeta, abstractmethod +from inspect import signature from string import Template from typing import Any, List, Optional, Tuple, Union @@ -322,9 +322,10 @@ class Converter(object, metaclass=ABCMeta): Parameters ---------- - definition: dict, stuffPlease refer to XX to learn about the structure that the definition dict must have + definition: dict, Please refer to ``src/doc/converters.rst`` to learn about the structure + that the definition dict must have. converter_registry: dict, A dictionary that contains converter names as keys and dicts as - values. Those value dicts have the keys 'converter' and 'package'. + values. Those value dicts have the keys 'converter' and 'package'. """ self.definition = definition @@ -444,7 +445,11 @@ class Converter(object, metaclass=ABCMeta): # Retrieve the function from the dictionary: tr_func = transformer_functions[tr_func_key] # Call the function: - out_value = tr_func(in_value, tr_func_params) + sig = signature(tr_func) + if len(sig.parameters) == 1 and len(tr_func_params) == 0: + out_value = tr_func(in_value) + else: + out_value = tr_func(in_value, tr_func_params) # The next in_value is the current out_value: in_value = out_value # If everything succeeded, store the final value in the general store: diff --git a/src/caoscrawler/scanner.py b/src/caoscrawler/scanner.py index 25f72e56d460a2e0c8481ba7c5dc2818937ad4fa..18d722ed8c485dc0472344407f345a7a0e517b60 100644 --- a/src/caoscrawler/scanner.py +++ b/src/caoscrawler/scanner.py @@ -235,6 +235,8 @@ def initialize_converters(crawler_definition: dict, converter_registry: dict): continue elif key == "Converters": continue + elif key == "Transformers": + continue converters.append(Converter.converter_factory( value, key, converter_registry)) diff --git a/unittests/test_directories/test_transformers/cfood.yml b/unittests/test_directories/test_transformers/cfood.yml index 9208d62c717e2435417f2bda54592dbaa57c332f..9064bfad9e2abbb9bf494bec03e4e82b7f6065a7 100644 --- a/unittests/test_directories/test_transformers/cfood.yml +++ b/unittests/test_directories/test_transformers/cfood.yml @@ -1,5 +1,10 @@ # See: https://gitlab.indiscale.com/caosdb/src/caosdb-crawler/-/issues/107 +# +Transformers: + ceil: + function: quote + package: shlex RootDir: type: Directory @@ -30,3 +35,15 @@ RootDir: Day: $day_long DayShort: $day_short # just for checking, whether this variable remains DaySplit: $day_split # just for checking, whether this variable remains + Testfi: + type: File + match: ^(?P<no>(\d+ )*)$ + transform: + up: + in: $no + out: $no + functions: + - ceil: {} + records: + Number: + num: $no diff --git a/unittests/test_transformers.py b/unittests/test_transformers.py index 47b31d0b0173786d58b2e7169342dea9fb11a03e..06b089f79f1c2eb75ad9ca0fa116290a253a0666 100644 --- a/unittests/test_transformers.py +++ b/unittests/test_transformers.py @@ -56,18 +56,24 @@ def test_simple_transformer(): "cfood.yml") for r in records: - assert r.get_property("Day") is not None - assert r.get_property("DayShort") is not None - assert r.get_property("DayShort").value != "$day_short" - if r.get_property("DayShort").value == "Unk": - # This unkown folder should not lead to a replacement - assert r.get_property("Day").value == "Unk" - assert r.get_property("DaySplit").value == ["Unk"] - elif r.get_property("DayShort").value == "Mon": - assert r.get_property("Day").value == "Monday" - assert r.get_property("DaySplit").value == ["M", "n"] - elif r.get_property("DayShort").value == "Tue": - assert r.get_property("Day").value == "Tuesday" - assert r.get_property("DaySplit").value == ["Tue"] + if r.parents[0].name == "DayFolder": + assert r.get_property("Day") is not None + assert r.get_property("DayShort") is not None + assert r.get_property("DayShort").value != "$day_short" + if r.get_property("DayShort").value == "Unk": + # This unkown folder should not lead to a replacement + assert r.get_property("Day").value == "Unk" + assert r.get_property("DaySplit").value == ["Unk"] + elif r.get_property("DayShort").value == "Mon": + assert r.get_property("Day").value == "Monday" + assert r.get_property("DaySplit").value == ["M", "n"] + elif r.get_property("DayShort").value == "Tue": + assert r.get_property("Day").value == "Tuesday" + assert r.get_property("DaySplit").value == ["Tue"] + else: + raise RuntimeError("There is no other short version!") + elif r.parents[0].name == "Number": + assert r.get_property("num") is not None + assert r.get_property("num").value == "'12345 5 '" else: - raise RuntimeError("There is no other short version!") + raise RuntimeError("unkown error")