From 9713d134031378812151f79d6893eced66dc1a2c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Henrik=20tom=20W=C3=B6rden?= <h.tomwoerden@indiscale.com>
Date: Thu, 18 Jan 2024 20:17:23 +0100
Subject: [PATCH] ENH: add new transform function: replace

---
 src/caoscrawler/default_transformers.yml |  3 +++
 src/caoscrawler/transformer_functions.py | 11 +++++++++++
 unittests/test_converters.py             | 19 ++++++++++++++++++-
 3 files changed, 32 insertions(+), 1 deletion(-)

diff --git a/src/caoscrawler/default_transformers.yml b/src/caoscrawler/default_transformers.yml
index 74a76ae7..1821d9dd 100644
--- a/src/caoscrawler/default_transformers.yml
+++ b/src/caoscrawler/default_transformers.yml
@@ -6,3 +6,6 @@ submatch:
 split:
   package: caoscrawler.transformer_functions
   function: split
+replace:
+  package: caoscrawler.transformer_functions
+  function: replace
diff --git a/src/caoscrawler/transformer_functions.py b/src/caoscrawler/transformer_functions.py
index 8901c2f5..eda9f3c2 100644
--- a/src/caoscrawler/transformer_functions.py
+++ b/src/caoscrawler/transformer_functions.py
@@ -50,3 +50,14 @@ def split(in_value: Any, in_parameters: dict):
     if not isinstance(in_value, str):
         raise RuntimeError("must be string")
     return in_value.split(in_parameters['marker'])
+
+
+def replace(in_value: Any, in_parameters: dict):
+    """calls the string 'replace' function on the first argument and uses the value of the keys
+    'remove' and 'insert' stored in the second argument
+    """
+    if "remove" not in in_parameters or "insert" not in in_parameters:
+        raise RuntimeError("Mandatory parameter missing.")
+    if not isinstance(in_value, str):
+        raise RuntimeError("must be string")
+    return in_value.replace(in_parameters['remove'], in_parameters['insert'])
diff --git a/unittests/test_converters.py b/unittests/test_converters.py
index 42b078eb..248c5e3b 100644
--- a/unittests/test_converters.py
+++ b/unittests/test_converters.py
@@ -51,7 +51,7 @@ from caoscrawler.structure_elements import (BooleanElement, DictElement,
                                             Directory, File, FloatElement,
                                             IntegerElement, ListElement,
                                             TextElement)
-from caoscrawler.transformer_functions import split
+from caoscrawler.transformer_functions import replace, split
 
 UNITTESTDIR = Path(__file__).parent
 
@@ -400,6 +400,23 @@ def test_apply_transformers(converter_registry):
     assert values['a'] == ["a", "b", "c"]
 
 
+def test_apply_replace(converter_registry):
+    cfood_def = {"type": 'ListElement', "match_name": ".*",
+                 'transform': {'test': {'in': '$a', 'out': '$b', 'functions': [{
+                     'replace': {'insert': ':', "remove": "_"}}]}}}
+    values = GeneralStore()
+    values["a"] = "16_45"
+
+    # transformer_functions = create_transformer_registry(crawler_definition)
+    transformer_functions = {"replace": replace}
+
+    conv = ListElementConverter(definition=cfood_def, name='test',
+                                converter_registry=converter_registry)
+
+    conv.apply_transformers(values, transformer_functions)
+    assert values['b'] == "16:45"
+
+
 def test_filter_children_of_directory(converter_registry, capsys):
     """Verify that children (i.e., files) in a directory are filtered or sorted correctly. """
     test_dir = Directory("examples_filter_children", UNITTESTDIR /
-- 
GitLab