From 9713d134031378812151f79d6893eced66dc1a2c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Henrik=20tom=20W=C3=B6rden?= <h.tomwoerden@indiscale.com>
Date: Thu, 18 Jan 2024 20:17:23 +0100
Subject: [PATCH] ENH: add new transform function: replace

---
 src/caoscrawler/default_transformers.yml |  3 +++
 src/caoscrawler/transformer_functions.py | 11 +++++++++++
 unittests/test_converters.py             | 19 ++++++++++++++++++-
 3 files changed, 32 insertions(+), 1 deletion(-)

diff --git a/src/caoscrawler/default_transformers.yml b/src/caoscrawler/default_transformers.yml
index 74a76ae7..1821d9dd 100644
--- a/src/caoscrawler/default_transformers.yml
+++ b/src/caoscrawler/default_transformers.yml
@@ -6,3 +6,6 @@ submatch:
 split:
   package: caoscrawler.transformer_functions
   function: split
+replace:
+  package: caoscrawler.transformer_functions
+  function: replace
diff --git a/src/caoscrawler/transformer_functions.py b/src/caoscrawler/transformer_functions.py
index 8901c2f5..eda9f3c2 100644
--- a/src/caoscrawler/transformer_functions.py
+++ b/src/caoscrawler/transformer_functions.py
@@ -50,3 +50,14 @@ def split(in_value: Any, in_parameters: dict):
     if not isinstance(in_value, str):
         raise RuntimeError("must be string")
     return in_value.split(in_parameters['marker'])
+
+
+def replace(in_value: Any, in_parameters: dict):
+    """Replace every occurrence of in_parameters['remove'] in the string in_value by
+    in_parameters['insert']; RuntimeError on a missing key or non-string input."""
+    if not all(key in in_parameters for key in ("remove", "insert")):
+        raise RuntimeError("Mandatory parameter missing.")
+    if not isinstance(in_value, str):
+        raise RuntimeError("must be string")
+    old, new = in_parameters['remove'], in_parameters['insert']
+    return in_value.replace(old, new)
diff --git a/unittests/test_converters.py b/unittests/test_converters.py
index 42b078eb..248c5e3b 100644
--- a/unittests/test_converters.py
+++ b/unittests/test_converters.py
@@ -51,7 +51,7 @@ from caoscrawler.structure_elements import (BooleanElement, DictElement,
                                             Directory, File, FloatElement,
                                             IntegerElement, ListElement,
                                             TextElement)
-from caoscrawler.transformer_functions import split
+from caoscrawler.transformer_functions import replace, split
 
 UNITTESTDIR = Path(__file__).parent
 
@@ -400,6 +400,23 @@ def test_apply_transformers(converter_registry):
     assert values['a'] == ["a", "b", "c"]
 
 
+def test_apply_replace(converter_registry):
+    """Check that the 'replace' transformer turns "$a" ("16_45") into "$b" ("16:45")."""
+    replace_step = {'replace': {'insert': ':', "remove": "_"}}
+    transform_def = {'test': {'in': '$a', 'out': '$b', 'functions': [replace_step]}}
+    cfood_def = {"type": 'ListElement', "match_name": ".*", 'transform': transform_def}
+
+    store = GeneralStore()
+    store["a"] = "16_45"
+
+    # The function is passed in directly; no transformer registry is created here.
+    converter = ListElementConverter(definition=cfood_def, name='test',
+                                     converter_registry=converter_registry)
+    converter.apply_transformers(store, {"replace": replace})
+
+    assert store['b'] == "16:45"
+
+
 def test_filter_children_of_directory(converter_registry, capsys):
     """Verify that children (i.e., files) in a directory are filtered or sorted correctly. """
     test_dir = Directory("examples_filter_children", UNITTESTDIR /
-- 
GitLab