diff --git a/src/caoscrawler/default_transformers.yml b/src/caoscrawler/default_transformers.yml index 1821d9dd1c3cc3d8b982cd0e5f5eb3cfa07b3150..d0ad23912176bdfbf2446aa6e04bd7fa6b858777 100644 --- a/src/caoscrawler/default_transformers.yml +++ b/src/caoscrawler/default_transformers.yml @@ -8,4 +8,4 @@ split: function: split replace: package: caoscrawler.transformer_functions - function: split + function: replace diff --git a/unittests/test_converters.py b/unittests/test_converters.py index 67c93c74e4011bc402817b79a660a8c06ba38800..52ece13dc2269a3e3b16e6378166e91b084f4a7c 100644 --- a/unittests/test_converters.py +++ b/unittests/test_converters.py @@ -45,8 +45,8 @@ from caoscrawler.converters import (Converter, ConverterValidationError, handle_value, replace_variables) from caoscrawler.crawl import Crawler from caoscrawler.scanner import (_load_definition_from_yaml_dict, - create_converter_registry, load_definition, - create_transformer_registry) + create_converter_registry, + create_transformer_registry, load_definition) from caoscrawler.stores import GeneralStore from caoscrawler.structure_elements import (BooleanElement, DictElement, Directory, File, FloatElement, @@ -401,39 +401,6 @@ def test_apply_transformers(converter_registry): assert values['a'] == ["a", "b", "c"] -def test_apply_replace(converter_registry): - cfood_def = {"type": 'ListElement', "match_name": ".*", - 'transform': {'test': {'in': '$a', 'out': '$b', 'functions': [{ - 'replace': {'insert': ':', "remove": "_"}}]}}} - values = GeneralStore() - values["a"] = "16_45" - - # transformer_functions = create_transformer_registry(crawler_definition) - transformer_functions = {"replace": replace} - - conv = ListElementConverter(definition=cfood_def, name='test', - converter_registry=converter_registry) - - conv.apply_transformers(values, transformer_functions) - assert values['b'] == "16:45" - -def test_apply_replace_from_def(converter_registry): - cfood_def = {"type": 'ListElement', "match_name": ".*", - 'transform': {'test': {'in': '$a', 'out': '$b', 'functions': [{ - 'replace': {'insert': ':', "remove": "_"}}]}}} - values = GeneralStore() - values["a"] = "16_45" - - transformer_functions = create_transformer_registry({}) - # transformer_functions = {"replace": replace} - - conv = ListElementConverter(definition=cfood_def, name='test', - converter_registry=converter_registry) - - conv.apply_transformers(values, transformer_functions) - assert values['b'] == "16:45" - - def test_filter_children_of_directory(converter_registry, capsys): """Verify that children (i.e., files) in a directory are filtered or sorted correctly. """ test_dir = Directory("examples_filter_children", UNITTESTDIR / diff --git a/unittests/test_transformers.py b/unittests/test_transformers.py index ac530c9147fd7a4c86fa2ef668b6366a722935b0..02d932d13cc3fad52048b08e2b9fe56f11db2ae7 100644 --- a/unittests/test_transformers.py +++ b/unittests/test_transformers.py @@ -28,6 +28,7 @@ Currently, this is under development. See: https://gitlab.indiscale.com/caosdb/src/caosdb-crawler/-/issues/107 """ +import importlib from functools import partial from pathlib import Path from tempfile import NamedTemporaryFile @@ -36,10 +37,10 @@ from unittest.mock import MagicMock, Mock, patch import caosdb as db import pytest import yaml -from caoscrawler.scanner import (create_converter_registry, load_definition, - scan_directory, scan_structure_elements) -from caoscrawler.structure_elements import (DictElement, DictListElement, - DictTextElement, File) +from caoscrawler.converters import Converter, ListElementConverter +from caoscrawler.scanner import create_transformer_registry, scan_directory +from caoscrawler.stores import GeneralStore +from caoscrawler.transformer_functions import replace, split from pytest import raises UNITTESTDIR = Path(__file__).parent @@ -79,3 +80,69 @@ def test_simple_transformer(): else: # unkown error, something wrong with test directories assert False + + +@pytest.fixture +def converter_registry(): + converter_registry: dict[str, dict[str, str]] = { + "Directory": { + "converter": "DirectoryConverter", + "package": "caoscrawler.converters"}, + "MarkdownFile": { + "converter": "MarkdownFileConverter", + "package": "caoscrawler.converters"}, + "Date": { + "converter": "DateElementConverter", + "package": "caoscrawler.converters"}, + "DictElement": { + "converter": "DictElementConverter", + "package": "caoscrawler.converters"}, + "TextElement": { + "converter": "TextElementConverter", + "package": "caoscrawler.converters"}, + "ListElement": { + "converter": "ListElementConverter", + "package": "caoscrawler.converters"}, + "JSONFile": { + "converter": "JSONFileConverter", + "package": "caoscrawler.converters"}, + } + + for key, value in converter_registry.items(): + module = importlib.import_module(value["package"]) + value["class"] = getattr(module, value["converter"]) + return converter_registry + + +def test_apply_replace(converter_registry): + cfood_def = {"type": 'ListElement', "match_name": ".*", + 'transform': {'test': {'in': '$a', 'out': '$b', 'functions': [{ + 'replace': {'insert': ':', "remove": "_"}}]}}} + values = GeneralStore() + values["a"] = "16_45" + + # transformer_functions = create_transformer_registry(crawler_definition) + transformer_functions = {"replace": replace} + + conv = ListElementConverter(definition=cfood_def, name='test', + converter_registry=converter_registry) + + conv.apply_transformers(values, transformer_functions) + assert values['b'] == "16:45" + + +def test_apply_replace_from_def(converter_registry): + cfood_def = {"type": 'ListElement', "match_name": ".*", + 'transform': {'test': {'in': '$a', 'out': '$b', 'functions': [{ + 'replace': {'insert': ':', "remove": "_"}}]}}} + values = GeneralStore() + values["a"] = "16_45" + + transformer_functions = create_transformer_registry({}) + # transformer_functions = {"replace": replace} + + conv = ListElementConverter(definition=cfood_def, name='test', + converter_registry=converter_registry) + + conv.apply_transformers(values, transformer_functions) + assert values['b'] == "16:45"