diff --git a/CHANGELOG.md b/CHANGELOG.md index 88ea70fbfcc9061579d4123b600634d86016944f..4fe401389b5201894f22ad7a829e820f01c8958f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -19,8 +19,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - ZipFileConverter that opens zip files and exposes their contents as File and Directory structure elements. - `linkahead-crawler` script as alias for `caosdb-crawler`. -- New transformers of the form `cast_to_*` which allow casting variables to `int`, `float`, - `str` and `bool`. +- New transformers of the form `cast_to_*` which allow casting + variables to `int`, `float`, `str` and `bool`. +- Transformer function definitions in the cfood now support + variable substitutions. ### Changed ### diff --git a/src/caoscrawler/validator.py b/src/caoscrawler/validator.py index 8e0efd94ef348e67b991708a3d5d7917e9dd32a9..33e29b02db429e3382248bbd80d2d00cd7b07c6b 100644 --- a/src/caoscrawler/validator.py +++ b/src/caoscrawler/validator.py @@ -36,10 +36,6 @@ from caosadvancedtools.models.parser import parse_model_from_yaml from jsonschema import ValidationError from linkahead.high_level_api import convert_to_python_object -from caoscrawler import scanner - -# from collections import OrderedDict - def load_json_schema_from_datamodel_yaml(filename: str) -> dict[str, dict]: """ @@ -153,7 +149,7 @@ def validate(records: list[db.Record], schemas: dict[str, dict]) -> list[tuple]: retval = [] for r in records: if len(r.parents) != 1: - raise RuntimeError( + raise NotImplementedError( "Schema validation is only supported if records have exactly one parent.") parname = r.parents[0].name if parname not in schemas: diff --git a/src/doc/converters/transform_functions.rst b/src/doc/converters/transform_functions.rst index 22df35c8521ea0d70b2ebf7b7c8bc7c52e176bd3..ecd47d2dc004c6f1382279901dfec2d96e0e4a2d 100644 --- a/src/doc/converters/transform_functions.rst +++ b/src/doc/converters/transform_functions.rst @@ -38,8 +38,33 
@@ An example that splits the variable ``a`` and puts the generated list in ``b`` i Report: tags: $b -This splits the string in '$a' and stores the resulting list in '$b'. This is here used to add a -list valued property to the Report Record. +This splits the string in '$a' and stores the resulting list in +'$b'. This is used here to add a list-valued property to the Report +Record. Note that from LinkAhead Crawler 0.11.0 onwards, the value of +``marker`` in the above example can also be read in from a variable in +the usual ``$`` notation: + +.. code-block:: yaml + + # ... variable ``separator`` is defined somewhere above this part, e.g., + # by reading a config file. + Experiment: + type: Dict + match: ".*" + transform: + param_split: + in: $a + out: $b + functions: + - split: + marker: $separator # Now the separator is read in from a + # variable, so we can, e.g., change from + # '|' to ';' without changing the cfood + # definition. + records: + Report: + tags: $b + There are a number of transform functions that are defined by default (see diff --git a/unittests/test_transformers.py b/unittests/test_transformers.py index 5a1a9cfacca1450158e7fb1ba5330f16eca75169..a2d227adc5b0c6a8f2f96cb054e1c7670e980e10 100644 --- a/unittests/test_transformers.py +++ b/unittests/test_transformers.py @@ -30,11 +30,9 @@ See: https://gitlab.indiscale.com/caosdb/src/caosdb-crawler/-/issues/107 import importlib from pathlib import Path -from unittest.mock import MagicMock, Mock, patch +from unittest.mock import Mock -import linkahead as db import pytest -import yaml from caoscrawler.converters import Converter, ListElementConverter from caoscrawler.scanner import create_transformer_registry, scan_directory from caoscrawler.stores import GeneralStore @@ -170,20 +168,22 @@ def test_cast_transformer_functions(): for val in ("True", "true", "False", "false"): assert type(cast_to_bool(val, {})) == bool if val[1] == "r": - assert cast_to_bool(val, {}) + assert cast_to_bool(val, {}) is True 
else: - assert not cast_to_bool(val, {}) + assert cast_to_bool(val, {}) is False for val_err in ("jaksdlfj", "0", 1): with pytest.raises(ValueError): cast_to_bool(val_err, {}) - assert not cast_to_bool(False, {}) - assert cast_to_bool(True, {}) + assert cast_to_bool(False, {}) is False + assert cast_to_bool(True, {}) is True assert cast_to_int("24", {}) == 24 assert cast_to_int(24.0, {}) == 24 assert cast_to_int(24, {}) == 24 + assert cast_to_int("-24", {}) == -24 with pytest.raises(ValueError): cast_to_int("24dsf", {}) + with pytest.raises(ValueError): cast_to_int("24.0", {}) == 24 assert cast_to_float("24", {}) == 24.0 @@ -194,3 +194,23 @@ def test_cast_transformer_functions(): cast_to_float("24dsf", {}) assert cast_to_str(24, {}) == "24" + + +def test_replace_variables(): + vals = GeneralStore() + vals["test"] = "with" + vals["a"] = "str_without_replacement" + conv = Mock() + conv.definition = {} + conv.definition["transform"] = { + "test": { + "in": "$a", + "out": "$a", + "functions": [ + {"replace": { + "remove": "without", + "insert": "$test" + }} + ]}} + Converter.apply_transformers(conv, vals, {"replace": replace}) + assert vals["a"] == "str_with_replacement"