Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found

Target

Select target project
  • caosdb/src/caosdb-crawler
1 result
Show changes
Commits on Source (209)
Showing
with 1035 additions and 938 deletions
FROM debian:bullseye
FROM debian:bookworm
RUN apt-get update && \
apt-get install \
curl \
git \
openjdk-11-jdk-headless \
openjdk-17-jdk-headless \
python3-autopep8 \
python3-pip \
python3-pytest \
python3-sphinx \
tox \
-y
RUN pip3 install pylint recommonmark sphinx-rtd-theme
RUN pip3 install --break-system-packages \
pylint \
recommonmark \
sphinx-rtd-theme \
;
COPY .docker/wait-for-it.sh /wait-for-it.sh
ARG PYLIB
ADD https://gitlab.indiscale.com/api/v4/projects/97/repository/commits/${PYLIB} \
pylib_version.json
RUN git clone https://gitlab.indiscale.com/caosdb/src/caosdb-pylib.git && \
cd caosdb-pylib && git checkout ${PYLIB} && pip3 install .
cd caosdb-pylib && git checkout ${PYLIB} && pip3 install --break-system-packages .
ARG ADVANCED
ADD https://gitlab.indiscale.com/api/v4/projects/104/repository/commits/${ADVANCED} \
advanced_version.json
RUN git clone https://gitlab.indiscale.com/caosdb/src/caosdb-advanced-user-tools.git && \
cd caosdb-advanced-user-tools && git checkout ${ADVANCED} && pip3 install .[h5-crawler]
cd caosdb-advanced-user-tools && git checkout ${ADVANCED} && pip3 install --break-system-packages .[h5-crawler]
COPY . /git
# Delete .git because it is huge.
......@@ -30,7 +34,7 @@ RUN rm -r /git/.git
# Install pycaosdb.ini for the tests
RUN mv /git/.docker/tester_pycaosdb.ini /git/integrationtests/pycaosdb.ini
RUN cd /git/ && pip3 install .
RUN cd /git/ && pip3 install --break-system-packages .[h5-crawler,spss]
WORKDIR /git/integrationtests
# wait for server,
......
......@@ -113,32 +113,33 @@ info:
script:
- *env
unittest_py3.9:
unittest_py3.11:
tags: [cached-dind]
stage: test
image: $CI_REGISTRY_IMAGE
script:
- tox
- python3 -c "import sys; assert sys.version.startswith('3.11')"
- tox
unittest_py3.7:
unittest_py3.8:
tags: [cached-dind]
stage: test
image: python:3.7
image: python:3.8
script: &python_test_script
# install dependencies
- pip install pytest pytest-cov
# TODO: Use f-branch logic here
- pip install git+https://gitlab.indiscale.com/caosdb/src/caosdb-pylib.git@dev
- pip install git+https://gitlab.indiscale.com/caosdb/src/caosdb-advanced-user-tools.git@dev
- pip install .[h5-crawler]
- pip install .[h5-crawler,spss]
# actual test
- caosdb-crawler --help
- pytest --cov=caosdb -vv ./unittests
unittest_py3.8:
unittest_py3.9:
tags: [cached-dind]
stage: test
image: python:3.8
image: python:3.9
script: *python_test_script
unittest_py3.10:
......@@ -147,12 +148,31 @@ unittest_py3.10:
image: python:3.10
script: *python_test_script
unittest_py3.11:
unittest_py3.12:
tags: [cached-dind]
stage: test
image: python:3.11
image: python:3.12
script: *python_test_script
unittest_py3.13:
allow_failure: true
tags: [cached-dind]
stage: test
image: python:3.13-rc
script:
# TODO: Replace by '*python_test_script' as soon as 3.13 has been officially released.
# TODO Remove the "!" after 3.13 release, which serves as an xfail
- apt update && apt install -y cargo
# install dependencies
- pip install pytest pytest-cov
# TODO: Use f-branch logic here
- pip install git+https://gitlab.indiscale.com/caosdb/src/caosdb-pylib.git@dev
- (! pip install git+https://gitlab.indiscale.com/caosdb/src/caosdb-advanced-user-tools.git@dev)
- (! pip install .[h5-crawler,spss])
# actual test
- (! caosdb-crawler --help)
- (! pytest --cov=caosdb -vv ./unittests)
inttest:
tags: [docker]
services:
......@@ -287,7 +307,8 @@ code-style:
- job: build-testenv
optional: true
script:
- autopep8 -r --diff --exit-code .
- autopep8 --version
- autopep8 -r --diff --exit-code .
allow_failure: true
pylint:
......
......@@ -9,18 +9,45 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
### Added ###
* Support for Python 3.12 and experimental support for 3.13
* CFood macros now accept complex objects as values, not just strings.
* More options for the `CSVTableConverter`
* New converters:
* `DatetimeElementConverter`
* `SPSSConverter`
* New scripts:
* `spss_to_datamodel`
* `csv_to_datamodel`
* New transformer functions:
* `date_parse`
* `datetime_parse`
### Changed ###
* CFood macros do not render everything into strings now.
* Better internal handling of identifiable/reference resolving and merging of entities. This also
includes more understandable output for users.
* Better handling of missing imports, with nice messages for users.
* No longer use configuration of advancedtools to set to and from email addresses
### Deprecated ###
### Removed ###
* Support for Python 3.7
### Fixed ###
* [93](https://gitlab.com/linkahead/linkahead-crawler/-/issues/93) cfood.yaml does not allow umlaut in $expression
* [96](https://gitlab.com/linkahead/linkahead-crawler/-/issues/96) Do not fail silently on transaction errors
### Security ###
### Documentation ###
* General improvement of the documentation, in many small places.
* The API documentation should now also include documentation of the constructors.
## [0.7.1] - 2024-03-21 ##
### Fixed ###
......@@ -68,6 +95,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
* The `identifiable_adapters.IdentifiableAdapter` uses entity ids (negative for
entities that don't exist remotely) instead of entity objects for keeping
track of references.
* Log output is either written to $SHARED_DIR/ (when this variable is set) or just to the terminal.
### Deprecated ###
......@@ -161,6 +189,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- ``add_prefix`` and ``remove_prefix`` arguments for the command line interface
and the ``crawler_main`` function for the adding/removal of path prefixes when
creating file entities.
- More strict checking of `identifiables.yaml`.
- Better error messages when server does not conform to expected data model.
### Changed ###
......
......@@ -32,7 +32,7 @@ import sys
from argparse import RawTextHelpFormatter
from pathlib import Path
import caosdb as db
import linkahead as db
import pytest
import yaml
from caosadvancedtools.crawler import Crawler as OldCrawler
......@@ -42,8 +42,8 @@ from caoscrawler.debug_tree import DebugTree
from caoscrawler.identifiable import Identifiable
from caoscrawler.identifiable_adapters import CaosDBIdentifiableAdapter
from caoscrawler.scanner import scan_directory
from caosdb import EmptyUniqueQueryError
from caosdb.utils.register_tests import clear_database, set_test_key
from linkahead import EmptyUniqueQueryError
from linkahead.utils.register_tests import clear_database, set_test_key
set_test_key("10b128cf8a1372f30aa3697466bb55e76974e0c16a599bb44ace88f19c8f61e2")
......
......@@ -16,20 +16,18 @@
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
#
from pytest import fixture, mark, raises
import linkahead as db
from linkahead.cached import cache_clear
from caosadvancedtools.models.parser import parse_model_from_string
from caoscrawler.crawl import Crawler
from caoscrawler.identifiable import Identifiable
from caoscrawler.identifiable_adapters import CaosDBIdentifiableAdapter
from caoscrawler.scanner import (create_converter_registry,
scan_structure_elements)
from caoscrawler.structure_elements import DictElement
from caoscrawler.scanner import create_converter_registry, scan_structure_elements
from linkahead.cached import cache_clear
from linkahead.utils.register_tests import clear_database, set_test_key
from pytest import fixture, mark, raises
set_test_key("10b128cf8a1372f30aa3697466bb55e76974e0c16a599bb44ace88f19c8f61e2")
......@@ -171,8 +169,9 @@ def test_issue_83(clear_database):
name=referencing_type.name).add_property(name=referenced_type.name, value=[ref_target1])
referencing2 = db.Record(name="Referencing2").add_parent(
name=referencing_type.name).add_property(name=referenced_type.name, value=[ref_target2])
referencing3 = db.Record(name="Referencing3").add_parent(name=referencing_type.name).add_property(
name=referenced_type.name, value=[ref_target1, ref_target2])
referencing3 = db.Record(name="Referencing3").add_parent(
name=referencing_type.name).add_property(name=referenced_type.name, value=[ref_target1,
ref_target2])
records = db.Container().extend(
[ref_target1, ref_target2, referencing1, referencing2, referencing3])
......
......@@ -27,12 +27,12 @@ import os
import pytest
from subprocess import run
import caosdb as db
import linkahead as db
from caosadvancedtools.loadFiles import loadpath
from caosdb.cached import cache_clear
from linkahead.cached import cache_clear
from caosadvancedtools.models import parser as parser
from caoscrawler.crawl import crawler_main
from caosdb.utils.register_tests import clear_database, set_test_key
from linkahead.utils.register_tests import clear_database, set_test_key
set_test_key("10b128cf8a1372f30aa3697466bb55e76974e0c16a599bb44ace88f19c8f61e2")
......
......@@ -17,15 +17,15 @@ classifiers =
package_dir =
= src
packages = find:
python_requires = >=3.7
python_requires = >=3.8
install_requires =
caosadvancedtools >= 0.7.0
importlib-resources
importlib_metadata;python_version<'3.8'
linkahead > 0.13.2
odfpy #make optional
packaging
pandas
pyarrow # Will be required by Pandas >= 3.0.
pyyaml
yaml-header-tools >= 0.2.1
......@@ -40,8 +40,12 @@ per-file-ignores = __init__.py:F401
[options.entry_points]
console_scripts =
caosdb-crawler = caoscrawler.crawl:main
spss_to_datamodel = caoscrawler.conv_impl.spss:spss_to_datamodel_main
csv_to_datamodel = caoscrawler.scripts.generators:csv_to_datamodel_main
[options.extras_require]
h5-crawler =
h5py >= 3.8
numpy
spss =
pandas[spss]
from . import converters, utils
# The SPSSConverter needs the optional `spss` extra.  If its import fails we
# bind a MissingImport placeholder instead, so that importing the package
# still works and a helpful error is raised only when the converter is used.
try:
    from .conv_impl.spss import SPSSConverter
except ImportError as err:
    SPSSConverter: type = utils.MissingImport(
        name="SPSSConverter", hint="Try installing with the `spss` extra option.",
        err=err)
from .crawl import Crawler, SecurityMode
from .version import CfoodRequiredVersionError, get_caoscrawler_version

__version__ = get_caoscrawler_version()

# Convenience members #########################################################
# mypy: disable-error-code="attr-defined"
# Also expose SPSSConverter (or its placeholder) on the converters module so
# it can be looked up there like the other converter classes.
converters.SPSSConverter = SPSSConverter
......@@ -28,9 +28,12 @@ cfood:
- Definitions
- Dict
- Date
- Datetime
- JSONFile
- YAMLFile
- CSVTableConverter
- XLSXTableConverter
- SPSSFile
- H5File
- H5Dataset
- H5Group
......
# This file is a part of the LinkAhead Project.
#
# Copyright (C) 2024 IndiScale GmbH <info@indiscale.com>
# Copyright (C) 2024 Daniel Hornung <d.hornung@indiscale.com>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
"""Converter for SAV files (stored by SPSS)."""
from __future__ import annotations # Can be removed with 3.10.
import argparse
from collections import OrderedDict
import numpy as np
import pandas as pd
import pyreadstat
import yaml
from .. import converters
from ..stores import GeneralStore
from ..structure_elements import (File, StructureElement)
from typing import Optional, Any
# Map readstat variable types (see ``meta.readstat_variable_types``) to
# LinkAhead datatypes.
READSTAT_TYPES = {
    "double": "DOUBLE",
    "string": "TEXT",
}
# Overrides keyed by the original SPSS variable type (see
# ``meta.original_variable_types``), e.g. EDATE8 columns are datetimes.
ORIGINAL_TYPES = {
    "EDATE8": "DATETIME",
}
class SPSSConverter(converters.TableConverter):
    """Converter for SAV files (stored by SPSS)."""

    def create_children(self, values: GeneralStore, element: StructureElement) -> list:
        """Read the SAV file behind ``element`` and return one child per row.

        Parameters
        ----------
        values: GeneralStore
            Part of the converter interface; not used by this implementation.
        element: StructureElement
            Must be a ``File`` pointing to a readable SAV file.

        Returns
        -------
        list
            One ``DictElement`` per dataframe row (via
            ``TableConverter._children_from_dataframe``).
        """
        assert isinstance(element, File)
        # The default dtype backend "numpy_nullable" does not handle dates well.
        # Note that pandas.ArrowDtype is considered experimental (in Pandas 2.2).
        df = pd.io.spss.read_spss(element.path, dtype_backend="pyarrow")
        dtypes = read_column_types(element.path)

        # Fix datetime columns: replace missing values by None.
        # NOTE(review): ``df.loc[:, name]`` may return a copy, in which case the
        # in-place fillna/replace below would not modify ``df`` itself — confirm
        # that the original dataframe actually ends up with None values here.
        for name, dtype in dtypes.items():
            if dtype != "DATETIME":
                continue
            col = df.loc[:, name]
            col.fillna(np.nan, inplace=True)
            col.replace([np.nan], [None], inplace=True)

        return self._children_from_dataframe(df)
def read_column_types(savfile: Optional[str] = None, meta: Optional[Any] = None) -> dict[str, str]:
    """Read SAV file and return the column types.

    Optionally, take data from a previous reading.

    Parameters
    ----------
    savfile : Optional[str]
        The SAV file to read.

    meta : Optional
        The meta data result from ``pyreadstat.read_sav(...)``.

    Returns
    -------
    out : dict[str, str]
        The column names and types.

    Raises
    ------
    ValueError
        If both or neither of ``savfile`` and ``meta`` are given.
    """
    if not meta:
        # Fail early with a clear message instead of passing None on to
        # pyreadstat, which would raise a confusing low-level error.
        if savfile is None:
            raise ValueError("One of `savfile` and `meta` must be given.")
        _, meta = pyreadstat.read_sav(savfile, metadataonly=True)
    elif savfile is not None:
        raise ValueError("Only one of `savfile` and `meta` must be given.")

    dtypes: dict[str, str] = {}
    for name in meta.column_names:
        # Prefer the mapping based on the original SPSS type (e.g. EDATE8 ->
        # DATETIME); fall back to the generic readstat type mapping.
        datatype = ORIGINAL_TYPES.get(meta.original_variable_types[name],
                                      READSTAT_TYPES[meta.readstat_variable_types[name]])
        dtypes[name] = datatype
    return dtypes
def spss_to_yaml(savfile: str, yamlfile: str, cfood: Optional[str] = None) -> None:
    """Parse the *.sav and create basic datamodel in ``yamlfile``.

    Parameters
    ----------
    savfile: str
        The SAV file whose metadata is read.
    yamlfile: str
        Output filename for the generated datamodel yaml.
    cfood: str
        If given, also create a cfood skeleton.
    """
    # Only the metadata is needed here (metadataonly=True): column names,
    # labels and value labels.
    _, meta = pyreadstat.read_sav(savfile, metadataonly=True)
    dtypes = read_column_types(meta=meta)

    # Skeleton for the optional cfood output; the macro calls generated from
    # the columns are appended at the end (see the `if cfood:` branch below).
    cfood_str = """
---
metadata:
macros:
- !defmacro
# Simple column value -> property rule
name: ColumnValue
params:
name: null
belongsto: BaseElement
type: TextElement
definition:
${name}:
type: ${type}
match_name: ^${name}$$
match_value: (?P<val>.*)
records:
${belongsto}:
${name}: $$val
- !defmacro
# column value -> reference property
name: ColumnValueReference
params:
name: null
reftype: null # RecordType of the reference
belongsto: BaseElement
type: TextElement # References are always text, right?
definition:
${name}:
type: ${type}
match_name: ^${name}$$
match_value: (?P<val>.*)
records:
${reftype}:
name: $$val
${belongsto}:
${name}: $$${reftype}
- !defmacro
# Same as "ColumnValue", but also give name of property.
name: ColumnValuePropname
params:
name: null
propname: null
belongsto: BaseElement
type: TextElement
definition:
${name}:
type: ${type}
match_name: ^${name}$$
match_value: (?P<val>.*)
records:
${belongsto}:
${propname}: $$val
---
directory: # corresponds to the directory given to the crawler
type: Directory
match: .* # we do not care how it is named here
subtree:
# This is the file
thisfile:
type: SPSSFile
match: ".*sav"
subtree:
entry:
type: Dict
match: .* # Name is irrelevant
records:
MyParent:
subtree: !macro
"""

    # Collect enum definitions (from variables with value labels) and one
    # property definition per column.
    enums: dict[str, list[str]] = {}
    properties = OrderedDict()

    for name in meta.column_names:
        prop = {
            "datatype": dtypes[name],
        }
        desc = meta.column_names_to_labels.get(name)
        if desc and desc != name:
            prop["description"] = desc

        # Handle categorical variables: a column with value labels becomes a
        # reference to an enum RecordType instead of a plain datatype.
        if var_label := meta.variable_to_label.get(name):
            vvl = meta.variable_value_labels[name]
            # reproducible (and sensible) order
            label_values = [vvl[key] for key in sorted(vvl.keys())]
            if label_values not in enums.values():
                enums[var_label] = label_values
            else:
                # Reuse the existing enum that has exactly the same values.
                var_label = [key for key, value in enums.items() if value == label_values][0]
            prop["datatype"] = var_label
        properties[name] = prop

    output = f"""# auto-generated data model from file "{savfile}".
# To insert a datamodel into LinkAhead, run:
#
# python3 -m caosadvancedtools.models.parser datamodel.yaml --sync
"""

    # Actual datamodel
    output += """
#########
# Enums #
#########
"""
    # One RecordType header per enum; its possible values are only listed as a
    # comment here (Record stubs for the values are written further below).
    for name, values in enums.items():
        output += f"""{name}:
description:
# possible values: {values}\n"""

    output += ("""
###############
# RecordTypes #
###############
DummyRT:
description: Note: Change name and enter description.
recommended_properties:
"""
               # NOTE(review): the join separator determines the indentation of
               # the dumped YAML under `recommended_properties` — confirm it
               # matches the intended indent.
               + " ".join(yaml.dump(dict(properties), # from OrderedDict to dict
                                    allow_unicode=True,
                                    sort_keys=False).splitlines(keepends=True)))

    # Experimental: Enum creation
    output += """
###############
# Enum values #
###############
"""
    # A Record stub per enum value, inheriting from the enum's RecordType.
    for name, values in enums.items():
        output += f"\n# ### {name} ###\n"
        for value in values:
            output += f"""
{value}:
role: Record
inherit_from_suggested:
- {name}
"""

    with open(yamlfile, encoding="utf-8", mode="w") as myfile:
        myfile.write(output)

    # Optionally write the cfood skeleton: one `ColumnValue` macro call per
    # plain column and one `ColumnValueReference` call per enum (reference)
    # column.
    if cfood:
        defs_col_value: list[str] = []
        defs_col_value_ref: list[str] = []
        prefix = " " * 14
        for name, propdef in properties.items():
            def_str = prefix + f"- name: {name}\n"
            dtype = None
            reftype = None
            defs = defs_col_value
            # Which type?
            if propdef["datatype"] == "DOUBLE":
                dtype = "FloatElement"
            elif propdef["datatype"] in ("TEXT", "DATETIME"):
                dtype = None
            else:
                # Anything else is an enum name, i.e. a reference column.
                reftype = propdef["datatype"]
                defs = defs_col_value_ref

            # Append according to types:
            if reftype:
                def_str += prefix + f" reftype: {reftype}\n"
            if dtype:
                def_str += prefix + f" type: {dtype}\n"

            # Store result
            defs.append(def_str)
        # NOTE(review): `defs` is only bound inside the loop above; this `del`
        # raises NameError when `properties` is empty — confirm intended.
        del defs
        cfood_str += (prefix[2:] + "ColumnValue:\n" + "".join(defs_col_value)
                      + prefix[2:] + "ColumnValueReference:\n" + "".join(defs_col_value_ref)
                      )
        with open(cfood, encoding="utf-8", mode="w") as myfile:
            myfile.write(cfood_str)
def _parse_arguments():
"""Parse the arguments."""
parser = argparse.ArgumentParser(description=__doc__)
parser.add_argument('-i', '--input', help="The *.sav file.", required=True)
parser.add_argument('-o', '--outfile', help="Yaml filename to save the result", required=True)
parser.add_argument('--cfood', help="Yaml filename to create cfood output in", required=False)
return parser.parse_args()
def spss_to_datamodel_main():
    """Entry point of this script.

    Parses the command line via ``_parse_arguments``, runs ``spss_to_yaml``
    and reports the written files on stdout.
    """
    args = _parse_arguments()
    spss_to_yaml(savfile=args.input, yamlfile=args.outfile, cfood=args.cfood)
    messages = [f"Written datamodel to: {args.outfile}"]
    if args.cfood:
        messages.append(f"Written cfood to: {args.cfood}")
    for line in messages:
        print(line)
#!/usr/bin/env python3
# encoding: utf-8
#
# ** header v3.0
# This file is a part of the CaosDB Project.
# This file is a part of the LinkAhead Project.
#
# Copyright (C) 2024 Indiscale GmbH <info@indiscale.com>
# Copyright (C) 2021 Henrik tom Wörden
# 2021 Alexander Schlemmer
# Copyright (C) 2021 Alexander Schlemmer
# Copyright (C) 2024 Daniel Hornung <d.hornung@indiscale.com>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as
......@@ -19,9 +19,8 @@
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
#
# ** end header
#
"""Converters take structure elements and create Records and new structure elements from them."""
from __future__ import annotations
......@@ -34,7 +33,7 @@ import warnings
from abc import ABCMeta, abstractmethod
from inspect import signature
from string import Template
from typing import Any, List, Optional, Tuple, Union
from typing import Any, Callable, Optional, Union
import linkahead as db
import pandas as pd
......@@ -53,12 +52,16 @@ from .utils import has_parent
# by the converters:
SPECIAL_PROPERTIES = ("description", "name", "id", "path",
"file", "checksum", "size")
SINGLE_VAR_RE = re.compile(r"^\$(\{)?(?P<varname>[0-9a-zA-Z_]+)(\})?$")
ID_PATTERN = r"\D[.\w]*"
SINGLE_VAR_RE = re.compile(r"^\$(\{)?(?P<varname>" + ID_PATTERN + r")(\})?$")
logger = logging.getLogger(__name__)
class CrawlerTemplate(Template):
braceidpattern = r"(?a:[_a-z][_\.a-z0-9]*)"
# This also adds a dot to the default pattern.
# See: https://docs.python.org/3/library/string.html#template-strings
# Default flags is re.IGNORECASE
braceidpattern = ID_PATTERN
def _only_max(children_with_keys):
......@@ -134,8 +137,8 @@ def replace_variables(propvalue: Any, values: GeneralStore):
This function replaces variables in property values (and possibly other locations,
where the crawler can replace cfood-internal variables).
If `propvalue` is a single variable name preceeded with a '$' (e.g. '$var' or '${var}'), then
the corresponding value stored in `values` is returned.
If ``propvalue`` is a single variable name preceeded by a ``$`` (e.g. ``$var`` or ``${var}``),
then the corresponding value stored in ``values`` is returned.
In any other case the variable substitution is carried out as defined by string templates
and a new string with the replaced variables is returned.
"""
......@@ -160,16 +163,16 @@ def handle_value(value: Union[dict, str, list], values: GeneralStore):
add as an additional property (multiproperty).
Variable names (starting with a "$") are replaced by the corresponding value stored in the
`values` GeneralStore.
``values`` GeneralStore.
Parameters
----------
value:
- if str, the value to be interpreted. E.g. "4", "hallo" or "$a" etc.
- if dict, must have keys "value" and "collection_mode". The returned tuple is directly
value: Union[dict, str, list]
- If *str*, the value to be interpreted. E.g. "4", "hello" or "$a" etc.
- If *dict*, must have keys ``value`` and ``collection_mode``. The returned tuple is directly
created from the corresponding values.
- if list, each element is checked for replacement and the resulting list will be used
- If *list*, each element is checked for replacement and the resulting list will be used
as (list) value for the property
Returns
......@@ -181,15 +184,15 @@ out: tuple
"""
# @review Florian Spreckelsen 2022-05-13
if type(value) == dict:
if isinstance(value, dict):
if "value" not in value:
# TODO: how do we handle this case? Just ignore?
# or disallow?
raise NotImplementedError()
raise NotImplementedError(f"This definition has no \"value\": {value}")
propvalue = value["value"]
# can be "single", "list" or "multiproperty"
collection_mode = value["collection_mode"]
elif type(value) == str:
elif isinstance(value, str):
propvalue = value
collection_mode = "single"
if propvalue.startswith("+"):
......@@ -198,7 +201,7 @@ out: tuple
elif propvalue.startswith("*"):
collection_mode = "multiproperty"
propvalue = propvalue[1:]
elif type(value) == list:
elif isinstance(value, list):
# TODO: (for review)
# This is a bit dirty right now and needed for
# being able to directly set list values. Semantics is, however, a bit
......@@ -209,7 +212,7 @@ out: tuple
propvalue = list()
for element in value:
# Do the element-wise replacement only, when its type is string:
if type(element) == str:
if isinstance(element, str):
propvalue.append(replace_variables(element, values))
else:
propvalue.append(element)
......@@ -286,9 +289,7 @@ def create_records(values: GeneralStore, records: RecordStore, def_records: dict
propvalue = os.path.normpath(propvalue)
setattr(c_record, key.lower(), propvalue)
else:
if c_record.get_property(key) is None:
if collection_mode == "list":
c_record.add_property(name=key, value=[propvalue])
elif (collection_mode == "multiproperty" or
......@@ -322,10 +323,13 @@ class Converter(object, metaclass=ABCMeta):
Parameters
----------
definition: dict, Please refer to ``src/doc/converters.rst`` to learn about the structure
that the definition dict must have.
converter_registry: dict, A dictionary that contains converter names as keys and dicts as
values. Those value dicts have the keys 'converter' and 'package'.
definition: dict
Please refer to ``src/doc/converters.rst`` to learn about the structure that the
definition dict must have.
converter_registry: dict
A dictionary that contains converter names as keys and dicts as values. Those value dicts
have the keys 'converter', 'package' and 'class'. 'converter' is the class name,
'package' the module and 'class' the class instance of converters.
"""
self.definition = definition
......@@ -363,7 +367,7 @@ class Converter(object, metaclass=ABCMeta):
@staticmethod
def converter_factory(definition: dict, name: str, converter_registry: dict):
"""creates a Converter instance of the appropriate class.
"""Create a Converter instance of the appropriate class.
The `type` key in the `definition` defines the Converter class which is being used.
"""
......@@ -424,10 +428,11 @@ class Converter(object, metaclass=ABCMeta):
pass
"""
if not "transform" in self.definition:
if "transform" not in self.definition:
return
for transformer_key, transformer in self.definition["transform"].items():
in_value = replace_variables(transformer["in"], values)
out_value = in_value
for tr_func_el in transformer["functions"]:
if not isinstance(tr_func_el, dict):
......@@ -460,13 +465,13 @@ class Converter(object, metaclass=ABCMeta):
values[match.group('varname')] = out_value
@abstractmethod
def create_children(self, values: GeneralStore,
element: StructureElement):
def create_children(self, values: GeneralStore, element: StructureElement):
pass
def create_records(self, values: GeneralStore, records: RecordStore,
element: StructureElement):
# TODO why is element passed but not used???
# ANSWER: because it might be used by overriding child classes.
if "records" not in self.definition:
return []
......@@ -477,7 +482,7 @@ class Converter(object, metaclass=ABCMeta):
self.definition["records"])
def filter_children(self, children_with_strings:
List[Tuple[StructureElement, str]], expr: str,
list[tuple[StructureElement, str]], expr: str,
group: str, rule: str):
"""Filter children according to regexp `expr` and `rule`."""
......@@ -515,8 +520,8 @@ class Converter(object, metaclass=ABCMeta):
result: Optional[dict]):
""" Template for the debugging output for the match function """
msg = "\n--------" + name + "-----------\n"
for re, ma in zip(regexp, matched):
msg += "matching reg:\t" + re + "\n"
for exp, ma in zip(regexp, matched):
msg += "matching reg:\t" + exp + "\n"
msg += "matching val:\t" + ma + "\n"
msg += "---------\n"
if result is None:
......@@ -620,7 +625,7 @@ class DirectoryConverter(Converter):
element: A directory (of type Directory) which will be traversed.
"""
children: List[StructureElement] = []
children: list[StructureElement] = []
for name in sorted(os.listdir(element.path)):
path = os.path.join(element.path, name)
......@@ -660,7 +665,7 @@ class SimpleFileConverter(Converter):
class FileConverter(SimpleFileConverter):
def __init__(self, *args, **kwargs):
warnings.warn(DeprecationWarning(
"This class is depricated. Please use SimpleFileConverter."))
"This class is deprecated. Please use SimpleFileConverter."))
super().__init__(*args, **kwargs)
......@@ -693,12 +698,12 @@ class MarkdownFileConverter(SimpleFileConverter):
"Error during the validation (yaml header cannot be read) of the markdown file "
"located at the following node in the data structure:\n"
"{}\nError:\n{}".format(path, err))
children: List[StructureElement] = []
children: list[StructureElement] = []
for name, entry in header.items():
if type(entry) == list:
if isinstance(entry, list):
children.append(ListElement(name, entry))
elif type(entry) == str:
elif isinstance(entry, str):
children.append(TextElement(name, entry))
else:
if generalStore is not None and self.name in generalStore:
......@@ -713,7 +718,9 @@ class MarkdownFileConverter(SimpleFileConverter):
def convert_basic_element(element: Union[list, dict, bool, int, float, str, None], name=None,
msg_prefix=""):
"""Convert basic Python objects to the corresponding StructureElements"""
if isinstance(element, list):
if isinstance(element, StructureElement):
return element
elif isinstance(element, list):
return ListElement(name, element)
elif isinstance(element, dict):
return DictElement(name, element)
......@@ -814,14 +821,14 @@ class DictElementConverter(Converter):
class DictConverter(DictElementConverter):
def __init__(self, *args, **kwargs):
warnings.warn(DeprecationWarning(
"This class is depricated. Please use DictConverter."))
"This class is deprecated. Please use DictElementConverter."))
super().__init__(*args, **kwargs)
class DictDictElementConverter(DictElementConverter):
def __init__(self, *args, **kwargs):
warnings.warn(DeprecationWarning(
"This class is depricated. Please use DictElementConverter."))
"This class is deprecated. Please use DictElementConverter."))
super().__init__(*args, **kwargs)
......@@ -886,7 +893,7 @@ out:
"""
if "match_name" in definition:
if "match" in definition:
raise RuntimeError(f"Do not supply both, 'match_name' and 'match'.")
raise RuntimeError("Do not supply both, 'match_name' and 'match'.")
m1 = re.match(definition["match_name"], name)
if m1 is None:
......@@ -1009,7 +1016,7 @@ class BooleanElementConverter(_AbstractScalarValueElementConverter):
class DictBooleanElementConverter(BooleanElementConverter):
def __init__(self, *args, **kwargs):
warnings.warn(DeprecationWarning(
"This class is depricated. Please use BooleanElementConverter."))
"This class is deprecated. Please use BooleanElementConverter."))
super().__init__(*args, **kwargs)
......@@ -1025,7 +1032,7 @@ class FloatElementConverter(_AbstractScalarValueElementConverter):
class DictFloatElementConverter(FloatElementConverter):
def __init__(self, *args, **kwargs):
warnings.warn(DeprecationWarning(
"This class is depricated. Please use FloatElementConverter."))
"This class is deprecated. Please use FloatElementConverter."))
super().__init__(*args, **kwargs)
......@@ -1040,7 +1047,7 @@ class TextElementConverter(_AbstractScalarValueElementConverter):
def __init__(self, definition, *args, **kwargs):
if "match" in definition:
raise ValueError("""
The 'match' key will in future be used to match a potential name of a TextElement. Please use
The 'match' key is used to match a potential name of a TextElement. Please use
the 'match_value' key to match the value of the TextElement and 'match_name' for matching the name.
""")
......@@ -1050,7 +1057,7 @@ the 'match_value' key to match the value of the TextElement and 'match_name' for
class DictTextElementConverter(TextElementConverter):
def __init__(self, *args, **kwargs):
warnings.warn(DeprecationWarning(
"This class is depricated. Please use TextElementConverter."))
"This class is deprecated. Please use TextElementConverter."))
super().__init__(*args, **kwargs)
......@@ -1066,7 +1073,7 @@ class IntegerElementConverter(_AbstractScalarValueElementConverter):
class DictIntegerElementConverter(IntegerElementConverter):
def __init__(self, *args, **kwargs):
warnings.warn(DeprecationWarning(
"This class is depricated. Please use IntegerElementConverter."))
"This class is deprecated. Please use IntegerElementConverter."))
super().__init__(*args, **kwargs)
......@@ -1076,7 +1083,7 @@ class ListElementConverter(Converter):
# TODO: See comment on types and inheritance
if not isinstance(element, ListElement):
raise RuntimeError(
"This converter can only process DictListElements.")
"This converter can only process ListElements.")
children: list[StructureElement] = []
for index, list_element in enumerate(element.value):
children.append(
......@@ -1108,7 +1115,7 @@ class ListElementConverter(Converter):
class DictListElementConverter(ListElementConverter):
def __init__(self, *args, **kwargs):
warnings.warn(DeprecationWarning(
"This class is depricated. Please use ListElementConverter."))
"This class is deprecated. Please use ListElementConverter."))
super().__init__(*args, **kwargs)
......@@ -1122,15 +1129,22 @@ class TableConverter(Converter):
The rows can be matched using a DictElementConverter.
"""
@abstractmethod
def get_options(self):
"""
This method needs to be overwritten by the specific table converter to provide
information about the possible options.
def get_options(self) -> dict:
"""Get specific options, e.g. from ``self.definitions``.
This method may to be overwritten by the specific table converter to provide information about the
possible options. Implementors may use ``TableConverter._get_options(...)`` to get (and convert)
options from ``self.definitions``.
Returns
-------
out: dict
An options dict.
"""
pass
return {}
def _get_options(self, possible_options):
def _get_options(self, possible_options: list[tuple[str, Callable]]) -> dict:
option_dict = dict()
for opt_name, opt_conversion in possible_options:
if opt_name in self.definition:
......@@ -1158,6 +1172,14 @@ class TableConverter(Converter):
return None
return m.groupdict()
@staticmethod
def _children_from_dataframe(dataframe: pd.DataFrame):
    """Create one DictElement child per row of ``dataframe``.

    Each child is named after the row's index (converted to ``str``) and carries
    the row's column-to-value mapping as its dict content.

    Parameters
    ----------
    dataframe: pd.DataFrame
        The table whose rows become the children.

    Returns
    -------
    list
        A list of DictElement structure elements, one per row.
    """
    return [DictElement(str(row_index), row.to_dict())
            for row_index, row in dataframe.iterrows()]
class XLSXTableConverter(TableConverter):
"""
......@@ -1187,11 +1209,7 @@ class XLSXTableConverter(TableConverter):
if not isinstance(element, File):
raise RuntimeError("Element must be a File.")
table = pd.read_excel(element.path, **self.get_options())
child_elements = list()
for index, row in table.iterrows():
child_elements.append(
DictElement(str(index), row.to_dict()))
return child_elements
return self._children_from_dataframe(table)
class CSVTableConverter(TableConverter):
......@@ -1216,22 +1234,19 @@ class CSVTableConverter(TableConverter):
if not isinstance(element, File):
raise RuntimeError("Element must be a File.")
table = pd.read_csv(element.path, **self.get_options())
child_elements = list()
for index, row in table.iterrows():
child_elements.append(
DictElement(str(index), row.to_dict()))
return child_elements
return self._children_from_dataframe(table)
class DateElementConverter(TextElementConverter):
"""allows to convert different text formats of dates to Python date objects.
The text to be parsed must be contained in the "date" group. The format string can be supplied
under "dateformat" in the Converter definition. The library used is datetime so see its
under "date_format" in the Converter definition. The library used is datetime so see its
documentation for information on how to create the format string.
"""
# TODO make `date` parameter name configurable
def match(self, element: StructureElement):
matches = super().match(element)
if matches is not None and "date" in matches:
......@@ -1240,3 +1255,24 @@ class DateElementConverter(TextElementConverter):
self.definition["date_format"] if "date_format" in self.definition else "%Y-%m-%d"
).date()})
return matches
class DatetimeElementConverter(TextElementConverter):
    """Convert text so that it is formatted in a way that LinkAhead can understand it.

    The text to be parsed must be in the ``val`` match group. The input format string can
    be supplied in the ``datetime_format`` node of the converter definition. This class
    uses the ``datetime`` module, so ``datetime_format`` must follow this specification:
    https://docs.python.org/3/library/datetime.html#strftime-and-strptime-format-codes
    """

    # TODO make `val` parameter name configurable
    def match(self, element: StructureElement):
        result = super().match(element)
        # Only normalize when the parent matched and captured a `val` group.
        if result is None or "val" not in result:
            return result
        out_format = "%Y-%m-%dT%H:%M:%S"
        in_format = self.definition.get("datetime_format", out_format)
        parsed = datetime.datetime.strptime(result["val"], in_format)
        result["val"] = parsed.strftime(out_format)
        return result
This diff is collapsed.
......@@ -8,6 +8,9 @@ BooleanElement:
Date:
converter: DateElementConverter
package: caoscrawler.converters
Datetime:
converter: DatetimeElementConverter
package: caoscrawler.converters
Dict:
converter: DictElementConverter
package: caoscrawler.converters
......@@ -24,7 +27,7 @@ TextElement:
converter: TextElementConverter
package: caoscrawler.converters
DictDictElement: # deprecated
converter: DictElementConverter
package: caoscrawler.converters
......@@ -60,7 +63,7 @@ File: # deprecated
converter: SimpleFileConverter
package: caoscrawler.converters
SimpleFile:
converter: SimpleFileConverter
package: caoscrawler.converters
......@@ -81,6 +84,10 @@ CSVTableConverter:
converter: CSVTableConverter
package: caoscrawler.converters
SPSSFile:
converter: SPSSConverter
package: caoscrawler.converters
XLSXTableConverter:
converter: XLSXTableConverter
package: caoscrawler.converters
# Lookup table for matching functions and cfood yaml node names.
submatch:
package: caoscrawler.transformer_functions
......@@ -9,3 +9,9 @@ split:
replace:
package: caoscrawler.transformer_functions
function: replace
date_parse:
package: caoscrawler.transformer_functions
function: date_parse
datetime_parse:
package: caoscrawler.transformer_functions
function: datetime_parse
#!/usr/bin/env python3
# encoding: utf-8
#
# This file is a part of the LinkAhead Project.
#
# Copyright (C) 2024 Indiscale GmbH <info@indiscale.com>
# Copyright (C) 2024 Henrik tom Wörden <h.tomwoerden@indiscale.com>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
#
class ForbiddenTransaction(Exception):
    """Thrown if a transaction is needed that is not allowed.

    For example an update of an entity if the security level is INSERT.
    """
    pass
class ImpossibleMergeError(Exception):
    """Thrown if, due to identifying information, two SyncNodes or two Properties of
    SyncNodes should be merged, but there is conflicting information that prevents this.

    Attributes
    ----------
    pname:
        Name of the property whose values are in conflict.
    values:
        The conflicting values.
    """

    def __init__(self, *args, pname, values, **kwargs):
        self.pname = pname
        self.values = values
        # Bug fix: do not pass `self` on to Exception.__init__ — doing so puts the
        # exception instance itself into `args`, which garbles str()/repr() output.
        super().__init__(*args, **kwargs)
class InvalidIdentifiableYAML(Exception):
    """Thrown if the YAML definition of the identifiable is invalid."""
    pass
class MissingIdentifyingProperty(Exception):
    """Thrown if a SyncNode does not have the properties required by the corresponding
    registered identifiable.
    """
    pass
class MissingRecordType(Exception):
    """Thrown if a record type cannot be found although it is expected that it exists
    on the server.
    """
    pass
class MissingReferencingEntityError(Exception):
    """Thrown if the identifiable requires that some entity references the given entity
    but there is no such reference.

    Attributes
    ----------
    rts: list, optional
        The record types of the required referencing entities.
    """

    def __init__(self, *args, rts=None, **kwargs):
        self.rts = rts
        # Bug fix: do not pass `self` on to Exception.__init__ — doing so puts the
        # exception instance itself into `args`, which garbles str()/repr() output.
        super().__init__(*args, **kwargs)
......@@ -18,6 +18,8 @@
# along with this program. If not, see <https://www.gnu.org/licenses/>.
#
from typing import Optional
try:
import h5py
except ModuleNotFoundError:
......@@ -94,8 +96,8 @@ def convert_h5_element(elt: Union[h5py.Group, h5py.Dataset], name: str):
raise ValueError("The given element must be either a HDF5 Group or Dataset object.")
def convert_basic_element_with_nd_array(value, name: str = None,
internal_path: str = None, msg_prefix: str = ""):
def convert_basic_element_with_nd_array(value, name: Optional[str] = None,
internal_path: Optional[str] = None, msg_prefix: str = ""):
"""Convert a given object either to an ndarray structure element or to a
basic scalar structure element.
......
#!/usr/bin/env python3
# encoding: utf-8
#
# This file is a part of the CaosDB Project.
# This file is a part of the LinkAhead Project.
#
# Copyright (C) 2022 Henrik tom Wörden
#
......@@ -20,23 +20,27 @@
#
from __future__ import annotations
import linkahead as db
from datetime import datetime
import json
from hashlib import sha256
from typing import Union
import logging
from datetime import datetime
from hashlib import sha256
from typing import Optional, Union
import linkahead as db
from .exceptions import MissingIdentifyingProperty
from .sync_node import SyncNode
logger = logging.getLogger(__name__)
class Identifiable():
"""
The fingerprint of a Record in CaosDB.
The fingerprint of a Record in LinkAhead.
This class contains the information that is used by the CaosDB Crawler to identify Records.
On one hand, this can be the ID or a Record or the path of a File.
On the other hand, in order to check whether a Record exits in the CaosDB Server, a query can
This class contains the information that is used by the LinkAhead Crawler to identify Records.
In order to check whether a Record exits in the LinkAhead Server, a query can
be created using the information contained in the Identifiable.
Parameters
......@@ -46,23 +50,22 @@ class Identifiable():
properties: dict, keys are names of Properties; values are Property values
Note, that lists are not checked for equality but are interpreted as multiple
conditions for a single Property.
path: str, In case of files: The path where the file is stored.
backrefs: list, TODO future
"""
def __init__(self, record_id: int = None, path: str = None, record_type: str = None,
name: str = None, properties: dict = None,
backrefs: list[Union[int, str]] = None):
if (record_id is None and path is None and name is None
def __init__(self, record_id: Optional[int] = None, record_type: Optional[str] = None,
name: Optional[str] = None, properties: Optional[dict] = None,
backrefs: Optional[list[Union[int, str]]] = None):
if (record_id is None and name is None
and (backrefs is None or len(backrefs) == 0)
and (properties is None or len(properties) == 0)):
raise ValueError("There is no identifying information. You need to add a path or "
"properties or other identifying attributes.")
raise ValueError(
"There is no identifying information. You need to add "
"properties or other identifying attributes.")
if properties is not None and 'name' in [k.lower() for k in properties.keys()]:
raise ValueError("Please use the separete 'name' keyword instead of the properties "
"dict for name")
self.record_id = record_id
self.path = path
self.record_type = record_type
self.name = name
if name == "":
......@@ -77,24 +80,21 @@ class Identifiable():
def get_representation(self) -> str:
    """Return the SHA-256 hex digest of this identifiable's hashable string form."""
    hashable_string = Identifiable._create_hashable_string(self)
    return sha256(hashable_string.encode('utf-8')).hexdigest()
@staticmethod
@ staticmethod
def _value_representation(value) -> str:
"""returns the string representation of property values to be used in the hash function
The string is the path of a File Entity, the CaosDB ID or Python ID of other Entities
(Python Id only if there is no CaosDB ID) and the string representation of bool, float, int
and str.
The string is the LinkAhead ID in case of SyncNode objects (SyncNode objects must have an ID)
and the string representation of None, bool, float, int, datetime and str.
"""
if value is None:
return "None"
elif isinstance(value, db.File):
return str(value.path)
elif isinstance(value, db.Entity):
elif isinstance(value, SyncNode):
if value.id is not None:
return str(value.id)
else:
return "PyID=" + str(id(value))
raise RuntimeError("Python Entity (SyncNode) without ID not allowed")
elif isinstance(value, list):
return "[" + ", ".join([Identifiable._value_representation(el) for el in value]) + "]"
elif (isinstance(value, str) or isinstance(value, int) or isinstance(value, float)
......@@ -103,7 +103,7 @@ class Identifiable():
else:
raise ValueError(f"Unknown datatype of the value: {value}")
@staticmethod
@ staticmethod
def _create_hashable_string(identifiable: Identifiable) -> str:
"""
creates a string from the attributes of an identifiable that can be hashed
......@@ -120,27 +120,20 @@ class Identifiable():
return rec_string
def __eq__(self, other) -> bool:
"""
Identifiables are equal if they belong to the same Record. Since ID and path are on their
own enough to identify the Record it is sufficient if those attributes are equal.
1. both IDs are set (not None) -> equal if IDs are equal
2. both paths are set (not None) -> equal if paths are equal
3. equal if attribute representations are equal
"""
""" Identifiables are equal if they share the same ID or if the representation is equal """
if not isinstance(other, Identifiable):
raise ValueError("Identifiable can only be compared to other Identifiable objects.")
elif self.record_id is not None and other.record_id is not None:
if self.record_id is not None and other.record_id is not None:
return self.record_id == other.record_id
elif self.path is not None and other.path is not None:
return self.path == other.path
elif self.get_representation() == other.get_representation():
return True
else:
return False
def __repr__(self):
pstring = json.dumps(self.properties)
""" deterministic text representation of the identifiable """
pstring = json.dumps({k: str(v) for k, v in self.properties.items()})
return (f"{self.__class__.__name__} for RT {self.record_type}: id={self.record_id}; "
f"name={self.name}\n\tpath={self.path}\n"
f"name={self.name}\n"
f"\tproperties:\n{pstring}\n"
f"\tbackrefs:\n{self.backrefs}")
This diff is collapsed.
......@@ -25,12 +25,17 @@
# Function to expand a macro in yaml
# A. Schlemmer, 05/2022
import re
from dataclasses import dataclass
from typing import Any, Dict
from copy import deepcopy
from string import Template
_SAFE_SUBST_PAT = re.compile(r"^\$(?P<key>\w+)$")
_SAFE_SUBST_PAT_BRACES = re.compile(r"^\$\{(?P<key>\w+)}$")
@dataclass
class MacroDefinition:
"""
......@@ -53,6 +58,12 @@ def substitute(propvalue, values: dict):
Substitution of variables in strings using the variable substitution
library from python's standard library.
"""
# Simple matches are simply replaced by the raw dict entry.
if match := (_SAFE_SUBST_PAT.fullmatch(propvalue)
or _SAFE_SUBST_PAT_BRACES.fullmatch(propvalue)):
key = match.group("key")
if key in values:
return values[key]
propvalue_template = Template(propvalue)
return propvalue_template.safe_substitute(**values)
......