diff --git a/.docker/Dockerfile b/.docker/Dockerfile
index 539ac0d4e70bfbde2f630d4254cacc7419105611..1e9763f3496c9dca6cc33e6ba8217a654bed487e 100644
--- a/.docker/Dockerfile
+++ b/.docker/Dockerfile
@@ -1,27 +1,31 @@
-FROM debian:bullseye
+FROM debian:bookworm
 RUN apt-get update && \
     apt-get install \
     curl \
     git \
-    openjdk-11-jdk-headless \
+    openjdk-17-jdk-headless \
     python3-autopep8 \
     python3-pip \
     python3-pytest \
     python3-sphinx \
     tox \
     -y
-RUN pip3 install pylint recommonmark sphinx-rtd-theme
+RUN pip3 install --break-system-packages \
+  pylint \
+  recommonmark \
+  sphinx-rtd-theme \
+  ;
 COPY .docker/wait-for-it.sh /wait-for-it.sh
 ARG PYLIB
 ADD https://gitlab.indiscale.com/api/v4/projects/97/repository/commits/${PYLIB} \
     pylib_version.json
 RUN git clone https://gitlab.indiscale.com/caosdb/src/caosdb-pylib.git && \
-    cd caosdb-pylib && git checkout ${PYLIB} && pip3 install .
+    cd caosdb-pylib && git checkout ${PYLIB} && pip3 install --break-system-packages .
 ARG ADVANCED
 ADD https://gitlab.indiscale.com/api/v4/projects/104/repository/commits/${ADVANCED} \
     advanced_version.json
 RUN git clone https://gitlab.indiscale.com/caosdb/src/caosdb-advanced-user-tools.git && \
-    cd caosdb-advanced-user-tools && git checkout ${ADVANCED} && pip3 install .[h5-crawler]
+    cd caosdb-advanced-user-tools && git checkout ${ADVANCED} && pip3 install --break-system-packages .[h5-crawler]
 COPY . /git
 
 # Delete .git because it is huge.
@@ -30,7 +34,7 @@ RUN rm -r /git/.git
 # Install pycaosdb.ini for the tests
 RUN mv /git/.docker/tester_pycaosdb.ini /git/integrationtests/pycaosdb.ini
 
-RUN cd /git/ && pip3 install .
+RUN cd /git/ && pip3 install --break-system-packages .[h5-crawler,spss]
 
 WORKDIR /git/integrationtests
 # wait for server,
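Context for the repeated `--break-system-packages` flag above: Debian bookworm marks its system Python as externally managed (PEP 668), so a plain `pip3 install` into the system environment is refused. A minimal sketch (assuming the standard Debian layout) of the marker that pip checks for:

```python
# Detect the PEP 668 marker that makes plain `pip3 install` fail on
# Debian bookworm; `--break-system-packages` bypasses this refusal.
import sysconfig
from pathlib import Path

marker = Path(sysconfig.get_path("stdlib")) / "EXTERNALLY-MANAGED"
print("externally managed:", marker.exists())
```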
diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index 879291320a7a715c10113f850a9f43f9465a7196..8812abacc0ef157c418e8f658a4fa7261bb04743 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -113,32 +113,33 @@ info:
   script:
     - *env
 
-unittest_py3.9:
+unittest_py3.11:
   tags: [cached-dind]
   stage: test
   image: $CI_REGISTRY_IMAGE
   script:
-      - tox
+    - python3 -c "import sys; assert sys.version.startswith('3.11')"
+    - tox
 
-unittest_py3.7:
+unittest_py3.8:
   tags: [cached-dind]
   stage: test
-  image: python:3.7
+  image: python:3.8
   script: &python_test_script
     # install dependencies
     - pip install pytest pytest-cov
     # TODO: Use f-branch logic here
     - pip install git+https://gitlab.indiscale.com/caosdb/src/caosdb-pylib.git@dev
     - pip install git+https://gitlab.indiscale.com/caosdb/src/caosdb-advanced-user-tools.git@dev
-    - pip install .[h5-crawler]
+    - pip install .[h5-crawler,spss]
     # actual test
     - caosdb-crawler --help
     - pytest --cov=caosdb -vv ./unittests
 
-unittest_py3.8:
+unittest_py3.9:
   tags: [cached-dind]
   stage: test
-  image: python:3.8
+  image: python:3.9
   script: *python_test_script
 
 unittest_py3.10:
@@ -147,12 +148,31 @@ unittest_py3.10:
   image: python:3.10
   script: *python_test_script
 
-unittest_py3.11:
+unittest_py3.12:
   tags: [cached-dind]
   stage: test
-  image: python:3.11
+  image: python:3.12
   script: *python_test_script
-  
+
+unittest_py3.13:
+  allow_failure: true
+  tags: [cached-dind]
+  stage: test
+  image: python:3.13-rc
+  script:
+    # TODO: Replace by '*python_test_script' as soon as 3.13 has been officially released.
+    # TODO: Remove the "!" after the 3.13 release; it serves as an xfail.
+    - apt update && apt install -y cargo
+    # install dependencies
+    - pip install pytest pytest-cov
+    # TODO: Use f-branch logic here
+    - pip install git+https://gitlab.indiscale.com/caosdb/src/caosdb-pylib.git@dev
+    - (! pip install git+https://gitlab.indiscale.com/caosdb/src/caosdb-advanced-user-tools.git@dev)
+    - (! pip install .[h5-crawler,spss])
+    # actual test
+    - (! caosdb-crawler --help)
+    - (! pytest --cov=caosdb -vv ./unittests)
+
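The `(! command)` construct in the 3.13 job above is plain shell negation: the step succeeds exactly when the wrapped command fails, turning the currently expected breakage into a passing CI step (an "xfail"). A hedged sketch of the same inversion in Python terms (the package name is made up):

```python
import subprocess

# `(! cmd)` in the CI script: a non-zero exit code counts as success.
result = subprocess.run(["pip", "install", "hypothetical-not-yet-3.13-ready-package"])
xfail_satisfied = result.returncode != 0
print("expected failure observed:", xfail_satisfied)
```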
 inttest:
   tags: [docker]
   services:
@@ -287,7 +307,8 @@ code-style:
     - job: build-testenv
       optional: true
   script:
-      - autopep8 -r --diff --exit-code .
+    - autopep8 --version
+    - autopep8 -r --diff --exit-code .
   allow_failure: true
 
 pylint:
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 352311d0910bcf0c7f60183b6f58dd7ffdcb0ed4..995ad6eedf391f2219cbd25fbd7fa12e1f32126a 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -9,18 +9,45 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ### Added ###
 
+* Support for Python 3.12 and experimental support for 3.13
+* CFood macros now accept complex objects as values, not just strings.
+* More options for the `CSVTableConverter`
+* New converters:
+  * `DatetimeElementConverter`
+  * `SPSSConverter`
+* New scripts:
+  * `spss_to_datamodel`
+  * `csv_to_datamodel`
+* New transformer functions:
+  * `date_parse`
+  * `datetime_parse`
+
 ### Changed ###
 
+* CFood macros no longer render everything into strings.
+* Better internal handling of identifiable/reference resolving and merging of entities.  This also
+  includes more understandable output for users.
+* Better handling of missing imports, with nice messages for users.
+* No longer use the configuration of advancedtools to set the "to" and "from" email addresses.
+
 ### Deprecated ###
 
 ### Removed ###
 
+* Support for Python 3.7
+
 ### Fixed ###
 
+* [93](https://gitlab.com/linkahead/linkahead-crawler/-/issues/93) cfood.yaml does not allow umlaut in $expression
+* [96](https://gitlab.com/linkahead/linkahead-crawler/-/issues/96) Do not fail silently on transaction errors
+
 ### Security ###
 
 ### Documentation ###
 
+* General improvement of the documentation, in many small places.
+* The API documentation should now also include documentation of the constructors.
+
 ## [0.7.1] - 2024-03-21 ##
 
 ### Fixed ###
@@ -68,6 +95,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 * The `identifiable_adapters.IdentifiableAdapter` uses entity ids (negative for
   entities that don't exist remotely) instead of entity objects for keeping
   track of references.
+* Log output is written to $SHARED_DIR/ if that variable is set, or to the terminal otherwise.
 
 ### Deprecated ###
 
@@ -161,6 +189,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - ``add_prefix`` and ``remove_prefix`` arguments for the command line interface
   and the ``crawler_main`` function for the adding/removal of path prefixes when
   creating file entities.
+- Stricter checking of `identifiables.yaml`.
+- Better error messages when server does not conform to expected data model.
 
 ### Changed ###
 
diff --git a/integrationtests/basic_example/test_basic.py b/integrationtests/basic_example/test_basic.py
index c906a81d86af56669f7c522169bceb3b5fcb3e01..6fd322e5f6425e9bce25b970d6de7d99892762a5 100755
--- a/integrationtests/basic_example/test_basic.py
+++ b/integrationtests/basic_example/test_basic.py
@@ -32,7 +32,7 @@ import sys
 from argparse import RawTextHelpFormatter
 from pathlib import Path
 
-import caosdb as db
+import linkahead as db
 import pytest
 import yaml
 from caosadvancedtools.crawler import Crawler as OldCrawler
@@ -42,8 +42,8 @@ from caoscrawler.debug_tree import DebugTree
 from caoscrawler.identifiable import Identifiable
 from caoscrawler.identifiable_adapters import CaosDBIdentifiableAdapter
 from caoscrawler.scanner import scan_directory
-from caosdb import EmptyUniqueQueryError
-from caosdb.utils.register_tests import clear_database, set_test_key
+from linkahead import EmptyUniqueQueryError
+from linkahead.utils.register_tests import clear_database, set_test_key
 
 set_test_key("10b128cf8a1372f30aa3697466bb55e76974e0c16a599bb44ace88f19c8f61e2")
 
diff --git a/integrationtests/test_issues.py b/integrationtests/test_issues.py
index 814e82ad75512ec8fe217294e1a9e86c6aa01ab3..76392f3a4ce20d7ed6b6ccc30c79f1ce400001f7 100644
--- a/integrationtests/test_issues.py
+++ b/integrationtests/test_issues.py
@@ -16,20 +16,18 @@
 # You should have received a copy of the GNU Affero General Public License
 # along with this program. If not, see <https://www.gnu.org/licenses/>.
 #
-from pytest import fixture, mark, raises
-
 import linkahead as db
-from linkahead.cached import cache_clear
 from caosadvancedtools.models.parser import parse_model_from_string
-
 from caoscrawler.crawl import Crawler
 from caoscrawler.identifiable import Identifiable
 from caoscrawler.identifiable_adapters import CaosDBIdentifiableAdapter
+from caoscrawler.scanner import (create_converter_registry,
+                                 scan_structure_elements)
 from caoscrawler.structure_elements import DictElement
-
-from caoscrawler.scanner import create_converter_registry, scan_structure_elements
-
+from linkahead.cached import cache_clear
 from linkahead.utils.register_tests import clear_database, set_test_key
+from pytest import fixture, mark, raises
+
 set_test_key("10b128cf8a1372f30aa3697466bb55e76974e0c16a599bb44ace88f19c8f61e2")
 
 
@@ -171,8 +169,9 @@ def test_issue_83(clear_database):
         name=referencing_type.name).add_property(name=referenced_type.name, value=[ref_target1])
     referencing2 = db.Record(name="Referencing2").add_parent(
         name=referencing_type.name).add_property(name=referenced_type.name, value=[ref_target2])
-    referencing3 = db.Record(name="Referencing3").add_parent(name=referencing_type.name).add_property(
-        name=referenced_type.name, value=[ref_target1, ref_target2])
+    referencing3 = db.Record(name="Referencing3").add_parent(
+        name=referencing_type.name).add_property(name=referenced_type.name, value=[ref_target1,
+                                                                                   ref_target2])
 
     records = db.Container().extend(
         [ref_target1, ref_target2, referencing1, referencing2, referencing3])
diff --git a/integrationtests/test_use_case_simple_presentation.py b/integrationtests/test_use_case_simple_presentation.py
index cf38e951b78534806c0ea76ef58051436aa22704..05b0a543deb03eb524d40d6a386876812e6b54e2 100644
--- a/integrationtests/test_use_case_simple_presentation.py
+++ b/integrationtests/test_use_case_simple_presentation.py
@@ -27,12 +27,12 @@ import os
 import pytest
 from subprocess import run
 
-import caosdb as db
+import linkahead as db
 from caosadvancedtools.loadFiles import loadpath
-from caosdb.cached import cache_clear
+from linkahead.cached import cache_clear
 from caosadvancedtools.models import parser as parser
 from caoscrawler.crawl import crawler_main
-from caosdb.utils.register_tests import clear_database, set_test_key
+from linkahead.utils.register_tests import clear_database, set_test_key
 
 
 set_test_key("10b128cf8a1372f30aa3697466bb55e76974e0c16a599bb44ace88f19c8f61e2")
diff --git a/setup.cfg b/setup.cfg
index 88898530f7b7e049e84b230bdcbd45ff5170fabf..848150363c42776993029c54e777f4ff6ccf72ea 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -17,15 +17,15 @@ classifiers =
 package_dir =
             = src
 packages = find:
-python_requires = >=3.7
+python_requires = >=3.8
 install_requires =
     caosadvancedtools >= 0.7.0
     importlib-resources
-    importlib_metadata;python_version<'3.8'
     linkahead > 0.13.2
     odfpy #make optional
     packaging
     pandas
+    pyarrow  # Will be required by Pandas >= 3.0.
     pyyaml
     yaml-header-tools >= 0.2.1
 
@@ -40,8 +40,12 @@ per-file-ignores = __init__.py:F401
 [options.entry_points]
 console_scripts =
   caosdb-crawler = caoscrawler.crawl:main
+  spss_to_datamodel = caoscrawler.conv_impl.spss:spss_to_datamodel_main
+  csv_to_datamodel = caoscrawler.scripts.generators:csv_to_datamodel_main
 
 [options.extras_require]
 h5-crawler =
            h5py >= 3.8
            numpy
+spss =
+     pandas[spss]
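The new `spss_to_datamodel` entry point wraps functionality from `caoscrawler.conv_impl.spss` (see the new file below). A usage sketch of the underlying function; the file names are placeholders:

```python
from caoscrawler.conv_impl.spss import spss_to_yaml

# Equivalent to: spss_to_datamodel -i data.sav -o datamodel.yaml --cfood cfood.yaml
spss_to_yaml(savfile="data.sav", yamlfile="datamodel.yaml", cfood="cfood.yaml")
```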
diff --git a/src/caoscrawler/__init__.py b/src/caoscrawler/__init__.py
index 05bad0b54d9098c0b7f165d8295a0faa2966fa32..41b96323b1106d8ce28caadc4a2da012f3dc22ea 100644
--- a/src/caoscrawler/__init__.py
+++ b/src/caoscrawler/__init__.py
@@ -1,4 +1,15 @@
+from . import converters, utils
+try:
+    from .conv_impl.spss import SPSSConverter
+except ImportError as err:
+    SPSSConverter: type = utils.MissingImport(
+        name="SPSSConverter", hint="Try installing with the `spss` extra option.",
+        err=err)
 from .crawl import Crawler, SecurityMode
 from .version import CfoodRequiredVersionError, get_caoscrawler_version
 
 __version__ = get_caoscrawler_version()
+
+# Convenience members #########################################################
+# mypy: disable-error-code="attr-defined"
+converters.SPSSConverter = SPSSConverter
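The try/except above makes the optional SPSS support fail lazily: importing `caoscrawler` always works, and the error only surfaces when `SPSSConverter` is actually used. A sketch of the idea, assuming `utils.MissingImport` behaves roughly like this placeholder (not its real implementation):

```python
class MissingImport:
    """Defer an ImportError until the missing feature is actually used."""

    def __init__(self, name: str, hint: str, err: Exception):
        self.name, self.hint, self.err = name, hint, err

    def __call__(self, *args, **kwargs):
        raise RuntimeError(f"{self.name} is not available. {self.hint}") from self.err
```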
diff --git a/src/caoscrawler/cfood-schema.yml b/src/caoscrawler/cfood-schema.yml
index 5a6e1e50345382ca6e5a1e6ef3a8fbeafb806b84..340e5b9dec0e8f05b1c39ec2511196249ec87d31 100644
--- a/src/caoscrawler/cfood-schema.yml
+++ b/src/caoscrawler/cfood-schema.yml
@@ -28,9 +28,12 @@ cfood:
           - Definitions
           - Dict
           - Date
+          - Datetime
           - JSONFile
+          - YAMLFile
           - CSVTableConverter
           - XLSXTableConverter
+          - SPSSFile
           - H5File
           - H5Dataset
           - H5Group
diff --git a/src/caoscrawler/conv_impl/__init__.py b/src/caoscrawler/conv_impl/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/src/caoscrawler/conv_impl/spss.py b/src/caoscrawler/conv_impl/spss.py
new file mode 100644
index 0000000000000000000000000000000000000000..5dfad0ff8be55e2ca3ddf0db3397dbac5fc9f2b0
--- /dev/null
+++ b/src/caoscrawler/conv_impl/spss.py
@@ -0,0 +1,303 @@
+# This file is a part of the LinkAhead Project.
+#
+# Copyright (C) 2024 IndiScale GmbH <info@indiscale.com>
+# Copyright (C) 2024 Daniel Hornung <d.hornung@indiscale.com>
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as
+# published by the Free Software Foundation, either version 3 of the
+# License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+
+"""Converter for SAV files (stored by SPSS)."""
+
+from __future__ import annotations  # Can be removed with 3.10.
+
+import argparse
+from collections import OrderedDict
+
+import numpy as np
+import pandas as pd
+import pyreadstat
+import yaml
+
+from .. import converters
+from ..stores import GeneralStore
+from ..structure_elements import (File, StructureElement)
+from typing import Optional, Any
+
+
+READSTAT_TYPES = {
+    "double": "DOUBLE",
+    "string": "TEXT",
+}
+ORIGINAL_TYPES = {
+    "EDATE8": "DATETIME",
+}
+
+
+class SPSSConverter(converters.TableConverter):
+    """Converter for SAV files (stored by SPSS)."""
+
+    def create_children(self, values: GeneralStore, element: StructureElement) -> list:
+        assert isinstance(element, File)
+        # The default dtype backend "numpy_nullable" does not handle dates well.
+        # Note that pandas.ArrowDtype is considered experimental (in Pandas 2.2).
+        df = pd.io.spss.read_spss(element.path, dtype_backend="pyarrow")
+        dtypes = read_column_types(element.path)
+
+        # Fix datetime columns
+        for name, dtype in dtypes.items():
+            if dtype != "DATETIME":
+                continue
+            col = df.loc[:, name]
+            col.fillna(np.nan, inplace=True)
+            col.replace([np.nan], [None], inplace=True)
+
+        return self._children_from_dataframe(df)
+
+
+def read_column_types(savfile: Optional[str] = None, meta: Optional[Any] = None) -> dict[str, str]:
+    """Read SAV file and return the column types.
+
+Optionally, take data from a previous reading.
+
+Parameters
+----------
+savfile : Optional[str]
+    The SAV file to read.
+
+meta : Optional[Any]
+    The metadata result from `pyreadstat.read_sav(...)`.
+
+Returns
+-------
+out : dict[str, str]
+    The column names and types.
+    """
+    if not meta:
+        _, meta = pyreadstat.read_sav(savfile, metadataonly=True)
+    elif savfile is not None:
+        raise ValueError("Only one of `savfile` and `meta` must be given.")
+    dtypes: dict[str, str] = {}
+    for name in meta.column_names:
+        datatype = ORIGINAL_TYPES.get(meta.original_variable_types[name],
+                                      READSTAT_TYPES[meta.readstat_variable_types[name]])
+        dtypes[name] = datatype
+    return dtypes
+
+
+def spss_to_yaml(savfile: str, yamlfile: str, cfood: Optional[str] = None) -> None:
+    """Parse the *.sav and create basic datamodel in ``yamlfile``.
+
+Parameters
+----------
+cfood: str
+  If given, also create a cfood skeleton.
+    """
+    _, meta = pyreadstat.read_sav(savfile, metadataonly=True)
+    dtypes = read_column_types(meta=meta)
+
+    cfood_str = """
+---
+metadata:
+  macros:
+  - !defmacro
+    # Simple column value -> property rule
+    name: ColumnValue
+    params:
+      name: null
+      belongsto: BaseElement
+      type: TextElement
+    definition:
+      ${name}:
+        type: ${type}
+        match_name: ^${name}$$
+        match_value: (?P<val>.*)
+        records:
+          ${belongsto}:
+            ${name}: $$val
+  - !defmacro
+    # column value -> reference property
+    name: ColumnValueReference
+    params:
+      name: null
+      reftype: null  # RecordType of the reference
+      belongsto: BaseElement
+      type: TextElement  # References are always text, right?
+    definition:
+      ${name}:
+        type: ${type}
+        match_name: ^${name}$$
+        match_value: (?P<val>.*)
+        records:
+          ${reftype}:
+            name: $$val
+          ${belongsto}:
+            ${name}: $$${reftype}
+  - !defmacro
+    # Same as "ColumnValue", but also give name of property.
+    name: ColumnValuePropname
+    params:
+      name: null
+      propname: null
+      belongsto: BaseElement
+      type: TextElement
+    definition:
+      ${name}:
+        type: ${type}
+        match_name: ^${name}$$
+        match_value: (?P<val>.*)
+        records:
+          ${belongsto}:
+            ${propname}: $$val
+---
+directory: # corresponds to the directory given to the crawler
+  type: Directory
+  match: .* # we do not care how it is named here
+  subtree:
+    # This is the file
+    thisfile:
+      type: SPSSFile
+      match: ".*sav"
+      subtree:
+        entry:
+          type: Dict
+          match: .* # Name is irrelevant
+          records:
+            MyParent:
+          subtree: !macro
+"""
+
+    enums: dict[str, list[str]] = {}
+    properties = OrderedDict()
+
+    for name in meta.column_names:
+        prop = {
+            "datatype": dtypes[name],
+        }
+        desc = meta.column_names_to_labels.get(name)
+        if desc and desc != name:
+            prop["description"] = desc
+        # Handle categorical variables
+        if var_label := meta.variable_to_label.get(name):
+            vvl = meta.variable_value_labels[name]
+            # reproducible (and sensible) order
+            label_values = [vvl[key] for key in sorted(vvl.keys())]
+            if label_values not in enums.values():
+                enums[var_label] = label_values
+            else:
+                var_label = [key for key, value in enums.items() if value == label_values][0]
+            prop["datatype"] = var_label
+        properties[name] = prop
+
+    output = f"""# auto-generated data model from file "{savfile}".
+# To insert a datamodel into LinkAhead, run:
+#
+# python3 -m caosadvancedtools.models.parser datamodel.yaml --sync
+
+"""
+
+    # Actual datamodel
+    output += """
+#########
+# Enums #
+#########
+
+"""
+    for name, values in enums.items():
+        output += f"""{name}:
+  description:
+  # possible values: {values}\n"""
+
+    output += ("""
+###############
+# RecordTypes #
+###############
+
+DummyRT:
+  description: "Note: Change name and enter description."
+  recommended_properties:
+    """
+               + "    ".join(yaml.dump(dict(properties),  # from OrderedDict to dict
+                                       allow_unicode=True,
+                                       sort_keys=False).splitlines(keepends=True)))
+
+    # Experimental: Enum creation
+    output += """
+###############
+# Enum values #
+###############
+"""
+    for name, values in enums.items():
+        output += f"\n# ### {name} ###\n"
+        for value in values:
+            output += f"""
+{value}:
+  role: Record
+  inherit_from_suggested:
+    - {name}
+"""
+
+    with open(yamlfile, encoding="utf-8", mode="w") as myfile:
+        myfile.write(output)
+
+    if cfood:
+        defs_col_value: list[str] = []
+        defs_col_value_ref: list[str] = []
+        prefix = " " * 14
+        for name, propdef in properties.items():
+            def_str = prefix + f"- name: {name}\n"
+            dtype = None
+            reftype = None
+            defs = defs_col_value
+            # Which type?
+            if propdef["datatype"] == "DOUBLE":
+                dtype = "FloatElement"
+            elif propdef["datatype"] in ("TEXT", "DATETIME"):
+                dtype = None
+            else:
+                reftype = propdef["datatype"]
+                defs = defs_col_value_ref
+
+            # Append according to types:
+            if reftype:
+                def_str += prefix + f"  reftype: {reftype}\n"
+            if dtype:
+                def_str += prefix + f"  type: {dtype}\n"
+
+            # Store result
+            defs.append(def_str)
+            del defs
+
+        cfood_str += (prefix[2:] + "ColumnValue:\n" + "".join(defs_col_value)
+                      + prefix[2:] + "ColumnValueReference:\n" + "".join(defs_col_value_ref)
+                      )
+        with open(cfood, encoding="utf-8", mode="w") as myfile:
+            myfile.write(cfood_str)
+
+
+def _parse_arguments():
+    """Parse the arguments."""
+    parser = argparse.ArgumentParser(description=__doc__)
+    parser.add_argument('-i', '--input', help="The *.sav file.", required=True)
+    parser.add_argument('-o', '--outfile', help="Yaml filename to save the result", required=True)
+    parser.add_argument('--cfood', help="Yaml filename to create cfood output in", required=False)
+
+    return parser.parse_args()
+
+
+def spss_to_datamodel_main():
+    """The main function of this script."""
+    args = _parse_arguments()
+    spss_to_yaml(savfile=args.input, yamlfile=args.outfile, cfood=args.cfood)
+    print(f"Written datamodel to: {args.outfile}")
+    if args.cfood:
+        print(f"Written cfood to: {args.cfood}")
diff --git a/src/caoscrawler/converters.py b/src/caoscrawler/converters.py
index 535a14745282016cd55acd4ca3fcf0ceb0ccd7ec..dad11ec902d638b6c9f7d746b2e628a6af9c0c83 100644
--- a/src/caoscrawler/converters.py
+++ b/src/caoscrawler/converters.py
@@ -1,11 +1,11 @@
-#!/usr/bin/env python3
 # encoding: utf-8
 #
-# ** header v3.0
-# This file is a part of the CaosDB Project.
+# This file is a part of the LinkAhead Project.
 #
+# Copyright (C) 2024 IndiScale GmbH <info@indiscale.com>
 # Copyright (C) 2021 Henrik tom Wörden
-#               2021 Alexander Schlemmer
+# Copyright (C) 2021 Alexander Schlemmer
+# Copyright (C) 2024 Daniel Hornung <d.hornung@indiscale.com>
 #
 # This program is free software: you can redistribute it and/or modify
 # it under the terms of the GNU Affero General Public License as
@@ -19,9 +19,8 @@
 #
 # You should have received a copy of the GNU Affero General Public License
 # along with this program. If not, see <https://www.gnu.org/licenses/>.
-#
-# ** end header
-#
+
+"""Converters take structure elements and create Records and new structure elements from them."""
 
 from __future__ import annotations
 
@@ -34,7 +33,7 @@ import warnings
 from abc import ABCMeta, abstractmethod
 from inspect import signature
 from string import Template
-from typing import Any, List, Optional, Tuple, Union
+from typing import Any, Callable, Optional, Union
 
 import linkahead as db
 import pandas as pd
@@ -53,12 +52,16 @@ from .utils import has_parent
 # by the converters:
 SPECIAL_PROPERTIES = ("description", "name", "id", "path",
                       "file", "checksum", "size")
-SINGLE_VAR_RE = re.compile(r"^\$(\{)?(?P<varname>[0-9a-zA-Z_]+)(\})?$")
+ID_PATTERN = r"\D[.\w]*"
+SINGLE_VAR_RE = re.compile(r"^\$(\{)?(?P<varname>" + ID_PATTERN + r")(\})?$")
 logger = logging.getLogger(__name__)
 
 
 class CrawlerTemplate(Template):
-    braceidpattern = r"(?a:[_a-z][_\.a-z0-9]*)"
+    # This also adds a dot to the default pattern.
+    # See: https://docs.python.org/3/library/string.html#template-strings
+    # The default flags value is re.IGNORECASE.
+    braceidpattern = ID_PATTERN
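The widened `ID_PATTERN` allows dots inside variable names, so cfood templates can reference nested values such as `${Experiment.date}`; since `\w` is Unicode-aware, names with umlauts also match (see issue 93 in the changelog below). A minimal check of the single-variable regex built from it:

```python
import re

ID_PATTERN = r"\D[.\w]*"
SINGLE_VAR_RE = re.compile(r"^\$(\{)?(?P<varname>" + ID_PATTERN + r")(\})?$")

match = SINGLE_VAR_RE.match("${Experiment.date}")
assert match is not None and match.group("varname") == "Experiment.date"
```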
 
 
 def _only_max(children_with_keys):
@@ -134,8 +137,8 @@ def replace_variables(propvalue: Any, values: GeneralStore):
     This function replaces variables in property values (and possibly other locations,
     where the crawler can replace cfood-internal variables).
 
-    If `propvalue` is a single variable name preceeded with a '$' (e.g. '$var' or '${var}'), then
-    the corresponding value stored in `values` is returned.
+    If ``propvalue`` is a single variable name preceded by a ``$`` (e.g. ``$var`` or ``${var}``),
+    then the corresponding value stored in ``values`` is returned.
     In any other case the variable substitution is carried out as defined by string templates
     and a new string with the replaced variables is returned.
     """
@@ -160,16 +163,16 @@ def handle_value(value: Union[dict, str, list], values: GeneralStore):
     add as an additional property (multiproperty).
 
     Variable names (starting with a "$") are replaced by the corresponding value stored in the
-    `values` GeneralStore.
+    ``values`` GeneralStore.
 
 Parameters
 ----------
 
-value:
-  - if str, the value to be interpreted. E.g. "4", "hallo" or "$a" etc.
-  - if dict, must have keys "value" and "collection_mode". The returned tuple is directly
+value: Union[dict, str, list]
+  - If *str*, the value to be interpreted. E.g. "4", "hello" or "$a" etc.
+  - If *dict*, must have keys ``value`` and ``collection_mode``. The returned tuple is directly
     created from the corresponding values.
-  - if list, each element is checked for replacement and the resulting list will be used
+  - If *list*, each element is checked for replacement and the resulting list will be used
     as (list) value for the property
 
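A compact illustration of the three string forms listed above; the results follow the docstring rather than an actual run, and `store` stands for a `GeneralStore` instance:

```python
# Plain value, list marker "+" and multiproperty marker "*":
#   handle_value("4", store)   -> ("4", "single")
#   handle_value("+4", store)  -> ("4", "list")
#   handle_value("*4", store)  -> ("4", "multiproperty")
# Dict form with an explicit collection_mode:
#   handle_value({"value": "$a", "collection_mode": "list"}, store)
```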
 Returns
@@ -181,15 +184,15 @@ out: tuple
     """
     # @review Florian Spreckelsen 2022-05-13
 
-    if type(value) == dict:
+    if isinstance(value, dict):
         if "value" not in value:
             # TODO: how do we handle this case? Just ignore?
             #       or disallow?
-            raise NotImplementedError()
+            raise NotImplementedError(f"This definition has no \"value\": {value}")
         propvalue = value["value"]
         # can be "single", "list" or "multiproperty"
         collection_mode = value["collection_mode"]
-    elif type(value) == str:
+    elif isinstance(value, str):
         propvalue = value
         collection_mode = "single"
         if propvalue.startswith("+"):
@@ -198,7 +201,7 @@ out: tuple
         elif propvalue.startswith("*"):
             collection_mode = "multiproperty"
             propvalue = propvalue[1:]
-    elif type(value) == list:
+    elif isinstance(value, list):
         # TODO: (for review)
         #       This is a bit dirty right now and needed for
         #       being able to directly set list values. Semantics is, however, a bit
@@ -209,7 +212,7 @@ out: tuple
         propvalue = list()
         for element in value:
             # Do the element-wise replacement only, when its type is string:
-            if type(element) == str:
+            if isinstance(element, str):
                 propvalue.append(replace_variables(element, values))
             else:
                 propvalue.append(element)
@@ -286,9 +289,7 @@ def create_records(values: GeneralStore, records: RecordStore, def_records: dict
                     propvalue = os.path.normpath(propvalue)
                 setattr(c_record, key.lower(), propvalue)
             else:
-
                 if c_record.get_property(key) is None:
-
                     if collection_mode == "list":
                         c_record.add_property(name=key, value=[propvalue])
                     elif (collection_mode == "multiproperty" or
@@ -322,10 +323,13 @@ class Converter(object, metaclass=ABCMeta):
 
         Parameters
         ----------
-        definition: dict, Please refer to ``src/doc/converters.rst`` to learn about the structure
-                    that the definition dict must have.
-        converter_registry: dict, A dictionary that contains converter names as keys and dicts as
-                            values. Those value dicts have the keys 'converter' and 'package'.
+        definition: dict
+          Please refer to ``src/doc/converters.rst`` to learn about the structure that the
+          definition dict must have.
+        converter_registry: dict
+          A dictionary that contains converter names as keys and dicts as values. Those value dicts
+          have the keys 'converter', 'package' and 'class'.  'converter' is the class name,
+          'package' the module, and 'class' the class object itself.
         """
 
         self.definition = definition
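For a concrete picture of the registry shape described in the docstring, a minimal entry might look like the following sketch (in practice the registry is built by `scanner.create_converter_registry`):

```python
from caoscrawler import converters

converter_registry = {
    "TextElement": {
        "converter": "TextElementConverter",       # class name
        "package": "caoscrawler.converters",       # module
        "class": converters.TextElementConverter,  # the class object
    },
}
```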
@@ -363,7 +367,7 @@ class Converter(object, metaclass=ABCMeta):
 
     @staticmethod
     def converter_factory(definition: dict, name: str, converter_registry: dict):
-        """creates a Converter instance of the appropriate class.
+        """Create a Converter instance of the appropriate class.
 
         The `type` key in the `definition` defines the Converter class which is being used.
         """
@@ -424,10 +428,11 @@ class Converter(object, metaclass=ABCMeta):
                 pass
         """
 
-        if not "transform" in self.definition:
+        if "transform" not in self.definition:
             return
         for transformer_key, transformer in self.definition["transform"].items():
             in_value = replace_variables(transformer["in"], values)
+            out_value = in_value
 
             for tr_func_el in transformer["functions"]:
                 if not isinstance(tr_func_el, dict):
@@ -460,13 +465,13 @@ class Converter(object, metaclass=ABCMeta):
             values[match.group('varname')] = out_value
 
     @abstractmethod
-    def create_children(self, values: GeneralStore,
-                        element: StructureElement):
+    def create_children(self, values: GeneralStore, element: StructureElement):
         pass
 
     def create_records(self, values: GeneralStore, records: RecordStore,
                        element: StructureElement):
         # TODO why is element passed but not used???
+        # ANSWER: because it might be used by overriding child classes.
 
         if "records" not in self.definition:
             return []
@@ -477,7 +482,7 @@ class Converter(object, metaclass=ABCMeta):
                               self.definition["records"])
 
     def filter_children(self, children_with_strings:
-                        List[Tuple[StructureElement, str]], expr: str,
+                        list[tuple[StructureElement, str]], expr: str,
                         group: str, rule: str):
         """Filter children according to regexp `expr` and `rule`."""
 
@@ -515,8 +520,8 @@ class Converter(object, metaclass=ABCMeta):
                                  result: Optional[dict]):
         """ Template for the debugging output for the match function """
         msg = "\n--------" + name + "-----------\n"
-        for re, ma in zip(regexp, matched):
-            msg += "matching reg:\t" + re + "\n"
+        for exp, ma in zip(regexp, matched):
+            msg += "matching reg:\t" + exp + "\n"
             msg += "matching val:\t" + ma + "\n"
             msg += "---------\n"
         if result is None:
@@ -620,7 +625,7 @@ class DirectoryConverter(Converter):
 
         element: A directory (of type Directory) which will be traversed.
         """
-        children: List[StructureElement] = []
+        children: list[StructureElement] = []
 
         for name in sorted(os.listdir(element.path)):
             path = os.path.join(element.path, name)
@@ -660,7 +665,7 @@ class SimpleFileConverter(Converter):
 class FileConverter(SimpleFileConverter):
     def __init__(self, *args, **kwargs):
         warnings.warn(DeprecationWarning(
-            "This class is depricated. Please use SimpleFileConverter."))
+            "This class is deprecated. Please use SimpleFileConverter."))
         super().__init__(*args, **kwargs)
 
 
@@ -693,12 +698,12 @@ class MarkdownFileConverter(SimpleFileConverter):
                 "Error during the validation (yaml header cannot be read) of the markdown file "
                 "located at the following node in the data structure:\n"
                 "{}\nError:\n{}".format(path, err))
-        children: List[StructureElement] = []
+        children: list[StructureElement] = []
 
         for name, entry in header.items():
-            if type(entry) == list:
+            if isinstance(entry, list):
                 children.append(ListElement(name, entry))
-            elif type(entry) == str:
+            elif isinstance(entry, str):
                 children.append(TextElement(name, entry))
             else:
                 if generalStore is not None and self.name in generalStore:
@@ -713,7 +718,9 @@ class MarkdownFileConverter(SimpleFileConverter):
 def convert_basic_element(element: Union[list, dict, bool, int, float, str, None], name=None,
                           msg_prefix=""):
     """Convert basic Python objects to the corresponding StructureElements"""
-    if isinstance(element, list):
+    if isinstance(element, StructureElement):
+        return element
+    elif isinstance(element, list):
         return ListElement(name, element)
     elif isinstance(element, dict):
         return DictElement(name, element)
@@ -963,14 +970,14 @@ class PropertiesFromDictConverter(DictElementConverter):
 class DictConverter(DictElementConverter):
     def __init__(self, *args, **kwargs):
         warnings.warn(DeprecationWarning(
-            "This class is depricated. Please use DictElementConverter."))
+            "This class is deprecated. Please use DictElementConverter."))
         super().__init__(*args, **kwargs)
 
 
 class DictDictElementConverter(DictElementConverter):
     def __init__(self, *args, **kwargs):
         warnings.warn(DeprecationWarning(
-            "This class is depricated. Please use DictElementConverter."))
+            "This class is deprecated. Please use DictElementConverter."))
         super().__init__(*args, **kwargs)
 
 
@@ -1035,7 +1042,7 @@ out:
     """
     if "match_name" in definition:
         if "match" in definition:
-            raise RuntimeError(f"Do not supply both, 'match_name' and 'match'.")
+            raise RuntimeError("Do not supply both, 'match_name' and 'match'.")
 
         m1 = re.match(definition["match_name"], name)
         if m1 is None:
@@ -1158,7 +1165,7 @@ class BooleanElementConverter(_AbstractScalarValueElementConverter):
 class DictBooleanElementConverter(BooleanElementConverter):
     def __init__(self, *args, **kwargs):
         warnings.warn(DeprecationWarning(
-            "This class is depricated. Please use BooleanElementConverter."))
+            "This class is deprecated. Please use BooleanElementConverter."))
         super().__init__(*args, **kwargs)
 
 
@@ -1174,7 +1181,7 @@ class FloatElementConverter(_AbstractScalarValueElementConverter):
 class DictFloatElementConverter(FloatElementConverter):
     def __init__(self, *args, **kwargs):
         warnings.warn(DeprecationWarning(
-            "This class is depricated. Please use FloatElementConverter."))
+            "This class is deprecated. Please use FloatElementConverter."))
         super().__init__(*args, **kwargs)
 
 
@@ -1189,7 +1196,7 @@ class TextElementConverter(_AbstractScalarValueElementConverter):
     def __init__(self, definition, *args, **kwargs):
         if "match" in definition:
             raise ValueError("""
-The 'match' key will in future be used to match a potential name of a TextElement. Please use
+The 'match' key is used to match a potential name of a TextElement. Please use
 the 'match_value' key to match the value of the TextElement and 'match_name' for matching the name.
 """)
 
@@ -1199,7 +1206,7 @@ the 'match_value' key to match the value of the TextElement and 'match_name' for
 class DictTextElementConverter(TextElementConverter):
     def __init__(self, *args, **kwargs):
         warnings.warn(DeprecationWarning(
-            "This class is depricated. Please use TextElementConverter."))
+            "This class is deprecated. Please use TextElementConverter."))
         super().__init__(*args, **kwargs)
 
 
@@ -1215,7 +1222,7 @@ class IntegerElementConverter(_AbstractScalarValueElementConverter):
 class DictIntegerElementConverter(IntegerElementConverter):
     def __init__(self, *args, **kwargs):
         warnings.warn(DeprecationWarning(
-            "This class is depricated. Please use IntegerElementConverter."))
+            "This class is deprecated. Please use IntegerElementConverter."))
         super().__init__(*args, **kwargs)
 
 
@@ -1225,7 +1232,7 @@ class ListElementConverter(Converter):
         # TODO: See comment on types and inheritance
         if not isinstance(element, ListElement):
             raise RuntimeError(
-                "This converter can only process DictListElements.")
+                "This converter can only process ListElements.")
         children: list[StructureElement] = []
         for index, list_element in enumerate(element.value):
             children.append(
@@ -1257,7 +1264,7 @@ class ListElementConverter(Converter):
 class DictListElementConverter(ListElementConverter):
     def __init__(self, *args, **kwargs):
         warnings.warn(DeprecationWarning(
-            "This class is depricated. Please use ListElementConverter."))
+            "This class is deprecated. Please use ListElementConverter."))
         super().__init__(*args, **kwargs)
 
 
@@ -1271,15 +1278,22 @@ class TableConverter(Converter):
     The rows can be matched using a DictElementConverter.
 
     """
-    @abstractmethod
-    def get_options(self):
-        """
-        This method needs to be overwritten by the specific table converter to provide
-        information about the possible options.
+
+    def get_options(self) -> dict:
+        """Get specific options, e.g. from ``self.definitions``.
+
+This method may to be overwritten by the specific table converter to provide information about the
+possible options.  Implementors may use ``TableConverter._get_options(...)`` to get (and convert)
+options from ``self.definitions``.
+
+Returns
+-------
+out: dict
+  An options dict.
         """
-        pass
+        return {}
 
-    def _get_options(self, possible_options):
+    def _get_options(self, possible_options: list[tuple[str, Callable]]) -> dict:
         option_dict = dict()
         for opt_name, opt_conversion in possible_options:
             if opt_name in self.definition:
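For example, a table converter subclass could implement `get_options()` on top of `_get_options()` as in this sketch; the option names mirror `pandas.read_csv` keywords and are illustrative only:

```python
from caoscrawler.converters import TableConverter

class MyTableConverter(TableConverter):
    """Hypothetical converter exposing two pandas-style options."""

    def get_options(self) -> dict:
        # Each tuple: (option name in the cfood definition, conversion callable)
        return self._get_options([
            ("sep", str),
            ("header", int),
        ])
```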
@@ -1307,6 +1321,14 @@ class TableConverter(Converter):
             return None
         return m.groupdict()
 
+    @staticmethod
+    def _children_from_dataframe(dataframe: pd.DataFrame):
+        child_elements = list()
+        for index, row in dataframe.iterrows():
+            child_elements.append(
+                DictElement(str(index), row.to_dict()))
+        return child_elements
+
 
 class XLSXTableConverter(TableConverter):
     """
@@ -1336,11 +1358,7 @@ class XLSXTableConverter(TableConverter):
         if not isinstance(element, File):
             raise RuntimeError("Element must be a File.")
         table = pd.read_excel(element.path, **self.get_options())
-        child_elements = list()
-        for index, row in table.iterrows():
-            child_elements.append(
-                DictElement(str(index), row.to_dict()))
-        return child_elements
+        return self._children_from_dataframe(table)
 
 
 class CSVTableConverter(TableConverter):
@@ -1365,22 +1383,19 @@ class CSVTableConverter(TableConverter):
         if not isinstance(element, File):
             raise RuntimeError("Element must be a File.")
         table = pd.read_csv(element.path, **self.get_options())
-        child_elements = list()
-        for index, row in table.iterrows():
-            child_elements.append(
-                DictElement(str(index), row.to_dict()))
-        return child_elements
+        return self._children_from_dataframe(table)
 
 
 class DateElementConverter(TextElementConverter):
     """allows to convert different text formats of dates to Python date objects.
 
     The text to be parsed must be contained in the "date" group. The format string can be supplied
-    under "dateformat" in the Converter definition. The library used is datetime so see its
+    under "date_format" in the Converter definition. The library used is datetime so see its
     documentation for information on how to create the format string.
 
     """
 
+    # TODO make `date` parameter name configurable
     def match(self, element: StructureElement):
         matches = super().match(element)
         if matches is not None and "date" in matches:
@@ -1389,3 +1404,24 @@ class DateElementConverter(TextElementConverter):
                 self.definition["date_format"] if "date_format" in self.definition else "%Y-%m-%d"
             ).date()})
         return matches
+
+
+class DatetimeElementConverter(TextElementConverter):
+    """Convert text so that it is formatted in a way that LinkAhead can understand it.
+
+The text to be parsed must be contained in the ``val`` match group. The format string can be
+supplied in the ``datetime_format`` node. This class uses the ``datetime`` module, so
+``datetime_format`` must follow this specification:
+https://docs.python.org/3/library/datetime.html#strftime-and-strptime-format-codes
+
+    """
+
+    # TODO make `val` parameter name configurable
+    def match(self, element: StructureElement):
+        matches = super().match(element)
+        if matches is not None and "val" in matches:
+            fmt_default = "%Y-%m-%dT%H:%M:%S"
+            fmt = self.definition.get("datetime_format", fmt_default)
+            dt_str = datetime.datetime.strptime(matches["val"], fmt).strftime(fmt_default)
+            matches.update({"val": dt_str})
+        return matches
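A self-contained example of the reformatting this converter performs, with a custom `datetime_format` (the values are made up):

```python
import datetime

fmt = "%d.%m.%Y %H:%M"   # custom datetime_format from the cfood
val = "24.12.2024 18:30"  # matched text in the ``val`` group
print(datetime.datetime.strptime(val, fmt).strftime("%Y-%m-%dT%H:%M:%S"))
# -> 2024-12-24T18:30:00
```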
diff --git a/src/caoscrawler/crawl.py b/src/caoscrawler/crawl.py
index d21e6e2521578dc407e445d8220506677be84e26..0f23acfdfde2a863a66f25901a85748b538f5d04 100644
--- a/src/caoscrawler/crawl.py
+++ b/src/caoscrawler/crawl.py
@@ -51,26 +51,27 @@ from caosadvancedtools.cache import UpdateCache
 from caosadvancedtools.crawler import Crawler as OldCrawler
 from caosadvancedtools.serverside.helper import send_mail
 from caosadvancedtools.utils import create_entity_link
-from linkahead.apiutils import (EntityMergeConflictError, compare_entities,
+from linkahead.apiutils import (compare_entities,
                                 merge_entities)
 from linkahead.cached import cache_clear, cached_get_entity_by
 from linkahead.common.datatype import get_list_datatype, is_reference
-from linkahead.exceptions import EmptyUniqueQueryError
+from linkahead.exceptions import (
+    TransactionError,
+)
 from linkahead.utils.escape import escape_squoted_text
 
 from .config import get_config_setting
 from .converters import Converter, ConverterValidationError
 from .debug_tree import DebugTree
-from .identifiable import Identifiable
 from .identifiable_adapters import (CaosDBIdentifiableAdapter,
-                                    IdentifiableAdapter,
-                                    LocalStorageIdentifiableAdapter)
+                                    IdentifiableAdapter)
 from .logging import configure_server_side_logging
 from .macros import defmacro_constructor, macro_constructor
 from .scanner import (create_converter_registry, initialize_converters,
                       load_definition, scan_directory, scan_structure_elements)
 from .stores import GeneralStore
 from .structure_elements import StructureElement
+from .sync_graph import SyncGraph
 
 logger = logging.getLogger(__name__)
 
@@ -172,163 +173,12 @@ def _resolve_datatype(prop: db.Property, remote_entity: db.Entity):
     return prop
 
 
-def _treat_merge_error_of(newrecord, record):
-    """
-    The parameters are two entities that cannot be merged with the merge_entities function.
-
-    # This function checks for two obvious cases where no merge will ever be possible:
-    # 1. Two Entities with differing IDs
-    # 2. Two non-Entity values which differ
-
-    It creates a more informative logger message and raises an Exception in those cases.
-    """
-    for this_p in newrecord.properties:
-        that_p = record.get_property(this_p.name)
-
-        if that_p is None:
-            logger.debug(f"Property {this_p.name} does not exist in the second entity. Note that "
-                         "this should not be the reason for the merge conflict.")
-            continue
-
-        if (isinstance(this_p.value, db.Entity)
-                and isinstance(that_p.value, db.Entity)):
-            if this_p.value.id is not None and that_p.value.id is not None:
-                if this_p.value.id != that_p.value.id:
-                    logger.error("The Crawler is trying to merge two entities "
-                                 "because they should be the same object (same"
-                                 " identifiables), but they reference "
-                                 "different Entities with the same Property."
-                                 f"Problematic Property: {this_p.name}\n"
-                                 f"Referenced Entities: {this_p.value.id} and "
-                                 f"{that_p.value.id}\n"
-                                 f"{record}\n{newrecord}")
-                    raise RuntimeError("Cannot merge Entities")
-        elif (not isinstance(this_p.value, db.Entity)
-              and not isinstance(that_p.value, db.Entity)):
-            if ((this_p.value != that_p.value)
-                # TODO can we also compare lists?
-                and not isinstance(this_p.value, list)
-                    and not isinstance(that_p.value, list)):
-                logger.error(
-                    "The Crawler is trying to merge two entities because they should be the same "
-                    "object (same identifiables), but they have different values for the same "
-                    "Property.\n"
-                    f"Problematic Property: {this_p.name}\n"
-                    f"Values: {this_p.value} and {that_p.value}\n"
-                    f"{record}\n{newrecord}")
-                raise RuntimeError("Cannot merge Entities")
-
-
 class SecurityMode(Enum):
     RETRIEVE = 0
     INSERT = 1
     UPDATE = 2
 
 
-class TreatedRecordLookUp():
-    """tracks Records and Identifiables for which it was checked whether they exist in the remote
-    server
-
-    For a given Record it can be checked, whether it exists in the remote sever if
-    - it has a (valid) ID
-    - it has a (valid) path (FILEs only)
-    - an identifiable can be created for the Record.
-
-    Records are added by calling the `add` function and they are then added to the internal
-    existing or missing list depending on whether the Record has a valid ID.
-    Additionally, the Record is added to three look up dicts. The keys of those are paths, IDs and
-    the representation of the identifiables.
-
-    The extreme case, that one could imagine, would be that the same Record occurs three times as
-    different Python objects: one that only has an ID, one with only a path and one without ID and
-    path but with identifying properties. During `split_into_inserts_and_updates` all three
-    must be identified with each other (and must be merged). Since we require, that treated
-    entities have a valid ID if they exist in the remote server, all three objects would be
-    identified with each other simply using the IDs.
-
-    In the case that the Record is not yet in the remote server, there cannot be a Python object
-    with an ID. Thus we might have one with a path and one with an identifiable. If that Record
-    does not yet exist, it is necessary that both Python objects have at least either the path or
-    the identifiable in common.
-    """
-
-    def __init__(self):
-        self._id_look_up: dict[int, db.Entity] = {}
-        self._path_look_up: dict[str, db.Entity] = {}
-        self._identifiable_look_up: dict[str, db.Entity] = {}
-        self.remote_missing_counter = -1
-        self._missing: dict[int, db.Entity] = {}
-        self._existing: dict[int, db.Entity] = {}
-
-    def add(self, record: db.Entity, identifiable: Optional[Identifiable] = None):
-        """
-        Add a Record that was treated, such that it is contained in the internal look up dicts
-
-        This Record MUST have an ID if it was found in the remote server.
-        """
-        if record.id is None:
-            if record.path is None and identifiable is None:
-                raise RuntimeError("Record must have ID or path or an identifiable must be given."
-                                   f"Record is\n{record}")
-            record.id = self.remote_missing_counter
-            self.remote_missing_counter -= 1
-            self._add_any(record, self._missing, identifiable)
-        else:
-            self._add_any(record, self._existing, identifiable)
-
-    def get_any(self, record: db.Entity, identifiable: Optional[Identifiable] = None):
-        """
-        Check whether this Record was already added. Identity is based on ID, path or Identifiable
-        represenation
-        """
-        if record.id is not None and record.id in self._id_look_up:
-            return self._id_look_up[record.id]
-        if record.path is not None and record.path in self._path_look_up:
-            return self._path_look_up[record.path]
-        if (identifiable is not None and identifiable.get_representation() in
-                self._identifiable_look_up):
-            return self._identifiable_look_up[identifiable.get_representation()]
-
-    def get_existing(self, record: db.Entity, identifiable: Optional[Identifiable] = None):
-        """ Check whether this Record exists on the remote server
-
-        Returns: The stored Record
-        """
-        rec = self.get_any(record, identifiable)
-        if id(rec) in self._existing:
-            return rec
-        else:
-            return None
-
-    def get_missing(self, record: db.Entity, identifiable: Optional[Identifiable] = None):
-        """ Check whether this Record is missing on the remote server
-
-        Returns: The stored Record
-        """
-        rec = self.get_any(record, identifiable)
-        if id(rec) in self._missing:
-            return rec
-        else:
-            return None
-
-    def get_missing_list(self):
-        """ Return all Records that are missing in the remote server """
-        return list(self._missing.values())
-
-    def get_existing_list(self):
-        """ Return all Records that exist in the remote server """
-        return list(self._existing.values())
-
-    def _add_any(self, record: db.Entity, lookup, identifiable: Optional[Identifiable] = None):
-        if record.id is not None:
-            self._id_look_up[record.id] = record
-        if record.path is not None:
-            self._path_look_up[record.path] = record
-        if identifiable is not None:
-            self._identifiable_look_up[identifiable.get_representation()] = record
-        lookup[id(record)] = record
-
-
 class Crawler(object):
     """
     Crawler class that encapsulates crawling functions.
@@ -365,14 +215,13 @@ class Crawler(object):
         # The following caches store records, where we checked whether they exist on the remote
         # server. Since, it is important to know whether they exist or not, we store them into two
         # different caches.
-        self.treated_records_lookup = TreatedRecordLookUp()
 
         # TODO does it make sense to have this as member variable?
         self.securityMode = securityMode
         # TODO does it make sense to have this as member variable(run_id)?
         self.generate_run_id()
 
-        self.identifiableAdapter: IdentifiableAdapter = LocalStorageIdentifiableAdapter()
+        self.identifiableAdapter: IdentifiableAdapter = CaosDBIdentifiableAdapter()
         if identifiableAdapter is not None:
             self.identifiableAdapter = identifiableAdapter
 
@@ -449,396 +298,59 @@ class Crawler(object):
         self.crawled_data = data
         return data
 
-    def _has_reference_value_without_id(self, ident: Identifiable) -> bool:
-        """
-        Returns True if there is at least one value in the properties and backrefs attributes of
-        ``ident`` which:
-
-        a) is a reference property AND
-        b) where the value is set to a
-           :external+caosdb-pylib:py:class:`db.Entity <caosdb.common.models.Entity>`
-           (instead of an ID) AND
-        c) where the ID of the value (the
-           :external+caosdb-pylib:py:class:`db.Entity <caosdb.common.models.Entity>` object in b))
-           is not set (to an integer)
-
-        Returns
-        -------
-        bool
-            True if there is a value without id (see above)
-
-        Raises
-        ------
-        ValueError
-            If no Identifiable is given.
-        """
-        if ident is None:
-            raise ValueError("Identifiable has to be given as argument")
-        for pvalue in list(ident.properties.values()) + ident.backrefs:
-            if isinstance(pvalue, list):
-                for el in pvalue:
-                    if isinstance(el, db.Entity) and el.id is None:
-                        return True
-            elif isinstance(pvalue, db.Entity) and pvalue.id is None:
-                return True
-        return False
-
-    @staticmethod
-    def create_flat_list(ent_list: list[db.Entity], flat: Optional[list[db.Entity]] = None):
-        """
-        Recursively adds entities and all their properties contained in ent_list to
-        the output list flat.
-
-        TODO: This function will be moved to pylib as it is also needed by the
-              high level API.
-        """
-        # Note: A set would be useful here, but we do not want a random order.
-        if flat is None:
-            flat = list()
-        for el in ent_list:
-            if el not in flat:
-                flat.append(el)
-        for ent in ent_list:
-            for p in ent.properties:
-                # For lists append each element that is of type Entity to flat:
-                if isinstance(p.value, list):
-                    for el in p.value:
-                        if isinstance(el, db.Entity):
-                            if el not in flat:
-                                flat.append(el)
-                                Crawler.create_flat_list([el], flat)
-                elif isinstance(p.value, db.Entity):
-                    if p.value not in flat:
-                        flat.append(p.value)
-                        Crawler.create_flat_list([p.value], flat)
-        return flat
-
-    def _has_missing_object_in_references(self, ident: Identifiable, referencing_entities: dict):
-        """
-        returns False if any value in the properties attribute is a db.Entity object that
-        is contained in the `remote_missing_cache`. If ident has such an object in
-        properties, it means that it references another Entity, where we checked
-        whether it exists remotely and it was not found.
-        """
-        if ident is None:
-            raise ValueError("Identifiable has to be given as argument")
-        for pvalue in list(ident.properties.values()) + ident.backrefs:
-            # Entity instead of ID and not cached locally
-            if (isinstance(pvalue, list)):
-                for el in pvalue:
-                    elident = self.identifiableAdapter.get_identifiable(
-                        el, referencing_entities[id(el)])
-                    if (isinstance(el, db.Entity)
-                            and self.treated_records_lookup.get_missing(el, elident) is not None):
-                        return True
-            if (isinstance(pvalue, db.Entity) and self.treated_records_lookup.get_missing(
-                pvalue,
-                    self.identifiableAdapter.get_identifiable(pvalue,
-                                                              referencing_entities[id(pvalue)])
-            ) is not None):
-                # might be checked when reference is resolved
-                return True
-        return False
-
-    def replace_references_with_cached(self, record: db.Record, referencing_entities: dict):
-        """
-        Replace all references with the versions stored in the cache.
-
-        If the cache version is not identical, raise an error.
-        """
-        for p in record.properties:
-            if (isinstance(p.value, list)):
-                lst = []
-                for el in p.value:
-                    if (isinstance(el, db.Entity) and el.id is None):
-                        cached = self.treated_records_lookup.get_any(
-                            el,
-                            self.identifiableAdapter.get_identifiable(
-                                el, referencing_entities[id(el)]))
-                        if cached is None:
-                            lst.append(el)
-                            continue
-                        if not check_identical(cached, el, True):
-                            if isinstance(p.value, db.File):
-                                if p.value.path != cached.path:
-                                    raise RuntimeError(
-                                        "The cached and the referenced entity are not identical.\n"
-                                        f"Cached:\n{cached}\nReferenced:\n{el}"
-                                    )
-                            else:
-                                raise RuntimeError(
-                                    "The cached and the referenced entity are not identical.\n"
-                                    f"Cached:\n{cached}\nReferenced:\n{el}"
-                                )
-                        lst.append(cached)
-                    else:
-                        lst.append(el)
-                p.value = lst
-            if (isinstance(p.value, db.Entity) and p.value.id is None):
-                cached = self.treated_records_lookup.get_any(
-                    p.value, self.identifiableAdapter.get_identifiable(
-                        p.value, referencing_entities[id(p.value)]))
-                if cached is None:
-                    continue
-                if not check_identical(cached, p.value, True):
-                    if isinstance(p.value, db.File):
-                        if p.value.path != cached.path:
-                            raise RuntimeError(
-                                "The cached and the referenced entity are not identical.\n"
-                                f"Cached:\n{cached}\nReferenced:\n{p.value}"
-                            )
-                    else:
-                        raise RuntimeError(
-                            "The cached and the referenced entity are not identical.\n"
-                            f"Cached:\n{cached}\nReferenced:\n{p.value}"
-                        )
-                p.value = cached
-
-    @staticmethod
-    def bend_references_to_new_object(old, new, entities):
-        """ Bend references to the other object
-        Iterate over all entities in `entities` and check the values of all properties of
-        occurances of old Entity and replace them with new Entity
-        """
-        for el in entities:
-            for p in el.properties:
-                if isinstance(p.value, list):
-                    for index, val in enumerate(p.value):
-                        if val is old:
-                            p.value[index] = new
-                else:
-                    if p.value is old:
-                        p.value = new
+    def _split_into_inserts_and_updates(self, st: SyncGraph):
+        """Classify nodes in the SyncGraph ``st`` with respect to their state on the server.
 
-    def _merge_identified(self, newrecord, record, try_to_merge_later, all_records):
-        """ tries to merge record into newrecord
+This method iteratively checks whether the nodes exist on the remote server and creates two
+lists: one with the entities that need to be updated and one with the entities to be inserted.
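+
+A minimal usage sketch (mirroring how ``Crawler.synchronize`` calls this method; variable
+names are illustrative):
+
+.. code-block:: python
+
+    st = SyncGraph(crawled_data, self.identifiableAdapter)
+    to_be_inserted, to_be_updated = self._split_into_inserts_and_updates(st)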
 
-        If it fails, record is added to the try_to_merge_later list.
-        In any case, references are bent to the newrecord object.
+.. todo::
 
-        """
-        try:
-            merge_entities(
-                newrecord, record, merge_references_with_empty_diffs=False,
-                merge_id_with_resolved_entity=True)
-        except EntityMergeConflictError:
-            _treat_merge_error_of(newrecord, record)
-            # We cannot merge but it is none of the clear case where merge is
-            # impossible. Thus we try later
-            try_to_merge_later.append(record)
-            if newrecord.id is not None:
-                record.id = newrecord.id
-        except NotImplementedError:
-            print(newrecord)
-            print(record)
-            raise
-        Crawler.bend_references_to_new_object(
-            old=record, new=newrecord,
-            entities=all_records
-        )
-
-    def _identity_relies_on_unchecked_entities(self, record: db.Record, referencing_entities):
-        """
-        If a record for which it could not yet be verified whether it exists in LA or not is part
-        of the identifying properties, this returns True, otherwise False
-        """
+        Should this be made into a public method of SyncGraph instead?  At the moment, this is a
+        purely static method that only operates on the state of ``st``.
 
-        registered_identifiable = self.identifiableAdapter.get_registered_identifiable(record)
-        if registered_identifiable is None:
-            return False
-        refs = self.identifiableAdapter.get_identifying_referencing_entities(referencing_entities,
-                                                                             registered_identifiable)
-        if any(el is None for el in refs):
-            return True
-
-        refs = self.identifiableAdapter.get_identifying_referenced_entities(
-            record, registered_identifiable)
-        if any([self.treated_records_lookup.get_any(el) is None for el in refs]):
-            return True
-
-        return False
-
-    @staticmethod
-    def create_reference_mapping(flat: list[db.Entity]):
         """
-        Create a dictionary of dictionaries of the form:
-        dict[int, dict[str, list[Union[int,None]]]]
-
-        - The integer index is the Python id of the value object.
-        - The string is the name of the first parent of the referencing object.
-
-        Each value objects is taken from the values of all properties from the list flat.
-
-        So the returned mapping maps ids of entities to the ids of objects which are referring
-        to them.
-        """
-        # TODO we need to treat children of RecordTypes somehow.
-        references: dict[int, dict[str, list[Union[int, None]]]] = {}
-        for ent in flat:
-            if id(ent) not in references:
-                references[id(ent)] = {}
-            for p in ent.properties:
-                val = p.value
-                if not isinstance(val, list):
-                    val = [val]
-                for v in val:
-                    if isinstance(v, db.Entity):
-                        if id(v) not in references:
-                            references[id(v)] = {}
-                        if ent.parents[0].name not in references[id(v)]:
-                            references[id(v)][ent.parents[0].name] = []
-                        references[id(v)][ent.parents[0].name].append(ent.id)
-
-        return references
-
-    def split_into_inserts_and_updates(self, ent_list: list[db.Entity]):
-        flat = Crawler.create_flat_list(ent_list)
-        all_records = list(flat)
-
-        # TODO: can the following be removed at some point
-        for ent in flat:
-            if ent.role == "Record" and len(ent.parents) == 0:
-                raise RuntimeError(f"Records must have a parent.\n{ent}")
-
-        try_to_merge_later = []
-
-        # Check whether Records can be identified without identifiable
-        for i in reversed(range(len(flat))):
-            record = flat[i]
-            # 1. Can it be identified via an ID?
-            if record.id is not None:
-                treated_record = self.treated_records_lookup.get_existing(record)
-                if treated_record is not None:
-                    self._merge_identified(treated_record, record, try_to_merge_later, all_records)
-                    all_records.remove(record)
-                    referencing_entities = self.create_reference_mapping(all_records)
-                else:
-                    self.treated_records_lookup.add(record, None)
-                assert record.id
-                del flat[i]
-            # 2. Can it be identified via a path?
-            elif record.path is not None:
-                try:
-                    existing = cached_get_entity_by(path=record.path)
-                except EmptyUniqueQueryError:
-                    existing = None
-                if existing is not None:
-                    record.id = existing.id
-                    # TODO check the following copying of _size and _checksum
-                    # Copy over checksum and size too if it is a file
-                    record._size = existing._size
-                    record._checksum = existing._checksum
-                treated_record = self.treated_records_lookup.get_any(record)
-                if treated_record is not None:
-                    self._merge_identified(treated_record, record, try_to_merge_later, all_records)
-                    all_records.remove(record)
-                    referencing_entities = self.create_reference_mapping(all_records)
-                else:
-                    # TODO add identifiable if possible
-                    self.treated_records_lookup.add(record, None)
-                assert record.id
-                del flat[i]
-
         entity_was_treated = True
-        # flat contains Entities which could not yet be checked against the remote server
-        while entity_was_treated and len(flat) > 0:
+        # st.unchecked contains Entities which could not yet be checked against the remote server
+        while entity_was_treated and len(st.unchecked) > 0:
             entity_was_treated = False
-            referencing_entities = self.create_reference_mapping(all_records)
-
-            # For each element we try to find out whether we can find it in the server or whether
-            # it does not yet exist. Since a Record may reference other unkown Records it might not
-            # be possible to answer this right away.
-            # The following checks are done on each Record:
-            # 1. Is it in the cache of already checked Records?
-            # 2. Can it be checked on the remote server?
-            # 3. Does it have to be new since a needed reference is missing?
-            for i in reversed(range(len(flat))):
-                record = flat[i]
-
-                if self._identity_relies_on_unchecked_entities(record,
-                                                               referencing_entities[id(record)]):
+
+            for se in st.unchecked:
+                if se.identifiable is None:  # we cannot yet identify this node
                     continue
 
-                identifiable = self.identifiableAdapter.get_identifiable(
-                    record,
-                    referencing_entities=referencing_entities[id(record)])
-
-                # 1. Is it in the cache of already checked Records?
-                if self.treated_records_lookup.get_any(record, identifiable) is not None:
-                    treated_record = self.treated_records_lookup.get_any(record, identifiable)
-                    # Since the identifiables are the same, treated_record and record actually
-                    # describe the same object.
-                    # We merge record into treated_record in order to prevent loss of information
-                    self._merge_identified(treated_record, record, try_to_merge_later, all_records)
-                    all_records.remove(record)
-                    referencing_entities = self.create_reference_mapping(all_records)
-
-                    del flat[i]
-                    entity_was_treated = True
-
-                # 2. Can it be checked on the remote server?
-                elif not self._has_reference_value_without_id(identifiable):
-                    identified_record = (
-                        self.identifiableAdapter.retrieve_identified_record_for_identifiable(
-                            identifiable))
-                    if identified_record is None:
-                        # identifiable does not exist remotely -> record needs to be inserted
-                        self.treated_records_lookup.add(record, identifiable)
-                    else:
-                        # side effect
-                        record.id = identified_record.id
-                        record.path = identified_record.path
-                        self.treated_records_lookup.add(record, identifiable)
-                    assert record.id
-                    del flat[i]
-                    entity_was_treated = True
-
-                # 3. Does it have to be new since a needed reference is missing?
-                # (Is it impossible to check this record because an identifiable references a
-                # missing record?)
-                elif self._has_missing_object_in_references(identifiable, referencing_entities):
-                    self.treated_records_lookup.add(record, identifiable)
-                    assert record.id
-                    del flat[i]
-                    entity_was_treated = True
-
-            for record in flat:
-                self.replace_references_with_cached(record, referencing_entities)
-
-        # We postponed the merge for records where it failed previously and try it again now.
+                # check remote server
+                identified_record = (
+                    st.identifiableAdapter.retrieve_identified_record_for_identifiable(
+                        se.identifiable))
+                remote_id = None
+                if identified_record is not None:
+                    remote_id = identified_record.id
+                # Set the ID of the node. If the entity is missing remotely, remote_id is None
+                # and the SyncGraph marks the node as missing.
+                st.set_id_of_node(se, remote_id)
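+                # Note: settling one node may let the SyncGraph resolve identifiables of nodes
+                # that reference it, so st.unchecked can shrink by more than one element here.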
+                entity_was_treated = True
+                break  # one or more nodes were just removed from st.unchecked -> back to start
+
-        # This only might add properties of the postponed records to the already used ones.
-        for record in try_to_merge_later:
-            identifiable = self.identifiableAdapter.get_identifiable(
-                record,
-                referencing_entities=referencing_entities[id(record)])
-            newrecord = self.treated_records_lookup.get_any(record, identifiable)
-            merge_entities(newrecord, record, merge_id_with_resolved_entity=True)
-        if len(flat) > 0:
-            circle = self.detect_circular_dependency(flat)
-            if circle is None:
-                logger.error("Failed, but found NO circular dependency. The data is as follows:"
-                             + str(self.compact_entity_list_representation(flat,
-                                                                           referencing_entities)))
-            else:
-                logger.error("Found circular dependency (Note that this might include references "
-                             "that are not identifying properties): "
-                             + self.compact_entity_list_representation(circle,
-                                                                       referencing_entities))
+        if len(st.unchecked) > 0:
+            # circle = st.unchecked_contains_circular_dependency()
+            # if circle is None:
+            #    logger.error("Failed, but found NO circular dependency. The data is as follows:"
+            #                 + "\n".join([str(el) for el in st.unchecked])
+
+            #                 )
+            # else:
+            #    logger.error("Found circular dependency (Note that this might include references "
+            #                 "that are not identifying properties): "
+            #                 + "\n".join([str(el) for el in st.unchecked])
+            #                 )
 
             raise RuntimeError(
-                f"Could not finish split_into_inserts_and_updates. Circular dependency: "
-                f"{circle is not None}")
-
-        # remove negative IDs
-        missing = self.treated_records_lookup.get_missing_list()
-        for el in missing:
-            if el.id is None:
-                raise RuntimeError("This should not happen")  # TODO remove
-            if el.id >= 0:
-                raise RuntimeError("This should not happen")  # TODO remove
-            el.id = None
+                "Could not finish _split_into_inserts_and_updates. "
+                "It might be due to a circular dependency")
 
-        return (missing, self.treated_records_lookup.get_existing_list())
+        return st.export_record_lists()
 
     def replace_entities_with_ids(self, rec: db.Record):
         for el in rec.properties:
@@ -851,7 +363,7 @@ class Crawler(object):
                         if val.id is not None:
                             el.value[index] = val.id
 
-    @ staticmethod
+    @staticmethod
     def compact_entity_list_representation(entities, referencing_entities: List) -> str:
         """ a more readable representation than the standard xml representation
 
@@ -883,40 +395,7 @@ class Crawler(object):
 
         return text + "--------\n"
 
-    @ staticmethod
-    def detect_circular_dependency(flat: list[db.Entity]):
-        """
-        Detects whether there are circular references in the given entity list and returns a list
-        where the entities are ordered according to the chain of references (and only the entities
-        contained in the circle are included. Returns None if no circular dependency is found.
-
-        TODO: for the sake of detecting problems for split_into_inserts_and_updates we should only
-        consider references that are identifying properties.
-        """
-        circle = [flat[0]]
-        closed = False
-        while not closed:
-            current = circle[-1]
-            added_to_circle = False
-            for p in current.properties:
-                if isinstance(p.value, list):
-                    for pval in p.value:
-                        if pval in flat:
-                            if pval in circle:
-                                closed = True
-                            circle.append(pval)
-                            added_to_circle = True
-                else:
-                    if p.value in flat:
-                        if p.value in circle:
-                            closed = True
-                        circle.append(p.value)
-                        added_to_circle = True
-            if not added_to_circle:
-                return None
-        return circle
-
-    @ staticmethod
+    @staticmethod
     def _merge_properties_from_remote(
             crawled_data: list[db.Record],
             identified_records: list[db.Record]
@@ -958,7 +437,7 @@ class Crawler(object):
 
         return to_be_updated
 
-    @ staticmethod
+    @staticmethod
     def remove_unnecessary_updates(
             crawled_data: list[db.Record],
             identified_records: list[db.Record]
@@ -984,7 +463,7 @@ class Crawler(object):
 
         return actual_updates
 
-    @ staticmethod
+    @staticmethod
     def execute_parent_updates_in_list(to_be_updated, securityMode, run_id, unique_names):
         """
         Execute the updates of changed parents.
@@ -1027,13 +506,13 @@ class Crawler(object):
                             "mode. This might lead to a failure of inserts that follow.")
                 logger.info(parent_updates)
 
-    @ staticmethod
+    @staticmethod
     def _get_property_id_for_datatype(rtname: str, name: str):
         return cached_get_entity_by(
             query=f"FIND Entity '{escape_squoted_text(rtname)}' "
-                  f"with name='{escape_squoted_text(name)}'").id
+            f"with name='{escape_squoted_text(name)}'").id
 
-    @ staticmethod
+    @staticmethod
     def replace_name_with_referenced_entity_id(prop: db.Property):
         """changes the given property in place if it is a reference property that has a name as
         value
@@ -1078,7 +557,7 @@ class Crawler(object):
                     propval.append(el)
             prop.value = propval
 
-    @ staticmethod
+    @staticmethod
     def execute_inserts_in_list(to_be_inserted, securityMode,
                                 run_id: Optional[uuid.UUID] = None,
                                 unique_names=True):
@@ -1098,7 +577,7 @@ class Crawler(object):
                 update_cache = UpdateCache()
                 update_cache.insert(to_be_inserted, run_id, insert=True)
 
-    @ staticmethod
+    @staticmethod
     def set_ids_and_datatype_of_parents_and_properties(rec_list):
         for record in rec_list:
             for parent in record.parents:
@@ -1110,7 +589,7 @@ class Crawler(object):
                     prop.id = entity.id
                     _resolve_datatype(prop, entity)
 
-    @ staticmethod
+    @staticmethod
     def execute_updates_in_list(to_be_updated, securityMode,
                                 run_id: Optional[uuid.UUID] = None,
                                 unique_names=True):
@@ -1124,7 +603,7 @@ class Crawler(object):
                 update_cache = UpdateCache()
                 update_cache.insert(to_be_updated, run_id)
 
-    @ staticmethod
+    @staticmethod
     def check_whether_parent_exists(records: list[db.Entity], parents: list[str]):
         """ returns a list of all records in `records` that have a parent that is in `parents`"""
         problems = []
@@ -1180,7 +659,8 @@ class Crawler(object):
                 "use for example the Scanner to create this data."))
             crawled_data = self.crawled_data
 
-        to_be_inserted, to_be_updated = self.split_into_inserts_and_updates(crawled_data)
+        to_be_inserted, to_be_updated = self._split_into_inserts_and_updates(
+            SyncGraph(crawled_data, self.identifiableAdapter))
 
         for el in to_be_updated:
             # all entity objects are replaced by their IDs except for the not yet inserted ones
@@ -1211,8 +691,10 @@ class Crawler(object):
         if len(ins_problems) > 0 or len(upd_problems) > 0:
             raise ForbiddenTransaction(
                 "One or more Records that have a parent which is excluded from inserts or updates."
-                f"\nRecords excluded from inserts have the following RecordTypes:\n{[el.parents[0].name for el in ins_problems]}"
-                f"\nRecords excluded from updates have the following RecordTypes:\n{[el.parents[0].name for el in upd_problems]}"
+                f"\nRecords excluded from inserts have the following RecordTypes:\n"
+                f"{[el.parents[0].name for el in ins_problems]}"
+                f"\nRecords excluded from updates have the following RecordTypes:\n"
+                f"{[el.parents[0].name for el in upd_problems]}"
             )
 
         logger.info(f"Going to insert {len(to_be_inserted)} Entities and update "
@@ -1221,14 +703,14 @@ class Crawler(object):
             cache_clear()
             self.execute_parent_updates_in_list(to_be_updated, securityMode=self.securityMode,
                                                 run_id=self.run_id, unique_names=unique_names)
-            logger.info(f"Added parent RecordTypes where necessary.")
+            logger.info("Added parent RecordTypes where necessary.")
             self.execute_inserts_in_list(
                 to_be_inserted, self.securityMode, self.run_id, unique_names=unique_names)
-            logger.info(f"Executed inserts:\n"
+            logger.info("Executed inserts:\n"
                         + self.create_entity_summary(to_be_inserted))
             self.execute_updates_in_list(
                 to_be_updated, self.securityMode, self.run_id, unique_names=unique_names)
-            logger.info(f"Executed updates:\n"
+            logger.info("Executed updates:\n"
                         + self.create_entity_summary(to_be_updated))
 
         update_cache = UpdateCache()
@@ -1244,7 +726,7 @@ class Crawler(object):
 
         return (to_be_inserted, to_be_updated)
 
-    @ staticmethod
+    @staticmethod
     def create_entity_summary(entities: list[db.Entity]):
         """ Creates a summary string reprensentation of a list of entities."""
         parents = {}
@@ -1263,13 +745,35 @@ class Crawler(object):
             output = output[:-2] + "\n"
         return output
 
-    @ staticmethod
+    @staticmethod
     def inform_about_pending_changes(pending_changes, run_id, path, inserts=False):
-        # Sending an Email with a link to a form to authorize updates is
+        # An email with a link to a form to authorize updates is only sent if the server is
+        # configured to send crawler notifications.
         if get_config_setting("send_crawler_notifications"):
-            filename = OldCrawler.save_form(
-                [el[3] for el in pending_changes], path, run_id)
-            OldCrawler.send_mail([el[3] for el in pending_changes], filename)
+            filename = OldCrawler.save_form([el[3] for el in pending_changes], path, run_id)
+            text = """Dear Curator,
+    there were changes that need your authorization. Please check the following
+    carefully and if the changes are ok, click on the following link:
+
+    {url}/Shared/{filename}
+
+    {changes}
+            """.format(url=db.configuration.get_config()["Connection"]["url"],
+                       filename=filename,
+                       changes="\n".join([el[3] for el in pending_changes]))
+            try:
+                fro = get_config_setting("sendmail_from_address")
+                to = get_config_setting("sendmail_to_address")
+            except KeyError:
+                logger.error("Server Configuration is missing a setting for "
+                             "sending mails. The administrator should check "
+                             "'from_mail' and 'to_mail'.")
+                return
+
+            send_mail(
+                from_addr=fro,
+                to=to,
+                subject="Crawler Update",
+                body=text)
 
         for i, el in enumerate(pending_changes):
 
@@ -1284,7 +788,7 @@ ____________________\n""".format(i + 1, len(pending_changes)) + str(el[3]))
                     + " by invoking the crawler"
                     " with the run id: {rid}\n".format(rid=run_id))
 
-    @ staticmethod
+    @staticmethod
     def debug_build_usage_tree(converter: Converter):
         res: dict[str, dict[str, Any]] = {
             converter.name: {
@@ -1302,7 +806,7 @@ ____________________\n""".format(i + 1, len(pending_changes)) + str(el[3]))
             res[converter.name]["subtree"][k[0]] = d[k[0]]
         return res
 
-    def save_debug_data(self, filename: str, debug_tree: DebugTree = None):
+    def save_debug_data(self, filename: str, debug_tree: Optional[DebugTree] = None):
         """
         Save the information contained in a debug_tree to a file named filename.
         """
@@ -1361,13 +865,13 @@ def _update_status_record(run_id, n_inserts, n_updates, status):
         cr_rec.get_property('status').value = status
         (cr_rec
             .add_property(db.execute_query(
-                f"FIND Property with name='number_of_inserted_entities'", unique=True).id,
+                "FIND Property with name='number_of_inserted_entities'", unique=True).id,
                 n_inserts)
             .add_property(
-                db.execute_query(f"FIND Property with name='number_of_updated_entities'",
+                db.execute_query("FIND Property with name='number_of_updated_entities'",
                                  unique=True).id, n_updates)
             .add_property(
-                db.execute_query(f"FIND Property with name='finished'",
+                db.execute_query("FIND Property with name='finished'",
                                  unique=True).id, datetime.now().isoformat()))
         cr_rec.update()
 
@@ -1380,6 +884,7 @@ def _notify_about_inserts_and_updates(n_inserts, n_updates, logfile, run_id):
     The email contains some basic information and a link to the log and the CrawlerRun Record.
     """
     if not get_config_setting("send_crawler_notifications"):
+        logger.debug("Crawler email notifications are disabled.")
         return
     if n_inserts == 0 and n_updates == 0:
         return
@@ -1390,8 +895,8 @@ the CaosDB Crawler successfully crawled the data and
 
 """
 
+    domain = get_config_setting("public_host_url")
     if get_config_setting("create_crawler_status_records"):
-        domain = get_config_setting("public_host_url")
         text += ("You can checkout the CrawlerRun Record for more information:\n"
                  f"{domain}/Entity/?P=0L10&query=find%20crawlerrun%20with%20run_id=%27{run_id}%27\n\n")
     text += (f"You can download the logfile here:\n{domain}/Shared/" + logfile)
@@ -1550,11 +1055,19 @@ def crawler_main(crawled_directory_path: str,
     try:
         crawler = Crawler(securityMode=securityMode)
 
-        # setup logging and reporting if serverside execution
-        if "SHARED_DIR" in os.environ:
+        if "SHARED_DIR" in os.environ:  # setup logging and reporting if serverside execution
             userlog_public, htmluserlog_public, debuglog_public = configure_server_side_logging()
+            # TODO make this optional
             _create_status_record(
-                get_config_setting("public_host_url") + "/Shared/" + htmluserlog_public, crawler.run_id)
+                get_config_setting("public_host_url") + "/Shared/" + htmluserlog_public,
+                crawler.run_id)
+        else:  # setup stdout logging for other cases
+            root_logger = logging.getLogger()
+            root_logger.setLevel(level=(logging.DEBUG if debug else logging.INFO))
+            handler = logging.StreamHandler(stream=sys.stdout)
+            handler.setLevel(logging.DEBUG if debug else logging.INFO)
+            root_logger.addHandler(handler)
+            logger.handlers.clear()
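+            # Presumably to avoid duplicate log lines: messages from this module's logger now
+            # only propagate to the root logger configured above.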
 
         debug_tree = DebugTree()
         crawled_data = scan_directory(
@@ -1569,6 +1082,10 @@ def crawler_main(crawled_directory_path: str,
             ident = CaosDBIdentifiableAdapter()
             ident.load_from_yaml_definition(identifiables_definition_file)
             crawler.identifiableAdapter = ident
+        else:
+            # TODO
+            # raise ValueError("An identifiable file is needed.")
+            pass
 
         remove_prefix = _treat_deprecated_prefix(prefix, remove_prefix)
 
@@ -1594,15 +1111,24 @@ def crawler_main(crawled_directory_path: str,
         logger.error(err)
         _update_status_record(crawler.run_id, 0, 0, status="FAILED")
         return 1
+    except TransactionError as err:
+        logger.debug(traceback.format_exc())
+        logger.error(err)
+        logger.error("Transaction error details:")
+        for suberr in err.errors:
+            logger.error("---")
+            logger.error(suberr.msg)
+            logger.error(suberr.entity)
+        return 1
     except Exception as err:
         logger.debug(traceback.format_exc())
-        logger.debug(err)
+        logger.error(err)
 
         if "SHARED_DIR" in os.environ:
             # pylint: disable=E0601
             domain = get_config_setting("public_host_url")
-            logger.error("Unexpected Error: Please tell your administrator about this and provide the"
-                         f" following path.\n{domain}/Shared/" + debuglog_public)
+            logger.error("Unexpected Error: Please tell your administrator about this and provide "
+                         f"the following path.\n{domain}/Shared/" + debuglog_public)
         _update_status_record(crawler.run_id, 0, 0, status="FAILED")
         return 1
 
diff --git a/src/caoscrawler/default_converters.yml b/src/caoscrawler/default_converters.yml
index ac8561e6e170007c1e1501a00ed61b152296b9a5..82e2f635f621b2e21e43b728fd9ed6865454f828 100644
--- a/src/caoscrawler/default_converters.yml
+++ b/src/caoscrawler/default_converters.yml
@@ -8,6 +8,9 @@ BooleanElement:
 Date:
   converter: DateElementConverter
   package: caoscrawler.converters
+Datetime:
+  converter: DatetimeElementConverter
+  package: caoscrawler.converters
 Dict:
   converter: DictElementConverter
   package: caoscrawler.converters
@@ -27,7 +30,7 @@ TextElement:
   converter: TextElementConverter
   package: caoscrawler.converters
 
-  
+
 DictDictElement:  # deprecated
   converter: DictElementConverter
   package: caoscrawler.converters
@@ -63,7 +66,7 @@ File:  # deprecated
   converter: SimpleFileConverter
   package: caoscrawler.converters
 
-  
+
 SimpleFile:
   converter: SimpleFileConverter
   package: caoscrawler.converters
@@ -84,6 +87,10 @@ CSVTableConverter:
   converter: CSVTableConverter
   package: caoscrawler.converters
 
+SPSSFile:
+  converter: SPSSConverter
+  package: caoscrawler.converters
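+# Illustrative cfood entry using this converter (the match pattern is an assumption):
+#   MySPSSFile:
+#     type: SPSSFile
+#     match: ".*\.sav$"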
+
 XLSXTableConverter:
   converter: XLSXTableConverter
   package: caoscrawler.converters
diff --git a/src/caoscrawler/default_transformers.yml b/src/caoscrawler/default_transformers.yml
index d0ad23912176bdfbf2446aa6e04bd7fa6b858777..ffcb1b15bd2bad71083cc8f0ba84172ee3daf2b0 100644
--- a/src/caoscrawler/default_transformers.yml
+++ b/src/caoscrawler/default_transformers.yml
@@ -1,4 +1,4 @@
-
+# Lookup table for matching functions and cfood yaml node names.
 
 submatch:
   package: caoscrawler.transformer_functions
@@ -9,3 +9,9 @@ split:
 replace:
   package: caoscrawler.transformer_functions
   function: replace
+date_parse:
+  package: caoscrawler.transformer_functions
+  function: date_parse
+datetime_parse:
+  package: caoscrawler.transformer_functions
+  function: datetime_parse
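+# Illustrative cfood snippet applying one of these transformers (the "date_format"
+# parameter name is an assumption):
+#   transform:
+#     parse_date:
+#       in: $date_str
+#       out: $date
+#       functions:
+#         - date_parse:
+#             date_format: "%Y-%m-%d"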
diff --git a/src/caoscrawler/exceptions.py b/src/caoscrawler/exceptions.py
new file mode 100644
index 0000000000000000000000000000000000000000..e7c61c34e2abbebef4790bde42f50d4b5b29f957
--- /dev/null
+++ b/src/caoscrawler/exceptions.py
@@ -0,0 +1,66 @@
+#!/usr/bin/env python3
+# encoding: utf-8
+#
+# This file is a part of the LinkAhead Project.
+#
+# Copyright (C) 2024 Indiscale GmbH <info@indiscale.com>
+# Copyright (C) 2024 Henrik tom Wörden <h.tomwoerden@indiscale.com>
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as
+# published by the Free Software Foundation, either version 3 of the
+# License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+#
+
+class ForbiddenTransaction(Exception):
+    """Thrown if an transactions is needed that is not allowed.
+    For example an update of an entity if the security level is INSERT
+    """
+    pass
+
+
+class ImpossibleMergeError(Exception):
+    """Thrown if due to identifying information, two SyncNodes  or two Properties of SyncNodes
+    should be merged, but there is conflicting information that prevents this.
+    """
+
+    def __init__(self, *args, pname, values, **kwargs):
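+        # pname: name of the property whose values conflict; values: the conflicting values.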
+        self.pname = pname
+        self.values = values
+        super().__init__(*args, **kwargs)
+
+
+class InvalidIdentifiableYAML(Exception):
+    """Thrown if the identifiable definition is invalid."""
+    pass
+
+
+class MissingIdentifyingProperty(Exception):
+    """Thrown if a SyncNode does not have the properties required by the corresponding registered
+    identifiable.
+    """
+    pass
+
+
+class MissingRecordType(Exception):
+    """Thrown if an record type can not be found although it is expected that it exists on the
+    server.
+    """
+    pass
+
+
+class MissingReferencingEntityError(Exception):
+    """Thrown if the identifiable requires that some entity references the given entity but there
+    is no such reference """
+
+    def __init__(self, *args, rts=None, **kwargs):
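+        # rts: RecordType names that were expected to reference the entity (an assumption
+        # based on how "is_referenced_by" lists are used in registered identifiables).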
+        self.rts = rts
+        super().__init__(*args, **kwargs)
diff --git a/src/caoscrawler/hdf5_converter.py b/src/caoscrawler/hdf5_converter.py
index 5b1ff5775fb74919c989507c449636fd822db7f0..482d59c12d2d0b8540c01bd04da718d9c514ddc4 100644
--- a/src/caoscrawler/hdf5_converter.py
+++ b/src/caoscrawler/hdf5_converter.py
@@ -18,6 +18,8 @@
 # along with this program. If not, see <https://www.gnu.org/licenses/>.
 #
 
+from typing import Optional
+
 try:
     import h5py
 except ModuleNotFoundError:
@@ -94,8 +96,8 @@ def convert_h5_element(elt: Union[h5py.Group, h5py.Dataset], name: str):
     raise ValueError("The given element must be either a HDF5 Group or Dataset object.")
 
 
-def convert_basic_element_with_nd_array(value, name: str = None,
-                                        internal_path: str = None, msg_prefix: str = ""):
+def convert_basic_element_with_nd_array(value, name: Optional[str] = None,
+                                        internal_path: Optional[str] = None, msg_prefix: str = ""):
     """Convert a given object either to an ndarray structure element or to a
     basic scalar structure element.
 
diff --git a/src/caoscrawler/identifiable.py b/src/caoscrawler/identifiable.py
index cefdf4a0f42b1f610e0712fdefebc2dc3b78d69f..f6c85c694e5ef0be7e6a9be8154a34c400bab008 100644
--- a/src/caoscrawler/identifiable.py
+++ b/src/caoscrawler/identifiable.py
@@ -1,7 +1,7 @@
 #!/usr/bin/env python3
 # encoding: utf-8
 #
-# This file is a part of the CaosDB Project.
+# This file is a part of the LinkAhead Project.
 #
 # Copyright (C) 2022 Henrik tom Wörden
 #
@@ -20,23 +20,27 @@
 #
 
 from __future__ import annotations
-import linkahead as db
-from datetime import datetime
+
 import json
-from hashlib import sha256
-from typing import Union
 import logging
+from datetime import datetime
+from hashlib import sha256
+from typing import Optional, Union
+
+import linkahead as db
+
+from .exceptions import MissingIdentifyingProperty
+from .sync_node import SyncNode
 
 logger = logging.getLogger(__name__)
 
 
 class Identifiable():
     """
-    The fingerprint of a Record in CaosDB.
+    The fingerprint of a Record in LinkAhead.
 
-    This class contains the information that is used by the CaosDB Crawler to identify Records.
-    On one hand, this can be the ID or a Record or the path of a File.
-    On the other hand, in order to check whether a Record exits in the CaosDB Server, a query can
+    This class contains the information that is used by the LinkAhead Crawler to identify Records.
+    In order to check whether a Record exists in the LinkAhead Server, a query can
     be created using the information contained in the Identifiable.
 
     Parameters
@@ -46,23 +50,22 @@ class Identifiable():
     properties: dict, keys are names of Properties; values are Property values
                 Note, that lists are not checked for equality but are interpreted as multiple
                 conditions for a single Property.
-    path: str, In case of files: The path where the file is stored.
     backrefs: list, TODO future
     """
 
-    def __init__(self, record_id: int = None, path: str = None, record_type: str = None,
-                 name: str = None, properties: dict = None,
-                 backrefs: list[Union[int, str]] = None):
-        if (record_id is None and path is None and name is None
+    def __init__(self, record_id: Optional[int] = None, record_type: Optional[str] = None,
+                 name: Optional[str] = None, properties: Optional[dict] = None,
+                 backrefs: Optional[list[Union[int, str]]] = None):
+        if (record_id is None and name is None
                 and (backrefs is None or len(backrefs) == 0)
                 and (properties is None or len(properties) == 0)):
-            raise ValueError("There is no identifying information. You need to add a path or "
-                             "properties or other identifying attributes.")
+            raise ValueError(
+                "There is no identifying information. You need to add "
+                "properties or other identifying attributes.")
         if properties is not None and 'name' in [k.lower() for k in properties.keys()]:
             raise ValueError("Please use the separete 'name' keyword instead of the properties "
                              "dict for name")
         self.record_id = record_id
-        self.path = path
         self.record_type = record_type
         self.name = name
         if name == "":
@@ -77,24 +80,21 @@ class Identifiable():
     def get_representation(self) -> str:
         return sha256(Identifiable._create_hashable_string(self).encode('utf-8')).hexdigest()
 
     @staticmethod
     def _value_representation(value) -> str:
         """returns the string representation of property values to be used in the hash function
 
-        The string is the path of a File Entity, the CaosDB ID or Python ID of other Entities
-        (Python Id only if there is no CaosDB ID) and the string representation of bool, float, int
-        and str.
+        The string is the LinkAhead ID in case of SyncNode objects (SyncNode objects must have
+        an ID) and the string representation of None, bool, float, int, datetime and str.
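+
+        Examples (illustrative): ``None`` -> ``"None"``, ``42`` -> ``"42"``, a ``SyncNode``
+        with ``id=123`` -> ``"123"``, and ``[1, None]`` -> ``"[1, None]"``.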
         """
 
         if value is None:
             return "None"
-        elif isinstance(value, db.File):
-            return str(value.path)
-        elif isinstance(value, db.Entity):
+        elif isinstance(value, SyncNode):
             if value.id is not None:
                 return str(value.id)
             else:
-                return "PyID=" + str(id(value))
+                raise RuntimeError("Python Entity (SyncNode) without ID not allowed")
         elif isinstance(value, list):
             return "[" + ", ".join([Identifiable._value_representation(el) for el in value]) + "]"
         elif (isinstance(value, str) or isinstance(value, int) or isinstance(value, float)
@@ -103,7 +103,7 @@ class Identifiable():
         else:
             raise ValueError(f"Unknown datatype of the value: {value}")
 
     @staticmethod
     def _create_hashable_string(identifiable: Identifiable) -> str:
         """
         creates a string from the attributes of an identifiable that can be hashed
@@ -120,27 +120,20 @@ class Identifiable():
         return rec_string
 
     def __eq__(self, other) -> bool:
-        """
-        Identifiables are equal if they belong to the same Record. Since ID and path are on their
-        own enough to identify the Record it is sufficient if those attributes are equal.
-        1. both IDs are set (not None)  -> equal if IDs are equal
-        2. both paths are set (not None)  -> equal if paths are equal
-        3. equal if attribute representations are equal
-        """
+        """ Identifiables are equal if they share the same ID or if the representation is equal """
         if not isinstance(other, Identifiable):
             raise ValueError("Identifiable can only be compared to other Identifiable objects.")
-        elif self.record_id is not None and other.record_id is not None:
+        if self.record_id is not None and other.record_id is not None:
             return self.record_id == other.record_id
-        elif self.path is not None and other.path is not None:
-            return self.path == other.path
         elif self.get_representation() == other.get_representation():
             return True
         else:
             return False
 
     def __repr__(self):
-        pstring = json.dumps(self.properties)
+        """ deterministic text representation of the identifiable """
+        pstring = json.dumps({k: str(v) for k, v in self.properties.items()})
         return (f"{self.__class__.__name__} for RT {self.record_type}: id={self.record_id}; "
-                f"name={self.name}\n\tpath={self.path}\n"
+                f"name={self.name}\n"
                 f"\tproperties:\n{pstring}\n"
                 f"\tbackrefs:\n{self.backrefs}")
diff --git a/src/caoscrawler/identifiable_adapters.py b/src/caoscrawler/identifiable_adapters.py
index d95112ee1aec6ca1526c96421a8052282b6ef9a7..854ee614638712bdcf957c592ef2946dbdd43afc 100644
--- a/src/caoscrawler/identifiable_adapters.py
+++ b/src/caoscrawler/identifiable_adapters.py
@@ -2,7 +2,7 @@
 # encoding: utf-8
 #
 # ** header v3.0
-# This file is a part of the CaosDB Project.
+# This file is a part of the LinkAhead Project.
 #
 # Copyright (C) 2021-2022 Henrik tom Wörden
 #               2021-2022 Alexander Schlemmer
@@ -29,7 +29,6 @@ import logging
 import warnings
 from abc import ABCMeta, abstractmethod
 from datetime import datetime
-from functools import lru_cache
 from typing import Any
 
 import linkahead as db
@@ -37,7 +36,14 @@ import yaml
 from linkahead.cached import cached_get_entity_by, cached_query
 from linkahead.utils.escape import escape_squoted_text
 
+from .exceptions import (
+    InvalidIdentifiableYAML,
+    MissingIdentifyingProperty,
+    MissingRecordType,
+    MissingReferencingEntityError,
+)
 from .identifiable import Identifiable
+from .sync_node import SyncNode
 from .utils import has_parent
 
 logger = logging.getLogger(__name__)
@@ -47,11 +53,14 @@ def get_children_of_rt(rtname):
     """Supply the name of a recordtype. This name and the name of all children RTs are returned in
     a list"""
     escaped = escape_squoted_text(rtname)
-    return [p.name for p in cached_query(f"FIND RECORDTYPE '{escaped}'")]
+    recordtypes = [p.name for p in cached_query(f"FIND RECORDTYPE '{escaped}'")]
+    if not recordtypes:
+        raise MissingRecordType(f"Record type could not be found on server: {rtname}")
+    return recordtypes
 
 
 def convert_value(value: Any) -> str:
-    """ Return a string representation of the value suitable for the search query.
+    """Return a string representation of the value suitable for the search query.
 
     This is for search queries looking for the identified record.
 
@@ -82,27 +91,27 @@ def convert_value(value: Any) -> str:
 class IdentifiableAdapter(metaclass=ABCMeta):
     """Base class for identifiable adapters.
 
-Some terms:
+    Some terms:
 
-- A *registered identifiable* defines an identifiable template, for example by specifying:
-    - Parent record types
-    - Properties
-    - ``is_referenced_by`` statements
-- An *identifiable* belongs to a concrete record.  It consists of identifying attributes which "fill
-  in" the *registered identifiable*.  In code, it can be represented as a Record based on the
-  *registered identifiable* with all the values filled in.
-- An *identified record* is the result of retrieving a record from the database, based on the
-  *identifiable* (and its values).
+    - A *registered identifiable* defines an identifiable template, for example by specifying:
+        - Parent record types
+        - Properties
+        - ``is_referenced_by`` statements
+    - An *identifiable* belongs to a concrete record.  It consists of identifying attributes which
+      "fill in" the *registered identifiable*.  In code, it can be represented as a Record based on
+      the *registered identifiable* with all the values filled in.
+    - An *identified record* is the result of retrieving a record from the database, based on the
+      *identifiable* (and its values).
 
-General question to clarify:
+    General question to clarify:
 
-- Do we want to support multiple identifiables per RecordType?
-- Current implementation supports only one identifiable per RecordType.
+    - Do we want to support multiple identifiables per RecordType?
+    - Current implementation supports only one identifiable per RecordType.
 
-The list of referenced by statements is currently not implemented.
+    The list of referenced by statements is currently not implemented.
 
-The IdentifiableAdapter can be used to retrieve the three above mentioned objects (registered
-identifiabel, identifiable and identified record) for a Record.
+    The IdentifiableAdapter can be used to retrieve the three above-mentioned objects (registered
+    identifiable, identifiable and identified record) for a Record.
 
     """
 
@@ -127,7 +136,7 @@ identifiabel, identifiable and identified record) for a Record.
             eid = ref
             if isinstance(ref, db.Entity):
                 eid = ref.id
-            query_string += (" WHICH IS REFERENCED BY " + str(eid) + " AND")
+            query_string += " WHICH IS REFERENCED BY " + str(eid) + " AND"
 
         query_string += " WITH "
 
@@ -136,22 +145,84 @@ identifiabel, identifiable and identified record) for a Record.
             if len(ident.properties) > 0:
                 query_string += " AND "
 
-        query_string += IdentifiableAdapter.create_property_query(ident, startswith=startswith)
+        query_string += IdentifiableAdapter.create_property_query(
+            ident, startswith=startswith
+        )
 
         # TODO Can these cases happen at all with the current code?
         if query_string.endswith(" AND WITH "):
-            query_string = query_string[:-len(" AND WITH ")]
+            query_string = query_string[: -len(" AND WITH ")]
         if query_string.endswith(" AND "):
-            query_string = query_string[:-len(" AND ")]
+            query_string = query_string[: -len(" AND ")]
         return query_string
 
+    def all_identifying_properties_exist(
+        self, node: SyncNode, raise_exception: bool = True
+    ):
+        """checks whether all identifying properties exist and raises an error if
+        that's not the case. It furthermore raises an error if "name" is part of
+        the identifiable, but the node does not have a name.
+
+        If raise_exception is False, the function returns False instead of raising an error.
+
+        Backreferences are not checked.
+
+        Returns True if all identifying properties exist.
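+
+        Example (illustrative): if the registered identifiable requires a property ``date``
+        and the node carries no such property, MissingIdentifyingProperty is raised (or
+        False is returned if ``raise_exception`` is False).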
+
+        Last review by Alexander Schlemmer on 2024-05-24.
+        """
+        if node.registered_identifiable is None:
+            if raise_exception:
+                parents = [p.name for p in node.parents]
+                parents_str = "\n".join(f"- {p}" for p in parents)
+                raise RuntimeError("No registered identifiable for node with these parents:\n"
+                                   + parents_str)
+            else:
+                return False
+        for prop in node.registered_identifiable.properties:
+            if prop.name.lower() == "is_referenced_by":
+                continue
+            if prop.name.lower() == "name":
+                if node.name is None:
+                    if raise_exception:
+                        i = MissingIdentifyingProperty("The node has no name.")
+                        i.prop = "name"
+                        raise i
+                    else:
+                        return False
+                else:
+                    continue
+
+            # Multiple occurrences are ok here. We deal with that when actually creating an
+            # identifiable (IDs of referenced Entities might need to get resolved first).
+            if not any(el.name.lower() == prop.name.lower() for el in node.properties):
+                if raise_exception:
+                    i = MissingIdentifyingProperty(
+                        f"The property {prop.name} is missing."
+                    )
+                    i.prop = prop.name
+                    raise i
+                else:
+                    return False
+
+        return True
+
     @staticmethod
     def __create_pov_snippet(pname: str, pvalue, startswith: bool = False):
         """Return something like ``'name'='some value'`` or ``'name' LIKE 'some*'``.
 
-If ``startswith`` is True, the value of strings will be cut off at 200 characters and a ``LIKE``
-operator will be used to find entities matching at the beginning.
-"""
+        If ``startswith`` is True, the value of strings will be cut off at 200 characters and a
+        ``LIKE`` operator will be used to find entities matching at the beginning.
+        """
         if startswith and isinstance(pvalue, str) and len(pvalue) > 200:
             operator_value_str = f" LIKE '{escape_squoted_text(pvalue[:200])}*'"
         else:
@@ -163,14 +234,14 @@ operator will be used to find entities matching at the beginning.
     def create_property_query(entity: Identifiable, startswith: bool = False):
         """Create a POV query part with the entity's properties.
 
-Parameters
-----------
+        Parameters
+        ----------
 
-entity: Identifiable
-  The Identifiable whose properties shall be used.
+        entity: Identifiable
+          The Identifiable whose properties shall be used.
 
-startswith: bool, optional
-  If True, check string typed properties against the first 200 characters only.  Default is False.
+        startswith: bool, optional
+          If True, check string typed properties against the first 200 characters only.
+          Default is False.
         """
         query_string = ""
         pov = IdentifiableAdapter.__create_pov_snippet  # Shortcut
@@ -197,61 +268,38 @@ startswith: bool, optional
         return query_string[:-4]
 
     @abstractmethod
-    def get_registered_identifiable(self, record: db.Record):
+    def get_registered_identifiable(self, record: db.Entity):
         """
         Check whether an identifiable is registered for this record and return its definition.
         If there is no identifiable registered, return None.
         """
         pass
 
-    @abstractmethod
-    def resolve_reference(self, record: db.Record):
-        pass
-
     @abstractmethod
     def get_file(self, identifiable: db.File):
-        warnings.warn(DeprecationWarning("This function is deprecated. Please do not use it."))
+        warnings.warn(
+            DeprecationWarning("This function is deprecated. Please do not use it.")
+        )
         """
         Retrieve the file object for a (File) identifiable.
         """
         pass
 
-    @staticmethod
-    def get_identifying_referencing_entities(referencing_entities, registered_identifiable):
-        refs = []
-        for prop in registered_identifiable.properties:
-            if prop.name.lower() != "is_referenced_by":
-                continue
-            for looking_for_rt in prop.value:
-                found = False
-                if looking_for_rt == "*":
-                    for val in referencing_entities.values():
-                        if len(val) > 0:
-                            found = True
-                        refs.extend(val)
-                else:
-                    rt_and_children = get_children_of_rt(looking_for_rt)
-                    for rtname in rt_and_children:
-                        if (rtname in referencing_entities):
-                            refs.extend(referencing_entities[rtname])
-                            found = True
-            if not found:
-                raise RuntimeError(
-                    f"Could not find referencing entities of type(s): {prop.value}\n"
-                    f"for registered identifiable:\n{registered_identifiable}\n"
-                    f"There were {len(referencing_entities)} referencing entities to choose from.\n"
-                    f"This error can also occur in case of merge conflicts in the referencing entities."
-                )
-        return refs
-
     @staticmethod
     def get_identifying_referenced_entities(record, registered_identifiable):
+        """Create a list of all entities that are referenced by record
+           and that are used as identifying properties of the identifiable.
+
+           Last review by Alexander Schlemmer on 2024-05-29.
+        """
         refs = []
         for prop in registered_identifiable.properties:
             pname = prop.name.lower()
             if pname == "name" or pname == "is_referenced_by":
                 continue
             if record.get_property(prop.name) is None:
+                logger.error(f"Record with missing identifying property:\n{record}\n"
+                             f"This property is missing: {prop.name}\n")
                 raise RuntimeError("Missing identifying Property")
             pval = record.get_property(prop.name).value
             if not isinstance(prop.value, list):
@@ -261,83 +309,101 @@ startswith: bool, optional
                     refs.append(val)
         return refs
 
-    def get_identifiable(self, record: db.Record, referencing_entities=None):
+    def get_identifiable(self, se: SyncNode, identifiable_backrefs: set[SyncNode]) -> Identifiable:
         """
-        Retrieve the registered identifiable and fill the property values to create an
-        identifiable.
+        Take the registered identifiable of the given SyncNode ``se`` and fill the property
+        values to create an identifiable.
 
         Args:
-            record: the record for which the Identifiable shall be created.
-            referencing_entities: a dictionary (Type: dict[str, list[db.Entity]]), that
-              allows to look up entities with a certain RecordType, that reference ``record``
+            se: the SyncNode for which the Identifiable shall be created.
+            identifiable_backrefs: a set (Type: set[SyncNode]) of SyncNodes with a
+                                   certain RecordType that reference ``se``
 
         Returns:
             Identifiable, the identifiable for record.
-        """
 
-        registered_identifiable = self.get_registered_identifiable(record)
-
-        if referencing_entities is None:
-            referencing_entities = {}
+        Last review by Alexander Schlemmer on 2024-05-29.
+        """
 
         property_name_list_A = []
-        property_name_list_B = []
         identifiable_props = {}
-        identifiable_backrefs = []
-        name_is_identifying_property = False
-
-        if registered_identifiable is not None:
-            identifiable_backrefs = self.get_identifying_referencing_entities(
-                referencing_entities, registered_identifiable)
-            # fill the values:
-            for prop in registered_identifiable.properties:
-                if prop.name == "name":
-                    # The name can be an identifiable, but it isn't a property
-                    name_is_identifying_property = True
-                    continue
-                # problem: what happens with multi properties?
-                # case A: in the registered identifiable
-                # case B: in the identifiable
-
-                # treated above
-                if prop.name.lower() == "is_referenced_by":
-                    continue
+        name = None
+
+        if se.registered_identifiable is None:
+            raise ValueError("no registered_identifiable")
+
+        # fill the values:
+        for prop in se.registered_identifiable.properties:
+            # TODO:
+            # If there are multi properties in the registered_identifiable, then only the LAST is
+            # taken into account (later properties overwrite previous ones in the dict below).
+            if prop.name == "name":
+                name = se.name
+                continue
 
-                record_prop = record.get_property(prop.name)
-                if record_prop is None:
-                    # TODO: how to handle missing values in identifiables
-                    #       raise an exception?
-                    # TODO: is this the appropriate error?
-                    raise NotImplementedError(
-                        f"The following record is missing an identifying property:\n"
-                        f"RECORD\n{record}\nIdentifying PROPERTY\n{prop.name}"
+            if prop.name.lower() == "is_referenced_by":
+                for el in identifiable_backrefs:
+                    if not isinstance(el, SyncNode):
+                        raise ValueError("Elements of `identifiable_backrefs` must be SyncNodes")
+                if len(identifiable_backrefs) == 0:
+                    raise MissingReferencingEntityError(
+                        f"Could not find referencing entities of type(s): {prop.value}\n"
+                        f"for registered identifiable:\n{se.registered_identifiable}\n"
+                        f"There were {len(identifiable_backrefs)} referencing entities to "
+                        "choose from.\n"
+                        f"This error can also occur in case of merge conflicts in the referencing"
+                        " entities."
                     )
-                identifiable_props[record_prop.name] = record_prop.value
-                property_name_list_A.append(prop.name)
-
-            # check for multi properties in the record:
-            for prop in property_name_list_A:
-                property_name_list_B.append(prop)
-            if (len(set(property_name_list_B)) != len(property_name_list_B) or len(
-                    set(property_name_list_A)) != len(property_name_list_A)):
-                raise RuntimeError(
-                    "Multi properties used in identifiables could cause unpredictable results and "
-                    "are not allowed. You might want to consider a Property with a list as value.")
+                elif len([e.id for e in identifiable_backrefs if e.id is None]) > 0:
+                    raise RuntimeError("Referencing entity has no id")
+                # At this point we know that there is at least one referencing SyncNode
+                # with an ID. We do not need to set any property value (the reference will be used
+                # in the backrefs argument below) and can thus continue with the next identifying
+                # property
+                continue
+
+            options = [p.value for p in se.properties if p.name.lower() == prop.name.lower()]
+            if len(options) == 0:
+                raise MissingIdentifyingProperty(
+                    f"The following record is missing an identifying property:\n"
+                    f"RECORD\n{se}\nIdentifying PROPERTY\n{prop.name}"
+                )
+            for ii, el in enumerate(options):
+                if isinstance(el, SyncNode):
+                    options[ii] = el.id
+                    if el.id is None:
+                        raise RuntimeError(
+                            "Reference to unchecked in identifiable:\n"
+                            f"{prop.name}:\n{el}"
+                        )
+                else:
+                    options[ii] = el
+            if not all([f == options[0] for f in options]):
+                raise RuntimeError("differing prop values ")
+
+            identifiable_props[prop.name] = options[0]
+            property_name_list_A.append(prop.name)
+
+        # check for multi properties in the record:
+        if len(set(property_name_list_A)) != len(property_name_list_A):
+            raise RuntimeError(
+                "Multi properties used in identifiables could cause unpredictable results and "
+                "are not allowed. You might want to consider a Property with a list as value."
+            )
 
         # use the RecordType of the registered Identifiable if it exists
         # We do not use parents of Record because it might have multiple
         try:
             return Identifiable(
-                record_id=record.id,
-                record_type=(registered_identifiable.parents[0].name
-                             if registered_identifiable else None),
-                name=record.name if name_is_identifying_property else None,
+                record_id=se.id,
+                record_type=se.registered_identifiable.parents[0].name,
+                name=name,
                 properties=identifiable_props,
-                path=record.path,
-                backrefs=identifiable_backrefs
+                backrefs=[e.id for e in identifiable_backrefs],
             )
-        except Exception:
-            logger.error(f"Error while creating identifiable for this record:\n{record}")
+        except Exception as exc:
+            logger.error(exc)
+            logger.error(f"Error while creating identifiable for this record:\n{se}")
             raise
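A usage sketch (``adapter``, ``graph`` and ``node`` are assumed to be an IdentifiableAdapter, a SyncGraph and one of its SyncNodes; compare ``_set_identifiable_of_node`` in sync_graph.py below):

    # Create the identifiable, passing the nodes that provide the identifying
    # "is_referenced_by" backreferences:
    identifiable = adapter.get_identifiable(
        node, graph.backward_references_backref[id(node)]
    )
    print(identifiable.get_representation())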
 
     @abstractmethod
@@ -352,23 +418,29 @@ startswith: bool, optional
         """
         pass
 
-    def retrieve_identified_record_for_record(self, record: db.Record, referencing_entities=None):
-        """
-        This function combines all functionality of the IdentifierAdapter by
-        returning the identifiable after having checked for an appropriate
-        registered identifiable.
+    @staticmethod
+    def referencing_entity_has_appropriate_type(parents, register_identifiable):
+        """returns true if one of the parents is listed by the 'is_referenced_by' property
 
-        In case there was no appropriate registered identifiable or no identifiable could
-        be found return value is None.
-        """
-        if record.path is not None:
-            return cached_get_entity_by(path=record.path)
-        if record.id is not None:
-            return cached_get_entity_by(eid=record.id)
+        This function also returns True if 'is_referenced_by' contains the wildcard '*'.
 
-        identifiable = self.get_identifiable(record, referencing_entities=referencing_entities)
+        Last review by Alexander Schlemmer on 2024-05-29.
+        """
+        if register_identifiable.get_property("is_referenced_by") is None:
+            return False
+        if register_identifiable.get_property("is_referenced_by").value is None:
+            return False
 
-        return self.retrieve_identified_record_for_identifiable(identifiable)
+        appropriate_types = []
+        for rt in register_identifiable.get_property("is_referenced_by").value:
+            appropriate_types.extend(get_children_of_rt(rt))
+        appropriate_types = [el.lower() for el in appropriate_types]
+        if "*" in appropriate_types:
+            return True
+        for parent in parents:
+            if parent.name.lower() in appropriate_types:
+                return True
+        return False
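A minimal sketch of this check (RecordType names are illustrative; ``get_children_of_rt`` resolves child RecordTypes and needs a server connection):

    import linkahead as db

    # A registered identifiable that must be referenced by an Experiment
    # (or by a child RecordType of Experiment):
    reg_ident = db.RecordType().add_parent("Measurement")
    reg_ident.add_property(name="is_referenced_by", value=["Experiment"])
    reg_ident.add_property(name="date")

    record = db.Record().add_parent("Experiment")
    # True; it would be True for any parent if the value were ["*"]:
    IdentifiableAdapter.referencing_entity_has_appropriate_type(
        record.parents, reg_ident)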
 
 
 class LocalStorageIdentifiableAdapter(IdentifiableAdapter):
@@ -377,8 +449,11 @@ class LocalStorageIdentifiableAdapter(IdentifiableAdapter):
     """
 
     def __init__(self):
-        warnings.warn(DeprecationWarning(
-            "This class is deprecated. Please use the CaosDBIdentifiableAdapter."))
+        warnings.warn(
+            DeprecationWarning(
+                "This class is deprecated. Please use the CaosDBIdentifiableAdapter."
+            )
+        )
         self._registered_identifiables = dict()
         self._records = []
 
@@ -393,7 +468,9 @@ class LocalStorageIdentifiableAdapter(IdentifiableAdapter):
         Just look in records for a file with the same path.
         """
         candidates = []
-        warnings.warn(DeprecationWarning("This function is deprecated. Please do not use it."))
+        warnings.warn(
+            DeprecationWarning("This function is deprecated. Please do not use it.")
+        )
         for record in self._records:
             if record.role == "File" and record.path == identifiable.path:
                 candidates.append(record)
@@ -405,15 +482,18 @@ class LocalStorageIdentifiableAdapter(IdentifiableAdapter):
 
     def store_state(self, filename):
         with open(filename, "w") as f:
-            f.write(db.common.utils.xml2str(
-                db.Container().extend(self._records).to_xml()))
+            f.write(
+                db.common.utils.xml2str(db.Container().extend(self._records).to_xml())
+            )
 
     def restore_state(self, filename):
         with open(filename, "r") as f:
             self._records = db.Container().from_xml(f.read())
 
     # TODO: move to super class?
-    def is_identifiable_for_record(self, registered_identifiable: db.RecordType, record: db.Record):
+    def is_identifiable_for_record(
+        self, registered_identifiable: db.RecordType, record: db.Record
+    ):
         """
         Check whether this registered_identifiable is an identifiable for the record.
 
@@ -424,8 +504,7 @@ class LocalStorageIdentifiableAdapter(IdentifiableAdapter):
         Return True in that case and False otherwise.
         """
         if len(registered_identifiable.parents) != 1:
-            raise RuntimeError(
-                "Multiple parents for identifiables not supported.")
+            raise RuntimeError("Multiple parents for identifiables not supported.")
 
         if not has_parent(record, registered_identifiable.parents[0].name):
             return False
@@ -435,14 +514,13 @@ class LocalStorageIdentifiableAdapter(IdentifiableAdapter):
                 return False
         return True
 
-    def get_registered_identifiable(self, record: db.Record):
+    def get_registered_identifiable(self, record: db.Entity):
         identifiable_candidates = []
         for _, definition in self._registered_identifiables.items():
             if self.is_identifiable_for_record(definition, record):
                 identifiable_candidates.append(definition)
         if len(identifiable_candidates) > 1:
-            raise RuntimeError(
-                "Multiple candidates for an identifiable found.")
+            raise RuntimeError("Multiple candidates for an identifiable found.")
         if len(identifiable_candidates) == 0:
             return None
         return identifiable_candidates[0]
@@ -457,8 +535,9 @@ class LocalStorageIdentifiableAdapter(IdentifiableAdapter):
         record is the record from the local database to check against.
         identifiable is the record that was created during the crawler run.
         """
-        if (identifiable.record_type is not None
-                and not has_parent(record, identifiable.record_type)):
+        if identifiable.record_type is not None and not has_parent(
+            record, identifiable.record_type
+        ):
             return False
         for propname, propvalue in identifiable.properties.items():
             prop_record = record.get_property(propname)
@@ -487,27 +566,12 @@ class LocalStorageIdentifiableAdapter(IdentifiableAdapter):
                 candidates.append(record)
         if len(candidates) > 1:
             raise RuntimeError(
-                f"Identifiable was not defined unambigiously. Possible candidates are {candidates}")
+                f"Identifiable was not defined unambigiously. Possible candidates are {candidates}"
+            )
         if len(candidates) == 0:
             return None
         return candidates[0]
 
-    def resolve_reference(self, value: db.Record):
-        if self.get_registered_identifiable(value) is None:
-            raise NotImplementedError("Non-identifiable references cannot"
-                                      " be used as properties in identifiables.")
-            # TODO: just resolve the entity
-
-        value_identifiable = self.retrieve_identified_record_for_record(value)
-        if value_identifiable is None:
-            raise RuntimeError("The identifiable which is used as property"
-                               " here has to be inserted first.")
-
-        if value_identifiable.id is None:
-            raise RuntimeError("The entity has not been assigned an ID.")
-
-        return value_identifiable.id
-
 
 class CaosDBIdentifiableAdapter(IdentifiableAdapter):
     """
@@ -521,27 +585,42 @@ class CaosDBIdentifiableAdapter(IdentifiableAdapter):
 
     def load_from_yaml_definition(self, path: str):
         """Load identifiables defined in a yaml file"""
-        with open(path, 'r', encoding="utf-8") as yaml_f:
+        with open(path, "r", encoding="utf-8") as yaml_f:
             identifiable_data = yaml.safe_load(yaml_f)
+        self.load_from_yaml_object(identifiable_data)
+
+    def load_from_yaml_object(self, identifiable_data):
+        """Load identifiables defined in a yaml object.
+        """
 
-        for key, value in identifiable_data.items():
-            rt = db.RecordType().add_parent(key)
-            for prop_name in value:
+        for rt_name, id_list in identifiable_data.items():
+            rt = db.RecordType().add_parent(rt_name)
+            if not isinstance(id_list, list):
+                raise InvalidIdentifiableYAML(
+                    f"Identifiable contents must be lists, but this was not: {rt_name}")
+            for prop_name in id_list:
                 if isinstance(prop_name, str):
                     rt.add_property(name=prop_name)
                 elif isinstance(prop_name, dict):
                     for k, v in prop_name.items():
+                        if k == "is_referenced_by" and not isinstance(v, list):
+                            raise InvalidIdentifiableYAML(
+                                f"'is_referenced_by' must be a list.  Found in: {rt_name}")
                         rt.add_property(name=k, value=v)
                 else:
-                    NotImplementedError("YAML is not structured correctly")
+                    raise InvalidIdentifiableYAML(
+                        "Identifiable properties must be str or dict, but this one was not:\n"
+                        f"    {rt_name}/{prop_name}")
 
-            self.register_identifiable(key, rt)
+            self.register_identifiable(rt_name, rt)
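For example, a definition like the following (names are illustrative) passes the new validation, while a non-list value for ``is_referenced_by`` or a non-list identifiable body now raises InvalidIdentifiableYAML:

    import yaml

    adapter = CaosDBIdentifiableAdapter()
    adapter.load_from_yaml_object(yaml.safe_load('''
    Measurement:
    - date
    - is_referenced_by: [Experiment]
    '''))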
 
     def register_identifiable(self, name: str, definition: db.RecordType):
         self._registered_identifiables[name] = definition
 
     def get_file(self, identifiable: Identifiable):
-        warnings.warn(DeprecationWarning("This function is deprecated. Please do not use it."))
+        warnings.warn(
+            DeprecationWarning("This function is deprecated. Please do not use it.")
+        )
         # TODO is this needed for Identifiable?
         # or can we get rid of this function?
         if isinstance(identifiable, db.Entity):
@@ -555,7 +634,7 @@ class CaosDBIdentifiableAdapter(IdentifiableAdapter):
             return None
         return candidates[0]
 
-    def get_registered_identifiable(self, record: db.Record):
+    def get_registered_identifiable(self, record: db.Entity):
         """
         returns the registered identifiable for the given Record
 
@@ -570,22 +649,17 @@ class CaosDBIdentifiableAdapter(IdentifiableAdapter):
             if definition.parents[0].name.lower() == rt_name.lower():
                 return definition
 
-    def resolve_reference(self, record: db.Record):
-        """
-        Current implementation just sets the id for this record
-        as a value. It needs to be verified that references all contain an ID.
-        """
-        if record.id is None:
-            return record
-        return record.id
-
     def retrieve_identified_record_for_identifiable(self, identifiable: Identifiable):
         query_string = self.create_query_for_identifiable(identifiable)
         try:
             candidates = cached_query(query_string)
-        except db.exceptions.HTTPServerError as err:
-            query_string = self.create_query_for_identifiable(identifiable, startswith=True)
-            candidates = cached_query(query_string).copy()  # Copy against cache poisoning
+        except db.exceptions.HTTPServerError:
+            query_string = self.create_query_for_identifiable(
+                identifiable, startswith=True
+            )
+            candidates = cached_query(
+                query_string
+            ).copy()  # Copy against cache poisoning
 
             # Test if the candidates really match all properties
             for pname, pvalue in identifiable.properties.items():
@@ -604,7 +678,8 @@ class CaosDBIdentifiableAdapter(IdentifiableAdapter):
             raise RuntimeError(
                 f"Identifiable was not defined unambiguously.\n{query_string}\nReturned the "
                 f"following {candidates}."
-                f"Identifiable:\n{identifiable.record_type}{identifiable.properties}")
+                f"Identifiable:\n{identifiable.record_type}{identifiable.properties}"
+            )
         if len(candidates) == 0:
             return None
         return candidates[0]
diff --git a/src/caoscrawler/macros/macro_yaml_object.py b/src/caoscrawler/macros/macro_yaml_object.py
index c6b5de27d7f498d9b1db6b6a90d986487340a880..d85883011db3cf651da0dda6c110015128fbe439 100644
--- a/src/caoscrawler/macros/macro_yaml_object.py
+++ b/src/caoscrawler/macros/macro_yaml_object.py
@@ -25,12 +25,17 @@
 # Function to expand a macro in yaml
 # A. Schlemmer, 05/2022
 
+import re
 from dataclasses import dataclass
 from typing import Any, Dict
 from copy import deepcopy
 from string import Template
 
 
+_SAFE_SUBST_PAT = re.compile(r"^\$(?P<key>\w+)$")
+_SAFE_SUBST_PAT_BRACES = re.compile(r"^\$\{(?P<key>\w+)}$")
+
+
 @dataclass
 class MacroDefinition:
     """
@@ -53,6 +58,12 @@ def substitute(propvalue, values: dict):
     Substitution of variables in strings using the variable substitution
     library from python's standard library.
     """
+    # Simple matches are simply replaced by the raw dict entry.
+    if match := (_SAFE_SUBST_PAT.fullmatch(propvalue)
+                 or _SAFE_SUBST_PAT_BRACES.fullmatch(propvalue)):
+        key = match.group("key")
+        if key in values:
+            return values[key]
     propvalue_template = Template(propvalue)
     return propvalue_template.safe_substitute(**values)
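The effect of the new fast path, with illustrative values:

    values = {"count": 42, "name": "sample"}
    substitute("$count", values)     # -> 42 (raw value, type preserved)
    substitute("${count}", values)   # -> 42
    substitute("id_$name", values)   # -> "id_sample" (plain string substitution)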
 
diff --git a/src/caoscrawler/scanner.py b/src/caoscrawler/scanner.py
index 9d1f538732858ff2fbf949d45c359ebb16fe3480..9f8f5e40beb729d73151bad38f3e390a4a8cecb4 100644
--- a/src/caoscrawler/scanner.py
+++ b/src/caoscrawler/scanner.py
@@ -62,11 +62,10 @@ def load_definition(crawler_definition_path: str):
     """
 
     # Load the cfood from a yaml file:
-    with open(crawler_definition_path, "r") as f:
+    with open(crawler_definition_path, encoding="utf-8") as f:
         crawler_definitions = list(yaml.safe_load_all(f))
 
-    crawler_definition = _load_definition_from_yaml_dict(
-        crawler_definitions)
+    crawler_definition = _load_definition_from_yaml_dict(crawler_definitions)
 
     return _resolve_validator_paths(crawler_definition, crawler_definition_path)
 
@@ -362,16 +361,19 @@ def scanner(items: list[StructureElement],
                     debug_tree.debug_metadata["usage"][str(element)].add(
                         "/".join(converters_path + [converter.name]))
                     mod_info = debug_tree.debug_metadata["provenance"]
-                    for record_name, prop_name in keys_modified:
-                        # TODO: check
-                        internal_id = record_store_copy.get_internal_id(
-                            record_name)
-                        record_identifier = record_name + \
-                            "_" + str(internal_id)
-                        converter.metadata["usage"].add(record_identifier)
-                        mod_info[record_identifier][prop_name] = (
-                            structure_elements_path + [element.get_name()],
-                            converters_path + [converter.name])
+                    # TODO: actually keys_modified must not be None. create_records should
+                    #       always return a list.
+                    if keys_modified is not None:
+                        for record_name, prop_name in keys_modified:
+                            # TODO: check
+                            internal_id = record_store_copy.get_internal_id(
+                                record_name)
+                            record_identifier = record_name + \
+                                "_" + str(internal_id)
+                            converter.metadata["usage"].add(record_identifier)
+                            mod_info[record_identifier][prop_name] = (
+                                structure_elements_path + [element.get_name()],
+                                converters_path + [converter.name])
 
                 scanner(children, converter.converters,
                         general_store_copy, record_store_copy,
diff --git a/src/caoscrawler/scripts/__init__.py b/src/caoscrawler/scripts/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/src/caoscrawler/scripts/generators.py b/src/caoscrawler/scripts/generators.py
new file mode 100644
index 0000000000000000000000000000000000000000..ba8e6e39cc03e9be1923d72ec5c8d699c01fa8f9
--- /dev/null
+++ b/src/caoscrawler/scripts/generators.py
@@ -0,0 +1,247 @@
+# This file is a part of the LinkAhead Project.
+#
+# Copyright (C) 2024 IndiScale GmbH <info@indiscale.com>
+# Copyright (C) 2024 Daniel Hornung <d.hornung@indiscale.com>
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as
+# published by the Free Software Foundation, either version 3 of the
+# License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+
+"""Scripts and functions to generate datamodel yaml files and cfood skeletons.
+
+For example from actual data files.
+"""
+
+import argparse
+import csv
+from collections import OrderedDict
+from string import Template
+from typing import Optional
+
+import pandas as pd
+import yaml
+
+
+DM_TEMPLATE = """# auto-generated data model from file "[]{infile}".
+# To insert a datamodel into LinkAhead, run:
+#
+# python3 -m caosadvancedtools.models.parser datamodel.yaml --sync
+"""
+
+HEADER_RT = """
+###############
+# RecordTypes #
+###############
+
+DummyRT:
+  description: "Note: Change name and enter description."
+  recommended_properties:
+    """
+
+CFOOD_TEMPLATE = """
+---
+metadata:
+  macros:
+  - !defmacro
+    # Simple column value -> property rule
+    name: ColumnValue
+    params:
+      name: null
+      belongsto: BaseElement
+      type: TextElement
+    definition:
+      ${name}:
+        type: ${type}
+        match_name: ^${name}$$
+        match_value: (?P<val>.*)
+        records:
+          ${belongsto}:
+            ${name}: $$val
+  - !defmacro
+    # column value -> reference property
+    name: ColumnValueReference
+    params:
+      name: null
+      reftype: null  # RecordType of the reference
+      belongsto: BaseElement
+      type: TextElement  # References are always text, right?
+    definition:
+      ${name}:
+        type: ${type}
+        match_name: ^${name}$$
+        match_value: (?P<val>.*)
+        records:
+          ${reftype}:
+            name: $$val
+          ${belongsto}:
+            ${name}: $$${reftype}
+  - !defmacro
+    # Same as "ColumnValue", but also give name of property.
+    name: ColumnValuePropname
+    params:
+      name: null
+      propname: null
+      belongsto: BaseElement
+      type: TextElement
+    definition:
+      ${name}:
+        type: ${type}
+        match_name: ^${name}$$
+        match_value: (?P<val>.*)
+        records:
+          ${belongsto}:
+            ${propname}: $$val
+---
+directory: # corresponds to the directory given to the crawler
+  type: Directory
+  match: .* # we do not care how it is named here
+  records:
+    DirRecord:    # One record for each directory.
+  subtree:
+    # This is the file
+    thisfile:
+      type: []{file}
+      match: []{match}
+      records:
+        DatFileRecord:    # One record for each matching file
+          role: File
+          path: $thisfile
+          file: $thisfile
+      subtree:
+        entry:
+          type: Dict
+          match: .* # Name is irrelevant
+          records:
+            BaseElement:    # One BaseElement record for each row in the CSV/TSV file
+              DatFileRecord: $DatFileRecord
+            DirRecord:
+              BaseElement: +$BaseElement
+          subtree: !macro
+"""
+
+
+class _CustomTemplate(Template):
+    delimiter = "[]"  # "$" is used too much by the yaml template.
+
+
+def csv_to_datamodel(infile: str, outfile: str, cfood: Optional[str] = None):
+    """Parse the input csv and create basic datamodel in ``outfile``.
+
+    Parameters
+    ----------
+    cfood: str, optional
+      If given, also create a cfood skeleton.
+    """
+    sniffer = csv.Sniffer()
+    with open(infile, encoding="utf-8") as f_infile:
+        max_sniff = 50000
+        sniffed = sniffer.sniff(f_infile.read(max_sniff))
+    df = pd.read_table(infile, sep=sniffed.delimiter, quotechar=sniffed.quotechar,
+                       escapechar=sniffed.escapechar)
+
+    properties = OrderedDict()
+    for colname in df.columns:
+        column = df[colname]
+        dtype: Optional[str] = "TEXT"
+        if pd.api.types.is_bool_dtype(column.dtype):
+            dtype = "BOOLEAN"
+        elif pd.api.types.is_float_dtype(column.dtype):
+            dtype = "DOUBLE"
+        elif pd.api.types.is_integer_dtype(column.dtype):
+            dtype = "INTEGER"
+        properties[colname] = {
+            "datatype": dtype
+        }
+
+    result = (_CustomTemplate(DM_TEMPLATE).substitute({"infile": infile})
+              + HEADER_RT
+              + "    ".join(yaml.dump(dict(properties),  # from OrderedDict to dict
+                                      allow_unicode=True,
+                                      sort_keys=False).splitlines(keepends=True))
+              )
+    with open(outfile, encoding="utf-8", mode="w") as myfile:
+        myfile.write(result)
+
+    #################
+    # cfood section #
+    #################
+    if cfood:
+        defs_col_value: list[str] = []
+        defs_col_value_ref: list[str] = []
+        prefix = " " * 14
+        for name, propdef in properties.items():
+            def_str = prefix + f"- name: {name}\n"
+            dtype = None
+            reftype = None
+            defs = defs_col_value
+            # Which type?
+            if propdef["datatype"] == "BOOLEAN":
+                dtype = "BooleanElement"
+            elif propdef["datatype"] == "INTEGER":
+                dtype = "IntegerElement"
+            elif propdef["datatype"] == "DOUBLE":
+                dtype = "FloatElement"
+            elif propdef["datatype"] == "TEXT":
+                dtype = None
+            else:
+                reftype = propdef["datatype"]
+                defs = defs_col_value_ref
+
+            # Append according to types:
+            if reftype:
+                def_str += prefix + f"  reftype: {reftype}\n"
+            if dtype:
+                def_str += prefix + f"  type: {dtype}\n"
+
+            # Store result
+            defs.append(def_str)
+            del defs
+
+        sep = repr(sniffed.delimiter)
+        sep = f'"{sep[1:-1]}"'
+        match_str = f"""'.*[ct]sv'
+      sep: {sep}
+      # "header": [int]
+      # "names": [str]
+      # "index_col": [int]
+      # "usecols": [int]
+      # "true_values": [str]
+      # "false_values": [str]
+      # "na_values": [str]
+      # "skiprows": [int]
+      # "nrows": [int]
+      # "keep_default_na": [bool]
+        """
+
+        cfood_str = (_CustomTemplate(CFOOD_TEMPLATE).substitute({"file": "CSVTableConverter",
+                                                                 "match": match_str})
+                     + prefix[2:] + "ColumnValue:\n" + "".join(defs_col_value)
+                     + prefix[2:] + "ColumnValueReference:\n" + "".join(defs_col_value_ref)
+                     )
+        with open(cfood, encoding="utf-8", mode="w") as myfile:
+            myfile.write(cfood_str)
+
+
+def _parse_args_csv():
+    """Parse the arguments."""
+    parser = argparse.ArgumentParser(description="Create datamodel and cfood from CSV files.")
+    parser.add_argument('-i', '--input', help="The input file.", required=True, dest="infile")
+    parser.add_argument('-o', '--outfile', help="Yaml filename to save the result", required=True)
+    parser.add_argument('--cfood', help="Yaml filename to create cfood output in", required=False)
+
+    return parser.parse_args()
+
+
+def csv_to_datamodel_main():
+    """The main function for csv data handling."""
+    args = _parse_args_csv()
+    csv_to_datamodel(**vars(args))
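A hypothetical invocation (file names are placeholders):

    # Create a datamodel and a cfood skeleton from a CSV file:
    csv_to_datamodel(infile="measurements.csv", outfile="datamodel.yaml",
                     cfood="cfood.yaml")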
diff --git a/src/caoscrawler/structure_elements.py b/src/caoscrawler/structure_elements.py
index ff070626ebfdd580c16bbbf2dc30ab330dc162f0..0efba91c185446e0bfbecbbb53f68aaa8a8e15d1 100644
--- a/src/caoscrawler/structure_elements.py
+++ b/src/caoscrawler/structure_elements.py
@@ -23,7 +23,6 @@
 # ** end header
 #
 
-from typing import Dict as tDict
 import warnings
 
 
@@ -39,7 +38,7 @@ name: str
 
     def __init__(self, name: str):
         # Used to store usage information for debugging:
-        self.metadata: tDict[str, set[str]] = {
+        self.metadata: dict[str, set[str]] = {
             "usage": set()
         }
 
diff --git a/src/caoscrawler/sync_graph.py b/src/caoscrawler/sync_graph.py
new file mode 100644
index 0000000000000000000000000000000000000000..9c021a10f35e95ca56d45151b8d064ec905993ec
--- /dev/null
+++ b/src/caoscrawler/sync_graph.py
@@ -0,0 +1,719 @@
+#!/usr/bin/env python3
+# encoding: utf-8
+#
+# This file is a part of the LinkAhead Project.
+#
+# Copyright (C) 2024 Henrik tom Wörden
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as
+# published by the Free Software Foundation, either version 3 of the
+# License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+#
+
+"""
+A data model class for the graph of entities that shall be created during synchronization of the
+crawler.
+"""
+
+from __future__ import annotations
+
+import logging
+from typing import Any, Optional, Union, Callable
+
+import linkahead as db
+from linkahead.cached import cached_get_entity_by
+from linkahead.exceptions import EmptyUniqueQueryError
+
+from .identifiable_adapters import IdentifiableAdapter
+from .identifiable import Identifiable
+from .sync_node import SyncNode, TempID
+
+import re
+
+logger = logging.getLogger(__name__)
+
+
+def _set_each_scalar_value(
+    node: SyncNode, condition: Callable[[Any], bool], value: Callable[[Any], Any]
+):
+    """helper function that conditionally replaces each value element of each property of a node
+
+    If the property value is a list, the replacement is done for each list entry.
+    The replacement is only performed if the condition that
+    is provided is fulfilled, i.e. the callable ``condition`` returns True. The callable
+    ``condition`` must take the property value (or list element) as the sole argument.
+
+    Args:
+        node (SyncNode): The node which provides the properties (and their values) to operate on.
+        condition (Callable): A function with one argument which is interpreted as a condition:
+                              Only if it returns True for the property value, the action is
+                              executed.
+        value (Callable): A function returning a new value that is set as the property value. This
+                          function receives the old value as the single argument.
+
+    Last review by Alexander Schlemmer on 2024-05-24.
+    """
+    for p in node.properties:
+        if isinstance(p.value, list):
+            for ii, el in enumerate(p.value):
+                if condition(el):
+                    p.value[ii] = value(el)
+        elif condition(p.value):
+            p.value = value(p.value)
+
+
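``export_record_lists`` below uses this helper to swap SyncNode references for exported entities; a simpler sketch replaces each SyncNode value by its ID:

    _set_each_scalar_value(
        node,  # a SyncNode whose property values may contain SyncNodes
        condition=lambda val: isinstance(val, SyncNode),
        value=lambda val: val.id,
    )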
+class SyncGraph:
+    """
+    A data model class for the graph of entities that shall be created during synchronization of
+    the crawler.
+
+    The SyncGraph combines nodes in the graph based on their identity in order to create a graph of
+    objects that can either be inserted or updated in(to) the remote server. This combination of
+    SyncNodes happens during initialization and later on when the ID of SyncNodes is set.
+
+    When the SyncGraph is initialized, the properties of given entities are scanned and used to
+    create multiple reference maps that track how SyncNodes reference each other.
+    These maps are kept up to date when SyncNodes are merged because they are identified with each
+    other. During initialization, SyncNodes are first merged based on their ID, path or
+    identifiable.
+
+    When additional information is added to the graph by setting the ID of a node
+    (via `set_id_of_node`) then the graph is updated accordingly:
+    - if this information implies that the node is equivalent to another node (e.g. has same ID),
+      then they are merged
+    - if knowing that one node does not exist in the remote server, then this might imply that some
+      other node also does not exist if its identity relies on the latter.
+    - The new ID might make it possible to create the identifiables of connected nodes and thus
+      might trigger further merging of nodes based on the new identifiables.
+
+    A SyncGraph should only be manipulated via one function:
+    - set_id_of_node: a positive integer means the Entity exists, None means it is missing
+    TODO what about String IDs
+
+    The SyncGraph can be converted back to lists of entities which allow to perform the desired
+    inserts and updates.
+
+    Usage:
+    - Initialize the Graph with a list of entities. Those will be converted to the SyncNodes of the
+      graph.
+    - SyncNodes that can be merged are automatically merged and SyncNodes where the existence can
+      be determined are automatically removed from the list of unchecked SyncNodes:
+      graph.unchecked.
+    - You manipulate the graph by setting the ID of a SyncNode (either to a valid ID or to None).
+      For example, you can check whether a SyncNode has an identifiable and then query the remote
+      server and use the result to set the ID.
+    - After each manipulation, the graph updates accordingly (see above)
+    - Ideally, the unchecked list is empty after some manipulation.
+    - You can export a list of entities to be inserted and one of entities to be updated with
+      export_record_lists.
+
+    Last review by Alexander Schlemmer on 2024-05-24.
+    """
+
+    # General implementation remark:
+    # There are three cases where an update of one SyncNode can affect other nodes:
+    # - mark existing (add identifiables)
+    # - mark missing (add identifiables and add (negative) IDs)
+    # - merge (add identifiables)
+    #
+    # We cannot get an infinite recursion where one update triggers another update and so on
+    # because updates are conditional:
+    # Setting an ID immediately removes the node from the unchecked list, and _mark_missing only
+    # tries to set an ID if a node is in the unchecked list. Thus, setting the ID once
+    # prevents future attempts to set the ID of the same node.
+    # Also, setting an identifiable is only done when needed, i.e. there is no identifiable.
+    # Note that whenever one node is changed, we check all dependent nodes (see usage of
+    # `_get_nodes_whose_identity_relies_on`) whether something should be updated. Thus, we cannot
+    # miss a necessary update.
+    def __init__(
+        self, entities: list[db.Entity], identifiableAdapter: IdentifiableAdapter
+    ):
+        self.identifiableAdapter = identifiableAdapter
+        # A dictionary allowing for quick lookup of sync nodes using their (possibly negative) IDs.
+        # This dictionary is initially set using _mark_entities_with_path_or_id and later updated
+        # using set_id_of_node or during merges of nodes.
+        self._id_look_up: dict[Union[int, TempID, str], SyncNode] = {}
+        # Similar as above for looking up nodes using paths
+        self._path_look_up: dict[str, SyncNode] = {}
+        # Similar as above for looking up nodes using identifiables. This dictionary uses the text
+        # representation generated by get_representation method of Identifiable as keys.
+        self._identifiable_look_up: dict[str, SyncNode] = {}
+        # look up for the nodes that were marked as being missing (on the remote server)
+        self._missing: dict[int, SyncNode] = {}
+        # same for existing
+        self._existing: dict[int, SyncNode] = {}
+        # entities that are missing get negative IDs to allow identifiable creation
+        self._remote_missing_counter = -1
+
+        self.nodes: list[SyncNode] = []
+        self._initialize_nodes(entities)  # fills self.nodes with all SyncNodes
+        # list of all SyncNodes that have not yet been checked
+        self.unchecked = list(self.nodes)
+
+        # initialize reference mappings (see _create_reference_mapping)
+        (
+            self.forward_references,  # id(node) -> full set of nodes referenced by the given node
+            self.backward_references,  # id(node) -> full set of nodes referencing the given node
+            # as above, subset where the reference properties are part of identifiables
+            self.forward_references_id_props,
+            self.backward_references_id_props,
+            # as above, subset where references are part of identifiables due to "referenced_by"
+            self.forward_references_backref,
+            self.backward_references_backref,
+        ) = self._create_reference_mapping(self.nodes)
+
+        # remove entities with path or ID from unchecked list
+        self._mark_entities_with_path_or_id()
+
+        # add identifiables where possible
+        for node in list(self.nodes):
+            if self._identifiable_is_needed(node):
+                self._set_identifiable_of_node(node)
+
+        # Everything in unchecked has neither an ID nor a path.
+        # Thus, it must be possible to create an identifiable for it,
+        # which is checked using the following function:
+        for node in self.unchecked:
+            self.identifiableAdapter.all_identifying_properties_exist(node)
+
+    def set_id_of_node(self, node: SyncNode, node_id: Optional[Union[int, str]] = None):
+        """sets the ID attribute of the given SyncNode to node_id.
+
+        If node_id is None, a negative ID will be
+        given indicating that the node does not exist on the remote server.
+        Furthermore it will be marked as missing using _mark_missing.
+
+        Last review by Alexander Schlemmer on 2024-05-24.
+        """
+        if node.id is not None:
+            raise RuntimeError(
+                "Cannot update ID.\n"
+                f"It already is {node.id} and shall be set to {node_id}."
+            )
+        if node_id is None:
+            node_id = TempID(self._get_new_id())
+        node.id = node_id
+        if node_id in self._id_look_up:
+            self._merge_into(node, self._id_look_up[node.id])
+        else:
+            self._id_look_up[node.id] = node
+            if isinstance(node.id, TempID):
+                self._mark_missing(node)
+            else:
+                self._mark_existing(node)
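A typical manipulation step from the crawler's perspective, as a sketch (``adapter`` and ``graph`` assumed):

    # Look up a node on the remote server and record the result in the graph;
    # a failed lookup (None) marks the node as missing.
    identified = adapter.retrieve_identified_record_for_identifiable(node.identifiable)
    graph.set_id_of_node(node, identified.id if identified is not None else None)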
+
+    def export_record_lists(self):
+        """exports the SyncGraph in form of db.Entities
+
+        All nodes are converted to db.Entity objects and reference values that are SyncNodes are
+        replaced by their corresponding (newly created) db.Entity objects.
+
+        Since the result is returned in the form of two lists, one with Entities that exist on
+        the remote server and one with those that are missing, an error is raised if there are
+        any SyncNodes without a (possibly negative) ID.
+
+        Last review by Alexander Schlemmer on 2024-05-24.
+        """
+        # TODO reactivate once the implementation is appropriate
+        # if len(self.unchecked) > 1:
+        # self.unchecked_contains_circular_dependency()
+
+        for el in self.nodes:
+            if el.id is None:
+                raise RuntimeError("Exporting unchecked entities is not supported")
+
+        entities = []
+        node_map = {}
+        for el in self.nodes:
+            entities.append(el.export_entity())
+            node_map[id(el)] = entities[-1]
+
+        for ent in entities:
+            _set_each_scalar_value(
+                ent,
+                condition=lambda val: isinstance(val, SyncNode),
+                value=lambda val: node_map[id(val)],
+            )
+
+        missing = [el for el in entities if el.id < 0]
+        existing = [el for el in entities if el.id > 0]
+        # remove negative IDs
+        for el in missing:
+            el.id = None
+
+        return (missing, existing)
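Once ``graph.unchecked`` is empty, the result can be synchronized, sketched as:

    missing, existing = graph.export_record_lists()
    db.Container().extend(missing).insert()   # entities new to the server
    db.Container().extend(existing).update()  # entities that already exist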
+
+    def _identity_relies_on_unchecked_entity(self, node: SyncNode):
+        """
+        Returns True if the identifying properties involve a record for which it could not yet
+        be verified whether it exists in LinkAhead; otherwise False.
+
+        Last review by Alexander Schlemmer on 2024-05-27.
+        """
+
+        return any(
+            [
+                id(ent) not in self._missing and id(ent) not in self._existing
+                for ent in self.forward_references_id_props[id(node)]
+            ]
+            + [
+                id(ent) not in self._missing and id(ent) not in self._existing
+                for ent in self.backward_references_backref[id(node)]
+            ]
+        )
+
+    def unchecked_contains_circular_dependency(self):
+        """
+        Detects whether there are circular references in the given entity list and returns a list
+        where the entities are ordered according to the chain of references (only the entities
+        contained in the circle are included). Returns None if no circular dependency is found.
+
+        TODO: for the sake of detecting problems for split_into_inserts_and_updates we should only
+        consider references that are identifying properties.
+        """
+        raise NotImplementedError("This function is not yet properly implemented")
+        # TODO if the first element is not part of the circle, then
+        # this will not work
+        # We must create a better implementation (see also TODO in docstring)
+        circle = [self.unchecked[0]]
+        closed = False
+        while not closed:
+            added_to_circle = False
+            for referenced in self.forward_references[id(circle[-1])]:
+                if referenced in self.unchecked:
+                    if referenced in circle:
+                        closed = True
+                    circle.append(referenced)
+                    added_to_circle = True
+            if not added_to_circle:
+                return None
+        return circle
+
+    def get_equivalent(self, entity: SyncNode) -> Optional[SyncNode]:
+        """
+        Return an equivalent SyncNode.
+
+        Equivalent means that ID, path or identifiable are the same.
+        If new information was added to the given SyncNode (e.g. the ID), it might then be
+        possible to identify an equivalent node (i.e. one with the same ID in this example).
+        There might be more than one equivalent node in the graph, but simply the first one
+        found is returned. (When an equivalent node is found, the given node is typically
+        merged into the one that was found, and after the merge the graph is checked again
+        for equivalent nodes.)
+
+        Returns None if no equivalent node is found.
+
+        Last review by Alexander Schlemmer on 2024-05-28.
+        """
+        if entity.id is not None and entity.id in self._id_look_up:
+            candidate = self._id_look_up[entity.id]
+            if candidate is not entity:
+                return candidate
+        if entity.path is not None and entity.path in self._path_look_up:
+            candidate = self._path_look_up[entity.path]
+            if candidate is not entity:
+                return candidate
+        if (
+            entity.identifiable is not None
+            and entity.identifiable.get_representation() in self._identifiable_look_up
+        ):
+            candidate = self._identifiable_look_up[
+                entity.identifiable.get_representation()
+            ]
+            if candidate is not entity:
+                return candidate
+        return None
+
+    def _get_new_id(self):
+        """returns the next unused temporary ID
+
+        Last review by Alexander Schlemmer on 2024-05-24.
+        """
+        self._remote_missing_counter -= 1
+        return self._remote_missing_counter
+
+    def _set_identifiable_of_node(
+        self, node: SyncNode, identifiable: Optional[Identifiable] = None
+    ):
+        """sets the identifiable and checks whether an equivalent node can be found with that new
+        information. If an equivalent node is found, 'node' is merged into that node.
+
+        If no identifiable is given, the identifiable is retrieved from the identifiable adapter.
+
+        Raises a ValueError if the equivalent node found does not have an identifiable.
+        Raises a RuntimeError if there is no equivalent node found and
+          the (unique) string representation of the identifiable of node is already contained in
+          the identifiable_look_up.
+
+        Last review by Alexander Schlemmer on 2024-05-29.
+        """
+        if identifiable is None:
+            self.identifiableAdapter.all_identifying_properties_exist(node)
+            identifiable = self.identifiableAdapter.get_identifiable(
+                node, self.backward_references_backref[id(node)]
+            )
+        node.identifiable = identifiable
+        equivalent_se = self.get_equivalent(node)
+        if equivalent_se is not None:
+            self._merge_into(node, equivalent_se)
+        else:
+            if node.identifiable.get_representation() in self._identifiable_look_up:
+                raise RuntimeError("Identifiable is already in the look up")
+            self._identifiable_look_up[node.identifiable.get_representation()] = node
+
+    @staticmethod
+    def _sanity_check(entities: list[db.Entity]):
+        """
+        Checks whether each record in entities has at least one parent.
+
+        Last review by Alexander Schlemmer on 2024-05-24.
+        """
+        for ent in entities:
+            if ent.role == "Record" and len(ent.parents) == 0:
+                raise ValueError(f"Records must have a parent.\n{ent}")
+            if isinstance(ent.id, int) and ent.id < 0:
+                raise ValueError(
+                    f"Records must not have negative integers as IDs.\n{ent}"
+                )
+            if isinstance(ent.id, str) and re.match(r"^-\d+$", ent.id):
+                raise ValueError(
+                    f"Records must not have negative integers as IDs.\n{ent}"
+                )
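Both rejected cases, for illustration (with linkahead imported as ``db``):

    # Raises: a Record without a parent.
    SyncGraph._sanity_check([db.Record(name="r1")])
    # Raises: a negative ID (reserved for temporary IDs of missing entities).
    SyncGraph._sanity_check([db.Record(id=-1).add_parent("Experiment")])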
+
+    def _get_nodes_whose_identity_relies_on(self, node: SyncNode):
+        """returns a set of nodes that reference the given node as identifying property or are
+        referenced by the given node and the parent of the given node is listed as
+        "is_referenced_by"
+
+        Last review by Alexander Schlemmer on 2024-05-24.
+        """
+        return self.backward_references_id_props[id(node)].union(
+            self.forward_references_backref[id(node)]
+        )
+
+    @staticmethod
+    def _create_flat_list(
+        ent_list: list[db.Entity], flat: Optional[list[db.Entity]] = None
+    ):
+        """
+        Recursively adds entities and all their properties contained in ent_list to
+        the output list flat.
+
+        TODO: This function will be moved to pylib as it is also needed by the
+              high level API.
+
+        Last review by Alexander Schlemmer on 2024-05-29.
+        """
+        # Note: A set would be useful here, but we do not want a random order.
+        if flat is None:
+            flat = list()
+        for el in ent_list:
+            if el not in flat:
+                flat.append(el)
+        for ent in ent_list:
+            for p in ent.properties:
+                # For lists append each element that is of type Entity to flat:
+                if isinstance(p.value, list):
+                    for el in p.value:
+                        if isinstance(el, db.Entity):
+                            if el not in flat:
+                                flat.append(el)
+                                SyncGraph._create_flat_list([el], flat)
+                elif isinstance(p.value, db.Entity):
+                    if p.value not in flat:
+                        flat.append(p.value)
+                        SyncGraph._create_flat_list([p.value], flat)
+        return flat
+
+    @staticmethod
+    def _create_reference_mapping(flat: list[SyncNode]):
+        """
+        Create six dictionaries that describe references among SyncNodes. All dictionaries use the
+        Python ID of SyncNodes as keys.
+        There is always one dictionary to describe the direction of the reference, i.e.
+        map[id(node)] -> other where other is a set of SyncNodes that are being referenced by node.
+        And then there is always one dictionary for the inverse direction. The two dictionaries are
+        named "forward_" and "backward_", respectively.
+
+        Then there are three kinds of maps being generated: One includes all references
+        ("_references"), one includes references that are values of identifying properties
+        ("_references_id_props") and one includes references that are relevant for identifying
+        backreferences/"is_referenced_by" ("_references_backref"). I.e. the two latter are subesets
+        of the former reference map.
+
+        Arguments:
+        ----------
+           flat: list[SyncNode]
+                 all SyncNodes that span the graph for which the reference map shall be created
+
+        Last review by Alexander Schlemmer on 2024-05-29.
+        """
+        # TODO we need to treat children of RecordTypes somehow.
+        forward_references: dict[int, set[SyncNode]] = {}
+        backward_references: dict[int, set[SyncNode]] = {}
+        forward_references_id_props: dict[int, set[SyncNode]] = {}
+        backward_references_id_props: dict[int, set[SyncNode]] = {}
+        forward_references_backref: dict[int, set[SyncNode]] = {}
+        backward_references_backref: dict[int, set[SyncNode]] = {}
+
+        # initialize with empty lists/dict
+        for node in flat:
+            forward_references[id(node)] = set()
+            backward_references[id(node)] = set()
+            forward_references_id_props[id(node)] = set()
+            backward_references_id_props[id(node)] = set()
+            forward_references_backref[id(node)] = set()
+            backward_references_backref[id(node)] = set()
+        for node in flat:
+            for p in node.properties:
+                val = p.value
+                if not isinstance(val, list):
+                    val = [val]
+                for v in val:
+                    if isinstance(v, SyncNode):
+                        forward_references[id(node)].add(v)
+                        backward_references[id(v)].add(node)
+                        if (
+                            node.registered_identifiable is not None
+                            and len(
+                                [
+                                    el.name
+                                    for el in node.registered_identifiable.properties
+                                    if el.name == p.name
+                                ]
+                            )
+                            > 0
+                        ):
+                            forward_references_id_props[id(node)].add(v)
+                            backward_references_id_props[id(v)].add(node)
+                        if (
+                            v.registered_identifiable is not None
+                            and IdentifiableAdapter.referencing_entity_has_appropriate_type(
+                                node.parents, v.registered_identifiable
+                            )
+                        ):
+                            forward_references_backref[id(node)].add(v)
+                            backward_references_backref[id(v)].add(node)
+
+        return (
+            forward_references,
+            backward_references,
+            forward_references_id_props,
+            backward_references_id_props,
+            forward_references_backref,
+            backward_references_backref,
+        )
+
+    def _mark_entities_with_path_or_id(self):
+        """A path or an ID is sufficiently identifying. Thus, those entities can be marked as
+        checked.
+
+        When this function returns, there is only one node for each ID (i.e. no two nodes with the
+        same ID). The same is true for paths.
+
+        This function also updates _id_look_up and _path_look_up
+
+        Last review by Alexander Schlemmer on 2024-05-29.
+        """
+        for node in list(self.nodes):
+            if node.id is not None:
+                eq_node = self.get_equivalent(node)
+                if eq_node is not None:
+                    self._basic_merge_into(node, eq_node)
+                else:
+                    self._id_look_up[node.id] = node
+                    self._mark_existing(node)
+
+        for node in list(self.nodes):
+            if node.path is not None:
+                eq_node = self.get_equivalent(node)
+                if eq_node is not None:
+                    self._basic_merge_into(node, eq_node)
+                else:
+                    self._path_look_up[node.path] = node
+                    try:
+                        existing = cached_get_entity_by(path=node.path)
+                    except EmptyUniqueQueryError:
+                        existing = None
+                    remote_id = None
+                    if existing is not None:
+                        remote_id = existing.id
+                    self.set_id_of_node(node, remote_id)
+
+    def _basic_merge_into(self, source: SyncNode, target: SyncNode):
+        """tries to merge source into target and updates member variables
+
+        - reference maps are updated
+        - self.nodes is updated
+        - self.unchecked is updated
+        - lookups are updated
+        """
+        # sanity checks
+        if source is target:
+            raise ValueError("source must not be target")
+
+        target.update(source)
+
+        # replace actual reference property values
+        for node in self.backward_references[id(source)]:
+            _set_each_scalar_value(
+                node, condition=lambda val: val is source, value=lambda val: target
+            )
+
+        # update reference mappings
+        for setA, setB in (
+            (self.forward_references, self.backward_references),  # ref: source -> other
+            (self.backward_references, self.forward_references),  # ref: other -> source
+            (self.forward_references_id_props, self.backward_references_id_props),
+            (self.backward_references_id_props, self.forward_references_id_props),
+            (self.forward_references_backref, self.backward_references_backref),
+            (self.backward_references_backref, self.forward_references_backref),
+        ):
+            for node in setA.pop(id(source)):
+                setA[id(target)].add(node)
+                setB[id(node)].remove(source)
+                setB[id(node)].add(target)
+
+        # remove unneeded SyncNode
+        self.nodes.remove(source)
+        if source in self.unchecked:
+            self.unchecked.remove(source)
+        # update look ups
+        if target.id is not None:
+            self._id_look_up[target.id] = target
+        if target.path is not None:
+            self._path_look_up[target.path] = target
+        if target.identifiable is not None:
+            self._identifiable_look_up[target.identifiable.get_representation()] = target
+
+    def _merge_into(self, source: SyncNode, target: SyncNode):
+        """tries to merge source into target and performs the necessary updates:
+        - updates the member variables of target using source (``target.update(source)``).
+        - replaces reference values to source by target
+        - updates the reference map
+        - updates lookup tables
+        - removes source from node lists
+        - marks target as missing/existing if source was marked that way
+        - adds an identifiable if now possible (e.g. merging based on ID might allow creating an
+          identifiable when neither of the two nodes had sufficient properties on its own before)
+        - checks whether dependent nodes can now get an identifiable (the merge might have set the
+          ID such that dependent nodes can now create an identifiable)
+
+        Last review by Alexander Schlemmer on 2024-05-29.
+        """
+        self._basic_merge_into(source, target)
+
+        if (id(source) in self._existing and id(target) in self._missing) or (
+            id(target) in self._existing and id(source) in self._missing
+        ):
+            raise RuntimeError("Trying to merge missing and existing")
+
+        if id(source) in self._missing and id(target) not in self._missing:
+            self._mark_missing(target)
+        elif id(source) in self._existing and id(target) not in self._existing:
+            self._mark_existing(target)
+
+        # due to the merge it might now be possible to create an identifiable
+        if self._identifiable_is_needed(target):
+            self._set_identifiable_of_node(target)
+        # This is one of three cases that affect other nodes:
+        # - mark existing
+        # - mark missing
+        # - merge
+        self._add_identifiables_to_dependent_nodes(target)
+
+        eq_node = self.get_equivalent(target)
+        if eq_node is not None:
+            self._merge_into(target, eq_node)
+
+    def _identifiable_is_needed(self, node: SyncNode):
+        """
+        This function checks whether:
+        - the identifiable of node is None
+        - the node has all properties that are needed for the identifiable
+        - there are no unchecked entities that are needed for the identifiable of the node,
+          neither as forward nor as backward references
+
+        Last review by Alexander Schlemmer on 2024-05-24.
+        """
+        return (
+            node.identifiable is None
+            and not self._identity_relies_on_unchecked_entity(node)
+            and self.identifiableAdapter.all_identifying_properties_exist(
+                node, raise_exception=False
+            )
+        )
+
+    def _initialize_nodes(self, entities: list[db.Entity]):
+        """create initial set of SyncNodes from provided Entity list"""
+        self._sanity_check(entities)
+        entities = self._create_flat_list(entities)
+        se_lookup: dict[int, SyncNode] = {}  # lookup: python id -> SyncNode
+
+        # Create new sync nodes from the list of entities, their registered identifiables
+        # are set from the identifiable adapter.
+        for el in entities:
+            self.nodes.append(
+                SyncNode(el, self.identifiableAdapter.get_registered_identifiable(el))
+            )
+            se_lookup[id(el)] = self.nodes[-1]
+
+        # replace db.Entity objects with SyncNodes in references:
+        for node in self.nodes:
+            _set_each_scalar_value(
+                node,
+                condition=lambda val: id(val) in se_lookup,
+                value=lambda val: se_lookup[id(val)],
+            )
+
+    def _add_identifiables_to_dependent_nodes(self, node):
+        """For each dependent node, we check whether this allows to create an identifiable
+
+        Last review by Alexander Schlemmer on 2024-05-29.
+        """
+        for other_node in self._get_nodes_whose_identity_relies_on(node):
+            if self._identifiable_is_needed(other_node):
+                self._set_identifiable_of_node(other_node)
+
+    def _mark_missing(self, node: SyncNode):
+        """Mark a sync node as missing and remove it from the dictionary of unchecked nodes.
+
+        Last review by Alexander Schlemmer on 2024-05-24.
+        """
+        self._missing[id(node)] = node
+        self.unchecked.remove(node)
+
+        # This is one of three cases that affect other nodes:
+        # - mark existing
+        # - mark missing
+        # - merge
+        self._add_identifiables_to_dependent_nodes(node)
+        # For each dependent node, we set the ID to None (missing)
+        # (None is the default second argument of set_id_of_node.)
+        for other_node in self._get_nodes_whose_identity_relies_on(node):
+            if other_node in self.unchecked:
+                self.set_id_of_node(other_node)
+
+    def _mark_existing(self, node: SyncNode):
+        """Mark a sync node as existing and remove it from the dictionary of unchecked nodes.
+
+        Last review by Alexander Schlemmer on 2024-05-24.
+        """
+        if isinstance(node.id, TempID):
+            raise ValueError("ID must valid existing entities, not TempID")
+        self._existing[id(node)] = node
+        self.unchecked.remove(node)
+        # This is one of three cases that affect other nodes:
+        # - mark existing
+        # - mark missing
+        # - merge
+        self._add_identifiables_to_dependent_nodes(node)
diff --git a/src/caoscrawler/sync_node.py b/src/caoscrawler/sync_node.py
new file mode 100644
index 0000000000000000000000000000000000000000..141e743bffa09f0caf661bcd1939a4233cb7249c
--- /dev/null
+++ b/src/caoscrawler/sync_node.py
@@ -0,0 +1,267 @@
+#!/usr/bin/env python3
+# encoding: utf-8
+#
+# This file is a part of the LinkAhead Project.
+#
+# Copyright (C) 2024 Henrik tom Wörden
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as
+# published by the Free Software Foundation, either version 3 of the
+# License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+#
+
+
+from __future__ import annotations
+
+import logging
+from typing import TYPE_CHECKING, Any, Optional, Union
+
+import linkahead as db
+import yaml
+from linkahead.common.models import Parent, _ParentList, _Properties
+from warnings import warn
+
+from .exceptions import ImpossibleMergeError
+
+if TYPE_CHECKING:
+    from .identifiable import Identifiable
+
+logger = logging.getLogger(__name__)
+
+
+class TempID(int):
+    """A special kind of int for negative temporary IDs.
+
+    This allows identifying TempIDs even in the presence of string IDs.
+    A string ID might look like a negative integer.
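+
+    For example, ``TempID(-3)`` prints like the string ID ``"-3"``, but only the former is an
+    instance of ``TempID``.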
+    """
+    pass
+
+
+class SyncNode(db.Entity):
+    """represents the information of an Entity as it shall be created in LinkAhead
+
+    The following information is taken from a db.Entity object during initialization or when the
+    object is updated using the `update` member function:
+    - id
+    - role
+    - path
+    - file
+    - name
+    - description
+    - parents
+    - properties
+
+    Typically, this class is used in the following way:
+    1. A SyncNode is initialized with a db.Entity object.
+    2. The SyncNode object is possibly updated one or more times with other SyncNode objects.
+    3. A db.Entity object is created (`export_entity`) that contains the combined information.
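+
+    A minimal sketch of this life cycle (``reg_ident`` stands for a registered identifiable
+    obtained from an identifiable adapter; names are illustrative)::
+
+        node = SyncNode(db.Record(name="a").add_parent("A"), reg_ident)
+        node.update(other_node)        # merge information from another SyncNode
+        entity = node.export_entity()  # create a plain db.Entity again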
+    """
+
+    def __init__(
+        self, entity: db.Entity, registered_identifiable: Optional[db.RecordType] = None,
+        **kwargs
+    ):
+        super().__init__(name=entity.name,
+                         id=entity.id,
+                         description=entity.description,
+                         **kwargs)
+        # db.Entity properties
+        self.role = entity.role
+        self.path = entity.path
+        self.file = entity.file
+        self.parents = _ParentList().extend(entity.parents)
+        self.properties = _Properties().extend(entity.properties)
+        self._check_for_multiproperties()
+        # other members
+        self.identifiable: Optional[Identifiable] = None
+        self.registered_identifiable = registered_identifiable
+
+    def update(self, other: SyncNode) -> None:
+        """update this node with information of given ``other`` SyncNode.
+
+        parents are added if they are not yet in the list
+        properties are added in any case. This may lead to duplication of properties.
+        We allow this duplication here and remove it when we create a db.Entity (export_entity
+        function) because if property values are SyncNode objects, they might not be comparable (no
+        ID, no identifiable) yet.
+        """
+
+        if other.identifiable is not None and self.identifiable is not None:
+            if (
+                other.identifiable.get_representation()
+                != self.identifiable.get_representation()
+            ):
+                raise ValueError(
+                    "The SyncNode that is used with update must have an equivalent"
+                    f" identifiable. I.e. you cannot merge entities with differing identifiables"
+                    "The identifiables where:\n"
+                    f"{self.identifiable._create_hashable_string(self.identifiable)}\n"
+                    f"and\n{other.identifiable._create_hashable_string(other.identifiable)}."
+                )
+
+        if other.identifiable:
+            self.identifiable = other.identifiable
+        for attr in ["id", "role", "path", "file", "name", "description"]:
+            if other.__getattribute__(attr) is not None:
+                if self.__getattribute__(attr) is None:
+                    self.__setattr__(attr, other.__getattribute__(attr))
+                else:
+                    if self.__getattribute__(attr) != other.__getattribute__(attr):
+                        raise ImpossibleMergeError(
+                            f"Trying to update {attr} but this would lead to an "
+                            f"override of the value '{self.__getattribute__(attr)}' "
+                            f"by the value '{other.__getattribute__(attr)}'",
+                            pname=attr, values=(self.__getattribute__(attr),
+                                                other.__getattribute__(attr))
+                        )
+        for p in other.parents:
+            if not parent_in_list(p, self.parents):
+                self.parents.append(p)
+        for p in other.properties:
+            self.properties.append(p)
+
+    def export_entity(self) -> db.Entity:
+        """create a db.Entity object from this SyncNode
+
+        Properties are only added once (based on id or name). If values do not match, an error is
+        raised. If values are SyncNode objects with IDs, they are considered equal if their IDs are
+        equal.
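+
+        For example, two SyncNode property values that both have ``id == 1111`` count as the
+        same value here, even if their other attributes differ.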
+        """
+        ent = None
+        if self.role == "Record":
+            ent = db.Record()
+        elif self.role == "File":
+            ent = db.File()
+        else:
+            raise RuntimeError("Invalid role")
+        for attr in ["id", "role", "path", "file", "name", "description"]:
+            ent.__setattr__(attr, self.__getattribute__(attr))
+        for p in self.parents:
+            ent.add_parent(p)
+        for p in self.properties:
+            entval: Any = ent.get_property(p)
+            if entval is None:
+                ent.add_property(id=p.id, name=p.name, value=p.value, description=p.description,
+                                 datatype=p.datatype, unit=p.unit)
+            else:
+                entval = entval.value
+                unequal = False
+                pval = p.value
+                if isinstance(entval, list) != isinstance(pval, list):
+                    unequal = True
+                if not isinstance(entval, list):
+                    entval = [entval]
+                if not isinstance(pval, list):
+                    pval = [pval]
+                if len(entval) != len(pval):
+                    unequal = True
+                else:
+                    for e_el, p_el in zip(entval, pval):
+                        if isinstance(e_el, SyncNode) and e_el.id is not None:
+                            e_el = e_el.id
+                        if isinstance(p_el, SyncNode) and p_el.id is not None:
+                            p_el = p_el.id
+                        if e_el != p_el:
+                            unequal = True
+
+                if unequal:
+                    logger.error(
+                        "The Crawler is trying to create an entity,"
+                        " but there are conflicting property values."
+                        f"Problematic Property: {p.name}\n"
+                        f"First value:\n{entval}\n"
+                        f"Second value:\n{pval}\n"
+                        f"{self}"
+                    )
+                    ime = ImpossibleMergeError(
+                        "Cannot merge Entities", pname=p.name, values=(entval, pval)
+                    )
+                    raise ime
+        return ent
+
+    def __repr__(self) -> str:
+        """ somewhat concise text representation of the SyncNode """
+        res = f"\n=====================================================\n{self.role}\n"
+        res += yaml.dump(
+            {
+                "id": self.id,
+                "name": self.name,
+                "path": self.path,
+                "parents": [el.name for el in self.parents],
+            },
+            allow_unicode=True,
+        )
+        res += "---------------------------------------------------\n"
+        res += "properties:\n"
+        d: dict[str, Any] = {}
+        for p in self.properties:
+            v = p.value
+            d[p.name] = []
+            if not isinstance(p.value, list):
+                v = [v]
+            for el in v:
+                if isinstance(el, SyncNode):
+                    d[p.name].append(
+                        {
+                            "id": el.id,
+                            "name": el.name,
+                            "path": el.path,
+                            "parents": [e.name for e in el.parents],
+                        }
+                    )
+                else:
+                    d[p.name].append(el)
+
+        return (
+            res
+            + yaml.dump(d, allow_unicode=True)
+            + "=====================================================\n"
+        )
+
+    def _check_for_multiproperties(self):
+        """ warns if multiproperties are present """
+        ids = set()
+        names = set()
+        for p in self.properties:
+            if p.name is not None:
+                if p.name in names:
+                    warn("Multiproperties are not supported by the crawler.")
+                names.add(p.name)
+            if p.id is not None:
+                if p.id in ids:
+                    warn("Multiproperties are not supported by the crawler.")
+                ids.add(p.id)
+
+
+def parent_in_list(parent: Parent, plist: _ParentList) -> bool:
+    """helper function that checks whether a parent with the same name or ID is in the plist"""
+    missing = False
+    if parent.name is not None:
+        if parent.name not in plist._element_by_name:
+            missing = True
+    if parent.id is not None:
+        if str(parent.id) not in plist._element_by_id:
+            missing = True
+    return not missing
+
+
+def property_in_list(prop: db.Property, plist: _Properties) -> bool:
+    """helper function that checks whether a property with the same name or ID is in the plist"""
+    missing = False
+    if prop.name is not None:
+        if prop.name not in plist._element_by_name:
+            missing = True
+    if prop.id is not None:
+        if str(prop.id) not in plist._element_by_id:
+            missing = True
+    return not missing
diff --git a/src/caoscrawler/transformer_functions.py b/src/caoscrawler/transformer_functions.py
index eda9f3c2bc98c8d2561f152f9f6ddd422daee00a..ce08bc6bc05caa84f342cdc25f3243c5bab0b79c 100644
--- a/src/caoscrawler/transformer_functions.py
+++ b/src/caoscrawler/transformer_functions.py
@@ -20,9 +20,14 @@
 # You should have received a copy of the GNU Affero General Public License
 # along with this program. If not, see <https://www.gnu.org/licenses/>.
 
+"""Definition of default transformer functions.
+
+See https://docs.indiscale.com/caosdb-crawler/converters.html#transform-functions for more
+information.
+
 """
-Defnition of default transformer functions.
-"""
+
+import datetime
 import re
 from typing import Any
 
@@ -61,3 +66,36 @@ def replace(in_value: Any, in_parameters: dict):
     if not isinstance(in_value, str):
         raise RuntimeError("must be string")
     return in_value.replace(in_parameters['remove'], in_parameters['insert'])
+
+
+def date_parse(in_value: str, params: dict) -> str:
+    """Transform text so that it is formatted in a way that LinkAhead can understand it.
+
+Parameters
+==========
+
+- date_format: str, optional
+    A format string using the ``datetime`` specification:
+    https://docs.python.org/3/library/datetime.html#strftime-and-strptime-format-codes
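+
+For example (an illustrative call, reformatting a German-style date)::
+
+    date_parse("01.12.2024", {"date_format": "%d.%m.%Y"})  # -> "2024-12-01"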
+    """
+    fmt_default = "%Y-%m-%d"
+    fmt = params.get("date_format", fmt_default)
+    dt_str = datetime.datetime.strptime(in_value, fmt).strftime(fmt_default)
+    return dt_str
+
+
+def datetime_parse(in_value: str, params: dict) -> str:
+    """Transform text so that it is formatted in a way that LinkAhead can understand it.
+
+Parameters
+==========
+
+- datetime_format: str, optional
+    A format string using the ``datetime`` specification:
+    https://docs.python.org/3/library/datetime.html#strftime-and-strptime-format-codes
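+
+For example (an illustrative call, assuming a space-separated input format)::
+
+    datetime_parse("01.12.2024 13:45:00", {"datetime_format": "%d.%m.%Y %H:%M:%S"})
+    # -> "2024-12-01T13:45:00"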
+    """
+    fmt_default = "%Y-%m-%dT%H:%M:%S"
+    fmt = params.get("datetime_format", fmt_default)
+    dt_str = datetime.datetime.strptime(in_value, fmt).strftime(fmt_default)
+    return dt_str
diff --git a/src/caoscrawler/utils.py b/src/caoscrawler/utils.py
index c62f44eeaa75ca42579aa3d6ead437e901cd38ff..096fde9b573f4ff60995498144cad3589ce7dbb2 100644
--- a/src/caoscrawler/utils.py
+++ b/src/caoscrawler/utils.py
@@ -25,6 +25,9 @@
 
 # Some utility functions, e.g. for extending pylib.
 
+import sys
+from typing import Optional
+
 import linkahead as db
 
 
@@ -39,3 +42,30 @@ def has_parent(entity: db.Entity, name: str):
         if parent.name == name:
             return True
     return False
+
+
+def MissingImport(name: str, hint: str = "", err: Optional[Exception] = None) -> type:
+    """Factory with dummy classes, which may be assigned to variables but never used."""
+    def _error():
+        error_msg = f"This class ({name}) cannot be used, because some libraries are missing."
+        if hint:
+            error_msg += "\n\n" + hint
+
+        if err:
+            print(error_msg, file=sys.stdout)
+            raise RuntimeError(error_msg) from err
+        raise RuntimeError(error_msg)
+
+    class _Meta(type):
+        def __getattribute__(cls, *args, **kwargs):
+            _error()
+
+        def __call__(cls, *args, **kwargs):
+            _error()
+
+    class _DummyClass(metaclass=_Meta):
+        pass
+
+    _DummyClass.__name__ = name
+
+    return _DummyClass
diff --git a/src/caoscrawler/version.py b/src/caoscrawler/version.py
index fdc8323452cd190cc3628efa57c15992f30fabeb..0b72dd65116fbc102a4dc2492d726698cad5a13b 100644
--- a/src/caoscrawler/version.py
+++ b/src/caoscrawler/version.py
@@ -17,11 +17,7 @@
 # You should have received a copy of the GNU Affero General Public License
 # along with this program. If not, see <https://www.gnu.org/licenses/>.
 #
-try:
-    from importlib import metadata as importlib_metadata
-except ImportError:  # Python<3.8 dowesn"t support this so use
-    import importlib_metadata
-
+from importlib import metadata as importlib_metadata
 from packaging.version import parse as parse_version
 from warnings import warn
 
@@ -43,7 +39,7 @@ def check_cfood_version(metadata: dict):
     if not metadata or "crawler-version" not in metadata:
 
         msg = """
-No crawler version specified in cfood definition, so there is now guarantee that
+No crawler version specified in cfood definition, so there is no guarantee that
 the cfood definition matches the installed crawler version.
 
 Specifying a version is highly recommended to ensure that the definition works
diff --git a/src/doc/concepts.rst b/src/doc/concepts.rst
index 7100bcd1790edb3e040a1a90663a32a09b7c8eaf..770731857112b93205f0e80d623fa9183c4aa885 100644
--- a/src/doc/concepts.rst
+++ b/src/doc/concepts.rst
@@ -1,3 +1,4 @@
+========
 Concepts
 ========
 
@@ -5,6 +6,10 @@ The CaosDB Crawler can handle any kind of hierarchical data structure. The typic
 directory tree that is traversed. We use the following terms/concepts to describe how the CaosDB
 Crawler works.
 
+Basics
+======
+
+
 Structure Elements
 ++++++++++++++++++
 
@@ -29,7 +34,7 @@ existing StructureElements, Converters create a tree of StructureElements.
 .. image:: img/converter.png
   :height: 170
 
-See :std:doc:`converters<converters>` for details.
+See the chapter :std:doc:`Converters<converters>` for details.
 
 Relevant sources in:
 
@@ -183,8 +188,7 @@ TODO
 Caching
 +++++++
 
-The Crawler uses the cached library function ``cached_get_entity_by``. The
-cache is cleared automatically, when the Crawler does updates, but if you would
-run the same Python process indefinetely the Crawler would not see changes due
-to the Cache. Thus, please make sure to clear the cache if you create long
-running Python processes.
+The Crawler uses the cached library function ``cached_get_entity_by``. The cache is cleared
+automatically when the Crawler does updates, but if you ran the same Python process indefinitely,
+the Crawler would not see changes in LinkAhead due to the cache. Thus, please make sure to clear the
+cache if you create long running Python processes.
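+
+A minimal sketch for such long-running processes (``cache_clear`` is provided by
+``linkahead.cached``):
+
+.. code-block:: python
+
+   from linkahead.cached import cache_clear
+
+   # ... after LinkAhead content may have changed externally ...
+   cache_clear()  # drop cached entities so the Crawler sees the changes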
diff --git a/src/doc/conf.py b/src/doc/conf.py
index 3cce99d03728d229c848ba6374d15de9fe73ec7b..3248726ed63dd80fdee7c06da3c27caace93f22c 100644
--- a/src/doc/conf.py
+++ b/src/doc/conf.py
@@ -53,6 +53,7 @@ extensions = [
     'sphinx.ext.autosectionlabel',
     'sphinx.ext.intersphinx',
     'sphinx.ext.napoleon',     # For Google style docstrings
+    "sphinx.ext.todo",
     "recommonmark",            # For markdown files.
     "sphinx_rtd_theme",
 ]
@@ -213,6 +214,10 @@ intersphinx_mapping = {
 
 # TODO Which options do we want?
 autodoc_default_options = {
-    'members': None,
-    'undoc-members': None,
+    'members': True,
+    'undoc-members': True,
+    'member-order': 'bysource',
+    'special-member': ["__init__"],
 }
+
+todo_include_todos = True
diff --git a/src/doc/converters.rst b/src/doc/converters.rst
index 9b28c9a61eec4d9707b9640720b9c6a44a8fe25e..d7e11c235fafa1e42f53342a24255ceb0d275ed4 100644
--- a/src/doc/converters.rst
+++ b/src/doc/converters.rst
@@ -8,10 +8,6 @@ existing StructureElements, Converters create a tree of StructureElements.
 .. image:: img/converter.png
   :height: 170
 
-The ``cfood.yml`` definition also describes which
-Converters shall be used to treat the generated child StructureElements. The
-definition therefore itself also defines a tree.
-
 Each StructureElement in the tree has a set of properties, organized as
 key-value pairs.
 Some of those properties are specified by the type of StructureElement. For example,
@@ -19,15 +15,18 @@ a file could have the file name as property: ``'filename': myfile.dat``.
 Converters may define additional functions that create further values. For
 example, a regular expression could be used to get a date from a file name.
 
+CFood definition
+++++++++++++++++
 
-A converter is defined via a yml file or part of it. The definition states
-what kind of StructureElement it treats (typically one).
-Also, it defines how children of the current StructureElement are
-created and what Converters shall be used to treat those.
+Converter application to data is specified via a tree-like yml file (called ``cfood.yml``, by
+convention).  The yml file specifies which Converters shall be used on which StructureElements, and
+how to treat the generated *child* StructureElements.
 
 The yaml definition may look like this:
 
-TODO: outdated, see cfood-schema.yml
+.. todo::
+
+  This is outdated, see ``cfood-schema.yml`` for the current specification of a ``cfood.yml``.
 
 .. code-block:: yaml
 
@@ -47,13 +46,18 @@ TODO: outdated, see cfood-schema.yml
         subtree:
             (...)
 
-The **<NodeName>** is a description of what it represents (e.g.
-'experiment-folder') and is used as identifier.
+The **<NodeName>** is a description of what the current block represents (e.g.
+``experiment-folder``) and is used as an identifier.
 
 **<type>** selects the converter that is going to be matched against the current structure
 element. If the structure element matches (this is a combination of a typecheck and a detailed
-match, see :py:class:`~caoscrawler.converters.Converter` for details) the converter is used
-to generate records (see :py:meth:`~caoscrawler.converters.Converter.create_records`) and to possibly process a subtree, as defined by the function :func:`caoscrawler.converters.create_children`.
+match, see the :py:class:`~caoscrawler.converters.Converter` source documentation for details), the
+converter will:
+
+- generate records (with :py:meth:`~caoscrawler.converters.Converter.create_records`)
+- possibly process a subtree (with :py:meth:`caoscrawler.converters.Converter.create_children`)
+
+**match** *TODO*
 
 **records** is a dict of definitions that define the semantic structure
 (see details below).
@@ -151,6 +155,9 @@ The following StructureElement types are typically created by the DictElement co
 - ListElement
 - DictElement
 
+Note that you may use ``TextElement`` for anything that exists in a text format that can be
+interpreted by the server, such as date and datetime strings in ISO-8601 format.
+
 Scalar Value Converters
 =======================
 `BooleanElementConverter`, `FloatElementConverter`, `TextElementConverter`,  and
@@ -253,13 +260,13 @@ HDF5 Converters
 
 For treating `HDF5 Files
 <https://docs.hdfgroup.org/hdf5/develop/_s_p_e_c.html>`_, there are in total
-four individual converters corresponding to the internal structure of HDF5 files:
-the :ref:`H5FileConverter` which opens the file itself and creates further
-structure elements from HDF5 groups, datasets, and included multi-dimensional
-arrays that are in turn treated by the :ref:`H5GroupConverter`, the
-:ref:`H5DatasetConverter`, and the :ref:`H5NdarrayConverter`, respectively. You
-need to install the LinkAhead crawler with its optional ``h5crawler`` dependency
-for using these converters.
+four individual converters corresponding to the internal structure of HDF5
+files: the :ref:`H5FileConverter` which opens the file itself and creates
+further structure elements from HDF5 groups, datasets, and included
+multi-dimensional arrays that are in turn treated by the
+:ref:`H5GroupConverter`, the :ref:`H5DatasetConverter`, and the
+:ref:`H5NdarrayConverter`, respectively. You need to install the LinkAhead
+crawler with its optional ``h5-crawler`` dependency for using these converters.
 
 The basic idea when crawling HDF5 files is to treat them very similar to
 :ref:`dictionaries <DictElement Converter>` in which the attributes on root,
diff --git a/src/doc/getting_started/furtherreading.rst b/src/doc/getting_started/furtherreading.rst
index eb600416c1fce3857d28fc2e856ceabebb3a8bb7..8d8d3ecc4b5575f71e90e9e5a17b060a63403a07 100644
--- a/src/doc/getting_started/furtherreading.rst
+++ b/src/doc/getting_started/furtherreading.rst
@@ -6,3 +6,4 @@ Further reading
 - Some useful examples can be found in the `integration tests
   <https://gitlab.com/caosdb/caosdb-crawler/-/tree/main/integrationtests>`_ (and to a certain extent
   in the unit tests).
+- TODO: Information on caching
diff --git a/src/doc/getting_started/helloworld.md b/src/doc/getting_started/helloworld.md
index 723fb88d08047350d9f4bc3d3d2bd84ec9b27efb..67fdf88974391ac6209f1010bfb4f2d883e51021 100644
--- a/src/doc/getting_started/helloworld.md
+++ b/src/doc/getting_started/helloworld.md
@@ -33,7 +33,7 @@ Then you can do the following interactively in (I)Python. But we recommend that
 copy the code into a script and execute it to spare yourself typing.
 
 ```python
-import caosdb as db
+import linkahead as db
 from datetime import datetime
 from caoscrawler import Crawler, SecurityMode
 from caoscrawler.identifiable_adapters import CaosDBIdentifiableAdapter
diff --git a/src/doc/getting_started/optionalfeatures.rst b/src/doc/getting_started/optionalfeatures.rst
index d326d7fce6f77a0278c9f2d05a641888203a2089..7b77646501d677b7a99799b97fae752107b11d6f 100644
--- a/src/doc/getting_started/optionalfeatures.rst
+++ b/src/doc/getting_started/optionalfeatures.rst
@@ -30,6 +30,13 @@ to decide what tool is used for sending mails (use the upper one if you
 want to actually send mails. See ``sendmail`` configuration in the
 LinkAhead docs.
 
+You can even supply the name of a custom CSS file that shall be used:
+
+.. code:: ini
+
+   [advancedtools]
+   crawler.customcssfile = theme-research.css
+
 Crawler Status Records
 ----------------------
 
diff --git a/src/doc/macros.rst b/src/doc/macros.rst
index d093d9b69f5d2c14b5bfbb2fe292545fc7943ca7..3a234973ee17791aaa2a0bd9e4b81836207a07e0 100644
--- a/src/doc/macros.rst
+++ b/src/doc/macros.rst
@@ -1,6 +1,9 @@
 Macros
 ------
 
+Introduction
+============
+
 Macros highly facilitate the writing of complex :doc:`CFoods<cfood>`. Consider the following common
 example:
 
@@ -83,16 +86,46 @@ The expanded version of `ExperimentalData` will look like:
 This :ref:`example<example_files_2>` can also be found in the macro unit tests (see :func:`unittests.test_macros.test_documentation_example_2`).
 
 
-Complex Example
-===============
 
-The following, more complex example, demonstrates the use
-of macro variable substitutions that generate crawler variable substitutions:
+Mixing macros and plain definitions
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+You can also mix macros and plain definitions.  Whenever a name cannot be resolved to a macro, a
+plain yaml node definition is used as a fallback:
+
+.. code:: yaml
+
+  ---
+  metadata:
+    macros:
+    - !defmacro
+      name: MarkdownFile
+      # ... Definition here ...
+  ---
+  ExperimentalData:
+    type: Directory
+    match: ExperimentalData
+    subtree: !macro
+      MarkdownFile:
+      - name: README
+        filename: ^README.md$
+      OtherContent:  # There is no macro named "OtherContent", so this is parsed as normal content.
+        type: SimpleFile
+        match: .*txt
+        records:
+          # ... Normal content ...
+
 
-- `$$$nodename` will lead to a macro variable substitution of variable `$nodename` during macro expansion.
-- `$$` will be turned into `$`
-- So in the crawler cfood, the string will appear as `$value` if variable `nodename` would be set to `value` when using the macro.
+Complex example
+===============
+
+Let's try something more complex: what happens to multiple ``$``?  This example demonstrates the use
+of `macro` variable substitutions to generate `crawler` variable substitutions:
 
+- ``$$`` will be converted into ``$``.
+- ``$$$nodename`` will retain a single ``$`` and substitute ``$nodename`` during macro expansion.
+- So in the cfood, if ``nodename: value``, the string ``$$$nodename`` will be converted to
+  ``$value``.
 
 .. _example_1:
 .. code-block:: yaml
@@ -118,7 +151,8 @@ of macro variable substitutions that generate crawler variable substitutions:
              Simulation:
                $recordtype: +$File
 
-The expanded version of :ref:`example<example_1>` can be seen in :ref:`example<example_1_expanded>`.
+The expanded version of the :ref:`example above<example_1>` (with ``nodename: Dataset``) can be seen
+:ref:`here<example_1_expanded>`:
 
 
 .. _example_1_expanded:
@@ -141,11 +175,11 @@ The expanded version of :ref:`example<example_1>` can be seen in :ref:`example<e
         type: SimpleFile
     type: Directory
 
-This :ref:`example<example_1>` can also be found in the macro unit tests (see :func:`unittests.test_macros.test_documentation_example_1`).
-
+This example can also be found in the macro unit tests (see
+:func:`unittests.test_macros.test_documentation_example_1`).
 
 
-Using Macros Multiple Times
+Using macros multiple times
 ===========================
 
 To use the same macro multiple times in the same yaml node, lists can be used:
@@ -198,11 +232,11 @@ use the same top level key. Because later versions would overwrite previous
 ones. Here we used ``$macro_name`` to prevent that.
 
 
-Limitation
-==========
+Limitations
+===========
 
-Currently it is not possible to use the same macro twice in the same yaml node, but in different
-positions. Consider:
+Currently it is not possible to use the same macro twice in the same yaml node, if it occurs in
+different positions. Consider:
 
 .. _example_multiple_limitation:
 .. code-block:: yaml
@@ -227,14 +261,13 @@ positions. Consider:
       Other_node:
         type: test
         
-      test_twice:  # This is NOT possible as each
-                   #  dictionary element can only appear once in a yaml node.
+      test_twice:  # This is NOT possible as each key
+                   # can only appear once in a yaml node.
       - macro_name: twice # <- This is the second one, with different arguments
         a: 5
       - {}                # <- This is the third one, just using default arguments
 
-However, this should not be a real limitation, as the crawler is designed in a way,
-that the order of the nodes in the same level should not matter.
+This should not be a real limitation, however, as the order of nodes does not matter for the
+crawler.
 
 
 Using macros within macro definitions
diff --git a/src/doc/tutorials/parameterfile.rst b/src/doc/tutorials/parameterfile.rst
index 9369ba8b83df8c484a4af8f240e1a1de2f4c10fb..2442969541eebf9a4e058b797b48995b39372a3e 100644
--- a/src/doc/tutorials/parameterfile.rst
+++ b/src/doc/tutorials/parameterfile.rst
@@ -88,6 +88,10 @@ regular expressions do:
 We can use the groups from the regular expressions that are used for matching.
 In our example, we use the "value" group to assign the "frequency" value to the "Experiment".
 
+.. note::
+
+  For more information on the ``cfood.yml`` specification, read on in the chapter :ref:`Converters`.
+
 A fully grown CFood
 -------------------
 
@@ -148,4 +152,6 @@ the CFood file is in the current working directory):
 
    caosdb-crawler -s update -i identifiables.yml cfood.yml .
 
+.. note::
 
+   ``caosdb-crawler`` currently only works with cfoods whose top-level element is a directory.
diff --git a/tox.ini b/tox.ini
index 03e02ebeff196430129e10c4c0d853ca77c47302..41249e4277391c5ffa4ec13fc4da1a6ee1f48491 100644
--- a/tox.ini
+++ b/tox.ini
@@ -1,21 +1,23 @@
 [tox]
-envlist = py37, py38, py39, py310, py311
+envlist = py38, py39, py310, py311, py312, py313
 skip_missing_interpreters = true
 
 [testenv]
-deps = .
+deps = .[h5-crawler,spss]
     pytest
     pytest-cov
-    h5py
     # TODO: Make this f-branch sensitive
     git+https://gitlab.indiscale.com/caosdb/src/caosdb-pylib.git@dev
     git+https://gitlab.indiscale.com/caosdb/src/caosdb-advanced-user-tools.git@dev
 commands = caosdb-crawler --help
-    py.test --cov=caosdb -vv {posargs}
+    py.test --cov=caoscrawler -vv {posargs}
 
 [flake8]
 max-line-length = 100
 
+[pycodestyle]
+max-line-length = 100
+
 [pytest]
 testpaths = unittests
-xfail_strict = True
\ No newline at end of file
+xfail_strict = True
diff --git a/unittests/example_cfood.yml b/unittests/example_cfood.yml
index 713bd4be0f3c816e1e8c8b7a057b30a4b400f13c..798e540fa25e49bf610ea21653db41a0bddc4d5f 100644
--- a/unittests/example_cfood.yml
+++ b/unittests/example_cfood.yml
@@ -1,6 +1,6 @@
 ---
 metadata:
-  crawler-version: 0.3.1
+  crawler-version: 0.7.2
 ---
 Definitions:
   type: Definitions
diff --git a/unittests/h5_cfood.yml b/unittests/h5_cfood.yml
index f688de6a2171da6533626449b030bcd95a43b37b..4b95a0a31bc43a902eb63dc3aa09b805fc28c2aa 100644
--- a/unittests/h5_cfood.yml
+++ b/unittests/h5_cfood.yml
@@ -1,6 +1,6 @@
 ---
 metadata:
-  crawler-version: 0.6.1
+  crawler-version: 0.7.2
 ---
 Converters:
   H5Dataset:
diff --git a/unittests/scifolder_cfood.yml b/unittests/scifolder_cfood.yml
index 9d6e8cf3ea325ad14641530f2e6cafd43f0dc1bb..ca5fa589b5903e0c0d8ef3dcb2528ea79e0f8cee 100644
--- a/unittests/scifolder_cfood.yml
+++ b/unittests/scifolder_cfood.yml
@@ -4,7 +4,7 @@
 
 ---
 metadata:
-  crawler-version: 0.3.1
+  crawler-version: 0.7.2
 ---
 Definitions:
   type: Definitions
diff --git a/unittests/test_converters.py b/unittests/test_converters.py
index 665169d785c1ed604314c4aff4a640d4418e80a9..8e5441ce00a7dca8bc69e90b6a96576a07187bfb 100644
--- a/unittests/test_converters.py
+++ b/unittests/test_converters.py
@@ -3,8 +3,9 @@
 #
 # This file is a part of the CaosDB Project.
 #
-# Copyright (C) 2021,2022 Indiscale GmbH <info@indiscale.com>
+# Copyright (C) 2021-2024 Indiscale GmbH <info@indiscale.com>
 # Copyright (C) 2021,2022 Henrik tom Wörden <h.tomwoerden@indiscale.com>
+# Copyright (C) 2024 Daniel Hornung <d.hornung@indiscale.com>
 #
 # This program is free software: you can redistribute it and/or modify
 # it under the terms of the GNU Affero General Public License as
@@ -148,7 +149,7 @@ def test_markdown_converter(converter_registry):
     converter = MarkdownFileConverter({"match": "(.*)"}, "TestMarkdownFileConverter",
                                       converter_registry)
 
-    with pytest.raises(ConverterValidationError) as err:
+    with pytest.raises(ConverterValidationError):
         converter.create_children(None, File("test_tool.py", UNITTESTDIR / "test_crawler.py"))
 
     m = converter.match(test_readme)
@@ -504,7 +505,7 @@ MyElement:
     two_doc_yaml = """
 ---
 metadata:
-  crawler-version: 0.3.1
+  crawler-version: 0.7.2
   Converters:
     MyNewType:
       converter: MyNewTypeConverter
@@ -640,7 +641,7 @@ def test_load_converters():
     # converter classes can be loaded from their respective packages.
 
     # Please adapt, if defaults change!
-    assert len(converter_registry) == 23
+    assert len(converter_registry) == 24
 
     # All of them are contained in caoscrawler.converters
     for conv_key, conv in converter_registry.items():
diff --git a/unittests/test_crawler.py b/unittests/test_crawler.py
index a48b5e16ad1a71beeb4a5bf1c2ac52f67bbd7afe..0a6aee44a1892f1c950a80b936adf184616fd612 100644
--- a/unittests/test_crawler.py
+++ b/unittests/test_crawler.py
@@ -39,10 +39,12 @@ import linkahead.common.models as dbmodels
 import pytest
 import yaml
 from caosadvancedtools.models.parser import parse_model_from_string
-from caoscrawler.crawl import (Crawler, SecurityMode, TreatedRecordLookUp,
-                               _treat_deprecated_prefix, crawler_main,
-                               split_restricted_path)
+from caoscrawler.crawl import (Crawler, SecurityMode, _treat_deprecated_prefix,
+                               crawler_main, split_restricted_path)
 from caoscrawler.debug_tree import DebugTree
+from caoscrawler.exceptions import (ImpossibleMergeError,
+                                    MissingIdentifyingProperty,
+                                    MissingReferencingEntityError)
 from caoscrawler.identifiable import Identifiable
 from caoscrawler.identifiable_adapters import (CaosDBIdentifiableAdapter,
                                                IdentifiableAdapter,
@@ -52,6 +54,7 @@ from caoscrawler.scanner import (create_converter_registry, scan_directory,
 from caoscrawler.stores import GeneralStore, RecordStore
 from caoscrawler.structure_elements import (DictElement, DictListElement,
                                             DictTextElement, File)
+from caoscrawler.sync_graph import SyncGraph
 from linkahead.apiutils import compare_entities
 from linkahead.cached import cache_clear
 from linkahead.exceptions import EmptyUniqueQueryError
@@ -87,6 +90,20 @@ NEW_ELEMENT = (db.Record()
                .add_property(name="result", value="homogeneous"))
 
 
+def reset_mocks(mocks):
+    for mock in mocks:
+        mock.reset_mock()
+
+
+def mock_create_values(values, element):
+    pass
+
+
+def mock_get_entity_by_query(query=None):
+    if query is not None:
+        return db.Record(id=1111, name='rec_name').add_parent('RT')
+
+
 def mock_get_entity_by(eid=None, name=None, path=None):
     if eid is not None:
         candidates = [el for el in EXAMPLE_SERVER_STATE if el.id == eid]
@@ -110,6 +127,14 @@ def mock_get_entity_by(eid=None, name=None, path=None):
             raise EmptyUniqueQueryError("")
 
 
+def basic_retrieve_by_name_mock_up(rec, referencing_entities=None, known=None):
+    """ returns a stored Record if rec.name is an existing key, None otherwise """
+    if rec.name in known:
+        return known[rec.name]
+    else:
+        return None
+
+
 def mock_retrieve_record(identifiable: Identifiable):
     """ assumes that the identifiable is always only the date"""
 
@@ -148,7 +173,15 @@ A:
         model.get_deep("A").id = 2
         return result + [model.get_deep("B")]
     print(query_string)
-    raise NotImplementedError("Mock for this case is missing")
+    raise NotImplementedError(f"Mock for this case is missing: {query_string}")
+
+
+def mock_cached_only_rt_allow_empty(query_string: str):
+    try:
+        result = mock_cached_only_rt(query_string)
+    except NotImplementedError:
+        result = db.Container()
+    return result
 
 
 @pytest.fixture(autouse=True)
@@ -156,8 +189,51 @@ def clear_cache():
     cache_clear()
 
 
+@pytest.fixture
+def crawler_mocked_identifiable_retrieve():
+    crawler = Crawler()
+    # TODO use minimal setup
+    # mock retrieval of registered identifiables: return a Record with just a parent
+    crawler.identifiableAdapter.get_registered_identifiable = Mock(
+        side_effect=lambda x: db.Record().add_parent(x.parents[0].name).add_property(name='name'))
+
+    # Simulate remote server content by using the names to identify records
+    # There is only a single known Record with name A
+    crawler.identifiableAdapter.retrieve_identified_record_for_identifiable = Mock(
+        side_effect=partial(
+            basic_retrieve_by_name_mock_up, known={"A": db.Record(id=1111, name="A")}))
+    return crawler
+
+
+@pytest.fixture
+def crawler_mocked_for_backref_test():
+    crawler = Crawler()
+    # mock retrieval of registered identifiables: return a Record with just a parent
+
+    def get_reg_ident(x):
+        if x.parents[0].name == "C":
+            return db.Record().add_parent(x.parents[0].name).add_property(
+                "is_referenced_by", value=["BR"]).add_property("name")
+        elif x.parents[0].name == "D":
+            return db.Record().add_parent(x.parents[0].name).add_property(
+                "is_referenced_by", value=["BR", "BR2"]).add_property("name")
+        else:
+            return db.Record().add_parent(x.parents[0].name).add_property("name")
+    crawler.identifiableAdapter.get_registered_identifiable = Mock(side_effect=get_reg_ident)
+
+    # Simulate remote server content by using the names to identify records
+    # There is only a single known Record with name A
+    crawler.identifiableAdapter.retrieve_identified_record_for_identifiable = Mock(
+        side_effect=partial(
+            basic_retrieve_by_name_mock_up, known={"A":
+                                                   db.Record(id=1111, name="A").add_parent("BR")}))
+    return crawler
+
+
 @pytest.mark.filterwarnings("ignore::DeprecationWarning")
 def test_constructor():
+    # tests that appropriate DeprecationWarnings are triggered by the constructor when deprecated
+    # arguments are being passed.
     with warnings.catch_warnings(record=True) as w:
         # Cause all warnings to always be triggered.
         warnings.filterwarnings("ignore")
@@ -174,6 +250,7 @@ def test_constructor():
 
 @pytest.mark.filterwarnings("ignore::DeprecationWarning")
 def test_deprecated_functions():
+    # tests that appropriate DeprecationWarnings are triggered by deprecated methods
     with warnings.catch_warnings(record=True) as w:
         # Cause all warnings to always be triggered.
         warnings.filterwarnings("ignore")
@@ -218,113 +295,58 @@ def test_check_whether_parent_exists():
 
 def test_remove_unnecessary_updates():
     # test trvial case
-    upl = [db.Record().add_parent("A")]
-    irs = [db.Record().add_parent("A")]
-    updates = Crawler.remove_unnecessary_updates(upl, irs)
+    crawled_data = [db.Record().add_parent("A")]
+    identified_records = [db.Record().add_parent("A")]
+    updates = Crawler.remove_unnecessary_updates(crawled_data, identified_records)
     assert len(updates) == 0
 
     # test property difference case
-    # TODO this should work right?
-    # upl = [db.Record().add_parent("A").add_property("a", 3)]
-    # irs = [db.Record().add_parent("A")]  # ID should be s
-    # Crawler.remove_unnecessary_updates(upl, irs)
-    # assert len(upl) == 1
+    crawled_data = [db.Record().add_parent("A").add_property("a", 3)]
+    identified_records = [db.Record().add_parent("A")]  # ID should be s
+    Crawler.remove_unnecessary_updates(crawled_data, identified_records)
+    assert len(crawled_data) == 1
 
     # test value difference case
-    upl = [db.Record().add_parent("A").add_property("a", 5)]
-    irs = [db.Record().add_parent("A").add_property("a")]
-    updates = Crawler.remove_unnecessary_updates(upl, irs)
+    crawled_data = [db.Record().add_parent("A").add_property("a", 5)]
+    identified_records = [db.Record().add_parent("A").add_property("a")]
+    updates = Crawler.remove_unnecessary_updates(crawled_data, identified_records)
     assert len(updates) == 1
-    upl = [db.Record().add_parent("A").add_property("a", 5)]
-    irs = [db.Record().add_parent("A").add_property("a", 5)]
-    updates = Crawler.remove_unnecessary_updates(upl, irs)
+    crawled_data = [db.Record().add_parent("A").add_property("a", 5)]
+    identified_records = [db.Record().add_parent("A").add_property("a", 5)]
+    updates = Crawler.remove_unnecessary_updates(crawled_data, identified_records)
     assert len(updates) == 0
 
     # test unit difference case
-    upl = [db.Record().add_parent("A").add_property("a", unit='cm')]
-    irs = [db.Record().add_parent("A").add_property("a")]
-    updates = Crawler.remove_unnecessary_updates(upl, irs)
+    crawled_data = [db.Record().add_parent("A").add_property("a", unit='cm')]
+    identified_records = [db.Record().add_parent("A").add_property("a")]
+    updates = Crawler.remove_unnecessary_updates(crawled_data, identified_records)
     assert len(updates) == 1
 
     # test None difference case
-    upl = [db.Record().add_parent("A").add_property("a")]
-    irs = [db.Record().add_parent("A").add_property("a", 5)]
-    updates = Crawler.remove_unnecessary_updates(upl, irs)
+    crawled_data = [db.Record().add_parent("A").add_property("a")]
+    identified_records = [db.Record().add_parent("A").add_property("a", 5)]
+    updates = Crawler.remove_unnecessary_updates(crawled_data, identified_records)
     assert len(updates) == 1
 
 
 def test_split_into_inserts_and_updates_trivial():
     crawler = Crawler()
-    crawler.split_into_inserts_and_updates([])
-
-
-def test_split_into_inserts_and_updates_unidentified():
-    crawler = Crawler()
-    with raises(ValueError) as err:
-        crawler.split_into_inserts_and_updates([db.Record(name="recname").add_parent("someparent")])
-    assert str(err.value).startswith("There is no identifying information.")
-
-
-def basic_retrieve_by_name_mock_up(rec, referencing_entities=None, known=None):
-    """ returns a stored Record if rec.name is an existing key, None otherwise """
-    if rec.name in known:
-        return known[rec.name]
-    else:
-        return None
-
-
-@pytest.fixture
-def crawler_mocked_identifiable_retrieve():
-    crawler = Crawler()
-    # TODO use minimal setup
-    # mock retrieval of registered identifiabls: return Record with just a parent
-    crawler.identifiableAdapter.get_registered_identifiable = Mock(
-        side_effect=lambda x: db.Record().add_parent(x.parents[0].name).add_property(name='name'))
-
-    # Simulate remote server content by using the names to identify records
-    # There is only a single known Record with name A
-    crawler.identifiableAdapter.retrieve_identified_record_for_record = Mock(side_effect=partial(
-        basic_retrieve_by_name_mock_up, known={"A": db.Record(id=1111, name="A")}))
-    crawler.identifiableAdapter.retrieve_identified_record_for_identifiable = Mock(
-        side_effect=partial(
-            basic_retrieve_by_name_mock_up, known={"A": db.Record(id=1111, name="A")}))
-    return crawler
+    st = SyncGraph([], crawler.identifiableAdapter)
+    crawler._split_into_inserts_and_updates(st)
 
 
-def test_split_into_inserts_and_updates_single(crawler_mocked_identifiable_retrieve):
+def test_split_into_inserts_and_updates_simple(crawler_mocked_identifiable_retrieve):
+    # basic test that checks whether two records are correctly sorted to update and insert based on
+    # whether an entity can be found using the identifiable
     crawler = crawler_mocked_identifiable_retrieve
     identlist = [Identifiable(name="A", record_type="C"), Identifiable(name="B", record_type="C")]
-    entlist = [db.Record(name="A").add_parent(
-        "C"), db.Record(name="B").add_parent("C")]
-
-    assert crawler.treated_records_lookup.get_any(entlist[0], identlist[0]) is None
-    assert crawler.treated_records_lookup.get_any(entlist[0], identlist[0]) is None
-    assert not crawler._has_reference_value_without_id(identlist[0])
-    assert not crawler._has_reference_value_without_id(identlist[1])
-    assert crawler.identifiableAdapter.retrieve_identified_record_for_record(
-        identlist[0]).id == 1111
-    assert crawler.identifiableAdapter.retrieve_identified_record_for_record(
-        identlist[1]) is None
-
-    insert, update = crawler.split_into_inserts_and_updates(deepcopy(entlist))
-    assert len(insert) == 1
-    assert insert[0].name == "B"
-    assert len(update) == 1
-    assert update[0].name == "A"
-    # if this ever fails, the mock up may be removed
-    crawler.identifiableAdapter.get_registered_identifiable.assert_called()
-    crawler.identifiableAdapter.retrieve_identified_record_for_identifiable.assert_called()
+    entlist = [db.Record(name="A").add_parent("C"),
+               db.Record(name="B").add_parent("C")]
 
+    st = SyncGraph(entlist, crawler.identifiableAdapter)
+    # check setup
 
-def test_split_into_inserts_and_updates_with_duplicate(crawler_mocked_identifiable_retrieve):
-    crawler = crawler_mocked_identifiable_retrieve
-    a = db.Record(name="A").add_parent("C")
-    b = db.Record(name="B").add_parent("C")
-    b.add_property("A", a)
-    # This is identical to a and should be removed
-    c = db.Record(name="A").add_parent("C")
-    entlist = [a, b, c]
-    insert, update = crawler.split_into_inserts_and_updates(deepcopy(entlist))
+    insert, update = crawler._split_into_inserts_and_updates(st)
     assert len(insert) == 1
     assert insert[0].name == "B"
     assert len(update) == 1
@@ -334,31 +356,20 @@ def test_split_into_inserts_and_updates_with_duplicate(crawler_mocked_identifiab
     crawler.identifiableAdapter.retrieve_identified_record_for_identifiable.assert_called()
 
 
-def test_split_into_inserts_and_updates_with_ref(crawler_mocked_identifiable_retrieve):
+def test_split_into_inserts_and_updates_with_circ(crawler_mocked_identifiable_retrieve):
+    # test trying to split circular dependency
     crawler = crawler_mocked_identifiable_retrieve
-    # try it with a reference
-    a = db.Record(name="A").add_parent("C")
-    b = db.Record(name="B").add_parent("C")
-    b.add_property("A", a)
-    entlist = [a, b]
-    insert, update = crawler.split_into_inserts_and_updates(entlist)
-    assert len(insert) == 1
-    assert insert[0].name == "B"
-    assert len(update) == 1
-    assert update[0].name == "A"
-    # if this ever fails, the mock up may be removed
-    crawler.identifiableAdapter.get_registered_identifiable.assert_called()
-    crawler.identifiableAdapter.retrieve_identified_record_for_identifiable.assert_called()
-
+    crawler.identifiableAdapter.get_registered_identifiable = Mock(
+        side_effect=lambda x: db.Record().add_parent('C').add_property(name='a')
+    )
+    # two records that reference each other via identifying properties
+    a = db.Record().add_parent("C")
+    b = db.Record().add_parent("C").add_property(name='a', value=a)
+    a.add_property(name='a', value=b)
 
-def test_split_into_inserts_and_updates_with_circ():
-    # try circular
-    a = db.Record(name="A").add_parent("C")
-    b = db.Record(name="B").add_parent("C")
-    b.add_property("A", a)
-    a.add_property("B", b)
-    entlist = [a, b]
-    # TODO this does not seem to be complete!
+    st = SyncGraph([a, b], crawler.identifiableAdapter)
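+    # neither node can be checked first, since each record's identifiable contains the other record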
+    with pytest.raises(RuntimeError):
+        crawler._split_into_inserts_and_updates(st)
 
 
 def test_split_into_inserts_and_updates_with_complex(crawler_mocked_identifiable_retrieve):
@@ -372,11 +383,12 @@ def test_split_into_inserts_and_updates_with_complex(crawler_mocked_identifiable
     b = db.Record(name="B").add_parent("C")
     g = db.Record(name="G").add_parent("C")
     f = db.Record(name="F").add_parent("C")
-    g.add_property("A", a)
-    b.add_property("A", f)
+    g.add_property("C", b)
     b.add_property("A", a)
+    b.add_property("C", f)
     entlist = [a, b, g]
-    insert, update = crawler.split_into_inserts_and_updates(entlist)
+    st = SyncGraph(entlist, crawler.identifiableAdapter)
+    insert, update = crawler._split_into_inserts_and_updates(st)
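+    # only "A" is known on the (simulated) remote server; "B", "F" and "G" must be inserted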
     assert len(insert) == 3
     assert "B" in [el.name for el in insert]
     assert len(update) == 1
@@ -388,23 +400,8 @@ def test_split_into_inserts_and_updates_with_complex(crawler_mocked_identifiable
     # TODO write test where the unresolved entity is not part of the identifiable
 
 
-def test_split_into_inserts_and_updates_with_copy_attr(crawler_mocked_identifiable_retrieve):
-    crawler = crawler_mocked_identifiable_retrieve
-    # assume identifiable is only the name
-    a = db.Record(name="A").add_parent("C")
-    a.add_property("foo", 1)
-    b = db.Record(name="A").add_parent("C")
-    b.add_property("bar", 2)
-    entlist = [a, b]
-    insert, update = crawler.split_into_inserts_and_updates(entlist)
-
-    assert update[0].get_property("bar").value == 2
-    assert update[0].get_property("foo").value == 1
-    # if this ever fails, the mock up may be removed
-    crawler.identifiableAdapter.get_registered_identifiable.assert_called()
-    crawler.identifiableAdapter.retrieve_identified_record_for_identifiable.assert_called()
-
-
+@patch("caoscrawler.crawl.cached_get_entity_by",
+       new=Mock(side_effect=mock_get_entity_by))
 @patch("caoscrawler.identifiable_adapters.cached_query",
        new=Mock(side_effect=mock_cached_only_rt))
 def test_split_iiau_with_unmergeable_list_items():
@@ -440,6 +437,12 @@ b1: ("same", c1)
 b2: ("same", c2)
 
 a: ([b1, b2])
+
+- a can be identified.
+- The bs can be identified with each other once a is identified.
+- The cs depend on the bs, but cannot be merged into one Entity because they have conflicting properties.
     """
     prop_ident = db.Property("prop_ident", datatype=db.INTEGER)
     prop_other = db.Property("prop_ident", datatype=db.INTEGER)
@@ -472,82 +475,104 @@ a: ([b1, b2])
 
     crawler = Crawler(identifiableAdapter=ident_adapter)
 
-    with raises(RuntimeError) as rte:
-        crawler.synchronize(commit_changes=False,
-                            crawled_data=[rec_a, *rec_b, *rec_c])
-    assert not isinstance(rte.value, NotImplementedError), \
-        "Exception must not be NotImplementedError, but plain RuntimeError."
-    assert "Could not find referencing entities" in rte.value.args[0]
-    assert "merge conflicts in the referencing" in rte.value.args[0]
+    st = SyncGraph(deepcopy([rec_a, *rec_b, *rec_c]), crawler.identifiableAdapter)
+    assert st._identity_relies_on_unchecked_entity(st.nodes[0]) is False
+    assert st._identity_relies_on_unchecked_entity(st.nodes[1])
+    assert st._identity_relies_on_unchecked_entity(st.nodes[2])
+    assert st._identity_relies_on_unchecked_entity(st.nodes[3])
+    assert st._identity_relies_on_unchecked_entity(st.nodes[4])
+    assert len(st.unchecked) == 5
+
+    # The Cs cannot be merged due to different identifying properties
+    # The Bs cannot be merged due to different references to Cs
+    with raises(ImpossibleMergeError) as rte:
+        crawler._split_into_inserts_and_updates(st)
+    # TODO
+    # assert not isinstance(rte.value, NotImplementedError), \
+        # "Exception must not be NotImplementedError, but plain RuntimeError."
+    # assert "Could not find referencing entities" in rte.value.args[0]
+    # assert "merge conflicts in the referencing" in rte.value.args[0]
+
+
+@patch("caoscrawler.identifiable_adapters.get_children_of_rt",
+       new=Mock(side_effect=lambda x: [x]))
+def test_split_into_inserts_and_updates_backref(crawler_mocked_for_backref_test):
+    # test that backrefs are appropriately considered in the identifiable
+    crawler = crawler_mocked_for_backref_test
+    identlist = [Identifiable(name="A", record_type="BR"),
+                 Identifiable(name="B", record_type="C", backrefs=[db.Entity()])]
+    referenced = db.Record(name="B").add_parent("C")
+    entlist = [referenced, db.Record(name="A").add_parent("BR").add_property("ref", referenced), ]
 
+    # Test without referencing object
+    # currently a MissingReferencingEntityError is raised if necessary properties are missing.
+    with raises(MissingReferencingEntityError):
+        st = SyncGraph([db.Record(name="B").add_parent("C")], crawler.identifiableAdapter)
 
-def test_has_missing_object_in_references():
-    crawler = Crawler()
-    # Simulate remote server content by using the names to identify records
-    # There are only two known Records with name A and B
-    crawler.identifiableAdapter.get_registered_identifiable = Mock(side_effect=partial(
-        basic_retrieve_by_name_mock_up, known={"C": db.Record(name="C").add_parent("RTC")
-                                               .add_property("d").add_property("name"),
-                                               "D": db.Record(name="D").add_parent("RTD")
-                                               .add_property("d").add_property("e").add_property("name"),
-                                               }))
-
-    # one reference with id -> check
-    assert not crawler._has_missing_object_in_references(
-        Identifiable(name="C", record_type="RTC", properties={'d': 123}), {})
-    # one ref with Entity with id -> check
-    rec = db.Record(id=123).add_parent("C")
-    assert not crawler._has_missing_object_in_references(
-        Identifiable(name="C", record_type="RTC", properties={'d': rec}), {id(rec): {'C': [None]}})
-    # one ref with id one with Entity with id (mixed) -> check
-    rec = db.Record(id=123).add_parent("RTC")
-    assert not crawler._has_missing_object_in_references(
-        Identifiable(name="C", record_type="RTD",
-                     properties={'d': 123, 'b': rec}), {id(rec): {'C': [None]}})
-    # entity to be referenced in the following
-    a = db.Record(name="C").add_parent("C").add_property("d", 12311)
-    # one ref with id one with Entity without id (but not identifying) -> fail
-    assert not crawler._has_missing_object_in_references(
-        Identifiable(name="C", record_type="RTC", properties={'d': 123, 'e': a}),
-        {id(a): {'C': [None]}})
-
-    # one ref with id one with Entity without id (mixed) -> fail
-    assert not crawler._has_missing_object_in_references(
-        Identifiable(name="D", record_type="RTD", properties={'d': 123, 'e': a}),
-        {id(a): {'C': [None]}})
-
-    crawler.treated_records_lookup.add(a, Identifiable(name="C", record_type="RTC",
-                                                       properties={'d': 12311}))
-    # one ref with id one with Entity without id but in cache -> check
-    assert crawler._has_missing_object_in_references(
-        Identifiable(name="D", record_type="RTD", properties={'d': 123, 'e': a}),
-        {id(a): {'C': [None]}})
+    # identifiables were not yet checked
+    st = SyncGraph(entlist, crawler.identifiableAdapter)
+    assert st.get_equivalent(st.nodes[1]) is None
+    assert st.get_equivalent(st.nodes[0]) is None
+    # one can be found remotely, one not
 
-    # if this ever fails, the mock up may be removed
-    crawler.identifiableAdapter.get_registered_identifiable.assert_called()
+    # check the split...
+    insert, update = crawler._split_into_inserts_and_updates(st)
+    # A was found remotely and is therefore in the update list
+    assert len(update) == 1
+    assert update[0].name == "A"
+    # B does not exist on the (simulated) remote server
+    assert len(insert) == 1
+    assert insert[0].name == "B"
 
 
-@ pytest.mark.xfail()
-def test_references_entities_without_ids():
-    crawler = Crawler()
-    assert not crawler._has_reference_value_without_id(db.Record().add_parent("Person")
-                                                       .add_property('last_name', 123)
-                                                       .add_property('first_name', 123))
-    # id and rec with id
-    assert not crawler._has_reference_value_without_id(db.Record().add_parent("Person")
-                                                       .add_property('first_name', 123)
-                                                       .add_property('last_name',
-                                                                     db.Record(id=123)))
-    # id and rec with id and one unneeded prop
-    assert crawler._has_reference_value_without_id(db.Record().add_parent("Person")
-                                                   .add_property('first_name', 123)
-                                                   .add_property('stuff', db.Record())
-                                                   .add_property('last_name', db.Record(id=123)))
-
-    # one identifying prop is missing
-    assert crawler._has_reference_value_without_id(db.Record().add_parent("Person")
-                                                   .add_property('first_name', 123)
-                                                   .add_property('last_name', db.Record()))
+@patch("caoscrawler.identifiable_adapters.get_children_of_rt",
+       new=Mock(side_effect=lambda x: [x]))
+def test_split_into_inserts_and_updates_mult_backref(crawler_mocked_for_backref_test):
+    # test whether multiple references of the same record type are correctly used
+    crawler = crawler_mocked_for_backref_test
+    referenced = db.Record(name="B").add_parent("C")
+    entlist = [referenced,
+               db.Record(id=1, name="A").add_parent("BR").add_property("ref", referenced),
+               db.Record(id=2, name="C").add_parent("BR").add_property("ref", referenced),
+               ]
+
+    # test whether both entities are listed in the backref attribute of the identifiable
+    st = SyncGraph(entlist, crawler.identifiableAdapter)
+
+    identifiable = crawler.identifiableAdapter.get_identifiable(
+        st.nodes[0],
+        st.backward_references_backref[id(st.nodes[0])])
+    assert len(identifiable.backrefs) == 2
+
+    # check the split...
+    insert, update = crawler._split_into_inserts_and_updates(st)
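+    # the two referencing records already carry ids (1 and 2) and become updates; "B" is inserted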
+    assert len(update) == 2
+    assert len(insert) == 1
+
+
+@patch("caoscrawler.identifiable_adapters.get_children_of_rt",
+       new=Mock(side_effect=lambda x: [x]))
+def test_split_into_inserts_and_updates_diff_backref(crawler_mocked_for_backref_test):
+    # test whether multiple references of different record types are correctly used
+    crawler = crawler_mocked_for_backref_test
+    referenced = db.Record(name="B").add_parent("D")
+    entlist = [referenced,
+               db.Record(id=1, name="A").add_parent("BR").add_property("ref", referenced),
+               db.Record(id=2, name="A").add_parent("BR2").add_property("ref", referenced),
+               ]
+
+    # test whether both entities are listed in the backref attribute of the identifiable
+    st = SyncGraph(entlist, crawler.identifiableAdapter)
+    identifiable = crawler.identifiableAdapter.get_identifiable(
+        st.nodes[0],
+        st.backward_references_backref[id(st.nodes[0])])
+
+    assert len(identifiable.backrefs) == 2
+
+    # check the split...
+    insert, update = crawler._split_into_inserts_and_updates(st)
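+    # "B" has parent "D", whose registered identifiable accepts backrefs from both "BR" and "BR2"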
+    assert len(update) == 2
+    assert len(insert) == 1
 
 
 def test_replace_entities_with_ids():
@@ -562,20 +587,15 @@ def test_replace_entities_with_ids():
     assert a.get_property("C").value == [12345, 233324]
 
 
-def reset_mocks(mocks):
-    for mock in mocks:
-        mock.reset_mock()
-
-
-@ patch("caoscrawler.crawl.cached_get_entity_by",
-        new=Mock(side_effect=mock_get_entity_by))
-@ patch("caoscrawler.identifiable_adapters.cached_get_entity_by",
-        new=Mock(side_effect=mock_get_entity_by))
-@ patch("caoscrawler.identifiable_adapters.CaosDBIdentifiableAdapter."
-        "retrieve_identified_record_for_identifiable",
-        new=Mock(side_effect=mock_retrieve_record))
-@ patch("caoscrawler.crawl.db.Container.insert")
-@ patch("caoscrawler.crawl.db.Container.update")
+@patch("caoscrawler.crawl.cached_get_entity_by",
+       new=Mock(side_effect=mock_get_entity_by))
+@patch("caoscrawler.identifiable_adapters.cached_get_entity_by",
+       new=Mock(side_effect=mock_get_entity_by))
+@patch("caoscrawler.identifiable_adapters.CaosDBIdentifiableAdapter."
+       "retrieve_identified_record_for_identifiable",
+       new=Mock(side_effect=mock_retrieve_record))
+@patch("caoscrawler.crawl.db.Container.insert")
+@patch("caoscrawler.crawl.db.Container.update")
 def test_synchronization_no_commit(upmock, insmock):
     crawled_data = [r.copy() for r in EXAMPLE_SERVER_STATE if r.role == "Record"]
     # change  one; add one
@@ -592,20 +612,19 @@ def test_synchronization_no_commit(upmock, insmock):
     assert len(ups) == 1
 
 
-@ patch("caoscrawler.crawl.cached_get_entity_by",
-        new=Mock(side_effect=mock_get_entity_by))
-@ patch("caoscrawler.identifiable_adapters.cached_get_entity_by",
-        new=Mock(side_effect=mock_get_entity_by))
-@ patch("caoscrawler.identifiable_adapters.CaosDBIdentifiableAdapter."
-        "retrieve_identified_record_for_identifiable",
-        new=Mock(side_effect=mock_retrieve_record))
-@ patch("caoscrawler.crawl.db.Container.insert")
-@ patch("caoscrawler.crawl.db.Container.update")
-@ patch("caoscrawler.crawl.UpdateCache.insert")
+@patch("caoscrawler.crawl.cached_get_entity_by",
+       new=Mock(side_effect=mock_get_entity_by))
+@patch("caoscrawler.identifiable_adapters.cached_get_entity_by",
+       new=Mock(side_effect=mock_get_entity_by))
+@patch("caoscrawler.identifiable_adapters.CaosDBIdentifiableAdapter."
+       "retrieve_identified_record_for_identifiable",
+       new=Mock(side_effect=mock_retrieve_record))
+@patch("caoscrawler.crawl.db.Container.insert")
+@patch("caoscrawler.crawl.db.Container.update")
+@patch("caoscrawler.crawl.UpdateCache.insert")
 def test_security_mode(updateCacheMock, upmock, insmock):
     # trivial case: nothing to do
     crawled_data = [r.copy() for r in EXAMPLE_SERVER_STATE if r.role == "Record"]
-    print(crawled_data)
     crawler = Crawler(securityMode=SecurityMode.RETRIEVE)
     crawler.synchronize(commit_changes=True, crawled_data=crawled_data)
     assert crawler.run_id is not None
@@ -640,9 +659,6 @@ def test_security_mode(updateCacheMock, upmock, insmock):
     assert crawler.run_id is not None
     insmock.assert_not_called()
     upmock.assert_not_called()
-    # import IPython
-    # IPython.embed()
-    # print(updateCacheMock.call_args_list)
     assert updateCacheMock.call_count == 1
     # reset counts
     reset_mocks([updateCacheMock, insmock, upmock])
@@ -698,65 +714,6 @@ def test_security_mode(updateCacheMock, upmock, insmock):
     crawled_data[-1] = EXAMPLE_SERVER_STATE[-1].copy()
 
 
-def test_create_reference_mapping():
-    a = db.Record().add_parent("A")
-    b = db.Record(id=132).add_parent("B").add_property('a', a)
-    ref = Crawler.create_reference_mapping([a, b])
-    assert id(a) in ref
-    assert id(b) in ref
-    assert "B" in ref[id(a)]
-    assert {} == ref[id(b)]
-    assert ref[id(a)]["B"] == [132]
-
-
-def test_create_flat_list():
-    a = db.Record()
-    b = db.Record()
-    a.add_property(name="a", value=a)
-    a.add_property(name="b", value=b)
-    flat = Crawler.create_flat_list([a])
-    assert len(flat) == 2
-    assert a in flat
-    assert b in flat
-    c = db.Record()
-    c.add_property(name="a", value=a)
-    # This would caus recursion if it is not dealt with properly.
-    a.add_property(name="c", value=c)
-    flat = Crawler.create_flat_list([c])
-    assert len(flat) == 3
-    assert a in flat
-    assert b in flat
-    assert c in flat
-
-
-@ pytest.fixture
-def crawler_mocked_for_backref_test():
-    crawler = Crawler()
-    # mock retrieval of registered identifiabls: return Record with just a parent
-
-    def get_reg_ident(x):
-        if x.parents[0].name == "C":
-            return db.Record().add_parent(x.parents[0].name).add_property(
-                "is_referenced_by", value=["BR"]).add_property("name")
-        elif x.parents[0].name == "D":
-            return db.Record().add_parent(x.parents[0].name).add_property(
-                "is_referenced_by", value=["BR", "BR2"]).add_property("name")
-        else:
-            return db.Record().add_parent(x.parents[0].name).add_property("name")
-    crawler.identifiableAdapter.get_registered_identifiable = Mock(side_effect=get_reg_ident)
-
-    # Simulate remote server content by using the names to identify records
-    # There is only a single known Record with name A
-    crawler.identifiableAdapter.retrieve_identified_record_for_record = Mock(side_effect=partial(
-        basic_retrieve_by_name_mock_up, known={"A":
-                                               db.Record(id=1111, name="A").add_parent("BR")}))
-    crawler.identifiableAdapter.retrieve_identified_record_for_identifiable = Mock(
-        side_effect=partial(
-            basic_retrieve_by_name_mock_up, known={"A":
-                                                   db.Record(id=1111, name="A").add_parent("BR")}))
-    return crawler
-
-
 def test_validation_error_print(caplog):
     caplog.set_level(logging.DEBUG, logger="caoscrawler.converters")
     # there should be no server interaction since we only test the behavior if a validation error
@@ -773,96 +730,7 @@ def test_validation_error_print(caplog):
         caplog.clear()
 
 
-@ patch("caoscrawler.identifiable_adapters.get_children_of_rt",
-        new=Mock(side_effect=lambda x: [x]))
-def test_split_into_inserts_and_updates_backref(crawler_mocked_for_backref_test):
-    crawler = crawler_mocked_for_backref_test
-    identlist = [Identifiable(name="A", record_type="BR"),
-                 Identifiable(name="B", record_type="C", backrefs=[db.Entity()])]
-    referenced = db.Record(name="B").add_parent("C")
-    entlist = [referenced, db.Record(name="A").add_parent("BR").add_property("ref", referenced), ]
-
-    # Test without referencing object
-    # currently a RuntimeError is raised if necessary properties are missing.
-    with raises(RuntimeError):
-        crawler.split_into_inserts_and_updates([db.Record(name="B").add_parent("C")])
-
-    # identifiables were not yet checked
-    assert crawler.treated_records_lookup.get_any(entlist[1], identlist[0]) is None
-    assert crawler.treated_records_lookup.get_any(entlist[0], identlist[1]) is None
-    # one with reference, one without
-    assert not crawler._has_reference_value_without_id(identlist[0])
-    assert crawler._has_reference_value_without_id(identlist[1])
-    # one can be found remotely, one not
-    assert crawler.identifiableAdapter.retrieve_identified_record_for_record(
-        identlist[0]).id == 1111
-    assert crawler.identifiableAdapter.retrieve_identified_record_for_record(
-        identlist[1]) is None
-
-    # check the split...
-    insert, update = crawler.split_into_inserts_and_updates(deepcopy(entlist))
-    # A was found remotely and is therefore in the update list
-    assert len(update) == 1
-    assert update[0].name == "A"
-    # B does not exist on the (simulated) remote server
-    assert len(insert) == 1
-    assert insert[0].name == "B"
-
-
-@ patch("caoscrawler.identifiable_adapters.get_children_of_rt",
-        new=Mock(side_effect=lambda x: [x]))
-def test_split_into_inserts_and_updates_mult_backref(crawler_mocked_for_backref_test):
-    # test whether multiple references of the same record type are correctly used
-    crawler = crawler_mocked_for_backref_test
-    referenced = db.Record(name="B").add_parent("C")
-    entlist = [referenced,
-               db.Record(name="A").add_parent("BR").add_property("ref", referenced),
-               db.Record(name="C").add_parent("BR").add_property("ref", referenced),
-               ]
-
-    # test whether both entities are listed in the backref attribute of the identifiable
-    referencing_entities = crawler.create_reference_mapping(entlist)
-    identifiable = crawler.identifiableAdapter.get_identifiable(
-        referenced,
-        referencing_entities[id(referenced)])
-    assert len(identifiable.backrefs) == 2
-
-    # check the split...
-    insert, update = crawler.split_into_inserts_and_updates(deepcopy(entlist))
-    assert len(update) == 1
-    assert len(insert) == 2
-
-
-@ patch("caoscrawler.identifiable_adapters.get_children_of_rt",
-        new=Mock(side_effect=lambda x: [x]))
-def test_split_into_inserts_and_updates_diff_backref(crawler_mocked_for_backref_test):
-    # test whether multiple references of the different record types are correctly used
-    crawler = crawler_mocked_for_backref_test
-    referenced = db.Record(name="B").add_parent("D")
-    entlist = [referenced,
-               db.Record(name="A").add_parent("BR").add_property("ref", referenced),
-               db.Record(name="A").add_parent("BR2").add_property("ref", referenced),
-               ]
-
-    # test whether both entities are listed in the backref attribute of the identifiable
-    referencing_entities = crawler.create_reference_mapping(entlist)
-    identifiable = crawler.identifiableAdapter.get_identifiable(
-        referenced,
-        referencing_entities[id(referenced)])
-
-    assert len(identifiable.backrefs) == 2
-
-    # check the split...
-    insert, update = crawler.split_into_inserts_and_updates(deepcopy(entlist))
-    assert len(update) == 2
-    assert len(insert) == 1
-
-
-def mock_create_values(values, element):
-    pass
-
-
-@ patch("caoscrawler.converters.IntegerElementConverter.create_values")
+@patch("caoscrawler.converters.IntegerElementConverter.create_values")
 def test_restricted_path(create_mock):
     """
     The restricted_path argument allows ignoring part of the crawled data structure. Here, we make
@@ -955,7 +823,7 @@ def test_split_restricted_path():
 
 # Filter the warning because we want to have it here and this way it does not hinder running
 # tests with -Werror.
-@ pytest.mark.filterwarnings("ignore:The prefix:DeprecationWarning")
+@pytest.mark.filterwarnings("ignore:The prefix:DeprecationWarning")
 def test_deprecated_prefix_option():
     """Test that calling the crawler's main function with the deprecated
     `prefix` option raises the correct errors and warnings.
@@ -993,36 +861,8 @@ def test_create_entity_summary():
     assert "<a href='/Entity/4'>a</a>, <a href='/Entity/6'>b</a>" in text
 
 
-def test_detect_circular_dependency(crawler_mocked_identifiable_retrieve, caplog):
-    crawler = crawler_mocked_identifiable_retrieve
-    crawler.identifiableAdapter.get_registered_identifiable = Mock(
-        side_effect=lambda x: db.Record().add_parent('C').add_property(name='C'))
-    a = db.Record(name='a').add_parent("C")
-    b = db.Record(name='b').add_parent("C").add_property(name="C", value=a)
-    c = db.Record(name='c').add_parent("C").add_property(name='D', value='e'
-                                                         ).add_property(name="C", value=b)
-    d = db.Record(name='c').add_parent("C")
-    a.add_property(name="C", value=c)
-    flat = [a, b, c]
-    circle = Crawler.detect_circular_dependency(flat)
-    assert [id(el) for el in circle] == [id(el) for el in [a, c, b, a]]
-
-    assert Crawler.detect_circular_dependency([d]) is None
-    with raises(RuntimeError):
-        _, _ = crawler.split_into_inserts_and_updates(flat)
-    caplog.set_level(logging.ERROR, logger="caoscrawler.converters")
-    assert "Found circular dependency" in caplog.text
-    assert "\n--------\n\n> Parent: C\n\n>> Name: a\n[\'C\']" in caplog.text
-    caplog.clear()
-
-
-def mock_get_entity_by_query(query=None):
-    if query is not None:
-        return db.Record(id=1111, name='rec_name').add_parent('RT')
-
-
-@ patch("caoscrawler.crawl.cached_get_entity_by",
-        new=Mock(side_effect=mock_get_entity_by_query))
+@patch("caoscrawler.crawl.cached_get_entity_by",
+       new=Mock(side_effect=mock_get_entity_by_query))
 def test_replace_name_with_referenced_entity():
     test_text = 'lkajsdf'
     test_int = 134343
@@ -1090,72 +930,3 @@ def test_replace_name_with_referenced_entity():
     assert isinstance(prop.value[2], int)
     assert prop.value[2] == test_id
     assert caoscrawler.crawl.cached_get_entity_by.call_count == 3
-
-
-def test_treated_record_lookup():
-    trlu = TreatedRecordLookUp()
-    exist = db.Record(id=1)
-    trlu.add(exist)
-    assert len(trlu._existing) == 1
-    # was added to existing
-    assert trlu._existing[id(exist)] is exist
-    # is in ID lookup
-    assert trlu._id_look_up[exist.id] is exist
-    # can be accessed via get_existing
-    assert trlu.get_existing(db.Record(id=1)) is exist
-
-    miss = db.Record()
-    # exception when identifiable is missing
-    with raises(RuntimeError):
-        trlu.add(miss)
-    ident = Identifiable(name='a')
-    trlu.add(miss, ident)
-    # was added to missing
-    assert trlu._missing[id(miss)] is miss
-    # is in ident lookup
-    assert trlu._identifiable_look_up[ident.get_representation()] is miss
-    # can be accessed via get_missing
-    assert trlu.get_missing(db.Record(), Identifiable(name='a')) is miss
-
-    fi = db.File(path='a', id=2)
-    trlu.add(fi)
-    assert len(trlu._existing) == 2
-    # was added to existing
-    assert trlu._existing[id(fi)] is fi
-    # is in ID lookup
-    assert trlu._id_look_up[fi.id] is fi
-    # is in path lookup
-    assert trlu._path_look_up[fi.path] is fi
-    # can be accessed via get_existing
-    assert trlu.get_existing(fi) is fi
-
-    all_exi = trlu.get_existing_list()
-    assert fi in all_exi
-    assert exist in all_exi
-    all_mi = trlu.get_missing_list()
-    assert miss in all_mi
-
-    # If a Record was added using the ID, the ID must be used to identify it even though later an
-    # identifiable may be passed as well
-    assert trlu.get_any(exist, Identifiable(name='b')) is exist
-
-    fi2 = db.File(path='b')
-    trlu.add(fi2)
-    assert trlu.get_any(db.File(path='b'), Identifiable(name='c')) is fi2
-
-
-def test_merge_entity_with_identifying_reference(crawler_mocked_identifiable_retrieve):
-    # When one python object representing a record is merged into another python object
-    # representing the same record, the former object can be forgotten and references from it to
-    # other records must not play a role
-    crawler = crawler_mocked_identifiable_retrieve
-    crawler.identifiableAdapter.get_registered_identifiable = Mock(
-        side_effect=lambda x: db.Record().add_parent('C').add_property(name='name') if
-        x.parents[0].name == "C" else
-        db.Record().add_parent('D').add_property(name='is_referenced_by', value="*")
-    )
-    a = db.Record(name='a').add_parent("D")
-    b = db.Record(name='b').add_parent("C")
-    c = db.Record(name='b').add_parent("C").add_property(name="C", value=a)
-    flat = [a, c, b]
-    _, _ = crawler.split_into_inserts_and_updates(flat)
diff --git a/unittests/test_data/invalid_identifiable/identifiable_content_no_list.yaml b/unittests/test_data/invalid_identifiable/identifiable_content_no_list.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..aee572a190bd7f439f638ef7c9a5d94a831aca81
--- /dev/null
+++ b/unittests/test_data/invalid_identifiable/identifiable_content_no_list.yaml
@@ -0,0 +1,4 @@
+Experiment:
+  date:
+    - 1
+    - 2
diff --git a/unittests/test_data/invalid_identifiable/identifiable_no_str_or_dict.yaml b/unittests/test_data/invalid_identifiable/identifiable_no_str_or_dict.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..a33c4ace9f8709a9b4a77c5fd8f38514acbe1e9c
--- /dev/null
+++ b/unittests/test_data/invalid_identifiable/identifiable_no_str_or_dict.yaml
@@ -0,0 +1,3 @@
+Experiment:
+- date
+- 23
diff --git a/unittests/test_data/invalid_identifiable/identifiable_referenced_no_list.yaml b/unittests/test_data/invalid_identifiable/identifiable_referenced_no_list.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..a504eab748d4891c3e1088ee785afcf6347fbbab
--- /dev/null
+++ b/unittests/test_data/invalid_identifiable/identifiable_referenced_no_list.yaml
@@ -0,0 +1,5 @@
+Experiment:
+- date
+Event:
+- is_referenced_by: Experiment
+- event_id
diff --git a/unittests/test_entity_comparison.py b/unittests/test_entity_comparison.py
index 549bc4f42a59765d25446d44fbb845e49ca4d9b9..0f62475b6c61d82feb3e550cf5ab53e91183f80a 100644
--- a/unittests/test_entity_comparison.py
+++ b/unittests/test_entity_comparison.py
@@ -2,7 +2,7 @@
 # Tests for entity comparison
 # A. Schlemmer, 06/2021
 
-import caosdb as db
+import linkahead as db
 
 import pytest
 from pytest import raises
diff --git a/unittests/test_file_identifiables.py b/unittests/test_file_identifiables.py
deleted file mode 100644
index 4ec02aa3fc497f8dc35adc709533ef5b35066f3a..0000000000000000000000000000000000000000
--- a/unittests/test_file_identifiables.py
+++ /dev/null
@@ -1,69 +0,0 @@
-#!/bin/python
-# Tests for file identifiables
-# A. Schlemmer, 06/2021
-
-from unittest.mock import Mock, patch
-
-import caosdb as db
-import pytest
-from caoscrawler.identifiable import Identifiable
-from caoscrawler.identifiable_adapters import LocalStorageIdentifiableAdapter
-from caosdb.cached import cache_clear
-from caosdb.exceptions import EmptyUniqueQueryError
-from pytest import raises
-
-from test_crawler import mock_get_entity_by
-
-
-@pytest.fixture(autouse=True)
-def clear_cache():
-    cache_clear()
-
-
-@patch("caoscrawler.identifiable_adapters.get_children_of_rt",
-       new=Mock(side_effect=id))
-@patch("caoscrawler.identifiable_adapters.cached_get_entity_by",
-       new=Mock(side_effect=mock_get_entity_by))
-def test_file_identifiable():
-    ident = LocalStorageIdentifiableAdapter()
-
-    # Without a path there is no identifying information
-    with raises(ValueError):
-        ident.get_identifiable(db.File(), [])
-
-    fp = "/test/bla/bla.txt"
-    file_obj = db.File(path=fp)
-    identifiable = ident.get_identifiable(file_obj)
-
-    # the path is copied to the identifiable
-    assert fp == identifiable.path
-    assert isinstance(identifiable, Identifiable)
-
-    # __eq__ function is only defined for Identifiable objects
-    with raises(ValueError):
-        file_obj != identifiable
-
-    # since the path does not exist in the data in ident, the follwoing functions return None
-    with raises(EmptyUniqueQueryError):
-        ident.retrieve_identified_record_for_record(file_obj)
-    assert ident.get_file(identifiable) is None
-
-    # Try again with actual files in the store:
-    records = ident.get_records()
-    test_record_wrong_path = db.File(path="/bla/bla/test.txt")
-    test_record_correct_path = db.File(path="/test/bla/bla.txt")
-    test_record_alsocorrect_path = db.File(path="/test/bla/bla.txt")
-    records.append(test_record_wrong_path)
-    # Now, there is a file, but still wrong path -> result is still None
-    identified_file = ident.get_file(file_obj)
-    assert identified_file is None
-
-    records.append(test_record_correct_path)
-    # now there is a match
-    identified_file = ident.get_file(file_obj)
-    assert identified_file is not None
-    assert identified_file.path == file_obj.path
-
-    with raises(RuntimeError, match=".*unambigiously.*"):
-        records.append(test_record_alsocorrect_path)
-        identified_file = ident.get_file(file_obj)
diff --git a/unittests/test_h5_converter.py b/unittests/test_h5_converter.py
index 2f7fae5d8d32bb7e5c90a535b63158c33df55daa..7f244e2cbdccb0d4eee6a62f59e9cea5684295a6 100644
--- a/unittests/test_h5_converter.py
+++ b/unittests/test_h5_converter.py
@@ -23,7 +23,7 @@ from functools import partial
 from pathlib import Path
 from pytest import fixture, importorskip
 
-import caosdb as db
+import linkahead as db
 
 from caoscrawler.debug_tree import DebugTree
 from caoscrawler.hdf5_converter import (convert_basic_element_with_nd_array,
diff --git a/unittests/test_identifiable.py b/unittests/test_identifiable.py
index 28bdb7a2ad75d5b9389b47ca3f0ec2b2e2a1404b..d94d852583523a3b3f29f002eaacb9ae0b616c4f 100644
--- a/unittests/test_identifiable.py
+++ b/unittests/test_identifiable.py
@@ -24,9 +24,10 @@
 test identifiable module
 """
 
-import caosdb as db
+import linkahead as db
 import pytest
 from caoscrawler.identifiable import Identifiable
+from caoscrawler.sync_node import SyncNode
 
 
 def test_create_hashable_string():
@@ -42,25 +43,20 @@ def test_create_hashable_string():
     assert (
         Identifiable._create_hashable_string(
             Identifiable(name="A", record_type="B",
-                         properties={'a': db.Record(id=12)})
+                         properties={'a': SyncNode(db.Record(id=12))})
         ) == "P<B>N<A>R<[]>a:12")
     a = Identifiable._create_hashable_string(
-        Identifiable(name="A", record_type="B", properties={'a': [db.Record(id=12)]}))
+        Identifiable(name="A", record_type="B", properties={'a': [SyncNode(db.Record(id=12))]}))
     assert (a == "P<B>N<A>R<[]>a:[12]")
     assert (Identifiable._create_hashable_string(
         Identifiable(name="A", record_type="B", properties={'a': [12]})) == "P<B>N<A>R<[]>a:[12]")
     assert (
         Identifiable._create_hashable_string(
             Identifiable(name="A", record_type="B", properties={
-                         'a': [db.Record(id=12), 11]})
+                         'a': [SyncNode(db.Record(id=12)), 11]})
         ) == "P<B>N<A>R<[]>a:[12, 11]")
-    assert (
-        Identifiable._create_hashable_string(
-            Identifiable(record_type="B", properties={'a': [db.Record()]})
-        ) != Identifiable._create_hashable_string(
-            Identifiable(record_type="B", properties={'a': [db.Record()]})))
     assert Identifiable._create_hashable_string(
-        Identifiable(name="A", record_type="B", backrefs=[123, db.Entity(id=124)],
+        Identifiable(name="A", record_type="B", backrefs=[123, SyncNode(db.Record(id=124))],
                      properties={'a': 5})) == "P<B>N<A>R<['123', '124']>a:5"
 
 
@@ -73,9 +69,9 @@ def test_repr():
     # only test that something meaningful is returned
     assert 'properties' in str(Identifiable(name="A", record_type="B"))
     assert str(Identifiable(name="A", record_type="B", properties={'a': 0})).split(
-        "properties:\n")[1].split('\n')[0] == '{"a": 0}'
+        "properties:\n")[1].split('\n')[0] == '{"a": "0"}'
     assert str(Identifiable(name="A", record_type="B", properties={'a': 0, 'b': "test"})).split(
-        "properties:\n")[1].split('\n')[0] == '{"a": 0, "b": "test"}'
+        "properties:\n")[1].split('\n')[0] == '{"a": "0", "b": "test"}'
 
     # TODO(henrik): Add a test using backrefs once that's implemented.
 
@@ -87,13 +83,5 @@ def test_equality():
         record_id=12, properties={"a": 0}) != Identifiable(record_id=13, properties={"a": 0})
     assert Identifiable(
         record_id=12, properties={"a": 0}) == Identifiable(properties={"a": 0})
-    assert Identifiable(
-        path="a", properties={"a": 0}) != Identifiable(path="b", properties={"a": 0})
-    assert Identifiable(
-        path="a", properties={"a": 0}) == Identifiable(path="a", properties={"a": 1})
-    assert Identifiable(
-        path="a", properties={"a": 0}) == Identifiable(properties={"a": 0})
-    assert Identifiable(properties={"a": 0}) == Identifiable(
-        properties={"a": 0})
-    assert Identifiable(properties={"a": 0}) != Identifiable(
-        properties={"a": 1})
+    assert Identifiable(properties={"a": 0}) == Identifiable(properties={"a": 0})
+    assert Identifiable(properties={"a": 0}) != Identifiable(properties={"a": 1})
diff --git a/unittests/test_identifiable_adapters.py b/unittests/test_identifiable_adapters.py
index ee0e0d6cd7c791f78e7cd2307dc6f34698326b4a..53490bc0413a95d960d94186c639dac2c6223b80 100644
--- a/unittests/test_identifiable_adapters.py
+++ b/unittests/test_identifiable_adapters.py
@@ -29,14 +29,18 @@ test identifiable_adapters module
 
 import os
 from datetime import datetime
+from unittest.mock import MagicMock, Mock, patch
 from pathlib import Path
 
-import caosdb as db
+import linkahead as db
 import pytest
+from caoscrawler.exceptions import InvalidIdentifiableYAML
 from caoscrawler.identifiable import Identifiable
 from caoscrawler.identifiable_adapters import (CaosDBIdentifiableAdapter,
                                                IdentifiableAdapter,
                                                convert_value)
+from caoscrawler.sync_graph import SyncNode
 
 UNITTESTDIR = Path(__file__).parent
 
@@ -120,30 +124,45 @@ def test_load_from_yaml_file():
     assert project_i.get_property("title") is not None
 
 
+def test_invalid_yaml():
+    ident = CaosDBIdentifiableAdapter()
+    invalid_dir = UNITTESTDIR / "test_data" / "invalid_identifiable"
+    with pytest.raises(InvalidIdentifiableYAML) as exc:
+        ident.load_from_yaml_definition(invalid_dir / "identifiable_content_no_list.yaml")
+    assert str(exc.value) == "Identifiable contents must be lists, but this was not: Experiment"
+
+    with pytest.raises(InvalidIdentifiableYAML) as exc:
+        ident.load_from_yaml_definition(invalid_dir / "identifiable_referenced_no_list.yaml")
+    assert str(exc.value) == "'is_referenced_by' must be a list.  Found in: Event"
+
+    with pytest.raises(InvalidIdentifiableYAML) as exc:
+        ident.load_from_yaml_definition(invalid_dir / "identifiable_no_str_or_dict.yaml")
+    assert str(exc.value) == ("Identifiable properties must be str or dict, but this one was not:\n"
+                              "    Experiment/23")
+
+
 def test_non_default_name():
     ident = CaosDBIdentifiableAdapter()
-    ident.register_identifiable(
-        "Person", db.RecordType()
-        .add_parent(name="Person")
-        .add_property(name="last_name"))
-    identifiable = ident.get_identifiable(db.Record(name="don't touch it")
-                                          .add_parent("Person")
-                                          .add_property(name="last_name", value='Tom')
-                                          )
+    identifiable = ident.get_identifiable(SyncNode(db.Record(name="don't touch it")
+                                                   .add_parent("Person")
+                                                   .add_property(name="last_name", value='Tom'),
+                                                   db.RecordType()
+                                                   .add_parent(name="Person")
+                                                   .add_property(name="last_name")), [])
     assert identifiable.name is None
 
 
 def test_wildcard_ref():
     ident = CaosDBIdentifiableAdapter()
-    ident.register_identifiable(
-        "Person", db.RecordType()
-        .add_parent(name="Person")
-        .add_property(name="is_referenced_by", value=["*"]))
     rec = (db.Record(name="don't touch it").add_parent("Person")
            .add_property(name="last_name", value='Tom'))
-    identifiable = ident.get_identifiable(rec,
-                                          referencing_entities={
-                                              'A': [1]}
+    dummy = SyncNode(db.Record(), None)
+    dummy.id = 1
+    identifiable = ident.get_identifiable(SyncNode(rec, db.RecordType()
+                                                   .add_parent(name="Person")
+                                                   .add_property(name="is_referenced_by",
+                                                                 value=["*"])),
+                                          [dummy]
                                           )
     assert identifiable.backrefs[0] == 1
 
@@ -158,25 +177,63 @@ def test_convert_value():
 
 
 def test_get_identifiable():
-    # TODO modify this such that it becomes a test that acutally tests (sufficiently) the
-    # get_identifable function
-
     ident = CaosDBIdentifiableAdapter()
     ident.load_from_yaml_definition(UNITTESTDIR / "example_identifiables.yml")
-    r_cur = (db.Record(id=5)
-             .add_parent(name="Experiment", id=3)
-             .add_property(name="date", value="2022-02-01")
-             .add_property(name="result", value="FAIL"))
-    id_r0 = ident.get_identifiable(r_cur)
-    assert r_cur.parents[0].name == id_r0.record_type
-    assert r_cur.get_property(
-        "date").value == id_r0.properties["date"]
-    assert len(r_cur.parents) == 1
-    assert len(r_cur.properties) == 2
+    rec = (db.Record(id=5)
+           .add_parent(name="Experiment", id=3)
+           .add_property(name="date", value="2022-02-01")
+           .add_property(name="result", value="FAIL"))
+    se = SyncNode(rec,
+                  ident.get_registered_identifiable(rec))
+    id_r0 = ident.get_identifiable(se, [])
+    assert rec.parents[0].name == id_r0.record_type
+    assert rec.get_property("date").value == id_r0.properties["date"]
+    assert len(rec.parents) == 1
+    assert len(rec.properties) == 2
+    assert len(id_r0.properties) == 1
+
+    ident = CaosDBIdentifiableAdapter()
+    ident_a = db.RecordType(name="A").add_parent("A").add_property("name").add_property("a")
+    ident.register_identifiable("A", ident_a)
+    rec = (db.Record(id=5)
+           .add_parent(name="A", id=3)
+           .add_property(name="a", value="2022-02-01")
+           .add_property(name="result", value="FAIL"))
+    se = SyncNode(rec, ident.get_registered_identifiable(rec))
+    for el in [
+        db.Record()
+        .add_parent(name="A", id=3)
+        .add_property(name="a", value="2022-02-01")
+        .add_property(name="result", value="FAIL"),
+        db.Record(name='a')
+        .add_parent(name="A", id=3)
+        .add_property(name="a", value="2022-02-01")
+        .add_property(name="result", value="FAIL"),
+    ]:
+        se.update(SyncNode(el))
+
+    id_r0 = ident.get_identifiable(se, [])
+    assert "A" == id_r0.record_type
+    assert "2022-02-01" == id_r0.properties["a"]
+    assert 'a' == id_r0.name
     assert len(id_r0.properties) == 1
 
+    rec = (db.Record(name='a')
+           .add_parent(name="A")
+           .add_property(name="a", value="2")
+           )
+    se = SyncNode(rec, ident.get_registered_identifiable(rec))
+    se.update(SyncNode(
+        db.Record(name='a')
+        .add_parent(name="A")
+        .add_property(name="a", value="3")
+    ))
 
-@pytest.mark.xfail
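+    # conflicting values ("2" vs. "3") for the identifying property "a" must lead to an error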
+    with pytest.raises(RuntimeError):
+        id_r0 = ident.get_identifiable(se, [])
+
+
+@pytest.mark.xfail
 def test_retrieve_identified_record_for_identifiable():
     # TODO modify this such that it becomes a test that actually tests (sufficiently) the
     # retrieve_identified_record_for_identifiable function
@@ -190,7 +247,7 @@ def test_retrieve_identified_record_for_identifiable():
             r_cur = r
             break
 
-    id_r1 = ident.get_identifiable(r_cur)
+    id_r1 = ident.get_identifiable(r_cur, [])
     assert r_cur.parents[0].name == id_r1.record_type
     assert r_cur.get_property(
         "identifier").value == id_r1.properties["identifier"]
@@ -211,3 +268,19 @@ def test_retrieve_identified_record_for_identifiable():
     assert r_cur.get_property(
         "responsible").value == idr_r1.get_property("responsible").value
     assert r_cur.description == idr_r1.description
+
+
+@patch("caoscrawler.identifiable_adapters.get_children_of_rt",
+       new=Mock(side_effect=lambda x: [x]))
+def test_referencing_entity_has_appropriate_type():
+    dummy = db.Record().add_parent("A")
+    registered_identifiable = db.RecordType()
+    rft = IdentifiableAdapter.referencing_entity_has_appropriate_type
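+    # a referencing entity qualifies only if one of its parents is listed in
+    # "is_referenced_by" (the wildcard "*" matches any parent)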
+    assert not rft([], registered_identifiable)
+    assert not rft(dummy.parents, registered_identifiable)
+    registered_identifiable.add_property("is_referenced_by", "B")
+    assert not rft(dummy.parents, registered_identifiable)
+    registered_identifiable.properties[0].value = ["B", "A"]
+    assert rft(dummy.parents, registered_identifiable)
+    registered_identifiable.properties[0].value = ["B", "*"]
+    assert rft(dummy.parents, registered_identifiable)
diff --git a/unittests/test_issues.py b/unittests/test_issues.py
index cbbe9cabcfd17daaf07165757351f00dc051eeab..1678280555e739bae55819fa7fe42a53c938c4e5 100644
--- a/unittests/test_issues.py
+++ b/unittests/test_issues.py
@@ -22,13 +22,10 @@
 
 from pytest import mark
 
-import caosdb as db
-
+from caoscrawler.converters import replace_variables, CrawlerTemplate
 from caoscrawler.crawl import Crawler
-from caoscrawler.identifiable import Identifiable
-from caoscrawler.identifiable_adapters import CaosDBIdentifiableAdapter
 from caoscrawler.structure_elements import DictElement
-
+from caoscrawler.stores import GeneralStore
 from caoscrawler.scanner import create_converter_registry, scan_structure_elements
 
 
@@ -110,3 +107,43 @@ def test_list_datatypes():
     assert isinstance(records[0].get_property("Subject").value, list)
     assert records[0].get_property("Subject").datatype is not None
     assert records[0].get_property("Subject").datatype.startswith("LIST")
+
+
+def test_issue_93():
+    """https://gitlab.com/linkahead/linkahead-crawler/-/issues/93
+
+    cfood.yaml does not allow umlaut in $expression"""
+    values = GeneralStore()
+    expressions = [
+        "foo",
+        "foo.bär",
+        "_1",
+        "Ä",
+        "ųøîµ",
+    ]
+    for exp in expressions:
+        values[exp] = f"This is {exp}"
+    # ## Test preliminary check
+    # With braces
+    for exp in expressions:
+        assert replace_variables(f"${{{exp}}}", values) == f"This is {exp}"
+    # Without braces
+    for exp in expressions:
+        assert replace_variables(f"${exp}", values) == f"This is {exp}"
+
+    # ## Test actual replacement
+    for exp in expressions:
+        # as-is
+        propvalue = f"${{{exp}}}"
+        propvalue_template = CrawlerTemplate(propvalue)
+
+        assert propvalue_template.safe_substitute(**values.get_storage()) == f"This is {exp}"
+
+        # String embedded into context
+        propvalue = f"some text before >> ${{{exp}}} << some text after"
+        propvalue_template = CrawlerTemplate(propvalue)
+        assert (propvalue_template.safe_substitute(**values.get_storage())
+                == f"some text before >> This is {exp} << some text after")
diff --git a/unittests/test_json.py b/unittests/test_json.py
index fdb332df60d73dce3356a563e09ae0d02cf845b7..be65a26ea01e11e11968bd927c80513708e73850 100644
--- a/unittests/test_json.py
+++ b/unittests/test_json.py
@@ -31,7 +31,7 @@ import os
 
 from pytest import raises
 
-import caosdb as db
+import linkahead as db
 
 from caoscrawler.converters import JSONFileConverter
 from pathlib import Path
diff --git a/unittests/test_macros.py b/unittests/test_macros.py
index 53837e920e93f2cc318d62549145a0e8ac757372..85fe56cd2d49581bcf07b1c7af8456ad219b0111 100644
--- a/unittests/test_macros.py
+++ b/unittests/test_macros.py
@@ -142,7 +142,7 @@ def test_multi_macros_toplevel(register_macros, macro_store_reset):
     dat_loader = list(yaml.safe_load_all("""
 ---
 metadata:
-  crawler-version: 0.5.1
+  crawler-version: 0.7.2
   macros:
     - !defmacro
       name: test_one
@@ -171,7 +171,7 @@ def test_load_definition(register_macros, macro_store_reset):
     txt = """
 ---
 metadata:
-  crawler-version: 0.5.1
+  crawler-version: 0.7.2
 ---
 extroot:
   type: Directory
@@ -188,7 +188,7 @@ extroot:
     cfood = _temp_file_load("""
 ---
 metadata:
-  crawler-version: 0.5.1
+  crawler-version: 0.7.2
   macros:
     - !defmacro
       name: test_one
@@ -223,7 +223,6 @@ extroot3:
     assert cfood["extroot3"]["subtree"]["SimulationData"]["match"] == "SimulationData"
 
 
-@pytest.mark.xfail
 def test_replace_arbitrary_objects(register_macros, macro_store_reset):
     """
     See: https://gitlab.indiscale.com/caosdb/src/caosdb-crawler/-/issues/24
@@ -234,27 +233,34 @@ defs:
   name: test
   params:
     b: 25
+    testvar_list_empty: []
     testvar_list:
     - a
     - $b
+    testvar_dict_empty: {}
     testvar_dict:
       t1: a
       t2: $b
   definition:
     replaced1:
       $b: ok
-      c: $testvar_dict
-      d: $testvar_list
+      dict_empty: $testvar_dict_empty
+      dict: $testvar_dict
+      list_empty: $testvar_list_empty
+      list: ${testvar_list}
 
 testnode:
   obl: !macro
     test:
 """, Loader=yaml.SafeLoader)
     print(yaml.dump(dat))
-    assert dat["testnode"]["obl"]["replaced1"]["c"]["t1"] == "a"
-    assert dat["testnode"]["obl"]["replaced1"]["c"]["t2"] == "25"
-    assert dat["testnode"]["obl"]["replaced1"]["d"][0] == "a"
-    assert dat["testnode"]["obl"]["replaced1"]["d"][1] == "25"
+    replaced = dat["testnode"]["obl"]["replaced1"]
+    assert replaced["dict_empty"] == {}
+    assert replaced["dict"]["t1"] == "a"
+    assert replaced["dict"]["t2"] == 25
+    assert replaced["list_empty"] == []
+    assert replaced["list"][0] == "a"
+    assert replaced["list"][1] == 25
 
 
 def test_macros_in_macros(register_macros, macro_store_reset):
@@ -264,7 +270,7 @@ def test_macros_in_macros(register_macros, macro_store_reset):
     cfood = _temp_file_load("""
 ---
 metadata:
-  crawler-version: 0.5.1
+  crawler-version: 0.7.2
   macros:
     - !defmacro
       name: one_macro
@@ -293,11 +299,11 @@ extroot: !macro
     assert "test_macro" not in cfood["extroot"]
     assert cfood["extroot"]["macro_top"]["not_macro"]["a"] == 26
     d = cfood["extroot"]["macro_top"]
-    assert d["macro_sub_17"]["b"] == "17"
+    assert d["macro_sub_17"]["b"] == 17
     assert d["macro_sub_17"]["another_param"] == 3
-    assert d["macro_sub_25"]["b"] == "25"
+    assert d["macro_sub_25"]["b"] == 25
     assert d["macro_sub_25"]["another_param"] == 3
-    assert d["macro_sub_98"]["b"] == "98"
+    assert d["macro_sub_98"]["b"] == 98
     assert d["macro_sub_98"]["another_param"] == 3
 
 
@@ -309,7 +315,7 @@ def test_silent_overwrite(register_macros, macro_store_reset):
     cfood = _temp_file_load("""
 ---
 metadata:
-  crawler-version: 0.5.1
+  crawler-version: 0.7.2
   macros:
     - !defmacro
       name: one_macro
@@ -340,7 +346,7 @@ def test_circular_macro_definition(register_macros, macro_store_reset):
     cfood = _temp_file_load("""
 ---
 metadata:
-  crawler-version: 0.5.1
+  crawler-version: 0.7.2
   macros:
     - !defmacro
       name: test_one
@@ -389,7 +395,7 @@ def test_use_macro_twice():
     cfood = _temp_file_load("""
 ---
 metadata:
-  crawler-version: 0.5.1
+  crawler-version: 0.7.2
   macros:
     - !defmacro
       name: test_twice
@@ -410,9 +416,9 @@ extroot: !macro
     """)
     for name in ["once", "twice", "default_name"]:
         assert name in cfood["extroot"]
-    assert cfood["extroot"]["once"]["something"]["a"] == "4"
-    assert cfood["extroot"]["twice"]["something"]["a"] == "5"
-    assert cfood["extroot"]["default_name"]["something"]["a"] == "4"
+    assert cfood["extroot"]["once"]["something"]["a"] == 4
+    assert cfood["extroot"]["twice"]["something"]["a"] == 5
+    assert cfood["extroot"]["default_name"]["something"]["a"] == 4
     # Code sample to generate the expanded macro:
     # with open("expanded_test_macro.yaml", "w") as f:
     #     f.write(yaml.dump(cfood))
@@ -423,7 +429,7 @@ def test_documentation_example_2():
     cfood = _temp_file_load("""
 ---
 metadata:
-  crawler-version: 0.5.1
+  crawler-version: 0.7.2
   macros:
   - !defmacro
     name: MarkdownFile
@@ -461,7 +467,7 @@ def test_documentation_example_1():
     cfood = _temp_file_load("""
 ---
 metadata:
-  crawler-version: 0.5.1
+  crawler-version: 0.7.2
   macros:
   - !defmacro
     name: SimulationDatasetFile
@@ -510,7 +516,7 @@ def test_def_replacements():
     cfood = _temp_file_load("""
 ---
 metadata:
-  crawler-version: 0.5.1
+  crawler-version: 0.7.2
   macros:
     - !defmacro
       name: test_def_replacements
@@ -573,9 +579,9 @@ testnode:
     test2:
       a: 4
 """, Loader=yaml.SafeLoader)
-    assert dat["testnode"]["obl"]["expanded_4"]["param"] == "4"
-    assert dat["testnode"]["obl"]["expanded_2"]["param"] == "2"
-    assert dat["testnode"]["obl"]["expanded_4_test2"]["param"] == "4"
+    assert dat["testnode"]["obl"]["expanded_4"]["param"] == 4
+    assert dat["testnode"]["obl"]["expanded_2"]["param"] == 2
+    assert dat["testnode"]["obl"]["expanded_4_test2"]["param"] == 4
 
 
 def test_variable_in_macro_definition(register_macros, macro_store_reset):
@@ -598,7 +604,7 @@ testnode:
     - a: 2
       b: 4
 """, Loader=yaml.SafeLoader)
-    assert dat["testnode"]["obl"]["expanded_4"]["param"] == "4"
-    assert dat["testnode"]["obl"]["expanded_4"]["param_b"] == "4"
-    assert dat["testnode"]["obl"]["expanded_2"]["param"] == "2"
-    assert dat["testnode"]["obl"]["expanded_2"]["param_b"] == "4"
+    assert dat["testnode"]["obl"]["expanded_4"]["param"] == 4
+    assert dat["testnode"]["obl"]["expanded_4"]["param_b"] == 4
+    assert dat["testnode"]["obl"]["expanded_2"]["param"] == 2
+    assert dat["testnode"]["obl"]["expanded_2"]["param_b"] == 4
diff --git a/unittests/test_parent_cfood.yml b/unittests/test_parent_cfood.yml
index b8d0eaf597641d311cb70017dc2bc75c7c3434f3..cd63e81b270117841128a34765a9635a036c52ec 100644
--- a/unittests/test_parent_cfood.yml
+++ b/unittests/test_parent_cfood.yml
@@ -1,6 +1,6 @@
 ---
 metadata:
-  crawler-version: 0.6.1
+  crawler-version: 0.7.2
 ---
 Definitions:
   type: Definitions
diff --git a/unittests/test_scanner.py b/unittests/test_scanner.py
index c0ce736fc4bed18f371f1626b6bc451ee103db49..226b5040547f0e003729dba63622edf836552f18 100644
--- a/unittests/test_scanner.py
+++ b/unittests/test_scanner.py
@@ -31,7 +31,7 @@ from pathlib import Path
 from tempfile import NamedTemporaryFile
 from unittest.mock import MagicMock, Mock, patch
 
-import caosdb as db
+import linkahead as db
 import pytest
 import yaml
 from caoscrawler.crawl import Crawler
diff --git a/unittests/test_schema.py b/unittests/test_schema.py
index 0d5bebce98fbc8c789c1080bcf3919f128bdbf54..3b576c9b72e41b799355f927d6e5387f1c187a18 100644
--- a/unittests/test_schema.py
+++ b/unittests/test_schema.py
@@ -3,7 +3,7 @@
 # A. Schlemmer, 06/2021
 
 from importlib_resources import files
-import caosdb as db
+import linkahead as db
 
 from os.path import join, dirname
 from caoscrawler import Crawler
diff --git a/unittests/test_spss_converter.py b/unittests/test_spss_converter.py
new file mode 100644
index 0000000000000000000000000000000000000000..7ffc18dba43a6f7cd3c9fbc9273da349b4ec3c6e
--- /dev/null
+++ b/unittests/test_spss_converter.py
@@ -0,0 +1,83 @@
+# This file is a part of the LinkAhead Project.
+#
+# Copyright (C) 2024 IndiScale GmbH <info@indiscale.com>
+# Copyright (C) 2024 Daniel Hornung <d.hornung@indiscale.com>
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as
+# published by the Free Software Foundation, either version 3 of the
+# License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+
+"""Testing converter for SPSS files."""
+
+import datetime
+import importlib
+import re
+from pathlib import Path
+
+import numpy as np
+import pytest
+
+from caoscrawler.converters import (
+    ConverterValidationError,
+    SPSSConverter,
+)
+from caoscrawler.structure_elements import (BooleanElement, DictElement,
+                                            Directory, File, FloatElement,
+                                            IntegerElement, ListElement,
+                                            TextElement)
+
+UNITTESTDIR = Path(__file__).parent
+
+
+@pytest.fixture
+def converter_registry():
+    converter_registry: dict[str, dict[str, str]] = {
+        "Directory": {
+            "converter": "DirectoryConverter",
+            "package": "caoscrawler.converters"},
+    }
+
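+    # Resolve each converter class object from its module so the registry entries can be
+    # used directly by the converter under test.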
+    for key, value in converter_registry.items():
+        module = importlib.import_module(value["package"])
+        value["class"] = getattr(module, value["converter"])
+    return converter_registry
+
+
+def test_spss_converter(converter_registry):
+    converter = SPSSConverter({"match": "sample.sav"},
+                              "ThisConverterNameIsIrrelevant",
+                              converter_registry)
+
+    spss_dir = UNITTESTDIR / "test_tables" / "spss"
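+    # Test cases: (file element, expected number of rows, expected Python type of the coded
+    # "mylabl"/"myord" values) for a small and a large sample file.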
+    for sav_file, length, thistype in [
+            (File("sample.sav", spss_dir / "sample.sav"), 5, str),
+            (File("sample.sav", spss_dir / "sample_large.sav"), 485, int),
+    ]:
+        m = converter.match(sav_file)
+        assert m is not None
+        assert len(m) == 0
+
+        children = converter.create_children(None, sav_file)
+        assert len(children) == length
+
+        for ii, child in enumerate(children):
+            assert child.__class__ == DictElement
+            assert child.name == str(ii)
+            my_dict = child.value
+            assert isinstance(my_dict["mychar"], str)
+            assert isinstance(my_dict["mydate"], datetime.date) or np.isnan(my_dict["mydate"])
+            assert isinstance(my_dict["dtime"], datetime.datetime) or np.isnan(my_dict["dtime"])
+            assert isinstance(my_dict["mytime"], datetime.time) or np.isnan(my_dict["mytime"])
+            assert isinstance(my_dict["mylabl"], thistype), f"{type(my_dict['mylabl'])}"
+            assert isinstance(my_dict["myord"], thistype), f"{type(my_dict['myord'])}"
diff --git a/unittests/test_sync_graph.py b/unittests/test_sync_graph.py
new file mode 100644
index 0000000000000000000000000000000000000000..9015e74be69c60c43ece80a2f742d6e9b7badda6
--- /dev/null
+++ b/unittests/test_sync_graph.py
@@ -0,0 +1,685 @@
+#!/usr/bin/env python3
+# encoding: utf-8
+#
+# This file is a part of the LinkAhead Project.
+#
+# Copyright (C) 2024 Indiscale GmbH <info@indiscale.com>
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as
+# published by the Free Software Foundation, either version 3 of the
+# License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+#
+
+import logging
+from functools import partial
+from unittest.mock import MagicMock, Mock, patch
+
+import linkahead as db
+import pytest
+from test_crawler import (basic_retrieve_by_name_mock_up,
+                          mock_cached_only_rt_allow_empty,
+                          mock_get_entity_by,
+                          )
+
+from caoscrawler.exceptions import (ImpossibleMergeError,
+                                    MissingIdentifyingProperty,
+                                    MissingRecordType,
+                                    )
+from caoscrawler.identifiable import Identifiable
+from caoscrawler.identifiable_adapters import CaosDBIdentifiableAdapter
+from caoscrawler.sync_graph import SyncGraph, _set_each_scalar_value
+from caoscrawler.sync_node import SyncNode, parent_in_list, property_in_list
+
+from itertools import product
+
+
+@pytest.fixture
+def simple_adapter():
+    # Different RTs with different registered identifiables allow testing various behaviors.
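+    # RT1 is identified by a reference to an RT2, RT2 by being referenced (by an RT1 or RT3),
+    # RT3 by its property "a", RT4 by a reference to an RT3, and RT5 by its name.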
+    ident_adapter = CaosDBIdentifiableAdapter()
+    ident_adapter.register_identifiable(
+        "RT1",
+        db.RecordType().add_parent("RT1").add_property("RT2"))
+    ident_adapter.register_identifiable(
+        "RT2",
+        db.RecordType().add_parent("RT2").add_property("is_referenced_by", ["RT1", "RT3"]))
+    ident_adapter.register_identifiable(
+        "RT3",
+        db.RecordType().add_parent("RT3").add_property("a"))
+    ident_adapter.register_identifiable(
+        "RT4",
+        db.RecordType().add_parent("RT4").add_property("RT3"))
+    ident_adapter.register_identifiable(
+        "RT5",
+        db.RecordType().add_parent("RT5").add_property("name"))
+    return ident_adapter
+
+
+def test_create_flat_list():
+    a = db.Record()
+    b = db.Record()
+    a.add_property(name="a", value=a)
+    a.add_property(name="b", value=b)
+    flat = SyncGraph._create_flat_list([a])
+    assert len(flat) == 2
+    assert a in flat
+    assert b in flat
+    c = db.Record()
+    c.add_property(name="a", value=a)
+    # This would cause a recursion error if it is not dealt with properly.
+    a.add_property(name="c", value=c)
+    flat = SyncGraph._create_flat_list([c])
+    assert len(flat) == 3
+    assert a in flat
+    assert b in flat
+    assert c in flat
+
+    # Test for lists:
+    a = db.Record()
+    b = db.Record()
+    d = db.Record()
+    a.add_property(name="a", value=a)
+    a.add_property(name="list", value=[b, d])
+    flat = SyncGraph._create_flat_list([a])
+    assert len(flat) == 3
+    assert a in flat
+    assert b in flat
+    assert d in flat
+
+    c = db.Record()
+    c.add_property(name="a", value=a)
+    # This would cause a recursion error if it is not dealt with properly.
+    a.add_property(name="second_list", value=[b, d, c])
+    flat = SyncGraph._create_flat_list([c])
+    assert len(flat) == 4
+    assert a in flat
+    assert b in flat
+    assert c in flat
+    assert d in flat
+
+
+@patch("caoscrawler.identifiable_adapters.get_children_of_rt",
+       new=Mock(side_effect=lambda x: [x]))
+def test_create_reference_mapping():
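+    # a's registered identifiable (the second constructor argument) uses a backreference
+    # ("is_referenced_by" RT2); b's uses its property "a".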
+    a = SyncNode(db.Record().add_parent("RT1"),
+                 db.RecordType().add_property("is_referenced_by", ["RT2"]))
+    b = SyncNode(db.Record(id=132).add_parent("RT2").add_property('a', a),
+                 db.RecordType().add_property("a"))
+    ses = [a, b]
+
+    mappings = SyncGraph._create_reference_mapping(ses)
+    # test initialization
+    for index, mapping in product((0, 1), mappings):
+        assert id(ses[index]) in mapping
+
+    (forward_references, backward_references, forward_references_id_props,
+     backward_references_id_props, forward_references_backref,
+     backward_references_backref) = mappings
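+    # Three categories of reference maps, each kept in forward and backward direction:
+    # all references, references via identifying properties, and identifying backreferences.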
+
+    # a has no ref
+    assert len(forward_references[id(a)]) == 0
+    assert backward_references[id(a)] == set([b])
+    # b does
+    assert forward_references[id(b)] == set([a])
+    assert backward_references[id(b)] == set()
+    # a has no identifying reference
+    assert forward_references_id_props[id(a)] == set()
+    assert backward_references_id_props[id(a)] == set([b])
+    # b has an identifying reference
+    assert forward_references_id_props[id(b)] == set([a])
+    assert backward_references_id_props[id(b)] == set()
+    # a has an identifying back reference
+    assert forward_references_backref[id(a)] == set()
+    assert backward_references_backref[id(a)] == set([b])
+    # b does not
+    assert forward_references_backref[id(b)] == set([a])
+    assert backward_references_backref[id(b)] == set()
+
+
+@patch("caoscrawler.sync_graph.cached_get_entity_by",
+       new=Mock(side_effect=mock_get_entity_by))
+def test_SyncGraph_init():
+    # trivial case
+    a = db.Record(id=101).add_parent("A")
+    ident_a = db.RecordType().add_parent("A").add_property("prop_ident")
+    ident_adapter = CaosDBIdentifiableAdapter()
+    ident_adapter.register_identifiable("A", ident_a)
+    SyncGraph([a], ident_adapter)
+    SyncGraph([], ident_adapter)  # should not fail either...
+    # test whether missing identifying properties cause an exception
+    with pytest.raises(MissingIdentifyingProperty):
+        SyncGraph([db.Record().add_parent("A")], ident_adapter)
+
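+    # Entities that are merged by ID, by path, or via a shared identifiable, plus some that
+    # stay separate: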
+    entlist = [
+        db.Record(id=101).add_parent("A"),
+        db.Record(id=102).add_parent("A"),
+        db.File(path='a').add_parent("A"),
+        db.File(path='b').add_parent("A"),
+        db.Record(id=103).add_parent("A"),
+        db.Record(id=104).add_parent("A").add_property(name='prop_ident', value="MERGEME"),
+        db.Record().add_parent("A").add_property(name='prop_ident', value="MERGEME"),
+        db.File(path='a', file='b').add_parent("A"),
+        db.Record(id=101).add_parent("A"),
+        db.Record().add_parent("A").add_property(name='prop_ident', value="other"),
+        db.Record().add_parent("A").add_property(name='prop_ident',
+                                                 value=db.Record().add_parent("A")
+                                                 .add_property(name='prop_ident', value="other")),
+        db.File(path='a', file='b').add_parent("A"),
+        db.Record(id=101).add_parent("A"),
+    ]
+    st = SyncGraph(entlist, ident_adapter)
+    # all nodes with ID=101 have been merged
+    assert len([el for el in st.nodes if el.id == 101]) == 1
+    # all nodes with path='a' have been merged
+    assert len([el for el in st.nodes if el.path == 'a']) == 1
+    # all nodes with ID or path were removed from unchecked
+    for el in st.nodes:
+        if el.id is not None or el.path is not None:
+            assert el not in st.unchecked
+    # all nodes with ID are in the ID lookup
+    for el in st.nodes:
+        if el.id is not None:
+            assert st._id_look_up[el.id] is el
+    # all nodes with path are in the path lookup
+    for el in st.nodes:
+        if el.path is not None:
+            assert st._path_look_up[el.path] is el
+    # all nodes with identifiable are in the identifiable lookup
+    for el in st.nodes:
+        if el.identifiable is not None:
+            assert st._identifiable_look_up[el.identifiable.get_representation()] is el
+    # The node which has no ID but has an identifiable was merged with another node with an ID
+    # (due to the shared identifiable).
+    new_one = [el for el in st.nodes if len(el.properties) > 0
+               and el.properties[0].value == "MERGEME"]
+    assert len(new_one) == 1
+    assert new_one[0].id == 104
+    # every node that does not rely on something unchecked has an identifiable or an ID
+    for el in st.nodes:
+        if not st._identity_relies_on_unchecked_entity(el):
+            assert el.identifiable is not None or el.id is not None
+
+
+@patch("caoscrawler.identifiable_adapters.get_children_of_rt",
+       new=Mock(side_effect=lambda x: [x]))
+def test_merge_into_trivial(simple_adapter):
+    # simplest case: a -> c
+    #                b
+    #                (a references c; b does not reference anything; a & b have the same target
+    #                 record)
+    c = db.Record(name='c').add_parent("RT2")
+    a = db.Record(name='a').add_parent("RT1").add_property('RT2', c)
+    b = db.Record(id=101).add_parent("RT1")
+
+    st = SyncGraph([a, b], simple_adapter)
+    se_a, se_b, se_c = st.nodes
+    assert se_a.name == 'a'
+    assert se_b.id == 101
+    assert se_c.name == 'c'
+
+    # CHECK REFERENCE MAP (before merge):
+    # c is referenced by a
+    assert len(st.forward_references[id(se_a)]) == 1
+    assert se_c in st.forward_references[id(se_a)]
+    assert len(st.forward_references[id(se_b)]) == 0
+    assert len(st.forward_references[id(se_c)]) == 0
+    assert len(st.backward_references[id(se_a)]) == 0
+    assert len(st.backward_references[id(se_b)]) == 0
+    assert len(st.backward_references[id(se_c)]) == 1
+    assert se_a in st.backward_references[id(se_c)]
+
+    assert len(st.forward_references_id_props[id(se_a)]) == 1
+    assert se_c in st.forward_references_id_props[id(se_a)]
+    assert len(st.forward_references_id_props[id(se_b)]) == 0
+    assert len(st.forward_references_id_props[id(se_c)]) == 0
+    assert len(st.backward_references_id_props[id(se_a)]) == 0
+    assert len(st.backward_references_id_props[id(se_b)]) == 0
+    assert len(st.backward_references_id_props[id(se_c)]) == 1
+    assert se_a in st.backward_references_id_props[id(se_c)]
+
+    assert len(st.forward_references_backref[id(se_a)]) == 1
+    assert se_c in st.forward_references_backref[id(se_a)]
+    assert len(st.forward_references_backref[id(se_b)]) == 0
+    assert len(st.forward_references_backref[id(se_c)]) == 0
+    assert len(st.backward_references_backref[id(se_a)]) == 0
+    assert len(st.backward_references_backref[id(se_b)]) == 0
+    assert len(st.backward_references_backref[id(se_c)]) == 1
+    assert se_a in st.backward_references_backref[id(se_c)]
+
+    st.set_id_of_node(se_a, 101)
+
+    # CHECK REFERENCE MAP (after merge):
+    # c is now referenced by b
+    assert id(se_a) not in st.forward_references
+    assert len(st.forward_references[id(se_b)]) == 1
+    assert se_c in st.forward_references[id(se_b)]
+    assert len(st.forward_references[id(se_c)]) == 0
+    assert id(se_a) not in st.backward_references
+    assert len(st.backward_references[id(se_b)]) == 0
+    assert len(st.backward_references[id(se_c)]) == 1
+    assert se_b in st.backward_references[id(se_c)]
+
+    assert id(se_a) not in st.forward_references_id_props
+    assert len(st.forward_references_id_props[id(se_b)]) == 1
+    assert se_c in st.forward_references_id_props[id(se_b)]
+    assert len(st.forward_references_id_props[id(se_c)]) == 0
+    assert id(se_a) not in st.backward_references_id_props
+    assert len(st.backward_references_id_props[id(se_b)]) == 0
+    assert len(st.backward_references_id_props[id(se_c)]) == 1
+    assert se_b in st.backward_references_id_props[id(se_c)]
+
+    assert id(se_a) not in st.forward_references_backref
+    assert len(st.forward_references_backref[id(se_b)]) == 1
+    assert se_c in st.forward_references_backref[id(se_b)]
+    assert len(st.forward_references_backref[id(se_c)]) == 0
+    assert id(se_a) not in st.backward_references_backref
+    assert len(st.backward_references_backref[id(se_b)]) == 0
+    assert len(st.backward_references_backref[id(se_c)]) == 1
+    assert se_b in st.backward_references_backref[id(se_c)]
+
+
+@patch("caoscrawler.identifiable_adapters.get_children_of_rt",
+       new=Mock(side_effect=lambda x: [x]))
+def test_merge_into_simple(simple_adapter):
+    # simple case: a -> c <- b (a & b reference c; a & b have the same target record)
+    c = db.Record(name='c').add_parent("RT2")
+    a = db.Record().add_parent("RT1").add_property('RT2', c)
+    b = db.Record().add_parent("RT1").add_property('RT2', c)
+
+    st = SyncGraph([a, b], simple_adapter)
+    se_a = st.nodes[0]
+    se_b = st.nodes[1]
+    se_c = st.nodes[2]
+
+    # CHECK REFERENCE MAP:
+    # c is referenced by a & b
+    assert len(st.forward_references[id(se_a)]) == 1
+    assert se_c in st.forward_references[id(se_a)]
+    assert len(st.forward_references[id(se_b)]) == 1
+    assert se_c in st.forward_references[id(se_b)]
+    assert len(st.forward_references[id(se_c)]) == 0
+    assert len(st.backward_references[id(se_a)]) == 0
+    assert len(st.backward_references[id(se_b)]) == 0
+    assert len(st.backward_references[id(se_c)]) == 2
+    assert se_a in st.backward_references[id(se_c)]
+    assert se_b in st.backward_references[id(se_c)]
+
+    assert len(st.forward_references_id_props[id(se_a)]) == 1
+    assert se_c in st.forward_references_id_props[id(se_a)]
+    assert len(st.forward_references_id_props[id(se_b)]) == 1
+    assert se_c in st.forward_references_id_props[id(se_b)]
+    assert len(st.forward_references_id_props[id(se_c)]) == 0
+    assert len(st.backward_references_id_props[id(se_a)]) == 0
+    assert len(st.backward_references_id_props[id(se_b)]) == 0
+    assert len(st.backward_references_id_props[id(se_c)]) == 2
+    assert se_a in st.backward_references_id_props[id(se_c)]
+    assert se_b in st.backward_references_id_props[id(se_c)]
+
+    assert len(st.forward_references_backref[id(se_a)]) == 1
+    assert se_c in st.forward_references_backref[id(se_a)]
+    assert len(st.forward_references_backref[id(se_b)]) == 1
+    assert se_c in st.forward_references_backref[id(se_b)]
+    assert len(st.forward_references_backref[id(se_c)]) == 0
+    assert len(st.backward_references_backref[id(se_a)]) == 0
+    assert len(st.backward_references_backref[id(se_b)]) == 0
+    assert len(st.backward_references_backref[id(se_c)]) == 2
+    assert se_a in st.backward_references_backref[id(se_c)]
+    assert se_b in st.backward_references_backref[id(se_c)]
+
+    st._merge_into(se_a, se_b)
+
+    # CHECK REFERENCE MAP (after merge):
+    # c is now referenced by b
+    # (same situation as above)
+    assert id(se_a) not in st.forward_references
+    assert len(st.forward_references[id(se_b)]) == 1
+    assert se_c in st.forward_references[id(se_b)]
+    assert len(st.forward_references[id(se_c)]) == 0
+    assert id(se_a) not in st.backward_references
+    assert len(st.backward_references[id(se_b)]) == 0
+    assert len(st.backward_references[id(se_c)]) == 1
+    assert se_b in st.backward_references[id(se_c)]
+
+    assert id(se_a) not in st.forward_references_id_props
+    assert len(st.forward_references_id_props[id(se_b)]) == 1
+    assert se_c in st.forward_references_id_props[id(se_b)]
+    assert len(st.forward_references_id_props[id(se_c)]) == 0
+    assert id(se_a) not in st.backward_references_id_props
+    assert len(st.backward_references_id_props[id(se_b)]) == 0
+    assert len(st.backward_references_id_props[id(se_c)]) == 1
+    assert se_b in st.backward_references_id_props[id(se_c)]
+
+    assert id(se_a) not in st.forward_references_backref
+    assert len(st.forward_references_backref[id(se_b)]) == 1
+    assert se_c in st.forward_references_backref[id(se_b)]
+    assert len(st.forward_references_backref[id(se_c)]) == 0
+    assert id(se_a) not in st.backward_references_backref
+    assert len(st.backward_references_backref[id(se_b)]) == 0
+    assert len(st.backward_references_backref[id(se_c)]) == 1
+    assert se_b in st.backward_references_backref[id(se_c)]
+
+
+@patch("caoscrawler.identifiable_adapters.get_children_of_rt",
+       new=Mock(side_effect=lambda x: [x]))
+def test_backward_references_backref():
+    # We use the reference as identifying reference in both directions. Thus the map is the same
+    # for all three categories: references, id_references and id_referenced_by
+    ident_a = db.RecordType().add_parent("BR").add_property("name")
+    ident_b = db.RecordType().add_parent("C").add_property("is_referenced_by", ["BR"])
+    ident_adapter = CaosDBIdentifiableAdapter()
+    ident_adapter.register_identifiable("BR", ident_a)
+    ident_adapter.register_identifiable("C", ident_b)
+
+    referenced = db.Record(name="B").add_parent("C")
+    ent_list = [referenced, db.Record(name="A").add_parent("BR").add_property("ref", referenced), ]
+
+    st = SyncGraph(ent_list, ident_adapter)
+    assert st.nodes[1] in st.backward_references_backref[id(st.nodes[0])]
+
+
+@patch("caoscrawler.identifiable_adapters.get_children_of_rt",
+       new=Mock(side_effect=lambda x: [x]))
+def test_set_id_of_node(simple_adapter):
+    # setting the id should lead to the node being marked as existing
+    ent_list = [db.Record(name='a').add_parent("RT5")]
+    st = SyncGraph(ent_list, simple_adapter)
+    assert len(st.nodes) == 1
+    assert len(st.unchecked) == 1
+    st.set_id_of_node(st.unchecked[0], 101)
+    assert len(st.nodes) == 1
+    assert len(st.unchecked) == 0
+    assert id(st.nodes[0]) in st._existing
+
+    # setting the id with None should lead to the node being marked as missing
+    ent_list = [db.Record().add_parent("RT1").add_property(name="RT2", value=1)]
+    st = SyncGraph(ent_list, simple_adapter)
+    assert len(st.nodes) == 1
+    assert len(st.unchecked) == 1
+    # the identifiable is automatically set during initialization of the graph
+    assert st.nodes[0].identifiable is not None
+    st.set_id_of_node(st.unchecked[0])
+    assert len(st.nodes) == 1
+    assert len(st.unchecked) == 0
+    assert id(st.nodes[0]) in st._missing
+
+    # setting the id to one that already exists should lead to a merge
+    ent_list = [
+        db.Record(id=101).add_parent("RT5"),
+        db.Record(name='a').add_parent("RT5").add_property(name="RT2", value=1)]
+    st = SyncGraph(ent_list, simple_adapter)
+    assert len(st.nodes) == 2
+    assert len(st.unchecked) == 1
+    st.set_id_of_node(st.unchecked[0], 101)
+    assert len(st.nodes) == 1
+    assert len(st.unchecked) == 0
+    assert st.nodes[0].properties[0].name == "RT2"
+
+    # setting the id to None should lead to dependent nodes being marked as missing
+    ent_list = [
+        db.Record().add_parent("RT3").add_property(name="a", value=1).add_property(
+            name="RT2", value=db.Record().add_parent("RT2")),
+    ]
+    st = SyncGraph(ent_list, simple_adapter)
+    assert len(st.nodes) == 2
+    assert len(st.unchecked) == 2
+    st.set_id_of_node(st.unchecked[0])
+    assert len(st.nodes) == 2
+    assert len(st.unchecked) == 0
+    assert id(st.nodes[0]) in st._missing
+    assert id(st.nodes[1]) in st._missing
+
+    # same as above but with backref
+    ent_list = [
+        db.Record()
+        .add_parent("RT4")
+        .add_property(name="RT3",
+                      value=db.Record().add_parent("RT3").add_property(name="a", value=1)),
+    ]
+    st = SyncGraph(ent_list, simple_adapter)
+    assert len(st.nodes) == 2
+    assert len(st.unchecked) == 2
+    assert st.unchecked[1].identifiable is not None
+    st.set_id_of_node(st.unchecked[1])
+    assert len(st.nodes) == 2
+    assert len(st.unchecked) == 0
+    assert id(st.nodes[0]) in st._missing
+    assert id(st.nodes[1]) in st._missing
+
+    # setting an id might allow checking another node that depends on the former
+    ent_list = [
+        db.Record()
+        .add_parent("RT4")
+        .add_property(name="RT3",
+                      value=db.Record().add_parent("RT3").add_property(name="a", value=1)),
+    ]
+    st = SyncGraph(ent_list, simple_adapter)
+    assert st.nodes[0].identifiable is None
+    assert st.nodes[1].identifiable is not None
+    st.set_id_of_node(st.unchecked[1], 111)
+    assert st.nodes[0].identifiable is not None
+    assert st.nodes[1].identifiable is not None
+
+    # same as above but going one step further: the new identifiable allows to merge that node
+    ent_list = [
+        (db.Record()
+         .add_parent("RT4")
+         .add_property(name="RT3",
+                       value=db.Record().add_parent("RT3").add_property(name="a", value=1))),
+
+        (db.Record()
+         .add_parent("RT4")
+         .add_property(name="RT3", value=111))
+    ]
+    st = SyncGraph(ent_list, simple_adapter)
+    assert st.nodes[0].identifiable is None
+    assert st.nodes[1].identifiable is not None
+    assert st.nodes[2].identifiable is not None
+    assert len(st.nodes) == 3
+    st.set_id_of_node(st.unchecked[2], 111)
+    assert st.nodes[0].identifiable is not None
+    assert len(st.nodes) == 2
+
+
+@patch("caoscrawler.sync_graph.cached_get_entity_by",
+       new=Mock(side_effect=mock_get_entity_by))
+def test_merging(simple_adapter):
+    # identifying information can be given at various locations in the hierarchical tree;
+    # test whether the objects are correctly combined in all cases
+    ident_adapter = CaosDBIdentifiableAdapter()
+    ident_a = db.RecordType().add_parent("A").add_property("name").add_property("a")
+    ident_adapter.register_identifiable("A", ident_a)
+    ident_adapter.retrieve_identified_record_for_identifiable = Mock(
+        side_effect=partial(
+            basic_retrieve_by_name_mock_up, known={"A": db.Record(id=1111, name="A")}))
+
+    # merging based on id
+    ent_list = [
+        db.Record(id=101).add_parent("A"),
+        db.Record(id=101).add_parent("A")]
+    st = SyncGraph(ent_list, ident_adapter)
+    assert len(st.nodes) == 1
+    assert len(st.unchecked) == 0
+    assert 101 == st.nodes[0].id
+    assert "A" == st.nodes[0].parents[0].name
+
+    # merging based on path
+    ent_list = [
+        db.File(path='101').add_parent("A"),
+        db.File(path='101').add_parent("A")]
+    st = SyncGraph(ent_list, ident_adapter)
+    assert len(st.nodes) == 1
+    assert len(st.unchecked) == 0
+    assert '101' == st.nodes[0].path
+    assert "A" == st.nodes[0].parents[0].name
+
+    # merging based on identifiable (non identifying properties are ignored)
+    ent_list = [
+        db.File(name='101').add_parent("A").add_property('a', value=1).add_property('b', value=1),
+        db.File(name='101').add_parent("A").add_property('a', value=1).add_property('b', value=2)]
+    st = SyncGraph(ent_list, ident_adapter)
+    assert len(st.nodes) == 1
+    assert st.nodes[0].id is None
+    assert '101' == st.nodes[0].name
+    assert "A" == st.nodes[0].parents[0].name
+    assert 1 == st.nodes[0].properties[0].value
+    assert "a" == st.nodes[0].properties[0].name
+
+    # Merging a mix: one Record needs the identifiable to be merged, while in the other case
+    # the identifying information is scattered across several entities.
+    ent_list = [
+        db.Record(id=101).add_parent("A"),
+        db.Record(id=101, name='a').add_parent("A"),
+        db.Record(id=101).add_parent("A").add_property('a', value=1),
+        db.Record(name='a').add_parent("A").add_property('a', value=1)]
+
+    st = SyncGraph(ent_list, ident_adapter)
+    assert len(st.nodes) == 1
+    assert len(st.unchecked) == 0
+    assert 'a' == st.nodes[0].name
+    assert "A" == st.nodes[0].parents[0].name
+    assert 1 == st.nodes[0].properties[0].value
+    assert "a" == st.nodes[0].properties[0].name
+    assert 101 == st.nodes[0].id
+
+    # test that adding an ID can lead to a cascade of merges
+    # This also tests whether setting something to missing allows creating an identifiable
+    # and thus enables a merge
+    subtree = db.Record(name='a').add_parent("A").add_property('a', value=db.Record(
+        name='b').add_parent("A").add_property('a', value=db.Record(
+            name='c').add_parent("A").add_property('a', value="missing")))
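+    # The subtree forms a chain a -> b -> c, where c's identifying property "a" has the
+    # string value "missing".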
+    ent_list = [
+        db.Record(id=101).add_parent("A"),
+        db.Record(id=101, name='z').add_parent("A"),
+        db.Record(id=101).add_parent("A").add_property('a', value=subtree),
+        db.Record(name='z').add_parent("A").add_property('a', value=subtree),
+    ]
+
+    st = SyncGraph(ent_list, ident_adapter)
+    assert len(st.nodes) == 5
+    assert len(st.unchecked) == 4
+    missing_one = [el for el in st.nodes if el.name == 'c'][0]
+    st.set_id_of_node(missing_one)
+    # setting c to missing means that b cannot exist, which means that a cannot exist; this
+    # allows merging the two z nodes
+    assert len(st.nodes) == 4
+    assert len(st.unchecked) == 0
+
+
+def test_update_of_reference_values(simple_adapter):
+    # Multiple nodes are merged, including one that is referenced. Ensure that the value of
+    # the property of the referencing node is still updated when the ID is set (the value
+    # object is replaced appropriately).
+    a = db.Record().add_parent("RT3").add_property('a', value=1)
+    ent_list = [
+        a,
+        db.Record().add_parent("RT3").add_property('a', value=1),
+        db.Record().add_parent("RT3").add_property('a', value=1),
+        db.Record().add_parent("RT3").add_property('a', value=1),
+        db.Record().add_parent("RT3").add_property('a', value=1),
+        db.Record().add_parent("RT4").add_property('RT3', value=a),
+        db.Record().add_parent("RT3").add_property('a', value=1),
+        db.Record().add_parent("RT3").add_property('a', value=1)]
+    st = SyncGraph(ent_list, simple_adapter)
+    assert len(st.nodes) == 2
+    assert len(st.unchecked) == 2
+    assert 'RT4' == st.nodes[1].parents[0].name
+    st.set_id_of_node(st.nodes[0], 101)
+    b_prop = st.nodes[1].properties[0].value
+    assert b_prop.id == 101
+
+
+def test_ignoring_irrelevant_references(simple_adapter):
+    # make sure that a circle of references is no problem if one reference is not identifying
+    b = db.Record(name='b').add_parent("RT5")
+    a = db.Record().add_parent("RT3").add_property('a', value=b)
+    b.add_property('a', value=a)
+    ent_list = [a, b]
+    st = SyncGraph(ent_list, simple_adapter)
+    assert len(st.nodes) == 2
+    assert len(st.unchecked) == 2
+    assert st.nodes[1].name == 'b'
+
+    # a relies on b
+    assert st._identity_relies_on_unchecked_entity(st.nodes[0])
+    # b relies on nothing
+    assert not st._identity_relies_on_unchecked_entity(st.nodes[1])
+    # set ID of b
+    st.set_id_of_node(st.nodes[1], 101)
+    assert len(st.unchecked) == 1
+    # now a no longer relies on unchecked entities
+    assert not st._identity_relies_on_unchecked_entity(st.nodes[0])
+
+# xfail: the current implementation is insufficient for this case
+
+
+@pytest.mark.xfail()
+def test_detect_circular_dependency(crawler_mocked_identifiable_retrieve, caplog):
+    crawler = crawler_mocked_identifiable_retrieve
+    crawler.identifiableAdapter.get_registered_identifiable = Mock(
+        side_effect=lambda x: db.Record().add_parent('C').add_property(name='C'))
+    a = db.Record(name='a').add_parent("C")
+    b = db.Record(name='b').add_parent("C").add_property(name="C", value=a)
+    c = (db.Record(name='c').add_parent("C")
+         .add_property(name='D', value='e')
+         .add_property(name="C", value=b))
+    d = db.Record(name='c').add_parent("C")
+    a.add_property(name="C", value=c)
+    flat = [a, b, c]
+    circle = Crawler.detect_circular_dependency(flat)
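+    # The detected circle is reported as a closed walk that starts and ends at the same node.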
+    assert [id(el) for el in circle] == [id(el) for el in [a, c, b, a]]
+
+    assert Crawler.detect_circular_dependency([d]) is None
+    st = SyncGraph(flat, crawler.identifiableAdapter)
+    with pytest.raises(RuntimeError):
+        _, _ = crawler._split_into_inserts_and_updates(st)
+    caplog.set_level(logging.ERROR, logger="caoscrawler.converters")
+    assert "Found circular dependency" in caplog.text
+    assert "\n--------\n\n> Parent: C\n\n>> Name: a\n[\'C\']" in caplog.text
+    caplog.clear()
+
+
+def test_set_each_scalar_value():
+    """Test whether properties with None as value are treated appropriately."""
+    a = SyncNode(db.Record().add_parent("RT1").add_property(name="bla"),
+                 db.RecordType().add_property("is_referenced_by", ["RT2"]))
+    _set_each_scalar_value(a, lambda x: False, None)
+    _set_each_scalar_value(a, lambda x: isinstance(x, SyncNode), None)
+    _set_each_scalar_value(a, lambda x: x is None, lambda x: 42)
+    assert a.properties[0].value == 42
+    _set_each_scalar_value(a, lambda x: x == 42, lambda x: None)
+    assert a.properties[0].value is None
+
+
+@patch("caoscrawler.identifiable_adapters.cached_query",
+       new=Mock(side_effect=mock_cached_only_rt_allow_empty))
+def test_merge_referenced_by():
+    """Merging two entities that are referenced by a third entity with nonexistent RecordType.
+
+    See also https://gitlab.com/linkahead/linkahead-crawler/-/issues/95
+    """
+    ident = CaosDBIdentifiableAdapter()
+    ident.load_from_yaml_object({
+        "RT_A": ["name"],
+        "RT_B": [{"is_referenced_by": ["RT_A"]}, "my_id"]
+    })
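+    # YAML shorthand registration: RT_A is identified by its name, RT_B by being referenced
+    # by an RT_A together with its "my_id" property.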
+    crawled_data: list = []
+    references: list = []
+    for ii in [0, 1]:
+        rec = db.Record().add_parent("RT_B").add_property("my_id", value=ii)
+        references.append(rec)
+        crawled_data.append(rec)
+    rec_a = db.Record(name="Rec_A").add_parent("RT_A")
+    rec_a.add_property("my_ref", value=references)
+    crawled_data.append(rec_a)
+
+    with pytest.raises(MissingRecordType) as mrt:
+        SyncGraph(crawled_data, ident)
+    assert str(mrt.value).endswith("Record type could not be found on server: RT_A")
diff --git a/unittests/test_sync_node.py b/unittests/test_sync_node.py
new file mode 100644
index 0000000000000000000000000000000000000000..668a53470d028dfcfce7bb5785d68b685b034595
--- /dev/null
+++ b/unittests/test_sync_node.py
@@ -0,0 +1,347 @@
+#!/usr/bin/env python3
+# encoding: utf-8
+#
+# This file is a part of the LinkAhead Project.
+#
+# Copyright (C) 2024 Indiscale GmbH <info@indiscale.com>
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as
+# published by the Free Software Foundation, either version 3 of the
+# License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+#
+from functools import partial
+from unittest.mock import MagicMock, Mock, patch
+
+import linkahead as db
+import pytest
+from caoscrawler.exceptions import ImpossibleMergeError
+from caoscrawler.identifiable import Identifiable
+from caoscrawler.identifiable_adapters import CaosDBIdentifiableAdapter
+from caoscrawler.sync_graph import SyncGraph
+from caoscrawler.sync_node import SyncNode, parent_in_list, property_in_list
+
+from test_crawler import basic_retrieve_by_name_mock_up, mock_get_entity_by
+
+
+def assert_parents_equal(p1, p2):
+    """Special assertion for comparing parents."""
+    for a, b in zip(p1, p2):
+        assert a.id == b.id
+        assert a.name == b.name
+
+
+def assert_properties_equal(p1, p2):
+    """Special assertion for comparing properties."""
+    for a, b in zip(p1, p2):
+        assert a.id == b.id
+        assert a.name == b.name
+        assert a.value == b.value
+        assert a.datatype == b.datatype
+
+
+def test_sync_node():
+    # initialization
+    rec = (db.Record(id=101, name='101')
+           .add_parent("A")
+           .add_parent("B")
+           .add_parent(id=102)
+           .add_property(name="a", value='a')
+           .add_property(id=103, value='b'))
+    rec.description = "hallo"
+    sna = SyncNode(rec)
+    # check information stored in initialized SyncNode
+    assert "Record" in str(sna)
+    assert sna.id == rec.id
+    assert sna.role == rec.role
+    assert sna.name == rec.name
+    assert sna.description == rec.description
+    assert_parents_equal(sna.parents, rec.parents)
+    assert_properties_equal(sna.properties, rec.properties)
+    # ... special case File (path and file attributes)
+    fi = db.File(id=101, name='101', path='/a/')
+    snb = SyncNode(fi)
+    assert snb.role == fi.role
+    assert snb.name == fi.name
+    assert snb.id == fi.id
+    assert snb.path == fi.path
+    assert snb.file == fi.file
+
+    # check information in exported db.Entity
+    export = sna.export_entity()
+    assert export.id == rec.id
+    assert export.role == rec.role
+    assert export.name == rec.name
+    assert export.description == rec.description
+    assert_parents_equal(export.parents, rec.parents)
+    assert_properties_equal(export.properties, rec.properties)
+    export = snb.export_entity()
+    assert export.role == fi.role
+    assert export.name == fi.name
+    assert export.id == fi.id
+    assert export.path == fi.path
+    assert export.file == fi.file
+
+    # merge no common information
+    # ---------------------------
+    rec_a = (db.Record(name='101')
+             .add_parent("A")
+             .add_parent(id=102)
+             .add_property(name="a", value='a')
+             .add_property(id=103, value='b'))
+
+    rec_b = (db.Record(id=101)
+             .add_parent("B")
+             .add_parent(id=103)
+             .add_property(name="a", value='a')
+             .add_property(id=103, value='b'))
+    rec_b.description = "tja"
+
+    sn_a = SyncNode(rec_a)
+    sn_b = SyncNode(rec_b)
+    sn_a.update(sn_b)
+    # test information in updated node
+    assert sn_a.id == rec_b.id
+    assert sn_a.role == rec_a.role
+    assert sn_a.name == rec_a.name
+    assert sn_a.description == rec_b.description
+    for p in rec_a.parents + rec_b.parents:
+        assert p in sn_a.parents
+    for p in rec_a.properties + rec_b.properties:
+        assert p in sn_a.properties
+    # Check for duplicated property:
+    ps = [p for p in sn_a.properties if p.name == "a"]
+    assert len(ps) == 2
+    assert ps[0].value == "a"
+    assert ps[1].value == "a"
+
+    # test information in exported entity
+    export = sn_a.export_entity()
+    assert export.id == rec_b.id
+    assert export.name == rec_a.name
+    for p in rec_a.parents + rec_b.parents:
+        assert parent_in_list(p, export.parents)
+    for p in rec_a.properties + rec_b.properties:
+        if p.name is not None:
+            assert p.name in [el.name for el in export.properties]
+        if p.id is not None:
+            assert p.id in [el.id for el in export.properties]
+    assert len(export.properties) == 2
+    assert export.get_property('a').value == 'a'
+    assert export.get_property(103).value == 'b'
+    assert export.description == rec_b.description
+    assert export.role == rec_a.role
+
+    # merge with common information
+    # -----------------------------
+    rec_a = (db.Record(id=101, name='101')
+             .add_parent("A")
+             .add_parent(id=102)
+             .add_property(name="a", value='a'))
+
+    rec_b = (db.Record(id=101, name='101')
+             .add_parent("A")
+             .add_parent(id=102)
+             .add_property(name="a", value='a'))
+
+    sn_a = SyncNode(rec_a)
+    sn_b = SyncNode(rec_b)
+    sn_a.update(sn_b)
+    assert sn_a.id == rec_b.id
+    assert sn_a.name == rec_a.name
+    for p in rec_a.parents + rec_b.parents:
+        assert parent_in_list(p, sn_a.parents)
+    for p in rec_a.properties + rec_b.properties:
+        assert property_in_list(p, sn_a.properties)
+    assert sn_a.description == rec_b.description
+    assert sn_a.role == rec_a.role
+
+    # merge with conflicting information
+    # ----------------------------------
+    # ID mismatch
+    sn_a = SyncNode(db.Record(id=102))
+    with pytest.raises(ImpossibleMergeError, match="Trying to update"):
+        sn_a.update(SyncNode(db.Record(id=101)))
+
+    # name mismatch
+    sn_a = SyncNode(db.Record(name='102'))
+    with pytest.raises(ImpossibleMergeError, match="Trying to update"):
+        sn_a.update(SyncNode(db.Record(name='101')))
+
+    # type mismatch
+    sn_a = SyncNode(db.Record(name='102'))
+    with pytest.raises(ImpossibleMergeError, match="Trying to update"):
+        sn_a.update(SyncNode(db.File(name='102')))
+
+    # description mismatch
+    sn_a = SyncNode(db.Record(description='102'))
+    with pytest.raises(ImpossibleMergeError, match="Trying to update"):
+        sn_a.update(SyncNode(db.Record(description='101')))
+
+    # path mismatch
+    sn_a = SyncNode(db.File(path='102'))
+    with pytest.raises(ImpossibleMergeError, match="Trying to update"):
+        sn_a.update(SyncNode(db.File(path='101')))
+
+    # identifiable mismatch
+    sn_a = SyncNode(db.File(path='102'))
+    sn_a.identifiable = Identifiable(name='a')
+    sn_b = SyncNode(db.File(path='101'))
+    sn_b.identifiable = Identifiable(name='b')
+    with pytest.raises(ValueError, match="identifiable"):
+        sn_a.update(sn_b)
+
+
+def test_export_node():
+    rec_a = (db.Record(id=101)
+             .add_parent("B")
+             .add_parent(id=103)
+             .add_property(name="a", value=[SyncNode(db.Record())])
+             .add_property(name='b', id=103, value='b'))
+
+    sn_a = SyncNode(rec_a)
+    exp = sn_a.export_entity()
+    assert exp.id == rec_a.id
+    assert exp.name == rec_a.name
+    for p in rec_a.parents:
+        assert len([el for el in exp.parents if p.name == el.name]) == 1
+    for p in rec_a.properties:
+        assert p.value == exp.get_property(p.name).value
+        if isinstance(p.value, list):
+            assert len(p.value) == len(exp.get_property(p.name).value)
+    assert len(exp.properties) == len(rec_a.properties)
+    assert len(exp.parents) == len(rec_a.parents)
+
+    # ---------------------------------------------------------------------------------------------
+    # NOTE: in the following we create a SyncNode object with the same Property twice, as a
+    # shorthand for a SyncNode that was created from one Entity with such a Property and then
+    # updated with another SyncNode that also has the Property.
+    # ---------------------------------------------------------------------------------------------
+
+    # same property name, different values
+    rec_a = (db.Record(id=101)
+             .add_parent("B")
+             .add_property(name="a", value='b')
+             .add_property(name="a", value='a'))
+
+    # there should be a warning when multiproperties are used
+    with pytest.warns(UserWarning) as caught:
+        SyncNode(rec_a)
+        messages = {str(w.message) for w in caught}
+        assert ("Multiproperties are not supported by the crawler.") in messages
+
+    with pytest.raises(ImpossibleMergeError):
+        exp = SyncNode(rec_a).export_entity()
+
+    # SyncNodes with same ID are considered equal
+    rec_a = (db.Record(id=101)
+             .add_parent("B")
+             .add_property(name="a", value=SyncNode(db.Record(id=1)))
+             .add_property(name="a", value=SyncNode(db.Record(id=1))))
+
+    exp = SyncNode(rec_a).export_entity()
+    assert exp.get_property('a').value.id == 1
+    # SyncNodes convert multi properties into single properties
+    assert len([p for p in exp.properties if p.name == "a"]) == 1
+
+    # same SyncNode object is obviously equal
+    sn = SyncNode(db.Record(id=1))
+    rec_a = (db.Record(id=101)
+             .add_parent("B")
+             .add_property(name="a", value=sn)
+             .add_property(name="a", value=sn))
+
+    exp = SyncNode(rec_a).export_entity()
+    assert exp.get_property('a').value.id == 1
+    assert len([p for p in exp.properties if p.name == "a"]) == 1
+
+    # different SyncNode Objects (without an ID) are not equal
+    rec_a = (db.Record(id=101)
+             .add_parent("B")
+             .add_property(name="a", value=SyncNode(db.Record()))
+             .add_property(name="a", value=SyncNode(db.Record())))
+
+    with pytest.raises(ImpossibleMergeError):
+        exp = SyncNode(rec_a).export_entity()
+
+    # different SyncNode Objects with differing IDs are not equal
+    rec_a = (db.Record(id=101)
+             .add_parent("B")
+             .add_property(name="a", value=SyncNode(db.Record(id=1)))
+             .add_property(name="a", value=SyncNode(db.Record(id=2))))
+
+    with pytest.raises(ImpossibleMergeError):
+        exp = SyncNode(rec_a).export_entity()
+
+    # SyncNodes with same ID are considered equal (list)
+    rec_a = (db.Record(id=101)
+             .add_parent("B")
+             .add_property(name="a", value=[SyncNode(db.Record(id=1)), SyncNode(db.Record(id=2))])
+             .add_property(name="a", value=[SyncNode(db.Record(id=1)), SyncNode(db.Record(id=2))]))
+
+    exp = SyncNode(rec_a).export_entity()
+    assert exp.get_property('a').value[0].id == 1
+    assert len([p for p in exp.properties if p.name == "a"]) == 1
+
+    # SyncNodes with same ID are not equal when in different order (list)
+    rec_a = (db.Record(id=101)
+             .add_parent("B")
+             .add_property(name="a", value=[SyncNode(db.Record(id=1)), SyncNode(db.Record(id=2))])
+             .add_property(name="a", value=[SyncNode(db.Record(id=2)), SyncNode(db.Record(id=1))]))
+
+    with pytest.raises(ImpossibleMergeError):
+        exp = SyncNode(rec_a).export_entity()
+
+    # same SyncNode object is obviously equal (list)
+    sn = SyncNode(db.Record(id=1))
+    rec_a = (db.Record(id=101)
+             .add_parent("B")
+             .add_property(name="a", value=[sn])
+             .add_property(name="a", value=[sn]))
+
+    exp = SyncNode(rec_a).export_entity()
+    assert exp.get_property('a').value[0].id == 1
+
+    # different SyncNode Objects are not equal (list)
+    rec_a = (db.Record(id=101)
+             .add_parent("B")
+             .add_property(name="a", value=[SyncNode(db.Record())])
+             .add_property(name="a", value=[SyncNode(db.Record())]))
+
+    with pytest.raises(ImpossibleMergeError):
+        exp = SyncNode(rec_a).export_entity()
+
+    # different SyncNode Objects with differing IDs are not equal (list)
+    rec_a = (db.Record(id=101)
+             .add_parent("B")
+             .add_property(name="a", value=[SyncNode(db.Record(id=1))])
+             .add_property(name="a", value=[SyncNode(db.Record(id=2))]))
+
+    with pytest.raises(ImpossibleMergeError):
+        exp = SyncNode(rec_a).export_entity()
+
+    # list vs no list
+    rec_a = (db.Record(id=101)
+             .add_parent("B")
+             .add_property(name="a", value=SyncNode(db.Record(id=1)))
+             .add_property(name="a", value=[SyncNode(db.Record(id=1))]))
+
+    with pytest.raises(ImpossibleMergeError):
+        exp = SyncNode(rec_a).export_entity()
+
+    # different list sizes
+    rec_a = (db.Record(id=101)
+             .add_parent("B")
+             .add_property(name="a", value=[SyncNode(db.Record(id=1))])
+             .add_property(name="a", value=[SyncNode(db.Record(id=1)), SyncNode(db.Record(id=1))]))
+
+    with pytest.raises(ImpossibleMergeError):
+        exp = SyncNode(rec_a).export_entity()
diff --git a/unittests/test_table_converter.py b/unittests/test_table_converter.py
index 178393d9345bd8a6846b66e362ce4f7edac382ee..3b563fd3179968fd90b1c92b9bc5bf0db9ed0858 100644
--- a/unittests/test_table_converter.py
+++ b/unittests/test_table_converter.py
@@ -32,7 +32,7 @@ import os
 from os.path import basename, dirname, join
 from pathlib import Path
 
-import caosdb as db
+import linkahead as db
 import pytest
 from caoscrawler import Crawler
 from caoscrawler.converters import (Converter, ConverterValidationError,
diff --git a/unittests/test_tables/spss/CITATION.cff b/unittests/test_tables/spss/CITATION.cff
new file mode 100644
index 0000000000000000000000000000000000000000..140fcc071bf2d5f5709cf31bf11bd9676b81ca5f
--- /dev/null
+++ b/unittests/test_tables/spss/CITATION.cff
@@ -0,0 +1,11 @@
+cff-version: 1.2.0
+message: "If you use this software, please cite it as below."
+authors:
+- family-names: "Fajardo"
+  given-names: "Otto"
+  orcid: "https://orcid.org/0000-0002-3363-9287"
+title: "Pyreadstat"
+version: 1.2.7
+doi: 10.5281/zenodo.6612282
+date-released: 2018-09-24
+url: "https://github.com/Roche/pyreadstat"
diff --git a/unittests/test_tables/spss/LICENSE b/unittests/test_tables/spss/LICENSE
new file mode 100644
index 0000000000000000000000000000000000000000..a2f94b1a2a5a4255fc8ef6d0beb94cce89f545e8
--- /dev/null
+++ b/unittests/test_tables/spss/LICENSE
@@ -0,0 +1,210 @@
+Test data files were copied from [pyreadstat](https://github.com/Roche/pyreadstat); they are
+licensed under the Apache License, cited below.
+
+Copyright (C) 2018-2024 Otto Fajardo
+Copyright (C) 2024 Indiscale GmbH <info@indiscale.com>
+Copyright (C) 2024 Daniel Hornung <d.hornung@indiscale.com>
+
+pyreadstat license:
+---------------------------------------------------------------------------
+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+   1. Definitions.
+
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. For the purposes of this definition,
+      "control" means (i) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+      outstanding shares, or (iii) beneficial ownership of such entity.
+
+      "You" (or "Your") shall mean an individual or Legal Entity
+      exercising permissions granted by this License.
+
+      "Source" form shall mean the preferred form for making modifications,
+      including but not limited to software source code, documentation
+      source, and configuration files.
+
+      "Object" form shall mean any form resulting from mechanical
+      transformation or translation of a Source form, including but
+      not limited to compiled object code, generated documentation,
+      and conversions to other media types.
+
+      "Work" shall mean the work of authorship, whether in Source or
+      Object form, made available under the License, as indicated by a
+      copyright notice that is included in or attached to the work
+      (an example is provided in the Appendix below).
+
+      "Derivative Works" shall mean any work, whether in Source or Object
+      form, that is based on (or derived from) the Work and for which the
+      editorial revisions, annotations, elaborations, or other modifications
+      represent, as a whole, an original work of authorship. For the purposes
+      of this License, Derivative Works shall not include works that remain
+      separable from, or merely link (or bind by name) to the interfaces of,
+      the Work and Derivative Works thereof.
+
+      "Contribution" shall mean any work of authorship, including
+      the original version of the Work and any modifications or additions
+      to that Work or Derivative Works thereof, that is intentionally
+      submitted to Licensor for inclusion in the Work by the copyright owner
+      or by an individual or Legal Entity authorized to submit on behalf of
+      the copyright owner. For the purposes of this definition, "submitted"
+      means any form of electronic, verbal, or written communication sent
+      to the Licensor or its representatives, including but not limited to
+      communication on electronic mailing lists, source code control systems,
+      and issue tracking systems that are managed by, or on behalf of, the
+      Licensor for the purpose of discussing and improving the Work, but
+      excluding communication that is conspicuously marked or otherwise
+      designated in writing by the copyright owner as "Not a Contribution."
+
+      "Contributor" shall mean Licensor and any individual or Legal Entity
+      on behalf of whom a Contribution has been received by Licensor and
+      subsequently incorporated within the Work.
+
+   2. Grant of Copyright License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      copyright license to reproduce, prepare Derivative Works of,
+      publicly display, publicly perform, sublicense, and distribute the
+      Work and such Derivative Works in Source or Object form.
+
+   3. Grant of Patent License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      (except as stated in this section) patent license to make, have made,
+      use, offer to sell, sell, import, and otherwise transfer the Work,
+      where such license applies only to those patent claims licensable
+      by such Contributor that are necessarily infringed by their
+      Contribution(s) alone or by combination of their Contribution(s)
+      with the Work to which such Contribution(s) was submitted. If You
+      institute patent litigation against any entity (including a
+      cross-claim or counterclaim in a lawsuit) alleging that the Work
+      or a Contribution incorporated within the Work constitutes direct
+      or contributory patent infringement, then any patent licenses
+      granted to You under this License for that Work shall terminate
+      as of the date such litigation is filed.
+
+   4. Redistribution. You may reproduce and distribute copies of the
+      Work or Derivative Works thereof in any medium, with or without
+      modifications, and in Source or Object form, provided that You
+      meet the following conditions:
+
+      (a) You must give any other recipients of the Work or
+          Derivative Works a copy of this License; and
+
+      (b) You must cause any modified files to carry prominent notices
+          stating that You changed the files; and
+
+      (c) You must retain, in the Source form of any Derivative Works
+          that You distribute, all copyright, patent, trademark, and
+          attribution notices from the Source form of the Work,
+          excluding those notices that do not pertain to any part of
+          the Derivative Works; and
+
+      (d) If the Work includes a "NOTICE" text file as part of its
+          distribution, then any Derivative Works that You distribute must
+          include a readable copy of the attribution notices contained
+          within such NOTICE file, excluding those notices that do not
+          pertain to any part of the Derivative Works, in at least one
+          of the following places: within a NOTICE text file distributed
+          as part of the Derivative Works; within the Source form or
+          documentation, if provided along with the Derivative Works; or,
+          within a display generated by the Derivative Works, if and
+          wherever such third-party notices normally appear. The contents
+          of the NOTICE file are for informational purposes only and
+          do not modify the License. You may add Your own attribution
+          notices within Derivative Works that You distribute, alongside
+          or as an addendum to the NOTICE text from the Work, provided
+          that such additional attribution notices cannot be construed
+          as modifying the License.
+
+      You may add Your own copyright statement to Your modifications and
+      may provide additional or different license terms and conditions
+      for use, reproduction, or distribution of Your modifications, or
+      for any such Derivative Works as a whole, provided Your use,
+      reproduction, and distribution of the Work otherwise complies with
+      the conditions stated in this License.
+
+   5. Submission of Contributions. Unless You explicitly state otherwise,
+      any Contribution intentionally submitted for inclusion in the Work
+      by You to the Licensor shall be under the terms and conditions of
+      this License, without any additional terms or conditions.
+      Notwithstanding the above, nothing herein shall supersede or modify
+      the terms of any separate license agreement you may have executed
+      with Licensor regarding such Contributions.
+
+   6. Trademarks. This License does not grant permission to use the trade
+      names, trademarks, service marks, or product names of the Licensor,
+      except as required for reasonable and customary use in describing the
+      origin of the Work and reproducing the content of the NOTICE file.
+
+   7. Disclaimer of Warranty. Unless required by applicable law or
+      agreed to in writing, Licensor provides the Work (and each
+      Contributor provides its Contributions) on an "AS IS" BASIS,
+      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+      implied, including, without limitation, any warranties or conditions
+      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+      PARTICULAR PURPOSE. You are solely responsible for determining the
+      appropriateness of using or redistributing the Work and assume any
+      risks associated with Your exercise of permissions under this License.
+
+   8. Limitation of Liability. In no event and under no legal theory,
+      whether in tort (including negligence), contract, or otherwise,
+      unless required by applicable law (such as deliberate and grossly
+      negligent acts) or agreed to in writing, shall any Contributor be
+      liable to You for damages, including any direct, indirect, special,
+      incidental, or consequential damages of any character arising as a
+      result of this License or out of the use or inability to use the
+      Work (including but not limited to damages for loss of goodwill,
+      work stoppage, computer failure or malfunction, or any and all
+      other commercial damages or losses), even if such Contributor
+      has been advised of the possibility of such damages.
+
+   9. Accepting Warranty or Additional Liability. While redistributing
+      the Work or Derivative Works thereof, You may choose to offer,
+      and charge a fee for, acceptance of support, warranty, indemnity,
+      or other liability obligations and/or rights consistent with this
+      License. However, in accepting such obligations, You may act only
+      on Your own behalf and on Your sole responsibility, not on behalf
+      of any other Contributor, and only if You agree to indemnify,
+      defend, and hold each Contributor harmless for any liability
+      incurred by, or claims asserted against, such Contributor by reason
+      of your accepting any such warranty or additional liability.
+
+   END OF TERMS AND CONDITIONS
+
+   APPENDIX: How to apply the Apache License to your work.
+
+      To apply the Apache License to your work, attach the following
+      boilerplate notice, with the fields enclosed by brackets "[]"
+      replaced with your own identifying information. (Don't include
+      the brackets!)  The text should be enclosed in the appropriate
+      comment syntax for the file format. We also recommend that a
+      file or class name and description of purpose be included on the
+      same "printed page" as the copyright notice for easier
+      identification within third-party archives.
+
+   Copyright [yyyy] [name of copyright owner]
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
\ No newline at end of file
diff --git a/unittests/test_tables/spss/sample.sav b/unittests/test_tables/spss/sample.sav
new file mode 100644
index 0000000000000000000000000000000000000000..20d0c5ce6689a60adfa329a17b4347274e9a863b
Binary files /dev/null and b/unittests/test_tables/spss/sample.sav differ
diff --git a/unittests/test_tables/spss/sample_large.sav b/unittests/test_tables/spss/sample_large.sav
new file mode 100644
index 0000000000000000000000000000000000000000..b0c16c1390a15a4f62a859ade76aa17b89c6ae40
Binary files /dev/null and b/unittests/test_tables/spss/sample_large.sav differ
diff --git a/unittests/test_transformers.py b/unittests/test_transformers.py
index 02d932d13cc3fad52048b08e2b9fe56f11db2ae7..4ed12751d9052c839aa4db4abd586c419bed1018 100644
--- a/unittests/test_transformers.py
+++ b/unittests/test_transformers.py
@@ -34,7 +34,7 @@ from pathlib import Path
 from tempfile import NamedTemporaryFile
 from unittest.mock import MagicMock, Mock, patch
 
-import caosdb as db
+import linkahead as db
 import pytest
 import yaml
 from caoscrawler.converters import Converter, ListElementConverter
@@ -46,6 +46,38 @@ from pytest import raises
 UNITTESTDIR = Path(__file__).parent
 
 
+@pytest.fixture
+def converter_registry():
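+    """Provide a mapping from structure-element types to their converter
+    class and defining package; the classes are resolved via importlib below."""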
+    converter_registry: dict[str, dict[str, str]] = {
+        "Directory": {
+            "converter": "DirectoryConverter",
+            "package": "caoscrawler.converters"},
+        "MarkdownFile": {
+            "converter": "MarkdownFileConverter",
+            "package": "caoscrawler.converters"},
+        "Date": {
+            "converter": "DateElementConverter",
+            "package": "caoscrawler.converters"},
+        "DictElement": {
+            "converter": "DictElementConverter",
+            "package": "caoscrawler.converters"},
+        "TextElement": {
+            "converter": "TextElementConverter",
+            "package": "caoscrawler.converters"},
+        "ListElement": {
+            "converter": "ListElementConverter",
+            "package": "caoscrawler.converters"},
+        "JSONFile": {
+            "converter": "JSONFileConverter",
+            "package": "caoscrawler.converters"},
+    }
+
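+    # Resolve each converter class from its package so tests can instantiate it.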
+    for key, value in converter_registry.items():
+        module = importlib.import_module(value["package"])
+        value["class"] = getattr(module, value["converter"])
+    return converter_registry
+
+
 def test_simple_transformer():
     """
     Test the correct list of returned records by the scanner using the
@@ -82,38 +114,6 @@ def test_simple_transformer():
             assert False
 
 
-@pytest.fixture
-def converter_registry():
-    converter_registry: dict[str, dict[str, str]] = {
-        "Directory": {
-            "converter": "DirectoryConverter",
-            "package": "caoscrawler.converters"},
-        "MarkdownFile": {
-            "converter": "MarkdownFileConverter",
-            "package": "caoscrawler.converters"},
-        "Date": {
-            "converter": "DateElementConverter",
-            "package": "caoscrawler.converters"},
-        "DictElement": {
-            "converter": "DictElementConverter",
-            "package": "caoscrawler.converters"},
-        "TextElement": {
-            "converter": "TextElementConverter",
-            "package": "caoscrawler.converters"},
-        "ListElement": {
-            "converter": "ListElementConverter",
-            "package": "caoscrawler.converters"},
-        "JSONFile": {
-            "converter": "JSONFileConverter",
-            "package": "caoscrawler.converters"},
-    }
-
-    for key, value in converter_registry.items():
-        module = importlib.import_module(value["package"])
-        value["class"] = getattr(module, value["converter"])
-    return converter_registry
-
-
 def test_apply_replace(converter_registry):
     cfood_def = {"type": 'ListElement', "match_name": ".*",
                  'transform': {'test': {'in': '$a', 'out': '$b', 'functions': [{
@@ -146,3 +146,21 @@ def test_apply_replace_from_def(converter_registry):
 
     conv.apply_transformers(values, transformer_functions)
     assert values['b'] == "16:45"
+
+
+def test_empty_functions_list(converter_registry):
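+    """With an empty 'functions' list, the value is copied from '$a' to '$b' unchanged."""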
+    cfood_def = {"type": 'ListElement',
+                 "match_name": ".*",
+                 'transform': {'test': {'in': '$a', 'out': '$b',
+                                        'functions': []}}}
+    values = GeneralStore()
+    values["a"] = "16_45"
+
+    # Register the "replace" transformer function directly for this test.
+    transformer_functions = {"replace": replace}
+
+    conv = ListElementConverter(definition=cfood_def, name='test',
+                                converter_registry=converter_registry)
+
+    conv.apply_transformers(values, transformer_functions)
+    assert values['b'] == "16_45"
diff --git a/unittests/test_utilities.py b/unittests/test_utilities.py
index 5a80ab9b230db4540d741bf8fa4f9d11b5158aab..dfb79c8b6b10909952174cf24c3aa9198f3b7743 100644
--- a/unittests/test_utilities.py
+++ b/unittests/test_utilities.py
@@ -19,7 +19,10 @@
 # along with this program. If not, see <https://www.gnu.org/licenses/>.
 #
 
+import pytest
+
 from caoscrawler.crawl import split_restricted_path
+from caoscrawler.utils import MissingImport
 
 
 def test_split_restricted_path():
@@ -33,3 +36,33 @@ def test_split_restricted_path():
     assert split_restricted_path("/test//bla") == ["test", "bla"]
     assert split_restricted_path("//test/bla") == ["test", "bla"]
     assert split_restricted_path("///test//bla////") == ["test", "bla"]
+
+
+def test_dummy_class():
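+    """Accessing any attribute of, or calling, a MissingImport placeholder
+    raises a RuntimeError that names the missing import and repeats the hint."""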
+    Missing = MissingImport(name="Not Important", hint="Do the thing instead.")
+    with pytest.raises(RuntimeError) as err_info_1:
+        print(Missing.__name__)
+    with pytest.raises(RuntimeError) as err_info_2:
+        Missing()
+    with pytest.raises(RuntimeError) as err_info_3:
+        print(Missing.foo)
+
+    for err_info in (err_info_1, err_info_2, err_info_3):
+        msg = str(err_info.value)
+        assert "(Not Important)" in msg
+        assert msg.endswith("Do the thing instead.")
+
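+    # When an original ImportError is given, it must be chained as __cause__.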
+    MissingErr = MissingImport(name="Not Important", hint="Do the thing instead.",
+                               err=ImportError("Old error"))
+    with pytest.raises(RuntimeError) as err_info_1:
+        print(MissingErr.__name__)
+    with pytest.raises(RuntimeError) as err_info_2:
+        MissingErr()
+    with pytest.raises(RuntimeError) as err_info_3:
+        print(MissingErr.foo)
+
+    for err_info in (err_info_1, err_info_2, err_info_3):
+        msg = str(err_info.value)
+        assert "(Not Important)" in msg
+        orig_msg = str(err_info.value.__cause__)
+        assert orig_msg == "Old error"
diff --git a/unittests/test_variable_substitutions.py b/unittests/test_variable_substitutions.py
index 09f78df661d82970e7264996102eff8881ee19ec..90d144b04a4e1271f74b769759e3f201007af705 100644
--- a/unittests/test_variable_substitutions.py
+++ b/unittests/test_variable_substitutions.py
@@ -25,7 +25,7 @@ from os.path import basename, dirname, join
 from pathlib import Path
 from unittest.mock import MagicMock, Mock
 
-import caosdb as db
+import linkahead as db
 import pytest
 import yaml
 from caoscrawler import Crawler
@@ -35,7 +35,7 @@ from caoscrawler.identifiable_adapters import (IdentifiableAdapter,
 from caoscrawler.scanner import scan_directory
 from caoscrawler.structure_elements import (DictListElement, DictTextElement,
                                             File)
-from caosdb.apiutils import compare_entities
+from linkahead.apiutils import compare_entities
 from pytest import raises
 
 from utils import dircheckstr as dircheckstr_base