diff --git a/CHANGELOG.md b/CHANGELOG.md
index 9036ad3c8fff8eb873e75221cdd0c0bdb2a92498..d28cb8be7569b470531961f863ad8f08fa40aec2 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -23,6 +23,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 ### Removed ###
 
 ### Fixed ###
+- usage of ID when looking for identified records
 - Query generation when there are only backrefs or backrefs and a name
 
 ### Security ###
diff --git a/integrationtests/basic_example/test_basic.py b/integrationtests/basic_example/test_basic.py
index b33974d9c2c5600bf2a91cbf14d7c8799ffc2644..15aee62bbb9bf253607dc0bb04c44f3baae2548d 100755
--- a/integrationtests/basic_example/test_basic.py
+++ b/integrationtests/basic_example/test_basic.py
@@ -45,6 +45,7 @@ import yaml
 from caosdb.utils.register_tests import clear_database, set_test_key
 set_test_key("10b128cf8a1372f30aa3697466bb55e76974e0c16a599bb44ace88f19c8f61e2")
 
+# TODO move test related stuff here and remove it from unittests
 
 def rfp(*pathcomponents):
     """
diff --git a/src/caoscrawler/crawl.py b/src/caoscrawler/crawl.py
index 7542a6592c0abfeb03056ea9ef5f230ecac7564a..7ea1cc20537060d1f94b9e9c9b233141acc0f565 100644
--- a/src/caoscrawler/crawl.py
+++ b/src/caoscrawler/crawl.py
@@ -833,7 +833,6 @@ class Crawler(object):
         to_be_inserted, to_be_updated = self.split_into_inserts_and_updates(crawled_data)
         referencing_entities = self.create_reference_mapping(to_be_updated + to_be_inserted)
 
-        # TODO: refactoring of typo
         for el in to_be_updated:
             # all entity objects are replaced by their IDs except for the not yet inserted ones
             self.replace_entities_with_ids(el)
diff --git a/src/caoscrawler/identifiable_adapters.py b/src/caoscrawler/identifiable_adapters.py
index 241685b5cfe9d87acad16e0c6a871d9ea6ad79e3..9122cc4a882b94e3a6bba27921f59bc4bbc9d9a0 100644
--- a/src/caoscrawler/identifiable_adapters.py
+++ b/src/caoscrawler/identifiable_adapters.py
@@ -27,6 +27,7 @@ from __future__ import annotations
 import yaml
 
 from datetime import datetime
+from caosdb.cached import cached_get_entity_by
 from typing import Any
 from .identifiable import Identifiable
 import caosdb as db
@@ -264,8 +265,6 @@ identifiabel, identifiable and identified record) for a Record.
         """
         pass
 
-    # TODO: remove side effect
-    # TODO: use ID if record has one?
     def retrieve_identified_record_for_record(self, record: db.Record, referencing_entities=None):
         """
         This function combines all functionality of the IdentifierAdapter by
@@ -275,10 +274,12 @@ identifiabel, identifiable and identified record) for a Record.
         In case there was no appropriate registered identifiable or no identifiable could
         be found return value is None.
         """
-        identifiable = self.get_identifiable(record, referencing_entities=referencing_entities)
+        if record.path is not None:
+            return cached_get_entity_by(path=record.path)
+        if record.id is not None:
+            return cached_get_entity_by(eid=record.id)
 
-        if identifiable.path is not None:
-            return self.get_file(identifiable)
+        identifiable = self.get_identifiable(record, referencing_entities=referencing_entities)
 
         return self.retrieve_identified_record_for_identifiable(identifiable)
 
@@ -450,6 +451,10 @@ class CaosDBIdentifiableAdapter(IdentifiableAdapter):
         self._registered_identifiables[name] = definition
 
     def get_file(self, identifiable: Identifiable):
+        # TODO is this needed for Identifiable?
+        # or can we get rid of this function?
+        if isinstance(identifiable, db.Entity):
+            return cached_get_entity_by(path=identifiable)
         if identifiable.path is None:
             raise RuntimeError("Path must not be None for File retrieval.")
         candidates = db.execute_query("FIND File which is stored at '{}'".format(
diff --git a/unittests/example_cfood.yml b/unittests/example_cfood.yml
new file mode 100644
index 0000000000000000000000000000000000000000..713bd4be0f3c816e1e8c8b7a057b30a4b400f13c
--- /dev/null
+++ b/unittests/example_cfood.yml
@@ -0,0 +1,47 @@
+---
+metadata:
+  crawler-version: 0.3.1
+---
+Definitions:
+  type: Definitions
+
+data:
+  type: Dict
+  match_name: '.*'
+  subtree:
+    Experiments:
+      type: ListElement
+      match_name: 'Experiments'
+      subtree:
+        Experiment:
+          type: DictElement
+          match: '.*'
+          records:
+            Ent:
+              parents: ["Experiment"]
+          subtree: &date_res
+            date:
+              type: Date
+              match_name: 'date'
+              match_value: '(?P<date>.*)'
+              records:
+                Ent:
+                  date: $date
+            result:
+              type: TextElement
+              match_name: 'result'
+              match_value: '(?P<res>.*)'
+              records:
+                Ent:
+                  result: $res
+    Analyses:
+      type: ListElement
+      match_name: 'Analyses'
+      subtree:
+        Analysis:
+          type: DictElement
+          match: '.*'
+          records:
+            Ent:
+              parents: ["Analysis"]
+          subtree: *date_res
diff --git a/unittests/example_datastructure.yml b/unittests/example_datastructure.yml
new file mode 100644
index 0000000000000000000000000000000000000000..1ec9d4575c7216fe5b8954db22cd9f2d03a7e749
--- /dev/null
+++ b/unittests/example_datastructure.yml
@@ -0,0 +1,10 @@
+Experiments:
+  - date: 2022-02-01
+    result: FAIL
+  - date: 2022-02-02
+    result: SUCCESS
+Analyses:
+  - date: 2022-03-01
+    result: homogeneous
+  - date: 2022-03-02
+    result: heterogeneous
diff --git a/unittests/example_identifiables.yml b/unittests/example_identifiables.yml
new file mode 100644
index 0000000000000000000000000000000000000000..7e7da0a74cc202178bcae8a70be52d85f660d4e6
--- /dev/null
+++ b/unittests/example_identifiables.yml
@@ -0,0 +1,4 @@
+Experiment:
+  - date
+Analysis:
+  - date
diff --git a/unittests/simulated_server_data.py b/unittests/simulated_server_data.py
deleted file mode 100644
index dd0c6b4e8693d64c9d96cafc5db2f447613daa1b..0000000000000000000000000000000000000000
--- a/unittests/simulated_server_data.py
+++ /dev/null
@@ -1,24 +0,0 @@
-
-import caosdb as db
-data_model = {"person": (db.RecordType(id=259, name="Person")
-                         .add_property(name="first_name")
-                         .add_property(name="last_name")),
-              "measurement": (db.RecordType(id=278, name="Measurement")
-                              .add_property(name="identifier")
-                              .add_property(name="date")
-                              .add_property(name="project")),
-              "project": (db.RecordType(id=250, name="Project")
-                          .add_property(name="date")
-                          .add_property(name="identifier")),
-              "first_name": db.Property(name="first_name", datatype=db.TEXT, id=261),
-              "responsible": db.Property(name="responsible", datatype="Person", id=249),
-              "last_name": db.Property(name="last_name", datatype=db.TEXT, id=262),
-              "identifier": db.Property(name="identifier", datatype=db.TEXT, id=248),
-              "date": db.Property(name="date", datatype=db.DATETIME, id=247),
-              }
-existing_data = {
-}
-
-full_data = {}
-full_data.update(data_model)
-full_data.update(existing_data)
diff --git a/unittests/test_converters.py b/unittests/test_converters.py
index 154724be6d126aefb430c7d0600b86a5ec721812..ab5710feaaf14babc3fed65f10598250e53ffd9b 100644
--- a/unittests/test_converters.py
+++ b/unittests/test_converters.py
@@ -23,31 +23,35 @@
 """
 test the converters module
 """
+import datetime
+import importlib
 import json
-import yaml
 import logging
-import sys
-import importlib
 import os
+import sys
 from itertools import product
-import datetime
+from pathlib import Path
+
 import pytest
 import yaml
-
-from caoscrawler.converters import (Converter, ConverterValidationError, DictElementConverter,
-                                    DirectoryConverter, DictIntegerElementConverter,
-                                    handle_value, MarkdownFileConverter, DateElementConverter,
-                                    FloatElementConverter, IntegerElementConverter,
-                                    JSONFileConverter, YAMLFileConverter)
-from caoscrawler.converters import _AbstractScalarValueElementConverter
+from caoscrawler.converters import (Converter, ConverterValidationError,
+                                    DateElementConverter, DictElementConverter,
+                                    DictIntegerElementConverter,
+                                    DirectoryConverter, FloatElementConverter,
+                                    IntegerElementConverter, JSONFileConverter,
+                                    MarkdownFileConverter, YAMLFileConverter,
+                                    _AbstractScalarValueElementConverter,
+                                    handle_value)
 from caoscrawler.crawl import Crawler
+from caoscrawler.scanner import (_load_definition_from_yaml_dict,
+                                 create_converter_registry, load_definition)
 from caoscrawler.stores import GeneralStore
-from caoscrawler.structure_elements import (File, TextElement, ListElement, DictElement,
-                                            BooleanElement, IntegerElement,
-                                            FloatElement, Directory)
-from caoscrawler.scanner import load_definition, _load_definition_from_yaml_dict, create_converter_registry
+from caoscrawler.structure_elements import (BooleanElement, DictElement,
+                                            Directory, File, FloatElement,
+                                            IntegerElement, ListElement,
+                                            TextElement)
 
-from test_tool import rfp
+UNITTESTDIR = Path(__file__).parent
 
 
 @pytest.fixture
@@ -108,7 +112,7 @@ def testDirectoryConverter(converter_registry):
         },
         name="Test", converter_registry=converter_registry)
     elements = dc.create_children(GeneralStore(),
-                                  Directory("test_directories", rfp("test_directories")))
+                                  Directory("test_directories", UNITTESTDIR / "test_directories"))
 
     # Check whether the right structure elements were created
     # this has been updated, there are more directories now
@@ -125,17 +129,16 @@ def testDirectoryConverter(converter_registry):
 def test_markdown_converter(converter_registry):
     test_readme = File(
         "README.md",
-        rfp(
-            "test_directories", "examples_article", "DataAnalysis",
-            "2020_climate-model-predict", "2020-02-08_prediction-errors", "README.md"
-        )
+        UNITTESTDIR /
+        "test_directories" / "examples_article" / "DataAnalysis" /
+        "2020_climate-model-predict" / "2020-02-08_prediction-errors" / "README.md"
     )
 
     converter = MarkdownFileConverter({"match": "(.*)"}, "TestMarkdownFileConverter",
                                       converter_registry)
 
     with pytest.raises(ConverterValidationError) as err:
-        converter.create_children(None, File("test_tool.py", rfp("test_tool.py")))
+        converter.create_children(None, File("test_tool.py", UNITTESTDIR / "test_crawler.py"))
 
     m = converter.match(test_readme)
     assert m is not None
@@ -163,8 +166,8 @@ def test_markdown_converter(converter_registry):
 
     test_readme2 = File(
         "README.md",
-        rfp("test_directories", "examples_article",
-            "ExperimentalData", "2020_SpeedOfLight", "2020-01-01_TimeOfFlight", "README.md")
+        UNITTESTDIR/"test_directories" / "examples_article" /
+        "ExperimentalData" / "2020_SpeedOfLight" / "2020-01-01_TimeOfFlight" / "README.md"
     )
 
     m = converter.match(test_readme2)
@@ -183,8 +186,8 @@ def test_markdown_converter(converter_registry):
 
 
 def test_json_converter(converter_registry):
-    test_json = File("testjson.json", rfp(
-        "test_directories", "examples_json", "testjson.json"))
+    test_json = File("testjson.json", UNITTESTDIR /
+                     "test_directories" / "examples_json" / "testjson.json")
 
     schema_path = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                                "test_directories", "examples_json", "testjson.schema.json")
@@ -241,7 +244,7 @@ def test_json_converter(converter_registry):
 
     invalid_json = File(
         "invalidjson.json",
-        rfp("test_directories", "examples_json", "invalidjson.json")
+        UNITTESTDIR/"test_directories" / "examples_json" / "invalidjson.json"
     )
     # Doesn't validate because of missing required 'name' property
     with pytest.raises(ConverterValidationError) as err:
@@ -250,15 +253,15 @@ def test_json_converter(converter_registry):
 
     broken_json = File(
         "brokenjson.json",
-        rfp("test_directories", "examples_json", "brokenjson.json")
+        UNITTESTDIR/"test_directories" / "examples_json" / "brokenjson.json"
     )
     with pytest.raises(json.decoder.JSONDecodeError) as err:
         jsonconverter.create_children(None, broken_json)
 
 
 def test_yaml_converter(converter_registry):
-    test_yaml = File("testyaml.yml", rfp(
-        "test_directories", "test_yamls", "testyaml.yml"))
+    test_yaml = File("testyaml.yml", UNITTESTDIR /
+                     "test_directories" / "test_yamls" / "testyaml.yml")
 
     schema_path = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                                "test_directories", "test_yamls", "testyaml.schema.json")
@@ -315,7 +318,7 @@ def test_yaml_converter(converter_registry):
 
     invalid_yaml = File(
         "invalidyaml.yml",
-        rfp("test_directories", "test_yamls", "invalidyaml.yml")
+        UNITTESTDIR/"test_directories" / "test_yamls" / "invalidyaml.yml"
     )
 
     # Doesn't validate because of missing required 'name' property
@@ -325,7 +328,7 @@ def test_yaml_converter(converter_registry):
 
     broken_yaml = File(
         "brokenyaml.yml",
-        rfp("test_directories", "test_yamls", "brokenyaml.yml")
+        UNITTESTDIR/"test_directories" / "test_yamls" / "brokenyaml.yml"
     )
     with pytest.raises(yaml.parser.ParserError) as err:
         yamlconverter.create_children(None, broken_yaml)
@@ -361,12 +364,9 @@ def test_variable_replacement():
 
 
 def test_filter_children_of_directory(converter_registry, capsys):
-    """Verify that children (i.e., files) in a directory are filtered or sorted
-    correctly.
-
-    """
-    test_dir = Directory("examples_filter_children", rfp(
-        "test_directories", "examples_filter_children"))
+    """Verify that children (i.e., files) in a directory are filtered or sorted correctly. """
+    test_dir = Directory("examples_filter_children", UNITTESTDIR /
+                         "test_directories" / "examples_filter_children")
 
     dc = DirectoryConverter(
         definition={
diff --git a/unittests/test_crawler.py b/unittests/test_crawler.py
index f3ad73c5d75acea5fd3e92954e3899983ea73a2a..1722ec0fe3291d6e96042870d65af4e249671e82 100644
--- a/unittests/test_crawler.py
+++ b/unittests/test_crawler.py
@@ -24,18 +24,85 @@
 test the Crawler class
 """
 import json
+import logging
 import os
+import warnings
+from copy import deepcopy
+from functools import partial
+from os.path import basename, dirname, join
+from pathlib import Path
+from unittest.mock import MagicMock, Mock, patch
 
+import caosdb as db
+import caosdb.common.models as dbmodels
+import pytest
+import yaml
+from caoscrawler.crawl import (Crawler, SecurityMode, _treat_deprecated_prefix,
+                               crawler_main, split_restricted_path)
+from caoscrawler.debug_tree import DebugTree
+from caoscrawler.identifiable import Identifiable
+from caoscrawler.identifiable_adapters import (CaosDBIdentifiableAdapter,
+                                               IdentifiableAdapter,
+                                               LocalStorageIdentifiableAdapter)
+from caoscrawler.scanner import (create_converter_registry, scan_directory,
+                                 scan_structure_elements)
+from caoscrawler.stores import GeneralStore, RecordStore
+from caoscrawler.structure_elements import (DictElement, DictListElement,
+                                            DictTextElement, File)
+from caosdb.apiutils import compare_entities
+from caosdb.cached import cache_clear
+from caosdb.exceptions import EmptyUniqueQueryError
 from pytest import raises
 
-import caosdb as db
+UNITTESTDIR = Path(__file__).parent
 
-from caoscrawler.stores import GeneralStore
-from caoscrawler.crawl import Crawler
-import warnings
+EXAMPLE_SERVER_STATE = [
+    db.Property(id=1, name='result', datatype=db.TEXT),
+    db.Property(id=2, name='date', datatype=db.DATETIME),
+    db.RecordType(id=3, name="Experiment"),
+    db.RecordType(id=4, name="Analysis"),
+    db.Record(id=5)
+    .add_parent(name="Experiment", id=3)
+    .add_property(name="date", value="2022-02-01")
+    .add_property(name="result", value="FAIL"),
+    db.Record(id=6)
+    .add_parent(name="Experiment", id=3)
+    .add_property(name="date", value="2022-02-02")
+    .add_property(name="result", value="SUCCESS"),
+    db.Record(id=7)
+    .add_parent(name="Analysis", id=4)
+    .add_property(name="date", value="2022-03-01")
+    .add_property(name="result", value="homogeneous"),
+    db.Record(id=8)
+    .add_parent(name="Analysis", id=4)
+    .add_property(name="date", value="2022-03-02")
+    .add_property(name="result", value="heterogeneous"),
+]
+NEW_ELEMENT = (db.Record()
+               .add_parent(name="Analysis", id=4)
+               .add_property(name="date", value="2022-03-05")  # new date
+               .add_property(name="result", value="homogeneous"))
+
+
+def mock_get_entity_by(eid=None, name=None):
+    if eid is not None:
+        candidates = [el for el in EXAMPLE_SERVER_STATE if el.id == eid]
+        if len(candidates) > 0:
+            return candidates[0]
+        else:
+            raise EmptyUniqueQueryError("")
+    if name is not None:
+        candidates = [el for el in EXAMPLE_SERVER_STATE
+                      if (el.name is not None and el.name.lower() == name.lower())]
+        if len(candidates) > 0:
+            return candidates[0]
+        else:
+            raise EmptyUniqueQueryError("")
 
-from test_tool import rfp
-import pytest
+
+@pytest.fixture(autouse=True)
+def clear_cache():
+    cache_clear()
 
 
 @pytest.mark.filterwarnings("ignore::DeprecationWarning")
@@ -61,7 +128,7 @@ def test_deprecated_functions():
         warnings.filterwarnings("ignore")
         warnings.filterwarnings("always", category=DeprecationWarning)
         cr = Crawler()
-        cr.crawl_directory(".", rfp("scifolder_cfood.yml"))
+        cr.crawl_directory(UNITTESTDIR, UNITTESTDIR/"scifolder_cfood.yml")
         print(w)
         print(w[0].message)
         assert issubclass(w[-1].category, DeprecationWarning)
@@ -74,3 +141,691 @@ def test_deprecated_functions():
         cr.crawled_data
         assert issubclass(w[-1].category, DeprecationWarning)
         assert "The use of self.crawled_data is depricated" in str(w[-1].message)
+
+
+def test_remove_unnecessary_updates():
+    # test trivial case
+    upl = [db.Record().add_parent("A")]
+    irs = [db.Record().add_parent("A")]
+    updates = Crawler.remove_unnecessary_updates(upl, irs)
+    assert len(updates) == 0
+
+    # test property difference case
+    # TODO this should work right?
+    # upl = [db.Record().add_parent("A").add_property("a", 3)]
+    # irs = [db.Record().add_parent("A")]  # ID should be set
+    # Crawler.remove_unnecessary_updates(upl, irs)
+    # assert len(upl) == 1
+
+    # test value difference case
+    upl = [db.Record().add_parent("A").add_property("a", 5)]
+    irs = [db.Record().add_parent("A").add_property("a")]
+    updates = Crawler.remove_unnecessary_updates(upl, irs)
+    assert len(updates) == 1
+    upl = [db.Record().add_parent("A").add_property("a", 5)]
+    irs = [db.Record().add_parent("A").add_property("a", 5)]
+    updates = Crawler.remove_unnecessary_updates(upl, irs)
+    assert len(updates) == 0
+
+    # test unit difference case
+    upl = [db.Record().add_parent("A").add_property("a", unit='cm')]
+    irs = [db.Record().add_parent("A").add_property("a")]
+    updates = Crawler.remove_unnecessary_updates(upl, irs)
+    assert len(updates) == 1
+
+    # test None difference case
+    upl = [db.Record().add_parent("A").add_property("a")]
+    irs = [db.Record().add_parent("A").add_property("a", 5)]
+    updates = Crawler.remove_unnecessary_updates(upl, irs)
+    assert len(updates) == 1
+
+
+def test_split_into_inserts_and_updates_trivial():
+    crawler = Crawler()
+    crawler.split_into_inserts_and_updates([])
+
+
+def basic_retrieve_by_name_mock_up(rec, referencing_entities=None, known=None):
+    """ returns a stored Record if rec.name is an existing key, None otherwise """
+    if rec.name in known:
+        return known[rec.name]
+    else:
+        return None
+
+
+@pytest.fixture
+def crawler_mocked_identifiable_retrieve():
+    crawler = Crawler()
+    # TODO use minimal setup
+    # mock retrieval of registered identifiables: return Record with just a parent
+    crawler.identifiableAdapter.get_registered_identifiable = Mock(
+        side_effect=lambda x: db.Record().add_parent(x.parents[0].name))
+
+    # Simulate remote server content by using the names to identify records
+    # There is only a single known Record with name A
+    crawler.identifiableAdapter.retrieve_identified_record_for_record = Mock(side_effect=partial(
+        basic_retrieve_by_name_mock_up, known={"A": db.Record(id=1111, name="A")}))
+    crawler.identifiableAdapter.retrieve_identified_record_for_identifiable = Mock(
+        side_effect=partial(
+            basic_retrieve_by_name_mock_up, known={"A": db.Record(id=1111, name="A")}))
+    return crawler
+
+
+def test_split_into_inserts_and_updates_single(crawler_mocked_identifiable_retrieve):
+    crawler = crawler_mocked_identifiable_retrieve
+    identlist = [Identifiable(name="A", record_type="C"), Identifiable(name="B", record_type="C")]
+    entlist = [db.Record(name="A").add_parent(
+        "C"), db.Record(name="B").add_parent("C")]
+
+    assert crawler.get_from_any_cache(identlist[0]) is None
+    assert crawler.get_from_any_cache(identlist[1]) is None
+    assert not crawler._has_reference_value_without_id(identlist[0])
+    assert not crawler._has_reference_value_without_id(identlist[1])
+    assert crawler.identifiableAdapter.retrieve_identified_record_for_record(
+        identlist[0]).id == 1111
+    assert crawler.identifiableAdapter.retrieve_identified_record_for_record(
+        identlist[1]) is None
+
+    insert, update = crawler.split_into_inserts_and_updates(deepcopy(entlist))
+    assert len(insert) == 1
+    assert insert[0].name == "B"
+    assert len(update) == 1
+    assert update[0].name == "A"
+    # if this ever fails, the mock up may be removed
+    crawler.identifiableAdapter.get_registered_identifiable.assert_called()
+    crawler.identifiableAdapter.retrieve_identified_record_for_identifiable.assert_called()
+
+
+def test_split_into_inserts_and_updates_with_duplicate(crawler_mocked_identifiable_retrieve):
+    crawler = crawler_mocked_identifiable_retrieve
+    a = db.Record(name="A").add_parent("C")
+    b = db.Record(name="B").add_parent("C")
+    b.add_property("A", a)
+    # This is identical to a and should be removed
+    c = db.Record(name="A").add_parent("C")
+    entlist = [a, b, c]
+    insert, update = crawler.split_into_inserts_and_updates(deepcopy(entlist))
+    assert len(insert) == 1
+    assert insert[0].name == "B"
+    assert len(update) == 1
+    assert update[0].name == "A"
+    # if this ever fails, the mock up may be removed
+    crawler.identifiableAdapter.get_registered_identifiable.assert_called()
+    crawler.identifiableAdapter.retrieve_identified_record_for_identifiable.assert_called()
+
+
+def test_split_into_inserts_and_updates_with_ref(crawler_mocked_identifiable_retrieve):
+    crawler = crawler_mocked_identifiable_retrieve
+    # try it with a reference
+    a = db.Record(name="A").add_parent("C")
+    b = db.Record(name="B").add_parent("C")
+    b.add_property("A", a)
+    entlist = [a, b]
+    insert, update = crawler.split_into_inserts_and_updates(entlist)
+    assert len(insert) == 1
+    assert insert[0].name == "B"
+    assert len(update) == 1
+    assert update[0].name == "A"
+    # if this ever fails, the mock up may be removed
+    crawler.identifiableAdapter.get_registered_identifiable.assert_called()
+    crawler.identifiableAdapter.retrieve_identified_record_for_identifiable.assert_called()
+
+
+def test_split_into_inserts_and_updates_with_circ():
+    # try circular
+    a = db.Record(name="A").add_parent("C")
+    b = db.Record(name="B").add_parent("C")
+    b.add_property("A", a)
+    a.add_property("B", b)
+    entlist = [a, b]
+    # TODO this does not seem to be complete!
+
+
+def test_split_into_inserts_and_updates_with_complex(crawler_mocked_identifiable_retrieve):
+    crawler = crawler_mocked_identifiable_retrieve
+    #      A
+    #      ^
+    #      |
+    # F <- B <- G
+    a = db.Record(name="A").add_parent("C").add_property(
+        'd', 13).add_property('e', "lskdjlsfdj")
+    b = db.Record(name="B").add_parent("C")
+    g = db.Record(name="G").add_parent("C")
+    f = db.Record(name="F").add_parent("C")
+    g.add_property("A", a)
+    b.add_property("A", f)
+    b.add_property("A", a)
+    entlist = [a, b, g]
+    insert, update = crawler.split_into_inserts_and_updates(entlist)
+    assert len(insert) == 3
+    assert "B" in [el.name for el in insert]
+    assert len(update) == 1
+    assert update[0].name == "A"
+    # if this ever fails, the mock up may be removed
+    crawler.identifiableAdapter.get_registered_identifiable.assert_called()
+    crawler.identifiableAdapter.retrieve_identified_record_for_identifiable.assert_called()
+
+    # TODO write test where the unresolved entity is not part of the identifiable
+
+
+def test_split_into_inserts_and_updates_with_copy_attr(crawler_mocked_identifiable_retrieve):
+    crawler = crawler_mocked_identifiable_retrieve
+    # assume identifiable is only the name
+    a = db.Record(name="A").add_parent("C")
+    a.add_property("foo", 1)
+    b = db.Record(name="A").add_parent("C")
+    b.add_property("bar", 2)
+    entlist = [a, b]
+    insert, update = crawler.split_into_inserts_and_updates(entlist)
+
+    assert update[0].get_property("bar").value == 2
+    assert update[0].get_property("foo").value == 1
+    # if this ever fails, the mock up may be removed
+    crawler.identifiableAdapter.get_registered_identifiable.assert_called()
+    crawler.identifiableAdapter.retrieve_identified_record_for_identifiable.assert_called()
+
+
+def test_has_missing_object_in_references():
+    crawler = Crawler()
+    # Simulate remote server content by using the names to identify records
+    # There are only two known Records with name A and B
+    crawler.identifiableAdapter.get_registered_identifiable = Mock(side_effect=partial(
+        basic_retrieve_by_name_mock_up, known={"C": db.Record(name="C").add_parent("RTC")
+                                               .add_property("d"),
+                                               "D": db.Record(name="D").add_parent("RTD")
+                                               .add_property("d").add_property("e"),
+                                               }))
+
+    # one reference with id -> check
+    assert not crawler._has_missing_object_in_references(
+        Identifiable(name="C", record_type="RTC", properties={'d': 123}), [])
+    # one ref with Entity with id -> check
+    assert not crawler._has_missing_object_in_references(
+        Identifiable(name="C", record_type="RTC", properties={'d': db.Record(id=123)
+                                                              .add_parent("C")}), [])
+    # one ref with id one with Entity with id (mixed) -> check
+    assert not crawler._has_missing_object_in_references(
+        Identifiable(name="C", record_type="RTD",
+                     properties={'d': 123, 'b': db.Record(id=123).add_parent("RTC")}), [])
+    # entity to be referenced in the following
+    a = db.Record(name="C").add_parent("C").add_property("d", 12311)
+    # one ref with id one with Entity without id (but not identifying) -> fail
+    assert not crawler._has_missing_object_in_references(
+        Identifiable(name="C", record_type="RTC", properties={'d': 123, 'e': a}), [])
+
+    # one ref with id one with Entity without id (mixed) -> fail
+    assert not crawler._has_missing_object_in_references(
+        Identifiable(name="D", record_type="RTD", properties={'d': 123, 'e': a}), [])
+
+    crawler.add_to_remote_missing_cache(a, Identifiable(name="C", record_type="RTC",
+                                                        properties={'d': 12311}))
+    # one ref with id one with Entity without id but in cache -> check
+    assert crawler._has_missing_object_in_references(
+        Identifiable(name="D", record_type="RTD", properties={'d': 123, 'e': a}), [])
+
+    # if this ever fails, the mock up may be removed
+    crawler.identifiableAdapter.get_registered_identifiable.assert_called()
+
+
+@pytest.mark.xfail()
+def test_references_entities_without_ids():
+    crawler = Crawler()
+    assert not crawler._has_reference_value_without_id(db.Record().add_parent("Person")
+                                                       .add_property('last_name', 123)
+                                                       .add_property('first_name', 123))
+    # id and rec with id
+    assert not crawler._has_reference_value_without_id(db.Record().add_parent("Person")
+                                                       .add_property('first_name', 123)
+                                                       .add_property('last_name',
+                                                                     db.Record(id=123)))
+    # id and rec with id and one unneeded prop
+    assert crawler._has_reference_value_without_id(db.Record().add_parent("Person")
+                                                   .add_property('first_name', 123)
+                                                   .add_property('stuff', db.Record())
+                                                   .add_property('last_name', db.Record(id=123)))
+
+    # one identifying prop is missing
+    assert crawler._has_reference_value_without_id(db.Record().add_parent("Person")
+                                                   .add_property('first_name', 123)
+                                                   .add_property('last_name', db.Record()))
+
+
+def test_replace_entities_with_ids():
+    crawler = Crawler()
+    a = (db.Record().add_parent("B").add_property("A", 12345)
+         .add_property("B", db.Record(id=12345))
+         .add_property("C", [db.Record(id=12345), 233324]))
+
+    crawler.replace_entities_with_ids(a)
+    assert a.get_property("A").value == 12345
+    assert a.get_property("B").value == 12345
+    assert a.get_property("C").value == [12345, 233324]
+
+
+def reset_mocks(mocks):
+    for mock in mocks:
+        mock.reset_mock()
+
+
+def mock_retrieve_record(identifiable: Identifiable):
+    """ assumes that the identifiable is always only the date"""
+
+    for record in EXAMPLE_SERVER_STATE:
+        if (record.role == "Record"
+                and record.get_property("date").value == identifiable.properties['date']):
+            return record
+    return None
+
+
+@patch("caoscrawler.crawl.cached_get_entity_by",
+       new=Mock(side_effect=mock_get_entity_by))
+@patch("caoscrawler.identifiable_adapters.cached_get_entity_by",
+       new=Mock(side_effect=mock_get_entity_by))
+@patch("caoscrawler.identifiable_adapters.CaosDBIdentifiableAdapter."
+       "retrieve_identified_record_for_identifiable",
+       new=Mock(side_effect=mock_retrieve_record))
+@patch("caoscrawler.crawl.db.Container.insert")
+@patch("caoscrawler.crawl.db.Container.update")
+def test_synchronization_no_commit(upmock, insmock):
+    crawled_data = [r.copy() for r in EXAMPLE_SERVER_STATE if r.role == "Record"]
+    # change  one; add one
+    crawled_data[-1].get_property('result').value = "wst"
+    crawled_data.append(NEW_ELEMENT.copy())
+
+    ident = CaosDBIdentifiableAdapter()
+    ident.load_from_yaml_definition(UNITTESTDIR/"example_identifiables.yml")
+    crawler = Crawler(securityMode=SecurityMode.UPDATE, identifiableAdapter=ident)
+    ins, ups = crawler.synchronize(commit_changes=False, crawled_data=crawled_data)
+    insmock.assert_not_called()
+    upmock.assert_not_called()
+    assert len(ins) == 1
+    assert len(ups) == 1
+
+
+@patch("caoscrawler.crawl.cached_get_entity_by",
+       new=Mock(side_effect=mock_get_entity_by))
+@patch("caoscrawler.identifiable_adapters.cached_get_entity_by",
+       new=Mock(side_effect=mock_get_entity_by))
+@patch("caoscrawler.identifiable_adapters.CaosDBIdentifiableAdapter."
+       "retrieve_identified_record_for_identifiable",
+       new=Mock(side_effect=mock_retrieve_record))
+@patch("caoscrawler.crawl.db.Container.insert")
+@patch("caoscrawler.crawl.db.Container.update")
+@patch("caoscrawler.crawl.UpdateCache.insert")
+def test_security_mode(updateCacheMock, upmock, insmock):
+    # trivial case: nothing to do
+    crawled_data = [r.copy() for r in EXAMPLE_SERVER_STATE if r.role == "Record"]
+    print(crawled_data)
+    crawler = Crawler(securityMode=SecurityMode.RETRIEVE)
+    crawler.synchronize(commit_changes=True, crawled_data=crawled_data)
+    assert crawler.run_id is not None
+    insmock.assert_not_called()
+    upmock.assert_not_called()
+    updateCacheMock.assert_not_called()
+
+    # RETRIEVE: insert only
+    ident = CaosDBIdentifiableAdapter()
+    ident.load_from_yaml_definition(UNITTESTDIR/"example_identifiables.yml")
+    crawler = Crawler(securityMode=SecurityMode.RETRIEVE, identifiableAdapter=ident)
+
+    # add a new entity
+    crawled_data.append(NEW_ELEMENT.copy())
+
+    # insert forbidden
+    crawler.synchronize(commit_changes=True, crawled_data=crawled_data)
+    assert crawler.run_id is not None
+    insmock.assert_not_called()
+    upmock.assert_not_called()
+    assert updateCacheMock.call_count == 1
+    # reset counts
+    reset_mocks([updateCacheMock, insmock, upmock])
+    # remove new record again
+    crawled_data.pop()
+
+    # RETRIEVE: update only
+    crawler = Crawler(securityMode=SecurityMode.RETRIEVE)
+    # change one element
+    crawled_data[-1].get_property('result').value = "wst"
+    crawler.synchronize(commit_changes=True, crawled_data=crawled_data)
+    assert crawler.run_id is not None
+    insmock.assert_not_called()
+    upmock.assert_not_called()
+    # import IPython
+    # IPython.embed()
+    # print(updateCacheMock.call_args_list)
+    assert updateCacheMock.call_count == 1
+    # reset counts
+    reset_mocks([updateCacheMock, insmock, upmock])
+    # reset value
+    crawled_data[-1] = EXAMPLE_SERVER_STATE[-1].copy()
+
+    # INSERT: insert only
+    # add one element
+    crawled_data.append(NEW_ELEMENT.copy())
+    ident = CaosDBIdentifiableAdapter()
+    ident.load_from_yaml_definition(UNITTESTDIR/"example_identifiables.yml")
+    crawler = Crawler(securityMode=SecurityMode.INSERT, identifiableAdapter=ident)
+    crawler.synchronize(commit_changes=True, crawled_data=crawled_data)
+    assert crawler.run_id is not None
+    insmock.assert_called_once()
+    upmock.assert_not_called()
+    updateCacheMock.assert_not_called()
+    # reset counts
+    reset_mocks([updateCacheMock, insmock, upmock])
+    # remove new record again
+    crawled_data.pop()
+
+    # INSERT: update only
+    crawler = Crawler(securityMode=SecurityMode.INSERT, identifiableAdapter=ident)
+    # change one element
+    crawled_data[-1].get_property('result').value = "wst"
+    crawler.synchronize(commit_changes=True, crawled_data=crawled_data)
+    assert crawler.run_id is not None
+    insmock.assert_not_called()
+    upmock.assert_not_called()
+    updateCacheMock.assert_called_once()
+    # reset counts
+    reset_mocks([updateCacheMock, insmock, upmock])
+    # reset value
+    crawled_data[-1] = EXAMPLE_SERVER_STATE[-1].copy()
+
+    # INSERT: insert and update
+    ident = CaosDBIdentifiableAdapter()
+    ident.load_from_yaml_definition(UNITTESTDIR/"example_identifiables.yml")
+    crawler = Crawler(securityMode=SecurityMode.INSERT, identifiableAdapter=ident)
+    # change  one; add one
+    crawled_data[-1].get_property('result').value = "wst"
+    crawled_data.append(NEW_ELEMENT.copy())
+    crawler.synchronize(commit_changes=True, crawled_data=crawled_data)
+    assert crawler.run_id is not None
+    insmock.assert_called_once()
+    upmock.assert_not_called()
+    updateCacheMock.assert_called_once()
+    # reset counts
+    reset_mocks([updateCacheMock, insmock, upmock])
+    # restore original ident
+    crawled_data.pop()
+    crawled_data[-1] = EXAMPLE_SERVER_STATE[-1].copy()
+
+
+def test_create_reference_mapping():
+    a = db.Record().add_parent("A")
+    b = db.Record().add_parent("B").add_property('a', a)
+    ref = Crawler.create_reference_mapping([a, b])
+    assert id(a) in ref
+    assert id(b) not in ref
+    assert "B" in ref[id(a)]
+    assert ref[id(a)]["B"] == [b]
+
+
+def test_create_flat_list():
+    a = db.Record()
+    b = db.Record()
+    a.add_property(name="a", value=a)
+    a.add_property(name="b", value=b)
+    flat = Crawler.create_flat_list([a])
+    assert len(flat) == 2
+    assert a in flat
+    assert b in flat
+    c = db.Record()
+    c.add_property(name="a", value=a)
+    # This would cause recursion if it is not dealt with properly.
+    a.add_property(name="c", value=c)
+    flat = Crawler.create_flat_list([c])
+    assert len(flat) == 3
+    assert a in flat
+    assert b in flat
+    assert c in flat
+
+
+@ pytest.fixture
+def crawler_mocked_for_backref_test():
+    crawler = Crawler()
+    # mock retrieval of registered identifiables: return Record with just a parent
+
+    def get_reg_ident(x):
+        if x.parents[0].name == "C":
+            return db.Record().add_parent(x.parents[0].name).add_property(
+                "is_referenced_by", value=["BR"])
+        elif x.parents[0].name == "D":
+            return db.Record().add_parent(x.parents[0].name).add_property(
+                "is_referenced_by", value=["BR", "BR2"])
+        else:
+            return db.Record().add_parent(x.parents[0].name)
+    crawler.identifiableAdapter.get_registered_identifiable = Mock(side_effect=get_reg_ident)
+
+    # Simulate remote server content by using the names to identify records
+    # There is only a single known Record with name A
+    crawler.identifiableAdapter.retrieve_identified_record_for_record = Mock(side_effect=partial(
+        basic_retrieve_by_name_mock_up, known={"A":
+                                               db.Record(id=1111, name="A").add_parent("BR")}))
+    crawler.identifiableAdapter.retrieve_identified_record_for_identifiable = Mock(
+        side_effect=partial(
+            basic_retrieve_by_name_mock_up, known={"A":
+                                                   db.Record(id=1111, name="A").add_parent("BR")}))
+    return crawler
+
+
+def test_validation_error_print(caplog):
+    caplog.set_level(logging.DEBUG, logger="caoscrawler.converters")
+    # there should be no server interaction since we only test the behavior if a validation error
+    # occurs during the data collection stage
+    DATADIR = os.path.join(os.path.dirname(__file__), "test_data", "failing_validation")
+    for fi in ["cfood.yml", "cfood2.yml"]:
+        ret = crawler_main(DATADIR,
+                           os.path.join(DATADIR, fi),
+                           os.path.join(DATADIR, "identifiables.yml"),
+                           True,
+                           None,
+                           False)
+        assert "Couldn't validate" in caplog.text
+        caplog.clear()
+
+
+def test_split_into_inserts_and_updates_backref(crawler_mocked_for_backref_test):
+    crawler = crawler_mocked_for_backref_test
+    identlist = [Identifiable(name="A", record_type="BR"),
+                 Identifiable(name="B", record_type="C", backrefs=[db.Entity()])]
+    referenced = db.Record(name="B").add_parent("C")
+    entlist = [referenced, db.Record(name="A").add_parent("BR").add_property("ref", referenced), ]
+
+    # Test without referencing object
+    # currently a NotImplementedError is raised if necessary properties are missing.
+    with raises(NotImplementedError):
+        crawler.split_into_inserts_and_updates([db.Record(name="B").add_parent("C")])
+
+    # identifiables were not yet checked
+    assert crawler.get_from_any_cache(identlist[0]) is None
+    assert crawler.get_from_any_cache(identlist[1]) is None
+    # one with reference, one without
+    assert not crawler._has_reference_value_without_id(identlist[0])
+    assert crawler._has_reference_value_without_id(identlist[1])
+    # one can be found remotely, one not
+    assert crawler.identifiableAdapter.retrieve_identified_record_for_record(
+        identlist[0]).id == 1111
+    assert crawler.identifiableAdapter.retrieve_identified_record_for_record(
+        identlist[1]) is None
+
+    # check the split...
+    insert, update = crawler.split_into_inserts_and_updates(deepcopy(entlist))
+    # A was found remotely and is therefore in the update list
+    assert len(update) == 1
+    assert update[0].name == "A"
+    # B does not exist on the (simulated) remote server
+    assert len(insert) == 1
+    assert insert[0].name == "B"
+
+
+def test_split_into_inserts_and_updates_mult_backref(crawler_mocked_for_backref_test):
+    # test whether multiple references of the same record type are correctly used
+    crawler = crawler_mocked_for_backref_test
+    referenced = db.Record(name="B").add_parent("C")
+    entlist = [referenced,
+               db.Record(name="A").add_parent("BR").add_property("ref", referenced),
+               db.Record(name="C").add_parent("BR").add_property("ref", referenced),
+               ]
+
+    # test whether both entities are listed in the backref attribute of the identifiable
+    referencing_entities = crawler.create_reference_mapping(entlist)
+    identifiable = crawler.identifiableAdapter.get_identifiable(referenced, referencing_entities)
+    assert len(identifiable.backrefs) == 2
+
+    # check the split...
+    insert, update = crawler.split_into_inserts_and_updates(deepcopy(entlist))
+    assert len(update) == 1
+    assert len(insert) == 2
+
+
+def test_split_into_inserts_and_updates_diff_backref(crawler_mocked_for_backref_test):
+    # test whether multiple references of the different record types are correctly used
+    crawler = crawler_mocked_for_backref_test
+    referenced = db.Record(name="B").add_parent("D")
+    entlist = [referenced,
+               db.Record(name="A").add_parent("BR").add_property("ref", referenced),
+               db.Record(name="A").add_parent("BR2").add_property("ref", referenced),
+               ]
+
+    # test whether both entities are listed in the backref attribute of the identifiable
+    referencing_entities = crawler.create_reference_mapping(entlist)
+    identifiable = crawler.identifiableAdapter.get_identifiable(referenced, referencing_entities)
+    assert len(identifiable.backrefs) == 2
+
+    # check the split...
+    insert, update = crawler.split_into_inserts_and_updates(deepcopy(entlist))
+    assert len(update) == 2
+    assert len(insert) == 1
+
+
+def mock_create_values(values, element):
+    pass
+
+
+@patch("caoscrawler.converters.IntegerElementConverter.create_values")
+def test_restricted_path(create_mock):
+    """
+    The restricted_path argument allows to ignore part of the crawled data structure. Here, we make
+    sure that, if that argument is provided, indeed only the given path of the tree is traversed.
+
+    The check is done using the mock of the create_values function of the IntegerElementConverter.
+    This function is only called if elements are being treated.
+    """
+    crawler_definition = {
+        "DictTest": {
+            "type": "DictElement",
+            "match": "(.*)",
+            "subtree": {
+                "nextdict": {
+                    "type": "DictElement",
+                    "match": "(.*)",
+                    "subtree": {
+                        "int_element": {
+                            "type": "IntegerElement",
+                            "match_name": ".*",
+                            "match_value": "(?P<int_value>.*)",
+                            "records": {
+                                "Dataset": {
+                                    "Subject": "$int_value"
+                                }
+                            }
+                        }
+                    }
+                }
+            }
+        }
+    }
+
+    crawler = Crawler()
+    converter_registry = create_converter_registry(crawler_definition)
+
+    # This structure is crawled
+    test_dict = {
+        "v1": {
+            "a": 1,
+            "b": 2,
+        },
+        "v2": {
+            "c": 3,
+            "d": 4,
+        }
+    }
+    # first test without a restricted_path
+    restricted_path = None
+    records = scan_structure_elements(
+        DictElement("TestDict", test_dict), crawler_definition, converter_registry,
+        restricted_path
+    )
+    assert create_mock.call_count == 4
+    create_mock.reset_mock()
+
+    # test with a restricted_path but one that has no effect (single root element)
+    # this also tests that the remainder of the tree is fully traversed
+    restricted_path = ["TestDict"]
+    records = scan_structure_elements(
+        DictElement("TestDict", test_dict), crawler_definition, converter_registry,
+        restricted_path
+    )
+    assert create_mock.call_count == 4
+    create_mock.reset_mock()
+
+    # test with a restricted_path that restricts the tree (single root element)
+    restricted_path = ["TestDict", "v2"]
+    records = scan_structure_elements(
+        DictElement("TestDict", test_dict), crawler_definition, converter_registry,
+        restricted_path
+    )
+    assert create_mock.call_count == 2
+    create_mock.reset_mock()
+
+    # test with a restricted_path that contains a bad element
+    restricted_path = ["TestDict", "v3"]
+    with raises(RuntimeError):
+        records = scan_structure_elements(
+            DictElement("TestDict", test_dict), crawler_definition, converter_registry,
+            restricted_path
+        )
+
+
+def test_split_restricted_path():
+    assert ["el"] == split_restricted_path("/el")
+    assert ["el"] == split_restricted_path("/el/")
+    assert ["el", "el"] == split_restricted_path("/el/el")
+
+
+# Filter the warning because we want to have it here and this way it does not hinder running
+# tests with -Werror.
+@pytest.mark.filterwarnings("ignore:The prefix:DeprecationWarning")
+def test_deprecated_prefix_option():
+    """Test that calling the crawler's main function with the deprecated
+    `prefix` option raises the correct errors and warnings.
+
+    """
+
+    with pytest.deprecated_call():
+        crawler_main("./", UNITTESTDIR/"scifolder_cfood.yml", prefix="to/be/removed")
+
+    # Check that crawler main terminates with an error
+    assert 1 == crawler_main("./", UNITTESTDIR/"scifolder_cfood.yml", prefix="to/be/removed",
+                             remove_prefix="to/be/removed")
+
+    with raises(ValueError) as ve:
+
+        _treat_deprecated_prefix(prefix="to/be/removed", remove_prefix="to/be/removed")
+
+    assert "(deprecated) `prefix` and the `remove_prefix`" in str(ve.value)
+
+
+def test_create_entity_summary():
+    assert "" == Crawler.create_entity_summary([]).strip()
+
+    entities = [
+        db.Record(id=1).add_parent("A"),
+        db.Record(id=4, name='a').add_parent("B"),
+        db.Record(id=5).add_parent("A"),
+        db.Record(id=6, name='b').add_parent("B"),
+    ]
+    text = Crawler.create_entity_summary(entities).strip()
+    assert 'a' in text
+    assert 'b' in text
+    assert 'A:' in text
+    assert 'B:' in text
+    assert "<a href='/Entity/4'>a</a>, <a href='/Entity/6'>b</a>" in text
diff --git a/unittests/test_file_identifiables.py b/unittests/test_file_identifiables.py
index aff174d0228d2750efd1cca129547c821c974127..e46bb80711cea9684dc4610393b13f0592659427 100644
--- a/unittests/test_file_identifiables.py
+++ b/unittests/test_file_identifiables.py
@@ -6,11 +6,32 @@ import caosdb as db
 
 import pytest
 from pytest import raises
+from unittest.mock import patch, Mock
 
 from caoscrawler.identifiable_adapters import LocalStorageIdentifiableAdapter
+from caosdb.cached import cache_clear
 from caoscrawler.identifiable import Identifiable
 
+from caosdb.exceptions import EmptyUniqueQueryError
 
+
+@pytest.fixture(autouse=True)
+def clear_cache():
+    cache_clear()
+
+
+def mock_get_entity_by(eid=None, name=None, path=None):
+    if eid is not None:
+        candidates = [el for el in EXAMPLE_SERVER_STATE if el.id == eid]
+        if len(candidates) > 0:
+            return candidates[0]
+        else:
+            raise EmptyUniqueQueryError("")
+    raise EmptyUniqueQueryError("")
+
+
+@patch("caoscrawler.identifiable_adapters.cached_get_entity_by",
+       new=Mock(side_effect=mock_get_entity_by))
 def test_file_identifiable():
     ident = LocalStorageIdentifiableAdapter()
 
@@ -31,7 +52,8 @@ def test_file_identifiable():
         file_obj != identifiable
 
     # since the path does not exist in the data in ident, the follwoing functions return None
-    assert ident.retrieve_identified_record_for_record(file_obj) is None
+    with raises(EmptyUniqueQueryError):
+        ident.retrieve_identified_record_for_record(file_obj)
     assert ident.get_file(identifiable) is None
 
     # Try again with actual files in the store:
diff --git a/unittests/test_identifiable_adapters.py b/unittests/test_identifiable_adapters.py
index 894d476d628e9a05fbc6a4f7089404c886e01cbf..57f1459de9b6cf4f3fec73b9f3c0af1ae2b87659 100644
--- a/unittests/test_identifiable_adapters.py
+++ b/unittests/test_identifiable_adapters.py
@@ -29,10 +29,16 @@ test identifiable_adapters module
 
 import os
 from datetime import datetime
-from caoscrawler.identifiable_adapters import (
-    CaosDBIdentifiableAdapter, convert_value, IdentifiableAdapter)
-from caoscrawler.identifiable import Identifiable
+from pathlib import Path
+
 import caosdb as db
+import pytest
+from caoscrawler.identifiable import Identifiable
+from caoscrawler.identifiable_adapters import (CaosDBIdentifiableAdapter,
+                                               IdentifiableAdapter,
+                                               convert_value)
+
+UNITTESTDIR = Path(__file__).parent
 
 
 def test_create_query_for_identifiable():
@@ -90,8 +96,7 @@ def test_create_query_for_identifiable():
 def test_load_from_yaml_file():
     ident = CaosDBIdentifiableAdapter()
     ident.load_from_yaml_definition(
-        os.path.join(os.path.dirname(__file__), "test_directories",
-                     "single_file_test_data", "identifiables.yml")
+        UNITTESTDIR / "test_directories" / "single_file_test_data" / "identifiables.yml"
     )
 
     person_i = ident.get_registered_identifiable(
@@ -118,3 +123,59 @@ def test_convert_value():
             return " a "
 
     assert convert_value(A()) == " a "
+
+
+def test_get_identifiable():
+    # TODO modify this such that it becomes a test that actually tests (sufficiently) the
+    # get_identifiable function
+
+    ident = CaosDBIdentifiableAdapter()
+    ident.load_from_yaml_definition(UNITTESTDIR / "example_identifiables.yml")
+    r_cur = (db.Record(id=5)
+             .add_parent(name="Experiment", id=3)
+             .add_property(name="date", value="2022-02-01")
+             .add_property(name="result", value="FAIL"))
+    id_r0 = ident.get_identifiable(r_cur)
+    assert r_cur.parents[0].name == id_r0.record_type
+    assert r_cur.get_property(
+        "date").value == id_r0.properties["date"]
+    assert len(r_cur.parents) == 1
+    assert len(r_cur.properties) == 2
+    assert len(id_r0.properties) == 1
+
+
+@pytest.mark.xfail
+def test_retrieve_identified_record_for_identifiable():
+    # TODO modify this such that it becomes a test that actually tests (sufficiently) the
+    # retrieve_identified_record_for_identifiable function
+    idr_r0_test = ident.retrieve_identified_record_for_identifiable(id_r0)
+    idr_r0 = ident.retrieve_identified_record_for_record(r_cur)
+    assert idr_r0 == idr_r0_test
+
+    # take the first measurement in the list of records:
+    for r in ident.get_records():
+        if r.parents[0].name == "Measurement":
+            r_cur = r
+            break
+
+    id_r1 = ident.get_identifiable(r_cur)
+    assert r_cur.parents[0].name == id_r1.record_type
+    assert r_cur.get_property(
+        "identifier").value == id_r1.properties["identifier"]
+    assert r_cur.get_property("date").value == id_r1.properties["date"]
+    assert r_cur.get_property(
+        "project").value == id_r1.properties["project"]
+    assert len(r_cur.parents) == 1
+    assert len(r_cur.properties) == 4
+    assert len(id_r1.properties) == 3
+
+    idr_r1_test = ident.retrieve_identified_record_for_identifiable(id_r1)
+    idr_r1 = ident.retrieve_identified_record_for_record(r_cur)
+    assert idr_r1 == idr_r1_test
+    assert idr_r1 != idr_r0
+    assert idr_r1_test != idr_r0_test
+
+    assert len(idr_r1.properties) == 4
+    assert r_cur.get_property(
+        "responsible").value == idr_r1.get_property("responsible").value
+    assert r_cur.description == idr_r1.description
diff --git a/unittests/test_issues.py b/unittests/test_issues.py
index 46157af9225c11b79e76dd3ef856d60519a6eb9d..cbbe9cabcfd17daaf07165757351f00dc051eeab 100644
--- a/unittests/test_issues.py
+++ b/unittests/test_issues.py
@@ -28,7 +28,6 @@ from caoscrawler.crawl import Crawler
 from caoscrawler.identifiable import Identifiable
 from caoscrawler.identifiable_adapters import CaosDBIdentifiableAdapter
 from caoscrawler.structure_elements import DictElement
-from test_tool import rfp
 
 from caoscrawler.scanner import create_converter_registry, scan_structure_elements
 
diff --git a/unittests/test_json.py b/unittests/test_json.py
index 3c120be174ff819baeeaa49ddf142cf40dba751e..fdb332df60d73dce3356a563e09ae0d02cf845b7 100644
--- a/unittests/test_json.py
+++ b/unittests/test_json.py
@@ -34,16 +34,18 @@ from pytest import raises
 import caosdb as db
 
 from caoscrawler.converters import JSONFileConverter
+from pathlib import Path
 from caoscrawler.crawl import Crawler
 from caoscrawler.structure_elements import File, JSONFile
 from caoscrawler.scanner import load_definition, create_converter_registry, scan_structure_elements
-from test_tool import rfp, dircheckstr
+
+UNITTESTDIR = Path(__file__).parent
 
 
 def test_json():
-    crawler_definition_path = rfp("test_directories", "examples_json",
-                                  "jsontest_cfood.yml")
-    json_file_path = rfp("test_directories", "examples_json", "testjson.json")
+    crawler_definition_path = (UNITTESTDIR / "test_directories" / "examples_json"
+                               / "jsontest_cfood.yml")
+    json_file_path = UNITTESTDIR / "test_directories" / "examples_json" / "testjson.json"
 
     crawler_definition = load_definition(crawler_definition_path)
     # Load and register converter packages:
@@ -68,8 +70,7 @@ def test_json():
 
 
 def test_broken_validation():
-    crawler_definition_path = rfp(
-        "broken_cfoods", "broken_validation_path.yml")
+    crawler_definition_path = UNITTESTDIR / "broken_cfoods" / "broken_validation_path.yml"
     with raises(FileNotFoundError) as err:
         crawler_definition = load_definition(crawler_definition_path)
 
diff --git a/unittests/test_scalars_cfood.py b/unittests/test_scalars_cfood.py
index 89d94fc74ebda6aedfbee422294e99eab2216d73..57a8083c2aace830115c410e0425a8af4da17a7b 100644
--- a/unittests/test_scalars_cfood.py
+++ b/unittests/test_scalars_cfood.py
@@ -2,19 +2,21 @@
 # Tests for:
 # https://gitlab.com/caosdb/caosdb-crawler/-/issues/9
 # A. Schlemmer, 06/2021
+import os
+from pathlib import Path
 
 import pytest
-
 # The main function that is affected by this issue:
 from caoscrawler.converters import handle_value
 from caoscrawler.crawl import Crawler
+from caoscrawler.debug_tree import DebugTree
+from caoscrawler.scanner import scan_directory
 # We need the store for the above function
 from caoscrawler.stores import GeneralStore
-from caoscrawler.scanner import scan_directory
-from caoscrawler.debug_tree import DebugTree
 
+from utils import dircheckstr
 
-from test_tool import dircheckstr, rfp
+UNITTESTDIR = Path(__file__).parent
 
 
 def test_handle_value():
@@ -35,9 +37,11 @@ def test_handle_value():
 
 def test_record_structure_generation():
     dbt = DebugTree()
-    scan_directory(rfp("test_directories", "examples_article"), rfp("cfoods_scalar.yml"),
+    scan_directory(UNITTESTDIR/"test_directories" / "examples_article",
+                   UNITTESTDIR/"cfoods_scalar.yml",
                    debug_tree=dbt)
-    subd = dbt.debug_tree[dircheckstr("DataAnalysis")]
+    subd = dbt.debug_tree[dircheckstr(
+        UNITTESTDIR/"test_directories" / "examples_article", "DataAnalysis")]
     assert len(subd) == 2
     # variables store on Data Analysis node of debug tree
     if "Data" in subd[0]:
diff --git a/unittests/test_scanner.py b/unittests/test_scanner.py
new file mode 100644
index 0000000000000000000000000000000000000000..3d0a1e042c4483af608140df521cd9c087c71c70
--- /dev/null
+++ b/unittests/test_scanner.py
@@ -0,0 +1,169 @@
+
+import json
+import logging
+import os
+import warnings
+from copy import deepcopy
+from functools import partial
+from os.path import basename, dirname, join
+from pathlib import Path
+from tempfile import NamedTemporaryFile
+from unittest.mock import MagicMock, Mock, patch
+
+import caosdb as db
+import caosdb.common.models as dbmodels
+import pytest
+import yaml
+from caoscrawler.crawl import (Crawler, SecurityMode, _treat_deprecated_prefix,
+                               crawler_main, split_restricted_path)
+from caoscrawler.debug_tree import DebugTree
+from caoscrawler.identifiable import Identifiable
+from caoscrawler.identifiable_adapters import (CaosDBIdentifiableAdapter,
+                                               IdentifiableAdapter,
+                                               LocalStorageIdentifiableAdapter)
+from caoscrawler.scanner import (create_converter_registry, load_definition,
+                                 scan_directory, scan_structure_elements)
+from caoscrawler.stores import GeneralStore, RecordStore
+from caoscrawler.structure_elements import (DictElement, DictListElement,
+                                            DictTextElement, File)
+from caosdb.apiutils import compare_entities
+from caosdb.cached import cache_clear
+from caosdb.exceptions import EmptyUniqueQueryError
+from pytest import raises
+
+from utils import dircheckstr as dircheck_base
+
+UNITTESTDIR = Path(__file__).parent
+
+dircheckstr = partial(dircheck_base, UNITTESTDIR/"test_directories" / "examples_article")
+
+
+def test_scan_structure_elements():
+    tmpfi = NamedTemporaryFile(delete=False)
+    with open(UNITTESTDIR/"example_datastructure.yml", "r") as f:
+        data = yaml.load(f, Loader=yaml.SafeLoader)
+
+    crawler_definition = load_definition(UNITTESTDIR/"example_cfood.yml")
+    converter_registry = create_converter_registry(crawler_definition)
+    recs = scan_structure_elements(DictElement(name="", value=data), crawler_definition,
+                                   converter_registry)
+    assert len(recs) == 4
+
+
+def test_provenance_debug_data():
+    # TODO rewrite the test to use a smaller example setup
+    tmpfi = NamedTemporaryFile(delete=False)
+    debug_tree = DebugTree()
+    with open(UNITTESTDIR/"example_datastructure.yml", "r") as f:
+        data = yaml.load(f, Loader=yaml.SafeLoader)
+
+    crawler_definition = load_definition(UNITTESTDIR/"example_cfood.yml")
+    converter_registry = create_converter_registry(crawler_definition)
+    stuff = scan_structure_elements(DictElement(name="", value=data), crawler_definition,
+                                    converter_registry, debug_tree=debug_tree)
+    crawler = Crawler()
+    crawler.save_debug_data(tmpfi.name, debug_tree)
+    with open(tmpfi.name, "r") as f:
+        provenance = yaml.load(f, Loader=yaml.SafeLoader)
+
+    pr = provenance["provenance"]
+
+    def check_key_count(prefix):
+        return sum([1 for key in pr.keys() if key.startswith(prefix)])
+    assert check_key_count("Ent") == 4
+
+
+def test_record_structure_generation():
+    # TODO create a test from this that tests scan_structure
+    # the cfood should be minimal but cover typical scenarios (e.g. children)
+    # add also a minimal test for scan_directory; it can be very basic since the only difference
+    # to scan_structure is the kind of starting structure_element (check this statement)
+    # The test should not check debug tree output but actual created records
+
+    # TODO test creation of debug information in a separate test
+
+    dbt = DebugTree()
+    scan_directory(UNITTESTDIR/"test_directories" / "examples_article",
+                   UNITTESTDIR/"scifolder_cfood.yml",
+                   debug_tree=dbt)
+    subd = dbt.debug_tree[dircheckstr("DataAnalysis")]
+    subc = dbt.debug_metadata["copied"][dircheckstr("DataAnalysis")]
+    assert len(subd) == 2
+    # variables store on Data Analysis node of debug tree
+    assert len(subd[0]) == 4
+    # record store on Data Analysis node of debug tree
+    assert len(subd[1]) == 0
+    assert len(subc) == 2
+    assert len(subc[0]) == 4
+    assert len(subc[1]) == 0
+
+    # The data analysis node creates one variable for the node itself:
+    assert subd[0]["DataAnalysis"] == "examples_article/DataAnalysis"
+    assert subc[0]["DataAnalysis"] is False
+
+    subd = dbt.debug_tree[dircheckstr("DataAnalysis", "2020_climate-model-predict")]
+    subc = dbt.debug_metadata["copied"][dircheckstr("DataAnalysis", "2020_climate-model-predict")]
+
+    assert len(subd[1]) == 1
+    assert len(subd[1]["Project"].get_parents()) == 1
+    assert subd[1]["Project"].get_parents()[0].name == "Project"
+    assert subd[1]["Project"].get_property("date").value == "2020"
+    assert subd[1]["Project"].get_property(
+        "identifier").value == "climate-model-predict"
+
+    assert len(subd[0]) == 9
+    assert subd[0]["date"] == "2020"
+    assert subd[0]["identifier"] == "climate-model-predict"
+    assert subd[0]["Project"].__class__ == db.Record
+
+    assert subd[0]["DataAnalysis"] == "examples_article/DataAnalysis"
+    assert subc[0]["DataAnalysis"] is True
+    assert subd[0]["project_dir"] == "examples_article/DataAnalysis/2020_climate-model-predict"
+    assert subc[0]["project_dir"] is False
+
+    # Check the copy flags for the first level in the hierarchy:
+    assert len(subc[0]) == 9
+    assert len(subc[1]) == 1
+    assert subc[1]["Project"] is False
+    assert subc[0]["Project"] is False
+    assert subc[0]["date"] is False
+    assert subc[0]["identifier"] is False
+
+    subd = dbt.debug_tree[dircheckstr("DataAnalysis",
+                                      "2020_climate-model-predict",
+                                      "2020-02-08_prediction-errors")]
+    subc = dbt.debug_metadata["copied"][dircheckstr("DataAnalysis",
+                                                    "2020_climate-model-predict",
+                                                    "2020-02-08_prediction-errors")]
+    assert len(subd[0]) == 12
+    assert subd[0]["date"] == "2020-02-08"
+    assert subd[0]["identifier"] == "prediction-errors"
+    assert subd[0]["Project"].__class__ == db.Record
+    assert subd[0]["Measurement"].__class__ == db.Record
+
+    assert len(subd[1]) == 2
+
+    assert len(subd[1]["Project"].get_parents()) == 1
+    assert subd[1]["Project"].get_parents()[0].name == "Project"
+    assert subd[1]["Project"].get_property("date").value == "2020"
+    assert subd[1]["Project"].get_property(
+        "identifier").value == "climate-model-predict"
+
+    assert len(subd[1]["Measurement"].get_parents()) == 1
+    assert subd[1]["Measurement"].get_parents()[0].name == "Measurement"
+    assert subd[1]["Measurement"].get_property("date").value == "2020-02-08"
+    assert subd[1]["Measurement"].get_property(
+        "identifier").value == "prediction-errors"
+    assert subd[1]["Measurement"].get_property("project").value != "$Project"
+    assert subd[1]["Measurement"].get_property(
+        "project").value.__class__ == db.Record
+    assert subd[1]["Measurement"].get_property(
+        "project").value == subd[0]["Project"]
+
+    # Check the copy flags for the second level in the hierarchy:
+    assert subc[1]["Project"] is True
+    assert subc[0]["Project"] is True
+    assert subc[1]["Measurement"] is False
+    assert subc[0]["Measurement"] is False
+    assert subc[0]["date"] is False
+    assert subc[0]["identifier"] is False
diff --git a/unittests/test_table_converter.py b/unittests/test_table_converter.py
index d739695fc4c6a019f28f3c3697e3f134e0f1755e..12fcabf968e5319e0f2d0b569cb56afa7ac23fda 100644
--- a/unittests/test_table_converter.py
+++ b/unittests/test_table_converter.py
@@ -26,29 +26,31 @@
 test the converters module
 """
 
-from caoscrawler.converters import Converter
-from caoscrawler.stores import GeneralStore
-from caoscrawler.scanner import scan_directory
-from caoscrawler.debug_tree import DebugTree
-from caoscrawler.converters import (ConverterValidationError,
-                                    DictConverter, XLSXTableConverter, CSVTableConverter)
-from caoscrawler.structure_elements import Directory
-from caoscrawler.structure_elements import (File, TextElement, ListElement, DictElement,
-                                            BooleanElement, IntegerElement, FloatElement)
-
-from os.path import join, dirname, basename
-
-from caoscrawler.identifiable_adapters import IdentifiableAdapter, LocalStorageIdentifiableAdapter
-
-import pytest
-import os
 import importlib
-
 import math
+import os
+from os.path import basename, dirname, join
+from pathlib import Path
 
+import caosdb as db
+import pytest
 from caoscrawler import Crawler
+from caoscrawler.converters import (Converter, ConverterValidationError,
+                                    CSVTableConverter, DictConverter,
+                                    XLSXTableConverter)
+from caoscrawler.debug_tree import DebugTree
+from caoscrawler.identifiable_adapters import (IdentifiableAdapter,
+                                               LocalStorageIdentifiableAdapter)
+from caoscrawler.scanner import scan_directory
+from caoscrawler.stores import GeneralStore
+from caoscrawler.structure_elements import (BooleanElement, DictElement,
+                                            Directory, File, FloatElement,
+                                            IntegerElement, ListElement,
+                                            TextElement)
 
-import caosdb as db
+from utils import dircheckstr
+
+UNITTESTDIR = Path(__file__).parent
 
 
 @pytest.fixture
@@ -86,13 +88,6 @@ def rfp(*pathcomponents):
     return join(dirname(__file__), *pathcomponents)
 
 
-def dircheckstr(*pathcomponents):
-    """
-    Return the debug tree identifier for a given path.
-    """
-    return "caoscrawler.structure_elements.File: " + basename(join(*pathcomponents)) + ", " + rfp("test_directories", "examples_tables", "ExperimentalData", *pathcomponents)
-
-
 def test_convert_table(converter_registry):
     extentions = ["xlsx", "csv", "tsv"]
     if importlib.util.find_spec("odf") is not None:
@@ -151,7 +146,13 @@ def test_crawl_csv_table():
                    rfp("test_directories", "examples_tables", "crawler_for_tables.yml"),
                    debug_tree=dbt)
     for file_ext in ["xlsx", "csv"]:
-        subd = dbt.debug_tree[dircheckstr("test1." + file_ext)]
+        print(dbt.debug_tree)
+        print(dircheckstr(
+            UNITTESTDIR / "test_directories" / "examples_tables" / "ExperimentalData",
+            "test1." + file_ext))
+        subd = dbt.debug_tree[dircheckstr(
+            UNITTESTDIR / "test_directories" / "examples_tables" / "ExperimentalData",
+            "test1." + file_ext)]
         record_experiment = subd[1]["Experiment"]
         assert isinstance(record_experiment, db.Record)
         assert isinstance(record_experiment.get_property("Measurements").value, list)
diff --git a/unittests/test_tool.py b/unittests/test_tool.py
deleted file mode 100755
index ec3e0bb9e69a45416d23f3c7aba15ec759cabf77..0000000000000000000000000000000000000000
--- a/unittests/test_tool.py
+++ /dev/null
@@ -1,1024 +0,0 @@
-#!/usr/bin/env python3
-# encoding: utf-8
-#
-# This file is a part of the CaosDB Project.
-#
-# Copyright (C) 2021 Alexander Schlemmer
-# Copyright (C) 2023 Henrik tom Wörden <h.tomwoerden@indiscale.com>
-# Copyright (C) 2023 IndiScale GmbH <info@indiscale.com>
-#
-# This program is free software: you can redistribute it and/or modify
-# it under the terms of the GNU Affero General Public License as
-# published by the Free Software Foundation, either version 3 of the
-# License, or (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-# GNU Affero General Public License for more details.
-#
-# You should have received a copy of the GNU Affero General Public License
-# along with this program. If not, see <https://www.gnu.org/licenses/>.
-#
-
-"""
-Tests for the tool using pytest
-Adapted from check-sfs
-"""
-import logging
-
-from caoscrawler.stores import GeneralStore, RecordStore
-import os
-from caoscrawler.crawl import (_treat_deprecated_prefix, Crawler, crawler_main,
-                               SecurityMode, split_restricted_path)
-from caoscrawler.identifiable import Identifiable
-from caoscrawler.structure_elements import File, DictTextElement, DictListElement, DictElement
-from caoscrawler.scanner import scan_directory
-from caoscrawler.debug_tree import DebugTree
-from caoscrawler.identifiable_adapters import IdentifiableAdapter, LocalStorageIdentifiableAdapter
-from simulated_server_data import full_data
-from functools import partial
-from copy import deepcopy
-from unittest.mock import patch
-import caosdb.common.models as dbmodels
-from unittest.mock import MagicMock, Mock
-from os.path import join, dirname, basename
-import yaml
-import caosdb as db
-from caosdb.apiutils import compare_entities
-from caosdb.cached import cache_clear
-import pytest
-from pytest import raises
-
-from caoscrawler.scanner import create_converter_registry, scan_structure_elements
-
-
-def rfp(*pathcomponents):
-    """
-    Return full path.
-    Shorthand convenience function.
-    """
-    return join(dirname(__file__), *pathcomponents)
-
-
-ident = LocalStorageIdentifiableAdapter()
-ident.restore_state(rfp("records.xml"))
-full_data.update({el.name: el for el in ident._records if el.name is not None})
-full_data.update({el.id: el for el in ident._records if el.name is None})
-
-
-def dircheckstr(*pathcomponents):
-    """
-    Return the debug tree identifier for a given path.
-    """
-    return ("caoscrawler.structure_elements.Directory: " + basename(
-        join(*pathcomponents)) + ", " + rfp(
-            "test_directories", "examples_article", *pathcomponents))
-
-
-@pytest.fixture(autouse=True)
-def clear_cache():
-    cache_clear()
-
-
-@pytest.fixture
-def crawler():
-    crawler = Crawler()
-    debug_tree = DebugTree()
-    crawled_data = scan_directory(
-        rfp("test_directories", "examples_article"),
-        rfp("scifolder_cfood.yml"), debug_tree=debug_tree)
-    return crawler, crawled_data, debug_tree
-
-
-@pytest.fixture
-def ident(crawler):
-    ident = LocalStorageIdentifiableAdapter()
-    crawler[0].identifiableAdapter = ident
-
-    # The records.xml file is constructed as follows:
-    # To a full run of the crawler, resolve all identifiables and insert all resulting entities.
-    # See: test-setup/datamodel/generate_test_data.py for details.
-    ident.restore_state(rfp("records.xml"))
-
-    ident.register_identifiable(
-        "Person", db.RecordType()
-        .add_parent(name="Person")
-        .add_property(name="first_name")
-        .add_property(name="last_name"))
-    ident.register_identifiable(
-        "Measurement", db.RecordType()
-        .add_parent(name="Measurement")
-        .add_property(name="identifier")
-        .add_property(name="date")
-        .add_property(name="project"))
-    ident.register_identifiable(
-        "Project", db.RecordType()
-        .add_parent(name="Project")
-        .add_property(name="date")
-        .add_property(name="identifier"))
-    return ident
-
-
-def test_record_structure_generation():
-    # TODO How does this test relate to the test function in test_scalars_cfood with the same name?
-    #      There seems to be code duplication
-
-    dbt = DebugTree()
-    scan_directory(rfp("test_directories", "examples_article"),
-                   rfp("scifolder_cfood.yml"),
-                   debug_tree=dbt)
-    subd = dbt.debug_tree[dircheckstr("DataAnalysis")]
-    subc = dbt.debug_metadata["copied"][dircheckstr("DataAnalysis")]
-    assert len(subd) == 2
-    # variables store on Data Analysis node of debug tree
-    assert len(subd[0]) == 4
-    # record store on Data Analysis node of debug tree
-    assert len(subd[1]) == 0
-    assert len(subc) == 2
-    assert len(subc[0]) == 4
-    assert len(subc[1]) == 0
-
-    # The data analysis node creates one variable for the node itself:
-    assert subd[0]["DataAnalysis"] == "examples_article/DataAnalysis"
-    assert subc[0]["DataAnalysis"] is False
-
-    subd = dbt.debug_tree[dircheckstr(
-        "DataAnalysis", "2020_climate-model-predict")]
-    subc = dbt.debug_metadata["copied"][dircheckstr(
-        "DataAnalysis", "2020_climate-model-predict")]
-
-    assert len(subd[1]) == 1
-    assert len(subd[1]["Project"].get_parents()) == 1
-    assert subd[1]["Project"].get_parents()[0].name == "Project"
-    assert subd[1]["Project"].get_property("date").value == "2020"
-    assert subd[1]["Project"].get_property(
-        "identifier").value == "climate-model-predict"
-
-    assert len(subd[0]) == 9
-    assert subd[0]["date"] == "2020"
-    assert subd[0]["identifier"] == "climate-model-predict"
-    assert subd[0]["Project"].__class__ == db.Record
-
-    assert subd[0]["DataAnalysis"] == "examples_article/DataAnalysis"
-    assert subc[0]["DataAnalysis"] is True
-    assert subd[0]["project_dir"] == "examples_article/DataAnalysis/2020_climate-model-predict"
-    assert subc[0]["project_dir"] is False
-
-    # Check the copy flags for the first level in the hierarchy:
-    assert len(subc[0]) == 9
-    assert len(subc[1]) == 1
-    assert subc[1]["Project"] is False
-    assert subc[0]["Project"] is False
-    assert subc[0]["date"] is False
-    assert subc[0]["identifier"] is False
-
-    subd = dbt.debug_tree[dircheckstr("DataAnalysis",
-                                      "2020_climate-model-predict",
-                                      "2020-02-08_prediction-errors")]
-    subc = dbt.debug_metadata["copied"][dircheckstr("DataAnalysis",
-                                                    "2020_climate-model-predict",
-                                                    "2020-02-08_prediction-errors")]
-    assert len(subd[0]) == 12
-    assert subd[0]["date"] == "2020-02-08"
-    assert subd[0]["identifier"] == "prediction-errors"
-    assert subd[0]["Project"].__class__ == db.Record
-    assert subd[0]["Measurement"].__class__ == db.Record
-
-    assert len(subd[1]) == 2
-
-    assert len(subd[1]["Project"].get_parents()) == 1
-    assert subd[1]["Project"].get_parents()[0].name == "Project"
-    assert subd[1]["Project"].get_property("date").value == "2020"
-    assert subd[1]["Project"].get_property(
-        "identifier").value == "climate-model-predict"
-
-    assert len(subd[1]["Measurement"].get_parents()) == 1
-    assert subd[1]["Measurement"].get_parents()[0].name == "Measurement"
-    assert subd[1]["Measurement"].get_property("date").value == "2020-02-08"
-    assert subd[1]["Measurement"].get_property(
-        "identifier").value == "prediction-errors"
-    assert subd[1]["Measurement"].get_property("project").value != "$Project"
-    assert subd[1]["Measurement"].get_property(
-        "project").value.__class__ == db.Record
-    assert subd[1]["Measurement"].get_property(
-        "project").value == subd[0]["Project"]
-
-    # Check the copy flags for the second level in the hierarchy:
-    assert subc[1]["Project"] is True
-    assert subc[0]["Project"] is True
-    assert subc[1]["Measurement"] is False
-    assert subc[0]["Measurement"] is False
-    assert subc[0]["date"] is False
-    assert subc[0]["identifier"] is False
-
-
-# def prepare_test_record_file():
-#     ident = LocalStorageIdentifiableAdapter()
-#     crawler = Crawler(debug=True, identifiableAdapter=ident)
-#     crawler.crawl_directory(rfp("test_directories", "examples_article"),
-#                             rfp("scifolder_cfood.yml"))
-
-#     # clean record list:
-#     recordlist = ident.get_records()
-#     for i in range(len(recordlist)-1, 1, -1):
-#         if recordlist[i].parents[0].name == "Person":
-#             del recordlist[i]
-
-#     ident.store_state(rfp("records.xml"))
-
-
-def test_crawler_update_list(crawler, ident):
-    crawled_data = crawler[1]
-    # If the following assertions fail, that is a hint, that the test file records.xml has changed
-    # and this needs to be updated:
-    assert len(ident.get_records()) == 18
-    assert len(
-        [r for r in ident.get_records() if r.parents[0].name == "Person"]
-    ) == 5
-    assert len(
-        [r for r in ident.get_records() if r.parents[0].name == "Measurement"]
-    ) == 11
-    assert len(
-        [r for r in ident.get_records() if r.parents[0].name == "Project"]
-    ) == 2
-
-    # The crawler contains lots of duplicates, because identifiables have not been resolved yet:
-    assert len(ident.get_records()) != len(crawled_data)
-
-    # Check consistency:
-    # Check whether identifiables retrieved from current identifiable store return
-    # the same results.
-
-    # take the first person in the list of records:
-    for r in ident.get_records():
-        if r.parents[0].name == "Person":
-            r_cur = r
-            break
-
-    id_r0 = ident.get_identifiable(r_cur)
-    assert r_cur.parents[0].name == id_r0.record_type
-    assert r_cur.get_property(
-        "first_name").value == id_r0.properties["first_name"]
-    assert r_cur.get_property(
-        "last_name").value == id_r0.properties["last_name"]
-    assert len(r_cur.parents) == 1
-    assert len(r_cur.properties) == 2
-    assert len(id_r0.properties) == 2
-
-    idr_r0_test = ident.retrieve_identified_record_for_identifiable(id_r0)
-    idr_r0 = ident.retrieve_identified_record_for_record(r_cur)
-    assert idr_r0 == idr_r0_test
-
-    # take the first measurement in the list of records:
-    for r in ident.get_records():
-        if r.parents[0].name == "Measurement":
-            r_cur = r
-            break
-
-    id_r1 = ident.get_identifiable(r_cur)
-    assert r_cur.parents[0].name == id_r1.record_type
-    assert r_cur.get_property(
-        "identifier").value == id_r1.properties["identifier"]
-    assert r_cur.get_property("date").value == id_r1.properties["date"]
-    assert r_cur.get_property(
-        "project").value == id_r1.properties["project"]
-    assert len(r_cur.parents) == 1
-    assert len(r_cur.properties) == 4
-    assert len(id_r1.properties) == 3
-
-    idr_r1_test = ident.retrieve_identified_record_for_identifiable(id_r1)
-    idr_r1 = ident.retrieve_identified_record_for_record(r_cur)
-    assert idr_r1 == idr_r1_test
-    assert idr_r1 != idr_r0
-    assert idr_r1_test != idr_r0_test
-
-    assert len(idr_r1.properties) == 4
-    assert r_cur.get_property(
-        "responsible").value == idr_r1.get_property("responsible").value
-    assert r_cur.description == idr_r1.description
-
-
-def test_synchronization(crawler, ident):
-    insl, updl = crawler[0].synchronize(commit_changes=False, crawled_data=crawler[1])
-    assert len(insl) == 0
-    assert len(updl) == 0
-
-
-def test_remove_unnecessary_updates():
-    # test trvial case
-    upl = [db.Record().add_parent("A")]
-    irs = [db.Record().add_parent("A")]
-    updates = Crawler.remove_unnecessary_updates(upl, irs)
-    assert len(updates) == 0
-
-    # test property difference case
-    # TODO this should work right?
-    # upl = [db.Record().add_parent("A").add_property("a", 3)]
-    # irs = [db.Record().add_parent("A")]  # ID should be s
-    # Crawler.remove_unnecessary_updates(upl, irs)
-    # assert len(upl) == 1
-
-    # test value difference case
-    upl = [db.Record().add_parent("A").add_property("a", 5)]
-    irs = [db.Record().add_parent("A").add_property("a")]
-    updates = Crawler.remove_unnecessary_updates(upl, irs)
-    assert len(updates) == 1
-    upl = [db.Record().add_parent("A").add_property("a", 5)]
-    irs = [db.Record().add_parent("A").add_property("a", 5)]
-    updates = Crawler.remove_unnecessary_updates(upl, irs)
-    assert len(updates) == 0
-
-    # test unit difference case
-    upl = [db.Record().add_parent("A").add_property("a", unit='cm')]
-    irs = [db.Record().add_parent("A").add_property("a")]
-    updates = Crawler.remove_unnecessary_updates(upl, irs)
-    assert len(updates) == 1
-
-    # test None difference case
-    upl = [db.Record().add_parent("A").add_property("a")]
-    irs = [db.Record().add_parent("A").add_property("a", 5)]
-    updates = Crawler.remove_unnecessary_updates(upl, irs)
-    assert len(updates) == 1
-
-
-# Current status:
-# TODO: currently, this test fails, because non identifiable records cannot
-#       be inserted into the cache. Solution might be, just not to add them
-#       into the local cache. Probably in split_into_inserts_and_updates.
-@pytest.mark.xfail
-def test_identifiable_adapter_no_identifiable(crawler, ident):
-    del ident._registered_identifiables["Person"]
-    insl, updl = crawler[0].synchronize()
-    assert len(updl) == 0
-
-    pers = [r for r in crawler[0].crawled_data if r.parents[0].name == "Person"]
-    # All persons are inserted, because they are not identifiable:
-    assert len(insl) == len(pers)
-
-
-def test_provenance_debug_data(crawler):
-    crawler[0].save_debug_data(rfp("provenance.yml"), debug_tree=crawler[2])
-
-    with open(rfp("provenance.yml"), "r") as f:
-        provenance = yaml.load(f, Loader=yaml.SafeLoader)
-
-    pr = provenance["provenance"]
-
-    def check_key_count(prefix):
-        return sum([1 for key in pr.keys() if key.startswith(prefix)])
-    assert check_key_count("Measurement") == 11
-    assert check_key_count("Project") == 5
-    assert check_key_count("Person") == 14
-
-
-def test_split_into_inserts_and_updates_trivial(crawler):
-    crawler[0].split_into_inserts_and_updates([])
-
-
-def basic_retrieve_by_name_mock_up(rec, referencing_entities=None, known=None):
-    """ returns a stored Record if rec.name is an existing key, None otherwise """
-    if rec.name in known:
-        return known[rec.name]
-    else:
-        return None
-
-
-@pytest.fixture
-def crawler_mocked_identifiable_retrieve(crawler):
-    # mock retrieval of registered identifiabls: return Record with just a parent
-    crawler[0].identifiableAdapter.get_registered_identifiable = Mock(
-        side_effect=lambda x: db.Record().add_parent(x.parents[0].name))
-
-    # Simulate remote server content by using the names to identify records
-    # There is only a single known Record with name A
-    crawler[0].identifiableAdapter.retrieve_identified_record_for_record = Mock(side_effect=partial(
-        basic_retrieve_by_name_mock_up, known={"A": db.Record(id=1111, name="A")}))
-    crawler[0].identifiableAdapter.retrieve_identified_record_for_identifiable = Mock(
-        side_effect=partial(
-            basic_retrieve_by_name_mock_up, known={"A": db.Record(id=1111, name="A")}))
-    return crawler
-
-
-def test_split_into_inserts_and_updates_single(crawler_mocked_identifiable_retrieve):
-    crawler = crawler_mocked_identifiable_retrieve[0]
-    identlist = [Identifiable(name="A", record_type="C"), Identifiable(name="B", record_type="C")]
-    entlist = [db.Record(name="A").add_parent(
-        "C"), db.Record(name="B").add_parent("C")]
-
-    assert crawler.get_from_any_cache(identlist[0]) is None
-    assert crawler.get_from_any_cache(identlist[1]) is None
-    assert not crawler._has_reference_value_without_id(identlist[0])
-    assert not crawler._has_reference_value_without_id(identlist[1])
-    assert crawler.identifiableAdapter.retrieve_identified_record_for_record(
-        identlist[0]).id == 1111
-    assert crawler.identifiableAdapter.retrieve_identified_record_for_record(
-        identlist[1]) is None
-
-    insert, update = crawler.split_into_inserts_and_updates(deepcopy(entlist))
-    assert len(insert) == 1
-    assert insert[0].name == "B"
-    assert len(update) == 1
-    assert update[0].name == "A"
-    # if this ever fails, the mock up may be removed
-    crawler.identifiableAdapter.get_registered_identifiable.assert_called()
-    crawler.identifiableAdapter.retrieve_identified_record_for_identifiable.assert_called()
-
-
-def test_split_into_inserts_and_updates_with_duplicate(crawler_mocked_identifiable_retrieve):
-    crawler = crawler_mocked_identifiable_retrieve[0]
-    a = db.Record(name="A").add_parent("C")
-    b = db.Record(name="B").add_parent("C")
-    b.add_property("A", a)
-    # This is identical to a and should be removed
-    c = db.Record(name="A").add_parent("C")
-    entlist = [a, b, c]
-    insert, update = crawler.split_into_inserts_and_updates(deepcopy(entlist))
-    assert len(insert) == 1
-    assert insert[0].name == "B"
-    assert len(update) == 1
-    assert update[0].name == "A"
-    # if this ever fails, the mock up may be removed
-    crawler.identifiableAdapter.get_registered_identifiable.assert_called()
-    crawler.identifiableAdapter.retrieve_identified_record_for_identifiable.assert_called()
-
-
-def test_split_into_inserts_and_updates_with_ref(crawler_mocked_identifiable_retrieve):
-    crawler = crawler_mocked_identifiable_retrieve[0]
-    # try it with a reference
-    a = db.Record(name="A").add_parent("C")
-    b = db.Record(name="B").add_parent("C")
-    b.add_property("A", a)
-    entlist = [a, b]
-    insert, update = crawler.split_into_inserts_and_updates(entlist)
-    assert len(insert) == 1
-    assert insert[0].name == "B"
-    assert len(update) == 1
-    assert update[0].name == "A"
-    # if this ever fails, the mock up may be removed
-    crawler.identifiableAdapter.get_registered_identifiable.assert_called()
-    crawler.identifiableAdapter.retrieve_identified_record_for_identifiable.assert_called()
-
-
-def test_split_into_inserts_and_updates_with_circ(crawler):
-    # try circular
-    a = db.Record(name="A").add_parent("C")
-    b = db.Record(name="B").add_parent("C")
-    b.add_property("A", a)
-    a.add_property("B", b)
-    entlist = [a, b]
-    # TODO this does not seem to be complete!
-
-
-def test_split_into_inserts_and_updates_with_complex(crawler_mocked_identifiable_retrieve):
-    crawler = crawler_mocked_identifiable_retrieve[0]
-    #      A
-    #      ^
-    #      |
-    # F <- B <- G
-    a = db.Record(name="A").add_parent("C").add_property(
-        'd', 13).add_property('e', "lskdjlsfdj")
-    b = db.Record(name="B").add_parent("C")
-    g = db.Record(name="G").add_parent("C")
-    f = db.Record(name="F").add_parent("C")
-    g.add_property("A", a)
-    b.add_property("A", f)
-    b.add_property("A", a)
-    entlist = [a, b, g]
-    insert, update = crawler.split_into_inserts_and_updates(entlist)
-    assert len(insert) == 3
-    assert "B" in [el.name for el in insert]
-    assert len(update) == 1
-    assert update[0].name == "A"
-    # if this ever fails, the mock up may be removed
-    crawler.identifiableAdapter.get_registered_identifiable.assert_called()
-    crawler.identifiableAdapter.retrieve_identified_record_for_identifiable.assert_called()
-
-    # TODO write test where the unresoled entity is not part of the identifiable
-
-
-def test_split_into_inserts_and_updates_with_copy_attr(crawler_mocked_identifiable_retrieve):
-    crawler = crawler_mocked_identifiable_retrieve[0]
-    # assume identifiable is only the name
-    a = db.Record(name="A").add_parent("C")
-    a.add_property("foo", 1)
-    b = db.Record(name="A").add_parent("C")
-    b.add_property("bar", 2)
-    entlist = [a, b]
-    insert, update = crawler.split_into_inserts_and_updates(entlist)
-
-    assert update[0].get_property("bar").value == 2
-    assert update[0].get_property("foo").value == 1
-    # if this ever fails, the mock up may be removed
-    crawler.identifiableAdapter.get_registered_identifiable.assert_called()
-    crawler.identifiableAdapter.retrieve_identified_record_for_identifiable.assert_called()
-
-
-def test_has_missing_object_in_references(crawler):
-    # Simulate remote server content by using the names to identify records
-    # There are only two known Records with name A and B
-    crawler[0].identifiableAdapter.get_registered_identifiable = Mock(side_effect=partial(
-        basic_retrieve_by_name_mock_up, known={"C": db.Record(name="C").add_parent("RTC")
-                                               .add_property("d"),
-                                               "D": db.Record(name="D").add_parent("RTD")
-                                               .add_property("d").add_property("e"),
-                                               }))
-
-    # one reference with id -> check
-    assert not crawler[0]._has_missing_object_in_references(
-        Identifiable(name="C", record_type="RTC", properties={'d': 123}), [])
-    # one ref with Entity with id -> check
-    assert not crawler[0]._has_missing_object_in_references(
-        Identifiable(name="C", record_type="RTC", properties={'d': db.Record(id=123)
-                                                              .add_parent("C")}), [])
-    # one ref with id one with Entity with id (mixed) -> check
-    assert not crawler[0]._has_missing_object_in_references(
-        Identifiable(name="C", record_type="RTD",
-                     properties={'d': 123, 'b': db.Record(id=123).add_parent("RTC")}), [])
-    # entity to be referenced in the following
-    a = db.Record(name="C").add_parent("C").add_property("d", 12311)
-    # one ref with id one with Entity without id (but not identifying) -> fail
-    assert not crawler[0]._has_missing_object_in_references(
-        Identifiable(name="C", record_type="RTC", properties={'d': 123, 'e': a}), [])
-
-    # one ref with id one with Entity without id (mixed) -> fail
-    assert not crawler[0]._has_missing_object_in_references(
-        Identifiable(name="D", record_type="RTD", properties={'d': 123, 'e': a}), [])
-
-    crawler[0].add_to_remote_missing_cache(a, Identifiable(name="C", record_type="RTC",
-                                                           properties={'d': 12311}))
-    # one ref with id one with Entity without id but in cache -> check
-    assert crawler[0]._has_missing_object_in_references(
-        Identifiable(name="D", record_type="RTD", properties={'d': 123, 'e': a}), [])
-
-    # if this ever fails, the mock up may be removed
-    crawler[0].identifiableAdapter.get_registered_identifiable.assert_called()
-
-
-@pytest.mark.xfail()
-def test_references_entities_without_ids(crawler, ident):
-    assert not crawler[0]._has_reference_value_without_id(db.Record().add_parent("Person")
-                                                          .add_property('last_name', 123)
-                                                          .add_property('first_name', 123))
-    # id and rec with id
-    assert not crawler[0]._has_reference_value_without_id(db.Record().add_parent("Person")
-                                                          .add_property('first_name', 123)
-                                                          .add_property('last_name',
-                                                                        db.Record(id=123)))
-    # id and rec with id and one unneeded prop
-    assert crawler[0]._has_reference_value_without_id(db.Record().add_parent("Person")
-                                                      .add_property('first_name', 123)
-                                                      .add_property('stuff', db.Record())
-                                                      .add_property('last_name', db.Record(id=123)))
-
-    # one identifying prop is missing
-    assert crawler[0]._has_reference_value_without_id(db.Record().add_parent("Person")
-                                                      .add_property('first_name', 123)
-                                                      .add_property('last_name', db.Record()))
-
-
-def test_replace_entities_with_ids(crawler):
-    a = (db.Record().add_parent("B").add_property("A", 12345)
-         .add_property("B", db.Record(id=12345))
-         .add_property("C", [db.Record(id=12345), 233324]))
-
-    crawler[0].replace_entities_with_ids(a)
-    assert a.get_property("A").value == 12345
-    assert a.get_property("B").value == 12345
-    assert a.get_property("C").value == [12345, 233324]
-
-
-def mock_get_entity_by(eid=None, name=None):
-    if eid is not None:
-        candidates = [el for el in list(full_data.values()) if el.id == eid]
-        if len(candidates) > 0:
-            return candidates[0]
-        else:
-            raise ValueError()
-    if name is not None:
-        candidates = [el for el in full_data.values()
-                      if (el.name is not None and el.name.lower() == name.lower())]
-        if len(candidates) > 0:
-            return candidates[0]
-        else:
-            raise ValueError()
-
-
-def prepare_crawler_with_sec_mode(mode, ident):
-    crawler = Crawler(securityMode=mode)
-    debug_tree = DebugTree()
-    crawled_data = scan_directory(
-        rfp("test_directories", "examples_article"),
-        rfp("scifolder_cfood.yml"), debug_tree=debug_tree)
-    crawler.identifiableAdapter = ident
-
-    return crawler, crawled_data, debug_tree
-
-
-def reset_mocks(mocks):
-    for mock in mocks:
-        mock.reset_mock()
-
-
-def change_identifiable_prop(ident):
-    """
-    This function is supposed to change a non identifiing property.
-    """
-    for ent in ident._records:
-        if len(ent.parents) == 0 or ent.parents[0].name != "Measurement":
-            continue
-        for prop in ent.properties:
-            if prop.name != "date":
-                continue
-            # change one element; This removes a responsible which is not part of the identifiable
-            prop.value = "2022-01-04"
-            return
-    # If it does not work, this test is not implemented properly
-    raise RuntimeError("Did not find the property that should be changed.")
-
-
-def change_non_identifiable_prop(ident):
-    """
-    This function is supposed to change a non identifiing property.
-    """
-    for ent in ident._records:
-        if len(ent.parents) == 0 or ent.parents[0].name != "Measurement":
-            continue
-
-        for prop in ent.properties:
-            if prop.name != "responsible" or len(prop.value) < 2:
-                continue
-            # change one element; This removes a responsible which is not part of the identifiable
-            del prop.value[-1]
-            return
-    raise RuntimeError("Did not find the property that should be changed.")
-
-
-@patch("caoscrawler.crawl.cached_get_entity_by",
-       new=Mock(side_effect=mock_get_entity_by))
-@patch("caoscrawler.crawl.db.Container.insert")
-@patch("caoscrawler.crawl.db.Container.update")
-@patch("caoscrawler.crawl.UpdateCache.insert")
-def test_security_mode(updateCacheMock, upmock, insmock, ident):
-    records_backup = deepcopy(ident._records)
-
-    # trivial case: nothing to do
-    crawler, crawled_data, debug_tree = prepare_crawler_with_sec_mode(SecurityMode.RETRIEVE, ident)
-    crawler.synchronize(commit_changes=True, crawled_data=crawled_data)
-    assert crawler.run_id is not None
-    insmock.assert_not_called()
-    upmock.assert_not_called()
-    updateCacheMock.assert_not_called()
-
-    # RETRIEVE: insert only
-    crawler, crawled_data, debug_tree = prepare_crawler_with_sec_mode(SecurityMode.RETRIEVE, ident)
-    # remove one element
-    del ident._records[-1]
-    # insert forbidden
-    crawler.synchronize(commit_changes=True, crawled_data=crawled_data)
-    assert crawler.run_id is not None
-    insmock.assert_not_called()
-    upmock.assert_not_called()
-    assert updateCacheMock.call_count == 1
-    # reset counts
-    reset_mocks([updateCacheMock, insmock, upmock])
-    # restore original ident
-    ident._records = deepcopy(records_backup)
-
-    # RETRIEVE: update only
-    crawler, crawled_data, debug_tree = prepare_crawler_with_sec_mode(SecurityMode.RETRIEVE, ident)
-    # change one element
-    change_non_identifiable_prop(ident)
-    crawler.synchronize(commit_changes=True, crawled_data=crawled_data)
-    assert crawler.run_id is not None
-    insmock.assert_not_called()
-    upmock.assert_not_called()
-    assert updateCacheMock.call_count == 1
-    # reset counts
-    reset_mocks([updateCacheMock, insmock, upmock])
-    # restore original ident
-    ident._records = deepcopy(records_backup)
-
-    # INSERT: insert only
-    crawler, crawled_data, debug_tree = prepare_crawler_with_sec_mode(SecurityMode.INSERT, ident)
-    # remove one element
-    del ident._records[-1]
-    crawler.synchronize(commit_changes=True, crawled_data=crawled_data)
-    assert crawler.run_id is not None
-    insmock.assert_called_once()
-    upmock.assert_not_called()
-    updateCacheMock.assert_not_called()
-    # reset counts
-    reset_mocks([updateCacheMock, insmock, upmock])
-    # restore original ident
-    ident._records = deepcopy(records_backup)
-
-    # INSERT: update only
-    crawler, crawled_data, debug_tree = prepare_crawler_with_sec_mode(SecurityMode.INSERT, ident)
-    # change one element
-    change_non_identifiable_prop(ident)
-    crawler.synchronize(commit_changes=True, crawled_data=crawled_data)
-    assert crawler.run_id is not None
-    insmock.assert_not_called()
-    upmock.assert_not_called()
-    updateCacheMock.assert_called_once()
-    # reset counts
-    reset_mocks([updateCacheMock, insmock, upmock])
-    # restore original ident
-    ident._records = deepcopy(records_backup)
-
-    # INSERT: insert and update
-    crawler, crawled_data, debug_tree = prepare_crawler_with_sec_mode(SecurityMode.INSERT, ident)
-    # change two elements
-    change_non_identifiable_prop(ident)
-    change_identifiable_prop(ident)
-    crawler.synchronize(commit_changes=True, crawled_data=crawled_data)
-    assert crawler.run_id is not None
-    insmock.asser_called_once()
-    upmock.assert_not_called()
-    updateCacheMock.assert_called_once()
-    # reset counts
-    reset_mocks([updateCacheMock, insmock, upmock])
-    # restore original ident
-    ident._records = deepcopy(records_backup)
-
-
-def test_create_reference_mapping():
-    a = db.Record().add_parent("A")
-    b = db.Record().add_parent("B").add_property('a', a)
-    ref = Crawler.create_reference_mapping([a, b])
-    assert id(a) in ref
-    assert id(b) not in ref
-    assert "B" in ref[id(a)]
-    assert ref[id(a)]["B"] == [b]
-
-
-def test_create_flat_list():
-    a = db.Record()
-    b = db.Record()
-    a.add_property(name="a", value=a)
-    a.add_property(name="b", value=b)
-    flat = Crawler.create_flat_list([a])
-    assert len(flat) == 2
-    assert a in flat
-    assert b in flat
-    c = db.Record()
-    c.add_property(name="a", value=a)
-    # This would caus recursion if it is not dealt with properly.
-    a.add_property(name="c", value=c)
-    flat = Crawler.create_flat_list([c])
-    assert len(flat) == 3
-    assert a in flat
-    assert b in flat
-    assert c in flat
-
-
-@pytest.fixture
-def crawler_mocked_for_backref_test(crawler):
-    # mock retrieval of registered identifiabls: return Record with just a parent
-    def get_reg_ident(x):
-        if x.parents[0].name == "C":
-            return db.Record().add_parent(x.parents[0].name).add_property(
-                "is_referenced_by", value=["BR"])
-        elif x.parents[0].name == "D":
-            return db.Record().add_parent(x.parents[0].name).add_property(
-                "is_referenced_by", value=["BR", "BR2"])
-        else:
-            return db.Record().add_parent(x.parents[0].name)
-    crawler[0].identifiableAdapter.get_registered_identifiable = Mock(side_effect=get_reg_ident)
-
-    # Simulate remote server content by using the names to identify records
-    # There is only a single known Record with name A
-    crawler[0].identifiableAdapter.retrieve_identified_record_for_record = Mock(side_effect=partial(
-        basic_retrieve_by_name_mock_up, known={"A":
-                                               db.Record(id=1111, name="A").add_parent("BR")}))
-    crawler[0].identifiableAdapter.retrieve_identified_record_for_identifiable = Mock(
-        side_effect=partial(
-            basic_retrieve_by_name_mock_up, known={"A":
-                                                   db.Record(id=1111, name="A").add_parent("BR")}))
-    return crawler
-
-
-def test_validation_error_print(caplog):
-    caplog.set_level(logging.DEBUG, logger="caoscrawler.converters")
-    # there should be no server interaction since we only test the behavior if a validation error
-    # occurs during the data collection stage
-    DATADIR = os.path.join(os.path.dirname(__file__), "test_data", "failing_validation")
-    for fi in ["cfood.yml", "cfood2.yml"]:
-        ret = crawler_main(DATADIR,
-                           os.path.join(DATADIR, fi),
-                           os.path.join(DATADIR, "identifiables.yml"),
-                           True,
-                           None,
-                           False)
-        assert "Couldn't validate" in caplog.text
-        caplog.clear()
-
-
-def test_split_into_inserts_and_updates_backref(crawler_mocked_for_backref_test):
-    crawler = crawler_mocked_for_backref_test[0]
-    identlist = [Identifiable(name="A", record_type="BR"),
-                 Identifiable(name="B", record_type="C", backrefs=[db.Entity()])]
-    referenced = db.Record(name="B").add_parent("C")
-    entlist = [referenced, db.Record(name="A").add_parent("BR").add_property("ref", referenced), ]
-
-    # Test without referencing object
-    # currently a NotImplementedError is raised if necessary properties are missing.
-    with raises(NotImplementedError):
-        crawler.split_into_inserts_and_updates([db.Record(name="B").add_parent("C")])
-
-    # identifiables were not yet checked
-    assert crawler.get_from_any_cache(identlist[0]) is None
-    assert crawler.get_from_any_cache(identlist[1]) is None
-    # one with reference, one without
-    assert not crawler._has_reference_value_without_id(identlist[0])
-    assert crawler._has_reference_value_without_id(identlist[1])
-    # one can be found remotely, one not
-    assert crawler.identifiableAdapter.retrieve_identified_record_for_record(
-        identlist[0]).id == 1111
-    assert crawler.identifiableAdapter.retrieve_identified_record_for_record(
-        identlist[1]) is None
-
-    # check the split...
-    insert, update = crawler.split_into_inserts_and_updates(deepcopy(entlist))
-    # A was found remotely and is therefore in the update list
-    assert len(update) == 1
-    assert update[0].name == "A"
-    # B does not exist on the (simulated) remote server
-    assert len(insert) == 1
-    assert insert[0].name == "B"
-
-
-def test_split_into_inserts_and_updates_mult_backref(crawler_mocked_for_backref_test):
-    # test whether multiple references of the same record type are correctly used
-    crawler = crawler_mocked_for_backref_test[0]
-    referenced = db.Record(name="B").add_parent("C")
-    entlist = [referenced,
-               db.Record(name="A").add_parent("BR").add_property("ref", referenced),
-               db.Record(name="C").add_parent("BR").add_property("ref", referenced),
-               ]
-
-    # test whether both entities are listed in the backref attribute of the identifiable
-    referencing_entities = crawler.create_reference_mapping(entlist)
-    identifiable = crawler.identifiableAdapter.get_identifiable(referenced, referencing_entities)
-    assert len(identifiable.backrefs) == 2
-
-    # check the split...
-    insert, update = crawler.split_into_inserts_and_updates(deepcopy(entlist))
-    assert len(update) == 1
-    assert len(insert) == 2
-
-
-def test_split_into_inserts_and_updates_diff_backref(crawler_mocked_for_backref_test):
-    # test whether multiple references of the different record types are correctly used
-    crawler = crawler_mocked_for_backref_test[0]
-    referenced = db.Record(name="B").add_parent("D")
-    entlist = [referenced,
-               db.Record(name="A").add_parent("BR").add_property("ref", referenced),
-               db.Record(name="A").add_parent("BR2").add_property("ref", referenced),
-               ]
-
-    # test whether both entities are listed in the backref attribute of the identifiable
-    referencing_entities = crawler.create_reference_mapping(entlist)
-    identifiable = crawler.identifiableAdapter.get_identifiable(referenced, referencing_entities)
-    assert len(identifiable.backrefs) == 2
-
-    # check the split...
-    insert, update = crawler.split_into_inserts_and_updates(deepcopy(entlist))
-    assert len(update) == 2
-    assert len(insert) == 1
-
-
-def mock_create_values(values, element):
-    pass
-
-
-@patch("caoscrawler.converters.IntegerElementConverter.create_values")
-def test_restricted_path(create_mock):
-    """
-    The restricted_path argument allows to ignroe part of the crawled data structure. Here, we make
-    sure, that is that argument is provided, ideed only the given path of the tree is traversed.
-
-    The check is done using the mock of the create_values function of the IntegerElementConverter.
-    This function is only called if elements are being treated.
-    """
-    crawler_definition = {
-        "DictTest": {
-            "type": "DictElement",
-            "match": "(.*)",
-            "subtree": {
-                "nextdict": {
-                    "type": "DictElement",
-                    "match": "(.*)",
-                    "subtree": {
-                        "int_element": {
-                            "type": "IntegerElement",
-                            "match_name": ".*",
-                            "match_value": "(?P<int_value>.*)",
-                            "records": {
-                                "Dataset": {
-                                    "Subject": "$int_value"
-                                }
-                            }
-                        }
-                    }
-                }
-            }
-        }
-    }
-
-    crawler = Crawler()
-    converter_registry = create_converter_registry(crawler_definition)
-
-    # This structure is crawled
-    test_dict = {
-        "v1": {
-            "a": 1,
-            "b": 2,
-        },
-        "v2": {
-            "c": 3,
-            "d": 4,
-        }
-    }
-    # first test without a restricted_path
-    restricted_path = None
-    records = scan_structure_elements(
-        DictElement("TestDict", test_dict), crawler_definition, converter_registry,
-        restricted_path
-    )
-    assert create_mock.call_count == 4
-    create_mock.reset_mock()
-
-    # test with a restricted_path but one that has no effect (single root element)
-    # this also tests that the remainder of the tree is fully traversed
-    restricted_path = ["TestDict"]
-    records = scan_structure_elements(
-        DictElement("TestDict", test_dict), crawler_definition, converter_registry,
-        restricted_path
-    )
-    assert create_mock.call_count == 4
-    create_mock.reset_mock()
-
-    # test with a restricted_path that restricts the tree (single root element)
-    restricted_path = ["TestDict", "v2"]
-    records = scan_structure_elements(
-        DictElement("TestDict", test_dict), crawler_definition, converter_registry,
-        restricted_path
-    )
-    assert create_mock.call_count == 2
-    create_mock.reset_mock()
-
-    # test with a restricted_path that contains a bad element
-    restricted_path = ["TestDict", "v3"]
-    with raises(RuntimeError):
-        records = scan_structure_elements(
-            DictElement("TestDict", test_dict), crawler_definition, converter_registry,
-            restricted_path
-        )
-
-
-def test_split_restricted_path():
-    assert ["el"] == split_restricted_path("/el")
-    assert ["el"] == split_restricted_path("/el/")
-    assert ["el", "el"] == split_restricted_path("/el/el")
-
-
-# Filter the warning because we want to have it here and this way it does not hinder running
-# tests with -Werror.
-@pytest.mark.filterwarnings("ignore:The prefix:DeprecationWarning")
-def test_deprecated_prefix_option():
-    """Test that calling the crawler's main function with the deprecated
-    `prefix` option raises the correct errors and warnings.
-
-    """
-
-    with pytest.deprecated_call():
-        crawler_main("./", rfp("scifolder_cfood.yml"), prefix="to/be/removed")
-
-    # Check that crawler main terminates with an error
-    assert 1 == crawler_main("./", rfp("scifolder_cfood.yml"), prefix="to/be/removed",
-                             remove_prefix="to/be/removed")
-
-    with raises(ValueError) as ve:
-
-        _treat_deprecated_prefix(prefix="to/be/removed", remove_prefix="to/be/removed")
-
-    assert "(deprecated) `prefix` and the `remove_prefix`" in str(ve.value)
-
-
-def test_create_entity_summary():
-    assert "" == Crawler.create_entity_summary([]).strip()
-
-    entities = [
-        db.Record(id=1).add_parent("A"),
-        db.Record(id=4, name='a').add_parent("B"),
-        db.Record(id=5).add_parent("A"),
-        db.Record(id=6, name='b').add_parent("B"),
-    ]
-    text = Crawler.create_entity_summary(entities).strip()
-    assert 'a' in text
-    assert 'b' in text
-    assert 'A:' in text
-    assert 'B:' in text
-    assert "<a href='/Entity/4'>a</a>, <a href='/Entity/6'>b</a>" in text
diff --git a/unittests/test_tool_extended.py b/unittests/test_tool_extended.py
deleted file mode 100644
index 7dd4282e4c6d206c8c360424d865b9f736b5e582..0000000000000000000000000000000000000000
--- a/unittests/test_tool_extended.py
+++ /dev/null
@@ -1,84 +0,0 @@
-#!/bin/python
-# Tests for the tool using pytest
-# Adapted from check-sfs
-# A. Schlemmer, 06/2021
-
-from caoscrawler import Crawler
-from caoscrawler.structure_elements import File, DictTextElement, DictListElement
-from caoscrawler.identifiable_adapters import IdentifiableAdapter, LocalStorageIdentifiableAdapter
-from caoscrawler.scanner import scan_directory
-from functools import partial
-from caoscrawler.debug_tree import DebugTree
-from copy import deepcopy
-from unittest.mock import MagicMock, Mock
-from os.path import join, dirname, basename
-import yaml
-import caosdb as db
-from caosdb.apiutils import compare_entities
-
-import pytest
-from pytest import raises
-
-
-def rfp(*pathcomponents):
-    """
-    Return full path.
-    Shorthand convenience function.
-    """
-    return join(dirname(__file__), *pathcomponents)
-
-
-def dircheckstr(*pathcomponents, structure_element_type="Directory"):
-    """
-    Return the debug tree identifier for a given path.
-    """
-    return ("caoscrawler.structure_elements." + structure_element_type + ": " +
-            basename(join(*pathcomponents)) + ", " +
-            rfp("test_directories", "examples_article", *pathcomponents))
-
-
-@pytest.fixture
-def crawler():
-    crawler = Crawler(debug=True)
-    crawler.crawl_directory(rfp("test_directories", "examples_article"),
-                            rfp("scifolder_extended.yml"))
-    return crawler
-
-
-# @pytest.fixture
-# def ident(crawler):
-#     ident = LocalStorageIdentifiableAdapter()
-#     crawler.identifiableAdapter = ident
-
-#     ident.restore_state(rfp("records.xml"))
-
-#     ident.register_identifiable(
-#         "Person", db.RecordType()
-#         .add_parent(name="Person")
-#         .add_property(name="first_name")
-#         .add_property(name="last_name"))
-#     ident.register_identifiable(
-#         "Measurement", db.RecordType()
-#         .add_parent(name="Measurement")
-#         .add_property(name="identifier")
-#         .add_property(name="date")
-#         .add_property(name="project"))
-#     ident.register_identifiable(
-#         "Project", db.RecordType()
-#         .add_parent(name="Project")
-#         .add_property(name="date")
-#         .add_property(name="identifier"))
-#     return ident
-
-
-def test_file_structure_generation():
-    dbt = DebugTree()
-    scan_directory(rfp("test_directories", "examples_article"),
-                   rfp("scifolder_extended.yml"),
-                   debug_tree=dbt)
-    sd = dbt.debug_tree[dircheckstr("SimulationData",
-                                    "2020_climate-model-predict", "2020-02-01",
-                                    "README.md", structure_element_type="File")]
-    assert sd[1]["ReadmeFile"].role == "File"
-    assert len(sd[1]["ReadmeFile"].path) > 0
-    assert len(sd[1]["ReadmeFile"].file) > 0
diff --git a/unittests/test_variable_substitutions.py b/unittests/test_variable_substitutions.py
index f13e759982e8102bbf37e65311ff4073ba52e5a2..9d1981c773e481e78eb4f82e785e27ee8f8d00d6 100644
--- a/unittests/test_variable_substitutions.py
+++ b/unittests/test_variable_substitutions.py
@@ -1,23 +1,49 @@
-#!/bin/python
-# Tests for variable substitutions
-# A. Schlemmer, 05/2022
+#!/usr/bin/env python3
+# encoding: utf-8
+#
+# This file is a part of the CaosDB Project.
+#
+# Copyright (C) 2022 Alexander Schlemmer
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as
+# published by the Free Software Foundation, either version 3 of the
+# License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+#
 
-from caoscrawler.debug_tree import DebugTree
-from caoscrawler import Crawler
-from caoscrawler.scanner import scan_directory
-from caoscrawler.structure_elements import File, DictTextElement, DictListElement
-from caoscrawler.identifiable_adapters import IdentifiableAdapter, LocalStorageIdentifiableAdapter
-from functools import partial
 from copy import deepcopy
+from functools import partial
+from os.path import basename, dirname, join
+from pathlib import Path
 from unittest.mock import MagicMock, Mock
-from os.path import join, dirname, basename
-import yaml
-import caosdb as db
-from caosdb.apiutils import compare_entities
 
+import caosdb as db
 import pytest
+import yaml
+from caoscrawler import Crawler
+from caoscrawler.debug_tree import DebugTree
+from caoscrawler.identifiable_adapters import (IdentifiableAdapter,
+                                               LocalStorageIdentifiableAdapter)
+from caoscrawler.scanner import scan_directory
+from caoscrawler.structure_elements import (DictListElement, DictTextElement,
+                                            File)
+from caosdb.apiutils import compare_entities
 from pytest import raises
 
+from utils import dircheckstr as dircheckstr_base
+
+UNITTESTDIR = Path(__file__).parent
+dircheckstr = partial(dircheckstr_base, UNITTESTDIR/"test_directories" /
+                      "example_substitutions")
+
 
 def rfp(*pathcomponents):
     """
@@ -27,13 +53,6 @@ def rfp(*pathcomponents):
     return join(dirname(__file__), *pathcomponents)
 
 
-def dircheckstr(element_type, *pathcomponents):
-    """
-    Return the debug tree identifier for a given path.
-    """
-    return "caoscrawler.structure_elements." + element_type + ": " + basename(join(*pathcomponents)) + ", " + rfp("test_directories", "example_substitutions", *pathcomponents)
-
-
 def test_substitutions():
 
     dbt = DebugTree()
@@ -42,13 +61,12 @@ def test_substitutions():
                    debug_tree=dbt)
     # @review Florian Spreckelsen 2022-05-13
     for i in range(2):
-        subd = dbt.debug_tree[dircheckstr(
-            "File", "ExperimentalData", "220512_data.dat")]
+        subd = dbt.debug_tree[dircheckstr("ExperimentalData", "220512_data.dat")]
         assert subd[i]["Experiment"].get_property("date").value == "2022-05-12"
         assert isinstance(subd[i]["ExperimentSeries"].get_property(
             "Experiment").value, db.Record)
 
-        subd = dbt.debug_tree[dircheckstr("Directory", "ExperimentalData")]
+        subd = dbt.debug_tree[dircheckstr("ExperimentalData")]
         assert subd[i]["Project"].name == "project"
         assert isinstance(subd[i]["Project"].get_property(
             "Experiments").value, list)
@@ -69,8 +87,7 @@ def test_substitutions_parents():
     # This is a test for:
     # https://gitlab.indiscale.com/caosdb/src/caosdb-crawler/-/issues/35
     # ... testing whether variable substitutions can be used in parent declarations.
-    subd = dbt.debug_tree[dircheckstr(
-        "File", "ExperimentalData", "220512_data.dat")]
+    subd = dbt.debug_tree[dircheckstr("ExperimentalData", "220512_data.dat")]
     # subd[0] <- generalStore
     # subd[1] <- recordStore
 
@@ -89,8 +106,7 @@ def test_empty_parents():
     # This is a test for:
     # https://gitlab.com/caosdb/caosdb-crawler/-/issues/8
 
-    subd = dbt.debug_tree[dircheckstr(
-        "File", "ExperimentalData", "220512_data.dat")]
+    subd = dbt.debug_tree[dircheckstr("ExperimentalData", "220512_data.dat")]
 
     parents = subd[1]["RecordWithoutParents"].get_parents()
     assert len(parents) == 0
diff --git a/unittests/utils.py b/unittests/utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..a9649dea686c33dc33d0d7636d08aa51beb35412
--- /dev/null
+++ b/unittests/utils.py
@@ -0,0 +1,40 @@
+#!/usr/bin/env python3
+# encoding: utf-8
+#
+# This file is a part of the CaosDB Project.
+#
+# Copyright (C) 2023 Indiscale GmbH <info@indiscale.com>
+# Copyright (C) 2023 Henrik tom Wörden <h.tomwoerden@indiscale.com>
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as
+# published by the Free Software Foundation, either version 3 of the
+# License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+#
+import os
+from pathlib import Path
+
+"""
+Utilities shared by the unit tests.
+"""
+UNITTESTDIR = Path(__file__).parent
+
+
+def dircheckstr(prefix, *pathcomponents):
+    """
+    Return the debug tree identifier for a given path.
+    """
+    if os.path.isdir(os.path.join(prefix, *pathcomponents)):
+        ftype = "Directory"
+    else:
+        ftype = "File"
+    return (f"caoscrawler.structure_elements.{ftype}: " + os.path.basename(
+        os.path.join(*pathcomponents)) + ", " + os.path.join(prefix, *pathcomponents))