diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index 8840e613f1e1eb86f30779b8b3535e2ff97ad0cc..b2ecd97cbded32dea3be9046cfdaac516bfb0ec2 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -130,7 +130,7 @@ unittest_py3.7:
     # TODO: Use f-branch logic here
     - pip install git+https://gitlab.indiscale.com/caosdb/src/caosdb-pylib.git@dev
     - pip install git+https://gitlab.indiscale.com/caosdb/src/caosdb-advanced-user-tools.git@dev
-    - pip install .
+    - pip install .[h5-crawler]
     # actual test
     - caosdb-crawler --help
     - pytest --cov=caosdb -vv ./unittests
diff --git a/CHANGELOG.md b/CHANGELOG.md
index ba0603be4ef7a66be5377e315e8dce0befae5d3c..2d049e0f3c5e8b30b7a7dc2206664e6fd7964246 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -8,23 +8,51 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 ## [Unreleased] ##
 
 ### Added ###
-* 'transform' sections can be added to a CFood to apply functions to values stored in variables.
+
+* `transform` sections can be added to a CFood to apply functions to values stored in variables.
 * default transform functions: submatch, split and replace.
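+  A sketch of the syntax (see the converter documentation for the authoritative
+  form; the variable names here are made up):
+
+  ```yaml
+  transform:
+    split_pieces:
+      in: $some_variable
+      out: $some_variable
+      functions:
+        - split:
+            marker: "|"
+  ```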
 * New command line option "--new-debug-tree" that allows saving a full tree of debug information for crawler runs in yaml and HTML format.
+* `*` can now be used as a wildcard in the identifiables parameter file to denote
+  that any Record may reference the identified one.
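+  A hypothetical snippet of such a parameter file:
+
+  ```yaml
+  Keyword:
+    - name
+    - is_referenced_by:
+        - "*"
+  ```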
+* `crawl.TreatedRecordLookUp` class replacing the old (and slow)
+  `identified_cache` module. The new class now handles all records identified by
+  id, path, or identifiable simultaneously. See API docs for more info on how to
+  add to and get from the new lookup class.
+* `identifiable_adapters.IdentifiableAdapter.get_identifying_referencing_entities`
+  and
+  `identifiable_adapters.IdentifiableAdapter.get_identifying_referenced_entities`
+  static methods to return the referencing or referenced entities belonging to a
+  registered identifiable, respectively.
+* [#70](https://gitlab.com/linkahead/linkahead-crawler/-/issues/70): Optional
+  converters for HDF5 files. They require this package to be installed with its
+  ``h5-crawler`` dependency.
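+  For example: ``pip install .[h5-crawler]``.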
 
 ### Changed ###
-- If the `parents` key is used in a cfood at a lower level for a Record that
+
+* If the `parents` key is used in a cfood at a lower level for a Record that
   already has a Parent (because it was explicitly given or the default Parent),
   the old Parent(s) are now overwritten with the value belonging to the
   `parents` key.
-- If a registered identifiable states, that a reference by a Record with parent
-  RT1 is needed, then now also references from Records that have a child of RT1
-  as parent are accepted.
+* If a registered identifiable states that a reference by a Record with parent
+  RT1 is needed, references from Records that have a child of RT1 as parent are
+  now also accepted.
+* More aggressive caching.
+* The `identifiable_adapters.IdentifiableAdapter` now creates (possibly empty)
+  reference lists for all records in `create_reference_mapping`. This allows
+  functions like `get_identifiable` to be called only with the subset of the
+  referencing entities belonging to a specific Record.
+* The `identifiable_adapters.IdentifiableAdapter` uses entity ids (negative for
+  entities that don't exist remotely) instead of entity objects for keeping
+  track of references.
 
 ### Deprecated ###
 
+* `IdentifiableAdapter.get_file`
+
 ### Removed ###
 
+* `identified_cache` module which was replaced by the `crawl.TreatedRecordLookUp` class.
+
 ### Fixed ###
 
 * Empty Records can now be created (https://gitlab.com/caosdb/caosdb-crawler/-/issues/27)
@@ -37,6 +65,12 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
   handles cases correctly in which entities retrieved from the server have to be
   merged with local entities that both reference another, already existing
   entity
+* A corner case in `split_into_inserts_and_updates` whereby two records created
+  in different places in the cfood definition would not be merged if both were
+  identified by the same LinkAhead id
+* [#87](https://gitlab.com/linkahead/linkahead-crawler/-/issues/87) Handle long
+  strings more gracefully. The crawler sometimes runs into
+  [linkahead-server#101](https://gitlab.com/linkahead/linkahead-server/-/issues/101);
+  this is now mitigated.
+* [indiscale#128](https://gitlab.indiscale.com/caosdb/src/caosdb-crawler/-/issues/128) Yet another corner case of reference resolution resolved.
 
 ### Security ###
 
diff --git a/CITATION.cff b/CITATION.cff
index c6d41cc49d74b72056d825ca731fa79897fc537b..a41a45add0addbe657e6a95bd4e74f8c2365c742 100644
--- a/CITATION.cff
+++ b/CITATION.cff
@@ -1,25 +1,22 @@
 cff-version: 1.2.0
 message: "If you use this software, please cite it as below."
 authors:
-  - family-names: Fitschen
-    given-names: Timm
-    orcid: https://orcid.org/0000-0002-4022-432X
-  - family-names: Schlemmer
-    given-names: Alexander
-    orcid: https://orcid.org/0000-0003-4124-9649
-  - family-names: Hornung
-    given-names: Daniel
-    orcid: https://orcid.org/0000-0002-7846-6375
   - family-names: tom Wörden
     given-names: Henrik
     orcid: https://orcid.org/0000-0002-5549-578X
-  - family-names: Parlitz
-    given-names: Ulrich
-    orcid: https://orcid.org/0000-0003-3058-1435
+  - family-names: Spreckelsen
+    given-names: Florian
+    orcid: https://orcid.org/0000-0002-6856-2910
   - family-names: Luther
     given-names: Stefan
     orcid: https://orcid.org/0000-0001-7214-8125
+  - family-names: Parlitz
+    given-names: Ulrich
+    orcid: https://orcid.org/0000-0003-3058-1435
+  - family-names: Schlemmer
+    given-names: Alexander
+    orcid: https://orcid.org/0000-0003-4124-9649
 title: CaosDB - Crawler
 version: 0.6.0
-doi: 10.3390/data4020083
+doi: 10.3390/data9020024
 date-released: 2023-06-23
\ No newline at end of file
diff --git a/integrationtests/basic_example/test_basic.py b/integrationtests/basic_example/test_basic.py
index d3482a19f0c95912002b2ff68101623476d452ea..c906a81d86af56669f7c522169bceb3b5fcb3e01 100755
--- a/integrationtests/basic_example/test_basic.py
+++ b/integrationtests/basic_example/test_basic.py
@@ -112,11 +112,11 @@ def crawler_extended(ident):
     return cr, crawled_data, debug_tree
 
 
-def test_ambigious_lookup(clear_database, usemodel, crawler, ident):
+def test_ambiguous_lookup(clear_database, usemodel, crawler, ident):
     ins, ups = crawler[0].synchronize(crawled_data=crawler[1])
 
     proj = db.execute_query("FIND Project WITH identifier='SpeedOfLight'", unique=True)
-    with pytest.raises(RuntimeError, match=".*unambigiously.*"):
+    with pytest.raises(RuntimeError, match=".*unambiguously.*"):
         print(crawler[0].identifiableAdapter.retrieve_identified_record_for_identifiable(
             Identifiable(properties={'project': proj.id})))
 
diff --git a/integrationtests/test_issues.py b/integrationtests/test_issues.py
index 441edac5481585e483c94d61d864a1baaa139aa2..814e82ad75512ec8fe217294e1a9e86c6aa01ab3 100644
--- a/integrationtests/test_issues.py
+++ b/integrationtests/test_issues.py
@@ -16,24 +16,26 @@
 # You should have received a copy of the GNU Affero General Public License
 # along with this program. If not, see <https://www.gnu.org/licenses/>.
 #
-from pytest import fixture, mark
+from pytest import fixture, mark, raises
 
-import caosdb as db
-from caosdb.cached import cache_clear
+import linkahead as db
+from linkahead.cached import cache_clear
 from caosadvancedtools.models.parser import parse_model_from_string
 
 from caoscrawler.crawl import Crawler
+from caoscrawler.identifiable import Identifiable
 from caoscrawler.identifiable_adapters import CaosDBIdentifiableAdapter
 from caoscrawler.structure_elements import DictElement
 
 from caoscrawler.scanner import create_converter_registry, scan_structure_elements
 
-from caosdb.utils.register_tests import clear_database, set_test_key
+from linkahead.utils.register_tests import clear_database, set_test_key
 set_test_key("10b128cf8a1372f30aa3697466bb55e76974e0c16a599bb44ace88f19c8f61e2")
 
 
 @fixture(autouse=True)
 def clear_cache():
+    """Clear the LinkAhead cache."""
     cache_clear()
 
 
@@ -266,3 +268,64 @@ Campaign:
     # Nothing to do for the existing ents
     assert len(ups) == 0
     assert ins[0].name == event.name
+
+
+def test_indiscale_87(clear_database):
+    """Handle long string queries gracefully.
+
+    https://gitlab.com/linkahead/linkahead-crawler/-/issues/87
+    """
+
+    prop = db.Property(name="str", datatype=db.TEXT).insert()
+    rt = db.RecordType(name="RT1").add_property(prop).insert()
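+    # Each test string is 260 characters long; long values like these used to run
+    # into linkahead-server#101 when they appeared in queries.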
+    strings = [
+        "X123456789" * 26,
+        "X" * 260,
+        "X123456789" * 25 + "9876543210",
+    ]
+    recs = [
+        db.Record().add_parent(rt).add_property(name="str", value=string).insert()
+        for string in strings
+    ]
+    idents = [
+        Identifiable(record_type="RT1", properties={"str": string})
+        for string in strings
+    ]
+    adapter = CaosDBIdentifiableAdapter()
+    for rec, ident in zip(recs, idents):
+        print(f"Testing: ...{rec.get_property('str').value[-10:]}")
+        retrieved = adapter.retrieve_identified_record_for_identifiable(ident)
+        # print(rec)
+        # print(retrieved)
+        print(db.apiutils.compare_entities(rec, retrieved))
+        assert db.apiutils.empty_diff(rec, retrieved)
+        print("---")
+
+    # add another, harmless, property
+    prop2 = db.Property(name="someint", datatype=db.INTEGER).insert()
+    rt.add_property(prop2).update()
+    string = "Y123456789" * 26
+    numbers = [23, 42]
+    recs = [
+        db.Record().add_parent(rt).add_property(name="str", value=string).add_property(
+            name="someint", value=number).insert()
+        for number in numbers
+    ]
+    idents = [Identifiable(record_type="RT1", properties={"str": string})]
+    # Ambiguous result
+    with raises(RuntimeError, match=".*unambiguously.*"):
+        retrieved = adapter.retrieve_identified_record_for_identifiable(idents[0])
+
+    # Upgrade new property to be identifying
+    idents = [
+        Identifiable(record_type="RT1", properties={"str": string, "someint": number})
+        for number in numbers
+    ]
+    for rec, ident in zip(recs, idents):
+        print(f"Testing: someint={rec.get_property('someint').value}")
+        retrieved = adapter.retrieve_identified_record_for_identifiable(ident)
+        # print(rec)
+        # print(retrieved)
+        print(db.apiutils.compare_entities(rec, retrieved))
+        assert db.apiutils.empty_diff(rec, retrieved)
+        print("---")
diff --git a/setup.cfg b/setup.cfg
index ffb00e54eb7bf8753a335bb96d9fdf23700aaadd..0f6afe5d2538228ee49608bb8b3751139cba4e00 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -40,3 +40,8 @@ per-file-ignores = __init__.py:F401
 [options.entry_points]
 console_scripts =
   caosdb-crawler = caoscrawler.crawl:main
+
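+# The h5-crawler extra can be installed e.g. with: pip install .[h5-crawler]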
+[options.extras_require]
+h5-crawler =
+           h5py >= 3.8
+           numpy
diff --git a/src/caoscrawler/cfood-schema.yml b/src/caoscrawler/cfood-schema.yml
index b0d77bbf5d7ba09df3c0c47d656fa3d22d07b6d2..5a6e1e50345382ca6e5a1e6ef3a8fbeafb806b84 100644
--- a/src/caoscrawler/cfood-schema.yml
+++ b/src/caoscrawler/cfood-schema.yml
@@ -31,6 +31,10 @@ cfood:
           - JSONFile
           - CSVTableConverter
           - XLSXTableConverter
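+          # HDF5 converters; these require the optional h5-crawler dependency: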
+          - H5File
+          - H5Dataset
+          - H5Group
+          - H5Ndarray
           description: Type of this converter node.
         match:
           description: typically a regexp which is matched to a structure element name
diff --git a/src/caoscrawler/converters.py b/src/caoscrawler/converters.py
index bbc34e3795f707b55d2147d05753fa6166a20dc5..477d114c8e29047278df90ef226371f20ad89e58 100644
--- a/src/caoscrawler/converters.py
+++ b/src/caoscrawler/converters.py
@@ -36,7 +36,7 @@ from inspect import signature
 from string import Template
 from typing import Any, List, Optional, Tuple, Union
 
-import caosdb as db
+import linkahead as db
 import pandas as pd
 import yaml
 import yaml_header_tools
@@ -1082,16 +1082,13 @@ class ListElementConverter(Converter):
                 "This converter can only process DictListElements.")
         children: list[StructureElement] = []
         for index, list_element in enumerate(element.value):
-            # TODO(fspreck): Refactor this and merge with DictXXXElements maybe?
-            if isinstance(list_element, str):
-                children.append(TextElement(str(index), list_element))
-            elif isinstance(list_element, dict):
-                children.append(DictElement(str(index), list_element))
-            elif isinstance(list_element, StructureElement):
-                children.append(list_element)
-            else:
-                raise NotImplementedError(
-                    f"Unkown type {type(list_element)} in list element {list_element}.")
+            children.append(
+                convert_basic_element(
+                    list_element,
+                    name=f"{index}",
+                    msg_prefix=f"The value at index {index} in the list has an unknown type."
+                )
+            )
         return children
 
     def typecheck(self, element: StructureElement):
diff --git a/src/caoscrawler/crawl.py b/src/caoscrawler/crawl.py
index a4649ff0856dc917105fc773333b5b81ded3b85a..a542b366d42ef5de1d2edabb60326adb466ac121 100644
--- a/src/caoscrawler/crawl.py
+++ b/src/caoscrawler/crawl.py
@@ -3,10 +3,11 @@
 #
 # This file is a part of the CaosDB Project.
 #
-# Copyright (C) 2021      Henrik tom Wörden <h.tomwoerden@indiscale.com>
-#               2021-2023 Research Group Biomedical Physics,
-# Max-Planck-Institute for Dynamics and Self-Organization Göttingen
-# Alexander Schlemmer <alexander.schlemmer@ds.mpg.de>
+# Copyright (C) 2021-2024 Henrik tom Wörden <h.tomwoerden@indiscale.com>
+# Copyright (C) 2021-2023 Research Group Biomedical Physics, MPI-DS Göttingen
+# Copyright (C) 2021-2023 Alexander Schlemmer <alexander.schlemmer@ds.mpg.de>
+# Copyright (C) 2021-2024 Indiscale GmbH <info@indiscale.com>
+# Copyright (C) 2024 Daniel Hornung <d.hornung@indiscale.com>
 #
 # This program is free software: you can redistribute it and/or modify
 # it under the terms of the GNU Affero General Public License as
@@ -26,7 +27,7 @@
 
 """
 Crawl a file structure using a yaml cfood definition and synchronize
-the acuired data with CaosDB.
+the acquired data with LinkAhead.
 """
 
 from __future__ import annotations
@@ -42,20 +43,20 @@ from argparse import RawTextHelpFormatter
 from copy import deepcopy
 from datetime import datetime
 from enum import Enum
-from typing import Any, Optional, Union
+from typing import Any, List, Optional, Union
 
-import caosdb as db
+import linkahead as db
 import yaml
 from caosadvancedtools.cache import UpdateCache
 from caosadvancedtools.crawler import Crawler as OldCrawler
 from caosadvancedtools.serverside.helper import send_mail
 from caosadvancedtools.utils import create_entity_link
-from caosdb.apiutils import (EntityMergeConflictError, compare_entities,
-                             merge_entities)
-from caosdb.cached import cache_clear, cached_get_entity_by
-from caosdb.exceptions import EmptyUniqueQueryError
+from linkahead.apiutils import (EntityMergeConflictError, compare_entities,
+                                merge_entities)
+from linkahead.cached import cache_clear, cached_get_entity_by
 from linkahead.common.datatype import get_list_datatype, is_reference
-from linkahead.utils.escape import escape_quoted_text
+from linkahead.exceptions import EmptyUniqueQueryError
+from linkahead.utils.escape import escape_squoted_text
 
 from .config import get_config_setting
 from .converters import Converter, ConverterValidationError
@@ -64,7 +65,6 @@ from .identifiable import Identifiable
 from .identifiable_adapters import (CaosDBIdentifiableAdapter,
                                     IdentifiableAdapter,
                                     LocalStorageIdentifiableAdapter)
-from .identified_cache import IdentifiedCache
 from .logging import configure_server_side_logging
 from .macros import defmacro_constructor, macro_constructor
 from .scanner import (create_converter_registry, initialize_converters,
@@ -185,6 +185,11 @@ def _treat_merge_error_of(newrecord, record):
     """
     for this_p in newrecord.properties:
         that_p = record.get_property(this_p.name)
+        if that_p is None:
+            logger.debug(f"Property {this_p.name} does not exist in the second entity. Note that "
+                         "this should not be the reason for the merge conflict.")
+            continue
+
         if (isinstance(this_p.value, db.Entity)
                 and isinstance(that_p.value, db.Entity)):
             if this_p.value.id is not None and that_p.value.id is not None:
@@ -204,14 +209,13 @@ def _treat_merge_error_of(newrecord, record):
                 # TODO can we also compare lists?
                 and not isinstance(this_p.value, list)
                     and not isinstance(that_p.value, list)):
-                logger.error("The Crawler is trying to merge two entities "
-                             "because they should be the same object (same"
-                             " identifiables), but they have  "
-                             "different values for the same Property."
-                             f"Problematic Property: {this_p.name}\n"
-                             f"Values: {this_p.value} and "
-                             f"{that_p.value}\n"
-                             f"{record}\n{newrecord}")
+                logger.error(
+                    "The Crawler is trying to merge two entities because they should be the same "
+                    "object (same identifiables), but they have different values for the same "
+                    "Property.\n"
+                    f"Problematic Property: {this_p.name}\n"
+                    f"Values: {this_p.value} and {that_p.value}\n"
+                    f"{record}\n{newrecord}")
                 raise RuntimeError("Cannot merge Entities")
 
 
@@ -221,6 +225,110 @@ class SecurityMode(Enum):
     UPDATE = 2
 
 
+class TreatedRecordLookUp():
+    """tracks Records and Identifiables for which it was checked whether they exist in the remote
+    server
+
+    For a given Record it can be checked, whether it exists in the remote sever if
+    - it has a (valid) ID
+    - it has a (valid) path (FILEs only)
+    - an identifiable can be created for the Record.
+
+    Records are added by calling the `add` function and are then put on the internal
+    existing or missing list, depending on whether the Record has a valid ID.
+    Additionally, the Record is added to three lookup dicts whose keys are paths, IDs and
+    the representations of the identifiables, respectively.
+
+    The most extreme case one could imagine is that the same Record occurs three times as
+    different Python objects: one that only has an ID, one with only a path, and one with
+    neither ID nor path but with identifying properties. During `split_into_inserts_and_updates`
+    all three must be identified with each other (and must be merged). Since we require that
+    treated entities have a valid ID if they exist in the remote server, all three objects
+    would be identified with each other simply using the IDs.
+
+    In the case that the Record does not yet exist in the remote server, there cannot be a
+    Python object with an ID. Thus we might have one object with a path and one with an
+    identifiable. To identify these two with each other, they must have at least the path or
+    the identifiable in common.
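+
+    A minimal usage sketch (the ID is made up):
+
+    >>> lookup = TreatedRecordLookUp()
+    >>> rec = db.Record()
+    >>> rec.id = 123  # a valid remote ID, so the Record counts as existing
+    >>> lookup.add(rec)
+    >>> lookup.get_existing(rec) is rec
+    True
+    >>> lookup.get_missing_list()
+    []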
+    """
+
+    def __init__(self):
+        self._id_look_up: dict[int, db.Entity] = {}
+        self._path_look_up: dict[str, db.Entity] = {}
+        self._identifiable_look_up: dict[str, db.Entity] = {}
+        self.remote_missing_counter = -1
+        self._missing: dict[int, db.Entity] = {}
+        self._existing: dict[int, db.Entity] = {}
+
+    def add(self, record: db.Entity, identifiable: Optional[Identifiable] = None):
+        """
+        Add a Record that was treated, such that it is contained in the internal lookup dicts.
+
+        This Record MUST have an ID if it was found in the remote server.
+        """
+        if record.id is None:
+            if record.path is None and identifiable is None:
+                raise RuntimeError("Record must have ID or path or an identifiable must be given."
+                                   f"Record is\n{record}")
+            record.id = self.remote_missing_counter
+            self.remote_missing_counter -= 1
+            self._add_any(record, self._missing, identifiable)
+        else:
+            self._add_any(record, self._existing, identifiable)
+
+    def get_any(self, record: db.Entity, identifiable: Optional[Identifiable] = None):
+        """
+        Check whether this Record was already added. Identity is based on the ID, path or
+        Identifiable representation.
+        """
+        if record.id is not None and record.id in self._id_look_up:
+            return self._id_look_up[record.id]
+        if record.path is not None and record.path in self._path_look_up:
+            return self._path_look_up[record.path]
+        if (identifiable is not None and identifiable.get_representation() in
+                self._identifiable_look_up):
+            return self._identifiable_look_up[identifiable.get_representation()]
+
+    def get_existing(self, record: db.Entity, identifiable: Optional[Identifiable] = None):
+        """ Check whether this Record exists on the remote server
+
+        Returns: The stored Record
+        """
+        rec = self.get_any(record, identifiable)
+        if id(rec) in self._existing:
+            return rec
+        else:
+            return None
+
+    def get_missing(self, record: db.Entity, identifiable: Optional[Identifiable] = None):
+        """ Check whether this Record is missing on the remote server
+
+        Returns: The stored Record
+        """
+        rec = self.get_any(record, identifiable)
+        if id(rec) in self._missing:
+            return rec
+        else:
+            return None
+
+    def get_missing_list(self):
+        """ Return all Records that are missing in the remote server """
+        return list(self._missing.values())
+
+    def get_existing_list(self):
+        """ Return all Records that exist in the remote server """
+        return list(self._existing.values())
+
+    def _add_any(self, record: db.Entity, lookup, identifiable: Optional[Identifiable] = None):
+        if record.id is not None:
+            self._id_look_up[record.id] = record
+        if record.path is not None:
+            self._path_look_up[record.path] = record
+        if identifiable is not None:
+            self._identifiable_look_up[identifiable.get_representation()] = record
+        lookup[id(record)] = record
+
+
 class Crawler(object):
     """
     Crawler class that encapsulates crawling functions.
@@ -257,8 +365,8 @@ class Crawler(object):
         # The following caches store records, where we checked whether they exist on the remote
         # server. Since, it is important to know whether they exist or not, we store them into two
         # different caches.
-        self.remote_existing_cache = IdentifiedCache()
-        self.remote_missing_cache = IdentifiedCache()
+        self.treated_records_lookup = TreatedRecordLookUp()
+
         # TODO does it make sense to have this as member variable?
         self.securityMode = securityMode
         # TODO does it make sense to have this as member variable(run_id)?
@@ -405,7 +513,7 @@ class Crawler(object):
                         Crawler.create_flat_list([p.value], flat)
         return flat
 
-    def _has_missing_object_in_references(self, ident: Identifiable, referencing_entities: list):
+    def _has_missing_object_in_references(self, ident: Identifiable, referencing_entities: dict):
         """
         returns True if any value in the properties attribute is a db.Entity object that
         is missing on the remote server. If ident has such an object in
@@ -418,16 +526,21 @@ class Crawler(object):
             # Entity instead of ID and not cached locally
             if (isinstance(pvalue, list)):
                 for el in pvalue:
-                    if (isinstance(el, db.Entity) and self.get_from_remote_missing_cache(
-                            self.identifiableAdapter.get_identifiable(el, referencing_entities)) is not None):
+                    if not isinstance(el, db.Entity):
+                        continue
+                    elident = self.identifiableAdapter.get_identifiable(
+                        el, referencing_entities[id(el)])
+                    if self.treated_records_lookup.get_missing(el, elident) is not None:
                         return True
-            if (isinstance(pvalue, db.Entity) and self.get_from_remote_missing_cache(
-                    self.identifiableAdapter.get_identifiable(pvalue, referencing_entities)) is not None):
+            if (isinstance(pvalue, db.Entity)
+                    and self.treated_records_lookup.get_missing(
+                        pvalue,
+                        self.identifiableAdapter.get_identifiable(
+                            pvalue, referencing_entities[id(pvalue)])) is not None):
                 # might be checked when reference is resolved
                 return True
         return False
 
-    def replace_references_with_cached(self, record: db.Record, referencing_entities: list):
+    def replace_references_with_cached(self, record: db.Record, referencing_entities: dict):
         """
         Replace all references with the versions stored in the cache.
 
@@ -438,8 +551,10 @@ class Crawler(object):
                 lst = []
                 for el in p.value:
                     if (isinstance(el, db.Entity) and el.id is None):
-                        cached = self.get_from_any_cache(
-                            self.identifiableAdapter.get_identifiable(el, referencing_entities))
+                        cached = self.treated_records_lookup.get_any(
+                            el,
+                            self.identifiableAdapter.get_identifiable(
+                                el, referencing_entities[id(el)]))
                         if cached is None:
                             lst.append(el)
                             continue
@@ -447,12 +562,12 @@ class Crawler(object):
                             if isinstance(p.value, db.File):
                                 if p.value.path != cached.path:
                                     raise RuntimeError(
-                                        "The cached and the refernced entity are not identical.\n"
+                                        "The cached and the referenced entity are not identical.\n"
                                         f"Cached:\n{cached}\nReferenced:\n{el}"
                                     )
                             else:
                                 raise RuntimeError(
-                                    "The cached and the refernced entity are not identical.\n"
+                                    "The cached and the referenced entity are not identical.\n"
                                     f"Cached:\n{cached}\nReferenced:\n{el}"
                                 )
                         lst.append(cached)
@@ -460,82 +575,25 @@ class Crawler(object):
                         lst.append(el)
                 p.value = lst
             if (isinstance(p.value, db.Entity) and p.value.id is None):
-                cached = self.get_from_any_cache(
-                    self.identifiableAdapter.get_identifiable(p.value, referencing_entities))
+                cached = self.treated_records_lookup.get_any(
+                    p.value, self.identifiableAdapter.get_identifiable(
+                        p.value, referencing_entities[id(p.value)]))
                 if cached is None:
                     continue
                 if not check_identical(cached, p.value, True):
                     if isinstance(p.value, db.File):
                         if p.value.path != cached.path:
                             raise RuntimeError(
-                                "The cached and the refernced entity are not identical.\n"
+                                "The cached and the referenced entity are not identical.\n"
                                 f"Cached:\n{cached}\nReferenced:\n{p.value}"
                             )
                     else:
                         raise RuntimeError(
-                            "The cached and the refernced entity are not identical.\n"
+                            "The cached and the referenced entity are not identical.\n"
                             f"Cached:\n{cached}\nReferenced:\n{p.value}"
                         )
                 p.value = cached
 
-    def get_from_remote_missing_cache(self, identifiable: Identifiable):
-        """
-        returns the identified record if an identifiable with the same values already exists locally
-        (Each identifiable that is not found on the remote server, is 'cached' locally to prevent
-        that the same identifiable exists twice)
-        """
-        if identifiable is None:
-            raise ValueError("Identifiable has to be given as argument")
-
-        if identifiable in self.remote_missing_cache:
-            return self.remote_missing_cache[identifiable]
-        else:
-            return None
-
-    def get_from_any_cache(self, identifiable: Identifiable):
-        """
-        returns the identifiable if an identifiable with the same values already exists locally
-        (Each identifiable that is not found on the remote server, is 'cached' locally to prevent
-        that the same identifiable exists twice)
-        """
-        if identifiable is None:
-            raise ValueError("Identifiable has to be given as argument")
-
-        if identifiable in self.remote_existing_cache:
-            return self.remote_existing_cache[identifiable]
-        elif identifiable in self.remote_missing_cache:
-            return self.remote_missing_cache[identifiable]
-        else:
-            return None
-
-    def add_to_remote_missing_cache(self, record: db.Record, identifiable: Identifiable):
-        """
-        stores the given Record in the remote_missing_cache.
-
-        If identifiable is None, the Record is NOT stored.
-        """
-        self.add_to_cache(record=record, cache=self.remote_missing_cache,
-                          identifiable=identifiable)
-
-    def add_to_remote_existing_cache(self, record: db.Record, identifiable: Identifiable):
-        """
-        stores the given Record in the remote_existing_cache.
-
-        If identifiable is None, the Record is NOT stored.
-        """
-        self.add_to_cache(record=record, cache=self.remote_existing_cache,
-                          identifiable=identifiable)
-
-    def add_to_cache(self, record: db.Record, cache: IdentifiedCache,
-                     identifiable: Identifiable) -> None:
-        """
-        stores the given Record in the given cache.
-
-        If identifiable is None, the Record is NOT stored.
-        """
-        if identifiable is not None:
-            cache.add(identifiable=identifiable, record=record)
-
     @staticmethod
     def bend_references_to_new_object(old, new, entities):
         """ Bend references to the other object
@@ -552,23 +610,73 @@ class Crawler(object):
                     if p.value is old:
                         p.value = new
 
+    def _merge_identified(self, newrecord, record, try_to_merge_later, all_records):
+        """ tries to merge record into newrecord
+
+        If it fails, record is added to the try_to_merge_later list.
+        In any case, references are bent to the newrecord object.
+
+        """
+        try:
+            merge_entities(
+                newrecord, record, merge_references_with_empty_diffs=False,
+                merge_id_with_resolved_entity=True)
+        except EntityMergeConflictError:
+            _treat_merge_error_of(newrecord, record)
+            # We cannot merge but it is none of the clear case where merge is
+            # impossible. Thus we try later
+            try_to_merge_later.append(record)
+            if newrecord.id is not None:
+                record.id = newrecord.id
+        except NotImplementedError:
+            print(newrecord)
+            print(record)
+            raise
+        Crawler.bend_references_to_new_object(
+            old=record, new=newrecord,
+            entities=all_records
+        )
+
+    def _identity_relies_on_unchecked_entities(self, record: db.Record, referencing_entities):
+        """
+        Returns True if the identity of this record relies on another record for which it could
+        not yet be verified whether it exists in LinkAhead; otherwise returns False.
+        """
+
+        registered_identifiable = self.identifiableAdapter.get_registered_identifiable(record)
+        if registered_identifiable is None:
+            return False
+        refs = self.identifiableAdapter.get_identifying_referencing_entities(referencing_entities,
+                                                                             registered_identifiable)
+        if any(el is None for el in refs):
+            return True
+
+        refs = self.identifiableAdapter.get_identifying_referenced_entities(
+            record, registered_identifiable)
+        if any([self.treated_records_lookup.get_any(el) is None for el in refs]):
+            return True
+
+        return False
+
     @staticmethod
     def create_reference_mapping(flat: list[db.Entity]):
         """
         Create a dictionary of dictionaries of the form:
-        dict[int, dict[str, list[db.Entity]]]
+        dict[int, dict[str, list[Union[int, None]]]]
 
         - The integer index is the Python id of the value object.
         - The string is the name of the first parent of the referencing object.
 
         Each value object is taken from the values of all properties from the list flat.
 
-        So the returned mapping maps ids of entities to the objects which are referring
+        So the returned mapping maps ids of entities to the ids of objects which are referring
         to them.
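+
+        Example (sketch): if Record ``A`` (first parent "Project") references Record ``B``
+        and nothing references ``A``, the result is
+        ``{id(B): {"Project": [A.id]}, id(A): {}}``.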
         """
         # TODO we need to treat children of RecordTypes somehow.
-        references: dict[int, dict[str, list[db.Entity]]] = {}
+        references: dict[int, dict[str, list[Union[int, None]]]] = {}
         for ent in flat:
+            if id(ent) not in references:
+                references[id(ent)] = {}
             for p in ent.properties:
                 val = p.value
                 if not isinstance(val, list):
@@ -579,127 +687,119 @@ class Crawler(object):
                             references[id(v)] = {}
                         if ent.parents[0].name not in references[id(v)]:
                             references[id(v)][ent.parents[0].name] = []
-                        references[id(v)][ent.parents[0].name].append(ent)
+                        references[id(v)][ent.parents[0].name].append(ent.id)
 
         return references
 
     def split_into_inserts_and_updates(self, ent_list: list[db.Entity]):
-        to_be_inserted: list[db.Entity] = []
-        to_be_updated: list[db.Entity] = []
         flat = Crawler.create_flat_list(ent_list)
+        all_records = list(flat)
 
         # TODO: can the following be removed at some point
         for ent in flat:
             if ent.role == "Record" and len(ent.parents) == 0:
                 raise RuntimeError(f"Records must have a parent.\n{ent}")
 
-        resolved_references = True
+        try_to_merge_later = []
+
+        # Check whether Records can be identified without identifiable
+        for i in reversed(range(len(flat))):
+            record = flat[i]
+            # 1. Can it be identified via an ID?
+            if record.id is not None:
+                treated_record = self.treated_records_lookup.get_existing(record)
+                if treated_record is not None:
+                    self._merge_identified(treated_record, record, try_to_merge_later, all_records)
+                    all_records.remove(record)
+                    referencing_entities = self.create_reference_mapping(all_records)
+                else:
+                    self.treated_records_lookup.add(record, None)
+                assert record.id
+                del flat[i]
+            # 2. Can it be identified via a path?
+            elif record.path is not None:
+                try:
+                    existing = cached_get_entity_by(path=record.path)
+                except EmptyUniqueQueryError:
+                    existing = None
+                if existing is not None:
+                    record.id = existing.id
+                    # TODO check the following copying of _size and _checksum
+                    # Copy over checksum and size too if it is a file
+                    record._size = existing._size
+                    record._checksum = existing._checksum
+                treated_record = self.treated_records_lookup.get_any(record)
+                if treated_record is not None:
+                    self._merge_identified(treated_record, record, try_to_merge_later, all_records)
+                    all_records.remove(record)
+                    referencing_entities = self.create_reference_mapping(all_records)
+                else:
+                    # TODO add identifiable if possible
+                    self.treated_records_lookup.add(record, None)
+                assert record.id
+                del flat[i]
+
+        entity_was_treated = True
         # flat contains Entities which could not yet be checked against the remote server
-        try_to_merge_later = []
-        while resolved_references and len(flat) > 0:
-            resolved_references = False
-            referencing_entities = self.create_reference_mapping(
-                flat + to_be_updated + try_to_merge_later + to_be_inserted)
+        while entity_was_treated and len(flat) > 0:
+            entity_was_treated = False
+            referencing_entities = self.create_reference_mapping(all_records)
 
             # For each element we try to find out whether we can find it in the server or whether
             # it does not yet exist. Since a Record may reference other unknown Records it might not
             # be possible to answer this right away.
             # The following checks are done on each Record:
-            # 1. Can it be identified via an ID?
-            # 2. Can it be identified via a path?
-            # 3. Is it in the cache of already checked Records?
-            # 4. Can it be checked on the remote server?
-            # 5. Does it have to be new since a needed reference is missing?
+            # 1. Is it in the cache of already checked Records?
+            # 2. Can it be checked on the remote server?
+            # 3. Does it have to be new since a needed reference is missing?
             for i in reversed(range(len(flat))):
                 record = flat[i]
+
+                if self._identity_relies_on_unchecked_entities(record,
+                                                               referencing_entities[id(record)]):
+                    continue
+
                 identifiable = self.identifiableAdapter.get_identifiable(
                     record,
-                    referencing_entities=referencing_entities)
-
-                # TODO remove if the exception is never raised
-                if record in to_be_inserted:
-                    raise RuntimeError("This should not be reached since treated elements"
-                                       "are removed from the list")
-                # 1. Can it be identified via an ID?
-                elif record.id is not None:
-                    to_be_updated.append(record)
-                    self.add_to_remote_existing_cache(record, identifiable)
-                    del flat[i]
-                # 2. Can it be identified via a path?
-                elif record.path is not None:
-                    try:
-                        existing = cached_get_entity_by(path=record.path)
-                    except EmptyUniqueQueryError:
-                        existing = None
-                    if existing is None:
-                        to_be_inserted.append(record)
-                        self.add_to_remote_missing_cache(record, identifiable)
-                        del flat[i]
-                    else:
-                        record.id = existing.id
-                        # TODO check the following copying of _size and _checksum
-                        # Copy over checksum and size too if it is a file
-                        record._size = existing._size
-                        record._checksum = existing._checksum
-                        to_be_updated.append(record)
-                        self.add_to_remote_existing_cache(record, identifiable)
-                        del flat[i]
-                # 3. Is it in the cache of already checked Records?
-                elif self.get_from_any_cache(identifiable) is not None:
-                    newrecord = self.get_from_any_cache(identifiable)
-                    # Since the identifiables are the same, newrecord and record actually describe
-                    # the same obejct.
-                    # We merge the two in order to prevent loss of information
-                    try:
-                        merge_entities(
-                            newrecord, record, merge_references_with_empty_diffs=False, merge_id_with_resolved_entity=True)
-                    except EntityMergeConflictError:
-                        _treat_merge_error_of(newrecord, record)
-                        # We cannot merge but it is none of the clear case where merge is
-                        # impossible. Thus we try later
-                        try_to_merge_later.append(record)
-                        if newrecord.id is not None:
-                            record.id = newrecord.id
-                    except NotImplementedError:
-                        print(newrecord)
-                        print(record)
-                        raise
-                    Crawler.bend_references_to_new_object(
-                        old=record, new=newrecord,
-                        entities=flat + to_be_updated + to_be_inserted + try_to_merge_later
-                    )
-                    referencing_entities = self.create_reference_mapping(
-                        flat + to_be_updated + try_to_merge_later + to_be_inserted)
+                    referencing_entities=referencing_entities[id(record)])
+
+                # 1. Is it in the cache of already checked Records?
+                treated_record = self.treated_records_lookup.get_any(record, identifiable)
+                if treated_record is not None:
+                    # Since the identifiables are the same, treated_record and record actually
+                    # describe the same object.
+                    # We merge record into treated_record in order to prevent loss of information
+                    self._merge_identified(treated_record, record, try_to_merge_later, all_records)
+                    all_records.remove(record)
+                    referencing_entities = self.create_reference_mapping(all_records)
 
                     del flat[i]
-                    resolved_references = True
+                    entity_was_treated = True
 
-                # 4. Can it be checked on the remote server?
+                # 2. Can it be checked on the remote server?
                 elif not self._has_reference_value_without_id(identifiable):
                     identified_record = (
                         self.identifiableAdapter.retrieve_identified_record_for_identifiable(
                             identifiable))
                     if identified_record is None:
                         # identifiable does not exist remotely -> record needs to be inserted
-                        to_be_inserted.append(record)
-                        self.add_to_remote_missing_cache(record, identifiable)
-                        del flat[i]
+                        self.treated_records_lookup.add(record, identifiable)
                     else:
                         # side effect
                         record.id = identified_record.id
-                        to_be_updated.append(record)
-                        self.add_to_remote_existing_cache(record, identifiable)
-                        del flat[i]
-                    resolved_references = True
+                        record.path = identified_record.path
+                        self.treated_records_lookup.add(record, identifiable)
+                    assert record.id
+                    del flat[i]
+                    entity_was_treated = True
 
-                # 5. Does it have to be new since a needed reference is missing?
+                # 3. Does it have to be new since a needed reference is missing?
                 # (Is it impossible to check this record because an identifiable references a
                 # missing record?)
                 elif self._has_missing_object_in_references(identifiable, referencing_entities):
-                    to_be_inserted.append(record)
-                    self.add_to_remote_missing_cache(record, identifiable)
+                    self.treated_records_lookup.add(record, identifiable)
+                    assert record.id
                     del flat[i]
-                    resolved_references = True
+                    entity_was_treated = True
 
             for record in flat:
                 self.replace_references_with_cached(record, referencing_entities)
@@ -709,23 +809,35 @@ class Crawler(object):
         for record in try_to_merge_later:
             identifiable = self.identifiableAdapter.get_identifiable(
                 record,
-                referencing_entities=referencing_entities)
-            newrecord = self.get_from_any_cache(identifiable)
+                referencing_entities=referencing_entities[id(record)])
+            newrecord = self.treated_records_lookup.get_any(record, identifiable)
             merge_entities(newrecord, record, merge_id_with_resolved_entity=True)
         if len(flat) > 0:
             circle = self.detect_circular_dependency(flat)
             if circle is None:
                 logger.error("Failed, but found NO circular dependency. The data is as follows:"
-                             + str(self.compact_entity_list_representation(flat)))
+                             + str(self.compact_entity_list_representation(flat,
+                                                                           referencing_entities)))
             else:
                 logger.error("Found circular dependency (Note that this might include references "
                              "that are not identifying properties): "
-                             + self.compact_entity_list_representation(circle))
+                             + self.compact_entity_list_representation(circle,
+                                                                       referencing_entities))
+
             raise RuntimeError(
                 f"Could not finish split_into_inserts_and_updates. Circular dependency: "
                 f"{circle is not None}")
 
-        return to_be_inserted, to_be_updated
+        # remove negative IDs
+        missing = self.treated_records_lookup.get_missing_list()
+        for el in missing:
+            if el.id is None:
+                raise RuntimeError("This should not happen")  # TODO remove
+            if el.id >= 0:
+                raise RuntimeError("This should not happen")  # TODO remove
+            el.id = None
+
+        return (missing, self.treated_records_lookup.get_existing_list())
 
     def replace_entities_with_ids(self, rec: db.Record):
         for el in rec.properties:
@@ -738,23 +850,39 @@ class Crawler(object):
                         if val.id is not None:
                             el.value[index] = val.id
 
     @staticmethod
-    def compact_entity_list_representation(circle):
+    def compact_entity_list_representation(entities, referencing_entities: List) -> str:
         """ a more readable representation than the standard xml representation
 
         TODO this can be removed once the yaml format representation is in pylib
         """
         text = "\n--------\n"
-        for el in circle:
-            if el.name is not None:
-                text += f"{el.name}\n"
-            text += f"{[el.name for el in el.parents]}\n"
-            props = {p.name: p.value for p in el.properties}
-            text += f"{props}\n"
+
+        grouped = {"": []}
+        for ent in entities:
+            if not ent.parents:
+                grouped[""].append(ent)
+            for parent in ent.parents:
+                if parent.name not in grouped:
+                    grouped[parent.name] = []
+                grouped[parent.name].append(ent)
+        if not grouped[""]:
+            del grouped[""]
+        for parent, group in grouped.items():
+            text += f"\n> Parent: {parent}\n"
+            for ent in group:
+                if ent.name is not None:
+                    text += f"\n>> Name: {ent.name}\n"
+                else:
+                    text += "\n>> Name: # No name\n"
+                text += f"{[p.name for p in ent.parents]}\n"
+                props = {p.name: p.value for p in ent.properties}
+                text += f"{props}\n"
+                text += f"is_referenced_by:\n{referencing_entities[id(ent)]}\n"
 
         return text + "--------\n"
 
     @staticmethod
     def detect_circular_dependency(flat: list[db.Entity]):
         """
         Detects whether there are circular references in the given entity list and returns a list
@@ -898,13 +1026,13 @@ class Crawler(object):
                             "mode. This might lead to a failure of inserts that follow.")
                 logger.info(parent_updates)
 
     @staticmethod
     def _get_property_id_for_datatype(rtname: str, name: str):
         return cached_get_entity_by(
-            query=f"FIND Entity '{escape_quoted_text(rtname)}' "
-                  f"with name='{escape_quoted_text(name)}'").id
+            query=f"FIND Entity '{escape_squoted_text(rtname)}' "
+                  f"with name='{escape_squoted_text(name)}'").id
 
     @staticmethod
     def replace_name_with_referenced_entity_id(prop: db.Property):
         """changes the given property in place if it is a reference property that has a name as
         value
@@ -937,7 +1065,7 @@ class Crawler(object):
                 if isinstance(el, str):
                     try:
                         # the get_entity function will raise an error if not unique
-                        propval.append(Crawler._get_property_id_for_datatype(rtname=prop.datatype,
+                        propval.append(Crawler._get_property_id_for_datatype(rtname=dt,
                                                                              name=el))
                     except (db.EmptyUniqueQueryError, db.QueryNotUniqueError):
                         logger.error(
@@ -1047,12 +1175,11 @@ class Crawler(object):
         """
         if crawled_data is None:
             warnings.warn(DeprecationWarning(
-                "Calling synchronize without the data to be synchronized is depricated. Please "
+                "Calling synchronize without the data to be synchronized is deprecated. Please "
                 "use for example the Scanner to create this data."))
             crawled_data = self.crawled_data
 
         to_be_inserted, to_be_updated = self.split_into_inserts_and_updates(crawled_data)
-        referencing_entities = self.create_reference_mapping(to_be_updated + to_be_inserted)
 
         for el in to_be_updated:
             # all entity objects are replaced by their IDs except for the not yet inserted ones
         if get_config_setting("send_crawler_notifications"):
@@ -1156,7 +1283,7 @@ ____________________\n""".format(i + 1, len(pending_changes)) + str(el[3]))
                     + " by invoking the crawler"
                     " with the run id: {rid}\n".format(rid=run_id))
 
-    @staticmethod
+    @ staticmethod
     def debug_build_usage_tree(converter: Converter):
         res: dict[str, dict[str, Any]] = {
             converter.name: {
diff --git a/src/caoscrawler/debug_tree.py b/src/caoscrawler/debug_tree.py
index 9983981c69e3df7c58ddfda4b6977944eac54999..0d57040f5c20aca236a3c11531e8b7c45bad89ab 100644
--- a/src/caoscrawler/debug_tree.py
+++ b/src/caoscrawler/debug_tree.py
@@ -45,13 +45,13 @@ from importlib_resources import files
 from jsonschema import validate
 from typing import Any, Optional, Type, Union
 
-import caosdb as db
+import linkahead as db
 
 from caosadvancedtools.cache import UpdateCache, Cache
 from caosadvancedtools.crawler import Crawler as OldCrawler
-from caosdb.apiutils import (compare_entities, EntityMergeConflictError,
-                             merge_entities)
-from caosdb.common.datatype import is_reference
+from linkahead.apiutils import (compare_entities, EntityMergeConflictError,
+                                merge_entities)
+from linkahead.common.datatype import is_reference
 
 from .converters import Converter, DirectoryConverter, ConverterValidationError
 
diff --git a/src/caoscrawler/hdf5_converter.py b/src/caoscrawler/hdf5_converter.py
new file mode 100644
index 0000000000000000000000000000000000000000..5b1ff5775fb74919c989507c449636fd822db7f0
--- /dev/null
+++ b/src/caoscrawler/hdf5_converter.py
@@ -0,0 +1,336 @@
+#
+# This file is a part of the CaosDB Project.
+#
+# Copyright (C) 2023 IndiScale GmbH <info@indiscale.com>
+# Copyright (C) 2023 Florian Spreckelsen <f.spreckelsen@indiscale.com>
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as
+# published by the Free Software Foundation, either version 3 of the
+# License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+#
+
+try:
+    import h5py
+except ModuleNotFoundError as err:
+    raise ModuleNotFoundError(
+        "Couldn't find module h5py. Did you install the crawler package with "
+        "its optional `h5-crawler` dependency?"
+    ) from err
+
+import numpy as np
+
+from typing import Optional, Union
+
+import linkahead as db
+
+from .converters import (convert_basic_element, Converter, DictElementConverter,
+                         match_name_and_value, SimpleFileConverter)
+from .stores import GeneralStore, RecordStore
+from .structure_elements import DictElement, File, FloatElement, IntegerElement, StructureElement
+
+
+def convert_attributes(elt: Union[h5py.File, h5py.Group, h5py.Dataset]):
+    """Convert hdf5 attributes to a list of either basic scalar structure elements or ndarrays.
+
+    Parameters
+    ----------
+    elt : Union[h5py.File, h5py.Group, h5py.Dataset]
+        The hdf5 element the attributes of which will be converted to structure
+        elements.
+
+    Returns
+    -------
+    converted : list[StructureElement]
+        A list of the attributes converted to StructureElements (either basic
+        scalar elements or ndarray).
+    """
+
+    converted = []
+    for name, value in elt.attrs.items():
+        converted.append(convert_basic_element_with_nd_array(
+            value, name,
+            msg_prefix=f"The value of attribute {name} has an unknown type: {type(value)}."))
+
+    return converted
+
+
+def convert_h5_element(elt: Union[h5py.Group, h5py.Dataset], name: str):
+    """Convert a given HDF5 element to the corresponding StructureElement.
+
+    Parameters
+    ----------
+    elt : Union[h5py.Group, h5py.Dataset]
+        The hdf5 element to be converted.
+    name : str
+        The name of the StructureElement that the hdf5 element is converted to.
+
+    Raises
+    ------
+    ValueError
+        In case of anything that is not convertible to an HDF5 structure element.
+
+    Returns
+    -------
+    StructureElement
+        The converted StructureElement.
+    """
+
+    if isinstance(elt, h5py.Group):
+
+        return H5GroupElement(name, elt)
+
+    if isinstance(elt, h5py.Dataset):
+
+        return H5DatasetElement(name, elt)
+
+    raise ValueError("The given element must be either a HDF5 Group or Dataset object.")
+
+
+def convert_basic_element_with_nd_array(value, name: Optional[str] = None,
+                                        internal_path: Optional[str] = None, msg_prefix: str = ""):
+    """Convert a given object either to an ndarray structure element or to a
+    basic scalar structure element.
+
+    This function extends :func:`~caoscrawler.converters.convert_basic_element`
+    by a special treatment for certain numpy objects, most importantly
+    ndarrays. They are converted to a scalar in case of a size-1 array, to a
+    list in case of a 1-d array, and to an ``H5NdarrayElement`` in all other
+    cases. In addition, numpy integers and floats are also converted to
+    IntegerElements and FloatElements, respectively.
+
+    Parameters
+    ----------
+    value
+        The object to be converted.
+    name : str, optional
+        The name of the structure element ``value`` is being converted
+        to. Default is None.
+    internal_path : str, optional
+        The internal path of ``value`` within the HDF5 file. Default is None.
+    msg_prefix : str, optional
+        The prefix of the error message that will be raised. Default is ``""``.
+
+    Returns
+    -------
+    StructureElement
+        The StructureElement ``value`` was converted to.
+
+    """
+
+    if isinstance(value, np.ndarray):
+
+        if value.size == 1:
+            # this is a scalar stacked in a numpy array. We don't know its
+            # actual shape, so we reshape first, then use the actual value
+            # inside.
+            value = value.reshape((1,))[0]
+
+        elif np.squeeze(value).ndim == 1:
+            # If the array is one-dimensional we can save it as a list
+            value = list(np.squeeze(value))
+
+        else:
+            # real multi-dimensional array
+            return H5NdarrayElement(name, value, internal_path)
+
+    elif isinstance(value, (np.int32, np.int64)):
+
+        return IntegerElement(name, value)
+
+    elif isinstance(value, np.float64):
+
+        return FloatElement(name, value)
+
+    return convert_basic_element(value, name, msg_prefix)
+
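+# Usage sketch (illustrative values, not part of the public API):
+#
+#   convert_basic_element_with_nd_array(np.zeros((2, 3)), "arr", "/group/arr")
+#   # -> H5NdarrayElement wrapping the 2x3 array
+#   convert_basic_element_with_nd_array(np.array([42]), "answer")
+#   # -> IntegerElement (size-1 arrays are unwrapped to scalars first)
+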
+
+class H5GroupElement(DictElement):
+    """StructureElement specific for HDF5 groups"""
+
+    def __init__(self, name: str, value: h5py.Group):
+        super().__init__(name, value)
+
+
+class H5DatasetElement(DictElement):
+    """StructureElement specific for HDF5 datasets."""
+
+    def __init__(self, name: str, value: h5py.Dataset):
+        super().__init__(name, value)
+
+
+class H5NdarrayElement(DictElement):
+    """StructureElement specific for NDArrays within HDF5 files.
+
+    Also store the internal path of the array within the HDF5 file in its
+    ``internal_path`` attribute.
+
+    """
+
+    def __init__(self, name: str, value, internal_path: str):
+        super().__init__(name, value)
+        self.internal_path = internal_path
+
+
+class H5FileConverter(SimpleFileConverter):
+    """Converter for HDF5 files that creates children for the contained
+    attributes, groups, and datasets.
+
+    """
+
+    def create_children(self, generalStore: GeneralStore, element: StructureElement):
+        """Create children from root-level file attributes and contained hdf5
+        elements.
+
+        """
+
+        if not isinstance(element, File):
+
+            raise ValueError("create_children should have been called with a File object.")
+
+        ff = h5py.File(element.path, 'r')
+
+        children = []
+
+        for name, value in ff.items():
+
+            children.append(convert_h5_element(value, name))
+
+        children.extend(convert_attributes(ff))
+
+        return children
+
+
+class H5GroupConverter(DictElementConverter):
+    """Converter for HDF5 groups that creates children from the group-level
+    attributes and the contained subgroups and datasets.
+
+    """
+
+    def typecheck(self, element: StructureElement):
+
+        return isinstance(element, H5GroupElement)
+
+    def create_children(self, generalStore: GeneralStore, element: StructureElement):
+        """Create children from group attributes and hdf5 elements contained in
+        this group.
+
+        """
+
+        if not isinstance(element.value, h5py.Group):
+
+            raise ValueError("create_children should have been called with a HDF5 Group object.")
+
+        children = []
+
+        for name, value in element.value.items():
+
+            children.append(convert_h5_element(value, name))
+
+        children.extend(convert_attributes(element.value))
+
+        return children
+
+
+class H5DatasetConverter(DictElementConverter):
+    """Converter for HDF5 datasets that creates children from the dataset
+    attributes and the contained array data.
+
+    """
+
+    def typecheck(self, element: StructureElement):
+
+        return isinstance(element, H5DatasetElement)
+
+    def create_children(self, generalStore: GeneralStore, element: StructureElement):
+        """Create children from the dataset attributes and append the array data
+        contained in this dataset.
+
+        """
+
+        if not isinstance(element.value, h5py.Dataset):
+
+            raise ValueError("create_children should have been called with a HDF5 Dataset object")
+
+        children = convert_attributes(element.value)
+
+        children.append(
+            H5NdarrayElement(
+                name=self.name+"_ndarray",
+                value=element.value,
+                internal_path=element.value.name
+            )
+        )
+        return children
+
+
+class H5NdarrayConverter(Converter):
+    """Converter for ndarrays contained in HDF5 files. Creates the wrapper
+    record for this ndarray.
+
+    """
+
+    def __init__(self, definition: dict, name: str, converter_registry: dict):
+
+        # Check that a non-empty name for the record that will be created for
+        # the ndarray Record (within the cfood) is given
+        if not ("recordname" in definition and definition["recordname"]):
+
+            raise RuntimeError(f"Converter {name} lacks the `recordname` definition.")
+
+        super().__init__(definition, name, converter_registry)
+
+    def create_children(self, values: GeneralStore, element: StructureElement):
+        """The ndarray doesn't have any further children."""
+
+        return []
+
+    def create_records(self, values: GeneralStore, records: RecordStore, element: StructureElement):
+        """Create a wrapper record with name ``recordname``, type
+        ``array_recordtype_name`` (default ``H5Ndarray``) and the internal path
+        stored in a property with name ``internal_path_property_name`` (default
+        ``internal_hdf5_path``).
+
+        """
+
+        rname = self.definition["recordname"]
+        if "array_recordtype_name" in self.definition:
+            rtname = self.definition["array_recordtype_name"]
+        else:
+            rtname = "H5Ndarray"
+
+        if "internal_path_property_name" in self.definition:
+            propname = self.definition["internal_path_property_name"]
+        else:
+            propname = "internal_hdf5_path"
+
+        rec = db.Record().add_parent(rtname)
+        records[rname] = rec
+        values[rname] = rec
+
+        rec.add_property(name=propname, value=element.internal_path)
+        keys_modified = [(rname, propname)]
+
+        keys_modified.extend(super().create_records(values, records, element))
+
+        return keys_modified
+
+    def typecheck(self, element: StructureElement):
+
+        return isinstance(element, H5NdarrayElement)
+
+    @Converter.debug_matching("name")
+    def match(self, element: StructureElement):
+
+        if not isinstance(element, H5NdarrayElement):
+
+            raise RuntimeError("This converter can only be called with H5NdarrayElements.")
+
+        return match_name_and_value(self.definition, element.name, element.value)
diff --git a/src/caoscrawler/identifiable.py b/src/caoscrawler/identifiable.py
index 75af5be8f06a6ab95a4b7f2b92eda8cf3e321a1b..cefdf4a0f42b1f610e0712fdefebc2dc3b78d69f 100644
--- a/src/caoscrawler/identifiable.py
+++ b/src/caoscrawler/identifiable.py
@@ -20,7 +20,7 @@
 #
 
 from __future__ import annotations
-import caosdb as db
+import linkahead as db
 from datetime import datetime
 import json
 from hashlib import sha256
diff --git a/src/caoscrawler/identifiable_adapters.py b/src/caoscrawler/identifiable_adapters.py
index ceaa3bfe1f1f040fc0099d17e14a0c6797804ac4..9be539fe0842e3ce24b68060fa2288cdc4c531b2 100644
--- a/src/caoscrawler/identifiable_adapters.py
+++ b/src/caoscrawler/identifiable_adapters.py
@@ -26,13 +26,16 @@
 from __future__ import annotations
 
 import logging
+import warnings
 from abc import ABCMeta, abstractmethod
 from datetime import datetime
+from functools import lru_cache
 from typing import Any
 
-import caosdb as db
+import linkahead as db
 import yaml
-from caosdb.cached import cached_get_entity_by
+from linkahead.cached import cached_get_entity_by, cached_query
+from linkahead.utils.escape import escape_squoted_text
 
 from .identifiable import Identifiable
 from .utils import has_parent
@@ -43,21 +46,24 @@ logger = logging.getLogger(__name__)
 def get_children_of_rt(rtname):
     """Supply the name of a recordtype. This name and the name of all children RTs are returned in
     a list"""
-    return [p.name for p in db.execute_query(f"FIND RECORDTYPE {rtname}")]
+    escaped = escape_squoted_text(rtname)
+    return [p.name for p in cached_query(f"FIND RECORDTYPE '{escaped}'")]
 
 
-def convert_value(value: Any):
-    """ Returns a string representation of the value that is suitable
-    to be used in the query
-    looking for the identified record.
+def convert_value(value: Any) -> str:
+    """ Return a string representation of the value suitable for the search query.
+
+    This is for search queries looking for the identified record.
 
     Parameters
     ----------
-    value : Any type, the value that shall be returned and potentially converted.
+    value: Any
+      The value to be converted.
 
     Returns
     -------
-    out : the string reprensentation of the value
+    out: str
+      The string representation of the value.
 
     """
 
@@ -68,8 +74,7 @@ def convert_value(value: Any):
     elif isinstance(value, bool):
         return str(value).upper()
     elif isinstance(value, str):
-        # replace single quotes, otherwise they may break the queries
-        return value.replace("\'", "\\'")
+        return escape_squoted_text(value)
     else:
         return str(value)
 
@@ -79,14 +84,15 @@ class IdentifiableAdapter(metaclass=ABCMeta):
 
 Some terms:
 
-- Registered identifiable is the definition of an identifiable which is:
-    - A record type as the parent
-    - A list of properties
-    - A list of referenced by statements
-- Identifiable is the concrete identifiable, e.g. the Record based on
-    the registered identifiable with all the values filled in.
-- Identified record is the result of retrieving a record based on the
-    identifiable from the database.
+- A *registered identifiable* defines an identifiable template, for example by specifying:
+    - Parent record types
+    - Properties
+    - ``is_referenced_by`` statements
+- An *identifiable* belongs to a concrete record.  It consists of identifying attributes which "fill
+  in" the *registered identifiable*.  In code, it can be represented as a Record based on the
+  *registered identifiable* with all the values filled in.
+- An *identified record* is the result of retrieving a record from the database, based on the
+  *identifiable* (and its values).
 
 General question to clarify:
 
@@ -95,24 +101,28 @@ General question to clarify:
 
 The list of referenced by statements is currently not implemented.
 
-The IdentifiableAdapter can be used to retrieve the three above mentioned objects (registred
+The IdentifiableAdapter can be used to retrieve the three above mentioned objects (registered
-identifiabel, identifiable and identified record) for a Record.
+identifiable, identifiable and identified record) for a Record.
 
     """
 
     @staticmethod
-    def create_query_for_identifiable(ident: Identifiable):
+    def create_query_for_identifiable(ident: Identifiable, startswith: bool = False):
         """
         This function is taken from the old crawler:
         caosdb-advanced-user-tools/src/caosadvancedtools/crawler.py
 
         uses the properties of ident to create a query that can determine
         whether the required record already exists.
+
+        If ``startswith`` is True, use ``LIKE`` for long string values to test whether the string
+        starts with the first 200 characters of the value.
         """
 
         query_string = "FIND RECORD "
         if ident.record_type is not None:
-            query_string += f"'{ident.record_type}'"
+            escaped_rt = escape_squoted_text(ident.record_type)
+            query_string += f"'{escaped_rt}'"
         for ref in ident.backrefs:
             eid = ref
             if isinstance(ref, db.Entity):
@@ -122,11 +132,13 @@ identifiabel, identifiable and identified record) for a Record.
         query_string += " WITH "
 
         if ident.name is not None:
-            query_string += "name='{}'".format(convert_value(ident.name))
+            query_string += "name='{}'".format(escape_squoted_text(ident.name))
             if len(ident.properties) > 0:
                 query_string += " AND "
 
-        query_string += IdentifiableAdapter.create_property_query(ident)
+        query_string += IdentifiableAdapter.create_property_query(ident, startswith=startswith)
+
+        # TODO Can these cases happen at all with the current code?
         if query_string.endswith(" AND WITH "):
             query_string = query_string[:-len(" AND WITH ")]
         if query_string.endswith(" AND "):
@@ -134,15 +146,40 @@ identifiabel, identifiable and identified record) for a Record.
         return query_string
 
     @staticmethod
-    def create_property_query(entity: Identifiable):
+    def __create_pov_snippet(pname: str, pvalue, startswith: bool = False):
+        """Return something like ``'name'='some value'`` or ``'name' LIKE 'some*'``.
+
+If ``startswith`` is True, the value of strings will be cut off at 200 characters and a ``LIKE``
+operator will be used to find entities matching at the beginning.
+"""
+        if startswith and isinstance(pvalue, str) and len(pvalue) > 200:
+            operator_value_str = f" LIKE '{escape_squoted_text(pvalue[:200])}*'"
+        else:
+            operator_value_str = "='" + convert_value(pvalue) + "'"
+        result = "'" + escape_squoted_text(pname) + "'" + operator_value_str
+        return result
+
+    @staticmethod
+    def create_property_query(entity: Identifiable, startswith: bool = False):
+        """Create a POV query part with the entity's properties.
+
+Parameters
+----------
+
+entity: Identifiable
+  The Identifiable whose properties shall be used.
+
+startswith: bool, optional
+  If True, check string typed properties against the first 200 characters only.  Default is False.
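+
+For example (sketch), an identifiable with properties ``{"date": "2023-01-01"}`` yields the
+query part ``'date'='2023-01-01'``.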
+        """
         query_string = ""
+        pov = IdentifiableAdapter.__create_pov_snippet  # Shortcut
         for pname, pvalue in entity.properties.items():
             if pvalue is None:
-                query_string += "'" + pname + "' IS NULL AND "
+                query_string += "'" + escape_squoted_text(pname) + "' IS NULL AND "
             elif isinstance(pvalue, list):
                 for v in pvalue:
-                    query_string += ("'" + pname + "'='" +
-                                     convert_value(v) + "' AND ")
+                    query_string += pov(pname, v, startswith=startswith) + " AND "
 
             # TODO: (for review)
             #       This code would allow for more complex identifiables with
@@ -155,8 +192,7 @@ identifiabel, identifiable and identified record) for a Record.
             #                      IdentifiableAdapter.create_property_query(p.value) +
             #                      ") AND ")
             else:
-                query_string += ("'" + pname + "'='" +
-                                 convert_value(pvalue) + "' AND ")
+                query_string += pov(pname, pvalue, startswith=startswith) + " AND "
         # remove the last AND
         return query_string[:-4]
 
@@ -174,19 +210,64 @@ identifiabel, identifiable and identified record) for a Record.
 
     @abstractmethod
     def get_file(self, identifiable: db.File):
+        warnings.warn(DeprecationWarning("This function is deprecated. Please do not use it."))
         """
         Retrieve the file object for a (File) identifiable.
         """
         pass
 
+    @staticmethod
+    def get_identifying_referencing_entities(referencing_entities, registered_identifiable):
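+        """Return the subset of ``referencing_entities`` that is required by the
+        ``is_referenced_by`` properties of ``registered_identifiable``.
+
+        """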
+        refs = []
+        for prop in registered_identifiable.properties:
+            if prop.name.lower() != "is_referenced_by":
+                continue
+            for looking_for_rt in prop.value:
+                found = False
+                if looking_for_rt == "*":
+                    for val in referencing_entities.values():
+                        if len(val) > 0:
+                            found = True
+                        refs.extend(val)
+                else:
+                    rt_and_children = get_children_of_rt(looking_for_rt)
+                    for rtname in rt_and_children:
+                        if (rtname in referencing_entities):
+                            refs.extend(referencing_entities[rtname])
+                            found = True
+            if not found:
+                raise NotImplementedError(
+                    f"Could not find referencing entities of type(s): {prop.value}\n"
+                    f"for registered identifiable:\n{registered_identifiable}\n"
+                    f"There were {len(referencing_entities)} referencing entities to choose from."
+                )
+        return refs
+
+    @staticmethod
+    def get_identifying_referenced_entities(record, registered_identifiable):
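+        """Return all entities that are referenced by ``record`` and that belong to the
+        identifying properties of ``registered_identifiable``.
+
+        """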
+        refs = []
+        for prop in registered_identifiable.properties:
+            pname = prop.name.lower()
+            if pname == "name" or pname == "is_referenced_by":
+                continue
+            if record.get_property(prop.name) is None:
+                raise RuntimeError("Missing identifying Property")
+            pval = record.get_property(prop.name).value
+            if not isinstance(pval, list):
+                pval = [pval]
+            for val in pval:
+                if isinstance(val, db.Entity):
+                    refs.append(val)
+        return refs
+
     def get_identifiable(self, record: db.Record, referencing_entities=None):
         """
-        retrieve the registred identifiable and fill the property values to create an
-        identifiable
+        Retrieve the registered identifiable and fill the property values to create an
+        identifiable.
 
         Args:
             record: the record for which the Identifiable shall be created.
-            referencing_entities: a dictionary (Type: dict[int, dict[str, list[db.Entity]]]), that
+            referencing_entities: a dictionary (Type: dict[str, list[db.Entity]]), that
               allows to look up entities with a certain RecordType, that reference ``record``
 
         Returns:
@@ -205,6 +286,8 @@ identifiabel, identifiable and identified record) for a Record.
         name_is_identifying_property = False
 
         if registered_identifiable is not None:
+            identifiable_backrefs = self.get_identifying_referencing_entities(
+                referencing_entities, registered_identifiable)
             # fill the values:
             for prop in registered_identifiable.properties:
                 if prop.name == "name":
@@ -215,24 +298,8 @@ identifiabel, identifiable and identified record) for a Record.
                 # case A: in the registered identifiable
                 # case B: in the identifiable
 
-                # TODO: similar to the Identifiable class, Registred Identifiable should be a
-                # separate class too
+                # treated above
                 if prop.name.lower() == "is_referenced_by":
-                    for givenrt in prop.value:
-                        rt_and_children = get_children_of_rt(givenrt)
-                        found = False
-                        for rtname in rt_and_children:
-                            if (id(record) in referencing_entities
-                                    and rtname in referencing_entities[id(record)]):
-                                identifiable_backrefs.extend(
-                                    referencing_entities[id(record)][rtname])
-                                found = True
-                        if not found:
-                            # TODO: is this the appropriate error?
-                            raise NotImplementedError(
-                                f"The following record is missing an identifying property:"
-                                f"RECORD\n{record}\nIdentifying PROPERTY\n{prop.name}"
-                            )
                     continue
 
                 record_prop = record.get_property(prop.name)
@@ -256,7 +323,7 @@ identifiabel, identifiable and identified record) for a Record.
                     "Multi properties used in identifiables could cause unpredictable results and "
                     "are not allowed. You might want to consider a Property with a list as value.")
 
-        # use the RecordType of the registred Identifiable if it exists
+        # use the RecordType of the registered Identifiable if it exists
         # We do not use parents of Record because it might have multiple
         try:
             return Identifiable(
@@ -309,6 +376,8 @@ class LocalStorageIdentifiableAdapter(IdentifiableAdapter):
     """
 
     def __init__(self):
+        warnings.warn(DeprecationWarning(
+            "This class is deprecated. Please use the CaosDBIdentifiableAdapter."))
         self._registered_identifiables = dict()
         self._records = []
 
@@ -323,6 +392,7 @@ class LocalStorageIdentifiableAdapter(IdentifiableAdapter):
         Just look in records for a file with the same path.
         """
         candidates = []
+        warnings.warn(DeprecationWarning("This function is deprecated. Please do not use it."))
         for record in self._records:
             if record.role == "File" and record.path == identifiable.path:
                 candidates.append(record)
@@ -470,14 +540,14 @@ class CaosDBIdentifiableAdapter(IdentifiableAdapter):
         self._registered_identifiables[name] = definition
 
     def get_file(self, identifiable: Identifiable):
+        warnings.warn(DeprecationWarning("This function is deprecated. Please do not use it."))
         # TODO is this needed for Identifiable?
         # or can we get rid of this function?
         if isinstance(identifiable, db.Entity):
             return cached_get_entity_by(path=identifiable)
         if identifiable.path is None:
             raise RuntimeError("Path must not be None for File retrieval.")
-        candidates = db.execute_query("FIND File which is stored at '{}'".format(
-            identifiable.path))
+        candidates = cached_get_entity_by(path=identifiable.path)
         if len(candidates) > 1:
             raise RuntimeError("Identifiable was not defined unambigiously.")
         if len(candidates) == 0:
@@ -486,7 +556,7 @@ class CaosDBIdentifiableAdapter(IdentifiableAdapter):
 
     def get_registered_identifiable(self, record: db.Record):
         """
-        returns the registred identifiable for the given Record
+        returns the registered identifiable for the given Record
 
         It is assumed, that there is exactly one identifiable for each RecordType. Only the first
         parent of the given Record is considered; others are ignored
@@ -510,10 +580,28 @@ class CaosDBIdentifiableAdapter(IdentifiableAdapter):
 
     def retrieve_identified_record_for_identifiable(self, identifiable: Identifiable):
         query_string = self.create_query_for_identifiable(identifiable)
-        candidates = db.execute_query(query_string)
+        try:
+            candidates = cached_query(query_string)
+        except db.exceptions.HTTPServerError:
+            query_string = self.create_query_for_identifiable(identifiable, startswith=True)
+            candidates = cached_query(query_string).copy()  # Copy against cache poisoning
+
+            # Test if the candidates really match all properties
+            for pname, pvalue in identifiable.properties.items():
+                popme = []
+                for i in range(len(candidates)):
+                    this_prop = candidates[i].get_property(pname)
+                    if this_prop is None:
+                        popme.append(i)
+                        continue
+                    if not this_prop.value == pvalue:
+                        popme.append(i)
+                for i in reversed(popme):
+                    candidates.pop(i)
+
         if len(candidates) > 1:
             raise RuntimeError(
-                f"Identifiable was not defined unambigiously.\n{query_string}\nReturned the "
+                f"Identifiable was not defined unambiguously.\n{query_string}\nReturned the "
                 f"following {candidates}."
                 f"Identifiable:\n{identifiable.record_type}{identifiable.properties}")
         if len(candidates) == 0:
diff --git a/src/caoscrawler/identified_cache.py b/src/caoscrawler/identified_cache.py
deleted file mode 100644
index aa2d82f8e66c738e737c62f3cc68eaf60127e28b..0000000000000000000000000000000000000000
--- a/src/caoscrawler/identified_cache.py
+++ /dev/null
@@ -1,65 +0,0 @@
-#!/usr/bin/env python3
-# encoding: utf-8
-#
-# ** header v3.0
-# This file is a part of the CaosDB Project.
-#
-# Copyright (C) 2021 Indiscale GmbH <info@indiscale.com>
-# Copyright (C) 2021 Henrik tom Wörden <h.tomwoerden@indiscale.com>
-#
-# This program is free software: you can redistribute it and/or modify
-# it under the terms of the GNU Affero General Public License as
-# published by the Free Software Foundation, either version 3 of the
-# License, or (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-# GNU Affero General Public License for more details.
-#
-# You should have received a copy of the GNU Affero General Public License
-# along with this program. If not, see <https://www.gnu.org/licenses/>.
-#
-# ** end header
-#
-
-
-"""
-see class docstring
-"""
-
-from .identifiable import Identifiable
-import caosdb as db
-
-
-class IdentifiedCache(object):
-    """
-    This class is like a dictionary where the keys are Identifiables. When you check whether an
-    Identifiable exists as key this class returns True not only if that exact Python object is
-    used as a key, but if an Identifiable is used as key that is **equal** to the one being
-    considered (see __eq__ function of Identifiable). Similarly, if you do `cache[identifiable]`
-    you get the Record where the key is an Identifiable that is equal to the one in the rectangular
-    brackets.
-
-    This class is used for Records where we checked the existence in a remote server using
-    identifiables. If the Record was found, this means that we identified the corresponding Record
-    in the remote server and the ID of the local object can be set.
-    To prevent querying the server again and again for the same objects, this cache allows storing
-    Records that were found on a remote server and those that were not (typically in separate
-    caches).
-    """
-
-    def __init__(self):
-        self._cache = {}
-        self._identifiables = []
-
-    def __contains__(self, identifiable: Identifiable):
-        return identifiable in self._identifiables
-
-    def __getitem__(self, identifiable: db.Record):
-        index = self._identifiables.index(identifiable)
-        return self._cache[id(self._identifiables[index])]
-
-    def add(self, record: db.Record, identifiable: Identifiable):
-        self._cache[id(identifiable)] = record
-        self._identifiables.append(identifiable)
diff --git a/src/caoscrawler/scanner.py b/src/caoscrawler/scanner.py
index 17ec54b0a2131468b11a99cba247c7e21fe317b3..ebb8c985bad8908b756ac4f3db53bdec6b93b0f4 100644
--- a/src/caoscrawler/scanner.py
+++ b/src/caoscrawler/scanner.py
@@ -41,7 +41,7 @@ from typing import Any, Optional, Type, Union
 
 import jinja2
 
-import caosdb as db
+import linkahead as db
 import yaml
 from importlib_resources import files
 from jsonschema import validate
@@ -268,34 +268,38 @@ def scanner(items: list[StructureElement],
 
     Formerly known as "_crawl".
 
-    items: structure_elements (e.g. files and folders on one level on the hierarchy)
-
-    converters: locally defined converters for
-        treating structure elements. A locally defined converter could be
-        one that is only valid for a specific subtree of the originally
-        cralwed StructureElement structure.
-
-    general_store and record_store: This recursion of the crawl function should only operate on
-                                  copies of the global stores of the Crawler object.
-
-    restricted_path: optional, list of strings, traverse the data tree only along the given
-                     path. For example, when a directory contains files a, b and c and b is
-                     given as restricted_path, a and c will be ignroed by the crawler.
-                     When the end of the given path is reached, traverse the full tree as
-                     normal. The first element of the list provided by restricted_path should
-                     be the name of the StructureElement at this level, i.e. denoting the
-                     respective element in the items argument.
-
-    registered_transformer_functions: dict
+    Parameters
+    ----------
+    items:
+        structure elements (e.g. files and folders on one level of the hierarchy)
+
+    converters:
+        locally defined converters for treating structure elements. A locally
+        defined converter could be one that is only valid for a specific subtree
+        of the originally crawled StructureElement structure.
+
+    general_store, record_store:
+        This recursion of the crawl function should only operate on copies of
+        the global stores of the Crawler object.
+
+    restricted_path : list of strings, optional
+        traverse the data tree only along the given path. For example, when a
+        directory contains files a, b and c and b is given as restricted_path, a
+        and c will be ignored by the crawler. When the end of the given path is
+        reached, traverse the full tree as normal. The first element of the list
+        provided by restricted_path should be the name of the StructureElement
+        at this level, i.e. denoting the respective element in the items
+        argument.
+
+    registered_transformer_functions : dict, optional
         A dictionary of transformer functions that can be used in the "transform" block
         of a converter and that allows to apply simple transformations to variables extracted
         either by the current converter or to other variables found in the current variable store.
 
         Each function is a dictionary:
-        - The key is the name of the function to be looked up in the dictionary
-          of registered transformer functions.
-        - The value is the function which needs to be of the form:
 
+        - The key is the name of the function to be looked up in the dictionary of registered transformer functions.
+        - The value is the function which needs to be of the form:
             def func(in_value: Any, in_parameters: dict) -> Any:
                 pass
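+
+        For example, a (hypothetical) transformer function ``to_upper`` that
+        uppercases its input could be registered as
+        ``{"to_upper": lambda in_value, in_parameters: in_value.upper()}``.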
 
@@ -539,9 +543,9 @@ def scan_structure_elements(items: Union[list[StructureElement], StructureElemen
          A dictionary representing the crawler definition, possibly from a yaml
          file.
     restricted_path: optional, list of strings
-         Traverse the data tree only along the given path. When the end of the given path
-         is reached, traverse the full tree as normal. See docstring of 'scanner' for
-            more details.
+         Traverse the data tree only along the given path. When the end of the
+         given path is reached, traverse the full tree as normal. See docstring
+         of 'scanner' for more details.
 
     Returns
     -------
diff --git a/src/caoscrawler/utils.py b/src/caoscrawler/utils.py
index 61b363099d0892b74e91f257bccb6cc832c3d59f..c62f44eeaa75ca42579aa3d6ead437e901cd38ff 100644
--- a/src/caoscrawler/utils.py
+++ b/src/caoscrawler/utils.py
@@ -25,7 +25,7 @@
 
 # Some utility functions, e.g. for extending pylib.
 
-import caosdb as db
+import linkahead as db
 
 
 def has_parent(entity: db.Entity, name: str):
diff --git a/src/doc/README_SETUP.md b/src/doc/README_SETUP.md
index 5f5161d0d672ff3ad14db5c5b49f5c65550b06d7..a75193783d861707adf3b3d45311c392e22626f4 100644
--- a/src/doc/README_SETUP.md
+++ b/src/doc/README_SETUP.md
@@ -4,7 +4,10 @@
 see INSTALL.md
 
 ## Run Unit Tests
-Run `pytest unittests`.
+
+1. Install additional dependencies:
+   - h5py (e.g. by installing this package with its optional `h5-crawler` extra:
+     `pip install .[h5-crawler]`)
+2. Run `pytest unittests`.
 
 ## Documentation ##
 We use sphinx to create the documentation. Docstrings in the code should comply
diff --git a/src/doc/cfood.rst b/src/doc/cfood.rst
index c12e251d49e164a737b20e92e56e7b3e10149d4f..07431af0a9fb26e569be5d47f79d6a4f120df269 100644
--- a/src/doc/cfood.rst
+++ b/src/doc/cfood.rst
@@ -183,7 +183,7 @@ in a vairable with the same name (as it is the case for other Records).
 Transform Functions
 -------------------
 You can use transform functions to alter variable values that the crawler consumes (e.g. a string
-that was matched with a reg exp). See :doc:`Converter Documentation<converters.rst>`.
+that was matched with a reg exp). See :doc:`Converter Documentation<converters>`.
 
-You can define your own  transform functions by adding the the same way you add custom converters:
+You can define your own transform functions by adding them the same way you add custom converters:
 
diff --git a/src/doc/concepts.rst b/src/doc/concepts.rst
index 0b15f9b5d9aebefc2137b234ac4a9440b84906f5..32176b9edb895074021b3ed4eabe270ad48ae632 100644
--- a/src/doc/concepts.rst
+++ b/src/doc/concepts.rst
@@ -18,6 +18,8 @@ Relevant sources in:
 
 - ``src/structure_elements.py``
 
+.. _ConceptConverters:
+
 Converters
 ++++++++++
 
@@ -85,7 +87,9 @@ we can check whether a Record with the parent "Project" is referencing the "Expe
 Record. If that is the case, this reference is part of the identifiable for the "Experiment"
 Record. Note, that if there are multiple Records with the appropriate parent (e.g.
 multiple "Project" Records in the above example) it will be required that all of them
-reference the object to be identified.
+reference the object to be identified. You can also use the wildcard "*" as
+RecordType name in the configuration, which only requires that ANY Record
+references the Record at hand.
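+
+A registered identifiable using this wildcard could look like the following
+sketch (``Experiment`` and ``date`` are placeholder names):
+
+.. code-block:: yaml
+
+   Experiment:
+     - date
+     - is_referenced_by:
+         - "*"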
 
 
 Identified Records
diff --git a/src/doc/converters.rst b/src/doc/converters.rst
index 60da52d3ed110f050a3d7aae866cc7d8b6b8dc31..44988fbd497cdb57023b5a696f83d55e7eb5113a 100644
--- a/src/doc/converters.rst
+++ b/src/doc/converters.rst
@@ -25,7 +25,7 @@ The yaml definition looks like the following:
 TODO: outdated, see cfood-schema.yml
 
 .. code-block:: yaml
-                
+
     <NodeName>:
         type: <ConverterName>
         match: ".*"
@@ -41,7 +41,7 @@ TODO: outdated, see cfood-schema.yml
                 - Experiment
         subtree:
             (...)
-     
+
 The **<NodeName>** is a description of what it represents (e.g.
 'experiment-folder') and is used as identifier.
 
@@ -58,13 +58,13 @@ described here.
 
 Transform Functions
 +++++++++++++++++++
-Often the situation arises, that you cannot use a value as it is found. Maybe a value should be 
+Often the situation arises, that you cannot use a value as it is found. Maybe a value should be
 increased by an offset or a string should be split into a list of pieces. In order to allow such
 simple conversions, transform functions can be named in the converter definition that are then
 applied to the respective variables when the converter is executed.
 
 .. code-block:: yaml
-                
+
     <NodeName>:
         type: <ConverterName>
         match: ".*"
@@ -102,7 +102,7 @@ list valued property to the Report Record.
 
 There are a number of transform functions that are defined by default (see
 ``src/caoscrawler/default_transformers.yml``). You can define custom transform functions by adding
-them to the cfood definition (see :doc:`CFood Documentation<cfood.rst>`).
+them to the cfood definition (see :doc:`CFood Documentation<cfood>`).
 
 
 Standard Converters
@@ -191,7 +191,7 @@ column names to values of the respective cell.
 Example:
 
 .. code-block:: yaml
-                
+
    subtree:
      TABLE:
        type: CSVTableConverter
@@ -220,6 +220,105 @@ XLSXTableConverter
 CSVTableConverter
 =================
 
+Further converters
+++++++++++++++++++
+
+More converters, together with cfood definitions and examples can be found in
+the `LinkAhead Crawler Extensions Subgroup
+<https://gitlab.com/linkahead/crawler-extensions>`_ on gitlab. In the following,
+we list converters that are shipped with the crawler library itself but are not
+part of the set of standard converters and may require this library to be
+installed with additional optional dependencies.
+
+HDF5 Converters
+===============
+
+For treating `HDF5 Files
+<https://docs.hdfgroup.org/hdf5/develop/_s_p_e_c.html>`_, there are in total
+four individual converters corresponding to the internal structure of HDF5 files:
+the :ref:`H5FileConverter` which opens the file itself and creates further
+structure elements from HDF5 groups, datasets, and included multi-dimensional
+arrays that are in turn treated by the :ref:`H5GroupConverter`, the
+:ref:`H5DatasetConverter`, and the :ref:`H5NdarrayConverter`, respectively. You
+need to install the LinkAhead crawler with its optional ``h5-crawler`` dependency
+for using these converters.
+
+The basic idea when crawling HDF5 files is to treat them very similar to
+:ref:`dictionaries <DictElement Converter>` in which the attributes on root,
+group, or dataset level are essentially treated like ``BooleanElement``,
+``TextElement``, ``FloatElement``, and ``IntegerElement`` in a dictionary: They
+are appended as children and can be accessed via the ``subtree``. The file
+itself and the groups within may contain further groups and datasets, which can
+have their own attributes, subgroups, and datasets, very much like
+``DictElements`` within a dictionary. The main difference to any other
+dictionary type is the presence of multi-dimensional arrays within HDF5
+datasets. Since LinkAhead doesn't have any datatype corresponding to these, and
+since it isn't desirable to store these arrays directly within LinkAhead for
+reasons of performance and of searchability, we wrap them within a specific
+Record as explained :ref:`below <H5NdarrayConverter>`, together with more
+metadata and their internal path within the HDF5 file. Users can thus query for
+datasets and their arrays according to their metadata within LinkAhead and then
+use the internal path information to access the dataset within the file
+directly. The type of this record and the property for storing the internal path
+need to be reflected in the datamodel. Using the default names, you would need a
+datamodel like
+
+.. code-block:: yaml
+
+   H5Ndarray:
+     obligatory_properties:
+       internal_hdf5_path:
+         datatype: TEXT
+
+although the names of both property and record type can be configured within the
+cfood definition.
+
+A simple example of a cfood definition for HDF5 files can be found in the `unit
+tests
+<https://gitlab.com/linkahead/linkahead-crawler/-/blob/main/unittests/h5_cfood.yml?ref_type=heads>`_
+and shows how the individual converters are used in order to crawl a `simple
+example file
+<https://gitlab.com/linkahead/linkahead-crawler/-/blob/main/unittests/hdf5_dummy_file.hdf5?ref_type=heads>`_
+containing groups, subgroups, and datasets, together with their respective
+attributes.
+
+H5FileConverter
+---------------
+
+This is an extension of the
+:py:class:`~caoscrawler.converters.SimpleFileConverter` class. It opens the HDF5
+file and creates children for any contained group or dataset. Additionally, the
+root-level attributes of the HDF5 file are accessible as children.
+
+H5GroupConverter
+----------------
+
+This is an extension of the
+:py:class:`~caoscrawler.converters.DictElementConverter` class. Children are
+created for all subgroups and datasets in this HDF5 group. Additionally, the
+group-level attributes are accessible as children.
+
+H5DatasetConverter
+------------------
+
+This is an extension of the
+:py:class:`~caoscrawler.converters.DictElementConverter` class. Most
+importantly, it stores the array data contained in the HDF5 dataset in an
+:py:class:`~caoscrawler.hdf5_converter.H5NdarrayElement`, which is added to its
+children together with the dataset attributes.
+
+H5NdarrayConverter
+------------------
+
+This converter creates a wrapper record for the contained dataset. The name of
+this record needs to be specified in the cfood definition of this converter via
+the ``recordname`` option. The RecordType of this record can be configured with
+the ``array_recordtype_name`` option and defaults to ``H5Ndarray``. Via the
+given ``recordname``, this record can be used within the cfood. Most
+importantly, this record stores the internal path of this array within the HDF5
+file in a text property, the name of which can be configured with the
+``internal_path_property_name`` option which defaults to ``internal_hdf5_path``.
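+
+For illustration, a converter definition using these options could look like the
+following sketch (all names except the option keys are placeholders):
+
+.. code-block:: yaml
+
+   MyNdarrayElement:
+     type: H5Ndarray
+     match_name: (.*)
+     recordname: this
+     array_recordtype_name: MyArray
+     internal_path_property_name: hdf5_internal_path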
+
 Custom Converters
 +++++++++++++++++
 
@@ -251,10 +350,10 @@ The following methods are abstract and need to be overwritten by your custom con
 - :py:meth:`~caoscrawler.converters.Converter.match`
 - :py:meth:`~caoscrawler.converters.Converter.typecheck`
 
-  
+
 Example
 =======
-  
+
 In the following, we will explain the process of adding a custom converter to a yaml file using
 a SourceResolver that is able to attach a source element to another entity.
 
@@ -285,50 +384,50 @@ Furthermore we will customize the method :py:meth:`~caoscrawler.converters.Conve
 number of records can be generated by the yaml definition. So for any applications - like here - that require an arbitrary number of records to be created, a customized implementation of :py:meth:`~caoscrawler.converters.Converter.create_records` is recommended.
 In this context it is recommended to make use of the function :func:`caoscrawler.converters.create_records` that implements creation of record objects from python dictionaries of the same structure
 that would be given using a yaml definition (see next section below).
-     
+
 .. code-block:: python
 
     import re
     from caoscrawler.stores import GeneralStore, RecordStore
     from caoscrawler.converters import TextElementConverter, create_records
     from caoscrawler.structure_elements import StructureElement, TextElement
-    
+
 
     class SourceResolver(TextElementConverter):
       """
       This resolver uses a source list element (e.g. from the markdown readme file)
       to link sources correctly.
       """
-       
+
       def __init__(self, definition: dict, name: str,
                    converter_registry: dict):
           """
           Initialize a new directory converter.
           """
           super().__init__(definition, name, converter_registry)
-       
+
       def create_children(self, generalStore: GeneralStore,
                                 element: StructureElement):
-                                
+
           # The source resolver does not create children:
-          
+
           return []
-       
+
       def create_records(self, values: GeneralStore,
                          records: RecordStore,
                          element: StructureElement,
                          file_path_prefix):
           if not isinstance(element, TextElement):
               raise RuntimeError()
-       
+
           # This function must return a list containing tuples, each one for a modified
           # property: (name_of_entity, name_of_property)
           keys_modified = []
-       
+
           # This is the name of the entity where the source is going to be attached:
           attach_to_scientific_activity = self.definition["scientific_activity"]
           rec = records[attach_to_scientific_activity]
-       
+
           # The "source" is a path to a source project, so it should have the form:
           # /<Category>/<project>/<scientific_activity>/
           # obtain these information from the structure element:
@@ -336,18 +435,18 @@ that would be given using a yaml definition (see next section below).
           regexp = (r'/(?P<category>(SimulationData)|(ExperimentalData)|(DataAnalysis))'
                     '/(?P<project_date>.*?)_(?P<project_identifier>.*)'
                     '/(?P<date>[0-9]{4,4}-[0-9]{2,2}-[0-9]{2,2})(_(?P<identifier>.*))?/')
-       
+
           res = re.match(regexp, val)
           if res is None:
               raise RuntimeError("Source cannot be parsed correctly.")
-       
+
           # Mapping of categories on the file system to corresponding record types in CaosDB:
           cat_map = {
               "SimulationData": "Simulation",
               "ExperimentalData": "Experiment",
               "DataAnalysis": "DataAnalysis"}
           linkrt = cat_map[res.group("category")]
-       
+
           keys_modified.extend(create_records(values, records, {
               "Project": {
                   "date": res.group("project_date"),
@@ -361,7 +460,7 @@ that would be given using a yaml definition (see next section below).
               attach_to_scientific_activity: {
                   "sources": "+$" + linkrt
               }}, file_path_prefix))
-       
+
           # Process the records section of the yaml definition:
           keys_modified.extend(
               super().create_records(values, records, element, file_path_prefix))
@@ -374,7 +473,7 @@ that would be given using a yaml definition (see next section below).
 If the recommended (python) package structure is used, the package containing the converter
 definition can just be installed using `pip install .` or `pip install -e .` from the
 `scifolder_package` directory.
-          
+
 The following yaml block will register the converter in a yaml file:
 
 .. code-block:: yaml
@@ -384,7 +483,7 @@ The following yaml block will register the converter in a yaml file:
        package: scifolder.converters.sources
        converter: SourceResolver
 
-       
+
 Using the `create_records` API function
 =======================================
 
@@ -422,7 +521,7 @@ Let's formulate that using `create_records`:
 .. code-block:: python
 
   dir_name = "directory name"
-  
+
   record_def = {
     "Experiment": {
       "identifier": dir_name
@@ -498,7 +597,7 @@ Let's have a look at a more complex examples, defining multiple records:
         Project: $Project
       ProjectGroup:
         projects: +$Project
-      
+
 
 This block will create two new Records:
 
@@ -514,7 +613,7 @@ Let's formulate that using `create_records` (again, `dir_name` is constant here)
 .. code-block:: python
 
   dir_name = "directory name"
-  
+
   record_def = {
     "Project": {
       "identifier": "project_name",
@@ -526,7 +625,7 @@ Let's formulate that using `create_records` (again, `dir_name` is constant here)
     "ProjectGroup": {
       "projects": "+$Project",
     }
-    
+
   }
 
   keys_modified = create_records(values, records,
diff --git a/tox.ini b/tox.ini
index a7d4465ed36f0fe5e49c06721d3e3a0cdf453fa0..03e02ebeff196430129e10c4c0d853ca77c47302 100644
--- a/tox.ini
+++ b/tox.ini
@@ -6,6 +6,7 @@ skip_missing_interpreters = true
 deps = .
     pytest
     pytest-cov
+    h5py
     # TODO: Make this f-branch sensitive
     git+https://gitlab.indiscale.com/caosdb/src/caosdb-pylib.git@dev
     git+https://gitlab.indiscale.com/caosdb/src/caosdb-advanced-user-tools.git@dev
diff --git a/unittests/h5_cfood.yml b/unittests/h5_cfood.yml
new file mode 100644
index 0000000000000000000000000000000000000000..f688de6a2171da6533626449b030bcd95a43b37b
--- /dev/null
+++ b/unittests/h5_cfood.yml
@@ -0,0 +1,69 @@
+---
+metadata:
+  crawler-version: 0.6.1
+---
+Converters:
+  H5Dataset:
+    converter: H5DatasetConverter
+    package: caoscrawler.hdf5_converter
+  H5File:
+    converter: H5FileConverter
+    package: caoscrawler.hdf5_converter
+  H5Group:
+    converter: H5GroupConverter
+    package: caoscrawler.hdf5_converter
+  H5Ndarray:
+    converter: H5NdarrayConverter
+    package: caoscrawler.hdf5_converter
+# Top-level, we have just the HDF5 file.
+ParentDirectory:
+  type: Directory
+  match: (.*)
+  subtree:
+    H5FileElement:
+      type: H5File
+      match: (.*)\.(hdf5|h5)$
+      records:
+        H5File:
+          parents:
+            - H5File
+          role: File
+          path: $H5FileElement
+          file: $H5FileElement
+      subtree:
+        # Here, we have the groups, the top-level dataset, and possible
+        # attributes (empty for now).
+        RootIntegerElement:
+          type: H5Dataset
+          match_name: ^root_integers$
+          records:
+            H5Dataset:
+              parents:
+                - H5Dataset
+            H5File:
+              H5Dataset: +$H5Dataset
+          subtree:
+            # included NDArray in this dataset
+            TopLevelIntNDElement:
+              type: H5Ndarray
+              match_name: (.*)
+              recordname: this
+              records:
+                # this:
+                #   ContainingFile: $H5File
+                H5Dataset:
+                  Ndarray: $this
+            # There is one more list-valued attribute to this dataset.
+            TopLevelDataAttribute:
+              type: ListElement
+              match_name: ^attr_data_root$
+              subtree:
+                AttributeListEntry:
+                  type: FloatElement
+                  match_name: (.*)
+                  match_value: (?P<value>.*)
+                  records:
+                    H5Dataset:
+                      attr_data_root: +$value
diff --git a/unittests/hdf5_dummy_file.hdf5 b/unittests/hdf5_dummy_file.hdf5
new file mode 100644
index 0000000000000000000000000000000000000000..41bfb7ab3bcac19d90fd4f018cdd8118ae806eaf
Binary files /dev/null and b/unittests/hdf5_dummy_file.hdf5 differ
diff --git a/unittests/test_crawler.py b/unittests/test_crawler.py
index fbf98346e59b0cbec88f17398eff41f26c423dee..e026899201ae0dfa937053d315854e5fbb8a351a 100644
--- a/unittests/test_crawler.py
+++ b/unittests/test_crawler.py
@@ -34,12 +34,14 @@ from pathlib import Path
 from unittest.mock import MagicMock, Mock, patch
 
 import caoscrawler
-import caosdb as db
-import caosdb.common.models as dbmodels
+import linkahead as db
+import linkahead.common.models as dbmodels
 import pytest
 import yaml
-from caoscrawler.crawl import (Crawler, SecurityMode, _treat_deprecated_prefix,
-                               crawler_main, split_restricted_path)
+from caosadvancedtools.models.parser import parse_model_from_string
+from caoscrawler.crawl import (Crawler, SecurityMode, TreatedRecordLookUp,
+                               _treat_deprecated_prefix, crawler_main,
+                               split_restricted_path)
 from caoscrawler.debug_tree import DebugTree
 from caoscrawler.identifiable import Identifiable
 from caoscrawler.identifiable_adapters import (CaosDBIdentifiableAdapter,
@@ -50,9 +52,9 @@ from caoscrawler.scanner import (create_converter_registry, scan_directory,
 from caoscrawler.stores import GeneralStore, RecordStore
 from caoscrawler.structure_elements import (DictElement, DictListElement,
                                             DictTextElement, File)
-from caosdb.apiutils import compare_entities
-from caosdb.cached import cache_clear
-from caosdb.exceptions import EmptyUniqueQueryError
+from linkahead.apiutils import compare_entities
+from linkahead.cached import cache_clear
+from linkahead.exceptions import EmptyUniqueQueryError
 from pytest import raises
 
 UNITTESTDIR = Path(__file__).parent
@@ -108,6 +110,47 @@ def mock_get_entity_by(eid=None, name=None, path=None):
             raise EmptyUniqueQueryError("")
 
 
+def mock_retrieve_record(identifiable: Identifiable):
+    """ assumes that the identifiable is always only the date"""
+
+    for record in EXAMPLE_SERVER_STATE:
+        if (record.role == "Record" and "date" in identifiable.properties
+                and record.get_property("date").value == identifiable.properties['date']):
+            return record
+    return None
+
+
+def mock_cached_only_rt(query_string: str):
+    """Always return an empty Container"""
+    result = db.Container()
+    lo_query = query_string.lower()
+    if lo_query.startswith("find record ") or lo_query.startswith("find file "):
+        return result
+    model = parse_model_from_string("""
+B:
+  obligatory_properties:
+    C:
+      obligatory_properties:
+        prop_other:
+          datatype: INTEGER
+    prop_ident:
+      datatype: INTEGER
+A:
+  obligatory_properties:
+    B:
+      datatype: LIST<B>
+    prop_ident:
+""")
+    if query_string == "FIND RECORDTYPE 'A'":
+        model.get_deep("A").id = 1
+        return result + [model.get_deep("A")]
+    if query_string == "FIND RECORDTYPE 'B'":
+        model.get_deep("A").id = 2
+        return result + [model.get_deep("B")]
+    print(query_string)
+    raise NotImplementedError("Mock for this case is missing")
+
+
 @pytest.fixture(autouse=True)
 def clear_cache():
     cache_clear()
@@ -215,6 +258,13 @@ def test_split_into_inserts_and_updates_trivial():
     crawler.split_into_inserts_and_updates([])
 
 
+def test_split_into_inserts_and_updates_unidentified():
+    crawler = Crawler()
+    with raises(ValueError) as err:
+        crawler.split_into_inserts_and_updates([db.Record(name="recname").add_parent("someparent")])
+    assert str(err.value).startswith("There is no identifying information.")
+
+
 def basic_retrieve_by_name_mock_up(rec, referencing_entities=None, known=None):
     """ returns a stored Record if rec.name is an existing key, None otherwise """
     if rec.name in known:
@@ -247,8 +297,8 @@ def test_split_into_inserts_and_updates_single(crawler_mocked_identifiable_retri
     entlist = [db.Record(name="A").add_parent(
         "C"), db.Record(name="B").add_parent("C")]
 
-    assert crawler.get_from_any_cache(identlist[0]) is None
-    assert crawler.get_from_any_cache(identlist[1]) is None
+    assert crawler.treated_records_lookup.get_any(entlist[0], identlist[0]) is None
+    assert crawler.treated_records_lookup.get_any(entlist[1], identlist[1]) is None
     assert not crawler._has_reference_value_without_id(identlist[0])
     assert not crawler._has_reference_value_without_id(identlist[1])
     assert crawler.identifiableAdapter.retrieve_identified_record_for_record(
@@ -355,6 +405,84 @@ def test_split_into_inserts_and_updates_with_copy_attr(crawler_mocked_identifiab
     crawler.identifiableAdapter.retrieve_identified_record_for_identifiable.assert_called()
 
 
+@pytest.mark.xfail(reason="https://gitlab.com/linkahead/linkahead-crawler/-/issues/88")
+@patch("caoscrawler.identifiable_adapters.cached_query",
+       new=Mock(side_effect=mock_cached_only_rt))
+def test_split_iiau_with_unmergeable_list_items():
+    """Test for meaningful exception when referencing a list of unmergeable entities.
+
+Datamodel
+---------
+A:
+  B: LIST<B>
+  prop_ident: INTEGER
+
+B:
+  prop_ident:
+  C:
+
+C:
+  prop_other: INTEGER
+
+Identifiables
+-------------
+
+id_A: [prop_ident]
+id_B: [prop_ident, "is_referenced_by: A"]
+
+Data
+----
+
+b1: ("same", 23)
+b2: ("same", 42)
+
+a: ([b1, b2])
+    """
+    prop_ident = db.Property("prop_ident", datatype=db.INTEGER)
+    prop_other = db.Property("prop_ident", datatype=db.INTEGER)
+    rt_c = db.RecordType("C").add_property(prop_other)
+    # For some reason it is necessary that `B` has a reference property.  It is
+    # unclear whether `C` needs a registered identifiable as well.
+    rt_b = db.RecordType("B").add_property(prop_ident).add_property("C")
+    rt_a = db.RecordType("A").add_property(prop_ident).add_property("LIST<B>")
+
+    ident_a = db.RecordType().add_parent("A").add_property("prop_ident")
+    ident_b = db.RecordType().add_parent("B").add_property("prop_ident").add_property(
+        "is_referenced_by", value="A")
+    ident_c = db.RecordType().add_parent("C").add_property("prop_other").add_property(
+        "is_referenced_by", value="B")
+
+    rec_a = db.Record("a").add_parent(rt_a).add_property("prop_ident", value=1234)
+    rec_b = []
+    rec_c = []
+    for value in [23, 42]:
+        new_c = db.Record().add_parent(rt_c).add_property("prop_other", value=value)
+        rec_c.append(new_c)
+        rec_b.append(db.Record().add_parent(rt_b).add_property(
+            "prop_ident", value=2020).add_property("C", value=new_c))
+    rec_a.add_property("B", rec_b)
+
+    ident_adapter = CaosDBIdentifiableAdapter()
+    ident_adapter.register_identifiable("A", ident_a)
+    ident_adapter.register_identifiable("B", ident_b)
+    ident_adapter.register_identifiable("C", ident_c)
+
+    crawler = Crawler(identifiableAdapter=ident_adapter)
+
+    # This should give a merge conflict, and not
+    # "Could not find referencing entities of type(s): A"
+
+    with raises(RuntimeError) as rte:
+        crawler.synchronize(commit_changes=False,
+                            crawled_data=[rec_a, *rec_b, *rec_c])
+    assert not isinstance(rte.value, NotImplementedError), \
+        "Exception must not be NotImplementedError, but plain RuntimeError."
+    assert "Could not find referencing entities" not in rte.value.args[0]
+    assert "merging impossible" in rte.something
+
+
 def test_has_missing_object_in_references():
     crawler = Crawler()
     # Simulate remote server content by using the names to identify records
@@ -368,36 +496,40 @@ def test_has_missing_object_in_references():
 
     # one reference with id -> check
     assert not crawler._has_missing_object_in_references(
-        Identifiable(name="C", record_type="RTC", properties={'d': 123}), [])
+        Identifiable(name="C", record_type="RTC", properties={'d': 123}), {})
     # one ref with Entity with id -> check
+    rec = db.Record(id=123).add_parent("C")
     assert not crawler._has_missing_object_in_references(
-        Identifiable(name="C", record_type="RTC", properties={'d': db.Record(id=123)
-                                                              .add_parent("C")}), [])
+        Identifiable(name="C", record_type="RTC", properties={'d': rec}), {id(rec): {'C': [None]}})
     # one ref with id one with Entity with id (mixed) -> check
+    rec = db.Record(id=123).add_parent("RTC")
     assert not crawler._has_missing_object_in_references(
         Identifiable(name="C", record_type="RTD",
-                     properties={'d': 123, 'b': db.Record(id=123).add_parent("RTC")}), [])
+                     properties={'d': 123, 'b': rec}), {id(rec): {'C': [None]}})
     # entity to be referenced in the following
     a = db.Record(name="C").add_parent("C").add_property("d", 12311)
     # one ref with id one with Entity without id (but not identifying) -> fail
     assert not crawler._has_missing_object_in_references(
-        Identifiable(name="C", record_type="RTC", properties={'d': 123, 'e': a}), [])
+        Identifiable(name="C", record_type="RTC", properties={'d': 123, 'e': a}),
+        {id(a): {'C': [None]}})
 
     # one ref with id one with Entity without id (mixed) -> fail
     assert not crawler._has_missing_object_in_references(
-        Identifiable(name="D", record_type="RTD", properties={'d': 123, 'e': a}), [])
+        Identifiable(name="D", record_type="RTD", properties={'d': 123, 'e': a}),
+        {id(a): {'C': [None]}})
 
-    crawler.add_to_remote_missing_cache(a, Identifiable(name="C", record_type="RTC",
-                                                        properties={'d': 12311}))
+    crawler.treated_records_lookup.add(a, Identifiable(name="C", record_type="RTC",
+                                                       properties={'d': 12311}))
     # one ref with id one with Entity without id but in cache -> check
     assert crawler._has_missing_object_in_references(
-        Identifiable(name="D", record_type="RTD", properties={'d': 123, 'e': a}), [])
+        Identifiable(name="D", record_type="RTD", properties={'d': 123, 'e': a}),
+        {id(a): {'C': [None]}})
 
     # if this ever fails, the mock up may be removed
     crawler.identifiableAdapter.get_registered_identifiable.assert_called()
 
 
 @pytest.mark.xfail()
 def test_references_entities_without_ids():
     crawler = Crawler()
     assert not crawler._has_reference_value_without_id(db.Record().add_parent("Person")
@@ -437,25 +569,15 @@ def reset_mocks(mocks):
         mock.reset_mock()
 
 
-def mock_retrieve_record(identifiable: Identifiable):
-    """ assumes that the identifiable is always only the date"""
-
-    for record in EXAMPLE_SERVER_STATE:
-        if (record.role == "Record"
-                and record.get_property("date").value == identifiable.properties['date']):
-            return record
-    return None
-
-
-@patch("caoscrawler.crawl.cached_get_entity_by",
-       new=Mock(side_effect=mock_get_entity_by))
-@patch("caoscrawler.identifiable_adapters.cached_get_entity_by",
-       new=Mock(side_effect=mock_get_entity_by))
-@patch("caoscrawler.identifiable_adapters.CaosDBIdentifiableAdapter."
-       "retrieve_identified_record_for_identifiable",
-       new=Mock(side_effect=mock_retrieve_record))
-@patch("caoscrawler.crawl.db.Container.insert")
-@patch("caoscrawler.crawl.db.Container.update")
+@ patch("caoscrawler.crawl.cached_get_entity_by",
+        new=Mock(side_effect=mock_get_entity_by))
+@ patch("caoscrawler.identifiable_adapters.cached_get_entity_by",
+        new=Mock(side_effect=mock_get_entity_by))
+@ patch("caoscrawler.identifiable_adapters.CaosDBIdentifiableAdapter."
+        "retrieve_identified_record_for_identifiable",
+        new=Mock(side_effect=mock_retrieve_record))
+@ patch("caoscrawler.crawl.db.Container.insert")
+@ patch("caoscrawler.crawl.db.Container.update")
 def test_synchronization_no_commit(upmock, insmock):
     crawled_data = [r.copy() for r in EXAMPLE_SERVER_STATE if r.role == "Record"]
     # change  one; add one
@@ -472,16 +594,16 @@ def test_synchronization_no_commit(upmock, insmock):
     assert len(ups) == 1
 
 
-@patch("caoscrawler.crawl.cached_get_entity_by",
-       new=Mock(side_effect=mock_get_entity_by))
-@patch("caoscrawler.identifiable_adapters.cached_get_entity_by",
-       new=Mock(side_effect=mock_get_entity_by))
-@patch("caoscrawler.identifiable_adapters.CaosDBIdentifiableAdapter."
-       "retrieve_identified_record_for_identifiable",
-       new=Mock(side_effect=mock_retrieve_record))
-@patch("caoscrawler.crawl.db.Container.insert")
-@patch("caoscrawler.crawl.db.Container.update")
-@patch("caoscrawler.crawl.UpdateCache.insert")
+@ patch("caoscrawler.crawl.cached_get_entity_by",
+        new=Mock(side_effect=mock_get_entity_by))
+@ patch("caoscrawler.identifiable_adapters.cached_get_entity_by",
+        new=Mock(side_effect=mock_get_entity_by))
+@ patch("caoscrawler.identifiable_adapters.CaosDBIdentifiableAdapter."
+        "retrieve_identified_record_for_identifiable",
+        new=Mock(side_effect=mock_retrieve_record))
+@ patch("caoscrawler.crawl.db.Container.insert")
+@ patch("caoscrawler.crawl.db.Container.update")
+@ patch("caoscrawler.crawl.UpdateCache.insert")
 def test_security_mode(updateCacheMock, upmock, insmock):
     # trivial case: nothing to do
     crawled_data = [r.copy() for r in EXAMPLE_SERVER_STATE if r.role == "Record"]
@@ -580,12 +702,13 @@ def test_security_mode(updateCacheMock, upmock, insmock):
 
 def test_create_reference_mapping():
     a = db.Record().add_parent("A")
-    b = db.Record().add_parent("B").add_property('a', a)
+    b = db.Record(id=132).add_parent("B").add_property('a', a)
     ref = Crawler.create_reference_mapping([a, b])
     assert id(a) in ref
-    assert id(b) not in ref
+    assert id(b) in ref
     assert "B" in ref[id(a)]
-    assert ref[id(a)]["B"] == [b]
+    assert ref[id(b)] == {}
+    assert ref[id(a)]["B"] == [132]
 
 
 def test_create_flat_list():
@@ -608,7 +731,7 @@ def test_create_flat_list():
     assert c in flat
 
 
 @pytest.fixture
 def crawler_mocked_for_backref_test():
     crawler = Crawler()
     # mock retrieval of registered identifiables: return Record with just a parent
@@ -652,8 +775,8 @@ def test_validation_error_print(caplog):
         caplog.clear()
 
 
-@patch("caoscrawler.identifiable_adapters.get_children_of_rt",
-       new=Mock(side_effect=lambda x: [x]))
+@ patch("caoscrawler.identifiable_adapters.get_children_of_rt",
+        new=Mock(side_effect=lambda x: [x]))
 def test_split_into_inserts_and_updates_backref(crawler_mocked_for_backref_test):
     crawler = crawler_mocked_for_backref_test
     identlist = [Identifiable(name="A", record_type="BR"),
@@ -667,8 +790,8 @@ def test_split_into_inserts_and_updates_backref(crawler_mocked_for_backref_test)
         crawler.split_into_inserts_and_updates([db.Record(name="B").add_parent("C")])
 
     # identifiables were not yet checked
-    assert crawler.get_from_any_cache(identlist[0]) is None
-    assert crawler.get_from_any_cache(identlist[1]) is None
+    assert crawler.treated_records_lookup.get_any(entlist[1], identlist[0]) is None
+    assert crawler.treated_records_lookup.get_any(entlist[0], identlist[1]) is None
     # one with reference, one without
     assert not crawler._has_reference_value_without_id(identlist[0])
     assert crawler._has_reference_value_without_id(identlist[1])
@@ -688,8 +811,8 @@ def test_split_into_inserts_and_updates_backref(crawler_mocked_for_backref_test)
     assert insert[0].name == "B"
 
 
-@patch("caoscrawler.identifiable_adapters.get_children_of_rt",
-       new=Mock(side_effect=lambda x: [x]))
+@ patch("caoscrawler.identifiable_adapters.get_children_of_rt",
+        new=Mock(side_effect=lambda x: [x]))
 def test_split_into_inserts_and_updates_mult_backref(crawler_mocked_for_backref_test):
     # test whether multiple references of the same record type are correctly used
     crawler = crawler_mocked_for_backref_test
@@ -701,7 +824,9 @@ def test_split_into_inserts_and_updates_mult_backref(crawler_mocked_for_backref_
 
     # test whether both entities are listed in the backref attribute of the identifiable
     referencing_entities = crawler.create_reference_mapping(entlist)
-    identifiable = crawler.identifiableAdapter.get_identifiable(referenced, referencing_entities)
+    identifiable = crawler.identifiableAdapter.get_identifiable(
+        referenced,
+        referencing_entities[id(referenced)])
     assert len(identifiable.backrefs) == 2
 
     # check the split...
@@ -710,8 +835,8 @@ def test_split_into_inserts_and_updates_mult_backref(crawler_mocked_for_backref_
     assert len(insert) == 2
 
 
-@patch("caoscrawler.identifiable_adapters.get_children_of_rt",
-       new=Mock(side_effect=lambda x: [x]))
+@ patch("caoscrawler.identifiable_adapters.get_children_of_rt",
+        new=Mock(side_effect=lambda x: [x]))
 def test_split_into_inserts_and_updates_diff_backref(crawler_mocked_for_backref_test):
     # test whether multiple references of the different record types are correctly used
     crawler = crawler_mocked_for_backref_test
@@ -723,7 +848,10 @@ def test_split_into_inserts_and_updates_diff_backref(crawler_mocked_for_backref_
 
     # test whether both entities are listed in the backref attribute of the identifiable
     referencing_entities = crawler.create_reference_mapping(entlist)
-    identifiable = crawler.identifiableAdapter.get_identifiable(referenced, referencing_entities)
+    identifiable = crawler.identifiableAdapter.get_identifiable(
+        referenced,
+        referencing_entities[id(referenced)])
+
     assert len(identifiable.backrefs) == 2
 
     # check the split...
@@ -736,7 +864,7 @@ def mock_create_values(values, element):
     pass
 
 
-@patch("caoscrawler.converters.IntegerElementConverter.create_values")
+@ patch("caoscrawler.converters.IntegerElementConverter.create_values")
 def test_restricted_path(create_mock):
     """
     The restricted_path argument allows ignoring parts of the crawled data structure. Here, we make
@@ -829,7 +957,7 @@ def test_split_restricted_path():
 
 # Filter the warning because we want to have it here and this way it does not hinder running
 # tests with -Werror.
 @pytest.mark.filterwarnings("ignore:The prefix:DeprecationWarning")
 def test_deprecated_prefix_option():
     """Test that calling the crawler's main function with the deprecated
     `prefix` option raises the correct errors and warnings.
@@ -886,7 +1014,7 @@ def test_detect_circular_dependency(crawler_mocked_identifiable_retrieve, caplog
         _, _ = crawler.split_into_inserts_and_updates(flat)
     caplog.set_level(logging.ERROR, logger="caoscrawler.converters")
     assert "Found circular dependency" in caplog.text
-    assert "-------\na\n['C" in caplog.text
+    assert "\n--------\n\n> Parent: C\n\n>> Name: a\n[\'C\']" in caplog.text
     caplog.clear()
 
 
@@ -895,8 +1023,8 @@ def mock_get_entity_by_query(query=None):
         return db.Record(id=1111, name='rec_name').add_parent('RT')
 
 
-@patch("caoscrawler.crawl.cached_get_entity_by",
-       new=Mock(side_effect=mock_get_entity_by_query))
+@ patch("caoscrawler.crawl.cached_get_entity_by",
+        new=Mock(side_effect=mock_get_entity_by_query))
 def test_replace_name_with_referenced_entity():
     test_text = 'lkajsdf'
     test_int = 134343
@@ -964,3 +1092,72 @@ def test_replace_name_with_referenced_entity():
     assert isinstance(prop.value[2], int)
     assert prop.value[2] == test_id
     assert caoscrawler.crawl.cached_get_entity_by.call_count == 3
+
+
+def test_treated_record_lookup():
+    trlu = TreatedRecordLookUp()
+    exist = db.Record(id=1)
+    trlu.add(exist)
+    assert len(trlu._existing) == 1
+    # was added to existing
+    assert trlu._existing[id(exist)] is exist
+    # is in ID lookup
+    assert trlu._id_look_up[exist.id] is exist
+    # can be accessed via get_existing
+    assert trlu.get_existing(db.Record(id=1)) is exist
+
+    miss = db.Record()
+    # exception when identifiable is missing
+    with raises(RuntimeError):
+        trlu.add(miss)
+    ident = Identifiable(name='a')
+    trlu.add(miss, ident)
+    # was added to missing
+    assert trlu._missing[id(miss)] is miss
+    # is in ident lookup
+    assert trlu._identifiable_look_up[ident.get_representation()] is miss
+    # can be accessed via get_missing
+    assert trlu.get_missing(db.Record(), Identifiable(name='a')) is miss
+
+    fi = db.File(path='a', id=2)
+    trlu.add(fi)
+    assert len(trlu._existing) == 2
+    # was added to existing
+    assert trlu._existing[id(fi)] is fi
+    # is in ID lookup
+    assert trlu._id_look_up[fi.id] is fi
+    # is in path lookup
+    assert trlu._path_look_up[fi.path] is fi
+    # can be accessed via get_existing
+    assert trlu.get_existing(fi) is fi
+
+    all_exi = trlu.get_existing_list()
+    assert fi in all_exi
+    assert exist in all_exi
+    all_mi = trlu.get_missing_list()
+    assert miss in all_mi
+
+    # If a Record was added using the ID, the ID must be used to identify it even though later an
+    # identifiable may be passed as well
+    assert trlu.get_any(exist, Identifiable(name='b')) is exist
+
+    fi2 = db.File(path='b')
+    trlu.add(fi2)
+    assert trlu.get_any(db.File(path='b'), Identifiable(name='c')) is fi2
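+    # Inferred behaviour, summarized: entities carrying an id or a path are
+    # tracked as "existing" and retrieved via those keys; entities with neither
+    # are tracked as "missing" and require an Identifiable. get_any() falls
+    # back from id over path to the identifiable representation.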
+
+
+def test_merge_entity_with_identifying_reference(crawler_mocked_identifiable_retrieve):
+    # When one Python object representing a record is merged into another Python object
+    # representing the same record, the former object can be discarded; references from it
+    # to other records must then no longer play a role.
+    crawler = crawler_mocked_identifiable_retrieve
+    crawler.identifiableAdapter.get_registered_identifiable = Mock(
+        side_effect=lambda x: db.Record().add_parent('C').add_property(name='name') if
+        x.parents[0].name == "C" else
+        db.Record().add_parent('D').add_property(name='is_referenced_by', value="*")
+    )
+    a = db.Record(name='a').add_parent("D")
+    b = db.Record(name='b').add_parent("C")
+    c = db.Record(name='b').add_parent("C").add_property(name="C", value=a)
+    flat = [a, c, b]
+    _, _ = crawler.split_into_inserts_and_updates(flat)
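The tests above pin down the reworked contract of `Crawler.create_reference_mapping`: every scanned record gets an entry (possibly an empty dict), and references are tracked as entity ids instead of entity objects. A short sketch of how that output is consumed, derived from `test_create_reference_mapping` and the backref tests in this file:

```python
# Sketch only; the shapes are inferred from the tests in this diff.
import linkahead as db
from caoscrawler.crawl import Crawler

a = db.Record().add_parent("A")
b = db.Record(id=132).add_parent("B").add_property("a", a)

ref_map = Crawler.create_reference_mapping([a, b])
assert ref_map[id(a)] == {"B": [132]}  # `a` is referenced by record 132 of type "B"
assert ref_map[id(b)] == {}            # nothing references `b`, but the key exists

# get_identifiable() now receives only the slice belonging to one record,
# e.g. adapter.get_identifiable(a, ref_map[id(a)]), instead of the full map.
```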
diff --git a/unittests/test_h5_converter.py b/unittests/test_h5_converter.py
new file mode 100644
index 0000000000000000000000000000000000000000..2f7fae5d8d32bb7e5c90a535b63158c33df55daa
--- /dev/null
+++ b/unittests/test_h5_converter.py
@@ -0,0 +1,135 @@
+#
+# This file is a part of the CaosDB Project.
+#
+# Copyright (C) 2023 IndiScale GmbH <info@indiscale.com>
+# Copyright (C) 2023 Florian Spreckelsen <f.spreckelsen@indiscale.com>
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as
+# published by the Free Software Foundation, either version 3 of the
+# License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+#
+import numpy as np
+
+from functools import partial
+from pathlib import Path
+from pytest import fixture, importorskip
+
+import caosdb as db
+
+from caoscrawler.debug_tree import DebugTree
+from caoscrawler.hdf5_converter import (convert_basic_element_with_nd_array,
+                                        convert_h5_element, H5GroupElement,
+                                        H5DatasetElement, H5NdarrayElement)
+from caoscrawler.scanner import scan_directory
+from caoscrawler.structure_elements import (FloatElement, ListElement,
+                                            TextElement)
+from utils import dircheckstr as dircheck_base
+
+# Skip the whole module if h5py hasn't been installed
+h5py = importorskip("h5py")
+
+
+UNITTESTDIR = Path(__file__).parent
+
+# always add the path here
+dircheckstr = partial(dircheck_base, UNITTESTDIR)
+
+
+@fixture
+def h5_dummy_file():
+
+    path = UNITTESTDIR / "hdf5_dummy_file.hdf5"
+
+    return h5py.File(path, 'r')
+
+
+def test_h5_elements(h5_dummy_file):
+
+    elt = convert_h5_element(h5_dummy_file["group_level1_a"], "test")
+    assert isinstance(elt, H5GroupElement)
+
+    elt = convert_h5_element(h5_dummy_file["root_integers"], "test")
+    assert isinstance(elt, H5DatasetElement)
+
+
+def test_nd_array_conversion():
+
+    # Only test array handling here, `convert_basic_element` is tested
+    # elsewhere.
+    arr = np.array([[["something"]]])
+    elt = convert_basic_element_with_nd_array(arr)
+    assert isinstance(elt, TextElement)
+    assert elt.value == "something"
+
+    arr = np.zeros((1, 1))
+    elt = convert_basic_element_with_nd_array(arr)
+    assert isinstance(elt, FloatElement)
+    assert elt.value == 0
+
+    arr = np.zeros((1, 3, 1))
+    elt = convert_basic_element_with_nd_array(arr)
+    assert isinstance(elt, ListElement)
+    assert elt.value == [0, 0, 0]
+
+    arr = np.array([[1, 2, 3], [4, 5, 6]])
+    elt = convert_basic_element_with_nd_array(arr, internal_path="some/path")
+    assert isinstance(elt, H5NdarrayElement)
+    assert elt.internal_path == "some/path"
+
+    # Non-arrays should be forwarded correctly
+    elt = convert_basic_element_with_nd_array("something")
+    assert isinstance(elt, TextElement)
+    assert elt.value == "something"
+
+    elt = convert_basic_element_with_nd_array([0, 0, 0])
+    assert isinstance(elt, ListElement)
+    assert elt.value == [0, 0, 0]
+
+
+def test_record_creation():
+
+    dbt = DebugTree()
+    records = scan_directory(UNITTESTDIR, UNITTESTDIR / "h5_cfood.yml", debug_tree=dbt)
+
+    # In total 3 records: The file, the Dataset, and its ndarray
+    assert len(records) == 3
+    file_rec = [rec for rec in records if isinstance(rec, db.File)]
+    # exactly on file
+    assert len(file_rec) == 1
+
+    subd = dbt.debug_tree[dircheckstr("hdf5_dummy_file.hdf5")]
+    # At this level, we have 5 variables (directories and paths, plus H5File
+    # record), and one record.
+    assert len(subd[0]) == 5
+    assert len(subd[1]) == 1
+    file_rec = subd[1]["H5File"]
+    assert file_rec.get_property("H5Dataset") is not None
+    assert file_rec.get_property("H5Dataset").value is not None
+    # Reference properties currently need to be integration tested, especially
+    # with the circular dependency between H5File and NDArray.
+
+    # top level integers
+    subd = dbt.debug_tree["root_integers"]
+    # Two additional variables (RootIntegerElement + Dataset record), one
+    # additional record
+    assert len(subd[0]) == 7
+    assert len(subd[1]) == 2
+    ds_rec = subd[1]["H5Dataset"]
+    assert isinstance(ds_rec, db.Record)
+    assert len(ds_rec.parents) == 1
+    assert ds_rec.parents[0].name == "H5Dataset"
+    assert ds_rec.get_property("Ndarray") is not None
+    assert ds_rec.get_property("Ndarray").value is not None
+    assert ds_rec.get_property("attr_data_root") is not None
+    assert isinstance(ds_rec.get_property("attr_data_root").value, list)
+    for number in [-2., -4., -8., -10.12345]:
+        assert number in [float(val) for val in ds_rec.get_property("attr_data_root").value]
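Because the fixture file itself is binary and not reviewable in this diff, here is a quick sketch (plain h5py, no crawler involved, run from the repository root) for inspecting the layout these assertions depend on:

```python
import h5py

with h5py.File("unittests/hdf5_dummy_file.hdf5", "r") as f:
    # List every group and dataset; expected to include at least
    # "group_level1_a" (Group) and "root_integers" (Dataset).
    f.visititems(lambda name, obj: print(name, type(obj).__name__))
    # The attribute that the cfood collects into a list property:
    print(list(f["root_integers"].attrs.get("attr_data_root", [])))
```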
diff --git a/unittests/test_identifiable.py b/unittests/test_identifiable.py
index 3f3c606b163df4dc238be9a669fd31eb630a582d..28bdb7a2ad75d5b9389b47ca3f0ec2b2e2a1404b 100644
--- a/unittests/test_identifiable.py
+++ b/unittests/test_identifiable.py
@@ -24,10 +24,9 @@
 test identifiable module
 """
 
-import pytest
 import caosdb as db
+import pytest
 from caoscrawler.identifiable import Identifiable
-from caoscrawler.identified_cache import IdentifiedCache
 
 
 def test_create_hashable_string():
diff --git a/unittests/test_identifiable_adapters.py b/unittests/test_identifiable_adapters.py
index 268b9800ddf1ef1688386f394ec1c6c7eb3e3912..ee0e0d6cd7c791f78e7cd2307dc6f34698326b4a 100644
--- a/unittests/test_identifiable_adapters.py
+++ b/unittests/test_identifiable_adapters.py
@@ -133,6 +133,21 @@ def test_non_default_name():
     assert identifiable.name is None
 
 
+def test_wildcard_ref():
+    ident = CaosDBIdentifiableAdapter()
+    ident.register_identifiable(
+        "Person", db.RecordType()
+        .add_parent(name="Person")
+        .add_property(name="is_referenced_by", value=["*"]))
+    rec = (db.Record(name="don't touch it").add_parent("Person")
+           .add_property(name="last_name", value='Tom'))
+    identifiable = ident.get_identifiable(rec,
+                                          referencing_entities={
+                                              'A': [1]}
+                                          )
+    assert identifiable.backrefs[0] == 1
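+    # Note (inferred from this test): the ["*"] wildcard in `is_referenced_by`
+    # accepts a reference from any Record, so every entry of the
+    # referencing_entities mapping ({parent name: [ids]}) feeds the
+    # identifiable's backrefs -- here the id 1 registered under "A".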
+
+
 def test_convert_value():
     # Test that string representations of objects stay unchanged (no stripping etc.).
     class A():
diff --git a/unittests/test_identified_cache.py b/unittests/test_identified_cache.py
deleted file mode 100644
index 4ed7c55c7326415308917e20e9f391b17b07ad87..0000000000000000000000000000000000000000
--- a/unittests/test_identified_cache.py
+++ /dev/null
@@ -1,44 +0,0 @@
-#!/usr/bin/env python3
-# encoding: utf-8
-#
-# ** header v3.0
-# This file is a part of the CaosDB Project.
-#
-# Copyright (C) 2021 Indiscale GmbH <info@indiscale.com>
-# Copyright (C) 2021 Henrik tom Wörden <h.tomwoerden@indiscale.com>
-#
-# This program is free software: you can redistribute it and/or modify
-# it under the terms of the GNU Affero General Public License as
-# published by the Free Software Foundation, either version 3 of the
-# License, or (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-# GNU Affero General Public License for more details.
-#
-# You should have received a copy of the GNU Affero General Public License
-# along with this program. If not, see <https://www.gnu.org/licenses/>.
-#
-# ** end header
-#
-
-"""
-test identified_cache module
-"""
-
-import caosdb as db
-from caoscrawler.identifiable import Identifiable
-from caoscrawler.identified_cache import IdentifiedCache
-
-
-def test_IdentifiedCache():
-    ident = Identifiable(name="A", record_type="B")
-    record = db.Record("A").add_parent("B").add_property('b', 5)
-    cache = IdentifiedCache()
-    assert ident not in cache
-    cache.add(record=record, identifiable=ident)
-    assert ident in cache
-    assert cache[ident] is record
-    assert Identifiable(name="A", record_type="C") != Identifiable(name="A", record_type="B")
-    assert Identifiable(name="A", record_type="C") not in cache