diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index 8840e613f1e1eb86f30779b8b3535e2ff97ad0cc..b2ecd97cbded32dea3be9046cfdaac516bfb0ec2 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -130,7 +130,7 @@ unittest_py3.7:
     # TODO: Use f-branch logic here
     - pip install git+https://gitlab.indiscale.com/caosdb/src/caosdb-pylib.git@dev
     - pip install git+https://gitlab.indiscale.com/caosdb/src/caosdb-advanced-user-tools.git@dev
-    - pip install .
+    - pip install .[h5-crawler]
     # actual test
     - caosdb-crawler --help
     - pytest --cov=caosdb -vv ./unittests
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 33cd6e06a51a1dcec534e3817a99b4f1a509ce2f..7aff30041ad4d619467e267ef7999344d586d08f 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -22,6 +22,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
   `identifiable_adapters.IdentifiableAdapter.get_identifying_referenced_entities`
   static methods to return the referencing or referenced entities belonging to a
   registered identifiable, respectively.
+* [#70](https://gitlab.com/linkahead/linkahead-crawler/-/issues/70): Optional
+  converters for HDF5 files. They require this package to be installed with its
+  ``h5-crawler`` dependency.
 
 ### Changed ###
 
diff --git a/setup.cfg b/setup.cfg
index fedee42ab158adfcf19fd8103fcf67f2c52c7099..dd2961d13934c88b13535a0cc1c17b6d7dbd74e3 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -39,3 +39,8 @@ per-file-ignores = __init__.py:F401
 [options.entry_points]
 console_scripts =
   caosdb-crawler = caoscrawler.crawl:main
+
+[options.extras_require]
+h5-crawler =
+           h5py >= 3.8
+           numpy
diff --git a/src/caoscrawler/cfood-schema.yml b/src/caoscrawler/cfood-schema.yml
index b0d77bbf5d7ba09df3c0c47d656fa3d22d07b6d2..5a6e1e50345382ca6e5a1e6ef3a8fbeafb806b84 100644
--- a/src/caoscrawler/cfood-schema.yml
+++ b/src/caoscrawler/cfood-schema.yml
@@ -31,6 +31,10 @@ cfood:
           - JSONFile
           - CSVTableConverter
           - XLSXTableConverter
+          - H5File
+          - H5Dataset
+          - H5Group
+          - H5Ndarray
           description: Type of this converter node.
         match:
           description: typically a regexp which is matched to a structure element name
diff --git a/src/caoscrawler/converters.py b/src/caoscrawler/converters.py
index 3fcc2c088e47d070ad7b9b03b19895fe58f304ed..dddb83a7c57d90a987de74f337668f3bae73ee1b 100644
--- a/src/caoscrawler/converters.py
+++ b/src/caoscrawler/converters.py
@@ -1073,16 +1073,13 @@ class ListElementConverter(Converter):
                 "This converter can only process DictListElements.")
         children: list[StructureElement] = []
         for index, list_element in enumerate(element.value):
-            # TODO(fspreck): Refactor this and merge with DictXXXElements maybe?
-            if isinstance(list_element, str):
-                children.append(TextElement(str(index), list_element))
-            elif isinstance(list_element, dict):
-                children.append(DictElement(str(index), list_element))
-            elif isinstance(list_element, StructureElement):
-                children.append(list_element)
-            else:
-                raise NotImplementedError(
-                    f"Unkown type {type(list_element)} in list element {list_element}.")
+            children.append(
+                convert_basic_element(
+                    list_element,
+                    name=f"{index}",
+                    msg_prefix=f"The value at index {index} in the list has an unknown type."
+                )
+            )
         return children
 
     def typecheck(self, element: StructureElement):
diff --git a/src/caoscrawler/hdf5_converter.py b/src/caoscrawler/hdf5_converter.py
new file mode 100644
index 0000000000000000000000000000000000000000..506c7b3942cc2518ffa47762c4bed742b9f09b83
--- /dev/null
+++ b/src/caoscrawler/hdf5_converter.py
@@ -0,0 +1,346 @@
+#
+# This file is a part of the CaosDB Project.
+#
+# Copyright (C) 2023 IndiScale GmbH <info@indiscale.com>
+# Copyright (C) 2023 Florian Spreckelsen <f.spreckelsen@indiscale.com>
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as
+# published by the Free Software Foundation, either version 3 of the
+# License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+#
+
+try:
+    import h5py
+except ModuleNotFoundError:
+    raise ModuleNotFoundError(
+        "Couldn't find module h5py. Did you install the crawler package with "
+        "its optional `h5-crawler` dependency?"
+    )
+
+import numpy as np
+
+from typing import Union
+
+import caosdb as db
+
+from .converters import (convert_basic_element, Converter, DictElementConverter,
+                         match_name_and_value, SimpleFileConverter)
+from .stores import GeneralStore, RecordStore
+from .structure_elements import DictElement, File, FloatElement, IntegerElement, StructureElement
+
+
+def convert_attributes(elt: Union[h5py.File, h5py.Group, h5py.Dataset]):
+    """Convert hdf5 attributes to a list of either basic scalar structure elements or ndarrays.
+
+    Parameters
+    ----------
+    elt : Union[h5py.File, h5py.Group, h5py.Dataset]
+        The hdf5 element the attributes of which will be converted to structure
+        elements.
+
+    Returns
+    -------
+    converted : list[StructureElement]
+        A list of the attributes converted to StructureElements (either basic
+        scalar elements or ndarray).
+    """
+
+    converted = []
+    for name, value in elt.attrs.items():
+        converted.append(convert_basic_element_with_nd_array(
+            value, name, f"The value of attribute {name} has an unknown type: {type(value)}."))
+
+    return converted
+
+
+def convert_h5_element(elt: Union[h5py.Group, h5py.Dataset], name: str):
+    """Convert a given HDF5 element to the corresponding StructureElement.
+
+    Parameters
+    ----------
+    elt : Union[h5py.Group, h5py.Dataset]
+        The hdf5 element to be converted.
+    name : str
+        The name of the StructureElement that the hdf5 element is converted to.
+
+    Raises
+    ------
+    ValueError
+        If the given element cannot be converted into an HDF5 structure element.
+
+    Returns
+    -------
+    StructureElement
+        The converted StructureElement.
+    """
+
+    if isinstance(elt, h5py.Group):
+
+        return H5GroupElement(name, elt)
+
+    if isinstance(elt, h5py.Dataset):
+
+        return H5DatasetElement(name, elt)
+
+    raise ValueError("The given element must be either an HDF5 Group or Dataset object.")
+
+
+def convert_basic_element_with_nd_array(value, name: str = None,
+                                        internal_path: str = None, msg_prefix: str = ""):
+    """Convert a given object either to an ndarray structure element or to a
+    basic scalar structure element.
+
+    This function extends :func:`~caoscrawler.converters.convert_basic_element`
+    by a special treatment for certain numpy objects, most importantly
+    ndarrays. They are converted to a scalar in case of a size-1 array, to a
+    list in case of a 1-d array, and to a ``H5NdarrayElement`` in all other
+    cases. In addition, numpy integers and floats are also converted to
+    IntegerElements and FloatElements, respectively.
+
+    Parameters
+    ----------
+    value
+        The object to be converted.
+    name : str, optional
+        The name of the structure element ``value`` is being converted
+        to. Default is None.
+    internal_path : str, optional
+        The internal path of ``value`` within the HDF5 file. Default is None.
+    msg_prefix : str, optional
+        The prefix of the error message that will be raised. Default is ``""``.
+
+    Returns
+    -------
+    StructureElement
+        The StructureElement ``value`` was converted to.
+
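+    Examples
+    --------
+    A sketch of the dispatch behavior (compare the unit tests in
+    ``unittests/test_h5_converter.py``)::
+
+        convert_basic_element_with_nd_array(np.zeros((1, 1)))     # FloatElement
+        convert_basic_element_with_nd_array(np.zeros((1, 3, 1)))  # ListElement
+        convert_basic_element_with_nd_array(
+            np.zeros((2, 3)), internal_path="some/path")          # H5NdarrayElement
+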
+    """
+
+    if isinstance(value, np.ndarray):
+
+        if value.size == 1:
+            # this is a scalar stacked in a numpy array. We don't know its
+            # actual shape, so we reshape first, then use the actual value
+            # inside.
+            value = value.reshape((1,))[0]
+
+        elif np.squeeze(value).ndim == 1:
+            # If the array is one-dimensional we can save it as a list
+            value = list(np.squeeze(value))
+
+        else:
+            # real multi-dimensional array
+            return H5NdarrayElement(name, value, internal_path)
+
+    elif isinstance(value, np.int32) or isinstance(value, np.int64):
+
+        return IntegerElement(name, value)
+
+    elif isinstance(value, np.float64):
+
+        return FloatElement(name, value)
+
+    return convert_basic_element(value, name, msg_prefix)
+
+
+class H5GroupElement(DictElement):
+    """StructureElement specific for HDF5 groups"""
+
+    def __init__(self, name: str, value: h5py.Group):
+        super().__init__(name, value)
+
+
+class H5DatasetElement(DictElement):
+    """StructureElement specific for HDF5 datasets."""
+
+    def __init__(self, name: str, value: h5py.Dataset):
+        super().__init__(name, value)
+
+
+class H5NdarrayElement(DictElement):
+    """StructureElement specific for NDArrays within HDF5 files.
+
+    Also store the internal path of the array within the HDF5 file in its
+    ``internal_path`` attribute.
+
+    """
+
+    def __init__(self, name: str, value, internal_path: str):
+        super().__init__(name, value)
+        self.internal_path = internal_path
+
+
+class H5FileConverter(SimpleFileConverter):
+    """Converter for HDF5 files that creates children for the contained
+    attributes, groups, and datasets.
+
+    """
+
+    def create_children(self, generalStore: GeneralStore, element: StructureElement):
+        """Create children from root-level file attributes and contained hdf5
+        elements.
+
+        """
+
+        if not isinstance(element, File):
+
+            raise ValueError("create_children should have been called with a File object.")
+
+        ff = h5py.File(element.path, 'r')
+
+        children = []
+
+        for name, value in ff.items():
+
+            children.append(convert_h5_element(value, name))
+
+        children.extend(convert_attributes(ff))
+
+        return children
+
+
+class H5GroupConverter(DictElementConverter):
+    """Converter for HDF5 groups that creates children from the group-level
+    attributes and the contained subgroups and datasets.
+
+    """
+
+    def typecheck(self, element: StructureElement):
+
+        return isinstance(element, H5GroupElement)
+
+    def create_children(self, generalStore: GeneralStore, element: StructureElement):
+        """Create children from group attributes and hdf5 elements contained in
+        this group.
+
+        """
+
+        if not isinstance(element.value, h5py.Group):
+
+            raise ValueError("create_children should have been called with an HDF5 Group object.")
+
+        children = []
+
+        for name, value in element.value.items():
+
+            children.append(convert_h5_element(value, name))
+
+        children.extend(convert_attributes(element.value))
+
+        return children
+
+
+class H5DatasetConverter(DictElementConverter):
+    """Converter for HDF5 datasets that creates children from the dataset
+    attributes and the contained array data.
+
+    """
+
+    def typecheck(self, element: StructureElement):
+
+        return isinstance(element, H5DatasetElement)
+
+    def create_children(self, generalStore: GeneralStore, element: StructureElement):
+        """Create children from the dataset attributes and append the array data
+        contained in this dataset.
+
+        """
+
+        if not isinstance(element.value, h5py.Dataset):
+
+            raise ValueError("create_children should have been called with an HDF5 Dataset object.")
+
+        children = convert_attributes(element.value)
+
+        children.append(
+            H5NdarrayElement(
+                name=self.name+"_ndarray",
+                value=element.value,
+                internal_path=element.value.name
+            )
+        )
+        return children
+
+
+class H5NdarrayConverter(Converter):
+    """Converter for ndarrays contained in HDF5 files. Creates the wrapper
+    record for this ndarray.
+
+    """
+
+    def __init__(self, definition: dict, name: str, converter_registry: dict):
+
+        # Check that a non-empty name for the record that will be created for
+        # the ndarray Record (within the cfood) is given
+        if not ("recordname" in definition and definition["recordname"]):
+
+            raise RuntimeError(f"Converter {name} lacks the `recordname` definition.")
+
+        super().__init__(definition, name, converter_registry)
+
+    def create_children(self, values: GeneralStore, element: StructureElement):
+        """The ndarray doesn't have any further children."""
+
+        return []
+
+    def create_records(self, values: GeneralStore, records: RecordStore, element: StructureElement):
+        """Create a wrapper record with name ``recordname``, type
+        ``array_recordtype_name`` (default ``H5Ndarray``) and the internal path
+        stored in a property with name ``internal_path_property_name`` (default
+        ``internal_hdf5_path``).
+
+        """
+
+        rname = self.definition["recordname"]
+        if "array_recordtype_name" in self.definition:
+            rtname = self.definition["array_recordtype_name"]
+        else:
+            rtname = "H5Ndarray"
+
+        if "internal_path_property_name" in self.definition:
+            propname = self.definition["internal_path_property_name"]
+        else:
+            propname = "internal_hdf5_path"
+
+        rec = db.Record().add_parent(rtname)
+        records[rname] = rec
+        values[rname] = rec
+
+        rec.add_property(name=propname, value=element.internal_path)
+        keys_modified = [(rname, propname)]
+
+        keys_modified.extend(super().create_records(values, records, element))
+
+        return keys_modified
+
+    def typecheck(self, element: StructureElement):
+
+        return isinstance(element, H5NdarrayElement)
+
+    @Converter.debug_matching("name")
+    def match(self, element: StructureElement):
+
+        if not isinstance(element, H5NdarrayElement):
+
+            raise RuntimeError("This converter can only be called with H5NdarrayElements.")
+
+        return match_name_and_value(self.definition, element.name, element.value)
diff --git a/src/caoscrawler/scanner.py b/src/caoscrawler/scanner.py
index 8c832109451ecd05c36838da58fbb851d5b9ac7c..c4a8063cae5e74222e92f28a3898656bf4a97f6a 100644
--- a/src/caoscrawler/scanner.py
+++ b/src/caoscrawler/scanner.py
@@ -262,34 +262,38 @@ def scanner(items: list[StructureElement],
 
     Formerly known as "_crawl".
 
-    items: structure_elements (e.g. files and folders on one level on the hierarchy)
-
-    converters: locally defined converters for
-        treating structure elements. A locally defined converter could be
-        one that is only valid for a specific subtree of the originally
-        cralwed StructureElement structure.
-
-    general_store and record_store: This recursion of the crawl function should only operate on
-                                  copies of the global stores of the Crawler object.
-
-    restricted_path: optional, list of strings, traverse the data tree only along the given
-                     path. For example, when a directory contains files a, b and c and b is
-                     given as restricted_path, a and c will be ignroed by the crawler.
-                     When the end of the given path is reached, traverse the full tree as
-                     normal. The first element of the list provided by restricted_path should
-                     be the name of the StructureElement at this level, i.e. denoting the
-                     respective element in the items argument.
-
-    registered_transformer_functions: dict
+    Parameters
+    ----------
+    items:
+        structure_elements (e.g. files and folders on one level of the hierarchy)
+
+    converters:
+        locally defined converters for treating structure elements. A locally
+        defined converter could be one that is only valid for a specific subtree
+        of the originally crawled StructureElement structure.
+
+    general_store, record_store:
+        This recursion of the crawl function should only operate on copies of
+        the global stores of the Crawler object.
+
+    restricted_path : list of strings, optional
+        traverse the data tree only along the given path. For example, when a
+        directory contains files a, b and c and b is given as restricted_path, a
+        and c will be ignored by the crawler. When the end of the given path is
+        reached, traverse the full tree as normal. The first element of the list
+        provided by restricted_path should be the name of the StructureElement
+        at this level, i.e. denoting the respective element in the items
+        argument.
+
+    registered_transformer_functions : dict, optional
         A dictionary of transformer functions that can be used in the "transform" block
         of a converter and that allows to apply simple transformations to variables extracted
         either by the current converter or to other variables found in the current variable store.
 
         Each function is a dictionary:
-        - The key is the name of the function to be looked up in the dictionary
-          of registered transformer functions.
-        - The value is the function which needs to be of the form:
 
+        - The key is the name of the function to be looked up in the dictionary of registered transformer functions.
+        - The value is the function which needs to be of the form:
             def func(in_value: Any, in_parameters: dict) -> Any:
                 pass
 
@@ -468,9 +472,9 @@ def scan_structure_elements(items: Union[list[StructureElement], StructureElemen
          A dictionary representing the crawler definition, possibly from a yaml
          file.
     restricted_path: optional, list of strings
-         Traverse the data tree only along the given path. When the end of the given path
-         is reached, traverse the full tree as normal. See docstring of 'scanner' for
-            more details.
+         Traverse the data tree only along the given path. When the end of the
+         given path is reached, traverse the full tree as normal. See docstring
+         of 'scanner' for more details.
 
     Returns
     -------
diff --git a/src/doc/cfood.rst b/src/doc/cfood.rst
index c12e251d49e164a737b20e92e56e7b3e10149d4f..07431af0a9fb26e569be5d47f79d6a4f120df269 100644
--- a/src/doc/cfood.rst
+++ b/src/doc/cfood.rst
@@ -183,7 +183,7 @@ in a vairable with the same name (as it is the case for other Records).
 Transform Functions
 -------------------
 You can use transform functions to alter variable values that the crawler consumes (e.g. a string
-that was matched with a reg exp). See :doc:`Converter Documentation<converters.rst>`.
+that was matched with a reg exp). See :doc:`Converter Documentation<converters>`.
 
 You can define your own  transform functions by adding the the same way you add custom converters:
 
diff --git a/src/doc/concepts.rst b/src/doc/concepts.rst
index b10deccdca1a2adf60ebcbac930bc797875724ff..32176b9edb895074021b3ed4eabe270ad48ae632 100644
--- a/src/doc/concepts.rst
+++ b/src/doc/concepts.rst
@@ -18,6 +18,8 @@ Relevant sources in:
 
 - ``src/structure_elements.py``
 
+.. _ConceptConverters:
+
 Converters
 ++++++++++
 
diff --git a/src/doc/converters.rst b/src/doc/converters.rst
index 60da52d3ed110f050a3d7aae866cc7d8b6b8dc31..44988fbd497cdb57023b5a696f83d55e7eb5113a 100644
--- a/src/doc/converters.rst
+++ b/src/doc/converters.rst
@@ -25,7 +25,7 @@ The yaml definition looks like the following:
 TODO: outdated, see cfood-schema.yml
 
 .. code-block:: yaml
-                
+
     <NodeName>:
         type: <ConverterName>
         match: ".*"
@@ -41,7 +41,7 @@ TODO: outdated, see cfood-schema.yml
                 - Experiment
         subtree:
             (...)
-     
+
 The **<NodeName>** is a description of what it represents (e.g.
 'experiment-folder') and is used as identifier.
 
@@ -58,13 +58,13 @@ described here.
 
 Transform Functions
 +++++++++++++++++++
-Often the situation arises, that you cannot use a value as it is found. Maybe a value should be 
+Often the situation arises that you cannot use a value as it is found. Maybe a value should be
 increased by an offset or a string should be split into a list of pieces. In order to allow such
 simple conversions, transform functions can be named in the converter definition that are then
 applied to the respective variables when the converter is executed.
 
 .. code-block:: yaml
-                
+
     <NodeName>:
         type: <ConverterName>
         match: ".*"
@@ -102,7 +102,7 @@ list valued property to the Report Record.
 
 There are a number of transform functions that are defined by default (see
 ``src/caoscrawler/default_transformers.yml``). You can define custom transform functions by adding
-them to the cfood definition (see :doc:`CFood Documentation<cfood.rst>`).
+them to the cfood definition (see :doc:`CFood Documentation<cfood>`).
 
 
 Standard Converters
@@ -191,7 +191,7 @@ column names to values of the respective cell.
 Example:
 
 .. code-block:: yaml
-                
+
    subtree:
      TABLE:
        type: CSVTableConverter
@@ -220,6 +220,175 @@ XLSXTableConverter
 CSVTableConverter
 =================
 
+Further converters
+++++++++++++++++++
+
+More converters, together with cfood definitions and examples, can be found in
+the `LinkAhead Crawler Extensions Subgroup
+<https://gitlab.com/linkahead/crawler-extensions>`_ on GitLab. In the following,
+we list converters that are shipped with the crawler library itself but are not
+part of the set of standard converters and may require this library to be
+installed with additional optional dependencies.
+
+HDF5 Converters
+===============
+
+For treating `HDF5 Files
+<https://docs.hdfgroup.org/hdf5/develop/_s_p_e_c.html>`_, there are in total
+four individual converters corresponding to the internal structure of HDF5 files:
+the :ref:`H5FileConverter` which opens the file itself and creates further
+structure elements from HDF5 groups, datasets, and included multi-dimensional
+arrays that are in turn treated by the :ref:`H5GroupConverter`, the
+:ref:`H5DatasetConverter`, and the :ref:`H5NdarrayConverter`, respectively. You
+need to install the LinkAhead crawler with its optional ``h5-crawler`` dependency
+for using these converters.
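+
+For example, when installing this package from a checkout of the repository,
+the optional dependency can be selected like this (adapt the package source to
+your setup):
+
+.. code-block:: sh
+
+   pip install .[h5-crawler]
+
+Since these converters are not part of the standard set, they also need to be
+registered within the cfood definition before they can be used, as is done in
+the unit-test cfood linked below:
+
+.. code-block:: yaml
+
+   Converters:
+     H5Dataset:
+       converter: H5DatasetConverter
+       package: caoscrawler.hdf5_converter
+     H5File:
+       converter: H5FileConverter
+       package: caoscrawler.hdf5_converter
+     H5Group:
+       converter: H5GroupConverter
+       package: caoscrawler.hdf5_converter
+     H5Ndarray:
+       converter: H5NdarrayConverter
+       package: caoscrawler.hdf5_converter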
+
+The basic idea when crawling HDF5 files is to treat them very similarly to
+:ref:`dictionaries <DictElement Converter>` in which the attributes on root,
+group, or dataset level are essentially treated like ``BooleanElement``,
+``TextElement``, ``FloatElement``, and ``IntegerElement`` in a dictionary: They
+are appended as children and can be accessed via the ``subtree``. The file
+itself and the groups within may contain further groups and datasets, which can
+have their own attributes, subgroups, and datasets, very much like
+``DictElements`` within a dictionary. The main difference to any other
+dictionary type is the presence of multi-dimensional arrays within HDF5
+datasets. Since LinkAhead doesn't have any datatype corresponding to these, and
+since it isn't desirable to store these arrays directly within LinkAhead for
+reasons of performance and of searchability, we wrap them within a specific
+Record as explained :ref:`below <H5NdarrayConverter>`, together with more
+metadata and their internal path within the HDF5 file. Users can thus query for
+datasets and their arrays according to their metadata within LinkAhead and then
+use the internal path information to access the dataset within the file
+directly. The type of this record and the property for storing the internal path
+need to be reflected in the datamodel. Using the default names, you would need a
+datamodel like
+
+.. code-block:: yaml
+
+   H5Ndarray:
+     obligatory_properties:
+       internal_hdf5_path:
+         datatype: TEXT
+
+although the names of both property and record type can be configured within the
+cfood definition.
+
+A simple example of a cfood definition for HDF5 files can be found in the `unit
+tests
+<https://gitlab.com/linkahead/linkahead-crawler/-/blob/main/unittests/h5_cfood.yml?ref_type=heads>`_
+and shows how the individual converters are used in order to crawl a `simple
+example file
+<https://gitlab.com/linkahead/linkahead-crawler/-/blob/main/unittests/hdf5_dummy_file.hdf5?ref_type=heads>`_
+containing groups, subgroups, and datasets, together with their respective
+attributes.
+
+H5FileConverter
+---------------
+
+This is an extension of the
+:py:class:`~caoscrawler.converters.SimpleFileConverter` class. It opens the HDF5
+file and creates children for any contained group or dataset. Additionally, the
+root-level attributes of the HDF5 file are accessible as children.
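+
+A minimal usage example, following the unit-test cfood linked above (the node
+name ``H5FileElement`` is arbitrary):
+
+.. code-block:: yaml
+
+   H5FileElement:
+     type: H5File
+     match: (.*)\.(hdf5|h5)$
+     records:
+       H5File:
+         parents:
+           - H5File
+         role: File
+         path: $H5FileElement
+         file: $H5FileElement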
+
+H5GroupConverter
+----------------
+
+This is an extension of the
+:py:class:`~caoscrawler.converters.DictElementConverter` class. Children are
+created for all subgroups and datasets in this HDF5 group. Additionally, the
+group-level attributes are accessible as children.
+
+H5DatasetConverter
+------------------
+
+This is an extension of the
+:py:class:`~caoscrawler.converters.DictElementConverter` class. Most
+importantly, it stores the array data contained in the HDF5 dataset in an
+:py:class:`~caoscrawler.hdf5_converter.H5NdarrayElement`, which is added to its
+children alongside the dataset attributes.
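+
+In the unit-test cfood, for example, a dataset called ``root_integers`` is
+matched and recorded like this:
+
+.. code-block:: yaml
+
+   RootIntegerElement:
+     type: H5Dataset
+     match_name: ^root_integers$
+     records:
+       H5Dataset:
+         parents:
+           - H5Dataset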
+
+H5NdarrayConverter
+------------------
+
+This converter creates a wrapper record for the contained dataset. The name of
+this record needs to be specified in the cfood definition of this converter via
+the ``recordname`` option. The RecordType of this record can be configured with
+the ``array_recordtype_name`` option and defaults to ``H5Ndarray``. Via the
+given ``recordname``, this record can be used within the cfood. Most
+importantly, this record stores the internal path of this array within the HDF5
+file in a text property, the name of which can be configured with the
+``internal_path_property_name`` option which defaults to ``internal_hdf5_path``.
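+
+Again following the unit-test cfood, a minimal definition could look like the
+following (the record name ``this`` is an arbitrary choice):
+
+.. code-block:: yaml
+
+   TopLevelIntNDElement:
+     type: H5Ndarray
+     match_name: (.*)
+     recordname: this
+     records:
+       H5Dataset:
+         Ndarray: $this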
+
 Custom Converters
 +++++++++++++++++
 
@@ -251,10 +350,10 @@ The following methods are abstract and need to be overwritten by your custom con
 - :py:meth:`~caoscrawler.converters.Converter.match`
 - :py:meth:`~caoscrawler.converters.Converter.typecheck`
 
-  
+
 Example
 =======
-  
+
 In the following, we will explain the process of adding a custom converter to a yaml file using
 a SourceResolver that is able to attach a source element to another entity.
 
@@ -285,50 +384,50 @@ Furthermore we will customize the method :py:meth:`~caoscrawler.converters.Conve
 number of records can be generated by the yaml definition. So for any applications - like here - that require an arbitrary number of records to be created, a customized implementation of :py:meth:`~caoscrawler.converters.Converter.create_records` is recommended.
 In this context it is recommended to make use of the function :func:`caoscrawler.converters.create_records` that implements creation of record objects from python dictionaries of the same structure
 that would be given using a yaml definition (see next section below).
-     
+
 .. code-block:: python
 
     import re
     from caoscrawler.stores import GeneralStore, RecordStore
     from caoscrawler.converters import TextElementConverter, create_records
     from caoscrawler.structure_elements import StructureElement, TextElement
-    
+
 
     class SourceResolver(TextElementConverter):
       """
       This resolver uses a source list element (e.g. from the markdown readme file)
       to link sources correctly.
       """
-       
+
       def __init__(self, definition: dict, name: str,
                    converter_registry: dict):
           """
           Initialize a new directory converter.
           """
           super().__init__(definition, name, converter_registry)
-       
+
       def create_children(self, generalStore: GeneralStore,
                                 element: StructureElement):
-                                
+
           # The source resolver does not create children:
-          
+
           return []
-       
+
       def create_records(self, values: GeneralStore,
                          records: RecordStore,
                          element: StructureElement,
                          file_path_prefix):
           if not isinstance(element, TextElement):
               raise RuntimeError()
-       
+
           # This function must return a list containing tuples, each one for a modified
           # property: (name_of_entity, name_of_property)
           keys_modified = []
-       
+
           # This is the name of the entity where the source is going to be attached:
           attach_to_scientific_activity = self.definition["scientific_activity"]
           rec = records[attach_to_scientific_activity]
-       
+
           # The "source" is a path to a source project, so it should have the form:
           # /<Category>/<project>/<scientific_activity>/
           # obtain these information from the structure element:
@@ -336,18 +435,18 @@ that would be given using a yaml definition (see next section below).
           regexp = (r'/(?P<category>(SimulationData)|(ExperimentalData)|(DataAnalysis))'
                     '/(?P<project_date>.*?)_(?P<project_identifier>.*)'
                     '/(?P<date>[0-9]{4,4}-[0-9]{2,2}-[0-9]{2,2})(_(?P<identifier>.*))?/')
-       
+
           res = re.match(regexp, val)
           if res is None:
               raise RuntimeError("Source cannot be parsed correctly.")
-       
+
           # Mapping of categories on the file system to corresponding record types in CaosDB:
           cat_map = {
               "SimulationData": "Simulation",
               "ExperimentalData": "Experiment",
               "DataAnalysis": "DataAnalysis"}
           linkrt = cat_map[res.group("category")]
-       
+
           keys_modified.extend(create_records(values, records, {
               "Project": {
                   "date": res.group("project_date"),
@@ -361,7 +460,7 @@ that would be given using a yaml definition (see next section below).
               attach_to_scientific_activity: {
                   "sources": "+$" + linkrt
               }}, file_path_prefix))
-       
+
           # Process the records section of the yaml definition:
           keys_modified.extend(
               super().create_records(values, records, element, file_path_prefix))
@@ -374,7 +473,7 @@ that would be given using a yaml definition (see next section below).
 If the recommended (python) package structure is used, the package containing the converter
 definition can just be installed using `pip install .` or `pip install -e .` from the
 `scifolder_package` directory.
-          
+
 The following yaml block will register the converter in a yaml file:
 
 .. code-block:: yaml
@@ -384,7 +483,7 @@ The following yaml block will register the converter in a yaml file:
        package: scifolder.converters.sources
        converter: SourceResolver
 
-       
+
 Using the `create_records` API function
 =======================================
 
@@ -422,7 +521,7 @@ Let's formulate that using `create_records`:
 .. code-block:: python
 
   dir_name = "directory name"
-  
+
   record_def = {
     "Experiment": {
       "identifier": dir_name
@@ -498,7 +597,7 @@ Let's have a look at a more complex examples, defining multiple records:
         Project: $Project
       ProjectGroup:
         projects: +$Project
-      
+
 
 This block will create two new Records:
 
@@ -514,7 +613,7 @@ Let's formulate that using `create_records` (again, `dir_name` is constant here)
 .. code-block:: python
 
   dir_name = "directory name"
-  
+
   record_def = {
     "Project": {
       "identifier": "project_name",
@@ -526,7 +625,7 @@ Let's formulate that using `create_records` (again, `dir_name` is constant here)
     "ProjectGroup": {
       "projects": "+$Project",
     }
-    
+
   }
 
   keys_modified = create_records(values, records,
diff --git a/tox.ini b/tox.ini
index a7d4465ed36f0fe5e49c06721d3e3a0cdf453fa0..03e02ebeff196430129e10c4c0d853ca77c47302 100644
--- a/tox.ini
+++ b/tox.ini
@@ -6,6 +6,7 @@ skip_missing_interpreters = true
 deps = .
     pytest
     pytest-cov
+    h5py
     # TODO: Make this f-branch sensitive
     git+https://gitlab.indiscale.com/caosdb/src/caosdb-pylib.git@dev
     git+https://gitlab.indiscale.com/caosdb/src/caosdb-advanced-user-tools.git@dev
diff --git a/unittests/h5_cfood.yml b/unittests/h5_cfood.yml
new file mode 100644
index 0000000000000000000000000000000000000000..f688de6a2171da6533626449b030bcd95a43b37b
--- /dev/null
+++ b/unittests/h5_cfood.yml
@@ -0,0 +1,67 @@
+---
+metadata:
+  crawler-version: 0.6.1
+---
+Converters:
+  H5Dataset:
+    converter: H5DatasetConverter
+    package: caoscrawler.hdf5_converter
+  H5File:
+    converter: H5FileConverter
+    package: caoscrawler.hdf5_converter
+  H5Group:
+    converter: H5GroupConverter
+    package: caoscrawler.hdf5_converter
+  H5Ndarray:
+    converter: H5NdarrayConverter
+    package: caoscrawler.hdf5_converter
+# Top-level, we have just the HDF5 file.
+ParentDirectory:
+  type: Directory
+  match: (.*)
+  subtree:
+    H5FileElement:
+      type: H5File
+      match: (.*)\.(hdf5|h5)$
+      records:
+        H5File:
+          parents:
+            - H5File
+          role: File
+          path: $H5FileElement
+          file: $H5FileElement
+      subtree:
+        # Here, we have the groups, the top-level dataset, and possible
+        # attributes (empty for now).
+        RootIntegerElement:
+          type: H5Dataset
+          match_name: ^root_integers$
+          records:
+            H5Dataset:
+              parents:
+                - H5Dataset
+            H5File:
+              H5Dataset: +$H5Dataset
+          subtree:
+            # included NDArray in this dataset
+            TopLevelIntNDElement:
+              type: H5Ndarray
+              match_name: (.*)
+              recordname: this
+              records:
+                # this:
+                #   ContainingFile: $H5File
+                H5Dataset:
+                  Ndarray: $this
+            # There is one more list-valued attribute to this dataset.
+            TopLevelDataAttribute:
+              type: ListElement
+              match_name: ^attr_data_root$
+              subtree:
+                AttributeListEntry:
+                  type: FloatElement
+                  match_name: (.*)
+                  match_value: (?P<value>.*)
+                  records:
+                    H5Dataset:
+                      attr_data_root: +$value
diff --git a/unittests/hdf5_dummy_file.hdf5 b/unittests/hdf5_dummy_file.hdf5
new file mode 100644
index 0000000000000000000000000000000000000000..41bfb7ab3bcac19d90fd4f018cdd8118ae806eaf
Binary files /dev/null and b/unittests/hdf5_dummy_file.hdf5 differ
diff --git a/unittests/test_h5_converter.py b/unittests/test_h5_converter.py
new file mode 100644
index 0000000000000000000000000000000000000000..2f7fae5d8d32bb7e5c90a535b63158c33df55daa
--- /dev/null
+++ b/unittests/test_h5_converter.py
@@ -0,0 +1,135 @@
+#
+# This file is a part of the CaosDB Project.
+#
+# Copyright (C) 2023 IndiScale GmbH <info@indiscale.com>
+# Copyright (C) 2023 Florian Spreckelsen <f.spreckelsen@indiscale.com>
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as
+# published by the Free Software Foundation, either version 3 of the
+# License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+#
+import numpy as np
+
+from functools import partial
+from pathlib import Path
+from pytest import fixture, importorskip
+
+import caosdb as db
+
+from caoscrawler.debug_tree import DebugTree
+from caoscrawler.hdf5_converter import (convert_basic_element_with_nd_array,
+                                        convert_h5_element, H5GroupElement,
+                                        H5DatasetElement, H5NdarrayElement)
+from caoscrawler.scanner import scan_directory
+from caoscrawler.structure_elements import (FloatElement, ListElement,
+                                            TextElement)
+from utils import dircheckstr as dircheck_base
+
+# Skip the whole module if h5py hasn't been installed
+h5py = importorskip("h5py")
+
+
+UNITTESTDIR = Path(__file__).parent
+
+# always add the path here
+dircheckstr = partial(dircheck_base, UNITTESTDIR)
+
+
+@fixture
+def h5_dummy_file():
+
+    path = UNITTESTDIR / "hdf5_dummy_file.hdf5"
+
+    return h5py.File(path, 'r')
+
+
+def test_h5_elements(h5_dummy_file):
+
+    elt = convert_h5_element(h5_dummy_file["group_level1_a"], "test")
+    assert isinstance(elt, H5GroupElement)
+
+    elt = convert_h5_element(h5_dummy_file["root_integers"], "test")
+    assert isinstance(elt, H5DatasetElement)
+
+
+def test_nd_array_conversion():
+
+    # Only test array handling here, `convert_basic_element` is tested
+    # elsewhere.
+    arr = np.array([[["something"]]])
+    elt = convert_basic_element_with_nd_array(arr)
+    assert isinstance(elt, TextElement)
+    assert elt.value == "something"
+
+    arr = np.zeros((1, 1))
+    elt = convert_basic_element_with_nd_array(arr)
+    assert isinstance(elt, FloatElement)
+    assert elt.value == 0
+
+    arr = np.zeros((1, 3, 1))
+    elt = convert_basic_element_with_nd_array(arr)
+    assert isinstance(elt, ListElement)
+    assert elt.value == [0, 0, 0]
+
+    arr = np.array([[1, 2, 3], [4, 5, 6]])
+    elt = convert_basic_element_with_nd_array(arr, internal_path="some/path")
+    assert isinstance(elt, H5NdarrayElement)
+    assert elt.internal_path == "some/path"
+
+    # Non-arrays should be forwarded correctly
+    elt = convert_basic_element_with_nd_array("something")
+    assert isinstance(elt, TextElement)
+    assert elt.value == "something"
+
+    elt = convert_basic_element_with_nd_array([0, 0, 0])
+    assert isinstance(elt, ListElement)
+    assert elt.value == [0, 0, 0]
+
+
+def test_record_creation():
+
+    dbt = DebugTree()
+    records = scan_directory(UNITTESTDIR, UNITTESTDIR / "h5_cfood.yml", debug_tree=dbt)
+
+    # In total 3 records: The file, the Dataset, and its ndarray
+    assert len(records) == 3
+    file_rec = [rec for rec in records if isinstance(rec, db.File)]
+    # exactly one file
+    assert len(file_rec) == 1
+
+    subd = dbt.debug_tree[dircheckstr("hdf5_dummy_file.hdf5")]
+    # At this level, we have 5 variables (directories and paths, plus H5File
+    # record), and one record.
+    assert len(subd[0]) == 5
+    assert len(subd[1]) == 1
+    file_rec = subd[1]["H5File"]
+    assert file_rec.get_property("H5Dataset") is not None
+    assert file_rec.get_property("H5Dataset").value is not None
+    # Reference properties currently need to be integration tested (especially
+    # with the circular dependency between H5File and NDArray).
+
+    # top level integers
+    subd = dbt.debug_tree["root_integers"]
+    # Two additional variables (RootIntegerElement + Dataset record), one
+    # additional record
+    assert len(subd[0]) == 7
+    assert len(subd[1]) == 2
+    ds_rec = subd[1]["H5Dataset"]
+    assert isinstance(ds_rec, db.Record)
+    assert len(ds_rec.parents) == 1
+    assert ds_rec.parents[0].name == "H5Dataset"
+    assert ds_rec.get_property("Ndarray") is not None
+    assert ds_rec.get_property("Ndarray").value is not None
+    assert ds_rec.get_property("attr_data_root") is not None
+    assert isinstance(ds_rec.get_property("attr_data_root").value, list)
+    for number in [-2.,  -4.,  -8., -10.12345]:
+        assert number in [float(val) for val in ds_rec.get_property("attr_data_root").value]