diff --git a/CHANGELOG.md b/CHANGELOG.md index 6b75284d0f3651610566dfa3dfd54a994524137c..2ba6c84749478314882a4131754bf9cc7fc5b184 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Added ### +* New converters for XML documents/trees/tags: XMLFile, XMLTag, XMLTextNode + ### Changed ### ### Deprecated ### diff --git a/README_SETUP.md b/README_SETUP.md deleted file mode 120000 index d478016ecde09dab8820d398b15df325f4159380..0000000000000000000000000000000000000000 --- a/README_SETUP.md +++ /dev/null @@ -1 +0,0 @@ -src/doc/README_SETUP.md \ No newline at end of file diff --git a/README_SETUP.md b/README_SETUP.md new file mode 100644 index 0000000000000000000000000000000000000000..32f0bb89a6051bc2ec4be0bae6cf06cd1a540f8b --- /dev/null +++ b/README_SETUP.md @@ -0,0 +1,34 @@ +# Getting started with the CaosDB Crawler # + +## Installation +see INSTALL.md + +## Run Unit Tests + +1. Install additional dependencies: + - h5py +2. Run `pytest unittests`. + +## Documentation ## +We use sphinx to create the documentation. Docstrings in the code should comply +with the Googly style (see link below). + +Build documentation in `src/doc` with `make doc`. Note that for the +automatic generation of the complete API documentation, it is +necessary to first install this library with all its optional +dependencies, i.e., `pip install .[h5-crawler,spss]`. 
+ +### Requirements ### + +- `sphinx` +- `sphinx-autoapi` +- `recommonmark` +- `sphinx-rtd-theme` + +### How to contribute ### + +- [Google Style Python Docstrings](https://sphinxcontrib-napoleon.readthedocs.io/en/latest/example_google.html) +- [Google Style Python Docstrings 2nd reference](https://github.com/google/styleguide/blob/gh-pages/pyguide.md#38-comments-and-docstrings) +- [References to other documentation](https://www.sphinx-doc.org/en/master/usage/extensions/intersphinx.html#role-external) + + diff --git a/src/caoscrawler/__init__.py b/src/caoscrawler/__init__.py index 41b96323b1106d8ce28caadc4a2da012f3dc22ea..27bdbfd371e10826d007480b4189bd2cd148344c 100644 --- a/src/caoscrawler/__init__.py +++ b/src/caoscrawler/__init__.py @@ -1,4 +1,4 @@ -from . import converters, utils +from . import converters, utils, xml_converter try: from .conv_impl.spss import SPSSConverter except ImportError as err: diff --git a/src/caoscrawler/cfood-schema.yml b/src/caoscrawler/cfood-schema.yml index 6609d8eb05135b17f5f6d9526df255b810de112a..acc3911f21d320146d0c35abc9d781541ee151ac 100644 --- a/src/caoscrawler/cfood-schema.yml +++ b/src/caoscrawler/cfood-schema.yml @@ -73,6 +73,9 @@ cfood: - H5Dataset - H5Group - H5Ndarray + - XMLFile + - XMLTag + - XMLTextNode - PropertiesFromDictElement description: Type of this converter node. match: diff --git a/src/caoscrawler/converters.py b/src/caoscrawler/converters.py index 0eee8965512ed39add9a9688c531c540f80d7df2..9805d1103e380f688b40a9bfd4c3d03129dbd591 100644 --- a/src/caoscrawler/converters.py +++ b/src/caoscrawler/converters.py @@ -316,7 +316,10 @@ def create_records(values: GeneralStore, records: RecordStore, def_records: dict class Converter(object, metaclass=ABCMeta): - """Converters treat StructureElements contained in the hierarchical sturcture.""" + """Converters treat StructureElements contained in the hierarchical sturcture. + + This is the abstract super class for all Converters. 
+ """ def __init__(self, definition: dict, name: str, converter_registry: dict): """ @@ -582,6 +585,12 @@ class Converter(object, metaclass=ABCMeta): class DirectoryConverter(Converter): + """ + Converter that matches and handles structure elements of type directory. + + This is one typical starting point of a crawling procedure. + """ + def create_children(self, generalStore: GeneralStore, element: StructureElement): # TODO: See comment on types and inheritance if not isinstance(element, Directory): diff --git a/src/caoscrawler/default_converters.yml b/src/caoscrawler/default_converters.yml index 82e2f635f621b2e21e43b728fd9ed6865454f828..cb4a7d8c63489158c15dcf86b83fd940cd608460 100644 --- a/src/caoscrawler/default_converters.yml +++ b/src/caoscrawler/default_converters.yml @@ -94,3 +94,20 @@ SPSSFile: XLSXTableConverter: converter: XLSXTableConverter package: caoscrawler.converters + + +# ------------------------- +# XML +# ------------------------- + +XMLFile: + converter: XMLFileConverter + package: caoscrawler.xml_converter + +XMLTag: + converter: XMLTagConverter + package: caoscrawler.xml_converter + +XMLTextNode: + converter: XMLTextNodeConverter + package: caoscrawler.xml_converter diff --git a/src/caoscrawler/structure_elements.py b/src/caoscrawler/structure_elements.py index 0efba91c185446e0bfbecbbb53f68aaa8a8e15d1..67cd1056b382c92485deada2058526a03b6d8535 100644 --- a/src/caoscrawler/structure_elements.py +++ b/src/caoscrawler/structure_elements.py @@ -24,6 +24,7 @@ # import warnings +import lxml.etree class StructureElement(object): @@ -167,3 +168,53 @@ class DictDictElement(DictElement): def __init__(self, *args, **kwargs): warnings.warn(DeprecationWarning("This class is depricated. Please use DictElement.")) super().__init__(*args, **kwargs) + + +class XMLTagElement(StructureElement): + """ + Stores elements of an XML tree. 
+ """ + + def __init__(self, element: lxml.etree.Element): + super().__init__(element.getroottree().getelementpath(element)) + self.tag = element + + +class XMLTextNode(StructureElement): + """ + Stores text nodes of XML trees. + """ + + def __init__(self, element: lxml.etree.Element): + """ + Initializes this XML text node. + + Please note that, although syntactically similar, it is semantically + different from TextElement: + - TextElements have a meaningful name, e.g. a key in a key-value pair. This name can + be matched using the match_name entry. + - XMLTextNodes just have a text and the name is just for identifying the structure element. + They can only be matched using the match entry in the XMLTextNodeConverter. + """ + super().__init__(element.getroottree().getelementpath(element) + "/text()") + self.tag = element + self.value = element.text + + +class XMLAttributeNode(StructureElement): + """ + Stores text nodes of XML trees. + """ + + def __init__(self, element: lxml.etree.Element, + key: str): + """ + Initializes this XML attribute node. + + element: The xml tree element containing the attribute. + key: The key which identifies the attribute in the list of attributes. + """ + super().__init__(element.getroottree().getelementpath(element) + "@" + key) + self.value = element.attrib[key] + self.key = key + self.tag = element diff --git a/src/caoscrawler/xml_converter.py b/src/caoscrawler/xml_converter.py new file mode 100644 index 0000000000000000000000000000000000000000..6d350c26d467372e65c4acc0fd397d6679279b24 --- /dev/null +++ b/src/caoscrawler/xml_converter.py @@ -0,0 +1,226 @@ +# encoding: utf-8 +# +# This file is a part of the LinkAhead Project. 
+# +# Copyright (C) 2024 Indiscale GmbH <info@indiscale.com> +# Copyright (C) 2024 Alexander Schlemmer +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see <https://www.gnu.org/licenses/>. + +"""Converters take structure elements and create Records and new structure elements from them.""" + +from __future__ import annotations + +import datetime +import json +import logging +import os +import re +import warnings +from inspect import signature +from string import Template +from typing import Any, Callable, Optional, Union + +import linkahead as db +from jsonschema import ValidationError, validate + +from .stores import GeneralStore, RecordStore +from .structure_elements import (BooleanElement, DictElement, Directory, File, + FloatElement, IntegerElement, JSONFile, + ListElement, NoneElement, StructureElement, + TextElement, XMLTagElement, XMLTextNode, XMLAttributeNode) +from .utils import has_parent + +import lxml.etree +from .converters import SimpleFileConverter, ConverterValidationError, Converter + + +class XMLFileConverter(SimpleFileConverter): + + """Convert XML files. See + https://gitlab.indiscale.com/caosdb/src/caosdb-crawler/-/issues/145 + for the current suggestion for the specification. 
+ + """ + + def create_children(self, generalStore: GeneralStore, element: StructureElement): + # TODO: See comment on types and inheritance + if not isinstance(element, File): + raise ValueError("create_children was called with wrong type of StructureElement") + with open(element.path, 'r') as xml_file: + xml = lxml.etree.parse(xml_file) + if "validate" in self.definition and self.definition["validate"]: + try: + raise NotImplementedError("XML validation not implemented yet.") + except ConverterValidationError as err: + raise ConverterValidationError( + "Error during the validation of the XML file:\n" + f"{element.path}\n" + err.message) + + return [XMLTagElement(xml.getroot())] + + +class XMLTagConverter(Converter): + def create_children(self, generalStore: GeneralStore, element: StructureElement): + """Children that are generated by this function are the + result of the xpath query given in the yaml property + ``xpath``. Its default (when not given) is ``child::*``, so the + direct children of the current xml node. The xpath expression + must be designed in a way that it returns xml tags (and no + attributes or texts). That means, that the axis ``attribute::`` + and the function ``text()`` must not be used. + + The following yaml properties can be used to generate other + types of nodes (text nodes and attribute nodes) as subtree + structure elements: + + :: + + # _*_ marks the default: + attribs_as_children: true # true / _false_ + text_as_children: true # true / _false_ + tags_as_children: true # _true_ / false + + The default is to generate the tags matched by the xpath expression only. + + - When text_as_children is set to true, text nodes will be generated that contain the text + contained in the matched tags. + - When attribs_as_children is set to true, attribute nodes will be generated from the attributes + of the matched tags. + + Notes + ----- + The default is to take the namespace map from the current node and use it in xpath queries. 
+ Because default namespaces cannot be handled by xpath, it is possible to remap the default namespace + using the key ``default_namespace``. + The key ``nsmap`` can be used to define additional nsmap entries. + + """ + if not isinstance(element, XMLTagElement): + raise TypeError("Element must be an instance of XMLTagElement.") + + # Get the namespace map from the element: + nsmap = element.tag.nsmap + # The default name of the default namespace is "default". + # You can overwrite it using the attribute "default_namespace" in the converter definition: + default_namespace = self.definition.get("default_namespace", "default") + if None in nsmap: + nsmap[default_namespace] = nsmap[None] + del nsmap[None] + + # Set additional nsmap entries from the converter definition: + if "nsmap" in self.definition: + for key, value in self.definition["nsmap"].items(): + nsmap[key] = value + + xpath = self.definition.get("xpath", "child::*") + children = element.tag.xpath(xpath, namespaces=nsmap) + el_lst = [] + for el in children: + if isinstance(el, str): + raise RuntimeError( + "Only standard xml nodes are supported as results of xpath queries.") + elif isinstance(el, lxml.etree._Element): + if self.definition.get("tags_as_children", True): + el_lst.append(XMLTagElement(el)) + if self.definition.get("attribs_as_children", False): + for attrib in el.attrib: + el_lst.append(XMLAttributeNode(el, attrib)) + if self.definition.get("text_as_children", False): + el_lst.append(XMLTextNode(el)) + else: + raise RuntimeError("Unsupported child type.") + return el_lst + + def typecheck(self, element: StructureElement): + """ + Check whether the current structure element can be converted using + this converter. + """ + return isinstance(element, XMLTagElement) + + def match(self, element: StructureElement) -> Optional[dict]: + # See https://gitlab.indiscale.com/caosdb/src/caosdb-crawler/-/issues/145 + # for a suggestion for the design of the matching algorithm. 
+ if not isinstance(element, XMLTagElement): + raise TypeError("Element must be an instance of XMLTagElement.") + + # Store the result of all individual regexp variable results: + vardict = {} + + if "match_tag" in self.definition: + m_tag = re.match(self.definition["match_tag"], element.tag.tag) + if m_tag is None: + return None + vardict.update(m_tag.groupdict()) + + if "match_text" in self.definition: + tagtext = element.tag.text + if element.tag.text is None: + tagtext = "" + m_text = re.match(self.definition["match_text"], tagtext, re.DOTALL) + if m_text is None: + return None + vardict.update(m_text.groupdict()) + + if "match_attrib" in self.definition: + for attrib_def_key, attrib_def_value in self.definition["match_attrib"].items(): + match_counter = 0 + matched_m_attrib = None + matched_m_attrib_value = None + for attr_key, attr_value in element.tag.attrib.items(): + m_attrib = re.match(attrib_def_key, attr_key) + if m_attrib is not None: + match_counter += 1 + matched_m_attrib = m_attrib + m_attrib_value = re.match(attrib_def_value, attr_value) + if m_attrib_value is None: + return None + matched_m_attrib_value = m_attrib_value + # TODO: How to deal with multiple matches? + # There are multiple options: + # - Allow multiple attribute-key matches: Leads to possible overwrites of variables + # - Require unique attribute-key and attribute-value matches: Very complex + # - Only allow one single attribute-key to match and run attribute-value match separately. + # Currently the latter option is implemented. 
+ if match_counter == 0: + return None + elif match_counter > 1: + raise RuntimeError("Multiple attributes match the same match_attrib entry.") + vardict.update(matched_m_attrib.groupdict()) + vardict.update(matched_m_attrib_value.groupdict()) + + return vardict + + +class XMLTextNodeConverter(Converter): + def create_children(self, generalStore: GeneralStore, element: StructureElement): + raise NotImplementedError() + + def typecheck(self, element: StructureElement): + """ + Check whether the current structure element can be converted using + this converter. + """ + return isinstance(element, XMLTextNode) + + def match(self, element: StructureElement) -> Optional[dict]: + # See https://gitlab.indiscale.com/caosdb/src/caosdb-crawler/-/issues/145 + # for a suggestion for the design of the matching algorithm. + if not isinstance(element, XMLTextNode): + raise TypeError("Element must be an instance of XMLTextNode.") + + raise NotImplementedError() + + return None diff --git a/src/doc/README_SETUP.md b/src/doc/README_SETUP.md deleted file mode 100644 index 32f0bb89a6051bc2ec4be0bae6cf06cd1a540f8b..0000000000000000000000000000000000000000 --- a/src/doc/README_SETUP.md +++ /dev/null @@ -1,34 +0,0 @@ -# Getting started with the CaosDB Crawler # - -## Installation -see INSTALL.md - -## Run Unit Tests - -1. Install additional dependencies: - - h5py -2. Run `pytest unittests`. - -## Documentation ## -We use sphinx to create the documentation. Docstrings in the code should comply -with the Googly style (see link below). - -Build documentation in `src/doc` with `make doc`. Note that for the -automatic generation of the complete API documentation, it is -necessary to first install this library with all its optional -dependencies, i.e., `pip install .[h5-crawler,spss]`. 
- -### Requirements ### - -- `sphinx` -- `sphinx-autoapi` -- `recommonmark` -- `sphinx-rtd-theme` - -### How to contribute ### - -- [Google Style Python Docstrings](https://sphinxcontrib-napoleon.readthedocs.io/en/latest/example_google.html) -- [Google Style Python Docstrings 2nd reference](https://github.com/google/styleguide/blob/gh-pages/pyguide.md#38-comments-and-docstrings) -- [References to other documentation](https://www.sphinx-doc.org/en/master/usage/extensions/intersphinx.html#role-external) - - diff --git a/src/doc/cfood.rst b/src/doc/cfood.rst index 07431af0a9fb26e569be5d47f79d6a4f120df269..51c392780b44b73964921506ad3764b95e14d5ed 100644 --- a/src/doc/cfood.rst +++ b/src/doc/cfood.rst @@ -183,7 +183,7 @@ in a vairable with the same name (as it is the case for other Records). Transform Functions ------------------- You can use transform functions to alter variable values that the crawler consumes (e.g. a string -that was matched with a reg exp). See :doc:`Converter Documentation<converters>`. +that was matched with a reg exp). See :doc:`Converter Documentation<converters/index>`. You can define your own transform functions by adding the the same way you add custom converters: diff --git a/src/doc/concepts.rst b/src/doc/concepts.rst index 770731857112b93205f0e80d623fa9183c4aa885..b3aa02a151a4d03c1531094ea01a5246cb02ba73 100644 --- a/src/doc/concepts.rst +++ b/src/doc/concepts.rst @@ -20,7 +20,7 @@ example a tree of Python *file objects* (StructureElements) could correspond to Relevant sources in: -- ``src/structure_elements.py`` +- :py:mod:`caoscrawler.structure_elements` .. _ConceptConverters: @@ -34,11 +34,11 @@ existing StructureElements, Converters create a tree of StructureElements. .. image:: img/converter.png :height: 170 -See the chapter :std:doc:`Converters<converters>` for details. +See the chapter :std:doc:`Converters<converters/index>` for details. 
Relevant sources in: -- ``src/converters.py`` +- :py:mod:`caoscrawler.converters` Identifiables @@ -70,8 +70,8 @@ In the current implementation an identifiable can only use one RecordType even t Relevant sources in -- ``src/identifiable_adapters.py`` -- ``src/identifiable.py`` +- :py:mod:`caoscrawler.identifiable_adapters` +- :py:mod:`caoscrawler.identifiable` Registered Identifiables ++++++++++++++++++++++++ @@ -110,7 +110,7 @@ The crawler can be considered the main program doing the synchronization in basi Relevant sources in: -- ``src/crawl.py`` +- :py:mod:`caoscrawler.crawl` diff --git a/src/doc/converters.rst b/src/doc/converters.rst deleted file mode 100644 index f59e6d3dff0a1f75dc4e0e5bcbbee0b4ceb7e81d..0000000000000000000000000000000000000000 --- a/src/doc/converters.rst +++ /dev/null @@ -1,822 +0,0 @@ -Converters -)))))))))) - -Converters treat a StructureElement and during this process create a number of new -StructureElements: the children of the initially treated StructureElement. Thus by treatment of -existing StructureElements, Converters create a tree of StructureElements. - -.. image:: img/converter.png - :height: 170 - -Each StructureElement in the tree has a set of properties, organized as -key-value pairs. -Some of those properties are specified by the type of StructureElement. For example, -a file could have the file name as property: ``'filename': myfile.dat``. -Converters may define additional functions that create further values. For -example, a regular expression could be used to get a date from a file name. - -CFood definition -++++++++++++++++ - -Converter application to data is specified via a tree-like yml file (called ``cfood.yml``, by -convention). The yml file specifies which Converters shall be used on which StructureElements, and -how to treat the generated *child* StructureElements. - -The yaml definition may look like this: - -.. 
todo:: - - This is outdated, see ``cfood-schema.yml`` for the current specification of a ``cfood.yml``. - -.. code-block:: yaml - - <NodeName>: - type: <ConverterName> - match: ".*" - records: - Experiment1: - parents: - - Experiment - - Blablabla - date: $DATUM - (...) - Experiment2: - parents: - - Experiment - subtree: - (...) - -The **<NodeName>** is a description of what the current block represents (e.g. -``experiment-folder``) and is used as an identifier. - -**<type>** selects the converter that is going to be matched against the current structure -element. If the structure element matches (this is a combination of a typecheck and a detailed -match, see the :py:class:`~caoscrawler.converters.Converter` source documentation for details), the -converter will: - -- generate records (with :py:meth:`~caoscrawler.converters.Converter.create_records`) -- possibly process a subtree (with :py:meth:`caoscrawler.converters.Converter.create_children`) - -**match** *TODO* - -**records** is a dict of definitions that define the semantic structure -(see details below). - -**subtree** makes the yaml recursive: It contains a list of new Converter -definitions, which work on the StructureElements that are returned by the -current Converter. - -Transform Functions -+++++++++++++++++++ -Often the situation arises, that you cannot use a value as it is found. Maybe a value should be -increased by an offset or a string should be split into a list of pieces. In order to allow such -simple conversions, transform functions can be named in the converter definition that are then -applied to the respective variables when the converter is executed. - -.. 
code-block:: yaml - - <NodeName>: - type: <ConverterName> - match: ".*" - transform: - <TransformNodeName>: - in: $<in_var_name> - out: $<out_var_name> - functions: - - <func_name>: # name of the function to be applied - <func_arg1>: <func_arg1_value> # key value pairs that are passed as parameters - <func_arg2>: <func_arg2_value> - # ... - -An example that splits the variable ``a`` and puts the generated list in ``b`` is the following: - -.. code-block:: yaml - - Experiment: - type: Dict - match: ".*" - transform: - param_split: - in: $a - out: $b - functions: - - split: # split is a function that is defined by default - marker: "|" # its only parameter is the marker that is used to split the string - records: - Report: - tags: $b - -This splits the string in '$a' and stores the resulting list in '$b'. This is here used to add a -list valued property to the Report Record. - - -There are a number of transform functions that are defined by default (see -``src/caoscrawler/default_transformers.yml``). You can define custom transform functions by adding -them to the cfood definition (see :doc:`CFood Documentation<cfood>`). - - -Standard Converters -+++++++++++++++++++ - -These are the standard converters that exist in a default installation. For writing and applying -*custom converters*, see :ref:`below <Custom Converters>`. - -Directory Converter -=================== -The Directory Converter creates StructureElements for each File and Directory -inside the current Directory. You can match a regular expression against the -directory name using the 'match' key. - -Simple File Converter -===================== -The Simple File Converter does not create any children and is usually used if -a file shall be used as it is and be inserted and referenced by other entities. - -Markdown File Converter -======================= -Reads a YAML header from Markdown files (if such a header exists) and creates -children elements according to the structure of the header. 
- -DictElement Converter -===================== - -DictElement → StructureElement - -Creates a child StructureElement for each key in the dictionary. - -Typical Subtree converters --------------------------- -The following StructureElement types are typically created by the DictElement converter: - -- BooleanElement -- FloatElement -- TextElement -- IntegerElement -- ListElement -- DictElement - -Note that you may use ``TextElement`` for anything that exists in a text format that can be -interpreted by the server, such as date and datetime strings in ISO-8601 format. - -Scalar Value Converters -======================= -`BooleanElementConverter`, `FloatElementConverter`, `TextElementConverter`, and -`IntegerElementConverter` behave very similarly. - -These converters expect `match_name` and `match_value` in their definition -which allow to match the key and the value, respectively. - -Note that there are defaults for accepting other types. For example, -FloatElementConverter also accepts IntegerElements. The default -behavior can be adjusted with the fields `accept_text`, `accept_int`, -`accept_float`, and `accept_bool`. - -The following denotes what kind of StructureElements are accepted by default -(they are defined in `src/caoscrawler/converters.py`): - -- BooleanElementConverter: bool, int -- FloatElementConverter: int, float -- TextElementConverter: text, bool, int, float -- IntegerElementConverter: int -- ListElementConverter: list -- DictElementConverter: dict - -YAMLFileConverter -================= - -A specialized Dict Converter for yaml files: Yaml files are opened and the contents are -converted into dictionaries that can be further converted using the typical subtree converters -of dict converter. - -**WARNING**: Currently unfinished implementation. - -JSONFileConverter -================= - - - - -TableConverter -============== - -Table → DictElement - -A generic converter (abstract) for files containing tables. 
-Currently, there are two specialized implementations for XLSX files and CSV files. - -All table converters generate a subtree of dicts, which in turn can be converted with DictElementConverters: -For each row in the table the TableConverter generates a DictElement (structure element). The key of the -element is the row number. The value of the element is a dict containing the mapping of -column names to values of the respective cell. - -Example: - -.. code-block:: yaml - - subtree: - TABLE: # Any name for the table as a whole - type: CSVTableConverter - match: ^test_table.csv$ - records: - (...) # Records edited for the whole table file - subtree: - ROW: # Any name for a data row in the table - type: DictElement - match_name: .* - match_value: .* - records: - (...) # Records edited for each row - subtree: - COLUMN: # Any name for a specific type of column in the table - type: FloatElement - match_name: measurement # Name of the column in the table file - match_value: (?P<column_value).*) - records: - (...) # Records edited for each cell - - -XLSXTableConverter -================== - -XLSX File → DictElement - -CSVTableConverter -================= - -CSV File → DictElement - -PropertiesFromDictConverter -=========================== - -The :py:class:`~caoscrawler.converters.PropertiesFromDictConverter` is -a specialization of the -:py:class:`~caoscrawler.converters.DictElementConverter` and offers -all its functionality. It is meant to operate on dictionaries (e.g., -from reading in a json or a table file), the keys of which correspond -closely to properties in a LinkAhead datamodel. This is especially -handy in cases where properties may be added to the data model and -data sources that are not yet known when writing the cfood definition. 
- -The converter definition of the -:py:class:`~caoscrawler.converters.PropertiesFromDictConverter` has an -additional required entry ``record_from_dict`` which specifies the -Record to which the properties extracted from the dict are attached -to. This Record is identified by its ``variable_name`` by which it can -be referred to further down the subtree. You can also use the name of -a Record that was specified earlier in the CFood definition in order -to extend it by the properties extracted from a dict. Let's have a -look at a simple example. A CFood definition - -.. code-block:: yaml - - PropertiesFromDictElement: - type: PropertiesFromDictElement - match: ".*" - record_from_dict: - variable_name: MyRec - parents: - - MyType1 - - MyType2 - -applied to a dictionary - -.. code-block:: json - - { - "name": "New name", - "a": 5, - "b": ["a", "b", "c"], - "author": { - "full_name": "Silvia Scientist" - } - } - -will create a Record ``New name`` with parents ``MyType1`` and -``MyType2``. It has a scalar property ``a`` with value 5, a list -property ``b`` with values "a", "b" and "c", and an ``author`` -property which references an ``author`` with a ``full_name`` property -with value "Silvia Scientist": - -.. image:: img/properties-from-dict-records-author.png - :height: 210 - -Note how the different dictionary keys are handled differently -depending on their types: scalar and list values are understood -automatically, and a dictionary-valued entry like ``author`` is -translated into a reference to an ``author`` Record automatically. - -You can further specify how references are treated with an optional -``references key`` in ``record_from_dict``. Let's assume that in the -above example, we have an ``author`` **Property** with datatype -``Person`` in our data model. We could add this information by -extending the above example definition by - - -.. 
code-block:: yaml - - PropertiesFromDictElement: - type: PropertiesFromDictElement - match: ".*" - record_from_dict: - variable_name: MyRec - parents: - - MyType1 - - MyType2 - references: - author: - parents: - - Person - -so that now, a ``Person`` record with a ``full_name`` property with -value "Silvia Scientist" is created as the value of the ``author`` -property: - -.. image:: img/properties-from-dict-records-person.png - :height: 200 - -For the time being, only the parents of the referenced record can be -set via this option. More complicated treatments can be implemented -via the ``referenced_record_callback`` (see below). - -Properties can be blacklisted with the ``properties_blacklist`` -keyword, i.e., all keys listed under ``properties_blacklist`` will be -excluded from automated treatment. Since the -:py:class:`~caoscrawler.converters.PropertiesFromDictConverter` has -all the functionality of the -:py:class:`~caoscrawler.converters.DictElementConverter`, individual -properties can still be used in a subtree. Together with -``properties_blacklist`` this can be used to add custom treatment to -specific properties by blacklisting them in ``record_from_dict`` and -then treating them in the subtree the same as you would do it in the -standard -:py:class:`~caoscrawler.converters.DictElementConverter`. Note that -the blacklisted keys are excluded on **all** levels of the dictionary, -i.e., also when they occur in a referenced entity. - -For further customization, the -:py:class:`~caoscrawler.converters.PropertiesFromDictConverter` can be -used as a basis for :ref:`custom converters<Custom Converters>` which -can make use of its ``referenced_record_callback`` argument. The -``referenced_record_callback`` can be a callable object which takes -exactly a Record as an argument and needs to return that Record after -doing whatever custom treatment is needed. 
Additionally, it is given -the ``RecordStore`` and the ``ValueStore`` in order to be able to -access the records and values that have already been defined from -within ``referenced_record_callback``. Such a function might look the -following: - -.. code-block:: python - - def my_callback(rec: db.Record, records: RecordStore, values: GeneralStore): - # do something with rec, possibly using other records or values from the stores... - rec.description = "This was updated in a callback" - return rec - -It is applied to all Records that are created from the dictionary and -it can be used to, e.g., transform values of some properties, or add -special treatment to all Records of a specific -type. ``referenced_record_callback`` is applied **after** the -properties from the dictionary have been applied as explained above. - - -Further converters -++++++++++++++++++ - -More converters, together with cfood definitions and examples can be found in -the `LinkAhead Crawler Extensions Subgroup -<https://gitlab.com/linkahead/crawler-extensions>`_ on gitlab. In the following, -we list converters that are shipped with the crawler library itself but are not -part of the set of standard converters and may require this library to be -installed with additional optional dependencies. - -HDF5 Converters -=============== - -For treating `HDF5 Files -<https://docs.hdfgroup.org/hdf5/develop/_s_p_e_c.html>`_, there are in total -four individual converters corresponding to the internal structure of HDF5 -files: the :ref:`H5FileConverter` which opens the file itself and creates -further structure elements from HDF5 groups, datasets, and included -multi-dimensional arrays that are in turn treated by the -:ref:`H5GroupConverter`, the :ref:`H5DatasetConverter`, and the -:ref:`H5NdarrayConverter`, respectively. You need to install the LinkAhead -crawler with its optional ``h5-crawler`` dependency for using these converters. 
- -The basic idea when crawling HDF5 files is to treat them very similar to -:ref:`dictionaries <DictElement Converter>` in which the attributes on root, -group, or dataset level are essentially treated like ``BooleanElement``, -``TextElement``, ``FloatElement``, and ``IntegerElement`` in a dictionary: They -are appended as children and can be accessed via the ``subtree``. The file -itself and the groups within may contain further groups and datasets, which can -have their own attributes, subgroups, and datasets, very much like -``DictElements`` within a dictionary. The main difference to any other -dictionary type is the presence of multi-dimensional arrays within HDF5 -datasets. Since LinkAhead doesn't have any datatype corresponding to these, and -since it isn't desirable to store these arrays directly within LinkAhead for -reasons of performance and of searchability, we wrap them within a specific -Record as explained :ref:`below <H5NdarrayConverter>`, together with more -metadata and their internal path within the HDF5 file. Users can thus query for -datasets and their arrays according to their metadata within LinkAhead and then -use the internal path information to access the dataset within the file -directly. The type of this record and the property for storing the internal path -need to be reflected in the datamodel. Using the default names, you would need a -datamodel like - -.. code-block:: yaml - - H5Ndarray: - obligatory_properties: - internal_hdf5-path: - datatype: TEXT - -although the names of both property and record type can be configured within the -cfood definition. 
- -A simple example of a cfood definition for HDF5 files can be found in the `unit -tests -<https://gitlab.com/linkahead/linkahead-crawler/-/blob/main/unittests/h5_cfood.yml?ref_type=heads>`_ -and shows how the individual converters are used in order to crawl a `simple -example file -<https://gitlab.com/linkahead/linkahead-crawler/-/blob/main/unittests/hdf5_dummy_file.hdf5?ref_type=heads>`_ -containing groups, subgroups, and datasets, together with their respective -attributes. - -H5FileConverter ---------------- - -This is an extension of the -:py:class:`~caoscrawler.converters.SimpleFileConverter` class. It opens the HDF5 -file and creates children for any contained group or dataset. Additionally, the -root-level attributes of the HDF5 file are accessible as children. - -H5GroupConverter ----------------- - -This is an extension of the -:py:class:`~caoscrawler.converters.DictElementConverter` class. Children are -created for all subgroups and datasets in this HDF5 group. Additionally, the -group-level attributes are accessible as children. - -H5DatasetConverter ------------------- - -This is an extension of the -:py:class:`~caoscrawler.converters.DictElementConverter` class. Most -importantly, it stores the array data in HDF5 dataset into -:py:class:`~caoscrawler.hdf5_converter.H5NdarrayElement` which is added to its -children, as well as the dataset attributes. - -H5NdarrayConverter ------------------- - -This converter creates a wrapper record for the contained dataset. The name of -this record needs to be specified in the cfood definition of this converter via -the ``recordname`` option. The RecordType of this record can be configured with -the ``array_recordtype_name`` option and defaults to ``H5Ndarray``. Via the -given ``recordname``, this record can be used within the cfood. 
Most -importantly, this record stores the internal path of this array within the HDF5 -file in a text property, the name of which can be configured with the -``internal_path_property_name`` option which defaults to ``internal_hdf5_path``. - -Custom Converters -+++++++++++++++++ - -As mentioned before it is possible to create custom converters. -These custom converters can be used to integrate arbitrary data extraction and ETL capabilities -into the LinkAhead crawler and make these extensions available to any yaml specification. - -Tell the crawler about a custom converter -========================================= - -To use a custom crawler, it must be defined in the ``Converters`` section of the CFood yaml file. -The basic syntax for adding a custom converter to a definition file is: - -.. code-block:: yaml - - Converters: - <NameOfTheConverterInYamlFile>: - package: <python>.<module>.<name> - converter: <PythonClassName> - -The Converters section can be either put into the first or the second document of the cfood yaml file. -It can be also part of a single-document yaml cfood file. Please refer to :doc:`the cfood documentation<cfood>` for more details. - -Details: - -- **<NameOfTheConverterInYamlFile>**: This is the name of the converter as it is going to be used in the present yaml file. -- **<python>.<module>.<name>**: The name of the module where the converter class resides. -- **<PythonClassName>**: Within this specified module there must be a class inheriting from base class :py:class:`caoscrawler.converters.Converter`. - -Implementing a custom converter -=============================== - -Converters inherit from the :py:class:`~caoscrawler.converters.Converter` class. - -The following methods are abstract and need to be overwritten by your custom converter to make it work: - -:py:meth:`~caoscrawler.converters.Converter.create_children`: - Return a list of child StructureElement objects. 
- -- :py:meth:`~caoscrawler.converters.Converter.match` -- :py:meth:`~caoscrawler.converters.Converter.typecheck` - - -Example -======= - -In the following, we will explain the process of adding a custom converter to a yaml file using -a SourceResolver that is able to attach a source element to another entity. - -**Note**: This example might become a standard crawler soon, as part of the scifolder specification. See https://doi.org/10.3390/data5020043 for details. In this documentation example we will, therefore, add it to a package called "scifolder". - -First we will create our package and module structure, which might be: - -.. code-block:: - - scifolder_package/ - README.md - setup.cfg - setup.py - Makefile - tox.ini - src/ - scifolder/ - __init__.py - converters/ - __init__.py - sources.py # <- the actual file containing - # the converter class - doc/ - unittests/ - -Now we need to create a class called "SourceResolver" in the file "sources.py". In this - more advanced - example, we will not inherit our converter directly from :py:class:`~caoscrawler.converters.Converter`, but use :py:class:`~caoscrawler.converters.TextElementConverter`. The latter already implements :py:meth:`~caoscrawler.converters.Converter.match` and :py:meth:`~caoscrawler.converters.Converter.typecheck`, so only an implementation for :py:meth:`~caoscrawler.converters.Converter.create_children` has to be provided by us. -Furthermore we will customize the method :py:meth:`~caoscrawler.converters.Converter.create_records` that allows us to specify a more complex record generation procedure than provided in the standard implementation. One specific limitation of the standard implementation is, that only a fixed -number of records can be generated by the yaml definition. So for any applications - like here - that require an arbitrary number of records to be created, a customized implementation of :py:meth:`~caoscrawler.converters.Converter.create_records` is recommended. 
-In this context it is recommended to make use of the function :func:`caoscrawler.converters.create_records` that implements creation of record objects from python dictionaries of the same structure -that would be given using a yaml definition (see next section below). - -.. code-block:: python - - import re - from caoscrawler.stores import GeneralStore, RecordStore - from caoscrawler.converters import TextElementConverter, create_records - from caoscrawler.structure_elements import StructureElement, TextElement - - - class SourceResolver(TextElementConverter): - """ - This resolver uses a source list element (e.g. from the markdown readme file) - to link sources correctly. - """ - - def __init__(self, definition: dict, name: str, - converter_registry: dict): - """ - Initialize a new directory converter. - """ - super().__init__(definition, name, converter_registry) - - def create_children(self, generalStore: GeneralStore, - element: StructureElement): - - # The source resolver does not create children: - - return [] - - def create_records(self, values: GeneralStore, - records: RecordStore, - element: StructureElement, - file_path_prefix): - if not isinstance(element, TextElement): - raise RuntimeError() - - # This function must return a list containing tuples, each one for a modified - # property: (name_of_entity, name_of_property) - keys_modified = [] - - # This is the name of the entity where the source is going to be attached: - attach_to_scientific_activity = self.definition["scientific_activity"] - rec = records[attach_to_scientific_activity] - - # The "source" is a path to a source project, so it should have the form: - # /<Category>/<project>/<scientific_activity>/ - # obtain these information from the structure element: - val = element.value - regexp = (r'/(?P<category>(SimulationData)|(ExperimentalData)|(DataAnalysis))' - '/(?P<project_date>.*?)_(?P<project_identifier>.*)' - '/(?P<date>[0-9]{4,4}-[0-9]{2,2}-[0-9]{2,2})(_(?P<identifier>.*))?/') - - res = 
re.match(regexp, val) - if res is None: - raise RuntimeError("Source cannot be parsed correctly.") - - # Mapping of categories on the file system to corresponding record types in CaosDB: - cat_map = { - "SimulationData": "Simulation", - "ExperimentalData": "Experiment", - "DataAnalysis": "DataAnalysis"} - linkrt = cat_map[res.group("category")] - - keys_modified.extend(create_records(values, records, { - "Project": { - "date": res.group("project_date"), - "identifier": res.group("project_identifier"), - }, - linkrt: { - "date": res.group("date"), - "identifier": res.group("identifier"), - "project": "$Project" - }, - attach_to_scientific_activity: { - "sources": "+$" + linkrt - }}, file_path_prefix)) - - # Process the records section of the yaml definition: - keys_modified.extend( - super().create_records(values, records, element, file_path_prefix)) - - # The create_records function must return the modified keys to make it compatible - # to the crawler functions: - return keys_modified - - -If the recommended (python) package structure is used, the package containing the converter -definition can just be installed using `pip install .` or `pip install -e .` from the -`scifolder_package` directory. - -The following yaml block will register the converter in a yaml file: - -.. code-block:: yaml - - Converters: - SourceResolver: - package: scifolder.converters.sources - converter: SourceResolver - - -Using the `create_records` API function -======================================= - -The function :func:`caoscrawler.converters.create_records` was already mentioned above and it is -the recommended way to create new records from custom converters. Let's have a look at the -function signature: - -.. code-block:: python - - def create_records(values: GeneralStore, # <- pass the current variables store here - records: RecordStore, # <- pass the current store of CaosDB records here - def_records: dict): # <- This is the actual definition of new records! 
- - -`def_records` is the actual definition of new records according to the yaml cfood specification -(work in progress, in the docs). Essentially you can do everything here, that you could do -in the yaml document as well, but using python source code. - -Let's have a look at a few examples: - -.. code-block:: yaml - - DirConverter: - type: Directory - match: (?P<dir_name>.*) - records: - Experiment: - identifier: $dir_name - -This block will just create a new record with parent `Experiment` and one property -`identifier` with a value derived from the matching regular expression. - -Let's formulate that using `create_records`: - -.. code-block:: python - - dir_name = "directory name" - - record_def = { - "Experiment": { - "identifier": dir_name - } - } - - keys_modified = create_records(values, records, - record_def) - -The `dir_name` is set explicitely here, everything else is identical to the yaml statements. - - -The role of `keys_modified` -=========================== - -You probably have noticed already, that :func:`caoscrawler.converters.create_records` returns -`keys_modified` which is a list of tuples. Each element of `keys_modified` has two elements: - -- Element 0 is the name of the record that is modified (as used in the record store `records`). -- Element 1 is the name of the property that is modified. - -It is important, that the correct list of modified keys is returned by -:py:meth:`~caoscrawler.converters.Converter.create_records` to make the crawler process work. - -So, a sketch of a typical implementation within a custom converter could look like this: - - -.. code-block:: python - - def create_records(self, values: GeneralStore, - records: RecordStore, - element: StructureElement, - file_path_prefix: str): - - # Modify some records: - record_def = { - # ... 
- } - - keys_modified = create_records(values, records, - record_def) - - # You can of course do it multiple times: - keys_modified.extend(create_records(values, records, - record_def)) - - # You can also process the records section of the yaml definition: - keys_modified.extend( - super().create_records(values, records, element, file_path_prefix)) - # This essentially allows users of your converter to customize the creation of records - # by providing a custom "records" section additionally to the modifications provided - # in this implementation of the Converter. - - # Important: Return the list of modified keys! - return keys_modified - - -More complex example -==================== - -Let's have a look at a more complex examples, defining multiple records: - -.. code-block:: yaml - - DirConverter: - type: Directory - match: (?P<dir_name>.*) - records: - Project: - identifier: project_name - Experiment: - identifier: $dir_name - Project: $Project - ProjectGroup: - projects: +$Project - - -This block will create two new Records: - -- A project with a constant identifier -- An experiment with an identifier, derived from a regular expression and a reference to the new project. - -Furthermore a Record `ProjectGroup` will be edited (its initial definition is not given in the -yaml block): The project that was just created will be added as a list element to the property -`projects`. - -Let's formulate that using `create_records` (again, `dir_name` is constant here): - -.. code-block:: python - - dir_name = "directory name" - - record_def = { - "Project": { - "identifier": "project_name", - } - "Experiment": { - "identifier": dir_name, - "Project": "$Project", - } - "ProjectGroup": { - "projects": "+$Project", - } - - } - - keys_modified = create_records(values, records, - record_def) - -Debugging -========= - -You can add the key `debug_match` to the definition of a Converter in order to create debugging -output for the match step. 
The following snippet illustrates this: - -.. code-block:: yaml - - DirConverter: - type: Directory - match: (?P<dir_name>.*) - debug_match: True - records: - Project: - identifier: project_name - - -Whenever this Converter tries to match a StructureElement, it logs what was tried to macht against -what and what the result was. diff --git a/src/doc/converters/cfood_definition.rst b/src/doc/converters/cfood_definition.rst new file mode 100644 index 0000000000000000000000000000000000000000..13c04fd38df8b00c435192a1c3cf02147f870b4c --- /dev/null +++ b/src/doc/converters/cfood_definition.rst @@ -0,0 +1,50 @@ +CFood definition +++++++++++++++++ + +Converter application to data is specified via a tree-like yml file (called ``cfood.yml``, by +convention). The yml file specifies which Converters shall be used on which StructureElements, and +how to treat the generated *child* StructureElements. + +The yaml definition may look like this: + +.. todo:: + + This is outdated, see ``cfood-schema.yml`` for the current specification of a ``cfood.yml``. + +.. code-block:: yaml + + <NodeName>: + type: <ConverterName> + match: ".*" + records: + Experiment1: + parents: + - Experiment + - Blablabla + date: $DATUM + (...) + Experiment2: + parents: + - Experiment + subtree: + (...) + +The **<NodeName>** is a description of what the current block represents (e.g. +``experiment-folder``) and is used as an identifier. + +**<type>** selects the converter that is going to be matched against the current structure +element. 
If the structure element matches (this is a combination of a typecheck and a detailed
+match, see the :py:class:`~caoscrawler.converters.Converter` source documentation for details), the
+converter will:
+
+- generate records (with :py:meth:`~caoscrawler.converters.Converter.create_records`)
+- possibly process a subtree (with :py:meth:`caoscrawler.converters.Converter.create_children`)
+
+**match** *TODO*
+
+**records** is a dict of definitions that define the semantic structure
+(see details below).
+
+**subtree** makes the yaml recursive: It contains a list of new Converter
+definitions, which work on the StructureElements that are returned by the
+current Converter.
diff --git a/src/doc/converters/custom_converters.rst b/src/doc/converters/custom_converters.rst
new file mode 100644
index 0000000000000000000000000000000000000000..573d9714488eaacd2c794b1fa497306a8d110a5f
--- /dev/null
+++ b/src/doc/converters/custom_converters.rst
@@ -0,0 +1,344 @@
+Custom Converters
++++++++++++++++++
+
+As mentioned before it is possible to create custom converters.
+These custom converters can be used to integrate arbitrary data extraction and ETL capabilities
+into the LinkAhead crawler and make these extensions available to any yaml specification.
+
+Tell the crawler about a custom converter
+=========================================
+
+To use a custom converter, it must be defined in the ``Converters`` section of the CFood yaml file.
+The basic syntax for adding a custom converter to a definition file is:
+
+.. code-block:: yaml
+
+  Converters:
+    <NameOfTheConverterInYamlFile>:
+      package: <python>.<module>.<name>
+      converter: <PythonClassName>
+
+The Converters section can be either put into the first or the second
+document of the cfood yaml file. It can also be part of a
+single-document yaml cfood file. Please refer to :doc:`the cfood
+documentation<../cfood>` for more details. 
+ +Details: + +- **<NameOfTheConverterInYamlFile>**: This is the name of the converter as it is going to be used in the present yaml file. +- **<python>.<module>.<name>**: The name of the module where the converter class resides. +- **<PythonClassName>**: Within this specified module there must be a class inheriting from base class :py:class:`caoscrawler.converters.Converter`. + +Implementing a custom converter +=============================== + +Converters inherit from the :py:class:`~caoscrawler.converters.Converter` class. + +The following methods are abstract and need to be overwritten by your custom converter to make it work: + +:py:meth:`~caoscrawler.converters.Converter.create_children`: + Return a list of child StructureElement objects. + +- :py:meth:`~caoscrawler.converters.Converter.match` +- :py:meth:`~caoscrawler.converters.Converter.typecheck` + + +Example +======= + +In the following, we will explain the process of adding a custom converter to a yaml file using +a SourceResolver that is able to attach a source element to another entity. + +**Note**: This example might become a standard crawler soon, as part of the scifolder specification. See https://doi.org/10.3390/data5020043 for details. In this documentation example we will, therefore, add it to a package called "scifolder". + +First we will create our package and module structure, which might be: + +.. code-block:: + + scifolder_package/ + README.md + setup.cfg + setup.py + Makefile + tox.ini + src/ + scifolder/ + __init__.py + converters/ + __init__.py + sources.py # <- the actual file containing + # the converter class + doc/ + unittests/ + +Now we need to create a class called "SourceResolver" in the file "sources.py". In this - more advanced - example, we will not inherit our converter directly from :py:class:`~caoscrawler.converters.Converter`, but use :py:class:`~caoscrawler.converters.TextElementConverter`. 
The latter already implements :py:meth:`~caoscrawler.converters.Converter.match` and :py:meth:`~caoscrawler.converters.Converter.typecheck`, so only an implementation for :py:meth:`~caoscrawler.converters.Converter.create_children` has to be provided by us. +Furthermore we will customize the method :py:meth:`~caoscrawler.converters.Converter.create_records` that allows us to specify a more complex record generation procedure than provided in the standard implementation. One specific limitation of the standard implementation is, that only a fixed +number of records can be generated by the yaml definition. So for any applications - like here - that require an arbitrary number of records to be created, a customized implementation of :py:meth:`~caoscrawler.converters.Converter.create_records` is recommended. +In this context it is recommended to make use of the function :func:`caoscrawler.converters.create_records` that implements creation of record objects from python dictionaries of the same structure +that would be given using a yaml definition (see next section below). + +.. code-block:: python + + import re + from caoscrawler.stores import GeneralStore, RecordStore + from caoscrawler.converters import TextElementConverter, create_records + from caoscrawler.structure_elements import StructureElement, TextElement + + + class SourceResolver(TextElementConverter): + """ + This resolver uses a source list element (e.g. from the markdown readme file) + to link sources correctly. + """ + + def __init__(self, definition: dict, name: str, + converter_registry: dict): + """ + Initialize a new directory converter. 
+ """ + super().__init__(definition, name, converter_registry) + + def create_children(self, generalStore: GeneralStore, + element: StructureElement): + + # The source resolver does not create children: + + return [] + + def create_records(self, values: GeneralStore, + records: RecordStore, + element: StructureElement, + file_path_prefix): + if not isinstance(element, TextElement): + raise RuntimeError() + + # This function must return a list containing tuples, each one for a modified + # property: (name_of_entity, name_of_property) + keys_modified = [] + + # This is the name of the entity where the source is going to be attached: + attach_to_scientific_activity = self.definition["scientific_activity"] + rec = records[attach_to_scientific_activity] + + # The "source" is a path to a source project, so it should have the form: + # /<Category>/<project>/<scientific_activity>/ + # obtain these information from the structure element: + val = element.value + regexp = (r'/(?P<category>(SimulationData)|(ExperimentalData)|(DataAnalysis))' + '/(?P<project_date>.*?)_(?P<project_identifier>.*)' + '/(?P<date>[0-9]{4,4}-[0-9]{2,2}-[0-9]{2,2})(_(?P<identifier>.*))?/') + + res = re.match(regexp, val) + if res is None: + raise RuntimeError("Source cannot be parsed correctly.") + + # Mapping of categories on the file system to corresponding record types in CaosDB: + cat_map = { + "SimulationData": "Simulation", + "ExperimentalData": "Experiment", + "DataAnalysis": "DataAnalysis"} + linkrt = cat_map[res.group("category")] + + keys_modified.extend(create_records(values, records, { + "Project": { + "date": res.group("project_date"), + "identifier": res.group("project_identifier"), + }, + linkrt: { + "date": res.group("date"), + "identifier": res.group("identifier"), + "project": "$Project" + }, + attach_to_scientific_activity: { + "sources": "+$" + linkrt + }}, file_path_prefix)) + + # Process the records section of the yaml definition: + keys_modified.extend( + 
super().create_records(values, records, element, file_path_prefix)) + + # The create_records function must return the modified keys to make it compatible + # to the crawler functions: + return keys_modified + + +If the recommended (python) package structure is used, the package containing the converter +definition can just be installed using `pip install .` or `pip install -e .` from the +`scifolder_package` directory. + +The following yaml block will register the converter in a yaml file: + +.. code-block:: yaml + + Converters: + SourceResolver: + package: scifolder.converters.sources + converter: SourceResolver + + +Using the `create_records` API function +======================================= + +The function :func:`caoscrawler.converters.create_records` was already mentioned above and it is +the recommended way to create new records from custom converters. Let's have a look at the +function signature: + +.. code-block:: python + + def create_records(values: GeneralStore, # <- pass the current variables store here + records: RecordStore, # <- pass the current store of CaosDB records here + def_records: dict): # <- This is the actual definition of new records! + + +`def_records` is the actual definition of new records according to the yaml cfood specification +(work in progress, in the docs). Essentially you can do everything here, that you could do +in the yaml document as well, but using python source code. + +Let's have a look at a few examples: + +.. code-block:: yaml + + DirConverter: + type: Directory + match: (?P<dir_name>.*) + records: + Experiment: + identifier: $dir_name + +This block will just create a new record with parent `Experiment` and one property +`identifier` with a value derived from the matching regular expression. + +Let's formulate that using `create_records`: + +.. 
code-block:: python
+
+   dir_name = "directory name"
+
+   record_def = {
+     "Experiment": {
+       "identifier": dir_name
+     }
+   }
+
+   keys_modified = create_records(values, records,
+                                  record_def)
+
+The `dir_name` is set explicitly here, everything else is identical to the yaml statements.
+
+
+The role of `keys_modified`
+===========================
+
+You probably have noticed already, that :func:`caoscrawler.converters.create_records` returns
+`keys_modified` which is a list of tuples. Each element of `keys_modified` has two elements:
+
+- Element 0 is the name of the record that is modified (as used in the record store `records`).
+- Element 1 is the name of the property that is modified.
+
+It is important, that the correct list of modified keys is returned by
+:py:meth:`~caoscrawler.converters.Converter.create_records` to make the crawler process work.
+
+So, a sketch of a typical implementation within a custom converter could look like this:
+
+
+.. code-block:: python
+
+   def create_records(self, values: GeneralStore,
+                      records: RecordStore,
+                      element: StructureElement,
+                      file_path_prefix: str):
+
+     # Modify some records:
+     record_def = {
+       # ...
+     }
+
+     keys_modified = create_records(values, records,
+                                    record_def)
+
+     # You can of course do it multiple times:
+     keys_modified.extend(create_records(values, records,
+                                         record_def))
+
+     # You can also process the records section of the yaml definition:
+     keys_modified.extend(
+       super().create_records(values, records, element, file_path_prefix))
+     # This essentially allows users of your converter to customize the creation of records
+     # by providing a custom "records" section additionally to the modifications provided
+     # in this implementation of the Converter.
+
+     # Important: Return the list of modified keys!
+     return keys_modified
+
+
+More complex example
+====================
+
+Let's have a look at a more complex example, defining multiple records:
+
+.. 
code-block:: yaml
+
+  DirConverter:
+    type: Directory
+    match: (?P<dir_name>.*)
+    records:
+      Project:
+        identifier: project_name
+      Experiment:
+        identifier: $dir_name
+        Project: $Project
+      ProjectGroup:
+        projects: +$Project
+
+
+This block will create two new Records:
+
+- A project with a constant identifier
+- An experiment with an identifier, derived from a regular expression and a reference to the new project.
+
+Furthermore a Record `ProjectGroup` will be edited (its initial definition is not given in the
+yaml block): The project that was just created will be added as a list element to the property
+`projects`.
+
+Let's formulate that using `create_records` (again, `dir_name` is constant here):
+
+.. code-block:: python
+
+  dir_name = "directory name"
+
+  record_def = {
+    "Project": {
+      "identifier": "project_name",
+    },
+    "Experiment": {
+      "identifier": dir_name,
+      "Project": "$Project",
+    },
+    "ProjectGroup": {
+      "projects": "+$Project",
+    },
+
+  }
+
+  keys_modified = create_records(values, records,
+                                 record_def)
+
+Debugging
+=========
+
+You can add the key `debug_match` to the definition of a Converter in order to create debugging
+output for the match step. The following snippet illustrates this:
+
+.. code-block:: yaml
+
+  DirConverter:
+    type: Directory
+    match: (?P<dir_name>.*)
+    debug_match: True
+    records:
+      Project:
+        identifier: project_name
+
+
+Whenever this Converter tries to match a StructureElement, it logs what was tried to match against
+what and what the result was. 
diff --git a/src/doc/converters/further_converters.rst b/src/doc/converters/further_converters.rst new file mode 100644 index 0000000000000000000000000000000000000000..539c5159eb1de01765a78e3c04e10fb3f0be9be5 --- /dev/null +++ b/src/doc/converters/further_converters.rst @@ -0,0 +1,98 @@ +Further converters +++++++++++++++++++ + +More converters, together with cfood definitions and examples can be found in +the `LinkAhead Crawler Extensions Subgroup +<https://gitlab.com/linkahead/crawler-extensions>`_ on gitlab. In the following, +we list converters that are shipped with the crawler library itself but are not +part of the set of standard converters and may require this library to be +installed with additional optional dependencies. + +HDF5 Converters +=============== + +For treating `HDF5 Files +<https://docs.hdfgroup.org/hdf5/develop/_s_p_e_c.html>`_, there are in total +four individual converters corresponding to the internal structure of HDF5 +files: the :ref:`H5FileConverter` which opens the file itself and creates +further structure elements from HDF5 groups, datasets, and included +multi-dimensional arrays that are in turn treated by the +:ref:`H5GroupConverter`, the :ref:`H5DatasetConverter`, and the +:ref:`H5NdarrayConverter`, respectively. You need to install the LinkAhead +crawler with its optional ``h5-crawler`` dependency for using these converters. + +The basic idea when crawling HDF5 files is to treat them very similar to +:ref:`dictionaries <DictElement Converter>` in which the attributes on root, +group, or dataset level are essentially treated like ``BooleanElement``, +``TextElement``, ``FloatElement``, and ``IntegerElement`` in a dictionary: They +are appended as children and can be accessed via the ``subtree``. The file +itself and the groups within may contain further groups and datasets, which can +have their own attributes, subgroups, and datasets, very much like +``DictElements`` within a dictionary. 
The main difference to any other +dictionary type is the presence of multi-dimensional arrays within HDF5 +datasets. Since LinkAhead doesn't have any datatype corresponding to these, and +since it isn't desirable to store these arrays directly within LinkAhead for +reasons of performance and of searchability, we wrap them within a specific +Record as explained :ref:`below <H5NdarrayConverter>`, together with more +metadata and their internal path within the HDF5 file. Users can thus query for +datasets and their arrays according to their metadata within LinkAhead and then +use the internal path information to access the dataset within the file +directly. The type of this record and the property for storing the internal path +need to be reflected in the datamodel. Using the default names, you would need a +datamodel like + +.. code-block:: yaml + + H5Ndarray: + obligatory_properties: + internal_hdf5-path: + datatype: TEXT + +although the names of both property and record type can be configured within the +cfood definition. + +A simple example of a cfood definition for HDF5 files can be found in the `unit +tests +<https://gitlab.com/linkahead/linkahead-crawler/-/blob/main/unittests/h5_cfood.yml?ref_type=heads>`_ +and shows how the individual converters are used in order to crawl a `simple +example file +<https://gitlab.com/linkahead/linkahead-crawler/-/blob/main/unittests/hdf5_dummy_file.hdf5?ref_type=heads>`_ +containing groups, subgroups, and datasets, together with their respective +attributes. + +H5FileConverter +--------------- + +This is an extension of the +:py:class:`~caoscrawler.converters.SimpleFileConverter` class. It opens the HDF5 +file and creates children for any contained group or dataset. Additionally, the +root-level attributes of the HDF5 file are accessible as children. + +H5GroupConverter +---------------- + +This is an extension of the +:py:class:`~caoscrawler.converters.DictElementConverter` class. 
Children are +created for all subgroups and datasets in this HDF5 group. Additionally, the +group-level attributes are accessible as children. + +H5DatasetConverter +------------------ + +This is an extension of the +:py:class:`~caoscrawler.converters.DictElementConverter` class. Most +importantly, it stores the array data in HDF5 dataset into +:py:class:`~caoscrawler.hdf5_converter.H5NdarrayElement` which is added to its +children, as well as the dataset attributes. + +H5NdarrayConverter +------------------ + +This converter creates a wrapper record for the contained dataset. The name of +this record needs to be specified in the cfood definition of this converter via +the ``recordname`` option. The RecordType of this record can be configured with +the ``array_recordtype_name`` option and defaults to ``H5Ndarray``. Via the +given ``recordname``, this record can be used within the cfood. Most +importantly, this record stores the internal path of this array within the HDF5 +file in a text property, the name of which can be configured with the +``internal_path_property_name`` option which defaults to ``internal_hdf5_path``. diff --git a/src/doc/converters/index.rst b/src/doc/converters/index.rst new file mode 100644 index 0000000000000000000000000000000000000000..38fc11335a2640f645e9b4e093690d1ffa7cd07f --- /dev/null +++ b/src/doc/converters/index.rst @@ -0,0 +1,29 @@ +Converters +)))))))))) + +Converters treat a StructureElement and during this process create a number of new +StructureElements: the children of the initially treated StructureElement. Thus by treatment of +existing StructureElements, Converters create a tree of StructureElements. + +.. image:: ../img/converter.png + :height: 170 + :alt: Converters are Python classes that tell the crawler how to + interpret StructureElements. + +Each StructureElement in the tree has a set of properties, organized as +key-value pairs. +Some of those properties are specified by the type of StructureElement. 
For example, +a file could have the file name as property: ``'filename': myfile.dat``. +Converters may define additional functions that create further values. For +example, a regular expression could be used to get a date from a file name. + +.. toctree:: + :maxdepth: 1 + :caption: Contents: + + CFood definition<cfood_definition> + Standard converters<standard_converters> + Further converters<further_converters> + Custom converters<custom_converters> + Transform functions<transform_functions> + diff --git a/src/doc/converters/standard_converters.rst b/src/doc/converters/standard_converters.rst new file mode 100644 index 0000000000000000000000000000000000000000..3dc3c882e76e10706d030ba0695d498631bf7b28 --- /dev/null +++ b/src/doc/converters/standard_converters.rst @@ -0,0 +1,333 @@ +Standard Converters ++++++++++++++++++++ + +These are the standard converters that exist in a default installation. For writing and applying +*custom converters*, see :ref:`below <Custom Converters>`. + +Directory Converter +=================== +The Directory Converter creates StructureElements for each File and Directory +inside the current Directory. You can match a regular expression against the +directory name using the 'match' key. + +Simple File Converter +===================== +The Simple File Converter does not create any children and is usually used if +a file shall be used as it is and be inserted and referenced by other entities. + +Markdown File Converter +======================= +Reads a YAML header from Markdown files (if such a header exists) and creates +children elements according to the structure of the header. + +DictElement Converter +===================== + +DictElement → StructureElement + +Creates a child StructureElement for each key in the dictionary. 
+ +Typical Subtree converters +-------------------------- +The following StructureElement types are typically created by the DictElement converter: + +- BooleanElement +- FloatElement +- TextElement +- IntegerElement +- ListElement +- DictElement + +Note that you may use ``TextElement`` for anything that exists in a text format that can be +interpreted by the server, such as date and datetime strings in ISO-8601 format. + +Scalar Value Converters +======================= +`BooleanElementConverter`, `FloatElementConverter`, `TextElementConverter`, and +`IntegerElementConverter` behave very similarly. + +These converters expect `match_name` and `match_value` in their definition +which allow to match the key and the value, respectively. + +Note that there are defaults for accepting other types. For example, +FloatElementConverter also accepts IntegerElements. The default +behavior can be adjusted with the fields `accept_text`, `accept_int`, +`accept_float`, and `accept_bool`. + +The following denotes what kind of StructureElements are accepted by default +(they are defined in `src/caoscrawler/converters.py`): + +- BooleanElementConverter: bool, int +- FloatElementConverter: int, float +- TextElementConverter: text, bool, int, float +- IntegerElementConverter: int +- ListElementConverter: list +- DictElementConverter: dict + +YAMLFileConverter +================= + +A specialized Dict Converter for yaml files: Yaml files are opened and the contents are +converted into dictionaries that can be further converted using the typical subtree converters +of dict converter. + +**WARNING**: Currently unfinished implementation. + +JSONFileConverter +================= + + + + +TableConverter +============== + +Table → DictElement + +A generic converter (abstract) for files containing tables. +Currently, there are two specialized implementations for XLSX files and CSV files. 
+ +All table converters generate a subtree of dicts, which in turn can be converted with DictElementConverters: +For each row in the table the TableConverter generates a DictElement (structure element). The key of the +element is the row number. The value of the element is a dict containing the mapping of +column names to values of the respective cell. + +Example: + +.. code-block:: yaml + + subtree: + TABLE: # Any name for the table as a whole + type: CSVTableConverter + match: ^test_table.csv$ + records: + (...) # Records edited for the whole table file + subtree: + ROW: # Any name for a data row in the table + type: DictElement + match_name: .* + match_value: .* + records: + (...) # Records edited for each row + subtree: + COLUMN: # Any name for a specific type of column in the table + type: FloatElement + match_name: measurement # Name of the column in the table file + match_value: (?P<column_value>.*) + records: + (...) # Records edited for each cell + + +XLSXTableConverter +================== + +XLSX File → DictElement + +CSVTableConverter +================= + +CSV File → DictElement + +PropertiesFromDictConverter +=========================== + +The :py:class:`~caoscrawler.converters.PropertiesFromDictConverter` is +a specialization of the +:py:class:`~caoscrawler.converters.DictElementConverter` and offers +all its functionality. It is meant to operate on dictionaries (e.g., +from reading in a json or a table file), the keys of which correspond +closely to properties in a LinkAhead datamodel. This is especially +handy in cases where properties may be added to the data model and +data sources that are not yet known when writing the cfood definition. + +The converter definition of the +:py:class:`~caoscrawler.converters.PropertiesFromDictConverter` has an +additional required entry ``record_from_dict`` which specifies the +Record to which the properties extracted from the dict are attached +to. 
This Record is identified by its ``variable_name`` by which it can +be referred to further down the subtree. You can also use the name of +a Record that was specified earlier in the CFood definition in order +to extend it by the properties extracted from a dict. Let's have a +look at a simple example. A CFood definition + +.. code-block:: yaml + + PropertiesFromDictElement: + type: PropertiesFromDictElement + match: ".*" + record_from_dict: + variable_name: MyRec + parents: + - MyType1 + - MyType2 + +applied to a dictionary + +.. code-block:: json + + { + "name": "New name", + "a": 5, + "b": ["a", "b", "c"], + "author": { + "full_name": "Silvia Scientist" + } + } + +will create a Record ``New name`` with parents ``MyType1`` and +``MyType2``. It has a scalar property ``a`` with value 5, a list +property ``b`` with values "a", "b" and "c", and an ``author`` +property which references an ``author`` with a ``full_name`` property +with value "Silvia Scientist": + +.. image:: ../img/properties-from-dict-records-author.png + :height: 210 + :alt: A Record "New Name" and an author Record with full_name + "Silvia Scientist" are generated and filled automatically. + +Note how the different dictionary keys are handled differently +depending on their types: scalar and list values are understood +automatically, and a dictionary-valued entry like ``author`` is +translated into a reference to an ``author`` Record automatically. + +You can further specify how references are treated with an optional +``references key`` in ``record_from_dict``. Let's assume that in the +above example, we have an ``author`` **Property** with datatype +``Person`` in our data model. We could add this information by +extending the above example definition by + + +.. 
code-block:: yaml + + PropertiesFromDictElement: + type: PropertiesFromDictElement + match: ".*" + record_from_dict: + variable_name: MyRec + parents: + - MyType1 + - MyType2 + references: + author: + parents: + - Person + +so that now, a ``Person`` record with a ``full_name`` property with +value "Silvia Scientist" is created as the value of the ``author`` +property: + +.. image:: ../img/properties-from-dict-records-person.png + :height: 200 + :alt: A new Person Record is created which is referenced as an + author. + +For the time being, only the parents of the referenced record can be +set via this option. More complicated treatments can be implemented +via the ``referenced_record_callback`` (see below). + +Properties can be blacklisted with the ``properties_blacklist`` +keyword, i.e., all keys listed under ``properties_blacklist`` will be +excluded from automated treatment. Since the +:py:class:`~caoscrawler.converters.PropertiesFromDictConverter` has +all the functionality of the +:py:class:`~caoscrawler.converters.DictElementConverter`, individual +properties can still be used in a subtree. Together with +``properties_blacklist`` this can be used to add custom treatment to +specific properties by blacklisting them in ``record_from_dict`` and +then treating them in the subtree the same as you would do it in the +standard +:py:class:`~caoscrawler.converters.DictElementConverter`. Note that +the blacklisted keys are excluded on **all** levels of the dictionary, +i.e., also when they occur in a referenced entity. + +For further customization, the +:py:class:`~caoscrawler.converters.PropertiesFromDictConverter` can be +used as a basis for :ref:`custom converters<Custom Converters>` which +can make use of its ``referenced_record_callback`` argument. The +``referenced_record_callback`` can be a callable object which takes +exactly a Record as an argument and needs to return that Record after +doing whatever custom treatment is needed. 
Additionally, it is given +the ``RecordStore`` and the ``ValueStore`` in order to be able to +access the records and values that have already been defined from +within ``referenced_record_callback``. Such a function might look the +following: + +.. code-block:: python + + def my_callback(rec: db.Record, records: RecordStore, values: GeneralStore): + # do something with rec, possibly using other records or values from the stores... + rec.description = "This was updated in a callback" + return rec + +It is applied to all Records that are created from the dictionary and +it can be used to, e.g., transform values of some properties, or add +special treatment to all Records of a specific +type. ``referenced_record_callback`` is applied **after** the +properties from the dictionary have been applied as explained above. + +XML Converters +============== + +There are the following converters for XML content: + + +XMLFileConverter +---------------- + +This is a converter that loads an XML file and creates an XMLElement containing the +root element of the XML tree. It can be matched in the subtree using the XMLTagConverter. + +XMLTagConverter +--------------- + +The XMLTagConverter is a generic converter for XMLElements with the following main features: + +- It allows to match a combination of tag name, attribute names and text contents using the keys: + + - ``match_tag``: regexp, default empty string + - ``match_attrib``: dictionary of key-regexps and value-regexp + pairs. Each key matches an attribute name and the corresponding + value matches its attribute value. + - ``match_text``: regexp, default empty string +- It allows to traverse the tree using XPath (using Python lxml's xpath functions): + + - The key ``xpath`` is used to set the xpath expression and has a + default of ``child::*``. Its default would generate just the list of + sub nodes of the current node. The result of the xpath expression + is used to generate structure elements as children. 
It furthermore + uses the keys ``tags_as_children``, ``attribs_as_children`` and + ``text_as_children`` to decide which information from the found + nodes will be used as children: + - ``tags_as_children``: (default ``true``) For each xml tag element + found by the xpath expression, generate one XMLTag structure + element. Its name is the full path to the tag using the function + ``getelementpath`` from ``lxml``. + - ``attribs_as_children``: (default ``false``) For each xml tag element + found by the xpath expression, generate one XMLAttributeNode + structure element for each of its attributes. The name of the + respective attribute node has the form: ``<full path of the tag> @ + <name of the attribute>`` **Please note:** Currently, there is no + converter implemented that can match XMLAttributeNodes. + - ``text_as_children``: (default ``false``) For each xml tag element + found by the xpath expression, generate one XMLTextNode structure + element containing the text content of the tag element. Note that + in case of multiple text elements, only the first one is + added. The name of the respective text node has the form: + ``<full path of the tag> /text()``, where the full path to the tag is again determined using the function + ``getelementpath`` from ``lxml``. **Please note:** Currently, there is + no converter implemented that can match XMLTextNodes. + +Namespaces +********** + +The default is to take the namespace map from the current node and use +it in xpath queries. Because default namespaces cannot be handled by +xpath, it is possible to remap the default namespace using the key +``default_namespace``. The key ``nsmap`` can be used to define +additional nsmap entries. + +XMLTextNodeConverter +-------------------- + +In the future, this converter can be used to match XMLTextNodes that +are generated by the XMLTagConverter. 
diff --git a/src/doc/converters/transform_functions.rst b/src/doc/converters/transform_functions.rst new file mode 100644 index 0000000000000000000000000000000000000000..22df35c8521ea0d70b2ebf7b7c8bc7c52e176bd3 --- /dev/null +++ b/src/doc/converters/transform_functions.rst @@ -0,0 +1,47 @@ +Transform Functions ++++++++++++++++++++ +Often the situation arises, that you cannot use a value as it is found. Maybe a value should be +increased by an offset or a string should be split into a list of pieces. In order to allow such +simple conversions, transform functions can be named in the converter definition that are then +applied to the respective variables when the converter is executed. + +.. code-block:: yaml + + <NodeName>: + type: <ConverterName> + match: ".*" + transform: + <TransformNodeName>: + in: $<in_var_name> + out: $<out_var_name> + functions: + - <func_name>: # name of the function to be applied + <func_arg1>: <func_arg1_value> # key value pairs that are passed as parameters + <func_arg2>: <func_arg2_value> + # ... + +An example that splits the variable ``a`` and puts the generated list in ``b`` is the following: + +.. code-block:: yaml + + Experiment: + type: Dict + match: ".*" + transform: + param_split: + in: $a + out: $b + functions: + - split: # split is a function that is defined by default + marker: "|" # its only parameter is the marker that is used to split the string + records: + Report: + tags: $b + +This splits the string in '$a' and stores the resulting list in '$b'. This is here used to add a +list valued property to the Report Record. + + +There are a number of transform functions that are defined by default (see +``src/caoscrawler/default_transformers.yml``). You can define custom transform functions by adding +them to the cfood definition (see :doc:`CFood Documentation<../cfood>`). 
diff --git a/src/doc/index.rst b/src/doc/index.rst index 8a02ec62e50308a28899e71b4664f626dfa0c27b..a72389b1f4b94430b2c5ff2bfee9757193327ed7 100644 --- a/src/doc/index.rst +++ b/src/doc/index.rst @@ -10,7 +10,7 @@ CaosDB-Crawler Documentation Getting started<getting_started/index> Tutorials<tutorials/index> Concepts<concepts> - Converters<converters> + Converters<converters/index> CFoods (Crawler Definitions)<cfood> Macros<macros> How to upgrade<how-to-upgrade> diff --git a/unittests/test_converters.py b/unittests/test_converters.py index e12302514d16f077882e41d6ff5995953f2228f8..3d4d8dd7a1faf02c49febc1a112fab7c3cef4830 100644 --- a/unittests/test_converters.py +++ b/unittests/test_converters.py @@ -643,11 +643,15 @@ def test_load_converters(): # converter classes can be loaded from their respective packages. # Please adapt, if defaults change! - assert len(converter_registry) == 25 + assert len(converter_registry) == 28 # All of them are contained in caoscrawler.converters + # except for the xml converters: for conv_key, conv in converter_registry.items(): - assert conv["package"] == "caoscrawler.converters" + if conv_key in ("XMLTag", "XMLFile", "XMLTextNode"): + assert conv["package"] == "caoscrawler.xml_converter" + else: + assert conv["package"] == "caoscrawler.converters" # ... and their names all end in "Converter" assert conv["converter"].endswith("Converter") diff --git a/unittests/test_xml_converter.py b/unittests/test_xml_converter.py new file mode 100644 index 0000000000000000000000000000000000000000..93e4a422d94a9315eadca24b8c799682d7d99964 --- /dev/null +++ b/unittests/test_xml_converter.py @@ -0,0 +1,320 @@ +#!/usr/bin/env python3 +# encoding: utf-8 +# +# This file is a part of the LinkAhead Project. 
+# +# Copyright (C) 2024 Indiscale GmbH <info@indiscale.com> +# Copyright (C) 2024 Alexander Schlemmer <a.schlemmer@indiscale.com> +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see <https://www.gnu.org/licenses/>. +# + +""" +test the converters module +""" +import datetime +import importlib +import json +import logging +import os +import sys +from itertools import product +from pathlib import Path + +import pytest +import yaml +from caoscrawler.converters import (Converter, ConverterValidationError, + DateElementConverter, DictElementConverter, + DictIntegerElementConverter, + DirectoryConverter, FloatElementConverter, + IntegerElementConverter, JSONFileConverter, + ListElementConverter, + MarkdownFileConverter, YAMLFileConverter, + _AbstractScalarValueElementConverter, + handle_value, replace_variables) +from caoscrawler.crawl import Crawler +from caoscrawler.scanner import (_load_definition_from_yaml_dict, + create_converter_registry, + create_transformer_registry, load_definition) +from caoscrawler.stores import GeneralStore +from caoscrawler.structure_elements import (BooleanElement, DictElement, + Directory, File, FloatElement, + IntegerElement, ListElement, + TextElement, XMLTagElement) +from caoscrawler.xml_converter import XMLTagConverter + +from lxml.etree import fromstring + +UNITTESTDIR = Path(__file__).parent + + +@pytest.fixture +def converter_registry(): + converter_registry: 
dict[str, dict[str, str]] = { + "Directory": { + "converter": "DirectoryConverter", + "package": "caoscrawler.converters"}, + "TextElement": { + "converter": "TextElementConverter", + "package": "caoscrawler.converters"}, + "XMLTag": { + "converter": "XMLTagConverter", + "package": "caoscrawler.xml_converter"}, + + "XMLTextNode": { + "converter": "XMLTextNodeConverter", + "package": "caoscrawler.xml_converter"}, + } + + for key, value in converter_registry.items(): + module = importlib.import_module(value["package"]) + value["class"] = getattr(module, value["converter"]) + return converter_registry + + +@pytest.fixture +def basic_xmltag_converter(converter_registry): + return XMLTagConverter(yaml.safe_load(""" +type: XMLTag +match_tag: a +match_attrib: # default is the empty dictionary + "(?P<ref>(href|url))": "test(?P<number>[0-9])" # either the "href" or the "url" attribute must be set + alt: (.+) # this attribute must be present and contain at least one character +match_text: \\s*(?P<node_text>.+)\\s* + +subtree: + img: + type: XMLTag + match_name: img + match_attrib: + src: test2 +"""), "TestXMLTagConverter", converter_registry) + + +@pytest.fixture +def basic_xpath_xmltag_converter(converter_registry): + return XMLTagConverter(yaml.safe_load(""" +type: XMLTag +match_tag: a +match_attrib: # default is the empty dictionary + "(?P<ref>(href|url))": "test(?P<number>[0-9])" # either the "href" or the "url" attribute must be set + alt: (.+) # this attribute must be present and contain at least one character +match_text: \\s*(?P<node_text>.+)\\s* +xpath: child::*/* + +subtree: + img: + type: XMLTag + match_name: img + match_attrib: + src: test2 + testnode: + type: XMLTag + match_name: testnode +"""), "TestXMLTagConverter", converter_registry) + + +def test_simple_xml(basic_xmltag_converter): + """ + Test for basic xml conversion functionality. 
+ """ + xml_text = """ + <a href="test1" alt="no link"> + test <img src="test2"/> + </a> + """ + + xml = fromstring(xml_text) + tag = XMLTagElement(xml) + assert tag.name == "." + + m = basic_xmltag_converter.match(tag) + + assert m is not None + assert m["ref"] == "href" + assert m["number"] == "1" + assert m["node_text"] == "test " + + +def test_not_matching(basic_xmltag_converter): + m = basic_xmltag_converter.match(XMLTagElement(fromstring(""" + <a href="test1"> + test <img src="test2"/> + </a> + """))) + + assert m is None # alt-attribute was missing + + m = basic_xmltag_converter.match(XMLTagElement(fromstring(""" + <a href="test" alt="no link"> + test <img src="test2"/> + </a> + """))) + + assert m is None # href attribute did not match + + m = basic_xmltag_converter.match(XMLTagElement(fromstring(""" + <a href="test1" url="http" alt="no link"> + test <img src="test2"/> + </a> + """))) + + assert m is None # href and url must not be present simultaneously + + m = basic_xmltag_converter.match(XMLTagElement(fromstring(""" + <a href="test1" alt="no link"><img src="test2"/></a> + """))) + + assert m is None # text node is empty + + m = basic_xmltag_converter.match(XMLTagElement(fromstring(""" + <a href="test1" alt="no link"/> + """))) + + assert m is None # text node is empty + + # TODO: adapt converter -> empty (==None) text node is equivalent to empty string text node + # TODO: adapt tests + # TODO: how to match " ajskdlfjaldsf ajsdklfjadkl " without the whitespaces in regexp correctly? + + +def test_nested_simple_xml(basic_xmltag_converter, basic_xpath_xmltag_converter): + """ + Test for xml conversion including children. 
+ """ + xml_text = """ + <a href="test1" alt="no link"> + test <img src="test2"/> + </a> + """ + + tag = XMLTagElement(fromstring(xml_text)) + m = basic_xmltag_converter.match(tag) + assert m is not None + + general_store = GeneralStore() + children = basic_xmltag_converter.create_children(general_store, tag) + + assert len(children) == 1 + assert isinstance(children[0], XMLTagElement) + assert children[0].name == "img" + + xml_text = """ + <a href="test1" alt="no link"> + test <img src="test2"> + <testnode/> </img> + </a> + """ + + tag = XMLTagElement(fromstring(xml_text)) + m = basic_xpath_xmltag_converter.match(tag) + assert m is not None + + general_store = GeneralStore() + children = basic_xpath_xmltag_converter.create_children(general_store, tag) + + assert len(children) == 1 + assert isinstance(children[0], XMLTagElement) + assert children[0].name == "img/testnode" + + +def test_namespace_xml(converter_registry): + """ + Test for xml conversion including children. + Nodes have namespaces. 
+ """ + + xml_text = """ + <root xmlns="default-namespace" xmlns:test="alternative-namespace"> + <node1 active="true"> + Bla + </node1> + <node1 active="true" size="45"> + text + <node2 xmlns="sub-namespace"> + <node3> + ok + </node3> + </node2> + <test:node2> + sep + </test:node2> + </node1> + </root> +""" + + # Test unsupported xpath (containing text()): + converter = XMLTagConverter(yaml.safe_load(""" +type: XMLTag +match_tag: "{default-namespace}root" +xpath: "default:node1/text()" +default_namespace: default +"""), "TestXMLTagConverter", converter_registry) + + tag = XMLTagElement(fromstring(xml_text)) + m = converter.match(tag) + assert m is not None + + with pytest.raises(RuntimeError, match="Only standard xml nodes.*"): + converter.create_children(GeneralStore(), tag) + + # Test complex xml using namespaces and text nodes: + converter = XMLTagConverter(yaml.safe_load(""" +type: XMLTag +match_tag: "{default-namespace}root" +xpath: "default:node1" +default_namespace: default +attribs_as_children: false +text_as_children: true +tags_as_children: false +"""), "TestXMLTagConverter", converter_registry) + children = converter.create_children(GeneralStore(), tag) + assert len(children) == 2 + assert children[0].name == "{default-namespace}node1[1]/text()" + assert children[0].value.strip() == "Bla" + assert children[1].name == "{default-namespace}node1[2]/text()" + assert children[1].value.strip() == "text" + + # Check child generation of attributes: + converter = XMLTagConverter(yaml.safe_load(""" +type: XMLTag +match_tag: "{default-namespace}root" +xpath: "default:node1" +default_namespace: default +attribs_as_children: true +text_as_children: false +tags_as_children: false +"""), "TestXMLTagConverter", converter_registry) + children = converter.create_children(GeneralStore(), tag) + + assert len(children) == 3 + assert children[0].name == "{default-namespace}node1[1]@active" + assert children[0].value.strip() == "true" + assert children[1].name == 
"{default-namespace}node1[2]@active" + assert children[1].value.strip() == "true" + assert children[2].name == "{default-namespace}node1[2]@size" + assert children[2].value.strip() == "45" + + # Test setting nsmap entries: + converter = XMLTagConverter(yaml.safe_load(""" +type: XMLTag +match_tag: "{default-namespace}root" +xpath: "//s:node2" +default_namespace: default +nsmap: + s: sub-namespace +"""), "TestXMLTagConverter", converter_registry) + children = converter.create_children(GeneralStore(), tag) + assert len(children) == 1 + assert children[0].name == "{default-namespace}node1[2]/{sub-namespace}node2"