diff --git a/.gitlab/issue_templates/Default.md b/.gitlab/issue_templates/Default.md new file mode 100644 index 0000000000000000000000000000000000000000..aa1a65aca363b87aff50280e1a86824009d2098b --- /dev/null +++ b/.gitlab/issue_templates/Default.md @@ -0,0 +1,28 @@ +## Summary + +*Please give a short summary of what the issue is.* + +## Expected Behavior + +*What did you expect how the software should behave?* + +## Actual Behavior + +*What did the software actually do?* + +## Steps to Reproduce the Problem + +*Please describe, step by step, how others can reproduce the problem. Please try these steps for yourself on a clean system.* + +1. +2. +3. + +## Specifications + +- Version: *Which version of this software?* +- Platform: *Which operating system, which other relevant software versions?* + +## Possible fixes + +*Do you have ideas how the issue can be resolved?* diff --git a/.gitlab/merge_request_templates/Default.md b/.gitlab/merge_request_templates/Default.md new file mode 100644 index 0000000000000000000000000000000000000000..7859b7be21fb1c3eda91ee35173a8e3412a62066 --- /dev/null +++ b/.gitlab/merge_request_templates/Default.md @@ -0,0 +1,53 @@ +# Summary + +*Insert a meaningful description for this merge request here: What is the new/changed behavior? +Which bug has been fixed? Are there related issues?* + + +# Focus + +*Point the reviewer to the core of the code change. Where should they start reading? What should +they focus on (e.g. security, performance, maintainability, user-friendliness, compliance with the +specs, finding more corner cases, concrete questions)?* + + +# Test Environment + +*How to set up a test environment for manual testing?* + + +# Check List for the Author + +Please, prepare your MR for a review. Be sure to write a summary and a focus and create gitlab +comments for the reviewer. They should guide the reviewer through the changes, explain your changes +and also point out open questions. For further good practices have a look at [our review +guidelines](https://gitlab.com/caosdb/caosdb/-/blob/dev/REVIEW_GUIDELINES.md) + +- [ ] All automated tests pass +- [ ] Reference related issues +- [ ] Up-to-date CHANGELOG.md (or not necessary) +- [ ] Appropriate user and developer documentation (or not necessary) + - How do I use the software? Assume "stupid" users. + - How do I develop or debug the software? Assume novice developers. +- [ ] Annotations in code (Gitlab comments) + - Intent of new code + - Problems with old code + - Why this implementation? + + +# Check List for the Reviewer + +- [ ] I understand the intent of this MR +- [ ] All automated tests pass +- [ ] Up-to-date CHANGELOG.md (or not necessary) +- [ ] Appropriate user and developer documentation (or not necessary) +- [ ] The test environment setup works and the intended behavior is reproducible in the test + environment +- [ ] In-code documentation and comments are up-to-date. +- [ ] Check: Are there specifications? Are they satisfied? + +For further good practices have a look at [our review guidelines](https://gitlab.com/caosdb/caosdb/-/blob/dev/REVIEW_GUIDELINES.md). + + +/assign me +/target_branch dev diff --git a/CHANGELOG.md b/CHANGELOG.md index f086e1317f05277452659adf3fe20547adab2ae3..c498b9286e0977295066340a2a4172093ac10bfe 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,43 @@ All notable changes to this project will be documented in this file. 
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [0.4.0] - 2023-03-22 ## +(Florian Spreckelsen) + +### Added ### + +- DateElementConverter: allows interpreting text as a date object +- the restricted_path argument allows crawling only a subtree +- logging that provides a summary of what is inserted and updated +- You can now access the file system path of a structure element (if it has one) using the variable + name ``<converter name>.path`` +- ``add_prefix`` and ``remove_prefix`` arguments for the command line interface + and the ``crawler_main`` function for the adding/removal of path prefixes when + creating file entities. + +### Changed ### + +- The definitions for the default converters were removed from crawl.py and placed into + a separate yaml file called `default_converters.yml`. There is a new test testing for + the correct loading behavior of that file. +- JSONFileConverter, YAMLFileConverter and MarkdownFileConverter now inherit from + SimpleFileConverter. Behavior is unchanged, except that the MarkdownFileConverter now raises a + ConverterValidationError when the YAML header cannot be read instead of silently not matching. + +### Deprecated ### + +- The ``prefix`` argument of `crawler_main` is deprecated. Use the new argument + ``remove_prefix`` instead. + +### Removed ### +- The command line argument ``--prefix``. Use the new argument ``--remove-prefix`` instead. + +### Fixed ### + +- an empty string as name is treated as no name (as does the server). This fixes + queries for identifiables, since they would otherwise contain "WITH name=''", + which is an impossible condition. If your cfoods contained this case, they are ill-defined. + ## [0.3.0] - 2022-01-30 ## (Florian Spreckelsen) diff --git a/CITATION.cff b/CITATION.cff index ad00d0edb29ecfe2edf4b1aeb621ff35f8304f90..9c8bf551c41a6a3447b076914741b349a8c72b9c 100644 --- a/CITATION.cff +++ b/CITATION.cff @@ -20,6 +20,6 @@ authors: given-names: Stefan orcid: https://orcid.org/0000-0001-7214-8125 title: CaosDB - Crawler -version: 0.3.0 +version: 0.4.0 doi: 10.3390/data4020083 -date-released: 2023-01-30 \ No newline at end of file +date-released: 2023-03-22 \ No newline at end of file diff --git a/INSTALL.md b/INSTALL.md new file mode 100644 index 0000000000000000000000000000000000000000..ba220626460c559aeded69d360c85917e0c78066 --- /dev/null +++ b/INSTALL.md @@ -0,0 +1,57 @@ +# Installation ## + + +## Linux #### + +Make sure that Python (at least version 3.8) and pip are installed, using your system tools and +documentation. + +Then open a terminal and continue in the [Generic installation](#generic-installation) section. + +## Windows #### + +If a Python distribution is not yet installed, we recommend Anaconda Python, which you can download +for free from [https://www.anaconda.com](https://www.anaconda.com). The "Anaconda Individual Edition" provides most of the +packages you will ever need out of the box. If you prefer, you may also install the leaner +"Miniconda" installer, which allows you to install packages as you need them. + +After installation, open an Anaconda prompt from the Windows menu and continue in the [Generic +installation](#generic-installation) section.
+ +## MacOS #### + +If there is no Python 3 installed yet, there are two main ways to +obtain it: Either get the binary package from +[python.org](https://www.python.org/downloads/) or, for advanced +users, install via [Homebrew](https://brew.sh/). After installation +from python.org, it is recommended to also update the TLS certificates +for Python (this requires administrator rights for your user): + +```sh +# Replace this with your Python version number: +cd /Applications/Python\ 3.9/ + +# This needs administrator rights: +sudo ./Install\ Certificates.command +``` + +After these steps, you may continue with the [Generic +installation](#generic-installation). + +## Generic installation #### + +The CaosDB crawler is available as [PyPI +package](https://pypi.org/project/caoscrawler/) and can simply be installed by + +```sh +pip3 install caoscrawler +``` + +Alternatively, obtain the sources from GitLab and install from there (`git` must +be installed for this option): + +```sh +git clone https://gitlab.com/caosdb/caosdb-crawler +cd caosdb-crawler +pip3 install --user . +``` diff --git a/integrationtests/test_data/extroot/realworld_example/dataset_cfoods.yml b/integrationtests/test_data/extroot/realworld_example/dataset_cfoods.yml index 7a64d708667182b80b739812e5fdf3369fc5b462..37a34d125dcff1d121b1bded2fe959c4d30ff403 100644 --- a/integrationtests/test_data/extroot/realworld_example/dataset_cfoods.yml +++ b/integrationtests/test_data/extroot/realworld_example/dataset_cfoods.yml @@ -153,6 +153,13 @@ Data: metadata_json: &metadata_json_template type: JSONFile match: metadata.json + records: + JSONFile: + parents: + - JSONFile + role: File + path: ${metadata_json.path} + file: ${metadata_json.path} validate: schema/dataset.schema.json subtree: jsondict: diff --git a/integrationtests/test_data/extroot/realworld_example/schema/dataspace.schema.json b/integrationtests/test_data/extroot/realworld_example/schema/dataspace.schema.json index 01653bfa821e0a0acbb5a481bfd458e2ed784fb9..36233230ae05f9df58ae4e492ff1f709322f6e51 100644 --- a/integrationtests/test_data/extroot/realworld_example/schema/dataspace.schema.json +++ b/integrationtests/test_data/extroot/realworld_example/schema/dataspace.schema.json @@ -9,6 +9,7 @@ "minimum": 20000 }, "archived": { "type": "boolean" }, + "JSONFile": { "type": "object" }, "url": { "type": "string", "description": "link to folder on file system (CaosDB or cloud folder)" diff --git a/integrationtests/test_data/extroot/use_case_simple_presentation/cfood.yml b/integrationtests/test_data/extroot/use_case_simple_presentation/cfood.yml index 6495e1828dc56e99459c162f7751951f880ea55c..c55be2157a1f079ecfb5809c3658586f9114fad1 100644 --- a/integrationtests/test_data/extroot/use_case_simple_presentation/cfood.yml +++ b/integrationtests/test_data/extroot/use_case_simple_presentation/cfood.yml @@ -25,8 +25,8 @@ extroot: parents: - mdfile role: File - path: $DataFile - file: $DataFile + path: ${DataFile.path} + file: ${DataFile.path} Experiment: mdfile: $mdfile @@ -68,8 +68,8 @@ extroot: parents: - mdfile role: File - path: $DataFile - file: $DataFile + path: ${DataFile.path} + file: ${DataFile.path} Experiment: {} diff --git a/integrationtests/test_realworld_example.py b/integrationtests/test_realworld_example.py index 4158ed22278ef5c871a22d45885e58fbfa84ea3b..cb5ed2c769945af033bc56a2d6af3bf1cec86de4 100644 --- a/integrationtests/test_realworld_example.py +++ b/integrationtests/test_realworld_example.py @@ -25,6 +25,7 @@ an integration test module that runs a test against a (close
to) real world example """ from caosdb.utils.register_tests import clear_database, set_test_key +import logging import json import os @@ -35,6 +36,7 @@ from caoscrawler.identifiable_adapters import CaosDBIdentifiableAdapter from caoscrawler.structure_elements import Directory import pytest from caosadvancedtools.models.parser import parse_model_from_json_schema, parse_model_from_yaml +from caosadvancedtools.loadFiles import loadpath import sys @@ -52,6 +54,17 @@ def rfp(*pathcomponents): DATADIR = rfp("test_data", "extroot", "realworld_example") +@pytest.fixture +def addfiles(): + loadpath(path='/opt/caosdb/mnt/extroot/', + include=None, + exclude=None, + prefix="", + dryrun=False, + forceAllowSymlinks=True, + ) + + @pytest.fixture def usemodel(): # First load dataspace data model @@ -85,22 +98,21 @@ def create_identifiable_adapter(): return ident -def test_dataset(clear_database, usemodel): - ident = create_identifiable_adapter() - crawler = Crawler(identifiableAdapter=ident) - crawler_definition = crawler.load_definition( - os.path.join(DATADIR, "dataset_cfoods.yml")) - # print(json.dumps(crawler_definition, indent=3)) - # Load and register converter packages: - converter_registry = crawler.load_converters(crawler_definition) - # print("DictIntegerElement" in converter_registry) - - records = crawler.start_crawling( - Directory("data", os.path.join(DATADIR, 'data')), - crawler_definition, - converter_registry +def test_dataset(clear_database, usemodel, addfiles, caplog): + caplog.set_level(logging.DEBUG, logger="caoscrawler") + identifiable_path = os.path.join(DATADIR, "identifiables.yml") + crawler_definition_path = os.path.join(DATADIR, "dataset_cfoods.yml") + crawler_main( + os.path.join(DATADIR, 'data'), + crawler_definition_path, + identifiable_path, + True, + os.path.join(DATADIR, "provenance.yml"), + False, + remove_prefix=DATADIR, + # this test will fail without this prefix since the crawler would try to create new files + add_prefix="/extroot/realworld_example" ) - crawler.synchronize() dataspace = db.execute_query("FIND RECORD Dataspace WITH name=35 AND dataspace_id=20002 AND " "archived=FALSE AND url='https://datacloud.de/index.php/f/7679'" @@ -119,13 +131,17 @@ def test_dataset(clear_database, usemodel): "start_datetime='2022-02-10T16:36:48+01:00'") == 1 assert db.execute_query(f"FIND Event WITH latitude=53", unique=True) + # test logging + assert "Executed inserts" in caplog.text + assert "Going to insert" in caplog.text + assert "Executed updates" in caplog.text + -def test_event_update(clear_database, usemodel): +def test_event_update(clear_database, usemodel, addfiles): identifiable_path = os.path.join(DATADIR, "identifiables.yml") crawler_definition_path = os.path.join(DATADIR, "dataset_cfoods.yml") - # TODO(fspreck): Use crawler_main crawler_main( os.path.join(DATADIR, 'data'), crawler_definition_path, @@ -133,7 +149,9 @@ def test_event_update(clear_database, usemodel): True, os.path.join(DATADIR, "provenance.yml"), False, - "" + remove_prefix=DATADIR, + # this test will fail without this prefix since the crawler would try to create new files + add_prefix="/extroot/realworld_example" ) old_dataset_rec = db.execute_query( diff --git a/integrationtests/test_use_case_simple_presentation.py b/integrationtests/test_use_case_simple_presentation.py index 91c523be90a4d0117a7cc54217cae0b911511957..5fc0f6c7d85a0fce4490c72952e711fe241a0099 100644 --- a/integrationtests/test_use_case_simple_presentation.py +++ b/integrationtests/test_use_case_simple_presentation.py @@ -38,9 +38,7 
@@ DATADIR = os.path.join(os.path.dirname(__file__), "test_data", "extroot", "use_case_simple_presentation") -def test_complete_crawler( - clear_database -): +def test_complete_crawler(clear_database): # Setup the data model: model = parser.parse_model_from_yaml(os.path.join(DATADIR, "model.yml")) model.sync_data_model(noquestion=True, verbose=False) @@ -57,13 +55,24 @@ def test_complete_crawler( dryrun=False, forceAllowSymlinks=False) + # test that a bad value for "remove_prefix" leads to runtime error + with pytest.raises(RuntimeError) as re: + crawler_main(DATADIR, + os.path.join(DATADIR, "cfood.yml"), + os.path.join(DATADIR, "identifiables.yml"), + True, + os.path.join(DATADIR, "provenance.yml"), + False, + remove_prefix="sldkfjsldf") + assert "path does not start with the prefix" in str(re.value) + crawler_main(DATADIR, os.path.join(DATADIR, "cfood.yml"), os.path.join(DATADIR, "identifiables.yml"), True, os.path.join(DATADIR, "provenance.yml"), False, - "/use_case_simple_presentation") + remove_prefix=os.path.abspath(DATADIR)) res = db.execute_query("FIND Record Experiment") assert len(res) == 1 diff --git a/setup.cfg b/setup.cfg index e16a49cbbb55699db9abd37fbc5890eca5634ef6..fbdd9d7119312e2831c77fe3e8b24bd16b5826b4 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,6 +1,6 @@ [metadata] name = caoscrawler -version = 0.3.0 +version = 0.4.0 author = Alexander Schlemmer author_email = alexander.schlemmer@ds.mpg.de description = A new crawler for caosdb diff --git a/src/caoscrawler/cfood-schema.yml b/src/caoscrawler/cfood-schema.yml index 5e724c83695e098ce980e1aa8e81c65ae8525e19..b0d77bbf5d7ba09df3c0c47d656fa3d22d07b6d2 100644 --- a/src/caoscrawler/cfood-schema.yml +++ b/src/caoscrawler/cfood-schema.yml @@ -27,6 +27,7 @@ cfood: - BooleanElement - Definitions - Dict + - Date - JSONFile - CSVTableConverter - XLSXTableConverter diff --git a/src/caoscrawler/converters.py b/src/caoscrawler/converters.py index d4e25f73a8a9e7dad42c50d907745dfb7329bb13..80a3728ce5b1f413d2bdd674b26a7dca1122eef5 100644 --- a/src/caoscrawler/converters.py +++ b/src/caoscrawler/converters.py @@ -43,6 +43,8 @@ from string import Template import yaml_header_tools import pandas as pd +import logging + import yaml @@ -51,6 +53,12 @@ import yaml SPECIAL_PROPERTIES = ("description", "name", "id", "path", "file", "checksum", "size") +logger = logging.getLogger(__name__) + + +class CrawlerTemplate(Template): + braceidpattern = r"(?a:[_a-z][_\.a-z0-9]*)" + def _only_max(children_with_keys): @@ -106,6 +114,19 @@ class ConverterValidationError(Exception): self.message = msg +def create_path_value(func): + """decorator for create_values functions that adds a value containing the path + + should be used for StructureElement that are associated with file system objects that have a + path, like File or Directory. 
+ """ + + def inner(self, values: GeneralStore, element: StructureElement): + func(self, values=values, element=element) + values.update({self.name + ".path": element.path}) + return inner + + def replace_variables(propvalue, values: GeneralStore): """ This function replaces variables in property values (and possibly other locations, @@ -129,7 +150,7 @@ def replace_variables(propvalue, values: GeneralStore): if isinstance(values[varname], db.Entity): return values[varname] - propvalue_template = Template(propvalue) + propvalue_template = CrawlerTemplate(propvalue) return propvalue_template.safe_substitute(**values.get_storage()) @@ -237,7 +258,7 @@ def create_records(values: GeneralStore, records: RecordStore, def_records: dict continue # Allow replacing variables in keys / names of properties: - key_template = Template(key) + key_template = CrawlerTemplate(key) key = key_template.safe_substitute(**values.get_storage()) keys_modified.append((name, key)) @@ -325,9 +346,7 @@ class Converter(object, metaclass=ABCMeta): return converter - def create_values(self, - values: GeneralStore, - element: StructureElement): + def create_values(self, values: GeneralStore, element: StructureElement): """ Extract information from the structure element and store them as values in the general store. @@ -346,13 +365,14 @@ class Converter(object, metaclass=ABCMeta): element: StructureElement): pass - def create_records(self, values: GeneralStore, - records: RecordStore, + def create_records(self, values: GeneralStore, records: RecordStore, element: StructureElement): + # TODO why is element passed but not used??? if "records" not in self.definition: return [] + # TODO please rename due to conflict return create_records(values, records, self.definition["records"]) @@ -364,7 +384,8 @@ class Converter(object, metaclass=ABCMeta): if rule not in FILTER_FUNCTIONS: raise RuntimeError( - f"{rule} is not a known filter rule. Only {list(FILTER_FUNCTIONS.keys())} are implemented." + f"{rule} is not a known filter rule. Only " + f"{list(FILTER_FUNCTIONS.keys())} are implemented." 
) to_be_filtered = [] @@ -391,19 +412,21 @@ class Converter(object, metaclass=ABCMeta): pass @staticmethod - def _debug_matching_template(name: str, regexp: list[str], matched: list[str], result: Optional[dict]): + def _debug_matching_template(name: str, regexp: list[str], matched: list[str], + result: Optional[dict]): """ Template for the debugging output for the match function """ - print("\n--------", name, "-----------") + msg = "\n--------" + name + "-----------" for re, ma in zip(regexp, matched): - print("matching against:\n" + re) - print("matching:\n" + ma) - print("---------") + msg += "matching against:\n" + re + msg += "matching:\n" + ma + msg += "---------" if result is None: - print("No match") + msg += "No match" else: - print("Matched groups:") - print(result) - print("----------------------------------------") + msg += "Matched groups:" + msg += str(result) + msg += "----------------------------------------" + logger.debug(msg) @staticmethod def debug_matching(kind=None): @@ -471,6 +494,10 @@ class DirectoryConverter(Converter): return children + @create_path_value + def create_values(self, values: GeneralStore, element: StructureElement): + super().create_values(values=values, element=element) + def typecheck(self, element: StructureElement): return isinstance(element, Directory) @@ -518,6 +545,10 @@ class SimpleFileConverter(Converter): def create_children(self, generalStore: GeneralStore, element: StructureElement): return list() + @create_path_value + def create_values(self, values: GeneralStore, element: StructureElement): + super().create_values(values=values, element=element) + @Converter.debug_matching("name") def match(self, element: StructureElement): # TODO: See comment on types and inheritance @@ -536,7 +567,7 @@ class FileConverter(SimpleFileConverter): super().__init__(*args, **kwargs) -class MarkdownFileConverter(Converter): +class MarkdownFileConverter(SimpleFileConverter): """ reads the yaml header of markdown files (if a such a header exists). """ @@ -546,8 +577,18 @@ class MarkdownFileConverter(Converter): if not isinstance(element, File): raise RuntimeError("A markdown file is needed to create children.") - header = yaml_header_tools.get_header_from_file( - element.path, clean=False) + try: + header = yaml_header_tools.get_header_from_file( + element.path, clean=False) + except yaml_header_tools.NoValidHeader: + if generalStore is not None and self.name in generalStore: + path = generalStore[self.name] + else: + path = "<path not set>" + raise ConverterValidationError( + "Error during the validation (yaml header cannot be read) of the markdown file " + "located at the following node in the data structure:\n" + f"{path}") children: List[StructureElement] = [] for name, entry in header.items(): @@ -560,25 +601,6 @@ class MarkdownFileConverter(Converter): "Header entry {} has incompatible type.".format(name)) return children - def typecheck(self, element: StructureElement): - return isinstance(element, File) - - @Converter.debug_matching("name") - def match(self, element: StructureElement): - # TODO: See comment on types and inheritance - if not isinstance(element, File): - raise RuntimeError("Element must be a file.") - m = re.match(self.definition["match"], element.name) - if m is None: - return None - try: - yaml_header_tools.get_header_from_file(element.path) - except yaml_header_tools.NoValidHeader: - # TODO(salexan): Raise a validation error instead of just not - # matching silently. 
- return None - return m.groupdict() - def convert_basic_element(element: Union[list, dict, bool, int, float, str, None], name=None, msg_prefix=""): @@ -685,20 +707,7 @@ class DictDictElementConverter(DictElementConverter): super().__init__(*args, **kwargs) -class JSONFileConverter(Converter): - def typecheck(self, element: StructureElement): - return isinstance(element, File) - - @Converter.debug_matching("name") - def match(self, element: StructureElement): - # TODO: See comment on types and inheritance - if not self.typecheck(element): - raise RuntimeError("Element must be a file") - m = re.match(self.definition["match"], element.name) - if m is None: - return None - return m.groupdict() - +class JSONFileConverter(SimpleFileConverter): def create_children(self, generalStore: GeneralStore, element: StructureElement): # TODO: See comment on types and inheritance if not isinstance(element, File): @@ -714,26 +723,13 @@ f"{element.path}\n" + err.message) structure_element = convert_basic_element( json_data, - name=element.name+"_child_dict", + name=element.name + "_child_dict", msg_prefix="The JSON File contained content that was parsed to a Python object" " with an unexpected type.") return [structure_element] -class YAMLFileConverter(Converter): - def typecheck(self, element: StructureElement): - return isinstance(element, File) - - @Converter.debug_matching("name") - def match(self, element: StructureElement): - # TODO: See comment on types and inheritance - if not self.typecheck(element): - raise RuntimeError("Element must be a file") - m = re.match(self.definition["match"], element.name) - if m is None: - return None - return m.groupdict() - +class YAMLFileConverter(SimpleFileConverter): def create_children(self, generalStore: GeneralStore, element: StructureElement): # TODO: See comment on types and inheritance if not isinstance(element, File): @@ -749,7 +745,7 @@ f"{element.path}\n" + err.message) structure_element = convert_basic_element( yaml_data, - name=element.name+"_child_dict", + name=element.name + "_child_dict", msg_prefix="The YAML File contained content that was parsed to a Python object" " with an unexpected type.") return [structure_element] @@ -1101,3 +1097,22 @@ class CSVTableConverter(TableConverter): child_elements.append( DictElement(str(index), row.to_dict())) return child_elements + + +class DateElementConverter(TextElementConverter): + """ + Allows converting different text formats of dates to Python date objects. + + The text to be parsed must be contained in the "date" group. The format string can be supplied + under "date_format" in the Converter definition. The library used is datetime, so see its + documentation for information on how to create the format string.
+ """ + + def match(self, element: StructureElement): + matches = super().match(element) + if matches is not None and "date" in matches: + matches.update({"date": datetime.datetime.strptime( + matches["date"], + self.definition["date_format"] if "date_format" in self.definition else "%Y-%m-%d" + ).date()}) + return matches diff --git a/src/caoscrawler/crawl.py b/src/caoscrawler/crawl.py index 6cf025a024e8cc392a7175421d47fb69059302a4..c77dcee1f29eac69732ce353e0271761eca2df13 100644 --- a/src/caoscrawler/crawl.py +++ b/src/caoscrawler/crawl.py @@ -49,6 +49,7 @@ from typing import Any, Optional, Type, Union import caosdb as db +from caosadvancedtools.utils import create_entity_link from caosadvancedtools.cache import UpdateCache, Cache from caosadvancedtools.crawler import Crawler as OldCrawler from caosdb.apiutils import (compare_entities, EntityMergeConflictError, @@ -303,8 +304,7 @@ class Crawler(object): # Validator is given by a path if not value.startswith('/'): # Not an absolute path - definition[key] = os.path.join( - os.path.dirname(definition_path), value) + definition[key] = os.path.join(os.path.dirname(definition_path), value) if not os.path.isfile(definition[key]): # TODO(henrik) capture this in `crawler_main` similar to # `ConverterValidationError`. @@ -312,8 +312,7 @@ class Crawler(object): f"Couldn't find validation file {definition[key]}") elif isinstance(value, dict): # Recursively resolve all validators - definition[key] = self._resolve_validator_paths( - value, definition_path) + definition[key] = self._resolve_validator_paths(value, definition_path) return definition @@ -328,74 +327,13 @@ class Crawler(object): directory: schema.yml file README.md documentation + + TODO: this function does not make use of self, so it could become static. 
""" # Defaults for the converter registry: - converter_registry: dict[str, dict[str, str]] = { - "Directory": { - "converter": "DirectoryConverter", - "package": "caoscrawler.converters"}, - "SimpleFile": { - "converter": "SimpleFileConverter", - "package": "caoscrawler.converters"}, - "MarkdownFile": { - "converter": "MarkdownFileConverter", - "package": "caoscrawler.converters"}, - "File": { - "converter": "SimpleFileConverter", - "package": "caoscrawler.converters"}, - "JSONFile": { - "converter": "JSONFileConverter", - "package": "caoscrawler.converters"}, - "YAMLFile": { - "converter": "YAMLFileConverter", - "package": "caoscrawler.converters"}, - "CSVTableConverter": { - "converter": "CSVTableConverter", - "package": "caoscrawler.converters"}, - "XLSXTableConverter": { - "converter": "XLSXTableConverter", - "package": "caoscrawler.converters"}, - "DictBooleanElement": { - "converter": "BooleanElementConverter", - "package": "caoscrawler.converters"}, - "BooleanElement": { - "converter": "BooleanElementConverter", - "package": "caoscrawler.converters"}, - "DictFloatElement": { - "converter": "FloatElementConverter", - "package": "caoscrawler.converters"}, - "FloatElement": { - "converter": "FloatElementConverter", - "package": "caoscrawler.converters"}, - "DictTextElement": { - "converter": "TextElementConverter", - "package": "caoscrawler.converters"}, - "TextElement": { - "converter": "TextElementConverter", - "package": "caoscrawler.converters"}, - "DictIntegerElement": { - "converter": "IntegerElementConverter", - "package": "caoscrawler.converters"}, - "IntegerElement": { - "converter": "IntegerElementConverter", - "package": "caoscrawler.converters"}, - "DictListElement": { - "converter": "ListElementConverter", - "package": "caoscrawler.converters"}, - "ListElement": { - "converter": "ListElementConverter", - "package": "caoscrawler.converters"}, - "DictDictElement": { - "converter": "DictElementConverter", - "package": "caoscrawler.converters"}, - "DictElement": { - "converter": "DictElementConverter", - "package": "caoscrawler.converters"}, - "Dict": { - "converter": "DictElementConverter", - "package": "caoscrawler.converters"}, - } + with open(str(files('caoscrawler').joinpath('default_converters.yml')), "r") as f: + converter_registry: dict[str, dict[str, str]] = yaml.safe_load(f) # More converters from definition file: if "Converters" in definition: @@ -417,11 +355,16 @@ class Crawler(object): value["class"] = getattr(module, value["converter"]) return converter_registry - def crawl_directory(self, dirname: str, crawler_definition_path: str): + def crawl_directory(self, dirname: str, crawler_definition_path: str, + restricted_path: Optional[list[str]] = None): """ Crawl a single directory. Convenience function that starts the crawler (calls start_crawling) with a single directory as the StructureElement. + + restricted_path: optional, list of strings + Traverse the data tree only along the given path. When the end of the given path + is reached, traverse the full tree as normal. 
""" crawler_definition = self.load_definition(crawler_definition_path) @@ -444,7 +387,9 @@ class Crawler(object): self.start_crawling(Directory(dir_structure_name, dirname), crawler_definition, - converter_registry) + converter_registry, + restricted_path=restricted_path + ) @staticmethod def initialize_converters(crawler_definition: dict, converter_registry: dict): @@ -472,7 +417,8 @@ class Crawler(object): def start_crawling(self, items: Union[list[StructureElement], StructureElement], crawler_definition: dict, - converter_registry: dict): + converter_registry: dict, + restricted_path: Optional[list[str]] = None): """ Start point of the crawler recursion. @@ -484,6 +430,9 @@ class Crawler(object): crawler_definition : dict A dictionary representing the crawler definition, possibly from a yaml file. + restricted_path: optional, list of strings + Traverse the data tree only along the given path. When the end of the given path + is reached, traverse the full tree as normal. Returns ------- @@ -504,8 +453,14 @@ class Crawler(object): # This recursive crawling procedure generates the update list: self.crawled_data: list[db.Record] = [] - self._crawl(items, local_converters, self.generalStore, self.recordStore, [], []) - + self._crawl( + items=items, + local_converters=local_converters, + generalStore=self.generalStore, + recordStore=self.recordStore, + structure_elements_path=[], + converters_path=[], + restricted_path=restricted_path) if self.debug: self.debug_converters = local_converters @@ -942,16 +897,17 @@ class Crawler(object): because some changes in parents (e.g. of Files) might fail if they are not updated first. """ + logger.debug("=== Going to execute parent updates ===") Crawler.set_ids_and_datatype_of_parents_and_properties(to_be_updated) parent_updates = db.Container() - for record in to_be_updated: - old_entity = Crawler._get_entity_by_id(record.id) + for entity in to_be_updated: + old_entity = Crawler._get_entity_by_id(entity.id) # Check whether the parents have been changed and add them if missing # in the old entity: changes_made = False - for parent in record.parents: + for parent in entity.parents: found = False for old_parent in old_entity.parents: if old_parent.id == parent.id: @@ -1061,20 +1017,25 @@ class Crawler(object): referencing_entities) for record in to_be_updated] # Merge with existing data to prevent unwanted overwrites - to_be_updated = self._merge_properties_from_remote(to_be_updated, - identified_records) + to_be_updated = self._merge_properties_from_remote(to_be_updated, identified_records) # remove unnecessary updates from list by comparing the target records # to the existing ones - to_be_updated = self.remove_unnecessary_updates( - to_be_updated, identified_records) + to_be_updated = self.remove_unnecessary_updates(to_be_updated, identified_records) + logger.info(f"Going to insert {len(to_be_inserted)} Entities and update " + f"{len(to_be_inserted)} Entities.") if commit_changes: self.execute_parent_updates_in_list(to_be_updated, securityMode=self.securityMode, run_id=self.run_id, unique_names=unique_names) + logger.info(f"Added parent RecordTypes where necessary.") self.execute_inserts_in_list( to_be_inserted, self.securityMode, self.run_id, unique_names=unique_names) + logger.info(f"Executed inserts:\n" + + self.create_entity_summary(to_be_inserted)) self.execute_updates_in_list( to_be_updated, self.securityMode, self.run_id, unique_names=unique_names) + logger.info(f"Executed updates:\n" + + self.create_entity_summary(to_be_updated)) update_cache = 
UpdateCache() pending_inserts = update_cache.get_inserts(self.run_id) @@ -1089,6 +1050,25 @@ class Crawler(object): return (to_be_inserted, to_be_updated) + @staticmethod + def create_entity_summary(entities: list[db.Entity]): + """ Creates a summary string reprensentation of a list of entities.""" + parents = {} + for el in entities: + for pp in el.parents: + if pp.name not in parents: + parents[pp.name] = [el] + else: + parents[pp.name].append(el) + output = "" + for key, value in parents.items(): + output += f"{key}:\n" + for el in value: + output += create_entity_link(el) + ", " + + output = output[:-2] + "\n" + return output + @staticmethod def inform_about_pending_changes(pending_changes, run_id, path, inserts=False): # Sending an Email with a link to a form to authorize updates is @@ -1156,11 +1136,14 @@ ____________________\n""".format(i + 1, len(pending_changes)) + str(el[3])) with open(filename, "w") as f: f.write(yaml.dump(paths, sort_keys=False)) - def _crawl(self, items: list[StructureElement], + def _crawl(self, + items: list[StructureElement], local_converters: list[Converter], generalStore: GeneralStore, recordStore: RecordStore, - structure_elements_path: list[str], converters_path: list[str]): + structure_elements_path: list[str], + converters_path: list[str], + restricted_path: Optional[list[str]] = None): """ Crawl a list of StructureElements and apply any matching converters. @@ -1169,20 +1152,35 @@ ____________________\n""".format(i + 1, len(pending_changes)) + str(el[3])) treating structure elements. A locally defined converter could be one that is only valid for a specific subtree of the originally cralwed StructureElement structure. - generalStore and recordStore: This recursion of the crawl function should only operate on copies of the - global stores of the Crawler object. + generalStore and recordStore: This recursion of the crawl function should only operate on + copies of the global stores of the Crawler object. + restricted_path: optional, list of strings, traverse the data tree only along the given + path. For example, when a directory contains files a, b and c and b is + given in restricted_path, a and c will be ignroed by the crawler. + When the end of the given path is reached, traverse the full tree as + normal. The first element of the list provided by restricted_path should + be the name of the StructureElement at this level, i.e. denoting the + respective element in the items argument. 
""" + # This path_found variable stores wether the path given by restricted_path was found in the + # data tree + path_found = False + if restricted_path is not None and len(restricted_path) == 0: + restricted_path = None + for element in items: for converter in local_converters: # type is something like "matches files", replace isinstance with "type_matches" # match function tests regexp for example - if (converter.typecheck(element) and - converter.match(element) is not None): + if (converter.typecheck(element) and ( + restricted_path is None or element.name == restricted_path[0]) + and converter.match(element) is not None): + path_found = True generalStore_copy = generalStore.create_scoped_copy() recordStore_copy = recordStore.create_scoped_copy() - # Create an entry for this matched structure element: + # Create an entry for this matched structure element that contains the path: generalStore_copy[converter.name] = ( os.path.join(*(structure_elements_path + [element.get_name()]))) @@ -1196,7 +1194,7 @@ ____________________\n""".format(i + 1, len(pending_changes)) + str(el[3])) children = converter.create_children(generalStore_copy, element) if self.debug: - # add provenance information for each varaible + # add provenance information for each variable self.debug_tree[str(element)] = ( generalStore_copy.get_storage(), recordStore_copy.get_storage()) self.debug_metadata["copied"][str(element)] = ( @@ -1219,7 +1217,12 @@ ____________________\n""".format(i + 1, len(pending_changes)) + str(el[3])) self._crawl(children, converter.converters, generalStore_copy, recordStore_copy, structure_elements_path + [element.get_name()], - converters_path + [converter.name]) + converters_path + [converter.name], + restricted_path[1:] if restricted_path is not None else None) + + if restricted_path and not path_found: + raise RuntimeError("A 'restricted_path' argument was given that is not contained in " + "the data tree") # if the crawler is running out of scope, copy all records in # the recordStore, that were created in this scope # to the general update container. @@ -1250,6 +1253,9 @@ def crawler_main(crawled_directory_path: str, prefix: str = "", securityMode: SecurityMode = SecurityMode.UPDATE, unique_names=True, + restricted_path: Optional[list[str]] = None, + remove_prefix: Optional[str] = None, + add_prefix: Optional[str] = None, ): """ @@ -1268,11 +1274,18 @@ def crawler_main(crawled_directory_path: str, dry_run : bool do not commit any chnages to the server prefix : str - remove the given prefix from file paths + DEPRECATED, remove the given prefix from file paths securityMode : int securityMode of Crawler unique_names : bool whether or not to update or insert entities inspite of name conflicts + restricted_path: optional, list of strings + Traverse the data tree only along the given path. When the end of the given path + is reached, traverse the full tree as normal. 
+ remove_prefix : Optional[str] + remove the given prefix from file paths + add_prefix : Optional[str] + add the given prefix to file paths Returns ------- @@ -1281,19 +1294,27 @@ """ crawler = Crawler(debug=debug, securityMode=securityMode) try: - crawler.crawl_directory(crawled_directory_path, cfood_file_name) + crawler.crawl_directory(crawled_directory_path, cfood_file_name, restricted_path) except ConverterValidationError as err: - print(err) + logger.error(err) return 1 - if provenance_file is not None: + if provenance_file is not None and debug: crawler.save_debug_data(provenance_file) if identifiables_definition_file is not None: - ident = CaosDBIdentifiableAdapter() ident.load_from_yaml_definition(identifiables_definition_file) crawler.identifiableAdapter = ident + if prefix != "": + warnings.warn(DeprecationWarning("The prefix argument is deprecated and will be removed " + "in the future. Please use `remove_prefix` instead.")) + if remove_prefix is not None: + raise ValueError("Please do not supply the (deprecated) `prefix` and the " + "`remove_prefix` argument at the same time. Only use " + "`remove_prefix` instead.") + remove_prefix = prefix + if dry_run: ins, upd = crawler.synchronize(commit_changes=False) inserts = [str(i) for i in ins] @@ -1308,11 +1329,15 @@ if isinstance(elem, db.File): # correct the file path: # elem.file = os.path.join(args.path, elem.file) - if prefix is None: - raise RuntimeError( - "No prefix set. Prefix must be set if files are used.") - if elem.path.startswith(prefix): - elem.path = elem.path[len(prefix):] + if remove_prefix: + if elem.path.startswith(remove_prefix): + elem.path = elem.path[len(remove_prefix):] + else: + raise RuntimeError("Prefix shall be removed from file path but the path " + "does not start with the prefix:" + f"\n{remove_prefix}\n{elem.path}") + if add_prefix: + elem.path = add_prefix + elem.path elem.file = None # TODO: as long as the new file backend is not finished # we are using the loadFiles function to insert symlinks. @@ -1346,6 +1371,15 @@ formatter_class=RawTextHelpFormatter) parser.add_argument("cfood_file_name", help="Path name of the cfood yaml file to be used.") + mg = parser.add_mutually_exclusive_group() + mg.add_argument("-r", "--restrict", nargs="*", + help="Restrict the crawling to the subtree at the end of the given path. " + "I.e. for each level that is given the crawler only treats the element " + "with the given name.") + mg.add_argument("--restrict-path", help="same as restrict; instead of a list, this takes a " + "single string that is interpreted as a file system path. Note that a trailing " + "separator (e.g. '/') will be ignored. Use --restrict if you need to have " + "empty strings.") parser.add_argument("--provenance", required=False, help="Path name of the provenance yaml file. " "This file will only be generated if this option is set.") @@ -1371,18 +1405,35 @@ parser.add_argument("-u", "--unique-names", help="Insert or updates entities even if name conflicts exist.") parser.add_argument("-p", "--prefix", - help="Remove the given prefix from the paths " - "of all file objects.") + help="DEPRECATED, use --remove-prefix instead.
Remove the given prefix " + "from the paths of all file objects.") + parser.add_argument("--remove-prefix", + help="Remove the given prefix from the paths of all file objects.") + parser.add_argument("--add-prefix", + help="Add the given prefix to the paths of all file objects.") return parser.parse_args() +def split_restricted_path(path): + elements = [] + while path != "/": + path, el = os.path.split(path) + if el != "": + elements.insert(0, el) + return elements + + def main(): args = parse_args() conlogger = logging.getLogger("connection") conlogger.setLevel(level=logging.ERROR) + if args.prefix: + print("Please use '--remove-prefix' option instead of '--prefix' or '-p'.") + return -1 + # logging config for local execution logger.addHandler(logging.StreamHandler(sys.stdout)) if args.debug: @@ -1392,6 +1443,12 @@ def main(): if args.add_cwd_to_path: sys.path.append(os.path.abspath(".")) + restricted_path = None + if args.restrict_path: + restricted_path = split_restricted_path(args.restrict_path) + if args.restrict: + restricted_path = args.restrict + sys.exit(crawler_main( crawled_directory_path=args.crawled_directory_path, cfood_file_name=args.cfood_file_name, @@ -1399,11 +1456,13 @@ def main(): debug=args.debug, provenance_file=args.provenance, dry_run=args.dry_run, - prefix=args.prefix, securityMode={"retrieve": SecurityMode.RETRIEVE, "insert": SecurityMode.INSERT, "update": SecurityMode.UPDATE}[args.security_mode], unique_names=args.unique_names, + restricted_path=restricted_path, + remove_prefix=args.remove_prefix, + add_prefix=args.add_prefix, )) diff --git a/src/caoscrawler/default_converters.yml b/src/caoscrawler/default_converters.yml new file mode 100644 index 0000000000000000000000000000000000000000..e192ab1b3bae70a6772cf6defba4a4592a92e584 --- /dev/null +++ b/src/caoscrawler/default_converters.yml @@ -0,0 +1,86 @@ +# ------------------------- +# Base Types +# ------------------------- + +BooleanElement: + converter: BooleanElementConverter + package: caoscrawler.converters +Date: + converter: DateElementConverter + package: caoscrawler.converters +Dict: + converter: DictElementConverter + package: caoscrawler.converters +FloatElement: + converter: FloatElementConverter + package: caoscrawler.converters +IntegerElement: + converter: IntegerElementConverter + package: caoscrawler.converters +ListElement: + converter: ListElementConverter + package: caoscrawler.converters +TextElement: + converter: TextElementConverter + package: caoscrawler.converters + + +DictDictElement: # deprecated + converter: DictElementConverter + package: caoscrawler.converters +DictElement: # deprecated + converter: DictElementConverter + package: caoscrawler.converters +DictBooleanElement: # deprecated + converter: BooleanElementConverter + package: caoscrawler.converters +DictFloatElement: # deprecated + converter: FloatElementConverter + package: caoscrawler.converters +DictIntegerElement: # deprecated + converter: IntegerElementConverter + package: caoscrawler.converters +DictListElement: # deprecated + converter: ListElementConverter + package: caoscrawler.converters +DictTextElement: # deprecated + converter: TextElementConverter + package: caoscrawler.converters + +# ------------------------- +# Directories and Files +# ------------------------- + +Directory: + converter: DirectoryConverter + package: caoscrawler.converters + + +File: # deprecated + converter: SimpleFileConverter + package: caoscrawler.converters + + +SimpleFile: + converter: SimpleFileConverter + package: 
caoscrawler.converters + +MarkdownFile: + converter: MarkdownFileConverter + package: caoscrawler.converters + +YAMLFile: + converter: YAMLFileConverter + package: caoscrawler.converters + +JSONFile: + converter: JSONFileConverter + package: caoscrawler.converters + +CSVTableConverter: + converter: CSVTableConverter + package: caoscrawler.converters + +XLSXTableConverter: + converter: XLSXTableConverter + package: caoscrawler.converters diff --git a/src/caoscrawler/identifiable.py b/src/caoscrawler/identifiable.py index 7ff7172576be08e068ba412f319b059fb349bbeb..eda113d8fc0c5fc64a620ef7540dec4004401aef 100644 --- a/src/caoscrawler/identifiable.py +++ b/src/caoscrawler/identifiable.py @@ -25,6 +25,9 @@ from datetime import datetime import json from hashlib import sha256 from typing import Union +import logging + +logger = logging.getLogger(__name__) class Identifiable(): @@ -62,6 +65,8 @@ self.path = path self.record_type = record_type self.name = name + if name == "": + self.name = None self.properties: dict = {} if properties is not None: self.properties = properties diff --git a/src/caoscrawler/identifiable_adapters.py b/src/caoscrawler/identifiable_adapters.py index 40c801547a85afaf32e1ab6a668bc47d98d60b66..c410159de4364e9b0299a84a4cbc687f773d35c0 100644 --- a/src/caoscrawler/identifiable_adapters.py +++ b/src/caoscrawler/identifiable_adapters.py @@ -33,6 +33,7 @@ import caosdb as db import logging from abc import abstractmethod, ABCMeta from .utils import has_parent + logger = logging.getLogger(__name__) @@ -447,7 +448,7 @@ def get_file(self, identifiable: Identifiable): if identifiable.path is None: raise RuntimeError("Path must not be None for File retrieval.") - candidates = db.execute_query("FIND File which is stored at {}".format( + candidates = db.execute_query("FIND File which is stored at '{}'".format( identifiable.path)) if len(candidates) > 1: raise RuntimeError("Identifiable was not defined unambigiously.") diff --git a/src/doc/README_SETUP.md b/src/doc/README_SETUP.md index 1f6e15d408e10e38bce0d9b9fe9b6197ec69bfc3..952a8c94a7dfa24110f320f5dd32b0ad2ac1df01 100644 --- a/src/doc/README_SETUP.md +++ b/src/doc/README_SETUP.md @@ -1,63 +1,10 @@ # Getting started with the CaosDB Crawler # -## Installation ## - -### How to install ### - -#### Linux #### - -Make sure that Python (at least version 3.8) and pip is installed, using your system tools and -documentation. - -Then open a terminal and continue in the [Generic installation](#generic-installation) section. - -#### Windows #### - -If a Python distribution is not yet installed, we recommend Anaconda Python, which you can download -for free from [https://www.anaconda.com](https://www.anaconda.com). The "Anaconda Individual Edition" provides most of all -packages you will ever need out of the box. If you prefer, you may also install the leaner -"Miniconda" installer, which allows you to install packages as you need them. - -After installation, open an Anaconda prompt from the Windows menu and continue in the [Generic -installation](#generic-installation) section. - -#### MacOS #### - -If there is no Python 3 installed yet, there are two main ways to -obtain it: Either get the binary package from -[python.org](https://www.python.org/downloads/) or, for advanced -users, install via [Homebrew](https://brew.sh/).
After installation -from python.org, it is recommended to also update the TLS certificates -for Python (this requires administrator rights for your user): - -```sh -# Replace this with your Python version number: -cd /Applications/Python\ 3.9/ - -# This needs administrator rights: -sudo ./Install\ Certificates.command -``` - -After these steps, you may continue with the [Generic -installation](#generic-installation). - -#### Generic installation #### - ---- - -Obtain the sources from GitLab and install from there (`git` must be installed for -this option): - -```sh -git clone https://gitlab.com/caosdb/caosdb-crawler -cd caosdb-crawler -pip3 install --user . -``` - -**Note**: In the near future, this package will also be made available on PyPi. - +## Installation +see INSTALL.md ## Run Unit Tests +Run `pytest unittests`. ## Documentation ## We use sphinx to create the documentation. Docstrings in the code should comply diff --git a/src/doc/cfood.rst b/src/doc/cfood.rst index 37f6a8c7d3be9298ec965c50a4ec29110988ddc6..6564ee677f0b363a52c44dd5ceabe5378c255105 100644 --- a/src/doc/cfood.rst +++ b/src/doc/cfood.rst @@ -149,6 +149,44 @@ create lists or multi properties instead of single values: .. code-block:: yaml Experiment1: - Measurement: +Measurement <- Element in List (list is cleared before run) - *Measurement <- Multi Property (properties are removed before run) - Measurement <- Overwrite + Measurement: +Measurement # Element in List (list is cleared before run) + *Measurement # Multi Property (properties are removed before run) + Measurement # Overwrite + + +File Entities +------------- + +In order to use File Entities, you must set the appropriate ``role: File``. +Additionally, the path and file keys have to be given, with values that set the +paths remotely and locally, respectively. You can use the variable +``<converter name>.path`` that is automatically created by converters that deal +with file system related StructureElements. The file object itself is stored +in a variable with the same name (as is the case for other Records). + + +.. code-block:: yaml + + somefile: + type: SimpleFile + match: ^params.*$ # match any file that starts with "params" + records: + fileEntity: + role: File # necessary to create a File Entity + path: ${somefile.path} # defines the path in CaosDB + file: ${somefile.path} # path where the file is found locally + SomeRecord: + ParameterFile: $fileEntity # creates a reference to the file + +Automatically generated keys +++++++++++++++++++++++++++++ + +Some variable names are automatically generated and can be used using the +``$<variable name>`` syntax. Those include: + +- ``<converter name>``: access the path of structure element names leading to the current element +- ``<converter name>.path``: the file system path to the structure element + (file system related converters only; you need curly brackets to use them: + ``${<converter name>.path}``) +- ``<Record key>``: all entities that are created in the ``records`` section + are available under the same key diff --git a/src/doc/concepts.rst b/src/doc/concepts.rst index 89757f21958f3d94649b33e9f9112593f703191d..0881d9302b621d6b47575e171dd9e8c144e29cd4 100644 --- a/src/doc/concepts.rst +++ b/src/doc/concepts.rst @@ -1,6 +1,10 @@ Concepts )))))))) +The CaosDB Crawler can handle any kind of hierarchical data structure. The typical use case is a +directory tree that is traversed. We use the following terms/concepts to describe how the CaosDB +Crawler works.
+ Structure Elements ++++++++++++++++++ diff --git a/src/doc/conf.py b/src/doc/conf.py index b8d055abe682efcb17f960cdaabca3de4d25a16d..7719a920328c46b4453cd59413b939fcf2d45f5a 100644 --- a/src/doc/conf.py +++ b/src/doc/conf.py @@ -33,10 +33,10 @@ copyright = '2021, MPIDS' author = 'Alexander Schlemmer' # The short X.Y version -version = '0.3.0' +version = '0.4.0' # The full version, including alpha/beta/rc tags # release = '0.5.2-rc2' -release = '0.3.0' +release = '0.4.0' # -- General configuration --------------------------------------------------- diff --git a/src/doc/converters.rst b/src/doc/converters.rst index b4ba89ced3b5858ca2f8abe7bc724d6710d9203b..95676627d95a5cd6bbca5208b67f9689fffb6806 100644 --- a/src/doc/converters.rst +++ b/src/doc/converters.rst @@ -77,7 +77,7 @@ Reads a YAML header from Markdown files (if such a header exists) and creates children elements according to the structure of the header. DictElement Converter -============== +===================== Creates a child StructureElement for each key in the dictionary. Typical Subtree converters diff --git a/src/doc/getting_started/INSTALL.md b/src/doc/getting_started/INSTALL.md new file mode 120000 index 0000000000000000000000000000000000000000..95b6037c7ab329d91e3a8ed4a2b31eba675eef62 --- /dev/null +++ b/src/doc/getting_started/INSTALL.md @@ -0,0 +1 @@ +../../../INSTALL.md \ No newline at end of file diff --git a/src/doc/getting_started/helloworld.rst b/src/doc/getting_started/helloworld.rst new file mode 100644 index 0000000000000000000000000000000000000000..ef4a1398322b59d7983b7dff384534cfa501b660 --- /dev/null +++ b/src/doc/getting_started/helloworld.rst @@ -0,0 +1,5 @@ + +Prerequisites +))))))))))))) + +TODO Describe the smallest possible crawler run diff --git a/src/doc/getting_started/index.rst b/src/doc/getting_started/index.rst new file mode 100644 index 0000000000000000000000000000000000000000..74ffa7daeff393d05605e1066a5985984c2e9751 --- /dev/null +++ b/src/doc/getting_started/index.rst @@ -0,0 +1,15 @@ +Getting Started ++++++++++++++++ + +.. toctree:: + :maxdepth: 2 + :caption: Contents: + :hidden: + + Installation<INSTALL> + prerequisites + helloworld + +This section will help you get going! From the first installation steps to the first simple crawl. + +Let's go! diff --git a/src/doc/getting_started/prerequisites.rst b/src/doc/getting_started/prerequisites.rst new file mode 100644 index 0000000000000000000000000000000000000000..dc8022b6cad99a8508f19f47dc01c601fb676c5b --- /dev/null +++ b/src/doc/getting_started/prerequisites.rst @@ -0,0 +1,6 @@ + +Prerequisites +))))))))))))) + +TODO Describe what you need to actually do a crawler run: data, CaosDB, ... 
+ diff --git a/src/doc/index.rst b/src/doc/index.rst index b4e30e4728068cabb92626cfac986ab858a0bbb6..d319bf4d24a05a3033b1ae5bbf80433c5ef3646b 100644 --- a/src/doc/index.rst +++ b/src/doc/index.rst @@ -7,12 +7,12 @@ CaosDB-Crawler Documentation :caption: Contents: :hidden: - Getting started<README_SETUP> + Getting started<getting_started/index> + Tutorials<tutorials/index> Concepts<concepts> Converters<converters> CFoods (Crawler Definitions)<cfood> Macros<macros> - Tutorials<tutorials/index> How to upgrade<how-to-upgrade> API documentation<_apidoc/modules> diff --git a/src/doc/macros.rst b/src/doc/macros.rst index d3a3e9b9634a4e1d72228dd46692a824e1d5acfd..7685731d35afab51074bb4d12c51ede0a7ba1b75 100644 --- a/src/doc/macros.rst +++ b/src/doc/macros.rst @@ -195,7 +195,7 @@ The example will be expanded to: Limitation ----------- +========== Currently it is not possible to use the same macro twice in the same yaml node, but in different positions. Consider: diff --git a/src/doc/tutorials/index.rst b/src/doc/tutorials/index.rst index 88d598ece284e1aad315a1e0fcae3fdf494b3aad..02371de196cc139776416882aff31bd6fa4dabbe 100644 --- a/src/doc/tutorials/index.rst +++ b/src/doc/tutorials/index.rst @@ -1,9 +1,11 @@ Tutorials +++++++++ +This chapter contains a collection of tutorials. + .. toctree:: :maxdepth: 2 :caption: Contents: - :hidden: Example CFood<example> + diff --git a/unittests/scifolder_cfood.yml b/unittests/scifolder_cfood.yml index 74fd027563907c5ae416ca389faba0ecd64d5848..dce219b751c3e980662a1eaa4904e1163d9836a0 100644 --- a/unittests/scifolder_cfood.yml +++ b/unittests/scifolder_cfood.yml @@ -22,7 +22,7 @@ Data: # name of the converter parents: - Project # not needed as the name is equivalent date: $date - identifier: $identifier + identifier: ${identifier} subtree: measurement: # new name for folders on the 3rd level diff --git a/unittests/test_converters.py b/unittests/test_converters.py index 5942b1e124ebd1228a619ed7a1024738c70ee0aa..4d3791fce3ceffaafe529423e4020ebd6a4231ba 100644 --- a/unittests/test_converters.py +++ b/unittests/test_converters.py @@ -25,15 +25,18 @@ test the converters module """ import json import yaml +import logging +import sys import importlib import os from itertools import product +import datetime import pytest import yaml from caoscrawler.converters import (Converter, ConverterValidationError, DictElementConverter, DirectoryConverter, DictIntegerElementConverter, - handle_value, MarkdownFileConverter, + handle_value, MarkdownFileConverter, DateElementConverter, FloatElementConverter, IntegerElementConverter, JSONFileConverter, YAMLFileConverter) from caoscrawler.converters import _AbstractScalarValueElementConverter @@ -55,6 +58,9 @@ def converter_registry(): "MarkdownFile": { "converter": "MarkdownFileConverter", "package": "caoscrawler.converters"}, + "Date": { + "converter": "DateElementConverter", + "package": "caoscrawler.converters"}, "DictElement": { "converter": "DictElementConverter", "package": "caoscrawler.converters"}, @@ -64,9 +70,6 @@ def converter_registry(): "ListElement": { "converter": "ListElementConverter", "package": "caoscrawler.converters"}, - "TextElement": { - "converter": "TextElementConverter", - "package": "caoscrawler.converters"}, "JSONFile": { "converter": "JSONFileConverter", "package": "caoscrawler.converters"}, @@ -127,14 +130,11 @@ def test_markdown_converter(converter_registry): ) ) - converter = MarkdownFileConverter({ - "match": "(.*)" - }, "TestMarkdownFileConverter", - converter_registry) + converter = 
MarkdownFileConverter({"match": "(.*)"}, "TestMarkdownFileConverter", + converter_registry) - m = converter.match(File("test_tool.py", rfp( - "test_tool.py"))) - assert m is None + with pytest.raises(ConverterValidationError) as err: + converter.create_children(None, File("test_tool.py", rfp("test_tool.py"))) m = converter.match(test_readme) assert m is not None @@ -370,7 +370,6 @@ def test_filter_children_of_directory(converter_registry, capsys): dc = DirectoryConverter( definition={ "match": "(.*)", - "debug_match": True, "filter": { "expr": "test_(?P<date>[0-9]{4,4}-[0-9]{2,2}-[0-9]{2,2}).json", "group": "date", @@ -383,14 +382,6 @@ def test_filter_children_of_directory(converter_registry, capsys): m = dc.match(test_dir) assert m is not None - # checking debug output - captured = capsys.readouterr() - # the name - assert "examples_filter_children" in captured.out - # the regexp - assert "(.*)" in captured.out - # the empty result set - assert "{}" in captured.out # This should only contain the youngest json and the csv that doesn't match # the above filter expression. @@ -540,7 +531,8 @@ def test_converter_value_match(converter_registry): assert m is not None -def test_match_debug(converter_registry, capsys): +def test_match_debug(converter_registry, caplog): + caplog.set_level(logging.DEBUG, logger="caoscrawler.converters") for m, mn, mv in product([".*", None], [".*", None], [".*", None]): defi = {"debug_match": True} if m: @@ -562,11 +554,70 @@ def test_match_debug(converter_registry, capsys): mtch = dc.match(IntegerElement(name="a", value=4)) if not (m is None and mn is None and mv is None): assert mtch is not None - # checking debug output - captured = capsys.readouterr() # the name - assert "a" in captured.out + assert "a" in caplog.text # the regexp - assert ".*" in captured.out + assert ".*" in caplog.text # the empty result set - assert "{}" in captured.out + assert "{}" in caplog.text + caplog.clear() + + +def test_date_converter(): + dictconverter = DateElementConverter( + definition={"match_value": "(?P<date>.*)"}, + name="conv", + converter_registry=converter_registry) + matches = dictconverter.match(TextElement("text", "2022-11-11")) + assert "date" in matches + assert isinstance(matches["date"], datetime.date) + assert matches["date"].year == 2022 + + dictconverter = DateElementConverter( + definition={"match_value": r"(?P<date>(\d|-)+)", + "date_format": "%y-%m-%d"}, + name="conv", + converter_registry=converter_registry) + matches = dictconverter.match(TextElement("text", "22-11-11")) + assert "date" in matches + assert isinstance(matches["date"], datetime.date) + assert matches["date"].year == 2022 + + matches = dictconverter.match(TextElement("text", "alve")) + assert matches is None + + +def test_load_converters(): + c = Crawler() + converter_registry = c.load_converters({}) + # The previous function call actually already asserts that all defined + # converter classes can be loaded from their respective packages. + + # Please adapt, if defaults change! + assert len(converter_registry) == 22 + + # All of them are contained in caoscrawler.converters + for conv_key, conv in converter_registry.items(): + assert conv["package"] == "caoscrawler.converters" + # ... 
and their names all end in "Converter" + assert conv["converter"].endswith("Converter") + + # Some checks: + assert "CSVTableConverter" in converter_registry + assert "SimpleFile" in converter_registry + assert "Directory" in converter_registry + assert "ListElement" in converter_registry + + +def test_create_path_value(converter_registry): + """ test whether the variable containing the path is added to the general store""" + dc = Converter.converter_factory( + definition={ + "type": "Directory", + "match": ".*" + }, + name="Test", converter_registry=converter_registry) + values = GeneralStore() + dc.create_values(values, Directory("a", "/a")) + assert "Test.path" in values + assert values["Test.path"] == "/a" diff --git a/unittests/test_scalars_cfood.py b/unittests/test_scalars_cfood.py index 1bf8f0b7d67f00f2018b5b68424d6b9cc17602eb..ac408b2dab0fa151c370d3ec6ffd1dced22c77d7 100644 --- a/unittests/test_scalars_cfood.py +++ b/unittests/test_scalars_cfood.py @@ -42,16 +42,23 @@ def test_record_structure_generation(crawler): subd = crawler.debug_tree[dircheckstr("DataAnalysis")] assert len(subd) == 2 # variables store on Data Analysis node of debug tree - assert len(subd[0]) == 3 - assert "Data" in subd[0] - assert "DataAnalysis" in subd[0] - assert "RecordThatGetsParentsLater" in subd[0] + if "Data" in subd[0]: + subddata = subd[0] + subdRTGPL = subd[1] + else: + subddata = subd[1] + subdRTGPL = subd[0] + assert len(subddata) == 5 + assert "DataAnalysis" in subddata + assert "DataAnalysis.path" in subddata + assert "Data.path" in subddata + assert "RecordThatGetsParentsLater" in subddata - prop = subd[0]["RecordThatGetsParentsLater"].get_property("someId") + prop = subddata["RecordThatGetsParentsLater"].get_property("someId") assert type(prop.value) == int assert prop.value == 23 # record store on Data Analysis node of debug tree - assert len(subd[1]) == 1 - prop2 = subd[1]["RecordThatGetsParentsLater"].get_property("someId") + assert len(subdRTGPL) == 1 + prop2 = subdRTGPL["RecordThatGetsParentsLater"].get_property("someId") assert prop == prop2 diff --git a/unittests/test_tool.py b/unittests/test_tool.py index 6a828532c1de9796008a6e51c21811f83b85657a..e15d7cb777ced4b92566df2b25b375e90be39295 100755 --- a/unittests/test_tool.py +++ b/unittests/test_tool.py @@ -25,11 +25,13 @@ Tests for the tool using pytest Adapted from check-sfs """ +import logging +from caoscrawler.stores import GeneralStore, RecordStore import os -from caoscrawler.crawl import Crawler, SecurityMode +from caoscrawler.crawl import Crawler, SecurityMode, split_restricted_path from caoscrawler.identifiable import Identifiable -from caoscrawler.structure_elements import File, DictTextElement, DictListElement +from caoscrawler.structure_elements import File, DictTextElement, DictListElement, DictElement from caoscrawler.identifiable_adapters import IdentifiableAdapter, LocalStorageIdentifiableAdapter from simulated_server_data import full_data from functools import partial @@ -108,15 +110,17 @@ def ident(crawler): def test_record_structure_generation(crawler): + # TODO How does this test relate to the test function in test_scalars_cfood with the same name? 
+    # There seems to be code duplication
     subd = crawler.debug_tree[dircheckstr("DataAnalysis")]
     subc = crawler.debug_metadata["copied"][dircheckstr("DataAnalysis")]
     assert len(subd) == 2
     # variables store on Data Analysis node of debug tree
-    assert len(subd[0]) == 2
+    assert len(subd[0]) == 4
     # record store on Data Analysis node of debug tree
     assert len(subd[1]) == 0
     assert len(subc) == 2
-    assert len(subc[0]) == 2
+    assert len(subc[0]) == 4
     assert len(subc[1]) == 0

     # The data analysis node creates one variable for the node itself:
@@ -135,7 +139,7 @@ def test_record_structure_generation(crawler):
     assert subd[1]["Project"].get_property(
         "identifier").value == "climate-model-predict"

-    assert len(subd[0]) == 6
+    assert len(subd[0]) == 9
     assert subd[0]["date"] == "2020"
     assert subd[0]["identifier"] == "climate-model-predict"
     assert subd[0]["Project"].__class__ == db.Record
@@ -146,7 +150,7 @@ def test_record_structure_generation(crawler):
     assert subc[0]["project_dir"] is False

     # Check the copy flags for the first level in the hierarchy:
-    assert len(subc[0]) == 6
+    assert len(subc[0]) == 9
     assert len(subc[1]) == 1
     assert subc[1]["Project"] is False
     assert subc[0]["Project"] is False
@@ -159,7 +163,7 @@ def test_record_structure_generation(crawler):
     subc = crawler.debug_metadata["copied"][dircheckstr("DataAnalysis",
                                                         "2020_climate-model-predict",
                                                         "2020-02-08_prediction-errors")]

-    assert len(subd[0]) == 8
+    assert len(subd[0]) == 12
     assert subd[0]["date"] == "2020-02-08"
     assert subd[0]["identifier"] == "prediction-errors"
     assert subd[0]["Project"].__class__ == db.Record
@@ -779,7 +783,8 @@ def crawler_mocked_for_backref_test(crawler):
     return crawler


-def test_validation_error_print(capsys):
+def test_validation_error_print(caplog):
+    caplog.set_level(logging.DEBUG, logger="caoscrawler.converters")
     # there should be no server interaction since we only test the behavior if a validation error
     # occurs during the data collection stage
     DATADIR = os.path.join(os.path.dirname(__file__), "test_data", "failing_validation")
@@ -789,10 +794,9 @@
         os.path.join(DATADIR, "identifiables.yml"),
         True,
         None,
-        False,
-        "/use_case_simple_presentation")
-    captured = capsys.readouterr()
-    assert "Couldn't validate" in captured.out
+        False)
+    assert "Couldn't validate" in caplog.text
+    caplog.clear()


 def test_split_into_inserts_and_updates_backref(crawler_mocked_for_backref_test):
@@ -867,3 +871,131 @@ def test_split_into_inserts_and_updates_diff_backref(crawler_mocked_for_backref_
     insert, update = crawler.split_into_inserts_and_updates(deepcopy(entlist))
     assert len(update) == 2
     assert len(insert) == 1
+
+
+def mock_create_values(values, element):
+    pass
+
+
+@patch("caoscrawler.converters.IntegerElementConverter.create_values")
+def test_restricted_path(create_mock):
+    """
+    The restricted_path argument allows ignoring parts of the crawled data structure. Here, we
+    make sure that, if that argument is provided, indeed only the given path of the tree is
+    traversed.
+
+    The check is done using the mock of the create_values function of the IntegerElementConverter.
+    This function is only called if elements are being treated.
+ """ + crawler_definition = { + "DictTest": { + "type": "DictElement", + "match": "(.*)", + "subtree": { + "nextdict": { + "type": "DictElement", + "match": "(.*)", + "subtree": { + "int_element": { + "type": "IntegerElement", + "match_name": ".*", + "match_value": "(?P<int_value>.*)", + "records": { + "Dataset": { + "Subject": "$int_value" + } + } + } + } + } + } + } + } + + crawler = Crawler(debug=True) + converter_registry = crawler.load_converters(crawler_definition) + + # This structure is crawled + test_dict = { + "v1": { + "a": 1, + "b": 2, + }, + "v2": { + "c": 3, + "d": 4, + } + } + # first test without a restricted_path + restricted_path = None + records = crawler.start_crawling( + DictElement("TestDict", test_dict), crawler_definition, converter_registry, + restricted_path + ) + assert create_mock.call_count == 4 + create_mock.reset_mock() + + # test with a restricted_path but one that has no effect (single root element) + # this also tests that the remainder of the tree is fully traversed + restricted_path = ["TestDict"] + records = crawler.start_crawling( + DictElement("TestDict", test_dict), crawler_definition, converter_registry, + restricted_path + ) + assert create_mock.call_count == 4 + create_mock.reset_mock() + + # test with a restricted_path that restricts the tree (single root element) + restricted_path = ["TestDict", "v2"] + records = crawler.start_crawling( + DictElement("TestDict", test_dict), crawler_definition, converter_registry, + restricted_path + ) + assert create_mock.call_count == 2 + create_mock.reset_mock() + + # test with a restricted_path that contains a bad element + restricted_path = ["TestDict", "v3"] + with raises(RuntimeError): + records = crawler.start_crawling( + DictElement("TestDict", test_dict), crawler_definition, converter_registry, + restricted_path + ) + + +def test_split_restricted_path(): + assert ["el"] == split_restricted_path("/el") + assert ["el"] == split_restricted_path("/el/") + assert ["el", "el"] == split_restricted_path("/el/el") + + +def test_deprecated_prefix_option(): + """Test that calling the crawler's main function with the deprecated + `prefix` option raises the correct errors and warnings. + + """ + + with pytest.deprecated_call(): + crawler_main("./", rfp("scifolder_cfood.yml"), prefix="to/be/removed") + + with raises(ValueError) as ve: + crawler_main("./", rfp("scifolder_cfood.yml"), prefix="to/be/removed", + remove_prefix="to/be/removed") + + assert "(deprecated) `prefix` and the `remove_prefix`" in str(ve.value) + + +def test_create_entity_summary(): + assert "" == Crawler.create_entity_summary([]).strip() + + entities = [ + db.Record(id=1).add_parent("A"), + db.Record(id=4, name='a').add_parent("B"), + db.Record(id=5).add_parent("A"), + db.Record(id=6, name='b').add_parent("B"), + ] + text = Crawler.create_entity_summary(entities).strip() + assert 'a' in text + assert 'b' in text + assert 'A:' in text + assert 'B:' in text + assert "<a href='/Entity/4'>a</a>, <a href='/Entity/6'>b</a>" in text