diff --git a/CHANGELOG.md b/CHANGELOG.md
index 7df6439d5ee38b236a0731cf5ca09b82c7fcf002..95189b50054033f54054a21388a67ca47c8356ea 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -8,20 +8,35 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 ## [Unreleased] ##
 
 ### Added ###
+
 - DateElementConverter: allows to interpret text as a date object
 - the restricted_path argument allows to crawl only a subtree
+- logging that provides a summary of what is inserted and updated
+- You can now access the file system path of a structure element (if it has one) using the variable
+  name ``<converter name>.path``
+- ``add_prefix`` and ``remove_prefix`` arguments for the command line interface
+  and the ``crawler_main`` function for the adding/removal of path prefixes when
+  creating file entities.
 
 ### Changed ###
 
 - The definitions for the default converters were removed from crawl.py and placed into
   a separate yaml file called `default_converters.yml`. There is a new test testing for
   the correct loading behavior of that file.
+- JSONFileConverter, YAMLFileConverter and MarkdownFileConverter now inherit from
+  SimpleFileConverter. Behavior is unchanged, except that the MarkdownFileConverter now raises a
+  ConverterValidationError when the YAML header cannot be read instead of silently not matching.
 
 ### Deprecated ###
 
+- The ``prefix`` argument of `crawler_main` is deprecated. Use the new argument
+  ``remove_prefix`` instead.
+
 ### Removed ###
+- The command line argument ``--prefix``. Use the new argument ``--remove-prefix`` instead.
 
 ### Fixed ###
+
 - an empty string as name is treated as no name (as does the server). This, fixes
   queries for identifiables since it would contain "WITH name=''" otherwise
   which is an impossible condition. If your cfoods contained this case, they are ill defined.
diff --git a/INSTALL.md b/INSTALL.md
new file mode 100644
index 0000000000000000000000000000000000000000..ba220626460c559aeded69d360c85917e0c78066
--- /dev/null
+++ b/INSTALL.md
@@ -0,0 +1,57 @@
+# Installation ##
+
+
+## Linux ####
+
+Make sure that Python (at least version 3.8) and pip are installed, using your system tools and
+documentation.
+
+Then open a terminal and continue in the [Generic installation](#generic-installation) section.
+
+## Windows ####
+
+If a Python distribution is not yet installed, we recommend Anaconda Python, which you can download
+for free from [https://www.anaconda.com](https://www.anaconda.com).  The "Anaconda Individual Edition" provides most of the
+packages you will ever need out of the box.  If you prefer, you may also install the leaner
+"Miniconda" installer, which allows you to install packages as you need them.
+
+After installation, open an Anaconda prompt from the Windows menu and continue in the [Generic
+installation](#generic-installation) section.
+
+## MacOS ####
+
+If there is no Python 3 installed yet, there are two main ways to
+obtain it: Either get the binary package from
+[python.org](https://www.python.org/downloads/) or, for advanced
+users, install via [Homebrew](https://brew.sh/). After installation
+from python.org, it is recommended to also update the TLS certificates
+for Python (this requires administrator rights for your user):
+
+```sh
+# Replace this with your Python version number:
+cd /Applications/Python\ 3.9/
+
+# This needs administrator rights:
+sudo ./Install\ Certificates.command
+```
+
+After these steps, you may continue with the [Generic
+installation](#generic-installation).
+
+## Generic installation ####
+
+The CaosDB crawler is available as a [PyPI
+package](https://pypi.org/project/caoscrawler/) and can simply be installed with
+
+```sh
+pip3 install caoscrawler
+```
+
+Alternatively, obtain the sources from GitLab and install from there (`git` must
+be installed for this option):
+
+```sh
+git clone https://gitlab.com/caosdb/caosdb-crawler
+cd caosdb-crawler
+pip3 install --user .
+```
diff --git a/integrationtests/test_data/extroot/realworld_example/dataset_cfoods.yml b/integrationtests/test_data/extroot/realworld_example/dataset_cfoods.yml
index 7a64d708667182b80b739812e5fdf3369fc5b462..37a34d125dcff1d121b1bded2fe959c4d30ff403 100644
--- a/integrationtests/test_data/extroot/realworld_example/dataset_cfoods.yml
+++ b/integrationtests/test_data/extroot/realworld_example/dataset_cfoods.yml
@@ -153,6 +153,13 @@ Data:
                 metadata_json: &metadata_json_template
                   type: JSONFile
                   match: metadata.json
+                  records:
+                    JSONFile:
+                      parents:
+                        - JSONFile
+                      role: File
+                      path: ${metadata_json.path}
+                      file: ${metadata_json.path}
                   validate: schema/dataset.schema.json
                   subtree:
                     jsondict:
diff --git a/integrationtests/test_data/extroot/realworld_example/schema/dataspace.schema.json b/integrationtests/test_data/extroot/realworld_example/schema/dataspace.schema.json
index 01653bfa821e0a0acbb5a481bfd458e2ed784fb9..36233230ae05f9df58ae4e492ff1f709322f6e51 100644
--- a/integrationtests/test_data/extroot/realworld_example/schema/dataspace.schema.json
+++ b/integrationtests/test_data/extroot/realworld_example/schema/dataspace.schema.json
@@ -9,6 +9,7 @@
       "minimum": 20000
     },
     "archived": { "type": "boolean" },
+    "JSONFile": { "type": "object" },
     "url": {
       "type": "string",
       "description": "link to folder on file system (CaosDB or cloud folder)"
diff --git a/integrationtests/test_data/extroot/use_case_simple_presentation/cfood.yml b/integrationtests/test_data/extroot/use_case_simple_presentation/cfood.yml
index 6495e1828dc56e99459c162f7751951f880ea55c..c55be2157a1f079ecfb5809c3658586f9114fad1 100644
--- a/integrationtests/test_data/extroot/use_case_simple_presentation/cfood.yml
+++ b/integrationtests/test_data/extroot/use_case_simple_presentation/cfood.yml
@@ -25,8 +25,8 @@ extroot:
               parents:
                 - mdfile
               role: File
-              path: $DataFile
-              file: $DataFile
+              path: ${DataFile.path}
+              file: ${DataFile.path}
 
             Experiment:
               mdfile: $mdfile
@@ -68,8 +68,8 @@ extroot:
               parents:
                 - mdfile
               role: File
-              path: $DataFile
-              file: $DataFile
+              path: ${DataFile.path}
+              file: ${DataFile.path}
 
             Experiment: {}
 
diff --git a/integrationtests/test_realworld_example.py b/integrationtests/test_realworld_example.py
index 4158ed22278ef5c871a22d45885e58fbfa84ea3b..cb5ed2c769945af033bc56a2d6af3bf1cec86de4 100644
--- a/integrationtests/test_realworld_example.py
+++ b/integrationtests/test_realworld_example.py
@@ -25,6 +25,7 @@
 an integration test module that runs a test against a (close to) real world example
 """
 from caosdb.utils.register_tests import clear_database, set_test_key
+import logging
 import json
 import os
 
@@ -35,6 +36,7 @@ from caoscrawler.identifiable_adapters import CaosDBIdentifiableAdapter
 from caoscrawler.structure_elements import Directory
 import pytest
 from caosadvancedtools.models.parser import parse_model_from_json_schema, parse_model_from_yaml
+from caosadvancedtools.loadFiles import loadpath
 
 import sys
 
@@ -52,6 +54,17 @@ def rfp(*pathcomponents):
 DATADIR = rfp("test_data", "extroot", "realworld_example")
 
 
+@pytest.fixture
+def addfiles():
+    """Run ``loadpath`` on the mounted extroot (presumably so the server knows the files)."""
+    loadpath(
+        path='/opt/caosdb/mnt/extroot/',
+        include=None, exclude=None,
+        prefix="",
+        dryrun=False, forceAllowSymlinks=True,
+    )
+
+
 @pytest.fixture
 def usemodel():
     # First load dataspace data model
@@ -85,22 +98,21 @@ def create_identifiable_adapter():
     return ident
 
 
-def test_dataset(clear_database, usemodel):
-    ident = create_identifiable_adapter()
-    crawler = Crawler(identifiableAdapter=ident)
-    crawler_definition = crawler.load_definition(
-        os.path.join(DATADIR, "dataset_cfoods.yml"))
-    # print(json.dumps(crawler_definition, indent=3))
-    # Load and register converter packages:
-    converter_registry = crawler.load_converters(crawler_definition)
-    # print("DictIntegerElement" in converter_registry)
-
-    records = crawler.start_crawling(
-        Directory("data", os.path.join(DATADIR, 'data')),
-        crawler_definition,
-        converter_registry
+def test_dataset(clear_database, usemodel, addfiles, caplog):
+    caplog.set_level(logging.DEBUG, logger="caoscrawler")
+    identifiable_path = os.path.join(DATADIR, "identifiables.yml")
+    crawler_definition_path = os.path.join(DATADIR, "dataset_cfoods.yml")
+    crawler_main(
+        os.path.join(DATADIR, 'data'),
+        crawler_definition_path,
+        identifiable_path,
+        True,
+        os.path.join(DATADIR, "provenance.yml"),
+        False,
+        remove_prefix=DATADIR,
+        # this test will fail without this prefix since the crawler would try to create new files
+        add_prefix="/extroot/realworld_example"
     )
-    crawler.synchronize()
 
     dataspace = db.execute_query("FIND RECORD Dataspace WITH name=35 AND dataspace_id=20002 AND "
                                  "archived=FALSE AND url='https://datacloud.de/index.php/f/7679'"
@@ -119,13 +131,17 @@ def test_dataset(clear_database, usemodel):
                             "start_datetime='2022-02-10T16:36:48+01:00'") == 1
     assert db.execute_query(f"FIND Event WITH latitude=53", unique=True)
 
+    # test logging
+    assert "Executed inserts" in caplog.text
+    assert "Going to insert" in caplog.text
+    assert "Executed updates" in caplog.text
+
 
-def test_event_update(clear_database, usemodel):
+def test_event_update(clear_database, usemodel, addfiles):
 
     identifiable_path = os.path.join(DATADIR, "identifiables.yml")
     crawler_definition_path = os.path.join(DATADIR, "dataset_cfoods.yml")
 
-    # TODO(fspreck): Use crawler_main
     crawler_main(
         os.path.join(DATADIR, 'data'),
         crawler_definition_path,
@@ -133,7 +149,9 @@ def test_event_update(clear_database, usemodel):
         True,
         os.path.join(DATADIR, "provenance.yml"),
         False,
-        ""
+        remove_prefix=DATADIR,
+        # this test will fail without this prefix since the crawler would try to create new files
+        add_prefix="/extroot/realworld_example"
     )
 
     old_dataset_rec = db.execute_query(
diff --git a/integrationtests/test_use_case_simple_presentation.py b/integrationtests/test_use_case_simple_presentation.py
index 91c523be90a4d0117a7cc54217cae0b911511957..5fc0f6c7d85a0fce4490c72952e711fe241a0099 100644
--- a/integrationtests/test_use_case_simple_presentation.py
+++ b/integrationtests/test_use_case_simple_presentation.py
@@ -38,9 +38,7 @@ DATADIR = os.path.join(os.path.dirname(__file__), "test_data",
                        "extroot", "use_case_simple_presentation")
 
 
-def test_complete_crawler(
-        clear_database
-):
+def test_complete_crawler(clear_database):
     # Setup the data model:
     model = parser.parse_model_from_yaml(os.path.join(DATADIR, "model.yml"))
     model.sync_data_model(noquestion=True, verbose=False)
@@ -57,13 +55,24 @@ def test_complete_crawler(
             dryrun=False,
             forceAllowSymlinks=False)
 
+    # test that a bad value for "remove_prefix" leads to runtime error
+    with pytest.raises(RuntimeError) as re:
+        crawler_main(DATADIR,
+                     os.path.join(DATADIR, "cfood.yml"),
+                     os.path.join(DATADIR, "identifiables.yml"),
+                     True,
+                     os.path.join(DATADIR, "provenance.yml"),
+                     False,
+                     remove_prefix="sldkfjsldf")
+    assert "path does not start with the prefix" in str(re.value)
+
     crawler_main(DATADIR,
                  os.path.join(DATADIR, "cfood.yml"),
                  os.path.join(DATADIR, "identifiables.yml"),
                  True,
                  os.path.join(DATADIR, "provenance.yml"),
                  False,
-                 "/use_case_simple_presentation")
+                 remove_prefix=os.path.abspath(DATADIR))
 
     res = db.execute_query("FIND Record Experiment")
     assert len(res) == 1
diff --git a/src/caoscrawler/converters.py b/src/caoscrawler/converters.py
index 0d1c5d631e7ba4bea61e6c437f6c125b1a55125a..80a3728ce5b1f413d2bdd674b26a7dca1122eef5 100644
--- a/src/caoscrawler/converters.py
+++ b/src/caoscrawler/converters.py
@@ -56,6 +56,10 @@ SPECIAL_PROPERTIES = ("description", "name", "id", "path",
 logger = logging.getLogger(__name__)
 
 
+class CrawlerTemplate(Template):
+    braceidpattern = r"(?a:[_a-z][_\.a-z0-9]*)"  # also allows "." in ${...} names
+
+
 def _only_max(children_with_keys):
 
     return [max(children_with_keys, key=lambda x: x[1])[0]]
@@ -110,6 +114,19 @@ class ConverterValidationError(Exception):
         self.message = msg
 
 
+def create_path_value(func):
+    """Decorator for ``create_values`` methods that also stores the element's path.
+
+    After the wrapped call, ``element.path`` is stored in ``values`` as ``<self.name>.path``.
+    """
+    from functools import wraps  # local import, so no file-level import is needed
+    @wraps(func)  # keep name and docstring of the wrapped method
+    def inner(self, values: GeneralStore, element: StructureElement):
+        func(self, values=values, element=element)
+        values.update({self.name + ".path": element.path})
+    return inner
+
+
 def replace_variables(propvalue, values: GeneralStore):
     """
     This function replaces variables in property values (and possibly other locations,
@@ -133,7 +150,7 @@ def replace_variables(propvalue, values: GeneralStore):
             if isinstance(values[varname], db.Entity):
                 return values[varname]
 
-    propvalue_template = Template(propvalue)
+    propvalue_template = CrawlerTemplate(propvalue)
     return propvalue_template.safe_substitute(**values.get_storage())
 
 
@@ -241,7 +258,7 @@ def create_records(values: GeneralStore, records: RecordStore, def_records: dict
                 continue
 
             # Allow replacing variables in keys / names of properties:
-            key_template = Template(key)
+            key_template = CrawlerTemplate(key)
             key = key_template.safe_substitute(**values.get_storage())
 
             keys_modified.append((name, key))
@@ -477,6 +494,10 @@ class DirectoryConverter(Converter):
 
         return children
 
+    @create_path_value  # also stores element.path under "<converter name>.path"
+    def create_values(self, values: GeneralStore, element: StructureElement):
+        super().create_values(values=values, element=element)
+
     def typecheck(self, element: StructureElement):
         return isinstance(element, Directory)
 
@@ -524,6 +545,10 @@ class SimpleFileConverter(Converter):
     def create_children(self, generalStore: GeneralStore, element: StructureElement):
         return list()
 
+    @create_path_value  # also stores element.path under "<converter name>.path"
+    def create_values(self, values: GeneralStore, element: StructureElement):
+        super().create_values(values=values, element=element)
+
     @Converter.debug_matching("name")
     def match(self, element: StructureElement):
         # TODO: See comment on types and inheritance
@@ -542,7 +567,7 @@ class FileConverter(SimpleFileConverter):
         super().__init__(*args, **kwargs)
 
 
-class MarkdownFileConverter(Converter):
+class MarkdownFileConverter(SimpleFileConverter):
     """
     reads the yaml header of markdown files (if a such a header exists).
     """
@@ -552,8 +577,18 @@ class MarkdownFileConverter(Converter):
         if not isinstance(element, File):
             raise RuntimeError("A markdown file is needed to create children.")
 
-        header = yaml_header_tools.get_header_from_file(
-            element.path, clean=False)
+        try:
+            header = yaml_header_tools.get_header_from_file(
+                element.path, clean=False)
+        except yaml_header_tools.NoValidHeader:
+            if generalStore is not None and self.name in generalStore:
+                path = generalStore[self.name]
+            else:
+                path = "<path not set>"
+            raise ConverterValidationError(
+                "Error during the validation (yaml header cannot be read) of the markdown file "
+                "located at the following node in the data structure:\n"
+                f"{path}")
         children: List[StructureElement] = []
 
         for name, entry in header.items():
@@ -566,25 +601,6 @@ class MarkdownFileConverter(Converter):
                     "Header entry {} has incompatible type.".format(name))
         return children
 
-    def typecheck(self, element: StructureElement):
-        return isinstance(element, File)
-
-    @Converter.debug_matching("name")
-    def match(self, element: StructureElement):
-        # TODO: See comment on types and inheritance
-        if not isinstance(element, File):
-            raise RuntimeError("Element must be a file.")
-        m = re.match(self.definition["match"], element.name)
-        if m is None:
-            return None
-        try:
-            yaml_header_tools.get_header_from_file(element.path)
-        except yaml_header_tools.NoValidHeader:
-            # TODO(salexan): Raise a validation error instead of just not
-            # matching silently.
-            return None
-        return m.groupdict()
-
 
 def convert_basic_element(element: Union[list, dict, bool, int, float, str, None], name=None,
                           msg_prefix=""):
@@ -691,20 +707,7 @@ class DictDictElementConverter(DictElementConverter):
         super().__init__(*args, **kwargs)
 
 
-class JSONFileConverter(Converter):
-    def typecheck(self, element: StructureElement):
-        return isinstance(element, File)
-
-    @Converter.debug_matching("name")
-    def match(self, element: StructureElement):
-        # TODO: See comment on types and inheritance
-        if not self.typecheck(element):
-            raise RuntimeError("Element must be a file")
-        m = re.match(self.definition["match"], element.name)
-        if m is None:
-            return None
-        return m.groupdict()
-
+class JSONFileConverter(SimpleFileConverter):
     def create_children(self, generalStore: GeneralStore, element: StructureElement):
         # TODO: See comment on types and inheritance
         if not isinstance(element, File):
@@ -726,20 +729,7 @@ class JSONFileConverter(Converter):
         return [structure_element]
 
 
-class YAMLFileConverter(Converter):
-    def typecheck(self, element: StructureElement):
-        return isinstance(element, File)
-
-    @Converter.debug_matching("name")
-    def match(self, element: StructureElement):
-        # TODO: See comment on types and inheritance
-        if not self.typecheck(element):
-            raise RuntimeError("Element must be a file")
-        m = re.match(self.definition["match"], element.name)
-        if m is None:
-            return None
-        return m.groupdict()
-
+class YAMLFileConverter(SimpleFileConverter):
     def create_children(self, generalStore: GeneralStore, element: StructureElement):
         # TODO: See comment on types and inheritance
         if not isinstance(element, File):
diff --git a/src/caoscrawler/crawl.py b/src/caoscrawler/crawl.py
index ef06ab15777be7405c924860d8d0ab0cc32ff3bc..c77dcee1f29eac69732ce353e0271761eca2df13 100644
--- a/src/caoscrawler/crawl.py
+++ b/src/caoscrawler/crawl.py
@@ -49,6 +49,7 @@ from typing import Any, Optional, Type, Union
 
 import caosdb as db
 
+from caosadvancedtools.utils import create_entity_link
 from caosadvancedtools.cache import UpdateCache, Cache
 from caosadvancedtools.crawler import Crawler as OldCrawler
 from caosdb.apiutils import (compare_entities, EntityMergeConflictError,
@@ -1016,20 +1017,25 @@ class Crawler(object):
                                                                            referencing_entities)
             for record in to_be_updated]
         # Merge with existing data to prevent unwanted overwrites
-        to_be_updated = self._merge_properties_from_remote(to_be_updated,
-                                                           identified_records)
+        to_be_updated = self._merge_properties_from_remote(to_be_updated, identified_records)
         # remove unnecessary updates from list by comparing the target records
         # to the existing ones
-        to_be_updated = self.remove_unnecessary_updates(
-            to_be_updated, identified_records)
+        to_be_updated = self.remove_unnecessary_updates(to_be_updated, identified_records)
 
+        logger.info(f"Going to insert {len(to_be_inserted)} Entities and update "
+                    f"{len(to_be_updated)} Entities.")
         if commit_changes:
             self.execute_parent_updates_in_list(to_be_updated, securityMode=self.securityMode,
                                                 run_id=self.run_id, unique_names=unique_names)
+            logger.info(f"Added parent RecordTypes where necessary.")
             self.execute_inserts_in_list(
                 to_be_inserted, self.securityMode, self.run_id, unique_names=unique_names)
+            logger.info(f"Executed inserts:\n"
+                        + self.create_entity_summary(to_be_inserted))
             self.execute_updates_in_list(
                 to_be_updated, self.securityMode, self.run_id, unique_names=unique_names)
+            logger.info(f"Executed updates:\n"
+                        + self.create_entity_summary(to_be_updated))
 
         update_cache = UpdateCache()
         pending_inserts = update_cache.get_inserts(self.run_id)
@@ -1044,6 +1050,25 @@ class Crawler(object):
 
         return (to_be_inserted, to_be_updated)
 
+    @staticmethod
+    def create_entity_summary(entities: list[db.Entity]):
+        """Create a summary string representation of a list of entities.
+
+        The entities are grouped by the names of their parents; an entity with several parents
+        is listed once per parent. For each parent name, a line ``<name>:`` is followed by a
+        line with the comma separated results of ``create_entity_link`` for its entities.
+        """
+        # group the entities by parent name
+        grouped = {}
+        for entity in entities:
+            for parent in entity.parents:
+                grouped.setdefault(parent.name, []).append(entity)
+        # one "<name>:" line plus one line of links per parent
+        return "".join(
+            f"{name}:\n" + ", ".join(create_entity_link(ent) for ent in members) + "\n"
+            for name, members in grouped.items()
+        )
+
     @staticmethod
     def inform_about_pending_changes(pending_changes, run_id, path, inserts=False):
         # Sending an Email with a link to a form to authorize updates is
@@ -1228,7 +1253,9 @@ def crawler_main(crawled_directory_path: str,
                  prefix: str = "",
                  securityMode: SecurityMode = SecurityMode.UPDATE,
                  unique_names=True,
-                 restricted_path: Optional[list[str]] = None
+                 restricted_path: Optional[list[str]] = None,
+                 remove_prefix: Optional[str] = None,
+                 add_prefix: Optional[str] = None,
                  ):
     """
 
@@ -1247,7 +1274,7 @@ def crawler_main(crawled_directory_path: str,
     dry_run : bool
         do not commit any chnages to the server
     prefix : str
-        remove the given prefix from file paths
+        DEPRECATED, remove the given prefix from file paths
     securityMode : int
         securityMode of Crawler
     unique_names : bool
@@ -1255,6 +1282,10 @@ def crawler_main(crawled_directory_path: str,
     restricted_path: optional, list of strings
             Traverse the data tree only along the given path. When the end of the given path
             is reached, traverse the full tree as normal.
+    remove_prefix : Optional[str]
+        remove the given prefix from file paths
+    add_prefix : Optional[str]
+        add the given prefix to file paths
 
     Returns
     -------
@@ -1271,11 +1302,19 @@ def crawler_main(crawled_directory_path: str,
         crawler.save_debug_data(provenance_file)
 
     if identifiables_definition_file is not None:
-
         ident = CaosDBIdentifiableAdapter()
         ident.load_from_yaml_definition(identifiables_definition_file)
         crawler.identifiableAdapter = ident
 
+    if prefix != "":
+        warnings.warn(DeprecationWarning("The prefix argument is deprecated and will be removed "
+                                         "in the future. Please use `remove_prefix` instead."))
+        if remove_prefix is not None:
+            raise ValueError("Please do not supply the (deprecated) `prefix` and the "
+                             "`remove_prefix` argument at the same time. Only use "
+                             "`remove_prefix` instead.")
+        remove_prefix = prefix
+
     if dry_run:
         ins, upd = crawler.synchronize(commit_changes=False)
         inserts = [str(i) for i in ins]
@@ -1290,11 +1329,15 @@ def crawler_main(crawled_directory_path: str,
             if isinstance(elem, db.File):
                 # correct the file path:
                 # elem.file = os.path.join(args.path, elem.file)
-                if prefix is None:
-                    raise RuntimeError(
-                        "No prefix set. Prefix must be set if files are used.")
-                if elem.path.startswith(prefix):
-                    elem.path = elem.path[len(prefix):]
+                if remove_prefix:
+                    if elem.path.startswith(remove_prefix):
+                        elem.path = elem.path[len(remove_prefix):]
+                    else:
+                        raise RuntimeError("Prefix shall be removed from file path but the path "
+                                           "does not start with the prefix:"
+                                           f"\n{remove_prefix}\n{elem.path}")
+                if add_prefix:
+                    elem.path = add_prefix + elem.path
                 elem.file = None
                 # TODO: as long as the new file backend is not finished
                 #       we are using the loadFiles function to insert symlinks.
@@ -1362,8 +1405,12 @@ def parse_args():
     parser.add_argument("-u", "--unique-names",
                         help="Insert or updates entities even if name conflicts exist.")
     parser.add_argument("-p", "--prefix",
-                        help="Remove the given prefix from the paths "
-                        "of all file objects.")
+                        help="DEPRECATED, use --remove-prefix instead. Remove the given prefix "
+                        "from the paths of all file objects.")
+    parser.add_argument("--remove-prefix",
+                        help="Remove the given prefix from the paths of all file objects.")
+    parser.add_argument("--add-prefix",
+                        help="Add the given prefix to the paths of all file objects.")
 
     return parser.parse_args()
 
@@ -1383,6 +1430,10 @@ def main():
     conlogger = logging.getLogger("connection")
     conlogger.setLevel(level=logging.ERROR)
 
+    if args.prefix:
+        print("Please use '--remove-prefix' option instead of '--prefix' or '-p'.")
+        return -1
+
     # logging config for local execution
     logger.addHandler(logging.StreamHandler(sys.stdout))
     if args.debug:
@@ -1405,12 +1456,13 @@ def main():
         debug=args.debug,
         provenance_file=args.provenance,
         dry_run=args.dry_run,
-        prefix=args.prefix,
         securityMode={"retrieve": SecurityMode.RETRIEVE,
                       "insert": SecurityMode.INSERT,
                       "update": SecurityMode.UPDATE}[args.security_mode],
         unique_names=args.unique_names,
-        restricted_path=restricted_path
+        restricted_path=restricted_path,
+        remove_prefix=args.remove_prefix,
+        add_prefix=args.add_prefix,
     ))
 
 
diff --git a/src/doc/README_SETUP.md b/src/doc/README_SETUP.md
index 1f6e15d408e10e38bce0d9b9fe9b6197ec69bfc3..952a8c94a7dfa24110f320f5dd32b0ad2ac1df01 100644
--- a/src/doc/README_SETUP.md
+++ b/src/doc/README_SETUP.md
@@ -1,63 +1,10 @@
 # Getting started with the CaosDB Crawler #
 
-## Installation ##
-
-### How to install ###
-
-#### Linux ####
-
-Make sure that Python (at least version 3.8) and pip is installed, using your system tools and
-documentation.
-
-Then open a terminal and continue in the [Generic installation](#generic-installation) section.
-
-#### Windows ####
-
-If a Python distribution is not yet installed, we recommend Anaconda Python, which you can download
-for free from [https://www.anaconda.com](https://www.anaconda.com).  The "Anaconda Individual Edition" provides most of all
-packages you will ever need out of the box.  If you prefer, you may also install the leaner
-"Miniconda" installer, which allows you to install packages as you need them.
-
-After installation, open an Anaconda prompt from the Windows menu and continue in the [Generic
-installation](#generic-installation) section.
-
-#### MacOS ####
-
-If there is no Python 3 installed yet, there are two main ways to
-obtain it: Either get the binary package from
-[python.org](https://www.python.org/downloads/) or, for advanced
-users, install via [Homebrew](https://brew.sh/). After installation
-from python.org, it is recommended to also update the TLS certificates
-for Python (this requires administrator rights for your user):
-
-```sh
-# Replace this with your Python version number:
-cd /Applications/Python\ 3.9/
-
-# This needs administrator rights:
-sudo ./Install\ Certificates.command
-```
-
-After these steps, you may continue with the [Generic
-installation](#generic-installation).
-
-#### Generic installation ####
-
----
-
-Obtain the sources from GitLab and install from there (`git` must be installed for
-this option):
-
-```sh
-git clone https://gitlab.com/caosdb/caosdb-crawler
-cd caosdb-crawler
-pip3 install --user .
-```
-
-**Note**: In the near future, this package will also be made available on PyPi.
-
+## Installation
+see INSTALL.md
 
 ## Run Unit Tests
+Run `pytest unittests`.
 
 ## Documentation ##
 We use sphinx to create the documentation. Docstrings in the code should comply
diff --git a/src/doc/cfood.rst b/src/doc/cfood.rst
index 37f6a8c7d3be9298ec965c50a4ec29110988ddc6..6564ee677f0b363a52c44dd5ceabe5378c255105 100644
--- a/src/doc/cfood.rst
+++ b/src/doc/cfood.rst
@@ -149,6 +149,44 @@ create lists or multi properties instead of single values:
 .. code-block:: yaml
                 
         Experiment1:
-            Measurement: +Measurement <- Element in List (list is cleared before run)
-                         *Measurement <- Multi Property (properties are removed before run)
-                         Measurement  <- Overwrite
+            Measurement: +Measurement #  Element in List (list is cleared before run)
+                         *Measurement #  Multi Property (properties are removed before run)
+                         Measurement  #  Overwrite
+
+
+File Entities
+-------------
+
+In order to use File Entities, you must set the appropriate ``role: File``.
+Additionally, the path and file keys have to be given, with values that set the
+paths remotely and locally, respectively. You can use the variable
+``<converter name>.path`` that is automatically created by converters that deal
+with file system related StructureElements. The file object itself is stored
+in a variable with the same name (as it is the case for other Records).
+
+
+.. code-block:: yaml
+
+      somefile:
+        type: SimpleFile
+        match: ^params.*$  # match any file that starts with "params"
+        records:
+          fileEntity:
+            role: File              # necessary to create a File Entity
+            path: ${somefile.path}  # defines the path in CaosDB
+            file: ${somefile.path}  # path where the file is found locally
+          SomeRecord:
+            ParameterFile: $fileEntity  # creates a reference to the file
+
+Automatically generated keys
+++++++++++++++++++++++++++++
+
+Some variable names are automatically generated and can be used using the
+``$<variable name>`` syntax. Those include:
+
+- ``<converter name>``: access the path of converter names to the current converter
+- ``<converter name>.path``: the file system path to the structure element
+  (file system related converters only; you need curly brackets to use them:
+  ``${<converter name>.path}``)
+- ``<Record key>``: all entities that are created in the ``records`` section
+  are available under the same key
diff --git a/src/doc/concepts.rst b/src/doc/concepts.rst
index 89757f21958f3d94649b33e9f9112593f703191d..0881d9302b621d6b47575e171dd9e8c144e29cd4 100644
--- a/src/doc/concepts.rst
+++ b/src/doc/concepts.rst
@@ -1,6 +1,10 @@
 Concepts
 ))))))))
 
+The CaosDB Crawler can handle any kind of hierarchical data structure. The typical use case is a
+directory tree that is traversed. We use the following terms/concepts to describe how the CaosDB
+Crawler works.
+
 Structure Elements
 ++++++++++++++++++
 
diff --git a/src/doc/getting_started/INSTALL.md b/src/doc/getting_started/INSTALL.md
new file mode 120000
index 0000000000000000000000000000000000000000..95b6037c7ab329d91e3a8ed4a2b31eba675eef62
--- /dev/null
+++ b/src/doc/getting_started/INSTALL.md
@@ -0,0 +1 @@
+../../../INSTALL.md
\ No newline at end of file
diff --git a/src/doc/getting_started/helloworld.rst b/src/doc/getting_started/helloworld.rst
new file mode 100644
index 0000000000000000000000000000000000000000..ef4a1398322b59d7983b7dff384534cfa501b660
--- /dev/null
+++ b/src/doc/getting_started/helloworld.rst
@@ -0,0 +1,5 @@
+
+Hello World
+)))))))))))
+
+TODO Describe the smallest possible crawler run
diff --git a/src/doc/getting_started/index.rst b/src/doc/getting_started/index.rst
new file mode 100644
index 0000000000000000000000000000000000000000..74ffa7daeff393d05605e1066a5985984c2e9751
--- /dev/null
+++ b/src/doc/getting_started/index.rst
@@ -0,0 +1,15 @@
+Getting Started
++++++++++++++++
+
+.. toctree::
+   :maxdepth: 2
+   :caption: Contents:
+   :hidden:
+
+   Installation<INSTALL>
+   prerequisites
+   helloworld
+
+This section will help you get going! From the first installation steps to the first simple crawl.
+
+Let's go!
diff --git a/src/doc/getting_started/prerequisites.rst b/src/doc/getting_started/prerequisites.rst
new file mode 100644
index 0000000000000000000000000000000000000000..dc8022b6cad99a8508f19f47dc01c601fb676c5b
--- /dev/null
+++ b/src/doc/getting_started/prerequisites.rst
@@ -0,0 +1,6 @@
+
+Prerequisites
+)))))))))))))
+
+TODO Describe what you need to actually do a crawler run: data, CaosDB, ...
+
diff --git a/src/doc/index.rst b/src/doc/index.rst
index b4e30e4728068cabb92626cfac986ab858a0bbb6..d319bf4d24a05a3033b1ae5bbf80433c5ef3646b 100644
--- a/src/doc/index.rst
+++ b/src/doc/index.rst
@@ -7,12 +7,12 @@ CaosDB-Crawler Documentation
    :caption: Contents:
    :hidden:
 
-   Getting started<README_SETUP>
+   Getting started<getting_started/index>
+   Tutorials<tutorials/index>
    Concepts<concepts>
    Converters<converters>
    CFoods (Crawler Definitions)<cfood>
    Macros<macros>
-   Tutorials<tutorials/index>
    How to upgrade<how-to-upgrade>
    API documentation<_apidoc/modules>
 
diff --git a/src/doc/macros.rst b/src/doc/macros.rst
index d3a3e9b9634a4e1d72228dd46692a824e1d5acfd..7685731d35afab51074bb4d12c51ede0a7ba1b75 100644
--- a/src/doc/macros.rst
+++ b/src/doc/macros.rst
@@ -195,7 +195,7 @@ The example will be expanded to:
 
 
 Limitation
-----------
+==========
 
 Currently it is not possible to use the same macro twice in the same yaml node, but in different
 positions. Consider:
diff --git a/src/doc/tutorials/index.rst b/src/doc/tutorials/index.rst
index 88d598ece284e1aad315a1e0fcae3fdf494b3aad..02371de196cc139776416882aff31bd6fa4dabbe 100644
--- a/src/doc/tutorials/index.rst
+++ b/src/doc/tutorials/index.rst
@@ -1,9 +1,11 @@
 Tutorials
 +++++++++
 
+This chapter contains a collection of tutorials.
+
 .. toctree::
    :maxdepth: 2
    :caption: Contents:
-   :hidden:
 
    Example CFood<example>
+
diff --git a/unittests/scifolder_cfood.yml b/unittests/scifolder_cfood.yml
index 74fd027563907c5ae416ca389faba0ecd64d5848..dce219b751c3e980662a1eaa4904e1163d9836a0 100644
--- a/unittests/scifolder_cfood.yml
+++ b/unittests/scifolder_cfood.yml
@@ -22,7 +22,7 @@ Data:  # name of the converter
               parents:
               - Project  # not needed as the name is equivalent
               date: $date
-              identifier: $identifier
+              identifier: ${identifier}
       
           subtree:
             measurement:  # new name for folders on the 3rd level
diff --git a/unittests/test_converters.py b/unittests/test_converters.py
index f72deda18152f9d12161d740e41271f90fcb848c..4d3791fce3ceffaafe529423e4020ebd6a4231ba 100644
--- a/unittests/test_converters.py
+++ b/unittests/test_converters.py
@@ -130,14 +130,11 @@ def test_markdown_converter(converter_registry):
         )
     )
 
-    converter = MarkdownFileConverter({
-        "match": "(.*)"
-    }, "TestMarkdownFileConverter",
-       converter_registry)
+    converter = MarkdownFileConverter({"match": "(.*)"}, "TestMarkdownFileConverter",
+                                      converter_registry)
 
-    m = converter.match(File("test_tool.py", rfp(
-        "test_tool.py")))
-    assert m is None
+    with pytest.raises(ConverterValidationError) as err:
+        converter.create_children(None, File("test_tool.py", rfp("test_tool.py")))
 
     m = converter.match(test_readme)
     assert m is not None
@@ -610,3 +607,17 @@ def test_load_converters():
     assert "SimpleFile" in converter_registry
     assert "Directory" in converter_registry
     assert "ListElement" in converter_registry
+
+
+def test_create_path_value(converter_registry):
+    """ test whether the variable containing the path is added to the general store"""
+    dc = Converter.converter_factory(
+        definition={
+            "type": "Directory",
+            "match": ".*"
+        },
+        name="Test", converter_registry=converter_registry)
+    values = GeneralStore()
+    dc.create_values(values, Directory("a", "/a"))
+    assert "Test.path" in values
+    assert values["Test.path"] == "/a"
diff --git a/unittests/test_scalars_cfood.py b/unittests/test_scalars_cfood.py
index 1bf8f0b7d67f00f2018b5b68424d6b9cc17602eb..ac408b2dab0fa151c370d3ec6ffd1dced22c77d7 100644
--- a/unittests/test_scalars_cfood.py
+++ b/unittests/test_scalars_cfood.py
@@ -42,16 +42,23 @@ def test_record_structure_generation(crawler):
     subd = crawler.debug_tree[dircheckstr("DataAnalysis")]
     assert len(subd) == 2
     # variables store on Data Analysis node of debug tree
-    assert len(subd[0]) == 3
-    assert "Data" in subd[0]
-    assert "DataAnalysis" in subd[0]
-    assert "RecordThatGetsParentsLater" in subd[0]
+    if "Data" in subd[0]:
+        subddata = subd[0]
+        subdRTGPL = subd[1]
+    else:
+        subddata = subd[1]
+        subdRTGPL = subd[0]
+    assert len(subddata) == 5
+    assert "DataAnalysis" in subddata
+    assert "DataAnalysis.path" in subddata
+    assert "Data.path" in subddata
+    assert "RecordThatGetsParentsLater" in subddata
 
-    prop = subd[0]["RecordThatGetsParentsLater"].get_property("someId")
+    prop = subddata["RecordThatGetsParentsLater"].get_property("someId")
     assert type(prop.value) == int
     assert prop.value == 23
 
     # record store on Data Analysis node of debug tree
-    assert len(subd[1]) == 1
-    prop2 = subd[1]["RecordThatGetsParentsLater"].get_property("someId")
+    assert len(subdRTGPL) == 1
+    prop2 = subdRTGPL["RecordThatGetsParentsLater"].get_property("someId")
     assert prop == prop2
diff --git a/unittests/test_tool.py b/unittests/test_tool.py
index 23b35f2dc9228eeda9137945198c49c19bf5c474..e15d7cb777ced4b92566df2b25b375e90be39295 100755
--- a/unittests/test_tool.py
+++ b/unittests/test_tool.py
@@ -110,15 +110,17 @@ def ident(crawler):
 
 
 def test_record_structure_generation(crawler):
+    # TODO How does this test relate to the test function in test_scalars_cfood with the same name?
+    #      There seems to be code duplication
     subd = crawler.debug_tree[dircheckstr("DataAnalysis")]
     subc = crawler.debug_metadata["copied"][dircheckstr("DataAnalysis")]
     assert len(subd) == 2
     # variables store on Data Analysis node of debug tree
-    assert len(subd[0]) == 2
+    assert len(subd[0]) == 4
     # record store on Data Analysis node of debug tree
     assert len(subd[1]) == 0
     assert len(subc) == 2
-    assert len(subc[0]) == 2
+    assert len(subc[0]) == 4
     assert len(subc[1]) == 0
 
     # The data analysis node creates one variable for the node itself:
@@ -137,7 +139,7 @@ def test_record_structure_generation(crawler):
     assert subd[1]["Project"].get_property(
         "identifier").value == "climate-model-predict"
 
-    assert len(subd[0]) == 6
+    assert len(subd[0]) == 9
     assert subd[0]["date"] == "2020"
     assert subd[0]["identifier"] == "climate-model-predict"
     assert subd[0]["Project"].__class__ == db.Record
@@ -148,7 +150,7 @@ def test_record_structure_generation(crawler):
     assert subc[0]["project_dir"] is False
 
     # Check the copy flags for the first level in the hierarchy:
-    assert len(subc[0]) == 6
+    assert len(subc[0]) == 9
     assert len(subc[1]) == 1
     assert subc[1]["Project"] is False
     assert subc[0]["Project"] is False
@@ -161,7 +163,7 @@ def test_record_structure_generation(crawler):
     subc = crawler.debug_metadata["copied"][dircheckstr("DataAnalysis",
                                                         "2020_climate-model-predict",
                                                         "2020-02-08_prediction-errors")]
-    assert len(subd[0]) == 8
+    assert len(subd[0]) == 12
     assert subd[0]["date"] == "2020-02-08"
     assert subd[0]["identifier"] == "prediction-errors"
     assert subd[0]["Project"].__class__ == db.Record
@@ -792,8 +794,7 @@ def test_validation_error_print(caplog):
                            os.path.join(DATADIR, "identifiables.yml"),
                            True,
                            None,
-                           False,
-                           "/use_case_simple_presentation")
+                           False)
         assert "Couldn't validate" in caplog.text
         caplog.clear()
 
@@ -965,3 +966,36 @@ def test_split_restricted_path():
     assert ["el"] == split_restricted_path("/el")
     assert ["el"] == split_restricted_path("/el/")
     assert ["el", "el"] == split_restricted_path("/el/el")
+
+
+def test_deprecated_prefix_option():
+    """Test that calling the crawler's main function with the deprecated
+    `prefix` option raises the correct errors and warnings.
+
+    """
+
+    with pytest.deprecated_call():
+        crawler_main("./", rfp("scifolder_cfood.yml"), prefix="to/be/removed")
+
+    with raises(ValueError) as ve:
+        crawler_main("./", rfp("scifolder_cfood.yml"), prefix="to/be/removed",
+                     remove_prefix="to/be/removed")
+
+    assert "(deprecated) `prefix` and the `remove_prefix`" in str(ve.value)
+
+
+def test_create_entity_summary():
+    assert "" == Crawler.create_entity_summary([]).strip()
+
+    entities = [
+        db.Record(id=1).add_parent("A"),
+        db.Record(id=4, name='a').add_parent("B"),
+        db.Record(id=5).add_parent("A"),
+        db.Record(id=6, name='b').add_parent("B"),
+    ]
+    text = Crawler.create_entity_summary(entities).strip()
+    assert 'a' in text
+    assert 'b' in text
+    assert 'A:' in text
+    assert 'B:' in text
+    assert "<a href='/Entity/4'>a</a>, <a href='/Entity/6'>b</a>" in text