diff --git a/src/caoscrawler/crawl.py b/src/caoscrawler/crawl.py
index 00363f9700914adaaa0b2b1c40074db2887983e9..b3537a9e88507f7c438ef0a72ee646f322d6c9a7 100644
--- a/src/caoscrawler/crawl.py
+++ b/src/caoscrawler/crawl.py
@@ -66,6 +66,8 @@ from .stores import GeneralStore, RecordStore
 from .structure_elements import StructureElement, Directory, NoneElement
 from .version import check_cfood_version
 
+from .scanner import scan_directory
+
 logger = logging.getLogger(__name__)
 
 SPECIAL_PROPERTIES_STRICT = ("description", "name", "id", "path")
@@ -174,27 +176,13 @@ class Crawler(object):
     """
 
     def __init__(self,
-                 generalStore: Optional[GeneralStore] = None,
-                 debug: bool = False,
-                 identifiableAdapter: IdentifiableAdapter = None,
-                 securityMode: SecurityMode = SecurityMode.UPDATE
-                 ):
+                 identifiableAdapter: Optional[IdentifiableAdapter] = None,
+                 securityMode: SecurityMode = SecurityMode.UPDATE):
         """
         Create a new crawler and initialize an empty RecordStore and GeneralStore.
 
         Parameters
         ----------
-        recordStore : GeneralStore
-             An initial GeneralStore which might store e.g. environment variables.
-        debug : bool
-             Create a debugging information tree when set to True.
-             The debugging information tree is a variable stored in
-             self.debug_tree. It is a dictionary mapping directory entries
-             to a tuple of general stores and record stores which are valid for
-             the directory scope.
-             Furthermore, it is stored in a second tree named self.debug_copied whether the
-             objects in debug_tree had been copied from a higher level in the hierarchy
-             of the structureelements.
         identifiableAdapter : IdentifiableAdapter
              TODO describe
         securityMode : int
@@ -207,279 +195,34 @@ class Crawler(object):
         # different caches.
         self.remote_existing_cache = IdentifiedCache()
         self.remote_missing_cache = IdentifiedCache()
-        self.recordStore = RecordStore()
         self.securityMode = securityMode
 
-        self.generalStore = generalStore
-        if generalStore is None:
-            self.generalStore = GeneralStore()
-
         self.identifiableAdapter: IdentifiableAdapter = LocalStorageIdentifiableAdapter()
         if identifiableAdapter is not None:
             self.identifiableAdapter = identifiableAdapter
-        # If a directory is crawled this may hold the path to that directory
-        self.crawled_directory: Optional[str] = None
-        self.debug = debug
-        if self.debug:
-            # order in the tuple:
-            # 0: generalStore
-            # 1: recordStore
-            self.debug_tree: dict[str, tuple] = dict()
-            self.debug_metadata: dict[str, dict] = dict()
-            self.debug_metadata["copied"] = dict()
-            self.debug_metadata["provenance"] = defaultdict(lambda: dict())
-            self.debug_metadata["usage"] = defaultdict(lambda: set())
-
-    def load_definition(self, crawler_definition_path: str):
-        """
-        Load a cfood from a crawler definition defined by
-        crawler definition path and validate it using cfood-schema.yml.
-        """
-
-        # Load the cfood from a yaml file:
-        with open(crawler_definition_path, "r") as f:
-            crawler_definitions = list(yaml.safe_load_all(f))
-
-        crawler_definition = self._load_definition_from_yaml_dict(
-            crawler_definitions)
-
-        return self._resolve_validator_paths(crawler_definition, crawler_definition_path)
-
-    def _load_definition_from_yaml_dict(self, crawler_definitions: list[dict]):
-        """Load crawler definitions from a list of (yaml) dicts `crawler_definitions` which
-        contains either one or two documents.
-
-        Doesn't resolve the validator paths in the cfood definition, so for
-        internal and testing use only.
-
-        """
-        if len(crawler_definitions) == 1:
-            # Simple case, just one document:
-            crawler_definition = crawler_definitions[0]
-            metadata = {}
-        elif len(crawler_definitions) == 2:
-            metadata = crawler_definitions[0]["metadata"] if "metadata" in crawler_definitions[0] else {
-            }
-            crawler_definition = crawler_definitions[1]
-        else:
-            raise RuntimeError(
-                "Crawler definition must not contain more than two documents.")
-
-        check_cfood_version(metadata)
-
-        # TODO: at this point this function can already load the cfood schema extensions
-        #       from the crawler definition and add them to the yaml schema that will be
-        #       tested in the next lines of code:
-
-        # Load the cfood schema:
-        with open(str(files('caoscrawler').joinpath('cfood-schema.yml')), "r") as f:
-            schema = yaml.safe_load(f)
-
-        # Add custom converters to converter enum in schema:
-        if "Converters" in crawler_definition:
-            for key in crawler_definition["Converters"]:
-                schema["cfood"]["$defs"]["converter"]["properties"]["type"]["enum"].append(
-                    key)
-        if len(crawler_definitions) == 2:
-            if "Converters" in metadata:
-                for key in metadata["Converters"]:
-                    schema["cfood"]["$defs"]["converter"]["properties"]["type"]["enum"].append(
-                        key)
-
-        # Validate the cfood schema:
-        validate(instance=crawler_definition, schema=schema["cfood"])
-
-        return crawler_definition
-
-    def _resolve_validator_paths(self, definition: dict, definition_path: str):
-        """Resolve path to validation files with respect to the file in which
-        the crawler was defined.
-
-        """
-
-        for key, value in definition.items():
-
-            if key == "validate" and isinstance(value, str):
-                # Validator is given by a path
-                if not value.startswith('/'):
-                    # Not an absolute path
-                    definition[key] = os.path.join(
-                        os.path.dirname(definition_path), value)
-                    if not os.path.isfile(definition[key]):
-                        # TODO(henrik) capture this in `crawler_main` similar to
-                        # `ConverterValidationError`.
-                        raise FileNotFoundError(
-                            f"Couldn't find validation file {definition[key]}")
-            elif isinstance(value, dict):
-                # Recursively resolve all validators
-                definition[key] = self._resolve_validator_paths(
-                    value, definition_path)
-
-        return definition
-
-    def load_converters(self, definition: dict):
-        """
-        Currently the converter registry is a dictionary containing for each converter:
-        - key is the short code, abbreviation for the converter class name
-        - module is the name of the module to be imported which must be installed
-        - class is the converter class to load and associate with this converter entry
-
-        all other info for the converter needs to be included in the converter plugin
-        directory:
-        schema.yml file
-        README.md documentation
 
-        TODO: this function does not make use of self, so it could become static.
-        """
-
-        # Defaults for the converter registry:
-        with open(str(files('caoscrawler').joinpath('default_converters.yml')), "r") as f:
-            converter_registry: dict[str, dict[str, str]] = yaml.safe_load(f)
-
-        # More converters from definition file:
-        if "Converters" in definition:
-            for key, entry in definition["Converters"].items():
-                if key in ["Dict", "DictTextElement", "DictIntegerElement", "DictBooleanElement",
-                           "DictDictElement", "DictListElement", "DictFloatElement"]:
-                    warnings.warn(DeprecationWarning(f"{key} is deprecated. Please use the new"
-                                                     " variant; without 'Dict' prefix or "
-                                                     "'DictElement' in case of 'Dict'"))
-
-                converter_registry[key] = {
-                    "converter": entry["converter"],
-                    "package": entry["package"]
-                }
-
-        # Load modules and associate classes:
-        for key, value in converter_registry.items():
-            module = importlib.import_module(value["package"])
-            value["class"] = getattr(module, value["converter"])
-        return converter_registry
-
-    def crawl_directory(self, dirname: str, crawler_definition_path: str,
+    def crawl_directory(self,
+                        crawled_directory: str,
+                        crawler_definition_path: str,
                         restricted_path: Optional[list[str]] = None):
-        """ Crawl a single directory.
-
-        Convenience function that starts the crawler (calls start_crawling)
-        with a single directory as the StructureElement.
-
-        restricted_path: optional, list of strings
-                Traverse the data tree only along the given path. When the end of the given path
-                is reached, traverse the full tree as normal.
         """
-
-        crawler_definition = self.load_definition(crawler_definition_path)
-        # Load and register converter packages:
-        converter_registry = self.load_converters(crawler_definition)
-
-        if not dirname:
-            raise ValueError(
-                "You have to provide a non-empty path for crawling.")
-        dir_structure_name = os.path.basename(dirname)
-        self.crawled_directory = dirname
-        if not dir_structure_name and dirname.endswith('/'):
-            if dirname == '/':
-                # Crawling the entire file system
-                dir_structure_name = "root"
-            else:
-                # dirname had a trailing '/'
-                dir_structure_name = os.path.basename(dirname[:-1])
-
-        self.start_crawling(Directory(dir_structure_name,
-                                      dirname),
-                            crawler_definition,
-                            converter_registry,
-                            restricted_path=restricted_path
-                            )
-
-    @staticmethod
-    def initialize_converters(crawler_definition: dict, converter_registry: dict):
-        """
-        takes the cfood as dict (`crawler_definition`) and creates the converter objects that
-        are defined on the highest level. Child Converters will in turn be created during the
-        initialization of the Converters.
-        """
-        converters = []
-
-        for key, value in crawler_definition.items():
-            # Definitions and Converters are reserved keywords
-            # on the top level of the yaml file.
-            # TODO: there should also be a top level keyword for the actual
-            #       CFood to avoid confusion between top level keywords
-            #       and the CFood.
-            if key == "Definitions":
-                continue
-            elif key == "Converters":
-                continue
-            converters.append(Converter.converter_factory(
-                value, key, converter_registry))
-
-        return converters
-
-    def start_crawling(self, items: Union[list[StructureElement], StructureElement],
-                       crawler_definition: dict,
-                       converter_registry: dict,
-                       restricted_path: Optional[list[str]] = None):
-        """
-        Start point of the crawler recursion.
-
-        Parameters
-        ----------
-        items: list
-             A list of structure elements (or a single StructureElement) that is used for
-             generating the initial items for the crawler. This could e.g. be a Directory.
-        crawler_definition : dict
-             A dictionary representing the crawler definition, possibly from a yaml
-             file.
-        restricted_path: optional, list of strings
-             Traverse the data tree only along the given path. When the end of the given path
-             is reached, traverse the full tree as normal.
-
-        Returns
-        -------
-        crawled_data : list
-            the final list with the target state of Records.
+        Crawl a single directory.
+
+        Convenience method that scans the given directory by calling
+        ``scan_directory`` from the scanner module. The scanned data and the
+        debug tree are returned and need to be passed to ``synchronize``
+        separately.
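+
+        A rough usage sketch (the directory and cfood file names are only
+        illustrative assumptions)::
+
+            crawler = Crawler()
+            crawled_data, debug_tree = crawler.crawl_directory(
+                "/data/project", "cfood.yml")
+            crawler.synchronize(crawled_data)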
         """
 
-        # This function builds the tree of converters out of the crawler definition.
-
-        if self.generalStore is None:
-            raise RuntimeError("Should not happen.")
-
-        if not isinstance(items, list):
-            items = [items]
-
+        self.crawled_directory = crawled_directory
         self.run_id = uuid.uuid1()
-        local_converters = Crawler.initialize_converters(crawler_definition, converter_registry)
-
-        # This recursive crawling procedure generates the update list:
-        self.crawled_data: list[db.Record] = []
-        self._crawl(
-            items=items,
-            local_converters=local_converters,
-            generalStore=self.generalStore,
-            recordStore=self.recordStore,
-            structure_elements_path=[],
-            converters_path=[],
-            restricted_path=restricted_path)
-        if self.debug:
-            self.debug_converters = local_converters
-
-        return self.crawled_data
-
-    def synchronize(self, commit_changes: bool = True, unique_names=True):
-        """
-        Carry out the actual synchronization.
-        """
 
-        # After the crawling, the actual synchronization with the database, based on the
-        # update list is carried out:
+        # TODO: This is not ideal yet: the data is just returned and needs to be
+        #       supplied to the synchronize function separately.
 
-        return self._synchronize(self.crawled_data, commit_changes, unique_names=unique_names)
+        return scan_directory(crawled_directory,
+                              crawler_definition_path,
+                              restricted_path)
 
     def _has_reference_value_without_id(self, ident: Identifiable) -> bool:
         """
-        Returns True if there is at least one value in the properties attribute of ``ident`` which:
+        Returns True if there is at least one value in the properties
+        attribute of ``ident`` which:
 
         a) is a reference property AND
         b) where the value is set to a
@@ -947,7 +690,8 @@ class Crawler(object):
         return db.Entity(id=id).retrieve()
 
     @staticmethod
-    def execute_inserts_in_list(to_be_inserted, securityMode, run_id: uuid.UUID = None,
+    def execute_inserts_in_list(to_be_inserted, securityMode,
+                                run_id: Optional[uuid.UUID] = None,
                                 unique_names=True):
         for record in to_be_inserted:
             for prop in record.properties:
@@ -975,7 +719,8 @@ class Crawler(object):
                     _resolve_datatype(prop, entity)
 
     @staticmethod
-    def execute_updates_in_list(to_be_updated, securityMode, run_id: uuid.UUID = None,
+    def execute_updates_in_list(to_be_updated, securityMode,
+                                run_id: Optional[uuid.UUID] = None,
                                 unique_names=True):
         Crawler.set_ids_and_datatype_of_parents_and_properties(to_be_updated)
         logger.debug("UPDATE")
@@ -987,7 +732,9 @@ class Crawler(object):
                 update_cache = UpdateCache()
                 update_cache.insert(to_be_updated, run_id)
 
-    def _synchronize(self, crawled_data: list[db.Record], commit_changes: bool = True,
+    def synchronize(self,
+                     crawled_data: list[db.Record],
+                     commit_changes: bool = True,
                      unique_names=True):
         """
         This function applies several stages:
@@ -1068,163 +815,11 @@ ____________________\n""".format(i + 1, len(pending_changes)) + str(el[3]))
                     + " by invoking the crawler"
                     " with the run id: {rid}\n".format(rid=run_id))
 
-    @staticmethod
-    def debug_build_usage_tree(converter: Converter):
-        res: dict[str, dict[str, Any]] = {
-            converter.name: {
-                "usage": ", ".join(converter.metadata["usage"]),
-                "subtree": {}
-            }
-        }
-
-        for subconv in converter.converters:
-            d = Crawler.debug_build_usage_tree(subconv)
-            k = list(d.keys())
-            if len(k) != 1:
-                raise RuntimeError(
-                    "Unkonwn error during building of usage tree.")
-            res[converter.name]["subtree"][k[0]] = d[k[0]]
-        return res
-
-    def save_debug_data(self, filename: str):
-        paths: dict[str, Union[dict, list]] = dict()
-
-        def flatten_debug_info(key):
-            mod_info = self.debug_metadata[key]
-            paths[key] = dict()
-            for record_name in mod_info:
-                if key == "provenance":
-                    paths[key][record_name] = dict()
-                    for prop_name in mod_info[record_name]:
-                        paths[key][record_name][prop_name] = {
-                            "structure_elements_path": "/".join(
-                                mod_info[record_name][prop_name][0]),
-                            "converters_path": "/".join(
-                                mod_info[record_name][prop_name][1])}
-                elif key == "usage":
-                    paths[key][record_name] = ", ".join(mod_info[record_name])
-        for key in ("provenance", "usage"):
-            flatten_debug_info(key)
-
-        paths["converters_usage"] = [self.debug_build_usage_tree(
-            cv) for cv in self.debug_converters]
-
-        with open(filename, "w") as f:
-            f.write(yaml.dump(paths, sort_keys=False))
-
-    def _crawl(self,
-               items: list[StructureElement],
-               local_converters: list[Converter],
-               generalStore: GeneralStore,
-               recordStore: RecordStore,
-               structure_elements_path: list[str],
-               converters_path: list[str],
-               restricted_path: Optional[list[str]] = None):
-        """
-        Crawl a list of StructureElements and apply any matching converters.
-
-        items: structure_elements (e.g. files and folders on one level on the hierarchy)
-        local_converters: locally defined converters for
-                            treating structure elements. A locally defined converter could be
-                            one that is only valid for a specific subtree of the originally
-                            cralwed StructureElement structure.
-        generalStore and recordStore: This recursion of the crawl function should only operate on
-                                      copies of the global stores of the Crawler object.
-        restricted_path: optional, list of strings, traverse the data tree only along the given
-                         path. For example, when a directory contains files a, b and c and b is
-                         given in restricted_path, a and c will be ignroed by the crawler.
-                         When the end of the given path is reached, traverse the full tree as
-                         normal. The first element of the list provided by restricted_path should
-                         be the name of the StructureElement at this level, i.e. denoting the
-                         respective element in the items argument.
-        """
-        # This path_found variable stores wether the path given by restricted_path was found in the
-        # data tree
-        path_found = False
-        if restricted_path is not None and len(restricted_path) == 0:
-            restricted_path = None
-
-        for element in items:
-            for converter in local_converters:
-
-                # type is something like "matches files", replace isinstance with "type_matches"
-                # match function tests regexp for example
-                if (converter.typecheck(element) and (
-                        restricted_path is None or element.name == restricted_path[0])
-                        and converter.match(element) is not None):
-                    path_found = True
-                    generalStore_copy = generalStore.create_scoped_copy()
-                    recordStore_copy = recordStore.create_scoped_copy()
-
-                    # Create an entry for this matched structure element:
-                    generalStore_copy[converter.name] = (
-                        os.path.join(*(structure_elements_path + [element.get_name()])))
-
-                    # extracts values from structure element and stores them in the
-                    # variable store
-                    converter.create_values(generalStore_copy, element)
-
-                    keys_modified = converter.create_records(
-                        generalStore_copy, recordStore_copy, element)
-
-                    children = converter.create_children(generalStore_copy, element)
-
-                    if self.debug:
-                        # add provenance information for each variable
-                        self.debug_tree[str(element)] = (
-                            generalStore_copy.get_storage(), recordStore_copy.get_storage())
-                        self.debug_metadata["copied"][str(element)] = (
-                            generalStore_copy.get_dict_copied(),
-                            recordStore_copy.get_dict_copied())
-                        self.debug_metadata["usage"][str(element)].add(
-                            "/".join(converters_path + [converter.name]))
-                        mod_info = self.debug_metadata["provenance"]
-                        for record_name, prop_name in keys_modified:
-                            # TODO: check
-                            internal_id = recordStore_copy.get_internal_id(
-                                record_name)
-                            record_identifier = record_name + \
-                                "_" + str(internal_id)
-                            converter.metadata["usage"].add(record_identifier)
-                            mod_info[record_identifier][prop_name] = (
-                                structure_elements_path + [element.get_name()],
-                                converters_path + [converter.name])
-
-                    self._crawl(children, converter.converters,
-                                generalStore_copy, recordStore_copy,
-                                structure_elements_path + [element.get_name()],
-                                converters_path + [converter.name],
-                                restricted_path[1:] if restricted_path is not None else None)
-
-        if restricted_path and not path_found:
-            raise RuntimeError("A 'restricted_path' argument was given that is not contained in "
-                               "the data tree")
-        # if the crawler is running out of scope, copy all records in
-        # the recordStore, that were created in this scope
-        # to the general update container.
-        scoped_records = recordStore.get_records_current_scope()
-        for record in scoped_records:
-            self.crawled_data.append(record)
-
-        # TODO: the scoped variables should be cleaned up as soon if the variables
-        #       are no longer in the current scope. This can be implemented as follows,
-        #       but this breaks the test "test_record_structure_generation", because
-        #       some debug info is also deleted. This implementation can be used as soon
-        #       as the remaining problems with the debug_tree are fixed.
-        # Delete the variables that are no longer needed:
-        # scoped_names = recordStore.get_names_current_scope()
-        # for name in scoped_names:
-        #     del recordStore[name]
-        #     del generalStore[name]
-
-        return self.crawled_data
-
 
 def crawler_main(crawled_directory_path: str,
                  cfood_file_name: str,
-                 identifiables_definition_file: str = None,
-                 debug: bool = False,
-                 provenance_file: str = None,
+                 identifiables_definition_file: Optional[str] = None,
+                 provenance_file: Optional[str] = None,
                  dry_run: bool = False,
                  prefix: str = "",
                  securityMode: SecurityMode = SecurityMode.UPDATE,
@@ -1262,14 +857,17 @@ def crawler_main(crawled_directory_path: str,
     return_value : int
         0 if successful
     """
-    crawler = Crawler(debug=debug, securityMode=securityMode)
+    crawler = Crawler(securityMode=securityMode)
     try:
-        crawler.crawl_directory(crawled_directory_path, cfood_file_name, restricted_path)
+        crawled_data, debug_tree = crawler.crawl_directory(crawled_directory_path,
+                                                           cfood_file_name,
+                                                           restricted_path)
     except ConverterValidationError as err:
         print(err)
         return 1
-    if provenance_file is not None and debug:
-        crawler.save_debug_data(provenance_file)
+    if provenance_file is not None:
+        with open(provenance_file, "w") as f:
+            yaml.dump(debug_tree, f)
 
     if identifiables_definition_file is not None:
 
@@ -1278,7 +876,8 @@ def crawler_main(crawled_directory_path: str,
         crawler.identifiableAdapter = ident
 
     if dry_run:
-        ins, upd = crawler.synchronize(commit_changes=False)
+        ins, upd = crawler.synchronize(crawled_data,
+                                       commit_changes=False)
         inserts = [str(i) for i in ins]
         updates = [str(i) for i in upd]
         with open("dry.yml", "w") as f:
@@ -1287,7 +886,7 @@ def crawler_main(crawled_directory_path: str,
                 "update": updates}))
     else:
         rtsfinder = dict()
-        for elem in crawler.crawled_data:
+        for elem in crawled_data:
             if isinstance(elem, db.File):
                 # correct the file path:
                 # elem.file = os.path.join(args.path, elem.file)
@@ -1320,7 +919,9 @@ def crawler_main(crawled_directory_path: str,
             raise RuntimeError("Missing RecordTypes: {}".
                                format(", ".join(notfound)))
 
-        crawler.synchronize(commit_changes=True, unique_names=unique_names)
+        crawler.synchronize(crawled_data,
+                            commit_changes=True,
+                            unique_names=unique_names)
     return 0
 
 
@@ -1342,7 +943,7 @@ def parse_args():
                         help="Path name of the provenance yaml file. "
                         "This file will only be generated if this option is set.")
     parser.add_argument("--debug", required=False, action="store_true",
-                        help="Path name of the cfood yaml file to be used.")
+                        help="Generate debug output.")
     parser.add_argument("crawled_directory_path",
                         help="The subtree of files below the given path will "
                         "be considered. Use '/' for everything.")
@@ -1357,7 +958,7 @@ def parse_args():
                         help="Create two files dry.yml to show"
                         "what would actually be committed without doing the synchronization.")
 
-    # TODO: load identifiables is a dirty implementation currently
+    # TODO: loading identifiables is currently a very simple implementation
     parser.add_argument("-i", "--load-identifiables",
                         help="Load identifiables from the given yaml file.")
     parser.add_argument("-u", "--unique-names",
@@ -1403,7 +1004,6 @@ def main():
         crawled_directory_path=args.crawled_directory_path,
         cfood_file_name=args.cfood_file_name,
         identifiables_definition_file=args.load_identifiables,
-        debug=args.debug,
         provenance_file=args.provenance,
         dry_run=args.dry_run,
         prefix=args.prefix,
diff --git a/src/caoscrawler/debug/__init__.py b/src/caoscrawler/debug/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/src/caoscrawler/debug/debug_tree.py b/src/caoscrawler/debug/debug_tree.py
new file mode 100644
index 0000000000000000000000000000000000000000..825fac2cebafaad9992bbf465b0cd1a899262e2a
--- /dev/null
+++ b/src/caoscrawler/debug/debug_tree.py
@@ -0,0 +1,79 @@
+#!/usr/bin/env python3
+# encoding: utf-8
+#
+# ** header v3.0
+# This file is a part of the CaosDB Project.
+#
+# Copyright (C) 2023 Alexander Schlemmer
+#
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as
+# published by the Free Software Foundation, either version 3 of the
+# License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+#
+# ** end header
+#
+
+"""
+A tree structure that is used during the scanning stage of the crawler
+to store information used for debugging.
+"""
+
+from __future__ import annotations
+
+from dataclasses import dataclass
+from typing import Any
+
+from ..converters import Converter
+from ..structure_elements import StructureElement
+
+
+@dataclass
+class DebugTreeStructureElement:
+    path_segment: str  # a name
+    element: StructureElement
+    matching_converters: list[DebugTreeConverter]
+    nonmatching_converters: list[DebugTreeConverter]
+
+
+
+@dataclass
+class DebugTreeVariable:
+    key: str
+    value: Any
+    copied: bool
+    internal_id: int
+
+
+@dataclass
+class DebugTreeConverter:
+    path_segment: str  # a name
+    converter: Converter
+    current_variables: list[DebugTreeVariable]
+    current_records: list[DebugTreeVariable]  # Here, value is always a CaosDB Entity
+    children: list[DebugTreeStructureElement]
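+
+
+# A rough illustration of how these dataclasses nest (all values below are
+# made-up examples, not actual crawler output):
+#
+#     DebugTreeStructureElement(
+#         path_segment="ExperimentalData",
+#         element=Directory("ExperimentalData", "/data/ExperimentalData"),
+#         matching_converters=[
+#             DebugTreeConverter(
+#                 path_segment="DataDir",
+#                 converter=some_directory_converter,
+#                 current_variables=[
+#                     DebugTreeVariable("DataDir", "ExperimentalData",
+#                                       copied=False, internal_id=0)],
+#                 current_records=[],
+#                 children=[...])],
+#         nonmatching_converters=[])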
diff --git a/src/caoscrawler/scanner.py b/src/caoscrawler/scanner.py
new file mode 100644
index 0000000000000000000000000000000000000000..bc2924dc41f6835c07b8a21914dfeb86919cdc96
--- /dev/null
+++ b/src/caoscrawler/scanner.py
@@ -0,0 +1,445 @@
+#!/usr/bin/env python3
+# encoding: utf-8
+#
+# ** header v3.0
+# This file is a part of the CaosDB Project.
+#
+# Copyright (C) 2023 Alexander Schlemmer <alexander.schlemmer@ds.mpg.de>
+#
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as
+# published by the Free Software Foundation, either version 3 of the
+# License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+#
+# ** end header
+#
+
+"""
+This is the scanner; it contains the functionality of the former "_crawl"
+method from crawl.py, i.e. the part of the crawler that only extracts data
+from the file system.
+"""
+
+from __future__ import annotations
+
+import argparse
+import importlib
+import logging
+import os
+import sys
+import warnings
+import yaml
+
+from argparse import RawTextHelpFormatter
+from collections import defaultdict
+from copy import deepcopy
+from enum import Enum
+from importlib_resources import files
+from jsonschema import validate
+from typing import Any, Optional, Type, Union
+
+import caosdb as db
+
+from caosadvancedtools.cache import UpdateCache, Cache
+from caosadvancedtools.crawler import Crawler as OldCrawler
+from caosdb.apiutils import (compare_entities, EntityMergeConflictError,
+                             merge_entities)
+from caosdb.common.datatype import is_reference
+
+from .converters import Converter, DirectoryConverter, ConverterValidationError
+
+from .macros import defmacro_constructor, macro_constructor
+from .stores import Store, GeneralStore, RecordStore
+from .structure_elements import StructureElement, Directory, NoneElement
+from .version import check_cfood_version
+
+from .debug.debug_tree import (DebugTreeStructureElement,
+                                    DebugTreeConverter,
+                                    DebugTreeVariable)
+
+logger = logging.getLogger(__name__)
+
+
+def scanner(items: list[StructureElement],
+            converters: list[Converter],
+            general_store: Optional[GeneralStore] = None,
+            record_store: Optional[RecordStore] = None,
+            structure_elements_path: Optional[list[str]] = None,
+            restricted_path: Optional[list[str]] = None,
+            crawled_data: Optional[list[db.Record]] = None):
+        """
+        Crawl a list of StructureElements and apply any matching converters.
+
+        items: structure_elements (e.g. files and folders on one level on the hierarchy)
+        local_converters: locally defined converters for
+                            treating structure elements. A locally defined converter could be
+                            one that is only valid for a specific subtree of the originally
+                            cralwed StructureElement structure.
+        general_store and record_store: This recursion of the crawl function should only operate on
+                                      copies of the global stores of the Crawler object.
+        restricted_path: optional, list of strings, traverse the data tree only along the given
+                         path. For example, when a directory contains files a, b and c and b is
+                         given in restricted_path, a and c will be ignroed by the crawler.
+                         When the end of the given path is reached, traverse the full tree as
+                         normal. The first element of the list provided by restricted_path should
+                         be the name of the StructureElement at this level, i.e. denoting the
+                         respective element in the items argument.
+        """
+        
+        # The path_found variable stores wether the path given by restricted_path was
+        # found in the data tree
+        path_found = False
+        if restricted_path is not None and len(restricted_path) == 0:
+            restricted_path = None
+
+        # This list stores the debug tree which is returned at the end:
+        tree_elements: list[DebugTreeStructureElement] = list()
+
+        if crawled_data is None:
+            crawled_data = []
+
+        if general_store is None:
+            general_store = GeneralStore()
+
+        if record_store is None:
+            record_store = RecordStore()
+
+        if structure_elements_path is None:
+            structure_elements_path = []
+
+        for element in items:
+            # Create a tree element for the current structure element:
+            tree_SE = DebugTreeStructureElement(
+                element.name, element, [], [])
+            tree_elements.append(tree_SE)
+
+            for converter in converters:
+                general_store_copy = general_store.create_scoped_copy()
+                record_store_copy = record_store.create_scoped_copy()
+
+                # Create a tree element for this converter and add it to the tree_SE later:
+                tree_C = DebugTreeConverter(converter.name, converter, [], [], [])
+
+                # type is something like "matches files", replace isinstance with "type_matches"
+                # match function tests regexp for example
+                if (converter.typecheck(element) and (
+                        restricted_path is None or element.name == restricted_path[0])
+                        and converter.match(element) is not None):
+                    path_found = True
+                    tree_SE.matching_converters.append(tree_C)
+
+                    # Create an entry for this matched structure element:
+                    general_store_copy[converter.name] = (
+                        os.path.join(*(structure_elements_path + [element.get_name()])))
+
+                    # extracts values from structure element and stores them in the
+                    # variable store
+                    converter.create_values(general_store_copy, element)
+
+                    keys_modified = converter.create_records(
+                        general_store_copy, record_store_copy, element)
+
+                    children = converter.create_children(general_store_copy, element)
+
+                    tree_C.current_variables = store_to_tree(general_store_copy)
+                    tree_C.current_records = store_to_tree(record_store_copy)
+
+                    _, tree_elements_children = scanner(
+                        children, converter.converters,
+                        general_store_copy, record_store_copy,
+                        structure_elements_path + [element.get_name()],
+                        restricted_path[1:] if restricted_path is not None else None,
+                        crawled_data)
+                    tree_C.children.extend(tree_elements_children)
+                else:
+                    tree_SE.nonmatching_converters.append(tree_C)
+
+        if restricted_path and not path_found:
+            raise RuntimeError("A 'restricted_path' argument was given that is not contained in "
+                               "the data tree")
+        # When the scanner leaves the current scope, copy all records that were
+        # created in this scope from the record_store to the general update
+        # container (crawled_data).
+        scoped_records = record_store.get_records_current_scope()
+        for record in scoped_records:
+            crawled_data.append(record)
+
+        # TODO: the scoped variables should be cleaned up as soon as the variables
+        #       are no longer in the current scope. This can be implemented as follows,
+        #       but this breaks the test "test_record_structure_generation", because
+        #       some debug info is also deleted. This implementation can be used as soon
+        #       as the remaining problems with the debug_tree are fixed.
+        # Delete the variables that are no longer needed:
+        # scoped_names = record_store.get_names_current_scope()
+        # for name in scoped_names:
+        #     del record_store[name]
+        #     del general_store[name]
+
+        return crawled_data, tree_elements
+
+
+def load_definition(crawler_definition_path: str):
+    """
+    Load a cfood definition from the yaml file at `crawler_definition_path`
+    and validate it against cfood-schema.yml.
+    """
+
+    # Load the cfood from a yaml file:
+    with open(crawler_definition_path, "r") as f:
+        crawler_definitions = list(yaml.safe_load_all(f))
+
+    crawler_definition = _load_definition_from_yaml_dict(crawler_definitions)
+    return _resolve_validator_paths(crawler_definition, crawler_definition_path)
+
+def _load_definition_from_yaml_dict(crawler_definitions: list[dict]):
+    """
+    Load crawler definitions from a list of (yaml) dicts `crawler_definitions` which
+    contains either one or two documents.
+
+    Doesn't resolve the validator paths in the cfood definition, so for
+    internal and testing use only.
+    """
+    if len(crawler_definitions) == 1:
+        # Simple case, just one document:
+        crawler_definition = crawler_definitions[0]
+        metadata = {}
+    elif len(crawler_definitions) == 2:
+        metadata = (crawler_definitions[0]["metadata"]
+                    if "metadata" in crawler_definitions[0] else {})
+        crawler_definition = crawler_definitions[1]
+    else:
+        raise RuntimeError(
+            "Crawler definition must not contain more than two documents.")
+
+    check_cfood_version(metadata)
+
+    # TODO: at this point this function can already load the cfood schema extensions
+    #       from the crawler definition and add them to the yaml schema that will be
+    #       tested in the next lines of code:
+
+    # Load the cfood schema:
+    with open(str(files('caoscrawler').joinpath('cfood-schema.yml')), "r") as f:
+        schema = yaml.safe_load(f)
+
+    # Add custom converters to converter enum in schema:
+    if "Converters" in crawler_definition:
+        for key in crawler_definition["Converters"]:
+            schema["cfood"]["$defs"]["converter"]["properties"]["type"]["enum"].append(
+                key)
+    if len(crawler_definitions) == 2:
+        if "Converters" in metadata:
+            for key in metadata["Converters"]:
+                schema["cfood"]["$defs"]["converter"]["properties"]["type"]["enum"].append(
+                    key)
+
+    # Validate the cfood schema:
+    validate(instance=crawler_definition, schema=schema["cfood"])
+
+    return crawler_definition
+
+def _resolve_validator_paths(definition: dict, definition_path: str):
+    """
+    Resolve path to validation files with respect to the file in which
+    the crawler was defined.
+    """
+
+    for key, value in definition.items():
+
+        if key == "validate" and isinstance(value, str):
+            # Validator is given by a path
+            if not value.startswith('/'):
+                # Not an absolute path
+                definition[key] = os.path.join(
+                    os.path.dirname(definition_path), value)
+                if not os.path.isfile(definition[key]):
+                    # TODO(henrik) capture this in `crawler_main` similar to
+                    # `ConverterValidationError`.
+                    raise FileNotFoundError(
+                        f"Couldn't find validation file {definition[key]}")
+        elif isinstance(value, dict):
+            # Recursively resolve all validators
+            definition[key] = _resolve_validator_paths(
+                value, definition_path)
+
+    return definition
+
+
+def create_converter_registry(definition: dict):
+    """
+    Currently the converter registry is a dictionary containing for each converter:
+    - key is the short code, abbreviation for the converter class name
+    - module is the name of the module to be imported which must be installed
+    - class is the converter class to load and associate with this converter entry
+
+    all other info for the converter needs to be included in the converter plugin
+    directory:
+    schema.yml file
+    README.md documentation
+
+    Returns
+    -------
+    The converter registry, which is a dictionary containing
+    dictionaries defining the individual converters.
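+
+    For illustration, the entry for the built-in directory converter roughly
+    takes this shape (the concrete values live in ``default_converters.yml``
+    and are only assumed here)::
+
+        converter_registry["Directory"] = {
+            "converter": "DirectoryConverter",
+            "package": "caoscrawler.converters",
+            "class": caoscrawler.converters.DirectoryConverter,
+        }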
+    """
+
+    # Defaults for the converter registry:
+    with open(str(files('caoscrawler').joinpath('default_converters.yml')), "r") as f:
+        converter_registry: dict[str, dict[str, str]] = yaml.safe_load(f)
+
+    # More converters from definition file:
+    if "Converters" in definition:
+        for key, entry in definition["Converters"].items():
+            if key in ["Dict", "DictTextElement", "DictIntegerElement", "DictBooleanElement",
+                       "DictDictElement", "DictListElement", "DictFloatElement"]:
+                warnings.warn(DeprecationWarning(f"{key} is deprecated. Please use the new"
+                                                 " variant; without 'Dict' prefix or "
+                                                 "'DictElement' in case of 'Dict'"))
+
+            converter_registry[key] = {
+                "converter": entry["converter"],
+                "package": entry["package"]
+            }
+
+    # Load modules and associate classes:
+    for key, value in converter_registry.items():
+        module = importlib.import_module(value["package"])
+        value["class"] = getattr(module, value["converter"])
+    return converter_registry
+
+def initialize_converters(crawler_definition: dict, converter_registry: dict):
+    """
+    Takes the cfood as dict (`crawler_definition`) and creates the converter objects that
+    are defined on the highest level. Child Converters will in turn be created during the
+    initialization of the Converters.
+
+    Returns
+    -------
+    A list of converters.
+    """
+    converters = []
+
+    for key, value in crawler_definition.items():
+        # Definitions and Converters are reserved keywords
+        # on the top level of the yaml file.
+        # TODO: there should also be a top level keyword for the actual
+        #       CFood to avoid confusion between top level keywords
+        #       and the CFood.
+        if key == "Definitions":
+            continue
+        elif key == "Converters":
+            continue
+        converters.append(Converter.converter_factory(
+            value, key, converter_registry))
+
+    return converters
+
+def scan_structure_elements(
+        items: Union[list[StructureElement], StructureElement],
+        crawler_definition: dict,
+        converter_registry: dict,
+        restricted_path: Optional[list[str]] = None):
+    """
+    Start point of the crawler recursion.
+
+    Parameters
+    ----------
+    items: list
+         A list of structure elements (or a single StructureElement) that is used for
+         generating the initial items for the crawler. This could e.g. be a Directory.
+    crawler_definition : dict
+         A dictionary representing the crawler definition, possibly from a yaml
+         file.
+    converter_registry : dict
+         TODO: documentation missing
+    restricted_path: optional, list of strings
+         Traverse the data tree only along the given path. When the end of the given path
+         is reached, traverse the full tree as normal.
+
+    Returns
+    -------
+    The result of invoking `scanner`:
+    - A list of resulting objects of type db.Entity
+    - The debug tree
+    """
+
+    # This function builds the tree of converters out of the crawler definition.
+    if not isinstance(items, list):
+        items = [items]
+
+    return scanner(items,
+                   initialize_converters(crawler_definition, converter_registry),
+                   restricted_path=restricted_path)
+
+
+def scan_directory(dirname: str,
+                   crawler_definition_path: str,
+                   restricted_path: Optional[list[str]] = None):
+    """
+    Scan a single directory.
+
+    Convenience function that runs the scanner on a single directory: it loads
+    the crawler definition, builds the converter registry and calls
+    scan_structure_elements with the directory as the single StructureElement.
+
+    restricted_path: optional, list of strings
+            Traverse the data tree only along the given path. When the end of the given path
+            is reached, traverse the full tree as normal.
+
+    Returns
+    -------
+    The result of invoking `scanner`:
+    - A list of resulting objects of type db.Entity
+    - The debug tree
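+
+    A minimal call might look like this (paths are illustrative assumptions)::
+
+        records, debug_tree = scan_directory("/data/project", "cfood.yml")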
+    """
+
+    crawler_definition = load_definition(crawler_definition_path)
+    # Load and register converter packages:
+    converter_registry = create_converter_registry(crawler_definition)
+
+    if not dirname:
+        raise ValueError(
+            "You have to provide a non-empty path for crawling.")
+    dir_structure_name = os.path.basename(dirname)
+
+    if not dir_structure_name and dirname.endswith('/'):
+        if dirname == '/':
+            # Crawling the entire file system
+            dir_structure_name = "root"
+        else:
+            # dirname had a trailing '/'
+            dir_structure_name = os.path.basename(dirname[:-1])
+
+    return scan_structure_elements(
+        Directory(dir_structure_name, dirname),
+        crawler_definition,
+        converter_registry,
+        restricted_path)
+
+
+# -------------------------
+# Utilities for debugging
+# -------------------------
+
+def store_to_tree(store: Store):
+    """
+    Converts a (Record-/General-)Store to a simple structure
+    that can be used for debugging.
+    """
+    res: list[DebugTreeVariable] = list()
+    copied = store.get_dict_copied()
+    for name, value in store.get_storage().items():
+        res.append(
+            DebugTreeVariable(
+                name, value,
+                copied[name],
+                store.get_internal_id(name)))
+    return res
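+
+
+# For illustration, the variables recorded for a matching converter node of
+# the debug tree could be inspected like this (assuming ``tree`` is the list
+# returned as the second element of ``scanner``):
+#
+#     for converter_node in tree[0].matching_converters:
+#         for var in converter_node.current_variables:
+#             print(var.key, var.value, var.copied, var.internal_id)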
diff --git a/unittests/debug_tree_test.py b/unittests/debug_tree_test.py
new file mode 100644
index 0000000000000000000000000000000000000000..5b7bb9dceedec4b8fd6a4aadb05d37f65844ab37
--- /dev/null
+++ b/unittests/debug_tree_test.py
@@ -0,0 +1,65 @@
+#!/usr/bin/env python3
+# encoding: utf-8
+#
+# This file is a part of the CaosDB Project.
+#
+# Copyright (C) 2021-2023 Alexander Schlemmer <alexander.schlemmer@ds.mpg.de>
+# Copyright (C) 2023 Henrik tom Wörden <h.tomwoerden@indiscale.com>
+# Copyright (C) 2023 IndiScale GmbH <info@indiscale.com>
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as
+# published by the Free Software Foundation, either version 3 of the
+# License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+#
+
+"""
+Tests for the new debug tree feature of the crawler.
+"""
+
+from caoscrawler.stores import GeneralStore, RecordStore
+import os
+
+from caoscrawler.structure_elements import (File, Directory,
+                                            DictTextElement, DictListElement, DictElement)
+
+from caoscrawler.converters import SimpleFileConverter
+
+
+from functools import partial
+from copy import deepcopy
+from unittest.mock import patch
+
+
+from unittest.mock import MagicMock, Mock
+from os.path import join, dirname, basename
+import yaml
+import caosdb as db
+from caosdb.apiutils import compare_entities
+
+import pytest
+from pytest import raises
+
+from caoscrawler.scanner import scanner
+
+
+def test_scanner():
+    d = File("2023-02-07_ProjectName.txt", "2023-02-07_ProjectName.txt")
+    c = SimpleFileConverter({
+        "match": "^(?P<date>.*?)_(?P<identifier>.*?)$"
+        }, "ProjectFile", {})
+
+    crawled_data, debug_tree = scanner([d], [c])
+    # The single file should yield exactly one node in the debug tree, and the
+    # converter should have matched the file name pattern:
+    assert len(debug_tree) == 1
+    assert debug_tree[0].element is d
+    assert len(debug_tree[0].matching_converters) == 1
+    assert len(debug_tree[0].nonmatching_converters) == 0
+
+def test_scanner_directories():
+    d = Directory("2023-02-07_ProjectName", "/")