diff --git a/CHANGELOG.md b/CHANGELOG.md
index c498b9286e0977295066340a2a4172093ac10bfe..87e33c543e6126024504687e945bfb9bb41b4148 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -5,6 +5,28 @@ All notable changes to this project will be documented in this file.
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
+## [0.5.0] - 2023-03-28 ##
+(Florian Spreckelsen)
+
+### Changed ###
+
+- Refactored the crawl.py module: there is now a separate scanner module that handles
+  the collection of information and is independent of CaosDB itself.
+- The signature of the function ``save_debug_data`` was changed to explicitly
+  take the ``debug_tree`` as its first argument. This change was necessary because
+  the ``debug_tree`` is no longer stored as a member field of the Crawler class.
+
+
+### Deprecated ###
+
+- The Crawler methods ``load_definition``, ``initialize_converters`` and
+  ``load_converters`` are deprecated. Please use the functions
+  ``load_definition``, ``initialize_converters`` and
+  ``create_converter_registry`` from the scanner module instead.
+- The method ``crawl_directory`` is deprecated. Please use ``scan_directory``
+  from the scanner module instead.
+- The method ``start_crawling`` is deprecated. Its functionality is mostly
+  covered by ``scan_structure_elements`` in the scanner module. A short
+  migration sketch is given below.
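+
+A minimal migration sketch (the directory and cfood paths are placeholders; the
+identifiable adapter and other options are omitted):
+
+```python
+from caoscrawler import Crawler
+from caoscrawler.scanner import scan_directory
+
+# Scanning (collecting the target records) is now independent of the Crawler:
+crawled_data = scan_directory("path/to/data", "cfood.yml")
+
+# Synchronization takes the scanned data explicitly:
+crawler = Crawler()  # optionally pass identifiableAdapter=... as before
+ins, ups = crawler.synchronize(crawled_data=crawled_data)
+```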
+
 ## [0.4.0] - 2023-03-22 ##
 (Florian Spreckelsen)
 
@@ -48,7 +70,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 ### Added ###
 
 - Identifiable class to represent the information used to identify Records.
-- Added some StructureElements: BooleanElement, FloatElement, IntegerElement, 
+- Added some StructureElements: BooleanElement, FloatElement, IntegerElement,
   ListElement, DictElement
 - String representation for Identifiables
 - [#43](https://gitlab.com/caosdb/caosdb-crawler/-/issues/43) the crawler
diff --git a/CITATION.cff b/CITATION.cff
index 9c8bf551c41a6a3447b076914741b349a8c72b9c..834f57db4521c983947ed4b960b2877c914b5bb2 100644
--- a/CITATION.cff
+++ b/CITATION.cff
@@ -20,6 +20,6 @@ authors:
     given-names: Stefan
     orcid: https://orcid.org/0000-0001-7214-8125
 title: CaosDB - Crawler
-version: 0.4.0
+version: 0.5.0
 doi: 10.3390/data4020083
-date-released: 2023-03-22
\ No newline at end of file
+date-released: 2023-03-28
\ No newline at end of file
diff --git a/integrationtests/basic_example/test_basic.py b/integrationtests/basic_example/test_basic.py
index 0c847b08a729f3b112cbdf3c38bac31309cda125..b33974d9c2c5600bf2a91cbf14d7c8799ffc2644 100755
--- a/integrationtests/basic_example/test_basic.py
+++ b/integrationtests/basic_example/test_basic.py
@@ -27,6 +27,7 @@ an integration test module that does basic integration tests
 """
 
 from caosadvancedtools.crawler import Crawler as OldCrawler
+from caoscrawler.debug_tree import DebugTree
 import os
 from caosdb import EmptyUniqueQueryError
 import argparse
@@ -36,6 +37,7 @@ from caoscrawler import Crawler, SecurityMode
 from caoscrawler.identifiable import Identifiable
 import caosdb as db
 from caoscrawler.identifiable_adapters import CaosDBIdentifiableAdapter
+from caoscrawler.scanner import scan_directory
 import pytest
 from caosadvancedtools.models.parser import parse_model_from_yaml
 import yaml
@@ -82,42 +84,46 @@ def ident():
     return ident
 
 
-def crawl_standard_test_directory(cr: Crawler,
-                                  subdir: str = "examples_article",
-                                  cfood: str = "scifolder_cfood.yml"):
-    cr.crawl_directory(rfp("..", "..", "unittests", "test_directories", subdir),
-                       rfp("..", "..", "unittests", cfood))
+def crawl_standard_test_directory(subdir: str = "examples_article",
+                                  cfood: str = "scifolder_cfood.yml",
+                                  debug_tree=None):
+    return scan_directory(rfp("..", "..", "unittests", "test_directories", subdir),
+                          rfp("..", "..", "unittests", cfood),
+                          debug_tree=debug_tree)
 
 
 @pytest.fixture
 def crawler(ident):
-    cr = Crawler(debug=True, identifiableAdapter=ident)
-    crawl_standard_test_directory(cr)
-    return cr
+    cr = Crawler(identifiableAdapter=ident)
+    debug_tree = DebugTree()
+    crawled_data = crawl_standard_test_directory(debug_tree=debug_tree)
+    return cr, crawled_data, debug_tree
 
 
 @pytest.fixture
 def crawler_extended(ident):
-    cr = Crawler(debug=True, identifiableAdapter=ident)
-    crawl_standard_test_directory(cr, cfood="scifolder_extended.yml")
+    cr = Crawler(identifiableAdapter=ident)
+    debug_tree = DebugTree()
+    crawled_data = crawl_standard_test_directory(
+        cfood="scifolder_extended.yml", debug_tree=debug_tree)
     # correct paths for current working directory
-    file_list = [r for r in cr.crawled_data if r.role == "File"]
+    file_list = [r for r in crawled_data if r.role == "File"]
     for f in file_list:
         f.file = rfp("..", "..", "unittests", "test_directories", f.file)
-    return cr
+    return cr, crawled_data, debug_tree
 
 
 def test_ambigious_lookup(clear_database, usemodel, crawler, ident):
-    ins, ups = crawler.synchronize()
+    ins, ups = crawler[0].synchronize(crawled_data=crawler[1])
 
     proj = db.execute_query("FIND Project WITH identifier='SpeedOfLight'", unique=True)
     with pytest.raises(RuntimeError, match=".*unambigiously.*"):
-        print(crawler.identifiableAdapter.retrieve_identified_record_for_identifiable(
+        print(crawler[0].identifiableAdapter.retrieve_identified_record_for_identifiable(
             Identifiable(properties={'project': proj.id})))
 
 
 def test_single_insertion(clear_database, usemodel, crawler, ident):
-    ins, ups = crawler.synchronize()
+    ins, ups = crawler[0].synchronize(crawled_data=crawler[1])
 
     # This test also generates the file records.xml used in some of the unittests:
     res = db.execute_query("FIND Record")
@@ -138,94 +144,93 @@ def test_single_insertion(clear_database, usemodel, crawler, ident):
     assert len(ups) == 0
 
     # Do a second run on the same data, there should be no changes:
-    crawler = Crawler(debug=True, identifiableAdapter=ident)
-    crawler.crawl_directory(rfp("../../unittests/test_directories", "examples_article"),
-                            rfp("../../unittests/scifolder_cfood.yml"))
-    ins, ups = crawler.synchronize()
+    crawler = Crawler(identifiableAdapter=ident)
+    crawled_data = scan_directory(rfp("../../unittests/test_directories", "examples_article"),
+                                  rfp("../../unittests/scifolder_cfood.yml"))
+    ins, ups = crawler.synchronize(crawled_data=crawled_data)
     assert len(ins) == 0
     assert len(ups) == 0
 
 
 def test_multiple_insertions(clear_database, usemodel, ident, crawler):
-    ins, ups = crawler.synchronize()
+    ins, ups = crawler[0].synchronize(crawled_data=crawler[1])
 
     # Do a second run on the same data, there should be no changes:
-    cr = Crawler(debug=True, identifiableAdapter=ident)
-    crawl_standard_test_directory(cr)
-    ins, ups = cr.synchronize()
+    cr = Crawler(identifiableAdapter=ident)
+    crawled_data = crawl_standard_test_directory()
+    ins, ups = cr.synchronize(crawled_data=crawled_data)
     assert len(ins) == 0
     assert len(ups) == 0
 
 
 def test_insertion(clear_database, usemodel, ident, crawler):
-    ins, ups = crawler.synchronize()
+    ins, ups = crawler[0].synchronize(crawled_data=crawler[1])
 
     # Do a second run on the same data, there should be a new insert:
-    cr = Crawler(debug=True, identifiableAdapter=ident)
-    crawl_standard_test_directory(cr, "example_insert")
-    assert len(cr.crawled_data) == 3
-    ins, ups = cr.synchronize()
+    cr = Crawler(identifiableAdapter=ident)
+    crawled_data = crawl_standard_test_directory("example_insert")
+    assert len(crawled_data) == 3
+    ins, ups = cr.synchronize(crawled_data=crawled_data)
     assert len(ins) == 1
     assert len(ups) == 0
 
     # Do it again to check whether nothing is changed:
-    cr = Crawler(debug=True, identifiableAdapter=ident)
-    crawl_standard_test_directory(cr, "example_insert")
-    assert len(cr.crawled_data) == 3
-    ins, ups = cr.synchronize()
+    cr = Crawler(identifiableAdapter=ident)
+    crawled_data = crawl_standard_test_directory("example_insert")
+    assert len(crawled_data) == 3
+    ins, ups = cr.synchronize(crawled_data=crawled_data)
     assert len(ins) == 0
     assert len(ups) == 0
 
 
 def test_insert_auth(clear_database, usemodel, ident, crawler):
-    ins, ups = crawler.synchronize()
+    ins, ups = crawler[0].synchronize(crawled_data=crawler[1])
 
     # Do a second run on the same data, there should be a new insert:
-    cr = Crawler(debug=True, identifiableAdapter=ident, securityMode=SecurityMode.RETRIEVE)
-    crawl_standard_test_directory(cr, "example_insert")
-    assert len(cr.crawled_data) == 3
-    ins, ups = cr.synchronize()
+    cr = Crawler(identifiableAdapter=ident, securityMode=SecurityMode.RETRIEVE)
+    crawled_data = crawl_standard_test_directory("example_insert")
+    assert len(crawled_data) == 3
+    ins, ups = cr.synchronize(crawled_data=crawled_data)
     assert len(ins) == 1
     assert not ins[0].is_valid()
     nins, nups = OldCrawler.update_authorized_changes(cr.run_id)
     assert nins == 1
 
     # Do it again to check whether nothing is changed:
-    cr = Crawler(debug=True, identifiableAdapter=ident)
-    crawl_standard_test_directory(cr, "example_insert")
-    assert len(cr.crawled_data) == 3
-    ins, ups = cr.synchronize()
+    cr = Crawler(identifiableAdapter=ident)
+    crawled_data = crawl_standard_test_directory("example_insert")
+    assert len(crawled_data) == 3
+    ins, ups = cr.synchronize(crawled_data=crawled_data)
     assert len(ins) == 0
     assert len(ups) == 0
 
 
 def test_insertion_and_update(clear_database, usemodel, ident, crawler):
-    ins, ups = crawler.synchronize()
+    ins, ups = crawler[0].synchronize(crawled_data=crawler[1])
 
-    cr = Crawler(debug=True, identifiableAdapter=ident)
-    crawl_standard_test_directory(cr, "example_insert")
-    ins, ups = cr.synchronize()
+    cr = Crawler(identifiableAdapter=ident)
+    crawled_data = crawl_standard_test_directory("example_insert")
+    ins, ups = cr.synchronize(crawled_data=crawled_data)
 
-    cr = Crawler(debug=True, identifiableAdapter=ident)
-    crawl_standard_test_directory(cr, "example_overwrite_1")
-    # print(cr.crawled_data)
+    cr = Crawler(identifiableAdapter=ident)
+    crawled_data = crawl_standard_test_directory("example_overwrite_1")
     # cr.save_debug_data(rfp("provenance.yml"))
-    assert len(cr.crawled_data) == 3
-    ins, ups = cr.synchronize()
+    assert len(crawled_data) == 3
+    ins, ups = cr.synchronize(crawled_data=crawled_data)
     assert len(ins) == 0
     assert len(ups) == 1
 
 
 def test_identifiable_update(clear_database, usemodel, ident, crawler):
-    ins, ups = crawler.synchronize()
+    ins, ups = crawler[0].synchronize(crawled_data=crawler[1])
 
     # Do a second run on the same data with a change in one
     # of the identifiables:
-    cr = Crawler(debug=True, identifiableAdapter=ident)
-    crawl_standard_test_directory(cr)
+    cr = Crawler(identifiableAdapter=ident)
+    crawled_data = crawl_standard_test_directory()
 
     # Test the addition of a single property:
-    l = cr.crawled_data
+    l = crawled_data
     for record in l:
         if (record.parents[0].name == "Measurement" and
                 record.get_property("date").value == "2020-01-03"):
@@ -234,28 +239,28 @@ def test_identifiable_update(clear_database, usemodel, ident, crawler):
                 name="email", value="testperson@testaccount.test")
             print("one change")
             break
-    ins, ups = cr.synchronize()
+    ins, ups = cr.synchronize(crawled_data=crawled_data)
     assert len(ins) == 0
     assert len(ups) == 1
 
     # Test the change within one property:
-    cr = Crawler(debug=True, identifiableAdapter=ident)
-    crawl_standard_test_directory(cr)
-    l = cr.crawled_data
+    cr = Crawler(identifiableAdapter=ident)
+    crawled_data = crawl_standard_test_directory()
+    l = crawled_data
     for record in l:
         if (record.parents[0].name == "Measurement" and
                 record.get_property("date").value == "2020-01-03"):
             record.add_property(name="email", value="testperson@coolmail.test")
             print("one change")
             break
-    ins, ups = cr.synchronize()
+    ins, ups = cr.synchronize(crawled_data=crawled_data)
     assert len(ins) == 0
     assert len(ups) == 1
 
     # Changing the date should result in a new insertion:
-    cr = Crawler(debug=True, identifiableAdapter=ident)
-    crawl_standard_test_directory(cr)
-    l = cr.crawled_data
+    cr = Crawler(identifiableAdapter=ident)
+    crawled_data = crawl_standard_test_directory()
+    l = crawled_data
     for record in l:
         if (record.parents[0].name == "Measurement" and
                 record.get_property("date").value == "2020-01-03"):
@@ -263,30 +268,31 @@ def test_identifiable_update(clear_database, usemodel, ident, crawler):
             record.get_property("date").value = "2012-01-02"
             print("one change")
             break
-    ins, ups = cr.synchronize()
+    ins, ups = cr.synchronize(crawled_data=crawled_data)
     assert len(ins) == 1
     assert len(ups) == 0
 
 
 def test_file_insertion_dry(clear_database, usemodel, ident):
-    crawler_extended = Crawler(debug=True, identifiableAdapter=ident)
-    crawl_standard_test_directory(
-        crawler_extended, cfood="scifolder_extended.yml")
-    file_list = [r for r in crawler_extended.crawled_data if r.role == "File"]
+    crawler_extended = Crawler(identifiableAdapter=ident)
+    crawled_data = crawl_standard_test_directory(
+        cfood="scifolder_extended.yml")
+    file_list = [r for r in crawled_data if r.role == "File"]
     assert len(file_list) == 11
 
     for f in file_list:
         assert f.path.endswith("README.md")
         assert f.path[1:] == f.file
 
-    ins, ups = crawler_extended.synchronize(commit_changes=False)
+    ins, ups = crawler_extended.synchronize(crawled_data=crawled_data, commit_changes=False)
     assert len(ups) == 0
     file_list_ins = [r for r in ins if r.role == "File"]
     assert len(file_list_ins) == 11
 
 
 def test_file_insertion(clear_database, usemodel, ident, crawler_extended):
-    ins, ups = crawler_extended.synchronize(commit_changes=True)
+    ins, ups = crawler_extended[0].synchronize(
+        crawled_data=crawler_extended[1], commit_changes=True)
     file_list_ins = [r for r in ins if r.role == "File"]
     assert len(file_list_ins) == 11
 
@@ -302,16 +308,17 @@ def test_file_insertion(clear_database, usemodel, ident, crawler_extended):
 
 
 def test_file_update(clear_database, usemodel, ident, crawler_extended):
-    ins1, ups1 = crawler_extended.synchronize(commit_changes=True)
+    ins1, ups1 = crawler_extended[0].synchronize(
+        crawled_data=crawler_extended[1], commit_changes=True)
     file_list_ins = [r for r in ins1 if r.role == "File"]
 
-    cr = Crawler(debug=True, identifiableAdapter=ident)
-    crawl_standard_test_directory(cr, cfood="scifolder_extended.yml")
+    cr = Crawler(identifiableAdapter=ident)
+    crawled_data = crawl_standard_test_directory(cfood="scifolder_extended.yml")
 
-    file_list = [r for r in cr.crawled_data if r.role == "File"]
+    file_list = [r for r in crawled_data if r.role == "File"]
     for f in file_list:
         f.file = rfp("..", "..", "unittests", "test_directories", f.file)
-    ins2, ups2 = cr.synchronize(commit_changes=True)
+    ins2, ups2 = cr.synchronize(crawled_data=crawled_data, commit_changes=True)
     assert len(ups1) == 0
     assert len(ups2) == 0
 
@@ -320,13 +327,13 @@ def test_file_update(clear_database, usemodel, ident, crawler_extended):
     assert len(res) == 11
     assert len(res[0].parents) == 0
 
-    cr2 = Crawler(debug=True, identifiableAdapter=ident)
-    crawl_standard_test_directory(cr2, cfood="scifolder_extended2.yml")
+    cr2 = Crawler(identifiableAdapter=ident)
+    crawled_data = crawl_standard_test_directory(cfood="scifolder_extended2.yml")
 
-    file_list = [r for r in cr2.crawled_data if r.role == "File"]
+    file_list = [r for r in crawled_data if r.role == "File"]
     for f in file_list:
         f.file = rfp("..", "..", "unittests", "test_directories", f.file)
-    ins3, ups3 = cr2.synchronize(commit_changes=True)
+    ins3, ups3 = cr2.synchronize(crawled_data=crawled_data, commit_changes=True)
     assert len(ups3) == 11
 
     res = db.execute_query("Find File")
diff --git a/integrationtests/test_issues.py b/integrationtests/test_issues.py
index 527b4c0cf67f483d5b61972a0104ff4fb673402d..08e254daf4052670fcec18760626c460604efe15 100644
--- a/integrationtests/test_issues.py
+++ b/integrationtests/test_issues.py
@@ -24,6 +24,8 @@ from caoscrawler.crawl import Crawler
 from caoscrawler.identifiable_adapters import CaosDBIdentifiableAdapter
 from caoscrawler.structure_elements import DictElement
 
+from caoscrawler.scanner import create_converter_registry, scan_structure_elements
+
 from caosdb.utils.register_tests import clear_database, set_test_key
 set_test_key("10b128cf8a1372f30aa3697466bb55e76974e0c16a599bb44ace88f19c8f61e2")
 
@@ -86,8 +88,8 @@ def test_issue_23(clear_database):
     ident.register_identifiable("TestType", db.RecordType().add_parent(
         name="TestType").add_property(name="identifying_prop"))
 
-    crawler = Crawler(debug=True, identifiableAdapter=ident)
-    converter_registry = crawler.load_converters(crawler_definition)
+    crawler = Crawler(identifiableAdapter=ident)
+    converter_registry = create_converter_registry(crawler_definition)
 
     # the dictionary to be crawled...
     test_dict = {
@@ -95,7 +97,8 @@ def test_issue_23(clear_database):
         "prop_b": "something_else"
     }
 
-    records = crawler.start_crawling(
+    crawler.generate_run_id()
+    records = scan_structure_elements(
         DictElement("TestDict", test_dict), crawler_definition, converter_registry)
 
     assert len(records) == 1
@@ -109,7 +112,7 @@ def test_issue_23(clear_database):
     assert rec_crawled.get_property("prop_a") is None
 
     # synchronize with database and update the record
-    ins, ups = crawler.synchronize()
+    ins, ups = crawler.synchronize(crawled_data=records)
     assert len(ins) == 0
     assert len(ups) == 1
 
diff --git a/integrationtests/test_realworld_example.py b/integrationtests/test_realworld_example.py
index cb5ed2c769945af033bc56a2d6af3bf1cec86de4..45873ddeb8b4f4a23fbcbc9225cbeea60b213cc4 100644
--- a/integrationtests/test_realworld_example.py
+++ b/integrationtests/test_realworld_example.py
@@ -38,6 +38,8 @@ import pytest
 from caosadvancedtools.models.parser import parse_model_from_json_schema, parse_model_from_yaml
 from caosadvancedtools.loadFiles import loadpath
 
+from caoscrawler.scanner import load_definition, scan_structure_elements, create_converter_registry
+
 import sys
 
 set_test_key("10b128cf8a1372f30aa3697466bb55e76974e0c16a599bb44ace88f19c8f61e2")
@@ -103,12 +105,11 @@ def test_dataset(clear_database, usemodel, addfiles, caplog):
     identifiable_path = os.path.join(DATADIR, "identifiables.yml")
     crawler_definition_path = os.path.join(DATADIR, "dataset_cfoods.yml")
     crawler_main(
-        os.path.join(DATADIR, 'data'),
-        crawler_definition_path,
-        identifiable_path,
-        True,
-        os.path.join(DATADIR, "provenance.yml"),
-        False,
+        crawled_directory_path=os.path.join(DATADIR, 'data'),
+        cfood_file_name=crawler_definition_path,
+        identifiables_definition_file=identifiable_path,
+        provenance_file=os.path.join(DATADIR, "provenance.yml"),
+        dry_run=False,
         remove_prefix=DATADIR,
         # this test will fail without this prefix since the crawler would try to create new files
         add_prefix="/extroot/realworld_example"
@@ -143,12 +144,11 @@ def test_event_update(clear_database, usemodel, addfiles):
     crawler_definition_path = os.path.join(DATADIR, "dataset_cfoods.yml")
 
     crawler_main(
-        os.path.join(DATADIR, 'data'),
-        crawler_definition_path,
-        identifiable_path,
-        True,
-        os.path.join(DATADIR, "provenance.yml"),
-        False,
+        crawled_directory_path=os.path.join(DATADIR, 'data'),
+        cfood_file_name=crawler_definition_path,
+        identifiables_definition_file=identifiable_path,
+        provenance_file=os.path.join(DATADIR, "provenance.yml"),
+        dry_run=False,
         remove_prefix=DATADIR,
         # this test will fail without this prefix since the crawler would try to create new files
         add_prefix="/extroot/realworld_example"
@@ -169,10 +169,11 @@ def test_event_update(clear_database, usemodel, addfiles):
     ident.load_from_yaml_definition(identifiable_path)
 
     second_crawler = Crawler(identifiableAdapter=ident)
-    crawler_definition = second_crawler.load_definition(
+    second_crawler.generate_run_id()
+    crawler_definition = load_definition(
         crawler_definition_path)
-    converter_registry = second_crawler.load_converters(crawler_definition)
-    records = second_crawler.start_crawling(
+    converter_registry = create_converter_registry(crawler_definition)
+    records = scan_structure_elements(
         Directory("data", os.path.join(DATADIR, "data")),
         crawler_definition,
         converter_registry
@@ -190,7 +191,7 @@ def test_event_update(clear_database, usemodel, addfiles):
                 "latitude").value = 0.0
             rec.get_property("Event").value[0].get_property(
                 "location").value = "Origin"
-    second_crawler.synchronize()
+    second_crawler.synchronize(crawled_data=records)
 
     # Dataset is still the same Record, but with an updated event
     new_dataset_rec = db.Record(id=old_dataset_rec.id).retrieve()
diff --git a/integrationtests/test_use_case_simple_presentation.py b/integrationtests/test_use_case_simple_presentation.py
index 5fc0f6c7d85a0fce4490c72952e711fe241a0099..0f48677d4bf64158374a0eb0865eb2b85ea715db 100644
--- a/integrationtests/test_use_case_simple_presentation.py
+++ b/integrationtests/test_use_case_simple_presentation.py
@@ -57,22 +57,24 @@ def test_complete_crawler(clear_database):
 
     # test that a bad value for "remove_prefix" leads to runtime error
     with pytest.raises(RuntimeError) as re:
-        crawler_main(DATADIR,
-                     os.path.join(DATADIR, "cfood.yml"),
-                     os.path.join(DATADIR, "identifiables.yml"),
-                     True,
-                     os.path.join(DATADIR, "provenance.yml"),
-                     False,
-                     remove_prefix="sldkfjsldf")
+        crawler_main(
+            crawled_directory_path=DATADIR,
+            cfood_file_name=os.path.join(DATADIR, "cfood.yml"),
+            identifiables_definition_file=os.path.join(DATADIR, "identifiables.yml"),
+            provenance_file=os.path.join(DATADIR, "provenance.yml"),
+            dry_run=False,
+            remove_prefix="sldkfjsldf",
+        )
     assert "path does not start with the prefix" in str(re.value)
 
-    crawler_main(DATADIR,
-                 os.path.join(DATADIR, "cfood.yml"),
-                 os.path.join(DATADIR, "identifiables.yml"),
-                 True,
-                 os.path.join(DATADIR, "provenance.yml"),
-                 False,
-                 remove_prefix=os.path.abspath(DATADIR))
+    crawler_main(
+        crawled_directory_path=DATADIR,
+        cfood_file_name=os.path.join(DATADIR, "cfood.yml"),
+        identifiables_definition_file=os.path.join(DATADIR, "identifiables.yml"),
+        provenance_file=os.path.join(DATADIR, "provenance.yml"),
+        dry_run=False,
+        remove_prefix=os.path.abspath(DATADIR),
+    )
 
     res = db.execute_query("FIND Record Experiment")
     assert len(res) == 1
diff --git a/setup.cfg b/setup.cfg
index fbdd9d7119312e2831c77fe3e8b24bd16b5826b4..0323d979854656d33e29cd760113fcb259a77f6e 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -1,6 +1,6 @@
 [metadata]
 name = caoscrawler
-version = 0.4.0
+version = 0.5.0
 author = Alexander Schlemmer
 author_email = alexander.schlemmer@ds.mpg.de
 description = A new crawler for caosdb
@@ -21,7 +21,7 @@ python_requires = >=3.7
 install_requires =
 	importlib-resources
 	caosdb >= 0.11.0
-	caosadvancedtools >= 0.6.0
+	caosadvancedtools >= 0.7.0
     yaml-header-tools >= 0.2.1
     pyyaml
     odfpy #make optional
diff --git a/src/caoscrawler/__init__.py b/src/caoscrawler/__init__.py
index 044d8f0bf53c4c80dab9b492919fa64ab321a60d..05bad0b54d9098c0b7f165d8295a0faa2966fa32 100644
--- a/src/caoscrawler/__init__.py
+++ b/src/caoscrawler/__init__.py
@@ -1,2 +1,4 @@
 from .crawl import Crawler, SecurityMode
-from .version import CfoodRequiredVersionError, version as __version__
+from .version import CfoodRequiredVersionError, get_caoscrawler_version
+
+__version__ = get_caoscrawler_version()
diff --git a/src/caoscrawler/crawl.py b/src/caoscrawler/crawl.py
index c77dcee1f29eac69732ce353e0271761eca2df13..bacc5356b7b14f43d44db25c461c717fa9c39bc9 100644
--- a/src/caoscrawler/crawl.py
+++ b/src/caoscrawler/crawl.py
@@ -1,11 +1,12 @@
 #!/usr/bin/env python3
 # encoding: utf-8
 #
-# ** header v3.0
 # This file is a part of the CaosDB Project.
 #
-# Copyright (C) 2021 Henrik tom Wörden
-#               2021 Alexander Schlemmer
+# Copyright (C) 2021      Henrik tom Wörden <h.tomwoerden@indiscale.com>
+#               2021-2023 Research Group Biomedical Physics,
+# Max-Planck-Institute for Dynamics and Self-Organization Göttingen
+# Alexander Schlemmer <alexander.schlemmer@ds.mpg.de>
 #
 # This program is free software: you can redistribute it and/or modify
 # it under the terms of the GNU Affero General Public License as
@@ -67,6 +68,13 @@ from .stores import GeneralStore, RecordStore
 from .structure_elements import StructureElement, Directory, NoneElement
 from .version import check_cfood_version
 
+from .scanner import (scan_directory,
+                      load_definition,
+                      create_converter_registry,
+                      initialize_converters,
+                      scan_structure_elements)
+from .debug_tree import DebugTree
+
 logger = logging.getLogger(__name__)
 
 SPECIAL_PROPERTIES_STRICT = ("description", "name", "id", "path")
@@ -176,26 +184,19 @@ class Crawler(object):
 
     def __init__(self,
                  generalStore: Optional[GeneralStore] = None,
-                 debug: bool = False,
-                 identifiableAdapter: IdentifiableAdapter = None,
-                 securityMode: SecurityMode = SecurityMode.UPDATE
-                 ):
+                 debug: Optional[bool] = None,
+                 identifiableAdapter: Optional[IdentifiableAdapter] = None,
+                 securityMode: SecurityMode = SecurityMode.UPDATE):
         """
         Create a new crawler and initialize an empty RecordStore and GeneralStore.
 
+        Deprecated arguments:
+        - debug: This argument no longer has an effect.
+        - generalStore: This argument no longer has an effect. It might be added to the
+                        scanning functions in the scanner module in the future, if needed.
+
         Parameters
         ----------
-        recordStore : GeneralStore
-             An initial GeneralStore which might store e.g. environment variables.
-        debug : bool
-             Create a debugging information tree when set to True.
-             The debugging information tree is a variable stored in
-             self.debug_tree. It is a dictionary mapping directory entries
-             to a tuple of general stores and record stores which are valid for
-             the directory scope.
-             Furthermore, it is stored in a second tree named self.debug_copied whether the
-             objects in debug_tree had been copied from a higher level in the hierarchy
-             of the structureelements.
         identifiableAdapter : IdentifiableAdapter
              TODO describe
         securityMode : int
@@ -203,278 +204,93 @@ class Crawler(object):
              Please use SecurityMode Enum
         """
 
+        # Remove this once the property `crawled_data` is no longer needed for compatibility
+        # reasons
+        self._crawled_data = None
+
         # The following caches store records, where we checked whether they exist on the remote
         # server. Since, it is important to know whether they exist or not, we store them into two
         # different caches.
         self.remote_existing_cache = IdentifiedCache()
         self.remote_missing_cache = IdentifiedCache()
-        self.recordStore = RecordStore()
         self.securityMode = securityMode
 
-        self.generalStore = generalStore
-        if generalStore is None:
-            self.generalStore = GeneralStore()
-
         self.identifiableAdapter: IdentifiableAdapter = LocalStorageIdentifiableAdapter()
         if identifiableAdapter is not None:
             self.identifiableAdapter = identifiableAdapter
-        # If a directory is crawled this may hold the path to that directory
-        self.crawled_directory: Optional[str] = None
-        self.debug = debug
-        if self.debug:
-            # order in the tuple:
-            # 0: generalStore
-            # 1: recordStore
-            self.debug_tree: dict[str, tuple] = dict()
-            self.debug_metadata: dict[str, dict] = dict()
-            self.debug_metadata["copied"] = dict()
-            self.debug_metadata["provenance"] = defaultdict(lambda: dict())
-            self.debug_metadata["usage"] = defaultdict(lambda: set())
 
-    def load_definition(self, crawler_definition_path: str):
-        """
-        Load a cfood from a crawler definition defined by
-        crawler definition path and validate it using cfood-schema.yml.
-        """
+        if debug is not None:
+            warnings.warn(DeprecationWarning(
+                "The debug argument of the Crawler class is deprecated and has no effect."))
 
-        # Load the cfood from a yaml file:
-        with open(crawler_definition_path, "r") as f:
-            crawler_definitions = list(yaml.safe_load_all(f))
-
-        crawler_definition = self._load_definition_from_yaml_dict(
-            crawler_definitions)
-
-        return self._resolve_validator_paths(crawler_definition, crawler_definition_path)
-
-    def _load_definition_from_yaml_dict(self, crawler_definitions: list[dict]):
-        """Load crawler definitions from a list of (yaml) dicts `crawler_definitions` which
-        contains either one or two documents.
-
-        Doesn't resolve the validator paths in the cfood definition, so for
-        internal and testing use only.
-
-        """
-        if len(crawler_definitions) == 1:
-            # Simple case, just one document:
-            crawler_definition = crawler_definitions[0]
-            metadata = {}
-        elif len(crawler_definitions) == 2:
-            metadata = crawler_definitions[0]["metadata"] if "metadata" in crawler_definitions[0] else {
-            }
-            crawler_definition = crawler_definitions[1]
-        else:
-            raise RuntimeError(
-                "Crawler definition must not contain more than two documents.")
-
-        check_cfood_version(metadata)
-
-        # TODO: at this point this function can already load the cfood schema extensions
-        #       from the crawler definition and add them to the yaml schema that will be
-        #       tested in the next lines of code:
-
-        # Load the cfood schema:
-        with open(str(files('caoscrawler').joinpath('cfood-schema.yml')), "r") as f:
-            schema = yaml.safe_load(f)
-
-        # Add custom converters to converter enum in schema:
-        if "Converters" in crawler_definition:
-            for key in crawler_definition["Converters"]:
-                schema["cfood"]["$defs"]["converter"]["properties"]["type"]["enum"].append(
-                    key)
-        if len(crawler_definitions) == 2:
-            if "Converters" in metadata:
-                for key in metadata["Converters"]:
-                    schema["cfood"]["$defs"]["converter"]["properties"]["type"]["enum"].append(
-                        key)
-
-        # Validate the cfood schema:
-        validate(instance=crawler_definition, schema=schema["cfood"])
-
-        return crawler_definition
-
-    def _resolve_validator_paths(self, definition: dict, definition_path: str):
-        """Resolve path to validation files with respect to the file in which
-        the crawler was defined.
-
-        """
-
-        for key, value in definition.items():
-
-            if key == "validate" and isinstance(value, str):
-                # Validator is given by a path
-                if not value.startswith('/'):
-                    # Not an absolute path
-                    definition[key] = os.path.join(os.path.dirname(definition_path), value)
-                    if not os.path.isfile(definition[key]):
-                        # TODO(henrik) capture this in `crawler_main` similar to
-                        # `ConverterValidationError`.
-                        raise FileNotFoundError(
-                            f"Couldn't find validation file {definition[key]}")
-            elif isinstance(value, dict):
-                # Recursively resolve all validators
-                definition[key] = self._resolve_validator_paths(value, definition_path)
-
-        return definition
+        if generalStore is not None:
+            warnings.warn(DeprecationWarning(
+                "The generalStore argument of the Crawler class is deprecated and has no effect."))
 
     def load_converters(self, definition: dict):
-        """
-        Currently the converter registry is a dictionary containing for each converter:
-        - key is the short code, abbreviation for the converter class name
-        - module is the name of the module to be imported which must be installed
-        - class is the converter class to load and associate with this converter entry
-
-        all other info for the converter needs to be included in the converter plugin
-        directory:
-        schema.yml file
-        README.md documentation
-
-        TODO: this function does not make use of self, so it could become static.
-        """
-
-        # Defaults for the converter registry:
-        with open(str(files('caoscrawler').joinpath('default_converters.yml')), "r") as f:
-            converter_registry: dict[str, dict[str, str]] = yaml.safe_load(f)
-
-        # More converters from definition file:
-        if "Converters" in definition:
-            for key, entry in definition["Converters"].items():
-                if key in ["Dict", "DictTextElement", "DictIntegerElement", "DictBooleanElement",
-                           "DictDictElement", "DictListElement", "DictFloatElement"]:
-                    warnings.warn(DeprecationWarning(f"{key} is deprecated. Please use the new"
-                                                     " variant; without 'Dict' prefix or "
-                                                     "'DictElement' in case of 'Dict'"))
-
-                converter_registry[key] = {
-                    "converter": entry["converter"],
-                    "package": entry["package"]
-                }
-
-        # Load modules and associate classes:
-        for key, value in converter_registry.items():
-            module = importlib.import_module(value["package"])
-            value["class"] = getattr(module, value["converter"])
-        return converter_registry
-
-    def crawl_directory(self, dirname: str, crawler_definition_path: str,
-                        restricted_path: Optional[list[str]] = None):
-        """ Crawl a single directory.
+        warnings.warn(DeprecationWarning(
+            "The function load_converters in the crawl module is deprecated. "
+            "Please use create_converter_registry from the scanner module."))
+        return create_converter_registry(definition)
 
-        Convenience function that starts the crawler (calls start_crawling)
-        with a single directory as the StructureElement.
-
-        restricted_path: optional, list of strings
-                Traverse the data tree only along the given path. When the end of the given path
-                is reached, traverse the full tree as normal.
-        """
-
-        crawler_definition = self.load_definition(crawler_definition_path)
-        # Load and register converter packages:
-        converter_registry = self.load_converters(crawler_definition)
-
-        if not dirname:
-            raise ValueError(
-                "You have to provide a non-empty path for crawling.")
-        dir_structure_name = os.path.basename(dirname)
-        self.crawled_directory = dirname
-        if not dir_structure_name and dirname.endswith('/'):
-            if dirname == '/':
-                # Crawling the entire file system
-                dir_structure_name = "root"
-            else:
-                # dirname had a trailing '/'
-                dir_structure_name = os.path.basename(dirname[:-1])
-
-        self.start_crawling(Directory(dir_structure_name,
-                                      dirname),
-                            crawler_definition,
-                            converter_registry,
-                            restricted_path=restricted_path
-                            )
-
-    @staticmethod
-    def initialize_converters(crawler_definition: dict, converter_registry: dict):
-        """
-        takes the cfood as dict (`crawler_definition`) and creates the converter objects that
-        are defined on the highest level. Child Converters will in turn be created during the
-        initialization of the Converters.
-        """
-        converters = []
-
-        for key, value in crawler_definition.items():
-            # Definitions and Converters are reserved keywords
-            # on the top level of the yaml file.
-            # TODO: there should also be a top level keyword for the actual
-            #       CFood to avoid confusion between top level keywords
-            #       and the CFood.
-            if key == "Definitions":
-                continue
-            elif key == "Converters":
-                continue
-            converters.append(Converter.converter_factory(
-                value, key, converter_registry))
-
-        return converters
+    def load_definition(self, crawler_definition_path: str):
+        warnings.warn(DeprecationWarning(
+            "The function load_definition in the crawl module is deprecated. "
+            "Please use load_definition from the scanner module."))
+        return load_definition(crawler_definition_path)
+
+    def initialize_converters(self, crawler_definition: dict, converter_registry: dict):
+        warnings.warn(DeprecationWarning(
+            "The function initialize_converters in the crawl module is deprecated. "
+            "Please use initialize_converters from the scanner module."))
+        return initialize_converters(crawler_definition, converter_registry)
+
+    def generate_run_id(self):
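+        """Create a fresh UUID for this crawler run.
+
+        The run id is used, e.g., to look up pending inserts and updates in the UpdateCache.
+        """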
+        self.run_id = uuid.uuid1()
 
     def start_crawling(self, items: Union[list[StructureElement], StructureElement],
                        crawler_definition: dict,
                        converter_registry: dict,
                        restricted_path: Optional[list[str]] = None):
-        """
-        Start point of the crawler recursion.
 
-        Parameters
-        ----------
-        items: list
-             A list of structure elements (or a single StructureElement) that is used for
-             generating the initial items for the crawler. This could e.g. be a Directory.
-        crawler_definition : dict
-             A dictionary representing the crawler definition, possibly from a yaml
-             file.
-        restricted_path: optional, list of strings
-             Traverse the data tree only along the given path. When the end of the given path
-             is reached, traverse the full tree as normal.
-
-        Returns
-        -------
-        crawled_data : list
-            the final list with the target state of Records.
-        """
-
-        # This function builds the tree of converters out of the crawler definition.
-
-        if self.generalStore is None:
-            raise RuntimeError("Should not happen.")
-
-        if not isinstance(items, list):
-            items = [items]
-
-        self.run_id = uuid.uuid1()
-        local_converters = Crawler.initialize_converters(crawler_definition, converter_registry)
-
-        # This recursive crawling procedure generates the update list:
-        self.crawled_data: list[db.Record] = []
-        self._crawl(
-            items=items,
-            local_converters=local_converters,
-            generalStore=self.generalStore,
-            recordStore=self.recordStore,
-            structure_elements_path=[],
-            converters_path=[],
-            restricted_path=restricted_path)
-        if self.debug:
-            self.debug_converters = local_converters
-
-        return self.crawled_data
-
-    def synchronize(self, commit_changes: bool = True, unique_names=True):
+        warnings.warn(DeprecationWarning(
+            "The function start_crawling in the crawl module is deprecated. "
+            "Please use scan_structure_elements from the scanner module."))
+
+        data = scan_structure_elements(
+            items, crawler_definition, converter_registry, restricted_path)
+        self.crawled_data = data
+        return data
+
+    @property
+    def crawled_data(self):
+        warnings.warn(DeprecationWarning(
+            "The use of self.crawled_data is depricated. You should not access this variable. "
+            "Instead, create the data with the scanner and then pass it as argument to Crawler "
+            "functions"))
+        return self._crawled_data
+
+    @crawled_data.setter
+    def crawled_data(self, arg):
+        self._crawled_data = arg
+
+    def crawl_directory(self,
+                        crawled_directory: str,
+                        crawler_definition_path: str,
+                        restricted_path: Optional[list[str]] = None):
         """
-        Carry out the actual synchronization.
+        Deprecated: crawl a single directory. Please use scan_directory from the scanner module instead.
         """
 
-        # After the crawling, the actual synchronization with the database, based on the
-        # update list is carried out:
+        warnings.warn(DeprecationWarning(
+            "The function crawl_directory in the crawl module is deprecated. "
+            "Please use scan_directory from the scanner module."))
 
-        return self._synchronize(self.crawled_data, commit_changes, unique_names=unique_names)
+        data = scan_directory(crawled_directory,
+                              crawler_definition_path,
+                              restricted_path)
+        self.crawled_data = data
+        return data
 
     def _has_reference_value_without_id(self, ident: Identifiable) -> bool:
         """
@@ -712,7 +528,7 @@ class Crawler(object):
         # TODO: can the following be removed at some point
         for ent in flat:
             if ent.role == "Record" and len(ent.parents) == 0:
-                raise RuntimeError("Records must have a parent.")
+                raise RuntimeError(f"Records must have a parent.\n{ent}")
 
         resolved_references = True
         # flat contains Entities which could not yet be checked against the remote server
@@ -947,7 +763,8 @@ class Crawler(object):
         return db.Entity(id=id).retrieve()
 
     @staticmethod
-    def execute_inserts_in_list(to_be_inserted, securityMode, run_id: uuid.UUID = None,
+    def execute_inserts_in_list(to_be_inserted, securityMode,
+                                run_id: Optional[uuid.UUID] = None,
                                 unique_names=True):
         for record in to_be_inserted:
             for prop in record.properties:
@@ -975,7 +792,8 @@ class Crawler(object):
                     _resolve_datatype(prop, entity)
 
     @staticmethod
-    def execute_updates_in_list(to_be_updated, securityMode, run_id: uuid.UUID = None,
+    def execute_updates_in_list(to_be_updated, securityMode,
+                                run_id: Optional[uuid.UUID] = None,
                                 unique_names=True):
         Crawler.set_ids_and_datatype_of_parents_and_properties(to_be_updated)
         logger.debug("UPDATE")
@@ -987,8 +805,11 @@ class Crawler(object):
                 update_cache = UpdateCache()
                 update_cache.insert(to_be_updated, run_id)
 
-    def _synchronize(self, crawled_data: list[db.Record], commit_changes: bool = True,
-                     unique_names=True):
+    def synchronize(self,
+                    commit_changes: bool = True,
+                    unique_names: bool = True,
+                    crawled_data: Optional[list[db.Record]] = None,
+                    ):
         """
         This function applies several stages:
         1) Retrieve identifiables for all records in crawled_data.
@@ -1003,6 +824,13 @@ class Crawler(object):
 
         Return the final to_be_inserted and to_be_updated as tuple.
         """
+        if crawled_data is None:
+            warnings.warn(DeprecationWarning(
+                "Calling synchronize without the data to be synchronized is depricated. Please "
+                "use for example the Scanner to create this data."))
+            crawled_data = self.crawled_data
+
+        self.generate_run_id()
 
         to_be_inserted, to_be_updated = self.split_into_inserts_and_updates(crawled_data)
         referencing_entities = self.create_reference_mapping(to_be_updated + to_be_inserted)
@@ -1023,7 +851,7 @@ class Crawler(object):
         to_be_updated = self.remove_unnecessary_updates(to_be_updated, identified_records)
 
         logger.info(f"Going to insert {len(to_be_inserted)} Entities and update "
-                    f"{len(to_be_inserted)} Entities.")
+                    f"{len(to_be_updated)} Entities.")
         if commit_changes:
             self.execute_parent_updates_in_list(to_be_updated, securityMode=self.securityMode,
                                                 run_id=self.run_id, unique_names=unique_names)
@@ -1041,12 +869,14 @@ class Crawler(object):
         pending_inserts = update_cache.get_inserts(self.run_id)
         if pending_inserts:
             Crawler.inform_about_pending_changes(
-                pending_inserts, self.run_id, self.crawled_directory)
+                # TODO crawled_directory is no longer available
+                pending_inserts, self.run_id, "missing crawled_directory")
 
         pending_updates = update_cache.get_updates(self.run_id)
         if pending_updates:
             Crawler.inform_about_pending_changes(
-                pending_updates, self.run_id, self.crawled_directory)
+                # TODO crawled_directory is no longer available
+                pending_updates, self.run_id, "missing crawled_directory")
 
         return (to_be_inserted, to_be_updated)
 
@@ -1110,11 +940,15 @@ ____________________\n""".format(i + 1, len(pending_changes)) + str(el[3]))
             res[converter.name]["subtree"][k[0]] = d[k[0]]
         return res
 
-    def save_debug_data(self, filename: str):
+    def save_debug_data(self, debug_tree: DebugTree, filename: str):
+        """
+        Save the information contained in the given ``debug_tree`` to the file ``filename``.
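+
+        A usage sketch (``debug_tree`` is the DebugTree instance that was passed to the
+        scanner, e.g. to ``scan_directory``)::
+
+            crawler.save_debug_data(debug_tree, "provenance.yml")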
+        """
+
         paths: dict[str, Union[dict, list]] = dict()
 
         def flatten_debug_info(key):
-            mod_info = self.debug_metadata[key]
+            mod_info = debug_tree.debug_metadata[key]
             paths[key] = dict()
             for record_name in mod_info:
                 if key == "provenance":
@@ -1130,125 +964,19 @@ ____________________\n""".format(i + 1, len(pending_changes)) + str(el[3]))
         for key in ("provenance", "usage"):
             flatten_debug_info(key)
 
-        paths["converters_usage"] = [self.debug_build_usage_tree(
-            cv) for cv in self.debug_converters]
+        # TODO: clarify what this was used for
+        # paths["converters_usage"] = [self.debug_build_usage_tree(
+        #     cv) for cv in self.debug_converters]
 
         with open(filename, "w") as f:
             f.write(yaml.dump(paths, sort_keys=False))
 
-    def _crawl(self,
-               items: list[StructureElement],
-               local_converters: list[Converter],
-               generalStore: GeneralStore,
-               recordStore: RecordStore,
-               structure_elements_path: list[str],
-               converters_path: list[str],
-               restricted_path: Optional[list[str]] = None):
-        """
-        Crawl a list of StructureElements and apply any matching converters.
-
-        items: structure_elements (e.g. files and folders on one level on the hierarchy)
-        local_converters: locally defined converters for
-                            treating structure elements. A locally defined converter could be
-                            one that is only valid for a specific subtree of the originally
-                            cralwed StructureElement structure.
-        generalStore and recordStore: This recursion of the crawl function should only operate on
-                                      copies of the global stores of the Crawler object.
-        restricted_path: optional, list of strings, traverse the data tree only along the given
-                         path. For example, when a directory contains files a, b and c and b is
-                         given in restricted_path, a and c will be ignroed by the crawler.
-                         When the end of the given path is reached, traverse the full tree as
-                         normal. The first element of the list provided by restricted_path should
-                         be the name of the StructureElement at this level, i.e. denoting the
-                         respective element in the items argument.
-        """
-        # This path_found variable stores wether the path given by restricted_path was found in the
-        # data tree
-        path_found = False
-        if restricted_path is not None and len(restricted_path) == 0:
-            restricted_path = None
-
-        for element in items:
-            for converter in local_converters:
-
-                # type is something like "matches files", replace isinstance with "type_matches"
-                # match function tests regexp for example
-                if (converter.typecheck(element) and (
-                        restricted_path is None or element.name == restricted_path[0])
-                        and converter.match(element) is not None):
-                    path_found = True
-                    generalStore_copy = generalStore.create_scoped_copy()
-                    recordStore_copy = recordStore.create_scoped_copy()
-
-                    # Create an entry for this matched structure element that contains the path:
-                    generalStore_copy[converter.name] = (
-                        os.path.join(*(structure_elements_path + [element.get_name()])))
-
-                    # extracts values from structure element and stores them in the
-                    # variable store
-                    converter.create_values(generalStore_copy, element)
-
-                    keys_modified = converter.create_records(
-                        generalStore_copy, recordStore_copy, element)
-
-                    children = converter.create_children(generalStore_copy, element)
-
-                    if self.debug:
-                        # add provenance information for each variable
-                        self.debug_tree[str(element)] = (
-                            generalStore_copy.get_storage(), recordStore_copy.get_storage())
-                        self.debug_metadata["copied"][str(element)] = (
-                            generalStore_copy.get_dict_copied(),
-                            recordStore_copy.get_dict_copied())
-                        self.debug_metadata["usage"][str(element)].add(
-                            "/".join(converters_path + [converter.name]))
-                        mod_info = self.debug_metadata["provenance"]
-                        for record_name, prop_name in keys_modified:
-                            # TODO: check
-                            internal_id = recordStore_copy.get_internal_id(
-                                record_name)
-                            record_identifier = record_name + \
-                                "_" + str(internal_id)
-                            converter.metadata["usage"].add(record_identifier)
-                            mod_info[record_identifier][prop_name] = (
-                                structure_elements_path + [element.get_name()],
-                                converters_path + [converter.name])
-
-                    self._crawl(children, converter.converters,
-                                generalStore_copy, recordStore_copy,
-                                structure_elements_path + [element.get_name()],
-                                converters_path + [converter.name],
-                                restricted_path[1:] if restricted_path is not None else None)
-
-        if restricted_path and not path_found:
-            raise RuntimeError("A 'restricted_path' argument was given that is not contained in "
-                               "the data tree")
-        # if the crawler is running out of scope, copy all records in
-        # the recordStore, that were created in this scope
-        # to the general update container.
-        scoped_records = recordStore.get_records_current_scope()
-        for record in scoped_records:
-            self.crawled_data.append(record)
-
-        # TODO: the scoped variables should be cleaned up as soon if the variables
-        #       are no longer in the current scope. This can be implemented as follows,
-        #       but this breaks the test "test_record_structure_generation", because
-        #       some debug info is also deleted. This implementation can be used as soon
-        #       as the remaining problems with the debug_tree are fixed.
-        # Delete the variables that are no longer needed:
-        # scoped_names = recordStore.get_names_current_scope()
-        # for name in scoped_names:
-        #     del recordStore[name]
-        #     del generalStore[name]
-
-        return self.crawled_data
-
 
 def crawler_main(crawled_directory_path: str,
                  cfood_file_name: str,
-                 identifiables_definition_file: str = None,
+                 identifiables_definition_file: Optional[str] = None,
                  debug: bool = False,
-                 provenance_file: str = None,
+                 provenance_file: Optional[str] = None,
                  dry_run: bool = False,
                  prefix: str = "",
                  securityMode: SecurityMode = SecurityMode.UPDATE,
@@ -1292,14 +1020,17 @@ def crawler_main(crawled_directory_path: str,
     return_value : int
         0 if successful
     """
-    crawler = Crawler(debug=debug, securityMode=securityMode)
+    crawler = Crawler(securityMode=securityMode)
     try:
-        crawler.crawl_directory(crawled_directory_path, cfood_file_name, restricted_path)
+
+        debug_tree = DebugTree()
+        crawled_data = scan_directory(
+            crawled_directory_path, cfood_file_name, restricted_path, debug_tree=debug_tree)
     except ConverterValidationError as err:
         logger.error(err)
         return 1
     if provenance_file is not None and debug:
-        crawler.save_debug_data(provenance_file)
+        crawler.save_debug_data(debug_tree, provenance_file)
 
     if identifiables_definition_file is not None:
         ident = CaosDBIdentifiableAdapter()
@@ -1316,7 +1047,7 @@ def crawler_main(crawled_directory_path: str,
         remove_prefix = prefix
 
     if dry_run:
-        ins, upd = crawler.synchronize(commit_changes=False)
+        ins, upd = crawler.synchronize(commit_changes=False, crawled_data=crawled_data)
         inserts = [str(i) for i in ins]
         updates = [str(i) for i in upd]
         with open("dry.yml", "w") as f:
@@ -1325,7 +1056,7 @@ def crawler_main(crawled_directory_path: str,
                 "update": updates}))
     else:
         rtsfinder = dict()
-        for elem in crawler.crawled_data:
+        for elem in crawled_data:
             if isinstance(elem, db.File):
                 # correct the file path:
                 # elem.file = os.path.join(args.path, elem.file)
@@ -1362,7 +1093,8 @@ def crawler_main(crawled_directory_path: str,
             raise RuntimeError("Missing RecordTypes: {}".
                                format(", ".join(notfound)))
 
-        crawler.synchronize(commit_changes=True, unique_names=unique_names)
+        crawler.synchronize(commit_changes=True, unique_names=unique_names,
+                            crawled_data=crawled_data)
     return 0
 
 
diff --git a/src/caoscrawler/debug_tree.py b/src/caoscrawler/debug_tree.py
new file mode 100644
index 0000000000000000000000000000000000000000..79701773a5cece1747878c45bff2e394ec0f7f6b
--- /dev/null
+++ b/src/caoscrawler/debug_tree.py
@@ -0,0 +1,78 @@
+#!/usr/bin/env python3
+# encoding: utf-8
+#
+# ** header v3.0
+# This file is a part of the CaosDB Project.
+#
+# Copyright (C) 2023 Alexander Schlemmer <alexander.schlemmer@ds.mpg.de>
+#
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as
+# published by the Free Software Foundation, either version 3 of the
+# License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+#
+# ** end header
+#
+
+"""
+A structure containing debug tree information.
+"""
+
+from __future__ import annotations
+
+import argparse
+import importlib
+import logging
+import os
+import sys
+import warnings
+import yaml
+
+from argparse import RawTextHelpFormatter
+from collections import defaultdict
+from copy import deepcopy
+from enum import Enum
+from importlib_resources import files
+from jsonschema import validate
+from typing import Any, Optional, Type, Union
+
+import caosdb as db
+
+from caosadvancedtools.cache import UpdateCache, Cache
+from caosadvancedtools.crawler import Crawler as OldCrawler
+from caosdb.apiutils import (compare_entities, EntityMergeConflictError,
+                             merge_entities)
+from caosdb.common.datatype import is_reference
+
+from .converters import Converter, DirectoryConverter, ConverterValidationError
+
+from .macros import defmacro_constructor, macro_constructor
+from .stores import Store, GeneralStore, RecordStore
+from .structure_elements import StructureElement, Directory, NoneElement
+from .version import check_cfood_version
+
+from caosdb.high_level_api import convert_to_python_object
+
+
+class DebugTree(object):
+
+    def __init__(self):
+        # order in the tuple:
+        # 0: general_store
+        # 1: record_store
+        self.debug_tree: dict[str, tuple] = dict()
+        self.debug_metadata: dict[str, dict] = dict()
+        self.debug_metadata["copied"] = dict()
+        self.debug_metadata["provenance"] = defaultdict(lambda: dict())
+        self.debug_metadata["usage"] = defaultdict(lambda: set())
+
+        # TODO: turn the tuple into two individual elements
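+
+
+# A minimal usage sketch (the directory, cfood file name and dictionary key below are
+# placeholders): a DebugTree is created by the caller and passed to the scanner functions,
+# which fill it while traversing the data. Keys of ``debug_tree`` are the string
+# representations of the matched StructureElements.
+#
+#     from caoscrawler.debug_tree import DebugTree
+#     from caoscrawler.scanner import scan_directory
+#
+#     dbt = DebugTree()
+#     scan_directory("path/to/data", "cfood.yml", debug_tree=dbt)
+#     general_store_state, record_store_state = dbt.debug_tree["<matched element>"]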
diff --git a/src/caoscrawler/scanner.py b/src/caoscrawler/scanner.py
new file mode 100644
index 0000000000000000000000000000000000000000..ff6156aed3bde639435219a705d6d7d2124f7f38
--- /dev/null
+++ b/src/caoscrawler/scanner.py
@@ -0,0 +1,444 @@
+#!/usr/bin/env python3
+# encoding: utf-8
+#
+# This file is a part of the CaosDB Project.
+#
+# Copyright (C) 2023 Research Group Biomedical Physics,
+# Max-Planck-Institute for Dynamics and Self-Organization Göttingen
+# Alexander Schlemmer <alexander.schlemmer@ds.mpg.de>
+#
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as
+# published by the Free Software Foundation, either version 3 of the
+# License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+#
+# ** end header
+#
+
+"""
+This is the scanner, the original "_crawl" function from crawl.py.
+It contains only the functionality that extracts data from the file system.
+"""
+
+from __future__ import annotations
+
+import argparse
+import importlib
+import logging
+import os
+import sys
+import warnings
+import yaml
+
+from argparse import RawTextHelpFormatter
+from collections import defaultdict
+from copy import deepcopy
+from enum import Enum
+from importlib_resources import files
+from jsonschema import validate
+from typing import Any, Optional, Type, Union
+
+import caosdb as db
+
+from caosadvancedtools.cache import UpdateCache, Cache
+from caosadvancedtools.crawler import Crawler as OldCrawler
+from caosdb.apiutils import (compare_entities, EntityMergeConflictError,
+                             merge_entities)
+from caosdb.common.datatype import is_reference
+
+from .converters import Converter, DirectoryConverter, ConverterValidationError
+
+from .macros import defmacro_constructor, macro_constructor
+from .stores import Store, GeneralStore, RecordStore
+from .structure_elements import StructureElement, Directory, NoneElement
+from .version import check_cfood_version
+
+from caosdb.high_level_api import convert_to_python_object
+
+from .debug_tree import DebugTree
+
+logger = logging.getLogger(__name__)
+
+
+def load_definition(crawler_definition_path: str):
+    """
+    Load a cfood from the crawler definition file given by `crawler_definition_path`
+    and validate it against cfood-schema.yml.
+    """
+
+    # Load the cfood from a yaml file:
+    with open(crawler_definition_path, "r") as f:
+        crawler_definitions = list(yaml.safe_load_all(f))
+
+    crawler_definition = _load_definition_from_yaml_dict(
+        crawler_definitions)
+
+    return _resolve_validator_paths(crawler_definition, crawler_definition_path)
+
+
+def _load_definition_from_yaml_dict(crawler_definitions: list[dict]):
+    """Load crawler definitions from a list of (yaml) dicts `crawler_definitions` which
+    contains either one or two documents.
+
+    Doesn't resolve the validator paths in the cfood definition, so for
+    internal and testing use only.
+
+    """
+    if len(crawler_definitions) == 1:
+        # Simple case, just one document:
+        crawler_definition = crawler_definitions[0]
+        metadata = {}
+    elif len(crawler_definitions) == 2:
+        metadata = (crawler_definitions[0]["metadata"]
+                    if "metadata" in crawler_definitions[0] else {})
+        crawler_definition = crawler_definitions[1]
+    else:
+        raise RuntimeError(
+            "Crawler definition must not contain more than two documents.")
+
+    check_cfood_version(metadata)
+
+    # TODO: at this point this function can already load the cfood schema extensions
+    #       from the crawler definition and add them to the yaml schema that will be
+    #       tested in the next lines of code:
+
+    # Load the cfood schema:
+    with open(str(files('caoscrawler').joinpath('cfood-schema.yml')), "r") as f:
+        schema = yaml.safe_load(f)
+
+    # Add custom converters to converter enum in schema:
+    if "Converters" in crawler_definition:
+        for key in crawler_definition["Converters"]:
+            schema["cfood"]["$defs"]["converter"]["properties"]["type"]["enum"].append(
+                key)
+    if len(crawler_definitions) == 2:
+        if "Converters" in metadata:
+            for key in metadata["Converters"]:
+                schema["cfood"]["$defs"]["converter"]["properties"]["type"]["enum"].append(
+                    key)
+
+    # Validate the cfood schema:
+    validate(instance=crawler_definition, schema=schema["cfood"])
+
+    return crawler_definition
+
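+# Example of the two-document layout accepted by the function above (metadata document
+# first, then the actual cfood; the definitions are taken from the unittests in this
+# change set):
+#
+#     ---
+#     metadata:
+#       crawler-version: 0.3.1
+#     ---
+#     SimulationData:
+#       type: Directory
+#       match: SimulationData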
+
+def _resolve_validator_paths(definition: dict, definition_path: str):
+    """Resolve path to validation files with respect to the file in which
+    the crawler was defined.
+
+    """
+
+    for key, value in definition.items():
+
+        if key == "validate" and isinstance(value, str):
+            # Validator is given by a path
+            if not value.startswith('/'):
+                # Not an absolute path
+                definition[key] = os.path.join(os.path.dirname(definition_path), value)
+                if not os.path.isfile(definition[key]):
+                    # TODO(henrik) capture this in `crawler_main` similar to
+                    # `ConverterValidationError`.
+                    raise FileNotFoundError(
+                        f"Couldn't find validation file {definition[key]}")
+        elif isinstance(value, dict):
+            # Recursively resolve all validators
+            definition[key] = _resolve_validator_paths(value, definition_path)
+
+    return definition
+
+
+def create_converter_registry(definition: dict):
+    """
+    Currently the converter registry is a dictionary that contains, for each converter:
+    - key: the short code, an abbreviation for the converter class name
+    - package: the name of the module to be imported, which must be installed
+    - class: the converter class to load and associate with this converter entry
+
+    Formerly known as "load_converters".
+
+    All other information for a converter needs to be included in the converter plugin
+    directory:
+    - schema.yml file
+    - README.md documentation
+
+    See the sketch below this function for an example registry entry.
+    """
+
+    # Defaults for the converter registry:
+    with open(str(files('caoscrawler').joinpath('default_converters.yml')), "r") as f:
+        converter_registry: dict[str, dict[str, str]] = yaml.safe_load(f)
+
+    # More converters from definition file:
+    if "Converters" in definition:
+        for key, entry in definition["Converters"].items():
+            if key in ["Dict", "DictTextElement", "DictIntegerElement", "DictBooleanElement",
+                       "DictDictElement", "DictListElement", "DictFloatElement"]:
+                warnings.warn(DeprecationWarning(f"{key} is deprecated. Please use the new"
+                                                 " variant; without 'Dict' prefix or "
+                                                 "'DictElement' in case of 'Dict'"))
+
+            converter_registry[key] = {
+                "converter": entry["converter"],
+                "package": entry["package"]
+            }
+
+    # Load modules and associate classes:
+    for key, value in converter_registry.items():
+        module = importlib.import_module(value["package"])
+        value["class"] = getattr(module, value["converter"])
+    return converter_registry
+
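+# Sketch of a custom converter entry as it may appear in a cfood "Converters" section and
+# in the resulting registry (names are illustrative; "class" is filled in by the loop above):
+#
+#     Converters:
+#       MyNewType:
+#         converter: MyNewTypeConverter
+#         package: mypackage.converters
+#
+#     converter_registry["MyNewType"] == {"converter": "MyNewTypeConverter",
+#                                         "package": "mypackage.converters",
+#                                         "class": <the loaded MyNewTypeConverter class>}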
+
+def initialize_converters(crawler_definition: dict, converter_registry: dict):
+    """
+    Take the cfood dict (`crawler_definition`) and create the converter objects that
+    are defined on the highest level. Child converters will in turn be created during
+    the initialization of these converters.
+    """
+    converters = []
+
+    for key, value in crawler_definition.items():
+        # Definitions and Converters are reserved keywords
+        # on the top level of the yaml file.
+        # TODO: there should also be a top level keyword for the actual
+        #       CFood to avoid confusion between top level keywords
+        #       and the CFood.
+        if key == "Definitions":
+            continue
+        elif key == "Converters":
+            continue
+        converters.append(Converter.converter_factory(
+            value, key, converter_registry))
+
+    return converters
+
+# --------------------------------------------------------------------------------
+# Main scanner function:
+# --------------------------------------------------------------------------------
+
+
+def scanner(items: list[StructureElement],
+            converters: list[Converter],
+            general_store: Optional[GeneralStore] = None,
+            record_store: Optional[RecordStore] = None,
+            structure_elements_path: Optional[list[str]] = None,
+            converters_path: Optional[list[str]] = None,
+            restricted_path: Optional[list[str]] = None,
+            crawled_data: Optional[list[db.Record]] = None,
+            debug_tree: Optional[DebugTree] = None):
+    """
+    Crawl a list of StructureElements and apply any matching converters.
+
+    Formerly known as "_crawl".
+
+    items: structure elements (e.g. files and folders on one level of the hierarchy)
+    converters: locally defined converters for treating structure elements. A locally
+                defined converter could be one that is only valid for a specific subtree
+                of the originally crawled StructureElement structure.
+    general_store and record_store: This recursion of the scanner function should only operate
+                                    on copies of the global stores of the Crawler object.
+    restricted_path: optional, list of strings; traverse the data tree only along the given
+                     path. For example, when a directory contains files a, b and c and b is
+                     given in restricted_path, a and c will be ignored by the crawler.
+                     When the end of the given path is reached, traverse the full tree as
+                     normal. The first element of the list provided by restricted_path should
+                     be the name of the StructureElement at this level, i.e. it denotes the
+                     respective element in the items argument.
+    """
+    # This path_found variable stores whether the path given by restricted_path was found in
+    # the data tree.
+    path_found = False
+    if restricted_path is not None and len(restricted_path) == 0:
+        restricted_path = None
+
+    if crawled_data is None:
+        crawled_data = []
+
+    if general_store is None:
+        general_store = GeneralStore()
+
+    if record_store is None:
+        record_store = RecordStore()
+
+    if structure_elements_path is None:
+        structure_elements_path = []
+
+    if converters_path is None:
+        converters_path = []
+
+    for element in items:
+        for converter in converters:
+
+            # type is something like "matches files", replace isinstance with "type_matches"
+            # match function tests regexp for example
+            if (converter.typecheck(element) and (
+                    restricted_path is None or element.name == restricted_path[0])
+                    and converter.match(element) is not None):
+                path_found = True
+                general_store_copy = general_store.create_scoped_copy()
+                record_store_copy = record_store.create_scoped_copy()
+
+                # Create an entry for this matched structure element that contains the path:
+                general_store_copy[converter.name] = (
+                    os.path.join(*(structure_elements_path + [element.get_name()])))
+
+                # extracts values from structure element and stores them in the
+                # variable store
+                converter.create_values(general_store_copy, element)
+
+                keys_modified = converter.create_records(
+                    general_store_copy, record_store_copy, element)
+
+                children = converter.create_children(general_store_copy, element)
+
+                if debug_tree is not None:
+                    # add provenance information for each variable
+                    debug_tree.debug_tree[str(element)] = (
+                        general_store_copy.get_storage(), record_store_copy.get_storage())
+                    debug_tree.debug_metadata["copied"][str(element)] = (
+                        general_store_copy.get_dict_copied(),
+                        record_store_copy.get_dict_copied())
+                    debug_tree.debug_metadata["usage"][str(element)].add(
+                        "/".join(converters_path + [converter.name]))
+                    mod_info = debug_tree.debug_metadata["provenance"]
+                    for record_name, prop_name in keys_modified:
+                        # TODO: check
+                        internal_id = record_store_copy.get_internal_id(
+                            record_name)
+                        record_identifier = record_name + \
+                            "_" + str(internal_id)
+                        converter.metadata["usage"].add(record_identifier)
+                        mod_info[record_identifier][prop_name] = (
+                            structure_elements_path + [element.get_name()],
+                            converters_path + [converter.name])
+
+                scanner(children, converter.converters,
+                        general_store_copy, record_store_copy,
+                        structure_elements_path + [element.get_name()],
+                        converters_path + [converter.name],
+                        restricted_path[1:] if restricted_path is not None else None,
+                        crawled_data, debug_tree)
+
+    if restricted_path and not path_found:
+        raise RuntimeError("A 'restricted_path' argument was given that is not contained in "
+                           "the data tree")
+    # When the scanner runs out of scope, copy all records in the record_store
+    # that were created in this scope to the general update container.
+    scoped_records = record_store.get_records_current_scope()
+    for record in scoped_records:
+        crawled_data.append(record)
+
+    # TODO: the scoped variables should be cleaned up as soon as the variables
+    #       are no longer in the current scope. This can be implemented as follows,
+    #       but this breaks the test "test_record_structure_generation", because
+    #       some debug info is also deleted. This implementation can be used as soon
+    #       as the remaining problems with the debug_tree are fixed.
+    # Delete the variables that are no longer needed:
+    # scoped_names = record_store.get_names_current_scope()
+    # for name in scoped_names:
+    #     del record_store[name]
+    #     del general_store[name]
+
+    return crawled_data
+
+
+# --------------------------------------------------------------------------------
+# Main scanning interface functions:
+# --------------------------------------------------------------------------------
+
+
+def scan_directory(dirname: str, crawler_definition_path: str,
+                   restricted_path: Optional[list[str]] = None,
+                   debug_tree: Optional[DebugTree] = None):
+    """ Crawl a single directory.
+
+    Formerly known as "crawl_directory".
+
+    Convenience function that starts the scanner (calls scan_structure_elements)
+    with a single directory as the StructureElement.
+
+    restricted_path: optional, list of strings
+            Traverse the data tree only along the given path. When the end of the given path
+            is reached, traverse the full tree as normal.
+    """
+
+    crawler_definition = load_definition(crawler_definition_path)
+    # Load and register converter packages:
+    converter_registry = create_converter_registry(crawler_definition)
+
+    if not dirname:
+        raise ValueError(
+            "You have to provide a non-empty path for crawling.")
+    dir_structure_name = os.path.basename(dirname)
+
+    # TODO: needs to be covered somewhere else
+    crawled_directory = dirname
+    if not dir_structure_name and dirname.endswith('/'):
+        if dirname == '/':
+            # Crawling the entire file system
+            dir_structure_name = "root"
+        else:
+            # dirname had a trailing '/'
+            dir_structure_name = os.path.basename(dirname[:-1])
+
+    return scan_structure_elements(Directory(dir_structure_name,
+                                             dirname),
+                                   crawler_definition,
+                                   converter_registry,
+                                   restricted_path=restricted_path,
+                                   debug_tree=debug_tree
+                                   )
+
+
+def scan_structure_elements(items: Union[list[StructureElement], StructureElement],
+                            crawler_definition: dict,
+                            converter_registry: dict,
+                            restricted_path: Optional[list[str]] = None,
+                            debug_tree: Optional[DebugTree] = None):
+    """
+    Start point of the crawler recursion.
+
+    Formerly known as "start_crawling".
+
+    Parameters
+    ----------
+    items: list
+         A list of structure elements (or a single StructureElement) that is used for
+         generating the initial items for the crawler. This could e.g. be a Directory.
+    crawler_definition : dict
+         A dictionary representing the crawler definition, possibly from a yaml
+         file.
+    restricted_path: optional, list of strings
+         Traverse the data tree only along the given path. When the end of the given path
+         is reached, traverse the full tree as normal.
+
+    Returns
+    -------
+    crawled_data : list
+        the final list with the target state of Records.
+    """
+
+    # This function builds the tree of converters out of the crawler definition.
+    if not isinstance(items, list):
+        items = [items]
+
+    # TODO: needs to be covered somewhere else
+    # self.run_id = uuid.uuid1()
+    converters = initialize_converters(crawler_definition, converter_registry)
+
+    return scanner(
+        items=items,
+        converters=converters,
+        restricted_path=restricted_path,
+        debug_tree=debug_tree
+    )
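+
+
+# A short usage sketch of the public scanner interface defined above (the cfood file name
+# and the example dict are placeholders, following the unittests in this change set):
+#
+#     from caoscrawler.scanner import (load_definition, create_converter_registry,
+#                                      scan_structure_elements)
+#     from caoscrawler.structure_elements import DictElement
+#
+#     crawler_definition = load_definition("cfood.yml")
+#     converter_registry = create_converter_registry(crawler_definition)
+#     records = scan_structure_elements(DictElement("TestDict", {"float_value": 4}),
+#                                       crawler_definition, converter_registry)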
diff --git a/src/caoscrawler/version.py b/src/caoscrawler/version.py
index e73905dcd25673eae88f718a7e45b7b4d0665e47..fdc8323452cd190cc3628efa57c15992f30fabeb 100644
--- a/src/caoscrawler/version.py
+++ b/src/caoscrawler/version.py
@@ -25,8 +25,10 @@ except ImportError:  # Python<3.8 dowesn"t support this so use
 from packaging.version import parse as parse_version
 from warnings import warn
 
-# Read in version of locally installed caoscrawler package
-version = importlib_metadata.version("caoscrawler")
+
+def get_caoscrawler_version():
+    """ Read in version of locally installed caoscrawler package"""
+    return importlib_metadata.version("caoscrawler")
 
 
 class CfoodRequiredVersionError(RuntimeError):
@@ -51,7 +53,7 @@ as expected with the installed version of the crawler.
         warn(msg, UserWarning)
         return
 
-    installed_version = parse_version(version)
+    installed_version = parse_version(get_caoscrawler_version())
     cfood_version = parse_version(metadata["crawler-version"])
 
     if cfood_version > installed_version:
diff --git a/src/doc/README_SETUP.md b/src/doc/README_SETUP.md
index 952a8c94a7dfa24110f320f5dd32b0ad2ac1df01..5f5161d0d672ff3ad14db5c5b49f5c65550b06d7 100644
--- a/src/doc/README_SETUP.md
+++ b/src/doc/README_SETUP.md
@@ -17,6 +17,7 @@ Build documentation in `src/doc` with `make html`.
 - `sphinx`
 - `sphinx-autoapi`
 - `recommonmark`
+- `sphinx-rtd-theme`
 
 ### How to contribute ###
 
diff --git a/src/doc/conf.py b/src/doc/conf.py
index 7719a920328c46b4453cd59413b939fcf2d45f5a..544f7292a766d59891d23235bb380ed90ce0d226 100644
--- a/src/doc/conf.py
+++ b/src/doc/conf.py
@@ -33,10 +33,10 @@ copyright = '2021, MPIDS'
 author = 'Alexander Schlemmer'
 
 # The short X.Y version
-version = '0.4.0'
+version = '0.5.0'
 # The full version, including alpha/beta/rc tags
 # release = '0.5.2-rc2'
-release = '0.4.0'
+release = '0.5.0'
 
 
 # -- General configuration ---------------------------------------------------
diff --git a/src/doc/how-to-upgrade.md b/src/doc/how-to-upgrade.md
index 931fa0cd2f2d621c89c35046d6df4ba6ac9b7a1e..4efc78280ca9ddbb893f166ee3530b3363684081 100644
--- a/src/doc/how-to-upgrade.md
+++ b/src/doc/how-to-upgrade.md
@@ -1,6 +1,18 @@
 
 # How to upgrade
 
+## 0.4.0 to 0.5.0
+The crawler was split into two modules: the scanner and the crawler. The scanner creates a Record
+structure from the data and the crawler synchronizes this structure with the server. Due to this
+change you should:
+- Remove the `debug` argument from the Crawler constructor. For debugging, supply a `DebugTree` as
+  an argument to the scanner functions instead.
+- Remove the `generalStore` argument from the Crawler constructor. A store can no longer be
+  provided to the crawler.
+- `load_definition` and `initialize_converters` are now part of the scanner module.
+- `crawl_directory` is replaced by `scan_directory` of the scanner module (see the sketch below).
+- `start_crawling` is replaced by `scan_structure_elements` of the scanner module.
+
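+A minimal migration sketch (directory and cfood file names are placeholders):
+
+```python
+from caoscrawler.crawl import Crawler
+from caoscrawler.debug_tree import DebugTree
+from caoscrawler.scanner import scan_directory
+
+# formerly: crawler = Crawler(debug=True); crawler.crawl_directory(...)
+debug_tree = DebugTree()
+crawled_data = scan_directory("ExperimentalData", "cfood.yml", debug_tree=debug_tree)
+
+# formerly: crawler.synchronize(commit_changes=False)
+crawler = Crawler()
+inserts, updates = crawler.synchronize(commit_changes=False, crawled_data=crawled_data)
+```
+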
 ## 0.2.x to 0.3.0
 DictElementConverter (old: DictConverter) now can use "match" keywords. If
 none are in the definition, the behavior is as before. If you had "match",
diff --git a/unittests/scifolder_cfood.yml b/unittests/scifolder_cfood.yml
index dce219b751c3e980662a1eaa4904e1163d9836a0..9d6e8cf3ea325ad14641530f2e6cafd43f0dc1bb 100644
--- a/unittests/scifolder_cfood.yml
+++ b/unittests/scifolder_cfood.yml
@@ -2,6 +2,10 @@
 # The full scifolder cfood will be developed here:
 # https://gitlab.indiscale.com/caosdb/src/crawler-cfoods/scifolder-cfood
 
+---
+metadata:
+  crawler-version: 0.3.1
+---
 Definitions:
   type: Definitions
   #include "description.yml"
diff --git a/unittests/test_cfood_metadata.py b/unittests/test_cfood_metadata.py
index 09d6c88bdc27e1066ed18a9c5865cbfb95270c3a..494bd383d95b4a845b5ea6f86ccff0f9a1db257f 100644
--- a/unittests/test_cfood_metadata.py
+++ b/unittests/test_cfood_metadata.py
@@ -21,20 +21,12 @@ import pytest
 import yaml
 
 from tempfile import NamedTemporaryFile
+from unittest.mock import patch
+from unittest.mock import MagicMock, Mock
 
 import caoscrawler
 
-CRAWLER_VERSION = ""
-
-
-def setup_function(function):
-    """Store original crawler version in case it is altered for tests."""
-    CRAWLER_VERSION = caoscrawler.version.version
-
-
-def teardown_function(function):
-    """Reset version"""
-    caoscrawler.version.version = CRAWLER_VERSION
+from caoscrawler.scanner import load_definition
 
 
 def _temp_file_load(txt: str):
@@ -46,8 +38,7 @@ def _temp_file_load(txt: str):
     with NamedTemporaryFile() as f:
         f.write(txt.encode())
         f.flush()
-        c = caoscrawler.Crawler()
-        definition = c.load_definition(f.name)
+        definition = load_definition(f.name)
     return definition
 
 
@@ -68,9 +59,12 @@ SimulationData:
     with pytest.warns(UserWarning) as uw:
         _temp_file_load(definition_text)
 
-    assert len(uw) == 1
-    assert "No crawler version specified in cfood definition" in uw[0].message.args[0]
-    assert "Specifying a version is highly recommended" in uw[0].message.args[0]
+    found = False
+    for w in uw:
+        if ("No crawler version specified in cfood definition" in w.message.args[0] and
+                "Specifying a version is highly recommended" in w.message.args[0]):
+            found = True
+    assert found
 
     # metadata section is missing alltogether
     definition_text = """
@@ -82,12 +76,16 @@ SimulationData:
     with pytest.warns(UserWarning) as uw:
         _temp_file_load(definition_text)
 
-    assert len(uw) == 1
-    assert "No crawler version specified in cfood definition" in uw[0].message.args[0]
-    assert "Specifying a version is highly recommended" in uw[0].message.args[0]
+    found = False
+    for w in uw:
+        if ("No crawler version specified in cfood definition" in w.message.args[0] and
+                "Specifying a version is highly recommended" in w.message.args[0]):
+            found = True
+    assert found
 
 
-def test_warning_if_version_too_old():
+@patch("caoscrawler.version.get_caoscrawler_version")
+def test_warning_if_version_too_old(get_version):
     """Warn if the cfood was written for an older crawler version."""
 
     definition_text = """
@@ -102,31 +100,38 @@ SimulationData:
   match: SimulationData
     """
 
-    # higher minor
-    caoscrawler.version.version = "0.3.0"
+    get_version.side_effect = lambda: "0.3.0"
     with pytest.warns(UserWarning) as uw:
         _temp_file_load(definition_text)
 
-    assert len(uw) == 1
-    assert "cfood was written for a previous crawler version" in uw[0].message.args[0]
-    assert "version specified in cfood: 0.2.0" in uw[0].message.args[0]
-    assert "version installed on your system: 0.3.0" in uw[0].message.args[0]
+    found = False
+    for w in uw:
+        if ("cfood was written for a previous crawler version" in w.message.args[0] and
+                "version specified in cfood: 0.2.0" in w.message.args[0] and
+                "version installed on your system: 0.3.0" in w.message.args[0]):
+            found = True
+    assert found
 
     # higher major
-    caoscrawler.version.version = "1.1.0"
+    get_version.side_effect = lambda: "1.1.0"
     with pytest.warns(UserWarning) as uw:
         _temp_file_load(definition_text)
 
-    assert len(uw) == 1
-    assert "cfood was written for a previous crawler version" in uw[0].message.args[0]
-    assert "version specified in cfood: 0.2.0" in uw[0].message.args[0]
-    assert "version installed on your system: 1.1.0" in uw[0].message.args[0]
+    found = False
+    for w in uw:
+        if ("cfood was written for a previous crawler version" in w.message.args[0] and
+                "version specified in cfood: 0.2.0" in w.message.args[0] and
+                "version installed on your system: 1.1.0" in w.message.args[0]):
+            found = True
+    assert found
 
 
-def test_error_if_version_too_new():
+@patch("caoscrawler.version.get_caoscrawler_version")
+def test_error_if_version_too_new(get_version):
     """Raise error if the cfood requires a newer crawler version."""
 
     # minor too old
+    get_version.side_effect = lambda: "0.1.5"
     definition_text = """
 ---
 metadata:
@@ -138,7 +143,6 @@ SimulationData:
   type: Directory
   match: SimulationData
     """
-    caoscrawler.version.version = "0.1.5"
     with pytest.raises(caoscrawler.CfoodRequiredVersionError) as cre:
         _temp_file_load(definition_text)
 
@@ -166,7 +170,7 @@ SimulationData:
     assert "version installed on your system: 0.1.5" in str(cre.value)
 
     # patch to old
-    caoscrawler.version.version = "1.0.0"
+    get_version.side_effect = lambda: "1.0.0"
 
     with pytest.raises(caoscrawler.CfoodRequiredVersionError) as cre:
         _temp_file_load(definition_text)
@@ -176,7 +180,8 @@ SimulationData:
     assert "version installed on your system: 1.0.0" in str(cre.value)
 
 
-def test_matching_version():
+@patch("caoscrawler.version.get_caoscrawler_version")
+def test_matching_version(get_version):
     """Test that there is no warning or error in case the version matches."""
 
     definition_text = """
@@ -190,10 +195,10 @@ SimulationData:
   type: Directory
   match: SimulationData
     """
-    caoscrawler.version.version = "0.2.1"
+    get_version.side_effect = lambda: "0.2.1"
     assert _temp_file_load(definition_text)
 
     # The version is also considered a match if the patch version of the
     # installed crawler is newer than the one specified in the cfood metadata
-    caoscrawler.version.version = "0.2.7"
+    get_version.side_effect = lambda: "0.2.7"
     assert _temp_file_load(definition_text)
diff --git a/unittests/test_converters.py b/unittests/test_converters.py
index 4d3791fce3ceffaafe529423e4020ebd6a4231ba..154724be6d126aefb430c7d0600b86a5ec721812 100644
--- a/unittests/test_converters.py
+++ b/unittests/test_converters.py
@@ -45,6 +45,7 @@ from caoscrawler.stores import GeneralStore
 from caoscrawler.structure_elements import (File, TextElement, ListElement, DictElement,
                                             BooleanElement, IntegerElement,
                                             FloatElement, Directory)
+from caoscrawler.scanner import load_definition, _load_definition_from_yaml_dict, create_converter_registry
 
 from test_tool import rfp
 
@@ -437,6 +438,7 @@ def test_filter_children_of_directory(converter_registry, capsys):
         children = dc.create_children(None, test_dir)
 
 
+@pytest.mark.filterwarnings("ignore::UserWarning")
 def test_validate_custom_converters():
     one_doc_yaml = """
 Converters:
@@ -447,8 +449,7 @@ MyElement:
   type: MyNewType
   match: something
     """
-    crawler1 = Crawler()
-    one_doc_definitions = crawler1._load_definition_from_yaml_dict(
+    one_doc_definitions = _load_definition_from_yaml_dict(
         [yaml.load(one_doc_yaml, Loader=yaml.SafeLoader)])
     assert "MyElement" in one_doc_definitions
     assert one_doc_definitions["MyElement"]["type"] == "MyNewType"
@@ -457,6 +458,7 @@ MyElement:
     two_doc_yaml = """
 ---
 metadata:
+  crawler-version: 0.3.1
   Converters:
     MyNewType:
       converter: MyNewTypeConverter
@@ -466,8 +468,7 @@ MyElement:
   type: MyNewType
   match: something
     """
-    crawler2 = Crawler()
-    two_doc_definitions = crawler2._load_definition_from_yaml_dict(
+    two_doc_definitions = _load_definition_from_yaml_dict(
         list(yaml.safe_load_all(two_doc_yaml)))
     assert "MyElement" in two_doc_definitions
     assert two_doc_definitions["MyElement"]["type"] == one_doc_definitions["MyElement"]["type"]
@@ -588,8 +589,7 @@ def test_date_converter():
 
 
 def test_load_converters():
-    c = Crawler()
-    converter_registry = c.load_converters({})
+    converter_registry = create_converter_registry({})
     # The previous function call actually already asserts that all defined
     # converter classes can be loaded from their respective packages.
 
diff --git a/unittests/test_crawler.py b/unittests/test_crawler.py
new file mode 100644
index 0000000000000000000000000000000000000000..f3ad73c5d75acea5fd3e92954e3899983ea73a2a
--- /dev/null
+++ b/unittests/test_crawler.py
@@ -0,0 +1,76 @@
+#!/usr/bin/env python3
+# encoding: utf-8
+#
+# This file is a part of the CaosDB Project.
+#
+# Copyright (C) 2023 Indiscale GmbH <info@indiscale.com>
+# Copyright (C) 2023 Henrik tom Wörden <h.tomwoerden@indiscale.com>
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as
+# published by the Free Software Foundation, either version 3 of the
+# License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+#
+
+"""
+test the Crawler class
+"""
+import json
+import os
+
+from pytest import raises
+
+import caosdb as db
+
+from caoscrawler.stores import GeneralStore
+from caoscrawler.crawl import Crawler
+import warnings
+
+from test_tool import rfp
+import pytest
+
+
+@pytest.mark.filterwarnings("ignore::DeprecationWarning")
+def test_constructor():
+    with warnings.catch_warnings(record=True) as w:
+        # Cause all warnings to always be triggered.
+        warnings.filterwarnings("ignore")
+        warnings.filterwarnings("always", category=DeprecationWarning)
+
+        Crawler(debug=True)
+        assert issubclass(w[-1].category, DeprecationWarning)
+        assert "The debug argument of the Crawler class" in str(w[-1].message)
+
+        Crawler(generalStore=GeneralStore())
+        assert issubclass(w[-1].category, DeprecationWarning)
+        assert "The generalStore argument of the Crawler" in str(w[-1].message)
+
+
+@pytest.mark.filterwarnings("ignore::DeprecationWarning")
+def test_deprecated_functions():
+    with warnings.catch_warnings(record=True) as w:
+        # Cause all warnings to always be triggered.
+        warnings.filterwarnings("ignore")
+        warnings.filterwarnings("always", category=DeprecationWarning)
+        cr = Crawler()
+        cr.crawl_directory(".", rfp("scifolder_cfood.yml"))
+        print(w)
+        print(w[0].message)
+        assert issubclass(w[-1].category, DeprecationWarning)
+        assert "The function crawl_directory in the crawl" in str(w[-1].message)
+
+        cr.start_crawling([], {}, {})
+        assert issubclass(w[-1].category, DeprecationWarning)
+        assert "The function start_crawling in the crawl module" in str(w[-1].message)
+
+        cr.crawled_data
+        assert issubclass(w[-1].category, DeprecationWarning)
+        assert "The use of self.crawled_data is depricated" in str(w[-1].message)
diff --git a/unittests/test_issues.py b/unittests/test_issues.py
index a1724e5a989190977a7ec0d86846fc2b7433ab5d..46157af9225c11b79e76dd3ef856d60519a6eb9d 100644
--- a/unittests/test_issues.py
+++ b/unittests/test_issues.py
@@ -30,6 +30,8 @@ from caoscrawler.identifiable_adapters import CaosDBIdentifiableAdapter
 from caoscrawler.structure_elements import DictElement
 from test_tool import rfp
 
+from caoscrawler.scanner import create_converter_registry, scan_structure_elements
+
 
 def test_issue_10():
     """Test integer-to-float conversion in dictionaries"""
@@ -55,14 +57,13 @@ def test_issue_10():
         }
     }
 
-    crawler = Crawler(debug=True)
-    converter_registry = crawler.load_converters(crawler_definition)
+    converter_registry = create_converter_registry(crawler_definition)
 
     test_dict = {
         "float_value": 4
     }
 
-    records = crawler.start_crawling(
+    records = scan_structure_elements(
         DictElement("TestDict", test_dict), crawler_definition, converter_registry)
     assert len(records) == 1
     assert records[0].parents[0].name == "TestRec"
@@ -94,7 +95,7 @@ def test_list_datatypes():
         }
     }
 
-    crawler = Crawler(debug=True)
+    crawler = Crawler()
     converter_registry = crawler.load_converters(crawler_definition)
 
     test_dict = {
diff --git a/unittests/test_json.py b/unittests/test_json.py
index 41fd31a43389148ad6fbc4167fd3fbd4f7f2ee9f..3c120be174ff819baeeaa49ddf142cf40dba751e 100644
--- a/unittests/test_json.py
+++ b/unittests/test_json.py
@@ -36,6 +36,7 @@ import caosdb as db
 from caoscrawler.converters import JSONFileConverter
 from caoscrawler.crawl import Crawler
 from caoscrawler.structure_elements import File, JSONFile
+from caoscrawler.scanner import load_definition, create_converter_registry, scan_structure_elements
 from test_tool import rfp, dircheckstr
 
 
@@ -44,12 +45,11 @@ def test_json():
                                   "jsontest_cfood.yml")
     json_file_path = rfp("test_directories", "examples_json", "testjson.json")
 
-    crawler = Crawler(debug=True)
-    crawler_definition = crawler.load_definition(crawler_definition_path)
+    crawler_definition = load_definition(crawler_definition_path)
     # Load and register converter packages:
-    converter_registry = crawler.load_converters(crawler_definition)
+    converter_registry = create_converter_registry(crawler_definition)
 
-    records = crawler.start_crawling(
+    records = scan_structure_elements(
         JSONFile(os.path.basename(json_file_path), json_file_path),
         crawler_definition,
         converter_registry
@@ -70,8 +70,7 @@ def test_json():
 def test_broken_validation():
     crawler_definition_path = rfp(
         "broken_cfoods", "broken_validation_path.yml")
-    crawler = Crawler()
     with raises(FileNotFoundError) as err:
-        crawler_definition = crawler.load_definition(crawler_definition_path)
+        crawler_definition = load_definition(crawler_definition_path)
 
     assert str(err.value).startswith("Couldn't find validation file")
diff --git a/unittests/test_macros.py b/unittests/test_macros.py
index b5ea5d84846f5f33853910c292132d7b5026600e..5244307db8e694ffb4864380d33936ebb76ae715 100644
--- a/unittests/test_macros.py
+++ b/unittests/test_macros.py
@@ -25,6 +25,7 @@
 from caoscrawler.macros import defmacro_constructor, macro_constructor
 from caoscrawler.macros.macro_yaml_object import macro_store
 from caoscrawler.crawl import Crawler
+from caoscrawler.scanner import load_definition
 
 from tempfile import NamedTemporaryFile
 
@@ -52,8 +53,7 @@ def _temp_file_load(txt: str):
     with NamedTemporaryFile() as f:
         f.write(txt.encode())
         f.flush()
-        c = Crawler()
-        definition = c.load_definition(f.name)
+        definition = load_definition(f.name)
     return definition
 
 
@@ -142,6 +142,7 @@ def test_multi_macros_toplevel(register_macros, macro_store_reset):
     dat_loader = list(yaml.safe_load_all("""
 ---
 metadata:
+  crawler-version: 0.3.1
   macros:
     - !defmacro
       name: test_one
@@ -168,6 +169,10 @@ testnode: !macro
 
 def test_load_definition(register_macros, macro_store_reset):
     txt = """
+---
+metadata:
+  crawler-version: 0.3.1
+---
 extroot:
   type: Directory
   match: extroot
@@ -183,6 +188,7 @@ extroot:
     cfood = _temp_file_load("""
 ---
 metadata:
+  crawler-version: 0.3.1
   macros:
     - !defmacro
       name: test_one
@@ -256,6 +262,7 @@ def test_circular_macro_definition(register_macros, macro_store_reset):
     cfood = _temp_file_load("""
 ---
 metadata:
+  crawler-version: 0.3.1
   macros:
     - !defmacro
       name: test_one
@@ -304,6 +311,7 @@ def test_use_macro_twice():
     cfood = _temp_file_load("""
 ---
 metadata:
+  crawler-version: 0.3.1
   macros:
     - !defmacro
       name: test_twice
@@ -337,6 +345,7 @@ def test_documentation_example_2():
     cfood = _temp_file_load("""
 ---
 metadata:
+  crawler-version: 0.3.1
   macros:
   - !defmacro
     name: MarkdownFile
@@ -374,6 +383,7 @@ def test_documentation_example_1():
     cfood = _temp_file_load("""
 ---
 metadata:
+  crawler-version: 0.3.1
   macros:
   - !defmacro
     name: SimulationDatasetFile
@@ -422,6 +432,7 @@ def test_def_replacements():
     cfood = _temp_file_load("""
 ---
 metadata:
+  crawler-version: 0.3.1
   macros:
     - !defmacro
       name: test_def_replacements
diff --git a/unittests/test_scalars_cfood.py b/unittests/test_scalars_cfood.py
index ac408b2dab0fa151c370d3ec6ffd1dced22c77d7..89d94fc74ebda6aedfbee422294e99eab2216d73 100644
--- a/unittests/test_scalars_cfood.py
+++ b/unittests/test_scalars_cfood.py
@@ -10,16 +10,11 @@ from caoscrawler.converters import handle_value
 from caoscrawler.crawl import Crawler
 # We need the store for the above function
 from caoscrawler.stores import GeneralStore
-
-from test_tool import dircheckstr, rfp
+from caoscrawler.scanner import scan_directory
+from caoscrawler.debug_tree import DebugTree
 
 
-@pytest.fixture
-def crawler():
-    crawler = Crawler(debug=True)
-    crawler.crawl_directory(rfp("test_directories", "examples_article"),
-                            rfp("cfoods_scalar.yml"))
-    return crawler
+from test_tool import dircheckstr, rfp
 
 
 def test_handle_value():
@@ -38,8 +33,11 @@ def test_handle_value():
     assert handle_value([4, 3, 2], store) == ([4, 3, 2], "single")
 
 
-def test_record_structure_generation(crawler):
-    subd = crawler.debug_tree[dircheckstr("DataAnalysis")]
+def test_record_structure_generation():
+    dbt = DebugTree()
+    scan_directory(rfp("test_directories", "examples_article"), rfp("cfoods_scalar.yml"),
+                   debug_tree=dbt)
+    subd = dbt.debug_tree[dircheckstr("DataAnalysis")]
     assert len(subd) == 2
     # variables store on Data Analysis node of debug tree
     if "Data" in subd[0]:
diff --git a/unittests/test_schema.py b/unittests/test_schema.py
index 0736698eb32146fb3cfbee6acbcf11f5436df27e..0d5bebce98fbc8c789c1080bcf3919f128bdbf54 100644
--- a/unittests/test_schema.py
+++ b/unittests/test_schema.py
@@ -13,6 +13,8 @@ from pytest import raises
 
 from jsonschema.exceptions import ValidationError
 
+from caoscrawler.scanner import load_definition
+
 
 def rfp(*pathcomponents):
     """
@@ -23,9 +25,8 @@ def rfp(*pathcomponents):
 
 
 def test_schema_validation():
-    cr = Crawler()
-    cr.load_definition(rfp("scifolder_cfood.yml"))
-    cr.load_definition(rfp("scifolder_extended.yml"))
+    load_definition(rfp("scifolder_cfood.yml"))
+    load_definition(rfp("scifolder_extended.yml"))
 
     with raises(ValidationError, match=".*enum.*"):
-        cr.load_definition(rfp("broken_cfoods", "broken1.yml"))
+        load_definition(rfp("broken_cfoods", "broken1.yml"))
diff --git a/unittests/test_table_converter.py b/unittests/test_table_converter.py
index abe4ac85ec4fc0a78e71c177222817e1b84e9e56..d739695fc4c6a019f28f3c3697e3f134e0f1755e 100644
--- a/unittests/test_table_converter.py
+++ b/unittests/test_table_converter.py
@@ -28,6 +28,8 @@ test the converters module
 
 from caoscrawler.converters import Converter
 from caoscrawler.stores import GeneralStore
+from caoscrawler.scanner import scan_directory
+from caoscrawler.debug_tree import DebugTree
 from caoscrawler.converters import (ConverterValidationError,
                                     DictConverter, XLSXTableConverter, CSVTableConverter)
 from caoscrawler.structure_elements import Directory
@@ -91,14 +93,6 @@ def dircheckstr(*pathcomponents):
     return "caoscrawler.structure_elements.File: " + basename(join(*pathcomponents)) + ", " + rfp("test_directories", "examples_tables", "ExperimentalData", *pathcomponents)
 
 
-@pytest.fixture
-def crawler():
-    crawler = Crawler(debug=True)
-    crawler.crawl_directory(rfp("test_directories", "examples_tables", "ExperimentalData"),
-                            rfp("test_directories", "examples_tables", "crawler_for_tables.yml"))
-    return crawler
-
-
 def test_convert_table(converter_registry):
     extentions = ["xlsx", "csv", "tsv"]
     if importlib.util.find_spec("odf") is not None:
@@ -151,9 +145,13 @@ def test_convert_table(converter_registry):
     assert res[0].name == "jdsfkljadskf"
 
 
-def test_crawl_csv_table(crawler):
+def test_crawl_csv_table():
+    dbt = DebugTree()
+    scan_directory(rfp("test_directories", "examples_tables", "ExperimentalData"),
+                   rfp("test_directories", "examples_tables", "crawler_for_tables.yml"),
+                   debug_tree=dbt)
     for file_ext in ["xlsx", "csv"]:
-        subd = crawler.debug_tree[dircheckstr("test1." + file_ext)]
+        subd = dbt.debug_tree[dircheckstr("test1." + file_ext)]
         record_experiment = subd[1]["Experiment"]
         assert isinstance(record_experiment, db.Record)
         assert isinstance(record_experiment.get_property("Measurements").value, list)
diff --git a/unittests/test_tool.py b/unittests/test_tool.py
index e15d7cb777ced4b92566df2b25b375e90be39295..08b3a0e4f9623e996540746ac408801090b97aa3 100755
--- a/unittests/test_tool.py
+++ b/unittests/test_tool.py
@@ -32,6 +32,8 @@ import os
 from caoscrawler.crawl import Crawler, SecurityMode, split_restricted_path
 from caoscrawler.identifiable import Identifiable
 from caoscrawler.structure_elements import File, DictTextElement, DictListElement, DictElement
+from caoscrawler.scanner import scan_directory
+from caoscrawler.debug_tree import DebugTree
 from caoscrawler.identifiable_adapters import IdentifiableAdapter, LocalStorageIdentifiableAdapter
 from simulated_server_data import full_data
 from functools import partial
@@ -48,6 +50,8 @@ from caosdb.apiutils import compare_entities
 import pytest
 from pytest import raises
 
+from caoscrawler.scanner import create_converter_registry, scan_structure_elements
+
 
 def rfp(*pathcomponents):
     """
@@ -74,16 +78,18 @@ def dircheckstr(*pathcomponents):
 
 @pytest.fixture
 def crawler():
-    crawler = Crawler(debug=True)
-    crawler.crawl_directory(rfp("test_directories", "examples_article"),
-                            rfp("scifolder_cfood.yml"))
-    return crawler
+    crawler = Crawler()
+    debug_tree = DebugTree()
+    crawled_data = scan_directory(
+        rfp("test_directories", "examples_article"),
+        rfp("scifolder_cfood.yml"), debug_tree=debug_tree)
+    return crawler, crawled_data, debug_tree
 
 
 @pytest.fixture
 def ident(crawler):
     ident = LocalStorageIdentifiableAdapter()
-    crawler.identifiableAdapter = ident
+    crawler[0].identifiableAdapter = ident
 
     # The records.xml file is constructed as follows:
     # To a full run of the crawler, resolve all identifiables and insert all resulting entities.
@@ -109,11 +115,16 @@ def ident(crawler):
     return ident
 
 
-def test_record_structure_generation(crawler):
+def test_record_structure_generation():
     # TODO How does this test relate to the test function in test_scalars_cfood with the same name?
     #      There seems to be code duplication
-    subd = crawler.debug_tree[dircheckstr("DataAnalysis")]
-    subc = crawler.debug_metadata["copied"][dircheckstr("DataAnalysis")]
+
+    dbt = DebugTree()
+    scan_directory(rfp("test_directories", "examples_article"),
+                   rfp("scifolder_cfood.yml"),
+                   debug_tree=dbt)
+    subd = dbt.debug_tree[dircheckstr("DataAnalysis")]
+    subc = dbt.debug_metadata["copied"][dircheckstr("DataAnalysis")]
     assert len(subd) == 2
     # variables store on Data Analysis node of debug tree
     assert len(subd[0]) == 4
@@ -127,9 +138,9 @@ def test_record_structure_generation(crawler):
     assert subd[0]["DataAnalysis"] == "examples_article/DataAnalysis"
     assert subc[0]["DataAnalysis"] is False
 
-    subd = crawler.debug_tree[dircheckstr(
+    subd = dbt.debug_tree[dircheckstr(
         "DataAnalysis", "2020_climate-model-predict")]
-    subc = crawler.debug_metadata["copied"][dircheckstr(
+    subc = dbt.debug_metadata["copied"][dircheckstr(
         "DataAnalysis", "2020_climate-model-predict")]
 
     assert len(subd[1]) == 1
@@ -157,12 +168,12 @@ def test_record_structure_generation(crawler):
     assert subc[0]["date"] is False
     assert subc[0]["identifier"] is False
 
-    subd = crawler.debug_tree[dircheckstr("DataAnalysis",
-                                          "2020_climate-model-predict",
-                                          "2020-02-08_prediction-errors")]
-    subc = crawler.debug_metadata["copied"][dircheckstr("DataAnalysis",
-                                                        "2020_climate-model-predict",
-                                                        "2020-02-08_prediction-errors")]
+    subd = dbt.debug_tree[dircheckstr("DataAnalysis",
+                                      "2020_climate-model-predict",
+                                      "2020-02-08_prediction-errors")]
+    subc = dbt.debug_metadata["copied"][dircheckstr("DataAnalysis",
+                                                    "2020_climate-model-predict",
+                                                    "2020-02-08_prediction-errors")]
     assert len(subd[0]) == 12
     assert subd[0]["date"] == "2020-02-08"
     assert subd[0]["identifier"] == "prediction-errors"
@@ -213,6 +224,7 @@ def test_record_structure_generation(crawler):
 
 
 def test_crawler_update_list(crawler, ident):
+    crawled_data = crawler[1]
     # If the following assertions fail, that is a hint, that the test file records.xml has changed
     # and this needs to be updated:
     assert len(ident.get_records()) == 18
@@ -227,7 +239,7 @@ def test_crawler_update_list(crawler, ident):
     ) == 2
 
     # The crawler contains lots of duplicates, because identifiables have not been resolved yet:
-    assert len(ident.get_records()) != len(crawler.crawled_data)
+    assert len(ident.get_records()) != len(crawled_data)
 
     # Check consistency:
     # Check whether identifiables retrieved from current identifiable store return
@@ -283,7 +295,7 @@ def test_crawler_update_list(crawler, ident):
 
 
 def test_synchronization(crawler, ident):
-    insl, updl = crawler.synchronize(commit_changes=False)
+    insl, updl = crawler[0].synchronize(commit_changes=False, crawled_data=crawler[1])
     assert len(insl) == 0
     assert len(updl) == 0
 
@@ -332,16 +344,16 @@ def test_remove_unnecessary_updates():
 @pytest.mark.xfail
 def test_identifiable_adapter_no_identifiable(crawler, ident):
     del ident._registered_identifiables["Person"]
-    insl, updl = crawler.synchronize()
+    insl, updl = crawler[0].synchronize()
     assert len(updl) == 0
 
-    pers = [r for r in crawler.crawled_data if r.parents[0].name == "Person"]
+    pers = [r for r in crawler[0].crawled_data if r.parents[0].name == "Person"]
     # All persons are inserted, because they are not identifiable:
     assert len(insl) == len(pers)
 
 
 def test_provenance_debug_data(crawler):
-    crawler.save_debug_data(rfp("provenance.yml"))
+    crawler[0].save_debug_data(rfp("provenance.yml"), debug_tree=crawler[2])
 
     with open(rfp("provenance.yml"), "r") as f:
         provenance = yaml.load(f, Loader=yaml.SafeLoader)
@@ -356,7 +368,7 @@ def test_provenance_debug_data(crawler):
 
 
 def test_split_into_inserts_and_updates_trivial(crawler):
-    crawler.split_into_inserts_and_updates([])
+    crawler[0].split_into_inserts_and_updates([])
 
 
 def basic_retrieve_by_name_mock_up(rec, referencing_entities=None, known=None):
@@ -370,21 +382,21 @@ def basic_retrieve_by_name_mock_up(rec, referencing_entities=None, known=None):
 @pytest.fixture
 def crawler_mocked_identifiable_retrieve(crawler):
     # mock retrieval of registered identifiabls: return Record with just a parent
-    crawler.identifiableAdapter.get_registered_identifiable = Mock(
+    crawler[0].identifiableAdapter.get_registered_identifiable = Mock(
         side_effect=lambda x: db.Record().add_parent(x.parents[0].name))
 
     # Simulate remote server content by using the names to identify records
     # There is only a single known Record with name A
-    crawler.identifiableAdapter.retrieve_identified_record_for_record = Mock(side_effect=partial(
+    crawler[0].identifiableAdapter.retrieve_identified_record_for_record = Mock(side_effect=partial(
         basic_retrieve_by_name_mock_up, known={"A": db.Record(id=1111, name="A")}))
-    crawler.identifiableAdapter.retrieve_identified_record_for_identifiable = Mock(
+    crawler[0].identifiableAdapter.retrieve_identified_record_for_identifiable = Mock(
         side_effect=partial(
             basic_retrieve_by_name_mock_up, known={"A": db.Record(id=1111, name="A")}))
     return crawler
 
 
 def test_split_into_inserts_and_updates_single(crawler_mocked_identifiable_retrieve):
-    crawler = crawler_mocked_identifiable_retrieve
+    crawler = crawler_mocked_identifiable_retrieve[0]
     identlist = [Identifiable(name="A", record_type="C"), Identifiable(name="B", record_type="C")]
     entlist = [db.Record(name="A").add_parent(
         "C"), db.Record(name="B").add_parent("C")]
@@ -409,7 +421,7 @@ def test_split_into_inserts_and_updates_single(crawler_mocked_identifiable_retri
 
 
 def test_split_into_inserts_and_updates_with_duplicate(crawler_mocked_identifiable_retrieve):
-    crawler = crawler_mocked_identifiable_retrieve
+    crawler = crawler_mocked_identifiable_retrieve[0]
     a = db.Record(name="A").add_parent("C")
     b = db.Record(name="B").add_parent("C")
     b.add_property("A", a)
@@ -427,7 +439,7 @@ def test_split_into_inserts_and_updates_with_duplicate(crawler_mocked_identifiab
 
 
 def test_split_into_inserts_and_updates_with_ref(crawler_mocked_identifiable_retrieve):
-    crawler = crawler_mocked_identifiable_retrieve
+    crawler = crawler_mocked_identifiable_retrieve[0]
     # try it with a reference
     a = db.Record(name="A").add_parent("C")
     b = db.Record(name="B").add_parent("C")
@@ -454,7 +466,7 @@ def test_split_into_inserts_and_updates_with_circ(crawler):
 
 
 def test_split_into_inserts_and_updates_with_complex(crawler_mocked_identifiable_retrieve):
-    crawler = crawler_mocked_identifiable_retrieve
+    crawler = crawler_mocked_identifiable_retrieve[0]
     #      A
     #      ^
     #      |
@@ -481,7 +493,7 @@ def test_split_into_inserts_and_updates_with_complex(crawler_mocked_identifiable
 
 
 def test_split_into_inserts_and_updates_with_copy_attr(crawler_mocked_identifiable_retrieve):
-    crawler = crawler_mocked_identifiable_retrieve
+    crawler = crawler_mocked_identifiable_retrieve[0]
     # assume identifiable is only the name
     a = db.Record(name="A").add_parent("C")
     a.add_property("foo", 1)
@@ -500,7 +512,7 @@ def test_split_into_inserts_and_updates_with_copy_attr(crawler_mocked_identifiab
 def test_has_missing_object_in_references(crawler):
     # Mock the retrieval of registered identifiables by record name.
     # There are only two known Records, with names C and D.
-    crawler.identifiableAdapter.get_registered_identifiable = Mock(side_effect=partial(
+    crawler[0].identifiableAdapter.get_registered_identifiable = Mock(side_effect=partial(
         basic_retrieve_by_name_mock_up, known={"C": db.Record(name="C").add_parent("RTC")
                                                .add_property("d"),
                                                "D": db.Record(name="D").add_parent("RTD")
@@ -508,56 +520,56 @@ def test_has_missing_object_in_references(crawler):
                                                }))
 
     # one reference with id -> check
-    assert not crawler._has_missing_object_in_references(
+    assert not crawler[0]._has_missing_object_in_references(
         Identifiable(name="C", record_type="RTC", properties={'d': 123}), [])
     # one ref with Entity with id -> check
-    assert not crawler._has_missing_object_in_references(
+    assert not crawler[0]._has_missing_object_in_references(
         Identifiable(name="C", record_type="RTC", properties={'d': db.Record(id=123)
                                                               .add_parent("C")}), [])
     # one ref with id one with Entity with id (mixed) -> check
-    assert not crawler._has_missing_object_in_references(
+    assert not crawler[0]._has_missing_object_in_references(
         Identifiable(name="C", record_type="RTD",
                      properties={'d': 123, 'b': db.Record(id=123).add_parent("RTC")}), [])
     # entity to be referenced in the following
     a = db.Record(name="C").add_parent("C").add_property("d", 12311)
     # one ref with id one with Entity without id (but not identifying) -> fail
-    assert not crawler._has_missing_object_in_references(
+    assert not crawler[0]._has_missing_object_in_references(
         Identifiable(name="C", record_type="RTC", properties={'d': 123, 'e': a}), [])
 
     # one ref with id one with Entity without id (mixed) -> fail
-    assert not crawler._has_missing_object_in_references(
+    assert not crawler[0]._has_missing_object_in_references(
         Identifiable(name="D", record_type="RTD", properties={'d': 123, 'e': a}), [])
 
-    crawler.add_to_remote_missing_cache(a, Identifiable(name="C", record_type="RTC",
-                                                        properties={'d': 12311}))
+    crawler[0].add_to_remote_missing_cache(a, Identifiable(name="C", record_type="RTC",
+                                                           properties={'d': 12311}))
     # one ref with id one with Entity without id but in cache -> check
-    assert crawler._has_missing_object_in_references(
+    assert crawler[0]._has_missing_object_in_references(
         Identifiable(name="D", record_type="RTD", properties={'d': 123, 'e': a}), [])
 
     # if this ever fails, the mock-up may be removed
-    crawler.identifiableAdapter.get_registered_identifiable.assert_called()
+    crawler[0].identifiableAdapter.get_registered_identifiable.assert_called()
 
 
 @pytest.mark.xfail()
 def test_references_entities_without_ids(crawler, ident):
-    assert not crawler._has_reference_value_without_id(db.Record().add_parent("Person")
-                                                       .add_property('last_name', 123)
-                                                       .add_property('first_name', 123))
+    assert not crawler[0]._has_reference_value_without_id(db.Record().add_parent("Person")
+                                                          .add_property('last_name', 123)
+                                                          .add_property('first_name', 123))
     # id and rec with id
-    assert not crawler._has_reference_value_without_id(db.Record().add_parent("Person")
-                                                       .add_property('first_name', 123)
-                                                       .add_property('last_name',
-                                                                     db.Record(id=123)))
+    assert not crawler[0]._has_reference_value_without_id(db.Record().add_parent("Person")
+                                                          .add_property('first_name', 123)
+                                                          .add_property('last_name',
+                                                                        db.Record(id=123)))
     # id and rec with id and one unneeded prop
-    assert crawler._has_reference_value_without_id(db.Record().add_parent("Person")
-                                                   .add_property('first_name', 123)
-                                                   .add_property('stuff', db.Record())
-                                                   .add_property('last_name', db.Record(id=123)))
+    assert crawler[0]._has_reference_value_without_id(db.Record().add_parent("Person")
+                                                      .add_property('first_name', 123)
+                                                      .add_property('stuff', db.Record())
+                                                      .add_property('last_name', db.Record(id=123)))
 
     # one identifying prop is missing
-    assert crawler._has_reference_value_without_id(db.Record().add_parent("Person")
-                                                   .add_property('first_name', 123)
-                                                   .add_property('last_name', db.Record()))
+    assert crawler[0]._has_reference_value_without_id(db.Record().add_parent("Person")
+                                                      .add_property('first_name', 123)
+                                                      .add_property('last_name', db.Record()))
 
 
 def test_replace_entities_with_ids(crawler):
@@ -565,7 +577,7 @@ def test_replace_entities_with_ids(crawler):
          .add_property("B", db.Record(id=12345))
          .add_property("C", [db.Record(id=12345), 233324]))
 
-    crawler.replace_entities_with_ids(a)
+    crawler[0].replace_entities_with_ids(a)
     assert a.get_property("A").value == 12345
     assert a.get_property("B").value == 12345
     assert a.get_property("C").value == [12345, 233324]
@@ -589,12 +601,14 @@ def mock_get_entity_by_name(name):
 
 
 def prepare_crawler_with_sec_mode(mode, ident):
-    crawler = Crawler(debug=True, securityMode=mode)
-    crawler.crawl_directory(rfp("test_directories", "examples_article"),
-                            rfp("scifolder_cfood.yml"))
+    crawler = Crawler(securityMode=mode)
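+    # The directory scan is no longer done by the Crawler itself; scan_directory
+    # returns the crawled data and fills the DebugTree.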
+    debug_tree = DebugTree()
+    crawled_data = scan_directory(
+        rfp("test_directories", "examples_article"),
+        rfp("scifolder_cfood.yml"), debug_tree=debug_tree)
     crawler.identifiableAdapter = ident
 
-    return crawler
+    return crawler, crawled_data, debug_tree
 
 
 def reset_mocks(mocks):
@@ -647,19 +661,19 @@ def test_security_mode(updateCacheMock, upmock, insmock, ident):
     records_backup = deepcopy(ident._records)
 
     # trivial case: nothing to do
-    crawler = prepare_crawler_with_sec_mode(SecurityMode.RETRIEVE, ident)
-    crawler.synchronize(commit_changes=True)
+    crawler, crawled_data, debug_tree = prepare_crawler_with_sec_mode(SecurityMode.RETRIEVE, ident)
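+    # synchronize() now receives the crawled data explicitly instead of reading
+    # it from the Crawler instance.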
+    crawler.synchronize(commit_changes=True, crawled_data=crawled_data)
     assert crawler.run_id is not None
     insmock.assert_not_called()
     upmock.assert_not_called()
     updateCacheMock.assert_not_called()
 
     # RETRIEVE: insert only
-    crawler = prepare_crawler_with_sec_mode(SecurityMode.RETRIEVE, ident)
+    crawler, crawled_data, debug_tree = prepare_crawler_with_sec_mode(SecurityMode.RETRIEVE, ident)
     # remove one element
     del ident._records[-1]
     # insert forbidden
-    crawler.synchronize(commit_changes=True)
+    crawler.synchronize(commit_changes=True, crawled_data=crawled_data)
     assert crawler.run_id is not None
     insmock.assert_not_called()
     upmock.assert_not_called()
@@ -670,10 +684,10 @@ def test_security_mode(updateCacheMock, upmock, insmock, ident):
     ident._records = deepcopy(records_backup)
 
     # RETRIEVE: update only
-    crawler = prepare_crawler_with_sec_mode(SecurityMode.RETRIEVE, ident)
+    crawler, crawled_data, debug_tree = prepare_crawler_with_sec_mode(SecurityMode.RETRIEVE, ident)
     # change one element
     change_non_identifiable_prop(ident)
-    crawler.synchronize(commit_changes=True)
+    crawler.synchronize(commit_changes=True, crawled_data=crawled_data)
     assert crawler.run_id is not None
     insmock.assert_not_called()
     upmock.assert_not_called()
@@ -684,10 +698,10 @@ def test_security_mode(updateCacheMock, upmock, insmock, ident):
     ident._records = deepcopy(records_backup)
 
     # INSERT: insert only
-    crawler = prepare_crawler_with_sec_mode(SecurityMode.INSERT, ident)
+    crawler, crawled_data, debug_tree = prepare_crawler_with_sec_mode(SecurityMode.INSERT, ident)
     # remove one element
     del ident._records[-1]
-    crawler.synchronize(commit_changes=True)
+    crawler.synchronize(commit_changes=True, crawled_data=crawled_data)
     assert crawler.run_id is not None
     insmock.assert_called_once()
     upmock.assert_not_called()
@@ -698,10 +712,10 @@ def test_security_mode(updateCacheMock, upmock, insmock, ident):
     ident._records = deepcopy(records_backup)
 
     # INSERT: update only
-    crawler = prepare_crawler_with_sec_mode(SecurityMode.INSERT, ident)
+    crawler, crawled_data, debug_tree = prepare_crawler_with_sec_mode(SecurityMode.INSERT, ident)
     # change one element
     change_non_identifiable_prop(ident)
-    crawler.synchronize(commit_changes=True)
+    crawler.synchronize(commit_changes=True, crawled_data=crawled_data)
     assert crawler.run_id is not None
     insmock.assert_not_called()
     upmock.assert_not_called()
@@ -712,11 +726,11 @@ def test_security_mode(updateCacheMock, upmock, insmock, ident):
     ident._records = deepcopy(records_backup)
 
     # INSERT: insert and update
-    crawler = prepare_crawler_with_sec_mode(SecurityMode.INSERT, ident)
+    crawler, crawled_data, debug_tree = prepare_crawler_with_sec_mode(SecurityMode.INSERT, ident)
     # change two elements
     change_non_identifiable_prop(ident)
     change_identifiable_prop(ident)
-    crawler.synchronize(commit_changes=True)
+    crawler.synchronize(commit_changes=True, crawled_data=crawled_data)
     assert crawler.run_id is not None
     insmock.assert_called_once()
     upmock.assert_not_called()
@@ -769,14 +783,14 @@ def crawler_mocked_for_backref_test(crawler):
                 "is_referenced_by", value=["BR", "BR2"])
         else:
             return db.Record().add_parent(x.parents[0].name)
-    crawler.identifiableAdapter.get_registered_identifiable = Mock(side_effect=get_reg_ident)
+    crawler[0].identifiableAdapter.get_registered_identifiable = Mock(side_effect=get_reg_ident)
 
     # Simulate remote server content by using the names to identify records
     # There is only a single known Record with name A
-    crawler.identifiableAdapter.retrieve_identified_record_for_record = Mock(side_effect=partial(
+    crawler[0].identifiableAdapter.retrieve_identified_record_for_record = Mock(side_effect=partial(
         basic_retrieve_by_name_mock_up, known={"A":
                                                db.Record(id=1111, name="A").add_parent("BR")}))
-    crawler.identifiableAdapter.retrieve_identified_record_for_identifiable = Mock(
+    crawler[0].identifiableAdapter.retrieve_identified_record_for_identifiable = Mock(
         side_effect=partial(
             basic_retrieve_by_name_mock_up, known={"A":
                                                    db.Record(id=1111, name="A").add_parent("BR")}))
@@ -800,7 +814,7 @@ def test_validation_error_print(caplog):
 
 
 def test_split_into_inserts_and_updates_backref(crawler_mocked_for_backref_test):
-    crawler = crawler_mocked_for_backref_test
+    crawler = crawler_mocked_for_backref_test[0]
     identlist = [Identifiable(name="A", record_type="BR"),
                  Identifiable(name="B", record_type="C", backrefs=[db.Entity()])]
     referenced = db.Record(name="B").add_parent("C")
@@ -835,7 +849,7 @@ def test_split_into_inserts_and_updates_backref(crawler_mocked_for_backref_test)
 
 def test_split_into_inserts_and_updates_mult_backref(crawler_mocked_for_backref_test):
     # test whether multiple references of the same record type are correctly used
-    crawler = crawler_mocked_for_backref_test
+    crawler = crawler_mocked_for_backref_test[0]
     referenced = db.Record(name="B").add_parent("C")
     entlist = [referenced,
                db.Record(name="A").add_parent("BR").add_property("ref", referenced),
@@ -855,7 +869,7 @@ def test_split_into_inserts_and_updates_mult_backref(crawler_mocked_for_backref_
 
 def test_split_into_inserts_and_updates_diff_backref(crawler_mocked_for_backref_test):
     # test whether multiple references of different record types are correctly used
-    crawler = crawler_mocked_for_backref_test
+    crawler = crawler_mocked_for_backref_test[0]
     referenced = db.Record(name="B").add_parent("D")
     entlist = [referenced,
                db.Record(name="A").add_parent("BR").add_property("ref", referenced),
@@ -911,8 +925,8 @@ def test_restricted_path(create_mock):
         }
     }
 
-    crawler = Crawler(debug=True)
-    converter_registry = crawler.load_converters(crawler_definition)
+    crawler = Crawler()
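+    # The converter registry is now built with create_converter_registry
+    # instead of the former crawler.load_converters.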
+    converter_registry = create_converter_registry(crawler_definition)
 
     # This structure is crawled
     test_dict = {
@@ -927,7 +941,7 @@ def test_restricted_path(create_mock):
     }
     # first test without a restricted_path
     restricted_path = None
-    records = crawler.start_crawling(
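+    # scan_structure_elements replaces the former crawler.start_crawling call.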
+    records = scan_structure_elements(
         DictElement("TestDict", test_dict), crawler_definition, converter_registry,
         restricted_path
     )
@@ -937,7 +951,7 @@ def test_restricted_path(create_mock):
     # test with a restricted_path but one that has no effect (single root element)
     # this also tests that the remainder of the tree is fully traversed
     restricted_path = ["TestDict"]
-    records = crawler.start_crawling(
+    records = scan_structure_elements(
         DictElement("TestDict", test_dict), crawler_definition, converter_registry,
         restricted_path
     )
@@ -946,7 +960,7 @@ def test_restricted_path(create_mock):
 
     # test with a restricted_path that restricts the tree (single root element)
     restricted_path = ["TestDict", "v2"]
-    records = crawler.start_crawling(
+    records = scan_structure_elements(
         DictElement("TestDict", test_dict), crawler_definition, converter_registry,
         restricted_path
     )
@@ -956,7 +970,7 @@ def test_restricted_path(create_mock):
     # test with a restricted_path that contains a bad element
     restricted_path = ["TestDict", "v3"]
     with raises(RuntimeError):
-        records = crawler.start_crawling(
+        records = scan_structure_elements(
             DictElement("TestDict", test_dict), crawler_definition, converter_registry,
             restricted_path
         )
@@ -968,6 +982,9 @@ def test_split_restricted_path():
     assert ["el", "el"] == split_restricted_path("/el/el")
 
 
+# Filter the warning: it is raised intentionally in this test, and filtering it
+# keeps the test suite runnable with -Werror.
+@pytest.mark.filterwarnings("ignore:The prefix:DeprecationWarning")
 def test_deprecated_prefix_option():
     """Test that calling the crawler's main function with the deprecated
     `prefix` option raises the correct errors and warnings.
diff --git a/unittests/test_tool_extended.py b/unittests/test_tool_extended.py
index d0b431a539a15e3e83906540c69becff437742ec..7dd4282e4c6d206c8c360424d865b9f736b5e582 100644
--- a/unittests/test_tool_extended.py
+++ b/unittests/test_tool_extended.py
@@ -6,7 +6,9 @@
 from caoscrawler import Crawler
 from caoscrawler.structure_elements import File, DictTextElement, DictListElement
 from caoscrawler.identifiable_adapters import IdentifiableAdapter, LocalStorageIdentifiableAdapter
+from caoscrawler.scanner import scan_directory
 from functools import partial
+from caoscrawler.debug_tree import DebugTree
 from copy import deepcopy
 from unittest.mock import MagicMock, Mock
 from os.path import join, dirname, basename
@@ -69,10 +71,14 @@ def crawler():
 #     return ident
 
 
-def test_file_structure_generation(crawler):
-    sd = crawler.debug_tree[dircheckstr("SimulationData",
-                                        "2020_climate-model-predict", "2020-02-01",
-                                        "README.md", structure_element_type="File")]
+def test_file_structure_generation():
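+    # The debug tree is filled by scan_directory; the crawler fixture is no
+    # longer needed for this test.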
+    dbt = DebugTree()
+    scan_directory(rfp("test_directories", "examples_article"),
+                   rfp("scifolder_extended.yml"),
+                   debug_tree=dbt)
+    sd = dbt.debug_tree[dircheckstr("SimulationData",
+                                    "2020_climate-model-predict", "2020-02-01",
+                                    "README.md", structure_element_type="File")]
     assert sd[1]["ReadmeFile"].role == "File"
     assert len(sd[1]["ReadmeFile"].path) > 0
     assert len(sd[1]["ReadmeFile"].file) > 0
diff --git a/unittests/test_variable_substitutions.py b/unittests/test_variable_substitutions.py
index f6c3b6375a3111faff9d746779805ba16af260b7..f13e759982e8102bbf37e65311ff4073ba52e5a2 100644
--- a/unittests/test_variable_substitutions.py
+++ b/unittests/test_variable_substitutions.py
@@ -2,7 +2,9 @@
 # Tests for variable substitutions
 # A. Schlemmer, 05/2022
 
+from caoscrawler.debug_tree import DebugTree
 from caoscrawler import Crawler
+from caoscrawler.scanner import scan_directory
 from caoscrawler.structure_elements import File, DictTextElement, DictListElement
 from caoscrawler.identifiable_adapters import IdentifiableAdapter, LocalStorageIdentifiableAdapter
 from functools import partial
@@ -32,33 +34,21 @@ def dircheckstr(element_type, *pathcomponents):
     return "caoscrawler.structure_elements." + element_type + ": " + basename(join(*pathcomponents)) + ", " + rfp("test_directories", "example_substitutions", *pathcomponents)
 
 
-@pytest.fixture
-def crawler():
-    crawler = Crawler(debug=True)
-    crawler.crawl_directory(rfp("test_directories", "example_substitutions", "ExperimentalData"),
-                            rfp("test_directories", "example_substitutions", "substitutions.yml"))
-    return crawler
+def test_substitutions():
 
-
-@pytest.fixture
-def crawler_2():
-    crawler = Crawler(debug=True)
-    crawler.crawl_directory(rfp("test_directories", "example_substitutions", "ExperimentalData"),
-                            rfp("test_directories", "example_substitutions",
-                                "substitutions_parents.yml"))
-    return crawler
-
-
-def test_substitutions(crawler):
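+    # The crawler fixture was replaced by a direct scan_directory call that
+    # fills the DebugTree inspected below.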
+    dbt = DebugTree()
+    scan_directory(rfp("test_directories", "example_substitutions", "ExperimentalData"),
+                   rfp("test_directories", "example_substitutions", "substitutions.yml"),
+                   debug_tree=dbt)
     # @review Florian Spreckelsen 2022-05-13
     for i in range(2):
-        subd = crawler.debug_tree[dircheckstr(
+        subd = dbt.debug_tree[dircheckstr(
             "File", "ExperimentalData", "220512_data.dat")]
         assert subd[i]["Experiment"].get_property("date").value == "2022-05-12"
         assert isinstance(subd[i]["ExperimentSeries"].get_property(
             "Experiment").value, db.Record)
 
-        subd = crawler.debug_tree[dircheckstr("Directory", "ExperimentalData")]
+        subd = dbt.debug_tree[dircheckstr("Directory", "ExperimentalData")]
         assert subd[i]["Project"].name == "project"
         assert isinstance(subd[i]["Project"].get_property(
             "Experiments").value, list)
@@ -70,11 +60,16 @@ def test_substitutions(crawler):
             "dates").value[0] == "2022-05-12"
 
 
-def test_substitutions_parents(crawler_2):
+def test_substitutions_parents():
+    dbt = DebugTree()
+    scan_directory(rfp("test_directories", "example_substitutions", "ExperimentalData"),
+                   rfp("test_directories", "example_substitutions",
+                       "substitutions_parents.yml"),
+                   debug_tree=dbt)
     # This is a test for:
     # https://gitlab.indiscale.com/caosdb/src/caosdb-crawler/-/issues/35
     # ... testing whether variable substitutions can be used in parent declarations.
-    subd = crawler_2.debug_tree[dircheckstr(
+    subd = dbt.debug_tree[dircheckstr(
         "File", "ExperimentalData", "220512_data.dat")]
     # subd[0] <- generalStore
     # subd[1] <- recordStore
@@ -85,11 +80,16 @@ def test_substitutions_parents(crawler_2):
     assert parents[1].name == "Month_05"
 
 
-def test_empty_parents(crawler_2):
+def test_empty_parents():
+    dbt = DebugTree()
+    scan_directory(rfp("test_directories", "example_substitutions", "ExperimentalData"),
+                   rfp("test_directories", "example_substitutions",
+                       "substitutions_parents.yml"),
+                   debug_tree=dbt)
     # This is a test for:
     # https://gitlab.com/caosdb/caosdb-crawler/-/issues/8
 
-    subd = crawler_2.debug_tree[dircheckstr(
+    subd = dbt.debug_tree[dircheckstr(
         "File", "ExperimentalData", "220512_data.dat")]
 
     parents = subd[1]["RecordWithoutParents"].get_parents()