diff --git a/src/caoscrawler/crawl.py b/src/caoscrawler/crawl.py
index f93c0ec436e4106c22f3bb065137394c54f12754..fd348e224131dc301c7d99555c80948392b5cbff 100644
--- a/src/caoscrawler/crawl.py
+++ b/src/caoscrawler/crawl.py
@@ -204,6 +204,10 @@ class Crawler(object):
             Please use SecurityMode Enum
         """
 
+        # Remove this once the property `crawled_data` is no longer needed for compatibility
+        # reasons
+        self._crawled_data = None
+
         # The following caches store records, where we checked whether they exist on the remote
         # server. Since, it is important to know whether they exist or not, we store them into two
         # different caches.
@@ -254,10 +258,22 @@ class Crawler(object):
             "The function start_crawling in the crawl module is deprecated. "
             "Please use scan_structure_elements from the scanner module."))
 
-        self.generate_run_id()
-
-        return scan_structure_elements(
+        data = scan_structure_elements(
             items, crawler_definition, converter_registry, restrict_path)
+        self.crawled_data = data
+        return data
+
+    @property
+    def crawled_data(self):
+        warnings.warn(DeprecationWarning(
+            "The use of self.crawled_data is deprecated. You should not access this variable. "
+            "Instead, create the data with the scanner and then pass it as an argument to "
+            "Crawler functions."))
+        return self._crawled_data
+
+    @crawled_data.setter
+    def crawled_data(self, arg):
+        self._crawled_data = arg
 
     def crawl_directory(self,
                         crawled_directory: str,
@@ -267,15 +283,16 @@ class Crawler(object):
 
         The new main function to run the crawler on a directory.
         """
 
+        warnings.warn(DeprecationWarning(
+            "The function crawl_directory in the crawl module is deprecated. "
+            "Please use scan_directory from the scanner module."))
         self.crawled_directory = crawled_directory
-        self.generate_run_id()
-
-        # TODO: This is not ideal yet, the data is just returned and needs to be
-        # separately supplied to the synchronize function.
-        return scan_directory(crawled_directory,
+        data = scan_directory(crawled_directory,
                               crawler_definition_path,
                               restricted_path)
+        self.crawled_data = data
+        return data
 
     def _has_reference_value_without_id(self, ident: Identifiable) -> bool:
         """
@@ -791,9 +808,10 @@ class Crawler(object):
         update_cache.insert(to_be_updated, run_id)
 
     def synchronize(self,
-                    crawled_data: list[db.Record],
                     commit_changes: bool = True,
-                    unique_names=True):
+                    unique_names: bool = True,
+                    crawled_data: Optional[list[db.Record]] = None,
+                    ):
         """
         This function applies several stages:
         1) Retrieve identifiables for all records in crawled_data.
@@ -808,6 +826,13 @@ class Crawler(object):
 
         Return the final to_be_inserted and to_be_updated as tuple.
         """
+        if crawled_data is None:
+            warnings.warn(DeprecationWarning(
+                "Calling synchronize without the data to be synchronized is deprecated. "
+                "Please use, for example, the scanner to create this data."))
+            crawled_data = self.crawled_data
+
+        self.generate_run_id()
         to_be_inserted, to_be_updated = self.split_into_inserts_and_updates(crawled_data)
         referencing_entities = self.create_reference_mapping(to_be_updated + to_be_inserted)
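
The migration path implied by this diff, as a minimal usage sketch. The import paths, the no-argument Crawler() constructor, and the "cfood.yml" definition path are assumptions inferred from the file paths and signatures shown above, not verified against the package:

    # Deprecated pattern: crawl_directory() scans and stashes the result in the
    # crawled_data compatibility property, which synchronize() falls back to.
    from caoscrawler.crawl import Crawler  # assumed import path

    crawler = Crawler()
    crawler.crawl_directory("/path/to/data", "cfood.yml")  # emits DeprecationWarning
    crawler.synchronize()                                  # emits DeprecationWarning

    # Recommended pattern: scan with the scanner module, then hand the records
    # to synchronize() explicitly. Per the docstring, synchronize() returns the
    # final to_be_inserted and to_be_updated as a tuple.
    from caoscrawler.scanner import scan_directory  # assumed import path

    records = scan_directory("/path/to/data", "cfood.yml")
    crawler = Crawler()
    to_be_inserted, to_be_updated = crawler.synchronize(crawled_data=records)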