From cd8c3bfe876beb20534fabff98bb74c3241d8c13 Mon Sep 17 00:00:00 2001
From: Alexander Schlemmer <alexander@mail-schlemmer.de>
Date: Thu, 23 Mar 2023 18:53:14 +0100
Subject: [PATCH] REVIEW: added deprecated function start_crawling

---
Review notes (this section is ignored by `git am`):
* start_crawling forwarded the undefined name `restrict_path`, which raised
  NameError on every call; it now forwards the `restricted_path` parameter.
* A docstring was added to start_crawling; hunk header and diffstat were
  adjusted for the six additional lines (the blob hash in the `index` line
  was not regenerated).
* Line breaks and indentation were reconstructed from a whitespace-collapsed
  copy of this patch; context lines may need adjusting before applying.

 CHANGELOG.md             |  2 ++
 src/caoscrawler/crawl.py | 25 ++++++++++++++++++++++++-
 2 files changed, 26 insertions(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 0572dda7..08b47840 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -36,6 +36,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - The functions ``load_definition``, ``initialize_converters`` and ``load_converters`` are deprecated.
   Please use the functions ``load_definition``, ``initialize_converters`` and
   ``create_converter_registry`` from the scanner module instead.
+- The function ``start_crawling`` is deprecated. The function ``scan_structure_elements`` in the
+  scanner module mostly covers its functionality.
 
 ### Removed ###
 - The command line argument ``--prefix``. Use the new argument ``--remove-prefix`` instead.
diff --git a/src/caoscrawler/crawl.py b/src/caoscrawler/crawl.py
index 3dd10011..7de19275 100644
--- a/src/caoscrawler/crawl.py
+++ b/src/caoscrawler/crawl.py
@@ -70,7 +70,8 @@ from .version import check_cfood_version
 from .scanner import (scan_directory,
                       load_definition,
                       create_converter_registry,
-                      initialize_converters)
+                      initialize_converters,
+                      scan_structure_elements)
 from .debug_tree import DebugTree
 
 logger = logging.getLogger(__name__)
@@ -244,6 +245,28 @@ class Crawler(object):
 
     def generate_run_id(self):
         self.run_id = uuid.uuid1()
+
+    def start_crawling(self, items: Union[list[StructureElement], StructureElement],
+                       crawler_definition: dict,
+                       converter_registry: dict,
+                       restricted_path: Optional[list[str]] = None):
+        """Deprecated wrapper around ``scan_structure_elements``.
+
+        Emits a ``DeprecationWarning``, generates a new run id and forwards
+        all arguments unchanged to ``scan_structure_elements`` from the
+        scanner module, whose result is returned.
+        """
+
+        warnings.warn(DeprecationWarning(
+            "The function start_crawling in the crawl module is deprecated. "
+            "Please use scan_structure_elements from the scanner module."))
+
+        self.generate_run_id()
+
+        return scan_structure_elements(
+            items, crawler_definition, converter_registry, restricted_path)
+
+
     def crawl_directory(self,
                         crawled_directory: str,
                         crawler_definition_path: str,
-- 
GitLab