def start_crawling(self, items: Union[list[StructureElement], StructureElement],
                   crawler_definition: dict,
                   converter_registry: dict,
                   restricted_path: Optional[list[str]] = None):
    """
    Deprecated wrapper around ``scan_structure_elements``.

    Emits a ``DeprecationWarning``, generates a fresh run id via
    ``self.generate_run_id()`` and then delegates to
    ``scan_structure_elements`` from the scanner module.

    Parameters
    ----------
    items : list[StructureElement] or StructureElement
        Passed through unchanged as the first argument.
    crawler_definition : dict
        Passed through unchanged as the second argument.
    converter_registry : dict
        Passed through unchanged as the third argument.
    restricted_path : list[str], optional
        Passed through unchanged as the fourth argument.
        NOTE(review): presumably limits the scan to one path of the
        structure tree -- confirm against ``scan_structure_elements``.

    Returns
    -------
    Whatever ``scan_structure_elements`` returns for the given arguments.
    """
    # stacklevel=2 attributes the warning to the caller of this deprecated
    # method instead of to this line inside the library.
    warnings.warn(
        "The function start_crawling in the crawl module is deprecated. "
        "Please use scan_structure_elements from the scanner module.",
        DeprecationWarning,
        stacklevel=2)

    self.generate_run_id()

    # The parameter is named ``restricted_path``; forwarding the undefined
    # name ``restrict_path`` raised a NameError on every call.
    return scan_structure_elements(
        items, crawler_definition, converter_registry, restricted_path)