diff --git a/src/caoscrawler/scanner.py b/src/caoscrawler/scanner.py index cc0a50b5a4607288c9f40c050117735ff653880b..ded56aad1efe6ada80c1f2b5f55b6611d1f1b558 100644 --- a/src/caoscrawler/scanner.py +++ b/src/caoscrawler/scanner.py @@ -224,19 +224,21 @@ def initialize_converters(crawler_definition: dict, converter_registry: dict): # Main scanner function: # -------------------------------------------------------------------------------- -def _crawl(self, - items: list[StructureElement], - local_converters: list[Converter], - generalStore: GeneralStore, - recordStore: RecordStore, - structure_elements_path: list[str], - converters_path: list[str], - restricted_path: Optional[list[str]] = None): +def scanner(self, + items: list[StructureElement], + converters: list[Converter], + generalStore: GeneralStore, + recordStore: RecordStore, + structure_elements_path: list[str], + converters_path: list[str], + restricted_path: Optional[list[str]] = None): """ Crawl a list of StructureElements and apply any matching converters. + Formerly known as "_crawl". + items: structure_elements (e.g. files and folders on one level on the hierarchy) - local_converters: locally defined converters for + converters: locally defined converters for treating structure elements. A locally defined converter could be one that is only valid for a specific subtree of the originally cralwed StructureElement structure. @@ -257,7 +259,7 @@ def _crawl(self, restricted_path = None for element in items: - for converter in local_converters: + for converter in converters: # type is something like "matches files", replace isinstance with "type_matches" # match function tests regexp for example @@ -302,7 +304,7 @@ def _crawl(self, structure_elements_path + [element.get_name()], converters_path + [converter.name]) - self._crawl(children, converter.converters, + self.scanner(children, converter.converters, generalStore_copy, recordStore_copy, structure_elements_path + [element.get_name()], converters_path + [converter.name], @@ -411,9 +413,9 @@ def scan_structure_elements(items: Union[list[StructureElement], StructureElemen self.run_id = uuid.uuid1() converters = initialize_converters(crawler_definition, converter_registry) - return _crawl( + return scanner( items=items, - local_converters=converters, + converters=converters, generalStore=self.generalStore, recordStore=self.recordStore, structure_elements_path=[],