Skip to content
Snippets Groups Projects

Create a new scanner module and move functions from crawl module there

Merged Alexander Schlemmer requested to merge f-refactor-scanner-crawler into dev
1 file
+ 15
13
Compare changes
  • Side-by-side
  • Inline
+ 15
13
@@ -224,19 +224,21 @@ def initialize_converters(crawler_definition: dict, converter_registry: dict):
@@ -224,19 +224,21 @@ def initialize_converters(crawler_definition: dict, converter_registry: dict):
# Main scanner function:
# Main scanner function:
# --------------------------------------------------------------------------------
# --------------------------------------------------------------------------------
def _crawl(self,
def scanner(self,
items: list[StructureElement],
items: list[StructureElement],
local_converters: list[Converter],
converters: list[Converter],
generalStore: GeneralStore,
generalStore: GeneralStore,
recordStore: RecordStore,
recordStore: RecordStore,
structure_elements_path: list[str],
structure_elements_path: list[str],
converters_path: list[str],
converters_path: list[str],
restricted_path: Optional[list[str]] = None):
restricted_path: Optional[list[str]] = None):
"""
"""
Crawl a list of StructureElements and apply any matching converters.
Crawl a list of StructureElements and apply any matching converters.
 
Formerly known as "_crawl".
 
items: structure_elements (e.g. files and folders on one level of the hierarchy)
items: structure_elements (e.g. files and folders on one level of the hierarchy)
local_converters: locally defined converters for
converters: locally defined converters for
treating structure elements. A locally defined converter could be
treating structure elements. A locally defined converter could be
one that is only valid for a specific subtree of the originally
one that is only valid for a specific subtree of the originally
crawled StructureElement structure.
crawled StructureElement structure.
@@ -257,7 +259,7 @@ def _crawl(self,
@@ -257,7 +259,7 @@ def _crawl(self,
restricted_path = None
restricted_path = None
for element in items:
for element in items:
for converter in local_converters:
for converter in converters:
# type is something like "matches files", replace isinstance with "type_matches"
# type is something like "matches files", replace isinstance with "type_matches"
# match function tests regexp for example
# match function tests regexp for example
@@ -302,7 +304,7 @@ def _crawl(self,
@@ -302,7 +304,7 @@ def _crawl(self,
structure_elements_path + [element.get_name()],
structure_elements_path + [element.get_name()],
converters_path + [converter.name])
converters_path + [converter.name])
self._crawl(children, converter.converters,
self.scanner(children, converter.converters,
generalStore_copy, recordStore_copy,
generalStore_copy, recordStore_copy,
structure_elements_path + [element.get_name()],
structure_elements_path + [element.get_name()],
converters_path + [converter.name],
converters_path + [converter.name],
@@ -411,9 +413,9 @@ def scan_structure_elements(items: Union[list[StructureElement], StructureElemen
@@ -411,9 +413,9 @@ def scan_structure_elements(items: Union[list[StructureElement], StructureElemen
self.run_id = uuid.uuid1()
self.run_id = uuid.uuid1()
converters = initialize_converters(crawler_definition, converter_registry)
converters = initialize_converters(crawler_definition, converter_registry)
return _crawl(
return scanner(
items=items,
items=items,
local_converters=converters,
converters=converters,
generalStore=self.generalStore,
generalStore=self.generalStore,
recordStore=self.recordStore,
recordStore=self.recordStore,
structure_elements_path=[],
structure_elements_path=[],
Loading