Skip to content
Snippets Groups Projects
Commit f547fa39 authored by Alexander Schlemmer's avatar Alexander Schlemmer
Browse files

MAINT: made utility and converter registry functions top level functions without references to self

parent 8cc9c99a
No related branches found
No related tags found
2 merge requests: !108 "Release 0.5.0", !104 "Create a new scanner module and move functions from crawl module there"
......@@ -70,7 +70,7 @@ from .debug.debug_tree import (DebugTreeStructureElement,
logger = logging.getLogger(__name__)
def load_definition(self, crawler_definition_path: str):
def load_definition(crawler_definition_path: str):
"""
Load a cfood from a crawler definition defined by
crawler definition path and validate it using cfood-schema.yml.
......@@ -80,12 +80,12 @@ def load_definition(self, crawler_definition_path: str):
with open(crawler_definition_path, "r") as f:
crawler_definitions = list(yaml.safe_load_all(f))
crawler_definition = self._load_definition_from_yaml_dict(
crawler_definition = _load_definition_from_yaml_dict(
crawler_definitions)
return self._resolve_validator_paths(crawler_definition, crawler_definition_path)
return _resolve_validator_paths(crawler_definition, crawler_definition_path)
def _load_definition_from_yaml_dict(self, crawler_definitions: list[dict]):
def _load_definition_from_yaml_dict(crawler_definitions: list[dict]):
"""Load crawler definitions from a list of (yaml) dicts `crawler_definitions` which
contains either one or two documents.
......@@ -131,7 +131,8 @@ def _load_definition_from_yaml_dict(self, crawler_definitions: list[dict]):
return crawler_definition
def _resolve_validator_paths(self, definition: dict, definition_path: str):
def _resolve_validator_paths(definition: dict, definition_path: str):
"""Resolve path to validation files with respect to the file in which
the crawler was defined.
......@@ -151,7 +152,7 @@ def _resolve_validator_paths(self, definition: dict, definition_path: str):
f"Couldn't find validation file {definition[key]}")
elif isinstance(value, dict):
# Recursively resolve all validators
definition[key] = self._resolve_validator_paths(value, definition_path)
definition[key] = _resolve_validator_paths(value, definition_path)
return definition
......@@ -194,6 +195,35 @@ def create_converter_registry(definition: dict):
return converter_registry
def initialize_converters(crawler_definition: dict, converter_registry: dict):
    """Create the converter objects declared at the top level of the cfood.

    Takes the cfood as dict (`crawler_definition`) and instantiates the
    converters defined on the highest level. Child converters are in turn
    created recursively during the initialization of each converter.
    """
    # "Definitions" and "Converters" are reserved keywords on the top level
    # of the yaml file and do not describe converters themselves.
    # TODO: there should also be a top level keyword for the actual
    # CFood to avoid confusion between top level keywords and the CFood.
    reserved = ("Definitions", "Converters")
    return [
        Converter.converter_factory(definition, name, converter_registry)
        for name, definition in crawler_definition.items()
        if name not in reserved
    ]
# --------------------------------------------------------------------------------
# Main scanning functions:
# --------------------------------------------------------------------------------
def crawl_directory(self, dirname: str, crawler_definition_path: str,
restricted_path: Optional[list[str]] = None):
""" Crawl a single directory.
......@@ -229,31 +259,7 @@ def crawl_directory(self, dirname: str, crawler_definition_path: str,
converter_registry,
restricted_path=restricted_path
)
def initialize_converters(crawler_definition: dict, converter_registry: dict):
    """Instantiate the top-level converter objects of a cfood.

    Takes the cfood as dict (`crawler_definition`) and creates the converter
    objects that are defined on the highest level; child converters will in
    turn be created during the initialization of those converters.
    """
    result = []
    for name, spec in crawler_definition.items():
        # Skip "Definitions" and "Converters": these are reserved keywords
        # on the top level of the yaml file, not converter definitions.
        # TODO: there should also be a top level keyword for the actual
        # CFood to avoid confusion between top level keywords and the CFood.
        if name in ("Definitions", "Converters"):
            continue
        result.append(
            Converter.converter_factory(spec, name, converter_registry))
    return result
def start_crawling(self, items: Union[list[StructureElement], StructureElement],
crawler_definition: dict,
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment