Skip to content
Snippets Groups Projects
Commit 0e7c4676 authored by florian
Browse files

FIX: Validate with custom converters defined in metadata document

parent 80cc116c
No related branches found
No related tags found
2 merge requests: !53 "Release 0.1", !47 "F fix two doc validation"
Pipeline #29052 failed
...@@ -229,6 +229,19 @@ class Crawler(object): ...@@ -229,6 +229,19 @@ class Crawler(object):
with open(crawler_definition_path, "r") as f: with open(crawler_definition_path, "r") as f:
crawler_definitions = list(yaml.safe_load_all(f)) crawler_definitions = list(yaml.safe_load_all(f))
crawler_definition = self._load_definition_from_yaml_dict(
crawler_definitions, crawler_definition_path)
return self._resolve_validator_paths(crawler_definition, crawler_definition_path)
def _load_definition_from_yaml_dict(self, crawler_definitions: List[Dict]):
"""Load crawler definitions from a list of (yaml) dicts `crawler_definitions` which
contains either one or two documents.
Doesn't resolve the validator paths in the cfood definition, so for
internal and testing use only.
"""
if len(crawler_definitions) == 1: if len(crawler_definitions) == 1:
# Simple case, just one document: # Simple case, just one document:
crawler_definition = crawler_definitions[0] crawler_definition = crawler_definitions[0]
...@@ -251,11 +264,16 @@ class Crawler(object): ...@@ -251,11 +264,16 @@ class Crawler(object):
for key in crawler_definition["Converters"]: for key in crawler_definition["Converters"]:
schema["cfood"]["$defs"]["converter"]["properties"]["type"]["enum"].append( schema["cfood"]["$defs"]["converter"]["properties"]["type"]["enum"].append(
key) key)
if len(crawler_definitions) == 2:
if "Converters" in crawler_definitions[0]["metadata"]:
for key in crawler_definitions[0]["metadata"]["Converters"]:
schema["cfood"]["$defs"]["converter"]["properties"]["type"]["enum"].append(
key)
# Validate the cfood schema: # Validate the cfood schema:
validate(instance=crawler_definition, schema=schema["cfood"]) validate(instance=crawler_definition, schema=schema["cfood"])
return self._resolve_validator_paths(crawler_definition, crawler_definition_path) return crawler_definition
def _resolve_validator_paths(self, definition: dict, definition_path: str): def _resolve_validator_paths(self, definition: dict, definition_path: str):
"""Resolve path to validation files with respect to the file in which """Resolve path to validation files with respect to the file in which
...@@ -405,7 +423,8 @@ class Crawler(object): ...@@ -405,7 +423,8 @@ class Crawler(object):
continue continue
elif key == "Converters": elif key == "Converters":
continue continue
converters.append(Converter.converter_factory(value, key, converter_registry)) converters.append(Converter.converter_factory(
value, key, converter_registry))
return converters return converters
...@@ -1089,7 +1108,8 @@ def crawler_main(crawled_directory_path: str, ...@@ -1089,7 +1108,8 @@ def crawler_main(crawled_directory_path: str,
# correct the file path: # correct the file path:
# elem.file = os.path.join(args.path, elem.file) # elem.file = os.path.join(args.path, elem.file)
if prefix is None: if prefix is None:
raise RuntimeError("No prefix set. Prefix must be set if files are used.") raise RuntimeError(
"No prefix set. Prefix must be set if files are used.")
if elem.path.startswith(prefix): if elem.path.startswith(prefix):
elem.path = elem.path[len(prefix):] elem.path = elem.path[len(prefix):]
elem.file = None elem.file = None
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment