diff --git a/src/newcrawler/crawl.py b/src/newcrawler/crawl.py index 471282bf616ffa199c6a8e67b5b883ac76a66066..9aca139b3ee74153a78632e3624f174b8c938bd0 100644 --- a/src/newcrawler/crawl.py +++ b/src/newcrawler/crawl.py @@ -179,6 +179,13 @@ class Crawler(object): # Load and validate the cfood schema: with open(os.path.join(os.path.dirname(__file__), "cfood-schema.yml"), "r") as f: schema = yaml.safe_load(f) + + # Add custom converters to converter enum in schema: + for key in crawler_definition["Converters"]: + print("Adding converter {}".format(key)) + schema["cfood"]["$defs"]["converter"]["properties"]["type"]["enum"].append( + key) + validate(instance=crawler_definition, schema=schema["cfood"]) return crawler_definition @@ -219,8 +226,7 @@ class Crawler(object): # More converters from definition file: if "Converters" in definition: - for entry in definition["Converters"]: - key = entry["key"] + for key, entry in definition["Converters"].items(): converter_registry[key] = { "converter": entry["converter"], "package": entry["package"] @@ -706,8 +712,8 @@ def main(): args = parse_args() crawler = Crawler(debug=args.debug) crawler.crawl_directory(args.path, args.cfood) - if args.provenance_filename is not None: - crawler.save_debug_data(args.provenance_filename) + if args.provenance is not None: + crawler.save_debug_data(args.provenance) return 0 diff --git a/src/newcrawler/extension-converters-config-schema.yml b/src/newcrawler/extension-converters-config-schema.yml new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391