diff --git a/src/caoscrawler/validator.py b/src/caoscrawler/validator.py index fdfed23f503e09f11e9be26fcad7dd2b4f13e7e0..67775163af0ca778c08fb9eccdd580f26481b3a1 100644 --- a/src/caoscrawler/validator.py +++ b/src/caoscrawler/validator.py @@ -41,7 +41,7 @@ from caoscrawler import scanner # from collections import OrderedDict -def load_json_schema_from_datamodel_yaml(filename: str) -> list: +def load_json_schema_from_datamodel_yaml(filename: str) -> dict[str, dict]: """ Load a data model yaml file (using caosadvancedtools) and convert all record types into a json schema using the json_schema_exporter module. @@ -53,15 +53,16 @@ def load_json_schema_from_datamodel_yaml(filename: str) -> list: Returns ------- - A list of json schema objects. + A dict of json schema objects. The keys are the record types for which the schemas + are generated. """ model = parse_model_from_yaml(filename) - rt_schemas = [] + rt_schemas = {} for el_key, el in model.items(): if isinstance(el, db.RecordType): - rt_schemas.append(recordtype_to_json_schema(el)) + rt_schemas[el_key] = recordtype_to_json_schema(el) return rt_schemas @@ -119,10 +120,10 @@ def convert_record(record: db.Record): The record that is supposed to be converted. """ pobj = convert_to_python_object(record).serialize() - return apply_schema_patches(pobj) + return _apply_schema_patches(pobj) -def validate(records: list[db.Record], schemas: list[dict]) -> list[tuple[bool, list]]: +def validate(records: list[db.Record], schemas: dict[str, dict]) -> list[tuple[bool, list]]: """ Validate a list of records against a list of possible JSON schemas. @@ -146,21 +147,17 @@ def validate(records: list[db.Record], schemas: list[dict]) -> list[tuple[bool, - Index 1: A list of schemas matching the record at this position of the list `records`. 
""" - # TODO: - # I think it makes sense to change the behavior as follows: - # - Only validate the schema that was generated for a specific record type that matches the parent - # record that is validated. - # - With this behavior for each record a single schema is matched, and if it does not match the - # validation error can be returned. - retval = [] for r in records: - matching_schemas = [] - for schema in schemas: - try: - jsonschema.validate(convert_record(r), schema) - matching_schemas.append(schema) - except ValidationError: - pass - retval.append((len(matching_schemas) > 0, matching_schemas)) + if len(r.parents) != 1: + raise RuntimeError( + "Schema validation is only supported if records have exactly one parent.") + if r.parents[0].name not in schemas: + raise RuntimeError( + "No schema for record type {} in schema dictionary.".format(r.parents[0].name)) + try: + jsonschema.validate(convert_record(r), schemas[r.parents[0].name]) + retval.append((True, None)) + except ValidationError as ex: + retval.append((False, ex)) return retval