# Post-patch form of the two definitions carried by this diff, reformatted.
#
# --- src/caoscrawler/validator.py ---
# Names expected at module level (all visible in the patch): `jsonschema`,
# `linkahead` imported as `db`, `ValidationError` from `jsonschema`, and the
# sibling function `convert_record`.


def validate(records: list[db.Record],
             schemas: list[dict]) -> list[tuple[bool, list[dict]]]:
    """
    Validate a list of records against a list of possible JSON schemas.

    Every record is checked against every schema in `schemas`. A record counts as
    valid if at least one schema validates it without raising a `ValidationError`.
    If none of them does, it is assumed that the record does not match at all.

    Arguments:
    ----------

    records: list[db.Record]
      List of records that will be validated.

    schemas: list[dict]
      A list of JSON schemas generated using `load_json_schema_from_datamodel_yaml`.

    Returns:
    --------
    A list containing one tuple per record, in the same order as `records`:

    - Index 0: A boolean which is True if at least one schema validated the record.
    - Index 1: A list of the schemas matching the record (empty if none matched).
    """
    retval = []
    for r in records:
        # The converted object does not depend on the schema, so it is built once
        # per record instead of once per (record, schema) pair.
        # NOTE(review): assumes `convert_record` is deterministic and does not
        # modify the record - confirm against its implementation.
        pobj = convert_record(r)
        matching_schemas = []
        for schema in schemas:
            try:
                jsonschema.validate(pobj, schema)
            except ValidationError:
                # Not a match for this schema; the remaining ones are still tried.
                continue
            matching_schemas.append(schema)
        retval.append((bool(matching_schemas), matching_schemas))
    return retval


# --- unittests/test_validation.py ---
# Names expected at module level (visible in the patch): `db`, `join`,
# `UNITTESTDIR`, `load_json_schema_from_datamodel_yaml` and `validate`.


def test_validation():
    """
    Test for the main validation API function `validate`
    """
    # Named `schemas` rather than `json` to avoid shadowing the stdlib module name.
    schemas = load_json_schema_from_datamodel_yaml(
        join(UNITTESTDIR, "datamodels", "datamodel.yaml"))

    # Record that is expected to match exactly one schema.
    r1 = db.Record()
    r1.add_parent(name="Dataset")
    r1.add_property(name="keywords", value="jakdlfjakdf")
    r1.add_property(name="dateModified", value="2024-11-16")

    # Same record, but with the property name misspelled ("keywordss"); it is
    # expected to match no schema.
    r2 = db.Record()
    r2.add_parent(name="Dataset")
    r2.add_property(name="keywordss", value="jakdlfjakdf")
    r2.add_property(name="dateModified", value="2024-11-16")

    valres = validate([r1, r2], schemas)

    # One (flag, matching_schemas) tuple per input record.
    assert len(valres) == 2

    assert valres[0][0]
    assert len(valres[0][1]) == 1
    assert valres[0][1][0] == schemas[0]

    # The flag of the non-matching record must be False, not only its list empty.
    assert not valres[1][0]
    assert len(valres[1][1]) == 0