Commit facdec19 authored by Alexander Schlemmer

Merge branch 'f-json-validator' into f-casting-transformers

parents 1907eee2 5f84d8fd
2 merge requests: !217 TST: Make NamedTemporaryFiles Windows-compatible, !203 New transformer functions for casting types of variables
Pipeline #58209 passed
...@@ -9,6 +9,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
### Added ###
- Validation module for checking a list of generated records against a list of JSON schemas
  that can be generated from a YAML data model file.
- DictElementConverters can now make use of `match_properties` which
  works analogously to `match_properties` in ROCrateEntityConverter and
  `match_attrib` in XMLConverter.
...
#!/usr/bin/env python3
# encoding: utf-8
#
# This file is a part of the LinkAhead Project.
#
# Copyright (C) 2024 Indiscale GmbH <info@indiscale.com>
# Copyright (C) 2024 Alexander Schlemmer
#
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
#
# ** end header
#
"""
This module contains functions to validate the output of a scanner run with a
json schema.
"""
import jsonschema
import linkahead as db
# from caosadvancedtools.models.parser import parse_model_from_string
from caosadvancedtools.json_schema_exporter import recordtype_to_json_schema
from caosadvancedtools.models.parser import parse_model_from_yaml
from jsonschema import ValidationError
from linkahead.high_level_api import convert_to_python_object
from caoscrawler import scanner
# from collections import OrderedDict
def load_json_schema_from_datamodel_yaml(filename: str) -> list:
"""
Load a data model yaml file (using caosadvancedtools) and convert
all record types into a json schema using the json_schema_exporter module.
Arguments
---------
filename: str
The filename of the yaml file to load.
Returns
-------
A list of json schema objects.
"""
model = parse_model_from_yaml(filename)
rt_schemas = []
for el_key, el in model.items():
if isinstance(el, db.RecordType):
rt_schemas.append(recordtype_to_json_schema(el))
return rt_schemas
def representer_ordereddict(dumper, data):
    """
    YAML representer that dumps an OrderedDict like a plain dict.

    Can be registered with:
    yaml.add_representer(OrderedDict, caoscrawler.validator.representer_ordereddict)
    """
    return dumper.represent_data(dict(data))
def convert_record(record: db.Record):
"""
Convert a record into a form suitable for validation with jsonschema.
Uses high_level_api.convert_to_python_object
Changes applied:
    - Properties are moved from the subitem "properties" to the top level.
- The following keys are deleted: parents, role, name, description, metadata, properties
Arguments:
----------
record: db.Record
The record that is supposed to be converted.
"""
pobj = convert_to_python_object(record).serialize()
for prop in pobj["properties"]:
pobj[prop] = pobj["properties"][prop]
for keyd in ("parents", "role", "name",
"description", "metadata", "properties"):
if keyd in pobj:
del pobj[keyd]
return pobj
def validate(records: list[db.Record], schemas: list[dict]) -> list[tuple[bool, list]]:
"""
Validate a list of records against a list of possible JSON schemas.
    Validation is attempted with each schema from the list of schemas. If none of
    them validates without error, the record is assumed to match none of the schemas.
Arguments:
----------
records: list[db.Record]
List of records that will be validated.
schemas: list[dict]
A list of JSON schemas generated using `load_json_schema_from_datamodel_yaml`.
Returns:
--------
A list of tuples, one element for each record:
- Index 0: A boolean that determines whether at least one schema matched for this record.
- Index 1: A list of schemas matching the record at this position of the list `records`.
"""
retval = []
for r in records:
matching_schemas = []
for schema in schemas:
try:
jsonschema.validate(convert_record(r), schema)
matching_schemas.append(schema)
except ValidationError:
pass
retval.append((len(matching_schemas) > 0, matching_schemas))
return retval
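For orientation, a minimal usage sketch of the new validator API follows. It only relies on the functions defined above and on `linkahead.Record`; the file name `datamodel.yaml` and the record contents are illustrative placeholders, not part of this commit.

# Minimal sketch (assumptions: a data model file named "datamodel.yaml" exists
# and defines a "Dataset" record type with the obligatory properties used below).
import linkahead as db
from caoscrawler.validator import load_json_schema_from_datamodel_yaml, validate

schemas = load_json_schema_from_datamodel_yaml("datamodel.yaml")

rec = db.Record()
rec.add_parent(name="Dataset")
rec.add_property(name="keywords", value="climate")
rec.add_property(name="dateModified", value="2024-11-16")

# validate returns one (matched, matching_schemas) tuple per record.
matched, matching_schemas = validate([rec], schemas)[0]
print("valid:", matched, "number of matching schemas:", len(matching_schemas))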
Dataset:
obligatory_properties:
keywords:
datatype: TEXT
dateModified:
datatype: DATETIME
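The diff does not show the JSON schema that `recordtype_to_json_schema` generates for this `Dataset` record type. The sketch below is only a rough, assumed approximation of its shape (consistent with the unit test's expectation that `keywords` is a required property); the real exporter output may contain additional keys.

# Illustrative approximation only, not the verbatim output of
# caosadvancedtools.json_schema_exporter.recordtype_to_json_schema.
approx_dataset_schema = {
    "type": "object",
    "required": ["keywords", "dateModified"],
    "properties": {
        "keywords": {"type": "string"},
        "dateModified": {"type": "string", "format": "date-time"},
    },
}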
#!/usr/bin/env python3
# encoding: utf-8
#
# This file is a part of the LinkAhead Project.
#
# Copyright (C) 2024 Indiscale GmbH <info@indiscale.com>
# Copyright (C) 2024 Alexander Schlemmer <a.schlemmer@indiscale.com>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
#
"""
test validation
"""
import importlib
import os
from os.path import join
from pathlib import Path
import caoscrawler
import jsonschema
import linkahead as db
import pytest
import yaml
from caoscrawler.validator import (convert_record,
load_json_schema_from_datamodel_yaml,
validate)
from jsonschema import ValidationError
UNITTESTDIR = Path(__file__).parent
def test_create_json_schema():
json = load_json_schema_from_datamodel_yaml(join(UNITTESTDIR, "datamodels", "datamodel.yaml"))
r = db.Record()
r.add_parent(name="Dataset")
r.add_property(name="keywords", value="jakdlfjakdf")
r.add_property(name="dateModified", value="2024-11-16")
pobj = convert_record(r)
# print(yaml.dump(pobj))
# print(yaml.dump(json[0]))
jsonschema.validate(pobj, json[0])
# Failing test:
r = db.Record()
r.add_parent(name="Dataset")
r.add_property(name="keywordss", value="jakdlfjakdf")
r.add_property(name="dateModified", value="2024-11-16")
pobj = convert_record(r)
with pytest.raises(ValidationError, match=".*'keywords' is a required property.*"):
jsonschema.validate(pobj, json[0])
def test_validation():
"""
Test for the main validation API function `validate`
"""
json = load_json_schema_from_datamodel_yaml(
join(UNITTESTDIR, "datamodels", "datamodel.yaml"))
r1 = db.Record()
r1.add_parent(name="Dataset")
r1.add_property(name="keywords", value="jakdlfjakdf")
r1.add_property(name="dateModified", value="2024-11-16")
r2 = db.Record()
r2.add_parent(name="Dataset")
r2.add_property(name="keywordss", value="jakdlfjakdf")
r2.add_property(name="dateModified", value="2024-11-16")
valres = validate([r1, r2], json)
assert valres[0][0]
assert len(valres[0][1]) == 1
assert valres[0][1][0] == json[0]
assert len(valres[1][1]) == 0
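As a small follow-up example, the `(matched, matching_schemas)` tuples returned by `validate` can be turned into a simple pass/fail report. The helper below is hypothetical and not part of the commit:

from caoscrawler.validator import validate

def indices_of_invalid_records(records, schemas):
    # Return the indices of all records that matched none of the schemas.
    return [idx for idx, (matched, _schemas) in enumerate(validate(records, schemas))
            if not matched]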