From 60ae1ad4eef9d588016764b0593b61d5134082b9 Mon Sep 17 00:00:00 2001 From: Alexander Schlemmer <a.schlemmer@indiscale.com> Date: Mon, 25 Nov 2024 14:21:16 +0100 Subject: [PATCH] ENH(scanner): json schema validation functions and test --- src/caoscrawler/validator.py | 42 +++++++++++++++++-- unittests/datamodels/datamodel.yaml | 6 +++ unittests/test_validation.py | 64 +++++++++++++++++++++++++++++ 3 files changed, 109 insertions(+), 3 deletions(-) create mode 100644 unittests/datamodels/datamodel.yaml create mode 100644 unittests/test_validation.py diff --git a/src/caoscrawler/validator.py b/src/caoscrawler/validator.py index 7e7a2eb3..e1d36a39 100644 --- a/src/caoscrawler/validator.py +++ b/src/caoscrawler/validator.py @@ -40,6 +40,8 @@ from linkahead.high_level_api import convert_to_python_object from caoscrawler import scanner +# from collections import OrderedDict + def load_json_schema_from_datamodel_yaml(filename: str) -> list: """ @@ -58,11 +60,45 @@ def load_json_schema_from_datamodel_yaml(filename: str) -> list: model = parse_model_from_yaml(filename) - # TODO: fix needed (https://gitlab.indiscale.com/caosdb/customers/f-fit/management/-/issues/58) - rt_schemas = [] - for el in model: + for el_key, el in model.items(): if isinstance(el, db.RecordType): rt_schemas.append(recordtype_to_json_schema(el)) return rt_schemas + + +def representer_ordereddict(dumper, data): + # yaml.add_representer(OrderedDict, caoscrawler.validator.representer_ordereddict) + return dumper.represent_data(dict(data)) + + +def convert_record(record: db.Record): + """ + Convert a record into a form suitable for validation with jsonschema. + + Uses high_level_api.convert_to_python_object + + Changes applied: + - properties are moved from subitem "properties" to top-level. + - The following keys are deleted: parents, role, name, description, metadata, properties + + Arguments: + ---------- + record: db.Record + The record that is supposed to be converted. 
+ """ + pobj = convert_to_python_object(record).serialize() + + for prop in pobj["properties"]: + pobj[prop] = pobj["properties"][prop] + + for keyd in ("parents", "role", "name", + "description", "metadata", "properties"): + if keyd in pobj: + del pobj[keyd] + + return pobj + +# def validate(schema, records): +# pass diff --git a/unittests/datamodels/datamodel.yaml b/unittests/datamodels/datamodel.yaml new file mode 100644 index 00000000..2759ecba --- /dev/null +++ b/unittests/datamodels/datamodel.yaml @@ -0,0 +1,6 @@ +Dataset: + obligatory_properties: + keywords: + datatype: TEXT + dateModified: + datatype: DATETIME diff --git a/unittests/test_validation.py b/unittests/test_validation.py new file mode 100644 index 00000000..45462ac8 --- /dev/null +++ b/unittests/test_validation.py @@ -0,0 +1,64 @@ +#!/usr/bin/env python3 +# encoding: utf-8 +# +# This file is a part of the LinkAhead Project. +# +# Copyright (C) 2024 Indiscale GmbH <info@indiscale.com> +# Copyright (C) 2024 Alexander Schlemmer <a.schlemmer@indiscale.com> +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see <https://www.gnu.org/licenses/>. 
+# + +""" +test validation +""" +import importlib +import os +from os.path import join +from pathlib import Path + +import caoscrawler +import jsonschema +import linkahead as db +import pytest +import yaml +from caoscrawler.validator import (convert_record, + load_json_schema_from_datamodel_yaml) +from jsonschema import ValidationError + +UNITTESTDIR = Path(__file__).parent + + +def test_create_json_schema(): + json = load_json_schema_from_datamodel_yaml(join(UNITTESTDIR, "datamodels", "datamodel.yaml")) + r = db.Record() + r.add_parent(name="Dataset") + r.add_property(name="keywords", value="jakdlfjakdf") + r.add_property(name="dateModified", value="2024-11-16") + + pobj = convert_record(r) + # print(yaml.dump(pobj)) + # print(yaml.dump(json[0])) + jsonschema.validate(pobj, json[0]) + + # Failing test: + r = db.Record() + r.add_parent(name="Dataset") + r.add_property(name="keywordss", value="jakdlfjakdf") + r.add_property(name="dateModified", value="2024-11-16") + + pobj = convert_record(r) + + with pytest.raises(ValidationError, match=".*'keywords' is a required property.*"): + jsonschema.validate(pobj, json[0]) -- GitLab