diff --git a/src/caoscrawler/validator.py b/src/caoscrawler/validator.py index 7e7a2eb32be09498a9a0dfb528f5eaa8b5744d90..e1d36a392f21870e2ff2c20a5548144899f338da 100644 --- a/src/caoscrawler/validator.py +++ b/src/caoscrawler/validator.py @@ -40,6 +40,8 @@ from linkahead.high_level_api import convert_to_python_object from caoscrawler import scanner +# from collections import OrderedDict + def load_json_schema_from_datamodel_yaml(filename: str) -> list: """ @@ -58,11 +60,45 @@ def load_json_schema_from_datamodel_yaml(filename: str) -> list: model = parse_model_from_yaml(filename) - # TODO: fix needed (https://gitlab.indiscale.com/caosdb/customers/f-fit/management/-/issues/58) - rt_schemas = [] - for el in model: + for el_key, el in model.items(): if isinstance(el, db.RecordType): rt_schemas.append(recordtype_to_json_schema(el)) return rt_schemas + + +def representer_ordereddict(dumper, data): + # yaml.add_representer(OrderedDict, caoscrawler.validator.representer_ordereddict) + return dumper.represent_data(dict(data)) + + +def convert_record(record: db.Record): + """ + Convert a record into a form suitable for validation with jsonschema. + + Uses high_level_api.convert_to_python_object + + Changes applied: + - properties are moved from subitem "properties" to top-level. + - The following keys are deleted: parents, role, name, description, metadata, properties + + Arguments: + ---------- + record: db.Record + The record that is supposed to be converted. 
+ """ + pobj = convert_to_python_object(record).serialize() + + for prop in pobj["properties"]: + pobj[prop] = pobj["properties"][prop] + + for keyd in ("parents", "role", "name", + "description", "metadata", "properties"): + if keyd in pobj: + del pobj[keyd] + + return pobj + +# def validate(schema, records): +# pass diff --git a/unittests/datamodels/datamodel.yaml b/unittests/datamodels/datamodel.yaml new file mode 100644 index 0000000000000000000000000000000000000000..2759ecba7f2967062937d9b2f4805a9b501ab6c4 --- /dev/null +++ b/unittests/datamodels/datamodel.yaml @@ -0,0 +1,6 @@ +Dataset: + obligatory_properties: + keywords: + datatype: TEXT + dateModified: + datatype: DATETIME diff --git a/unittests/test_validation.py b/unittests/test_validation.py new file mode 100644 index 0000000000000000000000000000000000000000..45462ac813e9700782c8eebeb6de8463ff51fee9 --- /dev/null +++ b/unittests/test_validation.py @@ -0,0 +1,64 @@ +#!/usr/bin/env python3 +# encoding: utf-8 +# +# This file is a part of the LinkAhead Project. +# +# Copyright (C) 2024 Indiscale GmbH <info@indiscale.com> +# Copyright (C) 2024 Alexander Schlemmer <a.schlemmer@indiscale.com> +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see <https://www.gnu.org/licenses/>. 
+# + +""" +test validation +""" +import importlib +import os +from os.path import join +from pathlib import Path + +import caoscrawler +import jsonschema +import linkahead as db +import pytest +import yaml +from caoscrawler.validator import (convert_record, + load_json_schema_from_datamodel_yaml) +from jsonschema import ValidationError + +UNITTESTDIR = Path(__file__).parent + + +def test_create_json_schema(): + json = load_json_schema_from_datamodel_yaml(join(UNITTESTDIR, "datamodels", "datamodel.yaml")) + r = db.Record() + r.add_parent(name="Dataset") + r.add_property(name="keywords", value="jakdlfjakdf") + r.add_property(name="dateModified", value="2024-11-16") + + pobj = convert_record(r) + # print(yaml.dump(pobj)) + # print(yaml.dump(json[0])) + jsonschema.validate(pobj, json[0]) + + # Failing test: + r = db.Record() + r.add_parent(name="Dataset") + r.add_property(name="keywordss", value="jakdlfjakdf") + r.add_property(name="dateModified", value="2024-11-16") + + pobj = convert_record(r) + + with pytest.raises(ValidationError, match=".*'keywords' is a required property.*"): + jsonschema.validate(pobj, json[0])