diff --git a/CHANGELOG.md b/CHANGELOG.md index a6b2de738c79b3ad38c6bf77a2abb3611a6511eb..62105323a81f22594c92601a405e287dc76106ee 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,13 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - CFood that creates a Record for each line in a csv file - `generic_analysis.py` allows to easily call scripts to perform analyses in server side scripting [EXPERIMENTAL] +- **EXPERIMENTAL:** Models parser can import from Json Schema files now: + `models.parser.parse_model_from_json_schema(...)`. See the documentation of + `models.parser.JsonSchemaParser` for the limitations of the current + implementation. +- New keyword "role" in yaml data model that allows creation of Records and Files. +- It is now possible to set values of properties and default values of properties + directly in the yaml model. ### Changed ### @@ -22,6 +29,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Fixed ### +- CaosDB internal properties `name`, `unit` and `description` can now be used via the `extern` + keyword in YAML datamodel specifications. 
+ ### Security ### ## [0.3.1] - 2021-12-06 ## @@ -29,7 +39,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Added ### - `check_reference_field` function to check whether entities with provided ids exits (for example when importing data from a table) -- added the `datatypes` argument to `TableImporter` for columns that do not +- added the `datatypes` argument to `TableImporter` for columns that do not need a special conversion function ## [0.3.0] - 2021-11-02 ## @@ -43,14 +53,14 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - New class to collect possible problems with the data model - New class for checking and importing tables - Function to get a file path to a shared resource directory -- Function to setup logging appropriate for server side scripts with webui +- Function to setup logging appropriate for server side scripts with webui output - New class for collecting information for exporting tables, e.g., to metadata repositories - new name parsing - new test for software folder structure - new assure_name_is function -- two utility functions when working with files: NameCollector and +- two utility functions when working with files: NameCollector and get_file_via_download - Automated documentation builds: `make doc` - Crawler documentation @@ -63,8 +73,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Changed ### -- identifiables of single CFoods are now treated one after the other. This - allows them to have dependencies among each other if they are ordered +- identifiables of single CFoods are now treated one after the other. 
This + allows them to have dependencies among each other if they are ordered correctly - identifiables must have at least one property or a name - `caosadvancedtools.serverside.helper.init_data_model` also checks the role @@ -92,9 +102,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 cause an `sqlite3.IntegrityError` if more than one change was cached for the same entity. - #40 Insertion of identifiables with missing obligatory properties -- Before, a Property with the datatype "LIST(TEXT)" would lead to the creation +- Before, a Property with the datatype "LIST(TEXT)" would lead to the creation of a RecordType. This is fixed now. -- #52 `XLSimporter.read_xls` throwed a wrong error when reading from a file with a wrong ending. +- #52 `XLSimporter.read_xls` throwed a wrong error when reading from a file with a wrong ending. Now, a `DataInconsistencyError` is raised instead of a ValueError. - List properties are no longer updated unnecessarily by the crawler. diff --git a/README_SETUP.md b/README_SETUP.md index d33316612c1d2870c3b2c416e842df4220ecf858..43047d554afbe8ffba11aef67b20dde44d29bdcf 100644 --- a/README_SETUP.md +++ b/README_SETUP.md @@ -38,7 +38,7 @@ Optional h5-crawler: 1. Change directory to `integrationtests/`. 2. Mount `extroot` to the folder that will be used as extroot. E.g. `sudo mount - -o bind extroot ../../caosdb-deploy/profiles/empty/paths/extroot` (or + -o bind extroot ../../caosdb-deploy/profiles/debug/paths/extroot` (or whatever path the extroot of the empty profile to be used is located at). 3. Start (or restart) an empty (!) CaosDB instance (with the mounted extroot). 
The database will be cleared during testing, so it's important to diff --git a/integrationtests/test.sh b/integrationtests/test.sh index 1c0357e265eec770069166e614fc0a3aa6ecc548..a142d917215eb7469faab9c66a581539ce867e4e 100755 --- a/integrationtests/test.sh +++ b/integrationtests/test.sh @@ -57,10 +57,11 @@ then exit 1 fi set -e -echo "undo changes" +echo "Undoing previous changes to extroot content..." cd extroot egrep -liRZ 'A description of this example' . | xargs -0 -l sed -i -e 's/A description of this example/A description of another example/g' cd .. +echo "Done." python3 test_table.py # TODO the following test deletes lots of the data inserted by the crawler echo "Testing im and export" @@ -82,5 +83,11 @@ python3 -m pytest test_crawl_with_datamodel_problems.py echo "Testing table export" python3 -m pytest test_base_table_exporter_integration.py +echo "Testing json-schema datamodel parser" +python3 -m pytest test_json_schema_datamodel_parser.py + +echo "Testing yaml datamodel parser" +python3 -m pytest test_yaml_parser.py + # Obsolete due to teardown in the above test. # echo "/n/n/n YOU NEED TO RESTART THE SERVER TO REDO TESTS!!!" 
diff --git a/integrationtests/test_datamodel.schema.json b/integrationtests/test_datamodel.schema.json new file mode 100644 index 0000000000000000000000000000000000000000..356964702dd83a8c81edf1e8d72bf4a30468e6f2 --- /dev/null +++ b/integrationtests/test_datamodel.schema.json @@ -0,0 +1,85 @@ +[ + { + "title": "TestTypeWithAtomicProps", + "description": "RecordType with scalar atomic properties", + "type": "object", + "properties": { + "simple_text_prop": { "type": "string" }, + "int_prop_with_name": { "type": "integer", "title": "IntegerProperty" }, + "double_prop": { + "type": "number", + "description": "Some generic double-valued property" + }, + "bool_prop": { "type": "boolean" }, + "datetime_prop": { "type": "string", "format": "date-time" }, + "date_prop": { "type": "string", "format": "date" } + }, + "required": [ "simple_text_prop", "double_prop" ] + }, + { + "title": "TestTypeWithReferencesAndEnum", + "type": "object", + "properties": { + "TestTypeWithAtomicProps": {}, + "OtherReference": { + "type": "object", + "description": "Some generic refernced RecordType", + "properties": {} + }, + "named_refernce": { + "type": "object", + "title": "NamedReference", + "properties": { + "simple_text_prop": {} + } + }, + "string_enum": { + "type": "string", + "enum": [ "StringEnumA", "StringEnumB", "StringEnumC" ] + }, + "named_enum": { + "type": "string", + "title": "NamedEnum", + "enum": [ "NameA", "NameB", "NameC" ] + } + } + }, + { + "title": "TestTypeWithLists", + "type": "object", + "properties": { + "string_list": { + "type": "array", + "description": "A list of words", + "items": { "type": "string" } + }, + "named_int_list": { + "type": "array", + "title": "NamedIntList", + "items": { "type": "integer" } + }, + "ListRecordType": { + "type": "array", + "items": { "type": "object", "properties": {} } + }, + "NamedReferenceList": { + "type": "array", + "items": { + "title": "ReferencedListTypeWithName", + "type": "object", + "description": "Referenced by a named 
list-of-references property", + "properties": { + "double_prop": {} + } + } + }, + "ListNumberEnum": { + "type": "array", + "items": { + "type": "number", + "enum": [ 1.1, 2.2, 3.3 ] + } + } + } + } +] diff --git a/integrationtests/test_json_schema_datamodel_parser.py b/integrationtests/test_json_schema_datamodel_parser.py new file mode 100644 index 0000000000000000000000000000000000000000..21ae8d2d7bad5527a7a314220b38af8ff816475f --- /dev/null +++ b/integrationtests/test_json_schema_datamodel_parser.py @@ -0,0 +1,174 @@ +# +# This file is a part of the CaosDB Project. +# +# Copyright (C) 2022 IndiScale GmbH <info@indiscale.com> +# Copyright (C) 2022 Florian Spreckelsen <f.spreckelsen@indiscale.com> +# +# This program is free software: you can redistribute it and/or modify it under +# the terms of the GNU Affero General Public License as published by the Free +# Software Foundation, either version 3 of the License, or (at your option) any +# later version. +# +# This program is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +# FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more +# details. +# +# You should have received a copy of the GNU Affero General Public License along +# with this program. If not, see <https://www.gnu.org/licenses/>. +# + +import os + +import caosdb as db +from caosadvancedtools.models.parser import parse_model_from_json_schema + + +def _clear_db(): + ents = db.execute_query("FIND ENTITY WITH ID>99") + if ents: + ents.delete() + + +def setup_module(): + _clear_db() + + +def teardown_module(): + _clear_db() + + +def _load_and_sync(fname): + """Load datamodel from json schema in fname and synchronize it without asking. 
+ + """ + # @author Florian Spreckelsen + # @date 2022-03-23 + fpath = os.path.join(os.path.dirname(os.path.abspath(__file__)), fname) + model = parse_model_from_json_schema(fpath) + model.sync_data_model(noquestion=True) + + +def test_json_parsed_datamodel(): + # @author Florian Spreckelsen + # @date 2022-03-23 + + _load_and_sync("test_datamodel.schema.json") + + # RecordType with atomic properties + rt1 = db.execute_query( + "FIND RECORDTYPE TestTypeWithAtomicProps", unique=True) + assert rt1.description == "RecordType with scalar atomic properties" + assert rt1.get_property("simple_text_prop") is not None + assert rt1.get_property("simple_text_prop").datatype == db.TEXT + assert rt1.get_importance("simple_text_prop") == db.OBLIGATORY + + assert rt1.get_property("IntegerProperty") is not None + assert rt1.get_property("IntegerProperty").datatype == db.INTEGER + assert rt1.get_importance("IntegerProperty") == db.RECOMMENDED + + assert rt1.get_property("double_prop") is not None + assert rt1.get_property("double_prop").datatype == db.DOUBLE + assert rt1.get_importance("double_prop") == db.OBLIGATORY + assert (db.Property(name="double_prop").retrieve().description == + "Some generic double-valued property") + + further_props = [ + ("bool_prop", db.BOOLEAN), + ("datetime_prop", db.DATETIME), + ("date_prop", db.DATETIME) + ] + for name, dtype in further_props: + assert rt1.get_property(name) is not None + assert rt1.get_property(name).datatype == dtype + assert rt1.get_importance(name) == db.RECOMMENDED + + # RecordType with references and enums + rt2 = db.execute_query( + "FIND RECORDTYPE TestTypeWithReferencesAndEnum", unique=True) + assert rt2.get_property(rt1.name) is not None + assert rt2.get_property(rt1.name).is_reference() + assert rt2.get_property(rt1.name).name == rt1.name + assert rt2.get_property(rt1.name).id == rt1.id + + other_ref_type = db.execute_query( + "FIND RECORDTYPE OtherReference", unique=True) + assert rt2.get_property(other_ref_type.name) is 
not None + assert rt2.get_property(other_ref_type.name).is_reference() + assert rt2.get_property(other_ref_type.name).name == other_ref_type.name + assert rt2.get_property(other_ref_type.name).id == other_ref_type.id + assert other_ref_type.description == "Some generic refernced RecordType" + assert len(other_ref_type.properties) == 0 + + named_ref_type = db.execute_query( + "FIND RECORDTYPE NamedReference", unique=True) + assert rt2.get_property(named_ref_type.name) is not None + assert rt2.get_property(named_ref_type.name).is_reference() + assert rt2.get_property(named_ref_type.name).name == named_ref_type.name + assert rt2.get_property(named_ref_type.name).id == named_ref_type.id + assert named_ref_type.get_property("simple_text_prop") is not None + assert (named_ref_type.get_property("simple_text_prop").id == + rt1.get_property("simple_text_prop").id) + assert (named_ref_type.get_property("simple_text_prop").datatype == + rt1.get_property("simple_text_prop").datatype) + + enums = { + "string_enum": ["StringEnumA", "StringEnumB", "StringEnumC"], + "NamedEnum": ["NameA", "NameB", "NameC"] + } + for enum_type_name, enum_names in enums.items(): + enum_type = db.execute_query( + f"FIND RECORDTYPE {enum_type_name}", unique=True) + assert len(enum_type.properties) == 0 + enum_records = db.execute_query(f"FIND RECORD {enum_type_name}") + assert len(enum_records) == len(enum_names) + for rec in enum_records: + assert rec.name in enum_names + assert rt2.get_property(enum_type_name) is not None + assert rt2.get_property(enum_type_name).is_reference() + assert rt2.get_property(enum_type_name).name == enum_type.name + assert rt2.get_property(enum_type_name).id == enum_type.id + + # Recordtype with lists + rt3 = db.execute_query("FIND RECORDTYPE TestTypeWithLists", unique=True) + assert rt3.get_property("string_list") is not None + assert rt3.get_property("string_list").datatype == db.LIST(db.TEXT) + string_list_prop = db.Property(name="string_list").retrieve() + assert 
string_list_prop.description == "A list of words" + assert string_list_prop.datatype == db.LIST(db.TEXT) + assert string_list_prop.id == rt3.get_property("string_list").id + + assert rt3.get_property("NamedIntList") is not None + assert rt3.get_property("NamedIntList").datatype == db.LIST(db.INTEGER) + + # This is a list of plain references to a specific type + list_rt = db.execute_query("FIND RECORDTYPE ListRecordType", unique=True) + assert len(list_rt.properties) == 0 + assert rt3.get_property(list_rt.name) is not None + assert rt3.get_property(list_rt.name).is_reference() + assert rt3.get_property(list_rt.name).datatype == db.LIST(list_rt) + assert rt3.get_property(list_rt.name).id == list_rt.id + + # This is a list property of its own, referencing another separate RT + referenced_list_rt = db.execute_query( + "FIND RECORDTYPE ReferencedListTypeWithName", unique=True) + assert referenced_list_rt.description == "Referenced by a named list-of-references property" + assert referenced_list_rt.get_property("double_prop") is not None + assert (referenced_list_rt.get_property("double_prop").id == + rt1.get_property("double_prop").id) + assert rt3.get_property("NamedReferenceList") is not None + assert rt3.get_property("NamedReferenceList").is_reference() + assert rt3.get_property( + "NamedReferenceList").datatype == db.LIST(referenced_list_rt) + assert rt3.get_property("NamedReferenceList").id != referenced_list_rt.id + + enum_type = db.execute_query("FIND RECORDTYPE ListNumberEnum", unique=True) + assert len(enum_type.properties) == 0 + enum_names = ["1.1", "2.2", "3.3"] + enum_records = db.execute_query("FIND RECORD ListNumberEnum") + assert len(enum_records) == len(enum_names) + for rec in enum_records: + assert rec.name in enum_names + assert rt3.get_property(enum_type.name) is not None + assert rt3.get_property(enum_type.name).datatype == db.LIST(enum_type) + assert rt3.get_property(enum_type.name).id == enum_type.id diff --git 
a/integrationtests/test_yaml_parser.py b/integrationtests/test_yaml_parser.py new file mode 100644 index 0000000000000000000000000000000000000000..e2a2c4c056ced56d2605d93914186c2cba97e137 --- /dev/null +++ b/integrationtests/test_yaml_parser.py @@ -0,0 +1,69 @@ +# encoding: utf-8 +# +# This file is a part of the CaosDB Project. +# +# Copyright (C) 2022 IndiScale GmbH <info@indiscale.com> +# Copyright (C) 2022 Florian Spreckelsen <f.spreckelsen@indiscale.com> +# +# This program is free software: you can redistribute it and/or modify it under +# the terms of the GNU Affero General Public License as published by the Free +# Software Foundation, either version 3 of the License, or (at your option) any +# later version. +# +# This program is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +# FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more +# details. +# +# You should have received a copy of the GNU Affero General Public License along +# with this program. If not, see <https://www.gnu.org/licenses/>. +# + +import caosdb as db +from caosadvancedtools.models.parser import parse_model_from_string + + +def _delete_everything(): + ents = db.execute_query("FIND ENTITY WITH ID > 99") + if ents: + ents.delete() + + +def setup_module(): + _delete_everything() + + +def teardown_module(): + _delete_everything() + + +def test_internal_props_in_extern(): + """Test adding the internal `name` property as a parent to an existing + property. 
+ + """ + + model = """ +extern: +- name +- test_name +- description +- unit +test_name: + inherit_from_suggested: + - name + - description + - unit +""" + db.Property(name="test_name", datatype=db.TEXT).insert() + ents = parse_model_from_string(model) + ents.sync_data_model(noquestion=True) + + test_prop = db.Property(name="test_name").retrieve() + assert len(test_prop.parents) == 3 + desc_prop = db.Property(name="description").retrieve() + name_prop = db.Property(name="name").retrieve() + unit_prop = db.Property(name="unit").retrieve() + assert test_prop.has_parent(desc_prop) + assert test_prop.has_parent(name_prop) + assert test_prop.has_parent(unit_prop) diff --git a/pylintrc b/pylintrc new file mode 100644 index 0000000000000000000000000000000000000000..8a12125d4b71d3df5f7866277c41ee15401a4a93 --- /dev/null +++ b/pylintrc @@ -0,0 +1,9 @@ +# -*- mode:conf; -*- + +[FORMAT] +# Good variable names which should always be accepted, separated by a comma +good-names=ii,rt + + +[TYPECHECK] +ignored-modules=etree diff --git a/setup.py b/setup.py index 2ec401e843df6337b8a2a9e543a986eec8313c3f..3268bf8c129c02dd882d74198db31544e19c0ffd 100755 --- a/setup.py +++ b/setup.py @@ -155,6 +155,7 @@ def setup_package(): author='Henrik tom Wörden', author_email='h.tomwoerden@indiscale.com', install_requires=["caosdb>=0.7.0", + "jsonschema>=4.4.0", "numpy>=1.17.3", "openpyxl>=3.0.0", "pandas>=1.2.0", diff --git a/src/caosadvancedtools/models/data_model.py b/src/caosadvancedtools/models/data_model.py index e198d15ca2c56eceec29c356cabdf28ac44895b2..d9079e6196b4751ca86ba41275108330b946d57c 100644 --- a/src/caosadvancedtools/models/data_model.py +++ b/src/caosadvancedtools/models/data_model.py @@ -33,6 +33,13 @@ import caosdb as db from caosdb.apiutils import compare_entities, describe_diff +CAOSDB_INTERNAL_PROPERTIES = [ + "description", + "name", + "unit", +] + + class DataModel(dict): """Provides tools for managing a data model. 
@@ -129,8 +136,13 @@ class DataModel(dict): any_change = False for ent in existing_entities: - q = db.Query("FIND * with id={}".format(ent.id)) - ref = q.execute(unique=True) + if ent.name in CAOSDB_INTERNAL_PROPERTIES: + # Workaround for the usage of internal properties like name + # via the extern keyword: + ref = db.Property(name=ent.name).retrieve() + else: + query = db.Query(f"FIND * with id={ent.id}") + ref = query.execute(unique=True) diff = (describe_diff(*compare_entities(ent, ref ), name=ent.name)) diff --git a/src/caosadvancedtools/models/parser.py b/src/caosadvancedtools/models/parser.py index d87bc9a4d22231f339e877bd80c63e602cb116ec..c05b7d218872bb1c7f163c955edb7246bfbd923a 100644 --- a/src/caosadvancedtools/models/parser.py +++ b/src/caosadvancedtools/models/parser.py @@ -1,3 +1,22 @@ +# This file is a part of the CaosDB Project. +# +# Copyright (C) 2022 IndiScale GmbH <info@indiscale.com> +# Copyright (C) 2022 Florian Spreckelsen <f.spreckelsen@indiscale.com> +# Copyright (C) 2022 Daniel Hornung <d.hornung@indiscale.com> +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see <https://www.gnu.org/licenses/>. + """ This module (and script) provides methods to read a DataModel from a YAML file. @@ -16,18 +35,23 @@ not defined, simply the name can be supplied with no value. Parents can be provided under the 'inherit_from_xxxx' keywords. The value needs to be a list with the names. 
Here, NO NEW entities can be defined. """ +import json import re import os import sys - -import caosdb as db import yaml from jsonschema import validate -from .data_model import DataModel +from typing import List + +import jsonschema +import caosdb as db + +from .data_model import CAOSDB_INTERNAL_PROPERTIES, DataModel # Keywords which are allowed in data model descriptions. -KEYWORDS = ["parent", +KEYWORDS = ["parent", # deprecated, use inherit_from_* instead: + # https://gitlab.com/caosdb/caosdb-advanced-user-tools/-/issues/36 "importance", "datatype", # for example TEXT, INTEGER or REFERENCE "unit", @@ -37,13 +61,24 @@ KEYWORDS = ["parent", "suggested_properties", "inherit_from_recommended", "inherit_from_suggested", - "inherit_from_obligatory", ] + "inherit_from_obligatory", + "role", + "value", + ] +# TODO: check whether it's really ignored # These KEYWORDS are not forbidden as properties, but merely ignored. KEYWORDS_IGNORED = [ "unit", ] +JSON_SCHEMA_ATOMIC_TYPES = [ + "string", + "boolean", + "integer", + "number" +] + def _get_listdatatype(dtype): """matches a string to check whether the type definition is a list @@ -95,6 +130,14 @@ class YamlDefinitionError(RuntimeError): super().__init__(template.format(line)) +class JsonSchemaDefinitionError(RuntimeError): + # @author Florian Spreckelsen + # @date 2022-02-17 + # @review Daniel Hornung 2022-02-18 + def __init__(self, msg): + super().__init__(msg) + + def parse_model_from_yaml(filename): """Shortcut if the Parser object is not needed.""" parser = Parser() @@ -109,8 +152,40 @@ def parse_model_from_string(string): return parser.parse_model_from_string(string) +def parse_model_from_json_schema(filename: str): + """Return a datamodel parsed from a json schema definition. 
+ + Parameters + ---------- + filename : str + The path of the json schema file that is to be parsed + + Returns + ------- + out : Datamodel + The datamodel generated from the input schema which then can be used for + synchronizing with CaosDB. + + Note + ---- + This is an experimental feature, see ``JsonSchemaParser`` for information + about the limitations of the current implementation. + + """ + # @author Florian Spreckelsen + # @date 2022-02-17 + # @review Daniel Hornung 2022-02-18 + parser = JsonSchemaParser() + + return parser.parse_model_from_json_schema(filename) + + class Parser(object): def __init__(self): + """Initialize an empty parser object and initialize the dictionary of entities and the list of + treated elements. + + """ self.model = {} self.treated = [] @@ -193,13 +268,14 @@ class Parser(object): ymlmodel["extern"] = [] for name in ymlmodel["extern"]: - if db.execute_query("COUNT Property {}".format(name)) > 0: - self.model[name] = db.execute_query( - "FIND Property WITH name={}".format(name), unique=True) - - elif db.execute_query("COUNT RecordType {}".format(name)) > 0: - self.model[name] = db.execute_query( - "FIND RecordType WITH name={}".format(name), unique=True) + if name in CAOSDB_INTERNAL_PROPERTIES: + self.model[name] = db.Property(name=name).retrieve() + continue + for role in ("Property", "RecordType", "Record", "File"): + if db.execute_query("COUNT {} {}".format(role, name)) > 0: + self.model[name] = db.execute_query( + "FIND {} WITH name={}".format(role, name), unique=True) + break else: raise Exception("Did not find {}".format(name)) @@ -251,6 +327,8 @@ class Parser(object): """ adds names of Properties and RecordTypes to the model dictionary Properties are also initialized. + + name is the key of the yaml element and definition the value. 
""" if name == "__line__": @@ -274,9 +352,29 @@ class Parser(object): # and create the new property self.model[name] = db.Property(name=name, datatype=definition["datatype"]) + elif (self.model[name] is None and isinstance(definition, dict) + and "role" in definition): + if definition["role"] == "RecordType": + self.model[name] = db.RecordType(name=name) + elif definition["role"] == "Record": + self.model[name] = db.Record(name=name) + elif definition["role"] == "File": + # TODO(fspreck) Implement files at some later point in time + raise NotImplementedError( + "The definition of file objects is not yet implemented.") + + # self.model[name] = db.File(name=name) + elif definition["role"] == "Property": + self.model[name] = db.Property(name=name) + else: + raise RuntimeError("Unknown role {} in definition of entity.".format( + definition["role"])) - # add other definitions recursively + # for setting values of properties directly: + if not isinstance(definition, dict): + return + # add other definitions recursively for prop_type in ["recommended_properties", "suggested_properties", "obligatory_properties"]: @@ -300,7 +398,25 @@ class Parser(object): raise def _add_to_recordtype(self, ent_name, props, importance): - """Add properties to a RecordType.""" + """Add properties to a RecordType. + + Parameters + ---------- + ent_name : str + The name of the entity to which the properties shall be added. + + props : dict [str -> dict or :doc:`Entity`] + The properties, indexed by their names. Properties may be given as :doc:`Entity` objects + or as dictionaries. + + importance + The importance as used in :doc:`Entity.add_property`. 
+ + Returns + ------- + None + + """ for n, e in props.items(): if n in KEYWORDS: @@ -313,15 +429,28 @@ class Parser(object): continue n = self._stringify(n) - if (isinstance(e, dict) and "datatype" in e - and (_get_listdatatype(e["datatype"]) is not None)): - self.model[ent_name].add_property( - name=n, - importance=importance, - datatype=db.LIST(_get_listdatatype(e["datatype"]))) + if isinstance(e, dict): + if "datatype" in e and _get_listdatatype(e["datatype"]) is not None: + # Reuse the existing datatype for lists. + datatype = db.LIST(_get_listdatatype(e["datatype"])) + else: + # Ignore a possible e["datatype"] here if it's not a list + # since it has been treated in the definition of the + # property (entity) already + datatype = None + if "value" in e: + value = e["value"] + else: + value = None + else: - self.model[ent_name].add_property(name=n, - importance=importance) + value = e + datatype = None + + self.model[ent_name].add_property(name=n, + value=value, + importance=importance, + datatype=datatype) def _inherit(self, name, prop, inheritance): if not isinstance(prop, list): @@ -344,6 +473,10 @@ class Parser(object): if definition is None: return + # for setting values of properties directly: + if not isinstance(definition, dict): + return + if ("datatype" in definition and definition["datatype"].startswith("LIST")): @@ -360,6 +493,9 @@ class Parser(object): if prop_name == "unit": self.model[name].unit = prop + elif prop_name == "value": + self.model[name].value = prop + elif prop_name == "description": self.model[name].description = prop @@ -388,6 +524,10 @@ class Parser(object): elif prop_name == "datatype": continue + # role has already been used + elif prop_name == "role": + continue + elif prop_name == "inherit_from_obligatory": self._inherit(name, prop, db.OBLIGATORY) elif prop_name == "inherit_from_recommended": @@ -448,7 +588,8 @@ class Parser(object): continue - raise ValueError("Property {} has an unknown datatype: {}".format(value.name, 
value.datatype)) + raise ValueError("Property {} has an unknown datatype: {}".format( + value.name, value.datatype)) def _set_recordtypes(self): """ properties are defined in first iteration; set remaining as RTs """ @@ -458,6 +599,202 @@ class Parser(object): self.model[key] = db.RecordType(name=key) +class JsonSchemaParser(Parser): + """Extends the yaml parser to read in datamodels defined in a json schema. + + **EXPERIMENTAL:** While this class can already be used to create data models + from basic json schemas, there are the following limitations and missing + features: + + * Due to limitations of json-schema itself, we currently do not support + inheritance in the imported data models + * The same goes for suggested properties of RecordTypes + * Currently, ``$defs`` and ``$ref`` in the input schema are not resolved. + * Already defined RecordTypes and (scalar) Properties can't be re-used as + list properties + * Reference properties that are different from the referenced RT. (Although + this is possible for list of references) + * Values + * Roles + * The extern keyword from the yaml parser + * Currently, a json-schema cannot be transformed into a data model if its + root element isn't a RecordType (or Property) with ``title`` and ``type``. + + """ + # @author Florian Spreckelsen + # @date 2022-02-17 + # @review Timm Fitschen 2022-02-30 + + def parse_model_from_json_schema(self, filename: str): + """Return a datamodel created from the definition in the json schema in + `filename`. 
+ + Parameters + ---------- + filename : str + The path to the json-schema file containing the datamodel definition + + Returns + ------- + out : DataModel + The created DataModel + """ + # @author Florian Spreckelsen + # @date 2022-02-17 + # @review Timm Fitschen 2022-02-30 + with open(filename, 'r') as schema_file: + model_dict = json.load(schema_file) + + return self._create_model_from_dict(model_dict) + + def _create_model_from_dict(self, model_dict: [dict, List[dict]]): + """Parse a dictionary read in from the model definition in a json schema and + return the DataModel created from it. + + Parameters + ---------- + model_dict : dict or list[dict] + One or several dictionaries read in from a json-schema file + + Returns + ------- + out : DataModel + The datamodel defined in `model_dict` + """ + # @review Timm Fitschen 2022-02-30 + if isinstance(model_dict, dict): + model_dict = [model_dict] + + for ii, elt in enumerate(model_dict): + if "title" not in elt: + raise JsonSchemaDefinitionError( + f"Object {ii+1} is lacking the `title` key word") + if "type" not in elt: + raise JsonSchemaDefinitionError( + f"Object {ii+1} is lacking the `type` key word") + # Check if this is a valid Json Schema + try: + jsonschema.Draft202012Validator.check_schema(elt) + except jsonschema.SchemaError as err: + raise JsonSchemaDefinitionError( + f"Json Schema error in {elt['title']}:\n{str(err)}") from err + name = self._stringify(elt["title"], context=elt) + self._treat_element(elt, name) + + return DataModel(self.model.values()) + + def _get_atomic_datatype(self, elt): + # @review Timm Fitschen 2022-02-30 + if elt["type"] == "string": + if "format" in elt and elt["format"] in ["date", "date-time"]: + return db.DATETIME + else: + return db.TEXT + elif elt["type"] == "integer": + return db.INTEGER + elif elt["type"] == "number": + return db.DOUBLE + elif elt["type"] == "boolean": + return db.BOOLEAN + else: + raise JsonSchemaDefinitionError(f"Unkown atomic type in {elt}.") + + def 
_treat_element(self, elt: dict, name: str): + # @review Timm Fitschen 2022-02-30 + force_list = False + if name in self.model: + return self.model[name], force_list + if "type" not in elt: + # Each element must have a specific type + raise JsonSchemaDefinitionError( + f"`type` is missing in element {name}.") + if "enum" in elt: + ent = self._treat_enum(elt, name) + elif elt["type"] in JSON_SCHEMA_ATOMIC_TYPES: + ent = db.Property( + name=name, datatype=self._get_atomic_datatype(elt)) + elif elt["type"] == "object": + ent = self._treat_record_type(elt, name) + elif elt["type"] == "array": + ent, force_list = self._treat_list(elt, name) + else: + raise NotImplementedError( + f"Cannot parse items of type '{elt['type']}' (yet).") + if "description" in elt and ent.description is None: + # There is a description and it hasn't been set by another + # treat_something function + ent.description = elt["description"] + + self.model[name] = ent + return ent, force_list + + def _treat_record_type(self, elt: dict, name: str): + # @review Timm Fitschen 2022-02-30 + rt = db.RecordType(name=name) + if "required" in elt: + required = elt["required"] + else: + required = [] + if "properties" in elt: + for key, prop in elt["properties"].items(): + if "title" in prop: + name = self._stringify(prop["title"]) + else: + name = self._stringify(key) + prop_ent, force_list = self._treat_element(prop, name) + importance = db.OBLIGATORY if key in required else db.RECOMMENDED + if not force_list: + rt.add_property(prop_ent, importance=importance) + else: + # Special case of rt used as a list property + rt.add_property(prop_ent, importance=importance, + datatype=db.LIST(prop_ent)) + + if "description" in elt: + rt.description = elt["description"] + return rt + + def _treat_enum(self, elt: dict, name: str): + # @review Timm Fitschen 2022-02-30 + if "type" in elt and elt["type"] == "integer": + raise NotImplementedError( + "Integer-enums are not allowd until " + 
"https://gitlab.indiscale.com/caosdb/src/caosdb-server/-/issues/224 " + "has been fixed." + ) + rt = db.RecordType(name=name) + for enum_elt in elt["enum"]: + rec = db.Record(name=self._stringify(enum_elt)) + rec.add_parent(rt) + self.model[enum_elt] = rec + + return rt + + def _treat_list(self, elt: dict, name: str): + # @review Timm Fitschen 2022-02-30 + + if not "items" in elt: + raise JsonSchemaDefinitionError( + f"The definition of the list items is missing in {elt}.") + items = elt["items"] + if "enum" in items: + return self._treat_enum(items, name), True + if items["type"] in JSON_SCHEMA_ATOMIC_TYPES: + datatype = db.LIST(self._get_atomic_datatype(items)) + return db.Property(name=name, datatype=datatype), False + if items["type"] == "object": + if not "title" in items or self._stringify(items["title"]) == name: + # Property is RecordType + return self._treat_record_type(items, name), True + else: + # List property will be an entity of its own with a name + # different from the referenced RT + ref_rt = self._treat_record_type( + items, self._stringify(items["title"])) + self.model[ref_rt.name] = ref_rt + return db.Property(name=name, datatype=db.LIST(ref_rt)), False + + if __name__ == "__main__": model = parse_model_from_yaml('data_model.yml') print(model) diff --git a/src/doc/yaml_interface.rst b/src/doc/yaml_interface.rst index dcf4c5d6c7a674bd8d32d92df0a509e511af26f5..476e92829238a0fc9dac851c61790c022e9fcde9 100644 --- a/src/doc/yaml_interface.rst +++ b/src/doc/yaml_interface.rst @@ -50,7 +50,7 @@ This example defines 3 ``RecordType``s: - A Person with a ``firstName`` and a ``lastName`` (as recommended properties) - A ``LabbookEntry`` with multiple recommended properties of different data types - It is assumed that the server knows a RecordType or Property with the name - "Textfile". + ``Textfile``. 
One major advantage of using this interface (in contrast to the standard python interface) is that properties can be defined and added to record types "on-the-fly". E.g. the three lines for ``firstName`` as sub entries of ``Person`` have two effects on CaosDB: @@ -66,7 +66,8 @@ Note the difference between the three property declarations of ``LabbookEntry``: - ``responsible``: This defines and adds a property with name "responsible" to ``LabbookEntry`, which has a datatype ``Person``. ``Person`` is defined above. - ``firstName``: This defines and adds a property with the standard data type ``TEXT`` to record type ``Person``. -If the data model depends on already existing parts, those can be added using the ``extern`` keyword. +If the data model depends on record types or properties which already exist in CaosDB, those can be +added using the ``extern`` keyword: ``extern`` takes a list of previously defined names. Datatypes --------- diff --git a/tox.ini b/tox.ini index c0c976fde9aef8c0883161b3d1b955e2a7919cf1..9fc4f1d75e841806c95cca626051699552231986 100644 --- a/tox.ini +++ b/tox.ini @@ -10,3 +10,6 @@ deps=pandas xlrd == 1.2 h5py commands=py.test --cov=caosadvancedtools -vv {posargs} + +[flake8] +max-line-length=100 diff --git a/unittests/json-schema-models/datamodel_atomic_properties.schema.json b/unittests/json-schema-models/datamodel_atomic_properties.schema.json new file mode 100644 index 0000000000000000000000000000000000000000..3828f131180a839d5c9b8bc5aa1a1285717da723 --- /dev/null +++ b/unittests/json-schema-models/datamodel_atomic_properties.schema.json @@ -0,0 +1,24 @@ +[ + { + "title": "Dataset1", + "description": "Some description", + "type": "object", + "properties": { + "title": { "type": "string", "description": "full dataset title" }, + "campaign": { "type": "string", "description": "FIXME" }, + "number_prop": { "type": "number", "description": "Some float property" } + }, + "required": [ "title", "number_prop" ] + }, + { + "title": "Dataset2", + 
"type": "object", + "properties": { + "date_time": { "type": "string", "format": "date-time" }, + "date": { "type": "string", "format": "date" }, + "integer": { "type": "integer", "description": "Some integer property" }, + "boolean": { "type": "boolean" }, + "number_prop": { "type": "number", "description": "Some float property" } + } + } +] diff --git a/unittests/json-schema-models/datamodel_enum_prop.schema.json b/unittests/json-schema-models/datamodel_enum_prop.schema.json new file mode 100644 index 0000000000000000000000000000000000000000..a14008d141606368519c0caadc30b16a1dc9d16d --- /dev/null +++ b/unittests/json-schema-models/datamodel_enum_prop.schema.json @@ -0,0 +1,16 @@ +{ + "title": "Dataset", + "description": "Some description", + "type": "object", + "properties": { + "license": { + "type": "string", + "enum": ["CC-BY", "CC-BY-SA", "CC0", "restricted access"] + }, + "number_enum": { + "type": "number", + "enum": [1.1, 2.2, 3.3] + } + }, + "required": ["license"] +} diff --git a/unittests/json-schema-models/datamodel_int_enum_broken.schema.json b/unittests/json-schema-models/datamodel_int_enum_broken.schema.json new file mode 100644 index 0000000000000000000000000000000000000000..159b84ac36c26325b59cdd25d2830152c4acdaaa --- /dev/null +++ b/unittests/json-schema-models/datamodel_int_enum_broken.schema.json @@ -0,0 +1,11 @@ +{ + "title": "Dataset", + "description": "Some description", + "type": "object", + "properties": { + "int_enum": { + "type": "integer", + "enum": [1, 2, 3] + } + } +} diff --git a/unittests/json-schema-models/datamodel_list_properties.schema.json b/unittests/json-schema-models/datamodel_list_properties.schema.json new file mode 100644 index 0000000000000000000000000000000000000000..b95f468a1c13f1912266e65f029654077ce6a14e --- /dev/null +++ b/unittests/json-schema-models/datamodel_list_properties.schema.json @@ -0,0 +1,46 @@ +{ + "title": "Dataset", + "description": "Dataset with list (array) properties", + "type": "object", + 
"properties": { + "keywords": { + "type": "array", + "items": { "type": "string" } + }, + "booleans": { + "type": "array", + "items": { "type": "boolean" } + }, + "integers": { + "type": "array", + "items": { "type": "integer" } + }, + "floats": { + "type": "array", + "items": { "type": "number" } + }, + "datetimes": { + "type": "array", + "items": { "type": "string", "format": "date-time" } + }, + "dates": { + "type": "array", + "items": { "type": "string", "format": "date" } + }, + "reference": { + "type": "array", + "items": { "type": "object", "properties": {} } + }, + "reference_with_name": { + "type": "array", + "items": { "type": "object", "title": "event", "properties": {} } + }, + "license": { + "type": "array", + "items": { + "type": "string", + "enum": ["CC-BY", "CC-BY-SA", "CC0", "restricted access"] + } + } + } +} diff --git a/unittests/json-schema-models/datamodel_missing_property_type.schema.json b/unittests/json-schema-models/datamodel_missing_property_type.schema.json new file mode 100644 index 0000000000000000000000000000000000000000..eac3cc563df587568c4e9610d72618610566beef --- /dev/null +++ b/unittests/json-schema-models/datamodel_missing_property_type.schema.json @@ -0,0 +1,7 @@ +{ + "title": "Dataset", + "type": "object", + "properties": { + "method": { "description": "Missing property type" } + } +} diff --git a/unittests/json-schema-models/datamodel_references.schema.json b/unittests/json-schema-models/datamodel_references.schema.json new file mode 100644 index 0000000000000000000000000000000000000000..6b79a9bcdbbd8beaf9974a600e9c5ff30cb513f4 --- /dev/null +++ b/unittests/json-schema-models/datamodel_references.schema.json @@ -0,0 +1,24 @@ +{ + "title": "Dataset", + "description": "", + "type": "object", + "properties": { + "event": { + "type": "object", + "properties": { + "longitude": { + "type": "number" + }, + "latitude": { + "type": "number" + }, + "location": { + "type": "string", + "description": "geographical location (e.g., North 
Sea; Espoo, Finland)" + } + }, + "required": ["longitude", "latitude"] + } + }, + "required": ["event"] +} diff --git a/unittests/json-schema-models/datamodel_required_no_list.schema.json b/unittests/json-schema-models/datamodel_required_no_list.schema.json new file mode 100644 index 0000000000000000000000000000000000000000..f3697a71320bc8baf05156bec2c71f3915378654 --- /dev/null +++ b/unittests/json-schema-models/datamodel_required_no_list.schema.json @@ -0,0 +1,7 @@ +{ + "title": "Dataset", + "description": "", + "type": "object", + + "required": "Dataset" +} diff --git a/unittests/json-schema-models/datamodel_string_properties.schema.json b/unittests/json-schema-models/datamodel_string_properties.schema.json new file mode 100644 index 0000000000000000000000000000000000000000..62bc0a2a4250050e5433038bf61e7c9692bb0200 --- /dev/null +++ b/unittests/json-schema-models/datamodel_string_properties.schema.json @@ -0,0 +1,14 @@ +{ + "title": "Dataset", + "description": "", + "type": "object", + + "properties": { + "title": { "type": "string", "description": "full dataset title" }, + "campaign": { "type": "string", "description": "FIXME" }, + "method": { "type": "string", "description": "FIXME" }, + "titled": { "title": "The title", "type": "string", "description": "None" } + }, + + "required": ["title"] +} diff --git a/unittests/test_json_schema_model_parser.py b/unittests/test_json_schema_model_parser.py new file mode 100644 index 0000000000000000000000000000000000000000..4b44f6efa1cda19c04ee13a6a50b04cefbff9177 --- /dev/null +++ b/unittests/test_json_schema_model_parser.py @@ -0,0 +1,342 @@ +# +# This file is a part of the CaosDB Project. 
+# +# Copyright (C) 2022 IndiScale GmbH <info@indiscale.com> +# Copyright (C) 2022 Florian Spreckelsen <f.spreckelsen@indiscale.com> +# Copyright (C) 2022 Daniel Hornung <d.hornung@indiscale.com> +# +# This program is free software: you can redistribute it and/or modify it under +# the terms of the GNU Affero General Public License as published by the Free +# Software Foundation, either version 3 of the License, or (at your option) any +# later version. +# +# This program is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +# FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more +# details. +# +# You should have received a copy of the GNU Affero General Public License along +# with this program. If not, see <https://www.gnu.org/licenses/>. +# + +# @review Daniel Hornung 2022-02-18 + +import os +import pytest + +import caosdb as db +from caosadvancedtools.models.parser import (parse_model_from_json_schema, + JsonSchemaDefinitionError) + +FILEPATH = os.path.join(os.path.dirname( + os.path.abspath(__file__)), 'json-schema-models') + + +def test_rt_with_string_properties(): + """Test datamodel parsing of datamodel_string_properties.schema.json""" + # @author Florian Spreckelsen + # @date 2022-02-17 + + model = parse_model_from_json_schema( + os.path.join(FILEPATH, + "datamodel_string_properties.schema.json")) + assert "Dataset" in model + dataset_rt = model["Dataset"] + assert isinstance(dataset_rt, db.RecordType) + assert dataset_rt.name == "Dataset" + assert dataset_rt.description == "" + assert len(dataset_rt.get_properties()) == 4 + + assert dataset_rt.get_property("title") is not None + assert dataset_rt.get_property("campaign") is not None + assert dataset_rt.get_property("method") is not None + + assert dataset_rt.get_property("The title") is not None + assert dataset_rt.get_property("titled") is None + + title_prop = 
dataset_rt.get_property("title") + assert title_prop.datatype == db.TEXT + assert dataset_rt.get_importance(title_prop.name) == db.OBLIGATORY + + campaign_prop = dataset_rt.get_property("campaign") + assert campaign_prop.datatype == db.TEXT + assert dataset_rt.get_importance(campaign_prop.name) == db.RECOMMENDED + + method_prop = dataset_rt.get_property("method") + assert method_prop.datatype == db.TEXT + assert dataset_rt.get_importance(method_prop.name) == db.RECOMMENDED + + +def test_datamodel_with_atomic_properties(): + """Test read-in of two separate record types with atomic-typed properties.""" + # @author Florian Spreckelsen + # @date 2022-02-18 + + model = parse_model_from_json_schema(os.path.join( + FILEPATH, "datamodel_atomic_properties.schema.json")) + assert "Dataset1" in model + assert "Dataset2" in model + + rt1 = model["Dataset1"] + assert isinstance(rt1, db.RecordType) + assert rt1.name == "Dataset1" + assert rt1.description == "Some description" + assert len(rt1.get_properties()) == 3 + + assert rt1.get_property("title") is not None + assert rt1.get_property("campaign") is not None + assert rt1.get_property("number_prop") is not None + + title_prop = rt1.get_property("title") + assert title_prop.datatype == db.TEXT + assert rt1.get_importance(title_prop.name) == db.OBLIGATORY + + campaign_prop = rt1.get_property("campaign") + assert campaign_prop.datatype == db.TEXT + assert rt1.get_importance(campaign_prop.name) == db.RECOMMENDED + + float_prop = rt1.get_property("number_prop") + assert float_prop.datatype == db.DOUBLE + assert rt1.get_importance(float_prop.name) == db.OBLIGATORY + + rt2 = model["Dataset2"] + assert isinstance(rt2, db.RecordType) + assert rt2.name == "Dataset2" + assert not rt2.description + assert len(rt2.get_properties()) == 5 + + date_prop = rt2.get_property("date") + assert date_prop.datatype == db.DATETIME + + datetime_prop = rt2.get_property("date_time") + assert datetime_prop.datatype == db.DATETIME + + int_prop = 
rt2.get_property("integer") + assert int_prop.datatype == db.INTEGER + assert int_prop.description == "Some integer property" + + bool_prop = rt2.get_property("boolean") + assert bool_prop.datatype == db.BOOLEAN + + float_prop2 = rt2.get_property("number_prop") + assert float_prop.datatype == float_prop2.datatype + + +def test_required_no_list(): + """Exception must be raised when "required" is not a list.""" + # @author Daniel Hornung + # @date 2022-02-18 + + with pytest.raises(JsonSchemaDefinitionError) as err: + parse_model_from_json_schema( + os.path.join(FILEPATH, + "datamodel_required_no_list.schema.json")) + assert "'Dataset' is not of type 'array'" in str(err.value) + + +def test_missing_property_type(): + """Exception must be raised when "type" is missing.""" + with pytest.raises(JsonSchemaDefinitionError) as err: + parse_model_from_json_schema( + os.path.join(FILEPATH, + "datamodel_missing_property_type.schema.json")) + assert "`type` is missing" in str(err.value) + + +def test_enum(): + """Enums are represented in references to records of a specific type.""" + # @author Florian Spreckelsen + # @date 2022-03-16 + + model = parse_model_from_json_schema(os.path.join( + FILEPATH, "datamodel_enum_prop.schema.json")) + licenses = ["CC-BY", "CC-BY-SA", "CC0", "restricted access"] + for name in ["Dataset", "license"] + licenses: + assert name in model + + assert isinstance(model["Dataset"], db.RecordType) + assert model["Dataset"].get_property("license") is not None + assert model["Dataset"].get_property("license").is_reference() + assert model["Dataset"].get_property("license").datatype.name == "license" + assert isinstance(model["license"], db.RecordType) + + for name in licenses: + assert isinstance(model[name], db.Record) + assert model[name].name == name + assert len(model[name].parents) == 1 + assert model[name].has_parent(model["license"]) + + # Also allow enums with non-string types + number_enums = ["1.1", "2.2", "3.3"] + for name in ["number_enum"] + 
number_enums: + assert name in model + + assert isinstance(model["number_enum"], db.RecordType) + assert model["Dataset"].get_property("number_enum") is not None + assert model["Dataset"].get_property("number_enum").is_reference() + assert model["Dataset"].get_property( + "number_enum").datatype.name == "number_enum" + + for name in number_enums: + assert isinstance(model[name], db.Record) + assert model[name].name == name + assert len(model[name].parents) == 1 + assert model[name].has_parent(model["number_enum"]) + + +@pytest.mark.xfail(reason="Don't allow integer enums until https://gitlab.indiscale.com/caosdb/src/caosdb-server/-/issues/224 has been fixed") +def test_int_enum(): + """Check an enum property with type: integer""" + # @author Florian Spreckelsen + # @date 2022-03-22 + + model = parse_model_from_json_schema(os.path.join( + FILEPATH, "datamodel_int_enum_broken.schema.json")) + int_enums = ["1", "2", "3"] + for name in ["Dataset", "int_enum"] + int_enums: + assert name in model + + assert isinstance(model["Dataset"], db.RecordType) + assert model["Dataset"].get_property("int_enum") is not None + assert model["Dataset"].get_property("int_enum").is_reference() + assert model["Dataset"].get_property( + "int_enum").datatype.name == "int_enum" + assert isinstance(model["int_enum"], db.RecordType) + + for name in int_enums: + assert isinstance(model[name], db.Record) + assert model[name].name == name + assert len(model[name].parents) == 1 + assert model[name].has_parent(model["int_enum"]) + + +def test_references(): + """Test reference properties""" + # @author Florian Spreckelsen + # @date 2022-03-17 + + model = parse_model_from_json_schema(os.path.join( + FILEPATH, "datamodel_references.schema.json")) + for name in ["Dataset", "event", "longitude", "latitude", "location"]: + assert name in model + + assert isinstance(model["Dataset"], db.RecordType) + assert model["Dataset"].get_property("event") is not None + assert model["Dataset"].get_importance("event") 
== db.OBLIGATORY + assert model["Dataset"].get_property("event").is_reference() + assert model["Dataset"].get_property("event").datatype.name == "event" + + assert isinstance(model["event"], db.RecordType) + assert model["event"].get_property("longitude") is not None + assert model["event"].get_importance("longitude") == db.OBLIGATORY + assert model["event"].get_property("longitude").datatype == db.DOUBLE + + assert model["event"].get_property("latitude") is not None + assert model["event"].get_importance("latitude") == db.OBLIGATORY + assert model["event"].get_property("latitude").datatype == db.DOUBLE + + assert model["event"].get_property("location") is not None + assert model["event"].get_importance("location") == db.RECOMMENDED + assert model["event"].get_property("location").datatype == db.TEXT + + assert isinstance(model["longitude"], db.Property) + assert model["longitude"].datatype == db.DOUBLE + + assert isinstance(model["latitude"], db.Property) + assert model["latitude"].datatype == db.DOUBLE + + assert isinstance(model["location"], db.Property) + assert model["location"].datatype == db.TEXT + assert model["location"].description == "geographical location (e.g., North Sea; Espoo, Finland)" + + +def test_list(): + """Test list properties with all possible datatypes.""" + # @author Florian Spreckelsen + # @date 2022-03-17 + + model = parse_model_from_json_schema(os.path.join( + FILEPATH, "datamodel_list_properties.schema.json")) + licenses = ["CC-BY", "CC-BY-SA", "CC0", "restricted access"] + names = ["Dataset", "keywords", "booleans", "integers", "floats", + "datetimes", "dates", "reference", "reference_with_name", "event", + "license"] + for name in names + licenses: + assert name in model + + dataset_rt = model["Dataset"] + assert dataset_rt.get_property("keywords") is not None + assert dataset_rt.get_property("keywords").datatype == db.LIST(db.TEXT) + assert isinstance(model["keywords"], db.Property) + assert model["keywords"].name == "keywords" + 
assert model["keywords"].datatype == db.LIST(db.TEXT) + + assert dataset_rt.get_property("booleans") is not None + assert dataset_rt.get_property("booleans").datatype == db.LIST(db.BOOLEAN) + assert isinstance(model["booleans"], db.Property) + assert model["booleans"].name == "booleans" + assert model["booleans"].datatype == db.LIST(db.BOOLEAN) + + assert dataset_rt.get_property("integers") is not None + assert dataset_rt.get_property("integers").datatype == db.LIST(db.INTEGER) + assert isinstance(model["integers"], db.Property) + assert model["integers"].name == "integers" + assert model["integers"].datatype == db.LIST(db.INTEGER) + + assert dataset_rt.get_property("floats") is not None + assert dataset_rt.get_property("floats").datatype == db.LIST(db.DOUBLE) + assert isinstance(model["floats"], db.Property) + assert model["floats"].name == "floats" + assert model["floats"].datatype == db.LIST(db.DOUBLE) + + assert dataset_rt.get_property("datetimes") is not None + assert dataset_rt.get_property( + "datetimes").datatype == db.LIST(db.DATETIME) + assert isinstance(model["datetimes"], db.Property) + assert model["datetimes"].name == "datetimes" + assert model["datetimes"].datatype == db.LIST(db.DATETIME) + + assert dataset_rt.get_property("dates") is not None + assert dataset_rt.get_property( + "dates").datatype == db.LIST(db.DATETIME) + assert isinstance(model["dates"], db.Property) + assert model["dates"].name == "dates" + assert model["dates"].datatype == db.LIST(db.DATETIME) + + # Simple reference list property + assert dataset_rt.get_property("reference") is not None + assert dataset_rt.get_property("reference").is_reference() + assert dataset_rt.get_property( + "reference").datatype == db.LIST("reference") + assert isinstance(model["reference"], db.RecordType) + assert model["reference"].name == "reference" + assert dataset_rt.get_property( + "reference").datatype == db.LIST(model["reference"]) + + # Reference list with name + assert 
dataset_rt.get_property("reference_with_name") is not None + assert dataset_rt.get_property("reference_with_name").is_reference() + assert dataset_rt.get_property( + "reference_with_name").datatype == db.LIST("event") + assert isinstance(model["event"], db.RecordType) + assert model["event"].name == "event" + assert dataset_rt.get_property( + "reference_with_name").datatype == db.LIST(model["event"]) + assert isinstance(model["reference_with_name"], db.Property) + assert model["reference_with_name"].name == "reference_with_name" + assert model["reference_with_name"].datatype == db.LIST(model["event"]) + + # References to enum types + assert dataset_rt.get_property("license") is not None + assert dataset_rt.get_property("license").is_reference() + assert dataset_rt.get_property("license").datatype == db.LIST("license") + assert isinstance(model["license"], db.RecordType) + assert model["license"].name == "license" + assert dataset_rt.get_property( + "license").datatype == db.LIST(model["license"]) + + for name in licenses: + assert isinstance(model[name], db.Record) + assert model[name].name == name + assert len(model[name].parents) == 1 + assert model[name].has_parent(model["license"]) diff --git a/unittests/test_parser.py b/unittests/test_yaml_model_parser.py similarity index 65% rename from unittests/test_parser.py rename to unittests/test_yaml_model_parser.py index 85e6b7e5fe5f0337ac1ae5a711f50484866d98b3..74af36ff46259595201f2ba796d76c16f4b24824 100644 --- a/unittests/test_parser.py +++ b/unittests/test_yaml_model_parser.py @@ -1,5 +1,7 @@ import unittest +from datetime import date from tempfile import NamedTemporaryFile +from pytest import raises import jsonschema @@ -17,6 +19,8 @@ def to_file(string): return f.name +# TODO: check purpose of this function... 
add documentation + def parse_str(string): parse_model_from_yaml(to_file(string)) @@ -70,7 +74,8 @@ RT2: a: """ - self.assertRaises(TwiceDefinedException, lambda: parse_model_from_yaml(to_file(string))) + self.assertRaises(TwiceDefinedException, + lambda: parse_model_from_yaml(to_file(string))) def test_typical_case(self): string = """ @@ -105,7 +110,8 @@ RT5: - RT1: - RT2: """ - self.assertRaises(jsonschema.exceptions.ValidationError, lambda: parse_model_from_yaml(to_file(string))) + self.assertRaises( + ValueError, lambda: parse_model_from_yaml(to_file(string))) def test_unknown_kwarg(self): string = """ @@ -113,7 +119,8 @@ RT1: datetime: p1: """ - self.assertRaises(ValueError, lambda: parse_model_from_yaml(to_file(string))) + self.assertRaises( + ValueError, lambda: parse_model_from_yaml(to_file(string))) def test_definition_in_inheritance(self): string = """ @@ -123,7 +130,8 @@ RT2: - RT1: description: "tach" """ - self.assertRaises(ValueError, lambda: parse_model_from_yaml(to_file(string))) + self.assertRaises( + ValueError, lambda: parse_model_from_yaml(to_file(string))) def test_inheritance(self): string = """ @@ -303,6 +311,8 @@ class ExternTest(unittest.TestCase): class ErrorMessageTest(unittest.TestCase): """Tests for understandable error messages.""" + # Note: This was changed with implementation of role keyword + @unittest.expectedFailure def test_non_dict(self): """When a value is given, where a list or mapping is expected.""" recordtype_value = """ @@ -330,3 +340,139 @@ A: with self.assertRaises(YamlDefinitionError) as yde: parse_str(string) assert("line {}".format(line) in yde.exception.args[0]) + + +def test_define_role(): + model = """ +A: + role: Record +""" + entities = parse_model_from_string(model) + assert "A" in entities + assert isinstance(entities["A"], db.Record) + assert entities["A"].role == "Record" + + model = """ +A: + role: Record + inherit_from_obligatory: + - C + obligatory_properties: + b: +b: + datatype: INTEGER +C: + 
obligatory_properties: + b: +D: + role: RecordType +""" + entities = parse_model_from_string(model) + for l, ent in (("A", "Record"), ("b", "Property"), + ("C", "RecordType"), ("D", "RecordType")): + assert l in entities + assert isinstance(entities[l], getattr(db, ent)) + assert entities[l].role == ent + + assert entities["A"].parents[0].name == "C" + assert entities["A"].name == "A" + + assert entities["A"].properties[0].name == "b" + assert entities["A"].properties[0].value is None + + assert entities["C"].properties[0].name == "b" + assert entities["C"].properties[0].value is None + + model = """ +A: + role: Record + obligatory_properties: + b: 42 +b: + datatype: INTEGER +""" + + entities = parse_model_from_string(model) + assert entities["A"].get_property("b").value == 42 + assert entities["b"].value is None + + model = """ +b: + datatype: INTEGER + value: 18 +""" + entities = parse_model_from_string(model) + assert entities["b"].value == 18 + + +def test_issue_72(): + """Tests for + https://gitlab.indiscale.com/caosdb/src/caosdb-advanced-user-tools/-/issues/72 + + In some cases, faulty values would be read in for properties without a + specified value. 
+ + """ + model = """ +Experiment: + obligatory_properties: + date: + datatype: DATETIME + description: 'date of the experiment' + identifier: + datatype: TEXT + description: 'identifier of the experiment' + temperature: + datatype: DOUBLE + description: 'temp' +TestExperiment: + role: Record + inherit_from_obligatory: + - Experiment + obligatory_properties: + date: 2022-03-02 + identifier: Test + temperature: 23 + recommended_properties: + additional_prop: + datatype: INTEGER + value: 7 +""" + entities = parse_model_from_string(model) + assert "Experiment" in entities + assert "date" in entities + assert "identifier" in entities + assert "temperature" in entities + assert "TestExperiment" in entities + assert "additional_prop" in entities + assert isinstance(entities["Experiment"], db.RecordType) + + assert entities["Experiment"].get_property("date") is not None + # No value is set, so this has to be None + assert entities["Experiment"].get_property("date").value is None + + assert entities["Experiment"].get_property("identifier") is not None + assert entities["Experiment"].get_property("identifier").value is None + + assert entities["Experiment"].get_property("temperature") is not None + assert entities["Experiment"].get_property("temperature").value is None + + test_rec = entities["TestExperiment"] + assert isinstance(test_rec, db.Record) + assert test_rec.get_property("date").value == date(2022, 3, 2) + assert test_rec.get_property("identifier").value == "Test" + assert test_rec.get_property("temperature").value == 23 + assert test_rec.get_property("additional_prop").value == 7 + + +def test_file_role(): + """Not implemented for now, see + https://gitlab.indiscale.com/caosdb/src/caosdb-advanced-user-tools/-/issues/74. + + """ + model = """ +F: + role: File +""" + with raises(NotImplementedError): + entities = parse_model_from_string(model)