Skip to content
Snippets Groups Projects

F json schema datamodel

Merged Florian Spreckelsen requested to merge f-json-schema-datamodel into dev
Compare and
9 files
+ 337
3
Compare changes
  • Side-by-side
  • Inline
Files
9
 
# This file is a part of the CaosDB Project.
 
#
 
# Copyright (C) 2022 IndiScale GmbH <info@indiscale.com>
 
# Copyright (C) 2022 Florian Spreckelsen <f.spreckelsen@indiscale.com>
 
# Copyright (C) 2022 Daniel Hornung <d.hornung@indiscale.com>
 
#
 
# This program is free software: you can redistribute it and/or modify
 
# it under the terms of the GNU Affero General Public License as
 
# published by the Free Software Foundation, either version 3 of the
 
# License, or (at your option) any later version.
 
#
 
# This program is distributed in the hope that it will be useful,
 
# but WITHOUT ANY WARRANTY; without even the implied warranty of
 
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 
# GNU Affero General Public License for more details.
 
#
 
# You should have received a copy of the GNU Affero General Public License
 
# along with this program. If not, see <https://www.gnu.org/licenses/>.
 
"""
"""
This module (and script) provides methods to read a DataModel from a YAML file.
This module (and script) provides methods to read a DataModel from a YAML file.
@@ -16,11 +35,15 @@ not defined, simply the name can be supplied with no value.
@@ -16,11 +35,15 @@ not defined, simply the name can be supplied with no value.
Parents can be provided under the 'inherit_from_xxxx' keywords. The value needs
Parents can be provided under the 'inherit_from_xxxx' keywords. The value needs
to be a list with the names. Here, NO NEW entities can be defined.
to be a list with the names. Here, NO NEW entities can be defined.
"""
"""
 
import json
import re
import re
import sys
import sys
 
import yaml
 
 
from typing import List, Union
 
import jsonschema
import caosdb as db
import caosdb as db
import yaml
from .data_model import DataModel
from .data_model import DataModel
@@ -93,6 +116,14 @@ class YamlDefinitionError(RuntimeError):
@@ -93,6 +116,14 @@ class YamlDefinitionError(RuntimeError):
super().__init__(template.format(line))
super().__init__(template.format(line))
 
class JsonSchemaDefinitionError(RuntimeError):
    """Signal that a json schema cannot be used as a data model definition.

    In this module it is raised for schema objects lacking the `title` or
    `type` key word and for documents that are not valid json schemas.

    Parameters
    ----------
    msg : str
        Human-readable description of the problem; becomes the exception
        message.
    """
    # @author Florian Spreckelsen
    # @date 2022-02-17
    # @review Daniel Hornung 2022-02-18

    def __init__(self, msg):
        # Exactly one positional message is required, unlike plain RuntimeError.
        super().__init__(msg)
 
 
def parse_model_from_yaml(filename):
def parse_model_from_yaml(filename):
"""Shortcut if the Parser object is not needed."""
"""Shortcut if the Parser object is not needed."""
parser = Parser()
parser = Parser()
@@ -107,6 +138,16 @@ def parse_model_from_string(string):
@@ -107,6 +138,16 @@ def parse_model_from_string(string):
return parser.parse_model_from_string(string)
return parser.parse_model_from_string(string)
 
def parse_model_from_json_schema(filename: str):
    """Return a datamodel parsed from a json schema definition.

    Shortcut for callers that do not need the parser object itself: a fresh
    `JsonSchemaParser` is created for every call and discarded afterwards.

    Parameters
    ----------
    filename : str
        Path of the json-schema file containing the datamodel definition.

    Returns
    -------
    out
        Whatever `JsonSchemaParser.parse_model_from_json_schema` returns for
        `filename`.
    """
    # @author Florian Spreckelsen
    # @date 2022-02-17
    # @review Daniel Hornung 2022-02-18
    return JsonSchemaParser().parse_model_from_json_schema(filename)
 
 
class Parser(object):
class Parser(object):
def __init__(self):
def __init__(self):
self.model = {}
self.model = {}
@@ -432,7 +473,8 @@ class Parser(object):
@@ -432,7 +473,8 @@ class Parser(object):
continue
continue
raise ValueError("Property {} has an unknown datatype: {}".format(value.name, value.datatype))
raise ValueError("Property {} has an unknown datatype: {}".format(
 
value.name, value.datatype))
def _set_recordtypes(self):
def _set_recordtypes(self):
""" properties are defined in first iteration; set remaining as RTs """
""" properties are defined in first iteration; set remaining as RTs """
@@ -442,6 +484,110 @@ class Parser(object):
@@ -442,6 +484,110 @@ class Parser(object):
self.model[key] = db.RecordType(name=key)
self.model[key] = db.RecordType(name=key)
 
class JsonSchemaParser(Parser):
    """Extends the yaml parser to read in datamodels defined in a json schema.

    Entities are collected in ``self.model`` (name -> entity), so an element
    whose name has already been treated is reused instead of being created a
    second time.
    """
    # @author Florian Spreckelsen
    # @date 2022-02-17
    # @review Daniel Hornung 2022-02-18

    def parse_model_from_json_schema(self, filename: str):
        """Return a datamodel created from the definition in the json schema in
        `filename`.

        Parameters
        ----------
        filename : str
            The path to the json-schema file containing the datamodel definition

        Returns
        -------
        out : DataModel
            The created DataModel

        Raises
        ------
        JsonSchemaDefinitionError
            If the file content is not a usable schema definition (see
            `_create_model_from_dict`).
        """
        # @author Florian Spreckelsen
        # @date 2022-02-17
        # JSON documents are UTF-8 by specification; do not rely on the
        # platform's locale encoding when reading them.
        with open(filename, 'r', encoding="utf-8") as schema_file:
            model_dict = json.load(schema_file)

        return self._create_model_from_dict(model_dict)

    def _create_model_from_dict(self, model_dict: Union[dict, List[dict]]):
        """Parse a dictionary read in from the model definition in a json schema and
        return the Datamodel created from it.

        Parameters
        ----------
        model_dict : dict or list[dict]
            One or several dictionaries read in from a json-schema file

        Returns
        -------
        out : DataModel
            The datamodel defined in `model_dict`

        Raises
        ------
        JsonSchemaDefinitionError
            If a top-level object lacks the `title` or `type` key word, or if
            it is not a valid json schema.
        """
        # A single schema object is handled like a one-element list.
        if isinstance(model_dict, dict):
            model_dict = [model_dict]

        for ii, elt in enumerate(model_dict):
            # Positions in the error messages are 1-based.
            if "title" not in elt:
                raise JsonSchemaDefinitionError(f"Object {ii+1} is lacking the `title` key word")
            if "type" not in elt:
                raise JsonSchemaDefinitionError(f"Object {ii+1} is lacking the `type` key word")
            # Check if this is a valid Json Schema
            try:
                jsonschema.Draft202012Validator.check_schema(elt)
            except jsonschema.SchemaError as err:
                raise JsonSchemaDefinitionError(
                    f"Json Schema error in {elt['title']}:\n{str(err)}") from err
            # `_stringify` is inherited from Parser.
            name = self._stringify(elt["title"], context=elt)
            self._treat_element(elt, name)

        return DataModel(self.model.values())

    def _treat_element(self, elt: dict, name: str):
        """Return the entity for schema element `elt`, creating it if necessary.

        Parameters
        ----------
        elt : dict
            A json-schema (sub-)object.
        name : str
            The name under which the entity is stored in ``self.model``.

        Returns
        -------
        ent
            A ``db.Property`` for `string`, `integer`, `number` and `boolean`
            elements, or the ``db.RecordType`` built by `_treat_record_type`
            for `object` elements.

        Raises
        ------
        JsonSchemaDefinitionError
            If `elt` has no `type` key word.
        NotImplementedError
            If the `type` of `elt` is none of the supported ones.
        """
        # Reuse entities that were already created under this name.
        if name in self.model:
            return self.model[name]

        # Only top-level objects are checked by the caller; nested properties
        # arrive here unchecked, so fail with a schema error, not a KeyError.
        if "type" not in elt:
            raise JsonSchemaDefinitionError(f"`type` is missing in element {name}.")

        elt_type = elt["type"]
        if elt_type == "string":
            if elt.get("format") == "date-time":
                # Treat datetime strings separately
                ent = db.Property(name=name, datatype=db.DATETIME)
            else:
                ent = db.Property(name=name, datatype=db.TEXT)
        elif elt_type == "integer":
            ent = db.Property(name=name, datatype=db.INTEGER)
        elif elt_type == "number":
            ent = db.Property(name=name, datatype=db.DOUBLE)
        elif elt_type == "boolean":
            ent = db.Property(name=name, datatype=db.BOOLEAN)
        elif elt_type == "object":
            ent = self._treat_record_type(elt, name)
        else:
            raise NotImplementedError(f"Cannot parse items of type '{elt['type']}' (yet).")

        if "description" in elt:
            ent.description = elt["description"]

        self.model[name] = ent
        return ent

    def _treat_record_type(self, elt: dict, name: str):
        """Return a new RecordType called `name` with the properties of `elt`.

        Parameters
        ----------
        elt : dict
            A json-schema object of type `object`.
        name : str
            The name of the RecordType to be created.

        Returns
        -------
        rt : db.RecordType
            The RecordType; every entry of `elt["properties"]` is added as a
            property, obligatory if its key is listed in `elt["required"]`
            and recommended otherwise.
        """
        rt = db.RecordType(name=name)
        required = elt.get("required", [])

        for key, prop in elt.get("properties", {}).items():
            # A property's `title` takes precedence over its key as entity
            # name.  A separate local is used so that the `name` parameter
            # (the RecordType's name) is not overwritten.
            prop_name = self._stringify(prop["title"] if "title" in prop else key)
            prop_ent = self._treat_element(prop, prop_name)
            # `required` lists property keys, so compare the key, not the
            # title-derived name.
            importance = db.OBLIGATORY if key in required else db.RECOMMENDED
            rt.add_property(prop_ent, importance=importance)

        return rt
 
 
if __name__ == "__main__":
    # Script mode: parse 'data_model.yml' (path relative to the current
    # working directory) and print the resulting model.
    model = parse_model_from_yaml('data_model.yml')
    print(model)
Loading