Skip to content
Snippets Groups Projects

Extend json-schema model parser

Merged Florian Spreckelsen requested to merge f-enhance-json-parser into dev
3 unresolved threads
Files
5
@@ -35,8 +35,9 @@ not defined, simply the name can be supplied with no value.
Parents can be provided under the 'inherit_from_xxxx' keywords. The value needs
to be a list with the names. Here, NO NEW entities can be defined.
"""
import json
import argparse
import json
import jsonref
import re
import sys
import yaml
@@ -76,7 +77,8 @@ JSON_SCHEMA_ATOMIC_TYPES = [
"string",
"boolean",
"integer",
"number"
"number",
"null"
]
@@ -152,13 +154,17 @@ def parse_model_from_string(string):
return parser.parse_model_from_string(string)
def parse_model_from_json_schema(filename: str):
def parse_model_from_json_schema(filename: str, top_level_recordtype: bool = True):
"""Return a datamodel parsed from a json schema definition.
Parameters
----------
filename : str
The path of the json schema file that is to be parsed
top_level_recordtype : bool, optional
Whether there is a record type defined at the top level of the
schema. Default is true.
Returns
-------
@@ -177,7 +183,7 @@ def parse_model_from_json_schema(filename: str):
# @review Daniel Hornung 2022-02-18
parser = JsonSchemaParser()
return parser.parse_model_from_json_schema(filename)
return parser.parse_model_from_json_schema(filename, top_level_recordtype)
class Parser(object):
@@ -623,7 +629,7 @@ class JsonSchemaParser(Parser):
# @date 2022-02-17
# @review Timm Fitschen 2022-02-30
def parse_model_from_json_schema(self, filename: str):
def parse_model_from_json_schema(self, filename: str, top_level_recordtype: bool = True):
"""Return a datamodel created from the definition in the json schema in
`filename`.
@@ -631,6 +637,9 @@ class JsonSchemaParser(Parser):
----------
filename : str
The path to the json-schema file containing the datamodel definition
top_level_recordtype : bool, optional
Whether there is a record type defined at the top level of the
schema. Default is true.
Returns
-------
@@ -641,11 +650,11 @@ class JsonSchemaParser(Parser):
# @date 2022-02-17
# @review Timm Fitschen 2022-02-30
with open(filename, 'r') as schema_file:
model_dict = json.load(schema_file)
model_dict = jsonref.load(schema_file)
return self._create_model_from_dict(model_dict)
return self._create_model_from_dict(model_dict, top_level_recordtype=top_level_recordtype)
def _create_model_from_dict(self, model_dict: [dict, List[dict]], top_level_recordtype: bool = True):
    """Parse a dictionary and return the Datamodel created from it.

    The dictionary was typically created from the model definition in a json schema file.

    Parameters
    ----------
    model_dict : dict or list[dict]
        One or several dictionaries read in from a json-schema file
    top_level_recordtype : bool, optional
        Whether there is a record type defined at the top level of the
        schema. Default is true.

    Returns
    -------
    our_model : DataModel
        The datamodel defined in `model_dict`
    """
    if isinstance(model_dict, dict):
        model_dict = [model_dict]

    for ii, elt in enumerate(model_dict):
        # Check if this is a valid Json Schema
        try:
            jsonschema.Draft202012Validator.check_schema(elt)
        except jsonschema.SchemaError as err:
            # Prefer the schema's title when reporting errors so the user
            # can identify the failing element.
            key = elt["title"] if "title" in elt else f"element {ii}"
            raise JsonSchemaDefinitionError(
                f"Json Schema error in {key}:\n{str(err)}") from err

        if top_level_recordtype:
            if "title" not in elt:
                raise JsonSchemaDefinitionError(
                    f"Object {ii+1} is lacking the `title` key word")
            if "type" not in elt:
                raise JsonSchemaDefinitionError(
                    f"Object {ii+1} is lacking the `type` key word")
            name = self._stringify(elt["title"], context=elt)
            self._treat_element(elt, name)
        elif "properties" in elt or "patternProperties" in elt:
            # BUGFIX: the original condition read
            # `"properties" in elt or "patternProperties":` — the bare string
            # literal is always truthy, so the `else` branch below was
            # unreachable. The membership test `in elt` was intended.
            # No top-level type but there are entities
            if "properties" in elt:
                for key, prop in elt["properties"].items():
                    name = self._get_name_from_property(key, prop)
                    self._treat_element(prop, name)
            if "patternProperties" in elt:
                # See also treatment in ``_treat_record_type``. Since here,
                # there is no top-level RT we use the prefix `__Pattern`,
                # i.e., the resulting Record Types will be called
                # `__PatternElement`.
                self._treat_pattern_properties(
                    elt["patternProperties"], name_prefix="__Pattern")
        else:
            # Neither RecordType itself, nor further properties in schema,
            # so nothing to do here. Maybe add something in the future.
            continue

    return DataModel(self.model.values())
def _get_name_from_property(self, key: str, prop: dict):
    """Return the entity name for a schema property.

    The property's ``title`` takes precedence; if absent, the property's
    key in the enclosing ``properties`` object is used instead.
    """
    label = prop.get("title", key)
    return self._stringify(label)
def _get_atomic_datatype(self, elt):
# @review Timm Fitschen 2022-02-30
if elt["type"] == "string":
@@ -695,6 +736,10 @@ class JsonSchemaParser(Parser):
return db.DOUBLE
elif elt["type"] == "boolean":
return db.BOOLEAN
elif elt["type"] == "null":
# This could be any datatype since a valid json will never have a
# value in a null property. We use TEXT for convenience.
return db.TEXT
else:
raise JsonSchemaDefinitionError(f"Unkown atomic type in {elt}.")
@@ -716,6 +761,11 @@ class JsonSchemaParser(Parser):
"be identified with CaosDB's name property."
)
return None, force_list
# LinkAhead supports null for all types, so in the very special case of
# `"type": ["null", "<other_type>"]`, only consider the other type:
if isinstance(elt["type"], list) and len(elt["type"]) == 2 and "null" in elt["type"]:
elt["type"].remove("null")
elt["type"] = elt["type"][0]
if "enum" in elt:
ent = self._treat_enum(elt, name)
elif elt["type"] in JSON_SCHEMA_ATOMIC_TYPES:
@@ -725,6 +775,9 @@ class JsonSchemaParser(Parser):
ent = self._treat_record_type(elt, name)
elif elt["type"] == "array":
ent, force_list = self._treat_list(elt, name)
elif elt["type"] == "null":
# null
return None, force_list
else:
raise NotImplementedError(
f"Cannot parse items of type '{elt['type']}' (yet).")
@@ -745,10 +798,7 @@ class JsonSchemaParser(Parser):
required = []
if "properties" in elt:
for key, prop in elt["properties"].items():
if "title" in prop:
name = self._stringify(prop["title"])
else:
name = self._stringify(key)
name = self._get_name_from_property(key, prop)
prop_ent, force_list = self._treat_element(prop, name)
if prop_ent is None:
# Nothing to be appended since the property has to be
@@ -762,6 +812,17 @@ class JsonSchemaParser(Parser):
rt.add_property(prop_ent, importance=importance,
datatype=db.LIST(prop_ent))
if "patternProperties" in elt:
pattern_property_rts = self._treat_pattern_properties(
elt["patternProperties"], name_prefix=name)
for ppr in pattern_property_rts:
# add reference to pattern property type. These can never be
# obligatory since pattern properties cannot be required in the
# original schema (since their actual names are not known a
# priori).
rt.add_property(ppr)
if "description" in elt:
rt.description = elt["description"]
return rt
@@ -806,6 +867,62 @@ class JsonSchemaParser(Parser):
self.model[ref_rt.name] = ref_rt
return db.Property(name=name, datatype=db.LIST(ref_rt)), False
def _get_pattern_prop(self):
    """Return the shared Property recording which pattern was matched.

    The Property is created lazily on first use and cached in
    ``self.model`` so that all pattern-property RecordTypes share it.
    """
    cache_key = "__pattern_property_pattern_property"
    if cache_key not in self.model:
        self.model[cache_key] = db.Property(
            name="__matched_pattern", datatype=db.TEXT)
    return self.model[cache_key]
def _treat_pattern_properties(self, pattern_elements, name_prefix=""):
    """Create one RecordType per pattern property.

    For a ``type: object`` pattern property, the remaining properties of
    the JSON entry are appended to the new RecordType; for an atomic-type
    pattern property, a single value Property is added to the RecordType
    instead.

    Raises
    ------
    NotImplementedError
        In case of patternProperties with non-object, non-atomic type, e.g.,
        array.
    """
    multiple = len(pattern_elements) > 1
    matched_pattern_prop = self._get_pattern_prop()
    created_types = []
    for index, (pattern, sub_schema) in enumerate(pattern_elements.items(), start=1):
        # Disambiguate names only when there is more than one pattern.
        rt_name = f"{name_prefix}Entry_{index}" if multiple else f"{name_prefix}Entry"
        schema_type = sub_schema["type"]
        if schema_type == "object":
            # Already an object, so it can be treated like any other
            # record type.
            new_rt = self._treat_record_type(sub_schema, rt_name)
        elif schema_type in JSON_SCHEMA_ATOMIC_TYPES:
            # Atomic type: store the actual value of the pattern property
            # in a dedicated property.
            value_prop_name = f"{rt_name}_value"
            value_prop = db.Property(
                name=value_prop_name,
                datatype=self._get_atomic_datatype(sub_schema))
            self.model[value_prop_name] = value_prop
            new_rt = db.RecordType(name=rt_name)
            new_rt.add_property(value_prop)
        else:
            raise NotImplementedError(
                "Pattern properties are currently only supported for types " +
                ", ".join(JSON_SCHEMA_ATOMIC_TYPES) + ", and object.")
        # Record which pattern was matched, and note the pattern in the
        # RecordType's description.
        new_rt.add_property(matched_pattern_prop, importance=db.OBLIGATORY)
        if new_rt.description:
            new_rt.description += f"\n\npattern: {pattern}"
        else:
            new_rt.description = f"pattern: {pattern}"
        self.model[rt_name] = new_rt
        created_types.append(new_rt)
    return created_types
if __name__ == "__main__":
parser = argparse.ArgumentParser(description=__doc__,
Loading