Commit f5cd9aa6 authored by Florian Spreckelsen

Merge branch 'f-more-jsonschema-export' into 'dev'

ENH: JsonSchemaExporter accepts do_not_create parameter.

See merge request !82
parents d605fd58 0af91939
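A minimal usage sketch of the new option (the RecordType names "Experiment" and "Device" are illustrative assumptions, not part of this change):

import linkahead as db
from caosadvancedtools.json_schema_exporter import recordtype_to_json_schema

# "Experiment" is an assumed RecordType that references an assumed RecordType "Device".
rt = db.execute_query("FIND RECORDTYPE WITH name='Experiment'", unique=True)
# References to "Device" are offered only as a choice of existing Records,
# without the "Create new" sub-schema.
schema = recordtype_to_json_schema(rt, do_not_create=["Device"])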
@@ -14,6 +14,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
  added.
* The `json_schema_exporter` module which introduces tools to create a json
  schema from a RecordType, e.g., for the usage in web forms.
* `DataModel.get_deep(name: str)` method which uses the DataModel as a kind of cache pool.

### Changed ###
@@ -19,55 +19,90 @@
# You should have received a copy of the GNU Affero General Public License along
# with this program. If not, see <https://www.gnu.org/licenses/>.
#
"""Module for converting a data model into a json schema compatible dictionary.
"""

from typing import Any, List, Optional

import linkahead as db
from linkahead.common.datatype import get_list_datatype, is_list_datatype


class JsonSchemaExporter:
    """A class which collects everything needed for the conversion.
    """

    def __init__(self, additional_properties: bool = True,
                 name_and_description_in_properties: bool = False,
                 additional_options_for_text_props: dict = None,
                 units_in_description: bool = True,
                 do_not_create: List[str] = None,
                 ):
        """Set up a JsonSchemaExporter, which can then be applied on RecordTypes.

        Parameters
        ----------
        additional_properties : bool, optional
            Whether additional properties will be admitted in the resulting
            schema. Optional, default is True.
        name_and_description_in_properties : bool, optional
            Whether to include name and description in the `properties` section of
            the schema to be exported. Optional, default is False.
        additional_options_for_text_props : dict, optional
            Dictionary containing additional "pattern" or "format" options for
            string-typed properties. Optional, default is empty.
        units_in_description : bool, optional
            Whether to add the unit of a LinkAhead property (if it has any) to the
            description of the corresponding schema entry. If set to false, an
            additional `unit` key is added to the schema itself which is purely
            annotational and ignored, e.g., in validation. Default is True.
        do_not_create : list[str]
            A list of RecordType names for which there should be no option
            to create them. Instead, only the choice of existing elements should
            be given.
        """
        if not additional_options_for_text_props:
            additional_options_for_text_props = {}
        if not do_not_create:
            do_not_create = []

        self._additional_properties = additional_properties
        self._name_and_description_in_properties = name_and_description_in_properties
        self._additional_options_for_text_props = additional_options_for_text_props
        self._units_in_description = units_in_description
        self._do_not_create = do_not_create

    @staticmethod
    def _make_required_list(rt: db.RecordType):
        """Return the list of names of properties with importance db.OBLIGATORY."""
        required_list = []
        for prop in rt.properties:
            if rt.get_importance(prop.name) != db.OBLIGATORY:
                continue
            prop_name = prop.name
            if isinstance(prop.datatype, db.Entity):
                prop_name = prop.datatype.name
            required_list.append(prop_name)

        return required_list

    def _make_segment_from_prop(self, prop: db.Property):
        """Return the JSON Schema segment for the given property

        Parameters
        ----------
        prop : db.Property
            The property to be transformed.
        """
        if prop.datatype == db.TEXT or prop.datatype == db.DATETIME:
            text_format = None
            text_pattern = None
            if prop.name in self._additional_options_for_text_props:
                if "pattern" in self._additional_options_for_text_props[prop.name]:
                    text_pattern = self._additional_options_for_text_props[prop.name]["pattern"]
                if "format" in self._additional_options_for_text_props[prop.name]:
                    text_format = self._additional_options_for_text_props[prop.name]["format"]
                elif prop.datatype == db.DATETIME:
                    # Set the date or datetime format if only a pattern is given ...
                    text_format = ["date", "date-time"]
@@ -76,12 +111,12 @@ def _make_segment_from_prop(prop: db.Property, additional_properties: bool,
                # options list.
                text_format = ["date", "date-time"]

            return self._make_text_property(prop.description, text_format, text_pattern)

        json_prop = {}
        if prop.description:
            json_prop["description"] = prop.description
        if self._units_in_description and prop.unit:
            if "description" in json_prop:
                json_prop["description"] += f" Unit is {prop.unit}."
            else:
@@ -99,29 +134,34 @@ def _make_segment_from_prop(prop: db.Property, additional_properties: bool,
            json_prop["type"] = "array"
            list_element_prop = db.Property(
                name=prop.name, datatype=get_list_datatype(prop.datatype, strict=True))
            json_prop["items"] = self._make_segment_from_prop(list_element_prop)
        elif prop.is_reference():
            if prop.datatype == db.REFERENCE:
                # No Record creation since no RT is specified and we don't know what
                # schema to use, so only enum of all Records and all Files.
                values = self._retrieve_enum_values("RECORD") + self._retrieve_enum_values("FILE")
                json_prop["enum"] = values
            elif prop.datatype == db.FILE:
                # TODO: different issue
                raise NotImplementedError("Files have not been implemented yet.")
            else:
                prop_name = prop.datatype
                if isinstance(prop.datatype, db.Entity):
                    prop_name = prop.datatype.name
                values = self._retrieve_enum_values(f"RECORD '{prop_name}'")
                if prop_name in self._do_not_create:
                    # Only a simple list of values
                    json_prop["enum"] = values
                else:
                    rt = db.execute_query(f"FIND RECORDTYPE WITH name='{prop_name}'",
                                          unique=True)
                    subschema = self._make_segment_from_recordtype(rt)
                    subschema["title"] = "Create new"
                    json_prop["oneOf"] = [
                        {
                            "title": "Existing entries",
                            "enum": values,
                        },
                        subschema
                    ]
@@ -131,9 +171,8 @@ def _make_segment_from_prop(prop: db.Property, additional_properties: bool,
        return json_prop

    @staticmethod
    def _make_text_property(description="", text_format=None, text_pattern=None):
        prop = {
            "type": "string"
        }
@@ -161,7 +200,7 @@ def _make_text_property(description="", text_format=None, text_pattern=None):
        return prop

    @staticmethod
    def _retrieve_enum_values(role: str):
        possible_values = db.execute_query(f"SELECT name, id FROM {role}")
@@ -175,24 +214,21 @@ def _retrieve_enum_values(role: str):
        return vals

    def _make_segment_from_recordtype(self, rt: db.RecordType):
        """Return a Json schema segment for the given RecordType.
        """
        schema: dict[str, Any] = {
            "type": "object"
        }

        schema["required"] = self._make_required_list(rt)
        schema["additionalProperties"] = self._additional_properties

        props = {}
        if self._name_and_description_in_properties:
            props["name"] = self._make_text_property("The name of the Record to be created")
            props["description"] = self._make_text_property(
                "The description of the Record to be created")

        for prop in rt.properties:
            if prop.name in props:
@@ -201,28 +237,54 @@ def _make_segment_from_recordtype(rt: db.RecordType, additional_properties: bool
                    "Creating a schema for multi-properties is not specified. "
                    f"Property {prop.name} occurs more than once."
                )
            props[prop.name] = self._make_segment_from_prop(prop)

        schema["properties"] = props

        return schema

    def recordtype_to_json_schema(self, rt: db.RecordType):
        """Create a jsonschema from a given RecordType that can be used, e.g., to
        validate a json specifying a record of the given type.

        Parameters
        ----------
        rt : RecordType
            The RecordType from which a json schema will be created.

        Returns
        -------
        schema : dict
            A dict containing the json schema created from the given RecordType's properties.
        """
        schema = self._make_segment_from_recordtype(rt)
        schema["$schema"] = "https://json-schema.org/draft/2019-09/schema"
        if rt.name:
            schema["title"] = rt.name
        if rt.description:
            schema["description"] = rt.description

        return schema


def recordtype_to_json_schema(rt: db.RecordType, additional_properties: bool = True,
                              name_and_description_in_properties: bool = False,
                              additional_options_for_text_props: Optional[dict] = None,
                              units_in_description: bool = True,
                              do_not_create: List[str] = None):
    """Create a jsonschema from a given RecordType that can be used, e.g., to
    validate a json specifying a record of the given type.

    This is a standalone function which works without manually creating a
    JsonSchemaExporter object.

    Parameters
    ----------
    rt : RecordType
        The RecordType from which a json schema will be created.
    additional_properties : bool, optional
        Whether additional properties will be admitted in the resulting
        schema. Optional, default is True.
    name_and_description_in_properties : bool, optional
        Whether to include name and description in the `properties` section of
@@ -235,6 +297,10 @@ def recordtype_to_json_schema(rt: db.RecordType, additional_properties: bool = T
        description of the corresponding schema entry. If set to false, an
        additional `unit` key is added to the schema itself which is purely
        annotational and ignored, e.g., in validation. Default is True.
    do_not_create : list[str]
        A list of RecordType names for which there should be no option
        to create them. Instead, only the choice of existing elements should
        be given.

    Returns
    -------
@@ -243,17 +309,11 @@ def recordtype_to_json_schema(rt: db.RecordType, additional_properties: bool = T
    """
    exporter = JsonSchemaExporter(
        additional_properties=additional_properties,
        name_and_description_in_properties=name_and_description_in_properties,
        additional_options_for_text_props=additional_options_for_text_props,
        units_in_description=units_in_description,
        do_not_create=do_not_create,
    )
    return exporter.recordtype_to_json_schema(rt)
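
For repeated exports with the same settings, the class-based interface introduced above can be configured once and applied to several RecordTypes; a sketch, assuming rt_experiment and rt_sample are previously retrieved RecordTypes and "Device" is a referenced RecordType name:

exporter = JsonSchemaExporter(additional_properties=False,
                              units_in_description=False,
                              do_not_create=["Device"])
# The same exporter (and thus the same options) is reused for both schemas.
schema_experiment = exporter.recordtype_to_json_schema(rt_experiment)
schema_sample = exporter.recordtype_to_json_schema(rt_sample)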

@@ -262,3 +262,37 @@ class DataModel(dict):
                all_ents[prop.name] = prop

        return list(all_ents.values())

    def get_deep(self, name: str, visited: set = None):
        """Attempt to resolve references for the given ``name``.

        This method only uses data which is available in this datamodel, which acts kind of like a
        cache pool.

        Note that this may change this data model (subsequent "get"-like calls may also return deep
        content).
        """
        entity = self.get(name)
        if not entity:
            return entity
        if not visited:
            visited = set()

        # new_props = []
        for prop in list(entity.get_properties()):  # Make a change-resistant list copy.
            if prop.name in visited:
                continue
            visited.add(prop.name)
            if prop.name in self:
                deep_prop = self.get_deep(prop.name, visited=visited)
                linked_prop = entity.get_property(prop)
                if not linked_prop.datatype:
                    if deep_prop.role == "Property":
                        linked_prop.datatype = deep_prop.datatype
                    elif deep_prop.role == "RecordType":
                        linked_prop.datatype = deep_prop
                if deep_prop.description:
                    linked_prop.description = deep_prop.description
            else:
                print(f"Referenced property \"{prop.name}\" not found in data model.")

        return entity
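
Used together with the exporter, `get_deep` resolves a RecordType's properties from the model's own entries before a schema is generated; a small sketch along the lines of the tests below:

from caosadvancedtools.json_schema_exporter import recordtype_to_json_schema
from caosadvancedtools.models.parser import parse_model_from_string

model = parse_model_from_string("""
RT1:
  description: some description
  obligatory_properties:
    some_date:
      datatype: DATETIME
""")
# get_deep looks up "some_date" in the model (its cache pool) and copies
# datatype and description onto the property attached to RT1.
rt1 = model.get_deep("RT1")
schema = recordtype_to_json_schema(rt1)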

@@ -2,6 +2,7 @@ import unittest
import caosdb as db

from caosadvancedtools.models.data_model import DataModel
from caosadvancedtools.models.parser import parse_model_from_string


class DataModelTest(unittest.TestCase):
@@ -33,3 +34,31 @@ class DataModelTest(unittest.TestCase):
        DataModel.sync_ids_by_name(l1, l2)

        assert l1["TestRecord"].id == rt.id
        assert l1["TestRecord2"].id < 0

    def test_get_deep(self):
        model_recursive_str = """
RT1:
  description: some description
  obligatory_properties:
    RT1:
"""
        model_recursive = parse_model_from_string(model_recursive_str)
        prop1 = model_recursive["RT1"].get_property("RT1")
        assert prop1.datatype is None
        # TODO The next line actually changes model_recursive in place, is this OK?
        RT1 = model_recursive.get_deep("RT1")
        assert model_recursive["RT1"] == RT1

        model_unresolved_str = """
RT1:
  description: some description
  obligatory_properties:
    unresolved:
"""
        model_unresolved = parse_model_from_string(model_unresolved_str)
        rt1_unresolved = model_unresolved["RT1"]
        prop_unresolved = model_unresolved.get_deep("unresolved")
        assert prop_unresolved.datatype is None
        rt1_deep = model_unresolved.get_deep("RT1")
        assert rt1_deep == rt1_unresolved
        assert rt1_deep is rt1_unresolved

@@ -20,6 +20,10 @@
# with this program. If not, see <https://www.gnu.org/licenses/>.
#
"""Tests the Json schema exporter."""
import json

import linkahead as db
from jsonschema import FormatChecker, validate, ValidationError
@@ -27,6 +31,16 @@ from pytest import raises
from unittest.mock import Mock, patch

from caosadvancedtools.json_schema_exporter import recordtype_to_json_schema as rtjs
from caosadvancedtools.models.parser import parse_model_from_string

RT1 = parse_model_from_string("""
RT1:
  description: some description
  obligatory_properties:
    some_date:
      datatype: DATETIME
      description: Just some date
""").get_deep("RT1")


def _mock_execute_query(query_string, unique=False, **kwargs):
@@ -60,11 +74,16 @@ def _mock_execute_query(query_string, unique=False, **kwargs):
        return referencing_type_records
    elif query_string == "FIND RECORDTYPE WITH name='ReferencingType'" and unique is True:
        return referencing_type_rt
    elif query_string == "SELECT name, id FROM RECORD 'RT1'":
        return referencing_type_records  # wrong types, but who cares for the test?
    elif query_string == "FIND RECORDTYPE WITH name='RT1'" and unique is True:
        return RT1
    elif query_string == "SELECT name, id FROM RECORD":
        return all_records
    elif query_string == "SELECT name, id FROM FILE":
        return all_files
    else:
        print(f"Query string: {query_string}")
        return db.Container()
@@ -546,3 +565,119 @@ def test_broken():
        rtjs(rt)
    assert "MultiProp" in str(nie)
    assert str(nie).startswith("Creating a schema for multi-properties is not specified.")


@patch("linkahead.execute_query", new=Mock(side_effect=_mock_execute_query))
def test_reference_options():
    """Testing miscellaneous options.
    """
    model_str = """
RT1:
  description: some description
  obligatory_properties:
    some_date:
      datatype: DATETIME
      description: Just some date
RT2:
  obligatory_properties:
    RT1:
"""
    model = parse_model_from_string(model_str)

    # First test: without reference
    rt1_dict = rtjs(model.get_deep("RT1"))
    assert json.dumps(rt1_dict, indent=2) == """{
  "type": "object",
  "required": [
    "some_date"
  ],
  "additionalProperties": true,
  "properties": {
    "some_date": {
      "description": "Just some date",
      "anyOf": [
        {
          "type": "string",
          "format": "date"
        },
        {
          "type": "string",
          "format": "date-time"
        }
      ]
    }
  },
  "$schema": "https://json-schema.org/draft/2019-09/schema",
  "title": "RT1",
  "description": "some description"
}"""

    # Second test: with reference
    rt2_deep = model.get_deep("RT2")
    rt2_dict = rtjs(rt2_deep)
    assert json.dumps(rt2_dict, indent=2) == """{
  "type": "object",
  "required": [
    "RT1"
  ],
  "additionalProperties": true,
  "properties": {
    "RT1": {
      "description": "some description",
      "oneOf": [
        {
          "title": "Existing entries",
          "enum": [
            "103",
            "104, referencing"
          ]
        },
        {
          "type": "object",
          "required": [
            "some_date"
          ],
          "additionalProperties": true,
          "properties": {
            "some_date": {
              "description": "Just some date",
              "anyOf": [
                {
                  "type": "string",
                  "format": "date"
                },
                {
                  "type": "string",
                  "format": "date-time"
                }
              ]
            }
          },
          "title": "Create new"
        }
      ]
    }
  },
  "$schema": "https://json-schema.org/draft/2019-09/schema",
  "title": "RT2"
}"""

    # Third test: Reference prop shall be only existing references, no option to create new ones.
    rt2_dict = rtjs(model.get_deep("RT2"), do_not_create=["RT1"])
    assert json.dumps(rt2_dict, indent=2) == """{
  "type": "object",
  "required": [
    "RT1"
  ],
  "additionalProperties": true,
  "properties": {
    "RT1": {
      "description": "some description",
      "enum": [
        "103",
        "104, referencing"
      ]
    }
  },
  "$schema": "https://json-schema.org/draft/2019-09/schema",
  "title": "RT2"
}"""