diff --git a/CHANGELOG.md b/CHANGELOG.md
index 3662e200959ba5604ee4fdf25962d4f39eaa6114..490d5179d256e1b17259058c433af3e9e8b0aad0 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -14,6 +14,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
   added.
 * The `json_schema_exporter` module which introduces tools to create a json
   schema from a RecordType, e.g., for the usage in web forms.
+* `DataModel.get_deep(name: str)` method which recursively resolves an entity's properties, using the DataModel as a kind of cache pool.
 
 ### Changed ###
 
diff --git a/src/caosadvancedtools/json_schema_exporter.py b/src/caosadvancedtools/json_schema_exporter.py
index 39f660f0d6686fa6a8c2892213eeb0bf276655c7..4f331585e3bbbfd5eed3b2d22b573f2d3d60cb56 100644
--- a/src/caosadvancedtools/json_schema_exporter.py
+++ b/src/caosadvancedtools/json_schema_exporter.py
@@ -19,210 +19,272 @@
 # You should have received a copy of the GNU Affero General Public License along
 # with this program. If not, see <https://www.gnu.org/licenses/>.
 #
+"""Module for converting a data model into a json schema compatible dictionary.
+"""
 
-import re
-from typing import Optional
+from typing import Any, List, Optional
 
 import linkahead as db
 from linkahead.common.datatype import get_list_datatype, is_list_datatype
 
 
-def _make_required_list(rt: db.RecordType):
-    """Return the list of names of properties with importance db.OBLIGATORY."""
-    return [prop.name for prop in rt.properties
-            if rt.get_importance(prop.name) == db.OBLIGATORY]
-
-
-def _make_segment_from_prop(prop: db.Property, additional_properties: bool,
-                            name_and_description_in_properties: bool,
-                            additional_options_for_text_props: Optional[dict],
-                            units_in_description: bool):
-    """Return the JSON Schema segment for the given property
-
-    Parameters
-    ----------
-    prop : db.Property
-        the property to be transformed
-    additional_properties : bool, optional
-        Whether additional properties will be admitted in the resulting
-        schema. Optional, default is True.
-    name_and_description_in_properties : bool, optional
-        Whether to include name and description in the `properties` section of
-        the schema to be exported. Optional, default is False.
-    additional_options_for_text_props : Optional[dict]
-        dict of dicts that may contain the keys 'pattern' and 'format' to
-        further define the rules for the JSON Schema segment
-    units_in_description : bool
-        Whether to store the unit of a LinkAhead property in the description of
-        the corresponding json schema item or to create a separate `unit` key
-        instead.
+class JsonSchemaExporter:
+    """A class which collects everything needed for the conversion.
     """
-    if not additional_options_for_text_props:
-        additional_options_for_text_props = {}
 
-    if prop.datatype == db.TEXT or prop.datatype == db.DATETIME:
-        text_format = None
-        text_pattern = None
-        if prop.name in additional_options_for_text_props:
-            if "pattern" in additional_options_for_text_props[prop.name]:
-                text_pattern = additional_options_for_text_props[prop.name]["pattern"]
-            if "format" in additional_options_for_text_props[prop.name]:
-                text_format = additional_options_for_text_props[prop.name]["format"]
+    def __init__(self, additional_properties: bool = True,
+                 name_and_description_in_properties: bool = False,
+                 additional_options_for_text_props: dict = None,
+                 units_in_description: bool = True,
+                 do_not_create: List[str] = None,
+                 ):
+        """Set up a JsonSchemaExporter, which can then be applied on RecordTypes.
+
+        Parameters
+        ----------
+        additional_properties : bool, optional
+            Whether additional properties will be admitted in the resulting
+            schema. Optional, default is True.
+        name_and_description_in_properties : bool, optional
+            Whether to include name and description in the `properties` section of
+            the schema to be exported. Optional, default is False.
+        additional_options_for_text_props : dict, optional
+            Dictionary containing additional "pattern" or "format" options for
+            string-typed properties. Optional, default is empty.
+        units_in_description : bool, optional
+            Whether to add the unit of a LinkAhead property (if it has any) to the
+            description of the corresponding schema entry. If set to false, an
+            additional `unit` key is added to the schema itself which is purely
+            annotational and ignored, e.g., in validation. Default is True.
+        do_not_create : list[str]
+            A list of RecordType names, for which there should be no option
+            to create them. Instead, only the choice of existing elements should
+            be given.
+        """
+        if not additional_options_for_text_props:
+            additional_options_for_text_props = {}
+        if not do_not_create:
+            do_not_create = []
+
+        self._additional_properties = additional_properties
+        self._name_and_description_in_properties = name_and_description_in_properties
+        self._additional_options_for_text_props = additional_options_for_text_props
+        self._units_in_description = units_in_description
+        self._do_not_create = do_not_create
+
+    @staticmethod
+    def _make_required_list(rt: db.RecordType):
+        """Return the list of names of properties with importance db.OBLIGATORY."""
+        required_list = []
+        for prop in rt.properties:
+            if rt.get_importance(prop.name) != db.OBLIGATORY:
+                continue
+            prop_name = prop.name
+            if isinstance(prop.datatype, db.Entity):
+                prop_name = prop.datatype.name
+            required_list.append(prop_name)
+
+        return required_list
+
+    def _make_segment_from_prop(self, prop: db.Property):
+        """Return the JSON Schema segment for the given property
+
+        Parameters
+        ----------
+        prop : db.Property
+            The property to be transformed.
+        """
+
+        if prop.datatype == db.TEXT or prop.datatype == db.DATETIME:
+            text_format = None
+            text_pattern = None
+            if prop.name in self._additional_options_for_text_props:
+                if "pattern" in self._additional_options_for_text_props[prop.name]:
+                    text_pattern = self._additional_options_for_text_props[prop.name]["pattern"]
+                if "format" in self._additional_options_for_text_props[prop.name]:
+                    text_format = self._additional_options_for_text_props[prop.name]["format"]
+                elif prop.datatype == db.DATETIME:
+                    # Set the date or datetime format if only a pattern is given ...
+                    text_format = ["date", "date-time"]
             elif prop.datatype == db.DATETIME:
-            # Set the date or datetime format if only a pattern is given ...
+                # ... again, for those props that don't appear in the additional
+                # options list.
                 text_format = ["date", "date-time"]
-        elif prop.datatype == db.DATETIME:
-            # ... again, for those props that don't appear in the additional
-            # options list.
-            text_format = ["date", "date-time"]
-
-        return _make_text_property(prop.description, text_format, text_pattern)
-
-    json_prop = {}
-    if prop.description:
-        json_prop["description"] = prop.description
-    if units_in_description and prop.unit:
-        if "description" in json_prop:
-            json_prop["description"] += f" Unit is {prop.unit}."
-        else:
-            json_prop["description"] = f"Unit is {prop.unit}."
-    elif prop.unit:
-        json_prop["unit"] = prop.unit
-
-    if prop.datatype == db.BOOLEAN:
-        json_prop["type"] = "boolean"
-    elif prop.datatype == db.INTEGER:
-        json_prop["type"] = "integer"
-    elif prop.datatype == db.DOUBLE:
-        json_prop["type"] = "number"
-    elif is_list_datatype(prop.datatype):
-        json_prop["type"] = "array"
-        list_element_prop = db.Property(
-            name=prop.name, datatype=get_list_datatype(prop.datatype, strict=True))
-        json_prop["items"] = _make_segment_from_prop(
-            list_element_prop, additional_properties,
-            name_and_description_in_properties, additional_options_for_text_props,
-            units_in_description
-        )
-    elif prop.is_reference():
-        if prop.datatype == db.REFERENCE:
-            # No Record creation since no RT is specified and we don't know what
-            # schema to use, so only enum of all Records and all Files.
-            values = _retrieve_enum_values("RECORD") + _retrieve_enum_values("FILE")
-            json_prop["enum"] = values
-        elif prop.datatype == db.FILE:
-            # TODO: different issue
-            raise NotImplementedError("Files have not been implemented yet.")
-        else:
-            values = _retrieve_enum_values(f"RECORD '{prop.datatype}'")
-            rt = db.execute_query(f"FIND RECORDTYPE WITH name='{prop.datatype}'", unique=True)
-            subschema = _make_segment_from_recordtype(rt, additional_properties,
-                                                      name_and_description_in_properties,
-                                                      additional_options_for_text_props,
-                                                      units_in_description)
-            json_prop["oneOf"] = [
-                {"enum": values},
-                subschema
-            ]
-
-    else:
-        raise ValueError(
-            f"Unknown or no property datatype. Property {prop.name} with type {prop.datatype}")
-
-    return json_prop
-
-
-def _make_text_property(description="", text_format=None, text_pattern=None):
-
-    prop = {
-        "type": "string"
-    }
-    if description:
-        prop["description"] = description
-    if text_format is not None:
-        if isinstance(text_format, list):
-            # We want the type inside the options, not in the head:
-            # "datetime property": {
-            #   "anyOf": [
-            #     {
-            #       "type": "string",
-            #       "format": "date"
-            #     },
-            #     {
-            #       "type": "string",
-            #       "format": "date-time"
-            #     }]}
-            prop.pop("type")
-            prop["anyOf"] = [{"type": "string", "format": tf} for tf in text_format]
-        else:
-            prop["format"] = text_format
-    if text_pattern is not None:
-        prop["pattern"] = text_pattern
-
-    return prop
+            return self._make_text_property(prop.description, text_format, text_pattern)
+
+        json_prop = {}
+        if prop.description:
+            json_prop["description"] = prop.description
+        if self._units_in_description and prop.unit:
+            if "description" in json_prop:
+                json_prop["description"] += f" Unit is {prop.unit}."
+            else:
+                json_prop["description"] = f"Unit is {prop.unit}."
+        elif prop.unit:
+            json_prop["unit"] = prop.unit
+
+        if prop.datatype == db.BOOLEAN:
+            json_prop["type"] = "boolean"
+        elif prop.datatype == db.INTEGER:
+            json_prop["type"] = "integer"
+        elif prop.datatype == db.DOUBLE:
+            json_prop["type"] = "number"
+        elif is_list_datatype(prop.datatype):
+            json_prop["type"] = "array"
+            list_element_prop = db.Property(
+                name=prop.name, datatype=get_list_datatype(prop.datatype, strict=True))
+            json_prop["items"] = self._make_segment_from_prop(list_element_prop)
+        elif prop.is_reference():
+            if prop.datatype == db.REFERENCE:
+                # No Record creation since no RT is specified and we don't know what
+                # schema to use, so only enum of all Records and all Files.
+                values = self._retrieve_enum_values("RECORD") + self._retrieve_enum_values("FILE")
+                json_prop["enum"] = values
+            elif prop.datatype == db.FILE:
+                # TODO: different issue
+                raise NotImplementedError("Files have not been implemented yet.")
+            else:
+                prop_name = prop.datatype
+                if isinstance(prop.datatype, db.Entity):
+                    prop_name = prop.datatype.name
+                values = self._retrieve_enum_values(f"RECORD '{prop_name}'")
+                if prop_name in self._do_not_create:
+                    # Only a simple list of values
+                    json_prop["enum"] = values
+                else:
+                    rt = db.execute_query(f"FIND RECORDTYPE WITH name='{prop_name}'",
+                                          unique=True)
+                    subschema = self._make_segment_from_recordtype(rt)
+                    subschema["title"] = "Create new"
+                    json_prop["oneOf"] = [
+                        {
+                            "title": "Existing entries",
+                            "enum": values,
+                        },
+                        subschema
+                    ]
 
-def _retrieve_enum_values(role: str):
-
-    possible_values = db.execute_query(f"SELECT name, id FROM {role}")
-
-    vals = []
-    for val in possible_values:
-        if val.name:
-            vals.append(f"{val.id}, {val.name}")
         else:
-            vals.append(f"{val.id}")
-
-    return vals
-
-
-def _make_segment_from_recordtype(rt: db.RecordType, additional_properties: bool = True,
-                                  name_and_description_in_properties: bool = False,
-                                  additional_options_for_text_props: Optional[dict] = None,
-                                  units_in_description: bool = True):
-    """Return a Json schema segment for the given RecordType.
-    """
-    schema = {
-        "type": "object"
-    }
-
-    schema["required"] = _make_required_list(rt)
-    schema["additionalProperties"] = additional_properties
-
-    props = {}
-    if name_and_description_in_properties:
-        props["name"] = _make_text_property("The name of the Record to be created")
-        props["description"] = _make_text_property("The description of the Record to be created")
-
-    for prop in rt.properties:
-        if prop.name in props:
-            # Multi property
-            raise NotImplementedError(
-                "Creating a schema for multi-properties is not specified. "
-                f"Property {prop.name} occurs more than once."
-            )
-        props[prop.name] = _make_segment_from_prop(
-            prop, additional_properties, name_and_description_in_properties,
-            additional_options_for_text_props, units_in_description)
-
-    schema["properties"] = props
-
-    return schema
+            raise ValueError(
+                f"Unknown or no property datatype. Property {prop.name} with type {prop.datatype}")
+
+        return json_prop
+
+    @staticmethod
+    def _make_text_property(description="", text_format=None, text_pattern=None):
+        prop = {
+            "type": "string"
+        }
+        if description:
+            prop["description"] = description
+        if text_format is not None:
+            if isinstance(text_format, list):
+                # We want the type inside the options, not in the head:
+                # "datetime property": {
+                #   "anyOf": [
+                #     {
+                #       "type": "string",
+                #       "format": "date"
+                #     },
+                #     {
+                #       "type": "string",
+                #       "format": "date-time"
+                #     }]}
+                prop.pop("type")
+                prop["anyOf"] = [{"type": "string", "format": tf} for tf in text_format]
+            else:
+                prop["format"] = text_format
+        if text_pattern is not None:
+            prop["pattern"] = text_pattern
+
+        return prop
+
+    @staticmethod
+    def _retrieve_enum_values(role: str):
+
+        possible_values = db.execute_query(f"SELECT name, id FROM {role}")
+
+        vals = []
+        for val in possible_values:
+            if val.name:
+                vals.append(f"{val.id}, {val.name}")
+            else:
+                vals.append(f"{val.id}")
+
+        return vals
+
+    def _make_segment_from_recordtype(self, rt: db.RecordType):
+        """Return a Json schema segment for the given RecordType.
+        """
+        schema: dict[str, Any] = {
+            "type": "object"
+        }
+
+        schema["required"] = self._make_required_list(rt)
+        schema["additionalProperties"] = self._additional_properties
+
+        props = {}
+        if self._name_and_description_in_properties:
+            props["name"] = self._make_text_property("The name of the Record to be created")
+            props["description"] = self._make_text_property(
+                "The description of the Record to be created")
+
+        for prop in rt.properties:
+            if prop.name in props:
+                # Multi property
+                raise NotImplementedError(
+                    "Creating a schema for multi-properties is not specified. "
+                    f"Property {prop.name} occurs more than once."
+                )
+            props[prop.name] = self._make_segment_from_prop(prop)
+
+        schema["properties"] = props
+
+        return schema
+
+    def recordtype_to_json_schema(self, rt: db.RecordType):
+        """Create a jsonschema from a given RecordType that can be used, e.g., to
+        validate a json specifying a record of the given type.
+
+        Parameters
+        ----------
+        rt : RecordType
+            The RecordType from which a json schema will be created.
+
+        Returns
+        -------
+        schema : dict
+            A dict containing the json schema created from the given RecordType's properties.
+        """
+
+        schema = self._make_segment_from_recordtype(rt)
+        schema["$schema"] = "https://json-schema.org/draft/2019-09/schema"
+        if rt.name:
+            schema["title"] = rt.name
+        if rt.description:
+            schema["description"] = rt.description
+
+        return schema
 
 
 def recordtype_to_json_schema(rt: db.RecordType, additional_properties: bool = True,
                               name_and_description_in_properties: bool = False,
                               additional_options_for_text_props: Optional[dict] = None,
-                              units_in_description: bool = True):
+                              units_in_description: bool = True,
+                              do_not_create: List[str] = None):
     """Create a jsonschema from a given RecordType that can be used, e.g., to
     validate a json specifying a record of the given type.
 
+    This is a standalone function which works without manually creating a
+    JsonSchemaExporter object.
+
     Parameters
     ----------
     rt : RecordType
         The RecordType from which a json schema will be created.
     additional_properties : bool, optional
-        Whether additional propeties will be admitted in the resulting
+        Whether additional properties will be admitted in the resulting
         schema. Optional, default is True.
     name_and_description_in_properties : bool, optional
         Whether to include name and description in the `properties` section of
@@ -235,6 +297,10 @@ def recordtype_to_json_schema(rt: db.RecordType, additional_properties: bool = T
         description of the corresponding schema entry. If set to false, an
         additional `unit` key is added to the schema itself which is purely
         annotational and ignored, e.g., in validation. Default is True.
+    do_not_create : list[str]
+        A list of RecordType names, for which there should be no option
+        to create them. Instead, only the choice of existing elements should
+        be given.
 
     Returns
     -------
@@ -243,17 +309,11 @@
     """
 
-    if additional_options_for_text_props is None:
-        additional_options_for_text_props = {}
-
-    schema = _make_segment_from_recordtype(rt, additional_properties,
-                                           name_and_description_in_properties,
-                                           additional_options_for_text_props,
-                                           units_in_description)
-    schema["$schema"] = "https://json-schema.org/draft/2019-09/schema"
-    if rt.name:
-        schema["title"] = rt.name
-    if rt.description:
-        schema["description"] = rt.description
-
-    return schema
+    exporter = JsonSchemaExporter(
+        additional_properties=additional_properties,
+        name_and_description_in_properties=name_and_description_in_properties,
+        additional_options_for_text_props=additional_options_for_text_props,
+        units_in_description=units_in_description,
+        do_not_create=do_not_create,
+    )
+    return exporter.recordtype_to_json_schema(rt)
diff --git a/src/caosadvancedtools/models/data_model.py b/src/caosadvancedtools/models/data_model.py
index beff7e9d847c6a0854d4e38d7cee900d8a376eab..bb40939a52a0700883f119ff03ddf499c2589845 100644
--- a/src/caosadvancedtools/models/data_model.py
+++ b/src/caosadvancedtools/models/data_model.py
@@ -262,3 +262,37 @@ class DataModel(dict):
                 all_ents[prop.name] = prop
 
         return list(all_ents.values())
+
+    def get_deep(self, name: str, visited: set = None):
+        """Attempt to resolve references for the given ``name``.
+
+        This method only uses data which is available in this datamodel, which acts kind of like a
+        cache pool.
+
+        Note that this may change this data model (subsequent "get"-like calls may also return deep
+        content).
+        """
+        entity = self.get(name)
+        if not entity:
+            return entity
+        if not visited:
+            visited = set()
+
+        # new_props = []
+        for prop in list(entity.get_properties()):  # Make a change-resistant list copy.
+            if prop.name in visited:
+                continue
+            visited.add(prop.name)
+            if prop.name in self:
+                deep_prop = self.get_deep(prop.name, visited=visited)
+                linked_prop = entity.get_property(prop)
+                if not linked_prop.datatype:
+                    if deep_prop.role == "Property":
+                        linked_prop.datatype = deep_prop.datatype
+                    elif deep_prop.role == "RecordType":
+                        linked_prop.datatype = deep_prop
+                if deep_prop.description:
+                    linked_prop.description = deep_prop.description
+            else:
+                print(f"Referenced property \"{prop.name}\" not found in data model.")
+        return entity
diff --git a/unittests/test_data_model.py b/unittests/test_data_model.py
index 159adfca1d589bb092b6f59110828b5868401e25..5aa151b2891fd335959098202f35de1152c1b16b 100644
--- a/unittests/test_data_model.py
+++ b/unittests/test_data_model.py
@@ -2,6 +2,7 @@
 import unittest
 
 import caosdb as db
 
 from caosadvancedtools.models.data_model import DataModel
+from caosadvancedtools.models.parser import parse_model_from_string
 
 
 class DataModelTest(unittest.TestCase):
@@ -33,3 +34,31 @@ class DataModelTest(unittest.TestCase):
         DataModel.sync_ids_by_name(l1, l2)
         assert l1["TestRecord"].id == rt.id
         assert l1["TestRecord2"].id < 0
+
+    def test_get_deep(self):
+        model_recursive_str = """
+RT1:
+  description: some description
+  obligatory_properties:
+    RT1:
+    """
+        model_recursive = parse_model_from_string(model_recursive_str)
+        prop1 = model_recursive["RT1"].get_property("RT1")
+        assert prop1.datatype is None
+        # TODO The next line actually changes model_recursive in place, is this OK?
+        RT1 = model_recursive.get_deep("RT1")
+        assert model_recursive["RT1"] == RT1
+
+        model_unresolved_str = """
+RT1:
+  description: some description
+  obligatory_properties:
+    unresolved:
+    """
+        model_unresolved = parse_model_from_string(model_unresolved_str)
+        rt1_unresolved = model_unresolved["RT1"]
+        prop_unresolved = model_unresolved.get_deep("unresolved")
+        assert prop_unresolved.datatype is None
+        rt1_deep = model_unresolved.get_deep("RT1")
+        assert rt1_deep == rt1_unresolved
+        assert rt1_deep is rt1_unresolved
diff --git a/unittests/test_json_schema_exporter.py b/unittests/test_json_schema_exporter.py
index 937e1b1b625f5c81db2113f4008b00af100305ae..597c86a9a375e05fdc6b85fad4e0cb1a44b125e9 100644
--- a/unittests/test_json_schema_exporter.py
+++ b/unittests/test_json_schema_exporter.py
@@ -20,6 +20,10 @@
 # with this program. If not, see <https://www.gnu.org/licenses/>.
 #
+"""Tests the Json schema exporter."""
+
+import json
+
 import linkahead as db
 
 from jsonschema import FormatChecker, validate, ValidationError
@@ -27,6 +31,16 @@
 from pytest import raises
 from unittest.mock import Mock, patch
 
 from caosadvancedtools.json_schema_exporter import recordtype_to_json_schema as rtjs
+from caosadvancedtools.models.parser import parse_model_from_string
+
+RT1 = parse_model_from_string("""
+RT1:
+  description: some description
+  obligatory_properties:
+    some_date:
+      datatype: DATETIME
+      description: Just some date
+""").get_deep("RT1")
 
 
 def _mock_execute_query(query_string, unique=False, **kwargs):
@@ -60,11 +74,16 @@ def _mock_execute_query(query_string, unique=False, **kwargs):
         return referencing_type_records
     elif query_string == "FIND RECORDTYPE WITH name='ReferencingType'" and unique is True:
         return referencing_type_rt
+    elif query_string == "SELECT name, id FROM RECORD 'RT1'":
+        return referencing_type_records  # wrong types, but who cares for the test?
+    elif query_string == "FIND RECORDTYPE WITH name='RT1'" and unique is True:
+        return RT1
    elif query_string == "SELECT name, id FROM RECORD":
         return all_records
     elif query_string == "SELECT name, id FROM FILE":
         return all_files
     else:
+        print(f"Query string: {query_string}")
         return db.Container()
@@ -546,3 +565,119 @@ def test_broken():
         rtjs(rt)
     assert "MultiProp" in str(nie)
     assert str(nie).startswith("Creating a schema for multi-properties is not specified.")
+
+
+@patch("linkahead.execute_query", new=Mock(side_effect=_mock_execute_query))
+def test_reference_options():
+    """Testing miscellaneous options.
+    """
+
+    model_str = """
+RT1:
+  description: some description
+  obligatory_properties:
+    some_date:
+      datatype: DATETIME
+      description: Just some date
+RT2:
+  obligatory_properties:
+    RT1:
+    """
+    model = parse_model_from_string(model_str)
+    # First test: without reference
+    rt1_dict = rtjs(model.get_deep("RT1"))
+    assert json.dumps(rt1_dict, indent=2) == """{
+  "type": "object",
+  "required": [
+    "some_date"
+  ],
+  "additionalProperties": true,
+  "properties": {
+    "some_date": {
+      "description": "Just some date",
+      "anyOf": [
+        {
+          "type": "string",
+          "format": "date"
+        },
+        {
+          "type": "string",
+          "format": "date-time"
+        }
+      ]
+    }
+  },
+  "$schema": "https://json-schema.org/draft/2019-09/schema",
+  "title": "RT1",
+  "description": "some description"
+}"""
+    # Second test: with reference
+    rt2_deep = model.get_deep("RT2")
+    rt2_dict = rtjs(rt2_deep)
+    assert json.dumps(rt2_dict, indent=2) == """{
+  "type": "object",
+  "required": [
+    "RT1"
+  ],
+  "additionalProperties": true,
+  "properties": {
+    "RT1": {
+      "description": "some description",
+      "oneOf": [
+        {
+          "title": "Existing entries",
+          "enum": [
+            "103",
+            "104, referencing"
+          ]
+        },
+        {
+          "type": "object",
+          "required": [
+            "some_date"
+          ],
+          "additionalProperties": true,
+          "properties": {
+            "some_date": {
+              "description": "Just some date",
+              "anyOf": [
+                {
+                  "type": "string",
+                  "format": "date"
+                },
+                {
+                  "type": "string",
+                  "format": "date-time"
+                }
+              ]
+            }
+          },
+          "title": "Create new"
+        }
+      ]
+    }
+  },
+  "$schema": "https://json-schema.org/draft/2019-09/schema",
+  "title": "RT2"
+}"""
+
+    # Third test: Reference prop shall be only existing references, no option to create new ones.
+    rt2_dict = rtjs(model.get_deep("RT2"), do_not_create=["RT1"])
+    assert json.dumps(rt2_dict, indent=2) == """{
+  "type": "object",
+  "required": [
+    "RT1"
+  ],
+  "additionalProperties": true,
+  "properties": {
+    "RT1": {
+      "description": "some description",
+      "enum": [
+        "103",
+        "104, referencing"
+      ]
+    }
+  },
+  "$schema": "https://json-schema.org/draft/2019-09/schema",
+  "title": "RT2"
+}"""
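
Usage sketch (not part of the patch): a minimal example of how the pieces introduced above might be combined, namely parsing a data model from YAML, resolving its references with DataModel.get_deep, and exporting a RecordType with JsonSchemaExporter. The data model below ("Sample", "Experiment") is made up for illustration; parsing and get_deep work offline, while building the enum of existing Records for the reference property requires a connection to a running LinkAhead instance.

# Hypothetical usage example; the YAML model and names are illustrative only.
import json

from caosadvancedtools.json_schema_exporter import JsonSchemaExporter
from caosadvancedtools.models.parser import parse_model_from_string

model = parse_model_from_string("""
Sample:
  description: A sample that experiments can reference
Experiment:
  description: A simple experiment
  obligatory_properties:
    date:
      datatype: DATETIME
    Sample:
""")

# get_deep resolves the "Sample" reference from the model itself, no server needed here.
experiment_rt = model.get_deep("Experiment")

# "Sample" is listed in do_not_create, so the schema only offers existing Sample Records
# (queried from the connected LinkAhead server) instead of a "Create new" subschema.
exporter = JsonSchemaExporter(units_in_description=True, do_not_create=["Sample"])
schema = exporter.recordtype_to_json_schema(experiment_rt)
print(json.dumps(schema, indent=2))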