Commit f5cd9aa6 authored by Florian Spreckelsen

Merge branch 'f-more-jsonschema-export' into 'dev'

ENH: JsonSchemaExporter accepts do_not_create parameter.

See merge request !82
parents d605fd58 0af91939
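A minimal usage sketch of the new option (the RecordType names "Experiment" and "Device" are illustrative assumptions, not part of this change):

import linkahead as db
from caosadvancedtools.json_schema_exporter import recordtype_to_json_schema

# "Experiment" is an assumed RecordType that references an assumed RecordType "Device".
rt = db.execute_query("FIND RECORDTYPE WITH name='Experiment'", unique=True)
# References to "Device" are offered only as a choice of existing Records,
# without the "Create new" sub-schema.
schema = recordtype_to_json_schema(rt, do_not_create=["Device"])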
@@ -14,6 +14,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
  added.
* The `json_schema_exporter` module which introduces tools to create a json
  schema from a RecordType, e.g., for the usage in web forms.
* `DataModel.get_deep(name: str)` method which uses the DataModel as a kind of cache pool.

### Changed ###
@@ -19,55 +19,90 @@
# You should have received a copy of the GNU Affero General Public License along
# with this program. If not, see <https://www.gnu.org/licenses/>.
#
"""Module for converting a data model into a json schema compatible dictionary.
"""

from typing import Any, List, Optional

import linkahead as db
from linkahead.common.datatype import get_list_datatype, is_list_datatype


class JsonSchemaExporter:
    """A class which collects everything needed for the conversion.
    """

    def __init__(self, additional_properties: bool = True,
                 name_and_description_in_properties: bool = False,
                 additional_options_for_text_props: dict = None,
                 units_in_description: bool = True,
                 do_not_create: List[str] = None,
                 ):
        """Set up a JsonSchemaExporter, which can then be applied on RecordTypes.

        Parameters
        ----------
        additional_properties : bool, optional
            Whether additional properties will be admitted in the resulting
            schema. Optional, default is True.
        name_and_description_in_properties : bool, optional
            Whether to include name and description in the `properties` section of
            the schema to be exported. Optional, default is False.
        additional_options_for_text_props : dict, optional
            Dictionary containing additional "pattern" or "format" options for
            string-typed properties. Optional, default is empty.
        units_in_description : bool, optional
            Whether to add the unit of a LinkAhead property (if it has any) to the
            description of the corresponding schema entry. If set to false, an
            additional `unit` key is added to the schema itself which is purely
            annotational and ignored, e.g., in validation. Default is True.
        do_not_create : list[str]
            A list of RecordType names for which there should be no option
            to create them. Instead, only the choice of existing elements should
            be given.
        """
        if not additional_options_for_text_props:
            additional_options_for_text_props = {}
        if not do_not_create:
            do_not_create = []

        self._additional_properties = additional_properties
        self._name_and_description_in_properties = name_and_description_in_properties
        self._additional_options_for_text_props = additional_options_for_text_props
        self._units_in_description = units_in_description
        self._do_not_create = do_not_create

    @staticmethod
    def _make_required_list(rt: db.RecordType):
        """Return the list of names of properties with importance db.OBLIGATORY."""
        required_list = []
        for prop in rt.properties:
            if rt.get_importance(prop.name) != db.OBLIGATORY:
                continue
            prop_name = prop.name
            if isinstance(prop.datatype, db.Entity):
                prop_name = prop.datatype.name
            required_list.append(prop_name)

        return required_list

    def _make_segment_from_prop(self, prop: db.Property):
        """Return the JSON Schema segment for the given property

        Parameters
        ----------
        prop : db.Property
            The property to be transformed.
        """
        if prop.datatype == db.TEXT or prop.datatype == db.DATETIME:
            text_format = None
            text_pattern = None
            if prop.name in self._additional_options_for_text_props:
                if "pattern" in self._additional_options_for_text_props[prop.name]:
                    text_pattern = self._additional_options_for_text_props[prop.name]["pattern"]
                if "format" in self._additional_options_for_text_props[prop.name]:
                    text_format = self._additional_options_for_text_props[prop.name]["format"]
                elif prop.datatype == db.DATETIME:
                    # Set the date or datetime format if only a pattern is given ...
                    text_format = ["date", "date-time"]
@@ -76,12 +111,12 @@ def _make_segment_from_prop(prop: db.Property, additional_properties: bool,
                # options list.
                text_format = ["date", "date-time"]

            return self._make_text_property(prop.description, text_format, text_pattern)

        json_prop = {}
        if prop.description:
            json_prop["description"] = prop.description
        if self._units_in_description and prop.unit:
            if "description" in json_prop:
                json_prop["description"] += f" Unit is {prop.unit}."
            else:
@@ -99,29 +134,34 @@ def _make_segment_from_prop(prop: db.Property, additional_properties: bool,
            json_prop["type"] = "array"
            list_element_prop = db.Property(
                name=prop.name, datatype=get_list_datatype(prop.datatype, strict=True))
            json_prop["items"] = self._make_segment_from_prop(list_element_prop)
        elif prop.is_reference():
            if prop.datatype == db.REFERENCE:
                # No Record creation since no RT is specified and we don't know what
                # schema to use, so only enum of all Records and all Files.
                values = self._retrieve_enum_values("RECORD") + self._retrieve_enum_values("FILE")
                json_prop["enum"] = values
            elif prop.datatype == db.FILE:
                # TODO: different issue
                raise NotImplementedError("Files have not been implemented yet.")
            else:
                prop_name = prop.datatype
                if isinstance(prop.datatype, db.Entity):
                    prop_name = prop.datatype.name
                values = self._retrieve_enum_values(f"RECORD '{prop_name}'")
                if prop_name in self._do_not_create:
                    # Only a simple list of values
                    json_prop["enum"] = values
                else:
                    rt = db.execute_query(f"FIND RECORDTYPE WITH name='{prop_name}'",
                                          unique=True)
                    subschema = self._make_segment_from_recordtype(rt)
                    subschema["title"] = "Create new"
                    json_prop["oneOf"] = [
                        {
                            "title": "Existing entries",
                            "enum": values,
                        },
                        subschema
                    ]
@@ -131,9 +171,8 @@ def _make_segment_from_prop(prop: db.Property, additional_properties: bool,
        return json_prop

    @staticmethod
    def _make_text_property(description="", text_format=None, text_pattern=None):
        prop = {
            "type": "string"
        }
@@ -161,7 +200,7 @@ def _make_text_property(description="", text_format=None, text_pattern=None):
        return prop

    @staticmethod
    def _retrieve_enum_values(role: str):
        possible_values = db.execute_query(f"SELECT name, id FROM {role}")
@@ -175,24 +214,21 @@ def _retrieve_enum_values(role: str):
        return vals

    def _make_segment_from_recordtype(self, rt: db.RecordType):
        """Return a Json schema segment for the given RecordType.
        """
        schema: dict[str, Any] = {
            "type": "object"
        }

        schema["required"] = self._make_required_list(rt)
        schema["additionalProperties"] = self._additional_properties

        props = {}
        if self._name_and_description_in_properties:
            props["name"] = self._make_text_property("The name of the Record to be created")
            props["description"] = self._make_text_property(
                "The description of the Record to be created")

        for prop in rt.properties:
            if prop.name in props:
@@ -201,28 +237,54 @@ def _make_segment_from_recordtype(rt: db.RecordType, additional_properties: bool
                    "Creating a schema for multi-properties is not specified. "
                    f"Property {prop.name} occurs more than once."
                )
            props[prop.name] = self._make_segment_from_prop(prop)

        schema["properties"] = props

        return schema

    def recordtype_to_json_schema(self, rt: db.RecordType):
        """Create a jsonschema from a given RecordType that can be used, e.g., to
        validate a json specifying a record of the given type.

        Parameters
        ----------
        rt : RecordType
            The RecordType from which a json schema will be created.

        Returns
        -------
        schema : dict
            A dict containing the json schema created from the given RecordType's properties.
        """
        schema = self._make_segment_from_recordtype(rt)
        schema["$schema"] = "https://json-schema.org/draft/2019-09/schema"
        if rt.name:
            schema["title"] = rt.name
        if rt.description:
            schema["description"] = rt.description

        return schema


def recordtype_to_json_schema(rt: db.RecordType, additional_properties: bool = True,
                              name_and_description_in_properties: bool = False,
                              additional_options_for_text_props: Optional[dict] = None,
                              units_in_description: bool = True,
                              do_not_create: List[str] = None):
    """Create a jsonschema from a given RecordType that can be used, e.g., to
    validate a json specifying a record of the given type.

    This is a standalone function which works without manually creating a
    JsonSchemaExporter object.

    Parameters
    ----------
    rt : RecordType
        The RecordType from which a json schema will be created.
    additional_properties : bool, optional
        Whether additional properties will be admitted in the resulting
        schema. Optional, default is True.
    name_and_description_in_properties : bool, optional
        Whether to include name and description in the `properties` section of
@@ -235,6 +297,10 @@ def recordtype_to_json_schema(rt: db.RecordType, additional_properties: bool = T
        description of the corresponding schema entry. If set to false, an
        additional `unit` key is added to the schema itself which is purely
        annotational and ignored, e.g., in validation. Default is True.
    do_not_create : list[str]
        A list of RecordType names for which there should be no option
        to create them. Instead, only the choice of existing elements should
        be given.

    Returns
    -------
@@ -243,17 +309,11 @@ def recordtype_to_json_schema(rt: db.RecordType, additional_properties: bool = T
    """
    exporter = JsonSchemaExporter(
        additional_properties=additional_properties,
        name_and_description_in_properties=name_and_description_in_properties,
        additional_options_for_text_props=additional_options_for_text_props,
        units_in_description=units_in_description,
        do_not_create=do_not_create,
    )
    return exporter.recordtype_to_json_schema(rt)
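
For repeated exports with the same settings, the class-based interface introduced above can be configured once and applied to several RecordTypes; a sketch, assuming rt_experiment and rt_sample are previously retrieved RecordTypes and "Device" is a referenced RecordType name:

exporter = JsonSchemaExporter(additional_properties=False,
                              units_in_description=False,
                              do_not_create=["Device"])
# The same exporter (and thus the same options) is reused for both schemas.
schema_experiment = exporter.recordtype_to_json_schema(rt_experiment)
schema_sample = exporter.recordtype_to_json_schema(rt_sample)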

@@ -262,3 +262,37 @@ class DataModel(dict):
                all_ents[prop.name] = prop

        return list(all_ents.values())

    def get_deep(self, name: str, visited: set = None):
        """Attempt to resolve references for the given ``name``.

        This method only uses data which is available in this datamodel, which acts kind of like a
        cache pool.

        Note that this may change this data model (subsequent "get"-like calls may also return deep
        content).
        """
        entity = self.get(name)
        if not entity:
            return entity
        if not visited:
            visited = set()

        # new_props = []
        for prop in list(entity.get_properties()):  # Make a change-resistant list copy.
            if prop.name in visited:
                continue
            visited.add(prop.name)
            if prop.name in self:
                deep_prop = self.get_deep(prop.name, visited=visited)
                linked_prop = entity.get_property(prop)
                if not linked_prop.datatype:
                    if deep_prop.role == "Property":
                        linked_prop.datatype = deep_prop.datatype
                    elif deep_prop.role == "RecordType":
                        linked_prop.datatype = deep_prop
                if deep_prop.description:
                    linked_prop.description = deep_prop.description
            else:
                print(f"Referenced property \"{prop.name}\" not found in data model.")

        return entity
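
Used together with the exporter, `get_deep` resolves a RecordType's properties from the model's own entries before a schema is generated; a small sketch along the lines of the tests below:

from caosadvancedtools.json_schema_exporter import recordtype_to_json_schema
from caosadvancedtools.models.parser import parse_model_from_string

model = parse_model_from_string("""
RT1:
  description: some description
  obligatory_properties:
    some_date:
      datatype: DATETIME
""")
# get_deep looks up "some_date" in the model (its cache pool) and copies
# datatype and description onto the property attached to RT1.
rt1 = model.get_deep("RT1")
schema = recordtype_to_json_schema(rt1)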

@@ -2,6 +2,7 @@ import unittest
import caosdb as db

from caosadvancedtools.models.data_model import DataModel
from caosadvancedtools.models.parser import parse_model_from_string


class DataModelTest(unittest.TestCase):
@@ -33,3 +34,31 @@ class DataModelTest(unittest.TestCase):
        DataModel.sync_ids_by_name(l1, l2)

        assert l1["TestRecord"].id == rt.id
        assert l1["TestRecord2"].id < 0

    def test_get_deep(self):
        model_recursive_str = """
RT1:
  description: some description
  obligatory_properties:
    RT1:
"""
        model_recursive = parse_model_from_string(model_recursive_str)
        prop1 = model_recursive["RT1"].get_property("RT1")
        assert prop1.datatype is None
        # TODO The next line actually changes model_recursive in place, is this OK?
        RT1 = model_recursive.get_deep("RT1")
        assert model_recursive["RT1"] == RT1

        model_unresolved_str = """
RT1:
  description: some description
  obligatory_properties:
    unresolved:
"""
        model_unresolved = parse_model_from_string(model_unresolved_str)
        rt1_unresolved = model_unresolved["RT1"]
        prop_unresolved = model_unresolved.get_deep("unresolved")
        assert prop_unresolved.datatype is None
        rt1_deep = model_unresolved.get_deep("RT1")
        assert rt1_deep == rt1_unresolved
        assert rt1_deep is rt1_unresolved

@@ -20,6 +20,10 @@
# with this program. If not, see <https://www.gnu.org/licenses/>.
#
"""Tests the Json schema exporter."""
import json

import linkahead as db
from jsonschema import FormatChecker, validate, ValidationError
@@ -27,6 +31,16 @@ from pytest import raises
from unittest.mock import Mock, patch

from caosadvancedtools.json_schema_exporter import recordtype_to_json_schema as rtjs
from caosadvancedtools.models.parser import parse_model_from_string

RT1 = parse_model_from_string("""
RT1:
  description: some description
  obligatory_properties:
    some_date:
      datatype: DATETIME
      description: Just some date
""").get_deep("RT1")


def _mock_execute_query(query_string, unique=False, **kwargs):
@@ -60,11 +74,16 @@ def _mock_execute_query(query_string, unique=False, **kwargs):
        return referencing_type_records
    elif query_string == "FIND RECORDTYPE WITH name='ReferencingType'" and unique is True:
        return referencing_type_rt
    elif query_string == "SELECT name, id FROM RECORD 'RT1'":
        return referencing_type_records  # wrong types, but who cares for the test?
    elif query_string == "FIND RECORDTYPE WITH name='RT1'" and unique is True:
        return RT1
    elif query_string == "SELECT name, id FROM RECORD":
        return all_records
    elif query_string == "SELECT name, id FROM FILE":
        return all_files
    else:
        print(f"Query string: {query_string}")
        return db.Container()
@@ -546,3 +565,119 @@ def test_broken():
        rtjs(rt)
    assert "MultiProp" in str(nie)
    assert str(nie).startswith("Creating a schema for multi-properties is not specified.")


@patch("linkahead.execute_query", new=Mock(side_effect=_mock_execute_query))
def test_reference_options():
    """Testing miscellaneous options.
    """
    model_str = """
RT1:
  description: some description
  obligatory_properties:
    some_date:
      datatype: DATETIME
      description: Just some date
RT2:
  obligatory_properties:
    RT1:
"""
    model = parse_model_from_string(model_str)

    # First test: without reference
    rt1_dict = rtjs(model.get_deep("RT1"))
    assert json.dumps(rt1_dict, indent=2) == """{
  "type": "object",
  "required": [
    "some_date"
  ],
  "additionalProperties": true,
  "properties": {
    "some_date": {
      "description": "Just some date",
      "anyOf": [
        {
          "type": "string",
          "format": "date"
        },
        {
          "type": "string",
          "format": "date-time"
        }
      ]
    }
  },
  "$schema": "https://json-schema.org/draft/2019-09/schema",
  "title": "RT1",
  "description": "some description"
}"""

    # Second test: with reference
    rt2_deep = model.get_deep("RT2")
    rt2_dict = rtjs(rt2_deep)
    assert json.dumps(rt2_dict, indent=2) == """{
  "type": "object",
  "required": [
    "RT1"
  ],
  "additionalProperties": true,
  "properties": {
    "RT1": {
      "description": "some description",
      "oneOf": [
        {
          "title": "Existing entries",
          "enum": [
            "103",
            "104, referencing"
          ]
        },
        {
          "type": "object",
          "required": [
            "some_date"
          ],
          "additionalProperties": true,
          "properties": {
            "some_date": {
              "description": "Just some date",
              "anyOf": [
                {
                  "type": "string",
                  "format": "date"
                },
                {
                  "type": "string",
                  "format": "date-time"
                }
              ]
            }
          },
          "title": "Create new"
        }
      ]
    }
  },
  "$schema": "https://json-schema.org/draft/2019-09/schema",
  "title": "RT2"
}"""

    # Third test: Reference prop shall be only existing references, no option to create new ones.
    rt2_dict = rtjs(model.get_deep("RT2"), do_not_create=["RT1"])
    assert json.dumps(rt2_dict, indent=2) == """{
  "type": "object",
  "required": [
    "RT1"
  ],
  "additionalProperties": true,
  "properties": {
    "RT1": {
      "description": "some description",
      "enum": [
        "103",
        "104, referencing"
      ]
    }
  },
  "$schema": "https://json-schema.org/draft/2019-09/schema",
  "title": "RT2"
}"""