From 1a34df846fe8af05efc55e7fe75eed4eb66c2429 Mon Sep 17 00:00:00 2001
From: Daniel <d.hornung@indiscale.com>
Date: Mon, 30 Oct 2023 14:28:20 +0100
Subject: [PATCH] ENH: JsonSchemaExporter accepts do_not_create parameter.

Also moved the module-level helper functions into a JsonSchemaExporter class.
---
 src/caosadvancedtools/json_schema_exporter.py | 436 ++++++++++--------
 unittests/test_json_schema_exporter.py        |   2 +
 2 files changed, 243 insertions(+), 195 deletions(-)

diff --git a/src/caosadvancedtools/json_schema_exporter.py b/src/caosadvancedtools/json_schema_exporter.py
index af0b9e7d..49442922 100644
--- a/src/caosadvancedtools/json_schema_exporter.py
+++ b/src/caosadvancedtools/json_schema_exporter.py
@@ -19,213 +19,261 @@
 # You should have received a copy of the GNU Affero General Public License along
 # with this program. If not, see <https://www.gnu.org/licenses/>.
 #
+"""Module for converting a data model into a json schema compatible dictionary.
+"""
 
-import re
-from typing import Optional
+from typing import Any, Optional
 
 import linkahead as db
 from linkahead.common.datatype import get_list_datatype, is_list_datatype
 
 
-def _make_required_list(rt: db.RecordType):
-    """Return the list of names of properties with importance db.OBLIGATORY."""
-    return [prop.name for prop in rt.properties
-            if rt.get_importance(prop.name) == db.OBLIGATORY]
-
-
-def _make_segment_from_prop(prop: db.Property, additional_properties: bool,
-                            name_and_description_in_properties: bool,
-                            additional_options_for_text_props: Optional[dict],
-                            units_in_description: bool):
-    """Return the JSON Schema segment for the given property
-
-    Parameters
-    ----------
-    prop : db.Property
-        the property to be transformed
-    additional_properties : bool, optional
-        Whether additional properties will be admitted in the resulting
-        schema. Optional, default is True.
-    name_and_description_in_properties : bool, optional
-        Whether to include name and description in the `properties` section of
-        the schema to be exported. Optional, default is False.
-    additional_options_for_text_props : Optional[dict]
-        dict of dicts that may contain the keys 'pattern' and 'format' to
-        further define the rules for the JSON Schema segment
-    units_in_description : bool
-        Whether to store the unit of a LinkAhead property in the description of
-        the corresponding json schema item or to create a separate `unit` key
-        instead.
+class JsonSchemaExporter:
+    """A class which collects everything needed for the conversion.
     """
 
-    if not additional_options_for_text_props:
-        additional_options_for_text_props = {}
-    if prop.datatype == db.TEXT or prop.datatype == db.DATETIME:
-        text_format = None
-        text_pattern = None
-        if prop.name in additional_options_for_text_props:
-            if "pattern" in additional_options_for_text_props[prop.name]:
-                text_pattern = additional_options_for_text_props[prop.name]["pattern"]
-            if "format" in additional_options_for_text_props[prop.name]:
-                text_format = additional_options_for_text_props[prop.name]["format"]
+    def __init__(self, additional_properties: bool = True,
+                 name_and_description_in_properties: bool = False,
+                 additional_options_for_text_props: dict = None,
+                 units_in_description: bool = True,
+                 do_not_create: list[str] = None,
+                 ):
+        """Set up a JsonSchemaExporter, which can then be applied on RecordTypes.
+
+        Parameters
+        ----------
+        additional_properties : bool, optional
+            Whether additional properties will be admitted in the resulting
+            schema. Optional, default is True.
+        name_and_description_in_properties : bool, optional
+            Whether to include name and description in the `properties` section of
+            the schema to be exported. Optional, default is False.
+        additional_options_for_text_props : dict, optional
+            Dictionary containing additional "pattern" or "format" options for
+            string-typed properties. Optional, default is empty.
+        units_in_description : bool, optional
+            Whether to add the unit of a LinkAhead property (if it has any) to the
+            description of the corresponding schema entry. If set to false, an
+            additional `unit` key is added to the schema itself which is purely
+            annotational and ignored, e.g., in validation. Default is True.
+        do_not_create : list[str], optional
+            A list of RecordType names for which there should be no option to
+            create new entries.  Instead, only a choice of existing elements is
+            given. Optional, default is empty.
+        """
+        if not additional_options_for_text_props:
+            additional_options_for_text_props = {}
+        if not do_not_create:
+            do_not_create = []
+
+        self._additional_properties = additional_properties
+        self._name_and_description_in_properties = name_and_description_in_properties
+        self._additional_options_for_text_props = additional_options_for_text_props
+        self._units_in_description = units_in_description
+        self._do_not_create = do_not_create
+
+    @staticmethod
+    def _make_required_list(rt: db.RecordType):
+        """Return the list of names of properties with importance db.OBLIGATORY."""
+        return [prop.name for prop in rt.properties
+                if rt.get_importance(prop.name) == db.OBLIGATORY]
+
+    def _make_segment_from_prop(self, prop: db.Property):
+        """Return the JSON Schema segment for the given property
+
+        Parameters
+        ----------
+        prop : db.Property
+            The property to be transformed.
+        """
+
+        if prop.datatype == db.TEXT or prop.datatype == db.DATETIME:
+            text_format = None
+            text_pattern = None
+            if prop.name in self._additional_options_for_text_props:
+                if "pattern" in self._additional_options_for_text_props[prop.name]:
+                    text_pattern = self._additional_options_for_text_props[prop.name]["pattern"]
+                if "format" in self._additional_options_for_text_props[prop.name]:
+                    text_format = self._additional_options_for_text_props[prop.name]["format"]
+                elif prop.datatype == db.DATETIME:
+                    # Set the date or datetime format if only a pattern is given ...
+                    text_format = ["date", "date-time"]
             elif prop.datatype == db.DATETIME:
-                # Set the date or datetime format if only a pattern is given ...
+                # ... again, for those props that don't appear in the additional
+                # options list.
                 text_format = ["date", "date-time"]
-        elif prop.datatype == db.DATETIME:
-            # ... again, for those props that don't appear in the additional
-            # options list.
-            text_format = ["date", "date-time"]
-
-        return _make_text_property(prop.description, text_format, text_pattern)
-
-    json_prop = {}
-    if prop.description:
-        json_prop["description"] = prop.description
-    if units_in_description and prop.unit:
-        if "description" in json_prop:
-            json_prop["description"] += f" Unit is {prop.unit}."
-        else:
-            json_prop["description"] = f"Unit is {prop.unit}."
-    elif prop.unit:
-        json_prop["unit"] = prop.unit
-
-    if prop.datatype == db.BOOLEAN:
-        json_prop["type"] = "boolean"
-    elif prop.datatype == db.INTEGER:
-        json_prop["type"] = "integer"
-    elif prop.datatype == db.DOUBLE:
-        json_prop["type"] = "number"
-    elif is_list_datatype(prop.datatype):
-        json_prop["type"] = "array"
-        list_element_prop = db.Property(
-            name=prop.name, datatype=get_list_datatype(prop.datatype, strict=True))
-        json_prop["items"] = _make_segment_from_prop(
-            list_element_prop, additional_properties,
-            name_and_description_in_properties, additional_options_for_text_props,
-            units_in_description
-        )
-    elif prop.is_reference():
-        if prop.datatype == db.REFERENCE:
-            # No Record creation since no RT is specified and we don't know what
-            # schema to use, so only enum of all Records and all Files.
-            values = _retrieve_enum_values("RECORD") + _retrieve_enum_values("FILE")
-            json_prop["enum"] = values
-        elif prop.datatype == db.FILE:
-            # TODO: different issue
-            raise NotImplementedError("Files have not been implemented yet.")
-        else:
-            values = _retrieve_enum_values(f"RECORD '{prop.datatype}'")
-            rt = db.execute_query(f"FIND RECORDTYPE WITH name='{prop.datatype}'", unique=True)
-            subschema = _make_segment_from_recordtype(rt, additional_properties,
-                                                      name_and_description_in_properties,
-                                                      additional_options_for_text_props,
-                                                      units_in_description)
-            subschema["title"] = "Create new"
-            json_prop["oneOf"] = [
-                {
-                    "title": "Existing entries",
-                    "enum": values},
-                subschema
-            ]
-
-    else:
-        raise ValueError(
-            f"Unknown or no property datatype. Property {prop.name} with type {prop.datatype}")
-
-    return json_prop
-
-
-def _make_text_property(description="", text_format=None, text_pattern=None):
-
-    prop = {
-        "type": "string"
-    }
-    if description:
-        prop["description"] = description
-    if text_format is not None:
-        if isinstance(text_format, list):
-            # We want the type inside the options, not in the head:
-            # "datetime property": {
-            #   "anyOf": [
-            #     {
-            #       "type": "string",
-            #       "format": "date"
-            #     },
-            #     {
-            #       "type": "string",
-            #       "format": "date-time"
-            #     }]}
-            prop.pop("type")
-            prop["anyOf"] = [{"type": "string", "format": tf} for tf in text_format]
-        else:
-            prop["format"] = text_format
-    if text_pattern is not None:
-        prop["pattern"] = text_pattern
-
-    return prop
 
+            return self._make_text_property(prop.description, text_format, text_pattern)
+
+        json_prop = {}
+        if prop.description:
+            json_prop["description"] = prop.description
+        if self._units_in_description and prop.unit:
+            if "description" in json_prop:
+                json_prop["description"] += f" Unit is {prop.unit}."
+            else:
+                json_prop["description"] = f"Unit is {prop.unit}."
+        elif prop.unit:
+            json_prop["unit"] = prop.unit
+
+        if prop.datatype == db.BOOLEAN:
+            json_prop["type"] = "boolean"
+        elif prop.datatype == db.INTEGER:
+            json_prop["type"] = "integer"
+        elif prop.datatype == db.DOUBLE:
+            json_prop["type"] = "number"
+        elif is_list_datatype(prop.datatype):
+            json_prop["type"] = "array"
+            list_element_prop = db.Property(
+                name=prop.name, datatype=get_list_datatype(prop.datatype, strict=True))
+            json_prop["items"] = self._make_segment_from_prop(list_element_prop)
+        elif prop.is_reference():
+            if prop.datatype == db.REFERENCE:
+                # No Record creation since no RT is specified and we don't know what
+                # schema to use, so only enum of all Records and all Files.
+                values = self._retrieve_enum_values("RECORD") + self._retrieve_enum_values("FILE")
+                json_prop["enum"] = values
+            elif prop.datatype == db.FILE:
+                # TODO: different issue
+                raise NotImplementedError("Files have not been implemented yet.")
+            else:
+                values = self._retrieve_enum_values(f"RECORD '{prop.datatype}'")
+                if prop.datatype in self._do_not_create:
+                    # Only a simple list of values
+                    json_prop["enum"] = values
+                else:
+                    rt = db.execute_query(f"FIND RECORDTYPE WITH name='{prop.datatype}'",
+                                          unique=True)
+                    subschema = self._make_segment_from_recordtype(rt)
+                    subschema["title"] = "Create new"
+                    json_prop["oneOf"] = [
+                        {
+                            "title": "Existing entries",
+                            "enum": values,
+                        },
+                        subschema
+                    ]
 
-def _retrieve_enum_values(role: str):
-
-    possible_values = db.execute_query(f"SELECT name, id FROM {role}")
-
-    vals = []
-    for val in possible_values:
-        if val.name:
-            vals.append(f"{val.id}, {val.name}")
         else:
-            vals.append(f"{val.id}")
-
-    return vals
-
-
-def _make_segment_from_recordtype(rt: db.RecordType, additional_properties: bool = True,
-                                  name_and_description_in_properties: bool = False,
-                                  additional_options_for_text_props: Optional[dict] = None,
-                                  units_in_description: bool = True):
-    """Return a Json schema segment for the given RecordType.
-    """
-    schema = {
-        "type": "object"
-    }
-
-    schema["required"] = _make_required_list(rt)
-    schema["additionalProperties"] = additional_properties
-
-    props = {}
-    if name_and_description_in_properties:
-        props["name"] = _make_text_property("The name of the Record to be created")
-        props["description"] = _make_text_property("The description of the Record to be created")
-
-    for prop in rt.properties:
-        if prop.name in props:
-            # Multi property
-            raise NotImplementedError(
-                "Creating a schema for multi-properties is not specified. "
-                f"Property {prop.name} occurs more than once."
-            )
-        props[prop.name] = _make_segment_from_prop(
-            prop, additional_properties, name_and_description_in_properties,
-            additional_options_for_text_props, units_in_description)
-
-    schema["properties"] = props
-
-    return schema
+            raise ValueError(
+                f"Unknown or no property datatype. Property {prop.name} with type {prop.datatype}")
+
+        return json_prop
+
+    @staticmethod
+    def _make_text_property(description="", text_format=None, text_pattern=None):
+        prop = {
+            "type": "string"
+        }
+        if description:
+            prop["description"] = description
+        if text_format is not None:
+            if isinstance(text_format, list):
+                # We want the type inside the options, not in the head:
+                # "datetime property": {
+                #   "anyOf": [
+                #     {
+                #       "type": "string",
+                #       "format": "date"
+                #     },
+                #     {
+                #       "type": "string",
+                #       "format": "date-time"
+                #     }]}
+                prop.pop("type")
+                prop["anyOf"] = [{"type": "string", "format": tf} for tf in text_format]
+            else:
+                prop["format"] = text_format
+        if text_pattern is not None:
+            prop["pattern"] = text_pattern
+
+        return prop
+
+    @staticmethod
+    def _retrieve_enum_values(role: str):
+
+        possible_values = db.execute_query(f"SELECT name, id FROM {role}")
+
+        vals = []
+        for val in possible_values:
+            if val.name:
+                vals.append(f"{val.id}, {val.name}")
+            else:
+                vals.append(f"{val.id}")
+
+        return vals
+
+    def _make_segment_from_recordtype(self, rt: db.RecordType):
+        """Return a Json schema segment for the given RecordType.
+        """
+        schema: dict[str, Any] = {
+            "type": "object"
+        }
+
+        schema["required"] = self._make_required_list(rt)
+        schema["additionalProperties"] = self._additional_properties
+
+        props = {}
+        if self._name_and_description_in_properties:
+            props["name"] = self._make_text_property("The name of the Record to be created")
+            props["description"] = self._make_text_property(
+                "The description of the Record to be created")
+
+        for prop in rt.properties:
+            if prop.name in props:
+                # Multi property
+                raise NotImplementedError(
+                    "Creating a schema for multi-properties is not specified. "
+                    f"Property {prop.name} occurs more than once."
+                )
+            props[prop.name] = self._make_segment_from_prop(prop)
+
+        schema["properties"] = props
+
+        return schema
+
+    def recordtype_to_json_schema(self, rt: db.RecordType):
+        """Create a jsonschema from a given RecordType that can be used, e.g., to
+        validate a json specifying a record of the given type.
+
+        Parameters
+        ----------
+        rt : RecordType
+            The RecordType from which a json schema will be created.
+
+        Returns
+        -------
+        schema : dict
+            A dict containing the json schema created from the given RecordType's properties.
+        """
+
+        schema = self._make_segment_from_recordtype(rt)
+        schema["$schema"] = "https://json-schema.org/draft/2019-09/schema"
+        if rt.name:
+            schema["title"] = rt.name
+        if rt.description:
+            schema["description"] = rt.description
+
+        return schema
 
 
 def recordtype_to_json_schema(rt: db.RecordType, additional_properties: bool = True,
                               name_and_description_in_properties: bool = False,
                               additional_options_for_text_props: Optional[dict] = None,
-                              units_in_description: bool = True):
+                              units_in_description: bool = True,
+                              do_not_create: list[str] = None):
     """Create a jsonschema from a given RecordType that can be used, e.g., to
     validate a json specifying a record of the given type.
 
+    This is a standalone function which works without manually creating a
+    JsonSchemaExporter object.
+
     Parameters
     ----------
     rt : RecordType
         The RecordType from which a json schema will be created.
     additional_properties : bool, optional
-        Whether additional propeties will be admitted in the resulting
+        Whether additional properties will be admitted in the resulting
         schema. Optional, default is True.
     name_and_description_in_properties : bool, optional
         Whether to include name and description in the `properties` section of
@@ -238,6 +286,10 @@ def recordtype_to_json_schema(rt: db.RecordType, additional_properties: bool = T
         description of the corresponding schema entry. If set to false, an
         additional `unit` key is added to the schema itself which is purely
         annotational and ignored, e.g., in validation. Default is True.
+    do_not_create : list[str], optional
+        A list of RecordType names for which there should be no option to
+        create new entries.  Instead, only a choice of existing elements is
+        given. Optional, default is empty.
 
     Returns
     -------
@@ -246,17 +298,11 @@ def recordtype_to_json_schema(rt: db.RecordType, additional_properties: bool = T
 
     """
 
-    if additional_options_for_text_props is None:
-        additional_options_for_text_props = {}
-
-    schema = _make_segment_from_recordtype(rt, additional_properties,
-                                           name_and_description_in_properties,
-                                           additional_options_for_text_props,
-                                           units_in_description)
-    schema["$schema"] = "https://json-schema.org/draft/2019-09/schema"
-    if rt.name:
-        schema["title"] = rt.name
-    if rt.description:
-        schema["description"] = rt.description
-
-    return schema
+    exporter = JsonSchemaExporter(
+        additional_properties=additional_properties,
+        name_and_description_in_properties=name_and_description_in_properties,
+        additional_options_for_text_props=additional_options_for_text_props,
+        units_in_description=units_in_description,
+        do_not_create=do_not_create,
+    )
+    return exporter.recordtype_to_json_schema(rt)
diff --git a/unittests/test_json_schema_exporter.py b/unittests/test_json_schema_exporter.py
index 937e1b1b..8a8c3bcc 100644
--- a/unittests/test_json_schema_exporter.py
+++ b/unittests/test_json_schema_exporter.py
@@ -20,6 +20,8 @@
 # with this program. If not, see <https://www.gnu.org/licenses/>.
 #
 
+"""Tests the Json schema exporter."""
+
 import linkahead as db
 
 from jsonschema import FormatChecker, validate, ValidationError
-- 
GitLab