Skip to content
Snippets Groups Projects

Extend json-schema model parser

Merged Florian Spreckelsen requested to merge f-enhance-json-parser into dev
3 unresolved threads
Files
5
@@ -35,8 +35,9 @@ not defined, simply the name can be supplied with no value.
Parents can be provided under the 'inherit_from_xxxx' keywords. The value needs
to be a list with the names. Here, NO NEW entities can be defined.
"""
import json
import argparse
import json
import jsonref
import re
import sys
import yaml
@@ -76,7 +77,8 @@ JSON_SCHEMA_ATOMIC_TYPES = [
"string",
"boolean",
"integer",
"number"
"number",
"null"
]
@@ -152,13 +154,17 @@ def parse_model_from_string(string):
return parser.parse_model_from_string(string)
def parse_model_from_json_schema(filename: str):
def parse_model_from_json_schema(filename: str, top_level_recordtype: bool = True):
"""Return a datamodel parsed from a json schema definition.
Parameters
----------
filename : str
The path of the json schema file that is to be parsed
top_level_recordtype : bool, optional
Whether there is a record type defined at the top level of the
schema. Default is true.
Returns
-------
@@ -177,7 +183,7 @@ def parse_model_from_json_schema(filename: str):
# @review Daniel Hornung 2022-02-18
parser = JsonSchemaParser()
return parser.parse_model_from_json_schema(filename)
return parser.parse_model_from_json_schema(filename, top_level_recordtype)
class Parser(object):
@@ -623,7 +629,7 @@ class JsonSchemaParser(Parser):
# @date 2022-02-17
# @review Timm Fitschen 2022-02-30
def parse_model_from_json_schema(self, filename: str):
def parse_model_from_json_schema(self, filename: str, top_level_recordtype: bool = True):
"""Return a datamodel created from the definition in the json schema in
`filename`.
@@ -631,6 +637,9 @@ class JsonSchemaParser(Parser):
----------
filename : str
The path to the json-schema file containing the datamodel definition
top_level_recordtype : bool, optional
Whether there is a record type defined at the top level of the
schema. Default is true.
Returns
-------
@@ -641,11 +650,11 @@ class JsonSchemaParser(Parser):
# @date 2022-02-17
# @review Timm Fitschen 2022-02-30
with open(filename, 'r') as schema_file:
model_dict = json.load(schema_file)
model_dict = jsonref.load(schema_file)
return self._create_model_from_dict(model_dict)
return self._create_model_from_dict(model_dict, top_level_recordtype=top_level_recordtype)
def _create_model_from_dict(self, model_dict: [dict, List[dict]], top_level_recordtype: bool = True):
    """Parse a dictionary and return the Datamodel created from it.

    The dictionary was typically created from the model definition in a json schema file.

    Parameters
    ----------
    model_dict : dict or list[dict]
        One or several dictionaries read in from a json-schema file
    top_level_recordtype : bool, optional
        Whether there is a record type defined at the top level of the
        schema. Default is true.

    Returns
    -------
    our_model : DataModel
        The datamodel defined in `model_dict`
    """
    if isinstance(model_dict, dict):
        model_dict = [model_dict]

    for ii, elt in enumerate(model_dict):
        # Check if this is a valid Json Schema
        try:
            jsonschema.Draft202012Validator.check_schema(elt)
        except jsonschema.SchemaError as err:
            # Prefer the schema's title when reporting errors so the user
            # can identify the failing element.
            key = elt["title"] if "title" in elt else f"element {ii}"
            raise JsonSchemaDefinitionError(
                f"Json Schema error in {key}:\n{str(err)}") from err

        if top_level_recordtype:
            if "title" not in elt:
                raise JsonSchemaDefinitionError(
                    f"Object {ii+1} is lacking the `title` key word")
            if "type" not in elt:
                raise JsonSchemaDefinitionError(
                    f"Object {ii+1} is lacking the `type` key word")
            name = self._stringify(elt["title"], context=elt)
            self._treat_element(elt, name)
        elif "properties" in elt or "patternProperties" in elt:
            # BUGFIX: the original condition read
            # `"properties" in elt or "patternProperties":` — the bare string
            # literal is always truthy, so the `else` branch below was
            # unreachable. The membership test `in elt` was intended.
            # No top-level type but there are entities
            if "properties" in elt:
                for key, prop in elt["properties"].items():
                    name = self._get_name_from_property(key, prop)
                    self._treat_element(prop, name)
            if "patternProperties" in elt:
                # See also treatment in ``_treat_record_type``. Since here,
                # there is no top-level RT we use the prefix `__Pattern`,
                # i.e., the resulting Record Types will be called
                # `__PatternElement`.
                self._treat_pattern_properties(
                    elt["patternProperties"], name_prefix="__Pattern")
        else:
            # Neither RecordType itself, nor further properties in schema,
            # so nothing to do here. Maybe add something in the future.
            continue

    return DataModel(self.model.values())
def _get_name_from_property(self, key: str, prop: dict):
    """Return the entity name for a schema property.

    The property's ``title`` takes precedence; if absent, the property's
    key in the enclosing ``properties`` object is used instead.
    """
    label = prop.get("title", key)
    return self._stringify(label)
def _get_atomic_datatype(self, elt):
# @review Timm Fitschen 2022-02-30
if elt["type"] == "string":
@@ -695,6 +736,10 @@ class JsonSchemaParser(Parser):
return db.DOUBLE
elif elt["type"] == "boolean":
return db.BOOLEAN
elif elt["type"] == "null":
# This could be any datatype since a valid json will never have a
# value in a null property. We use TEXT for convenience.
return db.TEXT
else:
raise JsonSchemaDefinitionError(f"Unkown atomic type in {elt}.")
@@ -716,6 +761,11 @@ class JsonSchemaParser(Parser):
"be identified with CaosDB's name property."
)
return None, force_list
# LinkAhead supports null for all types, so in the very special case of
# `"type": ["null", "<other_type>"]`, only consider the other type:
if isinstance(elt["type"], list) and len(elt["type"]) == 2 and "null" in elt["type"]:
elt["type"].remove("null")
elt["type"] = elt["type"][0]
if "enum" in elt:
ent = self._treat_enum(elt, name)
elif elt["type"] in JSON_SCHEMA_ATOMIC_TYPES:
@@ -725,6 +775,9 @@ class JsonSchemaParser(Parser):
ent = self._treat_record_type(elt, name)
elif elt["type"] == "array":
ent, force_list = self._treat_list(elt, name)
elif elt["type"] == "null":
# null
return None, force_list
else:
raise NotImplementedError(
f"Cannot parse items of type '{elt['type']}' (yet).")
@@ -745,10 +798,7 @@ class JsonSchemaParser(Parser):
required = []
if "properties" in elt:
for key, prop in elt["properties"].items():
if "title" in prop:
name = self._stringify(prop["title"])
else:
name = self._stringify(key)
name = self._get_name_from_property(key, prop)
prop_ent, force_list = self._treat_element(prop, name)
if prop_ent is None:
# Nothing to be appended since the property has to be
@@ -762,6 +812,17 @@ class JsonSchemaParser(Parser):
rt.add_property(prop_ent, importance=importance,
datatype=db.LIST(prop_ent))
if "patternProperties" in elt:
pattern_property_rts = self._treat_pattern_properties(
elt["patternProperties"], name_prefix=name)
for ppr in pattern_property_rts:
# add reference to pattern property type. These can never be
# obligatory since pattern properties cannot be required in the
# original schema (since their actual names are not known a
# priori).
rt.add_property(ppr)
if "description" in elt:
rt.description = elt["description"]
return rt
@@ -806,6 +867,62 @@ class JsonSchemaParser(Parser):
self.model[ref_rt.name] = ref_rt
return db.Property(name=name, datatype=db.LIST(ref_rt)), False
def _get_pattern_prop(self):
    """Return the shared Property recording which pattern was matched.

    The Property is created lazily on first use and cached in
    ``self.model`` so that all pattern-property RecordTypes share it.
    """
    cache_key = "__pattern_property_pattern_property"
    if cache_key not in self.model:
        self.model[cache_key] = db.Property(
            name="__matched_pattern", datatype=db.TEXT)
    return self.model[cache_key]
def _treat_pattern_properties(self, pattern_elements, name_prefix=""):
    """Create one RecordType per pattern property.

    For a ``type: object`` pattern property, the remaining properties of
    the JSON entry are appended to the new RecordType; for an atomic-type
    pattern property, a single value Property is added to the RecordType
    instead.

    Raises
    ------
    NotImplementedError
        In case of patternProperties with non-object, non-atomic type, e.g.,
        array.
    """
    multiple = len(pattern_elements) > 1
    matched_pattern_prop = self._get_pattern_prop()
    created_types = []
    for index, (pattern, sub_schema) in enumerate(pattern_elements.items(), start=1):
        # Disambiguate names only when there is more than one pattern.
        rt_name = f"{name_prefix}Entry_{index}" if multiple else f"{name_prefix}Entry"
        schema_type = sub_schema["type"]
        if schema_type == "object":
            # Already an object, so it can be treated like any other
            # record type.
            new_rt = self._treat_record_type(sub_schema, rt_name)
        elif schema_type in JSON_SCHEMA_ATOMIC_TYPES:
            # Atomic type: store the actual value of the pattern property
            # in a dedicated property.
            value_prop_name = f"{rt_name}_value"
            value_prop = db.Property(
                name=value_prop_name,
                datatype=self._get_atomic_datatype(sub_schema))
            self.model[value_prop_name] = value_prop
            new_rt = db.RecordType(name=rt_name)
            new_rt.add_property(value_prop)
        else:
            raise NotImplementedError(
                "Pattern properties are currently only supported for types " +
                ", ".join(JSON_SCHEMA_ATOMIC_TYPES) + ", and object.")
        # Record which pattern was matched, and note the pattern in the
        # RecordType's description.
        new_rt.add_property(matched_pattern_prop, importance=db.OBLIGATORY)
        if new_rt.description:
            new_rt.description += f"\n\npattern: {pattern}"
        else:
            new_rt.description = f"pattern: {pattern}"
        self.model[rt_name] = new_rt
        created_types.append(new_rt)
    return created_types
if __name__ == "__main__":
parser = argparse.ArgumentParser(description=__doc__,
Loading