Skip to content
Snippets Groups Projects
Commit ac2535c8 authored by Florian Spreckelsen's avatar Florian Spreckelsen
Browse files

Merge branch 'f-enhance-json-parser' into 'dev'

Extend json-schema model parser

See merge request !72
parents db1ccf4b a7197b5b
Branches
Tags
2 merge requests!73MAINT: change wording of TableImporter argument and allow converters and...,!72Extend json-schema model parser
Pipeline #37107 passed
...@@ -18,3 +18,4 @@ build/ ...@@ -18,3 +18,4 @@ build/
# documentation # documentation
_apidoc _apidoc
/dist/ /dist/
*~
...@@ -7,16 +7,23 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ...@@ -7,16 +7,23 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
## [Unreleased] ## ## [Unreleased] ##
### Added ### ### Added ###
- TableImporter now accepts a `existing_columns` argument which demands that certain columns exist - TableImporter now accepts a `existing_columns` argument which demands that certain columns exist
- The `JsonSchemaParser` class supports `patternProperties`
- The `JsonSchemaParser` class supports json-schema references (`$ref`)
### Changed ### ### Changed ###
- The converters and datatype arguments of TableImporter now may have keys for nonexisting columns - The converters and datatype arguments of TableImporter now may have keys for nonexisting columns
- The `JsonSchemaParser` class does not require the top-level entry of a json
schema definition to specify a RecordType.
### Deprecated ### ### Deprecated ###
### Removed ### ### Removed ###
### Fixed ### ### Fixed ###
- refactored to work with the new default key word in FIND queries: RECORD - refactored to work with the new default key word in FIND queries: RECORD
### Security ### ### Security ###
......
...@@ -156,6 +156,7 @@ def setup_package(): ...@@ -156,6 +156,7 @@ def setup_package():
author_email='h.tomwoerden@indiscale.com', author_email='h.tomwoerden@indiscale.com',
python_requires='>=3.7', python_requires='>=3.7',
install_requires=["caosdb>=0.11.0", install_requires=["caosdb>=0.11.0",
"jsonref",
"jsonschema>=4.4.0", "jsonschema>=4.4.0",
"numpy>=1.17.3", "numpy>=1.17.3",
"openpyxl>=3.0.7", "openpyxl>=3.0.7",
......
...@@ -35,8 +35,9 @@ not defined, simply the name can be supplied with no value. ...@@ -35,8 +35,9 @@ not defined, simply the name can be supplied with no value.
Parents can be provided under the 'inherit_from_xxxx' keywords. The value needs Parents can be provided under the 'inherit_from_xxxx' keywords. The value needs
to be a list with the names. Here, NO NEW entities can be defined. to be a list with the names. Here, NO NEW entities can be defined.
""" """
import json
import argparse import argparse
import json
import jsonref
import re import re
import sys import sys
import yaml import yaml
...@@ -76,7 +77,8 @@ JSON_SCHEMA_ATOMIC_TYPES = [ ...@@ -76,7 +77,8 @@ JSON_SCHEMA_ATOMIC_TYPES = [
"string", "string",
"boolean", "boolean",
"integer", "integer",
"number" "number",
"null"
] ]
...@@ -152,13 +154,29 @@ def parse_model_from_string(string): ...@@ -152,13 +154,29 @@ def parse_model_from_string(string):
return parser.parse_model_from_string(string) return parser.parse_model_from_string(string)
def parse_model_from_json_schema(filename: str): def parse_model_from_json_schema(
filename: str,
top_level_recordtype: bool = True,
types_for_missing_array_items: dict = {},
ignore_unspecified_array_items: bool = False
):
"""Return a datamodel parsed from a json schema definition. """Return a datamodel parsed from a json schema definition.
Parameters Parameters
---------- ----------
filename : str filename : str
The path of the json schema file that is to be parsed The path of the json schema file that is to be parsed
top_level_recordtype : bool, optional
Whether there is a record type defined at the top level of the
schema. Default is true.
types_for_missing_array_items : dict, optional
dictionary containing fall-back types for json entries with `type:
array` but without `items` specification. Default is an empty dict.
ignore_unspecified_array_items : bool, optional
Whether to ignore `type: array` entries the type of which is not
specified by their `items` property or given in
`types_for_missing_array_items`. An error is raised if they are not
ignored. Default is False.
Returns Returns
------- -------
...@@ -174,10 +192,10 @@ def parse_model_from_json_schema(filename: str): ...@@ -174,10 +192,10 @@ def parse_model_from_json_schema(filename: str):
""" """
# @author Florian Spreckelsen # @author Florian Spreckelsen
# @date 2022-02-17 # @date 2022-02-17
# @review Daniel Hornung 2022-02-18 # @review Timm Fitschen 2023-05-25
parser = JsonSchemaParser() parser = JsonSchemaParser(types_for_missing_array_items, ignore_unspecified_array_items)
return parser.parse_model_from_json_schema(filename) return parser.parse_model_from_json_schema(filename, top_level_recordtype)
class Parser(object): class Parser(object):
...@@ -600,14 +618,13 @@ class Parser(object): ...@@ -600,14 +618,13 @@ class Parser(object):
class JsonSchemaParser(Parser): class JsonSchemaParser(Parser):
"""Extends the yaml parser to read in datamodels defined in a json schema. """Extends the yaml parser to read in datamodels defined in a json schema.
**EXPERIMENTAL:** While this calss can already be used to create data models **EXPERIMENTAL:** While this class can already be used to create data models
from basic json schemas, there are the following limitations and missing from basic json schemas, there are the following limitations and missing
features: features:
* Due to limitations of json-schema itself, we currently do not support * Due to limitations of json-schema itself, we currently do not support
inheritance in the imported data models inheritance in the imported data models
* The same goes for suggested properties of RecordTypes * The same goes for suggested properties of RecordTypes
* Currently, ``$defs`` and ``$ref`` in the input schema are not resolved.
* Already defined RecordTypes and (scalar) Properties can't be re-used as * Already defined RecordTypes and (scalar) Properties can't be re-used as
list properties list properties
* Reference properties that are different from the referenced RT. (Although * Reference properties that are different from the referenced RT. (Although
...@@ -615,15 +632,18 @@ class JsonSchemaParser(Parser): ...@@ -615,15 +632,18 @@ class JsonSchemaParser(Parser):
* Values * Values
* Roles * Roles
* The extern keyword from the yaml parser * The extern keyword from the yaml parser
* Currently, a json-schema cannot be transformed into a data model if its
root element isn't a RecordType (or Property) with ``title`` and ``type``.
""" """
# @author Florian Spreckelsen # @author Florian Spreckelsen
# @date 2022-02-17 # @date 2022-02-17
# @review Timm Fitschen 2022-02-30 # @review Timm Fitschen 2023-05-25
def parse_model_from_json_schema(self, filename: str): def __init__(self, types_for_missing_array_items={}, ignore_unspecified_array_items=False):
super().__init__()
self.types_for_missing_array_items = types_for_missing_array_items
self.ignore_unspecified_array_items = ignore_unspecified_array_items
def parse_model_from_json_schema(self, filename: str, top_level_recordtype: bool = True):
"""Return a datamodel created from the definition in the json schema in """Return a datamodel created from the definition in the json schema in
`filename`. `filename`.
...@@ -631,6 +651,9 @@ class JsonSchemaParser(Parser): ...@@ -631,6 +651,9 @@ class JsonSchemaParser(Parser):
---------- ----------
filename : str filename : str
The path to the json-schema file containing the datamodel definition The path to the json-schema file containing the datamodel definition
top_level_recordtype : bool, optional
Whether there is a record type defined at the top level of the
schema. Default is true.
Returns Returns
------- -------
...@@ -639,13 +662,13 @@ class JsonSchemaParser(Parser): ...@@ -639,13 +662,13 @@ class JsonSchemaParser(Parser):
""" """
# @author Florian Spreckelsen # @author Florian Spreckelsen
# @date 2022-02-17 # @date 2022-02-17
# @review Timm Fitschen 2022-02-30 # @review Timm Fitschen 2023-05-25
with open(filename, 'r') as schema_file: with open(filename, 'r') as schema_file:
model_dict = json.load(schema_file) model_dict = jsonref.load(schema_file)
return self._create_model_from_dict(model_dict) return self._create_model_from_dict(model_dict, top_level_recordtype=top_level_recordtype)
def _create_model_from_dict(self, model_dict: [dict, List[dict]]): def _create_model_from_dict(self, model_dict: [dict, List[dict]], top_level_recordtype: bool = True):
"""Parse a dictionary and return the Datamodel created from it. """Parse a dictionary and return the Datamodel created from it.
The dictionary was typically created from the model definition in a json schema file. The dictionary was typically created from the model definition in a json schema file.
...@@ -654,17 +677,28 @@ class JsonSchemaParser(Parser): ...@@ -654,17 +677,28 @@ class JsonSchemaParser(Parser):
---------- ----------
model_dict : dict or list[dict] model_dict : dict or list[dict]
One or several dictionaries read in from a json-schema file One or several dictionaries read in from a json-schema file
top_level_recordtype : bool, optional
Whether there is a record type defined at the top level of the
schema. Default is true.
Returns Returns
------- -------
our : DataModel our : DataModel
The datamodel defined in `model_dict` The datamodel defined in `model_dict`
""" """
# @review Timm Fitschen 2022-02-30 # @review Timm Fitschen 2023-05-25
if isinstance(model_dict, dict): if isinstance(model_dict, dict):
model_dict = [model_dict] model_dict = [model_dict]
for ii, elt in enumerate(model_dict): for ii, elt in enumerate(model_dict):
try:
jsonschema.Draft202012Validator.check_schema(elt)
except jsonschema.SchemaError as err:
key = elt["title"] if "title" in elt else f"element {ii}"
raise JsonSchemaDefinitionError(
f"Json Schema error in {key}:\n{str(err)}") from err
if top_level_recordtype:
if "title" not in elt: if "title" not in elt:
raise JsonSchemaDefinitionError( raise JsonSchemaDefinitionError(
f"Object {ii+1} is lacking the `title` key word") f"Object {ii+1} is lacking the `title` key word")
...@@ -672,18 +706,39 @@ class JsonSchemaParser(Parser): ...@@ -672,18 +706,39 @@ class JsonSchemaParser(Parser):
raise JsonSchemaDefinitionError( raise JsonSchemaDefinitionError(
f"Object {ii+1} is lacking the `type` key word") f"Object {ii+1} is lacking the `type` key word")
# Check if this is a valid Json Schema # Check if this is a valid Json Schema
try:
jsonschema.Draft202012Validator.check_schema(elt)
except jsonschema.SchemaError as err:
raise JsonSchemaDefinitionError(
f"Json Schema error in {elt['title']}:\n{str(err)}") from err
name = self._stringify(elt["title"], context=elt) name = self._stringify(elt["title"], context=elt)
self._treat_element(elt, name) self._treat_element(elt, name)
elif "properties" in elt or "patternProperties" in elt:
# No top-level type but there are entities
if "properties" in elt:
for key, prop in elt["properties"].items():
name = self._get_name_from_property(key, prop)
self._treat_element(prop, name)
if "patternProperties" in elt:
# See also treatment in ``_treat_record_type``. Since here,
# there is no top-level RT we use the prefix `__Pattern`,
# i.e., the resulting Record Types will be called
# `__PatternElement`.
self._treat_pattern_properties(
elt["patternProperties"], name_prefix="__Pattern")
else:
# Neither RecordType itself, nor further properties in schema,
# so nothing to do here. Maybe add something in the future.
continue
return DataModel(self.model.values()) return DataModel(self.model.values())
def _get_name_from_property(self, key: str, prop: dict):
    """Return the model name for a schema property.

    The property's ``title`` takes precedence over its key in the
    enclosing ``properties`` mapping; either way the result is passed
    through ``self._stringify``.

    Parameters
    ----------
    key : str
        The key of the property in the json-schema ``properties`` dict.
    prop : dict
        The json-schema element describing the property.

    Returns
    -------
    str
        The stringified name.
    """
    # @review Timm Fitschen 2023-05-25
    return self._stringify(prop.get("title", key))
def _get_atomic_datatype(self, elt): def _get_atomic_datatype(self, elt):
# @review Timm Fitschen 2022-02-30 # @review Timm Fitschen 2023-05-25
if elt["type"] == "string": if elt["type"] == "string":
if "format" in elt and elt["format"] in ["date", "date-time"]: if "format" in elt and elt["format"] in ["date", "date-time"]:
return db.DATETIME return db.DATETIME
...@@ -695,11 +750,15 @@ class JsonSchemaParser(Parser): ...@@ -695,11 +750,15 @@ class JsonSchemaParser(Parser):
return db.DOUBLE return db.DOUBLE
elif elt["type"] == "boolean": elif elt["type"] == "boolean":
return db.BOOLEAN return db.BOOLEAN
elif elt["type"] == "null":
# This could be any datatype since a valid json will never have a
# value in a null property. We use TEXT for convenience.
return db.TEXT
else: else:
raise JsonSchemaDefinitionError(f"Unkown atomic type in {elt}.") raise JsonSchemaDefinitionError(f"Unkown atomic type in {elt}.")
def _treat_element(self, elt: dict, name: str): def _treat_element(self, elt: dict, name: str):
# @review Timm Fitschen 2022-02-30 # @review Timm Fitschen 2023-05-25
force_list = False force_list = False
if name in self.model: if name in self.model:
return self.model[name], force_list return self.model[name], force_list
...@@ -710,12 +769,17 @@ class JsonSchemaParser(Parser): ...@@ -710,12 +769,17 @@ class JsonSchemaParser(Parser):
if name == "name": if name == "name":
# This is identified with the CaosDB name property as long as the # This is identified with the CaosDB name property as long as the
# type is correct. # type is correct.
if not elt["type"] == "string": if not elt["type"] == "string" and "string" not in elt["type"]:
raise JsonSchemaDefinitionError( raise JsonSchemaDefinitionError(
"The 'name' property must be string-typed, otherwise it cannot " "The 'name' property must be string-typed, otherwise it cannot "
"be identified with CaosDB's name property." "be identified with CaosDB's name property."
) )
return None, force_list return None, force_list
# LinkAhead suports null for all types, so in the very special case of
# `"type": ["null", "<other_type>"]`, only consider the other type:
if isinstance(elt["type"], list) and len(elt["type"]) == 2 and "null" in elt["type"]:
elt["type"].remove("null")
elt["type"] = elt["type"][0]
if "enum" in elt: if "enum" in elt:
ent = self._treat_enum(elt, name) ent = self._treat_enum(elt, name)
elif elt["type"] in JSON_SCHEMA_ATOMIC_TYPES: elif elt["type"] in JSON_SCHEMA_ATOMIC_TYPES:
...@@ -733,11 +797,12 @@ class JsonSchemaParser(Parser): ...@@ -733,11 +797,12 @@ class JsonSchemaParser(Parser):
# treat_something function # treat_something function
ent.description = elt["description"] ent.description = elt["description"]
if ent is not None:
self.model[name] = ent self.model[name] = ent
return ent, force_list return ent, force_list
def _treat_record_type(self, elt: dict, name: str): def _treat_record_type(self, elt: dict, name: str):
# @review Timm Fitschen 2022-02-30 # @review Timm Fitschen 2023-05-25
rt = db.RecordType(name=name) rt = db.RecordType(name=name)
if "required" in elt: if "required" in elt:
required = elt["required"] required = elt["required"]
...@@ -745,10 +810,7 @@ class JsonSchemaParser(Parser): ...@@ -745,10 +810,7 @@ class JsonSchemaParser(Parser):
required = [] required = []
if "properties" in elt: if "properties" in elt:
for key, prop in elt["properties"].items(): for key, prop in elt["properties"].items():
if "title" in prop: name = self._get_name_from_property(key, prop)
name = self._stringify(prop["title"])
else:
name = self._stringify(key)
prop_ent, force_list = self._treat_element(prop, name) prop_ent, force_list = self._treat_element(prop, name)
if prop_ent is None: if prop_ent is None:
# Nothing to be appended since the property has to be # Nothing to be appended since the property has to be
...@@ -762,6 +824,17 @@ class JsonSchemaParser(Parser): ...@@ -762,6 +824,17 @@ class JsonSchemaParser(Parser):
rt.add_property(prop_ent, importance=importance, rt.add_property(prop_ent, importance=importance,
datatype=db.LIST(prop_ent)) datatype=db.LIST(prop_ent))
if "patternProperties" in elt:
pattern_property_rts = self._treat_pattern_properties(
elt["patternProperties"], name_prefix=name)
for ppr in pattern_property_rts:
# add reference to pattern property type. These can never be
# obligatory since pattern properties cannot be required in the
# original schema (since their actual names are not known a
# priori).
rt.add_property(ppr)
if "description" in elt: if "description" in elt:
rt.description = elt["description"] rt.description = elt["description"]
return rt return rt
...@@ -783,11 +856,14 @@ class JsonSchemaParser(Parser): ...@@ -783,11 +856,14 @@ class JsonSchemaParser(Parser):
return rt return rt
def _treat_list(self, elt: dict, name: str): def _treat_list(self, elt: dict, name: str):
# @review Timm Fitschen 2022-02-30 # @review Timm Fitschen 2023-05-25
if "items" not in elt: if "items" not in elt and name not in self.types_for_missing_array_items:
if self.ignore_unspecified_array_items:
return None, False
raise JsonSchemaDefinitionError( raise JsonSchemaDefinitionError(
f"The definition of the list items is missing in {elt}.") f"The definition of the list items is missing in {elt}.")
if "items" in elt:
items = elt["items"] items = elt["items"]
if "enum" in items: if "enum" in items:
return self._treat_enum(items, name), True return self._treat_enum(items, name), True
...@@ -805,6 +881,71 @@ class JsonSchemaParser(Parser): ...@@ -805,6 +881,71 @@ class JsonSchemaParser(Parser):
items, self._stringify(items["title"])) items, self._stringify(items["title"]))
self.model[ref_rt.name] = ref_rt self.model[ref_rt.name] = ref_rt
return db.Property(name=name, datatype=db.LIST(ref_rt)), False return db.Property(name=name, datatype=db.LIST(ref_rt)), False
else:
# Use predefined type:
datatype = db.LIST(self.types_for_missing_array_items[name])
return db.Property(name=name, datatype=datatype), False
def _get_pattern_prop(self):
    """Return the shared ``__matched_pattern`` TEXT property.

    The property is created on first use and cached in ``self.model``
    under the key ``__pattern_property_pattern_property``, so all
    pattern-property RecordTypes reference the same entity.
    """
    # @review Timm Fitschen 2023-05-25
    cache_key = "__pattern_property_pattern_property"
    if cache_key not in self.model:
        self.model[cache_key] = db.Property(
            name="__matched_pattern", datatype=db.TEXT)
    return self.model[cache_key]
def _treat_pattern_properties(self, pattern_elements, name_prefix=""):
    """Special Treatment for pattern properties: A RecordType is created for
    each pattern property. In case of a `type: object` PatternProperty, the
    remaining properties of the JSON entry are appended to the new
    RecordType; in case of an atomic type PatternProperty, a single value
    Property is added to the RecordType.

    Parameters
    ----------
    pattern_elements : dict
        The value of a json-schema ``patternProperties`` entry, mapping
        each name pattern to its schema element.
    name_prefix : str, optional
        Prefix for the names of the generated RecordTypes (a counter
        suffix is appended when there is more than one pattern and no
        ``title`` is given). Default is "".

    Returns
    -------
    list
        The RecordTypes created for the pattern properties; every one of
        them carries an obligatory ``__matched_pattern`` property and a
        ``pattern: <key>`` note in its description.

    Raises
    ------
    NotImplementedError
        In case of patternProperties with non-object, non-atomic type, e.g.,
        array.
    """
    # @review Timm Fitschen 2023-05-25
    num_patterns = len(pattern_elements)
    pattern_prop = self._get_pattern_prop()
    returns = []
    for ii, (key, element) in enumerate(pattern_elements.items()):
        if "title" not in element:
            # No title given: derive the name from the parent's name,
            # disambiguating with a 1-based counter when necessary.
            name_suffix = f"_{ii+1}" if num_patterns > 1 else ""
            name = name_prefix + "Entry" + name_suffix
        else:
            # NOTE(review): unlike ``_get_name_from_property``, the title
            # is not passed through ``self._stringify`` here -- confirm
            # this is intentional.
            name = element["title"]
        if element["type"] == "object":
            # simple, is already an object, so can be treated like any other
            # record type.
            pattern_type = self._treat_record_type(element, name)
        elif element["type"] in JSON_SCHEMA_ATOMIC_TYPES:
            # create a property that stores the actual value of the pattern
            # property.
            propname = f"{name}_value"
            prop = db.Property(name=propname, datatype=self._get_atomic_datatype(element))
            self.model[propname] = prop
            pattern_type = db.RecordType(name=name)
            pattern_type.add_property(prop)
        else:
            raise NotImplementedError(
                "Pattern properties are currently only supported for types " +
                ", ".join(JSON_SCHEMA_ATOMIC_TYPES) + ", and object.")

        # Add pattern property and description
        pattern_type.add_property(pattern_prop, importance=db.OBLIGATORY)
        if pattern_type.description:
            pattern_type.description += f"\n\npattern: {key}"
        else:
            pattern_type.description = f"pattern: {key}"
        self.model[name] = pattern_type
        returns.append(pattern_type)

    return returns
if __name__ == "__main__": if __name__ == "__main__":
......
...@@ -15,6 +15,7 @@ This documentation helps you to :doc:`get started<getting_started>`, explains th ...@@ -15,6 +15,7 @@ This documentation helps you to :doc:`get started<getting_started>`, explains th
Concepts <concepts> Concepts <concepts>
The Caosdb Crawler <crawler> The Caosdb Crawler <crawler>
YAML data model specification <yaml_interface> YAML data model specification <yaml_interface>
Specifying a datamodel with JSON schema <json_schema_interface>
_apidoc/modules _apidoc/modules
......
Defining datamodels with a JSON schema specification
====================================================
TODO, see https://gitlab.com/caosdb/caosdb-advanced-user-tools/-/issues/42
Further information
###################
Pattern Properties
%%%%%%%%%%%%%%%%%%
The JSON-schema parser has rudimentary support for ``patternProperties``. Since
the actual names of these properties are not known a priori (only the pattern
that their names have to match), we create RecordTypes for all pattern
properties. The names of these RecordTypes are created from their parent
element's name by appending the string ``"Entry"`` and possibly a number if
there is more than one pattern property for one parent.
All the RecordTypes created for pattern properties have at least an obligatory
``__matched_pattern`` property which will -- as the name suggests -- store the
matched pattern of an actual data entry.
.. note::
The ``__matched_pattern`` property is added automatically to your datamodel
as soon as there is at least one pattern property in your JSON schema. So be
sure that you don't happen to have an entity with exactly this name in your
database.
E.g., a json schema with
.. code-block:: json
"dataset": {
"patternProperties": {
"^[0-9]{4,4}": {
"type": "boolean"
},
"^[0-9]{4,4}-[0-9]{2,2}-[0-9]{2,2}": {
"type": "object",
"properties": {
"date_id": {
"$ref": "#/definitions/uuid"
}
}
}
}
}
Would result in a ``Dataset`` RecordType that has the two properties
``DatasetEntry_1`` and ``DatasetEntry_2`` (as always, name can be overwritten
explicitly by specifying the ``title`` property), referencing corresponding
``DatasetEntry_1`` and ``DatasetEntry_2`` Records.
Apart from the aforementioned ``__matched_pattern`` property, ``DatasetEntry_1``
also has the ``DatasetEntry_1_value`` property with datatype ``BOOLEAN``, that
stores the actual value. In turn, ``DatasetEntry_2`` is of ``type: object`` and
is treated like any other RecordType. Consequently, it has, apart from the
``__matched_pattern`` property, a ``date_id`` property as specified in its
``properties``.
Array entries without ``items`` specification
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
JSON schema allows for properties of ``type: array`` without the ``items``
specification that consequently can be arrays of any (and of mixed) types. While
this is in general problematic when specifying a data model, sometimes these
properties cannot be specified further, e.g., when you're using an external
schema that you cannot change.
These properties can still be added to your datamodel by specifying their types
explicitly in a dictionary or, alternatively, they can be ignored. See the
``types_for_missing_array_items`` and ``ignore_unspecified_array_items``
parameters of ``models.parser.JsonSchemaParser``, respectively, for more
information.
...@@ -18,7 +18,8 @@ ...@@ -18,7 +18,8 @@
"date": { "type": "string", "format": "date" }, "date": { "type": "string", "format": "date" },
"integer": { "type": "integer", "description": "Some integer property" }, "integer": { "type": "integer", "description": "Some integer property" },
"boolean": { "type": "boolean" }, "boolean": { "type": "boolean" },
"number_prop": { "type": "number", "description": "Some float property" } "number_prop": { "type": "number", "description": "Some float property" },
"null_prop": { "type": "null", "description": "This property will never have a value." }
} }
} }
] ]
{
"title": "something_with_missing_array_items",
"type": "object",
"properties": {
"missing": {
"type": "array"
}
}
}
{
"$schema": "http://json-schema.org/draft-07/schema#",
"$id": "https://my-schema-id.net",
"type": "object",
"definitions": {
"uuid": {
"type": [
"string",
"null"
],
"pattern": "^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$"
},
"datetime": {
"type": "string",
"format": "date-time"
}
},
"properties": {
"Dataset1": {
"title": "Dataset1",
"description": "Some description",
"type": "object",
"properties": {
"title": {
"type": "string",
"description": "full dataset title"
},
"campaign": {
"type": "string",
"description": "FIXME"
},
"number_prop": {
"type": "number",
"description": "Some float property"
},
"user_id": {
"$ref": "#/definitions/uuid"
}
},
"required": ["title", "number_prop"]
}
},
"patternProperties": {
"^[0-9]{4,4}": {
"type": "boolean"
},
"^[0-9]{4,4}-[0-9]{2,2}-[0-9]{2,2}": {
"type": "object",
"properties": {
"date_id": {
"$ref": "#/definitions/uuid"
}
}
}
}
}
[
{
"title": "Dataset",
"type": "object",
"patternProperties": {
"^[0-9]{4,4}": {
"type": "boolean"
},
"^[0-9]{4,4}-[0-9]{2,2}-[0-9]{2,2}": {
"type": "object",
"properties": {
"date_id": {
"type": [
"string",
"null"
],
"pattern": "^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$"
}
}
}
}
},
{
"title": "Dataset2",
"type": "object",
"properties": {
"datetime": {
"type": "string",
"format": "date-time"
}
},
"patternProperties": {
".*": {
"title": "Literally anything",
"type": "object"
}
}
}
]
...@@ -103,7 +103,7 @@ def test_datamodel_with_atomic_properties(): ...@@ -103,7 +103,7 @@ def test_datamodel_with_atomic_properties():
assert isinstance(rt2, db.RecordType) assert isinstance(rt2, db.RecordType)
assert rt2.name == "Dataset2" assert rt2.name == "Dataset2"
assert not rt2.description assert not rt2.description
assert len(rt2.get_properties()) == 5 assert len(rt2.get_properties()) == 6
date_prop = rt2.get_property("date") date_prop = rt2.get_property("date")
assert date_prop.datatype == db.DATETIME assert date_prop.datatype == db.DATETIME
...@@ -121,6 +121,9 @@ def test_datamodel_with_atomic_properties(): ...@@ -121,6 +121,9 @@ def test_datamodel_with_atomic_properties():
float_prop2 = rt2.get_property("number_prop") float_prop2 = rt2.get_property("number_prop")
assert float_prop.datatype == float_prop2.datatype assert float_prop.datatype == float_prop2.datatype
null_prop = rt2.get_property("null_prop")
assert null_prop.datatype == db.TEXT
def test_required_no_list(): def test_required_no_list():
"""Exception must be raised when "required" is not a list.""" """Exception must be raised when "required" is not a list."""
...@@ -356,3 +359,130 @@ def test_name_property(): ...@@ -356,3 +359,130 @@ def test_name_property():
assert str(err.value).startswith( assert str(err.value).startswith(
"The 'name' property must be string-typed, otherwise it cannot be identified with CaosDB's " "The 'name' property must be string-typed, otherwise it cannot be identified with CaosDB's "
"name property.") "name property.")
def test_no_toplevel_entity():
    """Parse a schema whose root is not a RecordType.

    With ``top_level_recordtype=False`` each entry of the root's
    ``properties`` becomes its own entity, and root-level
    ``patternProperties`` produce ``__PatternEntry_*`` RecordTypes.
    """
    model = parse_model_from_json_schema(os.path.join(
        FILEPATH, "datamodel_no_toplevel_entity.schema.json"), top_level_recordtype=False)

    # The object defined under properties/Dataset1 becomes a RecordType.
    assert "Dataset1" in model
    rt1 = model["Dataset1"]
    assert rt1.name == "Dataset1"
    assert rt1.description == "Some description"
    assert len(rt1.get_properties()) == 4

    assert rt1.get_property("title") is not None
    assert rt1.get_property("campaign") is not None
    assert rt1.get_property("number_prop") is not None
    assert rt1.get_property("user_id") is not None

    # "required" entries of the schema become OBLIGATORY properties.
    title_prop = rt1.get_property("title")
    assert title_prop.datatype == db.TEXT
    assert rt1.get_importance(title_prop.name) == db.OBLIGATORY

    campaign_prop = rt1.get_property("campaign")
    assert campaign_prop.datatype == db.TEXT
    assert rt1.get_importance(campaign_prop.name) == db.RECOMMENDED

    float_prop = rt1.get_property("number_prop")
    assert float_prop.datatype == db.DOUBLE
    assert rt1.get_importance(float_prop.name) == db.OBLIGATORY

    # ``$ref``-resolved ["string", "null"] type collapses to TEXT.
    uid_prop = rt1.get_property("user_id")
    assert uid_prop.datatype == db.TEXT
    assert rt1.get_importance(uid_prop.name) == db.RECOMMENDED

    # pattern properties without top-level entity:
    assert "__PatternEntry_1" in model
    assert "__PatternEntry_2" in model

    # Atomic-typed pattern property: value is stored in a *_value property.
    pattern_boolean_rt = model["__PatternEntry_1"]
    assert "pattern: " in pattern_boolean_rt.description
    assert len(pattern_boolean_rt.properties) == 2
    pp = pattern_boolean_rt.get_property("__matched_pattern")
    assert pp.datatype == db.TEXT
    assert pattern_boolean_rt.get_importance(pp.name) == db.OBLIGATORY
    value_prop = pattern_boolean_rt.get_property("__PatternEntry_1_value")
    assert value_prop.datatype == db.BOOLEAN

    # Object-typed pattern property: treated like a normal RecordType.
    pattern_object_rt = model["__PatternEntry_2"]
    assert "pattern: " in pattern_object_rt.description
    assert len(pattern_object_rt.properties) == 2
    pp = pattern_object_rt.get_property("__matched_pattern")
    assert pp.datatype == db.TEXT
    assert pattern_object_rt.get_importance(pp.name) == db.OBLIGATORY
    date_id_prop = pattern_object_rt.get_property("date_id")
    assert date_id_prop.datatype == db.TEXT
def test_missing_array_items():
    """Check the three ways of handling ``type: array`` without ``items``:
    strict mode raises, ignore mode drops the property, and
    ``types_for_missing_array_items`` supplies a fall-back datatype."""
    schema_path = os.path.join(
        FILEPATH, "datamodel_missing_array_items.schema.json")

    # strict behavior
    with pytest.raises(JsonSchemaDefinitionError) as err:
        parse_model_from_json_schema(schema_path)
    assert "{'type': 'array'}" in str(err)

    # ignore all problems, so a RT is created that does not have the property
    model = parse_model_from_json_schema(
        schema_path, ignore_unspecified_array_items=True)
    assert "something_with_missing_array_items" in model
    rt = model["something_with_missing_array_items"]
    assert isinstance(rt, db.RecordType)
    assert rt.get_property("missing") is None

    # specify the type:
    model = parse_model_from_json_schema(
        schema_path, types_for_missing_array_items={"missing": db.FILE})
    assert "something_with_missing_array_items" in model
    rt = model["something_with_missing_array_items"]
    missing_prop = rt.get_property("missing")
    assert missing_prop is not None
    assert missing_prop.datatype == db.LIST(db.FILE)
def test_pattern_properties():
    """Pattern properties inside a top-level RecordType: each pattern becomes
    a referenced ``<Parent>Entry_<n>`` RecordType (or a RecordType named by
    its ``title`` when given)."""
    model = parse_model_from_json_schema(os.path.join(
        FILEPATH, "datamodel_pattern_properties.schema.json"))

    # Two patterns, no titles -> DatasetEntry_1 and DatasetEntry_2 references.
    assert "Dataset" in model
    rt1 = model["Dataset"]
    assert len(rt1.properties) == 2
    for name in ["DatasetEntry_1", "DatasetEntry_2"]:
        assert rt1.get_property(name) is not None
        assert rt1.get_property(name).is_reference()

    # Atomic (boolean) pattern: value stored in DatasetEntry_1_value.
    pattern_boolean_rt = model["DatasetEntry_1"]
    assert "pattern: " in pattern_boolean_rt.description
    assert len(pattern_boolean_rt.properties) == 2
    pp = pattern_boolean_rt.get_property("__matched_pattern")
    assert pp.datatype == db.TEXT
    assert pattern_boolean_rt.get_importance(pp.name) == db.OBLIGATORY
    value_prop = pattern_boolean_rt.get_property("DatasetEntry_1_value")
    assert value_prop.datatype == db.BOOLEAN

    # Object pattern: its own properties are kept alongside __matched_pattern.
    pattern_object_rt = model["DatasetEntry_2"]
    assert "pattern: " in pattern_object_rt.description
    assert len(pattern_object_rt.properties) == 2
    pp = pattern_object_rt.get_property("__matched_pattern")
    assert pp.datatype == db.TEXT
    assert pattern_object_rt.get_importance(pp.name) == db.OBLIGATORY
    date_id_prop = pattern_object_rt.get_property("date_id")
    assert date_id_prop.datatype == db.TEXT

    assert "Dataset2" in model
    rt2 = model["Dataset2"]
    assert len(rt2.properties) == 2
    # This has been tested elsewhere, just make sure that it is properly created
    # in the presence of pattern properties, too.
    assert rt2.get_property("datetime") is not None

    # A pattern with a ``title`` is named after that title instead.
    assert rt2.get_property("Literally anything") is not None
    assert rt2.get_property("Literally anything").is_reference()

    pattern_named_rt = model["Literally anything"]
    assert len(pattern_named_rt.properties) == 1
    assert pattern_named_rt.get_property("__matched_pattern") is not None
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment