Skip to content
Snippets Groups Projects
Commit ac2535c8 authored by Florian Spreckelsen's avatar Florian Spreckelsen
Browse files

Merge branch 'f-enhance-json-parser' into 'dev'

Extend json-schema model parser

See merge request !72
parents db1ccf4b a7197b5b
Branches
Tags
2 merge requests!73MAINT: change wording of TableImporter argument and allow converters and...,!72Extend json-schema model parser
Pipeline #37107 passed
...@@ -18,3 +18,4 @@ build/ ...@@ -18,3 +18,4 @@ build/
# documentation # documentation
_apidoc _apidoc
/dist/ /dist/
*~
...@@ -7,16 +7,23 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ...@@ -7,16 +7,23 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
## [Unreleased] ## ## [Unreleased] ##
### Added ### ### Added ###
- TableImporter now accepts a `existing_columns` argument which demands that certain columns exist - TableImporter now accepts a `existing_columns` argument which demands that certain columns exist
- The `JsonSchemaParser` class supports `patternProperties`
- The `JsonSchemaParser` class supports json-schema references (`$ref`)
### Changed ### ### Changed ###
- The converters and datatype arguments of TableImporter now may have keys for nonexisting columns - The converters and datatype arguments of TableImporter now may have keys for nonexisting columns
- The `JsonSchemaParser` class does not require the top-level entry of a json
schema definition to specify a RecordType.
### Deprecated ### ### Deprecated ###
### Removed ### ### Removed ###
### Fixed ### ### Fixed ###
- refactored to work with the new default key word in FIND queries: RECORD - refactored to work with the new default key word in FIND queries: RECORD
### Security ### ### Security ###
......
...@@ -156,6 +156,7 @@ def setup_package(): ...@@ -156,6 +156,7 @@ def setup_package():
author_email='h.tomwoerden@indiscale.com', author_email='h.tomwoerden@indiscale.com',
python_requires='>=3.7', python_requires='>=3.7',
install_requires=["caosdb>=0.11.0", install_requires=["caosdb>=0.11.0",
"jsonref",
"jsonschema>=4.4.0", "jsonschema>=4.4.0",
"numpy>=1.17.3", "numpy>=1.17.3",
"openpyxl>=3.0.7", "openpyxl>=3.0.7",
......
...@@ -35,8 +35,9 @@ not defined, simply the name can be supplied with no value. ...@@ -35,8 +35,9 @@ not defined, simply the name can be supplied with no value.
Parents can be provided under the 'inherit_from_xxxx' keywords. The value needs Parents can be provided under the 'inherit_from_xxxx' keywords. The value needs
to be a list with the names. Here, NO NEW entities can be defined. to be a list with the names. Here, NO NEW entities can be defined.
""" """
import json
import argparse import argparse
import json
import jsonref
import re import re
import sys import sys
import yaml import yaml
...@@ -76,7 +77,8 @@ JSON_SCHEMA_ATOMIC_TYPES = [ ...@@ -76,7 +77,8 @@ JSON_SCHEMA_ATOMIC_TYPES = [
"string", "string",
"boolean", "boolean",
"integer", "integer",
"number" "number",
"null"
] ]
...@@ -152,13 +154,29 @@ def parse_model_from_string(string): ...@@ -152,13 +154,29 @@ def parse_model_from_string(string):
return parser.parse_model_from_string(string) return parser.parse_model_from_string(string)
def parse_model_from_json_schema(filename: str): def parse_model_from_json_schema(
filename: str,
top_level_recordtype: bool = True,
types_for_missing_array_items: dict = {},
ignore_unspecified_array_items: bool = False
):
"""Return a datamodel parsed from a json schema definition. """Return a datamodel parsed from a json schema definition.
Parameters Parameters
---------- ----------
filename : str filename : str
The path of the json schema file that is to be parsed The path of the json schema file that is to be parsed
top_level_recordtype : bool, optional
Whether there is a record type defined at the top level of the
schema. Default is true.
types_for_missing_array_items : dict, optional
dictionary containing fall-back types for json entries with `type:
array` but without `items` specification. Default is an empty dict.
ignore_unspecified_array_items : bool, optional
Whether to ignore `type: array` entries the type of which is not
specified by their `items` property or given in
`types_for_missing_array_items`. An error is raised if they are not
ignored. Default is False.
Returns Returns
------- -------
...@@ -174,10 +192,10 @@ def parse_model_from_json_schema(filename: str): ...@@ -174,10 +192,10 @@ def parse_model_from_json_schema(filename: str):
""" """
# @author Florian Spreckelsen # @author Florian Spreckelsen
# @date 2022-02-17 # @date 2022-02-17
# @review Daniel Hornung 2022-02-18 # @review Timm Fitschen 2023-05-25
parser = JsonSchemaParser() parser = JsonSchemaParser(types_for_missing_array_items, ignore_unspecified_array_items)
return parser.parse_model_from_json_schema(filename) return parser.parse_model_from_json_schema(filename, top_level_recordtype)
class Parser(object): class Parser(object):
...@@ -600,14 +618,13 @@ class Parser(object): ...@@ -600,14 +618,13 @@ class Parser(object):
class JsonSchemaParser(Parser): class JsonSchemaParser(Parser):
"""Extends the yaml parser to read in datamodels defined in a json schema. """Extends the yaml parser to read in datamodels defined in a json schema.
**EXPERIMENTAL:** While this calss can already be used to create data models **EXPERIMENTAL:** While this class can already be used to create data models
from basic json schemas, there are the following limitations and missing from basic json schemas, there are the following limitations and missing
features: features:
* Due to limitations of json-schema itself, we currently do not support * Due to limitations of json-schema itself, we currently do not support
inheritance in the imported data models inheritance in the imported data models
* The same goes for suggested properties of RecordTypes * The same goes for suggested properties of RecordTypes
* Currently, ``$defs`` and ``$ref`` in the input schema are not resolved.
* Already defined RecordTypes and (scalar) Properties can't be re-used as * Already defined RecordTypes and (scalar) Properties can't be re-used as
list properties list properties
* Reference properties that are different from the referenced RT. (Although * Reference properties that are different from the referenced RT. (Although
...@@ -615,15 +632,18 @@ class JsonSchemaParser(Parser): ...@@ -615,15 +632,18 @@ class JsonSchemaParser(Parser):
* Values * Values
* Roles * Roles
* The extern keyword from the yaml parser * The extern keyword from the yaml parser
* Currently, a json-schema cannot be transformed into a data model if its
root element isn't a RecordType (or Property) with ``title`` and ``type``.
""" """
# @author Florian Spreckelsen # @author Florian Spreckelsen
# @date 2022-02-17 # @date 2022-02-17
# @review Timm Fitschen 2022-02-30 # @review Timm Fitschen 2023-05-25
def parse_model_from_json_schema(self, filename: str): def __init__(self, types_for_missing_array_items={}, ignore_unspecified_array_items=False):
super().__init__()
self.types_for_missing_array_items = types_for_missing_array_items
self.ignore_unspecified_array_items = ignore_unspecified_array_items
def parse_model_from_json_schema(self, filename: str, top_level_recordtype: bool = True):
"""Return a datamodel created from the definition in the json schema in """Return a datamodel created from the definition in the json schema in
`filename`. `filename`.
...@@ -631,6 +651,9 @@ class JsonSchemaParser(Parser): ...@@ -631,6 +651,9 @@ class JsonSchemaParser(Parser):
---------- ----------
filename : str filename : str
The path to the json-schema file containing the datamodel definition The path to the json-schema file containing the datamodel definition
top_level_recordtype : bool, optional
Whether there is a record type defined at the top level of the
schema. Default is true.
Returns Returns
------- -------
...@@ -639,13 +662,13 @@ class JsonSchemaParser(Parser): ...@@ -639,13 +662,13 @@ class JsonSchemaParser(Parser):
""" """
# @author Florian Spreckelsen # @author Florian Spreckelsen
# @date 2022-02-17 # @date 2022-02-17
# @review Timm Fitschen 2022-02-30 # @review Timm Fitschen 2023-05-25
with open(filename, 'r') as schema_file: with open(filename, 'r') as schema_file:
model_dict = json.load(schema_file) model_dict = jsonref.load(schema_file)
return self._create_model_from_dict(model_dict) return self._create_model_from_dict(model_dict, top_level_recordtype=top_level_recordtype)
def _create_model_from_dict(self, model_dict: [dict, List[dict]]): def _create_model_from_dict(self, model_dict: [dict, List[dict]], top_level_recordtype: bool = True):
"""Parse a dictionary and return the Datamodel created from it. """Parse a dictionary and return the Datamodel created from it.
The dictionary was typically created from the model definition in a json schema file. The dictionary was typically created from the model definition in a json schema file.
...@@ -654,17 +677,28 @@ class JsonSchemaParser(Parser): ...@@ -654,17 +677,28 @@ class JsonSchemaParser(Parser):
---------- ----------
model_dict : dict or list[dict] model_dict : dict or list[dict]
One or several dictionaries read in from a json-schema file One or several dictionaries read in from a json-schema file
top_level_recordtype : bool, optional
Whether there is a record type defined at the top level of the
schema. Default is true.
Returns Returns
------- -------
our : DataModel our : DataModel
The datamodel defined in `model_dict` The datamodel defined in `model_dict`
""" """
# @review Timm Fitschen 2022-02-30 # @review Timm Fitschen 2023-05-25
if isinstance(model_dict, dict): if isinstance(model_dict, dict):
model_dict = [model_dict] model_dict = [model_dict]
for ii, elt in enumerate(model_dict): for ii, elt in enumerate(model_dict):
try:
jsonschema.Draft202012Validator.check_schema(elt)
except jsonschema.SchemaError as err:
key = elt["title"] if "title" in elt else f"element {ii}"
raise JsonSchemaDefinitionError(
f"Json Schema error in {key}:\n{str(err)}") from err
if top_level_recordtype:
if "title" not in elt: if "title" not in elt:
raise JsonSchemaDefinitionError( raise JsonSchemaDefinitionError(
f"Object {ii+1} is lacking the `title` key word") f"Object {ii+1} is lacking the `title` key word")
...@@ -672,18 +706,39 @@ class JsonSchemaParser(Parser): ...@@ -672,18 +706,39 @@ class JsonSchemaParser(Parser):
raise JsonSchemaDefinitionError( raise JsonSchemaDefinitionError(
f"Object {ii+1} is lacking the `type` key word") f"Object {ii+1} is lacking the `type` key word")
# Check if this is a valid Json Schema # Check if this is a valid Json Schema
try:
jsonschema.Draft202012Validator.check_schema(elt)
except jsonschema.SchemaError as err:
raise JsonSchemaDefinitionError(
f"Json Schema error in {elt['title']}:\n{str(err)}") from err
name = self._stringify(elt["title"], context=elt) name = self._stringify(elt["title"], context=elt)
self._treat_element(elt, name) self._treat_element(elt, name)
elif "properties" in elt or "patternProperties" in elt:
# No top-level type but there are entities
if "properties" in elt:
for key, prop in elt["properties"].items():
name = self._get_name_from_property(key, prop)
self._treat_element(prop, name)
if "patternProperties" in elt:
# See also treatment in ``_treat_record_type``. Since here,
# there is no top-level RT we use the prefix `__Pattern`,
# i.e., the resulting Record Types will be called
# `__PatternElement`.
self._treat_pattern_properties(
elt["patternProperties"], name_prefix="__Pattern")
else:
# Neither RecordType itself, nor further properties in schema,
# so nothing to do here. Maybe add something in the future.
continue
return DataModel(self.model.values()) return DataModel(self.model.values())
def _get_name_from_property(self, key: str, prop: dict):
    """Return the model name for a schema property.

    The property's ``title`` takes precedence over its key in the
    enclosing ``properties`` mapping; either way the result is passed
    through ``self._stringify``.

    Parameters
    ----------
    key : str
        The key of the property in the json-schema ``properties`` dict.
    prop : dict
        The json-schema element describing the property.

    Returns
    -------
    str
        The stringified name.
    """
    # @review Timm Fitschen 2023-05-25
    return self._stringify(prop.get("title", key))
def _get_atomic_datatype(self, elt): def _get_atomic_datatype(self, elt):
# @review Timm Fitschen 2022-02-30 # @review Timm Fitschen 2023-05-25
if elt["type"] == "string": if elt["type"] == "string":
if "format" in elt and elt["format"] in ["date", "date-time"]: if "format" in elt and elt["format"] in ["date", "date-time"]:
return db.DATETIME return db.DATETIME
...@@ -695,11 +750,15 @@ class JsonSchemaParser(Parser): ...@@ -695,11 +750,15 @@ class JsonSchemaParser(Parser):
return db.DOUBLE return db.DOUBLE
elif elt["type"] == "boolean": elif elt["type"] == "boolean":
return db.BOOLEAN return db.BOOLEAN
elif elt["type"] == "null":
# This could be any datatype since a valid json will never have a
# value in a null property. We use TEXT for convenience.
return db.TEXT
else: else:
raise JsonSchemaDefinitionError(f"Unkown atomic type in {elt}.") raise JsonSchemaDefinitionError(f"Unkown atomic type in {elt}.")
def _treat_element(self, elt: dict, name: str): def _treat_element(self, elt: dict, name: str):
# @review Timm Fitschen 2022-02-30 # @review Timm Fitschen 2023-05-25
force_list = False force_list = False
if name in self.model: if name in self.model:
return self.model[name], force_list return self.model[name], force_list
...@@ -710,12 +769,17 @@ class JsonSchemaParser(Parser): ...@@ -710,12 +769,17 @@ class JsonSchemaParser(Parser):
if name == "name": if name == "name":
# This is identified with the CaosDB name property as long as the # This is identified with the CaosDB name property as long as the
# type is correct. # type is correct.
if not elt["type"] == "string": if not elt["type"] == "string" and "string" not in elt["type"]:
raise JsonSchemaDefinitionError( raise JsonSchemaDefinitionError(
"The 'name' property must be string-typed, otherwise it cannot " "The 'name' property must be string-typed, otherwise it cannot "
"be identified with CaosDB's name property." "be identified with CaosDB's name property."
) )
return None, force_list return None, force_list
# LinkAhead suports null for all types, so in the very special case of
# `"type": ["null", "<other_type>"]`, only consider the other type:
if isinstance(elt["type"], list) and len(elt["type"]) == 2 and "null" in elt["type"]:
elt["type"].remove("null")
elt["type"] = elt["type"][0]
if "enum" in elt: if "enum" in elt:
ent = self._treat_enum(elt, name) ent = self._treat_enum(elt, name)
elif elt["type"] in JSON_SCHEMA_ATOMIC_TYPES: elif elt["type"] in JSON_SCHEMA_ATOMIC_TYPES:
...@@ -733,11 +797,12 @@ class JsonSchemaParser(Parser): ...@@ -733,11 +797,12 @@ class JsonSchemaParser(Parser):
# treat_something function # treat_something function
ent.description = elt["description"] ent.description = elt["description"]
if ent is not None:
self.model[name] = ent self.model[name] = ent
return ent, force_list return ent, force_list
def _treat_record_type(self, elt: dict, name: str): def _treat_record_type(self, elt: dict, name: str):
# @review Timm Fitschen 2022-02-30 # @review Timm Fitschen 2023-05-25
rt = db.RecordType(name=name) rt = db.RecordType(name=name)
if "required" in elt: if "required" in elt:
required = elt["required"] required = elt["required"]
...@@ -745,10 +810,7 @@ class JsonSchemaParser(Parser): ...@@ -745,10 +810,7 @@ class JsonSchemaParser(Parser):
required = [] required = []
if "properties" in elt: if "properties" in elt:
for key, prop in elt["properties"].items(): for key, prop in elt["properties"].items():
if "title" in prop: name = self._get_name_from_property(key, prop)
name = self._stringify(prop["title"])
else:
name = self._stringify(key)
prop_ent, force_list = self._treat_element(prop, name) prop_ent, force_list = self._treat_element(prop, name)
if prop_ent is None: if prop_ent is None:
# Nothing to be appended since the property has to be # Nothing to be appended since the property has to be
...@@ -762,6 +824,17 @@ class JsonSchemaParser(Parser): ...@@ -762,6 +824,17 @@ class JsonSchemaParser(Parser):
rt.add_property(prop_ent, importance=importance, rt.add_property(prop_ent, importance=importance,
datatype=db.LIST(prop_ent)) datatype=db.LIST(prop_ent))
if "patternProperties" in elt:
pattern_property_rts = self._treat_pattern_properties(
elt["patternProperties"], name_prefix=name)
for ppr in pattern_property_rts:
# add reference to pattern property type. These can never be
# obligatory since pattern properties cannot be required in the
# original schema (since their actual names are not known a
# priori).
rt.add_property(ppr)
if "description" in elt: if "description" in elt:
rt.description = elt["description"] rt.description = elt["description"]
return rt return rt
...@@ -783,11 +856,14 @@ class JsonSchemaParser(Parser): ...@@ -783,11 +856,14 @@ class JsonSchemaParser(Parser):
return rt return rt
def _treat_list(self, elt: dict, name: str): def _treat_list(self, elt: dict, name: str):
# @review Timm Fitschen 2022-02-30 # @review Timm Fitschen 2023-05-25
if "items" not in elt: if "items" not in elt and name not in self.types_for_missing_array_items:
if self.ignore_unspecified_array_items:
return None, False
raise JsonSchemaDefinitionError( raise JsonSchemaDefinitionError(
f"The definition of the list items is missing in {elt}.") f"The definition of the list items is missing in {elt}.")
if "items" in elt:
items = elt["items"] items = elt["items"]
if "enum" in items: if "enum" in items:
return self._treat_enum(items, name), True return self._treat_enum(items, name), True
...@@ -805,6 +881,71 @@ class JsonSchemaParser(Parser): ...@@ -805,6 +881,71 @@ class JsonSchemaParser(Parser):
items, self._stringify(items["title"])) items, self._stringify(items["title"]))
self.model[ref_rt.name] = ref_rt self.model[ref_rt.name] = ref_rt
return db.Property(name=name, datatype=db.LIST(ref_rt)), False return db.Property(name=name, datatype=db.LIST(ref_rt)), False
else:
# Use predefined type:
datatype = db.LIST(self.types_for_missing_array_items[name])
return db.Property(name=name, datatype=datatype), False
def _get_pattern_prop(self):
    """Return the shared ``__matched_pattern`` TEXT property.

    The property is created on first use and cached in ``self.model``
    under the key ``__pattern_property_pattern_property``, so all
    pattern-property RecordTypes reference the same entity.
    """
    # @review Timm Fitschen 2023-05-25
    cache_key = "__pattern_property_pattern_property"
    if cache_key not in self.model:
        self.model[cache_key] = db.Property(
            name="__matched_pattern", datatype=db.TEXT)
    return self.model[cache_key]
def _treat_pattern_properties(self, pattern_elements, name_prefix=""):
    """Special Treatment for pattern properties: A RecordType is created for
    each pattern property. In case of a `type: object` PatternProperty, the
    remaining properties of the JSON entry are appended to the new
    RecordType; in case of an atomic type PatternProperty, a single value
    Property is added to the RecordType.

    Parameters
    ----------
    pattern_elements : dict
        The value of a json-schema ``patternProperties`` entry, mapping
        each name pattern to its schema element.
    name_prefix : str, optional
        Prefix for the names of the generated RecordTypes (a counter
        suffix is appended when there is more than one pattern and no
        ``title`` is given). Default is "".

    Returns
    -------
    list
        The RecordTypes created for the pattern properties; every one of
        them carries an obligatory ``__matched_pattern`` property and a
        ``pattern: <key>`` note in its description.

    Raises
    ------
    NotImplementedError
        In case of patternProperties with non-object, non-atomic type, e.g.,
        array.
    """
    # @review Timm Fitschen 2023-05-25
    num_patterns = len(pattern_elements)
    pattern_prop = self._get_pattern_prop()
    returns = []
    for ii, (key, element) in enumerate(pattern_elements.items()):
        if "title" not in element:
            # No title given: derive the name from the parent's name,
            # disambiguating with a 1-based counter when necessary.
            name_suffix = f"_{ii+1}" if num_patterns > 1 else ""
            name = name_prefix + "Entry" + name_suffix
        else:
            # NOTE(review): unlike ``_get_name_from_property``, the title
            # is not passed through ``self._stringify`` here -- confirm
            # this is intentional.
            name = element["title"]
        if element["type"] == "object":
            # simple, is already an object, so can be treated like any other
            # record type.
            pattern_type = self._treat_record_type(element, name)
        elif element["type"] in JSON_SCHEMA_ATOMIC_TYPES:
            # create a property that stores the actual value of the pattern
            # property.
            propname = f"{name}_value"
            prop = db.Property(name=propname, datatype=self._get_atomic_datatype(element))
            self.model[propname] = prop
            pattern_type = db.RecordType(name=name)
            pattern_type.add_property(prop)
        else:
            raise NotImplementedError(
                "Pattern properties are currently only supported for types " +
                ", ".join(JSON_SCHEMA_ATOMIC_TYPES) + ", and object.")

        # Add pattern property and description
        pattern_type.add_property(pattern_prop, importance=db.OBLIGATORY)
        if pattern_type.description:
            pattern_type.description += f"\n\npattern: {key}"
        else:
            pattern_type.description = f"pattern: {key}"
        self.model[name] = pattern_type
        returns.append(pattern_type)

    return returns
if __name__ == "__main__": if __name__ == "__main__":
......
...@@ -15,6 +15,7 @@ This documentation helps you to :doc:`get started<getting_started>`, explains th ...@@ -15,6 +15,7 @@ This documentation helps you to :doc:`get started<getting_started>`, explains th
Concepts <concepts> Concepts <concepts>
The Caosdb Crawler <crawler> The Caosdb Crawler <crawler>
YAML data model specification <yaml_interface> YAML data model specification <yaml_interface>
Specifying a datamodel with JSON schema <json_schema_interface>
_apidoc/modules _apidoc/modules
......
Defining datamodels with a JSON schema specification
====================================================
TODO, see https://gitlab.com/caosdb/caosdb-advanced-user-tools/-/issues/42
Further information
###################
Pattern Properties
%%%%%%%%%%%%%%%%%%
The JSON-schema parser has rudimentary support for ``patternProperties``. Since
the actual names of these properties are not known a priori (only the pattern
that their names have to match), we create RecordTypes for all pattern
properties. The names of these RecordTypes are created from their parent
element's name by appending the string ``"Entry"`` and possibly a number if
there is more than one pattern property for one parent.
All the RecordTypes created for pattern properties have at least an obligatory
``__matched_pattern`` property which will -- as the name suggests -- store the
matched pattern of an actual data entry.
.. note::
The ``__matched_pattern`` property is added automatically to your datamodel
as soon as there is at least one pattern property in your JSON schema. So be
sure that you don't happen to have an entity with exactly this name in your
database.
E.g., a json schema with
.. code-block:: json
"dataset": {
"patternProperties": {
"^[0-9]{4,4}": {
"type": "boolean"
},
"^[0-9]{4,4}-[0-9]{2,2}-[0-9]{2,2}": {
"type": "object",
"properties": {
"date_id": {
"$ref": "#/definitions/uuid"
}
}
}
}
}
Would result in a ``Dataset`` RecordType that has the two properties
``DatasetEntry_1`` and ``DatasetEntry_2`` (as always, name can be overwritten
explicitly by specifying the ``title`` property), referencing corresponding
``DatasetEntry_1`` and ``DatasetEntry_2`` Records.
Apart from the aforementioned ``__matched_pattern`` property, ``DatasetEntry_1``
also has the ``DatasetEntry_1_value`` property with datatype ``BOOLEAN``, that
stores the actual value. In turn, ``DatasetEntry_2`` is of ``type: object`` and
is treated like any other RecordType. Consequently, it has, apart from the
``__matched_pattern`` property, a ``date_id`` property as specified in its
``properties``.
Array entries without ``items`` specification
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
JSON schema allows for properties of ``type: array`` without the ``items``
specification that consequently can be arrays of any (and of mixed) types. While
this is in general problematic when specifying a data model, sometimes these
properties cannot be specified further, e.g., when you're using an external
schema that you cannot change.
These properties can still be added to your datamodel by specifying their types
explicitly in a dictionary or, alternatively, they can be ignored. See the
``types_for_missing_array_items`` and ``ignore_unspecified_array_items``
parameters of ``models.parser.JsonSchemaParser``, respectively, for more
information.
...@@ -18,7 +18,8 @@ ...@@ -18,7 +18,8 @@
"date": { "type": "string", "format": "date" }, "date": { "type": "string", "format": "date" },
"integer": { "type": "integer", "description": "Some integer property" }, "integer": { "type": "integer", "description": "Some integer property" },
"boolean": { "type": "boolean" }, "boolean": { "type": "boolean" },
"number_prop": { "type": "number", "description": "Some float property" } "number_prop": { "type": "number", "description": "Some float property" },
"null_prop": { "type": "null", "description": "This property will never have a value." }
} }
} }
] ]
{
"title": "something_with_missing_array_items",
"type": "object",
"properties": {
"missing": {
"type": "array"
}
}
}
{
"$schema": "http://json-schema.org/draft-07/schema#",
"$id": "https://my-schema-id.net",
"type": "object",
"definitions": {
"uuid": {
"type": [
"string",
"null"
],
"pattern": "^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$"
},
"datetime": {
"type": "string",
"format": "date-time"
}
},
"properties": {
"Dataset1": {
"title": "Dataset1",
"description": "Some description",
"type": "object",
"properties": {
"title": {
"type": "string",
"description": "full dataset title"
},
"campaign": {
"type": "string",
"description": "FIXME"
},
"number_prop": {
"type": "number",
"description": "Some float property"
},
"user_id": {
"$ref": "#/definitions/uuid"
}
},
"required": ["title", "number_prop"]
}
},
"patternProperties": {
"^[0-9]{4,4}": {
"type": "boolean"
},
"^[0-9]{4,4}-[0-9]{2,2}-[0-9]{2,2}": {
"type": "object",
"properties": {
"date_id": {
"$ref": "#/definitions/uuid"
}
}
}
}
}
[
{
"title": "Dataset",
"type": "object",
"patternProperties": {
"^[0-9]{4,4}": {
"type": "boolean"
},
"^[0-9]{4,4}-[0-9]{2,2}-[0-9]{2,2}": {
"type": "object",
"properties": {
"date_id": {
"type": [
"string",
"null"
],
"pattern": "^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$"
}
}
}
}
},
{
"title": "Dataset2",
"type": "object",
"properties": {
"datetime": {
"type": "string",
"format": "date-time"
}
},
"patternProperties": {
".*": {
"title": "Literally anything",
"type": "object"
}
}
}
]
...@@ -103,7 +103,7 @@ def test_datamodel_with_atomic_properties(): ...@@ -103,7 +103,7 @@ def test_datamodel_with_atomic_properties():
assert isinstance(rt2, db.RecordType) assert isinstance(rt2, db.RecordType)
assert rt2.name == "Dataset2" assert rt2.name == "Dataset2"
assert not rt2.description assert not rt2.description
assert len(rt2.get_properties()) == 5 assert len(rt2.get_properties()) == 6
date_prop = rt2.get_property("date") date_prop = rt2.get_property("date")
assert date_prop.datatype == db.DATETIME assert date_prop.datatype == db.DATETIME
...@@ -121,6 +121,9 @@ def test_datamodel_with_atomic_properties(): ...@@ -121,6 +121,9 @@ def test_datamodel_with_atomic_properties():
float_prop2 = rt2.get_property("number_prop") float_prop2 = rt2.get_property("number_prop")
assert float_prop.datatype == float_prop2.datatype assert float_prop.datatype == float_prop2.datatype
null_prop = rt2.get_property("null_prop")
assert null_prop.datatype == db.TEXT
def test_required_no_list(): def test_required_no_list():
"""Exception must be raised when "required" is not a list.""" """Exception must be raised when "required" is not a list."""
...@@ -356,3 +359,130 @@ def test_name_property(): ...@@ -356,3 +359,130 @@ def test_name_property():
assert str(err.value).startswith( assert str(err.value).startswith(
"The 'name' property must be string-typed, otherwise it cannot be identified with CaosDB's " "The 'name' property must be string-typed, otherwise it cannot be identified with CaosDB's "
"name property.") "name property.")
def test_no_toplevel_entity():
    """Parse a schema whose root is not a RecordType.

    With ``top_level_recordtype=False`` each entry of the root's
    ``properties`` becomes its own entity, and root-level
    ``patternProperties`` produce ``__PatternEntry_*`` RecordTypes.
    """
    model = parse_model_from_json_schema(os.path.join(
        FILEPATH, "datamodel_no_toplevel_entity.schema.json"), top_level_recordtype=False)

    # The object defined under properties/Dataset1 becomes a RecordType.
    assert "Dataset1" in model
    rt1 = model["Dataset1"]
    assert rt1.name == "Dataset1"
    assert rt1.description == "Some description"
    assert len(rt1.get_properties()) == 4

    assert rt1.get_property("title") is not None
    assert rt1.get_property("campaign") is not None
    assert rt1.get_property("number_prop") is not None
    assert rt1.get_property("user_id") is not None

    # "required" entries of the schema become OBLIGATORY properties.
    title_prop = rt1.get_property("title")
    assert title_prop.datatype == db.TEXT
    assert rt1.get_importance(title_prop.name) == db.OBLIGATORY

    campaign_prop = rt1.get_property("campaign")
    assert campaign_prop.datatype == db.TEXT
    assert rt1.get_importance(campaign_prop.name) == db.RECOMMENDED

    float_prop = rt1.get_property("number_prop")
    assert float_prop.datatype == db.DOUBLE
    assert rt1.get_importance(float_prop.name) == db.OBLIGATORY

    # ``$ref``-resolved ["string", "null"] type collapses to TEXT.
    uid_prop = rt1.get_property("user_id")
    assert uid_prop.datatype == db.TEXT
    assert rt1.get_importance(uid_prop.name) == db.RECOMMENDED

    # pattern properties without top-level entity:
    assert "__PatternEntry_1" in model
    assert "__PatternEntry_2" in model

    # Atomic-typed pattern property: value is stored in a *_value property.
    pattern_boolean_rt = model["__PatternEntry_1"]
    assert "pattern: " in pattern_boolean_rt.description
    assert len(pattern_boolean_rt.properties) == 2
    pp = pattern_boolean_rt.get_property("__matched_pattern")
    assert pp.datatype == db.TEXT
    assert pattern_boolean_rt.get_importance(pp.name) == db.OBLIGATORY
    value_prop = pattern_boolean_rt.get_property("__PatternEntry_1_value")
    assert value_prop.datatype == db.BOOLEAN

    # Object-typed pattern property: treated like a normal RecordType.
    pattern_object_rt = model["__PatternEntry_2"]
    assert "pattern: " in pattern_object_rt.description
    assert len(pattern_object_rt.properties) == 2
    pp = pattern_object_rt.get_property("__matched_pattern")
    assert pp.datatype == db.TEXT
    assert pattern_object_rt.get_importance(pp.name) == db.OBLIGATORY
    date_id_prop = pattern_object_rt.get_property("date_id")
    assert date_id_prop.datatype == db.TEXT
def test_missing_array_items():
    """Check the three ways of handling ``type: array`` without ``items``:
    strict mode raises, ignore mode drops the property, and
    ``types_for_missing_array_items`` supplies a fall-back datatype."""
    schema_path = os.path.join(
        FILEPATH, "datamodel_missing_array_items.schema.json")

    # strict behavior
    with pytest.raises(JsonSchemaDefinitionError) as err:
        parse_model_from_json_schema(schema_path)
    assert "{'type': 'array'}" in str(err)

    # ignore all problems, so a RT is created that does not have the property
    model = parse_model_from_json_schema(
        schema_path, ignore_unspecified_array_items=True)
    assert "something_with_missing_array_items" in model
    rt = model["something_with_missing_array_items"]
    assert isinstance(rt, db.RecordType)
    assert rt.get_property("missing") is None

    # specify the type:
    model = parse_model_from_json_schema(
        schema_path, types_for_missing_array_items={"missing": db.FILE})
    assert "something_with_missing_array_items" in model
    rt = model["something_with_missing_array_items"]
    missing_prop = rt.get_property("missing")
    assert missing_prop is not None
    assert missing_prop.datatype == db.LIST(db.FILE)
def test_pattern_properties():
    """Pattern properties inside a top-level RecordType: each pattern becomes
    a referenced ``<Parent>Entry_<n>`` RecordType (or a RecordType named by
    its ``title`` when given)."""
    model = parse_model_from_json_schema(os.path.join(
        FILEPATH, "datamodel_pattern_properties.schema.json"))

    # Two patterns, no titles -> DatasetEntry_1 and DatasetEntry_2 references.
    assert "Dataset" in model
    rt1 = model["Dataset"]
    assert len(rt1.properties) == 2
    for name in ["DatasetEntry_1", "DatasetEntry_2"]:
        assert rt1.get_property(name) is not None
        assert rt1.get_property(name).is_reference()

    # Atomic (boolean) pattern: value stored in DatasetEntry_1_value.
    pattern_boolean_rt = model["DatasetEntry_1"]
    assert "pattern: " in pattern_boolean_rt.description
    assert len(pattern_boolean_rt.properties) == 2
    pp = pattern_boolean_rt.get_property("__matched_pattern")
    assert pp.datatype == db.TEXT
    assert pattern_boolean_rt.get_importance(pp.name) == db.OBLIGATORY
    value_prop = pattern_boolean_rt.get_property("DatasetEntry_1_value")
    assert value_prop.datatype == db.BOOLEAN

    # Object pattern: its own properties are kept alongside __matched_pattern.
    pattern_object_rt = model["DatasetEntry_2"]
    assert "pattern: " in pattern_object_rt.description
    assert len(pattern_object_rt.properties) == 2
    pp = pattern_object_rt.get_property("__matched_pattern")
    assert pp.datatype == db.TEXT
    assert pattern_object_rt.get_importance(pp.name) == db.OBLIGATORY
    date_id_prop = pattern_object_rt.get_property("date_id")
    assert date_id_prop.datatype == db.TEXT

    assert "Dataset2" in model
    rt2 = model["Dataset2"]
    assert len(rt2.properties) == 2
    # This has been tested elsewhere, just make sure that it is properly created
    # in the presence of pattern properties, too.
    assert rt2.get_property("datetime") is not None

    # A pattern with a ``title`` is named after that title instead.
    assert rt2.get_property("Literally anything") is not None
    assert rt2.get_property("Literally anything").is_reference()

    pattern_named_rt = model["Literally anything"]
    assert len(pattern_named_rt.properties) == 1
    assert pattern_named_rt.get_property("__matched_pattern") is not None
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment