diff --git a/.gitignore b/.gitignore
index 0848d3fbca6412aac998fdbaa45ea03835fe67d0..4c175607e5327472c301949f187c58d925f0d05e 100644
--- a/.gitignore
+++ b/.gitignore
@@ -18,3 +18,4 @@ build/
 # documentation
 _apidoc
 /dist/
+*~
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 2317fa8c76086e9c5a9079daa5cbf31d74f4d088..fe7f449ab1d2017e1ae802c0774e77a7d8bd9de7 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -7,16 +7,23 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 ## [Unreleased] ##
 
 ### Added ###
+
 - TableImporter now accepts a `existing_columns` argument which demands that certain columns exist
+- The `JsonSchemaParser` class supports `patternProperties`
+- The `JsonSchemaParser` class supports json-schema references (`$ref`)
 
 ### Changed ###
+
 - The converters and datatype arguments of TableImporter now may have keys for nonexisting columns
+- The `JsonSchemaParser` class does not require the top-level entry of a json
+  schema definition to specify a RecordType.
 
 ### Deprecated ###
 
 ### Removed ###
 
 ### Fixed ###
+
 - refactored to work with the new default key word in FIND queries: RECORD
 
 ### Security ###
diff --git a/setup.py b/setup.py
index 6201149eb90df793e8c28e5684f79cf617548d0b..4f4d8943fa16b842dfe3d25f3d83bc76b64e42b8 100755
--- a/setup.py
+++ b/setup.py
@@ -156,6 +156,7 @@ def setup_package():
         author_email='h.tomwoerden@indiscale.com',
         python_requires='>=3.7',
         install_requires=["caosdb>=0.11.0",
+                          "jsonref",
                           "jsonschema>=4.4.0",
                           "numpy>=1.17.3",
                           "openpyxl>=3.0.7",
@@ -163,7 +164,7 @@ def setup_package():
                           "xlrd>=2.0",
                           ],
         extras_require={"h5-crawler": ["h5py>=3.3.0", ],
-                        "gitignore-parser ": ["gitignore-parser >=0.1.0", ],
+                        "gitignore-parser": ["gitignore-parser >=0.1.0", ],
                         },
         packages=find_packages('src'),
         package_dir={'': 'src'},
diff --git a/src/caosadvancedtools/models/parser.py b/src/caosadvancedtools/models/parser.py
index bb06823a7398dd4846712077afa0b35d0713185d..f76761839172994dd14bcc65a3d8e1b290d78a9f 100644
--- a/src/caosadvancedtools/models/parser.py
+++ b/src/caosadvancedtools/models/parser.py
@@ -35,8 +35,9 @@ not defined, simply the name can be supplied with no value.
 Parents can be provided under the 'inherit_from_xxxx' keywords. The value needs
 to be a list with the names. Here, NO NEW entities can be defined.
 """
-import json
 import argparse
+import json
+import jsonref
 import re
 import sys
 import yaml
@@ -76,7 +77,8 @@ JSON_SCHEMA_ATOMIC_TYPES = [
     "string",
     "boolean",
     "integer",
-    "number"
+    "number",
+    "null"
 ]
 
 
@@ -152,13 +154,29 @@ def parse_model_from_string(string):
     return parser.parse_model_from_string(string)
 
 
-def parse_model_from_json_schema(filename: str):
+def parse_model_from_json_schema(
+        filename: str,
+        top_level_recordtype: bool = True,
+        types_for_missing_array_items: dict = {},
+        ignore_unspecified_array_items: bool = False
+):
     """Return a datamodel parsed from a json schema definition.
 
     Parameters
    ----------
     filename : str
         The path of the json schema file that is to be parsed
+    top_level_recordtype : bool, optional
+        Whether there is a record type defined at the top level of the
+        schema. Default is true.
+    types_for_missing_array_items : dict, optional
+        Dictionary containing fall-back types for json entries with `type:
+        array` but without `items` specification. Default is an empty dict.
+    ignore_unspecified_array_items : bool, optional
+        Whether to ignore `type: array` entries the type of which is not
+        specified by their `items` property or given in
+        `types_for_missing_array_items`. An error is raised if they are not
+        ignored. Default is False.
 
     Returns
     -------
@@ -174,10 +192,10 @@
     """
     # @author Florian Spreckelsen
    # @date 2022-02-17
-    # @review Daniel Hornung 2022-02-18
-    parser = JsonSchemaParser()
+    # @review Timm Fitschen 2023-05-25
+    parser = JsonSchemaParser(types_for_missing_array_items, ignore_unspecified_array_items)
 
-    return parser.parse_model_from_json_schema(filename)
+    return parser.parse_model_from_json_schema(filename, top_level_recordtype)
 
 
 class Parser(object):
@@ -600,14 +618,13 @@ class Parser(object):
 
 class JsonSchemaParser(Parser):
     """Extends the yaml parser to read in datamodels defined in a json schema.
 
-    **EXPERIMENTAL:** While this calss can already be used to create data models
+    **EXPERIMENTAL:** While this class can already be used to create data models
     from basic json schemas, there are the following limitations and missing
     features:
 
     * Due to limitations of json-schema itself, we currently do not support
       inheritance in the imported data models
     * The same goes for suggested properties of RecordTypes
-    * Currently, ``$defs`` and ``$ref`` in the input schema are not resolved.
     * Already defined RecordTypes and (scalar) Properties can't be re-used as
       list properties
     * Reference properties that are different from the referenced RT. (Although
@@ -615,15 +632,18 @@
     * Values
     * Roles
     * The extern keyword from the yaml parser
-    * Currently, a json-schema cannot be transformed into a data model if its
-      root element isn't a RecordType (or Property) with ``title`` and ``type``.
     """
     # @author Florian Spreckelsen
     # @date 2022-02-17
-    # @review Timm Fitschen 2022-02-30
+    # @review Timm Fitschen 2023-05-25
+
+    def __init__(self, types_for_missing_array_items={}, ignore_unspecified_array_items=False):
+        super().__init__()
+        self.types_for_missing_array_items = types_for_missing_array_items
+        self.ignore_unspecified_array_items = ignore_unspecified_array_items
 
-    def parse_model_from_json_schema(self, filename: str):
+    def parse_model_from_json_schema(self, filename: str, top_level_recordtype: bool = True):
         """Return a datamodel created from the definition in the json schema in
         `filename`.
 
         Parameters
@@ -631,6 +651,9 @@
         ----------
         filename : str
             The path to the json-schema file containing the datamodel definition
+        top_level_recordtype : bool, optional
+            Whether there is a record type defined at the top level of the
+            schema. Default is true.
 
         Returns
         -------
@@ -639,13 +662,13 @@
         """
         # @author Florian Spreckelsen
         # @date 2022-02-17
-        # @review Timm Fitschen 2022-02-30
+        # @review Timm Fitschen 2023-05-25
         with open(filename, 'r') as schema_file:
-            model_dict = json.load(schema_file)
+            model_dict = jsonref.load(schema_file)
 
-        return self._create_model_from_dict(model_dict)
+        return self._create_model_from_dict(model_dict, top_level_recordtype=top_level_recordtype)
 
-    def _create_model_from_dict(self, model_dict: [dict, List[dict]]):
+    def _create_model_from_dict(self, model_dict: [dict, List[dict]], top_level_recordtype: bool = True):
         """Parse a dictionary and return the Datamodel created from it. The
         dictionary was typically created from the model definition in a json
         schema file.
@@ -654,36 +677,68 @@
         ----------
         model_dict : dict or list[dict]
             One or several dictionaries read in from a json-schema file
+        top_level_recordtype : bool, optional
+            Whether there is a record type defined at the top level of the
+            schema. Default is true.
 
         Returns
         -------
         our : DataModel
             The datamodel defined in `model_dict`
         """
-        # @review Timm Fitschen 2022-02-30
+        # @review Timm Fitschen 2023-05-25
         if isinstance(model_dict, dict):
             model_dict = [model_dict]
 
         for ii, elt in enumerate(model_dict):
-            if "title" not in elt:
-                raise JsonSchemaDefinitionError(
-                    f"Object {ii+1} is lacking the `title` key word")
-            if "type" not in elt:
-                raise JsonSchemaDefinitionError(
-                    f"Object {ii+1} is lacking the `type` key word")
-            # Check if this is a valid Json Schema
             try:
                 jsonschema.Draft202012Validator.check_schema(elt)
             except jsonschema.SchemaError as err:
+                key = elt["title"] if "title" in elt else f"element {ii}"
                 raise JsonSchemaDefinitionError(
-                    f"Json Schema error in {elt['title']}:\n{str(err)}") from err
-            name = self._stringify(elt["title"], context=elt)
-            self._treat_element(elt, name)
+                    f"Json Schema error in {key}:\n{str(err)}") from err
+
+            if top_level_recordtype:
+                if "title" not in elt:
+                    raise JsonSchemaDefinitionError(
+                        f"Object {ii+1} is lacking the `title` key word")
+                if "type" not in elt:
+                    raise JsonSchemaDefinitionError(
+                        f"Object {ii+1} is lacking the `type` key word")
+                name = self._stringify(elt["title"], context=elt)
+                self._treat_element(elt, name)
+            elif "properties" in elt or "patternProperties" in elt:
+                # No top-level type but there are entities
+                if "properties" in elt:
+                    for key, prop in elt["properties"].items():
+                        name = self._get_name_from_property(key, prop)
+                        self._treat_element(prop, name)
+                if "patternProperties" in elt:
+                    # See also the treatment in ``_treat_record_type``. Since
+                    # there is no top-level RT here, we use the prefix
+                    # `__Pattern`, i.e., the resulting RecordTypes will be
+                    # called `__PatternEntry`.
+                    self._treat_pattern_properties(
+                        elt["patternProperties"], name_prefix="__Pattern")
+            else:
+                # Neither RecordType itself, nor further properties in schema,
+                # so nothing to do here. Maybe add something in the future.
+                continue
 
         return DataModel(self.model.values())
 
+    def _get_name_from_property(self, key: str, prop: dict):
+        # @review Timm Fitschen 2023-05-25
+        if "title" in prop:
+            name = self._stringify(prop["title"])
+        else:
+            name = self._stringify(key)
+
+        return name
+
     def _get_atomic_datatype(self, elt):
-        # @review Timm Fitschen 2022-02-30
+        # @review Timm Fitschen 2023-05-25
         if elt["type"] == "string":
             if "format" in elt and elt["format"] in ["date", "date-time"]:
                 return db.DATETIME
@@ -695,11 +750,15 @@
             return db.DOUBLE
         elif elt["type"] == "boolean":
             return db.BOOLEAN
+        elif elt["type"] == "null":
+            # This could be any datatype since a valid json will never have a
+            # value in a null property. We use TEXT for convenience.
+            return db.TEXT
         else:
             raise JsonSchemaDefinitionError(f"Unkown atomic type in {elt}.")
 
     def _treat_element(self, elt: dict, name: str):
-        # @review Timm Fitschen 2022-02-30
+        # @review Timm Fitschen 2023-05-25
         force_list = False
         if name in self.model:
             return self.model[name], force_list
@@ -710,12 +769,17 @@
         if name == "name":
             # This is identified with the CaosDB name property as long as the
             # type is correct.
-            if not elt["type"] == "string":
+            if not elt["type"] == "string" and "string" not in elt["type"]:
                 raise JsonSchemaDefinitionError(
                     "The 'name' property must be string-typed, otherwise it cannot "
                     "be identified with CaosDB's name property."
                 )
             return None, force_list
+        # LinkAhead supports null for all types, so in the very special case of
+        # `"type": ["null", "<other_type>"]`, only consider the other type:
+        if isinstance(elt["type"], list) and len(elt["type"]) == 2 and "null" in elt["type"]:
+            elt["type"].remove("null")
+            elt["type"] = elt["type"][0]
         if "enum" in elt:
             ent = self._treat_enum(elt, name)
         elif elt["type"] in JSON_SCHEMA_ATOMIC_TYPES:
@@ -733,11 +797,12 @@
             # treat_something function
             ent.description = elt["description"]
 
-        self.model[name] = ent
+        if ent is not None:
+            self.model[name] = ent
         return ent, force_list
 
     def _treat_record_type(self, elt: dict, name: str):
-        # @review Timm Fitschen 2022-02-30
+        # @review Timm Fitschen 2023-05-25
         rt = db.RecordType(name=name)
         if "required" in elt:
             required = elt["required"]
@@ -745,10 +810,7 @@
             required = []
         if "properties" in elt:
             for key, prop in elt["properties"].items():
-                if "title" in prop:
-                    name = self._stringify(prop["title"])
-                else:
-                    name = self._stringify(key)
+                name = self._get_name_from_property(key, prop)
                 prop_ent, force_list = self._treat_element(prop, name)
                 if prop_ent is None:
                     # Nothing to be appended since the property has to be
@@ -762,6 +824,17 @@
                 rt.add_property(prop_ent, importance=importance,
                                 datatype=db.LIST(prop_ent))
 
+        if "patternProperties" in elt:
+
+            pattern_property_rts = self._treat_pattern_properties(
+                elt["patternProperties"], name_prefix=name)
+            for ppr in pattern_property_rts:
+                # Add a reference to the pattern property type. These can never
+                # be obligatory since pattern properties cannot be required in
+                # the original schema (since their actual names are not known a
+                # priori).
+                rt.add_property(ppr)
+
         if "description" in elt:
             rt.description = elt["description"]
         return rt
@@ -783,28 +856,96 @@
         return rt
 
     def _treat_list(self, elt: dict, name: str):
-        # @review Timm Fitschen 2022-02-30
+        # @review Timm Fitschen 2023-05-25
 
-        if "items" not in elt:
+        if "items" not in elt and name not in self.types_for_missing_array_items:
+            if self.ignore_unspecified_array_items:
+                return None, False
             raise JsonSchemaDefinitionError(
                 f"The definition of the list items is missing in {elt}.")
-        items = elt["items"]
-        if "enum" in items:
-            return self._treat_enum(items, name), True
-        if items["type"] in JSON_SCHEMA_ATOMIC_TYPES:
-            datatype = db.LIST(self._get_atomic_datatype(items))
+        if "items" in elt:
+            items = elt["items"]
+            if "enum" in items:
+                return self._treat_enum(items, name), True
+            if items["type"] in JSON_SCHEMA_ATOMIC_TYPES:
+                datatype = db.LIST(self._get_atomic_datatype(items))
+                return db.Property(name=name, datatype=datatype), False
+            if items["type"] == "object":
+                if "title" not in items or self._stringify(items["title"]) == name:
+                    # Property is RecordType
+                    return self._treat_record_type(items, name), True
+                else:
+                    # List property will be an entity of its own with a name
+                    # different from the referenced RT
+                    ref_rt = self._treat_record_type(
+                        items, self._stringify(items["title"]))
+                    self.model[ref_rt.name] = ref_rt
+                    return db.Property(name=name, datatype=db.LIST(ref_rt)), False
+        else:
+            # Use predefined type:
+            datatype = db.LIST(self.types_for_missing_array_items[name])
         return db.Property(name=name, datatype=datatype), False
-        if items["type"] == "object":
-            if "title" not in items or self._stringify(items["title"]) == name:
-                # Property is RecordType
-                return self._treat_record_type(items, name), True
+
+    def _get_pattern_prop(self):
+        # @review Timm Fitschen 2023-05-25
+        if "__pattern_property_pattern_property" in self.model:
+            return self.model["__pattern_property_pattern_property"]
+        pp = db.Property(name="__matched_pattern", datatype=db.TEXT)
+        self.model["__pattern_property_pattern_property"] = pp
+        return pp
+
+    def _treat_pattern_properties(self, pattern_elements, name_prefix=""):
+        """Special treatment for pattern properties: A RecordType is created for
+        each pattern property. In case of a `type: object` PatternProperty, the
+        remaining properties of the JSON entry are appended to the new
+        RecordType; in case of an atomic type PatternProperty, a single value
+        Property is added to the RecordType.
+
+        Raises
+        ------
+        NotImplementedError
+            In case of patternProperties with non-object, non-atomic type, e.g.,
+            array.
+
+        """
+        # @review Timm Fitschen 2023-05-25
+        num_patterns = len(pattern_elements)
+        pattern_prop = self._get_pattern_prop()
+        returns = []
+        for ii, (key, element) in enumerate(pattern_elements.items()):
+            if "title" not in element:
+                name_suffix = f"_{ii+1}" if num_patterns > 1 else ""
+                name = name_prefix + "Entry" + name_suffix
+            else:
+                name = element["title"]
+            if element["type"] == "object":
+                # This is already an object, so it can be treated like any
+                # other record type.
+                pattern_type = self._treat_record_type(element, name)
+            elif element["type"] in JSON_SCHEMA_ATOMIC_TYPES:
+                # Create a property that stores the actual value of the pattern
+                # property.
+                propname = f"{name}_value"
+                prop = db.Property(name=propname, datatype=self._get_atomic_datatype(element))
+                self.model[propname] = prop
+                pattern_type = db.RecordType(name=name)
+                pattern_type.add_property(prop)
+            else:
+                raise NotImplementedError(
+                    "Pattern properties are currently only supported for types " +
+                    ", ".join(JSON_SCHEMA_ATOMIC_TYPES) + ", and object.")
+
+            # Add pattern property and description
+            pattern_type.add_property(pattern_prop, importance=db.OBLIGATORY)
+            if pattern_type.description:
+                pattern_type.description += f"\n\npattern: {key}"
             else:
-                # List property will be an entity of its own with a name
-                # different from the referenced RT
-                ref_rt = self._treat_record_type(
-                    items, self._stringify(items["title"]))
-                self.model[ref_rt.name] = ref_rt
-                return db.Property(name=name, datatype=db.LIST(ref_rt)), False
+                pattern_type.description = f"pattern: {key}"
+
+            self.model[name] = pattern_type
+            returns.append(pattern_type)
+
+        return returns
 
 
 if __name__ == "__main__":
diff --git a/src/doc/index.rst b/src/doc/index.rst
index 5fdb78da4eddfd0145d0357202246d4b5352dcf4..6c2c5f9894ad5c0f5dc3f124de726d264f46d452 100644
--- a/src/doc/index.rst
+++ b/src/doc/index.rst
@@ -15,6 +15,7 @@ This documentation helps you to :doc:`get started<getting_started>`, explains th
    Concepts <concepts>
    The Caosdb Crawler <crawler>
    YAML data model specification <yaml_interface>
+   Specifying a datamodel with JSON schema <json_schema_interface>
    _apidoc/modules
diff --git a/src/doc/json_schema_interface.rst b/src/doc/json_schema_interface.rst
new file mode 100644
index 0000000000000000000000000000000000000000..0e8aebd3a4204f29608212f7ed0c115fd1d4a134
--- /dev/null
+++ b/src/doc/json_schema_interface.rst
@@ -0,0 +1,75 @@
+Defining datamodels with a JSON schema specification
+====================================================
+
+TODO, see https://gitlab.com/caosdb/caosdb-advanced-user-tools/-/issues/42
+
+Further information
+###################
+
+Pattern Properties
+%%%%%%%%%%%%%%%%%%
+
+The JSON-schema parser has rudimentary support for ``patternProperties``. Since
+their names are not known a priori (only the pattern that their names have to
+match is known), we create RecordTypes for all pattern properties. The names of
+these RecordTypes are created from their parent element's name by appending the
+string ``"Entry"`` and, if there is more than one pattern property for one
+parent, a number.
+
+All the RecordTypes created for pattern properties have at least an obligatory
+``__matched_pattern`` property which will -- as the name suggests -- store the
+matched pattern of an actual data entry.
+
+.. note::
+
+   The ``__matched_pattern`` property is added automatically to your datamodel
+   as soon as there is at least one pattern property in your JSON schema. So be
+   sure that you don't happen to have an entity with exactly this name in your
+   database.
+
+E.g., a json schema with
+
+.. code-block:: json
+
+   "dataset": {
+     "patternProperties": {
+       "^[0-9]{4,4}": {
+         "type": "boolean"
+       },
+       "^[0-9]{4,4}-[0-9]{2,2}-[0-9]{2,2}": {
+         "type": "object",
+         "properties": {
+           "date_id": {
+             "$ref": "#/definitions/uuid"
+           }
+         }
+       }
+     }
+   }
+
+would result in a ``Dataset`` RecordType that has the two properties
+``DatasetEntry_1`` and ``DatasetEntry_2`` (as always, names can be overwritten
+explicitly by specifying the ``title`` property), referencing corresponding
+``DatasetEntry_1`` and ``DatasetEntry_2`` Records.
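+
+As a minimal usage sketch, such a model can be loaded with
+``parse_model_from_json_schema`` (assuming the snippet above is embedded in a
+complete schema stored under the hypothetical file name
+``dataset.schema.json``):
+
+.. code-block:: python
+
+   from caosadvancedtools.models.parser import parse_model_from_json_schema
+
+   # Parse the schema and inspect the generated RecordTypes:
+   model = parse_model_from_json_schema("dataset.schema.json")
+   dataset_rt = model["Dataset"]
+   print(dataset_rt.get_property("DatasetEntry_1"))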
+
+Apart from the aforementioned ``__matched_pattern`` property, ``DatasetEntry_1``
+also has the ``DatasetEntry_1_value`` property with datatype ``BOOLEAN``, which
+stores the actual value. In turn, ``DatasetEntry_2`` is of ``type: object`` and
+is treated like any other RecordType. Consequently, it has, apart from the
+``__matched_pattern`` property, a ``date_id`` property as specified in its
+``properties``.
+
+Array entries without ``items`` specification
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+JSON schema allows for properties of ``type: array`` without the ``items``
+specification that consequently can be arrays of any (and of mixed) types. While
+this is in general problematic when specifying a data model, sometimes these
+properties cannot be specified further, e.g., when you're using an external
+schema that you cannot change.
+
+These properties can still be added to your datamodel by specifying their types
+explicitly in a dictionary or, alternatively, they can be ignored. See the
+``types_for_missing_array_items`` and ``ignore_unspecified_array_items``
+parameters of ``models.parser.JsonSchemaParser``, respectively, for more
+information.
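+
+For illustration, a short sketch of both options (the schema file name is
+hypothetical, and ``missing`` stands for a ``type: array`` property without
+``items``):
+
+.. code-block:: python
+
+   import caosdb as db
+   from caosadvancedtools.models.parser import parse_model_from_json_schema
+
+   # Option 1: silently drop array properties without `items` specification.
+   model = parse_model_from_json_schema(
+       "schema.json", ignore_unspecified_array_items=True)
+
+   # Option 2: provide a fall-back type per property name; the property is
+   # then created as a LIST of that type.
+   model = parse_model_from_json_schema(
+       "schema.json", types_for_missing_array_items={"missing": db.FILE})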
diff --git a/unittests/json-schema-models/datamodel_atomic_properties.schema.json b/unittests/json-schema-models/datamodel_atomic_properties.schema.json
index 3828f131180a839d5c9b8bc5aa1a1285717da723..7b4a23e5bb48b995d07a261bcae0a8a486b7969a 100644
--- a/unittests/json-schema-models/datamodel_atomic_properties.schema.json
+++ b/unittests/json-schema-models/datamodel_atomic_properties.schema.json
@@ -18,7 +18,8 @@
             "date": { "type": "string", "format": "date" },
             "integer": { "type": "integer", "description": "Some integer property" },
             "boolean": { "type": "boolean" },
-            "number_prop": { "type": "number", "description": "Some float property" }
+            "number_prop": { "type": "number", "description": "Some float property" },
+            "null_prop": { "type": "null", "description": "This property will never have a value." }
         }
     }
 ]
diff --git a/unittests/json-schema-models/datamodel_missing_array_items.schema.json b/unittests/json-schema-models/datamodel_missing_array_items.schema.json
new file mode 100644
index 0000000000000000000000000000000000000000..8ac17ac3162def3dbf070d7027fd318366bb4682
--- /dev/null
+++ b/unittests/json-schema-models/datamodel_missing_array_items.schema.json
@@ -0,0 +1,9 @@
+{
+    "title": "something_with_missing_array_items",
+    "type": "object",
+    "properties": {
+        "missing": {
+            "type": "array"
+        }
+    }
+}
diff --git a/unittests/json-schema-models/datamodel_no_toplevel_entity.schema.json b/unittests/json-schema-models/datamodel_no_toplevel_entity.schema.json
new file mode 100644
index 0000000000000000000000000000000000000000..35240d765479b719576e6ee67e387790d3d6d160
--- /dev/null
+++ b/unittests/json-schema-models/datamodel_no_toplevel_entity.schema.json
@@ -0,0 +1,56 @@
+{
+    "$schema": "http://json-schema.org/draft-07/schema#",
+    "$id": "https://my-schema-id.net",
+    "type": "object",
+    "definitions": {
+        "uuid": {
+            "type": [
+                "string",
+                "null"
+            ],
+            "pattern": "^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$"
+        },
+        "datetime": {
+            "type": "string",
+            "format": "date-time"
+        }
+    },
+    "properties": {
+        "Dataset1": {
+            "title": "Dataset1",
+            "description": "Some description",
+            "type": "object",
+            "properties": {
+                "title": {
+                    "type": "string",
+                    "description": "full dataset title"
+                },
+                "campaign": {
+                    "type": "string",
+                    "description": "FIXME"
+                },
+                "number_prop": {
+                    "type": "number",
+                    "description": "Some float property"
+                },
+                "user_id": {
+                    "$ref": "#/definitions/uuid"
+                }
+            },
+            "required": ["title", "number_prop"]
+        }
+    },
+    "patternProperties": {
+        "^[0-9]{4,4}": {
+            "type": "boolean"
+        },
+        "^[0-9]{4,4}-[0-9]{2,2}-[0-9]{2,2}": {
+            "type": "object",
+            "properties": {
+                "date_id": {
+                    "$ref": "#/definitions/uuid"
+                }
+            }
+        }
+    }
+}
diff --git a/unittests/json-schema-models/datamodel_pattern_properties.schema.json b/unittests/json-schema-models/datamodel_pattern_properties.schema.json
new file mode 100644
index 0000000000000000000000000000000000000000..9b85c7b80cf0990713f8f130050c21751e311b42
--- /dev/null
+++ b/unittests/json-schema-models/datamodel_pattern_properties.schema.json
@@ -0,0 +1,39 @@
+[
+    {
+        "title": "Dataset",
+        "type": "object",
+        "patternProperties": {
+            "^[0-9]{4,4}": {
+                "type": "boolean"
+            },
+            "^[0-9]{4,4}-[0-9]{2,2}-[0-9]{2,2}": {
+                "type": "object",
+                "properties": {
+                    "date_id": {
+                        "type": [
+                            "string",
+                            "null"
+                        ],
+                        "pattern": "^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$"
+                    }
+                }
+            }
+        }
+    },
+    {
+        "title": "Dataset2",
+        "type": "object",
+        "properties": {
+            "datetime": {
+                "type": "string",
+                "format": "date-time"
+            }
+        },
+        "patternProperties": {
+            ".*": {
+                "title": "Literally anything",
+                "type": "object"
+            }
+        }
+    }
+]
diff --git a/unittests/test_json_schema_model_parser.py b/unittests/test_json_schema_model_parser.py
index a136f9ba2ae9965978c7f1234acb16289a3ca305..a991076e6a1e1a3e92cafc7f1bb88b42b4b2ab3d 100644
--- a/unittests/test_json_schema_model_parser.py
+++ b/unittests/test_json_schema_model_parser.py
@@ -103,7 +103,7 @@ def test_datamodel_with_atomic_properties():
     assert isinstance(rt2, db.RecordType)
     assert rt2.name == "Dataset2"
     assert not rt2.description
-    assert len(rt2.get_properties()) == 5
+    assert len(rt2.get_properties()) == 6
 
     date_prop = rt2.get_property("date")
     assert date_prop.datatype == db.DATETIME
@@ -121,6 +121,9 @@
     float_prop2 = rt2.get_property("number_prop")
     assert float_prop.datatype == float_prop2.datatype
 
+    null_prop = rt2.get_property("null_prop")
+    assert null_prop.datatype == db.TEXT
+
 
 def test_required_no_list():
     """Exception must be raised when "required" is not a list."""
@@ -356,3 +359,130 @@ def test_name_property():
     assert str(err.value).startswith(
         "The 'name' property must be string-typed, otherwise it cannot be identified with CaosDB's "
         "name property.")
+
+
+def test_no_toplevel_entity():
+    model = parse_model_from_json_schema(os.path.join(
+        FILEPATH, "datamodel_no_toplevel_entity.schema.json"), top_level_recordtype=False)
+
+    assert "Dataset1" in model
+    rt1 = model["Dataset1"]
+
+    assert rt1.name == "Dataset1"
+    assert rt1.description == "Some description"
+    assert len(rt1.get_properties()) == 4
+
+    assert rt1.get_property("title") is not None
+    assert rt1.get_property("campaign") is not None
+    assert rt1.get_property("number_prop") is not None
+    assert rt1.get_property("user_id") is not None
+
+    title_prop = rt1.get_property("title")
+    assert title_prop.datatype == db.TEXT
+    assert rt1.get_importance(title_prop.name) == db.OBLIGATORY
+
+    campaign_prop = rt1.get_property("campaign")
+    assert campaign_prop.datatype == db.TEXT
+    assert rt1.get_importance(campaign_prop.name) == db.RECOMMENDED
+
+    float_prop = rt1.get_property("number_prop")
+    assert float_prop.datatype == db.DOUBLE
+    assert rt1.get_importance(float_prop.name) == db.OBLIGATORY
+
+    uid_prop = rt1.get_property("user_id")
+    assert uid_prop.datatype == db.TEXT
+    assert rt1.get_importance(uid_prop.name) == db.RECOMMENDED
+
+    # pattern properties without top-level entity:
+    assert "__PatternEntry_1" in model
+    assert "__PatternEntry_2" in model
+
+    pattern_boolean_rt = model["__PatternEntry_1"]
+    assert "pattern: " in pattern_boolean_rt.description
+    assert len(pattern_boolean_rt.properties) == 2
+    pp = pattern_boolean_rt.get_property("__matched_pattern")
+    assert pp.datatype == db.TEXT
+    assert pattern_boolean_rt.get_importance(pp.name) == db.OBLIGATORY
+    value_prop = pattern_boolean_rt.get_property("__PatternEntry_1_value")
+    assert value_prop.datatype == db.BOOLEAN
+
+    pattern_object_rt = model["__PatternEntry_2"]
+    assert "pattern: " in pattern_object_rt.description
+    assert len(pattern_object_rt.properties) == 2
+    pp = pattern_object_rt.get_property("__matched_pattern")
+    assert pp.datatype == db.TEXT
+    assert pattern_object_rt.get_importance(pp.name) == db.OBLIGATORY
+    date_id_prop = pattern_object_rt.get_property("date_id")
+    assert date_id_prop.datatype == db.TEXT
+
+
+def test_missing_array_items():
+
+    # strict behavior
+    with pytest.raises(JsonSchemaDefinitionError) as err:
+        parse_model_from_json_schema(os.path.join(
+            FILEPATH, "datamodel_missing_array_items.schema.json"))
+
+    assert "{'type': 'array'}" in str(err)
+
+    # ignore all problems, so a RT is created that does not have the property
+    model = parse_model_from_json_schema(os.path.join(
+        FILEPATH, "datamodel_missing_array_items.schema.json"), ignore_unspecified_array_items=True)
+    assert "something_with_missing_array_items" in model
+    rt = model["something_with_missing_array_items"]
+    assert isinstance(rt, db.RecordType)
+    assert rt.get_property("missing") is None
+
+    # specify the type:
+    type_dict = {"missing": db.FILE}
+    model = parse_model_from_json_schema(os.path.join(
+        FILEPATH, "datamodel_missing_array_items.schema.json"), types_for_missing_array_items=type_dict)
+    assert "something_with_missing_array_items" in model
+    rt = model["something_with_missing_array_items"]
model["something_with_missing_array_items"] + assert rt.get_property("missing") is not None + assert rt.get_property("missing").datatype == db.LIST(db.FILE) + + +def test_pattern_properties(): + + model = parse_model_from_json_schema(os.path.join( + FILEPATH, "datamodel_pattern_properties.schema.json")) + + assert "Dataset" in model + rt1 = model["Dataset"] + assert len(rt1.properties) == 2 + for name in ["DatasetEntry_1", "DatasetEntry_2"]: + assert rt1.get_property(name) is not None + assert rt1.get_property(name).is_reference() + + pattern_boolean_rt = model["DatasetEntry_1"] + assert "pattern: " in pattern_boolean_rt.description + assert len(pattern_boolean_rt.properties) == 2 + pp = pattern_boolean_rt.get_property("__matched_pattern") + assert pp.datatype == db.TEXT + assert pattern_boolean_rt.get_importance(pp.name) == db.OBLIGATORY + value_prop = pattern_boolean_rt.get_property("DatasetEntry_1_value") + assert value_prop.datatype == db.BOOLEAN + + pattern_object_rt = model["DatasetEntry_2"] + assert "pattern: " in pattern_object_rt.description + assert len(pattern_object_rt.properties) == 2 + pp = pattern_object_rt.get_property("__matched_pattern") + assert pp.datatype == db.TEXT + assert pattern_object_rt.get_importance(pp.name) == db.OBLIGATORY + date_id_prop = pattern_object_rt.get_property("date_id") + assert date_id_prop.datatype == db.TEXT + + assert "Dataset2" in model + rt2 = model["Dataset2"] + assert len(rt2.properties) == 2 + # This has been tested elsewhere, just make sure that it is properly created + # in the presence of pattern properties, too. + assert rt2.get_property("datetime") is not None + + assert rt2.get_property("Literally anything") is not None + assert rt2.get_property("Literally anything").is_reference() + + pattern_named_rt = model["Literally anything"] + assert len(pattern_named_rt.properties) == 1 + assert pattern_named_rt.get_property("__matched_pattern") is not None