diff --git a/.coveragerc b/.coveragerc
new file mode 100644
index 0000000000000000000000000000000000000000..1dc4f3d8c8d375ba3f7b352aa3e18702ec731d83
--- /dev/null
+++ b/.coveragerc
@@ -0,0 +1,2 @@
+[html]
+show_contexts = True
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 108c0cd02724a9f38d8ebe5b54ec07061879a094..3662e200959ba5604ee4fdf25962d4f39eaa6114 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -13,7 +13,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 * Parsing from YAML now allows to give an existing model to which the YAML data
   model shall be added.
 * The `json_schema_exporter` module which introduces tools to create a json
-  schema from a RecordType, e.g., for the useage in web forms.
+  schema from a RecordType, e.g., for the usage in web forms.
 
 ### Changed ###
 
@@ -33,6 +33,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ### Documentation ###
 
+* Test coverage reports are now generated in `.tox/cov_html/` by tox.
+
 ## [0.8.0] - 2023-05-30 ##
 (Florian Spreckelsen)
 
diff --git a/README.md b/README.md
index ebda4f641bc3b5e6a74cf72f4bc25a4237a73131..662bf6a6309aaa307505b0b8027b30664756bf10 100644
--- a/README.md
+++ b/README.md
@@ -4,6 +4,7 @@
 
 This is the **CaosDB Advanced User Tools** repository and a part of the
 CaosDB project.
+
 This project contains tools that are beyond the typical use of
 the CaosDB python client. Especially, this includes the crawler which will
 typically be used by a data curator.
diff --git a/README_SETUP.md b/README_SETUP.md
index 894777aeccb64ceb0fe840ba5f16f0cc540b2996..bf4f25d92106c19cccc276389b6c97aa22904923 100644
--- a/README_SETUP.md
+++ b/README_SETUP.md
@@ -32,7 +32,10 @@ Optional h5-crawler:
 - `pip install .[h5-crawler] --user`
 
 ## Run Unit Tests
-`tox`
+
+- All tests: `tox`
+- One specific test with tox: `tox -- unittests/test_myusecase.py -k expression`
+- Or even using only pytest: `pytest unittests/test_myusecase.py -k expression`
 
 ## Run Integration Tests Locally
 
diff --git a/integrationtests/test.sh b/integrationtests/test.sh
index 07293254de9c09cc8ace8cd6b2f3f426464ccd62..a31afcfd2f74770b656eef41002b2f444b7962de 100755
--- a/integrationtests/test.sh
+++ b/integrationtests/test.sh
@@ -14,7 +14,7 @@ then
     fi
 fi
 OUT=/tmp/crawler.output
-ls 
+ls
 cat pycaosdb.ini
 python3 -c "import linkahead; print('LinkAhead Version:', linkahead.__version__)"
 rm -rf /tmp/caosdb_identifiable_cache.db
@@ -57,7 +57,7 @@ echo "./crawl.py -a $RUN_ID /"
 ./crawl.py -a $RUN_ID / | tee "$OUT"
 set +e
 if grep "There where unauthorized changes" "$OUT"
-then 
+then
     echo "There still were unauthorized changes, which should not have happend!"
     echo "Test FAILED"
     exit 1
@@ -96,5 +96,8 @@ python3 -m pytest test_json_schema_datamodel_parser.py
 echo "Testing yaml datamodel parser"
 python3 -m pytest test_yaml_parser.py
 
+echo "Testing json-schema exporter"
+python3 -m pytest test_json_schema_exporter.py
+
 # Obsolete due to teardown in the above test.
 # echo "/n/n/n YOU NEED TO RESTART THE SERVER TO REDO TESTS!!!"
diff --git a/integrationtests/test_json_schema_exporter.py b/integrationtests/test_json_schema_exporter.py
new file mode 100644
index 0000000000000000000000000000000000000000..b80a1aa5e781fd5de5479e3166a4892df1dc2764
--- /dev/null
+++ b/integrationtests/test_json_schema_exporter.py
@@ -0,0 +1,77 @@
+#!/usr/bin/env python
+# encoding: utf-8
+#
+# This file is a part of the CaosDB Project.
+#
+# Copyright (C) 2023 Indiscale GmbH <info@indiscale.com>
+# Copyright (C) 2023 Florian Spreckelsen <f.spreckelsen@indiscale.com>
+#
+# This program is free software: you can redistribute it and/or modify it under
+# the terms of the GNU Affero General Public License as published by the Free
+# Software Foundation, either version 3 of the License, or (at your option) any
+# later version.
+#
+# This program is distributed in the hope that it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+# FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
+# details.
+#
+# You should have received a copy of the GNU Affero General Public License along
+# with this program. If not, see <https://www.gnu.org/licenses/>.
+#
+
+import linkahead as db
+
+from caosadvancedtools.json_schema_exporter import recordtype_to_json_schema as rtjs
+
+
+def _delete_everything():
+    ents = db.execute_query("FIND ENTITY WITH ID > 99")
+    if ents:
+        ents.delete()
+
+
+def setup_module():
+    _delete_everything()
+
+
+def teardown_module():
+    _delete_everything()
+
+
+def test_uniqueness_of_reference_types():
+    parent_type = db.RecordType(name="ParentType").insert()
+    int_prop = db.Property(name="IntegerProp", datatype=db.INTEGER).insert()
+    sub_type = db.RecordType(name="SubType").add_parent(parent_type).add_property(
+        int_prop, importance=db.RECOMMENDED).insert()
+    referencing_type = db.RecordType(name="ReferencingType")
+    referencing_type.add_property(int_prop, importance=db.OBLIGATORY)
+    referencing_type.add_property(parent_type)
+    referencing_type.insert()
+    recA = db.Record(name="RecAParent").add_parent(parent_type).insert()
+    recB = db.Record(name="RecBSub").add_parent(sub_type).insert()
+
+    rt = db.execute_query(f"FIND RECORDTYPE WITH name='{referencing_type.name}'", unique=True)
+
+    schema = rtjs(rt)
+    assert schema["title"] == referencing_type.name
+    assert schema["type"] == "object"
+    assert len(schema["required"]) == 1
+    assert "IntegerProp" in schema["required"]
+    assert "IntegerProp" in schema["properties"]
+    assert schema["properties"]["IntegerProp"]["type"] == "integer"
+    assert parent_type.name in schema["properties"]
+    assert "oneOf" in schema["properties"][parent_type.name]
+    one_of = schema["properties"][parent_type.name]["oneOf"]
+    assert len(one_of) == 2
+    enum_index = 0
+    if "enum" not in one_of[enum_index]:
+        # As in unittests, we can't rely on the order of oneOf.
+        enum_index = 1 - enum_index
+    assert "enum" in one_of[enum_index]
+    assert len(one_of[enum_index]["enum"]) == 2
+    assert f"{recA.id}, {recA.name}" in one_of[enum_index]["enum"]
+    assert f"{recB.id}, {recB.name}" in one_of[enum_index]["enum"]
+    assert one_of[1 - enum_index]["type"] == "object"
+    # No properties in parent_type
+    assert len(one_of[1 - enum_index]["properties"]) == 0
diff --git a/src/caosadvancedtools/json_schema_exporter.py b/src/caosadvancedtools/json_schema_exporter.py
index d34bed9c0189789eb384c303f8311abb38be7324..39f660f0d6686fa6a8c2892213eeb0bf276655c7 100644
--- a/src/caosadvancedtools/json_schema_exporter.py
+++ b/src/caosadvancedtools/json_schema_exporter.py
@@ -33,14 +33,22 @@ def _make_required_list(rt: db.RecordType):
             if rt.get_importance(prop.name) == db.OBLIGATORY]
 
 
-def _make_prop_from_prop(prop: db.Property, additional_options_for_text_props: Optional[dict],
-                         units_in_description: bool):
+def _make_segment_from_prop(prop: db.Property, additional_properties: bool,
+                            name_and_description_in_properties: bool,
+                            additional_options_for_text_props: Optional[dict],
+                            units_in_description: bool):
     """Return the JSON Schema segment for the given property
 
     Parameters
     ----------
     prop : db.Property
         the property to be transformed
+    additional_properties : bool, optional
+        Whether additional properties will be admitted in the resulting
+        schema. Optional, default is True.
+    name_and_description_in_properties : bool, optional
+        Whether to include name and description in the `properties` section of
+        the schema to be exported. Optional, default is False.
     additional_options_for_text_props : Optional[dict]
         dict of dicts that may contain the keys 'pattern' and 'format' to
         further define the rules for the JSON Schema segment
@@ -50,11 +58,6 @@ def _make_prop_from_prop(prop: db.Property, additional_options_for_text_props: O
         instead.
     """
-    if prop.is_reference():
-        raise NotImplementedError(
-            "Reference properties are not supported in this version of the json schema exporter."
-        )
-
     if not additional_options_for_text_props:
         additional_options_for_text_props = {}
 
     if prop.datatype == db.TEXT or prop.datatype == db.DATETIME:
@@ -96,8 +99,32 @@ def _make_prop_from_prop(prop: db.Property, additional_options_for_text_props: O
         json_prop["type"] = "array"
         list_element_prop = db.Property(
             name=prop.name, datatype=get_list_datatype(prop.datatype, strict=True))
-        json_prop["items"] = _make_prop_from_prop(
-            list_element_prop, additional_options_for_text_props, units_in_description)
+        json_prop["items"] = _make_segment_from_prop(
+            list_element_prop, additional_properties,
+            name_and_description_in_properties, additional_options_for_text_props,
+            units_in_description
+        )
+    elif prop.is_reference():
+        if prop.datatype == db.REFERENCE:
+            # No Record creation since no RT is specified and we don't know what
+            # schema to use, so only enum of all Records and all Files.
+            values = _retrieve_enum_values("RECORD") + _retrieve_enum_values("FILE")
+            json_prop["enum"] = values
+        elif prop.datatype == db.FILE:
+            # TODO: different issue
+            raise NotImplementedError("Files have not been implemented yet.")
+        else:
+            values = _retrieve_enum_values(f"RECORD '{prop.datatype}'")
+            rt = db.execute_query(f"FIND RECORDTYPE WITH name='{prop.datatype}'", unique=True)
+            subschema = _make_segment_from_recordtype(rt, additional_properties,
+                                                      name_and_description_in_properties,
+                                                      additional_options_for_text_props,
+                                                      units_in_description)
+            json_prop["oneOf"] = [
+                {"enum": values},
+                subschema
+            ]
+
     else:
         raise ValueError(
             f"Unknown or no property datatype. Property {prop.name} with type {prop.datatype}")
@@ -135,6 +162,54 @@ def _make_text_property(description="", text_format=None, text_pattern=None):
     return prop
 
 
+def _retrieve_enum_values(role: str):
+
+    possible_values = db.execute_query(f"SELECT name, id FROM {role}")
+
+    vals = []
+    for val in possible_values:
+        if val.name:
+            vals.append(f"{val.id}, {val.name}")
+        else:
+            vals.append(f"{val.id}")
+
+    return vals
+
+
+def _make_segment_from_recordtype(rt: db.RecordType, additional_properties: bool = True,
+                                  name_and_description_in_properties: bool = False,
+                                  additional_options_for_text_props: Optional[dict] = None,
+                                  units_in_description: bool = True):
+    """Return a Json schema segment for the given RecordType.
+    """
+    schema = {
+        "type": "object"
+    }
+
+    schema["required"] = _make_required_list(rt)
+    schema["additionalProperties"] = additional_properties
+
+    props = {}
+    if name_and_description_in_properties:
+        props["name"] = _make_text_property("The name of the Record to be created")
+        props["description"] = _make_text_property("The description of the Record to be created")
+
+    for prop in rt.properties:
+        if prop.name in props:
+            # Multi property
+            raise NotImplementedError(
+                "Creating a schema for multi-properties is not specified. "
+                f"Property {prop.name} occurs more than once."
+            )
+        props[prop.name] = _make_segment_from_prop(
+            prop, additional_properties, name_and_description_in_properties,
+            additional_options_for_text_props, units_in_description)
+
+    schema["properties"] = props
+
+    return schema
+
+
 def recordtype_to_json_schema(rt: db.RecordType, additional_properties: bool = True,
                               name_and_description_in_properties: bool = False,
                               additional_options_for_text_props: Optional[dict] = None,
@@ -171,32 +246,14 @@ def recordtype_to_json_schema(rt: db.RecordType, additional_properties: bool = T
     if additional_options_for_text_props is None:
         additional_options_for_text_props = {}
 
-    schema = {
-        "$schema": "https://json-schema.org/draft/2019-09/schema",
-        "type": "object"
-    }
+    schema = _make_segment_from_recordtype(rt, additional_properties,
+                                           name_and_description_in_properties,
+                                           additional_options_for_text_props,
+                                           units_in_description)
+    schema["$schema"] = "https://json-schema.org/draft/2019-09/schema"
     if rt.name:
         schema["title"] = rt.name
     if rt.description:
         schema["description"] = rt.description
-    schema["required"] = _make_required_list(rt)
-    schema["additionalProperties"] = additional_properties
-
-    props = {}
-    if name_and_description_in_properties:
-        props["name"] = _make_text_property("The name of the Record to be created")
-        props["description"] = _make_text_property("The description of the Record to be created")
-
-    for prop in rt.properties:
-        if prop.name in props:
-            # Multi property
-            raise NotImplementedError(
-                "Creating a schema for multi-properties is not specified. "
" - f"Property {prop.name} occurs more than once." - ) - props[prop.name] = _make_prop_from_prop( - prop, additional_options_for_text_props, units_in_description) - - schema["properties"] = props return schema diff --git a/tox.ini b/tox.ini index 0e9664c377366923b60e11d138825e1543cc8c7f..00548dea25c5017f1d0301a00a629c62d16631ef 100644 --- a/tox.ini +++ b/tox.ini @@ -12,7 +12,7 @@ deps=nose openpyxl >= 3.0.7 xlrd == 1.2 h5py -commands=py.test --cov=caosadvancedtools -vv {posargs} +commands=py.test --cov=caosadvancedtools --cov-report=html:.tox/cov_html -vv {posargs} [flake8] max-line-length=100 diff --git a/unittests/test_json_schema_exporter.py b/unittests/test_json_schema_exporter.py index b1a51f1aee87d500dab6536b0df08f1535226dbf..937e1b1b625f5c81db2113f4008b00af100305ae 100644 --- a/unittests/test_json_schema_exporter.py +++ b/unittests/test_json_schema_exporter.py @@ -24,10 +24,50 @@ import linkahead as db from jsonschema import FormatChecker, validate, ValidationError from pytest import raises +from unittest.mock import Mock, patch from caosadvancedtools.json_schema_exporter import recordtype_to_json_schema as rtjs +def _mock_execute_query(query_string, unique=False, **kwargs): + """Mock the response to queries for references.""" + all_records = db.Container() + all_files = db.Container() + other_type_rt = db.RecordType(name="OtherType") + other_type_rt.add_property(name="IntegerProp", datatype=db.INTEGER, importance=db.OBLIGATORY) + other_type_records = db.Container().extend([ + db.Record(id=100, name="otherA").add_parent(other_type_rt), + db.Record(id=101, name="otherB").add_parent(other_type_rt), + db.Record(id=102).add_parent(other_type_rt) + ]) + all_records.extend(other_type_records) + + referencing_type_rt = db.RecordType(name="ReferencingType") + referencing_type_rt.add_property(name=other_type_rt.name, datatype=db.LIST(other_type_rt.name)) + referencing_type_records = db.Container().extend([ + db.Record(id=103).add_parent(referencing_type_rt), + db.Record(id=104, name="referencing").add_parent(referencing_type_rt) + ]) + all_records.extend(referencing_type_records) + + all_files.append(db.File(id=105, name="GenericFile.txt")) + + if query_string == "SELECT name, id FROM RECORD 'OtherType'": + return other_type_records + elif query_string == "FIND RECORDTYPE WITH name='OtherType'" and unique is True: + return other_type_rt + elif query_string == "SELECT name, id FROM RECORD 'ReferencingType'": + return referencing_type_records + elif query_string == "FIND RECORDTYPE WITH name='ReferencingType'" and unique is True: + return referencing_type_rt + elif query_string == "SELECT name, id FROM RECORD": + return all_records + elif query_string == "SELECT name, id FROM FILE": + return all_files + else: + return db.Container() + + def test_empty_rt(): rt = db.RecordType(name="Test", description="descr") @@ -251,39 +291,240 @@ def test_rt_with_list_props(): validate(example, schema, format_checker=FormatChecker()) +@patch("linkahead.execute_query", new=Mock(side_effect=_mock_execute_query)) def test_rt_with_references(): - """References and lists of references will come later, so test if the errors - are thrown correctly. + """References and lists of references to files will come later, so test if + the errors are thrown correctly. 
""" rt = db.RecordType() rt.add_property(name="RefProp", datatype=db.REFERENCE) - with raises(NotImplementedError): + schema = rtjs(rt) + props = schema["properties"] + assert "RefProp" in props + assert "enum" in props["RefProp"] + assert isinstance(props["RefProp"]["enum"], list) + assert len(props["RefProp"]["enum"]) == len( + db.execute_query("SELECT name, id FROM RECORD")) + len( + db.execute_query("SELECT name, id FROM FILE")) + assert "oneOf" not in props["RefProp"] - rtjs(rt) + example = { + "RefProp": "101, otherB" + } + validate(example, schema) + example = { + "RefProp": "23, I don't exist" + } + with raises(ValidationError): + # Wrong enum value + validate(example, schema) + example = { + "RefProp": { + "IntegerProp": 12 + } + } + with raises(ValidationError): + # Can't have objects in generic references + validate(example, schema) rt = db.RecordType() rt.add_property(name="RefProp", datatype="OtherType") + rt.add_property(name="OtherTextProp", datatype=db.TEXT) - with raises(NotImplementedError): + schema = rtjs(rt) + props = schema["properties"] + assert "RefProp" in props + assert "oneOf" in props["RefProp"] + assert len(props["RefProp"]["oneOf"]) == 2 + enum_index = 0 + if "enum" not in props["RefProp"]["oneOf"][enum_index]: + # We can't really require the order here, so we just know that one of + # the two elements must be the enum, the other the object. + enum_index = 1 - enum_index + assert "enum" in props["RefProp"]["oneOf"][enum_index] + assert isinstance(props["RefProp"]["oneOf"][enum_index]["enum"], list) + assert len(props["RefProp"]["oneOf"][enum_index]["enum"]) == 3 + assert "100, otherA" in props["RefProp"]["oneOf"][enum_index]["enum"] + assert "101, otherB" in props["RefProp"]["oneOf"][enum_index]["enum"] + assert "102" in props["RefProp"]["oneOf"][enum_index]["enum"] + # the other element of oneOf is the OtherType object + assert props["RefProp"]["oneOf"][1 - enum_index]["type"] == "object" + other_props = props["RefProp"]["oneOf"][1 - enum_index]["properties"] + assert "IntegerProp" in other_props + assert other_props["IntegerProp"]["type"] == "integer" + assert "required" in props["RefProp"]["oneOf"][1 - enum_index] + assert len(props["RefProp"]["oneOf"][1 - enum_index]["required"]) == 1 + assert "IntegerProp" in props["RefProp"]["oneOf"][1 - enum_index]["required"] + # The other prop also works as before + assert "OtherTextProp" in props + assert props["OtherTextProp"]["type"] == "string" - rtjs(rt) + example = { + "RefProp": { + "IntegerProp": 12 + } + } + validate(example, schema) - rt = db.RecordType() - rt.add_property(name="RefProp", datatype=db.LIST(db.REFERENCE)) + example = { + "RefProp": "101, otherB", + "OtherTextProp": "something" + } + validate(example, schema) - with raises(NotImplementedError): + rt = db.RecordType(name="TestType", description="Some description") + rt.add_property(name="RefProp", datatype=db.LIST(db.REFERENCE), + description="I'm a list of references.") - rtjs(rt) + schema = rtjs(rt) + assert schema["title"] == rt.name + assert schema["description"] == rt.description + assert "RefProp" in schema["properties"] + ref_prop = schema["properties"]["RefProp"] + assert ref_prop["type"] == "array" + assert "description" in ref_prop + assert ref_prop["description"] == "I'm a list of references." 
+    assert "items" in ref_prop
+    items = ref_prop["items"]
+    assert "enum" in items
+    assert isinstance(items["enum"], list)
+    assert len(items["enum"]) == len(
+        db.execute_query("SELECT name, id FROM RECORD")) + len(
+        db.execute_query("SELECT name, id FROM FILE"))
+    assert "oneOf" not in items
+    assert "description" not in items
+
+    example = {
+        "RefProp": "101, otherB"
+    }
+    with raises(ValidationError):
+        # Should be list but isn't
+        validate(example, schema)
+    example = {
+        "RefProp": ["101, otherB"]
+    }
+    validate(example, schema)
+    example = {
+        "RefProp": ["101, otherB", "102", "104, referencing"]
+    }
+    validate(example, schema)
 
     rt = db.RecordType()
     rt.add_property(name="RefProp", datatype=db.LIST("OtherType"))
 
+    schema = rtjs(rt, additional_properties=False,
+                  name_and_description_in_properties=True)
+    assert schema["additionalProperties"] is False
+    assert "name" in schema["properties"]
+    assert schema["properties"]["name"]["type"] == "string"
+    assert "description" in schema["properties"]
+    assert schema["properties"]["description"]["type"] == "string"
+    assert "RefProp" in schema["properties"]
+    assert schema["properties"]["RefProp"]["type"] == "array"
+    assert "additionalProperties" not in schema["properties"]["RefProp"]
+    assert "items" in schema["properties"]["RefProp"]
+    items = schema["properties"]["RefProp"]["items"]
+    assert "oneOf" in items
+    assert len(items["oneOf"]) == 2
+    # same as above, we can't rely on the order
+    enum_index = 0
+    if "enum" not in items["oneOf"][enum_index]:
+        enum_index = 1 - enum_index
+    assert "enum" in items["oneOf"][enum_index]
+    assert isinstance(items["oneOf"][enum_index]["enum"], list)
+    assert len(items["oneOf"][enum_index]["enum"]) == 3
+    assert "100, otherA" in items["oneOf"][enum_index]["enum"]
+    assert "101, otherB" in items["oneOf"][enum_index]["enum"]
+    assert "102" in items["oneOf"][enum_index]["enum"]
+    other_type = items["oneOf"][1 - enum_index]
+    assert other_type["type"] == "object"
+    assert other_type["additionalProperties"] is False
+    assert "IntegerProp" in other_type["properties"]
+    assert len(other_type["required"]) == 1
+    assert "IntegerProp" in other_type["required"]
+
+    example = {
+        "RefProp": ["101, otherB", "102", "104, referencing"]
+    }
+    with raises(ValidationError):
+        # Wrong value in enum
+        validate(example, schema)
+    example = {
+        "RefProp": [{"IntegerProp": 12}]
+    }
+    validate(example, schema)
+    example = {
+        "RefProp": [{"IntegerProp": 12, "additionalProperty": "something"}]
+    }
+    with raises(ValidationError):
+        # we have additional_properties=False which propagates to subschemas
+        validate(example, schema)
+    example = {
+        "RefProp": [{"IntegerProp": 12}, "101, otherB"]
+    }
+    validate(example, schema)
+
+    rt = db.RecordType(name="ReferenceofReferencesType")
+    rt.add_property(name="RefRefProp", datatype="ReferencingType")
+
+    schema = rtjs(rt)
+
+    assert "RefRefProp" in schema["properties"]
+    ref_ref = schema["properties"]["RefRefProp"]
+    assert "oneOf" in ref_ref
+    assert len(ref_ref["oneOf"]) == 2
+    enum_index = 0
+    if "enum" not in ref_ref["oneOf"][enum_index]:
+        enum_index = 1 - enum_index
+    assert len(ref_ref["oneOf"][enum_index]["enum"]) == 2
+    assert "103" in ref_ref["oneOf"][enum_index]["enum"]
+    assert "104, referencing" in ref_ref["oneOf"][enum_index]["enum"]
+    assert ref_ref["oneOf"][1 - enum_index]["type"] == "object"
+    assert "OtherType" in ref_ref["oneOf"][1 - enum_index]["properties"]
+    assert ref_ref["oneOf"][1 - enum_index]["properties"]["OtherType"]["type"] == "array"
+    items = ref_ref["oneOf"][1 - enum_index]["properties"]["OtherType"]["items"]
ref_ref["oneOf"][1 - enum_index]["properties"]["OtherType"]["items"] + assert "oneOf" in items + assert len(items["oneOf"]) == 2 + # same as above, we can't rely on the order + enum_index = 0 + if "enum" not in items["oneOf"][enum_index]: + enum_index = 1 - enum_index + assert "enum" in items["oneOf"][enum_index] + assert isinstance(items["oneOf"][enum_index]["enum"], list) + assert len(items["oneOf"][enum_index]["enum"]) == 3 + assert "100, otherA" in items["oneOf"][enum_index]["enum"] + assert "101, otherB" in items["oneOf"][enum_index]["enum"] + assert "102" in items["oneOf"][enum_index]["enum"] + other_type = items["oneOf"][1 - enum_index] + assert other_type["type"] == "object" + assert "IntegerProp" in other_type["properties"] + assert len(other_type["required"]) == 1 + assert "IntegerProp" in other_type["required"] + + example = { + "RefRefProp": { + "OtherType": [ + "100, otherA", + {"IntegerProp": 12} + ] + } + } + validate(example, schema) + + rt = db.RecordType() + rt.add_property(name="FileProp", datatype=db.FILE) + with raises(NotImplementedError): + schema = rtjs(rt) - rtjs(rt) + rt = db.RecordType() + rt.add_property(name="FileProp", datatype=db.LIST(db.FILE)) + + with raises(NotImplementedError): + schema = rtjs(rt) def test_broken():