diff --git a/.coveragerc b/.coveragerc new file mode 100644 index 0000000000000000000000000000000000000000..1dc4f3d8c8d375ba3f7b352aa3e18702ec731d83 --- /dev/null +++ b/.coveragerc @@ -0,0 +1,2 @@ +[html] +show_contexts = True diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 80f38616e1abbe4e560d9bbfbd2542d02bfe5007..d2abdcd653c3315335c29058a8ca2774dad34577 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -131,7 +131,7 @@ unittest_py39: script: # First verify that system Python actually is 3.9 - python3 -c "import sys; assert sys.version.startswith('3.9')" - - python3 -c "import caosdb; print('CaosDB Version:', caosdb.__version__)" + - python3 -c "import linkahead; print('LinkAhead Version:', linkahead.__version__)" - tox unittest_py37: @@ -161,7 +161,6 @@ unittest_py311: stage: unittest image: python:3.11 script: *python_test_script - allow_failure: true # Build the sphinx documentation and make it ready for deployment by Gitlab Pages # Special job for serving a static website. See https://docs.gitlab.com/ee/ci/yaml/README.html#pages diff --git a/.gitlab/merge_request_templates/Default.md b/.gitlab/merge_request_templates/Default.md index 35c6d01c5904289b77fc7f1de9419ef91a1510e9..3629e0ca3695000863d8c254516f64bf59a7bf60 100644 --- a/.gitlab/merge_request_templates/Default.md +++ b/.gitlab/merge_request_templates/Default.md @@ -28,6 +28,7 @@ guidelines](https://gitlab.com/caosdb/caosdb/-/blob/dev/REVIEW_GUIDELINES.md) - [ ] Up-to-date CHANGELOG.md (or not necessary) - [ ] Up-to-date JSON schema (or not necessary) - [ ] Appropriate user and developer documentation (or not necessary) + - Update / write published documentation (`make doc`). - How do I use the software? Assume "stupid" users. - How do I develop or debug the software? Assume novice developers. - [ ] Annotations in code (Gitlab comments) @@ -41,7 +42,8 @@ guidelines](https://gitlab.com/caosdb/caosdb/-/blob/dev/REVIEW_GUIDELINES.md) - [ ] I understand the intent of this MR - [ ] All automated tests pass - [ ] Up-to-date CHANGELOG.md (or not necessary) -- [ ] Appropriate user and developer documentation (or not necessary) +- [ ] Appropriate user and developer documentation (or not necessary), also in published + documentation. - [ ] The test environment setup works and the intended behavior is reproducible in the test environment - [ ] In-code documentation and comments are up-to-date. diff --git a/CHANGELOG.md b/CHANGELOG.md index 8d3998f6225e3e8dfbe81fd98bf3152dc51ce42c..2419b32ffa48bdc9f0d1c93319348dea70cd744c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,33 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [0.9.0] - 2023-11-27 ## + +### Added ### + +* Added support for passing callables as `find_func` to the `BaseTableExporter`. +* Added member `BaseTableExporter.all_keys` +* Parsing from YAML now allows to give an existing model to which the YAML data model shall be + added. +* The `json_schema_exporter` module which introduces tools to create a json + schema from a RecordType, e.g., for the usage in web forms. +* `DataModel.get_deep(name: str)` method which uses the DataModel as a kind of cache pool. + +### Changed ### + +* A bit better error handling in the yaml model parser. +* `TableImporter.check_datatypes` allows numeric values in string columns if + `strict=False` (default). 
+ +### Fixed ### + +* `TableImporter.check_missing` in case of array-valued fields in table +* YAML model parser has better description handling. + +### Documentation ### + +* Test coverage reports are now generated in `.tox/cov_html/` by tox. + ## [0.8.0] - 2023-05-30 ## (Florian Spreckelsen) diff --git a/CITATION.cff b/CITATION.cff index bd468e5ad1704db033f8e81bac5277194adc2158..eea049338996c494a88076b9e9e0f3131ed44a66 100644 --- a/CITATION.cff +++ b/CITATION.cff @@ -20,6 +20,6 @@ authors: given-names: Stefan orcid: https://orcid.org/0000-0001-7214-8125 title: CaosDB - Advanced User Tools -version: 0.8.0 +version: 0.9.0 doi: 10.3390/data4020083 -date-released: 2023-05-30 \ No newline at end of file +date-released: 2023-11-27 \ No newline at end of file diff --git a/README.md b/README.md index ebda4f641bc3b5e6a74cf72f4bc25a4237a73131..662bf6a6309aaa307505b0b8027b30664756bf10 100644 --- a/README.md +++ b/README.md @@ -4,6 +4,7 @@ This is the **CaosDB Advanced User Tools** repository and a part of the CaosDB project. + This project contains tools that are beyond the typical use of the CaosDB python client. Especially, this includes the crawler which will typically be used by a data curator. diff --git a/README_SETUP.md b/README_SETUP.md index 894777aeccb64ceb0fe840ba5f16f0cc540b2996..bf4f25d92106c19cccc276389b6c97aa22904923 100644 --- a/README_SETUP.md +++ b/README_SETUP.md @@ -32,7 +32,10 @@ Optional h5-crawler: - `pip install .[h5-crawler] --user` ## Run Unit Tests -`tox` + +- All tests: `tox` +- One specific test with tox: `tox -- unittests/test_myusecase.py -k expression` +- Or even using only pytest: `pytest unittests/test_myusecase.py -k expression` ## Run Integration Tests Locally diff --git a/integrationtests/test.sh b/integrationtests/test.sh index 9f8d003c9219f7e243fd50c5d846b9a7450b9c7b..a31afcfd2f74770b656eef41002b2f444b7962de 100755 --- a/integrationtests/test.sh +++ b/integrationtests/test.sh @@ -14,9 +14,9 @@ then fi fi OUT=/tmp/crawler.output -ls +ls cat pycaosdb.ini -python3 -c "import caosdb; print('CaosDB Version:', caosdb.__version__)" +python3 -c "import linkahead; print('LinkAhead Version:', linkahead.__version__)" rm -rf /tmp/caosdb_identifiable_cache.db set -e echo "Clearing database" @@ -57,7 +57,7 @@ echo "./crawl.py -a $RUN_ID /" ./crawl.py -a $RUN_ID / | tee "$OUT" set +e if grep "There where unauthorized changes" "$OUT" -then +then echo "There still were unauthorized changes, which should not have happend!" echo "Test FAILED" exit 1 @@ -96,5 +96,8 @@ python3 -m pytest test_json_schema_datamodel_parser.py echo "Testing yaml datamodel parser" python3 -m pytest test_yaml_parser.py +echo "Testing json-schema exporter" +python3 -m pytest test_json_schema_exporter.py + # Obsolete due to teardown in the above test. # echo "/n/n/n YOU NEED TO RESTART THE SERVER TO REDO TESTS!!!" 
diff --git a/integrationtests/test_cache.py b/integrationtests/test_cache.py index da1824e8c3cdf3f68bb82f8c7f39e6eecb111f92..aacef1792e6028bf056093c517f45f6367f471d6 100644 --- a/integrationtests/test_cache.py +++ b/integrationtests/test_cache.py @@ -63,6 +63,12 @@ class CacheTest(unittest.TestCase): update = UpdateCache(db_file=self.cache) run_id = "a" + print(db.execute_query("FIND Record TestRecord", unique=True)) + print(db.execute_query("FIND entity with id="+str(rec.id), unique=True)) + try: + print(db.execute_query("FIND Record "+str(rec.id), unique=True)) + except BaseException: + print("Query does not work as expected") update.insert(cont, run_id) assert len(update.get_updates(run_id)) == 1 diff --git a/integrationtests/test_data_model.py b/integrationtests/test_data_model.py index 5bf168cd25873975d73cbbaa0249f2fd4c21299b..bd74a40bde2540bb57245de1de464a1bfd84bc72 100644 --- a/integrationtests/test_data_model.py +++ b/integrationtests/test_data_model.py @@ -1,4 +1,5 @@ import unittest +import pytest import caosdb as db from caosadvancedtools.models.data_model import DataModel @@ -55,6 +56,19 @@ class DataModelTest(unittest.TestCase): assert len(exist) == 1 assert exist[0].name == "TestRecord" + def test_large_data_model(self): + # create RT and one property + dm = DataModel() + long = "Long" * 50 + first_RT = db.RecordType(name=f"TestRecord_first") + for index in range(20): + this_RT = db.RecordType(name=f"TestRecord_{long}_{index:02d}") + first_RT.add_property(this_RT) + dm.append(this_RT) + dm.append(first_RT) + dm.sync_data_model(noquestion=True) # Insert + dm.sync_data_model(noquestion=True) # Check again + def tearDown(self): try: tests = db.execute_query("FIND ENTITY test*") diff --git a/integrationtests/test_json_schema_exporter.py b/integrationtests/test_json_schema_exporter.py new file mode 100644 index 0000000000000000000000000000000000000000..69edcf42d1fd285c030ad6d6ccb7f73f2d1b5536 --- /dev/null +++ b/integrationtests/test_json_schema_exporter.py @@ -0,0 +1,77 @@ +#!/usr/bin/env python +# encoding: utf-8 +# +# This file is a part of the CaosDB Project. +# +# Copyright (C) 2023 Indiscale GmbH <info@indiscale.com> +# Copyright (C) 2023 Florian Spreckelsen <f.spreckelsen@indiscale.com> +# +# This program is free software: you can redistribute it and/or modify it under +# the terms of the GNU Affero General Public License as published by the Free +# Software Foundation, either version 3 of the License, or (at your option) any +# later version. +# +# This program is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +# FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more +# details. +# +# You should have received a copy of the GNU Affero General Public License along +# with this program. If not, see <https://www.gnu.org/licenses/>. 
+# + +import linkahead as db + +from caosadvancedtools.json_schema_exporter import recordtype_to_json_schema as rtjs + + +def _delete_everything(): + ents = db.execute_query("FIND ENTITY WITH ID > 99") + if ents: + ents.delete() + + +def setup_module(): + _delete_everything() + + +def teardown_module(): + _delete_everything() + + +def test_uniqueness_of_reference_types(): + parent_type = db.RecordType(name="ParentType").insert() + int_prop = db.Property(name="IntegerProp", datatype=db.INTEGER).insert() + sub_type = db.RecordType(name="SubType").add_parent(parent_type).add_property( + int_prop, importance=db.RECOMMENDED).insert() + referencing_type = db.RecordType(name="ReferencingType") + referencing_type.add_property(int_prop, importance=db.OBLIGATORY) + referencing_type.add_property(parent_type) + referencing_type.insert() + recA = db.Record(name="RecAParent").add_parent(parent_type).insert() + recB = db.Record(name="RecBSub").add_parent(sub_type).insert() + + rt = db.execute_query(f"FIND RECORDTYPE WITH name='{referencing_type.name}'", unique=True) + + schema = rtjs(rt) + assert schema["title"] == referencing_type.name + assert schema["type"] == "object" + assert len(schema["required"]) == 1 + assert "IntegerProp" in schema["required"] + assert "IntegerProp" in schema["properties"] + assert schema["properties"]["IntegerProp"]["type"] == "integer" + assert parent_type.name in schema["properties"] + assert "oneOf" in schema["properties"][parent_type.name] + one_of = schema["properties"][parent_type.name]["oneOf"] + assert len(one_of) == 2 + enum_index = 0 + if "enum" not in one_of[enum_index]: + # As in unittests, we can't rely on the order of oneOf. + enum_index = 1 - enum_index + assert "enum" in one_of[enum_index] + assert len(one_of[enum_index]["enum"]) == 2 + assert recA.name in one_of[enum_index]["enum"] + assert recB.name in one_of[enum_index]["enum"] + assert one_of[1 - enum_index]["type"] == "object" + # No properties in parent_type + assert len(one_of[1 - enum_index]["properties"]) == 0 diff --git a/manual_tests/test_labfolder_import.py b/manual_tests/test_labfolder_import.py index e1e9d3266478900b7fae02b3493fbc3d41ea2bd5..c767feb55cdf3958343d8d9780d01fa10c70f6ec 100644 --- a/manual_tests/test_labfolder_import.py +++ b/manual_tests/test_labfolder_import.py @@ -32,7 +32,7 @@ from caosadvancedtools.converter import labfolder_export as labfolder def main(args): """The main function.""" - model = parse_model_from_yaml("./model.yml") + model = parse_model_from_yaml("./models/model.yml") model.sync_data_model() labfolder.import_data(args.folder) diff --git a/manual_tests/test_labfolder_retrieve.py b/manual_tests/test_labfolder_retrieve.py index 8c3f12d84a8990412d0d19cd6026a3452677f943..5bbaf91d0221a402e3a39246a129413adfa5f871 100644 --- a/manual_tests/test_labfolder_retrieve.py +++ b/manual_tests/test_labfolder_retrieve.py @@ -31,7 +31,7 @@ from caosadvancedtools.converter.labfolder_api import Importer def main(args): """The main function.""" - model = parse_model_from_yaml("./model.yml") + model = parse_model_from_yaml("./models/model.yml") # model.sync_data_model() importer = Importer() diff --git a/release.sh b/release.sh index 1af097f014de6cd9eb3d3e8ba5da34aea0fe1671..f6335ae20d0c29e760b508aac831a35460a59ef3 100755 --- a/release.sh +++ b/release.sh @@ -1,4 +1,4 @@ #!/bin/bash rm -rf dist/ build/ .eggs/ python setup.py sdist bdist_wheel -python -m twine upload -s dist/* +python -m twine upload dist/* diff --git a/setup.py b/setup.py index 
3487be6a3e4eaa3fc3f96e3654985c6e53f81747..50cb59467a5162663d31a23e4cfd2159a611bf9c 100755 --- a/setup.py +++ b/setup.py @@ -46,7 +46,7 @@ from setuptools import find_packages, setup ######################################################################## MAJOR = 0 -MINOR = 8 +MINOR = 9 MICRO = 0 PRE = "" # e.g. rc0, alpha.1, 0.beta-23 ISRELEASED = True @@ -155,9 +155,9 @@ def setup_package(): author='Henrik tom Wörden', author_email='h.tomwoerden@indiscale.com', python_requires='>=3.7', - install_requires=["caosdb>=0.11.0", + install_requires=["linkahead>=0.13.1", "jsonref", - "jsonschema>=4.4.0", + "jsonschema[format]>=4.4.0", "numpy>=1.17.3", "openpyxl>=3.0.7", "pandas>=1.2.0", diff --git a/src/caosadvancedtools/bloxberg/swagger_client/__init__.py b/src/caosadvancedtools/bloxberg/swagger_client/__init__.py index 136c5b27a37cfbd9135230468ae5a29cb0eb2b77..255d6d3124dc352f10366e22f1eb8b461ff6593d 100644 --- a/src/caosadvancedtools/bloxberg/swagger_client/__init__.py +++ b/src/caosadvancedtools/bloxberg/swagger_client/__init__.py @@ -13,22 +13,23 @@ """ from __future__ import absolute_import +from swagger_client.models.validation_error import ValidationError +from swagger_client.models.http_validation_error import HTTPValidationError +from swagger_client.models.controller_cert_tools_generate_unsigned_certificate_json_certificate import ControllerCertToolsGenerateUnsignedCertificateJsonCertificate +from swagger_client.models.controller_cert_tools_generate_pdf_json_certificate import ControllerCertToolsGeneratePdfJsonCertificate +from swagger_client.models.batch import Batch +from swagger_client.configuration import Configuration +from swagger_client.api_client import ApiClient +from swagger_client.api.pdf_api import PdfApi +from swagger_client.api.certificate_api import CertificateApi # Fake the installation -import sys, pathlib +import sys +import pathlib __this_dir = str(pathlib.Path(__file__).parent.parent) if __this_dir not in sys.path: sys.path.append(__this_dir) # import apis into sdk package -from swagger_client.api.certificate_api import CertificateApi -from swagger_client.api.pdf_api import PdfApi # import ApiClient -from swagger_client.api_client import ApiClient -from swagger_client.configuration import Configuration # import models into sdk package -from swagger_client.models.batch import Batch -from swagger_client.models.controller_cert_tools_generate_pdf_json_certificate import ControllerCertToolsGeneratePdfJsonCertificate -from swagger_client.models.controller_cert_tools_generate_unsigned_certificate_json_certificate import ControllerCertToolsGenerateUnsignedCertificateJsonCertificate -from swagger_client.models.http_validation_error import HTTPValidationError -from swagger_client.models.validation_error import ValidationError diff --git a/src/caosadvancedtools/bloxberg/swagger_client/api_client.py b/src/caosadvancedtools/bloxberg/swagger_client/api_client.py index 25e6501a4e36b09bca266f2eb375807053a58870..7337ca334c545b2c2502a20cb5369db331149037 100644 --- a/src/caosadvancedtools/bloxberg/swagger_client/api_client.py +++ b/src/caosadvancedtools/bloxberg/swagger_client/api_client.py @@ -591,7 +591,7 @@ class ApiClient(object): ) def __hasattr(self, object, name): - return name in object.__class__.__dict__ + return name in object.__class__.__dict__ def __deserialize_model(self, data, klass): """Deserializes list or dict to model. 
diff --git a/src/caosadvancedtools/bloxberg/swagger_client/models/batch.py b/src/caosadvancedtools/bloxberg/swagger_client/models/batch.py index 7a347cf7ac9148df8ec9a43200f4058f127447b9..474ca01a69a6a06c93b7e9a640695fa709890997 100644 --- a/src/caosadvancedtools/bloxberg/swagger_client/models/batch.py +++ b/src/caosadvancedtools/bloxberg/swagger_client/models/batch.py @@ -15,6 +15,7 @@ import re # noqa: F401 import six + class Batch(object): """NOTE: This class is auto generated by the swagger code generator program. diff --git a/src/caosadvancedtools/bloxberg/swagger_client/models/controller_cert_tools_generate_pdf_json_certificate.py b/src/caosadvancedtools/bloxberg/swagger_client/models/controller_cert_tools_generate_pdf_json_certificate.py index 2d7fd2d763ba40c9a384203301aa3e70efdf7783..8c1b50d8816b09c1a466cf7d11cee1ca605dfd3a 100644 --- a/src/caosadvancedtools/bloxberg/swagger_client/models/controller_cert_tools_generate_pdf_json_certificate.py +++ b/src/caosadvancedtools/bloxberg/swagger_client/models/controller_cert_tools_generate_pdf_json_certificate.py @@ -15,6 +15,7 @@ import re # noqa: F401 import six + class ControllerCertToolsGeneratePdfJsonCertificate(object): """NOTE: This class is auto generated by the swagger code generator program. diff --git a/src/caosadvancedtools/bloxberg/swagger_client/models/controller_cert_tools_generate_unsigned_certificate_json_certificate.py b/src/caosadvancedtools/bloxberg/swagger_client/models/controller_cert_tools_generate_unsigned_certificate_json_certificate.py index 4a6d2d3f0e15faa8672f001e964d66c6e0a27780..fa0da3cb0c09e384cdddbd4ce458a4baf14f4b5d 100644 --- a/src/caosadvancedtools/bloxberg/swagger_client/models/controller_cert_tools_generate_unsigned_certificate_json_certificate.py +++ b/src/caosadvancedtools/bloxberg/swagger_client/models/controller_cert_tools_generate_unsigned_certificate_json_certificate.py @@ -15,6 +15,7 @@ import re # noqa: F401 import six + class ControllerCertToolsGenerateUnsignedCertificateJsonCertificate(object): """NOTE: This class is auto generated by the swagger code generator program. diff --git a/src/caosadvancedtools/bloxberg/swagger_client/models/http_validation_error.py b/src/caosadvancedtools/bloxberg/swagger_client/models/http_validation_error.py index 21c9e467311c596499f3f408c5ac670b5852c6fa..67c23fba87467a7888bff82fc7f11e9d90e15f15 100644 --- a/src/caosadvancedtools/bloxberg/swagger_client/models/http_validation_error.py +++ b/src/caosadvancedtools/bloxberg/swagger_client/models/http_validation_error.py @@ -15,6 +15,7 @@ import re # noqa: F401 import six + class HTTPValidationError(object): """NOTE: This class is auto generated by the swagger code generator program. diff --git a/src/caosadvancedtools/bloxberg/swagger_client/models/validation_error.py b/src/caosadvancedtools/bloxberg/swagger_client/models/validation_error.py index 7ae6bf0900449ff3612798a4503692c4e38e1c11..96d1e23734698efbdad8423c33012473e9aac03b 100644 --- a/src/caosadvancedtools/bloxberg/swagger_client/models/validation_error.py +++ b/src/caosadvancedtools/bloxberg/swagger_client/models/validation_error.py @@ -15,6 +15,7 @@ import re # noqa: F401 import six + class ValidationError(object): """NOTE: This class is auto generated by the swagger code generator program. 
diff --git a/src/caosadvancedtools/cache.py b/src/caosadvancedtools/cache.py index 2b79f9ae7eedaf6e7d6896450a8e7b14e1dc9b30..cf74e330d3efb754d8e79d84ba816877c295c784 100644 --- a/src/caosadvancedtools/cache.py +++ b/src/caosadvancedtools/cache.py @@ -27,16 +27,15 @@ # something to replace this. import os import sqlite3 -from copy import deepcopy +import tempfile +import warnings from abc import ABC, abstractmethod +from copy import deepcopy from hashlib import sha256 -import warnings import caosdb as db from lxml import etree -import tempfile - def put_in_container(stuff): if isinstance(stuff, list): @@ -344,7 +343,7 @@ class UpdateCache(AbstractCache): old_ones = db.Container() for ent in cont: - old_ones.append(db.execute_query("FIND ENTITY {}".format(ent.id), + old_ones.append(db.execute_query("FIND ENTITY WITH ID={}".format(ent.id), unique=True)) return old_ones diff --git a/src/caosadvancedtools/json_schema_exporter.py b/src/caosadvancedtools/json_schema_exporter.py new file mode 100644 index 0000000000000000000000000000000000000000..700c24e890c36a5b4219a1c2cc7d74ce38d6d398 --- /dev/null +++ b/src/caosadvancedtools/json_schema_exporter.py @@ -0,0 +1,691 @@ +#!/usr/bin/env python +# encoding: utf-8 +# +# This file is a part of the CaosDB Project. +# +# Copyright (C) 2023 Indiscale GmbH <info@indiscale.com> +# Copyright (C) 2023 Florian Spreckelsen <f.spreckelsen@indiscale.com> +# +# This program is free software: you can redistribute it and/or modify it under +# the terms of the GNU Affero General Public License as published by the Free +# Software Foundation, either version 3 of the License, or (at your option) any +# later version. +# +# This program is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +# FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more +# details. +# +# You should have received a copy of the GNU Affero General Public License along +# with this program. If not, see <https://www.gnu.org/licenses/>. +# +"""Module for converting a data model into a json schema compatible dictionary. + +The scope of this json schema is the automatic generation of user interfaces. +""" + +from collections import OrderedDict +from typing import Any, Dict, Iterable, List, Optional, Sequence, Tuple, Union + +import linkahead as db +from linkahead.common.datatype import get_list_datatype, is_list_datatype + + +class JsonSchemaExporter: + """A class which collects everything needed for the conversion. + """ + + def __init__(self, additional_properties: bool = True, + name_property_for_new_records: bool = False, + description_property_for_new_records: bool = False, + additional_options_for_text_props: dict = None, + additional_json_schema: Dict[str, dict] = None, + additional_ui_schema: Dict[str, dict] = None, + units_in_description: bool = True, + do_not_create: List[str] = None, + do_not_retrieve: List[str] = None, + no_remote: bool = False, + multiple_choice: List[str] = None, + wrap_files_in_objects: bool = False, + ): + """Set up a JsonSchemaExporter, which can then be applied on RecordTypes. + + Parameters + ---------- + additional_properties : bool, optional + Whether additional properties will be admitted in the resulting + schema. Optional, default is True. + name_property_for_new_records : bool, optional + Whether objects shall generally have a `name` property in the generated schema. + Optional, default is False. 
+ description_property_for_new_records : bool, optional + Whether objects shall generally have a `description` property in the generated schema. + Optional, default is False. + additional_options_for_text_props : dict, optional + Dictionary containing additional "pattern" or "format" options for + string-typed properties. Optional, default is empty. + additional_json_schema : dict[str, dict], optional + Additional schema content for elements of the given names. + additional_ui_schema : dict[str, dict], optional + Additional ui schema content for elements of the given names. + units_in_description : bool, optional + Whether to add the unit of a LinkAhead property (if it has any) to the + description of the corresponding schema entry. If set to false, an + additional `unit` key is added to the schema itself which is purely + annotational and ignored, e.g., in validation. Default is True. + do_not_create : list[str] + A list of reference Property names, for which there should be no option + to create them. Instead, only the choice of existing elements should + be given. + do_not_retrieve : list[str] + A list of RecordType names, for which no Records shall be retrieved. Instead, only an + object description should be given. If this list overlaps with the `do_not_create` + parameter, the behavior is undefined. + no_remote : bool + If True, do not attempt to connect to a LinkAhead server at all. Default is False. + multiple_choice : list[str], optional + A list of reference Property names which shall be denoted as multiple choice properties. + This means that each option in this property may be selected at most once. This is not + implemented yet if the Property is not in ``do_not_create`` as well. + wrap_files_in_objects : bool, optional + Whether (lists of) files should be wrapped into an array of objects + that have a file property. The sole purpose of this wrapping is to + provide a workaround for a `react-jsonschema-form + bug <https://github.com/rjsf-team/react-jsonschema-form/issues/3957>`_ + so only set this to True if you're using the exported schema with + react-jsonschema-form and you are experiencing the bug. Default is False. 
+ """ + if not additional_options_for_text_props: + additional_options_for_text_props = {} + if not additional_json_schema: + additional_json_schema = {} + if not additional_ui_schema: + additional_ui_schema = {} + if not do_not_create: + do_not_create = [] + if not do_not_retrieve: + do_not_retrieve = [] + if not multiple_choice: + multiple_choice = [] + + self._additional_properties = additional_properties + self._name_property_for_new_records = name_property_for_new_records + self._description_property_for_new_records = description_property_for_new_records + self._additional_options_for_text_props = additional_options_for_text_props + self._additional_json_schema = additional_json_schema + self._additional_ui_schema = additional_ui_schema + self._units_in_description = units_in_description + self._do_not_create = do_not_create + self._do_not_retrieve = do_not_retrieve + self._no_remote = no_remote + self._multiple_choice = multiple_choice + self._wrap_files_in_objects = wrap_files_in_objects + + @staticmethod + def _make_required_list(rt: db.RecordType): + """Return the list of names of properties with importance db.OBLIGATORY.""" + required_list = [] + for prop in rt.properties: + if rt.get_importance(prop.name) != db.OBLIGATORY: + continue + prop_name = prop.name + if isinstance(prop.datatype, db.Entity): + prop_name = prop.datatype.name + required_list.append(prop_name) + + return required_list + + def _make_segment_from_prop(self, prop: db.Property) -> Tuple[OrderedDict, dict]: + """Return the JSON Schema and ui schema segments for the given property. + + The result may either be a simple json schema segment, such as a `string + <https://json-schema.org/understanding-json-schema/reference/string>`_ element (or another + simple type), a combination such as `anyOf + <https://json-schema.org/understanding-json-schema/reference/combining#anyof>`_ or an `array + <https://json-schema.org/understanding-json-schema/reference/array>`_ element + + Parameters + ---------- + prop : db.Property + The property to be transformed. + """ + json_prop = OrderedDict() + ui_schema: dict = {} + if prop.datatype == db.TEXT or prop.datatype == db.DATETIME: + text_format = None + text_pattern = None + if prop.name in self._additional_options_for_text_props: + if "pattern" in self._additional_options_for_text_props[prop.name]: + text_pattern = self._additional_options_for_text_props[prop.name]["pattern"] + if "format" in self._additional_options_for_text_props[prop.name]: + text_format = self._additional_options_for_text_props[prop.name]["format"] + elif prop.datatype == db.DATETIME: + # Set the date or datetime format if only a pattern is given ... + text_format = ["date", "date-time"] + elif prop.datatype == db.DATETIME: + # ... again, for those props that don't appear in the additional + # options list. + text_format = ["date", "date-time"] + + json_prop = self._make_text_property(prop.description, text_format, text_pattern) + return self._customize(json_prop, ui_schema, prop) + + if prop.description: + json_prop["description"] = prop.description + if self._units_in_description and prop.unit: + if "description" in json_prop: + json_prop["description"] += f" Unit is {prop.unit}." + else: + json_prop["description"] = f"Unit is {prop.unit}." 
+ elif prop.unit: + json_prop["unit"] = prop.unit + + if prop.datatype == db.BOOLEAN: + json_prop["type"] = "boolean" + elif prop.datatype == db.INTEGER: + json_prop["type"] = "integer" + elif prop.datatype == db.DOUBLE: + json_prop["type"] = "number" + elif is_list_datatype(prop.datatype) and not ( + self._wrap_files_in_objects and get_list_datatype(prop.datatype, + strict=True) == db.FILE): + json_prop["type"] = "array" + list_element_prop = db.Property( + name=prop.name, datatype=get_list_datatype(prop.datatype, strict=True)) + json_prop["items"], inner_ui_schema = self._make_segment_from_prop(list_element_prop) + if prop.name in self._multiple_choice and prop.name in self._do_not_create: + # TODO: if not multiple_choice, but do_not_create: + # "ui:widget" = "radio" & "ui:inline" = true + # TODO: set threshold for number of items. + json_prop["uniqueItems"] = True + ui_schema["ui:widget"] = "checkboxes" + ui_schema["ui:inline"] = True + if inner_ui_schema: + ui_schema["items"] = inner_ui_schema + elif prop.is_reference(): + if prop.datatype == db.REFERENCE: + # No Record creation since no RT is specified and we don't know what + # schema to use, so only enum of all Records and all Files. + values = self._retrieve_enum_values("RECORD") + self._retrieve_enum_values("FILE") + json_prop["enum"] = values + if prop.name in self._multiple_choice: + json_prop["uniqueItems"] = True + elif prop.datatype == db.FILE or ( + self._wrap_files_in_objects and + is_list_datatype(prop.datatype) and + get_list_datatype(prop.datatype, strict=True) == db.FILE + ): + # Singular FILE (wrapped or unwrapped), or wrapped LIST<FILE> + if self._wrap_files_in_objects: + # Workaround for react-jsonschema-form bug + # https://github.com/rjsf-team/react-jsonschema-form/issues/3957: + # Wrap all FILE references (regardless whether lists or + # scalars) in an array of objects that have a file property, + # since objects can be deleted, files can't. + json_prop["type"] = "array" + json_prop["items"] = { + "type": "object", + "title": "Next file", + # The wrapper object must wrap a file and can't be empty. + "required": ["file"], + # Wrapper objects must only contain the wrapped file. + "additionalProperties": False, + "properties": { + "file": { + "title": "Enter your file.", + "type": "string", + "format": "data-url" + } + } + } + if not is_list_datatype(prop.datatype): + # Scalar file, so the array has maximum length 1 + json_prop["maxItems"] = 1 + else: + json_prop["type"] = "string" + json_prop["format"] = "data-url" + else: + prop_name = prop.datatype + if isinstance(prop.datatype, db.Entity): + prop_name = prop.datatype.name + if prop.name in self._do_not_retrieve: + values = [] + else: + values = self._retrieve_enum_values(f"RECORD '{prop_name}'") + if prop.name in self._do_not_create: + # Only a simple list of values + json_prop["enum"] = values + else: + if self._no_remote: + rt = prop.datatype + else: + rt = db.execute_query(f"FIND RECORDTYPE WITH name='{prop_name}'", + unique=True) + subschema, ui_schema = self._make_segment_from_recordtype(rt) + # if inner_ui_schema: + # ui_schema = inner_ui_schema + if values: + subschema["title"] = "Create new" + json_prop["oneOf"] = [ + { + "title": "Existing entries", + "enum": values, + }, + subschema + ] + else: + json_prop = subschema + + else: + raise ValueError( + f"Unknown or no property datatype. 
Property {prop.name} with type {prop.datatype}") + + return self._customize(json_prop, ui_schema, prop) + + @staticmethod + def _make_text_property(description="", text_format=None, text_pattern=None) -> OrderedDict: + """Create a text element. + + Can be a `string <https://json-schema.org/understanding-json-schema/reference/string>`_ + element or an `anyOf + <https://json-schema.org/understanding-json-schema/reference/combining#anyof>`_ combination + thereof. + + Example: + + .. code-block:: json + + { + "type": "string", + "description": "Some description", + "pattern": "[0-9]{2..4}-[0-9]{2-4}", + "format": "hostname", + } + """ + prop: OrderedDict[str, Union[str, list]] = OrderedDict({ + "type": "string" + }) + if description: + prop["description"] = description + if text_format is not None: + if isinstance(text_format, list): + # We want the type inside the options, not in the head: + # "datetime property": { + # "anyOf": [ + # { + # "type": "string", + # "format": "date" + # }, + # { + # "type": "string", + # "format": "date-time" + # }]} + prop.pop("type") + prop["anyOf"] = [{"type": "string", "format": tf} for tf in text_format] + else: + prop["format"] = text_format + if text_pattern is not None: + prop["pattern"] = text_pattern + + return prop + + def _retrieve_enum_values(self, role: str): + + if self._no_remote: + return [] + + possible_values = db.execute_query(f"SELECT name, id FROM {role}") + + vals = [] + for val in possible_values: + if val.name: + vals.append(f"{val.name}") + else: + vals.append(f"{val.id}") + + return vals + + def _make_segment_from_recordtype(self, rt: db.RecordType) -> Tuple[OrderedDict, dict]: + """Return Json schema and uischema segments for the given RecordType. + + The result is an element of type `object + <https://json-schema.org/understanding-json-schema/reference/object>`_ and typically + contains more properties: + + .. code-block:: json + + { + "type": "object", + "title": "MyRecordtypeName", + "properties": { + "number": { "type": "number" }, + "street_name": { "type": "string" }, + "street_type": { "enum": ["Street", "Avenue", "Boulevard"] } + } + } + """ + schema: OrderedDict[str, Any] = OrderedDict({ + "type": "object" + }) + ui_schema = {} + + schema["required"] = self._make_required_list(rt) + schema["additionalProperties"] = self._additional_properties + if rt.description: + schema["description"] = rt.description + + if rt.name: + schema["title"] = rt.name + + props = OrderedDict() + if self._name_property_for_new_records: + props["name"] = self._make_text_property("The name of the Record to be created") + if self._description_property_for_new_records: + props["description"] = self._make_text_property( + "The description of the Record to be created") + + for prop in rt.properties: + if prop.name in props: + # Multi property + raise NotImplementedError( + "Creating a schema for multi-properties is not specified. " + f"Property {prop.name} occurs more than once." + ) + props[prop.name], inner_ui_schema = self._make_segment_from_prop(prop) + if inner_ui_schema: + ui_schema[prop.name] = inner_ui_schema + + schema["properties"] = props + + return schema, ui_schema + + def _customize(self, schema: OrderedDict, ui_schema: dict, entity: db.Entity = None) -> ( + Tuple[OrderedDict, dict]): + """Generic customization method. + +Walk over the available customization stores and apply all applicable ones. No specific order is +guaranteed (as of now). + + Parameters + ---------- + schema, ui_schema : dict + The input schemata. 
+ entity: db.Entity : , optional + An Entity object, may be useful in the future for customizers. + + Returns + ------- + out : Tuple[dict, dict] + The modified input schemata. + """ + + name = schema.get("title", None) + if entity and entity.name: + name = entity.name + for key, add_schema in self._additional_json_schema.items(): + if key == name: + schema.update(add_schema) + for key, add_schema in self._additional_ui_schema.items(): + if key == name: + ui_schema.update(add_schema) + + return schema, ui_schema + + def recordtype_to_json_schema(self, rt: db.RecordType, rjsf: bool = False) -> Union[ + dict, Tuple[dict, dict]]: + """Create a jsonschema from a given RecordType that can be used, e.g., to + validate a json specifying a record of the given type. + + Parameters + ---------- + rt : RecordType + The RecordType from which a json schema will be created. + rjsf : bool, optional + If True, uiSchema definitions for react-jsonschema-forms will be output as the second + return value. Default is False + + Returns + ------- + schema : dict + A dict containing the json schema created from the given RecordType's properties. + + ui_schema : dict, optional + A ui schema. Only if a parameter asks for it (e.g. ``rjsf``). + """ + if rt is None: + raise ValueError( + "recordtype_to_json_schema(...) cannot be called with a `None` RecordType.") + schema, inner_uischema = self._make_segment_from_recordtype(rt) + schema["$schema"] = "https://json-schema.org/draft/2020-12/schema" + if rt.description: + schema["description"] = rt.description + schema, inner_uischema = self._customize(schema, inner_uischema, rt) + + if rjsf: + uischema = {} + if inner_uischema: + uischema = inner_uischema + return schema, uischema + return schema + + +def recordtype_to_json_schema(rt: db.RecordType, additional_properties: bool = True, + name_property_for_new_records: bool = False, + description_property_for_new_records: bool = False, + additional_options_for_text_props: Optional[dict] = None, + additional_json_schema: Dict[str, dict] = None, + additional_ui_schema: Dict[str, dict] = None, + units_in_description: bool = True, + do_not_create: List[str] = None, + do_not_retrieve: List[str] = None, + no_remote: bool = False, + multiple_choice: List[str] = None, + rjsf: bool = False, + wrap_files_in_objects: bool = False + ) -> Union[dict, Tuple[dict, dict]]: + """Create a jsonschema from a given RecordType that can be used, e.g., to + validate a json specifying a record of the given type. + + This is a standalone function which works without manually creating a + JsonSchemaExporter object. + + Parameters + ---------- + rt : RecordType + The RecordType from which a json schema will be created. + additional_properties : bool, optional + Whether additional properties will be admitted in the resulting + schema. Optional, default is True. + name_property_for_new_records : bool, optional + Whether objects shall generally have a `name` property in the generated schema. Optional, + default is False. + description_property_for_new_records : bool, optional + Whether objects shall generally have a `description` property in the generated schema. + Optional, default is False. + additional_options_for_text_props : dict, optional + Dictionary containing additional "pattern" or "format" options for + string-typed properties. Optional, default is empty. + additional_json_schema : dict[str, dict], optional + Additional schema content for elements of the given names. 
+ additional_ui_schema : dict[str, dict], optional + Additional ui schema content for elements of the given names. + units_in_description : bool, optional + Whether to add the unit of a LinkAhead property (if it has any) to the + description of the corresponding schema entry. If set to false, an + additional `unit` key is added to the schema itself which is purely + annotational and ignored, e.g., in validation. Default is True. + do_not_create : list[str], optional + A list of reference Property names, for which there should be no option + to create them. Instead, only the choice of existing elements should + be given. + do_not_retrieve : list[str], optional + A list of RecordType names, for which no Records shall be retrieved. Instead, only an + object description should be given. If this list overlaps with the `do_not_create` + parameter, the behavior is undefined. + no_remote : bool, optional + If True, do not attempt to connect to a LinkAhead server at all. Default is False. + multiple_choice : list[str], optional + A list of reference Property names which shall be denoted as multiple choice properties. + This means that each option in this property may be selected at most once. This is not + implemented yet if the Property is not in ``do_not_create`` as well. + rjsf : bool, optional + If True, uiSchema definitions for react-jsonschema-forms will be output as the second return + value. Default is False. + wrap_files_in_objects : bool, optional + Whether (lists of) files should be wrapped into an array of objects that + have a file property. The sole purpose of this wrapping is to provide a + workaround for a `react-jsonschema-form + bug <https://github.com/rjsf-team/react-jsonschema-form/issues/3957>`_ so + only set this to True if you're using the exported schema with + react-jsonschema-form and you are experiencing the bug. Default is False. + + + Returns + ------- + schema : dict + A dict containing the json schema created from the given RecordType's properties. + + ui_schema : dict, optional + A ui schema. Only if a parameter asks for it (e.g. ``rjsf``). + """ + + exporter = JsonSchemaExporter( + additional_properties=additional_properties, + name_property_for_new_records=name_property_for_new_records, + description_property_for_new_records=description_property_for_new_records, + additional_options_for_text_props=additional_options_for_text_props, + additional_json_schema=additional_json_schema, + additional_ui_schema=additional_ui_schema, + units_in_description=units_in_description, + do_not_create=do_not_create, + do_not_retrieve=do_not_retrieve, + no_remote=no_remote, + multiple_choice=multiple_choice, + wrap_files_in_objects=wrap_files_in_objects + ) + return exporter.recordtype_to_json_schema(rt, rjsf=rjsf) + + +def make_array(schema: dict, rjsf_uischema: dict = None) -> Union[dict, Tuple[dict, dict]]: + """Create an array of the given schema. + +The result will look like this: + +.. code:: js + + { "type": "array", + "items": { + // the schema + } + } + +Parameters +---------- + +schema : dict + The JSON schema which shall be packed into an array. + +rjsf_uischema : dict, optional + A react-jsonschema-forms ui schema that shall be wrapped as well. + +Returns +------- + +schema : dict + A JSON schema dict with a top-level array which contains instances of the given schema. + +ui_schema : dict, optional + The wrapped ui schema. Only returned if ``rjsf_uischema`` is given as a parameter. 
+ """ + result = { + "type": "array", + "items": schema, + "$schema": "https://json-schema.org/draft/2020-12/schema", + } + + if rjsf_uischema is not None: + ui_schema = {"items": rjsf_uischema} + return result, ui_schema + return result + + +def merge_schemas(schemas: Union[Dict[str, dict], Iterable[dict]], + rjsf_uischemas: Union[Dict[str, dict], Sequence[dict]] = None) -> ( + Union[dict, Tuple[dict, dict]]): + """Merge the given schemata into a single schema. + +The result will look like this: + +.. code:: js + + { + "type": "object", + "properties": { + // A, B, C + }, + "required": [ + // "A", "B", "C" + ], + "additionalProperties": false + } + + +Parameters +---------- + +schemas : dict[str, dict] | Iterable[dict] + A dict or iterable of schemata which shall be merged together. If this is a dict, the keys will + be used as property names, otherwise the titles of the submitted schemata. If they have no title, + numbers will be used as a fallback. Note that even with a dict, the original schema's "title" is + not changed. +rjsf_uischemas : dict[str, dict] | Iterable[dict], optional + If given, also merge the react-jsonschema-forms from this argument and return as the second return + value. If ``schemas`` is a dict, this parameter must also be a dict, if ``schemas`` is only an + iterable, this paramater must support numerical indexing. + +Returns +------- + +schema : dict + A JSON schema dict with a top-level object which contains the given schemata as properties. + +uischema : dict + If ``rjsf_uischemas`` was given, this contains the merged UI schemata. + """ + sub_schemas: dict[str, dict] = OrderedDict() + required = [] + ui_schema = None + + if isinstance(schemas, dict): + sub_schemas = schemas + required = [str(k) for k in schemas.keys()] + if rjsf_uischemas is not None: + if not isinstance(rjsf_uischemas, dict): + raise ValueError("Parameter `rjsf_uischemas` must be a dict, because `schemas` is " + f"as well, but it is a {type(rjsf_uischemas)}.") + ui_schema = {k: rjsf_uischemas[k] for k in schemas.keys()} + else: + for i, schema in enumerate(schemas, start=1): + title = schema.get("title", str(i)) + sub_schemas[title] = schema + required.append(title) + if rjsf_uischemas is not None: + if not isinstance(rjsf_uischemas, Sequence): + raise ValueError("Parameter `rjsf_uischemas` must be a sequence, because `schemas` " + f"is as well, but it is a {type(rjsf_uischemas)}.") + ui_schema = {} + for i, title in enumerate(sub_schemas.keys()): + ui_schema[title] = rjsf_uischemas[i] + # ui_schema = {"index": ui_schema} + + result = { + "type": "object", + "properties": sub_schemas, + "required": required, + "additionalProperties": False, + "$schema": "https://json-schema.org/draft/2020-12/schema", + } + + if ui_schema is not None: + return result, ui_schema + return result diff --git a/src/caosadvancedtools/loadFiles.py b/src/caosadvancedtools/loadFiles.py index 27d867c41a11ee4a6b08e7ffc9df677a3697eced..405b3d135c8af89e32c74015bd04f76f21828e20 100755 --- a/src/caosadvancedtools/loadFiles.py +++ b/src/caosadvancedtools/loadFiles.py @@ -175,8 +175,8 @@ def loadpath(path, include, exclude, prefix, dryrun, forceAllowSymlinks, caosdbi for f in files: totalsize += f.size - logger.info("Made in total {} new files with a combined size of {} " - "accessible.".format(len(files), convert_size(totalsize))) + logger.info( + f"Made new files accessible: {len(files)}, combined size: {convert_size(totalsize)} ") return diff --git a/src/caosadvancedtools/models/data_model.py 
b/src/caosadvancedtools/models/data_model.py index df2f3ad2244c24049830cb3c2f06d1def5b22e0c..266414893bcdf1ab45ee1345fc549e15f4a66250 100644 --- a/src/caosadvancedtools/models/data_model.py +++ b/src/caosadvancedtools/models/data_model.py @@ -29,8 +29,9 @@ from copy import deepcopy # remove this, when we drop support for old Python versions. from typing import List -import caosdb as db -from caosdb.apiutils import compare_entities, describe_diff +import linkahead as db +import linkahead.common.models as models +from linkahead.apiutils import compare_entities, describe_diff, merge_entities CAOSDB_INTERNAL_PROPERTIES = [ @@ -60,7 +61,8 @@ class DataModel(dict): different purpose (e.g. someone else's experiment). DataModel inherits from dict. The keys are always the names of the - entities. Thus you cannot have unnamed entities in your model. + entities. Thus you cannot have unnamed or ambiguously named entities in your + model. Example: @@ -261,3 +263,73 @@ class DataModel(dict): all_ents[prop.name] = prop return list(all_ents.values()) + + def get_deep(self, name: str, visited_props: dict = None, visited_parents: set = None): + """Attempt to resolve references for the given ``name``. + + The returned entity has all the properties it inherits from its ancestry and all properties + have the correct descriptions and datatypes. This method only uses data which is available + in this DataModel, which acts kind of like a cache pool. + + Note that this may change this data model (subsequent "get"-like calls may also return + deeper content). + + """ + entity = self.get(name) + if not entity: + return entity + if not visited_props: + visited_props = {} + if not visited_parents: + visited_parents = set() + + importances = { + models.OBLIGATORY: 0, + models.RECOMMENDED: 1, + models.SUGGESTED: 2, + } + + for parent in list(entity.get_parents()): # Make a change-resistant list copy. + if parent.name in visited_parents: + continue + visited_parents.add(parent.name) + parent_importance = importances.get(parent._flags.get("inheritance"), 999) + if parent.name in self: + deep_parent = self.get_deep(parent.name, # visited_props=visited_props, + visited_parents=visited_parents + ) + + for prop in deep_parent.properties: + importance = importances[deep_parent.get_importance(prop.name)] + if (importance <= parent_importance + and prop.name not in [prop.name for prop in entity.properties]): + entity.add_property(prop) + else: + print(f"Referenced parent \"{parent.name}\" not found in data model.") + + for prop in list(entity.get_properties()): # Make a change-resistant list copy. 
+ if prop.name in visited_props: + if visited_props[prop.name]: + deep_prop = visited_props[prop.name] + merge_entities(prop, deep_prop) + prop.datatype = deep_prop.datatype + prop.value = deep_prop.value + prop.unit = deep_prop.unit + continue + visited_props[prop.name] = None + if prop.name in self: + deep_prop = self.get_deep(prop.name, visited_props=visited_props, + visited_parents=visited_parents) + linked_prop = entity.get_property(prop) + if not linked_prop.datatype: + if deep_prop.role == "Property": + linked_prop.datatype = deep_prop.datatype + elif deep_prop.role == "RecordType": + linked_prop.datatype = deep_prop + if deep_prop.description: + linked_prop.description = deep_prop.description + visited_props[prop.name] = deep_prop + else: + print(f"Referenced property \"{prop.name}\" not found in data model.") + + return entity diff --git a/src/caosadvancedtools/models/parser.py b/src/caosadvancedtools/models/parser.py index b77b37669b27ee0d2ddf749eeae54915714e54ec..37f34e7bcbae48188c96b9bea6434d59571020fd 100644 --- a/src/caosadvancedtools/models/parser.py +++ b/src/caosadvancedtools/models/parser.py @@ -1,8 +1,8 @@ # This file is a part of the CaosDB Project. # -# Copyright (C) 2022 IndiScale GmbH <info@indiscale.com> +# Copyright (C) 2023 IndiScale GmbH <info@indiscale.com> # Copyright (C) 2022 Florian Spreckelsen <f.spreckelsen@indiscale.com> -# Copyright (C) 2022 Daniel Hornung <d.hornung@indiscale.com> +# Copyright (C) 2023 Daniel Hornung <d.hornung@indiscale.com> # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU Affero General Public License as @@ -42,12 +42,13 @@ import re import sys import yaml -from typing import List +from typing import List, Optional from warnings import warn import jsonschema -import caosdb as db +import linkahead as db +from linkahead.common.datatype import get_list_datatype from .data_model import CAOSDB_INTERNAL_PROPERTIES, DataModel # Keywords which are allowed in data model descriptions. @@ -82,23 +83,6 @@ JSON_SCHEMA_ATOMIC_TYPES = [ ] -def _get_listdatatype(dtype): - """matches a string to check whether the type definition is a list - - returns the type within the list or None, if it cannot be matched with a - list definition - """ - # TODO: string representation should be the same as used by the server: - # e.g. LIST<TEXT> - # this should be changed in the module and the old behavour should be - # marked as depricated - match = re.match(r"^LIST[(<](?P<dt>.*)[)>]$", dtype) - - if match is None: - return None - else: - return match.group("dt") - # Taken from https://stackoverflow.com/a/53647080, CC-BY-SA, 2018 by # https://stackoverflow.com/users/2572431/augurar @@ -140,46 +124,82 @@ class JsonSchemaDefinitionError(RuntimeError): super().__init__(msg) -def parse_model_from_yaml(filename): - """Shortcut if the Parser object is not needed.""" - parser = Parser() +def parse_model_from_yaml(filename, existing_model: Optional[dict] = None, debug: bool = False): + """Parse a data model from a YAML file. + +This is a convenience function if the Parser object is not needed, it calls +``Parser.parse_model_from_yaml(...)`` internally. + + +Parameters +---------- + +existing_model : dict, optional + An existing model to which the created model shall be added. + +debug : bool, optional + If True, turn on miscellaneous debugging. Default is False. 
+ """ + parser = Parser(debug=debug) + + return parser.parse_model_from_yaml(filename, existing_model=existing_model) + - return parser.parse_model_from_yaml(filename) +def parse_model_from_string(string, existing_model: Optional[dict] = None, debug: bool = False): + """Parse a data model from a YAML string +This is a convenience function if the Parser object is not needed, it calls +``Parser.parse_model_from_string(...)`` internally. -def parse_model_from_string(string): - """Shortcut if the Parser object is not needed.""" - parser = Parser() +Parameters +---------- - return parser.parse_model_from_string(string) +existing_model : dict, optional + An existing model to which the created model shall be added. + +debug : bool, optional + If True, turn on miscellaneous debugging. Default is False. + """ + parser = Parser(debug=debug) + + return parser.parse_model_from_string(string, existing_model=existing_model) def parse_model_from_json_schema( filename: str, top_level_recordtype: bool = True, types_for_missing_array_items: dict = {}, - ignore_unspecified_array_items: bool = False + ignore_unspecified_array_items: bool = False, + existing_model: Optional[dict] = None ): """Return a datamodel parsed from a json schema definition. Parameters ---------- + filename : str The path of the json schema file that is to be parsed + top_level_recordtype : bool, optional Whether there is a record type defined at the top level of the schema. Default is true. + types_for_missing_array_items : dict, optional dictionary containing fall-back types for json entries with `type: array` but without `items` specification. Default is an empty dict. + ignore_unspecified_array_items : bool, optional Whether to ignore `type: array` entries the type of which is not specified by their `items` property or given in `types_for_missing_array_items`. An error is raised if they are not ignored. Default is False. + existing_model : dict, optional + An existing model to which the created model shall be added. Not implemented yet. + Returns ------- + out : Datamodel The datamodel generated from the input schema which then can be used for synchronizing with CaosDB. @@ -190,6 +210,9 @@ def parse_model_from_json_schema( about the limitations of the current implementation. """ + if existing_model is not None: + raise NotImplementedError("Adding to an existing model is not implemented yet.") + # @author Florian Spreckelsen # @date 2022-02-17 # @review Timm Fitschen 2023-05-25 @@ -199,15 +222,22 @@ def parse_model_from_json_schema( class Parser(object): - def __init__(self): + def __init__(self, debug: bool = False): """Initialize an empty parser object and initialize the dictionary of entities and the list of treated elements. +Parameters +---------- + +debug : bool, optional + If True, turn on miscellaneous debugging. Default is False. + """ self.model = {} self.treated = [] + self.debug = debug - def parse_model_from_yaml(self, filename): + def parse_model_from_yaml(self, filename, existing_model: Optional[dict] = None): """Create and return a data model from the given file. Parameters @@ -215,6 +245,9 @@ class Parser(object): filename : str The path to the YAML file. + existing_model : dict, optional + An existing model to which the created model shall be added. 
+ Returns ------- out : DataModel @@ -223,9 +256,9 @@ class Parser(object): with open(filename, 'r') as outfile: ymlmodel = yaml.load(outfile, Loader=SafeLineLoader) - return self._create_model_from_dict(ymlmodel) + return self._create_model_from_dict(ymlmodel, existing_model=existing_model) - def parse_model_from_string(self, string): + def parse_model_from_string(self, string, existing_model: Optional[dict] = None): """Create and return a data model from the given YAML string. Parameters @@ -233,6 +266,9 @@ class Parser(object): string : str The YAML string. + existing_model : dict, optional + An existing model to which the created model shall be added. + Returns ------- out : DataModel @@ -240,9 +276,9 @@ class Parser(object): """ ymlmodel = yaml.load(string, Loader=SafeLineLoader) - return self._create_model_from_dict(ymlmodel) + return self._create_model_from_dict(ymlmodel, existing_model=existing_model) - def _create_model_from_dict(self, ymlmodel): + def _create_model_from_dict(self, ymlmodel, existing_model: Optional[dict] = None): """Create and return a data model out of the YAML dict `ymlmodel`. Parameters @@ -250,6 +286,9 @@ class Parser(object): ymlmodel : dict The dictionary parsed from a YAML file. + existing_model : dict, optional + An existing model to which the created model shall be added. + Returns ------- out : DataModel @@ -259,6 +298,9 @@ class Parser(object): if not isinstance(ymlmodel, dict): raise ValueError("Yaml file should only contain one dictionary!") + if existing_model is not None: + self.model.update(existing_model) + # Extern keyword: # The extern keyword can be used to include Properties and RecordTypes # from existing CaosDB datamodels into the current model. @@ -294,7 +336,12 @@ class Parser(object): self._check_and_convert_datatypes() for name, entity in ymlmodel.items(): - self._treat_entity(name, entity, line=ymlmodel["__line__"]) + try: + self._treat_entity(name, entity, line=ymlmodel["__line__"]) + except ValueError as err: + err_str = err.args[0].replace("invalid keyword:", + f"invalid keyword in line {entity['__line__']}:", 1) + raise ValueError(err_str, *err.args[1:]) from err return DataModel(self.model.values()) @@ -345,13 +392,12 @@ class Parser(object): if definition is None: return - if (self.model[name] is None - and isinstance(definition, dict) + if (self.model[name] is None and isinstance(definition, dict) # is it a property and "datatype" in definition # but not simply an RT of the model - and not (_get_listdatatype(definition["datatype"]) == name and - _get_listdatatype(definition["datatype"]) in self.model)): + and not (get_list_datatype(definition["datatype"]) == name and + get_list_datatype(definition["datatype"]) in self.model)): # and create the new property self.model[name] = db.Property(name=name, @@ -401,6 +447,9 @@ class Parser(object): raise YamlDefinitionError(line) from None raise + if self.debug and self.model[name] is not None: + self.model[name].__line__ = definition["__line__"] + def _add_to_recordtype(self, ent_name, props, importance): """Add properties to a RecordType. @@ -434,9 +483,9 @@ class Parser(object): n = self._stringify(n) if isinstance(e, dict): - if "datatype" in e and _get_listdatatype(e["datatype"]) is not None: + if "datatype" in e and get_list_datatype(e["datatype"]) is not None: # Reuse the existing datatype for lists. 
- datatype = db.LIST(_get_listdatatype(e["datatype"])) + datatype = db.LIST(get_list_datatype(e["datatype"])) else: # Ignore a possible e["datatype"] here if it's not a list # since it has been treated in the definition of the @@ -458,6 +507,9 @@ class Parser(object): def _inherit(self, name, prop, inheritance): if not isinstance(prop, list): + if isinstance(prop, str): + raise YamlDefinitionError( + f"Parents must be a list but is given as string: {name} > {prop}") raise YamlDefinitionError("Parents must be a list, error in line {}".format( prop["__line__"])) @@ -481,9 +533,13 @@ class Parser(object): if not isinstance(definition, dict): return - if ("datatype" in definition - and definition["datatype"].startswith("LIST")): + # These definition items must be handled even for list props. + for prop_name, prop in definition.items(): + if prop_name == "description": + self.model[name].description = prop + # For lists, everything else is not needed at this level. + if ("datatype" in definition and definition["datatype"].startswith("LIST")): return if name in self.treated: @@ -501,7 +557,8 @@ class Parser(object): self.model[name].value = prop elif prop_name == "description": - self.model[name].description = prop + # Handled above + continue elif prop_name == "recommended_properties": self._add_to_recordtype( @@ -575,15 +632,19 @@ class Parser(object): dtype = value.datatype is_list = False - if _get_listdatatype(value.datatype) is not None: - dtype = _get_listdatatype(value.datatype) + if get_list_datatype(dtype) is not None: + dtype = get_list_datatype(dtype) is_list = True - if dtype in self.model: + dtype_name = dtype + if not isinstance(dtype_name, str): + dtype_name = dtype.name + + if dtype_name in self.model: if is_list: - value.datatype = db.LIST(self.model[dtype]) + value.datatype = db.LIST(self.model[dtype_name]) else: - value.datatype = self.model[dtype] + value.datatype = self.model[dtype_name] continue @@ -605,7 +666,7 @@ class Parser(object): continue raise ValueError("Property {} has an unknown datatype: {}".format( - value.name, value.datatype)) + value.name, dtype_name)) def _set_recordtypes(self): """ properties are defined in first iteration; set remaining as RTs """ diff --git a/src/caosadvancedtools/table_export.py b/src/caosadvancedtools/table_export.py index 056207a76fa01357e2269cd4cb8e9a09905d5d90..eabb10754bdb93859dcc6ef3d3ff0838fa6ff6d4 100644 --- a/src/caosadvancedtools/table_export.py +++ b/src/caosadvancedtools/table_export.py @@ -27,6 +27,7 @@ them for an export as a table, e.g., for the export to metadata repositories. """ +from inspect import signature import json import logging @@ -83,7 +84,7 @@ class BaseTableExporter(object): ``` {"entry_to_be_exported: { "optional": True/False - "find_func": name of member function + "find_func": callable or name of member function "query": query string "selector": selector for the query "error": error explanation @@ -97,8 +98,8 @@ class BaseTableExporter(object): - optional: True or False, if not present, the entry is assumed to be mandatory. - find_func: name of the member function that returns the - value for this entry. Must not exist together with - `query` + value for this entry or callable object. Must not exist + together with `query` - query: Query string for finding the value for this entry. If this is given, a record must be given to the constructor of this class. 
The query is then executed as @@ -132,6 +133,7 @@ class BaseTableExporter(object): self._check_sanity_of_export_dict() self.raise_error_if_missing = raise_error_if_missing self.info = {} + self.all_keys = [key for key in self.export_dict] def collect_information(self): """Use the items of `export_dict` to collect the information for the @@ -139,7 +141,8 @@ class BaseTableExporter(object): """ - for e, d in self.export_dict.items(): + for e in self.all_keys: + d = self.export_dict[e] if QUERY in d: # TODO: How do we make this more general? There might # be queries that don't need the record or work with @@ -163,12 +166,15 @@ class BaseTableExporter(object): else: self._append_missing(e, d) elif FIND_FUNCTION in d: - find_fun = getattr(self, d[FIND_FUNCTION]) try: - self.info[e] = find_fun() + val = self._call_find_function(d[FIND_FUNCTION], e) + if val is not None: + self.info[e] = val + else: + self._append_missing(e, d) except Exception as exc: self._append_missing(e, d) - logger.debug(exc) + logger.error(exc) # last resort: check if record has e as property: else: try: @@ -200,6 +206,20 @@ class BaseTableExporter(object): else: logger.error(errmssg) + def _call_find_function(self, find_function, e): + if callable(find_function): + find_fun = find_function + else: + find_fun = getattr(self, find_function) + + sig = signature(find_fun) + params = sig.parameters + if len(params) > 1: + return find_fun(self.record, e) + elif len(params) > 0: + return find_fun(self.record) + return find_fun() + def prepare_csv_export(self, delimiter=',', print_header=False, skip_empty_optionals=False): """Return the values in self.info as a single-line string, separated @@ -238,7 +258,8 @@ class BaseTableExporter(object): if print_header: header = "" - for e, d in self.export_dict.items(): + for e in self.all_keys: + d = self.export_dict[e] if e in self.info: body += str(self.info[e]) + delimiter @@ -287,7 +308,9 @@ class BaseTableExporter(object): # check find function if present if FIND_FUNCTION in d: - if not hasattr(self, d[FIND_FUNCTION]): + if callable(d[FIND_FUNCTION]): + pass + elif not hasattr(self, d[FIND_FUNCTION]): raise TableExportError( "Find function " + d[FIND_FUNCTION] + " was specified for entry " + e + diff --git a/src/caosadvancedtools/table_importer.py b/src/caosadvancedtools/table_importer.py index 8f793584051386796bce18bdbaded6c7e34c06ca..bae813b23195c93ccfd369a626424dd069164fb0 100755 --- a/src/caosadvancedtools/table_importer.py +++ b/src/caosadvancedtools/table_importer.py @@ -322,7 +322,7 @@ class TableImporter(): .. note:: If columns are integer, but should be float, this method converts the respective columns - in place. + in place. The same for columns that should have string value but have numeric value. Parameters ---------- @@ -338,9 +338,11 @@ class TableImporter(): # float, because CaosDB does not have different sizes anyway. col_dtype = df.dtypes[key] if not strict and not np.issubdtype(col_dtype, datatype): - issub = np.issubdtype # These special cases should be fine. 
- if issub(col_dtype, np.integer) and issub(datatype, np.floating): + if ((datatype == str) + or (np.issubdtype(col_dtype, np.integer) + and np.issubdtype(datatype, np.floating)) + ): # NOQA df[key] = df[key].astype(datatype) # Now check each element @@ -388,7 +390,8 @@ class TableImporter(): if key not in df.columns: continue - if pd.isnull(row.loc[key]): + null_check = pd.isnull(row.loc[key]) + if (isinstance(null_check, np.ndarray) and null_check.any()) or (not isinstance(null_check, np.ndarray) and null_check): errmsg = ( "Required information is missing ({}) in {}. row" " (without header) of " diff --git a/src/doc/conf.py b/src/doc/conf.py index 9db07d72a4178e2d09761aef752ebe13b20a8856..60630cf6c2c00aae2c9a48a328613e247e0a4015 100644 --- a/src/doc/conf.py +++ b/src/doc/conf.py @@ -23,13 +23,13 @@ import sphinx_rtd_theme # -- Project information ----------------------------------------------------- project = 'caosadvancedtools' -copyright = '2021, IndiScale GmbH' +copyright = '2023, IndiScale GmbH' author = 'Daniel Hornung' # The short X.Y version -version = '0.8.0' +version = '0.9.0' # The full version, including alpha/beta/rc tags -release = '0.8.0' +release = '0.9.0' # -- General configuration --------------------------------------------------- diff --git a/src/doc/yaml_interface.rst b/src/doc/yaml_interface.rst index 78ff4cdd6fee201c7ebe17977f497b84e9657aa2..ac3914385d31df5a306b3f8400fbcb6b005f17fa 100644 --- a/src/doc/yaml_interface.rst +++ b/src/doc/yaml_interface.rst @@ -125,7 +125,14 @@ You can use the yaml parser directly in python as follows: This creates a DataModel object containing all entities defined in the yaml file. -You can then use the functions from caosadvancedtools.models.data_model.DataModel to synchronize +If the parsed data model shall be appended to a pre-existing data model, the optional +``existing_model`` argument can be used: + +.. code-block:: python + + new_model = parser.parse_model_from_yaml("model.yml", existing_model=old_model) + +You can now use the functions from ``DataModel`` to synchronize the model with a CaosDB instance, e.g.: ..
code-block:: python diff --git a/tox.ini b/tox.ini index 0e9664c377366923b60e11d138825e1543cc8c7f..00548dea25c5017f1d0301a00a629c62d16631ef 100644 --- a/tox.ini +++ b/tox.ini @@ -12,7 +12,7 @@ deps=nose openpyxl >= 3.0.7 xlrd == 1.2 h5py -commands=py.test --cov=caosadvancedtools -vv {posargs} +commands=py.test --cov=caosadvancedtools --cov-report=html:.tox/cov_html -vv {posargs} [flake8] max-line-length=100 diff --git a/unittests/model.yml b/unittests/models/model.yml similarity index 100% rename from unittests/model.yml rename to unittests/models/model.yml diff --git a/unittests/models/model_invalid.yml b/unittests/models/model_invalid.yml new file mode 100644 index 0000000000000000000000000000000000000000..c8368b9701db9b3461b7e0f1f3514c2411f56b56 --- /dev/null +++ b/unittests/models/model_invalid.yml @@ -0,0 +1,2 @@ +Project: + ObligatoryProperties: diff --git a/unittests/test_cfood.py b/unittests/test_cfood.py index 62e4b114a1c5cd8f3631f774637f3876c545afd3..e2f15ffdc7929fbd67aee37bccdb0f44cacef104 100644 --- a/unittests/test_cfood.py +++ b/unittests/test_cfood.py @@ -32,7 +32,7 @@ from caosadvancedtools.cfood import (AbstractCFood, AbstractFileCFood, CMeal, get_entity_for_path) from caosadvancedtools.crawler import FileCrawler from caosadvancedtools.example_cfood import ExampleCFood -from caosdb.common.models import _parse_single_xml_element +from linkahead.common.models import _parse_single_xml_element from lxml import etree from datetime import datetime, timezone diff --git a/unittests/test_data_model.py b/unittests/test_data_model.py index 159adfca1d589bb092b6f59110828b5868401e25..cafeb6ca6a43d7e0409aee3352b43f26d5208732 100644 --- a/unittests/test_data_model.py +++ b/unittests/test_data_model.py @@ -2,6 +2,7 @@ import unittest import caosdb as db from caosadvancedtools.models.data_model import DataModel +from caosadvancedtools.models.parser import parse_model_from_string class DataModelTest(unittest.TestCase): @@ -33,3 +34,49 @@ class DataModelTest(unittest.TestCase): DataModel.sync_ids_by_name(l1, l2) assert l1["TestRecord"].id == rt.id assert l1["TestRecord2"].id < 0 + + def test_get_deep(self): + model_recursive_str = """ +RT1: + description: some description + obligatory_properties: + RT1: + """ + model_recursive = parse_model_from_string(model_recursive_str) + prop1 = model_recursive["RT1"].get_property("RT1") + assert prop1.datatype is None + # TODO The next line actually changes model_recursive in place, is this OK? 
+ RT1 = model_recursive.get_deep("RT1") + assert model_recursive["RT1"] == RT1 + + model_unresolved_str = """ +RT1: + description: some description + obligatory_properties: + unresolved: + """ + model_unresolved = parse_model_from_string(model_unresolved_str) + rt1_unresolved = model_unresolved["RT1"] + prop_unresolved = model_unresolved.get_deep("unresolved") + assert prop_unresolved.datatype is None + rt1_deep = model_unresolved.get_deep("RT1") + assert rt1_deep == rt1_unresolved + assert rt1_deep is rt1_unresolved + + model_double_property = """ +p1: + description: Hello world + datatype: TEXT +RT1: + recommended_properties: + p1: +RT2: + recommended_properties: + RT1: + p1: +""" + model_unresolved = parse_model_from_string(model_double_property) + rt2_deep = model_unresolved.get_deep("RT2") + p1 = rt2_deep.get_property("p1") + assert p1.datatype == "TEXT" + assert p1.description == "Hello world" diff --git a/unittests/test_h5.py b/unittests/test_h5.py index 360d4b28938492d0f2af6d696e39dffb1cc3fead..961dd4246ef4b02208226ada5d3e1389133ddbcc 100644 --- a/unittests/test_h5.py +++ b/unittests/test_h5.py @@ -1,8 +1,8 @@ import unittest from tempfile import NamedTemporaryFile -import caosdb as db -import caosdb.apiutils +import linkahead as db +import linkahead.apiutils import h5py import numpy as np from caosadvancedtools.cfoods import h5 @@ -77,8 +77,8 @@ class H5CFoodTest(unittest.TestCase): # TODO this does probably break the code: The function will not be # restored correctly. # Change it to use the BaseMockUpTest - real_retrieve = caosdb.apiutils.retrieve_entity_with_id - caosdb.apiutils.retrieve_entity_with_id = dummy_get + real_retrieve = linkahead.apiutils.retrieve_entity_with_id + linkahead.apiutils.retrieve_entity_with_id = dummy_get # should run without problem h5.collect_existing_structure(db.Record(), db.Record(id=234), h5.EntityMapping()) @@ -151,7 +151,7 @@ class H5CFoodTest(unittest.TestCase): self.assertEqual(em.to_existing[r_child2._cuid], ENTS[101]) self.assertEqual(em.to_target[101], r_child2) - caosdb.apiutils.retrieve_entity_with_id = real_retrieve + linkahead.apiutils.retrieve_entity_with_id = real_retrieve def test_h5_attr_to_property(self): @@ -160,7 +160,8 @@ class H5CFoodTest(unittest.TestCase): test_float = np.float_(1.0) test_str = "Test" test_complex: complex = 2+3j - self.assertRaises(NotImplementedError, h5_attr_to_property, test_int) # only numpy-integers processed? + self.assertRaises(NotImplementedError, h5_attr_to_property, + test_int) # only numpy-integers processed? 
self.assertTupleEqual((1, db.INTEGER), h5_attr_to_property(test_integer)) self.assertTupleEqual((1.0, db.DOUBLE), h5_attr_to_property(test_float)) self.assertTupleEqual(("Test", db.TEXT), h5_attr_to_property(test_str)) @@ -187,4 +188,5 @@ class H5CFoodTest(unittest.TestCase): # Test scalar values given as np.array self.assertTupleEqual((1, db.INTEGER), h5_attr_to_property(np.array(1))) self.assertTupleEqual((1.123, db.DOUBLE), h5_attr_to_property(np.array(1.123))) - self.assertTupleEqual(('Hello World', db.TEXT), h5_attr_to_property(np.array("Hello World"))) + self.assertTupleEqual(('Hello World', db.TEXT), + h5_attr_to_property(np.array("Hello World"))) diff --git a/unittests/test_json_schema_exporter.py b/unittests/test_json_schema_exporter.py new file mode 100644 index 0000000000000000000000000000000000000000..f0503385a25eb89e66dd3518d71a32b91d07bf88 --- /dev/null +++ b/unittests/test_json_schema_exporter.py @@ -0,0 +1,1049 @@ +#!/usr/bin/env python +# encoding: utf-8 +# +# This file is a part of the CaosDB Project. +# +# Copyright (C) 2023 Indiscale GmbH <info@indiscale.com> +# Copyright (C) 2023 Florian Spreckelsen <f.spreckelsen@indiscale.com> +# +# This program is free software: you can redistribute it and/or modify it under +# the terms of the GNU Affero General Public License as published by the Free +# Software Foundation, either version 3 of the License, or (at your option) any +# later version. +# +# This program is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +# FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more +# details. +# +# You should have received a copy of the GNU Affero General Public License along +# with this program. If not, see <https://www.gnu.org/licenses/>. 
+# + +"""Tests the Json schema exporter.""" + +import json + +import linkahead as db +import caosadvancedtools.json_schema_exporter as jsex + +from collections import OrderedDict + +from jsonschema import FormatChecker, validate, ValidationError +from pytest import raises +from unittest.mock import Mock, patch + +from caosadvancedtools.json_schema_exporter import recordtype_to_json_schema as rtjs +from caosadvancedtools.models.parser import parse_model_from_string + +GLOBAL_MODEL = parse_model_from_string(""" +RT1: + description: some description + obligatory_properties: + some_date: + datatype: DATETIME + description: Just some date +RT21: + obligatory_properties: + RT1: + datatype: LIST<RT1> +RT31: + obligatory_properties: + RT1: + +""") + +RT1 = GLOBAL_MODEL.get_deep("RT1") +RT21 = GLOBAL_MODEL.get_deep("RT21") +RT31 = GLOBAL_MODEL.get_deep("RT31") + + +def _mock_execute_query(query_string, unique=False, **kwargs): + """Mock the response to queries for references.""" + all_records = db.Container() + all_files = db.Container() + other_type_rt = db.RecordType(name="OtherType") + other_type_rt.add_property(name="IntegerProp", datatype=db.INTEGER, importance=db.OBLIGATORY) + other_type_records = db.Container().extend([ + db.Record(id=100, name="otherA").add_parent(other_type_rt), + db.Record(id=101, name="otherB").add_parent(other_type_rt), + db.Record(id=102).add_parent(other_type_rt) + ]) + all_records.extend(other_type_records) + + referencing_type_rt = db.RecordType(name="ReferencingType") + referencing_type_rt.add_property(name=other_type_rt.name, datatype=db.LIST(other_type_rt.name)) + referencing_type_records = db.Container().extend([ + db.Record(id=103).add_parent(referencing_type_rt), + db.Record(id=104, name="referencing").add_parent(referencing_type_rt) + ]) + all_records.extend(referencing_type_records) + + all_files.append(db.File(id=105, name="GenericFile.txt")) + + if query_string == "SELECT name, id FROM RECORD 'OtherType'": + return other_type_records + elif query_string == "FIND RECORDTYPE WITH name='OtherType'" and unique is True: + return other_type_rt + elif query_string == "SELECT name, id FROM RECORD 'ReferencingType'": + return referencing_type_records + elif query_string == "FIND RECORDTYPE WITH name='ReferencingType'" and unique is True: + return referencing_type_rt + elif query_string == "SELECT name, id FROM RECORD 'RT1'": + return referencing_type_records # wrong types, but who cares for the test? 
+ elif query_string == "FIND RECORDTYPE WITH name='RT1'" and unique is True: + return RT1 + elif query_string == "FIND RECORDTYPE WITH name='RT21'" and unique is True: + return RT21 + elif query_string == "FIND RECORDTYPE WITH name='RT31'" and unique is True: + return RT31 + elif query_string == "SELECT name, id FROM RECORD": + return all_records + elif query_string == "SELECT name, id FROM FILE": + return all_files + else: + print(f"Query string: {query_string}") + if unique is True: + return db.Entity() + return db.Container() + + +def test_empty_rt(): + + rt = db.RecordType(name="Test", description="descr") + + schema = rtjs(rt) + + assert schema["title"] == rt.name + assert schema["description"] == rt.description + assert len(schema["properties"]) == 0 + assert len(schema["required"]) == 0 + assert schema["additionalProperties"] is True + + schema = rtjs(rt, additional_properties=False) + + assert schema["title"] == rt.name + assert schema["description"] == rt.description + assert len(schema["properties"]) == 0 + assert len(schema["required"]) == 0 + assert schema["additionalProperties"] is False + + schema = rtjs(rt, name_property_for_new_records=True, + description_property_for_new_records=True) + + assert len(schema["properties"]) == 2 + assert "name" in schema["properties"] + assert "description" in schema["properties"] + assert schema["properties"]["name"]["type"] == "string" + assert schema["properties"]["description"]["type"] == "string" + + +def test_rt_with_scalar_props(): + + rt = db.RecordType(name="Test") + rt.add_property(name="SimpleText", datatype=db.TEXT, description="This is a simple text") + rt.add_property(name="ObligatoryDatetime", datatype=db.DATETIME, importance=db.OBLIGATORY) + rt.add_property(name="JustDateNoTime", datatype=db.DATETIME, description="Only dates, no times") + rt.add_property(name="ObligatoryInteger", datatype=db.INTEGER, importance=db.OBLIGATORY) + rt.add_property(name="Double", datatype=db.DOUBLE) + # Suggested shouldn't influence the result in any way. 
+ rt.add_property(name="Boolean", datatype=db.BOOLEAN, importance=db.SUGGESTED) + + schema = rtjs(rt, additional_options_for_text_props={"JustDateNoTime": {"format": "date"}}) + + assert "properties" in schema + props = schema["properties"] + assert len(props) == 6 + assert "required" in schema + assert len(schema["required"]) == 2 + assert "ObligatoryDatetime" in schema["required"] + assert "ObligatoryInteger" in schema["required"] + + assert "SimpleText" in props + assert props["SimpleText"]["type"] == "string" + assert "format" not in props["SimpleText"] + assert "description" in props["SimpleText"] + assert props["SimpleText"]["description"] == "This is a simple text" + + assert "ObligatoryDatetime" in props + assert "type" not in props["ObligatoryDatetime"] + assert "anyOf" in props["ObligatoryDatetime"] + assert len(props["ObligatoryDatetime"]["anyOf"]) == 2 + date_found = 0 + datetime_found = 0 + for option in props["ObligatoryDatetime"]["anyOf"]: + assert option["type"] == "string" + fmt = option["format"] + if fmt == "date": + date_found += 1 + if fmt == "date-time": + datetime_found += 1 + assert date_found == 1 + assert datetime_found == 1 + + assert "JustDateNoTime" in props + assert props["JustDateNoTime"]["type"] == "string" + assert "anyOf" not in props["JustDateNoTime"] + assert "pattern" not in props["JustDateNoTime"] + assert props["JustDateNoTime"]["format"] == "date" + assert props["JustDateNoTime"]["description"] == "Only dates, no times" + + assert "ObligatoryInteger" in props + assert props["ObligatoryInteger"]["type"] == "integer" + + assert "Double" in props + assert props["Double"]["type"] == "number" + + assert "Boolean" in props + assert props["Boolean"]["type"] == "boolean" + + # test validation (we turst the jsonschema.validat function, so only test + # some more or less tricky cases with format or required). 
+ example = { + "SimpleText": "something", + "ObligatoryInteger": 23, + "ObligatoryDatetime": "1900-01-01T12:34:56.0Z", + "JustDateNoTime": "2023-10-13" + } + + # We need to explicitly enable the FormatChecker, otherwise format will be + # ignored + # (https://python-jsonschema.readthedocs.io/en/latest/validate/#validating-formats) + validate(example, schema, format_checker=FormatChecker()) + + example = { + "SimpleText": "something", + "ObligatoryInteger": 23, + "ObligatoryDatetime": "1900-01-01", + "JustDateNoTime": "2023-10-13" + } + validate(example, schema, format_checker=FormatChecker()) + + example = { + "SimpleText": "something", + "ObligatoryDatetime": "1900-01-01T12:34:56.0Z", + "JustDateNoTime": "2023-10-13" + } + + with raises(ValidationError): + # required missing + validate(example, schema, format_checker=FormatChecker()) + + example = { + "SimpleText": "something", + "ObligatoryInteger": 23, + "ObligatoryDatetime": "1900-01-01T12:34:56.0Z", + "JustDateNoTime": "2023-10-13T23:59:59.123Z" + } + + with raises(ValidationError): + # date expected in JustDateNoTime, but datetime given + validate(example, schema, format_checker=FormatChecker()) + + +def test_units(): + + rt = db.RecordType() + rt.add_property(name="ScalarWithUnit", datatype=db.DOUBLE, unit="m") + rt.add_property(name="ListWithUnit", description="This is a list.", + datatype=db.LIST(db.DOUBLE), unit="m") + + schema = rtjs(rt, units_in_description=True) + + props = schema["properties"] + assert "ScalarWithUnit" in props + assert props["ScalarWithUnit"]["type"] == "number" + assert "description" in props["ScalarWithUnit"] + assert props["ScalarWithUnit"]["description"] == "Unit is m." + assert "unit" not in props["ScalarWithUnit"] + + assert "ListWithUnit" in props + assert props["ListWithUnit"]["type"] == "array" + assert "items" in props["ListWithUnit"] + assert props["ListWithUnit"]["items"]["type"] == "number" + assert "description" in props["ListWithUnit"] + assert props["ListWithUnit"]["description"] == "This is a list. Unit is m." + assert "unit" not in props["ListWithUnit"] + + schema = rtjs(rt, units_in_description=False) + + props = schema["properties"] + assert "ScalarWithUnit" in props + assert props["ScalarWithUnit"]["type"] == "number" + assert "description" not in props["ScalarWithUnit"] + assert "unit" in props["ScalarWithUnit"] + assert props["ScalarWithUnit"]["unit"] == "m" + + assert "ListWithUnit" in props + assert props["ListWithUnit"]["type"] == "array" + assert "items" in props["ListWithUnit"] + assert props["ListWithUnit"]["items"]["type"] == "number" + assert "description" in props["ListWithUnit"] + assert props["ListWithUnit"]["description"] == "This is a list." 
+ assert "unit" in props["ListWithUnit"] + assert props["ListWithUnit"]["unit"] == "m" + + +def test_rt_with_list_props(): + + rt = db.RecordType() + rt.add_property(name="ListOfIntegers", datatype=db.LIST( + db.INTEGER), description="List of integers") + rt.add_property(name="ListOfPatterns", datatype=db.LIST(db.TEXT)) + + schema = rtjs(rt, additional_options_for_text_props={"ListOfPatterns": {"pattern": "[A-Z]+"}}) + + props = schema["properties"] + + assert "ListOfIntegers" in props + assert props["ListOfIntegers"]["type"] == "array" + assert "items" in props["ListOfIntegers"] + assert props["ListOfIntegers"]["items"]["type"] == "integer" + assert "description" not in props["ListOfIntegers"]["items"] + assert props["ListOfIntegers"]["description"] == "List of integers" + + assert "ListOfPatterns" in props + assert props["ListOfPatterns"]["type"] == "array" + assert "items" in props["ListOfPatterns"] + assert props["ListOfPatterns"]["items"]["type"] == "string" + assert props["ListOfPatterns"]["items"]["pattern"] == "[A-Z]+" + + # Validation + example = { + "ListOfIntegers": [1, 2, 3], + "ListOfPatterns": ["A", "BB", "CCC"] + } + validate(example, schema, format_checker=FormatChecker()) + + example = { + "ListOfIntegers": 1, + "ListOfPatterns": ["A", "BB", "CCC"] + } + with raises(ValidationError): + # No list + validate(example, schema, format_checker=FormatChecker()) + + example = { + "ListOfIntegers": [1, 2, 3], + "ListOfPatterns": ["A", "bb", "CCC"] + } + with raises(ValidationError): + # Pattern doesn't match + validate(example, schema, format_checker=FormatChecker()) + + +@patch("linkahead.execute_query", new=Mock(side_effect=_mock_execute_query)) +def test_rt_with_references(): + + rt = db.RecordType() + rt.add_property(name="RefProp", datatype=db.REFERENCE) + + schema = rtjs(rt) + props = schema["properties"] + assert "RefProp" in props + assert "enum" in props["RefProp"] + assert isinstance(props["RefProp"]["enum"], list) + assert len(props["RefProp"]["enum"]) == len( + db.execute_query("SELECT name, id FROM RECORD")) + len( + db.execute_query("SELECT name, id FROM FILE")) + assert "oneOf" not in props["RefProp"] + + example = { + "RefProp": "otherB" + } + validate(example, schema) + example = { + "RefProp": "I don't exist" + } + with raises(ValidationError): + # Wrong enum value + validate(example, schema) + example = { + "RefProp": { + "IntegerProp": 12 + } + } + with raises(ValidationError): + # Can't have objects in generic references + validate(example, schema) + + rt = db.RecordType() + rt.add_property(name="RefProp", datatype="OtherType") + rt.add_property(name="OtherTextProp", datatype=db.TEXT) + + schema = rtjs(rt) + props = schema["properties"] + assert "RefProp" in props + assert "oneOf" in props["RefProp"] + assert len(props["RefProp"]["oneOf"]) == 2 + enum_index = 0 + if "enum" not in props["RefProp"]["oneOf"][enum_index]: + # We can't really require the order here, so we just know that one of + # the two elements must be the enum, the other the object. 
+ enum_index = 1 - enum_index + assert "enum" in props["RefProp"]["oneOf"][enum_index] + assert isinstance(props["RefProp"]["oneOf"][enum_index]["enum"], list) + assert len(props["RefProp"]["oneOf"][enum_index]["enum"]) == 3 + assert "otherA" in props["RefProp"]["oneOf"][enum_index]["enum"] + assert "otherB" in props["RefProp"]["oneOf"][enum_index]["enum"] + assert "102" in props["RefProp"]["oneOf"][enum_index]["enum"] + # the other element of oneOf is the OtherType object + assert props["RefProp"]["oneOf"][1 - enum_index]["type"] == "object" + other_props = props["RefProp"]["oneOf"][1 - enum_index]["properties"] + assert "IntegerProp" in other_props + assert other_props["IntegerProp"]["type"] == "integer" + assert "required" in props["RefProp"]["oneOf"][1 - enum_index] + assert len(props["RefProp"]["oneOf"][1 - enum_index]["required"]) == 1 + assert "IntegerProp" in props["RefProp"]["oneOf"][1 - enum_index]["required"] + # The other prop also works as before + assert "OtherTextProp" in props + assert props["OtherTextProp"]["type"] == "string" + + example = { + "RefProp": { + "IntegerProp": 12 + } + } + validate(example, schema) + + example = { + "RefProp": "otherB", + "OtherTextProp": "something" + } + validate(example, schema) + + rt = db.RecordType(name="TestType", description="Some description") + rt.add_property(name="RefProp", datatype=db.LIST(db.REFERENCE), + description="I'm a list of references.") + + schema = rtjs(rt) + assert schema["title"] == rt.name + assert schema["description"] == rt.description + assert "RefProp" in schema["properties"] + ref_prop = schema["properties"]["RefProp"] + assert ref_prop["type"] == "array" + assert "description" in ref_prop + assert ref_prop["description"] == "I'm a list of references." + assert "items" in ref_prop + items = ref_prop["items"] + assert "enum" in items + assert isinstance(items["enum"], list) + assert len(items["enum"]) == len( + db.execute_query("SELECT name, id FROM RECORD")) + len( + db.execute_query("SELECT name, id FROM FILE")) + assert "oneOf" not in items + assert "description" not in items + + example = { + "RefProp": "otherB" + } + with raises(ValidationError): + # Should be list but isn't + validate(example, schema) + example = { + "RefProp": ["otherB"] + } + validate(example, schema) + example = { + "RefProp": ["otherB", "102", "referencing"] + } + validate(example, schema) + + rt = db.RecordType() + rt.add_property(name="RefProp", datatype=db.LIST("OtherType")) + + schema = rtjs(rt, additional_properties=False, name_property_for_new_records=True, + description_property_for_new_records=True) + assert schema["additionalProperties"] is False + assert "name" in schema["properties"] + assert schema["properties"]["name"]["type"] == "string" + assert "description" in schema["properties"] + assert schema["properties"]["description"]["type"] == "string" + assert "RefProp" in schema["properties"] + assert schema["properties"]["RefProp"]["type"] == "array" + assert "additionalProperties" not in schema["properties"]["RefProp"] + assert "items" in schema["properties"]["RefProp"] + items = schema["properties"]["RefProp"]["items"] + assert "oneOf" in items + assert len(items["oneOf"]) == 2 + # same as above, we can't rely on the order + enum_index = 0 + if "enum" not in items["oneOf"][enum_index]: + enum_index = 1 - enum_index + assert "enum" in items["oneOf"][enum_index] + assert isinstance(items["oneOf"][enum_index]["enum"], list) + assert len(items["oneOf"][enum_index]["enum"]) == 3 + assert "otherA" in 
items["oneOf"][enum_index]["enum"] + assert "otherB" in items["oneOf"][enum_index]["enum"] + assert "102" in items["oneOf"][enum_index]["enum"] + other_type = items["oneOf"][1 - enum_index] + assert other_type["type"] == "object" + assert other_type["additionalProperties"] is False + assert "IntegerProp" in other_type["properties"] + assert len(other_type["required"]) == 1 + assert "IntegerProp" in other_type["required"] + + example = { + "RefProp": ["otherB", "102", "referencing"] + } + with raises(ValidationError): + # Wrong value in enum + validate(example, schema) + example = { + "RefProp": [{"IntegerProp": 12}] + } + validate(example, schema) + example = { + "RefProp": [{"IntegerProp": 12, "additionalProperty": "something"}] + } + with raises(ValidationError): + # we have additional_properties=False which propagates to subschemas + validate(example, schema) + example = { + "RefProp": [{"IntegerProp": 12}, "otherB"] + } + validate(example, schema) + + rt = db.RecordType(name="ReferenceofReferencesType") + rt.add_property(name="RefRefProp", datatype="ReferencingType") + + schema = rtjs(rt) + + assert "RefRefProp" in schema["properties"] + ref_ref = schema["properties"]["RefRefProp"] + assert "oneOf" in ref_ref + assert len(ref_ref["oneOf"]) == 2 + enum_index = 0 + if "enum" not in ref_ref["oneOf"][enum_index]: + enum_index = 1 - enum_index + assert len(ref_ref["oneOf"][enum_index]["enum"]) == 2 + assert "103" in ref_ref["oneOf"][enum_index]["enum"] + assert "referencing" in ref_ref["oneOf"][enum_index]["enum"] + assert ref_ref["oneOf"][1 - enum_index]["type"] == "object" + assert "OtherType" in ref_ref["oneOf"][1 - enum_index]["properties"] + assert ref_ref["oneOf"][1 - enum_index]["properties"]["OtherType"]["type"] == "array" + items = ref_ref["oneOf"][1 - enum_index]["properties"]["OtherType"]["items"] + assert "oneOf" in items + assert len(items["oneOf"]) == 2 + # same as above, we can't rely on the order + enum_index = 0 + if "enum" not in items["oneOf"][enum_index]: + enum_index = 1 - enum_index + assert "enum" in items["oneOf"][enum_index] + assert isinstance(items["oneOf"][enum_index]["enum"], list) + assert len(items["oneOf"][enum_index]["enum"]) == 3 + assert "otherA" in items["oneOf"][enum_index]["enum"] + assert "otherB" in items["oneOf"][enum_index]["enum"] + assert "102" in items["oneOf"][enum_index]["enum"] + other_type = items["oneOf"][1 - enum_index] + assert other_type["type"] == "object" + assert "IntegerProp" in other_type["properties"] + assert len(other_type["required"]) == 1 + assert "IntegerProp" in other_type["required"] + + example = { + "RefRefProp": { + "OtherType": [ + "otherA", + {"IntegerProp": 12} + ] + } + } + validate(example, schema) + + # Single file and multiple files + rt = db.RecordType() + rt.add_property(name="FileProp", datatype=db.FILE) + + schema = rtjs(rt) + assert schema["properties"]["FileProp"]["type"] == "string" + assert schema["properties"]["FileProp"]["format"] == "data-url" + + # wrap in array (cf. 
https://github.com/rjsf-team/react-jsonschema-form/issues/3957) + schema = rtjs(rt, wrap_files_in_objects=True) + assert schema["properties"]["FileProp"]["type"] == "array" + assert schema["properties"]["FileProp"]["maxItems"] == 1 + assert "items" in schema["properties"]["FileProp"] + items = schema["properties"]["FileProp"]["items"] + assert items["type"] == "object" + assert len(items["required"]) == 1 + assert "file" in items["required"] + assert items["additionalProperties"] is False + assert len(items["properties"]) == 1 + assert "file" in items["properties"] + assert items["properties"]["file"]["type"] == "string" + assert items["properties"]["file"]["format"] == "data-url" + + rt = db.RecordType() + rt.add_property(name="FileProp", datatype=db.LIST(db.FILE)) + + schema = rtjs(rt) + assert schema["properties"]["FileProp"]["type"] == "array" + assert schema["properties"]["FileProp"]["items"]["type"] == "string" + assert schema["properties"]["FileProp"]["items"]["format"] == "data-url" + + # wrap in array (cf. https://github.com/rjsf-team/react-jsonschema-form/issues/3957) + print(schema) + schema = rtjs(rt, wrap_files_in_objects=True) + assert schema["properties"]["FileProp"]["type"] == "array" + assert "maxItems" not in schema["properties"]["FileProp"] + assert "items" in schema["properties"]["FileProp"] + items = schema["properties"]["FileProp"]["items"] + assert items["type"] == "object" + assert len(items["required"]) == 1 + assert "file" in items["required"] + assert items["additionalProperties"] is False + assert len(items["properties"]) == 1 + assert "file" in items["properties"] + assert items["properties"]["file"]["type"] == "string" + assert items["properties"]["file"]["format"] == "data-url" + + +def test_broken(): + + rt = db.RecordType() + rt.add_property(name="something", datatype=None) + + with raises(ValueError) as ve: + + rtjs(rt) + assert str(ve).startswith("Unknown or no property datatype.") + + rt = db.RecordType() + rt.add_property(name="MultiProp", datatype=db.INTEGER) + rt.add_property(name="MultiProp", datatype=db.INTEGER) + + with raises(NotImplementedError) as nie: + + rtjs(rt) + assert "MultiProp" in str(nie) + assert str(nie).startswith("Creating a schema for multi-properties is not specified.") + + +@patch("linkahead.execute_query", new=Mock(side_effect=_mock_execute_query)) +def test_reference_options(): + """Testing miscellaneous options. 
+ """ + + model_str = """ +RT1: + description: some description + obligatory_properties: + some_date: + datatype: DATETIME + description: Just some date +RT2: + obligatory_properties: + RT1: + +RT3: + obligatory_properties: + RT1_prop: + datatype: RT1 + description: property description + """ + model = parse_model_from_string(model_str) + # First test: without reference + rt1_dict = rtjs(model.get_deep("RT1")) + assert json.dumps(rt1_dict, indent=2) == """{ + "type": "object", + "required": [ + "some_date" + ], + "additionalProperties": true, + "description": "some description", + "title": "RT1", + "properties": { + "some_date": { + "description": "Just some date", + "anyOf": [ + { + "type": "string", + "format": "date" + }, + { + "type": "string", + "format": "date-time" + } + ] + } + }, + "$schema": "https://json-schema.org/draft/2020-12/schema" +}""" + # Second test: with reference + rt2_deep = model.get_deep("RT2") + rt2_dict = rtjs(rt2_deep) + assert json.dumps(rt2_dict, indent=2) == """{ + "type": "object", + "required": [ + "RT1" + ], + "additionalProperties": true, + "title": "RT2", + "properties": { + "RT1": { + "description": "some description", + "oneOf": [ + { + "title": "Existing entries", + "enum": [ + "103", + "referencing" + ] + }, + { + "type": "object", + "required": [ + "some_date" + ], + "additionalProperties": true, + "description": "some description", + "title": "Create new", + "properties": { + "some_date": { + "description": "Just some date", + "anyOf": [ + { + "type": "string", + "format": "date" + }, + { + "type": "string", + "format": "date-time" + } + ] + } + } + } + ] + } + }, + "$schema": "https://json-schema.org/draft/2020-12/schema" +}""" + + # Third test: Reference prop shall be only existing references, no option to create new ones. + rt2_dict = rtjs(model.get_deep("RT2"), do_not_create=["RT1"]) + assert json.dumps(rt2_dict, indent=2) == """{ + "type": "object", + "required": [ + "RT1" + ], + "additionalProperties": true, + "title": "RT2", + "properties": { + "RT1": { + "description": "some description", + "enum": [ + "103", + "referencing" + ] + } + }, + "$schema": "https://json-schema.org/draft/2020-12/schema" +}""" + # No effect of do_not_create (real property name should be used) + rt3_dict = rtjs(model.get_deep("RT3"), do_not_create=["RT1"]) + rt1_prop = rt3_dict["properties"]["RT1_prop"] + assert rt1_prop["description"] == "property description" + assert "oneOf" in rt1_prop.keys() + assert "enum" not in rt1_prop.keys() + + # Now we use the real property name + rt3_dict = rtjs(model.get_deep("RT3"), do_not_create=["RT1_prop"]) + rt1_prop = rt3_dict["properties"]["RT1_prop"] + assert rt1_prop["description"] == "property description" + assert "oneOf" not in rt1_prop.keys() + assert "enum" in rt1_prop.keys() + assert rt1_prop["enum"][0] == "103" + + +def test_schema_modification(): + """Testing functions which modify json schema dicts: + +- make_array() +- merge_schemas(). 
+ """ + + model_str = """ +some_date: + datatype: DATETIME +RT1: + obligatory_properties: + some_date: + +some_text: + datatype: TEXT +RT2: + obligatory_properties: + some_text: + """ + model = parse_model_from_string(model_str) + schema_RT1 = rtjs(model.get_deep("RT1"), additional_properties=False) + schema_RT2 = rtjs(model.get_deep("RT2"), additional_properties=False) + + # Merge the schemata + merged_list = jsex.merge_schemas([schema_RT1, schema_RT2]) + with raises(ValidationError): + validate({}, merged_list) + assert merged_list["type"] == "object" + assert merged_list["properties"]["RT1"]["title"] == "RT1" + assert merged_list["properties"]["RT2"]["properties"]["some_text"]["type"] == "string" + + merged_dict = jsex.merge_schemas({"schema1": schema_RT1, "schema2": schema_RT2}) + with raises(ValidationError): + validate({}, merged_dict) + assert merged_dict["type"] == "object" + assert merged_dict["properties"]["schema1"]["title"] == "RT1" + assert merged_dict["properties"]["schema2"]["properties"]["some_text"]["type"] == "string" + + # Make an array + array = jsex.make_array(schema_RT1) + with raises(ValidationError): + validate({}, array) + assert array["type"] == "array" + assert array["items"] == schema_RT1 + + +def test_inheritance(): + """Test data models with inherited properties.""" + model_str = """ +some_date: + datatype: DATETIME +RT1: + obligatory_properties: + some_date: +RT2: + inherit_from_suggested: + - RT1 + """ + model = parse_model_from_string(model_str) + rt2_deep = model.get_deep("RT2") + assert "some_date" in [prop.name for prop in rt2_deep.properties] + + model_str = """ +RT1: + obligatory_properties: + RT2: +RT2: + inherit_from_suggested: + - RT1 +RT3: + inherit_from_suggested: + - RT4 +RT4: + inherit_from_suggested: + - RT3 +RT5: + inherit_from_suggested: + - RT5 + """ + model = parse_model_from_string(model_str) + # This must not lead to an infinite recursion + rt1_deep = model.get_deep("RT1") + rt2_deep = model.get_deep("RT2") + assert rt2_deep.get_property("RT2").name == rt1_deep.get_property("RT2").name + rt3_deep = model.get_deep("RT3") + assert rt3_deep.get_parents()[0].name == "RT4" + rt4_deep = model.get_deep("RT4") + assert rt4_deep.get_parents()[0].name == "RT3" + rt5_deep = model.get_deep("RT5") + assert rt5_deep.get_parents()[0].name == "RT5" + + +@patch("linkahead.execute_query", new=Mock(side_effect=_mock_execute_query)) +def test_empty_retrieve(): + """Special case: ``do_not_retrieve`` is set, or the retrieve result is empty.""" + model_str = """ +RT1: + description: Some text. +RT2: + obligatory_properties: + RT1: +# some_text: +# datatype: TEXT +NoRecords: + description: A RecordType without Records. 
+ recommended_properties: + some_text: + datatype: TEXT +RT3: + obligatory_properties: + NoRecords: + """ + model = parse_model_from_string(model_str) + schema_default = rtjs(model.get_deep("RT2")) + assert "oneOf" in schema_default["properties"]["RT1"] + assert any([el.get("title") == "Existing entries" for el in + schema_default["properties"]["RT1"]["oneOf"]]) + + schema_noexist = rtjs(model.get_deep("RT3")) + assert schema_noexist["properties"]["NoRecords"].get("type") == "object" + + schema_noexist_noremote = rtjs(model.get_deep("RT3"), no_remote=True) + assert schema_noexist_noremote["properties"]["NoRecords"].get("type") == "object" + assert (schema_noexist_noremote["properties"]["NoRecords"].get("properties") + == OrderedDict([('some_text', {'type': 'string'})])) + + uischema = {} + schema_noexist_noretrieve = rtjs(model.get_deep("RT2"), do_not_retrieve=["RT1"], + rjsf=uischema) + assert schema_noexist_noretrieve["properties"]["RT1"].get("type") == "object" + assert "some_date" in schema_noexist_noretrieve["properties"]["RT1"].get("properties") + assert not uischema + + +@patch("linkahead.execute_query", new=Mock(side_effect=_mock_execute_query)) +def test_multiple_choice(): + """Multiple choice is mostyly a matter of UI.""" + model_str = """ +RT1: +RT21: + obligatory_properties: + RT1: + datatype: LIST<RT1> +RT3: + obligatory_properties: + RT21: +RT4: + obligatory_properties: + RT21: + datatype: LIST<RT21> + """ + model = parse_model_from_string(model_str) + # generate a multiple choice, in first level + schema, uischema = rtjs(model.get_deep("RT21"), additional_properties=False, + do_not_create=["RT1"], multiple_choice=["RT1"], rjsf=True) + assert schema["properties"]["RT1"]["uniqueItems"] is True + assert str(uischema) == "{'RT1': {'ui:widget': 'checkboxes', 'ui:inline': True}}" + + # second level + schema, uischema = rtjs(model.get_deep("RT3"), additional_properties=False, + do_not_create=["RT1"], multiple_choice=["RT1"], rjsf=True) + assert schema["properties"]["RT21"]["properties"]["RT1"]["uniqueItems"] is True + assert (str(uischema) + == "{'RT21': {'RT1': {'ui:widget': 'checkboxes', 'ui:inline': True}}}") + + # second level with lists + schema, uischema = rtjs(model.get_deep("RT4"), additional_properties=False, + do_not_create=["RT1"], multiple_choice=["RT1"], rjsf=True) + assert schema["properties"]["RT21"]["items"]["properties"]["RT1"]["uniqueItems"] is True + assert (str(uischema) == + "{'RT21': {'items': {'RT1': {'ui:widget': 'checkboxes', " + "'ui:inline': True}}}}") + + +@patch("linkahead.execute_query", new=Mock(side_effect=_mock_execute_query)) +def test_uischema(): + model_str = """ +RT1: +RT2: + obligatory_properties: + RT1: + datatype: LIST<RT1> +RT3: + obligatory_properties: + RT1: + datatype: LIST<RT1> + """ + model = parse_model_from_string(model_str) + schema_2, uischema_2 = rtjs(model.get_deep("RT2"), additional_properties=False, + do_not_create=["RT1"], multiple_choice=["RT1"], rjsf=True) + schema_3, uischema_3 = rtjs(model.get_deep("RT3"), additional_properties=False, + do_not_create=["RT1"], multiple_choice=["RT1"], rjsf=True) + + # Merging ################################################################# + # Using dictionaries + schemas_dict = {"schema_2": schema_2, "schema_3": schema_3} + uischemas_dict = {"schema_2": uischema_2, "schema_3": uischema_3} + merged_dict, merged_dict_ui = jsex.merge_schemas(schemas_dict, uischemas_dict) + assert merged_dict_ui["schema_2"] == merged_dict_ui["schema_3"] + assert (str(merged_dict_ui["schema_2"]) + == "{'RT1': 
{'ui:widget': 'checkboxes', 'ui:inline': True}}") + + # Using lists + schemas_list = [schema_2, schema_3] + uischemas_list = [uischema_2, uischema_3] + merged_list, merged_list_ui = jsex.merge_schemas(schemas_list, uischemas_list) + assert merged_list["properties"]["RT2"] == merged_dict["properties"]["schema_2"] + assert merged_list_ui["RT2"] == merged_list_ui["RT3"] + assert merged_list_ui["RT2"] == merged_dict_ui["schema_2"] + + # Asserting failures + with raises(ValueError): + jsex.merge_schemas(schemas_dict, uischemas_list) + with raises(ValueError): + jsex.merge_schemas(schemas_list, uischemas_dict) + + # Arraying ################################################################ + array2, array2_ui = jsex.make_array(schema_2, uischema_2) + assert array2["items"] == schema_2 + assert array2_ui["items"] == uischema_2 + assert (str(array2_ui["items"]) + == "{'RT1': {'ui:widget': 'checkboxes', 'ui:inline': True}}") + + +@patch("linkahead.execute_query", new=Mock(side_effect=_mock_execute_query)) +def test_schema_customization_with_dicts(): + """Testing the ``additional_json_schema`` and ``additional_ui_schema`` parameters.""" + model_str = """ +RT1: +RT21: + obligatory_properties: + RT1: + datatype: LIST<RT1> + text: + datatype: TEXT + description: Some description +RT3: + obligatory_properties: + number: + datatype: INTEGER + """ + model = parse_model_from_string(model_str) + + custom_schema = { + "RT21": { + "minProperties": 2, + }, + "text": { + "format": "email", + "description": "Better description.", + }, + "number": { + "minimum": 0, + "exclusiveMaximum": 100, + }, + } + + custom_ui_schema = { + "text": { + "ui:help": "Hint: keep it short.", + "ui:widget": "password", + }, + "number": { + "ui:order": 2, + } + } + + schema_21, uischema_21 = rtjs(model.get_deep("RT21"), additional_properties=False, + do_not_create=["RT1"], rjsf=True) + assert len(uischema_21) == 0 + assert schema_21["properties"]["text"]["description"] == "Some description" + assert "format" not in schema_21["properties"]["text"] + + schema_21, uischema_21 = rtjs(model.get_deep("RT21"), additional_properties=False, + additional_json_schema=custom_schema, + additional_ui_schema=custom_ui_schema, do_not_create=["RT1"], + rjsf=True) + assert (str(uischema_21) + == "{'text': {'ui:help': 'Hint: keep it short.', 'ui:widget': 'password'}}") + assert schema_21["properties"]["text"]["description"] == "Better description." 
+ assert schema_21["properties"]["text"].get("format") == "email" + assert schema_21.get("minProperties") == 2 + + schema_3, uischema_3 = rtjs(model.get_deep("RT3"), additional_properties=False, + additional_json_schema=custom_schema, + additional_ui_schema=custom_ui_schema, rjsf=True) + assert (json.dumps(schema_3["properties"]["number"]) == + '{"type": "integer", "minimum": 0, "exclusiveMaximum": 100}') + assert (str(uischema_3) == "{'number': {'ui:order': 2}}") diff --git a/unittests/test_table_importer.py b/unittests/test_table_importer.py index 0b3f0d7c7fc81b2a9d64e24fb2262c686ea669da..599ea535d95d0b6c1216a935813d71c8e90c1d3b 100644 --- a/unittests/test_table_importer.py +++ b/unittests/test_table_importer.py @@ -44,7 +44,7 @@ from test_utils import BaseMockUpTest # For testing the table importer IMPORTER_KWARGS = dict( converters={'c': float, 'd': yes_no_converter, 'x': float}, # x does not exist - datatypes={'a': str, 'b': int, 'x': int}, # x does not exist + datatypes={'a': str, 'b': int, 'float': float, 'x': int}, # x does not exist obligatory_columns=['a', 'b'], unique_keys=[('a', 'b')], existing_columns=['e'], ) @@ -181,12 +181,47 @@ class TableImporterTest(unittest.TestCase): self.assertEqual(df_new.shape[1], 4) self.assertEqual(df_new.iloc[0].b, 5) + # check that missing array-valued fields are detected correctly: + df = pd.DataFrame([[[None, None], 4, 2.0, 'yes'], + ['b', 5, 3.0, 'no']], + columns=['a', 'b', 'c', 'd']) + df_new = importer.check_missing(df) + self.assertEqual(df_new.shape[0], 1) + self.assertEqual(df_new.shape[1], 4) + self.assertEqual(df_new.iloc[0].b, 5) + def test_wrong_datatype(self): importer = TableImporter(**self.importer_kwargs) - df = pd.DataFrame([[None, np.nan, 2.0, 'yes'], + df = pd.DataFrame([[1234, 0, 2.0, 3, 'yes'], + [5678, 1, 2.0, 3, 'yes']], + columns=['a', 'b', 'c', 'float', 'd']) + # wrong datatypes before + assert df["a"].dtype == int + assert df["float"].dtype == int + # strict = False by default, so this shouldn't raise an error + importer.check_datatype(df) + # The types should be correct now. 
+ assert df["a"].dtype == pd.StringDtype + assert df["float"].dtype == float + + # Resetting `df` since check_datatype may change datatypes + df = pd.DataFrame([[None, 0, 2.0, 'yes'], [5, 1, 2.0, 'yes']], columns=['a', 'b', 'c', 'd']) - self.assertRaises(DataInconsistencyError, importer.check_datatype, df) + # strict=True, so number in str column raises an error + self.assertRaises(DataInconsistencyError, importer.check_datatype, df, None, True) + + df = pd.DataFrame([[0], + [1]], + columns=['float']) + # strict=True, so int in float column raises an error + self.assertRaises(DataInconsistencyError, importer.check_datatype, df, None, True) + + # This is always wrong (float in int column) + df = pd.DataFrame([[None, np.nan, 2.0, 'yes'], + [5, 1.7, 2.0, 'yes']], + columns=['a', 'b', 'c', 'd']) + self.assertRaises(DataInconsistencyError, importer.check_datatype, df, None, False) def test_unique(self): importer = TableImporter(**self.importer_kwargs) @@ -266,6 +301,30 @@ class CSVImporterTest(TableImporterTest): importer = CSVImporter(**self.importer_kwargs) importer.read_file(tmp.name) + def test_with_generous_datatypes(self): + """Same as above but check that values are converted as expected.""" + tmp = NamedTemporaryFile(delete=False, suffix=".csv") + tmp.close() + self.valid_df.to_csv(tmp.name) + # Copy and use float for columns with integer values, string for columns + # with numeric values + kwargs = self.importer_kwargs.copy() + kwargs["datatypes"] = { + 'a': str, + 'b': float, + 'c': str + } + importer = CSVImporter(**kwargs) + importer.read_file(tmp.name) + + kwargs["datatypes"] = { + 'a': str, + 'b': str, + 'c': str + } + importer = CSVImporter(**kwargs) + importer.read_file(tmp.name) + class TSVImporterTest(TableImporterTest): def test_full(self): diff --git a/unittests/test_yaml_model_parser.py b/unittests/test_yaml_model_parser.py index 6cdea7922a8503be082e8947edecd7e8c849730b..1019a93a0aa4292cea75fe8fba57e19e55359baa 100644 --- a/unittests/test_yaml_model_parser.py +++ b/unittests/test_yaml_model_parser.py @@ -1,9 +1,27 @@ +# This file is a part of the CaosDB Project. +# +# Copyright (C) 2023 IndiScale GmbH <info@indiscale.com> +# Copyright (C) 2023 Daniel Hornung <d.hornung@indiscale.com> +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see <https://www.gnu.org/licenses/>. 
+ import unittest from datetime import date from tempfile import NamedTemporaryFile -from pytest import deprecated_call, raises +from pytest import deprecated_call, raises, mark -import caosdb as db +import linkahead as db from caosadvancedtools.models.parser import (TwiceDefinedException, YamlDefinitionError, parse_model_from_string, @@ -284,10 +302,12 @@ A: def test_reference_property(self): """Test correct creation of reference property using an RT.""" - modeldef = """A: + modeldef = """ +A: recommended_properties: ref: datatype: LIST<A> + description: new description """ model = parse_model_from_string(modeldef) self.assertEqual(len(model), 2) @@ -297,6 +317,7 @@ A: elif key == "ref": self.assertTrue(isinstance(value, db.Property)) self.assertEqual(value.datatype, "LIST<A>") + assert value.description == "new description" class ExternTest(unittest.TestCase): @@ -340,6 +361,35 @@ A: assert "line {}".format(line) in yde.exception.args[0] +def test_existing_model(): + """Parsing more than one model may require to append to existing models.""" + model_str_1 = """ +A: + obligatory_properties: + number: + datatype: INTEGER + """ + model_str_2 = """ +B: + obligatory_properties: + A: + """ + model_1 = parse_model_from_string(model_str_1) + model_2 = parse_model_from_string(model_str_2, existing_model=model_1) + for ent in ["A", "B", "number"]: + assert ent in model_2 + + model_str_redefine = """ +number: + datatype: DOUBLE + description: Hello number! + """ + model_redefine = parse_model_from_string(model_str_redefine, existing_model=model_1) + print(model_redefine) + assert model_redefine["number"].description == "Hello number!" + assert model_redefine["number"].datatype == db.INTEGER # FIXME Shouldn't this be DOUBLE? + + def test_define_role(): model = """ A: @@ -511,3 +561,39 @@ R3: # Until removal, both do the same assert has_parent(r3, par) assert r3.get_parent(par)._flags["inheritance"] == db.OBLIGATORY + + +def test_yaml_error(): + """Testing error while parsing a yaml. + """ + + with raises(ValueError, match=r"line 2: .*"): + parse_model_from_yaml("unittests/models/model_invalid.yml") + + +def test_inherit_error(): + """Must fail with an understandable exception.""" + model_string = """ +prop1: + inherit_from_obligatory: prop2 + """ + with raises(YamlDefinitionError, + match=r"Parents must be a list but is given as string: prop1 > prop2"): + parse_model_from_string(model_string) + + +@mark.xfail(reason="""Issue is + https://gitlab.com/linkahead/linkahead-advanced-user-tools/-/issues/57""") +def test_inherit_properties(): + # TODO Is not even specified yet. + model_string = """ +prop1: + datatype: DOUBLE +prop2: +# role: Property + inherit_from_obligatory: + - prop1 + """ + model = parse_model_from_string(model_string) + prop2 = model["prop2"] + assert prop2.role == "Property" diff --git a/utils/branch_exists.py b/utils/branch_exists.py new file mode 100755 index 0000000000000000000000000000000000000000..9626e4aa81e4ee2bd9a239f6a0650dc4e383593f --- /dev/null +++ b/utils/branch_exists.py @@ -0,0 +1,33 @@ +#!/usr/bin/env python3 +""" +Exit with error code 2 if the branch does not exist. 
+""" +import sys +import argparse +import requests +from ref_to_commit import get_remote + + +def branch_exists(repository, branch): + remote = get_remote(repository) + resp = requests.get(remote+"/repository/branches/"+branch).json() + return "message" not in resp + + +def define_parser(): + parser = argparse.ArgumentParser() + parser.add_argument("repository") + parser.add_argument("branchname") + + return parser + + +if __name__ == "__main__": + parser = define_parser() + args = parser.parse_args() + ret = branch_exists(repository=args.repository, branch=args.branchname) + if ret is False: + print("branch does not exist.") + sys.exit(2) + else: + print("branch exists.") diff --git a/utils/ref_to_commit.py b/utils/ref_to_commit.py new file mode 100755 index 0000000000000000000000000000000000000000..93f15f31b6158172cfca5a5095b13f6a4fcb22ab --- /dev/null +++ b/utils/ref_to_commit.py @@ -0,0 +1,48 @@ +#!/usr/bin/env python3 +""" +replaces git branch names with the newest commit hash using gitlab api +""" +import argparse + +import requests + + +_REPOS = { + "SERVER": "https://gitlab.indiscale.com/api/v4/projects/100", + "WEBUI": "https://gitlab.indiscale.com/api/v4/projects/98", + "PYLIB": "https://gitlab.indiscale.com/api/v4/projects/97", + "MYSQLBACKEND": "https://gitlab.indiscale.com/api/v4/projects/101", + "PYINT": "https://gitlab.indiscale.com/api/v4/projects/99", + "CPPLIB": "https://gitlab.indiscale.com/api/v4/projects/107", + "CPPINT": "https://gitlab.indiscale.com/api/v4/projects/111", + "ADVANCEDUSERTOOLS": "https://gitlab.indiscale.com/api/v4/projects/104" +} + + +def get_remote(repository): + return _REPOS[repository] + + +def ref_to_commit(repository, reference): + remote = get_remote(repository) + r = requests.get(remote+"/repository/branches/"+reference).json() + + if "name" in r: + return r["commit"]["short_id"] + + return reference + + +def define_parser(): + parser = argparse.ArgumentParser() + parser.add_argument("repository") + parser.add_argument("reference") + + return parser + + +if __name__ == "__main__": + parser = define_parser() + args = parser.parse_args() + ret = ref_to_commit(repository=args.repository, reference=args.reference) + print(ret)