diff --git a/CHANGELOG.md b/CHANGELOG.md
index 404424ded153081a1b16d7e5b0923d9284695949..dc465d7821d8a496dc15f364f865b266e066fcc6 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -8,6 +8,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ### Added ###
 
+- Added table_json_conversion.export_import_xlsx with a public function
+  export_container_to_xlsx, which exports the data of a given Entity
+  Container to an XLSX file.
+
 ### Changed ###
 
 ### Deprecated ###
diff --git a/src/caosadvancedtools/json_schema_exporter.py b/src/caosadvancedtools/json_schema_exporter.py
index 56568ca18eb10f501fa13bc766613367050c034d..bce3102eef423a84206eb4596352d49dd4d8ff9f 100644
--- a/src/caosadvancedtools/json_schema_exporter.py
+++ b/src/caosadvancedtools/json_schema_exporter.py
@@ -70,6 +70,7 @@ class JsonSchemaExporter:
 
     def __init__(self, additional_properties: bool = True,
                  name_property_for_new_records: bool = False,
+                 use_id_for_identification: bool = False,
                  description_property_for_new_records: bool = False,
                  additional_options_for_text_props: dict = None,
                  additional_json_schema: Dict[str, dict] = None,
@@ -92,6 +93,9 @@ class JsonSchemaExporter:
         name_property_for_new_records : bool, optional
             Whether objects shall generally have a `name` property in the generated schema.
             Optional, default is False.
+        use_id_for_identification : bool, optional
+            If set to True, an 'id' property is added to all records, and
+            foreign key references are assumed to be ids.
         description_property_for_new_records : bool, optional
             Whether objects shall generally have a `description` property in the generated
             schema. Optional, default is False.
@@ -151,6 +155,7 @@ class JsonSchemaExporter:
 
         self._additional_properties = additional_properties
         self._name_property_for_new_records = name_property_for_new_records
+        self._use_id_for_identification = use_id_for_identification
        self._description_property_for_new_records = description_property_for_new_records
         self._additional_options_for_text_props = additional_options_for_text_props
         self._additional_json_schema = additional_json_schema
@@ -257,7 +262,18 @@ ui_schema : dict
             if inner_ui_schema:
                 ui_schema["items"] = inner_ui_schema
         elif prop.is_reference():
-            if prop.datatype == db.REFERENCE:
+            if self._use_id_for_identification:
+                json_prop["type"] = "object"
+                json_prop["required"] = []
+                json_prop["additionalProperties"] = False
+                json_prop["title"] = prop.name
+                if prop.datatype == db.FILE:
+                    json_prop["description"] = "Path to file"
+                    json_prop["properties"] = {"path": {"type": "string"}}
+                else:
+                    json_prop["properties"] = {
+                        "id": {"oneOf": [{"type": "integer"}, {"type": "string"}]}}
+            elif prop.datatype == db.REFERENCE:
                 # No Record creation since no RT is specified and we don't know what
                 # schema to use, so only enum of all Records and all Files.
                values = self._retrieve_enum_values("RECORD") + self._retrieve_enum_values("FILE")
@@ -410,7 +426,9 @@ ui_schema : dict
 
             vals = []
             for val in possible_values:
-                if val.name:
+                if self._use_id_for_identification:
+                    vals.append(val.id)
+                elif val.name:
                     vals.append(f"{val.name}")
                 else:
                     vals.append(f"{val.id}")
@@ -453,6 +471,8 @@ ui_schema : dict
         props = OrderedDict()
         if self._name_property_for_new_records:
             props["name"] = self._make_text_property("The name of the Record to be created")
+        if self._use_id_for_identification:
+            props["id"] = self._make_text_property("The id of the Record")
         if self._description_property_for_new_records:
             props["description"] = self._make_text_property(
                 "The description of the Record to be created")
@@ -544,6 +564,7 @@ guaranteed (as of now).
 def recordtype_to_json_schema(rt: db.RecordType, additional_properties: bool = True,
                               name_property_for_new_records: bool = False,
+                              use_id_for_identification: bool = False,
                               description_property_for_new_records: bool = False,
                               additional_options_for_text_props: Optional[dict] = None,
                               additional_json_schema: Dict[str, dict] = None,
@@ -573,6 +594,9 @@ def recordtype_to_json_schema(rt: db.RecordType, additional_properties: bool = T
 name_property_for_new_records : bool, optional
     Whether objects shall generally have a `name` property in the generated schema.
     Optional, default is False.
+use_id_for_identification : bool, optional
+    If set to True, an 'id' property is added to all records, and foreign
+    key references are assumed to be ids.
 description_property_for_new_records : bool, optional
     Whether objects shall generally have a `description` property in the generated schema.
     Optional, default is False.
@@ -629,6 +653,7 @@ def recordtype_to_json_schema(rt: db.RecordType, additional_properties: bool = T
     exporter = JsonSchemaExporter(
         additional_properties=additional_properties,
         name_property_for_new_records=name_property_for_new_records,
+        use_id_for_identification=use_id_for_identification,
         description_property_for_new_records=description_property_for_new_records,
         additional_options_for_text_props=additional_options_for_text_props,
         additional_json_schema=additional_json_schema,
@@ -696,8 +721,8 @@ ui_schema : dict, optional
 
 
 def merge_schemas(schemas: Union[Dict[str, dict], Iterable[dict]],
-                  rjsf_uischemas: Union[Dict[str, dict], Sequence[dict]] = None) -> (
-                      Union[dict, Tuple[dict, dict]]):
+                  rjsf_uischemas: Union[Dict[str, dict], Sequence[dict]] = None,
+                  return_data_schema: bool = False) -> (Union[dict, Tuple[dict, dict]]):
     """Merge the given schemata into a single schema.
 
     The result will look like this:
@@ -728,6 +753,11 @@ rjsf_uischemas : dict[str, dict] | Iterable[dict], optional
     If given, also merge the react-jsonschema-forms from this argument and return as the second
     return value.  If ``schemas`` is a dict, this parameter must also be a dict, if ``schemas``
     is only an iterable, this parameter must support numerical indexing.
+return_data_schema : bool, default False
+    If set to True, a second schema with all top-level entries wrapped in an
+    array will be returned.  This is necessary if the schema describes the
+    data layout of an XLSX file.
+    Cannot be used together with rjsf_uischemas.
 
 Returns
 -------
@@ -737,10 +767,24 @@ schema : dict
 
 uischema : dict
     If ``rjsf_uischemas`` was given, this contains the merged UI schemata.
+
+data_schema : dict
+    If ``return_data_schema`` is True, this contains the data schema for an XLSX file.
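+
+Examples
+--------
+A sketch (``person_schema`` and ``training_schema`` stand for schemas as
+returned by ``recordtype_to_json_schema``, with titles "Person" and "Training"):
+
+.. code-block:: python
+
+    model_schema, data_schema = merge_schemas(
+        [person_schema, training_schema], return_data_schema=True)
+    # Each data_schema entry is wrapped: {"type": "array", "items": person_schema}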
""" sub_schemas: dict[str, dict] = OrderedDict() required = [] ui_schema = None + data_sub_schemas = OrderedDict() if isinstance(schemas, dict): sub_schemas = schemas @@ -754,6 +787,8 @@ uischema : dict for i, schema in enumerate(schemas, start=1): title = schema.get("title", str(i)) sub_schemas[title] = schema + if return_data_schema: + data_sub_schemas[title] = {"type": "array", "items": schema} required.append(title) if rjsf_uischemas is not None: if not isinstance(rjsf_uischemas, Sequence): @@ -771,7 +806,17 @@ uischema : dict "additionalProperties": False, "$schema": "https://json-schema.org/draft/2020-12/schema", } + if return_data_schema: + data_schema = { + "type": "object", + "properties": data_sub_schemas, + "required": required, + "additionalProperties": False, + "$schema": "https://json-schema.org/draft/2020-12/schema", + } if ui_schema is not None: return result, ui_schema + if return_data_schema: + return result, data_schema return result diff --git a/src/caosadvancedtools/table_json_conversion/convert.py b/src/caosadvancedtools/table_json_conversion/convert.py index 7a3d63a2444d09f0c9f695edfa8fd6865593f62e..33432b85298f98125dc7861fad82d220a6d4b5aa 100644 --- a/src/caosadvancedtools/table_json_conversion/convert.py +++ b/src/caosadvancedtools/table_json_conversion/convert.py @@ -31,12 +31,12 @@ from operator import getitem from types import SimpleNamespace from typing import Any, BinaryIO, Callable, TextIO, Union, Optional from warnings import warn -from copy import deepcopy import jsonschema from openpyxl import load_workbook from openpyxl.worksheet.worksheet import Worksheet +from .validation_utils import _validate_jsonschema from caosadvancedtools.table_json_conversion import xlsx_utils from caosadvancedtools.table_json_conversion.fill_xlsx import read_or_dict @@ -153,51 +153,6 @@ class ForeignError(KeyError): self.definitions = definitions -def _validate_jsonschema(instance, schema): - # Checks whether a key: value pair is in the given schema or fulfills the - # criteria of a direct subschema (anyOf, allOf, oneOf) - def in_schema(key, val, schema): - if schema.get(key, None) == val: - return True - if 'anyOf' in schema: - return any([in_schema(key, val, sub) for sub in schema['anyOf']]) - if 'allOf' in schema: - return all([in_schema(key, val, sub) for sub in schema['allOf']]) - if 'oneOf' in schema: - return [in_schema(key, val, sub) for sub in schema['oneOf']].count(True) == 1 - return False - - # Removes Key: None and datetime instances from nested dicts and lists of - # any depth. Key: None is currently valid as there is no 'obligatory with - # value', and datetime cannot be checked by jsonschema. 
- def remove_incompatible_values(it, schema): - if isinstance(it, list): - schema = schema.get('items', schema) - for elem in it: - remove_incompatible_values(elem, schema) - elif isinstance(it, dict): - schema = schema.get('properties', schema) - for key, elem in list(it.items()): - if elem is None: - it.pop(key) - elif isinstance(elem, datetime.date) or isinstance(elem, datetime.datetime): - if in_schema('format', 'date', schema[key]) or in_schema('format', 'date-time', schema[key]): - it.pop(key) - elif isinstance(it, (dict, list)): - remove_incompatible_values(elem, schema[key]) - return it - - # If instance is not a dict, remove_incompatible_values would not remove - # the value if it is valid, so we need to check manually by wrapping - instance = deepcopy(instance) - if not isinstance(instance, dict): - if remove_incompatible_values({'key': instance}, {'key': schema}) == {}: - return - # Clean dict and validate - instance = remove_incompatible_values(deepcopy(instance), schema) - jsonschema.validate(instance, schema=schema) - - class XLSXConverter: """Class for conversion from XLSX to JSON. diff --git a/src/caosadvancedtools/table_json_conversion/export_import_xlsx.py b/src/caosadvancedtools/table_json_conversion/export_import_xlsx.py new file mode 100644 index 0000000000000000000000000000000000000000..ea18a374d761945b29668538098a8cf1e6a9c2f2 --- /dev/null +++ b/src/caosadvancedtools/table_json_conversion/export_import_xlsx.py @@ -0,0 +1,242 @@ +# encoding: utf-8 +# +# This file is a part of the LinkAhead Project. +# +# Copyright (C) 2025 Indiscale GmbH <info@indiscale.com> +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see <https://www.gnu.org/licenses/>. + +""" +Utilities for automatically exporting and importing data to and from xlsx. +""" + +import json +import tempfile +import warnings +import logging +from typing import Union +from pathlib import Path + +import linkahead +from linkahead.common.models import Container +from linkahead import execute_query + +from ..json_schema_exporter import JsonSchemaExporter, merge_schemas +from .table_generator import XLSXTemplateGenerator +from .fill_xlsx import fill_template + +# The high_level_api import would normally warn about the API being +# experimental. We know this, so suppress the warning. +logging.disable(logging.WARNING) +from linkahead.high_level_api import convert_to_python_object # noqa: E402, pylint: disable=wrong-import-position +logging.disable(logging.NOTSET) + + +def _generate_jsonschema_from_recordtypes(recordtypes: list, + out_path: Union[str, Path] = None) -> dict: + """ + Generate a combined jsonschema for all given recordtypes. + + Parameters + ---------- + recordtypes : Iterable + List of RecordType entities for which a schema should be generated. + out_path : str, Path + If given, the resulting jsonschema will also be written to the file + given by out_path. 
+        Optional, default None
+
+    Returns
+    -------
+    data_schema : dict
+        The generated schema.
+    """
+    # Generate schema
+    schema_generator = JsonSchemaExporter(additional_properties=False,
+                                          name_property_for_new_records=True,
+                                          use_id_for_identification=True)
+    schemas = [schema_generator.recordtype_to_json_schema(recordtype)
+               for recordtype in recordtypes]
+    _, data_schema = merge_schemas(schemas, return_data_schema=True)
+    # If indicated, save as json file
+    if out_path is not None:
+        with open(out_path, mode="w", encoding="utf8") as json_file:
+            json.dump(data_schema, json_file, ensure_ascii=False, indent=2)
+    # Return
+    return data_schema
+
+
+def _generate_jsondata_from_records(records: Container,
+                                    out_path: Union[str, Path] = None) -> dict:
+    """
+    Extract relevant information (id, name, properties, etc.) from the given
+    records and convert this information to JSON.
+
+    Parameters
+    ----------
+    records : Iterable
+        List of Record entities from which the data will be converted to JSON.
+    out_path : str, Path
+        If given, the resulting JSON data will also be written to the file
+        given by out_path.
+        Optional, default None
+
+    Returns
+    -------
+    json_data : dict
+        The given records' data in JSON form.
+    """
+    json_data = {}
+    for record in records:
+        # Convert records to high level api objects
+        record_obj = convert_to_python_object(record)
+        try:
+            record_obj.resolve_references(False, None)
+        except linkahead.LinkAheadException:
+            warnings.warn(f"Data for record with id {record_obj.id} might be "
+                          "incomplete: its references could not be resolved.")
+        # Get json representation & adjust layout for compatibility
+        raw_data = record_obj.serialize()
+        raw_data.update(raw_data.get('properties', {}))
+        raw_data.pop('properties', None)
+        if record.parents[0].name not in json_data:
+            json_data[record.parents[0].name] = []
+        json_data[record.parents[0].name].append(raw_data)
+    # If indicated, save as json file
+    if out_path is not None:
+        with open(out_path, mode="w", encoding="utf8") as json_file:
+            json.dump(json_data, json_file, ensure_ascii=False, indent=2, default=str)
+    # Return
+    return json_data
+
+
+def _generate_xlsx_template_file(schema: dict,
+                                 recordtype_names: Union[list, set],
+                                 out_path: Union[str, Path]):
+    """
+    Generate an empty XLSX template file for the given schema at the indicated
+    location.
+
+    Parameters
+    ----------
+    schema : dict
+        JSON schema for which an XLSX template should be generated.
+    recordtype_names : Iterable
+        List of all RecordType names in the given schema.
+    out_path : str, Path
+        The resulting XLSX template will be written to the file at this path.
+    """
+    generator = XLSXTemplateGenerator()
+    foreign_keys = {name: {"__this__": ['id']} for name in recordtype_names}
+    generator.generate(schema=schema, foreign_keys=foreign_keys,
+                       filepath=out_path)
+
+
+def export_container_to_xlsx(records: Container,
+                             xlsx_data_filepath: Union[str, Path],
+                             include_referenced_entities: bool = False,
+                             jsonschema_filepath: Union[str, Path] = None,
+                             jsondata_filepath: Union[str, Path] = None,
+                             xlsx_template_filepath: Union[str, Path] = None):
+    """
+    Export the data of the given records to an XLSX file.
+
+    Parameters
+    ----------
+    records : Container, Iterable
+        List of records to export.
+    xlsx_data_filepath : str, Path
+        Write the resulting XLSX file to the file at this location.
+    include_referenced_entities : bool
+        If set to True, any records referenced by properties of those given in
+        'records' will also be exported.
+        Optional, default False
+    jsonschema_filepath : str, Path
+        If given, write the jsonschema to this file.
+        Optional, default None
+    jsondata_filepath : str, Path
+        If given, write the json data to this file.
+        Optional, default None
+    xlsx_template_filepath : str, Path
+        If given, write the XLSX template to this file.
+        Optional, default None
+    """
+    # Ensure every record is only handled once by using ids as keys.
+    entity_ids = {record.id for record in records}
+    # If indicated, also get and add the records referenced on the first level
+    # in the given container
+    if include_referenced_entities:
+        for record in records:
+            for prop in record.properties:
+                if prop.is_reference() and prop.value is not None:
+                    try:
+                        ref_list = prop.value
+                        if not isinstance(ref_list, list):
+                            ref_list = [ref_list]
+                        for element in ref_list:
+                            if isinstance(element, (int, str)):
+                                elem_id = element
+                            elif isinstance(element, linkahead.Entity):
+                                elem_id = element.id
+                            else:
+                                warnings.warn(f"Cannot handle referenced "
+                                              f"entity '{prop.value}'")
+                                continue
+                            entity_ids.add(elem_id)
+                    except linkahead.LinkAheadException as e:
+                        warnings.warn(f"Cannot handle referenced entity "
+                                      f"'{prop.value}' because of error '{e}'")
+    # Retrieve data
+    new_records = []
+    for entity_id in entity_ids:
+        entity_id = str(entity_id).split('@')[0]  # Queries cannot handle versioned ids
+        entity = execute_query(f"FIND ENTITY WITH (ID = {entity_id})", unique=True)
+        # We can currently only handle Entities with a parent, as otherwise we
+        # do not know which sheet they belong in.
+        if len(entity.get_parents()) > 0:
+            new_records.append(entity)
+        # ToDo: Handle Files and other Entities (e.g. Properties) separately
+    records = new_records
+    recordtypes = {record.parents[0] for record in records}
+    recordtype_ids = {recordtype.id for recordtype in recordtypes}
+    recordtypes = [execute_query(f"FIND RECORDTYPE WITH (ID = {rt_id})",
+                                 unique=True)
+                   for rt_id in recordtype_ids]
+    recordtype_names = {recordtype.name for recordtype in recordtypes}
+    # Generate schema and data from the records
+    json_schema = _generate_jsonschema_from_recordtypes(recordtypes,
+                                                        jsonschema_filepath)
+    json_data = _generate_jsondata_from_records(records, jsondata_filepath)
+    # Generate xlsx template
+    # _generate_xlsx_template_file needs a file name, so use NamedTemporaryFile
+    # ToDo: On Windows, the still-open NamedTemporaryFile may not be reusable; fix if needed
+    if xlsx_template_filepath is None:
+        xlsx_template_file = tempfile.NamedTemporaryFile(suffix='.xlsx')
+        xlsx_template_filepath = xlsx_template_file.name
+    else:
+        xlsx_template_file = None
+    _generate_xlsx_template_file(json_schema, recordtype_names,
+                                 xlsx_template_filepath)
+    # Fill xlsx file with data
+    with warnings.catch_warnings():
+        # We have a lot of information in the json data that we do not need
+        warnings.filterwarnings("ignore",
+                                message="^.*Ignoring path with missing sheet index.*$")
+        warnings.filterwarnings("ignore",
+                                message="^.*No validation schema.*$")
+        fill_template(data=json_data, template=xlsx_template_filepath,
+                      result=xlsx_data_filepath)
+    # Cleanup
+    if xlsx_template_file is not None:
+        xlsx_template_file.close()
diff --git a/src/caosadvancedtools/table_json_conversion/fill_xlsx.py b/src/caosadvancedtools/table_json_conversion/fill_xlsx.py
index f2e0abc3fc684172065d683c99c1c4309c80d6c0..1f39f66d38c27c41389ce14a184671a4be23271b 100644
--- a/src/caosadvancedtools/table_json_conversion/fill_xlsx.py
+++ b/src/caosadvancedtools/table_json_conversion/fill_xlsx.py
@@ -24,15 +24,16 @@ from __future__ import annotations
 import datetime
 import pathlib
+import warnings
 from types import SimpleNamespace
 from typing import Any, Optional, TextIO, Union
 from warnings import warn
 
-from jsonschema import FormatChecker, validate
 from jsonschema.exceptions import ValidationError
 from openpyxl import load_workbook, Workbook
 from openpyxl.cell.cell import ILLEGAL_CHARACTERS_RE
 
+from .validation_utils import _validate_jsonschema
 from .xlsx_utils import (
     array_schema_from_model_schema,
     get_foreign_key_columns,
@@ -354,15 +355,18 @@ validation_schema: dict, optional
 
     # Validation
     if validation_schema is not None:
-        validation_schema = array_schema_from_model_schema(read_or_dict(validation_schema))
+        validation_schema = read_or_dict(validation_schema)
+        # Convert to an array schema if the given schema is a model schema.
+        if 'properties' in validation_schema and validation_schema['properties'].values():
+            if list(validation_schema['properties'].values())[0].get("type") != "array":
+                validation_schema = array_schema_from_model_schema(validation_schema)
         try:
-            # FIXME redefine checker for datetime
-            validate(data, validation_schema, format_checker=FormatChecker())
+            _validate_jsonschema(data, validation_schema)
         except ValidationError as verr:
             print(verr.message)
             raise verr
     else:
-        print("No validation schema given, continue at your own risk.")
+        warnings.warn("No validation schema given, continue at your own risk.")
 
     # Filling the data
     result_wb = load_workbook(template)
diff --git a/src/caosadvancedtools/table_json_conversion/validation_utils.py b/src/caosadvancedtools/table_json_conversion/validation_utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..f1e77f48f7120c5dcd812cb42ba4757a06deb762
--- /dev/null
+++ b/src/caosadvancedtools/table_json_conversion/validation_utils.py
@@ -0,0 +1,101 @@
+# encoding: utf-8
+#
+# This file is a part of the LinkAhead Project.
+#
+# Copyright (C) 2025 Indiscale GmbH <info@indiscale.com>
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as
+# published by the Free Software Foundation, either version 3 of the
+# License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+
+"""
+Utilities for validation of conversion / import / export results.
+For internal use.
+"""
+
+import datetime
+import json
+from copy import deepcopy
+from typing import Union
+
+import jsonschema
+
+
+def _validate_jsonschema(instance: Union[dict, int, str, bool],
+                         schema: Union[str, dict]):
+    """
+    A table_json_conversion-compatible variant of jsonschema.validate().
+    Accepts instances containing datetime objects and None in non-nullable entries.
+
+    Parameters
+    ----------
+    instance : dict, int, str, bool
+        Either a dict or a single JSON value to check against the given schema.
+    schema : str, dict
+        Either a dict with the jsonschema to check against, or a path to a file
+        containing the same.
+    """
+    # Helper Functions
+    def _in_schema(key, val, schema):
+        """
+        Check whether a key-value pair is in the given schema or fulfills the
+        criteria of a direct subschema (anyOf, allOf, oneOf).
+ """ + if schema.get(key, None) == val: + return True + if 'anyOf' in schema: + return any([_in_schema(key, val, sub) for sub in schema['anyOf']]) + if 'allOf' in schema: + return all([_in_schema(key, val, sub) for sub in schema['allOf']]) + if 'oneOf' in schema: + return [_in_schema(key, val, sub) for sub in schema['oneOf']].count(True) == 1 + return False + + def _remove_incompatible_vals(iterable, schema): + """ + Removes Key: None and datetime instances from nested dicts and lists of + any depth. Key: None is currently valid as there is no 'obligatory with + value', and datetime cannot be checked by jsonschema. + """ + if isinstance(iterable, list): + schema = schema.get('items', schema) + for elem in iterable: + _remove_incompatible_vals(elem, schema) + elif isinstance(iterable, dict): + schema = schema.get('properties', schema) + for key, elem in list(iterable.items()): + if elem is None: + iterable.pop(key) + elif isinstance(elem, (datetime.date, datetime.datetime)): + if (_in_schema('format', 'date', schema[key]) or + _in_schema('format', 'date-time', schema[key])): + iterable.pop(key) + elif isinstance(iterable, (dict, list)): + try: + _remove_incompatible_vals(elem, schema[key]) + except KeyError: + pass + return iterable + + # If jsonschema is a file, load its content + if str(schema).endswith(".json"): + with open(schema, encoding="utf-8") as content: + schema = json.load(content) + # If instance is not a dict, remove_incompatible_values would not remove + # the value if it is valid, so we need to check manually by wrapping + instance = deepcopy(instance) + if not isinstance(instance, dict): + if _remove_incompatible_vals({'key': instance}, {'key': schema}) == {}: + return + # Clean dict and validate + instance = _remove_incompatible_vals(deepcopy(instance), schema) + jsonschema.validate(instance, schema=schema) diff --git a/unittests/table_json_conversion/test_fill_xlsx.py b/unittests/table_json_conversion/test_fill_xlsx.py index 899bb81ef1f91f3326f214f49f135a55b97d299f..084f19baccedb6778ceb4ff67bf21dbe0b8e66ac 100644 --- a/unittests/table_json_conversion/test_fill_xlsx.py +++ b/unittests/table_json_conversion/test_fill_xlsx.py @@ -59,13 +59,20 @@ schema: str, optional, custom_output: str, optional If given, write to this file and drop into an IPython shell. For development only. 
""" + if schema is not None: + with open(schema, encoding="utf8", mode="r") as sch_f: + model_schema = json.load(sch_f) + data_schema = xlsx_utils.array_schema_from_model_schema(model_schema) + else: + data_schema = schema + with tempfile.TemporaryDirectory() as tmpdir: outfile = os.path.join(tmpdir, 'test.xlsx') assert not os.path.exists(outfile) if custom_output is not None: outfile = custom_output fill_template(data=json_file, template=template_file, result=outfile, - validation_schema=schema) + validation_schema=data_schema) assert os.path.exists(outfile) generated = load_workbook(outfile) # workbook can be read known_good_wb = load_workbook(known_good) @@ -189,6 +196,32 @@ def test_errors(): known_good=rfp("data/simple_data.xlsx"), schema=rfp("data/simple_schema.json")) assert exc.value.message == "0.5 is not of type 'integer'" + # Check wrong data + with open(rfp("data/simple_data.json")) as json_file: + json_data = json.load(json_file) + json_data["Training"][0]["date"] = "2023-01" + with tempfile.NamedTemporaryFile(suffix='.json', mode='w+t') as temp_file: + json.dump(json_data, temp_file) + temp_file.seek(0) + with pytest.raises(AssertionError) as exc: + fill_and_compare(json_file=temp_file.name, + template_file=rfp("data/simple_template.xlsx"), + known_good=rfp("data/simple_data.xlsx"), + schema=rfp("data/simple_schema.json")) + assert "Training" in str(exc) and "2023-01" in str(exc) + # Check wrong schema + with open(rfp("data/simple_schema.json")) as json_file: + json_schema = json.load(json_file) + json_schema["properties"]["Person"]["properties"]["given_name"]["type"] = "integer" + with tempfile.NamedTemporaryFile(suffix='.json', mode='w+t') as temp_file: + json.dump(json_schema, temp_file) + temp_file.seek(0) + with pytest.raises(schema_exc.ValidationError) as exc: + fill_and_compare(json_file=rfp("data/simple_data.json"), + template_file=rfp("data/simple_template.xlsx"), + known_good=rfp("data/simple_data.xlsx"), + schema=temp_file.name) + assert "integer" in str(exc) def test_data_schema_generation(): diff --git a/unittests/table_json_conversion/test_read_xlsx.py b/unittests/table_json_conversion/test_read_xlsx.py index d453ab3593ec36aa1197727f5ed51d1fb6fea10f..10b462df83a68a6a51088170f8d7c0216bd495e5 100644 --- a/unittests/table_json_conversion/test_read_xlsx.py +++ b/unittests/table_json_conversion/test_read_xlsx.py @@ -24,6 +24,7 @@ import datetime import json import os import re +import tempfile from types import SimpleNamespace from typing import Optional @@ -43,7 +44,7 @@ def rfp(*pathcomponents): def convert_and_compare(xlsx_file: str, schema_file: str, known_good_file: str, known_good_data: Optional[dict] = None, strict: bool = False, - validate: bool = False) -> dict: + validate: bool = True) -> dict: """Convert an XLSX file and compare to a known result. Exactly one of ``known_good_file`` and ``known_good_data`` should be non-empty. 
@@ -57,7 +58,7 @@ json: dict model_schema = json.load(sch_f) data_schema = xlsx_utils.array_schema_from_model_schema(model_schema) - result = convert.to_dict(xlsx=xlsx_file, schema=data_schema, validate=True) + result = convert.to_dict(xlsx=xlsx_file, schema=data_schema, validate=validate) if known_good_file: with open(known_good_file, encoding="utf-8") as myfile: expected = json.load(myfile) @@ -101,6 +102,33 @@ def test_conversions(): assert str(err.value).startswith("Values at path ['Training', 0, ") +def test_validation(): + # Check wrong data + with open(rfp("data/simple_data.json")) as json_file: + known_good = json.load(json_file) + known_good["Training"][0]["date"] = "2023-01-02" + with tempfile.NamedTemporaryFile(suffix='.json', mode='w+t') as temp_file: + json.dump(known_good, temp_file) + temp_file.seek(0) + with pytest.raises(AssertionError) as exc: + convert_and_compare(xlsx_file=rfp("data/simple_data.xlsx"), + schema_file=rfp("data/simple_schema.json"), + known_good_file=temp_file.name) + assert "Training" in str(exc) and "2023-01-02" in str(exc) + # Check wrong schema + with open(rfp("data/simple_schema.json")) as json_file: + json_schema = json.load(json_file) + json_schema["properties"]["Person"]["properties"]["given_name"]["type"] = "integer" + with tempfile.NamedTemporaryFile(suffix='.json', mode='w+t') as temp_file: + json.dump(json_schema, temp_file) + temp_file.seek(0) + with pytest.raises(jsonschema.ValidationError) as exc: + convert_and_compare(xlsx_file=rfp("data/simple_data.xlsx"), + schema_file=temp_file.name, + known_good_file=rfp("data/simple_data.json")) + assert "integer" in str(exc) + + def test_missing_columns(): with pytest.raises(ValueError) as caught: convert.to_dict(xlsx=rfp("data/simple_data_missing.xlsx"),
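For reference, with use_id_for_identification=True a reference property in the
generated schema becomes an id-carrying object, roughly as in the following
sketch (the property name "trainer" is hypothetical; FILE-typed properties get
a "path" string property instead):

# Rough shape of a generated reference property, mirroring the exporter code.
trainer_prop = {
    "type": "object",
    "required": [],
    "additionalProperties": False,
    "title": "trainer",
    "properties": {"id": {"oneOf": [{"type": "integer"}, {"type": "string"}]}},
}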
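A minimal usage sketch for the new public entry point, assuming a running
LinkAhead connection and an existing RecordType named "Training" (the query
and the file name are placeholders):

import linkahead as db
from caosadvancedtools.table_json_conversion.export_import_xlsx import (
    export_container_to_xlsx)

# Fetch the records to export; the query is a placeholder.
records = db.execute_query("FIND RECORD Training")
# Write their data to an XLSX file, pulling in referenced records as well.
export_container_to_xlsx(records=records,
                         xlsx_data_filepath="training_export.xlsx",
                         include_referenced_entities=True)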
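Finally, a sketch of the relaxed validation helper's behavior: None values and
datetime objects are stripped from a deep copy of the instance before
jsonschema.validate runs, so both calls below pass.

import datetime
from caosadvancedtools.table_json_conversion.validation_utils import (
    _validate_jsonschema)

schema = {"type": "object",
          "properties": {"date": {"type": "string", "format": "date"},
                         "name": {"type": "string"}}}
# The datetime is tolerated because the subschema declares "format": "date".
_validate_jsonschema({"date": datetime.date(2024, 1, 2)}, schema)
# The None value is dropped instead of failing the "string" type check.
_validate_jsonschema({"name": None}, schema)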