diff --git a/src/caosadvancedtools/table_json_conversion/convert.py b/src/caosadvancedtools/table_json_conversion/convert.py index fe0e20eb08eb17ac7c9800c976a56c265812e4d1..1ba31324c545e85e57168bcdc9f41aa0a307c932 100644 --- a/src/caosadvancedtools/table_json_conversion/convert.py +++ b/src/caosadvancedtools/table_json_conversion/convert.py @@ -84,7 +84,7 @@ out: dict self._result = {} for sheetname in self._workbook.sheetnames: if sheetname not in self._handled_sheets: - self._handle_sheet(self._workbook.get_sheet_by_name(sheetname)) + self._handle_sheet(self._workbook[sheetname]) return self._result def _handle_sheet(self, sheet: Worksheet) -> None: @@ -108,7 +108,7 @@ Look at ``xlsx_utils.get_path_position`` for the specification of the "proper na if parent: parent_sheetname = xlsx_utils.get_worksheet_for_path(parent, self._defining_path_index) if parent_sheetname not in self._handled_sheets: - self._handle_sheet(self._workbook.get_sheet_by_name(parent_sheetname)) + self._handle_sheet(self._workbook[parent_sheetname]) # # We save single entries in lists, indexed by their foreign key contents. Each entry # # consists of: @@ -122,7 +122,7 @@ Look at ``xlsx_utils.get_path_position`` for the specification of the "proper na continue foreign_repr = "" foreign = [] # A list of lists, each of which is: [path1, path2, ..., leaf, value] - data = {} # Local data dict + data: dict = {} # Local data dict # Collect data (in dict relative to current level) and foreign data information for col_idx, value in enumerate(row): if col_idx in foreign_column_paths: @@ -151,12 +151,11 @@ Look at ``xlsx_utils.get_path_position`` for the specification of the "proper na # print(f"Added sheet: {sheet.title}") def _get_parent_dict(self, parent_path: list[str], foreign: list[list]) -> dict: - """For a ``foreign`` specification, get the correct list from the current result-in-making. - - """ - # if not foreign: - # return self._result + """Return the dict into which values can be inserted. +This method returns, from the current result-in-making, the entry at ``parent_path`` which matches +the values given in the ``foreign`` specification. +""" foreign_groups = _group_foreign_paths(foreign, common=parent_path) current_object = self._result diff --git a/unittests/table_json_conversion/create_jsonschema.py b/unittests/table_json_conversion/create_jsonschema.py index 9585f5458edf8f9d3f785099295a3e675230932c..8ab4ad2d973b78522e858b3ee866b870ecf187a4 100755 --- a/unittests/table_json_conversion/create_jsonschema.py +++ b/unittests/table_json_conversion/create_jsonschema.py @@ -20,17 +20,18 @@ """ +from __future__ import annotations + import argparse import json -from typing import List import caosadvancedtools.json_schema_exporter as jsex from caosadvancedtools.models import parser # import tomli -def prepare_datamodel(modelfile, recordtypes: List[str], outfile: str, - do_not_create: List[str] = None): +def prepare_datamodel(modelfile, recordtypes: list[str], outfile: str, + do_not_create: list[str] = None): if do_not_create is None: do_not_create = [] model = parser.parse_model_from_yaml(modelfile) diff --git a/unittests/table_json_conversion/test_read_data.py b/unittests/table_json_conversion/test_read_data.py index 3fbdf3ddea5d81ba449b21b53d98700016beea20..b5dcafc7b433446e311dae6f409b3d7772e1a08e 100644 --- a/unittests/table_json_conversion/test_read_data.py +++ b/unittests/table_json_conversion/test_read_data.py @@ -23,16 +23,13 @@ import json import os import re -import tempfile from types import SimpleNamespace -import jsonschema.exceptions as schema_exc import pytest -import caosadvancedtools.table_json_conversion.convert as convert -from openpyxl import load_workbook +from caosadvancedtools.table_json_conversion import convert -from .utils import compare_workbooks +from .utils import assert_equal_jsons def rfp(*pathcomponents): @@ -41,85 +38,42 @@ def rfp(*pathcomponents): return os.path.join(os.path.dirname(__file__), *pathcomponents) -def fill_and_compare(json_file: str, template_file: str, known_good: str, - schema: str = None, custom_output: str = None): - """Fill the data into a template and compare to a known good. +def convert_and_compare(xlsx_file: str, schema_file: str, known_good_file: str) -> dict: + """Convert an XLSX file and compare to a known result. -Parameters: ------------ -schema: str, optional, - Json schema to validate against. -custom_output: str, optional - If given, write to this file and drop into an IPython shell. For development only. +Returns +------- +json: dict + The result of the conversion. """ - with tempfile.TemporaryDirectory() as tmpdir: - outfile = os.path.join(tmpdir, 'test.xlsx') - assert not os.path.exists(outfile) - if custom_output is not None: - outfile = custom_output - # fill_template(data=json_file, template=template_file, result=outfile, - # validation_schema=schema) - assert os.path.exists(outfile) - generated = load_workbook(outfile) # workbook can be read - known_good_wb = load_workbook(known_good) - compare_workbooks(generated, known_good_wb) - - -def _assert_equal_jsons(json1, json2, allow_none: bool = True, allow_empty: bool = True, - path: list = None) -> None: - """Compare two json objects for near equality. - -Raise an assertion exception if they are not equal.""" - if path is None: - path = [] - assert isinstance(json1, dict) == isinstance(json2, dict), f"Type mismatch, path: {path}" - if isinstance(json1, dict): - keys = set(json1.keys()).union(json2.keys()) - for key in keys: - this_path = path + [key] - # Case 1: both exist - if key in json1 and key in json2: - el1 = json1[key] - el2 = json2[key] - assert type(el1) is type(el2), f"Type mismatch, path: {this_path}" - if isinstance(el1, (dict, list)): - # Iterables: Recursion - _assert_equal_jsons(el1, el2, allow_none=allow_none, allow_empty=allow_empty, - path=this_path) - continue - assert el1 == el2, f"Values at path {this_path} are not equal:\n{el1},\n{el2}" - continue - # Case 2: only one exists - existing = json1.get(key, json2.get(key)) - assert (allow_none and existing is None) or (allow_empty and existing == []), ( - f"Element at path {this_path} is None or empty in one json and does not exist in " - "the other.") - return - assert isinstance(json1, list) and isinstance(json2, list), f"Is not a list, path: {path}" - assert len(json1) == len(json2), f"Lists must have equal length, path: {path}" - for idx, (el1, el2) in enumerate(zip(json1, json2)): - this_path = path + [idx] - if isinstance(el1, dict): - _assert_equal_jsons(el1, el2, allow_none=allow_none, allow_empty=allow_empty, - path=this_path) - else: - assert el1 == el2 + result = convert.to_dict(xlsx=xlsx_file, schema=schema_file) + with open(known_good_file, encoding="utf-8") as myfile: + expected = json.load(myfile) + assert_equal_jsons(result, expected) + return result def test_conversions(): - result = convert.to_dict(xlsx=rfp("data/simple_data.xlsx"), schema=rfp("data/simple_schema.json")) - expected = json.load(open(rfp("data/simple_data.json"))) - # result = convert.to_dict(xlsx=rfp("data/multiple_refs_data.xlsx"), - # schema=rfp("data/multiple_refs_schema.json")) - # expected = json.load(open(rfp("data/multiple_refs_data.json"))) - # breakpoint() - _assert_equal_jsons(result, expected) - # breakpoint() - # conv = XLSXConverter(schema=rfp("data/simple_schema.json")) - # result = conv.to_dict(rfp("data/simple_template.xlsx")) + """Test conversion from XLSX to JSON.""" + convert_and_compare(xlsx_file=rfp("data/simple_data.xlsx"), + schema_file=rfp("data/simple_schema.json"), + known_good_file=rfp("data/simple_data.json")) + convert_and_compare(xlsx_file=rfp("data/multiple_refs_data.xlsx"), + schema_file=rfp("data/multiple_refs_schema.json"), + known_good_file=rfp("data/multiple_refs_data.json")) + convert_and_compare(xlsx_file=rfp("data/indirect_data.xlsx"), + schema_file=rfp("data/indirect_schema.json"), + known_good_file=rfp("data/indirect_data.json")) + convert_and_compare(xlsx_file=rfp("data/simple_data_ascii_chars.xlsx"), + schema_file=rfp("data/simple_schema.json"), + known_good_file=rfp("data/simple_data_ascii_chars.json")) + convert_and_compare(xlsx_file=rfp("data/multiple_choice_data.xlsx"), + schema_file=rfp("data/multiple_choice_schema.json"), + known_good_file=rfp("data/multiple_choice_data.json")) def test_set_in_nested(): + """Test the ``_set_in_nested`` function.""" set_in_nested = convert._set_in_nested # pylint: disable=protected-access test_data_in = [ @@ -158,6 +112,7 @@ def test_set_in_nested(): def test_group_foreign_paths(): + """Test the ``_group_foreign_paths`` function.""" group = convert._group_foreign_paths # pylint: disable=protected-access foreign = [ diff --git a/unittests/table_json_conversion/test_table_template_generator.py b/unittests/table_json_conversion/test_table_template_generator.py index 070a7908dc3884a5a3f721140ff245617753d5e5..07409213ee373d01edcb13395ad6d7856b8185fa 100644 --- a/unittests/table_json_conversion/test_table_template_generator.py +++ b/unittests/table_json_conversion/test_table_template_generator.py @@ -22,7 +22,6 @@ import json import os import tempfile -from typing import Tuple import pytest from caosadvancedtools.table_json_conversion.table_generator import XLSXTemplateGenerator @@ -41,7 +40,7 @@ def rfp(*pathcomponents): def _compare_generated_to_known_good(schema_file: str, known_good: str, foreign_keys: dict = None, - outfile: str = None) -> Tuple: + outfile: str = None) -> tuple: """Generate an XLSX from the schema, then compare to known good output. Returns diff --git a/unittests/table_json_conversion/test_test_utils.py b/unittests/table_json_conversion/test_test_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..30171f61de26b1ae11fb25c730c96b31aa8f06a3 --- /dev/null +++ b/unittests/table_json_conversion/test_test_utils.py @@ -0,0 +1,42 @@ +# encoding: utf-8 +# +# This file is a part of the LinkAhead Project. +# +# Copyright (C) 2024 Indiscale GmbH <info@indiscale.com> +# Copyright (C) 2024 Daniel Hornung <d.hornung@indiscale.com> +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see <https://www.gnu.org/licenses/>. +"""Testing the ``utils`` module in this folder.""" + + +from .utils import _is_recursively_none + + +def test_recursively_none(): + """Testing ``_is_recursively_none``.""" + assert _is_recursively_none(None) + assert _is_recursively_none([]) + assert _is_recursively_none({}) + assert _is_recursively_none([None]) + assert _is_recursively_none({"a": None}) + assert _is_recursively_none([[], [None, None]]) + assert _is_recursively_none({1: [], 2: [None], 3: {"3.1": None}, 4: {"4.1": [None]}}) + + assert not _is_recursively_none(1) + assert not _is_recursively_none([1]) + assert not _is_recursively_none({1: 2}) + assert not _is_recursively_none([[1]]) + assert not _is_recursively_none({"a": None, "b": "b"}) + assert not _is_recursively_none([[], [None, 2]]) + assert not _is_recursively_none({1: [], 2: [None], 3: {"3.1": 3.141}, 4: {"4.1": [None]}}) diff --git a/unittests/table_json_conversion/utils.py b/unittests/table_json_conversion/utils.py index 6c32117c1296e686290ad75bf5f704a1abfb2547..47fe83704a9bdc549a2256e708f4512df4cee2a7 100644 --- a/unittests/table_json_conversion/utils.py +++ b/unittests/table_json_conversion/utils.py @@ -1,3 +1,5 @@ +# encoding: utf-8 +# # This file is a part of the LinkAhead Project. # # Copyright (C) 2024 IndiScale GmbH <info@indiscale.com> @@ -19,9 +21,53 @@ """Utilities for the tests. """ +from typing import Iterable, Union + from openpyxl import Workbook +def assert_equal_jsons(json1, json2, allow_none: bool = True, allow_empty: bool = True, + path: list = None) -> None: + """Compare two json objects for near equality. + +Raise an assertion exception if they are not equal.""" + if path is None: + path = [] + assert isinstance(json1, dict) == isinstance(json2, dict), f"Type mismatch, path: {path}" + if isinstance(json1, dict): + keys = set(json1.keys()).union(json2.keys()) + for key in keys: + this_path = path + [key] + # Case 1: exists in both collections + if key in json1 and key in json2: + el1 = json1[key] + el2 = json2[key] + assert type(el1) is type(el2), f"Type mismatch, path: {this_path}" + if isinstance(el1, (dict, list)): + # Iterables: Recursion + assert_equal_jsons(el1, el2, allow_none=allow_none, allow_empty=allow_empty, + path=this_path) + continue + assert el1 == el2, f"Values at path {this_path} are not equal:\n{el1},\n{el2}" + continue + # Case 2: exists only in one collection + existing = json1.get(key, json2.get(key)) + assert ((allow_none and _is_recursively_none(existing)) + or (allow_empty and existing == [])), ( + f"Element at path {this_path} is None or empty in one json and does not exist in " + "the other.") + return + assert isinstance(json1, list) and isinstance(json2, list), f"Is not a list, path: {path}" + assert len(json1) == len(json2), f"Lists must have equal length, path: {path}" + for idx, (el1, el2) in enumerate(zip(json1, json2)): + this_path = path + [idx] + if isinstance(el1, dict): + assert_equal_jsons(el1, el2, allow_none=allow_none, allow_empty=allow_empty, + path=this_path) + else: + assert el1 == el2 + + def compare_workbooks(wb1: Workbook, wb2: Workbook, hidden: bool = True): """Compare two workbooks for equal content. @@ -52,3 +98,19 @@ hidden: bool, optional f"Sheet: {sheetname}, cell: {cell1.coordinate}, Values: \n" f"{cell1.value}\n{cell2.value}" ) + + +def _is_recursively_none(obj: Union[list, dict] = None): + """Test if ``obj`` is None or recursively consists only of None-like objects.""" + if obj is None: + return True + if isinstance(obj, (list, dict)): + if isinstance(obj, list): + mylist: Iterable = obj + else: + mylist = obj.values() + for element in mylist: + if not _is_recursively_none(element): + return False + return True + return False