MAINT, TEST: Misc. styling and linting, docs, and tests.

Also more WIP for XLSX conversion.

MAINT, TEST: Misc. styling and linting, docs, and tests.
955a7e14 · Daniel Hornung · 2ca96738 · 955a7e14 · 955a7e14 · 955a7e14
Verified Commit 955a7e14 authored 1 year ago by Daniel Hornung
--- a/src/caosadvancedtools/table_json_conversion/convert.py
+++ b/src/caosadvancedtools/table_json_conversion/convert.py
@@ -84,7 +84,7 @@ out: dict
        self._result = {}
        for sheetname in self._workbook.sheetnames:
            if sheetname not in self._handled_sheets:
-                self._handle_sheet(self._workbook.get_sheet_by_name(sheetname))
+                self._handle_sheet(self._workbook[sheetname])
        return self._result
    def _handle_sheet(self, sheet: Worksheet) -> None:
@@ -108,7 +108,7 @@ Look at ``xlsx_utils.get_path_position`` for the specification of the "proper na
        if parent:
            parent_sheetname = xlsx_utils.get_worksheet_for_path(parent, self._defining_path_index)
            if parent_sheetname not in self._handled_sheets:
-                self._handle_sheet(self._workbook.get_sheet_by_name(parent_sheetname))
+                self._handle_sheet(self._workbook[parent_sheetname])
        # # We save single entries in lists, indexed by their foreign key contents.  Each entry
        # # consists of:
@@ -122,7 +122,7 @@ Look at ``xlsx_utils.get_path_position`` for the specification of the "proper na
                continue
            foreign_repr = ""
            foreign = []  # A list of lists, each of which is: [path1, path2, ..., leaf, value]
-            data = {}     # Local data dict
+            data: dict = {}     # Local data dict
            # Collect data (in dict relative to current level) and foreign data information
            for col_idx, value in enumerate(row):
                if col_idx in foreign_column_paths:
@@ -151,12 +151,11 @@ Look at ``xlsx_utils.get_path_position`` for the specification of the "proper na
        # print(f"Added sheet: {sheet.title}")
    def _get_parent_dict(self, parent_path: list[str], foreign: list[list]) -> dict:
-        """For a ``foreign`` specification, get the correct list from the current result-in-making.
+        """Return the dict into which values can be inserted.
+This method returns, from the current result-in-making, the entry at ``parent_path`` which matches
+the values given in the ``foreign`` specification.
 """
-        # if not foreign:
-        #     return self._result
        foreign_groups = _group_foreign_paths(foreign, common=parent_path)
        current_object = self._result

--- a/unittests/table_json_conversion/create_jsonschema.py
+++ b/unittests/table_json_conversion/create_jsonschema.py
@@ -20,17 +20,18 @@
 """
+from __future__ import annotations
 import argparse
 import json
-from typing import List
 import caosadvancedtools.json_schema_exporter as jsex
 from caosadvancedtools.models import parser
 # import tomli
-def prepare_datamodel(modelfile, recordtypes: List[str], outfile: str,
+def prepare_datamodel(modelfile, recordtypes: list[str], outfile: str,
-                      do_not_create: List[str] = None):
+                      do_not_create: list[str] = None):
    if do_not_create is None:
        do_not_create = []
    model = parser.parse_model_from_yaml(modelfile)

--- a/unittests/table_json_conversion/test_read_data.py
+++ b/unittests/table_json_conversion/test_read_data.py
@@ -23,16 +23,13 @@
 import json
 import os
 import re
-import tempfile
 from types import SimpleNamespace
-import jsonschema.exceptions as schema_exc
 import pytest
-import caosadvancedtools.table_json_conversion.convert as convert
+from caosadvancedtools.table_json_conversion import convert
-from openpyxl import load_workbook
-from .utils import compare_workbooks
+from .utils import assert_equal_jsons
 def rfp(*pathcomponents):
@@ -41,85 +38,42 @@ def rfp(*pathcomponents):
    return os.path.join(os.path.dirname(__file__), *pathcomponents)
-def fill_and_compare(json_file: str, template_file: str, known_good: str,
+def convert_and_compare(xlsx_file: str, schema_file: str, known_good_file: str) -> dict:
-                     schema: str = None, custom_output: str = None):
+    """Convert an XLSX file and compare to a known result.
-    """Fill the data into a template and compare to a known good.
-Parameters:
+Returns
-----------
+-------
-schema: str, optional,
+json: dict
-  Json schema to validate against.
+  The result of the conversion.
-custom_output: str, optional
-  If given, write to this file and drop into an IPython shell.  For development only.
    """
-    with tempfile.TemporaryDirectory() as tmpdir:
+    result = convert.to_dict(xlsx=xlsx_file, schema=schema_file)
-        outfile = os.path.join(tmpdir, 'test.xlsx')
+    with open(known_good_file, encoding="utf-8") as myfile:
-        assert not os.path.exists(outfile)
+        expected = json.load(myfile)
-        if custom_output is not None:
+    assert_equal_jsons(result, expected)
-            outfile = custom_output
+    return result
-        # fill_template(data=json_file, template=template_file, result=outfile,
-        #               validation_schema=schema)
-        assert os.path.exists(outfile)
-        generated = load_workbook(outfile)  # workbook can be read
-    known_good_wb = load_workbook(known_good)
-    compare_workbooks(generated, known_good_wb)
-def _assert_equal_jsons(json1, json2, allow_none: bool = True, allow_empty: bool = True,
-                        path: list = None) -> None:
-    """Compare two json objects for near equality.
-Raise an assertion exception if they are not equal."""
-    if path is None:
-        path = []
-    assert isinstance(json1, dict) == isinstance(json2, dict), f"Type mismatch, path: {path}"
-    if isinstance(json1, dict):
-        keys = set(json1.keys()).union(json2.keys())
-        for key in keys:
-            this_path = path + [key]
-            # Case 1: both exist
-            if key in json1 and key in json2:
-                el1 = json1[key]
-                el2 = json2[key]
-                assert type(el1) is type(el2), f"Type mismatch, path: {this_path}"
-                if isinstance(el1, (dict, list)):
-                    # Iterables: Recursion
-                    _assert_equal_jsons(el1, el2, allow_none=allow_none, allow_empty=allow_empty,
-                                        path=this_path)
-                    continue
-                assert el1 == el2, f"Values at path {this_path} are not equal:\n{el1},\n{el2}"
-                continue
-            # Case 2: only one exists
-            existing = json1.get(key, json2.get(key))
-            assert (allow_none and existing is None) or (allow_empty and existing == []), (
-                f"Element at path {this_path} is None or empty in one json and does not exist in "
-                "the other.")
-        return
-    assert isinstance(json1, list) and isinstance(json2, list), f"Is not a list, path: {path}"
-    assert len(json1) == len(json2), f"Lists must have equal length, path: {path}"
-    for idx, (el1, el2) in enumerate(zip(json1, json2)):
-        this_path = path + [idx]
-        if isinstance(el1, dict):
-            _assert_equal_jsons(el1, el2, allow_none=allow_none, allow_empty=allow_empty,
-                                path=this_path)
-        else:
-            assert el1 == el2
 def test_conversions():
-    result = convert.to_dict(xlsx=rfp("data/simple_data.xlsx"), schema=rfp("data/simple_schema.json"))
+    """Test conversion from XLSX to JSON."""
-    expected = json.load(open(rfp("data/simple_data.json")))
+    convert_and_compare(xlsx_file=rfp("data/simple_data.xlsx"),
-    # result = convert.to_dict(xlsx=rfp("data/multiple_refs_data.xlsx"),
+                        schema_file=rfp("data/simple_schema.json"),
-    #                          schema=rfp("data/multiple_refs_schema.json"))
+                        known_good_file=rfp("data/simple_data.json"))
-    # expected = json.load(open(rfp("data/multiple_refs_data.json")))
+    convert_and_compare(xlsx_file=rfp("data/multiple_refs_data.xlsx"),
-    # breakpoint()
+                        schema_file=rfp("data/multiple_refs_schema.json"),
-    _assert_equal_jsons(result, expected)
+                        known_good_file=rfp("data/multiple_refs_data.json"))
-    # breakpoint()
+    convert_and_compare(xlsx_file=rfp("data/indirect_data.xlsx"),
-    # conv = XLSXConverter(schema=rfp("data/simple_schema.json"))
+                        schema_file=rfp("data/indirect_schema.json"),
-    # result = conv.to_dict(rfp("data/simple_template.xlsx"))
+                        known_good_file=rfp("data/indirect_data.json"))
+    convert_and_compare(xlsx_file=rfp("data/simple_data_ascii_chars.xlsx"),
+                        schema_file=rfp("data/simple_schema.json"),
+                        known_good_file=rfp("data/simple_data_ascii_chars.json"))
+    convert_and_compare(xlsx_file=rfp("data/multiple_choice_data.xlsx"),
+                        schema_file=rfp("data/multiple_choice_schema.json"),
+                        known_good_file=rfp("data/multiple_choice_data.json"))
 def test_set_in_nested():
+    """Test the ``_set_in_nested`` function."""
    set_in_nested = convert._set_in_nested  # pylint: disable=protected-access
    test_data_in = [
@@ -158,6 +112,7 @@ def test_set_in_nested():
 def test_group_foreign_paths():
+    """Test the ``_group_foreign_paths`` function."""
    group = convert._group_foreign_paths  # pylint: disable=protected-access
    foreign = [

--- a/unittests/table_json_conversion/test_table_template_generator.py
+++ b/unittests/table_json_conversion/test_table_template_generator.py
@@ -22,7 +22,6 @@
 import json
 import os
 import tempfile
-from typing import Tuple
 import pytest
 from caosadvancedtools.table_json_conversion.table_generator import XLSXTemplateGenerator
@@ -41,7 +40,7 @@ def rfp(*pathcomponents):
 def _compare_generated_to_known_good(schema_file: str, known_good: str, foreign_keys: dict = None,
-                                     outfile: str = None) -> Tuple:
+                                     outfile: str = None) -> tuple:
    """Generate an XLSX from the schema, then compare to known good output.
 Returns

--- a/unittests/table_json_conversion/test_test_utils.py
+++ b/unittests/table_json_conversion/test_test_utils.py
+# encoding: utf-8
+#
+# This file is a part of the LinkAhead Project.
+#
+# Copyright (C) 2024 IndiScale GmbH <info@indiscale.com>
+# Copyright (C) 2024 Daniel Hornung <d.hornung@indiscale.com>
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as
+# published by the Free Software Foundation, either version 3 of the
+# License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+"""Testing the ``utils`` module in this folder."""
+from .utils import _is_recursively_none
+def test_recursively_none():
+    """Check ``_is_recursively_none`` against None-like and not None-like samples."""
+    # None itself, empty containers and containers which hold nothing but None-like content.
+    none_like = [
+        None,
+        [],
+        {},
+        [None],
+        {"a": None},
+        [[], [None, None]],
+        {1: [], 2: [None], 3: {"3.1": None}, 4: {"4.1": [None]}},
+    ]
+    # Scalars and containers with at least one real value somewhere inside.
+    not_none_like = [
+        1,
+        [1],
+        {1: 2},
+        [[1]],
+        {"a": None, "b": "b"},
+        [[], [None, 2]],
+        {1: [], 2: [None], 3: {"3.1": 3.141}, 4: {"4.1": [None]}},
+    ]
+    for sample in none_like:
+        assert _is_recursively_none(sample)
+    for sample in not_none_like:
+        assert not _is_recursively_none(sample)
--- a/unittests/table_json_conversion/utils.py
+++ b/unittests/table_json_conversion/utils.py
+# encoding: utf-8
+#
 # This file is a part of the LinkAhead Project.
 #
 # Copyright (C) 2024 IndiScale GmbH <info@indiscale.com>
@@ -19,9 +21,53 @@
 """Utilities for the tests.
 """
+from typing import Iterable, Union
 from openpyxl import Workbook
+def assert_equal_jsons(json1, json2, allow_none: bool = True, allow_empty: bool = True,
+                       path: list = None) -> None:
+    """Compare two json objects for near equality.
+Raise an assertion exception if they are not equal.
+Parameters
+----------
+json1, json2
+  The json objects to compare.  Both must be dicts or both must be lists.
+allow_none: bool, optional
+  If True, a key which exists in only one of two dicts is accepted if
+  ``_is_recursively_none`` holds for its value.
+allow_empty: bool, optional
+  If True, a key which exists in only one of two dicts is accepted if its value is an empty list.
+path: list, optional
+  The keys and indices which lead to the current objects, used for the assertion messages.
+    """
+    if path is None:
+        path = []
+    assert isinstance(json1, dict) == isinstance(json2, dict), f"Type mismatch, path: {path}"
+    if isinstance(json1, dict):
+        keys = set(json1.keys()).union(json2.keys())
+        for key in keys:
+            this_path = path + [key]
+            # Case 1: exists in both collections
+            if key in json1 and key in json2:
+                el1 = json1[key]
+                el2 = json2[key]
+                assert type(el1) is type(el2), f"Type mismatch, path: {this_path}"
+                if isinstance(el1, (dict, list)):
+                    # Containers: Recursion
+                    assert_equal_jsons(el1, el2, allow_none=allow_none, allow_empty=allow_empty,
+                                       path=this_path)
+                    continue
+                assert el1 == el2, f"Values at path {this_path} are not equal:\n{el1},\n{el2}"
+                continue
+            # Case 2: exists only in one collection.  This is only accepted for values which are
+            # None-like or empty, so the message must report the opposite when the check fails.
+            existing = json1.get(key, json2.get(key))
+            assert ((allow_none and _is_recursively_none(existing))
+                    or (allow_empty and existing == [])), (
+                f"Element at path {this_path} exists in only one json, but is not None or "
+                "empty.")
+        return
+    assert isinstance(json1, list) and isinstance(json2, list), f"Is not a list, path: {path}"
+    assert len(json1) == len(json2), f"Lists must have equal length, path: {path}"
+    for idx, (el1, el2) in enumerate(zip(json1, json2)):
+        this_path = path + [idx]
+        if isinstance(el1, (dict, list)):
+            # Nested containers get the same near-equality treatment as containers inside dicts.
+            assert_equal_jsons(el1, el2, allow_none=allow_none, allow_empty=allow_empty,
+                               path=this_path)
+        else:
+            assert el1 == el2, f"Values at path {this_path} are not equal:\n{el1},\n{el2}"
 def compare_workbooks(wb1: Workbook, wb2: Workbook, hidden: bool = True):
    """Compare two workbooks for equal content.
@@ -52,3 +98,19 @@ hidden: bool, optional
                    f"Sheet: {sheetname}, cell: {cell1.coordinate}, Values: \n"
                    f"{cell1.value}\n{cell2.value}"
                )
+def _is_recursively_none(obj: Union[list, dict] = None):
+    """Tell whether ``obj`` is None-like all the way down.
+``None`` itself is None-like, and so is any list or dict (judged by its values only) whose
+members are all None-like again, which includes empty lists and dicts.  Everything else is not.
+    """
+    if obj is None:
+        return True
+    if isinstance(obj, dict):
+        members: Iterable = obj.values()
+    elif isinstance(obj, list):
+        members = obj
+    else:
+        # Neither None nor a container: a real value.
+        return False
+    return all(_is_recursively_none(member) for member in members)