Skip to content
Snippets Groups Projects
Verified Commit 955a7e14 authored by Daniel Hornung
Browse files

MAINT, TEST: Misc. styling and linting, docs, and tests.

Also more WIP for XLSX conversion.
parent 2ca96738
No related branches found
No related tags found
2 merge requests: !107 "Release v0.11.0", !102 "ENH: XLSX reader"
...@@ -84,7 +84,7 @@ out: dict ...@@ -84,7 +84,7 @@ out: dict
self._result = {} self._result = {}
for sheetname in self._workbook.sheetnames: for sheetname in self._workbook.sheetnames:
if sheetname not in self._handled_sheets: if sheetname not in self._handled_sheets:
self._handle_sheet(self._workbook.get_sheet_by_name(sheetname)) self._handle_sheet(self._workbook[sheetname])
return self._result return self._result
def _handle_sheet(self, sheet: Worksheet) -> None: def _handle_sheet(self, sheet: Worksheet) -> None:
...@@ -108,7 +108,7 @@ Look at ``xlsx_utils.get_path_position`` for the specification of the "proper na ...@@ -108,7 +108,7 @@ Look at ``xlsx_utils.get_path_position`` for the specification of the "proper na
if parent: if parent:
parent_sheetname = xlsx_utils.get_worksheet_for_path(parent, self._defining_path_index) parent_sheetname = xlsx_utils.get_worksheet_for_path(parent, self._defining_path_index)
if parent_sheetname not in self._handled_sheets: if parent_sheetname not in self._handled_sheets:
self._handle_sheet(self._workbook.get_sheet_by_name(parent_sheetname)) self._handle_sheet(self._workbook[parent_sheetname])
# # We save single entries in lists, indexed by their foreign key contents. Each entry # # We save single entries in lists, indexed by their foreign key contents. Each entry
# # consists of: # # consists of:
...@@ -122,7 +122,7 @@ Look at ``xlsx_utils.get_path_position`` for the specification of the "proper na ...@@ -122,7 +122,7 @@ Look at ``xlsx_utils.get_path_position`` for the specification of the "proper na
continue continue
foreign_repr = "" foreign_repr = ""
foreign = [] # A list of lists, each of which is: [path1, path2, ..., leaf, value] foreign = [] # A list of lists, each of which is: [path1, path2, ..., leaf, value]
data = {} # Local data dict data: dict = {} # Local data dict
# Collect data (in dict relative to current level) and foreign data information # Collect data (in dict relative to current level) and foreign data information
for col_idx, value in enumerate(row): for col_idx, value in enumerate(row):
if col_idx in foreign_column_paths: if col_idx in foreign_column_paths:
...@@ -151,12 +151,11 @@ Look at ``xlsx_utils.get_path_position`` for the specification of the "proper na ...@@ -151,12 +151,11 @@ Look at ``xlsx_utils.get_path_position`` for the specification of the "proper na
# print(f"Added sheet: {sheet.title}") # print(f"Added sheet: {sheet.title}")
def _get_parent_dict(self, parent_path: list[str], foreign: list[list]) -> dict: def _get_parent_dict(self, parent_path: list[str], foreign: list[list]) -> dict:
"""For a ``foreign`` specification, get the correct list from the current result-in-making. """Return the dict into which values can be inserted.
This method returns, from the current result-in-making, the entry at ``parent_path`` which matches
the values given in the ``foreign`` specification.
""" """
# if not foreign:
# return self._result
foreign_groups = _group_foreign_paths(foreign, common=parent_path) foreign_groups = _group_foreign_paths(foreign, common=parent_path)
current_object = self._result current_object = self._result
......
...@@ -20,17 +20,18 @@ ...@@ -20,17 +20,18 @@
""" """
from __future__ import annotations
import argparse import argparse
import json import json
from typing import List
import caosadvancedtools.json_schema_exporter as jsex import caosadvancedtools.json_schema_exporter as jsex
from caosadvancedtools.models import parser from caosadvancedtools.models import parser
# import tomli # import tomli
def prepare_datamodel(modelfile, recordtypes: List[str], outfile: str, def prepare_datamodel(modelfile, recordtypes: list[str], outfile: str,
do_not_create: List[str] = None): do_not_create: list[str] = None):
if do_not_create is None: if do_not_create is None:
do_not_create = [] do_not_create = []
model = parser.parse_model_from_yaml(modelfile) model = parser.parse_model_from_yaml(modelfile)
......
...@@ -23,16 +23,13 @@ ...@@ -23,16 +23,13 @@
import json import json
import os import os
import re import re
import tempfile
from types import SimpleNamespace from types import SimpleNamespace
import jsonschema.exceptions as schema_exc
import pytest import pytest
import caosadvancedtools.table_json_conversion.convert as convert from caosadvancedtools.table_json_conversion import convert
from openpyxl import load_workbook
from .utils import compare_workbooks from .utils import assert_equal_jsons
def rfp(*pathcomponents): def rfp(*pathcomponents):
...@@ -41,85 +38,42 @@ def rfp(*pathcomponents): ...@@ -41,85 +38,42 @@ def rfp(*pathcomponents):
return os.path.join(os.path.dirname(__file__), *pathcomponents) return os.path.join(os.path.dirname(__file__), *pathcomponents)
def fill_and_compare(json_file: str, template_file: str, known_good: str, def convert_and_compare(xlsx_file: str, schema_file: str, known_good_file: str) -> dict:
schema: str = None, custom_output: str = None): """Convert an XLSX file and compare to a known result.
"""Fill the data into a template and compare to a known good.
Parameters: Returns
----------- -------
schema: str, optional, json: dict
Json schema to validate against. The result of the conversion.
custom_output: str, optional
If given, write to this file and drop into an IPython shell. For development only.
""" """
with tempfile.TemporaryDirectory() as tmpdir: result = convert.to_dict(xlsx=xlsx_file, schema=schema_file)
outfile = os.path.join(tmpdir, 'test.xlsx') with open(known_good_file, encoding="utf-8") as myfile:
assert not os.path.exists(outfile) expected = json.load(myfile)
if custom_output is not None: assert_equal_jsons(result, expected)
outfile = custom_output return result
# fill_template(data=json_file, template=template_file, result=outfile,
# validation_schema=schema)
assert os.path.exists(outfile)
generated = load_workbook(outfile) # workbook can be read
known_good_wb = load_workbook(known_good)
compare_workbooks(generated, known_good_wb)
def _assert_equal_jsons(json1, json2, allow_none: bool = True, allow_empty: bool = True,
                        path: list = None) -> None:
    """Compare two json objects for near equality.

    Dicts and lists are compared recursively.  A key which exists in only one of two compared
    dicts is tolerated if its value is ``None`` (with ``allow_none``) or ``[]`` (with
    ``allow_empty``).

    Parameters
    ----------
    json1, json2
        The json objects (dicts or lists) to compare.
    allow_none: bool, optional
        If True, a key may be missing in one dict if its value in the other dict is ``None``.
    allow_empty: bool, optional
        If True, a key may be missing in one dict if its value in the other dict is ``[]``.
    path: list, optional
        The keys and indices which lead to the current objects.  Only used for the messages.

    Raises
    ------
    AssertionError
        If the two objects are not (nearly) equal.
    """
    if path is None:
        path = []
    assert isinstance(json1, dict) == isinstance(json2, dict), f"Type mismatch, path: {path}"
    if isinstance(json1, dict):
        keys = set(json1.keys()).union(json2.keys())
        for key in keys:
            this_path = path + [key]
            # Case 1: both exist
            if key in json1 and key in json2:
                el1 = json1[key]
                el2 = json2[key]
                assert type(el1) is type(el2), f"Type mismatch, path: {this_path}"
                if isinstance(el1, (dict, list)):
                    # Iterables: Recursion
                    _assert_equal_jsons(el1, el2, allow_none=allow_none,
                                        allow_empty=allow_empty, path=this_path)
                    continue
                assert el1 == el2, f"Values at path {this_path} are not equal:\n{el1},\n{el2}"
                continue
            # Case 2: only one exists
            existing = json1.get(key, json2.get(key))
            assert (allow_none and existing is None) or (allow_empty and existing == []), (
                f"Element at path {this_path} exists in only one json and is not an allowed "
                f"None/empty value:\n{existing}")
        return
    assert isinstance(json1, list) and isinstance(json2, list), f"Is not a list, path: {path}"
    assert len(json1) == len(json2), f"Lists must have equal length, path: {path}"
    for idx, (el1, el2) in enumerate(zip(json1, json2)):
        this_path = path + [idx]
        if isinstance(el1, (dict, list)) or isinstance(el2, (dict, list)):
            # Nested containers get the same tolerant comparison as containers inside dicts.  A
            # container on only one side fails inside the recursive call, with a proper message.
            _assert_equal_jsons(el1, el2, allow_none=allow_none, allow_empty=allow_empty,
                                path=this_path)
        else:
            assert el1 == el2, f"Values at path {this_path} are not equal:\n{el1},\n{el2}"
def test_conversions(): def test_conversions():
result = convert.to_dict(xlsx=rfp("data/simple_data.xlsx"), schema=rfp("data/simple_schema.json")) """Test conversion from XLSX to JSON."""
expected = json.load(open(rfp("data/simple_data.json"))) convert_and_compare(xlsx_file=rfp("data/simple_data.xlsx"),
# result = convert.to_dict(xlsx=rfp("data/multiple_refs_data.xlsx"), schema_file=rfp("data/simple_schema.json"),
# schema=rfp("data/multiple_refs_schema.json")) known_good_file=rfp("data/simple_data.json"))
# expected = json.load(open(rfp("data/multiple_refs_data.json"))) convert_and_compare(xlsx_file=rfp("data/multiple_refs_data.xlsx"),
# breakpoint() schema_file=rfp("data/multiple_refs_schema.json"),
_assert_equal_jsons(result, expected) known_good_file=rfp("data/multiple_refs_data.json"))
# breakpoint() convert_and_compare(xlsx_file=rfp("data/indirect_data.xlsx"),
# conv = XLSXConverter(schema=rfp("data/simple_schema.json")) schema_file=rfp("data/indirect_schema.json"),
# result = conv.to_dict(rfp("data/simple_template.xlsx")) known_good_file=rfp("data/indirect_data.json"))
convert_and_compare(xlsx_file=rfp("data/simple_data_ascii_chars.xlsx"),
schema_file=rfp("data/simple_schema.json"),
known_good_file=rfp("data/simple_data_ascii_chars.json"))
convert_and_compare(xlsx_file=rfp("data/multiple_choice_data.xlsx"),
schema_file=rfp("data/multiple_choice_schema.json"),
known_good_file=rfp("data/multiple_choice_data.json"))
def test_set_in_nested(): def test_set_in_nested():
"""Test the ``_set_in_nested`` function."""
set_in_nested = convert._set_in_nested # pylint: disable=protected-access set_in_nested = convert._set_in_nested # pylint: disable=protected-access
test_data_in = [ test_data_in = [
...@@ -158,6 +112,7 @@ def test_set_in_nested(): ...@@ -158,6 +112,7 @@ def test_set_in_nested():
def test_group_foreign_paths(): def test_group_foreign_paths():
"""Test the ``_group_foreign_paths`` function."""
group = convert._group_foreign_paths # pylint: disable=protected-access group = convert._group_foreign_paths # pylint: disable=protected-access
foreign = [ foreign = [
......
...@@ -22,7 +22,6 @@ ...@@ -22,7 +22,6 @@
import json import json
import os import os
import tempfile import tempfile
from typing import Tuple
import pytest import pytest
from caosadvancedtools.table_json_conversion.table_generator import XLSXTemplateGenerator from caosadvancedtools.table_json_conversion.table_generator import XLSXTemplateGenerator
...@@ -41,7 +40,7 @@ def rfp(*pathcomponents): ...@@ -41,7 +40,7 @@ def rfp(*pathcomponents):
def _compare_generated_to_known_good(schema_file: str, known_good: str, foreign_keys: dict = None, def _compare_generated_to_known_good(schema_file: str, known_good: str, foreign_keys: dict = None,
outfile: str = None) -> Tuple: outfile: str = None) -> tuple:
"""Generate an XLSX from the schema, then compare to known good output. """Generate an XLSX from the schema, then compare to known good output.
Returns Returns
......
# encoding: utf-8
#
# This file is a part of the LinkAhead Project.
#
# Copyright (C) 2024 IndiScale GmbH <info@indiscale.com>
# Copyright (C) 2024 Daniel Hornung <d.hornung@indiscale.com>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
"""Testing the ``utils`` module in this folder."""
from .utils import _is_recursively_none
def test_recursively_none():
    """Check ``_is_recursively_none`` against None-like objects and objects with content."""
    # Everything in here consists of nothing but None, possibly nested in lists and dicts.
    none_like = (
        None,
        [],
        {},
        [None],
        {"a": None},
        [[], [None, None]],
        {1: [], 2: [None], 3: {"3.1": None}, 4: {"4.1": [None]}},
    )
    for candidate in none_like:
        assert _is_recursively_none(candidate)

    # Each of these holds at least one value which is not None, at some nesting level.
    with_content = (
        1,
        [1],
        {1: 2},
        [[1]],
        {"a": None, "b": "b"},
        [[], [None, 2]],
        {1: [], 2: [None], 3: {"3.1": 3.141}, 4: {"4.1": [None]}},
    )
    for candidate in with_content:
        assert not _is_recursively_none(candidate)
# encoding: utf-8
#
# This file is a part of the LinkAhead Project. # This file is a part of the LinkAhead Project.
# #
# Copyright (C) 2024 IndiScale GmbH <info@indiscale.com> # Copyright (C) 2024 IndiScale GmbH <info@indiscale.com>
...@@ -19,9 +21,53 @@ ...@@ -19,9 +21,53 @@
"""Utilities for the tests. """Utilities for the tests.
""" """
from typing import Iterable, Union
from openpyxl import Workbook from openpyxl import Workbook
def assert_equal_jsons(json1, json2, allow_none: bool = True, allow_empty: bool = True,
                       path: list = None) -> None:
    """Compare two json objects for near equality.

    Dicts and lists are compared recursively.  A key which exists in only one of two compared
    dicts is tolerated if its value is None-like according to ``_is_recursively_none`` (with
    ``allow_none``) or is ``[]`` (with ``allow_empty``).

    Parameters
    ----------
    json1, json2
        The json objects (dicts or lists) to compare.
    allow_none: bool, optional
        If True, a key may be missing in one dict if its value in the other dict is recursively
        None.
    allow_empty: bool, optional
        If True, a key may be missing in one dict if its value in the other dict is ``[]``.
    path: list, optional
        The keys and indices which lead to the current objects.  Only used for the messages.

    Raises
    ------
    AssertionError
        If the two objects are not (nearly) equal.
    """
    if path is None:
        path = []
    assert isinstance(json1, dict) == isinstance(json2, dict), f"Type mismatch, path: {path}"
    if isinstance(json1, dict):
        keys = set(json1.keys()).union(json2.keys())
        for key in keys:
            this_path = path + [key]
            # Case 1: exists in both collections
            if key in json1 and key in json2:
                el1 = json1[key]
                el2 = json2[key]
                assert type(el1) is type(el2), f"Type mismatch, path: {this_path}"
                if isinstance(el1, (dict, list)):
                    # Iterables: Recursion
                    assert_equal_jsons(el1, el2, allow_none=allow_none, allow_empty=allow_empty,
                                       path=this_path)
                    continue
                assert el1 == el2, f"Values at path {this_path} are not equal:\n{el1},\n{el2}"
                continue
            # Case 2: exists only in one collection
            existing = json1.get(key, json2.get(key))
            assert ((allow_none and _is_recursively_none(existing))
                    or (allow_empty and existing == [])), (
                f"Element at path {this_path} exists in only one json and is not an allowed "
                f"None/empty value:\n{existing}")
        return
    assert isinstance(json1, list) and isinstance(json2, list), f"Is not a list, path: {path}"
    assert len(json1) == len(json2), f"Lists must have equal length, path: {path}"
    for idx, (el1, el2) in enumerate(zip(json1, json2)):
        this_path = path + [idx]
        if isinstance(el1, (dict, list)) or isinstance(el2, (dict, list)):
            # Nested containers get the same tolerant comparison as containers inside dicts.  A
            # container on only one side fails inside the recursive call, with a proper message.
            assert_equal_jsons(el1, el2, allow_none=allow_none, allow_empty=allow_empty,
                               path=this_path)
        else:
            assert el1 == el2, f"Values at path {this_path} are not equal:\n{el1},\n{el2}"
def compare_workbooks(wb1: Workbook, wb2: Workbook, hidden: bool = True): def compare_workbooks(wb1: Workbook, wb2: Workbook, hidden: bool = True):
"""Compare two workbooks for equal content. """Compare two workbooks for equal content.
...@@ -52,3 +98,19 @@ hidden: bool, optional ...@@ -52,3 +98,19 @@ hidden: bool, optional
f"Sheet: {sheetname}, cell: {cell1.coordinate}, Values: \n" f"Sheet: {sheetname}, cell: {cell1.coordinate}, Values: \n"
f"{cell1.value}\n{cell2.value}" f"{cell1.value}\n{cell2.value}"
) )
def _is_recursively_none(obj: Union[list, dict] = None):
    """Tell whether ``obj`` is None or is built, at every nesting level, from None-like objects only.

    Lists are checked element by element, dicts value by value (keys are ignored).  Empty lists
    and dicts therefore count as None-like.  Any other object which is not None does not.
    """
    if obj is None:
        return True
    if isinstance(obj, dict):
        children: Iterable = obj.values()
    elif isinstance(obj, list):
        children = obj
    else:
        # Neither None nor a container: this is actual content.
        return False
    return all(_is_recursively_none(child) for child in children)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment