WIP: Filling XLSX

0dadcca6 · Daniel Hornung · b10f9376 · 0dadcca6 · 0dadcca6 · 0dadcca6
Verified Commit 0dadcca6 authored 1 year ago by Daniel Hornung
--- a/src/caosadvancedtools/table_json_conversion/fill_xlsx.py
+++ b/src/caosadvancedtools/table_json_conversion/fill_xlsx.py
@@ -121,10 +121,16 @@ def _next_row_index(sheet: Worksheet) -> int:


 class TemplateFiller:
+    """Class to fill XLSX templates.  Has an index for all relevant columns."""
+
    def __init__(self, workbook: Workbook):
+        """Store ``workbook`` and call ``_create_index()`` for it."""
        self._workbook = workbook
        self._create_index()
-        self._context: Optional[dict] = None
+        # No context at construction time.
+        # NOTE(review): presumably set while data is being filled in -- confirm.
+        self._context: Optional[dict[str, Any]] = None
+
+    @property
+    def workbook(self) -> Workbook:
+        """The workbook which was handed to the constructor (read-only access)."""
+        return self._workbook

    def fill_data(self, data: dict):
        """Fill the data into the workbook."""
@@ -194,6 +200,7 @@ out: union[dict, None]
        """
        if current_path is None:
            current_path = []
+        assert self._context is not None
        insertables: Dict[str, Any] = {}
        for name, content in data.items():
            path = current_path + [name]
@@ -248,7 +255,6 @@ out: union[dict, None]
                insert_row = _next_row_index(sheet)

            sheet.cell(row=insert_row+1, column=col_index+1, value=value)
-            # self._handle_simple_data(data=content, current_path=path)

        # Insert foreign keys
        if insert_row is not None and sheet is not None and _is_exploded_sheet(sheet):
@@ -284,27 +290,9 @@ result: str
        data = json.load(data)
    else:
        raise ValueError(f"I don't know how to handle the datatype of `data`: {type(data)}")
+    assert isinstance(data, dict)
+
    result_wb = load_workbook(template)
    template_filler = TemplateFiller(result_wb)
-    # For each top level key in the json we iterate the values (if it is an array). Those are the
-    # root elements that belong to a particular sheet.
-    #       After treating a root element, the row index for the corresponding sheet needs to be
-    #       increased
-    #       When we finished treating an object that goes into a lower ranked sheet (see below), we
-    #       increase the row index of that sheet.
-    #
-    # We can generate a hierarchy of sheets in the beginning (using the paths). The lower sheets
-    # are for objects referenced by objects in higher ranked sheets.
-    # We can detect the sheet corresponding to a root element by looking at the first path element:
-    # The first path element must be the root element every where.
-    # Suggestion:
-    # row indices: Dict[str, int] string is the sheet name
-    # sheet_hirarchy: List[Tuple[str]]  elements are sheet names
    template_filler.fill_data(data=data)
-
-    # Question:
-    # We can create an internal representation where we assign as sheet_names the same names that
-    # are used in table generator. Or should we create another special row that contains this
-    # somehow?
-
    result_wb.save(result)
--- a/unittests/table_json_conversion/__init__.py
+++ b/unittests/table_json_conversion/__init__.py
--- a/unittests/table_json_conversion/example_single.json
+++ b/unittests/table_json_conversion/example_single.json
+{
+  "Training": {
+    "date": "2023-01-01",
+    "url": "www.indiscale.com",
+    "coach": [
+      {
+        "family_name": "Sky",
+        "given_name": "Max",
+        "Organisation": "ECB"
+      },
+      {
+        "family_name": "Sky",
+        "given_name": "Min",
+        "Organisation": "ECB"
+      }
+    ],
+    "supervisor": {
+      "family_name": "Steve",
+      "given_name": "Stevie",
+      "Organisation": "IMF"
+    },
+    "duration": 1.0,
+    "participants": 1,
+    "subjects": ["Math", "Physics"],
+    "remote": false
+  },
+  "Person": {
+    "family_name": "Steve",
+    "given_name": "Stevie",
+    "Organisation": "IMF"
+  }
+}
--- a/unittests/table_json_conversion/example_single_data.xlsx
+++ b/unittests/table_json_conversion/example_single_data.xlsx
--- a/unittests/table_json_conversion/test_fill_xlsx.py
+++ b/unittests/table_json_conversion/test_fill_xlsx.py
@@ -26,6 +26,8 @@ from caosadvancedtools.table_json_conversion.fill_xlsx import (
    _get_path_rows, _get_row_type_column_index, fill_template)
 from openpyxl import load_workbook

+from .utils import compare_workbooks
+

 def rfp(*pathcomponents):
    """
@@ -35,6 +37,31 @@ def rfp(*pathcomponents):
    return os.path.join(os.path.dirname(__file__), *pathcomponents)


+def fill_and_compare(json_file: str, template_file: str, known_good: str,
+                     custom_output: str = None):
+    """Fill the data into a template and compare to a known good.
+
+The filled workbook is saved (to a temporary directory unless ``custom_output`` is given), read
+back with ``load_workbook`` and compared to ``known_good`` with ``compare_workbooks``.
+
+Parameters
+----------
+
+json_file: str
+  Handed to ``fill_template`` as ``data``.
+
+template_file: str
+  Handed to ``fill_template`` as ``template``.
+
+known_good: str
+  The file with the expected result, it is read with ``load_workbook``.
+
+custom_output: str, optional
+  If given, write to this file and drop into an IPython shell.  For development only.
+    """
+    with tempfile.TemporaryDirectory() as tmpdir:
+        outfile = os.path.join(tmpdir, 'test.xlsx')
+        assert not os.path.exists(outfile)
+        if custom_output is not None:
+            outfile = custom_output
+        fill_template(data=json_file, template=template_file, result=outfile)
+        assert os.path.exists(outfile)
+        generated = load_workbook(outfile)  # workbook can be read
+    # The temporary directory is gone here, only the loaded workbook is kept.
+    known_good_wb = load_workbook(known_good)
+    if custom_output is not None:
+        # Imported here so that IPython is only needed when ``custom_output`` is used.
+        from IPython import embed
+        embed()
+    compare_workbooks(generated, known_good_wb)
+
+
 def test_detect():
    example = load_workbook(rfp("example_template.xlsx"))
    assert 0 == _get_row_type_column_index(example['Person'])
@@ -42,8 +69,5 @@ def test_detect():


 def test_fill_xlsx():
-    path = os.path.join(tempfile.mkdtemp(), 'test.xlsx')
-    assert not os.path.exists(path)
-    fill_template(data=rfp('example.json'), template=rfp('example_template.xlsx'), result=path)
-    assert os.path.exists(path)
-    generated = load_workbook(path)  # workbook can be read
+    """Fill the example template with the example data and compare to the known good file."""
+    # Resolve the fixtures relative to this test file, so the test does not depend on the
+    # current working directory.
+    fill_and_compare(json_file=rfp("example_single.json"),
+                     template_file=rfp("example_template.xlsx"),
+                     known_good=rfp("example_single_data.xlsx"))
--- a/unittests/table_json_conversion/test_table_template_generator.py
+++ b/unittests/table_json_conversion/test_table_template_generator.py
@@ -29,6 +29,8 @@ from caosadvancedtools.table_json_conversion.table_generator import (
    ColumnType, XLSXTemplateGenerator)
 from openpyxl import load_workbook

+from .utils import compare_workbooks
+

 def rfp(*pathcomponents):
    """
@@ -52,30 +54,19 @@ out: tuple
        foreign_keys = {}
    with open(schema_file, encoding="utf-8") as schema_input:
        schema = json.load(schema_input)
-    if outfile is None:
-        outpath = os.path.join(tempfile.mkdtemp(), 'generated.xlsx')
-    else:
-        outpath = outfile
-    assert not os.path.exists(outpath)
-    generator.generate(schema=schema,
-                       foreign_keys=foreign_keys,
-                       filepath=outpath)
-    assert os.path.exists(outpath)
-    generated = load_workbook(outpath)
+    with tempfile.TemporaryDirectory() as tmpdir:
+        if outfile is None:
+            outpath = os.path.join(tmpdir, 'generated.xlsx')
+        else:
+            outpath = outfile
+        assert not os.path.exists(outpath)
+        generator.generate(schema=schema,
+                           foreign_keys=foreign_keys,
+                           filepath=outpath)
+        assert os.path.exists(outpath)
+        generated = load_workbook(outpath)
    good = load_workbook(known_good)
-    assert generated.sheetnames == good.sheetnames
-    for sheetname in good.sheetnames:
-        gen_sheet = generated[sheetname]
-        good_sheet = good[sheetname]
-        for irow, (erow, grow) in enumerate(zip(good_sheet.iter_rows(), gen_sheet.iter_rows())):
-            assert (good_sheet.row_dimensions[irow].hidden
-                    == gen_sheet.row_dimensions[irow].hidden), f"row: {sheetname}, {irow}"
-            for icol, (ecol, gcol) in enumerate(zip(erow, grow)):
-                assert (good_sheet.column_dimensions[ecol.column_letter].hidden
-                        == gen_sheet.column_dimensions[ecol.column_letter].hidden), (
-                            f"col: {sheetname}, {icol}")
-                cell = gen_sheet.cell(irow+1, icol+1)
-                assert ecol.value == gcol.value, f"Sheet: {sheetname}, cell: {cell.coordinate}"
+    compare_workbooks(generated, good)
    return generated, good



--- a/unittests/table_json_conversion/utils.py
+++ b/unittests/table_json_conversion/utils.py
+# This file is a part of the LinkAhead Project.
+#
+# Copyright (C) 2024 IndiScale GmbH <info@indiscale.com>
+# Copyright (C) 2024 Daniel Hornung <d.hornung@indiscale.com>
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as
+# published by the Free Software Foundation, either version 3 of the
+# License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+
+"""Utilities for the tests.
+"""
+
+from openpyxl import Workbook
+
+
+def compare_workbooks(wb1: Workbook, wb2: Workbook, hidden: bool = True):
+    """Compare two workbooks for equal content.
+
+Raises an error if differences are found.
+
+Rows and cells are paired with ``zip``, so only the range which is covered by both sheets is
+compared.
+
+Parameters
+----------
+
+wb1: Workbook
+  The first workbook.  Cell coordinates in error messages are taken from this workbook.
+
+wb2: Workbook
+  The second workbook.  Its sheet names are used to iterate over the sheets.
+
+hidden: bool, optional
+  Test if the "hidden" status of rows and columns is the same.
+
+Raises
+------
+
+AssertionError
+  If the sheet names differ, or if a cell value or (with ``hidden``) a "hidden" status differs.
+    """
+    assert wb1.sheetnames == wb2.sheetnames, "Sheet names are different."
+    for sheetname in wb2.sheetnames:
+        sheet_1 = wb1[sheetname]
+        sheet_2 = wb2[sheetname]
+        # openpyxl counts rows from 1, so ``irow`` must start at 1 to address the dimensions of
+        # the row which is currently compared.
+        row_pairs = zip(sheet_1.iter_rows(), sheet_2.iter_rows())
+        for irow, (row1, row2) in enumerate(row_pairs, start=1):
+            if hidden:
+                assert (sheet_1.row_dimensions[irow].hidden
+                        == sheet_2.row_dimensions[irow].hidden), f"hidden row: {sheetname}, {irow}"
+            for icol, (cell1, cell2) in enumerate(zip(row1, row2)):
+                if hidden:
+                    assert (sheet_1.column_dimensions[cell1.column_letter].hidden
+                            == sheet_2.column_dimensions[cell2.column_letter].hidden), (
+                                f"hidden col: {sheetname}, {icol}")
+                assert cell1.value == cell2.value, (
+                    f"Sheet: {sheetname}, cell: {cell1.coordinate}, Values: \n"
+                    f"{cell1.value}\n{cell2.value}"
+                )