Skip to content
Snippets Groups Projects
Verified Commit 0dadcca6 authored by Daniel Hornung's avatar Daniel Hornung
Browse files

WIP: Filling XLSX

parent b10f9376
No related branches found
No related tags found
2 merge requests!100WIP: Filling XLSX: Seems to be working.,!93Filling XLSX: Everything except multiple choice.
Pipeline #48226 failed
......@@ -121,10 +121,16 @@ def _next_row_index(sheet: Worksheet) -> int:
class TemplateFiller:
"""Class to fill XLSX templates. Has an index for all relevant columns."""
def __init__(self, workbook: Workbook):
self._workbook = workbook
self._create_index()
self._context: Optional[dict] = None
self._context: Optional[dict[str, Any]] = None
@property
def workbook(self):
return self._workbook
def fill_data(self, data: dict):
"""Fill the data into the workbook."""
......@@ -194,6 +200,7 @@ out: union[dict, None]
"""
if current_path is None:
current_path = []
assert self._context is not None
insertables: Dict[str, Any] = {}
for name, content in data.items():
path = current_path + [name]
......@@ -248,7 +255,6 @@ out: union[dict, None]
insert_row = _next_row_index(sheet)
sheet.cell(row=insert_row+1, column=col_index+1, value=value)
# self._handle_simple_data(data=content, current_path=path)
# Insert foreign keys
if insert_row is not None and sheet is not None and _is_exploded_sheet(sheet):
......@@ -284,27 +290,9 @@ result: str
data = json.load(data)
else:
raise ValueError(f"I don't know how to handle the datatype of `data`: {type(data)}")
assert isinstance(data, dict)
result_wb = load_workbook(template)
template_filler = TemplateFiller(result_wb)
# For each top level key in the json we iterate the values (if it is an array). Those are the
# root elements that belong to a particular sheet.
# After treating a root element, the row index for the corresponding sheet needs to be
# increased
# When we finished treating an object that goes into a lower ranked sheet (see below), we
# increase the row index of that sheet.
#
# We can generate a hierarchy of sheets in the beginning (using the paths). The lower sheets
# are for objects referenced by objects in higher ranked sheets.
# We can detect the sheet corresponding to a root element by looking at the first path element:
# The first path element must be the root element everywhere.
# Suggestion:
# row indices: Dict[str, int] string is the sheet name
# sheet_hierarchy: List[Tuple[str]] elements are sheet names
template_filler.fill_data(data=data)
# Question:
# We can create an internal representation where we assign as sheet_names the same names that
# are used in table generator. Or should we create another special row that contains this
# somehow?
result_wb.save(result)
{
"Training": {
"date": "2023-01-01",
"url": "www.indiscale.com",
"coach": [
{
"family_name": "Sky",
"given_name": "Max",
"Organisation": "ECB"
},
{
"family_name": "Sky",
"given_name": "Min",
"Organisation": "ECB"
}
],
"supervisor": {
"family_name": "Steve",
"given_name": "Stevie",
"Organisation": "IMF"
},
"duration": 1.0,
"participants": 1,
"subjects": ["Math", "Physics"],
"remote": false
},
"Person": {
"family_name": "Steve",
"given_name": "Stevie",
"Organisation": "IMF"
}
}
File added
......@@ -26,6 +26,8 @@ from caosadvancedtools.table_json_conversion.fill_xlsx import (
_get_path_rows, _get_row_type_column_index, fill_template)
from openpyxl import load_workbook
from .utils import compare_workbooks
def rfp(*pathcomponents):
"""
......@@ -35,6 +37,31 @@ def rfp(*pathcomponents):
return os.path.join(os.path.dirname(__file__), *pathcomponents)
def fill_and_compare(json_file: str, template_file: str, known_good: str,
                     custom_output: str = None):
    """Fill a template with JSON data and check the result against a reference workbook.

    Parameters
    ----------
    json_file: str
      Passed on as the ``data`` argument of ``fill_template``.
    template_file: str
      Passed on as the ``template`` argument of ``fill_template``.
    known_good: str
      Workbook file that the generated workbook is compared against.
    custom_output: str, optional
      If given, write to this file instead of a temporary one and drop into an IPython shell
      before comparing.  For development only.
    """
    with tempfile.TemporaryDirectory() as tmpdir:
        default_target = os.path.join(tmpdir, 'test.xlsx')
        # The temporary directory is fresh, so nothing may exist at the default location yet.
        assert not os.path.exists(default_target)
        outfile = default_target if custom_output is None else custom_output
        fill_template(data=json_file, template=template_file, result=outfile)
        assert os.path.exists(outfile)
        # Loading the result also verifies that the written workbook can be read.
        generated = load_workbook(outfile)

    known_good_wb = load_workbook(known_good)

    if custom_output is not None:
        # Development hook: inspect `generated` and `known_good_wb` interactively.
        from IPython import embed
        embed()

    compare_workbooks(generated, known_good_wb)
def test_detect():
example = load_workbook(rfp("example_template.xlsx"))
assert 0 == _get_row_type_column_index(example['Person'])
......@@ -42,8 +69,5 @@ def test_detect():
def test_fill_xlsx():
path = os.path.join(tempfile.mkdtemp(), 'test.xlsx')
assert not os.path.exists(path)
fill_template(data=rfp('example.json'), template=rfp('example_template.xlsx'), result=path)
assert os.path.exists(path)
generated = load_workbook(path) # workbook can be read
fill_and_compare(json_file="example_single.json", template_file="example_template.xlsx",
known_good="example_single_data.xlsx")
......@@ -29,6 +29,8 @@ from caosadvancedtools.table_json_conversion.table_generator import (
ColumnType, XLSXTemplateGenerator)
from openpyxl import load_workbook
from .utils import compare_workbooks
def rfp(*pathcomponents):
"""
......@@ -52,30 +54,19 @@ out: tuple
foreign_keys = {}
with open(schema_file, encoding="utf-8") as schema_input:
schema = json.load(schema_input)
if outfile is None:
outpath = os.path.join(tempfile.mkdtemp(), 'generated.xlsx')
else:
outpath = outfile
assert not os.path.exists(outpath)
generator.generate(schema=schema,
foreign_keys=foreign_keys,
filepath=outpath)
assert os.path.exists(outpath)
generated = load_workbook(outpath)
with tempfile.TemporaryDirectory() as tmpdir:
if outfile is None:
outpath = os.path.join(tmpdir, 'generated.xlsx')
else:
outpath = outfile
assert not os.path.exists(outpath)
generator.generate(schema=schema,
foreign_keys=foreign_keys,
filepath=outpath)
assert os.path.exists(outpath)
generated = load_workbook(outpath)
good = load_workbook(known_good)
assert generated.sheetnames == good.sheetnames
for sheetname in good.sheetnames:
gen_sheet = generated[sheetname]
good_sheet = good[sheetname]
for irow, (erow, grow) in enumerate(zip(good_sheet.iter_rows(), gen_sheet.iter_rows())):
assert (good_sheet.row_dimensions[irow].hidden
== gen_sheet.row_dimensions[irow].hidden), f"row: {sheetname}, {irow}"
for icol, (ecol, gcol) in enumerate(zip(erow, grow)):
assert (good_sheet.column_dimensions[ecol.column_letter].hidden
== gen_sheet.column_dimensions[ecol.column_letter].hidden), (
f"col: {sheetname}, {icol}")
cell = gen_sheet.cell(irow+1, icol+1)
assert ecol.value == gcol.value, f"Sheet: {sheetname}, cell: {cell.coordinate}"
compare_workbooks(generated, good)
return generated, good
......
# This file is a part of the LinkAhead Project.
#
# Copyright (C) 2024 IndiScale GmbH <info@indiscale.com>
# Copyright (C) 2024 Daniel Hornung <d.hornung@indiscale.com>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
"""Utilities for the tests.
"""
from openpyxl import Workbook
def compare_workbooks(wb1: Workbook, wb2: Workbook, hidden: bool = True):
    """Compare two workbooks for equal content.

    Raises an error if differences are found.

    Parameters
    ----------
    wb1, wb2: Workbook
      The two workbooks to compare.
    hidden: bool, optional
      Test if the "hidden" status of rows and columns is the same.

    Raises
    ------
    AssertionError
      If the sheet names differ, if any cell value differs, or (with ``hidden`` set) if the
      hidden status of any row or column differs.
    """
    assert wb1.sheetnames == wb2.sheetnames, "Sheet names are different."
    for sheetname in wb2.sheetnames:
        sheet_1 = wb1[sheetname]
        sheet_2 = wb2[sheetname]
        # Walk the union of both used ranges.  A plain ``zip`` over ``iter_rows()`` stops at the
        # smaller sheet and would silently ignore surplus rows or columns in the other one.
        bounds = {
            "min_row": 1,
            "max_row": max(sheet_1.max_row, sheet_2.max_row),
            "min_col": 1,
            "max_col": max(sheet_1.max_column, sheet_2.max_column),
        }
        row_pairs = zip(sheet_1.iter_rows(**bounds), sheet_2.iter_rows(**bounds))
        # Row indices in openpyxl are 1-based, so the counter used for ``row_dimensions`` must
        # start at 1; otherwise the last row is never checked.
        for irow, (row1, row2) in enumerate(row_pairs, start=1):
            if hidden:
                assert (sheet_1.row_dimensions[irow].hidden
                        == sheet_2.row_dimensions[irow].hidden), (
                            f"hidden row: {sheetname}, {irow}")
            for cell1, cell2 in zip(row1, row2):
                if hidden:
                    assert (sheet_1.column_dimensions[cell1.column_letter].hidden
                            == sheet_2.column_dimensions[cell2.column_letter].hidden), (
                                f"hidden col: {sheetname}, {cell1.column_letter}")
                assert cell1.value == cell2.value, (
                    f"Sheet: {sheetname}, cell: {cell1.coordinate}, Values: \n"
                    f"{cell1.value}\n{cell2.value}"
                )
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment