Skip to content
Snippets Groups Projects
Verified Commit 2dc4f469 authored by Daniel Hornung's avatar Daniel Hornung
Browse files

WIP: Filling XLSX

parent b1573034
No related branches found
No related tags found
2 merge requests: !100 "WIP: Filling XLSX: Seems to be working." and !93 "Filling XLSX: Everything except multiple choice."
Pipeline #48177 passed
......@@ -5,6 +5,7 @@
#
# Copyright (C) 2024 Indiscale GmbH <info@indiscale.com>
# Copyright (C) 2024 Henrik tom Wörden <h.tomwoerden@indiscale.com>
# Copyright (C) 2024 Daniel Hornung <d.hornung@indiscale.com>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as
......@@ -19,6 +20,10 @@
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
import json
from types import SimpleNamespace
from typing import List, Union, TextIO
from openpyxl import load_workbook
from .table_generator import ColumnType, RowType
......@@ -34,7 +39,7 @@ def _fill_leaves(json_doc: dict, workbook):
workbook.cell(1, 2, el)
def _get_row_type_column(worksheet):
def _get_row_type_column_index(worksheet):
for col in worksheet.columns:
for cell in col:
if cell.value == RowType.COL_TYPE.name:
......@@ -44,7 +49,7 @@ def _get_row_type_column(worksheet):
def _get_path_rows(worksheet):
rows = []
rt_col = _get_row_type_column(worksheet)
rt_col = _get_row_type_column_index(worksheet)
for cell in list(worksheet.columns)[rt_col-1]:
print(cell.value)
if cell.value == RowType.PATH.name:
......@@ -53,18 +58,128 @@ def _get_path_rows(worksheet):
def _generate_path_col_mapping(workbook):
    """Placeholder for a mapping between paths and columns (not implemented yet).

    At the moment this only looks up the index of the row type column and walks over the columns
    without producing anything; it implicitly returns ``None``.

    NOTE(review): the argument is named ``workbook`` but is used like a worksheet (it is passed to
    ``_get_row_type_column_index`` and its ``columns`` are iterated) -- confirm the intended type.
    """
    # Uses the current helper name; the call to the old name `_get_row_type_column` was stale.
    rt_col = _get_row_type_column_index(workbook)  # not used yet
    for col in workbook.columns:
        pass
def fill_template(template_path: str, json_path: str, result_path: str) -> None:
"""
Fill the contents of the JSON document stored at ``json_path`` into the template stored at
``template_path`` and store the result under ``result_path``.
"""
template = load_workbook(template_path)
class TemplateFiller:
    """Fill nested (JSON-like) data into an XLSX workbook that serves as a template.

    On construction, the sheets of the workbook are indexed by the path which they represent, see
    ``_create_index``.  ``fill_data`` is the entry point for writing data.

    NOTE(review): writing of values is still work in progress; ``_handle_single_data`` currently
    only walks over the data without touching any cell.
    """

    def __init__(self, workbook):
        """Store ``workbook`` and build the sheet index for it."""
        self._workbook = workbook
        self._create_index()

    def fill_data(self, data: dict):
        """Fill the data into the workbook."""
        self._handle_data(data=data, current_path=[])

    @staticmethod
    def _common_prefix(paths: List[List[str]]) -> List[str]:
        """Return the leading components which are shared by all ``paths``.

        The result ends at the first position where the paths differ, or where the shortest path
        ends.  An empty ``paths`` list yields an empty prefix.
        """
        prefix = []
        # zip() stops at the shortest path, so no index can run out of range.
        for components in zip(*paths):
            first = components[0]
            if any(component != first for component in components[1:]):
                break
            prefix.append(first)
        return prefix

    def _create_index(self):
        """Create a sheet index for the workbook.

        Index the sheets by their relevant path array.  For each sheet, the column with the row type
        markers is located with ``_get_row_type_column_index``; the rows marked as ``PATH`` in that
        column hold the path components of every data column.  The sheet is stored in
        ``self._sheet_index`` under the dot-joined common prefix of the paths (without their leaf
        component) of all ``SCALAR`` and ``LIST`` columns.

        A simple column index by column type and path is assembled as well.

        NOTE(review): that column index is built per sheet but not stored anywhere yet.
        """
        self._sheet_index = {}
        for sheetname in self._workbook.sheetnames:
            sheet = self._workbook[sheetname]
            columns = list(sheet.columns)
            type_column = [cell.value for cell in columns[_get_row_type_column_index(sheet) - 1]]
            # 0-indexed, as everything outside of sheet.cell(...):
            coltype_idx = type_column.index(RowType.COL_TYPE.name)
            path_indices = [i for i, typ in enumerate(type_column) if typ == RowType.PATH.name]

            # Get the paths, use without the leaf component for sheet indexing, with type prefix and
            # leaf for column indexing.
            paths = []
            col_index = {}
            for col_idx, col in enumerate(columns):
                col_type = col[coltype_idx].value
                if col_type == RowType.COL_TYPE.name:
                    # This is the row type column itself, it carries no data.
                    continue
                path = [col[path_idx].value for path_idx in path_indices
                        if col[path_idx].value is not None]
                col_key = ".".join([col_type] + path)
                col_index[col_key] = SimpleNamespace(column=col, col_index=col_idx)
                if col_type not in [ColumnType.SCALAR.name, ColumnType.LIST.name]:
                    continue
                paths.append(path[:-1])

            # Only a real common prefix identifies the sheet: stop at the first differing component
            # instead of collecting later components which happen to match again.
            common_path = self._common_prefix(paths)
            assert len(common_path) >= 1, f"No common path found for sheet '{sheetname}'."

            self._sheet_index[".".join(common_path)] = SimpleNamespace(
                common_path=common_path, sheetname=sheetname, sheet=sheet)

    def _handle_data(self, data: dict, current_path: List[str] = None):
        """Handle the data and write it into ``workbook``.

        Parameters
        ----------
        data: dict
          The (sub-)document to handle.
        current_path: List[str], optional
          The path components which lead to ``data``.  Defaults to the empty path.
        """
        if current_path is None:
            current_path = []
        for name, content in data.items():
            path = current_path + [name]
            if not isinstance(content, list):
                self._handle_single_data(data=content, current_path=path)
                continue
            if not content:
                # Nothing to enter for empty lists.
                continue
            # Lists must be homogeneous.
            assert len({type(entry) for entry in content}) == 1
            if isinstance(content[0], dict):
                # An array of objects: must go into exploded sheet
                for entry in content:
                    self._handle_data(data=entry, current_path=path)
            # TODO: Lists of non-dict entries are not written yet.

    def _handle_single_data(self, data, current_path: List[str]):
        """Enter this single data item into the workbook.

        The sheet is looked up in the sheet index by the dot-joined ``current_path``; a ``KeyError``
        is raised if no sheet was indexed under that key.

        NOTE(review): nothing is written yet.  ``data`` is iterated with ``items()``, so it
        presumably has to be a dict -- confirm against the callers.
        """
        sheet = self._sheet_index[".".join(current_path)].sheet  # not used yet
        for name, content in data.items():
            if isinstance(content, list):
                # TODO handle later
                continue
            if isinstance(content, dict):
                pass
def fill_template(data: Union[dict, str, TextIO], template: str, result: str) -> None:
    """Insert json data into an xlsx file, according to a template.

    This function fills the json data into the template stored at ``template`` and stores the result
    as ``result``.

    Parameters
    ----------
    data: Union[dict, str, TextIO]
      The data, given as Python dict, path to a file or a file-like object.
    template: str
      Path to the XLSX template.
    result: str
      Path for the result XLSX.

    Raises
    ------
    ValueError
      If ``data`` is neither a dict, nor a string, nor an object with a ``read`` attribute.
    """
    if not isinstance(data, dict):
        if isinstance(data, str):
            # A string is interpreted as the path to a JSON file.
            with open(data, encoding="utf-8") as infile:
                data = json.load(infile)
        elif hasattr(data, "read"):
            data = json.load(data)
        else:
            raise ValueError(f"I don't know how to handle the datatype of `data`: {type(data)}")

    result_wb = load_workbook(template)
    template_filler = TemplateFiller(result_wb)

    # Design notes:
    #
    # For each top level key in the json we iterate the values (if it is an array). Those are the
    # root elements that belong to a particular sheet.
    # After treating a root element, the row index for the corresponding sheet needs to be
    # increased.
    # When we finished treating an object that goes into a lower ranked sheet (see below), we
    # increase the row index of that sheet.
    #
    # We can generate a hierarchy of sheets in the beginning (using the paths). The lower sheets
    # are for objects referenced by objects in higher ranked sheets.
    # We can detect the sheet corresponding to a root element by looking at the first path element.
    #
    # Suggestion:
    # row indices: Dict[str, int] string is the sheet name
    # sheet_hierarchy: List[Tuple[str]] elements are sheet names
    #
    template_filler.fill_data(data=data)

    # Question:
    # We can create an internal representation where we assign as sheet_names the same names that
    # are used in table generator. Or should we create another special row that contains this
    # somehow?

    # Only the filled workbook is saved. `template` is just a path here, so the former
    # `template.save(result_path)` call could not work with this signature.
    result_wb.save(result)
......@@ -310,8 +310,8 @@ class XLSXTemplateGenerator(TableTemplateGenerator):
"""Create and return a nice workbook for the given sheets."""
wb = Workbook()
yellowfill = PatternFill(fill_type="solid", fgColor='00FFFFAA')
assert wb.sheetnames == ["Sheet"]
# remove initial sheet
assert wb.sheetnames == ["Sheet"]
del wb['Sheet']
for sheetname, sheetdef in sheets.items():
......
No preview for this file type
......@@ -23,7 +23,7 @@ import os
import tempfile
from caosadvancedtools.table_json_conversion.fill_xlsx import (
_get_path_rows, _get_row_type_column, fill_template)
_get_path_rows, _get_row_type_column_index, fill_template)
from openpyxl import load_workbook
......@@ -37,7 +37,7 @@ def rfp(*pathcomponents):
def test_detect():
example = load_workbook(rfp("example_template.xlsx"))
assert 1 == _get_row_type_column(example['Person'])
assert 1 == _get_row_type_column_index(example['Person'])
assert [2, 3] == _get_path_rows(example['Person'])
......
......@@ -61,7 +61,7 @@ out: tuple
foreign_keys=foreign_keys,
filepath=outpath)
assert os.path.exists(outpath)
generated = load_workbook(outpath) # workbook can be read
generated = load_workbook(outpath)
good = load_workbook(known_good)
assert generated.sheetnames == good.sheetnames
for sheetname in good.sheetnames:
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment