diff --git a/src/caosadvancedtools/table_json_conversion/fill_xlsx.py b/src/caosadvancedtools/table_json_conversion/fill_xlsx.py index 79c7bfea1925adec47cf74af94d99deaf4fabc06..cca0735f3491277675b06142063b5f8cf712f01f 100644 --- a/src/caosadvancedtools/table_json_conversion/fill_xlsx.py +++ b/src/caosadvancedtools/table_json_conversion/fill_xlsx.py @@ -5,6 +5,7 @@ # # Copyright (C) 2024 Indiscale GmbH <info@indiscale.com> # Copyright (C) 2024 Henrik tom Wörden <h.tomwoerden@indiscale.com> +# Copyright (C) 2024 Daniel Hornung <d.hornung@indiscale.com> # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU Affero General Public License as @@ -19,6 +20,10 @@ # You should have received a copy of the GNU Affero General Public License # along with this program. If not, see <https://www.gnu.org/licenses/>. +import json +from types import SimpleNamespace +from typing import List, Union, TextIO + from openpyxl import load_workbook from .table_generator import ColumnType, RowType @@ -34,7 +39,7 @@ def _fill_leaves(json_doc: dict, workbook): workbook.cell(1, 2, el) -def _get_row_type_column(worksheet): +def _get_row_type_column_index(worksheet): for col in worksheet.columns: for cell in col: if cell.value == RowType.COL_TYPE.name: @@ -44,7 +49,7 @@ def _get_row_type_column(worksheet): def _get_path_rows(worksheet): rows = [] - rt_col = _get_row_type_column(worksheet) + rt_col = _get_row_type_column_index(worksheet) for cell in list(worksheet.columns)[rt_col-1]: print(cell.value) if cell.value == RowType.PATH.name: @@ -53,18 +58,128 @@ def _get_path_rows(worksheet): def _generate_path_col_mapping(workbook): - rt_col = _get_row_type_column(workbook) + rt_col = _get_row_type_column_index(workbook) for col in workbook.columns: pass -def fill_template(template_path: str, json_path: str, result_path: str) -> None: - """ - Fill the contents of the JSON document stored at ``json_path`` into the template stored at - ``template_path`` and store the result under ``result_path``. - """ - template = load_workbook(template_path) +class TemplateFiller: + def __init__(self, workbook): + self._workbook = workbook + self._create_index() + + def fill_data(self, data: dict): + """Fill the data into the workbook.""" + self._handle_data(data=data, current_path=[]) + + def _create_index(self, ): + """Create a sheet index for the workbook. + + Index the sheets by their relevant path array. Also create a simple column index by column + type and path. + + """ + self._sheet_index = {} + for sheetname in self._workbook.sheetnames: + sheet = self._workbook[sheetname] + type_column = [x.value for x in list(sheet.columns)[ + _get_row_type_column_index(sheet) - 1]] + # 0-indexed, as everything outside of sheet.cell(...): + coltype_idx = type_column.index(RowType.COL_TYPE.name) + path_indices = [i for i, typ in enumerate(type_column) if typ == RowType.PATH.name] + + # Get the paths, use without the leaf component for sheet indexing, with type prefix and + # leaf for column indexing. + paths = [] + col_index = {} + for col_idx, col in enumerate(sheet.columns): + if col[coltype_idx].value == RowType.COL_TYPE.name: + continue + path = [] + for path_idx in path_indices: + if col[path_idx].value is not None: + path.append(col[path_idx].value) + col_key = ".".join([col[coltype_idx].value] + path) + col_index[col_key] = SimpleNamespace(column=col, col_index=col_idx) + if col[coltype_idx].value not in [ColumnType.SCALAR.name, ColumnType.LIST.name]: + continue + paths.append(path[:-1]) + + # Find common components: + common_path = [] + for idx, component in enumerate(paths[0]): + for path in paths: + if not path[idx] == component: + break + else: + common_path.append(component) + assert len(common_path) >= 1 + + self._sheet_index[".".join(common_path)] = SimpleNamespace( + common_path=common_path, sheetname=sheetname, sheet=sheet) + + def _handle_data(self, data: dict, current_path: List[str] = None): + """Handle the data and write it into ``workbook``. + """ + if current_path is None: + current_path = [] + for name, content in data.items(): + path = current_path + [name] + if isinstance(content, list): + if not content: + continue + assert len(set(type(entry) for entry in content)) == 1 + if isinstance(content[0], dict): + # An array of objects: must go into exploded sheet + for entry in content: + self._handle_data(data=entry, current_path=path) + continue + for entry in content: + pass + else: + self._handle_single_data(data=content, current_path=path) + + def _handle_single_data(self, data, current_path: List[str]): + """Enter this single data item into the workbook. + """ + sheet = self._sheet_index[".".join(current_path)].sheet + for name, content in data.items(): + if isinstance(content, list): + # TODO handle later + continue + if isinstance(content, dict): + pass + # from IPython import embed + # embed() + + +def fill_template(data: Union[dict, str, TextIO], template: str, result: str) -> None: + """Insert json data into an xlsx file, according to a template. + +This function fills the json data into the template stored at ``template`` and stores the result as +``result``. + +Parameters +---------- +data: Union[dict, str, TextIO] + The data, given as Python dict, path to a file or a file-like object. +template: str + Path to the XLSX template. +result: str + Path for the result XLSX. +""" + if isinstance(data, dict): + pass + elif isinstance(data, str): + with open(data, encoding="utf-8") as infile: + data = json.load(infile) + elif hasattr(data, "read"): + data = json.load(data) + else: + raise ValueError(f"I don't know how to handle the datatype of `data`: {type(data)}") + result_wb = load_workbook(template) + template_filler = TemplateFiller(result_wb) # For each top level key in the json we iterate the values (if it is an array). Those are the # root elements that belong to a particular sheet. # After treating a root element, the row index for the corresponding sheet needs to be @@ -72,7 +187,6 @@ def fill_template(template_path: str, json_path: str, result_path: str) -> None: # When we finished treating an object that goes into a lower ranked sheet (see below), we # increase the row index of that sheet. # - # We can generate a hierarchy of sheets in the beginning (using the paths). The lower sheets # are for objects referenced by objects in higher ranked sheets. # We can detect the sheet corresponding to a root element by looking at the first path element: @@ -80,10 +194,11 @@ def fill_template(template_path: str, json_path: str, result_path: str) -> None: # Suggestion: # row indices: Dict[str, int] string is the sheet name # sheet_hirarchy: List[Tuple[str]] elements are sheet names - # + template_filler.fill_data(data=data) + # Question: # We can create an internal representation where we assign as sheet_names the same names that # are used in table generator. Or should we create another special row that contains this # somehow? - template.save(result_path) + result_wb.save(result) diff --git a/src/caosadvancedtools/table_json_conversion/table_generator.py b/src/caosadvancedtools/table_json_conversion/table_generator.py index 8794496fa369ba2e6804084ad784073dfc065cca..0074e4aed7162152c50ee0568056610e5cdb7a6c 100644 --- a/src/caosadvancedtools/table_json_conversion/table_generator.py +++ b/src/caosadvancedtools/table_json_conversion/table_generator.py @@ -310,8 +310,8 @@ class XLSXTemplateGenerator(TableTemplateGenerator): """Create and return a nice workbook for the given sheets.""" wb = Workbook() yellowfill = PatternFill(fill_type="solid", fgColor='00FFFFAA') - assert wb.sheetnames == ["Sheet"] # remove initial sheet + assert wb.sheetnames == ["Sheet"] del wb['Sheet'] for sheetname, sheetdef in sheets.items(): diff --git a/unittests/table_json_conversion/multiple_refs.xlsx b/unittests/table_json_conversion/multiple_refs.xlsx index 34bea9b9b9d29c9308ec5cff496c91e7ecaa45c9..cff3dad99a3c296e360d660ed5178a0eee48cd40 100644 Binary files a/unittests/table_json_conversion/multiple_refs.xlsx and b/unittests/table_json_conversion/multiple_refs.xlsx differ diff --git a/unittests/table_json_conversion/test_fill_xlsx.py b/unittests/table_json_conversion/test_fill_xlsx.py index 10ddcc32dad9a95d02fe5e6f1f593f9db26e0330..c3f26251011333fa8d5c3071ed481315f096c47f 100644 --- a/unittests/table_json_conversion/test_fill_xlsx.py +++ b/unittests/table_json_conversion/test_fill_xlsx.py @@ -23,7 +23,7 @@ import os import tempfile from caosadvancedtools.table_json_conversion.fill_xlsx import ( - _get_path_rows, _get_row_type_column, fill_template) + _get_path_rows, _get_row_type_column_index, fill_template) from openpyxl import load_workbook @@ -37,7 +37,7 @@ def rfp(*pathcomponents): def test_detect(): example = load_workbook(rfp("example_template.xlsx")) - assert 1 == _get_row_type_column(example['Person']) + assert 1 == _get_row_type_column_index(example['Person']) assert [2, 3] == _get_path_rows(example['Person']) diff --git a/unittests/table_json_conversion/test_table_template_generator.py b/unittests/table_json_conversion/test_table_template_generator.py index 8115c187396baac2bcf7b58d2e891c02c4be28b2..670f7df1dc59c4928ec64d4bf3bdd7b7d5f45cd0 100644 --- a/unittests/table_json_conversion/test_table_template_generator.py +++ b/unittests/table_json_conversion/test_table_template_generator.py @@ -61,7 +61,7 @@ out: tuple foreign_keys=foreign_keys, filepath=outpath) assert os.path.exists(outpath) - generated = load_workbook(outpath) # workbook can be read + generated = load_workbook(outpath) good = load_workbook(known_good) assert generated.sheetnames == good.sheetnames for sheetname in good.sheetnames: