diff --git a/src/caosadvancedtools/table_json_conversion/fill_xlsx.py b/src/caosadvancedtools/table_json_conversion/fill_xlsx.py index bf6f8ef044c04c12dc1c7a5821c8a26f6d812530..b0f8d2dfe33f420df5e22d45c48e4aa6fea58d21 100644 --- a/src/caosadvancedtools/table_json_conversion/fill_xlsx.py +++ b/src/caosadvancedtools/table_json_conversion/fill_xlsx.py @@ -29,16 +29,7 @@ from openpyxl import load_workbook, Workbook from openpyxl.worksheet.worksheet import Worksheet from .table_generator import ColumnType, RowType - - -def _fill_leaves(json_doc: dict, workbook): - for key, value in json_doc: - if not isinstance(value, list): - value = [value] - for el in value: - if isinstance(el, dict): - _fill_leaves(el, workbook) - workbook.cell(1, 2, el) +from .utils import p2s def _is_exploded_sheet(sheet: Worksheet) -> bool: @@ -48,44 +39,80 @@ def _is_exploded_sheet(sheet: Worksheet) -> bool: sheet. A sheet is detected as exploded iff it has FOREIGN columns. """ column_types = _get_column_types(sheet) - return ColumnType.FOREIGN.value in column_types.values() + return ColumnType.FOREIGN.name in column_types.values() def _get_column_types(sheet: Worksheet) -> OrderedDict: """Return an OrderedDict: column index -> column type for the sheet. """ result = OrderedDict() - type_row_index = _get_row_type_column_index(sheet) - 1 + type_row_index = _get_row_type_column_index(sheet) for idx, col in enumerate(sheet.columns): type_cell = col[type_row_index] result[idx] = type_cell.value - assert hasattr(ColumnType, type_cell.value) or type_cell.value is None, ( + assert (hasattr(ColumnType, type_cell.value) + or type_cell.value == RowType.COL_TYPE.name + or type_cell.value is None), ( f"Unexpected column type value: {type_cell.value}") return result -def _get_row_type_column_index(worksheet): - """Return the column index (1-indexed) of the column which defines the row types. 
+def _get_foreign_key_columns(sheet: Worksheet) -> Dict[str, SimpleNamespace]: + """Return the foreign keys of the worksheet. + +Returns +------- +out: dict[str, SimpleNamespace] + The keys are the stringified paths. The values are SimpleNamespace objects with ``index``, + ``path`` and ``column`` attributes. + """ + column_types = _get_column_types(sheet) + path_rows = _get_path_rows(sheet) + result = OrderedDict() + for for_idx, name in column_types.items(): + if name != ColumnType.FOREIGN.name: + continue + path = [] + for row in path_rows: + component = sheet.cell(row=row+1, column=for_idx+1).value + if component is None: + break + assert isinstance(component, str), f"Expected string: {component}" + path.append(component) + result[p2s(path)] = SimpleNamespace(index=for_idx, path=path, + column=list(sheet.columns)[for_idx]) + return result + + +def _get_deep_value(data: Dict[str, Any], path: List[str]): + """Return the value at ``path`` inside the dict ``data``. + """ + if len(path) > 1: + return _get_deep_value(data[path.pop(0)], path) + return data[path[0]] + + +def _get_row_type_column_index(sheet: Worksheet): + """Return the column index (0-indexed) of the column which defines the row types. """ - for col in worksheet.columns: + for col in sheet.columns: for cell in col: if cell.value == RowType.COL_TYPE.name: - return cell.column + return cell.column - 1 raise ValueError("The column which defines row types (COL_TYPE, PATH, ...) 
is missing") -def _get_path_rows(worksheet): - """Return the 1-based indices of the rows which represent paths.""" +def _get_path_rows(sheet: Worksheet): + """Return the 0-based indices of the rows which represent paths.""" rows = [] - rt_col = _get_row_type_column_index(worksheet) - for cell in list(worksheet.columns)[rt_col-1]: - print(cell.value) + rt_col = _get_row_type_column_index(sheet) + for cell in list(sheet.columns)[rt_col]: if cell.value == RowType.PATH.name: - rows.append(cell.row) + rows.append(cell.row-1) return rows -def _next_row_index(sheet) -> int: +def _next_row_index(sheet: Worksheet) -> int: """Return the index for the next data row. This is defined as the first row without any content. @@ -94,13 +121,16 @@ def _next_row_index(sheet) -> int: class TemplateFiller: - def __init__(self, workbook): + def __init__(self, workbook: Workbook): self._workbook = workbook self._create_index() + self._context: Optional[dict] = None def fill_data(self, data: dict): """Fill the data into the workbook.""" + self._context = data self._handle_data(data=data, current_path=[]) + self._context = None def _create_index(self): """Create a sheet index for the workbook. 
@@ -113,7 +143,7 @@ class TemplateFiller: for sheetname in self._workbook.sheetnames: sheet = self._workbook[sheetname] type_column = [x.value for x in list(sheet.columns)[ - _get_row_type_column_index(sheet) - 1]] + _get_row_type_column_index(sheet)]] # 0-indexed, as everything outside of sheet.cell(...): coltype_idx = type_column.index(RowType.COL_TYPE.name) path_indices = [i for i, typ in enumerate(type_column) if typ == RowType.PATH.name] @@ -127,12 +157,12 @@ class TemplateFiller: for path_idx in path_indices: if col[path_idx].value is not None: path.append(col[path_idx].value) - # col_key = ".".join([col[coltype_idx].value] + path) + # col_key = p2s([col[coltype_idx].value] + path) # col_index[col_key] = SimpleNamespace(column=col, col_index=col_idx) if col[coltype_idx].value not in [ColumnType.SCALAR.name, ColumnType.LIST.name]: continue - path_str = ".".join(path) + path_str = p2s(path) assert path_str not in self._sheet_index self._sheet_index[path_str] = SimpleNamespace( sheetname=sheetname, sheet=sheet, col_index=col_idx, @@ -197,13 +227,13 @@ out: union[dict, None] value = content[0] else: value = ";".join(content) - path_str = ".".join(path) + path_str = p2s(path) assert path_str not in insertables insertables[path_str] = value if only_collect_insertables: return insertables if not current_path: - return + return None # actual data insertion insert_row = None @@ -219,6 +249,14 @@ out: union[dict, None] sheet.cell(row=insert_row+1, column=col_index+1, value=value) # self._handle_simple_data(data=content, current_path=path) + + # Insert foreign keys + if insert_row is not None and sheet is not None and _is_exploded_sheet(sheet): + foreigns = _get_foreign_key_columns(sheet) + for index, path in ((f.index, f.path) for f in foreigns.values()): + value = _get_deep_value(self._context, path) + sheet.cell(row=insert_row+1, column=index+1, value=value) + return None diff --git a/src/caosadvancedtools/table_json_conversion/table_generator.py 
b/src/caosadvancedtools/table_json_conversion/table_generator.py index 0074e4aed7162152c50ee0568056610e5cdb7a6c..905c225f8548b6aad10f43eec18f48e57258bcab 100644 --- a/src/caosadvancedtools/table_json_conversion/table_generator.py +++ b/src/caosadvancedtools/table_json_conversion/table_generator.py @@ -5,6 +5,7 @@ # # Copyright (C) 2024 Indiscale GmbH <info@indiscale.com> # Copyright (C) 2024 Henrik tom Wörden <h.tomwoerden@indiscale.com> +# Copyright (C) 2024 Daniel Hornung <d.hornung@indiscale.com> # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU Affero General Public License as @@ -31,6 +32,8 @@ from openpyxl import Workbook from openpyxl.styles import PatternFill from openpyxl.workbook.child import INVALID_TITLE_REGEX +from .utils import p2s + class ColumnType(Enum): """ column types enum """ @@ -187,7 +190,7 @@ class TableTemplateGenerator(ABC): if (schema['items'].get('type') == 'object' and len(path) > 1): # list of references; special treatment # we add a new sheet with columns generated from the subtree of the schema - sheetname = ".".join(path) + sheetname = p2s(path) if sheetname in sheets: raise ValueError("The schema would lead to two sheets with the same name, " f"which is forbidden: {sheetname}") @@ -200,11 +203,11 @@ class TableTemplateGenerator(ABC): for array_path in array_paths: foreigns = self._get_foreign_keys(foreign_keys, array_path) for foreign in foreigns: - internal_key = ".".join(array_path + [foreign]) + internal_key = p2s(array_path + [foreign]) if internal_key in sheets[sheetname]: raise ValueError("The schema would lead to two columns with the same " f"name, which is forbidden: {internal_key}") - ref_sheet = ".".join(array_path) + ref_sheet = p2s(array_path) sheets[sheetname][internal_key] = ( ColumnType.FOREIGN, f"see sheet '{ref_sheet}'", array_path + [foreign]) # Columns are added to the new sheet, thus we do not return any columns for the @@ -237,7 +240,7 @@ class 
# This file is a part of the LinkAhead Project.
#
# Copyright (C) 2024 IndiScale GmbH <info@indiscale.com>
# Copyright (C) 2024 Daniel Hornung <d.hornung@indiscale.com>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.

from typing import List


def p2s(path: List[str]) -> str:
    """Path to string: dot-separated.

    Parameters
    ----------
    path: list[str]
      The components of the path.

    Returns
    -------
    out: str
      The components joined by dots, e.g. ``"a.b.c"`` for ``["a", "b", "c"]``.  An empty path
      yields the empty string.
    """
    return ".".join(path)