diff --git a/src/caosadvancedtools/table_json_conversion/fill_xlsx.py b/src/caosadvancedtools/table_json_conversion/fill_xlsx.py index bc6ad6106ad0c0b4f336b3957027a15fdedec3a2..75a67e020df38112bc80a28b8079d489488add83 100644 --- a/src/caosadvancedtools/table_json_conversion/fill_xlsx.py +++ b/src/caosadvancedtools/table_json_conversion/fill_xlsx.py @@ -26,10 +26,11 @@ import json import pathlib from collections import OrderedDict from types import SimpleNamespace -from typing import Any, Dict, List, Optional, Union, TextIO +from typing import Any, Dict, List, Optional, TextIO, Union from jsonschema import FormatChecker, validate -from openpyxl import load_workbook, Workbook +from jsonschema.exceptions import ValidationError +from openpyxl import Workbook, load_workbook from openpyxl.worksheet.worksheet import Worksheet from .table_generator import ColumnType, RowType @@ -183,6 +184,7 @@ class TemplateFiller: self._props[fullpath] = value def fill_from_data(self, data: Dict[str, Any]): + # TODO recursive for dicts and list? """Fill current level with all scalar elements of ``data``.""" for name, value in data.items(): if not isinstance(value, (dict, list)): @@ -272,6 +274,9 @@ out: union[dict, None] insertables: Dict[str, Any] = {} for name, content in data.items(): + # TODO is this the best way to do it???? + if name == "file": + continue path = current_path + [name] next_context = context.next_level(name) # preprocessing @@ -328,7 +333,11 @@ out: union[dict, None] # Insert foreign keys if insert_row is not None and sheet is not None and _is_exploded_sheet(sheet): - foreigns = _get_foreign_key_columns(sheet) + try: + foreigns = _get_foreign_key_columns(sheet) + except ValueError: + print(f"Sheet: {sheet}") + raise for index, path in ((f.index, f.path) for f in foreigns.values()): value = context[path] sheet.cell(row=insert_row+1, column=index+1, value=value) @@ -361,7 +370,11 @@ validation_schema: dict, optional # Validation if validation_schema is not None: validation_schema = _read_or_dict(validation_schema) - validate(data, validation_schema, format_checker=FormatChecker()) + try: + validate(data, validation_schema, format_checker=FormatChecker()) + except ValidationError as ve: + print(ve.message) + raise RuntimeError("Validation failed") # Filling the data result_wb = load_workbook(template) diff --git a/src/caosadvancedtools/table_json_conversion/table_generator.py b/src/caosadvancedtools/table_json_conversion/table_generator.py index d769c679c89c289635efc29cfede04d988b5d3c9..15e1eac86403359616eb7911fb6e9734c42c4695 100644 --- a/src/caosadvancedtools/table_json_conversion/table_generator.py +++ b/src/caosadvancedtools/table_json_conversion/table_generator.py @@ -196,26 +196,28 @@ class TableTemplateGenerator(ABC): if sheetname in sheets: raise ValueError("The schema would lead to two sheets with the same name, " f"which is forbidden: {sheetname}") - sheets[sheetname] = self._treat_schema_element( + col_def = self._treat_schema_element( schema=schema['items'], sheets=sheets, path=path, foreign_keys=foreign_keys, level_in_sheet_name=len(path), array_paths=array_paths+[path] # since this level is an array extend the list ) - # and add the foreign keys that are necessary up to this point - for array_path in array_paths: - foreigns = self._get_foreign_keys(foreign_keys, array_path) - if isinstance(foreigns, str): - raise ValueError("Foreign keys must be a list of strings, but a single " - "string was given:\n" - f"{array_path} -> {foreigns}") - for foreign in foreigns: - internal_key = p2s(array_path + [foreign]) - if internal_key in sheets[sheetname]: - raise ValueError("The schema would lead to two columns with the same " - f"name, which is forbidden:\n{foreign} -> {internal_key}") - ref_sheet = p2s(array_path) - sheets[sheetname][internal_key] = ( - ColumnType.FOREIGN, f"see sheet '{ref_sheet}'", array_path + [foreign]) + if col_def: + sheets[sheetname] = col_def + # and add the foreign keys that are necessary up to this point + for array_path in array_paths: + foreigns = self._get_foreign_keys(foreign_keys, array_path) + if isinstance(foreigns, str): + raise ValueError("Foreign keys must be a list of strings, but a single " + "string was given:\n" + f"{array_path} -> {foreigns}") + for foreign in foreigns: + internal_key = p2s(array_path + [foreign]) + if internal_key in sheets[sheetname]: + raise ValueError("The schema would lead to two columns with the same " + f"name, which is forbidden:\n{foreign} -> {internal_key}") + ref_sheet = p2s(array_path) + sheets[sheetname][internal_key] = ( + ColumnType.FOREIGN, f"see sheet '{ref_sheet}'", array_path + [foreign]) # Columns are added to the new sheet, thus we do not return any columns for the # current sheet. return {} @@ -326,6 +328,8 @@ class XLSXTemplateGenerator(TableTemplateGenerator): del wb['Sheet'] for sheetname, sheetdef in sheets.items(): + if not sheetdef: + continue ws = wb.create_sheet(re.sub(INVALID_TITLE_REGEX, '_', sheetname)) # First row will by the COL_TYPE row. # First column will be the indicator row with values COL_TYPE, PATH, IGNORE. diff --git a/unittests/table_json_conversion/test_fill_xlsx.py b/unittests/table_json_conversion/test_fill_xlsx.py index 9d981360f6535d1bf2716ede8a3f485a175969da..52416343dbe870bbea23719b9866086b5cbd60aa 100644 --- a/unittests/table_json_conversion/test_fill_xlsx.py +++ b/unittests/table_json_conversion/test_fill_xlsx.py @@ -19,7 +19,9 @@ # You should have received a copy of the GNU Affero General Public License # along with this program. If not, see <https://www.gnu.org/licenses/>. +import json import os +import re import tempfile import jsonschema.exceptions as schema_exc @@ -69,6 +71,56 @@ def test_detect(): assert [1, 2] == _get_path_rows(example['Person']) +def test_temporary(): + # TODO: remove the following after manual testing + di = '/home/henrik/CaosDB/management/external/dimr/eingabemaske/crawler/schemas' + dd = '/home/henrik/CaosDB/management/external/dimr/eingabemaske/django/laforms/persistent/' + allreadydone = [ + "Präventionsmaßnahmen", + "Beratungsstellen", + "Schutzeinrichtungen", + "Einzelfallversorgung", + "Strategiedokumente", + "Kooperationsvereinbarungen", + "Gremien", + "Verwaltungsvorschriften", + "Gewaltschutzkonzepte und -maßnahmen", + "Polizeilicher Opferschutz", + "Feedback", + ] + for prefix, _, files in os.walk(dd): + for fi in files: + match = re.match(r"(?P<teilb>.*)_2024-.*\.json", fi) + + if match: + print(match.group('teilb')) + tb = match.group('teilb') + if tb in allreadydone: + continue + # allreadydone.append(tb) + template = os.path.join(di, "template_"+tb+".xlsx") + schema = os.path.join(di, "schema_"+tb+".json") + if not os.path.exists(template): + print(template) + assert False + jfi = os.path.join(prefix, fi) + print(jfi) + if not fi.startswith("Art"): + continue + # if jfi != "/home/henrik/CaosDB/management/external/dimr/eingabemaske/django/laforms/persistent/data/datenhalterin_gg/he_gg_2/Art__13_Bewusstseinsbildung_2024-01-11T10:22:26.json": + # continue + with open(jfi, encoding="utf-8") as infile: + data = json.load(infile) + data = data["form_data"] + if "__version__" in data: + del data["__version__"] + with tempfile.TemporaryDirectory() as tmpdir: + outfile = os.path.join(tmpdir, 'test.xlsx') + fill_template(data=data, template=template, result=outfile, + validation_schema=schema) + os.system(f'libreoffice {outfile}') + + def test_fill_xlsx(): fill_and_compare(json_file=rfp("data/simple_data.json"), template_file=rfp("data/simple_template.xlsx"), @@ -78,6 +130,7 @@ def test_fill_xlsx(): template_file=rfp("data/multiple_refs_template.xlsx"), known_good=rfp("data/multiple_refs_data.xlsx"), schema=rfp("data/multiple_refs_schema.json")) + fill_and_compare(json_file=rfp("data/indirect_data.json"), template_file=rfp("data/indirect_template.xlsx"), known_good=rfp("data/indirect_data.xlsx"), diff --git a/unittests/table_json_conversion/test_table_template_generator.py b/unittests/table_json_conversion/test_table_template_generator.py index 19cdff2a7fcbb091ca665ee1f27f9d9e27a8a51f..5acf67caf772d811f5fcc9dc0dc4efe78033f328 100644 --- a/unittests/table_json_conversion/test_table_template_generator.py +++ b/unittests/table_json_conversion/test_table_template_generator.py @@ -206,21 +206,44 @@ def test_template_generator(): assert ws.column_dimensions['A'].hidden is True # TODO: remove the following after manual testing - di = '/home/professional/CaosDB/management/external/dimr/eingabemaske/crawler/schemas' + di = '/home/henrik/CaosDB/management/external/dimr/eingabemaske/crawler/schemas' if not os.path.exists(di): return for fi in os.listdir(di): rp = os.path.join(di, fi) + if not fi.startswith("schema_"): + continue with open(rp) as sfi: schema = json.load(sfi) fk_path = os.path.join(di, "foreign_keys"+fi[len('schema'):]) - if not os.path.exists(fk_path): - print(f"No foreign keys file for:\n{rp}") + path =os.path.join(di, "template"+fi[len('schema'):-4]+"xlsx") + allreadydone = [ +"Präventionsmaßnahmen" , +"Beratungsstellen" , +"Schutzeinrichtungen", +"Einzelfallversorgung" , +"Strategiedokumente" , +"Kooperationsvereinbarungen" , +"Gremien" , +"Verwaltungsvorschriften" , +"Gewaltschutzkonzepte und -maßnahmen", +"Polizeilicher Opferschutz", +"Feedback", + ] + if any([path.startswith("template_"+k) for k in allreadydone]): continue + + if not os.path.exists(fk_path): + print(f"No foreign keys file for:\n{fk_path}") + assert False with open(fk_path) as sfi: fk = json.load(sfi) - generator.generate(schema=schema, foreign_keys=fk, filepath=path) - os.system(f'libreoffice {path}') + generator = XLSXTemplateGenerator() + if not os.path.exists(path): + generator.generate(schema=schema, foreign_keys=fk, filepath=path) + os.system(f'libreoffice {path}') + else: + print(f"Not creating template because it exists:\n{path}") # TODO test collisions of sheet or colnames # TODO test escaping of values