From 734b70b4738e41775fca9f601b7dcf4b9a7f69e8 Mon Sep 17 00:00:00 2001 From: Daniel <d.hornung@indiscale.com> Date: Fri, 8 Mar 2024 11:05:19 +0100 Subject: [PATCH] WIP: Filling XLSX: Schema validation and tests. --- .../table_json_conversion/fill_xlsx.py | 38 ++++++++++++++----- .../data/multiple_refs_schema.json | 7 +--- .../data/simple_schema.json | 12 ++++-- .../table_json_conversion/test_fill_xlsx.py | 14 ++++--- 4 files changed, 48 insertions(+), 23 deletions(-) diff --git a/src/caosadvancedtools/table_json_conversion/fill_xlsx.py b/src/caosadvancedtools/table_json_conversion/fill_xlsx.py index 5a620d67..977b4e90 100644 --- a/src/caosadvancedtools/table_json_conversion/fill_xlsx.py +++ b/src/caosadvancedtools/table_json_conversion/fill_xlsx.py @@ -27,6 +27,7 @@ from collections import OrderedDict from types import SimpleNamespace from typing import Any, Dict, List, Optional, Union, TextIO +from jsonschema import FormatChecker, validate from openpyxl import load_workbook, Workbook from openpyxl.worksheet.worksheet import Worksheet @@ -122,6 +123,21 @@ def _next_row_index(sheet: Worksheet) -> int: return sheet.max_row +def _read_or_dict(data: Union[dict, str, TextIO]) -> dict: + """If data is a json file name or input stream, read data from there.""" + if isinstance(data, dict): + pass + elif isinstance(data, str): + with open(data, encoding="utf-8") as infile: + data = json.load(infile) + elif hasattr(data, "read"): + data = json.load(data) + else: + raise ValueError(f"I don't know how to handle the datatype of `data`: {type(data)}") + assert isinstance(data, dict) + return data + + class TemplateFiller: """Class to fill XLSX templates. 
Has an index for all relevant columns.""" @@ -319,7 +335,8 @@ out: union[dict, None] return None -def fill_template(data: Union[dict, str, TextIO], template: str, result: str) -> None: +def fill_template(data: Union[dict, str, TextIO], template: str, result: str, + validation_schema: Union[dict, str, TextIO] = None) -> None: """Insert json data into an xlsx file, according to a template. This function fills the json data into the template stored at ``template`` and stores the result as @@ -333,18 +350,19 @@ template: str Path to the XLSX template. result: str Path for the result XLSX. +validation_schema: dict, str or TextIO, optional + If given, validate the data against this schema first. This raises an exception if the validation + fails. """ - if isinstance(data, dict): - pass - elif isinstance(data, str): - with open(data, encoding="utf-8") as infile: - data = json.load(infile) - elif hasattr(data, "read"): - data = json.load(data) - else: - raise ValueError(f"I don't know how to handle the datatype of `data`: {type(data)}") + data = _read_or_dict(data) assert isinstance(data, dict) + # Validation + if validation_schema is not None: + validation_schema = _read_or_dict(validation_schema) + validate(data, validation_schema, format_checker=FormatChecker()) + + # Filling the data result_wb = load_workbook(template) template_filler = TemplateFiller(result_wb) template_filler.fill_data(data=data) diff --git a/unittests/table_json_conversion/data/multiple_refs_schema.json b/unittests/table_json_conversion/data/multiple_refs_schema.json index 7acf5e0d..2adec8de 100644 --- a/unittests/table_json_conversion/data/multiple_refs_schema.json +++ b/unittests/table_json_conversion/data/multiple_refs_schema.json @@ -204,10 +204,7 @@ "$schema": "https://json-schema.org/draft/2020-12/schema" } }, - "required": [ - "Training", - "Person" - ], + "required": [], "additionalProperties": false, "$schema": "https://json-schema.org/draft/2020-12/schema" -} \ No newline at end of file +} diff --git 
a/unittests/table_json_conversion/data/simple_schema.json b/unittests/table_json_conversion/data/simple_schema.json index f18fd6af..01a732d6 100644 --- a/unittests/table_json_conversion/data/simple_schema.json +++ b/unittests/table_json_conversion/data/simple_schema.json @@ -54,7 +54,9 @@ }, "Organisation": { "enum": [ - "Federal Reserve" + "Federal Reserve", + "IMF", + "ECB" ] } } @@ -78,7 +80,9 @@ }, "Organisation": { "enum": [ - "Federal Reserve" + "Federal Reserve", + "IMF", + "ECB" ] } } @@ -117,7 +121,9 @@ }, "Organisation": { "enum": [ - "Federal Reserve" + "Federal Reserve", + "IMF", + "ECB" ] } }, diff --git a/unittests/table_json_conversion/test_fill_xlsx.py b/unittests/table_json_conversion/test_fill_xlsx.py index fcc9ad23..0353eaa3 100644 --- a/unittests/table_json_conversion/test_fill_xlsx.py +++ b/unittests/table_json_conversion/test_fill_xlsx.py @@ -38,12 +38,13 @@ def rfp(*pathcomponents): def fill_and_compare(json_file: str, template_file: str, known_good: str, - custom_output: str = None): + schema: str = None, custom_output: str = None): """Fill the data into a template and compare to a known good. Parameters: ----------- - +schema: str, optional + JSON schema to validate against. custom_output: str, optional If given, write to this file and drop into an IPython shell. For development only. 
""" @@ -52,7 +53,8 @@ custom_output: str, optional assert not os.path.exists(outfile) if custom_output is not None: outfile = custom_output - fill_template(data=json_file, template=template_file, result=outfile) + fill_template(data=json_file, template=template_file, result=outfile, + validation_schema=schema) assert os.path.exists(outfile) generated = load_workbook(outfile) # workbook can be read known_good_wb = load_workbook(known_good) @@ -68,7 +70,9 @@ def test_detect(): def test_fill_xlsx(): fill_and_compare(json_file=rfp("data/simple_data.json"), template_file=rfp("data/simple_template.xlsx"), - known_good=rfp("data/simple_data.xlsx")) + known_good=rfp("data/simple_data.xlsx"), + schema=rfp("data/simple_schema.json")) fill_and_compare(json_file=rfp("data/multiple_refs_data.json"), template_file=rfp("data/multiple_refs_template.xlsx"), - known_good=rfp("data/multiple_refs_data.xlsx")) + known_good=rfp("data/multiple_refs_data.xlsx"), + schema=rfp("data/multiple_refs_schema.json")) -- GitLab