Skip to content
Snippets Groups Projects
Verified Commit 734b70b4 authored by Daniel Hornung's avatar Daniel Hornung
Browse files

WIP: Filling XLSX: Schema validation and tests.

parent 0d6b9970
No related branches found
No related tags found
2 merge requests!100WIP: Filling XLSX: Seems to be working.,!93Filling XLSX: Everything except multiple choice.
Pipeline #48264 failed
...@@ -27,6 +27,7 @@ from collections import OrderedDict ...@@ -27,6 +27,7 @@ from collections import OrderedDict
from types import SimpleNamespace from types import SimpleNamespace
from typing import Any, Dict, List, Optional, Union, TextIO from typing import Any, Dict, List, Optional, Union, TextIO
from jsonschema import FormatChecker, validate
from openpyxl import load_workbook, Workbook from openpyxl import load_workbook, Workbook
from openpyxl.worksheet.worksheet import Worksheet from openpyxl.worksheet.worksheet import Worksheet
...@@ -122,6 +123,21 @@ def _next_row_index(sheet: Worksheet) -> int: ...@@ -122,6 +123,21 @@ def _next_row_index(sheet: Worksheet) -> int:
return sheet.max_row return sheet.max_row
def _read_or_dict(data: Union[dict, str, TextIO]) -> dict:
    """Return ``data`` as a dict, reading it from a JSON file or input stream if necessary.

    Parameters
    ----------
    data: dict, str or TextIO
      Either the dict itself (returned unchanged), the name of a JSON file (opened as UTF-8), or an
      object with a ``read`` method from which JSON is loaded.

    Returns
    -------
    out: dict
      The given or the loaded dictionary.

    Raises
    ------
    ValueError
      If ``data`` has an unsupported type, or if the loaded JSON content is not an object (dict) at
      the top level.
    """
    if isinstance(data, dict):
        return data

    if isinstance(data, str):
        with open(data, encoding="utf-8") as infile:
            loaded = json.load(infile)
    elif hasattr(data, "read"):
        loaded = json.load(data)
    else:
        raise ValueError(f"I don't know how to handle the datatype of `data`: {type(data)}")

    # Explicit check instead of ``assert``: assertions are removed when Python runs with ``-O``,
    # which would let e.g. a top-level JSON list slip through as if it were a dict.
    if not isinstance(loaded, dict):
        raise ValueError(f"Expected a JSON object (dict) at the top level, but got: {type(loaded)}")
    return loaded
class TemplateFiller: class TemplateFiller:
"""Class to fill XLSX templates. Has an index for all relevant columns.""" """Class to fill XLSX templates. Has an index for all relevant columns."""
...@@ -319,7 +335,8 @@ out: union[dict, None] ...@@ -319,7 +335,8 @@ out: union[dict, None]
return None return None
def fill_template(data: Union[dict, str, TextIO], template: str, result: str) -> None: def fill_template(data: Union[dict, str, TextIO], template: str, result: str,
validation_schema: Union[dict, str, TextIO] = None) -> None:
"""Insert json data into an xlsx file, according to a template. """Insert json data into an xlsx file, according to a template.
This function fills the json data into the template stored at ``template`` and stores the result as This function fills the json data into the template stored at ``template`` and stores the result as
...@@ -333,18 +350,19 @@ template: str ...@@ -333,18 +350,19 @@ template: str
Path to the XLSX template. Path to the XLSX template.
result: str result: str
Path for the result XLSX. Path for the result XLSX.
validation_schema: dict, str or TextIO, optional
If given (as a dict, a JSON file name or an input stream), validate the data against this schema first. This raises an exception if the validation
fails.
""" """
if isinstance(data, dict): data = _read_or_dict(data)
pass
elif isinstance(data, str):
with open(data, encoding="utf-8") as infile:
data = json.load(infile)
elif hasattr(data, "read"):
data = json.load(data)
else:
raise ValueError(f"I don't know how to handle the datatype of `data`: {type(data)}")
assert isinstance(data, dict) assert isinstance(data, dict)
# Validation
if validation_schema is not None:
validation_schema = _read_or_dict(validation_schema)
validate(data, validation_schema, format_checker=FormatChecker())
# Filling the data
result_wb = load_workbook(template) result_wb = load_workbook(template)
template_filler = TemplateFiller(result_wb) template_filler = TemplateFiller(result_wb)
template_filler.fill_data(data=data) template_filler.fill_data(data=data)
......
...@@ -204,10 +204,7 @@ ...@@ -204,10 +204,7 @@
"$schema": "https://json-schema.org/draft/2020-12/schema" "$schema": "https://json-schema.org/draft/2020-12/schema"
} }
}, },
"required": [ "required": [],
"Training",
"Person"
],
"additionalProperties": false, "additionalProperties": false,
"$schema": "https://json-schema.org/draft/2020-12/schema" "$schema": "https://json-schema.org/draft/2020-12/schema"
} }
...@@ -54,7 +54,9 @@ ...@@ -54,7 +54,9 @@
}, },
"Organisation": { "Organisation": {
"enum": [ "enum": [
"Federal Reserve" "Federal Reserve",
"IMF",
"ECB"
] ]
} }
} }
...@@ -78,7 +80,9 @@ ...@@ -78,7 +80,9 @@
}, },
"Organisation": { "Organisation": {
"enum": [ "enum": [
"Federal Reserve" "Federal Reserve",
"IMF",
"ECB"
] ]
} }
} }
...@@ -117,7 +121,9 @@ ...@@ -117,7 +121,9 @@
}, },
"Organisation": { "Organisation": {
"enum": [ "enum": [
"Federal Reserve" "Federal Reserve",
"IMF",
"ECB"
] ]
} }
}, },
......
...@@ -38,12 +38,13 @@ def rfp(*pathcomponents): ...@@ -38,12 +38,13 @@ def rfp(*pathcomponents):
def fill_and_compare(json_file: str, template_file: str, known_good: str, def fill_and_compare(json_file: str, template_file: str, known_good: str,
custom_output: str = None): schema: str = None, custom_output: str = None):
"""Fill the data into a template and compare to a known good. """Fill the data into a template and compare to a known good.
Parameters: Parameters:
----------- -----------
schema: str, optional
JSON schema to validate against.
custom_output: str, optional custom_output: str, optional
If given, write to this file and drop into an IPython shell. For development only. If given, write to this file and drop into an IPython shell. For development only.
""" """
...@@ -52,7 +53,8 @@ custom_output: str, optional ...@@ -52,7 +53,8 @@ custom_output: str, optional
assert not os.path.exists(outfile) assert not os.path.exists(outfile)
if custom_output is not None: if custom_output is not None:
outfile = custom_output outfile = custom_output
fill_template(data=json_file, template=template_file, result=outfile) fill_template(data=json_file, template=template_file, result=outfile,
validation_schema=schema)
assert os.path.exists(outfile) assert os.path.exists(outfile)
generated = load_workbook(outfile) # workbook can be read generated = load_workbook(outfile) # workbook can be read
known_good_wb = load_workbook(known_good) known_good_wb = load_workbook(known_good)
...@@ -68,7 +70,9 @@ def test_detect(): ...@@ -68,7 +70,9 @@ def test_detect():
def test_fill_xlsx(): def test_fill_xlsx():
fill_and_compare(json_file=rfp("data/simple_data.json"), fill_and_compare(json_file=rfp("data/simple_data.json"),
template_file=rfp("data/simple_template.xlsx"), template_file=rfp("data/simple_template.xlsx"),
known_good=rfp("data/simple_data.xlsx")) known_good=rfp("data/simple_data.xlsx"),
schema=rfp("data/simple_schema.json"))
fill_and_compare(json_file=rfp("data/multiple_refs_data.json"), fill_and_compare(json_file=rfp("data/multiple_refs_data.json"),
template_file=rfp("data/multiple_refs_template.xlsx"), template_file=rfp("data/multiple_refs_template.xlsx"),
known_good=rfp("data/multiple_refs_data.xlsx")) known_good=rfp("data/multiple_refs_data.xlsx"),
schema=rfp("data/multiple_refs_schema.json"))
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment