Skip to content
Snippets Groups Projects
Verified Commit 734b70b4 authored by Daniel Hornung's avatar Daniel Hornung
Browse files

WIP: Filling XLSX: Schema validation and tests.

parent 0d6b9970
No related branches found
No related tags found
2 merge requests!100WIP: Filling XLSX: Seems to be working.,!93Filling XLSX: Everything except multiple choice.
Pipeline #48264 failed
......@@ -27,6 +27,7 @@ from collections import OrderedDict
from types import SimpleNamespace
from typing import Any, Dict, List, Optional, Union, TextIO
from jsonschema import FormatChecker, validate
from openpyxl import load_workbook, Workbook
from openpyxl.worksheet.worksheet import Worksheet
......@@ -122,6 +123,21 @@ def _next_row_index(sheet: Worksheet) -> int:
return sheet.max_row
def _read_or_dict(data: Union[dict, str, TextIO]) -> dict:
"""If data is a json file name or input stream, read data from there."""
if isinstance(data, dict):
pass
elif isinstance(data, str):
with open(data, encoding="utf-8") as infile:
data = json.load(infile)
elif hasattr(data, "read"):
data = json.load(data)
else:
raise ValueError(f"I don't know how to handle the datatype of `data`: {type(data)}")
assert isinstance(data, dict)
return data
class TemplateFiller:
"""Class to fill XLSX templates. Has an index for all relevant columns."""
......@@ -319,7 +335,8 @@ out: union[dict, None]
return None
def fill_template(data: Union[dict, str, TextIO], template: str, result: str) -> None:
def fill_template(data: Union[dict, str, TextIO], template: str, result: str,
validation_schema: Union[dict, str, TextIO] = None) -> None:
"""Insert json data into an xlsx file, according to a template.
This function fills the json data into the template stored at ``template`` and stores the result as
......@@ -333,18 +350,19 @@ template: str
Path to the XLSX template.
result: str
Path for the result XLSX.
validation_schema: dict, str or TextIO, optional
If given, validate the data against this schema first. This raises an exception if the validation
fails. The schema may be a dict, a json file name or an input stream.
"""
if isinstance(data, dict):
pass
elif isinstance(data, str):
with open(data, encoding="utf-8") as infile:
data = json.load(infile)
elif hasattr(data, "read"):
data = json.load(data)
else:
raise ValueError(f"I don't know how to handle the datatype of `data`: {type(data)}")
data = _read_or_dict(data)
assert isinstance(data, dict)
# Validation
if validation_schema is not None:
validation_schema = _read_or_dict(validation_schema)
validate(data, validation_schema, format_checker=FormatChecker())
# Filling the data
result_wb = load_workbook(template)
template_filler = TemplateFiller(result_wb)
template_filler.fill_data(data=data)
......
......@@ -204,10 +204,7 @@
"$schema": "https://json-schema.org/draft/2020-12/schema"
}
},
"required": [
"Training",
"Person"
],
"required": [],
"additionalProperties": false,
"$schema": "https://json-schema.org/draft/2020-12/schema"
}
\ No newline at end of file
}
......@@ -54,7 +54,9 @@
},
"Organisation": {
"enum": [
"Federal Reserve"
"Federal Reserve",
"IMF",
"ECB"
]
}
}
......@@ -78,7 +80,9 @@
},
"Organisation": {
"enum": [
"Federal Reserve"
"Federal Reserve",
"IMF",
"ECB"
]
}
}
......@@ -117,7 +121,9 @@
},
"Organisation": {
"enum": [
"Federal Reserve"
"Federal Reserve",
"IMF",
"ECB"
]
}
},
......
......@@ -38,12 +38,13 @@ def rfp(*pathcomponents):
def fill_and_compare(json_file: str, template_file: str, known_good: str,
custom_output: str = None):
schema: str = None, custom_output: str = None):
"""Fill the data into a template and compare to a known good.
Parameters:
-----------
schema: str, optional
JSON schema to validate against.
custom_output: str, optional
If given, write to this file and drop into an IPython shell. For development only.
"""
......@@ -52,7 +53,8 @@ custom_output: str, optional
assert not os.path.exists(outfile)
if custom_output is not None:
outfile = custom_output
fill_template(data=json_file, template=template_file, result=outfile)
fill_template(data=json_file, template=template_file, result=outfile,
validation_schema=schema)
assert os.path.exists(outfile)
generated = load_workbook(outfile) # workbook can be read
known_good_wb = load_workbook(known_good)
......@@ -68,7 +70,9 @@ def test_detect():
def test_fill_xlsx():
fill_and_compare(json_file=rfp("data/simple_data.json"),
template_file=rfp("data/simple_template.xlsx"),
known_good=rfp("data/simple_data.xlsx"))
known_good=rfp("data/simple_data.xlsx"),
schema=rfp("data/simple_schema.json"))
fill_and_compare(json_file=rfp("data/multiple_refs_data.json"),
template_file=rfp("data/multiple_refs_template.xlsx"),
known_good=rfp("data/multiple_refs_data.xlsx"))
known_good=rfp("data/multiple_refs_data.xlsx"),
schema=rfp("data/multiple_refs_schema.json"))
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment