WIP: Filling XLSX: Schema validation and tests.

734b70b4 · Daniel Hornung · 0d6b9970 · 734b70b4 · 734b70b4 · 734b70b4
Verified Commit 734b70b4 authored 1 year ago by Daniel Hornung
--- a/src/caosadvancedtools/table_json_conversion/fill_xlsx.py
+++ b/src/caosadvancedtools/table_json_conversion/fill_xlsx.py
@@ -27,6 +27,7 @@ from collections import OrderedDict
 from types import SimpleNamespace
 from typing import Any, Dict, List, Optional, Union, TextIO
+from jsonschema import FormatChecker, validate
 from openpyxl import load_workbook, Workbook
 from openpyxl.worksheet.worksheet import Worksheet
@@ -122,6 +123,21 @@ def _next_row_index(sheet: Worksheet) -> int:
    return sheet.max_row
+def _read_or_dict(data: Union[dict, str, TextIO]) -> dict:
+    """If data is a json file name or input stream, read data from there.
+
+Parameters
+----------
+data: dict, str or TextIO
+  A dict is returned unchanged.  A str is used as the path of a file, which is opened as UTF-8 and
+  parsed as JSON.  Any other object with a ``read`` attribute is passed to ``json.load`` directly.
+
+Returns
+-------
+out: dict
+  The given dict, or the content which was loaded from the file or stream.
+
+Raises
+------
+ValueError
+  If ``data`` is neither a dict, nor a str, nor an object with a ``read`` attribute.
+AssertionError
+  If the result is not a dict (this is checked with a plain ``assert``).
+    """
+    if isinstance(data, dict):
+        pass  # Already a dict, nothing to load.
+    elif isinstance(data, str):
+        with open(data, encoding="utf-8") as infile:
+            data = json.load(infile)
+    elif hasattr(data, "read"):
+        data = json.load(data)
+    else:
+        raise ValueError(f"I don't know how to handle the datatype of `data`: {type(data)}")
+    # Loaded JSON may have any top-level type; only dicts are accepted here.
+    assert isinstance(data, dict)
+    return data
 class TemplateFiller:
    """Class to fill XLSX templates.  Has an index for all relevant columns."""
@@ -319,7 +335,8 @@ out: union[dict, None]
        return None
-def fill_template(data: Union[dict, str, TextIO], template: str, result: str) -> None:
+def fill_template(data: Union[dict, str, TextIO], template: str, result: str,
+                  validation_schema: Union[dict, str, TextIO] = None) -> None:
    """Insert json data into an xlsx file, according to a template.
 This function fills the json data into the template stored at ``template`` and stores the result as
@@ -333,18 +350,19 @@ template: str
  Path to the XLSX template.
 result: str
  Path for the result XLSX.
+validation_schema: dict, str or TextIO, optional
+  If given, validate the data against this schema first.  The schema may be a dict, the name of a
+  json file, or an input stream.  This raises an exception if the validation fails.
 """
-    if isinstance(data, dict):
+    data = _read_or_dict(data)
-        pass
-    elif isinstance(data, str):
-        with open(data, encoding="utf-8") as infile:
-            data = json.load(infile)
-    elif hasattr(data, "read"):
-        data = json.load(data)
-    else:
-        raise ValueError(f"I don't know how to handle the datatype of `data`: {type(data)}")
    assert isinstance(data, dict)
+    # Validation
+    if validation_schema is not None:
+        validation_schema = _read_or_dict(validation_schema)
+        validate(data, validation_schema, format_checker=FormatChecker())
+    # Filling the data
    result_wb = load_workbook(template)
    template_filler = TemplateFiller(result_wb)
    template_filler.fill_data(data=data)

--- a/unittests/table_json_conversion/data/multiple_refs_schema.json
+++ b/unittests/table_json_conversion/data/multiple_refs_schema.json
@@ -204,10 +204,7 @@
      "$schema": "https://json-schema.org/draft/2020-12/schema"
    }
  },
-  "required": [
+  "required": [],
-    "Training",
-    "Person"
-  ],
  "additionalProperties": false,
  "$schema": "https://json-schema.org/draft/2020-12/schema"
 }
--- a/unittests/table_json_conversion/data/simple_schema.json
+++ b/unittests/table_json_conversion/data/simple_schema.json
@@ -54,7 +54,9 @@
              },
              "Organisation": {
                "enum": [
-                  "Federal Reserve"
+                  "Federal Reserve",
+                  "IMF",
+                  "ECB"
                ]
              }
            }
@@ -78,7 +80,9 @@
            },
            "Organisation": {
              "enum": [
-                "Federal Reserve"
+                "Federal Reserve",
+                "IMF",
+                "ECB"
              ]
            }
          }
@@ -117,7 +121,9 @@
        },
        "Organisation": {
          "enum": [
-            "Federal Reserve"
+            "Federal Reserve",
+            "IMF",
+            "ECB"
          ]
        }
      },

--- a/unittests/table_json_conversion/test_fill_xlsx.py
+++ b/unittests/table_json_conversion/test_fill_xlsx.py
@@ -38,12 +38,13 @@ def rfp(*pathcomponents):
 def fill_and_compare(json_file: str, template_file: str, known_good: str,
-                     custom_output: str = None):
+                     schema: str = None, custom_output: str = None):
    """Fill the data into a template and compare to a known good.
 Parameters:
 -----------
+schema: str, optional
+  Json schema to validate against.
 custom_output: str, optional
  If given, write to this file and drop into an IPython shell.  For development only.
    """
@@ -52,7 +53,8 @@ custom_output: str, optional
        assert not os.path.exists(outfile)
        if custom_output is not None:
            outfile = custom_output
-        fill_template(data=json_file, template=template_file, result=outfile)
+        fill_template(data=json_file, template=template_file, result=outfile,
+                      validation_schema=schema)
        assert os.path.exists(outfile)
        generated = load_workbook(outfile)  # workbook can be read
    known_good_wb = load_workbook(known_good)
@@ -68,7 +70,9 @@ def test_detect():
 def test_fill_xlsx():
    fill_and_compare(json_file=rfp("data/simple_data.json"),
                     template_file=rfp("data/simple_template.xlsx"),
-                     known_good=rfp("data/simple_data.xlsx"))
+                     known_good=rfp("data/simple_data.xlsx"),
+                     schema=rfp("data/simple_schema.json"))
    fill_and_compare(json_file=rfp("data/multiple_refs_data.json"),
                     template_file=rfp("data/multiple_refs_template.xlsx"),
-                     known_good=rfp("data/multiple_refs_data.xlsx"))
+                     known_good=rfp("data/multiple_refs_data.xlsx"),
+                     schema=rfp("data/multiple_refs_schema.json"))