Skip to content
Snippets Groups Projects

ENH: xlsx template generator

Merged Henrik tom Wörden requested to merge f-xlsx-json into f-more-jsonschema-export
2 unresolved threads
@@ -28,7 +28,7 @@ import sys
from abc import ABC, abstractmethod
from argparse import RawTextHelpFormatter
from enum import Enum
from typing import Union
from typing import Dict, List, Tuple, Union
from openpyxl import Workbook
from openpyxl.workbook.child import INVALID_TITLE_REGEX
@@ -74,7 +74,7 @@ class TableTemplateGenerator(ABC):
pass
def _generate_sheets_from_schema(self, schema: dict, foreign_keys: dict = None
) -> dict[str, dict[str, list]]:
) -> Dict[str, Dict[str, list]]:
""" generates a sheet definition from a given JSON schema
Parameters
@@ -104,7 +104,7 @@ class TableTemplateGenerator(ABC):
foreign_keys = {}
# here, we treat the top level
# sheets[sheetname][colname]= (COL_TYPE, description, [path])
sheets: dict[str, dict[str, tuple[str, list]]] = {}
sheets: Dict[str, dict[str, Tuple[str, list]]] = {}
if "properties" not in schema:
raise ValueError("Inappropriate JSON schema: The following part should contain "
f"the 'properties' key:\n{schema}\n")
@@ -113,35 +113,25 @@ class TableTemplateGenerator(ABC):
sheets, [RTname], foreign_keys)
return sheets
def _get_foreign_keys(self, foreign_keys: dict, path: list) -> list:
def _get_foreign_keys(self, keys: dict, path: list) -> list:
""" returns the foreign keys that are needed at the location to which path points """
cpath = list(path)
keys = foreign_keys
selected_keys = None
while cpath:
if keys is None:
raise ValueError(f"A foreign key definition is missing for path:"
f"\n{path}\n{foreign_keys}")
if cpath[0] not in keys:
raise ValueError(f"A foreign key definition is missing for path: \n{path}\n{keys}")
keys = keys[cpath[0]]
if isinstance(keys, list):
selected_keys, keys = keys, None
else:
selected_keys, keys = None, keys
cpath = cpath[1:]
msg = (f"A foreign key definition is missing for path:\n{path}\n{keys}")
while path:
if keys is None or path[0] not in keys:
raise ValueError(msg)
keys = keys[path[0]]
path = path[1:]
if isinstance(keys, dict) and "__this__" in keys:
selected_keys = keys["__this__"]
if selected_keys is None:
raise ValueError(f"A foreign key definition is missing for path:"
f"\n{path}\n{foreign_keys}")
return selected_keys
return keys["__this__"]
elif isinstance(keys, list):
return keys
else:
raise ValueError(msg)
def _treat_schema_element(self, schema: dict, sheets: dict = None, path: list = None,
foreign_keys: dict = None, level_in_sheet_name: int = 1,
array_paths: list = None
) -> dict[str, tuple[str, str, list]]:
) -> Dict[str, Tuple[str, str, list]]:
""" recursively transforms elements from the schema into column definitions
sheets is modified in place.
@@ -149,6 +139,9 @@ class TableTemplateGenerator(ABC):
----------
array_paths: list
a list of path along the way to the current object, where the json contains arrays
schema: dict
part of the json schema; it must be the level that contains the type definition
(e.g. 'type' or 'oneOf' key)
Returns
-------
@@ -171,20 +164,26 @@ class TableTemplateGenerator(ABC):
if 'type' in schema and schema['type'] == 'array':
if ('type' in schema['items'] and schema['items']['type'] == 'object'
and len(path) > 1): # list of references; special treatment
# we add a new sheet
# we add a new sheet with columns generated from the subtree of the schema
sheetname = ".".join(path)
if sheetname in sheets:
raise ValueError(f"The shema would lead to two sheets with the same name which"
f" is forbidden:{sheetname}")
sheets[sheetname] = self._treat_schema_element(
schema['items'], sheets, path, foreign_keys, len(path),
array_paths=array_paths+[path] # since this level is an array, we extend the list
)
# and add the foreign keys that are necessary up to this point
for p in array_paths:
keys = self._get_foreign_keys(foreign_keys, p)
for k in keys:
sheets[sheetname].update({k: (ColumnType.FOREIGN, f"see sheet '{path[0]}'", p+[k])})
if k in sheets[sheetname]:
raise ValueError(f"The shema would lead to two columns with the same "
f"name which is forbidden:{k}")
sheets[sheetname][k] = (ColumnType.FOREIGN, f"see sheet '{path[0]}'", p+[k])
# columns are added to the new sheet, thus we do not return columns
return {}
else:
# it is a list of primitive types -> semi colon separated list
else: # it is a list of primitive types -> semi colon separated list
schema = schema['items']
ctype = ColumnType.LIST
@@ -193,31 +192,34 @@ class TableTemplateGenerator(ABC):
if 'type' in el:
schema = el
if "properties" in schema:
# recurse for each property
if "properties" in schema: # recurse for each property
cols = {}
for pname in schema["properties"].keys():
cols.update(self._treat_schema_element(
col_defs = self._treat_schema_element(
schema["properties"][pname], sheets, path+[pname], foreign_keys,
level_in_sheet_name, array_paths=array_paths))
level_in_sheet_name, array_paths=array_paths)
for k in col_defs.keys():
if k in cols:
raise ValueError(f"The shema would lead to two columns with the same "
f"name which is forbidden:{k}")
cols.update(col_defs)
return cols
else:
else: # those are the leaves
description = schema['description'] if 'description' in schema else None
# those are the leaves
if 'type' not in schema:
if 'enum' in schema:
return {".".join(path[level_in_sheet_name:]): (ctype, description, path)}
if 'anyOf' in schema:
for d in schema['anyOf']:
# currently the only case where this occurs is date formats
assert d['type'] == 'string'
assert d['format'] == 'date' or d['format'] == 'date-time'
return {".".join(path[level_in_sheet_name:]): (ctype, description, path)}
# definition of a single column
default_return = {".".join(path[level_in_sheet_name:]): (ctype, description, path)}
if 'type' not in schema and 'enum' in schema:
return default_return
elif 'type' not in schema and 'anyOf' in schema:
for d in schema['anyOf']:
# currently the only case where this occurs is date formats
assert d['type'] == 'string'
assert d['format'] == 'date' or d['format'] == 'date-time'
return default_return
elif schema["type"] in ['string', 'number', 'integer', 'boolean']:
if 'format' in schema and schema['format'] == 'data-url':
return {} # file; ignore for now
return {".".join(path[level_in_sheet_name:]): (ctype, description, path)}
return default_return
else:
raise ValueError("Inappropriate JSON schema: The following part should define an"
f" object with properties or a primitive type:\n{schema}\n")
Loading