Skip to content
Snippets Groups Projects
Verified Commit bfdfccd9 authored by Daniel Hornung's avatar Daniel Hornung
Browse files

ENH: table_json_conversion/xlsx_utils.py: data schema generation

parent 83d90993
No related branches found
No related tags found
2 merge requests: !107 "Release v0.11.0", !101 "ENH: table_json_conversion/xlsx_utils.py: data array schema generation"
Pipeline #50340 passed
...@@ -61,9 +61,36 @@ class RowType(Enum): ...@@ -61,9 +61,36 @@ class RowType(Enum):
IGNORE = 3 IGNORE = 3
def data_schema_from_model_schema(model_schema: dict) -> dict:
    """Build a *data* schema out of a *model* schema.

    Every top-level property of the model schema is wrapped into an array schema whose ``items``
    are the original property schema.  Roughly speaking:

    ``data_schema = { elem: [elem typed data...] for elem in model_schema }``

    The input is deep-copied first, so ``model_schema`` itself is left untouched.

    Parameters
    ----------
    model_schema: dict
      JSON schema describing the data model.  Its ``type`` must be ``"object"`` and each of its
      top-level properties must be of ``type`` ``"object"`` as well (both are checked with
      ``assert``).

    Returns
    -------
    data_schema: dict
      A copy of the input schema in which each top-level property has been replaced by
      ``{"type": "array", "items": <original property schema>}``.
    """
    assert model_schema["type"] == "object"
    data_schema = deepcopy(model_schema)
    properties = data_schema["properties"]
    # Iterate over a snapshot of the keys; only the values are replaced, the key set stays the same.
    for key in list(properties):
        element_schema = properties[key]
        assert element_schema["type"] == "object"
        properties[key] = {"type": "array", "items": element_schema}
    return data_schema
def get_defining_paths(workbook: Workbook) -> dict[str, list[list[str]]]: def get_defining_paths(workbook: Workbook) -> dict[str, list[list[str]]]:
......
{
"type": "object",
"properties": {
"Training": {
"type": "array",
"items": {
"type": "object",
"required": [],
"additionalProperties": false,
"title": "Training",
"properties": {
"name": {
"type": "string",
"description": "The name of the Record to be created"
},
"date": {
"description": "The date of the training.",
"anyOf": [
{
"type": "string",
"format": "date"
},
{
"type": "string",
"format": "date-time"
}
]
},
"url": {
"type": "string",
"description": "The URL"
},
"subjects": {
"type": "array",
"items": {
"type": "string"
}
},
"coach": {
"type": "array",
"items": {
"type": "object",
"required": [],
"additionalProperties": false,
"title": "coach",
"properties": {
"name": {
"type": "string",
"description": "The name of the Record to be created"
},
"family_name": {
"type": "string"
},
"given_name": {
"type": "string"
},
"Organisation": {
"enum": [
"Federal Reserve",
"IMF",
"ECB"
]
}
}
}
},
"supervisor": {
"type": "object",
"required": [],
"additionalProperties": false,
"title": "supervisor",
"properties": {
"name": {
"type": "string",
"description": "The name of the Record to be created"
},
"family_name": {
"type": "string"
},
"given_name": {
"type": "string"
},
"Organisation": {
"enum": [
"Federal Reserve",
"IMF",
"ECB"
]
}
}
},
"duration": {
"type": "number"
},
"participants": {
"type": "integer"
},
"remote": {
"type": "boolean"
},
"slides": {
"type": "string",
"format": "data-url"
}
},
"$schema": "https://json-schema.org/draft/2020-12/schema"
}
},
"Person": {
"type": "array",
"items": {
"type": "object",
"required": [],
"additionalProperties": false,
"title": "Person",
"properties": {
"name": {
"type": "string",
"description": "The name of the Record to be created"
},
"family_name": {
"type": "string"
},
"given_name": {
"type": "string"
},
"Organisation": {
"enum": [
"Federal Reserve",
"IMF",
"ECB"
]
}
},
"$schema": "https://json-schema.org/draft/2020-12/schema"
}
}
},
"required": [
"Training",
"Person"
],
"additionalProperties": false,
"$schema": "https://json-schema.org/draft/2020-12/schema"
}
...@@ -27,6 +27,8 @@ import tempfile ...@@ -27,6 +27,8 @@ import tempfile
import jsonschema.exceptions as schema_exc import jsonschema.exceptions as schema_exc
import pytest import pytest
from openpyxl import load_workbook from openpyxl import load_workbook
from caosadvancedtools.table_json_conversion import xlsx_utils
from caosadvancedtools.table_json_conversion.fill_xlsx import fill_template from caosadvancedtools.table_json_conversion.fill_xlsx import fill_template
from caosadvancedtools.table_json_conversion.xlsx_utils import ( from caosadvancedtools.table_json_conversion.xlsx_utils import (
get_row_type_column_index, get_row_type_column_index,
...@@ -159,3 +161,10 @@ def test_errors(): ...@@ -159,3 +161,10 @@ def test_errors():
known_good=rfp("data/simple_data.xlsx"), known_good=rfp("data/simple_data.xlsx"),
schema=rfp("data/simple_schema.json")) schema=rfp("data/simple_schema.json"))
assert exc.value.message == "0.5 is not of type 'integer'" assert exc.value.message == "0.5 is not of type 'integer'"
def test_data_schema_generation():
    """The data schema derived from the simple model schema must equal the stored reference."""
    reference = xlsx_utils.read_or_dict(rfp("data/simple_data_schema.json"))
    model = xlsx_utils.read_or_dict(rfp("data/simple_schema.json"))
    generated = xlsx_utils.data_schema_from_model_schema(model)
    assert generated == reference
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment