def data_schema_from_model_schema(model_schema: dict) -> dict:
    """Convert a *model* schema to a *data* schema.

Practically, this means that the top level properties are converted into lists.  In a simplified
notation, this can be expressed as:

``data_schema = { elem: [elem typed data...] for elem in model_schema }``

Parameters
----------
model_schema: dict
  The schema description of the data model.  Must be a json schema *object*, with a number of
  *object* typed properties.

Returns
-------
data_schema: dict
  A corresponding json schema, where the properties are arrays with the types of the input's
  top-level properties.  The result is built from a deep copy, the input is left unmodified.

Raises
------
AssertionError
  If ``model_schema`` itself or one of its top-level properties is not of type "object".
KeyError
  If ``model_schema`` has no "type" or "properties" entry, or a top-level property has no "type".
    """
    # The checks are raised explicitly instead of using ``assert`` statements, so that they are not
    # stripped when Python runs with ``-O``.  The exception type is kept for backward compatibility.
    if model_schema["type"] != "object":
        raise AssertionError(
            f"The model schema must be of type 'object', but is {model_schema['type']!r}.")

    result = deepcopy(model_schema)
    properties = result["properties"]

    # Validate everything first, then build the new mapping in one go.  This avoids rebinding
    # entries of the dict while iterating over it.
    for name, prop in properties.items():
        if prop["type"] != "object":
            raise AssertionError(
                f"Top-level property {name!r} must be of type 'object', but is {prop['type']!r}.")

    result["properties"] = {
        name: {"type": "array", "items": prop}
        for name, prop in properties.items()
    }
    return result
def test_data_schema_generation():
    """Check that the data schema generated from the simple model schema equals the stored one."""
    model = xlsx_utils.read_or_dict(rfp("data/simple_schema.json"))
    generated = xlsx_utils.data_schema_from_model_schema(model)
    # The reference file holds the expected data schema for the same model.
    reference = xlsx_utils.read_or_dict(rfp("data/simple_data_schema.json"))
    assert generated == reference