Commit a01a74a3 authored by Florian Spreckelsen

Merge branch 'f-enh-143-xlsx-export-lists' into 'f-enh-143-automatic-xlsx-exporting'

JSON schema: List references, auto-detection of enums

See merge request !136
parents 6ecde7c3 d49d2709
3 merge requests: !138 Release 0.14.0, !136 JSON schema: List references, auto-detection of enums, !132 Automatic XLSX export
Pipeline #63152 passed
Showing changes with 412 additions and 113 deletions
File added
@@ -82,6 +82,89 @@ def _create_datamodel(modelfile: str):
model.sync_data_model(noquestion=True)
def _insert_multiple_choice_data():
"""Insert the data from `multiple_choice_data.json`."""
# 1. Insert enums.
enums = db.Container()
for skillname in ["Planning", "Communication", "Evaluation"]:
rec = db.Record(name=skillname).add_parent(db.RecordType("Skill"))
enums.append(rec)
for examname in ["Oral", "Written"]:
rec = db.Record(name=examname).add_parent(db.RecordType("ExamType"))
enums.append(rec)
enums.insert()
# 2. Insert data from JSON
json_data_file = rfp_unittest_data("multiple_choice_data.json")
with open(json_data_file, encoding="utf-8") as myfile:
json_data = json.load(myfile)
skills = []
for skillname in ["Planning", "Evaluation"]:
skills.append(db.Record(skillname).retrieve())
records = db.Container()
training_data = json_data["Training"][0]
rec_training = db.Record(name=training_data["name"]).add_parent(db.RecordType("Training"))
rec_training.add_property("date", datetime.fromisoformat(training_data["date"]))
rec_training.add_property("skills", skills)
rec_training.add_property("exam_types", [])
records.append(rec_training)
records.insert()
def _insert_simple_data():
"""Insert the data from `simple_data.json`."""
# 1. Insert enums.
enums = db.Container()
for orgname in ["ECB", "IMF"]:
rec_org = db.Record(name=orgname).add_parent(db.RecordType("Organisation"))
enums.append(rec_org)
enums.insert()
# 2. Insert data from JSON
json_data_file = rfp_unittest_data("simple_data.json")
with open(json_data_file, encoding="utf-8") as myfile:
json_data = json.load(myfile)
training_data = json_data["Training"][0]
coaches = []
for coach_data in training_data["coach"]:
rec_coach = db.Record().add_parent(db.RecordType("Person"))
for propname, value in coach_data.items():
rec_coach.add_property(propname, value=value)
coaches.append(rec_coach)
rec_supervisor = db.Record().add_parent(db.RecordType("Person"))
for propname, value in training_data["supervisor"].items():
rec_supervisor.add_property(propname, value=value)
persons = []
for person_data in json_data["Person"]:
rec_person = db.Record().add_parent(db.RecordType("Person"))
for propname, value in person_data.items():
rec_person.add_property(propname, value=value)
persons.append(rec_person)
rec_training = db.Record().add_parent(db.RecordType("Training"))
rec_training.add_property("date", datetime.fromisoformat(training_data["date"]))
rec_training.add_property("url", training_data["url"])
rec_training.add_property("coach", coaches)
rec_training.add_property("supervisor", rec_supervisor)
rec_training.add_property("duration", training_data["duration"])
rec_training.add_property("participants", training_data["participants"])
rec_training.add_property("subjects", training_data["subjects"])
rec_training.add_property("remote", training_data["remote"])
cont = db.Container()
cont.append(rec_training)
cont.append(rec_supervisor)
cont.extend(coaches)
cont.extend(persons)
cont.insert()
def _insert_multiple_refs_data():
"""Insert the data from `multiple_refs_data`.
"""
@@ -203,6 +286,121 @@ def test_successful_export():
tmp_path.unlink()
def test_export_lists(tmpdir):
"""Properties of datatype LIST<TEXT/INTEGER/...>."""
tmpdir = Path(tmpdir)
_create_datamodel(rfp_unittest_data("simple_model.yml"))
_insert_simple_data()
query_result = db.execute_query("FIND Training")
export_import_xlsx.export_container_to_xlsx(records=query_result,
include_referenced_entities=True,
xlsx_data_filepath=tmpdir / "result.xlsx",
jsonschema_filepath=tmpdir / "schema.json",
jsondata_filepath=tmpdir / "data.json",
xlsx_template_filepath=tmpdir / "template.xlsx",
)
# Check: schema
with open(tmpdir/"schema.json", encoding="utf-8") as schema_f:
schema_generated = json.load(schema_f)
try:
assert len(schema_generated["properties"]) == 1 # Only 'Training' should be top level
training_props = schema_generated["properties"]["Training"]["properties"]
assert_equal_jsons(training_props["subjects"]["items"],
{"type": ["string", "null"]})
if "oneOf" in training_props["coach"]["items"]:
raise ValueError("'coach' should be handled as 'do_not_retrieve', no records should "
"have been chosen.")
assert_equal_jsons(training_props["coach"]["items"]["properties"]["Organisation"],
{"enum": ["ECB", "IMF"]})
assert_equal_jsons(training_props["supervisor"]["properties"]["Organisation"],
{"enum": ["ECB", "IMF"]})
except KeyError:
print("It seems the generated JSON schema does not have the expected structure!")
raise
# Check: XLSX template
template_known_good = load_workbook(rfp("data", "lists_template.xlsx"))
template_generated = load_workbook(tmpdir / "template.xlsx")
compare_workbooks(template_generated, template_known_good)
# Check: Data json content
with open(rfp_unittest_data("simple_data.json"), encoding="utf-8") as myfile:
json_known_good = json.load(myfile)
json_known_good.pop("Person")
with open(tmpdir / "data.json", encoding="utf-8") as myfile:
json_generated = purge_from_json(json.load(myfile), remove_keys=["id"])
assert_equal_jsons(json_generated, json_known_good, allow_name_dict=True, ignore_datetime=True)
# Check: Filled XLSX
filled_generated = load_workbook(tmpdir / "result.xlsx")
# For the moment: just check a few samples
assert filled_generated.sheetnames == ['Training',
'Training.coach',
]
sheet_training = filled_generated["Training"]
assert sheet_training["K7"].value == "IMF"
sheet_coach = filled_generated["Training.coach"]
assert sheet_coach["G7"].value == "ECB"
assert sheet_coach["G8"].value == "ECB"
def test_multiple_choice(tmpdir):
"""List properties of enum references."""
tmpdir = Path(tmpdir)
_create_datamodel(rfp_unittest_data("multiple_choice_model.yaml"))
_insert_multiple_choice_data()
query_result = db.execute_query("FIND Training")
export_import_xlsx.export_container_to_xlsx(records=query_result,
include_referenced_entities=True,
xlsx_data_filepath=tmpdir / "result.xlsx",
jsonschema_filepath=tmpdir / "schema.json",
jsondata_filepath=tmpdir / "data.json",
xlsx_template_filepath=tmpdir / "template.xlsx",
)
# Check: schema
with open(tmpdir/"schema.json", encoding="utf-8") as schema_f:
schema_generated = json.load(schema_f)
try:
assert len(schema_generated["properties"]) == 1 # Only 'Training' should be top level
training = schema_generated["properties"]["Training"]
props = training["properties"]
assert len(props["skills"]["items"]["enum"]) == 3
assert len(props["exam_types"]["items"]["enum"]) == 2
except KeyError:
print("It seems the generated JSON schema does not have the expected structure!")
raise
# Check: XLSX template
template_known_good = load_workbook(rfp_unittest_data("multiple_choice_id_template.xlsx"))
template_generated = load_workbook(tmpdir / "template.xlsx")
compare_workbooks(template_generated, template_known_good)
# Check: Data json content
with open(rfp_unittest_data("multiple_choice_retrieved_data.json"), encoding="utf-8") as myfile:
json_known_good = json.load(myfile)
with open(tmpdir / "data.json", encoding="utf-8") as myfile:
json_generated = purge_from_json(json.load(myfile), remove_keys=["id"])
assert_equal_jsons(json_generated, json_known_good)
# Check: Filled XLSX
filled_generated = load_workbook(tmpdir / "result.xlsx")
# For the moment: just check a few samples
assert filled_generated.sheetnames == ['Training']
sheet_training = filled_generated["Training"]
assert sheet_training.max_row == 7
assert sheet_training.max_column == 9
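# Multiple choice lists are flattened to one column per enum option, marked
# with "x" where selected: columns E-G apparently hold the skills Planning,
# Communication, Evaluation (Planning and Evaluation are set in the data
# above); H-I hold the exam types Oral, Written (exam_types is null).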
assert sheet_training["E7"].value == "x"
assert sheet_training["F7"].value is None
assert sheet_training["G7"].value == "x"
assert sheet_training["H7"].value is None
assert sheet_training["I7"].value is None
def test_export_list_refs(tmpdir):
"""Test the export to XLSX of list-valued references.
@@ -227,7 +425,7 @@ def test_export_list_refs(tmpdir):
xlsx_template_filepath=tmpdir / "template.xlsx",
)
# Test schema
# Check: schema
with open(tmpdir/"schema.json", encoding="utf-8") as schema_f:
schema_generated = json.load(schema_f)
......
@@ -155,7 +155,7 @@ def setup_package():
author='Henrik tom Wörden',
author_email='h.tomwoerden@indiscale.com',
python_requires='>=3.9',
install_requires=["linkahead>=0.13.1",
install_requires=["linkahead>0.17.0",
"jsonref",
"jsonschema[format]>=4.4.0",
"numpy>=1.24.0, < 2",
......
@@ -55,11 +55,12 @@ single schema.
"""
from collections import OrderedDict
from typing import Any, Dict, Iterable, List, Optional, Sequence, Tuple, Union
from typing import Any, Iterable, Optional, Sequence, Union
import linkahead as db
from linkahead.cached import cache_clear, cached_query
from linkahead.common.datatype import get_list_datatype, is_list_datatype
from linkahead.utils.get_entity import get_entity_by_name
from .models.data_model import DataModel
@@ -72,16 +73,16 @@ class JsonSchemaExporter:
name_property_for_new_records: bool = False,
use_id_for_identification: bool = False,
description_property_for_new_records: bool = False,
additional_options_for_text_props: dict = None,
additional_json_schema: Dict[str, dict] = None,
additional_ui_schema: Dict[str, dict] = None,
additional_options_for_text_props: Optional[dict] = None,
additional_json_schema: Optional[dict[str, dict]] = None,
additional_ui_schema: Optional[dict[str, dict]] = None,
units_in_description: bool = True,
plain_data_model: bool = False,
do_not_create: List[str] = None,
do_not_retrieve: List[str] = None,
do_not_create: Optional[list[str]] = None,
do_not_retrieve: Optional[Union[list[str], str]] = None,
no_remote: bool = False,
use_rt_pool: DataModel = None,
multiple_choice: List[str] = None,
use_rt_pool: Optional[DataModel] = None,
multiple_choice: Optional[list[str]] = None,
multiple_choice_guess: bool = False,
wrap_files_in_objects: bool = False,
):
"""Set up a JsonSchemaExporter, which can then be applied on RecordTypes.
@@ -112,20 +113,17 @@ class JsonSchemaExporter:
description of the corresponding schema entry. If set to false, an
additional `unit` key is added to the schema itself which is purely
annotational and ignored, e.g., in validation. Default is True.
plain_data_model: bool, optional
If True, represent references as plain objects, without the option to choose from an
enum list of existing entities. Exception: When the reference looks like it *should be*
an enum, the existing Record entries are given as options. This parameter should be set
to True when one needs a generic representation of the data model.
The default is ``False``.
do_not_create : list[str], optional
A list of reference Property names, for which there should be no option
to create them. Instead, only the choice of existing elements should
be given.
do_not_retrieve : list[str], optional
do_not_retrieve : list[str] or str, optional
A list of RecordType names, for which no Records shall be retrieved. Instead, only an
object description should be given. If this list overlaps with the `do_not_create`
parameter, the behavior is undefined.
If this parameter is the string "``auto``", only multiple choice references (see
parameter ``multiple_choice``) will be retrieved.
The default is the empty list.
no_remote : bool, optional
If True, do not attempt to connect to a LinkAhead server at all. Default is False. Note
that the exporter may fail if this option is activated and the data model is not
@@ -137,6 +135,9 @@
A list of reference Property names which shall be denoted as multiple choice properties.
This means that each option in this property may be selected at most once. This is not
implemented yet if the Property is not in ``do_not_create`` as well.
multiple_choice_guess : bool, default=False
If True, guess for each reference Property that is not in ``multiple_choice`` whether
it is enum-like and should therefore be handled as multiple choice.
wrap_files_in_objects : bool, optional
Whether (lists of) files should be wrapped into an array of objects
that have a file property. The sole purpose of this wrapping is to
@@ -144,6 +145,15 @@
bug<https://github.com/rjsf-team/react-jsonschema-form/issues/3957>`_
so only set this to True if you're using the exported schema with
react-json-form and you are experiencing the bug. Default is False.
Notes on reference properties
-----------------------------
List references will have the "uniqueItems" property set if:
- ``do_not_retrieve`` is not set for the referenced RecordType
- the Property is listed in ``multiple_choice`` or is guessed to be multiple choice (when ``multiple_choice_guess`` is set)
"""
if not additional_options_for_text_props:
additional_options_for_text_props = {}
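As a usage sketch, the new options combine like this (the module path and the ``Training`` RecordType are assumptions based on this repository and its test data; a connection to a LinkAhead server is required):

import linkahead as db
from caosadvancedtools.json_schema_exporter import JsonSchemaExporter

exporter = JsonSchemaExporter(
    additional_properties=False,
    use_id_for_identification=True,
    do_not_retrieve="auto",       # retrieve Records only for multiple choice references
    multiple_choice_guess=True,   # guess which reference Properties are enum-like
)
# Enum-like list references (e.g. "skills") come out as arrays whose items
# have an "enum" schema, with "uniqueItems" set to true.
schema = exporter.recordtype_to_json_schema(db.RecordType(name="Training").retrieve())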
@@ -168,12 +178,12 @@ class JsonSchemaExporter:
self._additional_json_schema = additional_json_schema
self._additional_ui_schema = additional_ui_schema
self._units_in_description = units_in_description
self._plain_data_model = plain_data_model
self._do_not_create = do_not_create
self._do_not_retrieve = do_not_retrieve
self._no_remote = no_remote
self._use_rt_pool = use_rt_pool
self._multiple_choice = multiple_choice
self._multiple_choice_guess = multiple_choice_guess
self._wrap_files_in_objects = wrap_files_in_objects
@staticmethod
@@ -188,7 +198,8 @@ class JsonSchemaExporter:
return required_list
def _make_segment_from_prop(self, prop: db.Property) -> Tuple[OrderedDict, dict]:
def _make_segment_from_prop(self, prop: db.Property, multiple_choice_enforce: bool = False
) -> tuple[OrderedDict, dict]:
"""Return the JSON Schema and ui schema segments for the given property.
The result may either be a simple json schema segment, such as a `string
@@ -202,6 +213,9 @@ Parameters
prop : db.Property
The property to be transformed.
multiple_choice_enforce : bool, default=False
If True, this property shall be handled as multiple choice items.
Returns
-------
@@ -248,19 +262,31 @@ ui_schema : dict
json_prop["type"] = "integer"
elif prop.datatype == db.DOUBLE:
json_prop["type"] = "number"
# list-valued non-files
elif is_list_datatype(prop.datatype) and not (
self._wrap_files_in_objects and get_list_datatype(prop.datatype,
strict=True) == db.FILE):
self._wrap_files_in_objects
and get_list_datatype(prop.datatype, strict=True) == db.FILE):
json_prop["type"] = "array"
list_element_prop = db.Property(
name=prop.name, datatype=get_list_datatype(prop.datatype, strict=True))
json_prop["items"], inner_ui_schema = self._make_segment_from_prop(list_element_prop)
# Is this a multiple choice array?
multiple_choice = prop.name in self._multiple_choice
if (not multiple_choice and self._multiple_choice_guess
and db.common.datatype.is_reference(list_element_prop.datatype)):
multiple_choice = self._guess_recordtype_is_enum(list_element_prop.datatype)
# Get inner content of list
json_prop["items"], inner_ui_schema = self._make_segment_from_prop(
list_element_prop, multiple_choice_enforce=multiple_choice)
if "type" in json_prop["items"] and (
json_prop["items"]["type"] in ["boolean", "integer", "number", "string"]
):
json_prop["items"]["type"] = [json_prop["items"]["type"], "null"]
if prop.name in self._multiple_choice and prop.name in self._do_not_create:
if multiple_choice:
# TODO: if not multiple_choice, but do_not_create:
# "ui:widget" = "radio" & "ui:inline" = true
# TODO: set threshold for number of items.
@@ -269,6 +295,7 @@ ui_schema : dict
ui_schema["ui:inline"] = True
if inner_ui_schema:
ui_schema["items"] = inner_ui_schema
# scalar references
elif prop.is_reference():
# We must distinguish between multiple kinds of "reference" properties.
@@ -324,11 +351,26 @@ ui_schema : dict
prop_name = prop.datatype
if isinstance(prop.datatype, db.Entity):
prop_name = prop.datatype.name
if prop.name in self._do_not_retrieve or self._plain_data_model:
values = []
else:
# Find out if this property is an enum.
is_enum = (multiple_choice_enforce
or
(self._multiple_choice_guess
and self._guess_recordtype_is_enum(prop_name)))
# If `is_enum` -> always get values
# Otherwise -> `do_not_retrieve` may prevent retrieval
if is_enum or not (
(
isinstance(self._do_not_retrieve, list)
and prop_name in self._do_not_retrieve)
or (
self._do_not_retrieve == "auto"
)):
values = self._retrieve_enum_values(f"RECORD '{prop_name}'")
if prop.name in self._do_not_create:
else:
values = []
if is_enum or prop.name in self._do_not_create:
# Only a simple list of values
json_prop["enum"] = values
else:
@@ -431,6 +473,22 @@ ui_schema : dict
return prop
def _guess_recordtype_is_enum(self, rt_name: str) -> bool:
"""For a given RecordType, guess if it represents an enum.
Parameters
----------
rt_name : str
Name of the RecordType to be guessed.
Returns
-------
out : bool
True, if the RecordType is guessed to be an enum. False otherwise.
"""
rt = get_entity_by_name(rt_name)
return len(rt.get_properties()) == 0
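A brief illustration of the heuristic, as a sketch assuming a connected LinkAhead instance with the multiple choice test model shown further below (the private method is called directly only for demonstration):

from caosadvancedtools.json_schema_exporter import JsonSchemaExporter

exporter = JsonSchemaExporter(multiple_choice_guess=True)
# A RecordType that defines no Properties of its own counts as enum-like:
assert exporter._guess_recordtype_is_enum("Skill")          # Skill has no Properties
assert not exporter._guess_recordtype_is_enum("Training")   # Training has date, skills, ...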
def _retrieve_enum_values(self, role: str):
if self._no_remote:
@@ -449,7 +507,7 @@ ui_schema : dict
return vals
def _make_segment_from_recordtype(self, rt: db.RecordType) -> Tuple[OrderedDict, dict]:
def _make_segment_from_recordtype(self, rt: db.RecordType) -> tuple[OrderedDict, dict]:
"""Return Json schema and uischema segments for the given RecordType.
The result is an element of type `object
@@ -515,7 +573,7 @@ ui_schema : dict
return schema, ui_schema
def _customize(self, schema: OrderedDict, ui_schema: dict, entity: db.Entity = None) -> (
Tuple[OrderedDict, dict]):
tuple[OrderedDict, dict]):
"""Generic customization method.
Walk over the available customization stores and apply all applicable ones. No specific order is
@@ -547,7 +605,7 @@ guaranteed (as of now).
return schema, ui_schema
def recordtype_to_json_schema(self, rt: db.RecordType, rjsf: bool = False) -> Union[
dict, Tuple[dict, dict]]:
dict, tuple[dict, dict]]:
"""Create a jsonschema from a given RecordType that can be used, e.g., to
validate a json specifying a record of the given type.
@@ -589,17 +647,18 @@ def recordtype_to_json_schema(rt: db.RecordType, additional_properties: bool = T
use_id_for_identification: bool = False,
description_property_for_new_records: bool = False,
additional_options_for_text_props: Optional[dict] = None,
additional_json_schema: Dict[str, dict] = None,
additional_ui_schema: Dict[str, dict] = None,
additional_json_schema: Optional[dict[str, dict]] = None,
additional_ui_schema: Optional[dict[str, dict]] = None,
units_in_description: bool = True,
do_not_create: List[str] = None,
do_not_retrieve: List[str] = None,
do_not_create: Optional[list[str]] = None,
do_not_retrieve: Optional[Union[list[str], str]] = None,
no_remote: bool = False,
use_rt_pool: DataModel = None,
multiple_choice: List[str] = None,
use_rt_pool: Optional[DataModel] = None,
multiple_choice: Optional[list[str]] = None,
multiple_choice_guess: bool = False,
rjsf: bool = False,
wrap_files_in_objects: bool = False
) -> Union[dict, Tuple[dict, dict]]:
) -> Union[dict, tuple[dict, dict]]:
"""Create a jsonschema from a given RecordType that can be used, e.g., to
validate a json specifying a record of the given type.
@@ -610,58 +669,8 @@ def recordtype_to_json_schema(rt: db.RecordType, additional_properties: bool = T
----------
rt : RecordType
The RecordType from which a json schema will be created.
additional_properties : bool, optional
Whether additional properties will be admitted in the resulting
schema. Optional, default is True.
name_property_for_new_records : bool, optional
Whether objects shall generally have a `name` property in the generated schema. Optional,
default is False.
use_id_for_identification: bool, optional
If set to true, an 'id' property is added to all records, and foreign
key references are assumed to be ids.
description_property_for_new_records : bool, optional
Whether objects shall generally have a `description` property in the generated schema.
Optional, default is False.
additional_options_for_text_props : dict, optional
Dictionary containing additional "pattern" or "format" options for
string-typed properties. Optional, default is empty.
additional_json_schema : dict[str, dict], optional
Additional schema content for elements of the given names.
additional_ui_schema : dict[str, dict], optional
Additional ui schema content for elements of the given names.
units_in_description : bool, optional
Whether to add the unit of a LinkAhead property (if it has any) to the
description of the corresponding schema entry. If set to false, an
additional `unit` key is added to the schema itself which is purely
annotational and ignored, e.g., in validation. Default is True.
do_not_create : list[str], optional
A list of reference Property names, for which there should be no option
to create them. Instead, only the choice of existing elements should
be given.
do_not_retrieve : list[str], optional
A list of RecordType names, for which no Records shall be retrieved. Instead, only an
object description should be given. If this list overlaps with the `do_not_create`
parameter, the behavior is undefined.
no_remote : bool, optional
If True, do not attempt to connect to a LinkAhead server at all. Default is False.
use_rt_pool : models.data_model.DataModel, optional
If given, do not attempt to retrieve RecordType information remotely but from this parameter
instead.
multiple_choice : list[str], optional
A list of reference Property names which shall be denoted as multiple choice properties.
This means that each option in this property may be selected at most once. This is not
implemented yet if the Property is not in ``do_not_create`` as well.
rjsf : bool, optional
If True, uiSchema definitions for react-jsonschema-forms will be output as the second return
value. Default is False.
wrap_files_in_objects : bool, optional
Whether (lists of) files should be wrapped into an array of objects that
have a file property. The sole purpose of this wrapping is to provide a
workaround for a `react-jsonschema-form bug
<https://github.com/rjsf-team/react-jsonschema-form/issues/3957>`_ so
only set this to True if you're using the exported schema with
react-json-form and you are experiencing the bug. Default is False.
The other parameters are identical to the ones used by ``JsonSchemaExporter``.
Returns
-------
@@ -686,12 +695,13 @@ def recordtype_to_json_schema(rt: db.RecordType, additional_properties: bool = T
no_remote=no_remote,
use_rt_pool=use_rt_pool,
multiple_choice=multiple_choice,
multiple_choice_guess=multiple_choice_guess,
wrap_files_in_objects=wrap_files_in_objects
)
return exporter.recordtype_to_json_schema(rt, rjsf=rjsf)
def make_array(schema: dict, rjsf_uischema: dict = None) -> Union[dict, Tuple[dict, dict]]:
def make_array(schema: dict, rjsf_uischema: dict = None) -> Union[dict, tuple[dict, dict]]:
"""Create an array of the given schema.
The result will look like this:
@@ -742,9 +752,9 @@ ui_schema : dict, optional
return result
def merge_schemas(schemas: Union[Dict[str, dict], Iterable[dict]],
rjsf_uischemas: Union[Dict[str, dict], Sequence[dict]] = None,
return_data_schema=False) -> (Union[dict, Tuple[dict, dict]]):
def merge_schemas(schemas: Union[dict[str, dict], Iterable[dict]],
rjsf_uischemas: Optional[Union[dict[str, dict], Sequence[dict]]] = None,
return_data_schema=False) -> (Union[dict, tuple[dict, dict]]):
"""Merge the given schemata into a single schema.
The result will look like this:
......
@@ -71,7 +71,8 @@ def _generate_jsonschema_from_recordtypes(recordtypes: Iterable,
schema_generator = JsonSchemaExporter(additional_properties=False,
name_property_for_new_records=False,
use_id_for_identification=True,
plain_data_model=True)
do_not_retrieve="auto",
multiple_choice_guess=True)
schemas = [schema_generator.recordtype_to_json_schema(recordtype)
for recordtype in recordtypes]
_, data_schema = merge_schemas(schemas, return_data_schema=True)
......
@@ -297,7 +297,9 @@ foreign_keys: list[list[str]]
assert d['type'] == 'string'
assert d['format'] == 'date' or d['format'] == 'date-time'
return default_return
if schema["type"] in ['string', 'number', 'integer', 'boolean']:
scalars = ['string', 'number', 'integer', 'boolean']
# Also add "null" combinations, such as ["string", "null"].
if schema["type"] in (scalars + [[scal, "null"] for scal in scalars]):
if 'format' in schema and schema['format'] == 'data-url':
return {} # file; ignore for now
return default_return
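For clarity, a directly runnable sketch of what the membership test accepts after this change:

scalars = ['string', 'number', 'integer', 'boolean']
allowed = scalars + [[scal, "null"] for scal in scalars]
# allowed == ['string', 'number', 'integer', 'boolean',
#             ['string', 'null'], ['number', 'null'],
#             ['integer', 'null'], ['boolean', 'null']]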
......
@@ -30,8 +30,9 @@ from caosadvancedtools.models import parser
# import tomli
def prepare_datamodel(modelfile, recordtypes: list[str], outfile: str,
def prepare_datamodel(modelfile: str, recordtypes: list[str], outfile: str,
do_not_create: list[str] = None):
"""Dump the schema generated from ``modelfile`` to ``outfile``."""
if do_not_create is None:
do_not_create = []
model = parser.parse_model_from_yaml(modelfile)
......
File added
Training:
recommended_properties:
date:
datatype: DATETIME
description: 'The date of the training.'
skills:
datatype: LIST<Skill>
description: Skills that are trained.
exam_types:
datatype: LIST<ExamType>
# Enum RecordTypes
Skill:
description: Skills that are trained.
ExamType:
description: The type of an exam.
{
"Training": [
{
"name": "Super Skill Training",
"date": "2024-04-17 00:00:00-04:00",
"skills": [
{
"name": "Planning"
},
{
"name": "Evaluation"
}
],
"exam_types": null
}
]
}
@@ -30,8 +30,8 @@
"items": {
"enum": [
"Planning",
"Communication",
"Evaluation"
"Communication",
"Evaluation"
]
},
"uniqueItems": true
@@ -41,7 +41,7 @@
"items": {
"enum": [
"Oral",
"Written"
"Written"
]
},
"uniqueItems": true
......
@@ -30,7 +30,3 @@ Training:
ProgrammingCourse:
inherit_from_suggested:
- Training
Organisation:
recommended_properties:
Country:
datatype: TEXT
#!/usr/bin/env python3
# encoding: utf-8
#
# This file is a part of the LinkAhead Project.
@@ -29,6 +28,7 @@ from caosadvancedtools.table_json_conversion.xlsx_utils import ColumnType
from openpyxl import load_workbook
from .utils import compare_workbooks
from .create_jsonschema import prepare_datamodel
def rfp(*pathcomponents):
@@ -64,6 +64,7 @@ out: tuple
filepath=outpath)
assert os.path.exists(outpath)
generated = load_workbook(outpath)
good = load_workbook(known_good)
compare_workbooks(generated, good)
return generated, good
@@ -274,6 +275,23 @@ def test_model_with_multiple_choice():
outfile=None)
def test_schema_with_null_arrays(tmp_path):
"""Schemas may be generated with allow `None` as list content."""
# Generate json automatically
prepare_datamodel(modelfile=rfp("data/simple_model.yml"),
recordtypes=["Training", "Person"],
outfile=tmp_path / "simple_schema.json",
do_not_create=["Organisation"])
# Compare result
_compare_generated_to_known_good(
schema_file=tmp_path / "simple_schema.json",
known_good=rfp("data/simple_template.xlsx"),
foreign_keys={'Training': {"__this__": ['date', 'url']}},
outfile=None)
def test_exceptions():
# Foreign keys must be lists
with pytest.raises(ValueError, match="Foreign keys must be a list of strings, but a single "
......
@@ -22,18 +22,32 @@
"""
from datetime import datetime
from typing import Iterable, Union
from typing import Iterable, Optional, Union
from openpyxl import Workbook
def assert_equal_jsons(json1, json2, allow_none: bool = True, allow_empty: bool = True,
path: list = None) -> None:
ignore_datetime: bool = False, ignore_id_value: bool = False,
allow_name_dict: bool = False,
path: Optional[list] = None) -> None:
"""Compare two json objects for near equality.
Raise an assertion exception if they are not equal."""
Raise an assertion exception if they are not equal.
Parameters
----------
allow_name_dict: bool, default=False
If True, a string and a dict ``{"name": "string's value"}`` are considered equal.
"""
if path is None:
path = []
assert isinstance(json1, dict) == isinstance(json2, dict), f"Type mismatch, path: {path}"
if isinstance(json1, dict):
keys = set(json1.keys()).union(json2.keys())
@@ -43,14 +57,33 @@ Raise an assertion exception if they are not equal."""
if key in json1 and key in json2:
el1 = json1[key]
el2 = json2[key]
if allow_none and (el1 is None and (el2 == [] or el2 == {})
or el2 is None and (el1 == [] or el1 == {})):
# shortcut in case of equivalent empty content
continue
if allow_name_dict: # Special exception
my_str = None
if isinstance(el1, str) and isinstance(el2, dict):
my_str = el1
my_dict = el2
elif isinstance(el2, str) and isinstance(el1, dict):
my_str = el2
my_dict = el1
if my_str is not None:
if len(my_dict) == 1 and my_dict.get("name") == my_str:
continue
assert isinstance(el1, type(el2)), f"Type mismatch, path: {this_path}"
if isinstance(el1, (dict, list)):
# Iterables: Recursion
assert_equal_jsons(el1, el2, allow_none=allow_none, allow_empty=allow_empty,
path=this_path)
assert_equal_jsons(
el1, el2, allow_none=allow_none, allow_empty=allow_empty,
ignore_datetime=ignore_datetime, ignore_id_value=ignore_id_value,
allow_name_dict=allow_name_dict,
path=this_path)
continue
assert equals_with_casting(el1, el2), (
f"Values at path {this_path} are not equal:\n{el1},\n{el2}")
if not (ignore_id_value and key == "id"):
assert equals_with_casting(el1, el2, ignore_datetime=ignore_datetime), (
f"Values at path {this_path} are not equal:\n{el1},\n{el2}")
continue
# Case 2: exists only in one collection
existing = json1.get(key, json2.get(key))
@@ -66,17 +99,23 @@ Raise an assertion exception if they are not equal."""
this_path = path + [idx]
if isinstance(el1, dict):
assert_equal_jsons(el1, el2, allow_none=allow_none, allow_empty=allow_empty,
ignore_datetime=ignore_datetime, ignore_id_value=ignore_id_value,
allow_name_dict=allow_name_dict,
path=this_path)
else:
assert equals_with_casting(el1, el2), (
f"Values at path {this_path} are not equal:\n{el1},\n{el2}")
def equals_with_casting(value1, value2) -> bool:
def equals_with_casting(value1, value2, ignore_datetime: bool = False) -> bool:
"""Compare two values, return True if equal, False otherwise. Try to cast to clever datatypes.
"""
try:
return datetime.fromisoformat(value1) == datetime.fromisoformat(value2)
dt1 = datetime.fromisoformat(value1)
dt2 = datetime.fromisoformat(value2)
if ignore_datetime:
return True
return dt1 == dt2
except (ValueError, TypeError):
pass
return value1 == value2
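A few usage sketches for the extended comparison helpers, derived from the function bodies above:

# Strings that parse as datetimes are compared as datetimes:
assert equals_with_casting("2024-04-17T00:00:00", "2024-04-17 00:00:00")
# With ignore_datetime=True, any two parseable datetimes compare equal:
assert equals_with_casting("2024-04-17", "2024-05-01", ignore_datetime=True)
# allow_name_dict accepts a string and {"name": <that string>} as equal:
assert_equal_jsons({"coach": "ECB"}, {"coach": {"name": "ECB"}}, allow_name_dict=True)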
......