diff --git a/src/caosadvancedtools/table_json_conversion/convert.py b/src/caosadvancedtools/table_json_conversion/convert.py index 3c5b78faa3fe3aef677a15293a712855cf1be3b0..b416fc29764fae73176f8c0177523a37dd353618 100644 --- a/src/caosadvancedtools/table_json_conversion/convert.py +++ b/src/caosadvancedtools/table_json_conversion/convert.py @@ -478,6 +478,12 @@ class XLSXConverter: if isinstance(value, datetime.date) and ( {'type': 'string', 'format': 'date'} in subschema["anyOf"]): return value + # booleans might be retrieved as an integer or formula + if subschema.get('type') == 'boolean': + if value == 0 or isinstance(value, str) and '=false()' == value.lower(): + value = False + if value == 1 or isinstance(value, str) and '=true()' == value.lower(): + value = True jsonschema.validate(value, subschema) # Finally: convert to target type diff --git a/unittests/table_json_conversion/data/simple_data_booleans.json b/unittests/table_json_conversion/data/simple_data_booleans.json new file mode 100644 index 0000000000000000000000000000000000000000..f7d452b39f51ea3e0903af8c50007fb483cb05e4 --- /dev/null +++ b/unittests/table_json_conversion/data/simple_data_booleans.json @@ -0,0 +1,47 @@ +{ + "Training": [ + { + "date": "2023-01-01", + "url": "www.indiscale.com", + "coach": [ + { + "family_name": "Sky", + "given_name": "Max", + "Organisation": "ECB" + }, + { + "family_name": "Sky", + "given_name": "Min", + "Organisation": "ECB" + } + ], + "supervisor": { + "family_name": "Steve", + "given_name": "Stevie", + "Organisation": "IMF" + }, + "duration": 1.0, + "participants": 1, + "subjects": ["Math", "Physics"], + "remote": false + }, + { + "date": "2023-01-02", + "url": "www.indiscale.com", + "supervisor": { + "family_name": "Steve", + "given_name": "Stevie", + "Organisation": "IMF" + }, + "duration": 1.0, + "participants": 1, + "subjects": ["Math", "Physics"], + "remote": true + } + ], + "Person": [{ + "family_name": "Steve", + "given_name": "Stevie", + "Organisation": 
"IMF" + }] +} diff --git a/unittests/table_json_conversion/data/simple_data_booleans.xlsx b/unittests/table_json_conversion/data/simple_data_booleans.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..6a67d7a8d6df76b7d88d0d575c6ed1c30b8f8363 Binary files /dev/null and b/unittests/table_json_conversion/data/simple_data_booleans.xlsx differ diff --git a/unittests/table_json_conversion/data/simple_data_broken.xlsx b/unittests/table_json_conversion/data/simple_data_broken.xlsx index a65d464a53459de73e41fd20d807899c44728cda..c75da9faaefcb7610d84e16dd6ff17dcd055b008 100644 Binary files a/unittests/table_json_conversion/data/simple_data_broken.xlsx and b/unittests/table_json_conversion/data/simple_data_broken.xlsx differ diff --git a/unittests/table_json_conversion/test_read_xlsx.py b/unittests/table_json_conversion/test_read_xlsx.py index ac0a42b59478a57a1bf7ef53f4333e20c0358e76..a34c046f95887f0de6a4a40b7102b1ad85a0de3e 100644 --- a/unittests/table_json_conversion/test_read_xlsx.py +++ b/unittests/table_json_conversion/test_read_xlsx.py @@ -26,6 +26,7 @@ import os import re from types import SimpleNamespace +from typing import Optional import jsonschema import pytest @@ -41,7 +42,7 @@ def rfp(*pathcomponents): def convert_and_compare(xlsx_file: str, schema_file: str, known_good_file: str, - known_good_data: dict = None, strict: bool = False, + known_good_data: Optional[dict] = None, strict: bool = False, validate: bool = True) -> dict: """Convert an XLSX file and compare to a known result. 
@@ -77,6 +78,9 @@ def test_conversions(): schema_file=rfp("data/multiple_choice_schema.json"), known_good_file=rfp("data/multiple_choice_data.json"), strict=True) + convert_and_compare(xlsx_file=rfp("data/simple_data_booleans.xlsx"), + schema_file=rfp("data/simple_schema.json"), + known_good_file=rfp("data/simple_data_booleans.json")) with open(rfp("data/simple_data.json"), encoding="utf-8") as myfile: expected_datetime = json.load(myfile) @@ -126,29 +130,41 @@ def test_error_table(): assert "'There is no entry in the schema" in str(caught.value) assert "'Not an enum' is not one of [" in str(caught.value) # Correct Locations + # Collect the reported cell references to make sure that no expected error is missing. + matches = set() for line in str(caught.value).split('\n'): if "'Not a num' is not of type 'number'" in line: assert "J7" in line + matches.add("J7") if "'Yes a number?' is not of type 'number'" in line: assert "J8" in line + matches.add("J8") if "1.5 is not of type 'integer'" in line: assert "K7" in line + matches.add("K7") if "1.2345 is not of type 'integer'" in line: assert "K8" in line + matches.add("K8") if "'There is no entry in the schema" in line: assert "Column M" in line + matches.add("Col M") if "'Not an enum' is not one of [" in line: assert "G8" in line + matches.add("G8") + # The next two tests could potentially be removed in the future, once we evaluate formulas. + if "'=NOT(FALSE())' is not of type 'boolean'" in line: + assert "L9" in line + matches.add("L9") + if "'=NOT(TRUE())' is not of type 'boolean'" in line: + assert "L10" in line + matches.add("L10") + assert matches == {"J7", "J8", "K7", "K8", "Col M", "G8", "L9", "L10"} + # No additional errors assert str(caught.value).count("Malformed metadata: Cannot parse paths in worksheet") == 1 assert str(caught.value).count("There is no entry in the schema") == 1 assert str(caught.value).count("is not one of") == 1 - # FIXME ToDo: Remove when boolean is fixed / when everything works as - # expected, set correct number. 
- if "is not of type 'boolean'" in str(caught.value): - assert str(caught.value).count("is not of type") == 6 - else: - assert str(caught.value).count("is not of type") == 4 + assert str(caught.value).count("is not of type") == 6 # Check correct error message for completely unknown path with pytest.raises(jsonschema.ValidationError) as caught: convert.to_dict(xlsx=rfp("data/simple_data_broken_paths.xlsx"), diff --git a/unittests/table_json_conversion/utils.py b/unittests/table_json_conversion/utils.py index b95715f72b08384f75857e48bcba328488313ad5..ac76fbea4508017261385e4e8fd70bedc378da5a 100644 --- a/unittests/table_json_conversion/utils.py +++ b/unittests/table_json_conversion/utils.py @@ -58,7 +58,8 @@ Raise an assertion exception if they are not equal.""" "the other.") return assert isinstance(json1, list) and isinstance(json2, list), f"Is not a list, path: {path}" - assert len(json1) == len(json2), f"Lists must have equal length, path: {path}" + assert len(json1) == len(json2), (f"Lists must have equal length, path: {path}\n" + f"{json1}\n ---\n{json2}") for idx, (el1, el2) in enumerate(zip(json1, json2)): this_path = path + [idx] if isinstance(el1, dict):