Skip to content
Snippets Groups Projects
Verified Commit ee056a2d authored by Daniel Hornung's avatar Daniel Hornung
Browse files

ENH: table json converter: id-name heuristic and basic datetime.

parent 9c844fd7
No related branches found
No related tags found
2 merge requests!107Release v0.11.0,!103xlsx -> json conversion
Pipeline #50707 failed
...@@ -209,10 +209,19 @@ out: union[dict, None] ...@@ -209,10 +209,19 @@ out: union[dict, None]
assert len(set(type(entry) for entry in content)) == 1 assert len(set(type(entry) for entry in content)) == 1
if isinstance(content[0], dict): # all elements are dicts if isinstance(content[0], dict): # all elements are dicts
# An array of objects: must go into exploded sheet # Heuristic to detect enum entries (only id and name):
for entry in content: if all(set(entry.keys()) == {"id", "name"} for entry in content):
self._handle_data(data=entry, current_path=path, context=next_context) # Convert to list of names, do not recurse
continue content = [entry["name"] for entry in content]
else:
# An array of objects: must go into exploded sheet
for entry in content:
self._handle_data(data=entry, current_path=path, context=next_context)
continue
# Heuristic to detect enum entries (dict with only id and name):
elif isinstance(content, dict) and set(content.keys()) == {"id", "name"}:
content = [content["name"]]
# "Normal" dicts
elif isinstance(content, dict): # we recurse and simply use the result elif isinstance(content, dict): # we recurse and simply use the result
if not current_path: # Special handling for top level if not current_path: # Special handling for top level
self._handle_data(content, current_path=path, context=next_context) self._handle_data(content, current_path=path, context=next_context)
...@@ -259,7 +268,8 @@ out: union[dict, None] ...@@ -259,7 +268,8 @@ out: union[dict, None]
sheet = None sheet = None
for path_str, value in insertables.items(): for path_str, value in insertables.items():
if self._graceful and path_str not in self._sheet_index: if self._graceful and path_str not in self._sheet_index:
warn(f"Ignoring path with missing sheet index: {path_str}") if not (value is None or path_str.endswith(".id") or path_str.endswith(".name")):
warn(f"Ignoring path with missing sheet index: {path_str}")
continue continue
sheet_meta = self._sheet_index[path_str] sheet_meta = self._sheet_index[path_str]
if sheet is None: if sheet is None:
...@@ -346,6 +356,7 @@ validation_schema: dict, optional ...@@ -346,6 +356,7 @@ validation_schema: dict, optional
if validation_schema is not None: if validation_schema is not None:
validation_schema = array_schema_from_model_schema(read_or_dict(validation_schema)) validation_schema = array_schema_from_model_schema(read_or_dict(validation_schema))
try: try:
# FIXME redefine checker for datetime
validate(data, validation_schema, format_checker=FormatChecker()) validate(data, validation_schema, format_checker=FormatChecker())
except ValidationError as verr: except ValidationError as verr:
print(verr.message) print(verr.message)
......
...@@ -235,8 +235,6 @@ proper_name: str ...@@ -235,8 +235,6 @@ proper_name: str
if ii > len(parent): if ii > len(parent):
parent = foreign_path[:ii] parent = foreign_path[:ii]
# print(data_paths, ii)
# breakpoint()
return parent, data_paths[0][ii] return parent, data_paths[0][ii]
......
...@@ -19,6 +19,7 @@ ...@@ -19,6 +19,7 @@
# You should have received a copy of the GNU Affero General Public License # You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>. # along with this program. If not, see <https://www.gnu.org/licenses/>.
import datetime
import json import json
import os import os
import re import re
...@@ -33,6 +34,7 @@ from caosadvancedtools.table_json_conversion.fill_xlsx import fill_template ...@@ -33,6 +34,7 @@ from caosadvancedtools.table_json_conversion.fill_xlsx import fill_template
from caosadvancedtools.table_json_conversion.xlsx_utils import ( from caosadvancedtools.table_json_conversion.xlsx_utils import (
get_row_type_column_index, get_row_type_column_index,
get_path_rows, get_path_rows,
read_or_dict,
) )
from .utils import compare_workbooks from .utils import compare_workbooks
...@@ -149,6 +151,32 @@ def test_fill_xlsx(): ...@@ -149,6 +151,32 @@ def test_fill_xlsx():
schema=rfp("data/multiple_choice_schema.json")) schema=rfp("data/multiple_choice_schema.json"))
def test_datetime():
    """Datetime values from LinkAhead are not serialized as strings."""
    input_json = rfp("data/simple_data.json")
    template = rfp("data/simple_template.xlsx")
    reference = rfp("data/simple_data_datetime.xlsx")
    # TODO Implement checker for datetime
    # schema = rfp("data/simple_schema.json")
    # Replace the serialized date with a genuine datetime object.
    data = read_or_dict(input_json)
    data["Training"][0]["date"] = datetime.datetime(2023, 1, 1)
    # Largely mirrors `fill_and_compare(...)`, but without schema validation.
    with tempfile.TemporaryDirectory() as workdir:
        result_path = os.path.join(workdir, 'test.xlsx')
        assert not os.path.exists(result_path)
        fill_template(data=data, template=template, result=result_path,
                      # validation_schema=schema
                      )
        assert os.path.exists(result_path)
        result_wb = load_workbook(result_path)  # workbook can be read
        reference_wb = load_workbook(reference)
        compare_workbooks(result_wb, reference_wb)
def test_errors(): def test_errors():
with pytest.raises(AssertionError) as exc: with pytest.raises(AssertionError) as exc:
fill_and_compare(json_file=rfp("data/error_simple_data.json"), fill_and_compare(json_file=rfp("data/error_simple_data.json"),
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment