Skip to content
Snippets Groups Projects

Enable validation in table_json_conversion.convert.to_dict for use in XLSX-converter

Merged I. Nüske requested to merge f-bug-138-validate-table_json_conversion into dev
Files
4
@@ -31,6 +31,7 @@ from operator import getitem
from types import SimpleNamespace
from typing import Any, BinaryIO, Callable, TextIO, Union, Optional
from warnings import warn
from copy import deepcopy
import jsonschema
from openpyxl import load_workbook
@@ -152,6 +153,51 @@ class ForeignError(KeyError):
self.definitions = definitions
def _validate_jsonschema(instance, schema):
    """Validate ``instance`` against ``schema`` after dropping uncheckable values.

    Validation runs on a cleaned deep copy, so ``instance`` itself is never
    modified.  Cleaning removes, at any nesting depth of dicts and lists:

    - dict entries whose value is ``None`` (``key: None`` is currently
      considered valid, as there is no "obligatory with value"), and
    - dict entries holding a ``datetime.date`` / ``datetime.datetime`` whose
      subschema declares ``format: date`` or ``format: date-time``, because
      jsonschema cannot check such objects.

    If ``instance`` itself is such a removable value, it is accepted without
    calling jsonschema at all.

    Parameters
    ----------
    instance
        The value to validate; may be a dict, a list or a scalar.
    schema : dict
        The JSON schema to validate against.

    Raises
    ------
    jsonschema.ValidationError
        If the cleaned instance does not conform to ``schema``.
    """

    def in_schema(key, val, schema):
        # Report whether ``key: val`` is in the schema or fulfills the criteria
        # of a direct subschema (anyOf, allOf, oneOf).
        if not isinstance(schema, dict):
            # Boolean schemas carry no keywords that could match.
            return False
        if schema.get(key, None) == val:
            return True
        if 'anyOf' in schema:
            return any(in_schema(key, val, sub) for sub in schema['anyOf'])
        if 'allOf' in schema:
            return all(in_schema(key, val, sub) for sub in schema['allOf'])
        if 'oneOf' in schema:
            return sum(1 for sub in schema['oneOf'] if in_schema(key, val, sub)) == 1
        return False

    def remove_incompatible_values(it, schema):
        # Strip removable entries from ``it`` in place and return it.
        if not isinstance(schema, dict):
            # Nothing to look up in boolean schemas or list-valued ``items``;
            # leave the data untouched and let jsonschema judge it.
            return it
        if isinstance(it, list):
            schema = schema.get('items', schema)
            for elem in it:
                remove_incompatible_values(elem, schema)
        elif isinstance(it, dict):
            schema = schema.get('properties', schema)
            # Iterate over a snapshot, since entries are popped while looping.
            for key, elem in list(it.items()):
                if elem is None:
                    it.pop(key)
                    continue
                # Properties unknown to the schema get an empty subschema, so
                # that jsonschema reports them instead of a KeyError here.
                subschema = schema.get(key, {})
                if isinstance(elem, (datetime.date, datetime.datetime)):
                    if (in_schema('format', 'date', subschema)
                            or in_schema('format', 'date-time', subschema)):
                        it.pop(key)
                elif isinstance(elem, (dict, list)):
                    remove_incompatible_values(elem, subschema)
        return it

    # Wrapping the instance lets the cleaning step treat a top-level scalar
    # like any dict entry: if the wrapper entry is removed, the value is one
    # of the accepted-but-uncheckable cases and there is nothing to validate.
    wrapped = remove_incompatible_values({'key': deepcopy(instance)}, {'key': schema})
    if 'key' not in wrapped:
        return
    jsonschema.validate(wrapped['key'], schema=schema)
class XLSXConverter:
"""Class for conversion from XLSX to JSON.
@@ -328,7 +374,7 @@ class XLSXConverter:
for e in exceptions])
raise jsonschema.ValidationError(mess)
if validate:
jsonschema.validate(self._result, self._schema)
_validate_jsonschema(self._result, self._schema)
if self._errors:
raise RuntimeError("There were error while handling the XLSX file.")
return self._result
@@ -563,7 +609,7 @@ class XLSXConverter:
value = False
if value == 1 or isinstance(value, str) and '=true()' == value.lower():
value = True
jsonschema.validate(value, subschema)
_validate_jsonschema(value, subschema)
# Finally: convert to target type
return self.PARSER[subschema.get("type", "string")](value)
@@ -707,8 +753,6 @@ def _set_in_nested(mydict: dict, path: list, value: Any, prefix: list = [], skip
return mydict
# ToDo: Fix https://gitlab.indiscale.com/caosdb/src/caosdb-advanced-user-tools/-/issues/138
# and remove pylint disable
def to_dict(xlsx: Union[str, BinaryIO], schema: Union[dict, str, TextIO],
validate: Optional[bool] = None, strict: bool = False) -> dict:
"""Convert the xlsx contents to a dict, it must follow a schema.
@@ -733,9 +777,5 @@ def to_dict(xlsx: Union[str, BinaryIO], schema: Union[dict, str, TextIO],
out: dict
A dict representing the JSON with the extracted data.
"""
if validate:
raise NotImplementedError(
"For input validation implement "
"https://gitlab.indiscale.com/caosdb/src/caosdb-advanced-user-tools/-/issues/138")
converter = XLSXConverter(xlsx, schema, strict=strict)
return converter.to_dict()
return converter.to_dict(validate=validate)
Loading