Skip to content
Snippets Groups Projects
Commit fef66b62 authored by I. Nüske's avatar I. Nüske
Browse files

BUG: Add a function to filter dict before using jsonschema.validate in convert.py

parent 841bae3b
No related branches found
No related tags found
2 merge requests: !138 Release 0.14.0, !129 Enable validation in table_json_conversion.convert.to_dict for use in XLSX-converter
Pipeline #60709 passed
......@@ -31,6 +31,7 @@ from operator import getitem
from types import SimpleNamespace
from typing import Any, BinaryIO, Callable, TextIO, Union, Optional
from warnings import warn
from copy import deepcopy
import jsonschema
from openpyxl import load_workbook
......@@ -152,6 +153,49 @@ class ForeignError(KeyError):
self.definitions = definitions
def _validate_jsonschema(instance, schema):
    """Validate ``instance`` against ``schema`` after filtering unsupported values.

    The validation runs on a cleaned deep copy, so the caller's ``instance``
    is never modified.  Cleaning removes, from nested dicts and lists of any
    depth:

    - dict entries whose value is ``None`` (``key: None`` is currently valid
      as there is no 'obligatory with value'),
    - dict entries whose value is a ``datetime.date`` / ``datetime.datetime``
      and whose subschema declares ``format: date`` or ``format: date-time``
      (directly or in an ``anyOf`` branch), as datetime cannot be checked by
      jsonschema.

    A bare ``None`` or a bare date matching such a format is accepted without
    calling jsonschema at all.

    Parameters
    ----------
    instance
        The value to validate (dict, list or scalar).
    schema
        The JSON schema to validate against.

    Raises
    ------
    Whatever ``jsonschema.validate`` raises for the cleaned instance.
    """
    # Checks whether a key: value pair is in the given schema or a direct
    # subschema (anyOf) ToDo: How to treat allOf and oneOf?
    def in_schema(key, val, subschema):
        # Boolean (or otherwise non-dict) schemas carry no keywords.
        if not isinstance(subschema, dict):
            return False
        if subschema.get(key) == val:
            return True
        return any(in_schema(key, val, sub)
                   for sub in subschema.get('anyOf', ()))

    def has_date_format(subschema):
        return (in_schema('format', 'date', subschema)
                or in_schema('format', 'date-time', subschema))

    # Removes Key: None and datetime instances from nested dicts and lists of
    # any depth (in place).
    # ToDo: Is ID: None also valid?
    def remove_incompatible_values(it, subschema):
        if not isinstance(subschema, dict):
            subschema = {}
        if isinstance(it, list):
            item_schema = subschema.get('items', subschema)
            for elem in it:
                remove_incompatible_values(elem, item_schema)
        elif isinstance(it, dict):
            # Falling back to the schema itself when there is no 'properties'
            # entry keeps the lookup behaviour of the previous implementation.
            properties = subschema.get('properties', subschema)
            if not isinstance(properties, dict):
                properties = {}
            # Iterate over a snapshot because entries are deleted on the fly.
            for key, elem in list(it.items()):
                # Keys unknown to the schema get an empty subschema instead of
                # raising KeyError; jsonschema decides whether they are valid.
                elem_schema = properties.get(key, {})
                if elem is None:
                    del it[key]
                elif isinstance(elem, datetime.date):
                    # datetime.datetime is a subclass of datetime.date.
                    if has_date_format(elem_schema):
                        del it[key]
                elif isinstance(elem, (dict, list)):
                    remove_incompatible_values(elem, elem_schema)
        return it

    # Work on a copy so that the caller's data stays untouched.
    instance = deepcopy(instance)
    if isinstance(instance, (dict, list)):
        remove_incompatible_values(instance, schema)
    elif instance is None or (isinstance(instance, datetime.date)
                              and has_date_format(schema)):
        # A bare value that would have been filtered out of a dict is valid.
        return
    jsonschema.validate(instance, schema=schema)
class XLSXConverter:
"""Class for conversion from XLSX to JSON.
......@@ -328,7 +372,7 @@ class XLSXConverter:
for e in exceptions])
raise jsonschema.ValidationError(mess)
if validate:
jsonschema.validate(self._result, self._schema)
_validate_jsonschema(self._result, self._schema)
if self._errors:
raise RuntimeError("There were error while handling the XLSX file.")
return self._result
......@@ -563,7 +607,7 @@ class XLSXConverter:
value = False
if value == 1 or isinstance(value, str) and '=true()' == value.lower():
value = True
jsonschema.validate(value, subschema)
_validate_jsonschema(value, subschema)
# Finally: convert to target type
return self.PARSER[subschema.get("type", "string")](value)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment