Skip to content
Snippets Groups Projects
Commit fef66b62 authored by I. Nüske's avatar I. Nüske
Browse files

BUG: Add a function to filter dict before using jsonschema.validate in convert.py

parent 841bae3b
No related branches found
No related tags found
2 merge requests: !138 "Release 0.14.0", !129 "Enable validation in table_json_conversion.convert.to_dict for use in XLSX-converter"
Pipeline #60709 passed
...@@ -31,6 +31,7 @@ from operator import getitem ...@@ -31,6 +31,7 @@ from operator import getitem
from types import SimpleNamespace from types import SimpleNamespace
from typing import Any, BinaryIO, Callable, TextIO, Union, Optional from typing import Any, BinaryIO, Callable, TextIO, Union, Optional
from warnings import warn from warnings import warn
from copy import deepcopy
import jsonschema import jsonschema
from openpyxl import load_workbook from openpyxl import load_workbook
...@@ -152,6 +153,49 @@ class ForeignError(KeyError): ...@@ -152,6 +153,49 @@ class ForeignError(KeyError):
self.definitions = definitions self.definitions = definitions
def _validate_jsonschema(instance, schema):
    """Validate ``instance`` against ``schema``, skipping values jsonschema cannot check.

    A deep copy of ``instance`` is filtered and then handed to
    ``jsonschema.validate``; the object passed in by the caller is never
    modified.  The following values are removed from dicts at any nesting
    depth (including dicts nested in lists) before validation:

    - ``None`` values.  ``key: None`` is currently treated as valid because
      there is no 'obligatory with value'.  ToDo: Is ID: None also valid?
    - ``datetime.date`` / ``datetime.datetime`` values whose subschema (or one
      of its ``anyOf`` alternatives) declares ``format: date`` or
      ``format: date-time``, because jsonschema cannot check these objects.

    If ``instance`` itself is such a value, nothing is validated at all.

    Parameters
    ----------
    instance
        The value to validate (dict, list or scalar).
    schema : dict
        The JSON schema to validate against.

    Raises
    ------
    Exception
        Whatever ``jsonschema.validate`` raises for the filtered instance
        (e.g. ``jsonschema.ValidationError``).
    """

    def in_schema(key, val, subschema):
        """Return True if ``key: val`` is in ``subschema`` or an ``anyOf`` alternative."""
        # ToDo: How to treat allOf and oneOf?
        if not isinstance(subschema, dict):
            # Boolean schemas or malformed entries cannot declare anything.
            return False
        if subschema.get(key) == val:
            return True
        return any(in_schema(key, val, sub) for sub in subschema.get('anyOf', ()))

    def is_unchecked(value, subschema):
        """Return True if ``value`` must be dropped instead of being validated."""
        if value is None:
            return True
        # datetime.datetime is a subclass of datetime.date, one check covers both.
        if isinstance(value, datetime.date):
            return (in_schema('format', 'date', subschema)
                    or in_schema('format', 'date-time', subschema))
        return False

    def remove_incompatible_values(it, subschema):
        """Drop unchecked values from ``it`` in place (recursively) and return it."""
        if not isinstance(subschema, dict):
            subschema = {}
        if isinstance(it, list):
            # Without an 'items' keyword the list's own schema is used, as before.
            item_schema = subschema.get('items', subschema)
            for elem in it:
                remove_incompatible_values(elem, item_schema)
        elif isinstance(it, dict):
            properties = subschema.get('properties', subschema)
            if not isinstance(properties, dict):
                properties = {}
            # Iterate over a snapshot because entries are popped while looping.
            for key, elem in list(it.items()):
                # Keys unknown to the schema get an empty subschema instead of
                # raising KeyError; jsonschema decides whether they are allowed.
                elem_schema = properties.get(key, {})
                if is_unchecked(elem, elem_schema):
                    it.pop(key)
                elif isinstance(elem, (dict, list)):
                    remove_incompatible_values(elem, elem_schema)
        return it

    # A bare value that would be removed from a dict anyway has nothing left
    # to validate.
    if is_unchecked(instance, schema):
        return
    # Clean a copy and validate
    cleaned = remove_incompatible_values(deepcopy(instance), schema)
    jsonschema.validate(cleaned, schema=schema)
class XLSXConverter: class XLSXConverter:
"""Class for conversion from XLSX to JSON. """Class for conversion from XLSX to JSON.
...@@ -328,7 +372,7 @@ class XLSXConverter: ...@@ -328,7 +372,7 @@ class XLSXConverter:
for e in exceptions]) for e in exceptions])
raise jsonschema.ValidationError(mess) raise jsonschema.ValidationError(mess)
if validate: if validate:
jsonschema.validate(self._result, self._schema) _validate_jsonschema(self._result, self._schema)
if self._errors: if self._errors:
raise RuntimeError("There were error while handling the XLSX file.") raise RuntimeError("There were error while handling the XLSX file.")
return self._result return self._result
...@@ -563,7 +607,7 @@ class XLSXConverter: ...@@ -563,7 +607,7 @@ class XLSXConverter:
value = False value = False
if value == 1 or isinstance(value, str) and '=true()' == value.lower(): if value == 1 or isinstance(value, str) and '=true()' == value.lower():
value = True value = True
jsonschema.validate(value, subschema) _validate_jsonschema(value, subschema)
# Finally: convert to target type # Finally: convert to target type
return self.PARSER[subschema.get("type", "string")](value) return self.PARSER[subschema.get("type", "string")](value)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment