Skip to content
Snippets Groups Projects
Commit 3b55e553 authored by I. Nüske's avatar I. Nüske
Browse files

ENH: Added error message to convert.to_dict() when trying to parse a column not in the schema

parent 5ceee474
No related branches found
No related tags found
2 merge requests: !128 MNT: Added a warning when column metadata is not configured, and a better..., !120 XLSX-Konverter: Bessere Fehlermeldung bei inkorrektem Typ in Spalte, zusätzlicher Spalte
Pipeline #57494 passed with warnings
...@@ -83,7 +83,7 @@ def format_exception_table(exceptions: list(tuple), worksheet_title: str, ...@@ -83,7 +83,7 @@ def format_exception_table(exceptions: list(tuple), worksheet_title: str,
return to_char(int(num / 26) - 1) + chr(int(num % 26) + 65) return to_char(int(num / 26) - 1) + chr(int(num % 26) + 65)
max_line_length -= 40 # Estimate of Field + Type space use max_line_length -= 40 # Estimate of Field + Type space use
headers = {"loc": "Field", "type": "Error Type", "mess": ["Message"]} headers = {"loc": "Location", "type": "Error Type", "mess": ["Message"]}
lengths = {key: len(headers[key]) for key in headers} lengths = {key: len(headers[key]) for key in headers}
new_data = [] new_data = []
...@@ -103,9 +103,9 @@ def format_exception_table(exceptions: list(tuple), worksheet_title: str, ...@@ -103,9 +103,9 @@ def format_exception_table(exceptions: list(tuple), worksheet_title: str,
new_data.append(row) new_data.append(row)
# Field # Field
if isinstance(row_i, int): if isinstance(row_i, int):
row["loc"] = f"{to_char(col_i)}{row_i + 1}" row["loc"] = f"Cell {to_char(col_i)}{row_i + 1}"
else: else:
row["loc"] = f"{to_char(col_i)}" row["loc"] = f"Column {to_char(col_i)}"
lengths["loc"] = max(lengths["loc"], len(row["loc"])) lengths["loc"] = max(lengths["loc"], len(row["loc"]))
# Code # Code
row["type"] = type(excep).__name__ row["type"] = type(excep).__name__
...@@ -140,7 +140,7 @@ def format_exception_table(exceptions: list(tuple), worksheet_title: str, ...@@ -140,7 +140,7 @@ def format_exception_table(exceptions: list(tuple), worksheet_title: str,
# Fill for the messages is set to 0, if we want another column or align # Fill for the messages is set to 0, if we want another column or align
# right we need to use lengths["mess"] # right we need to use lengths["mess"]
string_rep = f"There were failures during validation of worksheet '{worksheet_title}':\n\n" string_rep = f"There were errors during the validation of worksheet '{worksheet_title}':\n\n"
for row in [headers, dividers] + new_data: for row in [headers, dividers] + new_data:
string_rep += ' {loc: <{fill}} '.format(loc=row["loc"], string_rep += ' {loc: <{fill}} '.format(loc=row["loc"],
fill=lengths["loc"]) fill=lengths["loc"])
...@@ -314,12 +314,10 @@ class XLSXConverter: ...@@ -314,12 +314,10 @@ class XLSXConverter:
# entries: dict[str, list[SimpleNamespace]] = {} # entries: dict[str, list[SimpleNamespace]] = {}
exceptions = [] exceptions = []
col_names = None col_names = {}
for row_idx, row in enumerate(sheet.iter_rows(values_only=True)): for row_idx, row in enumerate(sheet.iter_rows(values_only=True)):
# Skip non-data rows and save the row containing column names # Skip non-data rows
if row[row_type_column] is not None: if row[row_type_column] is not None:
if row[row_type_column] == "IGNORE" and col_names is None:
col_names = row
continue continue
foreign_repr = "" foreign_repr = ""
foreign = [] # A list of lists, each of which is: [path1, path2, ..., leaf, value] foreign = [] # A list of lists, each of which is: [path1, path2, ..., leaf, value]
...@@ -334,6 +332,7 @@ class XLSXConverter: ...@@ -334,6 +332,7 @@ class XLSXConverter:
try: try:
if col_idx in data_column_paths: if col_idx in data_column_paths:
path = data_column_paths[col_idx] path = data_column_paths[col_idx]
col_names[col_idx] = '.'.join(path)
if self._is_multiple_choice(path): if self._is_multiple_choice(path):
real_value = path.pop() # Last component is the enum value, insert above real_value = path.pop() # Last component is the enum value, insert above
# set up list # set up list
...@@ -350,7 +349,13 @@ class XLSXConverter: ...@@ -350,7 +349,13 @@ class XLSXConverter:
value = self._validate_and_convert(value, path) value = self._validate_and_convert(value, path)
_set_in_nested(mydict=data, path=path, value=value, prefix=parent, skip=1) _set_in_nested(mydict=data, path=path, value=value, prefix=parent, skip=1)
continue continue
except (ValueError, jsonschema.ValidationError) as e: except (ValueError, KeyError, jsonschema.ValidationError) as e:
# Append error for entire column only once
if isinstance(e, KeyError) and 'column' in str(e):
if len([err for ri, ci, err in exceptions
if ci == col_idx and isinstance(err, KeyError)]) == 0:
exceptions.append((None, col_idx, e))
else:
exceptions.append((row_idx, col_idx, e)) exceptions.append((row_idx, col_idx, e))
try: try:
...@@ -422,7 +427,10 @@ class XLSXConverter: ...@@ -422,7 +427,10 @@ class XLSXConverter:
""" """
if value is None: if value is None:
return value return value
try:
subschema = self._get_subschema(path) subschema = self._get_subschema(path)
except KeyError as e:
raise KeyError("There is no entry in the schema that corresponds to this column.")
# Array handling only if schema says it's an array. # Array handling only if schema says it's an array.
if subschema.get("type") == "array": if subschema.get("type") == "array":
array_type = subschema["items"]["type"] array_type = subschema["items"]["type"]
......
...@@ -133,6 +133,20 @@ def test_wrong_datatype(): ...@@ -133,6 +133,20 @@ def test_wrong_datatype():
assert str(caught.value).count("is not of type") == 2 assert str(caught.value).count("is not of type") == 2
def test_additional_column():
    """Check the error reported for a column that has no entry in the schema.

    Converting ``data/simple_data_broken.xlsx`` against
    ``data/simple_schema.json`` must raise ``jsonschema.ValidationError``.
    The error text must contain the "no entry in the schema" message exactly
    once, and every line carrying that message must also contain ``" M "``.
    """
    expected = "no entry in the schema that corresponds to this column"
    with pytest.raises(jsonschema.ValidationError) as caught:
        convert.to_dict(
            xlsx=rfp("data/simple_data_broken.xlsx"),
            schema=rfp("data/simple_schema.json"),
        )
    report = str(caught.value)
    # The expected message is present at all
    assert expected in report
    # Each line carrying the message also names the location " M "
    matching_lines = [line for line in report.split('\n') if expected in line]
    assert all(" M " in line for line in matching_lines)
    # The column-level error is not repeated for every row
    assert report.count(expected) == 1
def test_faulty_foreign(): def test_faulty_foreign():
# Simple wrong foreign key # Simple wrong foreign key
converter = convert.XLSXConverter(xlsx=rfp("data/simple_data_wrong_foreign.xlsx"), converter = convert.XLSXConverter(xlsx=rfp("data/simple_data_wrong_foreign.xlsx"),
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment