Skip to content
Snippets Groups Projects
Commit 3b55e553 authored by I. Nüske
Browse files

ENH: Added error message to convert.to_dict() when trying to parse a column not in the schema

parent 5ceee474
No related branches found
No related tags found
2 merge requests: !128 "MNT: Added a warning when column metadata is not configured, and a better...", !120 "XLSX-Konverter: Bessere Fehlermeldung bei inkorrektem Typ in Spalte, zusätzlicher Spalte"
Pipeline #57494 passed with warnings
......@@ -83,7 +83,7 @@ def format_exception_table(exceptions: list(tuple), worksheet_title: str,
return to_char(int(num / 26) - 1) + chr(int(num % 26) + 65)
max_line_length -= 40 # Estimate of Field + Type space use
headers = {"loc": "Field", "type": "Error Type", "mess": ["Message"]}
headers = {"loc": "Location", "type": "Error Type", "mess": ["Message"]}
lengths = {key: len(headers[key]) for key in headers}
new_data = []
......@@ -103,9 +103,9 @@ def format_exception_table(exceptions: list(tuple), worksheet_title: str,
new_data.append(row)
# Field
if isinstance(row_i, int):
row["loc"] = f"{to_char(col_i)}{row_i + 1}"
row["loc"] = f"Cell {to_char(col_i)}{row_i + 1}"
else:
row["loc"] = f"{to_char(col_i)}"
row["loc"] = f"Column {to_char(col_i)}"
lengths["loc"] = max(lengths["loc"], len(row["loc"]))
# Code
row["type"] = type(excep).__name__
......@@ -140,7 +140,7 @@ def format_exception_table(exceptions: list(tuple), worksheet_title: str,
# Fill for the messages is set to 0, if we want another column or align
# right we need to use lengths["mess"]
string_rep = f"There were failures during validation of worksheet '{worksheet_title}':\n\n"
string_rep = f"There were errors during the validation of worksheet '{worksheet_title}':\n\n"
for row in [headers, dividers] + new_data:
string_rep += ' {loc: <{fill}} '.format(loc=row["loc"],
fill=lengths["loc"])
......@@ -314,12 +314,10 @@ class XLSXConverter:
# entries: dict[str, list[SimpleNamespace]] = {}
exceptions = []
col_names = None
col_names = {}
for row_idx, row in enumerate(sheet.iter_rows(values_only=True)):
# Skip non-data rows and save the row containing column names
# Skip non-data rows
if row[row_type_column] is not None:
if row[row_type_column] == "IGNORE" and col_names is None:
col_names = row
continue
foreign_repr = ""
foreign = [] # A list of lists, each of which is: [path1, path2, ..., leaf, value]
......@@ -334,6 +332,7 @@ class XLSXConverter:
try:
if col_idx in data_column_paths:
path = data_column_paths[col_idx]
col_names[col_idx] = '.'.join(path)
if self._is_multiple_choice(path):
real_value = path.pop() # Last component is the enum value, insert above
# set up list
......@@ -350,8 +349,14 @@ class XLSXConverter:
value = self._validate_and_convert(value, path)
_set_in_nested(mydict=data, path=path, value=value, prefix=parent, skip=1)
continue
except (ValueError, jsonschema.ValidationError) as e:
exceptions.append((row_idx, col_idx, e))
except (ValueError, KeyError, jsonschema.ValidationError) as e:
# Append error for entire column only once
if isinstance(e, KeyError) and 'column' in str(e):
if len([err for ri, ci, err in exceptions
if ci == col_idx and isinstance(err, KeyError)]) == 0:
exceptions.append((None, col_idx, e))
else:
exceptions.append((row_idx, col_idx, e))
try:
# Find current position in tree
......@@ -422,7 +427,10 @@ class XLSXConverter:
"""
if value is None:
return value
subschema = self._get_subschema(path)
try:
subschema = self._get_subschema(path)
except KeyError as e:
raise KeyError("There is no entry in the schema that corresponds to this column.")
# Array handling only if schema says it's an array.
if subschema.get("type") == "array":
array_type = subschema["items"]["type"]
......
......@@ -133,6 +133,20 @@ def test_wrong_datatype():
assert str(caught.value).count("is not of type") == 2
def test_additional_column():
    """Check the error report for a data column that has no schema entry.

    Converting ``data/simple_data_broken.xlsx`` against
    ``data/simple_schema.json`` must raise ``jsonschema.ValidationError``.
    The error text must mention the missing-schema-entry message exactly
    once, and every line carrying that message must also contain `` M ``
    (presumably the letter of the offending spreadsheet column -- confirm
    against the test data).
    """
    expected = "no entry in the schema that corresponds to this column"

    with pytest.raises(jsonschema.ValidationError) as caught:
        convert.to_dict(xlsx=rfp("data/simple_data_broken.xlsx"),
                        schema=rfp("data/simple_schema.json"))
    report = str(caught.value)

    # Correct error: the message shows up at all.
    assert expected in report

    # Correct location: each line with the message also names " M ".
    flagged_lines = [line for line in report.split('\n') if expected in line]
    for line in flagged_lines:
        assert " M " in line

    # The column-level error is reported once, not once per row.
    assert report.count(expected) == 1
def test_faulty_foreign():
# Simple wrong foreign key
converter = convert.XLSXConverter(xlsx=rfp("data/simple_data_wrong_foreign.xlsx"),
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment