Skip to content
Snippets Groups Projects

XLSX-Konverter: Bessere Fehlermeldung bei inkorrektem Typ in Spalte, zusätzlicher Spalte

Merged I. Nüske requested to merge f-xlsx-converter-error-messages into dev
All threads resolved!
Files
4
@@ -25,6 +25,7 @@ from __future__ import annotations
import datetime
import itertools
import sys
import textwrap
from functools import reduce
from operator import getitem
from types import SimpleNamespace
@@ -95,65 +96,52 @@ def _format_exception_table(exceptions: list[tuple], worksheet_title: str,
exceptions.sort(key=lambda tup: tup[1])
for row_i, col_i, excep in exceptions:
if column_names is not None:
# Update Names
# Add a line with information about the current column
if current_column != col_i:
current_column = col_i
new_data.append({
"loc": f"\nErrors in column '{column_names[col_i]}':",
"type": "", "mess": [""]
})
# Setup
row = {}
new_data.append(row)
# Field
# Setup for current Exception
curr_err_data = {}
new_data.append(curr_err_data)
# Get field
if isinstance(row_i, int):
row["loc"] = f"Cell {_column_id_to_chars(col_i)}{row_i + 1}"
curr_err_data["loc"] = f"Cell {_column_id_to_chars(col_i)}{row_i + 1}"
else:
row["loc"] = f"Column {_column_id_to_chars(col_i)}"
lengths["loc"] = max(lengths["loc"], len(row["loc"]))
# Code
row["type"] = type(excep).__name__
lengths["type"] = max(lengths["type"], len(row["type"]))
# Message
curr_err_data["loc"] = f"Column {_column_id_to_chars(col_i)}"
lengths["loc"] = max(lengths["loc"], len(curr_err_data["loc"]))
# Add error code
curr_err_data["type"] = type(excep).__name__
lengths["type"] = max(lengths["type"], len(curr_err_data["type"]))
# Format message - split into lines
lines = str(excep).split('\n')
new_lines = []
for line in lines:
if len(line) > max_line_length:
words = line.split(' ')
current = ""
for word, next_word in zip(words, words[1:] + [""]):
if current != "":
current += " "
current += word
if len(current + next_word) > max_line_length:
lengths["mess"] = max(lengths["mess"], len(current))
new_lines.append(current)
current = ""
if current != "":
lengths["mess"] = max(lengths["mess"], len(current))
new_lines.append(current)
elif len(line) > 0:
lengths["mess"] = max(lengths["mess"], len(line))
new_lines.append(line)
new_lines += textwrap.wrap(line, max_line_length, break_long_words=False)
for line in new_lines:
lengths["mess"] = max(lengths["mess"], len(line))
if new_lines == []:
new_lines = [""]
row["mess"] = new_lines
curr_err_data["mess"] = new_lines
# Generate underline for each header
dividers = {key: '' * l for key, l in lengths.items()}
dividers["mess"] = [dividers["mess"]]
# Fill for the messages is set to 0, if we want another column or align
# right we need to use lengths["mess"]
# Fill with spaces for alignment
string_rep = f"There were errors during the validation of worksheet '{worksheet_title}':\n\n"
for row in [headers, dividers] + new_data:
string_rep += ' {loc: <{fill}} '.format(loc=row["loc"],
for curr_err_data in [headers, dividers] + new_data:
string_rep += ' {loc: <{fill}} '.format(loc=curr_err_data["loc"],
fill=lengths["loc"])
string_rep += ' {typ: <{fill}} '.format(typ=row["type"],
string_rep += ' {typ: <{fill}} '.format(typ=curr_err_data["type"],
fill=lengths["type"])
string_rep += ' {mes: <{fill}}\n'.format(mes=row["mess"][0], fill=0)
for line in row["mess"][1:]:
# Front padding
string_rep += ' ' * (lengths["loc"] + lengths["type"] + 7)
# Fill for the messages is set to 0; if we want another column or want to
# align right, we need to use lengths["mess"]
string_rep += ' {mes: <{fill}}\n'.format(mes=curr_err_data["mess"][0], fill=0)
for line in curr_err_data["mess"][1:]:
# Front padding for lines without location and error type
string_rep += ' ' * (lengths["loc"] + lengths["type"] + 6)
string_rep += ' {mes: <{fill}}\n'.format(mes=line, fill=0)
return string_rep
@@ -194,7 +182,11 @@ class XLSXConverter:
self._workbook = load_workbook(xlsx)
self._schema = read_or_dict(schema)
self._defining_path_index = xlsx_utils.get_defining_paths(self._workbook)
self._check_columns(fail_fast=strict)
try:
self._check_columns(fail_fast=strict)
except KeyError as e:
raise jsonschema.ValidationError(f"Malformed metadata: Cannot parse paths. "
f"Unknown path: {e}") from e
self._handled_sheets: set[str] = set()
self._result: dict = {}
self._errors: dict = {}
@@ -220,9 +212,29 @@ class XLSXConverter:
self._handled_sheets = set()
self._result = {}
self._errors = {}
for sheetname in self._workbook.sheetnames:
if sheetname not in self._handled_sheets:
self._handle_sheet(self._workbook[sheetname], fail_later=collect_errors)
if not collect_errors:
for sheetname in self._workbook.sheetnames:
if sheetname not in self._handled_sheets:
self._handle_sheet(self._workbook[sheetname], fail_later=collect_errors)
else:
# Collect errors from converting
exceptions = []
for sheetname in self._workbook.sheetnames:
if sheetname not in self._handled_sheets:
try:
self._handle_sheet(self._workbook[sheetname], fail_later=collect_errors)
except jsonschema.ValidationError as e:
exceptions.append(e)
# do not collect errors from sheet again
self._handled_sheets.add(sheetname)
if len(exceptions) == 1:
raise exceptions[0]
elif len(exceptions) > 1:
mess = "There were errors during the validation of several worksheets:\n\n"
mess += '\n\n'.join([str(e).replace("There were errors during the validation of worksheet",
"In worksheet")
for e in exceptions])
raise jsonschema.ValidationError(mess)
if validate:
jsonschema.validate(self._result, self._schema)
if self._errors:
@@ -323,6 +335,7 @@ class XLSXConverter:
# entries: dict[str, list[SimpleNamespace]] = {}
exceptions = []
warns = []
col_names = {}
for row_idx, row in enumerate(sheet.iter_rows(values_only=True)):
# Skip non-data rows
@@ -359,7 +372,12 @@ class XLSXConverter:
_set_in_nested(mydict=data, path=path, value=value, prefix=parent, skip=1)
continue
elif sheet.cell(col_type_row+1, col_idx+1).value is None:
warn(f"No metadata configured for column {_column_id_to_chars(col_idx)}.")
mess = (f"\nNo metadata configured for column "
f"'{_column_id_to_chars(col_idx)}' in worksheet "
f"'{sheet.title}'.\n")
if mess not in warns:
print(mess, file=sys.stderr)
warns.append(mess) # Prevent multiple instances of same warning
except (ValueError, KeyError, jsonschema.ValidationError) as e:
# Append error for entire column only once
if isinstance(e, KeyError) and 'column' in str(e):
Loading