Skip to content
Snippets Groups Projects
Commit a6910d39 authored by I. Nüske
Browse files

MNT: Added a warning when column metadata is not configured, and a better...

MNT: Added a warning when column metadata is not configured, and a better error message when a column has a type but no path.
parent 7d4e51cd
No related branches found
No related tags found
2 merge requests: !128 "MNT: Added a warning when column metadata is not configured, and a better...", !120 "XLSX-Konverter: Bessere Fehlermeldung bei inkorrektem Typ in Spalte, zusätzlicher Spalte"
Pipeline #57500 passed with warnings
...@@ -45,10 +45,16 @@ def _strict_bool(value: Any) -> bool: ...@@ -45,10 +45,16 @@ def _strict_bool(value: Any) -> bool:
return value return value
raise TypeError(f"Not a good boolean: {repr(value)}") raise TypeError(f"Not a good boolean: {repr(value)}")
def _column_id_to_chars(num):
def format_exception_table(exceptions: list(tuple), worksheet_title: str, """Converts a column id (zero based) to the corresponding string
column_names: Optional[dict, list] = None, representation, e.g. 0 -> 'A', 97 -> 'CT'"""
max_line_length: Optional[int] = 120) -> str: if num < 0:
return ""
return _column_id_to_chars(int(num / 26) - 1) + chr(int(num % 26) + 65)
def _format_exception_table(exceptions: list(tuple), worksheet_title: str,
column_names: Optional[dict, list] = None,
max_line_length: Optional[int] = 120) -> str:
""" """
Given a list of tuples containing a row and column number as well as an Given a list of tuples containing a row and column number as well as an
exception in that order, and the title of the current worksheet, returns exception in that order, and the title of the current worksheet, returns
...@@ -77,10 +83,6 @@ def format_exception_table(exceptions: list(tuple), worksheet_title: str, ...@@ -77,10 +83,6 @@ def format_exception_table(exceptions: list(tuple), worksheet_title: str,
string_rep: str string_rep: str
Table containing the given exceptions Table containing the given exceptions
""" """
def to_char(num):
    """Return the spreadsheet column letters for the zero-based index ``num``.

    For example 0 -> 'A', 26 -> 'AA', 97 -> 'CT'.  A negative ``num`` gives an
    empty string, which also terminates the recursion.
    """
    if num < 0:
        return ""
    # divmod stays in exact integer arithmetic; float division would lose
    # precision for very large indices.
    quotient, remainder = divmod(num, 26)
    # There is no zero digit in column letters, hence the ``- 1`` on the carry.
    return to_char(quotient - 1) + chr(int(remainder) + 65)
max_line_length -= 40 # Estimate of Field + Type space use max_line_length -= 40 # Estimate of Field + Type space use
headers = {"loc": "Location", "type": "Error Type", "mess": ["Message"]} headers = {"loc": "Location", "type": "Error Type", "mess": ["Message"]}
...@@ -103,9 +105,9 @@ def format_exception_table(exceptions: list(tuple), worksheet_title: str, ...@@ -103,9 +105,9 @@ def format_exception_table(exceptions: list(tuple), worksheet_title: str,
new_data.append(row) new_data.append(row)
# Field # Field
if isinstance(row_i, int): if isinstance(row_i, int):
row["loc"] = f"Cell {to_char(col_i)}{row_i + 1}" row["loc"] = f"Cell {_column_id_to_chars(col_i)}{row_i + 1}"
else: else:
row["loc"] = f"Column {to_char(col_i)}" row["loc"] = f"Column {_column_id_to_chars(col_i)}"
lengths["loc"] = max(lengths["loc"], len(row["loc"])) lengths["loc"] = max(lengths["loc"], len(row["loc"]))
# Code # Code
row["type"] = type(excep).__name__ row["type"] = type(excep).__name__
...@@ -296,12 +298,17 @@ class XLSXConverter: ...@@ -296,12 +298,17 @@ class XLSXConverter:
If True, do not fail with unresolvable foreign definitions, but collect all errors. If True, do not fail with unresolvable foreign definitions, but collect all errors.
""" """
row_type_column = xlsx_utils.get_row_type_column_index(sheet) row_type_column = xlsx_utils.get_row_type_column_index(sheet)
col_type_row = xlsx_utils.get_column_type_row_index(sheet)
foreign_columns = xlsx_utils.get_foreign_key_columns(sheet) foreign_columns = xlsx_utils.get_foreign_key_columns(sheet)
foreign_column_paths = {col.index: col.path for col in foreign_columns.values()} foreign_column_paths = {col.index: col.path for col in foreign_columns.values()}
data_columns = xlsx_utils.get_data_columns(sheet) data_columns = xlsx_utils.get_data_columns(sheet)
data_column_paths = {col.index: col.path for col in data_columns.values()} data_column_paths = {col.index: col.path for col in data_columns.values()}
# Parent path, insert in correct order. # Parent path, insert in correct order.
parent, proper_name = xlsx_utils.get_path_position(sheet) try:
parent, proper_name = xlsx_utils.get_path_position(sheet)
except UnboundLocalError as e:
raise jsonschema.ValidationError(f"Malformed metadata: Cannot parse "
f"paths in worksheet '{sheet.title}'.") from e
if parent: if parent:
parent_sheetname = xlsx_utils.get_worksheet_for_path(parent, self._defining_path_index) parent_sheetname = xlsx_utils.get_worksheet_for_path(parent, self._defining_path_index)
if parent_sheetname not in self._handled_sheets: if parent_sheetname not in self._handled_sheets:
...@@ -349,6 +356,8 @@ class XLSXConverter: ...@@ -349,6 +356,8 @@ class XLSXConverter:
value = self._validate_and_convert(value, path) value = self._validate_and_convert(value, path)
_set_in_nested(mydict=data, path=path, value=value, prefix=parent, skip=1) _set_in_nested(mydict=data, path=path, value=value, prefix=parent, skip=1)
continue continue
elif sheet.cell(col_type_row+1, col_idx+1).value is None:
warn(f"No metadata configured for column {_column_id_to_chars(col_idx)}.")
except (ValueError, KeyError, jsonschema.ValidationError) as e: except (ValueError, KeyError, jsonschema.ValidationError) as e:
# Append error for entire column only once # Append error for entire column only once
if isinstance(e, KeyError) and 'column' in str(e): if isinstance(e, KeyError) and 'column' in str(e):
...@@ -372,8 +381,8 @@ class XLSXConverter: ...@@ -372,8 +381,8 @@ class XLSXConverter:
self._errors[(sheet.title, row_idx)] = kerr.definitions self._errors[(sheet.title, row_idx)] = kerr.definitions
if exceptions != []: if exceptions != []:
exception_table = format_exception_table(exceptions, sheet.title, exception_table = _format_exception_table(exceptions, sheet.title,
col_names) col_names)
raise jsonschema.ValidationError(exception_table) raise jsonschema.ValidationError(exception_table)
self._handled_sheets.add(sheet.title) self._handled_sheets.add(sheet.title)
......
...@@ -258,6 +258,16 @@ def get_row_type_column_index(sheet: Worksheet): ...@@ -258,6 +258,16 @@ def get_row_type_column_index(sheet: Worksheet):
raise ValueError("The column which defines row types (COL_TYPE, PATH, ...) is missing") raise ValueError("The column which defines row types (COL_TYPE, PATH, ...) is missing")
def get_column_type_row_index(sheet: Worksheet) -> int:
    """Return the row index (0-indexed) of the row which defines the column types.

    The sheet is scanned row by row; the first cell whose value equals
    ``RowType.COL_TYPE.name`` determines the result.

    Parameters
    ----------
    sheet : Worksheet
        The worksheet to search.

    Returns
    -------
    int
        Zero-based index of the row containing the ``COL_TYPE`` marker.

    Raises
    ------
    ValueError
        If no cell in the sheet holds the ``COL_TYPE`` marker.
    """
    for row in sheet.rows:
        for cell in row:
            if cell.value == RowType.COL_TYPE.name:
                # ``cell.row`` counts from 1; callers expect a 0-based index.
                return cell.row - 1
    # The message names what this function actually looks for: the row with
    # the column types, not the column with the row types.
    raise ValueError("The row which defines column types (COL_TYPE) is missing")
def get_subschema(path: list[str], schema: dict) -> dict: def get_subschema(path: list[str], schema: dict) -> dict:
"""Return the sub schema at ``path``.""" """Return the sub schema at ``path``."""
if path: if path:
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment