MNT: Added a warning when column metadata is not configured, and a better...

MNT: Added a warning when column metadata is not configured, and a better error message when a column has a type but no path.

MNT: Added a warning when column metadata is not configured, and a better...
a6910d39 · I. Nüske · 7d4e51cd · a6910d39 · a6910d39
Commit a6910d39 authored 4 months ago by I. Nüske
--- a/src/caosadvancedtools/table_json_conversion/convert.py
+++ b/src/caosadvancedtools/table_json_conversion/convert.py
@@ -45,10 +45,16 @@ def _strict_bool(value: Any) -> bool:
        return value
    raise TypeError(f"Not a good boolean: {repr(value)}")
+def _column_id_to_chars(num):
-def format_exception_table(exceptions: list(tuple), worksheet_title: str,
+    """Converts a column id (zero based) to the corresponding string
-                           column_names: Optional[dict, list] = None,
+    representation, e.g. 0 -> 'A', 97 -> 'CT'.  Negative ids give ''."""
-                           max_line_length: Optional[int] = 120) -> str:
+    if num < 0:
+        return ""
+    # Floor division keeps the arithmetic exact; "- 1" because letters have no zero digit.
+    return _column_id_to_chars(int(num // 26) - 1) + chr(int(num % 26) + 65)
+def _format_exception_table(exceptions: list(tuple), worksheet_title: str,
+                            column_names: Optional[dict, list] = None,
+                            max_line_length: Optional[int] = 120) -> str:
    """
    Given a list of tuples containing a row and column number as well as an
    exception in that order, and the title of the current worksheet, returns
@@ -77,10 +83,6 @@ def format_exception_table(exceptions: list(tuple), worksheet_title: str,
    string_rep:         str
                        Table containing the given exceptions
    """
-    def to_char(num):
-        if num < 0:
-            return ""
-        return to_char(int(num / 26) - 1) + chr(int(num % 26) + 65)
    max_line_length -= 40             # Estimate of Field + Type space use
    headers = {"loc": "Location", "type": "Error Type", "mess": ["Message"]}
@@ -103,9 +105,9 @@ def format_exception_table(exceptions: list(tuple), worksheet_title: str,
        new_data.append(row)
        # Field
        if isinstance(row_i, int):
-            row["loc"] = f"Cell {to_char(col_i)}{row_i + 1}"
+            row["loc"] = f"Cell {_column_id_to_chars(col_i)}{row_i + 1}"
        else:
-            row["loc"] = f"Column {to_char(col_i)}"
+            row["loc"] = f"Column {_column_id_to_chars(col_i)}"
        lengths["loc"] = max(lengths["loc"], len(row["loc"]))
        # Code
        row["type"] = type(excep).__name__
@@ -296,12 +298,17 @@ class XLSXConverter:
          If True, do not fail with unresolvable foreign definitions, but collect all errors.
        """
        row_type_column = xlsx_utils.get_row_type_column_index(sheet)
+        col_type_row = xlsx_utils.get_column_type_row_index(sheet)
        foreign_columns = xlsx_utils.get_foreign_key_columns(sheet)
        foreign_column_paths = {col.index: col.path for col in foreign_columns.values()}
        data_columns = xlsx_utils.get_data_columns(sheet)
        data_column_paths = {col.index: col.path for col in data_columns.values()}
        # Parent path, insert in correct order.
-        parent, proper_name = xlsx_utils.get_path_position(sheet)
+        try:
+            parent, proper_name = xlsx_utils.get_path_position(sheet)
+        except UnboundLocalError as e:
+            raise jsonschema.ValidationError(f"Malformed metadata: Cannot parse "
+                                             f"paths in worksheet '{sheet.title}'.") from e
        if parent:
            parent_sheetname = xlsx_utils.get_worksheet_for_path(parent, self._defining_path_index)
            if parent_sheetname not in self._handled_sheets:
@@ -349,6 +356,8 @@ class XLSXConverter:
                            value = self._validate_and_convert(value, path)
                            _set_in_nested(mydict=data, path=path, value=value, prefix=parent, skip=1)
                        continue
+                    elif sheet.cell(col_type_row+1, col_idx+1).value is None:
+                        warn(f"No metadata configured for column {_column_id_to_chars(col_idx)}.")
                except (ValueError, KeyError, jsonschema.ValidationError) as e:
                    # Append error for entire column only once
                    if isinstance(e, KeyError) and 'column' in str(e):
@@ -372,8 +381,8 @@ class XLSXConverter:
                self._errors[(sheet.title, row_idx)] = kerr.definitions
        if exceptions != []:
-            exception_table = format_exception_table(exceptions, sheet.title,
+            exception_table = _format_exception_table(exceptions, sheet.title,
-                                                     col_names)
+                                                      col_names)
            raise jsonschema.ValidationError(exception_table)
        self._handled_sheets.add(sheet.title)

--- a/src/caosadvancedtools/table_json_conversion/xlsx_utils.py
+++ b/src/caosadvancedtools/table_json_conversion/xlsx_utils.py
@@ -258,6 +258,16 @@ def get_row_type_column_index(sheet: Worksheet):
    raise ValueError("The column which defines row types (COL_TYPE, PATH, ...) is missing")
+def get_column_type_row_index(sheet: Worksheet):
+    """Return the row index (0-indexed) of the row which defines the column types.
+
+    The sheet is scanned row by row and the row of the first cell whose value equals
+    ``RowType.COL_TYPE.name`` is used.  Raises ``ValueError`` if no such cell exists.
+    """
+    for row in sheet.rows:
+        for cell in row:
+            if cell.value == RowType.COL_TYPE.name:
+                # ``cell.row`` is reduced by one to match the 0-indexed return value.
+                return cell.row - 1
+    raise ValueError("The row which defines column types (COL_TYPE, SCALAR, ...) is missing")
 def get_subschema(path: list[str], schema: dict) -> dict:
    """Return the sub schema at ``path``."""
    if path: