diff --git a/src/caosadvancedtools/table_json_conversion/convert.py b/src/caosadvancedtools/table_json_conversion/convert.py index e874c5352ff641401bee75ef828410a1aeb4414c..84e3b9547289a5afdc1553dfab6f7f1e19a1a0fb 100644 --- a/src/caosadvancedtools/table_json_conversion/convert.py +++ b/src/caosadvancedtools/table_json_conversion/convert.py @@ -83,7 +83,7 @@ def format_exception_table(exceptions: list(tuple), worksheet_title: str, return to_char(int(num / 26) - 1) + chr(int(num % 26) + 65) max_line_length -= 40 # Estimate of Field + Type space use - headers = {"loc": "Field", "type": "Error Type", "mess": ["Message"]} + headers = {"loc": "Location", "type": "Error Type", "mess": ["Message"]} lengths = {key: len(headers[key]) for key in headers} new_data = [] @@ -103,9 +103,9 @@ def format_exception_table(exceptions: list(tuple), worksheet_title: str, new_data.append(row) # Field if isinstance(row_i, int): - row["loc"] = f"{to_char(col_i)}{row_i + 1}" + row["loc"] = f"Cell {to_char(col_i)}{row_i + 1}" else: - row["loc"] = f"{to_char(col_i)}" + row["loc"] = f"Column {to_char(col_i)}" lengths["loc"] = max(lengths["loc"], len(row["loc"])) # Code row["type"] = type(excep).__name__ @@ -140,7 +140,7 @@ def format_exception_table(exceptions: list(tuple), worksheet_title: str, # Fill for the messages is set to 0, if we want another column or align # right we need to use lengths["mess"] - string_rep = f"There were failures during validation of worksheet '{worksheet_title}':\n\n" + string_rep = f"There were errors during the validation of worksheet '{worksheet_title}':\n\n" for row in [headers, dividers] + new_data: string_rep += ' {loc: <{fill}} '.format(loc=row["loc"], fill=lengths["loc"]) @@ -314,12 +314,10 @@ class XLSXConverter: # entries: dict[str, list[SimpleNamespace]] = {} exceptions = [] - col_names = None + col_names = {} for row_idx, row in enumerate(sheet.iter_rows(values_only=True)): - # Skip non-data rows and save the row containing column names + # Skip non-data rows if row[row_type_column] is not None: - if row[row_type_column] == "IGNORE" and col_names is None: - col_names = row continue foreign_repr = "" foreign = [] # A list of lists, each of which is: [path1, path2, ..., leaf, value] @@ -334,6 +332,7 @@ class XLSXConverter: try: if col_idx in data_column_paths: path = data_column_paths[col_idx] + col_names[col_idx] = '.'.join(path) if self._is_multiple_choice(path): real_value = path.pop() # Last component is the enum value, insert above # set up list @@ -350,8 +349,14 @@ class XLSXConverter: value = self._validate_and_convert(value, path) _set_in_nested(mydict=data, path=path, value=value, prefix=parent, skip=1) continue - except (ValueError, jsonschema.ValidationError) as e: - exceptions.append((row_idx, col_idx, e)) + except (ValueError, KeyError, jsonschema.ValidationError) as e: + # Append error for entire column only once + if isinstance(e, KeyError) and 'column' in str(e): + if len([err for ri, ci, err in exceptions + if ci == col_idx and isinstance(err, KeyError)]) == 0: + exceptions.append((None, col_idx, e)) + else: + exceptions.append((row_idx, col_idx, e)) try: # Find current position in tree @@ -422,7 +427,10 @@ class XLSXConverter: """ if value is None: return value - subschema = self._get_subschema(path) + try: + subschema = self._get_subschema(path) + except KeyError as e: + raise KeyError("There is no entry in the schema that corresponds to this column.") # Array handling only if schema says it's an array. if subschema.get("type") == "array": array_type = subschema["items"]["type"] diff --git a/unittests/table_json_conversion/test_read_xlsx.py b/unittests/table_json_conversion/test_read_xlsx.py index 6ee744efabbab9975dc0a64186550c92ebcdfb17..897cd10bfba2a99ee73c20aeb86ccdefb4e1e60b 100644 --- a/unittests/table_json_conversion/test_read_xlsx.py +++ b/unittests/table_json_conversion/test_read_xlsx.py @@ -133,6 +133,20 @@ def test_wrong_datatype(): assert str(caught.value).count("is not of type") == 2 +def test_additional_column(): + with pytest.raises(jsonschema.ValidationError) as caught: + convert.to_dict(xlsx=rfp("data/simple_data_broken.xlsx"), + schema=rfp("data/simple_schema.json")) + # Correct Error + assert "no entry in the schema that corresponds to this column" in str(caught.value) + # Correct Location + for line in str(caught.value).split('\n'): + if "no entry in the schema that corresponds to this column" in line: + assert " M " in line + # No additional column errors + assert str(caught.value).count("no entry in the schema that corresponds to this column") == 1 + + def test_faulty_foreign(): # Simple wrong foreign key converter = convert.XLSXConverter(xlsx=rfp("data/simple_data_wrong_foreign.xlsx"),