diff --git a/src/caosadvancedtools/table_importer.py b/src/caosadvancedtools/table_importer.py index 5efd0500a4c5a797a27a92caf0cd2a49165fddd2..90f82e7e111ecb310ca2e463e4868bd27429c7e8 100755 --- a/src/caosadvancedtools/table_importer.py +++ b/src/caosadvancedtools/table_importer.py @@ -31,7 +31,7 @@ import logging import pathlib from datetime import datetime -import caosdb as db +import linkahead as db import numpy as np import pandas as pd from xlrd import XLRDError @@ -537,6 +537,44 @@ class CSVImporter(TableImporter): extra={'identifier': str(filename), 'category': "inconsistency"}) raise DataInconsistencyError(*ve.args) + except TypeError as te: + # Iterate through the columns and rows to identify + # problematic cells with wrong types. + df = pd.read_csv(filename, sep=sep, + converters=applicable_converters, dtype=None, + **kwargs) + error_dict = {} + columns_with_errors = [] + for key, dtype in self.datatypes.items(): + try: + df[key].astype(dtype) + except (TypeError, ValueError): + columns_with_errors.append(key) + if not columns_with_errors: + # We may have run into any other TypeError not caused + # by wrong datatypes within the table. + raise te + for ii, row in df.iterrows(): + for name in columns_with_errors: + try: + # we need to check with astype to provoke the + # same errors, but that only works on + # Dataframes, so cast value to list to + # DataFrame. + pd.DataFrame([row[name]]).astype(self.datatypes[name]) + except (TypeError, ValueError): + if ii not in error_dict: + error_dict[ii] = [] + error_dict[ii].append( + (name, str(self.datatypes[name]).strip("<>"), str(type(row[name])).strip("<>")) + ) + msg = "Elements with wrong datatypes encountered:\n" + for ii, error_list in error_dict.items(): + msg += f"* row {ii}:\n" + for err in error_list: + msg += f"\t* column \"{err[0]}\": Expected \"{err[1]}\" but found \"{err[2]}\".\n" + msg += '\n' + raise DataInconsistencyError(msg) df = self.check_dataframe(df, filename)