Skip to content
Snippets Groups Projects
Commit ab0788ab authored by Florian Spreckelsen
Browse files

ENH: Improve CSV TypeErrors in TableImporter

parent aae82b60
No related branches found
No related tags found
2 merge requests: !112 "Release 0.12.0", !111 "F better csv value error"
Pipeline #53472 passed
...@@ -31,7 +31,7 @@ import logging ...@@ -31,7 +31,7 @@ import logging
import pathlib import pathlib
from datetime import datetime from datetime import datetime
import caosdb as db import linkahead as db
import numpy as np import numpy as np
import pandas as pd import pandas as pd
from xlrd import XLRDError from xlrd import XLRDError
...@@ -537,6 +537,44 @@ class CSVImporter(TableImporter): ...@@ -537,6 +537,44 @@ class CSVImporter(TableImporter):
extra={'identifier': str(filename), extra={'identifier': str(filename),
'category': "inconsistency"}) 'category': "inconsistency"})
raise DataInconsistencyError(*ve.args) raise DataInconsistencyError(*ve.args)
except TypeError as te:
# Iterate through the columns and rows to identify
# problematic cells with wrong types.
df = pd.read_csv(filename, sep=sep,
converters=applicable_converters, dtype=None,
**kwargs)
error_dict = {}
columns_with_errors = []
for key, dtype in self.datatypes.items():
try:
df[key].astype(dtype)
except (TypeError, ValueError):
columns_with_errors.append(key)
if not columns_with_errors:
# We may have run into any other TypeError not caused
# by wrong datatypes within the table.
raise te
for ii, row in df.iterrows():
for name in columns_with_errors:
try:
# We need to check with astype to provoke the same
# errors, but that only works on DataFrames, so wrap
# the single value in a list and build a one-cell
# DataFrame from it.
pd.DataFrame([row[name]]).astype(self.datatypes[name])
except (TypeError, ValueError):
if ii not in error_dict:
error_dict[ii] = []
error_dict[ii].append(
(name, str(self.datatypes[name]).strip("<>"), str(type(row[name])).strip("<>"))
)
msg = "Elements with wrong datatypes encountered:\n"
for ii, error_list in error_dict.items():
msg += f"* row {ii}:\n"
for err in error_list:
msg += f"\t* column \"{err[0]}\": Expected \"{err[1]}\" but found \"{err[2]}\".\n"
msg += '\n'
raise DataInconsistencyError(msg)
df = self.check_dataframe(df, filename) df = self.check_dataframe(df, filename)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment