Skip to content
Snippets Groups Projects
Commit 4ad7df78 authored by Henrik tom Wörden
Browse files

MAINT: include proper error handling when reading xls files

parent f2906716
No related branches found
No related tags found
1 merge request: !22 "Release 0.3"
......@@ -21,8 +21,10 @@ import logging
import numpy as np
import pandas as pd
from caosadvancedtools.suppressKnown import SuppressKnown
from xlrd import XLRDError
from .datainconsistency import DataInconsistencyError
logger = logging.getLogger("caosadvancedtools")
......@@ -68,16 +70,35 @@ class XLS_Importer(object):
self.converters = converters
def read_xls(self, filename):
    """Open the Excel file ``filename`` and parse its first sheet.

    Failures while opening or parsing the file are logged as warnings
    (tagged with the file as ``identifier`` and the category
    ``"inconsistency"``) and re-raised as ``DataInconsistencyError`` so
    that callers only have to handle one exception type.

    Parameters
    ----------
    filename : str
        Path of the Excel file to read.

    Raises
    ------
    DataInconsistencyError
        If the file cannot be opened (``XLRDError``) or cannot be parsed.
    """
    try:
        xls_file = pd.io.excel.ExcelFile(filename)
    except XLRDError as e:
        # Fall back to a generic wording when no usable name is available;
        # the full sentence is built in both cases.
        errmsg = "Cannot read {}.".format(filename if filename else "the file")
        logger.warning(
            errmsg,
            extra={'identifier': str(filename),
                   'category': "inconsistency"})
        # Chain explicitly so the low-level cause stays in the traceback.
        raise DataInconsistencyError(*e.args) from e

    if len(xls_file.sheet_names) > 1:
        # Multiple sheets is the default now. Only show in debug
        logger.debug("Excel file {} contains multiple sheets. "
                     "All but the first are being ignored.".format(
                         filename
                     ))

    try:
        df = xls_file.parse(converters=self.converters)
    except Exception as e:
        # Deliberately broad: any parser failure is reported as an
        # inconsistency of the input file.
        errmsg = "Cannot parse {}.".format(filename if filename else "the file")
        logger.warning(
            errmsg,
            extra={'identifier': str(filename),
                   'category': "inconsistency"})
        raise DataInconsistencyError(*e.args) from e

    self.check_columns(df, filename=filename)
    df = self.check_missing(df, filename=filename)
......@@ -90,8 +111,14 @@ class XLS_Importer(object):
# Every required column must be present; the first missing one is logged
# as an inconsistency and aborts the treatment of this file.
for col in self.required_columns:
    if col not in df.columns:
        errmsg = "Column '{}' missing in ".format(col)
        # filename may be None/empty: test it directly instead of
        # concatenating it into an (always truthy) string.
        errmsg += "{}.".format(filename) if filename else "the file."
        errmsg += " Stopping to treat this file..."
        logger.warning(
            errmsg,
            extra={'identifier': str(filename),
                   'category': "inconsistency"})
        raise DataInconsistencyError(errmsg)
def check_unique(self, df, filename=None):
df = df.copy()
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment