Skip to content
Snippets Groups Projects
Commit 4ad7df78 authored by Henrik tom Wörden
Browse files

MAINT: include proper error handling when reading xls files

parent f2906716
Branches
Tags
1 merge request: !22 "Release 0.3"
...@@ -21,8 +21,10 @@ import logging ...@@ -21,8 +21,10 @@ import logging
import numpy as np import numpy as np
import pandas as pd import pandas as pd
from caosadvancedtools.suppressKnown import SuppressKnown from caosadvancedtools.suppressKnown import SuppressKnown
from xlrd import XLRDError
from .datainconsistency import DataInconsistencyError
logger = logging.getLogger("caosadvancedtools") logger = logging.getLogger("caosadvancedtools")
...@@ -68,7 +70,16 @@ class XLS_Importer(object): ...@@ -68,7 +70,16 @@ class XLS_Importer(object):
self.converters = converters self.converters = converters
def read_xls(self, filename): def read_xls(self, filename):
try:
xls_file = pd.io.excel.ExcelFile(filename) xls_file = pd.io.excel.ExcelFile(filename)
except XLRDError as e:
errmsg = ("Cannot read {}.".format(filename) if "\n"+filename+"\n"
else "the file.")
logger.warning(
errmsg,
extra={'identifier': str(filename),
'category': "inconsistency"})
raise DataInconsistencyError(*e.args)
if len(xls_file.sheet_names) > 1: if len(xls_file.sheet_names) > 1:
# Multiple sheets is the default now. Only show in debug # Multiple sheets is the default now. Only show in debug
...@@ -77,7 +88,17 @@ class XLS_Importer(object): ...@@ -77,7 +88,17 @@ class XLS_Importer(object):
filename filename
)) ))
try:
df = xls_file.parse(converters=self.converters) df = xls_file.parse(converters=self.converters)
except Exception as e:
errmsg = ("Cannot parse {}.".format(filename) if "\n"+filename+"\n"
else "the file.")
logger.warning(
errmsg,
extra={'identifier': str(filename),
'category': "inconsistency"})
raise DataInconsistencyError(*e.args)
self.check_columns(df, filename=filename) self.check_columns(df, filename=filename)
df = self.check_missing(df, filename=filename) df = self.check_missing(df, filename=filename)
...@@ -90,8 +111,14 @@ class XLS_Importer(object): ...@@ -90,8 +111,14 @@ class XLS_Importer(object):
for col in self.required_columns: for col in self.required_columns:
if col not in df.columns: if col not in df.columns:
errmsg = "Column '{}' missing in ".format(col) errmsg = "Column '{}' missing in ".format(col)
errmsg += "{}.".format(filename) if filename else "the file." errmsg += ("{}.".format(filename) if "\n"+filename+"\n"
raise ValueError(errmsg) else "the file.")
errmsg += "Stopping to treat this file..."
logger.warning(
errmsg,
extra={'identifier': str(filename),
'category': "inconsistency"})
raise DataInconsistencyError(errmsg)
def check_unique(self, df, filename=None): def check_unique(self, df, filename=None):
df = df.copy() df = df.copy()
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment