diff --git a/src/caosadvancedtools/table_importer.py b/src/caosadvancedtools/table_importer.py
index 13305ac01f83c979c8bcdfcd268756774e89e9cb..3b67e932cf684682b94739cac5a2c019029e70eb 100755
--- a/src/caosadvancedtools/table_importer.py
+++ b/src/caosadvancedtools/table_importer.py
@@ -21,8 +21,10 @@
 import logging
 
 import numpy as np
 import pandas as pd
-
 from caosadvancedtools.suppressKnown import SuppressKnown
+from xlrd import XLRDError
+
+from .datainconsistency import DataInconsistencyError
 
 logger = logging.getLogger("caosadvancedtools")
@@ -68,16 +70,35 @@ class XLS_Importer(object):
         self.converters = converters
 
     def read_xls(self, filename):
-        xls_file = pd.io.excel.ExcelFile(filename)
-
-        if len(xls_file .sheet_names) > 1:
+        try:
+            xls_file = pd.io.excel.ExcelFile(filename)
+        except XLRDError as e:
+            # Log first, then surface the failure as a data inconsistency.
+            errmsg = "Cannot read {}.".format(filename or "the file")
+            logger.warning(
+                errmsg,
+                extra={'identifier': str(filename),
+                       'category': "inconsistency"})
+            raise DataInconsistencyError(*e.args)
+
+        if len(xls_file.sheet_names) > 1:
             # Multiple sheets is the default now. Only show in debug
             logger.debug("Excel file {} contains multiple sheets. "
                          "All but the first are being ignored.".format(
                              filename
                          ))
 
-        df = xls_file.parse(converters=self.converters)
+        try:
+            df = xls_file.parse(converters=self.converters)
+        except Exception as e:
+            # Any parser failure is reported as inconsistent input data.
+            errmsg = "Cannot parse {}.".format(filename or "the file")
+            logger.warning(
+                errmsg,
+                extra={'identifier': str(filename),
+                       'category': "inconsistency"})
+            raise DataInconsistencyError(*e.args)
+
         self.check_columns(df, filename=filename)
         df = self.check_missing(df, filename=filename)
 
@@ -90,8 +111,14 @@ class XLS_Importer(object):
         for col in self.required_columns:
             if col not in df.columns:
                 errmsg = "Column '{}' missing in ".format(col)
-                errmsg += "{}.".format(filename) if filename else "the file."
-                raise ValueError(errmsg)
+                # filename may be None/empty: fall back to a generic name.
+                errmsg += "{}.".format(filename or "the file")
+                errmsg += " Stopping to treat this file..."
+                logger.warning(
+                    errmsg,
+                    extra={'identifier': str(filename),
+                           'category': "inconsistency"})
+                raise DataInconsistencyError(errmsg)
 
     def check_unique(self, df, filename=None):
         df = df.copy()