diff --git a/src/caosadvancedtools/table_importer.py b/src/caosadvancedtools/table_importer.py index 463c0d5a8cdb0a2c55cc3d27a919aa40b2576e42..468cdba3bbe23ec7507aa394ddf0cfe7e0be0e1a 100755 --- a/src/caosadvancedtools/table_importer.py +++ b/src/caosadvancedtools/table_importer.py @@ -68,14 +68,14 @@ def yes_no_converter(val): "Field should be 'Yes' or 'No', but is '{}'.".format(val)) -class TSV_Importer(object): +class TSVImporter(object): def __init__(self, converters, obligatory_columns=[], unique_columns=[]): raise NotImplementedError() -class XLS_Importer(object): +class XLSImporter(object): - def __init__(self, converters, obligatory_columns=None, unique_columns=None): + def __init__(self, converters, obligatory_columns=None, unique_keys=None): """ converters: dict with column names as keys and converter functions as values @@ -90,7 +90,7 @@ class XLS_Importer(object): self.sup = SuppressKnown() self.required_columns = list(converters.keys()) self.obligatory_columns = [] if obligatory_columns is None else obligatory_columns - self.unique_columns = [] if unique_columns is None else unique_columns + self.unique_keys = [] if unique_keys is None else unique_keys self.converters = converters def read_xls(self, filename): @@ -129,7 +129,7 @@ class XLS_Importer(object): self.check_columns(df, filename=filename) df = self.check_missing(df, filename=filename) - if len(self.unique_columns) > 0: + if len(self.unique_keys) > 0: df = self.check_unique(df, filename=filename) return df @@ -165,22 +165,23 @@ class XLS_Importer(object): df = df.copy() uniques = [] - subtable = df[list(self.unique_columns)] - for index, row in subtable.iterrows(): - element = tuple(row) - if element in uniques: - errmsg = ( - "The {}. row contains the values '{}'.\nThis value " - "combination should be unique, but was used in a previous " - "row in\n").format(index+1, element) - errmsg += "{}.".format(filename) if filename else "the file." - errmsg += "\nThis row will be ignored!" - - logger.warning(errmsg, extra={'identifier': filename, - 'category': "inconsistency"}) - df = df.drop(index) - else: - uniques.append(element) + for unique_columns in self.unique_keys: + subtable = df[list(unique_columns)] + for index, row in subtable.iterrows(): + element = tuple(row) + if element in uniques: + errmsg = ( + "The {}. row contains the values '{}'.\nThis value " + "combination should be unique, but was used in a previous " + "row in\n").format(index+1, element) + errmsg += "{}.".format(filename) if filename else "the file." + errmsg += "\nThis row will be ignored!" + + logger.warning(errmsg, extra={'identifier': filename, + 'category': "inconsistency"}) + df = df.drop(index) + else: + uniques.append(element) return df diff --git a/unittests/test_table_importer.py b/unittests/test_table_importer.py index 746141771f0584c19dcdd962ed916b487a382d26..b848a3ce796a126fb55e03efd2afba3c6c53f603 100644 --- a/unittests/test_table_importer.py +++ b/unittests/test_table_importer.py @@ -23,7 +23,7 @@ from tempfile import NamedTemporaryFile import numpy as np import pandas as pd from caosadvancedtools.datainconsistency import DataInconsistencyError -from caosadvancedtools.table_importer import (XLS_Importer, name_converter, +from caosadvancedtools.table_importer import (XLSImporter, name_converter, yes_no_converter) @@ -45,11 +45,11 @@ class ConverterTest(unittest.TestCase): self.assertRaises(ValueError, name_converter, "Max Mustermann") -class XLS_ImporterTest(unittest.TestCase): +class XLSImporterTest(unittest.TestCase): def setUp(self): - self.importer = XLS_Importer( + self.importer = XLSImporter( converters={'a': str, 'b': int, 'c': float, 'd': yes_no_converter}, - obligatory_columns=['a', 'b'], unique_columns=['a', 'b']) + obligatory_columns=['a', 'b'], unique_keys=[('a', 'b')]) self.valid_df = pd.DataFrame( [['a', 1, 2.0, 'yes']], columns=['a', 'b', 'c', 'd'])