diff --git a/.gitignore b/.gitignore index 30f02d1c5af28706cb5c6e661bbaba59c5bf937e..62c90a0b5a3713a1672e91a0ca1a3818b87eaa84 100644 --- a/.gitignore +++ b/.gitignore @@ -4,3 +4,5 @@ __pycache__ *cache.db *.egg-info .docker/cert +version.py +.eggs/ diff --git a/src/caosadvancedtools/table_importer.py b/src/caosadvancedtools/table_importer.py index 17119d0cba4ea7b3f0d29e0c8ffdb93938bd4aa0..c4ef3104eff3361a65b4ecd1a87ea036836bc559 100755 --- a/src/caosadvancedtools/table_importer.py +++ b/src/caosadvancedtools/table_importer.py @@ -47,9 +47,9 @@ def yes_no_converter(val): "Field should be 'Yes' or 'No', but is '{}'.".format(val)) -class XLS_Importer(object): +class XLSImporter(object): - def __init__(self, converters, obligatory_columns=None, unique_columns=None): + def __init__(self, converters, obligatory_columns=None, unique_keys=None): """ converters: dict with column names as keys and converter functions as @@ -65,7 +65,7 @@ class XLS_Importer(object): self.sup = Suppressable(logger=logger) self.required_columns = list(converters.keys()) self.obligatory_columns = [] if obligatory_columns is None else obligatory_columns - self.unique_columns = [] if unique_columns is None else unique_columns + self.unique_keys = [] if unique_keys is None else unique_keys self.converters = converters def read_xls(self, filename): @@ -80,7 +80,7 @@ class XLS_Importer(object): self.check_columns(df, filename=filename) df = self.check_missing(df, filename=filename) - if len(self.unique_columns) > 0: + if len(self.unique_keys) > 0: df = self.check_unique(df, filename=filename) return df @@ -96,22 +96,23 @@ class XLS_Importer(object): df = df.copy() uniques = [] - subtable = df[list(self.unique_columns)] - for index, row in subtable.iterrows(): - element = tuple(row) - if element in uniques: - errmssg = ( - "The {}. row contains the values '{}'.\nThis value " - "combination should be unique, but was used in a previous " - "row in\n").format(index+1, element) - errmssg += "{}.".format(filename) if filename else "the file." - errmssg += "\nThis row will be ignored!" + for unique_columns in self.unique_keys: + subtable = df[list(unique_columns)] + for index, row in subtable.iterrows(): + element = tuple(row) + if element in uniques: + errmssg = ( + "The {}. row contains the values '{}'.\nThis value " + "combination should be unique, but was used in a previous " + "row in\n").format(index+1, element) + errmssg += "{}.".format(filename) if filename else "the file." + errmssg += "\nThis row will be ignored!" - self.sup.warning(errmssg, identifier=filename, - category="inconsistency") - df = df.drop(index) - else: - uniques.append(element) + self.sup.warning(errmssg, identifier=filename, + category="inconsistency") + df = df.drop(index) + else: + uniques.append(element) return df diff --git a/unittests/test_table_importer.py b/unittests/test_table_importer.py index 49debb43f20704c301a77cf2386307c9d3de7d1b..13057b09ad99a736d555800a4737b7fd02597354 100644 --- a/unittests/test_table_importer.py +++ b/unittests/test_table_importer.py @@ -22,7 +22,7 @@ from tempfile import NamedTemporaryFile import numpy as np import pandas as pd -from caosadvancedtools.table_importer import (XLS_Importer, name_converter, +from caosadvancedtools.table_importer import (XLSImporter, name_converter, yes_no_converter) @@ -44,11 +44,11 @@ class ConverterTest(unittest.TestCase): self.assertRaises(ValueError, name_converter, "Max Mustermann") -class XLS_ImporterTest(unittest.TestCase): +class XLSImporterTest(unittest.TestCase): def setUp(self): - self.importer = XLS_Importer( + self.importer = XLSImporter( converters={'a': str, 'b': int, 'c': float, 'd': yes_no_converter}, - obligatory_columns=['a', 'b'], unique_columns=['a', 'b']) + obligatory_columns=['a', 'b'], unique_keys=[('a', 'b')]) self.valid_df = pd.DataFrame( [['a', 1, 2.0, 'yes']], columns=['a', 'b', 'c', 'd'])