Skip to content
Snippets Groups Projects
Verified commit f4965b0b, authored by Timm Fitschen
Browse files

Merge branch 'table' into lfilter

parents 616d6c3b 2b138494
No related branches found
No related tags found
1 merge request: !22 "Release 0.3"
......@@ -47,9 +47,9 @@ def yes_no_converter(val):
"Field should be 'Yes' or 'No', but is '{}'.".format(val))
class XLS_Importer(object):
class XLSImporter(object):
def __init__(self, converters, obligatory_columns=None, unique_columns=None):
def __init__(self, converters, obligatory_columns=None, unique_keys=None):
"""
converters: dict with column names as keys and converter functions as
......@@ -65,7 +65,7 @@ class XLS_Importer(object):
self.sup = SuppressKnown()
self.required_columns = list(converters.keys())
self.obligatory_columns = [] if obligatory_columns is None else obligatory_columns
self.unique_columns = [] if unique_columns is None else unique_columns
self.unique_keys = [] if unique_keys is None else unique_keys
self.converters = converters
def read_xls(self, filename):
......@@ -80,7 +80,7 @@ class XLS_Importer(object):
self.check_columns(df, filename=filename)
df = self.check_missing(df, filename=filename)
if len(self.unique_columns) > 0:
if len(self.unique_keys) > 0:
df = self.check_unique(df, filename=filename)
return df
......@@ -96,22 +96,23 @@ class XLS_Importer(object):
df = df.copy()
uniques = []
subtable = df[list(self.unique_columns)]
for index, row in subtable.iterrows():
element = tuple(row)
if element in uniques:
errmsg = (
"The {}. row contains the values '{}'.\nThis value "
"combination should be unique, but was used in a previous "
"row in\n").format(index+1, element)
errmsg += "{}.".format(filename) if filename else "the file."
errmsg += "\nThis row will be ignored!"
logger.warning(errmsg, extra={'identifier': filename,
'category': "inconsistency"})
df = df.drop(index)
else:
uniques.append(element)
for unique_columns in self.unique_keys:
subtable = df[list(unique_columns)]
for index, row in subtable.iterrows():
element = tuple(row)
if element in uniques:
errmssg = (
"The {}. row contains the values '{}'.\nThis value "
"combination should be unique, but was used in a previous "
"row in\n").format(index+1, element)
errmssg += "{}.".format(filename) if filename else "the file."
errmssg += "\nThis row will be ignored!"
self.sup.warning(errmssg, identifier=filename,
category="inconsistency")
df = df.drop(index)
else:
uniques.append(element)
return df
......
......@@ -22,7 +22,7 @@ from tempfile import NamedTemporaryFile
import numpy as np
import pandas as pd
from caosadvancedtools.table_importer import (XLS_Importer, name_converter,
from caosadvancedtools.table_importer import (XLSImporter, name_converter,
yes_no_converter)
......@@ -44,11 +44,11 @@ class ConverterTest(unittest.TestCase):
self.assertRaises(ValueError, name_converter, "Max Mustermann")
class XLS_ImporterTest(unittest.TestCase):
class XLSImporterTest(unittest.TestCase):
def setUp(self):
self.importer = XLS_Importer(
self.importer = XLSImporter(
converters={'a': str, 'b': int, 'c': float, 'd': yes_no_converter},
obligatory_columns=['a', 'b'], unique_columns=['a', 'b'])
obligatory_columns=['a', 'b'], unique_keys=[('a', 'b')])
self.valid_df = pd.DataFrame(
[['a', 1, 2.0, 'yes']], columns=['a', 'b', 'c', 'd'])
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment