Skip to content
Snippets Groups Projects
Verified commit 2368c7af, authored by Timm Fitschen
Browse files

Merge branch 'lfilter' into table-error

parents 23f1311a f178c8b3
No related branches found
No related tags found
1 merge request: !22 (Release 0.3)
......@@ -68,14 +68,14 @@ def yes_no_converter(val):
"Field should be 'Yes' or 'No', but is '{}'.".format(val))
class TSV_Importer(object):
class TSVImporter(object):
def __init__(self, converters, obligatory_columns=[], unique_columns=[]):
raise NotImplementedError()
class XLS_Importer(object):
class XLSImporter(object):
def __init__(self, converters, obligatory_columns=None, unique_columns=None):
def __init__(self, converters, obligatory_columns=None, unique_keys=None):
"""
converters: dict with column names as keys and converter functions as
values
......@@ -90,7 +90,7 @@ class XLS_Importer(object):
self.sup = SuppressKnown()
self.required_columns = list(converters.keys())
self.obligatory_columns = [] if obligatory_columns is None else obligatory_columns
self.unique_columns = [] if unique_columns is None else unique_columns
self.unique_keys = [] if unique_keys is None else unique_keys
self.converters = converters
def read_xls(self, filename):
......@@ -129,7 +129,7 @@ class XLS_Importer(object):
self.check_columns(df, filename=filename)
df = self.check_missing(df, filename=filename)
if len(self.unique_columns) > 0:
if len(self.unique_keys) > 0:
df = self.check_unique(df, filename=filename)
return df
......@@ -165,22 +165,23 @@ class XLS_Importer(object):
df = df.copy()
uniques = []
subtable = df[list(self.unique_columns)]
for index, row in subtable.iterrows():
element = tuple(row)
if element in uniques:
errmsg = (
"The {}. row contains the values '{}'.\nThis value "
"combination should be unique, but was used in a previous "
"row in\n").format(index+1, element)
errmsg += "{}.".format(filename) if filename else "the file."
errmsg += "\nThis row will be ignored!"
logger.warning(errmsg, extra={'identifier': filename,
'category': "inconsistency"})
df = df.drop(index)
else:
uniques.append(element)
for unique_columns in self.unique_keys:
subtable = df[list(unique_columns)]
for index, row in subtable.iterrows():
element = tuple(row)
if element in uniques:
errmsg = (
"The {}. row contains the values '{}'.\nThis value "
"combination should be unique, but was used in a previous "
"row in\n").format(index+1, element)
errmsg += "{}.".format(filename) if filename else "the file."
errmsg += "\nThis row will be ignored!"
logger.warning(errmsg, extra={'identifier': filename,
'category': "inconsistency"})
df = df.drop(index)
else:
uniques.append(element)
return df
......
......@@ -23,7 +23,7 @@ from tempfile import NamedTemporaryFile
import numpy as np
import pandas as pd
from caosadvancedtools.datainconsistency import DataInconsistencyError
from caosadvancedtools.table_importer import (XLS_Importer, name_converter,
from caosadvancedtools.table_importer import (XLSImporter, name_converter,
yes_no_converter)
......@@ -45,11 +45,11 @@ class ConverterTest(unittest.TestCase):
self.assertRaises(ValueError, name_converter, "Max Mustermann")
class XLS_ImporterTest(unittest.TestCase):
class XLSImporterTest(unittest.TestCase):
def setUp(self):
self.importer = XLS_Importer(
self.importer = XLSImporter(
converters={'a': str, 'b': int, 'c': float, 'd': yes_no_converter},
obligatory_columns=['a', 'b'], unique_columns=['a', 'b'])
obligatory_columns=['a', 'b'], unique_keys=[('a', 'b')])
self.valid_df = pd.DataFrame(
[['a', 1, 2.0, 'yes']], columns=['a', 'b', 'c', 'd'])
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment