Skip to content
Snippets Groups Projects
Verified Commit 2b138494 authored by Timm Fitschen
Browse files

REVIEW: changed name and signature of XLS_Importer

parent 0f46639d
No related branches found
No related tags found
1 merge request: !22 Release 0.3
......@@ -4,3 +4,5 @@ __pycache__
*cache.db
*.egg-info
.docker/cert
version.py
.eggs/
......@@ -47,9 +47,9 @@ def yes_no_converter(val):
"Field should be 'Yes' or 'No', but is '{}'.".format(val))
class XLS_Importer(object):
class XLSImporter(object):
def __init__(self, converters, obligatory_columns=None, unique_columns=None):
def __init__(self, converters, obligatory_columns=None, unique_keys=None):
"""
converters: dict with column names as keys and converter functions as
......@@ -65,7 +65,7 @@ class XLS_Importer(object):
self.sup = Suppressable(logger=logger)
self.required_columns = list(converters.keys())
self.obligatory_columns = [] if obligatory_columns is None else obligatory_columns
self.unique_columns = [] if unique_columns is None else unique_columns
self.unique_keys = [] if unique_keys is None else unique_keys
self.converters = converters
def read_xls(self, filename):
......@@ -80,7 +80,7 @@ class XLS_Importer(object):
self.check_columns(df, filename=filename)
df = self.check_missing(df, filename=filename)
if len(self.unique_columns) > 0:
if len(self.unique_keys) > 0:
df = self.check_unique(df, filename=filename)
return df
......@@ -96,22 +96,23 @@ class XLS_Importer(object):
df = df.copy()
uniques = []
subtable = df[list(self.unique_columns)]
for index, row in subtable.iterrows():
element = tuple(row)
if element in uniques:
errmssg = (
"The {}. row contains the values '{}'.\nThis value "
"combination should be unique, but was used in a previous "
"row in\n").format(index+1, element)
errmssg += "{}.".format(filename) if filename else "the file."
errmssg += "\nThis row will be ignored!"
for unique_columns in self.unique_keys:
subtable = df[list(unique_columns)]
for index, row in subtable.iterrows():
element = tuple(row)
if element in uniques:
errmssg = (
"The {}. row contains the values '{}'.\nThis value "
"combination should be unique, but was used in a previous "
"row in\n").format(index+1, element)
errmssg += "{}.".format(filename) if filename else "the file."
errmssg += "\nThis row will be ignored!"
self.sup.warning(errmssg, identifier=filename,
category="inconsistency")
df = df.drop(index)
else:
uniques.append(element)
self.sup.warning(errmssg, identifier=filename,
category="inconsistency")
df = df.drop(index)
else:
uniques.append(element)
return df
......
......@@ -22,7 +22,7 @@ from tempfile import NamedTemporaryFile
import numpy as np
import pandas as pd
from caosadvancedtools.table_importer import (XLS_Importer, name_converter,
from caosadvancedtools.table_importer import (XLSImporter, name_converter,
yes_no_converter)
......@@ -44,11 +44,11 @@ class ConverterTest(unittest.TestCase):
self.assertRaises(ValueError, name_converter, "Max Mustermann")
class XLS_ImporterTest(unittest.TestCase):
class XLSImporterTest(unittest.TestCase):
def setUp(self):
self.importer = XLS_Importer(
self.importer = XLSImporter(
converters={'a': str, 'b': int, 'c': float, 'd': yes_no_converter},
obligatory_columns=['a', 'b'], unique_columns=['a', 'b'])
obligatory_columns=['a', 'b'], unique_keys=[('a', 'b')])
self.valid_df = pd.DataFrame(
[['a', 1, 2.0, 'yes']], columns=['a', 'b', 'c', 'd'])
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment