Skip to content
Snippets Groups Projects
Commit aed011f9 authored by Henrik tom Wörden
Browse files

TST: update and extend table importer tests

parent 80ea4d9d
No related branches found
No related tags found
1 merge request: !22 "Release 0.3"
Pipeline #14372 passed with warnings
......@@ -317,8 +317,8 @@ class XLSImporter(TableImporter):
class CSVImporter(TableImporter):
def read_file(self, filename, **kwargs):
df = pd.read_csv(filename, sep=";", converters=self.converters,
def read_file(self, filename, sep=",", **kwargs):
df = pd.read_csv(filename, sep=sep, converters=self.converters,
**kwargs)
self.check_dataframe(df, filename)
......
......@@ -70,12 +70,12 @@ class ConverterTest(unittest.TestCase):
@pytest.mark.xfail(reason="To be fixed, see Issue #34")
def test_datetime(self):
test_file = os.path.join(os.path.dirname(__file__), "date.xlsx")
self.importer = XLSImporter(converters={'d': datetime_converter,
}, obligatory_columns=['d'])
importer = XLSImporter(converters={'d': datetime_converter,
}, obligatory_columns=['d'])
xls_file = pd.io.excel.ExcelFile(test_file)
df = xls_file.parse()
df = self.importer.read_xls(test_file)
df = importer.read_xls(test_file)
assert df.shape[0] == 2
# TODO datatypes are different; fix it
assert df.d.iloc[0] == datetime.datetime(1980, 12, 31, 13, 24, 23)
......@@ -83,30 +83,30 @@ class ConverterTest(unittest.TestCase):
def test_date_xlsx(self):
"""Test with .xlsx in order to check openpyxl engine."""
test_file = os.path.join(os.path.dirname(__file__), "date.xlsx")
self.importer = XLSImporter(converters={'a': date_converter,
'b': date_converter,
'c': partial(date_converter,
fmt="%d.%m.%y")
}, obligatory_columns=['a'])
importer = XLSImporter(converters={'a': date_converter,
'b': date_converter,
'c': partial(date_converter,
fmt="%d.%m.%y")
}, obligatory_columns=['a'])
xls_file = pd.io.excel.ExcelFile(test_file)
df = xls_file.parse()
df = self.importer.read_xls(test_file)
df = importer.read_xls(test_file)
assert df.shape[0] == 2
assert df.a.iloc[0] == df.b.iloc[0] == df.c.iloc[0]
def test_date_xls(self):
"""Test with .xls in order to check xlrd engine."""
test_file = os.path.join(os.path.dirname(__file__), "date.xls")
self.importer = XLSImporter(converters={'a': date_converter,
'b': date_converter,
'c': partial(date_converter,
fmt="%d.%m.%y")
}, obligatory_columns=['a'])
importer = XLSImporter(converters={'a': date_converter,
'b': date_converter,
'c': partial(date_converter,
fmt="%d.%m.%y")
}, obligatory_columns=['a'])
xls_file = pd.io.excel.ExcelFile(test_file)
df = xls_file.parse()
df = self.importer.read_xls(test_file)
df = importer.read_xls(test_file)
assert df.shape[0] == 2
assert df.a.iloc[0] == df.b.iloc[0] == df.c.iloc[0]
......@@ -131,7 +131,7 @@ class ConverterTest(unittest.TestCase):
class TableImporterTest(unittest.TestCase):
def setUp(self):
self.importer = TableImporter(
self.importer_kwargs = dict(
converters={'a': str, 'b': int, 'c': float, 'd': yes_no_converter},
obligatory_columns=['a', 'b'], unique_keys=[('a', 'b')])
self.valid_df = pd.DataFrame(
......@@ -139,26 +139,29 @@ class TableImporterTest(unittest.TestCase):
def test_missing_col(self):
df = pd.DataFrame(columns=['a', 'b'])
self.assertRaises(ValueError, self.importer.check_columns, df)
self.importer.check_columns(self.valid_df)
importer = TableImporter(**self.importer_kwargs)
self.assertRaises(ValueError, importer.check_columns, df)
importer.check_columns(self.valid_df)
def test_missing_val(self):
self.importer.check_missing(self.valid_df)
importer = TableImporter(**self.importer_kwargs)
importer.check_missing(self.valid_df)
df = pd.DataFrame([[None, np.nan, 2.0, 'yes'],
[None, 1, 2.0, 'yes'],
['a', np.nan, 2.0, 'yes'],
['b', 5, 3.0, 'no']],
columns=['a', 'b', 'c', 'd'])
df_new = self.importer.check_missing(df)
df_new = importer.check_missing(df)
self.assertEqual(df_new.shape[0], 1)
self.assertEqual(df_new.shape[1], 4)
self.assertEqual(df_new.iloc[0].b, 5)
def test_unique(self):
self.importer.check_missing(self.valid_df)
importer = TableImporter(**self.importer_kwargs)
importer.check_missing(self.valid_df)
df = pd.DataFrame([['b', 5, 3.0, 'no'], ['b', 5, 3.0, 'no']],
columns=['a', 'b', 'c', 'd'])
df_new = self.importer.check_unique(df)
df_new = importer.check_unique(df)
self.assertEqual(df_new.shape[0], 1)
......@@ -168,10 +171,32 @@ class XLSImporterTest(TableImporterTest):
tmp = NamedTemporaryFile(delete=False, suffix=".xlsx")
tmp.close()
self.valid_df.to_excel(tmp.name)
self.importer.read_xls(tmp.name)
importer = XLSImporter(**self.importer_kwargs)
importer.read_file(tmp.name)
def test_raise(self):
importer = XLSImporter(**self.importer_kwargs)
tmp = NamedTemporaryFile(delete=False, suffix=".lol")
tmp.close()
self.assertRaises(DataInconsistencyError, self.importer.read_xls,
self.assertRaises(DataInconsistencyError, importer.read_xls,
tmp.name)
class CSVImporterTest(TableImporterTest):
    """Inherit the TableImporterTest cases and add a CSV file round trip."""

    def test_full(self):
        """Test a full run: write the example data to CSV and read it back."""
        tmp = NamedTemporaryFile(delete=False, suffix=".csv")
        tmp.close()
        # delete=False keeps the file on disk after close() so that it can be
        # reopened by name; remove it again once the test has finished,
        # whether it passed or failed.
        self.addCleanup(os.remove, tmp.name)
        self.valid_df.to_csv(tmp.name)
        importer = CSVImporter(**self.importer_kwargs)
        importer.read_file(tmp.name)
class TSVImporterTest(TableImporterTest):
    """Inherit the TableImporterTest cases and add a TSV file round trip."""

    def test_full(self):
        """Test a full run: write the example data as TSV and read it back."""
        tmp = NamedTemporaryFile(delete=False, suffix=".tsv")
        tmp.close()
        # delete=False keeps the file on disk after close() so that it can be
        # reopened by name; remove it again once the test has finished,
        # whether it passed or failed.
        self.addCleanup(os.remove, tmp.name)
        # The file is written tab-separated to match the ".tsv" suffix.
        self.valid_df.to_csv(tmp.name, sep="\t")
        importer = TSVImporter(**self.importer_kwargs)
        importer.read_file(tmp.name)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment