Skip to content
Snippets Groups Projects
Commit a9bab585 authored by florian's avatar florian
Browse files

TST: Add a unit test for table_importer.check_dataframe

parent 5cbd9a40
No related branches found
No related tags found
2 merge requests!73MAINT: change wording of TableImporter argument and allow converters and...,!70ENH: allow to provide required columns explicitly
Pipeline #35542 passed
......@@ -41,6 +41,16 @@ from caosadvancedtools.table_importer import (CSVImporter, TableImporter,
from test_utils import BaseMockUpTest
# For testing the table importer
# Keyword arguments shared by the TableImporter tests. The key 'x' appears in
# the converters and datatypes although the test data has no such column.
IMPORTER_KWARGS = {
    "converters": {'c': float, 'd': yes_no_converter, 'x': float},  # x does not exist
    "datatypes": {'a': str, 'b': int, 'x': int},  # x does not exist
    "obligatory_columns": ['a', 'b'],
    "unique_keys": [('a', 'b')],
    "existing_columns": ['e'],
}

# A single-row frame with columns 'a'..'e'; only column 'e' holds NaN.
_VALID_ROW = ['a', 1, 2.0, 'yes', np.nan]
VALID_DF = pd.DataFrame([_VALID_ROW], columns=['a', 'b', 'c', 'd', 'e'])
class ConverterTest(unittest.TestCase):
def test_yes_no(self):
......@@ -143,20 +153,16 @@ class ConverterTest(unittest.TestCase):
class TableImporterTest(unittest.TestCase):
def setUp(self):
    """Expose the shared module-level fixtures on the test case.

    ``self.importer_kwargs`` and ``self.valid_df`` refer to the very same
    objects as ``IMPORTER_KWARGS`` and ``VALID_DF`` (no copy is made), so
    tests must not modify them in place.
    """
    # The fixtures are defined once at module level; building an identical
    # dict and DataFrame here only to overwrite them right away was dead code.
    self.importer_kwargs = IMPORTER_KWARGS
    self.valid_df = VALID_DF
def test_missing_col(self):
# check missing from existing
df = pd.DataFrame(columns=['a', 'b'])
importer = TableImporter(**self.importer_kwargs)
self.assertRaises(ValueError, importer.check_columns, df)
with pytest.raises(DataInconsistencyError) as die:
importer.check_columns(df)
assert "Column 'e' missing" in str(die.value)
# check valid
importer.check_columns(self.valid_df)
......@@ -191,6 +197,35 @@ class TableImporterTest(unittest.TestCase):
self.assertEqual(df_new.shape[0], 1)
def test_check_dataframe_existing_obligatory_columns(caplog):
    """Test ``TableImporter.check_dataframe`` with 'a' added to ``existing_columns``.

    This is a plain pytest function rather than a ``TableImporterTest`` method
    because it needs the ``caplog`` fixture.
    """
    # stricter test case; column 'a' must exist and have a value
    #
    # Build a *new* ``existing_columns`` list. ``dict.copy()`` is shallow, so
    # appending to the copied dict's list would also modify the list held by
    # IMPORTER_KWARGS and leak 'a' into every other test using those kwargs.
    strict_kwargs = dict(
        IMPORTER_KWARGS,
        existing_columns=[*IMPORTER_KWARGS["existing_columns"], 'a'],
    )
    importer = TableImporter(**strict_kwargs)

    # the valid df is still valid, since 'a' has a value
    importer.check_dataframe(VALID_DF)

    # Now 'a' doesn't
    df_missing_a = pd.DataFrame(
        [[np.nan, 1, 2.0, 'yes', 'e']], columns=['a', 'b', 'c', 'd', 'e'])
    new_df = importer.check_dataframe(df_missing_a)
    # The row is removed and a warning is in the logger:
    assert new_df.shape[0] == 0
    assert "Required information is missing (a) in 1. row" in caplog.text

    # A frame lacking column 'c' entirely keeps its single row and all four
    # of its columns.
    df_missing_c = pd.DataFrame(
        [['a', 1, 'yes', np.nan]], columns=['a', 'b', 'd', 'e'])
    new_df = importer.check_dataframe(df_missing_c)
    assert new_df.shape[0] == 1
    assert new_df.shape[1] == 4

    caplog.clear()
class XLSImporterTest(TableImporterTest):
def test_full(self):
""" test full run with example data """
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment