diff --git a/unittests/test_table_importer.py b/unittests/test_table_importer.py
index fc0eb043c4d253beed21f953ae0f63ff52972fc5..0b3f0d7c7fc81b2a9d64e24fb2262c686ea669da 100644
--- a/unittests/test_table_importer.py
+++ b/unittests/test_table_importer.py
@@ -41,6 +41,16 @@ from caosadvancedtools.table_importer import (CSVImporter, TableImporter,
 
 from test_utils import BaseMockUpTest
 
+# For testing the table importer
+IMPORTER_KWARGS = dict(
+    converters={'c': float, 'd': yes_no_converter, 'x': float},  # x does not exist
+    datatypes={'a': str, 'b': int, 'x': int},  # x does not exist
+    obligatory_columns=['a', 'b'], unique_keys=[('a', 'b')],
+    existing_columns=['e'],
+)
+VALID_DF = pd.DataFrame(
+    [['a', 1, 2.0, 'yes', np.nan]], columns=['a', 'b', 'c', 'd', 'e'])
+
 
 class ConverterTest(unittest.TestCase):
     def test_yes_no(self):
@@ -143,20 +153,16 @@ class ConverterTest(unittest.TestCase):
 
 class TableImporterTest(unittest.TestCase):
     def setUp(self):
-        self.importer_kwargs = dict(
-            converters={'c': float, 'd': yes_no_converter, 'x': float},  # x does not exist
-            datatypes={'a': str, 'b': int, 'x': int},  # x does not exist
-            obligatory_columns=['a', 'b'], unique_keys=[('a', 'b')],
-            existing_columns=['e'],
-        )
-        self.valid_df = pd.DataFrame(
-            [['a', 1, 2.0, 'yes', np.nan]], columns=['a', 'b', 'c', 'd', 'e'])
+        self.importer_kwargs = IMPORTER_KWARGS
+        self.valid_df = VALID_DF
 
     def test_missing_col(self):
         # check missing from existing
         df = pd.DataFrame(columns=['a', 'b'])
         importer = TableImporter(**self.importer_kwargs)
-        self.assertRaises(ValueError, importer.check_columns, df)
+        with pytest.raises(DataInconsistencyError) as die:
+            importer.check_columns(df)
+        assert "Column 'e' missing" in str(die.value)
         # check valid
         importer.check_columns(self.valid_df)
 
@@ -191,6 +197,36 @@ class TableImporterTest(unittest.TestCase):
         self.assertEqual(df_new.shape[0], 1)
 
 
+def test_check_dataframe_existing_obligatory_columns(caplog):
+    """Needs the caplog fixture, hence a function outside the class above."""
+    # stricter test case; column 'a' must exist and have a value
+    # dict.copy() is shallow: rebind the key to a new list instead of
+    # appending, so the shared IMPORTER_KWARGS list is not mutated.
+    strict_kwargs = IMPORTER_KWARGS.copy()
+    strict_kwargs["existing_columns"] = strict_kwargs["existing_columns"] + ['a']
+
+    importer = TableImporter(**strict_kwargs)
+
+    # the valid df is still valid, since 'a' has a value
+    importer.check_dataframe(VALID_DF)
+
+    # Now 'a' doesn't
+    df_missing_a = pd.DataFrame(
+        [[np.nan, 1, 2.0, 'yes', 'e']], columns=['a', 'b', 'c', 'd', 'e'])
+
+    new_df = importer.check_dataframe(df_missing_a)
+    # Row is removed and a warning is in the logger:
+    assert new_df.shape[0] == 0
+    assert "Required information is missing (a) in 1. row" in caplog.text
+
+    df_missing_c = pd.DataFrame(
+        [['a', 1, 'yes', np.nan]], columns=['a', 'b', 'd', 'e'])
+    new_df = importer.check_dataframe(df_missing_c)
+    assert new_df.shape[0] == 1
+    assert new_df.shape[1] == 4
+
+    caplog.clear()
+
+
 class XLSImporterTest(TableImporterTest):
     def test_full(self):
         """ test full run with example data """