From a9bab5856997d972937c756af913c14e6113c91f Mon Sep 17 00:00:00 2001
From: florian <f.spreckelsen@inidscale.com>
Date: Thu, 13 Apr 2023 14:17:21 +0200
Subject: [PATCH] TST: Add a unit test for table_importer.check_dataframe

---
 unittests/test_table_importer.py | 53 ++++++++++++++++++++++++++------
 1 file changed, 44 insertions(+), 9 deletions(-)

diff --git a/unittests/test_table_importer.py b/unittests/test_table_importer.py
index fc0eb043..0b3f0d7c 100644
--- a/unittests/test_table_importer.py
+++ b/unittests/test_table_importer.py
@@ -41,6 +41,16 @@ from caosadvancedtools.table_importer import (CSVImporter, TableImporter,
 
 from test_utils import BaseMockUpTest
 
+# Shared keyword arguments and a valid data frame for the TableImporter tests
+IMPORTER_KWARGS = dict(
+    converters={'c': float, 'd': yes_no_converter, 'x': float},  # x does not exist
+    datatypes={'a': str, 'b': int, 'x': int},  # x does not exist
+    obligatory_columns=['a', 'b'], unique_keys=[('a', 'b')],
+    existing_columns=['e'],
+)
+VALID_DF = pd.DataFrame(
+    [['a', 1, 2.0, 'yes', np.nan]], columns=['a', 'b', 'c', 'd', 'e'])
+
 
 class ConverterTest(unittest.TestCase):
     def test_yes_no(self):
@@ -143,20 +153,16 @@ class ConverterTest(unittest.TestCase):
 
 class TableImporterTest(unittest.TestCase):
     def setUp(self):
-        self.importer_kwargs = dict(
-            converters={'c': float, 'd': yes_no_converter, 'x': float},  # x does not exist
-            datatypes={'a': str, 'b': int, 'x': int},  # x does not exist
-            obligatory_columns=['a', 'b'], unique_keys=[('a', 'b')],
-            existing_columns=['e'],
-        )
-        self.valid_df = pd.DataFrame(
-            [['a', 1, 2.0, 'yes', np.nan]], columns=['a', 'b', 'c', 'd', 'e'])
+        self.importer_kwargs = IMPORTER_KWARGS
+        self.valid_df = VALID_DF
 
     def test_missing_col(self):
         # check missing from existing
         df = pd.DataFrame(columns=['a', 'b'])
         importer = TableImporter(**self.importer_kwargs)
-        self.assertRaises(ValueError, importer.check_columns, df)
+        with pytest.raises(DataInconsistencyError) as die:
+            importer.check_columns(df)
+        assert "Column 'e' missing" in str(die.value)
         # check valid
         importer.check_columns(self.valid_df)
 
@@ -191,6 +197,35 @@ class TableImporterTest(unittest.TestCase):
         self.assertEqual(df_new.shape[0], 1)
 
 
+def test_check_dataframe_existing_obligatory_columns(caplog):
+    """Needs the caplog fixture, so this is not a method of the class above."""
+    # stricter test case; column 'a' must exist and have a value
+    strict_kwargs = IMPORTER_KWARGS.copy()
+    # .copy() is shallow: build a new list so IMPORTER_KWARGS stays unchanged
+    strict_kwargs["existing_columns"] = IMPORTER_KWARGS["existing_columns"] + ['a']
+    importer = TableImporter(**strict_kwargs)
+
+    # the valid df is still valid, since 'a' has a value
+    importer.check_dataframe(VALID_DF)
+
+    # Now 'a' doesn't
+    df_missing_a = pd.DataFrame(
+        [[np.nan, 1, 2.0, 'yes', 'e']], columns=['a', 'b', 'c', 'd', 'e'])
+
+    new_df = importer.check_dataframe(df_missing_a)
+    # Row is removed and a warning is in the logger:
+    assert new_df.shape[0] == 0
+    assert "Required information is missing (a) in 1. row" in caplog.text
+    # Column 'c' is absent here; the single row and all four columns are kept
+    df_missing_c = pd.DataFrame(
+        [['a', 1, 'yes', np.nan]], columns=['a', 'b', 'd', 'e'])
+    new_df = importer.check_dataframe(df_missing_c)
+    assert new_df.shape[0] == 1
+    assert new_df.shape[1] == 4
+
+    caplog.clear()
+
+
 class XLSImporterTest(TableImporterTest):
     def test_full(self):
         """ test full run with example data """
-- 
GitLab