From 4cbff6df3ecaceae213f462cb3a4442cdbcd497c Mon Sep 17 00:00:00 2001
From: Daniel <d.hornung@indiscale.com>
Date: Tue, 5 Sep 2023 10:49:04 +0200
Subject: [PATCH] TEST: Additional column type conversion checks.

---
 unittests/test_table_importer.py | 24 ++++++++++++++++++------
 1 file changed, 18 insertions(+), 6 deletions(-)

diff --git a/unittests/test_table_importer.py b/unittests/test_table_importer.py
index 72650d61..599ea535 100644
--- a/unittests/test_table_importer.py
+++ b/unittests/test_table_importer.py
@@ -44,7 +44,7 @@ from test_utils import BaseMockUpTest
 # For testing the table importer
 IMPORTER_KWARGS = dict(
     converters={'c': float, 'd': yes_no_converter, 'x': float},  # x does not exist
-    datatypes={'a': str, 'b': int, 'x': int},  # x does not exist
+    datatypes={'a': str, 'b': int, 'float': float, 'x': int},  # x does not exist
     obligatory_columns=['a', 'b'], unique_keys=[('a', 'b')],
     existing_columns=['e'],
 )
@@ -192,17 +192,29 @@ class TableImporterTest(unittest.TestCase):
 
     def test_wrong_datatype(self):
         importer = TableImporter(**self.importer_kwargs)
-        df = pd.DataFrame([[None, 0, 2.0, 'yes'],
-                           [5, 1, 2.0, 'yes']],
-                          columns=['a', 'b', 'c', 'd'])
+        df = pd.DataFrame([[1234, 0, 2.0, 3, 'yes'],
+                           [5678, 1, 2.0, 3, 'yes']],
+                          columns=['a', 'b', 'c', 'float', 'd'])
+        # Before the check, both columns hold integers, i.e. the wrong datatypes.
+        assert df["a"].dtype == int
+        assert df["float"].dtype == int
         # strict = False by default, so this shouldn't raise an error
         importer.check_datatype(df)
+        # check_datatype should have converted both columns to the correct types.
+        assert df["a"].dtype == pd.StringDtype
+        assert df["float"].dtype == float
 
-        # Reset since check_datatype changes datatypes
+        # Resetting `df` since check_datatype may change datatypes
         df = pd.DataFrame([[None, 0, 2.0, 'yes'],
                            [5, 1, 2.0, 'yes']],
                           columns=['a', 'b', 'c', 'd'])
-        # strict=True, so int in str column raises an error
+        # strict=True, so number in str column raises an error
+        self.assertRaises(DataInconsistencyError, importer.check_datatype, df, None, True)
+
+        df = pd.DataFrame([[0],
+                           [1]],
+                          columns=['float'])
+        # strict=True, so an int in a float column raises an error
         self.assertRaises(DataInconsistencyError, importer.check_datatype, df, None, True)
 
         # This is always wrong (float in int column)
-- 
GitLab