Skip to content
Snippets Groups Projects
Commit 93be597d authored by Florian Spreckelsen's avatar Florian Spreckelsen
Browse files

Merge branch 'f-fix-strict-values-in-table-columns' into 'dev'

F fix strict values in table columns

See merge request !77
parents 5c7a587a 8fd2460a
No related branches found
No related tags found
2 merge requests: !89 "ENH: JsonSchemaExporter accepts do_not_create parameter.", !77 "F fix strict values in table columns"
Pipeline #40641 passed
......@@ -14,6 +14,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
### Changed ###
* A bit better error handling in the yaml model parser.
* `TableImporter.check_datatype` allows numeric values in string columns if
`strict=False` (default).
### Deprecated ###
......
......@@ -322,7 +322,7 @@ class TableImporter():
.. note::
If columns are integer, but should be float, this method converts the respective columns
in place.
in place. The same applies to columns that should hold string values but contain numeric values.
Parameters
----------
......@@ -338,9 +338,11 @@ class TableImporter():
# float, because CaosDB does not have different sizes anyway.
col_dtype = df.dtypes[key]
if not strict and not np.issubdtype(col_dtype, datatype):
issub = np.issubdtype
# These special cases should be fine.
if issub(col_dtype, np.integer) and issub(datatype, np.floating):
if ((datatype == str)
or (np.issubdtype(col_dtype, np.integer)
and np.issubdtype(datatype, np.floating))
): # NOQA
df[key] = df[key].astype(datatype)
# Now check each element
......
......@@ -44,7 +44,7 @@ from test_utils import BaseMockUpTest
# For testing the table importer
# Keyword arguments passed to the importers under test. Entries for the
# column 'x' are configured on purpose although no test table contains it.
IMPORTER_KWARGS = dict(
converters={'c': float, 'd': yes_no_converter, 'x': float}, # x does not exist
# NOTE(review): the `datatypes` keyword is given twice below (without and with
# the 'float' column); a call cannot repeat a keyword, so presumably only the
# second line is meant to remain -- confirm.
datatypes={'a': str, 'b': int, 'x': int}, # x does not exist
datatypes={'a': str, 'b': int, 'float': float, 'x': int}, # x does not exist
obligatory_columns=['a', 'b'], unique_keys=[('a', 'b')],
existing_columns=['e'],
)
......@@ -192,10 +192,36 @@ class TableImporterTest(unittest.TestCase):
def test_wrong_datatype(self):
# Exercises `check_datatype` with the default (non-strict) setting and with
# explicit `strict` values passed as the third positional argument.
importer = TableImporter(**self.importer_kwargs)
# NOTE(review): the next line opens a DataFrame literal that is never closed
# before `df` is assigned again; it looks like a leftover of the previous
# version of this test -- confirm and drop it.
df = pd.DataFrame([[None, np.nan, 2.0, 'yes'],
df = pd.DataFrame([[1234, 0, 2.0, 3, 'yes'],
[5678, 1, 2.0, 3, 'yes']],
columns=['a', 'b', 'c', 'float', 'd'])
# wrong datatypes before
assert df["a"].dtype == int
assert df["float"].dtype == int
# strict = False by default, so this shouldn't raise an error
importer.check_datatype(df)
# The types should be correct now.
# NOTE(review): the comparison below is against the `pd.StringDtype` class
# itself, not an instance such as `pd.StringDtype()` -- confirm that this
# asserts what is intended.
assert df["a"].dtype == pd.StringDtype
assert df["float"].dtype == float
# Resetting `df` since check_datatype may change datatypes
df = pd.DataFrame([[None, 0, 2.0, 'yes'],
[5, 1, 2.0, 'yes']],
columns=['a', 'b', 'c', 'd'])
# NOTE(review): this call passes no `strict` argument, unlike the call two
# lines below on the same `df` -- confirm that both are meant to be present.
self.assertRaises(DataInconsistencyError, importer.check_datatype, df)
# strict=True, so number in str column raises an error
self.assertRaises(DataInconsistencyError, importer.check_datatype, df, None, True)
df = pd.DataFrame([[0],
[1]],
columns=['float'])
# strict=True, so int in float column raises an error
self.assertRaises(DataInconsistencyError, importer.check_datatype, df, None, True)
# This is always wrong (float in int column)
df = pd.DataFrame([[None, np.nan, 2.0, 'yes'],
[5, 1.7, 2.0, 'yes']],
columns=['a', 'b', 'c', 'd'])
self.assertRaises(DataInconsistencyError, importer.check_datatype, df, None, False)
def test_unique(self):
importer = TableImporter(**self.importer_kwargs)
......@@ -275,6 +301,30 @@ class CSVImporterTest(TableImporterTest):
importer = CSVImporter(**self.importer_kwargs)
importer.read_file(tmp.name)
def test_with_generous_datatypes(self):
    """Read the valid table from CSV with more permissive target datatypes.

    ``self.valid_df`` is written to a temporary CSV file, which is then read
    by two ``CSVImporter`` instances: the first declares column ``b`` as
    float and ``c`` as str, the second declares ``a``, ``b`` and ``c`` all
    as str.  The test passes when neither ``read_file`` call raises; it
    makes no assertion about the values that were read.
    """
    import os  # local import: only needed to clean up the temporary file

    tmp = NamedTemporaryFile(delete=False, suffix=".csv")
    tmp.close()
    try:
        self.valid_df.to_csv(tmp.name)
        # Use float for columns with integer values, string for columns
        # with numeric values.
        generous_datatypes = (
            {'a': str, 'b': float, 'c': str},
            {'a': str, 'b': str, 'c': str},
        )
        for datatypes in generous_datatypes:
            # Copy so the shared importer kwargs are not modified.
            kwargs = self.importer_kwargs.copy()
            kwargs["datatypes"] = datatypes
            importer = CSVImporter(**kwargs)
            importer.read_file(tmp.name)
    finally:
        # The file was created with delete=False, so it has to be removed
        # explicitly or every test run leaves a stray CSV file behind.
        os.unlink(tmp.name)
class TSVImporterTest(TableImporterTest):
def test_full(self):
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment