Skip to content
Snippets Groups Projects
Commit 5d6e8fb7 authored by Florian Spreckelsen
Browse files

ENH: Allow string columns to contain numeric values

parent 5c7a587a
No related branches found
No related tags found
2 merge requests: !89 "ENH: JsonSchemaExporter accepts do_not_create parameter.", !77 "F fix strict values in table columns"
......@@ -322,7 +322,7 @@ class TableImporter():
.. note::
If columns are integer, but should be float, this method converts the respective columns
in place.
in place. The same applies to columns that should contain string values but contain numeric values.
Parameters
----------
......@@ -342,6 +342,8 @@ class TableImporter():
# These special cases should be fine.
if issub(col_dtype, np.integer) and issub(datatype, np.floating):
df[key] = df[key].astype(datatype)
elif datatype==str:
df[key] = df[key].astype(datatype)
# Now check each element
for idx, val in df.loc[pd.notnull(df.loc[:, key]), key].items():
......
......@@ -192,10 +192,24 @@ class TableImporterTest(unittest.TestCase):
def test_wrong_datatype(self):
importer = TableImporter(**self.importer_kwargs)
df = pd.DataFrame([[None, np.nan, 2.0, 'yes'],
df = pd.DataFrame([[None, 0, 2.0, 'yes'],
[5, 1, 2.0, 'yes']],
columns=['a', 'b', 'c', 'd'])
# strict = False by default, so this shouldn't raise an error
importer.check_datatype(df)
# Reset since check_datatype changes datatypes
df = pd.DataFrame([[None, 0, 2.0, 'yes'],
[5, 1, 2.0, 'yes']],
columns=['a', 'b', 'c', 'd'])
self.assertRaises(DataInconsistencyError, importer.check_datatype, df)
# strict=True, so int in str column raises an error
self.assertRaises(DataInconsistencyError, importer.check_datatype, df, None, True)
# This is always wrong (float in int column)
df = pd.DataFrame([[None, np.nan, 2.0, 'yes'],
[5, 1.7, 2.0, 'yes']],
columns=['a', 'b', 'c', 'd'])
self.assertRaises(DataInconsistencyError, importer.check_datatype, df, None, False)
def test_unique(self):
importer = TableImporter(**self.importer_kwargs)
......@@ -275,6 +289,30 @@ class CSVImporterTest(TableImporterTest):
importer = CSVImporter(**self.importer_kwargs)
importer.read_file(tmp.name)
def test_with_generous_datatypes(self):
    """Read a CSV file with datatypes that are more generous than the data.

    The valid data frame is written to a temporary CSV file, which is then
    read twice: first with ``str``/``float``/``str`` declared for the
    columns 'a', 'b' and 'c', then with ``str`` declared for all three.
    The test fails if either read raises.
    """
    import os  # local import: only needed for the temp-file cleanup below

    # The file is closed right away; it is written and read by name below.
    tmp = NamedTemporaryFile(delete=False, suffix=".csv")
    tmp.close()
    try:
        self.valid_df.to_csv(tmp.name)
        # Copy and use float for columns with integer values, string for
        # columns with numeric values
        kwargs = self.importer_kwargs.copy()
        kwargs["datatypes"] = {
            'a': str,
            'b': float,
            'c': str,
        }
        importer = CSVImporter(**kwargs)
        importer.read_file(tmp.name)

        # Same file again, but now every listed column is declared as string.
        kwargs["datatypes"] = {
            'a': str,
            'b': str,
            'c': str,
        }
        importer = CSVImporter(**kwargs)
        importer.read_file(tmp.name)
    finally:
        # delete=False means nobody else removes the file; do it here so the
        # test does not leave temp files behind, even when a read fails.
        os.remove(tmp.name)
class TSVImporterTest(TableImporterTest):
def test_full(self):
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment