diff --git a/src/caosadvancedtools/table_importer.py b/src/caosadvancedtools/table_importer.py index dcda00a5c6dc795b097e43cce2f8952b1e204ce1..6b86b1098fd58c784ba5a190174aced3c4323281 100755 --- a/src/caosadvancedtools/table_importer.py +++ b/src/caosadvancedtools/table_importer.py @@ -317,8 +317,8 @@ class XLSImporter(TableImporter): class CSVImporter(TableImporter): - def read_file(self, filename, **kwargs): - df = pd.read_csv(filename, sep=";", converters=self.converters, + def read_file(self, filename, sep=",", **kwargs): + df = pd.read_csv(filename, sep=sep, converters=self.converters, **kwargs) self.check_dataframe(df, filename) diff --git a/unittests/test_table_importer.py b/unittests/test_table_importer.py index 22e183b2629a9651442b3074ba2544f6542cdd7d..b36f8bf9eeda9166fb8ab0d0f1b43d5d1c5276a4 100644 --- a/unittests/test_table_importer.py +++ b/unittests/test_table_importer.py @@ -70,12 +70,12 @@ class ConverterTest(unittest.TestCase): @pytest.mark.xfail(reason="To be fixed, see Issue #34") def test_datetime(self): test_file = os.path.join(os.path.dirname(__file__), "date.xlsx") - self.importer = XLSImporter(converters={'d': datetime_converter, - }, obligatory_columns=['d']) + importer = XLSImporter(converters={'d': datetime_converter, + }, obligatory_columns=['d']) xls_file = pd.io.excel.ExcelFile(test_file) df = xls_file.parse() - df = self.importer.read_xls(test_file) + df = importer.read_xls(test_file) assert df.shape[0] == 2 # TODO datatypes are different; fix it assert df.d.iloc[0] == datetime.datetime(1980, 12, 31, 13, 24, 23) @@ -83,30 +83,30 @@ class ConverterTest(unittest.TestCase): def test_date_xlsx(self): """Test with .xlsx in order to check openpyxl engine.""" test_file = os.path.join(os.path.dirname(__file__), "date.xlsx") - self.importer = XLSImporter(converters={'a': date_converter, - 'b': date_converter, - 'c': partial(date_converter, - fmt="%d.%m.%y") - }, obligatory_columns=['a']) + importer = XLSImporter(converters={'a': date_converter, + 'b': date_converter, + 'c': partial(date_converter, + fmt="%d.%m.%y") + }, obligatory_columns=['a']) xls_file = pd.io.excel.ExcelFile(test_file) df = xls_file.parse() - df = self.importer.read_xls(test_file) + df = importer.read_xls(test_file) assert df.shape[0] == 2 assert df.a.iloc[0] == df.b.iloc[0] == df.c.iloc[0] def test_date_xls(self): """Test with .xls in order to check xlrd engine.""" test_file = os.path.join(os.path.dirname(__file__), "date.xls") - self.importer = XLSImporter(converters={'a': date_converter, - 'b': date_converter, - 'c': partial(date_converter, - fmt="%d.%m.%y") - }, obligatory_columns=['a']) + importer = XLSImporter(converters={'a': date_converter, + 'b': date_converter, + 'c': partial(date_converter, + fmt="%d.%m.%y") + }, obligatory_columns=['a']) xls_file = pd.io.excel.ExcelFile(test_file) df = xls_file.parse() - df = self.importer.read_xls(test_file) + df = importer.read_xls(test_file) assert df.shape[0] == 2 assert df.a.iloc[0] == df.b.iloc[0] == df.c.iloc[0] @@ -131,7 +131,7 @@ class ConverterTest(unittest.TestCase): class TableImporterTest(unittest.TestCase): def setUp(self): - self.importer = TableImporter( + self.importer_kwargs = dict( converters={'a': str, 'b': int, 'c': float, 'd': yes_no_converter}, obligatory_columns=['a', 'b'], unique_keys=[('a', 'b')]) self.valid_df = pd.DataFrame( @@ -139,26 +139,29 @@ class TableImporterTest(unittest.TestCase): def test_missing_col(self): df = pd.DataFrame(columns=['a', 'b']) - self.assertRaises(ValueError, self.importer.check_columns, df) - self.importer.check_columns(self.valid_df) + importer = TableImporter(**self.importer_kwargs) + self.assertRaises(ValueError, importer.check_columns, df) + importer.check_columns(self.valid_df) def test_missing_val(self): - self.importer.check_missing(self.valid_df) + importer = TableImporter(**self.importer_kwargs) + importer.check_missing(self.valid_df) df = pd.DataFrame([[None, np.nan, 2.0, 'yes'], [None, 1, 2.0, 'yes'], ['a', np.nan, 2.0, 'yes'], ['b', 5, 3.0, 'no']], columns=['a', 'b', 'c', 'd']) - df_new = self.importer.check_missing(df) + df_new = importer.check_missing(df) self.assertEqual(df_new.shape[0], 1) self.assertEqual(df_new.shape[1], 4) self.assertEqual(df_new.iloc[0].b, 5) def test_unique(self): - self.importer.check_missing(self.valid_df) + importer = TableImporter(**self.importer_kwargs) + importer.check_missing(self.valid_df) df = pd.DataFrame([['b', 5, 3.0, 'no'], ['b', 5, 3.0, 'no']], columns=['a', 'b', 'c', 'd']) - df_new = self.importer.check_unique(df) + df_new = importer.check_unique(df) self.assertEqual(df_new.shape[0], 1) @@ -168,10 +171,32 @@ class XLSImporterTest(TableImporterTest): tmp = NamedTemporaryFile(delete=False, suffix=".xlsx") tmp.close() self.valid_df.to_excel(tmp.name) - self.importer.read_xls(tmp.name) + importer = XLSImporter(**self.importer_kwargs) + importer.read_file(tmp.name) def test_raise(self): + importer = XLSImporter(**self.importer_kwargs) tmp = NamedTemporaryFile(delete=False, suffix=".lol") tmp.close() - self.assertRaises(DataInconsistencyError, self.importer.read_xls, + self.assertRaises(DataInconsistencyError, importer.read_xls, tmp.name) + + +class CSVImporterTest(TableImporterTest): + def test_full(self): + """ test full run with example data """ + tmp = NamedTemporaryFile(delete=False, suffix=".csv") + tmp.close() + self.valid_df.to_csv(tmp.name) + importer = CSVImporter(**self.importer_kwargs) + importer.read_file(tmp.name) + + +class TSVImporterTest(TableImporterTest): + def test_full(self): + """ test full run with example data """ + tmp = NamedTemporaryFile(delete=False, suffix=".tsv") + tmp.close() + self.valid_df.to_csv(tmp.name, sep="\t") + importer = TSVImporter(**self.importer_kwargs) + importer.read_file(tmp.name)