#!/usr/bin/env python
# encoding: utf-8
#
# Copyright (C) 2020 Henrik tom Wörden
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
import datetime
import os
import unittest
from functools import partial
from tempfile import NamedTemporaryFile

import numpy as np
import pandas as pd
from caosadvancedtools.datainconsistency import DataInconsistencyError
from caosadvancedtools.table_importer import (XLSImporter, assure_name_format,
                                              date_converter,
                                              datetime_converter,
                                              incomplete_date_converter,
                                              win_path_converter,
                                              win_path_list_converter,
                                              yes_no_converter)
class ConverterTest(unittest.TestCase):
    """Tests for the cell converter functions imported from table_importer."""

    def test_yes_no(self):
        """"yes"/"no" convert to a truthy/falsy value; other words raise."""
        self.assertTrue(yes_no_converter("YES"))
        self.assertTrue(yes_no_converter("Yes"))
        self.assertTrue(yes_no_converter("yes"))
        self.assertFalse(yes_no_converter("No"))
        self.assertFalse(yes_no_converter("no"))
        # Anything that is not a spelling of yes/no must be rejected, including
        # the usual boolean literals.
        self.assertRaises(ValueError, yes_no_converter, "nope")
        self.assertRaises(ValueError, yes_no_converter, "FALSE")
        self.assertRaises(ValueError, yes_no_converter, "TRUE")
        self.assertRaises(ValueError, yes_no_converter, "True")
        self.assertRaises(ValueError, yes_no_converter, "true")

    def test_assure_name_format(self):
        """A "Last, First" name is returned unchanged; "First Last" raises."""
        self.assertEqual(assure_name_format("Müstermann, Max"),
                         "Müstermann, Max")
        self.assertRaises(ValueError, assure_name_format, "Max Mustermann")

    def test_winpath(self):
        """Backslash paths are converted to forward slashes."""
        # A path that already uses forward slashes is rejected.
        self.assertRaises(ValueError, win_path_converter, "/hallo/python")
        self.assertEqual(win_path_converter(r"\this\computer"),
                         "/this/computer")
        # The list variant always returns a list and splits on commas.
        self.assertEqual(win_path_list_converter(r"\this\computer"),
                         ["/this/computer"])
        self.assertEqual(win_path_list_converter(
            r"\this\computer,\this\computer"),
            ["/this/computer", "/this/computer"])

    def test_datetime(self):
        """Column 'd' of date.xlsx is read as datetime values.

        Requires the fixture ``date.xlsx`` in the directory of this file.
        """
        test_file = os.path.join(os.path.dirname(__file__), "date.xlsx")
        importer = XLSImporter(converters={'d': datetime_converter},
                               obligatory_columns=['d'])
        df = importer.read_xls(test_file)
        self.assertEqual(df.shape[0], 2)
        self.assertEqual(df.d.iloc[0],
                         datetime.datetime(1980, 12, 31, 13, 24, 23))

    def test_date(self):
        """Columns 'a', 'b' and 'c' of date.xlsx convert to the same date.

        Column 'c' is converted with the explicit format "%d.%m.%y".
        Requires the fixture ``date.xlsx`` in the directory of this file.
        """
        test_file = os.path.join(os.path.dirname(__file__), "date.xlsx")
        importer = XLSImporter(
            converters={'a': date_converter,
                        'b': date_converter,
                        'c': partial(date_converter, fmt="%d.%m.%y")},
            obligatory_columns=['a'])
        df = importer.read_xls(test_file)
        self.assertEqual(df.shape[0], 2)
        self.assertEqual(df.a.iloc[0], df.b.iloc[0])
        self.assertEqual(df.b.iloc[0], df.c.iloc[0])

    def test_inc_date(self):
        """Incomplete dates are normalised according to the ``fmts`` mapping.

        In the calls below each key of ``fmts`` is the format of the expected
        result and the corresponding value is the format of the input string.
        """
        self.assertEqual(
            incomplete_date_converter("2020", fmts={"%Y": "%Y"}),
            "2020")
        self.assertEqual(
            incomplete_date_converter("02/2020",
                                      fmts={"%Y": "%Y", "%Y-%m": "%m/%Y"}),
            "2020-02")
        self.assertEqual(
            incomplete_date_converter("02/02/2020",
                                      fmts={"%Y": "%Y", "%Y-%m": "%m/%Y",
                                            "%Y-%m-%d": "%d/%m/%Y"}),
            "2020-02-02")
        self.assertEqual(
            incomplete_date_converter("2020",
                                      fmts={"%Y": "%Y", "%Y-%m": "%m/%Y",
                                            "%Y-%m-%d": "%d/%m/%Y"}),
            "2020")
        # A value that matches none of the given formats must raise.
        self.assertRaises(RuntimeError,
                          incomplete_date_converter,
                          "2020e",
                          fmts={"%Y": "%Y"})
class XLSImporterTest(unittest.TestCase):
    """Tests for the column, missing-value and uniqueness checks of XLSImporter."""

    def setUp(self):
        """Create the importer under test and a one-row frame with all columns."""
        self.importer = XLSImporter(
            converters={'a': str, 'b': int, 'c': float, 'd': yes_no_converter},
            obligatory_columns=['a', 'b'], unique_keys=[('a', 'b')])
        self.valid_df = pd.DataFrame(
            [['a', 1, 2.0, 'yes']], columns=['a', 'b', 'c', 'd'])

    def test_missing_col(self):
        """A frame with only columns 'a' and 'b' is rejected by check_columns."""
        df = pd.DataFrame(columns=['a', 'b'])
        self.assertRaises(ValueError, self.importer.check_columns, df)
        # The complete frame must pass without raising.
        self.importer.check_columns(self.valid_df)

    def test_missing_val(self):
        """Rows lacking a value in column 'a' or 'b' are dropped by check_missing."""
        self.importer.check_missing(self.valid_df)
        df = pd.DataFrame([[None, np.nan, 2.0, 'yes'],
                           [None, 1, 2.0, 'yes'],
                           ['a', np.nan, 2.0, 'yes'],
                           ['b', 5, 3.0, 'no']],
                          columns=['a', 'b', 'c', 'd'])
        df_new = self.importer.check_missing(df)
        # Only the last row has both 'a' and 'b' set; all columns are kept.
        self.assertEqual(df_new.shape[0], 1)
        self.assertEqual(df_new.shape[1], 4)
        self.assertEqual(df_new.iloc[0].b, 5)

    def test_full(self):
        """ test full run with example data """
        # delete=False plus close() so the file can be reopened by name;
        # removal is therefore our responsibility.
        tmp = NamedTemporaryFile(delete=False, suffix=".xlsx")
        tmp.close()
        self.addCleanup(os.remove, tmp.name)
        self.valid_df.to_excel(tmp.name)
        self.importer.read_xls(tmp.name)

    def test_unique(self):
        """Two rows with the same ('a', 'b') pair are reduced to one by check_unique."""
        self.importer.check_missing(self.valid_df)
        df = pd.DataFrame([['b', 5, 3.0, 'no'], ['b', 5, 3.0, 'no']],
                          columns=['a', 'b', 'c', 'd'])
        df_new = self.importer.check_unique(df)
        self.assertEqual(df_new.shape[0], 1)

    def test_raise(self):
        """read_xls raises DataInconsistencyError for an empty '.lol' file."""
        tmp = NamedTemporaryFile(delete=False, suffix=".lol")
        tmp.close()
        self.addCleanup(os.remove, tmp.name)
        self.assertRaises(DataInconsistencyError, self.importer.read_xls,
                          tmp.name)