#!/usr/bin/env python
# encoding: utf-8
#
# Copyright (C) 2020 Henrik tom Wörden
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
"""Unit tests for the converters and the XLSImporter of table_importer."""

import datetime
import os
import unittest
from functools import partial
from tempfile import NamedTemporaryFile

import numpy as np
import pandas as pd
from caosadvancedtools.datainconsistency import DataInconsistencyError
from caosadvancedtools.table_importer import (XLSImporter, assure_name_format,
                                              date_converter,
                                              datetime_converter,
                                              incomplete_date_converter,
                                              win_path_converter,
                                              win_path_list_converter,
                                              yes_no_converter)


def _remove_quietly(path):
    """Delete ``path``; cleanup is best effort, so OS errors are ignored."""
    try:
        os.remove(path)
    except OSError:
        pass


class ConverterTest(unittest.TestCase):
    """Tests for the stand-alone cell converter functions."""

    def test_yes_no(self):
        """Only spellings of yes/no are accepted; anything else raises."""
        self.assertTrue(yes_no_converter("YES"))
        self.assertTrue(yes_no_converter("Yes"))
        self.assertTrue(yes_no_converter("yes"))
        self.assertFalse(yes_no_converter("No"))
        self.assertFalse(yes_no_converter("no"))
        self.assertRaises(ValueError, yes_no_converter, "nope")
        self.assertRaises(ValueError, yes_no_converter, "FALSE")
        self.assertRaises(ValueError, yes_no_converter, "TRUE")
        self.assertRaises(ValueError, yes_no_converter, "True")
        self.assertRaises(ValueError, yes_no_converter, "true")

    def test_assure_name_format(self):
        """'Last, First' passes through unchanged; 'First Last' raises."""
        self.assertEqual(assure_name_format("Müstermann, Max"),
                         "Müstermann, Max")
        self.assertRaises(ValueError, assure_name_format, "Max Mustermann")

    def test_winpath(self):
        """Backslash paths are converted to slash paths; others raise."""
        self.assertRaises(ValueError, win_path_converter, "/hallo/python")
        self.assertEqual(win_path_converter(r"\this\computer"),
                         "/this/computer")
        self.assertEqual(win_path_list_converter(r"\this\computer"),
                         ["/this/computer"])
        self.assertEqual(
            win_path_list_converter(r"\this\computer,\this\computer"),
            ["/this/computer", "/this/computer"])

    def test_datetime(self):
        """Column 'd' of the date.xlsx fixture is read as a datetime."""
        test_file = os.path.join(os.path.dirname(__file__), "date.xlsx")
        importer = XLSImporter(converters={'d': datetime_converter},
                               obligatory_columns=['d'])
        df = importer.read_xls(test_file)
        self.assertEqual(df.shape[0], 2)
        self.assertEqual(df.d.iloc[0],
                         datetime.datetime(1980, 12, 31, 13, 24, 23))

    def test_date(self):
        """Columns 'a', 'b' and 'c' of date.xlsx convert to the same date."""
        test_file = os.path.join(os.path.dirname(__file__), "date.xlsx")
        importer = XLSImporter(
            converters={'a': date_converter,
                        'b': date_converter,
                        'c': partial(date_converter, fmt="%d.%m.%y")},
            obligatory_columns=['a'])
        df = importer.read_xls(test_file)
        self.assertEqual(df.shape[0], 2)
        self.assertEqual(df.a.iloc[0], df.b.iloc[0])
        self.assertEqual(df.b.iloc[0], df.c.iloc[0])

    def test_inc_date(self):
        """Incomplete dates are re-formatted according to ``fmts``.

        ``fmts`` is passed as ``{output format: input format}``; a value
        matching none of the input formats must raise ``RuntimeError``.
        """
        # These comparisons were previously evaluated without being asserted,
        # so they could never fail.
        self.assertEqual(
            incomplete_date_converter("2020", fmts={"%Y": "%Y"}),
            "2020")
        self.assertEqual(
            incomplete_date_converter("02/2020",
                                      fmts={"%Y": "%Y",
                                            "%Y-%m": "%m/%Y"}),
            "2020-02")
        self.assertEqual(
            incomplete_date_converter("02/02/2020",
                                      fmts={"%Y": "%Y",
                                            "%Y-%m": "%m/%Y",
                                            "%Y-%m-%d": "%d/%m/%Y"}),
            "2020-02-02")
        self.assertEqual(
            incomplete_date_converter("2020",
                                      fmts={"%Y": "%Y",
                                            "%Y-%m": "%m/%Y",
                                            "%Y-%m-%d": "%d/%m/%Y"}),
            "2020")
        self.assertRaises(RuntimeError, incomplete_date_converter, "2020e",
                          fmts={"%Y": "%Y"})


class XLSImporterTest(unittest.TestCase):
    """Tests for the column, missing-value and uniqueness checks."""

    def setUp(self):
        """Create an importer and a one-row data frame that satisfies it."""
        self.importer = XLSImporter(
            converters={'a': str, 'b': int, 'c': float,
                        'd': yes_no_converter},
            obligatory_columns=['a', 'b'],
            unique_keys=[('a', 'b')])
        self.valid_df = pd.DataFrame(
            [['a', 1, 2.0, 'yes']], columns=['a', 'b', 'c', 'd'])

    def test_missing_col(self):
        """A frame lacking converter columns raises; the valid one passes."""
        df = pd.DataFrame(columns=['a', 'b'])
        self.assertRaises(ValueError, self.importer.check_columns, df)
        self.importer.check_columns(self.valid_df)

    def test_missing_val(self):
        """Only the row with both 'a' and 'b' set is expected to remain."""
        self.importer.check_missing(self.valid_df)
        df = pd.DataFrame([[None, np.nan, 2.0, 'yes'],
                           [None, 1, 2.0, 'yes'],
                           ['a', np.nan, 2.0, 'yes'],
                           ['b', 5, 3.0, 'no']],
                          columns=['a', 'b', 'c', 'd'])
        df_new = self.importer.check_missing(df)
        self.assertEqual(df_new.shape[0], 1)
        self.assertEqual(df_new.shape[1], 4)
        self.assertEqual(df_new.iloc[0].b, 5)

    def test_full(self):
        """ test full run with example data """
        tmp = NamedTemporaryFile(delete=False, suffix=".xlsx")
        tmp.close()
        # delete=False leaves the file on disk; remove it after the test.
        self.addCleanup(_remove_quietly, tmp.name)
        self.valid_df.to_excel(tmp.name)
        self.importer.read_xls(tmp.name)

    def test_unique(self):
        """Two rows with the same ('a', 'b') key are reduced to one row."""
        self.importer.check_missing(self.valid_df)
        df = pd.DataFrame([['b', 5, 3.0, 'no'],
                           ['b', 5, 3.0, 'no']],
                          columns=['a', 'b', 'c', 'd'])
        df_new = self.importer.check_unique(df)
        self.assertEqual(df_new.shape[0], 1)

    def test_raise(self):
        """Reading an empty file with suffix '.lol' raises
        DataInconsistencyError."""
        tmp = NamedTemporaryFile(delete=False, suffix=".lol")
        tmp.close()
        # delete=False leaves the file on disk; remove it after the test.
        self.addCleanup(_remove_quietly, tmp.name)
        self.assertRaises(DataInconsistencyError, self.importer.read_xls,
                          tmp.name)