From f65ac0761e4806305af473e97779b404f374eb62 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Henrik=20tom=20W=C3=B6rden?= <h.tomwoerden@indiscale.com>
Date: Tue, 11 Aug 2020 11:22:54 +0200
Subject: [PATCH] ENH: introduce new converters for the table import

---
 src/caosadvancedtools/table_importer.py |  36 ++++++++++++++++++------
 src/caosadvancedtools/utils.py          |  17 -----------
 unittests/date.xlsx                     | Bin 0 -> 4895 bytes
 unittests/test_table_importer.py        |  23 +++++++++++++++
 unittests/test_utils.py                 |   4 +--
 5 files changed, 51 insertions(+), 29 deletions(-)
 create mode 100644 unittests/date.xlsx

diff --git a/src/caosadvancedtools/table_importer.py b/src/caosadvancedtools/table_importer.py
index eeb91d6d..2087b156 100755
--- a/src/caosadvancedtools/table_importer.py
+++ b/src/caosadvancedtools/table_importer.py
@@ -28,9 +28,12 @@ Those converters can also be used to apply checks on the entries.
 """
 
 import logging
+import pathlib
+from datetime import datetime
 
 import numpy as np
 import pandas as pd
+from caosadvancedtools.utils import check_win_path
 from xlrd import XLRDError
 
 from .datainconsistency import DataInconsistencyError
@@ -68,6 +71,30 @@ def yes_no_converter(val):
             "Field should be 'Yes' or 'No', but is '{}'.".format(val))
 
 
+def date_converter(val, fmt="%Y-%m-%d"):
+    """Return val unchanged when it already is a datetime; otherwise parse
+    it with datetime.strptime using the format string fmt.
+    """
+
+    if not isinstance(val, datetime):
+        return datetime.strptime(val, fmt)
+
+    return val
+
+
+def win_path_converter(val):
+    """Convert a Windows path to POSIX notation.
+
+    val is validated with check_win_path; a ValueError is raised when that
+    check fails. Otherwise the path is returned with forward slashes.
+    """
+
+    if not check_win_path(val):
+        raise ValueError(
+            "Field should be a Windows path, but is\n'{}'.".format(val))
+    return pathlib.PureWindowsPath(val).as_posix()
+
+
 class TSVImporter(object):
     def __init__(self, converters, obligatory_columns=[], unique_columns=[]):
         raise NotImplementedError()
@@ -128,15 +155,6 @@ class XLSImporter(object):
                        'category': "inconsistency"})
             raise DataInconsistencyError(*e.args)
 
-        try:
-            df = xls_file.parse(converters=self.converters)
-        except Exception as e:
-            logger.warning(
-                "Cannot parse {}.".format(filename),
-                extra={'identifier': str(filename),
-                       'category': "inconsistency"})
-            raise DataInconsistencyError(*e.args)
-
         self.check_columns(df, filename=filename)
         df = self.check_missing(df, filename=filename)
 
diff --git a/src/caosadvancedtools/utils.py b/src/caosadvancedtools/utils.py
index 2d273edc..8622a7b4 100644
--- a/src/caosadvancedtools/utils.py
+++ b/src/caosadvancedtools/utils.py
@@ -140,23 +140,6 @@ def check_win_path(path, filename=None):
     return True
 
 
-def treat_win_path(path, relative_to, filename=None):
-    """
-    check win path and convert it to posix and make it absolute.
-    Parameters:
-    path: windows path as string
-    relative_to: unix path as string
-    filename: if the path is located in a file, this parameter can be used to
-              direct the user to the file where the path is located.
-    """
-    check_win_path(path, filename=filename)
-    path = pathlib.PureWindowsPath(path)
-    path = path.as_posix()
-    path = assure_absolute_path_in_glob(str(path), prefix=relative_to)
-
-    return path
-
-
 def return_field_or_property(value, prop=None):
     """
     returns value itself of a property.
diff --git a/unittests/date.xlsx b/unittests/date.xlsx
new file mode 100644
index 0000000000000000000000000000000000000000..c852cde999b0014aece605328a407481e0de24b5
GIT binary patch
literal 4895
zcmaJ_1yqz>x29`AkQM|)8l<JcA(Za!?ixyll16Y~=ys448I+RlkdSUvkfD)o1VrEt
zes}$Hz2EPC*P3}}t+UUWv-fkJ{p_QzgpNUqf`fyD@<|q;fpWtD$agaj8)r{$uItZ|
z#9>8rUZTj|XOMV*a1GgG>&oV+k(ct%g3|{sErc?7?mT_wABJNHe~lfd5O_E!B$GKy
zY<moW$w6d6#fBU)u|FyO^RsWAY*IHn=?yXr=_)c3TJ;rAbUGj1OLRqjPtEh$SffMV
zRCLk&_1*eN>sf36a2FFZ^egMN?@{pY2z$giR5$V@JgpqQDH{o1Hbs*a8C}%Ni|0IH
zmF4Ddx~(?+SW72|O106M8+_)kio-f(R52gro>iw^8I#`nRJ=aYBMV6KExh#7Q=|?@
zHtw=1QDwc~ezI+p#U6;Fu7r(aTO3+DkKDEx8Vd6Fe+vvDa)b|Z4UmVEB?#oi<>TTE
z)#`Pf;UQ{1;lMz+aGE+&r=sh{#-q#<z3I(JlqGk9kbc}x&AUR}wt3AqEkh_M0+?&c
z@HIrhoYraD^>cLwIxO16)$6&!r@OXhtVGj!hwtlQrWr~NaG8G@+?YKm7<@GDw>#=@
zW2H+k2$-w&#T2NOnGMd9hpP-GC|4f3SOrtmptTwP&>&L2$OLqWhDV&y;hUu1kAw~p
z3b1ZT;>*W&B6RaAoU0#1guf}Jj}scSGbPy$?}wee)tfehrtJp=*=zR28uY%#bgUYC
z!~F&PF+p@uldo(ZXF;6uge^$YzCf8>W@u`kao#$p$7q}$;lN;8CCyb^KdRY{Ke%8B
zjmrCYMfRm8%x^hkJL{WWF5WZ9oszYK)j*F=b{ghS0I*owB?K7TQqN+*c0t^d_FyZQ
zMtgu6Re~d;Vhtips+x5-WTTo~V7Ybu<MunC#0u7i>2ppU(Op3CmRBgLC>{r&Ld>AF
zdfQiu1eq<1qFLA~Hxd2{%a5EW>CYZ~>VadJQtxoLse(*~CK17pRT(3th-%8+y5q=a
zeRUEiysnf~G_A2jFNE26cg-IU1U6;Mj#~>tu|`Gj5_If&^PTcgsbQ_VWRA@zs_>}=
zCBD165gMo<H<y<ST1M^Lj?nerVXW~^4(Qvsq~UMX>)fEY#!R#i|Mm$g3W_WG-!Mb)
z4`w{Qe4TAPuMxAYZvuMBL;7s4to&z7X27U7T0^N;VYBjNreac|M4fqCF)-eJdc+J!
zjUG}sTje}P{DS42cBlRk?3BH`z3Vyuo}{+IYn+AU&amJpeP(TI`X&?3xmQx^jt__j
z3hvRrlgH)G6Ow{+$hvw+Kja~Me5b>BbCE{c21XLwPhvo1@v1=6{hdk?pEfyUzMzV6
zXk_9rpp%!32d^`eyPiwx!6)fRw<)PMf^3rf!EBlNK9k-~`Lo9u8tGC;mIs#UFvciP
zZJjdf^AB4mf&>D^dG}H*Ib3hi5Mi#6__Pg-G&iuXc6OyfL=D(hlXY&vrkH5l2^+vX
zM}Fmn>fNGRg(IjM>gFXch?Pi!ZCi@*mf$?+Jt^?Rz@#iTW0k^a+FaU!2CycRZEFpO
znmHgOd{G&1z%`nvFg*psdqnPx)sP2|QlXmdoqg(zMnBq@`-V!c%$z-3+w5SH$^d1`
z#)_9$Mh(yWhe>A(8ei&6gsii@ZO&*=R0)m_)lqVYwy$O;*vnPDBbgRs8=7SI;*+8y
z@n!?qNf^)VC5{JtEs4z~sO{Z4*~~^}H@U|yAcUFvLdS8dm{KJUkAef;n^`kq0`St#
zrnIQete0t<mii?`70aBjg++mb@6C(c?|h@o&sLLbj~(8ZsX>Rn$sRAXXl)A$)AEjM
z3~*R}-zm`;V6`RXhurDfVz7NARuI&8wwyCDK&Uu)6h8sGvg0iZ3IIo*Aw&Xec6kGn
zcG!0}&y?1CybD1sEpjuL6M{jYnM;%GyCF_R>obT?KA%23+RAN1IEC*;H&0tgEbUAs
zJFpvvd*kXQdVXD1BS-Jp(m^}jxmc6G3|?-PH{^Ko*!U@I9M$fUW5~Ru5_R{82_c|S
zaYi88cNV<3V^Cw8Y3|=nae;A-@y1#(KOD)4Or(FqIOT6%^t89J@$%&U^~HM)_hwxs
z&lzF-mff<EwUP0u1BRiu@5SI+ATTqO-+g*$3rbEOO#)F@=yN^Tq$qe*A=a2dyAj5T
zHs3=?$J(}kPFIibodcB=0LX<|PzEc;1FEwk%xF_V!ynr83x$)4%*h{x;LZ(SMV%DU
zsbHC-qp91H+fI?EX;>%Xd)Q*>2j5qNh}xy5`cY>{qb~(&apmv3`B1GW7nMubO_a5_
zJtV_5w$fb9amLJ(;!`?OVa6NJxtK5OSjyaHn|!VoxA=6$VPBR-&Zl1fA(is>z4oR;
zZ`jHcul@VO;=TkGu=SqzK1ywvOtrT)KeG(;!g^?bs$#?A$&c)owbPOn(3d$rlTVqq
zk|IEKd$IUXvCJNgW?x!QDdL9r*i>T%J^~HK%EsY@r|jk+zu}Nxw^qi$*Vct0huy7J
z^Fq?71|?ihOA<R2tiS{!ugV{>LBXU>_=e8>mOkik7kbW_cT&=pAtxs#iX7|l$675?
zU(np-4cJQZC3|vRx5O;P0VzpZ#Ej%SQJC`UWB6g6U6(4SZ&)n$V$#b<m=y{}&&q3C
zj%Nr<REMa)hrdpXXp%W-NV5n#T~odVUV|)Gx*d=YGz$wv^O#%DN)6hMndup!A2zdp
zEZ~x#(9OO2s%^K<O`y|A9u|JeTsIgsZO@aXEH^&BUzGyEJH&-;Ez=UN%?1{RK1^-E
z(06!-E*4L^D{}1OC7oh<6e>CREz>|SPGAR}J(8sHA^9PDkwl&tkOUUyNTYKUM0DU>
z6r4!bZIZBxVA&DYGhSk_ucY5}eH4>V!>h6pMc8vYKkat(Qj|OOZ4FZwkEJ=-*Uu!>
zzLv%Jy?G^o67uB`s*^RfTg2kA!e_prNprCIWgT4AOQH6qs1F$V2p!HUb#I+hTw?2q
z_eGmvPS=_0S{Xp~m%;_>qAq^+{^iha;x#TIA)^xcSQ?GVw?O`8GOzL@6*pmFG2qLo
ztO_H|-Mva*(ML|ef=evU-V1)URWT5D{QYT!sN=WC2I~Iol4V`|$3+0*QrC#mlGoDI
zn^Xvw3{Z|=)`hYIm2$JQm?EX{=albuo?qWa;!i+GbBuB@ap%7sJ$RsQG>9Sa0czd5
zl(IO-XEDP|Hln=)4G%Y5Lh}^bgKlm>oL`5m&^Lb2G^PmMDOW0hOW7%u6U}o|J7~bG
zkEk$>3)m(V4C3ZA90xdtfIAIhz_^4%$C1Ig@+P0!y*W@zn*E%_NR&W3Cv@0<Amhg&
zZOudA)0HFl{EDl3NdB#EdR^;4f>8(J({EygMpj)KKsCdfpoP^dv}<CRcLvLjBMToE
z-rtCU>K|hGM@-(7P5%hX1Qn+bJowLdFUVw1x)iXjVuYJxq{=Q%QM)x3M>+))r>d_4
zxu}isY_>*{Mm`<3$RBS!gl<H2%HU%a=V3&&cX{J9hg|GzFWk``%tOrEGsTtD5x;FB
z`5a84z-?H0C~UxG*%2>hQqZfY3jfsG5Vozs-G87k4nrLz(<(7SuX5#3Z}jZAD!2MX
zaLX*;Kbz+x0q&p&Z36$OW}M|b2R9$~k;~bN#0u~}&1Z(bvaoDTK``#5nJw+E9{_|q
zQ=%TY^^@`Npf=-4Nf}}`;8$eS_Rjpqko#u(tCU3Y>9v&fw72lEvDWhPaB#Kzr4FDY
zQmUOi_?ROSZ=#w&NnRMq8i@w?265+6=mo~*p<0Vx55K-b0eBbna|6$n1m74Lc9H>l
zl*zryB=1FtKWWhSc_sp8z$tyj+-fZi$o}Z&0e)T&sZn&cD19oS*lW?fTtCdA`LgL-
zVt`A9F=H9#MJK<dO7z`izxZA`wA%-=lN}6E(8ZqT!>wnuu!Xe&wXOs`Iz!LKcKuSE
zA!>uHgt<k!;~J$16Gb4jcw7cN(s(L$;<qMu9BFf&cK#~@#<;fbZOHw(AjQ#t7t^<q
zBdkGI>K-6BPi`xahs`fJt!bhbh7?C9dhsnX7B7d0%wG&i)OU8?fr6@=t#nvb{Do?Q
zKq2jgJMFdpJ0cP%Z+o51$Pu?<Qc@HZNW(kPyEy1|UL~eYcC<3RdQynN!H&r$63kS&
z#l9fAni9gI<-o(*?l&a=C4Dp5ndqr9e1)1dg8(Ki;$f1Jm>b6YCUc)V6`dwxfq)Y=
z!~CPY)!Jrz!d*^Gx(CNoq$MHqq<E}Sxvw>dMw7_flfN&9rHW{!5))E!Df)Y;@0Q#R
ze`<n#T$j>t$?&5lAi!1b__d>MSBQ0QMM65YIZlTsmb+f5ooUa>;bf?Fz8mR}g%Y|a
zZ&_nl#(l(}sjZcrKrJZrn4Lb8xK*I^Lnqc;KPLc*Z&4_(7}f462LprbnPiGZY}y}i
z&5nK|?woD+HU4-^lhX6GRj%Px9ehZ3gJ8N3%lrMA|Cb_8(Y8XT#*0&o*jagWH(tdD
zIZ7uB*;nb$(63Lg^Z;bY3Yndmzhac=W`-=>+^!Rp6|bg-JhNT90TA}-Y8AL*M&Uye
z9QRC&$j~|+diUC#pIVEmv$ZV2RoerD!^1zm1UfhBYu%-1-ZoKleVa)-U!XU=?SCLB
zp5~;<NUDmvM02!^@xu9xz#sZZ@-YGYBSxB^ha^6=Vv=|;8N9Eie`q<MEZyd-7Er~o
zBPp#CGqMW48QT#$Pg=wW3t5h<6Il$(?d#7zA3Jx$%jo%nB1jq1cBeTxOwIz!C1mc~
zcZ2dsqL@@P&-(#yfDBDPOLI&nb-_B7(+)!1OFa$LjWu;tl!pMbdkHni8}3uGDFpeS
zcx1WZ`=IMM9X}Zp-#|ve@2^NEL5?tz0l9kFxO$ms`?}e9n*35e+G5C15h2bw?w7H8
z_XiUxPKWg1q_Fby?KAO0>E6$dUHT8PY3ty!MMd?#-)%`#^A?;bVm1gn99x4aucb5Q
z02m5NJr1hsl(>7HMf=E&0FT;Dw>>o@&C4hcpAbO&9Us`gOYUZ)b=zdO^{rSYD?i*n
z4JDJs9-${l2ve@4W<4v$?cK(T12^EX^d6UTE3Ska&3p{Seo(1zlO>D|-9;^}7WPOB
z6wZ%w_7C7J0iK(-Z*0g*7N<XW1!gei^pcrIv!vZ?$Rdp&s0W-FTcG>LDp1l0tEG(<
zT7bGa&0-A3E}eGO`EA$$M1}fn;p!k7w*{x*9IEM3ZFjiZ+IhK48(|3|@$#N8VDFs?
zGy2QVhuM1-)F%sfFCC6rR&RA$xt+FPXSdOVvk5zgvnptVu&`T&_A|utw4xu+hi=hb
znBIn8!KfCEDDJg5#_>%BCXa)!GZXiHMbH456+O&rjzL8uMfrV~akH7fzRUQ(cH?H_
zPsf`b-F2`3+wLHj`lIvz)A^=UzHa({n+mc{UUU6_TEIVDZx+*QE&bbgvHr8}|5Mg~
z&T_NxTo>Norid)k$UpF3%J84_+@$a8GWFZ6k<t9OqV?wtH|gb?=6{<l{=bm^pYAtL
x-8BdOHk?~G-2a!6{`9_yjO&y6+jx*U{g-p9u7rt<G71VV@&O_VvG3QK{{u-NEOP(=

literal 0
HcmV?d00001

diff --git a/unittests/test_table_importer.py b/unittests/test_table_importer.py
index b662493b..18a3d017 100644
--- a/unittests/test_table_importer.py
+++ b/unittests/test_table_importer.py
@@ -17,13 +17,17 @@
 # along with this program. If not, see <https://www.gnu.org/licenses/>.
 
 
+import os
 import unittest
+from functools import partial
 from tempfile import NamedTemporaryFile
 
 import numpy as np
 import pandas as pd
 from caosadvancedtools.datainconsistency import DataInconsistencyError
 from caosadvancedtools.table_importer import (XLSImporter, assure_name_format,
+                                              date_converter,
+                                              win_path_converter,
                                               yes_no_converter)
 
 
@@ -45,6 +49,25 @@ class ConverterTest(unittest.TestCase):
                          "Müstermann, Max")
         self.assertRaises(ValueError, assure_name_format, "Max Mustermann")
 
+    def test_winpath(self):
+        self.assertRaises(ValueError, win_path_converter, "/hallo/python")
+        posix = win_path_converter(r"\this\computer")
+        self.assertEqual(posix, "/this/computer")
+
+    def test_date(self):
+        """The first row of columns a, b and c must convert to equal dates."""
+        test_file = os.path.join(os.path.dirname(__file__), "date.xlsx")
+        self.importer = XLSImporter(converters={'a': date_converter,
+                                                'b': date_converter,
+                                                'c': partial(date_converter,
+                                                             fmt="%d.%m.%y")
+                                                }, obligatory_columns=['a'])
+
+        # The importer is under test; no separate ExcelFile.parse is needed.
+        df = self.importer.read_xls(test_file)
+        assert df.shape[0] == 2
+        assert df.a.iloc[0] == df.b.iloc[0] == df.c.iloc[0]
+
 
 class XLSImporterTest(unittest.TestCase):
     def setUp(self):
diff --git a/unittests/test_utils.py b/unittests/test_utils.py
index 46ca537e..df1e491c 100644
--- a/unittests/test_utils.py
+++ b/unittests/test_utils.py
@@ -24,7 +24,7 @@ import unittest
 
 from caosadvancedtools.utils import (assure_absolute_path_in_glob,
                                      check_win_path, string_to_person,
-                                     treat_win_path)
+                                     )
 
 
 class Assure_absoluteTest(unittest.TestCase):
@@ -63,5 +63,3 @@ class PathTest(unittest.TestCase):
         assert check_win_path(r"C:\hallo")
         assert check_win_path(r"\hallo")
         assert not check_win_path("/hallo")
-        self.assertEqual(treat_win_path(r"tag\hallo", "/lol"),
-                         "/lol/tag/hallo")
-- 
GitLab