From f65ac0761e4806305af473e97779b404f374eb62 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20tom=20W=C3=B6rden?= <h.tomwoerden@indiscale.com> Date: Tue, 11 Aug 2020 11:22:54 +0200 Subject: [PATCH] ENH: introduce new converters for the table import --- src/caosadvancedtools/table_importer.py | 36 ++++++++++++++++++------ src/caosadvancedtools/utils.py | 17 ----------- unittests/date.xlsx | Bin 0 -> 4895 bytes unittests/test_table_importer.py | 23 +++++++++++++++ unittests/test_utils.py | 4 +-- 5 files changed, 51 insertions(+), 29 deletions(-) create mode 100644 unittests/date.xlsx diff --git a/src/caosadvancedtools/table_importer.py b/src/caosadvancedtools/table_importer.py index eeb91d6d..2087b156 100755 --- a/src/caosadvancedtools/table_importer.py +++ b/src/caosadvancedtools/table_importer.py @@ -28,9 +28,12 @@ Those converters can also be used to apply checks on the entries. """ import logging +import pathlib +from datetime import datetime import numpy as np import pandas as pd +from caosadvancedtools.utils import check_win_path from xlrd import XLRDError from .datainconsistency import DataInconsistencyError @@ -68,6 +71,30 @@ def yes_no_converter(val): "Field should be 'Yes' or 'No', but is '{}'.".format(val)) +def date_converter(val, fmt="%Y-%m-%d"): + """ if the value is already a datetime, it is returned otherwise it + converts it using format string + """ + + if isinstance(val, datetime): + return val + else: + return datetime.strptime(val, fmt) + + +def win_path_converter(val): + """ + checks whether the value looks like a windows path and converts it to posix + """ + + if not check_win_path(val): + raise ValueError( + "Field should a Windows path, but is\n'{}'.".format(val)) + path = pathlib.PureWindowsPath(val) + + return path.as_posix() + + class TSVImporter(object): def __init__(self, converters, obligatory_columns=[], unique_columns=[]): raise NotImplementedError() @@ -128,15 +155,6 @@ class XLSImporter(object): 'category': "inconsistency"}) raise DataInconsistencyError(*e.args) - try: - df = xls_file.parse(converters=self.converters) - except Exception as e: - logger.warning( - "Cannot parse {}.".format(filename), - extra={'identifier': str(filename), - 'category': "inconsistency"}) - raise DataInconsistencyError(*e.args) - self.check_columns(df, filename=filename) df = self.check_missing(df, filename=filename) diff --git a/src/caosadvancedtools/utils.py b/src/caosadvancedtools/utils.py index 2d273edc..8622a7b4 100644 --- a/src/caosadvancedtools/utils.py +++ b/src/caosadvancedtools/utils.py @@ -140,23 +140,6 @@ def check_win_path(path, filename=None): return True -def treat_win_path(path, relative_to, filename=None): - """ - check win path and convert it to posix and make it absolute. - Parameters: - path: windows path as string - relative_to: unix path as string - filename: if the path is located in a file, this parameter can be used to - direct the user to the file where the path is located. - """ - check_win_path(path, filename=filename) - path = pathlib.PureWindowsPath(path) - path = path.as_posix() - path = assure_absolute_path_in_glob(str(path), prefix=relative_to) - - return path - - def return_field_or_property(value, prop=None): """ returns value itself of a property. diff --git a/unittests/date.xlsx b/unittests/date.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..c852cde999b0014aece605328a407481e0de24b5 GIT binary patch literal 4895 zcmaJ_1yqz>x29`AkQM|)8l<JcA(Za!?ixyll16Y~=ys448I+RlkdSUvkfD)o1VrEt zes}$Hz2EPC*P3}}t+UUWv-fkJ{p_QzgpNUqf`fyD@<|q;fpWtD$agaj8)r{$uItZ| z#9>8rUZTj|XOMV*a1GgG>&oV+k(ct%g3|{sErc?7?mT_wABJNHe~lfd5O_E!B$GKy zY<moW$w6d6#fBU)u|FyO^RsWAY*IHn=?yXr=_)c3TJ;rAbUGj1OLRqjPtEh$SffMV zRCLk&_1*eN>sf36a2FFZ^egMN?@{pY2z$giR5$V@JgpqQDH{o1Hbs*a8C}%Ni|0IH zmF4Ddx~(?+SW72|O106M8+_)kio-f(R52gro>iw^8I#`nRJ=aYBMV6KExh#7Q=|?@ zHtw=1QDwc~ezI+p#U6;Fu7r(aTO3+DkKDEx8Vd6Fe+vvDa)b|Z4UmVEB?#oi<>TTE z)#`Pf;UQ{1;lMz+aGE+&r=sh{#-q#<z3I(JlqGk9kbc}x&AUR}wt3AqEkh_M0+?&c z@HIrhoYraD^>cLwIxO16)$6&!r@OXhtVGj!hwtlQrWr~NaG8G@+?YKm7<@GDw>#=@ zW2H+k2$-w&#T2NOnGMd9hpP-GC|4f3SOrtmptTwP&>&L2$OLqWhDV&y;hUu1kAw~p z3b1ZT;>*W&B6RaAoU0#1guf}Jj}scSGbPy$?}wee)tfehrtJp=*=zR28uY%#bgUYC z!~F&PF+p@uldo(ZXF;6uge^$YzCf8>W@u`kao#$p$7q}$;lN;8CCyb^KdRY{Ke%8B zjmrCYMfRm8%x^hkJL{WWF5WZ9oszYK)j*F=b{ghS0I*owB?K7TQqN+*c0t^d_FyZQ zMtgu6Re~d;Vhtips+x5-WTTo~V7Ybu<MunC#0u7i>2ppU(Op3CmRBgLC>{r&Ld>AF zdfQiu1eq<1qFLA~Hxd2{%a5EW>CYZ~>VadJQtxoLse(*~CK17pRT(3th-%8+y5q=a zeRUEiysnf~G_A2jFNE26cg-IU1U6;Mj#~>tu|`Gj5_If&^PTcgsbQ_VWRA@zs_>}= zCBD165gMo<H<y<ST1M^Lj?nerVXW~^4(Qvsq~UMX>)fEY#!R#i|Mm$g3W_WG-!Mb) z4`w{Qe4TAPuMxAYZvuMBL;7s4to&z7X27U7T0^N;VYBjNreac|M4fqCF)-eJdc+J! zjUG}sTje}P{DS42cBlRk?3BH`z3Vyuo}{+IYn+AU&amJpeP(TI`X&?3xmQx^jt__j z3hvRrlgH)G6Ow{+$hvw+Kja~Me5b>BbCE{c21XLwPhvo1@v1=6{hdk?pEfyUzMzV6 zXk_9rpp%!32d^`eyPiwx!6)fRw<)PMf^3rf!EBlNK9k-~`Lo9u8tGC;mIs#UFvciP zZJjdf^AB4mf&>D^dG}H*Ib3hi5Mi#6__Pg-G&iuXc6OyfL=D(hlXY&vrkH5l2^+vX zM}Fmn>fNGRg(IjM>gFXch?Pi!ZCi@*mf$?+Jt^?Rz@#iTW0k^a+FaU!2CycRZEFpO znmHgOd{G&1z%`nvFg*psdqnPx)sP2|QlXmdoqg(zMnBq@`-V!c%$z-3+w5SH$^d1` z#)_9$Mh(yWhe>A(8ei&6gsii@ZO&*=R0)m_)lqVYwy$O;*vnPDBbgRs8=7SI;*+8y z@n!?qNf^)VC5{JtEs4z~sO{Z4*~~^}H@U|yAcUFvLdS8dm{KJUkAef;n^`kq0`St# zrnIQete0t<mii?`70aBjg++mb@6C(c?|h@o&sLLbj~(8ZsX>Rn$sRAXXl)A$)AEjM z3~*R}-zm`;V6`RXhurDfVz7NARuI&8wwyCDK&Uu)6h8sGvg0iZ3IIo*Aw&Xec6kGn zcG!0}&y?1CybD1sEpjuL6M{jYnM;%GyCF_R>obT?KA%23+RAN1IEC*;H&0tgEbUAs zJFpvvd*kXQdVXD1BS-Jp(m^}jxmc6G3|?-PH{^Ko*!U@I9M$fUW5~Ru5_R{82_c|S zaYi88cNV<3V^Cw8Y3|=nae;A-@y1#(KOD)4Or(FqIOT6%^t89J@$%&U^~HM)_hwxs z&lzF-mff<EwUP0u1BRiu@5SI+ATTqO-+g*$3rbEOO#)F@=yN^Tq$qe*A=a2dyAj5T zHs3=?$J(}kPFIibodcB=0LX<|PzEc;1FEwk%xF_V!ynr83x$)4%*h{x;LZ(SMV%DU zsbHC-qp91H+fI?EX;>%Xd)Q*>2j5qNh}xy5`cY>{qb~(&apmv3`B1GW7nMubO_a5_ zJtV_5w$fb9amLJ(;!`?OVa6NJxtK5OSjyaHn|!VoxA=6$VPBR-&Zl1fA(is>z4oR; zZ`jHcul@VO;=TkGu=SqzK1ywvOtrT)KeG(;!g^?bs$#?A$&c)owbPOn(3d$rlTVqq zk|IEKd$IUXvCJNgW?x!QDdL9r*i>T%J^~HK%EsY@r|jk+zu}Nxw^qi$*Vct0huy7J z^Fq?71|?ihOA<R2tiS{!ugV{>LBXU>_=e8>mOkik7kbW_cT&=pAtxs#iX7|l$675? zU(np-4cJQZC3|vRx5O;P0VzpZ#Ej%SQJC`UWB6g6U6(4SZ&)n$V$#b<m=y{}&&q3C zj%Nr<REMa)hrdpXXp%W-NV5n#T~odVUV|)Gx*d=YGz$wv^O#%DN)6hMndup!A2zdp zEZ~x#(9OO2s%^K<O`y|A9u|JeTsIgsZO@aXEH^&BUzGyEJH&-;Ez=UN%?1{RK1^-E z(06!-E*4L^D{}1OC7oh<6e>CREz>|SPGAR}J(8sHA^9PDkwl&tkOUUyNTYKUM0DU> z6r4!bZIZBxVA&DYGhSk_ucY5}eH4>V!>h6pMc8vYKkat(Qj|OOZ4FZwkEJ=-*Uu!> zzLv%Jy?G^o67uB`s*^RfTg2kA!e_prNprCIWgT4AOQH6qs1F$V2p!HUb#I+hTw?2q z_eGmvPS=_0S{Xp~m%;_>qAq^+{^iha;x#TIA)^xcSQ?GVw?O`8GOzL@6*pmFG2qLo ztO_H|-Mva*(ML|ef=evU-V1)URWT5D{QYT!sN=WC2I~Iol4V`|$3+0*QrC#mlGoDI zn^Xvw3{Z|=)`hYIm2$JQm?EX{=albuo?qWa;!i+GbBuB@ap%7sJ$RsQG>9Sa0czd5 zl(IO-XEDP|Hln=)4G%Y5Lh}^bgKlm>oL`5m&^Lb2G^PmMDOW0hOW7%u6U}o|J7~bG zkEk$>3)m(V4C3ZA90xdtfIAIhz_^4%$C1Ig@+P0!y*W@zn*E%_NR&W3Cv@0<Amhg& zZOudA)0HFl{EDl3NdB#EdR^;4f>8(J({EygMpj)KKsCdfpoP^dv}<CRcLvLjBMToE z-rtCU>K|hGM@-(7P5%hX1Qn+bJowLdFUVw1x)iXjVuYJxq{=Q%QM)x3M>+))r>d_4 zxu}isY_>*{Mm`<3$RBS!gl<H2%HU%a=V3&&cX{J9hg|GzFWk``%tOrEGsTtD5x;FB z`5a84z-?H0C~UxG*%2>hQqZfY3jfsG5Vozs-G87k4nrLz(<(7SuX5#3Z}jZAD!2MX zaLX*;Kbz+x0q&p&Z36$OW}M|b2R9$~k;~bN#0u~}&1Z(bvaoDTK``#5nJw+E9{_|q zQ=%TY^^@`Npf=-4Nf}}`;8$eS_Rjpqko#u(tCU3Y>9v&fw72lEvDWhPaB#Kzr4FDY zQmUOi_?ROSZ=#w&NnRMq8i@w?265+6=mo~*p<0Vx55K-b0eBbna|6$n1m74Lc9H>l zl*zryB=1FtKWWhSc_sp8z$tyj+-fZi$o}Z&0e)T&sZn&cD19oS*lW?fTtCdA`LgL- zVt`A9F=H9#MJK<dO7z`izxZA`wA%-=lN}6E(8ZqT!>wnuu!Xe&wXOs`Iz!LKcKuSE zA!>uHgt<k!;~J$16Gb4jcw7cN(s(L$;<qMu9BFf&cK#~@#<;fbZOHw(AjQ#t7t^<q zBdkGI>K-6BPi`xahs`fJt!bhbh7?C9dhsnX7B7d0%wG&i)OU8?fr6@=t#nvb{Do?Q zKq2jgJMFdpJ0cP%Z+o51$Pu?<Qc@HZNW(kPyEy1|UL~eYcC<3RdQynN!H&r$63kS& z#l9fAni9gI<-o(*?l&a=C4Dp5ndqr9e1)1dg8(Ki;$f1Jm>b6YCUc)V6`dwxfq)Y= z!~CPY)!Jrz!d*^Gx(CNoq$MHqq<E}Sxvw>dMw7_flfN&9rHW{!5))E!Df)Y;@0Q#R ze`<n#T$j>t$?&5lAi!1b__d>MSBQ0QMM65YIZlTsmb+f5ooUa>;bf?Fz8mR}g%Y|a zZ&_nl#(l(}sjZcrKrJZrn4Lb8xK*I^Lnqc;KPLc*Z&4_(7}f462LprbnPiGZY}y}i z&5nK|?woD+HU4-^lhX6GRj%Px9ehZ3gJ8N3%lrMA|Cb_8(Y8XT#*0&o*jagWH(tdD zIZ7uB*;nb$(63Lg^Z;bY3Yndmzhac=W`-=>+^!Rp6|bg-JhNT90TA}-Y8AL*M&Uye z9QRC&$j~|+diUC#pIVEmv$ZV2RoerD!^1zm1UfhBYu%-1-ZoKleVa)-U!XU=?SCLB zp5~;<NUDmvM02!^@xu9xz#sZZ@-YGYBSxB^ha^6=Vv=|;8N9Eie`q<MEZyd-7Er~o zBPp#CGqMW48QT#$Pg=wW3t5h<6Il$(?d#7zA3Jx$%jo%nB1jq1cBeTxOwIz!C1mc~ zcZ2dsqL@@P&-(#yfDBDPOLI&nb-_B7(+)!1OFa$LjWu;tl!pMbdkHni8}3uGDFpeS zcx1WZ`=IMM9X}Zp-#|ve@2^NEL5?tz0l9kFxO$ms`?}e9n*35e+G5C15h2bw?w7H8 z_XiUxPKWg1q_Fby?KAO0>E6$dUHT8PY3ty!MMd?#-)%`#^A?;bVm1gn99x4aucb5Q z02m5NJr1hsl(>7HMf=E&0FT;Dw>>o@&C4hcpAbO&9Us`gOYUZ)b=zdO^{rSYD?i*n z4JDJs9-${l2ve@4W<4v$?cK(T12^EX^d6UTE3Ska&3p{Seo(1zlO>D|-9;^}7WPOB z6wZ%w_7C7J0iK(-Z*0g*7N<XW1!gei^pcrIv!vZ?$Rdp&s0W-FTcG>LDp1l0tEG(< zT7bGa&0-A3E}eGO`EA$$M1}fn;p!k7w*{x*9IEM3ZFjiZ+IhK48(|3|@$#N8VDFs? zGy2QVhuM1-)F%sfFCC6rR&RA$xt+FPXSdOVvk5zgvnptVu&`T&_A|utw4xu+hi=hb znBIn8!KfCEDDJg5#_>%BCXa)!GZXiHMbH456+O&rjzL8uMfrV~akH7fzRUQ(cH?H_ zPsf`b-F2`3+wLHj`lIvz)A^=UzHa({n+mc{UUU6_TEIVDZx+*QE&bbgvHr8}|5Mg~ z&T_NxTo>Norid)k$UpF3%J84_+@$a8GWFZ6k<t9OqV?wtH|gb?=6{<l{=bm^pYAtL x-8BdOHk?~G-2a!6{`9_yjO&y6+jx*U{g-p9u7rt<G71VV@&O_VvG3QK{{u-NEOP(= literal 0 HcmV?d00001 diff --git a/unittests/test_table_importer.py b/unittests/test_table_importer.py index b662493b..18a3d017 100644 --- a/unittests/test_table_importer.py +++ b/unittests/test_table_importer.py @@ -17,13 +17,17 @@ # along with this program. If not, see <https://www.gnu.org/licenses/>. +import os import unittest +from functools import partial from tempfile import NamedTemporaryFile import numpy as np import pandas as pd from caosadvancedtools.datainconsistency import DataInconsistencyError from caosadvancedtools.table_importer import (XLSImporter, assure_name_format, + date_converter, + win_path_converter, yes_no_converter) @@ -45,6 +49,25 @@ class ConverterTest(unittest.TestCase): "Müstermann, Max") self.assertRaises(ValueError, assure_name_format, "Max Mustermann") + def test_winpath(self): + self.assertRaises(ValueError, win_path_converter, "/hallo/python") + self.assertEqual(win_path_converter(r"\this\computer"), + "/this/computer") + + def test_date(self): + test_file = os.path.join(os.path.dirname(__file__), "date.xlsx") + self.importer = XLSImporter(converters={'a': date_converter, + 'b': date_converter, + 'c': partial(date_converter, + fmt="%d.%m.%y") + }, obligatory_columns=['a']) + + xls_file = pd.io.excel.ExcelFile(test_file) + df = xls_file.parse() + df = self.importer.read_xls(test_file) + assert df.shape[0] == 2 + assert df.a.iloc[0] == df.b.iloc[0] == df.c.iloc[0] + class XLSImporterTest(unittest.TestCase): def setUp(self): diff --git a/unittests/test_utils.py b/unittests/test_utils.py index 46ca537e..df1e491c 100644 --- a/unittests/test_utils.py +++ b/unittests/test_utils.py @@ -24,7 +24,7 @@ import unittest from caosadvancedtools.utils import (assure_absolute_path_in_glob, check_win_path, string_to_person, - treat_win_path) + ) class Assure_absoluteTest(unittest.TestCase): @@ -63,5 +63,3 @@ class PathTest(unittest.TestCase): assert check_win_path(r"C:\hallo") assert check_win_path(r"\hallo") assert not check_win_path("/hallo") - self.assertEqual(treat_win_path(r"tag\hallo", "/lol"), - "/lol/tag/hallo") -- GitLab