From 5ceee474254b96a769ce908d307e9bc9efd9a2ae Mon Sep 17 00:00:00 2001 From: "i.nueske" <i.nueske@indiscale.com> Date: Tue, 12 Nov 2024 09:42:52 +0100 Subject: [PATCH] ENH: convert.to_dict() now outputs encountered type errors in a table --- .../table_json_conversion/convert.py | 182 ++++++++++++++---- .../data/simple_data_broken.xlsx | Bin 0 -> 8982 bytes .../table_json_conversion/test_read_xlsx.py | 21 ++ 3 files changed, 170 insertions(+), 33 deletions(-) create mode 100644 unittests/table_json_conversion/data/simple_data_broken.xlsx diff --git a/src/caosadvancedtools/table_json_conversion/convert.py b/src/caosadvancedtools/table_json_conversion/convert.py index 48a0f676..e874c535 100644 --- a/src/caosadvancedtools/table_json_conversion/convert.py +++ b/src/caosadvancedtools/table_json_conversion/convert.py @@ -46,6 +46,114 @@ def _strict_bool(value: Any) -> bool: raise TypeError(f"Not a good boolean: {repr(value)}") +def format_exception_table(exceptions: list(tuple), worksheet_title: str, + column_names: Optional[dict, list] = None, + max_line_length: Optional[int] = 120) -> str: + """ + Given a list of tuples containing a row and column number as well as an + exception in that order, and the title of the current worksheet, returns + a formatted table of the exceptions. + + Optionally takes a dict of column names, if given a header will be + generated for each column and exceptions will be clustered by column. + + Default line length is 120 and can be overwritten by max_line_length. + + Params + ------ + exceptions: list of tuples containing row, column, and exception + Data to be formatted + worksheet_title: str + Name of the current worksheet + column_names: dict or list, optional + column_names[column_num] should return the name of + column column_names. + If given, exceptions will be clustered by column. + max_line_length: int + Soft cap for the line length of the resulting table + + Return + ------ + string_rep: str + Table containing the given exceptions + """ + def to_char(num): + if num < 0: + return "" + return to_char(int(num / 26) - 1) + chr(int(num % 26) + 65) + max_line_length -= 40 # Estimate of Field + Type space use + + headers = {"loc": "Field", "type": "Error Type", "mess": ["Message"]} + lengths = {key: len(headers[key]) for key in headers} + new_data = [] + + current_column = None + exceptions.sort(key=lambda tup: tup[1]) + for row_i, col_i, excep in exceptions: + if column_names is not None: + # Update Names + if current_column != col_i: + current_column = col_i + new_data.append({ + "loc": f"\nErrors in column '{column_names[col_i]}':", + "type": "", "mess": [""] + }) + # Setup + row = {} + new_data.append(row) + # Field + if isinstance(row_i, int): + row["loc"] = f"{to_char(col_i)}{row_i + 1}" + else: + row["loc"] = f"{to_char(col_i)}" + lengths["loc"] = max(lengths["loc"], len(row["loc"])) + # Code + row["type"] = type(excep).__name__ + lengths["type"] = max(lengths["type"], len(row["type"])) + # Message + lines = str(excep).split('\n') + new_lines = [] + for line in lines: + if len(line) > max_line_length: + words = line.split(' ') + current = "" + for word, next_word in zip(words, words[1:] + [""]): + if current != "": + current += " " + current += word + if len(current + next_word) > max_line_length: + lengths["mess"] = max(lengths["mess"], len(current)) + new_lines.append(current) + current = "" + if current != "": + lengths["mess"] = max(lengths["mess"], len(current)) + new_lines.append(current) + elif len(line) > 0: + lengths["mess"] = max(lengths["mess"], len(line)) + new_lines.append(line) + if new_lines == []: + new_lines = [""] + row["mess"] = new_lines + + dividers = {key: '–' * l for key, l in lengths.items()} + dividers["mess"] = [dividers["mess"]] + + # Fill for the messages is set to 0, if we want another column or align + # right we need to use lengths["mess"] + string_rep = f"There were failures during validation of worksheet '{worksheet_title}':\n\n" + for row in [headers, dividers] + new_data: + string_rep += ' {loc: <{fill}} '.format(loc=row["loc"], + fill=lengths["loc"]) + string_rep += ' {typ: <{fill}} '.format(typ=row["type"], + fill=lengths["type"]) + string_rep += ' {mes: <{fill}}\n'.format(mes=row["mess"][0], fill=0) + for line in row["mess"][1:]: + # Front padding + string_rep += ' ' * (lengths["loc"] + lengths["type"] + 7) + string_rep += ' {mes: <{fill}}\n'.format(mes=line, fill=0) + return string_rep + + class ForeignError(KeyError): def __init__(self, *args, definitions: list, message: str = ""): super().__init__(message, *args) @@ -205,9 +313,13 @@ class XLSXConverter: # # - data: The actual data of this entry, a dict. # entries: dict[str, list[SimpleNamespace]] = {} + exceptions = [] + col_names = None for row_idx, row in enumerate(sheet.iter_rows(values_only=True)): - # Skip non-data rows. + # Skip non-data rows and save the row containing column names if row[row_type_column] is not None: + if row[row_type_column] == "IGNORE" and col_names is None: + col_names = row continue foreign_repr = "" foreign = [] # A list of lists, each of which is: [path1, path2, ..., leaf, value] @@ -219,24 +331,27 @@ class XLSXConverter: foreign.append(foreign_column_paths[col_idx] + [value]) continue - if col_idx in data_column_paths: - path = data_column_paths[col_idx] - if self._is_multiple_choice(path): - real_value = path.pop() # Last component is the enum value, insert above - # set up list - try: - _set_in_nested(mydict=data, path=path, value=[], prefix=parent, skip=1) - except ValueError as err: - if not str(err).startswith("There is already some value at"): - raise - if not xlsx_utils.parse_multiple_choice(value): - continue - _set_in_nested(mydict=data, path=path, value=real_value, prefix=parent, - skip=1, append_to_list=True) - else: - value = self._validate_and_convert(value, path) - _set_in_nested(mydict=data, path=path, value=value, prefix=parent, skip=1) - continue + try: + if col_idx in data_column_paths: + path = data_column_paths[col_idx] + if self._is_multiple_choice(path): + real_value = path.pop() # Last component is the enum value, insert above + # set up list + try: + _set_in_nested(mydict=data, path=path, value=[], prefix=parent, skip=1) + except ValueError as err: + if not str(err).startswith("There is already some value at"): + raise + if not xlsx_utils.parse_multiple_choice(value): + continue + _set_in_nested(mydict=data, path=path, value=real_value, prefix=parent, + skip=1, append_to_list=True) + else: + value = self._validate_and_convert(value, path) + _set_in_nested(mydict=data, path=path, value=value, prefix=parent, skip=1) + continue + except (ValueError, jsonschema.ValidationError) as e: + exceptions.append((row_idx, col_idx, e)) try: # Find current position in tree @@ -250,6 +365,12 @@ class XLSXConverter: if not fail_later: raise self._errors[(sheet.title, row_idx)] = kerr.definitions + + if exceptions != []: + exception_table = format_exception_table(exceptions, sheet.title, + col_names) + raise jsonschema.ValidationError(exception_table) + self._handled_sheets.add(sheet.title) def _is_multiple_choice(self, path: list[str]) -> bool: @@ -308,20 +429,15 @@ class XLSXConverter: if isinstance(value, str) and ";" in value: values = [self.PARSER[array_type](v) for v in value.split(";")] return values - try: - # special case: datetime or date - if ("anyOf" in subschema): - if isinstance(value, datetime.datetime) and ( - {'type': 'string', 'format': 'date-time'} in subschema["anyOf"]): - return value - if isinstance(value, datetime.date) and ( - {'type': 'string', 'format': 'date'} in subschema["anyOf"]): - return value - jsonschema.validate(value, subschema) - except jsonschema.ValidationError as verr: - print(verr) - print(path) - raise + # special case: datetime or date + if ("anyOf" in subschema): + if isinstance(value, datetime.datetime) and ( + {'type': 'string', 'format': 'date-time'} in subschema["anyOf"]): + return value + if isinstance(value, datetime.date) and ( + {'type': 'string', 'format': 'date'} in subschema["anyOf"]): + return value + jsonschema.validate(value, subschema) # Finally: convert to target type return self.PARSER[subschema.get("type", "string")](value) diff --git a/unittests/table_json_conversion/data/simple_data_broken.xlsx b/unittests/table_json_conversion/data/simple_data_broken.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..361953f660f12cb37d979ff3b4c49895265131e3 GIT binary patch literal 8982 zcmbVyWmp{BvNplp-5mx9Hh6&G?oM!b1_lrA4gm%TPJ%;lcL)x_g1c)V1cF20gMIIJ zc24fO`^T+*dZwpms(P(j)o)eTJ1X)pu((i&h=@>$iuSrtzX{6Iu@Tt9-i3|z`L#T; zTd9W~HSEMYis_1bMFjCfVXJ~nD-{=c7p)^nePmv5@Y$sV4mN6RO;4P&pZ|kt!K#8I z%PKp)ykVrKZX|R&y_s)O@5qC*7aPy$N56h7K4~?OI1@NtS?>JHhUjp<x9dAO<o%zD z@{=$2%#O^Vghax@2AUq659SfOA-YE~*v?j3ALspH+d>o_?6@6eDM;T612?s)lc?MW z<rMOKiul^F(JaD{y;d!Jn@848Am~J>>@td2sZ_h)2l1yG<EVZ?Mh@P;$7yNa1v{CZ z!;iLMhp=0@dMncS$|!u#!6O42$wj}(z8~4eQISVL3{B@s|Ni8JF=!|#mH*XE7*D_O zv}f}Gf$dB|AUjr12m4sHrMN|QOy3i23BPdtCR|JtNtnrs=@Huxt>bY(D%Lo%1<Szl z+mkFlbNLzf8H5O-)GdCivHM*Oq!XgL4Y_{WBZOX<A&s#eR(hjG8<3cwS;?0Y6>L*5 zytpI&$*y$4RhpVN#$7?Atc`=vPv0K7;^KocVp>b&0%qIOQjd-j?ulgxR0FmjF05JS zNBu;6lfp*?aXdJyDzzh(z`B<a@S-o3Kyrp8HSY=ZyUgSV^W1)=Y4N#>!O=Wyx*d5R zTR@I6+@fYU0LAP967p{MiM}wF30QbRp8WNgO2H=DXCxstI+5eXRY-zhzI3CT-9t=J z@-Ca)8zvqFW-3$Di;Xj4bX)#AJ<BP?`hv8BpdtoV7q=LsihUn9;g&6&afe70QVjYY zR(BL1?Zx*BpKT-r<!i&Gir>dT+cP)u%nEI)Fmigq`(SVor##@%G}eCqCd;rd#PrD= z$khM+tg!4fvTqv5D~Q8ao$ai!-zsQ}S=ri1%o~s2dKkaDPjO$?IgV?wgEzd62Klfx zlOQ@t)DoaKp_5Y_EJ5{IiU_TbA(^HZo8RE962Ey_o03Ix<5qNiJ1FUw^`|?-dlua6 z#~*d;X<@T{V-vlvtjUv3P6j6V(qi%q>rfHe+4bY!Qo#49vKbTsD%f=Crp^)4mp+XB zNUhr<DN_wYVC1plng`H-!-1)`*N(6_?j#vKiD|w@dXB0!>@^0Wr--_M`KPEt`>&{S zarLsdaCwfYV_ntC3?5v+wMr4c`6Xl|Bs9B3rh;JsoAlCEJ%_J#2neX6+&n;~jPtp1 z1en0yj&C((mr^LIA--{)F2WQ!W)VRDX13y=2@y<t_JLC4g4>w*KWVKNpxZl*9@)i} z=s4urcVbv30yXw`&RV_bFd)kgNC|N@U8Qh~a=H)uEt+j~a2467iR=Zk&5TtV1_{lY z07NAwD7<MhZ^LzbJ}YZ}t5#Ek!Qsf8xFI-V4{GT(KPB8o;~-K8*3HO|XE-Jj#!>oP z>jzFhuKbUid`_1RM_;YHHNpC%-MK6YawTDDMXTQt+&LO_imEf_uM~bo!J$dso{q>G z@x4-_qPd`H@IVx0fHI;{M}x_cSnJhfAuId70nleTc#by$tYkxAL1EJkHZ%o98r#h{ zr(gd1pz&>3L(3L;(d18<h2%pRTV#Y@%r}Z-oFJJA-iWZT75R4i2F}>hr6S_vqTjgX zUU^4}u49vo#%TRd(hpkOwESayJ?Fm7B7j0X(X4&$$KE#^U?o|mWRe6cy(W1>XcAUB zHjTVKC@hC&JH{)H59jE}*H`B{F4>$d%Ht?;Gi3E_P*;T|FS*I|Q_ib)jU}SUY+x%7 zsbD6fc8Qn=hq@Yr<grKIK9=;2@H-+1=&_!S4|C<q&rZLx*XE2D&EJbAmAyP8OV!a+ z%@jt`Uu8BS=*^_7H!CP>pthp;ej{))&7@!<-Sa{4K_oF{n@1V9j`!<I5ktaRoHMZj zaZ%WwP44ZcU;ZLj6FKJ=2@y(KwB0wU-T%K~bK*P#ga8bA41Jbo`SUS3*cm@alAP}< zPi`$buE;6$Z;3mV$J4HE?xhdsE|ngWKU>~Vo%_cuZY2+CN;`m~7gMDi-j+>Bmrb1J z?Jiy*j5=hOwNKS_0~?YAoQ9i4IzO8LkNX^IM@qh=eQ(|ih#-J-9EdN32>`+I`Fr}i z=NdyH$K+;VhV+G_|MUq+6AU4`PDgVZS8QsO=(R-?kl_p1SD!Ou$0Ju#njuSZR8A~D zh_VOrsb3tTOcW9Org8Jc>~+t5dXY669~+1U1Fp5S%!=O|{X{&GR=&UvWQO0iRNU?! z*d2&Lnz6{XA-wg&oTn}w9~vi=XlQ~9+*|mx(DdT-E`s2%R-Gm7rEDsUy+8*=iebzp zh2u2qj1r;QT#kX`>$!m&+ogN!tcCcO9(7~+ftj3UJ``Fr{i9ml?uvwqT<@ZHNztHz z-`E4<y?pI8H}6cWXQBGPbX_?R7fZGF_e1f`bNT(s1S?;H8dv64wsLw$I$u0v0ag{Y z(Bu;i*pUAd77+fOVO*>&EL>gK{(9qhhJ|)*1D8cUT;DaLjZ^WQ20}y%S=)Vcss(p? zw6sJ=t6O>&B0FmN3R>~O$~ER8p8i6rsWDoN7Y1Z&-rt)W+z?+27kpX5EGb^c>6|;b zCErNsf>bg*G=^tf?R6A!Z4z>H5B08L?f7Rt;5ml+XdqE72E*zr-b;Kt;zZPdyu&St z8$=Ax(iz0H(HVBj<&?#2AShtj_h`+dl!V-|_7wOiqGJtkQM+L605%rs5<;lZ&Nan= znwH+5D~H4B7RAcwa)|qymrBkq;LCjgvH2?0vsp6vaP{t~1l@h2IX36SrhvqFuS;@l ztBK_(&Qgz*yw_+BXWj>W5F6@FL{yhZEOppR&N_PekDV6oMz15h5g|+6T&PjeMO1~9 zABb8RWCc$v-=@yd1Xl$-GA@)ZdMMMGdc)j<!!**y;2)E5*VL3KODzmxJEM*js+1{V zmvt{<DPVD#VG=8Fj6LN|4GN~M>uF;o($|AIqxFHsHOmFe-KD7Sdl{`3odVv9rJSOv zWM41u4^b8kD#Q|*d^tbxPU!XvgU$;)5kg5|rRfzZFl^M*TU!{(P|e$Wxfr>8sCJZI zWJ1@Qhm_W40Hj+Ao8%SW9RlcFlLzgG;o3`G)VEHnzh04$x(iI}y*SyicRYFtQ_Bj0 z!s}4~gtI>LEoN+>M~;SYh`|&#yxb2V`_PI|k_Zrjue7aKLxuVyML`ID!2r}PdTN6h z#B{eNGon&xI~5HskbF}G-jZeP4@oV13#Z2^sVssm3~0=eJ2`R{*(+__n2*{*ZbiVi z;X*5zfBV(cz+j-7x)${KE*K^id>^D;#%sNR3v7hcaBsYUn;fkpfhQ5K6uF<0I!DK< zO^UdY?Y)qXfP$24A9R)o{=zT!6VK+^{6sY_+j~#Dc}v$DC};&@8efq0T8t2V`pR_v zT~RC{_qGj9|G*AW+nelQ?XHtUp!KN$Xn#tI>%xX6{k9HE(RVSzXdmdK=NrcGbc&U@ zJvkzGa(I9I4L`V|ZGObw-0S=ssH(rr)erWo4+eSQIY}O}t(6o#U@rR0TN$<&Q_vkC z5O~%*cq%4fvNVp1>Y#R=xSEJio!Sn8vdK;{?^0@gd69s24i_0{7u=FnEmU8CQ>7o| zRrTFxo<iH7@<Gm8<!)Zdw0%xklqIC1Ja`zeOLUzSGf3wf2E2rZlzCBoqm&m{Wnm`R z@TTt*TNKc-zY!O^hHeQBlt&(Yqb+<2q^g&|%^Z*Vuzwo0b%HdL47lhDo<lW;6}F~V zGdoGHcFKI;qGn^(_WjINa``ly89ar`E6p*^*!D${W(V~$LB<>cxoT7Y)^EFQLOj6E zP~q?(G|Ji5b^W%sbEoJ-*Szk^!7GR*h2`aAYfke71$mb_^9efYxpo5e!`0UZNaFnQ z8{^88h4C`>c?M2?vaGdH8&~JZ6NgP|J;G`4P>Tz3$J^J>n8eO#8=wRW1%-<Kf50Tp zzc4Ay;R%y42iDe#eq2lLIqpVPd$HGvFoa?zicJ9y+;&$14WwMvFSYR}wx>MkUK^Vi zj9ti`L06sseDKg_?qkXGSj(H3H%Y-Jj#q4*;Xxzyo@O{*nZE}E{e{?LA$jxCRI#V? zK3yd@j%Neb$_u_N-S0_^DU}9x!+vhZS`@6pOEWr5!Va+GsNhqfryh;T;e;iyfuh7l zY#Q&hLS*bM@mH(%=5s|w1O@X}c|LQD$FP%9qkk_VNz4zeKY6G<x<Ngrn*N*<6<upz zSZC#7cS*g;k^VKHQIL42VkVFDO-~*l%?W>;WW)+6Y0j80aNSW><~sO%GBYOsRQU^{ z%pixJD#a|Fh)I+QtEh)(>`kh~*iVvMB={&BJI<Q33<jDU5#+ua5>y1UpI`iHi4n!1 zA2HjMnR8-Y#jjj`fJi>pu`?jB5uT>f_OlN!@KD?<T&WYYQevs-&P4iin13QP-gbl` zXLat9Pa<8ZaE=g)`*r9$r$-(%cuUn%U}DoX@M1AczX=Kq!P|QE!_&T>I*aOke&oCN zo9wtI>0u1>@>Odm>HRbXlZ)b1v_p=K%ySew$A`r-d)Pt~<qGn`%sf3W3$9FM<{hFX z6ICP5>1FIQmr<rf*jswic!5RgWP{(izk<M$z2?oPpIt9ps`*@WfmQQftdtl=k>kAU zX)A|UD<=Nym%|F(Ei6@o0t&BQ&M(GSk@v}TKy%4oV4pZ4ww)IXz$5F1%qGAjVdZlt zS5kLV$*EO`G^>I#z{ES*7SVN79r0V6rKOZ~H0NZQAs*NxSCH3(+(GuKr}eGWqK~?& zIQybshv>*+%?E527~UOiCz8eHPdPR}9quL?7?`fiu3s@<g8Q<SCT-A^G4nIzFWtOC zSDb?w5t_F}*cP1qJGa?WNe^jPwDw>l>zDdE?Cm5F7?7q|$AcKqVfJtX@rwe>U!fl< zjifNRx}RCZqebKmMNGDL)20Kg!p#BheE#tm-KeZ%TMX#^Z(K3!FmWCi)Jl-93fWsS z>ZE6tno@C2zJ>TID3C<z(l}R82@DLjO%6)ZR9n-sl0=qCZHm)W+tX?hP0nKDL*#VX z#-eAV)m@fV4-1s$4GN6Y6Lf*#)tE0C3DK73lQH@{G}@zs;g;qLF`xXRNRyTJB;|{y zbOhYQ$}<H_t{}scl)eREu81;?ICvt)<f#~z^n?-1ZMk67<l)I}E&hR2VhJG^zrzvL z9ap0|aEoFxuoCA`xDrQhgXJ(pjW^Xz43LrBuP&X_y;=+OJm)(r102nes+S)5W~uNR zWfeOJqkVLI-a7;~88Ki8p3F6HM{IsCfLC2^|JqdMC>)FD_R|K_?WcuIQB>3I*Mm;` zgL4x3A|*yZ`Ye((2j>)JRPxQZYaHkHLno@xh3BJ2RH5T&b)4D@AlgS^7(2|J%+_L> z2I{&A%0plUylO}VR|lis4{^LmAoEL$;FrKcJ<h;DK@CfgUxR+m<IUV4GMAB{mg5c* zpVrlvNic??d$V?Mi`5|jq}ee~7t?z$kZ^2i7M!r3L~;AUBiyi9pr(JveMLoz+~$ia zUmwT7r!yh9a3Tgn-_OIut<1gO5fQ^{%oZJ+YZ?b?CZgq)a25tn_#JN$R9Dr|y}pjx zn`c}aQ|3anE6Qt1+7x48>E4$Q(Tv@qC3lqA^AMB|+}9j&r-!jSkLJfIU!9J^56oe{ zWWl4s{A9l#KN`Y0hPkos7}0iDJO9{1)ple=A}pS4SRLNuBu8^9FT%9geyR$#LJ9=x zG-;V%uy0O=80(&58sI<bOF3NAB2xb3$iYb=&mZ2fTYJ4mHVoqY*pYM5%uuNUY0Da( z(_U+UbhcArDaFQnf_w_Gq8<%L)>Pl;jLJ>8qu%)O)S!ojjDE>7st2@8r)?;Ib#int z*E9<qIn&M@!g$7igXW6%7C0y<B;x-A{saHQf1M%l5+7#!xOSE2pbfRH5veRdx+YVD zznU3Lujvvjqau+8O9+){@BfRdOA=QkBRgh5AVO@b+UUyeotW9}sc>YL&~6aBfXl1$ zhHu7)wE+{yUw%2azJ2XZMuml@@`W+G8AGRuV$WXzBW5iU(d-}qhRM>A*5d81*XMYr zAc^RL_n#F8J%eabZ)R}-1B?^Abk%bXjp2qth;Bd(t9v?O9YSmhwzN)p_T)62I5S!* zAFKoo+|}>F1UD#r0wSf7g2|Yv3knC7Q;qj@y?Y8(Dh)m<+aDHmmD+}6vm+l9yRdst z+z-5+GR43o*f=-2X0Chc$M<2cs3PfhjdY>hkW^XNzZ6(U&*?nKT(q@W@ES7~w%<&T zfU=cgps`D9UXg|aui9MP`Lz(yuA|q3i|^efdeNC9Km;_ZO;i2yN<qwG96EGsJN+j| zRmP`Qn9y2UZkr&~C_H$j1}q8BP;uQBh;?m0$0Z%Ql68d#Cw$6<c#Vnk0aX{(VxL+c zlur!7rBH~VOUYLtd=85yDGKQTlk(gt-X#hq>RrXpL7kdQRa7Ro#pB`YjE<E$CZWm+ zm?ay7Gu;a=d%{+A`klt|()=lv+d>&3elOgL;I5VdT`qzw=b3ja-y)1!Xl-j)(!d74 z3^c3?2+%ZM%|W|f%R&>0nrF>A2@*q?L{){&==c@f^t$d4`xsf8r&*G2kWZXZJ;@Yj z;8$yp+HlTpaB1;w^&&~~G&BzPPD0FO$u+fdBVb<?_7xB30&rY0L_a61_~M<3&LKcp zT}Waee2?kX^pi9eY(+=2s&Pg+l{r5z?5^Lo&u(83A@FbI^7Lr!T@Z!yZ&k%PPv25# zknP0eS!rC<z-6=_CP@4QSRsVlS@&m@A15AO&u3Pwpy~@sDSxtJ=9SS+C`nD2$aJcN z`sTNq{ni`LFz(u%=It-_!H+~WbyQjEy^=JhGZ9Tb=o^6@1%oTRc+Ga9Gu#plSY+zk zmV-4!;7L?nZgJL}&`iV?Z}kt}v0_URV_ldn@SI#B4OBusBa%S#8PsMUd@!}^qhjcH zUZnOqtX;J_tU?i<&y2kVv4k@tK+kGHhLeZ287t#HZXSHoFhUJxJKDzLz!=nI2Xb`_ z5|dJU8iizsUK}QjHLV?!ykVY?RwDSL?+Vb;ruSaof^J^dmw3trDAT-h-#YPDg-Iin z?+rUgV=E($xLMPKIhZZu@-s{BA7z>m`SL3gty-fr@EkSSma<UJ&_ek=SZv3wRGA<X zaZy*Sap2O!+um0ZrEYyECE59Guvp?0T77?xnPgdLH}U#1B=zFd2x|?SGR`e{*)i6Y zcc7LNSKONUqIop;71Qf?J7KcLx+@C#t!H_`?>KacymyE`vxbgw_xUK2o1h52*kLlL z5!PcQ9`(_2Pp!z)*LNAkse);bWp59&H(79k9y<J)o|eJM%1=Fk12WxS%bHrZk}&tu z-+(36jDz>Fen-Hfitt*D`|4a`*+R~^HBXFLL+Fi93ihXO=nED%m9Dc&BA+oKMPSJ$ zKDFfCZ<I~*6cnL15I9lyPaOW?iRV3H{+)OAW@kMwWviSzHiCa<(M3Eo&kk4$4q9?o zO}U+4_#Bdv0djQtI=`uB$(qN3>b91brptDbd(I{tAO6D0`SxDY&8Ir~8t0#I@(Dzr zyTkw0NB_M?{8vxCJ?URP^+AvH29oEVI%U4WfM)=sTZXLz_QH#XMk7o_0h>}jg;%-_ zTaS<E{-qZtq`e5(SE~TV8d6w+t&CMS<W+YzkNBzzf*E`SvMHPfU*iNvQsyd-Fb2Sa z4k@>laISpD>{PLtZAp@7M*DY_6t4$Lo6;8-X@&D#$ktQ1l<orokCT@UOC&y*yS@?x zY&tS=126aiwukIY*WYXNVa{rNy%g`1mAxAxPUF`-AnP4+90;{-918phwJjW9!05XV zrGV=6Dz8`w^=q^d*L#@S;_U_LV&&(A0tV=?Y&^N)^*=T|asRvFX>9_wFjsd4+c;YN z)%Q$Jkq0fYqXwMZ;RT_$&bZea6TTUc#OKlrs6)cOs#5PBjwzo_5mZ%5_cv;;yy0RG zz)1Xc3P=RMy;z<$jgPD2#_=MjQ_EFtm?+E~Z1wTG6h}TYMlLSzjufOGDm#*w+m^)S z=1NGuI+i3&Dy@1EWgkuNv!FW{fT5?|KP?av#3Zpceaa}?F@R=G6IzpX+I{3nD`|DU zhHeD{^_!&?<kz0h48zsu2A$@HO&qIQ*3O&>Kh}0AndKBXV>}?Njunm^goRcV-GImm z)hy9axF;x4mPPZvZ3$(tU67+u7&T+Z(Emt3xZa#Qw(XI3HvF(phUL!SqXSaW)kXyM z!64%)t(kSdVvhGV<P$3K+TXMs+f0de7(<y0Jwto>IVc)a5amdJbhFsoQ(`9)snO)a zkU(~A))s*b%9ve&9=n1SDNjloDx=qO=QXuE1FE^wadY5)<T39%!DbwUn&q~0vIXr` zY3bXm#Mt?-9vO7eWOR8Tt06B>o$C#!h&lilpqN9g!9;c?pptcNCZTRKd9nNgI^@9f zaL;5S)Oo-(8BQ~ci=O%t@fu=04lQx|wmPbE;!VrF%8!Pv6=f}_a)+F_FH>(Vo#$$E z1byF5FgRad4H`a?DhP<^_`&qcFQA~B5&!8n!g`7hbC8(|806%_W(ERVJl7eaY6i-m z*>QcZv{`&93<kmr7o<K8CehNHa&^pHLeJFOEkd829P{v~$ojPIWly<urYm-85FK|3 z=pK+Jhz3zlv|+1Nx}3LV4)8L9vu(PE<;3IA*C*<lhMPXL)%>t#hMARjQf2=d2Tsb1 zy$Ulux`xMf3C^7e)YsJFpvW~a>$zsogHcK{55n`Le{a>q6Qg(%5tkBlm11c((l6qK zO2;^{YQjl*oqu7R+<KjBG6s>m4GxU{8h12fq;&cDm*4Ao3=*6Dob|cB9(ABCi(5Mr zdhuXftcqix7p5wEu0wO27t!iyii<|RtUy!}ms0o$Wa9Ka{g7Ume|sg`Qd5oOXtqcM z9G_#s=|<Sg8yYO9q4T#S_=boPR#&n4$}>eQX+NW#&XZxCy%EU?zy{KX@9#jgVy20# zvSQLS0&T)1B|vK?z4@Wr6TB^1_4)y~CC9sm7t4*n3(8)@TkJ(wf`^-TeZ0>L#cqt( zVEeRC%m2uRVg9~QCQeSz>y-6Q5%l?)IlRNu?5$S>=tZj#*V!y0*}QVKZe)_zAB3>c z0hk{BUU01o<{Zt<n%&WTlPx=f5|*)h!^I{C3Ea?<skNK5x=LYF#is@a0qk-mr4g+a zDF(wE(S0D8H4^L}B<~1Ukhi+7a&;Nimh#7p+bubg{lbsgWzZ{A;)xY{HFakR3lXIP z23PPRSbJxv9tfCNUat@|tW468(~(&h>A=eUczJAZUYbN~Jw^NLO@h@PWSfOveqYMZ z+w=8Fmtr+z`p0yKe4?TUJKd`bGk4yJA_~6%={80ywmc2$O2m%vtih$|IoAgb2w)7x zpSYV>?||*uZ@5=V-M3F($$0YJ|IQ*lC6V6(+|9)m<nWj4#;TE@LOgV1r3i6hNdqC_ zYh5(oD~+pns|_z)K!b|WzTPxJ-{Zn5gq5a*eu;m=T)?tzW@83Gy+lK2cnSJdz$e>1 zv~ElulDh-X(&|2^PXiY*^dg#t(vH6f*S27OY#l9?-;9+;5~P;xti}~L_Brin^F@+? z#*FGhD9#PF5C6^SA@?tNbn~^7>s1hyw?m$v)e^z;yf$W`0#ctwefqTj^St!LL5{8# zj;=-;UQQM+2G8>v)znk&p?qeutUb9y9q9YtyL<mQE0)h3hu^5{OO_89Jzhh}VvU|X ztda7i-pM?XLg&h;u*mw<CGbHrPRcU^TSdX;!!W7P7tmK1t}N7=oBjZKu}Uz1X&Wy= z2%lD9KDr$YlLoQ_-NnXKO0P;oF5SCRCr_#OTI3LA-))hkBV^>3Prl%G^d-F&DCwb{ z1~$2!2x4^<DPP7U9lCNpl=^Q_6}XS<tiCQ`Ff|9x$?V^^?cnl;F`&iz&7C{RPVIBa zzT!gfvXVC9ktPz=Kcoy#h>)9M2YHW871mV@MK?q<loK@F&Ff9Osv^Sc*A}2+9>u^7 z8^Enw#Ae{hGfnf}j)82Qe9L(AU(2jQk=mrFj?=r}Bis@`;!qU%Z=1b(xO2IY*Y4Jw zhVH!LY4d1T-WK*qjStAtb<p<&yd-tp6!MTLqQ?xp_;|;3gbQ;qSwhv}OM3lb(f*(< zMS28eBOHyv`Sre@zVdmw1g7zUj!z-D4gb7cFQ9Ru{$~AuC%T_m|9_X?{sZu*;_qDM zGY|f^Fg#uA4_5q7<=@GVXS(xm@qMZko|OM4KmSzyy+`@Xj{Gf=2>-h7|KLjgwDS9t z?!Q|hL;a6d{_1Z2Y327i@DD54|Ix}{&&YpT`MpSbt{MLpNrJzu{4W*cpLTvvr_UwT z-{SZ*xM!#SSz!HX;dh*WPP>0g%+tXBM-u*1{dc%~P6mGqH|cNc|D78C)cze*o+0sX taUlCm+w5Ne`KN*3!~EY3NIr3F{{e|q<l&y~h^Kb<^Y#>X=M>MM{vWJ`rHlXo literal 0 HcmV?d00001 diff --git a/unittests/table_json_conversion/test_read_xlsx.py b/unittests/table_json_conversion/test_read_xlsx.py index 0eec2e9c..6ee744ef 100644 --- a/unittests/table_json_conversion/test_read_xlsx.py +++ b/unittests/table_json_conversion/test_read_xlsx.py @@ -27,6 +27,7 @@ import re from types import SimpleNamespace +import jsonschema import pytest from caosadvancedtools.table_json_conversion import convert @@ -112,6 +113,26 @@ def test_missing_columns(): assert expected in messages +def test_wrong_datatype(): + with pytest.raises(jsonschema.ValidationError) as caught: + convert.to_dict(xlsx=rfp("data/simple_data_broken.xlsx"), + schema=rfp("data/simple_schema.json")) + # Correct Errors + assert "'Not a num' is not of type 'number'" in str(caught.value) + assert "1.5 is not of type 'integer'" in str(caught.value) + # Correct Locations + for line in str(caught.value).split('\n'): + if "'Not a num' is not of type 'number'" in line: + assert "J7" in line + if "1.5 is not of type 'integer'" in line: + assert "K7" in line + # No additional type errors + if "is not of type 'boolean'" in str(caught.value): # ToDo: Remove when boolean is fixed + assert str(caught.value).count("is not of type") == 3 + else: + assert str(caught.value).count("is not of type") == 2 + + def test_faulty_foreign(): # Simple wrong foreign key converter = convert.XLSXConverter(xlsx=rfp("data/simple_data_wrong_foreign.xlsx"), -- GitLab