From 5ceee474254b96a769ce908d307e9bc9efd9a2ae Mon Sep 17 00:00:00 2001
From: "i.nueske" <i.nueske@indiscale.com>
Date: Tue, 12 Nov 2024 09:42:52 +0100
Subject: [PATCH] ENH: convert.to_dict() now outputs encountered type errors in
 a table

---
 .../table_json_conversion/convert.py          | 182 ++++++++++++++----
 .../data/simple_data_broken.xlsx              | Bin 0 -> 8982 bytes
 .../table_json_conversion/test_read_xlsx.py   |  21 ++
 3 files changed, 170 insertions(+), 33 deletions(-)
 create mode 100644 unittests/table_json_conversion/data/simple_data_broken.xlsx

diff --git a/src/caosadvancedtools/table_json_conversion/convert.py b/src/caosadvancedtools/table_json_conversion/convert.py
index 48a0f676..e874c535 100644
--- a/src/caosadvancedtools/table_json_conversion/convert.py
+++ b/src/caosadvancedtools/table_json_conversion/convert.py
@@ -46,6 +46,114 @@ def _strict_bool(value: Any) -> bool:
     raise TypeError(f"Not a good boolean: {repr(value)}")
 
 
+def format_exception_table(exceptions: list[tuple], worksheet_title: str,
+                           column_names: Optional[Any] = None,
+                           max_line_length: Optional[int] = 120) -> str:
+    """
+    Given a list of tuples containing a row and column number as well as an
+    exception in that order, and the title of the current worksheet, returns
+    a formatted table of the exceptions.
+
+    Optionally takes a dict of column names, if given a header will be
+    generated for each column and exceptions will be clustered by column.
+
+    Default line length is 120 and can be overwritten by max_line_length.
+    Long messages are wrapped between words; 40 characters of the line length
+    are reserved for the Field and Error Type columns.
+
+    Params
+    ------
+    exceptions:         list of tuples containing row, column, and exception
+                        Data to be formatted, the list itself is not modified
+    worksheet_title:    str
+                        Name of the current worksheet
+    column_names:       dict, list or tuple, optional
+                        column_names[column_num] should return the name of
+                        column column_num.
+                        If given, exceptions will be clustered by column.
+    max_line_length:    int, optional
+                        Soft cap for the line length of the resulting table,
+                        None disables the wrapping of long messages
+
+    Return
+    ------
+    string_rep:         str
+                        Table containing the given exceptions, preceded by a
+                        title line naming the worksheet
+    """
+    def to_char(num):
+        # Zero-based column index to spreadsheet letters: 0 -> A, 26 -> AA
+        if num < 0:
+            return ""
+        return to_char(num // 26 - 1) + chr(num % 26 + 65)
+
+    # 40 is an estimate of the space used by the Field and Error Type columns
+    wrap_at = float("inf") if max_line_length is None else max_line_length - 40
+
+    def wrap(line):
+        # Greedy soft wrap: break once adding the next word would pass wrap_at
+        if len(line) <= wrap_at:
+            return [line] if line else []
+        wrapped = []
+        current = ""
+        words = line.split(' ')
+        for word, next_word in zip(words, words[1:] + [""]):
+            current = f"{current} {word}" if current else word
+            if len(current + next_word) > wrap_at:
+                wrapped.append(current)
+                current = ""
+        return wrapped + [current] if current else wrapped
+
+    headers = {"loc": "Field", "type": "Error Type", "mess": ["Message"]}
+    # "mess" holds a list of lines, so its width comes from the header text
+    lengths = {"loc": len(headers["loc"]), "type": len(headers["type"]),
+               "mess": len(headers["mess"][0])}
+    new_data = []
+
+    current_column = None
+    # sorted() is stable and, unlike list.sort(), leaves the argument untouched
+    for row_i, col_i, excep in sorted(exceptions, key=lambda tup: tup[1]):
+        if column_names is not None and current_column != col_i:
+            # First error of a new column: insert the column sub-header
+            current_column = col_i
+            new_data.append({
+                "loc": f"\nErrors in column '{column_names[col_i]}':",
+                "type": "", "mess": [""]
+            })
+        # Field: cell reference like "J7", or only the column letters
+        loc = to_char(col_i)
+        if isinstance(row_i, int):
+            loc += str(row_i + 1)
+        # Message: every line of the exception text, wrapped if too long
+        mess = [part for line in str(excep).split('\n')
+                for part in wrap(line)]
+        row = {
+            "loc": loc,
+            "type": type(excep).__name__,
+            "mess": mess or [""],
+        }
+        new_data.append(row)
+        # Grow the column widths to fit this row
+        lengths["loc"] = max(lengths["loc"], len(loc))
+        lengths["type"] = max(lengths["type"], len(row["type"]))
+        lengths["mess"] = max([lengths["mess"]] + [len(part) for part in mess])
+
+    # Divider row: one en dash per character of column width
+    dividers = {key: '–' * width for key, width in lengths.items()}
+    dividers["mess"] = [dividers["mess"]]
+
+    # The message column is the last one and is therefore not padded.  Its
+    # continuation lines start one column right of the first message line.
+    indent = ' ' * (lengths["loc"] + lengths["type"] + 8)
+    parts = [f"There were failures during validation of worksheet '{worksheet_title}':\n\n"]
+    for row in [headers, dividers] + new_data:
+        loc = row["loc"].ljust(lengths["loc"])
+        typ = row["type"].ljust(lengths["type"])
+        parts.append(f' {loc}   {typ}   {row["mess"][0]}\n')
+        parts.extend(f'{indent}{line}\n' for line in row["mess"][1:])
+    return "".join(parts)
+
+
 class ForeignError(KeyError):
     def __init__(self, *args, definitions: list, message: str = ""):
         super().__init__(message, *args)
@@ -205,9 +313,13 @@ class XLSXConverter:
         # # - data: The actual data of this entry, a dict.
         # entries: dict[str, list[SimpleNamespace]] = {}
 
+        exceptions = []
+        col_names = None
         for row_idx, row in enumerate(sheet.iter_rows(values_only=True)):
-            # Skip non-data rows.
+            # Skip non-data rows and save the row containing column names
             if row[row_type_column] is not None:
+                if row[row_type_column] == "IGNORE" and col_names is None:
+                    col_names = row
                 continue
             foreign_repr = ""
             foreign = []  # A list of lists, each of which is: [path1, path2, ..., leaf, value]
@@ -219,24 +331,27 @@ class XLSXConverter:
                     foreign.append(foreign_column_paths[col_idx] + [value])
                     continue
 
-                if col_idx in data_column_paths:
-                    path = data_column_paths[col_idx]
-                    if self._is_multiple_choice(path):
-                        real_value = path.pop()  # Last component is the enum value, insert above
-                        # set up list
-                        try:
-                            _set_in_nested(mydict=data, path=path, value=[], prefix=parent, skip=1)
-                        except ValueError as err:
-                            if not str(err).startswith("There is already some value at"):
-                                raise
-                        if not xlsx_utils.parse_multiple_choice(value):
-                            continue
-                        _set_in_nested(mydict=data, path=path, value=real_value, prefix=parent,
-                                       skip=1, append_to_list=True)
-                    else:
-                        value = self._validate_and_convert(value, path)
-                        _set_in_nested(mydict=data, path=path, value=value, prefix=parent, skip=1)
-                    continue
+                try:
+                    if col_idx in data_column_paths:
+                        path = data_column_paths[col_idx]
+                        if self._is_multiple_choice(path):
+                            real_value = path.pop()  # Last component is the enum value, insert above
+                            # set up list
+                            try:
+                                _set_in_nested(mydict=data, path=path, value=[], prefix=parent, skip=1)
+                            except ValueError as err:
+                                if not str(err).startswith("There is already some value at"):
+                                    raise
+                            if not xlsx_utils.parse_multiple_choice(value):
+                                continue
+                            _set_in_nested(mydict=data, path=path, value=real_value, prefix=parent,
+                                           skip=1, append_to_list=True)
+                        else:
+                            value = self._validate_and_convert(value, path)
+                            _set_in_nested(mydict=data, path=path, value=value, prefix=parent, skip=1)
+                        continue
+                except (ValueError, jsonschema.ValidationError) as e:
+                    exceptions.append((row_idx, col_idx, e))
 
             try:
                 # Find current position in tree
@@ -250,6 +365,12 @@ class XLSXConverter:
                 if not fail_later:
                     raise
                 self._errors[(sheet.title, row_idx)] = kerr.definitions
+
+        if exceptions != []:
+            exception_table = format_exception_table(exceptions, sheet.title,
+                                                     col_names)
+            raise jsonschema.ValidationError(exception_table)
+
         self._handled_sheets.add(sheet.title)
 
     def _is_multiple_choice(self, path: list[str]) -> bool:
@@ -308,20 +429,15 @@ class XLSXConverter:
             if isinstance(value, str) and ";" in value:
                 values = [self.PARSER[array_type](v) for v in value.split(";")]
                 return values
-        try:
-            # special case: datetime or date
-            if ("anyOf" in subschema):
-                if isinstance(value, datetime.datetime) and (
-                        {'type': 'string', 'format': 'date-time'} in subschema["anyOf"]):
-                    return value
-                if isinstance(value, datetime.date) and (
-                        {'type': 'string', 'format': 'date'} in subschema["anyOf"]):
-                    return value
-            jsonschema.validate(value, subschema)
-        except jsonschema.ValidationError as verr:
-            print(verr)
-            print(path)
-            raise
+        # special case: datetime or date
+        if ("anyOf" in subschema):
+            if isinstance(value, datetime.datetime) and (
+                    {'type': 'string', 'format': 'date-time'} in subschema["anyOf"]):
+                return value
+            if isinstance(value, datetime.date) and (
+                    {'type': 'string', 'format': 'date'} in subschema["anyOf"]):
+                return value
+        jsonschema.validate(value, subschema)
 
         # Finally: convert to target type
         return self.PARSER[subschema.get("type", "string")](value)
diff --git a/unittests/table_json_conversion/data/simple_data_broken.xlsx b/unittests/table_json_conversion/data/simple_data_broken.xlsx
new file mode 100644
index 0000000000000000000000000000000000000000..361953f660f12cb37d979ff3b4c49895265131e3
GIT binary patch
literal 8982
zcmbVyWmp{BvNplp-5mx9Hh6&G?oM!b1_lrA4gm%TPJ%;lcL)x_g1c)V1cF20gMIIJ
zc24fO`^T+*dZwpms(P(j)o)eTJ1X)pu((i&h=@>$iuSrtzX{6Iu@Tt9-i3|z`L#T;
zTd9W~HSEMYis_1bMFjCfVXJ~nD-{=c7p)^nePmv5@Y$sV4mN6RO;4P&pZ|kt!K#8I
z%PKp)ykVrKZX|R&y_s)O@5qC*7aPy$N56h7K4~?OI1@NtS?>JHhUjp<x9dAO<o%zD
z@{=$2%#O^Vghax@2AUq659SfOA-YE~*v?j3ALspH+d>o_?6@6eDM;T612?s)lc?MW
z<rMOKiul^F(JaD{y;d!Jn@848Am~J>>@td2sZ_h)2l1yG<EVZ?Mh@P;$7yNa1v{CZ
z!;iLMhp=0@dMncS$|!u#!6O42$wj}(z8~4eQISVL3{B@s|Ni8JF=!|#mH*XE7*D_O
zv}f}Gf$dB|AUjr12m4sHrMN|QOy3i23BPdtCR|JtNtnrs=@Huxt>bY(D%Lo%1<Szl
z+mkFlbNLzf8H5O-)GdCivHM*Oq!XgL4Y_{WBZOX<A&s#eR(hjG8<3cwS;?0Y6>L*5
zytpI&$*y$4RhpVN#$7?Atc`=vPv0K7;^KocVp>b&0%qIOQjd-j?ulgxR0FmjF05JS
zNBu;6lfp*?aXdJyDzzh(z`B<a@S-o3Kyrp8HSY=ZyUgSV^W1)=Y4N#>!O=Wyx*d5R
zTR@I6+@fYU0LAP967p{MiM}wF30QbRp8WNgO2H=DXCxstI+5eXRY-zhzI3CT-9t=J
z@-Ca)8zvqFW-3$Di;Xj4bX)#AJ<BP?`hv8BpdtoV7q=LsihUn9;g&6&afe70QVjYY
zR(BL1?Zx*BpKT-r<!i&Gir>dT+cP)u%nEI)Fmigq`(SVor##@%G}eCqCd;rd#PrD=
z$khM+tg!4fvTqv5D~Q8ao$ai!-zsQ}S=ri1%o~s2dKkaDPjO$?IgV?wgEzd62Klfx
zlOQ@t)DoaKp_5Y_EJ5{IiU_TbA(^HZo8RE962Ey_o03Ix<5qNiJ1FUw^`|?-dlua6
z#~*d;X<@T{V-vlvtjUv3P6j6V(qi%q>rfHe+4bY!Qo#49vKbTsD%f=Crp^)4mp+XB
zNUhr<DN_wYVC1plng`H-!-1)`*N(6_?j#vKiD|w@dXB0!>@^0Wr--_M`KPEt`>&{S
zarLsdaCwfYV_ntC3?5v+wMr4c`6Xl|Bs9B3rh;JsoAlCEJ%_J#2neX6+&n;~jPtp1
z1en0yj&C((mr^LIA--{)F2WQ!W)VRDX13y=2@y<t_JLC4g4>w*KWVKNpxZl*9@)i}
z=s4urcVbv30yXw`&RV_bFd)kgNC|N@U8Qh~a=H)uEt+j~a2467iR=Zk&5TtV1_{lY
z07NAwD7<MhZ^LzbJ}YZ}t5#Ek!Qsf8xFI-V4{GT(KPB8o;~-K8*3HO|XE-Jj#!>oP
z>jzFhuKbUid`_1RM_;YHHNpC%-MK6YawTDDMXTQt+&LO_imEf_uM~bo!J$dso{q>G
z@x4-_qPd`H@IVx0fHI;{M}x_cSnJhfAuId70nleTc#by$tYkxAL1EJkHZ%o98r#h{
zr(gd1pz&>3L(3L;(d18<h2%pRTV#Y@%r}Z-oFJJA-iWZT75R4i2F}>hr6S_vqTjgX
zUU^4}u49vo#%TRd(hpkOwESayJ?Fm7B7j0X(X4&$$KE#^U?o|mWRe6cy(W1>XcAUB
zHjTVKC@hC&JH{)H59jE}*H`B{F4>$d%Ht?;Gi3E_P*;T|FS*I|Q_ib)jU}SUY+x%7
zsbD6fc8Qn=hq@Yr<grKIK9=;2@H-+1=&_!S4|C<q&rZLx*XE2D&EJbAmAyP8OV!a+
z%@jt`Uu8BS=*^_7H!CP>pthp;ej{))&7@!<-Sa{4K_oF{n@1V9j`!<I5ktaRoHMZj
zaZ%WwP44ZcU;ZLj6FKJ=2@y(KwB0wU-T%K~bK*P#ga8bA41Jbo`SUS3*cm@alAP}<
zPi`$buE;6$Z;3mV$J4HE?xhdsE|ngWKU>~Vo%_cuZY2+CN;`m~7gMDi-j+>Bmrb1J
z?Jiy*j5=hOwNKS_0~?YAoQ9i4IzO8LkNX^IM@qh=eQ(|ih#-J-9EdN32>`+I`Fr}i
z=NdyH$K+;VhV+G_|MUq+6AU4`PDgVZS8QsO=(R-?kl_p1SD!Ou$0Ju#njuSZR8A~D
zh_VOrsb3tTOcW9Org8Jc>~+t5dXY669~+1U1Fp5S%!=O|{X{&GR=&UvWQO0iRNU?!
z*d2&Lnz6{XA-wg&oTn}w9~vi=XlQ~9+*|mx(DdT-E`s2%R-Gm7rEDsUy+8*=iebzp
zh2u2qj1r;QT#kX`>$!m&+ogN!tcCcO9(7~+ftj3UJ``Fr{i9ml?uvwqT<@ZHNztHz
z-`E4<y?pI8H}6cWXQBGPbX_?R7fZGF_e1f`bNT(s1S?;H8dv64wsLw$I$u0v0ag{Y
z(Bu;i*pUAd77+fOVO*>&EL>gK{(9qhhJ|)*1D8cUT;DaLjZ^WQ20}y%S=)Vcss(p?
zw6sJ=t6O>&B0FmN3R>~O$~ER8p8i6rsWDoN7Y1Z&-rt)W+z?+27kpX5EGb^c>6|;b
zCErNsf>bg*G=^tf?R6A!Z4z>H5B08L?f7Rt;5ml+XdqE72E*zr-b;Kt;zZPdyu&St
z8$=Ax(iz0H(HVBj<&?#2AShtj_h`+dl!V-|_7wOiqGJtkQM+L605%rs5<;lZ&Nan=
znwH+5D~H4B7RAcwa)|qymrBkq;LCjgvH2?0vsp6vaP{t~1l@h2IX36SrhvqFuS;@l
ztBK_(&Qgz*yw_+BXWj>W5F6@FL{yhZEOppR&N_PekDV6oMz15h5g|+6T&PjeMO1~9
zABb8RWCc$v-=@yd1Xl$-GA@)ZdMMMGdc)j<!!**y;2)E5*VL3KODzmxJEM*js+1{V
zmvt{<DPVD#VG=8Fj6LN|4GN~M>uF;o($|AIqxFHsHOmFe-KD7Sdl{`3odVv9rJSOv
zWM41u4^b8kD#Q|*d^tbxPU!XvgU$;)5kg5|rRfzZFl^M*TU!{(P|e$Wxfr>8sCJZI
zWJ1@Qhm_W40Hj+Ao8%SW9RlcFlLzgG;o3`G)VEHnzh04$x(iI}y*SyicRYFtQ_Bj0
z!s}4~gtI>LEoN+>M~;SYh`|&#yxb2V`_PI|k_Zrjue7aKLxuVyML`ID!2r}PdTN6h
z#B{eNGon&xI~5HskbF}G-jZeP4@oV13#Z2^sVssm3~0=eJ2`R{*(+__n2*{*ZbiVi
z;X*5zfBV(cz+j-7x)${KE*K^id>^D;#%sNR3v7hcaBsYUn;fkpfhQ5K6uF<0I!DK<
zO^UdY?Y)qXfP$24A9R)o{=zT!6VK+^{6sY_+j~#Dc}v$DC};&@8efq0T8t2V`pR_v
zT~RC{_qGj9|G*AW+nelQ?XHtUp!KN$Xn#tI>%xX6{k9HE(RVSzXdmdK=NrcGbc&U@
zJvkzGa(I9I4L`V|ZGObw-0S=ssH(rr)erWo4+eSQIY}O}t(6o#U@rR0TN$<&Q_vkC
z5O~%*cq%4fvNVp1>Y#R=xSEJio!Sn8vdK;{?^0@gd69s24i_0{7u=FnEmU8CQ>7o|
zRrTFxo<iH7@<Gm8<!)Zdw0%xklqIC1Ja`zeOLUzSGf3wf2E2rZlzCBoqm&m{Wnm`R
z@TTt*TNKc-zY!O^hHeQBlt&(Yqb+<2q^g&|%^Z*Vuzwo0b%HdL47lhDo<lW;6}F~V
zGdoGHcFKI;qGn^(_WjINa``ly89ar`E6p*^*!D${W(V~$LB<>cxoT7Y)^EFQLOj6E
zP~q?(G|Ji5b^W%sbEoJ-*Szk^!7GR*h2`aAYfke71$mb_^9efYxpo5e!`0UZNaFnQ
z8{^88h4C`>c?M2?vaGdH8&~JZ6NgP|J;G`4P>Tz3$J^J>n8eO#8=wRW1%-<Kf50Tp
zzc4Ay;R%y42iDe#eq2lLIqpVPd$HGvFoa?zicJ9y+;&$14WwMvFSYR}wx>MkUK^Vi
zj9ti`L06sseDKg_?qkXGSj(H3H%Y-Jj#q4*;Xxzyo@O{*nZE}E{e{?LA$jxCRI#V?
zK3yd@j%Neb$_u_N-S0_^DU}9x!+vhZS`@6pOEWr5!Va+GsNhqfryh;T;e;iyfuh7l
zY#Q&hLS*bM@mH(%=5s|w1O@X}c|LQD$FP%9qkk_VNz4zeKY6G<x<Ngrn*N*<6<upz
zSZC#7cS*g;k^VKHQIL42VkVFDO-~*l%?W>;WW)+6Y0j80aNSW><~sO%GBYOsRQU^{
z%pixJD#a|Fh)I+QtEh)(>`kh~*iVvMB={&BJI<Q33<jDU5#+ua5>y1UpI`iHi4n!1
zA2HjMnR8-Y#jjj`fJi>pu`?jB5uT>f_OlN!@KD?<T&WYYQevs-&P4iin13QP-gbl`
zXLat9Pa<8ZaE=g)`*r9$r$-(%cuUn%U}DoX@M1AczX=Kq!P|QE!_&T>I*aOke&oCN
zo9wtI>0u1>@>Odm>HRbXlZ)b1v_p=K%ySew$A`r-d)Pt~<qGn`%sf3W3$9FM<{hFX
z6ICP5>1FIQmr<rf*jswic!5RgWP{(izk<M$z2?oPpIt9ps`*@WfmQQftdtl=k>kAU
zX)A|UD<=Nym%|F(Ei6@o0t&BQ&M(GSk@v}TKy%4oV4pZ4ww)IXz$5F1%qGAjVdZlt
zS5kLV$*EO`G^>I#z{ES*7SVN79r0V6rKOZ~H0NZQAs*NxSCH3(+(GuKr}eGWqK~?&
zIQybshv>*+%?E527~UOiCz8eHPdPR}9quL?7?`fiu3s@<g8Q<SCT-A^G4nIzFWtOC
zSDb?w5t_F}*cP1qJGa?WNe^jPwDw>l>zDdE?Cm5F7?7q|$AcKqVfJtX@rwe>U!fl<
zjifNRx}RCZqebKmMNGDL)20Kg!p#BheE#tm-KeZ%TMX#^Z(K3!FmWCi)Jl-93fWsS
z>ZE6tno@C2zJ>TID3C<z(l}R82@DLjO%6)ZR9n-sl0=qCZHm)W+tX?hP0nKDL*#VX
z#-eAV)m@fV4-1s$4GN6Y6Lf*#)tE0C3DK73lQH@{G}@zs;g;qLF`xXRNRyTJB;|{y
zbOhYQ$}<H_t{}scl)eREu81;?ICvt)<f#~z^n?-1ZMk67<l)I}E&hR2VhJG^zrzvL
z9ap0|aEoFxuoCA`xDrQhgXJ(pjW^Xz43LrBuP&X_y;=+OJm)(r102nes+S)5W~uNR
zWfeOJqkVLI-a7;~88Ki8p3F6HM{IsCfLC2^|JqdMC>)FD_R|K_?WcuIQB>3I*Mm;`
zgL4x3A|*yZ`Ye((2j>)JRPxQZYaHkHLno@xh3BJ2RH5T&b)4D@AlgS^7(2|J%+_L>
z2I{&A%0plUylO}VR|lis4{^LmAoEL$;FrKcJ<h;DK@CfgUxR+m<IUV4GMAB{mg5c*
zpVrlvNic??d$V?Mi`5|jq}ee~7t?z$kZ^2i7M!r3L~;AUBiyi9pr(JveMLoz+~$ia
zUmwT7r!yh9a3Tgn-_OIut<1gO5fQ^{%oZJ+YZ?b?CZgq)a25tn_#JN$R9Dr|y}pjx
zn`c}aQ|3anE6Qt1+7x48>E4$Q(Tv@qC3lqA^AMB|+}9j&r-!jSkLJfIU!9J^56oe{
zWWl4s{A9l#KN`Y0hPkos7}0iDJO9{1)ple=A}pS4SRLNuBu8^9FT%9geyR$#LJ9=x
zG-;V%uy0O=80(&58sI<bOF3NAB2xb3$iYb=&mZ2fTYJ4mHVoqY*pYM5%uuNUY0Da(
z(_U+UbhcArDaFQnf_w_Gq8<%L)>Pl;jLJ>8qu%)O)S!ojjDE>7st2@8r)?;Ib#int
z*E9<qIn&M@!g$7igXW6%7C0y<B;x-A{saHQf1M%l5+7#!xOSE2pbfRH5veRdx+YVD
zznU3Lujvvjqau+8O9+){@BfRdOA=QkBRgh5AVO@b+UUyeotW9}sc>YL&~6aBfXl1$
zhHu7)wE+{yUw%2azJ2XZMuml@@`W+G8AGRuV$WXzBW5iU(d-}qhRM>A*5d81*XMYr
zAc^RL_n#F8J%eabZ)R}-1B?^Abk%bXjp2qth;Bd(t9v?O9YSmhwzN)p_T)62I5S!*
zAFKoo+|}>F1UD#r0wSf7g2|Yv3knC7Q;qj@y?Y8(Dh)m<+aDHmmD+}6vm+l9yRdst
z+z-5+GR43o*f=-2X0Chc$M<2cs3PfhjdY>hkW^XNzZ6(U&*?nKT(q@W@ES7~w%<&T
zfU=cgps`D9UXg|aui9MP`Lz(yuA|q3i|^efdeNC9Km;_ZO;i2yN<qwG96EGsJN+j|
zRmP`Qn9y2UZkr&~C_H$j1}q8BP;uQBh;?m0$0Z%Ql68d#Cw$6<c#Vnk0aX{(VxL+c
zlur!7rBH~VOUYLtd=85yDGKQTlk(gt-X#hq>RrXpL7kdQRa7Ro#pB`YjE<E$CZWm+
zm?ay7Gu;a=d%{+A`klt|()=lv+d>&3elOgL;I5VdT`qzw=b3ja-y)1!Xl-j)(!d74
z3^c3?2+%ZM%|W|f%R&>0nrF>A2@*q?L{){&==c@f^t$d4`xsf8r&*G2kWZXZJ;@Yj
z;8$yp+HlTpaB1;w^&&~~G&BzPPD0FO$u+fdBVb<?_7xB30&rY0L_a61_~M<3&LKcp
zT}Waee2?kX^pi9eY(+=2s&Pg+l{r5z?5^Lo&u(83A@FbI^7Lr!T@Z!yZ&k%PPv25#
zknP0eS!rC<z-6=_CP@4QSRsVlS@&m@A15AO&u3Pwpy~@sDSxtJ=9SS+C`nD2$aJcN
z`sTNq{ni`LFz(u%=It-_!H+~WbyQjEy^=JhGZ9Tb=o^6@1%oTRc+Ga9Gu#plSY+zk
zmV-4!;7L?nZgJL}&`iV?Z}kt}v0_URV_ldn@SI#B4OBusBa%S#8PsMUd@!}^qhjcH
zUZnOqtX;J_tU?i<&y2kVv4k@tK+kGHhLeZ287t#HZXSHoFhUJxJKDzLz!=nI2Xb`_
z5|dJU8iizsUK}QjHLV?!ykVY?RwDSL?+Vb;ruSaof^J^dmw3trDAT-h-#YPDg-Iin
z?+rUgV=E($xLMPKIhZZu@-s{BA7z>m`SL3gty-fr@EkSSma<UJ&_ek=SZv3wRGA<X
zaZy*Sap2O!+um0ZrEYyECE59Guvp?0T77?xnPgdLH}U#1B=zFd2x|?SGR`e{*)i6Y
zcc7LNSKONUqIop;71Qf?J7KcLx+@C#t!H_`?>KacymyE`vxbgw_xUK2o1h52*kLlL
z5!PcQ9`(_2Pp!z)*LNAkse);bWp59&H(79k9y<J)o|eJM%1=Fk12WxS%bHrZk}&tu
z-+(36jDz>Fen-Hfitt*D`|4a`*+R~^HBXFLL+Fi93ihXO=nED%m9Dc&BA+oKMPSJ$
zKDFfCZ<I~*6cnL15I9lyPaOW?iRV3H{+)OAW@kMwWviSzHiCa<(M3Eo&kk4$4q9?o
zO}U+4_#Bdv0djQtI=`uB$(qN3>b91brptDbd(I{tAO6D0`SxDY&8Ir~8t0#I@(Dzr
zyTkw0NB_M?{8vxCJ?URP^+AvH29oEVI%U4WfM)=sTZXLz_QH#XMk7o_0h>}jg;%-_
zTaS<E{-qZtq`e5(SE~TV8d6w+t&CMS<W+YzkNBzzf*E`SvMHPfU*iNvQsyd-Fb2Sa
z4k@>laISpD>{PLtZAp@7M*DY_6t4$Lo6;8-X@&D#$ktQ1l<orokCT@UOC&y*yS@?x
zY&tS=126aiwukIY*WYXNVa{rNy%g`1mAxAxPUF`-AnP4+90;{-918phwJjW9!05XV
zrGV=6Dz8`w^=q^d*L#@S;_U_LV&&(A0tV=?Y&^N)^*=T|asRvFX>9_wFjsd4+c;YN
z)%Q$Jkq0fYqXwMZ;RT_$&bZea6TTUc#OKlrs6)cOs#5PBjwzo_5mZ%5_cv;;yy0RG
zz)1Xc3P=RMy;z<$jgPD2#_=MjQ_EFtm?+E~Z1wTG6h}TYMlLSzjufOGDm#*w+m^)S
z=1NGuI+i3&Dy@1EWgkuNv!FW{fT5?|KP?av#3Zpceaa}?F@R=G6IzpX+I{3nD`|DU
zhHeD{^_!&?<kz0h48zsu2A$@HO&qIQ*3O&>Kh}0AndKBXV>}?Njunm^goRcV-GImm
z)hy9axF;x4mPPZvZ3$(tU67+u7&T+Z(Emt3xZa#Qw(XI3HvF(phUL!SqXSaW)kXyM
z!64%)t(kSdVvhGV<P$3K+TXMs+f0de7(<y0Jwto>IVc)a5amdJbhFsoQ(`9)snO)a
zkU(~A))s*b%9ve&9=n1SDNjloDx=qO=QXuE1FE^wadY5)<T39%!DbwUn&q~0vIXr`
zY3bXm#Mt?-9vO7eWOR8Tt06B>o$C#!h&lilpqN9g!9;c?pptcNCZTRKd9nNgI^@9f
zaL;5S)Oo-(8BQ~ci=O%t@fu=04lQx|wmPbE;!VrF%8!Pv6=f}_a)+F_FH>(Vo#$$E
z1byF5FgRad4H`a?DhP<^_`&qcFQA~B5&!8n!g`7hbC8(|806%_W(ERVJl7eaY6i-m
z*>QcZv{`&93<kmr7o<K8CehNHa&^pHLeJFOEkd829P{v~$ojPIWly<urYm-85FK|3
z=pK+Jhz3zlv|+1Nx}3LV4)8L9vu(PE<;3IA*C*<lhMPXL)%>t#hMARjQf2=d2Tsb1
zy$Ulux`xMf3C^7e)YsJFpvW~a>$zsogHcK{55n`Le{a>q6Qg(%5tkBlm11c((l6qK
zO2;^{YQjl*oqu7R+<KjBG6s>m4GxU{8h12fq;&cDm*4Ao3=*6Dob|cB9(ABCi(5Mr
zdhuXftcqix7p5wEu0wO27t!iyii<|RtUy!}ms0o$Wa9Ka{g7Ume|sg`Qd5oOXtqcM
z9G_#s=|<Sg8yYO9q4T#S_=boPR#&n4$}>eQX+NW#&XZxCy%EU?zy{KX@9#jgVy20#
zvSQLS0&T)1B|vK?z4@Wr6TB^1_4)y~CC9sm7t4*n3(8)@TkJ(wf`^-TeZ0>L#cqt(
zVEeRC%m2uRVg9~QCQeSz>y-6Q5%l?)IlRNu?5$S>=tZj#*V!y0*}QVKZe)_zAB3>c
z0hk{BUU01o<{Zt<n%&WTlPx=f5|*)h!^I{C3Ea?<skNK5x=LYF#is@a0qk-mr4g+a
zDF(wE(S0D8H4^L}B<~1Ukhi+7a&;Nimh#7p+bubg{lbsgWzZ{A;)xY{HFakR3lXIP
z23PPRSbJxv9tfCNUat@|tW468(~(&h>A=eUczJAZUYbN~Jw^NLO@h@PWSfOveqYMZ
z+w=8Fmtr+z`p0yKe4?TUJKd`bGk4yJA_~6%={80ywmc2$O2m%vtih$|IoAgb2w)7x
zpSYV>?||*uZ@5=V-M3F($$0YJ|IQ*lC6V6(+|9)m<nWj4#;TE@LOgV1r3i6hNdqC_
zYh5(oD~+pns|_z)K!b|WzTPxJ-{Zn5gq5a*eu;m=T)?tzW@83Gy+lK2cnSJdz$e>1
zv~ElulDh-X(&|2^PXiY*^dg#t(vH6f*S27OY#l9?-;9+;5~P;xti}~L_Brin^F@+?
z#*FGhD9#PF5C6^SA@?tNbn~^7>s1hyw?m$v)e^z;yf$W`0#ctwefqTj^St!LL5{8#
zj;=-;UQQM+2G8>v)znk&p?qeutUb9y9q9YtyL<mQE0)h3hu^5{OO_89Jzhh}VvU|X
ztda7i-pM?XLg&h;u*mw<CGbHrPRcU^TSdX;!!W7P7tmK1t}N7=oBjZKu}Uz1X&Wy=
z2%lD9KDr$YlLoQ_-NnXKO0P;oF5SCRCr_#OTI3LA-))hkBV^>3Prl%G^d-F&DCwb{
z1~$2!2x4^<DPP7U9lCNpl=^Q_6}XS<tiCQ`Ff|9x$?V^^?cnl;F`&iz&7C{RPVIBa
zzT!gfvXVC9ktPz=Kcoy#h>)9M2YHW871mV@MK?q<loK@F&Ff9Osv^Sc*A}2+9>u^7
z8^Enw#Ae{hGfnf}j)82Qe9L(AU(2jQk=mrFj?=r}Bis@`;!qU%Z=1b(xO2IY*Y4Jw
zhVH!LY4d1T-WK*qjStAtb<p<&yd-tp6!MTLqQ?xp_;|;3gbQ;qSwhv}OM3lb(f*(<
zMS28eBOHyv`Sre@zVdmw1g7zUj!z-D4gb7cFQ9Ru{$~AuC%T_m|9_X?{sZu*;_qDM
zGY|f^Fg#uA4_5q7<=@GVXS(xm@qMZko|OM4KmSzyy+`@Xj{Gf=2>-h7|KLjgwDS9t
z?!Q|hL;a6d{_1Z2Y327i@DD54|Ix}{&&YpT`MpSbt{MLpNrJzu{4W*cpLTvvr_UwT
z-{SZ*xM!#SSz!HX;dh*WPP>0g%+tXBM-u*1{dc%~P6mGqH|cNc|D78C)cze*o+0sX
taUlCm+w5Ne`KN*3!~EY3NIr3F{{e|q<l&y~h^Kb<^Y#>X=M>MM{vWJ`rHlXo

literal 0
HcmV?d00001

diff --git a/unittests/table_json_conversion/test_read_xlsx.py b/unittests/table_json_conversion/test_read_xlsx.py
index 0eec2e9c..6ee744ef 100644
--- a/unittests/table_json_conversion/test_read_xlsx.py
+++ b/unittests/table_json_conversion/test_read_xlsx.py
@@ -27,6 +27,7 @@ import re
 
 from types import SimpleNamespace
 
+import jsonschema
 import pytest
 from caosadvancedtools.table_json_conversion import convert
 
@@ -112,6 +113,26 @@ def test_missing_columns():
         assert expected in messages
 
 
+def test_wrong_datatype():
+    """to_dict on the broken sheet must report both type errors at their cells."""
+    with pytest.raises(jsonschema.ValidationError) as caught:
+        convert.to_dict(xlsx=rfp("data/simple_data_broken.xlsx"),
+                        schema=rfp("data/simple_schema.json"))
+    report = str(caught.value)
+    expected_cells = {"'Not a num' is not of type 'number'": "J7",
+                      "1.5 is not of type 'integer'": "K7"}
+    # Each expected message must be present and share a line with its cell.
+    for message, cell in expected_cells.items():
+        assert message in report
+        for line in report.split('\n'):
+            if message in line:
+                assert cell in line
+    # No additional type errors
+    # ToDo: Remove the boolean allowance when boolean is fixed
+    n_allowed = 3 if "is not of type 'boolean'" in report else 2
+    assert report.count("is not of type") == n_allowed
+
+
 def test_faulty_foreign():
     # Simple wrong foreign key
     converter = convert.XLSXConverter(xlsx=rfp("data/simple_data_wrong_foreign.xlsx"),
-- 
GitLab