From a10d0b64dceddbf18fd92c492db758b895d68173 Mon Sep 17 00:00:00 2001
From: "i.nueske" <i.nueske@indiscale.com>
Date: Mon, 3 Mar 2025 11:24:37 +0100
Subject: [PATCH 1/8] TST: Use _validate_jsonschema instead of
 jsonschema.validate in fill_xlsx, move _validate_jsonschema to own file
 and adjust tests to match

---
 .../table_json_conversion/convert.py          | 47 +--------
 .../table_json_conversion/fill_xlsx.py        | 10 +-
 .../table_json_conversion/validation_utils.py | 98 +++++++++++++++++++
 .../table_json_conversion/test_fill_xlsx.py   |  9 +-
 4 files changed, 113 insertions(+), 51 deletions(-)
 create mode 100644 src/caosadvancedtools/table_json_conversion/validation_utils.py

diff --git a/src/caosadvancedtools/table_json_conversion/convert.py b/src/caosadvancedtools/table_json_conversion/convert.py
index 7a3d63a2..33432b85 100644
--- a/src/caosadvancedtools/table_json_conversion/convert.py
+++ b/src/caosadvancedtools/table_json_conversion/convert.py
@@ -31,12 +31,12 @@ from operator import getitem
 from types import SimpleNamespace
 from typing import Any, BinaryIO, Callable, TextIO, Union, Optional
 from warnings import warn
-from copy import deepcopy
 
 import jsonschema
 from openpyxl import load_workbook
 from openpyxl.worksheet.worksheet import Worksheet
 
+from .validation_utils import _validate_jsonschema
 from caosadvancedtools.table_json_conversion import xlsx_utils
 from caosadvancedtools.table_json_conversion.fill_xlsx import read_or_dict
 
@@ -153,51 +153,6 @@ class ForeignError(KeyError):
         self.definitions = definitions
 
 
-def _validate_jsonschema(instance, schema):
-    # Checks whether a key: value pair is in the given schema or fulfills the
-    # criteria of a direct subschema (anyOf, allOf, oneOf)
-    def in_schema(key, val, schema):
-        if schema.get(key, None) == val:
-            return True
-        if 'anyOf' in schema:
-            return any([in_schema(key, val, sub) for sub in schema['anyOf']])
-        if 'allOf' in schema:
-            return all([in_schema(key, val, sub) for sub in schema['allOf']])
-        if 'oneOf' in schema:
-            return [in_schema(key, val, sub) for sub in schema['oneOf']].count(True) == 1
-        return False
-
-    # Removes Key: None and datetime instances from nested dicts and lists of
-    # any depth. Key: None is currently valid as there is no 'obligatory with
-    # value', and datetime cannot be checked by jsonschema.
-    def remove_incompatible_values(it, schema):
-        if isinstance(it, list):
-            schema = schema.get('items', schema)
-            for elem in it:
-                remove_incompatible_values(elem, schema)
-        elif isinstance(it, dict):
-            schema = schema.get('properties', schema)
-            for key, elem in list(it.items()):
-                if elem is None:
-                    it.pop(key)
-                elif isinstance(elem, datetime.date) or isinstance(elem, datetime.datetime):
-                    if in_schema('format', 'date', schema[key]) or in_schema('format', 'date-time', schema[key]):
-                        it.pop(key)
-                elif isinstance(it, (dict, list)):
-                    remove_incompatible_values(elem, schema[key])
-        return it
-
-    # If instance is not a dict, remove_incompatible_values would not remove
-    # the value if it is valid, so we need to check manually by wrapping
-    instance = deepcopy(instance)
-    if not isinstance(instance, dict):
-        if remove_incompatible_values({'key': instance}, {'key': schema}) == {}:
-            return
-    # Clean dict and validate
-    instance = remove_incompatible_values(deepcopy(instance), schema)
-    jsonschema.validate(instance, schema=schema)
-
-
 class XLSXConverter:
     """Class for conversion from XLSX to JSON.
 
diff --git a/src/caosadvancedtools/table_json_conversion/fill_xlsx.py b/src/caosadvancedtools/table_json_conversion/fill_xlsx.py
index f2e0abc3..fe62731f 100644
--- a/src/caosadvancedtools/table_json_conversion/fill_xlsx.py
+++ b/src/caosadvancedtools/table_json_conversion/fill_xlsx.py
@@ -28,11 +28,11 @@ from types import SimpleNamespace
 from typing import Any, Optional, TextIO, Union
 from warnings import warn
 
-from jsonschema import FormatChecker, validate
 from jsonschema.exceptions import ValidationError
 from openpyxl import load_workbook, Workbook
 from openpyxl.cell.cell import ILLEGAL_CHARACTERS_RE
 
+from .validation_utils import _validate_jsonschema
 from .xlsx_utils import (
     array_schema_from_model_schema,
     get_foreign_key_columns,
@@ -354,10 +354,12 @@ validation_schema: dict, optional
 
     # Validation
     if validation_schema is not None:
-        validation_schema = array_schema_from_model_schema(read_or_dict(validation_schema))
+        # convert to array_schema if the given schema is a model_schema
+        if 'properties' in validation_schema and validation_schema['properties'].values():
+            if list(validation_schema['properties'].values())[0]["type"] != "array":
+                validation_schema = array_schema_from_model_schema(read_or_dict(validation_schema))
         try:
-            # FIXME redefine checker for datetime
-            validate(data, validation_schema, format_checker=FormatChecker())
+            _validate_jsonschema(data, validation_schema)
         except ValidationError as verr:
             print(verr.message)
             raise verr
diff --git a/src/caosadvancedtools/table_json_conversion/validation_utils.py b/src/caosadvancedtools/table_json_conversion/validation_utils.py
new file mode 100644
index 00000000..4d5e0741
--- /dev/null
+++ b/src/caosadvancedtools/table_json_conversion/validation_utils.py
@@ -0,0 +1,98 @@
+# encoding: utf-8
+#
+# This file is a part of the LinkAhead Project.
+#
+# Copyright (C) 2025 Indiscale GmbH <info@indiscale.com>
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as
+# published by the Free Software Foundation, either version 3 of the
+# License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+
+"""
+Utilities for validation of conversion / import / export results.
+For internal use.
+"""
+
+import datetime
+import json
+from copy import deepcopy
+from typing import Union
+
+import jsonschema
+
+
+def _validate_jsonschema(instance: Union[dict, int, str, bool],
+                         schema: Union[str, dict]):
+    """
+    A table_json_conversion compatible variant of jsonschema.validate().
+    Accepts instances with datetime instances and None in not-nullable entries.
+
+    Parameters
+    ----------
+    instance : dict, int, str, bool
+        Either a dict or a json entry to check against the given schema.
+    schema : str, dict
+        Either a dict with the jsonschema to check against, or a path to a file
+        containing the same.
+    """
+    # Helper Functions
+    def _in_schema(key, val, schema):
+        """
+        Checks whether a key: value pair is in the given schema or fulfills the
+        criteria of a direct subschema (anyOf, allOf, oneOf).
+        """
+        if schema.get(key, None) == val:
+            return True
+        if 'anyOf' in schema:
+            return any([_in_schema(key, val, sub) for sub in schema['anyOf']])
+        if 'allOf' in schema:
+            return all([_in_schema(key, val, sub) for sub in schema['allOf']])
+        if 'oneOf' in schema:
+            return [_in_schema(key, val, sub) for sub in schema['oneOf']].count(True) == 1
+        return False
+
+    def _remove_incompatible_vals(iterable, schema):
+        """
+        Removes Key: None and datetime instances from nested dicts and lists of
+        any depth. Key: None is currently valid as there is no 'obligatory with
+        value', and datetime cannot be checked by jsonschema.
+        """
+        if isinstance(iterable, list):
+            schema = schema.get('items', schema)
+            for elem in iterable:
+                _remove_incompatible_vals(elem, schema)
+        elif isinstance(iterable, dict):
+            schema = schema.get('properties', schema)
+            for key, elem in list(iterable.items()):
+                if elem is None:
+                    iterable.pop(key)
+                elif isinstance(elem, (datetime.date, datetime.datetime)):
+                    if (_in_schema('format', 'date', schema[key]) or
+                            _in_schema('format', 'date-time', schema[key])):
+                        iterable.pop(key)
+                elif isinstance(iterable, (dict, list)):
+                    _remove_incompatible_vals(elem, schema[key])
+        return iterable
+
+    # If jsonschema is a file, load its content
+    if str(schema).endswith(".json"):
+        with open(schema, encoding="utf-8") as content:
+            schema = json.load(content)
+    # If instance is not a dict, remove_incompatible_values would not remove
+    # the value if it is valid, so we need to check manually by wrapping
+    instance = deepcopy(instance)
+    if not isinstance(instance, dict):
+        if _remove_incompatible_vals({'key': instance}, {'key': schema}) == {}:
+            return
+    # Clean dict and validate
+    instance = _remove_incompatible_vals(deepcopy(instance), schema)
+    jsonschema.validate(instance, schema=schema)
diff --git a/unittests/table_json_conversion/test_fill_xlsx.py b/unittests/table_json_conversion/test_fill_xlsx.py
index 899bb81e..f77131bc 100644
--- a/unittests/table_json_conversion/test_fill_xlsx.py
+++ b/unittests/table_json_conversion/test_fill_xlsx.py
@@ -59,13 +59,20 @@ schema: str, optional,
 custom_output: str, optional
   If given, write to this file and drop into an IPython shell.  For development only.
     """
+    if schema is not None:
+        with open(schema, encoding="utf8", mode="r") as sch_f:
+            model_schema = json.load(sch_f)
+        data_schema = xlsx_utils.array_schema_from_model_schema(model_schema)
+    else:
+        data_schema = schema
+
     with tempfile.TemporaryDirectory() as tmpdir:
         outfile = os.path.join(tmpdir, 'test.xlsx')
         assert not os.path.exists(outfile)
         if custom_output is not None:
             outfile = custom_output
         fill_template(data=json_file, template=template_file, result=outfile,
-                      validation_schema=schema)
+                      validation_schema=data_schema)
         assert os.path.exists(outfile)
         generated = load_workbook(outfile)  # workbook can be read
     known_good_wb = load_workbook(known_good)
-- 
GitLab


From fff1c8f8200e89abe444bc9d126877c6e03c53b5 Mon Sep 17 00:00:00 2001
From: "i.nueske" <i.nueske@indiscale.com>
Date: Wed, 5 Mar 2025 09:30:03 +0100
Subject: [PATCH 2/8] WIP: Add function to export records to xlsx:

- Added parameter use_id_for_identification to JsonSchemaExporter, which sets foreign key to id and adds an 'id' column to all tables
- Added parameter return_data_schema to merge_schemas. If set, merge_schemas returns a data_schema as well as the normal model_schema

- Added new file export_import_xlsx. In addition to private functions for generating templates, schemas, and data, there is a new public function export_container_to_xlsx which generates an xlsx file containing the data from the given records at a given path

- Changed a print warning in fill_xlsx to warnings.warn for easier filtering
---
 src/caosadvancedtools/json_schema_exporter.py |  53 +++-
 .../export_import_xlsx.py                     | 237 ++++++++++++++++++
 .../table_json_conversion/fill_xlsx.py        |   3 +-
 3 files changed, 288 insertions(+), 5 deletions(-)
 create mode 100644 src/caosadvancedtools/table_json_conversion/export_import_xlsx.py

diff --git a/src/caosadvancedtools/json_schema_exporter.py b/src/caosadvancedtools/json_schema_exporter.py
index 56568ca1..bce3102e 100644
--- a/src/caosadvancedtools/json_schema_exporter.py
+++ b/src/caosadvancedtools/json_schema_exporter.py
@@ -70,6 +70,7 @@ class JsonSchemaExporter:
 
     def __init__(self, additional_properties: bool = True,
                  name_property_for_new_records: bool = False,
+                 use_id_for_identification: bool = False,
                  description_property_for_new_records: bool = False,
                  additional_options_for_text_props: dict = None,
                  additional_json_schema: Dict[str, dict] = None,
@@ -92,6 +93,9 @@ class JsonSchemaExporter:
         name_property_for_new_records : bool, optional
             Whether objects shall generally have a `name` property in the generated schema.
             Optional, default is False.
+        use_id_for_identification: bool, optional
+            If set to true, an 'id' property is added to all records, and
+            foreign key references are assumed to be ids.
         description_property_for_new_records : bool, optional
             Whether objects shall generally have a `description` property in the generated schema.
             Optional, default is False.
@@ -151,6 +155,7 @@ class JsonSchemaExporter:
 
         self._additional_properties = additional_properties
         self._name_property_for_new_records = name_property_for_new_records
+        self._use_id_for_identification = use_id_for_identification
         self._description_property_for_new_records = description_property_for_new_records
         self._additional_options_for_text_props = additional_options_for_text_props
         self._additional_json_schema = additional_json_schema
@@ -257,7 +262,18 @@ ui_schema : dict
             if inner_ui_schema:
                 ui_schema["items"] = inner_ui_schema
         elif prop.is_reference():
-            if prop.datatype == db.REFERENCE:
+            if self._use_id_for_identification:
+                json_prop["type"] = "object"
+                json_prop["required"] = []
+                json_prop["additionalProperties"] = False
+                json_prop["title"] = prop.name
+                if prop.datatype == db.FILE:
+                    json_prop["description"] = "Path to file"
+                    json_prop["properties"] = {"path": {"type": "string"}}
+                else:
+                    json_prop["properties"] = {
+                        "id": {"oneOf": [{"type": "integer"}, {"type": "string"}]}}
+            elif prop.datatype == db.REFERENCE:
                 # No Record creation since no RT is specified and we don't know what
                 # schema to use, so only enum of all Records and all Files.
                 values = self._retrieve_enum_values("RECORD") + self._retrieve_enum_values("FILE")
@@ -410,7 +426,9 @@ ui_schema : dict
 
         vals = []
         for val in possible_values:
-            if val.name:
+            if self._use_id_for_identification:
+                vals.append(val.id)
+            elif val.name:
                 vals.append(f"{val.name}")
             else:
                 vals.append(f"{val.id}")
@@ -453,6 +471,8 @@ ui_schema : dict
         props = OrderedDict()
         if self._name_property_for_new_records:
             props["name"] = self._make_text_property("The name of the Record to be created")
+        if self._use_id_for_identification:
+            props["id"] = self._make_text_property("The id of the Record")
         if self._description_property_for_new_records:
             props["description"] = self._make_text_property(
                 "The description of the Record to be created")
@@ -544,6 +564,7 @@ guaranteed (as of now).
 
 def recordtype_to_json_schema(rt: db.RecordType, additional_properties: bool = True,
                               name_property_for_new_records: bool = False,
+                              use_id_for_identification: bool = False,
                               description_property_for_new_records: bool = False,
                               additional_options_for_text_props: Optional[dict] = None,
                               additional_json_schema: Dict[str, dict] = None,
@@ -573,6 +594,9 @@ def recordtype_to_json_schema(rt: db.RecordType, additional_properties: bool = T
     name_property_for_new_records : bool, optional
         Whether objects shall generally have a `name` property in the generated schema. Optional,
         default is False.
+    use_id_for_identification: bool, optional
+        If set to true, an 'id' property is added to all records, and foreign
+        key references are assumed to be ids.
     description_property_for_new_records : bool, optional
         Whether objects shall generally have a `description` property in the generated schema.
         Optional, default is False.
@@ -629,6 +653,7 @@ def recordtype_to_json_schema(rt: db.RecordType, additional_properties: bool = T
     exporter = JsonSchemaExporter(
         additional_properties=additional_properties,
         name_property_for_new_records=name_property_for_new_records,
+        use_id_for_identification=use_id_for_identification,
         description_property_for_new_records=description_property_for_new_records,
         additional_options_for_text_props=additional_options_for_text_props,
         additional_json_schema=additional_json_schema,
@@ -696,8 +721,8 @@ ui_schema : dict, optional
 
 
 def merge_schemas(schemas: Union[Dict[str, dict], Iterable[dict]],
-                  rjsf_uischemas: Union[Dict[str, dict], Sequence[dict]] = None) -> (
-                      Union[dict, Tuple[dict, dict]]):
+                  rjsf_uischemas: Union[Dict[str, dict], Sequence[dict]] = None,
+                  return_data_schema=False) -> (Union[dict, Tuple[dict, dict]]):
     """Merge the given schemata into a single schema.
 
 The result will look like this:
@@ -728,6 +753,11 @@ rjsf_uischemas : dict[str, dict] | Iterable[dict], optional
   If given, also merge the react-jsonschema-forms from this argument and return as the second return
   value.  If ``schemas`` is a dict, this parameter must also be a dict, if ``schemas`` is only an
   iterable, this paramater must support numerical indexing.
+return_data_schema : bool, default False
+  If set to True, a second schema with all top-level entries wrapped in an
+  array will be returned. This is necessary if the schema describes the
+  data layout of an XLSX file.
+  Cannot be used together with rjsf_uischemas.
 
 Returns
 -------
@@ -737,10 +767,13 @@ schema : dict
 
 uischema : dict
   If ``rjsf_uischemas`` was given, this contains the merged UI schemata.
+data_schema : dict
+  If ``return_data_schema`` was given, this contains the XLSX file schema.
     """
     sub_schemas: dict[str, dict] = OrderedDict()
     required = []
     ui_schema = None
+    data_sub_schemas = OrderedDict()
 
     if isinstance(schemas, dict):
         sub_schemas = schemas
@@ -754,6 +787,8 @@ uischema : dict
         for i, schema in enumerate(schemas, start=1):
             title = schema.get("title", str(i))
             sub_schemas[title] = schema
+            if return_data_schema:
+                data_sub_schemas[title] = {"type": "array", "items": schema}
             required.append(title)
         if rjsf_uischemas is not None:
             if not isinstance(rjsf_uischemas, Sequence):
@@ -771,7 +806,17 @@ uischema : dict
         "additionalProperties": False,
         "$schema": "https://json-schema.org/draft/2020-12/schema",
     }
+    if return_data_schema:
+        data_schema = {
+            "type": "object",
+            "properties": data_sub_schemas,
+            "required": required,
+            "additionalProperties": False,
+            "$schema": "https://json-schema.org/draft/2020-12/schema",
+        }
 
     if ui_schema is not None:
         return result, ui_schema
+    if return_data_schema:
+        return result, data_schema
     return result
diff --git a/src/caosadvancedtools/table_json_conversion/export_import_xlsx.py b/src/caosadvancedtools/table_json_conversion/export_import_xlsx.py
new file mode 100644
index 00000000..ed2b9720
--- /dev/null
+++ b/src/caosadvancedtools/table_json_conversion/export_import_xlsx.py
@@ -0,0 +1,237 @@
+# encoding: utf-8
+#
+# This file is a part of the LinkAhead Project.
+#
+# Copyright (C) 2025 Indiscale GmbH <info@indiscale.com>
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as
+# published by the Free Software Foundation, either version 3 of the
+# License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+
+"""
+Utilities for automatically exporting and importing data to and from xlsx.
+"""
+
+import json
+import tempfile
+import warnings
+from typing import Union
+from pathlib import Path
+
+import linkahead
+from linkahead.common.models import Container
+from linkahead import execute_query
+with warnings.catch_warnings():
+    warnings.filterwarnings("ignore", message="^.*experimental.*$")
+    from linkahead.high_level_api import convert_to_python_object
+
+from ..json_schema_exporter import JsonSchemaExporter, merge_schemas
+from .table_generator import XLSXTemplateGenerator
+from .fill_xlsx import fill_template
+
+
+def _generate_jsonschema_from_recordtypes(recordtypes: list,
+                                          out_path: Union[str, Path] = None) -> dict:
+    """
+    Generate a combined jsonschema for all given recordtypes.
+
+    Parameters
+    ----------
+    recordtypes : Iterable
+        List of RecordType entities for which a schema should be generated.
+    out_path : str, Path
+        If given, the resulting jsonschema will also be written to the file
+        given by out_path.
+        Optional, default None
+
+    Returns
+    -------
+    data_schema : dict
+        The generated schema.
+    """
+    # Generate schema
+    schema_generator = JsonSchemaExporter(additional_properties=False,
+                                          name_property_for_new_records=True,
+                                          use_id_for_identification=True)
+    schemas = [schema_generator.recordtype_to_json_schema(recordtype)
+               for recordtype in recordtypes]
+    _, data_schema = merge_schemas(schemas, return_data_schema=True)
+    # If indicated, save as json file
+    if out_path is not None:
+        with open(out_path, mode="w", encoding="utf8") as json_file:
+            json.dump(data_schema, json_file, ensure_ascii=False, indent=2)
+    # Return
+    return data_schema
+
+
+def _generate_jsondata_from_records(records: Container,
+                                    out_path: Union[str, Path] = None) -> dict:
+    """
+    Extract relevant information (id, name, properties, etc.) from the given
+    records and converts this information to json.
+
+    Parameters
+    ----------
+    records :  Iterable
+        List of Record entities from which the data will be converted to json.
+    out_path : str, Path
+        If given, the resulting jsondata will also be written to the file given
+        by out_path.
+        Optional, default None
+
+    Returns
+    -------
+    json_data : dict
+        The given records data in json form.
+    """
+    json_data = {}
+    # Ignore warning from high_level_api to avoid raising warnings that cannot
+    # be avoided by user
+    for record in records:
+        # Convert records to high level api objects
+        record_obj = convert_to_python_object(record)
+        try:
+            record_obj.resolve_references(True, None)
+        except linkahead.LinkAheadException:
+            warnings.warn(f"Data for record with id {record_obj.id} might be "
+                          f"incomplete, unsuccessful retrieve.")
+        # Get json representation & adjust layout for compatibility
+        raw_data = record_obj.serialize()
+        raw_data.update(raw_data.get('properties', {}))
+        raw_data.pop('properties')
+        if record.parents[0].name not in json_data:
+            json_data[record.parents[0].name] = []
+        json_data[record.parents[0].name].append(raw_data)
+    # If indicated, save as json file
+    if out_path is not None:
+        with open(out_path, mode="w", encoding="utf8") as json_file:
+            json.dump(json_data, json_file, ensure_ascii=False, indent=2, default=str)
+    # Return
+    return json_data
+
+
+def _generate_xlsx_template_file(schema: dict,
+                                 recordtype_names: Union[list, set],
+                                 out_path: Union[str, Path]):
+    """
+    Generate an empty XLSX template file for the given schema at the indicated
+    location.
+
+    Parameters
+    ----------
+    schema : dict
+        Jsonschema for which an xlsx template should be generated.
+    recordtype_names : Iterable
+        List of all RecordType names in the given schema.
+    out_path : str, Path
+        The resulting xlsx template will be written to the file at this path.
+    """
+    generator = XLSXTemplateGenerator()
+    foreign_keys = {name: {"__this__": ['id']} for name in recordtype_names}
+    generator.generate(schema=schema, foreign_keys=foreign_keys,
+                       filepath=out_path)
+
+
+def export_container_to_xlsx(records: Container,
+                             xlsx_data_filepath: Union[str, Path],
+                             include_referenced_entities: bool = False,
+                             jsonschema_filepath: Union[str, Path] = None,
+                             jsondata_filepath: Union[str, Path] = None,
+                             xlsx_template_filepath: Union[str, Path] = None):
+    """
+    Export the data of the given records to an xlsx file.
+
+    Parameters
+    ----------
+    records : Container, Iterable
+        List of records to export.
+    xlsx_data_filepath : str, Path
+        Write the resulting xlsx file to the file at this location.
+    include_referenced_entities : bool
+        If set to true, any records referenced by properties of those given in
+        'records' will also be exported.
+        Optional, default False
+    jsonschema_filepath : str, Path
+        If given, write the jsonschema to this file.
+        Optional, default None
+    jsondata_filepath : str, Path
+        If given, write the json data to this file.
+        Optional, default None
+    xlsx_template_filepath : str, Path
+        If given, write the xlsx template to this file.
+        Optional, default None
+    """
+    # Ensure every record is only handled once by using id as key.
+    entity_ids = {record.id for record in records}
+    # If indicated, also get and add the records referenced on the first level
+    # in the given container
+    if include_referenced_entities:
+        for record in records:
+            for prop in record.properties:
+                if prop.is_reference() and prop.value is not None:
+                    try:
+                        ref_list = prop.value
+                        if not isinstance(ref_list, list):
+                            ref_list = [ref_list]
+                        for element in ref_list:
+                            if isinstance(element, (int, str)):
+                                elem_id = element
+                            elif isinstance(element, linkahead.Entity):
+                                elem_id = element.id
+                            else:
+                                warnings.warn(f"Cannot handle referenced "
+                                              f"entity '{prop.value}'")
+                                continue
+                            entity_ids.add(elem_id)
+                    except linkahead.LinkAheadException as e:
+                        warnings.warn(f"Cannot handle referenced entity "
+                                      f"'{prop.value}' because of error '{e}'")
+    # Retrieve data
+    new_records = []
+    for entity_id in entity_ids:
+        entity_id = str(entity_id).split('@')[0]
+        entity = execute_query(f"FIND ENTITY WITH (ID = {entity_id})", unique=True)
+        if len(entity.get_parents()) > 0:
+            new_records.append(entity)
+        # ToDo: Handle Files and other Entities (e.g. Properties) separately
+    records = new_records
+    recordtypes = {record.parents[0] for record in records}
+    recordtype_ids = {recordtype.id for recordtype in recordtypes}
+    recordtypes = [execute_query(f"FIND RECORDTYPE WITH (ID = {rt_id})",
+                                 unique=True)
+                   for rt_id in recordtype_ids]
+    recordtype_names = {recordtype.name for recordtype in recordtypes}
+    # Generate schema and data from the records
+    json_schema = _generate_jsonschema_from_recordtypes(recordtypes,
+                                                        jsonschema_filepath)
+    json_data = _generate_jsondata_from_records(records, jsondata_filepath)
+    # Generate xlsx template with tempfile if necessary
+    if xlsx_template_filepath is None:
+        xlsx_template_file = tempfile.NamedTemporaryFile(suffix='.xlsx')
+        xlsx_template_filepath = xlsx_template_file.name
+    else:
+        xlsx_template_file = None
+    _generate_xlsx_template_file(json_schema, recordtype_names,
+                                 xlsx_template_filepath)
+    # Fill xlsx file with data
+    with warnings.catch_warnings():
+        # We have a lot of information in the json data that we do not need
+        warnings.filterwarnings("ignore",
+                                message="^.*Ignoring path with missing sheet index.*$")
+        warnings.filterwarnings("ignore",
+                                message="^.*No validation schema.*$")
+        fill_template(data=json_data, template=xlsx_template_filepath,
+                      result=xlsx_data_filepath)
+        # ToDo: Validation
+    # Cleanup
+    if xlsx_template_file is not None:
+        xlsx_template_file.close()
diff --git a/src/caosadvancedtools/table_json_conversion/fill_xlsx.py b/src/caosadvancedtools/table_json_conversion/fill_xlsx.py
index fe62731f..92fae16c 100644
--- a/src/caosadvancedtools/table_json_conversion/fill_xlsx.py
+++ b/src/caosadvancedtools/table_json_conversion/fill_xlsx.py
@@ -24,6 +24,7 @@ from __future__ import annotations
 
 import datetime
 import pathlib
+import warnings
 from types import SimpleNamespace
 from typing import Any, Optional, TextIO, Union
 from warnings import warn
@@ -364,7 +365,7 @@ validation_schema: dict, optional
             print(verr.message)
             raise verr
     else:
-        print("No validation schema given, continue at your own risk.")
+        warnings.warn("No validation schema given, continue at your own risk.")
 
     # Filling the data
     result_wb = load_workbook(template)
-- 
GitLab


From 76ed7bfeeefb72a2988dd7ad634b46293fc9cd62 Mon Sep 17 00:00:00 2001
From: "i.nueske" <i.nueske@indiscale.com>
Date: Wed, 5 Mar 2025 13:05:56 +0100
Subject: [PATCH 3/8] ENH: XLSX Export cleanup incl. remove broken warnings
 filter

---
 .../table_json_conversion/export_import_xlsx.py            | 7 ++-----
 .../table_json_conversion/validation_utils.py              | 5 ++++-
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/src/caosadvancedtools/table_json_conversion/export_import_xlsx.py b/src/caosadvancedtools/table_json_conversion/export_import_xlsx.py
index ed2b9720..8730d986 100644
--- a/src/caosadvancedtools/table_json_conversion/export_import_xlsx.py
+++ b/src/caosadvancedtools/table_json_conversion/export_import_xlsx.py
@@ -30,9 +30,7 @@ from pathlib import Path
 import linkahead
 from linkahead.common.models import Container
 from linkahead import execute_query
-with warnings.catch_warnings():
-    warnings.filterwarnings("ignore", message="^.*experimental.*$")
-    from linkahead.high_level_api import convert_to_python_object
+from linkahead.high_level_api import convert_to_python_object
 
 from ..json_schema_exporter import JsonSchemaExporter, merge_schemas
 from .table_generator import XLSXTemplateGenerator
@@ -100,7 +98,7 @@ def _generate_jsondata_from_records(records: Container,
         # Convert records to high level api objects
         record_obj = convert_to_python_object(record)
         try:
-            record_obj.resolve_references(True, None)
+            record_obj.resolve_references(False, None)
         except linkahead.LinkAheadException:
             warnings.warn(f"Data for record with id {record_obj.id} might be "
                           f"incomplete, unsuccessful retrieve.")
@@ -231,7 +229,6 @@ def export_container_to_xlsx(records: Container,
                                 message="^.*No validation schema.*$")
         fill_template(data=json_data, template=xlsx_template_filepath,
                       result=xlsx_data_filepath)
-        # ToDo: Validation
     # Cleanup
     if xlsx_template_file is not None:
         xlsx_template_file.close()
diff --git a/src/caosadvancedtools/table_json_conversion/validation_utils.py b/src/caosadvancedtools/table_json_conversion/validation_utils.py
index 4d5e0741..f1e77f48 100644
--- a/src/caosadvancedtools/table_json_conversion/validation_utils.py
+++ b/src/caosadvancedtools/table_json_conversion/validation_utils.py
@@ -80,7 +80,10 @@ def _validate_jsonschema(instance: Union[dict, int, str, bool],
                             _in_schema('format', 'date-time', schema[key])):
                         iterable.pop(key)
                 elif isinstance(iterable, (dict, list)):
-                    _remove_incompatible_vals(elem, schema[key])
+                    try:
+                        _remove_incompatible_vals(elem, schema[key])
+                    except KeyError:
+                        pass
         return iterable
 
     # If jsonschema is a file, load its content
-- 
GitLab


From 93fdae4167cc7d2b33f45fb15b1c723dd570d396 Mon Sep 17 00:00:00 2001
From: "i.nueske" <i.nueske@indiscale.com>
Date: Sun, 9 Mar 2025 13:46:10 +0100
Subject: [PATCH 4/8] TST: Add more XLSX tests, unignore validation parameter
 in convert_and_compare, fix typo

---
 .../table_json_conversion/fill_xlsx.py        |  2 +-
 .../table_json_conversion/test_fill_xlsx.py   | 26 +++++++++++++++
 .../table_json_conversion/test_read_xlsx.py   | 32 +++++++++++++++++--
 3 files changed, 57 insertions(+), 3 deletions(-)

diff --git a/src/caosadvancedtools/table_json_conversion/fill_xlsx.py b/src/caosadvancedtools/table_json_conversion/fill_xlsx.py
index 92fae16c..1f39f66d 100644
--- a/src/caosadvancedtools/table_json_conversion/fill_xlsx.py
+++ b/src/caosadvancedtools/table_json_conversion/fill_xlsx.py
@@ -355,7 +355,7 @@ validation_schema: dict, optional
 
     # Validation
     if validation_schema is not None:
-        # convert to array_schema is given schema is a model_schema
+        # convert to array_schema if given schema is a model_schema
         if 'properties' in validation_schema and validation_schema['properties'].values():
             if list(validation_schema['properties'].values())[0]["type"] != "array":
                 validation_schema = array_schema_from_model_schema(read_or_dict(validation_schema))
diff --git a/unittests/table_json_conversion/test_fill_xlsx.py b/unittests/table_json_conversion/test_fill_xlsx.py
index f77131bc..084f19ba 100644
--- a/unittests/table_json_conversion/test_fill_xlsx.py
+++ b/unittests/table_json_conversion/test_fill_xlsx.py
@@ -196,6 +196,32 @@ def test_errors():
                          known_good=rfp("data/simple_data.xlsx"),
                          schema=rfp("data/simple_schema.json"))
     assert exc.value.message == "0.5 is not of type 'integer'"
+    # Check wrong data
+    with open(rfp("data/simple_data.json")) as json_file:
+        json_data = json.load(json_file)
+    json_data["Training"][0]["date"] = "2023-01"
+    with tempfile.NamedTemporaryFile(suffix='.json', mode='w+t') as temp_file:
+        json.dump(json_data, temp_file)
+        temp_file.seek(0)
+        with pytest.raises(AssertionError) as exc:
+            fill_and_compare(json_file=temp_file.name,
+                             template_file=rfp("data/simple_template.xlsx"),
+                             known_good=rfp("data/simple_data.xlsx"),
+                             schema=rfp("data/simple_schema.json"))
+        assert "Training" in str(exc) and "2023-01" in str(exc)
+    # Check wrong schema
+    with open(rfp("data/simple_schema.json")) as json_file:
+        json_schema = json.load(json_file)
+    json_schema["properties"]["Person"]["properties"]["given_name"]["type"] = "integer"
+    with tempfile.NamedTemporaryFile(suffix='.json', mode='w+t') as temp_file:
+        json.dump(json_schema, temp_file)
+        temp_file.seek(0)
+        with pytest.raises(schema_exc.ValidationError) as exc:
+            fill_and_compare(json_file=rfp("data/simple_data.json"),
+                             template_file=rfp("data/simple_template.xlsx"),
+                             known_good=rfp("data/simple_data.xlsx"),
+                             schema=temp_file.name)
+        assert "integer" in str(exc)
 
 
 def test_data_schema_generation():
diff --git a/unittests/table_json_conversion/test_read_xlsx.py b/unittests/table_json_conversion/test_read_xlsx.py
index d453ab35..10b462df 100644
--- a/unittests/table_json_conversion/test_read_xlsx.py
+++ b/unittests/table_json_conversion/test_read_xlsx.py
@@ -24,6 +24,7 @@ import datetime
 import json
 import os
 import re
+import tempfile
 
 from types import SimpleNamespace
 from typing import Optional
@@ -43,7 +44,7 @@ def rfp(*pathcomponents):
 
 def convert_and_compare(xlsx_file: str, schema_file: str, known_good_file: str,
                         known_good_data: Optional[dict] = None, strict: bool = False,
-                        validate: bool = False) -> dict:
+                        validate: bool = True) -> dict:
     """Convert an XLSX file and compare to a known result.
 
 Exactly one of ``known_good_file`` and ``known_good_data`` should be non-empty.
@@ -57,7 +58,7 @@ json: dict
         model_schema = json.load(sch_f)
     data_schema = xlsx_utils.array_schema_from_model_schema(model_schema)
 
-    result = convert.to_dict(xlsx=xlsx_file, schema=data_schema, validate=True)
+    result = convert.to_dict(xlsx=xlsx_file, schema=data_schema, validate=validate)
     if known_good_file:
         with open(known_good_file, encoding="utf-8") as myfile:
             expected = json.load(myfile)
@@ -101,6 +102,33 @@ def test_conversions():
     assert str(err.value).startswith("Values at path ['Training', 0, ")
 
 
+def test_validation():
+    # Check wrong data
+    with open(rfp("data/simple_data.json")) as json_file:
+        known_good = json.load(json_file)
+    known_good["Training"][0]["date"] = "2023-01-02"
+    with tempfile.NamedTemporaryFile(suffix='.json', mode='w+t') as temp_file:
+        json.dump(known_good, temp_file)
+        temp_file.seek(0)
+        with pytest.raises(AssertionError) as exc:
+            convert_and_compare(xlsx_file=rfp("data/simple_data.xlsx"),
+                                schema_file=rfp("data/simple_schema.json"),
+                                known_good_file=temp_file.name)
+        assert "Training" in str(exc) and "2023-01-02" in str(exc)
+    # Check wrong schema
+    with open(rfp("data/simple_schema.json")) as json_file:
+        json_schema = json.load(json_file)
+    json_schema["properties"]["Person"]["properties"]["given_name"]["type"] = "integer"
+    with tempfile.NamedTemporaryFile(suffix='.json', mode='w+t') as temp_file:
+        json.dump(json_schema, temp_file)
+        temp_file.seek(0)
+        with pytest.raises(jsonschema.ValidationError) as exc:
+            convert_and_compare(xlsx_file=rfp("data/simple_data.xlsx"),
+                                schema_file=temp_file.name,
+                                known_good_file=rfp("data/simple_data.json"))
+        assert "integer" in str(exc)
+
+
 def test_missing_columns():
     with pytest.raises(ValueError) as caught:
         convert.to_dict(xlsx=rfp("data/simple_data_missing.xlsx"),
-- 
GitLab


From 376cd77ccc7971b4bbb4297364c8286e6c7ae2d3 Mon Sep 17 00:00:00 2001
From: "i.nueske" <i.nueske@indiscale.com>
Date: Sun, 9 Mar 2025 14:11:23 +0100
Subject: [PATCH 5/8] DOC: Update Changelog

---
 CHANGELOG.md | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 404424de..dc465d78 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -8,6 +8,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ### Added ###
 
+- Added table_json_conversion.export_import_xlsx with a public function
+  export_container_to_xlsx, which exports the data of a given Entity
+  Container to an XLSX file.
+
 ### Changed ###
 
 ### Deprecated ###
-- 
GitLab


From 3b93d81e89ed955c59e91ad4903c70af0bd4fd44 Mon Sep 17 00:00:00 2001
From: "i.nueske" <i.nueske@indiscale.com>
Date: Sun, 9 Mar 2025 14:21:57 +0100
Subject: [PATCH 6/8] DOC: Add some comments

---
 .../table_json_conversion/export_import_xlsx.py        | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/src/caosadvancedtools/table_json_conversion/export_import_xlsx.py b/src/caosadvancedtools/table_json_conversion/export_import_xlsx.py
index 8730d986..0fe5d9d8 100644
--- a/src/caosadvancedtools/table_json_conversion/export_import_xlsx.py
+++ b/src/caosadvancedtools/table_json_conversion/export_import_xlsx.py
@@ -92,8 +92,6 @@ def _generate_jsondata_from_records(records: Container,
         The given records data in json form.
     """
     json_data = {}
-    # Ignore warning from high_level_api to avoid raising warnings that cannot
-    # be avoided by user
     for record in records:
         # Convert records to high level api objects
         record_obj = convert_to_python_object(record)
@@ -196,8 +194,10 @@ def export_container_to_xlsx(records: Container,
     # Retrieve data
     new_records = []
     for entity_id in entity_ids:
-        entity_id = str(entity_id).split('@')[0]
+        entity_id = str(entity_id).split('@')[0]    # Queries cannot handle versioned IDs
         entity = execute_query(f"FIND ENTITY WITH (ID = {entity_id})", unique=True)
+        # We can currently only handle Entities with a parent, as otherwise we
+        # do not know which sheet they belong in.
         if len(entity.get_parents()) > 0:
             new_records.append(entity)
         # ToDo: Handle Files and other Entities (e.g. Properties) separately
@@ -212,7 +212,9 @@ def export_container_to_xlsx(records: Container,
     json_schema = _generate_jsonschema_from_recordtypes(recordtypes,
                                                         jsonschema_filepath)
     json_data = _generate_jsondata_from_records(records, jsondata_filepath)
-    # Generate xlsx template with tempfile if necessary
+    # Generate xlsx template
+    # _generate_xlsx_template_file needs a file name, so use NamedTemporaryFile
+    # ToDo: This might not work on windows, if not, fix _generate file handling
     if xlsx_template_filepath is None:
         xlsx_template_file = tempfile.NamedTemporaryFile(suffix='.xlsx')
         xlsx_template_filepath = xlsx_template_file.name
-- 
GitLab


From b1d0ec967959136d753f0932cfa5f2a0e6499397 Mon Sep 17 00:00:00 2001
From: "i.nueske" <i.nueske@indiscale.com>
Date: Wed, 12 Mar 2025 19:44:16 +0100
Subject: [PATCH 7/8] MNT: Suppress high_level_api import warning

---
 .../table_json_conversion/export_import_xlsx.py           | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/src/caosadvancedtools/table_json_conversion/export_import_xlsx.py b/src/caosadvancedtools/table_json_conversion/export_import_xlsx.py
index 0fe5d9d8..d41fe333 100644
--- a/src/caosadvancedtools/table_json_conversion/export_import_xlsx.py
+++ b/src/caosadvancedtools/table_json_conversion/export_import_xlsx.py
@@ -24,18 +24,24 @@ Utilities for automatically exporting and importing data to and from xlsx.
 import json
 import tempfile
 import warnings
+import logging
 from typing import Union
 from pathlib import Path
 
 import linkahead
 from linkahead.common.models import Container
 from linkahead import execute_query
-from linkahead.high_level_api import convert_to_python_object
 
 from ..json_schema_exporter import JsonSchemaExporter, merge_schemas
 from .table_generator import XLSXTemplateGenerator
 from .fill_xlsx import fill_template
 
+# The high_level_api import would normally warn about the API being
+# experimental. We know this, so suppress the warning.
+logging.disable(logging.WARNING)
+from linkahead.high_level_api import convert_to_python_object
+logging.disable(logging.NOTSET)
+
 
 def _generate_jsonschema_from_recordtypes(recordtypes: list,
                                           out_path: Union[str, Path] = None) -> dict:
-- 
GitLab


From aec14b4a192a66934e5ca817d8a756b45afc31a5 Mon Sep 17 00:00:00 2001
From: "i.nueske" <i.nueske@indiscale.com>
Date: Wed, 12 Mar 2025 20:00:18 +0100
Subject: [PATCH 8/8] STY: Ignore style issue

---
 .../table_json_conversion/export_import_xlsx.py                 | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/caosadvancedtools/table_json_conversion/export_import_xlsx.py b/src/caosadvancedtools/table_json_conversion/export_import_xlsx.py
index d41fe333..ea18a374 100644
--- a/src/caosadvancedtools/table_json_conversion/export_import_xlsx.py
+++ b/src/caosadvancedtools/table_json_conversion/export_import_xlsx.py
@@ -39,7 +39,7 @@ from .fill_xlsx import fill_template
 # The high_level_api import would normally warn about the API being
 # experimental. We know this, so suppress the warning.
 logging.disable(logging.WARNING)
-from linkahead.high_level_api import convert_to_python_object
+from linkahead.high_level_api import convert_to_python_object   # noqa: E402, pylint: disable=wrong-import-position
 logging.disable(logging.NOTSET)
 
 
-- 
GitLab