Skip to content
Snippets Groups Projects
Commit fff1c8f8 authored by I. Nüske's avatar I. Nüske
Browse files

WIP: Add function to export records to xlsx:

- Added parameter use_id_for_identification to JsonSchemaExporter, which sets foreign key to id and adds an 'id' column to all tables
- Added parameter return_data_schema to merge_schemas. If set, merge_schemas returns a data_schema as well as the normal model_schema

- Added new file export_import_xlsx. In addition to private functions for generating templates, schemas, and data, there is a new public function export_container_to_xlsx which generates an xlsx file containing the data from the given records at a given path

- Changed a print warning in fill_xlsx to warnings.warn for easier filtering
parent a10d0b64
No related branches found
No related tags found
1 merge request!132Draft: Automatic XLSX export
Pipeline #61558 passed
This commit is part of merge request !132. Comments created here will be created in the context of that merge request.
......@@ -70,6 +70,7 @@ class JsonSchemaExporter:
def __init__(self, additional_properties: bool = True,
name_property_for_new_records: bool = False,
use_id_for_identification: bool = False,
description_property_for_new_records: bool = False,
additional_options_for_text_props: dict = None,
additional_json_schema: Dict[str, dict] = None,
......@@ -92,6 +93,9 @@ class JsonSchemaExporter:
name_property_for_new_records : bool, optional
Whether objects shall generally have a `name` property in the generated schema.
Optional, default is False.
use_id_for_identification: bool, optional
If set to true, an 'id' property is added to all records, and
foreign key references are assumed to be ids.
description_property_for_new_records : bool, optional
Whether objects shall generally have a `description` property in the generated schema.
Optional, default is False.
......@@ -151,6 +155,7 @@ class JsonSchemaExporter:
self._additional_properties = additional_properties
self._name_property_for_new_records = name_property_for_new_records
self._use_id_for_identification = use_id_for_identification
self._description_property_for_new_records = description_property_for_new_records
self._additional_options_for_text_props = additional_options_for_text_props
self._additional_json_schema = additional_json_schema
......@@ -257,7 +262,18 @@ ui_schema : dict
if inner_ui_schema:
ui_schema["items"] = inner_ui_schema
elif prop.is_reference():
if prop.datatype == db.REFERENCE:
if self._use_id_for_identification:
json_prop["type"] = "object"
json_prop["required"] = []
json_prop["additionalProperties"] = False
json_prop["title"] = prop.name
if prop.datatype == db.FILE:
json_prop["description"] = "Path to file"
json_prop["properties"] = {"path": {"type": "string"}}
else:
json_prop["properties"] = {
"id": {"oneOf": [{"type": "integer"}, {"type": "string"}]}}
elif prop.datatype == db.REFERENCE:
# No Record creation since no RT is specified and we don't know what
# schema to use, so only enum of all Records and all Files.
values = self._retrieve_enum_values("RECORD") + self._retrieve_enum_values("FILE")
......@@ -410,7 +426,9 @@ ui_schema : dict
vals = []
for val in possible_values:
if val.name:
if self._use_id_for_identification:
vals.append(val.id)
elif val.name:
vals.append(f"{val.name}")
else:
vals.append(f"{val.id}")
......@@ -453,6 +471,8 @@ ui_schema : dict
props = OrderedDict()
if self._name_property_for_new_records:
props["name"] = self._make_text_property("The name of the Record to be created")
if self._use_id_for_identification:
props["id"] = self._make_text_property("The id of the Record")
if self._description_property_for_new_records:
props["description"] = self._make_text_property(
"The description of the Record to be created")
......@@ -544,6 +564,7 @@ guaranteed (as of now).
def recordtype_to_json_schema(rt: db.RecordType, additional_properties: bool = True,
name_property_for_new_records: bool = False,
use_id_for_identification: bool = False,
description_property_for_new_records: bool = False,
additional_options_for_text_props: Optional[dict] = None,
additional_json_schema: Dict[str, dict] = None,
......@@ -573,6 +594,9 @@ def recordtype_to_json_schema(rt: db.RecordType, additional_properties: bool = T
name_property_for_new_records : bool, optional
Whether objects shall generally have a `name` property in the generated schema. Optional,
default is False.
use_id_for_identification: bool, optional
If set to true, an 'id' property is added to all records, and foreign
key references are assumed to be ids.
description_property_for_new_records : bool, optional
Whether objects shall generally have a `description` property in the generated schema.
Optional, default is False.
......@@ -629,6 +653,7 @@ def recordtype_to_json_schema(rt: db.RecordType, additional_properties: bool = T
exporter = JsonSchemaExporter(
additional_properties=additional_properties,
name_property_for_new_records=name_property_for_new_records,
use_id_for_identification=use_id_for_identification,
description_property_for_new_records=description_property_for_new_records,
additional_options_for_text_props=additional_options_for_text_props,
additional_json_schema=additional_json_schema,
......@@ -696,8 +721,8 @@ ui_schema : dict, optional
def merge_schemas(schemas: Union[Dict[str, dict], Iterable[dict]],
rjsf_uischemas: Union[Dict[str, dict], Sequence[dict]] = None) -> (
Union[dict, Tuple[dict, dict]]):
rjsf_uischemas: Union[Dict[str, dict], Sequence[dict]] = None,
return_data_schema=False) -> (Union[dict, Tuple[dict, dict]]):
"""Merge the given schemata into a single schema.
The result will look like this:
......@@ -728,6 +753,11 @@ rjsf_uischemas : dict[str, dict] | Iterable[dict], optional
If given, also merge the react-jsonschema-forms from this argument and return as the second return
value. If ``schemas`` is a dict, this parameter must also be a dict, if ``schemas`` is only an
iterable, this parameter must support numerical indexing.
return_data_schema : bool, default False
If set to True, a second schema with all top-level entries wrapped in an
array will be returned. This is necessary if the schema describes the
data layout of an XLSX file.
Cannot be used together with rjsf_uischemas.
Returns
-------
......@@ -737,10 +767,13 @@ schema : dict
uischema : dict
If ``rjsf_uischemas`` was given, this contains the merged UI schemata.
data_schema : dict
If ``return_data_schema`` was given, this contains the XLSX file schema.
"""
sub_schemas: dict[str, dict] = OrderedDict()
required = []
ui_schema = None
data_sub_schemas = OrderedDict()
if isinstance(schemas, dict):
sub_schemas = schemas
......@@ -754,6 +787,8 @@ uischema : dict
for i, schema in enumerate(schemas, start=1):
title = schema.get("title", str(i))
sub_schemas[title] = schema
if return_data_schema:
data_sub_schemas[title] = {"type": "array", "items": schema}
required.append(title)
if rjsf_uischemas is not None:
if not isinstance(rjsf_uischemas, Sequence):
......@@ -771,7 +806,17 @@ uischema : dict
"additionalProperties": False,
"$schema": "https://json-schema.org/draft/2020-12/schema",
}
if return_data_schema:
data_schema = {
"type": "object",
"properties": data_sub_schemas,
"required": required,
"additionalProperties": False,
"$schema": "https://json-schema.org/draft/2020-12/schema",
}
if ui_schema is not None:
return result, ui_schema
if return_data_schema:
return result, data_schema
return result
# encoding: utf-8
#
# This file is a part of the LinkAhead Project.
#
# Copyright (C) 2025 Indiscale GmbH <info@indiscale.com>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
"""
Utilities for automatically exporting and importing data to and from xlsx.
"""
import json
import tempfile
import warnings
from typing import Union
from pathlib import Path
import linkahead
from linkahead.common.models import Container
from linkahead import execute_query
with warnings.catch_warnings():
warnings.filterwarnings("ignore", message="^.*experimental.*$")
from linkahead.high_level_api import convert_to_python_object
from ..json_schema_exporter import JsonSchemaExporter, merge_schemas
from .table_generator import XLSXTemplateGenerator
from .fill_xlsx import fill_template
def _generate_jsonschema_from_recordtypes(recordtypes: list,
                                          out_path: Union[str, Path] = None) -> dict:
    """
    Generate a combined jsonschema for all given recordtypes.

    Parameters
    ----------
    recordtypes : Iterable
        List of RecordType entities for which a schema should be generated.
    out_path : str, Path
        If given, the resulting jsonschema will also be written to the file
        given by out_path.
        Optional, default None

    Returns
    -------
    data_schema : dict
        The generated schema.
    """
    # One sub-schema per RecordType; ids serve as identifiers so that
    # references in the exported data can be resolved unambiguously.
    exporter = JsonSchemaExporter(additional_properties=False,
                                  name_property_for_new_records=True,
                                  use_id_for_identification=True)
    single_schemas = []
    for recordtype in recordtypes:
        single_schemas.append(exporter.recordtype_to_json_schema(recordtype))
    # The data schema wraps each top-level entry in an array, which matches
    # the layout of an XLSX file (one row per record).
    _, data_schema = merge_schemas(single_schemas, return_data_schema=True)
    # If indicated, save as json file
    if out_path is not None:
        with open(out_path, mode="w", encoding="utf8") as json_file:
            json.dump(data_schema, json_file, ensure_ascii=False, indent=2)
    return data_schema
def _generate_jsondata_from_records(records: Container,
                                    out_path: Union[str, Path] = None) -> dict:
    """
    Extract relevant information (id, name, properties, etc.) from the given
    records and convert this information to json.

    Parameters
    ----------
    records : Iterable
        List of Record entities from which the data will be converted to json.
    out_path : str, Path
        If given, the resulting jsondata will also be written to the file given
        by out_path.
        Optional, default None

    Returns
    -------
    json_data : dict
        The given records data in json form, grouped by the name of each
        record's first parent.
    """
    json_data = {}
    for record in records:
        # Convert record to high level api object to get serialized
        # output with resolved references.
        record_obj = convert_to_python_object(record)
        try:
            record_obj.resolve_references(True, None)
        except linkahead.LinkAheadException:
            warnings.warn(f"Data for record with id {record_obj.id} might be "
                          f"incomplete, unsuccessful retrieve.")
        # Get json representation & adjust layout for compatibility:
        # lift the 'properties' entries to the top level.  Using pop with a
        # default avoids a KeyError when serialize() returns no 'properties'.
        raw_data = record_obj.serialize()
        raw_data.update(raw_data.pop('properties', {}))
        # Group records by the name of their first parent (= RecordType).
        json_data.setdefault(record.parents[0].name, []).append(raw_data)
    # If indicated, save as json file
    if out_path is not None:
        with open(out_path, mode="w", encoding="utf8") as json_file:
            # default=str stringifies values json cannot serialize natively
            # (e.g. datetime) instead of raising.
            json.dump(json_data, json_file, ensure_ascii=False, indent=2,
                      default=str)
    return json_data
def _generate_xlsx_template_file(schema: dict,
                                 recordtype_names: Union[list, set],
                                 out_path: Union[str, Path]):
    """
    Generate an empty XLSX template file for the given schema at the indicated
    location.

    Parameters
    ----------
    schema : dict
        Jsonschema for which an xlsx template should be generated.
    recordtype_names : Iterable
        List of all RecordType names in the given schema.
    out_path : str, Path
        The resulting xlsx template will be written to the file at this path.
    """
    # Each RecordType table uses its own 'id' column as foreign key.
    foreign_keys = {}
    for rt_name in recordtype_names:
        foreign_keys[rt_name] = {"__this__": ['id']}
    template_generator = XLSXTemplateGenerator()
    template_generator.generate(schema=schema, foreign_keys=foreign_keys,
                                filepath=out_path)
def export_container_to_xlsx(records: Container,
                             xlsx_data_filepath: Union[str, Path],
                             include_referenced_entities: bool = False,
                             jsonschema_filepath: Union[str, Path] = None,
                             jsondata_filepath: Union[str, Path] = None,
                             xlsx_template_filepath: Union[str, Path] = None):
    """
    Export the data of the given records to an xlsx file.

    Parameters
    ----------
    records : Container, Iterable
        List of records to export.
    xlsx_data_filepath : str, Path
        Write the resulting xlsx file to the file at this location.
    include_referenced_entities : bool
        If set to true, any records referenced by properties of those given in
        'records' will also be exported.
        Optional, default False
    jsonschema_filepath : str, Path
        If given, write the jsonschema to this file.
        Optional, default None
    jsondata_filepath : str, Path
        If given, write the json data to this file.
        Optional, default None
    xlsx_template_filepath : str, Path
        If given, write the xlsx template to this file.
        Optional, default None
    """
    # Ensure every record is only handled once by using id as key.
    entity_ids = {record.id for record in records}
    # If indicated, also get and add the records referenced on the first level
    # in the given container
    if include_referenced_entities:
        for record in records:
            for prop in record.properties:
                if prop.is_reference() and prop.value is not None:
                    try:
                        # Normalize scalar references to a one-element list so
                        # both cases are handled by the same loop below.
                        ref_list = prop.value
                        if not isinstance(ref_list, list):
                            ref_list = [ref_list]
                        for element in ref_list:
                            # References may be stored as raw ids (int/str) or
                            # as Entity objects; anything else is skipped with
                            # a warning.
                            if isinstance(element, (int, str)):
                                elem_id = element
                            elif isinstance(element, linkahead.Entity):
                                elem_id = element.id
                            else:
                                warnings.warn(f"Cannot handle referenced "
                                              f"entity '{prop.value}'")
                                continue
                            entity_ids.add(elem_id)
                    except linkahead.LinkAheadException as e:
                        warnings.warn(f"Cannot handle referenced entity "
                                      f"'{prop.value}' because of error '{e}'")
    # Retrieve data
    new_records = []
    for entity_id in entity_ids:
        # Strip a version suffix of the form 'id@version' — only the plain
        # id is usable in the query below.
        entity_id = str(entity_id).split('@')[0]
        entity = execute_query(f"FIND ENTITY WITH (ID = {entity_id})", unique=True)
        # Entities without parents (e.g. bare Files or Properties) are
        # skipped because they have no RecordType to group them under.
        if len(entity.get_parents()) > 0:
            new_records.append(entity)
        # ToDo: Handle Files and other Entities (e.g. Properties) separately
    records = new_records
    # Deduplicate the parents by id, then re-retrieve them so the full
    # RecordType definitions (not just stubs) are available for the schema.
    recordtypes = {record.parents[0] for record in records}
    recordtype_ids = {recordtype.id for recordtype in recordtypes}
    recordtypes = [execute_query(f"FIND RECORDTYPE WITH (ID = {rt_id})",
                                 unique=True)
                   for rt_id in recordtype_ids]
    recordtype_names = {recordtype.name for recordtype in recordtypes}
    # Generate schema and data from the records
    json_schema = _generate_jsonschema_from_recordtypes(recordtypes,
                                                        jsonschema_filepath)
    json_data = _generate_jsondata_from_records(records, jsondata_filepath)
    # Generate xlsx template with tempfile if necessary.  The tempfile object
    # is kept so it stays alive until the cleanup at the end of the function.
    # NOTE(review): NamedTemporaryFile reopened by name may fail on Windows —
    # TODO confirm target platforms.
    if xlsx_template_filepath is None:
        xlsx_template_file = tempfile.NamedTemporaryFile(suffix='.xlsx')
        xlsx_template_filepath = xlsx_template_file.name
    else:
        xlsx_template_file = None
    _generate_xlsx_template_file(json_schema, recordtype_names,
                                 xlsx_template_filepath)
    # Fill xlsx file with data
    with warnings.catch_warnings():
        # We have a lot of information in the json data that we do not need
        warnings.filterwarnings("ignore",
                                message="^.*Ignoring path with missing sheet index.*$")
        warnings.filterwarnings("ignore",
                                message="^.*No validation schema.*$")
        fill_template(data=json_data, template=xlsx_template_filepath,
                      result=xlsx_data_filepath)
    # ToDo: Validation
    # Cleanup
    if xlsx_template_file is not None:
        xlsx_template_file.close()
......@@ -24,6 +24,7 @@ from __future__ import annotations
import datetime
import pathlib
import warnings
from types import SimpleNamespace
from typing import Any, Optional, TextIO, Union
from warnings import warn
......@@ -364,7 +365,7 @@ validation_schema: dict, optional
print(verr.message)
raise verr
else:
print("No validation schema given, continue at your own risk.")
warnings.warn("No validation schema given, continue at your own risk.")
# Filling the data
result_wb = load_workbook(template)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment