I. Nüske
--- a/src/caosadvancedtools/table_json_conversion/export_import_xlsx.py 0 → 100644

+ 242

− 0
+++ b/src/caosadvancedtools/table_json_conversion/export_import_xlsx.py 0 → 100644

+ 242

− 0
+# encoding: utf-8
+#
+# This file is a part of the LinkAhead Project.
+#
+# Copyright (C) 2025 Indiscale GmbH <info@indiscale.com>
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as
+# published by the Free Software Foundation, either version 3 of the
+# License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+
+"""
+Utilities for automatically exporting and importing data to and from xlsx.
+"""
+
+import json
+import tempfile
+import warnings
+import logging
+from typing import Union
+from pathlib import Path
+
+import linkahead
+from linkahead.common.models import Container
+from linkahead import execute_query
+
+from ..json_schema_exporter import JsonSchemaExporter, merge_schemas
+from .table_generator import XLSXTemplateGenerator
+from .fill_xlsx import fill_template
+
+# The high_level_api import would normally warn about the API being
+# experimental. We know this, so suppress the warning.
+logging.disable(logging.WARNING)
+from linkahead.high_level_api import convert_to_python_object   # noqa: E402, pylint: disable=wrong-import-position
+logging.disable(logging.NOTSET)
+
+
+def _generate_jsonschema_from_recordtypes(recordtypes: list,
+                                          out_path: Union[str, Path] = None) -> dict:
+    """
+    Generate a combined jsonschema for all given recordtypes.
+
+    Parameters
+    ----------
+    recordtypes : Iterable
+        List of RecordType entities for which a schema should be generated.
+    out_path : str, Path
+        If given, the resulting jsonschema will also be written to the file
+        given by out_path.
+        Optional, default None
+
+    Returns
+    -------
+    data_schema : dict
+        The generated schema.
+    """
+    # Generate schema
+    schema_generator = JsonSchemaExporter(additional_properties=False,
+                                          name_property_for_new_records=True,
+                                          use_id_for_identification=True)
+    schemas = [schema_generator.recordtype_to_json_schema(recordtype)
+               for recordtype in recordtypes]
+    _, data_schema = merge_schemas(schemas, return_data_schema=True)
+    # If indicated, save as json file
+    if out_path is not None:
+        with open(out_path, mode="w", encoding="utf8") as json_file:
+            json.dump(data_schema, json_file, ensure_ascii=False, indent=2)
+    # Return
+    return data_schema
+
+
+def _generate_jsondata_from_records(records: Container,
+                                    out_path: Union[str, Path] = None) -> dict:
+    """
+    Extract relevant information (id, name, properties, etc.) from the given
+    records and converts this information to json.
+
+    Parameters
+    ----------
+    records :  Iterable
+        List of Record entities from which the data will be converted to json.
+    out_path : str, Path
+        If given, the resulting jsondata will also be written to the file given
+        by out_path.
+        Optional, default None
+
+    Returns
+    -------
+    json_data : dict
+        The given records data in json form.
+    """
+    json_data = {}
+    for record in records:
+        # Convert records to high level api objects
+        record_obj = convert_to_python_object(record)
+        try:
+            record_obj.resolve_references(False, None)
+        except linkahead.LinkAheadException:
+            warnings.warn(f"Data for record with id {record_obj.id} might be "
+                          f"incomplete, unsuccessful retrieve.")
+        # Get json representation & adjust layout for compatibility
+        raw_data = record_obj.serialize()
+        raw_data.update(raw_data.get('properties', {}))
+        raw_data.pop('properties')
+        if record.parents[0].name not in json_data:
+            json_data[record.parents[0].name] = []
+        json_data[record.parents[0].name].append(raw_data)
+    # If indicated, save as json file
+    if out_path is not None:
+        with open(out_path, mode="w", encoding="utf8") as json_file:
+            json.dump(json_data, json_file, ensure_ascii=False, indent=2, default=str)
+    # Return
+    return json_data
+
+
+def _generate_xlsx_template_file(schema: dict,
+                                 recordtype_names: Union[list, set],
+                                 out_path: Union[str, Path]):
+    """
+    Generate an empty XLSX template file for the given schema at the indicated
+    location.
+
+    Parameters
+    ----------
+    schema : dict
+        Jsonschema for which an xlsx template should be generated.
+    recordtype_names : Iterable
+        List of all RecordType names in the given schema.
+    out_path : str, Path
+        The resulting xlsx template will be written to the file at this path.
+    """
+    generator = XLSXTemplateGenerator()
+    foreign_keys = {name: {"__this__": ['id']} for name in recordtype_names}
+    generator.generate(schema=schema, foreign_keys=foreign_keys,
+                       filepath=out_path)
+
+
+def export_container_to_xlsx(records: Container,
+                             xlsx_data_filepath: Union[str, Path],
+                             include_referenced_entities: bool = False,
+                             jsonschema_filepath: Union[str, Path] = None,
+                             jsondata_filepath: Union[str, Path] = None,
+                             xlsx_template_filepath: Union[str, Path] = None):
+    """
+    Export the data of the given records to an xlsx file.
+
+    Parameters
+    ----------
+    records : Container, Iterable
+        List of records to export.
+    xlsx_data_filepath : str, Path
+        Write the resulting xlsx file to the file at this location.
+    include_referenced_entities : bool
+        If set to true, any records referenced by properties of those given in
+        'records' will also be exported.
+        Optional, default False
+    jsonschema_filepath : str, Path
+        If given, write the jsonschema to this file.
+        Optional, default None
+    jsondata_filepath : str, Path
+        If given, write the json data to this file.
+        Optional, default None
+    xlsx_template_filepath : str, Path
+        If given, write the xlsx template to this file.
+        Optional, default None
+    """
+    # Ensure every record is only handled once by using id as key.
+    entity_ids = {record.id for record in records}
+    # If indicated, also get and add the records referenced on the first level
+    # in the given container
+    if include_referenced_entities:
+        for record in records:
+            for prop in record.properties:
+                if prop.is_reference() and prop.value is not None:
+                    try:
+                        ref_list = prop.value
+                        if not isinstance(ref_list, list):
+                            ref_list = [ref_list]
+                        for element in ref_list:
+                            if isinstance(element, (int, str)):
+                                elem_id = element
+                            elif isinstance(element, linkahead.Entity):
+                                elem_id = element.id
+                            else:
+                                warnings.warn(f"Cannot handle referenced "
+                                              f"entity '{prop.value}'")
+                                continue
+                            entity_ids.add(elem_id)
+                    except linkahead.LinkAheadException as e:
+                        warnings.warn(f"Cannot handle referenced entity "
+                                      f"'{prop.value}' because of error '{e}'")
+    # Retrieve data
+    new_records = []
+    for entity_id in entity_ids:
+        entity_id = str(entity_id).split('@')[0]    # Queries cannot handle version
+        entity = execute_query(f"FIND ENTITY WITH (ID = {entity_id})", unique=True)
+        # We can currently only handle Entities with a parent, as otherwise we
+        # do not know which sheet they belong in.
+        if len(entity.get_parents()) > 0:
+            new_records.append(entity)
+        # ToDo: Handle Files and other Entities (e.g. Properties) separately
+    records = new_records
+    recordtypes = {record.parents[0] for record in records}
+    recordtype_ids = {recordtype.id for recordtype in recordtypes}
+    recordtypes = [execute_query(f"FIND RECORDTYPE WITH (ID = {rt_id})",
+                                 unique=True)
+                   for rt_id in recordtype_ids]
+    recordtype_names = {recordtype.name for recordtype in recordtypes}
+    # Generate schema and data from the records
+    json_schema = _generate_jsonschema_from_recordtypes(recordtypes,
+                                                        jsonschema_filepath)
+    json_data = _generate_jsondata_from_records(records, jsondata_filepath)
+    # Generate xlsx template
+    # _generate_xlsx_template_file needs a file name, so use NamedTemporaryFile
+    # ToDo: This might not work on windows, if not, fix _generate file handling
+    if xlsx_template_filepath is None:
+        xlsx_template_file = tempfile.NamedTemporaryFile(suffix='.xlsx')
+        xlsx_template_filepath = xlsx_template_file.name
+    else:
+        xlsx_template_file = None
+    _generate_xlsx_template_file(json_schema, recordtype_names,
+                                 xlsx_template_filepath)
+    # Fill xlsx file with data
+    with warnings.catch_warnings():
+        # We have a lot of information in the json data that we do not need
+        warnings.filterwarnings("ignore",
+                                message="^.*Ignoring path with missing sheet index.*$")
+        warnings.filterwarnings("ignore",
+                                message="^.*No validation schema.*$")
+        fill_template(data=json_data, template=xlsx_template_filepath,
+                      result=xlsx_data_filepath)
+    # Cleanup
+    if xlsx_template_file is not None:
+        xlsx_template_file.close()