Skip to content
Snippets Groups Projects

Draft: Automatic XLSX export

Open I. Nüske requested to merge f-enh-143-automatic-xlsx-exporting into dev
12 unresolved threads
1 file
+ 1
1
Compare changes
  • Side-by-side
  • Inline
# encoding: utf-8
#
# This file is a part of the LinkAhead Project.
#
# Copyright (C) 2025 Indiscale GmbH <info@indiscale.com>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
"""
Utilities for automatically exporting and importing data to and from xlsx.
"""
import json
import tempfile
import warnings
import logging
from typing import Union
from pathlib import Path
import linkahead
from linkahead.common.models import Container
from linkahead import execute_query
from ..json_schema_exporter import JsonSchemaExporter, merge_schemas
from .table_generator import XLSXTemplateGenerator
from .fill_xlsx import fill_template
# The high_level_api import would normally warn about the API being
# experimental. We know this, so suppress the warning.
logging.disable(logging.WARNING)
from linkahead.high_level_api import convert_to_python_object # noqa: E402, pylint: disable=wrong-import-position
logging.disable(logging.NOTSET)
def _generate_jsonschema_from_recordtypes(recordtypes: list,
out_path: Union[str, Path] = None) -> dict:
"""
Generate a combined jsonschema for all given recordtypes.
Parameters
----------
recordtypes : Iterable
List of RecordType entities for which a schema should be generated.
out_path : str, Path
If given, the resulting jsonschema will also be written to the file
given by out_path.
Optional, default None
Returns
-------
data_schema : dict
The generated schema.
"""
# Generate schema
schema_generator = JsonSchemaExporter(additional_properties=False,
name_property_for_new_records=True,
use_id_for_identification=True)
schemas = [schema_generator.recordtype_to_json_schema(recordtype)
for recordtype in recordtypes]
_, data_schema = merge_schemas(schemas, return_data_schema=True)
# If indicated, save as json file
if out_path is not None:
with open(out_path, mode="w", encoding="utf8") as json_file:
json.dump(data_schema, json_file, ensure_ascii=False, indent=2)
# Return
return data_schema
def _generate_jsondata_from_records(records: Container,
out_path: Union[str, Path] = None) -> dict:
"""
Extract relevant information (id, name, properties, etc.) from the given
records and converts this information to json.
Parameters
----------
records : Iterable
List of Record entities from which the data will be converted to json.
out_path : str, Path
If given, the resulting jsondata will also be written to the file given
by out_path.
Optional, default None
Returns
-------
json_data : dict
The given records data in json form.
"""
json_data = {}
for record in records:
# Convert records to high level api objects
record_obj = convert_to_python_object(record)
try:
record_obj.resolve_references(False, None)
except linkahead.LinkAheadException:
warnings.warn(f"Data for record with id {record_obj.id} might be "
f"incomplete, unsuccessful retrieve.")
# Get json representation & adjust layout for compatibility
raw_data = record_obj.serialize()
raw_data.update(raw_data.get('properties', {}))
raw_data.pop('properties')
    • Comment on lines +105 to +106
      Author Maintainer

      The properties need to be top-level for the xlsx template exporter

Please register or sign in to reply
if record.parents[0].name not in json_data:
json_data[record.parents[0].name] = []
json_data[record.parents[0].name].append(raw_data)
# If indicated, save as json file
if out_path is not None:
with open(out_path, mode="w", encoding="utf8") as json_file:
json.dump(json_data, json_file, ensure_ascii=False, indent=2, default=str)
# Return
return json_data
def _generate_xlsx_template_file(schema: dict,
recordtype_names: Union[list, set],
out_path: Union[str, Path]):
"""
Generate an empty XLSX template file for the given schema at the indicated
location.
Parameters
----------
schema : dict
Jsonschema for which an xlsx template should be generated.
recordtype_names : Iterable
List of all RecordType names in the given schema.
out_path : str, Path
The resulting xlsx template will be written to the file at this path.
"""
generator = XLSXTemplateGenerator()
foreign_keys = {name: {"__this__": ['id']} for name in recordtype_names}
generator.generate(schema=schema, foreign_keys=foreign_keys,
filepath=out_path)
def export_container_to_xlsx(records: Container,
xlsx_data_filepath: Union[str, Path],
include_referenced_entities: bool = False,
jsonschema_filepath: Union[str, Path] = None,
jsondata_filepath: Union[str, Path] = None,
xlsx_template_filepath: Union[str, Path] = None):
"""
Export the data of the given records to an xlsx file.
Parameters
----------
records : Container, Iterable
List of records to export.
xlsx_data_filepath : str, Path
Write the resulting xlsx file to the file at this location.
include_referenced_entities : bool
If set to true, any records referenced by properties of those given in
'records' will also be exported.
Optional, default False
jsonschema_filepath : str, Path
If given, write the jsonschema to this file.
Optional, default None
jsondata_filepath : str, Path
If given, write the json data to this file.
Optional, default None
xlsx_template_filepath : str, Path
If given, write the xlsx template to this file.
Optional, default None
"""
# Ensure every record is only handled once by using id as key.
entity_ids = {record.id for record in records}
# If indicated, also get and add the records referenced on the first level
# in the given container
if include_referenced_entities:
for record in records:
for prop in record.properties:
if prop.is_reference() and prop.value is not None:
try:
ref_list = prop.value
if not isinstance(ref_list, list):
ref_list = [ref_list]
for element in ref_list:
if isinstance(element, (int, str)):
elem_id = element
elif isinstance(element, linkahead.Entity):
elem_id = element.id
else:
warnings.warn(f"Cannot handle referenced "
f"entity '{prop.value}'")
continue
entity_ids.add(elem_id)
except linkahead.LinkAheadException as e:
warnings.warn(f"Cannot handle referenced entity "
f"'{prop.value}' because of error '{e}'")
# Retrieve data
new_records = []
for entity_id in entity_ids:
entity_id = str(entity_id).split('@')[0] # Queries cannot handle version
    • Author Maintainer

      FIND fails on version-pinned references (e.g. "FIND ENTITY WITH (ID = 114@f2h6dEr56)"), so we need to strip the version. This is not ideal, as this does remove some information that might be expected to be there, but I did not find a time-efficient workaround.

Please register or sign in to reply
entity = execute_query(f"FIND ENTITY WITH (ID = {entity_id})", unique=True)
# We can currently only handle Entities with a parent, as otherwise we
# do not know which sheet they belong in.
if len(entity.get_parents()) > 0:
new_records.append(entity)
# ToDo: Handle Files and other Entities (e.g. Properties) separately
records = new_records
recordtypes = {record.parents[0] for record in records}
recordtype_ids = {recordtype.id for recordtype in recordtypes}
recordtypes = [execute_query(f"FIND RECORDTYPE WITH (ID = {rt_id})",
unique=True)
for rt_id in recordtype_ids]
recordtype_names = {recordtype.name for recordtype in recordtypes}
# Generate schema and data from the records
json_schema = _generate_jsonschema_from_recordtypes(recordtypes,
jsonschema_filepath)
json_data = _generate_jsondata_from_records(records, jsondata_filepath)
# Generate xlsx template
# _generate_xlsx_template_file needs a file name, so use NamedTemporaryFile
# ToDo: This might not work on windows, if not, fix _generate file handling
if xlsx_template_filepath is None:
xlsx_template_file = tempfile.NamedTemporaryFile(suffix='.xlsx')
xlsx_template_filepath = xlsx_template_file.name
else:
xlsx_template_file = None
_generate_xlsx_template_file(json_schema, recordtype_names,
xlsx_template_filepath)
    • Comment on lines +215 to +224
      Author Maintainer

      I do not have a Windows VM available to test this. If it does not work, the easiest workaround would probably be to change _generate_xlsx_template_file to also work with file-like objects and never write to disk.

Please register or sign in to reply
# Fill xlsx file with data
with warnings.catch_warnings():
# We have a lot of information in the json data that we do not need
warnings.filterwarnings("ignore",
message="^.*Ignoring path with missing sheet index.*$")
warnings.filterwarnings("ignore",
message="^.*No validation schema.*$")
fill_template(data=json_data, template=xlsx_template_filepath,
result=xlsx_data_filepath)
# Cleanup
if xlsx_template_file is not None:
xlsx_template_file.close()
Loading