Skip to content
Snippets Groups Projects
Commit fff1c8f8 authored by I. Nüske's avatar I. Nüske
Browse files

WIP: Add function to export records to xlsx:

- Added parameter use_id_for_identification to JsonSchemaExporter, which sets foreign key to id and adds an 'id' column to all tables
- Added parameter return_data_schema to merge_schemas. If set, merge_schemas returns a data_schema as well as the normal model_schema

- Added new file export_import_xlsx. In addition to private functions for generating templates, schemas, and data, there is a new public function export_container_to_xlsx which generates an xlsx file containing the data from the given records at a given path

- Changed a print warning in fill_xlsx to warnings.warn for easier filtering
parent a10d0b64
No related branches found
No related tags found
1 merge request!132Draft: Automatic XLSX export
Pipeline #61558 passed
This commit is part of merge request !132. Comments created here will be created in the context of that merge request.
......@@ -70,6 +70,7 @@ class JsonSchemaExporter:
def __init__(self, additional_properties: bool = True,
name_property_for_new_records: bool = False,
use_id_for_identification: bool = False,
description_property_for_new_records: bool = False,
additional_options_for_text_props: dict = None,
additional_json_schema: Dict[str, dict] = None,
......@@ -92,6 +93,9 @@ class JsonSchemaExporter:
name_property_for_new_records : bool, optional
Whether objects shall generally have a `name` property in the generated schema.
Optional, default is False.
use_id_for_identification: bool, optional
If set to true, an 'id' property is added to all records, and
foreign key references are assumed to be ids.
description_property_for_new_records : bool, optional
Whether objects shall generally have a `description` property in the generated schema.
Optional, default is False.
......@@ -151,6 +155,7 @@ class JsonSchemaExporter:
self._additional_properties = additional_properties
self._name_property_for_new_records = name_property_for_new_records
self._use_id_for_identification = use_id_for_identification
self._description_property_for_new_records = description_property_for_new_records
self._additional_options_for_text_props = additional_options_for_text_props
self._additional_json_schema = additional_json_schema
......@@ -257,7 +262,18 @@ ui_schema : dict
if inner_ui_schema:
ui_schema["items"] = inner_ui_schema
elif prop.is_reference():
if prop.datatype == db.REFERENCE:
if self._use_id_for_identification:
json_prop["type"] = "object"
json_prop["required"] = []
json_prop["additionalProperties"] = False
json_prop["title"] = prop.name
if prop.datatype == db.FILE:
json_prop["description"] = "Path to file"
json_prop["properties"] = {"path": {"type": "string"}}
else:
json_prop["properties"] = {
"id": {"oneOf": [{"type": "integer"}, {"type": "string"}]}}
elif prop.datatype == db.REFERENCE:
# No Record creation since no RT is specified and we don't know what
# schema to use, so only enum of all Records and all Files.
values = self._retrieve_enum_values("RECORD") + self._retrieve_enum_values("FILE")
......@@ -410,7 +426,9 @@ ui_schema : dict
vals = []
for val in possible_values:
if val.name:
if self._use_id_for_identification:
vals.append(val.id)
elif val.name:
vals.append(f"{val.name}")
else:
vals.append(f"{val.id}")
......@@ -453,6 +471,8 @@ ui_schema : dict
props = OrderedDict()
if self._name_property_for_new_records:
props["name"] = self._make_text_property("The name of the Record to be created")
if self._use_id_for_identification:
props["id"] = self._make_text_property("The id of the Record")
if self._description_property_for_new_records:
props["description"] = self._make_text_property(
"The description of the Record to be created")
......@@ -544,6 +564,7 @@ guaranteed (as of now).
def recordtype_to_json_schema(rt: db.RecordType, additional_properties: bool = True,
name_property_for_new_records: bool = False,
use_id_for_identification: bool = False,
description_property_for_new_records: bool = False,
additional_options_for_text_props: Optional[dict] = None,
additional_json_schema: Dict[str, dict] = None,
......@@ -573,6 +594,9 @@ def recordtype_to_json_schema(rt: db.RecordType, additional_properties: bool = T
name_property_for_new_records : bool, optional
Whether objects shall generally have a `name` property in the generated schema. Optional,
default is False.
use_id_for_identification: bool, optional
If set to true, an 'id' property is added to all records, and foreign
key references are assumed to be ids.
description_property_for_new_records : bool, optional
Whether objects shall generally have a `description` property in the generated schema.
Optional, default is False.
......@@ -629,6 +653,7 @@ def recordtype_to_json_schema(rt: db.RecordType, additional_properties: bool = T
exporter = JsonSchemaExporter(
additional_properties=additional_properties,
name_property_for_new_records=name_property_for_new_records,
use_id_for_identification=use_id_for_identification,
description_property_for_new_records=description_property_for_new_records,
additional_options_for_text_props=additional_options_for_text_props,
additional_json_schema=additional_json_schema,
......@@ -696,8 +721,8 @@ ui_schema : dict, optional
def merge_schemas(schemas: Union[Dict[str, dict], Iterable[dict]],
rjsf_uischemas: Union[Dict[str, dict], Sequence[dict]] = None) -> (
Union[dict, Tuple[dict, dict]]):
rjsf_uischemas: Union[Dict[str, dict], Sequence[dict]] = None,
return_data_schema=False) -> (Union[dict, Tuple[dict, dict]]):
"""Merge the given schemata into a single schema.
The result will look like this:
......@@ -728,6 +753,11 @@ rjsf_uischemas : dict[str, dict] | Iterable[dict], optional
If given, also merge the react-jsonschema-forms from this argument and return as the second return
value. If ``schemas`` is a dict, this parameter must also be a dict, if ``schemas`` is only an
iterable, this parameter must support numerical indexing.
return_data_schema : bool, default False
If set to True, a second schema with all top-level entries wrapped in an
array will be returned. This is necessary if the schema describes the
data layout of an XLSX file.
Cannot be used together with rjsf_uischemas.
Returns
-------
......@@ -737,10 +767,13 @@ schema : dict
uischema : dict
If ``rjsf_uischemas`` was given, this contains the merged UI schemata.
data_schema : dict
If ``return_data_schema`` was given, this contains the XLSX file schema.
"""
sub_schemas: dict[str, dict] = OrderedDict()
required = []
ui_schema = None
data_sub_schemas = OrderedDict()
if isinstance(schemas, dict):
sub_schemas = schemas
......@@ -754,6 +787,8 @@ uischema : dict
for i, schema in enumerate(schemas, start=1):
title = schema.get("title", str(i))
sub_schemas[title] = schema
if return_data_schema:
data_sub_schemas[title] = {"type": "array", "items": schema}
required.append(title)
if rjsf_uischemas is not None:
if not isinstance(rjsf_uischemas, Sequence):
......@@ -771,7 +806,17 @@ uischema : dict
"additionalProperties": False,
"$schema": "https://json-schema.org/draft/2020-12/schema",
}
if return_data_schema:
data_schema = {
"type": "object",
"properties": data_sub_schemas,
"required": required,
"additionalProperties": False,
"$schema": "https://json-schema.org/draft/2020-12/schema",
}
if ui_schema is not None:
return result, ui_schema
if return_data_schema:
return result, data_schema
return result
# encoding: utf-8
#
# This file is a part of the LinkAhead Project.
#
# Copyright (C) 2025 Indiscale GmbH <info@indiscale.com>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
"""
Utilities for automatically exporting and importing data to and from xlsx.
"""
import json
import tempfile
import warnings
from typing import Union
from pathlib import Path
import linkahead
from linkahead.common.models import Container
from linkahead import execute_query
with warnings.catch_warnings():
warnings.filterwarnings("ignore", message="^.*experimental.*$")
from linkahead.high_level_api import convert_to_python_object
from ..json_schema_exporter import JsonSchemaExporter, merge_schemas
from .table_generator import XLSXTemplateGenerator
from .fill_xlsx import fill_template
def _generate_jsonschema_from_recordtypes(recordtypes: list,
                                          out_path: Union[str, Path] = None) -> dict:
    """
    Generate a combined jsonschema for all given recordtypes.

    Parameters
    ----------
    recordtypes : Iterable
        List of RecordType entities for which a schema should be generated.
    out_path : str, Path
        If given, the resulting jsonschema will also be written to the file
        given by out_path.
        Optional, default None

    Returns
    -------
    data_schema : dict
        The generated schema.
    """
    # One sub-schema per RecordType; ids serve as identifiers so that
    # references in the exported data can be resolved unambiguously.
    exporter = JsonSchemaExporter(additional_properties=False,
                                  name_property_for_new_records=True,
                                  use_id_for_identification=True)
    single_schemas = []
    for recordtype in recordtypes:
        single_schemas.append(exporter.recordtype_to_json_schema(recordtype))
    # The data schema wraps each top-level entry in an array, which matches
    # the layout of an XLSX file (one row per record).
    _, data_schema = merge_schemas(single_schemas, return_data_schema=True)
    # If indicated, save as json file
    if out_path is not None:
        with open(out_path, mode="w", encoding="utf8") as json_file:
            json.dump(data_schema, json_file, ensure_ascii=False, indent=2)
    return data_schema
def _generate_jsondata_from_records(records: Container,
                                    out_path: Union[str, Path] = None) -> dict:
    """
    Extract relevant information (id, name, properties, etc.) from the given
    records and convert this information to json.

    Parameters
    ----------
    records : Iterable
        List of Record entities from which the data will be converted to json.
    out_path : str, Path
        If given, the resulting jsondata will also be written to the file given
        by out_path.
        Optional, default None

    Returns
    -------
    json_data : dict
        The given records data in json form, grouped by the name of each
        record's first parent.
    """
    json_data = {}
    for record in records:
        # Convert record to high level api object to get serialized
        # output with resolved references.
        record_obj = convert_to_python_object(record)
        try:
            record_obj.resolve_references(True, None)
        except linkahead.LinkAheadException:
            warnings.warn(f"Data for record with id {record_obj.id} might be "
                          f"incomplete, unsuccessful retrieve.")
        # Get json representation & adjust layout for compatibility:
        # lift the 'properties' entries to the top level.  Using pop with a
        # default avoids a KeyError when serialize() returns no 'properties'.
        raw_data = record_obj.serialize()
        raw_data.update(raw_data.pop('properties', {}))
        # Group records by the name of their first parent (= RecordType).
        json_data.setdefault(record.parents[0].name, []).append(raw_data)
    # If indicated, save as json file
    if out_path is not None:
        with open(out_path, mode="w", encoding="utf8") as json_file:
            # default=str stringifies values json cannot serialize natively
            # (e.g. datetime) instead of raising.
            json.dump(json_data, json_file, ensure_ascii=False, indent=2,
                      default=str)
    return json_data
def _generate_xlsx_template_file(schema: dict,
                                 recordtype_names: Union[list, set],
                                 out_path: Union[str, Path]):
    """
    Generate an empty XLSX template file for the given schema at the indicated
    location.

    Parameters
    ----------
    schema : dict
        Jsonschema for which an xlsx template should be generated.
    recordtype_names : Iterable
        List of all RecordType names in the given schema.
    out_path : str, Path
        The resulting xlsx template will be written to the file at this path.
    """
    # Each RecordType table uses its own 'id' column as foreign key.
    foreign_keys = {}
    for rt_name in recordtype_names:
        foreign_keys[rt_name] = {"__this__": ['id']}
    template_generator = XLSXTemplateGenerator()
    template_generator.generate(schema=schema, foreign_keys=foreign_keys,
                                filepath=out_path)
def export_container_to_xlsx(records: Container,
                             xlsx_data_filepath: Union[str, Path],
                             include_referenced_entities: bool = False,
                             jsonschema_filepath: Union[str, Path] = None,
                             jsondata_filepath: Union[str, Path] = None,
                             xlsx_template_filepath: Union[str, Path] = None):
    """
    Export the data of the given records to an xlsx file.

    Parameters
    ----------
    records : Container, Iterable
        List of records to export.
    xlsx_data_filepath : str, Path
        Write the resulting xlsx file to the file at this location.
    include_referenced_entities : bool
        If set to true, any records referenced by properties of those given in
        'records' will also be exported.
        Optional, default False
    jsonschema_filepath : str, Path
        If given, write the jsonschema to this file.
        Optional, default None
    jsondata_filepath : str, Path
        If given, write the json data to this file.
        Optional, default None
    xlsx_template_filepath : str, Path
        If given, write the xlsx template to this file.
        Optional, default None
    """
    # Ensure every record is only handled once by using id as key.
    entity_ids = {record.id for record in records}
    # If indicated, also get and add the records referenced on the first level
    # in the given container
    if include_referenced_entities:
        for record in records:
            for prop in record.properties:
                if prop.is_reference() and prop.value is not None:
                    try:
                        # Normalize scalar references to a one-element list so
                        # both cases are handled by the same loop below.
                        ref_list = prop.value
                        if not isinstance(ref_list, list):
                            ref_list = [ref_list]
                        for element in ref_list:
                            # References may be stored as raw ids (int/str) or
                            # as Entity objects; anything else is skipped with
                            # a warning.
                            if isinstance(element, (int, str)):
                                elem_id = element
                            elif isinstance(element, linkahead.Entity):
                                elem_id = element.id
                            else:
                                warnings.warn(f"Cannot handle referenced "
                                              f"entity '{prop.value}'")
                                continue
                            entity_ids.add(elem_id)
                    except linkahead.LinkAheadException as e:
                        warnings.warn(f"Cannot handle referenced entity "
                                      f"'{prop.value}' because of error '{e}'")
    # Retrieve data
    new_records = []
    for entity_id in entity_ids:
        # Strip a version suffix of the form 'id@version' — only the plain
        # id is usable in the query below.
        entity_id = str(entity_id).split('@')[0]
        entity = execute_query(f"FIND ENTITY WITH (ID = {entity_id})", unique=True)
        # Entities without parents (e.g. bare Files or Properties) are
        # skipped because they have no RecordType to group them under.
        if len(entity.get_parents()) > 0:
            new_records.append(entity)
        # ToDo: Handle Files and other Entities (e.g. Properties) separately
    records = new_records
    # Deduplicate the parents by id, then re-retrieve them so the full
    # RecordType definitions (not just stubs) are available for the schema.
    recordtypes = {record.parents[0] for record in records}
    recordtype_ids = {recordtype.id for recordtype in recordtypes}
    recordtypes = [execute_query(f"FIND RECORDTYPE WITH (ID = {rt_id})",
                                 unique=True)
                   for rt_id in recordtype_ids]
    recordtype_names = {recordtype.name for recordtype in recordtypes}
    # Generate schema and data from the records
    json_schema = _generate_jsonschema_from_recordtypes(recordtypes,
                                                        jsonschema_filepath)
    json_data = _generate_jsondata_from_records(records, jsondata_filepath)
    # Generate xlsx template with tempfile if necessary.  The tempfile object
    # is kept so it stays alive until the cleanup at the end of the function.
    # NOTE(review): NamedTemporaryFile reopened by name may fail on Windows —
    # TODO confirm target platforms.
    if xlsx_template_filepath is None:
        xlsx_template_file = tempfile.NamedTemporaryFile(suffix='.xlsx')
        xlsx_template_filepath = xlsx_template_file.name
    else:
        xlsx_template_file = None
    _generate_xlsx_template_file(json_schema, recordtype_names,
                                 xlsx_template_filepath)
    # Fill xlsx file with data
    with warnings.catch_warnings():
        # We have a lot of information in the json data that we do not need
        warnings.filterwarnings("ignore",
                                message="^.*Ignoring path with missing sheet index.*$")
        warnings.filterwarnings("ignore",
                                message="^.*No validation schema.*$")
        fill_template(data=json_data, template=xlsx_template_filepath,
                      result=xlsx_data_filepath)
    # ToDo: Validation
    # Cleanup
    if xlsx_template_file is not None:
        xlsx_template_file.close()
......@@ -24,6 +24,7 @@ from __future__ import annotations
import datetime
import pathlib
import warnings
from types import SimpleNamespace
from typing import Any, Optional, TextIO, Union
from warnings import warn
......@@ -364,7 +365,7 @@ validation_schema: dict, optional
print(verr.message)
raise verr
else:
print("No validation schema given, continue at your own risk.")
warnings.warn("No validation schema given, continue at your own risk.")
# Filling the data
result_wb = load_workbook(template)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment