diff --git a/CHANGELOG.md b/CHANGELOG.md index 686a0b7eaddf0fb8d58954c8d8fcd93695e7f054..f9f35aa2954650e660fb144448c434830f34fd8a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -21,6 +21,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Fixed ### - XLSX export with lists of files works again. This was broken in the enum guessing. +- XLSX import with multiple rows of multiple choice questions. +- XLSX import with empty enum references. ### Security ### diff --git a/integrationtests/test_profile/custom/caosdb-server/caosdb-webui/build.properties.d/50.import-xlsx.conf b/integrationtests/test_profile/custom/caosdb-server/caosdb-webui/build.properties.d/50.import-xlsx.conf new file mode 100644 index 0000000000000000000000000000000000000000..4bb9691cd01005e16e1c0d59ebf8fd0c15948ac8 --- /dev/null +++ b/integrationtests/test_profile/custom/caosdb-server/caosdb-webui/build.properties.d/50.import-xlsx.conf @@ -0,0 +1,25 @@ +# Custom configuration for the LinkAhead Web Interface. +# +# This file will be sourced during the `make install` and `make test` builds +# and must comply with BASH. +# +# All variables with the format "BUILD_[A-Z_]" will replace any variable +# reference of the same name in the source files during the build. +# +# E.g. all occurrences of "${BUILD_NAVBAR_LOGO}" will be replaced by the value +# defined below. +# +# Each variable should be described in this file. However, the exact semantics +# depend on the source files and cannot be guaranteed here. +# +# Note: The variable BUILD_NUMBER is special and should not be used. It will be +# overridden in the makefile in any case. +# +############################################################################## +# Modules enabled/disabled by default ############################################################################## +# A button that allows to download query results as XLSX. 
+BUILD_MODULE_EXT_EXPORT_TO_XLSX=ENABLED + +# Uploading of XLSX files to insert or update content. Needs the crawler installed. +BUILD_MODULE_EXT_IMPORT_FROM_XLSX=ENABLED diff --git a/integrationtests/test_profile/custom/caosdb-server/scripting/home/.pylinkahead.ini b/integrationtests/test_profile/custom/caosdb-server/scripting/home/.pylinkahead.ini new file mode 100644 index 0000000000000000000000000000000000000000..10169aa11c7854cb3df3c70f08889b36cce7e0b2 --- /dev/null +++ b/integrationtests/test_profile/custom/caosdb-server/scripting/home/.pylinkahead.ini @@ -0,0 +1,10 @@ +[Connection] +# url=https://demo.indiscale.com/ +url=https://localhost:10443/ + +cacert=/opt/caosdb/cert/caosdb.cert.pem + +## If this option is set, the SSL certificate will be ignored. Use with care! +# ssl_insecure=1 + +timeout=1200 diff --git a/integrationtests/test_profile/profile.yaml b/integrationtests/test_profile/profile.yaml index 82f95b112ba040b91c960bac901d3eb9bf7b1085..5dc72b317acd02aecfd319d15f6a91bbebbcd046 100644 --- a/integrationtests/test_profile/profile.yaml +++ b/integrationtests/test_profile/profile.yaml @@ -23,6 +23,12 @@ default: _CAOSDB_INTEGRATION_TEST_SUITE_KEY: "_CAOSDB_ADV_TEST_SUITE" TRANSACTION_BENCHMARK_ENABLED: "TRUE" # SERVER_SIDE_SCRIPTING_BIN_DIRS: "" + server_side_scripting_bin_dirs: ./caosdb-webui/sss_bin,./scripting/bin + scripting: + packages: + crawler: + mode: "pip" + package: "caoscrawler" # Development configuration options # devel: diff --git a/integrationtests/test_xlsx_export_import_xlsx.py b/integrationtests/test_xlsx_export_import_xlsx.py index 9b7d1b2963a1c33a6a86b3b037e2fc5e05de158d..8adf3bc5ebda13652d46a74eebdb46feb3cf23b8 100755 --- a/integrationtests/test_xlsx_export_import_xlsx.py +++ b/integrationtests/test_xlsx_export_import_xlsx.py @@ -24,6 +24,7 @@ Data is partly reused from the unit tests. 
""" import json +import logging import os import sys import pytest @@ -31,6 +32,7 @@ from datetime import datetime from pathlib import Path from linkahead.cached import cache_clear +from linkahead import high_level_api as hl from linkahead.utils.register_tests import clear_database, set_test_key from openpyxl import load_workbook @@ -44,6 +46,8 @@ from caosadvancedtools.tools.test import ( purge_from_json, ) +logger = logging.getLogger(__name__) + set_test_key("_CAOSDB_ADV_TEST_SUITE") @@ -98,19 +102,18 @@ def _insert_multiple_choice_data(): with open(json_data_file, encoding="utf-8") as myfile: json_data = json.load(myfile) - skills = [] - for skillname in ["Planning", "Evaluation"]: - skills.append(db.Record(skillname).retrieve()) - records = db.Container() - training_data = json_data["Training"][0] + for training_data in json_data["Training"]: - rec_training = db.Record(name=training_data["name"]).add_parent(db.RecordType("Training")) - rec_training.add_property("date", datetime.fromisoformat(training_data["date"])) - rec_training.add_property("skills", skills) - rec_training.add_property("exam_types", []) + rec_training = db.Record(name=training_data["name"]).add_parent(db.RecordType("Training")) + rec_training.add_property("date", datetime.fromisoformat(training_data["date"])) + skills = [] + for skillname in training_data["skills"]: + skills.append(db.Record(skillname).retrieve()) + rec_training.add_property("skills", skills) + rec_training.add_property("exam_types", []) - records.append(rec_training) + records.append(rec_training) records.insert() @@ -214,6 +217,20 @@ def _insert_multiple_refs_data(): assert dt_value == datetime.fromisoformat(json_data["Training"][0]["date"]) +def _insert_enum_data(): + """Insert the data from `enum_data.json`. 
+ """ + json_data_file = rfp_unittest_data("enum_data.json") + with open(json_data_file, encoding="utf-8") as myfile: + json_data = json.load(myfile) + + for data in json_data["Training"]: + rec = db.Record().add_parent("Training") + for prop in ["title", "Skill"]: + rec.add_property(prop, value=data[prop]) + rec.insert() + + @pytest.fixture(autouse=True) def clear_cache(): """Clear the LinkAhead cache.""" @@ -415,21 +432,34 @@ def test_multiple_choice(tmpdir, clear_database): # For the moment: just check a few samples assert filled_generated.sheetnames == ['Training'] sheet_training = filled_generated["Training"] - assert sheet_training.max_row == 7 + assert sheet_training.max_row == 8 assert sheet_training.max_column == 9 + + # Row 7 assert sheet_training["E7"].value == "x" assert sheet_training["F7"].value is None assert sheet_training["G7"].value == "x" assert sheet_training["H7"].value is None assert sheet_training["I7"].value is None + # Row 8 + assert str(sheet_training["D8"].value).startswith("2028-01-17") + assert sheet_training["E8"].value is None + assert sheet_training["F8"].value == "x" + assert sheet_training["G8"].value == "x" + # Check: reimport imported_dict = convert.to_dict(xlsx=str(tmpdir/"result.xlsx"), schema=str(tmpdir/"schema.json"), collect_errors=False) records = convert.records_from_json(imported_dict, schema=str(tmpdir/"schema.json")) - skill_values = records[0].get_property("skills").value - assert [db.get_entity_by_id(val).name for val in skill_values] == ["Planning", "Evaluation"] + skills = { + 0: ["Planning", "Evaluation"], + 1: ["Communication", "Evaluation"], + } + for idx, skill_values_expected in skills.items(): + skill_values = records[idx].get_property("skills").value + assert [db.get_entity_by_id(val).name for val in skill_values] == skill_values_expected def test_export_list_refs(tmpdir, clear_database): @@ -553,3 +583,139 @@ def test_file_references(tmpdir, clear_database): data = json.load(data_file) assert 
purge_from_json(data, ["id"]) == { 'Experiment': [{'name': None, 'comment': 'Hello world!'}]} + + +def test_empty_enum(tmpdir, clear_database): + """Empty enum reference. + """ + tmpdir = Path(tmpdir) + # 1, Data model + _create_datamodel(rfp_unittest_data("enum_model.yaml")) + # 1.1 Enums + for name in ["Communication", "Evaluation"]: + rec = db.Record(name=name).add_parent("Skill").insert() + logger.debug(rec) + + # 2. Data + _insert_enum_data() + + # 3. Retrieve and export + records = db.execute_query("FIND Training") + export_import_xlsx.export_container_to_xlsx(records=records, + include_referenced_entities=True, + xlsx_data_filepath=tmpdir / "result.xlsx", + jsonschema_filepath=tmpdir / "schema.json", + jsondata_filepath=tmpdir / "data.json", + xlsx_template_filepath=tmpdir / "template.xlsx", + ) + with open(tmpdir / "data.json", encoding="utf-8") as data_file: + data = json.load(data_file) + assert purge_from_json(data, ["id"]) == {'Training': [ + {'name': None, 'title': 'Marketing', 'Skill': {'name': 'Communication'}}, + {'name': None, 'title': 'Vacation', 'Skill': None} + ]} + + # 4. Reimport + + imported_dict = convert.to_dict(xlsx=str(tmpdir/"result.xlsx"), + schema=str(tmpdir/"schema.json"), + collect_errors=False) + records = convert.records_from_json(imported_dict, schema=str(tmpdir/"schema.json")) + + +def test_empty_enum_direct(tmpdir, clear_database): + """Empty enum reference, testing the function directly. + """ + tmpdir = Path(tmpdir) + # 1, Data model + _create_datamodel(rfp_unittest_data("enum_model.yaml")) + prop_multiskill = db.Property("MultiSkill", datatype="LIST<Skill>").insert() + rt_training = db.RecordType("Training").retrieve() + rt_training.add_property(prop_multiskill).update() + # 1.1 Enums + for name in ["Communication", "Evaluation"]: + rec = db.Record(name=name).add_parent("Skill").insert() + logger.debug(rec) + + # 2. Data + _insert_enum_data() + + # 3. 
Try to convert jsons + # Commented code: to get schema + # records = db.execute_query("FIND Training") + # export_import_xlsx.export_container_to_xlsx(records=records, + # include_referenced_entities=True, + # xlsx_data_filepath=tmpdir / "result.xlsx", + # jsonschema_filepath=tmpdir / "schema.json", + # jsondata_filepath=tmpdir / "data.json", + # xlsx_template_filepath=tmpdir / "template.xlsx", + # ) + # with open(tmpdir / "schema.json", encoding="utf-8") as data_file: + # schema = json.load(data_file) + schema = {'type': 'object', + 'properties': + {'Training': + {'type': 'object', + 'required': [], + 'additionalProperties': False, + 'title': 'Training', + 'properties': {'id': {'type': 'string'}, + 'name': {'type': 'string', + 'description': 'The name of the Record to be created'}, + 'title': {'type': 'string'}, + 'Skill': {'description': 'A skills that is trained.', + 'enum': ['Communication', 'Evaluation']}, + 'MultiSkill': {'type': 'array', + 'items': {'enum': ['Communication', 'Evaluation']}, + 'uniqueItems': True}}, + '$schema': 'https://json-schema.org/draft/2020-12/schema'}}, + 'required': ['Training'], + 'additionalProperties': False, + '$schema': 'https://json-schema.org/draft/2020-12/schema', + } + + data_new = {"Training": [ + { + "id": -1, + "name": "New training 1", + "title": "No skills", + "Skill": None, + "MultiSkill": ["Communication"] + }, + { + "id": -2, + "name": "New training 2", + "title": "No MultiSkills", + "Skill": "Evaluation", + "MultiSkill": [None] + }, + { + "id": -3, + "name": "New training 3", + "title": "Some skills", + "MultiSkill": ["Communication", None] + }, + ]} + + new_records = convert.records_from_json(data=data_new, schema=schema) + new_records.insert() + + json_0 = hl.convert_to_python_object(new_records[0], resolve_references=True + ).serialize(plain_json=True) + json_1 = hl.convert_to_python_object(new_records[1], resolve_references=True + ).serialize(plain_json=True) + json_2 = 
hl.convert_to_python_object(new_records[2], resolve_references=True + ).serialize(plain_json=True) + + assert json_0["Skill"] is None + assert len(json_0["MultiSkill"]) == 1 + assert json_0["MultiSkill"][0]["name"] == "Communication" + + assert json_1["Skill"]["name"] == "Evaluation" + assert len(json_1["MultiSkill"]) == 1 + assert json_1["MultiSkill"][0] is None + + assert "Skill" not in json_2 + assert len(json_2["MultiSkill"]) == 2 + assert json_2["MultiSkill"][0]["name"] == "Communication" + assert json_2["MultiSkill"][1] is None diff --git a/src/caosadvancedtools/table_json_conversion/convert.py b/src/caosadvancedtools/table_json_conversion/convert.py index 79818c1fe7fe8311ef7d41a2cb940e169a3ff8eb..eda13b81e5d521569e1a92e6117fa70462865976 100644 --- a/src/caosadvancedtools/table_json_conversion/convert.py +++ b/src/caosadvancedtools/table_json_conversion/convert.py @@ -457,6 +457,8 @@ class XLSXConverter: path = data_column_paths[col_idx] col_names[col_idx] = '.'.join(path) if self._is_multiple_choice(path): + # `path` will be modified, so we need a copy. + path = path.copy() real_value = path.pop() # Last component is enum value, insert above # set up list try: @@ -557,7 +559,10 @@ class XLSXConverter: subschema = self._get_subschema(path) # Array handling only if schema says it's an array. 
if subschema.get("type") == "array": - array_type = subschema["items"]["type"] + array_type = subschema["items"].get("type") + if array_type is None: + raise KeyError("Array without 'type' in schema's items:\n" + + str(subschema["items"])) # Convert type "[foo, null]" -> "foo" if isinstance(array_type, list): if len(array_type) == 2 and array_type[-1] == "null": @@ -844,7 +849,7 @@ def deep_record_from_json(recdata: dict, schema: dict, parent: str, all_refs.append(referenced_cont[0]) result.extend(referenced_cont) else: - if "enum" in subschema: + if "enum" in subschema and element: enum_id = _get_enum_id(propname=key, enumvalue=element) all_refs.append(enum_id) else: @@ -862,7 +867,7 @@ def deep_record_from_json(recdata: dict, schema: dict, parent: str, if key == "file": raise RuntimeError("This should not be reached if 'file' is part of the dict.") value = prop - if "enum" in subschema: + if "enum" in subschema and value: value = _get_enum_id(propname=key, enumvalue=prop) rec.add_property(name=key, value=value) return result diff --git a/src/caosadvancedtools/table_json_conversion/table_generator.py b/src/caosadvancedtools/table_json_conversion/table_generator.py index 17ed5dac8d4e3a48190ebb58901feb744853fea2..903726983452e275656a5fe9a7ca54951fc0c1df 100644 --- a/src/caosadvancedtools/table_json_conversion/table_generator.py +++ b/src/caosadvancedtools/table_json_conversion/table_generator.py @@ -38,8 +38,12 @@ from openpyxl.workbook.child import INVALID_TITLE_REGEX from .xlsx_utils import p2s, ColumnType, RowType +class EmptySchemaException(RuntimeError): + """Exception for when a schema is empty, so that no meaningful table can be generated.""" + + class TableTemplateGenerator(ABC): - """ base class for generating tables from json schema """ + """Base class for generating tables from json schema """ def __init__(self): pass @@ -330,6 +334,8 @@ class XLSXTemplateGenerator(TableTemplateGenerator): """ sheets = self._generate_sheets_from_schema(schema, 
foreign_keys, use_ids_as_foreign=use_ids_as_foreign) + if not sheets: + raise EmptySchemaException("No sheets generated, schema is too empty.") wb = self._create_workbook_from_sheets_def(sheets) if not isinstance(filepath, Path): filepath = Path(filepath) diff --git a/unittests/table_json_conversion/data/enum_data.json b/unittests/table_json_conversion/data/enum_data.json new file mode 100644 index 0000000000000000000000000000000000000000..e03f77b731bfb89207948a7b98cd7854264bab58 --- /dev/null +++ b/unittests/table_json_conversion/data/enum_data.json @@ -0,0 +1,12 @@ +{ + "Training": [ + { + "title": "Marketing", + "Skill": "Communication" + }, + { + "title": "Vacation", + "Skill": null + } + ] +} diff --git a/unittests/table_json_conversion/data/enum_model.yaml b/unittests/table_json_conversion/data/enum_model.yaml new file mode 100644 index 0000000000000000000000000000000000000000..bddbb902e5884a65ebf80bd87ebe76da939837d8 --- /dev/null +++ b/unittests/table_json_conversion/data/enum_model.yaml @@ -0,0 +1,9 @@ +Training: + recommended_properties: + title: + datatype: TEXT + Skill: + description: A skills that is trained. 
+ # enums: + # - Communication + # - Evaluation diff --git a/unittests/table_json_conversion/data/multiple_choice_data.json b/unittests/table_json_conversion/data/multiple_choice_data.json index ee24ef7adbd61abf22d47bb3d49f43f3e1e26501..450499ce85c02a8365260f5dd838afc7d22acf77 100644 --- a/unittests/table_json_conversion/data/multiple_choice_data.json +++ b/unittests/table_json_conversion/data/multiple_choice_data.json @@ -1,11 +1,23 @@ { - "Training": [{ - "name": "Super Skill Training", - "date": "2024-04-17", - "skills": [ - "Planning", - "Evaluation" - ], - "exam_types": [] - }] + "Training": [ + { + "name": "Super Skill Training", + "date": "2024-04-17", + "skills": [ + "Planning", + "Evaluation" + ], + "exam_types": [] + }, + { + "name": "Normal Skill Training", + "date": "2028-01-17", + "skills": [ + "Communication", + "Evaluation" + ], + "exam_types": [] + } + + ] } diff --git a/unittests/table_json_conversion/data/multiple_choice_data.xlsx b/unittests/table_json_conversion/data/multiple_choice_data.xlsx index 28cf4007d8a1a061235863d12e5bdc5b5747f386..14849db49a40ab99244fd4f58bbf1ec78247ab22 100644 Binary files a/unittests/table_json_conversion/data/multiple_choice_data.xlsx and b/unittests/table_json_conversion/data/multiple_choice_data.xlsx differ diff --git a/unittests/table_json_conversion/data/multiple_choice_retrieved_data.json b/unittests/table_json_conversion/data/multiple_choice_retrieved_data.json index 78969122f012b84e545e46855375ac5ef9f5bd62..c31c2561d86e8719ba876af5f1470586f25e7b1a 100644 --- a/unittests/table_json_conversion/data/multiple_choice_retrieved_data.json +++ b/unittests/table_json_conversion/data/multiple_choice_retrieved_data.json @@ -12,6 +12,15 @@ } ], "exam_types": null + }, + { + "name": "Normal Skill Training", + "date": "2028-01-17 00:00:00-05:00", + "skills": [ + {"name": "Communication"}, + {"name": "Evaluation"} + ], + "exam_types": null } ] }