# test_fill_xlsx.py

# encoding: utf-8
#
# This file is a part of the LinkAhead Project.
#
# Copyright (C) 2024 Indiscale GmbH <info@indiscale.com>
# Copyright (C) 2024 Henrik tom Wörden <h.tomwoerden@indiscale.com>
# Copyright (C) 2024 Daniel Hornung <d.hornung@indiscale.com>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.

import json
import os
import re
import tempfile

import jsonschema.exceptions as schema_exc
import pytest
from openpyxl import load_workbook

from caosadvancedtools.table_json_conversion import xlsx_utils
from caosadvancedtools.table_json_conversion.fill_xlsx import fill_template
from caosadvancedtools.table_json_conversion.xlsx_utils import (
    get_row_type_column_index,
    get_path_rows,
)

from .utils import compare_workbooks


def rfp(*pathcomponents):
    """Resolve path components relative to the directory of this file.

    Short for "return full path": the given components are joined onto the
    directory which contains this module.
    """
    here = os.path.dirname(__file__)
    return os.path.join(here, *pathcomponents)


def fill_and_compare(json_file: str, template_file: str, known_good: str,
                     schema: str = None, custom_output: str = None):
    """Fill a template with data and compare the result to a known good workbook.

    Parameters
    ----------
    json_file: str
      Passed on to ``fill_template`` as the ``data`` argument.
    template_file: str
      Passed on to ``fill_template`` as the ``template`` argument.
    known_good: str
      Workbook file which the generated workbook is compared against.
    schema: str, optional
      Json schema to validate against, handed to ``fill_template`` as
      ``validation_schema``.
    custom_output: str, optional
      If given, the result is written to this file instead of a file in a
      temporary directory.  For development only.
    """
    with tempfile.TemporaryDirectory() as workdir:
        default_target = os.path.join(workdir, 'test.xlsx')
        # A fresh temporary directory must not contain the result file yet.
        assert not os.path.exists(default_target)
        target = default_target if custom_output is None else custom_output
        fill_template(data=json_file, template=template_file, result=target,
                      validation_schema=schema)
        assert os.path.exists(target)
        # Load while the temporary directory still exists; this also checks
        # that the written workbook can be read.
        actual_wb = load_workbook(target)
    expected_wb = load_workbook(known_good)
    compare_workbooks(actual_wb, expected_wb)


def test_detect():
    """Check the detected row type column and path rows of the simple template."""
    workbook = load_workbook(rfp("data/simple_template.xlsx"))
    person_sheet = workbook['Person']
    assert get_row_type_column_index(person_sheet) == 0
    assert get_path_rows(person_sheet) == [1, 2]


def test_temporary():
    """Fill templates from locally stored form data for manual inspection.

    This is a development helper, not an automated check: it walks a
    developer-local data directory, fills the matching template for each
    remaining ``Art*`` json file and opens the result in LibreOffice (if that
    program can be found).  The test is skipped when the local directories do
    not exist, instead of passing without having done anything.
    """
    # TODO: remove the following after manual testing
    # Local imports: only this manual helper needs them.
    import shutil
    import subprocess

    di = '/home/henrik/CaosDB/management/external/dimr/eingabemaske/crawler/schemas'
    dd = '/home/henrik/CaosDB/management/external/dimr/eingabemaske/django/laforms/persistent/'
    if not (os.path.isdir(di) and os.path.isdir(dd)):
        pytest.skip("Local data for manual testing is not available.")

    # Form parts which were already checked manually and are skipped here.
    already_done = frozenset((
        "Präventionsmaßnahmen",
        "Beratungsstellen",
        "Schutzeinrichtungen",
        "Einzelfallversorgung",
        "Strategiedokumente",
        "Kooperationsvereinbarungen",
        "Gremien",
        "Verwaltungsvorschriften",
        "Gewaltschutzkonzepte und -maßnahmen",
        "Polizeilicher Opferschutz",
        "Feedback",
    ))
    name_pattern = re.compile(r"(?P<teilb>.*)_2024-.*\.json")
    # None if LibreOffice is not installed; then the results are only generated.
    viewer = shutil.which("libreoffice")

    for prefix, _, files in os.walk(dd):
        for fi in files:
            match = name_pattern.match(fi)
            if not match:
                continue
            tb = match.group('teilb')
            print(tb)
            if tb in already_done:
                continue
            template = os.path.join(di, "template_"+tb+".xlsx")
            schema = os.path.join(di, "schema_"+tb+".json")
            assert os.path.exists(template), f"Missing template: {template}"
            jfi = os.path.join(prefix, fi)
            print(jfi)
            if not fi.startswith("Art"):
                continue
            with open(jfi, encoding="utf-8") as infile:
                data = json.load(infile)["form_data"]
            # The version marker is removed before the data is filled in.
            data.pop("__version__", None)
            with tempfile.TemporaryDirectory() as tmpdir:
                outfile = os.path.join(tmpdir, 'test.xlsx')
                fill_template(data=data, template=template, result=outfile,
                              validation_schema=schema)
                if viewer is not None:
                    # Argument list instead of a shell string, so that paths
                    # with spaces or shell metacharacters are passed verbatim.
                    subprocess.run([viewer, outfile], check=False)


def test_fill_xlsx():
    """Fill several templates and compare each result to its known good file."""
    # Each entry: (json data, template, known good workbook, schema).
    cases = (
        ("data/simple_data.json", "data/simple_template.xlsx",
         "data/simple_data.xlsx", "data/simple_schema.json"),
        ("data/multiple_refs_data.json", "data/multiple_refs_template.xlsx",
         "data/multiple_refs_data.xlsx", "data/multiple_refs_schema.json"),
        ("data/indirect_data.json", "data/indirect_template.xlsx",
         "data/indirect_data.xlsx", "data/indirect_schema.json"),
        ("data/simple_data_ascii_chars.json", "data/simple_template.xlsx",
         "data/simple_data_ascii_chars.xlsx", "data/simple_schema.json"),
        ("data/multiple_choice_data.json", "data/multiple_choice_template.xlsx",
         "data/multiple_choice_data.xlsx", "data/multiple_choice_schema.json"),
    )
    for data_path, template_path, expected_path, schema_path in cases:
        fill_and_compare(json_file=rfp(data_path),
                         template_file=rfp(template_path),
                         known_good=rfp(expected_path),
                         schema=rfp(schema_path))


def test_errors():
    """Check the errors raised for the erroneous simple data set.

    Without a schema, an AssertionError mentioning "Auric\\nSteve" is expected;
    with the simple schema, a jsonschema ValidationError is expected.
    """
    bad_input = {
        "json_file": rfp("data/error_simple_data.json"),
        "template_file": rfp("data/simple_template.xlsx"),
        "known_good": rfp("data/simple_data.xlsx"),
    }

    with pytest.raises(AssertionError) as mismatch:
        fill_and_compare(**bad_input)
    assert "Auric\nSteve" in str(mismatch.value)

    with pytest.raises(schema_exc.ValidationError) as invalid:
        fill_and_compare(**bad_input, schema=rfp("data/simple_schema.json"))
    assert invalid.value.message == "0.5 is not of type 'integer'"


def test_data_schema_generation():
    """Check that the array schema derived from the simple model schema is as expected."""
    source_schema = xlsx_utils.read_or_dict(rfp("data/simple_schema.json"))
    reference = xlsx_utils.read_or_dict(rfp("data/simple_data_schema.json"))
    generated = xlsx_utils.array_schema_from_model_schema(source_schema)
    assert generated == reference