#!/usr/bin/env python3
# encoding: utf-8
#
# This file is a part of the CaosDB Project.
#
# Copyright (C) 2021,2022 Indiscale GmbH <info@indiscale.com>
# Copyright (C) 2021,2022 Henrik tom Wörden <h.tomwoerden@indiscale.com>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
#

"""
test the converters module
"""
import importlib
import os
import pytest
import yaml

from caoscrawler.converters import (Converter, ConverterValidationError,
                                    DictConverter, DirectoryConverter,
                                    handle_value, MarkdownFileConverter,
                                    JSONFileConverter)
from caoscrawler.crawl import Crawler
from caoscrawler.stores import GeneralStore
from caoscrawler.structure_elements import (File, DictTextElement,
                                            DictListElement, DictElement,
                                            DictBooleanElement, DictDictElement,
                                            DictIntegerElement,
                                            DictFloatElement, Directory)

from test_tool import rfp


@pytest.fixture
def converter_registry():
    """Provide a converter registry for the converters used in these tests.

    Each entry maps a converter type name to a dict holding the converter's
    class name ("converter"), the package it lives in ("package") and the
    class object itself ("class"), which is looked up via importlib.
    """
    package = "caoscrawler.converters"
    class_names = {
        "Directory": "DirectoryConverter",
        "MarkdownFile": "MarkdownFileConverter",
        "Dict": "DictConverter",
        "DictTextElement": "DictTextElementConverter",
        "DictListElement": "DictListElementConverter",
        "TextElement": "TextElementConverter",
        "JSONFile": "JSONFileConverter",
    }

    # All converters live in the same package, so one import is sufficient.
    module = importlib.import_module(package)
    return {
        type_name: {
            "converter": class_name,
            "package": package,
            "class": getattr(module, class_name),
        }
        for type_name, class_name in class_names.items()
    }


def testConverterTrivial(converter_registry):
    """Check that the factory can instantiate several converter types from a
    minimal definition that only contains the "type" key.
    """
    type_names = (
        "Directory",
        "MarkdownFile",
        "DictTextElement",
        "DictListElement",
        "TextElement",
    )

    for type_name in type_names:
        minimal_definition = {"type": type_name}
        Converter.converter_factory(definition=minimal_definition,
                                    name="Test",
                                    converter_registry=converter_registry)


def testDirectoryConverter(converter_registry):
    """Check the children a Directory converter creates for the
    "test_directories" folder.
    """
    directory_converter = Converter.converter_factory(
        definition={"type": "Directory"},
        name="Test",
        converter_registry=converter_registry)
    root = Directory("test_directories", rfp("test_directories"))
    children = directory_converter.create_children(GeneralStore(), root)

    # The exact number of children is deliberately not checked: further
    # directories have been added to the test data over time.  Every child
    # has to be a Directory, and a few known ones have to be present.
    found_names = set()
    for child in children:
        assert isinstance(child, Directory)
        found_names.add(child.name)

    expected_names = {"examples_article", "example_overwrite_1", "example_insert"}
    assert expected_names.issubset(found_names)


def test_markdown_converter(converter_registry):
    """Check matching and child creation of the MarkdownFileConverter using
    two README files from the example data.
    """
    prediction_readme = File(
        "README.md",
        rfp("test_directories", "examples_article", "DataAnalysis",
            "2020_climate-model-predict", "2020-02-08_prediction-errors", "README.md"))

    catch_all_converter = MarkdownFileConverter(
        {"match": "(.*)"}, "TestMarkdownFileConverter", converter_registry)

    # The python file is expected to be rejected even by the catch-all pattern.
    python_file = File("test_tool.py", rfp("test_tool.py"))
    assert catch_all_converter.match(python_file) is None

    # A README matches, but yields no match groups.
    matched = catch_all_converter.match(prediction_readme)
    assert matched is not None
    assert type(matched) is dict
    assert len(matched) == 0

    readme_converter = MarkdownFileConverter(
        {"match": "README.md"}, "TestMarkdownFileConverter", converter_registry)

    matched = readme_converter.match(prediction_readme)
    assert matched is not None
    assert len(matched) == 0

    # First README: "responsible" is expected to be a plain text value.
    elements = readme_converter.create_children(None, prediction_readme)
    assert len(elements) == 5
    description, responsible = elements[1], elements[0]
    assert type(description) is DictTextElement
    assert description.name == "description"
    assert type(description.value) is str

    assert type(responsible) is DictTextElement
    assert responsible.name == "responsible"
    assert type(responsible.value) is str

    time_of_flight_readme = File(
        "README.md",
        rfp("test_directories", "examples_article", "ExperimentalData",
            "2020_SpeedOfLight", "2020-01-01_TimeOfFlight", "README.md"))

    matched = readme_converter.match(time_of_flight_readme)
    assert matched is not None
    assert len(matched) == 0

    # Second README: "responsible" is expected to be a list value.
    elements = readme_converter.create_children(None, time_of_flight_readme)
    assert len(elements) == 2
    description, responsible = elements[1], elements[0]
    assert type(description) is DictTextElement
    assert description.name == "description"
    assert type(description.value) is str

    assert type(responsible) is DictListElement
    assert responsible.name == "responsible"
    assert type(responsible.value) is list


def test_json_converter(converter_registry):
    """Check the children created by the JSONFileConverter for a valid JSON
    file and the validation error raised for a broken one.
    """
    valid_file = File("testjson.json",
                      rfp("test_directories", "examples_json", "testjson.json"))

    this_directory = os.path.dirname(os.path.abspath(__file__))
    schema_path = os.path.join(this_directory, "test_directories",
                               "examples_json", "testjson.schema.json")
    jsonconverter = JSONFileConverter(
        definition={"match": "(.*)", "validate": schema_path},
        name="TestJSONFileConverter",
        converter_registry=converter_registry)

    matched = jsonconverter.match(valid_file)
    assert matched is not None
    assert len(matched) == 0

    # Expected (element class, name, value type) of each child, in order.
    expected_children = [
        (DictTextElement, "name", str),
        (DictIntegerElement, "projectId", int),
        (DictBooleanElement, "archived", bool),
        (DictListElement, "Person", list),
        (DictTextElement, "start_date", str),
        (DictListElement, "candidates", list),
        (DictFloatElement, "rvalue", float),
        (DictTextElement, "url", str),
    ]

    children = jsonconverter.create_children(None, valid_file)
    assert len(children) == 8
    for child, (element_class, name, value_type) in zip(children, expected_children):
        assert type(child) is element_class
        assert child.name == name
        assert type(child.value) is value_type

    # Some of the values are checked in detail as well.
    assert children[0].value == "DEMO"
    assert children[1].value == 10002
    assert len(children[3].value) == 2
    assert children[5].value == ["Mouse", "Penguine"]

    broken_file = File(
        "brokenjson.json",
        rfp("test_directories", "examples_json", "brokenjson.json"))
    jsonconverter.match(broken_file)

    # Doesn't validate because of missing required 'name' property
    with pytest.raises(ConverterValidationError) as err:
        jsonconverter.create_children(None, broken_file)

    assert err.value.message.startswith("Couldn't validate")


def test_variable_replacement():
    """Check the (value, collection mode) pairs that handle_value returns for
    string, dict and list definitions, with and without "$" variables.
    """
    store = GeneralStore()
    store["a"] = 4
    store["b"] = "68"

    # Pairs of (definition handed to handle_value, expected result).
    cases = [
        # Plain strings; a leading "+" or "*" selects the collection mode.
        ("b", ("b", "single")),
        ("+b", ("b", "list")),
        ("*b", ("b", "multiproperty")),
        ("$b", ("68", "single")),
        ("+$b", ("68", "list")),
        ("*$b", ("68", "multiproperty")),
        # Dicts that state the collection mode explicitly.
        ({"value": "b", "collection_mode": "single"}, ("b", "single")),
        ({"value": "b", "collection_mode": "list"}, ("b", "list")),
        ({"value": "b", "collection_mode": "multiproperty"}, ("b", "multiproperty")),
        ({"value": "$b", "collection_mode": "single"}, ("68", "single")),
        ({"value": "$b", "collection_mode": "list"}, ("68", "list")),
        ({"value": "$b", "collection_mode": "multiproperty"}, ("68", "multiproperty")),
        # Lists are handled element by element.
        (["a", "b"], (["a", "b"], "single")),
        (["$a", "$b"], (["4", "68"], "single")),
    ]

    for definition, expected in cases:
        assert handle_value(definition, store) == expected


def _date_filtered_directory_converter(rule, name, converter_registry):
    """Return a DirectoryConverter that filters its children by `rule`.

    The filter expression extracts a ``date`` group from file names of the
    form ``test_YYYY-MM-DD.json``; `rule` is the filter rule applied to that
    group and `name` is the name given to the converter.
    """
    return DirectoryConverter(
        definition={
            "match": "(.*)",
            "filter": {
                "expr": "test_(?P<date>[0-9]{4,4}-[0-9]{2,2}-[0-9]{2,2}).json",
                "group": "date",
                "rule": rule
            }
        },
        name=name,
        converter_registry=converter_registry
    )


def test_filter_children_of_directory(converter_registry):
    """Verify that children (i.e., files) in a directory are filtered or sorted
    correctly.

    """
    test_dir = Directory("examples_filter_children", rfp(
        "test_directories", "examples_filter_children"))

    # (filter rule, converter name, the only json file expected to remain).
    # "only_max" has to keep the json with the latest date, "only_min" the one
    # with the earliest date.
    valid_rules = (
        ("only_max", "TestOnlyMaxDirectoryConverter", "test_2022-02-02.json"),
        ("only_min", "TestOnlyMinDirectoryConverter", "test_2022-01-01.json"),
    )

    for rule, name, expected_json in valid_rules:
        dc = _date_filtered_directory_converter(rule, name, converter_registry)
        assert dc.match(test_dir) is not None

        # This should only contain the selected json and the csv that doesn't
        # match the filter expression.
        children = dc.create_children(None, test_dir)
        assert len(children) == 2
        assert type(children[0]) is File
        assert children[0].name == expected_json
        assert type(children[1]) is File
        assert children[1].name == "some_other_file.csv"

    # An unknown rule still lets the directory match, but creating the
    # children has to fail.
    dc = _date_filtered_directory_converter(
        "does_not_exist", "TestBrokenDirectoryConverter", converter_registry)
    assert dc.match(test_dir) is not None

    with pytest.raises(RuntimeError):
        dc.create_children(None, test_dir)


def test_validate_custom_converters():
    """Check that custom converters can be declared both in a single-document
    definition and in the metadata document of a two-document definition.
    """
    single_document = """
Converters:
  MyNewType:
    converter: MyNewTypeConverter
    package: some_package.my_converters
MyElement:
  type: MyNewType
  match: something
    """
    single_crawler = Crawler()
    single_definitions = single_crawler._load_definition_from_yaml_dict(
        [yaml.safe_load(single_document)])
    assert "MyElement" in single_definitions
    assert single_definitions["MyElement"]["type"] == "MyNewType"

    # The same definition, with the converters moved to a metadata document.
    # This has to be equivalent to the single-document form.
    split_documents = """
---
metadata:
  Converters:
    MyNewType:
      converter: MyNewTypeConverter
      package: some_package.my_converters
---
MyElement:
  type: MyNewType
  match: something
    """
    split_crawler = Crawler()
    parsed_documents = list(yaml.safe_load_all(split_documents))
    split_definitions = split_crawler._load_definition_from_yaml_dict(parsed_documents)
    assert "MyElement" in split_definitions
    assert split_definitions["MyElement"]["type"] == single_definitions["MyElement"]["type"]
