#!/bin/python
# Tests for the tool using pytest
# Adapted from check-sfs
# A. Schlemmer, 06/2021

from newcrawler import Crawler
from newcrawler.converters import MarkdownFileConverter
from newcrawler.structure_elements import File, DictTextElement, DictListElement
from newcrawler.identifiable_adapters import IdentifiableAdapter, LocalStorageIdentifiableAdapter
from functools import partial
from copy import deepcopy
from unittest.mock import MagicMock, Mock
from os.path import join, dirname, basename
import yaml
import caosdb as db
from caosdb.apiutils import compare_entities

import pytest
from pytest import raises


def rfp(*pathcomponents):
    """
    Resolve path components relative to the directory containing this file.

    Shorthand convenience function; without any components the directory of
    this file itself is returned.
    """
    base_dir = dirname(__file__)
    return join(base_dir, *pathcomponents)


def dircheckstr(*pathcomponents):
    """
    Build the debug tree identifier for a directory of the example tree.

    The identifier consists of the Directory structure element class name,
    the name of the last path component and the full path of the directory
    below test_directories/examples_article.
    """
    label = "newcrawler.structure_elements.Directory: " + basename(join(*pathcomponents))
    full_path = rfp("test_directories", "examples_article", *pathcomponents)
    return ", ".join((label, full_path))


@pytest.fixture
def crawler():
    """
    Provide a Crawler in debug mode that has already crawled the
    examples_article test directory with the scifolder_cfood.yml definition.
    """
    instance = Crawler(debug=True)
    data_dir = rfp("test_directories", "examples_article")
    cfood_file = rfp("scifolder_cfood.yml")
    instance.crawl_directory(data_dir, cfood_file)
    return instance

@pytest.fixture
def ident(crawler):
    """
    Provide a LocalStorageIdentifiableAdapter that is attached to the crawler
    fixture, restored from records.xml and has the identifiables for Person,
    Measurement and Project registered.
    """
    adapter = LocalStorageIdentifiableAdapter()
    crawler.identifiableAdapter = adapter

    # The records.xml file is constructed as follows:
    # Do a full run of the crawler, resolve all identifiables and insert all
    # resulting entities.
    # See: test-setup/datamodel/generate_test_data.py for details.
    adapter.restore_state(rfp("records.xml"))

    # Record type name -> names of its identifying properties, listed in the
    # order in which they are registered.
    identifying_properties = (
        ("Person", ("first_name", "last_name")),
        ("Measurement", ("identifier", "date", "project")),
        ("Project", ("date", "identifier")),
    )
    for type_name, property_names in identifying_properties:
        definition = db.RecordType().add_parent(name=type_name)
        for property_name in property_names:
            definition = definition.add_property(name=property_name)
        adapter.register_identifiable(type_name, definition)
    return adapter

def test_crawler(crawler):
    """
    Check the debug tree and the "copied" flags recorded by the crawler for
    three nested levels of the DataAnalysis example directory.

    Each debug entry is indexed with 0 and 1: index 0 holds plain values
    together with records, index 1 holds records only.
    """
    def check_project_record(project):
        # The Project record must look the same on every level it shows up.
        parents = project.get_parents()
        assert len(parents) == 1
        assert parents[0].name == "Project"
        assert project.get_property("date").value == "2020"
        assert project.get_property("identifier").value == "climate-model-predict"

    # Top level: nothing is defined for the DataAnalysis directory itself.
    key = dircheckstr("DataAnalysis")
    tree_entry = crawler.debug_tree[key]
    copy_entry = crawler.debug_metadata["copied"][key]
    for entry in (tree_entry, copy_entry):
        assert len(entry) == 2
        assert len(entry[0]) == 0
        assert len(entry[1]) == 0

    # First level in the hierarchy: the project directory.
    key = dircheckstr("DataAnalysis", "2020_climate-model-predict")
    tree_entry = crawler.debug_tree[key]
    copy_entry = crawler.debug_metadata["copied"][key]

    records = tree_entry[1]
    assert len(records) == 1
    check_project_record(records["Project"])

    values = tree_entry[0]
    assert len(values) == 3
    assert values["date"] == "2020"
    assert values["identifier"] == "climate-model-predict"
    assert type(values["Project"]) is db.Record

    # Check the copy flags for the first level in the hierarchy:
    assert len(copy_entry[0]) == 3
    assert len(copy_entry[1]) == 1
    assert copy_entry[1]["Project"] is False
    assert copy_entry[0]["Project"] is False
    assert copy_entry[0]["date"] is False
    assert copy_entry[0]["identifier"] is False

    # Second level in the hierarchy: the measurement directory.
    key = dircheckstr("DataAnalysis",
                      "2020_climate-model-predict",
                      "2020-02-08_prediction-errors")
    tree_entry = crawler.debug_tree[key]
    copy_entry = crawler.debug_metadata["copied"][key]

    values = tree_entry[0]
    assert len(values) == 4
    assert values["date"] == "2020-02-08"
    assert values["identifier"] == "prediction-errors"
    assert type(values["Project"]) is db.Record
    assert type(values["Measurement"]) is db.Record

    records = tree_entry[1]
    assert len(records) == 2
    check_project_record(records["Project"])

    measurement = records["Measurement"]
    parents = measurement.get_parents()
    assert len(parents) == 1
    assert parents[0].name == "Measurement"
    assert measurement.get_property("date").value == "2020-02-08"
    assert measurement.get_property("identifier").value == "prediction-errors"
    # The "$Project" placeholder must have been replaced by the actual record.
    referenced_project = measurement.get_property("project").value
    assert referenced_project != "$Project"
    assert type(referenced_project) is db.Record
    assert referenced_project == values["Project"]

    # Check the copy flags for the second level in the hierarchy:
    # only Project is flagged as copied on this level.
    assert copy_entry[1]["Project"] is True
    assert copy_entry[0]["Project"] is True
    assert copy_entry[1]["Measurement"] is False
    assert copy_entry[0]["Measurement"] is False
    assert copy_entry[0]["date"] is False
    assert copy_entry[0]["identifier"] is False


def test_markdown_converter():
    """
    Exercise MarkdownFileConverter.match and create_children on two README.md
    files of the example directory tree.
    """
    def check_child(child, element_class, name, value_type):
        # Every generated child is checked for its class, name and value type.
        assert type(child) is element_class
        assert child.name == name
        assert type(child.value) is value_type

    example_root = ("test_directories", "examples_article")
    readme_analysis = File("README.md", rfp(
        *example_root, "DataAnalysis", "2020_climate-model-predict",
        "2020-02-08_prediction-errors", "README.md"))

    # A converter with a catch-all pattern still returns None for this Python
    # file, but yields an empty match dict for the README.
    catch_all = MarkdownFileConverter({"match": "(.*)"}, "TestMarkdownFileConverter")
    assert catch_all.match(File("test_tool.py", rfp("test_tool.py"))) is None

    match = catch_all.match(readme_analysis)
    assert match is not None
    assert type(match) is dict
    assert len(match) == 0

    # From here on a converter that matches the README.md file name is used.
    converter = MarkdownFileConverter({"match": "README.md"}, "TestMarkdownFileConverter")

    match = converter.match(readme_analysis)
    assert match is not None
    assert len(match) == 0

    children = converter.create_children(None, readme_analysis)
    assert len(children) == 5
    check_child(children[1], DictTextElement, "description", str)
    check_child(children[0], DictTextElement, "responsible", str)

    # In the second README "responsible" becomes a list element.
    readme_experiment = File("README.md", rfp(
        *example_root, "ExperimentalData", "2020_SpeedOfLight",
        "2020-01-01_TimeOfFlight", "README.md"))

    match = converter.match(readme_experiment)
    assert match is not None
    assert len(match) == 0

    children = converter.create_children(None, readme_experiment)
    assert len(children) == 2
    check_child(children[1], DictTextElement, "description", str)
    check_child(children[0], DictListElement, "responsible", list)

# def prepare_test_record_file():
#     ident = LocalStorageIdentifiableAdapter()
#     crawler = Crawler(debug=True, identifiableAdapter=ident)
#     crawler.crawl_directory(rfp("test_directories", "examples_article"),
#                             rfp("scifolder_cfood.yml"))

#     # clean record list:
#     recordlist = ident.get_records()
#     for i in range(len(recordlist)-1, 1, -1):
#         if recordlist[i].parents[0].name == "Person":
#             del recordlist[i]

#     ident.store_state(rfp("records.xml"))


def test_ambigious_records(crawler, ident):
    """
    Replace the adapter's records by the crawler's update list and check that
    retrieving the identified record of the first entry raises a RuntimeError.
    """
    stored = ident.get_records()
    stored.clear()
    stored.extend(crawler.updateList)

    first_identifiable = ident.get_identifiable(ident.get_records()[0])
    with raises(RuntimeError, match=".*unambigiously.*"):
        ident.retrieve_identified_record(first_identifiable)


def test_crawler_update_list(crawler, ident):
    """
    Check that the adapter restored from records.xml is consistent with the
    crawler's update list.

    For the first Person and the first Measurement in the store, the
    identifiable and the identified record are retrieved and compared with
    the stored record. Finally a synchronization must yield neither inserts
    nor updates.
    """
    def records_of_type(type_name):
        # All stored records whose first parent has the given name.
        return [rec for rec in ident.get_records() if rec.parents[0].name == type_name]

    def assert_same_values(left, right, property_names):
        # Both entities must carry equal values for each listed property.
        for property_name in property_names:
            assert (left.get_property(property_name).value
                    == right.get_property(property_name).value)

    # If the following assertions fail, that is a hint, that the test file records.xml is
    # incorrect:
    assert len(ident.get_records()) == 18
    assert len(records_of_type("Person")) == 5
    assert len(records_of_type("Measurement")) == 11
    assert len(records_of_type("Project")) == 2

    # The crawler contains lots of duplicates, because identifiables have not been resolved yet:
    assert len(ident.get_records()) != len(crawler.updateList)

    # Check consistency:
    # Check whether identifiables retrieved from current identifiable store return the same results.

    # Take the first person in the list of records. Indexing fails right here
    # if there is none, instead of leaving the variable unbound.
    person = records_of_type("Person")[0]

    id_r0 = ident.get_identifiable(person)
    assert person.parents[0].name == id_r0.parents[0].name
    assert_same_values(person, id_r0, ("first_name", "last_name"))
    assert len(person.parents) == 1
    assert len(id_r0.parents) == 1
    assert len(person.properties) == 2
    assert len(id_r0.properties) == 2

    idr_r0_test = ident.retrieve_identified_record(id_r0)
    idr_r0 = ident.retrieve_identifiable(person)
    assert idr_r0 == idr_r0_test

    # Take the first measurement in the list of records. A separate variable
    # is used so that the person can never be reused by accident.
    measurement = records_of_type("Measurement")[0]

    id_r1 = ident.get_identifiable(measurement)
    assert measurement.parents[0].name == id_r1.parents[0].name
    assert_same_values(measurement, id_r1, ("identifier", "date", "project"))
    assert len(measurement.parents) == 1
    assert len(id_r1.parents) == 1
    # The identifiable only carries the three identifying properties.
    assert len(measurement.properties) == 5
    assert len(id_r1.properties) == 3

    idr_r1_test = ident.retrieve_identified_record(id_r1)
    idr_r1 = ident.retrieve_identifiable(measurement)
    assert idr_r1 == idr_r1_test
    assert idr_r1 != idr_r0
    assert idr_r1_test != idr_r0_test

    assert len(idr_r1.properties) == 5
    assert_same_values(measurement, idr_r1, ("responsible", "description"))

    # test whether the compare_entities function works in this context:
    # compared with the identifiable, only the two non-identifying properties
    # of the record are reported as differences.
    comp = compare_entities(measurement, id_r1)
    assert len(comp[0]["parents"]) == 0
    assert len(comp[1]["parents"]) == 0
    assert len(comp[0]["properties"]) == 2
    assert len(comp[1]["properties"]) == 0
    assert "responsible" in comp[0]["properties"]
    assert "description" in comp[0]["properties"]

    # compared with the identified record there is no difference at all.
    comp = compare_entities(measurement, idr_r1)
    assert len(comp[0]["parents"]) == 0
    assert len(comp[1]["parents"]) == 0
    assert len(comp[0]["properties"]) == 0
    assert len(comp[1]["properties"]) == 0

    insl, updl = crawler.synchronize()
    assert len(insl) == 0
    assert len(updl) == 0


def test_identifiable_update(crawler, ident):
    """
    Change one value in the update list and check that the synchronization
    reports exactly one update.
    """
    measurements = [rec for rec in crawler.updateList
                    if rec.parents[0].name == "Measurement"]
    first_measurement = measurements[0]
    first_measurement.get_property("responsible").value = []

    _, updates = crawler.synchronize()
    assert len(updates) == 1


def test_identifiable_update2(crawler, ident):
    """
    Change one unit in the update list and check that the synchronization
    reports exactly one update.
    """
    measurements = [rec for rec in crawler.updateList
                    if rec.parents[0].name == "Measurement"]
    description = measurements[0].get_property("description")
    description.unit = "cm"

    _, updates = crawler.synchronize()
    assert len(updates) == 1


def test_identifiable_update3(crawler, ident):
    """
    Change values of two records in the update list and check that the
    synchronization reports exactly two updates.
    """
    measurements = [rec for rec in crawler.updateList
                    if rec.parents[0].name == "Measurement"]
    for position in (0, 3):
        measurements[position].get_property("responsible").value = []

    _, updates = crawler.synchronize()
    assert len(updates) == 2


def test_identifiable_adapter():
    """
    Check the query string that IdentifiableAdapter generates for a Person
    identifiable with a first and a last name (compared case-insensitively).
    """
    person = db.Record().add_parent("Person")
    person = person.add_property("first_name", value="A")
    person = person.add_property("last_name", value="B")

    query = IdentifiableAdapter.create_query_for_identifiable(person)

    # Note the trailing blank at the end of the generated query.
    expected = "find record person with 'first_name'='a' and 'last_name'='b' "
    assert query.lower() == expected


@pytest.mark.xfail
def test_identifiable_adapter_no_identifiable(crawler, ident):
    """
    Remove the registered Person identifiable and synchronize: nothing may be
    updated and every Person of the update list has to be inserted.

    Marked as an expected failure.
    """
    del ident._registered_identifiables["Person"]

    inserts, updates = crawler.synchronize()
    assert len(updates) == 0

    # All persons are inserted, because they are not identifiable:
    persons = [rec for rec in crawler.updateList if rec.parents[0].name == "Person"]
    assert len(inserts) == len(persons)


def test_provenance_debug_data(crawler):
    """
    Save the crawler's debug data to provenance.yml next to this file, read
    it back and count the provenance keys per record type prefix.
    """
    target = rfp("provenance.yml")
    crawler.save_debug_data(target)

    with open(target, "r") as stream:
        provenance = yaml.safe_load(stream)

    pr = provenance["provenance"]

    def check_key_count(prefix):
        # Number of provenance keys that start with the given prefix.
        return sum(key.startswith(prefix) for key in pr)

    expected_counts = (("Measurement", 11), ("Project", 5), ("Person", 14))
    for prefix, count in expected_counts:
        assert check_key_count(prefix) == count


def test_split_into_inserts_and_updates(crawler):
    """
    Test Crawler.split_into_inserts_and_updates with mocked cache and mocked
    remote lookups.

    Covered cases: an empty list, two independent records, a record that
    references another one, and a circular reference (RuntimeError expected).
    """
    # Try trivial argument
    crawler.split_into_inserts_and_updates([])

    # simulate remote server content by using the names to identify records
    def base_mocked_lookup(rec, known):
        # Unknown names yield None.
        return known.get(rec.name)

    cache = []

    def trivial_cache_lookup(stuff):
        print("current cache", cache)
        return stuff if stuff.name in cache else None

    def trivial_cache_add(stuff):
        cache.append(stuff.name)

    def install_remote_lookup(known):
        # Replace the adapter's retrieval by a lookup in the given dict.
        crawler.identifiableAdapter.retrieve_identifiable = Mock(
            side_effect=partial(base_mocked_lookup, known=known))

    def check_b_inserted_a_updated(inserts, updates):
        assert len(inserts) == 1
        assert inserts[0].name == "B"
        assert len(updates) == 1
        assert updates[0].name == "A"

    crawler.get_identifiable_from_local_cache = Mock(side_effect=trivial_cache_lookup)
    crawler.add_identifiable_to_local_cache = Mock(side_effect=trivial_cache_add)
    crawler.copy_attributes = Mock()

    # a record that is found remotely and should be added to the update list and one that is not
    # found and should be added to the insert one
    remote_known = {"A": db.Record(id=1111, name="A")}
    records = [db.Record(name="A"), db.Record(name="B")]
    install_remote_lookup(remote_known)
    inserts, updates = crawler.split_into_inserts_and_updates(records)
    print(crawler.identifiableAdapter.retrieve_identifiable.call_args_list)
    print(records)
    for rec in records:
        crawler.identifiableAdapter.retrieve_identifiable.assert_any_call(rec)
    check_b_inserted_a_updated(inserts, updates)

    # reset cache
    cache.clear()

    # try it with a reference
    rec_a = db.Record(name="A")
    rec_b = db.Record(name="B")
    rec_b.add_property("A", rec_a)
    install_remote_lookup(remote_known)
    inserts, updates = crawler.split_into_inserts_and_updates([rec_a, rec_b])
    check_b_inserted_a_updated(inserts, updates)

    # reset cache
    cache.clear()

    # try circular: A and B reference each other
    rec_a = db.Record(name="A")
    rec_b = db.Record(name="B")
    rec_b.add_property("A", rec_a)
    rec_a.add_property("B", rec_b)
    with raises(RuntimeError):
        crawler.split_into_inserts_and_updates([rec_a, rec_b])