#!/usr/bin/env python3
# encoding: utf-8
#
# This file is a part of the CaosDB Project.
#
# Copyright (C) 2023 Indiscale GmbH <info@indiscale.com>
# Copyright (C) 2023 Henrik tom Wörden <h.tomwoerden@indiscale.com>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
#

"""
Test the Crawler class.
"""
import json
import os
import warnings

import pytest
from pytest import raises

import caosdb as db

from caoscrawler.crawl import Crawler
from caoscrawler.stores import GeneralStore

from test_tool import rfp

@pytest.mark.filterwarnings("ignore::DeprecationWarning")
def test_constructor():
    with warnings.catch_warnings(record=True) as w:
        # Ignore all warnings, but always record DeprecationWarnings.
        warnings.filterwarnings("ignore")
        warnings.filterwarnings("always", category=DeprecationWarning)

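        # The deprecated 'debug' keyword argument must trigger a DeprecationWarning.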
        Crawler(debug=True)
        assert issubclass(w[-1].category, DeprecationWarning)
        assert "The debug argument of the Crawler class" in str(w[-1].message)

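        # The deprecated 'generalStore' keyword argument must also trigger a DeprecationWarning.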
        Crawler(generalStore=GeneralStore())
        assert issubclass(w[-1].category, DeprecationWarning)
        assert "The generalStore argument of the Crawler" in str(w[-1].message)


@pytest.mark.filterwarnings("ignore::DeprecationWarning")
def test_deprecated_functions():
    with warnings.catch_warnings(record=True) as w:
        # Ignore all warnings, but always record DeprecationWarnings.
        warnings.filterwarnings("ignore")
        warnings.filterwarnings("always", category=DeprecationWarning)
        cr = Crawler()
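        # The crawl_directory method is deprecated and must warn accordingly.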
        cr.crawl_directory(".", rfp("scifolder_cfood.yml"))
        print(w)
        print(w[0].message)
        assert issubclass(w[-1].category, DeprecationWarning)
        assert "The function crawl_directory in the crawl" in str(w[-1].message)

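        # The start_crawling method is deprecated as well.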
        cr.start_crawling([], {}, {})
        assert issubclass(w[-1].category, DeprecationWarning)
        assert "The function start_crawling in the crawl module" in str(w[-1].message)

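        # Accessing crawled_data directly must also emit a DeprecationWarning.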
        cr.crawled_data
        assert issubclass(w[-1].category, DeprecationWarning)
        assert "The use of self.crawled_data is depricated" in str(w[-1].message)


def test_check_whether_parent_exists():
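    # Empty inputs must yield an empty list.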
    trivial_result = Crawler.check_whether_parent_exists([], [])
    assert len(trivial_result) == 0
    assert isinstance(trivial_result, list)

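    # Records without a matching parent are not collected.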
    trivial_result2 = Crawler.check_whether_parent_exists([db.Record(), db.Record()], [])
    assert len(trivial_result2) == 0
    assert isinstance(trivial_result2, list)

    # Make sure that records with one of the given parents are collected.
    a_recs = Crawler.check_whether_parent_exists(
        [
            db.Record(id=1).add_parent("A"),
            db.Record(id=2).add_parent("B"),
            db.Record(id=3).add_parent("B"),
            db.Record(id=4).add_parent("A"),
        ], ["A"])
    a_recs_ids = [el.id for el in a_recs]
    assert 1 in a_recs_ids
    assert 4 in a_recs_ids