Skip to content
Snippets Groups Projects
Commit f0a56809 authored by Florian Spreckelsen's avatar Florian Spreckelsen
Browse files

TST: Add integration test for crawler_main with list of dirs

parent 0cd3c0c1
No related branches found
No related tags found
2 merge requests!217TST: Make NamedTemporaryFiles Windows-compatible,!208ENH: Allow crawler_main to operate on a list of paths
# This file is a part of the LinkAhead Project.
#
# Copyright (C) 2024 Indiscale GmbH <info@indiscale.com>
# 2024 Florian Spreckelsen <f.spreckelsen@indiscale.com>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
#
import logging
from pathlib import Path
import linkahead as db
from caoscrawler import crawl
from caoscrawler.crawl import (crawler_main, SecurityMode)
from linkahead.utils.register_tests import clear_database, set_test_key
# Register the key identifying this test suite with linkahead's test
# registration helper.
# NOTE(review): presumably the ``clear_database`` fixture only operates on a
# server that was registered with the same key -- confirm against
# linkahead.utils.register_tests.
set_test_key("10b128cf8a1372f30aa3697466bb55e76974e0c16a599bb44ace88f19c8f61e2")
# Directory containing this test module; the tests below build their
# test-data paths relative to it.
INTTESTDIR = Path(__file__).parent
def test_list_of_paths(clear_database, monkeypatch):
    """Run ``crawler_main`` once on a list of two directories.

    The test expects one ``TestType`` record per directory (``Test1`` and
    ``Test2``) and exactly one call to the (mocked) status-record update,
    reporting two inserts, no updates and status ``"OK"``.
    """
    # Stand-in for the status record: the mock below stores whatever the
    # crawler reports here and counts how often it was called.
    status_log = {"n_calls": 0}

    def _record_status(run_id, n_inserts, n_updates, status):
        print("Update mocked status")
        status_log.update(
            run_id=run_id,
            n_inserts=n_inserts,
            n_updates=n_updates,
            status=status,
        )
        status_log["n_calls"] += 1

    monkeypatch.setattr(crawl, "_update_status_record", _record_status)

    # Mock the SSS environment.
    monkeypatch.setenv("SHARED_DIR", "/tmp")

    # Only a single dummy RecordType is needed.
    db.RecordType(name="TestType").insert()

    data_root = INTTESTDIR / "test_data" / "crawler_main_with_list_of_dirs"
    crawler_main(
        [data_root / subdir for subdir in ("dir1", "dir2")],
        cfood_file_name=data_root / "cfood.yml",
        identifiables_definition_file=data_root / "identifiable.yml"
    )

    found = db.execute_query("FIND TestType")
    assert len(found) == 2
    found_names = [record.name for record in found]
    assert "Test1" in found_names
    assert "Test2" in found_names

    # One status update for the whole list, not one per directory.
    assert status_log["n_inserts"] == 2
    assert status_log["n_updates"] == 0
    assert status_log["status"] == "OK"
    assert status_log["n_calls"] == 1
def test_not_implemented_list_with_authorization(caplog, clear_database):
    """Check the error reported for a list of paths with a security mode.

    Calling ``crawler_main`` with a list of directories and
    ``securityMode=SecurityMode.RETRIEVE`` is expected to log exactly one
    error stating that this is only implemented for single paths, and to
    leave no ``TestType`` records behind.
    """
    db.RecordType(name="TestType").insert()

    data_root = INTTESTDIR / "test_data" / "crawler_main_with_list_of_dirs"

    # This is not implemented yet, so check the log for the correct error.
    crawler_main(
        [data_root / subdir for subdir in ("dir1", "dir2")],
        cfood_file_name=data_root / "cfood.yml",
        identifiables_definition_file=data_root / "identifiable.yml",
        securityMode=SecurityMode.RETRIEVE
    )

    # caplog.record_tuples holds (logger_name, level, message) triples.
    error_messages = [
        message
        for _, level, message in caplog.record_tuples
        if level == logging.ERROR
    ]
    assert len(error_messages) == 1
    assert "currently implemented only for single paths, not for lists of paths" in error_messages[0]

    # No inserts after the errors
    assert len(db.execute_query("FIND TestType")) == 0
---
metadata:
crawler-version: 0.10.2
---
BaseDirElement:
  type: Directory
  match: ^dir(?P<dir_number>[0-9]+)$$
  records:
    TestType:
      name: Test$dir_number
TestType:
- name
# This file is a part of the CaosDB Project.
# This file is a part of the LinkAhead Project.
#
# Copyright (C) 2022 Indiscale GmbH <info@indiscale.com>
# 2022 Florian Spreckelsen <f.spreckelsen@indiscale.com>
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment