diff --git a/integrationtests/test_crawler_main.py b/integrationtests/test_crawler_main.py
new file mode 100644
index 0000000000000000000000000000000000000000..3c0ec57e4b45da2c31569d3a0bb2e683f0c9b6fd
--- /dev/null
+++ b/integrationtests/test_crawler_main.py
@@ -0,0 +1,91 @@
+# This file is a part of the LinkAhead Project.
+#
+# Copyright (C) 2024 Indiscale GmbH <info@indiscale.com>
+#               2024 Florian Spreckelsen <f.spreckelsen@indiscale.com>
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as
+# published by the Free Software Foundation, either version 3 of the
+# License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+#
+
+import logging
+
+from pathlib import Path
+
+import linkahead as db
+
+from caoscrawler import crawl
+from caoscrawler.crawl import (crawler_main, SecurityMode)
+from linkahead.utils.register_tests import clear_database, set_test_key
+
+set_test_key("10b128cf8a1372f30aa3697466bb55e76974e0c16a599bb44ace88f19c8f61e2")
+
+INTTESTDIR = Path(__file__).parent
+
+
+def test_list_of_paths(clear_database, monkeypatch):
+    """Crawl a list of two directories; check the inserted Records and the status update."""
+    # Mock the status record: store the arguments of the last call and count the calls.
+    dummy_status = {
+        "n_calls": 0
+    }
+
+    def _mock_update_status_record(run_id, n_inserts, n_updates, status):
+        print("Update mocked status")
+        dummy_status["run_id"] = run_id
+        dummy_status["n_inserts"] = n_inserts
+        dummy_status["n_updates"] = n_updates
+        dummy_status["status"] = status
+        dummy_status["n_calls"] += 1
+    monkeypatch.setattr(crawl, "_update_status_record", _mock_update_status_record)
+
+    # mock SSS environment
+    monkeypatch.setenv("SHARED_DIR", "/tmp")
+
+    # We need only one dummy RecordType (RT).
+    rt = db.RecordType(name="TestType").insert()
+    basepath = INTTESTDIR / "test_data" / "crawler_main_with_list_of_dirs"
+    dirlist = [basepath / "dir1", basepath / "dir2"]
+    crawler_main(
+        dirlist,
+        cfood_file_name=basepath / "cfood.yml",
+        identifiables_definition_file=basepath / "identifiable.yml"
+    )
+    recs = db.execute_query("FIND TestType")
+    assert len(recs) == 2
+    assert "Test1" in [r.name for r in recs]
+    assert "Test2" in [r.name for r in recs]
+
+    assert dummy_status["n_inserts"] == 2
+    assert dummy_status["n_updates"] == 0
+    assert dummy_status["status"] == "OK"
+    assert dummy_status["n_calls"] == 1
+
+
+def test_not_implemented_list_with_authorization(caplog, clear_database):
+    """A list of paths combined with a securityMode must log one error and insert nothing."""
+    rt = db.RecordType(name="TestType").insert()
+    basepath = INTTESTDIR / "test_data" / "crawler_main_with_list_of_dirs"
+    dirlist = [basepath / "dir1", basepath / "dir2"]
+
+    # This is not implemented yet, so check log for correct error.
+    crawler_main(
+        dirlist,
+        cfood_file_name=basepath / "cfood.yml",
+        identifiables_definition_file=basepath / "identifiable.yml",
+        securityMode=SecurityMode.RETRIEVE
+    )
+    err_tuples = [t for t in caplog.record_tuples if t[1] == logging.ERROR]
+    assert len(err_tuples) == 1
+    assert "currently implemented only for single paths, not for lists of paths" in err_tuples[0][2]
+    # No inserts after the errors
+    assert len(db.execute_query("FIND TestType")) == 0
diff --git a/integrationtests/test_data/crawler_main_with_list_of_dirs/cfood.yml b/integrationtests/test_data/crawler_main_with_list_of_dirs/cfood.yml
new file mode 100644
index 0000000000000000000000000000000000000000..c7f22ce07e9b401915aefde3bf7e3a78d92e2bd6
--- /dev/null
+++ b/integrationtests/test_data/crawler_main_with_list_of_dirs/cfood.yml
@@ -0,0 +1,10 @@
+---
+metadata:
+  crawler-version: 0.10.2
+---
+BaseDirElement:
+  type: Directory
+  match: ^dir(?P<dir_number>[0-9]+)$$
+  records:
+    TestType:
+      name: Test$dir_number
diff --git a/integrationtests/test_data/crawler_main_with_list_of_dirs/dir1/.gitkeep b/integrationtests/test_data/crawler_main_with_list_of_dirs/dir1/.gitkeep
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/integrationtests/test_data/crawler_main_with_list_of_dirs/dir2/.gitkeep b/integrationtests/test_data/crawler_main_with_list_of_dirs/dir2/.gitkeep
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/integrationtests/test_data/crawler_main_with_list_of_dirs/identifiable.yml b/integrationtests/test_data/crawler_main_with_list_of_dirs/identifiable.yml
new file mode 100644
index 0000000000000000000000000000000000000000..6d608cece0ae7c2aa6461fb56025a8ac8e4faf6f
--- /dev/null
+++ b/integrationtests/test_data/crawler_main_with_list_of_dirs/identifiable.yml
@@ -0,0 +1,2 @@
+TestType:
+  - name
diff --git a/integrationtests/test_issues.py b/integrationtests/test_issues.py
index cb1e2e0925dd85b9f6cadf2b56b22aface4bb468..c699e0ab84a0d928c1f84a1b421d97e1b2d848b6 100644
--- a/integrationtests/test_issues.py
+++ b/integrationtests/test_issues.py
@@ -1,4 +1,4 @@
-# This file is a part of the CaosDB Project.
+# This file is a part of the LinkAhead Project.
 #
 # Copyright (C) 2022 Indiscale GmbH <info@indiscale.com>
 #               2022 Florian Spreckelsen <f.spreckelsen@indiscale.com>