From f0a56809211896b0387ffd0b656d5668b85d6466 Mon Sep 17 00:00:00 2001 From: Florian Spreckelsen <f.spreckelsen@indiscale.com> Date: Thu, 5 Dec 2024 18:39:21 +0100 Subject: [PATCH] TST: Add integration test for crawler_main with list of dirs --- integrationtests/test_crawler_main.py | 91 +++++++++++++++++++ .../crawler_main_with_list_of_dirs/cfood.yml | 10 ++ .../dir1/.gitkeep | 0 .../dir2/.gitkeep | 0 .../identifiable.yml | 2 + integrationtests/test_issues.py | 2 +- 6 files changed, 104 insertions(+), 1 deletion(-) create mode 100644 integrationtests/test_crawler_main.py create mode 100644 integrationtests/test_data/crawler_main_with_list_of_dirs/cfood.yml create mode 100644 integrationtests/test_data/crawler_main_with_list_of_dirs/dir1/.gitkeep create mode 100644 integrationtests/test_data/crawler_main_with_list_of_dirs/dir2/.gitkeep create mode 100644 integrationtests/test_data/crawler_main_with_list_of_dirs/identifiable.yml diff --git a/integrationtests/test_crawler_main.py b/integrationtests/test_crawler_main.py new file mode 100644 index 00000000..3c0ec57e --- /dev/null +++ b/integrationtests/test_crawler_main.py @@ -0,0 +1,91 @@ +# This file is a part of the LinkAhead Project. +# +# Copyright (C) 2024 Indiscale GmbH <info@indiscale.com> +# 2024 Florian Spreckelsen <f.spreckelsen@indiscale.com> +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see <https://www.gnu.org/licenses/>. +# + +import logging + +from pathlib import Path + +import linkahead as db + +from caoscrawler import crawl +from caoscrawler.crawl import (crawler_main, SecurityMode) +from linkahead.utils.register_tests import clear_database, set_test_key + +set_test_key("10b128cf8a1372f30aa3697466bb55e76974e0c16a599bb44ace88f19c8f61e2") + +INTTESTDIR = Path(__file__).parent + + +def test_list_of_paths(clear_database, monkeypatch): + + # Mock the status record + dummy_status = { + "n_calls": 0 + } + + def _mock_update_status_record(run_id, n_inserts, n_updates, status): + print("Update mocked status") + dummy_status["run_id"] = run_id + dummy_status["n_inserts"] = n_inserts + dummy_status["n_updates"] = n_updates + dummy_status["status"] = status + dummy_status["n_calls"] += 1 + monkeypatch.setattr(crawl, "_update_status_record", _mock_update_status_record) + + # mock SSS environment + monkeypatch.setenv("SHARED_DIR", "/tmp") + + # We need only one dummy RT + rt = db.RecordType(name="TestType").insert() + basepath = INTTESTDIR / "test_data" / "crawler_main_with_list_of_dirs" + dirlist = [basepath / "dir1", basepath / "dir2"] + crawler_main( + dirlist, + cfood_file_name=basepath / "cfood.yml", + identifiables_definition_file=basepath / "identifiable.yml" + ) + recs = db.execute_query("FIND TestType") + assert len(recs) == 2 + assert "Test1" in [r.name for r in recs] + assert "Test2" in [r.name for r in recs] + + assert dummy_status["n_inserts"] == 2 + assert dummy_status["n_updates"] == 0 + assert dummy_status["status"] == "OK" + assert dummy_status["n_calls"] == 1 + + +def test_not_implemented_list_with_authorization(caplog, clear_database): + + rt = db.RecordType(name="TestType").insert() + basepath = INTTESTDIR / "test_data" / "crawler_main_with_list_of_dirs" + dirlist = [basepath / "dir1", basepath / "dir2"] + + # This is not implemented yet, so check log for correct error. + crawler_main( + dirlist, + cfood_file_name=basepath / "cfood.yml", + identifiables_definition_file=basepath / "identifiable.yml", + securityMode=SecurityMode.RETRIEVE + ) + err_tuples = [t for t in caplog.record_tuples if t[1] == logging.ERROR] + assert len(err_tuples) == 1 + assert "currently implemented only for single paths, not for lists of paths" in err_tuples[0][2] + # No inserts after the errors + assert len(db.execute_query("FIND TestType")) == 0 diff --git a/integrationtests/test_data/crawler_main_with_list_of_dirs/cfood.yml b/integrationtests/test_data/crawler_main_with_list_of_dirs/cfood.yml new file mode 100644 index 00000000..c7f22ce0 --- /dev/null +++ b/integrationtests/test_data/crawler_main_with_list_of_dirs/cfood.yml @@ -0,0 +1,10 @@ +--- +metadata: + crawler-version: 0.10.2 +--- +BaseDirElement: + type: Directory + match: ^dir(?P<dir_number>[0-9]+)$$ + records: + TestType: + name: Test$dir_number diff --git a/integrationtests/test_data/crawler_main_with_list_of_dirs/dir1/.gitkeep b/integrationtests/test_data/crawler_main_with_list_of_dirs/dir1/.gitkeep new file mode 100644 index 00000000..e69de29b diff --git a/integrationtests/test_data/crawler_main_with_list_of_dirs/dir2/.gitkeep b/integrationtests/test_data/crawler_main_with_list_of_dirs/dir2/.gitkeep new file mode 100644 index 00000000..e69de29b diff --git a/integrationtests/test_data/crawler_main_with_list_of_dirs/identifiable.yml b/integrationtests/test_data/crawler_main_with_list_of_dirs/identifiable.yml new file mode 100644 index 00000000..6d608cec --- /dev/null +++ b/integrationtests/test_data/crawler_main_with_list_of_dirs/identifiable.yml @@ -0,0 +1,2 @@ +TestType: + - name diff --git a/integrationtests/test_issues.py b/integrationtests/test_issues.py index cb1e2e09..c699e0ab 100644 --- a/integrationtests/test_issues.py +++ b/integrationtests/test_issues.py @@ -1,4 +1,4 @@ -# This file is a part of the CaosDB Project. +# This file is a part of the LinkAhead Project. # # Copyright (C) 2022 Indiscale GmbH <info@indiscale.com> # 2022 Florian Spreckelsen <f.spreckelsen@indiscale.com> -- GitLab