diff --git a/src/doc/how-to-upgrade.md b/src/doc/how-to-upgrade.md index 931fa0cd2f2d621c89c35046d6df4ba6ac9b7a1e..4efc78280ca9ddbb893f166ee3530b3363684081 100644 --- a/src/doc/how-to-upgrade.md +++ b/src/doc/how-to-upgrade.md @@ -1,6 +1,18 @@ # How to upgrade +## 0.4.0 to 0.5.0 +The crawler was split into two modules: the scanner and the crawler. The scanner creates a Record +structure from the data and the crawler synchronizes this with the server. Due to this change you +should: +- Remove the `debug` argument from the Crawler constructor. For debugging supply a DebugTree as + argument to functions like the scanner. +- Remove the `generalStore` argument from the Crawler constructor. A store can no longer be + provided to the crawler. +- `load_definition` and `initialize_converters` are now part of the scanner module +- `crawl_directory` is replaced by `scan_directory` of the scanner module +- `start_crawling` is replaced by `scan_structure_elements` of the scanner module + ## 0.2.x to 0.3.0 DictElementConverter (old: DictConverter) now can use "match" keywords. If none are in the definition, the behavior is as before. If you had "match", diff --git a/unittests/scifolder_cfood.yml b/unittests/scifolder_cfood.yml index dce219b751c3e980662a1eaa4904e1163d9836a0..9d6e8cf3ea325ad14641530f2e6cafd43f0dc1bb 100644 --- a/unittests/scifolder_cfood.yml +++ b/unittests/scifolder_cfood.yml @@ -2,6 +2,10 @@ # The full scifolder cfood will be developed here: # https://gitlab.indiscale.com/caosdb/src/crawler-cfoods/scifolder-cfood +--- +metadata: + crawler-version: 0.3.1 +--- Definitions: type: Definitions #include "description.yml" diff --git a/unittests/test_crawler.py b/unittests/test_crawler.py new file mode 100644 index 0000000000000000000000000000000000000000..b603c62aec78cccd0d1782c33aacc2230dc8a7fd --- /dev/null +++ b/unittests/test_crawler.py @@ -0,0 +1,72 @@ +#!/usr/bin/env python3 +# encoding: utf-8 +# +# This file is a part of the CaosDB Project. 
+# +# Copyright (C) 2023 Indiscale GmbH <info@indiscale.com> +# Copyright (C) 2023 Henrik tom Wörden <h.tomwoerden@indiscale.com> +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see <https://www.gnu.org/licenses/>. +# + +""" +Test the deprecation warnings emitted by the Crawler class +""" +import json +import os + +from pytest import raises + +import caosdb as db + +from caoscrawler.stores import GeneralStore +from caoscrawler.crawl import Crawler +import warnings + +from test_tool import rfp +import pytest + + +@pytest.mark.filterwarnings("ignore::DeprecationWarning") +def test_constructor(): + with warnings.catch_warnings(record=True) as w: + # Always trigger warnings so that each deprecated call below is recorded in w. + warnings.simplefilter("always") + + Crawler(debug=True) + assert issubclass(w[-1].category, DeprecationWarning) + assert "The debug argument of the Crawler class" in str(w[-1].message) + + Crawler(generalStore=GeneralStore()) + assert issubclass(w[-1].category, DeprecationWarning) + assert "The generalStore argument of the Crawler" in str(w[-1].message) + + +@pytest.mark.filterwarnings("ignore::DeprecationWarning") +def test_deprecated_functions(): + with warnings.catch_warnings(record=True) as w: + # Always trigger warnings so that each deprecated call below is recorded in w. 
+ warnings.simplefilter("always") + cr = Crawler() + cr.crawl_directory(".", rfp("scifolder_cfood.yml")) + assert issubclass(w[-1].category, DeprecationWarning) + assert "The function crawl_directory in the crawl" in str(w[-1].message) + + cr.start_crawling([], {}, {}) + assert issubclass(w[-1].category, DeprecationWarning) + assert "The function start_crawling in the crawl module" in str(w[-1].message) + + cr.crawled_data + assert issubclass(w[-1].category, DeprecationWarning) + assert "The use of self.crawled_data is depricated" in str(w[-1].message) diff --git a/unittests/test_tool.py b/unittests/test_tool.py index 94266278cd2186a7df3b40a7457cc8c8bfc54ce3..08b3a0e4f9623e996540746ac408801090b97aa3 100755 --- a/unittests/test_tool.py +++ b/unittests/test_tool.py @@ -982,6 +982,8 @@ def test_split_restricted_path(): assert ["el", "el"] == split_restricted_path("/el/el") +# The deprecation warning is expected in this test; filter it so that it does not hinder running +# the tests with -Werror. @pytest.mark.filterwarnings("ignore:The prefix:DeprecationWarning") def test_deprecated_prefix_option(): """Test that calling the crawler's main function with the deprecated