diff --git a/integrationtests/basic_example/test_basic.py b/integrationtests/basic_example/test_basic.py index af4fd078b627201233cf2ffec6d3b87837084971..8a633689e8803791a48a0f2df7c267071e7e67bd 100755 --- a/integrationtests/basic_example/test_basic.py +++ b/integrationtests/basic_example/test_basic.py @@ -27,6 +27,7 @@ an integration test module that does basic integration tests """ from caosadvancedtools.crawler import Crawler as OldCrawler +from caoscrawler.debug_tree import DebugTree import os from caosdb import EmptyUniqueQueryError import argparse @@ -36,6 +37,7 @@ from caoscrawler import Crawler, SecurityMode from caoscrawler.identifiable import Identifiable import caosdb as db from caoscrawler.identifiable_adapters import CaosDBIdentifiableAdapter +from caoscrawler.scanner import scan_directory import pytest from caosadvancedtools.models.parser import parse_model_from_yaml import yaml @@ -82,24 +84,28 @@ def ident(): return ident -def crawl_standard_test_directory(cr: Crawler, - subdir: str = "examples_article", - cfood: str = "scifolder_cfood.yml"): - return cr.crawl_directory(rfp("..", "..", "unittests", "test_directories", subdir), - rfp("..", "..", "unittests", cfood)) +def crawl_standard_test_directory(subdir: str = "examples_article", + cfood: str = "scifolder_cfood.yml", + debug_tree=None): + return scan_directory(rfp("..", "..", "unittests", "test_directories", subdir), + rfp("..", "..", "unittests", cfood), + debug_tree=debug_tree) @pytest.fixture def crawler(ident): cr = Crawler(identifiableAdapter=ident) - crawled_data = crawl_standard_test_directory(cr) + debug_tree = DebugTree() + crawled_data = crawl_standard_test_directory(debug_tree=debug_tree) return cr, crawled_data, debug_tree @pytest.fixture def crawler_extended(ident): cr = Crawler(identifiableAdapter=ident) - crawled_data = crawl_standard_test_directory(cr, cfood="scifolder_extended.yml") + debug_tree = DebugTree() + crawled_data = crawl_standard_test_directory( + cfood="scifolder_extended.yml", debug_tree=debug_tree) # correct paths for current working directory file_list = [r for r in crawled_data if r.role == "File"] for f in file_list: @@ -139,8 +145,8 @@ def test_single_insertion(clear_database, usemodel, crawler, ident): # Do a second run on the same data, there should be no changes: crawler = Crawler(identifiableAdapter=ident) - crawled_data = crawler.crawl_directory(rfp("../../unittests/test_directories", "examples_article"), - rfp("../../unittests/scifolder_cfood.yml")) + crawled_data = scan_directory(rfp("../../unittests/test_directories", "examples_article"), + rfp("../../unittests/scifolder_cfood.yml")) ins, ups = crawler.synchronize(crawled_data=crawled_data) assert len(ins) == 0 assert len(ups) == 0 @@ -151,7 +157,7 @@ def test_multiple_insertions(clear_database, usemodel, ident, crawler): # Do a second run on the same data, there should be no changes: cr = Crawler(identifiableAdapter=ident) - crawled_data = crawl_standard_test_directory(cr) + crawled_data = crawl_standard_test_directory() ins, ups = cr.synchronize(crawled_data=crawled_data) assert len(ins) == 0 assert len(ups) == 0 @@ -162,7 +168,7 @@ def test_insertion(clear_database, usemodel, ident, crawler): # Do a second run on the same data, there should a new insert: cr = Crawler(identifiableAdapter=ident) - crawled_data = crawl_standard_test_directory(cr, "example_insert") + crawled_data = crawl_standard_test_directory("example_insert") assert len(crawled_data) == 3 ins, ups = cr.synchronize(crawled_data=crawled_data) assert len(ins) == 1 @@ -170,7 +176,7 @@ def test_insertion(clear_database, usemodel, ident, crawler): # Do it again to check whether nothing is changed: cr = Crawler(identifiableAdapter=ident) - crawled_data = crawl_standard_test_directory(cr, "example_insert") + crawled_data = crawl_standard_test_directory("example_insert") assert len(crawled_data) == 3 ins, ups = cr.synchronize(crawled_data=crawled_data) assert len(ins) == 0 @@ -182,7 +188,7 @@ def test_insert_auth(clear_database, usemodel, ident, crawler): # Do a second run on the same data, there should a new insert: cr = Crawler(identifiableAdapter=ident, securityMode=SecurityMode.RETRIEVE) - crawled_data = crawl_standard_test_directory(cr, "example_insert") + crawled_data = crawl_standard_test_directory("example_insert") assert len(crawled_data) == 3 ins, ups = cr.synchronize(crawled_data=crawled_data) assert len(ins) == 1 @@ -192,7 +198,7 @@ def test_insert_auth(clear_database, usemodel, ident, crawler): # Do it again to check whether nothing is changed: cr = Crawler(identifiableAdapter=ident) - crawled_data = crawl_standard_test_directory(cr, "example_insert") + crawled_data = crawl_standard_test_directory("example_insert") assert len(crawled_data) == 3 ins, ups = cr.synchronize(crawled_data=crawled_data) assert len(ins) == 0 @@ -203,11 +209,11 @@ def test_insertion_and_update(clear_database, usemodel, ident, crawler): ins, ups = crawler[0].synchronize(crawled_data=crawler[1]) cr = Crawler(identifiableAdapter=ident) - crawled_data = crawl_standard_test_directory(cr, "example_insert") + crawled_data = crawl_standard_test_directory("example_insert") ins, ups = cr.synchronize(crawled_data=crawled_data) cr = Crawler(identifiableAdapter=ident) - crawled_data = crawl_standard_test_directory(cr, "example_overwrite_1") + crawled_data = crawl_standard_test_directory("example_overwrite_1") # cr.save_debug_data(rfp("provenance.yml")) assert len(crawled_data) == 3 ins, ups = cr.synchronize(crawled_data=crawled_data) @@ -221,7 +227,7 @@ def test_identifiable_update(clear_database, usemodel, ident, crawler): # Do a second run on the same data with a change in one # of the identifiables: cr = Crawler(identifiableAdapter=ident) - crawled_data = crawl_standard_test_directory(cr) + crawled_data = crawl_standard_test_directory() # Test the addition of a single property: l = crawled_data @@ -239,7 +245,7 @@ def test_identifiable_update(clear_database, usemodel, ident, crawler): # Test the change within one property: cr = Crawler(identifiableAdapter=ident) - crawled_data = crawl_standard_test_directory(cr) + crawled_data = crawl_standard_test_directory() l = crawled_data for record in l: if (record.parents[0].name == "Measurement" and @@ -253,7 +259,7 @@ def test_identifiable_update(clear_database, usemodel, ident, crawler): # Changing the date should result in a new insertion: cr = Crawler(identifiableAdapter=ident) - crawled_data = crawl_standard_test_directory(cr) + crawled_data = crawl_standard_test_directory() l = crawled_data for record in l: if (record.parents[0].name == "Measurement" and @@ -270,7 +276,7 @@ def test_identifiable_update(clear_database, usemodel, ident, crawler): def test_file_insertion_dry(clear_database, usemodel, ident): crawler_extended = Crawler(identifiableAdapter=ident) crawled_data = crawl_standard_test_directory( - crawler_extended, cfood="scifolder_extended.yml") + cfood="scifolder_extended.yml") file_list = [r for r in crawled_data if r.role == "File"] assert len(file_list) == 11 @@ -286,7 +292,7 @@ def test_file_insertion_dry(clear_database, usemodel, ident): def test_file_insertion(clear_database, usemodel, ident, crawler_extended): ins, ups = crawler_extended[0].synchronize( - crawled_data=crawler_extended[1], commit_changes=True) + crawled_data=deepcopy(crawler_extended[1]), commit_changes=True) file_list_ins = [r for r in ins if r.role == "File"] assert len(file_list_ins) == 11 @@ -307,7 +313,7 @@ def test_file_update(clear_database, usemodel, ident, crawler_extended): file_list_ins = [r for r in ins1 if r.role == "File"] cr = Crawler(identifiableAdapter=ident) - crawled_data = crawl_standard_test_directory(cr, cfood="scifolder_extended.yml") + crawled_data = crawl_standard_test_directory(cfood="scifolder_extended.yml") file_list = [r for r in crawled_data if r.role == "File"] for f in file_list: @@ -322,7 +328,7 @@ def test_file_update(clear_database, usemodel, ident, crawler_extended): assert len(res[0].parents) == 0 cr2 = Crawler(identifiableAdapter=ident) - crawled_data = crawl_standard_test_directory(cr2, cfood="scifolder_extended2.yml") + crawled_data = crawl_standard_test_directory(cfood="scifolder_extended2.yml") file_list = [r for r in crawled_data if r.role == "File"] for f in file_list: diff --git a/integrationtests/test_realworld_example.py b/integrationtests/test_realworld_example.py index f93533f63c51fe2de2a8aa27654126479242fba6..45873ddeb8b4f4a23fbcbc9225cbeea60b213cc4 100644 --- a/integrationtests/test_realworld_example.py +++ b/integrationtests/test_realworld_example.py @@ -105,9 +105,9 @@ def test_dataset(clear_database, usemodel, addfiles, caplog): identifiable_path = os.path.join(DATADIR, "identifiables.yml") crawler_definition_path = os.path.join(DATADIR, "dataset_cfoods.yml") crawler_main( - os.path.join(DATADIR, 'data'), - crawler_definition_path, - identifiable_path, + crawled_directory_path=os.path.join(DATADIR, 'data'), + cfood_file_name=crawler_definition_path, + identifiables_definition_file=identifiable_path, provenance_file=os.path.join(DATADIR, "provenance.yml"), dry_run=False, remove_prefix=DATADIR, @@ -144,9 +144,9 @@ def test_event_update(clear_database, usemodel, addfiles): crawler_definition_path = os.path.join(DATADIR, "dataset_cfoods.yml") crawler_main( - os.path.join(DATADIR, 'data'), - crawler_definition_path, - identifiable_path, + crawled_directory_path=os.path.join(DATADIR, 'data'), + cfood_file_name=crawler_definition_path, + identifiables_definition_file=identifiable_path, provenance_file=os.path.join(DATADIR, "provenance.yml"), dry_run=False, remove_prefix=DATADIR, diff --git a/integrationtests/test_use_case_simple_presentation.py b/integrationtests/test_use_case_simple_presentation.py index 5fc0f6c7d85a0fce4490c72952e711fe241a0099..0f48677d4bf64158374a0eb0865eb2b85ea715db 100644 --- a/integrationtests/test_use_case_simple_presentation.py +++ b/integrationtests/test_use_case_simple_presentation.py @@ -57,22 +57,24 @@ def test_complete_crawler(clear_database): # test that a bad value for "remove_prefix" leads to runtime error with pytest.raises(RuntimeError) as re: - crawler_main(DATADIR, - os.path.join(DATADIR, "cfood.yml"), - os.path.join(DATADIR, "identifiables.yml"), - True, - os.path.join(DATADIR, "provenance.yml"), - False, - remove_prefix="sldkfjsldf") + crawler_main( + crawled_directory_path=os.path.join(DATADIR), + cfood_file_name=os.path.join(DATADIR, "cfood.yml"), + identifiables_definition_file=os.path.join(DATADIR, "identifiables.yml"), + provenance_file=os.path.join(DATADIR, "provenance.yml"), + dry_run=False, + remove_prefix="sldkfjsldf", + ) assert "path does not start with the prefix" in str(re.value) - crawler_main(DATADIR, - os.path.join(DATADIR, "cfood.yml"), - os.path.join(DATADIR, "identifiables.yml"), - True, - os.path.join(DATADIR, "provenance.yml"), - False, - remove_prefix=os.path.abspath(DATADIR)) + crawler_main( + crawled_directory_path=os.path.join(DATADIR), + cfood_file_name=os.path.join(DATADIR, "cfood.yml"), + identifiables_definition_file=os.path.join(DATADIR, "identifiables.yml"), + provenance_file=os.path.join(DATADIR, "provenance.yml"), + dry_run=False, + remove_prefix=os.path.abspath(DATADIR), + ) res = db.execute_query("FIND Record Experiment") assert len(res) == 1 diff --git a/src/caoscrawler/crawl.py b/src/caoscrawler/crawl.py index 4578f7e3c5f4785b455b13ad73f87103eb499c97..39f4d83391bc04ae4a422dece0452c6ddec64e94 100644 --- a/src/caoscrawler/crawl.py +++ b/src/caoscrawler/crawl.py @@ -285,7 +285,6 @@ class Crawler(object): warnings.warn(DeprecationWarning( "The function crawl_directory in the crawl module is deprecated. " "Please use scan_directory from the scanner module.")) - self.crawled_directory = crawled_directory data = scan_directory(crawled_directory, crawler_definition_path,