Review notes (text before the "diff --git" line is ignored by patch tools):
  * Crawler no longer keeps crawl results; crawl_directory() returns
    (crawled_data, debug_tree) and synchronize() takes the crawled data.
  * The crawler / crawler_extended fixtures now return the tuple
    (cr, crawled_data, debug_tree); tests index into it.
  * Fixed in this revision of the patch: synchronize([1]) -> synchronize(crawler[1]),
    the malformed synchronize(...) calls, and leftover cr.crawled_data /
    cr2.crawled_data reads in test_file_update.
  * NOTE(review): whitespace was reconstructed from a collapsed copy - verify
    context lines against the target file. The lines between the last two
    hunks are not part of this patch; if they still call cr.synchronize(...)
    without the crawled data they need the same update. The post-image hash
    on the "index" line predates these fixes.

diff --git a/integrationtests/basic_example/test_basic.py b/integrationtests/basic_example/test_basic.py
index 0c847b08a729f3b112cbdf3c38bac31309cda125..d275a5965b055d4644f1fa5c1fc4d0e772114cf9 100755
--- a/integrationtests/basic_example/test_basic.py
+++ b/integrationtests/basic_example/test_basic.py
@@ -85,26 +85,26 @@ def ident():
 def crawl_standard_test_directory(cr: Crawler,
                                   subdir: str = "examples_article",
                                   cfood: str = "scifolder_cfood.yml"):
-    cr.crawl_directory(rfp("..", "..", "unittests", "test_directories", subdir),
-                       rfp("..", "..", "unittests", cfood))
+    return cr.crawl_directory(rfp("..", "..", "unittests", "test_directories", subdir),
+                              rfp("..", "..", "unittests", cfood))
 
 
 @pytest.fixture
 def crawler(ident):
-    cr = Crawler(debug=True, identifiableAdapter=ident)
-    crawl_standard_test_directory(cr)
-    return cr
+    cr = Crawler(identifiableAdapter=ident)
+    crawled_data, debug_tree = crawl_standard_test_directory(cr)
+    return cr, crawled_data, debug_tree
 
 
 @pytest.fixture
 def crawler_extended(ident):
-    cr = Crawler(debug=True, identifiableAdapter=ident)
-    crawl_standard_test_directory(cr, cfood="scifolder_extended.yml")
+    cr = Crawler(identifiableAdapter=ident)
+    crawled_data, debug_tree = crawl_standard_test_directory(cr, cfood="scifolder_extended.yml")
     # correct paths for current working directory
-    file_list = [r for r in cr.crawled_data if r.role == "File"]
+    file_list = [r for r in crawled_data if r.role == "File"]
     for f in file_list:
         f.file = rfp("..", "..", "unittests", "test_directories", f.file)
-    return cr
+    return cr, crawled_data, debug_tree
 
 
 def test_ambigious_lookup(clear_database, usemodel, crawler, ident):
@@ -117,7 +117,7 @@ def test_ambigious_lookup(clear_database, usemodel, crawler, ident):
 
 
 def test_single_insertion(clear_database, usemodel, crawler, ident):
-    ins, ups = crawler.synchronize()
+    ins, ups = crawler[0].synchronize(crawler[1])
 
     # This test also generates the file records.xml used in some of the unittesets:
     res = db.execute_query("FIND Record")
@@ -138,94 +138,94 @@ def test_single_insertion(clear_database, usemodel, crawler, ident):
     assert len(ups) == 0
 
     # Do a second run on the same data, there should be no changes:
-    crawler = Crawler(debug=True, identifiableAdapter=ident)
-    crawler.crawl_directory(rfp("../../unittests/test_directories", "examples_article"),
+    crawler = Crawler(identifiableAdapter=ident)
+    crawled_data, debug_tree = crawler.crawl_directory(rfp("../../unittests/test_directories", "examples_article"),
                             rfp("../../unittests/scifolder_cfood.yml"))
-    ins, ups = crawler.synchronize()
+    ins, ups = crawler.synchronize(crawled_data)
     assert len(ins) == 0
     assert len(ups) == 0
 
 
 def test_multiple_insertions(clear_database, usemodel, ident, crawler):
-    ins, ups = crawler.synchronize()
+    ins, ups = crawler[0].synchronize(crawler[1])
 
     # Do a second run on the same data, there should be no changes:
-    cr = Crawler(debug=True, identifiableAdapter=ident)
-    crawl_standard_test_directory(cr)
-    ins, ups = cr.synchronize()
+    cr = Crawler(identifiableAdapter=ident)
+    crawled_data, debug_tree = crawl_standard_test_directory(cr)
+    ins, ups = cr.synchronize(crawled_data)
     assert len(ins) == 0
     assert len(ups) == 0
 
 
 def test_insertion(clear_database, usemodel, ident, crawler):
-    ins, ups = crawler.synchronize()
+    ins, ups = crawler[0].synchronize(crawler[1])
 
     # Do a second run on the same data, there should a new insert:
-    cr = Crawler(debug=True, identifiableAdapter=ident)
-    crawl_standard_test_directory(cr, "example_insert")
-    assert len(cr.crawled_data) == 3
-    ins, ups = cr.synchronize()
+    cr = Crawler(identifiableAdapter=ident)
+    crawled_data, debug_tree = crawl_standard_test_directory(cr, "example_insert")
+    assert len(crawled_data) == 3
+    ins, ups = cr.synchronize(crawled_data)
     assert len(ins) == 1
     assert len(ups) == 0
 
     # Do it again to check whether nothing is changed:
-    cr = Crawler(debug=True, identifiableAdapter=ident)
-    crawl_standard_test_directory(cr, "example_insert")
-    assert len(cr.crawled_data) == 3
-    ins, ups = cr.synchronize()
+    cr = Crawler(identifiableAdapter=ident)
+    crawled_data, debug_tree = crawl_standard_test_directory(cr, "example_insert")
+    assert len(crawled_data) == 3
+    ins, ups = cr.synchronize(crawled_data)
     assert len(ins) == 0
     assert len(ups) == 0
 
 
 def test_insert_auth(clear_database, usemodel, ident, crawler):
-    ins, ups = crawler.synchronize()
+    ins, ups = crawler[0].synchronize(crawler[1])
 
     # Do a second run on the same data, there should a new insert:
-    cr = Crawler(debug=True, identifiableAdapter=ident, securityMode=SecurityMode.RETRIEVE)
-    crawl_standard_test_directory(cr, "example_insert")
-    assert len(cr.crawled_data) == 3
-    ins, ups = cr.synchronize()
+    cr = Crawler(identifiableAdapter=ident, securityMode=SecurityMode.RETRIEVE)
+    crawled_data, debug_tree = crawl_standard_test_directory(cr, "example_insert")
+    assert len(crawled_data) == 3
+    ins, ups = cr.synchronize(crawled_data)
     assert len(ins) == 1
     assert not ins[0].is_valid()
     nins, nups = OldCrawler.update_authorized_changes(cr.run_id)
     assert nins == 1
 
     # Do it again to check whether nothing is changed:
-    cr = Crawler(debug=True, identifiableAdapter=ident)
-    crawl_standard_test_directory(cr, "example_insert")
-    assert len(cr.crawled_data) == 3
-    ins, ups = cr.synchronize()
+    cr = Crawler(identifiableAdapter=ident)
+    crawled_data, debug_tree = crawl_standard_test_directory(cr, "example_insert")
+    assert len(crawled_data) == 3
+    ins, ups = cr.synchronize(crawled_data)
     assert len(ins) == 0
     assert len(ups) == 0
 
 
 def test_insertion_and_update(clear_database, usemodel, ident, crawler):
-    ins, ups = crawler.synchronize()
+    ins, ups = crawler[0].synchronize(crawler[1])
 
-    cr = Crawler(debug=True, identifiableAdapter=ident)
-    crawl_standard_test_directory(cr, "example_insert")
-    ins, ups = cr.synchronize()
+    cr = Crawler(identifiableAdapter=ident)
+    crawled_data, debug_tree = crawl_standard_test_directory(cr, "example_insert")
+    ins, ups = cr.synchronize(crawled_data)
 
-    cr = Crawler(debug=True, identifiableAdapter=ident)
-    crawl_standard_test_directory(cr, "example_overwrite_1")
+    cr = Crawler(identifiableAdapter=ident)
+    crawled_data, debug_tree = crawl_standard_test_directory(cr, "example_overwrite_1")
     # print(cr.crawled_data)
     # cr.save_debug_data(rfp("provenance.yml"))
-    assert len(cr.crawled_data) == 3
-    ins, ups = cr.synchronize()
+    assert len(crawled_data) == 3
+    ins, ups = cr.synchronize(crawled_data)
     assert len(ins) == 0
     assert len(ups) == 1
 
 
 def test_identifiable_update(clear_database, usemodel, ident, crawler):
-    ins, ups = crawler.synchronize()
+    ins, ups = crawler[0].synchronize(crawler[1])
 
     # Do a second run on the same data with a change in one
     # of the identifiables:
-    cr = Crawler(debug=True, identifiableAdapter=ident)
-    crawl_standard_test_directory(cr)
+    cr = Crawler(identifiableAdapter=ident)
+    crawled_data, debug_tree = crawl_standard_test_directory(cr)
 
     # Test the addition of a single property:
-    l = cr.crawled_data
+    l = crawled_data
     for record in l:
         if (record.parents[0].name == "Measurement" and
                 record.get_property("date").value == "2020-01-03"):
@@ -234,28 +234,28 @@ def test_identifiable_update(clear_database, usemodel, ident, crawler):
                 name="email", value="testperson@testaccount.test")
             print("one change")
             break
-    ins, ups = cr.synchronize()
+    ins, ups = cr.synchronize(crawled_data)
     assert len(ins) == 0
     assert len(ups) == 1
 
     # Test the change within one property:
-    cr = Crawler(debug=True, identifiableAdapter=ident)
-    crawl_standard_test_directory(cr)
-    l = cr.crawled_data
+    cr = Crawler(identifiableAdapter=ident)
+    crawled_data, debug_tree = crawl_standard_test_directory(cr)
+    l = crawled_data
     for record in l:
         if (record.parents[0].name == "Measurement" and
                 record.get_property("date").value == "2020-01-03"):
             record.add_property(name="email", value="testperson@coolmail.test")
             print("one change")
             break
-    ins, ups = cr.synchronize()
+    ins, ups = cr.synchronize(crawled_data)
     assert len(ins) == 0
     assert len(ups) == 1
 
     # Changing the date should result in a new insertion:
-    cr = Crawler(debug=True, identifiableAdapter=ident)
-    crawl_standard_test_directory(cr)
-    l = cr.crawled_data
+    cr = Crawler(identifiableAdapter=ident)
+    crawled_data, debug_tree = crawl_standard_test_directory(cr)
+    l = crawled_data
     for record in l:
         if (record.parents[0].name == "Measurement" and
                 record.get_property("date").value == "2020-01-03"):
@@ -263,30 +263,30 @@ def test_identifiable_update(clear_database, usemodel, ident, crawler):
             record.get_property("date").value = "2012-01-02"
             print("one change")
             break
-    ins, ups = cr.synchronize()
+    ins, ups = cr.synchronize(crawled_data)
     assert len(ins) == 1
     assert len(ups) == 0
 
 
 def test_file_insertion_dry(clear_database, usemodel, ident):
-    crawler_extended = Crawler(debug=True, identifiableAdapter=ident)
-    crawl_standard_test_directory(
+    crawler_extended = Crawler(identifiableAdapter=ident)
+    crawled_data, debug_tree = crawl_standard_test_directory(
         crawler_extended, cfood="scifolder_extended.yml")
-    file_list = [r for r in crawler_extended.crawled_data if r.role == "File"]
+    file_list = [r for r in crawled_data if r.role == "File"]
     assert len(file_list) == 11
 
     for f in file_list:
         assert f.path.endswith("README.md")
         assert f.path[1:] == f.file
 
-    ins, ups = crawler_extended.synchronize(commit_changes=False)
+    ins, ups = crawler_extended.synchronize(crawled_data, commit_changes=False)
     assert len(ups) == 0
     file_list_ins = [r for r in ins if r.role == "File"]
     assert len(file_list_ins) == 11
 
 
 def test_file_insertion(clear_database, usemodel, ident, crawler_extended):
-    ins, ups = crawler_extended.synchronize(commit_changes=True)
+    ins, ups = crawler_extended[0].synchronize(crawler_extended[1], commit_changes=True)
     file_list_ins = [r for r in ins if r.role == "File"]
     assert len(file_list_ins) == 11
 
@@ -302,11 +302,11 @@ def test_file_insertion(clear_database, usemodel, ident, crawler_extended):
 
 
 def test_file_update(clear_database, usemodel, ident, crawler_extended):
-    ins1, ups1 = crawler_extended.synchronize(commit_changes=True)
+    ins1, ups1 = crawler_extended[0].synchronize(crawler_extended[1], commit_changes=True)
     file_list_ins = [r for r in ins1 if r.role == "File"]
 
-    cr = Crawler(debug=True, identifiableAdapter=ident)
-    crawl_standard_test_directory(cr, cfood="scifolder_extended.yml")
+    cr = Crawler(identifiableAdapter=ident)
+    crawled_data, debug_tree = crawl_standard_test_directory(cr, cfood="scifolder_extended.yml")
 
-    file_list = [r for r in cr.crawled_data if r.role == "File"]
+    file_list = [r for r in crawled_data if r.role == "File"]
     for f in file_list:
@@ -320,13 +320,13 @@ def test_file_update(clear_database, usemodel, ident, crawler_extended):
     assert len(res) == 11
     assert len(res[0].parents) == 0
 
-    cr2 = Crawler(debug=True, identifiableAdapter=ident)
-    crawl_standard_test_directory(cr2, cfood="scifolder_extended2.yml")
+    cr2 = Crawler(identifiableAdapter=ident)
+    crawled_data, debug_tree = crawl_standard_test_directory(cr2, cfood="scifolder_extended2.yml")
 
-    file_list = [r for r in cr2.crawled_data if r.role == "File"]
+    file_list = [r for r in crawled_data if r.role == "File"]
     for f in file_list:
         f.file = rfp("..", "..", "unittests", "test_directories", f.file)
-    ins3, ups3 = cr2.synchronize(commit_changes=True)
+    ins3, ups3 = cr2.synchronize(crawled_data, commit_changes=True)
     assert len(ups3) == 11
 
     res = db.execute_query("Find File")