Skip to content
Snippets Groups Projects
Commit 4088c0a0 authored by Henrik tom Wörden
Browse files

FIX: inttest

parent 2cd122ec
Branches
Tags
3 merge requests: !108 Release 0.5.0, !106 Suggestion htw, !104 Create a new scanner module and move functions from crawl module there
Pipeline #34915 failed
...@@ -108,7 +108,7 @@ def crawler_extended(ident): ...@@ -108,7 +108,7 @@ def crawler_extended(ident):
def test_ambigious_lookup(clear_database, usemodel, crawler, ident): def test_ambigious_lookup(clear_database, usemodel, crawler, ident):
ins, ups = crawler[0].synchronize(crawler[1]) ins, ups = crawler[0].synchronize(crawled_data=crawler[1])
proj = db.execute_query("FIND Project WITH identifier='SpeedOfLight'", unique=True) proj = db.execute_query("FIND Project WITH identifier='SpeedOfLight'", unique=True)
with pytest.raises(RuntimeError, match=".*unambigiously.*"): with pytest.raises(RuntimeError, match=".*unambigiously.*"):
...@@ -117,7 +117,7 @@ def test_ambigious_lookup(clear_database, usemodel, crawler, ident): ...@@ -117,7 +117,7 @@ def test_ambigious_lookup(clear_database, usemodel, crawler, ident):
def test_single_insertion(clear_database, usemodel, crawler, ident): def test_single_insertion(clear_database, usemodel, crawler, ident):
ins, ups = crawler[0].synchronize(crawler[1]) ins, ups = crawler[0].synchronize(crawled_data=crawler[1])
# This test also generates the file records.xml used in some of the unittests: # This test also generates the file records.xml used in some of the unittests:
res = db.execute_query("FIND Record") res = db.execute_query("FIND Record")
...@@ -141,30 +141,30 @@ def test_single_insertion(clear_database, usemodel, crawler, ident): ...@@ -141,30 +141,30 @@ def test_single_insertion(clear_database, usemodel, crawler, ident):
crawler = Crawler(identifiableAdapter=ident) crawler = Crawler(identifiableAdapter=ident)
crawled_data = crawler.crawl_directory(rfp("../../unittests/test_directories", "examples_article"), crawled_data = crawler.crawl_directory(rfp("../../unittests/test_directories", "examples_article"),
rfp("../../unittests/scifolder_cfood.yml")) rfp("../../unittests/scifolder_cfood.yml"))
ins, ups = crawler.synchronize(crawled_data) ins, ups = crawler.synchronize(crawled_data=crawled_data)
assert len(ins) == 0 assert len(ins) == 0
assert len(ups) == 0 assert len(ups) == 0
def test_multiple_insertions(clear_database, usemodel, ident, crawler): def test_multiple_insertions(clear_database, usemodel, ident, crawler):
ins, ups = crawler[0].synchronize(crawler[1]) ins, ups = crawler[0].synchronize(crawled_data=crawler[1])
# Do a second run on the same data, there should be no changes: # Do a second run on the same data, there should be no changes:
cr = Crawler(identifiableAdapter=ident) cr = Crawler(identifiableAdapter=ident)
crawled_data = crawl_standard_test_directory(cr) crawled_data = crawl_standard_test_directory(cr)
ins, ups = cr.synchronize(crawled_data) ins, ups = cr.synchronize(crawled_data=crawled_data)
assert len(ins) == 0 assert len(ins) == 0
assert len(ups) == 0 assert len(ups) == 0
def test_insertion(clear_database, usemodel, ident, crawler): def test_insertion(clear_database, usemodel, ident, crawler):
ins, ups = crawler[0].synchronize(crawler[1]) ins, ups = crawler[0].synchronize(crawled_data=crawler[1])
# Do a second run on the same data, there should be a new insert: # Do a second run on the same data, there should be a new insert:
cr = Crawler(identifiableAdapter=ident) cr = Crawler(identifiableAdapter=ident)
crawled_data = crawl_standard_test_directory(cr, "example_insert") crawled_data = crawl_standard_test_directory(cr, "example_insert")
assert len(crawled_data) == 3 assert len(crawled_data) == 3
ins, ups = cr.synchronize(crawled_data) ins, ups = cr.synchronize(crawled_data=crawled_data)
assert len(ins) == 1 assert len(ins) == 1
assert len(ups) == 0 assert len(ups) == 0
...@@ -172,19 +172,19 @@ def test_insertion(clear_database, usemodel, ident, crawler): ...@@ -172,19 +172,19 @@ def test_insertion(clear_database, usemodel, ident, crawler):
cr = Crawler(identifiableAdapter=ident) cr = Crawler(identifiableAdapter=ident)
crawled_data = crawl_standard_test_directory(cr, "example_insert") crawled_data = crawl_standard_test_directory(cr, "example_insert")
assert len(crawled_data) == 3 assert len(crawled_data) == 3
ins, ups = cr.synchronize(crawled_data) ins, ups = cr.synchronize(crawled_data=crawled_data)
assert len(ins) == 0 assert len(ins) == 0
assert len(ups) == 0 assert len(ups) == 0
def test_insert_auth(clear_database, usemodel, ident, crawler): def test_insert_auth(clear_database, usemodel, ident, crawler):
ins, ups = crawler[0].synchronize(crawler[1]) ins, ups = crawler[0].synchronize(crawled_data=crawler[1])
# Do a second run on the same data, there should be a new insert: # Do a second run on the same data, there should be a new insert:
cr = Crawler(identifiableAdapter=ident, securityMode=SecurityMode.RETRIEVE) cr = Crawler(identifiableAdapter=ident, securityMode=SecurityMode.RETRIEVE)
crawled_data = crawl_standard_test_directory(cr, "example_insert") crawled_data = crawl_standard_test_directory(cr, "example_insert")
assert len(crawled_data) == 3 assert len(crawled_data) == 3
ins, ups = cr.synchronize(crawled_data) ins, ups = cr.synchronize(crawled_data=crawled_data)
assert len(ins) == 1 assert len(ins) == 1
assert not ins[0].is_valid() assert not ins[0].is_valid()
nins, nups = OldCrawler.update_authorized_changes(cr.run_id) nins, nups = OldCrawler.update_authorized_changes(cr.run_id)
...@@ -194,29 +194,29 @@ def test_insert_auth(clear_database, usemodel, ident, crawler): ...@@ -194,29 +194,29 @@ def test_insert_auth(clear_database, usemodel, ident, crawler):
cr = Crawler(identifiableAdapter=ident) cr = Crawler(identifiableAdapter=ident)
crawled_data = crawl_standard_test_directory(cr, "example_insert") crawled_data = crawl_standard_test_directory(cr, "example_insert")
assert len(crawled_data) == 3 assert len(crawled_data) == 3
ins, ups = cr.synchronize(crawled_data) ins, ups = cr.synchronize(crawled_data=crawled_data)
assert len(ins) == 0 assert len(ins) == 0
assert len(ups) == 0 assert len(ups) == 0
def test_insertion_and_update(clear_database, usemodel, ident, crawler): def test_insertion_and_update(clear_database, usemodel, ident, crawler):
ins, ups = crawler[0].synchronize(crawler[1]) ins, ups = crawler[0].synchronize(crawled_data=crawler[1])
cr = Crawler(identifiableAdapter=ident) cr = Crawler(identifiableAdapter=ident)
crawled_data = crawl_standard_test_directory(cr, "example_insert") crawled_data = crawl_standard_test_directory(cr, "example_insert")
ins, ups = cr.synchronize(crawled_data) ins, ups = cr.synchronize(crawled_data=crawled_data)
cr = Crawler(identifiableAdapter=ident) cr = Crawler(identifiableAdapter=ident)
crawled_data = crawl_standard_test_directory(cr, "example_overwrite_1") crawled_data = crawl_standard_test_directory(cr, "example_overwrite_1")
# cr.save_debug_data(rfp("provenance.yml")) # cr.save_debug_data(rfp("provenance.yml"))
assert len(crawled_data) == 3 assert len(crawled_data) == 3
ins, ups = cr.synchronize(crawled_data) ins, ups = cr.synchronize(crawled_data=crawled_data)
assert len(ins) == 0 assert len(ins) == 0
assert len(ups) == 1 assert len(ups) == 1
def test_identifiable_update(clear_database, usemodel, ident, crawler): def test_identifiable_update(clear_database, usemodel, ident, crawler):
ins, ups = crawler[0].synchronize(crawler[1]) ins, ups = crawler[0].synchronize(crawled_data=crawler[1])
# Do a second run on the same data with a change in one # Do a second run on the same data with a change in one
# of the identifiables: # of the identifiables:
...@@ -233,7 +233,7 @@ def test_identifiable_update(clear_database, usemodel, ident, crawler): ...@@ -233,7 +233,7 @@ def test_identifiable_update(clear_database, usemodel, ident, crawler):
name="email", value="testperson@testaccount.test") name="email", value="testperson@testaccount.test")
print("one change") print("one change")
break break
ins, ups = cr.synchronize(crawled_data) ins, ups = cr.synchronize(crawled_data=crawled_data)
assert len(ins) == 0 assert len(ins) == 0
assert len(ups) == 1 assert len(ups) == 1
...@@ -247,7 +247,7 @@ def test_identifiable_update(clear_database, usemodel, ident, crawler): ...@@ -247,7 +247,7 @@ def test_identifiable_update(clear_database, usemodel, ident, crawler):
record.add_property(name="email", value="testperson@coolmail.test") record.add_property(name="email", value="testperson@coolmail.test")
print("one change") print("one change")
break break
ins, ups = cr.synchronize(crawled_data) ins, ups = cr.synchronize(crawled_data=crawled_data)
assert len(ins) == 0 assert len(ins) == 0
assert len(ups) == 1 assert len(ups) == 1
...@@ -262,7 +262,7 @@ def test_identifiable_update(clear_database, usemodel, ident, crawler): ...@@ -262,7 +262,7 @@ def test_identifiable_update(clear_database, usemodel, ident, crawler):
record.get_property("date").value = "2012-01-02" record.get_property("date").value = "2012-01-02"
print("one change") print("one change")
break break
ins, ups = cr.synchronize(crawled_data) ins, ups = cr.synchronize(crawled_data=crawled_data)
assert len(ins) == 1 assert len(ins) == 1
assert len(ups) == 0 assert len(ups) == 0
...@@ -278,14 +278,15 @@ def test_file_insertion_dry(clear_database, usemodel, ident): ...@@ -278,14 +278,15 @@ def test_file_insertion_dry(clear_database, usemodel, ident):
assert f.path.endswith("README.md") assert f.path.endswith("README.md")
assert f.path[1:] == f.file assert f.path[1:] == f.file
ins, ups = crawler_extended.synchronize(crawled_data, commit_changes=False) ins, ups = crawler_extended.synchronize(crawled_data=crawled_data, commit_changes=False)
assert len(ups) == 0 assert len(ups) == 0
file_list_ins = [r for r in ins if r.role == "File"] file_list_ins = [r for r in ins if r.role == "File"]
assert len(file_list_ins) == 11 assert len(file_list_ins) == 11
def test_file_insertion(clear_database, usemodel, ident, crawler_extended): def test_file_insertion(clear_database, usemodel, ident, crawler_extended):
ins, ups = crawler_extended[0].synchronize(crawler_extended[1], commit_changes=True) ins, ups = crawler_extended[0].synchronize(
crawled_data=crawler_extended[1], commit_changes=True)
file_list_ins = [r for r in ins if r.role == "File"] file_list_ins = [r for r in ins if r.role == "File"]
assert len(file_list_ins) == 11 assert len(file_list_ins) == 11
...@@ -301,7 +302,8 @@ def test_file_insertion(clear_database, usemodel, ident, crawler_extended): ...@@ -301,7 +302,8 @@ def test_file_insertion(clear_database, usemodel, ident, crawler_extended):
def test_file_update(clear_database, usemodel, ident, crawler_extended): def test_file_update(clear_database, usemodel, ident, crawler_extended):
ins1, ups1 = crawler_extended[0].synchronize(crawler_extended[1], commit_changes=True) ins1, ups1 = crawler_extended[0].synchronize(
crawled_data=crawler_extended[1], commit_changes=True)
file_list_ins = [r for r in ins1 if r.role == "File"] file_list_ins = [r for r in ins1 if r.role == "File"]
cr = Crawler(identifiableAdapter=ident) cr = Crawler(identifiableAdapter=ident)
...@@ -310,7 +312,7 @@ def test_file_update(clear_database, usemodel, ident, crawler_extended): ...@@ -310,7 +312,7 @@ def test_file_update(clear_database, usemodel, ident, crawler_extended):
file_list = [r for r in crawled_data if r.role == "File"] file_list = [r for r in crawled_data if r.role == "File"]
for f in file_list: for f in file_list:
f.file = rfp("..", "..", "unittests", "test_directories", f.file) f.file = rfp("..", "..", "unittests", "test_directories", f.file)
ins2, ups2 = cr.synchronize(crawled_data, commit_changes=True) ins2, ups2 = cr.synchronize(crawled_data=crawled_data, commit_changes=True)
assert len(ups1) == 0 assert len(ups1) == 0
assert len(ups2) == 0 assert len(ups2) == 0
...@@ -325,7 +327,7 @@ def test_file_update(clear_database, usemodel, ident, crawler_extended): ...@@ -325,7 +327,7 @@ def test_file_update(clear_database, usemodel, ident, crawler_extended):
file_list = [r for r in crawled_data if r.role == "File"] file_list = [r for r in crawled_data if r.role == "File"]
for f in file_list: for f in file_list:
f.file = rfp("..", "..", "unittests", "test_directories", f.file) f.file = rfp("..", "..", "unittests", "test_directories", f.file)
ins3, ups3 = cr2.synchronize(crawled_data, commit_changes=True) ins3, ups3 = cr2.synchronize(crawled_data=crawled_data, commit_changes=True)
assert len(ups3) == 11 assert len(ups3) == 11
res = db.execute_query("Find File") res = db.execute_query("Find File")
......
...@@ -112,7 +112,7 @@ def test_issue_23(clear_database): ...@@ -112,7 +112,7 @@ def test_issue_23(clear_database):
assert rec_crawled.get_property("prop_a") is None assert rec_crawled.get_property("prop_a") is None
# synchronize with database and update the record # synchronize with database and update the record
ins, ups = crawler.synchronize(records) ins, ups = crawler.synchronize(crawled_data=records)
assert len(ins) == 0 assert len(ins) == 0
assert len(ups) == 1 assert len(ups) == 1
......
...@@ -108,9 +108,8 @@ def test_dataset(clear_database, usemodel, addfiles, caplog): ...@@ -108,9 +108,8 @@ def test_dataset(clear_database, usemodel, addfiles, caplog):
os.path.join(DATADIR, 'data'), os.path.join(DATADIR, 'data'),
crawler_definition_path, crawler_definition_path,
identifiable_path, identifiable_path,
True, provenance_file=os.path.join(DATADIR, "provenance.yml"),
os.path.join(DATADIR, "provenance.yml"), dry_run=False,
False,
remove_prefix=DATADIR, remove_prefix=DATADIR,
# this test will fail without this prefix since the crawler would try to create new files # this test will fail without this prefix since the crawler would try to create new files
add_prefix="/extroot/realworld_example" add_prefix="/extroot/realworld_example"
...@@ -148,9 +147,8 @@ def test_event_update(clear_database, usemodel, addfiles): ...@@ -148,9 +147,8 @@ def test_event_update(clear_database, usemodel, addfiles):
os.path.join(DATADIR, 'data'), os.path.join(DATADIR, 'data'),
crawler_definition_path, crawler_definition_path,
identifiable_path, identifiable_path,
True, provenance_file=os.path.join(DATADIR, "provenance.yml"),
os.path.join(DATADIR, "provenance.yml"), dry_run=False,
False,
remove_prefix=DATADIR, remove_prefix=DATADIR,
# this test will fail without this prefix since the crawler would try to create new files # this test will fail without this prefix since the crawler would try to create new files
add_prefix="/extroot/realworld_example" add_prefix="/extroot/realworld_example"
...@@ -193,7 +191,7 @@ def test_event_update(clear_database, usemodel, addfiles): ...@@ -193,7 +191,7 @@ def test_event_update(clear_database, usemodel, addfiles):
"latitude").value = 0.0 "latitude").value = 0.0
rec.get_property("Event").value[0].get_property( rec.get_property("Event").value[0].get_property(
"location").value = "Origin" "location").value = "Origin"
second_crawler.synchronize(records) second_crawler.synchronize(crawled_data=records)
# Dataset is still the same Record, but with an updated event # Dataset is still the same Record, but with an updated event
new_dataset_rec = db.Record(id=old_dataset_rec.id).retrieve() new_dataset_rec = db.Record(id=old_dataset_rec.id).retrieve()
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment