diff --git a/unittests/test_tool.py b/unittests/test_tool.py
index 12ea6fcacb696a933f1477fe9489744e5c66d024..96f0fa06cc85867810bf60598b35ca316bbf4d53 100755
--- a/unittests/test_tool.py
+++ b/unittests/test_tool.py
@@ -44,9 +44,13 @@ import yaml
 import caosdb as db
 from caosdb.apiutils import compare_entities
+import uuid
+
 
 import pytest
 from pytest import raises
 
+from caoscrawler.scanner import create_converter_registry, scan_structure_elements, scan_directory
+
 
 def rfp(*pathcomponents):
     """
@@ -72,15 +76,21 @@ def dircheckstr(*pathcomponents):
 
 
 @pytest.fixture
-def crawler():
-    crawler = Crawler(debug=True)
-    crawler.crawl_directory(rfp("test_directories", "examples_article"),
+def scanner():
+    return scan_directory(rfp("test_directories", "examples_article"),
                             rfp("scifolder_cfood.yml"))
+
+
+@pytest.fixture
+def crawler():
+    crawler = Crawler()
     return crawler
 
 
 @pytest.fixture
-def ident(crawler):
+def ident(scanner):
+    records = scanner[0]
+    debug_tree = scanner[1]
+    crawler = Crawler()
     ident = LocalStorageIdentifiableAdapter()
     crawler.identifiableAdapter = ident
 
@@ -196,7 +206,7 @@ def test_record_structure_generation(crawler):
 
 # def prepare_test_record_file():
 #     ident = LocalStorageIdentifiableAdapter()
-#     crawler = Crawler(debug=True, identifiableAdapter=ident)
+#     crawler = Crawler(identifiableAdapter=ident)
 #     crawler.crawl_directory(rfp("test_directories", "examples_article"),
 #                             rfp("scifolder_cfood.yml"))
 
@@ -209,7 +219,9 @@ def test_record_structure_generation(crawler):
 #     ident.store_state(rfp("records.xml"))
 
 
-def test_crawler_update_list(crawler, ident):
+def test_crawler_update_list(scanner, crawler, ident):
+    crawled_data = scanner[0]
+    debug_tree = scanner[1]
     # If the following assertions fail, that is a hint, that the test file records.xml has changed
     # and this needs to be updated:
     assert len(ident.get_records()) == 18
@@ -224,7 +236,7 @@ def test_crawler_update_list(crawler, ident):
     ) == 2
 
     # The crawler contains lots of duplicates, because identifiables have not been resolved yet:
-    assert len(ident.get_records()) != len(crawler.crawled_data)
+    assert len(ident.get_records()) != len(crawled_data)
 
     # Check consistency:
     # Check whether identifiables retrieved from current identifiable store return
@@ -279,8 +291,13 @@ def test_crawler_update_list(crawler, ident):
         assert r_cur.description == idr_r1.description
 
 
-def test_synchronization(crawler, ident):
-    insl, updl = crawler.synchronize(commit_changes=False)
+def test_synchronization(scanner, ident):
+    records = scanner[0]
+    debug_tree = scanner[1]
+    crawler = Crawler(ident)
+    # TODO: not ideal
+    crawler.run_id = uuid.uuid1()
+    insl, updl = crawler.synchronize(records, commit_changes=False)
     assert len(insl) == 0
     assert len(updl) == 0
 
@@ -336,7 +353,9 @@ def test_identifiable_adapter_no_identifiable(crawler, ident):
     # All persons are inserted, because they are not identifiable:
     assert len(insl) == len(pers)
 
-
+# This function is not provided anymore.
+# Additionally, the format of the provenance.yml file has been changed completely.
+@pytest.mark.xfail
 def test_provenance_debug_data(crawler):
     crawler.save_debug_data(rfp("provenance.yml"))
 
 
@@ -586,12 +605,12 @@ def mock_get_entity_by_name(name):
 
 
 def prepare_crawler_with_sec_mode(mode, ident):
-    crawler = Crawler(debug=True, securityMode=mode)
-    crawler.crawl_directory(rfp("test_directories", "examples_article"),
-                            rfp("scifolder_cfood.yml"))
+    crawler = Crawler(securityMode=mode)
+    records, debug_tree = scan_directory(rfp("test_directories", "examples_article"),
+                                         rfp("scifolder_cfood.yml"))
     crawler.identifiableAdapter = ident
 
-    return crawler
+    return crawler, records, debug_tree
 
 
 def reset_mocks(mocks):
@@ -644,19 +663,21 @@ def test_security_mode(updateCacheMock, upmock, insmock, ident):
     records_backup = deepcopy(ident._records)
 
     # trivial case: nothing to do
-    crawler = prepare_crawler_with_sec_mode(SecurityMode.RETRIEVE, ident)
-    crawler.synchronize(commit_changes=True)
+    crawler, records, debug_tree = prepare_crawler_with_sec_mode(SecurityMode.RETRIEVE, ident)
+    crawler.run_id = uuid.uuid1()  # TODO
+    crawler.synchronize(records, commit_changes=True)
     assert crawler.run_id is not None
     insmock.assert_not_called()
     upmock.assert_not_called()
     updateCacheMock.assert_not_called()
 
     # RETRIEVE: insert only
-    crawler = prepare_crawler_with_sec_mode(SecurityMode.RETRIEVE, ident)
+    crawler, records, debug_tree = prepare_crawler_with_sec_mode(SecurityMode.RETRIEVE, ident)
     # remove one element
     del ident._records[-1]
     # insert forbidden
-    crawler.synchronize(commit_changes=True)
+    crawler.run_id = uuid.uuid1()  # TODO
+    crawler.synchronize(records, commit_changes=True)
     assert crawler.run_id is not None
     insmock.assert_not_called()
     upmock.assert_not_called()
@@ -667,10 +688,11 @@ def test_security_mode(updateCacheMock, upmock, insmock, ident):
     ident._records = deepcopy(records_backup)
 
     # RETRIEVE: update only
-    crawler = prepare_crawler_with_sec_mode(SecurityMode.RETRIEVE, ident)
+    crawler, records, debug_tree = prepare_crawler_with_sec_mode(SecurityMode.RETRIEVE, ident)
     # change one element
     change_non_identifiable_prop(ident)
-    crawler.synchronize(commit_changes=True)
+    crawler.run_id = uuid.uuid1()  # TODO
+    crawler.synchronize(records, commit_changes=True)
     assert crawler.run_id is not None
     insmock.assert_not_called()
     upmock.assert_not_called()
@@ -681,10 +703,11 @@ def test_security_mode(updateCacheMock, upmock, insmock, ident):
     ident._records = deepcopy(records_backup)
 
     # INSERT: insert only
-    crawler = prepare_crawler_with_sec_mode(SecurityMode.INSERT, ident)
+    crawler, records, debug_tree = prepare_crawler_with_sec_mode(SecurityMode.INSERT, ident)
     # remove one element
     del ident._records[-1]
-    crawler.synchronize(commit_changes=True)
+    crawler.run_id = uuid.uuid1()  # TODO
+    crawler.synchronize(records, commit_changes=True)
     assert crawler.run_id is not None
     insmock.assert_called_once()
     upmock.assert_not_called()
@@ -695,10 +718,11 @@ def test_security_mode(updateCacheMock, upmock, insmock, ident):
     ident._records = deepcopy(records_backup)
 
     # INSERT: update only
-    crawler = prepare_crawler_with_sec_mode(SecurityMode.INSERT, ident)
+    crawler, records, debug_tree = prepare_crawler_with_sec_mode(SecurityMode.INSERT, ident)
    # change one element
     change_non_identifiable_prop(ident)
-    crawler.synchronize(commit_changes=True)
+    crawler.run_id = uuid.uuid1()  # TODO
+    crawler.synchronize(records, commit_changes=True)
     assert crawler.run_id is not None
     insmock.assert_not_called()
     upmock.assert_not_called()
@@ -709,11 +733,12 @@ def test_security_mode(updateCacheMock, upmock, insmock, ident):
     ident._records = deepcopy(records_backup)
 
     # INSERT: insert and update
-    crawler = prepare_crawler_with_sec_mode(SecurityMode.INSERT, ident)
+    crawler, records, debug_tree = prepare_crawler_with_sec_mode(SecurityMode.INSERT, ident)
     # change two elements
     change_non_identifiable_prop(ident)
     change_identifiable_prop(ident)
-    crawler.synchronize(commit_changes=True)
+    crawler.run_id = uuid.uuid1()  # TODO
+    crawler.synchronize(records, commit_changes=True)
     assert crawler.run_id is not None
     insmock.assert_called_once()
     upmock.assert_not_called()
@@ -908,8 +933,7 @@ def test_restricted_path(create_mock):
         }
     }
 
-    crawler = Crawler(debug=True)
-    converter_registry = crawler.load_converters(crawler_definition)
+    converter_registry = create_converter_registry(crawler_definition)
 
     # This structure is crawled
     test_dict = {
@@ -924,7 +948,7 @@ def test_restricted_path(create_mock):
     }
     # first test without a restricted_path
    restricted_path = None
-    records = crawler.start_crawling(
+    records, debug_tree = scan_structure_elements(
         DictElement("TestDict", test_dict), crawler_definition, converter_registry,
         restricted_path
     )
@@ -934,7 +958,7 @@ def test_restricted_path(create_mock):
     # test with a restricted_path but one that has no effect (single root element)
     # this also tests that the remainder of the tree is fully traversed
     restricted_path = ["TestDict"]
-    records = crawler.start_crawling(
+    records, debug_tree = scan_structure_elements(
         DictElement("TestDict", test_dict), crawler_definition, converter_registry,
         restricted_path
     )
@@ -943,7 +967,7 @@ def test_restricted_path(create_mock):
 
     # test with a restricted_path that restricts the tree (single root element)
     restricted_path = ["TestDict", "v2"]
-    records = crawler.start_crawling(
+    records, debug_tree = scan_structure_elements(
         DictElement("TestDict", test_dict), crawler_definition, converter_registry,
         restricted_path
     )
@@ -953,7 +977,7 @@ def test_restricted_path(create_mock):
     # test with a restricted_path that contains a bad element
     restricted_path = ["TestDict", "v3"]
     with raises(RuntimeError):
-        records = crawler.start_crawling(
+        records, debug_tree = scan_structure_elements(
             DictElement("TestDict", test_dict), crawler_definition, converter_registry,
             restricted_path
         )
diff --git a/unittests/test_tool_extended.py b/unittests/test_tool_extended.py
index d0b431a539a15e3e83906540c69becff437742ec..56cf6f7d9910a4b86062777c6e91bc81a0368ec5 100644
--- a/unittests/test_tool_extended.py
+++ b/unittests/test_tool_extended.py
@@ -17,6 +17,8 @@ from caosdb.apiutils import compare_entities
 import pytest
 from pytest import raises
 
+from caoscrawler.scanner import scan_directory
+
 
 def rfp(*pathcomponents):
     """
@@ -35,41 +37,9 @@ def dircheckstr(*pathcomponents, structure_element_type="Directory"):
         rfp("test_directories", "examples_article", *pathcomponents))
 
 
-@pytest.fixture
-def crawler():
-    crawler = Crawler(debug=True)
-    crawler.crawl_directory(rfp("test_directories", "examples_article"),
-                            rfp("scifolder_extended.yml"))
-    return crawler
-
-
-# @pytest.fixture
-# def ident(crawler):
-#     ident = LocalStorageIdentifiableAdapter()
-#     crawler.identifiableAdapter = ident
-
-#     ident.restore_state(rfp("records.xml"))
-
-#     ident.register_identifiable(
-#         "Person", db.RecordType()
-#         .add_parent(name="Person")
-#         .add_property(name="first_name")
-#         .add_property(name="last_name"))
-#     ident.register_identifiable(
-#         "Measurement", db.RecordType()
-#         .add_parent(name="Measurement")
-#         .add_property(name="identifier")
-#         .add_property(name="date")
.add_property(name="project")) -# ident.register_identifiable( -# "Project", db.RecordType() -# .add_parent(name="Project") -# .add_property(name="date") -# .add_property(name="identifier")) -# return ident - - -def test_file_structure_generation(crawler): +def test_file_structure_generation(): + crawled_data, debug_tree = scan_directory(rfp("test_directories", "examples_article"), + rfp("scifolder_extended.yml")) + breakpoint() sd = crawler.debug_tree[dircheckstr("SimulationData", "2020_climate-model-predict", "2020-02-01", "README.md", structure_element_type="File")]