diff --git a/unittests/test_cfood_metadata.py b/unittests/test_cfood_metadata.py
index 09d6c88bdc27e1066ed18a9c5865cbfb95270c3a..2881a47df2418bd0d794f79291b166f091456cf3 100644
--- a/unittests/test_cfood_metadata.py
+++ b/unittests/test_cfood_metadata.py
@@ -24,6 +24,8 @@ from tempfile import NamedTemporaryFile
 
 import caoscrawler
 
+from caoscrawler.scanner import load_definition
+
 CRAWLER_VERSION = ""
 
 
@@ -46,8 +48,7 @@ def _temp_file_load(txt: str):
     with NamedTemporaryFile() as f:
         f.write(txt.encode())
         f.flush()
-        c = caoscrawler.Crawler()
-        definition = c.load_definition(f.name)
+        definition = load_definition(f.name)
     return definition
 
 
diff --git a/unittests/test_converters.py b/unittests/test_converters.py
index 4d3791fce3ceffaafe529423e4020ebd6a4231ba..25b8412126289b34b97dd83f6357964f62823a56 100644
--- a/unittests/test_converters.py
+++ b/unittests/test_converters.py
@@ -45,6 +45,7 @@ from caoscrawler.stores import GeneralStore
 from caoscrawler.structure_elements import (File, TextElement, ListElement, DictElement,
                                             BooleanElement, IntegerElement, FloatElement,
                                             Directory)
+from caoscrawler.scanner import load_definition, _load_definition_from_yaml_dict, create_converter_registry
 from test_tool import rfp
 
 
@@ -447,8 +448,7 @@ MyElement:
   type: MyNewType
   match: something
 """
-    crawler1 = Crawler()
-    one_doc_definitions = crawler1._load_definition_from_yaml_dict(
+    one_doc_definitions = _load_definition_from_yaml_dict(
         [yaml.load(one_doc_yaml, Loader=yaml.SafeLoader)])
     assert "MyElement" in one_doc_definitions
     assert one_doc_definitions["MyElement"]["type"] == "MyNewType"
@@ -466,8 +466,7 @@ MyElement:
   type: MyNewType
   match: something
 """
-    crawler2 = Crawler()
-    two_doc_definitions = crawler2._load_definition_from_yaml_dict(
+    two_doc_definitions = _load_definition_from_yaml_dict(
         list(yaml.safe_load_all(two_doc_yaml)))
     assert "MyElement" in two_doc_definitions
     assert two_doc_definitions["MyElement"]["type"] == one_doc_definitions["MyElement"]["type"]
@@ -588,8 +587,7 @@ def test_date_converter():
 
 
 def test_load_converters():
-    c = Crawler()
-    converter_registry = c.load_converters({})
+    converter_registry = create_converter_registry({})
     # The previous function call actually already asserts that all defined
     # converter classes can be loaded from their respective packages.
diff --git a/unittests/test_issues.py b/unittests/test_issues.py
index a1724e5a989190977a7ec0d86846fc2b7433ab5d..ef8c38afdd77c93ec2439157f6e7dacfa9c32c14 100644
--- a/unittests/test_issues.py
+++ b/unittests/test_issues.py
@@ -30,6 +30,8 @@ from caoscrawler.identifiable_adapters import CaosDBIdentifiableAdapter
 from caoscrawler.structure_elements import DictElement
 from test_tool import rfp
 
+from caoscrawler.scanner import create_converter_registry, scan_structure_elements
+
 
 def test_issue_10():
     """Test integer-to-float conversion in dictionaries"""
@@ -55,14 +57,13 @@ def test_issue_10():
         }
     }
 
-    crawler = Crawler(debug=True)
-    converter_registry = crawler.load_converters(crawler_definition)
+    converter_registry = create_converter_registry(crawler_definition)
 
     test_dict = {
         "float_value": 4
    }
 
-    records = crawler.start_crawling(
+    records, debug_tree = scan_structure_elements(
         DictElement("TestDict", test_dict), crawler_definition, converter_registry)
     assert len(records) == 1
     assert records[0].parents[0].name == "TestRec"
diff --git a/unittests/test_json.py b/unittests/test_json.py
index 41fd31a43389148ad6fbc4167fd3fbd4f7f2ee9f..42841da7715da8b796cbb9b85ee831bf004f36b7 100644
--- a/unittests/test_json.py
+++ b/unittests/test_json.py
@@ -36,6 +36,7 @@ import caosdb as db
 from caoscrawler.converters import JSONFileConverter
 from caoscrawler.crawl import Crawler
 from caoscrawler.structure_elements import File, JSONFile
+from caoscrawler.scanner import load_definition, create_converter_registry, scan_structure_elements
 from test_tool import rfp, dircheckstr
 
@@ -44,12 +45,11 @@ def test_json():
                                        "jsontest_cfood.yml")
     json_file_path = rfp("test_directories", "examples_json", "testjson.json")
 
-    crawler = Crawler(debug=True)
-    crawler_definition = crawler.load_definition(crawler_definition_path)
+    crawler_definition = load_definition(crawler_definition_path)
     # Load and register converter packages:
-    converter_registry = crawler.load_converters(crawler_definition)
+    converter_registry = create_converter_registry(crawler_definition)
 
-    records = crawler.start_crawling(
+    records, debug_tree = scan_structure_elements(
         JSONFile(os.path.basename(json_file_path), json_file_path),
         crawler_definition,
         converter_registry
@@ -70,8 +70,7 @@ def test_broken_validation():
     crawler_definition_path = rfp(
         "broken_cfoods", "broken_validation_path.yml")
-    crawler = Crawler()
     with raises(FileNotFoundError) as err:
-        crawler_definition = crawler.load_definition(crawler_definition_path)
+        crawler_definition = load_definition(crawler_definition_path)
 
     assert str(err.value).startswith("Couldn't find validation file")
diff --git a/unittests/test_macros.py b/unittests/test_macros.py
index b5ea5d84846f5f33853910c292132d7b5026600e..cc79731e7e8e52ab972a22151b580720c10e195c 100644
--- a/unittests/test_macros.py
+++ b/unittests/test_macros.py
@@ -25,6 +25,7 @@ from caoscrawler.macros import defmacro_constructor, macro_constructor
 from caoscrawler.macros.macro_yaml_object import macro_store
 from caoscrawler.crawl import Crawler
+from caoscrawler.scanner import load_definition
 
 from tempfile import NamedTemporaryFile
 
@@ -52,8 +53,7 @@ def _temp_file_load(txt: str):
     with NamedTemporaryFile() as f:
         f.write(txt.encode())
         f.flush()
-        c = Crawler()
-        definition = c.load_definition(f.name)
+        definition = load_definition(f.name)
     return definition
 
 
diff --git a/unittests/test_scalars_cfood.py b/unittests/test_scalars_cfood.py
index ac408b2dab0fa151c370d3ec6ffd1dced22c77d7..4fbeeed628101654a20de18210bdac683cf801f4 100644
--- a/unittests/test_scalars_cfood.py
+++ b/unittests/test_scalars_cfood.py
@@ -16,10 +16,10 @@ from test_tool import dircheckstr, rfp
 
 
 @pytest.fixture
 def crawler():
-    crawler = Crawler(debug=True)
-    crawler.crawl_directory(rfp("test_directories", "examples_article"),
-                            rfp("cfoods_scalar.yml"))
-    return crawler
+    crawler = Crawler()
+    crawled_data, debug_tree = crawler.crawl_directory(rfp("test_directories", "examples_article"),
+                                                       rfp("cfoods_scalar.yml"))
+    return crawler, crawled_data, debug_tree
@@ -39,7 +39,7 @@ def test_handle_value():
 
 
 def test_record_structure_generation(crawler):
-    subd = crawler.debug_tree[dircheckstr("DataAnalysis")]
+    subd = crawler[2].debug_tree[dircheckstr("DataAnalysis")]
     assert len(subd) == 2
     # variables store on Data Analysis node of debug tree
     if "Data" in subd[0]:
diff --git a/unittests/test_schema.py b/unittests/test_schema.py
index 0736698eb32146fb3cfbee6acbcf11f5436df27e..0d5bebce98fbc8c789c1080bcf3919f128bdbf54 100644
--- a/unittests/test_schema.py
+++ b/unittests/test_schema.py
@@ -13,6 +13,8 @@ from pytest import raises
 
 from jsonschema.exceptions import ValidationError
 
+from caoscrawler.scanner import load_definition
+
 
 def rfp(*pathcomponents):
     """
@@ -23,9 +25,8 @@ def rfp(*pathcomponents):
 
 
 def test_schema_validation():
-    cr = Crawler()
-    cr.load_definition(rfp("scifolder_cfood.yml"))
-    cr.load_definition(rfp("scifolder_extended.yml"))
+    load_definition(rfp("scifolder_cfood.yml"))
+    load_definition(rfp("scifolder_extended.yml"))
 
     with raises(ValidationError, match=".*enum.*"):
-        cr.load_definition(rfp("broken_cfoods", "broken1.yml"))
+        load_definition(rfp("broken_cfoods", "broken1.yml"))
diff --git a/unittests/test_table_converter.py b/unittests/test_table_converter.py
index abe4ac85ec4fc0a78e71c177222817e1b84e9e56..4db6f26ee6d23606ddeab2fcb3805cf33965e098 100644
--- a/unittests/test_table_converter.py
+++ b/unittests/test_table_converter.py
@@ -93,10 +93,10 @@ def dircheckstr(*pathcomponents):
 
 
 @pytest.fixture
 def crawler():
-    crawler = Crawler(debug=True)
-    crawler.crawl_directory(rfp("test_directories", "examples_tables", "ExperimentalData"),
+    crawler = Crawler()
+    crawled_data, debug_tree = crawler.crawl_directory(rfp("test_directories", "examples_tables", "ExperimentalData"),
                             rfp("test_directories", "examples_tables", "crawler_for_tables.yml"))
-    return crawler
+    return crawler, crawled_data, debug_tree
@@ -153,7 +153,7 @@ def test_convert_table(converter_registry):
 
 def test_crawl_csv_table(crawler):
     for file_ext in ["xlsx", "csv"]:
-        subd = crawler.debug_tree[dircheckstr("test1." + file_ext)]
+        subd = crawler[2].debug_tree[dircheckstr("test1." + file_ext)]
         record_experiment = subd[1]["Experiment"]
         assert isinstance(record_experiment, db.Record)
         assert isinstance(record_experiment.get_property("Measurements").value, list)
diff --git a/unittests/test_tool.py b/unittests/test_tool.py
index 5a1fa37b59f01b91779be7c992384764db5cc626..fc664d138b44af7a8aa51497ccf53cc27cdc95d9 100755
--- a/unittests/test_tool.py
+++ b/unittests/test_tool.py
@@ -48,6 +48,8 @@ from caosdb.apiutils import compare_entities
 import pytest
 from pytest import raises
 
+from caoscrawler.scanner import create_converter_registry, scan_structure_elements
+
 
 def rfp(*pathcomponents):
     """
@@ -217,6 +219,7 @@ def test_record_structure_generation(crawler):
 
 
 def test_crawler_update_list(crawler, ident):
+    crawled_data = crawler[1]
     # If the following assertions fail, that is a hint, that the test file records.xml has changed
     # and this needs to be updated:
     assert len(ident.get_records()) == 18
@@ -231,7 +234,7 @@ def test_crawler_update_list(crawler, ident):
     ) == 2
 
     # The crawler contains lots of duplicates, because identifiables have not been resolved yet:
-    assert len(ident.get_records()) != len(crawler.crawled_data)
+    assert len(ident.get_records()) != len(crawled_data)
 
     # Check consistency:
     # Check whether identifiables retrieved from current identifiable store return
@@ -287,7 +290,7 @@ def test_crawler_update_list(crawler, ident):
 
 
 def test_synchronization(crawler, ident):
-    insl, updl = crawler.synchronize(commit_changes=False)
+    insl, updl = crawler[0].synchronize(crawler[1], commit_changes=False)
     assert len(insl) == 0
     assert len(updl) == 0
 
@@ -345,7 +348,7 @@ def test_identifiable_adapter_no_identifiable(crawler, ident):
 
 
 def test_provenance_debug_data(crawler):
-    crawler.save_debug_data(rfp("provenance.yml"))
+    crawler[0].save_debug_data(crawler[2], rfp("provenance.yml"))
 
     with open(rfp("provenance.yml"), "r") as f:
         provenance = yaml.load(f, Loader=yaml.SafeLoader)
@@ -360,7 +363,7 @@ def test_provenance_debug_data(crawler):
 
 
 def test_split_into_inserts_and_updates_trivial(crawler):
-    crawler.split_into_inserts_and_updates([])
+    crawler[0].split_into_inserts_and_updates([])
 
 
 def basic_retrieve_by_name_mock_up(rec, referencing_entities=None, known=None):
@@ -374,21 +377,21 @@ def basic_retrieve_by_name_mock_up(rec, referencing_entities=None, known=None):
 
 @pytest.fixture
 def crawler_mocked_identifiable_retrieve(crawler):
     # mock retrieval of registered identifiabls: return Record with just a parent
-    crawler.identifiableAdapter.get_registered_identifiable = Mock(
+    crawler[0].identifiableAdapter.get_registered_identifiable = Mock(
         side_effect=lambda x: db.Record().add_parent(x.parents[0].name))
 
     # Simulate remote server content by using the names to identify records
     # There is only a single known Record with name A
-    crawler.identifiableAdapter.retrieve_identified_record_for_record = Mock(side_effect=partial(
+    crawler[0].identifiableAdapter.retrieve_identified_record_for_record = Mock(side_effect=partial(
         basic_retrieve_by_name_mock_up, known={"A": db.Record(id=1111, name="A")}))
-    crawler.identifiableAdapter.retrieve_identified_record_for_identifiable = Mock(
+    crawler[0].identifiableAdapter.retrieve_identified_record_for_identifiable = Mock(
         side_effect=partial(
             basic_retrieve_by_name_mock_up, known={"A": db.Record(id=1111, name="A")}))
     return crawler
 
 
 def test_split_into_inserts_and_updates_single(crawler_mocked_identifiable_retrieve):
-    crawler = crawler_mocked_identifiable_retrieve
+    crawler = crawler_mocked_identifiable_retrieve[0]
     identlist = [Identifiable(name="A", record_type="C"), Identifiable(name="B", record_type="C")]
     entlist = [db.Record(name="A").add_parent(
         "C"), db.Record(name="B").add_parent("C")]
@@ -413,7 +416,7 @@ def test_split_into_inserts_and_updates_single(crawler_mocked_identifiable_retri
 
 
 def test_split_into_inserts_and_updates_with_duplicate(crawler_mocked_identifiable_retrieve):
-    crawler = crawler_mocked_identifiable_retrieve
+    crawler = crawler_mocked_identifiable_retrieve[0]
     a = db.Record(name="A").add_parent("C")
     b = db.Record(name="B").add_parent("C")
     b.add_property("A", a)
@@ -431,7 +434,7 @@ def test_split_into_inserts_and_updates_with_duplicate(crawler_mocked_identifiab
 
 
 def test_split_into_inserts_and_updates_with_ref(crawler_mocked_identifiable_retrieve):
-    crawler = crawler_mocked_identifiable_retrieve
+    crawler = crawler_mocked_identifiable_retrieve[0]
     # try it with a reference
     a = db.Record(name="A").add_parent("C")
     b = db.Record(name="B").add_parent("C")
@@ -458,7 +461,7 @@ def test_split_into_inserts_and_updates_with_circ(crawler):
 
 
 def test_split_into_inserts_and_updates_with_complex(crawler_mocked_identifiable_retrieve):
-    crawler = crawler_mocked_identifiable_retrieve
+    crawler = crawler_mocked_identifiable_retrieve[0]
     #  A
     #  ^
     #  |
@@ -485,7 +488,7 @@ def test_split_into_inserts_and_updates_with_complex(crawler_mocked_identifiable
 
 
 def test_split_into_inserts_and_updates_with_copy_attr(crawler_mocked_identifiable_retrieve):
-    crawler = crawler_mocked_identifiable_retrieve
+    crawler = crawler_mocked_identifiable_retrieve[0]
     # assume identifiable is only the name
     a = db.Record(name="A").add_parent("C")
     a.add_property("foo", 1)
@@ -504,7 +507,7 @@ def test_split_into_inserts_and_updates_with_copy_attr(crawler_mocked_identifiab
 def test_has_missing_object_in_references(crawler):
     # Simulate remote server content by using the names to identify records
     # There are only two known Records with name A and B
-    crawler.identifiableAdapter.get_registered_identifiable = Mock(side_effect=partial(
+    crawler[0].identifiableAdapter.get_registered_identifiable = Mock(side_effect=partial(
         basic_retrieve_by_name_mock_up, known={"C": db.Record(name="C").add_parent("RTC")
                                                .add_property("d"),
                                                "D": db.Record(name="D").add_parent("RTD")
@@ -512,54 +515,54 @@ def test_has_missing_object_in_references(crawler):
                                                }))
 
     # one reference with id -> check
-    assert not crawler._has_missing_object_in_references(
+    assert not crawler[0]._has_missing_object_in_references(
         Identifiable(name="C", record_type="RTC", properties={'d': 123}), [])
 
     # one ref with Entity with id -> check
-    assert not crawler._has_missing_object_in_references(
+    assert not crawler[0]._has_missing_object_in_references(
         Identifiable(name="C", record_type="RTC", properties={'d': db.Record(id=123)
                                                               .add_parent("C")}), [])
 
     # one ref with id one with Entity with id (mixed) -> check
-    assert not crawler._has_missing_object_in_references(
+    assert not crawler[0]._has_missing_object_in_references(
         Identifiable(name="C", record_type="RTD",
                      properties={'d': 123, 'b': db.Record(id=123).add_parent("RTC")}), [])
 
     # entity to be referenced in the following
     a = db.Record(name="C").add_parent("C").add_property("d", 12311)
 
     # one ref with id one with Entity without id (but not identifying) -> fail
-    assert not crawler._has_missing_object_in_references(
+    assert not crawler[0]._has_missing_object_in_references(
         Identifiable(name="C", record_type="RTC", properties={'d': 123, 'e': a}), [])
 
     # one ref with id one with Entity without id (mixed) -> fail
-    assert not crawler._has_missing_object_in_references(
+    assert not crawler[0]._has_missing_object_in_references(
         Identifiable(name="D", record_type="RTD", properties={'d': 123, 'e': a}), [])
 
-    crawler.add_to_remote_missing_cache(a, Identifiable(name="C", record_type="RTC",
+    crawler[0].add_to_remote_missing_cache(a, Identifiable(name="C", record_type="RTC",
                                                         properties={'d': 12311}))
     # one ref with id one with Entity without id but in cache -> check
-    assert crawler._has_missing_object_in_references(
+    assert crawler[0]._has_missing_object_in_references(
         Identifiable(name="D", record_type="RTD", properties={'d': 123, 'e': a}), [])
 
     # if this ever fails, the mock up may be removed
-    crawler.identifiableAdapter.get_registered_identifiable.assert_called()
+    crawler[0].identifiableAdapter.get_registered_identifiable.assert_called()
 
 
 @pytest.mark.xfail()
 def test_references_entities_without_ids(crawler, ident):
-    assert not crawler._has_reference_value_without_id(db.Record().add_parent("Person")
+    assert not crawler[0]._has_reference_value_without_id(db.Record().add_parent("Person")
                                                        .add_property('last_name', 123)
                                                        .add_property('first_name', 123))
     # id and rec with id
-    assert not crawler._has_reference_value_without_id(db.Record().add_parent("Person")
+    assert not crawler[0]._has_reference_value_without_id(db.Record().add_parent("Person")
                                                        .add_property('first_name', 123)
                                                        .add_property('last_name', db.Record(id=123)))
     # id and rec with id and one unneeded prop
-    assert crawler._has_reference_value_without_id(db.Record().add_parent("Person")
+    assert crawler[0]._has_reference_value_without_id(db.Record().add_parent("Person")
                                                    .add_property('first_name', 123)
                                                    .add_property('stuff', db.Record())
                                                    .add_property('last_name', db.Record(id=123)))
     # one identifying prop is missing
-    assert crawler._has_reference_value_without_id(db.Record().add_parent("Person")
+    assert crawler[0]._has_reference_value_without_id(db.Record().add_parent("Person")
                                                    .add_property('first_name', 123)
                                                    .add_property('last_name', db.Record()))
@@ -569,7 +572,7 @@ def test_replace_entities_with_ids(crawler):
                 .add_property("B", db.Record(id=12345))
                 .add_property("C", [db.Record(id=12345), 233324]))
 
-    crawler.replace_entities_with_ids(a)
+    crawler[0].replace_entities_with_ids(a)
     assert a.get_property("A").value == 12345
     assert a.get_property("B").value == 12345
     assert a.get_property("C").value == [12345, 233324]
@@ -593,12 +596,13 @@ def mock_get_entity_by_name(name):
 
 
 def prepare_crawler_with_sec_mode(mode, ident):
-    crawler = Crawler(debug=True, securityMode=mode)
-    crawler.crawl_directory(rfp("test_directories", "examples_article"),
-                            rfp("scifolder_cfood.yml"))
+    crawler = Crawler(securityMode=mode)
+    crawled_data, debug_tree = crawler.crawl_directory(
+        rfp("test_directories", "examples_article"),
+        rfp("scifolder_cfood.yml"))
     crawler.identifiableAdapter = ident
 
-    return crawler
+    return crawler, crawled_data, debug_tree
 
 
 def reset_mocks(mocks):
@@ -651,19 +655,19 @@ def test_security_mode(updateCacheMock, upmock, insmock, ident):
     records_backup = deepcopy(ident._records)
 
     # trivial case: nothing to do
-    crawler = prepare_crawler_with_sec_mode(SecurityMode.RETRIEVE, ident)
-    crawler.synchronize(commit_changes=True)
+    crawler, crawled_data, debug_tree = prepare_crawler_with_sec_mode(SecurityMode.RETRIEVE, ident)
+    crawler.synchronize(crawled_data, commit_changes=True)
     assert crawler.run_id is not None
     insmock.assert_not_called()
     upmock.assert_not_called()
     updateCacheMock.assert_not_called()
 
     # RETRIEVE: insert only
-    crawler = prepare_crawler_with_sec_mode(SecurityMode.RETRIEVE, ident)
+    crawler, crawled_data, debug_tree = prepare_crawler_with_sec_mode(SecurityMode.RETRIEVE, ident)
     # remove one element
     del ident._records[-1]
     # insert forbidden
-    crawler.synchronize(commit_changes=True)
+    crawler.synchronize(crawled_data, commit_changes=True)
     assert crawler.run_id is not None
     insmock.assert_not_called()
     upmock.assert_not_called()
@@ -674,10 +678,10 @@ def test_security_mode(updateCacheMock, upmock, insmock, ident):
     ident._records = deepcopy(records_backup)
 
     # RETRIEVE: update only
-    crawler = prepare_crawler_with_sec_mode(SecurityMode.RETRIEVE, ident)
+    crawler, crawled_data, debug_tree = prepare_crawler_with_sec_mode(SecurityMode.RETRIEVE, ident)
     # change one element
     change_non_identifiable_prop(ident)
-    crawler.synchronize(commit_changes=True)
+    crawler.synchronize(crawled_data, commit_changes=True)
     assert crawler.run_id is not None
     insmock.assert_not_called()
     upmock.assert_not_called()
@@ -688,10 +692,10 @@ def test_security_mode(updateCacheMock, upmock, insmock, ident):
     ident._records = deepcopy(records_backup)
 
     # INSERT: insert only
-    crawler = prepare_crawler_with_sec_mode(SecurityMode.INSERT, ident)
+    crawler, crawled_data, debug_tree = prepare_crawler_with_sec_mode(SecurityMode.INSERT, ident)
     # remove one element
     del ident._records[-1]
-    crawler.synchronize(commit_changes=True)
+    crawler.synchronize(crawled_data, commit_changes=True)
     assert crawler.run_id is not None
     insmock.assert_called_once()
     upmock.assert_not_called()
@@ -702,10 +706,10 @@ def test_security_mode(updateCacheMock, upmock, insmock, ident):
     ident._records = deepcopy(records_backup)
 
     # INSERT: update only
-    crawler = prepare_crawler_with_sec_mode(SecurityMode.INSERT, ident)
+    crawler, crawled_data, debug_tree = prepare_crawler_with_sec_mode(SecurityMode.INSERT, ident)
     # change one element
     change_non_identifiable_prop(ident)
-    crawler.synchronize(commit_changes=True)
+    crawler.synchronize(crawled_data, commit_changes=True)
    assert crawler.run_id is not None
     insmock.assert_not_called()
     upmock.assert_not_called()
@@ -716,11 +720,11 @@ def test_security_mode(updateCacheMock, upmock, insmock, ident):
     ident._records = deepcopy(records_backup)
 
     # INSERT: insert and update
-    crawler = prepare_crawler_with_sec_mode(SecurityMode.INSERT, ident)
+    crawler, crawled_data, debug_tree = prepare_crawler_with_sec_mode(SecurityMode.INSERT, ident)
     # change two elements
     change_non_identifiable_prop(ident)
     change_identifiable_prop(ident)
-    crawler.synchronize(commit_changes=True)
+    crawler.synchronize(crawled_data, commit_changes=True)
     assert crawler.run_id is not None
     insmock.asser_called_once()
     upmock.assert_not_called()
@@ -773,14 +777,14 @@ def crawler_mocked_for_backref_test(crawler):
                 "is_referenced_by", value=["BR", "BR2"])
         else:
             return db.Record().add_parent(x.parents[0].name)
-    crawler.identifiableAdapter.get_registered_identifiable = Mock(side_effect=get_reg_ident)
+    crawler[0].identifiableAdapter.get_registered_identifiable = Mock(side_effect=get_reg_ident)
 
     # Simulate remote server content by using the names to identify records
     # There is only a single known Record with name A
-    crawler.identifiableAdapter.retrieve_identified_record_for_record = Mock(side_effect=partial(
+    crawler[0].identifiableAdapter.retrieve_identified_record_for_record = Mock(side_effect=partial(
         basic_retrieve_by_name_mock_up, known={"A": db.Record(id=1111, name="A").add_parent("BR")}))
-    crawler.identifiableAdapter.retrieve_identified_record_for_identifiable = Mock(
+    crawler[0].identifiableAdapter.retrieve_identified_record_for_identifiable = Mock(
         side_effect=partial(
             basic_retrieve_by_name_mock_up, known={"A": db.Record(id=1111, name="A").add_parent("BR")}))
@@ -804,7 +808,7 @@ def test_validation_error_print(caplog):
 
 
 def test_split_into_inserts_and_updates_backref(crawler_mocked_for_backref_test):
-    crawler = crawler_mocked_for_backref_test
+    crawler = crawler_mocked_for_backref_test[0]
     identlist = [Identifiable(name="A", record_type="BR"),
                  Identifiable(name="B", record_type="C", backrefs=[db.Entity()])]
     referenced = db.Record(name="B").add_parent("C")
@@ -839,7 +843,7 @@ def test_split_into_inserts_and_updates_backref(crawler_mocked_for_backref_test)
 
 def test_split_into_inserts_and_updates_mult_backref(crawler_mocked_for_backref_test):
     # test whether multiple references of the same record type are correctly used
-    crawler = crawler_mocked_for_backref_test
+    crawler = crawler_mocked_for_backref_test[0]
     referenced = db.Record(name="B").add_parent("C")
     entlist = [referenced,
                db.Record(name="A").add_parent("BR").add_property("ref", referenced),
@@ -859,7 +863,7 @@ def test_split_into_inserts_and_updates_mult_backref(crawler_mocked_for_backref_
 
 def test_split_into_inserts_and_updates_diff_backref(crawler_mocked_for_backref_test):
     # test whether multiple references of the different record types are correctly used
-    crawler = crawler_mocked_for_backref_test
+    crawler = crawler_mocked_for_backref_test[0]
     referenced = db.Record(name="B").add_parent("D")
     entlist = [referenced,
                db.Record(name="A").add_parent("BR").add_property("ref", referenced),
@@ -915,8 +919,8 @@ def test_restricted_path(create_mock):
         }
     }
 
-    crawler = Crawler(debug=True)
-    converter_registry = crawler.load_converters(crawler_definition)
+    crawler = Crawler()
+    converter_registry = create_converter_registry(crawler_definition)
 
     # This structure is crawled
     test_dict = {
@@ -931,7 +935,7 @@ def test_restricted_path(create_mock):
     }
     # first test without a restricted_path
     restricted_path = None
-    records = crawler.start_crawling(
+    records = scan_structure_elements(
         DictElement("TestDict", test_dict), crawler_definition, converter_registry,
         restricted_path
     )
@@ -941,7 +945,7 @@ def test_restricted_path(create_mock):
     # test with a restricted_path but one that has no effect (single root element)
     # this also tests that the remainder of the tree is fully traversed
     restricted_path = ["TestDict"]
-    records = crawler.start_crawling(
+    records = scan_structure_elements(
         DictElement("TestDict", test_dict), crawler_definition, converter_registry,
         restricted_path
     )
@@ -950,7 +954,7 @@ def test_restricted_path(create_mock):
 
     # test with a restricted_path that restricts the tree (single root element)
     restricted_path = ["TestDict", "v2"]
-    records = crawler.start_crawling(
+    records = scan_structure_elements(
         DictElement("TestDict", test_dict), crawler_definition, converter_registry,
         restricted_path
     )
@@ -960,7 +964,7 @@ def test_restricted_path(create_mock):
     # test with a restricted_path that contains a bad element
     restricted_path = ["TestDict", "v3"]
     with raises(RuntimeError):
-        records = crawler.start_crawling(
+        records = scan_structure_elements(
             DictElement("TestDict", test_dict), crawler_definition, converter_registry,
             restricted_path
         )
diff --git a/unittests/test_tool_extended.py b/unittests/test_tool_extended.py
index d0b431a539a15e3e83906540c69becff437742ec..a9266b85367e10cd6460ea05af5eaac2104a748c 100644
--- a/unittests/test_tool_extended.py
+++ b/unittests/test_tool_extended.py
@@ -37,10 +37,10 @@ def dircheckstr(*pathcomponents, structure_element_type="Directory"):
 
 
 @pytest.fixture
 def crawler():
-    crawler = Crawler(debug=True)
-    crawler.crawl_directory(rfp("test_directories", "examples_article"),
-                            rfp("scifolder_extended.yml"))
-    return crawler
+    crawler = Crawler()
+    crawled_data, debug_tree = crawler.crawl_directory(rfp("test_directories", "examples_article"),
+                                                       rfp("scifolder_extended.yml"))
+    return crawler, crawled_data, debug_tree
 
 
 # @pytest.fixture
@@ -70,7 +70,7 @@ def crawler():
 
 
 def test_file_structure_generation(crawler):
-    sd = crawler.debug_tree[dircheckstr("SimulationData",
+    sd = crawler[2].debug_tree[dircheckstr("SimulationData",
                                         "2020_climate-model-predict", "2020-02-01",
                                         "README.md", structure_element_type="File")]
     assert sd[1]["ReadmeFile"].role == "File"
diff --git a/unittests/test_variable_substitutions.py b/unittests/test_variable_substitutions.py
index f6c3b6375a3111faff9d746779805ba16af260b7..90534b5efac7b2c06c39e6c8f327570f68bd11cd 100644
--- a/unittests/test_variable_substitutions.py
+++ b/unittests/test_variable_substitutions.py
@@ -34,31 +34,31 @@ def dircheckstr(element_type, *pathcomponents):
 
 
 @pytest.fixture
 def crawler():
-    crawler = Crawler(debug=True)
-    crawler.crawl_directory(rfp("test_directories", "example_substitutions", "ExperimentalData"),
+    crawler = Crawler()
+    crawled_data, debug_tree = crawler.crawl_directory(rfp("test_directories", "example_substitutions", "ExperimentalData"),
                             rfp("test_directories", "example_substitutions", "substitutions.yml"))
-    return crawler
+    return crawler, crawled_data, debug_tree
 
 
 @pytest.fixture
 def crawler_2():
-    crawler = Crawler(debug=True)
-    crawler.crawl_directory(rfp("test_directories", "example_substitutions", "ExperimentalData"),
+    crawler = Crawler()
+    crawled_data, debug_tree = crawler.crawl_directory(rfp("test_directories", "example_substitutions", "ExperimentalData"),
                             rfp("test_directories", "example_substitutions",
                                 "substitutions_parents.yml"))
-    return crawler
+    return crawler, crawled_data, debug_tree
 
 
 def test_substitutions(crawler):
     # @review Florian Spreckelsen 2022-05-13
     for i in range(2):
-        subd = crawler.debug_tree[dircheckstr(
+        subd = crawler[2].debug_tree[dircheckstr(
             "File", "ExperimentalData", "220512_data.dat")]
         assert subd[i]["Experiment"].get_property("date").value == "2022-05-12"
         assert isinstance(subd[i]["ExperimentSeries"].get_property(
             "Experiment").value, db.Record)
 
-        subd = crawler.debug_tree[dircheckstr("Directory", "ExperimentalData")]
+        subd = crawler[2].debug_tree[dircheckstr("Directory", "ExperimentalData")]
         assert subd[i]["Project"].name == "project"
         assert isinstance(subd[i]["Project"].get_property(
             "Experiments").value, list)
@@ -74,7 +74,7 @@ def test_substitutions_parents(crawler_2):
     # This is a test for:
     # https://gitlab.indiscale.com/caosdb/src/caosdb-crawler/-/issues/35
     # ... testing whether variable substitutions can be used in parent declarations.
-    subd = crawler_2.debug_tree[dircheckstr(
+    subd = crawler_2[2].debug_tree[dircheckstr(
         "File", "ExperimentalData", "220512_data.dat")]
     # subd[0] <- generalStore
     # subd[1] <- recordStore
@@ -89,7 +89,7 @@ def test_empty_parents(crawler_2):
     # This is a test for:
     # https://gitlab.com/caosdb/caosdb-crawler/-/issues/8
 
-    subd = crawler_2.debug_tree[dircheckstr(
+    subd = crawler_2[2].debug_tree[dircheckstr(
         "File", "ExperimentalData", "220512_data.dat")]
 
     parents = subd[1]["RecordWithoutParents"].get_parents()
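
Taken together, these changes all follow one refactoring pattern: loading a cfood definition, building the converter registry, and scanning are no longer `Crawler` methods but module-level functions in `caoscrawler.scanner`; `Crawler.crawl_directory()` now returns a `(crawled_data, debug_tree)` pair instead of storing both on the instance; and `synchronize()`/`save_debug_data()` receive that data explicitly. This is also why the fixtures now return `(crawler, crawled_data, debug_tree)` tuples that the tests index as `crawler[0]`, `crawler[1]` and `crawler[2]`. A minimal sketch of the new call sequence, mirroring the usage in the updated tests (the cfood filename and the scanned dictionary below are hypothetical placeholders, not part of the patch):

    # Sketch only -- mirrors how the updated tests drive the refactored API.
    # "my_cfood.yml" and the test dictionary are hypothetical placeholders.
    from caoscrawler.crawl import Crawler
    from caoscrawler.scanner import (create_converter_registry, load_definition,
                                     scan_structure_elements)
    from caoscrawler.structure_elements import DictElement

    # Stateless scanning: no Crawler instance is needed any more.
    definition = load_definition("my_cfood.yml")
    registry = create_converter_registry(definition)
    records, debug_tree = scan_structure_elements(
        DictElement("TestDict", {"float_value": 4}), definition, registry)

    # Synchronization still lives on Crawler, but the crawled data is now
    # passed in explicitly instead of being read from crawler.crawled_data.
    crawler = Crawler()
    inserts, updates = crawler.synchronize(records, commit_changes=False)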