diff --git a/integrationtests/basic_example/test_basic.py b/integrationtests/basic_example/test_basic.py
index ac8c62382b6b38a0686518932f5b851d60d584cd..dc3f47cd41bd921be5f6d873f097a891a184a3f5 100755
--- a/integrationtests/basic_example/test_basic.py
+++ b/integrationtests/basic_example/test_basic.py
@@ -92,14 +92,14 @@ def crawl_standard_test_directory(cr: Crawler,
 @pytest.fixture
 def crawler(ident):
     cr = Crawler(identifiableAdapter=ident)
-    crawled_data, debug_tree = crawl_standard_test_directory(cr)
+    crawled_data = crawl_standard_test_directory(cr)
     return cr, crawled_data, debug_tree


 @pytest.fixture
 def crawler_extended(ident):
     cr = Crawler(identifiableAdapter=ident)
-    crawled_data, debug_tree = crawl_standard_test_directory(cr, cfood="scifolder_extended.yml")
+    crawled_data = crawl_standard_test_directory(cr, cfood="scifolder_extended.yml")
     # correct paths for current working directory
     file_list = [r for r in crawled_data if r.role == "File"]
     for f in file_list:
@@ -139,8 +139,8 @@ def test_single_insertion(clear_database, usemodel, crawler, ident):

     # Do a second run on the same data, there should be no changes:
     crawler = Crawler(identifiableAdapter=ident)
-    crawled_data, debug_tree = crawler.crawl_directory(rfp("../../unittests/test_directories", "examples_article"),
-                                                       rfp("../../unittests/scifolder_cfood.yml"))
+    crawled_data = crawler.crawl_directory(rfp("../../unittests/test_directories", "examples_article"),
+                                           rfp("../../unittests/scifolder_cfood.yml"))
     ins, ups = crawler.synchronize(crawled_data)
     assert len(ins) == 0
     assert len(ups) == 0
@@ -151,7 +151,7 @@ def test_multiple_insertions(clear_database, usemodel, ident, crawler):

     # Do a second run on the same data, there should be no changes:
     cr = Crawler(identifiableAdapter=ident)
-    crawled_data, debug_tree = crawl_standard_test_directory(cr)
+    crawled_data = crawl_standard_test_directory(cr)
     ins, ups = cr.synchronize(crawled_data)
     assert len(ins) == 0
     assert len(ups) == 0
@@ -162,7 +162,7 @@ def test_insertion(clear_database, usemodel, ident, crawler):

     # Do a second run on the same data, there should a new insert:
     cr = Crawler(identifiableAdapter=ident)
-    crawled_data, debug_tree = crawl_standard_test_directory(cr, "example_insert")
+    crawled_data = crawl_standard_test_directory(cr, "example_insert")
     assert len(crawled_data) == 3
     ins, ups = cr.synchronize(crawled_data)
     assert len(ins) == 1
@@ -170,7 +170,7 @@ def test_insertion(clear_database, usemodel, ident, crawler):

     # Do it again to check whether nothing is changed:
     cr = Crawler(identifiableAdapter=ident)
-    crawled_data, debug_tree = crawl_standard_test_directory(cr, "example_insert")
+    crawled_data = crawl_standard_test_directory(cr, "example_insert")
     assert len(crawled_data) == 3
     ins, ups = cr.synchronize(crawled_data)
     assert len(ins) == 0
@@ -182,7 +182,7 @@ def test_insert_auth(clear_database, usemodel, ident, crawler):

     # Do a second run on the same data, there should a new insert:
     cr = Crawler(identifiableAdapter=ident, securityMode=SecurityMode.RETRIEVE)
-    crawled_data, debug_tree = crawl_standard_test_directory(cr, "example_insert")
+    crawled_data = crawl_standard_test_directory(cr, "example_insert")
     assert len(crawled_data) == 3
     ins, ups = cr.synchronize(crawled_data)
     assert len(ins) == 1
@@ -192,7 +192,7 @@ def test_insert_auth(clear_database, usemodel, ident, crawler):

     # Do it again to check whether nothing is changed:
     cr = Crawler(identifiableAdapter=ident)
-    crawled_data, debug_tree = crawl_standard_test_directory(cr, "example_insert")
+    crawled_data = crawl_standard_test_directory(cr, "example_insert")
     assert len(crawled_data) == 3
     ins, ups = cr.synchronize(crawled_data)
     assert len(ins) == 0
@@ -203,12 +203,11 @@ def test_insertion_and_update(clear_database, usemodel, ident, crawler):
     ins, ups = crawler[0].synchronize(crawler[1])

     cr = Crawler(identifiableAdapter=ident)
-    crawled_data, debug_tree = crawl_standard_test_directory(cr, "example_insert")
+    crawled_data = crawl_standard_test_directory(cr, "example_insert")
     ins, ups = cr.synchronize(crawled_data)

     cr = Crawler(identifiableAdapter=ident)
-    crawled_data, debug_tree = crawl_standard_test_directory(cr, "example_overwrite_1")
-    # print(cr.crawled_data)
+    crawled_data = crawl_standard_test_directory(cr, "example_overwrite_1")
     # cr.save_debug_data(rfp("provenance.yml"))
     assert len(crawled_data) == 3
     ins, ups = cr.synchronize(crawled_data)
@@ -222,7 +221,7 @@ def test_identifiable_update(clear_database, usemodel, ident, crawler):
     # Do a second run on the same data with a change in one
     # of the identifiables:
     cr = Crawler(identifiableAdapter=ident)
-    crawled_data, debug_tree = crawl_standard_test_directory(cr)
+    crawled_data = crawl_standard_test_directory(cr)

     # Test the addition of a single property:
     l = crawled_data
@@ -240,7 +239,7 @@ def test_identifiable_update(clear_database, usemodel, ident, crawler):

     # Test the change within one property:
     cr = Crawler(identifiableAdapter=ident)
-    crawled_data, debug_tree = crawl_standard_test_directory(cr)
+    crawled_data = crawl_standard_test_directory(cr)
     l = crawled_data
     for record in l:
         if (record.parents[0].name == "Measurement" and
@@ -254,7 +253,7 @@ def test_identifiable_update(clear_database, usemodel, ident, crawler):

     # Changing the date should result in a new insertion:
     cr = Crawler(identifiableAdapter=ident)
-    crawled_data, debug_tree = crawl_standard_test_directory(cr)
+    crawled_data = crawl_standard_test_directory(cr)
     l = crawled_data
     for record in l:
         if (record.parents[0].name == "Measurement" and
@@ -270,7 +269,7 @@ def test_identifiable_update(clear_database, usemodel, ident, crawler):

 def test_file_insertion_dry(clear_database, usemodel, ident):
     crawler_extended = Crawler(identifiableAdapter=ident)
-    crawled_data, debug_tree = crawl_standard_test_directory(
+    crawled_data = crawl_standard_test_directory(
         crawler_extended, cfood="scifolder_extended.yml")
     file_list = [r for r in crawled_data if r.role == "File"]
     assert len(file_list) == 11
@@ -306,7 +305,7 @@ def test_file_update(clear_database, usemodel, ident, crawler_extended):
     file_list_ins = [r for r in ins1 if r.role == "File"]

     cr = Crawler(identifiableAdapter=ident)
-    crawled_data, debug_tree = crawl_standard_test_directory(cr, cfood="scifolder_extended.yml")
+    crawled_data = crawl_standard_test_directory(cr, cfood="scifolder_extended.yml")

     file_list = [r for r in crawled_data if r.role == "File"]
     for f in file_list:
@@ -321,7 +320,7 @@ def test_file_update(clear_database, usemodel, ident, crawler_extended):
     assert len(res[0].parents) == 0

     cr2 = Crawler(identifiableAdapter=ident)
-    crawled_data, debug_tree = crawl_standard_test_directory(cr2, cfood="scifolder_extended2.yml")
+    crawled_data = crawl_standard_test_directory(cr2, cfood="scifolder_extended2.yml")

     file_list = [r for r in crawled_data if r.role == "File"]
     for f in file_list:
diff --git a/integrationtests/test_issues.py b/integrationtests/test_issues.py
index d9dd4114eb089f53f83384c9b1a7670fd43984c6..7ee4d7ee1aa43c9199c7f51edb2ab93b0a37bb8d 100644
--- a/integrationtests/test_issues.py
+++ b/integrationtests/test_issues.py
@@ -98,7 +98,7 @@ def test_issue_23(clear_database):
     }

     crawler.generate_run_id()
-    records, debug_tree = scan_structure_elements(
+    records = scan_structure_elements(
         DictElement("TestDict", test_dict), crawler_definition, converter_registry)

     assert len(records) == 1
diff --git a/integrationtests/test_realworld_example.py b/integrationtests/test_realworld_example.py
index 9373989da6e10c07f072854f9ec67c06bad20e96..c883bf403af07882c495ae3a8516b785a2583069 100644
--- a/integrationtests/test_realworld_example.py
+++ b/integrationtests/test_realworld_example.py
@@ -175,7 +175,7 @@ def test_event_update(clear_database, usemodel, addfiles):
     crawler_definition = load_definition(
         crawler_definition_path)
     converter_registry = create_converter_registry(crawler_definition)
-    records, debug_tree = scan_structure_elements(
+    records = scan_structure_elements(
         Directory("data", os.path.join(DATADIR, "data")),
         crawler_definition,
         converter_registry
diff --git a/src/caoscrawler/__init__.py b/src/caoscrawler/__init__.py
index 044d8f0bf53c4c80dab9b492919fa64ab321a60d..05bad0b54d9098c0b7f165d8295a0faa2966fa32 100644
--- a/src/caoscrawler/__init__.py
+++ b/src/caoscrawler/__init__.py
@@ -1,2 +1,4 @@
 from .crawl import Crawler, SecurityMode
-from .version import CfoodRequiredVersionError, version as __version__
+from .version import CfoodRequiredVersionError, get_caoscrawler_version
+
+__version__ = get_caoscrawler_version()
diff --git a/src/caoscrawler/crawl.py b/src/caoscrawler/crawl.py
index 3f963bc3d0e3c5ed733356315fe6fd8fe8293cff..4578f7e3c5f4785b455b13ad73f87103eb499c97 100644
--- a/src/caoscrawler/crawl.py
+++ b/src/caoscrawler/crawl.py
@@ -184,7 +184,7 @@ class Crawler(object):

     def __init__(self,
                  generalStore: Optional[GeneralStore] = None,
-                 debug: bool = False,
+                 debug: Optional[bool] = None,
                  identifiableAdapter: Optional[IdentifiableAdapter] = None,
                  securityMode: SecurityMode = SecurityMode.UPDATE):
         """
@@ -219,8 +219,7 @@ class Crawler(object):
         if identifiableAdapter is not None:
             self.identifiableAdapter = identifiableAdapter

-        if not debug:
-            # TODO: not ideal, how to warn if debug is explicitely set to false?
+        if debug is not None:
             warnings.warn(DeprecationWarning(
                 "The debug argument of the Crawler class is deprecated and has no effect."))
@@ -258,7 +257,7 @@ class Crawler(object):
             "The function start_crawling in the crawl module is deprecated. "
             "Please use scan_structure_elements from the scanner module."))
" "Please use scan_structure_elements from the scanner module.")) - data, _ = scan_structure_elements( + data = scan_structure_elements( items, crawler_definition, converter_registry, restricted_path) self.crawled_data = data return data @@ -288,9 +287,9 @@ class Crawler(object): "Please use scan_directory from the scanner module.")) self.crawled_directory = crawled_directory - data, _ = scan_directory(crawled_directory, - crawler_definition_path, - restricted_path) + data = scan_directory(crawled_directory, + crawler_definition_path, + restricted_path) self.crawled_data = data return data @@ -1023,8 +1022,9 @@ def crawler_main(crawled_directory_path: str, crawler = Crawler(securityMode=securityMode) try: - crawled_data, debug_tree = scan_directory( - crawled_directory_path, cfood_file_name, restricted_path) + debug_tree = DebugTree() + crawled_data = scan_directory( + crawled_directory_path, cfood_file_name, restricted_path, debug_tree=debug_tree) except ConverterValidationError as err: logger.error(err) return 1 diff --git a/src/caoscrawler/scanner.py b/src/caoscrawler/scanner.py index 648c64c24486d1b3d71aa92dc15797db327b514e..ff6156aed3bde639435219a705d6d7d2124f7f38 100644 --- a/src/caoscrawler/scanner.py +++ b/src/caoscrawler/scanner.py @@ -275,9 +275,6 @@ def scanner(items: list[StructureElement], if converters_path is None: converters_path = [] - if debug_tree is None: - debug_tree = DebugTree() - for element in items: for converter in converters: @@ -303,25 +300,26 @@ def scanner(items: list[StructureElement], children = converter.create_children(general_store_copy, element) - # add provenance information for each variable - debug_tree.debug_tree[str(element)] = ( - general_store_copy.get_storage(), record_store_copy.get_storage()) - debug_tree.debug_metadata["copied"][str(element)] = ( - general_store_copy.get_dict_copied(), - record_store_copy.get_dict_copied()) - debug_tree.debug_metadata["usage"][str(element)].add( - "/".join(converters_path + [converter.name])) - mod_info = debug_tree.debug_metadata["provenance"] - for record_name, prop_name in keys_modified: - # TODO: check - internal_id = record_store_copy.get_internal_id( - record_name) - record_identifier = record_name + \ - "_" + str(internal_id) - converter.metadata["usage"].add(record_identifier) - mod_info[record_identifier][prop_name] = ( - structure_elements_path + [element.get_name()], - converters_path + [converter.name]) + if debug_tree is not None: + # add provenance information for each variable + debug_tree.debug_tree[str(element)] = ( + general_store_copy.get_storage(), record_store_copy.get_storage()) + debug_tree.debug_metadata["copied"][str(element)] = ( + general_store_copy.get_dict_copied(), + record_store_copy.get_dict_copied()) + debug_tree.debug_metadata["usage"][str(element)].add( + "/".join(converters_path + [converter.name])) + mod_info = debug_tree.debug_metadata["provenance"] + for record_name, prop_name in keys_modified: + # TODO: check + internal_id = record_store_copy.get_internal_id( + record_name) + record_identifier = record_name + \ + "_" + str(internal_id) + converter.metadata["usage"].add(record_identifier) + mod_info[record_identifier][prop_name] = ( + structure_elements_path + [element.get_name()], + converters_path + [converter.name]) scanner(children, converter.converters, general_store_copy, record_store_copy, @@ -351,7 +349,7 @@ def scanner(items: list[StructureElement], # del record_store[name] # del general_store[name] - return crawled_data, debug_tree + return crawled_data # 
diff --git a/src/caoscrawler/version.py b/src/caoscrawler/version.py
index e73905dcd25673eae88f718a7e45b7b4d0665e47..fdc8323452cd190cc3628efa57c15992f30fabeb 100644
--- a/src/caoscrawler/version.py
+++ b/src/caoscrawler/version.py
@@ -25,8 +25,10 @@ except ImportError:  # Python<3.8 dowesn"t support this so use
 from packaging.version import parse as parse_version
 from warnings import warn

-# Read in version of locally installed caoscrawler package
-version = importlib_metadata.version("caoscrawler")
+
+def get_caoscrawler_version():
+    """ Read in version of locally installed caoscrawler package"""
+    return importlib_metadata.version("caoscrawler")


 class CfoodRequiredVersionError(RuntimeError):
@@ -51,7 +53,7 @@ as expected with the installed version of the crawler.
         warn(msg, UserWarning)
         return

-    installed_version = parse_version(version)
+    installed_version = parse_version(get_caoscrawler_version())
     cfood_version = parse_version(metadata["crawler-version"])

     if cfood_version > installed_version:
diff --git a/unittests/test_cfood_metadata.py b/unittests/test_cfood_metadata.py
index e66ac484f6175e6aac4821cfe6676c2f93283d4f..494bd383d95b4a845b5ea6f86ccff0f9a1db257f 100644
--- a/unittests/test_cfood_metadata.py
+++ b/unittests/test_cfood_metadata.py
@@ -21,23 +21,13 @@ import pytest
 import yaml

 from tempfile import NamedTemporaryFile
+from unittest.mock import patch
+from unittest.mock import MagicMock, Mock

 import caoscrawler
 from caoscrawler.scanner import load_definition

-CRAWLER_VERSION = ""
-
-
-def setup_function(function):
-    """Store original crawler version in case it is altered for tests."""
-    CRAWLER_VERSION = caoscrawler.version.version
-
-
-def teardown_function(function):
-    """Reset version"""
-    caoscrawler.version.version = CRAWLER_VERSION
-

 def _temp_file_load(txt: str):
     """
@@ -94,7 +84,8 @@ SimulationData:
     assert found


-def test_warning_if_version_too_old():
+@patch("caoscrawler.version.get_caoscrawler_version")
+def test_warning_if_version_too_old(get_version):
     """Warn if the cfood was written for an older crawler version."""

     definition_text = """
@@ -109,8 +100,7 @@ SimulationData:
   match: SimulationData
 """

-    # higher minor
-    caoscrawler.version.version = "0.3.0"
+    get_version.side_effect = lambda: "0.3.0"
     with pytest.warns(UserWarning) as uw:
         _temp_file_load(definition_text)
@@ -123,7 +113,7 @@ SimulationData:
     assert found

     # higher major
-    caoscrawler.version.version = "1.1.0"
+    get_version.side_effect = lambda: "1.1.0"
     with pytest.warns(UserWarning) as uw:
         _temp_file_load(definition_text)
@@ -136,10 +126,12 @@ SimulationData:
     assert found


-def test_error_if_version_too_new():
+@patch("caoscrawler.version.get_caoscrawler_version")
+def test_error_if_version_too_new(get_version):
     """Raise error if the cfood requires a newer crawler version."""

     # minor too old
+    get_version.side_effect = lambda: "0.1.5"
     definition_text = """
 ---
 metadata:
@@ -151,7 +143,6 @@ SimulationData:
   type: Directory
   match: SimulationData
 """
-    caoscrawler.version.version = "0.1.5"
     with pytest.raises(caoscrawler.CfoodRequiredVersionError) as cre:
         _temp_file_load(definition_text)
@@ -179,7 +170,7 @@ SimulationData:
     assert "version installed on your system: 0.1.5" in str(cre.value)

     # patch to old
-    caoscrawler.version.version = "1.0.0"
+    get_version.side_effect = lambda: "1.0.0"
     with pytest.raises(caoscrawler.CfoodRequiredVersionError) as cre:
         _temp_file_load(definition_text)
@@ -189,7 +180,8 @@ SimulationData:
     assert "version installed on your system: 1.0.0" in str(cre.value)

-def test_matching_version():
+@patch("caoscrawler.version.get_caoscrawler_version")
+def test_matching_version(get_version):
     """Test that there is no warning or error in case the version matches."""

     definition_text = """
@@ -203,10 +195,10 @@ SimulationData:
   type: Directory
   match: SimulationData
 """
-    caoscrawler.version.version = "0.2.1"
+    get_version.side_effect = lambda: "0.2.1"
     assert _temp_file_load(definition_text)

     # The version is also considered a match if the patch version of the
     # installed crawler is newer than the one specified in the cfood metadata
-    caoscrawler.version.version = "0.2.7"
+    get_version.side_effect = lambda: "0.2.7"
     assert _temp_file_load(definition_text)
diff --git a/unittests/test_converters.py b/unittests/test_converters.py
index 25b8412126289b34b97dd83f6357964f62823a56..154724be6d126aefb430c7d0600b86a5ec721812 100644
--- a/unittests/test_converters.py
+++ b/unittests/test_converters.py
@@ -438,6 +438,7 @@ def test_filter_children_of_directory(converter_registry, capsys):
     children = dc.create_children(None, test_dir)


+@pytest.mark.filterwarnings("ignore::UserWarning")
 def test_validate_custom_converters():
     one_doc_yaml = """
 Converters:
@@ -457,6 +458,7 @@ MyElement:
     two_doc_yaml = """
 ---
 metadata:
+  crawler-version: 0.3.1
   Converters:
     MyNewType:
       converter: MyNewTypeConverter
diff --git a/unittests/test_issues.py b/unittests/test_issues.py
index ef8c38afdd77c93ec2439157f6e7dacfa9c32c14..46157af9225c11b79e76dd3ef856d60519a6eb9d 100644
--- a/unittests/test_issues.py
+++ b/unittests/test_issues.py
@@ -63,7 +63,7 @@ def test_issue_10():
         "float_value": 4
     }

-    records, debug_tree = scan_structure_elements(
+    records = scan_structure_elements(
         DictElement("TestDict", test_dict), crawler_definition, converter_registry)
     assert len(records) == 1
     assert records[0].parents[0].name == "TestRec"
@@ -95,7 +95,7 @@ def test_list_datatypes():
         }
     }

-    crawler = Crawler(debug=True)
+    crawler = Crawler()
     converter_registry = crawler.load_converters(crawler_definition)

     test_dict = {
diff --git a/unittests/test_json.py b/unittests/test_json.py
index 42841da7715da8b796cbb9b85ee831bf004f36b7..3c120be174ff819baeeaa49ddf142cf40dba751e 100644
--- a/unittests/test_json.py
+++ b/unittests/test_json.py
@@ -49,7 +49,7 @@ def test_json():
     # Load and register converter packages:
     converter_registry = create_converter_registry(crawler_definition)

-    records, debug_tree = scan_structure_elements(
+    records = scan_structure_elements(
         JSONFile(os.path.basename(json_file_path), json_file_path),
         crawler_definition,
         converter_registry
diff --git a/unittests/test_macros.py b/unittests/test_macros.py
index cc79731e7e8e52ab972a22151b580720c10e195c..5244307db8e694ffb4864380d33936ebb76ae715 100644
--- a/unittests/test_macros.py
+++ b/unittests/test_macros.py
@@ -142,6 +142,7 @@ def test_multi_macros_toplevel(register_macros, macro_store_reset):
     dat_loader = list(yaml.safe_load_all("""
 ---
 metadata:
+  crawler-version: 0.3.1
   macros:
     - !defmacro
       name: test_one
@@ -168,6 +169,10 @@ testnode: !macro

 def test_load_definition(register_macros, macro_store_reset):
     txt = """
+---
+metadata:
+  crawler-version: 0.3.1
+---
 extroot:
   type: Directory
   match: extroot
@@ -183,6 +188,7 @@ extroot:
     cfood = _temp_file_load("""
 ---
 metadata:
+  crawler-version: 0.3.1
   macros:
   - !defmacro
     name: test_one
@@ -256,6 +262,7 @@ def test_circular_macro_definition(register_macros, macro_store_reset):
     cfood = _temp_file_load("""
 ---
 metadata:
+  crawler-version: 0.3.1
   macros:
   - !defmacro
     name: test_one
@@ -304,6 +311,7 @@ def test_use_macro_twice():
     cfood = _temp_file_load("""
 ---
 metadata:
+  crawler-version: 0.3.1
   macros:
   - !defmacro
     name: test_twice
@@ -337,6 +345,7 @@ def test_documentation_example_2():
     cfood = _temp_file_load("""
 ---
 metadata:
+  crawler-version: 0.3.1
   macros:
   - !defmacro
     name: MarkdownFile
@@ -374,6 +383,7 @@ def test_documentation_example_1():
     cfood = _temp_file_load("""
 ---
 metadata:
+  crawler-version: 0.3.1
   macros:
   - !defmacro
     name: SimulationDatasetFile
@@ -422,6 +432,7 @@ def test_def_replacements():
     cfood = _temp_file_load("""
 ---
 metadata:
+  crawler-version: 0.3.1
   macros:
   - !defmacro
     name: test_def_replacements
diff --git a/unittests/test_scalars_cfood.py b/unittests/test_scalars_cfood.py
index 4fbeeed628101654a20de18210bdac683cf801f4..89d94fc74ebda6aedfbee422294e99eab2216d73 100644
--- a/unittests/test_scalars_cfood.py
+++ b/unittests/test_scalars_cfood.py
@@ -10,16 +10,11 @@ from caoscrawler.converters import handle_value
 from caoscrawler.crawl import Crawler
 # We need the store for the above function
 from caoscrawler.stores import GeneralStore
-
-from test_tool import dircheckstr, rfp
+from caoscrawler.scanner import scan_directory
+from caoscrawler.debug_tree import DebugTree

-
-@pytest.fixture
-def crawler():
-    crawler = Crawler()
-    crawled_data, debug_tree = crawler.crawl_directory(rfp("test_directories", "examples_article"),
-                                                       rfp("cfoods_scalar.yml"))
-    return crawler, crawled_data, debug_tree
+from test_tool import dircheckstr, rfp


 def test_handle_value():
@@ -38,8 +33,11 @@ def test_handle_value():
     assert handle_value([4, 3, 2], store) == ([4, 3, 2], "single")


-def test_record_structure_generation(crawler):
-    subd = crawler[2].debug_tree[dircheckstr("DataAnalysis")]
+def test_record_structure_generation():
+    dbt = DebugTree()
+    scan_directory(rfp("test_directories", "examples_article"), rfp("cfoods_scalar.yml"),
+                   debug_tree=dbt)
+    subd = dbt.debug_tree[dircheckstr("DataAnalysis")]
     assert len(subd) == 2
     # variables store on Data Analysis node of debug tree
     if "Data" in subd[0]:
diff --git a/unittests/test_table_converter.py b/unittests/test_table_converter.py
index 6a776fa2a633ae5c7bb21163732abec467e09f8b..d739695fc4c6a019f28f3c3697e3f134e0f1755e 100644
--- a/unittests/test_table_converter.py
+++ b/unittests/test_table_converter.py
@@ -28,6 +28,8 @@ test the converters module

 from caoscrawler.converters import Converter
 from caoscrawler.stores import GeneralStore
+from caoscrawler.scanner import scan_directory
+from caoscrawler.debug_tree import DebugTree
 from caoscrawler.converters import (ConverterValidationError, DictConverter,
                                     XLSXTableConverter, CSVTableConverter)
 from caoscrawler.structure_elements import Directory
@@ -91,14 +93,6 @@ def dircheckstr(*pathcomponents):
     return "caoscrawler.structure_elements.File: " + basename(join(*pathcomponents)) + ", " + rfp("test_directories", "examples_tables", "ExperimentalData", *pathcomponents)


-@pytest.fixture
-def crawler():
-    crawler = Crawler()
-    crawled_data, debug_tree = crawler.crawl_directory(rfp("test_directories", "examples_tables", "ExperimentalData"),
-                                                       rfp("test_directories", "examples_tables", "crawler_for_tables.yml"))
-    return crawler, crawled_data, debug_tree
-
-
 def test_convert_table(converter_registry):
     extentions = ["xlsx", "csv", "tsv"]
     if importlib.util.find_spec("odf") is not None:
@@ -151,9 +145,13 @@ def test_convert_table(converter_registry):
     assert res[0].name == "jdsfkljadskf"


-def test_crawl_csv_table(crawler):
+def test_crawl_csv_table():
+    dbt = DebugTree()
+    scan_directory(rfp("test_directories", "examples_tables", "ExperimentalData"),
+                   rfp("test_directories", "examples_tables", "crawler_for_tables.yml"),
+                   debug_tree=dbt)
     for file_ext in ["xlsx", "csv"]:
-        subd = crawler[2].debug_tree[dircheckstr("test1." + file_ext)]
+        subd = dbt.debug_tree[dircheckstr("test1." + file_ext)]
         record_experiment = subd[1]["Experiment"]
         assert isinstance(record_experiment, db.Record)
         assert isinstance(record_experiment.get_property("Measurements").value, list)
diff --git a/unittests/test_tool.py b/unittests/test_tool.py
index d0bb108453f1e390dc72980baed99e4b469e3edb..94266278cd2186a7df3b40a7457cc8c8bfc54ce3 100755
--- a/unittests/test_tool.py
+++ b/unittests/test_tool.py
@@ -32,6 +32,8 @@ import os
 from caoscrawler.crawl import Crawler, SecurityMode, split_restricted_path
 from caoscrawler.identifiable import Identifiable
 from caoscrawler.structure_elements import File, DictTextElement, DictListElement, DictElement
+from caoscrawler.scanner import scan_directory
+from caoscrawler.debug_tree import DebugTree
 from caoscrawler.identifiable_adapters import IdentifiableAdapter, LocalStorageIdentifiableAdapter
 from simulated_server_data import full_data
 from functools import partial
@@ -77,9 +79,10 @@ def dircheckstr(*pathcomponents):
 @pytest.fixture
 def crawler():
     crawler = Crawler()
-    crawled_data, debug_tree = crawler.crawl_directory(
+    debug_tree = DebugTree()
+    crawled_data = scan_directory(
         rfp("test_directories", "examples_article"),
-        rfp("scifolder_cfood.yml"))
+        rfp("scifolder_cfood.yml"), debug_tree=debug_tree)
     return crawler, crawled_data, debug_tree
@@ -112,14 +115,16 @@ def ident(crawler):
     return ident


-def test_record_structure_generation(crawler):
+def test_record_structure_generation():
     # TODO How does this test relate to the test function in test_scalars_cfood with the same name?
     # There seems to be code duplication

-    debug_tree = crawler[2]
-
-    subd = debug_tree.debug_tree[dircheckstr("DataAnalysis")]
-    subc = debug_tree.debug_metadata["copied"][dircheckstr("DataAnalysis")]
+    dbt = DebugTree()
+    scan_directory(rfp("test_directories", "examples_article"),
+                   rfp("scifolder_cfood.yml"),
+                   debug_tree=dbt)
+    subd = dbt.debug_tree[dircheckstr("DataAnalysis")]
+    subc = dbt.debug_metadata["copied"][dircheckstr("DataAnalysis")]
     assert len(subd) == 2
     # variables store on Data Analysis node of debug tree
     assert len(subd[0]) == 4
@@ -133,9 +138,9 @@ def test_record_structure_generation(crawler):
     assert subd[0]["DataAnalysis"] == "examples_article/DataAnalysis"
     assert subc[0]["DataAnalysis"] is False

-    subd = debug_tree.debug_tree[dircheckstr(
+    subd = dbt.debug_tree[dircheckstr(
         "DataAnalysis", "2020_climate-model-predict")]
-    subc = debug_tree.debug_metadata["copied"][dircheckstr(
+    subc = dbt.debug_metadata["copied"][dircheckstr(
         "DataAnalysis", "2020_climate-model-predict")]

     assert len(subd[1]) == 1
@@ -163,12 +168,12 @@ def test_record_structure_generation(crawler):
     assert subc[0]["date"] is False
     assert subc[0]["identifier"] is False

-    subd = debug_tree.debug_tree[dircheckstr("DataAnalysis",
-                                             "2020_climate-model-predict",
-                                             "2020-02-08_prediction-errors")]
-    subc = debug_tree.debug_metadata["copied"][dircheckstr("DataAnalysis",
-                                                           "2020_climate-model-predict",
-                                                           "2020-02-08_prediction-errors")]
+    subd = dbt.debug_tree[dircheckstr("DataAnalysis",
+                                      "2020_climate-model-predict",
+                                      "2020-02-08_prediction-errors")]
+    subc = dbt.debug_metadata["copied"][dircheckstr("DataAnalysis",
+                                                    "2020_climate-model-predict",
+                                                    "2020-02-08_prediction-errors")]

     assert len(subd[0]) == 12
     assert subd[0]["date"] == "2020-02-08"
     assert subd[0]["identifier"] == "prediction-errors"
@@ -290,7 +295,7 @@ def test_crawler_update_list(crawler, ident):


 def test_synchronization(crawler, ident):
-    insl, updl = crawler[0].synchronize(crawler[1], commit_changes=False)
+    insl, updl = crawler[0].synchronize(commit_changes=False, crawled_data=crawler[1])
     assert len(insl) == 0
     assert len(updl) == 0
@@ -339,16 +344,16 @@ def test_remove_unnecessary_updates():
 @pytest.mark.xfail
 def test_identifiable_adapter_no_identifiable(crawler, ident):
     del ident._registered_identifiables["Person"]
-    insl, updl = crawler.synchronize()
+    insl, updl = crawler[0].synchronize()
     assert len(updl) == 0

-    pers = [r for r in crawler.crawled_data if r.parents[0].name == "Person"]
+    pers = [r for r in crawler[0].crawled_data if r.parents[0].name == "Person"]
     # All persons are inserted, because they are not identifiable:
     assert len(insl) == len(pers)


 def test_provenance_debug_data(crawler):
-    crawler[0].save_debug_data(crawler[2], rfp("provenance.yml"))
+    crawler[0].save_debug_data(rfp("provenance.yml"), debug_tree=crawler[2])
     with open(rfp("provenance.yml"), "r") as f:
         provenance = yaml.load(f, Loader=yaml.SafeLoader)
@@ -597,9 +602,10 @@ def mock_get_entity_by_name(name):

 def prepare_crawler_with_sec_mode(mode, ident):
     crawler = Crawler(securityMode=mode)
-    crawled_data, debug_tree = crawler.crawl_directory(
+    debug_tree = DebugTree()
+    crawled_data = scan_directory(
         rfp("test_directories", "examples_article"),
-        rfp("scifolder_cfood.yml"))
+        rfp("scifolder_cfood.yml"), debug_tree=debug_tree)
     crawler.identifiableAdapter = ident

     return crawler, crawled_data, debug_tree
@@ -656,7 +662,7 @@ def test_security_mode(updateCacheMock, upmock, insmock, ident):
     # trivial case: nothing to do
     crawler, crawled_data, debug_tree = prepare_crawler_with_sec_mode(SecurityMode.RETRIEVE, ident)
-    crawler.synchronize(crawled_data, commit_changes=True)
+    crawler.synchronize(commit_changes=True, crawled_data=crawled_data)
     assert crawler.run_id is not None
     insmock.assert_not_called()
     upmock.assert_not_called()
@@ -667,7 +673,7 @@ def test_security_mode(updateCacheMock, upmock, insmock, ident):
     # remove one element
     del ident._records[-1]
     # insert forbidden
-    crawler.synchronize(crawled_data, commit_changes=True)
+    crawler.synchronize(commit_changes=True, crawled_data=crawled_data)
     assert crawler.run_id is not None
     insmock.assert_not_called()
     upmock.assert_not_called()
@@ -681,7 +687,7 @@ def test_security_mode(updateCacheMock, upmock, insmock, ident):
     crawler, crawled_data, debug_tree = prepare_crawler_with_sec_mode(SecurityMode.RETRIEVE, ident)
     # change one element
     change_non_identifiable_prop(ident)
-    crawler.synchronize(crawled_data, commit_changes=True)
+    crawler.synchronize(commit_changes=True, crawled_data=crawled_data)
     assert crawler.run_id is not None
     insmock.assert_not_called()
     upmock.assert_not_called()
@@ -695,7 +701,7 @@ def test_security_mode(updateCacheMock, upmock, insmock, ident):
     crawler, crawled_data, debug_tree = prepare_crawler_with_sec_mode(SecurityMode.INSERT, ident)
     # remove one element
     del ident._records[-1]
-    crawler.synchronize(crawled_data, commit_changes=True)
+    crawler.synchronize(commit_changes=True, crawled_data=crawled_data)
     assert crawler.run_id is not None
     insmock.assert_called_once()
     upmock.assert_not_called()
@@ -709,7 +715,7 @@ def test_security_mode(updateCacheMock, upmock, insmock, ident):
     crawler, crawled_data, debug_tree = prepare_crawler_with_sec_mode(SecurityMode.INSERT, ident)
     # change one element
     change_non_identifiable_prop(ident)
-    crawler.synchronize(crawled_data, commit_changes=True)
+    crawler.synchronize(commit_changes=True, crawled_data=crawled_data)
     assert crawler.run_id is not None
     insmock.assert_not_called()
     upmock.assert_not_called()
@@ -724,7 +730,7 @@ def test_security_mode(updateCacheMock, upmock, insmock, ident):
     # change two elements
     change_non_identifiable_prop(ident)
     change_identifiable_prop(ident)
-    crawler.synchronize(crawled_data, commit_changes=True)
+    crawler.synchronize(commit_changes=True, crawled_data=crawled_data)
     assert crawler.run_id is not None
     insmock.asser_called_once()
     upmock.assert_not_called()
@@ -976,6 +982,7 @@ def test_split_restricted_path():
     assert ["el", "el"] == split_restricted_path("/el/el")


+@pytest.mark.filterwarnings("ignore:The prefix:DeprecationWarning")
 def test_deprecated_prefix_option():
     """Test that calling the crawler's main function with the deprecated
     `prefix` option raises the correct errors and warnings.
diff --git a/unittests/test_tool_extended.py b/unittests/test_tool_extended.py
index 4cb7286f91df577e7f17a354bd54547b3802c55d..7dd4282e4c6d206c8c360424d865b9f736b5e582 100644
--- a/unittests/test_tool_extended.py
+++ b/unittests/test_tool_extended.py
@@ -6,7 +6,9 @@ from caoscrawler import Crawler
 from caoscrawler.structure_elements import File, DictTextElement, DictListElement
 from caoscrawler.identifiable_adapters import IdentifiableAdapter, LocalStorageIdentifiableAdapter
+from caoscrawler.scanner import scan_directory
 from functools import partial
+from caoscrawler.debug_tree import DebugTree
 from copy import deepcopy
 from unittest.mock import MagicMock, Mock
 from os.path import join, dirname, basename
@@ -37,10 +39,10 @@ def dircheckstr(*pathcomponents, structure_element_type="Directory"):

 @pytest.fixture
 def crawler():
-    crawler = Crawler()
-    crawled_data, debug_tree = crawler.crawl_directory(rfp("test_directories", "examples_article"),
-                                                       rfp("scifolder_extended.yml"))
-    return crawler, crawled_data, debug_tree
+    crawler = Crawler(debug=True)
+    crawler.crawl_directory(rfp("test_directories", "examples_article"),
+                            rfp("scifolder_extended.yml"))
+    return crawler


 # @pytest.fixture
@@ -69,10 +71,14 @@ def crawler():
 #     return ident


-def test_file_structure_generation(crawler):
-    sd = crawler[2].debug_tree[dircheckstr("SimulationData",
-                                           "2020_climate-model-predict", "2020-02-01",
-                                           "README.md", structure_element_type="File")]
+def test_file_structure_generation():
+    dbt = DebugTree()
+    scan_directory(rfp("test_directories", "examples_article"),
+                   rfp("scifolder_extended.yml"),
+                   debug_tree=dbt)
+    sd = dbt.debug_tree[dircheckstr("SimulationData",
+                                    "2020_climate-model-predict", "2020-02-01",
+                                    "README.md", structure_element_type="File")]
     assert sd[1]["ReadmeFile"].role == "File"
     assert len(sd[1]["ReadmeFile"].path) > 0
     assert len(sd[1]["ReadmeFile"].file) > 0
diff --git a/unittests/test_variable_substitutions.py b/unittests/test_variable_substitutions.py
index 8680d792973d1f6c9aac2fe2ff0229edfa495d57..f13e759982e8102bbf37e65311ff4073ba52e5a2 100644
--- a/unittests/test_variable_substitutions.py
+++ b/unittests/test_variable_substitutions.py
@@ -2,7 +2,9 @@
 # Tests for variable substitutions
 # A. Schlemmer, 05/2022

+from caoscrawler.debug_tree import DebugTree
 from caoscrawler import Crawler
+from caoscrawler.scanner import scan_directory
 from caoscrawler.structure_elements import File, DictTextElement, DictListElement
 from caoscrawler.identifiable_adapters import IdentifiableAdapter, LocalStorageIdentifiableAdapter
 from functools import partial
@@ -32,33 +34,21 @@ def dircheckstr(element_type, *pathcomponents):
     return "caoscrawler.structure_elements." + element_type + ": " + basename(join(*pathcomponents)) + ", " + rfp("test_directories", "example_substitutions", *pathcomponents)

-@pytest.fixture
-def crawler():
-    crawler = Crawler()
-    crawled_data, debug_tree = crawler.crawl_directory(rfp("test_directories", "example_substitutions", "ExperimentalData"),
-                                                       rfp("test_directories", "example_substitutions", "substitutions.yml"))
-    return crawler, crawled_data, debug_tree
+def test_substitutions():

-
-@pytest.fixture
-def crawler_2():
-    crawler = Crawler()
-    crawled_data, debug_tree = crawler.crawl_directory(rfp("test_directories", "example_substitutions", "ExperimentalData"),
-                                                       rfp("test_directories", "example_substitutions",
-                                                           "substitutions_parents.yml"))
-    return crawler, crawled_data, debug_tree
-
-
-def test_substitutions(crawler):
+    dbt = DebugTree()
+    scan_directory(rfp("test_directories", "example_substitutions", "ExperimentalData"),
+                   rfp("test_directories", "example_substitutions", "substitutions.yml"),
+                   debug_tree=dbt)
     # @review Florian Spreckelsen 2022-05-13
     for i in range(2):
-        subd = crawler[2].debug_tree[dircheckstr(
+        subd = dbt.debug_tree[dircheckstr(
             "File", "ExperimentalData", "220512_data.dat")]
         assert subd[i]["Experiment"].get_property("date").value == "2022-05-12"
         assert isinstance(subd[i]["ExperimentSeries"].get_property(
             "Experiment").value, db.Record)

-    subd = crawler[2].debug_tree[dircheckstr("Directory", "ExperimentalData")]
+    subd = dbt.debug_tree[dircheckstr("Directory", "ExperimentalData")]
     assert subd[i]["Project"].name == "project"
     assert isinstance(subd[i]["Project"].get_property(
         "Experiments").value, list)
@@ -70,11 +60,16 @@ def test_substitutions(crawler):
         "dates").value[0] == "2022-05-12"


-def test_substitutions_parents(crawler_2):
+def test_substitutions_parents():
+    dbt = DebugTree()
+    scan_directory(rfp("test_directories", "example_substitutions", "ExperimentalData"),
+                   rfp("test_directories", "example_substitutions",
+                       "substitutions_parents.yml"),
+                   debug_tree=dbt)
     # This is a test for:
     # https://gitlab.indiscale.com/caosdb/src/caosdb-crawler/-/issues/35
     # ... testing whether variable substitutions can be used in parent declarations.
-    subd = crawler_2[2].debug_tree[dircheckstr(
+    subd = dbt.debug_tree[dircheckstr(
         "File", "ExperimentalData", "220512_data.dat")]
     # subd[0] <- generalStore
     # subd[1] <- recordStore
@@ -85,11 +80,16 @@ def test_substitutions_parents(crawler_2):
     assert parents[1].name == "Month_05"


-def test_empty_parents(crawler_2):
+def test_empty_parents():
+    dbt = DebugTree()
+    scan_directory(rfp("test_directories", "example_substitutions", "ExperimentalData"),
+                   rfp("test_directories", "example_substitutions",
+                       "substitutions_parents.yml"),
+                   debug_tree=dbt)
     # This is a test for:
     # https://gitlab.com/caosdb/caosdb-crawler/-/issues/8
-    subd = crawler_2[2].debug_tree[dircheckstr(
+    subd = dbt.debug_tree[dircheckstr(
         "File", "ExperimentalData", "220512_data.dat")]

     parents = subd[1]["RecordWithoutParents"].get_parents()