diff --git a/src/caoscrawler/crawl.py b/src/caoscrawler/crawl.py index 3808cd8d218abc4d9edf00afeda5f9fe4485478f..3f963bc3d0e3c5ed733356315fe6fd8fe8293cff 100644 --- a/src/caoscrawler/crawl.py +++ b/src/caoscrawler/crawl.py @@ -258,8 +258,8 @@ class Crawler(object): "The function start_crawling in the crawl module is deprecated. " "Please use scan_structure_elements from the scanner module.")) - data = scan_structure_elements( - items, crawler_definition, converter_registry, restrict_path) + data, _ = scan_structure_elements( + items, crawler_definition, converter_registry, restricted_path) self.crawled_data = data return data @@ -288,9 +288,9 @@ class Crawler(object): "Please use scan_directory from the scanner module.")) self.crawled_directory = crawled_directory - data = scan_directory(crawled_directory, - crawler_definition_path, - restricted_path) + data, _ = scan_directory(crawled_directory, + crawler_definition_path, + restricted_path) self.crawled_data = data return data @@ -940,7 +940,7 @@ ____________________\n""".format(i + 1, len(pending_changes)) + str(el[3])) res[converter.name]["subtree"][k[0]] = d[k[0]] return res - def save_debug_data(self, debug_tree: DebugTree, filename: str): + def save_debug_data(self, filename: str, debug_tree: DebugTree = None): """ Save the information contained in a debug_tree to a file named filename. """ @@ -1022,7 +1022,8 @@ def crawler_main(crawled_directory_path: str, """ crawler = Crawler(securityMode=securityMode) try: - crawled_data, debug_tree = crawler.crawl_directory( + + crawled_data, debug_tree = scan_directory( crawled_directory_path, cfood_file_name, restricted_path) except ConverterValidationError as err: logger.error(err) diff --git a/src/caoscrawler/scanner.py b/src/caoscrawler/scanner.py index 83cd6c9fbce63e0cd47fec90350576f09b378c4b..648c64c24486d1b3d71aa92dc15797db327b514e 100644 --- a/src/caoscrawler/scanner.py +++ b/src/caoscrawler/scanner.py @@ -360,7 +360,8 @@ def scanner(items: list[StructureElement], def scan_directory(dirname: str, crawler_definition_path: str, - restricted_path: Optional[list[str]] = None): + restricted_path: Optional[list[str]] = None, + debug_tree: Optional[DebugTree] = None): """ Crawl a single directory. Formerly known as "crawl_directory". @@ -396,13 +397,16 @@ def scan_directory(dirname: str, crawler_definition_path: str, dirname), crawler_definition, converter_registry, - restricted_path=restricted_path) + restricted_path=restricted_path, + debug_tree=debug_tree + ) def scan_structure_elements(items: Union[list[StructureElement], StructureElement], crawler_definition: dict, converter_registry: dict, - restricted_path: Optional[list[str]] = None): + restricted_path: Optional[list[str]] = None, + debug_tree: Optional[DebugTree] = None): """ Start point of the crawler recursion. @@ -437,4 +441,6 @@ def scan_structure_elements(items: Union[list[StructureElement], StructureElemen return scanner( items=items, converters=converters, - restricted_path=restricted_path) + restricted_path=restricted_path, + debug_tree=debug_tree + ) diff --git a/unittests/test_cfood_metadata.py b/unittests/test_cfood_metadata.py index 2881a47df2418bd0d794f79291b166f091456cf3..e66ac484f6175e6aac4821cfe6676c2f93283d4f 100644 --- a/unittests/test_cfood_metadata.py +++ b/unittests/test_cfood_metadata.py @@ -69,9 +69,12 @@ SimulationData: with pytest.warns(UserWarning) as uw: _temp_file_load(definition_text) - assert len(uw) == 1 - assert "No crawler version specified in cfood definition" in uw[0].message.args[0] - assert "Specifying a version is highly recommended" in uw[0].message.args[0] + found = False + for w in uw: + if ("No crawler version specified in cfood definition" in w.message.args[0] and + "Specifying a version is highly recommended" in w.message.args[0]): + found = True + assert found # metadata section is missing alltogether definition_text = """ @@ -83,9 +86,12 @@ SimulationData: with pytest.warns(UserWarning) as uw: _temp_file_load(definition_text) - assert len(uw) == 1 - assert "No crawler version specified in cfood definition" in uw[0].message.args[0] - assert "Specifying a version is highly recommended" in uw[0].message.args[0] + found = False + for w in uw: + if ("No crawler version specified in cfood definition" in w.message.args[0] and + "Specifying a version is highly recommended" in w.message.args[0]): + found = True + assert found def test_warning_if_version_too_old(): @@ -108,20 +114,26 @@ SimulationData: with pytest.warns(UserWarning) as uw: _temp_file_load(definition_text) - assert len(uw) == 1 - assert "cfood was written for a previous crawler version" in uw[0].message.args[0] - assert "version specified in cfood: 0.2.0" in uw[0].message.args[0] - assert "version installed on your system: 0.3.0" in uw[0].message.args[0] + found = False + for w in uw: + if ("cfood was written for a previous crawler version" in w.message.args[0] and + "version specified in cfood: 0.2.0" in w.message.args[0] and + "version installed on your system: 0.3.0" in w.message.args[0]): + found = True + assert found # higher major caoscrawler.version.version = "1.1.0" with pytest.warns(UserWarning) as uw: _temp_file_load(definition_text) - assert len(uw) == 1 - assert "cfood was written for a previous crawler version" in uw[0].message.args[0] - assert "version specified in cfood: 0.2.0" in uw[0].message.args[0] - assert "version installed on your system: 1.1.0" in uw[0].message.args[0] + found = False + for w in uw: + if ("cfood was written for a previous crawler version" in w.message.args[0] and + "version specified in cfood: 0.2.0" in w.message.args[0] and + "version installed on your system: 1.1.0" in w.message.args[0]): + found = True + assert found def test_error_if_version_too_new():