diff --git a/src/newcrawler/crawl.py b/src/newcrawler/crawl.py index 02241ff7bc25e84ba6c09a1928678ce4ea00f283..2655313c40b705d6bac9f61932c5f93f9acd04c9 100644 --- a/src/newcrawler/crawl.py +++ b/src/newcrawler/crawl.py @@ -79,7 +79,9 @@ class FileSystemStructureElement(StructureElement): self.path = path def __str__(self): - return "{}: {}, {}".format(self.__class__, self.name, self.path) + class_name_short = str(self.__class__).replace( + "<class \'", "")[:-2] + return "{}: {}, {}".format(class_name_short, self.name, self.path) class Directory(FileSystemStructureElement): pass @@ -389,7 +391,7 @@ class Crawler(object): self.debug = debug if self.debug: - self.debug_tree = dict() + self.debug_tree: dict[str, tuple] = dict() def crawl_directory(self, dirname: str, cfood: str): @@ -466,7 +468,7 @@ class Crawler(object): children = converter.create_children(generalStore_copy, element) if self.debug: self.debug_tree[str(element)] = ( - generalStore_copy, recordStore_copy) + generalStore_copy.storage, recordStore_copy.storage) self.crawl(children, global_converters, converter.converters, generalStore_copy, recordStore_copy) diff --git a/tests/test_tool.py b/tests/test_tool.py index 5bd120a65ee20c02c8ea9e2feb43a1f88bfd1c58..32e3362d595f0be9e079ec648ff43516667a8a2c 100755 --- a/tests/test_tool.py +++ b/tests/test_tool.py @@ -4,12 +4,36 @@ # A. Schlemmer, 06/2021 from newcrawler import Crawler -from os.path import join, dirname +from os.path import join, dirname, basename +import yaml + +def rfp(*pathcomponents): + """ + Return full path. + Shorthand convenience function. + """ + return join(dirname(__file__), *pathcomponents) + +def dircheckstr(*pathcomponents): + """ + Return the debug tree identifier for a given path.
+ """ + return "newcrawler.crawl.Directory: " + basename(join(*pathcomponents)) + ", " + rfp("test_directories", "examples_article", *pathcomponents) def test_crawler(): crawler = Crawler(debug=True) - crawler.crawl_directory(join(dirname(__file__), "test_directories/examples_article"), - join(dirname(__file__), "scifolder_cfood.yml")) + crawler.crawl_directory(rfp("test_directories", "examples_article"), + rfp("scifolder_cfood.yml")) + + # debug_variables = dict() + # for k, v in crawler.debug_tree.items(): + # debug_variables[k] = v[0] + # print(yaml.dump(debug_variables)) - print(crawler.debug_tree) - assert crawler.generalStore["date"] == "test" + subd = crawler.debug_tree[dircheckstr("DataAnalysis")] + assert len(subd) == 2 + assert len(subd[0]) == 0 + subd = crawler.debug_tree[dircheckstr("DataAnalysis", "2020_climate-model-predict")] + assert len(subd[0]) == 2 + assert subd[0]["date"] == "2020" + assert subd[0]["identifier"] == "climate-model-predict"