From 388930abd18016d3f83ee3bc3aba3ca8d3b42bf8 Mon Sep 17 00:00:00 2001
From: Alexander Schlemmer <alexander@mail-schlemmer.de>
Date: Thu, 18 Nov 2021 10:23:18 +0100
Subject: [PATCH] ENH: cleaned up debug tree and streamlined unit testing

---
 src/newcrawler/crawl.py |  8 +++++---
 tests/test_tool.py      | 34 +++++++++++++++++++++++++++++-----
 2 files changed, 34 insertions(+), 8 deletions(-)

diff --git a/src/newcrawler/crawl.py b/src/newcrawler/crawl.py
index 02241ff7..2655313c 100644
--- a/src/newcrawler/crawl.py
+++ b/src/newcrawler/crawl.py
@@ -79,7 +79,9 @@ class FileSystemStructureElement(StructureElement):
         self.path = path
 
     def __str__(self):
-        return "{}: {}, {}".format(self.__class__, self.name, self.path)
+        class_name_short = str(self.__class__).replace(
+            "<class \'", "")[:-2]
+        return "{}: {}, {}".format(class_name_short, self.name, self.path)
 
 class Directory(FileSystemStructureElement):
     pass
@@ -389,7 +391,7 @@ class Crawler(object):
         self.debug = debug
 
         if self.debug:
-            self.debug_tree = dict()
+            self.debug_tree: dict[str, tuple] = dict()
 
     def crawl_directory(self, dirname: str, cfood: str):
@@ -466,7 +468,7 @@ class Crawler(object):
                 children = converter.create_children(generalStore_copy, element)
                 if self.debug:
                     self.debug_tree[str(element)] = (
-                        generalStore_copy, recordStore_copy)
+                        generalStore_copy.storage, recordStore_copy.storage)
 
                 self.crawl(children, global_converters, converter.converters,
                            generalStore_copy, recordStore_copy)
diff --git a/tests/test_tool.py b/tests/test_tool.py
index 5bd120a6..32e3362d 100755
--- a/tests/test_tool.py
+++ b/tests/test_tool.py
@@ -4,12 +4,36 @@
 # A. Schlemmer, 06/2021
 
 from newcrawler import Crawler
-from os.path import join, dirname
+from os.path import join, dirname, basename
+import yaml
+
+def rfp(*pathcomponents):
+    """
+    Return full path.
+    Shorthand convenience function.
+    """
+    return join(dirname(__file__), *pathcomponents)
+
+def dircheckstr(*pathcomponents):
+    """
+    Return the debug tree identifier for a given path.
+    """
+    return "newcrawler.crawl.Directory: " + basename(join(*pathcomponents)) + ", " + rfp("test_directories", "examples_article", *pathcomponents)
 
 def test_crawler():
     crawler = Crawler(debug=True)
-    crawler.crawl_directory(join(dirname(__file__), "test_directories/examples_article"),
-                            join(dirname(__file__), "scifolder_cfood.yml"))
+    crawler.crawl_directory(rfp("test_directories", "examples_article"),
+                            rfp("scifolder_cfood.yml"))
+
+    # debug_variables = dict()
+    # for k, v in crawler.debug_tree.items():
+    #     debug_variables[k] = v[0]
+    # print(yaml.dump(debug_variables))
 
-    print(crawler.debug_tree)
-    assert crawler.generalStore["date"] == "test"
+    subd = crawler.debug_tree[dircheckstr("DataAnalysis")]
+    assert len(subd) == 2
+    assert len(subd[0]) == 0
+    subd = crawler.debug_tree[dircheckstr("DataAnalysis", "2020_climate-model-predict")]
+    assert len(subd[0]) == 2
+    assert subd[0]["date"] == "2020"
+    assert subd[0]["identifier"] == "climate-model-predict"
-- 
GitLab
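
Note (not part of the patch): below is a minimal, self-contained sketch of how the shortened class name produced by the new __str__ turns into the debug-tree key that the test's dircheckstr() helper reconstructs. The classes here are stand-ins for illustration only, not the real newcrawler implementation, so the module prefix in the printed key is "__main__" rather than "newcrawler.crawl".

class StructureElement:
    pass


class FileSystemStructureElement(StructureElement):
    def __init__(self, name, path):
        self.name = name
        self.path = path

    def __str__(self):
        # str(self.__class__) looks like "<class '__main__.Directory'>";
        # as in the patch, strip the "<class '" prefix and the trailing "'>".
        class_name_short = str(self.__class__).replace("<class '", "")[:-2]
        return "{}: {}, {}".format(class_name_short, self.name, self.path)


class Directory(FileSystemStructureElement):
    pass


if __name__ == "__main__":
    d = Directory("DataAnalysis", "/some/path/examples_article/DataAnalysis")
    # Prints a key of the form "<module>.Directory: DataAnalysis, /some/path/...".
    # In the patched crawler this string indexes debug_tree, which is why the
    # test can look up entries via dircheckstr().
    print(str(d))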