From 388930abd18016d3f83ee3bc3aba3ca8d3b42bf8 Mon Sep 17 00:00:00 2001
From: Alexander Schlemmer <alexander@mail-schlemmer.de>
Date: Thu, 18 Nov 2021 10:23:18 +0100
Subject: [PATCH] ENH: cleaned up debug tree and streamlined unit testing

---
 src/newcrawler/crawl.py |  8 +++++---
 tests/test_tool.py      | 34 +++++++++++++++++++++++++++++-----
 2 files changed, 34 insertions(+), 8 deletions(-)

diff --git a/src/newcrawler/crawl.py b/src/newcrawler/crawl.py
index 02241ff7..2655313c 100644
--- a/src/newcrawler/crawl.py
+++ b/src/newcrawler/crawl.py
@@ -79,7 +79,9 @@ class FileSystemStructureElement(StructureElement):
         self.path = path
 
     def __str__(self):
-        return "{}: {}, {}".format(self.__class__, self.name, self.path)
+        class_name_short = str(self.__class__).replace(
+            "<class \'", "")[:-2]
+        return "{}: {}, {}".format(class_name_short, self.name, self.path)
 
 class Directory(FileSystemStructureElement):
     pass
@@ -389,7 +391,7 @@ class Crawler(object):
 
         self.debug = debug
         if self.debug:
-            self.debug_tree = dict()
+            self.debug_tree: dict[str, tuple] = dict()
 
     def crawl_directory(self, dirname: str,
                         cfood: str):
@@ -466,7 +468,7 @@ class Crawler(object):
                     children = converter.create_children(generalStore_copy, element)
                     if self.debug:
                         self.debug_tree[str(element)] = (
-                            generalStore_copy, recordStore_copy)
+                            generalStore_copy.storage, recordStore_copy.storage)
 
                     self.crawl(children, global_converters, converter.converters,
                                generalStore_copy, recordStore_copy)
diff --git a/tests/test_tool.py b/tests/test_tool.py
index 5bd120a6..32e3362d 100755
--- a/tests/test_tool.py
+++ b/tests/test_tool.py
@@ -4,12 +4,36 @@
 # A. Schlemmer, 06/2021
 
 from newcrawler import Crawler
-from os.path import join, dirname
+from os.path import join, dirname, basename
+import yaml
+
+def rfp(*pathcomponents):
+    """
+    Return full path.
+    Shorthand convenience function.
+    """
+    return join(dirname(__file__), *pathcomponents)
+
+def dircheckstr(*pathcomponents):
+    """
+    Return the debug tree identifier for a given path.
+    """
+    return "newcrawler.crawl.Directory: " + basename(join(*pathcomponents)) + ", " + rfp("test_directories", "examples_article", *pathcomponents)
 
 def test_crawler():
     crawler = Crawler(debug=True)
-    crawler.crawl_directory(join(dirname(__file__), "test_directories/examples_article"),
-              join(dirname(__file__), "scifolder_cfood.yml"))
+    crawler.crawl_directory(rfp("test_directories", "examples_article"),
+              rfp("scifolder_cfood.yml"))
+
+    # debug_variables = dict()
+    # for k, v in crawler.debug_tree.items():
+    #     debug_variables[k] = v[0]
+    # print(yaml.dump(debug_variables))
 
-    print(crawler.debug_tree)
-    assert crawler.generalStore["date"] == "test"
+    subd = crawler.debug_tree[dircheckstr("DataAnalysis")]
+    assert len(subd) == 2
+    assert len(subd[0]) == 0
+    subd = crawler.debug_tree[dircheckstr("DataAnalysis", "2020_climate-model-predict")]
+    assert len(subd[0]) == 2
+    assert subd[0]["date"] == "2020"
+    assert subd[0]["identifier"] == "climate-model-predict"
-- 
GitLab