From 3905d7a65e49f79368d53e74148f117c5ebf5f9d Mon Sep 17 00:00:00 2001
From: Alexander Schlemmer <alexander@mail-schlemmer.de>
Date: Thu, 18 Nov 2021 13:59:00 +0100
Subject: [PATCH] ENH: implemented the copy flag in the stores and added
 corresponding unit tests

---
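Notes: a minimal, self-contained sketch of the copy-flag semantics added by this
patch, assuming a simplified Store modelled on src/newcrawler/stores.py; the keys
"Project" and "Measurement" are illustrative only.

    class Store:
        def __init__(self):
            self._storage = dict()
            self._copied = dict()  # key -> True if inherited from a parent scope

        def __setitem__(self, key, value):
            self._storage[key] = value
            self._copied[key] = False  # set directly in this scope

        def create_scoped_copy(self):
            # Everything carried over into the new scope counts as copied.
            s_copy = self.__class__()
            s_copy._storage = dict(self._storage)
            s_copy._copied = {key: True for key in self._copied}
            return s_copy

    parent = Store()
    parent["Project"] = "some record"
    child = parent.create_scoped_copy()
    child["Measurement"] = "another record"
    assert child._copied["Project"] is True       # copied from the parent scope
    assert child._copied["Measurement"] is False  # created in the child scope

Crawler.debug_copied exposes these flags per directory entry, mirroring debug_tree.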
 src/newcrawler/crawl.py  |  6 ++++++
 src/newcrawler/stores.py | 13 +++++++++++++
 tests/test_tool.py       | 26 ++++++++++++++++++++++++--
 3 files changed, 43 insertions(+), 2 deletions(-)

diff --git a/src/newcrawler/crawl.py b/src/newcrawler/crawl.py
index ebd3008b..bf6bf4c9 100644
--- a/src/newcrawler/crawl.py
+++ b/src/newcrawler/crawl.py
@@ -90,6 +90,9 @@ class Crawler(object):
                The debugging information tree is a variable stored in
                self.debug_tree. It is a dictionary mapping directory entries
                to a tuple of general stores and record stores which are valid for the directory scope.
+               Furthermore, a second tree named self.debug_copied stores whether the
+               objects in debug_tree have been copied from a higher level in the
+               hierarchy of structure elements.
         """
         self.converters = converters
         self.generalStore = generalStore
@@ -101,6 +104,7 @@ class Crawler(object):
         self.debug = debug
         if self.debug:
             self.debug_tree: dict[str, tuple] = dict()
+            self.debug_copied: dict[str, tuple] = dict()
 
     def crawl_directory(self, dirname: str,
                         cfood: str):
@@ -178,6 +182,8 @@ class Crawler(object):
                     if self.debug:
                         self.debug_tree[str(element)] = (
                             generalStore_copy.get_storage(), recordStore_copy.get_storage())
+                        self.debug_copied[str(element)] = (
+                            generalStore_copy.get_dict_copied(), recordStore_copy.get_dict_copied())
 
                     self.crawl(children, global_converters, converter.converters,
                                generalStore_copy, recordStore_copy)
diff --git a/src/newcrawler/stores.py b/src/newcrawler/stores.py
index 275a2346..64742069 100644
--- a/src/newcrawler/stores.py
+++ b/src/newcrawler/stores.py
@@ -34,6 +34,9 @@ class Store(object):
 
     def __init__(self):
         self._storage = dict()
+        # This dict records, for each key in _storage, whether the corresponding
+        # item was copied from another Store or was newly created in this Store.
+        self._copied = dict()
 
     def __getitem__(self, key):
         return self._storage[key]
@@ -43,9 +46,12 @@ class Store(object):
 
     def update(self, other):
         self._storage.update(other)
+        for key in other:
+            self._copied[key] = False
 
     def __setitem__(self, key, value):
         self._storage[key] = value
+        self._copied[key] = False
 
     def get_storage(self):
         return self._storage
@@ -53,8 +59,15 @@ class Store(object):
     def create_scoped_copy(self):
         s_copy = self.__class__()
         s_copy._storage = dict(self._storage)
+        s_copy._copied = {key: True for key in self._copied}
         return s_copy
 
+    def get_dict_copied(self):
+        """
+        Return the dict of copy flags; only used for debugging.
+        """
+        return self._copied
+
 class GeneralStore(Store):
     pass
 
diff --git a/tests/test_tool.py b/tests/test_tool.py
index ef4b5929..9ba90527 100755
--- a/tests/test_tool.py
+++ b/tests/test_tool.py
@@ -28,11 +28,16 @@ def test_crawler():
               rfp("scifolder_cfood.yml"))
     
     subd = crawler.debug_tree[dircheckstr("DataAnalysis")]
+    subc = crawler.debug_copied[dircheckstr("DataAnalysis")]
     assert len(subd) == 2
     assert len(subd[0]) == 0
     assert len(subd[1]) == 0
+    assert len(subc) == 2
+    assert len(subc[0]) == 0
+    assert len(subc[1]) == 0
     
     subd = crawler.debug_tree[dircheckstr("DataAnalysis", "2020_climate-model-predict")]
+    subc = crawler.debug_copied[dircheckstr("DataAnalysis", "2020_climate-model-predict")]
     
     assert len(subd[1]) == 1
     assert len(subd[1]["Project"].get_parents()) == 1
@@ -43,13 +48,22 @@ def test_crawler():
     assert len(subd[0]) == 3
     assert subd[0]["date"] == "2020"
     assert subd[0]["identifier"] == "climate-model-predict"
-
     assert subd[0]["Project"].__class__ == db.Record
 
+    # Check the copy flags for the first level in the hierarchy:
+    assert len(subc[0]) == 3
+    assert len(subc[1]) == 1
+    assert subc[1]["Project"] is False
+    assert subc[0]["Project"] is False
+    assert subc[0]["date"] is False
+    assert subc[0]["identifier"] is False
+
     subd = crawler.debug_tree[dircheckstr("DataAnalysis",
                                           "2020_climate-model-predict",
                                           "2020-02-08_prediction-errors")]
-    print(subd)
+    subc = crawler.debug_copied[dircheckstr("DataAnalysis",
+                                            "2020_climate-model-predict",
+                                            "2020-02-08_prediction-errors")]
     assert len(subd[0]) == 4
     assert subd[0]["date"] == "2020-02-08"
     assert subd[0]["identifier"] == "prediction-errors"
@@ -71,6 +85,14 @@ def test_crawler():
     assert subd[1]["Measurement"].get_property("project").value.__class__ == db.Record
     assert subd[1]["Measurement"].get_property("project").value == subd[0]["Project"]
 
+    # Check the copy flags for the second level in the hierarchy:
+    assert subc[1]["Project"] is True
+    assert subc[0]["Project"] is True
+    assert subc[1]["Measurement"] is False
+    assert subc[0]["Measurement"] is False
+    assert subc[0]["date"] is False
+    assert subc[0]["identifier"] is False
+
 def test_markdown_converter():
     test_readme = File("README.md", rfp(
         "test_directories", "examples_article", "DataAnalysis",
-- 
GitLab