From 641ca8c2ed1c7cef27ec5448d73a3b356cbb75c8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Henrik=20tom=20W=C3=B6rden?= <h.tomwoerden@indiscale.com>
Date: Wed, 15 Feb 2023 19:29:44 +0100
Subject: [PATCH] ENH: add a decorator that adds a path variable

Converters for file-system-related structure elements (e.g. files and
directories) can use it to provide a variable, accessible in the cfood,
that contains the file system path of the structure element.

Also refactored the related converters: the Markdown, JSON and YAML file
converters now inherit from SimpleFileConverter.
---
 src/caoscrawler/converters.py | 83 ++++++++++++++---------------------
 unittests/test_converters.py  | 14 ++++++
 2 files changed, 47 insertions(+), 50 deletions(-)

diff --git a/src/caoscrawler/converters.py b/src/caoscrawler/converters.py
index ed48c130..7e0603d1 100644
--- a/src/caoscrawler/converters.py
+++ b/src/caoscrawler/converters.py
@@ -110,6 +110,19 @@ class ConverterValidationError(Exception):
         self.message = msg
 
 
+def create_path_value(func):
+    """Decorator for ``create_values`` methods that also stores the element's path.
+
+    After calling the wrapped method, ``element.path`` is stored in ``values`` under the key
+    ``<self.name>_path``. Intended for file system elements that have a path (File, Directory).
+    """
+
+    def inner(self, values: GeneralStore, element: StructureElement):
+        func(self, values=values, element=element)  # wrapped method's return value is dropped
+        values.update({self.name + "_path": element.path})
+    return inner
+
+
 def replace_variables(propvalue, values: GeneralStore):
     """
     This function replaces variables in property values (and possibly other locations,
@@ -478,6 +491,10 @@ class DirectoryConverter(Converter):
 
         return children
 
+    @create_path_value
+    def create_values(self, values: GeneralStore, element: StructureElement):
+        super().create_values(values=values, element=element)  # decorator adds "<name>_path"
+
     def typecheck(self, element: StructureElement):
         return isinstance(element, Directory)
 
@@ -525,6 +542,10 @@ class SimpleFileConverter(Converter):
     def create_children(self, generalStore: GeneralStore, element: StructureElement):
         return list()
 
+    @create_path_value
+    def create_values(self, values: GeneralStore, element: StructureElement):
+        super().create_values(values=values, element=element)  # decorator adds "<name>_path"
+
     @Converter.debug_matching("name")
     def match(self, element: StructureElement):
         # TODO: See comment on types and inheritance
@@ -543,7 +564,7 @@ class FileConverter(SimpleFileConverter):
         super().__init__(*args, **kwargs)
 
 
-class MarkdownFileConverter(Converter):
+class MarkdownFileConverter(SimpleFileConverter):
     """
     reads the yaml header of markdown files (if a such a header exists).
     """
@@ -553,8 +574,15 @@ class MarkdownFileConverter(Converter):
         if not isinstance(element, File):
             raise RuntimeError("A markdown file is needed to create children.")
 
-        header = yaml_header_tools.get_header_from_file(
-            element.path, clean=False)
+        try:
+            header = yaml_header_tools.get_header_from_file(
+                element.path, clean=False)
+        except yaml_header_tools.NoValidHeader as err:  # bind it: the message below uses it
+            path = generalStore[self.name]
+            raise ConverterValidationError(
+                "Error during the validation (yaml header cannot be read) of the markdown file "
+                "located at the following node in the data structure:\n"
+                f"{path}\n" + str(err)) from err  # str(): .message is not guaranteed in Py3
         children: List[StructureElement] = []
 
         for name, entry in header.items():
@@ -567,25 +595,6 @@ class MarkdownFileConverter(Converter):
                     "Header entry {} has incompatible type.".format(name))
         return children
 
-    def typecheck(self, element: StructureElement):
-        return isinstance(element, File)
-
-    @Converter.debug_matching("name")
-    def match(self, element: StructureElement):
-        # TODO: See comment on types and inheritance
-        if not isinstance(element, File):
-            raise RuntimeError("Element must be a file.")
-        m = re.match(self.definition["match"], element.name)
-        if m is None:
-            return None
-        try:
-            yaml_header_tools.get_header_from_file(element.path)
-        except yaml_header_tools.NoValidHeader:
-            # TODO(salexan): Raise a validation error instead of just not
-            # matching silently.
-            return None
-        return m.groupdict()
-
 
 def convert_basic_element(element: Union[list, dict, bool, int, float, str, None], name=None,
                           msg_prefix=""):
@@ -692,20 +701,7 @@ class DictDictElementConverter(DictElementConverter):
         super().__init__(*args, **kwargs)
 
 
-class JSONFileConverter(Converter):
-    def typecheck(self, element: StructureElement):
-        return isinstance(element, File)
-
-    @Converter.debug_matching("name")
-    def match(self, element: StructureElement):
-        # TODO: See comment on types and inheritance
-        if not self.typecheck(element):
-            raise RuntimeError("Element must be a file")
-        m = re.match(self.definition["match"], element.name)
-        if m is None:
-            return None
-        return m.groupdict()
-
+class JSONFileConverter(SimpleFileConverter):
     def create_children(self, generalStore: GeneralStore, element: StructureElement):
         # TODO: See comment on types and inheritance
         if not isinstance(element, File):
@@ -727,20 +723,7 @@ class JSONFileConverter(Converter):
         return [structure_element]
 
 
-class YAMLFileConverter(Converter):
-    def typecheck(self, element: StructureElement):
-        return isinstance(element, File)
-
-    @Converter.debug_matching("name")
-    def match(self, element: StructureElement):
-        # TODO: See comment on types and inheritance
-        if not self.typecheck(element):
-            raise RuntimeError("Element must be a file")
-        m = re.match(self.definition["match"], element.name)
-        if m is None:
-            return None
-        return m.groupdict()
-
+class YAMLFileConverter(SimpleFileConverter):
     def create_children(self, generalStore: GeneralStore, element: StructureElement):
         # TODO: See comment on types and inheritance
         if not isinstance(element, File):
diff --git a/unittests/test_converters.py b/unittests/test_converters.py
index f72deda1..f4643bd0 100644
--- a/unittests/test_converters.py
+++ b/unittests/test_converters.py
@@ -610,3 +610,17 @@ def test_load_converters():
     assert "SimpleFile" in converter_registry
     assert "Directory" in converter_registry
     assert "ListElement" in converter_registry
+
+
+def test_create_path_value(converter_registry):
+    """create_values of a "Directory" converter stores the element path as "<name>_path"."""
+    dc = Converter.converter_factory(
+        definition={
+            "type": "Directory",
+            "match": ".*"
+        },
+        name="Test", converter_registry=converter_registry)
+    values = GeneralStore()
+    dc.create_values(values, Directory("a", "/a"))
+    assert "Test_path" in values  # key is the converter name ("Test") + "_path"
+    assert values["Test_path"] == "/a"
-- 
GitLab