From 641ca8c2ed1c7cef27ec5448d73a3b356cbb75c8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Henrik=20tom=20W=C3=B6rden?= <h.tomwoerden@indiscale.com>
Date: Wed, 15 Feb 2023 19:29:44 +0100
Subject: [PATCH] ENH: add a decorator that adds a path variable

file system related structure elements can use it to have a variable
that can be accessed in the cfood that contains the file system path of
the structure element

Also, did some refactoring of the related StructureElements
---
Reviewer note (commentary placed between the "---" line and the diffstat;
it is not part of the commit message):
In the MarkdownFileConverter.create_children hunk the added except clause
referenced `err` without binding it, so a markdown file without a valid
header would have raised NameError instead of ConverterValidationError.
The clause now binds the exception ("as err"), appends str(err) and chains
the cause with "from err". The number of lines per hunk is unchanged; the
post-image blob id on the "index" line predates this edit.

 src/caoscrawler/converters.py | 83 ++++++++++++++---------------------
 unittests/test_converters.py  | 14 ++++++
 2 files changed, 47 insertions(+), 50 deletions(-)

diff --git a/src/caoscrawler/converters.py b/src/caoscrawler/converters.py
index ed48c130..7e0603d1 100644
--- a/src/caoscrawler/converters.py
+++ b/src/caoscrawler/converters.py
@@ -110,6 +110,19 @@ class ConverterValidationError(Exception):
         self.message = msg
 
 
+def create_path_value(func):
+    """decorator for create_values functions that adds a value containing the path
+
+    should be used for StructureElement that are associated with file system objects that have a
+    path, like File or Directory.
+    """
+
+    def inner(self, values: GeneralStore, element: StructureElement):
+        func(self, values=values, element=element)
+        values.update({self.name + "_path": element.path})
+    return inner
+
+
 def replace_variables(propvalue, values: GeneralStore):
     """
     This function replaces variables in property values (and possibly other locations,
@@ -478,6 +491,10 @@ class DirectoryConverter(Converter):
 
         return children
 
+    @create_path_value
+    def create_values(self, values: GeneralStore, element: StructureElement):
+        super().create_values(values=values, element=element)
+
     def typecheck(self, element: StructureElement):
         return isinstance(element, Directory)
 
@@ -525,6 +542,10 @@ class SimpleFileConverter(Converter):
     def create_children(self, generalStore: GeneralStore, element: StructureElement):
         return list()
 
+    @create_path_value
+    def create_values(self, values: GeneralStore, element: StructureElement):
+        super().create_values(values=values, element=element)
+
     @Converter.debug_matching("name")
     def match(self, element: StructureElement):
         # TODO: See comment on types and inheritance
@@ -543,7 +564,7 @@ class FileConverter(SimpleFileConverter):
         super().__init__(*args, **kwargs)
 
 
-class MarkdownFileConverter(Converter):
+class MarkdownFileConverter(SimpleFileConverter):
     """
     reads the yaml header of markdown files (if a such a header exists).
     """
@@ -553,8 +574,15 @@ class MarkdownFileConverter(Converter):
         if not isinstance(element, File):
             raise RuntimeError("A markdown file is needed to create children.")
 
-        header = yaml_header_tools.get_header_from_file(
-            element.path, clean=False)
+        try:
+            header = yaml_header_tools.get_header_from_file(
+                element.path, clean=False)
+        except yaml_header_tools.NoValidHeader as err:
+            path = generalStore[self.name]
+            raise ConverterValidationError(
+                "Error during the validation (yaml header cannot be read) of the markdown file "
+                "located at the following node in the data structure:\n"
+                f"{path}\n" + str(err)) from err
         children: List[StructureElement] = []
 
         for name, entry in header.items():
@@ -567,25 +595,6 @@ class MarkdownFileConverter(Converter):
                     "Header entry {} has incompatible type.".format(name))
         return children
 
-    def typecheck(self, element: StructureElement):
-        return isinstance(element, File)
-
-    @Converter.debug_matching("name")
-    def match(self, element: StructureElement):
-        # TODO: See comment on types and inheritance
-        if not isinstance(element, File):
-            raise RuntimeError("Element must be a file.")
-        m = re.match(self.definition["match"], element.name)
-        if m is None:
-            return None
-        try:
-            yaml_header_tools.get_header_from_file(element.path)
-        except yaml_header_tools.NoValidHeader:
-            # TODO(salexan): Raise a validation error instead of just not
-            # matching silently.
-            return None
-        return m.groupdict()
-
 
 def convert_basic_element(element: Union[list, dict, bool, int, float, str, None], name=None,
                           msg_prefix=""):
@@ -692,20 +701,7 @@ class DictDictElementConverter(DictElementConverter):
         super().__init__(*args, **kwargs)
 
 
-class JSONFileConverter(Converter):
-    def typecheck(self, element: StructureElement):
-        return isinstance(element, File)
-
-    @Converter.debug_matching("name")
-    def match(self, element: StructureElement):
-        # TODO: See comment on types and inheritance
-        if not self.typecheck(element):
-            raise RuntimeError("Element must be a file")
-        m = re.match(self.definition["match"], element.name)
-        if m is None:
-            return None
-        return m.groupdict()
-
+class JSONFileConverter(SimpleFileConverter):
     def create_children(self, generalStore: GeneralStore, element: StructureElement):
         # TODO: See comment on types and inheritance
         if not isinstance(element, File):
@@ -727,20 +723,7 @@ class JSONFileConverter(Converter):
         return [structure_element]
 
 
-class YAMLFileConverter(Converter):
-    def typecheck(self, element: StructureElement):
-        return isinstance(element, File)
-
-    @Converter.debug_matching("name")
-    def match(self, element: StructureElement):
-        # TODO: See comment on types and inheritance
-        if not self.typecheck(element):
-            raise RuntimeError("Element must be a file")
-        m = re.match(self.definition["match"], element.name)
-        if m is None:
-            return None
-        return m.groupdict()
-
+class YAMLFileConverter(SimpleFileConverter):
     def create_children(self, generalStore: GeneralStore, element: StructureElement):
         # TODO: See comment on types and inheritance
         if not isinstance(element, File):
diff --git a/unittests/test_converters.py b/unittests/test_converters.py
index f72deda1..f4643bd0 100644
--- a/unittests/test_converters.py
+++ b/unittests/test_converters.py
@@ -610,3 +610,17 @@ def test_load_converters():
     assert "SimpleFile" in converter_registry
     assert "Directory" in converter_registry
     assert "ListElement" in converter_registry
+
+
+def test_create_path_value(converter_registry):
+    """ test using the "test_directories" folder"""
+    dc = Converter.converter_factory(
+        definition={
+            "type": "Directory",
+            "match": ".*"
+        },
+        name="Test", converter_registry=converter_registry)
+    values = GeneralStore()
+    dc.create_values(values, Directory("a", "/a"))
+    assert "Test_path" in values
+    assert values["Test_path"] == "/a"
-- 
GitLab