From cc10f1261268c0240d92818b197410184e8ff0bb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20tom=20W=C3=B6rden?= <henrik@trineo.org> Date: Thu, 14 Apr 2022 16:38:00 +0200 Subject: [PATCH] MAINT: inherit jsonconverter from dictconverter --- src/newcrawler/converters.py | 61 ++++++++++++++++++------------------ unittests/test_converters.py | 6 ---- 2 files changed, 31 insertions(+), 36 deletions(-) diff --git a/src/newcrawler/converters.py b/src/newcrawler/converters.py index 14a4020c..9b1c9450 100644 --- a/src/newcrawler/converters.py +++ b/src/newcrawler/converters.py @@ -364,44 +364,18 @@ class MarkdownFileConverter(Converter): return m.groupdict() -class JSONFileConverter(Converter): - def typecheck(self, element: StructureElement): - print(type(element)) - return isinstance(element, File) - - def match(self, element: StructureElement): - if not self.typecheck(element): - raise RuntimeError("Element must be a file") - m = re.match(self.definition["match"], element.name) - if m is None: - return None - if "validate" in self.definition and self.definition["validate"]: - # TODO(fspreck) validate against json schema, raise a Validation - # error if not valid. - pass - return m.groupdict() - - def create_children(self, generalStore: GeneralStore, element: StructureElement): - if not self.typecheck(element): - raise RuntimeError("A JSON file is needed to create children") - with open(element.path, 'r') as json_file: - json_data = json.load(json_file) - if not isinstance(json_data, dict): - raise NotImplementedError("JSON file must contain a dict") - - children = [] - return [Dict(name="json_dict", value=json_data)] - - class DictConverter(Converter): # TODO use Dict as typecheck? def create_children(self, generalStore: GeneralStore, element: StructureElement): if not self.typecheck(element): raise RuntimeError("A dict is needed to create children") + return self._create_children_from_dict(element.value) + + def _create_children_from_dict(self, data): children = [] - for name, value in element.value.items(): + for name, value in data.items(): # TODO(fspreck): Treat similar to yaml header and introduce more # DictXXXElements for numbers, booleans, enums, ... if type(value) == list: @@ -433,6 +407,33 @@ class DictConverter(Converter): return {} +class JSONFileConverter(DictConverter): + def typecheck(self, element: StructureElement): + return isinstance(element, File) + + def match(self, element: StructureElement): + if not self.typecheck(element): + raise RuntimeError("Element must be a file") + m = re.match(self.definition["match"], element.name) + if m is None: + return None + if "validate" in self.definition and self.definition["validate"]: + # TODO(fspreck) validate against json schema, raise a Validation + # error if not valid. + pass + return m.groupdict() + + def create_children(self, generalStore: GeneralStore, element: StructureElement): + if not self.typecheck(element): + raise RuntimeError("A JSON file is needed to create children") + with open(element.path, 'r') as json_file: + json_data = json.load(json_file) + if not isinstance(json_data, dict): + raise NotImplementedError("JSON file must contain a dict") + + return self._create_children_from_dict(json_data) + + class DictTextElementConverter(Converter): def create_children(self, generalStore: GeneralStore, element: StructureElement): return [] diff --git a/unittests/test_converters.py b/unittests/test_converters.py index 278a1f2e..935a70ca 100644 --- a/unittests/test_converters.py +++ b/unittests/test_converters.py @@ -181,12 +181,6 @@ def test_json_converter(converter_registry): assert len(m) == 0 children = jsonconverter.create_children(None, test_json) - dictconverter = DictConverter( - definition={"match": "(.*)"}, - name="TestDictConverter", - converter_registry=converter_registry) - - children = dictconverter.create_children(None, children[0]) assert len(children) == 8 assert children[0].__class__ == DictTextElement assert children[0].name == "name" -- GitLab