From f283b4b99fae534bceb74018dd6ebad1daab6b85 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20tom=20W=C3=B6rden?= <henrik@trineo.org> Date: Thu, 14 Apr 2022 16:50:56 +0200 Subject: [PATCH] MAINT: introduce more dict elements --- src/newcrawler/cfood-schema.yml | 4 ++++ src/newcrawler/converters.py | 31 ++++++++++++++++++++++++++++ src/newcrawler/crawl.py | 3 +++ src/newcrawler/structure_elements.py | 30 ++++++++++++++++++++++++++- unittests/test_converters.py | 11 +++++----- 5 files changed, 73 insertions(+), 6 deletions(-) diff --git a/src/newcrawler/cfood-schema.yml b/src/newcrawler/cfood-schema.yml index c990dc5c..5e3813e7 100644 --- a/src/newcrawler/cfood-schema.yml +++ b/src/newcrawler/cfood-schema.yml @@ -15,6 +15,10 @@ cfood: - YamlFileCaosDBRecord - MarkdownFile - DictListElement + - DictDictElement + - DictFloatElement + - DictIntegerElement + - DictBooleanElement - Definitions - Dict - JSONFile diff --git a/src/newcrawler/converters.py b/src/newcrawler/converters.py index 9b1c9450..c355cf38 100644 --- a/src/newcrawler/converters.py +++ b/src/newcrawler/converters.py @@ -30,6 +30,8 @@ import json from .utils import has_parent from .stores import GeneralStore, RecordStore from .structure_elements import (StructureElement, Directory, File, Dict, JSONFile, + DictIntegerElement, DictBooleanElement, + DictFloatElement, DictDictElement, TextElement, DictTextElement, DictElement, DictListElement) from typing import Optional, Union from abc import abstractmethod @@ -382,6 +384,14 @@ class DictConverter(Converter): children.append(DictListElement(name, value)) elif type(value) == str: children.append(DictTextElement(name, value)) + elif type(value) == dict: + children.append(DictDictElement(name, value)) + elif type(value) == int: + children.append(DictIntegerElement(name, value)) + elif type(value) == bool: + children.append(DictBooleanElement(name, value)) + elif type(value) == float: + children.append(DictFloatElement(name, value)) else: children.append(DictElement(name, value)) print(f"JSON value {name} has incompatible type.") @@ -485,6 +495,27 @@ class DictListElementConverter(Converter): return m.groupdict() +class DictDictElementConverter(Dict): + def create_children(self, generalStore: GeneralStore, element: StructureElement): + if not self.typecheck(element): + raise RuntimeError("A dict is needed to create children") + + return self._create_children_from_dict(element.value) + + def typecheck(self, element: StructureElement): + return isinstance(element, Dict) + + def match(self, element: StructureElement): + if not isinstance(element, Dict): + raise RuntimeError("Element must be a DictListElement.") + m = re.match(self.definition["match_name"], element.name) + if m is None: + return None + if "match" in self.definition: + raise NotImplementedError("Match is not implemented for DictListElement.") + return m.groupdict() + + class TextElementConverter(Converter): def create_children(self, generalStore: GeneralStore, element: StructureElement): diff --git a/src/newcrawler/crawl.py b/src/newcrawler/crawl.py index 4e62f8be..518260d1 100644 --- a/src/newcrawler/crawl.py +++ b/src/newcrawler/crawl.py @@ -238,6 +238,9 @@ class Crawler(object): "DictListElement": { "converter": "DictListElementConverter", "package": "newcrawler.converters"}, + "DictDictElement": { + "converter": "DictDictElementConverter", + "package": "newcrawler.converters"}, "TextElement": { "converter": "TextElementConverter", "package": "newcrawler.converters"} diff --git a/src/newcrawler/structure_elements.py b/src/newcrawler/structure_elements.py index 61a519f2..7c430e62 100644 --- a/src/newcrawler/structure_elements.py +++ b/src/newcrawler/structure_elements.py @@ -82,8 +82,36 @@ class DictTextElement(StructureElement): self.value = value +class DictIntegerElement(StructureElement): + def __init__(self, name: str, value: int): + super().__init__(name) + self.value = value + + +class DictBooleanElement(StructureElement): + def __init__(self, name: str, value: bool): + super().__init__(name) + self.value = value + + +class DictBooleanElement(StructureElement): + def __init__(self, name: str, value: bool): + super().__init__(name) + self.value = value + + +class DictDictElement(Dict): + pass + + class DictListElement(StructureElement): - def __init__(self, name: str, value: list): + def __init__(self, name: str, value: dict): + super().__init__(name) + self.value = value + + +class DictFloatElement(StructureElement): + def __init__(self, name: str, value: float): super().__init__(name) self.value = value diff --git a/unittests/test_converters.py b/unittests/test_converters.py index 935a70ca..2fa3eeb7 100644 --- a/unittests/test_converters.py +++ b/unittests/test_converters.py @@ -31,7 +31,8 @@ from newcrawler.converters import Converter from newcrawler.stores import GeneralStore from newcrawler.converters import MarkdownFileConverter, JSONFileConverter, DictConverter from newcrawler.structure_elements import Directory -from newcrawler.structure_elements import File, DictTextElement, DictListElement, DictElement +from newcrawler.structure_elements import (File, DictTextElement, DictListElement, DictElement, + DictBooleanElement, DictDictElement, DictIntegerElement, DictFloatElement) from test_tool import rfp @@ -187,16 +188,16 @@ def test_json_converter(converter_registry): assert children[0].value.__class__ == str assert children[0].value == "DEMO" - assert children[1].__class__ == DictElement + assert children[1].__class__ == DictIntegerElement assert children[1].name == "id" assert children[1].value.__class__ == int assert children[1].value == 10002 - assert children[2].__class__ == DictElement + assert children[2].__class__ == DictBooleanElement assert children[2].name == "archived" assert children[2].value.__class__ == bool - assert children[3].__class__ == DictElement + assert children[3].__class__ == DictDictElement assert children[3].name == "coordinator" assert children[3].value.__class__ == dict @@ -209,7 +210,7 @@ def test_json_converter(converter_registry): assert children[5].value.__class__ == list assert children[5].value == ["Mouse", "Penguine"] - assert children[6].__class__ == DictElement + assert children[6].__class__ == DictFloatElement assert children[6].name == "rvalue" assert children[6].value.__class__ == float -- GitLab