diff --git a/src/newcrawler/cfood-schema.yml b/src/newcrawler/cfood-schema.yml
index c990dc5ccf834440d6297fe44572e3c4f63f4a7c..5e3813e7f641f59e1ec3d3e8e55a599ce71b6b87 100644
--- a/src/newcrawler/cfood-schema.yml
+++ b/src/newcrawler/cfood-schema.yml
@@ -15,6 +15,10 @@ cfood:
           - YamlFileCaosDBRecord
           - MarkdownFile
           - DictListElement
+          - DictDictElement
+          - DictFloatElement
+          - DictIntegerElement
+          - DictBooleanElement
           - Definitions
           - Dict
           - JSONFile
diff --git a/src/newcrawler/converters.py b/src/newcrawler/converters.py
index 9b1c945078180e6a4710cbe15cfd05fd66f4251f..c355cf38d3f909cdfc99963f256e532f48b4b223 100644
--- a/src/newcrawler/converters.py
+++ b/src/newcrawler/converters.py
@@ -30,6 +30,8 @@ import json
 from .utils import has_parent
 from .stores import GeneralStore, RecordStore
 from .structure_elements import (StructureElement, Directory, File, Dict, JSONFile,
+                                 DictIntegerElement, DictBooleanElement,
+                                 DictFloatElement, DictDictElement,
                                  TextElement, DictTextElement, DictElement, DictListElement)
 from typing import Optional, Union
 from abc import abstractmethod
@@ -382,6 +384,14 @@ class DictConverter(Converter):
                 children.append(DictListElement(name, value))
             elif type(value) == str:
                 children.append(DictTextElement(name, value))
+            elif type(value) == dict:
+                children.append(DictDictElement(name, value))
+            elif type(value) == int:
+                children.append(DictIntegerElement(name, value))
+            elif type(value) == bool:
+                children.append(DictBooleanElement(name, value))
+            elif type(value) == float:
+                children.append(DictFloatElement(name, value))
             else:
                 children.append(DictElement(name, value))
                 print(f"JSON value {name} has incompatible type.")
@@ -485,6 +495,33 @@ class DictListElementConverter(Converter):
         return m.groupdict()
 
 
+class DictDictElementConverter(DictConverter):
+    """Converter for dict-valued entries (nested dicts).
+
+    Derives from DictConverter, not from the Dict structure element: it is
+    registered as a converter and uses converter state (self.definition).
+    """
+
+    def create_children(self, generalStore: GeneralStore, element: StructureElement):
+        if not self.typecheck(element):
+            raise RuntimeError("A dict is needed to create children")
+
+        return self._create_children_from_dict(element.value)
+
+    def typecheck(self, element: StructureElement):
+        return isinstance(element, Dict)
+
+    def match(self, element: StructureElement):
+        if not isinstance(element, Dict):
+            raise RuntimeError("Element must be a Dict.")
+        m = re.match(self.definition["match_name"], element.name)
+        if m is None:
+            return None
+        if "match" in self.definition:
+            raise NotImplementedError("Match is not implemented for DictDictElement.")
+        return m.groupdict()
+
+
 class TextElementConverter(Converter):
     def create_children(self, generalStore: GeneralStore,
                         element: StructureElement):
diff --git a/src/newcrawler/crawl.py b/src/newcrawler/crawl.py
index 4e62f8beec11f965001a7279617ad3dfa4c5e917..518260d13e315f0048aee11ee4866a27612f0027 100644
--- a/src/newcrawler/crawl.py
+++ b/src/newcrawler/crawl.py
@@ -238,6 +238,9 @@ class Crawler(object):
             "DictListElement": {
                 "converter": "DictListElementConverter",
                 "package": "newcrawler.converters"},
+            "DictDictElement": {
+                "converter": "DictDictElementConverter",
+                "package": "newcrawler.converters"},
             "TextElement": {
                 "converter": "TextElementConverter",
                 "package": "newcrawler.converters"}
diff --git a/src/newcrawler/structure_elements.py b/src/newcrawler/structure_elements.py
index 61a519f266153323adce162b8c2708db041476ed..7c430e62c943d9c588b237b2494fb53313fced8c 100644
--- a/src/newcrawler/structure_elements.py
+++ b/src/newcrawler/structure_elements.py
@@ -82,6 +82,34 @@ class DictTextElement(StructureElement):
         self.value = value
 
 
+class DictIntegerElement(StructureElement):
+    """Named structure element carrying an ``int`` value."""
+
+    def __init__(self, name: str, value: int):
+        super().__init__(name)
+        self.value = value
+
+
+class DictBooleanElement(StructureElement):
+    """Named structure element carrying a ``bool`` value."""
+
+    def __init__(self, name: str, value: bool):
+        super().__init__(name)
+        self.value = value
+
+
+class DictFloatElement(StructureElement):
+    """Named structure element carrying a ``float`` value."""
+
+    def __init__(self, name: str, value: float):
+        super().__init__(name)
+        self.value = value
+
+
+class DictDictElement(Dict):
+    """``Dict`` structure element for a dict nested as a value of another dict."""
+
+
 class DictListElement(StructureElement):
     def __init__(self, name: str, value: list):
         super().__init__(name)
diff --git a/unittests/test_converters.py b/unittests/test_converters.py
index 935a70caa94f2d8c794d714817547d7ea2d39fb2..2fa3eeb7b5d88f13ff3b2b8fc951f6e9302964b6 100644
--- a/unittests/test_converters.py
+++ b/unittests/test_converters.py
@@ -31,7 +31,8 @@ from newcrawler.converters import Converter
 from newcrawler.stores import GeneralStore
 from newcrawler.converters import MarkdownFileConverter, JSONFileConverter, DictConverter
 from newcrawler.structure_elements import Directory
-from newcrawler.structure_elements import File, DictTextElement, DictListElement, DictElement
+from newcrawler.structure_elements import (File, DictTextElement, DictListElement, DictElement,
+                                           DictBooleanElement, DictDictElement, DictIntegerElement, DictFloatElement)
 
 from test_tool import rfp
 
@@ -187,16 +188,16 @@ def test_json_converter(converter_registry):
     assert children[0].value.__class__ == str
     assert children[0].value == "DEMO"
 
-    assert children[1].__class__ == DictElement
+    assert children[1].__class__ == DictIntegerElement
     assert children[1].name == "id"
     assert children[1].value.__class__ == int
     assert children[1].value == 10002
 
-    assert children[2].__class__ == DictElement
+    assert children[2].__class__ == DictBooleanElement
     assert children[2].name == "archived"
     assert children[2].value.__class__ == bool
 
-    assert children[3].__class__ == DictElement
+    assert children[3].__class__ == DictDictElement
     assert children[3].name == "coordinator"
     assert children[3].value.__class__ == dict
 
@@ -209,7 +210,7 @@ def test_json_converter(converter_registry):
     assert children[5].value.__class__ == list
     assert children[5].value == ["Mouse", "Penguine"]
 
-    assert children[6].__class__ == DictElement
+    assert children[6].__class__ == DictFloatElement
     assert children[6].name == "rvalue"
     assert children[6].value.__class__ == float
 