Skip to content
Snippets Groups Projects
Commit 521d8d05 authored by Alexander Schlemmer's avatar Alexander Schlemmer
Browse files

ENH: implemented markdown converter

parent 497378ea
Branches
Tags
1 merge request: !53 "Release 0.1"
......@@ -27,8 +27,9 @@ import os
import re
import caosdb as db
from .stores import GeneralStore, RecordStore
from .structure_elements import StructureElement, Directory, File
from typing import Type, Optional
from .structure_elements import (StructureElement, Directory, File,
TextElement, DictTextElement, DictListElement)
from typing import Type, Union, Literal
from abc import abstractmethod
import yaml_header_tools
......@@ -159,6 +160,10 @@ class Converter(object):
m = self.match(element)
if m is None:
raise RuntimeError("Condition does not match.")
if type(m) == bool:
if m == False:
raise RuntimeError("Result of match must not be False, use None instead.")
return
values.update(m.groupdict())
@abstractmethod
......@@ -200,7 +205,7 @@ class Converter(object):
pass
@abstractmethod
def match(self, element: StructureElement) -> Union[re.Match, Literal[True], None]:
    """Decide whether this converter applies to ``element``.

    Subclasses must override this method.

    Parameters
    ----------
    element : StructureElement
        The structure element to test.

    Returns
    -------
    re.Match, True or None
        ``None`` when the element does not match. A regular-expression
        match object or the literal ``True`` when it does. ``True`` is for
        converters that match without evaluating a regular expression.
    """
    pass
class DirectoryConverter(Converter):
......@@ -258,10 +263,20 @@ class MarkdownFileConverter(Converter):
if not isinstance(element, File):
raise RuntimeError("A markdown file is needed to create children.")
return
header = yaml_header_tools.get_header_from_file(element.path)
children: list[StructureElement] = []
for name, entry in header.items():
if type(entry) == list:
children.append(DictListElement(name, entry))
elif type(entry) == str:
children.append(DictTextElement(name, entry))
else:
raise RuntimeError("Header entry {} has incompatible type.".format(name))
return children
def typecheck(self, element: StructureElement):
    """Tell whether ``element`` is of the type this converter handles.

    Parameters
    ----------
    element : StructureElement
        The structure element to check.

    Returns
    -------
    bool
        ``True`` if ``element`` is a ``File`` instance, ``False`` otherwise.
    """
    # The former unconditional ``return False`` placeholder made this check
    # unreachable, so no element could ever pass the type check.
    return isinstance(element, File)
def match(self, element: StructureElement):
......@@ -275,10 +290,51 @@ class MarkdownFileConverter(Converter):
return m
class DictTextElementConverter(Converter):
    """Converter for ``DictTextElement`` structure elements.

    An element is accepted when its name equals the converter's name and
    its value matches the regular expression stored under the ``"match"``
    key of the converter definition.
    """

    def create_children(self, generalStore: GeneralStore,
                        element: StructureElement):
        """Return an empty list: this converter creates no children."""
        return []

    def typecheck(self, element: StructureElement):
        """Tell whether ``element`` is a ``DictTextElement``."""
        return isinstance(element, DictTextElement)

    def match(self, element: StructureElement) -> Union[re.Match, Literal[True], None]:
        """Match the element's value against the definition's pattern.

        Returns ``None`` when the element's name differs from the
        converter's name. Otherwise returns the result of ``re.match``,
        which is itself ``None`` when the pattern does not match.

        Raises
        ------
        RuntimeError
            If ``element`` is not a ``DictTextElement``.
        """
        if not isinstance(element, DictTextElement):
            raise RuntimeError("Element must be a DictTextElement.")

        # A differently named header entry is simply "no match", not an error.
        if self.name != element.name:
            return None

        pattern = self.definition["match"]
        return re.match(pattern, element.value)
class DictListElementConverter(Converter):
    """Converter for ``DictListElement`` structure elements.

    Matching is by name only. Each entry of the element's list value becomes
    a ``TextElement`` child.
    """

    def create_children(self, generalStore: GeneralStore,
                        element: StructureElement):
        """Wrap every entry of ``element.value`` in a ``TextElement``.

        Raises
        ------
        RuntimeError
            If ``element`` is not a ``DictListElement``.
        """
        if not isinstance(element, DictListElement):
            raise RuntimeError("This converter can only process DictListElements.")

        return [TextElement(item) for item in element.value]

    def typecheck(self, element: StructureElement):
        """Tell whether ``element`` is a ``DictListElement``."""
        return isinstance(element, DictListElement)

    def match(self, element: StructureElement) -> Union[re.Match, Literal[True], None]:
        """Return ``True`` if the element's name equals the converter's name.

        Returns ``None`` when the names differ.

        Raises
        ------
        RuntimeError
            If ``element`` is not a ``DictListElement``.
        NotImplementedError
            If the names agree but the converter definition contains a
            ``"match"`` key; pattern matching is not supported for lists.
        """
        if not isinstance(element, DictListElement):
            raise RuntimeError("Element must be a DictListElement.")

        if self.name != element.name:
            return None

        has_pattern = "match" in self.definition
        if has_pattern:
            raise NotImplementedError("Match is not implemented for DictListElement.")

        # There is no regex to evaluate, so signal success with a plain True.
        return True
class TextElementConverter(Converter):
    """Converter for ``TextElement`` structure elements.

    An element is accepted when its value matches the regular expression
    stored under the ``"match"`` key of the converter definition.
    """

    def create_children(self, generalStore: GeneralStore,
                        element: StructureElement):
        """Return an empty list: this converter creates no children."""
        return []

    def typecheck(self, element: StructureElement):
        """Tell whether ``element`` is a ``TextElement``."""
        return isinstance(element, TextElement)

    def match(self, element: StructureElement) -> Union[re.Match, Literal[True], None]:
        """Match the element's value against the definition's pattern.

        Returns the result of ``re.match``, which is ``None`` when the
        pattern does not match.

        Raises
        ------
        RuntimeError
            If ``element`` is not a ``TextElement``.
        """
        if not isinstance(element, TextElement):
            raise RuntimeError("Element must be a TextElement.")

        pattern = self.definition["match"]
        return re.match(pattern, element.value)
......@@ -70,11 +70,6 @@ from .structure_elements import StructureElement, Directory, File
from .converters import Converter, DirectoryConverter
class Crawler(object):
"""
Crawler class that encapsulates crawling functions.
......
......@@ -28,7 +28,7 @@ class StructureElement(object):
pass
class FileSystemStructureElement(StructureElement):
def __init__(self, name: str, path: str):
    """Store the name and path of this structure element.

    Parameters
    ----------
    name : str
        Stored unchanged as ``self.name``.
    path : str
        Stored unchanged as ``self.path``.
    """
    self.name = name
    self.path = path
......@@ -42,3 +42,17 @@ class Directory(FileSystemStructureElement):
class File(FileSystemStructureElement):
pass
class DictTextElement(StructureElement):
    """Structure element holding a named string value."""

    def __init__(self, name: str, value: str):
        """Store ``name`` and ``value`` as attributes of the same names."""
        self.name = name
        self.value = value
class DictListElement(StructureElement):
    """Structure element holding a named list value."""

    def __init__(self, name: str, value: list):
        """Store ``name`` and ``value`` as attributes of the same names."""
        self.name = name
        self.value = value
class TextElement(StructureElement):
    """Structure element holding a single, unnamed string value."""

    def __init__(self, value: str):
        """Store ``value`` as the ``value`` attribute."""
        self.value = value
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment