Skip to content
Snippets Groups Projects
Commit 521d8d05 authored by Alexander Schlemmer
Browse files

ENH: implemented markdown converter

parent 497378ea
Branches
Tags
1 merge request: !53 "Release 0.1"
...@@ -27,8 +27,9 @@ import os ...@@ -27,8 +27,9 @@ import os
import re import re
import caosdb as db import caosdb as db
from .stores import GeneralStore, RecordStore from .stores import GeneralStore, RecordStore
from .structure_elements import StructureElement, Directory, File from .structure_elements import (StructureElement, Directory, File,
from typing import Type, Optional TextElement, DictTextElement, DictListElement)
from typing import Type, Union, Literal
from abc import abstractmethod from abc import abstractmethod
import yaml_header_tools import yaml_header_tools
...@@ -159,6 +160,10 @@ class Converter(object): ...@@ -159,6 +160,10 @@ class Converter(object):
m = self.match(element) m = self.match(element)
if m is None: if m is None:
raise RuntimeError("Condition does not match.") raise RuntimeError("Condition does not match.")
if type(m) == bool:
if m == False:
raise RuntimeError("Result of match must not be False, use None instead.")
return
values.update(m.groupdict()) values.update(m.groupdict())
@abstractmethod @abstractmethod
...@@ -200,7 +205,7 @@ class Converter(object): ...@@ -200,7 +205,7 @@ class Converter(object):
pass pass
@abstractmethod
def match(self, element: StructureElement) -> Union[re.Match, Literal[True], None]:
    """Decide whether this converter applies to ``element``.

    Abstract; concrete converters must override it.  The annotated
    return type allows three results: a regular-expression match
    object, the literal ``True``, or ``None``.
    """
    pass
class DirectoryConverter(Converter): class DirectoryConverter(Converter):
...@@ -258,10 +263,20 @@ class MarkdownFileConverter(Converter): ...@@ -258,10 +263,20 @@ class MarkdownFileConverter(Converter):
if not isinstance(element, File): if not isinstance(element, File):
raise RuntimeError("A markdown file is needed to create children.") raise RuntimeError("A markdown file is needed to create children.")
return header = yaml_header_tools.get_header_from_file(element.path)
children: list[StructureElement] = []
for name, entry in header.items():
if type(entry) == list:
children.append(DictListElement(name, entry))
elif type(entry) == str:
children.append(DictTextElement(name, entry))
else:
raise RuntimeError("Header entry {} has incompatible type.".format(name))
return children
def typecheck(self, element: StructureElement):
    """Return True if ``element`` is a ``File``, False otherwise.

    The earlier unconditional ``return False`` is dropped: it made the
    ``isinstance`` check unreachable, so no element could ever pass.
    """
    return isinstance(element, File)
def match(self, element: StructureElement): def match(self, element: StructureElement):
...@@ -275,10 +290,51 @@ class MarkdownFileConverter(Converter): ...@@ -275,10 +290,51 @@ class MarkdownFileConverter(Converter):
return m return m
class DictTextElementConverter(Converter):
    """Converter for ``DictTextElement`` objects (a name plus a string value)."""

    def create_children(self, generalStore: GeneralStore,
                        element: StructureElement):
        """Return an empty list; this converter never creates children."""
        return []

    def typecheck(self, element: StructureElement):
        """Return True if ``element`` is a ``DictTextElement``."""
        return isinstance(element, DictTextElement)

    def match(self, element: StructureElement) -> Union[re.Match, Literal[True], None]:
        """Match the element's value against the pattern in the definition.

        Returns ``None`` when the element's name differs from this
        converter's name; otherwise returns the result of ``re.match``
        applied to ``self.definition["match"]`` and ``element.value``
        (which is itself ``None`` when the pattern does not match).

        Raises:
            RuntimeError: if ``element`` is not a ``DictTextElement``.
        """
        if not isinstance(element, DictTextElement):
            raise RuntimeError("Element must be a DictTextElement.")
        # The name is compared first so the regex only runs for the entry
        # this converter is responsible for.
        if self.name != element.name:
            return None
        m = re.match(self.definition["match"], element.value)
        return m
class DictListElementConverter(Converter):
    """Converter for ``DictListElement`` objects (a name plus a list value)."""

    def create_children(self, generalStore: GeneralStore,
                        element: StructureElement):
        """Wrap every entry of ``element.value`` in a ``TextElement``.

        Raises:
            RuntimeError: if ``element`` is not a ``DictListElement``.
        """
        if not isinstance(element, DictListElement):
            raise RuntimeError("This converter can only process DictListElements.")
        return [TextElement(list_element) for list_element in element.value]

    def typecheck(self, element: StructureElement):
        """Return True if ``element`` is a ``DictListElement``."""
        return isinstance(element, DictListElement)

    def match(self, element: StructureElement) -> Union[re.Match, Literal[True], None]:
        """Return True when the element's name equals this converter's name.

        Returns ``None`` when the names differ.  No pattern matching is
        done on the list itself.

        Raises:
            RuntimeError: if ``element`` is not a ``DictListElement``.
            NotImplementedError: if the names agree and the definition
                contains a ``"match"`` key.
        """
        if not isinstance(element, DictListElement):
            raise RuntimeError("Element must be a DictListElement.")
        if self.name != element.name:
            return None
        # A "match" entry is rejected loudly instead of being silently ignored.
        if "match" in self.definition:
            raise NotImplementedError("Match is not implemented for DictListElement.")
        return True
class TextElementConverter(Converter):
    """Converter for ``TextElement`` objects (a bare string value)."""

    def create_children(self, generalStore: GeneralStore,
                        element: StructureElement):
        """Return an empty list; this converter never creates children."""
        return []

    def typecheck(self, element: StructureElement):
        """Return True if ``element`` is a ``TextElement``."""
        return isinstance(element, TextElement)

    def match(self, element: StructureElement) -> Union[re.Match, Literal[True], None]:
        """Match the element's value against the pattern in the definition.

        Returns the result of ``re.match`` applied to
        ``self.definition["match"]`` and ``element.value``.  No name
        comparison is made here.

        Raises:
            RuntimeError: if ``element`` is not a ``TextElement``.
        """
        if not isinstance(element, TextElement):
            raise RuntimeError("Element must be a TextElement.")
        m = re.match(self.definition["match"], element.value)
        return m
...@@ -70,11 +70,6 @@ from .structure_elements import StructureElement, Directory, File ...@@ -70,11 +70,6 @@ from .structure_elements import StructureElement, Directory, File
from .converters import Converter, DirectoryConverter from .converters import Converter, DirectoryConverter
class Crawler(object): class Crawler(object):
""" """
Crawler class that encapsulates crawling functions. Crawler class that encapsulates crawling functions.
......
...@@ -28,7 +28,7 @@ class StructureElement(object): ...@@ -28,7 +28,7 @@ class StructureElement(object):
pass pass
class FileSystemStructureElement(StructureElement): class FileSystemStructureElement(StructureElement):
def __init__(self, name, path): def __init__(self, name: str, path: str):
self.name = name self.name = name
self.path = path self.path = path
...@@ -42,3 +42,17 @@ class Directory(FileSystemStructureElement): ...@@ -42,3 +42,17 @@ class Directory(FileSystemStructureElement):
class File(FileSystemStructureElement):
    """File-system structure element representing a file.

    Adds nothing to ``FileSystemStructureElement``; it exists as a
    distinct type so that ``isinstance`` checks can tell files apart.
    """
    pass
class DictTextElement(StructureElement):
    """Structure element pairing a name with a string value."""

    def __init__(self, name: str, value: str):
        """Store ``name`` and ``value`` on the instance."""
        self.name = name
        self.value = value
class DictListElement(StructureElement):
    """Structure element pairing a name with a list value."""

    def __init__(self, name: str, value: list):
        """Store ``name`` and ``value`` on the instance."""
        self.name = name
        self.value = value
class TextElement(StructureElement):
    """Structure element holding a single string value and no name."""

    def __init__(self, value: str):
        """Store ``value`` on the instance."""
        self.value = value
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment