Skip to content
Snippets Groups Projects
Commit 3052a97b authored by Alexander Schlemmer
Browse files

FIX: fixed some bugs in markdown conversion

parent 521d8d05
No related branches found
No related tags found
1 merge request: !53 Release 0.1
...@@ -29,7 +29,7 @@ import caosdb as db ...@@ -29,7 +29,7 @@ import caosdb as db
from .stores import GeneralStore, RecordStore from .stores import GeneralStore, RecordStore
from .structure_elements import (StructureElement, Directory, File, from .structure_elements import (StructureElement, Directory, File,
TextElement, DictTextElement, DictListElement) TextElement, DictTextElement, DictListElement)
from typing import Type, Union, Literal from typing import Type, Optional
from abc import abstractmethod from abc import abstractmethod
import yaml_header_tools import yaml_header_tools
...@@ -160,11 +160,7 @@ class Converter(object): ...@@ -160,11 +160,7 @@ class Converter(object):
m = self.match(element) m = self.match(element)
if m is None: if m is None:
raise RuntimeError("Condition does not match.") raise RuntimeError("Condition does not match.")
if type(m) == bool: values.update(m)
if m == False:
raise RuntimeError("Result of match must not be False, use None instead.")
return
values.update(m.groupdict())
@abstractmethod @abstractmethod
def create_children(self, values: GeneralStore, def create_children(self, values: GeneralStore,
...@@ -205,7 +201,7 @@ class Converter(object): ...@@ -205,7 +201,7 @@ class Converter(object):
pass pass
@abstractmethod @abstractmethod
def match(self, element: StructureElement) -> Union[re.Match, Literal[True], None]: def match(self, element: StructureElement) -> Optional[dict]:
pass pass
class DirectoryConverter(Converter): class DirectoryConverter(Converter):
...@@ -229,7 +225,10 @@ class DirectoryConverter(Converter): ...@@ -229,7 +225,10 @@ class DirectoryConverter(Converter):
def match(self, element: StructureElement): def match(self, element: StructureElement):
if not isinstance(element, Directory): if not isinstance(element, Directory):
raise RuntimeError("Element must be a directory.") raise RuntimeError("Element must be a directory.")
return re.match(self.definition["match"], element.name) m = re.match(self.definition["match"], element.name)
if m is None:
return None
return m.groupdict()
@staticmethod @staticmethod
def create_children_from_directory(element: Directory): def create_children_from_directory(element: Directory):
...@@ -263,7 +262,7 @@ class MarkdownFileConverter(Converter): ...@@ -263,7 +262,7 @@ class MarkdownFileConverter(Converter):
if not isinstance(element, File): if not isinstance(element, File):
raise RuntimeError("A markdown file is needed to create children.") raise RuntimeError("A markdown file is needed to create children.")
header = yaml_header_tools.get_header_from_file(element.path) header = yaml_header_tools.get_header_from_file(element.path, clean=False)
children: list[StructureElement] = [] children: list[StructureElement] = []
for name, entry in header.items(): for name, entry in header.items():
...@@ -283,11 +282,13 @@ class MarkdownFileConverter(Converter): ...@@ -283,11 +282,13 @@ class MarkdownFileConverter(Converter):
if not isinstance(element, File): if not isinstance(element, File):
raise RuntimeError("Element must be a file.") raise RuntimeError("Element must be a file.")
m = re.match(self.definition["match"], element.name) m = re.match(self.definition["match"], element.name)
if m is None:
return None
try: try:
yaml_header_tools.get_header_from_file(element.path) yaml_header_tools.get_header_from_file(element.path)
except yaml_header_tools.NoValidHeader: except yaml_header_tools.NoValidHeader:
return None return None
return m return m.groupdict()
class DictTextElementConverter(Converter): class DictTextElementConverter(Converter):
def create_children(self, generalStore: GeneralStore, def create_children(self, generalStore: GeneralStore,
...@@ -298,13 +299,19 @@ class DictTextElementConverter(Converter): ...@@ -298,13 +299,19 @@ class DictTextElementConverter(Converter):
def typecheck(self, element: StructureElement): def typecheck(self, element: StructureElement):
return isinstance(element, DictTextElement) return isinstance(element, DictTextElement)
def match(self, element: StructureElement) -> Union[re.Match, Literal[True], None]: def match(self, element: StructureElement):
if not isinstance(element, DictTextElement): if not isinstance(element, DictTextElement):
raise RuntimeError("Element must be a DictTextElement.") raise RuntimeError("Element must be a DictTextElement.")
if self.name != element.name: m1 = re.match(self.definition["match_name"], self.name)
if m1 is None:
return None return None
m = re.match(self.definition["match"], element.value) m2 = re.match(self.definition["match_value"], element.value)
return m if m2 is None:
return None
values = dict()
values.update(m1.groupdict())
values.update(m2.groupdict())
return values
class DictListElementConverter(Converter): class DictListElementConverter(Converter):
def create_children(self, generalStore: GeneralStore, def create_children(self, generalStore: GeneralStore,
...@@ -316,14 +323,14 @@ class DictListElementConverter(Converter): ...@@ -316,14 +323,14 @@ class DictListElementConverter(Converter):
def typecheck(self, element: StructureElement): def typecheck(self, element: StructureElement):
return isinstance(element, DictListElement) return isinstance(element, DictListElement)
def match(self, element: StructureElement) -> Union[re.Match, Literal[True], None]: def match(self, element: StructureElement):
if not isinstance(element, DictListElement): if not isinstance(element, DictListElement):
raise RuntimeError("Element must be a DictListElement.") raise RuntimeError("Element must be a DictListElement.")
if self.name != element.name: if self.name != element.name:
return None return None
if "match" in self.definition: if "match" in self.definition:
raise NotImplementedError("Match is not implemented for DictListElement.") raise NotImplementedError("Match is not implemented for DictListElement.")
return True return dict()
class TextElementConverter(Converter): class TextElementConverter(Converter):
def create_children(self, generalStore: GeneralStore, def create_children(self, generalStore: GeneralStore,
...@@ -333,8 +340,10 @@ class TextElementConverter(Converter): ...@@ -333,8 +340,10 @@ class TextElementConverter(Converter):
def typecheck(self, element: StructureElement): def typecheck(self, element: StructureElement):
return isinstance(element, TextElement) return isinstance(element, TextElement)
def match(self, element: StructureElement) -> Union[re.Match, Literal[True], None]: def match(self, element: StructureElement):
if not isinstance(element, TextElement): if not isinstance(element, TextElement):
raise RuntimeError("Element must be a TextElement.") raise RuntimeError("Element must be a TextElement.")
m = re.match(self.definition["match"], element.value) m = re.match(self.definition["match"], element.value)
return m if m is None:
return None
return m.groupdict()
...@@ -29,12 +29,25 @@ DataAnalysis: # name of the converter ...@@ -29,12 +29,25 @@ DataAnalysis: # name of the converter
subtree: subtree:
description: description:
type: DictTextElement type: DictTextElement
match: (?P<description>.*) match_value: (?P<description>.*)
match_name: description
records: records:
Measurement: Measurement:
description: $description description: $description
responsible: responsible_single:
type: DictTextElement
match_name: responsible
match_value: (?P<first_name>.+) (?P<last_name>.+)
records:
Person:
first_name: $first_name
last_name: $last_name
Measurement: # this uses the reference to the above defined record
responsible: +$Person
responsible_list:
type: DictListElement type: DictListElement
match_name: responsible
subtree: subtree:
Person: Person:
type: TextElement type: TextElement
......
...@@ -5,7 +5,7 @@ ...@@ -5,7 +5,7 @@
from newcrawler import Crawler from newcrawler import Crawler
from newcrawler.converters import MarkdownFileConverter from newcrawler.converters import MarkdownFileConverter
from newcrawler.structure_elements import File from newcrawler.structure_elements import File, DictTextElement, DictListElement
from os.path import join, dirname, basename from os.path import join, dirname, basename
import caosdb as db import caosdb as db
...@@ -72,16 +72,54 @@ def test_crawler(): ...@@ -72,16 +72,54 @@ def test_crawler():
assert subd[1]["Measurement"].get_property("project").value == subd[0]["Project"] assert subd[1]["Measurement"].get_property("project").value == subd[0]["Project"]
def test_markdown_converter(): def test_markdown_converter():
test_readme = File("README.md", rfp(
"test_directories", "examples_article", "DataAnalysis",
"2020_climate-model-predict", "2020-02-08_prediction-errors", "README.md"))
converter = MarkdownFileConverter({ converter = MarkdownFileConverter({
"match": "(.*)" "match": "(.*)"
}, "TestMarkdownFileConverter") }, "TestMarkdownFileConverter")
m = converter.match(File("README.md", rfp(
"test_directories", "examples_article", "DataAnalysis",
"2020_climate-model-predict", "2020-02-08_prediction-errors", "README.md")))
assert m is not None
assert len(m.groups()) == 1
m = converter.match(File("test_tool.py", rfp( m = converter.match(File("test_tool.py", rfp(
"test_tool.py"))) "test_tool.py")))
assert m is None assert m is None
m = converter.match(test_readme)
assert m is not None
assert m.__class__ == dict
assert len(m) == 0
converter = MarkdownFileConverter({
"match": "README.md"
}, "TestMarkdownFileConverter")
m = converter.match(test_readme)
assert m is not None
assert len(m) == 0
children = converter.create_children(None, test_readme)
assert len(children) == 5
assert children[1].__class__ == DictTextElement
assert children[1].name == "description"
assert children[1].value.__class__ == str
assert children[0].__class__ == DictTextElement
assert children[0].name == "responsible"
assert children[0].value.__class__ == str
test_readme2 = File("README.md", rfp("test_directories", "examples_article", "ExperimentalData", "2020_SpeedOfLight", "2020-01-01_TimeOfFlight", "README.md"))
m = converter.match(test_readme2)
assert m is not None
assert len(m) == 0
children = converter.create_children(None, test_readme2)
assert len(children) == 2
assert children[1].__class__ == DictTextElement
assert children[1].name == "description"
assert children[1].value.__class__ == str
assert children[0].__class__ == DictListElement
assert children[0].name == "responsible"
assert children[0].value.__class__ == list
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment