Skip to content
Snippets Groups Projects
Commit 497378ea authored by Alexander Schlemmer
Browse files

ENH: started implementation of markdown file converter and added a unit test

parent b1d84fc2
No related branches found
No related tags found
1 merge request: !53 "Release 0.1"
from .crawl import *
from .crawl import Crawler
......@@ -28,8 +28,9 @@ import re
import caosdb as db
from .stores import GeneralStore, RecordStore
from .structure_elements import StructureElement, Directory, File
from typing import Type
from typing import Type, Optional
from abc import abstractmethod
import yaml_header_tools
class Converter(object):
"""
......@@ -145,7 +146,6 @@ class Converter(object):
converter = converter_registry[definition["type"]](definition, name)
return converter
@abstractmethod
def create_values(self,
values: GeneralStore,
element: StructureElement):
......@@ -156,7 +156,10 @@ class Converter(object):
values: The GeneralStore to store values in.
element: The StructureElement to extract values from.
"""
pass
m = self.match(element)
if m is None:
raise RuntimeError("Condition does not match.")
values.update(m.groupdict())
@abstractmethod
def create_children(self, values: GeneralStore,
......@@ -197,7 +200,7 @@ class Converter(object):
pass
@abstractmethod
def match(self, element: StructureElement) -> Optional[re.Match]:
    """
    Test whether this converter applies to the given structure element.

    Subclasses implement the actual test.

    Parameters
    ----------
    element: The StructureElement to test.

    Returns
    -------
    A match object if the element matches, otherwise None. The match
    object's named groups are what create_values copies into the value
    store.
    """
    pass
class DirectoryConverter(Converter):
......@@ -208,16 +211,6 @@ class DirectoryConverter(Converter):
"""
super().__init__(definition, name)
def create_values(self,
                  values: GeneralStore,
                  element: StructureElement):
    """
    Copy the named groups of the directory-name match into ``values``.

    The regular expression stored under ``self.definition["match"]`` is
    applied to ``element.name`` with ``re.match``.

    Parameters
    ----------
    values: The GeneralStore to store values in.
    element: The StructureElement to extract values from.

    Raises
    ------
    RuntimeError: if ``element`` is not a Directory, or if the pattern
                  does not match the directory name.
    """
    if isinstance(element, Directory):
        name_match = re.match(self.definition["match"], element.name)
        if name_match is not None:
            values.update(name_match.groupdict())
            return
        raise RuntimeError("Condition does not match.")
    raise RuntimeError("Element must be a directory.")
def create_children(self, generalStore: GeneralStore,
element: StructureElement):
if not isinstance(element, Directory):
......@@ -231,8 +224,7 @@ class DirectoryConverter(Converter):
def match(self, element: StructureElement) -> Optional[re.Match]:
    """
    Match the directory name against the pattern from the definition.

    Parameters
    ----------
    element: The StructureElement to test; must be a Directory.

    Returns
    -------
    The match object produced by ``re.match`` on ``element.name``, or
    None if the pattern does not match.

    Raises
    ------
    RuntimeError: if ``element`` is not a Directory.
    """
    if not isinstance(element, Directory):
        raise RuntimeError("Element must be a directory.")
    # Return the match object itself rather than a bool: callers compare
    # the result with ``is not None`` and read ``groupdict()`` from it, and
    # a bool would pass the first check unconditionally.
    return re.match(self.definition["match"], element.name)
@staticmethod
def create_children_from_directory(element: Directory):
......@@ -255,7 +247,32 @@ class DirectoryConverter(Converter):
return children
class MarkdownFileConverter(Converter):
    """
    Converter for File elements whose content starts with a YAML header.

    Only matching is implemented so far: create_children produces nothing
    and typecheck rejects every element.
    """

    def __init__(self, definition: dict, name: str):
        """
        Initialize a new markdown file converter.
        """
        super().__init__(definition, name)

    def create_children(self, generalStore: GeneralStore,
                        element: StructureElement):
        """
        Validate the element type; no children are created yet.

        Raises
        ------
        RuntimeError: if ``element`` is not a File.
        """
        if not isinstance(element, File):
            raise RuntimeError("A markdown file is needed to create children.")

        # Stub: nothing is derived from the file yet, so this returns None.
        return

    def typecheck(self, element: StructureElement) -> bool:
        """
        Report whether this converter accepts the element's type.

        Currently always False.
        """
        # NOTE(review): an unreachable ``isinstance(element, File)`` check
        # followed this return and was dropped. Presumably the converter is
        # kept disabled until create_children is implemented -- confirm
        # before switching to the isinstance check.
        return False

    def match(self, element: StructureElement) -> Optional[re.Match]:
        """
        Match the file name and require a valid YAML header in the file.

        Parameters
        ----------
        element: The StructureElement to test; must be a File.

        Returns
        -------
        The match object for ``element.name`` if the name matches the
        pattern in ``self.definition["match"]`` and the file at
        ``element.path`` has a valid YAML header, otherwise None.

        Raises
        ------
        RuntimeError: if ``element`` is not a File.
        """
        if not isinstance(element, File):
            raise RuntimeError("Element must be a file.")

        m = re.match(self.definition["match"], element.name)
        if m is None:
            # The name already failed to match; skip reading the file.
            return None

        try:
            yaml_header_tools.get_header_from_file(element.path)
        except yaml_header_tools.NoValidHeader:
            return None
        return m
class DictTextElementConverter(Converter):
pass
......
......@@ -167,7 +167,7 @@ class Crawler(object):
# type is something like "matches files", replace isinstance with "type_matches"
# match function tests regexp for example
if (converter.typecheck(element) and
converter.match(element)):
converter.match(element) is not None):
generalStore_copy = generalStore.create_scoped_copy()
recordStore_copy = recordStore.create_scoped_copy()
# extracts values from structure element and stores them in the converter
......
......@@ -4,6 +4,8 @@
# A. Schlemmer, 06/2021
from newcrawler import Crawler
from newcrawler.converters import MarkdownFileConverter
from newcrawler.structure_elements import File
from os.path import join, dirname, basename
import caosdb as db
......@@ -68,3 +70,18 @@ def test_crawler():
assert subd[1]["Measurement"].get_property("project").value != "$Project"
assert subd[1]["Measurement"].get_property("project").value.__class__ == db.Record
assert subd[1]["Measurement"].get_property("project").value == subd[0]["Project"]
def test_markdown_converter():
    """
    Check MarkdownFileConverter.match on one accepted and one rejected file.

    The example README.md must yield a match with exactly one group;
    test_tool.py must yield None (presumably because it carries no valid
    YAML header).
    """
    converter = MarkdownFileConverter(
        {"match": "(.*)"},
        "TestMarkdownFileConverter")

    readme_path = rfp(
        "test_directories", "examples_article", "DataAnalysis",
        "2020_climate-model-predict", "2020-02-08_prediction-errors",
        "README.md")
    readme_match = converter.match(File("README.md", readme_path))
    assert readme_match is not None
    assert len(readme_match.groups()) == 1

    script_path = rfp("test_tool.py")
    script_match = converter.match(File("test_tool.py", script_path))
    assert script_match is None
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment