Code owners
Assign users and groups as approvers for specific file changes. Learn more.
test_converters.py 8.10 KiB
#!/usr/bin/env python3
# encoding: utf-8
#
# ** header v3.0
# This file is a part of the CaosDB Project.
#
# Copyright (C) 2021 Indiscale GmbH <info@indiscale.com>
# Copyright (C) 2021 Henrik tom Wörden <h.tomwoerden@indiscale.com>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
#
# ** end header
#
"""
test the converters module
"""
from newcrawler.converters import Converter
from newcrawler.stores import GeneralStore
from newcrawler.converters import (ConverterValidationError,
MarkdownFileConverter, JSONFileConverter,
DictConverter)
from newcrawler.structure_elements import Directory
from newcrawler.structure_elements import (File, DictTextElement,
DictListElement, DictElement,
DictBooleanElement, DictDictElement,
DictIntegerElement, DictFloatElement)
from test_tool import rfp
import pytest
import os
import importlib
@pytest.fixture
def converter_registry():
    """Provide the converter registry used by the tests in this module.

    Each entry maps a converter type name to a dict with the converter's
    class name ("converter"), the package it is imported from ("package")
    and the class object itself ("class"), resolved through importlib.
    """
    package = "newcrawler.converters"
    class_names = {
        "Directory": "DirectoryConverter",
        "MarkdownFile": "MarkdownFileConverter",
        "Dict": "DictConverter",
        "DictTextElement": "DictTextElementConverter",
        "DictListElement": "DictListElementConverter",
        "TextElement": "TextElementConverter",
        "JSONFile": "JSONFileConverter",
    }
    registry = {
        type_name: {"converter": class_name, "package": package}
        for type_name, class_name in class_names.items()
    }

    # Attach the class object named by each entry, looked up in its package.
    for entry in registry.values():
        module = importlib.import_module(entry["package"])
        entry["class"] = getattr(module, entry["converter"])
    return registry
def testConverterTrivial(converter_registry):
    """Smoke test: the factory can be called for several converter types.

    Each definition only contains a "type" key; the test passes as long as
    ``Converter.converter_factory`` does not raise.
    """
    converter_types = (
        "Directory",
        "MarkdownFile",
        "DictTextElement",
        "DictListElement",
        "TextElement",
    )
    for converter_type in converter_types:
        definition = {"type": converter_type}
        Converter.converter_factory(
            definition=definition,
            name="Test",
            converter_registry=converter_registry,
        )
def testDirectoryConverter(converter_registry):
    """Check the children a Directory converter creates for "test_directories"."""
    directory_converter = Converter.converter_factory(
        definition={"type": "Directory"},
        name="Test",
        converter_registry=converter_registry,
    )
    store = GeneralStore()
    root = Directory("test_directories", rfp("test_directories"))
    children = directory_converter.create_children(store, root)

    # The number of children is deliberately not asserted: more directories
    # have been added to the test data, so only known entries are checked.
    found_names = []
    for child in children:
        assert isinstance(child, Directory)
        found_names.append(child.name)

    for expected in ("examples_article", "example_overwrite_1", "example_insert"):
        assert expected in found_names
def test_markdown_converter(converter_registry):
    """Exercise matching and child creation of MarkdownFileConverter.

    Two README.md files from the example data are used: for the first one
    the "responsible" child is expected to be a text element, for the second
    one a list element.
    """

    def check_child(child, expected_class, expected_name, expected_value_type):
        # Exact class comparison on purpose: subclasses must not pass.
        assert type(child) is expected_class
        assert child.name == expected_name
        assert type(child.value) is expected_value_type

    readme_text = File(
        "README.md",
        rfp("test_directories", "examples_article", "DataAnalysis",
            "2020_climate-model-predict", "2020-02-08_prediction-errors",
            "README.md"))

    catch_all = MarkdownFileConverter(
        {"match": "(.*)"}, "TestMarkdownFileConverter", converter_registry)

    # The Python source file is expected not to match, the README is.
    python_file = File("test_tool.py", rfp("test_tool.py"))
    assert catch_all.match(python_file) is None

    match = catch_all.match(readme_text)
    assert match is not None
    assert type(match) is dict
    assert len(match) == 0

    converter = MarkdownFileConverter(
        {"match": "README.md"}, "TestMarkdownFileConverter", converter_registry)

    match = converter.match(readme_text)
    assert match is not None
    assert len(match) == 0

    children = converter.create_children(None, readme_text)
    assert len(children) == 5
    check_child(children[1], DictTextElement, "description", str)
    check_child(children[0], DictTextElement, "responsible", str)

    readme_list = File(
        "README.md",
        rfp("test_directories", "examples_article", "ExperimentalData",
            "2020_SpeedOfLight", "2020-01-01_TimeOfFlight", "README.md"))

    match = converter.match(readme_list)
    assert match is not None
    assert len(match) == 0

    children = converter.create_children(None, readme_list)
    assert len(children) == 2
    check_child(children[1], DictTextElement, "description", str)
    check_child(children[0], DictListElement, "responsible", list)
def test_json_converter(converter_registry):
    """Exercise JSONFileConverter on a valid and on an invalid JSON file.

    The converter is given a JSON schema through the "validate" key. The
    valid file must produce the expected typed children; creating children
    for the broken file must raise ConverterValidationError.
    """
    json_dir = ("test_directories", "examples_json")
    test_json = File("testjson.json", rfp(*json_dir, "testjson.json"))

    here = os.path.dirname(os.path.abspath(__file__))
    schema_path = os.path.join(here, *json_dir, "testjson.schema.json")

    jsonconverter = JSONFileConverter(
        definition={"match": "(.*)", "validate": schema_path},
        name="TestJSONFileConverter",
        converter_registry=converter_registry)

    match = jsonconverter.match(test_json)
    assert match is not None
    assert len(match) == 0

    children = jsonconverter.create_children(None, test_json)
    assert len(children) == 8

    # Expected (element class, name, value type) for each child, in order.
    expected_children = (
        (DictTextElement, "name", str),
        (DictIntegerElement, "projectId", int),
        (DictBooleanElement, "archived", bool),
        (DictListElement, "Person", list),
        (DictTextElement, "start_date", str),
        (DictListElement, "candidates", list),
        (DictFloatElement, "rvalue", float),
        (DictTextElement, "url", str),
    )
    for child, (element_class, name, value_type) in zip(children, expected_children):
        # Exact class comparison on purpose: subclasses must not pass.
        assert type(child) is element_class
        assert child.name == name
        assert type(child.value) is value_type

    # Spot checks on concrete values.
    assert children[0].value == "DEMO"
    assert children[1].value == 10002
    assert len(children[3].value) == 2
    assert children[5].value == ["Mouse", "Penguine"]

    broken_json = File("brokenjson.json", rfp(*json_dir, "brokenjson.json"))
    jsonconverter.match(broken_json)

    # Doesn't validate because of missing required 'name' property
    with pytest.raises(ConverterValidationError) as err:
        jsonconverter.create_children(None, broken_json)

    # Checked after the block: code following the raising call inside the
    # ``with`` body would never run.
    assert err.value.message.startswith("Couldn't validate")