Skip to content
Snippets Groups Projects
Commit 2eb36c70 authored by Alexander Schlemmer's avatar Alexander Schlemmer
Browse files

MAIN: Moved the definitions for the default converters to a separate file.

parent 7df30e6f
No related branches found
No related tags found
2 merge requests: !105 "REL: v0.4.0", !95 "F refactor default converters"
Pipeline #33422 failed
......@@ -13,6 +13,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
### Changed ###
- The definitions of the default converters were moved out of crawl.py into a
separate YAML file called `default_converters.yml`. A new test checks that this
file is loaded correctly.
### Deprecated ###
### Removed ###
......
......@@ -328,77 +328,13 @@ class Crawler(object):
directory:
schema.yml file
README.md documentation
TODO: this function does not make use of self, so it could become static.
"""
# Defaults for the converter registry:
converter_registry: dict[str, dict[str, str]] = {
"Directory": {
"converter": "DirectoryConverter",
"package": "caoscrawler.converters"},
"SimpleFile": {
"converter": "SimpleFileConverter",
"package": "caoscrawler.converters"},
"MarkdownFile": {
"converter": "MarkdownFileConverter",
"package": "caoscrawler.converters"},
"File": {
"converter": "SimpleFileConverter",
"package": "caoscrawler.converters"},
"JSONFile": {
"converter": "JSONFileConverter",
"package": "caoscrawler.converters"},
"YAMLFile": {
"converter": "YAMLFileConverter",
"package": "caoscrawler.converters"},
"CSVTableConverter": {
"converter": "CSVTableConverter",
"package": "caoscrawler.converters"},
"XLSXTableConverter": {
"converter": "XLSXTableConverter",
"package": "caoscrawler.converters"},
"DictBooleanElement": {
"converter": "BooleanElementConverter",
"package": "caoscrawler.converters"},
"BooleanElement": {
"converter": "BooleanElementConverter",
"package": "caoscrawler.converters"},
"DictFloatElement": {
"converter": "FloatElementConverter",
"package": "caoscrawler.converters"},
"FloatElement": {
"converter": "FloatElementConverter",
"package": "caoscrawler.converters"},
"DictTextElement": {
"converter": "TextElementConverter",
"package": "caoscrawler.converters"},
"TextElement": {
"converter": "TextElementConverter",
"package": "caoscrawler.converters"},
"Date": {
"converter": "DateElementConverter",
"package": "caoscrawler.converters"},
"DictIntegerElement": {
"converter": "IntegerElementConverter",
"package": "caoscrawler.converters"},
"IntegerElement": {
"converter": "IntegerElementConverter",
"package": "caoscrawler.converters"},
"DictListElement": {
"converter": "ListElementConverter",
"package": "caoscrawler.converters"},
"ListElement": {
"converter": "ListElementConverter",
"package": "caoscrawler.converters"},
"DictDictElement": {
"converter": "DictElementConverter",
"package": "caoscrawler.converters"},
"DictElement": {
"converter": "DictElementConverter",
"package": "caoscrawler.converters"},
"Dict": {
"converter": "DictElementConverter",
"package": "caoscrawler.converters"},
}
with open(str(files('caoscrawler').joinpath('default_converters.yml')), "r") as f:
converter_registry: dict[str, dict[str, str]] = yaml.safe_load(f)
# More converters from definition file:
if "Converters" in definition:
......
......@@ -595,3 +595,25 @@ def test_date_converter():
matches = dictconverter.match(TextElement("text", "alve"))
assert matches is None
def test_load_converters():
    """Check the converter registry returned for an empty definition."""
    registry = Crawler().load_converters({})
    # Loading by itself already asserts that every defined converter class
    # can be loaded from its respective package.

    # Adapt this number whenever the defaults change!
    assert len(registry) == 22

    for entry in registry.values():
        # Every entry is contained in caoscrawler.converters ...
        assert entry["package"] == "caoscrawler.converters"
        # ... and its class name ends in "Converter".
        assert entry["converter"].endswith("Converter")

    # Spot checks for a few expected registry keys:
    for expected_key in (
        "CSVTableConverter",
        "SimpleFile",
        "Directory",
        "ListElement",
    ):
        assert expected_key in registry
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment