From 2eb36c7080e52619709b1a0b8276ad18191cb3cf Mon Sep 17 00:00:00 2001
From: Alexander Schlemmer <alexander@mail-schlemmer.de>
Date: Mon, 6 Feb 2023 17:48:26 +0100
Subject: [PATCH] MAIN: Moved the definitions for the default converters to a
 separate file.

---
 CHANGELOG.md                 |  4 ++
 src/caoscrawler/crawl.py     | 72 ++----------------------------------
 unittests/test_converters.py | 22 +++++++++++
 3 files changed, 30 insertions(+), 68 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 5f3c0cf5..7df6439d 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -13,6 +13,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ### Changed ###
 
+- The definitions for the default converters were removed from crawl.py and placed into
+  a separate YAML file called `default_converters.yml`. A new unit test checks that the
+  defaults are loaded correctly from that file.
+
 ### Deprecated ###
 
 ### Removed ###
diff --git a/src/caoscrawler/crawl.py b/src/caoscrawler/crawl.py
index 7ad81e7b..9cb2ceb4 100644
--- a/src/caoscrawler/crawl.py
+++ b/src/caoscrawler/crawl.py
@@ -328,77 +328,13 @@ class Crawler(object):
         directory:
         schema.yml file
         README.md documentation
+
+        TODO: this function does not make use of self, so it could become static.
         """
 
         # Defaults for the converter registry:
-        converter_registry: dict[str, dict[str, str]] = {
-            "Directory": {
-                "converter": "DirectoryConverter",
-                "package": "caoscrawler.converters"},
-            "SimpleFile": {
-                "converter": "SimpleFileConverter",
-                "package": "caoscrawler.converters"},
-            "MarkdownFile": {
-                "converter": "MarkdownFileConverter",
-                "package": "caoscrawler.converters"},
-            "File": {
-                "converter": "SimpleFileConverter",
-                "package": "caoscrawler.converters"},
-            "JSONFile": {
-                "converter": "JSONFileConverter",
-                "package": "caoscrawler.converters"},
-            "YAMLFile": {
-                "converter": "YAMLFileConverter",
-                "package": "caoscrawler.converters"},
-            "CSVTableConverter": {
-                "converter": "CSVTableConverter",
-                "package": "caoscrawler.converters"},
-            "XLSXTableConverter": {
-                "converter": "XLSXTableConverter",
-                "package": "caoscrawler.converters"},
-            "DictBooleanElement": {
-                "converter": "BooleanElementConverter",
-                "package": "caoscrawler.converters"},
-            "BooleanElement": {
-                "converter": "BooleanElementConverter",
-                "package": "caoscrawler.converters"},
-            "DictFloatElement": {
-                "converter": "FloatElementConverter",
-                "package": "caoscrawler.converters"},
-            "FloatElement": {
-                "converter": "FloatElementConverter",
-                "package": "caoscrawler.converters"},
-            "DictTextElement": {
-                "converter": "TextElementConverter",
-                "package": "caoscrawler.converters"},
-            "TextElement": {
-                "converter": "TextElementConverter",
-                "package": "caoscrawler.converters"},
-            "Date": {
-                "converter": "DateElementConverter",
-                "package": "caoscrawler.converters"},
-            "DictIntegerElement": {
-                "converter": "IntegerElementConverter",
-                "package": "caoscrawler.converters"},
-            "IntegerElement": {
-                "converter": "IntegerElementConverter",
-                "package": "caoscrawler.converters"},
-            "DictListElement": {
-                "converter": "ListElementConverter",
-                "package": "caoscrawler.converters"},
-            "ListElement": {
-                "converter": "ListElementConverter",
-                "package": "caoscrawler.converters"},
-            "DictDictElement": {
-                "converter": "DictElementConverter",
-                "package": "caoscrawler.converters"},
-            "DictElement": {
-                "converter": "DictElementConverter",
-                "package": "caoscrawler.converters"},
-            "Dict": {
-                "converter": "DictElementConverter",
-                "package": "caoscrawler.converters"},
-        }
+        with open(str(files('caoscrawler').joinpath('default_converters.yml')), "r") as f:
+            converter_registry: dict[str, dict[str, str]] = yaml.safe_load(f)
 
         # More converters from definition file:
         if "Converters" in definition:
diff --git a/unittests/test_converters.py b/unittests/test_converters.py
index 25251f62..9390e65c 100644
--- a/unittests/test_converters.py
+++ b/unittests/test_converters.py
@@ -595,3 +595,25 @@ def test_date_converter():
 
     matches = dictconverter.match(TextElement("text", "alve"))
     assert matches is None
+
+    
+def test_load_converters():
+    """Check the default converter registry returned for an empty definition."""
+    # NOTE: load_converters is expected to already verify that every configured
+    # converter class can be loaded from its package (not visible in this test).
+    converter_registry = Crawler().load_converters({})
+
+    # Please adapt, if defaults change!
+    assert len(converter_registry) == 22
+
+    # All of them are contained in caoscrawler.converters
+    for conv in converter_registry.values():
+        assert conv["package"] == "caoscrawler.converters"
+        # ... and their names all end in "Converter"
+        assert conv["converter"].endswith("Converter")
+
+    # Some checks:
+    assert "CSVTableConverter" in converter_registry
+    assert "SimpleFile" in converter_registry
+    assert "Directory" in converter_registry
+    assert "ListElement" in converter_registry
-- 
GitLab