diff --git a/src/caoscrawler/converters.py b/src/caoscrawler/converters.py index 7f9dce83f54845151faf1564d1b3a4fc3909891d..5c193e613dab023c50844822b9139cad5fb97fb4 100644 --- a/src/caoscrawler/converters.py +++ b/src/caoscrawler/converters.py @@ -337,6 +337,11 @@ class Converter(object): group: str, rule: str): """Filter children according to regexp `expr` and `rule`.""" + if rule not in FILTER_FUNCTIONS: + raise RuntimeError( + f"{rule} is not a known filter rule. Only {list(FILTER_FUNCTIONS.keys())} are implemented." + ) + to_be_filtered = [] unmatched_children = [] diff --git a/unittests/test_converters.py b/unittests/test_converters.py index b50ceef134dc59dbbda6cff632f133568a140ddb..15a39b72e75c8f26f3df80c24d48a4c5c2585029 100644 --- a/unittests/test_converters.py +++ b/unittests/test_converters.py @@ -329,3 +329,22 @@ def test_filter_children_of_directory(converter_registry): assert children[0].name == "test_2022-01-01.json" assert children[1].__class__ == File assert children[1].name == "some_other_file.csv" + + dc = DirectoryConverter( + definition={ + "match": "(.*)", + "filter": { + "expr": "test_(?P<date>[0-9]{4,4}-[0-9]{2,2}-[0-9]{2,2}).json", + "group": "date", + "rule": "does_not_exist" + } + }, + name="TestBrokenDirectoryConverter", + converter_registry=converter_registry + ) + + m = dc.match(test_dir) + assert m is not None + + with pytest.raises(RuntimeError): + children = dc.create_children(None, test_dir)