diff --git a/CHANGELOG.md b/CHANGELOG.md index da7250c1d4b1f63473d6cfb689d04293111c68ed..0cc1b13cb111e9eca0bb99844692fc607642412d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -35,6 +35,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Fixed ### +- Added better error message for some cases of broken converter and + record definitions. + ### Security ### ### Documentation ### diff --git a/src/caoscrawler/converters/converters.py b/src/caoscrawler/converters/converters.py index 8b5f9a60c999cb0c2366fb3ce1e385729a39d4dd..22686e0dbae26e3322059928cf3ba0b4522f672c 100644 --- a/src/caoscrawler/converters/converters.py +++ b/src/caoscrawler/converters/converters.py @@ -288,6 +288,9 @@ def create_records(values: GeneralStore, records: RecordStore, def_records: dict c_record = records[name] + if isinstance(record, str): + raise RuntimeError( + "dict expected, but found str: {}".format(record)) for key, value in record.items(): if key == "parents" or key == "role": continue @@ -404,6 +407,10 @@ class Converter(object, metaclass=ABCMeta): The `type` key in the `definition` defines the Converter class which is being used. """ + if definition is None: + raise RuntimeError("Definition of converter \"{}\" is " + "empty".format(name)) + if "type" not in definition: raise RuntimeError( "Type is mandatory for converter entries in CFood definition.") diff --git a/src/caoscrawler/identifiable.py b/src/caoscrawler/identifiable.py index f6c85c694e5ef0be7e6a9be8154a34c400bab008..cd52effb954d66bcc69b7296de77ddaf7b2b8394 100644 --- a/src/caoscrawler/identifiable.py +++ b/src/caoscrawler/identifiable.py @@ -80,7 +80,7 @@ class Identifiable(): def get_representation(self) -> str: return sha256(Identifiable._create_hashable_string(self).encode('utf-8')).hexdigest() - @ staticmethod + @staticmethod def _value_representation(value) -> str: """returns the string representation of property values to be used in the hash function @@ -103,7 +103,7 @@ class Identifiable(): else: raise ValueError(f"Unknown datatype of the value: {value}") - @ staticmethod + @staticmethod def _create_hashable_string(identifiable: Identifiable) -> str: """ creates a string from the attributes of an identifiable that can be hashed diff --git a/src/caoscrawler/scanner.py b/src/caoscrawler/scanner.py index 9f8f5e40beb729d73151bad38f3e390a4a8cecb4..eeb2bdbf8f0f0d96579598cd8842739a3d154b93 100644 --- a/src/caoscrawler/scanner.py +++ b/src/caoscrawler/scanner.py @@ -55,10 +55,19 @@ from .version import check_cfood_version logger = logging.getLogger(__name__) -def load_definition(crawler_definition_path: str): +def load_definition(crawler_definition_path: str) -> dict: """ Load a cfood from a crawler definition defined by crawler definition path and validate it using cfood-schema.yml. + + Arguments: + ---------- + crawler_definition_path: str + Path to the crawler definition file in yaml format. + + Returns: + -------- + dict containing the crawler definition. """ # Load the cfood from a yaml file: @@ -70,13 +79,21 @@ def load_definition(crawler_definition_path: str): return _resolve_validator_paths(crawler_definition, crawler_definition_path) -def _load_definition_from_yaml_dict(crawler_definitions: list[dict]): +def _load_definition_from_yaml_dict(crawler_definitions: list[dict]) -> dict: """Load crawler definitions from a list of (yaml) dicts `crawler_definitions` which contains either one or two documents. Doesn't resolve the validator paths in the cfood definition, so for internal and testing use only. + Arguments: + ---------- + crawler_definitions: list[dict] + List of one or two dicts containing (optionally) metadata and the crawler definition. + + Returns: + -------- + dict containing the crawler definition. """ if len(crawler_definitions) == 1: # Simple case, just one document: diff --git a/unittests/test_identifiable_adapters.py b/unittests/test_identifiable_adapters.py index 53490bc0413a95d960d94186c639dac2c6223b80..d2a5c3bb5d4007348b48d513e664c39592dc4c61 100644 --- a/unittests/test_identifiable_adapters.py +++ b/unittests/test_identifiable_adapters.py @@ -233,7 +233,7 @@ def test_get_identifiable(): id_r0 = ident.get_identifiable(se, []) -@ pytest.mark.xfail +@pytest.mark.xfail def test_retrieve_identified_record_for_identifiable(): # TODO modify this such that it becomes a test that acutally tests (sufficiently) the # retrieve_identified_record_for_identifiable function diff --git a/unittests/test_scanner.py b/unittests/test_scanner.py index da26af0b9436b622aa9e479dc24f000283cfdc32..680e4abe0da1896e63dba4636c05a38b90de2dc5 100644 --- a/unittests/test_scanner.py +++ b/unittests/test_scanner.py @@ -37,7 +37,8 @@ import yaml from caoscrawler.crawl import Crawler from caoscrawler.debug_tree import DebugTree from caoscrawler.scanner import (create_converter_registry, load_definition, - scan_directory, scan_structure_elements) + scan_directory, scan_structure_elements, + _load_definition_from_yaml_dict) from caoscrawler.structure_elements import (DictElement, DictListElement, DictTextElement, File) from pytest import raises @@ -318,6 +319,40 @@ def test_record_parents(): assert len(rec.parents) == 1 +def test_error_messages(): + data = { + 'Experiments': {} + } + + broken_yaml = """ +EmptyConverter: + """ + broken_definition = _load_definition_from_yaml_dict( + [yaml.load(broken_yaml, Loader=yaml.SafeLoader)]) + + converter_registry = create_converter_registry(broken_definition) + + with pytest.raises(RuntimeError, match="Definition of converter \"EmptyConverter\" is empty"): + scan_structure_elements(DictElement(name="", value=data), + broken_definition, converter_registry) + + broken_yaml = """ +Converter: + type: DictElement + records: + TestRecord: "42" + """ + + broken_definition = _load_definition_from_yaml_dict( + [yaml.load(broken_yaml, Loader=yaml.SafeLoader)]) + + converter_registry = create_converter_registry(broken_definition) + + with pytest.raises(RuntimeError, match="dict expected, but found str: 42"): + scan_structure_elements(DictElement(name="", value=data), + broken_definition, converter_registry) + + def test_units(): """Test the correct setting of units.""" crawler_definition = load_definition(UNITTESTDIR / "test_unit_cfood.yml")