def test_properties_from_dict_nested(converter_registry):
    """Scan a nested dict with both dict converters feeding the same Record.

    The definition mixes a ``PropertiesFromDictElement`` with plain
    ``DictElement`` entries.  ``MyRec`` is declared on the root level and is
    referred to again on deeper subtree levels, while the blacklisted
    sub-dict is handled by its own subtree definition which creates ``BLRec``.

    """

    def text_element(key_pattern, value_pattern, record_name, record_values):
        # Every call returns fresh dicts, so no definition objects are shared
        # between the individual subtree entries.
        return {
            "type": "TextElement",
            "match_name": key_pattern,
            "match_value": value_pattern,
            "records": {record_name: record_values},
        }

    # ---- Data to be scanned -------------------------------------------------
    blacklisted_data = {"bl_name": "BlackList", "date": "2023-12-31"}
    properties_data = {"a": 5, "blacklisted": blacklisted_data}
    other_data = {"additional_from_other": "other"}
    root_dict_element = DictElement("RootDict", {
        "TopLevelRec": "MyRec",
        "propertiesDict": properties_data,
        "otherDict": other_data,
    })

    # ---- Crawler definition, assembled from the innermost level outwards ----
    # Handles the sub-dict that is blacklisted in the PropertiesFromDictElement
    blacklisted_definition = {
        "type": "DictElement",
        "match_name": "^blacklisted$",
        "records": {
            "BLRec": {
                "parents": ["BlackListedType"],
                "MyRec": "$MyRec",
            },
        },
        "subtree": {
            "BLNameElt": text_element(
                "^bl_name$", "(?P<name>.*)", "BLRec", {"name": "$name"}),
            "BLDateElt": text_element(
                "^date$", "(?P<date>.*)", "BLRec", {"creation_date": "$date"}),
        },
    }
    properties_definition = {
        "type": "PropertiesFromDictElement",
        "match_name": "^propertiesDict$",
        "record_from_dict": {
            "variable_name": "MyRec",
            "properties_blacklist": ["blacklisted"],
        },
        "subtree": {"BLElement": blacklisted_definition},
    }
    # Other dict which uses the DictElementConverter
    other_definition = {
        "type": "DictElement",
        "match_name": "^otherDict$",
        "subtree": {
            "additionalElt": text_element(
                "^additional_from_other$", "(?P<val>.*)",
                "MyRec", {"additional_from_other": "$val"}),
        },
    }
    # Root dictionary
    root_definition = {
        "type": "DictElement",
        "match": ".*",
        # MyRec is defined here on the top level and used in the subtrees
        "records": {"MyRec": {"parents": ["MyType"]}},
        "subtree": {
            # Top-level text element for the Record name
            "NameElt": text_element(
                "^TopLevelRec$", "(?P<name>.*)", "MyRec", {"name": "$name"}),
            "PFDElement": properties_definition,
            "OtherDictElement": other_definition,
        },
    }
    def_dict = {"RootElt": root_definition}

    records = scan_structure_elements(root_dict_element, def_dict, converter_registry)

    # Exactly the two expected records must have been created
    assert len(records) == 2
    records_by_name = {rec.name: rec for rec in records}
    myrec = records_by_name.get("MyRec")
    blrec = records_by_name.get("BlackList")
    assert myrec is not None
    assert blrec is not None

    # The parent comes from the top-level definition
    assert len(myrec.parents) == 1
    assert any(par.name == "MyType" for par in myrec.parents)

    # Property created automatically from the dict; the blacklisted key is not
    assert myrec.get_property("a") is not None
    assert myrec.get_property("a").value == 5
    assert myrec.get_property("blacklisted") is None

    # The record built by the subtree of the blacklisted sub-dict
    assert len(blrec.parents) == 1
    assert any(par.name == "BlackListedType" for par in blrec.parents)
    for prop_name, expected_value in (("MyRec", myrec),
                                      ("creation_date", "2023-12-31")):
        assert blrec.get_property(prop_name) is not None
        assert blrec.get_property(prop_name).value == expected_value

    # The "old" DictConverter should have added the additional property:
    assert myrec.get_property("additional_from_other") is not None
    assert myrec.get_property("additional_from_other").value == "other"