Skip to content
Snippets Groups Projects

F dict heuristic

Merged Florian Spreckelsen requested to merge f-dict-heuristic into dev
1 file
+ 147
4
Compare changes
  • Side-by-side
  • Inline
+ 147
4
@@ -51,7 +51,9 @@ from caoscrawler.converters import (Converter, ConverterValidationError,
from caoscrawler.crawl import Crawler
from caoscrawler.scanner import (_load_definition_from_yaml_dict,
create_converter_registry,
create_transformer_registry, load_definition)
create_transformer_registry,
load_definition,
scan_structure_elements)
from caoscrawler.stores import GeneralStore, RecordStore
from caoscrawler.structure_elements import (BooleanElement, DictElement,
Directory, File, FloatElement,
@@ -675,10 +677,10 @@ def test_properties_from_dict_basic(converter_registry):
that the children are still created correctly.
"""
# TODO definitions with blacklist and named references
# definitions with blacklist and named references
pfdc = PropertiesFromDictConverter(
definition={
"type": "DictElement",
"type": "PropertiesFromDictElement",
"match": ".*",
"record_from_dict": {
"variable_name": "MyRec",
@@ -692,7 +694,7 @@ def test_properties_from_dict_basic(converter_registry):
}
},
name="Test", converter_registry=converter_registry)
# TODO: Tests for Dict with skalars, dict with lists, dict with reference,
# Tests for Dict with scalars, dict with lists, dict with reference,
# dict with list of references, dict with reference with reference, named
# reference
values = GeneralStore()
@@ -891,3 +893,144 @@ def test_properties_from_dict_callable(converter_registry):
assert referenced2.get_property("nourl").value == "something else"
assert referenced2.get_property("url") is not None
assert referenced2.get_property("url").value == "https://indiscale.com"
def test_properties_from_dict_nested(converter_registry):
    """Scan a nested dict with both dict converter flavours side by side.

    The definition places a ``PropertiesFromDictElement`` and a plain
    ``DictElement`` below the same root.  Records are declared on one
    subtree level and extended on deeper ones, and the assertions check
    that every contribution ends up on the expected Record.
    """
    # --- Input data -------------------------------------------------------
    data_root = DictElement("RootDict", {
        "TopLevelRec": "MyRec",
        "propertiesDict": {
            "a": 5,
            "blacklisted": {
                "bl_name": "BlackList",
                "date": "2023-12-31",
            },
        },
        "otherDict": {
            "additional_from_other": "other",
        },
    })

    # --- Crawler definition, assembled bottom-up --------------------------
    # Text element that supplies the name of the top-level Record.
    name_elt = {
        "type": "TextElement",
        "match_name": "^TopLevelRec$",
        "match_value": "(?P<name>.*)",
        "records": {
            "MyRec": {"name": "$name"},
        },
    }

    # The blacklisted key is treated by an ordinary DictElement which
    # builds its own Record and references ``MyRec`` from it.
    blacklisted_elt = {
        "type": "DictElement",
        "match_name": "^blacklisted$",
        "records": {
            "BLRec": {
                "parents": ["BlackListedType"],
                "MyRec": "$MyRec",
            },
        },
        "subtree": {
            "BLNameElt": {
                "type": "TextElement",
                "match_name": "^bl_name$",
                "match_value": "(?P<name>.*)",
                "records": {
                    "BLRec": {"name": "$name"},
                },
            },
            "BLDateElt": {
                "type": "TextElement",
                "match_name": "^date$",
                "match_value": "(?P<date>.*)",
                "records": {
                    "BLRec": {"creation_date": "$date"},
                },
            },
        },
    }

    # PropertiesFromDictElement targeting ``MyRec``; "blacklisted" is
    # listed in its properties_blacklist and handled in the subtree.
    pfd_elt = {
        "type": "PropertiesFromDictElement",
        "match_name": "^propertiesDict$",
        "record_from_dict": {
            "variable_name": "MyRec",
            "properties_blacklist": ["blacklisted"],
        },
        "subtree": {
            "BLElement": blacklisted_elt,
        },
    }

    # Second dict, treated with a plain DictElement definition.
    other_dict_elt = {
        "type": "DictElement",
        "match_name": "^otherDict$",
        "subtree": {
            "additionalElt": {
                "type": "TextElement",
                "match_name": "^additional_from_other$",
                "match_value": "(?P<val>.*)",
                "records": {
                    "MyRec": {"additional_from_other": "$val"},
                },
            },
        },
    }

    cfood = {
        "RootElt": {
            "type": "DictElement",
            "match": ".*",
            # ``MyRec`` is declared here and extended in the subtrees.
            "records": {
                "MyRec": {"parents": ["MyType"]},
            },
            "subtree": {
                "NameElt": name_elt,
                "PFDElement": pfd_elt,
                "OtherDictElement": other_dict_elt,
            },
        },
    }

    records = scan_structure_elements(data_root, cfood, converter_registry)

    # --- Checks -----------------------------------------------------------
    # Exactly the two Records must have been produced.
    assert len(records) == 2
    records_by_name = {rec.name: rec for rec in records}
    myrec = records_by_name.get("MyRec")
    blrec = records_by_name.get("BlackList")
    assert myrec is not None
    assert blrec is not None

    # The parent comes from the root-level definition.
    assert len(myrec.parents) == 1
    assert "MyType" in [parent.name for parent in myrec.parents]

    # Property taken over from the dict; the blacklisted key is not.
    prop_a = myrec.get_property("a")
    assert prop_a is not None
    assert prop_a.value == 5
    assert myrec.get_property("blacklisted") is None

    # The Record built in the subtree of the blacklisted key.
    assert len(blrec.parents) == 1
    assert "BlackListedType" in [parent.name for parent in blrec.parents]
    backref = blrec.get_property("MyRec")
    assert backref is not None
    assert backref.value == myrec
    created = blrec.get_property("creation_date")
    assert created is not None
    assert created.value == "2023-12-31"

    # The plain DictElement branch must have added its property as well.
    extra = myrec.get_property("additional_from_other")
    assert extra is not None
    assert extra.value == "other"
Loading