Skip to content
Snippets Groups Projects
Commit 2cbeb584 authored by Florian Spreckelsen
Browse files

ENH: Implement property from dict generation

parent 7e7eeddc
Branches
Tags
2 merge requests: !178 "FIX: #96 Better error output for crawl.py script." and !163 "F dict heuristic"
Pipeline #49210 failed
......@@ -811,43 +811,148 @@ class DictElementConverter(Converter):
return match_name_and_value(self.definition, element.name, element.value)
class HeuristicDictConverter(DictElementConverter):
class PropertiesFromDictConverter(DictElementConverter):
"""Extend the :py:class:`DictElementConverter` by a heuristic to set
property values from the dictionary keys.
"""
def _validate_definition(self, definition: dict, name: str):
def _validate_definition(self):
if "record_from_dict" not in definition or definition["record_from_dict"] is None:
if "record_from_dict" not in self.definition or self.definition["record_from_dict"] is None:
raise ValueError(
"You need to specify the (root) record, the properties of "
f"which will be set from the dict in converter {name}."
f"which will be set from the dict in converter {self.name}."
)
def __init__(self, definition: dict, name: str, converter_registry: dict):
if not "variable_name" in self.definition["record_from_dict"] or not self.definition["record_from_dict"]["variable_name"]:
raise ValueError(
f"The root record in converter {self.name} needs to have a "
"`variable_name` by which it is accessed in the subtree."
)
def __init__(self, definition: dict, name: str, converter_registry: dict,
referenced_record_callback: Optional[callable] = None):
_validate_definition(definition)
super().__init__(definition, name, converter_registry)
self._validate_definition()
self.referenced_record_callback = referenced_record_callback
# NOTE(review): `keys_modified: list = []` is a mutable default argument; the
# same list object would be shared by every call that omits it. All call sites
# visible in this view pass `keys_modified` explicitly, so this is latent, but
# a `None` default that is replaced by a fresh list would be safer.
def _recursively_create_records(self, subdict: dict, root_record: db.Record,
root_rec_name: str,
values: GeneralStore, records: RecordStore,
referenced_record_callback: callable,
keys_modified: list = []
):
"""Create a record from the given `subdict` and recursively create referenced records.

Each item of `subdict` is turned into a property (or attribute) of
`root_record`, depending on the type of its value:

- Keys listed in ``record_from_dict.properties_blacklist`` of the converter
  definition are skipped.
- A list that contains no dict is added as a property value as-is.
- A list that contains only dicts yields one new ``db.Record`` per entry;
  each is registered in `records` and `values` under
  ``"{root_rec_name}.{key}.{index}"``, filled by a recursive call, and the
  list of these records becomes the property value.
- A dict yields a single new ``db.Record`` registered under
  ``"{root_rec_name}.{key}"``, filled by a recursive call, and added as the
  property value.
- Any other value is assigned via ``setattr`` when ``key.lower()`` is in
  ``SPECIAL_PROPERTIES``, otherwise added as a regular property.

Newly created referenced records get their parents from
``record_from_dict.references[key]["parents"]`` when `key` is listed there,
and the single parent `key` otherwise.

Parameters
----------
subdict : dict
    The dictionary whose items are converted.
root_record : db.Record
    The record that receives the properties.
root_rec_name : str
    Name under which `root_record` is known; used as prefix for the names
    of referenced records and in the entries appended to `keys_modified`.
values : GeneralStore
    Store in which every newly created referenced record is entered.
records : RecordStore
    Store in which every newly created referenced record is entered.
referenced_record_callback : callable
    Forwarded unchanged to every recursive call. Where it is actually
    invoked is not part of this span -- TODO confirm against the full file.
keys_modified : list
    Accumulator to which ``(root_rec_name, key)`` tuples are appended.

Returns
-------
The recursive call sites unpack the result as ``(keys_modified, record)``;
the ``return`` statement itself lies outside this span.

Raises
------
ValueError
    If a list value mixes dicts with non-dict entries.
"""
# Optional settings from the converter definition; both fall back to an
# empty list when the key is absent.
blacklisted_keys = self.definition["record_from_dict"][
"properties_blacklist"] if "properties_blacklist" in self.definition["record_from_dict"] else []
# NOTE(review): when present, `references` is indexed like a mapping below
# (`special_references[key]["parents"]`), yet the fallback is a list. This
# works because only `key in special_references` is evaluated on the
# fallback, but an empty dict would express the intent more clearly.
special_references = self.definition["record_from_dict"]["references"] if "references" in self.definition["record_from_dict"] else [
]
for key, value in subdict.items():
if key in blacklisted_keys:
# We ignore this in the automated property generation
continue
if isinstance(value, list):
if not any([isinstance(val, dict) for val in value]):
# no dict in list, i.e., no references, so this is simple
root_record.add_property(name=key, value=value)
else:
if not all([isinstance(val, dict) for val in value]):
# if this is not an error (most probably it is), this
# needs to be handled manually for now.
raise ValueError(
f"{key} in {subdict} contains a mixed list of references and scalars.")
# Every dict in the list becomes its own referenced record.
ref_recs = []
for ii, ref_dict in enumerate(value):
ref_rec = db.Record()
# The list index is part of the store name of each referenced record.
ref_var_name = f"{root_rec_name}.{key}.{ii}"
if key in special_references:
for par in special_references[key]["parents"]:
ref_rec.add_parent(par)
else:
# No explicit parents configured: the key itself is used as parent.
ref_rec.add_parent(key)
# Register the new record in both stores before descending into it.
records[ref_var_name] = ref_rec
values[ref_var_name] = ref_rec
keys_modified, ref_rec = self._recursively_create_records(
subdict=ref_dict,
root_record=ref_rec,
root_rec_name=ref_var_name,
values=values,
records=records,
referenced_record_callback=referenced_record_callback,
keys_modified=keys_modified,
)
ref_recs.append(ref_rec)
root_record.add_property(name=key, value=ref_recs)
elif isinstance(value, dict):
# A single nested dict becomes one referenced record.
ref_rec = db.Record()
ref_var_name = f"{root_rec_name}.{key}"
if key in special_references:
for par in special_references[key]["parents"]:
ref_rec.add_parent(par)
else:
# No explicit parents configured: the key itself is used as parent.
ref_rec.add_parent(key)
# Register the new record in both stores before descending into it.
records[ref_var_name] = ref_rec
values[ref_var_name] = ref_rec
keys_modified, ref_rec = self._recursively_create_records(
subdict=value,
root_record=ref_rec,
root_rec_name=ref_var_name,
values=values,
records=records,
referenced_record_callback=referenced_record_callback,
keys_modified=keys_modified
)
root_record.add_property(key, ref_rec)
else:
# Plain value: keys matching SPECIAL_PROPERTIES (case-insensitively)
# are set as attributes of the record instead of as properties.
if key.lower() in SPECIAL_PROPERTIES:
setattr(root_record, key.lower(), value)
else:
root_record.add_property(name=key, value=value)
# NOTE(review): indentation is lost in this view, so it cannot be told here
# whether this append runs for every processed key or only in the plain-value
# branch -- TODO confirm against the original file.
keys_modified.append((root_rec_name, key))
def create_records(self, values: GeneralStore, records: RecordStore,
element: StructureElement, referenced_record_callback:
Optional[callable] = None):
if referenced_record_callback:
root_record = referenced_record_callback(root_record)
keys_modified = []
return keys_modified, root_record
def _insert_into_stores(rec: db.Record, rec_name: str):
def create_records(self, values: GeneralStore, records: RecordStore,
element: StructureElement):
records[rec_name] = rec
values[rec_name] = rec
keys_modified = []
def _create_or_return_record(rec_name: str, parent_names: Optional[Union[str, List[str]]] = None):
rfd = self.definition["record_from_dict"]
if rfd["variable_name"] not in records:
rec = db.Record()
if "name" in rfd:
rec.name = rfd["name"]
if "parents" in rfd:
for par in rfd["parents"]:
rec.add_parent(par)
else:
rec.add_parent(rfd["variable_name"])
records[rfd["variable_name"]] = rec
values[rfd["variable_name"]] = rec
if parent_names is None:
parent_names = [rec_name]
elif not isinstance(parent_names, list):
parent_names = [parent_names]
else:
rec = records[rfd["variable_name"]]
keys_modified, rec = self._recursively_create_records(
subdict=element.value,
root_record=rec,
root_rec_name=rfd["variable_name"],
values=values,
records=records,
referenced_record_callback=self.referenced_record_callback,
keys_modified=keys_modified,
)
keys_modified.extend(super().create_records(
values=values, records=records, element=element))
......
......@@ -28,12 +28,15 @@ import importlib
import json
import logging
import os
import pytest
import sys
import yaml
from itertools import product
from pathlib import Path
import pytest
import yaml
import linkahead as db
from caoscrawler.converters import (Converter, ConverterValidationError,
DateElementConverter, DictElementConverter,
DictIntegerElementConverter,
......@@ -697,6 +700,7 @@ def test_properties_from_dict_basic(converter_registry):
"a": 5,
"b": ["a", "b", "c"],
"scalar_ref": {
"name": "Scalar Ref",
"a": 23,
"blacklisted_int": 42
},
......@@ -718,7 +722,7 @@ def test_properties_from_dict_basic(converter_registry):
"blacklisted_ref": {
"a": 25
},
"author": {
"authors": {
"full_name": "Some Author"
}
})
......@@ -726,6 +730,9 @@ def test_properties_from_dict_basic(converter_registry):
assert "MyRec" in records
my_rec = records["MyRec"]
assert isinstance(my_rec, db.Record)
assert len(my_rec.parents) == 2
assert "DictRT1" in [par.name for par in my_rec.parents]
assert "DictRT2" in [par.name for par in my_rec.parents]
# scalar prop
assert my_rec.get_property("a") is not None
......@@ -741,8 +748,9 @@ def test_properties_from_dict_basic(converter_registry):
assert my_rec.get_property("scalar_ref") is not None
referenced = my_rec.get_property("scalar_ref").value
assert isinstance(referenced, db.Record)
assert referenced.name == "Scalar Ref"
assert len(referenced.parents) == 1
assert referenced.has_parent("scalar_ref")
assert "scalar_ref" in [par.name for par in referenced.parents]
assert referenced.get_property("a") is not None
assert referenced.get_property("a").value == 23
# blacklisted
......@@ -755,9 +763,9 @@ def test_properties_from_dict_basic(converter_registry):
for rec in my_rec.get_property("list_ref").value:
assert isinstance(rec, db.Record)
assert len(rec.parents) == 1
assert rec.has_parent("list_ref")
assert "list_ref" in [par.name for par in rec.parents]
assert rec.get_property("c") is not None
assert type(rec.get_property("c")) is bool
assert type(rec.get_property("c").value) is bool
assert True in [rec.get_property("c").value for rec in my_rec.get_property("list_ref").value]
assert False in [rec.get_property("c").value for rec in my_rec.get_property("list_ref").value]
......@@ -766,14 +774,14 @@ def test_properties_from_dict_basic(converter_registry):
outer_rec = my_rec.get_property("ref_with_ref").value
assert isinstance(outer_rec, db.Record)
assert len(outer_rec.parents) == 1
assert outer_rec.has_parent("ref_with_ref")
assert "ref_with_ref" in [par.name for par in outer_rec.parents]
assert outer_rec.get_property("a") is not None
assert outer_rec.get_property("a").value == 789
assert outer_rec.get_property("ref_in_ref") is not None
inner_rec = outer_rec.get_property("ref_in_ref").value
assert isinstance(inner_rec, db.Record)
assert len(inner_rec.parents) == 1
assert inner_rec.has_parent("ref_in_ref")
assert "ref_in_ref" in [par.name for par in inner_rec.parents]
assert inner_rec.get_property("b") is not None
assert inner_rec.get_property("b").value == "something"
......@@ -782,11 +790,11 @@ def test_properties_from_dict_basic(converter_registry):
assert my_rec.get_property("blacklisted_ref") is None
# named reference property
assert my_rec.get_property("author") is not None
author_rec = my_rec.get_property("author").value
assert my_rec.get_property("authors") is not None
author_rec = my_rec.get_property("authors").value
assert isinstance(author_rec, db.Record)
assert len(author_rec.parents) == 1
assert author_rec.has_parent("Person")
assert "Person" in [par.name for par in author_rec.parents]
assert author_rec.get_property("full_name") is not None
assert author_rec.get_property("full_name").value == "Some Author"
......@@ -834,13 +842,13 @@ def test_properties_from_dict_callable(converter_registry):
pdfc = PropertiesFromDictConverter(
definition={
"record_from_dict": {
"variable_name": "MyRec"
"variable_name": "MyRec",
"name": "My New Record"
}
},
name="TestConverter",
converter_registry=converter_registry,
referenced_record_callback=convert_some_values
}
)
values = GeneralStore()
......@@ -851,7 +859,7 @@ def test_properties_from_dict_callable(converter_registry):
"url": "referenced"
},
"referenced2": {
"nourl": "something else"
"nourl": "something else",
"url": "https://indiscale.com"
}
})
......@@ -860,7 +868,7 @@ def test_properties_from_dict_callable(converter_registry):
my_rec = records["MyRec"]
assert isinstance(my_rec, db.Record)
assert len(my_rec.parents) == 1
assert my_rec.has_parent("MyRec")
assert "MyRec" in [par.name for par in my_rec.parents]
assert my_rec.name == "My New Record"
# simple conversion
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment