Skip to content
Snippets Groups Projects
Commit bf99a124 authored by I. Nüske's avatar I. Nüske
Browse files

MNT: compare_entities: Extend parent check, docstring update, renamed some variables

parent 6facb46e
Branches
Tags
2 merge requests!159Release 0.16.o,!155Review filter lists and compare_entities
Pipeline #56969 passed with warnings
......@@ -179,56 +179,68 @@ def getCommitIn(folder):
return get_commit_in(folder)
def compare_entities(old_entity: Entity,
new_entity: Entity,
def compare_entities(entity0: Entity, entity1: Entity,
compare_referenced_records: bool = False
) -> tuple[dict[str, Any], dict[str, Any]]:
"""Compare two entities.
The following attributes are compared:
- parents
- properties
- all SPECIAL_ATTRIBUTES (see models.py)
Returns a tuple of dictionaries, the first element in the tupel lists additional information of old
entity, the second elements lists to additional information of new entity.
Additional information means in detail:
- Additional parents: key="parents", value=a list of names of parents that are present in the respective entity
- Information about properties (key="properties", value=dict with property names as keys):
- properties are included in the dict if they are missing in the other entity or if one of
the following differs:
Returns two dicts listing the differences between the two entities. The
order of the two returned dicts corresponds to the two input entities.
The dicts contain two keys, 'parents' and 'properties'. The list saved
under the 'parents' key contains those parents of the respective entity
that are missing in the other entity, and the 'properties' dict contains
properties and SPECIAL_ATTRIBUTES if they are missing or different from
their counterparts in the other entity.
The value of the properties dict for each listed property is again a dict
detailing the differences between this property and its counterpart.
The characteristics that are checked to determine whether two properties
match are the following:
- datatype
- importance or
- importance
- value
If any of these characteristics differ for a property, the respective
string (datatype, importance, value) is added as a key to the dict of the
property with its value being the characteristics value,
e.g. {"prop": {"value": 6, 'importance': 'SUGGESTED'}}.
If a property is of type LIST, the comparison is order-sensitive.
In case of changed information the value listed under the respective key is the
value that is stored in the respective entity.
Comparison of multi-properties is not yet supported, so should either
entity have several instances of one Property, the comparison is aborted
and an error is raised.
If `compare_referenced_records` is `True`, also referenced entities will be
compared using this function (which is then called with
`compare_referenced_records = False` to prevent infinite recursion in case
of circular references).
Two parents match, if TODO: get final decision on parent match criteria
Parameters
----------
old_entity, new_entity : Entity
Entities to be compared
compare_referenced_records : bool, optional
Whether to compare referenced records in case of both, `old_entity` and
`new_entity`, have the same reference properties and both have a Record
object as value. If set to `False`, only the corresponding Python
objects are compared which may lead to unexpected behavior when
identical records are stored in different objects. Default is False.
Should referenced records not be checked for equality between the entities
but for equivalency, this is possible by setting the parameter
compare_referenced_records.
Params
------
entity0 : Entity
First entity to be compared.
entity1 : Entity
Second entity to be compared.
compare_referenced_records: bool, default: False
If set to True, corresponding referenced
records are not checked for equality but for
equivalency, using this function.
compare_referenced_records is set to False for
these recursive calls, so references of
references need to be equal. If set to `False`,
only the Python objects are compared, which may
lead to unexpected behavior.
"""
olddiff: dict[str, Any] = {"properties": {}, "parents": []}
newdiff: dict[str, Any] = {"properties": {}, "parents": []}
diff = ({"properties": {}, "parents": []},
{"properties": {}, "parents": []})
if old_entity is new_entity:
return olddiff, newdiff
# ToDo: Is is wanted here? == would make more sense unless there are plans
# to do use this function in the equality check of entities
if entity0 is entity1:
return diff
if type(old_entity) is not type(new_entity):
# ToDo: Same question as above, is not or !=
if type(entity0) is not type(entity1):
raise ValueError(
"Comparison of different Entity types is not supported.")
......@@ -237,135 +249,206 @@ def compare_entities(old_entity: Entity,
if attr == "value":
continue
oldattr = old_entity.__getattribute__(attr)
attr0 = entity0.__getattribute__(attr)
# we consider "" and None to be nonexistent
old_entity_attr_na = (oldattr == "" or oldattr is None)
attr0_unset = (attr0 == "" or attr0 is None)
newattr = new_entity.__getattribute__(attr)
attr1 = entity1.__getattribute__(attr)
# we consider "" and None to be nonexistent
new_entity_attr_na = (newattr == "" or newattr is None)
attr1_unset = (attr1 == "" or attr1 is None)
# in both entities the current attribute is not set
if old_entity_attr_na and new_entity_attr_na:
if attr0_unset and attr1_unset:
continue
# treat datatype separately if one datatype is an object and the other
# a string or int, and may be a name or id
# a string or int, and therefore may be a name or id
if attr == "datatype":
if not old_entity_attr_na and not new_entity_attr_na:
if isinstance(oldattr, RecordType):
if oldattr.name == newattr:
if not attr0_unset and not attr1_unset:
if isinstance(attr0, RecordType):
if attr0.name == attr1:
continue
if oldattr.id == newattr:
if str(attr0.id) == str(attr1):
continue
if isinstance(newattr, RecordType):
if newattr.name == oldattr:
if isinstance(attr1, RecordType):
if attr1.name == attr0:
continue
if newattr.id == oldattr:
if str(attr1.id) == str(attr0):
continue
# add to diff if attribute has different values or is not set for one entity
if ((old_entity_attr_na ^ new_entity_attr_na)
or (oldattr != newattr)):
if not old_entity_attr_na:
olddiff[attr] = oldattr
if not new_entity_attr_na:
newdiff[attr] = newattr
# add to diff if attr has different values or is not set for one entity
if (attr0_unset != attr1_unset) or (attr0 != attr1):
if not attr0_unset:
diff[0][attr] = attr0
if not attr1_unset:
diff[1][attr] = attr1
# compare value
if old_entity.value != new_entity.value:
ent0_val, ent1_val = entity0.value, entity1.value
if ent0_val != ent1_val:
# though the values are not equal, they might be equivalent:
same_value = False
if compare_referenced_records:
# both values are scalar references:
if isinstance(old_entity.value, Entity) and isinstance(new_entity.value, Entity):
# compare_referenced_records=False to prevent infinite recursion
same_value = empty_diff(
old_entity.value, new_entity.value, compare_referenced_records=False)
if isinstance(ent0_val, Entity) and isinstance(ent1_val, Entity):
# comp_referenced_records=False to prevent infinite recursion
# ToDo: Consider keeping a list of traversed entities rather
# than abort past first layer
same_value = empty_diff(ent0_val, ent1_val, False)
# both values are a list of references:
elif isinstance(old_entity.value, list) and isinstance(new_entity.value, list):
# if all elements in both lists are entity objects, check each pair for equality
# TODO: check whether mixed cases should be allowed or lead to an error
if (all([isinstance(x, Entity) for x in old_entity.value])
and all([isinstance(x, Entity) for x in new_entity.value])):
# lists can't be the same if the lengths are different
if len(old_entity.value) == len(new_entity.value):
# the lists are the same if the diffs of each entry pair are empty
same_value = all(
[empty_diff(x, y, False) for x, y
in zip(old_entity.value, new_entity.value)])
elif isinstance(ent0_val, list) and isinstance(ent1_val, list):
# lists can't be the same if the lengths are different
if len(ent0_val) == len(ent1_val):
lists_match = True
for val0, val1 in zip(ent0_val, ent1_val):
if val0 == val1:
continue
if (isinstance(val0, Entity)
and isinstance(val1, Entity)):
if empty_diff(val0, val1, False):
continue
# ToDo: Henrik wasn't sure whether it would make sense
# to check int and str against an Entity id/name
# -> ask, but if uncommented the test
# test_merge_id_with_resolved_entity in
# test_apiutils fails
#if (isinstance(val0, Entity)
# and isinstance(val1, (int, str))):
# if (str(val0.id) == str(val1)
# or str(val0.name) == str(val1)):
# continue
#if (isinstance(val1, Entity)
# and isinstance(val0, (int, str))):
# if (str(val1.id) == str(val0)
# or str(val1.name) == str(val0)):
# continue
# val0 and val1 could not be matched
lists_match = False
break
if lists_match:
same_value = True
if not same_value:
olddiff["value"] = old_entity.value
newdiff["value"] = new_entity.value
diff[0]["value"] = ent0_val
diff[1]["value"] = ent1_val
# compare properties
for prop in old_entity.properties:
matching = [p for p in new_entity.properties if p.name.lower() == prop.name.lower()]
for prop in entity0.properties:
matching = entity1.properties.filter(prop, check_wrapped=False)
if len(matching) == 0:
# old_entity has prop, new_entity does not
olddiff["properties"][prop.name] = {}
elif len(matching) ==1:
olddiff["properties"][prop.name] = {}
newdiff["properties"][prop.name] = {}
oldpropdiff = olddiff["properties"][prop.name]
newpropdiff = newdiff["properties"][prop.name]
# recursive call to determine the differences between properties
# entity1 has prop, entity0 does not
diff[0]["properties"][prop.name] = {}
elif len(matching) == 1:
diff[0]["properties"][prop.name] = {}
diff[1]["properties"][prop.name] = {}
propdiff = (diff[0]["properties"][prop.name],
diff[1]["properties"][prop.name])
# Recursive call to determine the differences between properties
# ToDo: This can lead to infinite recursion if two properties have
# each other as subproperties
od, nd = compare_entities(prop, matching[0], compare_referenced_records=compare_referenced_records)
# as we do not care about parents and properties here, discard their entries
# TODO do we?
# themselves or each other as subproperties - would also be
# fixed by keeping a list of traversed entities I believe
# We should compare the wrapped properties instead of the
# wrapping entities if possible:
comp1, comp2 = prop, matching[0]
if (comp1._wrapped_entity is not None
and comp2._wrapped_entity is not None):
comp1, comp2 = comp1._wrapped_entity, comp2._wrapped_entity
# ToDo: compare_referenced_records is being ignored here, is this
# intended? If not, also fix in other places
od, nd = compare_entities(comp1, comp2, compare_referenced_records)
# ToDo: Do we care about parents and properties here?
# Note: If we can identify a subset of characteristics that we
# exclusively care about, maybe restrict the recursion to
# those checks at some point? Would be faster, and might
# replace the full recursions that we later call empty_diff
# on anyways
# as we do not care about parents and properties here,
# discard their entries
od.pop("parents")
od.pop("properties")
nd.pop("parents")
nd.pop("properties")
# use the remaining diff
oldpropdiff.update(od)
newpropdiff.update(nd)
propdiff[0].update(od)
propdiff[1].update(nd)
# As the importance of a property is an attribute of the record and not
# the property, it is not contained in the diff returned by compare_entities
if old_entity.get_importance(prop.name) != new_entity.get_importance(prop.name):
oldpropdiff["importance"] = old_entity.get_importance(prop.name)
newpropdiff["importance"] = new_entity.get_importance(prop.name)
# As the importance of a property is an attribute of the record
# and not the property, it is not contained in the diff returned
# by compare_entities and needs to be added separately
if (entity0.get_importance(prop.name) !=
entity1.get_importance(prop.name)):
propdiff[0]["importance"] = entity0.get_importance(prop.name)
propdiff[1]["importance"] = entity1.get_importance(prop.name)
# in case there is no difference, we remove the dict keys again
if len(newpropdiff) == 0 and len(oldpropdiff) == 0:
newdiff["properties"].pop(prop.name)
olddiff["properties"].pop(prop.name)
if len(propdiff[0]) == 0 and len(propdiff[1]) == 0:
diff[0]["properties"].pop(prop.name)
diff[1]["properties"].pop(prop.name)
else:
raise NotImplementedError(
"Comparison not implemented for multi-properties.")
# we have not yet compared properties that do not exist in old_entity
for prop in new_entity.properties:
# check how often the property appears in old_entity
num_old_prop = len([0 for p in old_entity.properties
if p.name.lower() == prop.name.lower()])
if num_old_prop == 0:
# property is only present in new_entity - add to diff
newdiff["properties"][prop.name] = {}
if num_old_prop > 1:
# Check whether the property is present multiple times in old_entity
# ToDo: Iterate and match analogously to parents?
# we have not yet compared properties that do not exist in entity0
for prop in entity1.properties:
# check how often the property appears in entity0
num_prop_in_ent0 = len(entity0.properties.filter(prop))
if num_prop_in_ent0 == 0:
# property is only present in entity0 - add to diff
diff[1]["properties"][prop.name] = {}
if num_prop_in_ent0 > 1:
# Check whether the property is present multiple times in entity0
# and raise error - result would be incorrect
raise NotImplementedError(
"Comparison not implemented for multi-properties.")
# compare parents
# ToDo: Compare using filter function, compare inheritance level for RTs
# TODO we currently only use names for parents and property matching
for parent in old_entity.parents:
if len([0 for p in new_entity.parents if p.name.lower() == parent.name.lower()]) == 0:
olddiff["parents"].append(parent.name)
for parent in new_entity.parents:
if len([0 for p in old_entity.parents if p.name.lower() == parent.name.lower()]) == 0:
newdiff["parents"].append(parent.name)
return olddiff, newdiff
for index, parents, other_entity in [(0, entity0.parents, entity1),
(1, entity1.parents, entity0)]:
for parent in parents:
# As parent is a wrapper entity, we also want the wrappers
matching = other_entity.parents.filter(parent, check_wrapped=False)
if len(matching) == 0:
diff[index]["parents"].append(parent.name)
continue
# ToDo: Should parents also have a differences dict?
# ToDo: What about duplicate parents? Is this already satisfactory?
# ToDo: Recheck whether matching id and name is intended to be a
# sufficient criterion for a parent match, if yes, delete
# this and uncomment+test the commented block below
match_found = False
for match in matching:
comp1, comp2 = parent, match
if (comp1._wrapped_entity is not None
and comp2._wrapped_entity is not None):
comp1, comp2 = comp1._wrapped_entity, comp2._wrapped_entity
if empty_diff(comp1, comp2, False):
# For RecordType check inheritance, otherwise it's a match
if not isinstance(parent, RecordType):
match_found = True
break
elif (parent._flags['inheritance'] ==
match._flags['inheritance']):
match_found = True
break
if not match_found:
diff[index]["parents"].append(parent.name)
# if isinstance(parent, RecordType):
# # compare inheritance level only for RecordTypes
# matching_inheritance_found = False
# for potential in matching:
# if (parent._flags['inheritance'] ==
# potential._flags['inheritance']):
# matching_inheritance_found = True
# break
# if not matching_inheritance_found:
# # ToDo: How to indicate that this is a non-matching
# # importance level instead of a missing parent?
# diff[index]["parents"].append(parent.name)
return diff
def empty_diff(old_entity: Entity, new_entity: Entity,
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment