Skip to content
Snippets Groups Projects
Commit 3535228f authored by I. Nüske's avatar I. Nüske
Browse files

Revert "MNT: compare_entities: Extend parent check, docstring update, renamed some variables"

This reverts commit bf99a124.
parent bf99a124
No related branches found
No related tags found
1 merge request!151Draft: refactor compare_entities and enhance test coverage
......@@ -179,68 +179,56 @@ def getCommitIn(folder):
return get_commit_in(folder)
def compare_entities(entity0: Entity, entity1: Entity,
def compare_entities(old_entity: Entity,
new_entity: Entity,
compare_referenced_records: bool = False
) -> tuple[dict[str, Any], dict[str, Any]]:
"""Compare two entities.
Returns two dicts listing the differences between the two entities. The
order of the two returned dicts corresponds to the two input entities.
The dicts contain two keys, 'parents' and 'properties'. The list saved
under the 'parents' key contains those parents of the respective entity
that are missing in the other entity, and the 'properties' dict contains
properties and SPECIAL_ATTRIBUTES if they are missing or different from
their counterparts in the other entity.
The value of the properties dict for each listed property is again a dict
detailing the differences between this property and its counterpart.
The characteristics that are checked to determine whether two properties
match are the following:
The following attributes are compared:
- parents
- properties
- all SPECIAL_ATTRIBUTES (see models.py)
Returns a tuple of dictionaries, the first element in the tupel lists additional information of old
entity, the second elements lists to additional information of new entity.
Additional information means in detail:
- Additional parents: key="parents", value=a list of names of parents that are present in the respective entity
- Information about properties (key="properties", value=dict with property names as keys):
- properties are included in the dict if they are missing in the other entity or if one of
the following differs:
- datatype
- importance
- importance or
- value
If any of these characteristics differ for a property, the respective
string (datatype, importance, value) is added as a key to the dict of the
property with its value being the characteristics value,
e.g. {"prop": {"value": 6, 'importance': 'SUGGESTED'}}.
If a property is of type LIST, the comparison is order-sensitive.
Comparison of multi-properties is not yet supported, so should either
entity have several instances of one Property, the comparison is aborted
and an error is raised.
In case of changed information the value listed under the respective key is the
value that is stored in the respective entity.
Two parents match, if TODO: get final decision on parent match criteria
If `compare_referenced_records` is `True`, also referenced entities will be
compared using this function (which is then called with
`compare_referenced_records = False` to prevent infinite recursion in case
of circular references).
Should referenced records not be checked for equality between the entities
but for equivalency, this is possible by setting the parameter
compare_referenced_records.
Parameters
----------
old_entity, new_entity : Entity
Entities to be compared
compare_referenced_records : bool, optional
Whether to compare referenced records in case of both, `old_entity` and
`new_entity`, have the same reference properties and both have a Record
object as value. If set to `False`, only the corresponding Python
objects are compared which may lead to unexpected behavior when
identical records are stored in different objects. Default is False.
Params
------
entity0 : Entity
First entity to be compared.
entity1 : Entity
Second entity to be compared.
compare_referenced_records: bool, default: False
If set to True, corresponding referenced
records are not checked for equality but for
equivalency, using this function.
compare_referenced_records is set to False for
these recursive calls, so references of
references need to be equal. If set to `False`,
only the Python objects are compared, which may
lead to unexpected behavior.
"""
diff = ({"properties": {}, "parents": []},
{"properties": {}, "parents": []})
olddiff: dict[str, Any] = {"properties": {}, "parents": []}
newdiff: dict[str, Any] = {"properties": {}, "parents": []}
# ToDo: Is is wanted here? == would make more sense unless there are plans
# to do use this function in the equality check of entities
if entity0 is entity1:
return diff
if old_entity is new_entity:
return olddiff, newdiff
# ToDo: Same question as above, is not or !=
if type(entity0) is not type(entity1):
if type(old_entity) is not type(new_entity):
raise ValueError(
"Comparison of different Entity types is not supported.")
......@@ -249,206 +237,135 @@ def compare_entities(entity0: Entity, entity1: Entity,
if attr == "value":
continue
attr0 = entity0.__getattribute__(attr)
oldattr = old_entity.__getattribute__(attr)
# we consider "" and None to be nonexistent
attr0_unset = (attr0 == "" or attr0 is None)
old_entity_attr_na = (oldattr == "" or oldattr is None)
attr1 = entity1.__getattribute__(attr)
newattr = new_entity.__getattribute__(attr)
# we consider "" and None to be nonexistent
attr1_unset = (attr1 == "" or attr1 is None)
new_entity_attr_na = (newattr == "" or newattr is None)
# in both entities the current attribute is not set
if attr0_unset and attr1_unset:
if old_entity_attr_na and new_entity_attr_na:
continue
# treat datatype separately if one datatype is an object and the other
# a string or int, and therefore may be a name or id
# a string or int, and may be a name or id
if attr == "datatype":
if not attr0_unset and not attr1_unset:
if isinstance(attr0, RecordType):
if attr0.name == attr1:
if not old_entity_attr_na and not new_entity_attr_na:
if isinstance(oldattr, RecordType):
if oldattr.name == newattr:
continue
if str(attr0.id) == str(attr1):
if oldattr.id == newattr:
continue
if isinstance(attr1, RecordType):
if attr1.name == attr0:
if isinstance(newattr, RecordType):
if newattr.name == oldattr:
continue
if str(attr1.id) == str(attr0):
if newattr.id == oldattr:
continue
# add to diff if attr has different values or is not set for one entity
if (attr0_unset != attr1_unset) or (attr0 != attr1):
if not attr0_unset:
diff[0][attr] = attr0
if not attr1_unset:
diff[1][attr] = attr1
# add to diff if attribute has different values or is not set for one entity
if ((old_entity_attr_na ^ new_entity_attr_na)
or (oldattr != newattr)):
if not old_entity_attr_na:
olddiff[attr] = oldattr
if not new_entity_attr_na:
newdiff[attr] = newattr
# compare value
ent0_val, ent1_val = entity0.value, entity1.value
if ent0_val != ent1_val:
if old_entity.value != new_entity.value:
# though the values are not equal, they might be equivalent:
same_value = False
if compare_referenced_records:
# both values are scalar references:
if isinstance(ent0_val, Entity) and isinstance(ent1_val, Entity):
# comp_referenced_records=False to prevent infinite recursion
# ToDo: Consider keeping a list of traversed entities rather
# than abort past first layer
same_value = empty_diff(ent0_val, ent1_val, False)
if isinstance(old_entity.value, Entity) and isinstance(new_entity.value, Entity):
# compare_referenced_records=False to prevent infinite recursion
same_value = empty_diff(
old_entity.value, new_entity.value, compare_referenced_records=False)
# both values are a list of references:
elif isinstance(ent0_val, list) and isinstance(ent1_val, list):
# lists can't be the same if the lengths are different
if len(ent0_val) == len(ent1_val):
lists_match = True
for val0, val1 in zip(ent0_val, ent1_val):
if val0 == val1:
continue
if (isinstance(val0, Entity)
and isinstance(val1, Entity)):
if empty_diff(val0, val1, False):
continue
# ToDo: Henrik wasn't sure whether it would make sense
# to check int and str against an Entity id/name
# -> ask, but if uncommented the test
# test_merge_id_with_resolved_entity in
# test_apiutils fails
#if (isinstance(val0, Entity)
# and isinstance(val1, (int, str))):
# if (str(val0.id) == str(val1)
# or str(val0.name) == str(val1)):
# continue
#if (isinstance(val1, Entity)
# and isinstance(val0, (int, str))):
# if (str(val1.id) == str(val0)
# or str(val1.name) == str(val0)):
# continue
# val0 and val1 could not be matched
lists_match = False
break
if lists_match:
same_value = True
elif isinstance(old_entity.value, list) and isinstance(new_entity.value, list):
# if all elements in both lists are entity objects, check each pair for equality
# TODO: check whether mixed cases should be allowed or lead to an error
if (all([isinstance(x, Entity) for x in old_entity.value])
and all([isinstance(x, Entity) for x in new_entity.value])):
# lists can't be the same if the lengths are different
if len(old_entity.value) == len(new_entity.value):
# the lists are the same if the diffs of each entry pair are empty
same_value = all(
[empty_diff(x, y, False) for x, y
in zip(old_entity.value, new_entity.value)])
if not same_value:
diff[0]["value"] = ent0_val
diff[1]["value"] = ent1_val
olddiff["value"] = old_entity.value
newdiff["value"] = new_entity.value
# compare properties
for prop in entity0.properties:
matching = entity1.properties.filter(prop, check_wrapped=False)
for prop in old_entity.properties:
matching = [p for p in new_entity.properties if p.name.lower() == prop.name.lower()]
if len(matching) == 0:
# entity1 has prop, entity0 does not
diff[0]["properties"][prop.name] = {}
elif len(matching) == 1:
diff[0]["properties"][prop.name] = {}
diff[1]["properties"][prop.name] = {}
propdiff = (diff[0]["properties"][prop.name],
diff[1]["properties"][prop.name])
# Recursive call to determine the differences between properties
# old_entity has prop, new_entity does not
olddiff["properties"][prop.name] = {}
elif len(matching) ==1:
olddiff["properties"][prop.name] = {}
newdiff["properties"][prop.name] = {}
oldpropdiff = olddiff["properties"][prop.name]
newpropdiff = newdiff["properties"][prop.name]
# recursive call to determine the differences between properties
# ToDo: This can lead to infinite recursion if two properties have
# themselves or each other as subproperties - would also be
# fixed by keeping a list of traversed entities I believe
# We should compare the wrapped properties instead of the
# wrapping entities if possible:
comp1, comp2 = prop, matching[0]
if (comp1._wrapped_entity is not None
and comp2._wrapped_entity is not None):
comp1, comp2 = comp1._wrapped_entity, comp2._wrapped_entity
# ToDo: compare_referenced_records is being ignored here, is this
# intended? If not, also fix in other places
od, nd = compare_entities(comp1, comp2, compare_referenced_records)
# ToDo: Do we care about parents and properties here?
# Note: If we can identify a subset of characteristics that we
# exclusively care about, maybe restrict the recursion to
# those checks at some point? Would be faster, and might
# replace the full recursions that we later call empty_diff
# on anyways
# as we do not care about parents and properties here,
# discard their entries
# each other as subproperties
od, nd = compare_entities(prop, matching[0], compare_referenced_records=compare_referenced_records)
# as we do not care about parents and properties here, discard their entries
# TODO do we?
od.pop("parents")
od.pop("properties")
nd.pop("parents")
nd.pop("properties")
# use the remaining diff
propdiff[0].update(od)
propdiff[1].update(nd)
oldpropdiff.update(od)
newpropdiff.update(nd)
# As the importance of a property is an attribute of the record
# and not the property, it is not contained in the diff returned
# by compare_entities and needs to be added separately
if (entity0.get_importance(prop.name) !=
entity1.get_importance(prop.name)):
propdiff[0]["importance"] = entity0.get_importance(prop.name)
propdiff[1]["importance"] = entity1.get_importance(prop.name)
# As the importance of a property is an attribute of the record and not
# the property, it is not contained in the diff returned by compare_entities
if old_entity.get_importance(prop.name) != new_entity.get_importance(prop.name):
oldpropdiff["importance"] = old_entity.get_importance(prop.name)
newpropdiff["importance"] = new_entity.get_importance(prop.name)
# in case there is no difference, we remove the dict keys again
if len(propdiff[0]) == 0 and len(propdiff[1]) == 0:
diff[0]["properties"].pop(prop.name)
diff[1]["properties"].pop(prop.name)
if len(newpropdiff) == 0 and len(oldpropdiff) == 0:
newdiff["properties"].pop(prop.name)
olddiff["properties"].pop(prop.name)
else:
raise NotImplementedError(
"Comparison not implemented for multi-properties.")
# ToDo: Iterate and match analogously to parents?
# we have not yet compared properties that do not exist in entity0
for prop in entity1.properties:
# check how often the property appears in entity0
num_prop_in_ent0 = len(entity0.properties.filter(prop))
if num_prop_in_ent0 == 0:
# property is only present in entity0 - add to diff
diff[1]["properties"][prop.name] = {}
if num_prop_in_ent0 > 1:
# Check whether the property is present multiple times in entity0
# we have not yet compared properties that do not exist in old_entity
for prop in new_entity.properties:
# check how often the property appears in old_entity
num_old_prop = len([0 for p in old_entity.properties
if p.name.lower() == prop.name.lower()])
if num_old_prop == 0:
# property is only present in new_entity - add to diff
newdiff["properties"][prop.name] = {}
if num_old_prop > 1:
# Check whether the property is present multiple times in old_entity
# and raise error - result would be incorrect
raise NotImplementedError(
"Comparison not implemented for multi-properties.")
# compare parents
for index, parents, other_entity in [(0, entity0.parents, entity1),
(1, entity1.parents, entity0)]:
for parent in parents:
# As parent is a wrapper entity, we also want the wrappers
matching = other_entity.parents.filter(parent, check_wrapped=False)
if len(matching) == 0:
diff[index]["parents"].append(parent.name)
continue
# ToDo: Should parents also have a differences dict?
# ToDo: What about duplicate parents? Is this already satisfactory?
# ToDo: Recheck whether matching id and name is intended to be a
# sufficient criterion for a parent match, if yes, delete
# this and uncomment+test the commented block below
match_found = False
for match in matching:
comp1, comp2 = parent, match
if (comp1._wrapped_entity is not None
and comp2._wrapped_entity is not None):
comp1, comp2 = comp1._wrapped_entity, comp2._wrapped_entity
if empty_diff(comp1, comp2, False):
# For RecordType check inheritance, otherwise it's a match
if not isinstance(parent, RecordType):
match_found = True
break
elif (parent._flags['inheritance'] ==
match._flags['inheritance']):
match_found = True
break
if not match_found:
diff[index]["parents"].append(parent.name)
# if isinstance(parent, RecordType):
# # compare inheritance level only for RecordTypes
# matching_inheritance_found = False
# for potential in matching:
# if (parent._flags['inheritance'] ==
# potential._flags['inheritance']):
# matching_inheritance_found = True
# break
# if not matching_inheritance_found:
# # ToDo: How to indicate that this is a non-matching
# # importance level instead of a missing parent?
# diff[index]["parents"].append(parent.name)
return diff
# ToDo: Compare using filter function, compare inheritance level for RTs
# TODO we currently only use names for parents and property matching
for parent in old_entity.parents:
if len([0 for p in new_entity.parents if p.name.lower() == parent.name.lower()]) == 0:
olddiff["parents"].append(parent.name)
for parent in new_entity.parents:
if len([0 for p in old_entity.parents if p.name.lower() == parent.name.lower()]) == 0:
newdiff["parents"].append(parent.name)
return olddiff, newdiff
def empty_diff(old_entity: Entity, new_entity: Entity,
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment