Skip to content
Snippets Groups Projects
Commit bf99a124 authored by I. Nüske's avatar I. Nüske
Browse files

MNT: compare_entities: Extend parent check, docstring update, renamed some variables

parent 6facb46e
No related branches found
No related tags found
2 merge requests!159Release 0.16.o,!155Review filter lists and compare_entities
Pipeline #56969 passed with warnings
...@@ -179,56 +179,68 @@ def getCommitIn(folder): ...@@ -179,56 +179,68 @@ def getCommitIn(folder):
return get_commit_in(folder) return get_commit_in(folder)
def compare_entities(old_entity: Entity, def compare_entities(entity0: Entity, entity1: Entity,
new_entity: Entity,
compare_referenced_records: bool = False compare_referenced_records: bool = False
) -> tuple[dict[str, Any], dict[str, Any]]: ) -> tuple[dict[str, Any], dict[str, Any]]:
"""Compare two entities. """Compare two entities.
The following attributes are compared: Returns two dicts listing the differences between the two entities. The
- parents order of the two returned dicts corresponds to the two input entities.
- properties The dicts contain two keys, 'parents' and 'properties'. The list saved
- all SPECIAL_ATTRIBUTES (see models.py) under the 'parents' key contains those parents of the respective entity
that are missing in the other entity, and the 'properties' dict contains
Returns a tuple of dictionaries, the first element in the tupel lists additional information of old properties and SPECIAL_ATTRIBUTES if they are missing or different from
entity, the second elements lists to additional information of new entity. their counterparts in the other entity.
Additional information means in detail: The value of the properties dict for each listed property is again a dict
- Additional parents: key="parents", value=a list of names of parents that are present in the respective entity detailing the differences between this property and its counterpart.
- Information about properties (key="properties", value=dict with property names as keys): The characteristics that are checked to determine whether two properties
- properties are included in the dict if they are missing in the other entity or if one of match are the following:
the following differs:
- datatype - datatype
- importance or - importance
- value - value
If any of these characteristics differ for a property, the respective
string (datatype, importance, value) is added as a key to the dict of the
property with its value being the characteristics value,
e.g. {"prop": {"value": 6, 'importance': 'SUGGESTED'}}.
If a property is of type LIST, the comparison is order-sensitive.
In case of changed information the value listed under the respective key is the Comparison of multi-properties is not yet supported, so should either
value that is stored in the respective entity. entity have several instances of one Property, the comparison is aborted
and an error is raised.
If `compare_referenced_records` is `True`, also referenced entities will be Two parents match, if TODO: get final decision on parent match criteria
compared using this function (which is then called with
`compare_referenced_records = False` to prevent infinite recursion in case
of circular references).
Parameters Should referenced records not be checked for equality between the entities
---------- but for equivalency, this is possible by setting the parameter
old_entity, new_entity : Entity compare_referenced_records.
Entities to be compared
compare_referenced_records : bool, optional
Whether to compare referenced records in case of both, `old_entity` and
`new_entity`, have the same reference properties and both have a Record
object as value. If set to `False`, only the corresponding Python
objects are compared which may lead to unexpected behavior when
identical records are stored in different objects. Default is False.
Params
------
entity0 : Entity
First entity to be compared.
entity1 : Entity
Second entity to be compared.
compare_referenced_records: bool, default: False
If set to True, corresponding referenced
records are not checked for equality but for
equivalency, using this function.
compare_referenced_records is set to False for
these recursive calls, so references of
references need to be equal. If set to `False`,
only the Python objects are compared, which may
lead to unexpected behavior.
""" """
olddiff: dict[str, Any] = {"properties": {}, "parents": []} diff = ({"properties": {}, "parents": []},
newdiff: dict[str, Any] = {"properties": {}, "parents": []} {"properties": {}, "parents": []})
if old_entity is new_entity: # ToDo: Is is wanted here? == would make more sense unless there are plans
return olddiff, newdiff # to do use this function in the equality check of entities
if entity0 is entity1:
return diff
if type(old_entity) is not type(new_entity): # ToDo: Same question as above, is not or !=
if type(entity0) is not type(entity1):
raise ValueError( raise ValueError(
"Comparison of different Entity types is not supported.") "Comparison of different Entity types is not supported.")
...@@ -237,135 +249,206 @@ def compare_entities(old_entity: Entity, ...@@ -237,135 +249,206 @@ def compare_entities(old_entity: Entity,
if attr == "value": if attr == "value":
continue continue
oldattr = old_entity.__getattribute__(attr) attr0 = entity0.__getattribute__(attr)
# we consider "" and None to be nonexistent # we consider "" and None to be nonexistent
old_entity_attr_na = (oldattr == "" or oldattr is None) attr0_unset = (attr0 == "" or attr0 is None)
newattr = new_entity.__getattribute__(attr) attr1 = entity1.__getattribute__(attr)
# we consider "" and None to be nonexistent # we consider "" and None to be nonexistent
new_entity_attr_na = (newattr == "" or newattr is None) attr1_unset = (attr1 == "" or attr1 is None)
# in both entities the current attribute is not set # in both entities the current attribute is not set
if old_entity_attr_na and new_entity_attr_na: if attr0_unset and attr1_unset:
continue continue
# treat datatype separately if one datatype is an object and the other # treat datatype separately if one datatype is an object and the other
# a string or int, and may be a name or id # a string or int, and therefore may be a name or id
if attr == "datatype": if attr == "datatype":
if not old_entity_attr_na and not new_entity_attr_na: if not attr0_unset and not attr1_unset:
if isinstance(oldattr, RecordType): if isinstance(attr0, RecordType):
if oldattr.name == newattr: if attr0.name == attr1:
continue continue
if oldattr.id == newattr: if str(attr0.id) == str(attr1):
continue continue
if isinstance(newattr, RecordType): if isinstance(attr1, RecordType):
if newattr.name == oldattr: if attr1.name == attr0:
continue continue
if newattr.id == oldattr: if str(attr1.id) == str(attr0):
continue continue
# add to diff if attribute has different values or is not set for one entity # add to diff if attr has different values or is not set for one entity
if ((old_entity_attr_na ^ new_entity_attr_na) if (attr0_unset != attr1_unset) or (attr0 != attr1):
or (oldattr != newattr)): if not attr0_unset:
if not old_entity_attr_na: diff[0][attr] = attr0
olddiff[attr] = oldattr if not attr1_unset:
diff[1][attr] = attr1
if not new_entity_attr_na:
newdiff[attr] = newattr
# compare value # compare value
if old_entity.value != new_entity.value: ent0_val, ent1_val = entity0.value, entity1.value
if ent0_val != ent1_val:
# though the values are not equal, they might be equivalent: # though the values are not equal, they might be equivalent:
same_value = False same_value = False
if compare_referenced_records: if compare_referenced_records:
# both values are scalar references: # both values are scalar references:
if isinstance(old_entity.value, Entity) and isinstance(new_entity.value, Entity): if isinstance(ent0_val, Entity) and isinstance(ent1_val, Entity):
# compare_referenced_records=False to prevent infinite recursion # comp_referenced_records=False to prevent infinite recursion
same_value = empty_diff( # ToDo: Consider keeping a list of traversed entities rather
old_entity.value, new_entity.value, compare_referenced_records=False) # than abort past first layer
same_value = empty_diff(ent0_val, ent1_val, False)
# both values are a list of references: # both values are a list of references:
elif isinstance(old_entity.value, list) and isinstance(new_entity.value, list): elif isinstance(ent0_val, list) and isinstance(ent1_val, list):
# if all elements in both lists are entity objects, check each pair for equality # lists can't be the same if the lengths are different
# TODO: check whether mixed cases should be allowed or lead to an error if len(ent0_val) == len(ent1_val):
if (all([isinstance(x, Entity) for x in old_entity.value]) lists_match = True
and all([isinstance(x, Entity) for x in new_entity.value])): for val0, val1 in zip(ent0_val, ent1_val):
# lists can't be the same if the lengths are different if val0 == val1:
if len(old_entity.value) == len(new_entity.value): continue
# the lists are the same if the diffs of each entry pair are empty if (isinstance(val0, Entity)
same_value = all( and isinstance(val1, Entity)):
[empty_diff(x, y, False) for x, y if empty_diff(val0, val1, False):
in zip(old_entity.value, new_entity.value)]) continue
# ToDo: Henrik wasn't sure whether it would make sense
# to check int and str against an Entity id/name
# -> ask, but if uncommented the test
# test_merge_id_with_resolved_entity in
# test_apiutils fails
#if (isinstance(val0, Entity)
# and isinstance(val1, (int, str))):
# if (str(val0.id) == str(val1)
# or str(val0.name) == str(val1)):
# continue
#if (isinstance(val1, Entity)
# and isinstance(val0, (int, str))):
# if (str(val1.id) == str(val0)
# or str(val1.name) == str(val0)):
# continue
# val0 and val1 could not be matched
lists_match = False
break
if lists_match:
same_value = True
if not same_value: if not same_value:
olddiff["value"] = old_entity.value diff[0]["value"] = ent0_val
newdiff["value"] = new_entity.value diff[1]["value"] = ent1_val
# compare properties # compare properties
for prop in old_entity.properties: for prop in entity0.properties:
matching = [p for p in new_entity.properties if p.name.lower() == prop.name.lower()] matching = entity1.properties.filter(prop, check_wrapped=False)
if len(matching) == 0: if len(matching) == 0:
# old_entity has prop, new_entity does not # entity1 has prop, entity0 does not
olddiff["properties"][prop.name] = {} diff[0]["properties"][prop.name] = {}
elif len(matching) ==1: elif len(matching) == 1:
olddiff["properties"][prop.name] = {} diff[0]["properties"][prop.name] = {}
newdiff["properties"][prop.name] = {} diff[1]["properties"][prop.name] = {}
oldpropdiff = olddiff["properties"][prop.name] propdiff = (diff[0]["properties"][prop.name],
newpropdiff = newdiff["properties"][prop.name] diff[1]["properties"][prop.name])
# recursive call to determine the differences between properties # Recursive call to determine the differences between properties
# ToDo: This can lead to infinite recursion if two properties have # ToDo: This can lead to infinite recursion if two properties have
# each other as subproperties # themselves or each other as subproperties - would also be
od, nd = compare_entities(prop, matching[0], compare_referenced_records=compare_referenced_records) # fixed by keeping a list of traversed entities I believe
# as we do not care about parents and properties here, discard their entries # We should compare the wrapped properties instead of the
# TODO do we? # wrapping entities if possible:
comp1, comp2 = prop, matching[0]
if (comp1._wrapped_entity is not None
and comp2._wrapped_entity is not None):
comp1, comp2 = comp1._wrapped_entity, comp2._wrapped_entity
# ToDo: compare_referenced_records is being ignored here, is this
# intended? If not, also fix in other places
od, nd = compare_entities(comp1, comp2, compare_referenced_records)
# ToDo: Do we care about parents and properties here?
# Note: If we can identify a subset of characteristics that we
# exclusively care about, maybe restrict the recursion to
# those checks at some point? Would be faster, and might
# replace the full recursions that we later call empty_diff
# on anyways
# as we do not care about parents and properties here,
# discard their entries
od.pop("parents") od.pop("parents")
od.pop("properties") od.pop("properties")
nd.pop("parents") nd.pop("parents")
nd.pop("properties") nd.pop("properties")
# use the remaining diff # use the remaining diff
oldpropdiff.update(od) propdiff[0].update(od)
newpropdiff.update(nd) propdiff[1].update(nd)
# As the importance of a property is an attribute of the record and not # As the importance of a property is an attribute of the record
# the property, it is not contained in the diff returned by compare_entities # and not the property, it is not contained in the diff returned
if old_entity.get_importance(prop.name) != new_entity.get_importance(prop.name): # by compare_entities and needs to be added separately
oldpropdiff["importance"] = old_entity.get_importance(prop.name) if (entity0.get_importance(prop.name) !=
newpropdiff["importance"] = new_entity.get_importance(prop.name) entity1.get_importance(prop.name)):
propdiff[0]["importance"] = entity0.get_importance(prop.name)
propdiff[1]["importance"] = entity1.get_importance(prop.name)
# in case there is no difference, we remove the dict keys again # in case there is no difference, we remove the dict keys again
if len(newpropdiff) == 0 and len(oldpropdiff) == 0: if len(propdiff[0]) == 0 and len(propdiff[1]) == 0:
newdiff["properties"].pop(prop.name) diff[0]["properties"].pop(prop.name)
olddiff["properties"].pop(prop.name) diff[1]["properties"].pop(prop.name)
else: else:
raise NotImplementedError( raise NotImplementedError(
"Comparison not implemented for multi-properties.") "Comparison not implemented for multi-properties.")
# ToDo: Iterate and match analogously to parents?
# we have not yet compared properties that do not exist in old_entity
for prop in new_entity.properties: # we have not yet compared properties that do not exist in entity0
# check how often the property appears in old_entity for prop in entity1.properties:
num_old_prop = len([0 for p in old_entity.properties # check how often the property appears in entity0
if p.name.lower() == prop.name.lower()]) num_prop_in_ent0 = len(entity0.properties.filter(prop))
if num_old_prop == 0: if num_prop_in_ent0 == 0:
# property is only present in new_entity - add to diff # property is only present in entity0 - add to diff
newdiff["properties"][prop.name] = {} diff[1]["properties"][prop.name] = {}
if num_old_prop > 1: if num_prop_in_ent0 > 1:
# Check whether the property is present multiple times in old_entity # Check whether the property is present multiple times in entity0
# and raise error - result would be incorrect # and raise error - result would be incorrect
raise NotImplementedError( raise NotImplementedError(
"Comparison not implemented for multi-properties.") "Comparison not implemented for multi-properties.")
# compare parents # compare parents
# ToDo: Compare using filter function, compare inheritance level for RTs for index, parents, other_entity in [(0, entity0.parents, entity1),
# TODO we currently only use names for parents and property matching (1, entity1.parents, entity0)]:
for parent in old_entity.parents: for parent in parents:
if len([0 for p in new_entity.parents if p.name.lower() == parent.name.lower()]) == 0: # As parent is a wrapper entity, we also want the wrappers
olddiff["parents"].append(parent.name) matching = other_entity.parents.filter(parent, check_wrapped=False)
if len(matching) == 0:
for parent in new_entity.parents: diff[index]["parents"].append(parent.name)
if len([0 for p in old_entity.parents if p.name.lower() == parent.name.lower()]) == 0: continue
newdiff["parents"].append(parent.name) # ToDo: Should parents also have a differences dict?
# ToDo: What about duplicate parents? Is this already satisfactory?
return olddiff, newdiff # ToDo: Recheck whether matching id and name is intended to be a
# sufficient criterion for a parent match, if yes, delete
# this and uncomment+test the commented block below
match_found = False
for match in matching:
comp1, comp2 = parent, match
if (comp1._wrapped_entity is not None
and comp2._wrapped_entity is not None):
comp1, comp2 = comp1._wrapped_entity, comp2._wrapped_entity
if empty_diff(comp1, comp2, False):
# For RecordType check inheritance, otherwise it's a match
if not isinstance(parent, RecordType):
match_found = True
break
elif (parent._flags['inheritance'] ==
match._flags['inheritance']):
match_found = True
break
if not match_found:
diff[index]["parents"].append(parent.name)
# if isinstance(parent, RecordType):
# # compare inheritance level only for RecordTypes
# matching_inheritance_found = False
# for potential in matching:
# if (parent._flags['inheritance'] ==
# potential._flags['inheritance']):
# matching_inheritance_found = True
# break
# if not matching_inheritance_found:
# # ToDo: How to indicate that this is a non-matching
# # importance level instead of a missing parent?
# diff[index]["parents"].append(parent.name)
return diff
def empty_diff(old_entity: Entity, new_entity: Entity, def empty_diff(old_entity: Entity, new_entity: Entity,
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment