diff --git a/src/linkahead/apiutils.py b/src/linkahead/apiutils.py index d70f937feb4aa980eb3e69767c83e2dd28cdcfd1..fab8c14ff84d733c7e96013e0fcc8e643eb1dd2b 100644 --- a/src/linkahead/apiutils.py +++ b/src/linkahead/apiutils.py @@ -183,7 +183,7 @@ def compare_entities(old_entity: Entity, new_entity: Entity, compare_referenced_records: bool = False ) -> tuple[dict[str, Any], dict[str, Any]]: - """Compare two entities. + """Compare two entites. The following attributes are compared: - parents @@ -200,7 +200,7 @@ def compare_entities(old_entity: Entity, the following differs: - datatype - importance or - - value + - value (not implemented yet) In case of changed information the value listed under the respective key is the value that is stored in the respective entity. @@ -226,33 +226,31 @@ def compare_entities(old_entity: Entity, newdiff: dict[str, Any] = {"properties": {}, "parents": []} if old_entity is new_entity: - return olddiff, newdiff + return (olddiff, newdiff) if type(old_entity) is not type(new_entity): raise ValueError( "Comparison of different Entity types is not supported.") - # compare special attributes + ### compare special attributes ### for attr in SPECIAL_ATTRIBUTES: if attr == "value": continue - oldattr = old_entity.__getattribute__(attr) - # we consider "" and None to be nonexistent + # we considere "" and None as nonexistent old_entity_attr_na = (oldattr == "" or oldattr is None) newattr = new_entity.__getattribute__(attr) - # we consider "" and None to be nonexistent + # we considere "" and None as nonexistent new_entity_attr_na = (newattr == "" or newattr is None) - # in both entities the current attribute is not set + # both unset if old_entity_attr_na and new_entity_attr_na: continue - # treat datatype separately if one datatype is an object and the other - # a string or int, and may be a name or id + # treat datatype separately: if datatype is an object on one side and string on the other. if attr == "datatype": - if not old_entity_attr_na and not new_entity_attr_na: + if (not old_entity_attr_na and not new_entity_attr_na): if isinstance(oldattr, RecordType): if oldattr.name == newattr: continue @@ -264,7 +262,8 @@ def compare_entities(old_entity: Entity, if newattr.id == oldattr: continue - # add to diff if attribute has different values or is not set for one entity + + # only one set or different values if ((old_entity_attr_na ^ new_entity_attr_na) or (oldattr != newattr)): if not old_entity_attr_na: @@ -273,25 +272,27 @@ def compare_entities(old_entity: Entity, if not new_entity_attr_na: newdiff[attr] = newattr - # compare value - if old_entity.value != new_entity.value: - # though the values are not equal, they might be equivalent: + + # value + if (old_entity.value != new_entity.value): + # basic comparison of value objects says they are different same_value = False if compare_referenced_records: - # both values are scalar references: + # scalar reference if isinstance(old_entity.value, Entity) and isinstance(new_entity.value, Entity): - # compare_referenced_records=False to prevent infinite recursion + # explicitely not recursive to prevent infinite recursion same_value = empty_diff( old_entity.value, new_entity.value, compare_referenced_records=False) - # both values are a list of references: + # list of references elif isinstance(old_entity.value, list) and isinstance(new_entity.value, list): - # if all elements in both lists are entity objects, check each pair for equality - # TODO: check whether mixed cases should be allowed or lead to an error + # all elements in both lists actually are entity objects + # TODO: check, whether mixed cases can be allowed or should lead to an error if (all([isinstance(x, Entity) for x in old_entity.value]) and all([isinstance(x, Entity) for x in new_entity.value])): - # lists can't be the same if the lengths are different + # can't be the same if the lengths are different if len(old_entity.value) == len(new_entity.value): - # the lists are the same if the diffs of each entry pair are empty + # do a one-by-one comparison: + # the values are the same if all diffs are empty same_value = all( [empty_diff(x, y, False) for x, y in zip(old_entity.value, new_entity.value)]) @@ -300,24 +301,21 @@ def compare_entities(old_entity: Entity, olddiff["value"] = old_entity.value newdiff["value"] = new_entity.value - # compare properties + # properties for prop in old_entity.properties: matching = [p for p in new_entity.properties if p.name.lower() == prop.name.lower()] if len(matching) == 0: - # old_entity has prop, new_entity does not - olddiff["properties"][prop.name] = {} + # old has prop and new does not + olddiff["properties"][prop.name] = {} elif len(matching) ==1: - olddiff["properties"][prop.name] = {} - newdiff["properties"][prop.name] = {} + olddiff["properties"][prop.name] = {} + newdiff["properties"][prop.name] = {} oldpropdiff = olddiff["properties"][prop.name] newpropdiff = newdiff["properties"][prop.name] - # recursive call to determine the differences between properties - # ToDo: This can lead to infinite recursion if two properties have - # each other as subproperties + # use compare function detect difference of properties od, nd = compare_entities(prop, matching[0], compare_referenced_records=compare_referenced_records) - # as we do not care about parents and properties here, discard their entries - # TODO do we? + # we do not care about parents and properties here # TODO do we? od.pop("parents") od.pop("properties") nd.pop("parents") @@ -326,28 +324,27 @@ def compare_entities(old_entity: Entity, oldpropdiff.update(od) newpropdiff.update(nd) - # As the importance of a property is an attribute of the record and not - # the property, it is not contained in the diff returned by compare_entities - if old_entity.get_importance(prop.name) != new_entity.get_importance(prop.name): - oldpropdiff["importance"] = old_entity.get_importance(prop.name) - newpropdiff["importance"] = new_entity.get_importance(prop.name) + # importance is associated with the record. So do it extra + if (old_entity.get_importance(prop.name) != new_entity.get_importance(prop.name)): + oldpropdiff["importance"] = old_entity.get_importance(prop.name) + newpropdiff["importance"] = new_entity.get_importance(prop.name) - # in case there is no difference, we remove the dict keys again - if len(newpropdiff) == 0 and len(oldpropdiff) == 0: + # in case there was actually no difference, we remove the dict keys again + if (len(newpropdiff) == 0 and len(oldpropdiff) == 0): newdiff["properties"].pop(prop.name) olddiff["properties"].pop(prop.name) else: raise NotImplementedError( "Comparison not implemented for multi-properties.") - # we have not yet compared properties that do not exist in old_entity + # add the properties that are only present in new for prop in new_entity.properties: if len([0 for p in old_entity.properties if p.name.lower() == prop.name.lower()]) == 0: newdiff["properties"][prop.name] = {} - # compare parents - # ToDo: Compare using filter function, compare inheritance level for RTs - # TODO we currently only use names for parents and property matching + # parents + + # TODO we only use names for parents and property matching for parent in old_entity.parents: if len([0 for p in new_entity.parents if p.name.lower() == parent.name.lower()]) == 0: olddiff["parents"].append(parent.name) @@ -356,7 +353,7 @@ def compare_entities(old_entity: Entity, if len([0 for p in old_entity.parents if p.name.lower() == parent.name.lower()]) == 0: newdiff["parents"].append(parent.name) - return olddiff, newdiff + return (olddiff, newdiff) def empty_diff(old_entity: Entity, new_entity: Entity,