diff --git a/src/linkahead/apiutils.py b/src/linkahead/apiutils.py index c2c687f1b3e5a175ce1c06653f198aa58b2eac63..7ffd8ebb7baf2f2f9dd8bc0adb14fd9ff771cb42 100644 --- a/src/linkahead/apiutils.py +++ b/src/linkahead/apiutils.py @@ -179,56 +179,68 @@ def getCommitIn(folder): return get_commit_in(folder) -def compare_entities(old_entity: Entity, - new_entity: Entity, +def compare_entities(entity0: Entity, entity1: Entity, compare_referenced_records: bool = False ) -> tuple[dict[str, Any], dict[str, Any]]: """Compare two entities. - The following attributes are compared: - - parents - - properties - - all SPECIAL_ATTRIBUTES (see models.py) - - Returns a tuple of dictionaries, the first element in the tupel lists additional information of old - entity, the second elements lists to additional information of new entity. - - Additional information means in detail: - - Additional parents: key="parents", value=a list of names of parents that are present in the respective entity - - Information about properties (key="properties", value=dict with property names as keys): - - properties are included in the dict if they are missing in the other entity or if one of - the following differs: + Returns two dicts listing the differences between the two entities. The + order of the two returned dicts corresponds to the two input entities. + The dicts contain two keys, 'parents' and 'properties'. The list saved + under the 'parents' key contains those parents of the respective entity + that are missing in the other entity, and the 'properties' dict contains + properties and SPECIAL_ATTRIBUTES if they are missing or different from + their counterparts in the other entity. + + The value of the properties dict for each listed property is again a dict + detailing the differences between this property and its counterpart. + The characteristics that are checked to determine whether two properties + match are the following: - datatype - - importance or + - importance - value + If any of these characteristics differ for a property, the respective + string (datatype, importance, value) is added as a key to the dict of the + property with its value being the characteristics value, + e.g. {"prop": {"value": 6, 'importance': 'SUGGESTED'}}. + If a property is of type LIST, the comparison is order-sensitive. - In case of changed information the value listed under the respective key is the - value that is stored in the respective entity. + Comparison of multi-properties is not yet supported, so should either + entity have several instances of one Property, the comparison is aborted + and an error is raised. - If `compare_referenced_records` is `True`, also referenced entities will be - compared using this function (which is then called with - `compare_referenced_records = False` to prevent infinite recursion in case - of circular references). + Two parents match, if TODO: get final decision on parent match criteria - Parameters - ---------- - old_entity, new_entity : Entity - Entities to be compared - compare_referenced_records : bool, optional - Whether to compare referenced records in case of both, `old_entity` and - `new_entity`, have the same reference properties and both have a Record - object as value. If set to `False`, only the corresponding Python - objects are compared which may lead to unexpected behavior when - identical records are stored in different objects. Default is False. + Should referenced records not be checked for equality between the entities + but for equivalency, this is possible by setting the parameter + compare_referenced_records. + Params + ------ + entity0 : Entity + First entity to be compared. + entity1 : Entity + Second entity to be compared. + compare_referenced_records: bool, default: False + If set to True, corresponding referenced + records are not checked for equality but for + equivalency, using this function. + compare_referenced_records is set to False for + these recursive calls, so references of + references need to be equal. If set to `False`, + only the Python objects are compared, which may + lead to unexpected behavior. """ - olddiff: dict[str, Any] = {"properties": {}, "parents": []} - newdiff: dict[str, Any] = {"properties": {}, "parents": []} + diff = ({"properties": {}, "parents": []}, + {"properties": {}, "parents": []}) - if old_entity is new_entity: - return olddiff, newdiff + # ToDo: Is is wanted here? == would make more sense unless there are plans + # to do use this function in the equality check of entities + if entity0 is entity1: + return diff - if type(old_entity) is not type(new_entity): + # ToDo: Same question as above, is not or != + if type(entity0) is not type(entity1): raise ValueError( "Comparison of different Entity types is not supported.") @@ -237,135 +249,206 @@ def compare_entities(old_entity: Entity, if attr == "value": continue - oldattr = old_entity.__getattribute__(attr) + attr0 = entity0.__getattribute__(attr) # we consider "" and None to be nonexistent - old_entity_attr_na = (oldattr == "" or oldattr is None) + attr0_unset = (attr0 == "" or attr0 is None) - newattr = new_entity.__getattribute__(attr) + attr1 = entity1.__getattribute__(attr) # we consider "" and None to be nonexistent - new_entity_attr_na = (newattr == "" or newattr is None) + attr1_unset = (attr1 == "" or attr1 is None) # in both entities the current attribute is not set - if old_entity_attr_na and new_entity_attr_na: + if attr0_unset and attr1_unset: continue # treat datatype separately if one datatype is an object and the other - # a string or int, and may be a name or id + # a string or int, and therefore may be a name or id if attr == "datatype": - if not old_entity_attr_na and not new_entity_attr_na: - if isinstance(oldattr, RecordType): - if oldattr.name == newattr: + if not attr0_unset and not attr1_unset: + if isinstance(attr0, RecordType): + if attr0.name == attr1: continue - if oldattr.id == newattr: + if str(attr0.id) == str(attr1): continue - if isinstance(newattr, RecordType): - if newattr.name == oldattr: + if isinstance(attr1, RecordType): + if attr1.name == attr0: continue - if newattr.id == oldattr: + if str(attr1.id) == str(attr0): continue - # add to diff if attribute has different values or is not set for one entity - if ((old_entity_attr_na ^ new_entity_attr_na) - or (oldattr != newattr)): - if not old_entity_attr_na: - olddiff[attr] = oldattr - - if not new_entity_attr_na: - newdiff[attr] = newattr + # add to diff if attr has different values or is not set for one entity + if (attr0_unset != attr1_unset) or (attr0 != attr1): + if not attr0_unset: + diff[0][attr] = attr0 + if not attr1_unset: + diff[1][attr] = attr1 # compare value - if old_entity.value != new_entity.value: + ent0_val, ent1_val = entity0.value, entity1.value + if ent0_val != ent1_val: # though the values are not equal, they might be equivalent: same_value = False if compare_referenced_records: # both values are scalar references: - if isinstance(old_entity.value, Entity) and isinstance(new_entity.value, Entity): - # compare_referenced_records=False to prevent infinite recursion - same_value = empty_diff( - old_entity.value, new_entity.value, compare_referenced_records=False) + if isinstance(ent0_val, Entity) and isinstance(ent1_val, Entity): + # comp_referenced_records=False to prevent infinite recursion + # ToDo: Consider keeping a list of traversed entities rather + # than abort past first layer + same_value = empty_diff(ent0_val, ent1_val, False) # both values are a list of references: - elif isinstance(old_entity.value, list) and isinstance(new_entity.value, list): - # if all elements in both lists are entity objects, check each pair for equality - # TODO: check whether mixed cases should be allowed or lead to an error - if (all([isinstance(x, Entity) for x in old_entity.value]) - and all([isinstance(x, Entity) for x in new_entity.value])): - # lists can't be the same if the lengths are different - if len(old_entity.value) == len(new_entity.value): - # the lists are the same if the diffs of each entry pair are empty - same_value = all( - [empty_diff(x, y, False) for x, y - in zip(old_entity.value, new_entity.value)]) + elif isinstance(ent0_val, list) and isinstance(ent1_val, list): + # lists can't be the same if the lengths are different + if len(ent0_val) == len(ent1_val): + lists_match = True + for val0, val1 in zip(ent0_val, ent1_val): + if val0 == val1: + continue + if (isinstance(val0, Entity) + and isinstance(val1, Entity)): + if empty_diff(val0, val1, False): + continue + # ToDo: Henrik wasn't sure whether it would make sense + # to check int and str against an Entity id/name + # -> ask, but if uncommented the test + # test_merge_id_with_resolved_entity in + # test_apiutils fails + #if (isinstance(val0, Entity) + # and isinstance(val1, (int, str))): + # if (str(val0.id) == str(val1) + # or str(val0.name) == str(val1)): + # continue + #if (isinstance(val1, Entity) + # and isinstance(val0, (int, str))): + # if (str(val1.id) == str(val0) + # or str(val1.name) == str(val0)): + # continue + # val0 and val1 could not be matched + lists_match = False + break + if lists_match: + same_value = True if not same_value: - olddiff["value"] = old_entity.value - newdiff["value"] = new_entity.value + diff[0]["value"] = ent0_val + diff[1]["value"] = ent1_val # compare properties - for prop in old_entity.properties: - matching = [p for p in new_entity.properties if p.name.lower() == prop.name.lower()] + for prop in entity0.properties: + matching = entity1.properties.filter(prop, check_wrapped=False) if len(matching) == 0: - # old_entity has prop, new_entity does not - olddiff["properties"][prop.name] = {} - elif len(matching) ==1: - olddiff["properties"][prop.name] = {} - newdiff["properties"][prop.name] = {} - oldpropdiff = olddiff["properties"][prop.name] - newpropdiff = newdiff["properties"][prop.name] - - # recursive call to determine the differences between properties + # entity1 has prop, entity0 does not + diff[0]["properties"][prop.name] = {} + elif len(matching) == 1: + diff[0]["properties"][prop.name] = {} + diff[1]["properties"][prop.name] = {} + propdiff = (diff[0]["properties"][prop.name], + diff[1]["properties"][prop.name]) + + # Recursive call to determine the differences between properties # ToDo: This can lead to infinite recursion if two properties have - # each other as subproperties - od, nd = compare_entities(prop, matching[0], compare_referenced_records=compare_referenced_records) - # as we do not care about parents and properties here, discard their entries - # TODO do we? + # themselves or each other as subproperties - would also be + # fixed by keeping a list of traversed entities I believe + # We should compare the wrapped properties instead of the + # wrapping entities if possible: + comp1, comp2 = prop, matching[0] + if (comp1._wrapped_entity is not None + and comp2._wrapped_entity is not None): + comp1, comp2 = comp1._wrapped_entity, comp2._wrapped_entity + # ToDo: compare_referenced_records is being ignored here, is this + # intended? If not, also fix in other places + od, nd = compare_entities(comp1, comp2, compare_referenced_records) + # ToDo: Do we care about parents and properties here? + # Note: If we can identify a subset of characteristics that we + # exclusively care about, maybe restrict the recursion to + # those checks at some point? Would be faster, and might + # replace the full recursions that we later call empty_diff + # on anyways + # as we do not care about parents and properties here, + # discard their entries od.pop("parents") od.pop("properties") nd.pop("parents") nd.pop("properties") # use the remaining diff - oldpropdiff.update(od) - newpropdiff.update(nd) + propdiff[0].update(od) + propdiff[1].update(nd) - # As the importance of a property is an attribute of the record and not - # the property, it is not contained in the diff returned by compare_entities - if old_entity.get_importance(prop.name) != new_entity.get_importance(prop.name): - oldpropdiff["importance"] = old_entity.get_importance(prop.name) - newpropdiff["importance"] = new_entity.get_importance(prop.name) + # As the importance of a property is an attribute of the record + # and not the property, it is not contained in the diff returned + # by compare_entities and needs to be added separately + if (entity0.get_importance(prop.name) != + entity1.get_importance(prop.name)): + propdiff[0]["importance"] = entity0.get_importance(prop.name) + propdiff[1]["importance"] = entity1.get_importance(prop.name) # in case there is no difference, we remove the dict keys again - if len(newpropdiff) == 0 and len(oldpropdiff) == 0: - newdiff["properties"].pop(prop.name) - olddiff["properties"].pop(prop.name) + if len(propdiff[0]) == 0 and len(propdiff[1]) == 0: + diff[0]["properties"].pop(prop.name) + diff[1]["properties"].pop(prop.name) + else: raise NotImplementedError( "Comparison not implemented for multi-properties.") - - # we have not yet compared properties that do not exist in old_entity - for prop in new_entity.properties: - # check how often the property appears in old_entity - num_old_prop = len([0 for p in old_entity.properties - if p.name.lower() == prop.name.lower()]) - if num_old_prop == 0: - # property is only present in new_entity - add to diff - newdiff["properties"][prop.name] = {} - if num_old_prop > 1: - # Check whether the property is present multiple times in old_entity + # ToDo: Iterate and match analogously to parents? + + # we have not yet compared properties that do not exist in entity0 + for prop in entity1.properties: + # check how often the property appears in entity0 + num_prop_in_ent0 = len(entity0.properties.filter(prop)) + if num_prop_in_ent0 == 0: + # property is only present in entity0 - add to diff + diff[1]["properties"][prop.name] = {} + if num_prop_in_ent0 > 1: + # Check whether the property is present multiple times in entity0 # and raise error - result would be incorrect raise NotImplementedError( "Comparison not implemented for multi-properties.") # compare parents - # ToDo: Compare using filter function, compare inheritance level for RTs - # TODO we currently only use names for parents and property matching - for parent in old_entity.parents: - if len([0 for p in new_entity.parents if p.name.lower() == parent.name.lower()]) == 0: - olddiff["parents"].append(parent.name) - - for parent in new_entity.parents: - if len([0 for p in old_entity.parents if p.name.lower() == parent.name.lower()]) == 0: - newdiff["parents"].append(parent.name) - - return olddiff, newdiff + for index, parents, other_entity in [(0, entity0.parents, entity1), + (1, entity1.parents, entity0)]: + for parent in parents: + # As parent is a wrapper entity, we also want the wrappers + matching = other_entity.parents.filter(parent, check_wrapped=False) + if len(matching) == 0: + diff[index]["parents"].append(parent.name) + continue + # ToDo: Should parents also have a differences dict? + # ToDo: What about duplicate parents? Is this already satisfactory? + # ToDo: Recheck whether matching id and name is intended to be a + # sufficient criterion for a parent match, if yes, delete + # this and uncomment+test the commented block below + match_found = False + for match in matching: + comp1, comp2 = parent, match + if (comp1._wrapped_entity is not None + and comp2._wrapped_entity is not None): + comp1, comp2 = comp1._wrapped_entity, comp2._wrapped_entity + if empty_diff(comp1, comp2, False): + # For RecordType check inheritance, otherwise it's a match + if not isinstance(parent, RecordType): + match_found = True + break + elif (parent._flags['inheritance'] == + match._flags['inheritance']): + match_found = True + break + if not match_found: + diff[index]["parents"].append(parent.name) + # if isinstance(parent, RecordType): + # # compare inheritance level only for RecordTypes + # matching_inheritance_found = False + # for potential in matching: + # if (parent._flags['inheritance'] == + # potential._flags['inheritance']): + # matching_inheritance_found = True + # break + # if not matching_inheritance_found: + # # ToDo: How to indicate that this is a non-matching + # # importance level instead of a missing parent? + # diff[index]["parents"].append(parent.name) + + return diff def empty_diff(old_entity: Entity, new_entity: Entity,