diff --git a/src/linkahead/apiutils.py b/src/linkahead/apiutils.py index 5e25e3f5faff065ab9e5887d17fb0962398ce9d7..c571a849411322fc6b5fda890b89ed13cb5c8c1f 100644 --- a/src/linkahead/apiutils.py +++ b/src/linkahead/apiutils.py @@ -180,7 +180,8 @@ def getCommitIn(folder): def compare_entities(entity0: Entity, entity1: Entity, - compare_referenced_records: bool = False + compare_referenced_records: bool = False, + entity_name_id_equivalency: bool = False ) -> tuple[dict[str, Any], dict[str, Any]]: """Compare two entities. @@ -209,11 +210,12 @@ def compare_entities(entity0: Entity, entity1: Entity, entity have several instances of one Property, the comparison is aborted and an error is raised. - Two parents match, if TODO: get final decision on parent match criteria + Two parents match if their name and id are the same, any further + differences are ignored. - Should referenced records not be checked for equality between the entities - but for equivalency, this is possible by setting the parameter - compare_referenced_records. + Should records referenced in the value field not be checked for equality + between the entities but for equivalency, this is possible by setting the + parameter compare_referenced_records. Params ------ @@ -222,24 +224,39 @@ def compare_entities(entity0: Entity, entity1: Entity, entity1 : Entity Second entity to be compared. compare_referenced_records: bool, default: False - If set to True, corresponding referenced - records are not checked for equality but for - equivalency, using this function. + If set to True, values with referenced records + are not checked for equality but for + equivalency using this function. compare_referenced_records is set to False for these recursive calls, so references of references need to be equal. If set to `False`, only the Python objects are compared, which may lead to unexpected behavior. + entity_name_id_equivalency: bool, default: False + If set to True, the comparison between an + entity and an int or str also checks whether + the int/str matches the name or id of the + entity, so Entity(id=100) == 100 == "100". """ + # ToDo: Discuss intended behaviour + # Questions that need clarification: + # - What is intended behaviour for multi-properties and multi-parents? + # - Do different inheritance levels for parents count as a difference? + # - Do we care about parents and properties of properties? + # - Should there be a more detailed comparison of parents without id? + # Suggestions for enhancements: + # - For the comparison of entities in value and properties, consider + # keeping a list of traversed entities, not only look at first layer + # - Make the empty_diff functionality faster by adding a parameter to + # this function so that it returns after the first found difference? + # - Add parameter to restrict diff to some characteristics + diff = ({"properties": {}, "parents": []}, {"properties": {}, "parents": []}) - # ToDo: Is is wanted here? == would make more sense unless there are plans - # to do use this function in the equality check of entities if entity0 is entity1: return diff - # ToDo: Same question as above, is not or != if type(entity0) is not type(entity1): raise ValueError( "Comparison of different Entity types is not supported.") @@ -286,51 +303,47 @@ def compare_entities(entity0: Entity, entity1: Entity, # compare value ent0_val, ent1_val = entity0.value, entity1.value if ent0_val != ent1_val: - # though the values are not equal, they might be equivalent: same_value = False - if compare_referenced_records: - # both values are scalar references: - if isinstance(ent0_val, Entity) and isinstance(ent1_val, Entity): - # comp_referenced_records=False to prevent infinite recursion - # ToDo: Consider keeping a list of traversed entities rather - # than abort past first layer - same_value = empty_diff(ent0_val, ent1_val, False) - # both values are a list of references: - elif isinstance(ent0_val, list) and isinstance(ent1_val, list): - # lists can't be the same if the lengths are different - if len(ent0_val) == len(ent1_val): - lists_match = True - for val0, val1 in zip(ent0_val, ent1_val): - if val0 == val1: + + # Surround scalar values with a list to avoid code duplication - + # this way, the scalar values can be checked against special cases + # (compare refs, entity id equivalency etc.) in the list loop + if not isinstance(ent0_val, list) and not isinstance(ent1_val, list): + ent0_val, ent1_val = [ent0_val], [ent1_val] + + if isinstance(ent0_val, list) and isinstance(ent1_val, list): + # lists can't be the same if the lengths are different + if len(ent0_val) == len(ent1_val): + lists_match = True + for val0, val1 in zip(ent0_val, ent1_val): + if val0 == val1: + continue + # Compare Entities + if (compare_referenced_records and + isinstance(val0, Entity) and isinstance(val1, Entity)): + if empty_diff(val0, val1, False): continue + # Compare Entity name and id + if entity_name_id_equivalency: if (isinstance(val0, Entity) - and isinstance(val1, Entity)): - if empty_diff(val0, val1, False): + and isinstance(val1, (int, str))): + if (str(val0.id) == str(val1) + or str(val0.name) == str(val1)): + continue + if (isinstance(val1, Entity) + and isinstance(val0, (int, str))): + if (str(val1.id) == str(val0) + or str(val1.name) == str(val0)): continue - # ToDo: Henrik wasn't sure whether it would make sense - # to check int and str against an Entity id/name - # -> ask, but if uncommented the test - # test_merge_id_with_resolved_entity in - # test_apiutils fails - #if (isinstance(val0, Entity) - # and isinstance(val1, (int, str))): - # if (str(val0.id) == str(val1) - # or str(val0.name) == str(val1)): - # continue - #if (isinstance(val1, Entity) - # and isinstance(val0, (int, str))): - # if (str(val1.id) == str(val0) - # or str(val1.name) == str(val0)): - # continue - # val0 and val1 could not be matched - lists_match = False - break - if lists_match: - same_value = True + # val0 and val1 could not be matched + lists_match = False + break + if lists_match: + same_value = True if not same_value: - diff[0]["value"] = ent0_val - diff[1]["value"] = ent1_val + diff[0]["value"] = entity0.value + diff[1]["value"] = entity1.value # compare properties for prop in entity0.properties: @@ -344,27 +357,17 @@ def compare_entities(entity0: Entity, entity1: Entity, propdiff = (diff[0]["properties"][prop.name], diff[1]["properties"][prop.name]) - # Recursive call to determine the differences between properties - # ToDo: This can lead to infinite recursion if two properties have - # themselves or each other as subproperties - would also be - # fixed by keeping a list of traversed entities I believe # We should compare the wrapped properties instead of the # wrapping entities if possible: comp1, comp2 = prop, matching[0] if (comp1._wrapped_entity is not None and comp2._wrapped_entity is not None): comp1, comp2 = comp1._wrapped_entity, comp2._wrapped_entity - # ToDo: compare_referenced_records is being ignored here, is this - # intended? If not, also fix in other places + # Recursive call to determine the differences between properties + # Note: Can lead to infinite recursion if two properties have + # themselves or each other as subproperties od, nd = compare_entities(comp1, comp2, compare_referenced_records) - # ToDo: Do we care about parents and properties here? - # Note: If we can identify a subset of characteristics that we - # exclusively care about, maybe restrict the recursion to - # those checks at some point? Would be faster, and might - # replace the full recursions that we later call empty_diff - # on anyways - # as we do not care about parents and properties here, - # discard their entries + # We do not care about parents and properties here, discard od.pop("parents") od.pop("properties") nd.pop("parents") @@ -389,7 +392,6 @@ def compare_entities(entity0: Entity, entity1: Entity, else: raise NotImplementedError( "Comparison not implemented for multi-properties.") - # ToDo: Iterate and match analogously to parents? # we have not yet compared properties that do not exist in entity0 for prop in entity1.properties: @@ -408,45 +410,10 @@ def compare_entities(entity0: Entity, entity1: Entity, for index, parents, other_entity in [(0, entity0.parents, entity1), (1, entity1.parents, entity0)]: for parent in parents: - # As parent is a wrapper entity, we also want the wrappers - matching = other_entity.parents.filter(parent, check_wrapped=False) + matching = other_entity.parents.filter(parent) if len(matching) == 0: diff[index]["parents"].append(parent.name) continue - # ToDo: Should parents also have a differences dict? - # ToDo: What about duplicate parents? Is this already satisfactory? - # ToDo: Recheck whether matching id and name is intended to be a - # sufficient criterion for a parent match, if yes, delete - # this and uncomment+test the commented block below - match_found = False - for match in matching: - comp1, comp2 = parent, match - if (comp1._wrapped_entity is not None - and comp2._wrapped_entity is not None): - comp1, comp2 = comp1._wrapped_entity, comp2._wrapped_entity - if empty_diff(comp1, comp2, False): - # For RecordType check inheritance, otherwise it's a match - if not isinstance(parent, RecordType): - match_found = True - break - elif (parent._flags['inheritance'] == - match._flags['inheritance']): - match_found = True - break - if not match_found: - diff[index]["parents"].append(parent.name) - # if isinstance(parent, RecordType): - # # compare inheritance level only for RecordTypes - # matching_inheritance_found = False - # for potential in matching: - # if (parent._flags['inheritance'] == - # potential._flags['inheritance']): - # matching_inheritance_found = True - # break - # if not matching_inheritance_found: - # # ToDo: How to indicate that this is a non-matching - # # importance level instead of a missing parent? - # diff[index]["parents"].append(parent.name) return diff @@ -579,6 +546,8 @@ def merge_entities(entity_a: Entity, else: same = _same_id_as_resolved_entity(this, that) if same is True: + setattr(entity_a.get_property(key), attribute, + diff_r2["properties"][key][attribute]) raise_error = False if raise_error is True: raise EntityMergeConflictError( diff --git a/src/linkahead/common/models.py b/src/linkahead/common/models.py index b4c28ad8f88198a87f92ebed9a5e14c7eac506e9..ee42e06a8baac03f0e4ffbfdf9a30626f4b0d9ab 100644 --- a/src/linkahead/common/models.py +++ b/src/linkahead/common/models.py @@ -2523,8 +2523,7 @@ class PropertyList(list): return xml2str(xml) def filter(self, prop:Property = None, pid:Union[str, int] = None, - name:str = None, check_equality: bool = False, - check_wrapped: bool = True) -> list: + name:str = None, check_wrapped: bool = True) -> list: """ Return all Properties from the given PropertyList that match the selection criteria. @@ -2551,10 +2550,6 @@ class PropertyList(list): Property ID to match name : str Property name to match - check_equality : bool, default: False - If set to True, potential matches will be checked - using the equality operator instead of ID and name. - Will be ignored if the prop parameter is not set. check_wrapped : bool, default: True If set to False, only the wrapper elements contained in the given PropertyList will be @@ -2566,7 +2561,7 @@ class PropertyList(list): List containing all matching Properties """ return _filter_entity_list(self, pid=pid, name=name, entity=prop, - check_equality=check_equality, check_wrapped=check_wrapped) + check_wrapped=check_wrapped) def _get_entity_by_cuid(self, cuid: str): ''' @@ -2699,8 +2694,7 @@ class ParentList(list): return xml2str(xml) def filter(self, parent:Parent = None, pid:Union[str, int] = None, - name:str = None, check_equality: bool = False, - check_wrapped: bool = True) -> list: + name:str = None, check_wrapped: bool = True) -> list: """ Return all Parents from the given ParentList that match the selection criteria. @@ -2727,10 +2721,6 @@ class ParentList(list): name : str Parent name to match simultaneously with ID or name. - check_equality : bool, default: False - If set to True, potential matches will be checked - using the equality operator instead of ID and name. - Will be ignored if the entity parameter is not set. check_wrapped : bool, default: True If set to False, only the wrapper elements contained in the given ParentList will be @@ -2742,7 +2732,7 @@ class ParentList(list): List containing all matching Parents """ return _filter_entity_list(self, pid=pid, name=name, entity=parent, - check_equality=check_equality, check_wrapped=check_wrapped) + check_wrapped=check_wrapped) def remove(self, parent: Union[Entity, int, str]): """ @@ -5509,8 +5499,7 @@ def delete(ids: Union[list[int], range], raise_exception_on_error: bool = True): def _filter_entity_list(listobject, entity:Entity = None, pid:Union[str, int] = None, - name:str = None, check_equality:bool = False, - check_wrapped:bool = True) -> list: + name:str = None, check_wrapped:bool = True) -> list: """ Return all elements from the given list that match the selection criteria. @@ -5519,7 +5508,8 @@ def _filter_entity_list(listobject, entity:Entity = None, pid:Union[str, int] = returned. If an Entity is given, neither name nor ID may be set. In this case, only - elements matching both name and ID of the Entity are returned. + elements matching both name and ID of the Entity are returned, as long as + name and ID are both set. In case the elements contained in the given list are wrapped, the function in its default configuration checks both the wrapped and wrapper Entity @@ -5538,10 +5528,6 @@ def _filter_entity_list(listobject, entity:Entity = None, pid:Union[str, int] = Entity ID to match name : str Entity name to match - check_equality : bool, default: False - If set to True, potential matches will be checked - using the equality operator instead of ID and name. - Will be ignored if the entity parameter is not set. check_wrapped : bool, default: True If set to False, only the wrapper elements contained in the given list will be checked and @@ -5560,8 +5546,6 @@ def _filter_entity_list(listobject, entity:Entity = None, pid:Union[str, int] = pid = entity.id name = entity.name match_entity = True - else: - check_equality = False # Iterate through list and match based on given criteria matches = [] @@ -5582,31 +5566,25 @@ def _filter_entity_list(listobject, entity:Entity = None, pid:Union[str, int] = except: pass - # If indicated, only consider equality - if check_equality: - if candidate == entity: - matches.append(candidate) - elif original_candidate is not None: - potentials.append((original_candidate, True)) - continue - # Otherwise, check whether name/pid match + # Check whether name/pid match # pid and candidate.id might be int and str, so cast to str (None safe) if pid is not None and str(candidate.id) == str(pid): pid_match = True - elif match_entity and str(candidate.id) == str(pid): - # If we are matching the entity, both being Null is also ok + elif match_entity and pid is None: + # Without a pid we cannot match pid_match = True if (name is not None and candidate.name is not None and candidate.name.lower() == name.lower()): name_match = True - elif match_entity and candidate.name == name: - # If we are matching the entity, both being Null is also ok + elif match_entity and name is None: + # Without a name we cannot match name_match = True # If the criteria are satisfied, append the match. Otherwise, check # the wrapper if applicable - # ToDo: Check whether it would make sense to also check RecordType - # for match_entity + # ToDo: Check whether it would make sense to also check the RecordType + # for equality when match_entity is true to offset potentially + # missing id if name_match and pid_match: matches.append(candidate) elif not match_entity and (name_match or pid_match): diff --git a/unittests/test_entity.py b/unittests/test_entity.py index ff2c79ead1e32e394e3b003ff239ccc3cbf57c1e..8a41ca6fe6025b4ea45334e493215e730be5d895 100644 --- a/unittests/test_entity.py +++ b/unittests/test_entity.py @@ -201,21 +201,17 @@ def test_filter(): t.add_property(p3) assert p1 in t_props.filter(pid=100) assert p1 in t_props.filter(pid="100") - assert p1 in t_props.filter(p1, check_equality=True) assert p1 not in t_props.filter(pid=101, name="RT") for entity in [rt1, p2, r1, r2]: assert entity not in t_props.filter(pid=100) assert p1 in t_props.filter(entity) - assert p1 not in t_props.filter(entity, check_equality=True) # Check that direct addition (not wrapped) works t_props.append(p2) assert p2 in t_props.filter(pid=100) - assert p2 in t_props.filter(p2, check_equality=True) assert p2 not in t_props.filter(pid=101, name="RT") for entity in [rt1, r1, r2]: assert entity not in t_props.filter(pid=100) assert p2 in t_props.filter(entity) - assert p2 not in t_props.filter(entity, check_equality=True) ## Parents # Filtering with both name and id @@ -236,12 +232,9 @@ def test_filter(): t.add_parent(ent) for ent in test_ents: assert ent in t_pars.filter(ent) - assert ent in t_pars.filter(ent, check_equality=True) for ent in [rt1, p1, p2, r1, r2]: filtered = t_pars.filter(ent) for ent2 in [rt1, p1, p2, r1, r2]: assert ent2 in filtered assert ent in t_pars.filter(pid=100) assert ent in t_pars.filter(pid="100") - # ToDo: Check whether duplicates are wanted and write tests for the - # desirable outcome