Skip to content
Snippets Groups Projects
Commit 58677097 authored by Alexander Schlemmer's avatar Alexander Schlemmer
Browse files

Merge branch 'f-fix-merge-entity-conflicts' into 'dev'

F fix merge entity conflicts

See merge request !72
parents f05dca21 f85dc20e
No related branches found
No related tags found
2 merge requests!79Release 0.10.0,!72F fix merge entity conflicts
Pipeline #29877 passed
......@@ -9,8 +9,23 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
### Added ###
* `apiutils.empty_diff` function that returns `True` if the diffs of two
entities found with the `compare_entitis` function are empty, `False`
otherwise.
### Changed ###
* `apiutils.compare_entities` now has an optional `compare_referenced_records`
argument to compare referenced Entities recursively (fomerly, only the
referenced Python objects would be compared). The default is `False` to
recover the original behavior.
* `apiutils.merge_entities` now has an optional
`merge_references_with_empty_diffs` argument that determines whether a merge
of two entities will be performed if they reference identical records (w.r.t
th above `empty_diff` function). Formerly this would have caused a merge
conflict if the referenced record(s) were identical, but stored in different
Python objects.
### Deprecated ###
### Removed ###
......
......@@ -188,9 +188,8 @@ def getCommitIn(folder):
return t.readline().strip()
def compare_entities(old_entity: Entity, new_entity: Entity):
"""
Compare two entites.
def compare_entities(old_entity: Entity, new_entity: Entity, compare_referenced_records: bool = False):
"""Compare two entites.
Return a tuple of dictionaries, the first index belongs to additional information for old
entity, the second index belongs to additional information for new entity.
......@@ -204,6 +203,23 @@ def compare_entities(old_entity: Entity, new_entity: Entity):
- ... value (not implemented yet)
In case of changed information the value listed under the respective key shows the
value that is stored in the respective entity.
If `compare_referenced_records` is `True`, also referenced entities will be
compared using this function (which is then called with
`compare_referenced_records = False` to prevent infinite recursion in case
of circular references).
Parameters
----------
old_entity, new_entity : Entity
Entities to be compared
compare_referenced_records : bool, optional
Whether to compare referenced records in case of both, `old_entity` and
`new_entity`, have the same reference properties and both have a Record
object as value. If set to `False`, only the corresponding Python
objects are compared which may lead to unexpected behavior when
identical records are stored in different objects. Default is False.
"""
olddiff: Dict[str, Any] = {"properties": {}, "parents": []}
newdiff: Dict[str, Any] = {"properties": {}, "parents": []}
......@@ -270,9 +286,29 @@ def compare_entities(old_entity: Entity, new_entity: Entity):
matching[0].unit
if (prop.value != matching[0].value):
olddiff["properties"][prop.name]["value"] = prop.value
newdiff["properties"][prop.name]["value"] = \
matching[0].value
# basic comparison of value objects says they are different
same_value = False
if compare_referenced_records:
# scalar reference
if isinstance(prop.value, Entity) and isinstance(matching[0].value, Entity):
# explicitely not recursive to prevent infinite recursion
same_value = empty_diff(
prop.value, matching[0].value, compare_referenced_records=False)
# list of references
elif isinstance(prop.value, list) and isinstance(matching[0].value, list):
# all elements in both lists actually are entity objects
# TODO: check, whether mixed cases can be allowed or should lead to an error
if all([isinstance(x, Entity) for x in prop.value]) and all([isinstance(x, Entity) for x in matching[0].value]):
# can't be the same if the lengths are different
if len(prop.value) == len(matching[0].value):
# do a one-by-one comparison; the values are the same, if all diffs are empty
same_value = all(
[empty_diff(x, y, False) for x, y in zip(prop.value, matching[0].value)])
if not same_value:
olddiff["properties"][prop.name]["value"] = prop.value
newdiff["properties"][prop.name]["value"] = \
matching[0].value
if (len(newdiff["properties"][prop.name]) == 0
and len(olddiff["properties"][prop.name]) == 0):
......@@ -300,7 +336,32 @@ def compare_entities(old_entity: Entity, new_entity: Entity):
return (olddiff, newdiff)
def merge_entities(entity_a: Entity, entity_b: Entity):
def empty_diff(old_entity: Entity, new_entity: Entity, compare_referenced_records: bool = False):
"""Check whether the `compare_entities` found any differences between
old_entity and new_entity.
Parameters
----------
old_entity, new_entity : Entity
Entities to be compared
compare_referenced_records : bool, optional
Whether to compare referenced records in case of both, `old_entity` and
`new_entity`, have the same reference properties and both have a Record
object as value.
"""
olddiff, newdiff = compare_entities(
old_entity, new_entity, compare_referenced_records)
for diff in [olddiff, newdiff]:
for key in diff:
if len(diff[key]) > 0:
# There is a difference somewhere in the diff
return False
# all elements of the two diffs were empty
return True
def merge_entities(entity_a: Entity, entity_b: Entity, merge_references_with_empty_diffs=True):
"""
Merge entity_b into entity_a such that they have the same parents and properties.
......@@ -314,13 +375,30 @@ def merge_entities(entity_a: Entity, entity_b: Entity):
Returns entity_a.
WARNING: This function is currently experimental and insufficiently tested. Use with care.
Parameters
----------
entity_a, entity_b : Entity
The entities to be merged. entity_b will be merged into entity_a in place
merge_references_with_empty_diffs : bool, optional
Whether the merge is performed if entity_a and entity_b both reference
record(s) that may be different Python objects but have empty diffs. If
set to `False` a merge conflict will be raised in this case
instead. Default is True.
Returns
-------
entity_a : Entity
The initial entity_a after the in-place merge
"""
logging.warning(
"This function is currently experimental and insufficiently tested. Use with care.")
# Compare both entities:
diff_r1, diff_r2 = compare_entities(entity_a, entity_b)
diff_r1, diff_r2 = compare_entities(
entity_a, entity_b, compare_referenced_records=merge_references_with_empty_diffs)
# Go through the comparison and try to apply changes to entity_a:
for key in diff_r2["parents"]:
......
......@@ -30,7 +30,7 @@ import pytest
import caosdb as db
import caosdb.apiutils
from caosdb.apiutils import (apply_to_ids, compare_entities, create_id_query,
resolve_reference, merge_entities)
empty_diff, resolve_reference, merge_entities)
from caosdb.common.models import SPECIAL_ATTRIBUTES
......@@ -272,8 +272,10 @@ def test_copy_entities():
for i in [0, 1]:
assert c.properties[i] is not r.properties[i]
for special in SPECIAL_ATTRIBUTES:
assert getattr(c.properties[i], special) == getattr(r.properties[i], special)
assert c.get_importance(c.properties[i]) == r.get_importance(r.properties[i])
assert getattr(c.properties[i], special) == getattr(
r.properties[i], special)
assert c.get_importance(
c.properties[i]) == r.get_importance(r.properties[i])
def test_merge_entities():
......@@ -326,10 +328,12 @@ def test_merge_bug_109():
assert r_a.get_property("test_bug_property").value == [18, 19]
assert "<Value>18</Value>\n <Value>19</Value>" in str(r_b)
assert "<Value>18</Value>\n <Value>19</Value>\n <Value>18</Value>\n <Value>19</Value>" not in str(r_b)
assert "<Value>18</Value>\n <Value>19</Value>\n <Value>18</Value>\n <Value>19</Value>" not in str(
r_b)
assert "<Value>18</Value>\n <Value>19</Value>" in str(r_a)
assert "<Value>18</Value>\n <Value>19</Value>\n <Value>18</Value>\n <Value>19</Value>" not in str(r_a)
assert "<Value>18</Value>\n <Value>19</Value>\n <Value>18</Value>\n <Value>19</Value>" not in str(
r_a)
@pytest.mark.xfail
......@@ -349,7 +353,116 @@ def test_bug_109():
assert r_a.get_property("test_bug_property").value == [18, 19]
assert "<Value>18</Value>\n <Value>19</Value>" in str(r_b)
assert "<Value>18</Value>\n <Value>19</Value>\n <Value>18</Value>\n <Value>19</Value>" not in str(r_b)
assert "<Value>18</Value>\n <Value>19</Value>\n <Value>18</Value>\n <Value>19</Value>" not in str(
r_b)
assert "<Value>18</Value>\n <Value>19</Value>" in str(r_a)
assert "<Value>18</Value>\n <Value>19</Value>\n <Value>18</Value>\n <Value>19</Value>" not in str(r_a)
assert "<Value>18</Value>\n <Value>19</Value>\n <Value>18</Value>\n <Value>19</Value>" not in str(
r_a)
def test_wrong_merge_conflict_reference():
"""Test a wrongly detected merge conflict in case of two records referencing
two different, but identical objects.
"""
# Two identical license records will be referenced from both records to be
# merged
license_rt = db.RecordType(name="license")
license_rec_a = db.Record(name="CC-BY-3.0").add_parent(license_rt)
license_rec_b = db.Record(name="CC-BY-3.0").add_parent(license_rt)
# two referencing records
dataset_rt = db.RecordType(name="Dataset")
title_prop = db.Property(name="title", datatype=db.TEXT)
doi_prop = db.Property(name="DOI", datatype=db.TEXT)
rec_a = db.Record().add_parent(dataset_rt)
rec_a.add_property(name=license_rt.name,
datatype=license_rt.name, value=license_rec_a)
rec_a.add_property(name=title_prop.name, value="Some dataset title")
rec_b = db.Record().add_parent(dataset_rt)
rec_b.add_property(name=license_rt.name,
datatype=license_rt.name, value=license_rec_b)
rec_b.add_property(name=doi_prop.name, value="https://doi.org/12345.678")
merge_entities(rec_a, rec_b)
assert rec_a.get_property(license_rt.name) is not None
assert rec_a.get_property(license_rt.name).value is not None
assert isinstance(rec_a.get_property(license_rt.name).value, db.Record)
assert rec_a.get_property(license_rt.name).value.name == license_rec_a.name
assert rec_a.get_property(license_rt.name).value.name == license_rec_b.name
assert rec_a.get_property("title").value == "Some dataset title"
assert rec_a.get_property("doi").value == "https://doi.org/12345.678"
# Reset rec_a
rec_a = db.Record().add_parent(dataset_rt)
rec_a.add_property(name=license_rt.name,
datatype=license_rt.name, value=license_rec_a)
rec_a.add_property(name=title_prop.name, value="Some dataset title")
# this does not compare referenced records, so it will fail
with pytest.raises(RuntimeError) as re:
merge_entities(rec_a, rec_b, merge_references_with_empty_diffs=False)
assert "Merge conflict" in str(re.value)
# ... as should this, of course
rec_b.get_property(license_rt.name).value.name = "Another license"
with pytest.raises(RuntimeError) as re:
merge_entities(rec_a, rec_b)
assert "Merge conflict" in str(re.value)
def test_empty_diff():
rec_a = db.Record(name="A")
rec_b = db.Record(name="B")
assert empty_diff(rec_a, rec_a)
assert not empty_diff(rec_a, rec_b)
rec_a.add_parent(name="RT")
rec_b.add_parent(name="RT")
assert empty_diff(rec_a, rec_a)
assert not empty_diff(rec_a, rec_b)
rec_b.name = "A"
assert empty_diff(rec_a, rec_b)
rec_a.add_property(name="some_prop", value=1)
assert not empty_diff(rec_a, rec_b)
rec_b.add_property(name="some_prop", value=1)
assert empty_diff(rec_a, rec_b)
rec_b.get_property("some_prop").value = 2
assert not empty_diff(rec_a, rec_b)
rec_b.get_property("some_prop").value = 1
rec_b.add_property(name="some_other_prop", value="Test")
assert not empty_diff(rec_a, rec_b)
rec_a.add_property(name="some_other_prop", value="Test")
assert empty_diff(rec_a, rec_b)
# reference identical records, but different Python Record objects
ref_rec_a = db.Record(name="Ref").add_parent(name="RefType")
ref_rec_b = db.Record(name="Ref").add_parent(name="RefType")
rec_a.add_property(name="RefType", datatype="RefType", value=ref_rec_a)
rec_b.add_property(name="RefType", datatype="RefType", value=ref_rec_b)
# the default is `compare_referenced_records=False`, so the diff shouldn't
# be empty (different Python objects are referenced.)
assert not empty_diff(rec_a, rec_b)
# when looking into the referenced record, the diffs should be empty again
assert empty_diff(rec_a, rec_b, compare_referenced_records=True)
# The same for lists of references
rec_a.remove_property("RefType")
rec_b.remove_property("RefType")
assert empty_diff(rec_a, rec_b)
rec_a.add_property(name="RefType", datatype=db.LIST(
"RefType"), value=[ref_rec_a, ref_rec_a])
rec_b.add_property(name="RefType", datatype=db.LIST(
"RefType"), value=[ref_rec_b, ref_rec_b])
assert not empty_diff(rec_a, rec_b)
assert empty_diff(rec_a, rec_b, compare_referenced_records=True)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment