Skip to content
Snippets Groups Projects
Commit 58677097 authored by Alexander Schlemmer's avatar Alexander Schlemmer
Browse files

Merge branch 'f-fix-merge-entity-conflicts' into 'dev'

F fix merge entity conflicts

See merge request !72
parents f05dca21 f85dc20e
Branches
Tags
2 merge requests!79Release 0.10.0,!72F fix merge entity conflicts
Pipeline #29877 passed
...@@ -9,8 +9,23 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ...@@ -9,8 +9,23 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
### Added ### ### Added ###
* `apiutils.empty_diff` function that returns `True` if the diffs of two
entities found with the `compare_entitis` function are empty, `False`
otherwise.
### Changed ### ### Changed ###
* `apiutils.compare_entities` now has an optional `compare_referenced_records`
argument to compare referenced Entities recursively (fomerly, only the
referenced Python objects would be compared). The default is `False` to
recover the original behavior.
* `apiutils.merge_entities` now has an optional
`merge_references_with_empty_diffs` argument that determines whether a merge
of two entities will be performed if they reference identical records (w.r.t
th above `empty_diff` function). Formerly this would have caused a merge
conflict if the referenced record(s) were identical, but stored in different
Python objects.
### Deprecated ### ### Deprecated ###
### Removed ### ### Removed ###
......
...@@ -188,9 +188,8 @@ def getCommitIn(folder): ...@@ -188,9 +188,8 @@ def getCommitIn(folder):
return t.readline().strip() return t.readline().strip()
def compare_entities(old_entity: Entity, new_entity: Entity): def compare_entities(old_entity: Entity, new_entity: Entity, compare_referenced_records: bool = False):
""" """Compare two entites.
Compare two entites.
Return a tuple of dictionaries, the first index belongs to additional information for old Return a tuple of dictionaries, the first index belongs to additional information for old
entity, the second index belongs to additional information for new entity. entity, the second index belongs to additional information for new entity.
...@@ -204,6 +203,23 @@ def compare_entities(old_entity: Entity, new_entity: Entity): ...@@ -204,6 +203,23 @@ def compare_entities(old_entity: Entity, new_entity: Entity):
- ... value (not implemented yet) - ... value (not implemented yet)
In case of changed information the value listed under the respective key shows the In case of changed information the value listed under the respective key shows the
value that is stored in the respective entity. value that is stored in the respective entity.
If `compare_referenced_records` is `True`, also referenced entities will be
compared using this function (which is then called with
`compare_referenced_records = False` to prevent infinite recursion in case
of circular references).
Parameters
----------
old_entity, new_entity : Entity
Entities to be compared
compare_referenced_records : bool, optional
Whether to compare referenced records in case of both, `old_entity` and
`new_entity`, have the same reference properties and both have a Record
object as value. If set to `False`, only the corresponding Python
objects are compared which may lead to unexpected behavior when
identical records are stored in different objects. Default is False.
""" """
olddiff: Dict[str, Any] = {"properties": {}, "parents": []} olddiff: Dict[str, Any] = {"properties": {}, "parents": []}
newdiff: Dict[str, Any] = {"properties": {}, "parents": []} newdiff: Dict[str, Any] = {"properties": {}, "parents": []}
...@@ -270,9 +286,29 @@ def compare_entities(old_entity: Entity, new_entity: Entity): ...@@ -270,9 +286,29 @@ def compare_entities(old_entity: Entity, new_entity: Entity):
matching[0].unit matching[0].unit
if (prop.value != matching[0].value): if (prop.value != matching[0].value):
olddiff["properties"][prop.name]["value"] = prop.value # basic comparison of value objects says they are different
newdiff["properties"][prop.name]["value"] = \ same_value = False
matching[0].value if compare_referenced_records:
# scalar reference
if isinstance(prop.value, Entity) and isinstance(matching[0].value, Entity):
# explicitely not recursive to prevent infinite recursion
same_value = empty_diff(
prop.value, matching[0].value, compare_referenced_records=False)
# list of references
elif isinstance(prop.value, list) and isinstance(matching[0].value, list):
# all elements in both lists actually are entity objects
# TODO: check, whether mixed cases can be allowed or should lead to an error
if all([isinstance(x, Entity) for x in prop.value]) and all([isinstance(x, Entity) for x in matching[0].value]):
# can't be the same if the lengths are different
if len(prop.value) == len(matching[0].value):
# do a one-by-one comparison; the values are the same, if all diffs are empty
same_value = all(
[empty_diff(x, y, False) for x, y in zip(prop.value, matching[0].value)])
if not same_value:
olddiff["properties"][prop.name]["value"] = prop.value
newdiff["properties"][prop.name]["value"] = \
matching[0].value
if (len(newdiff["properties"][prop.name]) == 0 if (len(newdiff["properties"][prop.name]) == 0
and len(olddiff["properties"][prop.name]) == 0): and len(olddiff["properties"][prop.name]) == 0):
...@@ -300,7 +336,32 @@ def compare_entities(old_entity: Entity, new_entity: Entity): ...@@ -300,7 +336,32 @@ def compare_entities(old_entity: Entity, new_entity: Entity):
return (olddiff, newdiff) return (olddiff, newdiff)
def merge_entities(entity_a: Entity, entity_b: Entity): def empty_diff(old_entity: Entity, new_entity: Entity, compare_referenced_records: bool = False):
"""Check whether the `compare_entities` found any differences between
old_entity and new_entity.
Parameters
----------
old_entity, new_entity : Entity
Entities to be compared
compare_referenced_records : bool, optional
Whether to compare referenced records in case of both, `old_entity` and
`new_entity`, have the same reference properties and both have a Record
object as value.
"""
olddiff, newdiff = compare_entities(
old_entity, new_entity, compare_referenced_records)
for diff in [olddiff, newdiff]:
for key in diff:
if len(diff[key]) > 0:
# There is a difference somewhere in the diff
return False
# all elements of the two diffs were empty
return True
def merge_entities(entity_a: Entity, entity_b: Entity, merge_references_with_empty_diffs=True):
""" """
Merge entity_b into entity_a such that they have the same parents and properties. Merge entity_b into entity_a such that they have the same parents and properties.
...@@ -314,13 +375,30 @@ def merge_entities(entity_a: Entity, entity_b: Entity): ...@@ -314,13 +375,30 @@ def merge_entities(entity_a: Entity, entity_b: Entity):
Returns entity_a. Returns entity_a.
WARNING: This function is currently experimental and insufficiently tested. Use with care. WARNING: This function is currently experimental and insufficiently tested. Use with care.
Parameters
----------
entity_a, entity_b : Entity
The entities to be merged. entity_b will be merged into entity_a in place
merge_references_with_empty_diffs : bool, optional
Whether the merge is performed if entity_a and entity_b both reference
record(s) that may be different Python objects but have empty diffs. If
set to `False` a merge conflict will be raised in this case
instead. Default is True.
Returns
-------
entity_a : Entity
The initial entity_a after the in-place merge
""" """
logging.warning( logging.warning(
"This function is currently experimental and insufficiently tested. Use with care.") "This function is currently experimental and insufficiently tested. Use with care.")
# Compare both entities: # Compare both entities:
diff_r1, diff_r2 = compare_entities(entity_a, entity_b) diff_r1, diff_r2 = compare_entities(
entity_a, entity_b, compare_referenced_records=merge_references_with_empty_diffs)
# Go through the comparison and try to apply changes to entity_a: # Go through the comparison and try to apply changes to entity_a:
for key in diff_r2["parents"]: for key in diff_r2["parents"]:
......
...@@ -30,7 +30,7 @@ import pytest ...@@ -30,7 +30,7 @@ import pytest
import caosdb as db import caosdb as db
import caosdb.apiutils import caosdb.apiutils
from caosdb.apiutils import (apply_to_ids, compare_entities, create_id_query, from caosdb.apiutils import (apply_to_ids, compare_entities, create_id_query,
resolve_reference, merge_entities) empty_diff, resolve_reference, merge_entities)
from caosdb.common.models import SPECIAL_ATTRIBUTES from caosdb.common.models import SPECIAL_ATTRIBUTES
...@@ -272,8 +272,10 @@ def test_copy_entities(): ...@@ -272,8 +272,10 @@ def test_copy_entities():
for i in [0, 1]: for i in [0, 1]:
assert c.properties[i] is not r.properties[i] assert c.properties[i] is not r.properties[i]
for special in SPECIAL_ATTRIBUTES: for special in SPECIAL_ATTRIBUTES:
assert getattr(c.properties[i], special) == getattr(r.properties[i], special) assert getattr(c.properties[i], special) == getattr(
assert c.get_importance(c.properties[i]) == r.get_importance(r.properties[i]) r.properties[i], special)
assert c.get_importance(
c.properties[i]) == r.get_importance(r.properties[i])
def test_merge_entities(): def test_merge_entities():
...@@ -326,10 +328,12 @@ def test_merge_bug_109(): ...@@ -326,10 +328,12 @@ def test_merge_bug_109():
assert r_a.get_property("test_bug_property").value == [18, 19] assert r_a.get_property("test_bug_property").value == [18, 19]
assert "<Value>18</Value>\n <Value>19</Value>" in str(r_b) assert "<Value>18</Value>\n <Value>19</Value>" in str(r_b)
assert "<Value>18</Value>\n <Value>19</Value>\n <Value>18</Value>\n <Value>19</Value>" not in str(r_b) assert "<Value>18</Value>\n <Value>19</Value>\n <Value>18</Value>\n <Value>19</Value>" not in str(
r_b)
assert "<Value>18</Value>\n <Value>19</Value>" in str(r_a) assert "<Value>18</Value>\n <Value>19</Value>" in str(r_a)
assert "<Value>18</Value>\n <Value>19</Value>\n <Value>18</Value>\n <Value>19</Value>" not in str(r_a) assert "<Value>18</Value>\n <Value>19</Value>\n <Value>18</Value>\n <Value>19</Value>" not in str(
r_a)
@pytest.mark.xfail @pytest.mark.xfail
...@@ -349,7 +353,116 @@ def test_bug_109(): ...@@ -349,7 +353,116 @@ def test_bug_109():
assert r_a.get_property("test_bug_property").value == [18, 19] assert r_a.get_property("test_bug_property").value == [18, 19]
assert "<Value>18</Value>\n <Value>19</Value>" in str(r_b) assert "<Value>18</Value>\n <Value>19</Value>" in str(r_b)
assert "<Value>18</Value>\n <Value>19</Value>\n <Value>18</Value>\n <Value>19</Value>" not in str(r_b) assert "<Value>18</Value>\n <Value>19</Value>\n <Value>18</Value>\n <Value>19</Value>" not in str(
r_b)
assert "<Value>18</Value>\n <Value>19</Value>" in str(r_a) assert "<Value>18</Value>\n <Value>19</Value>" in str(r_a)
assert "<Value>18</Value>\n <Value>19</Value>\n <Value>18</Value>\n <Value>19</Value>" not in str(r_a) assert "<Value>18</Value>\n <Value>19</Value>\n <Value>18</Value>\n <Value>19</Value>" not in str(
r_a)
def test_wrong_merge_conflict_reference():
"""Test a wrongly detected merge conflict in case of two records referencing
two different, but identical objects.
"""
# Two identical license records will be referenced from both records to be
# merged
license_rt = db.RecordType(name="license")
license_rec_a = db.Record(name="CC-BY-3.0").add_parent(license_rt)
license_rec_b = db.Record(name="CC-BY-3.0").add_parent(license_rt)
# two referencing records
dataset_rt = db.RecordType(name="Dataset")
title_prop = db.Property(name="title", datatype=db.TEXT)
doi_prop = db.Property(name="DOI", datatype=db.TEXT)
rec_a = db.Record().add_parent(dataset_rt)
rec_a.add_property(name=license_rt.name,
datatype=license_rt.name, value=license_rec_a)
rec_a.add_property(name=title_prop.name, value="Some dataset title")
rec_b = db.Record().add_parent(dataset_rt)
rec_b.add_property(name=license_rt.name,
datatype=license_rt.name, value=license_rec_b)
rec_b.add_property(name=doi_prop.name, value="https://doi.org/12345.678")
merge_entities(rec_a, rec_b)
assert rec_a.get_property(license_rt.name) is not None
assert rec_a.get_property(license_rt.name).value is not None
assert isinstance(rec_a.get_property(license_rt.name).value, db.Record)
assert rec_a.get_property(license_rt.name).value.name == license_rec_a.name
assert rec_a.get_property(license_rt.name).value.name == license_rec_b.name
assert rec_a.get_property("title").value == "Some dataset title"
assert rec_a.get_property("doi").value == "https://doi.org/12345.678"
# Reset rec_a
rec_a = db.Record().add_parent(dataset_rt)
rec_a.add_property(name=license_rt.name,
datatype=license_rt.name, value=license_rec_a)
rec_a.add_property(name=title_prop.name, value="Some dataset title")
# this does not compare referenced records, so it will fail
with pytest.raises(RuntimeError) as re:
merge_entities(rec_a, rec_b, merge_references_with_empty_diffs=False)
assert "Merge conflict" in str(re.value)
# ... as should this, of course
rec_b.get_property(license_rt.name).value.name = "Another license"
with pytest.raises(RuntimeError) as re:
merge_entities(rec_a, rec_b)
assert "Merge conflict" in str(re.value)
def test_empty_diff():
rec_a = db.Record(name="A")
rec_b = db.Record(name="B")
assert empty_diff(rec_a, rec_a)
assert not empty_diff(rec_a, rec_b)
rec_a.add_parent(name="RT")
rec_b.add_parent(name="RT")
assert empty_diff(rec_a, rec_a)
assert not empty_diff(rec_a, rec_b)
rec_b.name = "A"
assert empty_diff(rec_a, rec_b)
rec_a.add_property(name="some_prop", value=1)
assert not empty_diff(rec_a, rec_b)
rec_b.add_property(name="some_prop", value=1)
assert empty_diff(rec_a, rec_b)
rec_b.get_property("some_prop").value = 2
assert not empty_diff(rec_a, rec_b)
rec_b.get_property("some_prop").value = 1
rec_b.add_property(name="some_other_prop", value="Test")
assert not empty_diff(rec_a, rec_b)
rec_a.add_property(name="some_other_prop", value="Test")
assert empty_diff(rec_a, rec_b)
# reference identical records, but different Python Record objects
ref_rec_a = db.Record(name="Ref").add_parent(name="RefType")
ref_rec_b = db.Record(name="Ref").add_parent(name="RefType")
rec_a.add_property(name="RefType", datatype="RefType", value=ref_rec_a)
rec_b.add_property(name="RefType", datatype="RefType", value=ref_rec_b)
# the default is `compare_referenced_records=False`, so the diff shouldn't
# be empty (different Python objects are referenced.)
assert not empty_diff(rec_a, rec_b)
# when looking into the referenced record, the diffs should be empty again
assert empty_diff(rec_a, rec_b, compare_referenced_records=True)
# The same for lists of references
rec_a.remove_property("RefType")
rec_b.remove_property("RefType")
assert empty_diff(rec_a, rec_b)
rec_a.add_property(name="RefType", datatype=db.LIST(
"RefType"), value=[ref_rec_a, ref_rec_a])
rec_b.add_property(name="RefType", datatype=db.LIST(
"RefType"), value=[ref_rec_b, ref_rec_b])
assert not empty_diff(rec_a, rec_b)
assert empty_diff(rec_a, rec_b, compare_referenced_records=True)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment