diff --git a/src/linkahead/apiutils.py b/src/linkahead/apiutils.py index 4ae8edd16f1fdc00eb7ba2c17661eea6e114885e..0c023e3a712465e29d4909f23999badb3eb65a14 100644 --- a/src/linkahead/apiutils.py +++ b/src/linkahead/apiutils.py @@ -185,18 +185,24 @@ def compare_entities(old_entity: Entity, ) -> tuple[dict[str, Any], dict[str, Any]]: """Compare two entites. - Return a tuple of dictionaries, the first index belongs to additional information for old - entity, the second index belongs to additional information for new entity. + The following attributes are compared: + - parents + - properties + - all SPECIAL_ATTRIBUTES (see models.py) + + Returns a tuple of dictionaries, the first element in the tupel lists additional information of old + entity, the second elements lists to additional information of new entity. Additional information means in detail: - - Additional parents (a list under key "parents") - - Information about properties: - - Each property lists either an additional property or a property with a changed: + - Additional parents: key="parents", value=a list of names of parents that are present in the respective entity + - Information about properties (key="properties", value=dict with property names as keys): + - properties are included in the dict if they are missing in the other entity or if one of + the following differs: - datatype - importance or - value (not implemented yet) - In case of changed information the value listed under the respective key shows the + In case of changed information the value listed under the respective key is the value that is stored in the respective entity. If `compare_referenced_records` is `True`, also referenced entities will be @@ -226,113 +232,109 @@ def compare_entities(old_entity: Entity, raise ValueError( "Comparison of different Entity types is not supported.") + ### compare special attributes ### for attr in SPECIAL_ATTRIBUTES: - try: - oldattr = old_entity.__getattribute__(attr) - old_entity_attr_exists = True - except BaseException: - old_entity_attr_exists = False - try: - newattr = new_entity.__getattribute__(attr) - new_entity_attr_exists = True - except BaseException: - new_entity_attr_exists = False - - if old_entity_attr_exists and (oldattr == "" or oldattr is None): - old_entity_attr_exists = False - - if new_entity_attr_exists and (newattr == "" or newattr is None): - new_entity_attr_exists = False - - if not old_entity_attr_exists and not new_entity_attr_exists: + if attr == "value": continue + oldattr = old_entity.__getattribute__(attr) + # we considere "" and None as nonexistent + old_entity_attr_na = (oldattr == "" or oldattr is None) - if ((old_entity_attr_exists ^ new_entity_attr_exists) - or (oldattr != newattr)): + newattr = new_entity.__getattribute__(attr) + # we considere "" and None as nonexistent + new_entity_attr_na = (newattr == "" or newattr is None) + + # both unset + if old_entity_attr_na and new_entity_attr_na: + continue - if old_entity_attr_exists: + # only one set or different values + if ((old_entity_attr_na ^ new_entity_attr_na) + or (oldattr != newattr)): + if not old_entity_attr_na: olddiff[attr] = oldattr - if new_entity_attr_exists: + if not new_entity_attr_na: newdiff[attr] = newattr - # properties + # value + if (old_entity.value != new_entity.value): + # basic comparison of value objects says they are different + same_value = False + if compare_referenced_records: + # scalar reference + if isinstance(old_entity.value, Entity) and isinstance(new_entity.value, Entity): + # explicitely not recursive to prevent infinite recursion + same_value = empty_diff( + old_entity.value, new_entity.value, compare_referenced_records=False) + # list of references + elif isinstance(old_entity.value, list) and isinstance(new_entity.value, list): + # all elements in both lists actually are entity objects + # TODO: check, whether mixed cases can be allowed or should lead to an error + if (all([isinstance(x, Entity) for x in old_entity.value]) + and all([isinstance(x, Entity) for x in new_entity.value])): + # can't be the same if the lengths are different + if len(old_entity.value) == len(new_entity.value): + # do a one-by-one comparison: + # the values are the same if all diffs are empty + same_value = all( + [empty_diff(x, y, False) for x, y + in zip(old_entity.value, new_entity.value)]) + + if not same_value: + olddiff["value"] = old_entity.value + newdiff["value"] = new_entity.value + # properties for prop in old_entity.properties: - matching = [p for p in new_entity.properties if p.name == prop.name] - + matching = [p for p in new_entity.properties if p.name.lower() == prop.name.lower()] if len(matching) == 0: - olddiff["properties"][prop.name] = {} - elif len(matching) == 1: - newdiff["properties"][prop.name] = {} - olddiff["properties"][prop.name] = {} - - if (old_entity.get_importance(prop.name) != - new_entity.get_importance(prop.name)): - olddiff["properties"][prop.name]["importance"] = \ - old_entity.get_importance(prop.name) - newdiff["properties"][prop.name]["importance"] = \ - new_entity.get_importance(prop.name) - - if (prop.datatype != matching[0].datatype): - olddiff["properties"][prop.name]["datatype"] = prop.datatype - newdiff["properties"][prop.name]["datatype"] = \ - matching[0].datatype - - if (prop.unit != matching[0].unit): - olddiff["properties"][prop.name]["unit"] = prop.unit - newdiff["properties"][prop.name]["unit"] = \ - matching[0].unit - - if (prop.value != matching[0].value): - # basic comparison of value objects says they are different - same_value = False - if compare_referenced_records: - # scalar reference - if isinstance(prop.value, Entity) and isinstance(matching[0].value, Entity): - # explicitely not recursive to prevent infinite recursion - same_value = empty_diff( - prop.value, matching[0].value, compare_referenced_records=False) - # list of references - elif isinstance(prop.value, list) and isinstance(matching[0].value, list): - # all elements in both lists actually are entity objects - # TODO: check, whether mixed cases can be allowed or should lead to an error - if (all([isinstance(x, Entity) for x in prop.value]) - and all([isinstance(x, Entity) for x in matching[0].value])): - # can't be the same if the lengths are different - if len(prop.value) == len(matching[0].value): - # do a one-by-one comparison: - # the values are the same if all diffs are empty - same_value = all( - [empty_diff(x, y, False) for x, y - in zip(prop.value, matching[0].value)]) - - if not same_value: - olddiff["properties"][prop.name]["value"] = prop.value - newdiff["properties"][prop.name]["value"] = \ - matching[0].value - - if (len(newdiff["properties"][prop.name]) == 0 - and len(olddiff["properties"][prop.name]) == 0): + # old has prop and new does not + olddiff["properties"][prop.name] = {} + elif len(matching) ==1: + olddiff["properties"][prop.name] = {} + newdiff["properties"][prop.name] = {} + oldpropdiff = olddiff["properties"][prop.name] + newpropdiff = newdiff["properties"][prop.name] + + # use compare function detect difference of properties + od, nd = compare_entities(prop, matching[0], compare_referenced_records=compare_referenced_records) + # we do not care about parents and properties here # TODO do we? + od.pop("parents") + od.pop("properties") + nd.pop("parents") + nd.pop("properties") + # use the remaining diff + oldpropdiff.update(od) + newpropdiff.update(nd) + + # importance is associated with the record. So do it extra + if (old_entity.get_importance(prop.name) != new_entity.get_importance(prop.name)): + oldpropdiff["importance"] = old_entity.get_importance(prop.name) + newpropdiff["importance"] = new_entity.get_importance(prop.name) + + # in case there was actually no difference, we remove the dict keys again + if (len(newpropdiff) == 0 and len(oldpropdiff) == 0): newdiff["properties"].pop(prop.name) olddiff["properties"].pop(prop.name) - else: raise NotImplementedError( "Comparison not implemented for multi-properties.") + # add the properties that are only present in new for prop in new_entity.properties: - if len([0 for p in old_entity.properties if p.name == prop.name]) == 0: + if len([0 for p in old_entity.properties if p.name.lower() == prop.name.lower()]) == 0: newdiff["properties"][prop.name] = {} # parents + # TODO we only use names for parents and property matching for parent in old_entity.parents: - if len([0 for p in new_entity.parents if p.name == parent.name]) == 0: + if len([0 for p in new_entity.parents if p.name.lower() == parent.name.lower()]) == 0: olddiff["parents"].append(parent.name) for parent in new_entity.parents: - if len([0 for p in old_entity.parents if p.name == parent.name]) == 0: + if len([0 for p in old_entity.parents if p.name.lower() == parent.name.lower()]) == 0: newdiff["parents"].append(parent.name) return (olddiff, newdiff) @@ -376,9 +378,9 @@ def merge_entities(entity_a: Entity, ) -> Entity: """Merge entity_b into entity_a such that they have the same parents and properties. - datatype, unit, value, name and description will only be changed in entity_a - if they are None for entity_a and set for entity_b. If there is a - corresponding value for entity_a different from None, an + The attributes datatype, unit, value, name and description will only be changed + in entity_a if they are None for entity_a and set for entity_b. If one of those attributes is + set in both entities and they differ, then an EntityMergeConflictError will be raised to inform about an unresolvable merge conflict. @@ -386,8 +388,6 @@ def merge_entities(entity_a: Entity, Returns entity_a. - WARNING: This function is currently experimental and insufficiently tested. Use with care. - Parameters ---------- entity_a, entity_b : Entity @@ -420,9 +420,6 @@ def merge_entities(entity_a: Entity, """ - logger.warning( - "This function is currently experimental and insufficiently tested. Use with care.") - # Compare both entities: diff_r1, diff_r2 = compare_entities( entity_a, entity_b, compare_referenced_records=merge_references_with_empty_diffs) @@ -445,7 +442,8 @@ def merge_entities(entity_a: Entity, for attribute in ("datatype", "unit", "value"): if (attribute in diff_r2["properties"][key] and diff_r2["properties"][key][attribute] is not None): - if (diff_r1["properties"][key][attribute] is None): + if (attribute not in diff_r1["properties"][key] or + diff_r1["properties"][key][attribute] is None): setattr(entity_a.get_property(key), attribute, diff_r2["properties"][key][attribute]) elif force: diff --git a/src/linkahead/common/models.py b/src/linkahead/common/models.py index a8144286fdacefacadf2b823160e0eb9bfe00c77..b990e42e67e01d041b13199cd85944522a31ad11 100644 --- a/src/linkahead/common/models.py +++ b/src/linkahead/common/models.py @@ -115,7 +115,7 @@ if TYPE_CHECKING: ROLE = Literal["Entity", "Record", "RecordType", "Property", "File"] SPECIAL_ATTRIBUTES = ["name", "role", "datatype", "description", - "id", "path", "checksum", "size", "value"] + "id", "path", "checksum", "size", "value", "unit"] class Entity: @@ -138,10 +138,10 @@ class Entity: description: Optional[str] = None, # @ReservedAssignment datatype: Optional[DATATYPE] = None, value=None, - **kwargs, + role=None, ): - self.__role: Optional[ROLE] = kwargs["role"] if "role" in kwargs else None + self.__role: Optional[ROLE] = role self._checksum: Optional[str] = None self._size = None self._upload = None diff --git a/tox.ini b/tox.ini index 592c660c5bbbf5805a3ecbb3e60c41f597182a55..043e5210ddda3251248510562466dbaf62b10c05 100644 --- a/tox.ini +++ b/tox.ini @@ -17,6 +17,6 @@ max-line-length=100 [pytest] testpaths = unittests xfail_strict = True -addopts = -x -vv --cov=caosdb +#addopts = -x -vv --cov=caosdb pythonpath = src diff --git a/unittests/test_apiutils.py b/unittests/test_apiutils.py index 4705f19a1bdfbc4358790f787f2dce9ea97fee48..76c2a1d91a6338bc7e7e7b792601f21429280679 100644 --- a/unittests/test_apiutils.py +++ b/unittests/test_apiutils.py @@ -96,6 +96,7 @@ def test_resolve_reference(): def test_compare_entities(): + # test compare of parents, properties r1 = db.Record() r2 = db.Record() r1.add_parent("bla") @@ -135,12 +136,9 @@ def test_compare_entities(): assert "tests_TT" in diff_r2["properties"] -def test_compare_entities_units(): + # test compare units of properties r1 = db.Record() r2 = db.Record() - r1.add_parent("bla") - r2.add_parent("bla") - r1.add_parent("lopp") r1.add_property("test", value=2, unit="cm") r2.add_property("test", value=2, unit="m") r1.add_property("tests", value=3, unit="cm") @@ -152,8 +150,6 @@ def test_compare_entities_units(): diff_r1, diff_r2 = compare_entities(r1, r2) - assert len(diff_r1["parents"]) == 1 - assert len(diff_r2["parents"]) == 0 assert len(diff_r1["properties"]) == 4 assert len(diff_r2["properties"]) == 4 @@ -172,12 +168,10 @@ def test_compare_entities_units(): def test_compare_special_properties(): # Test for all known special properties: - SPECIAL_PROPERTIES = ("description", "name", - "checksum", "size", "path", "id") INTS = ("size", "id") HIDDEN = ("checksum", "size") - for key in SPECIAL_PROPERTIES: + for key in SPECIAL_ATTRIBUTES: set_key = key if key in HIDDEN: set_key = "_" + key @@ -216,7 +210,7 @@ def test_compare_special_properties(): assert len(diff_r2["properties"]) == 0 -def test_compare_properties(): + # compare Property objects p1 = db.Property() p2 = db.Property() diff --git a/unittests/test_entity.py b/unittests/test_entity.py index abf82f0a9b557cf9d1d2365e01fedaa4eae0c565..d059f7e29a50161e85b2d708c05cd9e0c5254f65 100644 --- a/unittests/test_entity.py +++ b/unittests/test_entity.py @@ -29,6 +29,7 @@ from lxml import etree import os from linkahead import (INTEGER, Entity, Property, Record, RecordType, configure_connection) +from linkahead.common.models import SPECIAL_ATTRIBUTES from linkahead.connection.mockup import MockUpServerConnection UNITTESTDIR = os.path.dirname(os.path.abspath(__file__)) @@ -82,7 +83,13 @@ class TestEntity(unittest.TestCase): self.assertEqual(entity.to_xml().tag, "Property") def test_instantiation(self): - self.assertRaises(Exception, Entity()) + e = Entity() + for attr in SPECIAL_ATTRIBUTES: + assert hasattr(e, attr) + + def test_instantiation_bad_argument(self): + with self.assertRaises(Exception): + Entity(rol="File") def test_parse_role(self): """During parsing, the role of an entity is set explicitely. All other