From cdf6a8f109f06e1f212156cc36e203d9a5185c84 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Henrik=20tom=20W=C3=B6rden?= <h.tomwoerden@indiscale.com>
Date: Sun, 6 Oct 2024 19:54:31 +0200
Subject: [PATCH] wip

---
 src/linkahead/apiutils.py      | 190 ++++++++++++++++-----------------
 src/linkahead/common/models.py |   6 +-
 tox.ini                        |   2 +-
 unittests/test_apiutils.py     |  14 +--
 unittests/test_entity.py       |   9 +-
 5 files changed, 110 insertions(+), 111 deletions(-)

diff --git a/src/linkahead/apiutils.py b/src/linkahead/apiutils.py
index 4ae8edd1..0c023e3a 100644
--- a/src/linkahead/apiutils.py
+++ b/src/linkahead/apiutils.py
@@ -185,18 +185,24 @@ def compare_entities(old_entity: Entity,
                      ) -> tuple[dict[str, Any], dict[str, Any]]:
     """Compare two entites.
 
-    Return a tuple of dictionaries, the first index belongs to additional information for old
-    entity, the second index belongs to additional information for new entity.
+    The following attributes are compared:
+    - parents
+    - properties
+    - all SPECIAL_ATTRIBUTES (see models.py)
+
+    Returns a tuple of dictionaries, the first element in the tupel lists additional information of old
+    entity, the second elements lists to additional information of new entity.
 
     Additional information means in detail:
-    - Additional parents (a list under key "parents")
-    - Information about properties:
-      - Each property lists either an additional property or a property with a changed:
+    - Additional parents: key="parents", value=a list of names of parents that are present in the respective entity
+    - Information about properties (key="properties", value=dict with property names as keys):
+      - properties are included in the dict if they are missing in the other entity or if one of
+        the following differs:
         - datatype
         - importance or
         - value (not implemented yet)
 
-        In case of changed information the value listed under the respective key shows the
+        In case of changed information the value listed under the respective key is the
         value that is stored in the respective entity.
 
     If `compare_referenced_records` is `True`, also referenced entities will be
@@ -226,113 +232,109 @@ def compare_entities(old_entity: Entity,
         raise ValueError(
             "Comparison of different Entity types is not supported.")
 
+    ### compare special attributes ###
     for attr in SPECIAL_ATTRIBUTES:
-        try:
-            oldattr = old_entity.__getattribute__(attr)
-            old_entity_attr_exists = True
-        except BaseException:
-            old_entity_attr_exists = False
-        try:
-            newattr = new_entity.__getattribute__(attr)
-            new_entity_attr_exists = True
-        except BaseException:
-            new_entity_attr_exists = False
-
-        if old_entity_attr_exists and (oldattr == "" or oldattr is None):
-            old_entity_attr_exists = False
-
-        if new_entity_attr_exists and (newattr == "" or newattr is None):
-            new_entity_attr_exists = False
-
-        if not old_entity_attr_exists and not new_entity_attr_exists:
+        if attr == "value":
             continue
+        oldattr = old_entity.__getattribute__(attr)
+        # we considere "" and None as nonexistent
+        old_entity_attr_na = (oldattr == "" or oldattr is None)
 
-        if ((old_entity_attr_exists ^ new_entity_attr_exists)
-                or (oldattr != newattr)):
+        newattr = new_entity.__getattribute__(attr)
+        # we considere "" and None as nonexistent
+        new_entity_attr_na = (newattr == "" or newattr is None)
+
+        # both unset
+        if old_entity_attr_na and new_entity_attr_na:
+            continue
 
-            if old_entity_attr_exists:
+        # only one set or different values
+        if ((old_entity_attr_na ^ new_entity_attr_na)
+                or (oldattr != newattr)):
+            if not old_entity_attr_na:
                 olddiff[attr] = oldattr
 
-            if new_entity_attr_exists:
+            if not new_entity_attr_na:
                 newdiff[attr] = newattr
 
-    # properties
+    # value
+    if (old_entity.value != new_entity.value):
+        # basic comparison of value objects says they are different
+        same_value = False
+        if compare_referenced_records:
+            # scalar reference
+            if isinstance(old_entity.value, Entity) and isinstance(new_entity.value, Entity):
+                # explicitely not recursive to prevent infinite recursion
+                same_value = empty_diff(
+                    old_entity.value, new_entity.value, compare_referenced_records=False)
+            # list of references
+            elif isinstance(old_entity.value, list) and isinstance(new_entity.value, list):
+                # all elements in both lists actually are entity objects
+                # TODO: check, whether mixed cases can be allowed or should lead to an error
+                if (all([isinstance(x, Entity) for x in old_entity.value])
+                        and all([isinstance(x, Entity) for x in new_entity.value])):
+                    # can't be the same if the lengths are different
+                    if len(old_entity.value) == len(new_entity.value):
+                        # do a one-by-one comparison:
+                        # the values are the same if all diffs are empty
+                        same_value = all(
+                            [empty_diff(x, y, False) for x, y
+                                in zip(old_entity.value, new_entity.value)])
+
+        if not same_value:
+            olddiff["value"] = old_entity.value
+            newdiff["value"] = new_entity.value
 
+    # properties
     for prop in old_entity.properties:
-        matching = [p for p in new_entity.properties if p.name == prop.name]
-
+        matching = [p for p in new_entity.properties if p.name.lower() == prop.name.lower()]
         if len(matching) == 0:
-            olddiff["properties"][prop.name] = {}
-        elif len(matching) == 1:
-            newdiff["properties"][prop.name] = {}
-            olddiff["properties"][prop.name] = {}
-
-            if (old_entity.get_importance(prop.name) !=
-                    new_entity.get_importance(prop.name)):
-                olddiff["properties"][prop.name]["importance"] = \
-                    old_entity.get_importance(prop.name)
-                newdiff["properties"][prop.name]["importance"] = \
-                    new_entity.get_importance(prop.name)
-
-            if (prop.datatype != matching[0].datatype):
-                olddiff["properties"][prop.name]["datatype"] = prop.datatype
-                newdiff["properties"][prop.name]["datatype"] = \
-                    matching[0].datatype
-
-            if (prop.unit != matching[0].unit):
-                olddiff["properties"][prop.name]["unit"] = prop.unit
-                newdiff["properties"][prop.name]["unit"] = \
-                    matching[0].unit
-
-            if (prop.value != matching[0].value):
-                # basic comparison of value objects says they are different
-                same_value = False
-                if compare_referenced_records:
-                    # scalar reference
-                    if isinstance(prop.value, Entity) and isinstance(matching[0].value, Entity):
-                        # explicitely not recursive to prevent infinite recursion
-                        same_value = empty_diff(
-                            prop.value, matching[0].value, compare_referenced_records=False)
-                    # list of references
-                    elif isinstance(prop.value, list) and isinstance(matching[0].value, list):
-                        # all elements in both lists actually are entity objects
-                        # TODO: check, whether mixed cases can be allowed or should lead to an error
-                        if (all([isinstance(x, Entity) for x in prop.value])
-                                and all([isinstance(x, Entity) for x in matching[0].value])):
-                            # can't be the same if the lengths are different
-                            if len(prop.value) == len(matching[0].value):
-                                # do a one-by-one comparison:
-                                # the values are the same if all diffs are empty
-                                same_value = all(
-                                    [empty_diff(x, y, False) for x, y
-                                     in zip(prop.value, matching[0].value)])
-
-                if not same_value:
-                    olddiff["properties"][prop.name]["value"] = prop.value
-                    newdiff["properties"][prop.name]["value"] = \
-                        matching[0].value
-
-            if (len(newdiff["properties"][prop.name]) == 0
-                    and len(olddiff["properties"][prop.name]) == 0):
+            # old has prop and new does not
+            olddiff["properties"][prop.name]  = {}
+        elif len(matching) ==1:
+            olddiff["properties"][prop.name]  = {}
+            newdiff["properties"][prop.name]  = {}
+            oldpropdiff = olddiff["properties"][prop.name]
+            newpropdiff = newdiff["properties"][prop.name]
+
+            # use compare function detect difference of properties
+            od, nd = compare_entities(prop, matching[0], compare_referenced_records=compare_referenced_records)
+            # we do not care about parents and properties here # TODO do we?
+            od.pop("parents")
+            od.pop("properties")
+            nd.pop("parents")
+            nd.pop("properties")
+            # use the remaining diff
+            oldpropdiff.update(od)
+            newpropdiff.update(nd)
+
+            # importance is associated with the record. So do it extra
+            if (old_entity.get_importance(prop.name) != new_entity.get_importance(prop.name)):
+                oldpropdiff["importance"] =  old_entity.get_importance(prop.name)
+                newpropdiff["importance"] =  new_entity.get_importance(prop.name)
+
+            # in case there was actually no difference, we remove the dict keys again
+            if (len(newpropdiff) == 0 and len(oldpropdiff) == 0):
                 newdiff["properties"].pop(prop.name)
                 olddiff["properties"].pop(prop.name)
-
         else:
             raise NotImplementedError(
                 "Comparison not implemented for multi-properties.")
 
+    # add the properties that are only present in new
     for prop in new_entity.properties:
-        if len([0 for p in old_entity.properties if p.name == prop.name]) == 0:
+        if len([0 for p in old_entity.properties if p.name.lower() == prop.name.lower()]) == 0:
             newdiff["properties"][prop.name] = {}
 
     # parents
 
+    # TODO we only use names for parents and property matching
     for parent in old_entity.parents:
-        if len([0 for p in new_entity.parents if p.name == parent.name]) == 0:
+        if len([0 for p in new_entity.parents if p.name.lower() == parent.name.lower()]) == 0:
             olddiff["parents"].append(parent.name)
 
     for parent in new_entity.parents:
-        if len([0 for p in old_entity.parents if p.name == parent.name]) == 0:
+        if len([0 for p in old_entity.parents if p.name.lower() == parent.name.lower()]) == 0:
             newdiff["parents"].append(parent.name)
 
     return (olddiff, newdiff)
@@ -376,9 +378,9 @@ def merge_entities(entity_a: Entity,
                    ) -> Entity:
     """Merge entity_b into entity_a such that they have the same parents and properties.
 
-    datatype, unit, value, name and description will only be changed in entity_a
-    if they are None for entity_a and set for entity_b. If there is a
-    corresponding value for entity_a different from None, an
+    The attributes datatype, unit, value, name and description will only be changed
+    in entity_a if they are None for entity_a and set for entity_b. If one of those attributes is
+    set in both entities and they differ, then an
     EntityMergeConflictError will be raised to inform about an unresolvable merge
     conflict.
 
@@ -386,8 +388,6 @@ def merge_entities(entity_a: Entity,
 
     Returns entity_a.
 
-    WARNING: This function is currently experimental and insufficiently tested. Use with care.
-
     Parameters
     ----------
     entity_a, entity_b : Entity
@@ -420,9 +420,6 @@ def merge_entities(entity_a: Entity,
 
     """
 
-    logger.warning(
-        "This function is currently experimental and insufficiently tested. Use with care.")
-
     # Compare both entities:
     diff_r1, diff_r2 = compare_entities(
         entity_a, entity_b, compare_referenced_records=merge_references_with_empty_diffs)
@@ -445,7 +442,8 @@ def merge_entities(entity_a: Entity,
             for attribute in ("datatype", "unit", "value"):
                 if (attribute in diff_r2["properties"][key] and
                         diff_r2["properties"][key][attribute] is not None):
-                    if (diff_r1["properties"][key][attribute] is None):
+                    if (attribute not in diff_r1["properties"][key] or
+                            diff_r1["properties"][key][attribute] is None):
                         setattr(entity_a.get_property(key), attribute,
                                 diff_r2["properties"][key][attribute])
                     elif force:
diff --git a/src/linkahead/common/models.py b/src/linkahead/common/models.py
index a8144286..b990e42e 100644
--- a/src/linkahead/common/models.py
+++ b/src/linkahead/common/models.py
@@ -115,7 +115,7 @@ if TYPE_CHECKING:
     ROLE = Literal["Entity", "Record", "RecordType", "Property", "File"]
 
 SPECIAL_ATTRIBUTES = ["name", "role", "datatype", "description",
-                      "id", "path", "checksum", "size", "value"]
+                      "id", "path", "checksum", "size", "value", "unit"]
 
 
 class Entity:
@@ -138,10 +138,10 @@ class Entity:
         description: Optional[str] = None,  # @ReservedAssignment
         datatype: Optional[DATATYPE] = None,
         value=None,
-        **kwargs,
+        role=None,
     ):
 
-        self.__role: Optional[ROLE] = kwargs["role"] if "role" in kwargs else None
+        self.__role: Optional[ROLE] = role
         self._checksum: Optional[str] = None
         self._size = None
         self._upload = None
diff --git a/tox.ini b/tox.ini
index 592c660c..043e5210 100644
--- a/tox.ini
+++ b/tox.ini
@@ -17,6 +17,6 @@ max-line-length=100
 [pytest]
 testpaths = unittests
 xfail_strict = True
-addopts = -x -vv --cov=caosdb
+#addopts = -x -vv --cov=caosdb
 pythonpath = src
 
diff --git a/unittests/test_apiutils.py b/unittests/test_apiutils.py
index 4705f19a..76c2a1d9 100644
--- a/unittests/test_apiutils.py
+++ b/unittests/test_apiutils.py
@@ -96,6 +96,7 @@ def test_resolve_reference():
 
 
 def test_compare_entities():
+    # test compare of parents, properties
     r1 = db.Record()
     r2 = db.Record()
     r1.add_parent("bla")
@@ -135,12 +136,9 @@ def test_compare_entities():
     assert "tests_TT" in diff_r2["properties"]
 
 
-def test_compare_entities_units():
+    # test compare units of properties
     r1 = db.Record()
     r2 = db.Record()
-    r1.add_parent("bla")
-    r2.add_parent("bla")
-    r1.add_parent("lopp")
     r1.add_property("test", value=2, unit="cm")
     r2.add_property("test", value=2, unit="m")
     r1.add_property("tests", value=3, unit="cm")
@@ -152,8 +150,6 @@ def test_compare_entities_units():
 
     diff_r1, diff_r2 = compare_entities(r1, r2)
 
-    assert len(diff_r1["parents"]) == 1
-    assert len(diff_r2["parents"]) == 0
     assert len(diff_r1["properties"]) == 4
     assert len(diff_r2["properties"]) == 4
 
@@ -172,12 +168,10 @@ def test_compare_entities_units():
 
 def test_compare_special_properties():
     # Test for all known special properties:
-    SPECIAL_PROPERTIES = ("description", "name",
-                          "checksum", "size", "path", "id")
     INTS = ("size", "id")
     HIDDEN = ("checksum", "size")
 
-    for key in SPECIAL_PROPERTIES:
+    for key in SPECIAL_ATTRIBUTES:
         set_key = key
         if key in HIDDEN:
             set_key = "_" + key
@@ -216,7 +210,7 @@ def test_compare_special_properties():
         assert len(diff_r2["properties"]) == 0
 
 
-def test_compare_properties():
+    # compare Property objects
     p1 = db.Property()
     p2 = db.Property()
 
diff --git a/unittests/test_entity.py b/unittests/test_entity.py
index abf82f0a..d059f7e2 100644
--- a/unittests/test_entity.py
+++ b/unittests/test_entity.py
@@ -29,6 +29,7 @@ from lxml import etree
 import os
 from linkahead import (INTEGER, Entity, Property, Record, RecordType,
                        configure_connection)
+from linkahead.common.models import SPECIAL_ATTRIBUTES
 from linkahead.connection.mockup import MockUpServerConnection
 
 UNITTESTDIR = os.path.dirname(os.path.abspath(__file__))
@@ -82,7 +83,13 @@ class TestEntity(unittest.TestCase):
         self.assertEqual(entity.to_xml().tag, "Property")
 
     def test_instantiation(self):
-        self.assertRaises(Exception, Entity())
+        e = Entity()
+        for attr in SPECIAL_ATTRIBUTES:
+            assert hasattr(e, attr)
+
+    def test_instantiation_bad_argument(self):
+        with self.assertRaises(Exception):
+            Entity(rol="File")
 
     def test_parse_role(self):
         """During parsing, the role of an entity is set explicitely. All other
-- 
GitLab