From e8ad31d3931712f7de131d99d965587a4796164f Mon Sep 17 00:00:00 2001
From: "i.nueske" <i.nueske@indiscale.com>
Date: Mon, 4 Nov 2024 08:18:25 +0100
Subject: [PATCH] TST: Add tests for compare and fix a couple failing tests

---
 src/linkahead/apiutils.py      |  18 ++-
 src/linkahead/common/models.py |   2 +-
 unittests/test_apiutils.py     | 202 +++++++++++++++++++++++++++++++++
 3 files changed, 215 insertions(+), 7 deletions(-)

diff --git a/src/linkahead/apiutils.py b/src/linkahead/apiutils.py
index 53c9957e..35d33c8c 100644
--- a/src/linkahead/apiutils.py
+++ b/src/linkahead/apiutils.py
@@ -244,6 +244,8 @@ def compare_entities(entity0: Entity, entity1: Entity,
     #    - Do different inheritance levels for parents count as a difference?
     #    - Do we care about parents and properties of properties?
     #    - Should there be a more detailed comparison of parents without id?
+    #    - Revisit filter - do we care about RecordType when matching?
+    #      How to treat None?
     # Suggestions for enhancements:
     #    - For the comparison of entities in value and properties, consider
     #      keeping a list of traversed entities, not only look at first layer
@@ -321,8 +323,12 @@ def compare_entities(entity0: Entity, entity1: Entity,
                     # Compare Entities
                     if (compare_referenced_records and
                         isinstance(val0, Entity) and isinstance(val1, Entity)):
-                        if empty_diff(val0, val1, False,
-                                      entity_name_id_equivalency):
+                        try:
+                            same = empty_diff(val0, val1, False,
+                                              entity_name_id_equivalency)
+                        except:
+                            same = False
+                        if same:
                             continue
                     # Compare Entity name and id
                     if entity_name_id_equivalency:
@@ -381,10 +387,10 @@ def compare_entities(entity0: Entity, entity1: Entity,
             # As the importance of a property is an attribute of the record
             # and not the property, it is not contained in the diff returned
             # by compare_entities and needs to be added separately
-            if (entity0.get_importance(prop.name) !=
-                    entity1.get_importance(prop.name)):
-                propdiff[0]["importance"] = entity0.get_importance(prop.name)
-                propdiff[1]["importance"] = entity1.get_importance(prop.name)
+            if (entity0.get_importance(prop) !=
+                    entity1.get_importance(matching[0])):
+                propdiff[0]["importance"] = entity0.get_importance(prop)
+                propdiff[1]["importance"] = entity1.get_importance(matching[0])
 
             # in case there is no difference, we remove the dict keys again
             if len(propdiff[0]) == 0 and len(propdiff[1]) == 0:
diff --git a/src/linkahead/common/models.py b/src/linkahead/common/models.py
index ee42e06a..d3ef1e52 100644
--- a/src/linkahead/common/models.py
+++ b/src/linkahead/common/models.py
@@ -114,7 +114,7 @@ if TYPE_CHECKING:
     IMPORTANCE = Literal["OBLIGATORY", "RECOMMENDED", "SUGGESTED", "FIX", "NONE"]
     ROLE = Literal["Entity", "Record", "RecordType", "Property", "File"]
 
-SPECIAL_ATTRIBUTES = ["name", "role", "datatype", "description",
+SPECIAL_ATTRIBUTES = ["name", "role", "datatype", "description", "file",
                       "id", "path", "checksum", "size", "value", "unit"]
 
 
diff --git a/unittests/test_apiutils.py b/unittests/test_apiutils.py
index fe2155e5..c11cecdf 100644
--- a/unittests/test_apiutils.py
+++ b/unittests/test_apiutils.py
@@ -25,6 +25,7 @@
 # Test apiutils
 # A. Schlemmer, 02/2018
 
+from io import StringIO
 
 import linkahead as db
 import linkahead.apiutils
@@ -166,6 +167,207 @@ def test_compare_entities():
     assert diff_r2["properties"]["test"]["unit"] == "m"
 
 
+def test_compare_entities_battery():
+    par1, par2, par3 = db.Record(), db.Record(), db.RecordType()
+    r1, r2, r3 = db.Record(), db.Record(), db.Record()
+    prop1 = db.Property()
+    prop2 = db.Property(name="Property 2")
+    prop3 = db.Property()
+
+    ##  Basic tests for Properties
+    prop_settings = {"datatype": db.REFERENCE, "description": "desc of prop",
+                     "value": db.Record().add_parent(par3), "unit": '°'}
+    t1 = db.Record().add_parent(db.RecordType())
+    t2 = db.Record().add_parent(db.RecordType())
+    # Change datatype
+    t1.add_property(db.Property(name="datatype", **prop_settings))
+    prop_settings["datatype"] = par3
+    t2.add_property(db.Property(name="datatype", **prop_settings))
+    # Change description
+    t1.add_property(db.Property(name="description", **prop_settings))
+    prop_settings["description"] = "diff desc"
+    t2.add_property(db.Property(name="description", **prop_settings))
+    # Change value to copy
+    t1.add_property(db.Property(name="value copy", **prop_settings))
+    prop_settings["value"] = db.Record().add_parent(par3)
+    t2.add_property(db.Property(name="value copy", **prop_settings))
+    # Change value to something different
+    t1.add_property(db.Property(name="value", **prop_settings))
+    prop_settings["value"] = db.Record(name="n").add_parent(par3)
+    t2.add_property(db.Property(name="value", **prop_settings))
+    # Change unit
+    t1.add_property(db.Property(name="unit", **prop_settings))
+    prop_settings["unit"] = db.Property(unit='°')
+    t2.add_property(db.Property(name="unit", **prop_settings))
+    # Change unit again
+    t1.add_property(db.Property(name="unit 2", **prop_settings))
+    prop_settings["unit"] = db.Property()
+    t2.add_property(db.Property(name="unit 2", **prop_settings))
+    # Compare
+    diff_0 = compare_entities(t1, t2)
+    diff_1 = compare_entities(t1, t2, compare_referenced_records=True)
+    # Check correct detection of changes
+    assert diff_0[0]["properties"]["datatype"] == {"datatype": db.REFERENCE}
+    assert diff_0[1]["properties"]["datatype"] == {"datatype": par3}
+    assert diff_0[0]["properties"]["description"] == {"description": "desc of prop"}
+    assert diff_0[1]["properties"]["description"] == {"description": "diff desc"}
+    assert "value" in diff_0[0]["properties"]["value copy"]
+    assert "value" in diff_0[1]["properties"]["value copy"]
+    assert "value" in diff_0[0]["properties"]["value"]
+    assert "value" in diff_0[1]["properties"]["value"]
+    assert "unit" in diff_0[0]["properties"]["unit"]
+    assert "unit" in diff_0[1]["properties"]["unit"]
+    assert "unit" in diff_0[0]["properties"]["unit 2"]
+    assert "unit" in diff_0[1]["properties"]["unit 2"]
+    # Check correct result for compare_referenced_records=True
+    assert "value copy" not in diff_1[0]["properties"]
+    assert "value copy" not in diff_1[1]["properties"]
+    diff_0[0]["properties"].pop("value copy")
+    diff_0[1]["properties"].pop("value copy")
+    assert diff_0 == diff_1
+
+    ##  Basic tests for Parents
+    t3 = db.Record().add_parent(db.RecordType("A")).add_parent(db.Record("B"))
+    t4 = db.Record().add_parent(db.RecordType("A"))
+    assert compare_entities(t3, t4)[0]['parents'] == ['B']
+    assert len(compare_entities(t3, t4)[1]['parents']) == 0
+    t4.add_parent(db.Record("B"))
+    assert empty_diff(t3, t4)
+    # The two following assertions document current behaviour but do not make a
+    # lot of sense
+    t4.add_parent(db.Record("B"))
+    assert empty_diff(t3, t4)
+    t3.add_parent(db.RecordType("A")).add_parent(db.Record("B"))
+    t4.add_parent(db.RecordType("B")).add_parent(db.Record("A"))
+    assert empty_diff(t3, t4)
+
+    ##  Basic tests for special attributes
+    prop_settings = {"id": 42, "name": "Property",
+                     "datatype": db.LIST(db.REFERENCE), "value": [db.Record()],
+                     "unit": '€', "description": "desc of prop"}
+    alt_settings  = {"id": 64, "name": "Property 2",
+                     "datatype": db.LIST(db.TEXT), "value": [db.RecordType()],
+                     "unit": '€€', "description": " ę Ě ப ཾ ཿ ∛ ∜ ㅿ ㆀ 값 "}
+    t5 = db.Property(**prop_settings)
+    t6 = db.Property(**prop_settings)
+    assert empty_diff(t5, t6)
+    # ID
+    t5.id = alt_settings['id']
+    diff = compare_entities(t5, t6)
+    assert diff[0] == {'properties': {}, 'parents': [], 'id': alt_settings['id']}
+    assert diff[1] == {'properties': {}, 'parents': [], 'id': prop_settings['id']}
+    t6.id = alt_settings['id']
+    assert empty_diff(t5, t6)
+    # Name
+    t5.name = alt_settings['name']
+    diff = compare_entities(t5, t6)
+    assert diff[0] == {'properties': {}, 'parents': [], 'name': alt_settings['name']}
+    assert diff[1] == {'properties': {}, 'parents': [], 'name': prop_settings['name']}
+    t6.name = alt_settings['name']
+    assert empty_diff(t5, t6)
+    # Description
+    t6.description = alt_settings['description']
+    diff = compare_entities(t5, t6)
+    assert diff[0] == {'properties': {}, 'parents': [], 'description': prop_settings['description']}
+    assert diff[1] == {'properties': {}, 'parents': [], 'description': alt_settings['description']}
+    t5.description = alt_settings['description']
+    assert empty_diff(t5, t6)
+    # Unit
+    t5.unit = alt_settings['unit']
+    diff = compare_entities(t5, t6)
+    assert diff[0] == {'properties': {}, 'parents': [], 'unit': alt_settings['unit']}
+    assert diff[1] == {'properties': {}, 'parents': [], 'unit': prop_settings['unit']}
+    t6.unit = alt_settings['unit']
+    assert empty_diff(t5, t6)
+    # Value
+    t6.value = alt_settings['value']
+    diff = compare_entities(t5, t6)
+    assert diff[0] == {'properties': {}, 'parents': [], 'value': prop_settings['value']}
+    assert diff[1] == {'properties': {}, 'parents': [], 'value': alt_settings['value']}
+    t5.value = alt_settings['value']
+    assert empty_diff(t5, t6)
+    # Datatype
+    t6.datatype = alt_settings['datatype']
+    diff = compare_entities(t5, t6)
+    assert diff[0] == {'properties': {}, 'parents': [], 'datatype': prop_settings['datatype']}
+    assert diff[1] == {'properties': {}, 'parents': [], 'datatype': alt_settings['datatype']}
+    t5.datatype = alt_settings['datatype']
+    assert empty_diff(t5, t6)
+    # All at once
+    diff = compare_entities(db.Property(**prop_settings), db.Property(**alt_settings))
+    assert diff[0] == {'properties': {}, 'parents': [], **prop_settings}
+    assert diff[1] == {'properties': {}, 'parents': [], **alt_settings}
+    # Entity Type
+    diff = compare_entities(db.Property(value=db.Property(id=101)),
+                            db.Property(value=db.Record(id=101)))
+    assert "value" in diff[0]
+    assert "value" in diff[1]
+    diff = compare_entities(db.Property(value=db.Record(id=101)),
+                            db.Property(value=db.Record(id=101)))
+    assert "value" in diff[0]
+    assert "value" in diff[1]
+    assert empty_diff(db.Property(value=db.Record(id=101)),
+                      db.Property(value=db.Record(id=101)),
+                      compare_referenced_records=True)
+
+    ##  Special cases
+    # Files
+    assert not empty_diff(db.File(path='ABC', file=StringIO("ABC")),
+                          db.File(path='ABC', file=StringIO("Other")))
+    # Importance
+    assert empty_diff(db.Property().add_property(prop1),
+                      db.Property().add_property(prop1))
+    assert not empty_diff(db.Property().add_property(prop1, importance=db.SUGGESTED),
+                          db.Property().add_property(prop1, importance=db.OBLIGATORY))
+    # Mixed Lists
+    assert empty_diff(db.Property(value=[1, 2, 'a', r1]),
+                      db.Property(value=[1, 2, 'a', r1]))
+    # entity_name_id_equivalency
+    assert not empty_diff(db.Property(value=[1, db.Record(id=2), 3, db.Record(id=4)]),
+                          db.Property(value=[db.Record(id=1), 2, db.Record(id=3), 4]))
+    assert empty_diff(db.Property(value=[1, db.Record(id=2), 3, db.Record(id=4)]),
+                      db.Property(value=[db.Record(id=1), 2, db.Record(id=3), 4]),
+                      entity_name_id_equivalency=True)
+    assert empty_diff(db.Property(value=1), db.Property(value=db.Record(id=1)),
+                      entity_name_id_equivalency=True)
+    # entity_name_id_equivalency
+    prop4 = db.Property(**prop_settings).add_parent(par1).add_property(prop2)
+    prop4_c = db.Property(**prop_settings).add_parent(par1).add_property(prop2)
+    prop4.value = db.Record(id=12)
+    prop4_c.value = '12'
+    prop4.add_property(db.Property(name="diff", datatype=db.LIST(db.REFERENCE),
+                                     value=[12, db.Record(id=13), par1, "abc%"]))
+    prop4_c.add_property(db.Property(name="diff", datatype=db.LIST(db.REFERENCE),
+                                     value=[db.Record(id=12), "13", par1, "abc%"]))
+    assert not empty_diff(prop4, prop4_c, entity_name_id_equivalency=False)
+    assert empty_diff(prop4, prop4_c, entity_name_id_equivalency=True)
+    # Order invariance
+    t7 = db.Property(**prop_settings).add_parent(par1).add_property(prop1)
+    t8 = db.Property(**alt_settings).add_parent(par3).add_property(prop3)
+    try:
+        diffs_0 = compare_entities(t7, t8), compare_entities(t7, t8, True)
+    except:
+        diffs_0 = None
+    try:
+        diffs_1 = compare_entities(t8, t7)[::-1], compare_entities(t8, t7, True)[::-1]
+    except:
+        diffs_1 = None
+    assert diffs_0 == diffs_1
+    prop_settings = {"datatype": db.REFERENCE, "description": "desc of prop",
+                     "value": db.Record().add_parent(par3), "unit": '°'}
+    t1.add_property(db.Property(name="description", **prop_settings))
+    t2.add_property(db.Property(name="description", **prop_settings))
+    try:
+        diffs_0 = compare_entities(t1, t2), compare_entities(t1, t2, True)
+    except Exception as e:
+        diffs_0 = type(e)
+    try:
+        diffs_1 = compare_entities(t2, t1)[::-1], compare_entities(t2, t1, True)[::-1]
+    except Exception as e:
+        diffs_1 = type(e)
+    assert diffs_0 == diffs_1
+
+
 def test_compare_special_properties():
     # Test for all known special properties:
     INTS = ("size", "id")
-- 
GitLab