diff --git a/src/caoscrawler/sync_node.py b/src/caoscrawler/sync_node.py
index 1daf358f1877c9f454d5982d8bfe999aa7edf6ec..4a384cfd0a9c4d9c21b7b279a699c198773808fb 100644
--- a/src/caoscrawler/sync_node.py
+++ b/src/caoscrawler/sync_node.py
@@ -24,7 +24,6 @@ from __future__ import annotations
 
 import logging
 from typing import Any, Dict, List, Optional, Union
-from uuid import uuid4 as uuid
 
 import linkahead as db
 import yaml
@@ -36,47 +35,59 @@ logger = logging.getLogger(__name__)
 
 
 class SyncNode():
-    """ represents the information related to an Entity as it shall be created in LinkAhead
+    """ represents the information of an Entity as it shall be created in LinkAhead
 
-    The following information is taken from db.Entity object during initialization or when the
-    object is updated using `update(entity)`:
+    The following information is taken from a db.Entity object during initialization or when the
+    object is updated using the `update` member function:
     - id
     - role
-    - parents
     - path
+    - file
     - name
     - description
+    - parents
     - properties
 
     Typically, this class is used in the following way:
-    1. A SyncNode is initialized with a db.Entity object
-    2. The SyncNode object is possibly updated one or more times with further db.Entity objects
-    3. A db.Entity object is created (`export_entity`) that contains the combined information of
-       the previous db.Entity objects.
+    1. A SyncNode is initialized with a db.Entity object.
+    2. The SyncNode object is possibly updated one or more times with other SyncNode objects.
+    3. A db.Entity object is created (`export_entity`) that contains the combined information.
     """
 
     def __init__(self, entity: db.Entity, registered_identifiable: Optional[db.RecordType] =
                  None) -> None:
+        # db.Entity properties
         self.id = entity.id
         self.role = entity.role
-        self.parents = _ParentList().extend(entity.parents)
         self.path = entity.path
         self.file = entity.file
         self.name = entity.name
         self.description = entity.description
+        self.parents = _ParentList().extend(entity.parents)
         self.properties = _Properties().extend(entity.properties)
-        self.uuid = uuid()
+        # other members
         self.identifiable = None
         self.registered_identifiable = registered_identifiable
         self.other = []
 
     def update(self, other: SyncNode) -> None:
+        """update this node with information of given ``other`` SyncNode.
+
+        Parents are added if they are not yet in the list; properties are always added (possibly
+        duplicated). Raises a ValueError if both nodes have non-equivalent identifiables.
+        """
+
         if other.identifiable is not None and self.identifiable is not None:
-            assert (other.identifiable.get_representation() ==
-                    self.identifiable.get_representation())
+            if (other.identifiable.get_representation() != self.identifiable.get_representation()):
+                raise ValueError(
+                    "The SyncNode that is used with update must have an equivalent"
+                    f" identifiable. The identifiables where:\n"
+                    f"{self.identifiable.get_representation()}\n"
+                    f"and\n{other.identifiable.get_representation()}.")
+
         if other.identifiable:
             self.identifiable = other.identifiable
-        for attr in ["id", "path", "file", "role", "path", "name", "description"]:
+        for attr in ["id", "role", "path", "file", "name", "description"]:
             if other.__getattribute__(attr) is not None:
                 if self.__getattribute__(attr) is None:
                     self.__setattr__(attr, other.__getattribute__(attr))
@@ -89,6 +100,12 @@ class SyncNode():
             self.properties.append(p)
 
     def export_entity(self) -> db.Entity:
+        """ create a db.Entity object from this SyncNode
+
+        Properties are only added once (based on id or name). If values do not match, an Error is
+        raised. If values are SyncNode objects with IDs, they are considered equal if their IDs are
+        equal.
+        """
         ent = None
         if self.role == "Record":
             ent = db.Record()
@@ -96,20 +113,32 @@ class SyncNode():
             ent = db.File()
         else:
             raise RuntimeError("Invalid role")
-        for attr in ["id", "path", "file", "role", "path", "name", "description"]:
+        for attr in ["id", "role", "path", "file", "name", "description"]:
             ent.__setattr__(attr, self.__getattribute__(attr))
         for p in self.parents:
             ent.add_parent(p)
         for p in self.properties:
-            if ent.get_property(p) is not None:
+            if ent.get_property(p) is None:
+                ent.add_property(id=p.id, name=p.name, value=p.value)
+            else:
                 entval = ent.get_property(p).value
                 pval = p.value
-                if isinstance(entval, SyncNode) and entval.id is not None:
-                    entval = entval.id
-                if isinstance(pval, SyncNode) and pval.id is not None:
-                    pval = pval.id
-
-                if entval != pval:
+                unequal = isinstance(entval, list) != isinstance(pval, list)
+                if not isinstance(entval, list):
+                    entval = [entval]
+                if not isinstance(pval, list):
+                    pval = [pval]
+                if len(entval) != len(pval):  # zip() below stops at the shorter list
+                    unequal = True
+                for e_el, p_el in zip(entval, pval):
+                    if isinstance(e_el, SyncNode) and e_el.id is not None:
+                        e_el = e_el.id
+                    if isinstance(p_el, SyncNode) and p_el.id is not None:
+                        p_el = p_el.id
+                    if e_el != p_el:
+                        unequal = True
+
+                if unequal:
                     logger.error("The Crawler is trying to create an entity,"
                                  " but there are have conflicting property values."
                                  f"Problematic Property: {p.name}\n"
@@ -121,8 +150,6 @@ class SyncNode():
                     ime.pname = p.name
                     ime.values = (ent.get_property(p).value, p.value)
                     raise ime
-            else:
-                ent.add_property(id=p.id, name=p.name, value=p.value)
         return ent
 
     def __repr__(self):
@@ -131,7 +158,7 @@ class SyncNode():
             res += f"user: {self._metadata['user']}\n"
             res += f"json: {self._metadata['json']}\n"
         res += "---------------------------------------------------\n"
-        res += yaml.dump({"uuid": self.uuid.hex, "id": self.id, "name": self.name,
+        res += yaml.dump({"id": self.id, "name": self.name,
                           "parents": [el.name for el in self.parents]}, allow_unicode=True)
         res += "---------------------------------------------------\n"
         res += "properties:\n"
@@ -156,6 +183,7 @@ class SyncNode():
 
 
 def parent_in_list(parent, plist):
+    """helper function that checks whether a parent with the same name or ID is in the plist"""
     missing = False
     if parent.name is not None:
         if parent.name not in plist._element_by_name:
@@ -167,6 +195,7 @@ def parent_in_list(parent, plist):
 
 
 def property_in_list(prop, plist):
+    """helper function that checks whether a property with the same name or ID is in the plist"""
     missing = False
     if prop.name is not None:
         if prop.name not in plist._element_by_name:
diff --git a/unittests/test_sync_graph.py b/unittests/test_sync_graph.py
index b8996d30f27207bb04bad8cba1f10a406295f666..c53e7ad4e60388f4248838624e1aa88bcc70fab9 100644
--- a/unittests/test_sync_graph.py
+++ b/unittests/test_sync_graph.py
@@ -502,149 +502,6 @@ def test_something(simple_adapter):
     assert b_prop.id == 101
 
 
-def test_sync_node():
-    # initialization
-    rec = (db.Record(id=101, name='101')
-           .add_parent("A")
-           .add_parent(id=102)
-           .add_property(name="a", value='a')
-           .add_property(id=103, value='b'))
-    sn = SyncNode(rec)
-    assert "Record" in str(sn)
-    assert sn.id == rec.id
-    assert sn.name == rec.name
-    assert sn.parents == rec.parents
-    assert sn.properties == rec.properties
-    assert sn.description == rec.description
-    assert sn.role == rec.role
-    fi = db.File(id=101, name='101', path='/a/')
-    sn = SyncNode(fi)
-    assert sn.role == fi.role
-    assert sn.name == fi.name
-    assert sn.id == fi.id
-    assert sn.path == fi.path
-
-    export = sn.export_entity()
-    export == rec
-
-    # merge no common information
-    rec_a = (db.Record(name='101')
-             .add_parent("A")
-             .add_parent(id=102)
-             .add_property(name="a", value='a')
-             .add_property(id=103, value='b'))
-
-    rec_b = (db.Record(id=101)
-             .add_parent("B")
-             .add_parent(id=103)
-             .add_property(name="a", value='a')
-             .add_property(id=103, value='b'))
-    rec_b.description = "tja"
-
-    sn_a = SyncNode(rec_a)
-    sn_b = SyncNode(rec_b)
-    sn_a.update(sn_b)
-    assert sn_a.id == rec_b.id
-    assert sn_a.name == rec_a.name
-    for p in rec_a.parents + rec_b.parents:
-        assert p in sn_a.parents
-    for p in rec_a.properties + rec_b.properties:
-        assert p in sn_a.properties
-    assert sn_a.description == rec_b.description
-    assert sn_a.role == rec_a.role
-
-    export = sn_a.export_entity()
-    assert export.id == rec_b.id
-    assert export.name == rec_a.name
-    for p in rec_a.parents + rec_b.parents:
-        assert parent_in_list(p, export.parents)
-        # if p.name is not None:
-        #    assert p.name in [el.name for el in export.parents]
-        # if p.id is not None:
-        #    assert p.id in [el.id for el in export.parents]
-    for p in rec_a.properties + rec_b.properties:
-        if p.name is not None:
-            assert p.name in [el.name for el in export.properties]
-        if p.id is not None:
-            assert p.id in [el.id for el in export.properties]
-    assert len(export.properties) == 2
-    assert export.get_property('a').value == 'a'
-    assert export.get_property(103).value == 'b'
-    assert export.description == rec_b.description
-    assert export.role == rec_a.role
-
-    # merge with common information
-    rec_a = (db.Record(id=101, name='101')
-             .add_parent("A")
-             .add_parent(id=102)
-             .add_property(name="a", value='a'))
-
-    rec_b = (db.Record(id=101, name='101')
-             .add_parent("A")
-             .add_parent(id=102)
-             .add_property(name="a", value='a'))
-
-    sn_a = SyncNode(rec_a)
-    sn_b = SyncNode(rec_b)
-    sn_a.update(sn_b)
-    assert sn_a.id == rec_b.id
-    assert sn_a.name == rec_a.name
-    for p in rec_a.parents + rec_b.parents:
-        assert parent_in_list(p, sn_a.parents)
-    for p in rec_a.properties + rec_b.properties:
-        assert property_in_list(p, sn_a.properties)
-    assert sn_a.description == rec_b.description
-    assert sn_a.role == rec_a.role
-
-    # merge with conflicting information
-    sn_a = SyncNode(db.Record(id=102))
-    with pytest.raises(AssertionError):
-        sn_a.update(SyncNode(db.Record(id=101)))
-
-    sn_a = SyncNode(db.Record(name='102'))
-    with pytest.raises(AssertionError):
-        sn_a.update(SyncNode(db.Record(name='101')))
-
-    sn_a = SyncNode(db.Record(name='102'))
-    with pytest.raises(AssertionError):
-        sn_a.update(SyncNode(db.File(name='102')))
-
-    sn_a = SyncNode(db.Record(description='102'))
-    with pytest.raises(AssertionError):
-        sn_a.update(SyncNode(db.Record(description='101')))
-
-    sn_a = SyncNode(db.File(path='102'))
-    with pytest.raises(AssertionError):
-        sn_a.update(SyncNode(db.File(path='101')))
-
-    sn_a = SyncNode(db.File(path='102'))
-    sn_a.identifiable = Identifiable(name='a')
-    sn_b.identifiable = Identifiable(name='b')
-    sn_b = SyncNode(db.File(path='101'))
-    with pytest.raises(AssertionError):
-        sn_a.update(sn_b)
-
-
-def test_export_node():
-    rec_a = (db.Record(id=101)
-             .add_parent("B")
-             .add_parent(id=103)
-             .add_property(name="a", value=[SyncNode(db.Record())])
-             .add_property(name='b', id=103, value='b'))
-
-    sn_a = SyncNode(rec_a)
-    exp = sn_a.export_entity()
-    assert exp.id == rec_a.id
-    assert exp.name == rec_a.name
-    for p in rec_a.parents:
-        assert len([el for el in exp.parents if p.name == el.name]) == 1
-    for p in rec_a.properties:
-        assert p.value == exp.get_property(p.name).value
-        if isinstance(p.value, list):
-            assert len(p.value) == len(exp.get_property(p.name).value)
-    assert len(exp.properties) == len(rec_a.properties)
-    assert len(exp.parents) == len(rec_a.parents)
-
 
 # TODO create test that tests the assumptions after initialization:
 # - no two (or more) nodes with the same id
diff --git a/unittests/test_sync_node.py b/unittests/test_sync_node.py
new file mode 100644
index 0000000000000000000000000000000000000000..ff9e0f214c9501933b9692b602bf6ade20918bf0
--- /dev/null
+++ b/unittests/test_sync_node.py
@@ -0,0 +1,296 @@
+#!/usr/bin/env python3
+# encoding: utf-8
+#
+# This file is a part of the LinkAhead Project.
+#
+# Copyright (C) 2024 Indiscale GmbH <info@indiscale.com>
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as
+# published by the Free Software Foundation, either version 3 of the
+# License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+#
+from functools import partial
+from unittest.mock import MagicMock, Mock, patch
+
+import linkahead as db
+import pytest
+from caoscrawler.exceptions import ImpossibleMergeError
+from caoscrawler.identifiable import Identifiable
+from caoscrawler.identifiable_adapters import CaosDBIdentifiableAdapter
+from caoscrawler.sync_graph import SyncGraph
+from caoscrawler.sync_node import SyncNode, parent_in_list, property_in_list
+
+from test_crawler import basic_retrieve_by_name_mock_up, mock_get_entity_by
+
+
+def compare_parents(p1, p2):
+    """Assert that both parent lists have equal length and pairwise equal id and name."""
+    assert len(p1) == len(p2)  # zip() alone would ignore surplus elements
+    for a, b in zip(p1, p2):
+        assert a.id == b.id
+        assert a.name == b.name
+
+
+def compare_properties(p1, p2):
+    """Assert equal length and pairwise equal id, name, value and datatype of properties."""
+    assert len(p1) == len(p2)  # zip() alone would ignore surplus elements
+    for a, b in zip(p1, p2):
+        assert a.id == b.id
+        assert a.name == b.name
+        assert a.value == b.value
+        assert a.datatype == b.datatype
+
+
+def test_sync_node():
+    # initialization
+    rec = (db.Record(id=101, name='101')
+           .add_parent("A")
+           .add_parent("B")
+           .add_parent(id=102)
+           .add_property(name="a", value='a')
+           .add_property(id=103, value='b'))
+    rec.description = "hallo"
+    sna = SyncNode(rec)
+    # check information stored in initialized SyncNode
+    assert "Record" in str(sna)
+    assert sna.id == rec.id
+    assert sna.role == rec.role
+    assert sna.name == rec.name
+    assert sna.description == rec.description
+    compare_parents(sna.parents, rec.parents)
+    compare_properties(sna.properties, rec.properties)
+    # ... special case File (path and file attributes)
+    fi = db.File(id=101, name='101', path='/a/')
+    snb = SyncNode(fi)
+    assert snb.role == fi.role
+    assert snb.name == fi.name
+    assert snb.id == fi.id
+    assert snb.path == fi.path
+    assert snb.file == fi.file
+
+    # check information in exported db.Entity
+    export = sna.export_entity()
+    assert export.id == rec.id
+    assert export.role == rec.role
+    assert export.name == rec.name
+    assert export.description == rec.description
+    compare_parents(export.parents, rec.parents)
+    compare_properties(export.properties, rec.properties)
+    export = snb.export_entity()
+    assert export.role == fi.role
+    assert export.name == fi.name
+    assert export.id == fi.id
+    assert export.path == fi.path
+    assert export.file == fi.file
+
+    # merge no common information
+    # ---------------------------
+    rec_a = (db.Record(name='101')
+             .add_parent("A")
+             .add_parent(id=102)
+             .add_property(name="a", value='a')
+             .add_property(id=103, value='b'))
+
+    rec_b = (db.Record(id=101)
+             .add_parent("B")
+             .add_parent(id=103)
+             .add_property(name="a", value='a')
+             .add_property(id=103, value='b'))
+    rec_b.description = "tja"
+
+    sn_a = SyncNode(rec_a)
+    sn_b = SyncNode(rec_b)
+    sn_a.update(sn_b)
+    # test information in updated node
+    assert sn_a.id == rec_b.id
+    assert sn_a.role == rec_a.role
+    assert sn_a.name == rec_a.name
+    assert sn_a.description == rec_b.description
+    for p in rec_a.parents + rec_b.parents:
+        assert p in sn_a.parents
+    for p in rec_a.properties + rec_b.properties:
+        assert p in sn_a.properties
+
+    # test information in exported entity
+    export = sn_a.export_entity()
+    assert export.id == rec_b.id
+    assert export.name == rec_a.name
+    for p in rec_a.parents + rec_b.parents:
+        assert parent_in_list(p, export.parents)
+    for p in rec_a.properties + rec_b.properties:
+        if p.name is not None:
+            assert p.name in [el.name for el in export.properties]
+        if p.id is not None:
+            assert p.id in [el.id for el in export.properties]
+    assert len(export.properties) == 2
+    assert export.get_property('a').value == 'a'
+    assert export.get_property(103).value == 'b'
+    assert export.description == rec_b.description
+    assert export.role == rec_a.role
+
+    # merge with common information
+    # -----------------------------
+    rec_a = (db.Record(id=101, name='101')
+             .add_parent("A")
+             .add_parent(id=102)
+             .add_property(name="a", value='a'))
+
+    rec_b = (db.Record(id=101, name='101')
+             .add_parent("A")
+             .add_parent(id=102)
+             .add_property(name="a", value='a'))
+
+    sn_a = SyncNode(rec_a)
+    sn_b = SyncNode(rec_b)
+    sn_a.update(sn_b)
+    assert sn_a.id == rec_b.id
+    assert sn_a.name == rec_a.name
+    for p in rec_a.parents + rec_b.parents:
+        assert parent_in_list(p, sn_a.parents)
+    for p in rec_a.properties + rec_b.properties:
+        assert property_in_list(p, sn_a.properties)
+    assert sn_a.description == rec_b.description
+    assert sn_a.role == rec_a.role
+
+    # merge with conflicting information
+    # ----------------------------------
+    sn_a = SyncNode(db.Record(id=102))
+    with pytest.raises(AssertionError):
+        sn_a.update(SyncNode(db.Record(id=101)))
+
+    sn_a = SyncNode(db.Record(name='102'))
+    with pytest.raises(AssertionError):
+        sn_a.update(SyncNode(db.Record(name='101')))
+
+    sn_a = SyncNode(db.Record(name='102'))
+    with pytest.raises(AssertionError):
+        sn_a.update(SyncNode(db.File(name='102')))
+
+    sn_a = SyncNode(db.Record(description='102'))
+    with pytest.raises(AssertionError):
+        sn_a.update(SyncNode(db.Record(description='101')))
+
+    sn_a = SyncNode(db.File(path='102'))
+    with pytest.raises(AssertionError):
+        sn_a.update(SyncNode(db.File(path='101')))
+
+    sn_a = SyncNode(db.File(path='102'))
+    sn_a.identifiable = Identifiable(name='a')
+    sn_b = SyncNode(db.File(path='101'))
+    sn_b.identifiable = Identifiable(name='b')
+    with pytest.raises(ValueError, match="identifiable"):
+        sn_a.update(sn_b)
+
+
+def test_export_node():
+    rec_a = (db.Record(id=101)
+             .add_parent("B")
+             .add_parent(id=103)
+             .add_property(name="a", value=[SyncNode(db.Record())])
+             .add_property(name='b', id=103, value='b'))
+
+    sn_a = SyncNode(rec_a)
+    exp = sn_a.export_entity()
+    assert exp.id == rec_a.id
+    assert exp.name == rec_a.name
+    for p in rec_a.parents:
+        assert len([el for el in exp.parents if p.name == el.name]) == 1
+    for p in rec_a.properties:
+        assert p.value == exp.get_property(p.name).value
+        if isinstance(p.value, list):
+            assert len(p.value) == len(exp.get_property(p.name).value)
+    assert len(exp.properties) == len(rec_a.properties)
+    assert len(exp.parents) == len(rec_a.parents)
+
+
+    # SyncNodes with same ID are considered equal
+    rec_a = (db.Record(id=101)
+             .add_parent("B")
+             .add_property(name="a", value=SyncNode(db.Record(id=1)))
+             .add_property(name="a", value=SyncNode(db.Record(id=1))))
+
+    exp = SyncNode(rec_a).export_entity()
+    assert exp.get_property('a').value.id == 1
+
+    # same SyncNode object is obviously equal
+    sn = SyncNode(db.Record(id=1))
+    rec_a = (db.Record(id=101)
+             .add_parent("B")
+             .add_property(name="a", value=sn)
+             .add_property(name="a", value=sn))
+
+    exp = SyncNode(rec_a).export_entity()
+    assert exp.get_property('a').value.id == 1
+
+    # different SyncNode Objects are not equal
+    rec_a = (db.Record(id=101)
+             .add_parent("B")
+             .add_property(name="a", value=SyncNode(db.Record()))
+             .add_property(name="a", value=SyncNode(db.Record())))
+
+    with pytest.raises(ImpossibleMergeError):
+        exp = SyncNode(rec_a).export_entity()
+
+    # different SyncNode Objects with differing IDs are not equal
+    rec_a = (db.Record(id=101)
+             .add_parent("B")
+             .add_property(name="a", value=SyncNode(db.Record(id=1)))
+             .add_property(name="a", value=SyncNode(db.Record(id=2))))
+
+    with pytest.raises(ImpossibleMergeError):
+        exp = SyncNode(rec_a).export_entity()
+
+    # SyncNodes with same ID are considered equal (list)
+    rec_a = (db.Record(id=101)
+             .add_parent("B")
+             .add_property(name="a", value=[SyncNode(db.Record(id=1)), SyncNode(db.Record(id=2))])
+             .add_property(name="a", value=[SyncNode(db.Record(id=1)), SyncNode(db.Record(id=2))]))
+
+    exp = SyncNode(rec_a).export_entity()
+    assert exp.get_property('a').value[0].id == 1
+
+    # same SyncNode object is obviously equal (list)
+    sn = SyncNode(db.Record(id=1))
+    rec_a = (db.Record(id=101)
+             .add_parent("B")
+             .add_property(name="a", value=[sn])
+             .add_property(name="a", value=[sn]))
+
+    exp = SyncNode(rec_a).export_entity()
+    assert exp.get_property('a').value[0].id == 1
+
+    # different SyncNode Objects are not equal (list)
+    rec_a = (db.Record(id=101)
+             .add_parent("B")
+             .add_property(name="a", value=[SyncNode(db.Record())])
+             .add_property(name="a", value=[SyncNode(db.Record())]))
+
+    with pytest.raises(ImpossibleMergeError):
+        exp = SyncNode(rec_a).export_entity()
+
+    # different SyncNode Objects with differing IDs are not equal (list)
+    rec_a = (db.Record(id=101)
+             .add_parent("B")
+             .add_property(name="a", value=[SyncNode(db.Record(id=1))])
+             .add_property(name="a", value=[SyncNode(db.Record(id=2))]))
+
+    with pytest.raises(ImpossibleMergeError):
+        exp = SyncNode(rec_a).export_entity()
+
+    # list vs no list
+    rec_a = (db.Record(id=101)
+             .add_parent("B")
+             .add_property(name="a", value=SyncNode(db.Record(id=1)))
+             .add_property(name="a", value=[SyncNode(db.Record(id=1))]))
+
+    with pytest.raises(ImpossibleMergeError):
+        exp = SyncNode(rec_a).export_entity()