Skip to content
Snippets Groups Projects
Commit f3fcb67c authored by Henrik tom Wörden's avatar Henrik tom Wörden
Browse files

DOC: SyncNode

parent 81d9436b
No related branches found
No related tags found
2 merge requests: !178 "FIX: #96 Better error output for crawl.py script." and !167 "Sync Graph"
......@@ -24,7 +24,6 @@ from __future__ import annotations
import logging
from typing import Any, Dict, List, Optional, Union
from uuid import uuid4 as uuid
import linkahead as db
import yaml
......@@ -36,47 +35,59 @@ logger = logging.getLogger(__name__)
class SyncNode():
""" represents the information related to an Entity as it shall be created in LinkAhead
""" represents the information of an Entity as it shall be created in LinkAhead
The following information is taken from db.Entity object during initialization or when the
object is updated using `update(entity)`:
The following information is taken from an db.Entity object during initialization or when the
object is updated using the `update` member function:
- id
- role
- parents
- path
- file
- name
- description
- parents
- properties
Typically, this class is used in the following way:
1. A SyncNode is initialized with a db.Entity object
2. The SyncNode object is possibly updated one or more times with further db.Entity objects
3. A db.Entity object is created (`export_entity`) that contains the combined information of
the previous db.Entity objects.
1. A SyncNode is initialized with a db.Entity object.
2. The SyncNode object is possibly updated one or more times with other SyncNode objects.
3. A db.Entity object is created (`export_entity`) that contains the combined information.
"""
def __init__(self, entity: db.Entity, registered_identifiable: Optional[db.RecordType] =
None) -> None:
# db.Entity properties
self.id = entity.id
self.role = entity.role
self.parents = _ParentList().extend(entity.parents)
self.path = entity.path
self.file = entity.file
self.name = entity.name
self.description = entity.description
self.parents = _ParentList().extend(entity.parents)
self.properties = _Properties().extend(entity.properties)
self.uuid = uuid()
# other members
self.identifiable = None
self.registered_identifiable = registered_identifiable
self.other = []
def update(self, other: SyncNode) -> None:
"""update this node with information of given ``other`` SyncNode.
parents are added if they are not yet in the list
properties are added in any case. This may lead to duplication of properties.
"""
if other.identifiable is not None and self.identifiable is not None:
assert (other.identifiable.get_representation() ==
self.identifiable.get_representation())
if (other.identifiable.get_representation() != self.identifiable.get_representation()):
raise ValueError(
"The SyncNode that is used with update must have an equivalent"
f" identifiable. The identifiables where:\n"
f"{self.identifiable.get_representation()}\n"
f"and\n{other.identifiable.get_representation()}.")
if other.identifiable:
self.identifiable = other.identifiable
for attr in ["id", "path", "file", "role", "path", "name", "description"]:
for attr in ["id", "role", "path", "file", "name", "description"]:
if other.__getattribute__(attr) is not None:
if self.__getattribute__(attr) is None:
self.__setattr__(attr, other.__getattribute__(attr))
......@@ -89,6 +100,12 @@ class SyncNode():
self.properties.append(p)
def export_entity(self) -> db.Entity:
""" create a db.Entity object from this SyncNode
Properties are only added once (based on id or name). If values do not match, an Error is
raised. If values are SyncNode objects with IDs, they are considered equal if their IDs are
equal.
"""
ent = None
if self.role == "Record":
ent = db.Record()
......@@ -96,20 +113,32 @@ class SyncNode():
ent = db.File()
else:
raise RuntimeError("Invalid role")
for attr in ["id", "path", "file", "role", "path", "name", "description"]:
for attr in ["id", "role", "path", "file", "name", "description"]:
ent.__setattr__(attr, self.__getattribute__(attr))
for p in self.parents:
ent.add_parent(p)
for p in self.properties:
if ent.get_property(p) is not None:
if ent.get_property(p) is None:
ent.add_property(id=p.id, name=p.name, value=p.value)
else:
unequal = False
entval = ent.get_property(p).value
pval = p.value
if isinstance(entval, SyncNode) and entval.id is not None:
entval = entval.id
if isinstance(pval, SyncNode) and pval.id is not None:
pval = pval.id
if entval != pval:
if isinstance(entval, list) != isinstance(pval, list):
unequal = True
if not isinstance(entval, list):
entval = [entval]
if not isinstance(pval, list):
pval = [pval]
for e_el, p_el in zip(entval, pval):
if isinstance(e_el, SyncNode) and e_el.id is not None:
e_el = e_el.id
if isinstance(p_el, SyncNode) and p_el.id is not None:
p_el = p_el.id
if e_el != p_el:
unequal = True
if unequal:
logger.error("The Crawler is trying to create an entity,"
" but there are have conflicting property values."
f"Problematic Property: {p.name}\n"
......@@ -121,8 +150,6 @@ class SyncNode():
ime.pname = p.name
ime.values = (ent.get_property(p).value, p.value)
raise ime
else:
ent.add_property(id=p.id, name=p.name, value=p.value)
return ent
def __repr__(self):
......@@ -131,7 +158,7 @@ class SyncNode():
res += f"user: {self._metadata['user']}\n"
res += f"json: {self._metadata['json']}\n"
res += "---------------------------------------------------\n"
res += yaml.dump({"uuid": self.uuid.hex, "id": self.id, "name": self.name,
res += yaml.dump({"id": self.id, "name": self.name,
"parents": [el.name for el in self.parents]}, allow_unicode=True)
res += "---------------------------------------------------\n"
res += "properties:\n"
......@@ -156,6 +183,7 @@ class SyncNode():
def parent_in_list(parent, plist):
"""helper function that checks whether a parent with the same name or ID is in the plist"""
missing = False
if parent.name is not None:
if parent.name not in plist._element_by_name:
......@@ -167,6 +195,7 @@ def parent_in_list(parent, plist):
def property_in_list(prop, plist):
"""helper function that checks whether a property with the same name or ID is in the plist"""
missing = False
if prop.name is not None:
if prop.name not in plist._element_by_name:
......
......@@ -502,149 +502,6 @@ def test_something(simple_adapter):
assert b_prop.id == 101
def test_sync_node():
# initialization
rec = (db.Record(id=101, name='101')
.add_parent("A")
.add_parent(id=102)
.add_property(name="a", value='a')
.add_property(id=103, value='b'))
sn = SyncNode(rec)
assert "Record" in str(sn)
assert sn.id == rec.id
assert sn.name == rec.name
assert sn.parents == rec.parents
assert sn.properties == rec.properties
assert sn.description == rec.description
assert sn.role == rec.role
fi = db.File(id=101, name='101', path='/a/')
sn = SyncNode(fi)
assert sn.role == fi.role
assert sn.name == fi.name
assert sn.id == fi.id
assert sn.path == fi.path
export = sn.export_entity()
export == rec
# merge no common information
rec_a = (db.Record(name='101')
.add_parent("A")
.add_parent(id=102)
.add_property(name="a", value='a')
.add_property(id=103, value='b'))
rec_b = (db.Record(id=101)
.add_parent("B")
.add_parent(id=103)
.add_property(name="a", value='a')
.add_property(id=103, value='b'))
rec_b.description = "tja"
sn_a = SyncNode(rec_a)
sn_b = SyncNode(rec_b)
sn_a.update(sn_b)
assert sn_a.id == rec_b.id
assert sn_a.name == rec_a.name
for p in rec_a.parents + rec_b.parents:
assert p in sn_a.parents
for p in rec_a.properties + rec_b.properties:
assert p in sn_a.properties
assert sn_a.description == rec_b.description
assert sn_a.role == rec_a.role
export = sn_a.export_entity()
assert export.id == rec_b.id
assert export.name == rec_a.name
for p in rec_a.parents + rec_b.parents:
assert parent_in_list(p, export.parents)
# if p.name is not None:
# assert p.name in [el.name for el in export.parents]
# if p.id is not None:
# assert p.id in [el.id for el in export.parents]
for p in rec_a.properties + rec_b.properties:
if p.name is not None:
assert p.name in [el.name for el in export.properties]
if p.id is not None:
assert p.id in [el.id for el in export.properties]
assert len(export.properties) == 2
assert export.get_property('a').value == 'a'
assert export.get_property(103).value == 'b'
assert export.description == rec_b.description
assert export.role == rec_a.role
# merge with common information
rec_a = (db.Record(id=101, name='101')
.add_parent("A")
.add_parent(id=102)
.add_property(name="a", value='a'))
rec_b = (db.Record(id=101, name='101')
.add_parent("A")
.add_parent(id=102)
.add_property(name="a", value='a'))
sn_a = SyncNode(rec_a)
sn_b = SyncNode(rec_b)
sn_a.update(sn_b)
assert sn_a.id == rec_b.id
assert sn_a.name == rec_a.name
for p in rec_a.parents + rec_b.parents:
assert parent_in_list(p, sn_a.parents)
for p in rec_a.properties + rec_b.properties:
assert property_in_list(p, sn_a.properties)
assert sn_a.description == rec_b.description
assert sn_a.role == rec_a.role
# merge with conflicting information
sn_a = SyncNode(db.Record(id=102))
with pytest.raises(AssertionError):
sn_a.update(SyncNode(db.Record(id=101)))
sn_a = SyncNode(db.Record(name='102'))
with pytest.raises(AssertionError):
sn_a.update(SyncNode(db.Record(name='101')))
sn_a = SyncNode(db.Record(name='102'))
with pytest.raises(AssertionError):
sn_a.update(SyncNode(db.File(name='102')))
sn_a = SyncNode(db.Record(description='102'))
with pytest.raises(AssertionError):
sn_a.update(SyncNode(db.Record(description='101')))
sn_a = SyncNode(db.File(path='102'))
with pytest.raises(AssertionError):
sn_a.update(SyncNode(db.File(path='101')))
sn_a = SyncNode(db.File(path='102'))
sn_a.identifiable = Identifiable(name='a')
sn_b.identifiable = Identifiable(name='b')
sn_b = SyncNode(db.File(path='101'))
with pytest.raises(AssertionError):
sn_a.update(sn_b)
def test_export_node():
rec_a = (db.Record(id=101)
.add_parent("B")
.add_parent(id=103)
.add_property(name="a", value=[SyncNode(db.Record())])
.add_property(name='b', id=103, value='b'))
sn_a = SyncNode(rec_a)
exp = sn_a.export_entity()
assert exp.id == rec_a.id
assert exp.name == rec_a.name
for p in rec_a.parents:
assert len([el for el in exp.parents if p.name == el.name]) == 1
for p in rec_a.properties:
assert p.value == exp.get_property(p.name).value
if isinstance(p.value, list):
assert len(p.value) == len(exp.get_property(p.name).value)
assert len(exp.properties) == len(rec_a.properties)
assert len(exp.parents) == len(rec_a.parents)
# TODO create test that tests the assumptions after initialization:
# - no two (or more) nodes with the same id
......
#!/usr/bin/env python3
# encoding: utf-8
#
# This file is a part of the LinkAhead Project.
#
# Copyright (C) 2024 Indiscale GmbH <info@indiscale.com>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
#
from functools import partial
from unittest.mock import MagicMock, Mock, patch
import linkahead as db
import pytest
from caoscrawler.exceptions import ImpossibleMergeError
from caoscrawler.identifiable import Identifiable
from caoscrawler.identifiable_adapters import CaosDBIdentifiableAdapter
from caoscrawler.sync_graph import SyncGraph
from caoscrawler.sync_node import SyncNode, parent_in_list, property_in_list
from test_crawler import basic_retrieve_by_name_mock_up, mock_get_entity_by
def compare_parents(p1, p2):
    """Assert that two parent sequences match element by element.

    Both sequences must have the same length and, position by position, the
    same ``id`` and ``name``.

    Raises:
        AssertionError: if the lengths differ or any pair of parents differs.
    """
    p1, p2 = list(p1), list(p2)
    # zip() silently stops at the shorter input, so a missing or extra parent
    # would go unnoticed without this explicit length check.
    assert len(p1) == len(p2)
    for a, b in zip(p1, p2):
        assert a.id == b.id
        assert a.name == b.name
def compare_properties(p1, p2):
    """Assert that two property sequences match element by element.

    Both sequences must have the same length and, position by position, the
    same ``id``, ``name``, ``value`` and ``datatype``.

    Raises:
        AssertionError: if the lengths differ or any pair of properties differs.
    """
    p1, p2 = list(p1), list(p2)
    # zip() silently stops at the shorter input, so a missing or extra property
    # would go unnoticed without this explicit length check.
    assert len(p1) == len(p2)
    for a, b in zip(p1, p2):
        assert a.id == b.id
        assert a.name == b.name
        assert a.value == b.value
        assert a.datatype == b.datatype
def test_sync_node():
    """Check SyncNode initialization, export, merging via ``update`` and conflict detection."""
    # initialization
    rec = (db.Record(id=101, name='101')
           .add_parent("A")
           .add_parent("B")
           .add_parent(id=102)
           .add_property(name="a", value='a')
           .add_property(id=103, value='b'))
    rec.description = "hallo"
    sna = SyncNode(rec)
    # check information stored in initialized SyncNode
    assert "Record" in str(sna)
    assert sna.id == rec.id
    assert sna.role == rec.role
    assert sna.name == rec.name
    assert sna.description == rec.description
    compare_parents(sna.parents, rec.parents)
    compare_properties(sna.properties, rec.properties)

    # ... special case File (path and file attributes)
    fi = db.File(id=101, name='101', path='/a/')
    snb = SyncNode(fi)
    assert snb.role == fi.role
    assert snb.name == fi.name
    assert snb.id == fi.id
    assert snb.path == fi.path
    assert snb.file == fi.file

    # check information in exported db.Entity
    export = sna.export_entity()
    assert export.id == rec.id
    assert export.role == rec.role
    assert export.name == rec.name
    assert export.description == rec.description
    compare_parents(export.parents, rec.parents)
    compare_properties(export.properties, rec.properties)
    export = snb.export_entity()
    assert export.role == fi.role
    assert export.name == fi.name
    assert export.id == fi.id
    assert export.path == fi.path
    assert export.file == fi.file

    # merge no common information
    # ---------------------------
    rec_a = (db.Record(name='101')
             .add_parent("A")
             .add_parent(id=102)
             .add_property(name="a", value='a')
             .add_property(id=103, value='b'))

    rec_b = (db.Record(id=101)
             .add_parent("B")
             .add_parent(id=103)
             .add_property(name="a", value='a')
             .add_property(id=103, value='b'))
    rec_b.description = "tja"

    sn_a = SyncNode(rec_a)
    sn_b = SyncNode(rec_b)
    sn_a.update(sn_b)
    # test information in updated node
    assert sn_a.id == rec_b.id
    assert sn_a.role == rec_a.role
    assert sn_a.name == rec_a.name
    assert sn_a.description == rec_b.description
    for p in rec_a.parents + rec_b.parents:
        assert p in sn_a.parents
    for p in rec_a.properties + rec_b.properties:
        assert p in sn_a.properties
    # test information in exported entity
    export = sn_a.export_entity()
    assert export.id == rec_b.id
    assert export.name == rec_a.name
    for p in rec_a.parents + rec_b.parents:
        assert parent_in_list(p, export.parents)
    for p in rec_a.properties + rec_b.properties:
        if p.name is not None:
            assert p.name in [el.name for el in export.properties]
        if p.id is not None:
            assert p.id in [el.id for el in export.properties]
    # the properties of both records are exported only once
    assert len(export.properties) == 2
    assert export.get_property('a').value == 'a'
    assert export.get_property(103).value == 'b'
    assert export.description == rec_b.description
    assert export.role == rec_a.role

    # merge with common information
    # -----------------------------
    rec_a = (db.Record(id=101, name='101')
             .add_parent("A")
             .add_parent(id=102)
             .add_property(name="a", value='a'))

    rec_b = (db.Record(id=101, name='101')
             .add_parent("A")
             .add_parent(id=102)
             .add_property(name="a", value='a'))

    sn_a = SyncNode(rec_a)
    sn_b = SyncNode(rec_b)
    sn_a.update(sn_b)
    assert sn_a.id == rec_b.id
    assert sn_a.name == rec_a.name
    for p in rec_a.parents + rec_b.parents:
        assert parent_in_list(p, sn_a.parents)
    for p in rec_a.properties + rec_b.properties:
        assert property_in_list(p, sn_a.properties)
    assert sn_a.description == rec_b.description
    assert sn_a.role == rec_a.role

    # merge with conflicting information
    # ----------------------------------
    # ID mismatch
    sn_a = SyncNode(db.Record(id=102))
    with pytest.raises(AssertionError):
        sn_a.update(SyncNode(db.Record(id=101)))

    # name mismatch
    sn_a = SyncNode(db.Record(name='102'))
    with pytest.raises(AssertionError):
        sn_a.update(SyncNode(db.Record(name='101')))

    # role mismatch
    sn_a = SyncNode(db.Record(name='102'))
    with pytest.raises(AssertionError):
        sn_a.update(SyncNode(db.File(name='102')))

    # description mismatch
    sn_a = SyncNode(db.Record(description='102'))
    with pytest.raises(AssertionError):
        sn_a.update(SyncNode(db.Record(description='101')))

    # path mismatch
    sn_a = SyncNode(db.File(path='102'))
    with pytest.raises(AssertionError):
        sn_a.update(SyncNode(db.File(path='101')))

    # identifiable mismatch
    # The identifiable has to be attached to the freshly created ``sn_b``;
    # assigning it before rebinding ``sn_b`` would leave the new node without
    # an identifiable and the identifiable check would never be exercised.
    sn_a = SyncNode(db.File(path='102'))
    sn_a.identifiable = Identifiable(name='a')
    sn_b = SyncNode(db.File(path='101'))
    sn_b.identifiable = Identifiable(name='b')
    # ``update`` reports non-equivalent identifiables with a ValueError
    with pytest.raises(ValueError):
        sn_a.update(sn_b)
def test_export_node():
    """Exercise ``SyncNode.export_entity``, in particular the handling of duplicate properties."""
    source = (db.Record(id=101)
              .add_parent("B")
              .add_parent(id=103)
              .add_property(name="a", value=[SyncNode(db.Record())])
              .add_property(name='b', id=103, value='b'))

    exported = SyncNode(source).export_entity()
    assert exported.id == source.id
    assert exported.name == source.name
    # every parent of the source record shows up exactly once in the export
    for parent in source.parents:
        matches = [el for el in exported.parents if parent.name == el.name]
        assert len(matches) == 1
    for prop in source.properties:
        exported_value = exported.get_property(prop.name).value
        assert prop.value == exported_value
        if isinstance(prop.value, list):
            assert len(prop.value) == len(exported_value)
    assert len(exported.properties) == len(source.properties)
    assert len(exported.parents) == len(source.parents)

    # SyncNodes with same ID are considered equal
    source = (db.Record(id=101)
              .add_parent("B")
              .add_property(name="a", value=SyncNode(db.Record(id=1)))
              .add_property(name="a", value=SyncNode(db.Record(id=1))))
    exported = SyncNode(source).export_entity()
    assert exported.get_property('a').value.id == 1

    # same SyncNode object is obviously equal
    shared = SyncNode(db.Record(id=1))
    source = (db.Record(id=101)
              .add_parent("B")
              .add_property(name="a", value=shared)
              .add_property(name="a", value=shared))
    exported = SyncNode(source).export_entity()
    assert exported.get_property('a').value.id == 1

    # different SyncNode objects without IDs are not equal
    source = (db.Record(id=101)
              .add_parent("B")
              .add_property(name="a", value=SyncNode(db.Record()))
              .add_property(name="a", value=SyncNode(db.Record())))
    with pytest.raises(ImpossibleMergeError):
        SyncNode(source).export_entity()

    # different SyncNode objects with differing IDs are not equal
    source = (db.Record(id=101)
              .add_parent("B")
              .add_property(name="a", value=SyncNode(db.Record(id=1)))
              .add_property(name="a", value=SyncNode(db.Record(id=2))))
    with pytest.raises(ImpossibleMergeError):
        SyncNode(source).export_entity()

    # SyncNodes with same ID are considered equal (list)
    source = (db.Record(id=101)
              .add_parent("B")
              .add_property(name="a", value=[SyncNode(db.Record(id=1)), SyncNode(db.Record(id=2))])
              .add_property(name="a", value=[SyncNode(db.Record(id=1)), SyncNode(db.Record(id=2))]))
    exported = SyncNode(source).export_entity()
    assert exported.get_property('a').value[0].id == 1

    # same SyncNode object is obviously equal (list)
    shared = SyncNode(db.Record(id=1))
    source = (db.Record(id=101)
              .add_parent("B")
              .add_property(name="a", value=[shared])
              .add_property(name="a", value=[shared]))
    exported = SyncNode(source).export_entity()
    assert exported.get_property('a').value[0].id == 1

    # different SyncNode objects without IDs are not equal (list)
    source = (db.Record(id=101)
              .add_parent("B")
              .add_property(name="a", value=[SyncNode(db.Record())])
              .add_property(name="a", value=[SyncNode(db.Record())]))
    with pytest.raises(ImpossibleMergeError):
        SyncNode(source).export_entity()

    # different SyncNode objects with differing IDs are not equal (list)
    source = (db.Record(id=101)
              .add_parent("B")
              .add_property(name="a", value=[SyncNode(db.Record(id=1))])
              .add_property(name="a", value=[SyncNode(db.Record(id=2))]))
    with pytest.raises(ImpossibleMergeError):
        SyncNode(source).export_entity()

    # list vs no list
    source = (db.Record(id=101)
              .add_parent("B")
              .add_property(name="a", value=SyncNode(db.Record(id=1)))
              .add_property(name="a", value=[SyncNode(db.Record(id=1))]))
    with pytest.raises(ImpossibleMergeError):
        SyncNode(source).export_entity()
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment