Skip to content
Snippets Groups Projects
Commit 10b02c15 authored by Henrik tom Wörden's avatar Henrik tom Wörden
Browse files

wip

parent 7bff47f8
No related branches found
No related tags found
2 merge requests: !178 "FIX: #96 Better error output for crawl.py script.", !167 "Sync Graph"
......@@ -69,9 +69,9 @@ from .logging import configure_server_side_logging
from .macros import defmacro_constructor, macro_constructor
from .scanner import (create_converter_registry, initialize_converters,
load_definition, scan_directory, scan_structure_elements)
from .sync_graph import SyncGraph
from .stores import GeneralStore
from .structure_elements import StructureElement
from .sync_graph import SyncGraph
logger = logging.getLogger(__name__)
......@@ -368,32 +368,36 @@ class Crawler(object):
# 3. Can it be checked on the remote server?
for se in list(st.unchecked):
if st.identity_relies_on_unchecked_entity(se):
print(st.se.index(se), "relies on unchecked")
print(st.nodes.index(se), "relies on unchecked")
continue
st.make_identifiable(se)
print(st.se.index(se), "is now identifiable")
if st.merge_with_equivalent(se):
print('see above', "was merged")
entity_was_treated = True
if se.identifiable is None:
st.set_identifiable_of_node(se, st.identifiableAdapter.get_identifiable(
se, st.backward_id_referenced_by[se.uuid]))
# entity was merged with another due to the new identifiable
if se not in st.unchecked:
continue
# 2. Does it have to be new since a needed reference is missing?
# (Is it impossible to check this record because an identifiable references a
# missing record?)
if st.identity_relies_on_missing_entity(se):
st.set_missing(se)
# 3. check on the remote server
else:
st.check_remote_server(se)
print("checked", se.id)
if se.id is None:
st.set_missing(se)
print("missing")
else:
st.set_existing(se)
print("exisitng")
# if (equivalent_se.identifiable is None and not
# self.identity_relies_on_unchecked_entity(equivalent_se)):
# try:
# equivalent_se.identifiable = self.identifiableAdapter.get_identifiable(
# equivalent_se, self.backward_id_referenced_by[equivalent_se.uuid])
# if equivalent_se not in self.unchecked:
# self._identifiable_look_up[
# equivalent_se.identifiable.get_representation()
# ] = equivalent_se
# except Exception as es:
# print(es)
# pass
identified_record = (
st.identifiableAdapter.retrieve_identified_record_for_identifiable(
se.identifiable))
remote_id = None
if identified_record is not None:
remote_id = identified_record.id
st.set_id_of_node(se, remote_id)
entity_was_treated = True
# TODO
......@@ -403,7 +407,7 @@ class Crawler(object):
# We postponed the merge for records where it failed previously and try it again now.
# This only might add properties of the postponed records to the already used ones.
if len(st.unchecked) > 0:
circle = st.detect_circular_dependency()
circle = st.unchecked_contains_circular_dependency()
if circle is None:
logger.error("Failed, but found NO circular dependency. The data is as follows:"
# + str(self.compact_entity_list_representation(st.entities,
......
......@@ -233,7 +233,7 @@ startswith: bool, optional
refs.append(val)
return refs
def get_identifiable(self, se: SemanticEntity, identifiable_backrefs):
def get_identifiable(self, se: SyncNode, identifiable_backrefs):
"""
Retrieve the registered identifiable and fill the property values to create an
identifiable.
......
This diff is collapsed.
......@@ -8,7 +8,7 @@ import caosdb as db
import pytest
from caoscrawler.identifiable import Identifiable
from caoscrawler.identifiable_adapters import LocalStorageIdentifiableAdapter
from caoscrawler.sync_graph import SemanticEntity
from caoscrawler.sync_graph import SyncNode
from caosdb.cached import cache_clear
from caosdb.exceptions import EmptyUniqueQueryError
from pytest import raises
......@@ -30,11 +30,11 @@ def test_file_identifiable():
# Without a path there is no identifying information
with raises(ValueError):
ident.get_identifiable(SemanticEntity(db.File(), None), [])
ident.get_identifiable(SyncNode(db.File(), None), [])
fp = "/test/bla/bla.txt"
file_obj = db.File(path=fp)
identifiable = ident.get_identifiable(SemanticEntity(file_obj, None), [])
identifiable = ident.get_identifiable(SyncNode(file_obj, None), [])
# the path is copied to the identifiable
assert fp == identifiable.path
......
......@@ -37,7 +37,7 @@ from caoscrawler.identifiable import Identifiable
from caoscrawler.identifiable_adapters import (CaosDBIdentifiableAdapter,
IdentifiableAdapter,
convert_value)
from caoscrawler.sync_graph import SemanticEntity
from caoscrawler.sync_graph import SyncNode
UNITTESTDIR = Path(__file__).parent
......@@ -123,7 +123,7 @@ def test_load_from_yaml_file():
def test_non_default_name():
ident = CaosDBIdentifiableAdapter()
identifiable = ident.get_identifiable(SemanticEntity(db.Record(name="don't touch it")
identifiable = ident.get_identifiable(SyncNode(db.Record(name="don't touch it")
.add_parent("Person")
.add_property(name="last_name", value='Tom'), db.RecordType()
.add_parent(name="Person")
......@@ -135,9 +135,9 @@ def test_wildcard_ref():
ident = CaosDBIdentifiableAdapter()
rec = (db.Record(name="don't touch it").add_parent("Person")
.add_property(name="last_name", value='Tom'))
dummy = SemanticEntity(db.Record(), None)
dummy = SyncNode(db.Record(), None)
dummy.id = 1
identifiable = ident.get_identifiable(SemanticEntity(rec, db.RecordType()
identifiable = ident.get_identifiable(SyncNode(rec, db.RecordType()
.add_parent(name="Person")
.add_property(name="is_referenced_by", value=["*"])),
......@@ -162,7 +162,7 @@ def test_get_identifiable():
.add_parent(name="Experiment", id=3)
.add_property(name="date", value="2022-02-01")
.add_property(name="result", value="FAIL"))
se = SemanticEntity(rec,
se = SyncNode(rec,
ident.get_registered_identifiable(rec))
id_r0 = ident.get_identifiable(se, [])
r_cur = se.fragments[0]
......@@ -179,7 +179,7 @@ def test_get_identifiable():
.add_parent(name="A", id=3)
.add_property(name="a", value="2022-02-01")
.add_property(name="result", value="FAIL"))
se = SemanticEntity(rec, ident.get_registered_identifiable(rec))
se = SyncNode(rec, ident.get_registered_identifiable(rec))
se.fragments.extend([
db.Record()
.add_parent(name="A", id=3)
......@@ -202,7 +202,7 @@ def test_get_identifiable():
.add_parent(name="A")
.add_property(name="a", value="2")
)
se = SemanticEntity(rec, ident.get_registered_identifiable(rec))
se = SyncNode(rec, ident.get_registered_identifiable(rec))
se.fragments.extend([
db.Record(name='a')
.add_parent(name="A")
......
......@@ -22,8 +22,10 @@ from functools import partial
from unittest.mock import MagicMock, Mock, patch
import linkahead as db
import pytest
from caoscrawler.identifiable import Identifiable
from caoscrawler.identifiable_adapters import CaosDBIdentifiableAdapter
from caoscrawler.sync_graph import SemanticEntity, SyncGraph
from caoscrawler.sync_graph import SyncGraph, SyncNode
from test_crawler import basic_retrieve_by_name_mock_up, mock_get_entity_by
......@@ -51,8 +53,8 @@ def test_create_flat_list():
def test_create_reference_mapping():
a = db.Record().add_parent("A")
b = db.Record(id=132).add_parent("B").add_property('a', a)
ses = [SemanticEntity(a, db.RecordType().add_property("is_referenced_by", ["B"])),
SemanticEntity(b, db.RecordType().add_property("a"))]
ses = [SyncNode(a, db.RecordType().add_property("is_referenced_by", ["B"])),
SyncNode(b, db.RecordType().add_property("a"))]
(forward_references, backward_references, forward_id_references,
backward_id_references, forward_id_referenced_by,
......@@ -119,9 +121,6 @@ def test_merge_into():
ident_adapter.register_identifiable("RT2", ident_b)
st = SyncGraph([a, b], ident_adapter)
se_a = st.se_lookup[id(a)]
se_b = st.se_lookup[id(b)]
se_c = st.se_lookup[id(c)]
# CHECK REFERENCE MAP:
# c is referenced by a
......@@ -274,7 +273,7 @@ def test_backward_id_referenced_by():
entlist = [referenced, db.Record(name="A").add_parent("BR").add_property("ref", referenced), ]
st = SyncGraph(entlist, ident_adapter)
assert st.se[1] in st.backward_id_referenced_by[st.se[0].uuid]
assert st.nodes[1] in st.backward_id_referenced_by[st.nodes[0].uuid]
@patch("caoscrawler.sync_graph.cached_get_entity_by",
......@@ -294,20 +293,20 @@ def test_merging():
db.Record(id=101).add_parent("A"),
db.Record(id=101).add_parent("A")]
st = SyncGraph(entlist, ident_adapter)
assert len(st.se) == 1
assert len(st.nodes) == 1
assert len(st.unchecked) == 0
assert entlist[0] in st.se[0].fragments
assert entlist[1] in st.se[0].fragments
assert 101 == st.nodes[0].id
assert "A" == st.nodes[0].parents[0].name
# merging based on path
entlist = [
db.File(path='101').add_parent("A"),
db.File(path='101').add_parent("A")]
st = SyncGraph(entlist, ident_adapter)
assert len(st.se) == 1
assert len(st.nodes) == 1
assert len(st.unchecked) == 0
assert entlist[0] in st.se[0].fragments
assert entlist[1] in st.se[0].fragments
assert '101' == st.nodes[0].path
assert "A" == st.nodes[0].parents[0].name
# merging based on identifiable
entlist = [
......@@ -315,17 +314,18 @@ def test_merging():
db.File(name='101').add_parent("A").add_property('a', value=1)]
st = SyncGraph(entlist, ident_adapter)
assert len(st.unchecked) == 2
st.make_identifiable(st.se[0])
st.check_remote_server(st.se[0])
st.set_missing(st.se[0])
st.set_identifiable_of_node(st.nodes[0],
Identifiable(recordtype="A", name='101', properties={'a': 1}))
assert len(st.unchecked) == 2
st.set_identifiable_of_node(st.nodes[1],
Identifiable(recordtype="A", name='101', properties={'a': 1}))
assert len(st.unchecked) == 1
st.make_identifiable(st.se[1])
assert st.se[1].id is None
assert st.merge_with_equivalent(st.se[1])
assert len(st.se) == 1
assert len(st.unchecked) == 0
assert entlist[0] in st.se[0].fragments
assert entlist[1] in st.se[0].fragments
assert len(st.nodes) == 1
assert st.nodes[1].id is None
assert '101' == st.nodes[0].name
assert "A" == st.nodes[0].parents[0].name
assert 1 == st.nodes[0].properties[0].value
assert "a" == st.nodes[0].properties[0].name
# Merging a mix. One Record needs the identifiable to be merged. But the identifying
# information is scattered in the other case.
......@@ -336,11 +336,133 @@ def test_merging():
db.Record(name='a').add_parent("A").add_property('a', value=1)]
st = SyncGraph(entlist, ident_adapter)
assert len(st.se) == 2
assert len(st.nodes) == 2
assert len(st.unchecked) == 1
st.make_identifiable(st.se[1])
assert st.merge_with_equivalent(st.se[1])
assert len(st.se) == 1
st.make_identifiable(st.nodes[1])
assert st.merge_with_equivalent(st.nodes[1])
assert len(st.nodes) == 1
assert len(st.unchecked) == 0
for ii in range(4):
assert entlist[ii] in st.se[0].fragments
assert entlist[ii] in st.nodes[0].fragments
def test_sync_node():
    """Exercise ``SyncNode``: initialization, export and merging via ``update``.

    The test covers, in order:

    * a node created from a Record or a File mirrors the entity's attributes,
    * ``export_entity`` returns an entity carrying the node's information,
    * ``update`` combines two nodes without common information,
    * ``update`` accepts two nodes carrying identical information,
    * ``update`` raises ``AssertionError`` when the nodes conflict.
    """
    # initialization
    rec = (db.Record(id=101, name='101')
           .add_parent("A")
           .add_parent(id=102)
           .add_property(name="a", value='a')
           .add_property(id=103, value='b'))
    sn = SyncNode(rec)
    assert sn.id == rec.id
    assert sn.name == rec.name
    assert sn.parents == rec.parents
    assert sn.properties == rec.properties
    assert sn.description == rec.description
    assert sn.role == rec.role

    fi = db.File(id=101, name='101', path='/a/')
    sn = SyncNode(fi)
    assert sn.role == fi.role
    assert sn.name == fi.name
    assert sn.id == fi.id
    assert sn.path == fi.path

    # ``sn`` was built from ``fi`` above, so the export is checked attribute-wise
    # against the File. The previous bare ``export == rec`` expression had no
    # ``assert`` and therefore never checked anything.
    export = sn.export_entity()
    assert export.role == fi.role
    assert export.name == fi.name
    assert export.id == fi.id
    assert export.path == fi.path

    # merge no common information
    rec_a = (db.Record(name='101')
             .add_parent("A")
             .add_parent(id=102)
             .add_property(name="a", value='a')
             .add_property(id=103, value='b'))
    rec_b = (db.Record(id=101)
             .add_parent("B")
             .add_parent(id=103)
             .add_property(name="a", value='a')
             .add_property(id=103, value='b'))
    rec_b.description = "tja"

    sn_a = SyncNode(rec_a)
    sn_b = SyncNode(rec_b)
    sn_a.update(sn_b)
    # id and description are only set on rec_b, the name only on rec_a
    assert sn_a.id == rec_b.id
    assert sn_a.name == rec_a.name
    for p in rec_a.parents+rec_b.parents:
        assert p in sn_a.parents
    for p in rec_a.properties+rec_b.properties:
        assert p in sn_a.properties
    assert sn_a.description == rec_b.description
    assert sn_a.role == rec_a.role

    export = sn_a.export_entity()
    assert export.id == rec_b.id
    assert export.name == rec_a.name
    for p in rec_a.parents+rec_b.parents:
        assert p in export.parents
        # if p.name is not None:
        #     assert p.name in [el.name for el in export.parents]
        # if p.id is not None:
        #     assert p.id in [el.id for el in export.parents]
    for p in rec_a.properties+rec_b.properties:
        if p.name is not None:
            assert p.name in [el.name for el in export.properties]
        if p.id is not None:
            assert p.id in [el.id for el in export.properties]
    # both records carry the same two properties; they must not be duplicated
    assert len(export.properties) == 2
    assert export.get_property('a').value == 'a'
    assert export.get_property(103).value == 'b'
    assert export.description == rec_b.description
    assert export.role == rec_a.role

    # merge with common information
    rec_a = (db.Record(id=101, name='101')
             .add_parent("A")
             .add_parent(id=102)
             .add_property(name="a", value='a'))
    rec_b = (db.Record(id=101, name='101')
             .add_parent("A")
             .add_parent(id=102)
             .add_property(name="a", value='a'))

    sn_a = SyncNode(rec_a)
    sn_b = SyncNode(rec_b)
    sn_a.update(sn_b)
    assert sn_a.id == rec_b.id
    assert sn_a.name == rec_a.name
    for p in rec_a.parents+rec_b.parents:
        assert p in sn_a.parents
    for p in rec_a.properties+rec_b.properties:
        assert p in sn_a.properties
    assert sn_a.description == rec_b.description
    assert sn_a.role == rec_a.role

    # merge with conflicting information
    # conflicting ids
    sn_a = SyncNode(db.Record(id=102))
    with pytest.raises(AssertionError):
        sn_a.update(SyncNode(db.Record(id=101)))

    # conflicting names
    sn_a = SyncNode(db.Record(name='102'))
    with pytest.raises(AssertionError):
        sn_a.update(SyncNode(db.Record(name='101')))

    # same name, but Record vs. File
    sn_a = SyncNode(db.Record(name='102'))
    with pytest.raises(AssertionError):
        sn_a.update(SyncNode(db.File(name='102')))

    # conflicting descriptions
    sn_a = SyncNode(db.Record(description='102'))
    with pytest.raises(AssertionError):
        sn_a.update(SyncNode(db.Record(description='101')))

    # conflicting paths
    sn_a = SyncNode(db.File(path='102'))
    with pytest.raises(AssertionError):
        sn_a.update(SyncNode(db.File(path='101')))

    # conflicting identifiables
    sn_a = SyncNode(db.File(path='102'))
    sn_a.identifiable = Identifiable(name='a')
    # Create the second node first and only then attach its identifiable.
    # Previously the identifiable was set on the stale ``sn_b`` from the section
    # above, which was then replaced by a fresh node without an identifiable.
    sn_b = SyncNode(db.File(path='101'))
    sn_b.identifiable = Identifiable(name='b')
    # NOTE(review): the two paths differ as well, so the AssertionError may stem
    # from the path conflict rather than from the identifiables - confirm whether
    # equal paths should be used here to isolate the identifiable check.
    with pytest.raises(AssertionError):
        sn_a.update(sn_b)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment