Skip to content
Snippets Groups Projects
Commit aeeee395 authored by Henrik tom Wörden's avatar Henrik tom Wörden
Browse files

wip

parent be332c45
No related branches found
No related tags found
2 merge requests!178FIX: #96 Better error output for crawl.py script.,!167Sync Graph
Pipeline #49919 failed
......@@ -252,7 +252,6 @@ startswith: bool, optional
property_name_list_A = []
property_name_list_B = []
identifiable_props = {}
identifiable_backrefs = []
name = None
# TODO
......@@ -316,7 +315,7 @@ startswith: bool, optional
if registered_identifiable else None),
name=name,
properties=identifiable_props,
backrefs=identifiable_backrefs
backrefs=[e.id for e in identifiable_backrefs]
)
except Exception:
logger.error(f"Error while creating identifiable for this record:\n{se}")
......
......@@ -151,6 +151,8 @@ class SemanticTarget():
def add_to_missing(self, se: SemanticEntity):
assert se.id is None
if se.path is None and se.identifiable is None:
raise RuntimeError("no identifying information")
se.id = self._remote_missing_counter
self._remote_missing_counter -= 1
self._add_any(se, self._missing)
......@@ -393,7 +395,6 @@ class SemanticTarget():
el.name == p.name]) > 0:
forward_id_references[se.uuid].add(vse)
backward_id_references[vse.uuid].add(se)
ent.parents, vse.registered_identifiable))
if IdentifiableAdapter.referencing_entity_has_appropriate_type(
ent.parents, vse.registered_identifiable):
forward_id_referenced_by[se.uuid].add(vse)
......
......@@ -486,62 +486,17 @@ a: ([b1, b2])
assert st.identity_relies_on_unchecked_entity(st.se[2])
assert st.identity_relies_on_unchecked_entity(st.se[3])
assert st.identity_relies_on_unchecked_entity(st.se[4])
st.se[0].identifiable = Identifiable(path='a') # dummy identifiable
st.add_to_missing(st.se[0])
assert st.identity_relies_on_unchecked_entity(st.se[1]) is False
with raises(RuntimeError) as rte:
with raises(db.apiutils.EntityMergeConflictError) as rte:
crawler.synchronize(commit_changes=False,
crawled_data=[rec_a, *rec_b, *rec_c])
assert not isinstance(rte.value, NotImplementedError), \
"Exception must not be NotImplementedError, but plain RuntimeError."
assert "Could not find referencing entities" in rte.value.args[0]
assert "merge conflicts in the referencing" in rte.value.args[0]
def test_has_missing_object_in_references():
crawler = Crawler()
# Simulate remote server content by using the names to identify records
# There are only two known Records with name A and B
crawler.identifiableAdapter.get_registered_identifiable = Mock(side_effect=partial(
basic_retrieve_by_name_mock_up, known={"C": db.Record(name="C").add_parent("RTC")
.add_property("d").add_property("name"),
"D": db.Record(name="D").add_parent("RTD")
.add_property("d").add_property("e").add_property("name"),
}))
# one reference with id -> check
assert not crawler._has_missing_object_in_references(
Identifiable(name="C", record_type="RTC", properties={'d': 123}), {})
# one ref with Entity with id -> check
rec = db.Record(id=123).add_parent("C")
assert not crawler._has_missing_object_in_references(
Identifiable(name="C", record_type="RTC", properties={'d': rec}), {id(rec): {'C': [None]}})
# one ref with id one with Entity with id (mixed) -> check
rec = db.Record(id=123).add_parent("RTC")
assert not crawler._has_missing_object_in_references(
Identifiable(name="C", record_type="RTD",
properties={'d': 123, 'b': rec}), {id(rec): {'C': [None]}})
# entity to be referenced in the following
a = db.Record(name="C").add_parent("C").add_property("d", 12311)
# one ref with id one with Entity without id (but not identifying) -> fail
assert not crawler._has_missing_object_in_references(
Identifiable(name="C", record_type="RTC", properties={'d': 123, 'e': a}),
{id(a): {'C': [None]}})
# one ref with id one with Entity without id (mixed) -> fail
assert not crawler._has_missing_object_in_references(
Identifiable(name="D", record_type="RTD", properties={'d': 123, 'e': a}),
{id(a): {'C': [None]}})
crawler.treated_records_lookup.add(a, Identifiable(name="C", record_type="RTC",
properties={'d': 12311}))
# one ref with id one with Entity without id but in cache -> check
assert crawler._has_missing_object_in_references(
Identifiable(name="D", record_type="RTD", properties={'d': 123, 'e': a}),
{id(a): {'C': [None]}})
# if this ever fails, the mock up may be removed
crawler.identifiableAdapter.get_registered_identifiable.assert_called()
# assert not isinstance(rte.value, NotImplementedError), \
# "Exception must not be NotImplementedError, but plain RuntimeError."
# assert "Could not find referencing entities" in rte.value.args[0]
# assert "merge conflicts in the referencing" in rte.value.args[0]
def test_replace_entities_with_ids():
......@@ -783,14 +738,15 @@ def test_split_into_inserts_and_updates_mult_backref(crawler_mocked_for_backref_
]
# test whether both entities are listed in the backref attribute of the identifiable
referencing_entities = crawler.create_reference_mapping(entlist)
st = SemanticTarget(entlist, crawler.identifiableAdapter)
identifiable = crawler.identifiableAdapter.get_identifiable(
referenced,
referencing_entities[id(referenced)])
st.se[0],
st.backward_id_referenced_by[st.se[0].uuid])
assert len(identifiable.backrefs) == 2
# check the split...
insert, update = crawler.split_into_inserts_and_updates(deepcopy(entlist))
insert, update = crawler.split_into_inserts_and_updates(st)
assert len(update) == 1
assert len(insert) == 2
......@@ -807,15 +763,15 @@ def test_split_into_inserts_and_updates_diff_backref(crawler_mocked_for_backref_
]
# test whether both entities are listed in the backref attribute of the identifiable
referencing_entities = crawler.create_reference_mapping(entlist)
st = SemanticTarget(entlist, crawler.identifiableAdapter)
identifiable = crawler.identifiableAdapter.get_identifiable(
referenced,
referencing_entities[id(referenced)])
st.se[0],
st.backward_id_referenced_by[st.se[0].uuid])
assert len(identifiable.backrefs) == 2
# check the split...
insert, update = crawler.split_into_inserts_and_updates(deepcopy(entlist))
insert, update = crawler.split_into_inserts_and_updates(st)
assert len(update) == 2
assert len(insert) == 1
......@@ -955,6 +911,7 @@ def test_create_entity_summary():
assert "<a href='/Entity/4'>a</a>, <a href='/Entity/6'>b</a>" in text
@pytest.mark.xfail
def test_detect_circular_dependency(crawler_mocked_identifiable_retrieve, caplog):
crawler = crawler_mocked_identifiable_retrieve
crawler.identifiableAdapter.get_registered_identifiable = Mock(
......@@ -970,8 +927,9 @@ def test_detect_circular_dependency(crawler_mocked_identifiable_retrieve, caplog
assert [id(el) for el in circle] == [id(el) for el in [a, c, b, a]]
assert Crawler.detect_circular_dependency([d]) is None
st = SemanticTarget(flat, crawler.identifiableAdapter)
with raises(RuntimeError):
_, _ = crawler.split_into_inserts_and_updates(flat)
_, _ = crawler.split_into_inserts_and_updates(st)
caplog.set_level(logging.ERROR, logger="caoscrawler.converters")
assert "Found circular dependency" in caplog.text
assert "\n--------\n\n> Parent: C\n\n>> Name: a\n[\'C\']" in caplog.text
......@@ -1055,32 +1013,33 @@ def test_replace_name_with_referenced_entity():
def test_treated_record_lookup():
trlu = SemanticTarget()
exist = db.Record(id=1)
trlu.add(exist)
ident_adapter = CaosDBIdentifiableAdapter()
trlu = SemanticTarget([db.Record().add_parent(
'A'), db.Record().add_parent('A'), db.File()], ident_adapter)
exist = trlu.se[0]
miss = trlu.se[1]
fi = trlu.se[2]
exist.id = 1
trlu.add_to_existing(exist)
assert len(trlu._existing) == 1
# was added to existing
assert trlu._existing[id(exist)] is exist
# is in ID lookup
assert trlu._id_look_up[exist.id] is exist
# can be accessed via get_existing
assert trlu.get_existing(db.Record(id=1)) is exist
miss = db.Record()
# exception when identifiable is missing
with raises(RuntimeError):
trlu.add(miss)
ident = Identifiable(name='a')
trlu.add(miss, ident)
trlu.add_to_missing(miss)
miss.identifiable = Identifiable(name='a')
trlu.add_to_missing(miss)
# was added to missing
assert trlu._missing[id(miss)] is miss
# is in ident lookup
assert trlu._identifiable_look_up[ident.get_representation()] is miss
# can be accessed via get_missing
assert trlu.get_missing(db.Record(), Identifiable(name='a')) is miss
assert trlu._identifiable_look_up[miss.identifiable.get_representation()] is miss
fi = db.File(path='a', id=2)
trlu.add(fi)
fi.path = 'a'
fi.id = 2
trlu.add_to_existing(fi)
assert len(trlu._existing) == 2
# was added to existing
assert trlu._existing[id(fi)] is fi
......@@ -1088,22 +1047,16 @@ def test_treated_record_lookup():
assert trlu._id_look_up[fi.id] is fi
# is in path lookup
assert trlu._path_look_up[fi.path] is fi
# can be accessed via get_existing
assert trlu.get_existing(fi) is fi
all_exi = trlu.get_existing_list()
assert fi in all_exi
assert exist in all_exi
all_mi = trlu.get_missing_list()
assert miss in all_mi
all_miss, all_exi = trlu.create_record_lists()
assert fi.fragments[0] in all_exi
assert exist.fragments[0] in all_exi
assert miss.fragments[0] in all_miss
# If a Record was added using the ID, the ID must be used to identify it even though later an
# identifiable may be passed as well
assert trlu.get_any(exist, Identifiable(name='b')) is exist
fi2 = db.File(path='b')
trlu.add(fi2)
assert trlu.get_any(db.File(path='b'), Identifiable(name='c')) is fi2
exist.identifiable = Identifiable(name='b')
assert trlu.get_equivalent(exist) is exist
def test_merge_entity_with_identifying_reference(crawler_mocked_identifiable_retrieve):
......@@ -1120,4 +1073,5 @@ def test_merge_entity_with_identifying_reference(crawler_mocked_identifiable_ret
b = db.Record(name='b').add_parent("C")
c = db.Record(name='b').add_parent("C").add_property(name="C", value=a)
flat = [a, c, b]
_, _ = crawler.split_into_inserts_and_updates(flat)
st = SemanticTarget(flat, crawler.identifiableAdapter)
_, _ = crawler.split_into_inserts_and_updates(st)
......@@ -8,6 +8,7 @@ import caosdb as db
import pytest
from caoscrawler.identifiable import Identifiable
from caoscrawler.identifiable_adapters import LocalStorageIdentifiableAdapter
from caoscrawler.semantic_target import SemanticEntity
from caosdb.cached import cache_clear
from caosdb.exceptions import EmptyUniqueQueryError
from pytest import raises
......@@ -29,11 +30,11 @@ def test_file_identifiable():
# Without a path there is no identifying information
with raises(ValueError):
ident.get_identifiable(db.File(), [])
ident.get_identifiable(SemanticEntity(db.File(), None), [])
fp = "/test/bla/bla.txt"
file_obj = db.File(path=fp)
identifiable = ident.get_identifiable(file_obj)
identifiable = ident.get_identifiable(SemanticEntity(file_obj, None), [])
# the path is copied to the identifiable
assert fp == identifiable.path
......
......@@ -37,6 +37,7 @@ from caoscrawler.identifiable import Identifiable
from caoscrawler.identifiable_adapters import (CaosDBIdentifiableAdapter,
IdentifiableAdapter,
convert_value)
from caoscrawler.semantic_target import SemanticEntity
UNITTESTDIR = Path(__file__).parent
......@@ -122,28 +123,25 @@ def test_load_from_yaml_file():
def test_non_default_name():
ident = CaosDBIdentifiableAdapter()
ident.register_identifiable(
"Person", db.RecordType()
.add_parent(name="Person")
.add_property(name="last_name"))
identifiable = ident.get_identifiable(db.Record(name="don't touch it")
identifiable = ident.get_identifiable(SemanticEntity(db.Record(name="don't touch it")
.add_parent("Person")
.add_property(name="last_name", value='Tom')
)
.add_property(name="last_name", value='Tom'), db.RecordType()
.add_parent(name="Person")
.add_property(name="last_name")), [])
assert identifiable.name is None
def test_wildcard_ref():
ident = CaosDBIdentifiableAdapter()
ident.register_identifiable(
"Person", db.RecordType()
.add_parent(name="Person")
.add_property(name="is_referenced_by", value=["*"]))
rec = (db.Record(name="don't touch it").add_parent("Person")
.add_property(name="last_name", value='Tom'))
identifiable = ident.get_identifiable(rec,
referencing_entities={
'A': [1]}
dummy = SemanticEntity(db.Record(), None)
dummy.id = 1
identifiable = ident.get_identifiable(SemanticEntity(rec, db.RecordType()
.add_parent(name="Person")
.add_property(name="is_referenced_by", value=["*"])),
[dummy]
)
assert identifiable.backrefs[0] == 1
......@@ -163,11 +161,13 @@ def test_get_identifiable():
ident = CaosDBIdentifiableAdapter()
ident.load_from_yaml_definition(UNITTESTDIR / "example_identifiables.yml")
r_cur = (db.Record(id=5)
r_cur = SemanticEntity(db.Record(id=5)
.add_parent(name="Experiment", id=3)
.add_property(name="date", value="2022-02-01")
.add_property(name="result", value="FAIL"))
id_r0 = ident.get_identifiable(r_cur)
.add_property(name="result", value="FAIL"),
None
)
id_r0 = ident.get_identifiable(r_cur, [])
assert r_cur.parents[0].name == id_r0.record_type
assert r_cur.get_property(
"date").value == id_r0.properties["date"]
......@@ -190,7 +190,7 @@ def test_retrieve_identified_record_for_identifiable():
r_cur = r
break
id_r1 = ident.get_identifiable(r_cur)
id_r1 = ident.get_identifiable(r_cur, [])
assert r_cur.parents[0].name == id_r1.record_type
assert r_cur.get_property(
"identifier").value == id_r1.properties["identifier"]
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment