Skip to content
Snippets Groups Projects
Commit 97614aa2 authored by Henrik tom Wörden's avatar Henrik tom Wörden
Browse files

wip

parent eff1b304
No related branches found
No related tags found
2 merge requests: !178 "FIX: #96 Better error output for crawl.py script." and !167 "Sync Graph"
Pipeline #50341 passed with warnings
......@@ -299,46 +299,46 @@ class Crawler(object):
return data
def split_into_inserts_and_updates(self, st: SyncGraph):
""" iteratively identifies nodes in the SyncGraph st and checks whether those exist on the
remote server such that in the end two lists are created: one with the entities that need
to be updated and one with the entities that need to be inserted"""
entity_was_treated = True
# st.entities contains Entities which could not yet be checked against the remote server
# st.unchecked contains Entities which could not yet be checked against the remote server
while entity_was_treated and len(st.unchecked) > 0:
entity_was_treated = False
# For each element we try to find out whether we can find it in the server or whether
# it does not yet exist. Since a Record may reference other unknown Records it might not
# be possible to answer this right away.
# The following checks are done on each Record:
# 1. Is it in the cache of already checked Records?
# 2. Does it have to be new since a needed reference is missing?
# 3. Can it be checked on the remote server?
for se in list(st.unchecked):
if se not in st.unchecked:
if se not in st.unchecked: # the node might have been checke in the mean time
continue
if st.identity_relies_on_unchecked_entity(se):
if st.identity_relies_on_unchecked_entity(se): # no remote server check possible
continue
if se.identifiable is None:
if se.identifiable is None: # generate identifiable if it is missing
self.identifiableAdapter.check_identifying_props(se)
st.set_identifiable_of_node(se, st.identifiableAdapter.get_identifiable(
se, st.backward_id_referenced_by[se.uuid]))
# entity was merged with another due to the new identifiable
entity_was_treated = True
# if the node was merged with another due to the new identifiable, we skip
if se not in st.unchecked:
continue
# check remote server
identified_record = (
st.identifiableAdapter.retrieve_identified_record_for_identifiable(
se.identifiable))
remote_id = None
if identified_record is not None:
remote_id = identified_record.id
# set id of node. if node is missing, remote_id is None and the SyncGraph marks it
# as missing
st.set_id_of_node(se, remote_id)
entity_was_treated = True
# TODO
# for record in st.entities:
# self.replace_references_with_cached(record, referencing_entities)
# We postponed the merge for records where it failed previously and try it again now.
# This only might add properties of the postponed records to the already used ones.
if len(st.unchecked) > 0:
circle = st.unchecked_contains_circular_dependency()
......
......@@ -29,6 +29,7 @@ from typing import Union
import linkahead as db
from .exceptions import MissingIdentifyingProperty
from .sync_node import SyncNode
logger = logging.getLogger(__name__)
......@@ -60,7 +61,8 @@ class Identifiable():
if (record_id is None and path is None and name is None
and (backrefs is None or len(backrefs) == 0)
and (properties is None or len(properties) == 0)):
raise ValueError("There is no identifying information. You need to add a path or "
raise ValueError(
"There is no identifying information. You need to add a path or "
"properties or other identifying attributes.")
if properties is not None and 'name' in [k.lower() for k in properties.keys()]:
raise ValueError("Please use the separete 'name' keyword instead of the properties "
......
......@@ -148,6 +148,24 @@ identifiabel, identifiable and identified record) for a Record.
query_string = query_string[:-len(" AND ")]
return query_string
def check_identifying_props(self, node):
    """Check that ``node`` carries every property named by its registered identifiable.

    Property names are compared case-insensitively. Entries of the registered
    identifiable called ``is_referenced_by`` are skipped here, and an entry called
    ``name`` is checked against ``node.name`` instead of ``node.properties``.

    Raises
    ------
    RuntimeError
        If ``node.registered_identifiable`` is None.
    MissingIdentifyingProperty
        If the node has no name although one is required, or if a required
        property is absent from ``node.properties``. The name of the missing
        property is stored in the ``prop`` attribute of the exception.
    """
    if node.registered_identifiable is None:
        raise RuntimeError("no registered_identifiable")

    for prop in node.registered_identifiable.properties:
        wanted = prop.name.lower()

        # NOTE(review): back references are presumably validated elsewhere - confirm.
        if wanted == "is_referenced_by":
            continue

        if wanted == "name":
            if node.name is None:
                i = MissingIdentifyingProperty("The node has no name.")
                i.prop = "name"
                raise i
            continue

        # One matching property is enough; multiple occurrences are not rejected here.
        if not any(el.name.lower() == wanted for el in node.properties):
            i = MissingIdentifyingProperty(f"The property {prop.name} is missing.")
            i.prop = prop.name
            # The exception was previously built but never raised, so a missing
            # identifying property went unnoticed.
            raise i
@staticmethod
def __create_pov_snippet(pname: str, pvalue, startswith: bool = False):
"""Return something like ``'name'='some value'`` or ``'name' LIKE 'some*'``.
......
......@@ -442,6 +442,7 @@ class SyncGraph():
if target.path is not None:
self._path_look_up[target.path] = target
# due to the merge it might now be possible to create an identifiable
if (target.identifiable is None and not self.identity_relies_on_unchecked_entity(target)):
try:
identifiable = self.identifiableAdapter.get_identifiable(
......
......@@ -412,25 +412,6 @@ def test_set_id_of_node(simple_adapter):
st = SyncGraph(ent_list, simple_adapter)
with pytest.raises(ImpossibleMergeError):
st.export_record_lists()
ent_list = [
db.Record(id=101).add_parent("RT3")
.add_property('b', value=db.Record().add_parent("RT5")),
db.Record().add_parent("RT3")
.add_property('b', value=db.Record().add_parent("RT5")),
]
st = SyncGraph(ent_list, simple_adapter)
assert st.nodes[2].is_unidentifiable()
assert st.nodes[3].is_unidentifiable()
assert len(st.nodes) == 4
assert len(st.unchecked) == 1
st.set_id_of_node(st.nodes[1], 101)
assert len(st.nodes) == 3
assert len(st.unchecked) == 0
# until implementation of it ...
with pytest.raises(NotImplementedError):
# with pytest.raises(ImpossibleMergeError):
st.export_record_lists()
@patch("caoscrawler.sync_graph.cached_get_entity_by",
......@@ -657,26 +638,3 @@ def test_export_node():
assert len(p.value) == len(exp.get_property(p.name).value)
assert len(exp.properties) == len(rec_a.properties)
assert len(exp.parents) == len(rec_a.parents)
def test_remove_merged(simple_adapter):
    # Scenario: a record is referenced by another record, gets merged into a different
    # node, and that merged node is then removed. Afterwards the referencing node is
    # expected to no longer carry the reference property.
    b = db.Record().add_parent("RT3").add_property('a', value=1)
    twin_of_b = db.Record().add_parent("RT3").add_property('a', value=1)
    referencing = (
        db.Record()
        .add_parent("RT3")
        .add_property('a', value=3)
        .add_property('RT3', value=b)
    )
    st = SyncGraph([twin_of_b, b, referencing], simple_adapter)

    se_a, se_c = st.nodes[0], st.nodes[1]
    for node in st.nodes:
        print(node)

    # The two records with a=1 end up as a single node, leaving two nodes in total.
    assert len(st.nodes) == 2
    assert len(st.unchecked) == 2

    st.remove_failed(se_a)

    assert len(st.nodes) == 1
    assert len(st.unchecked) == 1
    remaining_property_names = [p.name for p in se_c.properties]
    assert "RT3" not in remaining_property_names
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment