Skip to content
Snippets Groups Projects
Commit ad092932 authored by Henrik tom Wörden
Browse files

refactor merge

parent 9e1afc58
No related branches found
No related tags found
2 merge requests: !178 "FIX: #96 Better error output for crawl.py script." and !167 "Sync Graph"
Pipeline #51529 passed with warnings
...@@ -506,13 +506,18 @@ class SyncGraph: ...@@ -506,13 +506,18 @@ class SyncGraph:
"""A path or an ID is sufficiently identifying. Thus, those entities can be marked as """A path or an ID is sufficiently identifying. Thus, those entities can be marked as
checked checked
When this function returns, there is only one node for each ID (i.e. no two nodes with the
same ID). The same is true for paths.
This function also updates _id_look_up and _path_look_up
Last review by Alexander Schlemmer on 2024-05-29. Last review by Alexander Schlemmer on 2024-05-29.
""" """
for node in list(self.nodes): for node in list(self.nodes):
if node.id is not None: if node.id is not None:
eq_node = self.get_equivalent(node) eq_node = self.get_equivalent(node)
if eq_node is not None: if eq_node is not None:
self._merge_into(node, eq_node) self._basic_merge_into(node, eq_node)
else: else:
self._id_look_up[node.id] = node self._id_look_up[node.id] = node
self._mark_existing(node) self._mark_existing(node)
...@@ -521,7 +526,7 @@ class SyncGraph: ...@@ -521,7 +526,7 @@ class SyncGraph:
if node.path is not None: if node.path is not None:
eq_node = self.get_equivalent(node) eq_node = self.get_equivalent(node)
if eq_node is not None: if eq_node is not None:
self._merge_into(node, eq_node) self._basic_merge_into(node, eq_node)
else: else:
self._path_look_up[node.path] = node self._path_look_up[node.path] = node
try: try:
...@@ -531,39 +536,19 @@ class SyncGraph: ...@@ -531,39 +536,19 @@ class SyncGraph:
remote_id = None remote_id = None
if existing is not None: if existing is not None:
remote_id = existing.id remote_id = existing.id
self._path_look_up[node.path] = node
self.set_id_of_node(node, remote_id) self.set_id_of_node(node, remote_id)
def _merge_into(self, source: SyncNode, target: SyncNode): def _basic_merge_into(self, source: SyncNode, target: SyncNode):
"""tries to merge source into target and performs the necessary updates: """tries to merge source into target and updates member variables
- update the member variables of target using source (``target.update(source)``).
- replaces reference values to source by target
- updates the reference map
- updates lookup tables
- removes source from node lists
- marks target as missing/existing if source was marked that way
- adds an identifiable if now possible (e.g. merging based on ID might allow create an
identifiable when none of the two nodes had the sufficient properties on its own before)
- check whether dependent nodes can now get an identifiable (the merge might have set the
ID such that dependent nodes can now create an identifiable)
Last review by Alexander Schlemmer on 2024-05-29. - reference maps are updated
- self.nodes is updated
- self.unchecked is updated
- lookups are being updated
""" """
# sanity checks # sanity checks
if source is target: if source is target:
raise ValueError("source must not be target") raise ValueError("source must not be target")
if target.id is None and source.id is not None:
if self._id_look_up[source.id] != source:
raise ValueError(
"It is assumed that always only one node exists with a certain ID and that "
"node is in the look up"
)
if target.path is None and source.path is not None:
if self._id_look_up[source.path] != source:
raise ValueError(
"It is assumed that always only one node exists with a certain path and that"
" node is in the look up"
)
target.update(source) target.update(source)
...@@ -575,12 +560,13 @@ class SyncGraph: ...@@ -575,12 +560,13 @@ class SyncGraph:
# update reference mappings # update reference mappings
for setA, setB in ( for setA, setB in (
(self.forward_references, self.backward_references), (self.forward_references, self.backward_references),
(self.backward_references, self.forward_references), (self.backward_references, self.forward_references),
(self.forward_references_id_props, self.backward_references_id_props), (self.forward_references_id_props, self.backward_references_id_props),
(self.backward_references_id_props, self.forward_references_id_props), (self.backward_references_id_props, self.forward_references_id_props),
(self.forward_references_backref, self.backward_references_backref), (self.forward_references_backref, self.backward_references_backref),
(self.backward_references_backref, self.forward_references_backref),): (self.backward_references_backref, self.forward_references_backref),
):
for node in setA.pop(id(source)): for node in setA.pop(id(source)):
setA[id(target)].add(node) setA[id(target)].add(node)
setB[id(node)].remove(source) setB[id(node)].remove(source)
...@@ -596,9 +582,24 @@ class SyncGraph: ...@@ -596,9 +582,24 @@ class SyncGraph:
if target.path is not None: if target.path is not None:
self._path_look_up[target.path] = target self._path_look_up[target.path] = target
if target.identifiable is not None: if target.identifiable is not None:
self._identifiable_look_up[target.identifiable.get_representation()] = ( self._identifiable_look_up[target.identifiable.get_representation()] = target
target
) def _merge_into(self, source: SyncNode, target: SyncNode):
"""tries to merge source into target and performs the necessary updates:
- update the member variables of target using source (``target.update(source)``).
- replaces reference values to source by target
- updates the reference map
- updates lookup tables
- removes source from node lists
- marks target as missing/existing if source was marked that way
- adds an identifiable if now possible (e.g. merging based on ID might allow create an
identifiable when none of the two nodes had the sufficient properties on its own before)
- check whether dependent nodes can now get an identifiable (the merge might have set the
ID such that dependent nodes can now create an identifiable)
Last review by Alexander Schlemmer on 2024-05-29.
"""
self._basic_merge_into(source, target)
if (id(source) in self._existing and id(target) in self._missing) or ( if (id(source) in self._existing and id(target) in self._missing) or (
id(target) in self._existing and id(source) in self._missing id(target) in self._existing and id(source) in self._missing
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment