diff --git a/src/caoscrawler/sync_graph.py b/src/caoscrawler/sync_graph.py index 86630a77b817dbfa61f395508da0d4bb1b59b642..48bbf9bd2162a7d8f39bc5dbdc8dad73bc8c62fd 100644 --- a/src/caoscrawler/sync_graph.py +++ b/src/caoscrawler/sync_graph.py @@ -118,6 +118,21 @@ class SyncGraph(): Last review by Alexander Schlemmer on 2024-05-24. """ + # General implementation remark: + # There are three cases where an update of one SyncNode can affect other nodes: + # - mark existing (add identifiables) + # - mark missing (add identifiables and add (negative) IDs) + # - merge (add identifiables) + # + # We cannot get an infinite recursion where one update triggers another update and so on + # because updates are conditional: + # Setting an ID removes the node (immediately) from the unchecked list and it is only tried to + # set an ID in _mark_missing if a node is in the uncheck list. Thus, setting the ID once + # prevents future attempts to set the ID of the same node. + # Also, setting an identifiable is only done when needed, i.e. there is no identifiable. + # Note, that when ever one node is changed, we check all dependend nodes (see usage of + # `_get_nodes_whose_identity_relies_on`) whether something should be updated. Thus, we cannot + # miss a necessary update. def __init__(self, entities: list[db.Entity], identifiableAdapter: IdentifiableAdapter): self.identifiableAdapter = identifiableAdapter # A dictionary allowing for quick lookup of sync nodes using their (possibly negative) IDs. @@ -548,10 +563,7 @@ class SyncGraph(): # - mark existing # - mark missing # - merge - # For each dependent node, we check whether this allows to create an identifiable - for other_node in self._get_nodes_whose_identity_relies_on(target): - if self._identifiable_is_needed(other_node): - self._set_identifiable_of_node(other_node) + self._add_identifiables_to_dependend_nodes(target) def _identifiable_is_needed(self, node: SyncNode): """ @@ -587,6 +599,12 @@ class SyncGraph(): condition=lambda val: id(val) in se_lookup, value=lambda val: se_lookup[id(val)]) + def _add_identifiables_to_dependend_nodes(self, node): + """ For each dependent node, we check whether this allows to create an identifiable """ + for other_node in self._get_nodes_whose_identity_relies_on(node): + if self._identifiable_is_needed(other_node): + self._set_identifiable_of_node(other_node) + def _mark_missing(self, node: SyncNode): """Mark a sync node as missing and remove it from the dictionary of unchecked nodes. @@ -599,10 +617,7 @@ class SyncGraph(): # - mark existing # - mark missing # - merge - # For each dependent node, we check whether this allows to create an identifiable - for other_node in self._get_nodes_whose_identity_relies_on(node): - if self._identifiable_is_needed(other_node): - self._set_identifiable_of_node(other_node) + self._add_identifiables_to_dependend_nodes(node) # For each dependent node, we set the ID to None (missing) # (None is the default second argument of set_id_of_node.) for other_node in self._get_nodes_whose_identity_relies_on(node): @@ -622,7 +637,4 @@ class SyncGraph(): # - mark existing # - mark missing # - merge - # For each dependent node, we check whether this allows to create an identifiable - for other_node in self._get_nodes_whose_identity_relies_on(node): - if self._identifiable_is_needed(other_node): - self._set_identifiable_of_node(other_node) + self._add_identifiables_to_dependend_nodes(node)