diff --git a/src/caoscrawler/sync_graph.py b/src/caoscrawler/sync_graph.py index 7589b556c3c70cf4124aec79cb41db3212a9c364..71b1f7eb8b654245a3794395ebfbf69a67270deb 100644 --- a/src/caoscrawler/sync_graph.py +++ b/src/caoscrawler/sync_graph.py @@ -290,12 +290,14 @@ class SyncGraph(): Equivalent means that ID, path or identifiable are the same. If a new information was added to the given SyncNode (e.g. the ID), it might be possible then to identify an equivalent node (i.e. one with the same ID in this example). - There might be more than one equivalent nodes in the graph. However, simply the first that - is found is being returned. (When an equivalent node is found this, the given node is + There might be more than one equivalent node in the graph. However, only the first one that + is found is returned. (When an equivalent node is found, the given node is typically merged, into the one that was found and after the merge the graph is again - checked for equivalent nodes) + checked for equivalent nodes.) Returns None if no equivalent node is found. + + Last review by Alexander Schlemmer on 2024-05-28. """ if entity.id is not None and entity.id in self._id_look_up: candidate = self._id_look_up[entity.id] @@ -323,9 +325,13 @@ class SyncGraph(): def _set_identifiable_of_node(self, node: SyncNode, identifiable: Optional[Identifiable] = None): """sets the identifiable and checks whether an equivalent node can be found with that new - information. + information. If an equivalent node is found, 'node' is merged into that node. if no identifiable is given, the identifiable is retrieved from the identifiable adapter + + Raises a ValueError if the equivalent node found does not have an identifiable. + Raises a RuntimeError if no equivalent node is found and + the (unique) string representation of the identifiable of 'node' is already contained in the identifiable_look_up. """ if identifiable is None: self.identifiableAdapter.all_identifying_properties_exist(node)