diff --git a/src/caoscrawler/sync_graph.py b/src/caoscrawler/sync_graph.py index 580c684a503d32c1ae4ffbca51cfcf6c43b6bd5c..10c827390e8672f2b8935c9b7921aebeb43bd6c1 100644 --- a/src/caoscrawler/sync_graph.py +++ b/src/caoscrawler/sync_graph.py @@ -504,7 +504,10 @@ class SyncGraph: def _mark_entities_with_path_or_id(self): """A path or an ID is sufficiently identifying. Thus, those entities can be marked as - checked""" + checked + + Last review by Alexander Schlemmer on 2024-05-29. + """ for node in list(self.nodes): if node.id is not None: eq_node = self.get_equivalent(node) @@ -533,16 +536,16 @@ class SyncGraph: def _merge_into(self, source: SyncNode, target: SyncNode): """tries to merge source into target and performs the necessary updates: - - update the membervariables of target using source (``target.update(source)``). + - update the member variables of target using source (``target.update(source)``). - replaces reference values to source by target - updates the reference map - updates lookup tables - removes source from node lists - marks target as missing/existing if source was marked that way - adds an identifiable if now possible (e.g. merging based on ID might allow create an - identifiable when none of the two nodes hand the sufficient properties on its own before) - - check whether dependent nodes can now get and identifiable (the merge might have set the - ID such that dependen nodes can now create an identifiable) + identifiable when none of the two nodes had the sufficient properties on its own before) + - check whether dependent nodes can now get an identifiable (the merge might have set the + ID such that dependent nodes can now create an identifiable) """ # sanity checks if source is target: @@ -569,32 +572,17 @@ class SyncGraph: ) # update reference mappings - for node in self.forward_references.pop(id(source)): - self.forward_references[id(target)].add(node) - self.backward_references[id(node)].remove(source) - self.backward_references[id(node)].add(target) - for node in self.backward_references.pop(id(source)): - self.backward_references[id(target)].add(node) - self.forward_references[id(node)].remove(source) - self.forward_references[id(node)].add(target) - - for node in self.forward_references_id_props.pop(id(source)): - self.forward_references_id_props[id(target)].add(node) - self.backward_references_id_props[id(node)].remove(source) - self.backward_references_id_props[id(node)].add(target) - for node in self.backward_references_id_props.pop(id(source)): - self.backward_references_id_props[id(target)].add(node) - self.forward_references_id_props[id(node)].remove(source) - self.forward_references_id_props[id(node)].add(target) - - for node in self.forward_references_backref.pop(id(source)): - self.forward_references_backref[id(target)].add(node) - self.backward_references_backref[id(node)].remove(source) - self.backward_references_backref[id(node)].add(target) - for node in self.backward_references_backref.pop(id(source)): - self.backward_references_backref[id(target)].add(node) - self.forward_references_backref[id(node)].remove(source) - self.forward_references_backref[id(node)].add(target) + for setA, setB in ( + (self.forward_references, self.backward_references), + (self.backward_references, self.forward_references), + (self.forward_references_id_props, self.backward_references_id_props), + (self.backward_references_id_props, self.forward_references_id_props), + (self.forward_references_backref, self.backward_references_backref), + (self.backward_references_backref, self.forward_references_backref),): + for node in setA.pop(id(source)): + setA[id(target)].add(node) + setB[id(node)].remove(source) + setB[id(node)].add(target) # remove unneeded SyncNode self.nodes.remove(source)