diff --git a/src/caoscrawler/sync_graph.py b/src/caoscrawler/sync_graph.py index 3bf704c37455bb8a711c1bd45b7a82f67c65fb35..dd6bcfaa65ea70f5aeb7b7fbbfb853c033c7ad88 100644 --- a/src/caoscrawler/sync_graph.py +++ b/src/caoscrawler/sync_graph.py @@ -150,7 +150,7 @@ class SyncGraph(): self.backward_id_referenced_by, ) = self._create_reference_mapping(self.nodes) - # self._mark_entities_with_path_or_id() + self._mark_entities_with_path_or_id() def set_id_of_node(self, se: SyncNode, node_id: Optional[str]): """sets the ID attribute of the given SyncNode. If node_id is None, a negative Id will be @@ -407,37 +407,35 @@ class SyncGraph(): def _mark_entities_with_path_or_id(self): """ A path or an ID is sufficiently identifying. Thus, those entities can be marked as checked """ - for semantic_entity in list(self.nodes[::-1]): - assert len(semantic_entity.fragments) == 1 - entity = semantic_entity.fragments[0] - if entity.id is None and entity.path is None: + for node in list(self.nodes[::-1]): + if node.id is None and node.path is None: continue - if entity.path is not None: + if node.path is not None: try: - existing = cached_get_entity_by(path=entity.path) + existing = cached_get_entity_by(path=node.path) except EmptyUniqueQueryError: existing = None if existing is not None: - semantic_entity.identify_with(existing) + node.identify_with(existing) - # at this point, semantic_entity has an ID if it is existing - treated_before = self.get_equivalent(semantic_entity) + # at this point, node has an ID if it is existing + treated_before = self.get_equivalent(node) if treated_before is None: - if semantic_entity.id is None or semantic_entity.id < 0: - self.set_missing(semantic_entity) + if node.id is None or node.id < 0: + self.set_missing(node) else: - self.set_existing(semantic_entity) + self.set_existing(node) else: - self._merge_into(semantic_entity, treated_before) + self._merge_into(node, treated_before) def _remove_non_identifiables(self): """ A path or an ID is sufficiently identifying. Thus, those entities can be marked as checked """ - for semantic_entity in list(self.nodes[::-1]): + for node in list(self.nodes[::-1]): if "nonidentifiable" in [p.name for p in - semantic_entity.registered_identifiable.properties]: + node.registered_identifiable.properties]: - self.unchecked.remove(semantic_entity) + self.unchecked.remove(node) def _add_any(self, entity: SyncNode, lookup): """Add ``entity`` to this SemanticTarget and store in ``lookup`` cache.