From 77577f96042938ee1a0b54acf320c6ceeb744d29 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20tom=20W=C3=B6rden?= <h.tomwoerden@indiscale.com> Date: Thu, 18 Apr 2024 22:29:49 +0200 Subject: [PATCH] wip --- src/caoscrawler/sync_graph.py | 32 +++++++++++++++----------------- 1 file changed, 15 insertions(+), 17 deletions(-) diff --git a/src/caoscrawler/sync_graph.py b/src/caoscrawler/sync_graph.py index 3bf704c3..dd6bcfaa 100644 --- a/src/caoscrawler/sync_graph.py +++ b/src/caoscrawler/sync_graph.py @@ -150,7 +150,7 @@ class SyncGraph(): self.backward_id_referenced_by, ) = self._create_reference_mapping(self.nodes) - # self._mark_entities_with_path_or_id() + self._mark_entities_with_path_or_id() def set_id_of_node(self, se: SyncNode, node_id: Optional[str]): """sets the ID attribute of the given SyncNode. If node_id is None, a negative Id will be @@ -407,37 +407,35 @@ class SyncGraph(): def _mark_entities_with_path_or_id(self): """ A path or an ID is sufficiently identifying. Thus, those entities can be marked as checked """ - for semantic_entity in list(self.nodes[::-1]): - assert len(semantic_entity.fragments) == 1 - entity = semantic_entity.fragments[0] - if entity.id is None and entity.path is None: + for node in list(self.nodes[::-1]): + if node.id is None and node.path is None: continue - if entity.path is not None: + if node.path is not None: try: - existing = cached_get_entity_by(path=entity.path) + existing = cached_get_entity_by(path=node.path) except EmptyUniqueQueryError: existing = None if existing is not None: - semantic_entity.identify_with(existing) + node.identify_with(existing) - # at this point, semantic_entity has an ID if it is existing - treated_before = self.get_equivalent(semantic_entity) + # at this point, node has an ID if it is existing + treated_before = self.get_equivalent(node) if treated_before is None: - if semantic_entity.id is None or semantic_entity.id < 0: - self.set_missing(semantic_entity) + if node.id is None or node.id < 0: + self.set_missing(node) else: - self.set_existing(semantic_entity) + self.set_existing(node) else: - self._merge_into(semantic_entity, treated_before) + self._merge_into(node, treated_before) def _remove_non_identifiables(self): """ A path or an ID is sufficiently identifying. Thus, those entities can be marked as checked """ - for semantic_entity in list(self.nodes[::-1]): + for node in list(self.nodes[::-1]): if "nonidentifiable" in [p.name for p in - semantic_entity.registered_identifiable.properties]: + node.registered_identifiable.properties]: - self.unchecked.remove(semantic_entity) + self.unchecked.remove(node) def _add_any(self, entity: SyncNode, lookup): """Add ``entity`` to this SemanticTarget and store in ``lookup`` cache. -- GitLab