diff --git a/src/caoscrawler/sync_graph.py b/src/caoscrawler/sync_graph.py index 56206ac216b582ad8fa21f5de0fca884210bf373..b8d4274b416e9861b9f33234cf290f4917d9577f 100644 --- a/src/caoscrawler/sync_graph.py +++ b/src/caoscrawler/sync_graph.py @@ -135,6 +135,8 @@ class SyncGraph(): for el in self.nodes: entities.append(el.export_entity()) node_map[id(el)] = entities[-1] + for oel in el.other: + node_map[id(oel)] = entities[-1] for ent in entities: for p in ent.properties: if isinstance(p.value, list): @@ -415,19 +417,7 @@ class SyncGraph(): """ assert source is not target target.update(source) - if id(source) in self._missing and id(target) not in self._missing: - self._treat_missing(target) - if id(source) in self._existing and id(target) not in self._existing: - self._treat_existing(target) - - if (target.identifiable is None and not self.identity_relies_on_unchecked_entity(target)): - try: - identifiable = self.identifiableAdapter.get_identifiable( - target, self.backward_id_referenced_by[target.uuid]) - self.set_identifiable_of_node(target, identifiable) - except Exception as es: - print(es) - pass + target.other.append(source) # update reference mappings for node in self.forward_references.pop(source.uuid): @@ -461,8 +451,27 @@ class SyncGraph(): self.nodes.remove(source) if source in self.unchecked: self.unchecked.remove(source) + else: + self.unchecked.remove(target) assert id(source) not in self._missing assert id(source) not in self._existing + if target.id is not None: + self._id_look_up[target.id] = target + if target.path is not None: + self._path_look_up[target.path] = target + + if (target.identifiable is None and not self.identity_relies_on_unchecked_entity(target)): + try: + identifiable = self.identifiableAdapter.get_identifiable( + target, self.backward_id_referenced_by[target.uuid]) + self.set_identifiable_of_node(target, identifiable) + except Exception as es: + print(es) + pass + if id(source) in self._missing and id(target) not in self._missing: + self._treat_missing(target) + if id(source) in self._existing and id(target) not in self._existing: + self._treat_existing(target) def _initialize_nodes(self, entities): """ create initial set of SemanticEntities from provided Entity list""" diff --git a/src/caoscrawler/sync_node.py b/src/caoscrawler/sync_node.py index 66113cb2a9d4aa26de6477b90c71c6be47b8d7c0..0cc5d02b078a35fc5be58cc61140bbea507f12f1 100644 --- a/src/caoscrawler/sync_node.py +++ b/src/caoscrawler/sync_node.py @@ -61,6 +61,7 @@ class SyncNode(): self.uuid = uuid() self.identifiable = None self.registered_identifiable = registered_identifiable + self.other = [] def update(self, other: SyncNode) -> None: if other.identifiable is not None and self.identifiable is not None: @@ -92,7 +93,14 @@ class SyncNode(): ent.add_parent(p) for p in self.properties: if ent.get_property(p) is not None: - if ent.get_property(p).value != p.value: + entval = ent.get_property(p).value + pval = p.value + if isinstance(entval, SyncNode) and entval.id is not None: + entval = entval.id + if isinstance(pval, SyncNode) and pval.id is not None: + pval = pval.id + + if entval != pval: raise db.apiutils.EntityMergeConflictError(f"Differing values were set for Property {p.name}:\n" f"{ent.get_property(p).value}\n{p.value}") else: