diff --git a/src/caoscrawler/sync_graph.py b/src/caoscrawler/sync_graph.py index 418baa543235542d81c119acc331ea718bf65b2a..86630a77b817dbfa61f395508da0d4bb1b59b642 100644 --- a/src/caoscrawler/sync_graph.py +++ b/src/caoscrawler/sync_graph.py @@ -122,16 +122,17 @@ class SyncGraph(): self.identifiableAdapter = identifiableAdapter # A dictionary allowing for quick lookup of sync nodes using their (possibly negative) IDs. # This dictionary is initially set using _mark_entities_with_path_or_id and later updated - # using set_id_of_node. + # using set_id_of_node or during merges of nodes. self._id_look_up: dict[Union[int, TempID, str], SyncNode] = {} - # Similar as above for looking up nodes using paths: + # Similar as above for looking up nodes using paths self._path_look_up: dict[str, SyncNode] = {} - # Similar as above for looking up nodes using identifiables. This dictionary uses - # the get_representation method of Identifiable as keys. + # Similar as above for looking up nodes using identifiables. This dictionary uses the text + # representation generated by get_representation method of Identifiable as keys. self._identifiable_look_up: dict[str, SyncNode] = {} + # look up for the nodes that were marked as being missing (on the remote server) self._missing: dict[int, SyncNode] = {} + # same for existing self._existing: dict[int, SyncNode] = {} - self._nonidentifiable: dict[int, SyncNode] = {} # entities that are missing get negative IDs to allow identifiable creation self._remote_missing_counter = -1 @@ -327,7 +328,11 @@ class SyncGraph(): """ for ent in entities: if ent.role == "Record" and len(ent.parents) == 0: - raise RuntimeError(f"Records must have a parent.\n{ent}") + raise ValueError(f"Records must have a parent.\n{ent}") + if isinstance(ent.id, int) and ent.id < 0: + raise ValueError(f"Records must not have negative integers as IDs.\n{ent}") + if isinstance(ent.id, str) and re.match(r"^-\d+$", ent.id): + raise ValueError(f"Records must not have negative integers as IDs.\n{ent}") def _get_nodes_whose_identity_relies_on(self, node: SyncNode): """returns a set of nodes that reference the given node as identifying property or are @@ -452,17 +457,6 @@ class SyncGraph(): self._path_look_up[node.path] = node self.set_id_of_node(node, remote_id) - def _remove_non_identifiables(self): - """ A path or an ID is sufficiently identifying. Thus, those entities can be marked as - checked - - Last review by Alexander Schlemmer on 2024-05-24. - """ - for node in list(self.nodes[::-1]): - if "nonidentifiable" in [p.name for p in node.registered_identifiable.properties]: - self.unchecked.remove(node) - self._nonidentifiable[id(node)] = node - def _merge_into(self, source: SyncNode, target: SyncNode): """ tries to merge source into target and performs the necessary updates: - update the membervariables of target using source (``target.update(source)``). @@ -613,7 +607,6 @@ class SyncGraph(): # (None is the default second argument of set_id_of_node.) for other_node in self._get_nodes_whose_identity_relies_on(node): if other_node in self.unchecked: - print(f"set\n{other_node}\n to missing due to missing\n{node}") self.set_id_of_node(other_node) def _mark_existing(self, node: SyncNode):