diff --git a/src/caoscrawler/sync_graph.py b/src/caoscrawler/sync_graph.py index dc97f2c338f3cb354171b6ac65d9106d47b93a5e..e2687dcad263a2e7de482689bb48ff5355005428 100644 --- a/src/caoscrawler/sync_graph.py +++ b/src/caoscrawler/sync_graph.py @@ -309,9 +309,11 @@ class SyncGraph(): equivalent_se = self.get_equivalent(node) if equivalent_se is not None and equivalent_se is not node: self._merge_into(node, equivalent_se) - assert equivalent_se.identifiable is not None + if equivalent_se.identifiable is None: + raise ValueError("Identifiable is missing") else: - assert node.identifiable.get_representation() not in self._identifiable_look_up + if node.identifiable.get_representation() in self._identifiable_look_up: + raise RuntimeError("Identifiable is already in the look up") self._identifiable_look_up[node.identifiable.get_representation()] = node @staticmethod diff --git a/src/caoscrawler/sync_node.py b/src/caoscrawler/sync_node.py index 5dd8baf75115116b506b5cf5694c2c0f4cecb77f..26dbb41123f01c0a1774c5aeb106e6d973381ff2 100644 --- a/src/caoscrawler/sync_node.py +++ b/src/caoscrawler/sync_node.py @@ -87,6 +87,9 @@ class SyncNode: parents are added if they are not yet in the list properties are added in any case. This may lead to duplication of properties. + We allow this duplication here and remove it when we create a db.Entity (export_entity + function) because if property values are SyncNode objects, they might not be comparable (no + ID, no identifiable) yet. """ if other.identifiable is not None and self.identifiable is not None: @@ -108,7 +111,10 @@ class SyncNode: if self.__getattribute__(attr) is None: self.__setattr__(attr, other.__getattribute__(attr)) else: - assert self.__getattribute__(attr) == other.__getattribute__(attr) + if self.__getattribute__(attr) != other.__getattribute__(attr): + raise ValueError(f"Trying to update {attr} but this would lead to an " + f"override of the value '{self.__getattribute__(attr)}' " + f"by the value '{other.__getattribute__(attr)}'") for p in other.parents: if not parent_in_list(p, self.parents): self.parents.append(p)