diff --git a/src/caoscrawler/identifiable_adapters.py b/src/caoscrawler/identifiable_adapters.py index 47023c8bc5217629dc96603c64694ac80ac30c26..52e3bce58cbe1728cbd2f46113c26c7f018565fd 100644 --- a/src/caoscrawler/identifiable_adapters.py +++ b/src/caoscrawler/identifiable_adapters.py @@ -300,15 +300,15 @@ class IdentifiableAdapter(metaclass=ABCMeta): refs.append(val) return refs - def get_identifiable(self, se: SyncNode, identifiable_backrefs) -> Identifiable: + def get_identifiable(self, se: SyncNode, identifiable_backrefs: set[SyncNode]) -> Identifiable: """ - Retrieve the registered identifiable and fill the property values to create an - identifiable. + Take the registered identifiable of given SyncNode ``se`` and fill the property values to + create an identifiable. Args: - record: the record for which the Identifiable shall be created. - referencing_entities: a dictionary (Type: dict[str, list[db.Entity]]), that - allows to look up entities with a certain RecordType, that reference ``record`` + se: the SyncNode for which the Identifiable shall be created. + identifiable_backrefs: a set (Type: set[SyncNode]), that contains SyncNodes + with a certain RecordType, that reference ``se`` Returns: Identifiable, the identifiable for record. @@ -317,7 +317,6 @@ class IdentifiableAdapter(metaclass=ABCMeta): """ property_name_list_A = [] - property_name_list_B = [] identifiable_props = {} name = None @@ -335,7 +334,8 @@ class IdentifiableAdapter(metaclass=ABCMeta): if prop.name.lower() == "is_referenced_by": for el in identifiable_backrefs: - assert isinstance(el, SyncNode) + if not isinstance(el, SyncNode): + raise ValueError("Elements of `identifiable_backrefs` must be SyncNodes") if len(identifiable_backrefs) == 0: raise MissingReferencingEntityError( f"Could not find referencing entities of type(s): {prop.value}\n" @@ -347,6 +347,10 @@ class IdentifiableAdapter(metaclass=ABCMeta): ) elif len([e.id for e in identifiable_backrefs if el.id is None]) > 0: raise RuntimeError("Referencing entity has no id") + # At this point we know that there is at least one referencing SyncNode + # with an ID. We do not need to set any property value (the reference will be used + # in the backrefs argument below) and can thus continue with the next identifying + # property continue options = [p.value for p in se.properties if p.name == prop.name] @@ -372,11 +376,7 @@ class IdentifiableAdapter(metaclass=ABCMeta): property_name_list_A.append(prop.name) # check for multi properties in the record: - for prop in property_name_list_A: - property_name_list_B.append(prop) - if len(set(property_name_list_B)) != len(property_name_list_B) or len( - set(property_name_list_A) - ) != len(property_name_list_A): + if len(set(property_name_list_A)) != len(property_name_list_A): raise RuntimeError( "Multi properties used in identifiables could cause unpredictable results and " "are not allowed. You might want to consider a Property with a list as value." diff --git a/src/caoscrawler/sync_graph.py b/src/caoscrawler/sync_graph.py index 1108bb174a67723fc3dd62bb0c231e9066deb259..9c021a10f35e95ca56d45151b8d064ec905993ec 100644 --- a/src/caoscrawler/sync_graph.py +++ b/src/caoscrawler/sync_graph.py @@ -450,6 +450,11 @@ class SyncGraph: backreferences/"is_referenced_by" ("_references_backref"). I.e. the two latter are subesets of the former reference map. + Arguments: + ---------- + flat: list[SyncNode] + all SyncNodes that span the graph for which the reference map shall be created + Last review by Alexander Schlemmer on 2024-05-29. """ # TODO we need to treat children of RecordTypes somehow. diff --git a/src/caoscrawler/sync_node.py b/src/caoscrawler/sync_node.py index 0dff53d5a87e60cfc0ef57b4268d627add209257..3daf782686a5844ab8798c3acf012175d5a76456 100644 --- a/src/caoscrawler/sync_node.py +++ b/src/caoscrawler/sync_node.py @@ -114,9 +114,10 @@ class SyncNode: self.__setattr__(attr, other.__getattribute__(attr)) else: if self.__getattribute__(attr) != other.__getattribute__(attr): - raise ValueError(f"Trying to update {attr} but this would lead to an " - f"override of the value '{self.__getattribute__(attr)}' " - f"by the value '{other.__getattribute__(attr)}'") + raise ImpossibleMergeError( + f"Trying to update {attr} but this would lead to an " + f"override of the value '{self.__getattribute__(attr)}' " + f"by the value '{other.__getattribute__(attr)}'") for p in other.parents: if not parent_in_list(p, self.parents): self.parents.append(p)