diff --git a/src/caoscrawler/identifiable.py b/src/caoscrawler/identifiable.py index edab85d38c29ccad5e3a250d45fc9a2b7c701182..aead37694e86fa8f837585ab37ba44d0c64e61cd 100644 --- a/src/caoscrawler/identifiable.py +++ b/src/caoscrawler/identifiable.py @@ -40,8 +40,7 @@ class Identifiable(): The fingerprint of a Record in CaosDB. This class contains the information that is used by the CaosDB Crawler to identify Records. - On one hand, this can be the ID or a Record or the path of a File. - On the other hand, in order to check whether a Record exits in the CaosDB Server, a query can + In order to check whether a Record exists in the CaosDB Server, a query can be created using the information contained in the Identifiable. Parameters @@ -51,24 +50,22 @@ class Identifiable(): properties: dict, keys are names of Properties; values are Property values Note, that lists are not checked for equality but are interpreted as multiple conditions for a single Property. - path: str, In case of files: The path where the file is stored. backrefs: list, TODO future """ - def __init__(self, record_id: int = None, path: str = None, record_type: str = None, + def __init__(self, record_id: int = None, record_type: str = None, name: str = None, properties: dict = None, backrefs: list[Union[int, str]] = None): - if (record_id is None and path is None and name is None + if (record_id is None and name is None and (backrefs is None or len(backrefs) == 0) and (properties is None or len(properties) == 0)): raise ValueError( - "There is no identifying information. You need to add a path or " + "There is no identifying information. 
You need to add " "properties or other identifying attributes.") if properties is not None and 'name' in [k.lower() for k in properties.keys()]: raise ValueError("Please use the separete 'name' keyword instead of the properties " "dict for name") self.record_id = record_id - self.path = path self.record_type = record_type self.name = name if name == "": @@ -87,7 +84,7 @@ class Identifiable(): def _value_representation(value) -> str: """returns the string representation of property values to be used in the hash function - The string is the path of a File Entity, the CaosDB ID or Python ID of other Entities + The string is the CaosDB ID of other Entities (Python Id only if there is no CaosDB ID) and the string representation of bool, float, int and str. """ @@ -125,18 +122,13 @@ class Identifiable(): def __eq__(self, other) -> bool: """ - Identifiables are equal if they belong to the same Record. Since ID and path are on their - own enough to identify the Record it is sufficient if those attributes are equal. - 1. both IDs are set (not None) -> equal if IDs are equal - 2. both paths are set (not None) -> equal if paths are equal - 3. equal if attribute representations are equal + Identifiables are equal if they belong to the same Record. 
+ equal if attribute representations are equal """ if not isinstance(other, Identifiable): raise ValueError("Identifiable can only be compared to other Identifiable objects.") elif self.record_id is not None and other.record_id is not None: return self.record_id == other.record_id - elif self.path is not None and other.path is not None: - return self.path == other.path elif self.get_representation() == other.get_representation(): return True else: @@ -145,6 +137,6 @@ class Identifiable(): def __repr__(self): pstring = json.dumps({k: str(v) for k, v in self.properties.items()}) return (f"{self.__class__.__name__} for RT {self.record_type}: id={self.record_id}; " - f"name={self.name}\n\tpath={self.path}\n" + f"name={self.name}\n" f"\tproperties:\n{pstring}\n" f"\tbackrefs:\n{self.backrefs}") diff --git a/src/caoscrawler/identifiable_adapters.py b/src/caoscrawler/identifiable_adapters.py index d8c3ecf1cd21cb1efb2a6cb38c5222ba284f0087..f8cbfef1c638c73b57e5bc90feac7d369a086efe 100644 --- a/src/caoscrawler/identifiable_adapters.py +++ b/src/caoscrawler/identifiable_adapters.py @@ -298,14 +298,13 @@ startswith: bool, optional # fill the values: for prop in se.registered_identifiable.properties: + # TODO: + # If there are multiproperties in the registered_identifiable, then only the LAST is + # taken into account (later properties overwrite previous one in the dict below). if prop.name == "name": name = se.name continue - # problem: what happens with multi properties? 
- # case A: in the registered identifiable - # case B: in the identifiable - # treated elsewhere if prop.name.lower() == "is_referenced_by": for el in identifiable_backrefs: assert isinstance(el, SyncNode) @@ -356,7 +355,6 @@ startswith: bool, optional try: return Identifiable( record_id=se.id, - path=se.path, record_type=se.registered_identifiable.parents[0].name, name=name, properties=identifiable_props, @@ -399,6 +397,8 @@ startswith: bool, optional @staticmethod def referencing_entity_has_appropriate_type(parents, register_identifiable): + """ + """ if register_identifiable.get_property("is_referenced_by") is None: return False if register_identifiable.get_property("is_referenced_by").value is None: diff --git a/unittests/test_identifiable.py b/unittests/test_identifiable.py index 32bd729e5c09e61f064cb58750c78ae265f56539..074c3843e351b20d17813a661974fdc59ca0442a 100644 --- a/unittests/test_identifiable.py +++ b/unittests/test_identifiable.py @@ -83,13 +83,5 @@ def test_equality(): record_id=12, properties={"a": 0}) != Identifiable(record_id=13, properties={"a": 0}) assert Identifiable( record_id=12, properties={"a": 0}) == Identifiable(properties={"a": 0}) - assert Identifiable( - path="a", properties={"a": 0}) != Identifiable(path="b", properties={"a": 0}) - assert Identifiable( - path="a", properties={"a": 0}) == Identifiable(path="a", properties={"a": 1}) - assert Identifiable( - path="a", properties={"a": 0}) == Identifiable(properties={"a": 0}) - assert Identifiable(properties={"a": 0}) == Identifiable( - properties={"a": 0}) - assert Identifiable(properties={"a": 0}) != Identifiable( - properties={"a": 1}) + assert Identifiable(properties={"a": 0}) == Identifiable(properties={"a": 0}) + assert Identifiable(properties={"a": 0}) != Identifiable(properties={"a": 1})