From e42f344ce95c61510fd96be9ab1c75323da2219a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20tom=20W=C3=B6rden?= <h.tomwoerden@indiscale.com> Date: Tue, 15 Nov 2022 15:46:24 +0100 Subject: [PATCH] DOC: refine docstrings --- src/caoscrawler/identifiable.py | 7 ++++++- src/caoscrawler/identified_cache.py | 31 +++++++++++++++++------------ 2 files changed, 24 insertions(+), 14 deletions(-) diff --git a/src/caoscrawler/identifiable.py b/src/caoscrawler/identifiable.py index dd6491ea..129bb9f3 100644 --- a/src/caoscrawler/identifiable.py +++ b/src/caoscrawler/identifiable.py @@ -65,7 +65,12 @@ class Identifiable(): @staticmethod def _value_representation(value) -> str: - """returns the string representation of property values to be used in the hash function """ + """returns the string representation of property values to be used in the hash function + + The string is the path of a File Entity, the CaosDB ID or Python ID of other Entities + (Python ID only if there is no CaosDB ID) and the string representation of bool, float, int + and str. + """ if value is None: return "None" diff --git a/src/caoscrawler/identified_cache.py b/src/caoscrawler/identified_cache.py index 878ae443..9df66e92 100644 --- a/src/caoscrawler/identified_cache.py +++ b/src/caoscrawler/identified_cache.py @@ -25,18 +25,7 @@ """ -This module is a cache for Records where we checked the existence in a remote server using -identifiables. If the Record was found, this means that we identified the corresponding Record -in the remote server and the ID of the local object can be set. -To prevent querying the server again and again for the same objects, this cache allows storing -Records that were found on a remote server and those that were not (typically in separate caches). -The look up in the cache is done using a hash of a string representation. - -TODO: We need a general review: -- How are entities identified with each other? -- What happens if the identification fails? 
- Checkout how this was done in the old crawler. +see class docstring """ from .identifiable import Identifiable @@ -44,7 +33,23 @@ import caosdb as db class IdentifiedCache(object): - def __init__(self): + """ + This class is like a dictionary where the keys are Identifiables. When you check whether an + Identifiable exists as key this class returns True not only if that exact Python object is + used as a key, but also if an Identifiable is used as key that is **equal** to the one being + considered (see __eq__ function of Identifiable). Similarly, if you do `cache[identifiable]` + you get the Record where the key is an Identifiable that is equal to the one in the square + brackets. + + This class is used for Records where we checked the existence in a remote server using + identifiables. If the Record was found, this means that we identified the corresponding Record + in the remote server and the ID of the local object can be set. + To prevent querying the server again and again for the same objects, this cache allows storing + Records that were found on a remote server and those that were not (typically in separate + caches). + """ + + def __init__(self): self._cache = {} self._identifiables = [] -- GitLab