From e42f344ce95c61510fd96be9ab1c75323da2219a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Henrik=20tom=20W=C3=B6rden?= <h.tomwoerden@indiscale.com>
Date: Tue, 15 Nov 2022 15:46:24 +0100
Subject: [PATCH] DOC: refine docstrings

---
 src/caoscrawler/identifiable.py     |  7 ++++++-
 src/caoscrawler/identified_cache.py | 31 +++++++++++++++++------------
 2 files changed, 24 insertions(+), 14 deletions(-)

diff --git a/src/caoscrawler/identifiable.py b/src/caoscrawler/identifiable.py
index dd6491ea..129bb9f3 100644
--- a/src/caoscrawler/identifiable.py
+++ b/src/caoscrawler/identifiable.py
@@ -65,7 +65,12 @@ class Identifiable():
 
     @staticmethod
     def _value_representation(value) -> str:
-        """returns the string representation of property values to be used in the hash function """
+        """returns the string representation of property values to be used in the hash function
+
+        For a File Entity the string is its path; for other Entities it is the CaosDB ID (or the
+        Python ID, but only if there is no CaosDB ID); for bool, float, int and str values it is
+        their string representation.
+        """
 
         if value is None:
             return "None"
diff --git a/src/caoscrawler/identified_cache.py b/src/caoscrawler/identified_cache.py
index 878ae443..9df66e92 100644
--- a/src/caoscrawler/identified_cache.py
+++ b/src/caoscrawler/identified_cache.py
@@ -25,18 +25,7 @@
 
 
 """
-This module is a cache for Records where we checked the existence in a remote server using
-identifiables. If the Record was found, this means that we identified the corresponding Record
-in the remote server and the ID of the local object can be set.
-To prevent querying the server again and again for the same objects, this cache allows storing
-Records that were found on a remote server and those that were not (typically in separate caches).
-The look up in the cache is done using a hash of a string representation.
-
-TODO: We need a general review:
-- How are entities identified with each other?
-- What happens if the identification fails?
-
-Checkout how this was done in the old crawler.
+See the docstring of the IdentifiedCache class.
 """
 
 from .identifiable import Identifiable
@@ -44,7 +33,23 @@ import caosdb as db
 
 
 class IdentifiedCache(object):
-    def __init__(self):
+    """
+    This class is like a dictionary where the keys are Identifiables. When you check whether an
+    Identifiable exists as a key, this class returns True not only if that exact Python object is
+    used as a key, but also if an Identifiable is used as a key that is **equal** to the one being
+    considered (see the __eq__ function of Identifiable). Similarly, if you do
+    `cache[identifiable]` you get the Record whose key is an Identifiable that is equal to the one
+    in the square brackets.
+
+    This class is used for Records whose existence on a remote server was checked using
+    Identifiables. If the Record was found, this means that we identified the corresponding Record
+    on the remote server and the ID of the local object can be set.
+    To prevent querying the server again and again for the same objects, this cache allows storing
+    Records that were found on a remote server and those that were not (typically in separate
+    caches).
+    """
+
+    def __init__(self):
         self._cache = {}
         self._identifiables = []
 
-- 
GitLab