From 4e9d33d1f55e620c3f6030d94d42cf7cd711a891 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Henrik=20tom=20W=C3=B6rden?= <h.tomwoerden@indiscale.com>
Date: Fri, 21 Oct 2022 15:55:45 +0200
Subject: [PATCH] MAINT: docs and string conversion only for specific types

---
 src/caoscrawler/identified_cache.py | 27 +++++++++++++++++++--------
 1 file changed, 19 insertions(+), 8 deletions(-)

diff --git a/src/caoscrawler/identified_cache.py b/src/caoscrawler/identified_cache.py
index 02618f31..7b680b5a 100644
--- a/src/caoscrawler/identified_cache.py
+++ b/src/caoscrawler/identified_cache.py
@@ -23,22 +23,30 @@
 # ** end header
 #
 
-"""
-stores identified records and is able to detect duplicates
-"""
-
-import caosdb as db
-
-from hashlib import sha256
 
 """
+This module provides a cache for Records whose existence on a remote server was checked using
+identifiables. If a Record was found, the corresponding Record on the remote server has been
+identified and the ID of the local object can be set.
+To avoid querying the server repeatedly for the same objects, this cache allows storing both
+Records that were found on a remote server and those that were not (typically in separate caches).
+The lookup in the cache is done using a hash of a string representation.
+
 TODO: We need a general review:
 - How are entities identified with each other?
 - What happens if the identification fails?
+
+Check out how this was done in the old crawler.
 """
 
+import caosdb as db
+
+from hashlib import sha256
+from datetime import datetime
+
 
 def _value_representation(value):
+    """returns the string representation of property values to be used in the hash function """
 
     # TODO: (for review)
     #       This expansion of the hash function was introduced recently
@@ -54,8 +62,11 @@ def _value_representation(value):
             return "PyID="+str(id(value))
     elif isinstance(value, list):
         return "["+", ".join([_value_representation(el) for el in value])+"]"
-    else:
+    elif (isinstance(value, str) or isinstance(value, int) or isinstance(value, float)
+          or isinstance(value, datetime)):
         return str(value)
+    else:
+        raise ValueError(f"Unknown datatype of the value: {value}")
 
 
 def _create_hashable_string(identifiable: db.Record):
-- 
GitLab