From e4de31312b54b8310ed7fb7b5698d4e4d3fb2268 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Henrik=20tom=20W=C3=B6rden?= <henrik@trineo.org>
Date: Fri, 17 Dec 2021 17:13:13 +0100
Subject: [PATCH] MAINT: introduce the cache in the crawler

---
 src/newcrawler/crawl.py                 | 15 +++++++++++----
 src/newcrawler/identifiable_adapters.py |  3 +--
 2 files changed, 12 insertions(+), 6 deletions(-)

diff --git a/src/newcrawler/crawl.py b/src/newcrawler/crawl.py
index 48e9c9a5..c1760587 100644
--- a/src/newcrawler/crawl.py
+++ b/src/newcrawler/crawl.py
@@ -67,6 +67,7 @@ from argparse import RawTextHelpFormatter
 import caosdb as db
 from caosdb.common.datatype import is_reference
 from .stores import GeneralStore, RecordStore
+from .identified_cache import IdentifiedCache
 from .structure_elements import StructureElement, Directory
 from .converters import Converter, DirectoryConverter
 from .identifiable_adapters import IdentifiableAdapter, LocalStorageIdentifiableAdapter
@@ -103,6 +104,7 @@ class Crawler(object):
         """
         self.global_converters = converters
 
+        self.identified_cache = IdentifiedCache()
         self.recordStore = RecordStore()
 
         self.generalStore = generalStore
@@ -238,15 +240,19 @@ class Crawler(object):
                 return False
         return True
 
-    def get_identified_record_from_local_cache(self, identifiable: db.Record):
+    def get_identified_record_from_local_cache(self, record: db.Record):
         """
         returns the identifiable if an identifiable with the same values already exists locally
         (Each identifiable that is not found on the remote server, is 'cached' locally to prevent
         that the same identifiable exists twice)
         """
-        raise NotImplementedError()
+        identifiable = self.identifiableAdapter.get_identifiable(record)
+        if identifiable in self.identified_cache:
+            return self.identified_cache[identifiable]
+        else:
+            return None
 
-    def add_identified_record_to_local_cache(self, identifiable: db.Record):
+    def add_identified_record_to_local_cache(self, record: db.Record):
         """
         adds the given identifiable to the local cache
 
@@ -254,7 +260,8 @@ class Crawler(object):
         (Each identifiable that is not found on the remote server, is 'cached' locally to prevent
         that the same identifiable exists twice)
         """
-        raise NotImplementedError()
+        identifiable = self.identifiableAdapter.get_identifiable(record)
+        self.identified_cache.add(identifiable=identifiable, record=record)
 
     def copy_attributes(self, fro: db.Entity, to: db.Entity):
         raise NotImplementedError()
diff --git a/src/newcrawler/identifiable_adapters.py b/src/newcrawler/identifiable_adapters.py
index 06573d3a..27f2bae1 100644
--- a/src/newcrawler/identifiable_adapters.py
+++ b/src/newcrawler/identifiable_adapters.py
@@ -102,7 +102,6 @@ class IdentifiableAdapter(object):
     def resolve_references(self, record: db.Record):
         pass
 
-    # TODO rename retrieve_registered_identifiable?
     def get_identifiable(self, record: db.Record):
         """
         retrieve the registred identifiable and fill the property values to create an
@@ -126,7 +125,7 @@ class IdentifiableAdapter(object):
             # case A: in the registered identifiable
             # case B: in the identifiable
 
-            #TODO use id if value is Entity
+            # TODO: use the id if the value is an Entity
             identifiable.add_property(record.get_property(prop.name))
             property_name_list_A.append(prop.name)
 
-- 
GitLab