diff --git a/src/newcrawler/crawl.py b/src/newcrawler/crawl.py index 48e9c9a5cc49da1674b3fc512f7ee97bdd559531..c17605874df48c952ae51054bd71c2a4d0577d70 100644 --- a/src/newcrawler/crawl.py +++ b/src/newcrawler/crawl.py @@ -67,6 +67,7 @@ from argparse import RawTextHelpFormatter import caosdb as db from caosdb.common.datatype import is_reference from .stores import GeneralStore, RecordStore +from .identified_cache import IdentifiedCache from .structure_elements import StructureElement, Directory from .converters import Converter, DirectoryConverter from .identifiable_adapters import IdentifiableAdapter, LocalStorageIdentifiableAdapter @@ -103,6 +104,7 @@ class Crawler(object): """ self.global_converters = converters + self.identified_cache = IdentifiedCache() self.recordStore = RecordStore() self.generalStore = generalStore @@ -238,15 +240,19 @@ class Crawler(object): return False return True - def get_identified_record_from_local_cache(self, identifiable: db.Record): + def get_identified_record_from_local_cache(self, record: db.Record): """ returns the identifiable if an identifiable with the same values already exists locally (Each identifiable that is not found on the remote server, is 'cached' locally to prevent that the same identifiable exists twice) """ - raise NotImplementedError() + identifiable = self.identifiableAdapter.get_identifiable(record) + if identifiable in self.identified_cache: + return self.identified_cache[identifiable] + else: + return None - def add_identified_record_to_local_cache(self, identifiable: db.Record): + def add_identified_record_to_local_cache(self, record: db.Record): """ adds the given identifiable to the local cache @@ -254,7 +260,8 @@ class Crawler(object): (Each identifiable that is not found on the remote server, is 'cached' locally to prevent that the same identifiable exists twice) """ - raise NotImplementedError() + identifiable = self.identifiableAdapter.get_identifiable(record) + self.identified_cache.add(identifiable=identifiable, record=record) def copy_attributes(self, fro: db.Entity, to: db.Entity): raise NotImplementedError() diff --git a/src/newcrawler/identifiable_adapters.py b/src/newcrawler/identifiable_adapters.py index 06573d3a659014d9c807a4768f5d264603b59932..27f2bae1884cecf1d73d3a1f3ca375bc5c9792a4 100644 --- a/src/newcrawler/identifiable_adapters.py +++ b/src/newcrawler/identifiable_adapters.py @@ -102,7 +102,6 @@ class IdentifiableAdapter(object): def resolve_references(self, record: db.Record): pass - # TODO rename retrieve_registered_identifiable? def get_identifiable(self, record: db.Record): """ retrieve the registred identifiable and fill the property values to create an @@ -126,7 +125,7 @@ class IdentifiableAdapter(object): # case A: in the registered identifiable # case B: in the identifiable - #TODO use id if value is Entity + # TODO use id if value is Entity identifiable.add_property(record.get_property(prop.name)) property_name_list_A.append(prop.name)