diff --git a/src/caoscrawler/crawl.py b/src/caoscrawler/crawl.py index 8b499605097ed49d5c2632fb538f43a1a9c9f415..00b69aacb6aeff690ec860d3cc610138dc8026cd 100644 --- a/src/caoscrawler/crawl.py +++ b/src/caoscrawler/crawl.py @@ -656,54 +656,31 @@ class Crawler(object): def add_to_remote_missing_cache(self, record: db.Record): """ - adds the given identifiable to the local cache + stores the given Record in the remote_missing_cache. - No identifiable with the same values must exist locally. - (Each identifiable that is not found on the remote server, is 'cached' locally to prevent - that the same identifiable exists twice) - - Return False if there is no identifiable for this record and True otherwise. + If no identifiable can be created for the given Record, the Record is NOT stored. """ self.add_to_cache(record=record, cache=self.remote_missing_cache) def add_to_remote_existing_cache(self, record: db.Record): """ - adds the given identifiable to the local cache - - No identifiable with the same values must exist locally. - (Each identifiable that is not found on the remote server, is 'cached' locally to prevent - that the same identifiable exists twice) + stores the given Record in the remote_existing_cache. - Return False if there is no identifiable for this record and True otherwise. + If no identifiable can be created for the given Record, the Record is NOT stored. """ self.add_to_cache(record=record, cache=self.remote_existing_cache) - def add_to_cache(self, record: db.Record, cache): + def add_to_cache(self, record: db.Record, cache) -> Union[Identifiable, None]: """ - adds the given identifiable to the local cache + stores the given Record in the given cache. - No identifiable with the same values must exist locally. - (Each identifiable that is not found on the remote server, is 'cached' locally to prevent - that the same identifiable exists twice) - - Return False if there is no identifiable for this record and True otherwise. + If no identifiable can be created for the given Record, the Record is NOT stored. """ if self.identifiableAdapter is None: raise RuntimeError("Should not happen.") identifiable = self.identifiableAdapter.get_identifiable(record) - if identifiable is None: - # TODO: this error report is bad - # we need appropriate handling for records without an identifiable - # or at least a simple fallback definition if tehre is no identifiable. - - # print(record) - # raise RuntimeError("No identifiable for record.") - - # TODO: check whether that holds: - # if there is no identifiable, for the cache that is the same - # as if the complete entity is the identifiable: - return - cache.add(identifiable=identifiable, record=record) + if identifiable is not None: + cache.add(identifiable=identifiable, record=record) @staticmethod def bend_references_to_new_object(old, new, entities):