From 438ef826b2ff99ecd7da398a4de5514be2999c9b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20tom=20W=C3=B6rden?= <h.tomwoerden@indiscale.com> Date: Thu, 27 Apr 2023 14:16:26 +0200 Subject: [PATCH] wip --- src/caoscrawler/crawl.py | 13 ++++--------- src/caoscrawler/identifiable_adapters.py | 14 +++++++++----- 2 files changed, 13 insertions(+), 14 deletions(-) diff --git a/src/caoscrawler/crawl.py b/src/caoscrawler/crawl.py index 3acdde95..0af64fd2 100644 --- a/src/caoscrawler/crawl.py +++ b/src/caoscrawler/crawl.py @@ -847,15 +847,10 @@ class Crawler(object): # all entity objects are replaced by their IDs except for the not yet inserted ones self.replace_entities_with_ids(el) - identified_records = [] - for record in to_be_updated: - if record.id is not None: - identified_records.append(db.execute_query(f"find entity with id={record.id}", - unique=True)) - else: - identified_records.append( - self.identifiableAdapter.retrieve_identified_record_for_record( - record, referencing_entities)) + identified_records = [ + self.identifiableAdapter.retrieve_identified_record_for_record(record, + referencing_entities) + for record in to_be_updated] # Merge with existing data to prevent unwanted overwrites to_be_updated = self._merge_properties_from_remote(to_be_updated, identified_records) # remove unnecessary updates from list by comparing the target records diff --git a/src/caoscrawler/identifiable_adapters.py b/src/caoscrawler/identifiable_adapters.py index 241685b5..1d4ef209 100644 --- a/src/caoscrawler/identifiable_adapters.py +++ b/src/caoscrawler/identifiable_adapters.py @@ -264,8 +264,6 @@ identifiabel, identifiable and identified record) for a Record. """ pass - # TODO: remove side effect - # TODO: use ID if record has one? 
def retrieve_identified_record_for_record(self, record: db.Record, referencing_entities=None): """ This function combines all functionality of the IdentifierAdapter by @@ -275,10 +273,12 @@ identifiabel, identifiable and identified record) for a Record. In case there was no appropriate registered identifiable or no identifiable could be found return value is None. """ - identifiable = self.get_identifiable(record, referencing_entities=referencing_entities) + if record.path is not None: + return db.get_entity_by_path(record.path) + if record.id is not None: + return db.get_entity_by_id(record.id) - if identifiable.path is not None: - return self.get_file(identifiable) + identifiable = self.get_identifiable(record, referencing_entities=referencing_entities) return self.retrieve_identified_record_for_identifiable(identifiable) @@ -450,6 +450,10 @@ class CaosDBIdentifiableAdapter(IdentifiableAdapter): self._registered_identifiables[name] = definition def get_file(self, identifiable: Identifiable): + # TODO is this needed for Identifiable? + # or can we get rid of this function? + if isinstance(identifiable, db.Entity): + return db.get_entity_by_path(identifiable.path) if identifiable.path is None: raise RuntimeError("Path must not be None for File retrieval.") candidates = db.execute_query("FIND File which is stored at '{}'".format( -- GitLab