diff --git a/src/caoscrawler/crawl.py b/src/caoscrawler/crawl.py index 3acdde9591afed9c42e9ee524194f6668c3584b5..0af64fd2b2d9e835d6d8273921aa93981b510338 100644 --- a/src/caoscrawler/crawl.py +++ b/src/caoscrawler/crawl.py @@ -847,15 +847,10 @@ class Crawler(object): # all entity objects are replaced by their IDs except for the not yet inserted ones self.replace_entities_with_ids(el) - identified_records = [] - for record in to_be_updated: - if record.id is not None: - identified_records.append(db.execute_query(f"find entity with id={record.id}", - unique=True)) - else: - identified_records.append( - self.identifiableAdapter.retrieve_identified_record_for_record( - record, referencing_entities)) + identified_records = [ + self.identifiableAdapter.retrieve_identified_record_for_record(record, + referencing_entities) + for record in to_be_updated] # Merge with existing data to prevent unwanted overwrites to_be_updated = self._merge_properties_from_remote(to_be_updated, identified_records) # remove unnecessary updates from list by comparing the target records diff --git a/src/caoscrawler/identifiable_adapters.py b/src/caoscrawler/identifiable_adapters.py index 241685b5cfe9d87acad16e0c6a871d9ea6ad79e3..1d4ef209d401fbc86fbcd8b2693e9a894bd679ea 100644 --- a/src/caoscrawler/identifiable_adapters.py +++ b/src/caoscrawler/identifiable_adapters.py @@ -264,8 +264,6 @@ identifiabel, identifiable and identified record) for a Record. """ pass - # TODO: remove side effect - # TODO: use ID if record has one? def retrieve_identified_record_for_record(self, record: db.Record, referencing_entities=None): """ This function combines all functionality of the IdentifierAdapter by @@ -275,10 +273,12 @@ identifiabel, identifiable and identified record) for a Record. In case there was no appropriate registered identifiable or no identifiable could be found return value is None. """ - identifiable = self.get_identifiable(record, referencing_entities=referencing_entities) + if record.path is not None: + return db.get_entity_by_path(record.path) + if record.id is not None: + return db.get_entity_by_id(record.id) - if identifiable.path is not None: - return self.get_file(identifiable) + identifiable = self.get_identifiable(record, referencing_entities=referencing_entities) return self.retrieve_identified_record_for_identifiable(identifiable) @@ -450,6 +450,10 @@ class CaosDBIdentifiableAdapter(IdentifiableAdapter): self._registered_identifiables[name] = definition def get_file(self, identifiable: Identifiable): + # TODO is this needed for Identifiable? + # or can we get rid of this function? + if isinstance(identifiable, db.Entity): + return db.get_entity_by_path(identifiable) if identifiable.path is None: raise RuntimeError("Path must not be None for File retrieval.") candidates = db.execute_query("FIND File which is stored at '{}'".format(