diff --git a/CHANGELOG.md b/CHANGELOG.md index 3fef595a99507b5781bcde96e22e27c5658f93b8..672f36e81e81f33b68e0659569697cfd7b929f66 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -24,6 +24,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Removed ### ### Fixed ### +- usage of ID when looking for identified records ### Security ### diff --git a/src/caoscrawler/crawl.py b/src/caoscrawler/crawl.py index cadd7798d93b94bf4f11c76d18fe8431e61c5d0a..91753c763690fb729d46a36a12f6d690bfa81c6b 100644 --- a/src/caoscrawler/crawl.py +++ b/src/caoscrawler/crawl.py @@ -841,15 +841,19 @@ class Crawler(object): to_be_inserted, to_be_updated = self.split_into_inserts_and_updates(crawled_data) referencing_entities = self.create_reference_mapping(to_be_updated + to_be_inserted) - # TODO: refactoring of typo for el in to_be_updated: # all entity objects are replaced by their IDs except for the not yet inserted ones self.replace_entities_with_ids(el) - identified_records = [ - self.identifiableAdapter.retrieve_identified_record_for_record(record, - referencing_entities) - for record in to_be_updated] + identified_records = [] + for record in to_be_updated: + if record.id is not None: + identified_records.append(db.execute_query(f"find entity with id={record.id}", + unique=True)) + else: + identified_records.append( + self.identifiableAdapter.retrieve_identified_record_for_record( + record, referencing_entities)) # Merge with existing data to prevent unwanted overwrites to_be_updated = self._merge_properties_from_remote(to_be_updated, identified_records) # remove unnecessary updates from list by comparing the target records