From 53ce683625bb6f26515f342b5ee191842d2f5d5d Mon Sep 17 00:00:00 2001 From: florian <f.spreckelsen@inidscale.com> Date: Wed, 2 Nov 2022 14:54:53 +0100 Subject: [PATCH] FIX: Use force merge with a deepcopy --- src/caoscrawler/crawl.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/src/caoscrawler/crawl.py b/src/caoscrawler/crawl.py index 0952d645..0883ca8f 100644 --- a/src/caoscrawler/crawl.py +++ b/src/caoscrawler/crawl.py @@ -745,9 +745,15 @@ class Crawler(object): record._size = identified_record._size record._checksum = identified_record._checksum - merge_entities(record, identified_record) - to_be_updated.append(record) - self.add_to_remote_existing_cache(record) + # Create a temporary copy since the merge will be conducted in place + tmp = deepcopy(identified_record) + # A force merge will overwrite any properties that both + # the identified and the crawled record have with the + # values of the crawled record while keeping existing + # properties intact. + merge_entities(tmp, record, force=True) + to_be_updated.append(tmp) + self.add_to_remote_existing_cache(tmp) del flat[i] resolved_references = True -- GitLab