diff --git a/src/caoscrawler/crawl.py b/src/caoscrawler/crawl.py index 0952d6456e23daa630d7da04ada521f21960fd5c..0883ca8fc529d477966083ff158557f396e1c22c 100644 --- a/src/caoscrawler/crawl.py +++ b/src/caoscrawler/crawl.py @@ -745,9 +745,15 @@ class Crawler(object): record._size = identified_record._size record._checksum = identified_record._checksum - merge_entities(record, identified_record) - to_be_updated.append(record) - self.add_to_remote_existing_cache(record) + # Create a temporary copy since the merge will be conducted in place + tmp = deepcopy(identified_record) + # A force merge will overwrite any properties that both + # the identified and the crawled record have with the + # values of the crawled record while keeping existing + # properties intact. + merge_entities(tmp, record, force=True) + to_be_updated.append(tmp) + self.add_to_remote_existing_cache(tmp) del flat[i] resolved_references = True