From c484ec4282aebd05442d2586b8d4b327bdf482ff Mon Sep 17 00:00:00 2001 From: Daniel <d.hornung@indiscale.com> Date: Thu, 22 Feb 2024 17:13:31 +0100 Subject: [PATCH] FIX Yet another corner case of referencing resolution resolved. --- src/caoscrawler/crawl.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/caoscrawler/crawl.py b/src/caoscrawler/crawl.py index 860f7134..771e2a41 100644 --- a/src/caoscrawler/crawl.py +++ b/src/caoscrawler/crawl.py @@ -705,9 +705,11 @@ class Crawler(object): treated_record = self.treated_records_lookup.get_existing(record) if treated_record is not None: self._merge_identified(treated_record, record, try_to_merge_later, all_records) + all_records.remove(record) referencing_entities = self.create_reference_mapping(all_records) else: self.treated_records_lookup.add(record, None) + assert record.id del flat[i] # 2. Can it be identified via a path? elif record.path is not None: @@ -724,10 +726,12 @@ class Crawler(object): treated_record = self.treated_records_lookup.get_any(record) if treated_record is not None: self._merge_identified(treated_record, record, try_to_merge_later, all_records) + all_records.remove(record) referencing_entities = self.create_reference_mapping(all_records) else: # TODO add identifiable if possible self.treated_records_lookup.add(record, None) + assert record.id del flat[i] entity_was_treated = True @@ -762,6 +766,7 @@ class Crawler(object): # describe the same object. # We merge record into treated_record in order to prevent loss of information self._merge_identified(treated_record, record, try_to_merge_later, all_records) + all_records.remove(record) referencing_entities = self.create_reference_mapping(all_records) del flat[i] @@ -780,6 +785,7 @@ class Crawler(object): record.id = identified_record.id record.path = identified_record.path self.treated_records_lookup.add(record, identifiable) + assert record.id del flat[i] entity_was_treated = True @@ -788,6 +794,7 @@ class Crawler(object): # missing record?) elif self._has_missing_object_in_references(identifiable, referencing_entities): self.treated_records_lookup.add(record, identifiable) + assert record.id del flat[i] entity_was_treated = True -- GitLab