diff --git a/src/caoscrawler/crawl.py b/src/caoscrawler/crawl.py index 6e84b1bf3777ae2cea312a31604637f91e530b48..424d6faacd5544f28479d743c64074b3bd757733 100644 --- a/src/caoscrawler/crawl.py +++ b/src/caoscrawler/crawl.py @@ -726,7 +726,7 @@ class Crawler(object): # 2. Can it be identified via a path? # 3. Is it in the cache of already checked Records? # 4. Can it be checked on the remote server? - # 5. Does it have to be new since a needed referenc is missing? + # 5. Does it have to be new since a needed reference is missing? for i in reversed(range(len(flat))): record = flat[i] identifiable = self.identifiableAdapter.get_identifiable(record) @@ -742,17 +742,17 @@ class Crawler(object): del flat[i] # 2. Can it be identified via a path? elif record.path is not None: - exiting = self._get_entity_by_path(record.path) - if exiting is None: + existing = self._get_entity_by_path(record.path) + if existing is None: to_be_inserted.append(record) self.add_to_remote_missing_cache(record) del flat[i] else: - record.id = exiting.id + record.id = existing.id # TODO check the following copying of _size and _checksum # Copy over checksum and size too if it is a file - record._size = exiting._size - record._checksum = exiting._checksum + record._size = existing._size + record._checksum = existing._checksum to_be_updated.append(record) self.add_to_remote_existing_cache(record) del flat[i] @@ -785,7 +785,7 @@ class Crawler(object): del flat[i] resolved_references = True - # 5. Does it have to be new since a needed referenc is missing? + # 5. Does it have to be new since a needed reference is missing? # (Is it impossible to check this record because an identifiable references a # missing record?) elif self._has_missing_object_in_references(identifiable):