diff --git a/src/caoscrawler/crawl.py b/src/caoscrawler/crawl.py index ecfefa17527316e511216513f416b1020ae975ee..2aeb220cb3279c5bca367305f374218c4ce5c304 100644 --- a/src/caoscrawler/crawl.py +++ b/src/caoscrawler/crawl.py @@ -784,6 +784,8 @@ class Crawler(object): for i in reversed(range(len(crawled_data))): if not check_identical(crawled_data[i], identified_records[i]): + logger.debug("Sheduled update because of the folllowing diff:\n" + + str(compare_entities(crawled_data[i], identified_records[i]))) actual_updates.append(crawled_data[i]) return actual_updates diff --git a/src/caoscrawler/scanner.py b/src/caoscrawler/scanner.py index 6f5545b1d0d53114282a3477b6855900f5294520..3f899a36ed5a9fed073df666abba25cccd2974be 100644 --- a/src/caoscrawler/scanner.py +++ b/src/caoscrawler/scanner.py @@ -264,6 +264,8 @@ def scanner(items: list[StructureElement], converters_path = [] for element in items: + element_path =os.path.join(*(structure_elements_path + [element.get_name()])) + logger.debug(f"Dealing with {element_path}") for converter in converters: # type is something like "matches files", replace isinstance with "type_matches" @@ -276,8 +278,7 @@ def scanner(items: list[StructureElement], record_store_copy = record_store.create_scoped_copy() # Create an entry for this matched structure element that contains the path: - general_store_copy[converter.name] = ( - os.path.join(*(structure_elements_path + [element.get_name()]))) + general_store_copy[converter.name] = element_path # extracts values from structure element and stores them in the # variable store