diff --git a/src/caoscrawler/crawl.py b/src/caoscrawler/crawl.py index 4167ebf443916d0c28b42443b335082f0b52d13a..10e59b38f94e0d3f437b0f64f18325e28d9b1819 100644 --- a/src/caoscrawler/crawl.py +++ b/src/caoscrawler/crawl.py @@ -888,9 +888,9 @@ ____________________\n""".format(i + 1, len(pending_changes)) + str(el[3])) def crawler_main(crawled_directory_path: str, cfood_file_name: str, - identifiables_definition_file: str = None, + identifiables_definition_file: Optional[str] = None, debug: bool = False, - provenance_file: str = None, + provenance_file: Optional[str] = None, dry_run: bool = False, prefix: str = "", securityMode: SecurityMode = SecurityMode.UPDATE, @@ -934,9 +934,10 @@ def crawler_main(crawled_directory_path: str, return_value : int 0 if successful """ - crawler = Crawler(debug=debug, securityMode=securityMode) + crawler = Crawler(securityMode=securityMode) try: - crawler.crawl_directory(crawled_directory_path, cfood_file_name, restricted_path) + crawled_data, debug_tree = crawler.crawl_directory( + crawled_directory_path, cfood_file_name, restricted_path) except ConverterValidationError as err: logger.error(err) return 1 @@ -958,7 +959,7 @@ def crawler_main(crawled_directory_path: str, remove_prefix = prefix if dry_run: - ins, upd = crawler.synchronize(commit_changes=False) + ins, upd = crawler.synchronize(crawled_data, commit_changes=False) inserts = [str(i) for i in ins] updates = [str(i) for i in upd] with open("dry.yml", "w") as f: @@ -967,7 +968,7 @@ def crawler_main(crawled_directory_path: str, "update": updates})) else: rtsfinder = dict() - for elem in crawler.crawled_data: + for elem in crawled_data: if isinstance(elem, db.File): # correct the file path: # elem.file = os.path.join(args.path, elem.file) @@ -1004,7 +1005,7 @@ def crawler_main(crawled_directory_path: str, raise RuntimeError("Missing RecordTypes: {}". format(", ".join(notfound))) - crawler.synchronize(commit_changes=True, unique_names=unique_names) + crawler.synchronize(crawled_data, commit_changes=True, unique_names=unique_names) return 0