From 1aeaa35940f925a1e7d4cca0b9146ec255fe5783 Mon Sep 17 00:00:00 2001 From: Alexander Schlemmer <alexander@mail-schlemmer.de> Date: Wed, 8 Mar 2023 15:09:31 +0100 Subject: [PATCH] MAINT: finished refactoring of main crawler functions --- src/caoscrawler/crawl.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/src/caoscrawler/crawl.py b/src/caoscrawler/crawl.py index 4167ebf4..10e59b38 100644 --- a/src/caoscrawler/crawl.py +++ b/src/caoscrawler/crawl.py @@ -888,9 +888,9 @@ ____________________\n""".format(i + 1, len(pending_changes)) + str(el[3])) def crawler_main(crawled_directory_path: str, cfood_file_name: str, - identifiables_definition_file: str = None, + identifiables_definition_file: Optional[str] = None, debug: bool = False, - provenance_file: str = None, + provenance_file: Optional[str] = None, dry_run: bool = False, prefix: str = "", securityMode: SecurityMode = SecurityMode.UPDATE, @@ -934,9 +934,10 @@ def crawler_main(crawled_directory_path: str, return_value : int 0 if successful """ - crawler = Crawler(debug=debug, securityMode=securityMode) + crawler = Crawler(securityMode=securityMode) try: - crawler.crawl_directory(crawled_directory_path, cfood_file_name, restricted_path) + crawled_data, debug_tree = crawler.crawl_directory( + crawled_directory_path, cfood_file_name, restricted_path) except ConverterValidationError as err: logger.error(err) return 1 @@ -958,7 +959,7 @@ def crawler_main(crawled_directory_path: str, remove_prefix = prefix if dry_run: - ins, upd = crawler.synchronize(commit_changes=False) + ins, upd = crawler.synchronize(crawled_data, commit_changes=False) inserts = [str(i) for i in ins] updates = [str(i) for i in upd] with open("dry.yml", "w") as f: @@ -967,7 +968,7 @@ def crawler_main(crawled_directory_path: str, "update": updates})) else: rtsfinder = dict() - for elem in crawler.crawled_data: + for elem in crawled_data: if isinstance(elem, db.File): # correct the file path: # elem.file = os.path.join(args.path, elem.file) @@ -1004,7 +1005,7 @@ def crawler_main(crawled_directory_path: str, raise RuntimeError("Missing RecordTypes: {}". format(", ".join(notfound))) - crawler.synchronize(commit_changes=True, unique_names=unique_names) + crawler.synchronize(crawled_data, commit_changes=True, unique_names=unique_names) return 0 -- GitLab