diff --git a/CHANGELOG.md b/CHANGELOG.md index 6bab1031394c51f8988dc25c4947c84fd2644b3d..30a9835cb1a24af7d44454ecffc22271b390cbc5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -18,6 +18,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - The Crawler `synchronize` function now takes list of RecordType names. Records that have the given names as parents are excluded from inserts or updates +- `Crawler.synchronize` now takes an optional `path_for_authorized_run` argument + that specifies the path with which the crawler can be rerun to authorize + pending changes. ### Changed ### diff --git a/src/caoscrawler/crawl.py b/src/caoscrawler/crawl.py index 3fd67e4bf2915d2755133ef8d4b8387b073a26a2..8c0c6f6dadde42a3239e9bc09a2e3131a8cc05da 100644 --- a/src/caoscrawler/crawl.py +++ b/src/caoscrawler/crawl.py @@ -877,6 +877,7 @@ class Crawler(object): crawled_data: Optional[list[db.Record]] = None, no_insert_RTs: Optional[list[str]] = None, no_update_RTs: Optional[list[str]] = None, + path_for_authorized_run: Optional[str] = "", ): """ This function applies several stages: @@ -898,6 +899,12 @@ class Crawler(object): no_update_RTs : list[str], optional list of RecordType names. Records that have one of those RecordTypes as parent will not be updated + path_for_authorized_run : str, optional + only used if there are changes that need authorization before being + applied. The form for rerunning the crawler with the authorization + of these changes will be generated with this path. See + ``caosadvancedtools.crawler.Crawler.save_form`` for more info about + the authorization form. Returns ------- @@ -966,14 +973,12 @@ class Crawler(object): pending_inserts = update_cache.get_inserts(self.run_id) if pending_inserts: Crawler.inform_about_pending_changes( - # TODO crawled_directory is no longer available - pending_inserts, self.run_id, "missing crawled_directory") + pending_inserts, self.run_id, path_for_authorized_run) pending_updates = update_cache.get_updates(self.run_id) if pending_updates: Crawler.inform_about_pending_changes( - # TODO crawled_directory is no longer available - pending_updates, self.run_id, "missing crawled_directory") + pending_updates, self.run_id, path_for_authorized_run) return (to_be_inserted, to_be_updated) @@ -1289,7 +1294,8 @@ def crawler_main(crawled_directory_path: str, _store_dry_run_data(inserts, updates) else: inserts, updates = crawler.synchronize(commit_changes=True, unique_names=unique_names, - crawled_data=crawled_data) + crawled_data=crawled_data, + path_for_authorized_run=crawled_directory_path) if "SHARED_DIR" in os.environ: _notify_about_inserts_and_updates(len(inserts), len(updates), userlog_public, crawler.run_id)