From 3e422cfcc4c99baa5c366cd9505fda47241e5028 Mon Sep 17 00:00:00 2001
From: florian <f.spreckelsen@inidscale.com>
Date: Mon, 12 Jun 2023 14:08:36 +0200
Subject: [PATCH] ENH: Add `path_for_authorized_run` argument to
 `Crawler.synchronize`

---
 src/caoscrawler/crawl.py | 16 +++++++++++-----
 1 file changed, 11 insertions(+), 5 deletions(-)

diff --git a/src/caoscrawler/crawl.py b/src/caoscrawler/crawl.py
index 3fd67e4b..8c0c6f6d 100644
--- a/src/caoscrawler/crawl.py
+++ b/src/caoscrawler/crawl.py
@@ -877,6 +877,7 @@ class Crawler(object):
                     crawled_data: Optional[list[db.Record]] = None,
                     no_insert_RTs: Optional[list[str]] = None,
                     no_update_RTs: Optional[list[str]] = None,
+                    path_for_authorized_run: Optional[str] = "",
                     ):
         """
         This function applies several stages:
@@ -898,6 +899,12 @@ class Crawler(object):
         no_update_RTs : list[str], optional
             list of RecordType names. Records that have one of those RecordTypes
             as parent will not be updated
+        path_for_authorized_run : str, optional
+            only used if there are changes that need authorization before being
+            applied. The form for rerunning the crawler with these changes
+            authorized is generated with this path (default: empty string). See
+            ``caosadvancedtools.crawler.Crawler.save_form`` for more info about
+            the authorization form.
 
         Returns
         -------
@@ -966,14 +973,12 @@ class Crawler(object):
         pending_inserts = update_cache.get_inserts(self.run_id)
         if pending_inserts:
             Crawler.inform_about_pending_changes(
-                # TODO crawled_directory is no longer available
-                pending_inserts, self.run_id, "missing crawled_directory")
+                pending_inserts, self.run_id, path_for_authorized_run)
 
         pending_updates = update_cache.get_updates(self.run_id)
         if pending_updates:
             Crawler.inform_about_pending_changes(
-                # TODO crawled_directory is no longer available
-                pending_updates, self.run_id, "missing crawled_directory")
+                pending_updates, self.run_id, path_for_authorized_run)
 
         return (to_be_inserted, to_be_updated)
 
@@ -1289,7 +1294,8 @@ def crawler_main(crawled_directory_path: str,
             _store_dry_run_data(inserts, updates)
         else:
             inserts, updates = crawler.synchronize(commit_changes=True, unique_names=unique_names,
-                                                   crawled_data=crawled_data)
+                                                   crawled_data=crawled_data,
+                                                   path_for_authorized_run=crawled_directory_path)
             if "SHARED_DIR" in os.environ:
                 _notify_about_inserts_and_updates(len(inserts), len(updates), userlog_public,
                                                   crawler.run_id)
-- 
GitLab