Commit 3e422cfc authored by florian

ENH: Add `path_for_authorized_run` argument to `Crawler.synchronize`

parent a2aae620
2 merge requests: !123 REL: Release v0.6.0, !122 Fix missing crawled_directory
@@ -877,6 +877,7 @@ class Crawler(object):
                     crawled_data: Optional[list[db.Record]] = None,
                     no_insert_RTs: Optional[list[str]] = None,
                     no_update_RTs: Optional[list[str]] = None,
+                    path_for_authorized_run: Optional[str] = "",
                     ):
         """
         This function applies several stages:
@@ -898,6 +899,12 @@ class Crawler(object):
         no_update_RTs : list[str], optional
             list of RecordType names. Records that have one of those RecordTypes
             as parent will not be updated
+        path_for_authorized_run : str, optional
+            only used if there are changes that need authorization before being
+            applied. The form for rerunning the crawler with the authorization
+            of these changes will be generated with this path. See
+            ``caosadvancedtools.crawler.Crawler.save_form`` for more info about
+            the authorization form.

         Returns
         -------
@@ -966,14 +973,12 @@ class Crawler(object):
         pending_inserts = update_cache.get_inserts(self.run_id)
         if pending_inserts:
             Crawler.inform_about_pending_changes(
-                # TODO crawled_directory is no longer available
-                pending_inserts, self.run_id, "missing crawled_directory")
+                pending_inserts, self.run_id, path_for_authorized_run)

         pending_updates = update_cache.get_updates(self.run_id)
         if pending_updates:
             Crawler.inform_about_pending_changes(
-                # TODO crawled_directory is no longer available
-                pending_updates, self.run_id, "missing crawled_directory")
+                pending_updates, self.run_id, path_for_authorized_run)

         return (to_be_inserted, to_be_updated)
@@ -1289,7 +1294,8 @@ def crawler_main(crawled_directory_path: str,
             _store_dry_run_data(inserts, updates)
         else:
             inserts, updates = crawler.synchronize(commit_changes=True, unique_names=unique_names,
-                                                   crawled_data=crawled_data)
+                                                   crawled_data=crawled_data,
+                                                   path_for_authorized_run=crawled_directory_path)
         if "SHARED_DIR" in os.environ:
             _notify_about_inserts_and_updates(len(inserts), len(updates), userlog_public,
                                               crawler.run_id)
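For context, a minimal sketch of how a caller might use the new argument; only the
synchronize signature comes from the diff above, while the import path, the example
path, and the placeholder record list are assumptions (a configured CaosDB/LinkAhead
connection is also assumed):

    from caoscrawler.crawl import Crawler

    crawler = Crawler()
    crawled_data = []  # placeholder: records produced by an earlier scan

    # If any of these changes need authorization, the generated authorization
    # form will point at path_for_authorized_run, so the crawler can be rerun
    # on the same directory once the changes are approved (see
    # caosadvancedtools.crawler.Crawler.save_form).
    inserts, updates = crawler.synchronize(
        commit_changes=True,
        crawled_data=crawled_data,
        path_for_authorized_run="/path/to/crawled/directory",
    )

As the last hunk shows, crawler_main forwards crawled_directory_path for exactly this
purpose, restoring the path information that the removed crawled_directory attribute
used to provide.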