diff --git a/src/caoscrawler/crawl.py b/src/caoscrawler/crawl.py
index 0f23acfdfde2a863a66f25901a85748b538f5d04..e916e34fa4de53731d037a4e6838722926628402 100644
--- a/src/caoscrawler/crawl.py
+++ b/src/caoscrawler/crawl.py
@@ -39,11 +39,13 @@ import sys
 import traceback
 import uuid
 import warnings
+
 from argparse import RawTextHelpFormatter
 from copy import deepcopy
 from datetime import datetime
 from enum import Enum
 from typing import Any, List, Optional, Union
+from urllib.parse import urljoin
 
 import linkahead as db
 import yaml
@@ -750,15 +752,16 @@ one with the entities that need to be updated and the other with entities to be
         # Sending an Email with a link to a form to authorize updates is
         if get_config_setting("send_crawler_notifications"):
             filename = OldCrawler.save_form([el[3] for el in pending_changes], path, run_id)
+            link_address = urljoin(db.configuration.get_config()[
+                "Connection"]["url"], os.path.join("/Shared/", filename))
             text = """Dear Curator,
     there where changes that need your authorization. Please check the following
     carefully and if the changes are ok, click on the following link:
 
-    {url}/Shared/{filename}
+    {link_address}
 
     {changes}
-        """.format(url=db.configuration.get_config()["Connection"]["url"],
-                   filename=filename,
+        """.format(link_address=link_address,
                    changes="\n".join([el[3] for el in pending_changes]))
             try:
                 fro = get_config_setting("sendmail_from_address")
@@ -899,7 +902,8 @@ the CaosDB Crawler successfully crawled the data and
     if get_config_setting("create_crawler_status_records"):
         text += ("You can checkout the CrawlerRun Record for more information:\n"
                  f"{domain}/Entity/?P=0L10&query=find%20crawlerrun%20with%20run_id=%27{run_id}%27\n\n")
-    text += (f"You can download the logfile here:\n{domain}/Shared/" + logfile)
+    text += ("You can download the logfile here:\n"
+             f"{urljoin(domain, os.path.join('/Shared/', logfile))}")
     send_mail(
         from_addr=get_config_setting("sendmail_from_address"),
         to=get_config_setting("sendmail_to_address"),
@@ -1059,7 +1063,7 @@ def crawler_main(crawled_directory_path: str,
         userlog_public, htmluserlog_public, debuglog_public = configure_server_side_logging()
         # TODO make this optional
         _create_status_record(
-            get_config_setting("public_host_url") + "/Shared/" + htmluserlog_public,
+            urljoin(get_config_setting("public_host_url"), os.path.join("/Shared/", htmluserlog_public)),
             crawler.run_id)
     else:  # setup stdout logging for other cases
         root_logger = logging.getLogger()
@@ -1128,7 +1132,7 @@ def crawler_main(crawled_directory_path: str,
         # pylint: disable=E0601
         domain = get_config_setting("public_host_url")
         logger.error("Unexpected Error: Please tell your administrator about this and provide "
-                     f"the following path.\n{domain}/Shared/" + debuglog_public)
+                     f"the following path.\n{urljoin(domain, os.path.join('/Shared/', debuglog_public))}")
         _update_status_record(crawler.run_id, 0, 0, status="FAILED")
         return 1
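
For reference, a minimal sketch of the URL composition this patch switches to (not part of the patch; the base URL and filename below are made-up example values). urljoin() with an absolute path such as "/Shared/<file>" replaces the path component of the base URL, so a configured server URL with or without a trailing slash yields the same well-formed link, which plain string concatenation did not guarantee:

    from urllib.parse import urljoin
    import os

    # Hypothetical example values; in crawl.py these come from the LinkAhead
    # connection config ("Connection"/"url") or the "public_host_url" setting.
    base = "https://caosdb.example.org/"
    filename = "crawler_run_1234.html"

    # os.path.join("/Shared/", filename) -> "/Shared/crawler_run_1234.html" (on POSIX);
    # urljoin() then replaces the base URL's path with that absolute path.
    link = urljoin(base, os.path.join("/Shared/", filename))
    print(link)  # https://caosdb.example.org/Shared/crawler_run_1234.html

    # The previous concatenation doubled the slash when the base ended in "/":
    print(base + "/Shared/" + filename)  # https://caosdb.example.org//Shared/crawler_run_1234.html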