Skip to content
Snippets Groups Projects
Commit c3532527 authored by Florian Spreckelsen's avatar Florian Spreckelsen
Browse files

MAINT: Move new function to utils and add unittest

parent 1f495a82
No related branches found
No related tags found
2 merge requests!181Release 0.9.0,!179F fix url formatting
Pipeline #54416 passed
......@@ -45,7 +45,6 @@ from copy import deepcopy
from datetime import datetime
from enum import Enum
from typing import Any, List, Optional, Union
from urllib.parse import urljoin
import linkahead as db
import yaml
......@@ -74,6 +73,7 @@ from .scanner import (create_converter_registry, initialize_converters,
from .stores import GeneralStore
from .structure_elements import StructureElement
from .sync_graph import SyncGraph
from .utils import get_shared_resource_link
logger = logging.getLogger(__name__)
......@@ -752,8 +752,8 @@ one with the entities that need to be updated and the other with entities to be
# Sending an Email with a link to a form to authorize updates is
if get_config_setting("send_crawler_notifications"):
filename = OldCrawler.save_form([el[3] for el in pending_changes], path, run_id)
link_address = urljoin(db.configuration.get_config()[
"Connection"]["url"], os.path.join("/Shared/", filename))
link_address = get_shared_resource_link(db.configuration.get_config()[
"Connection"]["url"], filename)
text = """Dear Curator,
there where changes that need your authorization. Please check the following
carefully and if the changes are ok, click on the following link:
......@@ -902,8 +902,7 @@ the CaosDB Crawler successfully crawled the data and
if get_config_setting("create_crawler_status_records"):
text += ("You can checkout the CrawlerRun Record for more information:\n"
f"{domain}/Entity/?P=0L10&query=find%20crawlerrun%20with%20run_id=%27{run_id}%27\n\n")
text += (f"You can download the logfile here:\n{
urljoin(domain, os.path.join('/Shared/', logfile))}")
text += (f"You can download the logfile here:\n{get_shared_resource_link(domain, logfile)}")
send_mail(
from_addr=get_config_setting("sendmail_from_address"),
to=get_config_setting("sendmail_to_address"),
......@@ -1063,7 +1062,7 @@ def crawler_main(crawled_directory_path: str,
userlog_public, htmluserlog_public, debuglog_public = configure_server_side_logging()
# TODO make this optional
_create_status_record(
urljoin(get_config_setting("public_host_url"),o s.path.join("/Shared/", htmluserlog_public)),
get_shared_resource_link(get_config_setting("public_host_url"), htmluserlog_public),
crawler.run_id)
else: # setup stdout logging for other cases
root_logger = logging.getLogger()
......@@ -1132,7 +1131,7 @@ def crawler_main(crawled_directory_path: str,
# pylint: disable=E0601
domain = get_config_setting("public_host_url")
logger.error("Unexpected Error: Please tell your administrator about this and provide "
f"the following path.\n{urljoin(domain, os.path.join('/Shared/', debuglog_public))}")
f"the following path.\n{get_shared_resource_link(domain, debuglog_public)}")
_update_status_record(crawler.run_id, 0, 0, status="FAILED")
return 1
......
......@@ -25,8 +25,11 @@
# Some utility functions, e.g. for extending pylib.
import os
import sys
from typing import Optional
from urllib.parse import urljoin
import linkahead as db
......@@ -69,3 +72,13 @@ def MissingImport(name: str, hint: str = "", err: Optional[Exception] = None) ->
_DummyClass.__name__ = name
return _DummyClass
def get_shared_resource_link(host_url, filename):
    """Return the link address ``{host_url}/Shared/{filename}``.

    The parts are combined with ``urllib.parse.urljoin`` so that missing or
    duplicated ``/`` between them do not end up in the link.

    Parameters
    ----------
    host_url : str
        Base URL of the server, with or without a trailing ``/``. A path
        component (context root) such as ``https://example.com/linkahead``
        is kept in the result.
    filename : str
        Path of the file relative to the ``Shared`` resource. Leading ``/``
        are ignored.

    Returns
    -------
    str
        The joined URL.
    """
    # Without a trailing slash, urljoin treats the last path segment of the
    # base as a file name and replaces it, which would drop a context root.
    if not host_url.endswith("/"):
        host_url += "/"
    # Join a *relative* reference (no leading slash) so the base path is kept.
    # Plain string handling instead of os.path.join keeps '/' as separator on
    # every platform and prevents an absolute filename from discarding
    # the "Shared/" prefix.
    return urljoin(host_url, "Shared/" + filename.lstrip("/"))
......@@ -22,7 +22,7 @@
import pytest
from caoscrawler.crawl import split_restricted_path
from caoscrawler.utils import MissingImport
from caoscrawler.utils import get_shared_resource_link, MissingImport
def test_split_restricted_path():
......@@ -66,3 +66,13 @@ def test_dummy_class():
assert "(Not Important)" in msg
orig_msg = str(err_info.value.__cause__)
assert orig_msg == "Old error"
def test_shared_resource_link():
    """Check that host URL and file name are joined below ``/Shared/``."""
    # (host_url, filename, expected link)
    cases = [
        ("https://example.com/", "file.txt",
         "https://example.com/Shared/file.txt"),
        ("https://example.com", "file.txt",
         "https://example.com/Shared/file.txt"),
        ("https://example.com", "path/to/file.txt",
         "https://example.com/Shared/path/to/file.txt"),
    ]
    for base_url, shared_file, expected_link in cases:
        assert get_shared_resource_link(base_url, shared_file) == expected_link
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment