From 2a43219d346ac77a9edbe7b7f9bdb19073671a17 Mon Sep 17 00:00:00 2001
From: Florian Spreckelsen <f.spreckelsen@indiscale.com>
Date: Mon, 26 Aug 2024 10:18:42 +0200
Subject: [PATCH] FIX: Respect context-root and make links platform independent

---
 src/caoscrawler/utils.py    | 9 +++++++--
 unittests/test_utilities.py | 4 ++++
 2 files changed, 11 insertions(+), 2 deletions(-)

diff --git a/src/caoscrawler/utils.py b/src/caoscrawler/utils.py
index 65a1bb7c..d9a5af83 100644
--- a/src/caoscrawler/utils.py
+++ b/src/caoscrawler/utils.py
@@ -25,9 +25,9 @@
 
 # Some utility functions, e.g. for extending pylib.
 
-import os
 import sys
 
+from posixpath import join as posixjoin
 from typing import Optional
 from urllib.parse import urljoin
 
@@ -81,4 +81,9 @@ def get_shared_resource_link(host_url, filename):
 
     """
 
-    return urljoin(host_url, os.path.join("/Shared/", filename))
+    if not host_url.endswith('/'):
+        # Append a trailing '/' so that urljoin doesn't drop the context root.
+        host_url += '/'
+    # Use posixjoin to always have '/' in links, even when running on
+    # Windows systems.
+    return urljoin(host_url, posixjoin("Shared/", filename))
diff --git a/unittests/test_utilities.py b/unittests/test_utilities.py
index 89b0b40c..15e84a60 100644
--- a/unittests/test_utilities.py
+++ b/unittests/test_utilities.py
@@ -76,3 +76,7 @@ def test_shared_resource_link():
         "https://example.com", "file.txt") == "https://example.com/Shared/file.txt"
     assert get_shared_resource_link(
         "https://example.com", "path/to/file.txt") == "https://example.com/Shared/path/to/file.txt"
+    assert get_shared_resource_link(
+        "https://example.com/context-root", "path/to/file.txt") == "https://example.com/context-root/Shared/path/to/file.txt"
+    assert get_shared_resource_link(
+        "https://example.com/context-root/", "path/to/file.txt") == "https://example.com/context-root/Shared/path/to/file.txt"
-- 
GitLab