From 9ac7b52a22bb110ceb053d5a7440dd83fa0a5ced Mon Sep 17 00:00:00 2001
From: Timm Fitschen <t.fitschen@indiscale.com>
Date: Thu, 3 Nov 2022 13:55:14 +0100
Subject: [PATCH] EHN: add http_proxy option

---
 src/caosdb/common/models.py         |  7 +--
 src/caosdb/connection/connection.py | 87 +++++++++++++++++++----------
 src/caosdb/schema-pycaosdb-ini.yml  | 10 +++-
 unittests/test_connection.py        | 22 +++++++-
 4 files changed, 86 insertions(+), 40 deletions(-)

diff --git a/src/caosdb/common/models.py b/src/caosdb/common/models.py
index 00e3884a..7000ede9 100644
--- a/src/caosdb/common/models.py
+++ b/src/caosdb/common/models.py
@@ -55,8 +55,7 @@ from caosdb.common.timezone import TimeZone
 from caosdb.common.versioning import Version
 from caosdb.configuration import get_config
 from caosdb.connection.connection import get_connection
-from caosdb.connection.encode import (MultipartParam, multipart_encode,
-                                      ReadableMultiparts)
+from caosdb.connection.encode import MultipartParam, multipart_encode
 from caosdb.exceptions import (AmbiguousEntityError, AuthorizationError,
                                CaosDBConnectionError, CaosDBException,
                                ConsistencyError, EmptyUniqueQueryError,
@@ -3416,9 +3415,7 @@ class Container(list):
         if http_parts is not None and len(http_parts) > 0:
             http_parts.insert(
                 0, MultipartParam("FileRepresentation", xml2str(insert_xml)))
-
             body, headers = multipart_encode(http_parts)
-            body = ReadableMultiparts(body)
 
             http_response = con.update(
                 entity_uri_segment=[_ENTITY_URI_SEGMENT],
@@ -3574,8 +3571,6 @@ class Container(list):
                 0, MultipartParam("FileRepresentation", xml2str(insert_xml)))
 
             body, headers = multipart_encode(http_parts)
-            body = ReadableMultiparts(body)
-
             http_response = con.insert(
                 entity_uri_segment=[_ENTITY_URI_SEGMENT],
                 body=body,
diff --git a/src/caosdb/connection/connection.py b/src/caosdb/connection/connection.py
index 99cb7431..fa14b693 100644
--- a/src/caosdb/connection/connection.py
+++ b/src/caosdb/connection/connection.py
@@ -56,6 +56,7 @@ from pkg_resources import resource_filename
 
 from .interface import CaosDBHTTPResponse, CaosDBServerConnection
 from .utils import make_uri_path, parse_url, urlencode
+from .encode import MultipartYielder, ReadableMultiparts
 
 _LOGGER = logging.getLogger(__name__)
 
@@ -114,7 +115,6 @@ class _DefaultCaosDBServerConnection(CaosDBServerConnection):
         self._base_path = None
         self._session = None
         self._timeout = None
-        self._verify = None
 
     def request(self, method, path, headers=None, body=None):
         """request.
@@ -146,13 +146,15 @@ class _DefaultCaosDBServerConnection(CaosDBServerConnection):
         if path.endswith("/."):
             path = path[:-1] + "%2E"
 
+        if isinstance(body, MultipartYielder):
+            body = ReadableMultiparts(body)
+
         try:
             response = self._session.request(
                 method=method,
                 url=self._base_path + path,
                 headers=headers,
                 data=body,
-                verify=self._verify,
                 timeout=self._timeout,
                 stream=True)
 
@@ -185,38 +187,57 @@ class _DefaultCaosDBServerConnection(CaosDBServerConnection):
             loaded.
         """
 
-        if "ssl_version" in config and config["cacert"] is not None:
-            ssl_version = getattr(ssl, config["ssl_version"])
-        else:
-            ssl_version = ssl.PROTOCOL_TLS
-
-        if "url" in config:
-            url = urlparse(config["url"])
-            path = url.path.strip("/")
-            if len(path) > 0:
-                path = path + "/"
-            self._base_path = url.scheme + "://" + url.netloc + "/" + path
-        else:
+        if "url" not in config:
             raise CaosDBConnectionError(
                 "No connection url specified. Please "
                 "do so via caosdb.configure_connection(...) or in a config "
                 "file.")
-
-        # TODO remove in next release
-        socket_proxy = config["socket_proxy"] if "socket_proxy" in config else None
-        https_proxy = config["https_proxy"] if "https_proxy" in config else None
+        if (not config["url"].lower().startswith("https://") and not config["url"].lower().startswith("http://")):
+            raise CaosDBConnectionError("The connection url is expected "
+                                        "to be a http or https url and "
+                                        "must include the url scheme "
+                                        "(i.e. start with https:// or "
+                                        "http://).")
+
+        url = urlparse(config["url"])
+        path = url.path.strip("/")
+        if len(path) > 0:
+            path = path + "/"
+        self._base_path = url.scheme + "://" + url.netloc + "/" + path
 
         self._session = HTTPSession()
-        self._session.mount(self._base_path, _SSLAdapter(ssl_version))
+
+        if url.scheme == "https":
+            self._setup_ssl(config)
+
+        # TODO(tf) remove in next release
+        socket_proxy = config["socket_proxy"] if "socket_proxy" in config else None
         if socket_proxy is not None:
             self._session.proxies = {
                 "https": "socks5://" + socket_proxy,
+                "http": "socks5://" + socket_proxy,
             }
 
-        if https_proxy is not None:
-            self._session.proxies = {
-                "https": https_proxy,
-            }
+        if "https_proxy" in config:
+            if self._session.proxies is None:
+                self._session.proxies = {}
+            self._session.proxies["https"] = config["https_proxy"]
+
+        if "http_proxy" in config:
+            if self._session.proxies is None:
+                self._session.proxies = {}
+            self._session.proxies["http"] = config["http_proxy"]
+
+        if "timeout" in config:
+            self._timeout = config["timeout"]
+
+    def _setup_ssl(self, config):
+        if "ssl_version" in config and config["cacert"] is not None:
+            ssl_version = getattr(ssl, config["ssl_version"])
+        else:
+            ssl_version = ssl.PROTOCOL_TLS
+
+        self._session.mount(self._base_path, _SSLAdapter(ssl_version))
 
         verify = True
         if "cacert" in config:
@@ -228,10 +249,7 @@ class _DefaultCaosDBServerConnection(CaosDBServerConnection):
                             "****************")
             verify = False
         if verify is not None:
-            self._verify = verify
-
-        if "timeout" in config:
-            self._timeout = config["timeout"]
+            self._session.verify = verify
 
 
 def _make_conf(*conf):
@@ -323,6 +341,10 @@ def configure_connection(**kwargs):
 
     Parameters
     ----------
+    url : str
+        The url of the CaosDB Server. HTTP and HTTPS urls are allowed. However,
+        it is **highly** recommend to avoid HTTP because passwords and
+        authentication token are send over the network in plain text.
 
     username : str
         Username for login; e.g. 'admin'.
@@ -351,8 +373,7 @@ def configure_connection(**kwargs):
         An authentication token which has been issued by the CaosDB Server.
         Implies `password_method="auth_token"` if set.  An example token string would be `["O","OneTimeAuthenticationToken","anonymous",["administration"],[],1592995200000,604800000,"3ZZ4WKRB-5I7DG2Q6-ZZE6T64P-VQ","197d0d081615c52dc18fb323c300d7be077beaad4020773bb58920b55023fa6ee49355e35754a4277b9ac525c882bcd3a22e7227ba36dfcbbdbf8f15f19d1ee9",1,30000]`.
 
-    https_proxy : str or bool (optional)
-        Define a https proxy, e.g. `http://localhost:8888`.
+    https_proxy : str, optional
         Define a proxy for the https connections, e.g. `http://localhost:8888`,
         `socks5://localhost:8888`, or `socks4://localhost:8888`. These are
         either (non-TLS) HTTP proxies, SOCKS4 proxies, or SOCKS5 proxies. HTTPS
@@ -362,7 +383,13 @@ def configure_connection(**kwargs):
         proxies when authentication against the proxy is necessary. If
         unspecified, the https_proxy option of the pycaosdb.ini or the HTTPS_PROXY
         environment variable are being used. Use `None` to override these
-        options with a no-proxy setting. (Default: unspecified)
+        options with a no-proxy setting.
+
+    http_proxy : str, optional
+        Define a proxy for the http connections, e.g. `http://localhost:8888`.
+        If unspecified, the http_proxy option of the pycaosdb.ini or the
+        HTTP_PROXY environment variable are being used. Use `None` to override
+        these options with a no-proxy setting.
 
     implementation : CaosDBServerConnection
         The class which implements the connection. (Default:
diff --git a/src/caosdb/schema-pycaosdb-ini.yml b/src/caosdb/schema-pycaosdb-ini.yml
index d6aff8a8..64451a24 100644
--- a/src/caosdb/schema-pycaosdb-ini.yml
+++ b/src/caosdb/schema-pycaosdb-ini.yml
@@ -14,10 +14,10 @@ schema-pycaosdb-ini:
       additionalProperties: false
       properties:
         url:
-          description: URL of the CaosDB server
+          description: "URL of the CaosDB server. Allowed are HTTP and HTTPS connections. However, since authentication tokens and sometimes even passwords are send in plain text to the server it is **highly** recommended to use HTTPS connections whenever possible. HTTP is ok for testing and debugging."
           type: string
-          pattern: https://[-a-zA-Z0-9\.]+(:[0-9]+)?(/)?
-          examples: ["https://demo.indiscale.com/", "https://localhost:10443/"]
+          pattern: http(s)?://[-a-zA-Z0-9\.]+(:[0-9]+)?(/)?
+          examples: ["https://demo.indiscale.com/", "http://localhost:10080/"]
         username:
           type: string
           description: User name used for authentication with the server
@@ -59,6 +59,10 @@ schema-pycaosdb-ini:
           examples: ["http://localhost:8888", "socks5://localhost:8888", "socks4://localhost:8888"]
           type: string
           description: "Define a proxy for the https connections. These are either (non-TLS) HTTP proxies, SOCKS4 proxies, or SOCKS5 proxies. HTTPS proxies are not supported. However, the connection will be secured using TLS in the tunneled connection nonetheless. Only the connection to the proxy is insecure which is why it is not recommended to use HTTP proxies when authentication against the proxy is necessary. Note: this option is overridden by the HTTPS_PROXY environment variable, if present."
+        http_proxy:
+          examples: ["http://localhost:8888", "socks5://localhost:8888", "socks4://localhost:8888"]
+          type: string
+          description: "Define a proxy for the http connections. These are either (non-TLS) HTTP proxies, SOCKS4 proxies, or SOCKS5 proxies. HTTPS proxies are not supported. Note: this option is overridden by the HTTP_PROXY environment variable, if present."
         implementation:
           description: This option is used internally and for testing. Do not override.
           examples: [_DefaultCaosDBServerConnection]
diff --git a/unittests/test_connection.py b/unittests/test_connection.py
index ee564ea0..d0408674 100644
--- a/unittests/test_connection.py
+++ b/unittests/test_connection.py
@@ -37,7 +37,8 @@ from caosdb.connection.connection import (CaosDBServerConnection,
 from caosdb.connection.mockup import (MockUpResponse, MockUpServerConnection,
                                       _request_log_message)
 from caosdb.connection.utils import make_uri_path, quote, urlencode
-from caosdb.exceptions import ConfigurationError, LoginFailedError
+from caosdb.exceptions import (ConfigurationError, LoginFailedError,
+                               CaosDBConnectionError)
 from nose.tools import assert_equal as eq
 from nose.tools import assert_false as falz
 from nose.tools import assert_is_not_none as there
@@ -46,6 +47,13 @@ from nose.tools import assert_true as tru
 from pytest import raises
 
 
+def setup_function(function):
+    configure_connection(url="http://localhost:8888/some/path",
+                         password_method="plain", username="test",
+                         password="blub",
+                         implementation=MockUpServerConnection)
+
+
 def setup_module():
     _reset_config()
 
@@ -116,6 +124,18 @@ def test_configure_connection():
     tru(isinstance(c._delegate_connection, MockUpServerConnection))
 
 
+def test_configure_connection_bad_url():
+    configure_connection(url="https://localhost:8888")
+    with raises(CaosDBConnectionError) as exc_info:
+        configure_connection(url="ftp://localhost:8888")
+    assert exc_info.value.args[0].startswith(
+        "The connection url is expected to be a http or https url")
+    with raises(CaosDBConnectionError) as exc_info:
+        configure_connection(url="localhost:8888")
+    assert exc_info.value.args[0].startswith(
+        "The connection url is expected to be a http or https url")
+
+
 def test_connection_interface():
     with raiz(TypeError) as cm:
         CaosDBServerConnection()
-- 
GitLab