diff --git a/setup.py b/setup.py index 50c5b89ef278f8fd074a2a5c7b2ff67a77cb3eb3..f85c09381a5951c95a910c362de19c8c9c353eca 100755 --- a/setup.py +++ b/setup.py @@ -171,11 +171,10 @@ def setup_package(): python_requires='>=3.8', package_dir={'': 'src'}, install_requires=['lxml>=4.6.3', - "requests>=2.28.1", + "requests[socks]>=2.28.1", "python-dateutil>=2.8.2", 'PyYAML>=5.4.1', 'future', - 'PySocks>=1.6.7', ], extras_require={'keyring': ['keyring>=13.0.0'], 'jsonschema': ['jsonschema>=4.4.0']}, diff --git a/src/caosdb/common/administration.py b/src/caosdb/common/administration.py index 98d4d2826da7131ef79b5c3cc9b3d9597abc0248..bd09c626d5a9e48bc9d3610f23cb46660cb10db3 100644 --- a/src/caosdb/common/administration.py +++ b/src/caosdb/common/administration.py @@ -76,7 +76,7 @@ def get_server_properties(): con = get_connection() try: body = con._http_request( - method="GET", path="_server_properties").response + method="GET", path="_server_properties") except EntityDoesNotExistError: raise ServerConfigurationException( "Debug mode in server is probably disabled.") from None diff --git a/src/caosdb/common/models.py b/src/caosdb/common/models.py index 7000ede917995c6c01b78a822c2d39ac626fcc23..00e3884aef5bb4fbb86ebe53acd99fc6a05e67f5 100644 --- a/src/caosdb/common/models.py +++ b/src/caosdb/common/models.py @@ -55,7 +55,8 @@ from caosdb.common.timezone import TimeZone from caosdb.common.versioning import Version from caosdb.configuration import get_config from caosdb.connection.connection import get_connection -from caosdb.connection.encode import MultipartParam, multipart_encode +from caosdb.connection.encode import (MultipartParam, multipart_encode, + ReadableMultiparts) from caosdb.exceptions import (AmbiguousEntityError, AuthorizationError, CaosDBConnectionError, CaosDBException, ConsistencyError, EmptyUniqueQueryError, @@ -3415,7 +3416,9 @@ class Container(list): if http_parts is not None and len(http_parts) > 0: http_parts.insert( 0, MultipartParam("FileRepresentation", xml2str(insert_xml))) + body, headers = multipart_encode(http_parts) + body = ReadableMultiparts(body) http_response = con.update( entity_uri_segment=[_ENTITY_URI_SEGMENT], @@ -3571,6 +3574,8 @@ class Container(list): 0, MultipartParam("FileRepresentation", xml2str(insert_xml))) body, headers = multipart_encode(http_parts) + body = ReadableMultiparts(body) + http_response = con.insert( entity_uri_segment=[_ENTITY_URI_SEGMENT], body=body, diff --git a/src/caosdb/connection/SocksiPy.zip b/src/caosdb/connection/SocksiPy.zip deleted file mode 100644 index e81f1f9393c766a3acd41b44245f9e17f090cbe5..0000000000000000000000000000000000000000 Binary files a/src/caosdb/connection/SocksiPy.zip and /dev/null differ diff --git a/src/caosdb/connection/connection.py b/src/caosdb/connection/connection.py index 3fff3b7deed3bad24347d32769d5f955fa5f27cd..2c23ed0a05578fa957baa6bdb62793626f88e9ed 100644 --- a/src/caosdb/connection/connection.py +++ b/src/caosdb/connection/connection.py @@ -31,7 +31,7 @@ import sys from builtins import str # pylint: disable=redefined-builtin from errno import EPIPE as BrokenPipe from socket import error as SocketError -from urllib.parse import urlparse +from urllib.parse import quote, urlparse from requests import Session as HTTPSession from requests.exceptions import ConnectionError as HTTPConnectionError @@ -52,21 +52,15 @@ except ModuleNotFoundError: from pkg_resources import resource_filename from .interface import CaosDBHTTPResponse, CaosDBServerConnection -from .streaminghttp import StreamingHTTPSConnection from .utils import make_uri_path, parse_url, urlencode -try: - from urllib.parse import quote, urlparse -except ImportError: - from urllib import quote - from urlparse import urlparse # pylint: disable=missing-docstring _LOGGER = logging.getLogger(__name__) -class _WrappedHTTPResponse2(CaosDBHTTPResponse): +class _WrappedHTTPResponse(CaosDBHTTPResponse): def __init__(self, response): self.response = response @@ -92,32 +86,6 @@ class _WrappedHTTPResponse2(CaosDBHTTPResponse): self.response.close() -class _WrappedHTTPResponse(CaosDBHTTPResponse): - - def __init__(self, response): - self.response = response - - @property - def reason(self): - return self.response.reason - - @property - def status(self): - return self.response.status - - def read(self, size=None): - return self.response.read(size) - - def getheader(self, name, default=None): - return self.response.getheader(name=name, default=default) - - def getheaders(self): - return self.response.getheaders() - - def close(self): - self.response.close() - - class _DefaultCaosDBServerConnection(CaosDBServerConnection): """_DefaultCaosDBServerConnection. @@ -146,7 +114,7 @@ class _DefaultCaosDBServerConnection(CaosDBServerConnection): including query and frament segments. headers : dict of str -> str, optional HTTP request headers. (Defautl: None) - body : str or bytes or readable, opional + body : str or bytes or readable, optional The body of the HTTP request. Bytes should be a utf-8 encoded string. **kwargs : @@ -163,37 +131,30 @@ class _DefaultCaosDBServerConnection(CaosDBServerConnection): headers["User-Agent"] = self._useragent try: + session = HTTPSession() if self.setup_fields["https_proxy"] is not None: - session = HTTPSession() session.proxies = { "https": self.setup_fields["https_proxy"] } - response = session.request(method=method, - url=self.setup_fields["url_base_path"] + path, - headers=headers, data=body, stream=True) - return _WrappedHTTPResponse2(response) - except HTTPConnectionError as conn_err: - raise CaosDBConnectionError( - "Connection failed. Network or server down? " + str(conn_err) - ) + verify = True + if self.setup_fields["cacert"]: + verify = self.setup_fields["cacert"] + if self.setup_fields["ssl_insecure"]: + verify = False + url = urlparse(self.setup_fields["url_base_path"] + path) - try: - self._http_con = ProxyConnection( - # TODO looks as if configure needs to be done first. - # That is however not assured. - host=self.setup_fields["host"], - timeout=self.setup_fields["timeout"], - context=self.setup_fields["context"], - socket_proxy=self.setup_fields["socket_proxy"]) - self._http_con.request(method=method, url=self._base_path + path, - headers=headers, body=body) - except SocketError as socket_err: + + response = session.request(method=method, + url=self.setup_fields["url_base_path"] + path, + verify=verify, + headers=headers, data=body, stream=True) + return _WrappedHTTPResponse(response) + except HTTPConnectionError as conn_err: raise CaosDBConnectionError( - "Connection failed. Network or server down? " + str(socket_err) + "Connection failed. Network or server down? " + str(conn_err) ) - return _WrappedHTTPResponse(self._http_con.getresponse()) def configure(self, **config): """configure. @@ -271,7 +232,9 @@ class _DefaultCaosDBServerConnection(CaosDBServerConnection): https_proxy = result.scheme + "://" + result.netloc self.setup_fields = { - "url_base_path": config["url"] + "/", + "url_base_path": config["url"], + "cacert": config["cacert"] if "cacert" in config else None, + "ssl_insecure": config["ssl_insecure"] if "ssl_insecure" in config else False, "host": host, "timeout": int(config.get("timeout")), "context": context, diff --git a/src/caosdb/connection/encode.py b/src/caosdb/connection/encode.py index 7b092aae784a76abec0104ef7269df7ae0111b3b..e43b78146d32afe6060c637fc13335fc5f1047a9 100644 --- a/src/caosdb/connection/encode.py +++ b/src/caosdb/connection/encode.py @@ -51,7 +51,8 @@ multipart/form-data is the standard way to upload files over HTTP __all__ = [ 'gen_boundary', 'encode_and_quote', 'MultipartParam', 'encode_string', - 'encode_file_header', 'get_body_size', 'get_headers', 'multipart_encode' + 'encode_file_header', 'get_body_size', 'get_headers', 'multipart_encode', + 'ReadableMultiparts', ] from urllib.parse import quote_plus from io import UnsupportedOperation @@ -363,7 +364,6 @@ def get_headers(params, boundary): headers['Content-Length'] = str(get_body_size(params, boundary)) return headers - class MultipartYielder(object): """An iterator that yields the parameters of a multipart/formdata http body.""" @@ -475,3 +475,37 @@ def multipart_encode(params, boundary=None, callback=None): params = MultipartParam.from_params(params) return MultipartYielder(params, boundary, callback), headers + +class ReadableMultiparts(object): + + def __init__(self, multipart_yielder): + self.multipart_yielder = multipart_yielder + self.current_block = None + self.left_over = b'' + + def read(self, size=-1): + result = self.left_over + while size == -1 or len(result) < size: + try: + next_chunk = self.multipart_yielder.next() + if hasattr(next_chunk, "encode"): + next_chunk = next_chunk.encode("utf8") + result += next_chunk + except StopIteration: + break + + if size == -1: + self.left_over = b'' + return result + + self.left_over = result[size:] + return result[:size] + + def __enter__(self): + pass + + def __exit__(self, type, value, traceback): + self.close() + + def close(self): + self.multipart_yielder.reset() diff --git a/src/caosdb/connection/streaminghttp.py b/src/caosdb/connection/streaminghttp.py deleted file mode 100644 index e34427956c7b122cc6e22c04ed579996968b03c4..0000000000000000000000000000000000000000 --- a/src/caosdb/connection/streaminghttp.py +++ /dev/null @@ -1,161 +0,0 @@ -# -*- encoding: utf-8 -*- -# -# ** header v3.0 -# This file is a part of the CaosDB Project. -# -# Copyright (C) 2018 Research Group Biomedical Physics, -# Max-Planck-Institute for Dynamics and Self-Organization Göttingen -# -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU Affero General Public License as -# published by the Free Software Foundation, either version 3 of the -# License, or (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Affero General Public License for more details. -# -# You should have received a copy of the GNU Affero General Public License -# along with this program. If not, see <https://www.gnu.org/licenses/>. -# -# ** end header -# -# Original work Copyright (c) 2011 Chris AtLee -# Modified work Copyright (c) 2017 Biomedical Physics, MPI for Dynamics and Self-Organization -# -# Permission is hereby granted, free of charge, to any person obtaining a copy -# of this software and associated documentation files (the "Software"), to deal -# in the Software without restriction, including without limitation the rights -# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -# copies of the Software, and to permit persons to whom the Software is -# furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included in -# all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -# THE SOFTWARE. -"""Streaming HTTP uploads module. - -This module extends the standard httplib and http.client HTTPConnection so that -iterable objects can be used in the body of HTTP requests. - -**N.B.** You must specify a Content-Length header if using an iterable object -since there is no way to determine in advance the total size that will be -yielded, and there is no way to reset an interator. -""" - -from __future__ import unicode_literals, print_function, absolute_import -import socks -import socket -try: - # python3 - from http import client as client -except ImportError: - # python2 - import httplib as client - - -__all__ = ['StreamingHTTPSConnection'] - - -class StreamingHTTPSConnection(client.HTTPSConnection, object): - """Subclass of `http.client.HTTSConnection` or `httplib.HTTPSConnection` - that overrides the `send()` method to support iterable body objects.""" - # pylint: disable=unused-argument, arguments-differ - - def __init__(self, socket_proxy=None, https_proxy=None, **kwargs): - host = kwargs["host"] - port = int(kwargs["port"]) if "port" in kwargs else None - if socket_proxy is not None: - proxy_host, proxy_port = socket_proxy.split(":") - socks.setdefaultproxy(socks.PROXY_TYPE_SOCKS5, proxy_host, - int(proxy_port)) - socket.socket = socks.socksocket - if https_proxy is not None: - tunnel_host = host - tunnel_port = port - host, port = https_proxy.split(":") - port = int(port) - super(StreamingHTTPSConnection, self).__init__(**kwargs) - if tunnel_host is not None: - self.set_tunnel(host=host, port=port) - - def _send_output(self, body, **kwargs): - """Send the currently buffered request and clear the buffer. - - Appends an extra \\r\\n to the buffer. - A message_body may be specified, to be appended to the request. - - This method is implemented in differently in the various python - versions (which is extremely annoying). So we provide a unified but - relatively dumb implementaion which only serves our needs. - """ - self._buffer.extend(("".encode("utf-8"), "".encode("utf-8"))) - headers = "\r\n".encode("utf-8").join(self._buffer) - del self._buffer[:] - - self.send(headers) - if body is not None: - self.send(body) - - # pylint: disable=too-many-branches - - def send(self, value): - """Send ``value`` to the server. - - ``value`` can be a string-like object which supports a 'encode' method, - a file-like object that supports a .read() method, or an iterable object - that supports a .next() method. - - An encode()able ``value`` will be utf-8 encoded before sending. - """ - # Based on python 2.6's httplib.HTTPConnection.send() - if self.sock is None: - if self.auto_open: - self.connect() - else: - raise client.NotConnected() - - # send the data to the server. if we get a broken pipe, then close - # the socket. we want to reconnect when somebody tries to send again. - # - # NOTE: we DO propagate the error, though, because we cannot simply - # ignore the error... the caller will know if they can retry. - if self.debuglevel > 0: - print("send: ", repr(value)) - try: - blocksize = 8192 - if hasattr(value, 'read'): - if hasattr(value, 'seek'): - value.seek(0) - if self.debuglevel > 0: - print("sendIng a read()able") - data = value.read(blocksize) - while data: - self.sock.sendall(data) - data = value.read(blocksize) - elif hasattr(value, 'next'): - if hasattr(value, 'reset'): - value.reset() - if self.debuglevel > 0: - print("sendIng an iterable") - for data in value: - if hasattr(data, "encode"): - self.sock.sendall(data.encode('utf-8')) - else: - self.sock.sendall(data) - else: - if self.debuglevel > 0: - print("sendIng a byte-like") - self.sock.sendall(value) - except socket.error as err: - if err.args[0] == 32: # Broken pipe - self.close() - raise