diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 8b15e764abae0844f7d39fc7b9a5098edbc47c3c..792ab22f1524f8d4dc1db90f2b65c7f8f28f90ed 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -54,14 +54,38 @@ pylint: allow_failure: true # run unit tests -unittest: +unittest_py3.8: + tags: [ docker ] + stage: test + needs: [ ] + image: python:3.8 + script: &python_test_script + # Python docker has problems with tox and pip so use plain pytest here + - touch ~/.pycaosdb.ini + - pip install nose pytest pytest-cov python-dateutil jsonschema==4.0.1 + - pip install . + - python -m pytest unittests + +# This needs to be changed once Python 3.9 isn't the standard Python in Debian +# anymore. +unittest_py3.9: tags: [ docker ] stage: test needs: [ ] script: + # verify that this actually is Python 3.9 + - python3 -c "import sys; assert sys.version.startswith('3.9')" - touch ~/.pycaosdb.ini - make unittest + +unittest_py3.10: + tags: [ docker ] + stage: test + needs: [ ] + image: python:3.10 + script: *python_test_script + # Trigger building of server image and integration tests trigger_build: stage: deploy @@ -88,12 +112,12 @@ build-testenv: stage: setup only: - schedules - script: + script: - cd unittests/docker - docker login -u gitlab-ci-token -p $CI_JOB_TOKEN $CI_REGISTRY # use here general latest or specific branch latest... 
- docker pull $CI_REGISTRY_IMAGE|| true - - docker build + - docker build --pull --build-arg COMMIT=$CI_COMMIT_SHORT_SHA --cache-from $CI_REGISTRY_IMAGE @@ -105,7 +129,7 @@ build-testenv: pages_prepare: &pages_prepare tags: [ cached-dind ] stage: deploy - needs: [ code_style, pylint, unittest ] + needs: [ code_style, pylint, unittest_py3.8, unittest_py3.9, unittest_py3.10 ] only: refs: - /^release-.*$/i diff --git a/CHANGELOG.md b/CHANGELOG.md index e594d9c23d4a5d5791cd437e25fa08f953179e9a..f3f29f875aae54312cd6ddd890604e185d02de02 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,50 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [0.10.0] - 2022-11-14 + +### Added ### + +* HTTP connections are allowed additionally to HTTPS connections. +* Dependency on the `requests` package. +* Dependency on the `python-dateutil` package. +* `Connection.https_proxy` and `Connection.http_proxy` option of the + pycaosdb.ini and the `https_proxy` and `http_proxy` parameter of the + `configure_connection` function. See the documentation of the + latter for more information. + Note that the `HTTP_PROXY` and `HTTPS_PROXY` environment variables are + respected as well, unless overridden programmatically. +* `apiutils.empty_diff` function that returns `True` if the diffs of two + entities found with the `compare_entitis` function are empty, `False` + otherwise. + +### Changed ### + +* `apiutils.compare_entities` now has an optional `compare_referenced_records` + argument to compare referenced Entities recursively (fomerly, only the + referenced Python objects would be compared). The default is `False` to + recover the original behavior. 
+* `apiutils.merge_entities` now has an optional + `merge_references_with_empty_diffs` argument that determines whether a merge + of two entities will be performed if they reference identical records (w.r.t + th above `empty_diff` function). Formerly this would have caused a merge + conflict if the referenced record(s) were identical, but stored in different + Python objects. +* `apiutils.merge_entities` now has an optional `force` argument (defaults to + `False`, i.e., the old behavior) which determines whether in case of merge + conflicts errors will be raised or the properties and attributes of entity A + will be overwritten by entity B. + +### Deprecated ### + +* `Connection.socket_proxy` option of the pycaosdb.ini. Please use + `Connection.https_proxy` or `Connection.http_proxy` instead. The deprecated + option will be removed with the next minor release. + +### Fixed ### + +* handling of special attributes (name, id, ...) in `apiutils.empty_diff` + ## [0.9.0] - 2022-10-24 (Florian Spreckelsen) diff --git a/setup.py b/setup.py index 9618cd53077e58c35cafb4611b3520b9355eead9..187b04f01773280c42711c1a4b80f5eddc91eae1 100755 --- a/setup.py +++ b/setup.py @@ -47,7 +47,7 @@ from setuptools import find_packages, setup ISRELEASED = True MAJOR = 0 -MINOR = 9 +MINOR = 10 MICRO = 0 # Do not tag as pre-release until this commit # https://github.com/pypa/packaging/pull/515 @@ -97,6 +97,9 @@ def get_version_info(): if os.path.exists('.git'): GIT_REVISION = git_version() + elif os.path.exists('caosdb_pylib_commit'): + with open('caosdb_pylib_commit', 'r') as f: + GIT_REVISION = f.read().strip() elif os.path.exists('src/caosdb/version.py'): # must be a source distribution, use existing version file try: @@ -171,7 +174,11 @@ def setup_package(): python_requires='>=3.8', package_dir={'': 'src'}, install_requires=['lxml>=4.6.3', - 'PyYAML>=5.4.1', 'future', 'PySocks>=1.6.7'], + "requests[socks]>=2.28.1", + "python-dateutil>=2.8.2", + 'PyYAML>=5.4.1', + 'future', + ], 
extras_require={'keyring': ['keyring>=13.0.0'], 'jsonschema': ['jsonschema>=4.4.0']}, setup_requires=["pytest-runner>=2.0,<3dev"], diff --git a/src/caosdb/apiutils.py b/src/caosdb/apiutils.py index bd5b0eeca217e1f77d1bd5d5c60e18f33dd76212..0862cf9f32575b9773bc16d845bb459d67b0140c 100644 --- a/src/caosdb/apiutils.py +++ b/src/caosdb/apiutils.py @@ -188,9 +188,8 @@ def getCommitIn(folder): return t.readline().strip() -def compare_entities(old_entity: Entity, new_entity: Entity): - """ - Compare two entites. +def compare_entities(old_entity: Entity, new_entity: Entity, compare_referenced_records: bool = False): + """Compare two entites. Return a tuple of dictionaries, the first index belongs to additional information for old entity, the second index belongs to additional information for new entity. @@ -204,6 +203,23 @@ def compare_entities(old_entity: Entity, new_entity: Entity): - ... value (not implemented yet) In case of changed information the value listed under the respective key shows the value that is stored in the respective entity. + + If `compare_referenced_records` is `True`, also referenced entities will be + compared using this function (which is then called with + `compare_referenced_records = False` to prevent infinite recursion in case + of circular references). + + Parameters + ---------- + old_entity, new_entity : Entity + Entities to be compared + compare_referenced_records : bool, optional + Whether to compare referenced records in case of both, `old_entity` and + `new_entity`, have the same reference properties and both have a Record + object as value. If set to `False`, only the corresponding Python + objects are compared which may lead to unexpected behavior when + identical records are stored in different objects. Default is False. 
+ """ olddiff: Dict[str, Any] = {"properties": {}, "parents": []} newdiff: Dict[str, Any] = {"properties": {}, "parents": []} @@ -270,9 +286,29 @@ def compare_entities(old_entity: Entity, new_entity: Entity): matching[0].unit if (prop.value != matching[0].value): - olddiff["properties"][prop.name]["value"] = prop.value - newdiff["properties"][prop.name]["value"] = \ - matching[0].value + # basic comparison of value objects says they are different + same_value = False + if compare_referenced_records: + # scalar reference + if isinstance(prop.value, Entity) and isinstance(matching[0].value, Entity): + # explicitely not recursive to prevent infinite recursion + same_value = empty_diff( + prop.value, matching[0].value, compare_referenced_records=False) + # list of references + elif isinstance(prop.value, list) and isinstance(matching[0].value, list): + # all elements in both lists actually are entity objects + # TODO: check, whether mixed cases can be allowed or should lead to an error + if all([isinstance(x, Entity) for x in prop.value]) and all([isinstance(x, Entity) for x in matching[0].value]): + # can't be the same if the lengths are different + if len(prop.value) == len(matching[0].value): + # do a one-by-one comparison; the values are the same, if all diffs are empty + same_value = all( + [empty_diff(x, y, False) for x, y in zip(prop.value, matching[0].value)]) + + if not same_value: + olddiff["properties"][prop.name]["value"] = prop.value + newdiff["properties"][prop.name]["value"] = \ + matching[0].value if (len(newdiff["properties"][prop.name]) == 0 and len(olddiff["properties"][prop.name]) == 0): @@ -300,7 +336,36 @@ def compare_entities(old_entity: Entity, new_entity: Entity): return (olddiff, newdiff) -def merge_entities(entity_a: Entity, entity_b: Entity): +def empty_diff(old_entity: Entity, new_entity: Entity, compare_referenced_records: bool = False): + """Check whether the `compare_entities` found any differences between + old_entity and new_entity. 
+ + Parameters + ---------- + old_entity, new_entity : Entity + Entities to be compared + compare_referenced_records : bool, optional + Whether to compare referenced records in case of both, `old_entity` and + `new_entity`, have the same reference properties and both have a Record + object as value. + + """ + olddiff, newdiff = compare_entities( + old_entity, new_entity, compare_referenced_records) + for diff in [olddiff, newdiff]: + for key in ["parents", "properties"]: + if len(diff[key]) > 0: + # There is a difference somewhere in the diff + return False + for key in SPECIAL_ATTRIBUTES: + if key in diff and diff[key]: + # There is a difference in at least one special attribute + return False + # all elements of the two diffs were empty + return True + + +def merge_entities(entity_a: Entity, entity_b: Entity, merge_references_with_empty_diffs=True, force=False): """ Merge entity_b into entity_a such that they have the same parents and properties. @@ -314,13 +379,34 @@ def merge_entities(entity_a: Entity, entity_b: Entity): Returns entity_a. WARNING: This function is currently experimental and insufficiently tested. Use with care. + + Parameters + ---------- + entity_a, entity_b : Entity + The entities to be merged. entity_b will be merged into entity_a in place + merge_references_with_empty_diffs : bool, optional + Whether the merge is performed if entity_a and entity_b both reference + record(s) that may be different Python objects but have empty diffs. If + set to `False` a merge conflict will be raised in this case + instead. Default is True. + force : bool, optional + If True, in case `entity_a` and `entity_b` have the same properties, the + values of `entity_a` are replaced by those of `entity_b` in the merge. + If `False`, a RuntimeError is raised instead. Default is False. + + Returns + ------- + entity_a : Entity + The initial entity_a after the in-place merge + """ logging.warning( "This function is currently experimental and insufficiently tested. 
Use with care.") # Compare both entities: - diff_r1, diff_r2 = compare_entities(entity_a, entity_b) + diff_r1, diff_r2 = compare_entities( + entity_a, entity_b, compare_referenced_records=merge_references_with_empty_diffs) # Go through the comparison and try to apply changes to entity_a: for key in diff_r2["parents"]: @@ -343,6 +429,9 @@ def merge_entities(entity_a: Entity, entity_b: Entity): if (diff_r1["properties"][key][attribute] is None): setattr(entity_a.get_property(key), attribute, diff_r2["properties"][key][attribute]) + elif force: + setattr(entity_a.get_property(key), attribute, + diff_r2["properties"][key][attribute]) else: raise RuntimeError( f"Merge conflict:\nEntity a ({entity_a.id}, {entity_a.name}) " @@ -370,6 +459,9 @@ def merge_entities(entity_a: Entity, entity_b: Entity): if sa_a != sa_b: if sa_a is None: setattr(entity_a, special_attribute, sa_b) + elif force: + # force overwrite + setattr(entity_a, special_attribute, sa_b) else: raise RuntimeError("Merge conflict.") return entity_a diff --git a/src/caosdb/cert/indiscale.ca.crt b/src/caosdb/cert/indiscale.ca.crt deleted file mode 100644 index 08a79d60c5d34626eb96f1a92e33d0ac22494f3c..0000000000000000000000000000000000000000 --- a/src/caosdb/cert/indiscale.ca.crt +++ /dev/null @@ -1,55 +0,0 @@ ------BEGIN CERTIFICATE----- -MIIJ6TCCBdGgAwIBAgIIFVYzdrEDk6EwDQYJKoZIhvcNAQENBQAwgZMxCzAJBgNV -BAYTAkRFMRUwEwYDVQQIEwxMb3dlciBTYXhvbnkxEzARBgNVBAcTCkdvZXR0aW5n -ZW4xFzAVBgNVBAoTDkluZGlTY2FsZSBHbWJIMRwwGgYDVQQDExNJbmRpU2NhbGUg -Um9vdCBDQSAxMSEwHwYJKoZIhvcNAQkBFhJpbmZvQGluZGlzY2FsZS5jb20wHhcN -MTkwODA3MDAwMDAwWhcNMzQwODA2MjM1OTU5WjCBkzELMAkGA1UEBhMCREUxFTAT -BgNVBAgTDExvd2VyIFNheG9ueTETMBEGA1UEBxMKR29ldHRpbmdlbjEXMBUGA1UE -ChMOSW5kaVNjYWxlIEdtYkgxHDAaBgNVBAMTE0luZGlTY2FsZSBSb290IENBIDEx -ITAfBgkqhkiG9w0BCQEWEmluZm9AaW5kaXNjYWxlLmNvbTCCBCIwDQYJKoZIhvcN -AQEBBQADggQPADCCBAoCggQBAKxJO3XOqrUxFU3qdVyk9tmZEHwhwntcLO+kRR5t -64/1Z/+VIPSgVN5phkSCukj2BPJITWKplWzJDAYWSvA/7cqavCtx8yP+m3AHWrRa 
-CeHbtkGZ1nzwyFel3GIr93e65REeWqBE3knzem+qxTlZ2hp8/w3oxUlhy7tGxjBs -JlekgLRDrnj4Opyb4GVjcVfcELmu3sLrrPX1wdYJrqaMQUR4BKZnbXxKdOYyX+kR -/W2P4sihCCJh7Wy29VXHwSSCM1qEkU3REjvPEmEElCG7UpqOfg+3jaNZDqnvfskf -okU4GuFCxSWQituyP9jm/hFVEhz59tUMYCllcjEi2jGmD2DBKpiru4t4/z0Aymf4 -Pep9hNtH1yhZMxpQeCYK9ESEE5d7do0bu/4YFp7jAg5vWZ8KlILZakmypVBFUw8I -U/QJoJ55j95vIp+kjFdXelIVcr5La/zOR82JldaoPfyoBKObzwpwqaWQwYm8pj4p -XkUdJTf8rpW21SSGWZm8JoFSYDfGvI61rPEjl/ohKhlG0tV6E2tCc406HNo/7pPe -pmx/v9ZWLbYDAH7MVMB4tv6zDRE/c4KTbh5/s70VbXbAeOG6DNwegdDLDYZOv6Yw -YQMz9NWtKGzvoFehP2vY5nGK95JVUcd90jaNaoURLB102VtxAjPIEQA1PjbQxLvC -7A6kshlpQiN7zS/R9IgiEkYP/9gjy6mMuQVxH7C+9cqmCnXvVmpHmxXGUqk61r/B -h12htsx5qjbbkToZYhUXBmwRq4LDtyoxNeaF2Jc+gE762obbHsSYMuSuh0kTFUUd -uqfrI8OyzX4r1w5dYf2FEetZTT2Obyxb3Cy0btJF5+zEerBX44RulkdC+TPTMhJw -b1jrPCACKywy9b6vJcSQ2V1+uLk7rH2JKD+fQRIKUqZZkhNKFYz5dnYYTgS45M0/ -C+vIvRnhgNSNb4efG6wyFvWEF8poDSPnJ4mM+0jHG/+cLqF/M2CMFvC+yU8Hj9YH -B+H2L6V1QlCkpw5Ai4ji6OaQmnrsjE8EJj58vwYKsjmLGuf4j5AivkQTxfgCPGrT -6CxSesoFmYDPSg/2eO+IfYEwnd7Rbs4aAhW8eo+lGpmK0DQxNjlejYt/Cgp7HWCq -m/VNqWPIDMSTTqyk1GTmp67NjEZKt2ukJxI2CpL8s/9x4f3GTjNyI750pKM/uzMk -OBKTMuWJQ6xeMR3h9RQlqlmwcErLXoUGInOTHHjRGXDI+ZBeLqT5DikcFiwbHG3+ -6FOuxXO0eqqg2tBW8cQ5kuRI0YFznipDUcfgDZt0JEkEXmRuL0nxYO35WKKdpGcF -xFRJtO4FRB4nVWekVRuK9m47IPm6vC4eo+pCNPPoQ+FjyQ8CAwEAAaM/MD0wDAYD -VR0TBAUwAwEB/zAdBgNVHQ4EFgQUFjE2TLaKASKEJ0LKOO+37/Hu7qowDgYDVR0P -AQH/BAQDAgEGMA0GCSqGSIb3DQEBDQUAA4IEAQB2j1GL1G0ferWp9wmuDdF2oumn -k+JIctRaGHaSrqHy4zjwj3Oqm2JA1ds+WfWozz+d38ZcLqSHo+g9ku5h/XOogQEU -O4/y7j44pxIUg0EcIpMHtf7KPocPfvi9lw/4vE/3V/WKh4E09SXWgyY5tMUlEMaB -6t8n7gg943HY2MJE1QU2wOPMXB1krFbunYxJcrUMs21w9jRWVv/wvaj6rkdvvSbU -Yre11J+VlhC6fxx+STohQopzE6jnsaHile56b9xAmCCKcPEpWeKKBFS7pVNHEIHF -uHWpgVjhoheEMMbYgu6l5E5K32TNYCKU49jNRWEKETjmYQSNl9dsSip+XlvaU8wQ -VRR8UMHZPiJDW/AAHCr+bXEarZ9mSj/y+R512YtVw95zCnGUtzOJViThoIk/IAOR -AJdnvsFmZSIKtFHpSEFYlTDq2yr1ulzbaDhuPRzita8b0cP27UvqRebZw5CvHN48 -B9a9tTYowKuJqmtjE6D00QA4xS8fRizLnx54uNmDbwf/8WavVk6MzDERwRE3OsSy 
-D0dV6gy3t2AqEpVBrICrFqvgAQa4fcFcIwz3Qbt5o5uEi7acRomY57YrxrlfNTwh -2oDQz+HQ/ZTDwZ3DrIgel7GrQ5fXrXDLL3ebtsbuIeBx8crOWQask832HcLtDVpu -E/FdJEMMjglzIcy2dHpuODIGFmgEVfHR4DOOSBl0hfNdlrYnhC0h8/6QFswtlYFF -8aQbGX7inK8L2in5wQ7ypeoMuXkQVYxlU1TEGmgB8aDke47MuX1FH+clsCaZ3s1E -ka6lV6cjNYcosS718B6b2JgDUzmGBn2Sdm1xFmJM16dXp7TSmC5/fYxXuE/CynDs -PmaUb9Ms6XUYSwKKhZ5HZdeRoNz8w62WNAeF7o7iX6IVrd/G1bJnSBN01istckyR -BDuIkaoBQ9yvHN6Bo/J3KR08ixF1dHFPo/oSgkBxkLakb/yeslBTP/oISiFeQ4+q -Gld1mhAvmG99dVZfoysrMjZSyghNbqwScjbYYN115lExV5ZeRtSwA7JCYE2lBjmB -vocmz/hh/ifbmmqIvSv0NtiBnM6mNqngZEWD/rAloVOQoq0KVJJ5lUCQrBSFtR4+ -G1JGMX6b7uRp4mfdqqDE62KxxfkWBUwzUTIKGb5K42ji1Gy5li/TIWJtLNGNNQ2A -0ui2RhwioaGGfYyomSFuAo5IPE/NF0ASjrTDW6GoNxypTSYE4/7oSoxeryafVnqN -S0fRyrgSLiuT5tAiZ3b5Q3EFYUM2OcU3ezr/ZUabf9qIsqOnCi91SqE88BQbenot -0HyUMdp/7QX9SyWM/azhcRiReAtkmq9pgeQA2TTZADDNTkKRljG9VeFDSwl7 ------END CERTIFICATE----- diff --git a/src/caosdb/common/administration.py b/src/caosdb/common/administration.py index 98d4d2826da7131ef79b5c3cc9b3d9597abc0248..bd09c626d5a9e48bc9d3610f23cb46660cb10db3 100644 --- a/src/caosdb/common/administration.py +++ b/src/caosdb/common/administration.py @@ -76,7 +76,7 @@ def get_server_properties(): con = get_connection() try: body = con._http_request( - method="GET", path="_server_properties").response + method="GET", path="_server_properties") except EntityDoesNotExistError: raise ServerConfigurationException( "Debug mode in server is probably disabled.") from None diff --git a/src/caosdb/common/timezone.py b/src/caosdb/common/timezone.py index 2bd3d3d4d739118e160f7b3a35757fbb0afe70cb..8fc5e710d3cbf6f20cf81397573f972db3b22f12 100644 --- a/src/caosdb/common/timezone.py +++ b/src/caosdb/common/timezone.py @@ -12,6 +12,7 @@ class TimeZone(): display_name : string A human-friendly name of the time zone: """ + def __init__(self, zone_id, offset, display_name): self.zone_id = zone_id self.offset = offset diff --git a/src/caosdb/configuration.py b/src/caosdb/configuration.py index 
75827df0d00d6c82251c2c04fa47413ac2801928..1c108ac1d39c135dbc90f477be8b8f2f630391ce 100644 --- a/src/caosdb/configuration.py +++ b/src/caosdb/configuration.py @@ -31,12 +31,7 @@ try: except ImportError: pass -try: - # python2 - from ConfigParser import ConfigParser -except ImportError: - # python3 - from configparser import ConfigParser +from configparser import ConfigParser from os import environ, getcwd from os.path import expanduser, join, isfile @@ -59,6 +54,11 @@ def configure(inifile): _reset_config() read_config = _pycaosdbconf.read(inifile) validate_yaml_schema(config_to_yaml(_pycaosdbconf)) + + if "HTTPS_PROXY" in environ: + _pycaosdbconf["Connection"]["https_proxy"] = environ["HTTPS_PROXY"] + if "HTTP_PROXY" in environ: + _pycaosdbconf["Connection"]["http_proxy"] = environ["HTTP_PROXY"] return read_config diff --git a/src/caosdb/connection/SocksiPy.zip b/src/caosdb/connection/SocksiPy.zip deleted file mode 100644 index e81f1f9393c766a3acd41b44245f9e17f090cbe5..0000000000000000000000000000000000000000 Binary files a/src/caosdb/connection/SocksiPy.zip and /dev/null differ diff --git a/src/caosdb/connection/connection.py b/src/caosdb/connection/connection.py index 43eb3410d8d5bdc8323a811fd2b6424fb75f3fda..a2802848af7bae8fb65378532156d8469f31a9b8 100644 --- a/src/caosdb/connection/connection.py +++ b/src/caosdb/connection/connection.py @@ -28,9 +28,15 @@ from __future__ import absolute_import, print_function, unicode_literals import logging import ssl import sys +import warnings from builtins import str # pylint: disable=redefined-builtin from errno import EPIPE as BrokenPipe from socket import error as SocketError +from urllib.parse import quote, urlparse +from requests import Session as HTTPSession +from requests.exceptions import ConnectionError as HTTPConnectionError +from urllib3.poolmanager import PoolManager +from requests.adapters import HTTPAdapter from caosdb.configuration import get_config from caosdb.exceptions import (CaosDBException, 
HTTPClientError, @@ -49,16 +55,8 @@ except ModuleNotFoundError: from pkg_resources import resource_filename from .interface import CaosDBHTTPResponse, CaosDBServerConnection -from .streaminghttp import StreamingHTTPSConnection from .utils import make_uri_path, parse_url, urlencode - -try: - from urllib.parse import quote, urlparse -except ImportError: - from urllib import quote - from urlparse import urlparse - -# pylint: disable=missing-docstring +from .encode import MultipartYielder, ReadableMultiparts _LOGGER = logging.getLogger(__name__) @@ -67,6 +65,9 @@ class _WrappedHTTPResponse(CaosDBHTTPResponse): def __init__(self, response): self.response = response + self._generator = None + self._buffer = b'' + self._stream_consumed = False @property def reason(self): @@ -74,21 +75,71 @@ class _WrappedHTTPResponse(CaosDBHTTPResponse): @property def status(self): - return self.response.status + return self.response.status_code def read(self, size=None): - return self.response.read(size) + if self._stream_consumed is True: + raise RuntimeError("Stream is consumed") + + if self._buffer is None: + # the buffer has been drained in the previous call. 
+ self._stream_consumed = True + return b'' + + if self._generator is None and (size is None or size == 0): + # return full content at once + self._stream_consumed = True + return self.response.content + + if len(self._buffer) >= size: + # still enough bytes in the buffer + result = chunk[:size] + self._buffer = chunk[size:] + return result + + if self._generator is None: + # first call to this method + if size is None or size == 0: + size = 512 + self._generator = self.response.iter_content(size) + + try: + # read new data into the buffer + chunk = self._buffer + next(self._generator) + result = chunk[:size] + if len(result) == 0: + self._stream_consumed = True + self._buffer = chunk[size:] + return result + except StopIteration: + # drain buffer + result = self._buffer + self._buffer = None + return result def getheader(self, name, default=None): - return self.response.getheader(name=name, default=default) + return self.response.headers[name] if name in self.response.headers else default def getheaders(self): - return self.response.getheaders() + return self.response.headers.items() def close(self): self.response.close() +class _SSLAdapter(HTTPAdapter): + """Transport adapter that allows us to use different SSL versions.""" + + def __init__(self, ssl_version): + self.ssl_version = ssl_version + super().__init__() + + def init_poolmanager(self, connections, maxsize, block=False): + self.poolmanager = PoolManager( + num_pools=connections, maxsize=maxsize, + block=block, ssl_version=self.ssl_version) + + class _DefaultCaosDBServerConnection(CaosDBServerConnection): """_DefaultCaosDBServerConnection. 
@@ -101,10 +152,11 @@ class _DefaultCaosDBServerConnection(CaosDBServerConnection): def __init__(self): self._useragent = ("caosdb-pylib/{version} - {implementation}".format( version=version, implementation=type(self).__name__)) - self._http_con = None self._base_path = None + self._session = None + self._timeout = None - def request(self, method, path, headers=None, body=None, **kwargs): + def request(self, method, path, headers=None, body=None): """request. Send a HTTP request to the server. @@ -118,38 +170,40 @@ class _DefaultCaosDBServerConnection(CaosDBServerConnection): including query and frament segments. headers : dict of str -> str, optional HTTP request headers. (Defautl: None) - body : str or bytes or readable, opional + body : str or bytes or readable, optional The body of the HTTP request. Bytes should be a utf-8 encoded string. - **kwargs : - Any keyword arguments will be ignored. - TODO: Why are they allowed then? Returns ------- - TODO: What? + response : CaosDBHTTPResponse """ if headers is None: headers = {} headers["User-Agent"] = self._useragent + + if path.endswith("/."): + path = path[:-1] + "%2E" + + if isinstance(body, MultipartYielder): + body = ReadableMultiparts(body) + try: - self._http_con = StreamingHTTPSConnection( - # TODO looks as if configure needs to be done first. - # That is however not assured. - host=self.setup_fields["host"], - timeout=self.setup_fields["timeout"], - context=self.setup_fields["context"], - socket_proxy=self.setup_fields["socket_proxy"]) - self._http_con.request(method=method, url=self._base_path + path, - headers=headers, body=body) - except SocketError as socket_err: + response = self._session.request( + method=method, + url=self._base_path + path, + headers=headers, + data=body, + timeout=self._timeout, + stream=True) + + return _WrappedHTTPResponse(response) + except HTTPConnectionError as conn_err: raise CaosDBConnectionError( - "Connection failed. Network or server down? 
" + str(socket_err) + "Connection failed. Network or server down? " + str(conn_err) ) - return _WrappedHTTPResponse(self._http_con.getresponse()) - def configure(self, **config): """configure. @@ -173,55 +227,69 @@ class _DefaultCaosDBServerConnection(CaosDBServerConnection): loaded. """ + if "url" not in config: + raise CaosDBConnectionError( + "No connection url specified. Please " + "do so via caosdb.configure_connection(...) or in a config " + "file.") + if (not config["url"].lower().startswith("https://") and not config["url"].lower().startswith("http://")): + raise CaosDBConnectionError("The connection url is expected " + "to be a http or https url and " + "must include the url scheme " + "(i.e. start with https:// or " + "http://).") + + url = urlparse(config["url"]) + path = url.path.strip("/") + if len(path) > 0: + path = path + "/" + self._base_path = url.scheme + "://" + url.netloc + "/" + path + + self._session = HTTPSession() + + if url.scheme == "https": + self._setup_ssl(config) + + # TODO(tf) remove in next release + socket_proxy = config["socket_proxy"] if "socket_proxy" in config else None + if socket_proxy is not None: + self._session.proxies = { + "https": "socks5://" + socket_proxy, + "http": "socks5://" + socket_proxy, + } + + if "https_proxy" in config: + if self._session.proxies is None: + self._session.proxies = {} + self._session.proxies["https"] = config["https_proxy"] + + if "http_proxy" in config: + if self._session.proxies is None: + self._session.proxies = {} + self._session.proxies["http"] = config["http_proxy"] + + if "timeout" in config: + self._timeout = config["timeout"] + + def _setup_ssl(self, config): if "ssl_version" in config and config["cacert"] is not None: ssl_version = getattr(ssl, config["ssl_version"]) else: ssl_version = ssl.PROTOCOL_TLS - context = ssl.SSLContext(ssl_version) - context.verify_mode = ssl.CERT_REQUIRED - if config.get("ssl_insecure"): + self._session.mount(self._base_path, _SSLAdapter(ssl_version)) + + 
verify = True + if "cacert" in config: + verify = config["cacert"] + if "ssl_insecure" in config and config["ssl_insecure"]: _LOGGER.warning("*** Warning! ***\n" "Insecure SSL mode, certificate will not be checked! " "Please consider removing the `ssl_insecure` configuration option.\n" "****************") - context.verify_mode = ssl.CERT_NONE - - if (not context.verify_mode == ssl.CERT_NONE and - hasattr(context, "check_hostname")): - context.check_hostname = True - - if ("cacert" in config and config["cacert"] is not None and - config["cacert"]): - try: - context.load_verify_locations(config["cacert"]) - except Exception as exc: - raise CaosDBConnectionError("Could not load the cacert in" - "`{}`: {}".format(config["cacert"], - exc)) - - context.load_default_certs() - - if "url" in config: - parsed_url = parse_url(config["url"]) - host = parsed_url.netloc - self._base_path = parsed_url.path - else: - raise CaosDBConnectionError( - "No connection url specified. Please " - "do so via caosdb.configure_connection(...) or in a config " - "file.") - - socket_proxy = None - - if "socket_proxy" in config: - socket_proxy = config["socket_proxy"] - - self.setup_fields = { - "host": host, - "timeout": int(config.get("timeout")), - "context": context, - "socket_proxy": socket_proxy} + verify = False + if verify is not None: + self._session.verify = verify def _make_conf(*conf): @@ -252,7 +320,6 @@ _DEFAULT_CONF = { "password_method": "input", "implementation": _DefaultCaosDBServerConnection, "timeout": 210, - "cacert": resource_filename("caosdb", 'cert/indiscale.ca.crt') } @@ -314,6 +381,10 @@ def configure_connection(**kwargs): Parameters ---------- + url : str + The url of the CaosDB Server. HTTP and HTTPS urls are allowed. However, + it is **highly** recommend to avoid HTTP because passwords and + authentication token are send over the network in plain text. username : str Username for login; e.g. 'admin'. 
@@ -342,6 +413,24 @@ def configure_connection(**kwargs): An authentication token which has been issued by the CaosDB Server. Implies `password_method="auth_token"` if set. An example token string would be `["O","OneTimeAuthenticationToken","anonymous",["administration"],[],1592995200000,604800000,"3ZZ4WKRB-5I7DG2Q6-ZZE6T64P-VQ","197d0d081615c52dc18fb323c300d7be077beaad4020773bb58920b55023fa6ee49355e35754a4277b9ac525c882bcd3a22e7227ba36dfcbbdbf8f15f19d1ee9",1,30000]`. + https_proxy : str, optional + Define a proxy for the https connections, e.g. `http://localhost:8888`, + `socks5://localhost:8888`, or `socks4://localhost:8888`. These are + either (non-TLS) HTTP proxies, SOCKS4 proxies, or SOCKS5 proxies. HTTPS + proxies are not supported. However, the connection will be secured + using TLS in the tunneled connection nonetheless. Only the connection + to the proxy is insecure which is why it is not recommended to use HTTP + proxies when authentication against the proxy is necessary. If + unspecified, the https_proxy option of the pycaosdb.ini or the HTTPS_PROXY + environment variable is used. Use `None` to override these + options with a no-proxy setting. + + http_proxy : str, optional + Define a proxy for the http connections, e.g. `http://localhost:8888`. + If unspecified, the http_proxy option of the pycaosdb.ini or the + HTTP_PROXY environment variable is used. Use `None` to override + these options with a no-proxy setting. + implementation : CaosDBServerConnection The class which implements the connection. (Default: _DefaultCaosDBServerConnection) @@ -372,6 +461,11 @@ def configure_connection(**kwargs): local_conf = _make_conf(_DEFAULT_CONF, global_conf, kwargs) connection = _Connection.get_instance() + + if "socket_proxy" in local_conf: + warnings.warn("Deprecated configuration option: socket_proxy. 
Use " + "the new https_proxy option instead", + DeprecationWarning, stacklevel=1) connection.configure(**local_conf) return connection @@ -599,7 +693,7 @@ class _Connection(object): # pylint: disable=useless-object-inheritance method=method, path=path, headers=headers, - body=body, **kwargs) + body=body) _LOGGER.debug("response: %s %s", str(http_response.status), str(http_response.getheaders())) self._authenticator.on_response(http_response) diff --git a/src/caosdb/connection/encode.py b/src/caosdb/connection/encode.py index 7b092aae784a76abec0104ef7269df7ae0111b3b..0b826cc4400275a2374308ee104cdbdabb619b75 100644 --- a/src/caosdb/connection/encode.py +++ b/src/caosdb/connection/encode.py @@ -51,7 +51,8 @@ multipart/form-data is the standard way to upload files over HTTP __all__ = [ 'gen_boundary', 'encode_and_quote', 'MultipartParam', 'encode_string', - 'encode_file_header', 'get_body_size', 'get_headers', 'multipart_encode' + 'encode_file_header', 'get_body_size', 'get_headers', 'multipart_encode', + 'ReadableMultiparts', ] from urllib.parse import quote_plus from io import UnsupportedOperation @@ -475,3 +476,40 @@ def multipart_encode(params, boundary=None, callback=None): params = MultipartParam.from_params(params) return MultipartYielder(params, boundary, callback), headers + + +class ReadableMultiparts(object): + """Wraps instances of the MultipartYielder class as a readable and withable + object.""" + + def __init__(self, multipart_yielder): + self.multipart_yielder = multipart_yielder + self.current_block = None + self.left_over = b'' + + def read(self, size=-1): + result = self.left_over + while size == -1 or len(result) < size: + try: + next_chunk = self.multipart_yielder.next() + if hasattr(next_chunk, "encode"): + next_chunk = next_chunk.encode("utf8") + result += next_chunk + except StopIteration: + break + + if size == -1: + self.left_over = b'' + return result + + self.left_over = result[size:] + return result[:size] + + def __enter__(self): + pass + 
+ def __exit__(self, type, value, traceback): + self.close() + + def close(self): + self.multipart_yielder.reset() diff --git a/src/caosdb/connection/streaminghttp.py b/src/caosdb/connection/streaminghttp.py deleted file mode 100644 index 01774301b9bdb55bdbf6b56695042aaf354dba97..0000000000000000000000000000000000000000 --- a/src/caosdb/connection/streaminghttp.py +++ /dev/null @@ -1,152 +0,0 @@ -# -*- encoding: utf-8 -*- -# -# ** header v3.0 -# This file is a part of the CaosDB Project. -# -# Copyright (C) 2018 Research Group Biomedical Physics, -# Max-Planck-Institute for Dynamics and Self-Organization Göttingen -# -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU Affero General Public License as -# published by the Free Software Foundation, either version 3 of the -# License, or (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Affero General Public License for more details. -# -# You should have received a copy of the GNU Affero General Public License -# along with this program. If not, see <https://www.gnu.org/licenses/>. 
-# -# ** end header -# -# Original work Copyright (c) 2011 Chris AtLee -# Modified work Copyright (c) 2017 Biomedical Physics, MPI for Dynamics and Self-Organization -# -# Permission is hereby granted, free of charge, to any person obtaining a copy -# of this software and associated documentation files (the "Software"), to deal -# in the Software without restriction, including without limitation the rights -# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -# copies of the Software, and to permit persons to whom the Software is -# furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included in -# all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -# THE SOFTWARE. -"""Streaming HTTP uploads module. - -This module extends the standard httplib and http.client HTTPConnection so that -iterable objects can be used in the body of HTTP requests. - -**N.B.** You must specify a Content-Length header if using an iterable object -since there is no way to determine in advance the total size that will be -yielded, and there is no way to reset an interator. 
-""" - -from __future__ import unicode_literals, print_function, absolute_import -import socks -import socket -try: - # python3 - from http import client as client -except ImportError: - # python2 - import httplib as client - - -__all__ = ['StreamingHTTPSConnection'] - - -class StreamingHTTPSConnection(client.HTTPSConnection, object): - """Subclass of `http.client.HTTSConnection` or `httplib.HTTPSConnection` - that overrides the `send()` method to support iterable body objects.""" - # pylint: disable=unused-argument, arguments-differ - - def __init__(self, socket_proxy=None, **kwargs): - if socket_proxy is not None: - host, port = socket_proxy.split(":") - socks.setdefaultproxy(socks.PROXY_TYPE_SOCKS5, host, - int(port)) - socket.socket = socks.socksocket - super(StreamingHTTPSConnection, self).__init__(**kwargs) - - def _send_output(self, body, **kwargs): - """Send the currently buffered request and clear the buffer. - - Appends an extra \\r\\n to the buffer. - A message_body may be specified, to be appended to the request. - - This method is implemented in differently in the various python - versions (which is extremely annoying). So we provide a unified but - relatively dumb implementaion which only serves our needs. - """ - self._buffer.extend(("".encode("utf-8"), "".encode("utf-8"))) - headers = "\r\n".encode("utf-8").join(self._buffer) - del self._buffer[:] - - self.send(headers) - if body is not None: - self.send(body) - - # pylint: disable=too-many-branches - - def send(self, value): - """Send ``value`` to the server. - - ``value`` can be a string-like object which supports a 'encode' method, - a file-like object that supports a .read() method, or an iterable object - that supports a .next() method. - - An encode()able ``value`` will be utf-8 encoded before sending. 
- """ - # Based on python 2.6's httplib.HTTPConnection.send() - if self.sock is None: - if self.auto_open: - self.connect() - else: - raise client.NotConnected() - - # send the data to the server. if we get a broken pipe, then close - # the socket. we want to reconnect when somebody tries to send again. - # - # NOTE: we DO propagate the error, though, because we cannot simply - # ignore the error... the caller will know if they can retry. - if self.debuglevel > 0: - print("send: ", repr(value)) - try: - blocksize = 8192 - if hasattr(value, 'read'): - if hasattr(value, 'seek'): - value.seek(0) - if self.debuglevel > 0: - print("sendIng a read()able") - data = value.read(blocksize) - while data: - self.sock.sendall(data) - data = value.read(blocksize) - elif hasattr(value, 'next'): - if hasattr(value, 'reset'): - value.reset() - if self.debuglevel > 0: - print("sendIng an iterable") - for data in value: - if hasattr(data, "encode"): - self.sock.sendall(data.encode('utf-8')) - else: - self.sock.sendall(data) - else: - if self.debuglevel > 0: - print("sendIng a byte-like") - self.sock.sendall(value) - except socket.error as err: - if err.args[0] == 32: # Broken pipe - self.close() - raise diff --git a/src/caosdb/schema-pycaosdb-ini.yml b/src/caosdb/schema-pycaosdb-ini.yml index a81bf006523ab7690ee0bf9d27e0a2d57ce8c3c3..64451a24da1587abbd9815b8ddefc4c85fa36e5a 100644 --- a/src/caosdb/schema-pycaosdb-ini.yml +++ b/src/caosdb/schema-pycaosdb-ini.yml @@ -14,10 +14,10 @@ schema-pycaosdb-ini: additionalProperties: false properties: url: - description: URL of the CaosDB server + description: "URL of the CaosDB server. Allowed are HTTP and HTTPS connections. However, since authentication tokens and sometimes even passwords are send in plain text to the server it is **highly** recommended to use HTTPS connections whenever possible. HTTP is ok for testing and debugging." type: string - pattern: https://[-a-zA-Z0-9\.]+(:[0-9]+)?(/)? 
- examples: ["https://demo.indiscale.com/", "https://localhost:10443/"] + pattern: http(s)?://[-a-zA-Z0-9\.]+(:[0-9]+)?(/)? + examples: ["https://demo.indiscale.com/", "http://localhost:10080/"] username: type: string description: User name used for authentication with the server @@ -54,7 +54,15 @@ schema-pycaosdb-ini: socket_proxy: examples: ["localhost:12345"] type: string - description: You can define a socket proxy to be used. This is for the case that the server sits behind a firewall which is being tunnelled with a socket proxy (SOCKS4 or SOCKS5) (e.g. via ssh's -D option or a dedicated proxy server). + description: Deprecated. Please use https_proxy instead. + https_proxy: + examples: ["http://localhost:8888", "socks5://localhost:8888", "socks4://localhost:8888"] + type: string + description: "Define a proxy for the https connections. These are either (non-TLS) HTTP proxies, SOCKS4 proxies, or SOCKS5 proxies. HTTPS proxies are not supported. However, the connection will be secured using TLS in the tunneled connection nonetheless. Only the connection to the proxy is insecure which is why it is not recommended to use HTTP proxies when authentication against the proxy is necessary. Note: this option is overridden by the HTTPS_PROXY environment variable, if present." + http_proxy: + examples: ["http://localhost:8888", "socks5://localhost:8888", "socks4://localhost:8888"] + type: string + description: "Define a proxy for the http connections. These are either (non-TLS) HTTP proxies, SOCKS4 proxies, or SOCKS5 proxies. HTTPS proxies are not supported. Note: this option is overridden by the HTTP_PROXY environment variable, if present." implementation: description: This option is used internally and for testing. Do not override. 
examples: [_DefaultCaosDBServerConnection] diff --git a/src/caosdb/utils/server_side_scripting.py b/src/caosdb/utils/server_side_scripting.py index 663178dcbda4293cb30dff88efbfb7b7302df70d..7e5ee4390ae3314792d12fd2942980aa3d9c9773 100644 --- a/src/caosdb/utils/server_side_scripting.py +++ b/src/caosdb/utils/server_side_scripting.py @@ -30,7 +30,8 @@ from lxml import etree from caosdb.connection.connection import get_connection from caosdb.connection.utils import urlencode -from caosdb.connection.encode import MultipartParam, multipart_encode +from caosdb.connection.encode import (MultipartParam, multipart_encode, + ReadableMultiparts) def _make_params(pos_args, opts): @@ -63,6 +64,7 @@ def _make_multipart_request(call, pos_args, opts, files): filename=filename)) body, headers = multipart_encode(parts) + body = ReadableMultiparts(body) return body, headers diff --git a/src/doc/conf.py b/src/doc/conf.py index 7f5f70a82fc2782cba18891bcb23598a93033b59..8f3dfbdbdf80307b8bd68c068360aca4b7e7a2c7 100644 --- a/src/doc/conf.py +++ b/src/doc/conf.py @@ -29,10 +29,10 @@ copyright = '2022, IndiScale GmbH' author = 'Daniel Hornung' # The short X.Y version -version = '0.9.0' +version = '0.10.0' # The full version, including alpha/beta/rc tags # release = '0.5.2-rc2' -release = '0.9.0' +release = '0.10.0' # -- General configuration --------------------------------------------------- diff --git a/tox.ini b/tox.ini index e321891883b9425d24543a41f2d1283e0be52109..50c22d5716769ef2ec818f6c8fb94491ea372434 100644 --- a/tox.ini +++ b/tox.ini @@ -7,7 +7,6 @@ deps = . nose pytest pytest-cov - python-dateutil jsonschema==4.0.1 commands=py.test --cov=caosdb -vv {posargs} diff --git a/unittests/test_apiutils.py b/unittests/test_apiutils.py index 2ebdf95a3aa5ce76b983b2c3c47630e1a8884705..f9de6d1a037667d0ead0f02439bde13ac4f14f60 100644 --- a/unittests/test_apiutils.py +++ b/unittests/test_apiutils.py @@ -1,11 +1,11 @@ -# -*- encoding: utf-8 -*- # # This file is a part of the CaosDB Project. 
# -# Copyright (C) 2018 Research Group Biomedical Physics, -# Max-Planck-Institute for Dynamics and Self-Organization Göttingen # Copyright (C) 2020 Timm Fitschen <t.fitschen@indiscale.com> +# Copyright (C) 2022 Florian Spreckelsen <f.spreckelsen@indiscale.com> # Copyright (C) 2020-2022 IndiScale GmbH <info@indiscale.com> +# Copyright (C) 2018 Research Group Biomedical Physics, +# Max-Planck-Institute for Dynamics and Self-Organization Göttingen # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU Affero General Public License as @@ -20,7 +20,6 @@ # You should have received a copy of the GNU Affero General Public License # along with this program. If not, see <https://www.gnu.org/licenses/>. # -# ** end header # # Test apiutils # A. Schlemmer, 02/2018 @@ -30,7 +29,7 @@ import pytest import caosdb as db import caosdb.apiutils from caosdb.apiutils import (apply_to_ids, compare_entities, create_id_query, - resolve_reference, merge_entities) + empty_diff, resolve_reference, merge_entities) from caosdb.common.models import SPECIAL_ATTRIBUTES @@ -272,8 +271,10 @@ def test_copy_entities(): for i in [0, 1]: assert c.properties[i] is not r.properties[i] for special in SPECIAL_ATTRIBUTES: - assert getattr(c.properties[i], special) == getattr(r.properties[i], special) - assert c.get_importance(c.properties[i]) == r.get_importance(r.properties[i]) + assert getattr(c.properties[i], special) == getattr( + r.properties[i], special) + assert c.get_importance( + c.properties[i]) == r.get_importance(r.properties[i]) def test_merge_entities(): @@ -326,10 +327,12 @@ def test_merge_bug_109(): assert r_a.get_property("test_bug_property").value == [18, 19] assert "<Value>18</Value>\n <Value>19</Value>" in str(r_b) - assert "<Value>18</Value>\n <Value>19</Value>\n <Value>18</Value>\n <Value>19</Value>" not in str(r_b) + assert "<Value>18</Value>\n <Value>19</Value>\n <Value>18</Value>\n <Value>19</Value>" not in str( + r_b) assert 
"<Value>18</Value>\n <Value>19</Value>" in str(r_a) - assert "<Value>18</Value>\n <Value>19</Value>\n <Value>18</Value>\n <Value>19</Value>" not in str(r_a) + assert "<Value>18</Value>\n <Value>19</Value>\n <Value>18</Value>\n <Value>19</Value>" not in str( + r_a) @pytest.mark.xfail @@ -349,7 +352,212 @@ def test_bug_109(): assert r_a.get_property("test_bug_property").value == [18, 19] assert "<Value>18</Value>\n <Value>19</Value>" in str(r_b) - assert "<Value>18</Value>\n <Value>19</Value>\n <Value>18</Value>\n <Value>19</Value>" not in str(r_b) + assert "<Value>18</Value>\n <Value>19</Value>\n <Value>18</Value>\n <Value>19</Value>" not in str( + r_b) assert "<Value>18</Value>\n <Value>19</Value>" in str(r_a) - assert "<Value>18</Value>\n <Value>19</Value>\n <Value>18</Value>\n <Value>19</Value>" not in str(r_a) + assert "<Value>18</Value>\n <Value>19</Value>\n <Value>18</Value>\n <Value>19</Value>" not in str( + r_a) + + +def test_wrong_merge_conflict_reference(): + """Test a wrongly detected merge conflict in case of two records referencing + two different, but identical objects. 
+ + """ + # Two identical license records will be referenced from both records to be + # merged + license_rt = db.RecordType(name="license") + license_rec_a = db.Record(name="CC-BY-3.0").add_parent(license_rt) + license_rec_b = db.Record(name="CC-BY-3.0").add_parent(license_rt) + + # two referencing records + dataset_rt = db.RecordType(name="Dataset") + title_prop = db.Property(name="title", datatype=db.TEXT) + doi_prop = db.Property(name="DOI", datatype=db.TEXT) + rec_a = db.Record().add_parent(dataset_rt) + rec_a.add_property(name=license_rt.name, + datatype=license_rt.name, value=license_rec_a) + rec_a.add_property(name=title_prop.name, value="Some dataset title") + + rec_b = db.Record().add_parent(dataset_rt) + rec_b.add_property(name=license_rt.name, + datatype=license_rt.name, value=license_rec_b) + rec_b.add_property(name=doi_prop.name, value="https://doi.org/12345.678") + + merge_entities(rec_a, rec_b) + assert rec_a.get_property(license_rt.name) is not None + assert rec_a.get_property(license_rt.name).value is not None + assert isinstance(rec_a.get_property(license_rt.name).value, db.Record) + assert rec_a.get_property(license_rt.name).value.name == license_rec_a.name + assert rec_a.get_property(license_rt.name).value.name == license_rec_b.name + assert rec_a.get_property("title").value == "Some dataset title" + assert rec_a.get_property("doi").value == "https://doi.org/12345.678" + + # Reset rec_a + rec_a = db.Record().add_parent(dataset_rt) + rec_a.add_property(name=license_rt.name, + datatype=license_rt.name, value=license_rec_a) + rec_a.add_property(name=title_prop.name, value="Some dataset title") + + # this does not compare referenced records, so it will fail + with pytest.raises(RuntimeError) as re: + merge_entities(rec_a, rec_b, merge_references_with_empty_diffs=False) + assert "Merge conflict" in str(re.value) + + # ... 
as should this, of course + rec_b.get_property(license_rt.name).value.name = "Another license" + with pytest.raises(RuntimeError) as re: + merge_entities(rec_a, rec_b) + assert "Merge conflict" in str(re.value) + + +def test_empty_diff(): + + rec_a = db.Record(name="A") + rec_b = db.Record(name="B") + + assert empty_diff(rec_a, rec_a) + assert not empty_diff(rec_a, rec_b) + + rec_a.add_parent(name="RT") + rec_b.add_parent(name="RT") + assert empty_diff(rec_a, rec_a) + assert not empty_diff(rec_a, rec_b) + + rec_b.name = "A" + assert empty_diff(rec_a, rec_b) + + rec_a.add_property(name="some_prop", value=1) + assert not empty_diff(rec_a, rec_b) + + rec_b.add_property(name="some_prop", value=1) + assert empty_diff(rec_a, rec_b) + + rec_b.get_property("some_prop").value = 2 + assert not empty_diff(rec_a, rec_b) + + rec_b.get_property("some_prop").value = 1 + rec_b.add_property(name="some_other_prop", value="Test") + assert not empty_diff(rec_a, rec_b) + + rec_a.add_property(name="some_other_prop", value="Test") + assert empty_diff(rec_a, rec_b) + + # reference identical records, but different Python Record objects + ref_rec_a = db.Record(name="Ref").add_parent(name="RefType") + ref_rec_b = db.Record(name="Ref").add_parent(name="RefType") + rec_a.add_property(name="RefType", datatype="RefType", value=ref_rec_a) + rec_b.add_property(name="RefType", datatype="RefType", value=ref_rec_b) + # the default is `compare_referenced_records=False`, so the diff shouldn't + # be empty (different Python objects are referenced.) 
+ assert not empty_diff(rec_a, rec_b) + # when looking into the referenced record, the diffs should be empty again + assert empty_diff(rec_a, rec_b, compare_referenced_records=True) + + # The same for lists of references + rec_a.remove_property("RefType") + rec_b.remove_property("RefType") + assert empty_diff(rec_a, rec_b) + rec_a.add_property(name="RefType", datatype=db.LIST( + "RefType"), value=[ref_rec_a, ref_rec_a]) + rec_b.add_property(name="RefType", datatype=db.LIST( + "RefType"), value=[ref_rec_b, ref_rec_b]) + assert not empty_diff(rec_a, rec_b) + assert empty_diff(rec_a, rec_b, compare_referenced_records=True) + + # special case of ids + rec_a = db.Record(id=12) + rec_b = db.Record() + assert not empty_diff(rec_a, rec_b) + rec_b.id = 13 + assert not empty_diff(rec_a, rec_b) + rec_b.id = 12 + assert empty_diff(rec_a, rec_b) + + +def test_force_merge(): + """Test whether a forced merge overwrites existing properties correctly.""" + + # name overwrite + recA = db.Record(name="A") + recB = db.Record(name="B") + + with pytest.raises(RuntimeError) as re: + merge_entities(recA, recB) + assert "Merge conflict" in str(re.value) + + merge_entities(recA, recB, force=True) + assert "B" == recA.name + # unchanged + assert "B" == recB.name + + # description overwrite + recA = db.Record() + recA.description = "something" + recB = db.Record() + recB.description = "something else" + + with pytest.raises(RuntimeError) as re: + merge_entities(recA, recB) + assert "Merge conflict" in str(re.value) + + merge_entities(recA, recB, force=True) + assert recA.description == "something else" + # unchanged + assert recB.description == "something else" + + # property overwrite + recA = db.Record() + recA.add_property(name="propA", value="something") + recB = db.Record() + recB.add_property(name="propA", value="something else") + + with pytest.raises(RuntimeError) as re: + merge_entities(recA, recB) + assert "Merge conflict" in str(re.value) + + merge_entities(recA, recB, force=True) 
+ assert recA.get_property("propA").value == "something else" + # unchanged + assert recB.get_property("propA").value == "something else" + + # don't remove a property that's not in recB + recA = db.Record() + recA.add_property(name="propA", value="something") + recA.add_property(name="propB", value=5.0) + recB = db.Record() + recB.add_property(name="propA", value="something else") + + merge_entities(recA, recB, force=True) + assert recA.get_property("propA").value == "something else" + assert recA.get_property("propB").value == 5.0 + + # also overwrite datatypes ... + rtA = db.RecordType() + rtA.add_property(name="propA", datatype=db.INTEGER) + rtB = db.RecordType() + rtB.add_property(name="propA", datatype=db.TEXT) + + with pytest.raises(RuntimeError) as re: + merge_entities(rtA, rtB) + assert "Merge conflict" in str(re.value) + + merge_entities(rtA, rtB, force=True) + assert rtA.get_property("propA").datatype == db.TEXT + # unchanged + assert rtB.get_property("propA").datatype == db.TEXT + + # ... 
and units + recA = db.Record() + recA.add_property(name="propA", value=5, unit="m") + recB = db.Record() + recB.add_property(name="propA", value=5, unit="cm") + + with pytest.raises(RuntimeError) as re: + merge_entities(recA, recB) + assert "Merge conflict" in str(re.value) + merge_entities(recA, recB, force=True) + assert recA.get_property("propA").unit == "cm" + # unchanged + assert recB.get_property("propA").unit == "cm" diff --git a/unittests/test_connection.py b/unittests/test_connection.py index ee564ea033f9afc80522d75a85557f70819ece1e..6cc23d87c5cdcf639709a444849a856a8c70af5f 100644 --- a/unittests/test_connection.py +++ b/unittests/test_connection.py @@ -37,7 +37,8 @@ from caosdb.connection.connection import (CaosDBServerConnection, from caosdb.connection.mockup import (MockUpResponse, MockUpServerConnection, _request_log_message) from caosdb.connection.utils import make_uri_path, quote, urlencode -from caosdb.exceptions import ConfigurationError, LoginFailedError +from caosdb.exceptions import (ConfigurationError, LoginFailedError, + CaosDBConnectionError) from nose.tools import assert_equal as eq from nose.tools import assert_false as falz from nose.tools import assert_is_not_none as there @@ -46,6 +47,13 @@ from nose.tools import assert_true as tru from pytest import raises +def setup_function(function): + configure_connection(url="http://localhost:8888/some/path", + password_method="plain", username="test", + password="blub", + implementation=MockUpServerConnection) + + def setup_module(): _reset_config() @@ -103,6 +111,7 @@ def test_configure_connection(): get_config().set("Connection", "password_method", "plain") get_config().set("Connection", "password", "test_password") get_config().set("Connection", "timeout", "200") + get_config().set("Connection", "ssl_insecure", "True") there(configure_connection) tru(hasattr(configure_connection, "__call__")) @@ -116,6 +125,18 @@ def test_configure_connection(): tru(isinstance(c._delegate_connection, 
MockUpServerConnection)) +def test_configure_connection_bad_url(): + configure_connection(url="https://localhost:8888") + with raises(CaosDBConnectionError) as exc_info: + configure_connection(url="ftp://localhost:8888") + assert exc_info.value.args[0].startswith( + "The connection url is expected to be a http or https url") + with raises(CaosDBConnectionError) as exc_info: + configure_connection(url="localhost:8888") + assert exc_info.value.args[0].startswith( + "The connection url is expected to be a http or https url") + + def test_connection_interface(): with raiz(TypeError) as cm: CaosDBServerConnection() diff --git a/unittests/test_server_side_scripting.py b/unittests/test_server_side_scripting.py index 1fb24d7e40bb843391a971c5f69680b541e1de0e..b699c4482d02972282167eb9683a956097ebc5e9 100644 --- a/unittests/test_server_side_scripting.py +++ b/unittests/test_server_side_scripting.py @@ -46,7 +46,7 @@ def setup_module(): content_type = kwargs["headers"]["Content-Type"] if content_type.startswith("multipart/form-data; boundary"): - parts = kwargs["body"] + parts = kwargs["body"].multipart_yielder stdout = [] for part in parts: if hasattr(part, "decode"):