diff --git a/.gitignore b/.gitignore index b522b1da9176e59756bffe89cd4eafe0d751a23c..55fb3f0d1bc6c101704557da8f35d6e784b5ea89 100644 --- a/.gitignore +++ b/.gitignore @@ -15,4 +15,5 @@ build/ src/caosdb/version.py # documentation -_apidoc \ No newline at end of file +_apidoc +*~ diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 749b9a6dd4b9dce8fd04dc3dfac48ff7c1ae9def..29bfed59b109d1e52c22813718a0a43397379ce2 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -61,14 +61,52 @@ mypy: allow_failure: true # run unit tests -unittest: +unittest_py3.7: + tags: [ docker ] + stage: test + needs: [ ] + image: python:3.7 + script: &python_test_script + # Python docker has problems with tox and pip so use plain pytest here + - touch ~/.pycaosdb.ini + - pip install nose pytest pytest-cov python-dateutil jsonschema>=4.4.0 + - pip install . + - python -m pytest unittests + +unittest_py3.8: + tags: [ docker ] + stage: test + needs: [ ] + image: python:3.8 + script: *python_test_script + +# This needs to be changed once Python 3.9 isn't the standard Python in Debian +# anymore. +unittest_py3.9: tags: [ docker ] stage: test needs: [ ] script: + # verify that this actually is Python 3.9 + - python3 -c "import sys; assert sys.version.startswith('3.9')" - touch ~/.pycaosdb.ini - make unittest + +unittest_py3.10: + tags: [ docker ] + stage: test + needs: [ ] + image: python:3.10 + script: *python_test_script + +unittest_py3.11: + tags: [ docker ] + stage: test + needs: [ ] + image: python:3.11 + script: *python_test_script + # Trigger building of server image and integration tests trigger_build: stage: deploy @@ -96,12 +134,12 @@ build-testenv: only: - schedules - web - script: + script: - cd unittests/docker - docker login -u gitlab-ci-token -p $CI_JOB_TOKEN $CI_REGISTRY # use here general latest or specific branch latest... - docker pull $CI_REGISTRY_IMAGE|| true - - docker build + - docker build --pull --build-arg COMMIT=$CI_COMMIT_SHORT_SHA --cache-from $CI_REGISTRY_IMAGE @@ -113,7 +151,7 @@ build-testenv: pages_prepare: &pages_prepare tags: [ cached-dind ] stage: deploy - needs: [ code_style, pylint, unittest ] + needs: [ code_style, pylint, unittest_py3.8, unittest_py3.9, unittest_py3.10 ] only: refs: - /^release-.*$/i diff --git a/.gitlab/issue_templates/Default.md b/.gitlab/issue_templates/Default.md new file mode 100644 index 0000000000000000000000000000000000000000..aa1a65aca363b87aff50280e1a86824009d2098b --- /dev/null +++ b/.gitlab/issue_templates/Default.md @@ -0,0 +1,28 @@ +## Summary + +*Please give a short summary of what the issue is.* + +## Expected Behavior + +*What did you expect how the software should behave?* + +## Actual Behavior + +*What did the software actually do?* + +## Steps to Reproduce the Problem + +*Please describe, step by step, how others can reproduce the problem. Please try these steps for yourself on a clean system.* + +1. +2. +3. + +## Specifications + +- Version: *Which version of this software?* +- Platform: *Which operating system, which other relevant software versions?* + +## Possible fixes + +*Do you have ideas how the issue can be resolved?* diff --git a/.gitlab/merge_request_templates/Default.md b/.gitlab/merge_request_templates/Default.md new file mode 100644 index 0000000000000000000000000000000000000000..3629e0ca3695000863d8c254516f64bf59a7bf60 --- /dev/null +++ b/.gitlab/merge_request_templates/Default.md @@ -0,0 +1,56 @@ +# Summary + +*Insert a meaningful description for this merge request here: What is the new/changed behavior? +Which bug has been fixed? 
Are there related issues?* + + +# Focus + +*Point the reviewer to the core of the code change. Where should they start reading? What should +they focus on (e.g. security, performance, maintainability, user-friendliness, compliance with the +specs, finding more corner cases, concrete questions)?* + + +# Test Environment + +*How to set up a test environment for manual testing?* + + +# Check List for the Author + +Please, prepare your MR for a review. Be sure to write a summary and a focus and create gitlab +comments for the reviewer. They should guide the reviewer through the changes, explain your changes +and also point out open questions. For further good practices have a look at [our review +guidelines](https://gitlab.com/caosdb/caosdb/-/blob/dev/REVIEW_GUIDELINES.md) + +- [ ] All automated tests pass +- [ ] Reference related issues +- [ ] Up-to-date CHANGELOG.md (or not necessary) +- [ ] Up-to-date JSON schema (or not necessary) +- [ ] Appropriate user and developer documentation (or not necessary) + - Update / write published documentation (`make doc`). + - How do I use the software? Assume "stupid" users. + - How do I develop or debug the software? Assume novice developers. +- [ ] Annotations in code (Gitlab comments) + - Intent of new code + - Problems with old code + - Why this implementation? + + +# Check List for the Reviewer + +- [ ] I understand the intent of this MR +- [ ] All automated tests pass +- [ ] Up-to-date CHANGELOG.md (or not necessary) +- [ ] Appropriate user and developer documentation (or not necessary), also in published + documentation. +- [ ] The test environment setup works and the intended behavior is reproducible in the test + environment +- [ ] In-code documentation and comments are up-to-date. +- [ ] Check: Are there specifications? Are they satisfied? + +For further good practices have a look at [our review guidelines](https://gitlab.com/caosdb/caosdb/-/blob/dev/REVIEW_GUIDELINES.md). + + +/assign me +/target_branch dev diff --git a/CHANGELOG.md b/CHANGELOG.md index aa036beccf9e7f0ab42bebd1fedc455d5ea250e1..ebdfab4bc64e8640207e7e678cedb4bd1698fb98 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,25 +5,164 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). -## [Unreleased] +## [Unreleased] ## ### Added ### +* `Entity.remove_value_from_property` function that removes a given value from a + property and optionally removes the property if it is empty afterwards. + ### Changed ### -* Set PyYAML dependency back to PyYaml>=5.4.1 (from 6.0) for better - compatibility with docker-compose) +* `_Messages` is now `Messages` and inherits from list instead of dict +* `Message.__init__` signature changed and `type` defaults to "Info" now. +* `Message.__eq__` changed. Equality is equality of `type`, `code`, and + `description` now. ### Deprecated ### +* The API of Messages has been simplified and some ways to interact with + messages have been deprecated. Warnings are raised correspondingly. +* `Message.get_code`. Use the `code` property instead. + ### Removed ### ### Fixed ### +- Detection for cyclic references when converting entites using the high level API. 
+
 ### Security ###
 
 ### Documentation ###
 
+## [0.12.0] - 2023-06-02 ##
+
+### Added ###
+
+- Added location argument to `src/caosdb/utils/checkFileSystemConsistency.py`
+- Entity getters: `get_entity_by_<name/id/path>`
+- Cached versions of entity getters and of `execute_query` (`cached_query`)
+
+### Deprecated ###
+
+- getOriginUrlIn, getDiffIn, getBranchIn, getCommitIn (formerly apiutils) have been
+  moved to caosdb.utils.git_utils
+
+### Fixed ###
+
+- Fixed `src/caosdb/utils/checkFileSystemConsistency.py`
+
+### Documentation ###
+
+* [#83](https://gitlab.com/caosdb/caosdb-pylib/-/issues/83) - Improved
+  documentation on adding REFERENCE properties, both in the docstring of
+  `Entity.add_property` and in the data-insertion tutorial.
+
+## [0.11.2] - 2023-03-14 ##
+
+### Fixed ###
+- root logger is no longer used to create warnings. Fixes undesired output in
+  stderr
+
+## [0.11.1] - 2023-03-07 ##
+(Florian Spreckelsen)
+
+### Changed ###
+
+* Renamed `caosdb.common.models._Parents` to `caosdb.common.models._ParentList`.
+
+### Fixed ###
+
+* [caosdb-pylib#90](https://gitlab.com/caosdb/caosdb-pylib/-/issues/90): `Entity.get_parents_recursively()` did not work for unretrieved parents.
+
+## [0.11.0] - 2023-01-19 ##
+(Florian Spreckelsen)
+
+### Added ###
+
+* `apiutils.EntityMergeConflictError` class for unresolvable merge conflicts
+  when merging two entities
+* Re-introduced support for Python 3.7
+
+### Changed ###
+
+* `apiutils.merge_entities` now raises an `EntityMergeConflictError` in case of
+  unresolvable merge conflicts.
+
+### Fixed ###
+
+* [#82](https://gitlab.com/caosdb/caosdb-pylib/-/issues/82) Merging an entity
+  with properties with missing datatype leads to Exception - The correct
+  exception is raised in case of a missing LIST datatype.
+
+### Documentation ###
+
+* [Fixed](https://gitlab.com/caosdb/caosdb-pylib/-/issues/79)
+  `{action}_entity_permissions` help line.
+
+## [0.10.0] - 2022-11-14
+(Florian Spreckelsen)
+
+### Added ###
+
+* HTTP connections are allowed additionally to HTTPS connections.
+* Dependency on the `requests` package.
+* Dependency on the `python-dateutil` package.
+* `Connection.https_proxy` and `Connection.http_proxy` option of the
+  pycaosdb.ini and the `https_proxy` and `http_proxy` parameter of the
+  `configure_connection` function. See the documentation of the
+  latter for more information.
+  Note that the `HTTP_PROXY` and `HTTPS_PROXY` environment variables are
+  respected as well, unless overridden programmatically.
+* `apiutils.empty_diff` function that returns `True` if the diffs of two
+  entities found with the `compare_entities` function are empty, `False`
+  otherwise.
+
+### Changed ###
+
+* `apiutils.compare_entities` now has an optional `compare_referenced_records`
+  argument to compare referenced Entities recursively (formerly, only the
+  referenced Python objects would be compared). The default is `False` to
+  recover the original behavior.
+* `apiutils.merge_entities` now has an optional
+  `merge_references_with_empty_diffs` argument that determines whether a merge
+  of two entities will be performed if they reference identical records (w.r.t.
+  the above `empty_diff` function). Formerly this would have caused a merge
+  conflict if the referenced record(s) were identical, but stored in different
+  Python objects.
+* `apiutils.merge_entities` now has an optional `force` argument (defaults to + `False`, i.e., the old behavior) which determines whether in case of merge + conflicts errors will be raised or the properties and attributes of entity A + will be overwritten by entity B. + +### Deprecated ### + +* `Connection.socket_proxy` option of the pycaosdb.ini. Please use + `Connection.https_proxy` or `Connection.http_proxy` instead. The deprecated + option will be removed with the next minor release. + +### Fixed ### + +* handling of special attributes (name, id, ...) in `apiutils.empty_diff` + +## [0.9.0] - 2022-10-24 +(Florian Spreckelsen) + +### Added ### + +* Add TimeZone class and parse the server's time zone in the Info response. + +### Fixed ### + +* [#141](https://gitlab.indiscale.com/caosdb/src/caosdb-pylib/-/issues/141) + `password_method = unauthenticated` not allowed by schema +* Set PyYAML dependency back to PyYaml>=5.4.1 (from 6.0) for better + compatibility with docker-compose + +### Documentation ### + +* Added curator role permissions example to code gallery + ## [0.8.0] - 2022-07-12 (Timm Fitschen) @@ -217,7 +356,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Removed ### -* Dynamic exception type `EntityMultiError`. +* Dynamic exception type `EntityMultiError`. * `get_something` functions from all error object in `exceptions.py` * `AmbiguityException` diff --git a/CITATION.cff b/CITATION.cff new file mode 100644 index 0000000000000000000000000000000000000000..d9126aae6483459f8c8f248ed6a4fdf859f24e45 --- /dev/null +++ b/CITATION.cff @@ -0,0 +1,25 @@ +cff-version: 1.2.0 +message: "If you use this software, please cite it as below." +authors: + - family-names: Fitschen + given-names: Timm + orcid: https://orcid.org/0000-0002-4022-432X + - family-names: Schlemmer + given-names: Alexander + orcid: https://orcid.org/0000-0003-4124-9649 + - family-names: Hornung + given-names: Daniel + orcid: https://orcid.org/0000-0002-7846-6375 + - family-names: tom Wörden + given-names: Henrik + orcid: https://orcid.org/0000-0002-5549-578X + - family-names: Parlitz + given-names: Ulrich + orcid: https://orcid.org/0000-0003-3058-1435 + - family-names: Luther + given-names: Stefan + orcid: https://orcid.org/0000-0001-7214-8125 +title: CaosDB - Pylib +version: 0.12.0 +doi: 10.3390/data4020083 +date-released: 2023-06-02 \ No newline at end of file diff --git a/README.md b/README.md index 602df33cecfc8ec37fd791e3257221e66f120cb3..7215591a4f31f1946029442de291eb9ccf9beea1 100644 --- a/README.md +++ b/README.md @@ -39,7 +39,7 @@ By participating, you are expected to uphold our [Code of Conduct](https://gitla * If you have a suggestion for the [documentation](https://docs.indiscale.com/caosdb-pylib/), the preferred way is also a merge request as describe above (the documentation resides in `src/doc`). However, you can also create an issue for it. -* You can also contact us at **info (AT) caosdb.de** and join the +* You can also contact us at **info (AT) caosdb.org** and join the CaosDB community on [#caosdb:matrix.org](https://matrix.to/#/!unwwlTfOznjEnMMXxf:matrix.org). diff --git a/README_SETUP.md b/README_SETUP.md index 48928d6c3f2c878a8d8b268b36ed2cdeba7f8014..01eea85188078ae6f2fe226e89e5c227497b4bd0 100644 --- a/README_SETUP.md +++ b/README_SETUP.md @@ -109,6 +109,8 @@ Now would be a good time to continue with the [tutorials](tutorials/index). - Run a specific test function: e.g. 
`tox -- unittests/test_schema.py::test_config_files` ## Documentation ## +We use sphinx to create the documentation. Docstrings in the code should comply +with the Googly style (see link below). Build documentation in `build/` with `make doc`. @@ -118,5 +120,11 @@ Build documentation in `build/` with `make doc`. - `sphinx-autoapi` - `recommonmark` +### How to contribute ### + +- [Google Style Python Docstrings](https://sphinxcontrib-napoleon.readthedocs.io/en/latest/example_google.html) +- [Google Style Python Docstrings 2nd reference](https://github.com/google/styleguide/blob/gh-pages/pyguide.md#38-comments-and-docstrings) +- [References to other documentation](https://www.sphinx-doc.org/en/master/usage/extensions/intersphinx.html#role-external) + ### Troubleshooting ### If the client is to be executed directly from the `/src` folder, an initial `.\setup.py install --user` must be called. diff --git a/RELEASE_GUIDELINES.md b/RELEASE_GUIDELINES.md index 00d0362e6630267c135e2566b993a91fccf1fd91..95ee8e314871153476c30790a456242e38dcaf9e 100644 --- a/RELEASE_GUIDELINES.md +++ b/RELEASE_GUIDELINES.md @@ -24,6 +24,7 @@ guidelines of the CaosDB Project - `version` variables in `src/doc/conf.py` - Version on [setup.py](./setup.py): Check the `MAJOR`, `MINOR`, `MICRO`, `PRE` variables and set `ISRELEASED` to `True`. Use the possibility to issue pre-release versions for testing. + - `CITATION.cff` (update version and date) 5. Merge the release branch into the main branch. @@ -40,8 +41,9 @@ guidelines of the CaosDB Project 11. Merge the main branch back into the dev branch. -12. After the merge of main to dev, start a new development version by - setting `ISRELEASED` to `False` and by increasing at least the `MICRO` - version in [setup.py](./setup.py). - Also update CHANGELOG.md (new "Unreleased" section). - Also update `src/doc/conf.py`. +12. After the merge of main to dev, start a new development version by setting + `ISRELEASED` to `False` and by increasing at least the `MICRO` version in + [setup.py](./setup.py). Please note that due to a bug in pip, the `PRE` + version has to remain empty in the setup.py. + Also update CHANGELOG.md (new "Unreleased" section). Also update + `src/doc/conf.py`. diff --git a/pytest.ini b/pytest.ini deleted file mode 100644 index ca6aad829a3e0607292cf69b8b1d4b7f7758993e..0000000000000000000000000000000000000000 --- a/pytest.ini +++ /dev/null @@ -1,3 +0,0 @@ -[pytest] -testpaths=unittests -addopts=-x -vv --cov=caosdb diff --git a/release.sh b/release.sh index 1af097f014de6cd9eb3d3e8ba5da34aea0fe1671..f6335ae20d0c29e760b508aac831a35460a59ef3 100755 --- a/release.sh +++ b/release.sh @@ -1,4 +1,4 @@ #!/bin/bash rm -rf dist/ build/ .eggs/ python setup.py sdist bdist_wheel -python -m twine upload -s dist/* +python -m twine upload dist/* diff --git a/setup.py b/setup.py index 93514b9b0c5f07b716bc5082e712fe1f101d8f36..8fdf3b1c63322ec48af398d1dcb1c4028355d473 100755 --- a/setup.py +++ b/setup.py @@ -47,13 +47,13 @@ from setuptools import find_packages, setup ISRELEASED = False MAJOR = 0 -MINOR = 9 -MICRO = 0 +MINOR = 12 +MICRO = 1 # Do not tag as pre-release until this commit # https://github.com/pypa/packaging/pull/515 # has made it into a release. Probably we should wait for pypa/packaging>=21.4 # https://github.com/pypa/packaging/releases -PRE = "dev" # "dev" # e.g. rc0, alpha.1, 0.beta-23 +PRE = "" # "dev" # e.g. 
rc0, alpha.1, 0.beta-23 if PRE: VERSION = "{}.{}.{}-{}".format(MAJOR, MINOR, MICRO, PRE) @@ -97,6 +97,9 @@ def get_version_info(): if os.path.exists('.git'): GIT_REVISION = git_version() + elif os.path.exists('caosdb_pylib_commit'): + with open('caosdb_pylib_commit', 'r') as f: + GIT_REVISION = f.read().strip() elif os.path.exists('src/caosdb/version.py'): # must be a source distribution, use existing version file try: @@ -168,10 +171,14 @@ def setup_package(): "Topic :: Scientific/Engineering :: Information Analysis", ], packages=find_packages('src'), - python_requires='>=3.8', + python_requires='>=3.7', package_dir={'': 'src'}, install_requires=['lxml>=4.6.3', - 'PyYAML>=5.4.1', 'future', 'PySocks>=1.6.7'], + "requests[socks]>=2.26", + "python-dateutil>=2.8.2", + 'PyYAML>=5.4.1', + 'future', + ], extras_require={'keyring': ['keyring>=13.0.0'], 'jsonschema': ['jsonschema>=4.4.0']}, setup_requires=["pytest-runner>=2.0,<3dev"], diff --git a/src/caosdb/__init__.py b/src/caosdb/__init__.py index 7e06885fe495c1e8c4ccc99b7d0c0f8ff8c34b5b..acf323e860a93753b57f2e104531383b412f3fa0 100644 --- a/src/caosdb/__init__.py +++ b/src/caosdb/__init__.py @@ -46,6 +46,7 @@ from caosdb.common.models import (ACL, ALL, FIX, NONE, OBLIGATORY, RECOMMENDED, Query, QueryTemplate, Record, RecordType, delete, execute_query, get_global_acl, get_known_permissions, raise_errors) +from caosdb.utils.get_entity import get_entity_by_name, get_entity_by_path, get_entity_by_id from caosdb.configuration import _read_config_files, configure, get_config from caosdb.connection.connection import configure_connection, get_connection from caosdb.exceptions import * diff --git a/src/caosdb/apiutils.py b/src/caosdb/apiutils.py index 4c8393111bcbb4f9f91e309b81bebdcac55ba626..a46e30375b924d358448e73aece61562c36c700b 100644 --- a/src/caosdb/apiutils.py +++ b/src/caosdb/apiutils.py @@ -22,26 +22,32 @@ # # ** end header # -"""API-Utils: +"""API-Utils: Some simplified functions for generation of records etc. -Some simplified functions for generation of records etc. """ -import sys -import tempfile +import logging import warnings -from collections.abc import Iterable -from subprocess import call -from typing import Optional, Any, Dict, List +from collections.abc import Iterable +from typing import Any, Dict, List -from caosdb.common.datatype import (BOOLEAN, DATETIME, DOUBLE, FILE, INTEGER, - REFERENCE, TEXT, is_reference) -from caosdb.common.models import (Container, Entity, File, Property, Query, +from caosdb.common.datatype import is_reference +from caosdb.common.models import (Container, Entity, File, Property, Record, RecordType, execute_query, - get_config, SPECIAL_ATTRIBUTES) + SPECIAL_ATTRIBUTES) +from caosdb.exceptions import CaosDBException -import logging +from caosdb.utils.git_utils import (get_origin_url_in, get_diff_in, + get_branch_in, get_commit_in) + +logger = logging.getLogger(__name__) + + +class EntityMergeConflictError(CaosDBException): + """An error that is raised in case of an unresolvable conflict when merging + two entities. 
+ """ def new_record(record_type, name=None, description=None, @@ -141,56 +147,39 @@ def retrieve_entities_with_ids(entities): def getOriginUrlIn(folder): - """return the Fetch URL of the git repository in the given folder.""" - with tempfile.NamedTemporaryFile(delete=False, mode="w") as t: - call(["git", "remote", "show", "origin"], stdout=t, cwd=folder) - with open(t.name, "r") as t: - urlString = "Fetch URL:" - - for line in t.readlines(): - if urlString in line: - return line[line.find(urlString) + len(urlString):].strip() - - return None + warnings.warn(""" + This function is deprecated and will be removed with the next release. + Please use the caosdb.utils.git_utils.get_origin_url_in instead.""", + DeprecationWarning) + return get_origin_url_in(folder) def getDiffIn(folder, save_dir=None): - """returns the name of a file where the out put of "git diff" in the given - folder is stored.""" - with tempfile.NamedTemporaryFile(delete=False, mode="w", dir=save_dir) as t: - call(["git", "diff"], stdout=t, cwd=folder) - - return t.name + warnings.warn(""" + This function is deprecated and will be removed with the next release. + Please use the caosdb.utils.git_utils.get_diff_in instead.""", + DeprecationWarning) + return get_diff_in(folder, save_dir) def getBranchIn(folder): - """returns the current branch of the git repository in the given folder. - - The command "git branch" is called in the given folder and the - output is returned - """ - with tempfile.NamedTemporaryFile(delete=False, mode="w") as t: - call(["git", "rev-parse", "--abbrev-ref", "HEAD"], stdout=t, cwd=folder) - with open(t.name, "r") as t: - return t.readline().strip() + warnings.warn(""" + This function is deprecated and will be removed with the next release. + Please use the caosdb.utils.git_utils.get_branch_in instead.""", + DeprecationWarning) + return get_branch_in(folder) def getCommitIn(folder): - """returns the commit hash in of the git repository in the given folder. + warnings.warn(""" + This function is deprecated and will be removed with the next release. + Please use the caosdb.utils.git_utils.get_commit_in instead.""", + DeprecationWarning) + return get_commit_in(folder) - The command "git log -1 --format=%h" is called in the given folder - and the output is returned - """ - - with tempfile.NamedTemporaryFile(delete=False, mode="w") as t: - call(["git", "log", "-1", "--format=%h"], stdout=t, cwd=folder) - with open(t.name, "r") as t: - return t.readline().strip() - -def compare_entities(old_entity: Entity, new_entity: Entity): - """ - Compare two entites. +def compare_entities(old_entity: Entity, new_entity: Entity, compare_referenced_records: bool = False): + """Compare two entites. Return a tuple of dictionaries, the first index belongs to additional information for old entity, the second index belongs to additional information for new entity. @@ -199,11 +188,29 @@ def compare_entities(old_entity: Entity, new_entity: Entity): - Additional parents (a list under key "parents") - Information about properties: - Each property lists either an additional property or a property with a changed: - - ... datatype - - ... importance or - - ... value (not implemented yet) + - datatype + - importance or + - value (not implemented yet) + In case of changed information the value listed under the respective key shows the value that is stored in the respective entity. 
+ + If `compare_referenced_records` is `True`, also referenced entities will be + compared using this function (which is then called with + `compare_referenced_records = False` to prevent infinite recursion in case + of circular references). + + Parameters + ---------- + old_entity, new_entity : Entity + Entities to be compared + compare_referenced_records : bool, optional + Whether to compare referenced records in case of both, `old_entity` and + `new_entity`, have the same reference properties and both have a Record + object as value. If set to `False`, only the corresponding Python + objects are compared which may lead to unexpected behavior when + identical records are stored in different objects. Default is False. + """ olddiff: Dict[str, Any] = {"properties": {}, "parents": []} newdiff: Dict[str, Any] = {"properties": {}, "parents": []} @@ -270,9 +277,29 @@ def compare_entities(old_entity: Entity, new_entity: Entity): matching[0].unit if (prop.value != matching[0].value): - olddiff["properties"][prop.name]["value"] = prop.value - newdiff["properties"][prop.name]["value"] = \ - matching[0].value + # basic comparison of value objects says they are different + same_value = False + if compare_referenced_records: + # scalar reference + if isinstance(prop.value, Entity) and isinstance(matching[0].value, Entity): + # explicitely not recursive to prevent infinite recursion + same_value = empty_diff( + prop.value, matching[0].value, compare_referenced_records=False) + # list of references + elif isinstance(prop.value, list) and isinstance(matching[0].value, list): + # all elements in both lists actually are entity objects + # TODO: check, whether mixed cases can be allowed or should lead to an error + if all([isinstance(x, Entity) for x in prop.value]) and all([isinstance(x, Entity) for x in matching[0].value]): + # can't be the same if the lengths are different + if len(prop.value) == len(matching[0].value): + # do a one-by-one comparison; the values are the same, if all diffs are empty + same_value = all( + [empty_diff(x, y, False) for x, y in zip(prop.value, matching[0].value)]) + + if not same_value: + olddiff["properties"][prop.name]["value"] = prop.value + newdiff["properties"][prop.name]["value"] = \ + matching[0].value if (len(newdiff["properties"][prop.name]) == 0 and len(olddiff["properties"][prop.name]) == 0): @@ -300,27 +327,83 @@ def compare_entities(old_entity: Entity, new_entity: Entity): return (olddiff, newdiff) -def merge_entities(entity_a: Entity, entity_b: Entity): - """ - Merge entity_b into entity_a such that they have the same parents and properties. +def empty_diff(old_entity: Entity, new_entity: Entity, compare_referenced_records: bool = False): + """Check whether the `compare_entities` found any differences between + old_entity and new_entity. + + Parameters + ---------- + old_entity, new_entity : Entity + Entities to be compared + compare_referenced_records : bool, optional + Whether to compare referenced records in case of both, `old_entity` and + `new_entity`, have the same reference properties and both have a Record + object as value. - datatype, unit, value, name and description will only be changed in entity_a if they - are None for entity_a and set for entity_b. If there is a corresponding value - for entity_a different from None a RuntimeError will be raised informing of an - unresolvable merge conflict. 
+ """ + olddiff, newdiff = compare_entities( + old_entity, new_entity, compare_referenced_records) + for diff in [olddiff, newdiff]: + for key in ["parents", "properties"]: + if len(diff[key]) > 0: + # There is a difference somewhere in the diff + return False + for key in SPECIAL_ATTRIBUTES: + if key in diff and diff[key]: + # There is a difference in at least one special attribute + return False + # all elements of the two diffs were empty + return True + + +def merge_entities(entity_a: Entity, entity_b: Entity, merge_references_with_empty_diffs=True, + force=False): + """Merge entity_b into entity_a such that they have the same parents and properties. + + datatype, unit, value, name and description will only be changed in entity_a + if they are None for entity_a and set for entity_b. If there is a + corresponding value for entity_a different from None, an + EntityMergeConflictError will be raised to inform about an unresolvable merge + conflict. The merge operation is done in place. Returns entity_a. WARNING: This function is currently experimental and insufficiently tested. Use with care. + + Parameters + ---------- + entity_a, entity_b : Entity + The entities to be merged. entity_b will be merged into entity_a in place + merge_references_with_empty_diffs : bool, optional + Whether the merge is performed if entity_a and entity_b both reference + record(s) that may be different Python objects but have empty diffs. If + set to `False` a merge conflict will be raised in this case + instead. Default is True. + force : bool, optional + If True, in case `entity_a` and `entity_b` have the same properties, the + values of `entity_a` are replaced by those of `entity_b` in the merge. + If `False`, an EntityMergeConflictError is raised instead. Default is False. + + Returns + ------- + entity_a : Entity + The initial entity_a after the in-place merge + + Raises + ------ + EntityMergeConflictError + In case of an unresolvable merge conflict. + """ - logging.warning( + logger.warning( "This function is currently experimental and insufficiently tested. 
Use with care.") # Compare both entities: - diff_r1, diff_r2 = compare_entities(entity_a, entity_b) + diff_r1, diff_r2 = compare_entities( + entity_a, entity_b, compare_referenced_records=merge_references_with_empty_diffs) # Go through the comparison and try to apply changes to entity_a: for key in diff_r2["parents"]: @@ -338,11 +421,22 @@ def merge_entities(entity_a: Entity, entity_b: Entity): raise NotImplementedError() for attribute in ("datatype", "unit", "value"): - if diff_r1["properties"][key][attribute] is None: - setattr(entity_a.get_property(key), attribute, - diff_r2["properties"][key][attribute]) - else: - raise RuntimeError("Merge conflict.") + if (attribute in diff_r2["properties"][key] and + diff_r2["properties"][key][attribute] is not None): + if (diff_r1["properties"][key][attribute] is None): + setattr(entity_a.get_property(key), attribute, + diff_r2["properties"][key][attribute]) + elif force: + setattr(entity_a.get_property(key), attribute, + diff_r2["properties"][key][attribute]) + else: + raise EntityMergeConflictError( + f"Entity a ({entity_a.id}, {entity_a.name}) " + f"has a Property '{key}' with {attribute}=" + f"{diff_r2['properties'][key][attribute]}\n" + f"Entity b ({entity_b.id}, {entity_b.name}) " + f"has a Property '{key}' with {attribute}=" + f"{diff_r1['properties'][key][attribute]}") else: # TODO: This is a temporary FIX for # https://gitlab.indiscale.com/caosdb/src/caosdb-pylib/-/issues/105 @@ -362,8 +456,13 @@ def merge_entities(entity_a: Entity, entity_b: Entity): if sa_a != sa_b: if sa_a is None: setattr(entity_a, special_attribute, sa_b) + elif force: + # force overwrite + setattr(entity_a, special_attribute, sa_b) else: - raise RuntimeError("Merge conflict.") + raise EntityMergeConflictError( + f"Conflict in special attribute {special_attribute}:\n" + f"A: {sa_a}\nB: {sa_b}") return entity_a diff --git a/src/caosdb/cached.py b/src/caosdb/cached.py new file mode 100644 index 0000000000000000000000000000000000000000..131526674d7df97d598a6d1bfbc2af7805c63a03 --- /dev/null +++ b/src/caosdb/cached.py @@ -0,0 +1,184 @@ +# -*- coding: utf-8 -*- +# +# This file is a part of the CaosDB Project. +# +# Copyright (C) 2023 IndiScale GmbH <info@indiscale.com> +# Copyright (C) 2023 Henrik tom Wörden <h.tomwoerden@indiscale.com> +# Copyright (C) 2023 Daniel Hornung <d.hornung@indiscale.com> +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see <https://www.gnu.org/licenses/>. +# + +""" +This module provides some cached versions of functions that retrieve Entities from a remote server. + +See also +======== + +- ``cache_initialize(...)`` : Re-initialize the cache. +- ``cache_clear()`` : Clear the cache. +- ``cached_query(query)`` : A cached version of ``execute_query(query)``. +- ``cached_get_entity_by(...)`` : Get an Entity by name, id, ... 
+""" + +from enum import Enum +from functools import lru_cache +from typing import Union + +from .utils import get_entity +from .common.models import execute_query, Entity, Container + + +# roughly 1GB for typical entity sizes +DEFAULT_SIZE = 33333 + +# This dict cache is solely for filling the real cache manually (e.g. to reuse older query results) +_DUMMY_CACHE = {} + + +class AccessType(Enum): + """Different access types for cached queries. Needed for filling the cache manually with +:func:`cache_fill` . + + """ + QUERY = 1 + PATH = 2 + EID = 3 + NAME = 4 + + +def cached_get_entity_by(eid: Union[str, int] = None, name: str = None, path: str = None, query: + str = None) -> Entity: + """Return a single entity that is identified uniquely by one argument. + +You must supply exactly one argument. + +If a query phrase is given, the result must be unique. If this is not what you need, use +:func:`cached_query` instead. + + """ + count = 0 + if eid is not None: + count += 1 + if name is not None: + count += 1 + if path is not None: + count += 1 + if query is not None: + count += 1 + if count != 1: + raise ValueError("You must supply exactly one argument.") + + if eid is not None: + return _cached_access(AccessType.EID, eid, unique=True) + if name is not None: + return _cached_access(AccessType.NAME, name, unique=True) + if path is not None: + return _cached_access(AccessType.PATH, path, unique=True) + if query is not None: + return _cached_access(AccessType.QUERY, query, unique=True) + + raise ValueError("Not all arguments may be None.") + + +def cached_query(query_string) -> Container: + """A cached version of :func:`caosdb.execute_query<caosdb.common.models.execute_query>`. + +All additional arguments are at their default values. + + """ + return _cached_access(AccessType.QUERY, query_string, unique=False) + + +@lru_cache(maxsize=DEFAULT_SIZE) +def _cached_access(kind: AccessType, value: Union[str, int], unique=True): + # This is the function that is actually cached. + # Due to the arguments, the cache has kind of separate sections for cached_query and + # cached_get_entity_by with the different AccessTypes. However, there is only one cache size. + + # The dummy dict cache is only for filling the cache manually, it is deleted afterwards. + if value in _DUMMY_CACHE: + return _DUMMY_CACHE[value] + + if kind == AccessType.QUERY: + return execute_query(value, unique=unique) + if kind == AccessType.NAME: + return get_entity.get_entity_by_name(value) + if kind == AccessType.EID: + return get_entity.get_entity_by_id(value) + if kind == AccessType.PATH: + return get_entity.get_entity_by_path(value) + + raise ValueError(f"Unknown AccessType: {kind}") + + +def cache_clear() -> None: + """Empty the cache that is used by `cached_query` and `cached_get_entity_by`.""" + _cached_access.cache_clear() + + +def cache_info(): + """Return info about the cache that is used by `cached_query` and `cached_get_entity_by`. + +Returns +------- + +out: named tuple + See the standard library :func:`functools.lru_cache` for details.""" + return _cached_access.cache_info() + + +def cache_initialize(maxsize=DEFAULT_SIZE) -> None: + """Create a new cache with the given size for `cached_query` and `cached_get_entity_by`. + + This implies a call of :func:`cache_clear`, the old cache is emptied. 
+ + """ + cache_clear() + global _cached_access + _cached_access = lru_cache(maxsize=maxsize)(_cached_access.__wrapped__) + + +def cache_fill(items: dict, kind: AccessType = AccessType.EID, unique: bool = True) -> None: + """Add entries to the cache manually. + + This allows to fill the cache without actually submitting queries. Note that this does not + overwrite existing entries with the same keys. + +Parameters +---------- + +items: dict + A dictionary with the entries to go into the cache. The keys must be compatible with the + AccessType given in ``kind`` + +kind: AccessType, optional + The AccessType, for example ID, name, path or query. + +unique: bool, optional + If True, fills the cache for :func:`cached_get_entity_by`, presumably with + :class:`caosdb.Entity<caosdb.common.models.Entity>` objects. If False, the cache should be filled + with :class:`caosdb.Container<caosdb.common.models.Container>` objects, for use with + :func:`cached_query`. + + """ + # 1. add the given items to the corresponding dummy dict cache + _DUMMY_CACHE.update(items) + + # 2. call the cache function with each key (this only results in a dict look up) + for key in items.keys(): + _cached_access(kind, key, unique=unique) + + # 3. empty the dummy dict cache again + _DUMMY_CACHE.clear() diff --git a/src/caosdb/cert/indiscale.ca.crt b/src/caosdb/cert/indiscale.ca.crt deleted file mode 100644 index 08a79d60c5d34626eb96f1a92e33d0ac22494f3c..0000000000000000000000000000000000000000 --- a/src/caosdb/cert/indiscale.ca.crt +++ /dev/null @@ -1,55 +0,0 @@ ------BEGIN CERTIFICATE----- -MIIJ6TCCBdGgAwIBAgIIFVYzdrEDk6EwDQYJKoZIhvcNAQENBQAwgZMxCzAJBgNV -BAYTAkRFMRUwEwYDVQQIEwxMb3dlciBTYXhvbnkxEzARBgNVBAcTCkdvZXR0aW5n -ZW4xFzAVBgNVBAoTDkluZGlTY2FsZSBHbWJIMRwwGgYDVQQDExNJbmRpU2NhbGUg -Um9vdCBDQSAxMSEwHwYJKoZIhvcNAQkBFhJpbmZvQGluZGlzY2FsZS5jb20wHhcN -MTkwODA3MDAwMDAwWhcNMzQwODA2MjM1OTU5WjCBkzELMAkGA1UEBhMCREUxFTAT -BgNVBAgTDExvd2VyIFNheG9ueTETMBEGA1UEBxMKR29ldHRpbmdlbjEXMBUGA1UE -ChMOSW5kaVNjYWxlIEdtYkgxHDAaBgNVBAMTE0luZGlTY2FsZSBSb290IENBIDEx -ITAfBgkqhkiG9w0BCQEWEmluZm9AaW5kaXNjYWxlLmNvbTCCBCIwDQYJKoZIhvcN -AQEBBQADggQPADCCBAoCggQBAKxJO3XOqrUxFU3qdVyk9tmZEHwhwntcLO+kRR5t -64/1Z/+VIPSgVN5phkSCukj2BPJITWKplWzJDAYWSvA/7cqavCtx8yP+m3AHWrRa -CeHbtkGZ1nzwyFel3GIr93e65REeWqBE3knzem+qxTlZ2hp8/w3oxUlhy7tGxjBs -JlekgLRDrnj4Opyb4GVjcVfcELmu3sLrrPX1wdYJrqaMQUR4BKZnbXxKdOYyX+kR -/W2P4sihCCJh7Wy29VXHwSSCM1qEkU3REjvPEmEElCG7UpqOfg+3jaNZDqnvfskf -okU4GuFCxSWQituyP9jm/hFVEhz59tUMYCllcjEi2jGmD2DBKpiru4t4/z0Aymf4 -Pep9hNtH1yhZMxpQeCYK9ESEE5d7do0bu/4YFp7jAg5vWZ8KlILZakmypVBFUw8I -U/QJoJ55j95vIp+kjFdXelIVcr5La/zOR82JldaoPfyoBKObzwpwqaWQwYm8pj4p -XkUdJTf8rpW21SSGWZm8JoFSYDfGvI61rPEjl/ohKhlG0tV6E2tCc406HNo/7pPe -pmx/v9ZWLbYDAH7MVMB4tv6zDRE/c4KTbh5/s70VbXbAeOG6DNwegdDLDYZOv6Yw -YQMz9NWtKGzvoFehP2vY5nGK95JVUcd90jaNaoURLB102VtxAjPIEQA1PjbQxLvC -7A6kshlpQiN7zS/R9IgiEkYP/9gjy6mMuQVxH7C+9cqmCnXvVmpHmxXGUqk61r/B -h12htsx5qjbbkToZYhUXBmwRq4LDtyoxNeaF2Jc+gE762obbHsSYMuSuh0kTFUUd -uqfrI8OyzX4r1w5dYf2FEetZTT2Obyxb3Cy0btJF5+zEerBX44RulkdC+TPTMhJw -b1jrPCACKywy9b6vJcSQ2V1+uLk7rH2JKD+fQRIKUqZZkhNKFYz5dnYYTgS45M0/ -C+vIvRnhgNSNb4efG6wyFvWEF8poDSPnJ4mM+0jHG/+cLqF/M2CMFvC+yU8Hj9YH -B+H2L6V1QlCkpw5Ai4ji6OaQmnrsjE8EJj58vwYKsjmLGuf4j5AivkQTxfgCPGrT -6CxSesoFmYDPSg/2eO+IfYEwnd7Rbs4aAhW8eo+lGpmK0DQxNjlejYt/Cgp7HWCq -m/VNqWPIDMSTTqyk1GTmp67NjEZKt2ukJxI2CpL8s/9x4f3GTjNyI750pKM/uzMk -OBKTMuWJQ6xeMR3h9RQlqlmwcErLXoUGInOTHHjRGXDI+ZBeLqT5DikcFiwbHG3+ -6FOuxXO0eqqg2tBW8cQ5kuRI0YFznipDUcfgDZt0JEkEXmRuL0nxYO35WKKdpGcF 
-xFRJtO4FRB4nVWekVRuK9m47IPm6vC4eo+pCNPPoQ+FjyQ8CAwEAAaM/MD0wDAYD -VR0TBAUwAwEB/zAdBgNVHQ4EFgQUFjE2TLaKASKEJ0LKOO+37/Hu7qowDgYDVR0P -AQH/BAQDAgEGMA0GCSqGSIb3DQEBDQUAA4IEAQB2j1GL1G0ferWp9wmuDdF2oumn -k+JIctRaGHaSrqHy4zjwj3Oqm2JA1ds+WfWozz+d38ZcLqSHo+g9ku5h/XOogQEU -O4/y7j44pxIUg0EcIpMHtf7KPocPfvi9lw/4vE/3V/WKh4E09SXWgyY5tMUlEMaB -6t8n7gg943HY2MJE1QU2wOPMXB1krFbunYxJcrUMs21w9jRWVv/wvaj6rkdvvSbU -Yre11J+VlhC6fxx+STohQopzE6jnsaHile56b9xAmCCKcPEpWeKKBFS7pVNHEIHF -uHWpgVjhoheEMMbYgu6l5E5K32TNYCKU49jNRWEKETjmYQSNl9dsSip+XlvaU8wQ -VRR8UMHZPiJDW/AAHCr+bXEarZ9mSj/y+R512YtVw95zCnGUtzOJViThoIk/IAOR -AJdnvsFmZSIKtFHpSEFYlTDq2yr1ulzbaDhuPRzita8b0cP27UvqRebZw5CvHN48 -B9a9tTYowKuJqmtjE6D00QA4xS8fRizLnx54uNmDbwf/8WavVk6MzDERwRE3OsSy -D0dV6gy3t2AqEpVBrICrFqvgAQa4fcFcIwz3Qbt5o5uEi7acRomY57YrxrlfNTwh -2oDQz+HQ/ZTDwZ3DrIgel7GrQ5fXrXDLL3ebtsbuIeBx8crOWQask832HcLtDVpu -E/FdJEMMjglzIcy2dHpuODIGFmgEVfHR4DOOSBl0hfNdlrYnhC0h8/6QFswtlYFF -8aQbGX7inK8L2in5wQ7ypeoMuXkQVYxlU1TEGmgB8aDke47MuX1FH+clsCaZ3s1E -ka6lV6cjNYcosS718B6b2JgDUzmGBn2Sdm1xFmJM16dXp7TSmC5/fYxXuE/CynDs -PmaUb9Ms6XUYSwKKhZ5HZdeRoNz8w62WNAeF7o7iX6IVrd/G1bJnSBN01istckyR -BDuIkaoBQ9yvHN6Bo/J3KR08ixF1dHFPo/oSgkBxkLakb/yeslBTP/oISiFeQ4+q -Gld1mhAvmG99dVZfoysrMjZSyghNbqwScjbYYN115lExV5ZeRtSwA7JCYE2lBjmB -vocmz/hh/ifbmmqIvSv0NtiBnM6mNqngZEWD/rAloVOQoq0KVJJ5lUCQrBSFtR4+ -G1JGMX6b7uRp4mfdqqDE62KxxfkWBUwzUTIKGb5K42ji1Gy5li/TIWJtLNGNNQ2A -0ui2RhwioaGGfYyomSFuAo5IPE/NF0ASjrTDW6GoNxypTSYE4/7oSoxeryafVnqN -S0fRyrgSLiuT5tAiZ3b5Q3EFYUM2OcU3ezr/ZUabf9qIsqOnCi91SqE88BQbenot -0HyUMdp/7QX9SyWM/azhcRiReAtkmq9pgeQA2TTZADDNTkKRljG9VeFDSwl7 ------END CERTIFICATE----- diff --git a/src/caosdb/common/administration.py b/src/caosdb/common/administration.py index 98d4d2826da7131ef79b5c3cc9b3d9597abc0248..a27aaf0406c83ac33c37b676a9cdeab812bf2f7a 100644 --- a/src/caosdb/common/administration.py +++ b/src/caosdb/common/administration.py @@ -76,7 +76,7 @@ def get_server_properties(): con = get_connection() try: body = con._http_request( - method="GET", path="_server_properties").response + method="GET", path="_server_properties") except EntityDoesNotExistError: raise ServerConfigurationException( "Debug mode in server is probably disabled.") from None @@ -385,7 +385,7 @@ action : str Either "grant" or "deny" permission : str - For example "RETRIEVE:*". + For example ``RETRIEVE:*``. priority : bool, optional Whether the priority shall be set, defaults is False. diff --git a/src/caosdb/common/models.py b/src/caosdb/common/models.py index f974060f4727e575a94a3afcdd2f86520e6123a9..df77bb7311a86abc8a78715e082f115c6a3efc2b 100644 --- a/src/caosdb/common/models.py +++ b/src/caosdb/common/models.py @@ -1,12 +1,11 @@ # -*- coding: utf-8 -*- # -# ** header v3.0 # This file is a part of the CaosDB Project. # # Copyright (C) 2018 Research Group Biomedical Physics, # Max-Planck-Institute for Dynamics and Self-Organization Göttingen -# Copyright (C) 2020-2022 Indiscale GmbH <info@indiscale.com> -# Copyright (C) 2020 Florian Spreckelsen <f.spreckelsen@indiscale.com> +# Copyright (C) 2020-2023 Indiscale GmbH <info@indiscale.com> +# Copyright (C) 2020-2023 Florian Spreckelsen <f.spreckelsen@indiscale.com> # Copyright (C) 2020-2022 Timm Fitschen <t.fitschen@indiscale.com> # # This program is free software: you can redistribute it and/or modify @@ -22,7 +21,6 @@ # You should have received a copy of the GNU Affero General Public License # along with this program. If not, see <https://www.gnu.org/licenses/>. # -# ** end header # """ @@ -33,6 +31,8 @@ transactions. 
 All additional classes are either important for the entities or the
 transactions.
 """
+
+from __future__ import annotations  # Can be removed with 3.10.
 from __future__ import print_function, unicode_literals

 import re
@@ -45,11 +45,13 @@
 from os import listdir
 from os.path import isdir
 from random import randint
 from tempfile import NamedTemporaryFile
+from typing import Any, Optional
 from warnings import warn

 from caosdb.common.datatype import (BOOLEAN, DATETIME, DOUBLE, INTEGER, TEXT,
                                     is_list_datatype, is_reference)
 from caosdb.common.state import State
+from caosdb.common.timezone import TimeZone
 from caosdb.common.utils import uuid, xml2str
 from caosdb.common.versioning import Version
 from caosdb.configuration import get_config
@@ -61,7 +63,6 @@
 from caosdb.exceptions import (AmbiguousEntityError, AuthorizationError,
                                EntityDoesNotExistError, EntityError,
                                EntityHasNoDatatypeError, HTTPURITooLongError,
                                MismatchingEntitiesError, QueryNotUniqueError,
-                               ServerConfigurationException,
                                TransactionError, UniqueNamesError,
                                UnqualifiedParentsError, UnqualifiedPropertiesError)
@@ -82,7 +83,7 @@
 SPECIAL_ATTRIBUTES = ["name", "role", "datatype", "description", "id",
                       "path", "checksum", "size"]

-class Entity(object):
+class Entity:

     """Entity is a generic CaosDB object.
@@ -101,6 +102,8 @@
         self._checksum = None
         self._size = None
         self._upload = None
+        # If an entity is used (e.g. as parent), it is wrapped instead of being used directly.
+        # see Entity._wrap()
         self._wrapped_entity = None
         self._version = None
         self._cuid = None
@@ -109,9 +112,9 @@
         self.__datatype = None
         self.datatype = datatype
         self.value = value
-        self.messages = _Messages()
+        self.messages = Messages()
         self.properties = _Properties()
-        self.parents = _Parents()
+        self.parents = _ParentList()
         self.path = None
         self.file = None
         self.unit = None
@@ -416,7 +419,7 @@
         self.acl.is_permitted(permission=permission)

     def get_all_messages(self):
-        ret = _Messages()
+        ret = Messages()
         ret.append(self.messages)

         for p in self.properties:
@@ -449,6 +452,66 @@
         return self

+    def remove_value_from_property(self, property_name: str, value: Any,
+                                   remove_if_empty_afterwards: Optional[bool] = True):
+        """Remove a value from a property given by name.
+
+        Do nothing if this entity does not have a property of this
+        ``property_name`` or if the property value differs from the given
+        ``value``. By default, the property is removed from this entity if it
+        becomes empty (i.e., value=None) through removal of the value. This
+        behavior can be changed by setting ``remove_if_empty_afterwards`` to
+        ``False``, in which case the property remains.
+
+        Notes
+        -----
+        If the property value is a list and the value to be removed occurs more
+        than once in this list, only its first occurrence is deleted (similar
+        to the behavior of Python's ``list.remove()``).
+
+        If the property was empty (prop.value == None) before, the property is
+        not removed afterwards even if ``remove_if_empty_afterwards`` is set to
+        ``True``. Rationale: the property being empty is not an effect of
+        calling this function.
+
+        Parameters
+        ----------
+        property_name : str
+            Name of the property from which the ``value`` will be removed.
+
+        value
+            Value that is to be removed.
+
+        remove_if_empty_afterwards : bool, optional
+            Whether the property shall be removed from this entity if it is
+            emptied by removing the ``value``. Default is ``True``.
+
+        Returns
+        -------
+        self
+            This entity.
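+
+        Examples
+        --------
+        A minimal illustration of the behavior described above (the property
+        name and values are hypothetical):
+
+        >>> import caosdb as db
+        >>> rec = db.Record(name="TestRec")
+        >>> _ = rec.add_property("TestProp", value=[1, 2, 3], datatype=db.LIST(db.INTEGER))
+        >>> _ = rec.remove_value_from_property("TestProp", 2)
+        >>> rec.get_property("TestProp").value
+        [1, 3]
+        >>> # Removing the remaining values empties the property, which is then
+        >>> # removed from this Record by default.
+        >>> _ = rec.remove_value_from_property("TestProp", 1)
+        >>> _ = rec.remove_value_from_property("TestProp", 3)
+        >>> print(rec.get_property("TestProp"))
+        None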
+ + """ + + if self.get_property(property_name) is None: + return self + if self.get_property(property_name).value is None: + remove_if_empty_afterwards = False + empty_afterwards = False + if isinstance(self.get_property(property_name).value, list): + if value in self.get_property(property_name).value: + self.get_property(property_name).value.remove(value) + if self.get_property(property_name).value == []: + self.get_property(property_name).value = None + empty_afterwards = True + elif self.get_property(property_name).value == value: + self.get_property(property_name).value = None + empty_afterwards = True + if remove_if_empty_afterwards and empty_afterwards: + self.remove_property(property_name) + + return self + def remove_parent(self, parent): self.parents.remove(parent) @@ -458,18 +521,30 @@ class Entity(object): unit=None, importance=None, inheritance=None): # @ReservedAssignment """Add a property to this entity. - The first parameter is meant to identify the property entity. So the method expects an instance of - Entity, an integer or a string here. The second parameter is the value of the new property. Any - other named parameter may be passed by means of the keywwords. Accepted keywords are: - id, name, description, importance, inheritance, datatype, and unit. Any other keyword will be - ignored right now. But that may change in the future. + The first parameter is meant to identify the property entity either via + its id or name, or by providing the corresponding ``Entity`` Python + object. The second parameter is the value of the new property. Any other + named parameter may be passed by means of the keywwords. Accepted + keywords are: id, name, description, importance, inheritance, datatype, + and unit. + + Notes + ----- + If you want to add a property to an already existing entity, the + property ``id`` of that property needs to be specified before you send + the updated entity to the server. Parameters ---------- - property : int, str, Property, optional - An identifying parameter, by default None - value : int, str, Property, optional - The value of the new property, by default None + property : int, str, Entity, optional + An identifier for the property to be added, either its name, its id, + or the corresponding Entity Python object. If ``None``, either the + `name` or the `id` argument have to be specified explicitly. Default + is ``None``. + value : int, str, bool, datetime, Entity, or list of these types, optional + The value of the new property. In case of a reference to another + entity, this value may be the referenced entities id or the + ``Entity`` as a Python object. Default is None. id : int, optional Id of the property, by default None name : str, optional @@ -488,17 +563,64 @@ class Entity(object): Returns ------- Entity + This Entity object to which the new property has been added. - Raises - ------ + Warns + ----- UserWarning If the first parameter is None then id or name must be defined and not be None. UserWarning If the first parameter is an integer then it is interpreted as the id and id must be undefined or None. UserWarning - If the first parameter is not None and neither an instance of Entity nor an integer it is + If the first parameter is not None and neither an instance of Entity nor an integer it is interpreted as the name and name must be undefined or None. 
+ + Raises + ------ + ValueError: + If you try to add an ``Entity`` object with File or Record role (or, + equivalently, a ``File`` or ``Record`` object) as a property, a + ``ValueError`` is raised. + + Examples + -------- + Add a simple integer property with the name ``TestProp`` and the value + 27 to a Record: + + >>> import caosdb as db + >>> rec = db.Record(name="TestRec").add_parent(name="TestType") + >>> rec.add_property("TestProp", value=27) # specified by name, you could equally use the property's id if it is known + + You can also use the Python object: + + >>> prop = db.Property(name="TestProp", datatype=db.INTEGER) + >>> rec.add_property(prop, value=27) # specified via the Python object + + In case of updating an existing Record, the Property needs to be + specified by id: + + >>> rec = db.Record(name="TestRec").retrieve() + >>> prop2 = db.Property(name="OtherTestProp").retrieve() + >>> rec.add_property(id=prop2.id, value="My new value") + >>> rec.update() + + Let's look at the more advanced example of adding a list of integers as + value of the above integer ``TestProp``: + + >>> rec.add_property("TestProp", value=[27,28,29], datatype=db.LIST(db.INTEGER)) + + Note that since `TestProp` is a scalar integer Property, the datatype + `LIST<INTEGER>` has to be specified explicitly. + + Finally, we can also add reference properties, specified by the RecordType of the referenced entity. + + >>> ref_rec = db.Record(name="ReferencedRecord").add_parent(name="OtherRT") + >>> rec.add_property(name="OtherRT", value=ref_rec) # or value=ref_rec.id if ref_rec has one set by the server + + See more on adding properties and inserting data in + https://docs.indiscale.com/caosdb-pylib/tutorials/Data-Insertion.html. + """ pid = id @@ -530,7 +652,7 @@ class Entity(object): value=value, unit=unit) if abstract_property is not None: - new_property._wrap(property) + new_property._wrap(abstract_property) # FIXME: this really necessary? @@ -560,7 +682,8 @@ class Entity(object): if msg is not None: pass else: - msg = Message(type, code, description, body) + msg = Message(description=description, type=type, code=code, + body=body) self.messages.append(msg) return self @@ -585,11 +708,13 @@ class Entity(object): entity. If no `inheritance` is given, no properties will be inherited by the child. This parameter is case-insensitive. - Note that the behaviour is currently not yet specified when assigning parents to - Records, it only works for inheritance of RecordTypes (and Properties). - - For more information, it is recommended to look into the - :ref:`data insertion tutorial<tutorial-inheritance-properties>`. + Notes + ----- + Note that the behaviour of the `inheritance` argument currently has not + yet been specified when assigning parents to Records, it only works for + inheritance of RecordTypes (and Properties). For more information, it is + recommended to look into the :ref:`data insertion + tutorial<tutorial-inheritance-properties>`. Raises ------ @@ -621,26 +746,46 @@ class Entity(object): return self - def has_parent(self, parent, recursive=True, - check_name=True, check_id=False): - """Checks if this entity has a given parent. + def has_parent(self, parent: Entity, recursive: bool = True, retrieve: bool = True, + check_name: bool = True, check_id: bool = False): + """Check if this entity has a given parent. If 'check_name' and 'check_id' are both False, test for identity on the Python level. Otherwise use the name and/or ID for the check. 
 Note that, if checked, name or ID should not be None, lest the check fail.

-        @param parent: Check for this parent.
-        @param recursive: Whether to check recursively.
-        @param check_name: Whether to use the name for ancestry check.
-        @param check_id: Whether to use the ID for ancestry check.
-        @return: True if 'parent' is a true parent, False otherwise.
-        """
+Parameters
+----------
+
+parent: Entity
+    Check for this parent.
+
+recursive: bool, optional
+    Whether to check recursively.
+
+check_name: bool, optional
+    Whether to use the name for ancestry check.
+
+check_id: bool, optional
+    Whether to use the ID for ancestry check.
+
+retrieve: bool, optional
+    If False, do not retrieve parents from the server.
+
+Returns
+-------
+out: bool
+    True if ``parent`` is a true parent, False otherwise.
+"""
         if recursive:
-            parents = self.get_parents_recursively()
+            parents = self.get_parents_recursively(retrieve=retrieve)
         else:
-            parents = [pp._wrapped_entity for pp in self.parents]
+            if retrieve:
+                parents = [pp.retrieve()._wrapped_entity for pp in self.parents]
+            else:
+                parents = [pp._wrapped_entity for pp in self.parents]

         if not (check_name or check_id):
             return parent in parents
@@ -659,39 +804,61 @@
     def get_parents(self):
         """Get all parents of this entity.

-        @return: _Parents(list)
+        @return: _ParentList(list)
         """

         return self.parents

-    def get_parents_recursively(self):
+    def get_parents_recursively(self, retrieve: bool = True):
         """Get all ancestors of this entity.

-        @return: list of Entities
-        """
+Parameters
+----------
+
+retrieve: bool, optional
+    If False, do not retrieve parents from the server.
+
+Returns
+-------
+out: List[Entity]
+    The parents of this Entity
+"""
+
-        all_parents = _Parents()
-        self._get_parent_recursively(all_parents)
+        all_parents = []
+        self._get_parent_recursively(all_parents, retrieve=retrieve)

         return all_parents

-    def _get_parent_recursively(self, all_parents):
+    def _get_parent_recursively(self, all_parents: list, retrieve: bool = True):
         """Get all ancestors with a little helper.

         As a side effect of this method, the ancestors are added to
         all_parents.

-        @param all_parents: The added parents so far.
+        @param all_parents: list, The added parents so far.

         @return: None, but see side effects.
         """

         for parent in self.parents:
+            # TODO:
+            # Comment on _wrap and _wrapped_entity
+            # Currently, I (henrik) do not know why the wrapping is necessary (and it is not
+            # documented). However, the following illustrates why I think it is a bad idea.
+            # First you add a parent with rec.add_parent(parent), but then you cannot access
+            # attributes of parent when you use rec.parents[0], for example, because you do not get
+            # the same object but a wrapping object and you need to know that you only get the
+            # original by accessing the private (!) _wrapped_entity object.
             w_parent = parent._wrapped_entity
+            if retrieve:
+                parent.retrieve()
+            for next_parent in parent.parents:
+                w_parent.add_parent(next_parent)

-            if w_parent not in all_parents:
+            if (w_parent.id, w_parent.name) not in [
+                    (all_p.id, all_p.name) for all_p in all_parents]:
                 all_parents.append(w_parent)
-                w_parent._get_parent_recursively(all_parents)
+                w_parent._get_parent_recursively(all_parents, retrieve=retrieve)

     def get_parent(self, key):
         """Return the first parent matching the key or None if no match exists.
@@ -890,7 +1057,7 @@
     def get_messages(self):
         """Get all messages of this entity.
- @return: _Messages(list) + @return: Messages(list) """ return self.messages @@ -898,9 +1065,9 @@ class Entity(object): def get_warnings(self): """Get all warning messages of this entity. - @return _Messages(list): Warning messages. + @return Messages(list): Warning messages. """ - ret = _Messages() + ret = Messages() for m in self.messages: if m.type.lower() == "warning": @@ -911,9 +1078,9 @@ class Entity(object): def get_errors(self): """Get all error messages of this entity. - @return _Messages(list): Error messages. + @return Messages(list): Error messages. """ - ret = _Messages() + ret = Messages() for m in self.messages: if m.type.lower() == "error": @@ -1135,7 +1302,7 @@ class Entity(object): else: raise TypeError( 'Child was neither a Property, nor a Parent, nor a Message.\ - Was ' + str(type(child))) + Was ' + str(type(child)) + "\n" + str(child)) # add VALUE value = None @@ -1240,12 +1407,19 @@ class Entity(object): anyway. Set the flag 'strict' to True in order to force the server to take all warnings as errors. This prevents the server from inserting this entity if any warning occurs. - @param strict=False: Flag for strict mode. - @param raise_exception_on_error=True: Flag to raise an - exception when an error occurs. - @param unique=True: Flag to only allow - insertion of elements with unique names. - @param flags: A dictionary of flags to be send with the insertion. + Parameters + ---------- + strict : bool, optional + Flag for strict mode. Default is False. + raise_exception_on_error : bool, optional + Flag to raise an exception when an error occurs. Default is True. + unique : bool, optional + Flag to only allow insertion of elements with unique names. Default + is True. + flags : dict, optional + A dictionary of flags to be send with the insertion. Default is + None. + """ return Container().append(self).insert( @@ -1259,15 +1433,15 @@ class Entity(object): unique=True, flags=None, sync=True): """Update this entity. - There are two possible work-flows to perform this update: - First: - 1) retrieve an entity - 2) do changes - 3) call update method +There are two possible work-flows to perform this update: +First: + 1) retrieve an entity + 2) do changes + 3) call update method - Second: - 1) construct entity with id - 2) call update method. +Second: + 1) construct entity with id + 2) call update method. For slight changes the second one it is more comfortable. Furthermore, it is possible to stay off-line until calling the update method. The name, description, unit, datatype, path, @@ -1296,6 +1470,12 @@ class Entity(object): flags=flags)[0] def _wrap(self, entity): + """ + When entity shall be used as parent or property it is not added to the corresponding list + (such as the parent list) directly, but another Entity object is created and the original + Entity is wrapped using this function + TODO: document here and in dev docs why this is done. 
+ """ self._wrapped_entity = entity return self @@ -1380,12 +1560,15 @@ def _parse_value(datatype, value): # reference via name return str(value) - except TypeError: + except TypeError as te: # deal with invalid XML: List of values without appropriate datatype if isinstance(value, list): - raise ServerConfigurationException( - "The server sent an invalid XML: List valued properties must be announced by " + raise TypeError( + "Invalid datatype: List valued properties must be announced by " "the datatype.\n" + f"Datatype: {datatype}\nvalue: {value}") + else: + # Everything else that's not related to wrong list assignments + raise te def _log_request(request, xml_body=None): @@ -1417,7 +1600,7 @@ class QueryTemplate(): self._cuid = None self.value = None self.datatype = None - self.messages = _Messages() + self.messages = Messages() self.properties = None self.parents = None self.path = None @@ -1537,7 +1720,7 @@ class QueryTemplate(): return self.id is not None def get_errors(self): - ret = _Messages() + ret = Messages() for m in self.messages: if m.type.lower() == "error": @@ -1596,6 +1779,7 @@ class Property(Entity): def add_property(self, property=None, value=None, id=None, name=None, description=None, datatype=None, unit=None, importance=FIX, inheritance=FIX): # @ReservedAssignment + """See ``Entity.add_property``.""" return super().add_property( property=property, id=id, name=name, description=description, datatype=datatype, @@ -1687,12 +1871,10 @@ class Property(Entity): class Message(object): - # @ReservedAssignment - - def __init__(self, type, code=None, description=None, body=None): # @ReservedAssignment - self.type = type - self.code = code + def __init__(self, type=None, code=None, description=None, body=None): # @ReservedAssignment self.description = description + self.type = type if type is not None else "Info" + self.code = int(code) if code is not None else None self.body = body def to_xml(self, xml=None): @@ -1715,11 +1897,13 @@ class Message(object): def __eq__(self, obj): if isinstance(obj, Message): - return self.type == obj.type and self.code == obj.code + return self.type == obj.type and self.code == obj.code and self.description == obj.description return False def get_code(self): + warn(("get_code is deprecated and will be removed in future. 
" + "Use self.code instead."), DeprecationWarning) return int(self.code) @@ -1729,6 +1913,7 @@ class RecordType(Entity): def add_property(self, property=None, value=None, id=None, name=None, description=None, datatype=None, unit=None, importance=RECOMMENDED, inheritance=FIX): # @ReservedAssignment + """See ``Entity.add_property``.""" return super().add_property( property=property, id=id, name=name, description=description, datatype=datatype, @@ -1784,6 +1969,7 @@ class Record(Entity): def add_property(self, property=None, value=None, id=None, name=None, description=None, datatype=None, unit=None, importance=FIX, inheritance=FIX): # @ReservedAssignment + """See ``Entity.add_property``.""" return super().add_property( property=property, id=id, name=name, description=description, datatype=datatype, @@ -1941,6 +2127,7 @@ class File(Record): def add_property(self, property=None, id=None, name=None, description=None, datatype=None, value=None, unit=None, importance=FIX, inheritance=FIX): # @ReservedAssignment + """See ``Entity.add_property``.""" return super().add_property( property=property, id=id, name=name, description=description, datatype=datatype, @@ -2091,7 +2278,8 @@ class _Properties(list): raise KeyError(str(prop) + " not found.") -class _Parents(list): +class _ParentList(list): + # TODO unclear why this class is private. Isn't it use full for users? def _get_entity_by_cuid(self, cuid): ''' @@ -2212,10 +2400,9 @@ class _Parents(list): raise KeyError(str(parent) + " not found.") -class _Messages(dict): - - """This 'kind of dictionary' stores error, warning, info, and other - messages. The mentioned three messages types are messages of special use. +class Messages(list): + """This specialization of list stores error, warning, info, and other + messages. The mentioned three messages types play a special role. They are generated by the client and the server while processing the entity to which the message in question belongs. It is RECOMMENDED NOT to specify such messages manually. The other messages are ignored by the server unless @@ -2226,25 +2413,18 @@ class _Messages(dict): <$Type code=$code description=$description>$body</$Type> - Messages are treated as 'equal' if and only if both they have the same type (case-insensitive), - and the same code (or no code). Every message - MUST NOT occur more than once per entity (to which the message in question belongs). - - If a message m2 is added while a messages m1 is already in this _Message object m2 will - OVERRIDE m1. - Error, warning, and info messages will be deleted before any transaction. Examples: - <<< msgs = _Messages() + <<< msgs = Messages() <<< # create Message <<< msg = Message(type="HelloWorld", code=1, description="Greeting the world", body="Hello, world!") - <<< # append it to the _Messages + <<< # append it to the Messages <<< msgs.append(msg) - <<< # use _Messages as list of Message objects + <<< # use Messages as list of Message objects <<< for m in msgs: ... assert isinstance(m,Message) @@ -2255,29 +2435,12 @@ class _Messages(dict): <<< msgs.append(msg) <<< # get it back via get(...) and the key tuple (type, code) <<< assert id(msgs.get("HelloWorld",1))==id(msg) - - <<< # delete Message via remove and the (type,code) tuple - <<< msgs.remove("HelloWorld",1) - <<< assert msgs.get("HelloWorld",1) == None - - <<< # short version of adding/setting/resetting a new Message - <<< msgs["HelloWorld",2] = "Greeting the world in German", "Hallo, Welt!" 
- <<< assert msgs["HelloWorld",2] == ("Greeting the world in German","Hallo, Welt!") - <<< msgs["HelloWorld",2] = "Greeting the world in German", "Huhu, Welt!" - <<< assert msgs["HelloWorld",2] == ("Greeting the world in German","Huhu, Welt!") - <<< del msgs["HelloWorld",2] - <<< assert msgs.get("HelloWorld",2) == None - - # this Message has no code and no description (make easy things easy...) - <<< - <<< msgs["HelloWorld"] = "Hello!" - <<< assert msgs["HelloWorld"] == "Hello!" - - (to be continued...) """ def clear_server_messages(self): - """Removes all error, warning and info messages.""" + """Removes all messages of type error, warning and info. All other + messages types are custom types which should be handled by custom + code.""" rem = [] for m in self: @@ -2287,9 +2450,18 @@ class _Messages(dict): for m in rem: self.remove(m) - return self - + ####################################################################### + # can be removed after 01.07.24 + # default implementation of list is sufficient def __setitem__(self, key, value): # @ReservedAssignment + if not isinstance(value, Message): + warn("__setitem__ will in future only accept Message objects as second argument. " + "You will no longe be" + " able to pass bodys such that Message object is created on the fly", + DeprecationWarning) + if not isinstance(key, int): + warn("__setitem__ will in future only accept int as first argument", + DeprecationWarning) if isinstance(key, tuple): if len(key) == 2: type = key[0] # @ReservedAssignment @@ -2300,7 +2472,7 @@ class _Messages(dict): else: raise TypeError( "('type', 'code'), ('type'), or 'type' expected.") - elif isinstance(key, _Messages._msg_key): + elif isinstance(key, Messages._msg_key): type = key._type # @ReservedAssignment code = key._code else: @@ -2321,13 +2493,19 @@ class _Messages(dict): if isinstance(value, Message): body = value.body description = value.description + m = Message else: body = value description = None - m = Message(type=type, code=code, description=description, body=body) - dict.__setitem__(self, _Messages._msg_key(type, code), m) + m = Message(type=type, code=code, description=description, body=body) + if isinstance(key, int): + super().__setitem__(key, m) + else: + self.append(m) def __getitem__(self, key): + if not isinstance(key, int): + warn("__getitem__ only supports integer keys in future.", DeprecationWarning) if isinstance(key, tuple): if len(key) == 2: type = key[0] # @ReservedAssignment @@ -2338,113 +2516,118 @@ class _Messages(dict): else: raise TypeError( "('type', 'code'), ('type'), or 'type' expected.") - elif isinstance(key, int) and int(key) >= 0: - for m in self.values(): - if key == 0: - return m - else: - key -= 1 - type = key # @ReservedAssignment - code = None + elif isinstance(key, int) and key >= 0: + return super().__getitem__(key) else: type = key # @ReservedAssignment code = None - m = dict.__getitem__(self, _Messages._msg_key(type, code)) - + m = self.get(type, code) + if m is None: + raise KeyError() if m.description: return (m.description, m.body) else: return m.body - def __init__(self): - dict.__init__(self) - def __delitem__(self, key): if isinstance(key, tuple): - if len(key) == 2: - type = key[0] # @ReservedAssignment - code = key[1] - elif len(key) == 1: - type = key[0] # @ReservedAssignment - code = None - else: - raise TypeError( - "('type', 'code'), ('type'), or 'type' expected.") + warn("__delitem__ only supports integer keys in future.", DeprecationWarning) + if self.get(key[0], key[1]) is not None: + 
self.remove(self.get(key[0], key[1])) else: - type = key # @ReservedAssignment - code = None - - return dict.__delitem__(self, _Messages._msg_key(type, code)) + super().__delitem__(key) def remove(self, obj, obj2=None): - if isinstance(obj, Message): - return dict.__delitem__(self, _Messages._msg_key.get(obj)) + if obj2 is not None: + warn("Supplying a second argument to remove is deprecated.", + DeprecationWarning) + super().remove(self.get(obj, obj2)) + else: + super().remove(obj) - return self.__delitem__((obj, obj2)) + def append(self, msg): + if isinstance(msg, Messages) or isinstance(msg, list): + warn("Supplying a list-like object to append is deprecated. Please use extend" + " instead.", DeprecationWarning) + for m in msg: + self.append(m) + return - def get(self, type, code=None, default=None): # @ReservedAssignment - try: - return dict.__getitem__(self, _Messages._msg_key(type, code)) - except KeyError: - return default + super().append(msg) - def extend(self, messages): - self.append(messages) + @staticmethod + def _hash(t, c): + return hash(str(t).lower() + (str(",") + str(c) if c is not None else '')) + # end remove + ####################################################################### - return self + def get(self, type, code=None, default=None, exact=False): # @ReservedAssignment + """ + returns a message from the list that kind of matches type and code - def append(self, msg): - if hasattr(msg, "__iter__"): - for m in msg: - self.append(m) + case and types (str/int) are ignored - return self + If no suitable message is found, the default argument is returned + If exact=True, the message has to match code and type exactly + """ + if not exact: + warn("The fuzzy mode (exact=False) is deprecated. Please use exact in future.", + DeprecationWarning) + + for msg in self: + if exact: + if msg.type == type and msg.code == code: + return msg + else: + if self._hash(msg.type, msg.code) == self._hash(type, code): + return msg - if isinstance(msg, Message): - dict.__setitem__(self, _Messages._msg_key.get(msg), msg) + return default - return self - else: - raise TypeError("Argument was not a Message") + def to_xml(self, add_to_element): + for m in self: + melem = m.to_xml() + add_to_element.append(melem) - return self + def __repr__(self): + xml = etree.Element("Messages") + self.to_xml(xml) - def __iter__(self): - return dict.values(self).__iter__() + return xml2str(xml) + ####################################################################### + # can be removed after 01.07.24 class _msg_key: def __init__(self, type, code): # @ReservedAssignment + warn("This class is deprecated.", DeprecationWarning) self._type = type self._code = code @staticmethod def get(msg): - return _Messages._msg_key(msg.type, msg.code) + return Messages._msg_key(msg.type, msg.code) def __eq__(self, obj): return self.__hash__() == obj.__hash__() def __hash__(self): - return hash(str(self._type).lower() + (str(",") + - str(self._code) if self._code is not None else '')) + return hash(str(self._type).lower() + (str(",") + str(self._code) + if self._code is not None else '')) def __repr__(self): return str(self._type) + (str(",") + str(self._code) if self._code is not None else '') + # end remove + ####################################################################### - def to_xml(self, add_to_element): - for m in self: - melem = m.to_xml() - add_to_element.append(melem) - - return self - - def __repr__(self): - xml = etree.Element("Messages") - self.to_xml(xml) - return xml2str(xml) +class _Messages(Messages): 
+ def __init__(self, *args, **kwargs): + warn("_Messages is deprecated. " + "Use class Messages instead and beware of the slightly different API of the new" + " Messages class", DeprecationWarning) + super().__init__(*args, **kwargs) def _basic_sync(e_local, e_remote): @@ -2647,7 +2830,7 @@ class Container(list): list.__init__(self) self._timestamp = None self._srid = None - self.messages = _Messages() + self.messages = Messages() def extend(self, entities): """Extend this Container by appending all single entities in the given @@ -2694,9 +2877,11 @@ class Container(list): elif isinstance(entity, QueryTemplate): super().append(entity) else: - raise TypeError( - "Entity was neither an id nor a name nor an entity." + - " (was " + str(type(entity)) + ")") + warn("Entity was neither an id nor a name nor an entity." + + " (was " + str(type(entity)) + ":\n" + str(entity) + ")") + # raise TypeError( + # "Entity was neither an id nor a name nor an entity." + + # " (was " + str(type(entity)) + "\n" + str(entity) + ")") return self @@ -2738,11 +2923,11 @@ class Container(list): def get_errors(self): """Get all error messages of this container. - @return _Messages: Error messages. + @return Messages: Error messages. """ if self.has_errors(): - ret = _Messages() + ret = Messages() for m in self.messages: if m.type.lower() == "error": @@ -2755,11 +2940,11 @@ class Container(list): def get_warnings(self): """Get all warning messages of this container. - @return _Messages: Warning messages. + @return Messages: Warning messages. """ if self.has_warnings(): - ret = _Messages() + ret = Messages() for m in self.messages: if m.type.lower() == "warning": @@ -2770,7 +2955,7 @@ class Container(list): return None def get_all_messages(self): - ret = _Messages() + ret = Messages() for e in self: ret.extend(e.get_all_messages()) @@ -2953,7 +3138,7 @@ class Container(list): msg = "Request was not unique. CUID " + \ str(local_entity._cuid) + " was found " + \ str(len(sync_remote_entities)) + " times." - local_entity.add_message(Message("Error", None, msg)) + local_entity.add_message(Message(description=msg, type="Error")) if raise_exception_on_error: raise MismatchingEntitiesError(msg) @@ -2978,7 +3163,7 @@ class Container(list): msg = "Request was not unique. ID " + \ str(local_entity.id) + " was found " + \ str(len(sync_remote_entities)) + " times." - local_entity.add_message(Message("Error", None, msg)) + local_entity.add_message(Message(description=msg, type="Error")) if raise_exception_on_error: raise MismatchingEntitiesError(msg) @@ -3008,7 +3193,7 @@ class Container(list): msg = "Request was not unique. Path " + \ str(local_entity.path) + " was found " + \ str(len(sync_remote_entities)) + " times." - local_entity.add_message(Message("Error", None, msg)) + local_entity.add_message(Message(description=msg, type="Error")) if raise_exception_on_error: raise MismatchingEntitiesError(msg) @@ -3038,7 +3223,7 @@ class Container(list): msg = "Request was not unique. Name " + \ str(local_entity.name) + " was found " + \ str(len(sync_remote_entities)) + " times." - local_entity.add_message(Message("Error", None, msg)) + local_entity.add_message(Message(description=msg, type="Error")) if raise_exception_on_error: raise MismatchingEntitiesError(msg) @@ -3057,7 +3242,7 @@ class Container(list): msg = "Request was not unique. There are " + \ str(len(sync_remote_entities)) + \ " entities which could not be matched to one of the requested ones." 
- remote_container.add_message(Message("Error", None, msg)) + remote_container.add_message(Message(description=msg, type="Error")) if raise_exception_on_error: raise MismatchingEntitiesError(msg) @@ -3644,6 +3829,7 @@ class Container(list): for p in e.get_properties(): if p.id is None: if p.name is not None: + # TODO using try except for normal execution flow is bad style try: w = self.get_entity_by_name(p.name) p._wrap(w) @@ -3655,6 +3841,7 @@ class Container(list): for p in e.get_parents(): if p.id is None: if p.name is not None: + # TODO using try except for normal execution flow is bad style try: p._wrap(self.get_entity_by_name(p.name)) except KeyError: @@ -4118,7 +4305,7 @@ class Query(): The query string. flags : dict of str A dictionary of flags to be send with the query request. - messages : _Messages() + messages : Messages() A container of messages included in the last query response. cached : bool indicates whether the server used the query cache for the execution of @@ -4141,7 +4328,7 @@ class Query(): def __init__(self, q): self.flags = dict() - self.messages = _Messages() + self.messages = Messages() self.cached = None self.etag = None @@ -4240,7 +4427,7 @@ def execute_query(q, unique=False, raise_exception_on_error=True, cache=True, fl Whether the query is expected to have only one entity as result. Defaults to False. raise_exception_on_error : bool - Whether an exception should be raises when there are errors in the + Whether an exception should be raised when there are errors in the resulting entities. Defaults to True. cache : bool Whether to use the query cache (equivalent to adding a "cache" flag). @@ -4265,6 +4452,10 @@ def execute_query(q, unique=False, raise_exception_on_error=True, cache=True, fl class DropOffBox(list): + def __init__(self, *args, **kwargs): + warn(DeprecationWarning( + "The DropOffBox is deprecated and will be removed in future.")) + super().__init__(*args, **kwargs) path = None @@ -4308,7 +4499,7 @@ class UserInfo(): class Info(): def __init__(self): - self.messages = _Messages() + self.messages = Messages() self.sync() def sync(self): @@ -4327,6 +4518,8 @@ class Info(): if isinstance(m, UserInfo): self.user_info = m + elif isinstance(m, TimeZone): + self.time_zone = m else: self.messages.append(m) @@ -4453,13 +4646,16 @@ def _parse_single_xml_element(elem): elif elem.tag.lower() == 'stats': counts = elem.find("counts") - return Message(type="Counts", body=counts.attrib) + return Message(type="Counts", description=None, body=counts.attrib) elif elem.tag == "EntityACL": return ACL(xml=elem) elif elem.tag == "Permissions": return Permissions(xml=elem) elif elem.tag == "UserInfo": return UserInfo(xml=elem) + elif elem.tag == "TimeZone": + return TimeZone(zone_id=elem.get("id"), offset=elem.get("offset"), + display_name=elem.text.strip()) else: return Message(type=elem.tag, code=elem.get( "code"), description=elem.get("description"), body=elem.text) diff --git a/src/caosdb/common/timezone.py b/src/caosdb/common/timezone.py new file mode 100644 index 0000000000000000000000000000000000000000..8fc5e710d3cbf6f20cf81397573f972db3b22f12 --- /dev/null +++ b/src/caosdb/common/timezone.py @@ -0,0 +1,19 @@ +class TimeZone(): + """ + TimeZone, e.g. CEST, Europe/Berlin, UTC+4. + + + Attributes + ---------- + zone_id : string + ID of the time zone. + offset : int + Offset to UTC in seconds. 
+ display_name : string + A human-friendly name of the time zone: + """ + + def __init__(self, zone_id, offset, display_name): + self.zone_id = zone_id + self.offset = offset + self.display_name = display_name diff --git a/src/caosdb/configuration.py b/src/caosdb/configuration.py index 75827df0d00d6c82251c2c04fa47413ac2801928..1c108ac1d39c135dbc90f477be8b8f2f630391ce 100644 --- a/src/caosdb/configuration.py +++ b/src/caosdb/configuration.py @@ -31,12 +31,7 @@ try: except ImportError: pass -try: - # python2 - from ConfigParser import ConfigParser -except ImportError: - # python3 - from configparser import ConfigParser +from configparser import ConfigParser from os import environ, getcwd from os.path import expanduser, join, isfile @@ -59,6 +54,11 @@ def configure(inifile): _reset_config() read_config = _pycaosdbconf.read(inifile) validate_yaml_schema(config_to_yaml(_pycaosdbconf)) + + if "HTTPS_PROXY" in environ: + _pycaosdbconf["Connection"]["https_proxy"] = environ["HTTPS_PROXY"] + if "HTTP_PROXY" in environ: + _pycaosdbconf["Connection"]["http_proxy"] = environ["HTTP_PROXY"] return read_config diff --git a/src/caosdb/connection/SocksiPy.zip b/src/caosdb/connection/SocksiPy.zip deleted file mode 100644 index e81f1f9393c766a3acd41b44245f9e17f090cbe5..0000000000000000000000000000000000000000 Binary files a/src/caosdb/connection/SocksiPy.zip and /dev/null differ diff --git a/src/caosdb/connection/connection.py b/src/caosdb/connection/connection.py index 43eb3410d8d5bdc8323a811fd2b6424fb75f3fda..46dadea9dfcfa6e614493b75d709f604aa188ef6 100644 --- a/src/caosdb/connection/connection.py +++ b/src/caosdb/connection/connection.py @@ -28,9 +28,15 @@ from __future__ import absolute_import, print_function, unicode_literals import logging import ssl import sys +import warnings from builtins import str # pylint: disable=redefined-builtin from errno import EPIPE as BrokenPipe from socket import error as SocketError +from urllib.parse import quote, urlparse +from requests import Session as HTTPSession +from requests.exceptions import ConnectionError as HTTPConnectionError +from urllib3.poolmanager import PoolManager +from requests.adapters import HTTPAdapter from caosdb.configuration import get_config from caosdb.exceptions import (CaosDBException, HTTPClientError, @@ -49,16 +55,8 @@ except ModuleNotFoundError: from pkg_resources import resource_filename from .interface import CaosDBHTTPResponse, CaosDBServerConnection -from .streaminghttp import StreamingHTTPSConnection from .utils import make_uri_path, parse_url, urlencode - -try: - from urllib.parse import quote, urlparse -except ImportError: - from urllib import quote - from urlparse import urlparse - -# pylint: disable=missing-docstring +from .encode import MultipartYielder, ReadableMultiparts _LOGGER = logging.getLogger(__name__) @@ -67,6 +65,9 @@ class _WrappedHTTPResponse(CaosDBHTTPResponse): def __init__(self, response): self.response = response + self._generator = None + self._buffer = b'' + self._stream_consumed = False @property def reason(self): @@ -74,21 +75,71 @@ class _WrappedHTTPResponse(CaosDBHTTPResponse): @property def status(self): - return self.response.status + return self.response.status_code def read(self, size=None): - return self.response.read(size) + if self._stream_consumed is True: + raise RuntimeError("Stream is consumed") + + if self._buffer is None: + # the buffer has been drained in the previous call. 
+ self._stream_consumed = True + return b'' + + if self._generator is None and (size is None or size == 0): + # return full content at once + self._stream_consumed = True + return self.response.content + + if len(self._buffer) >= size: + # still enough bytes in the buffer + result = chunk[:size] + self._buffer = chunk[size:] + return result + + if self._generator is None: + # first call to this method + if size is None or size == 0: + size = 512 + self._generator = self.response.iter_content(size) + + try: + # read new data into the buffer + chunk = self._buffer + next(self._generator) + result = chunk[:size] + if len(result) == 0: + self._stream_consumed = True + self._buffer = chunk[size:] + return result + except StopIteration: + # drain buffer + result = self._buffer + self._buffer = None + return result def getheader(self, name, default=None): - return self.response.getheader(name=name, default=default) + return self.response.headers[name] if name in self.response.headers else default def getheaders(self): - return self.response.getheaders() + return self.response.headers.items() def close(self): self.response.close() +class _SSLAdapter(HTTPAdapter): + """Transport adapter that allows us to use different SSL versions.""" + + def __init__(self, ssl_version): + self.ssl_version = ssl_version + super().__init__() + + def init_poolmanager(self, connections, maxsize, block=False): + self.poolmanager = PoolManager( + num_pools=connections, maxsize=maxsize, + block=block, ssl_version=self.ssl_version) + + class _DefaultCaosDBServerConnection(CaosDBServerConnection): """_DefaultCaosDBServerConnection. @@ -101,10 +152,11 @@ class _DefaultCaosDBServerConnection(CaosDBServerConnection): def __init__(self): self._useragent = ("caosdb-pylib/{version} - {implementation}".format( version=version, implementation=type(self).__name__)) - self._http_con = None self._base_path = None + self._session = None + self._timeout = None - def request(self, method, path, headers=None, body=None, **kwargs): + def request(self, method, path, headers=None, body=None): """request. Send a HTTP request to the server. @@ -118,38 +170,40 @@ class _DefaultCaosDBServerConnection(CaosDBServerConnection): including query and frament segments. headers : dict of str -> str, optional HTTP request headers. (Defautl: None) - body : str or bytes or readable, opional + body : str or bytes or readable, optional The body of the HTTP request. Bytes should be a utf-8 encoded string. - **kwargs : - Any keyword arguments will be ignored. - TODO: Why are they allowed then? Returns ------- - TODO: What? + response : CaosDBHTTPResponse """ if headers is None: headers = {} headers["User-Agent"] = self._useragent + + if path.endswith("/."): + path = path[:-1] + "%2E" + + if isinstance(body, MultipartYielder): + body = ReadableMultiparts(body) + try: - self._http_con = StreamingHTTPSConnection( - # TODO looks as if configure needs to be done first. - # That is however not assured. 
- host=self.setup_fields["host"], - timeout=self.setup_fields["timeout"], - context=self.setup_fields["context"], - socket_proxy=self.setup_fields["socket_proxy"]) - self._http_con.request(method=method, url=self._base_path + path, - headers=headers, body=body) - except SocketError as socket_err: + response = self._session.request( + method=method, + url=self._base_path + path, + headers=headers, + data=body, + timeout=self._timeout, + stream=True) + + return _WrappedHTTPResponse(response) + except HTTPConnectionError as conn_err: raise CaosDBConnectionError( - "Connection failed. Network or server down? " + str(socket_err) + "Connection failed. Network or server down? " + str(conn_err) ) - return _WrappedHTTPResponse(self._http_con.getresponse()) - def configure(self, **config): """configure. @@ -173,55 +227,71 @@ class _DefaultCaosDBServerConnection(CaosDBServerConnection): loaded. """ + if "url" not in config: + raise CaosDBConnectionError( + "No connection url specified. Please " + "do so via caosdb.configure_connection(...) or in a config " + "file.") + if (not config["url"].lower().startswith("https://") and not config["url"].lower().startswith("http://")): + raise CaosDBConnectionError("The connection url is expected " + "to be a http or https url and " + "must include the url scheme " + "(i.e. start with https:// or " + "http://).") + + url = urlparse(config["url"]) + path = url.path.strip("/") + if len(path) > 0: + path = path + "/" + self._base_path = url.scheme + "://" + url.netloc + "/" + path + + self._session = HTTPSession() + + if url.scheme == "https": + self._setup_ssl(config) + + # TODO(tf) remove in next release + socket_proxy = config["socket_proxy"] if "socket_proxy" in config else None + if socket_proxy is not None: + self._session.proxies = { + "https": "socks5://" + socket_proxy, + "http": "socks5://" + socket_proxy, + } + + if "https_proxy" in config: + if self._session.proxies is None: + self._session.proxies = {} + self._session.proxies["https"] = config["https_proxy"] + + if "http_proxy" in config: + if self._session.proxies is None: + self._session.proxies = {} + self._session.proxies["http"] = config["http_proxy"] + + if "timeout" in config: + self._timeout = config["timeout"] + + def _setup_ssl(self, config): if "ssl_version" in config and config["cacert"] is not None: ssl_version = getattr(ssl, config["ssl_version"]) else: ssl_version = ssl.PROTOCOL_TLS - context = ssl.SSLContext(ssl_version) - context.verify_mode = ssl.CERT_REQUIRED - if config.get("ssl_insecure"): + self._session.mount(self._base_path, _SSLAdapter(ssl_version)) + + verify = True + if "cacert" in config: + verify = config["cacert"] + if "ssl_insecure" in config and config["ssl_insecure"]: _LOGGER.warning("*** Warning! ***\n" "Insecure SSL mode, certificate will not be checked! 
" "Please consider removing the `ssl_insecure` configuration option.\n" "****************") - context.verify_mode = ssl.CERT_NONE - - if (not context.verify_mode == ssl.CERT_NONE and - hasattr(context, "check_hostname")): - context.check_hostname = True - - if ("cacert" in config and config["cacert"] is not None and - config["cacert"]): - try: - context.load_verify_locations(config["cacert"]) - except Exception as exc: - raise CaosDBConnectionError("Could not load the cacert in" - "`{}`: {}".format(config["cacert"], - exc)) - - context.load_default_certs() - - if "url" in config: - parsed_url = parse_url(config["url"]) - host = parsed_url.netloc - self._base_path = parsed_url.path - else: - raise CaosDBConnectionError( - "No connection url specified. Please " - "do so via caosdb.configure_connection(...) or in a config " - "file.") - - socket_proxy = None - - if "socket_proxy" in config: - socket_proxy = config["socket_proxy"] - - self.setup_fields = { - "host": host, - "timeout": int(config.get("timeout")), - "context": context, - "socket_proxy": socket_proxy} + warnings.filterwarnings(action="ignore", module="urllib3", + message="Unverified HTTPS request is being made") + verify = False + if verify is not None: + self._session.verify = verify def _make_conf(*conf): @@ -252,7 +322,6 @@ _DEFAULT_CONF = { "password_method": "input", "implementation": _DefaultCaosDBServerConnection, "timeout": 210, - "cacert": resource_filename("caosdb", 'cert/indiscale.ca.crt') } @@ -314,6 +383,10 @@ def configure_connection(**kwargs): Parameters ---------- + url : str + The url of the CaosDB Server. HTTP and HTTPS urls are allowed. However, + it is **highly** recommend to avoid HTTP because passwords and + authentication token are send over the network in plain text. username : str Username for login; e.g. 'admin'. @@ -342,6 +415,24 @@ def configure_connection(**kwargs): An authentication token which has been issued by the CaosDB Server. Implies `password_method="auth_token"` if set. An example token string would be `["O","OneTimeAuthenticationToken","anonymous",["administration"],[],1592995200000,604800000,"3ZZ4WKRB-5I7DG2Q6-ZZE6T64P-VQ","197d0d081615c52dc18fb323c300d7be077beaad4020773bb58920b55023fa6ee49355e35754a4277b9ac525c882bcd3a22e7227ba36dfcbbdbf8f15f19d1ee9",1,30000]`. + https_proxy : str, optional + Define a proxy for the https connections, e.g. `http://localhost:8888`, + `socks5://localhost:8888`, or `socks4://localhost:8888`. These are + either (non-TLS) HTTP proxies, SOCKS4 proxies, or SOCKS5 proxies. HTTPS + proxies are not supported. However, the connection will be secured + using TLS in the tunneled connection nonetheless. Only the connection + to the proxy is insecure which is why it is not recommended to use HTTP + proxies when authentication against the proxy is necessary. If + unspecified, the https_proxy option of the pycaosdb.ini or the HTTPS_PROXY + environment variable are being used. Use `None` to override these + options with a no-proxy setting. + + http_proxy : str, optional + Define a proxy for the http connections, e.g. `http://localhost:8888`. + If unspecified, the http_proxy option of the pycaosdb.ini or the + HTTP_PROXY environment variable are being used. Use `None` to override + these options with a no-proxy setting. + implementation : CaosDBServerConnection The class which implements the connection. 
(Default: _DefaultCaosDBServerConnection) @@ -372,6 +463,11 @@ def configure_connection(**kwargs): local_conf = _make_conf(_DEFAULT_CONF, global_conf, kwargs) connection = _Connection.get_instance() + + if "socket_proxy" in local_conf: + warnings.warn("Deprecated configuration option: socket_proxy. Use " + "the new https_proxy option instead", + DeprecationWarning, stacklevel=1) connection.configure(**local_conf) return connection @@ -599,7 +695,7 @@ class _Connection(object): # pylint: disable=useless-object-inheritance method=method, path=path, headers=headers, - body=body, **kwargs) + body=body) _LOGGER.debug("response: %s %s", str(http_response.status), str(http_response.getheaders())) self._authenticator.on_response(http_response) diff --git a/src/caosdb/connection/encode.py b/src/caosdb/connection/encode.py index 7b092aae784a76abec0104ef7269df7ae0111b3b..0b826cc4400275a2374308ee104cdbdabb619b75 100644 --- a/src/caosdb/connection/encode.py +++ b/src/caosdb/connection/encode.py @@ -51,7 +51,8 @@ multipart/form-data is the standard way to upload files over HTTP __all__ = [ 'gen_boundary', 'encode_and_quote', 'MultipartParam', 'encode_string', - 'encode_file_header', 'get_body_size', 'get_headers', 'multipart_encode' + 'encode_file_header', 'get_body_size', 'get_headers', 'multipart_encode', + 'ReadableMultiparts', ] from urllib.parse import quote_plus from io import UnsupportedOperation @@ -475,3 +476,40 @@ def multipart_encode(params, boundary=None, callback=None): params = MultipartParam.from_params(params) return MultipartYielder(params, boundary, callback), headers + + +class ReadableMultiparts(object): + """Wraps instances of the MultipartYielder class as a readable and withable + object.""" + + def __init__(self, multipart_yielder): + self.multipart_yielder = multipart_yielder + self.current_block = None + self.left_over = b'' + + def read(self, size=-1): + result = self.left_over + while size == -1 or len(result) < size: + try: + next_chunk = self.multipart_yielder.next() + if hasattr(next_chunk, "encode"): + next_chunk = next_chunk.encode("utf8") + result += next_chunk + except StopIteration: + break + + if size == -1: + self.left_over = b'' + return result + + self.left_over = result[size:] + return result[:size] + + def __enter__(self): + pass + + def __exit__(self, type, value, traceback): + self.close() + + def close(self): + self.multipart_yielder.reset() diff --git a/src/caosdb/connection/streaminghttp.py b/src/caosdb/connection/streaminghttp.py deleted file mode 100644 index 01774301b9bdb55bdbf6b56695042aaf354dba97..0000000000000000000000000000000000000000 --- a/src/caosdb/connection/streaminghttp.py +++ /dev/null @@ -1,152 +0,0 @@ -# -*- encoding: utf-8 -*- -# -# ** header v3.0 -# This file is a part of the CaosDB Project. -# -# Copyright (C) 2018 Research Group Biomedical Physics, -# Max-Planck-Institute for Dynamics and Self-Organization Göttingen -# -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU Affero General Public License as -# published by the Free Software Foundation, either version 3 of the -# License, or (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Affero General Public License for more details. -# -# You should have received a copy of the GNU Affero General Public License -# along with this program. 
If not, see <https://www.gnu.org/licenses/>. -# -# ** end header -# -# Original work Copyright (c) 2011 Chris AtLee -# Modified work Copyright (c) 2017 Biomedical Physics, MPI for Dynamics and Self-Organization -# -# Permission is hereby granted, free of charge, to any person obtaining a copy -# of this software and associated documentation files (the "Software"), to deal -# in the Software without restriction, including without limitation the rights -# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -# copies of the Software, and to permit persons to whom the Software is -# furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included in -# all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -# THE SOFTWARE. -"""Streaming HTTP uploads module. - -This module extends the standard httplib and http.client HTTPConnection so that -iterable objects can be used in the body of HTTP requests. - -**N.B.** You must specify a Content-Length header if using an iterable object -since there is no way to determine in advance the total size that will be -yielded, and there is no way to reset an interator. -""" - -from __future__ import unicode_literals, print_function, absolute_import -import socks -import socket -try: - # python3 - from http import client as client -except ImportError: - # python2 - import httplib as client - - -__all__ = ['StreamingHTTPSConnection'] - - -class StreamingHTTPSConnection(client.HTTPSConnection, object): - """Subclass of `http.client.HTTSConnection` or `httplib.HTTPSConnection` - that overrides the `send()` method to support iterable body objects.""" - # pylint: disable=unused-argument, arguments-differ - - def __init__(self, socket_proxy=None, **kwargs): - if socket_proxy is not None: - host, port = socket_proxy.split(":") - socks.setdefaultproxy(socks.PROXY_TYPE_SOCKS5, host, - int(port)) - socket.socket = socks.socksocket - super(StreamingHTTPSConnection, self).__init__(**kwargs) - - def _send_output(self, body, **kwargs): - """Send the currently buffered request and clear the buffer. - - Appends an extra \\r\\n to the buffer. - A message_body may be specified, to be appended to the request. - - This method is implemented in differently in the various python - versions (which is extremely annoying). So we provide a unified but - relatively dumb implementaion which only serves our needs. - """ - self._buffer.extend(("".encode("utf-8"), "".encode("utf-8"))) - headers = "\r\n".encode("utf-8").join(self._buffer) - del self._buffer[:] - - self.send(headers) - if body is not None: - self.send(body) - - # pylint: disable=too-many-branches - - def send(self, value): - """Send ``value`` to the server. - - ``value`` can be a string-like object which supports a 'encode' method, - a file-like object that supports a .read() method, or an iterable object - that supports a .next() method. - - An encode()able ``value`` will be utf-8 encoded before sending. 
- """ - # Based on python 2.6's httplib.HTTPConnection.send() - if self.sock is None: - if self.auto_open: - self.connect() - else: - raise client.NotConnected() - - # send the data to the server. if we get a broken pipe, then close - # the socket. we want to reconnect when somebody tries to send again. - # - # NOTE: we DO propagate the error, though, because we cannot simply - # ignore the error... the caller will know if they can retry. - if self.debuglevel > 0: - print("send: ", repr(value)) - try: - blocksize = 8192 - if hasattr(value, 'read'): - if hasattr(value, 'seek'): - value.seek(0) - if self.debuglevel > 0: - print("sendIng a read()able") - data = value.read(blocksize) - while data: - self.sock.sendall(data) - data = value.read(blocksize) - elif hasattr(value, 'next'): - if hasattr(value, 'reset'): - value.reset() - if self.debuglevel > 0: - print("sendIng an iterable") - for data in value: - if hasattr(data, "encode"): - self.sock.sendall(data.encode('utf-8')) - else: - self.sock.sendall(data) - else: - if self.debuglevel > 0: - print("sendIng a byte-like") - self.sock.sendall(value) - except socket.error as err: - if err.args[0] == 32: # Broken pipe - self.close() - raise diff --git a/src/caosdb/connection/utils.py b/src/caosdb/connection/utils.py index 9056bf9dea14fa2fa441fa13a5efe8e776990284..095d47035e24dad5b6d7041f5d3b8a739652f271 100644 --- a/src/caosdb/connection/utils.py +++ b/src/caosdb/connection/utils.py @@ -45,13 +45,16 @@ def urlencode(query): 3) All other parameters which can be passed to the respective functions are not implemented here and the default parameters will be used. + +.. code:: + >>> urlencode({'key': ['val1', 'val2']}, doseq=True) Traceback (most recent call last): ... TypeError: urlencode() got an unexpected keyword argument 'doseq' - Otherwise, this functions works exactly as its counterparts in the urllib - modules when they are called with only the query parameter. +Otherwise, this functions works exactly as its counterparts in the urllib +modules when they are called with only the query parameter. Parameters ---------- diff --git a/src/caosdb/high_level_api.py b/src/caosdb/high_level_api.py index 427a095a4bafc0c372b0169298f2980dbd902c49..3509a7b6bfe7ec322f2e0d2590334c6fc6f02cf8 100644 --- a/src/caosdb/high_level_api.py +++ b/src/caosdb/high_level_api.py @@ -49,7 +49,7 @@ from datetime import datetime from dateutil import parser warnings.warn("""EXPERIMENTAL! The high_level_api module is experimental and may be changed or -removed in future. Its purpose is to give an impression on how the Python client user interface +removed in the future. Its purpose is to give an impression on how the Python client user interface might be changed.""") @@ -265,7 +265,8 @@ class CaosDBPythonEntity(object): self._version = val def _set_property_from_entity(self, ent: db.Entity, importance: str, - references: Optional[db.Container]): + references: Optional[db.Container], + visited: Dict[int, "CaosDBPythonEntity"]): """ Set a new property using an entity from the normal python API. 
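The new ``visited`` argument that appears in this and the following hunks implements cycle detection for the high-level conversion: entities that have already been converted are looked up by ``id()`` instead of being converted again. A minimal, self-contained sketch of the idiom (plain dicts stand in for CaosDB entities; the ``convert`` helper is hypothetical):

    def convert(node, visited=None):
        """Convert a node and everything it references, tolerating cycles."""
        if visited is None:
            visited = {}
        if id(node) in visited:
            # Already converted (or currently being converted): reuse the result.
            return visited[id(node)]
        result = {"name": node["name"], "refs": []}
        visited[id(node)] = result   # register *before* recursing into references
        for ref in node["refs"]:
            result["refs"].append(convert(ref, visited))
        return result

    a = {"name": "A", "refs": []}
    b = {"name": "B", "refs": [a]}
    a["refs"].append(b)              # cyclic reference: A -> B -> A
    converted = convert(a)
    assert converted["refs"][0]["refs"][0] is converted
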
@@ -280,7 +281,7 @@ class CaosDBPythonEntity(object): raise RuntimeError("Multiproperty not implemented yet.") val = self._type_converted_value(ent.value, ent.datatype, - references) + references, visited) self.set_property( ent.name, val, @@ -382,7 +383,8 @@ class CaosDBPythonEntity(object): def _type_converted_list(self, val: List, pr: str, - references: Optional[db.Container]): + references: Optional[db.Container], + visited: Dict[int, "CaosDBPythonEntity"]): """ Convert a list to a python list of the correct type. @@ -396,13 +398,14 @@ class CaosDBPythonEntity(object): raise RuntimeError("Not a list.") return [ - self._type_converted_value(i, get_list_datatype(pr), references - ) for i in val] + self._type_converted_value(i, get_list_datatype(pr), references, + visited) for i in val] def _type_converted_value(self, val: Any, pr: str, - references: Optional[db.Container]): + references: Optional[db.Container], + visited: Dict[int, "CaosDBPythonEntity"]): """ Convert val to the correct type which is indicated by the database type string in pr. @@ -416,9 +419,9 @@ class CaosDBPythonEntity(object): # this needs to be checked as second case as it is the ONLY # case which does not depend on pr # TODO: we might need to pass through the reference container - return convert_to_python_object(val, references) + return convert_to_python_object(val, references, visited) elif isinstance(val, list): - return self._type_converted_list(val, pr, references) + return self._type_converted_list(val, pr, references, visited) elif pr is None: return val elif pr == DOUBLE: @@ -436,7 +439,7 @@ class CaosDBPythonEntity(object): elif pr == DATETIME: return self._parse_datetime(val) elif is_list_datatype(pr): - return self._type_converted_list(val, pr, references) + return self._type_converted_list(val, pr, references, visited) else: # Generic references to entities: return CaosDBPythonUnresolvedReference(val) @@ -561,8 +564,8 @@ class CaosDBPythonEntity(object): return propval def resolve_references(self, deep: bool, references: db.Container, - visited: Dict[Union[str, int], - "CaosDBPythonEntity"] = None): + visited: Optional[Dict[Union[str, int], + "CaosDBPythonEntity"]] = None): """ Resolve this entity's references. This affects unresolved properties as well as unresolved parents. 
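The annotation changes in this hunk (``visited: Dict[...] = None`` becoming ``Optional[Dict[...]] = None``) follow the usual Python idiom for optional container arguments: the default stays ``None`` and the container is created inside the function, because a mutable default would be shared between calls. A generic illustration (the ``collect`` function is hypothetical, not part of this library):

    from typing import Dict, Optional

    def collect(entity: dict, visited: Optional[Dict[int, dict]] = None) -> Dict[int, dict]:
        # Create the dict per call; a default such as `visited: Dict[int, dict] = {}`
        # would be evaluated once and shared between all calls.
        if visited is None:
            visited = {}
        visited[id(entity)] = entity
        return visited
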
@@ -629,18 +632,20 @@ class CaosDBPythonEntity(object): else: entity = CaosDBPythonRecord() - for parent in serialization["parents"]: - if "unresolved" in parent: - id = None - name = None - if "id" in parent: - id = parent["id"] - if "name" in parent: - name = parent["name"] - entity.add_parent(CaosDBPythonUnresolvedParent( - id=id, name=name)) - else: - raise NotImplementedError() + if "parents" in serialization: + for parent in serialization["parents"]: + if "unresolved" in parent: + id = None + name = None + if "id" in parent: + id = parent["id"] + if "name" in parent: + name = parent["name"] + entity.add_parent(CaosDBPythonUnresolvedParent( + id=id, name=name)) + else: + raise NotImplementedError( + "Currently, only unresolved parents can be deserialized.") for baseprop in ("name", "id", "description", "version"): if baseprop in serialization: @@ -673,7 +678,8 @@ class CaosDBPythonEntity(object): if f.name in metadata: propmeta.__setattr__(f.name, metadata[f.name]) else: - raise NotImplementedError() + pass + # raise NotImplementedError() return entity @@ -804,7 +810,9 @@ BASE_ATTRIBUTES = ( def _single_convert_to_python_object(robj: CaosDBPythonEntity, entity: db.Entity, - references: Optional[db.Container] = None): + references: Optional[db.Container] = None, + visited: Optional[Dict[int, + "CaosDBPythonEntity"]] = None): """ Convert a db.Entity from the standard API to a (previously created) CaosDBPythonEntity from the high level API. @@ -819,6 +827,17 @@ def _single_convert_to_python_object(robj: CaosDBPythonEntity, Returns the input object robj. """ + + # This parameter is used in the recursion to keep track of already visited + # entites (in order to detect cycles). + if visited is None: + visited = dict() + + if id(entity) in visited: + return visited[id(entity)] + else: + visited[id(entity)] = robj + for base_attribute in BASE_ATTRIBUTES: val = entity.__getattribute__(base_attribute) if val is not None: @@ -827,7 +846,8 @@ def _single_convert_to_python_object(robj: CaosDBPythonEntity, robj.__setattr__(base_attribute, val) for prop in entity.properties: - robj._set_property_from_entity(prop, entity.get_importance(prop), references) + robj._set_property_from_entity(prop, entity.get_importance(prop), references, + visited) for parent in entity.parents: robj.add_parent(CaosDBPythonUnresolvedParent(id=parent.id, @@ -921,7 +941,9 @@ def convert_to_entity(python_object): def convert_to_python_object(entity: Union[db.Container, db.Entity], - references: Optional[db.Container] = None): + references: Optional[db.Container] = None, + visited: Optional[Dict[int, + "CaosDBPythonEntity"]] = None): """ Convert either a container of CaosDB entities or a single CaosDB entity into the high level representation. @@ -933,15 +955,19 @@ def convert_to_python_object(entity: Union[db.Container, db.Entity], """ if isinstance(entity, db.Container): # Create a list of objects: - return [convert_to_python_object(i, references) for i in entity] + return [convert_to_python_object(i, references, visited) for i in entity] + # TODO: recursion problems? return _single_convert_to_python_object( - high_level_type_for_standard_type(entity)(), entity, references) + high_level_type_for_standard_type(entity)(), + entity, + references, + visited) def new_high_level_entity(entity: db.RecordType, importance_level: str, - name: str = None): + name: Optional[str] = None): """ Create an new record in high level format based on a record type in standard format. 
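The ``visited`` bookkeeping above is what allows mutually referencing records to be converted without infinite recursion. A hedged usage sketch (record and property names are invented; ``convert_to_python_object`` is the function changed in the hunks above, and the resulting high-level attribute access is not shown here):

    import caosdb as db
    from caosdb.high_level_api import convert_to_python_object

    a = db.Record(name="A")
    a.add_parent(name="Experiment")
    b = db.Record(name="B")
    b.add_parent(name="Experiment")
    a.add_property(name="partner", value=b)
    b.add_property(name="partner", value=a)   # cyclic reference A <-> B

    # The shared `visited` dict returns the already converted object for the
    # second visit, so the cycle is preserved instead of recursing forever.
    high_a = convert_to_python_object(a)
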
@@ -974,7 +1000,7 @@ def new_high_level_entity(entity: db.RecordType, return convert_to_python_object(r) -def create_record(rtname: str, name: str = None, **kwargs): +def create_record(rtname: str, name: Optional[str] = None, **kwargs): """ Create a new record based on the name of a record type. The new record is returned. @@ -1013,7 +1039,9 @@ def create_entity_container(record: CaosDBPythonEntity): return db.Container().extend(lse) -def query(query: str, resolve_references: bool = True, references: db.Container = None): +def query(query: str, + resolve_references: Optional[bool] = True, + references: Optional[db.Container] = None): """ """ diff --git a/src/caosdb/schema-pycaosdb-ini.yml b/src/caosdb/schema-pycaosdb-ini.yml index 5dabdd89795e19a757209e03cc843776be705777..cb07dfeb84bc16e212100232403b0f66543c73e9 100644 --- a/src/caosdb/schema-pycaosdb-ini.yml +++ b/src/caosdb/schema-pycaosdb-ini.yml @@ -14,10 +14,10 @@ schema-pycaosdb-ini: additionalProperties: false properties: url: - description: URL of the CaosDB server + description: "URL of the CaosDB server. Allowed are HTTP and HTTPS connections. However, since authentication tokens and sometimes even passwords are send in plain text to the server it is **highly** recommended to use HTTPS connections whenever possible. HTTP is ok for testing and debugging." type: string - pattern: https://[-a-zA-Z0-9\.]+(:[0-9]+)?(/)? - examples: ["https://demo.indiscale.com/", "https://localhost:10443/"] + pattern: http(s)?://[-a-zA-Z0-9\.]+(:[0-9]+)?(/)? + examples: ["https://demo.indiscale.com/", "http://localhost:10080/"] username: type: string description: User name used for authentication with the server @@ -26,7 +26,7 @@ schema-pycaosdb-ini: description: The password input method defines how the password is supplied that is used for authentication with the server. type: string default: input - enum: [input, plain, pass, keyring] + enum: [input, unauthenticated, plain, pass, keyring] password_identifier: type: string password: @@ -54,7 +54,15 @@ schema-pycaosdb-ini: socket_proxy: examples: ["localhost:12345"] type: string - description: You can define a socket proxy to be used. This is for the case that the server sits behind a firewall which is being tunnelled with a socket proxy (SOCKS4 or SOCKS5) (e.g. via ssh's -D option or a dedicated proxy server). + description: Deprecated. Please use https_proxy instead. + https_proxy: + examples: ["http://localhost:8888", "socks5://localhost:8888", "socks4://localhost:8888"] + type: string + description: "Define a proxy for the https connections. These are either (non-TLS) HTTP proxies, SOCKS4 proxies, or SOCKS5 proxies. HTTPS proxies are not supported. However, the connection will be secured using TLS in the tunneled connection nonetheless. Only the connection to the proxy is insecure which is why it is not recommended to use HTTP proxies when authentication against the proxy is necessary. Note: this option is overridden by the HTTPS_PROXY environment variable, if present." + http_proxy: + examples: ["http://localhost:8888", "socks5://localhost:8888", "socks4://localhost:8888"] + type: string + description: "Define a proxy for the http connections. These are either (non-TLS) HTTP proxies, SOCKS4 proxies, or SOCKS5 proxies. HTTPS proxies are not supported. Note: this option is overridden by the HTTP_PROXY environment variable, if present." implementation: description: This option is used internally and for testing. Do not override. 
examples: [_DefaultCaosDBServerConnection] @@ -98,6 +106,9 @@ schema-pycaosdb-ini: advancedtools: description: "Configuration settings for the caosadvancedtools." additionalProperties: true + caoscrawler: + description: "Configuration settings for the CaosDB Crawler." + additionalProperties: true sss_helper: description: "Configuration settings for server-side scripting." additionalProperties: true diff --git a/src/caosdb/utils/caosdb_admin.py b/src/caosdb/utils/caosdb_admin.py index 9fb94f57683036f5432a40198cc4ae98893665fb..09a8f64a3c6b9f0825089949840a8791604d1ded 100755 --- a/src/caosdb/utils/caosdb_admin.py +++ b/src/caosdb/utils/caosdb_admin.py @@ -621,8 +621,8 @@ USAGE for action in ["grant", "deny", "revoke_denial", "revoke_grant"]: action_entity_permissions_parser = subparsers.add_parser( - "{}_entity_permissions".format(action), - help="{} entity permissions to a role.".format(action)) + f"{action}_entity_permissions", + help=f"{action} entity permissions to one or more Entities.") action_entity_permissions_parser.set_defaults( call=do_action_entity_permissions, action=action) action_entity_permissions_parser.add_argument(dest="query", metavar="QUERY", diff --git a/src/caosdb/utils/checkFileSystemConsistency.py b/src/caosdb/utils/checkFileSystemConsistency.py index 6dd35f8a6f699a2c74ff41a9924cd65c436efd42..6c053fdca6acb3a6585589c0e6298ba0704ea590 100755 --- a/src/caosdb/utils/checkFileSystemConsistency.py +++ b/src/caosdb/utils/checkFileSystemConsistency.py @@ -30,7 +30,6 @@ import caosdb as db from argparse import ArgumentParser from argparse import RawDescriptionHelpFormatter -from _testcapi import raise_exception __all__ = [] __version__ = 0.1 @@ -82,17 +81,15 @@ def main(argv=None): program_build_date = str(__updated__) program_version_message = '%%(prog)s %s (%s)' % ( program_version, program_build_date) - program_shortdesc = __import__('__main__').__doc__.split("\n")[1] - program_license = '''%s + program_license = ''' - Created by timm fitschen on %s. Copyright 2016 BMPG. All rights reserved. Distributed on an "AS IS" basis without warranties or conditions of any kind, either express or implied. USAGE -''' % (program_shortdesc, str(__date__)) +''' # Setup argument parser parser = ArgumentParser(description=program_license, @@ -113,6 +110,7 @@ USAGE help="timeout in seconds for the database requests. [default: %(default)s]", metavar="TIMEOUT", default="200") + parser.add_argument('location') # Process arguments args = parser.parse_args() @@ -121,7 +119,7 @@ USAGE VERBOSITY = args.verbose TIMEOUT = args.timeout - runCheck(TIMEOUT) + print(runCheck(TIMEOUT, args.location).messages) return 0 diff --git a/src/caosdb/utils/create_revision.py b/src/caosdb/utils/create_revision.py index 0b7ce996311a96a6a0fe89935de729f07b67a353..419e1c9f2b97171be0dccf1bc772ae5db679c0b7 100644 --- a/src/caosdb/utils/create_revision.py +++ b/src/caosdb/utils/create_revision.py @@ -34,13 +34,15 @@ def bend_references(from_id, to_id, except_for=None): and those references are changed to point to to_id. entities having an id listed in except_for are excluded. 
- params: - from_id : int - the old object to which references where pointing - to_id : int - the new object to which references will be pointing - except_for : list of int - entities with id of this list will not be changed +Parameters +---------- + +from_id : int + the old object to which references where pointing +to_id : int + the new object to which references will be pointing +except_for : list of int + entities with id of this list will not be changed """ if except_for is None: except_for = [to_id] @@ -71,14 +73,16 @@ def create_revision(old_id, prop, value): This function changes the record with id old_id. The value of the propertye prop is changed to value. - params: - old_id : int - id of the record to be changed - prop : string - name of the property to be changed - value : type of corresponding property - the new value of the corresponding property - """ +Parameters +---------- + +old_id : int + id of the record to be changed +prop : string + name of the property to be changed +value : type of corresponding property + the new value of the corresponding property +""" record = db.execute_query("FIND {}".format(old_id))[0] new_rec = record.copy() new_rec.get_property(prop).value = value diff --git a/src/caosdb/utils/get_entity.py b/src/caosdb/utils/get_entity.py new file mode 100644 index 0000000000000000000000000000000000000000..a27aafa99ffe3759a46876a5bcd5e686d631b1dc --- /dev/null +++ b/src/caosdb/utils/get_entity.py @@ -0,0 +1,49 @@ +# -*- coding: utf-8 -*- +# +# This file is a part of the CaosDB Project. +# +# Copyright (C) 2023 Henrik tom Wörden <h.tomwoerden@indiscale.com> +# Copyright (C) 2023 IndiScale GmbH <info@indiscale.com> +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see <https://www.gnu.org/licenses/>. +# + +"""Convenience functions to retrieve a specific entity.""" + +from typing import Union +from ..common.models import execute_query, Entity + + +def get_entity_by_name(name: str) -> Entity: + """Return the result of a unique query that uses the name to find the correct entity. + + Submits the query "FIND ENTITY WITH name='{name}'". + """ + return execute_query(f"FIND ENTITY WITH name='{name}'", unique=True) + + +def get_entity_by_id(eid: Union[str, int]) -> Entity: + """Return the result of a unique query that uses the id to find the correct entity. + + Submits the query "FIND ENTITY WITH id='{eid}'". + """ + return execute_query(f"FIND ENTITY WITH id='{eid}'", unique=True) + + +def get_entity_by_path(path: str) -> Entity: + """Return the result of a unique query that uses the path to find the correct file. + + Submits the query "FIND FILE WHICH IS STORED AT '{path}'". 
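+
+    Example (the path is illustrative):
+
+    .. code:: python
+
+        datafile = get_entity_by_path("/ExperimentalData/2023/scan_001.dat")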
+    """
+    return execute_query(f"FIND FILE WHICH IS STORED AT '{path}'", unique=True)
diff --git a/src/caosdb/utils/git_utils.py b/src/caosdb/utils/git_utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..7a58272a3bef1930f75a1e08364349388e2bb89f
--- /dev/null
+++ b/src/caosdb/utils/git_utils.py
@@ -0,0 +1,82 @@
+# -*- coding: utf-8 -*-
+#
+# This file is a part of the CaosDB Project.
+#
+# Copyright (C) 2018 Research Group Biomedical Physics,
+# Max-Planck-Institute for Dynamics and Self-Organization Göttingen
+# Copyright (C) 2020 Timm Fitschen <t.fitschen@indiscale.com>
+# Copyright (C) 2020-2022 IndiScale GmbH <info@indiscale.com>
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as
+# published by the Free Software Foundation, either version 3 of the
+# License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+#
+# ** end header
+#
+"""git-utils: Some functions for retrieving information about git repositories.
+
+"""
+
+import logging
+import tempfile
+
+from subprocess import call
+
+logger = logging.getLogger(__name__)
+
+
+def get_origin_url_in(folder: str):
+    """return the Fetch URL of the git repository in the given folder."""
+    with tempfile.NamedTemporaryFile(delete=False, mode="w") as t:
+        call(["git", "remote", "show", "origin"], stdout=t, cwd=folder)
+    with open(t.name, "r") as t:
+        urlString = "Fetch URL:"
+
+        for line in t.readlines():
+            if urlString in line:
+                return line[line.find(urlString) + len(urlString):].strip()
+
+        return None
+
+
+def get_diff_in(folder: str, save_dir=None):
+    """returns the name of a file where the output of "git diff" in the given
+    folder is stored."""
+    with tempfile.NamedTemporaryFile(delete=False, mode="w", dir=save_dir) as t:
+        call(["git", "diff"], stdout=t, cwd=folder)
+
+    return t.name
+
+
+def get_branch_in(folder: str):
+    """returns the current branch of the git repository in the given folder.
+
+    The command "git rev-parse --abbrev-ref HEAD" is called in the given
+    folder and the output is returned.
+    """
+    with tempfile.NamedTemporaryFile(delete=False, mode="w") as t:
+        call(["git", "rev-parse", "--abbrev-ref", "HEAD"], stdout=t, cwd=folder)
+    with open(t.name, "r") as t:
+        return t.readline().strip()
+
+
+def get_commit_in(folder: str):
+    """returns the commit hash of the git repository in the given folder.
+ + The command "git log -1 --format=%h" is called in the given folder + and the output is returned + """ + + with tempfile.NamedTemporaryFile(delete=False, mode="w") as t: + call(["git", "log", "-1", "--format=%h"], stdout=t, cwd=folder) + with open(t.name, "r") as t: + return t.readline().strip() diff --git a/src/caosdb/utils/server_side_scripting.py b/src/caosdb/utils/server_side_scripting.py index 663178dcbda4293cb30dff88efbfb7b7302df70d..7e5ee4390ae3314792d12fd2942980aa3d9c9773 100644 --- a/src/caosdb/utils/server_side_scripting.py +++ b/src/caosdb/utils/server_side_scripting.py @@ -30,7 +30,8 @@ from lxml import etree from caosdb.connection.connection import get_connection from caosdb.connection.utils import urlencode -from caosdb.connection.encode import MultipartParam, multipart_encode +from caosdb.connection.encode import (MultipartParam, multipart_encode, + ReadableMultiparts) def _make_params(pos_args, opts): @@ -63,6 +64,7 @@ def _make_multipart_request(call, pos_args, opts, files): filename=filename)) body, headers = multipart_encode(parts) + body = ReadableMultiparts(body) return body, headers diff --git a/src/doc/administration.rst b/src/doc/administration.rst index 061acc8364d2ef62f743a20d7b9e6562baac0fc5..eab02e43a833559dc21ea7a9fa5edfaf6431facf 100644 --- a/src/doc/administration.rst +++ b/src/doc/administration.rst @@ -5,10 +5,12 @@ The Python script ``caosdb_admin.py`` should be used for administrative tasks. Call ``caosdb_admin.py --help`` to see how to use it. The most common task is to create a new user (in the CaosDB realm) and set a -password for the user (note that a user typically needs to be activated):: +password for the user (note that a user typically needs to be activated): - caosdb_admin.py create_user anna - caosdb_admin.py set_user_password anna - caosdb_admin.py add_user_roles anna administration - caosdb_admin.py activate_user anna +.. code:: console + + $ caosdb_admin.py create_user anna + $ caosdb_admin.py set_user_password anna + $ caosdb_admin.py add_user_roles anna administration + $ caosdb_admin.py activate_user anna diff --git a/src/doc/conf.py b/src/doc/conf.py index 9e65bff1eba5d114a77d3ed9405e883df6ad7470..0fa5de575f5424e267cad8ecc193cca8230faa8b 100644 --- a/src/doc/conf.py +++ b/src/doc/conf.py @@ -25,14 +25,14 @@ import sphinx_rtd_theme # noqa: E402 # -- Project information ----------------------------------------------------- project = 'pycaosdb' -copyright = '2022, IndiScale GmbH' +copyright = '2023, IndiScale GmbH' author = 'Daniel Hornung' # The short X.Y version -version = '0.9.0' +version = '0.12.1' # The full version, including alpha/beta/rc tags # release = '0.5.2-rc2' -release = '0.9.0-dev' +release = '0.12.1-dev' # -- General configuration --------------------------------------------------- @@ -78,6 +78,9 @@ exclude_patterns = [] # The name of the Pygments (syntax highlighting) style to use. 
pygments_style = None +suppress_warnings = [ + "autosectionlabel.*", # duplicate labels +] # -- Options for HTML output ------------------------------------------------- diff --git a/src/doc/gallery/curator-permissions.rst b/src/doc/gallery/curator-permissions.rst new file mode 100644 index 0000000000000000000000000000000000000000..fa6b4022b7fbc1d042ed00f265e63a2675794a21 --- /dev/null +++ b/src/doc/gallery/curator-permissions.rst @@ -0,0 +1,123 @@ + +Setting permissions for a curator role +====================================== + +The following example shows how to create and set permissions for a ``curator`` +role that is allowed to insert, update, or delete any entity apart from a set of +RecordTypes and properties that define a "core data model" which can only be +altered with administration permissions. + +In the following, you'll learn how to + +1. create the ``curator`` role. +2. configure the ``global_entity_permissions.xml`` s.th. the ``curator`` role is + allowed to insert, update, or delete any entity by default. +3. use a Python script to override the above configuration for the entities in + the externally defined core data model. + +Prerequisites +------------- + +This example needs some preparations regarding your CaosDB setup that have to +(or, for the sake of simplicity, should) be done outside the actual Python +example script. + +The curator role +~~~~~~~~~~~~~~~~ + +First, a ``curator`` role is created with a meaningful description. We'll use +``caosdb_admin.py`` for this which leads to the following command: + +.. code:: console + + $ caosdb_admin.py create_role "curator" "A user who is permitted to create new Records, Properties, and RecordTypes but who is not allowed to change the core data model." + +To actually see how this role's permissions change, we also need a user with +this role. Assume you already have created and activated (see +:doc:`Administration <../administration>`) a ``test_curator`` user, then +``caosdb_admin.py`` is used again to assign it the correct role: + +.. code:: console + + $ caosdb_admin.py add_user_roles test_curator curator + +.. note:: + + The ``test_curator`` user shouldn't have administration privileges, otherwise + the below changes won't have any effect. + +The core data model and caosdb-advanced-user-tools +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +In principle, the following script works with any data model defined in a json +or yaml file (just adapt lines 39-42 accordingly). In this example, we'll use the +`metadata schema <https://github.com/leibniz-zmt/zmt-metadata-schema>`_ that was +developed by J. Schmidt at the `Leibniz Centre for Tropical Marine Research +<https://www.leibniz-zmt.de/en/>`_. + +Clone the schemata into the same directory containing the below script via + +.. code:: console + + $ git clone https://github.com/leibniz-zmt/zmt-metadata-schema.git + +Furthermore, we'll need the `CaosDB Advanced User Tools +<https://gitlab.com/caosdb/caosdb-advanced-user-tools>`_ for loading the +metadata schemata from the json files, so install them via + +.. code:: console + + $ pip install caosadvancedtools + +The global entity permissions file +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Users with the ``curator`` role should be able to have any permission for all +entities by default. The exceptions for the core data model entities will be set +with the script below. 
These default settings are best done via the +``global_entities_permissions.xml`` config file (see the `server documentation +<https://docs.indiscale.com/caosdb-server/permissions.html#how-to-set-permissions>`_). Simply +add the following line to the file + +.. code:: xml + + <Grant priority="true" role="curator"><Permission name="*"/></Grant> + +This means that, by default, all users with the ``curator`` role are **granted** +all entity permissions (including insert, update, and delete as specified in the +beginning) **with priority**. This ensures, that no normal user is allowed to +overrule these permissions (since it is granted with priority), but it can still +be denied for the core data model entities by a **deny** rule with priority. See +the server documentation on `permission +calculation <https://docs.indiscale.com/caosdb-server/permissions.html#permission-calculation>`_ +for more information on which permission rules can or can't be overruled. + +Your complete ``global_entities_permissions.xml`` might then look like + +.. code:: xml + + <globalPermissions> + <Grant priority="false" role="?OWNER?"><Permission name="*"/></Grant> + <Grant priority="false" role="?OTHER?"><Permission name="RETRIEVE:*"/></Grant> + <Grant priority="false" role="?OTHER?"><Permission name="USE:*"/></Grant> + <Grant priority="false" role="anonymous"><Permission name="RETRIEVE:*"/></Grant> + <Grant priority="true" role="curator"><Permission name="*"/></Grant> + <Deny priority="false" role="?OTHER?"><Permission name="UPDATE:*"/></Deny> + <Deny priority="false" role="?OTHER?"><Permission name="DELETE"/></Deny> + <Deny priority="true" role="?OTHER?"><Permission name="EDIT:ACL"/></Deny> + </globalPermissions> + +.. note:: + + Note that you have to restart your CaosDB server after modifying the + ``global_entities_permissions.xml``. + +The code +-------- + +After having applied all of the above prerequisites and restarting your CaosDB +server, execute the following code. + +:download:`Download full code<curator_permissions.py>` + +.. literalinclude:: curator_permissions.py diff --git a/src/doc/gallery/curator_permissions.py b/src/doc/gallery/curator_permissions.py new file mode 100644 index 0000000000000000000000000000000000000000..16b4b7f6f1bb9abfb7e191c6a1101181984bce9a --- /dev/null +++ b/src/doc/gallery/curator_permissions.py @@ -0,0 +1,100 @@ +#!/usr/bin/env python3 +# encoding: utf-8 +# +# This file is a part of the CaosDB Project. +# +# Copyright (C) 2022 Indiscale GmbH <info@indiscale.com> +# Copyright (C) 2022 Florian Spreckelsen <f.spreckelsen@indiscale.com> +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see <https://www.gnu.org/licenses/>. 
+# + +import os +import sys + +import caosdb as db +from caosadvancedtools.models.parser import parse_model_from_json_schema +from caosdb import administration as admin + +CURATOR = "curator" + + +def main(): + """Set curator role permissions: Is allowed to edit all Records; is allowed + to create new RTs and Properties and change them, but is not allowed to + change anything defined in the core data model, i.e., in the schemas. + + """ + dataspace_definitions = parse_model_from_json_schema( + "zmt-metadata-schema/schemas/dataspace.schema.json") + dataset_definitions = parse_model_from_json_schema( + "zmt-metadata-schema/schemas/dataset.schema.json") + + # Set general permissions. The curator users should be allowed to perform + # any transaction. + perms = admin._get_permissions(CURATOR) + general_grant_perms = [ + "TRANSACTION:*" + ] + + for p in general_grant_perms: + + g = admin.PermissionRule(action="Grant", permission=p, priority=True) + d = admin.PermissionRule(action="Deny", permission=p, priority=True) + + if g in perms: + perms.remove(g) + if d in perms: + perms.remove(d) + perms.add(g) + + admin._set_permissions(CURATOR, permission_rules=perms) + + # Deny all permissions that could change the data model ... + core_model_deny_permissions = [ + "DELETE", + "UPDATE:*", + "EDIT:ACL" + ] + # ... but allow read-access and of course using the entities as parents, + # properties, ... + core_model_grant_permissions = [ + "RETRIEVE:*", + "USE:*", + ] + + # Iterate over all entities defined in the schemas and update their access control list (ACL) accordingly. + updates = db.Container() + for model in [dataspace_definitions, dataset_definitions]: + + for ent in model.values(): + if ent.name in [u.name for u in updates]: + # Skip entities that have been updated already + continue + # The entity needs to be retrieved with the ACL flag to update the + # ACL down the road + ent.retrieve(flags={"ACL": None}) + for d in core_model_deny_permissions: + ent.deny(role=CURATOR, priority=True, permission=d) + ent.update_acl() + ent.retrieve(flags={"ACL": None}) + for g in core_model_grant_permissions: + ent.grant(role=CURATOR, priority=True, permission=g) + updates.append(ent) + ent.update_acl() + + +if __name__ == "__main__": + + sys.exit(main()) diff --git a/src/doc/gallery/index.rst b/src/doc/gallery/index.rst index a6ef53e4c7d1272c5dbc8c62b4d90a89591cac0f..bfba4317c3556d0692eb402f42ba3699be586d5a 100644 --- a/src/doc/gallery/index.rst +++ b/src/doc/gallery/index.rst @@ -14,3 +14,4 @@ This chapter collects code examples which can be immediately run against an empt :caption: The code examples: simulation + curator-permissions diff --git a/src/doc/high_level_api.rst b/src/doc/high_level_api.rst index 603052b135ad2289caea7e3bed59ae9d3301f811..e92f2dd5e326b14222ad3c776ce5f5ed1ed31536 100644 --- a/src/doc/high_level_api.rst +++ b/src/doc/high_level_api.rst @@ -60,7 +60,7 @@ it using its high level representation: from caosdb.high_level_api import query - res = query("FIND Record Experiment") + res = query("FIND Experiment") experiment = res[0] # Use a property: print(experiment.date) @@ -80,7 +80,7 @@ series of commands to achieve the same result: import caosdb as db - res = db.execute_query("FIND Record Experiment") + res = db.execute_query("FIND Experiment") output = res.get_property("output") output_file = db.File(id=output.value[0].id).retrieve() print(output_file.path) diff --git a/src/doc/tutorials/Data-Insertion.rst b/src/doc/tutorials/Data-Insertion.rst index 
f2c7f830d1403fbdf45354d1f36a4ea339759058..82df07691f7c78a2787d67463ca222d2e68249ca 100644
--- a/src/doc/tutorials/Data-Insertion.rst
+++ b/src/doc/tutorials/Data-Insertion.rst
@@ -83,33 +83,86 @@ corresponding python class:
 
 .. code:: python
 
-   rec = db.Record()
+   rec = db.Record() # rec.id is None
    rec.add_parent(name="Experiment")
    rec.add_property(name="date", value="2020-01-07")
    rec.insert()
+   print(rec.id) # rec.id set by the server
+
+Here, the record has a parent, the RecordType “Experiment”, and a Property date
+with a value ``"2020-01-07"``. After the successful insertion, our new Record is
+assigned an ``id`` by the server. In the following, let's assume this id to be
+``256``.
+
+Reference Properties
+--------------------
+
+Now suppose we want to insert an analysis that references the above experiment
+record as its source data. Since we know that the id of the experiment record is
+256, we can do the following:
+
+.. code:: python
+
+   ana = db.Record().add_parent(name="Analysis") # Create record and assign parent in one line
+   ana.add_property(name="Experiment", value=256)
+   ana.add_property(name="date", value="2020-01-08")
+   # possibly, add more properties here ...
+   ana.insert()
+
+The experiment record's id is used as the value of the ``Experiment`` property
+of the analysis Record (note how we use the RecordType ``Experiment`` as a
+``REFERENCE`` property here). Sending a CaosDB query like ``FIND RECORD
+Experiment WHICH IS REFERENCED BY A Analysis WITH date=2020-01-08`` would now
+return our original experiment record.
 
-Here, the record has a parent: The RecordType “Experiment”. And a
-Property: date.
+Equivalently, we can also use the Python object of the experiment record, i.e.,
+``rec`` as the value of the ``Experiment`` property:
 
-Note, that if you want to use a property that is not a primitive
-datatype like db.INTEGER and so on, you need to use the ID of the Entity
-that you are referencing.
+
+.. code:: python
+
+   ana = db.Record().add_parent(name="Analysis")
+   ana.add_property(name="Experiment", value=rec)
+   ana.add_property(name="date", value="2020-01-08")
+   # possibly, add more properties here ...
+   ana.insert()
+
+Finally, we can also insert both records at the same time using a
+``db.Container``:
 
 .. code:: python
 
    rec = db.Record()
    rec.add_parent(name="Experiment")
-   rec.add_property(name="report", value=235507)
-   rec.add_property(name="Analysis", value=230007)
-   rec.insert()
+   rec.add_property(name="date", value="2020-01-07")
+   ana = db.Record().add_parent(name="Analysis")
+   ana.add_property(name="Experiment", value=rec)
+   ana.add_property(name="date", value="2020-01-08")
+
+   cont = db.Container().extend([rec, ana])  # Add experiment and analysis
+                                             # records to our container
+   cont.insert() # Insert both at the same time, the CaosDB server will
+                 # resolve the reference upon insertion.
 
-Of course, the IDs 235507 and 230007 need to exist in CaosDB. The first
-example shows how to use a db.REFERENCE Property (report) and the second
-shows that you can use any RecordType as Property to reference a Record
-that has such a parent.
+All three ways result in an Analysis record which references an Experiment
+record.
 
-Most Records do not have name however it can absolutely make sense. In
-that case use the name argument when creating it. Another useful feature
+.. note::
+
+   Instead of using the ``Experiment`` RecordType as a ``REFERENCE`` property,
+   we can also create an actual property with data type ``Experiment``:
+   ``db.Property(name="source", datatype="Experiment")``. Now you can add this
+   property to the analysis record with the experiment record as a value as
+   explained above. As a rule of thumb, using a separate property for these
+   references is meaningful whenever you want to highlight that, e.g., this
+   particular experiment provided the source data for your analysis (as opposed
+   to another experiment that was used for validation).
+
+Advanced insertions
+-------------------
+
+Most Records do not have a name; however, it can absolutely make sense to assign
+one. In that case use the name argument when creating it. Another useful feature
 is the fact that properties can have units:
 
 .. code:: python
@@ -134,7 +187,7 @@ container. E.g. if you have a python list ``analysis_results``:
 
    cont.insert()
 
-Useful is also, that you can insert directly tabular data.
+It may also be useful to know that you can directly insert tabular data.
 
 .. code:: python
 
@@ -144,8 +197,8 @@ Useful is also, that you can insert directly tabular data.
    print(recs)
    recs.insert()
 
-With this example file
-`test.csv <uploads/4f2c8756a26a3984c0af09d206d583e5/test.csv>`__.
+Try it yourself with this example file
+`test.csv <uploads/4f2c8756a26a3984c0af09d206d583e5/test.csv>`__!
 
 List Properties
 ---------------
@@ -170,6 +223,28 @@ list-valued attribute in Python, as the following example illustrates.
 
    print(retrieved.get_property("TestList").value)
 
+.. note::
+   Properties of Entities that shall be updated need to have IDs. Let's look at an
+   example:
+
+.. code:: python
+
+   experiment = db.Record(id=1111).retrieve()
+   experiment.add_property(name='date', value="2020-01-01")
+   experiment.update() # Fails! The 'date' Property needs to have an ID.
+
+The easiest way to get around this is to use the corresponding entity getter:
+
+.. code:: python
+
+   experiment = db.Record(id=1111).retrieve()
+   experiment.add_property(db.get_entity_by_name('date'), value="2020-01-01")
+   experiment.update() # Works!
+
+There are also the functions ``get_entity_by_path`` and ``get_entity_by_id``. You can easily use
+cached versions of those functions (see :doc:`caching options<caching>`).
+
+
 File Update
 -----------
diff --git a/src/doc/tutorials/Entity-Getters.rst b/src/doc/tutorials/Entity-Getters.rst
new file mode 100644
index 0000000000000000000000000000000000000000..50ed13201e5720de22bf0b605bc5162834a458a8
--- /dev/null
+++ b/src/doc/tutorials/Entity-Getters.rst
@@ -0,0 +1,16 @@
+
+Entity Getters
+==============
+
+There is a very frequent situation when working with PyCaosDB: You need to get a specific Entity
+from the remote server. For example, you need the Property Entity in order to make an update. Sure,
+you can do a ``db.Entity().retrieve()`` or submit a query, but there is an even faster way which
+also helps prevent errors:
+
+- ``get_entity_by_name``
+- ``get_entity_by_id``
+- ``get_entity_by_path``
+
+You can call these functions with a single argument (name/id/path). Since these functions are
+frequently used with the same arguments over and over again, you might want to look at the
+:doc:`caching options<caching>`.
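+
+For illustration, here is a minimal sketch of how these getters can be used. The name, id, and
+path below are made-up examples, and the import path is assumed to follow the module
+``caosdb.utils.get_entity`` introduced above:
+
+.. code:: python
+
+   from caosdb.utils.get_entity import (get_entity_by_name,
+                                        get_entity_by_id,
+                                        get_entity_by_path)
+
+   # Each getter submits a unique query; an error is raised if the query
+   # does not match exactly one entity.
+   date_property = get_entity_by_name("date")       # FIND ENTITY WITH name='date'
+   experiment = get_entity_by_id(256)               # FIND ENTITY WITH id='256'
+   datafile = get_entity_by_path("/data/test.csv")  # FIND FILE WHICH IS STORED AT '/data/test.csv'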
diff --git a/src/doc/tutorials/basic_analysis.rst b/src/doc/tutorials/basic_analysis.rst
index cc185e0ee08f9e5ee0f890c0ab55f52972882d17..c40cad28b8c9a3be537c641b9614da2eb4df8dd9 100644
--- a/src/doc/tutorials/basic_analysis.rst
+++ b/src/doc/tutorials/basic_analysis.rst
@@ -34,7 +34,7 @@ Often we are interested in table like data for our processing. And the disentang
 >>> from caosadvancedtools.table_converter import to_table
 >>> # Let us retrieve the data in a table like form using `SELECT`
->>> data = db.execute_query("SELECT quality_factor FROM RECORD Analysis with quality_factor" )
+>>> data = db.execute_query("SELECT quality_factor FROM Analysis with quality_factor" )
 >>> table = to_table(data)
 >>> print(table)
    quality_factor
diff --git a/src/doc/tutorials/caching.rst b/src/doc/tutorials/caching.rst
new file mode 100644
index 0000000000000000000000000000000000000000..aad9a1ddbd9e93a3cd06887eaffcf956c3c5bea6
--- /dev/null
+++ b/src/doc/tutorials/caching.rst
@@ -0,0 +1,58 @@
+
+Caching
+=======
+
+.. note::
+
+   Caching is great because it can speed up things considerably. But it can also create dangerous
+   pitfalls if the cache is not cleared when needed and you work with outdated data. Thus, please use
+   the cache with care and make sure to clear it when needed.
+
+Python provides great tools for caching. For example, you could define a ``cached_get_by_name``
+function, easily created from ``get_entity_by_name`` using Python's ``lru_cache``:
+
+.. code:: python
+
+   from functools import lru_cache
+
+   @lru_cache(maxsize=1000)
+   def cached_get_by_name(name):
+       return db.get_entity_by_name(name)
+
+   exp = cached_get_by_name('Experiment')
+   # reset the cache with
+   cached_get_by_name.cache_clear()
+
+For convenience, PyCaosDB provides the ``caosdb.cached`` module that defines the functions
+``cached_query`` and ``cached_get_entity_by``, which use a shared cache. Let's have a look:
+
+.. code:: python
+
+   from caosdb.cached import cached_query, cached_get_entity_by, cache_clear, cache_info, cache_initialize
+   rt1 = cached_get_entity_by(name='RT1')
+   qresult = cached_query('FIND Experiment WITH parameter=1')
+   # you can inspect the cache
+   print(cache_info())
+   # this will not cause a server request since it is cached
+   rt1 = cached_get_entity_by(name='RT1')
+   # you can clear the cache with
+   cache_clear()
+   # If you want to have a cache with a custom size, you can initialize it (again). Old cached
+   # data is lost.
+   cache_initialize(maxsize=10)
+
+
+You can also add entities to the cache manually. This is useful when you
+have entities on hand from previous queries that you want to add.
+
+.. code:: python
+
+   from caosdb.cached import cache_fill, AccessType
+   # Here, items must be a dict with Entity IDs as keys and the Entities as values.
+   cache_fill(items, AccessType.EID, unique=True)
+   # If you now use IDs that were in items, they are taken from the cache.
+   e1 = cached_get_entity_by(eid=10001)
+
+When filling the cache with Entity objects for ``cached_get_entity_by``, you need to set
+``unique=True``, whereas the cache for ``cached_query`` should be filled with Container objects and
+``unique=False``.
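+
+As a sketch of the second case (the query string and the record id below are made up), the query
+cache could be pre-filled like this:
+
+.. code:: python
+
+   import caosdb as db
+   from caosdb.cached import cache_fill, cached_query, AccessType
+
+   # Keys are query strings, values are Containers holding the already known results.
+   items = {"FIND Experiment WITH parameter=1":
+            db.Container().extend([db.Record(id=10001)])}
+   cache_fill(items, AccessType.QUERY, unique=False)
+
+   # This query is now answered from the cache without contacting the server.
+   result = cached_query("FIND Experiment WITH parameter=1")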
+ + diff --git a/src/doc/tutorials/complex_data_models.rst b/src/doc/tutorials/complex_data_models.rst index 0fa868e78bb45a2905dc99392a3a28a9832d369e..7b45b6a2681bcf781fd4acc9329ffada28d4e01c 100644 --- a/src/doc/tutorials/complex_data_models.rst +++ b/src/doc/tutorials/complex_data_models.rst @@ -69,8 +69,9 @@ Examples c.insert() # Useful for testing: wait until the user presses a key - # Meanwhile have a look at the WebUI: You can e.g. query "FIND Test*" to view - # all the entities created here and see the relations and links between them. + # Meanwhile have a look at the WebUI: You can e.g. query "FIND ENTITY Test*" + # to view all the entities created here and see the relations and links + # between them. b = input("Press any key to cleanup.") # cleanup everything after the user presses any button. c.delete() diff --git a/src/doc/tutorials/first_steps.rst b/src/doc/tutorials/first_steps.rst index 34b96bbeca416107fb34feb4707b9ef46fc49fe7..c84ec52aa63f0563b22c698081e89600c7af6122 100644 --- a/src/doc/tutorials/first_steps.rst +++ b/src/doc/tutorials/first_steps.rst @@ -25,7 +25,7 @@ However, you can also translate the examples to the data model that you have at Let's start with a simple query. ->>> response = db.execute_query("FIND RECORD Guitar") +>>> response = db.execute_query("FIND Guitar") Queries work the same way as in the web interface. You simply provide the query string to the corresponding function (``db.execute_query``). However, the result is not @@ -52,6 +52,7 @@ Let's look at the first element: <Record ... .. The above example needs doctest ELLIPSIS + You see that the object is a Record. It has a Parent and two Properties. .. note:: @@ -87,7 +88,7 @@ Ids can also come in handy when searching. Suppose you have some complicated con >>> # This condition is not that complicated and long but let's suppose it was. ->>> record = db.execute_query("FIND Analysis with quality_factor=0.08", unique=True) +>>> record = db.execute_query("FIND MusicalAnalysis with quality_factor=0.08", unique=True) >>> # You can use unique=True when you only expect one result Entity. An error will be >>> # thrown if the number of results is unequal to 1 and the resulting object will be >>> # an Entity and not a Container @@ -119,7 +120,7 @@ If the files are large data files, it is often a better idea to only retrieve th Summary ------- -Now you know, how you can use Python to send queries to CaosDB and you can access +Now you know how to use Python to send queries to CaosDB and you can access the result Records and their properties. The next tutorial shows how to make some meaningful use of this. diff --git a/src/doc/tutorials/index.rst b/src/doc/tutorials/index.rst index 0b08d0b4fe153d803a780bd144787819b827db78..ce37993d7ec5e0888da8a2b4c58904bcbdc43bb4 100644 --- a/src/doc/tutorials/index.rst +++ b/src/doc/tutorials/index.rst @@ -14,6 +14,8 @@ advanced usage of the Python client. basic_analysis Data-Insertion errors + Entity-Getters + caching data-model-interface complex_data_models serverside diff --git a/tox.ini b/tox.ini index e321891883b9425d24543a41f2d1283e0be52109..8212226eef2759c1864a86b8a3ad8f926480db4a 100644 --- a/tox.ini +++ b/tox.ini @@ -1,5 +1,5 @@ [tox] -envlist=py38, py39, py310 +envlist=py37, py38, py39, py310, py311 skip_missing_interpreters = true [testenv] @@ -7,9 +7,13 @@ deps = . 
nose pytest pytest-cov - python-dateutil - jsonschema==4.0.1 + jsonschema>=4.4.0 commands=py.test --cov=caosdb -vv {posargs} [flake8] max-line-length=100 + +[pytest] +testpaths = unittests +xfail_strict = True +addopts = -x -vv --cov=caosdb diff --git a/unittests/docker/Dockerfile b/unittests/docker/Dockerfile index 286d235fea5e8ae3d358d7c5a1836a692f0b53b1..6023324d236e136314b18dc0af8afc02a66fdf90 100644 --- a/unittests/docker/Dockerfile +++ b/unittests/docker/Dockerfile @@ -1,4 +1,8 @@ -FROM debian:latest +FROM debian:bullseye +# Use local package repository +COPY sources.list.local /etc/apt/ +RUN mv /etc/apt/sources.list /etc/apt/sources.list.orig +RUN cat /etc/apt/sources.list.local /etc/apt/sources.list.orig > /etc/apt/sources.list RUN apt-get update && \ apt-get install -y \ pylint3 python3-pip tox git \ diff --git a/unittests/docker/sources.list.local b/unittests/docker/sources.list.local new file mode 100644 index 0000000000000000000000000000000000000000..c0b4107350ba37e77aa95d5a56c31976979e51e1 --- /dev/null +++ b/unittests/docker/sources.list.local @@ -0,0 +1,6 @@ +# Local repositories at Netcup +deb http://debian.netcup.net/debian/ buster main +deb http://mirrors.n-ix.net/debian-security buster/updates main +deb http://debian.netcup.net/debian/ buster-updates main + +# The original content follows here: \ No newline at end of file diff --git a/unittests/test_apiutils.py b/unittests/test_apiutils.py index 43ab8107183f16bf8df1d0ea8e447b378bcf8123..bda381cf6427377194e272dfa14b83399b6f012f 100644 --- a/unittests/test_apiutils.py +++ b/unittests/test_apiutils.py @@ -1,11 +1,12 @@ -# -*- encoding: utf-8 -*- # # This file is a part of the CaosDB Project. # -# Copyright (C) 2018 Research Group Biomedical Physics, -# Max-Planck-Institute for Dynamics and Self-Organization Göttingen # Copyright (C) 2020 Timm Fitschen <t.fitschen@indiscale.com> +# Copyright (C) 2022 Florian Spreckelsen <f.spreckelsen@indiscale.com> +# Copyright (C) 2022 Daniel Hornung <d.hornung@indiscale.com> # Copyright (C) 2020-2022 IndiScale GmbH <info@indiscale.com> +# Copyright (C) 2018 Research Group Biomedical Physics, +# Max-Planck-Institute for Dynamics and Self-Organization Göttingen # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU Affero General Public License as @@ -20,7 +21,6 @@ # You should have received a copy of the GNU Affero General Public License # along with this program. If not, see <https://www.gnu.org/licenses/>. # -# ** end header # # Test apiutils # A. 
Schlemmer, 02/2018 @@ -30,6 +30,7 @@ import pytest import caosdb as db import caosdb.apiutils from caosdb.apiutils import (apply_to_ids, compare_entities, create_id_query, + empty_diff, EntityMergeConflictError, resolve_reference, merge_entities) from caosdb.common.models import SPECIAL_ATTRIBUTES @@ -272,8 +273,10 @@ def test_copy_entities(): for i in [0, 1]: assert c.properties[i] is not r.properties[i] for special in SPECIAL_ATTRIBUTES: - assert getattr(c.properties[i], special) == getattr(r.properties[i], special) - assert c.get_importance(c.properties[i]) == r.get_importance(r.properties[i]) + assert getattr(c.properties[i], special) == getattr( + r.properties[i], special) + assert c.get_importance( + c.properties[i]) == r.get_importance(r.properties[i]) def test_merge_entities(): @@ -296,6 +299,19 @@ def test_merge_entities(): assert r2.get_property("F").value == "text" +def test_merge_bug_conflict(): + r = db.Record() + r.add_property(name="C", value=4) + r2 = db.Record() + r2.add_property(name="C", value=4, datatype="TEXT") + merge_entities(r, r2) + + r3 = db.Record() + r3.add_property(name="C", value=4, datatype="INTEGER") + with pytest.raises(EntityMergeConflictError): + merge_entities(r3, r2) + + def test_merge_bug_109(): rt = db.RecordType(name="TestBug") p = db.Property(name="test_bug_property", datatype=db.LIST(db.INTEGER)) @@ -313,10 +329,12 @@ def test_merge_bug_109(): assert r_a.get_property("test_bug_property").value == [18, 19] assert "<Value>18</Value>\n <Value>19</Value>" in str(r_b) - assert "<Value>18</Value>\n <Value>19</Value>\n <Value>18</Value>\n <Value>19</Value>" not in str(r_b) + assert "<Value>18</Value>\n <Value>19</Value>\n <Value>18</Value>\n <Value>19</Value>" not in str( + r_b) assert "<Value>18</Value>\n <Value>19</Value>" in str(r_a) - assert "<Value>18</Value>\n <Value>19</Value>\n <Value>18</Value>\n <Value>19</Value>" not in str(r_a) + assert "<Value>18</Value>\n <Value>19</Value>\n <Value>18</Value>\n <Value>19</Value>" not in str( + r_a) @pytest.mark.xfail @@ -336,7 +354,223 @@ def test_bug_109(): assert r_a.get_property("test_bug_property").value == [18, 19] assert "<Value>18</Value>\n <Value>19</Value>" in str(r_b) - assert "<Value>18</Value>\n <Value>19</Value>\n <Value>18</Value>\n <Value>19</Value>" not in str(r_b) + assert "<Value>18</Value>\n <Value>19</Value>\n <Value>18</Value>\n <Value>19</Value>" not in str( + r_b) assert "<Value>18</Value>\n <Value>19</Value>" in str(r_a) - assert "<Value>18</Value>\n <Value>19</Value>\n <Value>18</Value>\n <Value>19</Value>" not in str(r_a) + assert "<Value>18</Value>\n <Value>19</Value>\n <Value>18</Value>\n <Value>19</Value>" not in str( + r_a) + + +def test_wrong_merge_conflict_reference(): + """Test a wrongly detected merge conflict in case of two records referencing + two different, but identical objects. 
+ + """ + # Two identical license records will be referenced from both records to be + # merged + license_rt = db.RecordType(name="license") + license_rec_a = db.Record(name="CC-BY-3.0").add_parent(license_rt) + license_rec_b = db.Record(name="CC-BY-3.0").add_parent(license_rt) + + # two referencing records + dataset_rt = db.RecordType(name="Dataset") + title_prop = db.Property(name="title", datatype=db.TEXT) + doi_prop = db.Property(name="DOI", datatype=db.TEXT) + rec_a = db.Record().add_parent(dataset_rt) + rec_a.add_property(name=license_rt.name, + datatype=license_rt.name, value=license_rec_a) + rec_a.add_property(name=title_prop.name, value="Some dataset title") + + rec_b = db.Record().add_parent(dataset_rt) + rec_b.add_property(name=license_rt.name, + datatype=license_rt.name, value=license_rec_b) + rec_b.add_property(name=doi_prop.name, value="https://doi.org/12345.678") + + merge_entities(rec_a, rec_b) + assert rec_a.get_property(license_rt.name) is not None + assert rec_a.get_property(license_rt.name).value is not None + assert isinstance(rec_a.get_property(license_rt.name).value, db.Record) + assert rec_a.get_property(license_rt.name).value.name == license_rec_a.name + assert rec_a.get_property(license_rt.name).value.name == license_rec_b.name + assert rec_a.get_property("title").value == "Some dataset title" + assert rec_a.get_property("doi").value == "https://doi.org/12345.678" + + # Reset rec_a + rec_a = db.Record().add_parent(dataset_rt) + rec_a.add_property(name=license_rt.name, + datatype=license_rt.name, value=license_rec_a) + rec_a.add_property(name=title_prop.name, value="Some dataset title") + + # this does not compare referenced records, so it will fail + with pytest.raises(EntityMergeConflictError): + merge_entities(rec_a, rec_b, merge_references_with_empty_diffs=False) + + # ... as should this, of course + rec_b.get_property(license_rt.name).value.name = "Another license" + with pytest.raises(EntityMergeConflictError) as re: + merge_entities(rec_a, rec_b) + + +def test_empty_diff(): + + rec_a = db.Record(name="A") + rec_b = db.Record(name="B") + + assert empty_diff(rec_a, rec_a) + assert not empty_diff(rec_a, rec_b) + + rec_a.add_parent(name="RT") + rec_b.add_parent(name="RT") + assert empty_diff(rec_a, rec_a) + assert not empty_diff(rec_a, rec_b) + + rec_b.name = "A" + assert empty_diff(rec_a, rec_b) + + rec_a.add_property(name="some_prop", value=1) + assert not empty_diff(rec_a, rec_b) + + rec_b.add_property(name="some_prop", value=1) + assert empty_diff(rec_a, rec_b) + + rec_b.get_property("some_prop").value = 2 + assert not empty_diff(rec_a, rec_b) + + rec_b.get_property("some_prop").value = 1 + rec_b.add_property(name="some_other_prop", value="Test") + assert not empty_diff(rec_a, rec_b) + + rec_a.add_property(name="some_other_prop", value="Test") + assert empty_diff(rec_a, rec_b) + + # reference identical records, but different Python Record objects + ref_rec_a = db.Record(name="Ref").add_parent(name="RefType") + ref_rec_b = db.Record(name="Ref").add_parent(name="RefType") + rec_a.add_property(name="RefType", datatype="RefType", value=ref_rec_a) + rec_b.add_property(name="RefType", datatype="RefType", value=ref_rec_b) + # the default is `compare_referenced_records=False`, so the diff shouldn't + # be empty (different Python objects are referenced.) 
+ assert not empty_diff(rec_a, rec_b) + # when looking into the referenced record, the diffs should be empty again + assert empty_diff(rec_a, rec_b, compare_referenced_records=True) + + # The same for lists of references + rec_a.remove_property("RefType") + rec_b.remove_property("RefType") + assert empty_diff(rec_a, rec_b) + rec_a.add_property(name="RefType", datatype=db.LIST( + "RefType"), value=[ref_rec_a, ref_rec_a]) + rec_b.add_property(name="RefType", datatype=db.LIST( + "RefType"), value=[ref_rec_b, ref_rec_b]) + assert not empty_diff(rec_a, rec_b) + assert empty_diff(rec_a, rec_b, compare_referenced_records=True) + + # special case of ids + rec_a = db.Record(id=12) + rec_b = db.Record() + assert not empty_diff(rec_a, rec_b) + rec_b.id = 13 + assert not empty_diff(rec_a, rec_b) + rec_b.id = 12 + assert empty_diff(rec_a, rec_b) + + +def test_force_merge(): + """Test whether a forced merge overwrites existing properties correctly.""" + + # name overwrite + recA = db.Record(name="A") + recB = db.Record(name="B") + + with pytest.raises(EntityMergeConflictError): + merge_entities(recA, recB) + + merge_entities(recA, recB, force=True) + assert "B" == recA.name + # unchanged + assert "B" == recB.name + + # description overwrite + recA = db.Record() + recA.description = "something" + recB = db.Record() + recB.description = "something else" + + with pytest.raises(EntityMergeConflictError) as emce: + merge_entities(recA, recB) + assert str(emce.value) == """Conflict in special attribute description: +A: something +B: something else""" + + merge_entities(recA, recB, force=True) + assert recA.description == "something else" + # unchanged + assert recB.description == "something else" + + # property overwrite + recA = db.Record() + recA.add_property(name="propA", value="something") + recB = db.Record() + recB.add_property(name="propA", value="something else") + + with pytest.raises(EntityMergeConflictError): + merge_entities(recA, recB) + + merge_entities(recA, recB, force=True) + assert recA.get_property("propA").value == "something else" + # unchanged + assert recB.get_property("propA").value == "something else" + + # don't remove a property that's not in recB + recA = db.Record() + recA.add_property(name="propA", value="something") + recA.add_property(name="propB", value=5.0) + recB = db.Record() + recB.add_property(name="propA", value="something else") + + merge_entities(recA, recB, force=True) + assert recA.get_property("propA").value == "something else" + assert recA.get_property("propB").value == 5.0 + + # also overwrite datatypes ... + rtA = db.RecordType() + rtA.add_property(name="propA", datatype=db.INTEGER) + rtB = db.RecordType() + rtB.add_property(name="propA", datatype=db.TEXT) + + with pytest.raises(EntityMergeConflictError): + merge_entities(rtA, rtB) + + merge_entities(rtA, rtB, force=True) + assert rtA.get_property("propA").datatype == db.TEXT + # unchanged + assert rtB.get_property("propA").datatype == db.TEXT + + # ... 
and units + recA = db.Record() + recA.add_property(name="propA", value=5, unit="m") + recB = db.Record() + recB.add_property(name="propA", value=5, unit="cm") + + with pytest.raises(EntityMergeConflictError): + merge_entities(recA, recB) + merge_entities(recA, recB, force=True) + assert recA.get_property("propA").unit == "cm" + # unchanged + assert recB.get_property("propA").unit == "cm" + + +def test_merge_missing_list_datatype_82(): + """Merging two properties, where the list-valued one has no datatype.""" + + recA = db.Record().add_property("a", 5, datatype="B") + recB_with_DT = db.Record().add_property("a", [1, 2], datatype=f"LIST<{db.DOUBLE}>") + merge_entities(recA, recB_with_DT, force=True) + assert recA.get_property("a").datatype == f"LIST<{db.DOUBLE}>" + + recA = db.Record().add_property("a", 5, datatype="B") + recB_without_DT = db.Record().add_property("a", [1, 2]) + with pytest.raises(TypeError) as te: + merge_entities(recA, recB_without_DT, force=True) + assert "Invalid datatype: List valued properties" in str(te.value) diff --git a/unittests/test_cached.py b/unittests/test_cached.py new file mode 100644 index 0000000000000000000000000000000000000000..ce302d671d6077aed7d8457e70da2076ebe65d50 --- /dev/null +++ b/unittests/test_cached.py @@ -0,0 +1,295 @@ +# -*- coding: utf-8 -*- +# +# This file is a part of the CaosDB Project. +# +# Copyright (C) 2023 Henrik tom Wörden <h.tomwoerden@indiscale.com> +# Copyright (C) 2023 IndiScale GmbH <info@indiscale.com> +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see <https://www.gnu.org/licenses/>. 
+# + +""" Test the caosdb.cached module """ + +from caosdb.cached import (cached_get_entity_by, cache_clear, cache_info, cache_fill, + AccessType, cache_initialize, cached_query) +from unittest.mock import patch +import caosdb as db +from copy import deepcopy +import pytest + + +DUMMY_SERVER_CONTENT = [ + db.Record(name='a', id=101), + db.Record(name='b', id=102), + db.Record(name='c', id=103), + db.File(path='p', id=104), + db.File(path='pp', id=105), +] + + +@pytest.fixture(autouse=True) +def cache_clean_up(): + cache_clear() + yield + cache_clear() + + +def mocked_name_query(name): + # copy the object, because Entities would normally be created from XML response + return deepcopy([el for el in DUMMY_SERVER_CONTENT if el.name == name][0]) + + +def mocked_id_query(eid): + # copy the object, because Entities would normally be created from XML response + return deepcopy([el for el in DUMMY_SERVER_CONTENT if el.id == eid][0]) + + +def mocked_path_query(path): + # copy the object, because Entities would normally be created from XML response + return deepcopy([el for el in DUMMY_SERVER_CONTENT if el.path == path][0]) + + +def mocked_gen_query(q, unique): + if unique: + if q == 'a': + return DUMMY_SERVER_CONTENT[0] + else: + return None + else: + if q == 'a': + return db.Container().extend([DUMMY_SERVER_CONTENT[0]]) + else: + return db.Container().extend(DUMMY_SERVER_CONTENT) + + +@patch("caosdb.utils.get_entity.get_entity_by_name") +def test_get_by_name(mocked_get_by_name): + mocked_get_by_name.side_effect = mocked_name_query + # first call; not in cache -> mocked_execute is touched + a = cached_get_entity_by(name='a') + assert a.id == 101 + assert mocked_get_by_name.call_count == 1 + # second call; in cache -> mocked_execute is NOT touched (count is still 1) + b = cached_get_entity_by(name='a') + assert mocked_get_by_name.call_count == 1 + # the cache returned the same object + assert a is b + # check the info + assert cache_info().hits == 1 + assert cache_info().currsize == 1 + # after clearing the test, the mock is used again + cache_clear() + cached_get_entity_by(name='a') + assert mocked_get_by_name.call_count == 2 + # we fill the cache manually and make sure the element is used + cache_fill({'lol': db.Entity(id=10001, name='lol')}, AccessType.NAME, unique=True) + # there are now two elements in the cache: a and lol + assert cache_info().currsize == 2 + # we can retrieve the inserted element + lol = cached_get_entity_by(name='lol') + assert lol.id == 10001 + # this did not touch the mocked function + assert mocked_get_by_name.call_count == 2 + # make sure normal retrieval still works (count +1) + c = cached_get_entity_by(name='c') + assert mocked_get_by_name.call_count == 3 + assert c.id == 103 + + +@patch("caosdb.utils.get_entity.get_entity_by_id") +def test_get_by_id(mocked_get_by_id): + mocked_get_by_id.side_effect = mocked_id_query + # first call; not in cache -> mocked_execute is touched + b = cached_get_entity_by(eid=102) + assert b.id == 102 + assert b.name == 'b' + assert mocked_get_by_id.call_count == 1 + # second call; in cache -> mocked_execute is NOT touched (count is still 1) + a = cached_get_entity_by(eid=102) + assert mocked_get_by_id.call_count == 1 + # the cache returned the same object + assert a is b + # check the info + assert cache_info().hits == 1 + assert cache_info().currsize == 1 + # after clearing the test, the mock is used again + cache_clear() + cached_get_entity_by(eid=102) + assert mocked_get_by_id.call_count == 2 + # we fill the cache manually and make sure 
the element is used + cache_fill({10001: db.Entity(id=10001, name='lol')}, AccessType.EID, unique=True) + # there are now two elements in the cache: a and lol + assert cache_info().currsize == 2 + # we can retrieve the inserted element + lol = cached_get_entity_by(eid=10001) + assert lol.name == 'lol' + # this did not touch the mocked function + assert mocked_get_by_id.call_count == 2 + # make sure normal retrieval still works (count +1) + c = cached_get_entity_by(eid=103) + assert mocked_get_by_id.call_count == 3 + assert c.name == 'c' + + +@patch("caosdb.cached.get_entity.get_entity_by_path") +def test_get_by_path(mocked_get_by_path): + mocked_get_by_path.side_effect = mocked_path_query + # first call; not in cache -> mocked_execute is touched + b = cached_get_entity_by(path='p') + assert b.id == 104 + assert mocked_get_by_path.call_count == 1 + # second call; in cache -> mocked_execute is NOT touched (count is still 1) + a = cached_get_entity_by(path='p') + assert mocked_get_by_path.call_count == 1 + # the cache returned the same object + assert a is b + # check the info + assert cache_info().hits == 1 + assert cache_info().currsize == 1 + # after clearing the test, the mock is used again + cache_clear() + cached_get_entity_by(path='p') + assert mocked_get_by_path.call_count == 2 + # we fill the cache manually and make sure the element is used + cache_fill({'lol': db.File(id=10001, path='lol')}, AccessType.PATH, unique=True) + # there are now two elements in the cache: a and lol + assert cache_info().currsize == 2 + # we can retrieve the inserted element + lol = cached_get_entity_by(path='lol') + assert lol.id == 10001 + # this did not touch the mocked function + assert mocked_get_by_path.call_count == 2 + # make sure normal retrieval still works (count +1) + c = cached_get_entity_by(path='pp') + assert mocked_get_by_path.call_count == 3 + assert c.id == 105 + + +@patch("caosdb.cached.execute_query") +def test_get_by_query(mocked_query): + mocked_query.side_effect = mocked_gen_query + # test cache initialization + cache_initialize(maxsize=10) + assert cache_info().currsize == 0 + + # Non-existent entity + res = cached_get_entity_by(query='stuff') + assert res is None + assert cache_info().currsize == 1 + assert cache_info().hits == 0 + assert cache_info().misses == 1 + + res = cached_get_entity_by(query='stuff') + assert res is None + assert cache_info().currsize == 1 + assert cache_info().hits == 1 + assert cache_info().misses == 1 + + # Existent entity + a = cached_get_entity_by(query='a') + assert a is not None + assert a.id == 101 + assert cache_info().currsize == 2 + assert cache_info().hits == 1 + assert cache_info().misses == 2 + + +@patch("caosdb.cached.execute_query") +def test_cached_query(mocked_query): + mocked_query.side_effect = mocked_gen_query + # test cache initialization + cache_initialize(maxsize=10) + assert cache_info().maxsize == 10 + # first call; not in cache -> mocked_execute is touched + res = cached_query('stuff') + assert len(res) == len(DUMMY_SERVER_CONTENT) + assert mocked_query.call_count == 1 + # second call; in cache -> mocked_execute is NOT touched (count is still 1) + a = cached_query('stuff') + assert mocked_query.call_count == 1 + # the cache returned the same object + assert a is res + # check the info + assert cache_info().hits == 1 + assert cache_info().currsize == 1 + # after clearing the test, the mock is used again + cache_clear() + cached_query('stuff') + assert mocked_query.call_count == 2 + # we fill the cache manually and make sure the 
element is used + cache_fill({'lol': db.Container().extend([db.Entity(id=10001, name='lol')])}, + AccessType.QUERY, unique=False) + # there are now two elements in the cache: a and lol + assert cache_info().currsize == 2 + # we can retrieve the inserted element + lol = cached_query('lol') + assert lol[0].id == 10001 + # this did not touch the mocked function + assert mocked_query.call_count == 2 + # make sure normal retrieval still works (count +1) + c = cached_query('a') + assert mocked_query.call_count == 3 + assert c[0].id == 101 + + +@patch("caosdb.utils.get_entity.get_entity_by_name") +def test_cache_size(mocked_get_by_name): + mocked_get_by_name.side_effect = lambda x: x + # first call; not in cache -> mocked_execute is touched + maxsize = 5 + cache_initialize(maxsize=maxsize) + assert cache_info().currsize == 0 + + names_first = ("a", "b", "c", "d", "e") + names_later = ("A", "B", "C", "D", "E") + names_fill = {"X": None, "Y": None, "Z": None} + + # Use the first batch of names + for ii, name in enumerate(names_first, start=1): + cached_get_entity_by(name=name) + assert cache_info().currsize == ii + assert cache_info().hits == 0 + assert cache_info().misses == ii + for ii, name in enumerate(names_first, start=1): + cached_get_entity_by(name=name) + assert cache_info().currsize == maxsize + assert cache_info().hits == ii + assert cache_info().misses == maxsize + + # use the second batch of names + for ii, name in enumerate(names_later, start=1): + cached_get_entity_by(name=name) + assert cache_info().currsize == maxsize + assert cache_info().hits == len(names_first) + assert cache_info().misses == len(names_first) + ii + for ii, name in enumerate(names_later, start=1): + cached_get_entity_by(name=name) + assert cache_info().currsize == maxsize + assert cache_info().hits == len(names_first) + ii + assert cache_info().misses == len(names_first) + len(names_later) + + # The cache is now filled with A,B,C,D,E (oldest to least recently used). + # Let's fill it with X,Y,Z. 
+ cache_fill(names_fill, kind=AccessType.NAME) + + # Now, the cache should be: D,E,X,Y,Z + current_misses = cache_info().misses + + for name in ("Z", "Y", "X", "E", "D"): + cached_get_entity_by(name=name) + assert cache_info().misses == current_misses + + for ii, name in enumerate(("A", "B", "C"), start=1): + cached_get_entity_by(name=name) + assert cache_info().misses == current_misses + ii diff --git a/unittests/test_configs/pycaosdb6.ini b/unittests/test_configs/pycaosdb6.ini new file mode 100644 index 0000000000000000000000000000000000000000..3826564f043c5702385a3d093cb4ebb8d4c24cd2 --- /dev/null +++ b/unittests/test_configs/pycaosdb6.ini @@ -0,0 +1,4 @@ +[Connection] +url=https://localhost:10443/ +# No username, unauthenticated connection +password_method = unauthenticated diff --git a/unittests/test_connection.py b/unittests/test_connection.py index ee564ea033f9afc80522d75a85557f70819ece1e..6cc23d87c5cdcf639709a444849a856a8c70af5f 100644 --- a/unittests/test_connection.py +++ b/unittests/test_connection.py @@ -37,7 +37,8 @@ from caosdb.connection.connection import (CaosDBServerConnection, from caosdb.connection.mockup import (MockUpResponse, MockUpServerConnection, _request_log_message) from caosdb.connection.utils import make_uri_path, quote, urlencode -from caosdb.exceptions import ConfigurationError, LoginFailedError +from caosdb.exceptions import (ConfigurationError, LoginFailedError, + CaosDBConnectionError) from nose.tools import assert_equal as eq from nose.tools import assert_false as falz from nose.tools import assert_is_not_none as there @@ -46,6 +47,13 @@ from nose.tools import assert_true as tru from pytest import raises +def setup_function(function): + configure_connection(url="http://localhost:8888/some/path", + password_method="plain", username="test", + password="blub", + implementation=MockUpServerConnection) + + def setup_module(): _reset_config() @@ -103,6 +111,7 @@ def test_configure_connection(): get_config().set("Connection", "password_method", "plain") get_config().set("Connection", "password", "test_password") get_config().set("Connection", "timeout", "200") + get_config().set("Connection", "ssl_insecure", "True") there(configure_connection) tru(hasattr(configure_connection, "__call__")) @@ -116,6 +125,18 @@ def test_configure_connection(): tru(isinstance(c._delegate_connection, MockUpServerConnection)) +def test_configure_connection_bad_url(): + configure_connection(url="https://localhost:8888") + with raises(CaosDBConnectionError) as exc_info: + configure_connection(url="ftp://localhost:8888") + assert exc_info.value.args[0].startswith( + "The connection url is expected to be a http or https url") + with raises(CaosDBConnectionError) as exc_info: + configure_connection(url="localhost:8888") + assert exc_info.value.args[0].startswith( + "The connection url is expected to be a http or https url") + + def test_connection_interface(): with raiz(TypeError) as cm: CaosDBServerConnection() diff --git a/unittests/test_high_level_api.py b/unittests/test_high_level_api.py index a9e55c9c2a79f7ead8bbb3fb652c1b81427e69e9..ea5e635eadaa849480de5f3ece10b813a538a1b0 100644 --- a/unittests/test_high_level_api.py +++ b/unittests/test_high_level_api.py @@ -154,7 +154,7 @@ def test_convert_with_references(): obj = convert_to_python_object(r) assert obj.ref.a == 42 # Parent does not automatically lead to a datatype: - assert obj.get_property_metadata("ref").datatype is "bla" + assert obj.get_property_metadata("ref").datatype == "bla" assert obj.ref.has_parent("bla") is True # 
Unresolved Reference: @@ -163,7 +163,7 @@ def test_convert_with_references(): obj = convert_to_python_object(r) # Parent does not automatically lead to a datatype: - assert obj.get_property_metadata("ref").datatype is "bla" + assert obj.get_property_metadata("ref").datatype == "bla" assert isinstance(obj.ref, CaosDBPythonUnresolvedReference) assert obj.ref.id == 27 @@ -641,3 +641,14 @@ def test_recursion_advanced(get_record_container): r.resolve_references(r, get_record_container) d = r.serialize(True) assert r == r.sources[0] + + +def test_cyclic_references(): + r1 = db.Record() + r2 = db.Record() + r1.add_property(name="ref_to_two", value=r2) + r2.add_property(name="ref_to_one", value=r1) + + # This would have lead to a recursion error before adding the detection for + # cyclic references: + r = convert_to_python_object(r1) diff --git a/unittests/test_issues.py b/unittests/test_issues.py index 1e649db4f23de67e55301e0a053fba70d14680b4..3fb48416511ba654d6f998442319c4ff29ac2956 100644 --- a/unittests/test_issues.py +++ b/unittests/test_issues.py @@ -34,6 +34,33 @@ def test_issue_100(): # Parse from (invalid) XML file filename = os.path.join(os.path.dirname(__file__), "data", "list_in_value.xml") xml_el = lxml.etree.parse(filename).getroot() - with raises(db.ServerConfigurationException) as exc_info: + with raises(TypeError) as exc_info: db.common.models._parse_single_xml_element(xml_el) - assert "invalid XML: List valued properties" in exc_info.value.msg + assert "Invalid datatype: List valued properties" in str(exc_info.value) + + +def test_issue_156(): + """Does parse_value make a mistake with entities? + + https://gitlab.indiscale.com/caosdb/src/caosdb-pylib/-/issues/156 + """ + project = db.Record(name="foo") + project.add_parent(name="RTName") + # <Record name="foo"> + # <Parent name="RTName"/> + # </Record> + experiment = db.Record() + experiment.add_property(name="RTName", value=project) + # <Record> + # <Property name="RTName" importance="FIX" flag="inheritance:FIX">foo</Property> + # </Record> + value = experiment.get_property("RTName").value + # <Record name="foo"> + # <Parent name="RTName"/> + # </Record> + parents = value.get_parents() + # <ParentList> + # <Parent name="RTName"/> + # </ParentList> + assert value is project + assert parents[0].name == "RTName" diff --git a/unittests/test_message.py b/unittests/test_message.py index 5e1003056c1b606a004b63bb7618e5e0474952bc..440e7169501afb0a35acb78df95cefae01bd9426 100644 --- a/unittests/test_message.py +++ b/unittests/test_message.py @@ -27,11 +27,14 @@ import caosdb as db from copy import deepcopy +import pytest + + def test_messages_dict_behavior(): from caosdb.common.models import Message - from caosdb.common.models import _Messages + from caosdb.common.models import Messages - msgs = _Messages() + msgs = Messages() # create Message msg = Message( @@ -40,12 +43,12 @@ def test_messages_dict_behavior(): description="Greeting the world", body="Hello, world!") - # append it to the _Messages + # append it to the Messages assert repr(msg) == '<HelloWorld code="1" description="Greeting the world">Hello, world!</HelloWorld>\n' msgs.append(msg) assert len(msgs) == 1 - # use _Messages as list of Message objects + # use Messages as list of Message objects for m in msgs: assert isinstance(m, Message) @@ -70,10 +73,6 @@ def test_messages_dict_behavior(): assert msgs["HelloWorld", 2] == ( "Greeting the world in German", "Hallo, Welt!") - msgs["HelloWorld", 2] = "Greeting the world in German", "Huhu, Welt!" 
- assert len(msgs) == 1 - assert msgs["HelloWorld", 2] == ( - "Greeting the world in German", "Huhu, Welt!") del msgs["HelloWorld", 2] assert msgs.get("HelloWorld", 2) is None @@ -83,11 +82,11 @@ def test_messages_dict_behavior(): def test_deepcopy(): - """Test whether deepcopy of _Messages objects doesn't mess up + """Test whether deepcopy of Messages objects doesn't mess up contained Messages objects. """ - msgs = db.common.models._Messages() + msgs = db.common.models.Messages() msg = db.Message(type="bla", code=1234, description="desc", body="blabla") msgs.append(msg) msg_copy = deepcopy(msgs)[0] @@ -102,7 +101,7 @@ def test_deepcopy(): def test_deepcopy_clear_server(): - msgs = db.common.models._Messages() + msgs = db.common.models.Messages() msg = db.Message(type="bla", code=1234, description="desc", body="blabla") err_msg = db.Message(type="Error", code=1357, description="error") msgs.extend([msg, err_msg]) @@ -116,3 +115,18 @@ def test_deepcopy_clear_server(): copied_msgs.clear_server_messages() assert len(copied_msgs) == 1 assert copied_msgs[0].code == msg.code + + +def test_list_behavior(): + msgs = db.common.models.Messages() + msgs.append(db.Message("test")) + assert len(msgs) == 1 + assert msgs[0] == db.Message("test") + assert msgs[0] != db.Message("test2") + + msgs.append(db.Message("test")) + assert len(msgs) == 2 + assert msgs[0] == msgs[1] + + with pytest.raises(IndexError): + msgs[3] diff --git a/unittests/test_property.py b/unittests/test_property.py index 7c756117765e510587c00d818e39fb3945d44c53..84f89b5a959192d7831e1bb3eab3a441912afe7e 100644 --- a/unittests/test_property.py +++ b/unittests/test_property.py @@ -1,11 +1,11 @@ # -*- encoding: utf-8 -*- # -# ** header v3.0 # This file is a part of the CaosDB Project. # # Copyright (C) 2018 Research Group Biomedical Physics, # Max-Planck-Institute for Dynamics and Self-Organization Göttingen -# Copyright (C) 2020 IndiScale GmbH <info@indiscale.com> +# Copyright (C) 2020 - 2023 IndiScale GmbH <info@indiscale.com> +# Copyright (C) 2023 Florian Spreckelsen <f.spreckelsen@indiscale.com> # Copyright (C) 2020 Timm Fitschen <t.fitschen@indiscale.com> # # This program is free software: you can redistribute it and/or modify @@ -21,8 +21,6 @@ # You should have received a copy of the GNU Affero General Public License # along with this program. If not, see <https://www.gnu.org/licenses/>. 
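Taken together, the message tests above use the reworked `Messages` container as a plain Python list. A condensed usage sketch, kept to calls that also appear in these tests, might read:

import caosdb as db

msgs = db.common.models.Messages()            # list-like container of Message objects
msgs.append(db.Message(type="bla", code=1234, description="desc", body="blabla"))
msgs.extend([db.Message(type="Error", code=1357, description="error")])

assert len(msgs) == 2
assert all(isinstance(m, db.Message) for m in msgs)   # plain iteration works

msgs.clear_server_messages()                  # drops the server-generated "Error" entry
assert len(msgs) == 1
assert msgs[0].code == 1234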
# -# ** end header -# """Tests for the Property class.""" import os @@ -138,3 +136,87 @@ def test_is_reference(): # restore retrieve function with original Entity.retrieve = real_retrieve + + +def test_remove_value_from_property(): + + rec = Record() + names_values_dtypes = [ + ("testListProp1", [1, 2, 3], db.LIST(db.INTEGER)), + ("testListProp2", ["a", "b", "a"], db.LIST(db.TEXT)), + ("testScalarProp1", "bla", db.TEXT), + ("testScalarProp2", False, db.BOOLEAN), + ("testEmptyProp", None, db.REFERENCE), + ("testNoneListProp", [None, None], db.LIST(db.REFERENCE)), + ] + for name, value, dtype in names_values_dtypes: + rec.add_property(name=name, value=value, datatype=dtype) + + # property doesn't exist, so do nothing + returned = rec.remove_value_from_property("nonexisting", "some_value") + assert returned is rec + for name, value, dtype in names_values_dtypes: + assert rec.get_property(name).value == value + assert rec.get_property(name).datatype == dtype + + # value doesn't exist so nothing changes either + rec.remove_value_from_property("testListProp1", 0) + assert rec.get_property("testListProp1").value == [1, 2, 3] + assert rec.get_property("testListProp1").datatype == db.LIST(db.INTEGER) + + returned = rec.remove_value_from_property("testScalarProp2", True) + assert returned is rec + assert rec.get_property("testScalarProp2").value is False + assert rec.get_property("testScalarProp2").datatype == db.BOOLEAN + + # Simple removals from lists without emptying them + rec.remove_value_from_property("testListProp1", 1) + assert rec.get_property("testListProp1").value == [2, 3] + + rec.remove_value_from_property("testListProp1", 2) + assert rec.get_property("testListProp1").value == [3] + + # similarly to Python's `list.remove()`, only remove first occurrance + rec.remove_value_from_property("testListProp2", "a") + assert rec.get_property("testListProp2").value == ["b", "a"] + + # default is to remove an empty property: + rec.remove_value_from_property("testListProp1", 3) + assert rec.get_property("testListProp1") is None + + rec.remove_value_from_property("testScalarProp1", "bla") + assert rec.get_property("testScalarProp1") is None + + # don't remove if `remove_if_empty_afterwards=False` + rec.remove_value_from_property("testListProp2", "b") + rec.remove_value_from_property("testListProp2", "a", remove_if_empty_afterwards=False) + assert rec.get_property("testListProp2") is not None + assert rec.get_property("testListProp2").value is None + assert rec.get_property("testListProp2").datatype == db.LIST(db.TEXT) + + rec.remove_value_from_property("testScalarProp2", False, remove_if_empty_afterwards=False) + assert rec.get_property("testScalarProp2") is not None + assert rec.get_property("testScalarProp2").value is None + assert rec.get_property("testScalarProp2").datatype == db.BOOLEAN + + # Special case of an already empty property: It is not empty because a value + # was removed by `remove_value_from_property` but never had a value in the + # first place. So even `remove_if_empty_afterwards=True` should not lead to + # its removal. + rec.remove_value_from_property("testEmptyProp", 1234, remove_if_empty_afterwards=True) + assert rec.get_property("testEmptyProp") is not None + assert rec.get_property("testEmptyProp").value is None + assert rec.get_property("testEmptyProp").datatype == db.REFERENCE + + # Corner case of corner case: remove with `value=None` and + # `remove_if_empty_afterwards=True` keeps the empty property. 
+ rec.remove_value_from_property("testEmptyProp", None, remove_if_empty_afterwards=True) + assert rec.get_property("testEmptyProp") is not None + assert rec.get_property("testEmptyProp").value is None + assert rec.get_property("testEmptyProp").datatype == db.REFERENCE + + # Remove `None` from list `[None, None]` + rec.remove_value_from_property("testNoneListProp", None, remove_if_empty_afterwards=True) + assert rec.get_property("testNoneListProp") is not None + assert rec.get_property("testNoneListProp").value == [None] + assert rec.get_property("testNoneListProp").datatype == db.LIST(db.REFERENCE) diff --git a/unittests/test_server_side_scripting.py b/unittests/test_server_side_scripting.py index 1fb24d7e40bb843391a971c5f69680b541e1de0e..b699c4482d02972282167eb9683a956097ebc5e9 100644 --- a/unittests/test_server_side_scripting.py +++ b/unittests/test_server_side_scripting.py @@ -46,7 +46,7 @@ def setup_module(): content_type = kwargs["headers"]["Content-Type"] if content_type.startswith("multipart/form-data; boundary"): - parts = kwargs["body"] + parts = kwargs["body"].multipart_yielder stdout = [] for part in parts: if hasattr(part, "decode"):
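Condensing the behaviour pinned down by `test_remove_value_from_property` above, a typical call sequence for the new `Entity.remove_value_from_property` could look as follows; the property name `samples` is made up for illustration:

import caosdb as db

rec = db.Record()
rec.add_property(name="samples", value=["a", "b", "a"], datatype=db.LIST(db.TEXT))

rec.remove_value_from_property("samples", "a")      # like list.remove(): first hit only
assert rec.get_property("samples").value == ["b", "a"]

rec.remove_value_from_property("nonexisting", 42)   # unknown property: silently a no-op
assert rec.get_property("samples").value == ["b", "a"]

rec.remove_value_from_property("samples", "b")
rec.remove_value_from_property("samples", "a", remove_if_empty_afterwards=False)
assert rec.get_property("samples") is not None      # property is kept, but now empty
assert rec.get_property("samples").value is None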