Merge branch 'dev' into f-grpc

0c411c15 · Timm Fitschen · 1167f351 · eefbef6d · 0c411c15 · 0c411c15
Verified Commit 0c411c15 authored Jun 2, 2021 by Timm Fitschen
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -24,7 +24,7 @@

 variables:
  DEPLOY_REF: dev
-  CI_REGISTRY_IMAGE: $CI_REGISTRY/caosdb/caosdb-server/caosdb-server-testenv:latest
+  CI_REGISTRY_IMAGE: $CI_REGISTRY/caosdb/src/caosdb-server/caosdb-server-testenv:latest

 image: $CI_REGISTRY_IMAGE
 stages:
@@ -42,14 +42,11 @@ build-testenv:
    - schedules
  script:
    - cd src/test/docker
-    - time docker load < /image-cache/caosdb-server-testenv.tar || true
    - docker login -u gitlab-ci-token -p $CI_JOB_TOKEN $CI_REGISTRY
      # use here general latest or specific branch latest...
    - docker build
      --pull
      -t $CI_REGISTRY_IMAGE .
-    - docker save $CI_REGISTRY_IMAGE > image.tar;
-          mv image.tar /image-cache/caosdb-server-testenv.tar;
    - docker push $CI_REGISTRY_IMAGE

 # Test: run unit tests of the server
@@ -70,7 +67,7 @@ trigger_build:
  stage: deploy
  script:
    - /usr/bin/curl -X POST
-      -F token=$DEPLOY_TRIGGER_TOKEN
+      -F token=$CI_JOB_TOKEN
      -F "variables[F_BRANCH]=$CI_COMMIT_REF_NAME"
      -F "variables[SERVER]=$CI_COMMIT_REF_NAME"
      -F "variables[TriggerdBy]=SERVER"
@@ -78,15 +75,17 @@ trigger_build:
      -F ref=$DEPLOY_REF https://gitlab.indiscale.com/api/v4/projects/14/trigger/pipeline

 # Build the sphinx documentation and make it ready for deployment by Gitlab Pages
-# documentation:
-#   stage: deploy
-
 # Special job for serving a static website. See https://docs.gitlab.com/ee/ci/yaml/README.html#pages
 pages:
  tags: [ cached-dind ]
  stage: deploy
  only:
-    - dev
+    refs:
+      - /^release-.*$/i
+      - master
+    variables:
+      # run pages only on gitlab.com
+      - $CI_SERVER_HOST == "gitlab.com"
  script:
    - echo "Deploying"
    - make doc

--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -9,6 +9,15 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

 ### Added

+* New EntityState plug-in. The plug-in is disabled by default and can be enabled
+  by setting the server property `EXT_ENTITY_STATE=ENABLED`. See
+  [!62](https://gitlab.com/caosdb/caosdb-server/-/merge_requests/62) for more
+  information.
+* `ETag` property for the query. The `ETag` is assigned to the query cache
+  each time the cache is cleared (currently whenever the server state is being
+  updated, i.e. the stored entities change).
+  This can be used to debug the query cache and also allows a client
+  to determine whether the server's state has changed between queries.
 * Basic caching for queries. The caching is enabled by default and can be
  controlled by the usual "cache" flag.

@@ -20,6 +29,15 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

 ### Fixed

+* #130 - Error during `FIND ENTITY` when
+  `QUERY_FILTER_ENTITIES_WITHOUT_RETRIEVE_PERMISSIONS=False`.
+* #125 - `bend_symlinks` script did not allow whitespace in filename.
+* #122 - Dead-lock due to error in the DatabaseAccessManager.
+* #120 - Editing entities that were created with a no longer existing user
+  leads to a server error.
+* #31 - Queries with keywords in the path (e.g. `... STORED AT 0in.txt`)
+* #116 - Queries `FIND [ANY VERSION OF] *` and `FIND [ANY VERSION OF] ENTITY`.
+
 ### Security

 ## [0.3.0] - 2021-02-10
@@ -40,6 +58,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

 ### Changed

+* Server can be started without TLS even when not in debug mode.
 * Select queries would originally only select the returned properties by their
  names and would not check if a property is a subtype of a selected property. This
  has changed now and select queries will also return subtypes of selected

--- a/Makefile
+++ b/Makefile
@@ -24,6 +24,7 @@
 #

 CAOSDB_SERVER_VERSION ?= $(shell mvn org.apache.maven.plugins:maven-help-plugin:3.1.0:evaluate -Dexpression=project.version -q -DforceStdout)
+CAOSDB_COMMAND_LINE_OPTIONS ?=
 SHELL:=/bin/bash
 JPDA_PORT ?= 9000
 JMX_PORT ?= 9090
@@ -41,13 +42,14 @@ run: compile
 	mvn exec:java@run

 run-debug: jar
-	java -Xrunjdwp:transport=dt_socket,address=0.0.0.0:$(JPDA_PORT),server=y,suspend=n -Dcaosdb.debug=true -jar target/caosdb-server.jar
+	java -Xrunjdwp:transport=dt_socket,address=0.0.0.0:$(JPDA_PORT),server=y,suspend=n -Dcaosdb.debug=true -jar target/caosdb-server.jar $(CAOSDB_COMMAND_LINE_OPTIONS)
+

 run-debug-single:
-	java -Dcom.sun.management.jmxremote -Dcom.sun.management.jmxremote.port=$(JMX_PORT) -Dcom.sun.management.jmxremote.ssl=false -Dcom.sun.management.jmxremote.authenticate=false -Xrunjdwp:transport=dt_socket,address=0.0.0.0:$(JPDA_PORT),server=y,suspend=n -Dcaosdb.debug=true -jar target/caosdb-server.jar
+	java -Dcom.sun.management.jmxremote -Dcom.sun.management.jmxremote.port=$(JMX_PORT) -Dcom.sun.management.jmxremote.ssl=false -Dcom.sun.management.jmxremote.authenticate=false -Xrunjdwp:transport=dt_socket,address=0.0.0.0:$(JPDA_PORT),server=y,suspend=n -Dcaosdb.debug=true -jar target/caosdb-server.jar $(CAOSDB_COMMAND_LINE_OPTIONS)

 run-single:
-	java -jar target/caosdb-server.jar
+	java -jar target/caosdb-server.jar $(CAOSDB_COMMAND_LINE_OPTIONS)

 formatting:
 	mvn fmt:format
@@ -64,7 +66,7 @@ antlr:
 	mvn antlr4:antlr4

 test: print-version easy-units
-	MAVEN_DEBUG_OPTS="-Xdebug -Xnoagent -Djava.compiler=NONE -Dcaosdb.debug=true -Xrunjdwp:transport=dt_socket,server=y,suspend=y,address=0.0.0.0:9000"
+	MAVEN_DEBUG_OPTS="-Xdebug -Xnoagent -Djava.compiler=NONE -Dcaosdb.debug=true -Xrunjdwp:transport=dt_socket,server=y,suspend=y,address=0.0.0.0:$(JPDA_PORT)"
 	mvn test -X

 test_misc:

--- a/README.md
+++ b/README.md
-<!--THIS FILE HAS BEEN GENERATED BY A SCRIPT. PLEASE DON'T CHANGE IT MANUALLY.-->

-# Welcome
+# README

-This is the **CaosDB Server** repository and a part of the CaosDB project.
+## Welcome

-# Setup
+This is the **CaosDB Java Server** repository and a part of the
+CaosDB project.
+
+## Setup

 Please read the [README_SETUP.md](README_SETUP.md) for instructions on how to
 setup this code.


-# Further Reading
+## Further Reading
+
+Please refer to the [official documentation](https://docs.indiscale.com/caosdb-server/) for more information.
+
+## Contributing
+
+Thank you very much to all contributors—[past, present](https://gitlab.com/caosdb/caosdb/-/blob/dev/HUMANS.md), and prospective ones.

-Please refer to the [official gitlab repository of the CaosDB
-project](https://gitlab.com/caosdb/caosdb) for more information.
+### Code of Conduct

-# License
+By participating, you are expected to uphold our [Code of Conduct](https://gitlab.com/caosdb/caosdb/-/blob/dev/CODE_OF_CONDUCT.md).

-Copyright (C) 2018 Research Group Biomedical Physics, Max Planck Institute for
-Dynamics and Self-Organization Göttingen.
+### How to Contribute
+
+* You found a bug, have a question, or want to request a feature? Please
+[create an issue](https://gitlab.com/caosdb/caosdb-server/-/issues).
+* You want to contribute code? Please fork the repository and create a merge
+request in GitLab and choose this repository as target. Make sure to select
+"Allow commits from members who can merge the target branch" under Contribution
+when creating the merge request. This allows our team to work with you on your request.
+- If you have a suggestion for the [documentation](https://docs.indiscale.com/caosdb-server/),
+the preferred way is also a merge request as described above (the documentation resides in `src/doc`).
+However, you can also create an issue for it.
+- You can also contact us at **info (AT) caosdb.de**.
+
+## License
+
+* Copyright (C) 2018 Research Group Biomedical Physics, Max Planck Institute
+  for Dynamics and Self-Organization Göttingen.
+* Copyright (C) 2020-2021 Indiscale GmbH <info@indiscale.com>

 All files in this repository are licensed under a [GNU Affero General Public
 License](LICENCE.md) (version 3 or later).
-
--- a/README_SETUP.md
+++ b/README_SETUP.md
@@ -79,7 +79,7 @@ server:
     Replace `localhost` by your host name, if you want.
   - `keytool -importkeystore -srckeystore caosdb.jks -destkeystore caosdb.p12 -deststoretype PKCS12 -srcalias selfsigned`
   - Export the public part only: `openssl pkcs12 -in caosdb.p12 -nokeys -out cert.pem`.
-	 The resulting ``cert.pem` can safely be given to users to allow ssl verification.
+	 The resulting `cert.pem` can safely be given to users to allow ssl verification.
   - You can check the content of the certificate with `openssl x509 -in cert.pem -text`

   Alternatively, you can create a keystore from certificate files that you already have:

--- a/RELEASE_GUIDELINES.md
+++ b/RELEASE_GUIDELINES.md
@@ -21,13 +21,13 @@ guidelines of the CaosDB Project
 3. Update the version property in [pom.xml](./pom.xml) (probably this means to
   remove the `-SNAPSHOT`) and in `src/doc/conf.py`.

-4. Merge the release branch into the master branch.
+4. Merge the release branch into the main branch.

-5. Tag the latest commit of the master branch with `v<VERSION>`.
+5. Tag the latest commit of the main branch with `v<VERSION>`.

 6. Delete the release branch.

-7. Merge the master branch back into the dev branch.
+7. Merge the main branch back into the dev branch.

 8. Update the version property in [pom.xml](./pom.xml) for the next
   developlement round (with a `-SNAPSHOT` suffix).
--- a/caosdb-webui @ 5dfe8797
+++ b/caosdb-webui @ 5dfe8797
-Subproject commit 8c59cc861d646cbdba0ec749ba052656f67fd58d
+Subproject commit 5dfe879722bd01acc5209c581b60bf0ac49635b6
--- a/conf/core/server.conf
+++ b/conf/core/server.conf
@@ -75,7 +75,7 @@ MYSQL_SCHEMA_VERSION=v4.0.0
 # Server options
 # --------------------------------------------------
 # The context root is a prefix which allows running multiple instances of CaosDB using the same
-# hostname and port.
+# hostname and port. Must start with "/".
 CONTEXT_ROOT=
 # HTTPS port of this server instance.
 SERVER_PORT_HTTPS=443
@@ -188,3 +188,11 @@ GLOBAL_ENTITY_PERMISSIONS_FILE=./conf/core/global_entity_permissions.xml

 # If set to true, versioning of entities' history is enabled.
 ENTITY_VERSIONING_ENABLED=true
+
+
+# --------------------------------------------------
+# Extension settings
+# --------------------------------------------------
+
+# Enabling the state machine extension
+# EXT_ENTITY_STATE=ENABLED
--- a/doc/devel/Benchmarking.md
+++ b/doc/devel/Benchmarking.md
-# Profiling #

-If the server is started with the `run-debug-single` make target, it will expose
-the JMX interface, by default on port 9090.  Using a profiler such as VisualVM,
-one can then connect to the CaosDB server and profile execution times.

-## Example settings for VisualVM ##
+# Benchmarking CaosDB #

-In the sampler settings, you may want to add these expressions to the blocked
-packages: `org.restlet.**, com.mysql.**`.  Branches on the call tree which are
-entirely inside the blacklist, will become leaves.  Alternatively, specify a
-whitelist, for example with `org.caosdb.server.database.backend.implementation.**`,
-if you only want to see the time spent for certain MySQL calls.
+Benchmarking CaosDB may encompass several distinct areas: How much time is spent in the server's
+Java code, how much time is spent inside the SQL backend, are the same costly methods called more
+than once?  This documentation tries to answer some questions connected with these benchmarking
+aspects and give you the tools to answer your own questions.
+
+
+## Before you start ##
+In order to obtain meaningful results, you should disable caching.
+
+### MariaDB
+Set the corresponding variable to 0: `SET GLOBAL query_cache_type = 0;`
+
+### Java Server
+In the config:
+```conf
+CACHE_DISABLE=true
+```
+
+
+## Tools for the benchmarking ##
+
+For averaging over many runs of comparable requests and for putting the database into a
+representative state, Python scripts are used.  The scripts can be found in the `caosdb-dev-tools`
+repository, located at [https://gitlab.indiscale.com/caosdb/src/caosdb-dev-tools](https://gitlab.indiscale.com/caosdb/src/caosdb-dev-tools) in the folder
+`benchmarking`:
+
+### Python Script `fill_database.py` ###
+
+This commandline script is meant for filling the database with enough data to represent an actual
+real-life case, it can easily create hundreds of thousands of Entities.
+
+The script inserts predefined amounts of randomized Entities into the database, RecordTypes,
+Properties and Records.  Each Record has a random (but with defined average) number of Properties,
+some of which may be references to other Records which have been inserted before.  Actual insertion
+of the Entities into CaosDB is done in chunks of a defined size.
+
+Users can tell the script to store times needed for the insertion of each chunk into a tsv file.
+
+### Python Script  `measure_execution_time.py` ###
+
+A somewhat outdated script which executes a given query a number of times and then saves statistics
+about the `TransactionBenchmark` readings (see below for more information about the transaction
+benchmarks) delivered by the server.
+
+
+### Python Script  `sql_routine_measurement.py` 
+
+
+
+Simply call `./sql_routine_measurement.py` in the scripts directory. An sql
+file is automatically executed which enables the correct `performance_schema`
+tables. However, the performance_schema of mariadb needs to be enabled. Add
+`performance_schema=ON` to the configuration file of mariadb as it needs to be
+enabled on start up.
+This script expects the MariaDB server to be accessible on 127.0.0.1 with the default caosdb user
+and password (caosdb;random1234).
+
+
+The performance schema must be enabled (see below).
+
+### MariaDB General Query Log ###
+
+MariaDB and MySQL have a feature to enable the logging of SQL queries' times.  This logging must be
+turned on on the SQL server as described in the [upstream documentation](https://mariadb.com/kb/en/general-query-log/):
+Add to the mysql configuration:
+```
+log_output=TABLE
+general_log
+```
+or calling
+```sql
+SET GLOBAL log_output = 'TABLE';
+SET GLOBAL general_log = 'ON';
+```
+
+In the Docker environment LinkAhead, this can conveniently be 
+done with `linkahead mysqllog {on,off,store}`.
+
+### MariaDB Slow Query Log ###
+See [slow query log docs](https://mariadb.com/kb/en/slow-query-log-overview/)
+
+### MariaDB Performance Schema ###
+The most detailed information on execution times can be acquired using the performance schema.
+
+To use it, the `performance_schema` setting in the MariaDB server must be enabled ([docs](https://mariadb.com/kb/en/performance-schema-overview/#enabling-the-performance-schema)), for example by setting
+this in the config files:
+```
+[mysqld]
+
+performance_schema=ON
+```
+
+The performance schema provides many different tables in the `performance_schema`. You can instruct MariaDB to create
+those tables by setting the appropriate `instrument` and `consumer` variables. E.g. 
+```SQL
+update performance_schema.setup_instruments set enabled='YES', timed='YES' WHERE NAME LIKE '%statement%';
+update performance_schema.setup_consumers set enabled='YES' WHERE NAME LIKE '%statement%';
+```
+This can also be done via the configuration. 
+```
+[mysqld]
+
+performance_schema=ON
+performance-schema-instrument='statement/%=ON'
+performance-schema-consumer-events-statements-history=ON                        
+performance-schema-consumer-events-statements-history-long=ON
+```
+You may want to look at the result of the following commands:
+```sql
+
+select * from performance_schema.setup_consumers;
+select * from performance_schema.setup_instruments;
+```

-# Manual Java-side benchmarking #
+Note that the `base_settings.sql` enables appropriate instruments and consumers.
+
+Before you start a measurement, you will want to empty the tables. E.g.:
+```sql
+truncate table  performance_schema.events_statements_history_long ;
+```
+The procedure `reset_stats` in `base_settings.sql` clears the typically used ones.
+
+The tables contain many columns. An example to get an informative view is
+```sql
+select left(sql_text,50), left(digest_text,50), ms(timer_wait) from performance_schema.events_statements_history_long order by ms(timer_wait);
+```
+where the function `ms` is defined in `base_settings.sql`.
+Or a very useful one:
+```sql
+select  left(digest_text,100) as digest,ms(sum_timer_wait) as time_ms, count_star from performance_schema.events_statements_summary_by_digest order by time_ms;
+```
+
+### Useful SQL configuration with docker
+In order to allow easy testing and debugging the following is useful when using docker.
+Change the docker-compose file to include the following for the mariadb service:
+```
+    networks:
+      # available on port 3306, host name 'sqldb'
+      - caosnet
+    ports:
+      - 3306:3306
+```
+Check it with `mysql -ucaosdb -prandom1234 -h127.0.0.1 caosdb`
+Add the appropriate changes (e.g. `performance_schema=ON`) to `profiles/empty/custom/mariadb.conf.d/mariadb.cnf` (or in the profile folder that you use).
+
+### Manual Java-side benchmarking #

 Benchmarking can be done using the `TransactionBenchmark` class (in package
 `org.caosdb.server.database.misc`).
@@ -26,9 +161,95 @@ Benchmarking can be done using the `TransactionBenchmark` class (in package
  - `Container.getTransactionBenchmark().addBenchmark()`
  - `Query.addBenchmark()`

-# Miscellaneous notes #

-Notes to self, details, etc.
+To enable transaction benchmarks and disable caching in the server, set these
+server settings:
+```conf
+TRANSACTION_BENCHMARK_ENABLED=true
+CACHE_DISABLE=true
+```
+Additionally, the server should be started via `make run-debug` (instead of
+`make run-single`), otherwise the benchmarking will not be active.
+
+#### Notable benchmarks and where to find them ##
+
+| Name                                 | Where measured                               | What measured                 |
+|--------------------------------------|----------------------------------------------|-------------------------------|
+| `Retrieve.init`                      | transaction/Transaction.java#135             | transaction/Retrieve.java#48  |
+| `Retrieve.transaction`               | transaction/Transaction.java#174             | transaction/Retrieve.java#133 |
+| `Retrieve.post_transaction`          | transaction/Transaction.java#182             | transaction/Retrieve.java#77  |
+| `EntityResource.httpGetInChildClass` | resource/transaction/EntityResource.java#118 | all except XML generation     |
+| `ExecuteQuery`                       | ?                                            | ?                             |
+|                                      |                                              |                               |
+
+### External JVM profilers ###
+
+Additionally to the transaction benchmarks, it is possible to benchmark the server execution via
+external Java profilers.  For example, [VisualVM](https://visualvm.github.io/) can connect to JVMs running locally or remotely
+(e.g. in a Docker container).  To enable this in LinkAhead's Docker environment, set
+
+```yaml
+devel:
+  profiler: true
+```
+Alternatively, start the server (without docker) with the `run-debug-single` make target, it will expose
+the JMX interface, by default on port 9090.
+
+Most profilers, such as VisualVM, only gather cumulative data for call trees, they do not provide
+complete call graphs (as callgrind/kcachegrind would do).  They also do not differentiate between
+calls with different query strings, as long as the Java process flow is the same (for example, `FIND
+Record 1234` and `FIND Record A WHICH HAS A Property B WHICH HAS A Property C>100` would be handled
+equally).
+
+
+#### Example settings for VisualVM 
+
+In the sampler settings, you may want to add these expressions to the blocked
+packages: `org.restlet.**, com.mysql.**`.  Branches on the call tree which are
+entirely inside the blacklist, will become leaves.  Alternatively, specify a
+whitelist, for example with `org.caosdb.server.database.backend.implementation.**`,
+if you only want to see the time spent for certain MySQL calls.
+
+
+## How to set up a representative database ##
+For reproducible results, it makes sense to start off with an empty database and fill it using the
+`fill_database.py` script, for example like this:
+
+```sh
+./fill_database.py -t 500 -p 700 -r 10000 -s 100 --clean
+```
+
+The `--clean` argument is not strictly necessary when the database was empty before, but it may make
+sense when there have been previous runs of the command.  This example would create 500 RecordTypes,
+700 Properties and 10000 Records with randomized properties, everything is inserted in chunks of 100
+Entities.
+
+## How to measure request times ##
+
+If the execution of the Java components is of interest, the VisualVM profiler should be started and
+connected to the server before any requests to the server are started.
+
+When doing performance tests which are used for detailed analysis, it is important that
+
+1. CaosDB is in a reproducible state, which should be documented
+2. all measurements are repeated several times to account for inevitable variance in access (for
+   example file system caching, network variability etc.)
+
+### Filling the database ###
+
+By simply adding the option `-T logfile.tsv` to the `fill_database.py` command above, the times for
+inserting the records are stored in a tsv file and can be analyzed later.
+
+### Obtain statistics about a query ###
+
+To repeat single queries a number of times, `measure_execution_time.py` can be used, for example:
+
+```sh
+./measure_execution_time.py -n 120 -q "FIND MusicalInstrument WHICH IS REFERENCED BY Analysis"
+```
+
+This command executes the query 120 times, additional arguments could even plot the
+TransactionBenchmark results directly.

 ## On method calling order and benchmarked events ##

@@ -56,29 +277,37 @@ Notes to self, details, etc.
  - Executing the SQL statement
  - Java-side caching

-## Server settings ##
+## What is measured ##

- To enable the SQL general logs, log into the SQL server and do:
-  ```sql
-SET GLOBAL log_output = 'TABLE';
-SET GLOBAL general_log = 'ON';
-```
- To enable transaction benchmarks and disable caching in the server, set these
-  server settings:
-```conf
-TRANSACTION_BENCHMARK_ENABLED=true
-CACHE_DISABLE=true
-```
- Additionally, the server should be started via `make run-debug` (instead of
-  `make run-single`), otherwise the benchmarking will not be active.
+For a consistent interpretation, the exact definitions of the measured times are as follows:

-## Notable benchmarks and where to find them ##
+### SQL logs ###

-| Name                                 | Where measured                               | What measured                 |
-|--------------------------------------|----------------------------------------------|-------------------------------|
-| `Retrieve.init`                      | transaction/Transaction.java#135             | transaction/Retrieve.java#48  |
-| `Retrieve.transaction`               | transaction/Transaction.java#174             | transaction/Retrieve.java#133 |
-| `Retrieve.post_transaction`          | transaction/Transaction.java#182             | transaction/Retrieve.java#77  |
-| `EntityResource.httpGetInChildClass` | resource/transaction/EntityResource.java#118 | all except XML generation     |
-| `ExecuteQuery`                       | ?                                            | ?                             |
-|                                      |                                              |                               |
+As per https://mariadb.com/kb/en/general-query-log, the logs store only the time at which the SQL
+server received a query, not the duration of the query.
+
+#### Possible future enhancements ####
+
+- The `query_response_time` plugin may be additionally used in the future, see
+  https://mariadb.com/kb/en/query-response-time-plugin
+
+### Transaction benchmarks ###
+
+Transaction benchmarking manually collects timing information for each transaction.  At defined
+points, different measurements can be made, accumulated and will finally be returned to the client.
+Benchmark objects may consist of sub benchmarks and have a number of measurement objects, which
+contain the actual statistics.
+
+Because transaction benchmarks must be manually added to the server code, they only monitor those
+code paths where they are added.  On the other hand, their manual nature allows for a more
+abstracted analysis of performance bottlenecks.
+
+### Java profiler ###
+
+VisualVM records for each thread the call tree, specifically which methods were called how often and
+how much time was spent inside these methods.
+
+### Global requests ###
+
+Python scripts may measure the global time needed for the execution of each request.
+`fill_database.py` obtains its numbers this way.
--- a/misc/bend_symlinks/src/main.sh
+++ b/misc/bend_symlinks/src/main.sh
@@ -66,8 +66,10 @@ if [ $IS_MOVE -eq 1 ] ; then
    REPLACEMENT=$(new_dir "$REPLACEMENT")
 fi

+
 set -o noglob
-for syml in $(find -P $(realpath $FILE_SYSTEM_ROOT) -type l) ; do
+find -P $(realpath $FILE_SYSTEM_ROOT) -type l -print0 |
+        while IFS= read -r -d '' syml; do  # empty IFS: keep leading/trailing whitespace of each NUL-delimited path
  OLD_TARGET=$(realpath -m "$syml" | sed -n -r "/$REGEX_OLD/p")
  if [ -z "$OLD_TARGET" ] ; then
    # filter non matching

--- a/misc/bend_symlinks/src/utils.sh
+++ b/misc/bend_symlinks/src/utils.sh
@@ -37,6 +37,8 @@ function escape_simple_path () {
    SPATH=$(echo "$SPATH" | sed -r "s/\(/\\\\(/g")
    # {
    SPATH=$(echo "$SPATH" | sed -r "s/\{/\\\\{/g")
+    # white space
+    SPATH=$(echo "$SPATH" | sed -r "s/ /\\ /g")
    echo "$SPATH"
 }


--- a/misc/bend_symlinks/test/test_suite.sh
+++ b/misc/bend_symlinks/test/test_suite.sh
@@ -23,8 +23,11 @@ tearDown () {

 _make_test_file () {
    touch "$DATA_DIR/$1"
-    ln -s $(realpath "$DATA_DIR/$1") "$FILE_SYSTEM_ROOT/$1"
-    assertEquals "initial target $1" $(realpath "$FILE_SYSTEM_ROOT/$1") $(realpath "$DATA_DIR/$1")
+    TARGET=$(realpath "$DATA_DIR/$1")
+    LINK=$FILE_SYSTEM_ROOT/$1
+    ln -s "$TARGET" "$LINK"
+    LINKED=$(realpath "$LINK")
+    assertEquals "initial target $1" "$LINKED" "$TARGET"
 }

 _break_link_move_file () {
@@ -35,7 +38,8 @@ _break_link_move_file () {
    NEW_PATH_REAL=$(realpath "$NEW_PATH")
    LINK="$FILE_SYSTEM_ROOT/$1"
    mv "$OLD_PATH_REAL" "$NEW_PATH_REAL"
-    assertEquals "still target $OLD_PATH_REAL" $(realpath "$LINK") "$OLD_PATH_REAL"
+    LINKED=$(realpath "$LINK")
+    assertEquals "still target $OLD_PATH_REAL" "$LINKED" "$OLD_PATH_REAL"
    assertFalse "$LINK link is broken" "[ -f '$LINK' ]"
    assertFalse "$OLD_PATH_REAL was moved" "[ -f '$OLD_PATH_REAL' ]"
    assertTrue "$NEW_PATH_REAL is there" "[ -f '$NEW_PATH_REAL' ]"
@@ -51,7 +55,7 @@ assertLinkOk () {
    LINK=$(realpath "$FILE_SYSTEM_ROOT/$1")
    TARGET=$(realpath "$DATA_DIR/$2")
    assertTrue "target exists $LINK" "[ -f '$LINK' ]"
-    assertEquals "target matches $TARGET" $TARGET "$LINK"
+    assertEquals "target matches $TARGET" "$TARGET" "$LINK"
    set +o noglob
 }

@@ -135,6 +139,7 @@ testFullPathWithStrangeChars () {
    _testFullPathWithStrageChars "{"
    _testFullPathWithStrageChars "]"
    _testFullPathWithStrageChars "[.]"
+    _testFullPathWithStrageChars " "
 }

 testRegex () {

--- a/src/doc/Permissions.rst
+++ b/src/doc/Permissions.rst
@@ -28,7 +28,7 @@ A Permission Rule consists of:

 -  A type: Permission Rules can be of ``Grant`` or ``Deny`` type, either
   granting or denying specific permissions.
-  A `role <manuals/general/roles>`__ (or user): For which users the
+-  A :doc:`role <roles>` (or user): For which users the
   permission shall be granted or denied.
 -  A permission action: Which action shall be permitted or forbidden,
   for example all retrieval, or modifications of a specific entity.

--- a/src/doc/administration/server_side_scripting.rst
+++ b/src/doc/administration/server_side_scripting.rst
@@ -50,7 +50,7 @@ script invocation from a skeleton directory, located in the server directory, in
 - `readme.md` :: A small text file describing the purpose of the directory.

 Users of CaosDB are invited to populate the directory with whatever their
-scripts need.
+scripts need (for example a `.pycaosdb.ini` file).

 Invocation
 ------------

--- a/src/doc/conf.py
+++ b/src/doc/conf.py
@@ -55,8 +55,8 @@ templates_path = ['_templates']
 # The suffix(es) of source filenames.
 # You can specify multiple suffix as a list of string:
 #
-# source_suffix = ['.rst', '.md']
-source_suffix = '.rst'
+source_suffix = ['.rst', '.md']
+# source_suffix = '.rst'

 # The master toctree document.
 master_doc = 'index'

--- a/src/doc/development/benchmarking.md
+++ b/src/doc/development/benchmarking.md
-
-# Benchmarking CaosDB #
-
-Benchmarking CaosDB may encompass several distinct areas: How much time is spent in the server's
-Java code, how much time is spent inside the SQL backend, are the same costly methods clalled more
-than once?  This documentation tries to answer some questions connected with these benchmarking
-aspects and give you the tools to answer your own questions.
-
-## Tools for the benchmarking ##
-
-For averaging over many runs of comparable requests and for putting the database into a
-representative state, Python scripts are used.  The scripts can be found in the `caosdb-dev-tools`
-repository, located at [https://gitlab.indiscale.com/caosdb/src/caosdb-dev-tools](https://gitlab.indiscale.com/caosdb/src/caosdb-dev-tools) in the folder
-`benchmarking`:
-
-### `fill_database.py` ###
-
-This commandline script is meant for filling the database with enough data to represeny an actual
-real-life case, it can easily create hundreds of thousands of Entities.
-
-The script inserts predefined amounts of randomized Entities into the database, RecordTypes,
-Properties and Records.  Each Record has a random (but with defined average) number of Properties,
-some of which may be references to other Records which have been inserted before.  Actual insertion
-of the Entities into CaosDB is done in chunks of a defined size.
-
-Users can tell the script to store times needed for the insertion of each chunk into a tsv file.
-
-### `measure_execution_time.py` ###
-
-A somewhat outdated script which executes a given query a number of times and then save statistics
-about the `TransactionBenchmark` readings (see below for more information about the transaction
-benchmarks) delivered by the server.
-
-### Benchmarking SQL commands ###
-
-MariaDB and MySQL have a feature to enable the logging of SQL queries' times.  This logging must be
-turned on on the SQL server as described in the [upstream documentation](https://mariadb.com/kb/en/general-query-log/).  For the Docker
-environment LinkAhead, this can conveniently be done with `linkahead mysqllog {on,off,store}`.
-
-### External JVM profilers ###
-
-Additionally to the transaction benchmarks, it is possible to benchmark the server execution via
-external Java profilers.  For example, [VisualVM](https://visualvm.github.io/) can connect to JVMs running locally or remotely
-(e.g. in a Docker container).  To enable this in LinkAhead's Docker environment, set
-
-```yaml
-devel:
-  profiler: true
-```
-
-Most profilers, like as VisualVM, only gather cumulative data for call trees, they do not provide
-complete call graphs (as callgrind/kcachegrind would do).  They also do not differentiate between
-calls with different query strings, as long as the Java process flow is the same (for example, `FIND
-Record 1234` and `FIND Record A WHICH HAS A Property B WHICH HAS A Property C>100` would be handled
-equally).
-
-## How to set up a representative database ##
-For reproducible results, it makes sense to start off with an empty database and fill it using the
-`fill_database.py` script, for example like this:
-
-```sh
-./fill_database.py -t 500 -p 700 -r 10000 -s 100 --clean
-```
-
-The `--clean` argument is not strictly necessary when the database was empty before, but it may make
-sense when there have been previous runs of the command.  This example would create 500 RecordTypes,
-700 Properties and 10000 Records with randomized properties, everything is inserted in chunks of 100
-Entities.
-
-## How to measure request times ##
-
-If the execution of the Java components is of interest, the VisualVM profiler should be started and
-connected to the server before any requests to the server are started.
-
-When doing performance tests which are used for detailed analysis, it is important that
-
-1. CaosDB is in a reproducible state, which should be documented
-2. all measurements are repeated several times to account for inevitable variance in access (for
-   example file system caching, network variablity etc.)
-
-### Filling the database ###
-
-By simply adding the option `-T logfile.tsv` to the `fill_database.py` command above, the times for
-inserting the records are stored in a tsv file and can be analyzed later.
-
-### Obtain statistics about a query ###
-
-To repeat single queries a number of times, `measure_execution_time.py` can be used, for example:
-
-```sh
-./measure_execution_time.py -n 120 -q "FIND MusicalInstrument WHICH IS REFERENCED BY Analysis"
-```
-
-This command executes the query 120 times, additional arguments could even plot the
-TransactionBenchmark results directly.
-
-## What is measured ##
-
-For a consistent interpretation, the exact definitions of the measured times are as follows:
-
-### SQL logs ###
-
-As per https://mariadb.com/kb/en/general-query-log, the logs store only the time at which the SQL
-server received a query, not the duration of the query.
-
-#### Possible future enhancements ####
-
- The `query_response_time` plugin may be additionally used in the future, see
-  https://mariadb.com/kb/en/query-response-time-plugin
-
-### Transaction benchmarks ###
-
-Transaction benchmarking manually collects timing information for each transaction.  At defined
-points, different measurements can be made, accumulated and will finally be returned to the client.
-Benchmark objects may consist of sub benchmarks and have a number of measurement objects, which
-contain the actual statistics.
-
-Because transaction benchmarks must be manually added to the server code, they only monitor those
-code paths where they are added.  On the other hand, their manual nature allows for a more
-abstracted analysis of performance bottlenecks.
-
-### Java profiler ###
-
-VisualVM records for each thread the call tree, specifically which methods were called how often and
-how much time was spent inside these methods.
-
-### Global requests ###
-
-Python scripts may measure the global time needed for the execution of each request.
-`fill_database.py` obtains its numbers this way.
--- a/src/doc/index.rst
+++ b/src/doc/index.rst
@@ -27,5 +27,3 @@ Indices and tables
 ==================

 * :ref:`genindex`
-* :ref:`modindex`
-* :ref:`search`
--- a/src/doc/roles.md
+++ b/src/doc/roles.md
@@ -10,7 +10,7 @@ users may have the same role, and there may be roles without any users.

 The user and their roles are always returned by the server in answers to requests
 and can thus be interpreted and used by clients.  The most important use though
-is [permission](manuals/general/permissions) checking in the server: Access and
+is [permission](permissions) checking in the server: Access and
 modification of
 entities can be controlled via roles, so that users of a given role are allowed
 or denied certain actions.  Incidentally, the permission to edit the permissions
@@ -32,4 +32,4 @@ There are some special roles, which are automatically assigned to users:

 Except for the `anonymous` role, these special roles are not returned by the
 server, but can nevertheless be used to define
-[permissions](manuals/general/permissions).
+[permissions](permissions.rst).
--- a/src/main/java/org/caosdb/server/CaosDBServer.java
+++ b/src/main/java/org/caosdb/server/CaosDBServer.java
@@ -118,7 +118,7 @@ public class CaosDBServer extends Application {
  private static ArrayList<Runnable> postShutdownHooks = new ArrayList<Runnable>();
  private static ArrayList<Runnable> preShutdownHooks = new ArrayList<Runnable>();
  private static boolean START_BACKEND = true;
-  private static boolean INSECURE = false;
+  private static boolean NO_TLS = false;
  public static final String REQUEST_TIME_LOGGER = "REQUEST_TIME_LOGGER";
  public static final String REQUEST_ERRORS_LOGGER = "REQUEST_ERRORS_LOGGER";
  private static Scheduler SCHEDULER;
@@ -161,24 +161,23 @@ public class CaosDBServer extends Application {
   * Parse the command line arguments.
   *
   * <ul>
-   *   <li>"nobackend": flag to run caosdb without any backend (for testing purposes)
-   *   <li>"insecure": flag to start only a http server (no https server)
+   *   <li>"--no-backend": flag to run caosdb without any backend (for testing purposes)
+   *   <li>"--no-tls": flag to start only an HTTP server (no HTTPS server)
   * </ul>
   *
-   * <p>Both flags are only available in the debug mode which is controlled by the `caosdb.debug`
-   * JVM Property.
+   * <p>The --no-backend flag is only available in debug mode, which is controlled by the
+   * `caosdb.debug` JVM property.
   *
   * @param args
   */
  private static void parseArguments(final String[] args) {
    for (final String s : args) {
-      if (s.equals("nobackend")) {
+      if (s.equals("--no-backend")) {
        START_BACKEND = false;
-      } else if (s.equals("insecure")) {
-        INSECURE = true;
+      } else if (s.equals("--no-tls")) {
+        NO_TLS = true;
      }
    }
-    INSECURE = INSECURE && isDebugMode(); // only allow insecure in debug mode
    START_BACKEND = START_BACKEND || !isDebugMode(); // always start backend if not in debug mode
  }

@@ -348,7 +347,7 @@ public class CaosDBServer extends Application {
    final int maxTotalConnections =
        Integer.parseInt(getServerProperty(ServerProperties.KEY_MAX_CONNECTIONS));

-    if (INSECURE) {
+    if (NO_TLS) {
      runHTTPServer(port_http, initialConnections, maxTotalConnections);
    } else {
      runHTTPSServer(

--- a/src/main/java/org/caosdb/server/accessControl/UserSources.java
+++ b/src/main/java/org/caosdb/server/accessControl/UserSources.java
@@ -79,8 +79,18 @@ public class UserSources extends HashMap<String, UserSource> {

  private static UserSources instance = new UserSources();

+  /**
+   * Check whether a user exists.
+   *
+   * @param principal - principal of the user.
+   * @return true iff the user identified by the given {@link Principal} exists.
+   */
  public static boolean isUserExisting(final Principal principal) {
-    return instance.get(principal.getRealm()).isUserExisting(principal.getUsername());
+    UserSource userSource = instance.get(principal.getRealm());
+    if (userSource != null) {
+      return userSource.isUserExisting(principal.getUsername());
+    }
+    return false;
  }

  private UserSources() {