diff --git a/.docker/Dockerfile b/.docker/Dockerfile
new file mode 100644
index 0000000000000000000000000000000000000000..b300a1a97aa22b3eafc91ef89c01bbd7111edd62
--- /dev/null
+++ b/.docker/Dockerfile
@@ -0,0 +1,39 @@
+FROM debian:latest
+RUN apt-get update && \
+    apt-get install \
+    curl \
+    git \
+    openjdk-11-jdk-headless \
+    python3-autopep8 \
+    python3-pip \
+    python3-pytest \
+    tox \
+    -y
+COPY .docker/wait-for-it.sh /wait-for-it.sh
+ARG PYLIB=dev
+ADD https://gitlab.indiscale.com/api/v4/projects/97/repository/commits/${PYLIB} \
+    pylib_version.json
+RUN git clone https://gitlab.indiscale.com/caosdb/src/caosdb-pylib.git && \
+    cd caosdb-pylib && git checkout ${PYLIB} && pip3 install .
+ARG ADVANCED=dev
+ADD https://gitlab.indiscale.com/api/v4/projects/104/repository/commits/${ADVANCED} \
+    advanced_version.json
+RUN git clone https://gitlab.indiscale.com/caosdb/src/caosdb-advanced-user-tools.git && \
+    cd caosdb-advanced-user-tools && git checkout ${ADVANCED} && pip3 install .
+COPY . /git
+
+# Delete .git because it is huge.
+RUN rm -r /git/.git
+
+# Install pycaosdb.ini for the tests
+RUN mv /git/.docker/tester_pycaosdb.ini /git/integrationtests/pycaosdb.ini
+
+RUN cd /git/ && pip3 install .
+
+WORKDIR /git/integrationtests
+# wait for the server, ...
+CMD /wait-for-it.sh caosdb-server:10443 -t 500 -- \
+    # ... install pycaosdb.ini for the server-side scripts
+    cp /git/.docker/sss_pycaosdb.ini /scripting/home/.pycaosdb.ini && \
+    # ... and run the tests
+    pytest-3 .
diff --git a/.docker/cert.sh b/.docker/cert.sh
new file mode 100755
index 0000000000000000000000000000000000000000..e22cfba2995b5fd9d812232f562b7254233fe5b0
--- /dev/null
+++ b/.docker/cert.sh
@@ -0,0 +1,62 @@
+#!/bin/bash
+
+# ** header v3.0
+# This file is a part of the CaosDB Project.
+#
+# Copyright (C) 2019 Daniel Hornung, Göttingen
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as
+# published by the Free Software Foundation, either version 3 of the
+# License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+#
+# ** end header
+
+
+# Creates a directory `cert` and certificates in this directory.
+#
+# The hostname for which the certificate is created can be changed by setting
+# the environment variable CAOSHOSTNAME.
+#
+# ## Overview of variables ##
+#
+# - CAOSHOSTNAME :: Hostname for the key (default: localhost)
+# - KEYPW :: Password for the key (default is CaosDBSecret)
+# - KEYSTOREPW :: Password for the key store (same as KEYPW)
+function cert() {
+  mkdir -p cert
+  cd cert
+  KEYPW="${KEYPW:-CaosDBSecret}"
+  CAOSHOSTNAME="${CAOSHOSTNAME:-localhost}"
+  KEYSTOREPW="${KEYPW:-}"
+  # NOTE: KEYPW and KEYSTOREPW are the same, due to Java limitations.
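+  # The commands below (1) generate an AES-256-encrypted RSA key, (2) create a
+  # self-signed certificate for ${CAOSHOSTNAME}, (3) bundle key and certificate
+  # into a PKCS12 file and (4) import that bundle into the Java keystore
+  # caosdb.jks used by the CaosDB server.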
+  KEYPW="${KEYPW}" openssl genrsa -aes256 -out caosdb.key.pem \
+    -passout env:KEYPW 2048
+  # Certificate is for ${CAOSHOSTNAME} (default: localhost)
+  KEYPW="${KEYPW}" openssl req -new -x509 -key caosdb.key.pem \
+    -out caosdb.cert.pem -passin env:KEYPW \
+    -subj "/C=/ST=/L=/O=/OU=/CN=${CAOSHOSTNAME}"
+  KEYPW="${KEYPW}" KEYSTOREPW="$KEYSTOREPW" openssl pkcs12 -export \
+    -inkey caosdb.key.pem -in caosdb.cert.pem -out all-certs.pkcs12 \
+    -passin env:KEYPW -passout env:KEYPW
+
+  keytool -importkeystore -srckeystore all-certs.pkcs12 -srcstoretype PKCS12 \
+    -deststoretype pkcs12 -destkeystore caosdb.jks \
+    -srcstorepass "${KEYPW}" \
+    -destkeypass "${KEYPW}" -deststorepass "$KEYSTOREPW"
+  echo "Certificates successfully created."
+}
+
+cert
diff --git a/.docker/docker-compose.yml b/.docker/docker-compose.yml
new file mode 100644
index 0000000000000000000000000000000000000000..bbee24fbd8c898c479a0fafa13000ddf506d00eb
--- /dev/null
+++ b/.docker/docker-compose.yml
@@ -0,0 +1,43 @@
+version: '3.7'
+services:
+  sqldb:
+    image: mariadb:10.4
+    environment:
+      MYSQL_ROOT_PASSWORD: caosdb1234
+    networks:
+      - caosnet
+  caosdb-server:
+    image: "$CI_REGISTRY/caosdb/src/caosdb-deploy:$CAOSDB_TAG"
+    user: 999:999
+    depends_on:
+      - sqldb
+    networks:
+      - caosnet
+    volumes:
+      - type: bind
+        source: ./cert
+        target: /opt/caosdb/cert
+      - type: volume
+        source: extroot
+        target: /opt/caosdb/mnt/extroot
+      - type: volume
+        source: scripting
+        target: /opt/caosdb/git/caosdb-server/scripting
+      - type: volume
+        source: authtoken
+        target: /opt/caosdb/git/caosdb-server/authtoken
+    ports:
+      # - "from_outside:from_inside"
+      - "10443:10443"
+      - "10080:10080"
+    environment:
+      DEBUG: 1
+      CAOSDB_CONFIG_AUTHTOKEN_CONFIG: "conf/core/authtoken.example.yaml"
+      CAOSDB_CONFIG_TRANSACTION_BENCHMARK_ENABLED: "TRUE"
+volumes:
+  scripting:
+  extroot:
+  authtoken:
+networks:
+  caosnet:
+    driver: bridge
diff --git a/.docker/run.sh b/.docker/run.sh
new file mode 100755
index 0000000000000000000000000000000000000000..b0e1a716f28516b83043fb3fdb6594515a0bafd4
--- /dev/null
+++ b/.docker/run.sh
@@ -0,0 +1,5 @@
+#!/bin/sh
+
+docker-compose -f tester.yml run tester
+rv=$?
+echo $rv > result
diff --git a/.docker/sss_pycaosdb.ini b/.docker/sss_pycaosdb.ini
new file mode 100644
index 0000000000000000000000000000000000000000..de2867f8dc66b3e81f10f35e40c36f9cb8591604
--- /dev/null
+++ b/.docker/sss_pycaosdb.ini
@@ -0,0 +1,9 @@
+; this is the pycaosdb.ini for the server-side-scripting home.
+[Connection]
+url = https://caosdb-server:10443
+cacert = /opt/caosdb/cert/caosdb.cert.pem
+debug = 0
+timeout = 5000
+
+[Misc]
+sendmail = /usr/local/bin/sendmail_to_file
diff --git a/.docker/tester.yml b/.docker/tester.yml
new file mode 100644
index 0000000000000000000000000000000000000000..83db879c6072bfdea7b3212c833116b96bb54d0c
--- /dev/null
+++ b/.docker/tester.yml
@@ -0,0 +1,26 @@
+version: '3.7'
+services:
+  tester:
+    image: "$CI_REGISTRY_IMAGE"
+    networks:
+      - docker_caosnet
+    volumes:
+      - type: bind
+        source: ./cert
+        target: /cert
+      - type: volume
+        source: extroot
+        target: /extroot
+      - type: volume
+        source: scripting
+        target: /scripting
+      - type: volume
+        source: authtoken
+        target: /authtoken
+networks:
+  docker_caosnet:
+    external: true
+volumes:
+  scripting:
+  extroot:
+  authtoken:
diff --git a/.docker/tester_pycaosdb.ini b/.docker/tester_pycaosdb.ini
new file mode 100644
index 0000000000000000000000000000000000000000..2159dec250b3dcb2f16043d12bdbe73675e4d75c
--- /dev/null
+++ b/.docker/tester_pycaosdb.ini
@@ -0,0 +1,32 @@
+; pycaosdb.ini for pytest test suites.
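+; (the Dockerfile installs this file as /git/integrationtests/pycaosdb.ini)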
+
+[IntegrationTests]
+; location of the scripting bin dir which is used for the test scripts from the
+; server's perspective.
+test_server_side_scripting.bin_dir.server = scripting/bin-debug/
+; location of the scripting bin dir which is used for the test scripts from the
+; pyinttest's perspective.
+test_server_side_scripting.bin_dir.local = /scripting/bin-debug/
+
+; location of the files from the pyinttest's perspective
+test_files.test_insert_files_in_dir.local = /extroot/test_insert_files_in_dir/
+; location of the files from the caosdb server's perspective
+test_files.test_insert_files_in_dir.server = /opt/caosdb/mnt/extroot/test_insert_files_in_dir/
+
+; location of the one-time tokens from the pyinttest's perspective
+test_authentication.admin_token_crud = /authtoken/admin_token_crud.txt
+test_authentication.admin_token_expired = /authtoken/admin_token_expired.txt
+test_authentication.admin_token_3_attempts = /authtoken/admin_token_3_attempts.txt
+
+
+[Connection]
+url = https://caosdb-server:10443/
+username = admin
+cacert = /cert/caosdb.cert.pem
+debug = 0
+
+password_method = plain
+password = caosdb
+
+timeout = 500
diff --git a/.docker/wait-for-it.sh b/.docker/wait-for-it.sh
new file mode 100755
index 0000000000000000000000000000000000000000..d69e99f1f13257b559dce2433de0515379663efa
--- /dev/null
+++ b/.docker/wait-for-it.sh
@@ -0,0 +1,182 @@
+#!/usr/bin/env bash
+# License:
+# From https://github.com/vishnubob/wait-for-it
+# The MIT License (MIT)
+# Use this script to test if a given TCP host/port are available
+
+WAITFORIT_cmdname=${0##*/}
+
+echoerr() { if [[ $WAITFORIT_QUIET -ne 1 ]]; then echo "$@" 1>&2; fi }
+
+usage()
+{
+    cat << USAGE >&2
+Usage:
+    $WAITFORIT_cmdname host:port [-s] [-t timeout] [-- command args]
+    -h HOST | --host=HOST       Host or IP under test
+    -p PORT | --port=PORT       TCP port under test
+                                Alternatively, you specify the host and port as host:port
+    -s | --strict               Only execute subcommand if the test succeeds
+    -q | --quiet                Don't output any status messages
+    -t TIMEOUT | --timeout=TIMEOUT
+                                Timeout in seconds, zero for no timeout
+    -- COMMAND ARGS             Execute command with args after the test finishes
+USAGE
+    exit 1
+}
+
+wait_for()
+{
+    if [[ $WAITFORIT_TIMEOUT -gt 0 ]]; then
+        echoerr "$WAITFORIT_cmdname: waiting $WAITFORIT_TIMEOUT seconds for $WAITFORIT_HOST:$WAITFORIT_PORT"
+    else
+        echoerr "$WAITFORIT_cmdname: waiting for $WAITFORIT_HOST:$WAITFORIT_PORT without a timeout"
+    fi
+    WAITFORIT_start_ts=$(date +%s)
+    while :
+    do
+        if [[ $WAITFORIT_ISBUSY -eq 1 ]]; then
+            nc -z $WAITFORIT_HOST $WAITFORIT_PORT
+            WAITFORIT_result=$?
+        else
+            (echo > /dev/tcp/$WAITFORIT_HOST/$WAITFORIT_PORT) >/dev/null 2>&1
+            WAITFORIT_result=$?
+        fi
+        if [[ $WAITFORIT_result -eq 0 ]]; then
+            WAITFORIT_end_ts=$(date +%s)
+            echoerr "$WAITFORIT_cmdname: $WAITFORIT_HOST:$WAITFORIT_PORT is available after $((WAITFORIT_end_ts - WAITFORIT_start_ts)) seconds"
+            break
+        fi
+        sleep 1
+    done
+    return $WAITFORIT_result
+}
+
+wait_for_wrapper()
+{
+    # In order to support SIGINT during timeout: http://unix.stackexchange.com/a/57692
+    if [[ $WAITFORIT_QUIET -eq 1 ]]; then
+        timeout $WAITFORIT_BUSYTIMEFLAG $WAITFORIT_TIMEOUT $0 --quiet --child --host=$WAITFORIT_HOST --port=$WAITFORIT_PORT --timeout=$WAITFORIT_TIMEOUT &
+    else
+        timeout $WAITFORIT_BUSYTIMEFLAG $WAITFORIT_TIMEOUT $0 --child --host=$WAITFORIT_HOST --port=$WAITFORIT_PORT --timeout=$WAITFORIT_TIMEOUT &
+    fi
+    WAITFORIT_PID=$!
+    trap "kill -INT -$WAITFORIT_PID" INT
+    wait $WAITFORIT_PID
+    WAITFORIT_RESULT=$?
+ if [[ $WAITFORIT_RESULT -ne 0 ]]; then + echoerr "$WAITFORIT_cmdname: timeout occurred after waiting $WAITFORIT_TIMEOUT seconds for $WAITFORIT_HOST:$WAITFORIT_PORT" + fi + return $WAITFORIT_RESULT +} + +# process arguments +while [[ $# -gt 0 ]] +do + case "$1" in + *:* ) + WAITFORIT_hostport=(${1//:/ }) + WAITFORIT_HOST=${WAITFORIT_hostport[0]} + WAITFORIT_PORT=${WAITFORIT_hostport[1]} + shift 1 + ;; + --child) + WAITFORIT_CHILD=1 + shift 1 + ;; + -q | --quiet) + WAITFORIT_QUIET=1 + shift 1 + ;; + -s | --strict) + WAITFORIT_STRICT=1 + shift 1 + ;; + -h) + WAITFORIT_HOST="$2" + if [[ $WAITFORIT_HOST == "" ]]; then break; fi + shift 2 + ;; + --host=*) + WAITFORIT_HOST="${1#*=}" + shift 1 + ;; + -p) + WAITFORIT_PORT="$2" + if [[ $WAITFORIT_PORT == "" ]]; then break; fi + shift 2 + ;; + --port=*) + WAITFORIT_PORT="${1#*=}" + shift 1 + ;; + -t) + WAITFORIT_TIMEOUT="$2" + if [[ $WAITFORIT_TIMEOUT == "" ]]; then break; fi + shift 2 + ;; + --timeout=*) + WAITFORIT_TIMEOUT="${1#*=}" + shift 1 + ;; + --) + shift + WAITFORIT_CLI=("$@") + break + ;; + --help) + usage + ;; + *) + echoerr "Unknown argument: $1" + usage + ;; + esac +done + +if [[ "$WAITFORIT_HOST" == "" || "$WAITFORIT_PORT" == "" ]]; then + echoerr "Error: you need to provide a host and port to test." + usage +fi + +WAITFORIT_TIMEOUT=${WAITFORIT_TIMEOUT:-15} +WAITFORIT_STRICT=${WAITFORIT_STRICT:-0} +WAITFORIT_CHILD=${WAITFORIT_CHILD:-0} +WAITFORIT_QUIET=${WAITFORIT_QUIET:-0} + +# check to see if timeout is from busybox? +WAITFORIT_TIMEOUT_PATH=$(type -p timeout) +WAITFORIT_TIMEOUT_PATH=$(realpath $WAITFORIT_TIMEOUT_PATH 2>/dev/null || readlink -f $WAITFORIT_TIMEOUT_PATH) +if [[ $WAITFORIT_TIMEOUT_PATH =~ "busybox" ]]; then + WAITFORIT_ISBUSY=1 + WAITFORIT_BUSYTIMEFLAG="-t" + +else + WAITFORIT_ISBUSY=0 + WAITFORIT_BUSYTIMEFLAG="" +fi + +if [[ $WAITFORIT_CHILD -gt 0 ]]; then + wait_for + WAITFORIT_RESULT=$? + exit $WAITFORIT_RESULT +else + if [[ $WAITFORIT_TIMEOUT -gt 0 ]]; then + wait_for_wrapper + WAITFORIT_RESULT=$? + else + wait_for + WAITFORIT_RESULT=$? + fi +fi + +if [[ $WAITFORIT_CLI != "" ]]; then + if [[ $WAITFORIT_RESULT -ne 0 && $WAITFORIT_STRICT -eq 1 ]]; then + echoerr "$WAITFORIT_cmdname: strict mode, refusing to execute subprocess" + exit $WAITFORIT_RESULT + fi + exec "${WAITFORIT_CLI[@]}" +else + exit $WAITFORIT_RESULT +fi + diff --git a/.gitignore b/.gitignore index 6df7e28419776d5976ed34c11a69b39a3cbd3dec..11c17317428964b82b47d55399a4dde1a9e698a9 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,4 @@ -src/newcrawler.egg-info/ +src/caoscrawler.egg-info/ .coverage __pycache__ .tox diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml new file mode 100644 index 0000000000000000000000000000000000000000..a30140e684b465d40b964f1bfb9b97959b29834d --- /dev/null +++ b/.gitlab-ci.yml @@ -0,0 +1,248 @@ +# +# This file is a part of the CaosDB Project. +# +# Copyright (C) 2018 Research Group Biomedical Physics, +# Max-Planck-Institute for Dynamics and Self-Organization Göttingen +# Copyright (C) 2019 Henrik tom Wörden +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. 
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+
+variables:
+  CI_REGISTRY_IMAGE: $CI_REGISTRY/caosdb/src/caosdb-crawler/testenv:$CI_COMMIT_REF_NAME
+  CI_REGISTRY_IMAGE_BASE: $CI_REGISTRY/caosdb/src/caosdb-pyinttest/base:latest
+
+stages:
+  - info
+  - setup
+  - cert
+  - style
+  - test
+  - deploy
+
+
+# During the test stage the CI pipeline (which runs in a "root" docker) starts
+# two docker containers with docker-compose (one for the caosdb-server, another
+# for the mysql-backend). Then a third docker container is started which
+# contains the test suite and executes it.
+#
+# +-------------(root docker)-----------------------------+
+# |                                                        |
+# |           +-(caosdb_mysqlbackend)-------------+        |
+# |           |                                   |        |
+# |           +-----------------------------------+        |
+# |           +-(caosdb-server)-------------------+        |
+# |           |                                   |        |
+# |           | /opt/caosdb                       |        |
+# | .-------->| + /git/caosdb-server/scripting/   |        |
+# | | .------>| + /git/caosdb-server/authtoken/   |        |
+# | | | .---->| + /mnt/extroot                    |        |
+# | | | | .-->| + /cert                           |        |
+# | | | | |   |                                   |        |
+# | | | | |   +-----------------------------------+        |
+# | | | | |                                                |
+# | | | | |   filesystem:                                  |
+# | | | | *--- /cert --------------.                       |
+# | | | |                          |                       |
+# | | | |  volumes:                |                       |
+# | | | *----- extroot ----------. |                       |
+# | | *------- scripting ------. | |                       |
+# | *--------- authtoken ----. | | |                       |
+# |                          | | | |                       |
+# | +-(crawler tests)---+    | | | |                       |
+# | |                   |    | | | |                       |
+# | | /authtoken        |<---* | | |                       |
+# | | /scripting        |<-----* | |                       |
+# | | /extroot          |<-------* |                       |
+# | | /cert             |<---------*                       |
+# | |                   |                                  |
+# | +-------------------+                                  |
+# +--------------------------------------------------------+
+#
+# In the root docker, the directory /cert is mounted to .docker/cert relative
+# to this repository. The directory is created during the cert stage of this
+# pipeline and a certificate is created in there. The certificate is then
+# available in mounted directories in the server and crawler containers.
+#
+# Additional volumes in the root docker are shared by the caosdb-server and the crawler
+# containers. These volumes are intended to be used for testing server-side scripting and
+# file-system features.
+#

.env: &env
+  - echo "Pipeline triggered by $TRIGGERED_BY_REPO@$TRIGGERED_BY_REF ($TRIGGERED_BY_HASH)"
+  - echo "CI_REGISTRY_IMAGE_BASE = $CI_REGISTRY_IMAGE_BASE"
+  - echo "CI_REGISTRY_IMAGE = $CI_REGISTRY_IMAGE"
+  - echo "CAOSDB_TAG = $CAOSDB_TAG"
+  - echo "REFTAG = $REFTAG"
+  - echo "F_BRANCH = $F_BRANCH"
+  - echo "CI_COMMIT_REF_NAME = $CI_COMMIT_REF_NAME"
+  - ls -lah /image-cache/
+
+  - F_BRANCH=${F_BRANCH:-$CI_COMMIT_REF_NAME}
+  - echo $F_BRANCH
+  - if [[ "$REFTAG" == "" ]] ; then
+      if [[ "$F_BRANCH" == "dev" ]] ; then
+        REFTAG=dev;
+      fi;
+    fi
+  - REFTAG=${REFTAG:-dev_F_${F_BRANCH}}
+
+  - echo $F_BRANCH
+
+  - if [[ "$CAOSDB_TAG" == "" ]]; then
+      CAOSDB_TAG=${REFTAG};
+    fi
+  - echo $CAOSDB_TAG
+
+info:
+  tags: [cached-dind]
+  image: docker:20.10
+  stage: info
+  needs: []
+  script:
+    - *env
+
+unittest:
+  tags: [cached-dind]
+  stage: test
+  image: $CI_REGISTRY_IMAGE
+  script:
+    - tox
+
+inttest:
+  tags: [docker]
+  services:
+    - docker:20.10-dind
+  variables:
+    # This is a workaround for the gitlab-runner health check mechanism when
+    # using docker-dind service. The runner will otherwise guess the port
+    # wrong and the health check will time out.
+    SERVICE_PORT_2376_TCP_PORT: 2375
+  stage: test
+  image: $CI_REGISTRY_IMAGE_BASE
+  needs: [cert]
+  script:
+    - *env
+    - docker login -u gitlab-ci-token -p $CI_JOB_TOKEN $CI_REGISTRY
+    - echo $CAOSDB_TAG
+
+    - cd .docker
+    # Store mariadb version
+    - MARIADBVERSION=$(grep mariadb docker-compose.yml | awk '{print $2}')
+    - echo "mariadb image:"$MARIADBVERSION
+    - time docker load < /image-cache/caosdb-crawler-testenv-${CI_COMMIT_REF_NAME}.tar || true
+    - time docker load < /image-cache/caosdb-${REFTAG}.tar || time docker load < /image-cache/caosdb-dev.tar || true
+    - time docker load < /image-cache/$MARIADBVERSION.tar || true
+    - docker pull $CI_REGISTRY/caosdb/src/caosdb-deploy:$CAOSDB_TAG || CAOSDB_TAG=dev
+    - docker pull $CI_REGISTRY_IMAGE
+
+    # Here, the server and the mysql backend docker are being started
+    - CAOSDB_TAG=$CAOSDB_TAG docker-compose up -d
+
+    # Store versions of CaosDB parts
+    - docker exec -u 0 -t docker_caosdb-server_1 cat /opt/caosdb/git/caosdb_pylib_commit > hash_pylib
+    - docker exec -u 0 -t docker_caosdb-server_1 cat /opt/caosdb/git/caosdb_webui_commit > hash_webui
+    - docker exec -u 0 -t docker_caosdb-server_1 cat /opt/caosdb/git/caosdb_server_commit > hash_server
+    - docker exec -u 0 -t docker_caosdb-server_1 cat /opt/caosdb/git/caosdb_mysqlbackend_commit > hash_mysql
+    - docker exec -u 0 -t docker_caosdb-server_1 cat /opt/caosdb/git/caosdb_proto_commit > hash_proto
+    - cat hash_server
+    - cat hash_proto
+    - cat hash_mysql
+    - cat hash_webui
+    - cat hash_pylib
+    # Run the actual tests. This starts a new docker container within which
+    # the tests run. The return value is stored in .docker/result
+    - /bin/sh ./run.sh
+
+    # Save logs
+    - docker logs docker_caosdb-server_1 &> ../caosdb_log.txt
+    - docker logs docker_sqldb_1 &> ../mariadb_log.txt
+    - cd ..
+
+    # Stop the server
+    - docker-compose -f .docker/docker-compose.yml down
+
+    # the crawler docker writes the return value of the tests into the
+    # file result
+    - rc=`cat .docker/result`
+    - exit $rc
+  dependencies: [cert]
+  timeout: 3h
+  artifacts:
+    paths:
+      - caosdb_log.txt
+      - mariadb_log.txt
+      - .docker/hash_*
+    expire_in: 1 week

+build-testenv:
+  tags: [cached-dind]
+  image: docker:20.10
+  stage: setup
+  timeout: 2h
+  only:
+    - schedules
+    - web
+    - pushes
+  needs: []
+  script:
+    - df -h
+    - command -v wget
+    - if [ -z "$PYLIB" ]; then
+        if echo "$CI_COMMIT_REF_NAME" | grep -c "^f-" ; then
+          echo "Check if pylib has branch $CI_COMMIT_REF_NAME" ;
+          if wget https://gitlab.indiscale.com/api/v4/projects/97/repository/branches/${CI_COMMIT_REF_NAME} ; then
+            PYLIB=$CI_COMMIT_REF_NAME ;
+          fi;
+        fi;
+      fi;
+    - PYLIB=${PYLIB:-dev}
+    - echo $PYLIB
+
+    - docker login -u gitlab-ci-token -p $CI_JOB_TOKEN $CI_REGISTRY
+    # use the general latest image here, or the latest one of the specific branch
+    - docker build
+      --build-arg PYLIB=${PYLIB}
+      --build-arg ADVANCED=${ADVANCED:-dev}
+      --file .docker/Dockerfile
+      -t $CI_REGISTRY_IMAGE .
+    - docker push $CI_REGISTRY_IMAGE
+    - docker save $CI_REGISTRY_IMAGE > /image-cache/caosdb-crawler-testenv-${CI_COMMIT_REF_NAME}.tar
+
+cert:
+  tags: [docker]
+  stage: cert
+  image: $CI_REGISTRY_IMAGE
+  needs:
+    - job: build-testenv
+      optional: true
+  artifacts:
+    paths:
+      - .docker/cert/
+    expire_in: 1 week
+  script:
+    - cd .docker
+    - CAOSHOSTNAME=caosdb-server ./cert.sh
+
+style:
+  tags: [docker]
+  stage: style
+  image: $CI_REGISTRY_IMAGE
+  needs:
+    - job: build-testenv
+      optional: true
+  script:
+    - autopep8 -r --diff --exit-code .
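+    # --diff/--exit-code: only report needed changes and return non-zero if any; allow_failure below keeps this job advisory.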
+ allow_failure: true diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 0000000000000000000000000000000000000000..d0a2883005d6651f0ba3ef22b9fa5fe0d03349aa --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,24 @@ +# Changelog + +All notable changes to this project will be documented in this file. + +The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), +and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). + +## [Unreleased] + +### Added + +* Everything + +### Changed + +* Renamed module from `newcrawler` to `caoscrawler` + +### Deprecated + +### Removed + +### Fixed + +### Security diff --git a/README.md b/README.md index 88d8a6d9965e67ec268bff979ceb709dbf650129..59b88aaa36ed97d8c2cc9e4474820e3dad4a478b 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -# newcrawler +# caoscrawler A new crawler for CaosDB. diff --git a/integrationtests/README.md b/integrationtests/README.md index 5c308f51a332d5a930f91eb30f0d93032ae47627..96789ed9f02036a0c7cc25ca1a60d9f0042a5557 100644 --- a/integrationtests/README.md +++ b/integrationtests/README.md @@ -1,3 +1,2 @@ -1. Clear database (see clear_database.py) -2. Insert model (see insert_model.py) -3. Run test.py +1. Mount test_data/extroot as extroot folder in the CaosDB server +2. use an empty server diff --git a/integrationtests/model.yml b/integrationtests/basic_example/model.yml similarity index 100% rename from integrationtests/model.yml rename to integrationtests/basic_example/model.yml diff --git a/integrationtests/test.py b/integrationtests/basic_example/test.py similarity index 91% rename from integrationtests/test.py rename to integrationtests/basic_example/test.py index efff64305bbc9dd24ebf7817fb9d10d0523c9f5b..6e35f7f2e4532acb5a2c3c80d06d9faeabd0fe0a 100755 --- a/integrationtests/test.py +++ b/integrationtests/basic_example/test.py @@ -28,21 +28,22 @@ module description """ +import os from caosdb import EmptyUniqueQueryError import argparse import sys from argparse import RawTextHelpFormatter -from newcrawler import Crawler +from caoscrawler import Crawler import caosdb as db -from newcrawler.identifiable_adapters import CaosDBIdentifiableAdapter +from caoscrawler.identifiable_adapters import CaosDBIdentifiableAdapter import pytest from caosadvancedtools.models.parser import parse_model_from_yaml import yaml +# TODO is not yet merged in caosadvancedtools from caosadvancedtools.testutils import clear_database, set_test_key set_test_key("10b128cf8a1372f30aa3697466bb55e76974e0c16a599bb44ace88f19c8f61e2") -import os def rfp(*pathcomponents): """ @@ -52,14 +53,12 @@ def rfp(*pathcomponents): return os.path.join(os.path.dirname(__file__), *pathcomponents) - - @pytest.fixture def usemodel(): model = parse_model_from_yaml(rfp("model.yml")) model.sync_data_model(noquestion=True, verbose=False) - + @pytest.fixture def ident(): ident = CaosDBIdentifiableAdapter() @@ -68,12 +67,12 @@ def ident(): ident.register_identifiable( "Person", db.RecordType() .add_parent(name="Person") - #.add_property(name="first_name") + # .add_property(name="first_name") .add_property(name="last_name")) ident.register_identifiable( "Measurement", db.RecordType() .add_parent(name="Measurement") - #.add_property(name="identifier") + # .add_property(name="identifier") .add_property(name="date") .add_property(name="project")) ident.register_identifiable( @@ -106,7 +105,8 @@ def crawler_extended(ident): updateList = cr.updateList fileList = [r for r in updateList if r.role == "File"] for f in fileList: - 
f.file = rfp("..", "unittests", "test_directories", "examples_article", f.file) + f.file = rfp("..", "unittests", "test_directories", + "examples_article", f.file) return cr @@ -150,6 +150,7 @@ def test_multiple_insertions(clear_database, usemodel, ident, crawler): assert len(ins) == 0 assert len(ups) == 0 + def test_insertion(clear_database, usemodel, ident, crawler): ins, ups = crawler.synchronize() @@ -169,6 +170,7 @@ def test_insertion(clear_database, usemodel, ident, crawler): assert len(ins) == 0 assert len(ups) == 0 + def test_insertion_and_update(clear_database, usemodel, ident, crawler): ins, ups = crawler.synchronize() @@ -184,7 +186,8 @@ def test_insertion_and_update(clear_database, usemodel, ident, crawler): ins, ups = cr.synchronize() assert len(ins) == 0 assert len(ups) == 1 - + + def test_identifiable_update(clear_database, usemodel, ident, crawler): ins, ups = crawler.synchronize() @@ -197,23 +200,23 @@ def test_identifiable_update(clear_database, usemodel, ident, crawler): l = cr.updateList for record in l: if (record.parents[0].name == "Measurement" and - record.get_property("date").value == "2020-01-03"): + record.get_property("date").value == "2020-01-03"): # maybe a bit weird, but add an email address to a measurement - record.add_property(name="email", value="testperson@testaccount.test") + record.add_property( + name="email", value="testperson@testaccount.test") print("one change") break ins, ups = cr.synchronize() assert len(ins) == 0 assert len(ups) == 1 - # Test the change within one property: cr = Crawler(debug=True, identifiableAdapter=ident) crawl_standard_test_directory(cr) l = cr.updateList for record in l: if (record.parents[0].name == "Measurement" and - record.get_property("date").value == "2020-01-03"): + record.get_property("date").value == "2020-01-03"): record.add_property(name="email", value="testperson@coolmail.test") print("one change") break @@ -227,7 +230,7 @@ def test_identifiable_update(clear_database, usemodel, ident, crawler): l = cr.updateList for record in l: if (record.parents[0].name == "Measurement" and - record.get_property("date").value == "2020-01-03"): + record.get_property("date").value == "2020-01-03"): record.add_property(name="email", value="testperson@coolmail.test") record.get_property("date").value = "2012-01-02" print("one change") @@ -239,7 +242,8 @@ def test_identifiable_update(clear_database, usemodel, ident, crawler): def test_file_insertion_dry(clear_database, usemodel, ident): crawler_extended = Crawler(debug=True, identifiableAdapter=ident) - crawl_standard_test_directory(crawler_extended, cfood="scifolder_extended.yml") + crawl_standard_test_directory( + crawler_extended, cfood="scifolder_extended.yml") updateList = crawler_extended.updateList fileList = [r for r in updateList if r.role == "File"] assert len(fileList) == 11 @@ -269,6 +273,7 @@ def test_file_insertion(clear_database, usemodel, ident, crawler_extended): assert len(r) == 1 assert r[0].get_property("ReadmeFile").value == f.id + def test_file_update(clear_database, usemodel, ident, crawler_extended): ins1, ups1 = crawler_extended.synchronize(commit_changes=True) fileList_ins = [r for r in ins1 if r.role == "File"] @@ -279,7 +284,8 @@ def test_file_update(clear_database, usemodel, ident, crawler_extended): updateList = cr.updateList fileList = [r for r in updateList if r.role == "File"] for f in fileList: - f.file = rfp("..", "unittests", "test_directories", "examples_article", f.file) + f.file = rfp("..", "unittests", "test_directories", + 
"examples_article", f.file) ins2, ups2 = cr.synchronize(commit_changes=True) assert len(ups1) == 0 assert len(ups2) == 0 @@ -288,21 +294,21 @@ def test_file_update(clear_database, usemodel, ident, crawler_extended): res = db.execute_query("Find File") assert len(res) == 11 assert len(res[0].parents) == 0 - + cr2 = Crawler(debug=True, identifiableAdapter=ident) crawl_standard_test_directory(cr2, cfood="scifolder_extended2.yml") updateList = cr2.updateList fileList = [r for r in updateList if r.role == "File"] for f in fileList: - f.file = rfp("..", "unittests", "test_directories", "examples_article", f.file) + f.file = rfp("..", "unittests", "test_directories", + "examples_article", f.file) ins3, ups3 = cr2.synchronize(commit_changes=True) assert len(ups3) == 11 res = db.execute_query("Find File") assert len(res) == 11 assert res[0].parents[0].name == "ProjectMarkdownReadme" - # TODO: Implement file update checks (based on checksum) # Add test with actual file update: diff --git a/integrationtests/realworld_example/crawl.sh b/integrationtests/realworld_example/crawl.sh new file mode 100755 index 0000000000000000000000000000000000000000..55a2a331fe517a539e2dd937ac35605c72b496c9 --- /dev/null +++ b/integrationtests/realworld_example/crawl.sh @@ -0,0 +1,4 @@ +#!/bin/bash +python -m caosadvancedtools.loadFiles /opt/caosdb/mnt/extroot/data +python load_and_insert_json_models.py +python test_dataset_crawler.py diff --git a/integrationtests/realworld_example/load_and_insert_json_models.py b/integrationtests/realworld_example/load_and_insert_json_models.py new file mode 100644 index 0000000000000000000000000000000000000000..682fd9c77531e63ed18dd13417399ad0d18a8de2 --- /dev/null +++ b/integrationtests/realworld_example/load_and_insert_json_models.py @@ -0,0 +1,47 @@ +#!/usr/bin/env python3 +# encoding: utf-8 +# +# This file is a part of the CaosDB Project. +# +# Copyright (C) 2022 Indiscale GmbH <info@indiscale.com> +# Copyright (C) 2022 Henrik tom Wörden <h.tomwoerden@indiscale.com> +# Copyright (C) 2022 Florian Spreckelsen <f.spreckelsen@indiscale.com> +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see <https://www.gnu.org/licenses/>. 
+# +import sys + +from caosadvancedtools.models.parser import parse_model_from_json_schema, parse_model_from_yaml + + +def main(): + # First load dataspace data model + dataspace_definitions = parse_model_from_json_schema( + "schema/dataspace.schema.json") + dataspace_definitions.sync_data_model(noquestion=True) + + # Then general dataset definitions + dataset_definitions = parse_model_from_json_schema( + "schema/dataset.schema.json") + dataset_definitions.sync_data_model(noquestion=True) + + # Finally, add inheritances as defined in yaml + dataset_inherits = parse_model_from_yaml( + "schema/dataset-inheritance.yml") + dataset_inherits.sync_data_model(noquestion=True) + + +if __name__ == "__main__": + + sys.exit(main()) diff --git a/integrationtests/realworld_example/test_dataset_crawler.py b/integrationtests/realworld_example/test_dataset_crawler.py new file mode 100644 index 0000000000000000000000000000000000000000..8713f490399471dc324c542f5d0e96bfe161b60a --- /dev/null +++ b/integrationtests/realworld_example/test_dataset_crawler.py @@ -0,0 +1,123 @@ +#!/usr/bin/env python3 +# encoding: utf-8 +# +# This file is a part of the CaosDB Project. +# +# Copyright (C) 2022 Indiscale GmbH <info@indiscale.com> +# Copyright (C) 2022 Henrik tom Wörden <h.tomwoerden@indiscale.com> +# Copyright (C) 2022 Florian Spreckelsen <f.spreckelsen@indiscale.com> +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see <https://www.gnu.org/licenses/>. +# + +""" +module description +""" +import json +import os + +import caosdb as db + +from caoscrawler.crawl import Crawler +from caoscrawler.converters import JSONFileConverter, DictConverter +from caoscrawler.identifiable_adapters import CaosDBIdentifiableAdapter +from caoscrawler.structure_elements import File, JSONFile, Directory +import pytest +from caosadvancedtools.models.parser import parse_model_from_json_schema, parse_model_from_yaml + +#from caosadvancedtools.testutils import clear_database, set_test_key +import sys + +# TODO is not yet merged in caosadvancedtools +# set_test_key("10b128cf8a1372f30aa3697466bb55e76974e0c16a599bb44ace88f19c8f61e2") + + +def rfp(*pathcomponents): + """ + Return full path. + Shorthand convenience function. 
+ """ + return os.path.join(os.path.dirname(__file__), *pathcomponents) + + +DATADIR = rfp("..", "test_data", "extroot", "realworld_example") + + +@pytest.fixture +def usemodel(): + # First load dataspace data model + dataspace_definitions = parse_model_from_json_schema( + os.path.join(DATADIR, "schema", "dataspace.schema.json")) + dataspace_definitions.sync_data_model(noquestion=True) + + # Then general dataset definitions + dataset_definitions = parse_model_from_json_schema( + os.path.join(DATADIR, "schema", "dataset.schema.json")) + dataset_definitions.sync_data_model(noquestion=True) + + # Finally, add inheritances as defined in yaml + dataset_inherits = parse_model_from_yaml( + os.path.join(DATADIR, "schema", "dataset-inheritance.yml")) + dataset_inherits.sync_data_model(noquestion=True) + + +def test_dataset( + # clear_database, + usemodel): + # json_file_path = rfp("test_directories", "single_file_test_data", "testjson.json") + + ident = CaosDBIdentifiableAdapter() + ident.register_identifiable( + "license", db.RecordType().add_parent("license").add_property("name")) + ident.register_identifiable("project_type", db.RecordType( + ).add_parent("project_type").add_property("name")) + ident.register_identifiable("Person", db.RecordType( + ).add_parent("Person").add_property("full_name")) + + crawler = Crawler(debug=True, identifiableAdapter=ident) + crawler_definition = crawler.load_definition( + os.path.join(DATADIR, "dataset_cfoods.yml")) + # print(json.dumps(crawler_definition, indent=3)) + # Load and register converter packages: + converter_registry = crawler.load_converters(crawler_definition) + # print("DictIntegerElement" in converter_registry) + + records = crawler.start_crawling( + Directory("data", os.path.join(DATADIR, 'data')), + crawler_definition, + converter_registry + ) + subd = crawler.debug_tree + subc = crawler.debug_metadata + # print(json.dumps(subc, indent=3)) + # print(subd) + # print(subc) + # print(records) + ins, ups = crawler.synchronize() + + dataspace = db.execute_query("FIND RECORD Dataspace WITH name=35 AND dataspace_id=20002 AND " + "archived=FALSE AND url='https://datacloud.de/index.php/f/7679'" + " AND Person", unique=True) + assert dataspace.get_property("start_date").value == "2022-03-01" + db.execute_query("FIND RECORD Person with full_name='Max Schmitt' AND" + " given_name='Max'", unique=True) + + dataset = db.execute_query(f"FIND RECORD Dataset with Dataspace={dataspace.id} AND title=" + "'Random numbers created on a random autumn day in a random office'" + "", unique=True) + assert db.execute_query(f"COUNT RECORD with id={dataset.id} AND WHICH REFERENCES Person WITH full_name=" + "'Alexa Nozone' AND WHICH REFERENCES Person WITH full_name='Max Schmitt'" + "") == 1 + assert db.execute_query(f"COUNT RECORD with id={dataset.id} AND WHICH REFERENCES Event WITH " + "start_datetime='2022-02-10T16:36:48+01:00'") == 1 diff --git a/integrationtests/test_data/extroot/realworld_example/data/35/.dataspace.json b/integrationtests/test_data/extroot/realworld_example/data/35/.dataspace.json new file mode 100644 index 0000000000000000000000000000000000000000..26e11e4e16081b8b5b64a83889bc1f4d160ef0e7 --- /dev/null +++ b/integrationtests/test_data/extroot/realworld_example/data/35/.dataspace.json @@ -0,0 +1,15 @@ +{ + "name": "DEMO", + "dataspace_id": 20002, + "archived": false, + "coordinator": { + "full_name": "Max Schmitt", + "given_name": "Max", + "family_name": "Schmitt", + "email": "max.schmitt@email.de" + }, + "start_date": "2022-03-01", + "end_date": 
"2032-02-28", + "comment": "Demonstration data space for DataCloud", + "url": "https://datacloud.de/index.php/f/7679" +} diff --git a/integrationtests/test_data/extroot/realworld_example/data/35/03_raw_data/001_dataset1/demo-dataset.csv b/integrationtests/test_data/extroot/realworld_example/data/35/03_raw_data/001_dataset1/demo-dataset.csv new file mode 100644 index 0000000000000000000000000000000000000000..7a4d684e50cf4fa0699c66d27661d0d54055ec8b --- /dev/null +++ b/integrationtests/test_data/extroot/realworld_example/data/35/03_raw_data/001_dataset1/demo-dataset.csv @@ -0,0 +1,101 @@ +index,A[kg],B[s],pH,Temp.[C] +0,2.1209183975976957,-0.5658499891692009,-0.8391639362482752,0.6210332089995103 +1,-1.2155508955759597,-1.0141121577750831,0.2503340429095144,0.7560156296323594 +2,-1.0191141299527218,-1.5870495901656396,0.6811842117478961,-0.25776671384531147 +3,-0.8235788683146266,1.1688759819188137,-0.15841036014621737,0.24351773490785233 +4,-2.028210212186099,-0.15000944869896093,0.7344551834722798,-1.0594635581726441 +5,0.8578345931586077,-1.0478958942647336,-0.5059960285526023,0.6141193812881873 +6,-0.7585068400011461,-0.45812334415522366,-0.6299981228519985,-0.072295788065162 +7,-0.34875455645064296,-0.49936600901639105,0.08492189470338947,0.24398792231786676 +8,-0.491473523786921,-1.1815449374689073,-0.23631388788457763,0.8801868915647684 +9,-1.291852196630842,0.4956544058017087,1.7176555991727498,1.8889309443940632 +10,-0.974327795079914,-0.6002779223325445,1.4950878953418667,-0.4750187681874636 +11,0.863708396863823,0.4867513929363103,-1.2500529683835453,2.1711592870838112 +12,-1.0518542498779602,-0.6800136223939168,-0.5593377295003794,-0.23451862458342732 +13,0.013421028872223972,-1.7652967848993042,0.302518679323854,1.124258888392337 +14,1.1387734213591119,-0.5602347718731282,-0.6908747870526222,0.905906598269778 +15,-1.8032949181114486,0.18858416406523845,1.0083249532267977,0.6969475009127225 +16,-0.42755813629599176,-1.0354063212247375,-0.24666198541039489,-1.2245102779938972 +17,-0.558268266895522,-1.4564784210249142,1.6162446783371565,-0.6109432350045504 +18,-0.9759505344957924,-2.780175134826593,3.039543722358096,-1.258487109407404 +19,-0.042261223623348665,0.7827311969447484,0.8902139085357877,0.33130889065513175 +20,-0.43764310886282315,-0.8338864816830261,0.8545198929035823,-0.8330242660029193 +21,0.2910454990578444,0.40786200750721635,-0.8115126892604917,0.7108997766944964 +22,0.41446462010439317,-1.0965365861313923,-0.1816041240266455,-0.18304466068648742 +23,-0.5187715545823834,-0.46490147833949275,-0.5059346590831783,0.6998562249774912 +24,2.4491154744839005,-0.3554192977203785,-0.6604902675826654,-0.9434392815439072 +25,-0.5083188860395834,0.2781724921583019,-0.4340136020292349,0.02629089617543565 +26,-0.9854213292611846,-1.398313530263303,0.05552818415139104,-0.20282242071816114 +27,1.0808664341388348,-0.681501179909626,0.6492258431774035,-0.41774069067997716 +28,-1.1767497846165254,1.0817469159915034,-1.524089495721789,0.703812702135731 +29,0.19626402088297137,-1.731421126100085,0.33753714074823216,1.167207071332792 +30,-1.1808345594828473,-0.2820078693924212,-0.8720833031493173,0.8643708946275879 +31,0.8284163458216123,0.20722015645321426,0.29071068457985955,2.6180265991342315 +32,-0.08203655784081282,0.060308831720906446,0.9519485488783623,0.7350446746473065 +33,-0.9071581669506105,0.6088044300190749,1.0099718941738625,0.002799079788086574 +34,-0.42977850177492904,1.2900375327057412,0.32028642454115197,0.8301665482611077 
+35,1.0852695299159272,-0.7040390830488096,0.7964627034904589,0.5892571532287761 +36,-1.5667114288837196,0.19932071915614016,-1.0037399027933205,0.5715977614420107 +37,1.3367378436097728,-0.4584285824179284,-0.4435084312392094,-1.3448283883056802 +38,-0.03788754387000687,-0.37288494267798383,-0.5643391975832854,0.43485956543590193 +39,1.0634390535750102,1.0881233131592658,1.2921865320956318,-0.07293734130819148 +40,1.6786504380461766,-0.03043290400609124,2.66472625811549,-0.016638240963738466 +41,-1.657581538683817,0.8240214695327108,0.32914391919723984,0.08007211199118686 +42,0.04171224685709963,-0.9854865121535178,-0.3195510216437891,-0.42540430453161987 +43,0.6506526831507736,-1.159358101323352,-1.2789107289085737,0.10499609768025624 +44,0.7402635450212406,-0.44202303578095753,-0.5748164371395315,0.5600113473434154 +45,-0.9809738202025933,0.16868168368656386,-1.5883259666916558,-2.6955712214488345 +46,-1.8495816486925372,-1.6954982682847552,1.387648046113911,0.8455399256972358 +47,1.0442607146494682,0.44438084748213075,-0.6547675875380801,-0.5557546828614935 +48,0.32905474147201974,-0.7323591467714324,0.8555098512789541,2.4647371835928196 +49,-2.5131333956577557,1.4005121469909907,-2.162216422615549,-0.3797761578463441 +50,-1.406182674849582,-0.33037485118390236,-0.30720520090625775,0.3765108318500068 +51,1.4315461764484496,0.4490657382715407,0.14688708820540236,-1.3363710028523919 +52,-1.3546100448551868,0.35309094153560083,1.1314974294901488,-0.8299500202669722 +53,-0.7668372422803534,1.3427856896905794,0.11144680945543838,0.5488627384438831 +54,2.6377507721791997,1.86851303077989,0.1358347611054535,0.0021631807468969044 +55,-0.2814604476092987,-0.8110890245372547,0.2914246407211869,1.3009776744589592 +56,-0.08220515064674763,0.06131679740379093,1.2240755323078314,1.6651435947789437 +57,-1.5833977101974248,-1.0390852809695386,0.9472604405151627,-1.1238493594739791 +58,0.060801913229076375,-1.1395369395697963,-0.6773504352336591,-0.7823844332031786 +59,0.3719151864023754,-2.6589573541115885,0.9138765623088898,1.9179285751965107 +60,0.16875501543121765,-0.21075290840365637,-0.15712808326461272,-1.095263810678978 +61,-0.6676220651512094,-2.392512574657398,-0.1970135407082481,1.1303688380560521 +62,-0.3508037371211798,0.37103055819752395,0.1319143246551687,-0.8442765717512588 +63,0.5744187610995448,0.2824163982139891,-0.23250484081352427,-0.009480528299369923 +64,-1.033847039653939,-0.6062251775571341,0.8745680740327043,0.10611431160660695 +65,0.8616095853453857,-0.7902852788672261,0.9924735544245377,-0.39017477285341734 +66,-0.25797403501959537,0.9776756368066659,-0.1774701795502288,0.8457628045096433 +67,0.1879011473947124,0.4880410431165719,0.33941695573743247,-0.3098695458944371 +68,0.12908240475251795,-0.3929831705571321,-0.9815115481276334,-0.6596680503662373 +69,0.47271005121390686,-0.27585706457228726,0.659750762879994,-1.621655291178758 +70,1.2805576221047092,1.255690982276119,0.8893506172744224,0.36843763617254915 +71,-1.8222077377715007,-1.2618097663744718,-1.2393746501949852,0.22742537143827415 +72,-0.7670935921671362,0.6632357605887813,-1.8652052380554516,-0.3566398262186697 +73,0.368513682832951,0.22484190975093934,0.7207761550523548,-0.4607733151206031 +74,-1.6353304746550132,-1.0835890398703607,0.6240782484796151,1.497716990815385 +75,1.2631082191418077,1.9388688317848526,0.43069457351954177,-0.1240852286700612 +76,1.4229945541316606,1.685287372911636,0.282616738427184,1.6075806781661712 +77,0.15907038463344916,-1.1862747951875707,-2.162241163696355,0.9048269906929861 
+78,0.8724544719304812,-0.06423147646568356,0.28403221059939265,0.7315950326908587 +79,-0.5099002924982818,0.8674753935115029,0.0015306969822590103,-0.793334121698815 +80,0.16756755106838742,-0.8374595440291756,1.871547652925694,-0.019948470822079158 +81,0.5333319586985659,-1.6076411272904392,0.4676478392958759,0.35245743045221734 +82,-0.5292514883314576,-1.2708056558247538,-1.7043012586370947,0.3391676901971921 +83,1.8042184317880245,1.2058943020996364,-2.3228385290614084,1.2008461670776127 +84,0.8671835774935015,0.9953640415286719,-1.4439272409362103,0.9410085688802767 +85,-0.118043369635042,0.41649838899300184,-1.2993225013700294,1.9232397286356342 +86,-0.32517525711392864,0.062481999278585824,-0.27679161049236684,0.06555334954413516 +87,-0.39336711632154264,0.0790516633124132,-0.600204351381406,1.321653482130525 +88,-0.9789171222367312,0.30688902979967303,0.10346158693798674,0.3160642853129814 +89,0.4332454768673768,-0.620828990252391,-1.0710192139922268,0.15027972939295933 +90,3.1092106995021096,0.354640404873306,1.8164064530643516,1.8911595405760606 +91,0.7027212216033006,-1.9367414347582559,-0.26797308254438235,1.1063820286927997 +92,0.6665636818250888,0.7953561614160027,1.8164132351496374,1.5760380002772454 +93,-1.4931006068027144,0.2680846074746922,-0.30697269318261355,-0.5300118028948997 +94,0.9258476710590248,0.15464742730214845,0.5847769923450901,-0.8405562302565793 +95,0.3015957125126854,2.9697978560379323,2.2793789547159338,0.13951152352691706 +96,0.4109127837045091,0.04501972229381512,0.5969781411176205,1.6443498245829686 +97,0.07956221270863263,0.009072464866011773,-0.6905847540574735,-0.9639714900867246 +98,2.9172401959670817,0.43571229891911717,-0.903738601954934,0.08343820441617454 +99,0.5501333973314503,-0.2511364474548299,1.4945524498890597,-1.1608586317841827 diff --git a/integrationtests/test_data/extroot/realworld_example/data/35/03_raw_data/001_dataset1/metadata.json b/integrationtests/test_data/extroot/realworld_example/data/35/03_raw_data/001_dataset1/metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..64df90e55eff065b1cc249a634444a72f9fd00d2 --- /dev/null +++ b/integrationtests/test_data/extroot/realworld_example/data/35/03_raw_data/001_dataset1/metadata.json @@ -0,0 +1,50 @@ +{ + "title": "Random numbers created on a random autumn day in a random office", + "abstract": "For demonstration purposes we created random numbers on a computer in an office of the CLOUD. 
This demonstration dataset is used in the DataCloud, a curated cloud storage for scientific data.",
+  "Event": [
+    {
+      "longitude": 18.445078548041533,
+      "start_datetime": "2022-02-10T16:36:48+01:00",
+      "latitude": 53.10833068997861,
+      "elevation": 2,
+      "location": "Bremen, Germany"
+    }
+  ],
+  "license": "CC-BY",
+  "authors": [
+    {
+      "firstname": "Max",
+      "lastname": "Schmitt",
+      "full_name": "Max Schmitt",
+      "affiliation": "CLOUD",
+      "ORCID": "0000-0001-6233-1866",
+      "email": "max.schmitt@email.de"
+    },
+    {
+      "firstname": "Alexa",
+      "lastname": "Nozone",
+      "full_name": "Alexa Nozone",
+      "affiliation": "CLOUD",
+      "email": "alexa.nozone@email.de"
+    }
+  ],
+  "comment": "For questions about the DataCloud or this demonstration dataset, contact research-data@email.de",
+  "project": {
+    "name": "Demonstration of Extremely important Metadata in Folders",
+    "full_name": "Project",
+    "project_acronym": "DEMO",
+    "project_type": "national",
+    "institute": "CLOUD",
+    "start_date": "2021-10-01",
+    "end_date": "2031-12-31",
+    "url": "https://www.cloud.de/de/forschung-infrastruktur/forschungsdaten-services.html",
+    "coordinator": {
+      "firstname": "Max",
+      "lastname": "Schmitt",
+      "email": "max.schmitt@email.de"
+    }
+  },
+  "method": {
+    "name": "Random Number Generator"
+  }
+}
diff --git a/integrationtests/test_data/extroot/realworld_example/data/35/03_raw_data/README_RawData.md b/integrationtests/test_data/extroot/realworld_example/data/35/03_raw_data/README_RawData.md
new file mode 100644
index 0000000000000000000000000000000000000000..2317ff8616c43e75f52637ff581017bf4a50d468
--- /dev/null
+++ b/integrationtests/test_data/extroot/realworld_example/data/35/03_raw_data/README_RawData.md
@@ -0,0 +1,25 @@
+# Raw Data
+
+The `03_raw_data` folder is here to store all raw data of each dataset
+associated with the project – the data that has not been edited by you yet but
+which you plan to use in your research. It can be e.g. your unprocessed field
+sampling records, or useful data from an online repository. Organize your data
+in this folder in the following way:
+
+- Each dataset should reside inside a subfolder. It is recommended to number and name these folders clearly, e.g. `03_raw_data/001_precipitationgermany2017`.
+
+- **IMPORTANT**: provide the folder with information about your raw data by
+  filling out a metadata form for each of your datasets! For this,
+
+  - either copy the `metadata-template.json` file and put it into your dataset
+    folder. Open the copy with a text editor and fill out the fields.
+  - or use the metadata editor in the DataCloud web client (press the "+" button
+    and use "New metadata.json" file)
+
+  If you can’t find information about your data to fill in here, you should
+  reconsider using it - it is important to be able to trace your data sources to
+  ensure a FAIR scientific process!
+
+- For processing any of the data, make a copy of the dataset and paste it into
+  the `04_data_processing` folder. This way, you make sure to keep your raw data
+  in its original state.
\ No newline at end of file
diff --git a/integrationtests/test_data/extroot/realworld_example/data/35/03_raw_data/metadata-template.json b/integrationtests/test_data/extroot/realworld_example/data/35/03_raw_data/metadata-template.json
new file mode 100644
index 0000000000000000000000000000000000000000..7f457d239321b232fb2db7d46f4e1576c85911b0
--- /dev/null
+++ b/integrationtests/test_data/extroot/realworld_example/data/35/03_raw_data/metadata-template.json
@@ -0,0 +1,52 @@
+{
+  "dataset": {
+    "title": "",
+    "abstract": "See https://github.com/CLOUD/metadata-schema for schema specification",
+    "license": "CC-BY",
+    "authors": [
+      {
+        "firstname": "",
+        "lastname": "",
+        "affiliation": "",
+        "ORCID": "XXXX-XXXX-XXXX-XXXX",
+        "email": "name@domain.de"
+      },
+      {
+        "firstname": "",
+        "lastname": "",
+        "affiliation": "",
+        "email": "name@domain.de",
+        "ORCID": "XXXX-XXXX-XXXX-XXXX"
+      }
+    ],
+    "project": {
+      "name": "",
+      "acronym": "",
+      "type": "DFG/",
+      "institute": "CLOUD",
+      "start_date": "YYYY-MM-DD",
+      "end_date": "YYYY-MM-DD",
+      "url": "",
+      "coordinator": {
+        "lastname": "",
+        "email": "",
+        "firstname": ""
+      }
+    },
+    "events_in_data": false,
+    "events": [
+      {
+        "longitude": 0,
+        "latitude": 0,
+        "elevation": 0,
+        "location": "",
+        "datetime": "YYYY-MM-DDTHH:mm:ss"
+      }
+    ],
+    "method": {
+      "name": "",
+      "url": ""
+    },
+    "max_files": 100
+  }
+}
diff --git a/integrationtests/test_data/extroot/realworld_example/data/35/04_data_processing/README_ProcessedData.md b/integrationtests/test_data/extroot/realworld_example/data/35/04_data_processing/README_ProcessedData.md
new file mode 100644
index 0000000000000000000000000000000000000000..ce1b002b18772b85f4bba3a222574f438a6ed0e3
--- /dev/null
+++ b/integrationtests/test_data/extroot/realworld_example/data/35/04_data_processing/README_ProcessedData.md
@@ -0,0 +1,10 @@
+# Data Processing
+
+The actual work is done in this `04_data_processing` folder. Depending on your
+field and the type and size of your project, you can organize this folder in
+the way that fits your process best. Here, a bit of chaos can happen ;) Keep in
+mind to document your processing steps in the `02_materials_and_methods` folder
+and to put your final results into the `05_results` folder. At the end of your
+project, it should be possible to delete everything in this folder and
+reconstruct the working process using the documentation and raw data from
+previous folders.
diff --git a/integrationtests/test_data/extroot/realworld_example/data/35/04_data_processing/metadata-template.json b/integrationtests/test_data/extroot/realworld_example/data/35/04_data_processing/metadata-template.json
new file mode 100644
index 0000000000000000000000000000000000000000..05f9394dfbfa9a0b2b4844c7080a340585a9050f
--- /dev/null
+++ b/integrationtests/test_data/extroot/realworld_example/data/35/04_data_processing/metadata-template.json
@@ -0,0 +1,52 @@
+{
+  "dataset": {
+    "title": "",
+    "abstract": "See https://github.com/cloud/metadata-schema for schema specification",
+    "license": "CC-BY",
+    "authors": [
+      {
+        "firstname": "",
+        "lastname": "",
+        "affiliation": "",
+        "ORCID": "XXXX-XXXX-XXXX-XXXX",
+        "email": "name@domain.de"
+      },
+      {
+        "firstname": "",
+        "lastname": "",
+        "affiliation": "",
+        "email": "name@domain.de",
+        "ORCID": "XXXX-XXXX-XXXX-XXXX"
+      }
+    ],
+    "project": {
+      "name": "",
+      "acronym": "",
+      "type": "DFG/",
+      "institute": "CLOUD",
+      "start_date": "YYYY-MM-DD",
+      "end_date": "YYYY-MM-DD",
+      "url": "",
+      "coordinator": {
+        "lastname": "",
+        "email": "",
+        "firstname": ""
+      }
+    },
+    "events_in_data": false,
+    "events": [
+      {
+        "longitude": 0,
+        "latitude": 0,
+        "elevation": 0,
+        "location": "",
+        "datetime": "YYYY-MM-DDTHH:mm:ss"
+      }
+    ],
+    "method": {
+      "name": "",
+      "url": ""
+    },
+    "max_files": 100
+  }
+}
diff --git a/integrationtests/test_data/extroot/realworld_example/data/35/05_results/README_Results.md b/integrationtests/test_data/extroot/realworld_example/data/35/05_results/README_Results.md
new file mode 100644
index 0000000000000000000000000000000000000000..ae0ab6571c52c0ec9a1cdc8aba27b31fd3be6fcc
--- /dev/null
+++ b/integrationtests/test_data/extroot/realworld_example/data/35/05_results/README_Results.md
@@ -0,0 +1,7 @@
+# Results
+
+All the results that are final versions of your data analysis or processing
+should be copied into this `05_results` folder. Organize your results folder in
+the way most fitting to your project.
+
+Provide metadata for your results files.
diff --git a/integrationtests/test_data/extroot/realworld_example/data/35/README.md b/integrationtests/test_data/extroot/realworld_example/data/35/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..809d699c462d064ff5193add8e23677bec84b0e0
--- /dev/null
+++ b/integrationtests/test_data/extroot/realworld_example/data/35/README.md
@@ -0,0 +1,5 @@
+# Dataspace: DEMO
+
+This is a Dataspace in the CLOUD DataCloud providing safe, curated cloud storage
+for all of CLOUD's research data.
+
diff --git a/integrationtests/test_data/extroot/realworld_example/dataset_cfoods.yml b/integrationtests/test_data/extroot/realworld_example/dataset_cfoods.yml
new file mode 100644
index 0000000000000000000000000000000000000000..1589cba2b44afc3e2645b0ee72f91bf83b327032
--- /dev/null
+++ b/integrationtests/test_data/extroot/realworld_example/dataset_cfoods.yml
@@ -0,0 +1,528 @@
+# This file is a part of the CaosDB Project.
+#
+# Copyright (C) 2022 IndiScale GmbH <info@indiscale.com>
+# Copyright (C) 2022 Florian Spreckelsen <f.spreckelsen@indiscale.com>
+#
+# This program is free software: you can redistribute it and/or modify it under
+# the terms of the GNU Affero General Public License as published by the Free
+# Software Foundation, either version 3 of the License, or (at your option) any
+# later version.
+# +# This program is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +# FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more +# details. +# +# You should have received a copy of the GNU Affero General Public License along +# with this program. If not, see <https://www.gnu.org/licenses/>. +# +Data: + type: Directory + match: data + subtree: + dataspace_dir: + type: Directory + match: (?P<dataspace_dir_number>[0-9]+) + records: + Dataspace: + name: $dataspace_dir_number + subtree: + dataspace_json: + type: JSONFile + match: .dataspace.json + validate: schema/dataspace.schema.json + subtree: + dataspace_id_element: + type: DictIntegerElement + match_name: "dataspace_id" + match_value: "(?P<id>[0-9]+)" + records: + Dataspace: + dataspace_id: $id + archived_element: + type: DictBooleanElement + match_name: "archived" + match_value: "(?P<archived>.*)" + records: + Dataspace: + archived: $archived + url_element: + type: DictTextElement + match_name: "url" + match_value: "(?P<url>.*)" + records: + Dataspace: + url: $url + coordinator_element: + type: DictDictElement + match_name: "coordinator" + records: + Person: + parents: + - Person + Dataspace: + Person: $Person + subtree: &person_subtree + full_name_element: + type: DictTextElement + match_name: "full_name" + match_value: "(?P<full_name>.*)" + records: + Person: + full_name: $full_name + full_name_nonlatin_element: + type: DictTextElement + match_name: "full_name_nonlatin" + match_value: "(?P<full_name_nonlatin>.*)" + records: + Person: + full_name_nonlatin: $full_name_nonlatin + family_name_element: + type: DictTextElement + match_name: "family_name" + match_value: "(?P<family_name>.*)" + records: + Person: + family_name: $family_name + given_name_element: + type: DictTextElement + match_name: "given_name" + match_value: "(?P<given_name>.*)" + records: + Person: + given_name: $given_name + email_element: + type: DictTextElement + match_name: "email" + match_value: "(?P<email>.*)" + records: + Person: + email: $email + affiliation_element: + type: DictTextElement + match_name: "affiliation" + match_value: "(?P<affiliation>.*)" + records: + Person: + affiliation: $affiliation + ORCID_element: + type: DictTextElement + match_name: "ORCID" + match_value: "(?P<ORCID>.*)" + records: + Person: + ORCID: $ORCID + start_date_element: + type: DictTextElement + match_name: "start_date" + match_value: "(?P<start_date>.*)" + records: + Dataspace: + start_date: $start_date + end_date_element: + type: DictTextElement + match_name: "end_date" + match_value: "(?P<end_date>.*)" + records: + Dataspace: + end_date: $end_date + comment: + type: DictTextElement + match_name: "comment" + match_value: "(?P<comment>.*)" + records: + Dataspace: + comment: $comment + raw_data_dir: + type: Directory + match: 03_raw_data + subtree: &template + # TODO collect info from metadata.json and look into sub-directories + # (only one level) for metadata.json + dataset_dir: + match: (?P<dataset_dir_name>.*) + type: Directory + records: + Dataset: + Dataspace: $Dataspace + subtree: + metadata_json: &metadata_json_template + type: JSONFile + match: metadata.json + validate: schema/dataset.schema.json + subtree: + title_element: + type: DictTextElement + match_name: "title" + match_value: "(?P<title>.*)" + records: + Dataset: + title: $title + authors_element: + type: DictListElement + match_name: "authors" + subtree: + author_element: + type: Dict + records: + 
Person: + parents: + - Person + Dataset: + authors: +$Person + subtree: *person_subtree + abstract_element: + type: DictTextElement + match_name: "abstract" + match_value: "(?P<abstract>.*)" + records: + Dataset: + abstract: $abstract + comment_element: + type: DictTextElement + match_name: "comment" + match_value: "(?P<comment>.*)" + records: + Dataset: + comment: $comment + license_element: + type: DictTextElement + match_name: "license" + match_value: "(?P<license_name>.*)" + records: + license: + # TODO: As soon as such things can be validated, a + # creation of a new license has to be forbidden here + # (although this is effectively done already by + # validating against the above schema.) + name: $license_name + Dataset: + license: $license + dataset_doi_element: + type: DictTextElement + match_name: "dataset_doi" + match_value: "(?P<dataset_doi>.*)" + records: + Dataset: + dataset_doi: $dataset_doi + related_to_dois_element: + type: DictListElement + match_name: "related_to_dois" + subtree: + related_to_doi_element: + type: TextElement + match: "(?P<related_to_doi>.*)" + records: + Dataset: + related_to_dois: +$related_to_doi + Keywords_element: + type: DictListElement + match_name: "Keyword" + Events_element: + type: DictListElement + match_name: "Event" + subtree: + Event_element: + type: Dict + records: + Event: + parents: + - Event + Dataset: + Event: +$Event + subtree: + label_element: + type: DictTextElement + match_name: "label" + match_value: "(?P<label>.*)" + records: + Event: + label: $label + comment_element: + type: DictTextElement + match_name: "comment" + match_value: "(?P<comment>.*)" + records: + Event: + comment: $comment + start_datetime_element: + type: DictTextElement + match_name: start_datetime + match_value: "(?P<start_datetime>.*)" + records: + Event: + start_datetime: $start_datetime + end_datetime_element: + type: DictTextElement + match_name: end_datetime + match_value: "(?P<end_datetime>.*)" + records: + Event: + end_datetime: $end_datetime + longitude_element: + type: DictFloatElement + match_name: "longitude" + match_value: "(?P<longitude>.*)" + records: + Event: + longitude: $longitude + latitude_element: + type: DictFloatElement + match_name: "latitude" + match_value: "(?P<latitude>.*)" + records: + Event: + latitude: $latitude + elevation_element: + type: DictFloatElement + match_name: "elevation" + match_value: "(?P<elevation>.*)" + records: + Event: + elevation: $elevation + location_element: + type: DictTextElement + match_name: location + match_value: "(?P<location>.*)" + records: + Event: + location: $location + igsn_element: + type: DictTextElement + match_name: igsn + match_value: "(?P<igsn>.*)" + records: + Event: + igsn: $igsn + events_in_data_element: + type: DictBooleanElement + match_name: "events_in_data" + match_value: "(?P<events_in_data>.*)" + records: + Dataset: + events_in_data: $events_in_data + geojson_element: + type: DictTextElement + match_name: "geojson" + match_value: "(?P<geojson>.*)" + records: + Dataset: + geojson: $geojson + project_element: + type: DictDictElement + match_name: "project" + records: + Project: + parents: + - Project + Dataset: + Project: $Project + subtree: + full_name_element: + type: DictTextElement + match_name: "full_name" + match_value: "(?P<full_name>.*)" + records: + Project: + full_name: $full_name + project_id_element: + type: DictTextElement + match_name: "project_id" + match_value: "(?P<project_id>.*)" + records: + Project: + project_id: $project_id + project_type_element: + type: 
DictTextElement + match_name: "project_type" + match_value: "(?P<project_type_name>.*)" + records: + project_type: + name: $project_type_name + Project: + project_type: $project_type + institute_element: + type: DictTextElement + match_name: "institute" + match_value: "(?P<institute>.*)" + records: + Project: + institute: $institute + start_date_element: + type: DictTextElement + match_name: "start_date" + match_value: "(?P<start_date>.*)" + records: + Project: + start_date: $start_date + end_date_element: + type: DictTextElement + match_name: "end_date" + match_value: "(?P<end_date>.*)" + records: + Project: + end_date: $end_date + url_element: + type: DictTextElement + match_name: "url" + match_value: "(?P<url>.*)" + records: + Project: + url: $url + coordinators_element: + type: DictListElement + match_name: "coordinators" + subtree: + coordinator_element: + type: Dict + records: + Person: + parents: + - Person + Project: + coordinators: +$Person + subtree: *person_subtree + campaign_element: + type: DictDictElement + match_name: "campaign" + records: + Campaign: + parents: + - Campaign + Dataset: + Campaign: $Campaign + subtree: + label_element: + type: DictTextElement + match_name: "label" + match_value: "(?P<label>.*)" + records: + Campaign: + label: $label + optional_label_element: + type: DictTextElement + match_name: "optional_label" + match_value: "(?P<optional_label>.*)" + records: + Campaign: + optional_label: $optional_label + start_date_element: + type: DictTextElement + match_name: "start_date" + match_value: "(?P<start_date>.*)" + records: + Campaign: + start_date: $start_date + end_date_element: + type: DictTextElement + match_name: "end_date" + match_value: "(?P<end_date>.*)" + records: + Campaign: + end_date: $end_date + responsible_scientists_element: + type: DictListElement + match_name: "responsible_scientists" + subtree: + responsible_scientist_element: + type: Dict + records: + Person: + parents: + - Person + Campaign: + responsible_scientists: +$Person + subtree: *person_subtree + Methods_element: + type: DictListElement + match_name: "Method" + subtree: + Method_element: + type: Dict + records: + Method: + parents: + - Method + Dataset: + Method: +$Method + subtree: + method_name_element: + type: DictTextElement + match_name: "method_name" + match_value: "(?P<method_name>.*)" + records: + Method: + name: $method_name + abbreviation_element: + type: DictTextElement + match_name: "abbreviation" + match_value: "(?P<abbreviation>.*)" + records: + Method: + abbreviation: $abbreviation + url_element: + type: DictTextElement + match_name: "url" + match_value: "(?P<url>.*)" + records: + Method: + url: $url + Taxa_element: + type: DictListElement + match_name: "Taxon" + subtree: + Taxon_element: + type: Dict + records: + Taxon: + parents: + - Taxon + Dataset: + Taxon: +$Taxon + subtree: + taxon_name_element: + type: DictTextElement + match_name: "taxon_name" + match_value: "(?P<taxon_name>.*)" + records: + Taxon: + name: $taxon_name + archived_element: + type: DictBooleanElement + match_name: "archived" + match_value: "(?P<archived>.*)" + records: + Dataset: + archived: $archived + publication_date_element: + type: DictTextElement + match_name: "publication_date" + match_value: "(?P<publication_date>.*)" + records: + Dataset: + publication_date: $publication_date + max_files_element: + type: DictIntegerElement + match_name: "max_files" + match_value: "(?P<max_files>.*)" + records: + Dataset: + max_files: $max_files + auxiliary_file: &aux_file_template + type: File + match: 
"(?P<aux_file_name>(?!metadata.json).*)" + # TODO File, path and reference dataset in file record + child_dataset_dir: + type: Directory + match: (?P<child_dataset_dir_name>.*) + subtree: + metadata_json: *metadata_json_template + auxiliary_file: *aux_file_template + data_processing_dir: + type: Directory + match: 04_data_processing + subtree: *template + results_dir: + type: Directory + match: 05_results + subtree: *template diff --git a/integrationtests/test_data/extroot/realworld_example/schema/README.md b/integrationtests/test_data/extroot/realworld_example/schema/README.md new file mode 100644 index 0000000000000000000000000000000000000000..e0bb95f8b844374bba72c7c6989ac57cfa5fc305 --- /dev/null +++ b/integrationtests/test_data/extroot/realworld_example/schema/README.md @@ -0,0 +1,37 @@ +# Dataset Schemas + +These schema's are derived from the [metadata +schemas](https://github.com/leibniz-zmt/zmt-metadata-schema) used at the Leibniz +Center for Tropical Marine Research (Leibniz ZMT). + +# Copyright + +BSD 3-Clause License + +Copyright (c) 2022 ZMT +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +* Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +* Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +* Neither the name of the copyright holder nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
diff --git a/integrationtests/test_data/extroot/realworld_example/schema/dataset-inheritance.yml b/integrationtests/test_data/extroot/realworld_example/schema/dataset-inheritance.yml new file mode 100644 index 0000000000000000000000000000000000000000..3d12053a0007cdea1005e7673db69f46b35a063d --- /dev/null +++ b/integrationtests/test_data/extroot/realworld_example/schema/dataset-inheritance.yml @@ -0,0 +1,18 @@ +extern: +- Keyword +- Taxon +- full_name +- full_name_nonlatin +- name + +full_name: + inherit_from_obligatory: + - name + +full_name_nonlatin: + inherit_from_obligatory: + - name + +Taxon: + inherit_from_obligatory: + - Keyword diff --git a/integrationtests/test_data/extroot/realworld_example/schema/dataset.schema.json b/integrationtests/test_data/extroot/realworld_example/schema/dataset.schema.json new file mode 100644 index 0000000000000000000000000000000000000000..83d6a60d857349772c960af637671cb21c8abd5d --- /dev/null +++ b/integrationtests/test_data/extroot/realworld_example/schema/dataset.schema.json @@ -0,0 +1,365 @@ +{ + "title": "Dataset", + "description": "", + "type": "object", + "properties": { + "title": { + "type": "string", + "description": "full dataset title" + }, + "authors": { + "type": "array", + "items": { + "type": "object", + "title": "Person", + "properties": { + "full_name": { + "type": "string", + "description": "Full name (latin transcription, all UTF-8 characters allowed)" + }, + "full_name_nonlatin": { + "type": "string", + "description": "Full name (non-latin alphabet)" + }, + "family_name": { + "type": "string", + "description": "Family name (latin transcription)" + }, + "given_name": { + "type": "string", + "description": "Given/other names (latin transcription)" + }, + "affiliation": { + "type": "string" + }, + "ORCID": { + "type": "string", + "description": "ORCID identifier as 16-digit number, e.g. 0000-0001-6233-1866", + "pattern": "^\\d{4}-\\d{4}-\\d{4}-\\d{4}$" + }, + "email": { + "type": "string", + "format": "email" + } + }, + "required": [ + "full_name", + "email" + ] + }, + "minItems": 1, + "uniqueItems": true + }, + "abstract": { + "type": "string", + "minLength": 80, + "maxLength": 1000, + "description": "Abstract with at least 80 characters" + }, + "comment": { + "type": "string" + }, + "license": { + "type": "string", + "enum": [ + "CC-BY", + "CC-BY-SA", + "CC0", + "restricted access" + ] + }, + "dataset_doi": { + "type": "string", + "pattern": "(10[.][0-9]{4,}(?:[.][0-9]+)*/(?:(?![%\"#? ])\\S)+)", + "description": "Dataset DOI, e.g. 10.1594/PANGAEA.938740" + }, + "related_to_dois": { + "type": "array", + "items": { + "type": "string", + "pattern": "(10[.][0-9]{4,}(?:[.][0-9]+)*/(?:(?![%\"#? ])\\S)+)" + }, + "description": "DOIs of related publications and/or datasets, e.g. 
10.1000/182" + }, + "Keyword": { + "type": "array", + "items": { + "type": "object", + "properties": { + "name": { + "type": "string" + } + } + } + }, + "Event": { + "type": "array", + "description": "https://wiki.pangaea.de/wiki/Event", + "items": { + "type": "object", + "properties": { + "label": { + "type": "string" + }, + "comment": { + "type": "string" + }, + "start_datetime": { + "type": "string", + "format": "date-time" + }, + "end_datetime": { + "type": "string", + "format": "date-time" + }, + "longitude": { + "type": "number", + "minimum": -180, + "maximum": 180, + "description": "longitude (W/E) in decimal degree (-180 to 180)" + }, + "latitude": { + "type": "number", + "minimum": -90, + "maximum": 90, + "description": "latitude (N/S) in decimal degree (-90 to 90)" + }, + "elevation": { + "type": "number", + "minimum": -10000, + "maximum": 20000, + "description": "elevation in m" + }, + "location": { + "type": "string", + "description": "geographical location as text (e.g., North Sea; Espoo, Finland)" + }, + "igsn": { + "type": "string", + "description": "International Geo Sample Number (http://www.geosamples.org/aboutigsn)" + } + }, + "required": [ + "longitude", + "latitude", + "start_datetime" + ] + } + }, + "events_in_data": { + "type": "boolean", + "description": "Does the data contain additional information about timepoints and locations?" + }, + "geojson": { + "type": "string", + "pattern": "", + "description": "GeoJSON for complex geographic structures" + }, + "project": { + "title": "Project", + "description": "https://wiki.pangaea.de/wiki/Project", + "type": "object", + "properties": { + "name": { + "type": "string", + "description": "short name of project" + }, + "full_name": { + "type": "string", + "description": "Full name (latin transcription, all UTF-8 characters allowed)" + }, + "project_id": { + "type": "string", + "description": "Project ID" + }, + "project_type": { + "type": "string", + "enum": [ + "DFG", + "EU", + "BMBF", + "national", + "international" + ] + }, + "institute": { + "type": "string", + "description": "place of coordination or project office", + "default": "Centre for Research" + }, + "start_date": { + "type": "string", + "format": "date" + }, + "end_date": { + "type": "string", + "format": "date" + }, + "url": { + "type": "string", + "format": "uri" + }, + "coordinators": { + "type": "array", + "items": { + "type": "object", + "title": "Person", + "properties": { + "full_name": { + "type": "string", + "description": "Full name (latin transcription, all UTF-8 characters allowed)" + }, + "full_name_nonlatin": { + "type": "string", + "description": "Full name (non-latin alphabet)" + }, + "family_name": { + "type": "string", + "description": "Family name (latin transcription)" + }, + "given_name": { + "type": "string", + "description": "Given/other names (latin transcription)" + }, + "affiliation": { + "type": "string" + }, + "ORCID": { + "type": "string", + "description": "ORCID identifier as 16-digit number, e.g. 
0000-0001-6233-1866", + "pattern": "^\\d{4}-\\d{4}-\\d{4}-\\d{4}$" + }, + "email": { + "type": "string", + "format": "email" + } + }, + "required": [ + "full_name", + "email" + ] + }, + "minItems": 1, + "uniqueItems": true + } + }, + "required": ["name", "full_name"] + }, + "campaign": { + "title": "Campaign", + "description": "https://wiki.pangaea.de/wiki/Campaign, synonyms: cruise, expedition, leg, ", + "type": "object", + "properties": { + "label": { + "type": "string", + "description": "is unique and does not contain blanks; uses abbreviations instead of full names" + }, + "optional_label": { + "type": "string" + }, + "start_date": { + "type": "string", + "format": "date" + }, + "end_date": { + "type": "string", + "format": "date" + }, + "responsible_scientists": { + "type": "array", + "items": { + "type": "object", + "title": "Person", + "properties": { + "full_name": { + "type": "string", + "description": "Full name (latin transcription, all UTF-8 characters allowed)" + }, + "full_name_nonlatin": { + "type": "string", + "description": "Full name (non-latin alphabet)" + }, + "family_name": { + "type": "string", + "description": "Family name (latin transcription)" + }, + "given_name": { + "type": "string", + "description": "Given/other names (latin transcription)" + }, + "affiliation": { + "type": "string" + }, + "ORCID": { + "type": "string", + "description": "ORCID identifier as 16-digit number, e.g. 0000-0001-6233-1866", + "pattern": "^\\d{4}-\\d{4}-\\d{4}-\\d{4}$" + }, + "email": { + "type": "string", + "format": "email" + } + }, + "required": [ + "full_name", + "email" + ] + }, + "minItems": 1, + "uniqueItems": true + } + } + }, + "Method": { + "type": "array", + "items": { + "type": "object", + "description": "https://wiki.pangaea.de/wiki/Method", + "properties": { + "method_name": { + "type": "string", + "description": "full official name of tool/instrument/device/gear" + }, + "abbreviation": { + "type": "string", + "description": "may be used for import in an event list to avoid misspellings" + }, + "url": { + "type": "string", + "description": "should contain a web address where an official description of the device can be found" + } + } + } + }, + "Taxon": { + "type": "array", + "items": { + "type": "object", + "properties": { + "name": { + "type": "string" + } + } + } + }, + "archived": { + "type": "boolean", + "description": "Has the dataset been archived?", + "default": false + }, + "publication_date": { + "type": "string", + "format": "date" + }, + "max_files": { + "type": "integer", + "description": "Maximum number of files to be included by the CaosDB crawler", + "default": 100 + } + }, + "required": [ + "title", + "authors", + "abstract" + ] +} diff --git a/integrationtests/test_data/extroot/realworld_example/schema/dataspace.schema.json b/integrationtests/test_data/extroot/realworld_example/schema/dataspace.schema.json new file mode 100644 index 0000000000000000000000000000000000000000..01653bfa821e0a0acbb5a481bfd458e2ed784fb9 --- /dev/null +++ b/integrationtests/test_data/extroot/realworld_example/schema/dataspace.schema.json @@ -0,0 +1,45 @@ +{ + "title": "Dataspace", + "description": "A Dataspace is a folder in the DataCloud with a pre-defined structure", + "type": "object", + "properties": { + "dataspace_id": { + "type": "integer", + "description": "Integer ID of Dataspace (matches LDAP GID)", + "minimum": 20000 + }, + "archived": { "type": "boolean" }, + "url": { + "type": "string", + "description": "link to folder on file system (CaosDB or cloud folder)" + }, + 
"coordinator": { + "type": "object", + "title": "Person", + "properties": { + "full_name": { + "type": "string", + "description": "Full name (latin transcription, all UFT-8 characters allowed)" + }, + "full_name_nonlatin": { + "type": "string", + "description": "Full name (non-latin alphabet)" + }, + "family_name": { + "type": "string", + "description": "Family name (latin transcription)" + }, + "given_name": { + "type": "string", + "description": "Given/other names (latin transcription)" + }, + "email": { "type": "string", "format": "email" } + }, + "required": ["full_name", "email"] + }, + "start_date": { "type": "string", "format": "date" }, + "end_date": { "type": "string", "format": "date" }, + "comment": { "type": "string" } + }, + "required": ["dataspace_id", "url", "coordinator"] +} diff --git a/integrationtests/test_data/extroot/realworld_example/schema/zmt-organisation.yml b/integrationtests/test_data/extroot/realworld_example/schema/zmt-organisation.yml new file mode 100644 index 0000000000000000000000000000000000000000..7e251eeced7bf626e77364fc5555b1cb10dd3afb --- /dev/null +++ b/integrationtests/test_data/extroot/realworld_example/schema/zmt-organisation.yml @@ -0,0 +1,26 @@ +extern: +- name +- url +- Dataset + +german_name: + datatype: TEXT + inherit_from_obligatory: + - name + +Department: + recommended_properties: + url: + german_name: + + +WorkingGroup: + recommended_properties: + Department: + german_name: + url: + +Dataset: + recommended_properties: + WorkingGroup: + diff --git a/setup.cfg b/setup.cfg index b89b07543d91dd35e2238aaddd363e85dd45f2d2..2f8d46b30ee04d68adc6aef69e1a04115bbc44d8 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,5 +1,5 @@ [metadata] -name = newcrawler +name = caoscrawler version = 0.1 author = Alexander Schlemmer author_email = alexander.schlemmer@ds.mpg.de @@ -16,7 +16,6 @@ classifiers = [options] package_dir = = src - packages = find: python_requires = >=3.6 install_requires = @@ -24,6 +23,7 @@ install_requires = caosdb caosadvancedtools yaml-header-tools + pyyaml [options.packages.find] where = src @@ -35,4 +35,4 @@ per-file-ignores = __init__.py:F401 [options.entry_points] console_scripts = - crawler2.0 = newcrawler.crawl:main + caosdb-crawler = caoscrawler.crawl:main diff --git a/src/newcrawler/__init__.py b/src/caoscrawler/__init__.py similarity index 100% rename from src/newcrawler/__init__.py rename to src/caoscrawler/__init__.py diff --git a/src/newcrawler/cfood-schema.yml b/src/caoscrawler/cfood-schema.yml similarity index 100% rename from src/newcrawler/cfood-schema.yml rename to src/caoscrawler/cfood-schema.yml diff --git a/src/newcrawler/converters.py b/src/caoscrawler/converters.py similarity index 96% rename from src/newcrawler/converters.py rename to src/caoscrawler/converters.py index ebc3ab19ceb0f8c18cba5cb1bc3f86d5e31bfb84..b8b9bd2ce7bff206d1233953f05c795a45a5b4ca 100644 --- a/src/newcrawler/converters.py +++ b/src/caoscrawler/converters.py @@ -37,6 +37,7 @@ from .structure_elements import (StructureElement, Directory, File, Dict, JSONFi TextElement, DictTextElement, DictElement, DictListElement) from typing import Optional, Union from abc import abstractmethod +from string import Template import yaml_header_tools import yaml @@ -63,6 +64,7 @@ def handle_value(value: Union[dict, str], values: GeneralStore): - the final value of the property - the collection mode (can be single, list or multiproperty) """ + # @review Florian Spreckelsen 2022-05-13 if type(value) == dict: if "value" not in value: @@ -90,12 +92,20 @@ def 
handle_value(value: Union[dict, str], values: GeneralStore): propvalue = value return (propvalue, collection_mode) - if propvalue.startswith("$"): - propvalue = values[propvalue[1:]] - # Allow the insertion of $ signs at the beginning - if type(propvalue) == str and propvalue.startswith("$$"): - propvalue = propvalue[1:] - + # Check if the replacement is a single variable containing a record: + match = re.match(r"^\$(\{)?(?P<varname>[0-9a-zA-Z_]+)(\})?$", propvalue) + if match is not None: + varname = match.group("varname") + if varname in values: + if values[varname] is None: + propvalue = None + return (propvalue, collection_mode) + if isinstance(values[varname], db.Entity): + propvalue = values[varname] + return (propvalue, collection_mode) + + propvalue_template = Template(propvalue) + propvalue = propvalue_template.safe_substitute(**values.get_storage()) return (propvalue, collection_mode) diff --git a/src/newcrawler/crawl.py b/src/caoscrawler/crawl.py similarity index 96% rename from src/newcrawler/crawl.py rename to src/caoscrawler/crawl.py index 605f1463d9853a100443ea8ed698e4169266fa13..b0f576a2c73342cc1301ff0f27b74bb519768541 100644 --- a/src/newcrawler/crawl.py +++ b/src/caoscrawler/crawl.py @@ -122,6 +122,23 @@ def check_identical(record1: db.Entity, record2: db.Entity, ignore_id=False): return True +def _resolve_datatype(prop: db.Property, remote_entity: db.Entity): + + if remote_entity.role == "Property": + datatype = remote_entity.datatype + elif remote_entity.role == "RecordType": + datatype = remote_entity.name + else: + raise RuntimeError("Cannot set datatype.") + + # Treat lists separately + if isinstance(prop.value, list) and not datatype.startswith("LIST"): + datatype = db.LIST(datatype) + + prop.datatype = datatype + return prop + + class Crawler(object): """ Crawler class that encapsulates crawling functions. 
@@ -188,7 +205,7 @@ class Crawler(object): # tested in the next lines of code: # Load the cfood schema: - with open(files('newcrawler').joinpath('cfood-schema.yml'), "r") as f: + with open(files('caoscrawler').joinpath('cfood-schema.yml'), "r") as f: schema = yaml.safe_load(f) # Add custom converters to converter enum in schema: @@ -243,43 +260,43 @@ class Crawler(object): converter_registry: dict[str, dict[str, str]] = { "Directory": { "converter": "DirectoryConverter", - "package": "newcrawler.converters"}, + "package": "caoscrawler.converters"}, "SimpleFile": { "converter": "SimpleFileConverter", - "package": "newcrawler.converters"}, + "package": "caoscrawler.converters"}, "MarkdownFile": { "converter": "MarkdownFileConverter", - "package": "newcrawler.converters"}, + "package": "caoscrawler.converters"}, "File": { "converter": "FileConverter", - "package": "newcrawler.converters"}, + "package": "caoscrawler.converters"}, "JSONFile": { "converter": "JSONFileConverter", - "package": "newcrawler.converters"}, + "package": "caoscrawler.converters"}, "Dict": { "converter": "DictConverter", - "package": "newcrawler.converters"}, + "package": "caoscrawler.converters"}, "DictBooleanElement": { "converter": "DictBooleanElementConverter", - "package": "newcrawler.converters"}, + "package": "caoscrawler.converters"}, "DictFloatElement": { "converter": "DictFloatElementConverter", - "package": "newcrawler.converters"}, + "package": "caoscrawler.converters"}, "DictTextElement": { "converter": "DictTextElementConverter", - "package": "newcrawler.converters"}, + "package": "caoscrawler.converters"}, "DictIntegerElement": { "converter": "DictIntegerElementConverter", - "package": "newcrawler.converters"}, + "package": "caoscrawler.converters"}, "DictListElement": { "converter": "DictListElementConverter", - "package": "newcrawler.converters"}, + "package": "caoscrawler.converters"}, "DictDictElement": { "converter": "DictDictElementConverter", - "package": "newcrawler.converters"}, + "package": "caoscrawler.converters"}, "TextElement": { "converter": "TextElementConverter", - "package": "newcrawler.converters"} + "package": "caoscrawler.converters"} } # More converters from definition file: @@ -465,8 +482,8 @@ class Crawler(object): """ for p in record.properties: if (isinstance(p.value, list)): + lst = [] for el in p.value: - lst = [] if (isinstance(el, db.Entity) and el.id is None): cached = self.get_identified_record_from_local_cache( el) @@ -481,7 +498,7 @@ class Crawler(object): lst.append(cached) else: lst.append(el) - p.value = lst + p.value = lst if (isinstance(p.value, db.Entity) and p.value.id is None): cached = self.get_identified_record_from_local_cache(p.value) if cached is None: @@ -703,6 +720,10 @@ class Crawler(object): @staticmethod def execute_inserts_in_list(to_be_inserted): + for record in to_be_inserted: + for prop in record.properties: + entity = db.Entity(name=prop.name).retrieve() + prop = _resolve_datatype(prop, entity) print("INSERT") print(to_be_inserted) if len(to_be_inserted) > 0: @@ -719,12 +740,7 @@ class Crawler(object): if prop.id is None: entity = db.Entity(name=prop.name).retrieve() prop.id = entity.id - if entity.role == "Property": - prop.datatype = entity.datatype - elif entity.role == "RecordType": - prop.datatype = entity.name - else: - raise RuntimeError("Cannot set datatype.") + prop = _resolve_datatype(prop, entity) print("UPDATE") print(to_be_updated) if len(to_be_updated) > 0: @@ -753,6 +769,7 @@ class Crawler(object): updateList) # remove unnecessary 
updates from list + # TODO: refactoring of typo for el in to_be_updated: self.replace_entities_by_ids(el) @@ -907,19 +924,11 @@ def crawler_main(args_path, crawler.save_debug_data(args_provenance) if args_load_identifiables is not None: - with open(args_load_identifiables, "r") as f: - identifiable_data = yaml.safe_load(f) ident = CaosDBIdentifiableAdapter() + ident.load_from_yaml_definition(args_load_identifiables) crawler.identifiableAdapter = ident - for k, v in identifiable_data.items(): - rt = db.RecordType() - rt.add_parent(k) - for pn in v: - rt.add_property(name=pn) - ident.register_identifiable(k, rt) - if args_dry_sync: ins, upd = crawler.synchronize(commit_changes=False) inserts = [str(i) for i in ins] @@ -996,6 +1005,7 @@ def parse_args(): return parser.parse_args() + def main(): args = parse_args() return crawler_main( @@ -1009,5 +1019,6 @@ def main(): args.prefix ) + if __name__ == "__main__": sys.exit(main()) diff --git a/src/newcrawler/extension-converters-config-schema.yml b/src/caoscrawler/extension-converters-config-schema.yml similarity index 100% rename from src/newcrawler/extension-converters-config-schema.yml rename to src/caoscrawler/extension-converters-config-schema.yml diff --git a/src/newcrawler/identifiable_adapters.py b/src/caoscrawler/identifiable_adapters.py similarity index 97% rename from src/newcrawler/identifiable_adapters.py rename to src/caoscrawler/identifiable_adapters.py index c1125ee1bdaba71ed4fa339fa74b379604293c98..47fd5324a4803c67d7c9f99448378e7b5f9241bd 100644 --- a/src/newcrawler/identifiable_adapters.py +++ b/src/caoscrawler/identifiable_adapters.py @@ -23,6 +23,8 @@ # ** end header # +import yaml + from datetime import datetime import caosdb as db from abc import abstractmethod, ABCMeta @@ -412,6 +414,17 @@ class CaosDBIdentifiableAdapter(IdentifiableAdapter): def __init__(self): self._registered_identifiables = dict() + def load_from_yaml_definition(self, path: str): + """Load identifiables defined in a yaml file""" + with open(path, 'r') as yaml_f: + identifiable_data = yaml.safe_load(yaml_f) + + for key, value in identifiable_data.items(): + rt = db.RecordType().add_parent(key) + for prop_name in value: + rt.add_property(name=prop_name) + self.register_identifiable(key, rt) + def register_identifiable(self, name: str, definition: db.RecordType): self._registered_identifiables[name] = definition diff --git a/src/newcrawler/identified_cache.py b/src/caoscrawler/identified_cache.py similarity index 99% rename from src/newcrawler/identified_cache.py rename to src/caoscrawler/identified_cache.py index cba00dd2bfff8a0f886878f532133bb18b1a20de..0b9d7a47bdecc4094edb1296f4c04dfa083a2436 100644 --- a/src/newcrawler/identified_cache.py +++ b/src/caoscrawler/identified_cache.py @@ -66,7 +66,6 @@ def _create_hashable_string(identifiable: db.Record): else: tmplist.append(val) value = str(tmplist) - rec_string += "{}:".format(pname) + value return rec_string diff --git a/src/newcrawler/stores.py b/src/caoscrawler/stores.py similarity index 100% rename from src/newcrawler/stores.py rename to src/caoscrawler/stores.py diff --git a/src/newcrawler/structure_elements.py b/src/caoscrawler/structure_elements.py similarity index 100% rename from src/newcrawler/structure_elements.py rename to src/caoscrawler/structure_elements.py diff --git a/src/newcrawler/utils.py b/src/caoscrawler/utils.py similarity index 100% rename from src/newcrawler/utils.py rename to src/caoscrawler/utils.py diff --git a/src/doc/Makefile b/src/doc/Makefile index 
dc1690a8f7f74815b25a51e519e4712c7c92b7ec..bea7f860173d930527c84fae43cb7d5bdf6cae97 100644 --- a/src/doc/Makefile +++ b/src/doc/Makefile @@ -29,7 +29,7 @@ SPHINXOPTS ?= -a SPHINXBUILD ?= sphinx-build SPHINXAPIDOC ?= sphinx-apidoc -PY_BASEDIR = ../newcrawler +PY_BASEDIR = ../caoscrawler SOURCEDIR = . BUILDDIR = ../../build/doc diff --git a/src/doc/conf.py b/src/doc/conf.py index 75731285a77f8a30fcb4bfc6be0483c4bba0052a..fb37cdd96c440300741aeb49e90caffe4370f5d7 100644 --- a/src/doc/conf.py +++ b/src/doc/conf.py @@ -3,7 +3,7 @@ # Configuration file for the Sphinx documentation builder. # # Based on the configuration for caosdb-pylib. -# +# # # Copyright (C) 2021 Alexander Schlemmer <alexander.schlemmer@ds.mpg.de> # # This file only contains a selection of the most common options. For a full @@ -28,7 +28,7 @@ import sphinx_rtd_theme # noqa: E402 # -- Project information ----------------------------------------------------- -project = 'caosdb-newcrawler' +project = 'caosdb-caoscrawler' copyright = '2021, MPIDS' author = 'Alexander Schlemmer' @@ -115,7 +115,7 @@ html_static_path = ['_static'] # -- Options for HTMLHelp output --------------------------------------------- # Output file base name for HTML help builder. -htmlhelp_basename = 'caosdb-newcrawlerdoc' +htmlhelp_basename = 'caosdb-caoscrawlerdoc' # -- Options for LaTeX output ------------------------------------------------ @@ -142,7 +142,7 @@ latex_elements = { # (source start file, target name, title, # author, documentclass [howto, manual, or own class]). latex_documents = [ - (master_doc, 'caosdb-newcrawler.tex', 'caosdb-newcrawler Documentation', + (master_doc, 'caosdb-caoscrawler.tex', 'caosdb-caoscrawler Documentation', 'MPIDS', 'manual'), ] @@ -152,7 +152,7 @@ latex_documents = [ # One entry per manual page. List of tuples # (source start file, name, description, authors, manual section). 
man_pages = [ - (master_doc, 'caosdb-newcrawler', 'caosdb-newcrawler documentation', + (master_doc, 'caosdb-caoscrawler', 'caosdb-caoscrawler documentation', [author], 1) ] @@ -163,8 +163,8 @@ man_pages = [ # (source start file, target name, title, author, # dir menu entry, description, category) texinfo_documents = [ - (master_doc, 'caosdb-newcrawler', 'caosdb-newcrawler documentation', - author, 'caosdb-newcrawler', 'One line description of project.', + (master_doc, 'caosdb-caoscrawler', 'caosdb-caoscrawler documentation', + author, 'caosdb-caoscrawler', 'One line description of project.', 'Miscellaneous'), ] diff --git a/tox.ini b/tox.ini index 8b5ad34fb1583790de5365f2bfa4ff7b3704574c..2cf966fb5b80e62cb7f216b0785ba567e13ee3ff 100644 --- a/tox.ini +++ b/tox.ini @@ -1,5 +1,5 @@ [tox] -envlist=py36, py37, py38, py39 +envlist=py36, py37, py38, py39, py310 skip_missing_interpreters = true [testenv] diff --git a/unittests/records.xml b/unittests/records.xml index 0ae34124a8875a723d7f0879687d8f0bdec51de0..f7455ec6b8995db8cd205f69729c32358beee8c0 100644 --- a/unittests/records.xml +++ b/unittests/records.xml @@ -78,6 +78,7 @@ <Property id="248" name="identifier" description="identifier of the experiment" datatype="TEXT" importance="FIX" flag="inheritance:FIX">TimeOfFlight</Property> <Property id="250" name="project" datatype="Project" importance="FIX" flag="inheritance:FIX">287</Property> <Property id="249" name="responsible" datatype="LIST<Person>" importance="FIX" flag="inheritance:FIX"> + <Value>289</Value> <Value>288</Value> </Property> </Record> diff --git a/unittests/test_cache.py b/unittests/test_cache.py index 7061b63c1f07a9ea2989509710b5f4043e73898d..135316b92fda0ac1e43f4e5f2c4f28fbf1272494 100644 --- a/unittests/test_cache.py +++ b/unittests/test_cache.py @@ -5,7 +5,7 @@ import caosdb as db from pytest import raises -from newcrawler.identified_cache import _create_hashable_string as create_hash_string +from caoscrawler.identified_cache import _create_hashable_string as create_hash_string def test_normal_hash_creation(): diff --git a/unittests/test_converters.py b/unittests/test_converters.py index 100b10062916fb992d2bb19241d1cf8ea543e44c..5f56486ba0f63fdd64d4e4dd80e6d6eaeed705d1 100644 --- a/unittests/test_converters.py +++ b/unittests/test_converters.py @@ -27,16 +27,16 @@ test the converters module """ -from newcrawler.converters import Converter -from newcrawler.stores import GeneralStore -from newcrawler.converters import (ConverterValidationError, - MarkdownFileConverter, JSONFileConverter, - DictConverter) -from newcrawler.structure_elements import Directory -from newcrawler.structure_elements import (File, DictTextElement, - DictListElement, DictElement, - DictBooleanElement, DictDictElement, - DictIntegerElement, DictFloatElement) +from caoscrawler.converters import Converter +from caoscrawler.stores import GeneralStore +from caoscrawler.converters import (ConverterValidationError, + MarkdownFileConverter, JSONFileConverter, + DictConverter) +from caoscrawler.structure_elements import Directory +from caoscrawler.structure_elements import (File, DictTextElement, + DictListElement, DictElement, + DictBooleanElement, DictDictElement, + DictIntegerElement, DictFloatElement) from test_tool import rfp @@ -50,25 +50,25 @@ def converter_registry(): converter_registry: dict[str, dict[str, str]] = { "Directory": { "converter": "DirectoryConverter", - "package": "newcrawler.converters"}, + "package": "caoscrawler.converters"}, "MarkdownFile": { "converter": "MarkdownFileConverter", - 
"package": "newcrawler.converters"}, + "package": "caoscrawler.converters"}, "Dict": { "converter": "DictConverter", - "package": "newcrawler.converters"}, + "package": "caoscrawler.converters"}, "DictTextElement": { "converter": "DictTextElementConverter", - "package": "newcrawler.converters"}, + "package": "caoscrawler.converters"}, "DictListElement": { "converter": "DictListElementConverter", - "package": "newcrawler.converters"}, + "package": "caoscrawler.converters"}, "TextElement": { "converter": "TextElementConverter", - "package": "newcrawler.converters"}, + "package": "caoscrawler.converters"}, "JSONFile": { "converter": "JSONFileConverter", - "package": "newcrawler.converters"}, + "package": "caoscrawler.converters"}, } for key, value in converter_registry.items(): @@ -118,9 +118,13 @@ def testDirectoryConverter(converter_registry): def test_markdown_converter(converter_registry): - test_readme = File("README.md", rfp( - "test_directories", "examples_article", "DataAnalysis", - "2020_climate-model-predict", "2020-02-08_prediction-errors", "README.md")) + test_readme = File( + "README.md", + rfp( + "test_directories", "examples_article", "DataAnalysis", + "2020_climate-model-predict", "2020-02-08_prediction-errors", "README.md" + ) + ) converter = MarkdownFileConverter({ "match": "(.*)" @@ -155,8 +159,11 @@ def test_markdown_converter(converter_registry): assert children[0].name == "responsible" assert children[0].value.__class__ == str - test_readme2 = File("README.md", rfp("test_directories", "examples_article", - "ExperimentalData", "2020_SpeedOfLight", "2020-01-01_TimeOfFlight", "README.md")) + test_readme2 = File( + "README.md", + rfp("test_directories", "examples_article", + "ExperimentalData", "2020_SpeedOfLight", "2020-01-01_TimeOfFlight", "README.md") + ) m = converter.match(test_readme2) assert m is not None @@ -177,7 +184,8 @@ def test_json_converter(converter_registry): test_json = File("testjson.json", rfp( "test_directories", "examples_json", "testjson.json")) - schema_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "test_directories", "examples_json", "testjson.schema.json") + schema_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), + "test_directories", "examples_json", "testjson.schema.json") jsonconverter = JSONFileConverter( definition={"match": "(.*)", "validate": schema_path}, name="TestJSONFileConverter", @@ -203,9 +211,10 @@ def test_json_converter(converter_registry): assert children[2].name == "archived" assert children[2].value.__class__ == bool - assert children[3].__class__ == DictDictElement - assert children[3].name == "coordinator" - assert children[3].value.__class__ == dict + assert children[3].__class__ == DictListElement + assert children[3].name == "Person" + assert children[3].value.__class__ == list + assert len(children[3].value) == 2 assert children[4].__class__ == DictTextElement assert children[4].name == "start_date" @@ -224,10 +233,12 @@ def test_json_converter(converter_registry): assert children[7].name == "url" assert children[7].value.__class__ == str - broken_json = File("brokenjson.json", rfp( - "test_directories", "examples_json", "brokenjson.json")) + broken_json = File( + "brokenjson.json", + rfp("test_directories", "examples_json", "brokenjson.json") + ) m = jsonconverter.match(broken_json) - + # Doesn't validate because of missing required 'name' property with pytest.raises(ConverterValidationError) as err: children = jsonconverter.create_children(None, broken_json) diff --git 
a/unittests/test_directories/example_substitutions/ExperimentalData/220512_data.dat b/unittests/test_directories/example_substitutions/ExperimentalData/220512_data.dat new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/unittests/test_directories/example_substitutions/substitutions.yml b/unittests/test_directories/example_substitutions/substitutions.yml new file mode 100644 index 0000000000000000000000000000000000000000..1b4e8784a69d1ad1b80fa757ad77cd137c8cc7b5 --- /dev/null +++ b/unittests/test_directories/example_substitutions/substitutions.yml @@ -0,0 +1,22 @@ + +ExperimentalData: # name of the converter + type: Directory + match: ExperimentalData + records: + Project: + name: project + subtree: + File: # name of the converter + type: SimpleFile + match: (?P<year>[0-9]{2,2})(?P<month>[0-9]{2,2})(?P<day>[0-9]{2,2})_data.dat + records: + Experiment: + date: 20$year-$month-$day + + ExperimentSeries: + Experiment: $Experiment + + Project: + Experiments: +$Experiment + dates: +20$year-$month-$day + diff --git a/unittests/test_directories/examples_json/jsontest_cfood.yml b/unittests/test_directories/examples_json/jsontest_cfood.yml index bcf79a2d5183ebb496f8e180e9c264bb3ac05e48..f1eb6a9fa186c07f551bd12a84050f544abfdabc 100644 --- a/unittests/test_directories/examples_json/jsontest_cfood.yml +++ b/unittests/test_directories/examples_json/jsontest_cfood.yml @@ -3,13 +3,56 @@ JSONTest: # name of the converter type: JSONFile match: '(.*)' validate: ./testjson.schema.json - subtree: - element: # name of the first subtree element which is a converter + records: + Project: # this is an identifiable in this case + parents: + - Project # not needed as the name is equivalent + subtree: + name_element: + type: DictTextElement + match_name: "name" + match_value: "(?P<name>.*)" + records: + Project: + name: $name + url_element: # name of the first subtree element which is a converter type: DictTextElement match_value: "(?P<url>.*)" match_name: "url" records: - Project: # this is an identifiable in this case - parents: - - Project # not needed as the name is equivalent - url: $url + Project: + url: $url + persons_element: + type: DictListElement + match_name: "Person" + subtree: + person_element: + type: Dict + records: + Person: + parents: + - Person + Project: + Person: +$Person + subtree: + firstname_element: + type: DictTextElement + match_name: "firstname" + match_value: "(?P<firstname>.*)" + records: + Person: + firstname: $firstname + lastname_element: + type: DictTextElement + match_name: "lastname" + match_value: "(?P<lastname>.*)" + records: + Person: + lastname: $lastname + email_element: + type: DictTextElement + match_name: "email" + match_value: "(?P<email>.*)" + records: + Person: + email: $email diff --git a/unittests/test_directories/examples_json/testjson.json b/unittests/test_directories/examples_json/testjson.json index cd26c9c3295d6a2a8a6110f0876fffb62f60419e..b893b608a6a2119c5c3252cd9cff4c4100f404da 100644 --- a/unittests/test_directories/examples_json/testjson.json +++ b/unittests/test_directories/examples_json/testjson.json @@ -2,11 +2,18 @@ "name": "DEMO", "projectId": 10002, "archived": false, - "coordinator": { - "firstname": "Miri", - "lastname": "Mueller", - "email": "miri.mueller@science.de" - }, + "Person": [ + { + "firstname": "Miri", + "lastname": "Mueller", + "email": "miri.mueller@science.de" + }, + { + "firstname": "Mara", + "lastname": "Mueller", + "email": "mara.mueller@science.de" + } + ], 
"start_date": "2022-03-01", "candidates": ["Mouse", "Penguine"], "rvalue": 0.4444, diff --git a/unittests/test_directories/examples_json/testjson.schema.json b/unittests/test_directories/examples_json/testjson.schema.json index a684e9b663d8cba1ba1931aae5615040b2797240..fc784a61079e4737f1a0176fe4240133f5d1b5d0 100644 --- a/unittests/test_directories/examples_json/testjson.schema.json +++ b/unittests/test_directories/examples_json/testjson.schema.json @@ -11,25 +11,28 @@ "archived": { "type": "boolean" }, - "coordinator": { - "type": "object", - "properties": { - "firstname": { - "type": "string" - }, - "lastname": { - "type": "string" - }, - "email": { - "type": "string" + "Person": { + "type": "array", + "items": { + "type": "object", + "properties": { + "firstname": { + "type": "string" + }, + "lastname": { + "type": "string" + }, + "email": { + "type": "string" + } + }, + "required": [ + "firstname", + "lastname", + "email" + ], + "additionalProperties": true } - }, - "required": [ - "firstname", - "lastname", - "email" - ], - "additionalProperties": true }, "start_date": { "type": "string", @@ -51,7 +54,7 @@ "required": [ "name", "projectId", - "coordinator" + "Person" ], "additionalProperties": false } diff --git a/unittests/test_directories/single_file_test_data/identifiables.yml b/unittests/test_directories/single_file_test_data/identifiables.yml new file mode 100644 index 0000000000000000000000000000000000000000..e32746d5a6984096cc46fa618250832b325965b0 --- /dev/null +++ b/unittests/test_directories/single_file_test_data/identifiables.yml @@ -0,0 +1,7 @@ +Person: + - full_name +Keyword: + - name +Project: + - project_id + - title diff --git a/unittests/test_entity_comparison.py b/unittests/test_entity_comparison.py index 1e22280d893ae7bba301baa61213b5e49eaaba6c..549bc4f42a59765d25446d44fbb845e49ca4d9b9 100644 --- a/unittests/test_entity_comparison.py +++ b/unittests/test_entity_comparison.py @@ -7,19 +7,19 @@ import caosdb as db import pytest from pytest import raises -from newcrawler.crawl import check_identical +from caoscrawler.crawl import check_identical def test_compare_entities(): record1 = db.Record() record2 = db.Record() - + assert check_identical(record1, record2) record1.add_property(name="type", value="int") assert not check_identical(record1, record2) assert not check_identical(record2, record1) - + record2.add_property(name="type", value="int") assert check_identical(record1, record2) record2.get_property("type").value = "int2" @@ -36,8 +36,10 @@ def test_compare_entities(): # This is confusing, but needed: record1.add_property(name="field_with_type", value=42, datatype=db.INTEGER) record2.add_property(name="field_with_type", value=42) - assert not check_identical(record1, record2) # not identical, because record1 sets the datatype - assert check_identical(record2, record1) # identical, because record2 sets the datatype + # not identical, because record1 sets the datatype + assert not check_identical(record1, record2) + # identical, because record2 sets the datatype + assert check_identical(record2, record1) record2.get_property("field_with_type").datatype = db.INTEGER assert check_identical(record1, record2) assert check_identical(record2, record1) @@ -79,8 +81,10 @@ def test_compare_entities(): for attribute, values in zip(("_checksum", "_size"), (vals[0], vals[1])): setattr(record1, attribute, values[0]) - assert not check_identical(record1, record2) # not identical, because record1 sets the datatype - assert check_identical(record2, record1) # identical, because 
record2 sets the datatype + # not identical, because record1 sets the datatype + assert not check_identical(record1, record2) + # identical, because record2 sets the datatype + assert check_identical(record2, record1) setattr(record2, attribute, values[1]) assert not check_identical(record1, record2) @@ -89,5 +93,3 @@ def test_compare_entities(): setattr(record2, attribute, values[0]) assert check_identical(record1, record2) assert check_identical(record2, record1) - - diff --git a/unittests/test_file_identifiables.py b/unittests/test_file_identifiables.py index 234fae20c53e137bf049e496dbe178a30e5de833..b0b9801993dc68fe473e788b8ca79a2244912676 100644 --- a/unittests/test_file_identifiables.py +++ b/unittests/test_file_identifiables.py @@ -7,7 +7,7 @@ import caosdb as db import pytest from pytest import raises -from newcrawler.identifiable_adapters import LocalStorageIdentifiableAdapter +from caoscrawler.identifiable_adapters import LocalStorageIdentifiableAdapter def test_file_identifiable(): @@ -41,7 +41,8 @@ def test_file_identifiable(): assert file_obj.checksum != identifiable.checksum # This is the wrong method, so it should definitely return None: - identified_file = ident.retrieve_identified_record_for_identifiable(identifiable) + identified_file = ident.retrieve_identified_record_for_identifiable( + identifiable) assert identified_file is None # This is the correct method to use: identified_file = ident.get_file(identifiable) @@ -71,5 +72,3 @@ def test_file_identifiable(): with raises(RuntimeError, match=".*unambigiously.*"): records.append(test_record_alsocorrect_path) identified_file = ident.get_file(file_obj) - - diff --git a/unittests/test_identifiable_adapters.py b/unittests/test_identifiable_adapters.py index 4a3ae786438e99ded8925d4405d9b051cd86bf66..ef7998a460c07342d30a3f769fd609c1045a9cca 100644 --- a/unittests/test_identifiable_adapters.py +++ b/unittests/test_identifiable_adapters.py @@ -27,8 +27,10 @@ test identifiable_adapters module """ +import os from datetime import datetime -from newcrawler.identifiable_adapters import IdentifiableAdapter +from caoscrawler.identifiable_adapters import ( + CaosDBIdentifiableAdapter, IdentifiableAdapter) import caosdb as db @@ -57,3 +59,27 @@ def test_create_query_for_identifiable(): query = IdentifiableAdapter.create_query_for_identifiable( db.Record(name="TestRecord").add_parent("TestType")) assert query.lower() == "find record testtype with name='testrecord'" + + +def test_load_from_yaml_file(): + ident = CaosDBIdentifiableAdapter() + ident.load_from_yaml_definition( + os.path.join(os.path.dirname(__file__), "test_directories", + "single_file_test_data", "identifiables.yml") + ) + + person_i = ident.get_registered_identifiable( + db.Record().add_parent("Person")) + assert person_i is not None + assert person_i.get_property("full_name") is not None + + keyword_i = ident.get_registered_identifiable( + db.Record().add_parent("Keyword")) + assert keyword_i is not None + assert keyword_i.get_property("name") is not None + + project_i = ident.get_registered_identifiable( + db.Record().add_parent("Project")) + assert project_i is not None + assert project_i.get_property("project_id") is not None + assert project_i.get_property("title") is not None diff --git a/unittests/test_identified_cache.py b/unittests/test_identified_cache.py index 9a1034634692e3d55935d31e2b3923d874f3f673..33add97d4309d87705144ec5331366d0bcd05541 100644 --- a/unittests/test_identified_cache.py +++ b/unittests/test_identified_cache.py @@ -27,12 +27,13 @@ test 
identified_cache module """ -from newcrawler.identified_cache import _create_hashable_string, IdentifiedCache +from caoscrawler.identified_cache import _create_hashable_string, IdentifiedCache import caosdb as db def test_create_hash(): - assert _create_hashable_string(db.Record("A").add_parent("B")) == "P<B>N<A>" + assert _create_hashable_string( + db.Record("A").add_parent("B")) == "P<B>N<A>" assert _create_hashable_string(db.Record("A") .add_parent("B").add_property('a', 5)) == "P<B>N<A>a:5" assert (_create_hashable_string( diff --git a/unittests/test_json.py b/unittests/test_json.py index d4da1fe7f20d3b2ea8c623315542fce90fb18497..97d9831de20a2b9f712294d1a0f6322789580f30 100644 --- a/unittests/test_json.py +++ b/unittests/test_json.py @@ -31,9 +31,11 @@ import os from pytest import raises -from newcrawler.converters import JSONFileConverter, DictConverter -from newcrawler.crawl import Crawler -from newcrawler.structure_elements import File, JSONFile +import caosdb as db + +from caoscrawler.converters import JSONFileConverter, DictConverter +from caoscrawler.crawl import Crawler +from caoscrawler.structure_elements import File, JSONFile from test_tool import rfp, dircheckstr @@ -47,19 +49,27 @@ def test_json(): # Load and register converter packages: converter_registry = crawler.load_converters(crawler_definition) - crawler.start_crawling( + records = crawler.start_crawling( JSONFile(os.path.basename(json_file_path), json_file_path), crawler_definition, converter_registry ) - subd = crawler.debug_tree - subc = crawler.debug_metadata - #print(json.dumps(subd, indent=3)) - print(subd) - print(subc) + + rec = [r for r in records if r.name == "DEMO"] + assert len(rec) == 1 + rec = rec[0] + assert len(rec.parents) == 1 + assert rec.parents[0].name == "Project" + assert rec.get_property("url") is not None + assert rec.get_property("url").value == "https://site.de/index.php/" + assert rec.get_property("Person") is not None + assert isinstance(rec.get_property("Person").value, list) + assert len(rec.get_property("Person").value) == 2 + def test_broken_validation(): - crawler_definition_path = rfp("broken_cfoods", "broken_validation_path.yml") + crawler_definition_path = rfp( + "broken_cfoods", "broken_validation_path.yml") crawler = Crawler() with raises(FileNotFoundError) as err: crawler_definition = crawler.load_definition(crawler_definition_path) diff --git a/unittests/test_schema.py b/unittests/test_schema.py index cac37c758aa838d78eb24435db55b099258900ac..0736698eb32146fb3cfbee6acbcf11f5436df27e 100644 --- a/unittests/test_schema.py +++ b/unittests/test_schema.py @@ -6,13 +6,14 @@ from importlib_resources import files import caosdb as db from os.path import join, dirname -from newcrawler import Crawler +from caoscrawler import Crawler import pytest from pytest import raises from jsonschema.exceptions import ValidationError + def rfp(*pathcomponents): """ Return full path. diff --git a/unittests/test_tool.py b/unittests/test_tool.py index dd9fb83d772496cc6b3729f2893997360d318f18..1e7f10069c49ce6cab71da5f469e28b69158b4b5 100755 --- a/unittests/test_tool.py +++ b/unittests/test_tool.py @@ -3,9 +3,9 @@ # Adapted from check-sfs # A. 
Schlemmer, 06/2021 -from newcrawler import Crawler -from newcrawler.structure_elements import File, DictTextElement, DictListElement -from newcrawler.identifiable_adapters import IdentifiableAdapter, LocalStorageIdentifiableAdapter +from caoscrawler import Crawler +from caoscrawler.structure_elements import File, DictTextElement, DictListElement +from caoscrawler.identifiable_adapters import IdentifiableAdapter, LocalStorageIdentifiableAdapter from functools import partial from copy import deepcopy from unittest.mock import MagicMock, Mock @@ -30,7 +30,7 @@ def dircheckstr(*pathcomponents): """ Return the debug tree identifier for a given path. """ - return "newcrawler.structure_elements.Directory: " + basename(join(*pathcomponents)) + ", " + rfp("test_directories", "examples_article", *pathcomponents) + return "caoscrawler.structure_elements.Directory: " + basename(join(*pathcomponents)) + ", " + rfp("test_directories", "examples_article", *pathcomponents) @pytest.fixture @@ -74,8 +74,10 @@ def test_record_structure_generation(crawler): subd = crawler.debug_tree[dircheckstr("DataAnalysis")] subc = crawler.debug_metadata["copied"][dircheckstr("DataAnalysis")] assert len(subd) == 2 - assert len(subd[0]) == 2 # variables store on Data Analysis node of debug tree - assert len(subd[1]) == 0 # record store on Data Analysis node of debug tree + # variables store on Data Analysis node of debug tree + assert len(subd[0]) == 2 + # record store on Data Analysis node of debug tree + assert len(subd[1]) == 0 assert len(subc) == 2 assert len(subc[0]) == 2 assert len(subc[1]) == 0 @@ -84,7 +86,8 @@ def test_record_structure_generation(crawler): assert subd[0]["DataAnalysis"] == "examples_article/DataAnalysis" assert subc[0]["DataAnalysis"] == False - subd = crawler.debug_tree[dircheckstr("DataAnalysis", "2020_climate-model-predict")] + subd = crawler.debug_tree[dircheckstr( + "DataAnalysis", "2020_climate-model-predict")] subc = crawler.debug_metadata["copied"][dircheckstr( "DataAnalysis", "2020_climate-model-predict")] @@ -92,7 +95,8 @@ def test_record_structure_generation(crawler): assert len(subd[1]["Project"].get_parents()) == 1 assert subd[1]["Project"].get_parents()[0].name == "Project" assert subd[1]["Project"].get_property("date").value == "2020" - assert subd[1]["Project"].get_property("identifier").value == "climate-model-predict" + assert subd[1]["Project"].get_property( + "identifier").value == "climate-model-predict" assert len(subd[0]) == 6 assert subd[0]["date"] == "2020" @@ -129,15 +133,19 @@ def test_record_structure_generation(crawler): assert len(subd[1]["Project"].get_parents()) == 1 assert subd[1]["Project"].get_parents()[0].name == "Project" assert subd[1]["Project"].get_property("date").value == "2020" - assert subd[1]["Project"].get_property("identifier").value == "climate-model-predict" + assert subd[1]["Project"].get_property( + "identifier").value == "climate-model-predict" assert len(subd[1]["Measurement"].get_parents()) == 1 assert subd[1]["Measurement"].get_parents()[0].name == "Measurement" assert subd[1]["Measurement"].get_property("date").value == "2020-02-08" - assert subd[1]["Measurement"].get_property("identifier").value == "prediction-errors" + assert subd[1]["Measurement"].get_property( + "identifier").value == "prediction-errors" assert subd[1]["Measurement"].get_property("project").value != "$Project" - assert subd[1]["Measurement"].get_property("project").value.__class__ == db.Record - assert subd[1]["Measurement"].get_property("project").value == 
subd[0]["Project"] + assert subd[1]["Measurement"].get_property( + "project").value.__class__ == db.Record + assert subd[1]["Measurement"].get_property( + "project").value == subd[0]["Project"] # Check the copy flags for the second level in the hierarchy: assert subc[1]["Project"] is True @@ -176,9 +184,15 @@ def test_crawler_update_list(crawler, ident): # If the following assertions fail, that is a hint, that the test file records.xml has changed # and this needs to be updated: assert len(ident.get_records()) == 18 - assert len([r for r in ident.get_records() if r.parents[0].name == "Person"]) == 5 - assert len([r for r in ident.get_records() if r.parents[0].name == "Measurement"]) == 11 - assert len([r for r in ident.get_records() if r.parents[0].name == "Project"]) == 2 + assert len( + [r for r in ident.get_records() if r.parents[0].name == "Person"] + ) == 5 + assert len( + [r for r in ident.get_records() if r.parents[0].name == "Measurement"] + ) == 11 + assert len( + [r for r in ident.get_records() if r.parents[0].name == "Project"] + ) == 2 # The crawler contains lots of duplicates, because identifiables have not been resolved yet: assert len(ident.get_records()) != len(crawler.updateList) @@ -194,8 +208,10 @@ def test_crawler_update_list(crawler, ident): id_r0 = ident.get_identifiable(r_cur) assert r_cur.parents[0].name == id_r0.parents[0].name - assert r_cur.get_property("first_name").value == id_r0.get_property("first_name").value - assert r_cur.get_property("last_name").value == id_r0.get_property("last_name").value + assert r_cur.get_property( + "first_name").value == id_r0.get_property("first_name").value + assert r_cur.get_property( + "last_name").value == id_r0.get_property("last_name").value assert len(r_cur.parents) == 1 assert len(id_r0.parents) == 1 assert len(r_cur.properties) == 2 @@ -213,9 +229,11 @@ def test_crawler_update_list(crawler, ident): id_r1 = ident.get_identifiable(r_cur) assert r_cur.parents[0].name == id_r1.parents[0].name - assert r_cur.get_property("identifier").value == id_r1.get_property("identifier").value + assert r_cur.get_property( + "identifier").value == id_r1.get_property("identifier").value assert r_cur.get_property("date").value == id_r1.get_property("date").value - assert r_cur.get_property("project").value == id_r1.get_property("project").value + assert r_cur.get_property( + "project").value == id_r1.get_property("project").value assert len(r_cur.parents) == 1 assert len(id_r1.parents) == 1 assert len(r_cur.properties) == 4 @@ -228,7 +246,8 @@ def test_crawler_update_list(crawler, ident): assert idr_r1_test != idr_r0_test assert len(idr_r1.properties) == 4 - assert r_cur.get_property("responsible").value == idr_r1.get_property("responsible").value + assert r_cur.get_property( + "responsible").value == idr_r1.get_property("responsible").value assert r_cur.description == idr_r1.description # test whether compare_entites function works in this context: @@ -355,14 +374,17 @@ def test_split_into_inserts_and_updates_trivial(crawler): def test_split_into_inserts_and_updates_single(mock_retrieve): crawler = mock_retrieve - entlist = [db.Record(name="A").add_parent("C"), db.Record(name="B").add_parent("C")] + entlist = [db.Record(name="A").add_parent( + "C"), db.Record(name="B").add_parent("C")] assert crawler.get_identified_record_from_local_cache(entlist[0]) is None assert crawler.get_identified_record_from_local_cache(entlist[1]) is None assert crawler.can_be_checked_externally(entlist[0]) assert crawler.can_be_checked_externally(entlist[1]) - 
-    assert crawler.identifiableAdapter.retrieve_identified_record_for_record(entlist[0]).id == 1111
-    assert crawler.identifiableAdapter.retrieve_identified_record_for_record(entlist[1]) is None
+    assert crawler.identifiableAdapter.retrieve_identified_record_for_record(
+        entlist[0]).id == 1111
+    assert crawler.identifiableAdapter.retrieve_identified_record_for_record(
+        entlist[1]) is None

     insert, update = crawler.split_into_inserts_and_updates(deepcopy(entlist))
     assert len(insert) == 1
@@ -416,7 +438,8 @@ def test_split_into_inserts_and_updates_with_complex(mock_retrieve):
     #                  ^
     #                  |
     #              F <- B <- G
-    a = db.Record(name="A").add_parent("C").add_property('d', 13).add_property('e', "lskdjlsfdj")
+    a = db.Record(name="A").add_parent("C").add_property(
+        'd', 13).add_property('e', "lskdjlsfdj")
     b = db.Record(name="B").add_parent("C")
     g = db.Record(name="G").add_parent("C")
     f = db.Record(name="F").add_parent("C")
@@ -457,7 +480,8 @@ def test_all_references_are_existing_already(crawler):
         base_mocked_lookup, known={"A": db.Record(name="A").add_parent("C"),
                                    "B": db.Record(name="B").add_parent("C")}))

-    assert crawler.all_references_are_existing_already(db.Record().add_property('a', 123))
+    assert crawler.all_references_are_existing_already(
+        db.Record().add_property('a', 123))
     assert crawler.all_references_are_existing_already(db.Record()
                                                        .add_property('a', db.Record(id=123)))
     assert crawler.all_references_are_existing_already(db.Record()
@@ -475,7 +499,8 @@ def test_can_be_checked_externally(crawler):
-    assert crawler.can_be_checked_externally(db.Record().add_property('a', 123))
+    assert crawler.can_be_checked_externally(
+        db.Record().add_property('a', 123))
     assert crawler.can_be_checked_externally(db.Record()
                                              .add_property('a', db.Record(id=123)))
     assert crawler.can_be_checked_externally(db.Record()
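A note on the logic exercised in test_tool.py above: split_into_inserts_and_updates partitions a list of records into those the identifiable adapter can resolve to an existing entity (updates) and those it cannot (inserts). A sketch under the same mock as test_split_into_inserts_and_updates_single, where record "A" resolves to id 1111 and "B" is unknown (the `crawler` here stands in for the mocked fixture):

    from copy import deepcopy

    import caosdb as db

    # Two fresh records sharing the parent "C", mirroring the test setup.
    entlist = [db.Record(name="A").add_parent("C"),
               db.Record(name="B").add_parent("C")]

    # "A" is identified externally (id 1111 in the mock), so it should land
    # among the updates; "B" cannot be identified and becomes an insert.
    insert, update = crawler.split_into_inserts_and_updates(deepcopy(entlist))
    assert len(insert) == 1  # presumably the record named "B"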
diff --git a/unittests/test_tool_extended.py b/unittests/test_tool_extended.py
index 2361e99373042a5f5ce73b8eb98083f7431d4836..d0b431a539a15e3e83906540c69becff437742ec 100644
--- a/unittests/test_tool_extended.py
+++ b/unittests/test_tool_extended.py
@@ -3,9 +3,9 @@
 # Adapted from check-sfs
 # A. Schlemmer, 06/2021

-from newcrawler import Crawler
-from newcrawler.structure_elements import File, DictTextElement, DictListElement
-from newcrawler.identifiable_adapters import IdentifiableAdapter, LocalStorageIdentifiableAdapter
+from caoscrawler import Crawler
+from caoscrawler.structure_elements import File, DictTextElement, DictListElement
+from caoscrawler.identifiable_adapters import IdentifiableAdapter, LocalStorageIdentifiableAdapter
 from functools import partial
 from copy import deepcopy
 from unittest.mock import MagicMock, Mock
@@ -30,7 +30,7 @@ def dircheckstr(*pathcomponents, structure_element_type="Directory"):
     """
     Return the debug tree identifier for a given path.
     """
-    return ("newcrawler.structure_elements." + structure_element_type + ": " +
+    return ("caoscrawler.structure_elements." + structure_element_type + ": " +
             basename(join(*pathcomponents)) + ", " +
             rfp("test_directories", "examples_article", *pathcomponents))
@@ -47,7 +47,7 @@ def crawler():
 # def ident(crawler):
 #     ident = LocalStorageIdentifiableAdapter()
 #     crawler.identifiableAdapter = ident
-
+
 #     ident.restore_state(rfp("records.xml"))
 #     ident.register_identifiable(
diff --git a/unittests/test_variable_substitutions.py b/unittests/test_variable_substitutions.py
new file mode 100644
index 0000000000000000000000000000000000000000..071bf4646d20e35ed05dafaf5fabf786dc182dcc
--- /dev/null
+++ b/unittests/test_variable_substitutions.py
@@ -0,0 +1,61 @@
+#!/bin/python
+# Tests for variable substitutions
+# A. Schlemmer, 05/2022
+
+from caoscrawler import Crawler
+from caoscrawler.structure_elements import File, DictTextElement, DictListElement
+from caoscrawler.identifiable_adapters import IdentifiableAdapter, LocalStorageIdentifiableAdapter
+from functools import partial
+from copy import deepcopy
+from unittest.mock import MagicMock, Mock
+from os.path import join, dirname, basename
+import yaml
+import caosdb as db
+from caosdb.apiutils import compare_entities
+
+import pytest
+from pytest import raises
+
+
+def rfp(*pathcomponents):
+    """
+    Return full path.
+    Shorthand convenience function.
+    """
+    return join(dirname(__file__), *pathcomponents)
+
+
+def dircheckstr(element_type, *pathcomponents):
+    """
+    Return the debug tree identifier for a given path.
+    """
+    return "caoscrawler.structure_elements." + element_type + ": " + basename(join(*pathcomponents)) + ", " + rfp("test_directories", "example_substitutions", *pathcomponents)
+
+
+@pytest.fixture
+def crawler():
+    crawler = Crawler(debug=True)
+    crawler.crawl_directory(rfp("test_directories", "example_substitutions", "ExperimentalData"),
+                            rfp("test_directories", "example_substitutions", "substitutions.yml"))
+    return crawler
+
+
+def test_substitutions(crawler):
+    # @review Florian Spreckelsen 2022-05-13
+    for i in range(2):
+        subd = crawler.debug_tree[dircheckstr(
+            "File", "ExperimentalData", "220512_data.dat")]
+        assert subd[i]["Experiment"].get_property("date").value == "2022-05-12"
+        assert isinstance(subd[i]["ExperimentSeries"].get_property(
+            "Experiment").value, db.Record)
+
+        subd = crawler.debug_tree[dircheckstr("Directory", "ExperimentalData")]
+        assert subd[i]["Project"].name == "project"
+        assert isinstance(subd[i]["Project"].get_property(
+            "Experiments").value, list)
+        assert isinstance(subd[i]["Project"].get_property(
+            "Experiments").value[0], db.Record)
+
+        assert isinstance(subd[i]["Project"].get_property("dates").value, list)
+        assert subd[i]["Project"].get_property(
+            "dates").value[0] == "2022-05-12"
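For orientation on the new test file: the crawler's debug tree maps a structure-element identifier string (built by dircheckstr) to a pair of stores per node, where index 0 holds the variables and index 1 the records, as the comments in test_tool.py above indicate. A sketch of the lookup, assuming the `crawler` fixture from test_variable_substitutions.py:

    subd = crawler.debug_tree[dircheckstr(
        "File", "ExperimentalData", "220512_data.dat")]

    # Both the variable store (subd[0]) and the record store (subd[1]) carry
    # the substituted date, which is why the test loops over range(2).
    for store in (subd[0], subd[1]):
        assert store["Experiment"].get_property("date").value == "2022-05-12"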