diff --git a/.docker/Dockerfile b/.docker/Dockerfile
new file mode 100644
index 0000000000000000000000000000000000000000..b300a1a97aa22b3eafc91ef89c01bbd7111edd62
--- /dev/null
+++ b/.docker/Dockerfile
@@ -0,0 +1,37 @@
+FROM debian:latest
+RUN apt-get update && \
+    apt-get install \
+    curl \
+    git \
+    openjdk-11-jdk-headless \
+    python3-autopep8 \
+    python3-pip \
+    python3-pytest \
+    tox \
+    -y
+COPY .docker/wait-for-it.sh /wait-for-it.sh
+# Build arguments, passed by .gitlab-ci.yml; without these ARG declarations
+# the variables below would be empty during the build.
+ARG PYLIB=dev
+ADD https://gitlab.indiscale.com/api/v4/projects/97/repository/commits/${PYLIB} \
+    pylib_version.json
+RUN git clone https://gitlab.indiscale.com/caosdb/src/caosdb-pylib.git && \
+    cd caosdb-pylib && git checkout ${PYLIB} && pip3 install .
+ARG ADVANCED=dev
+ADD https://gitlab.indiscale.com/api/v4/projects/104/repository/commits/${ADVANCED} \
+    advanced_version.json
+RUN git clone https://gitlab.indiscale.com/caosdb/src/caosdb-advanced-user-tools.git && \
+    cd caosdb-advanced-user-tools && git checkout ${ADVANCED} && pip3 install .
+COPY . /git
+
+# Delete .git because it is huge.
+RUN rm -r /git/.git
+
+# Install pycaosdb.ini for the tests
+RUN mv /git/.docker/tester_pycaosdb.ini /git/integrationtests/pycaosdb.ini
+
+RUN cd /git/ && pip3 install .
+
+WORKDIR /git/integrationtests
+# wait for the server, ...
+CMD /wait-for-it.sh caosdb-server:10443 -t 500 -- \
+    # ... install pycaosdb.ini for the server-side scripts
+    cp /git/.docker/sss_pycaosdb.ini /scripting/home/.pycaosdb.ini && \
+    # ... and run the tests
+    pytest-3 .
diff --git a/.docker/cert.sh b/.docker/cert.sh
new file mode 100755
index 0000000000000000000000000000000000000000..e22cfba2995b5fd9d812232f562b7254233fe5b0
--- /dev/null
+++ b/.docker/cert.sh
@@ -0,0 +1,58 @@
+#!/bin/bash
+
+# ** header v3.0
+# This file is a part of the CaosDB Project.
+#
+# Copyright (C) 2019 Daniel Hornung, Göttingen
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as
+# published by the Free Software Foundation, either version 3 of the
+# License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+#
+# ** end header
+
+
+# Creates a directory `cert` and certificates in this directory.
+#
+# The hostname for which the certificate is created can be changed by setting
+# the environment variable CAOSHOSTNAME.
+#
+# ## Overview of variables ##
+#
+# - CAOSHOSTNAME :: Hostname for the key (default is localhost)
+# - KEYPW :: Password for the key (default is CaosDBSecret)
+# - KEYSTOREPW :: Password for the key store (same as KEYPW)
+function cert() {
+    mkdir -p cert
+    cd cert
+    KEYPW="${KEYPW:-CaosDBSecret}"
+    CAOSHOSTNAME="${CAOSHOSTNAME:-localhost}"
+    KEYSTOREPW="${KEYPW:-}"
+    # NOTE: KEYPW and KEYSTOREPW are the same, due to Java limitations.
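+    # The commands below create an AES-256-encrypted RSA key, issue a
+    # self-signed certificate for ${CAOSHOSTNAME} and bundle both into a
+    # PKCS#12 archive and a Java key store (caosdb.jks) for the server.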
+ KEYPW="${KEYPW}" openssl genrsa -aes256 -out caosdb.key.pem \ + -passout env:KEYPW 2048 + # Certificate is for localhost + KEYPW="${KEYPW}" openssl req -new -x509 -key caosdb.key.pem \ + -out caosdb.cert.pem -passin env:KEYPW \ + -subj "/C=/ST=/L=/O=/OU=/CN=${CAOSHOSTNAME}" + KEYPW="${KEYPW}" KEYSTOREPW="$KEYSTOREPW" openssl pkcs12 -export \ + -inkey caosdb.key.pem -in caosdb.cert.pem -out all-certs.pkcs12 \ + -passin env:KEYPW -passout env:KEYPW + + keytool -importkeystore -srckeystore all-certs.pkcs12 -srcstoretype PKCS12 \ + -deststoretype pkcs12 -destkeystore caosdb.jks \ + -srcstorepass "${KEYPW}" \ + -destkeypass "${KEYPW}" -deststorepass "$KEYSTOREPW" + echo "Certificates successfuly created." +} + +cert diff --git a/.docker/docker-compose.yml b/.docker/docker-compose.yml new file mode 100644 index 0000000000000000000000000000000000000000..e5bb4c9b8ca6ad1750922cb07c92cd6c5eb77c6b --- /dev/null +++ b/.docker/docker-compose.yml @@ -0,0 +1,42 @@ +version: '3.7' +services: + sqldb: + image: mariadb:10.4 + environment: + MYSQL_ROOT_PASSWORD: caosdb1234 + networks: + - caosnet + caosdb-server: + image: "$CI_REGISTRY/caosdb/src/caosdb-deploy:$CAOSDB_TAG" + user: 999:999 + depends_on: + - sqldb + networks: + - caosnet + volumes: + - type: bind + source: ./cert + target: /opt/caosdb/cert + - type: bind + source: "../integrationtests/test_data/extroot" + target: /opt/caosdb/mnt/extroot + - type: volume + source: scripting + target: /opt/caosdb/git/caosdb-server/scripting + - type: volume + source: authtoken + target: /opt/caosdb/git/caosdb-server/authtoken + ports: + # - "from_outside:from_inside" + - "10443:10443" + - "10080:10080" + environment: + DEBUG: 1 + CAOSDB_CONFIG_AUTHTOKEN_CONFIG: "conf/core/authtoken.example.yaml" + CAOSDB_CONFIG_TRANSACTION_BENCHMARK_ENABLED: "TRUE" +volumes: + scripting: + authtoken: +networks: + caosnet: + driver: bridge diff --git a/.docker/run.sh b/.docker/run.sh new file mode 100755 index 0000000000000000000000000000000000000000..b0e1a716f28516b83043fb3fdb6594515a0bafd4 --- /dev/null +++ b/.docker/run.sh @@ -0,0 +1,5 @@ +#!/bin/sh + +docker-compose -f tester.yml run tester +rv=$? +echo $rv > result diff --git a/.docker/sss_pycaosdb.ini b/.docker/sss_pycaosdb.ini new file mode 100644 index 0000000000000000000000000000000000000000..de2867f8dc66b3e81f10f35e40c36f9cb8591604 --- /dev/null +++ b/.docker/sss_pycaosdb.ini @@ -0,0 +1,9 @@ +; this is the pycaosdb.ini for the server-side-scripting home. 
+[Connection]
+url = https://caosdb-server:10443
+cacert = /opt/caosdb/cert/caosdb.cert.pem
+debug = 0
+timeout = 5000
+
+[Misc]
+sendmail = /usr/local/bin/sendmail_to_file
diff --git a/.docker/tester.yml b/.docker/tester.yml
new file mode 100644
index 0000000000000000000000000000000000000000..83db879c6072bfdea7b3212c833116b96bb54d0c
--- /dev/null
+++ b/.docker/tester.yml
@@ -0,0 +1,26 @@
+version: '3.7'
+services:
+  tester:
+    image: "$CI_REGISTRY_IMAGE"
+    networks:
+      - docker_caosnet
+    volumes:
+      - type: bind
+        source: ./cert
+        target: /cert
+      - type: volume
+        source: extroot
+        target: /extroot
+      - type: volume
+        source: scripting
+        target: /scripting
+      - type: volume
+        source: authtoken
+        target: /authtoken
+networks:
+  docker_caosnet:
+    external: true
+volumes:
+  scripting:
+  extroot:
+  authtoken:
diff --git a/.docker/tester_pycaosdb.ini b/.docker/tester_pycaosdb.ini
new file mode 100644
index 0000000000000000000000000000000000000000..2159dec250b3dcb2f16043d12bdbe73675e4d75c
--- /dev/null
+++ b/.docker/tester_pycaosdb.ini
@@ -0,0 +1,31 @@
+; pycaosdb.ini for pytest test suites.
+
+[IntegrationTests]
+; location of the scripting bin dir which is used for the test scripts, from
+; the server's perspective.
+test_server_side_scripting.bin_dir.server = scripting/bin-debug/
+; location of the scripting bin dir which is used for the test scripts, from
+; the pyinttest's perspective.
+test_server_side_scripting.bin_dir.local = /scripting/bin-debug/
+
+; location of the files from the pyinttest's perspective
+test_files.test_insert_files_in_dir.local = /extroot/test_insert_files_in_dir/
+; location of the files from the caosdb server's perspective
+test_files.test_insert_files_in_dir.server = /opt/caosdb/mnt/extroot/test_insert_files_in_dir/
+
+; location of the one-time tokens from the pyinttest's perspective
+test_authentication.admin_token_crud = /authtoken/admin_token_crud.txt
+test_authentication.admin_token_expired = /authtoken/admin_token_expired.txt
+test_authentication.admin_token_3_attempts = /authtoken/admin_token_3_attempts.txt
+
+
+[Connection]
+url = https://caosdb-server:10443/
+username = admin
+cacert = /cert/caosdb.cert.pem
+debug = 0
+
+password_method = plain
+password = caosdb
+
+timeout = 500
diff --git a/.docker/wait-for-it.sh b/.docker/wait-for-it.sh
new file mode 100755
index 0000000000000000000000000000000000000000..d69e99f1f13257b559dce2433de0515379663efa
--- /dev/null
+++ b/.docker/wait-for-it.sh
@@ -0,0 +1,182 @@
+#!/usr/bin/env bash
+# License:
+# From https://github.com/vishnubob/wait-for-it
+# The MIT License (MIT)
+# Use this script to test if a given TCP host/port are available
+
+WAITFORIT_cmdname=${0##*/}
+
+echoerr() { if [[ $WAITFORIT_QUIET -ne 1 ]]; then echo "$@" 1>&2; fi }
+
+usage()
+{
+    cat << USAGE >&2
+Usage:
+    $WAITFORIT_cmdname host:port [-s] [-t timeout] [-- command args]
+    -h HOST | --host=HOST       Host or IP under test
+    -p PORT | --port=PORT       TCP port under test
+                                Alternatively, you specify the host and port as host:port
+    -s | --strict               Only execute subcommand if the test succeeds
+    -q | --quiet                Don't output any status messages
+    -t TIMEOUT | --timeout=TIMEOUT
+                                Timeout in seconds, zero for no timeout
+    -- COMMAND ARGS             Execute command with args after the test finishes
+USAGE
+    exit 1
+}
+
+wait_for()
+{
+    if [[ $WAITFORIT_TIMEOUT -gt 0 ]]; then
+        echoerr "$WAITFORIT_cmdname: waiting $WAITFORIT_TIMEOUT seconds for $WAITFORIT_HOST:$WAITFORIT_PORT"
+    else
+        echoerr "$WAITFORIT_cmdname: waiting for $WAITFORIT_HOST:$WAITFORIT_PORT without a timeout"
+    fi
+    
WAITFORIT_start_ts=$(date +%s) + while : + do + if [[ $WAITFORIT_ISBUSY -eq 1 ]]; then + nc -z $WAITFORIT_HOST $WAITFORIT_PORT + WAITFORIT_result=$? + else + (echo > /dev/tcp/$WAITFORIT_HOST/$WAITFORIT_PORT) >/dev/null 2>&1 + WAITFORIT_result=$? + fi + if [[ $WAITFORIT_result -eq 0 ]]; then + WAITFORIT_end_ts=$(date +%s) + echoerr "$WAITFORIT_cmdname: $WAITFORIT_HOST:$WAITFORIT_PORT is available after $((WAITFORIT_end_ts - WAITFORIT_start_ts)) seconds" + break + fi + sleep 1 + done + return $WAITFORIT_result +} + +wait_for_wrapper() +{ + # In order to support SIGINT during timeout: http://unix.stackexchange.com/a/57692 + if [[ $WAITFORIT_QUIET -eq 1 ]]; then + timeout $WAITFORIT_BUSYTIMEFLAG $WAITFORIT_TIMEOUT $0 --quiet --child --host=$WAITFORIT_HOST --port=$WAITFORIT_PORT --timeout=$WAITFORIT_TIMEOUT & + else + timeout $WAITFORIT_BUSYTIMEFLAG $WAITFORIT_TIMEOUT $0 --child --host=$WAITFORIT_HOST --port=$WAITFORIT_PORT --timeout=$WAITFORIT_TIMEOUT & + fi + WAITFORIT_PID=$! + trap "kill -INT -$WAITFORIT_PID" INT + wait $WAITFORIT_PID + WAITFORIT_RESULT=$? + if [[ $WAITFORIT_RESULT -ne 0 ]]; then + echoerr "$WAITFORIT_cmdname: timeout occurred after waiting $WAITFORIT_TIMEOUT seconds for $WAITFORIT_HOST:$WAITFORIT_PORT" + fi + return $WAITFORIT_RESULT +} + +# process arguments +while [[ $# -gt 0 ]] +do + case "$1" in + *:* ) + WAITFORIT_hostport=(${1//:/ }) + WAITFORIT_HOST=${WAITFORIT_hostport[0]} + WAITFORIT_PORT=${WAITFORIT_hostport[1]} + shift 1 + ;; + --child) + WAITFORIT_CHILD=1 + shift 1 + ;; + -q | --quiet) + WAITFORIT_QUIET=1 + shift 1 + ;; + -s | --strict) + WAITFORIT_STRICT=1 + shift 1 + ;; + -h) + WAITFORIT_HOST="$2" + if [[ $WAITFORIT_HOST == "" ]]; then break; fi + shift 2 + ;; + --host=*) + WAITFORIT_HOST="${1#*=}" + shift 1 + ;; + -p) + WAITFORIT_PORT="$2" + if [[ $WAITFORIT_PORT == "" ]]; then break; fi + shift 2 + ;; + --port=*) + WAITFORIT_PORT="${1#*=}" + shift 1 + ;; + -t) + WAITFORIT_TIMEOUT="$2" + if [[ $WAITFORIT_TIMEOUT == "" ]]; then break; fi + shift 2 + ;; + --timeout=*) + WAITFORIT_TIMEOUT="${1#*=}" + shift 1 + ;; + --) + shift + WAITFORIT_CLI=("$@") + break + ;; + --help) + usage + ;; + *) + echoerr "Unknown argument: $1" + usage + ;; + esac +done + +if [[ "$WAITFORIT_HOST" == "" || "$WAITFORIT_PORT" == "" ]]; then + echoerr "Error: you need to provide a host and port to test." + usage +fi + +WAITFORIT_TIMEOUT=${WAITFORIT_TIMEOUT:-15} +WAITFORIT_STRICT=${WAITFORIT_STRICT:-0} +WAITFORIT_CHILD=${WAITFORIT_CHILD:-0} +WAITFORIT_QUIET=${WAITFORIT_QUIET:-0} + +# check to see if timeout is from busybox? +WAITFORIT_TIMEOUT_PATH=$(type -p timeout) +WAITFORIT_TIMEOUT_PATH=$(realpath $WAITFORIT_TIMEOUT_PATH 2>/dev/null || readlink -f $WAITFORIT_TIMEOUT_PATH) +if [[ $WAITFORIT_TIMEOUT_PATH =~ "busybox" ]]; then + WAITFORIT_ISBUSY=1 + WAITFORIT_BUSYTIMEFLAG="-t" + +else + WAITFORIT_ISBUSY=0 + WAITFORIT_BUSYTIMEFLAG="" +fi + +if [[ $WAITFORIT_CHILD -gt 0 ]]; then + wait_for + WAITFORIT_RESULT=$? + exit $WAITFORIT_RESULT +else + if [[ $WAITFORIT_TIMEOUT -gt 0 ]]; then + wait_for_wrapper + WAITFORIT_RESULT=$? + else + wait_for + WAITFORIT_RESULT=$? 
+ fi +fi + +if [[ $WAITFORIT_CLI != "" ]]; then + if [[ $WAITFORIT_RESULT -ne 0 && $WAITFORIT_STRICT -eq 1 ]]; then + echoerr "$WAITFORIT_cmdname: strict mode, refusing to execute subprocess" + exit $WAITFORIT_RESULT + fi + exec "${WAITFORIT_CLI[@]}" +else + exit $WAITFORIT_RESULT +fi + diff --git a/.gitignore b/.gitignore index 2282d256281f1c0df0575389d9479ab2e028622e..459e19fb740d9d8291db0d0437ac6093292920f6 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,15 @@ -src/newcrawler.egg-info/ -__pycache__ -unittests/provenance.yml +src/caoscrawler.egg-info/ .coverage +__pycache__ +provenance.yml +.tox TAGS -src/.coverage +build/ +*~ +.pdbrc +provenance.yml +*.pkcs12 +*.pem +*.jks +*.tar.gz +*.sql diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml new file mode 100644 index 0000000000000000000000000000000000000000..a30140e684b465d40b964f1bfb9b97959b29834d --- /dev/null +++ b/.gitlab-ci.yml @@ -0,0 +1,248 @@ +# +# This file is a part of the CaosDB Project. +# +# Copyright (C) 2018 Research Group Biomedical Physics, +# Max-Planck-Institute for Dynamics and Self-Organization Göttingen +# Copyright (C) 2019 Henrik tom Wörden +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see <https://www.gnu.org/licenses/>. + +variables: + CI_REGISTRY_IMAGE: $CI_REGISTRY/caosdb/src/caosdb-crawler/testenv:$CI_COMMIT_REF_NAME + CI_REGISTRY_IMAGE_BASE: $CI_REGISTRY/caosdb/src/caosdb-pyinttest/base:latest + +stages: + - info + - setup + - cert + - style + - test + - deploy + + +# During the test stage the CI pipeline (which runs in a "root" docker) starts +# two docker containers with docker-compose (one for the caosdb-server, another +# for the mysql-backend). Then a third docker is being started which contains +# the test suite and executes it. +# +# +-------------(root docker)-------------------------+ +# | | +# | +-(caosdb_mysqlbackend)-------------+ | +# | | | | +# | +-----------------------------------+ | +# | +-(caosdb-server)-------------------+ | +# | | | | +# | | /opt/caosdb | | +# | .------->| + /git/caosdb-server/scripting/ | | +# | | .----->| + /git/caosdb-server/authtoken/ | | +# | | | .--->| + /mnt/extroot | | +# | | | | .->| + /cert | | +# | | | | | | | | +# | | | | | +-----------------------------------+ | +# | | | | | | +# | | | | | filesystem: | +# | | | | *--- /cert -----------. | +# | | | | | | +# | | | | volumes: | | +# | | | *----- extroot ------. | | +# | | *------- scripting --. | | | +# | *--------- authtoken -. | | | | +# | | | | | | +# | +-(crawler tests)---+ | | | | | +# | | | | | | | | +# | | /authtoken |<---* | | | | +# | | /scripting |<----* | | | +# | | /extroot |<------* | | +# | | /cert |<--------* | +# | | | | +# | +----------------------+ | +# +---------------------------------------------------+ +# +# In the root docker, the directory /cert is mounted to .docker/cert relative +# to this repository. The directory is created during the cert stage of this +# pipeline and a certificate is created in there. 
The certificate is then
+# available in mounted directories in the server and crawler containers.
+#
+# Additional volumes in the root docker are shared by the caosdb-server and the crawler
+# containers. These volumes are intended to be used for testing server-side scripting and
+# file-system features.
+#
+
+.env: &env
+  - echo "Pipeline triggered by $TRIGGERED_BY_REPO@$TRIGGERED_BY_REF ($TRIGGERED_BY_HASH)"
+  - echo "CI_REGISTRY_IMAGE_BASE = $CI_REGISTRY_IMAGE_BASE"
+  - echo "CI_REGISTRY_IMAGE = $CI_REGISTRY_IMAGE"
+  - echo "CAOSDB_TAG = $CAOSDB_TAG"
+  - echo "REFTAG = $REFTAG"
+  - echo "F_BRANCH = $F_BRANCH"
+  - echo "CI_COMMIT_REF_NAME = $CI_COMMIT_REF_NAME"
+  - ls -lah /image-cache/
+
+  - F_BRANCH=${F_BRANCH:-$CI_COMMIT_REF_NAME}
+  - echo $F_BRANCH
+  - if [[ "$REFTAG" == "" ]] ; then
+      if [[ "$F_BRANCH" == "dev" ]] ; then
+        REFTAG=dev;
+      fi;
+    fi
+  - REFTAG=${REFTAG:-dev_F_${F_BRANCH}}
+
+  - echo $F_BRANCH
+
+  - if [[ "$CAOSDB_TAG" == "" ]]; then
+      CAOSDB_TAG=${REFTAG};
+    fi
+  - echo $CAOSDB_TAG
+
+info:
+  tags: [cached-dind]
+  image: docker:20.10
+  stage: info
+  needs: []
+  script:
+    - *env
+
+unittest:
+  tags: [cached-dind]
+  stage: test
+  image: $CI_REGISTRY_IMAGE
+  script:
+    - tox
+
+inttest:
+  tags: [docker]
+  services:
+    - docker:20.10-dind
+  variables:
+    # This is a workaround for the gitlab-runner health check mechanism when
+    # using the docker-dind service. The runner would otherwise guess the port
+    # wrong and the health check would time out.
+    SERVICE_PORT_2376_TCP_PORT: 2375
+  stage: test
+  image: $CI_REGISTRY_IMAGE_BASE
+  needs: [cert]
+  script:
+    - *env
+    - docker login -u gitlab-ci-token -p $CI_JOB_TOKEN $CI_REGISTRY
+    - echo $CAOSDB_TAG
+
+    - cd .docker
+    # Store the mariadb version
+    - MARIADBVERSION=$(grep mariadb docker-compose.yml | awk '{print $2}')
+    - echo "mariadb image:"$MARIADBVERSION
+    - time docker load < /image-cache/caosdb-crawler-testenv-${CI_COMMIT_REF_NAME}.tar || true
+    - time docker load < /image-cache/caosdb-${REFTAG}.tar || time docker load < /image-cache/caosdb-dev.tar || true
+    - time docker load < /image-cache/$MARIADBVERSION.tar || true
+    - docker pull $CI_REGISTRY/caosdb/src/caosdb-deploy:$CAOSDB_TAG || CAOSDB_TAG=dev
+    - docker pull $CI_REGISTRY_IMAGE
+
+    # Here, the server and the mysql backend containers are started
+    - CAOSDB_TAG=$CAOSDB_TAG docker-compose up -d
+
+    # Store versions of CaosDB parts
+    - docker exec -u 0 -t docker_caosdb-server_1 cat /opt/caosdb/git/caosdb_pylib_commit > hash_pylib
+    - docker exec -u 0 -t docker_caosdb-server_1 cat /opt/caosdb/git/caosdb_webui_commit > hash_webui
+    - docker exec -u 0 -t docker_caosdb-server_1 cat /opt/caosdb/git/caosdb_server_commit > hash_server
+    - docker exec -u 0 -t docker_caosdb-server_1 cat /opt/caosdb/git/caosdb_mysqlbackend_commit > hash_mysql
+    - docker exec -u 0 -t docker_caosdb-server_1 cat /opt/caosdb/git/caosdb_proto_commit > hash_proto
+    - cat hash_server
+    - cat hash_proto
+    - cat hash_mysql
+    - cat hash_webui
+    - cat hash_pylib
+    # Run the actual tests. This starts a new docker container within which
+    # the tests run. The return value is stored in .docker/result
+    - /bin/sh ./run.sh
+
+    # Save logs
+    - docker logs docker_caosdb-server_1 &> ../caosdb_log.txt
+    - docker logs docker_sqldb_1 &> ../mariadb_log.txt
+    - cd ..
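+    # (the log files saved above are exported as job artifacts, see below)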
+
+    # Stop the server
+    - docker-compose -f .docker/docker-compose.yml down
+
+    # The tester container writes the return value of the tests into the
+    # file .docker/result
+    - rc=`cat .docker/result`
+    - exit $rc
+  dependencies: [cert]
+  timeout: 3h
+  artifacts:
+    paths:
+      - caosdb_log.txt
+      - mariadb_log.txt
+      - .docker/hash_*
+    expire_in: 1 week
+
+build-testenv:
+  tags: [cached-dind]
+  image: docker:20.10
+  stage: setup
+  timeout: 2h
+  only:
+    - schedules
+    - web
+    - pushes
+  needs: []
+  script:
+    - df -h
+    - command -v wget
+    - if [ -z "$PYLIB" ]; then
+        if echo "$CI_COMMIT_REF_NAME" | grep -c "^f-" ; then
+          echo "Check if pylib has branch $CI_COMMIT_REF_NAME" ;
+          if wget https://gitlab.indiscale.com/api/v4/projects/97/repository/branches/${CI_COMMIT_REF_NAME} ; then
+            PYLIB=$CI_COMMIT_REF_NAME ;
+          fi;
+        fi;
+      fi;
+    - PYLIB=${PYLIB:-dev}
+    - echo $PYLIB
+
+    - docker login -u gitlab-ci-token -p $CI_JOB_TOKEN $CI_REGISTRY
+    # use here general latest or specific branch latest...
+    - docker build
+      --build-arg PYLIB=${PYLIB}
+      --build-arg ADVANCED=${ADVANCED:-dev}
+      --file .docker/Dockerfile
+      -t $CI_REGISTRY_IMAGE .
+    - docker push $CI_REGISTRY_IMAGE
+    - docker save $CI_REGISTRY_IMAGE > /image-cache/caosdb-crawler-testenv-${CI_COMMIT_REF_NAME}.tar
+
+cert:
+  tags: [docker]
+  stage: cert
+  image: $CI_REGISTRY_IMAGE
+  needs:
+    - job: build-testenv
+      optional: true
+  artifacts:
+    paths:
+      - .docker/cert/
+    expire_in: 1 week
+  script:
+    - cd .docker
+    - CAOSHOSTNAME=caosdb-server ./cert.sh
+
+style:
+  tags: [docker]
+  stage: style
+  image: $CI_REGISTRY_IMAGE
+  needs:
+    - job: build-testenv
+      optional: true
+  script:
+    - autopep8 -r --diff --exit-code .
+  allow_failure: true
diff --git a/.gitmodules b/.gitmodules
new file mode 100644
index 0000000000000000000000000000000000000000..a6e38b041e80e3d8b983f9f1562160a642d9480b
--- /dev/null
+++ b/.gitmodules
@@ -0,0 +1,3 @@
+[submodule "test-setup/caosdb-deploy"]
+	path = test-setup/caosdb-deploy
+	url = git@gitlab.indiscale.com:caosdb/src/caosdb-deploy.git
diff --git a/CHANGELOG.md b/CHANGELOG.md
new file mode 100644
index 0000000000000000000000000000000000000000..7b6a67d2c767755e25376c7c1b7db96fda8cb8f9
--- /dev/null
+++ b/CHANGELOG.md
@@ -0,0 +1,27 @@
+# Changelog
+
+All notable changes to this project will be documented in this file.
+
+The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
+and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
+
+## [Unreleased]
+
+### Added
+
+* Everything
+
+### Changed
+
+* Renamed module from `newcrawler` to `caoscrawler`
+
+### Deprecated
+
+### Removed
+
+### Fixed
+
+- Variables are now also replaced when the value is given as a list.
+- Fixed #12
+
+### Security
diff --git a/README.md b/README.md
index 88d8a6d9965e67ec268bff979ceb709dbf650129..59b88aaa36ed97d8c2cc9e4474820e3dad4a478b 100644
--- a/README.md
+++ b/README.md
@@ -1,4 +1,4 @@
-# newcrawler
+# caoscrawler
 
 A new crawler for CaosDB.
 
diff --git a/integrationtests/README.md b/integrationtests/README.md
index 5c308f51a332d5a930f91eb30f0d93032ae47627..96789ed9f02036a0c7cc25ca1a60d9f0042a5557 100644
--- a/integrationtests/README.md
+++ b/integrationtests/README.md
@@ -1,3 +1,2 @@
-1. Clear database (see clear_database.py)
-2. Insert model (see insert_model.py)
-3. Run test.py
+1. Mount test_data/extroot as extroot folder in the CaosDB server
+2. Use an empty server
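For the docker setup in this repository, both steps are automated: the
docker-compose file bind-mounts test_data/extroot into the server and the
tester image runs the tests. A local run could look like this (sketch,
assuming docker-compose and access to the $CI_REGISTRY images):

    cd .docker
    CAOSHOSTNAME=caosdb-server ./cert.sh  # create the server certificate
    CAOSDB_TAG=dev docker-compose up -d   # start sqldb and caosdb-server
    ./run.sh                              # run the tester; the exit code is written to ./result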
diff --git a/integrationtests/model.yml b/integrationtests/basic_example/model.yml
similarity index 96%
rename from integrationtests/model.yml
rename to integrationtests/basic_example/model.yml
index 055c4fb5b7894c3f444859d15ad1dbc806fa3fab..7e1a391186be6a01fb10d0b32e8516238012f374 100644
--- a/integrationtests/model.yml
+++ b/integrationtests/basic_example/model.yml
@@ -83,3 +83,6 @@ hdf5File:
 Measurement:
   recommended_properties:
     date:
+ReadmeFile:
+  datatype: REFERENCE
+ProjectMarkdownReadme:
diff --git a/integrationtests/basic_example/test.py b/integrationtests/basic_example/test.py
new file mode 100755
index 0000000000000000000000000000000000000000..cecd6533669fd9fb75124faf758efeae8b8d9778
--- /dev/null
+++ b/integrationtests/basic_example/test.py
@@ -0,0 +1,312 @@
+#!/usr/bin/env python3
+# encoding: utf-8
+#
+# ** header v3.0
+# This file is a part of the CaosDB Project.
+#
+# Copyright (C) 2021 Indiscale GmbH <info@indiscale.com>
+#               2021 Henrik tom Wörden <h.tomwoerden@indiscale.com>
+#               2021 Alexander Schlemmer
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as
+# published by the Free Software Foundation, either version 3 of the
+# License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+#
+# ** end header
+#
+
+"""
+Integration tests for the basic crawler example.
+"""
+
+import os
+from unittest.mock import Mock
+
+from caosdb import EmptyUniqueQueryError
+import argparse
+import sys
+from argparse import RawTextHelpFormatter
+from caoscrawler import Crawler
+import caosdb as db
+from caoscrawler.identifiable_adapters import CaosDBIdentifiableAdapter
+import pytest
+from caosadvancedtools.models.parser import parse_model_from_yaml
+import yaml
+
+# TODO is not yet merged in caosadvancedtools
+from caosadvancedtools.testutils import clear_database, set_test_key
+set_test_key("10b128cf8a1372f30aa3697466bb55e76974e0c16a599bb44ace88f19c8f61e2")
+
+
+def rfp(*pathcomponents):
+    """
+    Return full path.
+    Shorthand convenience function.
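+
+    Example (illustrative): rfp("..", "unittests") joins the directory of
+    this file with "../unittests", independent of the current working
+    directory.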
+ """ + return os.path.join(os.path.dirname(__file__), *pathcomponents) + + +@pytest.fixture +def usemodel(): + model = parse_model_from_yaml(rfp("model.yml")) + model.sync_data_model(noquestion=True, verbose=False) + + +@pytest.fixture +def ident(): + ident = CaosDBIdentifiableAdapter() + + # TODO place this definition of identifiables elsewhere + ident.register_identifiable( + "Person", db.RecordType() + .add_parent(name="Person") + # .add_property(name="first_name") + .add_property(name="last_name")) + ident.register_identifiable( + "Measurement", db.RecordType() + .add_parent(name="Measurement") + # .add_property(name="identifier") + .add_property(name="date") + .add_property(name="project")) + ident.register_identifiable( + "Project", db.RecordType() + .add_parent(name="Project") + .add_property(name="date") + .add_property(name="identifier")) + return ident + + +def crawl_standard_test_directory(cr: Crawler, + subdir: str = "examples_article", + cfood: str = "scifolder_cfood.yml"): + cr.crawl_directory(rfp("..", "unittests", "test_directories", subdir), + rfp("..", "unittests", cfood)) + + +@pytest.fixture +def crawler(ident): + cr = Crawler(debug=True, identifiableAdapter=ident) + crawl_standard_test_directory(cr) + return cr + + +@pytest.fixture +def crawler_extended(ident): + cr = Crawler(debug=True, identifiableAdapter=ident) + crawl_standard_test_directory(cr, cfood="scifolder_extended.yml") + # correct paths for current working directory + file_list = [r for r in cr.target_data if r.role == "File"] + for f in file_list: + f.file = rfp("..", "unittests", "test_directories", + "examples_article", f.file) + return cr + + +def test_single_insertion(clear_database, usemodel, crawler): + ins, ups = crawler.synchronize() + + # This test also generates the file records.xml used in some of the unittesets: + res = db.execute_query("FIND Record") + for i in reversed(range(len(res))): + if res[i].parents[0].name == "PyTestInfo": + del res[i] + filename = rfp("..", "unittests", "records.xml") + with open(filename, "w") as f: + xml = res.to_xml() + # Remove noscript and transaction benchmark: + for tag in ("noscript", "TransactionBenchmark"): + if xml.find(tag) is not None: + xml.remove(xml.find(tag)) + f.write(db.common.utils.xml2str(xml)) + + assert len(ins) == 18 + assert len(ups) == 0 + + # Do a second run on the same data, there should be no changes: + crawler = Crawler(debug=True, identifiableAdapter=ident_adapt) + crawler.copy_attributes = Mock() + crawler.crawl_directory(rfp("../unittests/test_directories", "examples_article"), + rfp("../unittests/scifolder_cfood.yml")) + ins, ups = crawler.synchronize() + assert len(ins) == 0 + assert len(ups) == 0 + + +def test_multiple_insertions(clear_database, usemodel, ident, crawler): + ins, ups = crawler.synchronize() + + # Do a second run on the same data, there should be no changes: + cr = Crawler(debug=True, identifiableAdapter=ident) + crawl_standard_test_directory(cr) + ins, ups = cr.synchronize() + assert len(ins) == 0 + assert len(ups) == 0 + + +def test_insertion(clear_database, usemodel, ident, crawler): + ins, ups = crawler.synchronize() + + # Do a second run on the same data, there should a new insert: + cr = Crawler(debug=True, identifiableAdapter=ident) + crawl_standard_test_directory(cr, "example_insert") + assert len(cr.target_data) == 3 + ins, ups = cr.synchronize() + assert len(ins) == 1 + assert len(ups) == 0 + + # Do it again to check whether nothing is changed: + cr = Crawler(debug=True, identifiableAdapter=ident) + 
crawl_standard_test_directory(cr, "example_insert") + assert len(cr.target_data) == 3 + ins, ups = cr.synchronize() + assert len(ins) == 0 + assert len(ups) == 0 + + +def test_insertion_and_update(clear_database, usemodel, ident, crawler): + ins, ups = crawler.synchronize() + + cr = Crawler(debug=True, identifiableAdapter=ident) + crawl_standard_test_directory(cr, "example_insert") + ins, ups = cr.synchronize() + + cr = Crawler(debug=True, identifiableAdapter=ident) + crawl_standard_test_directory(cr, "example_overwrite_1") + # print(cr.target_data) + # cr.save_debug_data(rfp("provenance.yml")) + assert len(cr.target_data) == 3 + ins, ups = cr.synchronize() + assert len(ins) == 0 + assert len(ups) == 1 + + +def test_identifiable_update(clear_database, usemodel, ident, crawler): + ins, ups = crawler.synchronize() + + # Do a second run on the same data with a change in one + # of the identifiables: + cr = Crawler(debug=True, identifiableAdapter=ident) + crawl_standard_test_directory(cr) + + # Test the addition of a single property: + l = cr.target_data + for record in l: + if (record.parents[0].name == "Measurement" and + record.get_property("date").value == "2020-01-03"): + # maybe a bit weird, but add an email address to a measurement + record.add_property( + name="email", value="testperson@testaccount.test") + print("one change") + break + ins, ups = cr.synchronize() + assert len(ins) == 0 + assert len(ups) == 1 + + # Test the change within one property: + cr = Crawler(debug=True, identifiableAdapter=ident) + crawl_standard_test_directory(cr) + l = cr.target_data + for record in l: + if (record.parents[0].name == "Measurement" and + record.get_property("date").value == "2020-01-03"): + record.add_property(name="email", value="testperson@coolmail.test") + print("one change") + break + ins, ups = cr.synchronize() + assert len(ins) == 0 + assert len(ups) == 1 + + # Changing the date should result in a new insertion: + cr = Crawler(debug=True, identifiableAdapter=ident) + crawl_standard_test_directory(cr) + l = cr.target_data + for record in l: + if (record.parents[0].name == "Measurement" and + record.get_property("date").value == "2020-01-03"): + record.add_property(name="email", value="testperson@coolmail.test") + record.get_property("date").value = "2012-01-02" + print("one change") + break + ins, ups = cr.synchronize() + assert len(ins) == 1 + assert len(ups) == 0 + + +def test_file_insertion_dry(clear_database, usemodel, ident): + crawler_extended = Crawler(debug=True, identifiableAdapter=ident) + crawl_standard_test_directory( + crawler_extended, cfood="scifolder_extended.yml") + file_list = [r for r in crawler_extended.target_data if r.role == "File"] + assert len(file_list) == 11 + + for f in file_list: + assert f.path.endswith("README.md") + assert f.path == f.file + + ins, ups = crawler_extended.synchronize(commit_changes=False) + assert len(ups) == 0 + file_list_ins = [r for r in ins if r.role == "File"] + assert len(file_list_ins) == 11 + + +def test_file_insertion(clear_database, usemodel, ident, crawler_extended): + ins, ups = crawler_extended.synchronize(commit_changes=True) + file_list_ins = [r for r in ins if r.role == "File"] + assert len(file_list_ins) == 11 + + assert db.execute_query("COUNT File") > 0 + + # find record which references File does not seem to be possible + # retrieve ids of files: + files = db.execute_query("FIND File") + for f in files: + r = db.execute_query("FIND Record which references {}".format(f.id)) + assert len(r) == 1 + assert 
r[0].get_property("ReadmeFile").value == f.id + + +def test_file_update(clear_database, usemodel, ident, crawler_extended): + ins1, ups1 = crawler_extended.synchronize(commit_changes=True) + file_list_ins = [r for r in ins1 if r.role == "File"] + + cr = Crawler(debug=True, identifiableAdapter=ident) + crawl_standard_test_directory(cr, cfood="scifolder_extended.yml") + + file_list = [r for r in cr.target_data if r.role == "File"] + for f in file_list: + f.file = rfp("..", "unittests", "test_directories", + "examples_article", f.file) + ins2, ups2 = cr.synchronize(commit_changes=True) + assert len(ups1) == 0 + assert len(ups2) == 0 + + # Try adding a parent: + res = db.execute_query("Find File") + assert len(res) == 11 + assert len(res[0].parents) == 0 + + cr2 = Crawler(debug=True, identifiableAdapter=ident) + crawl_standard_test_directory(cr2, cfood="scifolder_extended2.yml") + + file_list = [r for r in cr2.target_data if r.role == "File"] + for f in file_list: + f.file = rfp("..", "unittests", "test_directories", + "examples_article", f.file) + ins3, ups3 = cr2.synchronize(commit_changes=True) + assert len(ups3) == 11 + + res = db.execute_query("Find File") + assert len(res) == 11 + assert res[0].parents[0].name == "ProjectMarkdownReadme" + + # TODO: Implement file update checks (based on checksum) + # Add test with actual file update: + # assert len(ins2) == 0 + # assert len(ups2) == len(file_list_ins) diff --git a/integrationtests/clear_database.py b/integrationtests/clear_database.py deleted file mode 100644 index 138cf4e6abb256d5710cd2b32f55a1fb51f3fbed..0000000000000000000000000000000000000000 --- a/integrationtests/clear_database.py +++ /dev/null @@ -1,46 +0,0 @@ -# -*- coding: utf-8 -*- -# -# ** header v3.0 -# This file is a part of the CaosDB Project. -# -# Copyright (C) 2020 Indiscale GmbH <info@indiscale.com> -# Copyright (C) 2020 Florian Spreckelsen <f.spreckelsen@indiscale.com> -# -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU Affero General Public License as -# published by the Free Software Foundation, either version 3 of the -# License, or (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Affero General Public License for more details. -# -# You should have received a copy of the GNU Affero General Public -# License along with this program. If not, see -# <https://www.gnu.org/licenses/>. -# -# ** end header -# -"""Clear the database before and after the integration tests.""" -import caosdb as db - - -def clear_all(): - """First remove Records, then RecordTypes, then Properties, finally - files. Since there may be no entities, execute all deletions - without raising errors. 
- - """ - db.execute_query("FIND Record").delete( - raise_exception_on_error=False) - db.execute_query("FIND RecordType").delete( - raise_exception_on_error=False) - db.execute_query("FIND Property").delete( - raise_exception_on_error=False) - db.execute_query("FIND File").delete( - raise_exception_on_error=False) - - -if __name__ == "__main__": - clear_all() diff --git a/integrationtests/insert_model.py b/integrationtests/insert_model.py deleted file mode 100755 index 45bdb6c837c36c999b289548e0f685519cd3aa85..0000000000000000000000000000000000000000 --- a/integrationtests/insert_model.py +++ /dev/null @@ -1,33 +0,0 @@ -#!/usr/bin/env python3 -# encoding: utf-8 -# -# ** header v3.0 -# This file is a part of the CaosDB Project. -# -# Copyright (C) 2021 Henrik tom Wörden -# 2021 Alexander Schlemmer -# -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU Affero General Public License as -# published by the Free Software Foundation, either version 3 of the -# License, or (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Affero General Public License for more details. -# -# You should have received a copy of the GNU Affero General Public License -# along with this program. If not, see <https://www.gnu.org/licenses/>. -import caosdb as db -from caosadvancedtools.models.data_model import DataModel -from caosadvancedtools.models.parser import parse_model_from_yaml - - -def main(): - model = parse_model_from_yaml("model.yml") - model.sync_data_model(noquestion=True) - - -if __name__ == "__main__": - main() diff --git a/integrationtests/test-profile/custom/caosdb-server/.add_dir_to_git b/integrationtests/test-profile/custom/caosdb-server/.add_dir_to_git new file mode 100644 index 0000000000000000000000000000000000000000..c51a03ac8e38c55c161ae55fe6ba805a4e1b05f5 --- /dev/null +++ b/integrationtests/test-profile/custom/caosdb-server/.add_dir_to_git @@ -0,0 +1 @@ +This directory should be created when cloning or pulling this git repository. 
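The SQL dump below (under test-profile/custom/other/restore/) is a database
snapshot for the test profile, presumably picked up by the caosdb-deploy
restore mechanism. A manual restore into the compose database container could
look like this (sketch; container name and credentials taken from the CI job
and .docker/docker-compose.yml):

    docker exec -i docker_sqldb_1 mysql -uroot -pcaosdb1234 caosdb \
        < integrationtests/test-profile/custom/other/restore/caosdb.2022-01-20T08:27:49.631552786+00:00.dump.sql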
diff --git a/integrationtests/test-profile/custom/caosdb-server/scripting/home/.pycaosdb.ini b/integrationtests/test-profile/custom/caosdb-server/scripting/home/.pycaosdb.ini new file mode 100644 index 0000000000000000000000000000000000000000..f45f1dbb14a343f2bee23e48b850df0ab48ca13b --- /dev/null +++ b/integrationtests/test-profile/custom/caosdb-server/scripting/home/.pycaosdb.ini @@ -0,0 +1,8 @@ +[Connection] +url = https://localhost:10443 +cacert = /opt/caosdb/cert/caosdb.cert.pem +debug = 0 +timeout = 5000 + +[Misc] +sendmail = /usr/local/bin/sendmail_to_file diff --git a/integrationtests/test-profile/custom/other/restore/caosdb.2022-01-20T08:27:49.631552786+00:00.dump.sql b/integrationtests/test-profile/custom/other/restore/caosdb.2022-01-20T08:27:49.631552786+00:00.dump.sql new file mode 100644 index 0000000000000000000000000000000000000000..c527a0040a469b5f48cbd50c786fe9ff24d545d5 --- /dev/null +++ b/integrationtests/test-profile/custom/other/restore/caosdb.2022-01-20T08:27:49.631552786+00:00.dump.sql @@ -0,0 +1,5716 @@ +-- MariaDB dump 10.19 Distrib 10.5.12-MariaDB, for debian-linux-gnu (x86_64) +-- +-- Host: sqldb Database: caosdb +-- ------------------------------------------------------ +-- Server version 10.5.11-MariaDB-1:10.5.11+maria~focal + +/*!40101 SET @OLD_CHARACTER_SET_CLIENT=@@CHARACTER_SET_CLIENT */; +/*!40101 SET @OLD_CHARACTER_SET_RESULTS=@@CHARACTER_SET_RESULTS */; +/*!40101 SET @OLD_COLLATION_CONNECTION=@@COLLATION_CONNECTION */; +/*!40101 SET NAMES utf8 */; +/*!40103 SET @OLD_TIME_ZONE=@@TIME_ZONE */; +/*!40103 SET TIME_ZONE='+00:00' */; +/*!40014 SET @OLD_UNIQUE_CHECKS=@@UNIQUE_CHECKS, UNIQUE_CHECKS=0 */; +/*!40014 SET @OLD_FOREIGN_KEY_CHECKS=@@FOREIGN_KEY_CHECKS, FOREIGN_KEY_CHECKS=0 */; +/*!40101 SET @OLD_SQL_MODE=@@SQL_MODE, SQL_MODE='NO_AUTO_VALUE_ON_ZERO' */; +/*!40111 SET @OLD_SQL_NOTES=@@SQL_NOTES, SQL_NOTES=0 */; + +-- +-- Table structure for table `archive_collection_type` +-- + +DROP TABLE IF EXISTS `archive_collection_type`; +/*!40101 SET @saved_cs_client = @@character_set_client */; +/*!40101 SET character_set_client = utf8 */; +CREATE TABLE `archive_collection_type` ( + `domain_id` int(10) unsigned NOT NULL, + `entity_id` int(10) unsigned NOT NULL, + `property_id` int(10) unsigned NOT NULL, + `collection` varchar(255) COLLATE utf8_unicode_ci NOT NULL, + `_iversion` int(10) unsigned NOT NULL, + UNIQUE KEY `archive_collection_type-d-e-p-v` (`domain_id`,`entity_id`,`property_id`,`_iversion`), + KEY `domain_id` (`domain_id`,`entity_id`,`_iversion`), + KEY `domain_id_2` (`domain_id`,`_iversion`), + KEY `entity_id` (`entity_id`), + KEY `property_id` (`property_id`), + CONSTRAINT `archive_collection_type_ibfk_1` FOREIGN KEY (`domain_id`) REFERENCES `entities` (`id`) ON DELETE CASCADE, + CONSTRAINT `archive_collection_type_ibfk_2` FOREIGN KEY (`entity_id`) REFERENCES `entities` (`id`) ON DELETE CASCADE, + CONSTRAINT `archive_collection_type_ibfk_3` FOREIGN KEY (`property_id`) REFERENCES `entities` (`id`) ON DELETE CASCADE +) ENGINE=InnoDB DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci; +/*!40101 SET character_set_client = @saved_cs_client */; + +-- +-- Dumping data for table `archive_collection_type` +-- + +LOCK TABLES `archive_collection_type` WRITE; +/*!40000 ALTER TABLE `archive_collection_type` DISABLE KEYS */; +/*!40000 ALTER TABLE `archive_collection_type` ENABLE KEYS */; +UNLOCK TABLES; + +-- +-- Table structure for table `archive_data_type` +-- + +DROP TABLE IF EXISTS `archive_data_type`; +/*!40101 SET @saved_cs_client = @@character_set_client */; 
+/*!40101 SET character_set_client = utf8 */; +CREATE TABLE `archive_data_type` ( + `domain_id` int(10) unsigned NOT NULL, + `entity_id` int(10) unsigned NOT NULL, + `property_id` int(10) unsigned NOT NULL, + `datatype` int(10) unsigned NOT NULL, + `_iversion` int(10) unsigned NOT NULL, + UNIQUE KEY `archive_data_type-d-e-p-v` (`domain_id`,`entity_id`,`property_id`,`_iversion`), + KEY `domain_id` (`domain_id`,`entity_id`,`_iversion`), + KEY `domain_id_2` (`domain_id`,`_iversion`), + KEY `entity_id` (`entity_id`), + KEY `property_id` (`property_id`), + KEY `datatype` (`datatype`), + CONSTRAINT `archive_data_type_ibfk_1` FOREIGN KEY (`domain_id`) REFERENCES `entities` (`id`) ON DELETE CASCADE, + CONSTRAINT `archive_data_type_ibfk_2` FOREIGN KEY (`entity_id`) REFERENCES `entities` (`id`) ON DELETE CASCADE, + CONSTRAINT `archive_data_type_ibfk_3` FOREIGN KEY (`property_id`) REFERENCES `entities` (`id`) ON DELETE CASCADE, + CONSTRAINT `archive_data_type_ibfk_4` FOREIGN KEY (`datatype`) REFERENCES `entities` (`id`) ON DELETE CASCADE +) ENGINE=InnoDB DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci; +/*!40101 SET character_set_client = @saved_cs_client */; + +-- +-- Dumping data for table `archive_data_type` +-- + +LOCK TABLES `archive_data_type` WRITE; +/*!40000 ALTER TABLE `archive_data_type` DISABLE KEYS */; +/*!40000 ALTER TABLE `archive_data_type` ENABLE KEYS */; +UNLOCK TABLES; + +-- +-- Table structure for table `archive_date_data` +-- + +DROP TABLE IF EXISTS `archive_date_data`; +/*!40101 SET @saved_cs_client = @@character_set_client */; +/*!40101 SET character_set_client = utf8 */; +CREATE TABLE `archive_date_data` ( + `domain_id` int(10) unsigned NOT NULL, + `entity_id` int(10) unsigned NOT NULL, + `property_id` int(10) unsigned NOT NULL, + `value` int(11) NOT NULL, + `status` enum('OBLIGATORY','RECOMMENDED','SUGGESTED','FIX') COLLATE utf8_unicode_ci NOT NULL, + `pidx` int(10) unsigned NOT NULL, + `_iversion` int(10) unsigned NOT NULL, + KEY `domain_id` (`domain_id`,`entity_id`,`_iversion`), + KEY `domain_id_2` (`domain_id`,`_iversion`), + KEY `entity_id` (`entity_id`), + KEY `property_id` (`property_id`), + CONSTRAINT `archive_date_data_ibfk_1` FOREIGN KEY (`domain_id`) REFERENCES `entities` (`id`) ON DELETE CASCADE, + CONSTRAINT `archive_date_data_ibfk_2` FOREIGN KEY (`entity_id`) REFERENCES `entities` (`id`) ON DELETE CASCADE, + CONSTRAINT `archive_date_data_ibfk_3` FOREIGN KEY (`property_id`) REFERENCES `entities` (`id`) ON DELETE CASCADE +) ENGINE=InnoDB DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci; +/*!40101 SET character_set_client = @saved_cs_client */; + +-- +-- Dumping data for table `archive_date_data` +-- + +LOCK TABLES `archive_date_data` WRITE; +/*!40000 ALTER TABLE `archive_date_data` DISABLE KEYS */; +/*!40000 ALTER TABLE `archive_date_data` ENABLE KEYS */; +UNLOCK TABLES; + +-- +-- Table structure for table `archive_datetime_data` +-- + +DROP TABLE IF EXISTS `archive_datetime_data`; +/*!40101 SET @saved_cs_client = @@character_set_client */; +/*!40101 SET character_set_client = utf8 */; +CREATE TABLE `archive_datetime_data` ( + `domain_id` int(10) unsigned NOT NULL, + `entity_id` int(10) unsigned NOT NULL, + `property_id` int(10) unsigned NOT NULL, + `value` bigint(20) NOT NULL, + `value_ns` int(10) unsigned DEFAULT NULL, + `status` enum('OBLIGATORY','RECOMMENDED','SUGGESTED','FIX') COLLATE utf8_unicode_ci NOT NULL, + `pidx` int(10) unsigned NOT NULL, + `_iversion` int(10) unsigned NOT NULL, + KEY `domain_id` (`domain_id`,`entity_id`,`_iversion`), + KEY `domain_id_2` 
(`domain_id`,`_iversion`), + KEY `entity_id` (`entity_id`), + KEY `property_id` (`property_id`), + CONSTRAINT `archive_datetime_data_ibfk_1` FOREIGN KEY (`domain_id`) REFERENCES `entities` (`id`) ON DELETE CASCADE, + CONSTRAINT `archive_datetime_data_ibfk_2` FOREIGN KEY (`entity_id`) REFERENCES `entities` (`id`) ON DELETE CASCADE, + CONSTRAINT `archive_datetime_data_ibfk_3` FOREIGN KEY (`property_id`) REFERENCES `entities` (`id`) ON DELETE CASCADE +) ENGINE=InnoDB DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci; +/*!40101 SET character_set_client = @saved_cs_client */; + +-- +-- Dumping data for table `archive_datetime_data` +-- + +LOCK TABLES `archive_datetime_data` WRITE; +/*!40000 ALTER TABLE `archive_datetime_data` DISABLE KEYS */; +/*!40000 ALTER TABLE `archive_datetime_data` ENABLE KEYS */; +UNLOCK TABLES; + +-- +-- Table structure for table `archive_desc_overrides` +-- + +DROP TABLE IF EXISTS `archive_desc_overrides`; +/*!40101 SET @saved_cs_client = @@character_set_client */; +/*!40101 SET character_set_client = utf8 */; +CREATE TABLE `archive_desc_overrides` ( + `domain_id` int(10) unsigned NOT NULL, + `entity_id` int(10) unsigned NOT NULL, + `property_id` int(10) unsigned NOT NULL, + `description` text COLLATE utf8_unicode_ci NOT NULL, + `_iversion` int(10) unsigned NOT NULL, + UNIQUE KEY `archive_desc_overrides-d-e-p-v` (`domain_id`,`entity_id`,`property_id`,`_iversion`), + KEY `domain_id` (`domain_id`,`entity_id`,`_iversion`), + KEY `domain_id_2` (`domain_id`,`_iversion`), + KEY `entity_id` (`entity_id`), + KEY `property_id` (`property_id`), + CONSTRAINT `archive_desc_overrides_ibfk_1` FOREIGN KEY (`domain_id`) REFERENCES `entities` (`id`) ON DELETE CASCADE, + CONSTRAINT `archive_desc_overrides_ibfk_2` FOREIGN KEY (`entity_id`) REFERENCES `entities` (`id`) ON DELETE CASCADE, + CONSTRAINT `archive_desc_overrides_ibfk_3` FOREIGN KEY (`property_id`) REFERENCES `entities` (`id`) ON DELETE CASCADE +) ENGINE=InnoDB DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci; +/*!40101 SET character_set_client = @saved_cs_client */; + +-- +-- Dumping data for table `archive_desc_overrides` +-- + +LOCK TABLES `archive_desc_overrides` WRITE; +/*!40000 ALTER TABLE `archive_desc_overrides` DISABLE KEYS */; +/*!40000 ALTER TABLE `archive_desc_overrides` ENABLE KEYS */; +UNLOCK TABLES; + +-- +-- Table structure for table `archive_double_data` +-- + +DROP TABLE IF EXISTS `archive_double_data`; +/*!40101 SET @saved_cs_client = @@character_set_client */; +/*!40101 SET character_set_client = utf8 */; +CREATE TABLE `archive_double_data` ( + `domain_id` int(10) unsigned NOT NULL, + `entity_id` int(10) unsigned NOT NULL, + `property_id` int(10) unsigned NOT NULL, + `value` double NOT NULL, + `status` enum('OBLIGATORY','RECOMMENDED','SUGGESTED','FIX') COLLATE utf8_unicode_ci NOT NULL, + `pidx` int(10) unsigned NOT NULL, + `_iversion` int(10) unsigned NOT NULL, + `unit_sig` bigint(20) DEFAULT NULL, + KEY `domain_id` (`domain_id`,`entity_id`,`_iversion`), + KEY `domain_id_2` (`domain_id`,`_iversion`), + KEY `entity_id` (`entity_id`), + KEY `property_id` (`property_id`), + CONSTRAINT `archive_double_data_ibfk_1` FOREIGN KEY (`domain_id`) REFERENCES `entities` (`id`) ON DELETE CASCADE, + CONSTRAINT `archive_double_data_ibfk_2` FOREIGN KEY (`entity_id`) REFERENCES `entities` (`id`) ON DELETE CASCADE, + CONSTRAINT `archive_double_data_ibfk_3` FOREIGN KEY (`property_id`) REFERENCES `entities` (`id`) ON DELETE CASCADE +) ENGINE=InnoDB DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci; +/*!40101 SET character_set_client = 
@saved_cs_client */; + +-- +-- Dumping data for table `archive_double_data` +-- + +LOCK TABLES `archive_double_data` WRITE; +/*!40000 ALTER TABLE `archive_double_data` DISABLE KEYS */; +/*!40000 ALTER TABLE `archive_double_data` ENABLE KEYS */; +UNLOCK TABLES; + +-- +-- Table structure for table `archive_entities` +-- + +DROP TABLE IF EXISTS `archive_entities`; +/*!40101 SET @saved_cs_client = @@character_set_client */; +/*!40101 SET character_set_client = utf8 */; +CREATE TABLE `archive_entities` ( + `id` int(10) unsigned NOT NULL, + `description` text COLLATE utf8_unicode_ci DEFAULT NULL, + `role` enum('RECORDTYPE','RECORD','FILE','DOMAIN','PROPERTY','DATATYPE','ROLE','QUERYTEMPLATE') COLLATE utf8_unicode_ci NOT NULL, + `acl` int(10) unsigned DEFAULT NULL, + `_iversion` int(10) unsigned NOT NULL, + PRIMARY KEY (`id`,`_iversion`), + KEY `acl` (`acl`), + CONSTRAINT `archive_entities_ibfk_1` FOREIGN KEY (`id`, `_iversion`) REFERENCES `entity_version` (`entity_id`, `_iversion`) ON DELETE CASCADE, + CONSTRAINT `archive_entities_ibfk_2` FOREIGN KEY (`acl`) REFERENCES `entity_acl` (`id`) +) ENGINE=InnoDB DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci; +/*!40101 SET character_set_client = @saved_cs_client */; + +-- +-- Dumping data for table `archive_entities` +-- + +LOCK TABLES `archive_entities` WRITE; +/*!40000 ALTER TABLE `archive_entities` DISABLE KEYS */; +/*!40000 ALTER TABLE `archive_entities` ENABLE KEYS */; +UNLOCK TABLES; + +-- +-- Table structure for table `archive_enum_data` +-- + +DROP TABLE IF EXISTS `archive_enum_data`; +/*!40101 SET @saved_cs_client = @@character_set_client */; +/*!40101 SET character_set_client = utf8 */; +CREATE TABLE `archive_enum_data` ( + `domain_id` int(10) unsigned NOT NULL, + `entity_id` int(10) unsigned NOT NULL, + `property_id` int(10) unsigned NOT NULL, + `value` varbinary(255) NOT NULL, + `status` enum('OBLIGATORY','RECOMMENDED','SUGGESTED','FIX') COLLATE utf8_unicode_ci NOT NULL, + `pidx` int(10) unsigned NOT NULL, + `_iversion` int(10) unsigned NOT NULL, + KEY `domain_id` (`domain_id`,`entity_id`,`_iversion`), + KEY `domain_id_2` (`domain_id`,`_iversion`), + KEY `entity_id` (`entity_id`), + KEY `property_id` (`property_id`), + CONSTRAINT `archive_enum_data_ibfk_1` FOREIGN KEY (`domain_id`) REFERENCES `entities` (`id`) ON DELETE CASCADE, + CONSTRAINT `archive_enum_data_ibfk_2` FOREIGN KEY (`entity_id`) REFERENCES `entities` (`id`) ON DELETE CASCADE, + CONSTRAINT `archive_enum_data_ibfk_3` FOREIGN KEY (`property_id`) REFERENCES `entities` (`id`) ON DELETE CASCADE +) ENGINE=InnoDB DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci; +/*!40101 SET character_set_client = @saved_cs_client */; + +-- +-- Dumping data for table `archive_enum_data` +-- + +LOCK TABLES `archive_enum_data` WRITE; +/*!40000 ALTER TABLE `archive_enum_data` DISABLE KEYS */; +/*!40000 ALTER TABLE `archive_enum_data` ENABLE KEYS */; +UNLOCK TABLES; + +-- +-- Table structure for table `archive_files` +-- + +DROP TABLE IF EXISTS `archive_files`; +/*!40101 SET @saved_cs_client = @@character_set_client */; +/*!40101 SET character_set_client = utf8 */; +CREATE TABLE `archive_files` ( + `file_id` int(10) unsigned NOT NULL, + `path` text COLLATE utf8_unicode_ci NOT NULL, + `size` bigint(20) unsigned NOT NULL, + `hash` binary(64) DEFAULT NULL, + `_iversion` int(10) unsigned NOT NULL, + PRIMARY KEY (`file_id`,`_iversion`), + CONSTRAINT `archive_files_ibfk_1` FOREIGN KEY (`file_id`, `_iversion`) REFERENCES `entity_version` (`entity_id`, `_iversion`) ON DELETE CASCADE +) ENGINE=InnoDB DEFAULT 
CHARSET=utf8 COLLATE=utf8_unicode_ci; +/*!40101 SET character_set_client = @saved_cs_client */; + +-- +-- Dumping data for table `archive_files` +-- + +LOCK TABLES `archive_files` WRITE; +/*!40000 ALTER TABLE `archive_files` DISABLE KEYS */; +/*!40000 ALTER TABLE `archive_files` ENABLE KEYS */; +UNLOCK TABLES; + +-- +-- Table structure for table `archive_integer_data` +-- + +DROP TABLE IF EXISTS `archive_integer_data`; +/*!40101 SET @saved_cs_client = @@character_set_client */; +/*!40101 SET character_set_client = utf8 */; +CREATE TABLE `archive_integer_data` ( + `domain_id` int(10) unsigned NOT NULL, + `entity_id` int(10) unsigned NOT NULL, + `property_id` int(10) unsigned NOT NULL, + `value` bigint(20) NOT NULL, + `status` enum('OBLIGATORY','RECOMMENDED','SUGGESTED','FIX') COLLATE utf8_unicode_ci NOT NULL, + `pidx` int(10) unsigned NOT NULL, + `_iversion` int(10) unsigned NOT NULL, + `unit_sig` bigint(20) DEFAULT NULL, + KEY `domain_id` (`domain_id`,`entity_id`,`_iversion`), + KEY `domain_id_2` (`domain_id`,`_iversion`), + KEY `entity_id` (`entity_id`), + KEY `property_id` (`property_id`), + CONSTRAINT `archive_integer_data_ibfk_1` FOREIGN KEY (`domain_id`) REFERENCES `entities` (`id`) ON DELETE CASCADE, + CONSTRAINT `archive_integer_data_ibfk_2` FOREIGN KEY (`entity_id`) REFERENCES `entities` (`id`) ON DELETE CASCADE, + CONSTRAINT `archive_integer_data_ibfk_3` FOREIGN KEY (`property_id`) REFERENCES `entities` (`id`) ON DELETE CASCADE +) ENGINE=InnoDB DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci; +/*!40101 SET character_set_client = @saved_cs_client */; + +-- +-- Dumping data for table `archive_integer_data` +-- + +LOCK TABLES `archive_integer_data` WRITE; +/*!40000 ALTER TABLE `archive_integer_data` DISABLE KEYS */; +/*!40000 ALTER TABLE `archive_integer_data` ENABLE KEYS */; +UNLOCK TABLES; + +-- +-- Table structure for table `archive_isa` +-- + +DROP TABLE IF EXISTS `archive_isa`; +/*!40101 SET @saved_cs_client = @@character_set_client */; +/*!40101 SET character_set_client = utf8 */; +CREATE TABLE `archive_isa` ( + `child` int(10) unsigned NOT NULL, + `child_iversion` int(10) unsigned NOT NULL, + `parent` int(10) unsigned NOT NULL, + `direct` tinyint(1) DEFAULT 1, + KEY `parent` (`parent`), + KEY `child` (`child`,`child_iversion`), + CONSTRAINT `archive_isa_ibfk_1` FOREIGN KEY (`parent`) REFERENCES `entities` (`id`) ON DELETE CASCADE, + CONSTRAINT `archive_isa_ibfk_2` FOREIGN KEY (`child`, `child_iversion`) REFERENCES `entity_version` (`entity_id`, `_iversion`) ON DELETE CASCADE +) ENGINE=InnoDB DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci; +/*!40101 SET character_set_client = @saved_cs_client */; + +-- +-- Dumping data for table `archive_isa` +-- + +LOCK TABLES `archive_isa` WRITE; +/*!40000 ALTER TABLE `archive_isa` DISABLE KEYS */; +/*!40000 ALTER TABLE `archive_isa` ENABLE KEYS */; +UNLOCK TABLES; + +-- +-- Table structure for table `archive_name_data` +-- + +DROP TABLE IF EXISTS `archive_name_data`; +/*!40101 SET @saved_cs_client = @@character_set_client */; +/*!40101 SET character_set_client = utf8 */; +CREATE TABLE `archive_name_data` ( + `domain_id` int(10) unsigned NOT NULL, + `entity_id` int(10) unsigned NOT NULL, + `property_id` int(10) unsigned NOT NULL, + `value` varchar(255) COLLATE utf8_unicode_ci NOT NULL, + `status` enum('OBLIGATORY','RECOMMENDED','SUGGESTED','FIX') COLLATE utf8_unicode_ci NOT NULL, + `pidx` int(10) unsigned NOT NULL, + `_iversion` int(10) unsigned NOT NULL, + KEY `domain_id` (`domain_id`,`entity_id`,`_iversion`), + KEY `domain_id_2` 
(`domain_id`,`_iversion`), + KEY `value` (`value`), + KEY `entity_id` (`entity_id`), + KEY `property_id` (`property_id`), + CONSTRAINT `archive_name_data_ibfk_1` FOREIGN KEY (`domain_id`) REFERENCES `entities` (`id`) ON DELETE CASCADE, + CONSTRAINT `archive_name_data_ibfk_2` FOREIGN KEY (`entity_id`) REFERENCES `entities` (`id`) ON DELETE CASCADE, + CONSTRAINT `archive_name_data_ibfk_3` FOREIGN KEY (`property_id`) REFERENCES `entities` (`id`) ON DELETE CASCADE +) ENGINE=InnoDB DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci; +/*!40101 SET character_set_client = @saved_cs_client */; + +-- +-- Dumping data for table `archive_name_data` +-- + +LOCK TABLES `archive_name_data` WRITE; +/*!40000 ALTER TABLE `archive_name_data` DISABLE KEYS */; +/*!40000 ALTER TABLE `archive_name_data` ENABLE KEYS */; +UNLOCK TABLES; + +-- +-- Table structure for table `archive_name_overrides` +-- + +DROP TABLE IF EXISTS `archive_name_overrides`; +/*!40101 SET @saved_cs_client = @@character_set_client */; +/*!40101 SET character_set_client = utf8 */; +CREATE TABLE `archive_name_overrides` ( + `domain_id` int(10) unsigned NOT NULL, + `entity_id` int(10) unsigned NOT NULL, + `property_id` int(10) unsigned NOT NULL, + `name` varchar(255) COLLATE utf8_unicode_ci NOT NULL, + `_iversion` int(10) unsigned NOT NULL, + UNIQUE KEY `archive_name_overrides-d-e-p-v` (`domain_id`,`entity_id`,`property_id`,`_iversion`), + KEY `domain_id` (`domain_id`,`entity_id`,`_iversion`), + KEY `domain_id_2` (`domain_id`,`_iversion`), + KEY `entity_id` (`entity_id`), + KEY `property_id` (`property_id`), + CONSTRAINT `archive_name_overrides_ibfk_1` FOREIGN KEY (`domain_id`) REFERENCES `entities` (`id`) ON DELETE CASCADE, + CONSTRAINT `archive_name_overrides_ibfk_2` FOREIGN KEY (`entity_id`) REFERENCES `entities` (`id`) ON DELETE CASCADE, + CONSTRAINT `archive_name_overrides_ibfk_3` FOREIGN KEY (`property_id`) REFERENCES `entities` (`id`) ON DELETE CASCADE +) ENGINE=InnoDB DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci; +/*!40101 SET character_set_client = @saved_cs_client */; + +-- +-- Dumping data for table `archive_name_overrides` +-- + +LOCK TABLES `archive_name_overrides` WRITE; +/*!40000 ALTER TABLE `archive_name_overrides` DISABLE KEYS */; +/*!40000 ALTER TABLE `archive_name_overrides` ENABLE KEYS */; +UNLOCK TABLES; + +-- +-- Table structure for table `archive_null_data` +-- + +DROP TABLE IF EXISTS `archive_null_data`; +/*!40101 SET @saved_cs_client = @@character_set_client */; +/*!40101 SET character_set_client = utf8 */; +CREATE TABLE `archive_null_data` ( + `domain_id` int(10) unsigned NOT NULL, + `entity_id` int(10) unsigned NOT NULL, + `property_id` int(10) unsigned NOT NULL, + `status` enum('OBLIGATORY','RECOMMENDED','SUGGESTED','FIX') COLLATE utf8_unicode_ci NOT NULL, + `pidx` int(10) unsigned NOT NULL, + `_iversion` int(10) unsigned NOT NULL, + KEY `domain_id` (`domain_id`,`entity_id`,`_iversion`), + KEY `domain_id_2` (`domain_id`,`_iversion`), + KEY `entity_id` (`entity_id`), + KEY `property_id` (`property_id`), + CONSTRAINT `archive_null_data_ibfk_1` FOREIGN KEY (`domain_id`) REFERENCES `entities` (`id`) ON DELETE CASCADE, + CONSTRAINT `archive_null_data_ibfk_2` FOREIGN KEY (`entity_id`) REFERENCES `entities` (`id`) ON DELETE CASCADE, + CONSTRAINT `archive_null_data_ibfk_3` FOREIGN KEY (`property_id`) REFERENCES `entities` (`id`) ON DELETE CASCADE +) ENGINE=InnoDB DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci; +/*!40101 SET character_set_client = @saved_cs_client */; + +-- +-- Dumping data for table `archive_null_data` +-- + +LOCK 
TABLES `archive_null_data` WRITE; +/*!40000 ALTER TABLE `archive_null_data` DISABLE KEYS */; +/*!40000 ALTER TABLE `archive_null_data` ENABLE KEYS */; +UNLOCK TABLES; + +-- +-- Table structure for table `archive_query_template_def` +-- + +DROP TABLE IF EXISTS `archive_query_template_def`; +/*!40101 SET @saved_cs_client = @@character_set_client */; +/*!40101 SET character_set_client = utf8 */; +CREATE TABLE `archive_query_template_def` ( + `id` int(10) unsigned NOT NULL, + `definition` mediumtext COLLATE utf8_unicode_ci NOT NULL, + `_iversion` int(10) unsigned NOT NULL, + PRIMARY KEY (`id`,`_iversion`), + CONSTRAINT `archive_query_template_def_ibfk_1` FOREIGN KEY (`id`, `_iversion`) REFERENCES `entity_version` (`entity_id`, `_iversion`) ON DELETE CASCADE +) ENGINE=InnoDB DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci; +/*!40101 SET character_set_client = @saved_cs_client */; + +-- +-- Dumping data for table `archive_query_template_def` +-- + +LOCK TABLES `archive_query_template_def` WRITE; +/*!40000 ALTER TABLE `archive_query_template_def` DISABLE KEYS */; +/*!40000 ALTER TABLE `archive_query_template_def` ENABLE KEYS */; +UNLOCK TABLES; + +-- +-- Table structure for table `archive_reference_data` +-- + +DROP TABLE IF EXISTS `archive_reference_data`; +/*!40101 SET @saved_cs_client = @@character_set_client */; +/*!40101 SET character_set_client = utf8 */; +CREATE TABLE `archive_reference_data` ( + `domain_id` int(10) unsigned NOT NULL, + `entity_id` int(10) unsigned NOT NULL, + `property_id` int(10) unsigned NOT NULL, + `value` int(10) unsigned NOT NULL, + `value_iversion` int(10) unsigned DEFAULT NULL, + `status` enum('OBLIGATORY','RECOMMENDED','SUGGESTED','FIX','REPLACEMENT') COLLATE utf8_unicode_ci NOT NULL, + `pidx` int(10) unsigned NOT NULL, + `_iversion` int(10) unsigned NOT NULL, + KEY `domain_id` (`domain_id`,`entity_id`,`_iversion`), + KEY `domain_id_2` (`domain_id`,`_iversion`), + KEY `entity_id` (`entity_id`), + KEY `property_id` (`property_id`), + KEY `value` (`value`), + CONSTRAINT `archive_reference_data_ibfk_1` FOREIGN KEY (`domain_id`) REFERENCES `entities` (`id`) ON DELETE CASCADE, + CONSTRAINT `archive_reference_data_ibfk_2` FOREIGN KEY (`entity_id`) REFERENCES `entities` (`id`) ON DELETE CASCADE, + CONSTRAINT `archive_reference_data_ibfk_3` FOREIGN KEY (`property_id`) REFERENCES `entities` (`id`) ON DELETE CASCADE, + CONSTRAINT `archive_reference_data_ibfk_4` FOREIGN KEY (`value`) REFERENCES `entities` (`id`) ON DELETE CASCADE +) ENGINE=InnoDB DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci; +/*!40101 SET character_set_client = @saved_cs_client */; + +-- +-- Dumping data for table `archive_reference_data` +-- + +LOCK TABLES `archive_reference_data` WRITE; +/*!40000 ALTER TABLE `archive_reference_data` DISABLE KEYS */; +/*!40000 ALTER TABLE `archive_reference_data` ENABLE KEYS */; +UNLOCK TABLES; + +-- +-- Table structure for table `archive_text_data` +-- + +DROP TABLE IF EXISTS `archive_text_data`; +/*!40101 SET @saved_cs_client = @@character_set_client */; +/*!40101 SET character_set_client = utf8 */; +CREATE TABLE `archive_text_data` ( + `domain_id` int(10) unsigned NOT NULL, + `entity_id` int(10) unsigned NOT NULL, + `property_id` int(10) unsigned NOT NULL, + `value` text COLLATE utf8_unicode_ci NOT NULL, + `status` enum('OBLIGATORY','RECOMMENDED','SUGGESTED','FIX') COLLATE utf8_unicode_ci NOT NULL, + `pidx` int(10) unsigned NOT NULL, + `_iversion` int(10) unsigned NOT NULL, + KEY `domain_id` (`domain_id`,`entity_id`,`_iversion`), + KEY `domain_id_2` 
(`domain_id`,`_iversion`), + KEY `entity_id` (`entity_id`), + KEY `property_id` (`property_id`), + CONSTRAINT `archive_text_data_ibfk_1` FOREIGN KEY (`domain_id`) REFERENCES `entities` (`id`) ON DELETE CASCADE, + CONSTRAINT `archive_text_data_ibfk_2` FOREIGN KEY (`entity_id`) REFERENCES `entities` (`id`) ON DELETE CASCADE, + CONSTRAINT `archive_text_data_ibfk_3` FOREIGN KEY (`property_id`) REFERENCES `entities` (`id`) ON DELETE CASCADE +) ENGINE=InnoDB DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci; +/*!40101 SET character_set_client = @saved_cs_client */; + +-- +-- Dumping data for table `archive_text_data` +-- + +LOCK TABLES `archive_text_data` WRITE; +/*!40000 ALTER TABLE `archive_text_data` DISABLE KEYS */; +/*!40000 ALTER TABLE `archive_text_data` ENABLE KEYS */; +UNLOCK TABLES; + +-- +-- Table structure for table `collection_type` +-- + +DROP TABLE IF EXISTS `collection_type`; +/*!40101 SET @saved_cs_client = @@character_set_client */; +/*!40101 SET character_set_client = utf8 */; +CREATE TABLE `collection_type` ( + `domain_id` int(10) unsigned NOT NULL, + `entity_id` int(10) unsigned NOT NULL, + `property_id` int(10) unsigned NOT NULL, + `collection` varchar(255) COLLATE utf8_unicode_ci NOT NULL, + UNIQUE KEY `collection_type-d-e-p` (`domain_id`,`entity_id`,`property_id`), + KEY `domain_id` (`domain_id`,`entity_id`), + KEY `entity_id` (`entity_id`), + KEY `property_id` (`property_id`), + CONSTRAINT `collection_type_domain_id_entity` FOREIGN KEY (`domain_id`) REFERENCES `entities` (`id`), + CONSTRAINT `collection_type_entity_id_entity` FOREIGN KEY (`entity_id`) REFERENCES `entities` (`id`), + CONSTRAINT `collection_type_property_id_entity` FOREIGN KEY (`property_id`) REFERENCES `entities` (`id`) +) ENGINE=InnoDB DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci; +/*!40101 SET character_set_client = @saved_cs_client */; + +-- +-- Dumping data for table `collection_type` +-- + +LOCK TABLES `collection_type` WRITE; +/*!40000 ALTER TABLE `collection_type` DISABLE KEYS */; +/*!40000 ALTER TABLE `collection_type` ENABLE KEYS */; +UNLOCK TABLES; + +-- +-- Table structure for table `data_type` +-- + +DROP TABLE IF EXISTS `data_type`; +/*!40101 SET @saved_cs_client = @@character_set_client */; +/*!40101 SET character_set_client = utf8 */; +CREATE TABLE `data_type` ( + `domain_id` int(10) unsigned NOT NULL, + `entity_id` int(10) unsigned NOT NULL, + `property_id` int(10) unsigned NOT NULL, + `datatype` int(10) unsigned NOT NULL, + UNIQUE KEY `datatype_ukey` (`domain_id`,`entity_id`,`property_id`), + KEY `name_ov_dom_ent_idx` (`domain_id`,`entity_id`), + KEY `datatype_forkey_ent` (`entity_id`), + KEY `datatype_forkey_pro` (`property_id`), + KEY `datatype_forkey_type` (`datatype`), + CONSTRAINT `datatype_forkey_dom` FOREIGN KEY (`domain_id`) REFERENCES `entities` (`id`), + CONSTRAINT `datatype_forkey_ent` FOREIGN KEY (`entity_id`) REFERENCES `entities` (`id`), + CONSTRAINT `datatype_forkey_pro` FOREIGN KEY (`property_id`) REFERENCES `entities` (`id`), + CONSTRAINT `datatype_forkey_type` FOREIGN KEY (`datatype`) REFERENCES `entities` (`id`) +) ENGINE=InnoDB DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci; +/*!40101 SET character_set_client = @saved_cs_client */; + +-- +-- Dumping data for table `data_type` +-- + +LOCK TABLES `data_type` WRITE; +/*!40000 ALTER TABLE `data_type` DISABLE KEYS */; +INSERT INTO `data_type` VALUES (0,0,20,14),(0,0,21,14),(0,0,24,14),(0,0,100,14); +/*!40000 ALTER TABLE `data_type` ENABLE KEYS */; +UNLOCK TABLES; + +-- +-- Table structure for table `date_data` +-- + +DROP TABLE IF 
EXISTS `date_data`; +/*!40101 SET @saved_cs_client = @@character_set_client */; +/*!40101 SET character_set_client = utf8 */; +CREATE TABLE `date_data` ( + `domain_id` int(10) unsigned DEFAULT NULL, + `entity_id` int(10) unsigned DEFAULT NULL, + `property_id` int(10) unsigned DEFAULT NULL, + `value` int(11) NOT NULL, + `status` enum('OBLIGATORY','RECOMMENDED','SUGGESTED','FIX') COLLATE utf8_unicode_ci DEFAULT NULL, + `pidx` int(10) unsigned NOT NULL DEFAULT 0, + KEY `date_data_dom_ent_idx` (`domain_id`,`entity_id`), + KEY `date_ov_forkey_ent` (`entity_id`), + KEY `date_ov_forkey_pro` (`property_id`), + CONSTRAINT `date_ov_forkey_dom` FOREIGN KEY (`domain_id`) REFERENCES `entities` (`id`), + CONSTRAINT `date_ov_forkey_ent` FOREIGN KEY (`entity_id`) REFERENCES `entities` (`id`), + CONSTRAINT `date_ov_forkey_pro` FOREIGN KEY (`property_id`) REFERENCES `entities` (`id`) +) ENGINE=InnoDB DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci; +/*!40101 SET character_set_client = @saved_cs_client */; + +-- +-- Dumping data for table `date_data` +-- + +LOCK TABLES `date_data` WRITE; +/*!40000 ALTER TABLE `date_data` DISABLE KEYS */; +/*!40000 ALTER TABLE `date_data` ENABLE KEYS */; +UNLOCK TABLES; + +-- +-- Table structure for table `datetime_data` +-- + +DROP TABLE IF EXISTS `datetime_data`; +/*!40101 SET @saved_cs_client = @@character_set_client */; +/*!40101 SET character_set_client = utf8 */; +CREATE TABLE `datetime_data` ( + `domain_id` int(10) unsigned NOT NULL COMMENT 'Domain.', + `entity_id` int(10) unsigned NOT NULL COMMENT 'Entity.', + `property_id` int(10) unsigned NOT NULL COMMENT 'Property.', + `status` enum('OBLIGATORY','RECOMMENDED','SUGGESTED','FIX','REPLACEMENT') COLLATE utf8_unicode_ci NOT NULL COMMENT 'Status of this statement.', + `pidx` int(10) unsigned NOT NULL DEFAULT 0, + `value_ns` int(10) unsigned DEFAULT NULL, + `value` bigint(20) NOT NULL, + KEY `domain_id` (`domain_id`,`entity_id`), + KEY `dat_entity_id_entity` (`entity_id`), + KEY `dat_property_id_entity` (`property_id`), + CONSTRAINT `dat_domain_id_entity` FOREIGN KEY (`domain_id`) REFERENCES `entities` (`id`), + CONSTRAINT `dat_entity_id_entity` FOREIGN KEY (`entity_id`) REFERENCES `entities` (`id`), + CONSTRAINT `dat_property_id_entity` FOREIGN KEY (`property_id`) REFERENCES `entities` (`id`) +) ENGINE=InnoDB DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci; +/*!40101 SET character_set_client = @saved_cs_client */; + +-- +-- Dumping data for table `datetime_data` +-- + +LOCK TABLES `datetime_data` WRITE; +/*!40000 ALTER TABLE `datetime_data` DISABLE KEYS */; +/*!40000 ALTER TABLE `datetime_data` ENABLE KEYS */; +UNLOCK TABLES; + +-- +-- Table structure for table `desc_overrides` +-- + +DROP TABLE IF EXISTS `desc_overrides`; +/*!40101 SET @saved_cs_client = @@character_set_client */; +/*!40101 SET character_set_client = utf8 */; +CREATE TABLE `desc_overrides` ( + `domain_id` int(10) unsigned DEFAULT NULL, + `entity_id` int(10) unsigned DEFAULT NULL, + `property_id` int(10) unsigned DEFAULT NULL, + `description` text COLLATE utf8_unicode_ci DEFAULT NULL, + UNIQUE KEY `desc_ov_ukey` (`domain_id`,`entity_id`,`property_id`), + KEY `desc_ov_dom_ent_idx` (`domain_id`,`entity_id`), + KEY `desc_ov_forkey_ent` (`entity_id`), + KEY `desc_ov_forkey_pro` (`property_id`), + CONSTRAINT `desc_ov_forkey_dom` FOREIGN KEY (`domain_id`) REFERENCES `entities` (`id`), + CONSTRAINT `desc_ov_forkey_ent` FOREIGN KEY (`entity_id`) REFERENCES `entities` (`id`), + CONSTRAINT `desc_ov_forkey_pro` FOREIGN KEY (`property_id`) REFERENCES `entities` (`id`) +) 
ENGINE=InnoDB DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci; +/*!40101 SET character_set_client = @saved_cs_client */; + +-- +-- Dumping data for table `desc_overrides` +-- + +LOCK TABLES `desc_overrides` WRITE; +/*!40000 ALTER TABLE `desc_overrides` DISABLE KEYS */; +/*!40000 ALTER TABLE `desc_overrides` ENABLE KEYS */; +UNLOCK TABLES; + +-- +-- Table structure for table `double_data` +-- + +DROP TABLE IF EXISTS `double_data`; +/*!40101 SET @saved_cs_client = @@character_set_client */; +/*!40101 SET character_set_client = utf8 */; +CREATE TABLE `double_data` ( + `domain_id` int(10) unsigned NOT NULL COMMENT 'Domain.', + `entity_id` int(10) unsigned NOT NULL COMMENT 'Entity.', + `property_id` int(10) unsigned NOT NULL COMMENT 'Property.', + `value` double NOT NULL, + `status` enum('OBLIGATORY','RECOMMENDED','SUGGESTED','FIX','REPLACEMENT') COLLATE utf8_unicode_ci NOT NULL COMMENT 'Status of this statement.', + `pidx` int(10) unsigned NOT NULL DEFAULT 0, + `unit_sig` bigint(20) DEFAULT NULL, + KEY `domain_id` (`domain_id`,`entity_id`), + KEY `dou_entity_id_entity` (`entity_id`), + KEY `dou_property_id_entity` (`property_id`), + CONSTRAINT `dou_domain_id_entity` FOREIGN KEY (`domain_id`) REFERENCES `entities` (`id`), + CONSTRAINT `dou_entity_id_entity` FOREIGN KEY (`entity_id`) REFERENCES `entities` (`id`), + CONSTRAINT `dou_property_id_entity` FOREIGN KEY (`property_id`) REFERENCES `entities` (`id`) +) ENGINE=InnoDB DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci; +/*!40101 SET character_set_client = @saved_cs_client */; + +-- +-- Dumping data for table `double_data` +-- + +LOCK TABLES `double_data` WRITE; +/*!40000 ALTER TABLE `double_data` DISABLE KEYS */; +/*!40000 ALTER TABLE `double_data` ENABLE KEYS */; +UNLOCK TABLES; + +-- +-- Table structure for table `entities` +-- + +DROP TABLE IF EXISTS `entities`; +/*!40101 SET @saved_cs_client = @@character_set_client */; +/*!40101 SET character_set_client = utf8 */; +CREATE TABLE `entities` ( + `id` int(10) unsigned NOT NULL AUTO_INCREMENT COMMENT 'Unique identifier.', + `description` text COLLATE utf8_unicode_ci DEFAULT NULL, + `role` enum('RECORDTYPE','RECORD','FILE','DOMAIN','PROPERTY','DATATYPE','ROLE','QUERYTEMPLATE') COLLATE utf8_unicode_ci NOT NULL, + `acl` int(10) unsigned DEFAULT NULL COMMENT 'Access Control List for the entity.', + PRIMARY KEY (`id`), + KEY `entity_entity_acl` (`acl`), + CONSTRAINT `entity_entity_acl` FOREIGN KEY (`acl`) REFERENCES `entity_acl` (`id`) +) ENGINE=InnoDB AUTO_INCREMENT=103 DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci; +/*!40101 SET character_set_client = @saved_cs_client */; + +-- +-- Dumping data for table `entities` +-- + +LOCK TABLES `entities` WRITE; +/*!40000 ALTER TABLE `entities` DISABLE KEYS */; +INSERT INTO `entities` VALUES (0,'The default domain.','ROLE',0),(1,'The default recordtype.','ROLE',0),(2,'The default record.','ROLE',0),(3,'The default file.','ROLE',0),(4,'The default property.','ROLE',0),(7,'The default datatype.','ROLE',0),(8,'The QueryTemplate role.','ROLE',0),(11,'The default reference data type.','DATATYPE',0),(12,'The default integer data type.','DATATYPE',0),(13,'The default double data type.','DATATYPE',0),(14,'The default text data type.','DATATYPE',0),(15,'The default datetime data type.','DATATYPE',0),(16,'The default timespan data type.','DATATYPE',0),(17,'The default file reference data type.','DATATYPE',0),(18,'The default boolean data type.','DATATYPE',0),(20,'Name of an entity.','PROPERTY',0),(21,'Unit of an entity.','PROPERTY',0),(24,'Description of an 
entity.','PROPERTY',0),(50,'The SQLite file data type.','DATATYPE',0),(99,NULL,'RECORDTYPE',0),(100,'This is a unique key which should be only known to the pytest file that is used to run tests within this instance of CaosDB.','PROPERTY',2),(101,NULL,'RECORDTYPE',2),(102,NULL,'RECORD',2); +/*!40000 ALTER TABLE `entities` ENABLE KEYS */; +UNLOCK TABLES; + +-- +-- Table structure for table `entity_acl` +-- + +DROP TABLE IF EXISTS `entity_acl`; +/*!40101 SET @saved_cs_client = @@character_set_client */; +/*!40101 SET character_set_client = utf8 */; +CREATE TABLE `entity_acl` ( + `id` int(10) unsigned NOT NULL AUTO_INCREMENT, + `acl` varbinary(65525) NOT NULL, + PRIMARY KEY (`id`), + KEY `entity_acl_acl` (`acl`(3072)) +) ENGINE=InnoDB AUTO_INCREMENT=3 DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci; +/*!40101 SET character_set_client = @saved_cs_client */; + +-- +-- Dumping data for table `entity_acl` +-- + +LOCK TABLES `entity_acl` WRITE; +/*!40000 ALTER TABLE `entity_acl` DISABLE KEYS */; +INSERT INTO `entity_acl` VALUES (0,''),(2,'[{\"realm\":\"PAM\",\"bitSet\":536608371,\"username\":\"admin\"}]'); +/*!40000 ALTER TABLE `entity_acl` ENABLE KEYS */; +UNLOCK TABLES; + +-- +-- Table structure for table `entity_version` +-- + +DROP TABLE IF EXISTS `entity_version`; +/*!40101 SET @saved_cs_client = @@character_set_client */; +/*!40101 SET character_set_client = utf8 */; +CREATE TABLE `entity_version` ( + `entity_id` int(10) unsigned NOT NULL, + `hash` varbinary(255) DEFAULT NULL, + `version` varbinary(255) NOT NULL, + `_iversion` int(10) unsigned NOT NULL, + `_ipparent` int(10) unsigned DEFAULT NULL, + `srid` varbinary(255) NOT NULL, + PRIMARY KEY (`entity_id`,`_iversion`), + UNIQUE KEY `entity_version-e-v` (`entity_id`,`version`), + KEY `srid` (`srid`), + CONSTRAINT `entity_version_ibfk_1` FOREIGN KEY (`entity_id`) REFERENCES `entities` (`id`) ON DELETE CASCADE, + CONSTRAINT `entity_version_ibfk_2` FOREIGN KEY (`srid`) REFERENCES `transactions` (`srid`) +) ENGINE=InnoDB DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci; +/*!40101 SET character_set_client = @saved_cs_client */; + +-- +-- Dumping data for table `entity_version` +-- + +LOCK TABLES `entity_version` WRITE; +/*!40000 ALTER TABLE `entity_version` DISABLE KEYS */; +INSERT INTO `entity_version` VALUES 
(0,NULL,'507b9d49fb5379a29f7214cf0e01785266f60caa',1,NULL,'cf83e1357eefb8bdf1542850d66d8007d620e4050b5715dc83f4a921d36ce9ce47d0d13c5d85f2b0ff8318d2877eec2f63b931bd47417a81a538327af927da3e'),(1,NULL,'94331f259172f041c6c2cadc367381f8adc8e13e',1,NULL,'cf83e1357eefb8bdf1542850d66d8007d620e4050b5715dc83f4a921d36ce9ce47d0d13c5d85f2b0ff8318d2877eec2f63b931bd47417a81a538327af927da3e'),(2,NULL,'97cf2265ee2438ebae4cb0ca4d567ad73ea3c439',1,NULL,'cf83e1357eefb8bdf1542850d66d8007d620e4050b5715dc83f4a921d36ce9ce47d0d13c5d85f2b0ff8318d2877eec2f63b931bd47417a81a538327af927da3e'),(3,NULL,'9d58d341bd5dfb27def78ead97da879480d0ff32',1,NULL,'cf83e1357eefb8bdf1542850d66d8007d620e4050b5715dc83f4a921d36ce9ce47d0d13c5d85f2b0ff8318d2877eec2f63b931bd47417a81a538327af927da3e'),(4,NULL,'7ccfd2bc86f0ea0d178f7bbc45d50aca320e49ce',1,NULL,'cf83e1357eefb8bdf1542850d66d8007d620e4050b5715dc83f4a921d36ce9ce47d0d13c5d85f2b0ff8318d2877eec2f63b931bd47417a81a538327af927da3e'),(7,NULL,'dfa7e8bb2308d095713b7c7ed133797934bbd786',1,NULL,'cf83e1357eefb8bdf1542850d66d8007d620e4050b5715dc83f4a921d36ce9ce47d0d13c5d85f2b0ff8318d2877eec2f63b931bd47417a81a538327af927da3e'),(8,NULL,'ee9c70057e457be52f0aecd86aa989f53a696dd8',1,NULL,'cf83e1357eefb8bdf1542850d66d8007d620e4050b5715dc83f4a921d36ce9ce47d0d13c5d85f2b0ff8318d2877eec2f63b931bd47417a81a538327af927da3e'),(11,NULL,'d198fe9ac5d91945019d8a37cfb1f04b11e8900b',1,NULL,'cf83e1357eefb8bdf1542850d66d8007d620e4050b5715dc83f4a921d36ce9ce47d0d13c5d85f2b0ff8318d2877eec2f63b931bd47417a81a538327af927da3e'),(12,NULL,'c792b402eeec2182b787f401d2a25ac59ce20d0c',1,NULL,'cf83e1357eefb8bdf1542850d66d8007d620e4050b5715dc83f4a921d36ce9ce47d0d13c5d85f2b0ff8318d2877eec2f63b931bd47417a81a538327af927da3e'),(13,NULL,'d3fc056e7fe063c5f1e9c039a157b3cf28a64bac',1,NULL,'cf83e1357eefb8bdf1542850d66d8007d620e4050b5715dc83f4a921d36ce9ce47d0d13c5d85f2b0ff8318d2877eec2f63b931bd47417a81a538327af927da3e'),(14,NULL,'621026c1a4d23dd53e92c96f922dd450e4d126dc',1,NULL,'cf83e1357eefb8bdf1542850d66d8007d620e4050b5715dc83f4a921d36ce9ce47d0d13c5d85f2b0ff8318d2877eec2f63b931bd47417a81a538327af927da3e'),(15,NULL,'419c2c18ac6522879aaaee83991b99ab71c2dcde',1,NULL,'cf83e1357eefb8bdf1542850d66d8007d620e4050b5715dc83f4a921d36ce9ce47d0d13c5d85f2b0ff8318d2877eec2f63b931bd47417a81a538327af927da3e'),(16,NULL,'a7561ff1d50e64b3a0faeb96ded9b5949fb8ccbc',1,NULL,'cf83e1357eefb8bdf1542850d66d8007d620e4050b5715dc83f4a921d36ce9ce47d0d13c5d85f2b0ff8318d2877eec2f63b931bd47417a81a538327af927da3e'),(17,NULL,'9540f72b8adb5d6c19ac1af4bc9f512ef46a0485',1,NULL,'cf83e1357eefb8bdf1542850d66d8007d620e4050b5715dc83f4a921d36ce9ce47d0d13c5d85f2b0ff8318d2877eec2f63b931bd47417a81a538327af927da3e'),(18,NULL,'e2156c6825353edf67dff833919706e8d19f8500',1,NULL,'cf83e1357eefb8bdf1542850d66d8007d620e4050b5715dc83f4a921d36ce9ce47d0d13c5d85f2b0ff8318d2877eec2f63b931bd47417a81a538327af927da3e'),(20,NULL,'3fddf8ff1cd074b1f0f393a4d2359c7da216e456',1,NULL,'cf83e1357eefb8bdf1542850d66d8007d620e4050b5715dc83f4a921d36ce9ce47d0d13c5d85f2b0ff8318d2877eec2f63b931bd47417a81a538327af927da3e'),(21,NULL,'8efa386bc8a3a673158476289fbab2ac7469cb21',1,NULL,'cf83e1357eefb8bdf1542850d66d8007d620e4050b5715dc83f4a921d36ce9ce47d0d13c5d85f2b0ff8318d2877eec2f63b931bd47417a81a538327af927da3e'),(24,NULL,'d0e0a82e89328da66105f98854256e273be32208',1,NULL,'cf83e1357eefb8bdf1542850d66d8007d620e4050b5715dc83f4a921d36ce9ce47d0d13c5d85f2b0ff8318d2877eec2f63b931bd47417a81a538327af927da3e'),(50,NULL,'2cebb0cb377f6de1216351e21cc32a0d2e866f19',1,NULL,'cf83e1357eefb8bdf1542850d66d8007d620e4050b5715dc83f4a921d36ce9ce
47d0d13c5d85f2b0ff8318d2877eec2f63b931bd47417a81a538327af927da3e'),(99,NULL,'15b8ed5414e9bcd871acb2c62421c822a9154f66',1,NULL,'cf83e1357eefb8bdf1542850d66d8007d620e4050b5715dc83f4a921d36ce9ce47d0d13c5d85f2b0ff8318d2877eec2f63b931bd47417a81a538327af927da3e'),(100,NULL,'6fbec42f1bda31574472914ce0614b4ba447bd3a',1,NULL,'41dd8224-1e1a-4ad9-9d37-bad5841cb2a2'),(101,NULL,'bc922c549546af75ded1b2b6272c11825f78b5ed',1,NULL,'41dd8224-1e1a-4ad9-9d37-bad5841cb2a2'),(102,NULL,'63e3fffa8ad0ea5f900641f562f905991e149f33',1,NULL,'c2deb139-70f3-4ba6-bbef-40ae2e33ec7d'); +/*!40000 ALTER TABLE `entity_version` ENABLE KEYS */; +UNLOCK TABLES; + +-- +-- Table structure for table `enum_data` +-- + +DROP TABLE IF EXISTS `enum_data`; +/*!40101 SET @saved_cs_client = @@character_set_client */; +/*!40101 SET character_set_client = utf8 */; +CREATE TABLE `enum_data` ( + `domain_id` int(10) unsigned DEFAULT NULL, + `entity_id` int(10) unsigned DEFAULT NULL, + `property_id` int(10) unsigned DEFAULT NULL, + `value` varbinary(255) NOT NULL, + `status` enum('OBLIGATORY','RECOMMENDED','SUGGESTED','FIX') COLLATE utf8_unicode_ci DEFAULT NULL, + `pidx` int(10) unsigned NOT NULL DEFAULT 0, + KEY `enum_ov_dom_ent_idx` (`domain_id`,`entity_id`), + KEY `enum_ov_forkey_ent` (`entity_id`), + KEY `enum_ov_forkey_pro` (`property_id`), + CONSTRAINT `enum_ov_forkey_dom` FOREIGN KEY (`domain_id`) REFERENCES `entities` (`id`), + CONSTRAINT `enum_ov_forkey_ent` FOREIGN KEY (`entity_id`) REFERENCES `entities` (`id`), + CONSTRAINT `enum_ov_forkey_pro` FOREIGN KEY (`property_id`) REFERENCES `entities` (`id`) +) ENGINE=InnoDB DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci; +/*!40101 SET character_set_client = @saved_cs_client */; + +-- +-- Dumping data for table `enum_data` +-- + +LOCK TABLES `enum_data` WRITE; +/*!40000 ALTER TABLE `enum_data` DISABLE KEYS */; +/*!40000 ALTER TABLE `enum_data` ENABLE KEYS */; +UNLOCK TABLES; + +-- +-- Table structure for table `feature_config` +-- + +DROP TABLE IF EXISTS `feature_config`; +/*!40101 SET @saved_cs_client = @@character_set_client */; +/*!40101 SET character_set_client = utf8 */; +CREATE TABLE `feature_config` ( + `_key` varchar(255) COLLATE utf8_unicode_ci NOT NULL, + `_value` varchar(255) COLLATE utf8_unicode_ci DEFAULT NULL, + PRIMARY KEY (`_key`) +) ENGINE=InnoDB DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci; +/*!40101 SET character_set_client = @saved_cs_client */; + +-- +-- Dumping data for table `feature_config` +-- + +LOCK TABLES `feature_config` WRITE; +/*!40000 ALTER TABLE `feature_config` DISABLE KEYS */; +INSERT INTO `feature_config` VALUES ('ENTITY_VERSIONING','ENABLED'); +/*!40000 ALTER TABLE `feature_config` ENABLE KEYS */; +UNLOCK TABLES; + +-- +-- Table structure for table `files` +-- + +DROP TABLE IF EXISTS `files`; +/*!40101 SET @saved_cs_client = @@character_set_client */; +/*!40101 SET character_set_client = utf8 */; +CREATE TABLE `files` ( + `file_id` int(10) unsigned NOT NULL COMMENT 'The file''s ID.', + `path` varchar(255) COLLATE utf8_unicode_ci NOT NULL COMMENT 'Directory of the file.', + `size` bigint(20) unsigned NOT NULL COMMENT 'Size in kB (octet bytes).', + `hash` binary(64) DEFAULT NULL, + `checked_timestamp` bigint(20) NOT NULL DEFAULT 0, + PRIMARY KEY (`file_id`), + CONSTRAINT `fil_file_id_entity` FOREIGN KEY (`file_id`) REFERENCES `entities` (`id`) +) ENGINE=InnoDB DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci; +/*!40101 SET character_set_client = @saved_cs_client */; + +-- +-- Dumping data for table `files` +-- + +LOCK TABLES `files` WRITE; +/*!40000 ALTER TABLE 
`files` DISABLE KEYS */; +/*!40000 ALTER TABLE `files` ENABLE KEYS */; +UNLOCK TABLES; + +-- +-- Table structure for table `groups` +-- + +DROP TABLE IF EXISTS `groups`; +/*!40101 SET @saved_cs_client = @@character_set_client */; +/*!40101 SET character_set_client = utf8 */; +CREATE TABLE `groups` ( + `user_id` int(10) unsigned NOT NULL, + `group_id` int(10) unsigned NOT NULL, + UNIQUE KEY `user_id` (`user_id`,`group_id`), + KEY `group_id_entities_id` (`group_id`), + CONSTRAINT `group_id_entities_id` FOREIGN KEY (`group_id`) REFERENCES `entities` (`id`), + CONSTRAINT `user_id_entities_id` FOREIGN KEY (`user_id`) REFERENCES `entities` (`id`) +) ENGINE=InnoDB DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci; +/*!40101 SET character_set_client = @saved_cs_client */; + +-- +-- Dumping data for table `groups` +-- + +LOCK TABLES `groups` WRITE; +/*!40000 ALTER TABLE `groups` DISABLE KEYS */; +/*!40000 ALTER TABLE `groups` ENABLE KEYS */; +UNLOCK TABLES; + +-- +-- Table structure for table `integer_data` +-- + +DROP TABLE IF EXISTS `integer_data`; +/*!40101 SET @saved_cs_client = @@character_set_client */; +/*!40101 SET character_set_client = utf8 */; +CREATE TABLE `integer_data` ( + `domain_id` int(10) unsigned NOT NULL COMMENT 'Domain.', + `entity_id` int(10) unsigned NOT NULL COMMENT 'Entity.', + `property_id` int(10) unsigned NOT NULL COMMENT 'Property.', + `value` bigint(20) NOT NULL, + `status` enum('OBLIGATORY','RECOMMENDED','SUGGESTED','FIX','REPLACEMENT') COLLATE utf8_unicode_ci NOT NULL COMMENT 'Status of this statement.', + `pidx` int(10) unsigned NOT NULL DEFAULT 0, + `unit_sig` bigint(20) DEFAULT NULL, + KEY `domain_id` (`domain_id`,`entity_id`), + KEY `int_entity_id_entity` (`entity_id`), + KEY `int_property_id_entity` (`property_id`), + CONSTRAINT `int_domain_id_entity` FOREIGN KEY (`domain_id`) REFERENCES `entities` (`id`), + CONSTRAINT `int_entity_id_entity` FOREIGN KEY (`entity_id`) REFERENCES `entities` (`id`), + CONSTRAINT `int_property_id_entity` FOREIGN KEY (`property_id`) REFERENCES `entities` (`id`) +) ENGINE=InnoDB DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci; +/*!40101 SET character_set_client = @saved_cs_client */; + +-- +-- Dumping data for table `integer_data` +-- + +LOCK TABLES `integer_data` WRITE; +/*!40000 ALTER TABLE `integer_data` DISABLE KEYS */; +/*!40000 ALTER TABLE `integer_data` ENABLE KEYS */; +UNLOCK TABLES; + +-- +-- Table structure for table `isa` +-- + +DROP TABLE IF EXISTS `isa`; +/*!40101 SET @saved_cs_client = @@character_set_client */; +/*!40101 SET character_set_client = utf8 */; +CREATE TABLE `isa` ( + `child` int(10) unsigned NOT NULL COMMENT 'Child', + `parent` int(10) unsigned NOT NULL COMMENT 'Parent', + `type` enum('INHERITANCE','SUBTYPING') COLLATE utf8_unicode_ci NOT NULL COMMENT 'Type of is-a relation.', + UNIQUE KEY `child` (`child`,`parent`), + KEY `parent_entity` (`parent`), + CONSTRAINT `child_entity` FOREIGN KEY (`child`) REFERENCES `entities` (`id`), + CONSTRAINT `parent_entity` FOREIGN KEY (`parent`) REFERENCES `entities` (`id`) +) ENGINE=InnoDB DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci; +/*!40101 SET character_set_client = @saved_cs_client */; + +-- +-- Dumping data for table `isa` +-- + +LOCK TABLES `isa` WRITE; +/*!40000 ALTER TABLE `isa` DISABLE KEYS */; +/*!40000 ALTER TABLE `isa` ENABLE KEYS */; +UNLOCK TABLES; + +-- +-- Table structure for table `isa_cache` +-- + +DROP TABLE IF EXISTS `isa_cache`; +/*!40101 SET @saved_cs_client = @@character_set_client */; +/*!40101 SET character_set_client = utf8 */; +CREATE TABLE 
`isa_cache` ( + `child` int(10) unsigned NOT NULL, + `parent` int(10) unsigned NOT NULL, + `rpath` varchar(255) COLLATE utf8_unicode_ci NOT NULL, + PRIMARY KEY (`child`,`parent`,`rpath`), + KEY `isa_cache_parent_entity` (`parent`), + CONSTRAINT `isa_cache_child_entity` FOREIGN KEY (`child`) REFERENCES `entities` (`id`), + CONSTRAINT `isa_cache_parent_entity` FOREIGN KEY (`parent`) REFERENCES `entities` (`id`) +) ENGINE=InnoDB DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci; +/*!40101 SET character_set_client = @saved_cs_client */; + +-- +-- Dumping data for table `isa_cache` +-- + +LOCK TABLES `isa_cache` WRITE; +/*!40000 ALTER TABLE `isa_cache` DISABLE KEYS */; +INSERT INTO `isa_cache` VALUES (102,101,'102'); +/*!40000 ALTER TABLE `isa_cache` ENABLE KEYS */; +UNLOCK TABLES; + +-- +-- Table structure for table `logging` +-- + +DROP TABLE IF EXISTS `logging`; +/*!40101 SET @saved_cs_client = @@character_set_client */; +/*!40101 SET character_set_client = utf8 */; +CREATE TABLE `logging` ( + `level` int(11) NOT NULL, + `logger` varchar(255) COLLATE utf8_unicode_ci NOT NULL, + `message` mediumtext COLLATE utf8_unicode_ci NOT NULL, + `millis` bigint(20) NOT NULL, + `logRecord` blob NOT NULL +) ENGINE=InnoDB DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci; +/*!40101 SET character_set_client = @saved_cs_client */; + +-- +-- Dumping data for table `logging` +-- + +LOCK TABLES `logging` WRITE; +/*!40000 ALTER TABLE `logging` DISABLE KEYS */; +/*!40000 ALTER TABLE `logging` ENABLE KEYS */; +UNLOCK TABLES; + +-- +-- Table structure for table `name_data` +-- + +DROP TABLE IF EXISTS `name_data`; +/*!40101 SET @saved_cs_client = @@character_set_client */; +/*!40101 SET character_set_client = utf8 */; +CREATE TABLE `name_data` ( + `domain_id` int(10) unsigned NOT NULL, + `entity_id` int(10) unsigned NOT NULL, + `property_id` int(10) unsigned NOT NULL, + `value` varchar(255) COLLATE utf8_unicode_ci NOT NULL, + `status` enum('OBLIGATORY','RECOMMENDED','SUGGESTED','FIX','REPLACEMENT') COLLATE utf8_unicode_ci NOT NULL, + `pidx` int(10) unsigned NOT NULL DEFAULT 0, + UNIQUE KEY `domain_id_2` (`domain_id`,`entity_id`,`property_id`), + KEY `domain_id` (`domain_id`,`entity_id`), + KEY `entity_id` (`entity_id`), + KEY `property_id` (`property_id`), + KEY `value` (`value`), + CONSTRAINT `name_data_domain_id_entity` FOREIGN KEY (`domain_id`) REFERENCES `entities` (`id`), + CONSTRAINT `name_data_entity_id_entity` FOREIGN KEY (`entity_id`) REFERENCES `entities` (`id`), + CONSTRAINT `name_data_property_id_entity` FOREIGN KEY (`property_id`) REFERENCES `entities` (`id`) +) ENGINE=InnoDB DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci; +/*!40101 SET character_set_client = @saved_cs_client */; + +-- +-- Dumping data for table `name_data` +-- + +LOCK TABLES `name_data` WRITE; +/*!40000 ALTER TABLE `name_data` DISABLE KEYS */; +INSERT INTO `name_data` VALUES (0,0,20,'DOMAIN','FIX',0),(0,1,20,'RECORDTYPE','FIX',0),(0,2,20,'RECORD','FIX',0),(0,3,20,'FILE','FIX',0),(0,4,20,'PROPERTY','FIX',0),(0,7,20,'DATATYPE','FIX',0),(0,8,20,'QUERYTEMPLATE','FIX',0),(0,11,20,'REFERENCE','FIX',0),(0,12,20,'INTEGER','FIX',0),(0,13,20,'DOUBLE','FIX',0),(0,14,20,'TEXT','FIX',0),(0,15,20,'DATETIME','FIX',0),(0,16,20,'TIMESPAN','FIX',0),(0,17,20,'FILE','FIX',0),(0,18,20,'BOOLEAN','FIX',0),(0,20,20,'name','FIX',0),(0,21,20,'unit','FIX',0),(0,24,20,'description','FIX',0),(0,50,20,'SQLITE','FIX',0),(0,100,20,'TestIdentification','FIX',0),(0,101,20,'PyTestInfo','FIX',0); +/*!40000 ALTER TABLE `name_data` ENABLE KEYS */; +UNLOCK TABLES; + +-- +-- Table 
structure for table `name_overrides` +-- + +DROP TABLE IF EXISTS `name_overrides`; +/*!40101 SET @saved_cs_client = @@character_set_client */; +/*!40101 SET character_set_client = utf8 */; +CREATE TABLE `name_overrides` ( + `domain_id` int(10) unsigned DEFAULT NULL, + `entity_id` int(10) unsigned DEFAULT NULL, + `property_id` int(10) unsigned DEFAULT NULL, + `name` varchar(255) COLLATE utf8_unicode_ci DEFAULT NULL, + UNIQUE KEY `name_ov_ukey` (`domain_id`,`entity_id`,`property_id`), + KEY `name_ov_dom_ent_idx` (`domain_id`,`entity_id`), + KEY `name_ov_forkey_ent` (`entity_id`), + KEY `name_ov_forkey_pro` (`property_id`), + CONSTRAINT `name_ov_forkey_dom` FOREIGN KEY (`domain_id`) REFERENCES `entities` (`id`), + CONSTRAINT `name_ov_forkey_ent` FOREIGN KEY (`entity_id`) REFERENCES `entities` (`id`), + CONSTRAINT `name_ov_forkey_pro` FOREIGN KEY (`property_id`) REFERENCES `entities` (`id`) +) ENGINE=InnoDB DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci; +/*!40101 SET character_set_client = @saved_cs_client */; + +-- +-- Dumping data for table `name_overrides` +-- + +LOCK TABLES `name_overrides` WRITE; +/*!40000 ALTER TABLE `name_overrides` DISABLE KEYS */; +/*!40000 ALTER TABLE `name_overrides` ENABLE KEYS */; +UNLOCK TABLES; + +-- +-- Table structure for table `null_data` +-- + +DROP TABLE IF EXISTS `null_data`; +/*!40101 SET @saved_cs_client = @@character_set_client */; +/*!40101 SET character_set_client = utf8 */; +CREATE TABLE `null_data` ( + `domain_id` int(10) unsigned DEFAULT NULL, + `entity_id` int(10) unsigned DEFAULT NULL, + `property_id` int(10) unsigned DEFAULT NULL, + `status` enum('OBLIGATORY','RECOMMENDED','SUGGESTED','FIX') COLLATE utf8_unicode_ci DEFAULT NULL, + `pidx` int(10) unsigned NOT NULL DEFAULT 0, + KEY `null_data_dom_ent_idx` (`domain_id`,`entity_id`), + KEY `null_forkey_ent` (`entity_id`), + KEY `null_forkey_pro` (`property_id`), + CONSTRAINT `null_forkey_dom` FOREIGN KEY (`domain_id`) REFERENCES `entities` (`id`), + CONSTRAINT `null_forkey_ent` FOREIGN KEY (`entity_id`) REFERENCES `entities` (`id`), + CONSTRAINT `null_forkey_pro` FOREIGN KEY (`property_id`) REFERENCES `entities` (`id`) +) ENGINE=InnoDB DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci; +/*!40101 SET character_set_client = @saved_cs_client */; + +-- +-- Dumping data for table `null_data` +-- + +LOCK TABLES `null_data` WRITE; +/*!40000 ALTER TABLE `null_data` DISABLE KEYS */; +INSERT INTO `null_data` VALUES (0,101,100,'OBLIGATORY',0); +/*!40000 ALTER TABLE `null_data` ENABLE KEYS */; +UNLOCK TABLES; + +-- +-- Table structure for table `passwd` +-- + +DROP TABLE IF EXISTS `passwd`; +/*!40101 SET @saved_cs_client = @@character_set_client */; +/*!40101 SET character_set_client = utf8 */; +CREATE TABLE `passwd` ( + `principal` varbinary(255) NOT NULL, + `hash` varbinary(255) NOT NULL, + `alg` varchar(255) COLLATE utf8_unicode_ci DEFAULT 'SHA-512', + `it` int(10) unsigned DEFAULT 5000, + `salt` varbinary(255) NOT NULL, + PRIMARY KEY (`principal`) +) ENGINE=InnoDB DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci; +/*!40101 SET character_set_client = @saved_cs_client */; + +-- +-- Dumping data for table `passwd` +-- + +LOCK TABLES `passwd` WRITE; +/*!40000 ALTER TABLE `passwd` DISABLE KEYS */; +/*!40000 ALTER TABLE `passwd` ENABLE KEYS */; +UNLOCK TABLES; + +-- +-- Table structure for table `passwords` +-- + +DROP TABLE IF EXISTS `passwords`; +/*!40101 SET @saved_cs_client = @@character_set_client */; +/*!40101 SET character_set_client = utf8 */; +CREATE TABLE `passwords` ( + `entity_id` int(10) unsigned NOT NULL 
COMMENT 'User ID.', + `password` varchar(255) COLLATE utf8_unicode_ci NOT NULL COMMENT 'Password.', + PRIMARY KEY (`entity_id`), + CONSTRAINT `use_entity_id_entity` FOREIGN KEY (`entity_id`) REFERENCES `entities` (`id`) +) ENGINE=InnoDB DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci; +/*!40101 SET character_set_client = @saved_cs_client */; + +-- +-- Dumping data for table `passwords` +-- + +LOCK TABLES `passwords` WRITE; +/*!40000 ALTER TABLE `passwords` DISABLE KEYS */; +INSERT INTO `passwords` VALUES (98,'37d7bd8a833261b4e4653644ee0a065f522b92b3738ca9ae2cb43a83844bf352c4a59c386a44965997a508c61988c9484c093775027425091d6d3d435c3c0e0c'),(99,'37d7bd8a833261b4e4653644ee0a065f522b92b3738ca9ae2cb43a83844bf352c4a59c386a44965997a508c61988c9484c093775027425091d6d3d435c3c0e0c'); +/*!40000 ALTER TABLE `passwords` ENABLE KEYS */; +UNLOCK TABLES; + +-- +-- Table structure for table `permissions` +-- + +DROP TABLE IF EXISTS `permissions`; +/*!40101 SET @saved_cs_client = @@character_set_client */; +/*!40101 SET character_set_client = utf8 */; +CREATE TABLE `permissions` ( + `role` varbinary(255) NOT NULL, + `permissions` mediumtext COLLATE utf8_unicode_ci NOT NULL, + PRIMARY KEY (`role`), + CONSTRAINT `perm_name_roles` FOREIGN KEY (`role`) REFERENCES `roles` (`name`) +) ENGINE=InnoDB DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci; +/*!40101 SET character_set_client = @saved_cs_client */; + +-- +-- Dumping data for table `permissions` +-- + +LOCK TABLES `permissions` WRITE; +/*!40000 ALTER TABLE `permissions` DISABLE KEYS */; +INSERT INTO `permissions` VALUES ('administration','[{\"grant\":\"true\",\"priority\":\"true\",\"permission\":\"*\"}]'); +/*!40000 ALTER TABLE `permissions` ENABLE KEYS */; +UNLOCK TABLES; + +-- +-- Table structure for table `query_template_def` +-- + +DROP TABLE IF EXISTS `query_template_def`; +/*!40101 SET @saved_cs_client = @@character_set_client */; +/*!40101 SET character_set_client = utf8 */; +CREATE TABLE `query_template_def` ( + `id` int(10) unsigned NOT NULL, + `definition` mediumtext COLLATE utf8_unicode_ci NOT NULL, + PRIMARY KEY (`id`), + CONSTRAINT `query_template_def_ibfk_1` FOREIGN KEY (`id`) REFERENCES `entities` (`id`) +) ENGINE=InnoDB DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci; +/*!40101 SET character_set_client = @saved_cs_client */; + +-- +-- Dumping data for table `query_template_def` +-- + +LOCK TABLES `query_template_def` WRITE; +/*!40000 ALTER TABLE `query_template_def` DISABLE KEYS */; +/*!40000 ALTER TABLE `query_template_def` ENABLE KEYS */; +UNLOCK TABLES; + +-- +-- Table structure for table `reference_data` +-- + +DROP TABLE IF EXISTS `reference_data`; +/*!40101 SET @saved_cs_client = @@character_set_client */; +/*!40101 SET character_set_client = utf8 */; +CREATE TABLE `reference_data` ( + `domain_id` int(10) unsigned NOT NULL COMMENT 'Domain.', + `entity_id` int(10) unsigned NOT NULL COMMENT 'Entity.', + `property_id` int(10) unsigned NOT NULL COMMENT 'Property.', + `value` int(10) unsigned NOT NULL, + `status` enum('OBLIGATORY','RECOMMENDED','SUGGESTED','FIX','REPLACEMENT') COLLATE utf8_unicode_ci NOT NULL COMMENT 'Status of this statement.', + `pidx` int(10) unsigned NOT NULL DEFAULT 0, + `value_iversion` int(10) unsigned DEFAULT NULL, + KEY `entity_id` (`entity_id`,`property_id`), + KEY `ref_domain_id_entity` (`domain_id`), + KEY `ref_property_id_entity` (`property_id`), + KEY `ref_value_entity` (`value`), + KEY `value` (`value`,`value_iversion`), + CONSTRAINT `ref_domain_id_entity` FOREIGN KEY (`domain_id`) REFERENCES `entities` (`id`), + 
CONSTRAINT `ref_entity_id_entity` FOREIGN KEY (`entity_id`) REFERENCES `entities` (`id`), + CONSTRAINT `ref_property_id_entity` FOREIGN KEY (`property_id`) REFERENCES `entities` (`id`), + CONSTRAINT `ref_value_entity` FOREIGN KEY (`value`) REFERENCES `entities` (`id`), + CONSTRAINT `reference_data_ibfk_1` FOREIGN KEY (`value`, `value_iversion`) REFERENCES `entity_version` (`entity_id`, `_iversion`) +) ENGINE=InnoDB DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci; +/*!40101 SET character_set_client = @saved_cs_client */; + +-- +-- Dumping data for table `reference_data` +-- + +LOCK TABLES `reference_data` WRITE; +/*!40000 ALTER TABLE `reference_data` DISABLE KEYS */; +/*!40000 ALTER TABLE `reference_data` ENABLE KEYS */; +UNLOCK TABLES; + +-- +-- Table structure for table `roles` +-- + +DROP TABLE IF EXISTS `roles`; +/*!40101 SET @saved_cs_client = @@character_set_client */; +/*!40101 SET character_set_client = utf8 */; +CREATE TABLE `roles` ( + `name` varbinary(255) NOT NULL, + `description` mediumtext COLLATE utf8_unicode_ci DEFAULT NULL, + PRIMARY KEY (`name`) +) ENGINE=InnoDB DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci; +/*!40101 SET character_set_client = @saved_cs_client */; + +-- +-- Dumping data for table `roles` +-- + +LOCK TABLES `roles` WRITE; +/*!40000 ALTER TABLE `roles` DISABLE KEYS */; +INSERT INTO `roles` VALUES ('administration','Users with this role have unrestricted permissions.'),('anonymous','Users who did not authenticate themselves.'); +/*!40000 ALTER TABLE `roles` ENABLE KEYS */; +UNLOCK TABLES; + +-- +-- Table structure for table `stats` +-- + +DROP TABLE IF EXISTS `stats`; +/*!40101 SET @saved_cs_client = @@character_set_client */; +/*!40101 SET character_set_client = utf8 */; +CREATE TABLE `stats` ( + `name` varchar(255) COLLATE utf8_unicode_ci NOT NULL, + `value` blob DEFAULT NULL, + PRIMARY KEY (`name`) +) ENGINE=InnoDB DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci; +/*!40101 SET character_set_client = @saved_cs_client */; + +-- +-- Dumping data for table `stats` +-- + +LOCK TABLES `stats` WRITE; +/*!40000 ALTER TABLE `stats` DISABLE KEYS */; +INSERT INTO `stats` VALUES ('RootBenchmark','��\0sr\0-org.caosdb.server.database.misc.RootBenchmark����Qk]\0\0xr\04org.caosdb.server.database.misc.TransactionBenchmark����Qk]\0J\0sinceL\0measurementst\0Ljava/util/Map;[\0stackTraceElementst\0[Ljava/lang/StackTraceElement;L\0\rsubBenchmarksq\0~\0xp\0\0~v��sr\0java.util.HashMap���`�\0F\0\nloadFactorI\0 thresholdxp?@\0\0\0\0\0\0w\0\0\0\0\0\0\0xur\0[Ljava.lang.StackTraceElement;F*<<�\"9\0\0xp\0\0\0sr\0java.lang.StackTraceElementa Ś&6݅\0B\0formatI\0\nlineNumberL\0classLoaderNamet\0Ljava/lang/String;L\0declaringClassq\0~\0\nL\0fileNameq\0~\0\nL\0\nmethodNameq\0~\0\nL\0\nmoduleNameq\0~\0\nL\0\rmoduleVersionq\0~\0\nxp\0\0Bpt\0java.lang.Threadt\0Thread.javat\0\rgetStackTracet\0 java.baset\011.0.13sq\0~\0 \0\0 t\0appt\04org.caosdb.server.database.misc.TransactionBenchmarkt\0TransactionBenchmark.javat\0<init>ppsq\0~\0 \0\0\0�q\0~\0t\0-org.caosdb.server.database.misc.RootBenchmarkq\0~\0q\0~\0ppsq\0~\0 \0\0q\0~\0q\0~\0q\0~\0t\0<clinit>ppsq\0~\0 \0\0<q\0~\0t\0org.caosdb.server.CaosDBServert\0CaosDBServer.javat\0initBackendppsq\0~\0 \0\0\0�q\0~\0q\0~\0q\0~\0t\0mainppsq\0~\0?@\0\0\0\0\0w\0\0\0\0\0\0t\0Infosr\0,org.caosdb.server.database.misc.SubBenchmark����Qk]\0L\0nameq\0~\0\nxq\0~\0\0\0~v��$sq\0~\0?@\0\0\0\0\0\0w\0\0\0\0\0\0\0xuq\0~\0\0\0\0\nsq\0~\0 \0\0Bpq\0~\0q\0~\0\rq\0~\0q\0~\0q\0~\0sq\0~\0 \0\0 q\0~\0q\0~\0q\0~\0q\0~\0ppsq\0~\0 
\0\0\0�q\0~\0t\0,org.caosdb.server.database.misc.SubBenchmarkq\0~\0q\0~\0ppsq\0~\0 \0\0�q\0~\0q\0~\0q\0~\0t\0getBenchmarkppsq\0~\0 \0\0�q\0~\0q\0~\0q\0~\0q\0~\0+ppsq\0~\0 \0\0\0#q\0~\0t\02org.caosdb.server.transaction.TransactionInterfacet\0TransactionInterface.javat\0getTransactionBenchmarkppsq\0~\0 \0\0\0/q\0~\0q\0~\0.q\0~\0/t\0executeppsq\0~\0 \0\0\0�q\0~\0t\0org.caosdb.server.utils.Infot\0 Info.javat\0syncDatabaseppsq\0~\0 \0\0\0�q\0~\0t\0/org.caosdb.server.database.misc.RootBenchmark$1q\0~\0t\0runppsq\0~\0 \0\0=pq\0~\0q\0~\0\rq\0~\09q\0~\0q\0~\0sq\0~\0?@\0\0\0\0\0w\0\0\0\0\0\0t\0 SyncStatssq\0~\0\"\0\0~v��$sq\0~\0?@\0\0\0\0\0\0w\0\0\0\0\0\0\0xuq\0~\0\0\0\0 sq\0~\0 \0\0Bpq\0~\0q\0~\0\rq\0~\0q\0~\0q\0~\0sq\0~\0 \0\0 q\0~\0q\0~\0q\0~\0q\0~\0ppsq\0~\0 \0\0\0�q\0~\0q\0~\0)q\0~\0q\0~\0ppsq\0~\0 \0\0�q\0~\0q\0~\0q\0~\0q\0~\0+ppsq\0~\0 \0\0�q\0~\0q\0~\0q\0~\0q\0~\0+ppsq\0~\0 \0\0\0/q\0~\0q\0~\0.q\0~\0/q\0~\02ppsq\0~\0 \0\0\0�q\0~\0q\0~\04q\0~\05q\0~\06ppsq\0~\0 \0\0\0�q\0~\0q\0~\08q\0~\0q\0~\09ppsq\0~\0 \0\0=pq\0~\0q\0~\0\rq\0~\09q\0~\0q\0~\0sq\0~\0?@\0\0\0\0\0w\0\0\0\0\0\0t\0MySQLSyncStatssq\0~\0\"\0\0~v��%sq\0~\0?@\0\0\0\0\0\0w\0\0\0\0\0\0\0xuq\0~\0\0\0\0sq\0~\0 \0\0Bpq\0~\0q\0~\0\rq\0~\0q\0~\0q\0~\0sq\0~\0 \0\0 q\0~\0q\0~\0q\0~\0q\0~\0ppsq\0~\0 \0\0\0�q\0~\0q\0~\0)q\0~\0q\0~\0ppsq\0~\0 \0\0�q\0~\0q\0~\0q\0~\0q\0~\0+ppsq\0~\0 \0\0�q\0~\0q\0~\0q\0~\0q\0~\0+ppsq\0~\0 \0\0\0�q\0~\0t\0-org.caosdb.server.database.BackendTransactiont\0BackendTransaction.javat\0getImplementationppsq\0~\0 \0\0\0+q\0~\0t\08org.caosdb.server.database.backend.transaction.SyncStatst\0SyncStats.javaq\0~\02ppsq\0~\0 \0\0\0�q\0~\0q\0~\0Tq\0~\0Ut\0executeTransactionppsq\0~\0 \0\0\00q\0~\0q\0~\0.q\0~\0/q\0~\02ppsq\0~\0 \0\0\0�q\0~\0q\0~\04q\0~\05q\0~\06ppsq\0~\0 \0\0\0�q\0~\0q\0~\08q\0~\0q\0~\09ppsq\0~\0 \0\0=pq\0~\0q\0~\0\rq\0~\09q\0~\0q\0~\0sq\0~\0?@\0\0\0\0\0\0w\0\0\0\0\0\0\0xq\0~\0Jxq\0~\0<xq\0~\0!x'),('TransactionBenchmark','��\0sr\00caosdb.server.database.misc.TransactionBenchmark�Cl=���E\0J\0sinceL\0acct\0Ljava/util/HashMap;L\0countsq\0~\0xp\0\0l���Wsr\0java.util.HashMap���`�\0F\0\nloadFactorI\0 thresholdxp?@\0\0\0\0\0w\0\0\0\0\0\0t\0 SyncStatssr\0java.lang.Long;��̏#�\0J\0valuexr\0java.lang.Number������\0\0xp\0\0\0\0\0\0\0t\0GetInfosq\0~\0\0\0\0\0\0\0 xsq\0~\0?@\0\0\0\0\0w\0\0\0\0\0\0q\0~\0sr\0java.lang.Integer⠤���8\0I\0valuexq\0~\0\0\0\0q\0~\0 sq\0~\0\0\0\0x'); +/*!40000 ALTER TABLE `stats` ENABLE KEYS */; +UNLOCK TABLES; + +-- +-- Table structure for table `text_data` +-- + +DROP TABLE IF EXISTS `text_data`; +/*!40101 SET @saved_cs_client = @@character_set_client */; +/*!40101 SET character_set_client = utf8 */; +CREATE TABLE `text_data` ( + `domain_id` int(10) unsigned NOT NULL COMMENT 'Domain.', + `entity_id` int(10) unsigned NOT NULL COMMENT 'Entity.', + `property_id` int(10) unsigned NOT NULL COMMENT 'Property.', + `value` text COLLATE utf8_unicode_ci NOT NULL, + `status` enum('OBLIGATORY','RECOMMENDED','SUGGESTED','FIX','REPLACEMENT') COLLATE utf8_unicode_ci NOT NULL COMMENT 'Status of this statement.', + `pidx` int(10) unsigned NOT NULL DEFAULT 0, + KEY `domain_id` (`domain_id`,`entity_id`), + KEY `str_entity_id_entity` (`entity_id`), + KEY `str_property_id_entity` (`property_id`), + CONSTRAINT `str_domain_id_entity` FOREIGN KEY (`domain_id`) REFERENCES `entities` (`id`), + CONSTRAINT `str_entity_id_entity` FOREIGN KEY (`entity_id`) REFERENCES `entities` (`id`), + CONSTRAINT `str_property_id_entity` FOREIGN KEY (`property_id`) REFERENCES `entities` (`id`) +) ENGINE=InnoDB DEFAULT CHARSET=utf8 
COLLATE=utf8_unicode_ci; +/*!40101 SET character_set_client = @saved_cs_client */; + +-- +-- Dumping data for table `text_data` +-- + +LOCK TABLES `text_data` WRITE; +/*!40000 ALTER TABLE `text_data` DISABLE KEYS */; +INSERT INTO `text_data` VALUES (0,102,100,'10b128cf8a1372f30aa3697466bb55e76974e0c16a599bb44ace88f19c8f61e2','FIX',0); +/*!40000 ALTER TABLE `text_data` ENABLE KEYS */; +UNLOCK TABLES; + +-- +-- Table structure for table `transaction_log` +-- + +DROP TABLE IF EXISTS `transaction_log`; +/*!40101 SET @saved_cs_client = @@character_set_client */; +/*!40101 SET character_set_client = utf8 */; +CREATE TABLE `transaction_log` ( + `transaction` varchar(255) COLLATE utf8_unicode_ci NOT NULL COMMENT 'Transaction.', + `entity_id` int(10) unsigned NOT NULL COMMENT 'Entity ID.', + `username` varbinary(255) NOT NULL, + `seconds` bigint(20) unsigned NOT NULL DEFAULT 0, + `nanos` int(10) unsigned NOT NULL DEFAULT 0, + `realm` varbinary(255) NOT NULL, + KEY `entity_id` (`entity_id`) +) ENGINE=InnoDB DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci; +/*!40101 SET character_set_client = @saved_cs_client */; + +-- +-- Dumping data for table `transaction_log` +-- + +LOCK TABLES `transaction_log` WRITE; +/*!40000 ALTER TABLE `transaction_log` DISABLE KEYS */; +INSERT INTO `transaction_log` VALUES ('Insert',100,'admin',1642667277,464000000,'PAM'),('Insert',101,'admin',1642667277,464000000,'PAM'),('Insert',102,'admin',1642667277,633000000,'PAM'); +/*!40000 ALTER TABLE `transaction_log` ENABLE KEYS */; +UNLOCK TABLES; + +-- +-- Table structure for table `transactions` +-- + +DROP TABLE IF EXISTS `transactions`; +/*!40101 SET @saved_cs_client = @@character_set_client */; +/*!40101 SET character_set_client = utf8 */; +CREATE TABLE `transactions` ( + `srid` varbinary(255) NOT NULL, + `username` varbinary(255) NOT NULL, + `realm` varbinary(255) NOT NULL, + `seconds` bigint(20) unsigned NOT NULL, + `nanos` int(10) unsigned NOT NULL, + PRIMARY KEY (`srid`) +) ENGINE=InnoDB DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci; +/*!40101 SET character_set_client = @saved_cs_client */; + +-- +-- Dumping data for table `transactions` +-- + +LOCK TABLES `transactions` WRITE; +/*!40000 ALTER TABLE `transactions` DISABLE KEYS */; +INSERT INTO `transactions` VALUES ('41dd8224-1e1a-4ad9-9d37-bad5841cb2a2','admin','PAM',1642667277,464000000),('c2deb139-70f3-4ba6-bbef-40ae2e33ec7d','admin','PAM',1642667277,633000000),('cf83e1357eefb8bdf1542850d66d8007d620e4050b5715dc83f4a921d36ce9ce47d0d13c5d85f2b0ff8318d2877eec2f63b931bd47417a81a538327af927da3e','administration','CaosDB',0,0); +/*!40000 ALTER TABLE `transactions` ENABLE KEYS */; +UNLOCK TABLES; + +-- +-- Table structure for table `units_lin_con` +-- + +DROP TABLE IF EXISTS `units_lin_con`; +/*!40101 SET @saved_cs_client = @@character_set_client */; +/*!40101 SET character_set_client = utf8 */; +CREATE TABLE `units_lin_con` ( + `signature_from` bigint(20) NOT NULL, + `signature_to` bigint(20) NOT NULL, + `a` decimal(65,30) NOT NULL, + `b_dividend` int(11) NOT NULL, + `b_divisor` int(11) NOT NULL, + `c` decimal(65,30) NOT NULL, + PRIMARY KEY (`signature_from`) +) ENGINE=InnoDB DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci; +/*!40101 SET character_set_client = @saved_cs_client */; + +-- +-- Dumping data for table `units_lin_con` +-- + +LOCK TABLES `units_lin_con` WRITE; +/*!40000 ALTER TABLE `units_lin_con` DISABLE KEYS */; +/*!40000 ALTER TABLE `units_lin_con` ENABLE KEYS */; +UNLOCK TABLES; + +-- +-- Table structure for table `user_info` +-- + +DROP TABLE IF EXISTS `user_info`; 
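+-- A minimal usage sketch for `units_lin_con` above (the signature values 42
+-- and 7 are hypothetical, not rows from this dump): each row converts values
+-- between two unit signatures via the affine transform
+-- ((value + a) * b_dividend) / b_divisor + c, which is exactly what the
+-- `convert_unit` routine defined further below evaluates.
+-- INSERT INTO `units_lin_con` VALUES (42,7,0.0,1000,1,0.0);
+-- SELECT convert_unit(42, 1.5); -- ((1.5+0.0)*1000)/1+0.0 = 1500.0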
+/*!40101 SET @saved_cs_client = @@character_set_client */; +/*!40101 SET character_set_client = utf8 */; +CREATE TABLE `user_info` ( + `realm` varbinary(255) NOT NULL, + `name` varbinary(255) NOT NULL, + `email` varbinary(255) DEFAULT NULL, + `status` enum('ACTIVE','INACTIVE') COLLATE utf8_unicode_ci NOT NULL DEFAULT 'INACTIVE', + `entity` int(10) unsigned DEFAULT NULL, + PRIMARY KEY (`realm`,`name`), + KEY `subject_entity` (`entity`), + CONSTRAINT `subjects_ibfk_1` FOREIGN KEY (`entity`) REFERENCES `entities` (`id`) +) ENGINE=InnoDB DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci; +/*!40101 SET character_set_client = @saved_cs_client */; + +-- +-- Dumping data for table `user_info` +-- + +LOCK TABLES `user_info` WRITE; +/*!40000 ALTER TABLE `user_info` DISABLE KEYS */; +/*!40000 ALTER TABLE `user_info` ENABLE KEYS */; +UNLOCK TABLES; + +-- +-- Table structure for table `user_roles` +-- + +DROP TABLE IF EXISTS `user_roles`; +/*!40101 SET @saved_cs_client = @@character_set_client */; +/*!40101 SET character_set_client = utf8 */; +CREATE TABLE `user_roles` ( + `realm` varbinary(255) NOT NULL, + `user` varbinary(255) NOT NULL, + `role` varbinary(255) NOT NULL, + PRIMARY KEY (`realm`,`user`,`role`), + KEY `user_roles_ibfk_1` (`role`), + CONSTRAINT `user_roles_ibfk_1` FOREIGN KEY (`role`) REFERENCES `roles` (`name`) +) ENGINE=InnoDB DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci; +/*!40101 SET character_set_client = @saved_cs_client */; + +-- +-- Dumping data for table `user_roles` +-- + +LOCK TABLES `user_roles` WRITE; +/*!40000 ALTER TABLE `user_roles` DISABLE KEYS */; +/*!40000 ALTER TABLE `user_roles` ENABLE KEYS */; +UNLOCK TABLES; + +-- +-- Dumping routines for database 'caosdb' +-- +/*!50003 DROP FUNCTION IF EXISTS `CaosDBVersion` */; +/*!50003 SET @saved_cs_client = @@character_set_client */ ; +/*!50003 SET @saved_cs_results = @@character_set_results */ ; +/*!50003 SET @saved_col_connection = @@collation_connection */ ; +/*!50003 SET character_set_client = utf8 */ ; +/*!50003 SET character_set_results = utf8 */ ; +/*!50003 SET collation_connection = utf8_general_ci */ ; +/*!50003 SET @saved_sql_mode = @@sql_mode */ ; +/*!50003 SET sql_mode = 'STRICT_TRANS_TABLES,ERROR_FOR_DIVISION_BY_ZERO,NO_AUTO_CREATE_USER,NO_ENGINE_SUBSTITUTION' */ ; +DELIMITER ;; +CREATE DEFINER=`caosdb`@`%` FUNCTION `CaosDBVersion`() RETURNS varchar(255) CHARSET utf8 COLLATE utf8_unicode_ci + DETERMINISTIC +RETURN 'v5.0.0' ;; +DELIMITER ; +/*!50003 SET sql_mode = @saved_sql_mode */ ; +/*!50003 SET character_set_client = @saved_cs_client */ ; +/*!50003 SET character_set_results = @saved_cs_results */ ; +/*!50003 SET collation_connection = @saved_col_connection */ ; +/*!50003 DROP FUNCTION IF EXISTS `constructDateTimeWhereClauseForColumn` */; +/*!50003 SET @saved_cs_client = @@character_set_client */ ; +/*!50003 SET @saved_cs_results = @@character_set_results */ ; +/*!50003 SET @saved_col_connection = @@collation_connection */ ; +/*!50003 SET character_set_client = utf8 */ ; +/*!50003 SET character_set_results = utf8 */ ; +/*!50003 SET collation_connection = utf8_general_ci */ ; +/*!50003 SET @saved_sql_mode = @@sql_mode */ ; +/*!50003 SET sql_mode = 'STRICT_TRANS_TABLES,ERROR_FOR_DIVISION_BY_ZERO,NO_AUTO_CREATE_USER,NO_ENGINE_SUBSTITUTION' */ ; +DELIMITER ;; +CREATE DEFINER=`caosdb`@`%` FUNCTION `constructDateTimeWhereClauseForColumn`(seconds_col VARCHAR(255), nanos_col VARCHAR(255), vDateTimeSecLow VARCHAR(255), vDateTimeNSLow VARCHAR(255), vDateTimeSecUpp VARCHAR(255), vDateTimeNSUpp VARCHAR(255), operator CHAR(4)) 
RETURNS varchar(20000) CHARSET utf8 COLLATE utf8_unicode_ci + DETERMINISTIC +BEGIN + + DECLARE isInterval BOOLEAN DEFAULT vDateTimeSecUpp IS NOT NULL or vDateTimeNSUpp IS NOT NULL; + DECLARE operator_prefix CHAR(1) DEFAULT LEFT(operator,1); + + IF isInterval THEN + IF operator = '=' THEN + RETURN " 0=1"; + ELSEIF operator = '!=' THEN + RETURN " 0=1"; + ELSEIF operator = '>' or operator = '<=' THEN + RETURN CONCAT(" ", seconds_col, operator_prefix, vDateTimeSecUpp); + ELSEIF operator = '<' or operator = '>=' THEN + RETURN CONCAT(" ", seconds_col, operator_prefix, vDateTimeSecLow); + ELSEIF operator = "(" THEN + RETURN CONCAT(" ", seconds_col, ">=", vDateTimeSecLow, " AND ",seconds_col, "<", vDateTimeSecUpp); + ELSEIF operator = "!(" THEN + RETURN CONCAT(" ", seconds_col, "<", vDateTimeSecLow, " OR ", seconds_col, ">=", vDateTimeSecUpp); + END IF; + ELSE + IF operator = '=' THEN + RETURN CONCAT(" ", + seconds_col, + "=", vDateTimeSecLow, IF(vDateTimeNSLow IS NULL, CONCAT(' AND ', nanos_col, ' IS NULL'), CONCAT(' AND ', + nanos_col, + '=', vDateTimeNSLow))); + ELSEIF operator = '!=' THEN + RETURN CONCAT(" ", + seconds_col, + "!=", vDateTimeSecLow, IF(vDateTimeNSLow IS NULL, '', CONCAT(' OR ', + nanos_col, + '!=', vDateTimeNSLow))); + ELSEIF operator = '>' or operator = '<' THEN + RETURN CONCAT(" ", + seconds_col, operator, vDateTimeSecLow, IF(vDateTimeNSLow IS NULL, '', CONCAT(' OR (',seconds_col,'=', vDateTimeSecLow, ' AND ',nanos_col, operator, vDateTimeNSLow, ')'))); + ELSEIF operator = '>=' or operator = '<=' THEN + RETURN CONCAT( + " ", seconds_col, operator, vDateTimeSecLow, + IF(vDateTimeNSLow IS NULL, + '', + CONCAT( + ' AND (', seconds_col, operator_prefix, vDateTimeSecLow, + ' OR ', nanos_col, operator, vDateTimeNSLow, + ' OR ', nanos_col, ' IS NULL)'))); + ELSEIF operator = "(" THEN + RETURN IF(vDateTimeNSLow IS NULL,CONCAT(" ",seconds_col,"=", vDateTimeSecLow),CONCAT(" ",seconds_col,"=",vDateTimeSecLow," AND ",nanos_col,"=",vDateTimeNSLow)); + ELSEIF operator = "!(" THEN + RETURN IF(vDateTimeNSLow IS NULL,CONCAT(" ",seconds_col,"!=",vDateTimeSecLow, ""),CONCAT(" ",seconds_col,"!=",vDateTimeSecLow," OR ",nanos_col, " IS NULL OR ", nanos_col, "!=",vDateTimeNSLow)); + END IF; + END IF; + return ' 0=1'; +END ;; +DELIMITER ; +/*!50003 SET sql_mode = @saved_sql_mode */ ; +/*!50003 SET character_set_client = @saved_cs_client */ ; +/*!50003 SET character_set_results = @saved_cs_results */ ; +/*!50003 SET collation_connection = @saved_col_connection */ ; +/*!50003 DROP FUNCTION IF EXISTS `convert_unit` */; +/*!50003 SET @saved_cs_client = @@character_set_client */ ; +/*!50003 SET @saved_cs_results = @@character_set_results */ ; +/*!50003 SET @saved_col_connection = @@collation_connection */ ; +/*!50003 SET character_set_client = utf8 */ ; +/*!50003 SET character_set_results = utf8 */ ; +/*!50003 SET collation_connection = utf8_general_ci */ ; +/*!50003 SET @saved_sql_mode = @@sql_mode */ ; +/*!50003 SET sql_mode = 'STRICT_TRANS_TABLES,ERROR_FOR_DIVISION_BY_ZERO,NO_AUTO_CREATE_USER,NO_ENGINE_SUBSTITUTION' */ ; +DELIMITER ;; +CREATE DEFINER=`caosdb`@`%` FUNCTION `convert_unit`(unit_sig BIGINT, value DECIMAL(65,30)) RETURNS decimal(65,30) + DETERMINISTIC +BEGIN + DECLARE ret DECIMAL(65,30) DEFAULT value; + + SELECT (((value+a)*b_dividend)/b_divisor+c) INTO ret FROM units_lin_con WHERE signature_from=unit_sig; + RETURN ret; +END ;; +DELIMITER ; +/*!50003 SET sql_mode = @saved_sql_mode */ ; +/*!50003 SET character_set_client = @saved_cs_client */ ; +/*!50003 SET character_set_results = 
@saved_cs_results */ ; +/*!50003 SET collation_connection = @saved_col_connection */ ; +/*!50003 DROP FUNCTION IF EXISTS `getAggValueWhereClause` */; +/*!50003 SET @saved_cs_client = @@character_set_client */ ; +/*!50003 SET @saved_cs_results = @@character_set_results */ ; +/*!50003 SET @saved_col_connection = @@collation_connection */ ; +/*!50003 SET character_set_client = utf8 */ ; +/*!50003 SET character_set_results = utf8 */ ; +/*!50003 SET collation_connection = utf8_general_ci */ ; +/*!50003 SET @saved_sql_mode = @@sql_mode */ ; +/*!50003 SET sql_mode = 'STRICT_TRANS_TABLES,ERROR_FOR_DIVISION_BY_ZERO,NO_AUTO_CREATE_USER,NO_ENGINE_SUBSTITUTION' */ ; +DELIMITER ;; +CREATE DEFINER=`caosdb`@`%` FUNCTION `getAggValueWhereClause`(entities VARCHAR(255), properties VARCHAR(255)) RETURNS varchar(20000) CHARSET utf8 COLLATE utf8_unicode_ci + DETERMINISTIC +BEGIN + RETURN CONCAT(" EXISTS (SELECT 1 FROM `", entities, "` AS ent WHERE ent.id = subdata.entity_id LIMIT 1)", IF(properties IS NOT NULL AND properties != '', CONCAT(" AND EXISTS (SELECT 1 FROM `", properties, "` as props WHERE props.id = subdata.property_id LIMIT 1)"),'')); +END ;; +DELIMITER ; +/*!50003 SET sql_mode = @saved_sql_mode */ ; +/*!50003 SET character_set_client = @saved_cs_client */ ; +/*!50003 SET character_set_results = @saved_cs_results */ ; +/*!50003 SET collation_connection = @saved_col_connection */ ; +/*!50003 DROP FUNCTION IF EXISTS `getDateTimeWhereClause` */; +/*!50003 SET @saved_cs_client = @@character_set_client */ ; +/*!50003 SET @saved_cs_results = @@character_set_results */ ; +/*!50003 SET @saved_col_connection = @@collation_connection */ ; +/*!50003 SET character_set_client = utf8 */ ; +/*!50003 SET character_set_results = utf8 */ ; +/*!50003 SET collation_connection = utf8_general_ci */ ; +/*!50003 SET @saved_sql_mode = @@sql_mode */ ; +/*!50003 SET sql_mode = 'STRICT_TRANS_TABLES,ERROR_FOR_DIVISION_BY_ZERO,NO_AUTO_CREATE_USER,NO_ENGINE_SUBSTITUTION' */ ; +DELIMITER ;; +CREATE DEFINER=`caosdb`@`%` FUNCTION `getDateTimeWhereClause`(vDateTime VARCHAR(255), operator CHAR(4)) RETURNS varchar(20000) CHARSET utf8 COLLATE utf8_unicode_ci + DETERMINISTIC +BEGIN + DECLARE sep_loc INTEGER DEFAULT LOCATE('--',vDateTime); + DECLARE vDateTimeLow VARCHAR(255) DEFAULT IF(sep_loc != 0, SUBSTRING_INDEX(vDateTime, '--',1), vDateTime); + DECLARE vDateTimeUpp VARCHAR(255) DEFAULT IF(sep_loc != 0, SUBSTRING_INDEX(vDateTime, '--',-1), NULL); + + DECLARE vDateTimeSecLow VARCHAR(255) DEFAULT SUBSTRING_INDEX(vDateTimeLow, 'UTC', 1); + DECLARE vDateTimeNSLow VARCHAR(255) DEFAULT IF(SUBSTRING_INDEX(vDateTimeLow, 'UTC', -1)='',NULL,SUBSTRING_INDEX(vDateTimeLow, 'UTC', -1)); + + DECLARE vDateTimeSecUpp VARCHAR(255) DEFAULT IF(sep_loc != 0, SUBSTRING_INDEX(vDateTimeUpp, 'UTC', 1), NULL); + DECLARE vDateTimeNSUpp VARCHAR(255) DEFAULT IF(sep_loc != 0 AND SUBSTRING_INDEX(vDateTimeUpp, 'UTC', -1)!='',SUBSTRING_INDEX(vDateTimeUpp, 'UTC', -1),NULL); + + + RETURN constructDateTimeWhereClauseForColumn("subdata.value", "subdata.value_ns", vDateTimeSecLow, vDateTimeNSLow, vDateTimeSecUpp, vDateTimeNSUpp, operator); +END ;; +DELIMITER ; +/*!50003 SET sql_mode = @saved_sql_mode */ ; +/*!50003 SET character_set_client = @saved_cs_client */ ; +/*!50003 SET character_set_results = @saved_cs_results */ ; +/*!50003 SET collation_connection = @saved_col_connection */ ; +/*!50003 DROP FUNCTION IF EXISTS `getDateWhereClause` */; +/*!50003 SET @saved_cs_client = @@character_set_client */ ; +/*!50003 SET @saved_cs_results = @@character_set_results */ ; 
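+-- A minimal usage sketch for `getDateTimeWhereClause` above (the timestamps
+-- are invented for illustration): the argument is split on the interval
+-- separator '--' and on 'UTC' (seconds before it, optional nanoseconds after
+-- it) before delegating to `constructDateTimeWhereClauseForColumn`.
+-- SELECT getDateTimeWhereClause('1600000000UTC', '=');
+-- -- returns " subdata.value=1600000000 AND subdata.value_ns IS NULL"
+-- SELECT getDateTimeWhereClause('1600000000UTC--1600086400UTC', '(');
+-- -- returns " subdata.value>=1600000000 AND subdata.value<1600086400"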
+/*!50003 SET @saved_col_connection = @@collation_connection */ ; +/*!50003 SET character_set_client = utf8 */ ; +/*!50003 SET character_set_results = utf8 */ ; +/*!50003 SET collation_connection = utf8_general_ci */ ; +/*!50003 SET @saved_sql_mode = @@sql_mode */ ; +/*!50003 SET sql_mode = 'STRICT_TRANS_TABLES,ERROR_FOR_DIVISION_BY_ZERO,NO_AUTO_CREATE_USER,NO_ENGINE_SUBSTITUTION' */ ; +DELIMITER ;; +CREATE DEFINER=`caosdb`@`%` FUNCTION `getDateWhereClause`(vDateTimeDotNotation VARCHAR(255), operator CHAR(4)) RETURNS varchar(20000) CHARSET utf8 COLLATE utf8_unicode_ci + DETERMINISTIC +BEGIN + DECLARE isInterval INTEGER DEFAULT LOCATE('--',vDateTimeDotNotation); + + DECLARE vILB VARCHAR(255) DEFAULT IF(isInterval != 0, SUBSTRING_INDEX(vDateTimeDotNotation, '--', 1), vDateTimeDotNotation); + + DECLARE vEUB VARCHAR(255) DEFAULT IF(isInterval != 0, SUBSTRING_INDEX(vDateTimeDotNotation, '--', -1), NULL); + DECLARE vILB_Date INTEGER DEFAULT SUBSTRING_INDEX(vILB, '.', 1); + DECLARE vEUB_Date INTEGER DEFAULT SUBSTRING_INDEX(vEUB, '.', 1); + + DECLARE hasTime INTEGER DEFAULT LOCATE('.NULL.NULL',vILB); + + DECLARE dom INTEGER DEFAULT vILB_Date % 100; + + DECLARE mon INTEGER DEFAULT ((vILB_Date % 10000) - dom) / 100; + + DECLARE yea INTEGER DEFAULT (vILB_Date - (vILB_Date % 10000)) / 10000; + + IF operator = '=' and hasTime != 0 THEN + RETURN CONCAT(" subdata.value=", vILB_Date); + ELSEIF operator = "!=" and hasTime != 0 THEN + IF mon != 0 and dom != 0 THEN + RETURN CONCAT(" subdata.value!=", vILB_Date, " and subdata.value%100!=0"); + ELSEIF mon != 0 THEN + RETURN CONCAT(" subdata.value!=", vILB_Date, " and subdata.value%100=0 and subdata.value%10000!=0"); + ELSE + RETURN CONCAT(" subdata.value!=", vILB_Date, " and subdata.value%10000=0"); + END IF; + ELSEIF operator = "(" and hasTime != 0 THEN + IF mon != 0 and dom != 0 THEN + RETURN CONCAT(" subdata.value=", vILB_Date); + ELSEIF mon != 0 THEN + RETURN CONCAT(" subdata.value=",vILB_Date," OR (subdata.value>", vILB_Date, " and subdata.value<", vEUB_Date, " and subdata.value%10000!=0)"); + ELSE + RETURN CONCAT(" subdata.value=",vILB_Date," OR (subdata.value>", vILB_Date, " and subdata.value<", vEUB_Date,")"); + END IF; + ELSEIF operator = "!(" THEN + IF hasTime = 0 THEN + RETURN " 0=0"; + END IF; + IF mon != 0 and dom != 0 THEN + RETURN CONCAT(" subdata.value!=",vILB_Date); + ELSEIF mon != 0 THEN + RETURN CONCAT(" (subdata.value!=",vILB_Date, " AND subdata.value%100=0) OR ((subdata.value<", vILB_Date, " or subdata.value>", vEUB_Date, ") and subdata.value%100!=0)"); + ELSE + RETURN CONCAT(" (subdata.value!=",vILB_Date, " AND subdata.value%10000=0) OR ((subdata.value<", vILB_Date, " or subdata.value>=", vEUB_Date, ") and subdata.value%10000!=0)"); + END IF; + ELSEIF operator = "<" THEN + IF mon != 0 and dom != 0 THEN + RETURN CONCAT(" subdata.value<", vILB_Date, " and (subdata.value%100!=0 or (subdata.value<", yea*10000+mon*100, " and subdata.value%10000!=0) or (subdata.value<", yea*10000, " and subdata.value%10000=0))"); + ELSEIF mon != 0 THEN + RETURN CONCAT(" subdata.value<", vILB_Date, " and (subdata.value%10000!=0 or (subdata.value<", yea*10000, "))"); + ELSE + RETURN CONCAT(" subdata.value<", vILB_Date); + END IF; + ELSEIF operator = ">" THEN + IF mon != 0 and dom != 0 THEN + RETURN CONCAT(" subdata.value>", vILB_Date); + ELSEIF mon != 0 THEN + RETURN CONCAT(" subdata.value>=",vEUB_Date); + ELSE + RETURN CONCAT(" subdata.value>=",vEUB_Date); + END IF; + ELSEIF operator = "<=" THEN + IF mon != 0 and dom != 0 THEN + + RETURN CONCAT(" 
subdata.value<=", vILB_Date, + " or (subdata.value<=", yea*10000 + mon*100, " and subdata.value%100=0)"); + ELSEIF mon != 0 THEN + + RETURN CONCAT(" subdata.value<", vEUB_Date); + ELSE + + RETURN CONCAT(" subdata.value<", vEUB_Date); + END IF; + ELSEIF operator = ">=" THEN + IF mon != 0 and dom != 0 THEN + + RETURN CONCAT(" subdata.value>=", vILB_Date, + " or (subdata.value>=", yea*10000 + mon*100, " and subdata.value%100=0)", + " or (subdata.value>=", yea*10000, " and subdata.value%10000=0)"); + ELSEIF mon != 0 THEN + + RETURN CONCAT(" subdata.value>=", yea*10000 + mon*100, + " or (subdata.value>=", yea*10000, " and subdata.value%10000=0)"); + ELSE + + RETURN CONCAT(" subdata.value>=", yea*10000); + END IF; + END IF; + + return ' 0=1'; +END ;; +DELIMITER ; +/*!50003 SET sql_mode = @saved_sql_mode */ ; +/*!50003 SET character_set_client = @saved_cs_client */ ; +/*!50003 SET character_set_results = @saved_cs_results */ ; +/*!50003 SET collation_connection = @saved_col_connection */ ; +/*!50003 DROP FUNCTION IF EXISTS `getDoubleWhereClause` */; +/*!50003 SET @saved_cs_client = @@character_set_client */ ; +/*!50003 SET @saved_cs_results = @@character_set_results */ ; +/*!50003 SET @saved_col_connection = @@collation_connection */ ; +/*!50003 SET character_set_client = utf8 */ ; +/*!50003 SET character_set_results = utf8 */ ; +/*!50003 SET collation_connection = utf8_general_ci */ ; +/*!50003 SET @saved_sql_mode = @@sql_mode */ ; +/*!50003 SET sql_mode = 'STRICT_TRANS_TABLES,ERROR_FOR_DIVISION_BY_ZERO,NO_AUTO_CREATE_USER,NO_ENGINE_SUBSTITUTION' */ ; +DELIMITER ;; +CREATE DEFINER=`caosdb`@`%` FUNCTION `getDoubleWhereClause`(value DOUBLE, unit_sig BIGINT, valueStdUnit DECIMAL(65,30), stdUnit_sig BIGINT, o CHAR(4)) RETURNS varchar(20000) CHARSET utf8 COLLATE utf8_unicode_ci + DETERMINISTIC +BEGIN + RETURN IF(unit_sig IS NULL AND value IS NOT NULL, + CONCAT('subdata.value ', o, ' \'', value, '\''), + CONCAT( + IF(value IS NULL, '', + CONCAT('(subdata.unit_sig=', unit_sig, ' AND subdata.value ', o, ' \'', value, '\') OR ')), + IF(unit_sig = stdUnit_sig,'',CONCAT('(subdata.unit_sig=', stdUnit_sig,' AND subdata.value ', o, ' \'', valueStdUnit, '\') OR ')),'(standard_unit(subdata.unit_sig)=', stdUnit_sig,' AND convert_unit(subdata.unit_sig,subdata.value) ', o, ' ', valueStdUnit, ')')); +END ;; +DELIMITER ; +/*!50003 SET sql_mode = @saved_sql_mode */ ; +/*!50003 SET character_set_client = @saved_cs_client */ ; +/*!50003 SET character_set_results = @saved_cs_results */ ; +/*!50003 SET collation_connection = @saved_col_connection */ ; +/*!50003 DROP FUNCTION IF EXISTS `get_head_relative` */; +/*!50003 SET @saved_cs_client = @@character_set_client */ ; +/*!50003 SET @saved_cs_results = @@character_set_results */ ; +/*!50003 SET @saved_col_connection = @@collation_connection */ ; +/*!50003 SET character_set_client = utf8 */ ; +/*!50003 SET character_set_results = utf8 */ ; +/*!50003 SET collation_connection = utf8_general_ci */ ; +/*!50003 SET @saved_sql_mode = @@sql_mode */ ; +/*!50003 SET sql_mode = 'STRICT_TRANS_TABLES,ERROR_FOR_DIVISION_BY_ZERO,NO_AUTO_CREATE_USER,NO_ENGINE_SUBSTITUTION' */ ; +DELIMITER ;; +CREATE DEFINER=`caosdb`@`%` FUNCTION `get_head_relative`(EntityID INT UNSIGNED, + Offset INT UNSIGNED) RETURNS varbinary(255) + READS SQL DATA +BEGIN + + + + + RETURN ( + SELECT e.version + FROM entity_version AS e + WHERE e.entity_id = EntityID + ORDER BY e._iversion DESC + LIMIT 1 OFFSET Offset + ); +END ;; +DELIMITER ; +/*!50003 SET sql_mode = @saved_sql_mode */ ; +/*!50003 SET 
character_set_client = @saved_cs_client */ ; +/*!50003 SET character_set_results = @saved_cs_results */ ; +/*!50003 SET collation_connection = @saved_col_connection */ ; +/*!50003 DROP FUNCTION IF EXISTS `get_head_version` */; +/*!50003 SET @saved_cs_client = @@character_set_client */ ; +/*!50003 SET @saved_cs_results = @@character_set_results */ ; +/*!50003 SET @saved_col_connection = @@collation_connection */ ; +/*!50003 SET character_set_client = utf8 */ ; +/*!50003 SET character_set_results = utf8 */ ; +/*!50003 SET collation_connection = utf8_general_ci */ ; +/*!50003 SET @saved_sql_mode = @@sql_mode */ ; +/*!50003 SET sql_mode = 'STRICT_TRANS_TABLES,ERROR_FOR_DIVISION_BY_ZERO,NO_AUTO_CREATE_USER,NO_ENGINE_SUBSTITUTION' */ ; +DELIMITER ;; +CREATE DEFINER=`caosdb`@`%` FUNCTION `get_head_version`(EntityID INT UNSIGNED) RETURNS varbinary(255) + READS SQL DATA +BEGIN + RETURN get_head_relative(EntityID, 0); +END ;; +DELIMITER ; +/*!50003 SET sql_mode = @saved_sql_mode */ ; +/*!50003 SET character_set_client = @saved_cs_client */ ; +/*!50003 SET character_set_results = @saved_cs_results */ ; +/*!50003 SET collation_connection = @saved_col_connection */ ; +/*!50003 DROP FUNCTION IF EXISTS `get_iversion` */; +/*!50003 SET @saved_cs_client = @@character_set_client */ ; +/*!50003 SET @saved_cs_results = @@character_set_results */ ; +/*!50003 SET @saved_col_connection = @@collation_connection */ ; +/*!50003 SET character_set_client = utf8 */ ; +/*!50003 SET character_set_results = utf8 */ ; +/*!50003 SET collation_connection = utf8_general_ci */ ; +/*!50003 SET @saved_sql_mode = @@sql_mode */ ; +/*!50003 SET sql_mode = 'STRICT_TRANS_TABLES,ERROR_FOR_DIVISION_BY_ZERO,NO_AUTO_CREATE_USER,NO_ENGINE_SUBSTITUTION' */ ; +DELIMITER ;; +CREATE DEFINER=`caosdb`@`%` FUNCTION `get_iversion`(EntityID INT UNSIGNED, + Version VARBINARY(255)) RETURNS int(10) unsigned + READS SQL DATA +BEGIN + RETURN ( + SELECT e._iversion + FROM entity_version AS e + WHERE e.entity_id = EntityID + AND e.version = Version + ); +END ;; +DELIMITER ; +/*!50003 SET sql_mode = @saved_sql_mode */ ; +/*!50003 SET character_set_client = @saved_cs_client */ ; +/*!50003 SET character_set_results = @saved_cs_results */ ; +/*!50003 SET collation_connection = @saved_col_connection */ ; +/*!50003 DROP FUNCTION IF EXISTS `get_primary_parent_version` */; +/*!50003 SET @saved_cs_client = @@character_set_client */ ; +/*!50003 SET @saved_cs_results = @@character_set_results */ ; +/*!50003 SET @saved_col_connection = @@collation_connection */ ; +/*!50003 SET character_set_client = utf8 */ ; +/*!50003 SET character_set_results = utf8 */ ; +/*!50003 SET collation_connection = utf8_general_ci */ ; +/*!50003 SET @saved_sql_mode = @@sql_mode */ ; +/*!50003 SET sql_mode = 'STRICT_TRANS_TABLES,ERROR_FOR_DIVISION_BY_ZERO,NO_AUTO_CREATE_USER,NO_ENGINE_SUBSTITUTION' */ ; +DELIMITER ;; +CREATE DEFINER=`caosdb`@`%` FUNCTION `get_primary_parent_version`(EntityID INT UNSIGNED, + Version VARBINARY(255)) RETURNS varbinary(255) + READS SQL DATA +BEGIN + RETURN ( + SELECT p.version + FROM entity_version AS e INNER JOIN entity_version AS p + ON (e._ipparent = p._iversion + AND e.entity_id = p.entity_id) + WHERE e.entity_id = EntityID + AND e.version = Version + ); +END ;; +DELIMITER ; +/*!50003 SET sql_mode = @saved_sql_mode */ ; +/*!50003 SET character_set_client = @saved_cs_client */ ; +/*!50003 SET character_set_results = @saved_cs_results */ ; +/*!50003 SET collation_connection = @saved_col_connection */ ; +/*!50003 DROP FUNCTION IF EXISTS 
`get_version_timestamp` */; +/*!50003 SET @saved_cs_client = @@character_set_client */ ; +/*!50003 SET @saved_cs_results = @@character_set_results */ ; +/*!50003 SET @saved_col_connection = @@collation_connection */ ; +/*!50003 SET character_set_client = utf8 */ ; +/*!50003 SET character_set_results = utf8 */ ; +/*!50003 SET collation_connection = utf8_general_ci */ ; +/*!50003 SET @saved_sql_mode = @@sql_mode */ ; +/*!50003 SET sql_mode = 'STRICT_TRANS_TABLES,ERROR_FOR_DIVISION_BY_ZERO,NO_AUTO_CREATE_USER,NO_ENGINE_SUBSTITUTION' */ ; +DELIMITER ;; +CREATE DEFINER=`caosdb`@`%` FUNCTION `get_version_timestamp`(EntityID INT UNSIGNED, + Version VARBINARY(255)) RETURNS varchar(255) CHARSET utf8 COLLATE utf8_unicode_ci + READS SQL DATA +BEGIN + RETURN ( + SELECT concat(t.seconds, '.', t.nanos) + FROM entity_version AS e INNER JOIN transactions AS t + ON ( e.srid = t.srid ) + WHERE e.entity_id = EntityID + AND e.version = Version + ); +END ;; +DELIMITER ; +/*!50003 SET sql_mode = @saved_sql_mode */ ; +/*!50003 SET character_set_client = @saved_cs_client */ ; +/*!50003 SET character_set_results = @saved_cs_results */ ; +/*!50003 SET collation_connection = @saved_col_connection */ ; +/*!50003 DROP FUNCTION IF EXISTS `is_feature_config` */; +/*!50003 SET @saved_cs_client = @@character_set_client */ ; +/*!50003 SET @saved_cs_results = @@character_set_results */ ; +/*!50003 SET @saved_col_connection = @@collation_connection */ ; +/*!50003 SET character_set_client = utf8 */ ; +/*!50003 SET character_set_results = utf8 */ ; +/*!50003 SET collation_connection = utf8_general_ci */ ; +/*!50003 SET @saved_sql_mode = @@sql_mode */ ; +/*!50003 SET sql_mode = 'STRICT_TRANS_TABLES,ERROR_FOR_DIVISION_BY_ZERO,NO_AUTO_CREATE_USER,NO_ENGINE_SUBSTITUTION' */ ; +DELIMITER ;; +CREATE DEFINER=`caosdb`@`%` FUNCTION `is_feature_config`(_Key VARCHAR(255), + Expected VARCHAR(255)) RETURNS tinyint(1) + READS SQL DATA +BEGIN + RETURN ( + SELECT f._value = Expected FROM feature_config as f WHERE f._key = _Key + ); +END ;; +DELIMITER ; +/*!50003 SET sql_mode = @saved_sql_mode */ ; +/*!50003 SET character_set_client = @saved_cs_client */ ; +/*!50003 SET character_set_results = @saved_cs_results */ ; +/*!50003 SET collation_connection = @saved_col_connection */ ; +/*!50003 DROP FUNCTION IF EXISTS `makeStmt` */; +/*!50003 SET @saved_cs_client = @@character_set_client */ ; +/*!50003 SET @saved_cs_results = @@character_set_results */ ; +/*!50003 SET @saved_col_connection = @@collation_connection */ ; +/*!50003 SET character_set_client = utf8 */ ; +/*!50003 SET character_set_results = utf8 */ ; +/*!50003 SET collation_connection = utf8_general_ci */ ; +/*!50003 SET @saved_sql_mode = @@sql_mode */ ; +/*!50003 SET sql_mode = 'STRICT_TRANS_TABLES,ERROR_FOR_DIVISION_BY_ZERO,NO_AUTO_CREATE_USER,NO_ENGINE_SUBSTITUTION' */ ; +DELIMITER ;; +CREATE DEFINER=`caosdb`@`%` FUNCTION `makeStmt`(sourceSet VARCHAR(255), targetSet VARCHAR(255), data VARCHAR(20000), + properties VARCHAR(20000), versioned BOOLEAN) RETURNS varchar(20000) CHARSET utf8 COLLATE utf8_unicode_ci + NO SQL +BEGIN + IF sourceSet = "entities" AND versioned THEN + RETURN CONCAT('INSERT IGNORE INTO `', + targetSet, + '` (id, _iversion) SELECT entity_id, _iversion FROM ', + data, + IF(properties IS NULL, '', + CONCAT(' AS data JOIN `', properties, '` AS prop ON (data.property_id = prop.id) WHERE ', + 'data.entity_id = prop.id2 OR prop.id2 = 0'))); + END IF; + RETURN CONCAT( + IF(targetSet IS NULL, + CONCAT('DELETE FROM `',sourceSet,'` WHERE NOT EXISTS (SELECT 1 FROM '), + 
CONCAT('INSERT IGNORE INTO `',targetSet,'` (id) SELECT id FROM `',sourceSet,'` ', + 'WHERE EXISTS (SELECT 1 FROM ')), + IF(properties IS NULL, + CONCAT(data,' as data WHERE '), + CONCAT('`',properties,'` as prop JOIN ',data,' as data ON (data.property_id=prop.id) WHERE ', + '(data.entity_id=prop.id2 OR prop.id2=0) AND ')), + 'data.entity_id=`', sourceSet, '`.`id` LIMIT 1)' + ); + +END ;; +DELIMITER ; +/*!50003 SET sql_mode = @saved_sql_mode */ ; +/*!50003 SET character_set_client = @saved_cs_client */ ; +/*!50003 SET character_set_results = @saved_cs_results */ ; +/*!50003 SET collation_connection = @saved_col_connection */ ; +/*!50003 DROP FUNCTION IF EXISTS `ms` */; +/*!50003 SET @saved_cs_client = @@character_set_client */ ; +/*!50003 SET @saved_cs_results = @@character_set_results */ ; +/*!50003 SET @saved_col_connection = @@collation_connection */ ; +/*!50003 SET character_set_client = utf8 */ ; +/*!50003 SET character_set_results = utf8 */ ; +/*!50003 SET collation_connection = utf8_general_ci */ ; +/*!50003 SET @saved_sql_mode = @@sql_mode */ ; +/*!50003 SET sql_mode = 'STRICT_TRANS_TABLES,ERROR_FOR_DIVISION_BY_ZERO,NO_AUTO_CREATE_USER,NO_ENGINE_SUBSTITUTION' */ ; +DELIMITER ;; +CREATE DEFINER=`caosdb`@`%` FUNCTION `ms`(ps bigint(20) unsigned ) RETURNS double +return TRUNCATE(ps/1000000000,3) ;; +DELIMITER ; +/*!50003 SET sql_mode = @saved_sql_mode */ ; +/*!50003 SET character_set_client = @saved_cs_client */ ; +/*!50003 SET character_set_results = @saved_cs_results */ ; +/*!50003 SET collation_connection = @saved_col_connection */ ; +/*!50003 DROP FUNCTION IF EXISTS `standard_unit` */; +/*!50003 SET @saved_cs_client = @@character_set_client */ ; +/*!50003 SET @saved_cs_results = @@character_set_results */ ; +/*!50003 SET @saved_col_connection = @@collation_connection */ ; +/*!50003 SET character_set_client = utf8 */ ; +/*!50003 SET character_set_results = utf8 */ ; +/*!50003 SET collation_connection = utf8_general_ci */ ; +/*!50003 SET @saved_sql_mode = @@sql_mode */ ; +/*!50003 SET sql_mode = 'STRICT_TRANS_TABLES,ERROR_FOR_DIVISION_BY_ZERO,NO_AUTO_CREATE_USER,NO_ENGINE_SUBSTITUTION' */ ; +DELIMITER ;; +CREATE DEFINER=`caosdb`@`%` FUNCTION `standard_unit`(unit_sig BIGINT) RETURNS bigint(20) + DETERMINISTIC +BEGIN + DECLARE ret BIGINT DEFAULT unit_sig; + + SELECT signature_to INTO ret FROM units_lin_con WHERE signature_from=unit_sig; + RETURN ret; +END ;; +DELIMITER ; +/*!50003 SET sql_mode = @saved_sql_mode */ ; +/*!50003 SET character_set_client = @saved_cs_client */ ; +/*!50003 SET character_set_results = @saved_cs_results */ ; +/*!50003 SET collation_connection = @saved_col_connection */ ; +/*!50003 DROP FUNCTION IF EXISTS `_get_head_iversion` */; +/*!50003 SET @saved_cs_client = @@character_set_client */ ; +/*!50003 SET @saved_cs_results = @@character_set_results */ ; +/*!50003 SET @saved_col_connection = @@collation_connection */ ; +/*!50003 SET character_set_client = utf8 */ ; +/*!50003 SET character_set_results = utf8 */ ; +/*!50003 SET collation_connection = utf8_general_ci */ ; +/*!50003 SET @saved_sql_mode = @@sql_mode */ ; +/*!50003 SET sql_mode = 'STRICT_TRANS_TABLES,ERROR_FOR_DIVISION_BY_ZERO,NO_AUTO_CREATE_USER,NO_ENGINE_SUBSTITUTION' */ ; +DELIMITER ;; +CREATE DEFINER=`caosdb`@`%` FUNCTION `_get_head_iversion`(EntityID INT UNSIGNED) RETURNS int(10) unsigned + READS SQL DATA +BEGIN + + + + + RETURN ( + SELECT e._iversion + FROM entity_version AS e + WHERE e.entity_id = EntityID + ORDER BY e._iversion DESC + LIMIT 1 + ); +END ;; +DELIMITER ; +/*!50003 SET sql_mode = 
@saved_sql_mode */ ; +/*!50003 SET character_set_client = @saved_cs_client */ ; +/*!50003 SET character_set_results = @saved_cs_results */ ; +/*!50003 SET collation_connection = @saved_col_connection */ ; +/*!50003 DROP FUNCTION IF EXISTS `_get_version` */; +/*!50003 SET @saved_cs_client = @@character_set_client */ ; +/*!50003 SET @saved_cs_results = @@character_set_results */ ; +/*!50003 SET @saved_col_connection = @@collation_connection */ ; +/*!50003 SET character_set_client = utf8 */ ; +/*!50003 SET character_set_results = utf8 */ ; +/*!50003 SET collation_connection = utf8_general_ci */ ; +/*!50003 SET @saved_sql_mode = @@sql_mode */ ; +/*!50003 SET sql_mode = 'STRICT_TRANS_TABLES,ERROR_FOR_DIVISION_BY_ZERO,NO_AUTO_CREATE_USER,NO_ENGINE_SUBSTITUTION' */ ; +DELIMITER ;; +CREATE DEFINER=`caosdb`@`%` FUNCTION `_get_version`(EntityID INT UNSIGNED, + IVersion INT UNSIGNED) RETURNS varbinary(255) + READS SQL DATA +BEGIN + RETURN ( + SELECT version FROM entity_version + WHERE entity_id = EntityID + AND _iversion = IVersion + ); +END ;; +DELIMITER ; +/*!50003 SET sql_mode = @saved_sql_mode */ ; +/*!50003 SET character_set_client = @saved_cs_client */ ; +/*!50003 SET character_set_results = @saved_cs_results */ ; +/*!50003 SET collation_connection = @saved_col_connection */ ; +/*!50003 DROP PROCEDURE IF EXISTS `applyBackReference` */; +/*!50003 SET @saved_cs_client = @@character_set_client */ ; +/*!50003 SET @saved_cs_results = @@character_set_results */ ; +/*!50003 SET @saved_col_connection = @@collation_connection */ ; +/*!50003 SET character_set_client = utf8 */ ; +/*!50003 SET character_set_results = utf8 */ ; +/*!50003 SET collation_connection = utf8_general_ci */ ; +/*!50003 SET @saved_sql_mode = @@sql_mode */ ; +/*!50003 SET sql_mode = 'STRICT_TRANS_TABLES,ERROR_FOR_DIVISION_BY_ZERO,NO_AUTO_CREATE_USER,NO_ENGINE_SUBSTITUTION' */ ; +DELIMITER ;; +CREATE DEFINER=`caosdb`@`%` PROCEDURE `applyBackReference`(in sourceSet VARCHAR(255), targetSet VARCHAR(255), + in propertiesTable VARCHAR(255), in entitiesTable VARCHAR(255), in subQuery BOOLEAN, + in versioned BOOLEAN) +BEGIN + DECLARE newTableName VARCHAR(255) DEFAULT NULL; + + + IF subQuery IS TRUE THEN + call registerTempTableName(newTableName); + + SET @createBackRefSubQueryTableStr = CONCAT('CREATE TEMPORARY TABLE `',newTableName,'` ( entity_id INT UNSIGNED NOT NULL, id INT UNSIGNED NOT NULL, CONSTRAINT `',newTableName,'PK` PRIMARY KEY (id, entity_id))'); + + PREPARE createBackRefSubQueryTable FROM @createBackRefSubQueryTableStr; + EXECUTE createBackRefSubQueryTable; + DEALLOCATE PREPARE createBackRefSubQueryTable; + + SET @backRefSubResultSetStmtStr = CONCAT('INSERT IGNORE INTO `', + newTableName, + '` (id,entity_id) SELECT entity_id AS id, value AS entity_id FROM `reference_data` AS data ', + 'WHERE EXISTS (SELECT 1 FROM `', + sourceSet, + '` AS source WHERE source.id=data.value LIMIT 1)', + IF(propertiesTable IS NULL, + '', + CONCAT(' AND EXISTS (SELECT 1 FROM `', + propertiesTable, + '` AS p WHERE p.id=data.property_id LIMIT 1)')), + IF(entitiesTable IS NULL, + '', + CONCAT(' AND EXISTS (SELECT 1 FROM `', + entitiesTable, + '` AS e WHERE e.id=data.entity_id LIMIT 1)')) + ); + + PREPARE backRefSubResultSetStmt FROM @backRefSubResultSetStmtStr; + EXECUTE backRefSubResultSetStmt; + DEALLOCATE PREPARE backRefSubResultSetStmt; + + SELECT newTableName as list; + ELSE + IF versioned THEN + IF sourceSet = "entities" THEN + + SET @stmtBackRefStr = CONCAT('INSERT IGNORE INTO `', + targetSet, + '` (id, _iversion) SELECT source.id, 
_get_head_iversion(source.id)', + + ' FROM entities AS source WHERE EXISTS (', + 'SELECT 1 FROM `reference_data` AS data WHERE data.value=source.id AND (', + 'data.value_iversion IS NULL OR data.value_iversion=_get_head_iversion(source.id))', + IF(entitiesTable IS NULL, + '', + CONCAT(' AND EXISTS (SELECT 1 FROM `', + entitiesTable, + '` AS e WHERE e.id=data.entity_id LIMIT 1)')), + IF(propertiesTable IS NULL, + '', + CONCAT(' AND EXISTS (SELECT 1 FROM `', + propertiesTable, + '` AS p WHERE p.id=data.property_id LIMIT 1)')), + ') UNION ALL ', + + 'SELECT source.id, source._iversion FROM archive_entities AS source WHERE EXISTS (', + 'SELECT 1 FROM `reference_data` AS data WHERE data.value=source.id AND ', + '(data.value_iversion IS NULL OR data.value_iversion=source._iversion)', + IF(entitiesTable IS NULL, + '', + CONCAT(' AND EXISTS (SELECT 1 FROM `', + entitiesTable, + '` AS e WHERE e.id=data.entity_id LIMIT 1)')), + IF(propertiesTable IS NULL, + '', + CONCAT(' AND EXISTS (SELECT 1 FROM `', + propertiesTable, + '` AS p WHERE p.id=data.property_id LIMIT 1)')), + + ')'); + ELSEIF targetSet IS NULL OR sourceSet = targetSet THEN + SET @stmtBackRefStr = CONCAT('DELETE FROM `', + sourceSet, + '` WHERE NOT EXISTS (SELECT 1 FROM `reference_data` AS data WHERE data.value=`', + sourceSet, + '`.`id` AND ( data.value_iversion IS NULL OR data.value_iversion=`', + sourceSet, + '`._iversion)', + IF(entitiesTable IS NULL, + '', + CONCAT(' AND EXISTS (SELECT 1 FROM `', + entitiesTable, + '` AS e WHERE e.id=data.entity_id LIMIT 1)')), + IF(propertiesTable IS NULL, + '', + CONCAT(' AND EXISTS (SELECT 1 FROM `', + propertiesTable, + '` AS p WHERE p.id=data.property_id LIMIT 1)')), + ')'); + ELSE + SET @stmtBackRefStr = CONCAT('INSERT IGNORE INTO `', + targetSet, + '` (id, _iversion) SELECT source.id, source._iversion FROM `', + sourceSet, + '` AS source WHERE EXISTS (', + 'SELECT 1 FROM `reference_data` AS data WHERE data.value=source.id AND', + ' (data.value_iversion IS NULL OR data.value_iversion=source._iversion)', + IF(entitiesTable IS NULL, + '', + CONCAT(' AND EXISTS (SELECT 1 FROM `', + entitiesTable, + '` AS e WHERE e.id=data.entity_id LIMIT 1)')), + IF(propertiesTable IS NULL, + '', + CONCAT(' AND EXISTS (SELECT 1 FROM `', + propertiesTable, + '` AS p WHERE p.id=data.property_id LIMIT 1)')), + + ')'); + END IF; + ELSE + + IF targetSet IS NULL OR sourceSet = targetSet THEN + + SET @stmtBackRefStr = CONCAT('DELETE FROM `', + sourceSet, + '` WHERE NOT EXISTS (SELECT 1 FROM `reference_data` AS data WHERE data.value=`', + sourceSet, + '`.`id`', + IF(entitiesTable IS NULL, + '', + CONCAT(' + AND EXISTS (SELECT 1 FROM `', + entitiesTable, + '` AS e WHERE e.id=data.entity_id LIMIT 1)')), + IF(propertiesTable IS NULL, + '', + CONCAT(' + AND EXISTS (SELECT 1 FROM `', + propertiesTable, + '` AS p WHERE p.id=data.property_id LIMIT 1)')), + ')'); + ELSE + + SET @stmtBackRefStr = CONCAT('INSERT IGNORE INTO `', + targetSet, + '` (id) SELECT id FROM `', + sourceSet, + '` AS source WHERE EXISTS (SELECT 1 FROM `reference_data` AS data WHERE data.value=source.id', + IF(entitiesTable IS NULL, + '', + CONCAT(' AND EXISTS (SELECT 1 FROM `', + entitiesTable, + '` AS e WHERE e.id=data.entity_id LIMIT 1)')), + IF(propertiesTable IS NULL, + '', + CONCAT(' AND EXISTS (SELECT 1 FROM `', + propertiesTable, + '` AS p WHERE p.id=data.property_id LIMIT 1)')), + ')'); + END IF; + END IF; + + PREPARE stmtBackRef FROM @stmtBackRefStr; + EXECUTE stmtBackRef; + DEALLOCATE PREPARE stmtBackRef; + END IF; + +END ;; +DELIMITER ; +/*!50003 
SET sql_mode = @saved_sql_mode */ ; +/*!50003 SET character_set_client = @saved_cs_client */ ; +/*!50003 SET character_set_results = @saved_cs_results */ ; +/*!50003 SET collation_connection = @saved_col_connection */ ; +/*!50003 DROP PROCEDURE IF EXISTS `applyIDFilter` */; +/*!50003 SET @saved_cs_client = @@character_set_client */ ; +/*!50003 SET @saved_cs_results = @@character_set_results */ ; +/*!50003 SET @saved_col_connection = @@collation_connection */ ; +/*!50003 SET character_set_client = utf8 */ ; +/*!50003 SET character_set_results = utf8 */ ; +/*!50003 SET collation_connection = utf8_general_ci */ ; +/*!50003 SET @saved_sql_mode = @@sql_mode */ ; +/*!50003 SET sql_mode = 'STRICT_TRANS_TABLES,ERROR_FOR_DIVISION_BY_ZERO,NO_AUTO_CREATE_USER,NO_ENGINE_SUBSTITUTION' */ ; +DELIMITER ;; +CREATE DEFINER=`caosdb`@`%` PROCEDURE `applyIDFilter`(in sourceSet VARCHAR(255), in targetSet VARCHAR(255), + in o CHAR(2), in vInt BIGINT, in agg CHAR(3), in versioned BOOLEAN) +IDFILTER_LABEL: BEGIN +DECLARE data VARCHAR(20000) DEFAULT NULL; +DECLARE aggVal VARCHAR(255) DEFAULT NULL; + + +IF agg IS NOT NULL THEN + IF versioned THEN + + SELECT 1 FROM id_agg_with_versioning_not_implemented; + END IF; + SET @stmtIDAggValStr = CONCAT( + "SELECT ", + agg, + "(id) INTO @sAggVal FROM `", + sourceSet, + "`"); + PREPARE stmtIDAggVal FROM @stmtIDAggValStr; + EXECUTE stmtIDAggVal; + DEALLOCATE PREPARE stmtIDAggVal; + SET aggVal = @sAggVal; +END IF; + + +IF targetSet IS NULL OR targetSet = sourceSet THEN + SET data = CONCAT( + "DELETE FROM `", + sourceSet, + "` WHERE ", + IF(o IS NULL OR vInt IS NULL, + "1=1", + CONCAT("NOT id", + o, + vInt)), + IF(aggVal IS NULL, + "", + CONCAT(" AND id!=", + aggVal))); +ELSEIF versioned AND sourceSet = "entities" THEN + + + SET data = CONCAT( + "INSERT IGNORE INTO `", + targetSet, + '` (id, _iversion) SELECT id, _get_head_iversion(id) FROM `entities` WHERE ', + IF(o IS NULL OR vInt IS NULL, + "1=1", + CONCAT("id", + o, + vInt)), + IF(aggVal IS NULL, + "", + CONCAT(" AND id=", + aggVal)), + ' UNION SELECT id, _iversion FROM `archive_entities` WHERE ', + IF(o IS NULL OR vInt IS NULL, + "1=1", + CONCAT("id", + o, + vInt)), + IF(aggVal IS NULL, + "", + CONCAT(" AND id=", + aggVal))); + + +ELSE + SET data = CONCAT( + "INSERT IGNORE INTO `", + targetSet, + IF(versioned, + '` (id, _iversion) SELECT data.id, data._iversion FROM `', + '` (id) SELECT data.id FROM `'), + sourceSet, + "` AS data WHERE ", + IF(o IS NULL OR vInt IS NULL, + "1=1", + CONCAT("data.id", + o, + vInt)), + IF(aggVal IS NULL, + "", + CONCAT(" AND data.id=", + aggVal))); +END IF; + +Set @stmtIDFilterStr = data; +PREPARE stmtIDFilter FROM @stmtIDFilterStr; +EXECUTE stmtIDFilter; +DEALLOCATE PREPARE stmtIDFilter; + +END ;; +DELIMITER ; +/*!50003 SET sql_mode = @saved_sql_mode */ ; +/*!50003 SET character_set_client = @saved_cs_client */ ; +/*!50003 SET character_set_results = @saved_cs_results */ ; +/*!50003 SET collation_connection = @saved_col_connection */ ; +/*!50003 DROP PROCEDURE IF EXISTS `applyPOV` */; +/*!50003 SET @saved_cs_client = @@character_set_client */ ; +/*!50003 SET @saved_cs_results = @@character_set_results */ ; +/*!50003 SET @saved_col_connection = @@collation_connection */ ; +/*!50003 SET character_set_client = utf8 */ ; +/*!50003 SET character_set_results = utf8 */ ; +/*!50003 SET collation_connection = utf8_general_ci */ ; +/*!50003 SET @saved_sql_mode = @@sql_mode */ ; +/*!50003 SET sql_mode = 'STRICT_TRANS_TABLES,ERROR_FOR_DIVISION_BY_ZERO,NO_AUTO_CREATE_USER,NO_ENGINE_SUBSTITUTION' */ ; 
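+-- Editorial note (added comment; not part of the generated dump): the
+-- procedure `applyPOV` below implements the POV (property-operator-value)
+-- filter. It dispatches on the operator: '->' delegates to `applyRefPOV`;
+-- '0' and '!0' select entities whose property value is null or non-null;
+-- '(' and '!(' are the date/datetime interval operators handled by
+-- `getDateWhereClause` and `getDateTimeWhereClause`; an aggregate function in
+-- `agg` (e.g. 'max') is first evaluated over `integer_data` and `double_data`
+-- and then matched by equality; every other operator is compared per value
+-- type via a UNION ALL over the *_data tables, with the '?' placeholders
+-- bound to `vText`. Matching entity ids are collected in a temporary "keep"
+-- table and then either intersected with `sourceSet` or copied into
+-- `targetSet`.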
+DELIMITER ;; +CREATE DEFINER=`caosdb`@`%` PROCEDURE `applyPOV`(in sourceSet VARCHAR(255), + in targetSet VARCHAR(255), + in propertiesTable VARCHAR(255), + in refIdsTable VARCHAR(255), + in o CHAR(4), + in vText VARCHAR(255), + in vInt INT, + in vDouble DOUBLE, + in unit_sig BIGINT, + in vDoubleStdUnit DOUBLE, + in stdUnit_sig BIGINT, + in vDateTime VARCHAR(255), + in vDateTimeDotNotation VARCHAR(255), + in agg CHAR(3), + in pname VARCHAR(255), + in versioned BOOLEAN) +POV_LABEL: BEGIN + DECLARE data TEXT DEFAULT NULL; + DECLARE sTextData VARCHAR(20000) DEFAULT NULL; + DECLARE sNameData VARCHAR(20000) DEFAULT NULL; + DECLARE sEnumData VARCHAR(20000) DEFAULT NULL; + DECLARE sIntData VARCHAR(20000) DEFAULT NULL; + DECLARE sDoubleData VARCHAR(20000) DEFAULT NULL; + DECLARE sDatetimeData VARCHAR(20000) DEFAULT NULL; + DECLARE sNullData VARCHAR(20000) DEFAULT NULL; + DECLARE sDateData VARCHAR(20000) DEFAULT NULL; + DECLARE sRefData VARCHAR(20000) DEFAULT NULL; + DECLARE aggValue VARCHAR(255) DEFAULT NULL; + DECLARE aggValueWhereClause VARCHAR(20000) DEFAULT NULL; + DECLARE distinctUnits INT DEFAULT 0; + DECLARE usedStdUnit BIGINT DEFAULT NULL; + DECLARE keepTabl VARCHAR(255) DEFAULT NULL; + DECLARE existence_op VARCHAR(255) DEFAULT "EXISTS"; + + IF o = '->' THEN + + call applyRefPOV(sourceSet,targetSet, propertiesTable, refIdsTable, versioned); + LEAVE POV_LABEL; + ELSEIF o = '0' THEN + + + SET vText = NULL; + SET sTextData = 'SELECT domain_id, entity_id, property_id FROM `null_data` AS subdata'; + + + + ELSEIF o = '!0' THEN + + + SET vText = NULL; + + SET sTextData = CONCAT( + 'SELECT DISTINCT domain_id, entity_id, property_id FROM `text_data` AS subdata ', + 'WHERE subdata.value IS NOT NULL UNION ALL ', + 'SELECT DISTINCT domain_id, entity_id, property_id FROM `name_data` AS subdata ', + 'WHERE subdata.value IS NOT NULL UNION ALL ', + 'SELECT DISTINCT domain_id, entity_id, property_id FROM `enum_data` AS subdata ', + 'WHERE subdata.value IS NOT NULL UNION ALL ', + 'SELECT DISTINCT domain_id, entity_id, property_id FROM `integer_data` AS subdata ', + 'WHERE subdata.value IS NOT NULL UNION ALL ', + 'SELECT DISTINCT domain_id, entity_id, property_id FROM `double_data` AS subdata ', + 'WHERE subdata.value IS NOT NULL UNION ALL ', + 'SELECT DISTINCT domain_id, entity_id, property_id FROM `date_data` AS subdata ', + 'WHERE subdata.value IS NOT NULL UNION ALL ', + 'SELECT DISTINCT domain_id, entity_id, property_id FROM `datetime_data` AS subdata ', + 'WHERE subdata.value IS NOT NULL UNION ALL ', + 'SELECT DISTINCT domain_id, entity_id, property_id FROM `reference_data` AS subdata ', + 'WHERE subdata.value IS NOT NULL'); + + ELSEIF o = "(" or o = "!(" THEN + IF versioned THEN + SET sTextData = IF(vText IS NULL, + CONCAT( + ' SELECT DISTINCT domain_id, entity_id, _get_head_iversion(entity_id) ', + 'AS _iversion, property_id FROM `date_data` UNION ALL ', + 'SELECT DISTINCT domain_id, entity_id, _iversion, property_id FROM `archive_date_data`'), + IF(vDateTimeDotNotation IS NULL, NULL, + CONCAT(' SELECT DISTINCT domain_id, entity_id, _get_head_iversion(entity_id) ', + 'AS _iversion, property_id FROM `date_data` AS subdata WHERE ', + getDateWhereClause(vDateTimeDotNotation, o), ' UNION ALL ', + 'SELECT DISTINCT domain_id, entity_id, _iversion, property_id FROM `archive_date_data` ', + 'AS subdata WHERE ', getDateWhereClause(vDateTimeDotNotation, o)))); + SET sDatetimeData = IF(vText IS NULL, ' UNION ALL SELECT DISTINCT domain_id, entity_id, _get_head_iversion(entity_id) AS _iversion, property_id FROM 
`datetime_data` UNION ALL SELECT DISTINCT domain_id, entity_id, _iversion, property_id FROM `archive_datetime_data`', + IF(vDateTime IS NULL, NULL, + CONCAT(' UNION ALL SELECT DISTINCT domain_id, entity_id, _get_head_iversion(entity_id) AS _iversion, property_id FROM `datetime_data` AS subdata WHERE ', getDateTimeWhereClause(vDateTime, o), ' UNION ALL SELECT DISTINCT domain_id, entity_id,_iversion, property_id FROM `archive_datetime_data` AS subdata WHERE ', getDateTimeWhereClause(vDateTime, o)))); + ELSE + SET sTextData = IF(vText IS NULL, + ' SELECT DISTINCT domain_id, entity_id, property_id FROM `date_data`', + IF(vDateTimeDotNotation IS NULL, NULL, + CONCAT(' SELECT DISTINCT domain_id, entity_id, property_id FROM `date_data` AS subdata WHERE ', + getDateWhereClause(vDateTimeDotNotation, o)))); + SET sDatetimeData = IF(vText IS NULL, + ' UNION ALL SELECT DISTINCT domain_id, entity_id, property_id FROM `datetime_data`', + IF(vDateTime IS NULL, NULL, + CONCAT(' UNION ALL SELECT DISTINCT domain_id, entity_id, property_id FROM `datetime_data` ', + 'AS subdata WHERE ', getDateTimeWhereClause(vDateTime, o)))); + END IF; + SET vText = NULL; + ELSEIF agg IS NOT NULL THEN + IF versioned THEN + SELECT 1 FROM versioned_agg_pov_filter_not_implemented; + END IF; + + + + SET aggValueWhereClause = CONCAT(getDoubleWhereClause(vDouble, unit_sig, vDoubleStdUnit, stdUnit_sig, o), ' AND '); + SET aggValueWhereClause = CONCAT(IF(aggValueWhereClause IS NULL, '', aggValueWhereClause), getAggValueWhereClause(sourceSet, propertiesTable)); + + + SET @aggValueStmtStr = CONCAT('SELECT ',agg,'(subdata.value), ', agg, '(convert_unit(subdata.unit_sig,subdata.value)), COUNT(DISTINCT standard_unit(subdata.unit_sig)), max(standard_unit(subdata.unit_sig)) INTO @sAggValue, @sAggValueConvert, @distinctUnits, @StdUnitSig FROM (SELECT entity_id, property_id, value, unit_sig FROM `integer_data` UNION ALL SELECT entity_id, property_id, value, unit_sig FROM `double_data`) AS subdata WHERE ', aggValueWhereClause); + + + PREPARE stmtAggValueStmt FROM @aggValueStmtStr; + EXECUTE stmtAggValueStmt; + DEALLOCATE PREPARE stmtAggValueStmt; + + SET distinctUnits = @distinctUnits; + SET aggValue = @sAggValue; + + + IF distinctUnits = 1 THEN + SET aggValue = @sAggValueConvert; + SET usedStdUnit = @StdUnitSig; + ELSE + call raiseWarning(CONCAT("The filter POV(",IF(pname IS NULL, 'NULL', pname),",",IF(o IS NULL, 'NULL', o),",",IF(vText IS NULL, 'NULL', vText),") with the aggregate function '", agg, "' could not match the values against each other with their units. The values had different base units. Only their numeric values have been taken into account." 
)); + END IF; + + IF aggValue IS NULL THEN + SET sTextData = 'SELECT NULL as domain_id, NULL as entity_id, NULL as property_id'; + ELSE + SET sTextData = ''; + SET sIntData = CONCAT(' UNION ALL SELECT DISTINCT domain_id, entity_id, property_id FROM `integer_data` as subdata WHERE ', getDoubleWhereClause(aggValue, usedStdUnit, aggValue, usedStdUnit, '=')); + SET sDoubleData = CONCAT(' SELECT DISTINCT domain_id, entity_id, property_id FROM `double_data` as subdata WHERE ', getDoubleWhereClause(aggValue, usedStdUnit, aggValue, usedStdUnit, '=')); + END IF; + + SET vText = NULL; + ELSE + + IF versioned THEN + SET sTextData = IF(vText IS NULL, + 'SELECT DISTINCT domain_id, entity_id, _get_head_iversion(entity_id) AS _iversion, property_id FROM `text_data` UNION ALL SELECT DISTINCT domain_id, entity_id, _iversion, property_id FROM `archive_text_data` ', + CONCAT( + 'SELECT DISTINCT domain_id, entity_id, _get_head_iversion(entity_id) AS _iversion, property_id ', + 'FROM `text_data` AS subdata WHERE subdata.value ', o,' ? ', + 'UNION ALL SELECT DISTINCT domain_id, entity_id, _iversion, property_id ', + 'FROM `archive_text_data` AS subdata WHERE subdata.value ', o, '?' + )); + SET sNameData = IF(vText IS NULL, ' UNION ALL SELECT DISTINCT domain_id, entity_id, _get_head_iversion(entity_id) AS _iversion, property_id FROM `name_data` UNION ALL SELECT DISTINCT domain_id, entity_id, _iversion, property_id FROM `archive_name_data` ', CONCAT(' UNION ALL SELECT DISTINCT domain_id, entity_id, _get_head_iversion(entity_id) AS _iversion, property_id FROM `name_data` AS subdata WHERE subdata.value ', o, ' ? UNION ALL SELECT DISTINCT domain_id, entity_id, _iversion, property_id FROM `archive_name_data` AS subdata WHERE subdata.value ', o, '?')); + SET sEnumData = IF(vText IS NULL, ' UNION ALL SELECT DISTINCT domain_id, entity_id, _get_head_iversion(entity_id) AS _iversion, property_id FROM `enum_data` UNION ALL SELECT DISTINCT domain_id, entity_id, _iversion, property_id FROM `archive_enum_data` ', CONCAT(' UNION ALL SELECT DISTINCT domain_id, entity_id, _get_head_iversion(entity_id) AS _iversion, property_id FROM `enum_data` AS subdata WHERE subdata.value ', o, ' ? 
UNION ALL SELECT DISTINCT domain_id, entity_id, _iversion, property_id FROM `archive_enum_data` AS subdata WHERE subdata.value ', o, '?')); + IF o = "!=" AND refIdsTable IS NOT NULL THEN + SET existence_op = "NOT EXISTS"; + END IF; + SET sRefData = IF(vText IS NULL, + ' UNION ALL SELECT DISTINCT domain_id, entity_id, _get_head_iversion(entity_id) AS _iversion, property_id FROM `reference_data` UNION ALL SELECT DISTINCT domain_id, entity_id, _iversion, property_id FROM `archive_reference_data`', + IF(refIdsTable IS NULL, + NULL, + CONCAT(' UNION ALL SELECT DISTINCT domain_id, entity_id, _get_head_iversion(entity_id) AS _iversion, property_id FROM `reference_data` AS subdata WHERE ', existence_op, ' (SELECT 1 FROM `', refIdsTable, '` AS refIdsTable WHERE subdata.value=refIdsTable.id LIMIT 1) AND subdata.status != "REPLACEMENT" UNION ALL SELECT DISTINCT domain_id, entity_id, _iversion, property_id FROM `archive_reference_data` AS subdata WHERE ', existence_op, ' (SELECT 1 FROM `', refIdsTable, '` AS refIdsTable WHERE subdata.value=refIdsTable.id LIMIT 1) AND subdata.status != "REPLACEMENT"'))); + SET sDoubleData = IF(vText IS NULL, ' UNION ALL SELECT DISTINCT subdata.domain_id, subdata.entity_id, _get_head_iversion(subdata.entity_id) AS _iversion, subdata.property_id FROM `double_data` AS subdata UNION ALL SELECT DISTINCT domain_id, entity_id, _iversion, property_id FROM `archive_double_data` ', IF(vDouble IS NULL, NULL, CONCAT(' UNION ALL SELECT DISTINCT domain_id, entity_id, _get_head_iversion(entity_id), property_id FROM `double_data` AS subdata WHERE ', getDoubleWhereClause(vDouble,unit_sig,vDoubleStdUnit,stdUnit_sig,o), ' UNION ALL SELECT DISTINCT domain_id, entity_id, _iversion, property_id FROM `archive_double_data` AS subdata WHERE ', getDoubleWhereClause(vDouble, unit_sig, vDoubleStdUnit, stdUnit_sig, o)))); + SET sIntData = IF(vText IS NULL, ' UNION ALL SELECT DISTINCT subdata.domain_id, subdata.entity_id, _get_head_iversion(subdata.entity_id) AS _iversion, subdata.property_id FROM `integer_data` AS subdata UNION ALL SELECT DISTINCT domain_id, entity_id, _iversion, property_id FROM `archive_integer_data`', IF(vInt IS NULL AND vDoubleStdUnit IS NULL, NULL, CONCAT(' UNION ALL SELECT DISTINCT domain_id, entity_id, _get_head_iversion(entity_id) AS _iversion, property_id FROM `integer_data` AS subdata WHERE ', getDoubleWhereClause(vInt, unit_sig, vDoubleStdUnit, stdUnit_sig, o), ' UNION ALL SELECT DISTINCT domain_id, entity_id, _iversion, property_id FROM `archive_integer_data` AS subdata WHERE ', getDoubleWhereClause(vInt, unit_sig, vDoubleStdUnit, stdUnit_sig, o)))); + SET sDatetimeData = IF(vText IS NULL,' UNION ALL SELECT DISTINCT domain_id, entity_id, _get_head_iversion(entity_id) AS _iversion, property_id FROM `datetime_data` UNION ALL SELECT DISTINCT domain_id, entity_id, _iversion, property_id FROM `archive_datetime_data`', IF(vDateTime IS NULL, NULL, CONCAT(' UNION ALL SELECT DISTINCT domain_id, entity_id, _get_head_iversion(entity_id) AS _iversion, property_id FROM `datetime_data` AS subdata WHERE ',getDateTimeWhereClause(vDateTime,o), ' UNION ALL SELECT DISTINCT domain_id, entity_id, _iversion, property_id FROM `archive_datetime_data` AS subdata WHERE ',getDateTimeWhereClause(vDateTime,o)))); + SET sDateData = IF(vText IS NULL,' UNION ALL SELECT DISTINCT domain_id, entity_id, _get_head_iversion(entity_id) AS _iversion, property_id FROM `date_data` UNION ALL SELECT DISTINCT domain_id, entity_id, _iversion, property_id FROM `archive_date_data`', IF(vDateTimeDotNotation IS NULL, 
NULL, CONCAT(' UNION ALL SELECT DISTINCT domain_id, entity_id, _get_head_iversion(entity_id) AS _iversion, property_id FROM `date_data` AS subdata WHERE ', getDateWhereClause(vDateTimeDotNotation,o), ' UNION ALL SELECT DISTINCT domain_id, entity_id, _iversion, property_id FROM `archive_date_data` AS subdata WHERE ', getDateWhereClause(vDateTimeDotNotation,o)))); + SET sNullData = IF(vText IS NULL, ' UNION ALL SELECT DISTINCT domain_id, entity_id, _get_head_iversion(entity_id) AS _iversion, property_id FROM `null_data` UNION ALL SELECT DISTINCT domain_id, entity_id, _iversion, property_id FROM `archive_null_data`', NULL); + + ELSE + SET sTextData = IF(vText IS NULL, 'SELECT DISTINCT domain_id, entity_id, property_id FROM `text_data`', CONCAT('SELECT DISTINCT domain_id, entity_id, property_id FROM `text_data` AS subdata WHERE subdata.value ',o,' ?')); + SET sNameData = IF(vText IS NULL, ' UNION ALL SELECT DISTINCT domain_id, entity_id, property_id FROM `name_data`', CONCAT(' UNION ALL SELECT DISTINCT domain_id, entity_id, property_id FROM `name_data` AS subdata WHERE subdata.value ', o, ' ?')); + SET sEnumData = IF(vText IS NULL, ' UNION ALL SELECT DISTINCT domain_id, entity_id, property_id FROM `enum_data`', CONCAT(' UNION ALL SELECT DISTINCT domain_id, entity_id, property_id FROM `enum_data` AS subdata WHERE subdata.value ', o, ' ?')); + IF o = "!=" AND refIdsTable IS NOT NULL THEN + SET existence_op = "NOT EXISTS"; + END IF; + SET sRefData = IF(vText IS NULL, + ' UNION ALL SELECT DISTINCT domain_id, entity_id, property_id FROM `reference_data`', + IF(refIdsTable IS NULL, + NULL, + CONCAT(' UNION ALL SELECT DISTINCT domain_id, entity_id, property_id FROM `reference_data` AS subdata WHERE ',existence_op ,' (SELECT 1 FROM `', refIdsTable, '` AS refIdsTable WHERE subdata.value=refIdsTable.id LIMIT 1) AND subdata.status != "REPLACEMENT"'))); + SET sDoubleData = IF(vText IS NULL, ' UNION ALL SELECT DISTINCT subdata.domain_id, subdata.entity_id, subdata.property_id FROM `double_data` AS subdata', IF(vDouble IS NULL, NULL, CONCAT(' UNION ALL SELECT DISTINCT domain_id, entity_id, property_id FROM `double_data` AS subdata WHERE ', getDoubleWhereClause(vDouble,unit_sig,vDoubleStdUnit,stdUnit_sig,o)))); + SET sIntData = IF(vText IS NULL, ' UNION ALL SELECT DISTINCT subdata.domain_id, subdata.entity_id, subdata.property_id FROM `integer_data` AS subdata', IF(vInt IS NULL AND vDoubleStdUnit IS NULL, NULL, CONCAT(' UNION ALL SELECT DISTINCT domain_id, entity_id, property_id FROM `integer_data` AS subdata WHERE ', getDoubleWhereClause(vInt, unit_sig, vDoubleStdUnit, stdUnit_sig, o)))); + SET sDatetimeData = IF(vText IS NULL,' UNION ALL SELECT DISTINCT domain_id, entity_id, property_id FROM `datetime_data`', IF(vDateTime IS NULL, NULL, CONCAT(' UNION ALL SELECT DISTINCT domain_id, entity_id, property_id FROM `datetime_data` AS subdata WHERE ',getDateTimeWhereClause(vDateTime,o)))); + SET sDateData = IF(vText IS NULL,' UNION ALL SELECT DISTINCT domain_id, entity_id, property_id FROM `date_data`', IF(vDateTimeDotNotation IS NULL, NULL, CONCAT(' UNION ALL SELECT DISTINCT domain_id, entity_id, property_id FROM `date_data` AS subdata WHERE ',getDateWhereClause(vDateTimeDotNotation,o)))); + SET sNullData = IF(vText IS NULL, ' UNION ALL SELECT DISTINCT domain_id, entity_id, property_id FROM `null_data`', NULL); + END IF; + + END IF; + + SET data = CONCAT('(',sTextData, + IF(sNameData IS NULL, '', sNameData), + IF(sEnumData IS NULL, '', sEnumData), + IF(sDoubleData IS NULL, '', sDoubleData), + IF(sIntData IS 
NULL, '', sIntData), + IF(sDatetimeData IS NULL, '', sDatetimeData), + IF(sDateData IS NULL, '', sDateData), + IF(sRefData IS NULL, '', sRefData), + IF(sNullData IS NULL, '', sNullData), + ')' + ); + + + call createTmpTable(keepTabl, versioned); + IF versioned THEN + + SET @stmtPOVkeepTblStr = CONCAT( + 'INSERT IGNORE INTO `', keepTabl, '` (id, _iversion) SELECT entity_id AS id, _iversion FROM ', data, + ' as data', IF(propertiesTable IS NULL, '', CONCAT( + ' WHERE EXISTS (Select 1 from `', propertiesTable, '` AS prop ', + 'WHERE prop.id = data.property_id AND (prop.id2=data.entity_id OR prop.id2=0))'))); + + IF targetSet IS NOT NULL THEN + SET @stmtPOVStr = CONCAT('INSERT IGNORE INTO `', + targetSet, + '` (id, _iversion) SELECT source.id, source._iversion FROM `', + keepTabl, + '` AS source'); + ELSE + + SET @stmtPOVStr = CONCAT('DELETE FROM `', + sourceSet, + '` WHERE NOT EXISTS (SELECT 1 FROM `', + keepTabl, + '` AS data WHERE data.id=`', + sourceSet, + '`.`id` AND data._iversion=`', + sourceSet, + '`._iversion LIMIT 1)'); + + END IF; + + + PREPARE stmt3 FROM @stmtPOVStr; + PREPARE stmtPOVkeepTbl FROM @stmtPOVkeepTblStr; + IF vText IS NULL THEN + EXECUTE stmtPOVkeepTbl; + ELSE + SET @vText = vText; + EXECUTE stmtPOVkeepTbl USING @vText, @vText, @vText, @vText, @vText, @vText; + END IF; + EXECUTE stmt3; + DEALLOCATE PREPARE stmt3; + DEALLOCATE PREPARE stmtPOVkeepTbl; + ELSE + + SET @stmtPOVkeepTblStr = CONCAT('INSERT IGNORE INTO `', keepTabl, '` (id) SELECT DISTINCT entity_id AS id FROM ', data, ' as data', IF(propertiesTable IS NULL, '', CONCAT(' WHERE EXISTS (Select 1 from `', propertiesTable, '` AS prop WHERE prop.id = data.property_id AND (prop.id2=data.entity_id OR prop.id2=0))'))); + + SET @stmtPOVStr = CONCAT( + IF(targetSet IS NULL, + CONCAT('DELETE FROM `', + sourceSet, + '` WHERE NOT EXISTS (SELECT 1 FROM `'), + CONCAT('INSERT IGNORE INTO `', + targetSet, + '` (id) SELECT id FROM `', + sourceSet, + '` WHERE EXISTS (SELECT 1 FROM `')), + keepTabl, + '` AS data WHERE data.id=`', + sourceSet, + '`.`id` LIMIT 1)' + ); + + + PREPARE stmt3 FROM @stmtPOVStr; + PREPARE stmtPOVkeepTbl FROM @stmtPOVkeepTblStr; + IF vText IS NULL THEN + EXECUTE stmtPOVkeepTbl; + ELSE + SET @vText = vText; + EXECUTE stmtPOVkeepTbl USING @vText, @vText, @vText; + END IF; + EXECUTE stmt3; + DEALLOCATE PREPARE stmt3; + DEALLOCATE PREPARE stmtPOVkeepTbl; + END IF; + + SELECT @stmtPOVkeepTblStr as applyPOVStmt1, @stmtPOVStr as applyPOVStmt2, keepTabl as applyPOVIntermediateResultSet; + + +END ;; +DELIMITER ; +/*!50003 SET sql_mode = @saved_sql_mode */ ; +/*!50003 SET character_set_client = @saved_cs_client */ ; +/*!50003 SET character_set_results = @saved_cs_results */ ; +/*!50003 SET collation_connection = @saved_col_connection */ ; +/*!50003 DROP PROCEDURE IF EXISTS `applyRefPOV` */; +/*!50003 SET @saved_cs_client = @@character_set_client */ ; +/*!50003 SET @saved_cs_results = @@character_set_results */ ; +/*!50003 SET @saved_col_connection = @@collation_connection */ ; +/*!50003 SET character_set_client = utf8 */ ; +/*!50003 SET character_set_results = utf8 */ ; +/*!50003 SET collation_connection = utf8_general_ci */ ; +/*!50003 SET @saved_sql_mode = @@sql_mode */ ; +/*!50003 SET sql_mode = 'STRICT_TRANS_TABLES,ERROR_FOR_DIVISION_BY_ZERO,NO_AUTO_CREATE_USER,NO_ENGINE_SUBSTITUTION' */ ; +DELIMITER ;; +CREATE DEFINER=`caosdb`@`%` PROCEDURE `applyRefPOV`(in sourceSet VARCHAR(255), in targetSet VARCHAR(255), + in properties VARCHAR(255), in refs VARCHAR(255), + in versioned BOOLEAN) +BEGIN + DECLARE data 
VARCHAR(20000) DEFAULT CONCAT( + '(SELECT domain_id, entity_id, property_id FROM `reference_data` AS subdata ', + 'WHERE EXISTS (SELECT 1 FROM `', refs, '` AS refs WHERE subdata.value=refs.id LIMIT 1))'); + + IF versioned THEN + SET data = CONCAT( + '(SELECT domain_id, entity_id, _get_head_iversion(entity_id) AS _iversion, property_id ', + 'FROM `reference_data` AS subdata WHERE EXISTS (', + 'SELECT 1 FROM `', refs, '` AS refs WHERE subdata.value=refs.id LIMIT 1) ', + 'UNION ALL SELECT domain_id, entity_id, _iversion, property_id ', + 'FROM `archive_reference_data` AS subdata WHERE EXISTS (', + 'SELECT 1 FROM `', refs, '` AS refs WHERE subdata.value=refs.id LIMIT 1))'); + END IF; + SET @stmtRefPOVStr = makeStmt(sourceSet,targetSet,data,properties, versioned); + + PREPARE stmt4 FROM @stmtRefPOVStr; + EXECUTE stmt4; + DEALLOCATE PREPARE stmt4; + + SELECT @stmtRefPOVStr as applyRefPOVStmt; + +END ;; +DELIMITER ; +/*!50003 SET sql_mode = @saved_sql_mode */ ; +/*!50003 SET character_set_client = @saved_cs_client */ ; +/*!50003 SET character_set_results = @saved_cs_results */ ; +/*!50003 SET collation_connection = @saved_col_connection */ ; +/*!50003 DROP PROCEDURE IF EXISTS `applySAT` */; +/*!50003 SET @saved_cs_client = @@character_set_client */ ; +/*!50003 SET @saved_cs_results = @@character_set_results */ ; +/*!50003 SET @saved_col_connection = @@collation_connection */ ; +/*!50003 SET character_set_client = utf8 */ ; +/*!50003 SET character_set_results = utf8 */ ; +/*!50003 SET collation_connection = utf8_general_ci */ ; +/*!50003 SET @saved_sql_mode = @@sql_mode */ ; +/*!50003 SET sql_mode = 'STRICT_TRANS_TABLES,ERROR_FOR_DIVISION_BY_ZERO,NO_AUTO_CREATE_USER,NO_ENGINE_SUBSTITUTION' */ ; +DELIMITER ;; +CREATE DEFINER=`caosdb`@`%` PROCEDURE `applySAT`(in sourceSet VARCHAR(255), in targetSet VARCHAR(255), in loc MEDIUMTEXT, in op CHAR(5)) +BEGIN + + IF targetSet IS NULL OR sourceSet = targetSet THEN + SET @stmtSATString = CONCAT('DELETE FROM `', sourceSet, '` WHERE id NOT IN (SELECT file_id FROM files WHERE path ', op, ' ?)'); + ELSE + SET @stmtSATString = CONCAT('INSERT INTO `', targetSet, '` (id) SELECT data.id FROM `',sourceSet,'` as data WHERE EXISTS (SELECT 1 FROM `files` as f WHERE f.file_id=data.id AND f.path ', op, ' ?)'); + END IF; + PREPARE stmtSAT FROM @stmtSATString; + SET @loc = loc; + EXECUTE stmtSAT USING @loc; + DEALLOCATE PREPARE stmtSAT; + +END ;; +DELIMITER ; +/*!50003 SET sql_mode = @saved_sql_mode */ ; +/*!50003 SET character_set_client = @saved_cs_client */ ; +/*!50003 SET character_set_results = @saved_cs_results */ ; +/*!50003 SET collation_connection = @saved_col_connection */ ; +/*!50003 DROP PROCEDURE IF EXISTS `applyTransactionFilter` */; +/*!50003 SET @saved_cs_client = @@character_set_client */ ; +/*!50003 SET @saved_cs_results = @@character_set_results */ ; +/*!50003 SET @saved_col_connection = @@collation_connection */ ; +/*!50003 SET character_set_client = utf8 */ ; +/*!50003 SET character_set_results = utf8 */ ; +/*!50003 SET collation_connection = utf8_general_ci */ ; +/*!50003 SET @saved_sql_mode = @@sql_mode */ ; +/*!50003 SET sql_mode = 'STRICT_TRANS_TABLES,ERROR_FOR_DIVISION_BY_ZERO,NO_AUTO_CREATE_USER,NO_ENGINE_SUBSTITUTION' */ ; +DELIMITER ;; +CREATE DEFINER=`caosdb`@`%` PROCEDURE `applyTransactionFilter`(in sourceSet VARCHAR(255), targetSet VARCHAR(255), in transaction VARCHAR(255), in operator_u CHAR(2), in realm VARCHAR(255), in userName VARCHAR(255), in ilb BIGINT, in ilb_nanos INT UNSIGNED, in eub BIGINT, in eub_nanos INT UNSIGNED, in operator_t CHAR(2)) +BEGIN + DECLARE 
data TEXT default CONCAT('(SELECT entity_id FROM transaction_log AS t WHERE t.transaction=\'', + transaction, + '\'', + IF(userName IS NOT NULL, + CONCAT(' AND t.realm', operator_u, '? AND t.username', operator_u, '?'), + '' + ), + IF(ilb IS NOT NULL, + CONCAT(" AND", constructDateTimeWhereClauseForColumn("t.seconds", "t.nanos", ilb, ilb_nanos, eub, eub_nanos, operator_t)), + "" + ), + ')' + ); + + SET @stmtTransactionStr = makeStmt(sourceSet, targetSet, data, NULL, FALSE); + PREPARE stmtTransactionFilter from @stmtTransactionStr; + IF userName IS NOT NULL THEN + SET @userName = userName; + SET @realm = realm; + EXECUTE stmtTransactionFilter USING @realm, @userName; + ELSE + EXECUTE stmtTransactionFilter; + END IF; + DEALLOCATE PREPARE stmtTransactionFilter; + +END ;; +DELIMITER ; +/*!50003 SET sql_mode = @saved_sql_mode */ ; +/*!50003 SET character_set_client = @saved_cs_client */ ; +/*!50003 SET character_set_results = @saved_cs_results */ ; +/*!50003 SET collation_connection = @saved_col_connection */ ; +/*!50003 DROP PROCEDURE IF EXISTS `calcComplementUnion` */; +/*!50003 SET @saved_cs_client = @@character_set_client */ ; +/*!50003 SET @saved_cs_results = @@character_set_results */ ; +/*!50003 SET @saved_col_connection = @@collation_connection */ ; +/*!50003 SET character_set_client = utf8 */ ; +/*!50003 SET character_set_results = utf8 */ ; +/*!50003 SET collation_connection = utf8_general_ci */ ; +/*!50003 SET @saved_sql_mode = @@sql_mode */ ; +/*!50003 SET sql_mode = 'STRICT_TRANS_TABLES,ERROR_FOR_DIVISION_BY_ZERO,NO_AUTO_CREATE_USER,NO_ENGINE_SUBSTITUTION' */ ; +DELIMITER ;; +CREATE DEFINER=`caosdb`@`%` PROCEDURE `calcComplementUnion`(in targetSet VARCHAR(255), in subResultSet VARCHAR(255), in universe VARCHAR(255), in versioned BOOLEAN) +BEGIN + IF versioned AND universe = "entities" THEN + SET @stmtComplementUnionStr = CONCAT( + 'INSERT IGNORE INTO `', targetSet, + '` SELECT e.id, _get_head_iversion(e.id) FROM entities as e WHERE NOT EXISTS ( SELECT 1 FROM `', + subResultSet, + '` AS diff WHERE diff.id=e.id AND diff._iversion = _get_head_iversion(e.id)) UNION ALL SELECT e.id, e._iversion FROM archive_entities AS e WHERE NOT EXISTS ( SELECT 1 FROM `', + subResultSet, + '` as diff WHERE e.id = diff.id AND e._iversion = diff._iversion)'); + ELSEIF versioned THEN + SET @stmtComplementUnionStr = CONCAT( + 'INSERT IGNORE INTO `', targetSet, + '` SELECT id, _iversion FROM `',universe, + '` AS universe WHERE NOT EXISTS ( SELECT 1 FROM `', + subResultSet,'` + AS diff WHERE diff.id=universe.id AND diff._iversion = universe._iversion)'); + ELSE + SET @stmtComplementUnionStr = CONCAT('INSERT IGNORE INTO `', targetSet, '` SELECT id FROM `',universe, '` AS universe WHERE NOT EXISTS ( SELECT 1 FROM `', subResultSet,'` AS diff WHERE diff.id=universe.id)'); + END IF; + PREPARE stmtComplementUnion FROM @stmtComplementUnionStr; + EXECUTE stmtComplementUnion; + DEALLOCATE PREPARE stmtComplementUnion; + +END ;; +DELIMITER ; +/*!50003 SET sql_mode = @saved_sql_mode */ ; +/*!50003 SET character_set_client = @saved_cs_client */ ; +/*!50003 SET character_set_results = @saved_cs_results */ ; +/*!50003 SET collation_connection = @saved_col_connection */ ; +/*!50003 DROP PROCEDURE IF EXISTS `calcDifference` */; +/*!50003 SET @saved_cs_client = @@character_set_client */ ; +/*!50003 SET @saved_cs_results = @@character_set_results */ ; +/*!50003 SET @saved_col_connection = @@collation_connection */ ; +/*!50003 SET character_set_client = utf8 */ ; +/*!50003 SET character_set_results = utf8 */ ; +/*!50003 SET collation_connection = utf8_general_ci */ ; +/*!50003 
SET @saved_sql_mode = @@sql_mode */ ; +/*!50003 SET sql_mode = 'STRICT_TRANS_TABLES,ERROR_FOR_DIVISION_BY_ZERO,NO_AUTO_CREATE_USER,NO_ENGINE_SUBSTITUTION' */ ; +DELIMITER ;; +CREATE DEFINER=`caosdb`@`%` PROCEDURE `calcDifference`(in resultSetTable VARCHAR(255), in diff VARCHAR(255), in versioned BOOLEAN) +BEGIN + IF versioned THEN + SET @diffStmtStr = CONCAT('DELETE FROM `', resultSetTable, '` WHERE EXISTS ( SELECT 1 FROM `', diff,'` AS diff WHERE diff.id=`',resultSetTable,'`.`id` AND diff._iversion=`', resultSetTable, '`.`_iversion`)'); + ELSE + SET @diffStmtStr = CONCAT('DELETE FROM `', resultSetTable, '` WHERE EXISTS ( SELECT 1 FROM `', diff,'` AS diff WHERE diff.id=`',resultSetTable,'`.`id`)'); + END IF; + PREPARE diffStmt FROM @diffStmtStr; + EXECUTE diffStmt; + DEALLOCATE PREPARE diffStmt; + +END ;; +DELIMITER ; +/*!50003 SET sql_mode = @saved_sql_mode */ ; +/*!50003 SET character_set_client = @saved_cs_client */ ; +/*!50003 SET character_set_results = @saved_cs_results */ ; +/*!50003 SET collation_connection = @saved_col_connection */ ; +/*!50003 DROP PROCEDURE IF EXISTS `calcIntersection` */; +/*!50003 SET @saved_cs_client = @@character_set_client */ ; +/*!50003 SET @saved_cs_results = @@character_set_results */ ; +/*!50003 SET @saved_col_connection = @@collation_connection */ ; +/*!50003 SET character_set_client = utf8 */ ; +/*!50003 SET character_set_results = utf8 */ ; +/*!50003 SET collation_connection = utf8_general_ci */ ; +/*!50003 SET @saved_sql_mode = @@sql_mode */ ; +/*!50003 SET sql_mode = 'STRICT_TRANS_TABLES,ERROR_FOR_DIVISION_BY_ZERO,NO_AUTO_CREATE_USER,NO_ENGINE_SUBSTITUTION' */ ; +DELIMITER ;; +CREATE DEFINER=`caosdb`@`%` PROCEDURE `calcIntersection`(in resultSetTable VARCHAR(255), in intersectWith VARCHAR(255), in versioned BOOLEAN) +BEGIN + IF versioned THEN + SET @diffStmtStr = CONCAT('DELETE FROM `', + resultSetTable, + '` WHERE NOT EXISTS ( SELECT 1 FROM `', + intersectWith, + '` AS diff WHERE diff.id=`', + resultSetTable, + '`.`id` AND diff._iversion=`', + resultSetTable, + '`.`_iversion`)'); + ELSE + SET @diffStmtStr = CONCAT('DELETE FROM `', resultSetTable, '` WHERE NOT EXISTS ( SELECT 1 FROM `', intersectWith,'` AS diff WHERE diff.id=`',resultSetTable,'`.`id`)'); + END IF; + PREPARE diffStmt FROM @diffStmtStr; + EXECUTE diffStmt; + DEALLOCATE PREPARE diffStmt; + +END ;; +DELIMITER ; +/*!50003 SET sql_mode = @saved_sql_mode */ ; +/*!50003 SET character_set_client = @saved_cs_client */ ; +/*!50003 SET character_set_results = @saved_cs_results */ ; +/*!50003 SET collation_connection = @saved_col_connection */ ; +/*!50003 DROP PROCEDURE IF EXISTS `calcUnion` */; +/*!50003 SET @saved_cs_client = @@character_set_client */ ; +/*!50003 SET @saved_cs_results = @@character_set_results */ ; +/*!50003 SET @saved_col_connection = @@collation_connection */ ; +/*!50003 SET character_set_client = utf8 */ ; +/*!50003 SET character_set_results = utf8 */ ; +/*!50003 SET collation_connection = utf8_general_ci */ ; +/*!50003 SET @saved_sql_mode = @@sql_mode */ ; +/*!50003 SET sql_mode = 'STRICT_TRANS_TABLES,ERROR_FOR_DIVISION_BY_ZERO,NO_AUTO_CREATE_USER,NO_ENGINE_SUBSTITUTION' */ ; +DELIMITER ;; +CREATE DEFINER=`caosdb`@`%` PROCEDURE `calcUnion`(in targetSet VARCHAR(255), in sourceSet VARCHAR(255)) +BEGIN + SET @diffStmtStr = CONCAT('INSERT IGNORE INTO `', targetSet, '` SELECT * FROM `',sourceSet,'`'); + PREPARE diffStmt FROM @diffStmtStr; + EXECUTE diffStmt; + DEALLOCATE PREPARE diffStmt; + +END ;; +DELIMITER ; +/*!50003 SET sql_mode = @saved_sql_mode */ ; +/*!50003 SET character_set_client = 
@saved_cs_client */ ; +/*!50003 SET character_set_results = @saved_cs_results */ ; +/*!50003 SET collation_connection = @saved_col_connection */ ; +/*!50003 DROP PROCEDURE IF EXISTS `cleanUpLinCon` */; +/*!50003 SET @saved_cs_client = @@character_set_client */ ; +/*!50003 SET @saved_cs_results = @@character_set_results */ ; +/*!50003 SET @saved_col_connection = @@collation_connection */ ; +/*!50003 SET character_set_client = utf8 */ ; +/*!50003 SET character_set_results = utf8 */ ; +/*!50003 SET collation_connection = utf8_general_ci */ ; +/*!50003 SET @saved_sql_mode = @@sql_mode */ ; +/*!50003 SET sql_mode = 'STRICT_TRANS_TABLES,ERROR_FOR_DIVISION_BY_ZERO,NO_AUTO_CREATE_USER,NO_ENGINE_SUBSTITUTION' */ ; +DELIMITER ;; +CREATE DEFINER=`caosdb`@`%` PROCEDURE `cleanUpLinCon`() +BEGIN + + DELETE FROM units_lin_con WHERE NOT EXISTS (SELECT '1' FROM double_data WHERE unit_sig=signature_from) AND NOT EXISTS (SELECT '1' FROM integer_data WHERE unit_sig=signature_from); + +END ;; +DELIMITER ; +/*!50003 SET sql_mode = @saved_sql_mode */ ; +/*!50003 SET character_set_client = @saved_cs_client */ ; +/*!50003 SET character_set_results = @saved_cs_results */ ; +/*!50003 SET collation_connection = @saved_col_connection */ ; +/*!50003 DROP PROCEDURE IF EXISTS `cleanUpQuery` */; +/*!50003 SET @saved_cs_client = @@character_set_client */ ; +/*!50003 SET @saved_cs_results = @@character_set_results */ ; +/*!50003 SET @saved_col_connection = @@collation_connection */ ; +/*!50003 SET character_set_client = utf8 */ ; +/*!50003 SET character_set_results = utf8 */ ; +/*!50003 SET collation_connection = utf8_general_ci */ ; +/*!50003 SET @saved_sql_mode = @@sql_mode */ ; +/*!50003 SET sql_mode = 'STRICT_TRANS_TABLES,ERROR_FOR_DIVISION_BY_ZERO,NO_AUTO_CREATE_USER,NO_ENGINE_SUBSTITUTION' */ ; +DELIMITER ;; +CREATE DEFINER=`caosdb`@`%` PROCEDURE `cleanUpQuery`() +BEGIN + CREATE TEMPORARY TABLE IF NOT EXISTS warnings (warning TEXT NOT NULL); + SELECT * from warnings; + + SET @pstmtstr = CONCAT('DROP TEMPORARY TABLE IF EXISTS `warnings`', + IF(@tempTableList IS NULL, '', CONCAT(',',@tempTableList))); + PREPARE pstmt FROM @pstmtstr; + EXECUTE pstmt; + + SET @tempTableList = NULL; +END ;; +DELIMITER ; +/*!50003 SET sql_mode = @saved_sql_mode */ ; +/*!50003 SET character_set_client = @saved_cs_client */ ; +/*!50003 SET character_set_results = @saved_cs_results */ ; +/*!50003 SET collation_connection = @saved_col_connection */ ; +/*!50003 DROP PROCEDURE IF EXISTS `copyTable` */; +/*!50003 SET @saved_cs_client = @@character_set_client */ ; +/*!50003 SET @saved_cs_results = @@character_set_results */ ; +/*!50003 SET @saved_col_connection = @@collation_connection */ ; +/*!50003 SET character_set_client = utf8 */ ; +/*!50003 SET character_set_results = utf8 */ ; +/*!50003 SET collation_connection = utf8_general_ci */ ; +/*!50003 SET @saved_sql_mode = @@sql_mode */ ; +/*!50003 SET sql_mode = 'STRICT_TRANS_TABLES,ERROR_FOR_DIVISION_BY_ZERO,NO_AUTO_CREATE_USER,NO_ENGINE_SUBSTITUTION' */ ; +DELIMITER ;; +CREATE DEFINER=`caosdb`@`%` PROCEDURE `copyTable`(in fromTable VARCHAR(255), in toTable VARCHAR(255)) +BEGIN + SET @copyTableStmtStr = CONCAT('INSERT IGNORE INTO `', toTable, '` (id) SELECT id FROM `', fromTable, '`'); + PREPARE copyTableStmt FROM @copyTableStmtStr; + EXECUTE copyTableStmt; + DEALLOCATE PREPARE copyTableStmt; + +END ;; +DELIMITER ; +/*!50003 SET sql_mode = @saved_sql_mode */ ; +/*!50003 SET character_set_client = @saved_cs_client */ ; +/*!50003 SET character_set_results = @saved_cs_results */ ; +/*!50003 SET 
collation_connection = @saved_col_connection */ ; +/*!50003 DROP PROCEDURE IF EXISTS `createTmpTable` */; +/*!50003 SET @saved_cs_client = @@character_set_client */ ; +/*!50003 SET @saved_cs_results = @@character_set_results */ ; +/*!50003 SET @saved_col_connection = @@collation_connection */ ; +/*!50003 SET character_set_client = utf8 */ ; +/*!50003 SET character_set_results = utf8 */ ; +/*!50003 SET collation_connection = utf8_general_ci */ ; +/*!50003 SET @saved_sql_mode = @@sql_mode */ ; +/*!50003 SET sql_mode = 'STRICT_TRANS_TABLES,ERROR_FOR_DIVISION_BY_ZERO,NO_AUTO_CREATE_USER,NO_ENGINE_SUBSTITUTION' */ ; +DELIMITER ;; +CREATE DEFINER=`caosdb`@`%` PROCEDURE `createTmpTable`(out newTableName VARCHAR(255), in versioned BOOLEAN) +BEGIN + call registerTempTableName(newTableName); + + IF versioned THEN + SET @createTableStmtStr = CONCAT('CREATE TEMPORARY TABLE `', newTableName, + '` ( id INT UNSIGNED, _iversion INT UNSIGNED, PRIMARY KEY (id, _iversion))' ); + ELSE + SET @createTableStmtStr = CONCAT('CREATE TEMPORARY TABLE `', newTableName,'` ( id INT UNSIGNED PRIMARY KEY)' ); + END IF; + + PREPARE createTableStmt FROM @createTableStmtStr; + EXECUTE createTableStmt; + DEALLOCATE PREPARE createTableStmt; +END ;; +DELIMITER ; +/*!50003 SET sql_mode = @saved_sql_mode */ ; +/*!50003 SET character_set_client = @saved_cs_client */ ; +/*!50003 SET character_set_results = @saved_cs_results */ ; +/*!50003 SET collation_connection = @saved_col_connection */ ; +/*!50003 DROP PROCEDURE IF EXISTS `createTmpTable2` */; +/*!50003 SET @saved_cs_client = @@character_set_client */ ; +/*!50003 SET @saved_cs_results = @@character_set_results */ ; +/*!50003 SET @saved_col_connection = @@collation_connection */ ; +/*!50003 SET character_set_client = utf8 */ ; +/*!50003 SET character_set_results = utf8 */ ; +/*!50003 SET collation_connection = utf8_general_ci */ ; +/*!50003 SET @saved_sql_mode = @@sql_mode */ ; +/*!50003 SET sql_mode = 'STRICT_TRANS_TABLES,ERROR_FOR_DIVISION_BY_ZERO,NO_AUTO_CREATE_USER,NO_ENGINE_SUBSTITUTION' */ ; +DELIMITER ;; +CREATE DEFINER=`caosdb`@`%` PROCEDURE `createTmpTable2`(out newTableName VARCHAR(255)) +BEGIN + call registerTempTableName(newTableName); + SET @createTableStmtStr = CONCAT('CREATE TEMPORARY TABLE `', newTableName, + '` ( id INT UNSIGNED, id2 INT UNSIGNED, domain INT UNSIGNED, CONSTRAINT `', + newTableName,'PK` PRIMARY KEY (id,id2,domain) )' ); + + PREPARE createTableStmt FROM @createTableStmtStr; + EXECUTE createTableStmt; + DEALLOCATE PREPARE createTableStmt; +END ;; +DELIMITER ; +/*!50003 SET sql_mode = @saved_sql_mode */ ; +/*!50003 SET character_set_client = @saved_cs_client */ ; +/*!50003 SET character_set_results = @saved_cs_results */ ; +/*!50003 SET collation_connection = @saved_col_connection */ ; +/*!50003 DROP PROCEDURE IF EXISTS `deleteEntity` */; +/*!50003 SET @saved_cs_client = @@character_set_client */ ; +/*!50003 SET @saved_cs_results = @@character_set_results */ ; +/*!50003 SET @saved_col_connection = @@collation_connection */ ; +/*!50003 SET character_set_client = utf8 */ ; +/*!50003 SET character_set_results = utf8 */ ; +/*!50003 SET collation_connection = utf8_general_ci */ ; +/*!50003 SET @saved_sql_mode = @@sql_mode */ ; +/*!50003 SET sql_mode = 'STRICT_TRANS_TABLES,ERROR_FOR_DIVISION_BY_ZERO,NO_AUTO_CREATE_USER,NO_ENGINE_SUBSTITUTION' */ ; +DELIMITER ;; +CREATE DEFINER=`caosdb`@`%` PROCEDURE `deleteEntity`(in EntityID INT UNSIGNED) +BEGIN + + + DELETE FROM files where file_id=EntityID; + + + DELETE FROM data_type + WHERE ( domain_id = 0 + AND 
entity_id = 0 + AND property_id = EntityID ) + OR datatype = EntityID; + DELETE FROM collection_type + WHERE domain_id = 0 + AND entity_id = 0 + AND property_id = EntityID; + + + DELETE FROM name_data + WHERE domain_id = 0 + AND entity_id = EntityID + AND property_id = 20; + + DELETE FROM entities where id=EntityID; + + + DELETE FROM entity_acl + WHERE NOT EXISTS ( + SELECT 1 FROM entities + WHERE entities.acl = entity_acl.id LIMIT 1) + AND NOT EXISTS ( + SELECT 1 FROM archive_entities + WHERE archive_entities.acl = entity_acl.id LIMIT 1); + +END ;; +DELIMITER ; +/*!50003 SET sql_mode = @saved_sql_mode */ ; +/*!50003 SET character_set_client = @saved_cs_client */ ; +/*!50003 SET character_set_results = @saved_cs_results */ ; +/*!50003 SET collation_connection = @saved_col_connection */ ; +/*!50003 DROP PROCEDURE IF EXISTS `deleteEntityProperties` */; +/*!50003 SET @saved_cs_client = @@character_set_client */ ; +/*!50003 SET @saved_cs_results = @@character_set_results */ ; +/*!50003 SET @saved_col_connection = @@collation_connection */ ; +/*!50003 SET character_set_client = utf8 */ ; +/*!50003 SET character_set_results = utf8 */ ; +/*!50003 SET collation_connection = utf8_general_ci */ ; +/*!50003 SET @saved_sql_mode = @@sql_mode */ ; +/*!50003 SET sql_mode = 'STRICT_TRANS_TABLES,ERROR_FOR_DIVISION_BY_ZERO,NO_AUTO_CREATE_USER,NO_ENGINE_SUBSTITUTION' */ ; +DELIMITER ;; +CREATE DEFINER=`caosdb`@`%` PROCEDURE `deleteEntityProperties`(in EntityID INT UNSIGNED) +BEGIN + DECLARE IVersion INT UNSIGNED DEFAULT NULL; + + CALL deleteIsa(EntityID); + + IF is_feature_config("ENTITY_VERSIONING", "ENABLED") THEN + SELECT max(e._iversion) INTO IVersion + FROM entity_version AS e + WHERE e.entity_id = EntityID; + + + INSERT INTO archive_reference_data (domain_id, entity_id, + property_id, value, value_iversion, status, pidx, _iversion) + SELECT domain_id, entity_id, property_id, value, value_iversion, + status, pidx, IVersion AS _iversion + FROM reference_data + WHERE (domain_id = 0 AND entity_id = EntityID) + OR domain_id = EntityID; + + INSERT INTO archive_null_data (domain_id, entity_id, + property_id, status, pidx, _iversion) + SELECT domain_id, entity_id, property_id, status, + pidx, IVersion AS _iversion + FROM null_data + WHERE (domain_id = 0 AND entity_id = EntityID) + OR domain_id = EntityID; + + INSERT INTO archive_text_data (domain_id, entity_id, + property_id, value, status, pidx, _iversion) + SELECT domain_id, entity_id, property_id, value, status, + pidx, IVersion AS _iversion + FROM text_data + WHERE (domain_id = 0 AND entity_id = EntityID) + OR domain_id = EntityID; + + INSERT INTO archive_name_data (domain_id, entity_id, + property_id, value, status, pidx, _iversion) + SELECT domain_id, entity_id, property_id, value, status, + pidx, IVersion AS _iversion + FROM name_data + WHERE (domain_id = 0 AND entity_id = EntityID) + OR domain_id = EntityID; + + INSERT INTO archive_enum_data (domain_id, entity_id, + property_id, value, status, pidx, _iversion) + SELECT domain_id, entity_id, property_id, value, status, + pidx, IVersion AS _iversion + FROM enum_data + WHERE (domain_id = 0 AND entity_id = EntityID) + OR domain_id = EntityID; + + INSERT INTO archive_integer_data (domain_id, entity_id, + property_id, value, status, pidx, _iversion, unit_sig) + SELECT domain_id, entity_id, property_id, value, status, + pidx, IVersion AS _iversion, unit_sig + FROM integer_data + WHERE (domain_id = 0 AND entity_id = EntityID) + OR domain_id = EntityID; + + INSERT INTO archive_double_data (domain_id, entity_id, 
+ property_id, value, status, pidx, _iversion, unit_sig) + SELECT domain_id, entity_id, property_id, value, status, + pidx, IVersion AS _iversion, unit_sig + FROM double_data + WHERE (domain_id = 0 AND entity_id = EntityID) + OR domain_id = EntityID; + + INSERT INTO archive_datetime_data (domain_id, entity_id, + property_id, value, value_ns, status, pidx, _iversion) + SELECT domain_id, entity_id, property_id, value, value_ns, + status, pidx, IVersion AS _iversion + FROM datetime_data + WHERE (domain_id = 0 AND entity_id = EntityID) + OR domain_id = EntityID; + + INSERT INTO archive_date_data (domain_id, entity_id, + property_id, value, status, pidx, _iversion) + SELECT domain_id, entity_id, property_id, value, status, + pidx, IVersion AS _iversion + FROM date_data + WHERE (domain_id = 0 AND entity_id = EntityID) + OR domain_id = EntityID; + + INSERT INTO archive_name_overrides (domain_id, entity_id, + property_id, name, _iversion) + SELECT domain_id, entity_id, property_id, name, + IVersion AS _iversion + FROM name_overrides + WHERE (domain_id = 0 AND entity_id = EntityID) + OR domain_id = EntityID; + + INSERT INTO archive_desc_overrides (domain_id, entity_id, + property_id, description, _iversion) + SELECT domain_id, entity_id, property_id, description, + IVersion AS _iversion + FROM desc_overrides + WHERE (domain_id = 0 AND entity_id = EntityID) + OR domain_id = EntityID; + + INSERT INTO archive_data_type (domain_id, entity_id, + property_id, datatype, _iversion) + SELECT domain_id, entity_id, property_id, datatype, + IVersion AS _iversion + FROM data_type + WHERE (domain_id = 0 AND entity_id = EntityID) + OR domain_id = EntityID; + + INSERT INTO archive_collection_type (domain_id, entity_id, + property_id, collection, _iversion) + SELECT domain_id, entity_id, property_id, collection, + IVersion AS _iversion + FROM collection_type + WHERE (domain_id = 0 AND entity_id = EntityID) + OR domain_id = EntityID; + + INSERT INTO archive_query_template_def (id, definition, _iversion) + SELECT id, definition, IVersion AS _iversion + FROM query_template_def + WHERE id = EntityID; + + END IF; + + DELETE FROM reference_data + where (domain_id=0 AND entity_id=EntityID) OR domain_id=EntityID; + DELETE FROM null_data + where (domain_id=0 AND entity_id=EntityID) OR domain_id=EntityID; + DELETE FROM text_data + where (domain_id=0 AND entity_id=EntityID) OR domain_id=EntityID; + DELETE FROM name_data + where (domain_id=0 AND entity_id=EntityID) OR domain_id=EntityID; + DELETE FROM enum_data + where (domain_id=0 AND entity_id=EntityID) OR domain_id=EntityID; + DELETE FROM integer_data + where (domain_id=0 AND entity_id=EntityID) OR domain_id=EntityID; + DELETE FROM double_data + where (domain_id=0 AND entity_id=EntityID) OR domain_id=EntityID; + DELETE FROM datetime_data + where (domain_id=0 AND entity_id=EntityID) OR domain_id=EntityID; + DELETE FROM date_data + where (domain_id=0 AND entity_id=EntityID) OR domain_id=EntityID; + + DELETE FROM name_overrides + WHERE (domain_id=0 AND entity_id=EntityID) OR domain_id=EntityID; + DELETE FROM desc_overrides + WHERE (domain_id=0 AND entity_id=EntityID) OR domain_id=EntityID; + + DELETE FROM data_type + WHERE (domain_id=0 AND entity_id=EntityID) OR domain_id=EntityID; + DELETE FROM collection_type + WHERE (domain_id=0 AND entity_id=EntityID) OR domain_id=EntityID; + + DELETE FROM query_template_def WHERE id=EntityID; + +END ;; +DELIMITER ; +/*!50003 SET sql_mode = @saved_sql_mode */ ; +/*!50003 SET character_set_client = @saved_cs_client */ ; +/*!50003 SET 
character_set_results = @saved_cs_results */ ; +/*!50003 SET collation_connection = @saved_col_connection */ ; +/*!50003 DROP PROCEDURE IF EXISTS `deleteIsa` */; +/*!50003 SET @saved_cs_client = @@character_set_client */ ; +/*!50003 SET @saved_cs_results = @@character_set_results */ ; +/*!50003 SET @saved_col_connection = @@collation_connection */ ; +/*!50003 SET character_set_client = utf8 */ ; +/*!50003 SET character_set_results = utf8 */ ; +/*!50003 SET collation_connection = utf8_general_ci */ ; +/*!50003 SET @saved_sql_mode = @@sql_mode */ ; +/*!50003 SET sql_mode = 'STRICT_TRANS_TABLES,ERROR_FOR_DIVISION_BY_ZERO,NO_AUTO_CREATE_USER,NO_ENGINE_SUBSTITUTION' */ ; +DELIMITER ;; +CREATE DEFINER=`caosdb`@`%` PROCEDURE `deleteIsa`(IN EntityID INT UNSIGNED) +BEGIN + DECLARE IVersion INT UNSIGNED DEFAULT NULL; + + IF is_feature_config("ENTITY_VERSIONING", "ENABLED") THEN + SELECT max(_iversion) INTO IVersion + FROM entity_version + WHERE entity_id = EntityID; + + + INSERT IGNORE INTO archive_isa (child, child_iversion, parent, direct) + SELECT e.child, IVersion AS child_iversion, e.parent, rpath = EntityID + FROM isa_cache AS e + WHERE e.child = EntityID; + END IF; + + DELETE FROM isa_cache + WHERE child = EntityID + OR rpath = EntityID + OR rpath LIKE concat('%>', EntityID) + OR rpath LIKE concat('%>', EntityID, '>%'); + +END ;; +DELIMITER ; +/*!50003 SET sql_mode = @saved_sql_mode */ ; +/*!50003 SET character_set_client = @saved_cs_client */ ; +/*!50003 SET character_set_results = @saved_cs_results */ ; +/*!50003 SET collation_connection = @saved_col_connection */ ; +/*!50003 DROP PROCEDURE IF EXISTS `deleteLinCon` */; +/*!50003 SET @saved_cs_client = @@character_set_client */ ; +/*!50003 SET @saved_cs_results = @@character_set_results */ ; +/*!50003 SET @saved_col_connection = @@collation_connection */ ; +/*!50003 SET character_set_client = utf8 */ ; +/*!50003 SET character_set_results = utf8 */ ; +/*!50003 SET collation_connection = utf8_general_ci */ ; +/*!50003 SET @saved_sql_mode = @@sql_mode */ ; +/*!50003 SET sql_mode = 'STRICT_TRANS_TABLES,ERROR_FOR_DIVISION_BY_ZERO,NO_AUTO_CREATE_USER,NO_ENGINE_SUBSTITUTION' */ ; +DELIMITER ;; +CREATE DEFINER=`caosdb`@`%` PROCEDURE `deleteLinCon`(in sig BIGINT) +BEGIN + + DELETE FROM units_lin_con WHERE signature_from=sig; + +END ;; +DELIMITER ; +/*!50003 SET sql_mode = @saved_sql_mode */ ; +/*!50003 SET character_set_client = @saved_cs_client */ ; +/*!50003 SET character_set_results = @saved_cs_results */ ; +/*!50003 SET collation_connection = @saved_col_connection */ ; +/*!50003 DROP PROCEDURE IF EXISTS `delete_all_entity_versions` */; +/*!50003 SET @saved_cs_client = @@character_set_client */ ; +/*!50003 SET @saved_cs_results = @@character_set_results */ ; +/*!50003 SET @saved_col_connection = @@collation_connection */ ; +/*!50003 SET character_set_client = utf8 */ ; +/*!50003 SET character_set_results = utf8 */ ; +/*!50003 SET collation_connection = utf8_general_ci */ ; +/*!50003 SET @saved_sql_mode = @@sql_mode */ ; +/*!50003 SET sql_mode = 'STRICT_TRANS_TABLES,ERROR_FOR_DIVISION_BY_ZERO,NO_AUTO_CREATE_USER,NO_ENGINE_SUBSTITUTION' */ ; +DELIMITER ;; +CREATE DEFINER=`caosdb`@`%` PROCEDURE `delete_all_entity_versions`( + in EntityID INT UNSIGNED) +BEGIN + + DELETE FROM entity_version WHERE entity_id = EntityID; + +END ;; +DELIMITER ; +/*!50003 SET sql_mode = @saved_sql_mode */ ; +/*!50003 SET character_set_client = @saved_cs_client */ ; +/*!50003 SET character_set_results = @saved_cs_results */ ; +/*!50003 SET collation_connection = 
@saved_col_connection */ ; +/*!50003 DROP PROCEDURE IF EXISTS `entityACL` */; +/*!50003 SET @saved_cs_client = @@character_set_client */ ; +/*!50003 SET @saved_cs_results = @@character_set_results */ ; +/*!50003 SET @saved_col_connection = @@collation_connection */ ; +/*!50003 SET character_set_client = utf8 */ ; +/*!50003 SET character_set_results = utf8 */ ; +/*!50003 SET collation_connection = utf8_general_ci */ ; +/*!50003 SET @saved_sql_mode = @@sql_mode */ ; +/*!50003 SET sql_mode = 'STRICT_TRANS_TABLES,ERROR_FOR_DIVISION_BY_ZERO,NO_AUTO_CREATE_USER,NO_ENGINE_SUBSTITUTION' */ ; +DELIMITER ;; +CREATE DEFINER=`caosdb`@`%` PROCEDURE `entityACL`(out ACLID INT UNSIGNED, in ACLSTR VARBINARY(65525)) +BEGIN + SELECT id INTO ACLID FROM entity_acl as t WHERE t.acl=ACLSTR LIMIT 1; + IF ACLID IS NULL THEN + INSERT INTO entity_acl (acl) VALUES (ACLSTR); + SET ACLID = LAST_INSERT_ID(); + END IF; +END ;; +DELIMITER ; +/*!50003 SET sql_mode = @saved_sql_mode */ ; +/*!50003 SET character_set_client = @saved_cs_client */ ; +/*!50003 SET character_set_results = @saved_cs_results */ ; +/*!50003 SET collation_connection = @saved_col_connection */ ; +/*!50003 DROP PROCEDURE IF EXISTS `finishNegationFilter` */; +/*!50003 SET @saved_cs_client = @@character_set_client */ ; +/*!50003 SET @saved_cs_results = @@character_set_results */ ; +/*!50003 SET @saved_col_connection = @@collation_connection */ ; +/*!50003 SET character_set_client = utf8 */ ; +/*!50003 SET character_set_results = utf8 */ ; +/*!50003 SET collation_connection = utf8_general_ci */ ; +/*!50003 SET @saved_sql_mode = @@sql_mode */ ; +/*!50003 SET sql_mode = 'STRICT_TRANS_TABLES,ERROR_FOR_DIVISION_BY_ZERO,NO_AUTO_CREATE_USER,NO_ENGINE_SUBSTITUTION' */ ; +DELIMITER ;; +CREATE DEFINER=`caosdb`@`%` PROCEDURE `finishNegationFilter`(in resultSetTable VARCHAR(255), in diff VARCHAR(255)) +BEGIN + /* calcDifference is declared above with a third (versioned) argument; FALSE is assumed here so that the call matches that signature. */ + call calcDifference(resultSetTable, diff, FALSE); +END ;; +DELIMITER ; +/*!50003 SET sql_mode = @saved_sql_mode */ ; +/*!50003 SET character_set_client = @saved_cs_client */ ; +/*!50003 SET character_set_results = @saved_cs_results */ ; +/*!50003 SET collation_connection = @saved_col_connection */ ; +/*!50003 DROP PROCEDURE IF EXISTS `finishSubProperty` */; +/*!50003 SET @saved_cs_client = @@character_set_client */ ; +/*!50003 SET @saved_cs_results = @@character_set_results */ ; +/*!50003 SET @saved_col_connection = @@collation_connection */ ; +/*!50003 SET character_set_client = utf8 */ ; +/*!50003 SET character_set_results = utf8 */ ; +/*!50003 SET collation_connection = utf8_general_ci */ ; +/*!50003 SET @saved_sql_mode = @@sql_mode */ ; +/*!50003 SET sql_mode = 'STRICT_TRANS_TABLES,ERROR_FOR_DIVISION_BY_ZERO,NO_AUTO_CREATE_USER,NO_ENGINE_SUBSTITUTION' */ ; +DELIMITER ;; +CREATE DEFINER=`caosdb`@`%` PROCEDURE `finishSubProperty`(in sourceSet VARCHAR(255),in targetSet VARCHAR(255), + in list VARCHAR(255), in versioned BOOLEAN) +BEGIN + DECLARE data VARCHAR(20000) DEFAULT CONCAT('`',list,'`'); + SET @finishSubPropertyStmtStr = makeStmt(sourceSet, targetSet, data, NULL, versioned); + + PREPARE finishSubPropertyStmt FROM @finishSubPropertyStmtStr; + EXECUTE finishSubPropertyStmt; + DEALLOCATE PREPARE finishSubPropertyStmt; + + SELECT @finishSubPropertyStmtStr AS finishSubPropertyStmt; + +END ;; +DELIMITER ; +/*!50003 SET sql_mode = @saved_sql_mode */ ; +/*!50003 SET character_set_client = @saved_cs_client */ ; +/*!50003 SET character_set_results = @saved_cs_results */ ; +/*!50003 SET collation_connection = @saved_col_connection */ ; +/*!50003 DROP PROCEDURE IF EXISTS 
`getChildren` */; +/*!50003 SET @saved_cs_client = @@character_set_client */ ; +/*!50003 SET @saved_cs_results = @@character_set_results */ ; +/*!50003 SET @saved_col_connection = @@collation_connection */ ; +/*!50003 SET character_set_client = utf8 */ ; +/*!50003 SET character_set_results = utf8 */ ; +/*!50003 SET collation_connection = utf8_general_ci */ ; +/*!50003 SET @saved_sql_mode = @@sql_mode */ ; +/*!50003 SET sql_mode = 'STRICT_TRANS_TABLES,ERROR_FOR_DIVISION_BY_ZERO,NO_AUTO_CREATE_USER,NO_ENGINE_SUBSTITUTION' */ ; +DELIMITER ;; +CREATE DEFINER=`caosdb`@`%` PROCEDURE `getChildren`(in tableName varchar(255), in versioned BOOLEAN) +BEGIN + DECLARE found_children INT UNSIGNED DEFAULT 0; + + DROP TEMPORARY TABLE IF EXISTS dependTemp; + CREATE TEMPORARY TABLE dependTemp (id INT UNSIGNED, _iversion INT UNSIGNED, PRIMARY KEY(id, _iversion)); + + + SET @initDepend = CONCAT( + 'INSERT IGNORE INTO dependTemp (id, _iversion) SELECT i.child, ', + IF(versioned, + '_get_head_iversion(i.child)', + '0'), + ' FROM isa_cache AS i INNER JOIN `', + tableName, + '` AS t ON (i.parent=t.id);'); + PREPARE initDependStmt FROM @initDepend; + + EXECUTE initDependStmt; + SET found_children = found_children + ROW_COUNT(); + + + + IF versioned IS TRUE THEN + SET @initDepend = CONCAT( + 'INSERT IGNORE INTO dependTemp (id, _iversion) ', + 'SELECT i.child, i.child_iversion FROM archive_isa AS i INNER JOIN `', + tableName, + '` AS t ON (i.parent=t.id);'); + PREPARE initDependStmt FROM @initDepend; + + EXECUTE initDependStmt; + SET found_children = found_children + ROW_COUNT(); + END IF; + + + + + IF found_children != 0 THEN + SET @transfer = CONCAT( + 'INSERT IGNORE INTO `', + tableName, + IF(versioned, + '` (id, _iversion) SELECT id, _iversion FROM dependTemp', + '` (id) SELECT id FROM dependTemp')); + PREPARE transferstmt FROM @transfer; + EXECUTE transferstmt; + DEALLOCATE PREPARE transferstmt; + END IF; + + + DEALLOCATE PREPARE initDependStmt; + +END ;; +DELIMITER ; +/*!50003 SET sql_mode = @saved_sql_mode */ ; +/*!50003 SET character_set_client = @saved_cs_client */ ; +/*!50003 SET character_set_results = @saved_cs_results */ ; +/*!50003 SET collation_connection = @saved_col_connection */ ; +/*!50003 DROP PROCEDURE IF EXISTS `getDependentEntities` */; +/*!50003 SET @saved_cs_client = @@character_set_client */ ; +/*!50003 SET @saved_cs_results = @@character_set_results */ ; +/*!50003 SET @saved_col_connection = @@collation_connection */ ; +/*!50003 SET character_set_client = utf8 */ ; +/*!50003 SET character_set_results = utf8 */ ; +/*!50003 SET collation_connection = utf8_general_ci */ ; +/*!50003 SET @saved_sql_mode = @@sql_mode */ ; +/*!50003 SET sql_mode = 'STRICT_TRANS_TABLES,ERROR_FOR_DIVISION_BY_ZERO,NO_AUTO_CREATE_USER,NO_ENGINE_SUBSTITUTION' */ ; +DELIMITER ;; +CREATE DEFINER=`caosdb`@`%` PROCEDURE `getDependentEntities`(in EntityID INT UNSIGNED) +BEGIN + +DROP TEMPORARY TABLE IF EXISTS refering; +CREATE TEMPORARY TABLE refering ( +id INT UNSIGNED UNIQUE +); + +INSERT IGNORE INTO refering (id) SELECT entity_id FROM reference_data WHERE (value=EntityID OR property_id=EntityID) AND domain_id=0 AND entity_id!=EntityID; +INSERT IGNORE INTO refering (id) SELECT domain_id FROM reference_data WHERE (value=EntityID OR property_id=EntityID) AND domain_id!=EntityID AND entity_id!=EntityID AND domain_id!=0; + +INSERT IGNORE INTO refering (id) SELECT entity_id FROM text_data WHERE property_id=EntityID AND domain_id=0 AND entity_id!=EntityID; +INSERT IGNORE INTO refering (id) SELECT domain_id FROM text_data WHERE 
property_id=EntityID AND domain_id!=EntityID AND entity_id!=EntityID AND domain_id!=0; + +INSERT IGNORE INTO refering (id) SELECT entity_id FROM enum_data WHERE property_id=EntityID AND domain_id=0 AND entity_id!=EntityID; +INSERT IGNORE INTO refering (id) SELECT domain_id FROM enum_data WHERE property_id=EntityID AND domain_id!=EntityID AND entity_id!=EntityID AND domain_id!=0; + +INSERT IGNORE INTO refering (id) SELECT entity_id FROM name_data WHERE property_id=EntityID AND domain_id=0 AND entity_id!=EntityID; +INSERT IGNORE INTO refering (id) SELECT domain_id FROM name_data WHERE property_id=EntityID AND domain_id!=EntityID AND entity_id!=EntityID AND domain_id!=0; + +INSERT IGNORE INTO refering (id) SELECT entity_id FROM integer_data WHERE property_id=EntityID AND domain_id=0 AND entity_id!=EntityID; +INSERT IGNORE INTO refering (id) SELECT domain_id FROM integer_data WHERE property_id=EntityID AND domain_id!=EntityID AND entity_id!=EntityID AND domain_id!=0; + +INSERT IGNORE INTO refering (id) SELECT entity_id FROM double_data WHERE property_id=EntityID AND domain_id=0 AND entity_id!=EntityID; +INSERT IGNORE INTO refering (id) SELECT domain_id FROM double_data WHERE property_id=EntityID AND domain_id!=EntityID AND entity_id!=EntityID AND domain_id!=0; + +INSERT IGNORE INTO refering (id) SELECT entity_id FROM datetime_data WHERE property_id=EntityID AND domain_id=0 AND entity_id!=EntityID; +INSERT IGNORE INTO refering (id) SELECT domain_id FROM datetime_data WHERE property_id=EntityID AND domain_id!=EntityID AND entity_id!=EntityID AND domain_id!=0; + +INSERT IGNORE INTO refering (id) SELECT entity_id FROM date_data WHERE property_id=EntityID AND domain_id=0 AND entity_id!=EntityID; +INSERT IGNORE INTO refering (id) SELECT domain_id FROM date_data WHERE property_id=EntityID AND domain_id!=EntityID AND entity_id!=EntityID AND domain_id!=0; + +INSERT IGNORE INTO refering (id) SELECT entity_id FROM null_data WHERE property_id=EntityID AND domain_id=0 AND entity_id!=EntityID; +INSERT IGNORE INTO refering (id) SELECT domain_id FROM null_data WHERE property_id=EntityID AND domain_id!=EntityID AND entity_id!=EntityID AND domain_id!=0; + +INSERT IGNORE INTO refering (id) SELECT entity_id from data_type WHERE datatype=EntityID AND domain_id=0 AND entity_id!=EntityID; +INSERT IGNORE INTO refering (id) SELECT domain_id from data_type WHERE datatype=EntityID; + + +Select id from refering WHERE id!=0 and id!=EntityID; + +DROP TEMPORARY TABLE refering; + +END ;; +DELIMITER ; +/*!50003 SET sql_mode = @saved_sql_mode */ ; +/*!50003 SET character_set_client = @saved_cs_client */ ; +/*!50003 SET character_set_results = @saved_cs_results */ ; +/*!50003 SET collation_connection = @saved_col_connection */ ; +/*!50003 DROP PROCEDURE IF EXISTS `getFile` */; +/*!50003 SET @saved_cs_client = @@character_set_client */ ; +/*!50003 SET @saved_cs_results = @@character_set_results */ ; +/*!50003 SET @saved_col_connection = @@collation_connection */ ; +/*!50003 SET character_set_client = utf8mb4 */ ; +/*!50003 SET character_set_results = utf8mb4 */ ; +/*!50003 SET collation_connection = utf8mb4_general_ci */ ; +/*!50003 SET @saved_sql_mode = @@sql_mode */ ; +/*!50003 SET sql_mode = 'STRICT_TRANS_TABLES,ERROR_FOR_DIVISION_BY_ZERO,NO_AUTO_CREATE_USER,NO_ENGINE_SUBSTITUTION' */ ; +DELIMITER ;; +CREATE DEFINER=`caosdb`@`%` PROCEDURE `getFile`(in FileID INT) +BEGIN + +Select name, description, role into @name, @description, @role from entities where id=FileID LIMIT 1; + +IF @role = 'file' Then + Select path, hash, size 
into @FilePath, @FileHash, @FileSize from files where file_id=FileID LIMIT 1; + Select timestamp, user_id, user_agent into @FileCreated, @FileCreator, @FileGenerator from history where entity_id=FileID AND event='insertion' LIMIT 1; + +Select +FileID as FileID, +@FilePath as FilePath, +@FileSize as FileSize, +@FileHash as FileHash, +@description as FileDescription, +@FileCreated as FileCreated, +@FileCreator as FileCreator, +@FileGenerator as FileGenerator, +NULL as FileOwner, +NULL as FilePermission, +NULL as FileChecksum; + +END IF; + +END ;; +DELIMITER ; +/*!50003 SET sql_mode = @saved_sql_mode */ ; +/*!50003 SET character_set_client = @saved_cs_client */ ; +/*!50003 SET character_set_results = @saved_cs_results */ ; +/*!50003 SET collation_connection = @saved_col_connection */ ; +/*!50003 DROP PROCEDURE IF EXISTS `getFileIdByPath` */; +/*!50003 SET @saved_cs_client = @@character_set_client */ ; +/*!50003 SET @saved_cs_results = @@character_set_results */ ; +/*!50003 SET @saved_col_connection = @@collation_connection */ ; +/*!50003 SET character_set_client = utf8 */ ; +/*!50003 SET character_set_results = utf8 */ ; +/*!50003 SET collation_connection = utf8_general_ci */ ; +/*!50003 SET @saved_sql_mode = @@sql_mode */ ; +/*!50003 SET sql_mode = 'STRICT_TRANS_TABLES,ERROR_FOR_DIVISION_BY_ZERO,NO_AUTO_CREATE_USER,NO_ENGINE_SUBSTITUTION' */ ; +DELIMITER ;; +CREATE DEFINER=`caosdb`@`%` PROCEDURE `getFileIdByPath`(in FilePath VARCHAR(255)) +BEGIN + +Select file_id as FileID from files where path=FilePath LIMIT 1; + +END ;; +DELIMITER ; +/*!50003 SET sql_mode = @saved_sql_mode */ ; +/*!50003 SET character_set_client = @saved_cs_client */ ; +/*!50003 SET character_set_results = @saved_cs_results */ ; +/*!50003 SET collation_connection = @saved_col_connection */ ; +/*!50003 DROP PROCEDURE IF EXISTS `getRole` */; +/*!50003 SET @saved_cs_client = @@character_set_client */ ; +/*!50003 SET @saved_cs_results = @@character_set_results */ ; +/*!50003 SET @saved_col_connection = @@collation_connection */ ; +/*!50003 SET character_set_client = utf8 */ ; +/*!50003 SET character_set_results = utf8 */ ; +/*!50003 SET collation_connection = utf8_general_ci */ ; +/*!50003 SET @saved_sql_mode = @@sql_mode */ ; +/*!50003 SET sql_mode = 'STRICT_TRANS_TABLES,ERROR_FOR_DIVISION_BY_ZERO,NO_AUTO_CREATE_USER,NO_ENGINE_SUBSTITUTION' */ ; +DELIMITER ;; +CREATE DEFINER=`caosdb`@`%` PROCEDURE `getRole`(in RoleName VARCHAR(255)) +BEGIN + +Select e.id INTO @RoleID from entities e where e.name=RoleName AND e.role=RoleName LIMIT 1; + +call retrieveEntity(@RoleID); + + + + +END ;; +DELIMITER ; +/*!50003 SET sql_mode = @saved_sql_mode */ ; +/*!50003 SET character_set_client = @saved_cs_client */ ; +/*!50003 SET character_set_results = @saved_cs_results */ ; +/*!50003 SET collation_connection = @saved_col_connection */ ; +/*!50003 DROP PROCEDURE IF EXISTS `getRules` */; +/*!50003 SET @saved_cs_client = @@character_set_client */ ; +/*!50003 SET @saved_cs_results = @@character_set_results */ ; +/*!50003 SET @saved_col_connection = @@collation_connection */ ; +/*!50003 SET character_set_client = utf8mb4 */ ; +/*!50003 SET character_set_results = utf8mb4 */ ; +/*!50003 SET collation_connection = utf8mb4_general_ci */ ; +/*!50003 SET @saved_sql_mode = @@sql_mode */ ; +/*!50003 SET sql_mode = 'STRICT_TRANS_TABLES,ERROR_FOR_DIVISION_BY_ZERO,NO_AUTO_CREATE_USER,NO_ENGINE_SUBSTITUTION' */ ; +DELIMITER ;; +CREATE DEFINER=`caosdb`@`%` PROCEDURE `getRules`(in DomainID INT UNSIGNED, in EntityID INT UNSIGNED, in TransType VARCHAR(255)) 
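+/* Editorial annotation, not part of the generated dump: getRules returns the transaction rules that match a (domain, entity, transaction type) triple; a NULL DomainID or EntityID matches the catch-all id 0, and a NULL TransType matches every transaction type. Hypothetical example: CALL getRules(NULL, NULL, 'INSERT'); */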
+BEGIN + + + + +SELECT rules.transaction, rules.criterion, rules.modus from rules where if(DomainID is null, rules.domain_id=0,rules.domain_id=DomainID) AND if(EntityID is null, rules.entity_id=0,rules.entity_id=EntityID) AND if(TransType is null,true=true,rules.transaction=TransType); + + + + +END ;; +DELIMITER ; +/*!50003 SET sql_mode = @saved_sql_mode */ ; +/*!50003 SET character_set_client = @saved_cs_client */ ; +/*!50003 SET character_set_results = @saved_cs_results */ ; +/*!50003 SET collation_connection = @saved_col_connection */ ; +/*!50003 DROP PROCEDURE IF EXISTS `get_version_history` */; +/*!50003 SET @saved_cs_client = @@character_set_client */ ; +/*!50003 SET @saved_cs_results = @@character_set_results */ ; +/*!50003 SET @saved_col_connection = @@collation_connection */ ; +/*!50003 SET character_set_client = utf8 */ ; +/*!50003 SET character_set_results = utf8 */ ; +/*!50003 SET collation_connection = utf8_general_ci */ ; +/*!50003 SET @saved_sql_mode = @@sql_mode */ ; +/*!50003 SET sql_mode = 'STRICT_TRANS_TABLES,ERROR_FOR_DIVISION_BY_ZERO,NO_AUTO_CREATE_USER,NO_ENGINE_SUBSTITUTION' */ ; +DELIMITER ;; +CREATE DEFINER=`caosdb`@`%` PROCEDURE `get_version_history`( + in EntityID INT UNSIGNED) +BEGIN + + SELECT c.version AS child, + NULL as parent, + t.seconds AS child_seconds, + t.nanos AS child_nanos, + t.username AS child_username, + t.realm AS child_realm + FROM entity_version AS c INNER JOIN transactions as t + ON ( c.srid = t.srid ) + WHERE c.entity_id = EntityID + AND c._ipparent is Null + + + + + + + UNION SELECT c.version AS child, + p.version AS parent, + t.seconds AS child_seconds, + t.nanos AS child_nanos, + t.username AS child_username, + t.realm AS child_realm + FROM entity_version AS p + INNER JOIN entity_version as c + INNER JOIN transactions AS t + ON (c._ipparent = p._iversion + AND c.entity_id = p.entity_id + AND t.srid = c.srid) + WHERE p.entity_id = EntityID; +END ;; +DELIMITER ; +/*!50003 SET sql_mode = @saved_sql_mode */ ; +/*!50003 SET character_set_client = @saved_cs_client */ ; +/*!50003 SET character_set_results = @saved_cs_results */ ; +/*!50003 SET collation_connection = @saved_col_connection */ ; +/*!50003 DROP PROCEDURE IF EXISTS `initAutoIncrement` */; +/*!50003 SET @saved_cs_client = @@character_set_client */ ; +/*!50003 SET @saved_cs_results = @@character_set_results */ ; +/*!50003 SET @saved_col_connection = @@collation_connection */ ; +/*!50003 SET character_set_client = utf8 */ ; +/*!50003 SET character_set_results = utf8 */ ; +/*!50003 SET collation_connection = utf8_general_ci */ ; +/*!50003 SET @saved_sql_mode = @@sql_mode */ ; +/*!50003 SET sql_mode = 'STRICT_TRANS_TABLES,ERROR_FOR_DIVISION_BY_ZERO,NO_AUTO_CREATE_USER,NO_ENGINE_SUBSTITUTION' */ ; +DELIMITER ;; +CREATE DEFINER=`caosdb`@`%` PROCEDURE `initAutoIncrement`() +BEGIN + + SELECT @max := MAX(entity_id)+ 1 FROM transaction_log; + IF @max IS NOT NULL THEN + SET @stmtStr = CONCAT('ALTER TABLE entities AUTO_INCREMENT=',@max); + PREPARE stmt FROM @stmtStr; + EXECUTE stmt; + DEALLOCATE PREPARE stmt; + END IF; + +END ;; +DELIMITER ; +/*!50003 SET sql_mode = @saved_sql_mode */ ; +/*!50003 SET character_set_client = @saved_cs_client */ ; +/*!50003 SET character_set_results = @saved_cs_results */ ; +/*!50003 SET collation_connection = @saved_col_connection */ ; +/*!50003 DROP PROCEDURE IF EXISTS `initBackReference` */; +/*!50003 SET @saved_cs_client = @@character_set_client */ ; +/*!50003 SET @saved_cs_results = @@character_set_results */ ; +/*!50003 SET @saved_col_connection = 
@@collation_connection */ ; +/*!50003 SET character_set_client = utf8 */ ; +/*!50003 SET character_set_results = utf8 */ ; +/*!50003 SET collation_connection = utf8_general_ci */ ; +/*!50003 SET @saved_sql_mode = @@sql_mode */ ; +/*!50003 SET sql_mode = 'STRICT_TRANS_TABLES,ERROR_FOR_DIVISION_BY_ZERO,NO_AUTO_CREATE_USER,NO_ENGINE_SUBSTITUTION' */ ; +DELIMITER ;; +CREATE DEFINER=`caosdb`@`%` PROCEDURE `initBackReference`(in pid INT UNSIGNED, in pname VARCHAR(255), in entity_id INT UNSIGNED, in ename VARCHAR(255)) +BEGIN + DECLARE propertiesTable VARCHAR(255) DEFAULT NULL; + DECLARE entitiesTable VARCHAR(255) DEFAULT NULL; + + IF pname IS NOT NULL THEN + + call createTmpTable(propertiesTable, FALSE); + call initSubEntity(pid, pname, propertiesTable); + END IF; + + IF ename IS NOT NULL THEN + + call createTmpTable(entitiesTable, FALSE); + call initSubEntity(entity_id, ename, entitiesTable); + END IF; + + SELECT propertiesTable, entitiesTable; + + +END ;; +DELIMITER ; +/*!50003 SET sql_mode = @saved_sql_mode */ ; +/*!50003 SET character_set_client = @saved_cs_client */ ; +/*!50003 SET character_set_results = @saved_cs_results */ ; +/*!50003 SET collation_connection = @saved_col_connection */ ; +/*!50003 DROP PROCEDURE IF EXISTS `initDisjunctionFilter` */; +/*!50003 SET @saved_cs_client = @@character_set_client */ ; +/*!50003 SET @saved_cs_results = @@character_set_results */ ; +/*!50003 SET @saved_col_connection = @@collation_connection */ ; +/*!50003 SET character_set_client = utf8 */ ; +/*!50003 SET character_set_results = utf8 */ ; +/*!50003 SET collation_connection = utf8_general_ci */ ; +/*!50003 SET @saved_sql_mode = @@sql_mode */ ; +/*!50003 SET sql_mode = 'STRICT_TRANS_TABLES,ERROR_FOR_DIVISION_BY_ZERO,NO_AUTO_CREATE_USER,NO_ENGINE_SUBSTITUTION' */ ; +DELIMITER ;; +CREATE DEFINER=`caosdb`@`%` PROCEDURE `initDisjunctionFilter`(in versioned BOOLEAN) +BEGIN + call initEmptyTargetSet(NULL, versioned); +END ;; +DELIMITER ; +/*!50003 SET sql_mode = @saved_sql_mode */ ; +/*!50003 SET character_set_client = @saved_cs_client */ ; +/*!50003 SET character_set_results = @saved_cs_results */ ; +/*!50003 SET collation_connection = @saved_col_connection */ ; +/*!50003 DROP PROCEDURE IF EXISTS `initEmptyTargetSet` */; +/*!50003 SET @saved_cs_client = @@character_set_client */ ; +/*!50003 SET @saved_cs_results = @@character_set_results */ ; +/*!50003 SET @saved_col_connection = @@collation_connection */ ; +/*!50003 SET character_set_client = utf8 */ ; +/*!50003 SET character_set_results = utf8 */ ; +/*!50003 SET collation_connection = utf8_general_ci */ ; +/*!50003 SET @saved_sql_mode = @@sql_mode */ ; +/*!50003 SET sql_mode = 'STRICT_TRANS_TABLES,ERROR_FOR_DIVISION_BY_ZERO,NO_AUTO_CREATE_USER,NO_ENGINE_SUBSTITUTION' */ ; +DELIMITER ;; +CREATE DEFINER=`caosdb`@`%` PROCEDURE `initEmptyTargetSet`(in targetSet VARCHAR(255), in versioned BOOLEAN) +BEGIN + DECLARE newTableName VARCHAR(255) DEFAULT targetSet; + IF targetSet IS NOT NULL THEN + SET @isNotEmptyVar = NULL; + SET @isEmptyStmtStr = CONCAT("SELECT 1 INTO @isNotEmptyVar FROM `",targetSet,"` LIMIT 1"); + PREPARE stmtIsNotEmpty FROM @isEmptyStmtStr; + EXECUTE stmtIsNotEmpty; + DEALLOCATE PREPARE stmtIsNotEmpty; + IF @isNotEmptyVar IS NOT NULL THEN + call createTmpTable(newTableName, versioned); + END IF; + ELSE + call createTmpTable(newTableName, versioned); + END IF; + SELECT newTableName AS newTableName; +END ;; +DELIMITER ; +/*!50003 SET sql_mode = @saved_sql_mode */ ; +/*!50003 SET character_set_client = @saved_cs_client */ ; +/*!50003 SET 
character_set_results = @saved_cs_results */ ; +/*!50003 SET collation_connection = @saved_col_connection */ ; +/*!50003 DROP PROCEDURE IF EXISTS `initEntity` */; +/*!50003 SET @saved_cs_client = @@character_set_client */ ; +/*!50003 SET @saved_cs_results = @@character_set_results */ ; +/*!50003 SET @saved_col_connection = @@collation_connection */ ; +/*!50003 SET character_set_client = utf8 */ ; +/*!50003 SET character_set_results = utf8 */ ; +/*!50003 SET collation_connection = utf8_general_ci */ ; +/*!50003 SET @saved_sql_mode = @@sql_mode */ ; +/*!50003 SET sql_mode = 'STRICT_TRANS_TABLES,ERROR_FOR_DIVISION_BY_ZERO,NO_AUTO_CREATE_USER,NO_ENGINE_SUBSTITUTION' */ ; +DELIMITER ;; +CREATE DEFINER=`caosdb`@`%` PROCEDURE `initEntity`(in eid INT UNSIGNED, in ename VARCHAR(255), + in enameLike VARCHAR(255), in enameRegexp VARCHAR(255), + in resultset VARCHAR(255), in versioned BOOLEAN) +initEntityLabel: BEGIN + DECLARE select_columns VARCHAR(255) DEFAULT '` (id) SELECT entity_id FROM name_data '; + SET @initEntityStmtStr = NULL; + + + + IF versioned IS TRUE THEN + SET select_columns = '` (id, _iversion) SELECT entity_id, _get_head_iversion(entity_id) FROM name_data '; + END IF; + IF ename IS NOT NULL THEN + SET @initEntityStmtStr = CONCAT( + 'INSERT IGNORE INTO `', + resultset, + select_columns, + 'WHERE value=?; '); + SET @query_param = ename; + ELSEIF enameLike IS NOT NULL THEN + SET @initEntityStmtStr = CONCAT( + 'INSERT IGNORE INTO `', + resultset, + select_columns, + 'WHERE value LIKE ?;'); + SET @query_param = enameLike; + ELSEIF enameRegexp IS NOT NULL THEN + SET @initEntityStmtStr = CONCAT( + 'INSERT IGNORE INTO `', + resultset, + select_columns, + 'WHERE value REGEXP ?;'); + SET @query_param = enameRegexp; + END IF; + + + IF @initEntityStmtStr IS NOT NULL THEN + PREPARE initEntityStmt FROM @initEntityStmtStr; + EXECUTE initEntityStmt USING @query_param; + DEALLOCATE PREPARE initEntityStmt; + END IF; + + IF eid IS NOT NULL THEN + + SET @initEntityStmtStr = CONCAT( + 'INSERT IGNORE INTO `', + resultset, + IF(versioned, + '` (id, _iversion) SELECT id, _get_head_iversion(id) ', + '` (id) SELECT id '), + 'FROM entities WHERE id=',eid,';'); + PREPARE initEntityStmt FROM @initEntityStmtStr; + EXECUTE initEntityStmt; + DEALLOCATE PREPARE initEntityStmt; + END IF; + + + + + IF versioned IS TRUE THEN + SET select_columns = '` (id, _iversion) SELECT entity_id, _iversion FROM archive_name_data '; + IF ename IS NOT NULL THEN + SET @initEntityStmtStr = CONCAT( + 'INSERT IGNORE INTO `', + resultset, + select_columns, + 'WHERE value=?; '); + SET @query_param = ename; + ELSEIF enameLike IS NOT NULL THEN + SET @initEntityStmtStr = CONCAT( + 'INSERT IGNORE INTO `', + resultset, + select_columns, + 'WHERE value LIKE ?;'); + SET @query_param = enameLike; + ELSEIF enameRegexp IS NOT NULL THEN + SET @initEntityStmtStr = CONCAT( + 'INSERT IGNORE INTO `', + resultset, + select_columns, + 'WHERE value REGEXP ?;'); + SET @query_param = enameRegexp; + END IF; + + + IF @initEntityStmtStr IS NOT NULL THEN + PREPARE initEntityStmt FROM @initEntityStmtStr; + EXECUTE initEntityStmt USING @query_param; + DEALLOCATE PREPARE initEntityStmt; + END IF; + END IF; + + + + IF @initEntityStmtStr IS NOT NULL THEN + call getChildren(resultset, versioned); + END IF; + +END ;; +DELIMITER ; +/*!50003 SET sql_mode = @saved_sql_mode */ ; +/*!50003 SET character_set_client = @saved_cs_client */ ; +/*!50003 SET character_set_results = @saved_cs_results */ ; +/*!50003 SET collation_connection = @saved_col_connection */ ; +/*!50003 DROP PROCEDURE IF 
EXISTS `initPOVPropertiesTable` */; +/*!50003 SET @saved_cs_client = @@character_set_client */ ; +/*!50003 SET @saved_cs_results = @@character_set_results */ ; +/*!50003 SET @saved_col_connection = @@collation_connection */ ; +/*!50003 SET character_set_client = utf8 */ ; +/*!50003 SET character_set_results = utf8 */ ; +/*!50003 SET collation_connection = utf8_general_ci */ ; +/*!50003 SET @saved_sql_mode = @@sql_mode */ ; +/*!50003 SET sql_mode = 'STRICT_TRANS_TABLES,ERROR_FOR_DIVISION_BY_ZERO,NO_AUTO_CREATE_USER,NO_ENGINE_SUBSTITUTION' */ ; +DELIMITER ;; +CREATE DEFINER=`caosdb`@`%` PROCEDURE `initPOVPropertiesTable`(in pid INT UNSIGNED, in pname VARCHAR(255), in sourceSet VARCHAR(255)) +BEGIN + DECLARE propertiesTable VARCHAR(255) DEFAULT NULL; + DECLARE replTbl VARCHAR(255) DEFAULT NULL; + DECLARE ecount INT DEFAULT 0; + DECLARE t1 BIGINT DEFAULT 0; + DECLARE t2 BIGINT DEFAULT 0; + DECLARE t3 BIGINT DEFAULT 0; + DECLARE t4 BIGINT DEFAULT 0; + DECLARE t5 BIGINT DEFAULT 0; + DECLARE t6 BIGINT DEFAULT 0; + + + IF pname is NOT NULL THEN + SELECT conv( concat( substring(uid,16,3), substring(uid,10,4), substring(uid,1,8)),16,10) div 10000 - (141427 * 24 * 60 * 60 * 1000) as current_mills INTO t1 from (select uuid() uid) as alias; + call createTmpTable2(propertiesTable); + + + SET @initPOVPropertiesTableStmt1 = CONCAT('INSERT IGNORE INTO `', propertiesTable, '` (id, id2, domain) SELECT property_id, entity_id, domain_id from name_overrides WHERE name = ? UNION ALL SELECT entity_id, domain_id, 0 FROM name_data WHERE value = ?;'); + PREPARE stmt FROM @initPOVPropertiesTableStmt1; + SET @pname = pname; + EXECUTE stmt USING @pname, @pname; + SET ecount = ROW_COUNT(); + + + SELECT conv( concat( substring(uid,16,3), substring(uid,10,4), substring(uid,1,8)),16,10) div 10000 - (141427 * 24 * 60 * 60 * 1000) as current_mills INTO t2 from (select uuid() uid) as alias; + IF pid IS NOT NULL THEN + SET @initPOVPropertiesTableStmt2 = CONCAT('INSERT IGNORE INTO `', propertiesTable, '` (id, id2, domain) VALUES (?, 0, 0)'); + PREPARE stmt FROM @initPOVPropertiesTableStmt2; + SET @pid = pid; + EXECUTE stmt USING @pid; + SET ecount = ecount + ROW_COUNT(); + END IF; + + + SELECT conv( concat( substring(uid,16,3), substring(uid,10,4), substring(uid,1,8)),16,10) div 10000 - (141427 * 24 * 60 * 60 * 1000) as current_mills INTO t3 from (select uuid() uid) as alias; + IF ecount > 0 THEN + + call getChildren(propertiesTable, False); + END IF; + + + SELECT conv( concat( substring(uid,16,3), substring(uid,10,4), substring(uid,1,8)),16,10) div 10000 - (141427 * 24 * 60 * 60 * 1000) as current_mills INTO t4 from (select uuid() uid) as alias; + IF ecount > 0 THEN + call createTmpTable2(replTbl); + SET @replTblStmt1 := CONCAT('INSERT IGNORE INTO `',replTbl, '` (id, id2, domain) SELECT r.value as id, r.entity_id as id2, 0 as domain_id FROM reference_data AS r WHERE status="REPLACEMENT" AND domain_id=0 AND EXISTS (SELECT * FROM `', sourceSet, '` AS s WHERE s.id=r.entity_id) AND EXISTS (SELECT * FROM `', propertiesTable, '` AS p WHERE p.domain = 0 AND p.id2=0 AND p.id=r.property_id);'); + PREPARE replStmt1 FROM @replTblStmt1; + EXECUTE replStmt1; + DEALLOCATE PREPARE replStmt1; + SELECT conv( concat( substring(uid,16,3), substring(uid,10,4), substring(uid,1,8)),16,10) div 10000 - (141427 * 24 * 60 * 60 * 1000) as current_mills INTO t5 from (select uuid() uid) as alias; + + SET @replTblStmt2 := CONCAT('INSERT IGNORE INTO `', propertiesTable, '` SELECT id, id2, domain FROM `', replTbl, '`;'); + PREPARE replStmt2 FROM 
@replTblStmt2; + EXECUTE replStmt2; + DEALLOCATE PREPARE replStmt2; + SELECT conv( concat( substring(uid,16,3), substring(uid,10,4), substring(uid,1,8)),16,10) div 10000 - (141427 * 24 * 60 * 60 * 1000) as current_mills INTO t6 from (select uuid() uid) as alias; + END IF; + END IF; + SELECT propertiesTable, t1, t2, t3, t4, t5, t6, @initPOVPropertiesTableStmt1 as initPOVPropertiesTableStmt1, @initPOVPropertiesTableStmt2 as initPOVPropertiesTableStmt2, @replTblStmt1 as replTblStmt1, @replTblStmt2 as replTblStmt2; +END ;; +DELIMITER ; +/*!50003 SET sql_mode = @saved_sql_mode */ ; +/*!50003 SET character_set_client = @saved_cs_client */ ; +/*!50003 SET character_set_results = @saved_cs_results */ ; +/*!50003 SET collation_connection = @saved_col_connection */ ; +/*!50003 DROP PROCEDURE IF EXISTS `initPOVRefidsTable` */; +/*!50003 SET @saved_cs_client = @@character_set_client */ ; +/*!50003 SET @saved_cs_results = @@character_set_results */ ; +/*!50003 SET @saved_col_connection = @@collation_connection */ ; +/*!50003 SET character_set_client = utf8 */ ; +/*!50003 SET character_set_results = utf8 */ ; +/*!50003 SET collation_connection = utf8_general_ci */ ; +/*!50003 SET @saved_sql_mode = @@sql_mode */ ; +/*!50003 SET sql_mode = 'STRICT_TRANS_TABLES,ERROR_FOR_DIVISION_BY_ZERO,NO_AUTO_CREATE_USER,NO_ENGINE_SUBSTITUTION' */ ; +DELIMITER ;; +CREATE DEFINER=`caosdb`@`%` PROCEDURE `initPOVRefidsTable`(in vInt INT UNSIGNED, in vText VARCHAR(255)) +BEGIN + DECLARE refIdsTable VARCHAR(255) DEFAULT NULL; + + + IF vText IS NOT NULL THEN + + call createTmpTable(refIdsTable, FALSE); + call initSubEntity(vInt, vText, refIdsTable); + + END IF; + SELECT refIdsTable; + +END ;; +DELIMITER ; +/*!50003 SET sql_mode = @saved_sql_mode */ ; +/*!50003 SET character_set_client = @saved_cs_client */ ; +/*!50003 SET character_set_results = @saved_cs_results */ ; +/*!50003 SET collation_connection = @saved_col_connection */ ; +/*!50003 DROP PROCEDURE IF EXISTS `initQuery` */; +/*!50003 SET @saved_cs_client = @@character_set_client */ ; +/*!50003 SET @saved_cs_results = @@character_set_results */ ; +/*!50003 SET @saved_col_connection = @@collation_connection */ ; +/*!50003 SET character_set_client = utf8 */ ; +/*!50003 SET character_set_results = utf8 */ ; +/*!50003 SET collation_connection = utf8_general_ci */ ; +/*!50003 SET @saved_sql_mode = @@sql_mode */ ; +/*!50003 SET sql_mode = 'STRICT_TRANS_TABLES,ERROR_FOR_DIVISION_BY_ZERO,NO_AUTO_CREATE_USER,NO_ENGINE_SUBSTITUTION' */ ; +DELIMITER ;; +CREATE DEFINER=`caosdb`@`%` PROCEDURE `initQuery`(in versioned BOOLEAN) +BEGIN + CREATE TEMPORARY TABLE IF NOT EXISTS warnings (warning TEXT NOT NULL); + + call createTmpTable(@resultSet, versioned); + SELECT @resultSet as tablename; + +END ;; +DELIMITER ; +/*!50003 SET sql_mode = @saved_sql_mode */ ; +/*!50003 SET character_set_client = @saved_cs_client */ ; +/*!50003 SET character_set_results = @saved_cs_results */ ; +/*!50003 SET collation_connection = @saved_col_connection */ ; +/*!50003 DROP PROCEDURE IF EXISTS `initSubEntity` */; +/*!50003 SET @saved_cs_client = @@character_set_client */ ; +/*!50003 SET @saved_cs_results = @@character_set_results */ ; +/*!50003 SET @saved_col_connection = @@collation_connection */ ; +/*!50003 SET character_set_client = utf8 */ ; +/*!50003 SET character_set_results = utf8 */ ; +/*!50003 SET collation_connection = utf8_general_ci */ ; +/*!50003 SET @saved_sql_mode = @@sql_mode */ ; +/*!50003 SET sql_mode = 
'STRICT_TRANS_TABLES,ERROR_FOR_DIVISION_BY_ZERO,NO_AUTO_CREATE_USER,NO_ENGINE_SUBSTITUTION' */ ; +DELIMITER ;; +CREATE DEFINER=`caosdb`@`%` PROCEDURE `initSubEntity`(in e_id INT UNSIGNED, in ename VARCHAR(255), in tableName VARCHAR(255)) +BEGIN + DECLARE ecount INT DEFAULT 0; + DECLARE op VARCHAR(255) DEFAULT '='; + + IF LOCATE("%", ename) > 0 THEN + SET op = "LIKE"; + END IF; + + SET @stmtStr = CONCAT('INSERT IGNORE INTO `', + tableName, + '` (id) SELECT entity_id FROM name_data WHERE value ', + op, + ' ? AND domain_id=0;'); + + PREPARE stmt FROM @stmtStr; + SET @ename = ename; + EXECUTE stmt USING @ename; + SET ecount = ROW_COUNT(); + DEALLOCATE PREPARE stmt; + + IF e_id IS NOT NULL THEN + SET @stmtStr = CONCAT('INSERT IGNORE INTO `', tableName, '` (id) VALUES (', e_id, ')'); + PREPARE stmt FROM @stmtStr; + EXECUTE stmt; + SET ecount = ecount + ROW_COUNT(); + DEALLOCATE PREPARE stmt; + END IF; + + IF ecount > 0 THEN + + call getChildren(tableName, False); + END IF; + +END ;; +DELIMITER ; +/*!50003 SET sql_mode = @saved_sql_mode */ ; +/*!50003 SET character_set_client = @saved_cs_client */ ; +/*!50003 SET character_set_results = @saved_cs_results */ ; +/*!50003 SET collation_connection = @saved_col_connection */ ; +/*!50003 DROP PROCEDURE IF EXISTS `initSubProperty` */; +/*!50003 SET @saved_cs_client = @@character_set_client */ ; +/*!50003 SET @saved_cs_results = @@character_set_results */ ; +/*!50003 SET @saved_col_connection = @@collation_connection */ ; +/*!50003 SET character_set_client = utf8 */ ; +/*!50003 SET character_set_results = utf8 */ ; +/*!50003 SET collation_connection = utf8_general_ci */ ; +/*!50003 SET @saved_sql_mode = @@sql_mode */ ; +/*!50003 SET sql_mode = 'STRICT_TRANS_TABLES,ERROR_FOR_DIVISION_BY_ZERO,NO_AUTO_CREATE_USER,NO_ENGINE_SUBSTITUTION' */ ; +DELIMITER ;; +CREATE DEFINER=`caosdb`@`%` PROCEDURE `initSubProperty`(in sourceSet VARCHAR(255), in propertiesTable VARCHAR(255), in refIdsTable VARCHAR(255)) +BEGIN +DECLARE newTableName VARCHAR(255) DEFAULT NULL; + call registerTempTableName(newTableName); + + SET @createSubPropertyListTableStr = CONCAT('CREATE TEMPORARY TABLE `', newTableName,'` ( entity_id INT UNSIGNED NOT NULL, id INT UNSIGNED NOT NULL, domain INT UNSIGNED NOT NULL, CONSTRAINT `',newTableName,'PK` PRIMARY KEY (entity_id, id, domain)) ' ); + + PREPARE createSubPropertyListTable FROM @createSubPropertyListTableStr; + EXECUTE createSubPropertyListTable; + DEALLOCATE PREPARE createSubPropertyListTable; + + SET @subResultSetStmtStr = CONCAT('INSERT IGNORE INTO `', newTableName, '` (domain, entity_id, id) + SELECT data1.domain_id as domain, data1.entity_id as entity_id, data1.value as id + FROM reference_data as data1 JOIN reference_data as data2 + ON (data1.domain_id=0 + AND data1.domain_id=data2.domain_id + AND data2.entity_id=data1.entity_id + AND ( + (data1.property_id=data2.value AND data2.status="REPLACEMENT") + OR + (data1.property_id!=data2.value AND data2.status!="REPLACEMENT" AND data1.status!="REPLACEMENT" AND data1.property_id=data2.property_id) + ) + AND EXISTS (SELECT 1 FROM `', sourceSet, '` as source WHERE source.id=data1.entity_id LIMIT 1)', + IF(propertiesTable IS NULL, '', CONCAT(' AND EXISTS (SELECT 1 FROM `', propertiesTable, '` as props WHERE props.id=data2.property_id LIMIT 1)')), + IF(refIdsTable IS NULL, '', CONCAT(' AND EXISTS (SELECT 1 FROM `', refIdsTable, '` as refs WHERE refs.id=data1.value LIMIT 1)')), + ')' + ); + + + PREPARE subResultSetStmt FROM @subResultSetStmtStr; + EXECUTE subResultSetStmt; + DEALLOCATE PREPARE 
subResultSetStmt; + + SELECT newTableName as list; + +END ;; +DELIMITER ; +/*!50003 SET sql_mode = @saved_sql_mode */ ; +/*!50003 SET character_set_client = @saved_cs_client */ ; +/*!50003 SET character_set_results = @saved_cs_results */ ; +/*!50003 SET collation_connection = @saved_col_connection */ ; +/*!50003 DROP PROCEDURE IF EXISTS `insertEntity` */; +/*!50003 SET @saved_cs_client = @@character_set_client */ ; +/*!50003 SET @saved_cs_results = @@character_set_results */ ; +/*!50003 SET @saved_col_connection = @@collation_connection */ ; +/*!50003 SET character_set_client = utf8 */ ; +/*!50003 SET character_set_results = utf8 */ ; +/*!50003 SET collation_connection = utf8_general_ci */ ; +/*!50003 SET @saved_sql_mode = @@sql_mode */ ; +/*!50003 SET sql_mode = 'STRICT_TRANS_TABLES,ERROR_FOR_DIVISION_BY_ZERO,NO_AUTO_CREATE_USER,NO_ENGINE_SUBSTITUTION' */ ; +DELIMITER ;; +CREATE DEFINER=`caosdb`@`%` PROCEDURE `insertEntity`(in EntityName VARCHAR(255), in EntityDesc TEXT, in EntityRole VARCHAR(255), in ACL VARBINARY(65525)) +BEGIN + DECLARE NewEntityID INT UNSIGNED DEFAULT NULL; + DECLARE NewACLID INT UNSIGNED DEFAULT NULL; + DECLARE Hash VARBINARY(255) DEFAULT NULL; + DECLARE Version VARBINARY(255) DEFAULT NULL; + DECLARE Transaction VARBINARY(255) DEFAULT NULL; + + + + call entityACL(NewACLID, ACL); + + + INSERT INTO entities (description, role, acl) + VALUES (EntityDesc, EntityRole, NewACLID); + + + SET NewEntityID = LAST_INSERT_ID(); + + IF is_feature_config("ENTITY_VERSIONING", "ENABLED") THEN + + SET Transaction = @SRID; + SET Version = SHA1(UUID()); + CALL insert_single_child_version(NewEntityID, Hash, Version, Null, Transaction); + END IF; + + + + IF EntityName IS NOT NULL THEN + INSERT INTO name_data + (domain_id, entity_id, property_id, value, status, pidx) + VALUES (0, NewEntityID, 20, EntityName, "FIX", 0); + END IF; + + SELECT NewEntityID as EntityID, Version as Version; + +END ;; +DELIMITER ; +/*!50003 SET sql_mode = @saved_sql_mode */ ; +/*!50003 SET character_set_client = @saved_cs_client */ ; +/*!50003 SET character_set_results = @saved_cs_results */ ; +/*!50003 SET collation_connection = @saved_col_connection */ ; +/*!50003 DROP PROCEDURE IF EXISTS `insertEntityProperty` */; +/*!50003 SET @saved_cs_client = @@character_set_client */ ; +/*!50003 SET @saved_cs_results = @@character_set_results */ ; +/*!50003 SET @saved_col_connection = @@collation_connection */ ; +/*!50003 SET character_set_client = utf8 */ ; +/*!50003 SET character_set_results = utf8 */ ; +/*!50003 SET collation_connection = utf8_general_ci */ ; +/*!50003 SET @saved_sql_mode = @@sql_mode */ ; +/*!50003 SET sql_mode = 'STRICT_TRANS_TABLES,ERROR_FOR_DIVISION_BY_ZERO,NO_AUTO_CREATE_USER,NO_ENGINE_SUBSTITUTION' */ ; +DELIMITER ;; +CREATE DEFINER=`caosdb`@`%` PROCEDURE `insertEntityProperty`( + in DomainID INT UNSIGNED, + in EntityID INT UNSIGNED, + in PropertyID INT UNSIGNED, + in Datatable VARCHAR(255), + in PropertyValue TEXT, + in PropertyUnitSig BIGINT, + in PropertyStatus VARCHAR(255), + in NameOverride VARCHAR(255), + in DescOverride TEXT, + in DatatypeOverride INT UNSIGNED, + in Collection VARCHAR(255), + in PropertyIndex INT UNSIGNED) +BEGIN + DECLARE ReferenceValueIVersion INT UNSIGNED DEFAULT NULL; + DECLARE ReferenceValue INT UNSIGNED DEFAULT NULL; + DECLARE AT_PRESENT INTEGER DEFAULT NULL; + + CASE Datatable + WHEN 'double_data' THEN + INSERT INTO double_data + (domain_id, entity_id, property_id, value, unit_sig, status, pidx) + VALUES + (DomainID, EntityID, PropertyID, PropertyValue, 
PropertyUnitSig, PropertyStatus, PropertyIndex); + WHEN 'integer_data' THEN + INSERT INTO integer_data + (domain_id, entity_id, property_id, value, unit_sig, status, pidx) + VALUES + (DomainID, EntityID, PropertyID, PropertyValue, PropertyUnitSig, PropertyStatus, PropertyIndex); + WHEN 'datetime_data' THEN + INSERT INTO datetime_data + (domain_id, entity_id, property_id, value, value_ns, status, pidx) + VALUES + (DomainID, EntityID, PropertyID, SUBSTRING_INDEX(PropertyValue, 'UTC', 1), IF(SUBSTRING_INDEX(PropertyValue, 'UTC', -1)='',NULL,SUBSTRING_INDEX(PropertyValue, 'UTC', -1)), PropertyStatus, PropertyIndex); + WHEN 'reference_data' THEN + + + SET AT_PRESENT=LOCATE("@", PropertyValue); + IF is_feature_config("ENTITY_VERSIONING", "ENABLED") AND AT_PRESENT > 0 THEN + SET ReferenceValue = SUBSTRING_INDEX(PropertyValue, '@', 1); + SET ReferenceValueIVersion = get_iversion(ReferenceValue, + SUBSTRING_INDEX(PropertyValue, '@', -1)); + + IF ReferenceValueIVersion IS NULL THEN + SELECT 0 from `ReferenceValueIVersion_WAS_NULL`; + END IF; + + ELSE + SET ReferenceValue = PropertyValue; + END IF; + + INSERT INTO reference_data + (domain_id, entity_id, property_id, value, value_iversion, status, + pidx) + VALUES + (DomainID, EntityID, PropertyID, ReferenceValue, + ReferenceValueIVersion, PropertyStatus, PropertyIndex); + WHEN 'enum_data' THEN + INSERT INTO enum_data + (domain_id, entity_id, property_id, value, status, pidx) + VALUES + (DomainID, EntityID, PropertyID, PropertyValue, PropertyStatus, PropertyIndex); + WHEN 'date_data' THEN + INSERT INTO date_data + (domain_id, entity_id, property_id, value, status, pidx) + VALUES + (DomainID, EntityID, PropertyID, SUBSTRING_INDEX(PropertyValue, '.', 1), PropertyStatus, PropertyIndex); + WHEN 'text_data' THEN + INSERT INTO text_data + (domain_id, entity_id, property_id, value, status, pidx) + VALUES + (DomainID, EntityID, PropertyID, PropertyValue, PropertyStatus, PropertyIndex); + WHEN 'null_data' THEN + INSERT INTO null_data + (domain_id, entity_id, property_id, status, pidx) + VALUES + (DomainID, EntityID, PropertyID, PropertyStatus, PropertyIndex); + WHEN 'name_data' THEN + INSERT INTO name_data + (domain_id, entity_id, property_id, value, status, pidx) + VALUES + (DomainID, EntityID, PropertyID, PropertyValue, PropertyStatus, PropertyIndex); + + ELSE + SELECT * FROM table_does_not_exist; + END CASE; + + IF DatatypeOverride IS NOT NULL THEN + call overrideType(DomainID, EntityID, PropertyID, DatatypeOverride); + IF Collection IS NOT NULL THEN + INSERT INTO collection_type (domain_id, entity_id, property_id, collection) VALUES (DomainID, EntityID, PropertyID, Collection); + END IF; + END IF; + + IF NameOverride IS NOT NULL THEN + call overrideName(DomainID, EntityID, PropertyID, NameOverride); + END IF; + + IF DescOverride IS NOT NULL THEN + call overrideDesc(DomainID, EntityID, PropertyID, DescOverride); + END IF; + +END ;; +DELIMITER ; +/*!50003 SET sql_mode = @saved_sql_mode */ ; +/*!50003 SET character_set_client = @saved_cs_client */ ; +/*!50003 SET character_set_results = @saved_cs_results */ ; +/*!50003 SET collation_connection = @saved_col_connection */ ; +/*!50003 DROP PROCEDURE IF EXISTS `insertIsa` */; +/*!50003 SET @saved_cs_client = @@character_set_client */ ; +/*!50003 SET @saved_cs_results = @@character_set_results */ ; +/*!50003 SET @saved_col_connection = @@collation_connection */ ; +/*!50003 SET character_set_client = utf8 */ ; +/*!50003 SET character_set_results = utf8 */ ; +/*!50003 SET collation_connection = utf8_general_ci */ ; 
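+/* Editorial annotation, not part of the generated dump: insertIsa below maintains the transitive closure of the is-a hierarchy in isa_cache. Besides the direct (child, parent) edge it copies every ancestor of the parent to the child and splices together existing paths that run through the child; rpath encodes the connecting path as a '>'-separated chain of entity ids. Hypothetical example, declaring entity 101 a direct child of 100: CALL insertIsa(101, 100); */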
+/*!50003 SET @saved_sql_mode = @@sql_mode */ ; +/*!50003 SET sql_mode = 'STRICT_TRANS_TABLES,ERROR_FOR_DIVISION_BY_ZERO,NO_AUTO_CREATE_USER,NO_ENGINE_SUBSTITUTION' */ ; +DELIMITER ;; +CREATE DEFINER=`caosdb`@`%` PROCEDURE `insertIsa`(IN c INT UNSIGNED, IN p INT UNSIGNED) +insert_is_a_proc: BEGIN + + INSERT INTO isa_cache (child, parent, rpath) VALUES (c, p, c); + + IF p = c THEN + + LEAVE insert_is_a_proc; + END IF; + + + + + + INSERT IGNORE INTO isa_cache SELECT + c + AS child, + i.parent + AS parent, + IF(p=i.rpath or i.rpath=parent, + p, + concat(p, ">", i.rpath)) + AS rpath + FROM isa_cache AS i WHERE i.child = p AND i.child != i.parent; + + + + INSERT IGNORE INTO isa_cache SELECT + l.child, + r.parent, + if(l.rpath=l.child and r.rpath=c, + c, + concat(if(l.rpath=l.child, + c, + concat(l.rpath, '>', c)), + if(r.rpath=c, + '', + concat('>', r.rpath)))) + AS rpath + FROM + isa_cache as l INNER JOIN isa_cache as r + ON (l.parent = c AND c = r.child AND l.child != l.parent); + +END ;; +DELIMITER ; +/*!50003 SET sql_mode = @saved_sql_mode */ ; +/*!50003 SET character_set_client = @saved_cs_client */ ; +/*!50003 SET character_set_results = @saved_cs_results */ ; +/*!50003 SET collation_connection = @saved_col_connection */ ; +/*!50003 DROP PROCEDURE IF EXISTS `insertLinCon` */; +/*!50003 SET @saved_cs_client = @@character_set_client */ ; +/*!50003 SET @saved_cs_results = @@character_set_results */ ; +/*!50003 SET @saved_col_connection = @@collation_connection */ ; +/*!50003 SET character_set_client = utf8 */ ; +/*!50003 SET character_set_results = utf8 */ ; +/*!50003 SET collation_connection = utf8_general_ci */ ; +/*!50003 SET @saved_sql_mode = @@sql_mode */ ; +/*!50003 SET sql_mode = 'STRICT_TRANS_TABLES,ERROR_FOR_DIVISION_BY_ZERO,NO_AUTO_CREATE_USER,NO_ENGINE_SUBSTITUTION' */ ; +DELIMITER ;; +CREATE DEFINER=`caosdb`@`%` PROCEDURE `insertLinCon`(in signature_from BIGINT, in signature_to BIGINT, in a DECIMAL(65,30), in b_dividend BIGINT, in b_divisor BIGINT, in c DECIMAL(65,30)) +BEGIN + + INSERT IGNORE INTO units_lin_con (signature_from, signature_to, a, b_dividend, b_divisor, c) VALUES (signature_from, signature_to, a, b_dividend, b_divisor, c); + +END ;; +DELIMITER ; +/*!50003 SET sql_mode = @saved_sql_mode */ ; +/*!50003 SET character_set_client = @saved_cs_client */ ; +/*!50003 SET character_set_results = @saved_cs_results */ ; +/*!50003 SET collation_connection = @saved_col_connection */ ; +/*!50003 DROP PROCEDURE IF EXISTS `insertUser` */; +/*!50003 SET @saved_cs_client = @@character_set_client */ ; +/*!50003 SET @saved_cs_results = @@character_set_results */ ; +/*!50003 SET @saved_col_connection = @@collation_connection */ ; +/*!50003 SET character_set_client = utf8 */ ; +/*!50003 SET character_set_results = utf8 */ ; +/*!50003 SET collation_connection = utf8_general_ci */ ; +/*!50003 SET @saved_sql_mode = @@sql_mode */ ; +/*!50003 SET sql_mode = 'STRICT_TRANS_TABLES,ERROR_FOR_DIVISION_BY_ZERO,NO_AUTO_CREATE_USER,NO_ENGINE_SUBSTITUTION' */ ; +DELIMITER ;; +CREATE DEFINER=`caosdb`@`%` PROCEDURE `insertUser`(in Name VARCHAR(255), in Password VARCHAR(255)) +BEGIN + + +INSERT INTO entities (name, role, acl) VALUES (Name, 'USER', 0); + +SET @LAST_UserID = LAST_INSERT_ID(); + +INSERT INTO passwords VALUES (@LAST_UserID, Password); + +Select @LAST_UserID as UserID; + +END ;; +DELIMITER ; +/*!50003 SET sql_mode = @saved_sql_mode */ ; +/*!50003 SET character_set_client = @saved_cs_client */ ; +/*!50003 SET character_set_results = @saved_cs_results */ ; +/*!50003 SET collation_connection = 
@saved_col_connection */ ; +/*!50003 DROP PROCEDURE IF EXISTS `insert_single_child_version` */; +/*!50003 SET @saved_cs_client = @@character_set_client */ ; +/*!50003 SET @saved_cs_results = @@character_set_results */ ; +/*!50003 SET @saved_col_connection = @@collation_connection */ ; +/*!50003 SET character_set_client = utf8 */ ; +/*!50003 SET character_set_results = utf8 */ ; +/*!50003 SET collation_connection = utf8_general_ci */ ; +/*!50003 SET @saved_sql_mode = @@sql_mode */ ; +/*!50003 SET sql_mode = 'STRICT_TRANS_TABLES,ERROR_FOR_DIVISION_BY_ZERO,NO_AUTO_CREATE_USER,NO_ENGINE_SUBSTITUTION' */ ; +DELIMITER ;; +CREATE DEFINER=`caosdb`@`%` PROCEDURE `insert_single_child_version`( + in EntityID INT UNSIGNED, + in Hash VARBINARY(255), + in Version VARBINARY(255), + in Parent VARBINARY(255), + in Transaction VARBINARY(255)) +BEGIN + DECLARE newiversion INT UNSIGNED DEFAULT NULL; + DECLARE newipparent INT UNSIGNED DEFAULT NULL; + + + IF Parent IS NOT NULL THEN + SELECT e._iversion INTO newipparent + FROM entity_version AS e + WHERE e.entity_id = EntityID + AND e.version = Parent; + IF newipparent IS NULL THEN + + SELECT concat("This parent does not exist: ", Parent) + FROM nonexisting; + END IF; + END IF; + + + + SELECT max(e._iversion)+1 INTO newiversion + FROM entity_version AS e + WHERE e.entity_id=EntityID; + IF newiversion IS NULL THEN + SET newiversion = 1; + END IF; + + INSERT INTO entity_version + (entity_id, hash, version, _iversion, _ipparent, srid) + VALUES + (EntityID, Hash, Version, newiversion, newipparent, Transaction); + + + +END ;; +DELIMITER ; +/*!50003 SET sql_mode = @saved_sql_mode */ ; +/*!50003 SET character_set_client = @saved_cs_client */ ; +/*!50003 SET character_set_results = @saved_cs_results */ ; +/*!50003 SET collation_connection = @saved_col_connection */ ; +/*!50003 DROP PROCEDURE IF EXISTS `intersectTable` */; +/*!50003 SET @saved_cs_client = @@character_set_client */ ; +/*!50003 SET @saved_cs_results = @@character_set_results */ ; +/*!50003 SET @saved_col_connection = @@collation_connection */ ; +/*!50003 SET character_set_client = utf8 */ ; +/*!50003 SET character_set_results = utf8 */ ; +/*!50003 SET collation_connection = utf8_general_ci */ ; +/*!50003 SET @saved_sql_mode = @@sql_mode */ ; +/*!50003 SET sql_mode = 'STRICT_TRANS_TABLES,ERROR_FOR_DIVISION_BY_ZERO,NO_AUTO_CREATE_USER,NO_ENGINE_SUBSTITUTION' */ ; +DELIMITER ;; +CREATE DEFINER=`caosdb`@`%` PROCEDURE `intersectTable`(in resultSetTable VARCHAR(255), in diff VARCHAR(255)) +BEGIN + SET @diffStmtStr = CONCAT('DELETE FROM `', resultSetTable, '` WHERE id NOT IN ( SELECT id FROM `', diff,'`)'); + PREPARE diffStmt FROM @diffStmtStr; + EXECUTE diffStmt; + + +END ;; +DELIMITER ; +/*!50003 SET sql_mode = @saved_sql_mode */ ; +/*!50003 SET character_set_client = @saved_cs_client */ ; +/*!50003 SET character_set_results = @saved_cs_results */ ; +/*!50003 SET collation_connection = @saved_col_connection */ ; +/*!50003 DROP PROCEDURE IF EXISTS `isSubtype` */; +/*!50003 SET @saved_cs_client = @@character_set_client */ ; +/*!50003 SET @saved_cs_results = @@character_set_results */ ; +/*!50003 SET @saved_col_connection = @@collation_connection */ ; +/*!50003 SET character_set_client = utf8 */ ; +/*!50003 SET character_set_results = utf8 */ ; +/*!50003 SET collation_connection = utf8_general_ci */ ; +/*!50003 SET @saved_sql_mode = @@sql_mode */ ; +/*!50003 SET sql_mode = 'STRICT_TRANS_TABLES,ERROR_FOR_DIVISION_BY_ZERO,NO_AUTO_CREATE_USER,NO_ENGINE_SUBSTITUTION' */ ; +DELIMITER ;; +CREATE DEFINER=`caosdb`@`%` 
PROCEDURE `isSubtype`(in c INT UNSIGNED, in p INT UNSIGNED) +BEGIN + DECLARE ret BOOLEAN DEFAULT FALSE; + SELECT TRUE INTO ret FROM isa_cache AS i WHERE i.child=c AND i.parent=p LIMIT 1; + SELECT ret as ISA; +END ;; +DELIMITER ; +/*!50003 SET sql_mode = @saved_sql_mode */ ; +/*!50003 SET character_set_client = @saved_cs_client */ ; +/*!50003 SET character_set_results = @saved_cs_results */ ; +/*!50003 SET collation_connection = @saved_col_connection */ ; +/*!50003 DROP PROCEDURE IF EXISTS `overrideDesc` */; +/*!50003 SET @saved_cs_client = @@character_set_client */ ; +/*!50003 SET @saved_cs_results = @@character_set_results */ ; +/*!50003 SET @saved_col_connection = @@collation_connection */ ; +/*!50003 SET character_set_client = utf8 */ ; +/*!50003 SET character_set_results = utf8 */ ; +/*!50003 SET collation_connection = utf8_general_ci */ ; +/*!50003 SET @saved_sql_mode = @@sql_mode */ ; +/*!50003 SET sql_mode = 'STRICT_TRANS_TABLES,ERROR_FOR_DIVISION_BY_ZERO,NO_AUTO_CREATE_USER,NO_ENGINE_SUBSTITUTION' */ ; +DELIMITER ;; +CREATE DEFINER=`caosdb`@`%` PROCEDURE `overrideDesc`(in DomainID INT UNSIGNED, in EntityID INT UNSIGNED, in PropertyID INT UNSIGNED, in Description TEXT) +BEGIN + INSERT INTO desc_overrides (domain_id, entity_id, property_id, description) VALUES (DomainID, EntityID, PropertyID, Description); +END ;; +DELIMITER ; +/*!50003 SET sql_mode = @saved_sql_mode */ ; +/*!50003 SET character_set_client = @saved_cs_client */ ; +/*!50003 SET character_set_results = @saved_cs_results */ ; +/*!50003 SET collation_connection = @saved_col_connection */ ; +/*!50003 DROP PROCEDURE IF EXISTS `overrideName` */; +/*!50003 SET @saved_cs_client = @@character_set_client */ ; +/*!50003 SET @saved_cs_results = @@character_set_results */ ; +/*!50003 SET @saved_col_connection = @@collation_connection */ ; +/*!50003 SET character_set_client = utf8 */ ; +/*!50003 SET character_set_results = utf8 */ ; +/*!50003 SET collation_connection = utf8_general_ci */ ; +/*!50003 SET @saved_sql_mode = @@sql_mode */ ; +/*!50003 SET sql_mode = 'STRICT_TRANS_TABLES,ERROR_FOR_DIVISION_BY_ZERO,NO_AUTO_CREATE_USER,NO_ENGINE_SUBSTITUTION' */ ; +DELIMITER ;; +CREATE DEFINER=`caosdb`@`%` PROCEDURE `overrideName`(in DomainID INT UNSIGNED, in EntityID INT UNSIGNED, in PropertyID INT UNSIGNED, in Name VARCHAR(255)) +BEGIN + INSERT INTO name_overrides (domain_id, entity_id, property_id, name) VALUES (DomainID, EntityID, PropertyID, Name); +END ;; +DELIMITER ; +/*!50003 SET sql_mode = @saved_sql_mode */ ; +/*!50003 SET character_set_client = @saved_cs_client */ ; +/*!50003 SET character_set_results = @saved_cs_results */ ; +/*!50003 SET collation_connection = @saved_col_connection */ ; +/*!50003 DROP PROCEDURE IF EXISTS `overrideType` */; +/*!50003 SET @saved_cs_client = @@character_set_client */ ; +/*!50003 SET @saved_cs_results = @@character_set_results */ ; +/*!50003 SET @saved_col_connection = @@collation_connection */ ; +/*!50003 SET character_set_client = utf8 */ ; +/*!50003 SET character_set_results = utf8 */ ; +/*!50003 SET collation_connection = utf8_general_ci */ ; +/*!50003 SET @saved_sql_mode = @@sql_mode */ ; +/*!50003 SET sql_mode = 'STRICT_TRANS_TABLES,ERROR_FOR_DIVISION_BY_ZERO,NO_AUTO_CREATE_USER,NO_ENGINE_SUBSTITUTION' */ ; +DELIMITER ;; +CREATE DEFINER=`caosdb`@`%` PROCEDURE `overrideType`(in DomainID INT UNSIGNED, in EntityID INT UNSIGNED, in PropertyID INT UNSIGNED, in Datatype INT UNSIGNED) +BEGIN + INSERT INTO data_type (domain_id, entity_id, property_id, datatype) VALUES (DomainID, EntityID, 
PropertyID, Datatype); +END ;; +DELIMITER ; +/*!50003 SET sql_mode = @saved_sql_mode */ ; +/*!50003 SET character_set_client = @saved_cs_client */ ; +/*!50003 SET character_set_results = @saved_cs_results */ ; +/*!50003 SET collation_connection = @saved_col_connection */ ; +/*!50003 DROP PROCEDURE IF EXISTS `raiseWarning` */; +/*!50003 SET @saved_cs_client = @@character_set_client */ ; +/*!50003 SET @saved_cs_results = @@character_set_results */ ; +/*!50003 SET @saved_col_connection = @@collation_connection */ ; +/*!50003 SET character_set_client = utf8 */ ; +/*!50003 SET character_set_results = utf8 */ ; +/*!50003 SET collation_connection = utf8_general_ci */ ; +/*!50003 SET @saved_sql_mode = @@sql_mode */ ; +/*!50003 SET sql_mode = 'STRICT_TRANS_TABLES,ERROR_FOR_DIVISION_BY_ZERO,NO_AUTO_CREATE_USER,NO_ENGINE_SUBSTITUTION' */ ; +DELIMITER ;; +CREATE DEFINER=`caosdb`@`%` PROCEDURE `raiseWarning`(in str VARCHAR(20000)) +BEGIN + INSERT INTO warnings VALUES (str); +END ;; +DELIMITER ; +/*!50003 SET sql_mode = @saved_sql_mode */ ; +/*!50003 SET character_set_client = @saved_cs_client */ ; +/*!50003 SET character_set_results = @saved_cs_results */ ; +/*!50003 SET collation_connection = @saved_col_connection */ ; +/*!50003 DROP PROCEDURE IF EXISTS `registerSubdomain` */; +/*!50003 SET @saved_cs_client = @@character_set_client */ ; +/*!50003 SET @saved_cs_results = @@character_set_results */ ; +/*!50003 SET @saved_col_connection = @@collation_connection */ ; +/*!50003 SET character_set_client = utf8 */ ; +/*!50003 SET character_set_results = utf8 */ ; +/*!50003 SET collation_connection = utf8_general_ci */ ; +/*!50003 SET @saved_sql_mode = @@sql_mode */ ; +/*!50003 SET sql_mode = 'STRICT_TRANS_TABLES,ERROR_FOR_DIVISION_BY_ZERO,NO_AUTO_CREATE_USER,NO_ENGINE_SUBSTITUTION' */ ; +DELIMITER ;; +CREATE DEFINER=`caosdb`@`%` PROCEDURE `registerSubdomain`(in amount INT UNSIGNED) +BEGIN + DECLARE ED INTEGER DEFAULT NULL; + + SELECT COUNT(id) INTO ED FROM entities WHERE Role='DOMAIN' AND id!=0; + + WHILE ED < amount DO + INSERT INTO entities (description, role, acl) VALUES + (NULL, 'DOMAIN', 0); + SET ED = ED + 1; + END WHILE; + + SELECT id as DomainID FROM entities WHERE Role='DOMAIN' and id!=0; + +END ;; +DELIMITER ; +/*!50003 SET sql_mode = @saved_sql_mode */ ; +/*!50003 SET character_set_client = @saved_cs_client */ ; +/*!50003 SET character_set_results = @saved_cs_results */ ; +/*!50003 SET collation_connection = @saved_col_connection */ ; +/*!50003 DROP PROCEDURE IF EXISTS `registerTempTableName` */; +/*!50003 SET @saved_cs_client = @@character_set_client */ ; +/*!50003 SET @saved_cs_results = @@character_set_results */ ; +/*!50003 SET @saved_col_connection = @@collation_connection */ ; +/*!50003 SET character_set_client = utf8 */ ; +/*!50003 SET character_set_results = utf8 */ ; +/*!50003 SET collation_connection = utf8_general_ci */ ; +/*!50003 SET @saved_sql_mode = @@sql_mode */ ; +/*!50003 SET sql_mode = 'STRICT_TRANS_TABLES,ERROR_FOR_DIVISION_BY_ZERO,NO_AUTO_CREATE_USER,NO_ENGINE_SUBSTITUTION' */ ; +DELIMITER ;; +CREATE DEFINER=`caosdb`@`%` PROCEDURE `registerTempTableName`(out newTableName VARCHAR(255)) +BEGIN + SET newTableName = md5(CONCAT(RAND(),CURRENT_TIMESTAMP())); + SET @tempTableList = IF(@tempTableList IS NULL, + CONCAT('`',newTableName,'`'), + CONCAT(@tempTableList, ',`', newTableName, '`') + ); +END ;; +DELIMITER ; +/*!50003 SET sql_mode = @saved_sql_mode */ ; +/*!50003 SET character_set_client = @saved_cs_client */ ; +/*!50003 SET character_set_results = @saved_cs_results */ ; 
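+-- Editorial note: registerTempTableName (above) hands back a fresh MD5-based
+-- table name through its OUT parameter and appends it to the session variable
+-- @tempTableList, presumably so callers can drop the tables later. A
+-- hypothetical usage: CALL registerTempTableName(@newName); SELECT @newName;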
+/*!50003 SET collation_connection = @saved_col_connection */ ; +/*!50003 DROP PROCEDURE IF EXISTS `reset_stats` */; +/*!50003 SET @saved_cs_client = @@character_set_client */ ; +/*!50003 SET @saved_cs_results = @@character_set_results */ ; +/*!50003 SET @saved_col_connection = @@collation_connection */ ; +/*!50003 SET character_set_client = utf8 */ ; +/*!50003 SET character_set_results = utf8 */ ; +/*!50003 SET collation_connection = utf8_general_ci */ ; +/*!50003 SET @saved_sql_mode = @@sql_mode */ ; +/*!50003 SET sql_mode = 'STRICT_TRANS_TABLES,ERROR_FOR_DIVISION_BY_ZERO,NO_AUTO_CREATE_USER,NO_ENGINE_SUBSTITUTION' */ ; +DELIMITER ;; +CREATE DEFINER=`caosdb`@`%` PROCEDURE `reset_stats`() +BEGIN + truncate table performance_schema.events_statements_summary_by_digest; + truncate table performance_schema.events_statements_history_long; + END ;; +DELIMITER ; +/*!50003 SET sql_mode = @saved_sql_mode */ ; +/*!50003 SET character_set_client = @saved_cs_client */ ; +/*!50003 SET character_set_results = @saved_cs_results */ ; +/*!50003 SET collation_connection = @saved_col_connection */ ; +/*!50003 DROP PROCEDURE IF EXISTS `retrieveDatatype` */; +/*!50003 SET @saved_cs_client = @@character_set_client */ ; +/*!50003 SET @saved_cs_results = @@character_set_results */ ; +/*!50003 SET @saved_col_connection = @@collation_connection */ ; +/*!50003 SET character_set_client = utf8 */ ; +/*!50003 SET character_set_results = utf8 */ ; +/*!50003 SET collation_connection = utf8_general_ci */ ; +/*!50003 SET @saved_sql_mode = @@sql_mode */ ; +/*!50003 SET sql_mode = 'STRICT_TRANS_TABLES,ERROR_FOR_DIVISION_BY_ZERO,NO_AUTO_CREATE_USER,NO_ENGINE_SUBSTITUTION' */ ; +DELIMITER ;; +CREATE DEFINER=`caosdb`@`%` PROCEDURE `retrieveDatatype`(in DatatypeName VARCHAR(255)) +BEGIN + +Select e.id INTO @DatatypeID from entities e where e.name=DatatypeName AND e.role='DATATYPE' LIMIT 1; + +call retrieveEntity(@DatatypeID); + + + + +END ;; +DELIMITER ; +/*!50003 SET sql_mode = @saved_sql_mode */ ; +/*!50003 SET character_set_client = @saved_cs_client */ ; +/*!50003 SET character_set_results = @saved_cs_results */ ; +/*!50003 SET collation_connection = @saved_col_connection */ ; +/*!50003 DROP PROCEDURE IF EXISTS `retrieveEntity` */; +/*!50003 SET @saved_cs_client = @@character_set_client */ ; +/*!50003 SET @saved_cs_results = @@character_set_results */ ; +/*!50003 SET @saved_col_connection = @@collation_connection */ ; +/*!50003 SET character_set_client = utf8 */ ; +/*!50003 SET character_set_results = utf8 */ ; +/*!50003 SET collation_connection = utf8_general_ci */ ; +/*!50003 SET @saved_sql_mode = @@sql_mode */ ; +/*!50003 SET sql_mode = 'STRICT_TRANS_TABLES,ERROR_FOR_DIVISION_BY_ZERO,NO_AUTO_CREATE_USER,NO_ENGINE_SUBSTITUTION' */ ; +DELIMITER ;; +CREATE DEFINER=`caosdb`@`%` PROCEDURE `retrieveEntity`( + in EntityID INT UNSIGNED, + in Version VARBINARY(255)) +retrieveEntityBody: BEGIN + DECLARE FilePath VARCHAR(255) DEFAULT NULL; + DECLARE FileSize VARCHAR(255) DEFAULT NULL; + DECLARE FileHash VARCHAR(255) DEFAULT NULL; + DECLARE DatatypeID INT UNSIGNED DEFAULT NULL; + DECLARE CollectionName VARCHAR(255) DEFAULT NULL; + DECLARE IsHead BOOLEAN DEFAULT TRUE; + DECLARE IVersion INT UNSIGNED DEFAULT NULL; + + + IF is_feature_config("ENTITY_VERSIONING", "ENABLED") THEN + + IF Version IS NULL OR UPPER(Version) = "HEAD" THEN + SET Version = get_head_version(EntityID); + ELSEIF UPPER(LEFT(Version, 5)) = "HEAD~" THEN + SET IsHead = FALSE; + SET Version = get_head_relative(EntityID, SUBSTR(Version, 6)); + ELSE + SELECT 
get_head_version(EntityID) = Version INTO IsHead; + END IF; + + IF IsHead IS FALSE THEN + SET IVersion=get_iversion(EntityID, Version); + + IF IVersion IS NULL THEN + + SELECT 0 FROM entities WHERE 0 = 1; + LEAVE retrieveEntityBody; + END IF; + + SELECT path, size, HEX(hash) + INTO FilePath, FileSize, FileHash + FROM archive_files + WHERE file_id = EntityID + AND _iversion = IVersion + LIMIT 1; + + SELECT datatype + INTO DatatypeID + FROM archive_data_type + WHERE domain_id = 0 + AND entity_id = 0 + AND property_id = EntityID + AND _iversion = IVersion + LIMIT 1; + + SELECT collection + INTO CollectionName + FROM archive_collection_type + WHERE domain_id = 0 + AND entity_id = 0 + AND property_id = EntityID + AND _iversion = IVersion + LIMIT 1; + + + SELECT + ( SELECT value FROM + ( SELECT value FROM name_data + WHERE domain_id = 0 + AND entity_ID = DatatypeID + AND property_id = 20 + UNION SELECT DatatypeID AS value + ) AS tmp LIMIT 1 ) AS Datatype, + CollectionName AS Collection, + EntityID AS EntityID, + ( SELECT value FROM archive_name_data + WHERE domain_id = 0 + AND entity_ID = EntityID + AND property_id = 20 + AND _iversion = IVersion + + ) AS EntityName, + e.description AS EntityDesc, + e.role AS EntityRole, + FileSize AS FileSize, + FilePath AS FilePath, + FileHash AS FileHash, + (SELECT acl FROM entity_acl AS a WHERE a.id = e.acl) AS ACL, + Version AS Version + FROM archive_entities AS e + WHERE e.id = EntityID + AND e._iversion = IVersion + LIMIT 1; + + + LEAVE retrieveEntityBody; + + END IF; + END IF; + + SELECT path, size, hex(hash) + INTO FilePath, FileSize, FileHash + FROM files + WHERE file_id = EntityID + LIMIT 1; + + SELECT datatype INTO DatatypeID + FROM data_type + WHERE domain_id=0 + AND entity_id=0 + AND property_id=EntityID + LIMIT 1; + + SELECT collection INTO CollectionName + FROM collection_type + WHERE domain_id=0 + AND entity_id=0 + AND property_id=EntityID + LIMIT 1; + + SELECT + ( SELECT value FROM name_data + WHERE domain_id = 0 + AND entity_ID = DatatypeID + AND property_id = 20 LIMIT 1 ) AS Datatype, + CollectionName AS Collection, + EntityID AS EntityID, + ( SELECT value FROM name_data + WHERE domain_id = 0 + AND entity_ID = EntityID + AND property_id = 20 LIMIT 1) AS EntityName, + e.description AS EntityDesc, + e.role AS EntityRole, + FileSize AS FileSize, + FilePath AS FilePath, + FileHash AS FileHash, + (SELECT acl FROM entity_acl AS a WHERE a.id = e.acl) AS ACL, + Version AS Version + FROM entities e WHERE id = EntityID LIMIT 1; +END ;; +DELIMITER ; +/*!50003 SET sql_mode = @saved_sql_mode */ ; +/*!50003 SET character_set_client = @saved_cs_client */ ; +/*!50003 SET character_set_results = @saved_cs_results */ ; +/*!50003 SET collation_connection = @saved_col_connection */ ; +/*!50003 DROP PROCEDURE IF EXISTS `retrieveEntityParents` */; +/*!50003 SET @saved_cs_client = @@character_set_client */ ; +/*!50003 SET @saved_cs_results = @@character_set_results */ ; +/*!50003 SET @saved_col_connection = @@collation_connection */ ; +/*!50003 SET character_set_client = utf8 */ ; +/*!50003 SET character_set_results = utf8 */ ; +/*!50003 SET collation_connection = utf8_general_ci */ ; +/*!50003 SET @saved_sql_mode = @@sql_mode */ ; +/*!50003 SET sql_mode = 'STRICT_TRANS_TABLES,ERROR_FOR_DIVISION_BY_ZERO,NO_AUTO_CREATE_USER,NO_ENGINE_SUBSTITUTION' */ ; +DELIMITER ;; +CREATE DEFINER=`caosdb`@`%` PROCEDURE `retrieveEntityParents`( + in EntityID INT UNSIGNED, + in Version VARBINARY(255)) +retrieveEntityParentsBody: BEGIN + + DECLARE IVersion INT UNSIGNED DEFAULT NULL; 
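+ -- Editorial note: IsHead (declared below) defaults to TRUE; only when entity
+ -- versioning is enabled and a non-head Version is requested are the parents
+ -- read from archive_isa instead of the live isa_cache.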
+ DECLARE IsHead BOOLEAN DEFAULT TRUE; + + IF is_feature_config("ENTITY_VERSIONING", "ENABLED") THEN + IF Version IS NOT NULL THEN + SELECT get_head_version(EntityID) = Version INTO IsHead; + END IF; + + IF IsHead IS FALSE THEN + SELECT e._iversion INTO IVersion + FROM entity_version as e + WHERE e.entity_id = EntityID + AND e.version = Version; + + IF IVersion IS NULL THEN + + LEAVE retrieveEntityParentsBody; + END IF; + + SELECT + i.parent AS ParentID, + ( SELECT value FROM name_data + WHERE domain_id = 0 + AND entity_id = ParentID + AND property_id = 20 + ) AS ParentName, + + + + + e.description AS ParentDescription, + e.role AS ParentRole, + (SELECT acl FROM entity_acl AS a WHERE a.id = e.acl) AS ACL + FROM archive_isa AS i JOIN entities AS e + ON (i.parent = e.id) + WHERE i.child = EntityID + AND i.child_iversion = IVersion + AND i.direct IS TRUE + ; + + LEAVE retrieveEntityParentsBody; + END IF; + END IF; + + SELECT + i.parent AS ParentID, + ( SELECT value FROM name_data + WHERE domain_id = 0 + AND entity_id = ParentID + AND property_id = 20 ) AS ParentName, + e.description AS ParentDescription, + e.role AS ParentRole, + (SELECT acl FROM entity_acl AS a WHERE a.id = e.acl) AS ACL + FROM isa_cache AS i JOIN entities AS e + ON (i.parent = e.id) + WHERE i.child = EntityID + AND i.rpath = EntityID; +END ;; +DELIMITER ; +/*!50003 SET sql_mode = @saved_sql_mode */ ; +/*!50003 SET character_set_client = @saved_cs_client */ ; +/*!50003 SET character_set_results = @saved_cs_results */ ; +/*!50003 SET collation_connection = @saved_col_connection */ ; +/*!50003 DROP PROCEDURE IF EXISTS `retrieveEntityProperties` */; +/*!50003 SET @saved_cs_client = @@character_set_client */ ; +/*!50003 SET @saved_cs_results = @@character_set_results */ ; +/*!50003 SET @saved_col_connection = @@collation_connection */ ; +/*!50003 SET character_set_client = utf8 */ ; +/*!50003 SET character_set_results = utf8 */ ; +/*!50003 SET collation_connection = utf8_general_ci */ ; +/*!50003 SET @saved_sql_mode = @@sql_mode */ ; +/*!50003 SET sql_mode = 'STRICT_TRANS_TABLES,ERROR_FOR_DIVISION_BY_ZERO,NO_AUTO_CREATE_USER,NO_ENGINE_SUBSTITUTION' */ ; +DELIMITER ;; +CREATE DEFINER=`caosdb`@`%` PROCEDURE `retrieveEntityProperties`( + in DomainID INT UNSIGNED, + in EntityID INT UNSIGNED, + in Version VARBINARY(255)) +retrieveEntityPropertiesBody: BEGIN + + DECLARE IVersion INT UNSIGNED DEFAULT NULL; + DECLARE IsHead BOOLEAN DEFAULT TRUE; + + IF is_feature_config("ENTITY_VERSIONING", "ENABLED") THEN + IF Version IS NOT NULL THEN + IF DomainID = 0 THEN + SELECT get_head_version(EntityID) = Version INTO IsHead; + ELSE + SELECT get_head_version(DomainID) = Version INTO IsHead; + END IF; + + END IF; + + IF IsHead IS FALSE THEN + SELECT e._iversion INTO IVersion + FROM entity_version as e + WHERE ((e.entity_id = EntityID AND DomainID = 0) + OR (e.entity_id = DomainID)) + AND e.version = Version; + + IF IVersion IS NULL THEN + + LEAVE retrieveEntityPropertiesBody; + END IF; + + + SELECT + property_id AS PropertyID, + value AS PropertyValue, + status AS PropertyStatus, + pidx AS PropertyIndex + FROM archive_double_data + WHERE domain_id = DomainID + AND entity_id = EntityID + AND _iversion = IVersion + + UNION ALL + + + SELECT + property_id AS PropertyID, + value AS PropertyValue, + status AS PropertyStatus, + pidx AS PropertyIndex + FROM archive_integer_data + WHERE domain_id = DomainID + AND entity_id = EntityID + AND _iversion = IVersion + + UNION ALL + + + SELECT + property_id AS PropertyID, + CONCAT(value, '.NULL.NULL') AS 
PropertyValue, + status AS PropertyStatus, + pidx AS PropertyIndex + FROM archive_date_data + WHERE domain_id = DomainID + AND entity_id = EntityID + AND _iversion = IVersion + + UNION ALL + + + SELECT + property_id AS PropertyID, + CONCAT(value, 'UTC', IF(value_ns IS NULL, '', value_ns)) + AS PropertyValue, + status AS PropertyStatus, + pidx AS PropertyIndex + FROM archive_datetime_data + WHERE domain_id = DomainID + AND entity_id = EntityID + AND _iversion = IVersion + + UNION ALL + + + SELECT + property_id AS PropertyID, + value AS PropertyValue, + status AS PropertyStatus, + pidx AS PropertyIndex + FROM archive_text_data + WHERE domain_id = DomainID + AND entity_id = EntityID + AND _iversion = IVersion + + UNION ALL + + + SELECT + property_id AS PropertyID, + value AS PropertyValue, + status AS PropertyStatus, + pidx AS PropertyIndex + FROM archive_enum_data + WHERE domain_id = DomainID + AND entity_id = EntityID + AND _iversion = IVersion + + UNION ALL + + + SELECT + property_id AS PropertyID, + IF(value_iversion IS NULL, value, + + CONCAT(value, "@", _get_version(value, value_iversion))) + AS PropertyValue, + status AS PropertyStatus, + pidx AS PropertyIndex + FROM archive_reference_data + WHERE domain_id = DomainID + AND entity_id = EntityID + AND _iversion = IVersion + + UNION ALL + + + SELECT + property_id AS PropertyID, + NULL AS PropertyValue, + status AS PropertyStatus, + pidx AS PropertyIndex + FROM archive_null_data + WHERE domain_id = DomainID + AND entity_id = EntityID + AND _iversion = IVersion + + UNION ALL + + + SELECT + property_id AS PropertyID, + value AS PropertyValue, + status AS PropertyStatus, + pidx AS PropertyIndex + FROM archive_name_data + WHERE domain_id = DomainID + AND entity_id = EntityID + AND property_id != 20 + AND _iversion = IVersion; + + LEAVE retrieveEntityPropertiesBody; + END IF; + END IF; + + + SELECT + property_id AS PropertyID, + value AS PropertyValue, + status AS PropertyStatus, + pidx AS PropertyIndex + FROM double_data + WHERE domain_id = DomainID + AND entity_id = EntityID + + UNION ALL + + + SELECT + property_id AS PropertyID, + value AS PropertyValue, + status AS PropertyStatus, + pidx AS PropertyIndex + FROM integer_data + WHERE domain_id = DomainID + AND entity_id = EntityID + + UNION ALL + + + SELECT + property_id AS PropertyID, + CONCAT(value, '.NULL.NULL') AS PropertyValue, + status AS PropertyStatus, + pidx AS PropertyIndex + FROM date_data + WHERE domain_id = DomainID + AND entity_id = EntityID + + UNION ALL + + + SELECT + property_id AS PropertyID, + CONCAT(value, 'UTC', IF(value_ns IS NULL, '', value_ns)) + AS PropertyValue, + status AS PropertyStatus, + pidx AS PropertyIndex + FROM datetime_data + WHERE domain_id = DomainID + AND entity_id = EntityID + + UNION ALL + + + SELECT + property_id AS PropertyID, + value AS PropertyValue, + status AS PropertyStatus, + pidx AS PropertyIndex + FROM text_data + WHERE domain_id = DomainID + AND entity_id = EntityID + + UNION ALL + + + SELECT + property_id AS PropertyID, + value AS PropertyValue, + status AS PropertyStatus, + pidx AS PropertyIndex + FROM enum_data + WHERE domain_id = DomainID + AND entity_id = EntityID + + UNION ALL + + + SELECT + property_id AS PropertyID, + IF(value_iversion IS NULL, value, + CONCAT(value, "@", _get_version(value, value_iversion))) + AS PropertyValue, + status AS PropertyStatus, + pidx AS PropertyIndex + FROM reference_data + WHERE domain_id = DomainID + AND entity_id = EntityID + + UNION ALL + + + SELECT + property_id AS PropertyID, + NULL AS 
PropertyValue, + status AS PropertyStatus, + pidx AS PropertyIndex + FROM null_data + WHERE domain_id = DomainID + AND entity_id = EntityID + + UNION ALL + + + SELECT + property_id AS PropertyID, + value AS PropertyValue, + status AS PropertyStatus, + pidx AS PropertyIndex + FROM name_data + WHERE domain_id = DomainID + AND entity_id = EntityID + AND property_id != 20; + + +END ;; +DELIMITER ; +/*!50003 SET sql_mode = @saved_sql_mode */ ; +/*!50003 SET character_set_client = @saved_cs_client */ ; +/*!50003 SET character_set_results = @saved_cs_results */ ; +/*!50003 SET collation_connection = @saved_col_connection */ ; +/*!50003 DROP PROCEDURE IF EXISTS `retrieveOverrides` */; +/*!50003 SET @saved_cs_client = @@character_set_client */ ; +/*!50003 SET @saved_cs_results = @@character_set_results */ ; +/*!50003 SET @saved_col_connection = @@collation_connection */ ; +/*!50003 SET character_set_client = utf8 */ ; +/*!50003 SET character_set_results = utf8 */ ; +/*!50003 SET collation_connection = utf8_general_ci */ ; +/*!50003 SET @saved_sql_mode = @@sql_mode */ ; +/*!50003 SET sql_mode = 'STRICT_TRANS_TABLES,ERROR_FOR_DIVISION_BY_ZERO,NO_AUTO_CREATE_USER,NO_ENGINE_SUBSTITUTION' */ ; +DELIMITER ;; +CREATE DEFINER=`caosdb`@`%` PROCEDURE `retrieveOverrides`( + in DomainID INT UNSIGNED, + in EntityID INT UNSIGNED, + in Version VARBINARY(255)) +retrieveOverridesBody: BEGIN + + DECLARE IVersion INT UNSIGNED DEFAULT NULL; + DECLARE IsHead BOOLEAN DEFAULT TRUE; + + IF is_feature_config("ENTITY_VERSIONING", "ENABLED") THEN + IF Version IS NOT NULL THEN + IF DomainID = 0 THEN + SELECT get_head_version(EntityID) = Version INTO IsHead; + ELSE + SELECT get_head_version(DomainID) = Version INTO IsHead; + END IF; + END IF; + + IF IsHead IS FALSE THEN + SELECT e._iversion INTO IVersion + FROM entity_version as e + WHERE ((e.entity_id = EntityID AND DomainID = 0) + OR (e.entity_id = DomainID)) + AND e.version = Version; + + IF IVersion IS NULL THEN + + LEAVE retrieveOverridesBody; + END IF; + + + SELECT + NULL AS collection_override, + name AS name_override, + NULL AS desc_override, + NULL AS type_override, + entity_id, + property_id + FROM archive_name_overrides + WHERE domain_id = DomainID + AND entity_id = EntityID + AND _iversion = IVersion + + UNION ALL + + + SELECT + NULL AS collection_override, + NULL AS name_override, + description AS desc_override, + NULL AS type_override, + entity_id, + property_id + FROM archive_desc_overrides + WHERE domain_id = DomainID + AND entity_id = EntityID + AND _iversion = IVersion + + UNION ALL + + + SELECT + NULL AS collection_override, + NULL AS name_override, + NULL AS desc_override, + IFNULL((SELECT value FROM name_data + WHERE domain_id = 0 + AND entity_id = datatype + AND property_id = 20 + LIMIT 1), datatype) AS type_override, + entity_id, + property_id + FROM archive_data_type + WHERE domain_id = DomainID + AND entity_id = EntityID + AND _iversion = IVersion + + UNION ALL + + + SELECT + collection AS collection_override, + NULL AS name_override, + NULL AS desc_override, + NULL AS type_override, + entity_id, + property_id + FROM archive_collection_type + WHERE domain_id = DomainID + AND entity_id = EntityID + AND _iversion = IVersion; + + LEAVE retrieveOverridesBody; + END IF; + END IF; + + SELECT + NULL AS collection_override, + name AS name_override, + NULL AS desc_override, + NULL AS type_override, + entity_id, + property_id + FROM name_overrides + WHERE domain_id = DomainID + AND entity_id = EntityID + + UNION ALL + + SELECT + NULL AS collection_override, + 
NULL AS name_override, + description AS desc_override, + NULL AS type_override, + entity_id, + property_id + FROM desc_overrides + WHERE domain_id = DomainID + AND entity_id = EntityID + + UNION ALL + + SELECT + NULL AS collection_override, + NULL AS name_override, + NULL AS desc_override, + IFNULL((SELECT value FROM name_data + WHERE domain_id = 0 + AND entity_ID = datatype + AND property_id = 20 LIMIT 1), datatype) AS type_override, + entity_id, + property_id + FROM data_type + WHERE domain_id = DomainID + AND entity_id = EntityID + + UNION ALL + + SELECT + collection AS collection_override, + NULL AS name_override, + NULL AS desc_override, + NULL AS type_override, + entity_id, + property_id + FROM collection_type + WHERE domain_id = DomainID + AND entity_id = EntityID; + + +END ;; +DELIMITER ; +/*!50003 SET sql_mode = @saved_sql_mode */ ; +/*!50003 SET character_set_client = @saved_cs_client */ ; +/*!50003 SET character_set_results = @saved_cs_results */ ; +/*!50003 SET collation_connection = @saved_col_connection */ ; +/*!50003 DROP PROCEDURE IF EXISTS `retrieveQueryTemplateDef` */; +/*!50003 SET @saved_cs_client = @@character_set_client */ ; +/*!50003 SET @saved_cs_results = @@character_set_results */ ; +/*!50003 SET @saved_col_connection = @@collation_connection */ ; +/*!50003 SET character_set_client = utf8 */ ; +/*!50003 SET character_set_results = utf8 */ ; +/*!50003 SET collation_connection = utf8_general_ci */ ; +/*!50003 SET @saved_sql_mode = @@sql_mode */ ; +/*!50003 SET sql_mode = 'STRICT_TRANS_TABLES,ERROR_FOR_DIVISION_BY_ZERO,NO_AUTO_CREATE_USER,NO_ENGINE_SUBSTITUTION' */ ; +DELIMITER ;; +CREATE DEFINER=`caosdb`@`%` PROCEDURE `retrieveQueryTemplateDef`( + in EntityID INT UNSIGNED, + in Version VARBINARY(255)) +retrieveQueryTemplateDefBody: BEGIN + + DECLARE IVersion INT UNSIGNED DEFAULT NULL; + DECLARE IsHead BOOLEAN DEFAULT TRUE; + + IF is_feature_config("ENTITY_VERSIONING", "ENABLED") THEN + + IF Version IS NOT NULL THEN + SELECT get_head_version(EntityID) = Version INTO IsHead; + END IF; + + IF IsHead IS FALSE THEN + + SELECT e._iversion INTO IVersion + FROM entity_version as e + WHERE e.entity_id = EntityID + AND e.version = Version; + + IF IVersion IS NULL THEN + + LEAVE retrieveQueryTemplateDefBody; + END IF; + + SELECT definition + FROM archive_query_template_def + WHERE id = EntityID + AND _iversion = IVersion; + + LEAVE retrieveQueryTemplateDefBody; + END IF; + END IF; + + SELECT definition + FROM query_template_def + WHERE id = EntityID; + +END ;; +DELIMITER ; +/*!50003 SET sql_mode = @saved_sql_mode */ ; +/*!50003 SET character_set_client = @saved_cs_client */ ; +/*!50003 SET character_set_results = @saved_cs_results */ ; +/*!50003 SET collation_connection = @saved_col_connection */ ; +/*!50003 DROP PROCEDURE IF EXISTS `setFileProperties` */; +/*!50003 SET @saved_cs_client = @@character_set_client */ ; +/*!50003 SET @saved_cs_results = @@character_set_results */ ; +/*!50003 SET @saved_col_connection = @@collation_connection */ ; +/*!50003 SET character_set_client = utf8 */ ; +/*!50003 SET character_set_results = utf8 */ ; +/*!50003 SET collation_connection = utf8_general_ci */ ; +/*!50003 SET @saved_sql_mode = @@sql_mode */ ; +/*!50003 SET sql_mode = 'STRICT_TRANS_TABLES,ERROR_FOR_DIVISION_BY_ZERO,NO_AUTO_CREATE_USER,NO_ENGINE_SUBSTITUTION' */ ; +DELIMITER ;; +CREATE DEFINER=`caosdb`@`%` PROCEDURE `setFileProperties`( + in EntityID INT UNSIGNED, + in FilePath TEXT, + in FileSize BIGINT UNSIGNED, + in FileHash VARCHAR(255) +) +BEGIN + DECLARE IVersion INT UNSIGNED 
DEFAULT NULL; + IF is_feature_config("ENTITY_VERSIONING", "ENABLED") THEN + SELECT max(e._iversion) INTO IVersion + FROM entity_version AS e + WHERE e.entity_id = EntityID; + + INSERT INTO archive_files (file_id, path, size, hash, + _iversion) + SELECT file_id, path, size, hash, IVersion AS _iversion + FROM files + WHERE file_id = EntityID; + END IF; + + DELETE FROM files WHERE file_id = EntityID; + + IF FilePath IS NOT NULL THEN + INSERT INTO files (file_id, path, size, hash) + VALUES (EntityID, FilePath, FileSize, unhex(FileHash)); + END IF; + +END ;; +DELIMITER ; +/*!50003 SET sql_mode = @saved_sql_mode */ ; +/*!50003 SET character_set_client = @saved_cs_client */ ; +/*!50003 SET character_set_results = @saved_cs_results */ ; +/*!50003 SET collation_connection = @saved_col_connection */ ; +/*!50003 DROP PROCEDURE IF EXISTS `setPassword` */; +/*!50003 SET @saved_cs_client = @@character_set_client */ ; +/*!50003 SET @saved_cs_results = @@character_set_results */ ; +/*!50003 SET @saved_col_connection = @@collation_connection */ ; +/*!50003 SET character_set_client = utf8 */ ; +/*!50003 SET character_set_results = utf8 */ ; +/*!50003 SET collation_connection = utf8_general_ci */ ; +/*!50003 SET @saved_sql_mode = @@sql_mode */ ; +/*!50003 SET sql_mode = 'STRICT_TRANS_TABLES,ERROR_FOR_DIVISION_BY_ZERO,NO_AUTO_CREATE_USER,NO_ENGINE_SUBSTITUTION' */ ; +DELIMITER ;; +CREATE DEFINER=`caosdb`@`%` PROCEDURE `setPassword`(in EntityID INT UNSIGNED, in NewPassword VARCHAR(255)) +BEGIN + + + DELETE FROM passwords where entity_id=EntityID; + INSERT INTO passwords (entity_id, password) VALUES (EntityID, NewPassword); + +END ;; +DELIMITER ; +/*!50003 SET sql_mode = @saved_sql_mode */ ; +/*!50003 SET character_set_client = @saved_cs_client */ ; +/*!50003 SET character_set_results = @saved_cs_results */ ; +/*!50003 SET collation_connection = @saved_col_connection */ ; +/*!50003 DROP PROCEDURE IF EXISTS `set_transaction` */; +/*!50003 SET @saved_cs_client = @@character_set_client */ ; +/*!50003 SET @saved_cs_results = @@character_set_results */ ; +/*!50003 SET @saved_col_connection = @@collation_connection */ ; +/*!50003 SET character_set_client = utf8 */ ; +/*!50003 SET character_set_results = utf8 */ ; +/*!50003 SET collation_connection = utf8_general_ci */ ; +/*!50003 SET @saved_sql_mode = @@sql_mode */ ; +/*!50003 SET sql_mode = 'STRICT_TRANS_TABLES,ERROR_FOR_DIVISION_BY_ZERO,NO_AUTO_CREATE_USER,NO_ENGINE_SUBSTITUTION' */ ; +DELIMITER ;; +CREATE DEFINER=`caosdb`@`%` PROCEDURE `set_transaction`( + srid VARBINARY(255), + username VARCHAR(255), + realm VARCHAR(255), + seconds BIGINT UNSIGNED, + nanos INT(10) UNSIGNED) +BEGIN + + SET @SRID = srid; + INSERT INTO transactions (srid, username, realm, seconds, nanos) + VALUES (srid, username, realm, seconds, nanos); +END ;; +DELIMITER ; +/*!50003 SET sql_mode = @saved_sql_mode */ ; +/*!50003 SET character_set_client = @saved_cs_client */ ; +/*!50003 SET character_set_results = @saved_cs_results */ ; +/*!50003 SET collation_connection = @saved_col_connection */ ; +/*!50003 DROP PROCEDURE IF EXISTS `showEntityAutoIncr` */; +/*!50003 SET @saved_cs_client = @@character_set_client */ ; +/*!50003 SET @saved_cs_results = @@character_set_results */ ; +/*!50003 SET @saved_col_connection = @@collation_connection */ ; +/*!50003 SET character_set_client = utf8 */ ; +/*!50003 SET character_set_results = utf8 */ ; +/*!50003 SET collation_connection = utf8_general_ci */ ; +/*!50003 SET @saved_sql_mode = @@sql_mode */ ; +/*!50003 SET sql_mode = 
'STRICT_TRANS_TABLES,ERROR_FOR_DIVISION_BY_ZERO,NO_AUTO_CREATE_USER,NO_ENGINE_SUBSTITUTION' */ ; +DELIMITER ;; +CREATE DEFINER=`caosdb`@`%` PROCEDURE `showEntityAutoIncr`() +BEGIN +SELECT `AUTO_INCREMENT` +FROM INFORMATION_SCHEMA.TABLES +WHERE TABLE_SCHEMA = 'caosdb' +AND TABLE_NAME = 'entities'; + + +END ;; +DELIMITER ; +/*!50003 SET sql_mode = @saved_sql_mode */ ; +/*!50003 SET character_set_client = @saved_cs_client */ ; +/*!50003 SET character_set_results = @saved_cs_results */ ; +/*!50003 SET collation_connection = @saved_col_connection */ ; +/*!50003 DROP PROCEDURE IF EXISTS `updateEntity` */; +/*!50003 SET @saved_cs_client = @@character_set_client */ ; +/*!50003 SET @saved_cs_results = @@character_set_results */ ; +/*!50003 SET @saved_col_connection = @@collation_connection */ ; +/*!50003 SET character_set_client = utf8 */ ; +/*!50003 SET character_set_results = utf8 */ ; +/*!50003 SET collation_connection = utf8_general_ci */ ; +/*!50003 SET @saved_sql_mode = @@sql_mode */ ; +/*!50003 SET sql_mode = 'STRICT_TRANS_TABLES,ERROR_FOR_DIVISION_BY_ZERO,NO_AUTO_CREATE_USER,NO_ENGINE_SUBSTITUTION' */ ; +DELIMITER ;; +CREATE DEFINER=`caosdb`@`%` PROCEDURE `updateEntity`( + in EntityID INT UNSIGNED, + in EntityName VARCHAR(255), + in EntityDescription TEXT, + in EntityRole VARCHAR(255), + in Datatype VARCHAR(255), + in Collection VARCHAR(255), + in ACL VARBINARY(65525)) +BEGIN + DECLARE ACLID INT UNSIGNED DEFAULT NULL; + DECLARE Hash VARBINARY(255) DEFAULT NULL; + DECLARE Version VARBINARY(255) DEFAULT SHA1(UUID()); + DECLARE ParentVersion VARBINARY(255) DEFAULT NULL; + DECLARE Transaction VARBINARY(255) DEFAULT NULL; + DECLARE OldIVersion INT UNSIGNED DEFAULT NULL; + + call entityACL(ACLID, ACL); + + IF is_feature_config("ENTITY_VERSIONING", "ENABLED") THEN + SELECT max(_iversion) INTO OldIVersion + FROM entity_version + WHERE entity_id = EntityID; + + + INSERT INTO archive_entities (id, description, role, + acl, _iversion) + SELECT e.id, e.description, e.role, e.acl, OldIVersion + FROM entities AS e + WHERE e.id = EntityID; + + INSERT INTO archive_data_type (domain_id, entity_id, property_id, + datatype, _iversion) + SELECT e.domain_id, e.entity_id, e.property_id, e.datatype, + OldIVersion + FROM data_type AS e + WHERE e.domain_id = 0 + AND e.entity_id = 0 + AND e.property_id = EntityID; + + INSERT INTO archive_collection_type (domain_id, entity_id, property_id, + collection, _iversion) + SELECT e.domain_id, e.entity_id, e.property_id, e.collection, + OldIVersion + FROM collection_type as e + WHERE e.domain_id = 0 + AND e.entity_id = 0 + AND e.property_id = EntityID; + + + SET Transaction = @SRID; + SELECT e.version INTO ParentVersion + FROM entity_version as e + WHERE e.entity_id = EntityID + AND e._iversion = OldIVersion; + CALL insert_single_child_version( + EntityID, Hash, Version, + ParentVersion, Transaction); + END IF; + + UPDATE entities e + SET e.description = EntityDescription, + e.role=EntityRole, + e.acl = ACLID + WHERE e.id = EntityID; + + + + DELETE FROM name_data + WHERE domain_id = 0 AND entity_id = EntityID AND property_id = 20; + IF EntityName IS NOT NULL THEN + INSERT INTO name_data + (domain_id, entity_id, property_id, value, status, pidx) + VALUES (0, EntityID, 20, EntityName, "FIX", 0); + END IF; + + DELETE FROM data_type + WHERE domain_id=0 AND entity_id=0 AND property_id=EntityID; + + DELETE FROM collection_type + WHERE domain_id=0 AND entity_id=0 AND property_id=EntityID; + + IF Datatype IS NOT NULL THEN + INSERT INTO data_type (domain_id, entity_id, property_id, 
datatype) + SELECT 0, 0, EntityID, + ( SELECT entity_id FROM name_data WHERE domain_id = 0 + AND property_id = 20 AND value = Datatype LIMIT 1 ); + + IF Collection IS NOT NULL THEN + INSERT INTO collection_type (domain_id, entity_id, property_id, + collection) + SELECT 0, 0, EntityID, Collection; + END IF; + END IF; + + Select Version as Version; + +END ;; +DELIMITER ; +/*!50003 SET sql_mode = @saved_sql_mode */ ; +/*!50003 SET character_set_client = @saved_cs_client */ ; +/*!50003 SET character_set_results = @saved_cs_results */ ; +/*!50003 SET collation_connection = @saved_col_connection */ ; +/*!50003 DROP PROCEDURE IF EXISTS `updateLinCon` */; +/*!50003 SET @saved_cs_client = @@character_set_client */ ; +/*!50003 SET @saved_cs_results = @@character_set_results */ ; +/*!50003 SET @saved_col_connection = @@collation_connection */ ; +/*!50003 SET character_set_client = utf8 */ ; +/*!50003 SET character_set_results = utf8 */ ; +/*!50003 SET collation_connection = utf8_general_ci */ ; +/*!50003 SET @saved_sql_mode = @@sql_mode */ ; +/*!50003 SET sql_mode = 'STRICT_TRANS_TABLES,ERROR_FOR_DIVISION_BY_ZERO,NO_AUTO_CREATE_USER,NO_ENGINE_SUBSTITUTION' */ ; +DELIMITER ;; +CREATE DEFINER=`caosdb`@`%` PROCEDURE `updateLinCon`(in sig_from BIGINT, in sig_to BIGINT, in new_a DECIMAL(65,30), in new_b_dividend BIGINT, in new_b_divisor BIGINT, in new_c DECIMAL(65,30)) +BEGIN + UPDATE units_lin_con SET signature_to=sig_to, a=new_a, b_dividend=new_b_dividend, b_divisor=new_b_divisor, c=new_c where signature_from=sig_from; + +END ;; +DELIMITER ; +/*!50003 SET sql_mode = @saved_sql_mode */ ; +/*!50003 SET character_set_client = @saved_cs_client */ ; +/*!50003 SET character_set_results = @saved_cs_results */ ; +/*!50003 SET collation_connection = @saved_col_connection */ ; +/*!40103 SET TIME_ZONE=@OLD_TIME_ZONE */; + +/*!40101 SET SQL_MODE=@OLD_SQL_MODE */; +/*!40014 SET FOREIGN_KEY_CHECKS=@OLD_FOREIGN_KEY_CHECKS */; +/*!40014 SET UNIQUE_CHECKS=@OLD_UNIQUE_CHECKS */; +/*!40101 SET CHARACTER_SET_CLIENT=@OLD_CHARACTER_SET_CLIENT */; +/*!40101 SET CHARACTER_SET_RESULTS=@OLD_CHARACTER_SET_RESULTS */; +/*!40101 SET COLLATION_CONNECTION=@OLD_COLLATION_CONNECTION */; +/*!40111 SET SQL_NOTES=@OLD_SQL_NOTES */; + +-- Dump completed on 2022-01-20 8:27:49 diff --git a/integrationtests/test-profile/custom/other/restore/caosroot.2022-01-20T09271642667269.tar.gz b/integrationtests/test-profile/custom/other/restore/caosroot.2022-01-20T09271642667269.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4f6809d8067ecf58fd449567a8339fc3f498e651 Binary files /dev/null and b/integrationtests/test-profile/custom/other/restore/caosroot.2022-01-20T09271642667269.tar.gz differ diff --git a/integrationtests/test-profile/paths/extroot/README.md b/integrationtests/test-profile/paths/extroot/README.md new file mode 100644 index 0000000000000000000000000000000000000000..ee741757ce62d03d49358a7245faf44b20e6cd60 --- /dev/null +++ b/integrationtests/test-profile/paths/extroot/README.md @@ -0,0 +1,2 @@ +This directory is mounted into the LinkAhead docker container when the debug +profile is used, to allow the inclusion of external file systems. diff --git a/integrationtests/test-profile/profile.yml b/integrationtests/test-profile/profile.yml new file mode 100644 index 0000000000000000000000000000000000000000..f830a2fbe6c6a4ae35362676db310f3eadf6f4cc --- /dev/null +++ b/integrationtests/test-profile/profile.yml @@ -0,0 +1,179 @@ +default: + # Optionally, specify a base compose file. 
The default base compose file is + # located in ./compose/docker-compose-default.yml. A different compose file + # may be useful when other services shall be included, e.g. nginx or django. + # base_compose_file: "compose/docker-compose-default.yml" + + # Directories (list of strings) (or single directory (string, deprecated)) + # with customization files. + # Directories listed later in the list take precedence over earlier ones. + # If you change this, you need to include the full list of directories + # (including possibly directories contained in the default setting). + # custom: + # Standard directory for customizations + # - "./custom" # included by default + # LinkAhead Theme for the web interface + # - "./theme-linkahead" # included by default + # - "./included_customization" # since this is later in the list, it takes precedence + + # Paths to be mounted into Docker, all entries are optional. + paths: + # extroot: From where files are copied/symlinked. This is a + # list of `NAME: PATH` pairs or a single path. + extroot: + # "": "paths/extroot" + "": "../test_data/extroot/" + # + # "base": "/path/to/base/dir" + # "other": "/path/to/other" + # + # dropoffbox: (Soon to be deprecated.) Files can be written here, but note that you may need to + # become root to remove this directory later. The corresponding server property is DROP_OFF_BOX. + # dropoffbox: "/path/to/dropoffbox" + + # Docker building configuration + # References can be either hashes of commits or branch names + refs: + # SERVER: dev + # PYLIB: dev + # MYSQLBACKEND: dev + # WEBUI: dev + # ADVANCEDUSERTOOLS: dev + + # General configuration options + conf: + # Shall the SQL & caosroot dumps at custom/other/restore/ be used? + # restore: false + restore: true + # uncomment to disable tls (ssl). This might be insecure! + # no_tls: false + # Shall the local users be imported as LinkAhead users? + # local_users: false + # Shall the anonymous user have the administration role? Implies auth_optional: TRUE + # anonymous_admin: false + # Shall NIS/LDAP be used for authentication? + # nis: false + # Shall a mail server be used? + # mail: false + # You can provide the path to a non-standard sendmail executable + # sendmail: /usr/sbin/sendmail + # sendmail: /usr/local/bin/sendmail_to_file + # Shall the server run in debug mode? + # This will bind-mount the following directories from custom into the Docker + # container: + # - debug-authtoken :: Authentication tokens will be stored here. + # - debug-scripting-bin :: Used as the server-side scripting bin dir. + # debug: false + debug: true + # URL of the docker registry. Set to "" to look locally. + # registry_server: "gitlab.indiscale.com:5050" + # The account for accessing the registry server. "" means no account. + # registry_account: "" + # The secret token for accessing the registry server. "" means no token. + # registry_token: "" + # Name of the docker image + # image: "caosdb/src/caosdb-deploy" + # Tag of the docker image + # tag: "latest" + # Name of the main Docker container. Set to "" to use an auto-generated + # name, which is necessary for running multiple instances. + # container_name: "linkahead" + # Directory where backups shall be stored. + # backup_dir: "backup" + # The time zone for the server + # timezone: "Coordinated Universal Time" + + # You can set labels for the docker container here. + # labels: + # label_1_key: label_1_value + # label_2_key: label_2_value + + # User/Group of the server, either numeric or names. + # user_group: 999:999 + + # Network settings. 
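+ # A minimal sketch with hypothetical values (all keys are optional and
+ # documented below):
+ # network:
+ #   subnet: 10.3.128.0/17
+ #   port_ssl: 10443
+ #   bind_ip: "127.0.0.1"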
+ network: + # The subnet for the Docker containers + # default: auto selected by docker + # You can set it with: + # subnet: 10.3.128.0/17 + # Port for accessing LinkAhead via HTTPS + # port_ssl: 10443 + # Port for accessing LinkAhead via plain HTTP (not recommended when + # accessible from untrusted networks, but ok for testing or when behind + # a proxy) + # port_plain: 8000 + # Port for GRPC end-point via HTTPS + # port_grpc_ssl: 8443 + # Port for GRPC end-point via plain HTTP + # port_grpc_plain: 8080 + # Port for debugging the LinkAhead JVM + # port_debug: 9000 + # Port for profiling the LinkAhead JVM via JMX + # port_profiler: 9090 + # IP address to listen on ("" means any) + # bind_ip: "127.0.0.1" + + server: + # All the keys of conf are set as environment variables in the server + # container before the server starts. This overrides the server.conf + # settings in any other files, even the settings from + # `custom/caosdb-server/conf/ext/server.conf.d/`. + # Check out conf/core/server.conf in the caosdb-server repository for + # options. + # + # When the conf variables are unset, the server uses its default values + # or the values from the `server.conf.d` directory. + conf: + # uncomment to enable the anonymous user + # auth_optional: TRUE + # uncomment to use your custom authtoken config. See + # `conf/core/authtoken.example.yaml` for examples. + # Note: The path is relative to the caosdb server's root directory. + # authtoken_config: conf/core/authtoken.yaml + + # HTTPS port of the grpc end-point + # grpc_server_port_https: 8443 + # HTTP port of the grpc end-point + # grpc_server_port_http: 8080 + + # Development configuration options + # devel: + # Copy the caosdb-server jar from this location into the Docker container. + # Note that this is implemented by copying the file to + # custom/caosdb-server/target/; any file there will be overwritten. + # jar: /var/build/caosdb-server/0123abcd/target/caosdb-server-<version>-jar-with-dependencies.jar + + # The following is for the very specific case of server-side scripts + # requiring additional Python packages that are not installed during + # the regular build process of LinkAhead. If additional packages are + # needed, list them below. Mind that only packages that can be + # installed by pip are supported. + + # scripting: + # packages: + + # Packages can be installed from PyPI or external git + # repositories. In this case, `mode: "pip"` has to be + # provided. `package` can be the package name in PyPI (possibly + # with a version specification, e.g. `my_package>=1.0`), or it + # can be the URL of a git repository of a Python + # package. Essentially, the command `pip3 install + # package_string` will be executed within LinkAhead. + + # <package1_key>: + # mode: "pip" + # package: "<package_string>" + + # Alternatively, local packages can be copied into LinkAhead and + # then be installed using pip. Here, `mode: "copy"` has to be + # provided. `path` specifies the path to the Python package on + # the host system. `package` is the name of the destination + # directory within the LinkAhead container into which the local + # package will be copied. After copying, a `pip3 install .` is + # run from within that directory. 
+ + # <package2_key>: + # mode: "copy" + # path: "/path/to/local/python/package" + # package: "<package_string>" diff --git a/integrationtests/test.py b/integrationtests/test.py deleted file mode 100644 index 782687be27863e479186717d698b9965f7be8c64..0000000000000000000000000000000000000000 --- a/integrationtests/test.py +++ /dev/null @@ -1,91 +0,0 @@ -#!/usr/bin/env python3 -# encoding: utf-8 -# -# ** header v3.0 -# This file is a part of the CaosDB Project. -# -# Copyright (C) 2021 Indiscale GmbH <info@indiscale.com> -# 2021 Henrik tom Wörden <h.tomwoerden@indiscale.com> -# 2021 Alexander Schlemmer -# -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU Affero General Public License as -# published by the Free Software Foundation, either version 3 of the -# License, or (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Affero General Public License for more details. -# -# You should have received a copy of the GNU Affero General Public License -# along with this program. If not, see <https://www.gnu.org/licenses/>. -# -# ** end header -# - -""" -module description -""" - -import argparse -import sys -from argparse import RawTextHelpFormatter -from newcrawler import Crawler -from unittest.mock import Mock -import caosdb as db -from newcrawler.identifiable_adapters import CaosDBIdentifiableAdapter - -import os - - -def rfp(*pathcomponents): - """ - Return full path. - Shorthand convenience function. - """ - return os.path.join(os.path.dirname(__file__), *pathcomponents) - - -def main(args): - ident_adapt = CaosDBIdentifiableAdapter() - # TODO place this definition of identifiables elsewhere - ident_adapt.register_identifiable( - "Person", db.RecordType() - .add_parent(name="Person") - .add_property(name="first_name") - .add_property(name="last_name")) - ident_adapt.register_identifiable( - "Measurement", db.RecordType() - .add_parent(name="Measurement") - .add_property(name="identifier") - .add_property(name="date") - .add_property(name="project")) - ident_adapt.register_identifiable( - "Project", db.RecordType() - .add_parent(name="Project") - .add_property(name="date") - .add_property(name="identifier")) - - crawler = Crawler(debug=True, identifiableAdapter=ident_adapt) - crawler.copy_attributes = Mock() - crawler.crawl_directory(rfp("../unittests/test_directories", "examples_article"), - rfp("../unittests/scifolder_cfood.yml")) - ins, ups = crawler.synchronize() - assert len(ins) == 18 - assert len(ups) == 0 - - -def parse_args(): - parser = argparse.ArgumentParser(description=__doc__, - formatter_class=RawTextHelpFormatter) - # parser.add_argument("path", - # help="the subtree of files below the given path will " - # "be considered. 
Use '/' for everything.") - - return parser.parse_args() - - -if __name__ == "__main__": - args = parse_args() - sys.exit(main(args)) diff --git a/integrationtests/test_data/extroot/realworld_example/data/35/.dataspace.json b/integrationtests/test_data/extroot/realworld_example/data/35/.dataspace.json new file mode 100644 index 0000000000000000000000000000000000000000..26e11e4e16081b8b5b64a83889bc1f4d160ef0e7 --- /dev/null +++ b/integrationtests/test_data/extroot/realworld_example/data/35/.dataspace.json @@ -0,0 +1,15 @@ +{ + "name": "DEMO", + "dataspace_id": 20002, + "archived": false, + "coordinator": { + "full_name": "Max Schmitt", + "given_name": "Max", + "family_name": "Schmitt", + "email": "max.schmitt@email.de" + }, + "start_date": "2022-03-01", + "end_date": "2032-02-28", + "comment": "Demonstration data space for DataCloud", + "url": "https://datacloud.de/index.php/f/7679" +} diff --git a/integrationtests/test_data/extroot/realworld_example/data/35/03_raw_data/001_dataset1/demo-dataset.csv b/integrationtests/test_data/extroot/realworld_example/data/35/03_raw_data/001_dataset1/demo-dataset.csv new file mode 100644 index 0000000000000000000000000000000000000000..7a4d684e50cf4fa0699c66d27661d0d54055ec8b --- /dev/null +++ b/integrationtests/test_data/extroot/realworld_example/data/35/03_raw_data/001_dataset1/demo-dataset.csv @@ -0,0 +1,101 @@ +index,A[kg],B[s],pH,Temp.[C] +0,2.1209183975976957,-0.5658499891692009,-0.8391639362482752,0.6210332089995103 +1,-1.2155508955759597,-1.0141121577750831,0.2503340429095144,0.7560156296323594 +2,-1.0191141299527218,-1.5870495901656396,0.6811842117478961,-0.25776671384531147 +3,-0.8235788683146266,1.1688759819188137,-0.15841036014621737,0.24351773490785233 +4,-2.028210212186099,-0.15000944869896093,0.7344551834722798,-1.0594635581726441 +5,0.8578345931586077,-1.0478958942647336,-0.5059960285526023,0.6141193812881873 +6,-0.7585068400011461,-0.45812334415522366,-0.6299981228519985,-0.072295788065162 +7,-0.34875455645064296,-0.49936600901639105,0.08492189470338947,0.24398792231786676 +8,-0.491473523786921,-1.1815449374689073,-0.23631388788457763,0.8801868915647684 +9,-1.291852196630842,0.4956544058017087,1.7176555991727498,1.8889309443940632 +10,-0.974327795079914,-0.6002779223325445,1.4950878953418667,-0.4750187681874636 +11,0.863708396863823,0.4867513929363103,-1.2500529683835453,2.1711592870838112 +12,-1.0518542498779602,-0.6800136223939168,-0.5593377295003794,-0.23451862458342732 +13,0.013421028872223972,-1.7652967848993042,0.302518679323854,1.124258888392337 +14,1.1387734213591119,-0.5602347718731282,-0.6908747870526222,0.905906598269778 +15,-1.8032949181114486,0.18858416406523845,1.0083249532267977,0.6969475009127225 +16,-0.42755813629599176,-1.0354063212247375,-0.24666198541039489,-1.2245102779938972 +17,-0.558268266895522,-1.4564784210249142,1.6162446783371565,-0.6109432350045504 +18,-0.9759505344957924,-2.780175134826593,3.039543722358096,-1.258487109407404 +19,-0.042261223623348665,0.7827311969447484,0.8902139085357877,0.33130889065513175 +20,-0.43764310886282315,-0.8338864816830261,0.8545198929035823,-0.8330242660029193 +21,0.2910454990578444,0.40786200750721635,-0.8115126892604917,0.7108997766944964 +22,0.41446462010439317,-1.0965365861313923,-0.1816041240266455,-0.18304466068648742 +23,-0.5187715545823834,-0.46490147833949275,-0.5059346590831783,0.6998562249774912 +24,2.4491154744839005,-0.3554192977203785,-0.6604902675826654,-0.9434392815439072 +25,-0.5083188860395834,0.2781724921583019,-0.4340136020292349,0.02629089617543565 
+26,-0.9854213292611846,-1.398313530263303,0.05552818415139104,-0.20282242071816114 +27,1.0808664341388348,-0.681501179909626,0.6492258431774035,-0.41774069067997716 +28,-1.1767497846165254,1.0817469159915034,-1.524089495721789,0.703812702135731 +29,0.19626402088297137,-1.731421126100085,0.33753714074823216,1.167207071332792 +30,-1.1808345594828473,-0.2820078693924212,-0.8720833031493173,0.8643708946275879 +31,0.8284163458216123,0.20722015645321426,0.29071068457985955,2.6180265991342315 +32,-0.08203655784081282,0.060308831720906446,0.9519485488783623,0.7350446746473065 +33,-0.9071581669506105,0.6088044300190749,1.0099718941738625,0.002799079788086574 +34,-0.42977850177492904,1.2900375327057412,0.32028642454115197,0.8301665482611077 +35,1.0852695299159272,-0.7040390830488096,0.7964627034904589,0.5892571532287761 +36,-1.5667114288837196,0.19932071915614016,-1.0037399027933205,0.5715977614420107 +37,1.3367378436097728,-0.4584285824179284,-0.4435084312392094,-1.3448283883056802 +38,-0.03788754387000687,-0.37288494267798383,-0.5643391975832854,0.43485956543590193 +39,1.0634390535750102,1.0881233131592658,1.2921865320956318,-0.07293734130819148 +40,1.6786504380461766,-0.03043290400609124,2.66472625811549,-0.016638240963738466 +41,-1.657581538683817,0.8240214695327108,0.32914391919723984,0.08007211199118686 +42,0.04171224685709963,-0.9854865121535178,-0.3195510216437891,-0.42540430453161987 +43,0.6506526831507736,-1.159358101323352,-1.2789107289085737,0.10499609768025624 +44,0.7402635450212406,-0.44202303578095753,-0.5748164371395315,0.5600113473434154 +45,-0.9809738202025933,0.16868168368656386,-1.5883259666916558,-2.6955712214488345 +46,-1.8495816486925372,-1.6954982682847552,1.387648046113911,0.8455399256972358 +47,1.0442607146494682,0.44438084748213075,-0.6547675875380801,-0.5557546828614935 +48,0.32905474147201974,-0.7323591467714324,0.8555098512789541,2.4647371835928196 +49,-2.5131333956577557,1.4005121469909907,-2.162216422615549,-0.3797761578463441 +50,-1.406182674849582,-0.33037485118390236,-0.30720520090625775,0.3765108318500068 +51,1.4315461764484496,0.4490657382715407,0.14688708820540236,-1.3363710028523919 +52,-1.3546100448551868,0.35309094153560083,1.1314974294901488,-0.8299500202669722 +53,-0.7668372422803534,1.3427856896905794,0.11144680945543838,0.5488627384438831 +54,2.6377507721791997,1.86851303077989,0.1358347611054535,0.0021631807468969044 +55,-0.2814604476092987,-0.8110890245372547,0.2914246407211869,1.3009776744589592 +56,-0.08220515064674763,0.06131679740379093,1.2240755323078314,1.6651435947789437 +57,-1.5833977101974248,-1.0390852809695386,0.9472604405151627,-1.1238493594739791 +58,0.060801913229076375,-1.1395369395697963,-0.6773504352336591,-0.7823844332031786 +59,0.3719151864023754,-2.6589573541115885,0.9138765623088898,1.9179285751965107 +60,0.16875501543121765,-0.21075290840365637,-0.15712808326461272,-1.095263810678978 +61,-0.6676220651512094,-2.392512574657398,-0.1970135407082481,1.1303688380560521 +62,-0.3508037371211798,0.37103055819752395,0.1319143246551687,-0.8442765717512588 +63,0.5744187610995448,0.2824163982139891,-0.23250484081352427,-0.009480528299369923 +64,-1.033847039653939,-0.6062251775571341,0.8745680740327043,0.10611431160660695 +65,0.8616095853453857,-0.7902852788672261,0.9924735544245377,-0.39017477285341734 +66,-0.25797403501959537,0.9776756368066659,-0.1774701795502288,0.8457628045096433 +67,0.1879011473947124,0.4880410431165719,0.33941695573743247,-0.3098695458944371 
+68,0.12908240475251795,-0.3929831705571321,-0.9815115481276334,-0.6596680503662373 +69,0.47271005121390686,-0.27585706457228726,0.659750762879994,-1.621655291178758 +70,1.2805576221047092,1.255690982276119,0.8893506172744224,0.36843763617254915 +71,-1.8222077377715007,-1.2618097663744718,-1.2393746501949852,0.22742537143827415 +72,-0.7670935921671362,0.6632357605887813,-1.8652052380554516,-0.3566398262186697 +73,0.368513682832951,0.22484190975093934,0.7207761550523548,-0.4607733151206031 +74,-1.6353304746550132,-1.0835890398703607,0.6240782484796151,1.497716990815385 +75,1.2631082191418077,1.9388688317848526,0.43069457351954177,-0.1240852286700612 +76,1.4229945541316606,1.685287372911636,0.282616738427184,1.6075806781661712 +77,0.15907038463344916,-1.1862747951875707,-2.162241163696355,0.9048269906929861 +78,0.8724544719304812,-0.06423147646568356,0.28403221059939265,0.7315950326908587 +79,-0.5099002924982818,0.8674753935115029,0.0015306969822590103,-0.793334121698815 +80,0.16756755106838742,-0.8374595440291756,1.871547652925694,-0.019948470822079158 +81,0.5333319586985659,-1.6076411272904392,0.4676478392958759,0.35245743045221734 +82,-0.5292514883314576,-1.2708056558247538,-1.7043012586370947,0.3391676901971921 +83,1.8042184317880245,1.2058943020996364,-2.3228385290614084,1.2008461670776127 +84,0.8671835774935015,0.9953640415286719,-1.4439272409362103,0.9410085688802767 +85,-0.118043369635042,0.41649838899300184,-1.2993225013700294,1.9232397286356342 +86,-0.32517525711392864,0.062481999278585824,-0.27679161049236684,0.06555334954413516 +87,-0.39336711632154264,0.0790516633124132,-0.600204351381406,1.321653482130525 +88,-0.9789171222367312,0.30688902979967303,0.10346158693798674,0.3160642853129814 +89,0.4332454768673768,-0.620828990252391,-1.0710192139922268,0.15027972939295933 +90,3.1092106995021096,0.354640404873306,1.8164064530643516,1.8911595405760606 +91,0.7027212216033006,-1.9367414347582559,-0.26797308254438235,1.1063820286927997 +92,0.6665636818250888,0.7953561614160027,1.8164132351496374,1.5760380002772454 +93,-1.4931006068027144,0.2680846074746922,-0.30697269318261355,-0.5300118028948997 +94,0.9258476710590248,0.15464742730214845,0.5847769923450901,-0.8405562302565793 +95,0.3015957125126854,2.9697978560379323,2.2793789547159338,0.13951152352691706 +96,0.4109127837045091,0.04501972229381512,0.5969781411176205,1.6443498245829686 +97,0.07956221270863263,0.009072464866011773,-0.6905847540574735,-0.9639714900867246 +98,2.9172401959670817,0.43571229891911717,-0.903738601954934,0.08343820441617454 +99,0.5501333973314503,-0.2511364474548299,1.4945524498890597,-1.1608586317841827 diff --git a/integrationtests/test_data/extroot/realworld_example/data/35/03_raw_data/001_dataset1/metadata.json b/integrationtests/test_data/extroot/realworld_example/data/35/03_raw_data/001_dataset1/metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..64df90e55eff065b1cc249a634444a72f9fd00d2 --- /dev/null +++ b/integrationtests/test_data/extroot/realworld_example/data/35/03_raw_data/001_dataset1/metadata.json @@ -0,0 +1,50 @@ +{ + "title": "Random numbers created on a random autumn day in a random office", + "abstract": "For demonstration purposes we created random numbers on a computer in an office of the CLOUD. 
This demonstration dataset is used in the DataCloud, a curated cloud storage for scientific data.", + "Event": [ + { + "longitude": 18.445078548041533, + "start_datetime": "2022-02-10T16:36:48+01:00", + "latitude": 53.10833068997861, + "elevation": 2, + "location": "Bremen, Germany" + } + ], + "license": "CC-BY", + "authors": [ + { + "firstname": "Max", + "lastname": "Schmitt", + "full_name": "Max Schmitt", + "affiliation": "CLOUD", + "ORCID": "0000-0001-6233-1866", + "email": "max.schmitt@email.de" + }, + { + "firstname": "Alexa", + "lastname": "Nozone", + "full_name": "Alexa Nozone", + "affiliation": "CLOUD", + "email": "alexa.nozone@email.de" + } + ], + "comment": "For questions about the DataCloud or this demonstration dataset, contact research-data@email.de", + "project": { + "name": "Demonstration of Extremely important Metadata in Folders", + "full_name": "Project", + "project_acronym": "DEMO", + "project_type": "national", + "institute": "CLOUD", + "start_date": "2021-10-01", + "end_date": "2031-12-31", + "url": "https://www.cloud.de/de/forschung-infrastruktur/forschungsdaten-services.html", + "coordinator": { + "firstname": "Max", + "lastname": "Schmitt", + "email": "max.schmitt@email.de" + } + }, + "method": { + "name": "Random Number Generator" + } +}
diff --git a/integrationtests/test_data/extroot/realworld_example/data/35/03_raw_data/README_RawData.md b/integrationtests/test_data/extroot/realworld_example/data/35/03_raw_data/README_RawData.md new file mode 100644 index 0000000000000000000000000000000000000000..2317ff8616c43e75f52637ff581017bf4a50d468 --- /dev/null +++ b/integrationtests/test_data/extroot/realworld_example/data/35/03_raw_data/README_RawData.md @@ -0,0 +1,25 @@ +# Raw Data + +The `03_raw_data` folder is here to store all raw data of each dataset +associated with the project – the data that has not been edited by you yet but +which you plan to use in your research. This can be, e.g., your unprocessed field +sampling records or useful data from an online repository. Organize your data +in this folder in the following way: + +- Each dataset should reside inside a subfolder. It is recommended to number and name these folders clearly, e.g. `03_raw_data/001_precipitationgermany2017`. + +- **IMPORTANT**: provide the folder with information about your raw data by + filling out a metadata form for each of your datasets! For this, + + - either copy the `metadata-template.json` file and put it into your dataset + folder. Open the copy with a text editor and fill out the fields. + - or use the metadata editor in the DataCloud web client (press the "+" button + and use the "New metadata.json" file) + + If you can’t find information about your data to fill in here, you should + reconsider using it: it is important to be able to trace your data sources to + ensure a FAIR scientific process! + +- For processing any of the data, make a copy of the dataset and paste it into + the `04_data_processing` folder. This way, you make sure to keep your raw data + in its original state.
\ No newline at end of file
diff --git a/integrationtests/test_data/extroot/realworld_example/data/35/03_raw_data/metadata-template.json b/integrationtests/test_data/extroot/realworld_example/data/35/03_raw_data/metadata-template.json new file mode 100644 index 0000000000000000000000000000000000000000..7f457d239321b232fb2db7d46f4e1576c85911b0 --- /dev/null +++ b/integrationtests/test_data/extroot/realworld_example/data/35/03_raw_data/metadata-template.json @@ -0,0 +1,52 @@ +{ + "dataset": { + "title": "", + "abstract": "See https://github.com/CLOUD/metadata-schema for schema specification", + "license": "CC-BY", + "authors": [ + { + "firstname": "", + "lastname": "", + "affiliation": "", + "ORCID": "XXXX-XXXX-XXXX-XXXX", + "email": "name@domain.de" + }, + { + "firstname": "", + "lastname": "", + "affiliation": "", + "email": "name@domain.de", + "ORCID": "XXXX-XXXX-XXXX-XXXX" + } + ], + "project": { + "name": "", + "acronym": "", + "type": "DFG/", + "institute": "CLOUD", + "start_date": "YYYY-MM-DD", + "end_date": "YYYY-MM-DD", + "url": "", + "coordinator": { + "lastname": "", + "email": "", + "firstname": "" + } + }, + "events_in_data": false, + "events": [ + { + "longitude": 0, + "latitude": 0, + "elevation": 0, + "location": "", + "datetime": "YYYY-MM-DDTHH:mm:ss" + } + ], + "method": { + "name": "", + "url": "" + }, + "max_files": 100 + } +}
diff --git a/integrationtests/test_data/extroot/realworld_example/data/35/04_data_processing/README_ProcessedData.md b/integrationtests/test_data/extroot/realworld_example/data/35/04_data_processing/README_ProcessedData.md new file mode 100644 index 0000000000000000000000000000000000000000..ce1b002b18772b85f4bba3a222574f438a6ed0e3 --- /dev/null +++ b/integrationtests/test_data/extroot/realworld_example/data/35/04_data_processing/README_ProcessedData.md @@ -0,0 +1,10 @@ +# Data Processing + +The actual work is done in this `04_data_processing` folder. Depending on your +field and the type and size of your project, you can organize this folder in the way that +fits your process best. Here, a bit of chaos can happen ;) Remember to +document your processing steps in the `02_materials_and_methods` folder and to +put your final results into the `05_results` folder. At the end of your +project, it should be possible to delete everything in this folder and +reconstruct the working process using the documentation and raw data from +previous folders.
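The `metadata-template.json` files above are checked against the JSON Schema files under `schema/`: the cfood below attaches `validate: schema/dataset.schema.json` to its metadata converters, and the crawler's converters import the `jsonschema` package for that check. As a rough, illustrative sketch of what this validation amounts to (the relative paths are taken from this test tree; the snippet itself is not part of the test suite):

```python
import json

from jsonschema import ValidationError, validate

# Load the dataset schema and a filled-out metadata file from this test tree.
with open("schema/dataset.schema.json", encoding="utf-8") as f:
    schema = json.load(f)
with open("data/35/03_raw_data/001_dataset1/metadata.json", encoding="utf-8") as f:
    metadata = json.load(f)

try:
    # The same kind of check the crawler performs for converters
    # that carry a "validate:" key.
    validate(instance=metadata, schema=schema)
    print("metadata.json conforms to dataset.schema.json")
except ValidationError as err:
    print(f"Invalid metadata: {err.message}")
```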
diff --git a/integrationtests/test_data/extroot/realworld_example/data/35/04_data_processing/metadata-template.json b/integrationtests/test_data/extroot/realworld_example/data/35/04_data_processing/metadata-template.json new file mode 100644 index 0000000000000000000000000000000000000000..05f9394dfbfa9a0b2b4844c7080a340585a9050f --- /dev/null +++ b/integrationtests/test_data/extroot/realworld_example/data/35/04_data_processing/metadata-template.json @@ -0,0 +1,52 @@ +{ + "dataset": { + "title": "", + "abstract": "See https://github.com/cloud/metadata-schema for schema specification", + "license": "CC-BY", + "authors": [ + { + "firstname": "", + "lastname": "", + "affiliation": "", + "ORCID": "XXXX-XXXX-XXXX-XXXX", + "email": "name@domain.de" + }, + { + "firstname": "", + "lastname": "", + "affiliation": "", + "email": "name@domain.de", + "ORCID": "XXXX-XXXX-XXXX-XXXX" + } + ], + "project": { + "name": "", + "acronym": "", + "type": "DFG/", + "institute": "CLOUD", + "start_date": "YYYY-MM-DD", + "end_date": "YYYY-MM-DD", + "url": "", + "coordinator": { + "lastname": "", + "email": "", + "firstname": "" + } + }, + "events_in_data": false, + "events": [ + { + "longitude": 0, + "latitude": 0, + "elevation": 0, + "location": "", + "datetime": "YYYY-MM-DDTHH:mm:ss" + } + ], + "method": { + "name": "", + "url": "" + }, + "max_files": 100 + } +} diff --git a/integrationtests/test_data/extroot/realworld_example/data/35/05_results/README_Results.md b/integrationtests/test_data/extroot/realworld_example/data/35/05_results/README_Results.md new file mode 100644 index 0000000000000000000000000000000000000000..ae0ab6571c52c0ec9a1cdc8aba27b31fd3be6fcc --- /dev/null +++ b/integrationtests/test_data/extroot/realworld_example/data/35/05_results/README_Results.md @@ -0,0 +1,7 @@ +# Results + +All the results that are final versions of your data analysis or processing, +should be copied into this `05_results` folder. Organize your results folder in +the way most fitting to your project. + +Provide metadata to your results files. diff --git a/integrationtests/test_data/extroot/realworld_example/data/35/README.md b/integrationtests/test_data/extroot/realworld_example/data/35/README.md new file mode 100644 index 0000000000000000000000000000000000000000..809d699c462d064ff5193add8e23677bec84b0e0 --- /dev/null +++ b/integrationtests/test_data/extroot/realworld_example/data/35/README.md @@ -0,0 +1,5 @@ +# Dataspace: DEMO + +This is a Dataspace in the CLOUD DataCloud providing safe, curated cloud storage +for all of CLOUD's research data. + diff --git a/integrationtests/test_data/extroot/realworld_example/dataset_cfoods.yml b/integrationtests/test_data/extroot/realworld_example/dataset_cfoods.yml new file mode 100644 index 0000000000000000000000000000000000000000..eaf2690ae130cb61c8a74452e3e4e1d4fd06846a --- /dev/null +++ b/integrationtests/test_data/extroot/realworld_example/dataset_cfoods.yml @@ -0,0 +1,535 @@ +# This file is a part of the CaosDB Project. +# +# Copyright (C) 2022 IndiScale GmbH <info@indiscale.com> +# Copyright (C) 2022 Florian Spreckelsen <f.spreckelsen@indiscale.com> +# +# This program is free software: you can redistribute it and/or modify it under +# the terms of the GNU Affero General Public License as published by the Free +# Software Foundation, either version 3 of the License, or (at your option) any +# later version. 
+# +# This program is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +# FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more +# details. +# +# You should have received a copy of the GNU Affero General Public License along +# with this program. If not, see <https://www.gnu.org/licenses/>. +# +Data: + type: Directory + match: data + subtree: + dataspace_dir: + type: Directory + match: (?P<dataspace_dir_number>[0-9]+) + records: + Dataspace: + name: $dataspace_dir_number + subtree: + dataspace_json: + type: JSONFile + match: .dataspace.json + validate: schema/dataspace.schema.json + subtree: + dataspace_id_element: + type: DictIntegerElement + match_name: "dataspace_id" + match_value: "(?P<id>[0-9]+)" + records: + Dataspace: + dataspace_id: $id + archived_element: + type: DictBooleanElement + match_name: "archived" + match_value: "(?P<archived>.*)" + records: + Dataspace: + archived: $archived + url_element: + type: DictTextElement + match_name: "url" + match_value: "(?P<url>.*)" + records: + Dataspace: + url: $url + coordinator_element: + type: DictDictElement + match_name: "coordinator" + records: + Person: + parents: + - Person + Dataspace: + Person: $Person + subtree: &person_subtree + full_name_element: + type: DictTextElement + match_name: "full_name" + match_value: "(?P<full_name>.*)" + records: + Person: + full_name: $full_name + full_name_nonlatin_element: + type: DictTextElement + match_name: "full_name_nonlatin" + match_value: "(?P<full_name_nonlatin>.*)" + records: + Person: + full_name_nonlatin: $full_name_nonlatin + family_name_element: + type: DictTextElement + match_name: "family_name" + match_value: "(?P<family_name>.*)" + records: + Person: + family_name: $family_name + given_name_element: + type: DictTextElement + match_name: "given_name" + match_value: "(?P<given_name>.*)" + records: + Person: + given_name: $given_name + email_element: + type: DictTextElement + match_name: "email" + match_value: "(?P<email>.*)" + records: + Person: + email: $email + affiliation_element: + type: DictTextElement + match_name: "affiliation" + match_value: "(?P<affiliation>.*)" + records: + Person: + affiliation: $affiliation + ORCID_element: + type: DictTextElement + match_name: "ORCID" + match_value: "(?P<ORCID>.*)" + records: + Person: + ORCID: $ORCID + start_date_element: + type: DictTextElement + match_name: "start_date" + match_value: "(?P<start_date>.*)" + records: + Dataspace: + start_date: $start_date + end_date_element: + type: DictTextElement + match_name: "end_date" + match_value: "(?P<end_date>.*)" + records: + Dataspace: + end_date: $end_date + comment: + type: DictTextElement + match_name: "comment" + match_value: "(?P<comment>.*)" + records: + Dataspace: + comment: $comment + raw_data_dir: + type: Directory + match: 03_raw_data + subtree: &template + # TODO collect info from metadata.json and look into sub-directories + # (only one level) for metadata.json + dataset_dir: + match: (?P<dataset_dir_name>.*) + type: Directory + records: + Dataset: + Dataspace: $Dataspace + subtree: + metadata_json: &metadata_json_template + type: JSONFile + match: metadata.json + validate: schema/dataset.schema.json + subtree: + title_element: + type: DictTextElement + match_name: "title" + match_value: "(?P<title>.*)" + records: + Dataset: + title: $title + authors_element: + type: DictListElement + match_name: "authors" + subtree: + author_element: + type: Dict + records: + 
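# Each author dict matched by this converter creates its own Person record; the "+$Person" value below appends that record to the Dataset's "authors" list (an initial "+" collects values into a list, an initial "*" into a multiproperty; see cfood-schema.yml further below). +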
Person: + parents: + - Person + Dataset: + authors: +$Person + subtree: *person_subtree + abstract_element: + type: DictTextElement + match_name: "abstract" + match_value: "(?P<abstract>.*)" + records: + Dataset: + abstract: $abstract + comment_element: + type: DictTextElement + match_name: "comment" + match_value: "(?P<comment>.*)" + records: + Dataset: + comment: $comment + license_element: + type: DictTextElement + match_name: "license" + match_value: "(?P<license_name>.*)" + records: + license: + # TODO: As soon as such things can be validated, a + # creation of a new license has to be forbidden here + # (although this is effectively done already by + # validating against the above schema.) + name: $license_name + Dataset: + license: $license + dataset_doi_element: + type: DictTextElement + match_name: "dataset_doi" + match_value: "(?P<dataset_doi>.*)" + records: + Dataset: + dataset_doi: $dataset_doi + related_to_dois_element: + type: DictListElement + match_name: "related_to_dois" + subtree: + related_to_doi_element: + type: TextElement + match: "(?P<related_to_doi>).*" + records: + Dataset: + related_to_dois: +$related_to_doi + Keywords_element: + type: DictListElement + match_name: "Keyword" + Events_element: + type: DictListElement + match_name: "Event" + subtree: + Event_element: + type: Dict + records: + Event: + parents: + - Event + Dataset: + Event: +$Event + subtree: + label_element: + type: DictTextElement + match_name: "label" + match_value: "(?P<label>.*)" + records: + Event: + label: $label + comment_element: + type: DictTextElement + match_name: "comment" + match_value: "(?P<comment>.*)" + records: + Event: + comment: $comment + start_datetime_element: + type: DictTextElement + match_name: start_datetime + match_value: "(?P<start_datetime>.*)" + records: + Event: + start_datetime: $start_datetime + end_datetime_element: + type: DictTextElement + match_name: end_datetime + match_value: "(?P<end_datetime>.*)" + records: + Event: + end_datetime: $end_datetime + longitude_element: + type: DictFloatElement + match_name: "longitude" + match_value: "(?P<longitude>.*)" + records: + Event: + longitude: $longitude + latitude_element: + type: DictFloatElement + match_name: "latitude" + match_value: "(?P<latitude>.*)" + records: + Event: + latitude: $latitude + elevation_element: + type: DictFloatElement + match_name: "elevation" + match_value: "(?P<elevation>.*)" + records: + Event: + elevation: $elevation + location_element: + type: DictTextElement + match_name: location + match_value: "(?P<location>.*)" + records: + Event: + location: $location + igsn_element: + type: DictTextElement + match_name: igsn + match_value: "(?P<igsn>.*)" + records: + Event: + igsn: $igsn + events_in_data_element: + type: DictBooleanElement + match_name: "events_in_data" + match_value: "(?P<events_in_data>.*)" + records: + Dataset: + events_in_data: $events_in_data + geojson_element: + type: DictTextElement + match_name: "geojson" + match_value: "(?P<geojson>.*)" + records: + Dataset: + geojson: $geojson + project_element: + type: DictDictElement + match_name: "project" + records: + Project: + parents: + - Project + Dataset: + Project: $Project + subtree: + name_element: + type: DictTextElement + match_name: "name" + match_value: "(?P<name>.*)" + records: + Project: + name: $name + full_name_element: + type: DictTextElement + match_name: "full_name" + match_value: "(?P<full_name>.*)" + records: + Project: + full_name: $full_name + project_id_element: + type: DictTextElement + match_name: "project_id" + 
match_value: "(?P<project_id>.*)" + records: + Project: + project_id: $project_id + project_type_element: + type: DictTextElement + match_name: "project_type" + match_value: "(?P<project_type_name>.*)" + records: + project_type: + name: $project_type_name + Project: + project_type: $project_type + institute_element: + type: DictTextElement + match_name: "institute" + match_value: "(?P<institute>.*)" + records: + Project: + institute: $institute + start_date_element: + type: DictTextElement + match_name: "start_date" + match_value: "(?P<start_date>.*)" + records: + Project: + start_date: $start_date + end_date_element: + type: DictTextElement + match_name: "end_date" + match_value: "(?P<end_date>.*)" + records: + Project: + end_date: $end_date + url_element: + type: DictTextElement + match_name: "url" + match_value: "(?P<url>.*)" + records: + Project: + url: $url + coordinators_element: + type: DictListElement + match_name: "coordinators" + subtree: + coordinator_element: + type: Dict + records: + Person: + parents: + - Person + Project: + coordinators: +$Person + subtree: *person_subtree + campaign_element: + type: DictDictElement + match_name: "campaign" + records: + Campaign: + parents: + - Campaign + Dataset: + Campaign: $Campaign + subtree: + label_element: + type: DictTextElement + match_name: "label" + match_value: "(?P<label>.*)" + records: + Campaign: + label: $label + optional_label_element: + type: DictTextElement + match_name: "optional_label" + match_value: "(?P<optional_label>.*)" + records: + Campaign: + optional_label: $optional_label + start_date_element: + type: DictTextElement + match_name: "start_date" + match_value: "(?P<start_date>.*)" + records: + Campaign: + start_date: $start_date + end_date_element: + type: DictTextElement + match_name: "end_date" + match_value: "(?P<end_date>.*)" + records: + Campaign: + end_date: $end_date + responsible_scientists_element: + type: DictListElement + match_name: "responsible_scientists" + subtree: + responsible_scientist_element: + type: Dict + records: + Person: + parents: + - Person + Campaign: + responsible_scientists: +$Person + subtree: *person_subtree + Methods_element: + type: DictListElement + match_name: "Method" + subtree: + Method_element: + type: Dict + records: + Method: + parents: + - Method + Dataset: + Method: +$Method + subtree: + method_name_element: + type: DictTextElement + match_name: "method_name" + match_value: "(?P<method_name>.*)" + records: + Method: + name: $method_name + abbreviation_element: + type: DictTextElement + match_name: "abbreviation" + match_value: "(?P<abbreviation>.*)" + records: + Method: + abbreviation: $abbreviation + url_element: + type: DictTextElement + match_name: "url" + match_value: "(?P<url>.*)" + records: + Method: + url: $url + Taxa_element: + type: DictListElement + match_name: "Taxon" + subtree: + Taxon_element: + type: Dict + records: + Taxon: + parents: + - Taxon + Dataset: + Taxon: +$Taxon + subtree: + taxon_name_element: + type: DictTextElement + match_name: "taxon_name" + match_value: "(?P<taxon_name>.*)" + records: + Taxon: + name: $taxon_name + archived_element: + type: DictBooleanElement + match_name: "archived" + match_value: "(?P<archived>.*)" + records: + Dataset: + archived: $archived + publication_date_element: + type: DictTextElement + match_name: "publication_date" + match_value: "(?P<publication_date>.*)" + records: + Dataset: + publication_date: $publication_date + max_files_element: + type: DictIntegerElement + match_name: "max_files" + match_value: "(?P<max_files>.*)" + records: + Dataset: + max_files: $max_files + auxiliary_file: &aux_file_template + type: File + match: "(?P<aux_file_name>(?!metadata.json).*)" + # TODO File, path and reference dataset in file record + child_dataset_dir: + type: Directory + match: (?P<child_dataset_dir_name>.*) + subtree: + metadata_json: *metadata_json_template + auxiliary_file: *aux_file_template + data_processing_dir: + type: Directory + match: 04_data_processing + subtree: *template + results_dir: + type: Directory + match: 05_results + subtree: *template
diff --git a/integrationtests/test_data/extroot/realworld_example/identifiables.yml b/integrationtests/test_data/extroot/realworld_example/identifiables.yml new file mode 100644 index 0000000000000000000000000000000000000000..0ea0265ecfec05392c599457d81339bc91ba18d0 --- /dev/null +++ b/integrationtests/test_data/extroot/realworld_example/identifiables.yml @@ -0,0 +1,22 @@ +license: + - name +project_type: + - name +Keyword: + - name +Taxon: + - name +Person: + - email + # - full_name +Dataset: + - title + # - DOI +Event: + - longitude + - latitude + - start_datetime +Dataspace: + - dataspace_id +Project: + - name
diff --git a/integrationtests/test_data/extroot/realworld_example/schema/README.md b/integrationtests/test_data/extroot/realworld_example/schema/README.md new file mode 100644 index 0000000000000000000000000000000000000000..e0bb95f8b844374bba72c7c6989ac57cfa5fc305 --- /dev/null +++ b/integrationtests/test_data/extroot/realworld_example/schema/README.md @@ -0,0 +1,37 @@ +# Dataset Schemas + +These schemas are derived from the [metadata +schemas](https://github.com/leibniz-zmt/zmt-metadata-schema) used at the Leibniz +Center for Tropical Marine Research (Leibniz ZMT). + +# Copyright + +BSD 3-Clause License + +Copyright (c) 2022 ZMT +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +* Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +* Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +* Neither the name of the copyright holder nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/integrationtests/test_data/extroot/realworld_example/schema/dataset-inheritance.yml b/integrationtests/test_data/extroot/realworld_example/schema/dataset-inheritance.yml new file mode 100644 index 0000000000000000000000000000000000000000..3d12053a0007cdea1005e7673db69f46b35a063d --- /dev/null +++ b/integrationtests/test_data/extroot/realworld_example/schema/dataset-inheritance.yml @@ -0,0 +1,18 @@ +extern: +- Keyword +- Taxon +- full_name +- full_name_nonlatin +- name + +full_name: + inherit_from_obligatory: + - name + +full_name_nonlatin: + inherit_from_obligatory: + - name + +Taxon: + inherit_from_obligatory: + - Keyword diff --git a/integrationtests/test_data/extroot/realworld_example/schema/dataset.schema.json b/integrationtests/test_data/extroot/realworld_example/schema/dataset.schema.json new file mode 100644 index 0000000000000000000000000000000000000000..83d6a60d857349772c960af637671cb21c8abd5d --- /dev/null +++ b/integrationtests/test_data/extroot/realworld_example/schema/dataset.schema.json @@ -0,0 +1,365 @@ +{ + "title": "Dataset", + "description": "", + "type": "object", + "properties": { + "title": { + "type": "string", + "description": "full dataset title" + }, + "authors": { + "type": "array", + "items": { + "type": "object", + "title": "Person", + "properties": { + "full_name": { + "type": "string", + "description": "Full name (latin transcription, all UFT-8 characters allowed)" + }, + "full_name_nonlatin": { + "type": "string", + "description": "Full name (non-latin alphabet)" + }, + "family_name": { + "type": "string", + "description": "Family name (latin transcription)" + }, + "given_name": { + "type": "string", + "description": "Given/other names (latin transcription)" + }, + "affiliation": { + "type": "string" + }, + "ORCID": { + "type": "string", + "description": "ORCID identifier as 16-digit number, e.g. 0000-0001-6233-1866", + "pattern": "^\\d{4}-\\d{4}-\\d{4}-\\d{4}$" + }, + "email": { + "type": "string", + "format": "email" + } + }, + "required": [ + "full_name", + "email" + ] + }, + "minItems": 1, + "uniqueItems": true + }, + "abstract": { + "type": "string", + "minLength": 80, + "maxLength": 1000, + "description": "Abstract with at least 80 characters" + }, + "comment": { + "type": "string" + }, + "license": { + "type": "string", + "enum": [ + "CC-BY", + "CC-BY-SA", + "CC0", + "restricted access" + ] + }, + "dataset_doi": { + "type": "string", + "pattern": "(10[.][0-9]{4,}(?:[.][0-9]+)*/(?:(?![%\"#? ])\\S)+)", + "description": "Dataset DOI, e.g. 10.1594/PANGAEA.938740" + }, + "related_to_dois": { + "type": "array", + "items": { + "type": "string", + "pattern": "(10[.][0-9]{4,}(?:[.][0-9]+)*/(?:(?![%\"#? ])\\S)+)" + }, + "description": "DOIs of related publications and/or datasets, e.g. 
10.1000/182" + }, + "Keyword": { + "type": "array", + "items": { + "type": "object", + "properties": { + "name": { + "type": "string" + } + } + } + }, + "Event": { + "type": "array", + "description": "https://wiki.pangaea.de/wiki/Event", + "items": { + "type": "object", + "properties": { + "label": { + "type": "string" + }, + "comment": { + "type": "string" + }, + "start_datetime": { + "type": "string", + "format": "date-time" + }, + "end_datetime": { + "type": "string", + "format": "date-time" + }, + "longitude": { + "type": "number", + "minimum": -180, + "maximum": 180, + "description": "longitude (W/E) in decimal degree (-180 to 180)" + }, + "latitude": { + "type": "number", + "minimum": -90, + "maximum": 90, + "description": "latitude (N/S) in decimal degree (-90 to 90)" + }, + "elevation": { + "type": "number", + "minimum": -10000, + "maximum": 20000, + "description": "elevation in m" + }, + "location": { + "type": "string", + "description": "geographical location as text (e.g., North Sea; Espoo, Finland)" + }, + "igsn": { + "type": "string", + "description": "International Geo Sample Number (http://www.geosamples.org/aboutigsn)" + } + }, + "required": [ + "longitude", + "latitude", + "start_datetime" + ] + } + }, + "events_in_data": { + "type": "boolean", + "description": "Does the data contain additional information about timepoints and locations?" + }, + "geojson": { + "type": "string", + "pattern": "", + "description": "GeoJSON for complex geographic structures" + }, + "project": { + "title": "Project", + "description": "https://wiki.pangaea.de/wiki/Project", + "type": "object", + "properties": { + "name": { + "type": "string", + "description": "short name of project" + }, + "full_name": { + "type": "string", + "description": "Full name (latin transcription, all UTF-8 characters allowed)" + }, + "project_id": { + "type": "string", + "description": "Project ID" + }, + "project_type": { + "type": "string", + "enum": [ + "DFG", + "EU", + "BMBF", + "national", + "international" + ] + }, + "institute": { + "type": "string", + "description": "place of coordination or project office", + "default": "Centre for Research" + }, + "start_date": { + "type": "string", + "format": "date" + }, + "end_date": { + "type": "string", + "format": "date" + }, + "url": { + "type": "string", + "format": "uri" + }, + "coordinators": { + "type": "array", + "items": { + "type": "object", + "title": "Person", + "properties": { + "full_name": { + "type": "string", + "description": "Full name (latin transcription, all UTF-8 characters allowed)" + }, + "full_name_nonlatin": { + "type": "string", + "description": "Full name (non-latin alphabet)" + }, + "family_name": { + "type": "string", + "description": "Family name (latin transcription)" + }, + "given_name": { + "type": "string", + "description": "Given/other names (latin transcription)" + }, + "affiliation": { + "type": "string" + }, + "ORCID": { + "type": "string", + "description": "ORCID identifier as 16-digit number, e.g. 
0000-0001-6233-1866", + "pattern": "^\\d{4}-\\d{4}-\\d{4}-\\d{4}$" + }, + "email": { + "type": "string", + "format": "email" + } + }, + "required": [ + "full_name", + "email" + ] + }, + "minItems": 1, + "uniqueItems": true + } + }, + "required": ["name", "full_name"] + }, + "campaign": { + "title": "Campaign", + "description": "https://wiki.pangaea.de/wiki/Campaign, synonyms: cruise, expedition, leg, ", + "type": "object", + "properties": { + "label": { + "type": "string", + "description": "is unique and does not contain blanks; uses abbreviations instead of full names" + }, + "optional_label": { + "type": "string" + }, + "start_date": { + "type": "string", + "format": "date" + }, + "end_date": { + "type": "string", + "format": "date" + }, + "responsible_scientists": { + "type": "array", + "items": { + "type": "object", + "title": "Person", + "properties": { + "full_name": { + "type": "string", + "description": "Full name (latin transcription, all UFT-8 characters allowed)" + }, + "full_name_nonlatin": { + "type": "string", + "description": "Full name (non-latin alphabet)" + }, + "family_name": { + "type": "string", + "description": "Family name (latin transcription)" + }, + "given_name": { + "type": "string", + "description": "Given/other names (latin transcription)" + }, + "affiliation": { + "type": "string" + }, + "ORCID": { + "type": "string", + "description": "ORCID identifier as 16-digit number, e.g. 0000-0001-6233-1866", + "pattern": "^\\d{4}-\\d{4}-\\d{4}-\\d{4}$" + }, + "email": { + "type": "string", + "format": "email" + } + }, + "required": [ + "full_name", + "email" + ] + }, + "minItems": 1, + "uniqueItems": true + } + } + }, + "Method": { + "type": "array", + "items": { + "type": "object", + "description": "https://wiki.pangaea.de/wiki/Method", + "properties": { + "method_name": { + "type": "string", + "description": "full official name of tool/instrument/device/gear" + }, + "abbreviation": { + "type": "string", + "description": "may be used for import in an event list to avoid misspellings" + }, + "url": { + "type": "string", + "description": "should contain a web address, where an official description of the device can be found" + } + } + } + }, + "Taxon": { + "type": "array", + "items": { + "type": "object", + "properties": { + "name": { + "type": "string" + } + } + } + }, + "archived": { + "type": "boolean", + "description": "Has the dataset been archived?", + "default": false + }, + "publication_date": { + "type": "string", + "format": "date" + }, + "max_files": { + "type": "integer", + "description": "Maximum number of files to included by the CaosDB crawler", + "default": 100 + } + }, + "required": [ + "title", + "authors", + "abstract" + ] +} diff --git a/integrationtests/test_data/extroot/realworld_example/schema/dataspace.schema.json b/integrationtests/test_data/extroot/realworld_example/schema/dataspace.schema.json new file mode 100644 index 0000000000000000000000000000000000000000..01653bfa821e0a0acbb5a481bfd458e2ed784fb9 --- /dev/null +++ b/integrationtests/test_data/extroot/realworld_example/schema/dataspace.schema.json @@ -0,0 +1,45 @@ +{ + "title": "Dataspace", + "description": "A Dataspace is a folder in the DataCloud with a pre-defined structure", + "type": "object", + "properties": { + "dataspace_id": { + "type": "integer", + "description": "Integer ID of Dataspace (matches LDAP GID)", + "minimum": 20000 + }, + "archived": { "type": "boolean" }, + "url": { + "type": "string", + "description": "link to folder on file system (CaosDB or cloud folder)" + }, + 
"coordinator": { + "type": "object", + "title": "Person", + "properties": { + "full_name": { + "type": "string", + "description": "Full name (latin transcription, all UFT-8 characters allowed)" + }, + "full_name_nonlatin": { + "type": "string", + "description": "Full name (non-latin alphabet)" + }, + "family_name": { + "type": "string", + "description": "Family name (latin transcription)" + }, + "given_name": { + "type": "string", + "description": "Given/other names (latin transcription)" + }, + "email": { "type": "string", "format": "email" } + }, + "required": ["full_name", "email"] + }, + "start_date": { "type": "string", "format": "date" }, + "end_date": { "type": "string", "format": "date" }, + "comment": { "type": "string" } + }, + "required": ["dataspace_id", "url", "coordinator"] +} diff --git a/integrationtests/test_data/extroot/realworld_example/schema/zmt-organisation.yml b/integrationtests/test_data/extroot/realworld_example/schema/zmt-organisation.yml new file mode 100644 index 0000000000000000000000000000000000000000..7e251eeced7bf626e77364fc5555b1cb10dd3afb --- /dev/null +++ b/integrationtests/test_data/extroot/realworld_example/schema/zmt-organisation.yml @@ -0,0 +1,26 @@ +extern: +- name +- url +- Dataset + +german_name: + datatype: TEXT + inherit_from_obligatory: + - name + +Department: + recommended_properties: + url: + german_name: + + +WorkingGroup: + recommended_properties: + Department: + german_name: + url: + +Dataset: + recommended_properties: + WorkingGroup: + diff --git a/integrationtests/test_data/extroot/use_case_simple_presentation/DataAnalysis/results.md b/integrationtests/test_data/extroot/use_case_simple_presentation/DataAnalysis/results.md new file mode 100644 index 0000000000000000000000000000000000000000..b867d778942ce5595286870bd6a92e53015be0e8 --- /dev/null +++ b/integrationtests/test_data/extroot/use_case_simple_presentation/DataAnalysis/results.md @@ -0,0 +1,8 @@ +--- +identifier: test analysis +date: 2022-03-16 +source_identifier: crawlertest +source_date: 2022-03-16 + +frequency: 17 +--- diff --git a/integrationtests/test_data/extroot/use_case_simple_presentation/ExperimentalData/data.md b/integrationtests/test_data/extroot/use_case_simple_presentation/ExperimentalData/data.md new file mode 100644 index 0000000000000000000000000000000000000000..60dcd78ed1f70428b18e8762a14dc3fe7f3fa5cd --- /dev/null +++ b/integrationtests/test_data/extroot/use_case_simple_presentation/ExperimentalData/data.md @@ -0,0 +1,5 @@ +--- +date: "2022-03-16" +identifier: crawlertest +alpha: 16 +--- diff --git a/integrationtests/test_data/extroot/use_case_simple_presentation/cfood.yml b/integrationtests/test_data/extroot/use_case_simple_presentation/cfood.yml new file mode 100644 index 0000000000000000000000000000000000000000..0b17073a823269c9eebbf53df6b9002ac8dbd45f --- /dev/null +++ b/integrationtests/test_data/extroot/use_case_simple_presentation/cfood.yml @@ -0,0 +1,115 @@ +# This is only a scifolder test cfood with a limited functionality. 
+# The full scifolder cfood will be developed here: +# https://gitlab.indiscale.com/caosdb/src/crawler-cfoods/scifolder-cfood + +Definitions: + type: Definitions + #include "description.yml" + +Converters: {} + +extroot: + type: Directory + match: use_case_simple_presentation + subtree: + ExperimentalData: # name of the converter + type: Directory + match: ExperimentalData + subtree: + DataFile: + type: MarkdownFile + match: ^data\.md$ + + records: + mdfile: + parents: [] + role: File + path: $DataFile + file: $DataFile + + Experiment: + mdfile: $mdfile + + + subtree: + date: + type: DictTextElement + match_name: date + match_value: (?P<date>.+) + records: + Experiment: + date: $date + identifier: + type: DictTextElement + match_name: identifier + match_value: (?P<identifier>.+) + records: + Experiment: + identifier: $identifier + parameter_alpha: + type: DictTextElement + match_name: alpha + match_value: (?P<alpha>[0-9]+) + records: + Experiment: + alpha: $alpha + + DataAnalysis: + type: Directory + match: DataAnalysis + subtree: + DataFile: + type: MarkdownFile + match: ^results\.md$ + + records: + mdfile: + parents: [] + role: File + path: $DataFile + file: $DataFile + + Experiment: {} + + DataAnalysis: + mdfile: $mdfile + sources: +$Experiment + + subtree: + date: + type: DictTextElement + match_name: date + match_value: (?P<date>.+) + records: + DataAnalysis: + date: $date + identifier: + type: DictTextElement + match_name: identifier + match_value: (?P<identifier>.+) + records: + DataAnalysis: + identifier: $identifier + + frequency: + type: DictTextElement + match_name: frequency + match_value: (?P<frequency>[0-9]+) + records: + DataAnalysis: + frequency: $frequency + + source_date: + type: DictTextElement + match_name: source_date + match_value: (?P<source_date>.+) + records: + Experiment: + date: $source_date + source_identifier: + type: DictTextElement + match_name: source_identifier + match_value: (?P<source_identifier>.+) + records: + Experiment: + identifier: $source_identifier diff --git a/integrationtests/test_data/extroot/use_case_simple_presentation/identifiables.yml b/integrationtests/test_data/extroot/use_case_simple_presentation/identifiables.yml new file mode 100644 index 0000000000000000000000000000000000000000..94b593bfb4c425ce71a4f94504d4f0033538cacb --- /dev/null +++ b/integrationtests/test_data/extroot/use_case_simple_presentation/identifiables.yml @@ -0,0 +1,6 @@ +Experiment: +- date +- identifier +DataAnalysis: +- date +- identifier diff --git a/integrationtests/test_data/extroot/use_case_simple_presentation/model.yml b/integrationtests/test_data/extroot/use_case_simple_presentation/model.yml new file mode 100644 index 0000000000000000000000000000000000000000..b1dcbd487b56519553a873fdd252c93ce21402a1 --- /dev/null +++ b/integrationtests/test_data/extroot/use_case_simple_presentation/model.yml @@ -0,0 +1,42 @@ + + + +ScientificActivity: + description: | + The base record type for all scientific activities, like experiments, + data analysis records, simulations or publications. + recommended_properties: + sources: + description: This scientific activity is based on the activity referenced here. + datatype: LIST<ScientificActivity> + date: + description: The date according to https://doi.org/10.3390/data5020043 + datatype: DATETIME + identifier: + description: An identifier according to https://doi.org/10.3390/data5020043 + datatype: TEXT + mdfile: + description: The file storing information about this record. 
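+      # FILE-valued properties hold a reference to a CaosDB File entity; the integration tests below resolve mdfile with db.File(id=...).retrieve().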
+ datatype: FILE + +Experiment: + description: | + The base record type for all records containing data from experiments. + inherit_from_obligatory: + - ScientificActivity + obligatory_properties: + alpha: + description: A fictitious piece of data. + datatype: DOUBLE + unit: km + +DataAnalysis: + description: | + The base record type for all records containing results from data analysis. + inherit_from_obligatory: + - ScientificActivity + recommended_properties: + frequency: + description: A fictitious piece of data. + datatype: DOUBLE + unit: Hz
diff --git a/integrationtests/test_realworld_example.py b/integrationtests/test_realworld_example.py new file mode 100644 index 0000000000000000000000000000000000000000..cffb84946611e1c37f28d638acfa234ad12253b1 --- /dev/null +++ b/integrationtests/test_realworld_example.py @@ -0,0 +1,202 @@ +#!/usr/bin/env python3 +# encoding: utf-8 +# +# This file is a part of the CaosDB Project. +# +# Copyright (C) 2022 Indiscale GmbH <info@indiscale.com> +# Copyright (C) 2022 Henrik tom Wörden <h.tomwoerden@indiscale.com> +# Copyright (C) 2022 Florian Spreckelsen <f.spreckelsen@indiscale.com> +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see <https://www.gnu.org/licenses/>. +# + +""" +Integration tests for the realworld_example test data: crawl the demo dataspace +and check that Dataset, Dataspace, Person and Event records are created and +updated correctly. +""" +import json +import os + +import caosdb as db + +from caoscrawler.crawl import Crawler, main as crawler_main +from caoscrawler.converters import JSONFileConverter, DictConverter +from caoscrawler.identifiable_adapters import CaosDBIdentifiableAdapter +from caoscrawler.structure_elements import File, JSONFile, Directory +import pytest +from caosadvancedtools.models.parser import parse_model_from_json_schema, parse_model_from_yaml + +#from caosadvancedtools.testutils import clear_database, set_test_key +import sys + +# TODO: set_test_key is not yet merged in caosadvancedtools +# set_test_key("10b128cf8a1372f30aa3697466bb55e76974e0c16a599bb44ace88f19c8f61e2") + + +def rfp(*pathcomponents): + """ + Return full path. + Shorthand convenience function. + """ + return os.path.join(os.path.dirname(__file__), *pathcomponents) + + +DATADIR = rfp("test_data", "extroot", "realworld_example") + + +@pytest.fixture +def usemodel(): + # First load dataspace data model + dataspace_definitions = parse_model_from_json_schema( + os.path.join(DATADIR, "schema", "dataspace.schema.json")) + dataspace_definitions.sync_data_model(noquestion=True) + + # Then general dataset definitions + dataset_definitions = parse_model_from_json_schema( + os.path.join(DATADIR, "schema", "dataset.schema.json")) + dataset_definitions.sync_data_model(noquestion=True) + + # Finally, add inheritances as defined in yaml + dataset_inherits = parse_model_from_yaml( + os.path.join(DATADIR, "schema", "dataset-inheritance.yml")) + dataset_inherits.sync_data_model(noquestion=True) + + +@pytest.fixture +def clear_database(): + # TODO(fspreck): Remove once the corresponding advancedtools function can + # be used.
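+    # Delete all user-created test entities; IDs below 100 are reserved for CaosDB's built-in entities, which "ID>99" leaves untouched.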
+ ents = db.execute_query("FIND ENTITY WITH ID>99") + if ents: + ents.delete() + + +def test_dataset( + clear_database, + usemodel): + + ident = CaosDBIdentifiableAdapter() + ident.register_identifiable( + "license", db.RecordType().add_parent("license").add_property("name")) + ident.register_identifiable("project_type", db.RecordType( + ).add_parent("project_type").add_property("name")) + ident.register_identifiable("Person", db.RecordType( + ).add_parent("Person").add_property("full_name")) + + crawler = Crawler(identifiableAdapter=ident) + crawler_definition = crawler.load_definition( + os.path.join(DATADIR, "dataset_cfoods.yml")) + # print(json.dumps(crawler_definition, indent=3)) + # Load and register converter packages: + converter_registry = crawler.load_converters(crawler_definition) + # print("DictIntegerElement" in converter_registry) + + records = crawler.start_crawling( + Directory("data", os.path.join(DATADIR, 'data')), + crawler_definition, + converter_registry + ) + crawler.synchronize() + + dataspace = db.execute_query("FIND RECORD Dataspace WITH name=35 AND dataspace_id=20002 AND " + "archived=FALSE AND url='https://datacloud.de/index.php/f/7679'" + " AND Person", unique=True) + assert dataspace.get_property("start_date").value == "2022-03-01" + db.execute_query("FIND RECORD Person with full_name='Max Schmitt' AND" + " given_name='Max'", unique=True) + + dataset = db.execute_query(f"FIND RECORD Dataset with Dataspace={dataspace.id} AND title=" + "'Random numbers created on a random autumn day in a random office'" + "", unique=True) + assert db.execute_query(f"COUNT RECORD with id={dataset.id} AND WHICH REFERENCES Person WITH full_name=" + "'Alexa Nozone' AND WHICH REFERENCES Person WITH full_name='Max Schmitt'" + "") == 1 + assert db.execute_query(f"COUNT RECORD with id={dataset.id} AND WHICH REFERENCES Event WITH " + "start_datetime='2022-02-10T16:36:48+01:00'") == 1 + + +def test_event_update(clear_database, usemodel): + + identifiable_path = os.path.join(DATADIR, "identifiables.yml") + crawler_definition_path = os.path.join(DATADIR, "dataset_cfoods.yml") + + # TODO(fspreck): Use crawler_main + crawler_main( + os.path.join(DATADIR, 'data'), + crawler_definition_path, + identifiable_path, + True, + os.path.join(DATADIR, "provenance.yml"), + False, + "" + ) + + old_dataset_rec = db.execute_query( + "FIND RECORD Dataset WHICH HAS AN EVENT WITH location='Bremen, Germany'") + assert len(old_dataset_rec) == 1 + old_dataset_rec = old_dataset_rec[0] + assert old_dataset_rec.get_property("Event").datatype == db.LIST("Event") + assert len(old_dataset_rec.get_property("Event").value) == 1 + old_event_rec = db.Record( + id=old_dataset_rec.get_property("Event").value[0]).retrieve() + + # TODO(fspreck): crawl again manually, edit the event records in the update + # list, synchronize, and test whether the events have been updated. 
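+    # Crawl a second time with a fresh crawler and shift the Event coordinates before synchronizing. Because (longitude, latitude, start_datetime) forms the Event identifiable (see identifiables.yml), the edited Event no longer matches the stored one, so a new Event record is created and referenced by the Dataset.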
+ ident = CaosDBIdentifiableAdapter() + ident.load_from_yaml_definition(identifiable_path) + + second_crawler = Crawler(identifiableAdapter=ident) + crawler_definition = second_crawler.load_definition( + crawler_definition_path) + converter_registry = second_crawler.load_converters(crawler_definition) + records = second_crawler.start_crawling( + Directory("data", os.path.join(DATADIR, "data")), + crawler_definition, + converter_registry + ) + + for rec in records: + if rec.parents[0].name == "Event": + rec.get_property("longitude").value = 0.0 + rec.get_property("latitude").value = 0.0 + rec.get_property("location").value = "Origin" + elif rec.parents[0].name == "Dataset": + rec.get_property("Event").value[0].get_property( + "longitude").value = 0.0 + rec.get_property("Event").value[0].get_property( + "latitude").value = 0.0 + rec.get_property("Event").value[0].get_property( + "location").value = "Origin" + second_crawler.synchronize() + + # Dataset is still the same Record, but with an updated event + new_dataset_rec = db.Record(id=old_dataset_rec.id).retrieve() + for prop in old_dataset_rec.get_properties(): + if not prop.name == "Event": + assert new_dataset_rec.get_property( + prop.name).datatype == prop.datatype + assert new_dataset_rec.get_property( + prop.name).value == prop.value + assert new_dataset_rec.get_property("Event").datatype == db.LIST("Event") + assert new_dataset_rec.get_property("Event").value is not None + assert len(new_dataset_rec.get_property("Event").value) == 1 + assert new_dataset_rec.get_property("Event").value[0] != old_event_rec.id + + # The event has new properties + new_event_rec = db.Record( + id=new_dataset_rec.get_property("Event").value[0]).retrieve() + assert new_event_rec.get_property("longitude").value == 0.0 + assert new_event_rec.get_property("latitude").value == 0.0 + assert new_event_rec.get_property("location").value == "Origin" + assert new_event_rec.get_property( + "start_datetime").value == old_event_rec.get_property("start_datetime").value diff --git a/integrationtests/test_use_case_simple_presentation.py b/integrationtests/test_use_case_simple_presentation.py new file mode 100644 index 0000000000000000000000000000000000000000..f1c838d1aadf4cb8b51043a8a24b93eddf275c75 --- /dev/null +++ b/integrationtests/test_use_case_simple_presentation.py @@ -0,0 +1,98 @@ +#!/usr/bin/env python3 +# encoding: utf-8 +# +# ** header v3.0 +# This file is a part of the CaosDB Project. +# +# Copyright (C) 2022 Alexander Schlemmer +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see <https://www.gnu.org/licenses/>. +# +# ** end header +# + +""" +module description +""" +import os +import pytest +from subprocess import run + +import caosdb as db +from caosadvancedtools.loadFiles import loadpath +from caosadvancedtools.models import parser as parser +from caoscrawler.crawl import main as crawler_main + + +# TODO(fspreck) Re-eneable once this is part of dev in advancedusertools. 
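+# Until then, the local clear_database fixture defined below provides the cleanup.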
+# from caosadvancedtools.testutils import clear_database, set_test_key +# set_test_key("10b128cf8a1372f30aa3697466bb55e76974e0c16a599bb44ace88f19c8f61e2") + +DATADIR = os.path.join(os.path.dirname(__file__), "test_data", + "extroot", "use_case_simple_presentation") + + +@pytest.fixture +def clear_database(): + # TODO(fspreck): Remove once the corresponding advancedtools function can be + # used. + ents = db.execute_query("FIND ENTITY WITH ID>99") + if ents: + ents.delete() + + +def test_complete_crawler( + clear_database +): + # Setup the data model: + model = parser.parse_model_from_yaml(os.path.join(DATADIR, "model.yml")) + model.sync_data_model(noquestion=True, verbose=False) + + # Insert the data: + for path in [ + "/opt/caosdb/mnt/extroot/use_case_simple_presentation/ExperimentalData", + "/opt/caosdb/mnt/extroot/use_case_simple_presentation/DataAnalysis"]: + loadpath( + path=path, + include=None, + exclude=None, + prefix="/", + dryrun=False, + forceAllowSymlinks=False) + + crawler_main(DATADIR, + os.path.join(DATADIR, "cfood.yml"), + os.path.join(DATADIR, "identifiables.yml"), + True, + os.path.join(DATADIR, "provenance.yml"), + False, + "/use_case_simple_presentation") + + res = db.execute_query("FIND Record Experiment") + assert len(res) == 1 + assert res[0].get_property("identifier").value == "crawlertest" + assert res[0].get_property("date").value == "2022-03-16" + + lf = db.File(id=res[0].get_property("mdfile").value).retrieve() + assert lf.path == "/ExperimentalData/data.md" + + assert res[0].get_property("alpha").value == 16.0 + assert res[0].get_property("alpha").unit == "km" + + res_da = db.execute_query("FIND Record DataAnalysis") + assert len(res_da) == 1 + assert res_da[0].get_property("sources").value[0] == res[0].id + + lf = db.File(id=res_da[0].get_property("mdfile").value).retrieve() + assert lf.path == "/DataAnalysis/results.md" diff --git a/pyproject.toml b/pyproject.toml deleted file mode 100644 index 501dea852f18a271e49245342111f1c1c8e3fdd0..0000000000000000000000000000000000000000 --- a/pyproject.toml +++ /dev/null @@ -1,7 +0,0 @@ -[build-system] -requires = [ - "setuptools>=42", - "wheel" -] -build-backend = "setuptools.build_meta" - \ No newline at end of file diff --git a/pytest.ini b/pytest.ini index 6da8aae14bbd3e70d61d1f7b9f3bb06043ff1aa8..f818888e98690a861228b1f3c0214b1cc94fb6e1 100644 --- a/pytest.ini +++ b/pytest.ini @@ -1,3 +1,2 @@ [pytest] testpaths=unittests -addopts=-x -vv --cov=newcrawler diff --git a/setup.cfg b/setup.cfg index b14cc72add8b6eed430a3bbf8549bbc886d36849..2f8d46b30ee04d68adc6aef69e1a04115bbc44d8 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,5 +1,5 @@ [metadata] -name = newcrawler +name = caoscrawler version = 0.1 author = Alexander Schlemmer author_email = alexander.schlemmer@ds.mpg.de @@ -18,9 +18,21 @@ package_dir = = src packages = find: python_requires = >=3.6 +install_requires = + importlib-resources + caosdb + caosadvancedtools + yaml-header-tools + pyyaml [options.packages.find] where = src +[options.package_data] +* = *.yml [flake8] -per-file-ignores = __init__.py:F401 \ No newline at end of file +per-file-ignores = __init__.py:F401 + +[options.entry_points] +console_scripts = + caosdb-crawler = caoscrawler.crawl:main diff --git a/src/newcrawler/__init__.py b/src/caoscrawler/__init__.py similarity index 100% rename from src/newcrawler/__init__.py rename to src/caoscrawler/__init__.py diff --git a/src/caoscrawler/cfood-schema.yml b/src/caoscrawler/cfood-schema.yml new file mode 100644 index 
0000000000000000000000000000000000000000..6505cde7601d89dea84fa80d1ab7c36b2eca6895 --- /dev/null +++ b/src/caoscrawler/cfood-schema.yml @@ -0,0 +1,65 @@ +cfood: + type: object + additionalProperties: + $ref: + "#/$defs/converter" + $defs: + converter: + properties: + type: + enum: + - Directory + - File + - DictTextElement + - TextElement + - SimpleFile + - YamlFileCaosDBRecord + - MarkdownFile + - DictListElement + - DictDictElement + - DictFloatElement + - DictIntegerElement + - DictBooleanElement + - Definitions + - Dict + - JSONFile + description: Type of this converter node. + match: + description: typically a regexp which is matched to a structure element name + type: string + match_name: + description: a regexp that is matched to the key of a key-value pair + type: string + match_value: + description: a regexp that is matched to the value of a key-value pair + type: string + records: + description: This field is used to define new records or to modify records which have been defined on a higher level. + type: object + properties: + parents: + description: Parents for this record are given here as a list of names. + type: array + items: + type: string + additionalProperties: + oneOf: + - type: object + properties: + value: + description: Dictionary notation for variable values. Values can be given by a variable which is indicated by an initial "$". Use "$$" for setting values actually starting with a dollar sign. + type: string + collection_mode: + description: The collection mode defines whether the resulting property will be a single property or whether the values of multiple structure elements will be collected either into a list or a multiproperty. + enum: + - single + - list + - multiproperty + additionalProperties: false + - type: string + description: The short notation for values. Values can be given by a variable which is indicated by an initial "$". Use "$$" for setting values actually starting with a dollar sign. Multiproperties can be set using an initial "*" and list properties using an initial "+". + subtree: + type: object + additionalProperties: + $ref: + "#/$defs/converter" diff --git a/src/caoscrawler/converters.py b/src/caoscrawler/converters.py new file mode 100644 index 0000000000000000000000000000000000000000..babc5aa4ff8c329c9bdd93c1eceb7c03c5f7280c --- /dev/null +++ b/src/caoscrawler/converters.py @@ -0,0 +1,635 @@ +#!/usr/bin/env python3 +# encoding: utf-8 +# +# ** header v3.0 +# This file is a part of the CaosDB Project. +# +# Copyright (C) 2021 Henrik tom Wörden +# 2021 Alexander Schlemmer +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see <https://www.gnu.org/licenses/>. 
+# +# ** end header +# + +from jsonschema import validate, ValidationError +import os +import re +import caosdb as db +import json +import warnings +from .utils import has_parent +from .stores import GeneralStore, RecordStore +from .structure_elements import (StructureElement, Directory, File, Dict, JSONFile, + DictIntegerElement, DictBooleanElement, + DictFloatElement, DictDictElement, + TextElement, DictTextElement, DictElement, DictListElement) +from typing import Optional, Union +from abc import abstractmethod +from string import Template +import yaml_header_tools + +import yaml + +# These are special properties which are (currently) treated differently +# by the converters: +SPECIAL_PROPERTIES = ("description", "name", "id", "path", + "file", "checksum", "size") + + +class ConverterValidationError(Exception): + """To be raised if contents of an element to be converted are invalid.""" + + def __init__(self, msg): + self.message = msg + + +def replace_variables(propvalue, values: GeneralStore): + # Check if the replacement is a single variable containing a record: + match = re.match(r"^\$(\{)?(?P<varname>[0-9a-zA-Z_]+)(\})?$", propvalue) + if match is not None: + varname = match.group("varname") + if varname in values: + if values[varname] is None: + return None + if isinstance(values[varname], db.Entity): + return values[varname] + + propvalue_template = Template(propvalue) + return propvalue_template.safe_substitute(**values.get_storage()) + + +def handle_value(value: Union[dict, str, list], values: GeneralStore): + """ + Determines whether the given value needs to set a property, be appended to an existing value + (creating a list) or be added as an additional property (multiproperty). + + Variable names (starting with a "$") are replaced by the corresponding value stored in the + `values` GeneralStore. + + Parameters: + - value: if str, the value to be interpreted. E.g. "4", "hello" or "$a" etc. + if dict, must have keys "value" and "collection_mode". The returned tuple is directly + created from the corresponding values. + if list, each element is checked for replacement and the resulting list will be used + as (list) value for the property + Returns a tuple: + - the final value of the property; variable names contained in `values` are replaced. + - the collection mode (can be single, list or multiproperty) + """ + # @review Florian Spreckelsen 2022-05-13 + + if type(value) == dict: + if "value" not in value: + # TODO: how do we handle this case? Just ignore? + # or disallow? + raise NotImplementedError() + propvalue = value["value"] + # can be "single", "list" or "multiproperty" + collection_mode = value["collection_mode"] + elif type(value) == str: + propvalue = value + collection_mode = "single" + if propvalue.startswith("+"): + collection_mode = "list" + propvalue = propvalue[1:] + elif propvalue.startswith("*"): + collection_mode = "multiproperty" + propvalue = propvalue[1:] + elif type(value) == list: + # TODO: (for review) + # This is a bit dirty right now and needed for + # being able to directly set list values. The semantics are, however, + # slightly different from the two cases above.
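+ # Illustrative sketch, not part of the original code: assuming a GeneralStore + # "store" with store["a"] = "hello", the notations behave as follows: + # handle_value("4", store) -> ("4", "single") + # handle_value("+$a", store) -> ("hello", "list") + # handle_value("*$a", store) -> ("hello", "multiproperty") + # handle_value(["$a", "4"], store) -> (["hello", "4"], "single")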
+ collection_mode = "single" + propvalue = value + + # variables replacement: + propvalue = [replace_variables(i, values) for i in propvalue] + + return (propvalue, collection_mode) + else: + # value is another simple type + # collection_mode = "single" + # propvalue = value["value"] + # return (propvalue, collection_mode) + raise RuntimeError() + + propvalue = replace_variables(propvalue, values) + return (propvalue, collection_mode) + + +def create_records(values: GeneralStore, + records: RecordStore, + def_records: dict): + # List of keys to identify which variables have been set by which paths: + # the items are tuples: + # 0: record name + # 1: property name + keys_modified = [] + + for name, record in def_records.items(): + role = "Record" + # This allows us to create e.g. Files + if "role" in record: + role = record["role"] + + # Whether or not the record already exists in the store leads to two + # distinct cases for setting and updating variables: + if name not in records: + if role == "Record": + c_record = db.Record() + elif role == "File": + c_record = db.File() + else: + raise RuntimeError("Role {} not supported.".format(role)) + # add the new record to the record store: + records[name] = c_record + # additionally add the new record to the general store: + values[name] = c_record + + c_record = records[name] + + for key, value in record.items(): + if key == "parents" or key == "role": + continue + + # Allow replacing variables in keys / names of properties: + key_template = Template(key) + key = key_template.safe_substitute(**values.get_storage()) + + keys_modified.append((name, key)) + propvalue, collection_mode = handle_value(value, values) + + if key in SPECIAL_PROPERTIES: + # e.g. description, name, etc. + # list mode does not work for them + if key == "path" and not propvalue.startswith(os.path.sep): + propvalue = os.path.sep + propvalue + setattr(c_record, key, propvalue) + else: + + if c_record.get_property(key) is None: + + if collection_mode == "list": + c_record.add_property(name=key, value=[propvalue]) + elif (collection_mode == "multiproperty" or + collection_mode == "single"): + c_record.add_property(name=key, value=propvalue) + else: + if collection_mode == "list": + c_record.get_property(key).value.append(propvalue) + elif collection_mode == "multiproperty": + c_record.add_property(name=key, value=propvalue) + elif collection_mode == "single": + c_record.get_property(key).value = propvalue + + # no matter whether the record existed in the record store or not, + # parents will be added when they aren't present in the record yet: + if "parents" in record: + for parent in record["parents"]: + if not has_parent(c_record, parent): + c_record.add_parent(parent) + else: + # add the "fallback" parent only for Records, not for Files: + if role == "Record": + # if not has_parent(c_record, name): + if len(c_record.parents) == 0: + c_record.add_parent(name) + return keys_modified + + +class Converter(object): + """ + Converters treat StructureElements contained in the hierarchical structure.
+ """ + + def __init__(self, definition: dict, + name: str, + converter_registry: dict): + self.definition = definition + self.name = name + + # Used to store usage information for debugging: + self.metadata: dict[str, set[str]] = { + "usage": set() + } + + self.converters = [] + + if "subtree" in definition: + for converter_name in definition['subtree']: + converter_definition = definition["subtree"][converter_name] + self.converters.append(Converter.converter_factory( + converter_definition, converter_name, converter_registry)) + + @staticmethod + def converter_factory(definition: dict, + name: str, + converter_registry: dict): + + if "type" not in definition: + raise RuntimeError( + "Type is mandatory for converter entries in CFood definition.") + + if definition["type"] not in converter_registry: + raise RuntimeError("Unknown Type: {}".format(definition["type"])) + + if "class" not in converter_registry[definition["type"]]: + raise RuntimeError("Converter class not loaded correctly.") + + # instatiates an object of the required class, e.g. DirectoryConverter(definition, name) + converter = converter_registry[definition["type"]]["class"](definition, name, + converter_registry) + + return converter + + def create_values(self, + values: GeneralStore, + element: StructureElement): + """ + Extract information from the structure element and store them as values in the + general store. + + values: The GeneralStore to store values in. + element: The StructureElement to extract values from. + """ + m = self.match(element) + if m is None: + # this should never happen as the condition was checked before already + raise RuntimeError("Condition does not match.") + values.update(m) + + @abstractmethod + def create_children(self, values: GeneralStore, + element: StructureElement): + pass + + def create_records(self, values: GeneralStore, + records: RecordStore, + element: StructureElement): + + if "records" not in self.definition: + return [] + + return create_records(values, + records, + self.definition["records"]) + + @abstractmethod + def typecheck(self, element: StructureElement): + pass + + @abstractmethod + def match(self, element: StructureElement) -> Optional[dict]: + pass + + +class DirectoryConverter(Converter): + + def __init__(self, definition: dict, name: str, + converter_registry: dict): + """ + Initialize a new directory converter. + """ + super().__init__(definition, name, converter_registry) + + def create_children(self, generalStore: GeneralStore, + element: StructureElement): + if not isinstance(element, Directory): + raise RuntimeError( + "Directory converters can only create children from directories.") + + return self.create_children_from_directory(element) + + def typecheck(self, element: StructureElement): + return isinstance(element, Directory) + + def match(self, element: StructureElement): + if not isinstance(element, Directory): + raise RuntimeError("Element must be a directory.") + m = re.match(self.definition["match"], element.name) + if m is None: + return None + return m.groupdict() + + @staticmethod + def create_children_from_directory(element: Directory): + """ + Creates a list of files (of type File) and directories (of type Directory) for a + given directory. No recursion. + + element: A directory (of type Directory) which will be traversed. 
+ """ + children: list[StructureElement] = [] + + for name in sorted(os.listdir(element.path)): + path = os.path.join(element.path, name) + + if os.path.isdir(path): + children.append(Directory(name, path)) + elif os.path.isfile(path): + children.append(File(name, path)) + + return children + + +class SimpleFileConverter(Converter): + """ + Just a file, ignore the contents. + """ + + def typecheck(self, element: StructureElement): + return isinstance(element, File) + + def create_children(self, generalStore: GeneralStore, + element: StructureElement): + return list() + + def match(self, element: StructureElement): + if not isinstance(element, File): + raise RuntimeError("Element must be a file.") + m = re.match(self.definition["match"], element.name) + if m is None: + return None + return m.groupdict() + + +class MarkdownFileConverter(Converter): + def __init__(self, definition: dict, name: str, + converter_registry: dict): + """ + Initialize a new directory converter. + """ + super().__init__(definition, name, converter_registry) + + def create_children(self, generalStore: GeneralStore, + element: StructureElement): + if not isinstance(element, File): + raise RuntimeError("A markdown file is needed to create children.") + + header = yaml_header_tools.get_header_from_file( + element.path, clean=False) + children: list[StructureElement] = [] + + for name, entry in header.items(): + if type(entry) == list: + children.append(DictListElement(name, entry)) + elif type(entry) == str: + children.append(DictTextElement(name, entry)) + else: + raise RuntimeError( + "Header entry {} has incompatible type.".format(name)) + return children + + def typecheck(self, element: StructureElement): + return isinstance(element, File) + + def match(self, element: StructureElement): + if not isinstance(element, File): + raise RuntimeError("Element must be a file.") + m = re.match(self.definition["match"], element.name) + if m is None: + return None + try: + yaml_header_tools.get_header_from_file(element.path) + except yaml_header_tools.NoValidHeader: + # TODO(salexan): Raise a validation error instead of just not + # matching silently. + return None + return m.groupdict() + + +class DictConverter(Converter): + # TODO use Dict as typecheck? + def create_children(self, generalStore: GeneralStore, element: StructureElement): + if not self.typecheck(element): + raise RuntimeError("A dict is needed to create children") + + return self._create_children_from_dict(element.value) + + def _create_children_from_dict(self, data): + children = [] + + for name, value in data.items(): + if type(value) == list: + children.append(DictListElement(name, value)) + elif type(value) == str: + children.append(DictTextElement(name, value)) + elif type(value) == dict: + children.append(DictDictElement(name, value)) + elif type(value) == int: + children.append(DictIntegerElement(name, value)) + elif type(value) == bool: + children.append(DictBooleanElement(name, value)) + elif type(value) == float: + children.append(DictFloatElement(name, value)) + else: + children.append(DictElement(name, value)) + warnings.warn(f"The value in the dict for key:{name} has an unknown type. " + "The fallback type DictElement is used.") + + return children + + # TODO use Dict as typecheck? + def typecheck(self, element: StructureElement): + return isinstance(element, Dict) + + def match(self, element: StructureElement): + """ + Allways matches if the element has the right type. 
+ """ + if not isinstance(element, Dict): + raise RuntimeError("Element must be a DictElement.") + return {} + + +class FileConverter(Converter): + def typecheck(self, element: StructureElement): + return isinstance(element, File) + + def match(self, element: StructureElement): + if not self.typecheck(element): + raise RuntimeError("Element must be a file") + m = re.match(self.definition["match"], element.name) + if m is None: + return None + return m.groupdict() + + def create_children(self, generalStore: GeneralStore, element: StructureElement): + return [] + + +class JSONFileConverter(DictConverter): + def typecheck(self, element: StructureElement): + return isinstance(element, File) + + def match(self, element: StructureElement): + if not self.typecheck(element): + raise RuntimeError("Element must be a file") + m = re.match(self.definition["match"], element.name) + if m is None: + return None + return m.groupdict() + + def create_children(self, generalStore: GeneralStore, element: StructureElement): + if not self.typecheck(element): + raise RuntimeError("A JSON file is needed to create children") + with open(element.path, 'r') as json_file: + json_data = json.load(json_file) + if not isinstance(json_data, dict): + raise NotImplementedError("JSON file must contain a dict") + if "validate" in self.definition and self.definition["validate"]: + if isinstance(self.definition["validate"], dict): + schema = self.definition["validate"] + elif isinstance(self.definition["validate"], str): + + with open(self.definition["validate"], 'r') as json_file: + schema = json.load(json_file) + else: + raise ValueError("The value of 'validate' has to be a string describing the path " + "to the json schema file (relative to the cfood yml) " + "or a dict containing the schema.") + # Validate the json content + try: + validate(instance=json_data, schema=schema) + except ValidationError as err: + raise ConverterValidationError( + f"Couldn't validate {json_data}:\n{err.message}") + + return self._create_children_from_dict(json_data) + + +class _AbstractDictElementConverter(Converter): + + def create_children(self, generalStore: GeneralStore, element: StructureElement): + return [] + + def typecheck(self, element: StructureElement): + return True + + def match(self, element: StructureElement): + """ + Try to match the given structure element. + + If it does not match, return None. + + Else return a dictionary containing the variables from the matched regexp + as key value pairs. 
+ """ + if not self.typecheck(element): + raise RuntimeError( + f"Element has an invalid type: {type(element)}.") + m1 = re.match(self.definition["match_name"], element.name) + if m1 is None: + return None + m2 = re.match(self.definition["match_value"], str(element.value)) + if m2 is None: + return None + values = dict() + values.update(m1.groupdict()) + values.update(m2.groupdict()) + return values + + +class DictBooleanElementConverter(_AbstractDictElementConverter): + def typecheck(self, element: StructureElement): + return isinstance(element, DictBooleanElement) + + +class DictFloatElementConverter(_AbstractDictElementConverter): + def typecheck(self, element: StructureElement): + return isinstance(element, DictFloatElement) + + +class DictTextElementConverter(_AbstractDictElementConverter): + def typecheck(self, element: StructureElement): + return isinstance(element, DictTextElement) + + +class DictIntegerElementConverter(_AbstractDictElementConverter): + def typecheck(self, element: StructureElement): + return isinstance(element, DictIntegerElement) + + +class DictListElementConverter(Converter): + def create_children(self, generalStore: GeneralStore, + element: StructureElement): + if not isinstance(element, DictListElement): + raise RuntimeError( + "This converter can only process DictListElements.") + children = [] + for index, list_element in enumerate(element.value): + # TODO(fspreck): Refactor this and merge with DictXXXElements maybe? + if isinstance(list_element, str): + children.append(TextElement(str(index), list_element)) + elif isinstance(list_element, dict): + children.append(Dict(str(index), list_element)) + else: + raise NotImplementedError( + f"Unkown type {type(list_element)} in list element {list_element}.") + return children + + def typecheck(self, element: StructureElement): + return isinstance(element, DictListElement) + + def match(self, element: StructureElement): + if not isinstance(element, DictListElement): + raise RuntimeError("Element must be a DictListElement.") + m = re.match(self.definition["match_name"], element.name) + if m is None: + return None + if "match" in self.definition: + raise NotImplementedError( + "Match is not implemented for DictListElement.") + return m.groupdict() + + +class DictDictElementConverter(DictConverter): + def create_children(self, generalStore: GeneralStore, element: StructureElement): + if not self.typecheck(element): + raise RuntimeError("A dict is needed to create children") + + return self._create_children_from_dict(element.value) + + def typecheck(self, element: StructureElement): + return isinstance(element, DictDictElement) + + def match(self, element: StructureElement): + if not self.typecheck(element): + raise RuntimeError("Element must be a DictDictElement.") + m = re.match(self.definition["match_name"], element.name) + if m is None: + return None + if "match" in self.definition: + raise NotImplementedError( + "Match is not implemented for DictDictElement.") + return m.groupdict() + + +class TextElementConverter(Converter): + def create_children(self, generalStore: GeneralStore, + element: StructureElement): + return [] + + def typecheck(self, element: StructureElement): + return isinstance(element, TextElement) + + def match(self, element: StructureElement): + if not isinstance(element, TextElement): + raise RuntimeError("Element must be a TextElement.") + m = re.match(self.definition["match"], element.value) + if m is None: + return None + return m.groupdict() diff --git a/src/caoscrawler/crawl.py 
b/src/caoscrawler/crawl.py new file mode 100644 index 0000000000000000000000000000000000000000..0e95523a52c6e9b2212be0c4c911913d26bcdb91 --- /dev/null +++ b/src/caoscrawler/crawl.py @@ -0,0 +1,1052 @@ +#!/usr/bin/env python3 +# encoding: utf-8 +# +# ** header v3.0 +# This file is a part of the CaosDB Project. +# +# Copyright (C) 2021 Henrik tom Wörden +# 2021 Alexander Schlemmer +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see <https://www.gnu.org/licenses/>. +# +# ** end header +# + +""" +Crawl a file structure using a yaml cfood definition and synchronize +the acquired data with CaosDB. +""" + +import sys +import os +import yaml +from importlib_resources import files +import argparse +from argparse import RawTextHelpFormatter +import caosdb as db +from caosdb.common.datatype import is_reference +from .stores import GeneralStore, RecordStore +from .identified_cache import IdentifiedCache +from .structure_elements import StructureElement, Directory +from .converters import Converter, DirectoryConverter +from .identifiable_adapters import (IdentifiableAdapter, + LocalStorageIdentifiableAdapter, + CaosDBIdentifiableAdapter) +from collections import defaultdict +from typing import Union, Any, Optional, Type +from caosdb.apiutils import compare_entities, merge_entities +from copy import deepcopy +from jsonschema import validate + + +import importlib + +SPECIAL_PROPERTIES_STRICT = ("description", "name", "id", "path") +SPECIAL_PROPERTIES_NOT_STRICT = ("file", "checksum", "size") + + +def check_identical(record1: db.Entity, record2: db.Entity, ignore_id=False): + """ + This function uses compare_entities to check whether two entities are identical + in a quite complex fashion: + - If one of the entities has additional parents or additional properties -> not identical + - If the value of one of the properties differs -> not identical + - If datatype, importance or unit are reported as different for a property by compare_entities, + the records count as not identical only if these attributes are set explicitly by record1. + Ignore the difference otherwise. + - If description, name, id or path appear in the list of differences -> not identical. + - If file, checksum, size appear -> only different if explicitly set by record1. + + record1 serves as the reference, so datatype, importance and unit checks are carried + out using the attributes from record1. In that respect, the function is not symmetrical + in its arguments.
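+ + A minimal sketch (hypothetical records, assuming caosdb-pylib's chaining setters): + + >>> r1 = db.Record().add_parent(name="Experiment").add_property(name="alpha", value=16) + >>> r2 = db.Record().add_parent(name="Experiment").add_property(name="alpha", value=16) + >>> check_identical(r1, r2, ignore_id=True) + True + >>> r2.get_property("alpha").value = 17 + >>> check_identical(r1, r2, ignore_id=True) + False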
+ """ + comp = compare_entities(record1, record2) + + if ignore_id: + if "id" in comp[0]: + del comp[0]["id"] + if "id" in comp[1]: + del comp[1]["id"] + + for j in range(2): + for label in ("parents", ): + if len(comp[j][label]) > 0: + return False + for special_property in SPECIAL_PROPERTIES_STRICT: + if special_property in comp[0] or special_property in comp[1]: + return False + + for special_property in SPECIAL_PROPERTIES_NOT_STRICT: + if special_property in comp[0]: + attr_val = comp[0][special_property] + other_attr_val = (comp[1][special_property] + if special_property in comp[1] else None) + if attr_val is not None and attr_val != other_attr_val: + return False + + for key in comp[0]["properties"]: + if len(comp[0]["properties"][key]) == 0: + # This is a new property + return False + for attribute in ("datatype", "importance", "unit"): + # only make an update for those attributes if there is a value difference and + # the value in the target_data is not None + if attribute in comp[0]["properties"][key]: + attr_val = comp[0]["properties"][key][attribute] + other_attr_val = (comp[1]["properties"][key][attribute] + if attribute in comp[1]["properties"][key] else None) + if attr_val is not None and attr_val != other_attr_val: + return False + + if "value" in comp[0]["properties"][key]: + return False + + # Check for removed properties: + for key in comp[1]["properties"]: + if len(comp[1]["properties"][key]) == 0: + # This is a removed property + return False + + return True + + +def _resolve_datatype(prop: db.Property, remote_entity: db.Entity): + """ sets the datatype on the given property (side effect) """ + + if remote_entity.role == "Property": + datatype = remote_entity.datatype + elif remote_entity.role == "RecordType": + datatype = remote_entity.name + else: + raise RuntimeError("Cannot set datatype.") + + # Treat lists separately + if isinstance(prop.value, list) and not datatype.startswith("LIST"): + datatype = db.LIST(datatype) + + prop.datatype = datatype + return prop + + +class Crawler(object): + """ + Crawler class that encapsulates crawling functions. + Furthermore it keeps track of the storage for records (record store) and the + storage for values (general store). + """ + + def __init__(self, converters: list[Converter] = [], + generalStore: Optional[GeneralStore] = None, + debug: bool = False, + identifiableAdapter: IdentifiableAdapter = None): + """ + Create a new crawler and initialize an empty RecordStore and GeneralStore. + + converters: The set of converters used for this crawler. + recordStore: An initial GeneralStore which might store e.g. environment variables. + + debug: Create a debugging information tree when set to True. + The debugging information tree is a variable stored in + self.debug_tree. It is a dictionary mapping directory entries + to a tuple of general stores and record stores which are valid for the directory scope. + Furthermore, it is stored in a second tree named self.debug_copied whether the + objects in debug_tree had been copied from a higher level in the hierarchy + of the structureelements. 
+ """ + + # TODO: check if this feature is really needed + self.global_converters = converters + + self.identified_cache = IdentifiedCache() + self.recordStore = RecordStore() + + self.generalStore = generalStore + if generalStore is None: + self.generalStore = GeneralStore() + + self.identifiableAdapter = identifiableAdapter + if identifiableAdapter is None: + self.identifiableAdapter = LocalStorageIdentifiableAdapter() + + self.debug = debug + if self.debug: + # order in the tuple: + # 0: generalStore + # 1: recordStore + self.debug_tree: dict[str, tuple] = dict() + self.debug_metadata: dict[str, dict] = dict() + self.debug_metadata["copied"] = dict() + self.debug_metadata["provenance"] = defaultdict(lambda: dict()) + self.debug_metadata["usage"] = defaultdict(lambda: set()) + + def load_definition(self, crawler_definition_path: str): + """ + Load a cfood from a crawler definition defined by + crawler definition path and validate it using cfood-schema.yml. + """ + + # Load the cfood from a yaml file: + with open(crawler_definition_path, "r") as f: + crawler_definition = yaml.safe_load(f) + + # TODO: at this point this function can already load the cfood schema extensions + # from the crawler definition and add them to the yaml schema that will be + # tested in the next lines of code: + + # Load the cfood schema: + with open(files('caoscrawler').joinpath('cfood-schema.yml'), "r") as f: + schema = yaml.safe_load(f) + + # Add custom converters to converter enum in schema: + if "Converters" in crawler_definition: + for key in crawler_definition["Converters"]: + schema["cfood"]["$defs"]["converter"]["properties"]["type"]["enum"].append( + key) + + # Validate the cfood schema: + validate(instance=crawler_definition, schema=schema["cfood"]) + + return self._resolve_validator_paths(crawler_definition, crawler_definition_path) + + def _resolve_validator_paths(self, definition: dict, definition_path: str): + """Resolve path to validation files with respect to the file in which + the crawler was defined. 
+ + """ + + for key, value in definition.items(): + + if key == "validate" and isinstance(value, str): + # Validator is given by a path + if not value.startswith('/'): + # Not an absolute path + definition[key] = os.path.join( + os.path.dirname(definition_path), value) + if not os.path.isfile(definition[key]): + raise FileNotFoundError( + f"Couldn't find validation file {definition[key]}") + elif isinstance(value, dict): + # Recursively resolve all validators + definition[key] = self._resolve_validator_paths( + value, definition_path) + + return definition + + def load_converters(self, definition: dict): + """ + Currently the converter registry is a dictionary containing for each converter: + - key is the short code, abbreviation for the converter class name + - module is the name of the module to be imported which must be installed + - class is the converter class to load and associate with this converter entry + + all other info for the converter needs to be included in the converter plugin + directory: + schema.yml file + README.md documentation + """ + + # Defaults for the converter registry: + converter_registry: dict[str, dict[str, str]] = { + "Directory": { + "converter": "DirectoryConverter", + "package": "caoscrawler.converters"}, + "SimpleFile": { + "converter": "SimpleFileConverter", + "package": "caoscrawler.converters"}, + "MarkdownFile": { + "converter": "MarkdownFileConverter", + "package": "caoscrawler.converters"}, + "File": { + "converter": "FileConverter", + "package": "caoscrawler.converters"}, + "JSONFile": { + "converter": "JSONFileConverter", + "package": "caoscrawler.converters"}, + "Dict": { + "converter": "DictConverter", + "package": "caoscrawler.converters"}, + "DictBooleanElement": { + "converter": "DictBooleanElementConverter", + "package": "caoscrawler.converters"}, + "DictFloatElement": { + "converter": "DictFloatElementConverter", + "package": "caoscrawler.converters"}, + "DictTextElement": { + "converter": "DictTextElementConverter", + "package": "caoscrawler.converters"}, + "DictIntegerElement": { + "converter": "DictIntegerElementConverter", + "package": "caoscrawler.converters"}, + "DictListElement": { + "converter": "DictListElementConverter", + "package": "caoscrawler.converters"}, + "DictDictElement": { + "converter": "DictDictElementConverter", + "package": "caoscrawler.converters"}, + "TextElement": { + "converter": "TextElementConverter", + "package": "caoscrawler.converters"} + } + + # More converters from definition file: + if "Converters" in definition: + for key, entry in definition["Converters"].items(): + converter_registry[key] = { + "converter": entry["converter"], + "package": entry["package"] + } + + # Load modules and associate classes: + for key, value in converter_registry.items(): + module = importlib.import_module(value["package"]) + value["class"] = getattr(module, value["converter"]) + return converter_registry + + def crawl_directory(self, dirname: str, crawler_definition_path: str): + """ Crawl a single directory. + + Convenience function that starts the crawler (calls start_crawling) + with a single directory as the StructureElement. 
+ """ + + crawler_definition = self.load_definition(crawler_definition_path) + # Load and register converter packages: + converter_registry = self.load_converters(crawler_definition) + + if not dirname: + raise ValueError( + "You have to provide a non-empty path for crawling.") + dir_structure_name = os.path.basename(dirname) + if not dir_structure_name and dirname.endswith('/'): + if dirname == '/': + # Crawling the entire file system + dir_structure_name = "root" + else: + # dirname had a trailing '/' + dir_structure_name = os.path.basename(dirname[:-1]) + + self.start_crawling(Directory(dir_structure_name, + dirname), + crawler_definition, + converter_registry) + + @staticmethod + def create_local_converters(crawler_definition: dict, + converter_registry: dict): + local_converters = [] + + for key, value in crawler_definition.items(): + # Definitions and Converters are reserved keywords + # on the top level of the yaml file. + # TODO: there should also be a top level keyword for the actual + # CFood to avoid confusion between top level keywords + # and the CFood. + if key == "Definitions": + continue + elif key == "Converters": + continue + local_converters.append(Converter.converter_factory( + value, key, converter_registry)) + + return local_converters + + def start_crawling(self, items: Union[list[StructureElement], StructureElement], + crawler_definition: dict, + converter_registry: dict): + """ + Start point of the crawler recursion. + + Parameters + ---------- + items: list + A list of structure elements (or a single StructureElement) that is used for + generating the initial items for the crawler. This could e.g. be a Directory. + crawler_definition : dict + A dictionary representing the crawler definition, possibly from a yaml + file. + + Returns + ------- + target_data : list + the final list with the target state of Records. + """ + + # This function builds the tree of converters out of the crawler definition. + + if self.generalStore is None: + raise RuntimeError("Should not happen.") + + if not isinstance(items, list): + items = [items] + + local_converters = Crawler.create_local_converters(crawler_definition, + converter_registry) + # This recursive crawling procedure generates the update list: + self.target_data: list[db.Record] = [] + self._crawl(items, + self.global_converters, local_converters, self.generalStore, self.recordStore, + [], []) + + if self.debug: + self.debug_converters = self.global_converters + local_converters + + return self.target_data + + def synchronize(self, commit_changes: bool = True): + """ + Carry out the actual synchronization. + """ + + # After the crawling, the actual synchronization with the database, based on the + # update list is carried out: + + return self._synchronize(self.target_data, commit_changes) + + def can_be_checked_externally(self, record: db.Record): + """ + Returns False if there is at least one property in record which: + a) is a reference property AND + b) where the value is set to a db.Entity (instead of an ID) AND + c) where the ID of the value is not set (to an integer) + + Returns True otherwise. + """ + identifiable = self.identifiableAdapter.get_identifiable(record) + for p in identifiable.properties: + + if isinstance(p.value, list): + for el in p.value: + if isinstance(el, db.Entity) and el.id is None: + return False + # TODO: please check! 
+ # I removed the condition "is_reference", because the datatype field + # that is checked within this function is not always present for references + # parsed from the file structure. We have to rely on the assumption that + # if a property value is of type entity, it is a reference. + # elif (is_reference(p) and isinstance(p.value, db.Entity) + # and p.value.id is None): + elif isinstance(p.value, db.Entity) and p.value.id is None: + return False + return True + + def create_flat_list(self, ent_list: list[db.Entity], flat: list[db.Entity]): + """ + Recursively adds all properties contained in entities from ent_list to + the output list flat. Each element will only be added once to the list. + + TODO: This function will be moved to pylib as it is also needed by the + high level API. + """ + for ent in ent_list: + for p in ent.properties: + # For lists append each element that is of type Entity to flat: + if isinstance(p.value, list): + for el in p.value: + if isinstance(el, db.Entity): + if el not in flat: + flat.append(el) + # TODO: move inside if block? + self.create_flat_list([el], flat) + elif isinstance(p.value, db.Entity): + if p.value not in flat: + flat.append(p.value) + # TODO: move inside if block? + self.create_flat_list([p.value], flat) + + def all_references_are_existing_already(self, record: db.Record): + """ + Returns True if all references either have IDs or were checked remotely and not found (i.e. + they exist in the local cache). + """ + identifiable = self.identifiableAdapter.get_identifiable(record) + for p in identifiable.properties: + # if (is_reference(p) + # Entity instead of ID and not cached locally + if (isinstance(p.value, list)): + for el in p.value: + if (isinstance(el, db.Entity) and el.id is None + and self.get_identified_record_from_local_cache(el) is None): + return False + if (isinstance(p.value, db.Entity) and p.value.id is None + and self.get_identified_record_from_local_cache(p.value) is None): + # might be checked when reference is resolved + return False + return True + + def replace_references_with_cached(self, record: db.Record): + """ + Replace all references with the versions stored in the cache. + + If the cache version is not identical, raise an error.
+ """ + for p in record.properties: + if (isinstance(p.value, list)): + lst = [] + for el in p.value: + if (isinstance(el, db.Entity) and el.id is None): + cached = self.get_identified_record_from_local_cache( + el) + if cached is None: + raise RuntimeError("Not in cache.") + if not check_identical(cached, el, True): + if isinstance(p.value, db.File): + if p.value.path != cached.path: + raise RuntimeError("Not identical.") + else: + raise RuntimeError("Not identical.") + lst.append(cached) + else: + lst.append(el) + p.value = lst + if (isinstance(p.value, db.Entity) and p.value.id is None): + cached = self.get_identified_record_from_local_cache(p.value) + if cached is None: + raise RuntimeError("Not in cache.") + if not check_identical(cached, p.value, True): + if isinstance(p.value, db.File): + if p.value.path != cached.path: + raise RuntimeError("Not identical.") + else: + raise RuntimeError("Not identical.") + p.value = cached + + def get_identified_record_from_local_cache(self, record: db.Record): + """ + returns the identifiable if an identifiable with the same values already exists locally + (Each identifiable that is not found on the remote server, is 'cached' locally to prevent + that the same identifiable exists twice) + """ + if self.identifiableAdapter is None: + raise RuntimeError("Should not happen.") + identifiable = self.identifiableAdapter.get_identifiable(record) + if identifiable is None: + # TODO: check whether the same idea as below works here + identifiable = record + # return None + + if identifiable in self.identified_cache: + return self.identified_cache[identifiable] + else: + return None + + def add_identified_record_to_local_cache(self, record: db.Record): + """ + adds the given identifiable to the local cache + + No identifiable with the same values must exist locally. + (Each identifiable that is not found on the remote server, is 'cached' locally to prevent + that the same identifiable exists twice) + + Return False if there is no identifiable for this record and True otherwise. + """ + if self.identifiableAdapter is None: + raise RuntimeError("Should not happen.") + identifiable = self.identifiableAdapter.get_identifiable(record) + if identifiable is None: + # TODO: this error report is bad + # we need appropriate handling for records without an identifiable + # or at least a simple fallback definition if tehre is no identifiable. + + # print(record) + # raise RuntimeError("No identifiable for record.") + + # TODO: check whether that holds: + # if there is no identifiable, for the cache that is the same + # as if the complete entity is the identifiable: + identifiable = record + self.identified_cache.add(identifiable=identifiable, record=record) + + def copy_attributes(self, fro: db.Entity, to: db.Entity): + """ + Copy all attributes from one entity to another entity. + """ + + merge_entities(to, fro) + + def split_into_inserts_and_updates(self, ent_list: list[db.Entity]): + if self.identifiableAdapter is None: + raise RuntimeError("Should not happen.") + to_be_inserted: list[db.Entity] = [] + to_be_updated: list[db.Entity] = [] + flat = list(ent_list) + # assure all entities are direct members TODO Can this be removed at some point?Check only? 
+ self.create_flat_list(ent_list, flat) + + # TODO: can the following be removed at some point + for ent in flat: + if ent.role == "Record" and len(ent.parents) == 0: + raise RuntimeError("Records must have a parent.") + + resolved_references = True + # flat contains Entities which could not yet be checked against the remote server + while resolved_references and len(flat) > 0: + resolved_references = False + + for i in reversed(range(len(flat))): + record = flat[i] + + # TODO remove if the exception is never raised + if (record.id is not None or record in to_be_inserted): + raise RuntimeError("This should not be reached since treated elements " + "are removed from the list") + # Check the local cache first for duplicates + elif self.get_identified_record_from_local_cache(record) is not None: + + # This record is a duplicate that can be removed. Make sure we do not lose + # information + # Update a (local) identified record that will be inserted + newrecord = self.get_identified_record_from_local_cache( + record) + self.copy_attributes(fro=record, to=newrecord) + # Bend references to the other object + # TODO refactor this + for el in flat + to_be_inserted + to_be_updated: + for p in el.properties: + if isinstance(p.value, list): + for index, val in enumerate(p.value): + if val is record: + p.value[index] = newrecord + else: + if p.value is record: + p.value = newrecord + + del flat[i] + + # all references need to be IDs that exist on the remote server + elif self.can_be_checked_externally(record): + + # Check remotely + # TODO: remove deepcopy? + identified_record = self.identifiableAdapter.retrieve_identified_record_for_record( + deepcopy(record)) + if identified_record is None: + # identifiable does not exist remotely + to_be_inserted.append(record) + self.add_identified_record_to_local_cache(record) + del flat[i] + else: + # side effect + record.id = identified_record.id + # On update every property needs to have an ID. + # This will be achieved by the function execute_updates_in_list below. + # For files this is not enough, we also need to copy over + # checksum and size: + if isinstance(record, db.File): + record._size = identified_record._size + record._checksum = identified_record._checksum + + to_be_updated.append(record) + # TODO think this through + self.add_identified_record_to_local_cache(record) + del flat[i] + resolved_references = True + + # e.g. references an identifiable that does not exist remotely + elif self.all_references_are_existing_already(record): + + # TODO: (for review) + # This was the old version, but also for this case the + # check for identifiables has to be done. + # to_be_inserted.append(record) + # self.add_identified_record_to_local_cache(record) + # del flat[i] + + # TODO: (for review) + # If the following replacement is not done, the cache will + # be invalid as soon as references are resolved. + # replace references by versions from cache: + self.replace_references_with_cached(record) + + identified_record = self.identifiableAdapter.retrieve_identified_record_for_record( + deepcopy(record)) + if identified_record is None: + # identifiable does not exist remotely + to_be_inserted.append(record) + self.add_identified_record_to_local_cache(record) + del flat[i] + else: + # side effect + record.id = identified_record.id + # On update every property needs to have an ID. + # This will be achieved by the function execute_updates_in_list below.
+ + to_be_updated.append(record) + # TODO think this through + self.add_identified_record_to_local_cache(record) + del flat[i] + + resolved_references = True + + if len(flat) > 0: + raise RuntimeError( + "Could not resolve all Entity references. Circular Dependency?") + + return to_be_inserted, to_be_updated + + def replace_entities_with_ids(self, rec: db.Record): + for el in rec.properties: + if isinstance(el.value, db.Entity): + if el.value.id is not None: + el.value = el.value.id + elif isinstance(el.value, list): + for index, val in enumerate(el.value): + if isinstance(val, db.Entity): + if val.id is not None: + el.value[index] = val.id + + @staticmethod + def remove_unnecessary_updates(target_data: list[db.Record], + identified_records: list[db.Record]): + """ + checks whether all relevant attributes (especially Property values) are equal + + Returns (in future) + ------- + update list without unnecessary updates + + """ + if len(target_data) != len(identified_records): + raise RuntimeError("The lists of updates and of identified records need to be of the " + "same length!") + # TODO this can now easily be changed to a function without side effect + for i in reversed(range(len(target_data))): + identical = check_identical(target_data[i], identified_records[i]) + + if identical: + del target_data[i] + + @staticmethod + def execute_inserts_in_list(to_be_inserted): + for record in to_be_inserted: + for prop in record.properties: + entity = db.Entity(name=prop.name).retrieve() + _resolve_datatype(prop, entity) + print("INSERT") + print(to_be_inserted) + if len(to_be_inserted) > 0: + db.Container().extend(to_be_inserted).insert() + + @staticmethod + def execute_updates_in_list(to_be_updated): + # retrieve ids of properties when missing: + for record in to_be_updated: + for parent in record.parents: + if parent.id is None: + parent.id = db.Entity(name=parent.name).retrieve().id + for prop in record.properties: + if prop.id is None: + entity = db.Entity(name=prop.name).retrieve() + prop.id = entity.id + _resolve_datatype(prop, entity) + print("UPDATE") + print(to_be_updated) + if len(to_be_updated) > 0: + db.Container().extend(to_be_updated).update() + + def _synchronize(self, target_data: list[db.Record], commit_changes: bool = True): + """ + This function applies several stages: + 1) Retrieve identifiables for all records in target_data. + 2) Compare target_data with existing records. + 3) Insert and update records based on the set of identified differences. + + This function makes use of an IdentifiableAdapter which is used to + register and retrieve identifiables. + + If commit_changes is True, the changes are synchronized to the CaosDB server. + For debugging it can be useful to set this to False. + + Return the final to_be_inserted and to_be_updated as tuple.
+ """ + + if self.identifiableAdapter is None: + raise RuntimeError("Should not happen.") + + to_be_inserted, to_be_updated = self.split_into_inserts_and_updates(target_data) + + # TODO: refactoring of typo + for el in to_be_updated: + # all entity objects are replaced by their IDs except for the not yet inserted ones + self.replace_entities_with_ids(el) + + identified_records = [ + self.identifiableAdapter.retrieve_identified_record_for_record(record) + for record in to_be_updated] + # remove unnecessary updates from list by comparing the target records to the existing ones + self.remove_unnecessary_updates(to_be_updated, identified_records) + + if commit_changes: + self.execute_inserts_in_list(to_be_inserted) + self.execute_updates_in_list(to_be_updated) + + return (to_be_inserted, to_be_updated) + + @staticmethod + def debug_build_usage_tree(converter: Converter): + res: dict[str, dict[str, Any]] = { + converter.name: { + "usage": ", ".join(converter.metadata["usage"]), + "subtree": {} + } + } + + for subconv in converter.converters: + d = Crawler.debug_build_usage_tree(subconv) + k = list(d.keys()) + if len(k) != 1: + raise RuntimeError( + "Unkonwn error during building of usage tree.") + res[converter.name]["subtree"][k[0]] = d[k[0]] + return res + + def save_debug_data(self, filename: str): + paths: dict[str, Union[dict, list]] = dict() + + def flatten_debug_info(key): + mod_info = self.debug_metadata[key] + paths[key] = dict() + for record_name in mod_info: + if key == "provenance": + paths[key][record_name] = dict() + for prop_name in mod_info[record_name]: + paths[key][record_name][prop_name] = { + "structure_elements_path": "/".join( + mod_info[record_name][prop_name][0]), + "converters_path": "/".join( + mod_info[record_name][prop_name][1])} + elif key == "usage": + paths[key][record_name] = ", ".join(mod_info[record_name]) + for key in ("provenance", "usage"): + flatten_debug_info(key) + + paths["converters_usage"] = [self.debug_build_usage_tree( + cv) for cv in self.debug_converters] + + with open(filename, "w") as f: + f.write(yaml.dump(paths, sort_keys=False)) + + def _crawl(self, items: list[StructureElement], + global_converters: list[Converter], + local_converters: list[Converter], + generalStore: GeneralStore, + recordStore: RecordStore, + structure_elements_path: list[str], converters_path: list[str]): + """ + Crawl a list of StructureElements and apply any matching converters. + + items: structure_elements (e.g. files and folders on one level on the hierarchy) + global_converters and local_converters: globally or locally defined converters for + treating structure elements. A locally defined converter could be + one that is only valid for a specific subtree of the originally + cralwed StructureElement structure. + generalStore and recordStore: This recursion of the crawl function should only operate on copies of the + global stores of the Crawler object. 
+ """ + for element in items: + for converter in global_converters + local_converters: + # type is something like "matches files", replace isinstance with "type_matches" + # match function tests regexp for example + if (converter.typecheck(element) and + converter.match(element) is not None): + generalStore_copy = generalStore.create_scoped_copy() + recordStore_copy = recordStore.create_scoped_copy() + + # Create an entry for this matched structure element: + generalStore_copy[converter.name] = ( + os.path.join(*(structure_elements_path + [element.get_name()]))) + + # extracts values from structure element and stores them in the + # variable store + converter.create_values(generalStore_copy, element) + + keys_modified = converter.create_records( + generalStore_copy, recordStore_copy, element) + + children = converter.create_children( + generalStore_copy, element) + if self.debug: + # add provenance information for each varaible + self.debug_tree[str(element)] = ( + generalStore_copy.get_storage(), recordStore_copy.get_storage()) + self.debug_metadata["copied"][str(element)] = ( + generalStore_copy.get_dict_copied(), recordStore_copy.get_dict_copied()) + self.debug_metadata["usage"][str(element)].add( + "/".join(converters_path + [converter.name])) + mod_info = self.debug_metadata["provenance"] + for record_name, prop_name in keys_modified: + # TODO: check + internal_id = recordStore_copy.get_internal_id( + record_name) + record_identifier = record_name + \ + "_" + str(internal_id) + converter.metadata["usage"].add(record_identifier) + mod_info[record_identifier][prop_name] = (structure_elements_path + [element.get_name()], + converters_path + [converter.name]) + + self._crawl(children, global_converters, converter.converters, + generalStore_copy, recordStore_copy, + structure_elements_path + [element.get_name()], + converters_path + [converter.name]) + # if the crawler is running out of scope, copy all records in + # the recordStore, that were created in this scope + # to the general update container. + scoped_records = recordStore.get_records_current_scope() + for record in scoped_records: + self.target_data.append(record) + + # TODO: the scoped variables should be cleaned up as soon if the variables + # are no longer in the current scope. This can be implemented as follows, + # but this breaks the test "test_record_structure_generation", because + # some debug info is also deleted. This implementation can be used as soon + # as the remaining problems with the debug_tree are fixed. 
+ # Delete the variables that are no longer needed: + # scoped_names = recordStore.get_names_current_scope() + # for name in scoped_names: + # del recordStore[name] + # del generalStore[name] + + return self.target_data + + +def main(crawled_directory_path: str, + cfood_file_name: str, + identifiables_definition_file: str = None, + debug: bool = False, + provenance_file: str = None, + dry_run: bool = False, + prefix: str = ""): + """ + + Parameters + ---------- + crawled_directory_path : str + path to be crawled + cfood_file_name : str + filename of the cfood to be used + identifiables_definition_file : str + filename of an identifiable definition yaml file + debug : bool + whether or not to run in debug mode + provenance_file : str + provenance information will be stored in a file with given filename + dry_run : bool + do not commit any changes to the server + prefix : str + remove the given prefix from file paths + + Returns + ------- + return_value : int + 0 if successful + """ + crawler = Crawler(debug=debug) + crawler.crawl_directory(crawled_directory_path, cfood_file_name) + if provenance_file is not None: + crawler.save_debug_data(provenance_file) + + if identifiables_definition_file is not None: + + ident = CaosDBIdentifiableAdapter() + ident.load_from_yaml_definition(identifiables_definition_file) + crawler.identifiableAdapter = ident + + if dry_run: + ins, upd = crawler.synchronize(commit_changes=False) + inserts = [str(i) for i in ins] + updates = [str(i) for i in upd] + with open("dry.yml", "w") as f: + f.write(yaml.dump({ + "insert": inserts, + "update": updates})) + else: + rtsfinder = dict() + for elem in crawler.target_data: + if isinstance(elem, db.File): + # correct the file path: + # elem.file = os.path.join(args.path, elem.file) + if elem.path.startswith(prefix): + elem.path = elem.path[len(prefix):] + elem.file = None + # TODO: as long as the new file backend is not finished + # we are using the loadFiles function to insert symlinks. + # Therefore, I am setting the files to None here. + # Otherwise, the symlinks in the database would be replaced + # by uploads of the files which we currently do not want to happen. + + # Check whether all needed RecordTypes exist: + if len(elem.parents) > 0: + for parent in elem.parents: + if parent.name in rtsfinder: + continue + + rt = db.RecordType(name=parent.name) + try: + rt.retrieve() + rtsfinder[parent.name] = True + except db.TransactionError: + rtsfinder[parent.name] = False + notfound = [k for k, v in rtsfinder.items() if not v] + if len(notfound) > 0: + raise RuntimeError("Missing RecordTypes: {}". + format(", ".join(notfound))) + + crawler.synchronize(commit_changes=True) + return 0 + + +def parse_args(): + parser = argparse.ArgumentParser(description=__doc__, + formatter_class=RawTextHelpFormatter) + parser.add_argument("cfood_file_name", + help="Path name of the cfood yaml file to be used.") + parser.add_argument("--provenance", required=False, + help="Path name of the provenance yaml file. " + "This file will only be generated if this option is set.") + parser.add_argument("--debug", required=False, action="store_true", + help="Run the crawler in debug mode.") + parser.add_argument("crawled_directory_path", + help="The subtree of files below the given path will " + "be considered. 
Use '/' for everything.") + + parser.add_argument("-n", "--dry-run", action="store_true", + help="Create a file dry.yml to show " + "what would actually be committed without doing the synchronization.") + + # TODO: loading identifiables is currently a dirty implementation + parser.add_argument("-i", "--load-identifiables", + help="Load identifiables from " + "the given yaml file.") + + parser.add_argument("-p", "--prefix", + help="Remove the given prefix from the paths " + "of all file objects.") + + return parser.parse_args() + + +if __name__ == "__main__": + args = parse_args() + sys.exit(main( + args.crawled_directory_path, + args.cfood_file_name, + args.load_identifiables, + args.debug, + args.provenance, + args.dry_run, + args.prefix + )) diff --git a/src/caoscrawler/extension-converters-config-schema.yml b/src/caoscrawler/extension-converters-config-schema.yml new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/src/newcrawler/identifiable_adapters.py b/src/caoscrawler/identifiable_adapters.py similarity index 65% rename from src/newcrawler/identifiable_adapters.py rename to src/caoscrawler/identifiable_adapters.py index ff40a11f60ff6ad08d0b3137ae65dce9db7fa24d..358a843f44dbecbc1115b97f5ecaf5291257314e 100644 --- a/src/newcrawler/identifiable_adapters.py +++ b/src/caoscrawler/identifiable_adapters.py @@ -23,16 +23,17 @@ # ** end header # +import yaml + from datetime import datetime import caosdb as db -from abc import abstractmethod -from .utils import has_parent -from caosdb.common.datatype import is_reference +from abc import abstractmethod, ABCMeta from .utils import has_parent def convert_value(value): - """ Returns a string representation of the value that is suitable to be used in the query + """ Returns a string representation of the value that is suitable + to be used in the query looking for the identified record. Parameters @@ -53,7 +54,7 @@ def convert_value(value): return str(value) -class IdentifiableAdapter(object): +class IdentifiableAdapter(metaclass=ABCMeta): """ Base class for identifiable adapters. @@ -90,7 +91,8 @@ class IdentifiableAdapter(object): """ if len(ident.parents) != 1: - raise RuntimeError("Multiple parents for identifiables not supported.") + raise RuntimeError( + "Multiple parents for identifiables not supported.") query_string = "FIND Record " + ident.get_parents()[0].name query_string += " WITH " @@ -100,13 +102,34 @@ class IdentifiableAdapter(object): "The identifiable must have features to identify it.") if ident.name is not None: - query_string += "name='{}' AND ".format(ident.name) + query_string += "name='{}'".format(ident.name) + if len(ident.get_properties()) > 0: + query_string += " AND " + + query_string += IdentifiableAdapter.create_property_query(ident) + return query_string - for p in ident.get_properties(): - if isinstance(p.value, list): + @staticmethod + def create_property_query(entity: db.Entity): + query_string = "" + for p in entity.get_properties(): + if p.value is None: + query_string += "'" + p.name + "' IS NULL AND " + elif isinstance(p.value, list): + for v in p.value: + query_string += ("'" + p.name + "'='" + + convert_value(v) + "' AND ") + + # TODO: (for review) + # This code would allow for more complex identifiables with + # subproperties being checked directly. + # We currently do not need them and they could introduce + # problems in the local caching mechanism. + # However, it could be discussed to implement a similar mechanism.
+ # elif isinstance(p.value, db.Entity): + # query_string += ("'" + p.name + "' WITH (" + + # IdentifiableAdapter.create_property_query(p.value) + + # ") AND ") else: query_string += ("'" + p.name + "'='" + convert_value(p.value) + "' AND ") @@ -121,11 +144,38 @@ class IdentifiableAdapter(object): """ pass + @abstractmethod + def resolve_reference(self, record: db.Record): + pass + + @abstractmethod + def get_file(self, identifiable: db.File): + """ + Retrieve the file object for a (File) identifiable. + """ + pass + + def get_identifiable_for_file(self, record: db.File): + """ + Retrieve an identifiable for a file. + + Currently an identifiable for a file is just a File object + with a specific path. In the future, this could be extended + to allow for names, parents and custom properties. + """ + identifiable = db.File() + identifiable.path = record.path + return identifiable + def get_identifiable(self, record: db.Record): """ retrieve the registred identifiable and fill the property values to create an identifiable """ + + if record.role == "File": + return self.get_identifiable_for_file(record) + registered_identifiable = self.get_registered_identifiable(record) if registered_identifiable is None: @@ -133,13 +183,17 @@ class IdentifiableAdapter(object): identifiable = db.Record(name=record.name) if len(registered_identifiable.parents) != 1: - raise RuntimeError("Multiple parents for identifiables not supported.") + raise RuntimeError("Multiple parents for identifiables " + "not supported.") identifiable.add_parent(registered_identifiable.parents[0]) property_name_list_A = [] property_name_list_B = [] # fill the values: for prop in registered_identifiable.properties: + if prop.name == "name": + # The name can be part of the identifiable, but it isn't a property + continue # problem: what happens with multi properties? # case A: in the registered identifiable # case B: in the identifiable @@ -147,6 +201,10 @@ class IdentifiableAdapter(object): if record.get_property(prop.name) is None: raise NotImplementedError() record_prop = record.get_property(prop.name) + if record_prop is None: + # TODO: how to handle missing values in identifiables + # raise an exception? + raise NotImplementedError() newval = record_prop.value record_prop_new = db.Property(name=record_prop.name, id=record_prop.id, @@ -168,18 +226,20 @@ class IdentifiableAdapter(object): return identifiable @abstractmethod - def retrieve_identified_record(self, identifiable: db.Record): + def retrieve_identified_record_for_identifiable(self, identifiable: db.Record): """ Retrieve identifiable record for a given identifiable. This function will return None if there is either no identifiable registered or no corresponding identified record in the database for a given record. + + Warning: this function is not expected to work correctly for file identifiables. """ pass - # TODO: the name is confusing. it returns the identified record # TODO: remove side effect - def retrieve_identifiable(self, record: db.Record): + # TODO: use ID if record has one?
+ def retrieve_identified_record_for_record(self, record: db.Record): """ This function combines all functionality of the IdentifierAdapter by returning the identifiable after having checked for an appropriate @@ -193,7 +253,10 @@ class IdentifiableAdapter(object): if identifiable is None: return None - return self.retrieve_identified_record(identifiable) + if identifiable.role == "File": + return self.get_file(identifiable) + + return self.retrieve_identified_record_for_identifiable(identifiable) class LocalStorageIdentifiableAdapter(IdentifiableAdapter): @@ -211,9 +274,24 @@ class LocalStorageIdentifiableAdapter(IdentifiableAdapter): def get_records(self): return self._records + def get_file(self, identifiable: db.File): + """ + Just look in records for a file with the same path. + """ + candidates = [] + for record in self._records: + if record.role == "File" and record.path == identifiable.path: + candidates.append(record) + if len(candidates) > 1: + raise RuntimeError("Identifiable was not defined unambiguously.") + if len(candidates) == 0: + return None + return candidates[0] + def store_state(self, filename): with open(filename, "w") as f: - f.write(db.common.utils.xml2str(db.Container().extend(self._records).to_xml())) + f.write(db.common.utils.xml2str( + db.Container().extend(self._records).to_xml())) def restore_state(self, filename): with open(filename, "r") as f: @@ -232,7 +310,8 @@ class LocalStorageIdentifiableAdapter(IdentifiableAdapter): Return True in that case and False otherwise. """ if len(registered_identifiable.parents) != 1: - raise RuntimeError("Multiple parents for identifiables not supported.") + raise RuntimeError( + "Multiple parents for identifiables not supported.") if not has_parent(record, registered_identifiable.parents[0].name): return False @@ -248,7 +327,8 @@ class LocalStorageIdentifiableAdapter(IdentifiableAdapter): if self.is_identifiable_for_record(definition, record): identifiable_candidates.append(definition) if len(identifiable_candidates) > 1: - raise RuntimeError("Multiple candidates for an identifiable found.") + raise RuntimeError( + "Multiple candidates for an identifiable found.") if len(identifiable_candidates) == 0: return None return identifiable_candidates[0] @@ -264,7 +344,8 @@ class LocalStorageIdentifiableAdapter(IdentifiableAdapter): identifiable is the record that was created during the crawler run.
""" if len(identifiable.parents) != 1: - raise RuntimeError("Multiple parents for identifiables not supported.") + raise RuntimeError( + "Multiple parents for identifiables not supported.") if not has_parent(record, identifiable.parents[0].name): return False for prop in identifiable.properties: @@ -287,7 +368,7 @@ class LocalStorageIdentifiableAdapter(IdentifiableAdapter): return False return True - def retrieve_identified_record(self, identifiable: db.Record): + def retrieve_identified_record_for_identifiable(self, identifiable: db.Record): candidates = [] for record in self._records: if self.check_record(record, identifiable): @@ -298,21 +379,58 @@ class LocalStorageIdentifiableAdapter(IdentifiableAdapter): return None return candidates[0] + def resolve_reference(self, value: db.Record): + if self.get_registered_identifiable(value) is None: + raise NotImplementedError("Non-identifiable references cannot" + " be used as properties in identifiables.") + # TODO: just resolve the entity + + value_identifiable = self.retrieve_identified_record_for_record(value) + if value_identifiable is None: + raise RuntimeError("The identifiable which is used as property" + " here has to be inserted first.") + + if value_identifiable.id is None: + raise RuntimeError("The entity has not been assigned an ID.") + + return value_identifiable.id + class CaosDBIdentifiableAdapter(IdentifiableAdapter): """ Identifiable adapter which can be used for production. - - - TODO: store registred identifiables not locally """ + # TODO: don't store registered identifiables locally + def __init__(self): self._registered_identifiables = dict() + def load_from_yaml_definition(self, path: str): + """Load identifiables defined in a yaml file""" + with open(path, 'r') as yaml_f: + identifiable_data = yaml.safe_load(yaml_f) + + for key, value in identifiable_data.items(): + rt = db.RecordType().add_parent(key) + for prop_name in value: + rt.add_property(name=prop_name) + self.register_identifiable(key, rt) + def register_identifiable(self, name: str, definition: db.RecordType): self._registered_identifiables[name] = definition + def get_file(self, identifiable: db.File): + if identifiable.path is None: + raise RuntimeError("Path must not be None for File retrieval.") + candidates = db.execute_query("FIND File which is stored at {}".format( + identifiable.path)) + if len(candidates) > 1: + raise RuntimeError("Identifiable was not defined unambigiously.") + if len(candidates) == 0: + return None + return candidates[0] + def get_registered_identifiable(self, record: db.Record): """ returns the registred identifiable for the given Record @@ -325,7 +443,16 @@ class CaosDBIdentifiableAdapter(IdentifiableAdapter): if definition.parents[0].name.lower() == rt_name.lower(): return definition - def retrieve_identified_record(self, identifiable: db.Record): + def resolve_reference(self, record: db.Record): + """ + Current implementation just sets the id for this record + as a value. It needs to be verified that references all contain an ID. 
+ """ + if record.id is None: + return record + return record.id + + def retrieve_identified_record_for_identifiable(self, identifiable: db.Record): query_string = self.create_query_for_identifiable(identifiable) candidates = db.execute_query(query_string) if len(candidates) > 1: diff --git a/src/newcrawler/identified_cache.py b/src/caoscrawler/identified_cache.py similarity index 72% rename from src/newcrawler/identified_cache.py rename to src/caoscrawler/identified_cache.py index e02e19d86f8f262f984a6ae5b7a84675ef259581..0b9d7a47bdecc4094edb1296f4c04dfa083a2436 100644 --- a/src/newcrawler/identified_cache.py +++ b/src/caoscrawler/identified_cache.py @@ -36,10 +36,27 @@ def _create_hashable_string(identifiable: db.Record): """ creates a string from the attributes of an identifiable that can be hashed """ + if identifiable.role == "File": + # Special treatment for files: + return "P<>N<>{}:{}".format("path", identifiable.path) + if len(identifiable.parents) != 1: + # TODO: extend this + # maybe something like this: + # parent_names = ",".join( + # sorted([p.name for p in identifiable.parents]) + raise RuntimeError("Cache entry can only be generated for entities with 1 parent.") rec_string = "P<{}>N<{}>".format(identifiable.parents[0].name, identifiable.name) for pname in sorted([p.name for p in identifiable.properties]): value = str(identifiable.get_property(pname).value) - if isinstance(identifiable.get_property(pname).value, db.Entity): + + # TODO: (for review) + # This expansion of the hash function was introduced recently + # to allow the special case of Files as values of properties. + # We need to review the completeness of all the cases here, as the cache + # is crucial for correct identification of insertion and updates. + if isinstance(identifiable.get_property(pname).value, db.File): + value = str(identifiable.get_property(pname).value.path) + elif isinstance(identifiable.get_property(pname).value, db.Entity): value = str(identifiable.get_property(pname).value.id) elif isinstance(identifiable.get_property(pname).value, list): tmplist = [] diff --git a/src/newcrawler/stores.py b/src/caoscrawler/stores.py similarity index 88% rename from src/newcrawler/stores.py rename to src/caoscrawler/stores.py index 452061e6cf26bcb69f95c20f109b1d15a53f4a04..7ae451994b43a12559dea4ab7f85574c85b2a074 100644 --- a/src/newcrawler/stores.py +++ b/src/caoscrawler/stores.py @@ -47,6 +47,10 @@ class Store(object): def __contains__(self, key: str): return key in self._storage + def __delitem__(self, key: str): + del self._storage[key] + del self._copied[key] + def update(self, other: dict): self._storage.update(other) for key in other: @@ -87,6 +91,17 @@ class GeneralStore(Store): class RecordStore(Store): + def get_names_current_scope(self): + """ + Return the names of all records that were created in the current scope. + """ + lst = [] + + for key in self._storage: + if not self._copied[key]: + lst.append(key) + return lst + def get_records_current_scope(self): """ Return all records that were created in the current scope. 
diff --git a/src/newcrawler/structure_elements.py b/src/caoscrawler/structure_elements.py similarity index 69% rename from src/newcrawler/structure_elements.py rename to src/caoscrawler/structure_elements.py index 6562d03c16b3c4f380077e05268a370370b2c725..6be653a4758e8c3fb789b22ea655836a3d976c34 100644 --- a/src/newcrawler/structure_elements.py +++ b/src/caoscrawler/structure_elements.py @@ -60,18 +60,52 @@ class File(FileSystemStructureElement): pass -class DictTextElement(StructureElement): - def __init__(self, name: str, value: str): +class JSONFile(File): + pass + + +class DictElement(StructureElement): + def __init__(self, name: str, value): super().__init__(name) self.value = value -class DictListElement(StructureElement): - def __init__(self, name: str, value: list): +class Dict(StructureElement): + def __init__(self, name: str, value: dict): super().__init__(name) self.value = value +class DictTextElement(DictElement): + def __init__(self, name: str, value: str): + super().__init__(name, value) + + +class DictIntegerElement(DictElement): + def __init__(self, name: str, value: int): + super().__init__(name, value) + + +class DictBooleanElement(DictElement): + def __init__(self, name: str, value: bool): + super().__init__(name, value) + + +class DictDictElement(Dict, DictElement): + def __init__(self, name: str, value: dict): + DictElement.__init__(self, name, value) + + +class DictListElement(DictElement): + def __init__(self, name: str, value: list): + super().__init__(name, value) + + +class DictFloatElement(DictElement): + def __init__(self, name: str, value: float): + super().__init__(name, value) + + class TextElement(StructureElement): def __init__(self, name: str, value: str): super().__init__(name) diff --git a/src/newcrawler/utils.py b/src/caoscrawler/utils.py similarity index 97% rename from src/newcrawler/utils.py rename to src/caoscrawler/utils.py index 35fefe6719d579bc8e8a39489f8a872c0cca11b8..61b363099d0892b74e91f257bccb6cc832c3d59f 100644 --- a/src/newcrawler/utils.py +++ b/src/caoscrawler/utils.py @@ -23,11 +23,10 @@ # ** end header # -import caosdb as db -from datetime import datetime - # Some utility functions, e.g. for extending pylib. +import caosdb as db + def has_parent(entity: db.Entity, name: str): """ diff --git a/src/doc/Makefile b/src/doc/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..bea7f860173d930527c84fae43cb7d5bdf6cae97 --- /dev/null +++ b/src/doc/Makefile @@ -0,0 +1,49 @@ +# ** header v3.0 +# This file is a part of the CaosDB Project. +# +# Copyright (C) 2020 IndiScale GmbH <info@indiscale.com> +# Copyright (C) 2020 Daniel Hornung <d.hornung@indiscale.com> +# Copyright (C) 2021 Alexander Schlemmer <alexander.schlemmer@ds.mpg.de> +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see <https://www.gnu.org/licenses/>. +# +# ** end header + +# This Makefile is a wrapper for sphinx scripts.
+# +# It is based upon the autocreated makefile for Sphinx documentation. + +# You can set these variables from the command line, and also +# from the environment for the first two. +SPHINXOPTS ?= -a +SPHINXBUILD ?= sphinx-build +SPHINXAPIDOC ?= sphinx-apidoc +PY_BASEDIR = ../caoscrawler +SOURCEDIR = . +BUILDDIR = ../../build/doc + + +.PHONY: doc-help Makefile + +# Put it first so that "make" without argument is like "make help". +doc-help: + @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) + +# Catch-all target: route all unknown targets to Sphinx using the new +# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). +%: Makefile apidoc + @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) + +apidoc: + @$(SPHINXAPIDOC) -o _apidoc --separate $(PY_BASEDIR) diff --git a/src/doc/_apidoc/modules.rst b/src/doc/_apidoc/modules.rst new file mode 100644 index 0000000000000000000000000000000000000000..17f187982981ffbf7bcc857056d10644c2bd422b --- /dev/null +++ b/src/doc/_apidoc/modules.rst @@ -0,0 +1,7 @@ +newcrawler +========== + +.. toctree:: + :maxdepth: 4 + + newcrawler diff --git a/src/doc/_apidoc/newcrawler.converters.rst b/src/doc/_apidoc/newcrawler.converters.rst new file mode 100644 index 0000000000000000000000000000000000000000..893391c229b94baeed9a44c57877ed33f37b2f5e --- /dev/null +++ b/src/doc/_apidoc/newcrawler.converters.rst @@ -0,0 +1,7 @@ +newcrawler.converters module +============================ + +.. automodule:: newcrawler.converters + :members: + :undoc-members: + :show-inheritance: diff --git a/src/doc/_apidoc/newcrawler.crawl.rst b/src/doc/_apidoc/newcrawler.crawl.rst new file mode 100644 index 0000000000000000000000000000000000000000..b00a6ab6498a0482cea3e9faa54d66d66991dc2d --- /dev/null +++ b/src/doc/_apidoc/newcrawler.crawl.rst @@ -0,0 +1,7 @@ +newcrawler.crawl module +======================= + +.. automodule:: newcrawler.crawl + :members: + :undoc-members: + :show-inheritance: diff --git a/src/doc/_apidoc/newcrawler.identifiable_adapters.rst b/src/doc/_apidoc/newcrawler.identifiable_adapters.rst new file mode 100644 index 0000000000000000000000000000000000000000..d8926f41b72d2c54931f045d75f9fe59b21e6076 --- /dev/null +++ b/src/doc/_apidoc/newcrawler.identifiable_adapters.rst @@ -0,0 +1,7 @@ +newcrawler.identifiable\_adapters module +======================================== + +.. automodule:: newcrawler.identifiable_adapters + :members: + :undoc-members: + :show-inheritance: diff --git a/src/doc/_apidoc/newcrawler.identified_cache.rst b/src/doc/_apidoc/newcrawler.identified_cache.rst new file mode 100644 index 0000000000000000000000000000000000000000..6f697362ad44d1fec01f328550dc8667cc889019 --- /dev/null +++ b/src/doc/_apidoc/newcrawler.identified_cache.rst @@ -0,0 +1,7 @@ +newcrawler.identified\_cache module +=================================== + +.. automodule:: newcrawler.identified_cache + :members: + :undoc-members: + :show-inheritance: diff --git a/src/doc/_apidoc/newcrawler.rst b/src/doc/_apidoc/newcrawler.rst new file mode 100644 index 0000000000000000000000000000000000000000..202444a5efbde248e52d712575ade49f6dd50601 --- /dev/null +++ b/src/doc/_apidoc/newcrawler.rst @@ -0,0 +1,24 @@ +newcrawler package +================== + +Submodules +---------- + +.. toctree:: + :maxdepth: 4 + + newcrawler.converters + newcrawler.crawl + newcrawler.identifiable_adapters + newcrawler.identified_cache + newcrawler.stores + newcrawler.structure_elements + newcrawler.utils + +Module contents +--------------- + +.. 
automodule:: newcrawler + :members: + :undoc-members: + :show-inheritance: diff --git a/src/doc/_apidoc/newcrawler.stores.rst b/src/doc/_apidoc/newcrawler.stores.rst new file mode 100644 index 0000000000000000000000000000000000000000..7d446c1cd45a6bf1c4b6cf1b1d33e9a2a5ad9751 --- /dev/null +++ b/src/doc/_apidoc/newcrawler.stores.rst @@ -0,0 +1,7 @@ +newcrawler.stores module +======================== + +.. automodule:: newcrawler.stores + :members: + :undoc-members: + :show-inheritance: diff --git a/src/doc/_apidoc/newcrawler.structure_elements.rst b/src/doc/_apidoc/newcrawler.structure_elements.rst new file mode 100644 index 0000000000000000000000000000000000000000..4613e1d58b0ef9c7cc38096aa25270f469836ce5 --- /dev/null +++ b/src/doc/_apidoc/newcrawler.structure_elements.rst @@ -0,0 +1,7 @@ +newcrawler.structure\_elements module +===================================== + +.. automodule:: newcrawler.structure_elements + :members: + :undoc-members: + :show-inheritance: diff --git a/src/doc/_apidoc/newcrawler.utils.rst b/src/doc/_apidoc/newcrawler.utils.rst new file mode 100644 index 0000000000000000000000000000000000000000..4df55a234fd85072068e41d1ce7bb3b17fd1a698 --- /dev/null +++ b/src/doc/_apidoc/newcrawler.utils.rst @@ -0,0 +1,7 @@ +newcrawler.utils module +======================= + +.. automodule:: newcrawler.utils + :members: + :undoc-members: + :show-inheritance: diff --git a/src/doc/conf.py b/src/doc/conf.py new file mode 100644 index 0000000000000000000000000000000000000000..fb37cdd96c440300741aeb49e90caffe4370f5d7 --- /dev/null +++ b/src/doc/conf.py @@ -0,0 +1,217 @@ +# -*- coding: utf-8 -*- +# +# Configuration file for the Sphinx documentation builder. +# +# Based on the configuration for caosdb-pylib. +# +# # Copyright (C) 2021 Alexander Schlemmer <alexander.schlemmer@ds.mpg.de> +# +# This file only contains a selection of the most common options. For a full +# list see the documentation: +# https://www.sphinx-doc.org/en/master/usage/configuration.html + +# -- Path setup -------------------------------------------------------------- + +# If extensions (or modules to document with autodoc) are in another directory, add these +# directories to sys.path here. This is particularly necessary if this package is installed at a +# different version, for example via `pip install`. +# +# If the directory is relative to the documentation root, use os.path.abspath to make it absolute, +# like shown here. +# +import os +import sys +sys.path.insert(0, os.path.abspath('..')) + +import sphinx_rtd_theme # noqa: E402 + + +# -- Project information ----------------------------------------------------- + +project = 'caosdb-caoscrawler' +copyright = '2021, MPIDS' +author = 'Alexander Schlemmer' + +# The short X.Y version +version = '0.1' +# The full version, including alpha/beta/rc tags +# release = '0.5.2-rc2' +release = '0.1' + + +# -- General configuration --------------------------------------------------- + +# If your documentation needs a minimal Sphinx version, state it here. +# +# needs_sphinx = '1.0' + +# Add any Sphinx extension module names here, as strings. They can be +# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom +# ones. +extensions = [ + 'sphinx.ext.autodoc', + 'sphinx.ext.autosectionlabel', + 'sphinx.ext.intersphinx', + 'sphinx.ext.napoleon', # For Google style docstrings + "sphinx_rtd_theme", +] + +# Add any paths that contain templates here, relative to this directory. +templates_path = ['_templates'] + +# The suffix(es) of source filenames. 
+# You can specify multiple suffix as a list of string: +source_suffix = ['.rst'] + +# The master toctree document. +master_doc = 'index' + +# The language for content autogenerated by Sphinx. Refer to documentation +# for a list of supported languages. +# +# This is also used if you do content translation via gettext catalogs. +# Usually you set "language" from the command line for these cases. +language = None + +# List of patterns, relative to source directory, that match files and +# directories to ignore when looking for source files. +# This pattern also affects html_static_path and html_extra_path. +exclude_patterns = [] + +# The name of the Pygments (syntax highlighting) style to use. +pygments_style = None + + +# -- Options for HTML output ------------------------------------------------- + +# The theme to use for HTML and HTML Help pages. See the documentation for +# a list of builtin themes. +# + +html_theme = "sphinx_rtd_theme" + +# Theme options are theme-specific and customize the look and feel of a theme +# further. For a list of options available for each theme, see the +# documentation. +# +# html_theme_options = {} + +# Add any paths that contain custom static files (such as style sheets) here, +# relative to this directory. They are copied after the builtin static files, +# so a file named "default.css" will overwrite the builtin "default.css". +html_static_path = ['_static'] + +# Custom sidebar templates, must be a dictionary that maps document names +# to template names. +# +# The default sidebars (for documents that don't match any pattern) are +# defined by theme itself. Builtin themes are using these templates by +# default: ``['localtoc.html', 'relations.html', 'sourcelink.html', +# 'searchbox.html']``. +# +# html_sidebars = {} + + +# -- Options for HTMLHelp output --------------------------------------------- + +# Output file base name for HTML help builder. +htmlhelp_basename = 'caosdb-caoscrawlerdoc' + + +# -- Options for LaTeX output ------------------------------------------------ + +latex_elements = { + # The paper size ('letterpaper' or 'a4paper'). + # + # 'papersize': 'letterpaper', + + # The font size ('10pt', '11pt' or '12pt'). + # + # 'pointsize': '10pt', + + # Additional stuff for the LaTeX preamble. + # + # 'preamble': '', + + # Latex figure (float) alignment + # + # 'figure_align': 'htbp', +} + +# Grouping the document tree into LaTeX files. List of tuples +# (source start file, target name, title, +# author, documentclass [howto, manual, or own class]). +latex_documents = [ + (master_doc, 'caosdb-caoscrawler.tex', 'caosdb-caoscrawler Documentation', + 'MPIDS', 'manual'), +] + + +# -- Options for manual page output ------------------------------------------ + +# One entry per manual page. List of tuples +# (source start file, name, description, authors, manual section). +man_pages = [ + (master_doc, 'caosdb-caoscrawler', 'caosdb-caoscrawler documentation', + [author], 1) +] + + +# -- Options for Texinfo output ---------------------------------------------- + +# Grouping the document tree into Texinfo files. List of tuples +# (source start file, target name, title, author, +# dir menu entry, description, category) +texinfo_documents = [ + (master_doc, 'caosdb-caoscrawler', 'caosdb-caoscrawler documentation', + author, 'caosdb-caoscrawler', 'One line description of project.', + 'Miscellaneous'), +] + + +# -- Options for Epub output ------------------------------------------------- + +# Bibliographic Dublin Core info. 
+epub_title = project + +# The unique identifier of the text. This can be a ISBN number +# or the project homepage. +# +# epub_identifier = '' + +# A unique identification for the text. +# +# epub_uid = '' + +# A list of files that should not be packed into the epub file. +epub_exclude_files = ['search.html'] + + +# -- Extension configuration ------------------------------------------------- + +# True to prefix each section label with the name of the document it is in, followed by a colon. For +# example, index:Introduction for a section called Introduction that appears in document +# index.rst. Useful for avoiding ambiguity when the same section heading appears in different +# documents. +# +# Note: This stops "normal" links from working, so it should be kept at False. +# autosectionlabel_prefix_document = True + +# -- Options for intersphinx ------------------------------------------------- + +# https://www.sphinx-doc.org/en/master/usage/extensions/intersphinx.html#confval-intersphinx_mapping +intersphinx_mapping = { + "python": ("https://docs.python.org/", None), + "caosdb-mysqlbackend": ("https://docs.indiscale.com/caosdb-mysqlbackend/", + None), + "caosdb-server": ("https://docs.indiscale.com/caosdb-server/", None), + "caosdb-pylib": ("https://docs.indiscale.com/caosdb-pylib/", None), + "caosdb-advanced-user-tools": ("https://docs.indiscale.com/caosdb-advanced-user-tools/", None), +} + + +# TODO Which options do we want? +autodoc_default_options = { + 'members': None, + 'undoc-members': None, +} diff --git a/src/doc/index.rst b/src/doc/index.rst new file mode 100644 index 0000000000000000000000000000000000000000..f11d73b58a3216b1d735d6565650148c150ebb68 --- /dev/null +++ b/src/doc/index.rst @@ -0,0 +1,186 @@ +Crawler 2.0 Documentation +========================= + +Introduction +------------ + +The crawler is the main data integration tool for CaosDB. +Its task is to automatically synchronize data found on file systems or in other +sources of data with the semantic data model of CaosDB. + +More specifically, data that is contained in a hierarchical structure is converted to a data +structure that is consistent with a predefined semantic data model. + +The hierarchical structure can for example be a file tree. However, it can also +be something different, like the contents of a JSON file or a file tree with +JSON files. + +Concepts +-------- + +Structure Elements +++++++++++++++++++ + +This hierarchical structure is assumed to be constituted of a tree of +StructureElements. The tree is created on the fly by so-called Converters which +are defined in a yaml file. The tree of StructureElements is a model +of the existing data. (For example, a tree of Python file objects +(StructureElements) could represent a file tree that exists on some file server.) + +Relevant sources in: +src/structure_elements.py + +Converters +++++++++++ + +Converters treat StructureElements and thereby create the StructureElements that +are the children of the treated StructureElement. Converters therefore create +the above-named tree. The definition of a Converter also contains what +Converters shall be used to treat the generated child-StructureElements. The +definition is therefore a tree itself. (Question: Should there be global Converters +that are always checked when treating a StructureElement? Should Converters be +associated with generated child-StructureElements? Currently, all children are +created and checked against all Converters.
It could be that one would like to +check file-StructureElements against one set of Converters and +directory-StructureElements against another) + +Each StructureElement in the tree has a set of data values, i.e. a dictionary of +key-value pairs. +Some of those values are set due to the kind of StructureElement. For example, +a file could have the file name as such a key-value pair: 'filename': <sth>. +Converters may define additional functions that create further values. For +example, a regular expression could be used to get a date from a file name. + + + + +A converter is defined via a yml file or part of it. The definition states +what kind of StructureElement it treats (typically one). +Also, it defines how children of the current StructureElement are +created and what Converters shall be used to treat those. + +The yaml definition looks like the following: + +TODO: outdated, see cfood-schema.yml + +converter-name: + type: <StructureElement Type> + match: ".*" + records: + Experiment1: + parents: + - Experiment + - Blablabla + date: $DATUM + <...> + Experiment2: + parents: + - Experiment + valuegenerators: + datepattern: + <...> + childrengenerators: + create_children_from_directory: + sort-by-date: true + subtree: + + +records: + Measurement: <- automatically becomes a value in the valueStore + run_number: 25 + Experiment1: + Measurement: +Measurement <- Element in List (list is cleared before run) + *Measurement <- Multi Property (properties are removed before run) + Measurement <- Overwrite + +The UPDATE stage checks, e.g., whether lists are equal (equality may be defined +such that all elements are present, but not necessarily in the right order). +We may not need this, because the crawler is already deterministic anyway. + +The converter-name is a description of what it represents (e.g. +'experiment-folder') and is used as an identifier. + +The type restricts what kind of StructureElements are treated. +The match is by default a regular expression that is matched against the +name of StructureElements. Discussion: StructureElements might not have a +name (e.g. a dict); should a name be created artificially if necessary +(e.g. "root-dict")? It might make sense to allow keywords like "always" and +other kinds of checks. For example, a dictionary could be checked against a +json-schema definition. + +recordtypes is a list of definitions that define the semantic structure +(see details below). + +valuegenerators allow providing additional functionality that creates +data values in addition to the ones given by default via the +StructureElement. This can be, for example, a match group of a regular +expression applied to the filename. +It should be possible to access the values of parent nodes. For example, +the name of a parent node could be accessed with $converter-name.name. +Discussion: This can introduce conflicts if the key <converter-name> +already exists. An alternative would be to mark those lookups, e.g. +$$converter-name.name (2x $). + +childrengenerators denotes how StructureElements shall be created that are +children of the current one. + +subtree contains a list of Converter definitions that look like the one +described here. + +Those keywords should be allowed but not required, i.e. if no +valuegenerators shall be defined, the keyword may be omitted.
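+How the value prefixes in a ``records`` block are interpreted can be
+illustrated with a short Python sketch. This is editorial only: the helper
+name ``parse_prop_value`` is hypothetical, and it merely mirrors the prefix
+handling of the ``handle_value`` helper in ``converters.py``:
+
+.. code-block:: python
+
+    def parse_prop_value(raw: str, values: dict):
+        """Return (value, collection_mode) for a records entry."""
+        mode = "single"
+        if raw.startswith("+"):    # element in list
+            mode, raw = "list", raw[1:]
+        elif raw.startswith("*"):  # multiproperty
+            mode, raw = "multiproperty", raw[1:]
+        if raw.startswith("$"):    # variable lookup in the value store
+            raw = values[raw[1:]]
+        return raw, mode
+
+    assert parse_prop_value("+Measurement", {}) == ("Measurement", "list")
+    assert parse_prop_value("$DATUM", {"DATUM": "2021-01-01"}) == ("2021-01-01", "single")
+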
+ + +Relevant sources in: +src/converters.py + +Identifiables ++++++++++++++ + +Relevant sources in: +src/identifiable_adapters.py + +The Crawler ++++++++++++ + +The crawler can be considered the main program doing the synchronization in basically two steps: + +1. Based on a yaml specification, scan the file system (or other sources) and create a set + of CaosDB Entities that are supposed to be inserted or updated in a CaosDB instance. +2. Compare the current state of the CaosDB instance with the set of CaosDB Entities created in + step 1, taking into account the :ref:`registered identifiables<Identifiables>`. Insert or + update entities accordingly. + +Relevant sources in: +src/crawl.py + + + +Special Cases +============= + +Variable Precedence ++++++++++++++++++++ + +Let's assume the following situation: + +.. code-block:: yaml + + description: + type: DictTextElement + match_value: (?P<description>.*) + match_name: description + + +Making use of the $description variable could refer to two different variables created here: + +1. The structure element path. +2. The value of the matched expression. + +The matched expression does take precedence over the structure element path and shadows it. + +If you want to be able to use the structure element path, make sure to give unique names +to the variables, like: + +.. code-block:: yaml + + description_text_block: + type: DictTextElement + match_value: (?P<description>.*) + match_name: description diff --git a/src/newcrawler/converters.py b/src/newcrawler/converters.py deleted file mode 100644 index aa31436c25933b035f50da7166b66f9614bc37b9..0000000000000000000000000000000000000000 --- a/src/newcrawler/converters.py +++ /dev/null @@ -1,424 +0,0 @@ -#!/usr/bin/env python3 -# encoding: utf-8 -# -# ** header v3.0 -# This file is a part of the CaosDB Project. -# -# Copyright (C) 2021 Henrik tom Wörden -# 2021 Alexander Schlemmer -# -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU Affero General Public License as -# published by the Free Software Foundation, either version 3 of the -# License, or (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Affero General Public License for more details. -# -# You should have received a copy of the GNU Affero General Public License -# along with this program. If not, see <https://www.gnu.org/licenses/>. -# -# ** end header -# - -import os -import re -import caosdb as db -from .utils import has_parent -from .stores import GeneralStore, RecordStore -from .structure_elements import (StructureElement, Directory, File, - TextElement, DictTextElement, DictListElement) -from typing import Type, Optional, Union -from abc import abstractmethod -import yaml_header_tools - - -def handle_value(value: Union[dict, str], values: GeneralStore): - """ - Function to generically handle values for properties defined in the - yaml structure.
- - Returns a tuple: - - the final value of the property - - the collection mode (can be single, list or multiproperty) - """ - - if type(value) == dict: - propvalue = value["value"] - # can be "single", "list" or "multiproperty" - collection_mode = value["collection_mode"] - elif type(value) == str: - propvalue = value - collection_mode = "single" - if propvalue.startswith("+"): - collection_mode = "list" - propvalue = propvalue[1:] - elif propvalue.startswith("*"): - collection_mode = "multiproperty" - propvalue = propvalue[1:] - - if propvalue.startswith("$"): - propvalue = values[propvalue[1:]] - # Allow the insertion of $ signs at the beginning - if type(propvalue) == str and propvalue.startswith("$$"): - propvalue = propvalue[1:] - - return (propvalue, collection_mode) - - -class Converter(object): - """ - Converters treat StructureElements contained in the hierarchical sturcture. - - A converter is defined via a yml file or part of it. The definition states - what kind of StructureElement it treats (typically one). - Also, it defines how children of the current StructureElement are - created and what Converters shall be used to treat those. - - The yaml definition looks like the following: - - converter-name: - type: <StructureElement Type> - match: ".*" - records: - Experiment1: - parents: - - Experiment - - Blablabla - date: $DATUM - <...> - Experiment2: - parents: - - Experiment - valuegenerators: - datepattern: - <...> - childrengenerators: - create_children_from_directory: - sort-by-date: true - subtree: - - - records: - Measurement: <- wird automatisch ein value im valueStore - run_number: 25 - Experiment1: - Measurement: +Measurement <- Element in List (list is cleared before run) - *Measurement <- Multi Property (properties are removed before run) - Measurement <- Overwrite - - UPDATE-Stage prüft ob es z.B. Gleichheit zwischen Listen gibt (die dadurch definiert sein - kann, dass alle Elemente vorhanden, aber nicht zwingend in der richtigen Reihenfolge sind) - evtl. brauchen wir das nicht, weil crawler eh schon deterministisch ist. - - The converter-name is a description of what it represents (e.g. - 'experiment-folder') and is used as identifier. - - The type restricts what kind of StructureElements are treated. - The match is by default a regular expression, that is matche against the - name of StructureElements. Discussion: StructureElements might not have a - name (e.g. a dict) or should a name be created artificially if necessary - (e.g. "root-dict")? It might make sense to allow keywords like "always" and - other kinds of checks. For example a dictionary could be checked against a - json-schema definition. - - recordtypes is a list of definitions that define the semantic structure - (see details below). - - valuegenerators allow to provide additional functionality that creates - data values in addition to the ones given by default via the - StructureElement. This can be for example a match group of a regular - expression applied to the filename. - It should be possible to access the values of parent nodes. For example, - the name of a parent node could be accessed with $converter-name.name. - Discussion: This can introduce conflicts, if the key <converver-name> - already exists. An alternative would be to identify those lookups. E.g. - $$converter-name.name (2x$). - - childrengenerators denotes how StructureElements shall be created that are - children of the current one. - - subtree contains a list of Converter defnitions that look like the one - described here. 
- - those keywords should be allowed but not required. I.e. if no - valuegenerators shall be defined, the keyword may be omitted. - """ - - def __init__(self, definition: dict, - name: str): - self.definition = definition - self.name = name - - # Used to store usage information for debugging: - self.metadata: dict[str, set[str]] = { - "usage": set() - } - - self.converters = [] - - if "subtree" in definition: - for converter_name in definition['subtree']: - converter_definition = definition["subtree"][converter_name] - self.converters.append(Converter.converter_factory( - converter_definition, converter_name)) - - @staticmethod - def converter_factory(definition: dict, - name: str): - # The following dict is a mapping from types (as written in the yaml file) - # to classes implementing the specific converters: - converter_registry: dict[str, Type[Converter]] = { - "Directory": DirectoryConverter, - "MarkdownFile": MarkdownFileConverter, - "DictTextElement": DictTextElementConverter, - "DictListElement": DictListElementConverter, - "TextElement": TextElementConverter - } - - if "type" not in definition: - raise RuntimeError("Type is mandatory for converter entries in CFood definition.") - - if definition["type"] not in converter_registry: - raise RuntimeError("Unknown Type: {}".format(definition["type"])) - - # instatiates an object of the required class, e.g. DirectoryConverter(definition, name) - converter = converter_registry[definition["type"]](definition, name) - - return converter - - def create_values(self, - values: GeneralStore, - element: StructureElement): - """ - Extract information from the structure element and store them as values in the - general store. - - values: The GeneralStore to store values in. - element: The StructureElement to extract values from. 
- """ - m = self.match(element) - if m is None: - # this should never happen as the condition was checked before already - raise RuntimeError("Condition does not match.") - values.update(m) - - @abstractmethod - def create_children(self, values: GeneralStore, - element: StructureElement): - pass - - def create_records(self, values: GeneralStore, - records: RecordStore, - element: StructureElement): - - if "records" not in self.definition: - return [] - - # list of keys to identify, which variables have been set by which paths: - # these are tuples: - # 0: record name - # 1: property name - keys_modified = [] - - for name, record in self.definition["records"].items(): - # whether the record already exists in the store or not are actually really - # different distinct cases for treating the setting and updating of variables: - if name not in records: - c_record = db.Record() - # add the new record to the record store: - records[name] = c_record - # additionally add the new record to the general store: - values[name] = c_record - - c_record = records[name] - - for key, value in record.items(): - if key == "parents": - continue - keys_modified.append((name, key)) - propvalue, collection_mode = handle_value(value, values) - - if c_record.get_property(key) is None: - - if collection_mode == "list": - c_record.add_property(name=key, value=[propvalue]) - elif (collection_mode == "multiproperty" or - collection_mode == "single"): - c_record.add_property(name=key, value=propvalue) - else: - if collection_mode == "list": - c_record.get_property(key).value.append(propvalue) - elif collection_mode == "multiproperty": - c_record.add_property(name=key, value=propvalue) - elif collection_mode == "single": - c_record.get_property(key).value = propvalue - - # no matter whether the record existed in the record store or not, - # parents will be added when they aren't present in the record yet: - if "parents" in record: - for parent in record["parents"]: - if not has_parent(c_record, parent): - c_record.add_parent(parent) - else: - if not has_parent(c_record, name): - c_record.add_parent(name) - return keys_modified - - @abstractmethod - def typecheck(self, element: StructureElement): - pass - - @abstractmethod - def match(self, element: StructureElement) -> Optional[dict]: - pass - - -class DirectoryConverter(Converter): - - def __init__(self, definition: dict, name: str): - """ - Initialize a new directory converter. - """ - super().__init__(definition, name) - - def create_children(self, generalStore: GeneralStore, - element: StructureElement): - if not isinstance(element, Directory): - raise RuntimeError("Directory converters can only create children from directories.") - - return self.create_children_from_directory(element) - - def typecheck(self, element: StructureElement): - return isinstance(element, Directory) - - def match(self, element: StructureElement): - if not isinstance(element, Directory): - raise RuntimeError("Element must be a directory.") - m = re.match(self.definition["match"], element.name) - if m is None: - return None - return m.groupdict() - - @staticmethod - def create_children_from_directory(element: Directory): - """ - Creates a list of files (of type File) and directories (of type Directory) for a - given directory. No recursion. - - element: A directory (of type Directory) which will be traversed. 
- """ - children: list[StructureElement] = [] - - for name in sorted(os.listdir(element.path)): - path = os.path.join(element.path, name) - - if os.path.isdir(path): - children.append(Directory(name, path)) - elif os.path.isfile(path): - children.append(File(name, path)) - - return children - - -class MarkdownFileConverter(Converter): - def __init__(self, definition: dict, name: str): - """ - Initialize a new directory converter. - """ - super().__init__(definition, name) - - def create_children(self, generalStore: GeneralStore, - element: StructureElement): - if not isinstance(element, File): - raise RuntimeError("A markdown file is needed to create children.") - - header = yaml_header_tools.get_header_from_file(element.path, clean=False) - children: list[StructureElement] = [] - - for name, entry in header.items(): - if type(entry) == list: - children.append(DictListElement(name, entry)) - elif type(entry) == str: - children.append(DictTextElement(name, entry)) - else: - raise RuntimeError("Header entry {} has incompatible type.".format(name)) - return children - - def typecheck(self, element: StructureElement): - return isinstance(element, File) - - def match(self, element: StructureElement): - if not isinstance(element, File): - raise RuntimeError("Element must be a file.") - m = re.match(self.definition["match"], element.name) - if m is None: - return None - try: - yaml_header_tools.get_header_from_file(element.path) - except yaml_header_tools.NoValidHeader: - return None - return m.groupdict() - - -class DictTextElementConverter(Converter): - def create_children(self, generalStore: GeneralStore, - element: StructureElement): - return [] - - def typecheck(self, element: StructureElement): - return isinstance(element, DictTextElement) - - def match(self, element: StructureElement): - if not isinstance(element, DictTextElement): - raise RuntimeError("Element must be a DictTextElement.") - m1 = re.match(self.definition["match_name"], element.name) - if m1 is None: - return None - m2 = re.match(self.definition["match_value"], element.value) - if m2 is None: - return None - values = dict() - values.update(m1.groupdict()) - values.update(m2.groupdict()) - return values - - -class DictListElementConverter(Converter): - def create_children(self, generalStore: GeneralStore, - element: StructureElement): - if not isinstance(element, DictListElement): - raise RuntimeError("This converter can only process DictListElements.") - return [TextElement(str(index), list_element) for index, list_element in enumerate(element.value)] - - def typecheck(self, element: StructureElement): - return isinstance(element, DictListElement) - - def match(self, element: StructureElement): - if not isinstance(element, DictListElement): - raise RuntimeError("Element must be a DictListElement.") - m = re.match(self.definition["match_name"], element.name) - if m is None: - return None - if "match" in self.definition: - raise NotImplementedError("Match is not implemented for DictListElement.") - return m.groupdict() - - -class TextElementConverter(Converter): - def create_children(self, generalStore: GeneralStore, - element: StructureElement): - return [] - - def typecheck(self, element: StructureElement): - return isinstance(element, TextElement) - - def match(self, element: StructureElement): - if not isinstance(element, TextElement): - raise RuntimeError("Element must be a TextElement.") - m = re.match(self.definition["match"], element.value) - if m is None: - return None - return m.groupdict() diff --git 
a/src/newcrawler/crawl.py b/src/newcrawler/crawl.py deleted file mode 100644 index 89efc089cf73fe6844fb2a20bbea0730d869af46..0000000000000000000000000000000000000000 --- a/src/newcrawler/crawl.py +++ /dev/null @@ -1,577 +0,0 @@ -#!/usr/bin/env python3 -# encoding: utf-8 -# -# ** header v3.0 -# This file is a part of the CaosDB Project. -# -# Copyright (C) 2021 Henrik tom Wörden -# 2021 Alexander Schlemmer -# -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU Affero General Public License as -# published by the Free Software Foundation, either version 3 of the -# License, or (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Affero General Public License for more details. -# -# You should have received a copy of the GNU Affero General Public License -# along with this program. If not, see <https://www.gnu.org/licenses/>. -# -# ** end header -# - -""" -Data that is contained in a hierarchical structure is converted to a data -structure that is consistent with a predefined semantic data model. - -The hierarchical sturcture can be for example a file tree. However it can be -also something different like the contents of a json file or a file tree with -json files. - - -This hierarchical structure is assumed to be consituted of a tree of -StructureElements. The tree is created on the fly by so called Converters which -are defined in a yaml file. The tree of StructureElements is a model -of the existing data (For example could a tree of Python file objects -(StructureElements) represent a file tree that exists on some file server). - -Converters treat StructureElements and thereby create the StructureElement that -are the children of the treated StructureElement. Converters therefore create -the above named tree. The definition of a Converter also contains what -Converters shall be used to treat the generated child-StructureElements. The -definition is there a tree itself. (Question: Should there be global Converters -that are always checked when treating a StructureElement? Should Converters be -associated with generated child-StructureElements? Currently, all children are -created and checked against all Converters. It could be that one would like to -check file-StructureElements against one set of Converters and -directory-StructureElements against another) - -Each StructureElement in the tree has a set of data values, i.e a dictionary of -key value pairs. -Some of those values are set due to the kind of StructureElement. For example, -a file could have the file name as such a key value pair: 'filename': <sth>. -Converters may define additional functions that create further values. For -example, a regular expresion could be used to get a date from a file name. 
- - -""" - -import sys -import os -import yaml -import argparse -from argparse import RawTextHelpFormatter -import caosdb as db -from caosdb.common.datatype import is_reference -from .stores import GeneralStore, RecordStore -from .identified_cache import IdentifiedCache -from .structure_elements import StructureElement, Directory -from .converters import Converter, DirectoryConverter -from .identifiable_adapters import IdentifiableAdapter, LocalStorageIdentifiableAdapter -from collections import defaultdict -from typing import Union, Any, Optional -from caosdb.apiutils import compare_entities -from copy import deepcopy - - -class Crawler(object): - """ - Crawler class that encapsulates crawling functions. - Furthermore it keeps track of the storage for records (record store) and the - storage for values (general store). - """ - - def __init__(self, converters: list[Converter] = [], - generalStore: Optional[GeneralStore] = None, - debug: bool = False, - identifiableAdapter: IdentifiableAdapter = None): - """ - Create a new crawler and initialize an empty RecordStore and GeneralStore. - - converters: The set of converters used for this crawler. - recordStore: An initial GeneralStore which might store e.g. environment variables. - - debug: Create a debugging information tree when set to True. - The debugging information tree is a variable stored in - self.debug_tree. It is a dictionary mapping directory entries - to a tuple of general stores and record stores which are valid for the directory scope. - Furthermore, it is stored in a second tree named self.debug_copied whether the - objects in debug_tree had been copied from a higher level in the hierarchy - of the structureelements. - """ - self.global_converters = converters - - self.identified_cache = IdentifiedCache() - self.recordStore = RecordStore() - - self.generalStore = generalStore - if generalStore is None: - self.generalStore = GeneralStore() - - self.identifiableAdapter = identifiableAdapter - if identifiableAdapter is None: - self.identifiableAdapter = LocalStorageIdentifiableAdapter() - - self.debug = debug - if self.debug: - # order in the tuple: - # 0: generalStore - # 1: recordStore - self.debug_tree: dict[str, tuple] = dict() - self.debug_metadata: dict[str, dict] = dict() - self.debug_metadata["copied"] = dict() - self.debug_metadata["provenance"] = defaultdict(lambda: dict()) - self.debug_metadata["usage"] = defaultdict(lambda: set()) - - def crawl_directory(self, dirname: str, crawler_definition_path: str): - """ Crawl a single directory. - - Convenience function that starts the crawler (calls start_crawling) - with a single cirectory as the StructureElement. - """ - - # Load the cfood from a yaml file: - with open(crawler_definition_path, "r") as f: - crawler_definition = yaml.load(f, Loader=yaml.SafeLoader) - - self.start_crawling(Directory(os.path.basename(dirname), - dirname), - crawler_definition) - - @staticmethod - def create_local_converters(crawler_definition: dict): - local_converters = [] - - for key, value in crawler_definition.items(): - if key == "Definitions": - continue - local_converters.append(Converter.converter_factory(value, key)) - - return local_converters - - def start_crawling(self, item: StructureElement, - crawler_definition: dict): - """ - Start point of the crawler recursion. - - item: A structure element that is used for generating the initial items for the crawler. - This could e.g. be a Directory. 
-
-        # This function builds the tree of converters out of the crawler definition.
-
-        if not isinstance(item, Directory):
-            raise NotImplementedError("Currently only directories are supported as items.")
-
-        if self.generalStore is None:
-            raise RuntimeError("Should not happen.")
-
-        local_converters = Crawler.create_local_converters(crawler_definition)
-        # This recursive crawling procedure generates the update list:
-        self.updateList: list[db.Record] = []
-        self._crawl(DirectoryConverter.create_children_from_directory(item),
-                    self.global_converters, local_converters, self.generalStore, self.recordStore,
-                    [], [])
-
-        if self.debug:
-            self.debug_converters = self.global_converters + local_converters
-
-        return self.updateList
-
-    def synchronize(self):
-        """
-        Carry out the actual synchronization.
-        """
-
-        # After the crawling, the actual synchronization with the database, based on the
-        # update list, is carried out:
-
-        return self._synchronize(self.updateList)
-
-    def can_be_checked_externally(self, record: db.Record):
-        """
-        Returns False if there is at least one property in record which:
-        a) is a reference property AND
-        b) where the value is set to a db.Entity (instead of an ID) AND
-        c) where the ID of the value is not set (to an integer)
-
-        Returns True otherwise.
-        """
-        identifiable = self.identifiableAdapter.get_identifiable(record)
-        for p in identifiable.properties:
-            # TODO: implement for lists?
-            if (is_reference(p) and isinstance(p.value, db.Entity)
-                    and p.value.id is None):
-                return False
-        return True
-
-    def create_flat_list(self, ent_list: list[db.Entity], flat: list[db.Entity]):
-        """
-        Recursively adds all properties contained in entities from ent_list to
-        the output list flat. Each element will only be added once to the list.
-        """
-        for ent in ent_list:
-            for p in ent.properties:
-                # TODO: implement for lists?
-                if isinstance(p.value, db.Entity):
-                    if p.value not in flat:
-                        flat.append(p.value)
-                        self.create_flat_list([p.value], flat)
-
-    def all_references_are_existing_already(self, record):
-        """
-        Returns True if all references either have IDs or were checked remotely and not found (i.e.
-        they exist in the local cache).
-        """
-        identifiable = self.identifiableAdapter.get_identifiable(record)
-        for p in identifiable.properties:
-            if (is_reference(p)
-                    # Entity instead of ID and not cached locally
-                    # TODO: implement for lists?
-                    and isinstance(p.value, db.Entity)
-                    and p.value.id is None
-                    and self.get_identified_record_from_local_cache(p.value) is None):
-                # might be checked when reference is resolved
-                return False
-        return True
-
-    def get_identified_record_from_local_cache(self, record: db.Record):
-        """
-        Returns the identified record if an identifiable with the same values already exists
-        locally.
-        (Each identifiable that is not found on the remote server is 'cached' locally to prevent
-        the same identifiable from existing twice.)
-        """
-        identifiable = self.identifiableAdapter.get_identifiable(record)
-        if identifiable is None:
-            return None
-        if identifiable in self.identified_cache:
-            return self.identified_cache[identifiable]
-        else:
-            return None
-
-    def add_identified_record_to_local_cache(self, record: db.Record):
-        """
-        Adds the given record to the local cache under its identifiable.
-
-        No identifiable with the same values may exist locally yet.
-        (Each identifiable that is not found on the remote server is 'cached' locally to prevent
-        the same identifiable from existing twice.)
-        """
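The distinction drawn by can_be_checked_externally can be illustrated with a small hedged example; the record and property names are invented:

```python
import caosdb as db

# Unresolved reference: the value is an Entity without an ID, so the
# identifiable cannot be looked up on the server yet.
person = db.Record().add_parent("Person")
measurement = db.Record().add_parent("Measurement")
measurement.add_property(name="responsible", value=person)

# Resolved reference: the value is an integer ID, so a remote check is possible.
measurement2 = db.Record().add_parent("Measurement")
measurement2.add_property(name="responsible", value=4711)
```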
-        identifiable = self.identifiableAdapter.get_identifiable(record)
-        if identifiable is None:
-            raise RuntimeError()
-        self.identified_cache.add(identifiable=identifiable, record=record)
-
-    def copy_attributes(self, fro: db.Entity, to: db.Entity):
-        raise NotImplementedError()
-
-    def split_into_inserts_and_updates(self, ent_list: list[db.Entity]):
-        to_be_inserted = []
-        to_be_updated = []
-        flat = list(ent_list)
-        # assure all entities are direct members. TODO: Can this be removed at some point? Check only?
-        self.create_flat_list(ent_list, flat)
-
-        # TODO: can the following be removed at some point?
-        for ent in flat:
-            if len(ent.parents) == 0:
-                raise RuntimeError("Records must have a parent.")
-
-        resolved_references = True
-        # flat contains Entities which could not yet be checked against the remote server
-        while resolved_references and len(flat) > 0:
-            resolved_references = False
-
-            for i in reversed(range(len(flat))):
-                record = flat[i]
-
-                # TODO: remove if the exception is never raised
-                if (record.id is not None or record in to_be_inserted):
-                    raise Exception("This should not be reached since treated elements are removed"
                                    " from the list")
-                # Check the local cache first for duplicates
-                elif self.get_identified_record_from_local_cache(record) is not None:
-                    # This record is a duplicate that can be removed. Make sure we do not lose
-                    # information.
-                    # Update a (local) identified record that will be inserted:
-                    newrecord = self.get_identified_record_from_local_cache(record)
-                    self.copy_attributes(fro=record, to=newrecord)
-                    # Bend references to the other object
-                    # TODO: refactor this
-                    for el in flat + to_be_inserted + to_be_updated:
-                        for p in el.properties:
-                            if isinstance(p.value, list):
-                                for index, val in enumerate(p.value):
-                                    if val is record:
-                                        p.value[index] = newrecord
-                            else:
-                                if p.value is record:
-                                    p.value = newrecord
-
-                    del flat[i]
-                    continue
-
-                # all references need to be IDs that exist on the remote server
-                elif self.can_be_checked_externally(record):
-
-                    # Check remotely
-                    identified_record = self.identifiableAdapter.retrieve_identifiable(
-                        deepcopy(record))
-                    if identified_record is None:
-                        # identifiable does not exist remotely
-                        to_be_inserted.append(record)
-                        self.add_identified_record_to_local_cache(record)
-                        del flat[i]
-                    else:
-                        # side effect
-                        record.id = identified_record.id
-                        to_be_updated.append(record)
-                        # TODO: think this through
-                        self.add_identified_record_to_local_cache(record)
-                        del flat[i]
-                    resolved_references = True
-
-                # e.g. references an identifiable that does not exist remotely
-                elif self.all_references_are_existing_already(record):
-                    to_be_inserted.append(record)
-                    self.add_identified_record_to_local_cache(record)
-                    del flat[i]
-                    resolved_references = True
-        if len(flat) > 0:
-            raise RuntimeError("Could not resolve all Entity references. Circular dependency?")
Circular Dependency?") - - return to_be_inserted, to_be_updated - - def replace_entities_by_ids(self, rec: db.Record): - for el in rec.properties: - if isinstance(el.value, db.Entity): - el.value = el.value.id - elif isinstance(el.value, list): - for index, val in enumerate(el.value): - if isinstance(val, db.Entity): - el.value[index] = val.id - - @staticmethod - def remove_unnecessary_updates(updateList: list[db.Record], - identified_records: list[db.Record]): - """ - checks whether all relevant attributes (especially Property values) are equal - - Returns (in future) - ------- - update list without unecessary updates - - """ - if len(updateList) != len(identified_records): - raise RuntimeError("The lists of updates and of identified records need to be of the " - "same length!") - # TODO this can now easily be changed to a function without side effect - for i in reversed(range(len(updateList))): - comp = compare_entities(updateList[i], identified_records[i]) - identical = True - for j in range(2): - # TODO: should be implemented elsewhere (?) - for label in ("parents", ): - if len(comp[j][label]) > 0: - identical = False - break - if not identical: - break - for key in comp[0]["properties"]: - for attribute in ("datatype", "importance", "unit"): - # only make an update for those attributes if there is a value difference and - # the value in the updateList is not None - if attribute in comp[0]["properties"][key]: - attr_val = comp[0]["properties"][key][attribute] - other_attr_val = (comp[1]["properties"][key][attribute] - if attribute in comp[1]["properties"][key] else None) - if attr_val is not None and attr_val != other_attr_val: - identical = False - break - - if "value" in comp[0]["properties"][key]: - identical = False - - if not identical: - break - - if identical: - del updateList[i] - continue - else: - pass - - @staticmethod - def execute_inserts_in_list(to_be_inserted): - if len(to_be_inserted) > 0: - db.Container().extend(to_be_inserted).insert() - - @staticmethod - def execute_updates_in_list(to_be_updated): - if len(to_be_updated) > 0: - db.Container().extend(to_be_updated).update() - - def _synchronize(self, updateList: list[db.Record]): - """ - This function applies several stages: - 1) Retrieve identifiables for all records in updateList. - 2) Compare updateList with existing records. - 3) Insert and update records based on the set of identified differences. - - This function makes use of an IdentifiableAdapter which is used to retrieve - register and retrieve identifiables. - - Return the final insertList and updateList as tuple. 
- """ - - if self.identifiableAdapter is None: - raise RuntimeError("Should not happen.") - - to_be_inserted, to_be_updated = self.split_into_inserts_and_updates(updateList) - - # remove unnecessary updates from list - for el in to_be_updated: - self.replace_entities_by_ids(el) - - identified_records = [self.identifiableAdapter.retrieve_identifiable(record) for record - in to_be_updated] - - self.remove_unnecessary_updates(to_be_updated, identified_records) - - self.execute_inserts_in_list(to_be_inserted) - self.execute_updates_in_list(to_be_updated) - - return (to_be_inserted, to_be_updated) - - @staticmethod - def debug_build_usage_tree(converter: Converter): - res: dict[str, dict[str, Any]] = { - converter.name: { - "usage": ", ".join(converter.metadata["usage"]), - "subtree": {} - } - } - - for subconv in converter.converters: - d = Crawler.debug_build_usage_tree(subconv) - k = list(d.keys()) - if len(k) != 1: - raise RuntimeError("Unkonwn error during building of usage tree.") - res[converter.name]["subtree"][k[0]] = d[k[0]] - return res - - def save_debug_data(self, filename: str): - paths: dict[str, Union[dict, list]] = dict() - - def flatten_debug_info(key): - mod_info = self.debug_metadata[key] - paths[key] = dict() - for record_name in mod_info: - if key == "provenance": - paths[key][record_name] = dict() - for prop_name in mod_info[record_name]: - paths[key][record_name][prop_name] = { - "structure_elements_path": "/".join( - mod_info[record_name][prop_name][0]), - "converters_path": "/".join( - mod_info[record_name][prop_name][1])} - elif key == "usage": - paths[key][record_name] = ", ".join(mod_info[record_name]) - for key in ("provenance", "usage"): - flatten_debug_info(key) - - paths["converters_usage"] = [self.debug_build_usage_tree( - cv) for cv in self.debug_converters] - - with open(filename, "w") as f: - f.write(yaml.dump(paths, sort_keys=False)) - - def _crawl(self, items: list[StructureElement], - global_converters: list[Converter], - local_converters: list[Converter], - generalStore: GeneralStore, - recordStore: RecordStore, - structure_elements_path: list[str], converters_path: list[str]): - """ - Crawl a list of StructureElements and apply any matching converters. - - items: structure_elements (e.g. files and folders on one level on the hierarchy) - global_converters and local_converters: globally or locally defined converters for - treating structure elements. A locally defined converter could be - one that is only valid for a specific subtree of the originally - cralwed StructureElement structure. - generalStore and recordStore: This recursion of the crawl function should only operate on copies of the - global stores of the Crawler object. - """ - for element in items: - for converter in global_converters + local_converters: - # type is something like "matches files", replace isinstance with "type_matches" - # match function tests regexp for example - if (converter.typecheck(element) and - converter.match(element) is not None): - generalStore_copy = generalStore.create_scoped_copy() - recordStore_copy = recordStore.create_scoped_copy() - # extracts values from structure element and stores them in the converter - # this could e.g. be the storage of a variable from the regexp in the - # converter object or the extraction from values from a file and storage - # in the converter object - # -> rather store it in the variable storage than in the converter? 
-        for element in items:
-            for converter in global_converters + local_converters:
-                # type is something like "matches files", replace isinstance with "type_matches"
-                # match function tests regexp for example
-                if (converter.typecheck(element) and
-                        converter.match(element) is not None):
-                    generalStore_copy = generalStore.create_scoped_copy()
-                    recordStore_copy = recordStore.create_scoped_copy()
-                    # extracts values from the structure element and stores them in the converter
-                    # this could e.g. be the storage of a variable from the regexp in the
-                    # converter object or the extraction of values from a file and storage
-                    # in the converter object
-                    # -> rather store it in the variable storage than in the converter?
-                    converter.create_values(generalStore_copy, element)
-
-                    keys_modified = converter.create_records(
-                        generalStore_copy, recordStore_copy, element)
-
-                    children = converter.create_children(generalStore_copy, element)
-                    if self.debug:
-                        # add provenance information for each variable
-                        self.debug_tree[str(element)] = (
-                            generalStore_copy.get_storage(), recordStore_copy.get_storage())
-                        self.debug_metadata["copied"][str(element)] = (
-                            generalStore_copy.get_dict_copied(), recordStore_copy.get_dict_copied())
-                        self.debug_metadata["usage"][str(element)].add(
-                            "/".join(converters_path + [converter.name]))
-                        mod_info = self.debug_metadata["provenance"]
-                        for record_name, prop_name in keys_modified:
-                            # TODO: check
-                            internal_id = recordStore_copy.get_internal_id(record_name)
-                            record_identifier = record_name + "_" + str(internal_id)
-                            converter.metadata["usage"].add(record_identifier)
-                            mod_info[record_identifier][prop_name] = (structure_elements_path + [element.get_name()],
-                                                                      converters_path + [converter.name])
-
-                    self._crawl(children, global_converters, converter.converters,
-                                generalStore_copy, recordStore_copy,
-                                structure_elements_path + [element.get_name()],
-                                converters_path + [converter.name])
-        # When the crawler runs out of scope, copy all records in the recordStore that were
-        # created in this scope to the general update container.
-        scoped_records = recordStore.get_records_current_scope()
-        for record in scoped_records:
-            self.updateList.append(record)
-        return self.updateList
-
-
-def main(*args):
-    pass
-
-
-def parse_args():
-    parser = argparse.ArgumentParser(description=__doc__,
-                                     formatter_class=RawTextHelpFormatter)
-    parser.add_argument("path",
-                        help="the subtree of files below the given path will "
-                        "be considered. Use '/' for everything.")
-
-    return parser.parse_args()
-
-
-if __name__ == "__main__":
-    args = parse_args()
-    sys.exit(main(*args))
diff --git a/tox.ini b/tox.ini index 161c1d743e3861039625fc7b181c0c9fff25e311..2cf966fb5b80e62cb7f216b0785ba567e13ee3ff 100644 --- a/tox.ini +++ b/tox.ini @@ -1,5 +1,5 @@
 [tox]
-envlist=py36, py37, py38, py39
+envlist=py36, py37, py38, py39, py310
 skip_missing_interpreters = true
 
 [testenv]
@@ -7,3 +7,5 @@ deps = .
     pytest
     pytest-cov
 commands=py.test --cov=caosdb -vv {posargs}
+[flake8]
+max-line-length=100
diff --git a/unittests/broken_cfoods/broken1.yml b/unittests/broken_cfoods/broken1.yml new file mode 100644 index 0000000000000000000000000000000000000000..9fd4c52934c56512ada8ea564ccd540e07e25661 --- /dev/null +++ b/unittests/broken_cfoods/broken1.yml @@ -0,0 +1,79 @@
+Definitions:
+  type: Definitions
+  #include "description.yml"
+
+# Converter-Provenance
+# DataAnalysis/project_dir/measurement/match/identifier
+# Structure-Element-Provenance
+# DataAnalysis/2020_SpeedOflight/2020-11-10_kram
+
+DataAnalysis: # name of the converter
+  type: Directory_djskfj
+  match: DataAnalysis
+  subtree: &template
+    project_dir: # name of the first subtree element which is a converter
+      type: Directory
+      match: (?P<date>.*?)_(?P<identifier>.*)
+      records:
+        Project: # this is an identifiable in this case
+          parents:
+          - Project # not needed as the name is equivalent
+          date: $date
+          identifier: $identifier
+
+      subtree:
+        measurement: # new name for folders on the 3rd level
+          type: Directory
+          match: (?P<date>[0-9]{4,4}-[0-9]{2,2}-[0-9]{2,2})(_(?P<identifier>.*))?
+ records: + Measurement: + date: $date + identifier: $identifier + project: $Project + subtree: + README: + type: MarkdownFile # this is a subclass of converter File + # function signature: GeneralStore, StructureElement + # preprocessors: custom.caosdb.convert_values + match: ^README\.md$ + # how to make match case insensitive? + subtree: + description: + type: DictTextElement + match_value: (?P<description>.*) + match_name: description + records: + Measurement: + description: $description + responsible_single: + type: DictTextElement + match_name: responsible + match_value: &person_regexp ((?P<first_name>.+) )?(?P<last_name>.+) + records: &responsible_records + Person: + first_name: $first_name + last_name: $last_name + Measurement: # this uses the reference to the above defined record + responsible: +$Person # each record also implicitely creates a variable + # with the same name. The "+" indicates, that + # this will become a list entry in list property + # "responsible" belonging to Measurement. + + responsible_list: + type: DictListElement + match_name: responsible + subtree: + Person: + type: TextElement + match: *person_regexp + records: *responsible_records + +ExperimentalData: # name of the converter + type: Directory + match: ExperimentalData + subtree: *template + +SimulationData: # name of the converter + type: Directory + match: SimulationData + subtree: *template diff --git a/unittests/broken_cfoods/broken_validation_path.yml b/unittests/broken_cfoods/broken_validation_path.yml new file mode 100644 index 0000000000000000000000000000000000000000..a59978764ba1f400c491ecd94cfebedfe92fc4eb --- /dev/null +++ b/unittests/broken_cfoods/broken_validation_path.yml @@ -0,0 +1,4 @@ +BrokenValidationPathTest: + type: JSONFile + match: "(.*)" + validate: ./this-file-does-not-exist.schema.json diff --git a/unittests/records.xml b/unittests/records.xml index 635f22bb5e185fdf7844b2d5c26ba0fc68b83d8c..f7455ec6b8995db8cd205f69729c32358beee8c0 100644 --- a/unittests/records.xml +++ b/unittests/records.xml @@ -1,168 +1,157 @@ <Entities> - <Record id="109"> - <Version id="f83969a8655440054a322e9d371bce728c030f40" head="true"/> - <Parent id="100" name="Person"/> - <Property id="101" name="first_name" datatype="TEXT" importance="FIX" flag="inheritance:FIX"/> - <Property id="102" name="last_name" datatype="TEXT" importance="FIX" flag="inheritance:FIX">AuthorA</Property> - </Record> - <Record id="110"> - <Version id="698791344c26b48101c23a2c07746a79039c8266" head="true"/> - <Parent id="103" name="Project"/> - <Property id="104" name="date" datatype="DATETIME" importance="FIX" flag="inheritance:FIX">2020</Property> - <Property id="105" name="identifier" datatype="TEXT" importance="FIX" flag="inheritance:FIX">SpeedOfLight</Property> - </Record> - <Record id="111"> - <Version id="7a52266b776ad26c471e21ed05d0d2e397526d27" head="true"/> - <Parent id="100" name="Person"/> - <Property id="101" name="first_name" datatype="TEXT" importance="FIX" flag="inheritance:FIX"/> - <Property id="102" name="last_name" datatype="TEXT" importance="FIX" flag="inheritance:FIX">AuthorD</Property> - </Record> - <Record id="112"> - <Version id="7753ede1808e20fc958f0a113dd1ec3fef7fca13" head="true"/> - <Parent id="103" name="Project"/> - <Property id="104" name="date" datatype="DATETIME" importance="FIX" flag="inheritance:FIX">2020</Property> - <Property id="105" name="identifier" datatype="TEXT" importance="FIX" flag="inheritance:FIX">climate-model-predict</Property> - </Record> - <Record id="113"> - <Version 
id="3a49980c61ad722f3e827cf822bc37d9246cfe54" head="true"/> - <Parent id="100" name="Person"/> - <Property id="101" name="first_name" datatype="TEXT" importance="FIX" flag="inheritance:FIX"/> - <Property id="102" name="last_name" datatype="TEXT" importance="FIX" flag="inheritance:FIX">AuthorB</Property> - </Record> - <Record id="114"> - <Version id="f59a362efab99d729c7365af27447579ff676b1a" head="true"/> - <Parent id="100" name="Person"/> - <Property id="101" name="first_name" datatype="TEXT" importance="FIX" flag="inheritance:FIX"/> - <Property id="102" name="last_name" datatype="TEXT" importance="FIX" flag="inheritance:FIX">AuthorC</Property> - </Record> - <Record id="115"> - <Version id="a4137cf9d2c48991aa51abba59dffdabfeb366d7" head="true"/> - <Parent id="100" name="Person"/> - <Property id="101" name="first_name" datatype="TEXT" importance="FIX" flag="inheritance:FIX"/> - <Property id="102" name="last_name" datatype="TEXT" importance="FIX" flag="inheritance:FIX">AuthorE</Property> - </Record> - <Record id="116"> - <Version id="790f5e3b165816448cd7221f1170f91781da9d25" head="true"/> - <Parent id="106" name="Measurement"/> - <Property id="104" name="date" datatype="DATETIME" importance="FIX" flag="inheritance:FIX">2020-01-04</Property> - <Property id="105" name="identifier" datatype="TEXT" importance="FIX" flag="inheritance:FIX">average-all-exp</Property> - <Property id="103" name="project" datatype="Project" importance="FIX" flag="inheritance:FIX">110</Property> - <Property id="107" name="responsible" datatype="LIST<Person>" importance="FIX" flag="inheritance:FIX"> - <Value>109</Value> + <Record id="281"> + <Version id="291faf0ae67b0437d5ab8dd0c6c60cf43c8cc027" head="true"/> + <Parent id="250" name="Project"/> + <Property id="247" name="date" description="date of the experiment" datatype="DATETIME" importance="FIX" flag="inheritance:FIX">2020</Property> + <Property id="248" name="identifier" description="identifier of the experiment" datatype="TEXT" importance="FIX" flag="inheritance:FIX">climate-model-predict</Property> + </Record> + <Record id="282"> + <Version id="59f41d5ebba6f6d7c881452386c3bd76e03a6871" head="true"/> + <Parent id="259" name="Person"/> + <Property id="261" name="first_name" description="First name of a Person." datatype="TEXT" importance="FIX" flag="inheritance:FIX"/> + <Property id="262" name="last_name" description="LastName of a Person." datatype="TEXT" importance="FIX" flag="inheritance:FIX">AuthorE</Property> + </Record> + <Record id="283"> + <Version id="58c553e40002e184c32ea062993701237fc21934" head="true"/> + <Parent id="259" name="Person"/> + <Property id="261" name="first_name" description="First name of a Person." datatype="TEXT" importance="FIX" flag="inheritance:FIX"/> + <Property id="262" name="last_name" description="LastName of a Person." 
datatype="TEXT" importance="FIX" flag="inheritance:FIX">AuthorD</Property> + </Record> + <Record id="284" description="Average temperatures of the years 2000-2009 as obtained from wheatherdata.example"> + <Version id="f9dbd861ccffff0c9a08df41a82ca60a374a92bb" head="true"/> + <Parent id="278" name="Measurement"/> + <Property id="247" name="date" description="date of the experiment" datatype="DATETIME" importance="FIX" flag="inheritance:FIX">2000-01-01</Property> + <Property id="248" name="identifier" description="identifier of the experiment" datatype="TEXT" importance="FIX" flag="inheritance:FIX"/> + <Property id="250" name="project" datatype="Project" importance="FIX" flag="inheritance:FIX">281</Property> + <Property id="249" name="responsible" datatype="LIST<Person>" importance="FIX" flag="inheritance:FIX"> + <Value>283</Value> </Property> - <Property id="24" name="description" description="Description of an entity." datatype="TEXT" importance="FIX" flag="inheritance:FIX">Average over all data of each type of experiment separately and comined.</Property> - </Record> - <Record id="118"> - <Version id="520e3e1ab113002c26051652e169f4e2c3273cea" head="true"/> - <Parent id="106" name="Measurement"/> - <Property id="104" name="date" datatype="DATETIME" importance="FIX" flag="inheritance:FIX">2020-01-05</Property> - <Property id="105" name="identifier" datatype="TEXT" importance="FIX" flag="inheritance:FIX">average-all-exp-corr</Property> - <Property id="103" name="project" datatype="Project" importance="FIX" flag="inheritance:FIX">110</Property> - <Property id="107" name="responsible" datatype="LIST<Person>" importance="FIX" flag="inheritance:FIX"> - <Value>109</Value> + </Record> + <Record id="285" description="Average temperatures of the years 1990-1999 as obtained from wheatherdata.example"> + <Version id="561a29c3b200f47a0c8cd1d43b3430f9ae4bbbb4" head="true"/> + <Parent id="278" name="Measurement"/> + <Property id="247" name="date" description="date of the experiment" datatype="DATETIME" importance="FIX" flag="inheritance:FIX">1990-01-01</Property> + <Property id="248" name="identifier" description="identifier of the experiment" datatype="TEXT" importance="FIX" flag="inheritance:FIX"/> + <Property id="250" name="project" datatype="Project" importance="FIX" flag="inheritance:FIX">281</Property> + <Property id="249" name="responsible" datatype="LIST<Person>" importance="FIX" flag="inheritance:FIX"> + <Value>283</Value> </Property> - <Property id="24" name="description" description="Description of an entity." 
datatype="TEXT" importance="FIX" flag="inheritance:FIX">Average over all data of each type of experiment separately and comined.</Property> - </Record> - <Record id="119"> - <Version id="3383403dc3080d1b566b3b39063d61c110260cb0" head="true"/> - <Parent id="106" name="Measurement"/> - <Property id="104" name="date" datatype="DATETIME" importance="FIX" flag="inheritance:FIX">2020-02-08</Property> - <Property id="105" name="identifier" datatype="TEXT" importance="FIX" flag="inheritance:FIX">prediction-errors</Property> - <Property id="103" name="project" datatype="Project" importance="FIX" flag="inheritance:FIX">112</Property> - <Property id="107" name="responsible" datatype="LIST<Person>" importance="FIX" flag="inheritance:FIX"> - <Value>111</Value> + </Record> + <Record id="286" description="Average temperatures of the years 1980-1989 as obtained from wheatherdata.example"> + <Version id="8ec5f56b96a0e60130f909ab6b4a035f1579e856" head="true"/> + <Parent id="278" name="Measurement"/> + <Property id="247" name="date" description="date of the experiment" datatype="DATETIME" importance="FIX" flag="inheritance:FIX">1980-01-01</Property> + <Property id="248" name="identifier" description="identifier of the experiment" datatype="TEXT" importance="FIX" flag="inheritance:FIX"/> + <Property id="250" name="project" datatype="Project" importance="FIX" flag="inheritance:FIX">281</Property> + <Property id="249" name="responsible" datatype="LIST<Person>" importance="FIX" flag="inheritance:FIX"> + <Value>283</Value> </Property> - <Property id="24" name="description" description="Description of an entity." datatype="TEXT" importance="FIX" flag="inheritance:FIX">comparison between predicted and measured temperatures for 2010 to 2019</Property> - </Record> - <Record id="120"> - <Version id="5da5ef24abcdec5be0f24fb9977f4c574d463768" head="true"/> - <Parent id="106" name="Measurement"/> - <Property id="104" name="date" datatype="DATETIME" importance="FIX" flag="inheritance:FIX">2020-01-01</Property> - <Property id="105" name="identifier" datatype="TEXT" importance="FIX" flag="inheritance:FIX">TimeOfFlight</Property> - <Property id="103" name="project" datatype="Project" importance="FIX" flag="inheritance:FIX">110</Property> - <Property id="107" name="responsible" datatype="LIST<Person>" importance="FIX" flag="inheritance:FIX"> - <Value>109</Value> - <Value>113</Value> + </Record> + <Record id="287"> + <Version id="b967d4ba9a333fd37b723d2b4c6f7e18ee0d41e3" head="true"/> + <Parent id="250" name="Project"/> + <Property id="247" name="date" description="date of the experiment" datatype="DATETIME" importance="FIX" flag="inheritance:FIX">2020</Property> + <Property id="248" name="identifier" description="identifier of the experiment" datatype="TEXT" importance="FIX" flag="inheritance:FIX">SpeedOfLight</Property> + </Record> + <Record id="288"> + <Version id="18a8c4200597bf745391829c6cb9c04c747264fb" head="true"/> + <Parent id="259" name="Person"/> + <Property id="261" name="first_name" description="First name of a Person." datatype="TEXT" importance="FIX" flag="inheritance:FIX"/> + <Property id="262" name="last_name" description="LastName of a Person." datatype="TEXT" importance="FIX" flag="inheritance:FIX">AuthorB</Property> + </Record> + <Record id="289"> + <Version id="799b41948bde740f37e202a5bab70e3d8829b3f6" head="true"/> + <Parent id="259" name="Person"/> + <Property id="261" name="first_name" description="First name of a Person." 
datatype="TEXT" importance="FIX" flag="inheritance:FIX"/> + <Property id="262" name="last_name" description="LastName of a Person." datatype="TEXT" importance="FIX" flag="inheritance:FIX">AuthorA</Property> + </Record> + <Record id="290"> + <Version id="905f204d9bdc58890b59367338be038383f4dcf9" head="true"/> + <Parent id="259" name="Person"/> + <Property id="261" name="first_name" description="First name of a Person." datatype="TEXT" importance="FIX" flag="inheritance:FIX"/> + <Property id="262" name="last_name" description="LastName of a Person." datatype="TEXT" importance="FIX" flag="inheritance:FIX">AuthorC</Property> + </Record> + <Record id="291" description="Time-of-flight measurements to determine the speed of light"> + <Version id="2d2f795a165fe1401ed0270f5b0bee9e6781e2c9" head="true"/> + <Parent id="278" name="Measurement"/> + <Property id="247" name="date" description="date of the experiment" datatype="DATETIME" importance="FIX" flag="inheritance:FIX">2020-01-01</Property> + <Property id="248" name="identifier" description="identifier of the experiment" datatype="TEXT" importance="FIX" flag="inheritance:FIX">TimeOfFlight</Property> + <Property id="250" name="project" datatype="Project" importance="FIX" flag="inheritance:FIX">287</Property> + <Property id="249" name="responsible" datatype="LIST<Person>" importance="FIX" flag="inheritance:FIX"> + <Value>289</Value> + <Value>288</Value> </Property> - <Property id="24" name="description" description="Description of an entity." datatype="TEXT" importance="FIX" flag="inheritance:FIX">Time-of-flight measurements to determine the speed of light</Property> - </Record> - <Record id="121"> - <Version id="7efa963b3663818aee4f9438cbbe26f8565af1e9" head="true"/> - <Parent id="106" name="Measurement"/> - <Property id="104" name="date" datatype="DATETIME" importance="FIX" flag="inheritance:FIX">2020-01-02</Property> - <Property id="105" name="identifier" datatype="TEXT" importance="FIX" flag="inheritance:FIX">Cavity</Property> - <Property id="103" name="project" datatype="Project" importance="FIX" flag="inheritance:FIX">110</Property> - <Property id="107" name="responsible" datatype="LIST<Person>" importance="FIX" flag="inheritance:FIX"> - <Value>109</Value> - <Value>114</Value> + </Record> + <Record id="292" description="comparison between predicted and measured temperatures for 2010 to 2019"> + <Version id="454be377ae35e44d89b7d28fc44d518b7e9321a3" head="true"/> + <Parent id="278" name="Measurement"/> + <Property id="247" name="date" description="date of the experiment" datatype="DATETIME" importance="FIX" flag="inheritance:FIX">2020-02-08</Property> + <Property id="248" name="identifier" description="identifier of the experiment" datatype="TEXT" importance="FIX" flag="inheritance:FIX">prediction-errors</Property> + <Property id="250" name="project" datatype="Project" importance="FIX" flag="inheritance:FIX">281</Property> + <Property id="249" name="responsible" datatype="LIST<Person>" importance="FIX" flag="inheritance:FIX"> + <Value>283</Value> </Property> - <Property id="24" name="description" description="Description of an entity." 
datatype="TEXT" importance="FIX" flag="inheritance:FIX">Cavity resonance measurements for determining the speed of light</Property> - </Record> - <Record id="122"> - <Version id="244cda0358bc6937ee0944a86a0e0580c7b41781" head="true"/> - <Parent id="106" name="Measurement"/> - <Property id="104" name="date" datatype="DATETIME" importance="FIX" flag="inheritance:FIX">2020-01-03</Property> - <Property id="105" name="identifier" datatype="TEXT" importance="FIX" flag="inheritance:FIX"/> - <Property id="103" name="project" datatype="Project" importance="FIX" flag="inheritance:FIX">110</Property> - <Property id="107" name="responsible" datatype="LIST<Person>" importance="FIX" flag="inheritance:FIX"> - <Value>109</Value> - <Value>113</Value> + </Record> + <Record id="293" description="Average over all data of each type of experiment separately and comined."> + <Version id="12f3cd8eb6ba7a264ecc2d296c6e8d3a9f7ffc95" head="true"/> + <Parent id="278" name="Measurement"/> + <Property id="247" name="date" description="date of the experiment" datatype="DATETIME" importance="FIX" flag="inheritance:FIX">2020-01-05</Property> + <Property id="248" name="identifier" description="identifier of the experiment" datatype="TEXT" importance="FIX" flag="inheritance:FIX">average-all-exp-corr</Property> + <Property id="250" name="project" datatype="Project" importance="FIX" flag="inheritance:FIX">287</Property> + <Property id="249" name="responsible" datatype="LIST<Person>" importance="FIX" flag="inheritance:FIX"> + <Value>289</Value> </Property> - <Property id="24" name="description" description="Description of an entity." datatype="TEXT" importance="FIX" flag="inheritance:FIX">Radio interferometry measurements to determine the speed of light</Property> - </Record> - <Record id="123"> - <Version id="656558b1a926824b98a733c1fdd9d0444c75745f" head="true"/> - <Parent id="106" name="Measurement"/> - <Property id="104" name="date" datatype="DATETIME" importance="FIX" flag="inheritance:FIX">1980-01-01</Property> - <Property id="105" name="identifier" datatype="TEXT" importance="FIX" flag="inheritance:FIX"/> - <Property id="103" name="project" datatype="Project" importance="FIX" flag="inheritance:FIX">112</Property> - <Property id="107" name="responsible" datatype="LIST<Person>" importance="FIX" flag="inheritance:FIX"> - <Value>111</Value> + </Record> + <Record id="294" description="Average over all data of each type of experiment separately and comined."> + <Version id="4b513be5a2dbad332a3442eabe45ac7b1eae3b22" head="true"/> + <Parent id="278" name="Measurement"/> + <Property id="247" name="date" description="date of the experiment" datatype="DATETIME" importance="FIX" flag="inheritance:FIX">2020-01-04</Property> + <Property id="248" name="identifier" description="identifier of the experiment" datatype="TEXT" importance="FIX" flag="inheritance:FIX">average-all-exp</Property> + <Property id="250" name="project" datatype="Project" importance="FIX" flag="inheritance:FIX">287</Property> + <Property id="249" name="responsible" datatype="LIST<Person>" importance="FIX" flag="inheritance:FIX"> + <Value>289</Value> </Property> - <Property id="24" name="description" description="Description of an entity." 
datatype="TEXT" importance="FIX" flag="inheritance:FIX">Average temperatures of the years 1980-1989 as obtained from wheatherdata.example</Property> - </Record> - <Record id="124"> - <Version id="971900b83171b89787aa26a0a7b543d096ee1ebc" head="true"/> - <Parent id="106" name="Measurement"/> - <Property id="104" name="date" datatype="DATETIME" importance="FIX" flag="inheritance:FIX">1990-01-01</Property> - <Property id="105" name="identifier" datatype="TEXT" importance="FIX" flag="inheritance:FIX"/> - <Property id="103" name="project" datatype="Project" importance="FIX" flag="inheritance:FIX">112</Property> - <Property id="107" name="responsible" datatype="LIST<Person>" importance="FIX" flag="inheritance:FIX"> - <Value>111</Value> + </Record> + <Record id="295" description="Code for fitting the predictive model to the training data and for predicting the average annual temperature for all measurement stations for the years 2010 to 2019"> + <Version id="e08fb3f41d0d2ab505f68795d4ee85c8235ef794" head="true"/> + <Parent id="278" name="Measurement"/> + <Property id="247" name="date" description="date of the experiment" datatype="DATETIME" importance="FIX" flag="inheritance:FIX">2020-02-01</Property> + <Property id="248" name="identifier" description="identifier of the experiment" datatype="TEXT" importance="FIX" flag="inheritance:FIX"/> + <Property id="250" name="project" datatype="Project" importance="FIX" flag="inheritance:FIX">281</Property> + <Property id="249" name="responsible" datatype="LIST<Person>" importance="FIX" flag="inheritance:FIX"> + <Value>282</Value> </Property> - <Property id="24" name="description" description="Description of an entity." datatype="TEXT" importance="FIX" flag="inheritance:FIX">Average temperatures of the years 1990-1999 as obtained from wheatherdata.example</Property> - </Record> - <Record id="125"> - <Version id="deeb78862ae1ccdc831532e51bd225976af67807" head="true"/> - <Parent id="106" name="Measurement"/> - <Property id="104" name="date" datatype="DATETIME" importance="FIX" flag="inheritance:FIX">2000-01-01</Property> - <Property id="105" name="identifier" datatype="TEXT" importance="FIX" flag="inheritance:FIX"/> - <Property id="103" name="project" datatype="Project" importance="FIX" flag="inheritance:FIX">112</Property> - <Property id="107" name="responsible" datatype="LIST<Person>" importance="FIX" flag="inheritance:FIX"> - <Value>111</Value> + </Record> + <Record id="296" description="Average temperatures of the years 2010-2019 as obtained from wheatherdata.example"> + <Version id="81b7dae68df569f9fbf65e75448446093f816ab1" head="true"/> + <Parent id="278" name="Measurement"/> + <Property id="247" name="date" description="date of the experiment" datatype="DATETIME" importance="FIX" flag="inheritance:FIX">2010-01-01</Property> + <Property id="248" name="identifier" description="identifier of the experiment" datatype="TEXT" importance="FIX" flag="inheritance:FIX"/> + <Property id="250" name="project" datatype="Project" importance="FIX" flag="inheritance:FIX">281</Property> + <Property id="249" name="responsible" datatype="LIST<Person>" importance="FIX" flag="inheritance:FIX"> + <Value>283</Value> </Property> - <Property id="24" name="description" description="Description of an entity." 
datatype="TEXT" importance="FIX" flag="inheritance:FIX">Average temperatures of the years 2000-2009 as obtained from wheatherdata.example</Property> - </Record> - <Record id="126"> - <Version id="133832dd2b528ab4a83a189dc8fca9d1d7c541fa" head="true"/> - <Parent id="106" name="Measurement"/> - <Property id="104" name="date" datatype="DATETIME" importance="FIX" flag="inheritance:FIX">2010-01-01</Property> - <Property id="105" name="identifier" datatype="TEXT" importance="FIX" flag="inheritance:FIX"/> - <Property id="103" name="project" datatype="Project" importance="FIX" flag="inheritance:FIX">112</Property> - <Property id="107" name="responsible" datatype="LIST<Person>" importance="FIX" flag="inheritance:FIX"> - <Value>111</Value> + </Record> + <Record id="297" description="Radio interferometry measurements to determine the speed of light"> + <Version id="f3553ee9660b43b6a7598614de8eb17f40cf9782" head="true"/> + <Parent id="278" name="Measurement"/> + <Property id="247" name="date" description="date of the experiment" datatype="DATETIME" importance="FIX" flag="inheritance:FIX">2020-01-03</Property> + <Property id="248" name="identifier" description="identifier of the experiment" datatype="TEXT" importance="FIX" flag="inheritance:FIX"/> + <Property id="250" name="project" datatype="Project" importance="FIX" flag="inheritance:FIX">287</Property> + <Property id="249" name="responsible" datatype="LIST<Person>" importance="FIX" flag="inheritance:FIX"> + <Value>289</Value> + <Value>288</Value> </Property> - <Property id="24" name="description" description="Description of an entity." datatype="TEXT" importance="FIX" flag="inheritance:FIX">Average temperatures of the years 2010-2019 as obtained from wheatherdata.example</Property> - </Record> - <Record id="127"> - <Version id="865aebd7662341f8582b4f0b32bd96fd986eb815" head="true"/> - <Parent id="106" name="Measurement"/> - <Property id="104" name="date" datatype="DATETIME" importance="FIX" flag="inheritance:FIX">2020-02-01</Property> - <Property id="105" name="identifier" datatype="TEXT" importance="FIX" flag="inheritance:FIX"/> - <Property id="103" name="project" datatype="Project" importance="FIX" flag="inheritance:FIX">112</Property> - <Property id="107" name="responsible" datatype="LIST<Person>" importance="FIX" flag="inheritance:FIX"> - <Value>115</Value> + </Record> + <Record id="298" description="Cavity resonance measurements for determining the speed of light"> + <Version id="06ddcf6f8a8c30761912c3752139acc3f6c610eb" head="true"/> + <Parent id="278" name="Measurement"/> + <Property id="247" name="date" description="date of the experiment" datatype="DATETIME" importance="FIX" flag="inheritance:FIX">2020-01-02</Property> + <Property id="248" name="identifier" description="identifier of the experiment" datatype="TEXT" importance="FIX" flag="inheritance:FIX">Cavity</Property> + <Property id="250" name="project" datatype="Project" importance="FIX" flag="inheritance:FIX">287</Property> + <Property id="249" name="responsible" datatype="LIST<Person>" importance="FIX" flag="inheritance:FIX"> + <Value>289</Value> + <Value>290</Value> </Property> - <Property id="24" name="description" description="Description of an entity." 
datatype="TEXT" importance="FIX" flag="inheritance:FIX">Code for fitting the predictive model to the training data and for predicting the average annual temperature for all measurement stations for the years 2010 to 2019</Property> </Record> </Entities> diff --git a/unittests/scifolder_cfood.yml b/unittests/scifolder_cfood.yml index 1dd01de855725a7d4c719437dd2e805012e3f29a..1fd7c98d57b35fa651e36bee2c529a46e3a96cde 100644 --- a/unittests/scifolder_cfood.yml +++ b/unittests/scifolder_cfood.yml @@ -1,79 +1,82 @@ +# This is only a scifolder test cfood with a limited functionality. +# The full scifolder cfood will be developed here: +# https://gitlab.indiscale.com/caosdb/src/crawler-cfoods/scifolder-cfood + Definitions: type: Definitions #include "description.yml" -# Converter-Provenance -# DataAnalysis/project_dir/measurement/match/identifier -# Structure-Element-Provenance -# DataAnalysis/2020_SpeedOflight/2020-11-10_kram - -DataAnalysis: # name of the converter +Data: # name of the converter type: Directory - match: DataAnalysis - subtree: &template - project_dir: # name of the first subtree element which is a converter + match: (.*) + subtree: + DataAnalysis: # name of the converter type: Directory - match: (?P<date>.*?)_(?P<identifier>.*) - records: - Project: # this is an identifiable in this case - parents: - - Project # not needed as the name is equivalent - date: $date - identifier: $identifier - - subtree: - measurement: # new name for folders on the 3rd level + match: DataAnalysis + subtree: &template + project_dir: # name of the first subtree element which is a converter type: Directory - match: (?P<date>[0-9]{4,4}-[0-9]{2,2}-[0-9]{2,2})(_(?P<identifier>.*))? + match: (?P<date>.*?)_(?P<identifier>.*) records: - Measurement: + Project: # this is an identifiable in this case + parents: + - Project # not needed as the name is equivalent date: $date identifier: $identifier - project: $Project + subtree: - README: - type: MarkdownFile # this is a subclass of converter File - # function signature: GeneralStore, StructureElement - # preprocessors: custom.caosdb.convert_values - match: README\.md - # how to make match case insensitive? + measurement: # new name for folders on the 3rd level + type: Directory + match: (?P<date>[0-9]{4,4}-[0-9]{2,2}-[0-9]{2,2})(_(?P<identifier>.*))? + records: + Measurement: + date: $date + identifier: $identifier + project: $Project subtree: - description: - type: DictTextElement - match_value: (?P<description>.*) - match_name: description - records: - Measurement: - description: $description - responsible_single: - type: DictTextElement - match_name: responsible - match_value: &person_regexp ((?P<first_name>.+) )?(?P<last_name>.+) - records: &responsible_records - Person: - first_name: $first_name - last_name: $last_name - Measurement: # this uses the reference to the above defined record - responsible: +$Person # each record also implicitely creates a variable - # with the same name. The "+" indicates, that - # this will become a list entry in list property - # "responsible" belonging to Measurement. - - responsible_list: - type: DictListElement - match_name: responsible + README: + type: MarkdownFile # this is a subclass of converter File + # function signature: GeneralStore, StructureElement + # preprocessors: custom.caosdb.convert_values + match: ^README\.md$ + # how to make match case insensitive? 
subtree: - Person: - type: TextElement - match: *person_regexp - records: *responsible_records + description: + type: DictTextElement + match_value: (?P<description>.*) + match_name: description + records: + Measurement: + description: $description + responsible_single: + type: DictTextElement + match_name: responsible + match_value: &person_regexp ((?P<first_name>.+) )?(?P<last_name>.+) + records: &responsible_records + Person: + first_name: $first_name + last_name: $last_name + Measurement: # this uses the reference to the above defined record + responsible: +$Person # each record also implicitely creates a variable + # with the same name. The "+" indicates, that + # this will become a list entry in list property + # "responsible" belonging to Measurement. -ExperimentalData: # name of the converter - type: Directory - match: ExperimentalData - subtree: *template + responsible_list: + type: DictListElement + match_name: responsible + subtree: + Person: + type: TextElement + match: *person_regexp + records: *responsible_records -SimulationData: # name of the converter - type: Directory - match: SimulationData - subtree: *template + ExperimentalData: # name of the converter + type: Directory + match: ExperimentalData + subtree: *template + + SimulationData: # name of the converter + type: Directory + match: SimulationData + subtree: *template diff --git a/unittests/scifolder_extended.yml b/unittests/scifolder_extended.yml new file mode 100644 index 0000000000000000000000000000000000000000..2a1416b778e96ba57fc216d9763572568703ab75 --- /dev/null +++ b/unittests/scifolder_extended.yml @@ -0,0 +1,103 @@ +# This is only a scifolder test cfood with a limited functionality. +# The full scifolder cfood will be developed here: +# https://gitlab.indiscale.com/caosdb/src/crawler-cfoods/scifolder-cfood + +Definitions: + type: Definitions + #include "description.yml" + +Data: # name of the converter + type: Directory + match: (.*) + subtree: + DataAnalysis: # name of the converter + type: Directory + match: DataAnalysis + subtree: &template + project_dir: # name of the first subtree element which is a converter + type: Directory + match: (?P<date>.*?)_(?P<identifier>.*) + records: + Project: # this is an identifiable in this case + parents: + - Project # not needed as the name is equivalent + date: $date + identifier: $identifier + + subtree: + measurement: # new name for folders on the 3rd level + type: Directory + match: (?P<date>[0-9]{4,4}-[0-9]{2,2}-[0-9]{2,2})(_(?P<identifier>.*))? + records: + Measurement: + date: $date + identifier: $identifier + project: $Project + subtree: + README: + type: MarkdownFile # this is a subclass of converter File + # function signature: GeneralStore, StructureElement + # preprocessors: custom.caosdb.convert_values + match: ^README\.md$ + # how to make match case insensitive? 
+ records: # this block is very verbose and intended to make sure that this + # file is inserted correctly (and can be supplemented with properties + # and / or parents), TODO: maybe there should be a shorthand + ReadmeFile: + parents: [] + role: File + path: $README + file: $README # this is automatically the relative path + # starting from the top level structure element + # of this element + Measurement: + ReadmeFile: $ReadmeFile + + subtree: + description: + type: DictTextElement + match_value: (?P<description>.*) + match_name: description + records: + Measurement: + description: $description + responsible_single: + type: DictTextElement + match_name: responsible + match_value: &person_regexp ((?P<first_name>.+) )?(?P<last_name>.+) + records: &responsible_records + Person: + first_name: $first_name + last_name: $last_name + Measurement: # this uses the reference to the above defined record + responsible: +$Person # each record also implicitely creates a variable + # with the same name. The "+" indicates, that + # this will become a list entry in list property + # "responsible" belonging to Measurement. + + responsible_list: + type: DictListElement + match_name: responsible + subtree: + Person: + type: TextElement + match: *person_regexp + records: *responsible_records + + # sources_list: + # type: DictListElement + # match_name: sources + # subtree: + # Source: + # type: TextElement + # match: &path ... ??? + + ExperimentalData: # name of the converter + type: Directory + match: ExperimentalData + subtree: *template + + SimulationData: # name of the converter + type: Directory + match: SimulationData + subtree: *template diff --git a/unittests/scifolder_extended2.yml b/unittests/scifolder_extended2.yml new file mode 100644 index 0000000000000000000000000000000000000000..f1dfc2d4635b6956930343685c7b17ca4f2f1679 --- /dev/null +++ b/unittests/scifolder_extended2.yml @@ -0,0 +1,100 @@ +# This is only a scifolder test cfood with a limited functionality. +# The full scifolder cfood will be developed here: +# https://gitlab.indiscale.com/caosdb/src/crawler-cfoods/scifolder-cfood + +Definitions: + type: Definitions + #include "description.yml" + +DataAnalysis: # name of the converter + type: Directory + match: DataAnalysis + subtree: &template + project_dir: # name of the first subtree element which is a converter + type: Directory + match: (?P<date>.*?)_(?P<identifier>.*) + records: + Project: # this is an identifiable in this case + parents: + - Project # not needed as the name is equivalent + date: $date + identifier: $identifier + + subtree: + measurement: # new name for folders on the 3rd level + type: Directory + match: (?P<date>[0-9]{4,4}-[0-9]{2,2}-[0-9]{2,2})(_(?P<identifier>.*))? + records: + Measurement: + date: $date + identifier: $identifier + project: $Project + subtree: + README: + type: MarkdownFile # this is a subclass of converter File + # function signature: GeneralStore, StructureElement + # preprocessors: custom.caosdb.convert_values + match: ^README\.md$ + # how to make match case insensitive? 
+ records: # this block is very verbose and intended to make sure that this + # file is inserted correctly (and can be supplemented with properties + # and / or parents), TODO: maybe there should be a shorthand + ReadmeFile: + parents: + - ProjectMarkdownReadme + role: File + path: $README + file: $README # this is automatically the relative path + # starting from the top level structure element + # of this element + Measurement: + ReadmeFile: $ReadmeFile + + subtree: + description: + type: DictTextElement + match_value: (?P<description>.*) + match_name: description + records: + Measurement: + description: $description + responsible_single: + type: DictTextElement + match_name: responsible + match_value: &person_regexp ((?P<first_name>.+) )?(?P<last_name>.+) + records: &responsible_records + Person: + first_name: $first_name + last_name: $last_name + Measurement: # this uses the reference to the above defined record + responsible: +$Person # each record also implicitely creates a variable + # with the same name. The "+" indicates, that + # this will become a list entry in list property + # "responsible" belonging to Measurement. + + responsible_list: + type: DictListElement + match_name: responsible + subtree: + Person: + type: TextElement + match: *person_regexp + records: *responsible_records + + # sources_list: + # type: DictListElement + # match_name: sources + # subtree: + # Source: + # type: TextElement + # match: &path ... ??? + +ExperimentalData: # name of the converter + type: Directory + match: ExperimentalData + subtree: *template + +SimulationData: # name of the converter + type: Directory + match: SimulationData + subtree: *template diff --git a/unittests/test_cache.py b/unittests/test_cache.py new file mode 100644 index 0000000000000000000000000000000000000000..135316b92fda0ac1e43f4e5f2c4f28fbf1272494 --- /dev/null +++ b/unittests/test_cache.py @@ -0,0 +1,56 @@ +#!/bin/python +# Tests for entity comparison +# A. 
Schlemmer, 06/2021 + +import caosdb as db +from pytest import raises + +from caoscrawler.identified_cache import _create_hashable_string as create_hash_string + + +def test_normal_hash_creation(): + # Test the initial functionality: + # hash comprises only one parent, name and properties: + + r1 = db.Record() + r1.add_property(name="test") + r1.add_parent("bla") + hash1 = create_hash_string(r1) + + r2 = db.Record() + r2.add_property(name="test2") + r2.add_parent("bla") + hash2 = create_hash_string(r2) + + assert hash1 != hash2 + + r3 = db.Record() + r3.add_property(name="test") + r3.add_parent("bla bla") + hash3 = create_hash_string(r3) + assert hash1 != hash3 + assert hash2 != hash3 + + # no name and no properties and no parents: + r4 = db.Record() + with raises(RuntimeError, match=".*1 parent.*"): + create_hash_string(r4) + + # should work + r4.add_parent("bla") + assert len(create_hash_string(r4)) > 0 + r4.add_property(name="test") + assert len(create_hash_string(r4)) > 0 + + r4.add_parent("bla bla") + with raises(RuntimeError, match=".*1 parent.*"): + create_hash_string(r4) + + +def test_file_hash_creation(): + f1 = db.File(path="/bla/bla/test1.txt") + hash1 = create_hash_string(f1) + f2 = db.File(path="/bla/bla/test2.txt") + hash2 = create_hash_string(f2) + + assert hash1 != hash2 diff --git a/unittests/test_converters.py b/unittests/test_converters.py index b9a730c226bb25fd97b23cd39f61d2237758de91..ec45707fd97e9ab6666843b77e5c86b9a7d98531 100644 --- a/unittests/test_converters.py +++ b/unittests/test_converters.py @@ -27,16 +27,59 @@ test the converters module """ -from newcrawler.converters import Converter -from newcrawler.stores import GeneralStore -from newcrawler.converters import MarkdownFileConverter -from newcrawler.structure_elements import Directory -from newcrawler.structure_elements import File, DictTextElement, DictListElement +from caoscrawler.converters import Converter +from caoscrawler.stores import GeneralStore +from caoscrawler.converters import (ConverterValidationError, + MarkdownFileConverter, JSONFileConverter, + DictConverter) +from caoscrawler.structure_elements import Directory +from caoscrawler.structure_elements import (File, DictTextElement, + DictListElement, DictElement, + DictBooleanElement, DictDictElement, + DictIntegerElement, DictFloatElement) + +from caoscrawler.converters import handle_value from test_tool import rfp +import pytest +import os +import importlib + + +@pytest.fixture +def converter_registry(): + converter_registry: dict[str, dict[str, str]] = { + "Directory": { + "converter": "DirectoryConverter", + "package": "caoscrawler.converters"}, + "MarkdownFile": { + "converter": "MarkdownFileConverter", + "package": "caoscrawler.converters"}, + "Dict": { + "converter": "DictConverter", + "package": "caoscrawler.converters"}, + "DictTextElement": { + "converter": "DictTextElementConverter", + "package": "caoscrawler.converters"}, + "DictListElement": { + "converter": "DictListElementConverter", + "package": "caoscrawler.converters"}, + "TextElement": { + "converter": "TextElementConverter", + "package": "caoscrawler.converters"}, + "JSONFile": { + "converter": "JSONFileConverter", + "package": "caoscrawler.converters"}, + } + + for key, value in converter_registry.items(): + module = importlib.import_module(value["package"]) + value["class"] = getattr(module, value["converter"]) + return converter_registry -def testConverterTrivial(): + +def testConverterTrivial(converter_registry): types = [ "Directory", @@ -50,33 +93,45 @@ def 
testConverterTrivial(): Converter.converter_factory( definition={ "type": ct}, - name="Test") + name="Test", + converter_registry=converter_registry) -def testDirectoryConverter(): +def testDirectoryConverter(converter_registry): """ test using the "test_directories" folder""" dc = Converter.converter_factory( definition={ "type": "Directory" }, - name="Test") + name="Test", converter_registry=converter_registry) elements = dc.create_children(GeneralStore(), Directory("test_directories", rfp("test_directories"))) # Check whether the right structure elements were created - assert len(elements) == 1 - assert isinstance(elements[0], Directory) - assert elements[0].name == "examples_article" + # this has been updated, there are more directories now + # assert len(elements) == 1 + element_names = [] + for element in elements: + assert isinstance(element, Directory) + element_names.append(element.name) + assert "examples_article" in element_names + assert "example_overwrite_1" in element_names + assert "example_insert" in element_names -def test_markdown_converter(): - test_readme = File("README.md", rfp( - "test_directories", "examples_article", "DataAnalysis", - "2020_climate-model-predict", "2020-02-08_prediction-errors", "README.md")) +def test_markdown_converter(converter_registry): + test_readme = File( + "README.md", + rfp( + "test_directories", "examples_article", "DataAnalysis", + "2020_climate-model-predict", "2020-02-08_prediction-errors", "README.md" + ) + ) converter = MarkdownFileConverter({ "match": "(.*)" - }, "TestMarkdownFileConverter") + }, "TestMarkdownFileConverter", + converter_registry) m = converter.match(File("test_tool.py", rfp( "test_tool.py"))) @@ -89,7 +144,8 @@ def test_markdown_converter(): converter = MarkdownFileConverter({ "match": "README.md" - }, "TestMarkdownFileConverter") + }, "TestMarkdownFileConverter", + converter_registry) m = converter.match(test_readme) assert m is not None @@ -105,8 +161,11 @@ def test_markdown_converter(): assert children[0].name == "responsible" assert children[0].value.__class__ == str - test_readme2 = File("README.md", rfp("test_directories", "examples_article", - "ExperimentalData", "2020_SpeedOfLight", "2020-01-01_TimeOfFlight", "README.md")) + test_readme2 = File( + "README.md", + rfp("test_directories", "examples_article", + "ExperimentalData", "2020_SpeedOfLight", "2020-01-01_TimeOfFlight", "README.md") + ) m = converter.match(test_readme2) assert m is not None @@ -121,3 +180,99 @@ def test_markdown_converter(): assert children[0].__class__ == DictListElement assert children[0].name == "responsible" assert children[0].value.__class__ == list + + +def test_json_converter(converter_registry): + test_json = File("testjson.json", rfp( + "test_directories", "examples_json", "testjson.json")) + + schema_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), + "test_directories", "examples_json", "testjson.schema.json") + jsonconverter = JSONFileConverter( + definition={"match": "(.*)", "validate": schema_path}, + name="TestJSONFileConverter", + converter_registry=converter_registry) + + m = jsonconverter.match(test_json) + assert m is not None + assert len(m) == 0 + + children = jsonconverter.create_children(None, test_json) + assert len(children) == 8 + assert children[0].__class__ == DictTextElement + assert children[0].name == "name" + assert children[0].value.__class__ == str + assert children[0].value == "DEMO" + + assert children[1].__class__ == DictIntegerElement + assert children[1].name == "projectId" + assert 
children[1].value.__class__ == int + assert children[1].value == 10002 + + assert children[2].__class__ == DictBooleanElement + assert children[2].name == "archived" + assert children[2].value.__class__ == bool + + assert children[3].__class__ == DictListElement + assert children[3].name == "Person" + assert children[3].value.__class__ == list + assert len(children[3].value) == 2 + + assert children[4].__class__ == DictTextElement + assert children[4].name == "start_date" + assert children[4].value.__class__ == str + + assert children[5].__class__ == DictListElement + assert children[5].name == "candidates" + assert children[5].value.__class__ == list + assert children[5].value == ["Mouse", "Penguine"] + + assert children[6].__class__ == DictFloatElement + assert children[6].name == "rvalue" + assert children[6].value.__class__ == float + + assert children[7].__class__ == DictTextElement + assert children[7].name == "url" + assert children[7].value.__class__ == str + + broken_json = File( + "brokenjson.json", + rfp("test_directories", "examples_json", "brokenjson.json") + ) + m = jsonconverter.match(broken_json) + + # Doesn't validate because of missing required 'name' property + with pytest.raises(ConverterValidationError) as err: + children = jsonconverter.create_children(None, broken_json) + + assert err.value.message.startswith("Couldn't validate") + + +def test_variable_replacement(): + values = GeneralStore() + values["a"] = 4 + values["b"] = "68" + + assert handle_value("b", values) == ("b", "single") + assert handle_value("+b", values) == ("b", "list") + assert handle_value("*b", values) == ("b", "multiproperty") + assert handle_value("$b", values) == ("68", "single") + assert handle_value("+$b", values) == ("68", "list") + assert handle_value("*$b", values) == ("68", "multiproperty") + + assert handle_value({"value": "b", + "collection_mode": "single"}, values) == ("b", "single") + assert handle_value({"value": "b", + "collection_mode": "list"}, values) == ("b", "list") + assert handle_value({"value": "b", + "collection_mode": "multiproperty"}, values) == ("b", "multiproperty") + assert handle_value({"value": "$b", + "collection_mode": "single"}, values) == ("68", "single") + assert handle_value({"value": "$b", + "collection_mode": "list"}, values) == ("68", "list") + assert handle_value({"value": "$b", + "collection_mode": "multiproperty"}, values) == ("68", "multiproperty") + + assert handle_value(["a", "b"], values) == (["a", "b"], "single") + assert handle_value(["$a", "$b"], values) == (["4", "68"], "single") + diff --git a/unittests/test_directories/example_insert/SimulationData/2020_climate-model-predict/2022-01-14/README.md b/unittests/test_directories/example_insert/SimulationData/2020_climate-model-predict/2022-01-14/README.md new file mode 100644 index 0000000000000000000000000000000000000000..cb9437486636054377ac3020445345268c12fe1d --- /dev/null +++ b/unittests/test_directories/example_insert/SimulationData/2020_climate-model-predict/2022-01-14/README.md @@ -0,0 +1,26 @@ +--- +responsible: AuthorE +description: > + Code for fitting the predictive model to the + training data and for predicting the average + annual temperature for all measurement stations + for the years 2010 to 2019. + This is a second run of the same simulation from 2020-02-01 to check replication. 
+sources: +- ../../../ExperimentalData/2020_climate-model-predict/1980-01-01/temperatures-*.csv +- ../../../ExperimentalData/2020_climate-model-predict/1990-01-01/temperatures-*.csv +- ../../../ExperimentalData/2020_climate-model-predict/2000-01-01/temperatures-*.csv +- ../2020-02-01/ +results: +- file: params.json + description: Model parameters for the best fit to the training set +- file: predictions-201*.csv + description: Annual temperature predictions with geographical locations +scripts: +- file: model.py + description: python module with the model equations +- file: fit_parameters.py + description: Fit model parameters to training data using a basinhopping optimizer +- file: predict.py + description: Use optimized parameters to simulate average temperatures from 2010 to 2019 +... diff --git a/unittests/test_directories/example_overwrite_1/SimulationData/2020_climate-model-predict/2022-01-14/README.md b/unittests/test_directories/example_overwrite_1/SimulationData/2020_climate-model-predict/2022-01-14/README.md new file mode 100644 index 0000000000000000000000000000000000000000..e39fef19e2aa8e131e4c57697f583520b8e42be7 --- /dev/null +++ b/unittests/test_directories/example_overwrite_1/SimulationData/2020_climate-model-predict/2022-01-14/README.md @@ -0,0 +1,26 @@ +--- +responsible: AuthorE +description: > + Code for fitting the predictive model to the + training data and for predicting the average + annual temperature for all measurement stations + for the years 2010 to 2019. + This is a second run of the same simulation from 2020-02-01 to check replication. This is a test for an update due to a changed description. +sources: +- ../../../ExperimentalData/2020_climate-model-predict/1980-01-01/temperatures-*.csv +- ../../../ExperimentalData/2020_climate-model-predict/1990-01-01/temperatures-*.csv +- ../../../ExperimentalData/2020_climate-model-predict/2000-01-01/temperatures-*.csv +- ../2020-02-01/ +results: +- file: params.json + description: Model parameters for the best fit to the training set. +- file: predictions-201*.csv + description: Annual temperature predictions with geographical locations +scripts: +- file: model.py + description: python module with the model equations +- file: fit_parameters.py + description: Fit model parameters to training data using a basinhopping optimizer +- file: predict.py + description: Use optimized parameters to simulate average temperatures from 2010 to 2019 +... 
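The two README files above differ only in their description text; the second copy exists to trigger an update caused by a changed description. Their YAML headers are what the MarkdownFileConverter turns into DictTextElement and DictListElement children in the converter tests. The following is a minimal sketch of that parsing step, assuming PyYAML is available; the helper name parse_markdown_header is hypothetical and not part of this diff:

    import yaml

    def parse_markdown_header(path):
        # Hypothetical helper: extract the YAML header delimited by "---" at
        # the top and "..." at the bottom of a README and return it as a dict.
        with open(path, encoding="utf-8") as f:
            text = f.read()
        header = text.split("---", 1)[1].split("\n...", 1)[0]
        return yaml.safe_load(header)

    # header["responsible"] would yield "AuthorE" (a DictTextElement),
    # header["sources"] a list of glob patterns (a DictListElement).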
diff --git a/unittests/test_directories/example_substitutions/ExperimentalData/220512_data.dat b/unittests/test_directories/example_substitutions/ExperimentalData/220512_data.dat new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/unittests/test_directories/example_substitutions/substitutions.yml b/unittests/test_directories/example_substitutions/substitutions.yml new file mode 100644 index 0000000000000000000000000000000000000000..1b4e8784a69d1ad1b80fa757ad77cd137c8cc7b5 --- /dev/null +++ b/unittests/test_directories/example_substitutions/substitutions.yml @@ -0,0 +1,22 @@ + +ExperimentalData: # name of the converter + type: Directory + match: ExperimentalData + records: + Project: + name: project + subtree: + File: # name of the converter + type: SimpleFile + match: (?P<year>[0-9]{2,2})(?P<month>[0-9]{2,2})(?P<day>[0-9]{2,2})_data.dat + records: + Experiment: + date: 20$year-$month-$day + + ExperimentSeries: + Experiment: $Experiment + + Project: + Experiments: +$Experiment + dates: +20$year-$month-$day + diff --git a/unittests/test_directories/examples_json/brokenjson.json b/unittests/test_directories/examples_json/brokenjson.json new file mode 100644 index 0000000000000000000000000000000000000000..9c012bf062264014278fc2df7be6cf33b65c7469 --- /dev/null +++ b/unittests/test_directories/examples_json/brokenjson.json @@ -0,0 +1,13 @@ +{ + "projectId": 10002, + "archived": false, + "coordinator": { + "firstname": "Miri", + "lastname": "Mueller", + "email": "miri.mueller@science.de" + }, + "start_date": "2022-03-01", + "candidates": ["Mouse", "Penguine"], + "rvalue": 0.4444, + "url": "https://site.de/index.php/" +} diff --git a/unittests/test_directories/examples_json/jsontest_cfood.yml b/unittests/test_directories/examples_json/jsontest_cfood.yml new file mode 100644 index 0000000000000000000000000000000000000000..f1eb6a9fa186c07f551bd12a84050f544abfdabc --- /dev/null +++ b/unittests/test_directories/examples_json/jsontest_cfood.yml @@ -0,0 +1,58 @@ + +JSONTest: # name of the converter + type: JSONFile + match: '(.*)' + validate: ./testjson.schema.json + records: + Project: # this is an identifiable in this case + parents: + - Project # not needed as the name is equivalent + subtree: + name_element: + type: DictTextElement + match_name: "name" + match_value: "(?P<name>.*)" + records: + Project: + name: $name + url_element: # name of the first subtree element which is a converter + type: DictTextElement + match_value: "(?P<url>.*)" + match_name: "url" + records: + Project: + url: $url + persons_element: + type: DictListElement + match_name: "Person" + subtree: + person_element: + type: Dict + records: + Person: + parents: + - Person + Project: + Person: +$Person + subtree: + firstname_element: + type: DictTextElement + match_name: "firstname" + match_value: "(?P<firstname>.*)" + records: + Person: + firstname: $firstname + lastname_element: + type: DictTextElement + match_name: "lastname" + match_value: "(?P<lastname>.*)" + records: + Person: + lastname: $lastname + email_element: + type: DictTextElement + match_name: "email" + match_value: "(?P<email>.*)" + records: + Person: + email: $email diff --git a/unittests/test_directories/examples_json/testjson.json b/unittests/test_directories/examples_json/testjson.json new file mode 100644 index 0000000000000000000000000000000000000000..b893b608a6a2119c5c3252cd9cff4c4100f404da --- /dev/null +++ b/unittests/test_directories/examples_json/testjson.json @@ -0,0 +1,21 @@ +{ + 
"name": "DEMO", + "projectId": 10002, + "archived": false, + "Person": [ + { + "firstname": "Miri", + "lastname": "Mueller", + "email": "miri.mueller@science.de" + }, + { + "firstname": "Mara", + "lastname": "Mueller", + "email": "mara.mueller@science.de" + } + ], + "start_date": "2022-03-01", + "candidates": ["Mouse", "Penguine"], + "rvalue": 0.4444, + "url": "https://site.de/index.php/" +} diff --git a/unittests/test_directories/examples_json/testjson.schema.json b/unittests/test_directories/examples_json/testjson.schema.json new file mode 100644 index 0000000000000000000000000000000000000000..fc784a61079e4737f1a0176fe4240133f5d1b5d0 --- /dev/null +++ b/unittests/test_directories/examples_json/testjson.schema.json @@ -0,0 +1,60 @@ +{ + "title": "Dataset", + "type": "object", + "properties": { + "name": { + "type": "string" + }, + "projectId": { + "type": "integer" + }, + "archived": { + "type": "boolean" + }, + "Person": { + "type": "array", + "items": { + "type": "object", + "properties": { + "firstname": { + "type": "string" + }, + "lastname": { + "type": "string" + }, + "email": { + "type": "string" + } + }, + "required": [ + "firstname", + "lastname", + "email" + ], + "additionalProperties": true + } + }, + "start_date": { + "type": "string", + "format": "date" + }, + "candidates": { + "type": "array", + "items": { + "type": "string" + } + }, + "rvalue": { + "type": "number" + }, + "url": { + "type": "string" + } + }, + "required": [ + "name", + "projectId", + "Person" + ], + "additionalProperties": false +} diff --git a/unittests/test_directories/single_file_test_data/identifiables.yml b/unittests/test_directories/single_file_test_data/identifiables.yml new file mode 100644 index 0000000000000000000000000000000000000000..e32746d5a6984096cc46fa618250832b325965b0 --- /dev/null +++ b/unittests/test_directories/single_file_test_data/identifiables.yml @@ -0,0 +1,7 @@ +Person: + - full_name +Keyword: + - name +Project: + - project_id + - title diff --git a/unittests/test_entity_comparison.py b/unittests/test_entity_comparison.py new file mode 100644 index 0000000000000000000000000000000000000000..549bc4f42a59765d25446d44fbb845e49ca4d9b9 --- /dev/null +++ b/unittests/test_entity_comparison.py @@ -0,0 +1,95 @@ +#!/bin/python +# Tests for entity comparison +# A. 
Schlemmer, 06/2021 + +import caosdb as db + +import pytest +from pytest import raises + +from caoscrawler.crawl import check_identical + + +def test_compare_entities(): + record1 = db.Record() + record2 = db.Record() + + assert check_identical(record1, record2) + + record1.add_property(name="type", value="int") + assert not check_identical(record1, record2) + assert not check_identical(record2, record1) + + record2.add_property(name="type", value="int") + assert check_identical(record1, record2) + record2.get_property("type").value = "int2" + assert not check_identical(record1, record2) + record2.get_property("type").value = 4 + assert not check_identical(record1, record2) + + record2.get_property("type").value = "int" + assert check_identical(record1, record2) + record2.add_parent(db.RecordType(name="Parent")) + assert not check_identical(record1, record2) + record1.add_parent(db.RecordType(name="Parent")) + + # This is confusing, but needed: + record1.add_property(name="field_with_type", value=42, datatype=db.INTEGER) + record2.add_property(name="field_with_type", value=42) + # not identical, because the datatype is only set on record1 + assert not check_identical(record1, record2) + # identical in the reverse order, because the datatype is only missing on record2 + assert check_identical(record2, record1) + record2.get_property("field_with_type").datatype = db.INTEGER + assert check_identical(record1, record2) + assert check_identical(record2, record1) + + record2.get_property("field_with_type").datatype = db.DOUBLE + assert not check_identical(record1, record2) + assert not check_identical(record2, record1) + + # TODO: report this as a hacky workaround (for setting datatype from double to integer): + record2.get_property("field_with_type").datatype = db.TEXT + record2.get_property("field_with_type").datatype = db.INTEGER + assert check_identical(record1, record2) + assert check_identical(record2, record1) + + record1 = db.File() + record2 = db.File() + + vals = (("bla bla", "bla bla bla"), + (1, 2)) + + for attribute, values in zip(("description", "name", "path", "id"), + (vals[0], vals[0], vals[0], vals[1])): + setattr(record1, attribute, values[0]) + assert not check_identical(record1, record2) + assert not check_identical(record2, record1) + setattr(record2, attribute, values[1]) + assert not check_identical(record1, record2) + assert not check_identical(record2, record1) + + setattr(record2, attribute, values[0]) + assert check_identical(record1, record2) + assert check_identical(record2, record1) + + # currently "file" is not checked by compare_entities + + vals = (("abcd", "bcde"), + (1, 2)) + # This is confusing, but needed: + for attribute, values in zip(("_checksum", "_size"), + (vals[0], vals[1])): + setattr(record1, attribute, values[0]) + # not identical, because the attribute is only set on record1 + assert not check_identical(record1, record2) + # identical in the reverse order, because the attribute is only missing on record2 + assert check_identical(record2, record1) + + setattr(record2, attribute, values[1]) + assert not check_identical(record1, record2) + assert not check_identical(record2, record1) + + setattr(record2, attribute, values[0]) + assert check_identical(record1, record2) + assert check_identical(record2, record1) diff --git a/unittests/test_file_identifiables.py b/unittests/test_file_identifiables.py new file mode 100644 index 0000000000000000000000000000000000000000..b0b9801993dc68fe473e788b8ca79a2244912676 --- /dev/null +++ b/unittests/test_file_identifiables.py @@ -0,0 +1,74 @@ +#!/bin/python +# Tests for file identifiables +# A.
Schlemmer, 06/2021 + +import caosdb as db + +import pytest +from pytest import raises + +from caoscrawler.identifiable_adapters import LocalStorageIdentifiableAdapter + + +def test_file_identifiable(): + ident = LocalStorageIdentifiableAdapter() + file_obj = db.File() + + identifiable = ident.get_identifiable(file_obj) + identifiable2 = ident.get_identifiable_for_file(file_obj) + + # these are two different objects: + assert identifiable != identifiable2 + assert file_obj != identifiable + # ... but the path is equal: + assert identifiable.path == identifiable2.path + # ... and very boring: + assert identifiable.path is None + # Test functionality of retrieving the files: + identified_file = ident.get_file(identifiable) + identified_file2 = ident.get_file(file_obj) + # Both should be None currently, as there are no files in the local store yet: + assert identified_file is None + assert identified_file2 is None + + # Let's make it more interesting: + file_obj.path = "/test/bla/bla.txt" + file_obj._checksum = "abcd" + identifiable = ident.get_identifiable(file_obj) + assert file_obj != identifiable + assert file_obj.path == identifiable.path + # Checksum is not part of the identifiable: + assert file_obj.checksum != identifiable.checksum + + # This is the wrong method, so it should definitely return None: + identified_file = ident.retrieve_identified_record_for_identifiable( + identifiable) + assert identified_file is None + # This is the correct method to use: + identified_file = ident.get_file(identifiable) + # or directly using: + identified_file2 = ident.get_file(file_obj) + # Both should be None currently, as there are no files in the local store yet: + assert identified_file is None + assert identified_file2 is None + + # Try again with actual files in the store: + records = ident.get_records() + test_record_wrong_path = db.File( + path="/bla/bla/test.txt") + test_record_correct_path = db.File( + path="/test/bla/bla.txt") + test_record_alsocorrect_path = db.File( + path="/test/bla/bla.txt") + records.append(test_record_wrong_path) + identified_file = ident.get_file(file_obj) + assert identified_file is None + + records.append(test_record_correct_path) + identified_file = ident.get_file(file_obj) + assert identified_file is not None + assert identified_file.path == file_obj.path + + with raises(RuntimeError, match=".*unambigiously.*"): + records.append(test_record_alsocorrect_path) + identified_file = ident.get_file(file_obj) diff --git a/unittests/test_identifiable_adapters.py b/unittests/test_identifiable_adapters.py index 9730461020c6c582188db58df6524246c0a1042c..ef7998a460c07342d30a3f769fd609c1045a9cca 100644 --- a/unittests/test_identifiable_adapters.py +++ b/unittests/test_identifiable_adapters.py @@ -27,8 +27,10 @@ test identifiable_adapters module """ +import os from datetime import datetime -from newcrawler.identifiable_adapters import IdentifiableAdapter +from caoscrawler.identifiable_adapters import ( + CaosDBIdentifiableAdapter, IdentifiableAdapter) import caosdb as db @@ -52,3 +54,32 @@ def test_create_query_for_identifiable(): assert (query.lower() == "find record b with name='a' and 'c'='c' and 'd'='5' and 'e'='5.5'" " and 'f'='2020-10-10t00:00:00' and 'g'='true' and 'h'='1111' and 'i'='1112' and " "'j'='2222' and 'j'='3333' ") + + # The name can be the only identifiable property + query = IdentifiableAdapter.create_query_for_identifiable( + db.Record(name="TestRecord").add_parent("TestType")) + assert query.lower() == "find record testtype with name='testrecord'" + + +def
test_load_from_yaml_file(): + ident = CaosDBIdentifiableAdapter() + ident.load_from_yaml_definition( + os.path.join(os.path.dirname(__file__), "test_directories", + "single_file_test_data", "identifiables.yml") + ) + + person_i = ident.get_registered_identifiable( + db.Record().add_parent("Person")) + assert person_i is not None + assert person_i.get_property("full_name") is not None + + keyword_i = ident.get_registered_identifiable( + db.Record().add_parent("Keyword")) + assert keyword_i is not None + assert keyword_i.get_property("name") is not None + + project_i = ident.get_registered_identifiable( + db.Record().add_parent("Project")) + assert project_i is not None + assert project_i.get_property("project_id") is not None + assert project_i.get_property("title") is not None diff --git a/unittests/test_identified_cache.py b/unittests/test_identified_cache.py index 9a1034634692e3d55935d31e2b3923d874f3f673..33add97d4309d87705144ec5331366d0bcd05541 100644 --- a/unittests/test_identified_cache.py +++ b/unittests/test_identified_cache.py @@ -27,12 +27,13 @@ test identified_cache module """ -from newcrawler.identified_cache import _create_hashable_string, IdentifiedCache +from caoscrawler.identified_cache import _create_hashable_string, IdentifiedCache import caosdb as db def test_create_hash(): - assert _create_hashable_string(db.Record("A").add_parent("B")) == "P<B>N<A>" + assert _create_hashable_string( + db.Record("A").add_parent("B")) == "P<B>N<A>" assert _create_hashable_string(db.Record("A") .add_parent("B").add_property('a', 5)) == "P<B>N<A>a:5" assert (_create_hashable_string( diff --git a/unittests/test_json.py b/unittests/test_json.py new file mode 100644 index 0000000000000000000000000000000000000000..97d9831de20a2b9f712294d1a0f6322789580f30 --- /dev/null +++ b/unittests/test_json.py @@ -0,0 +1,77 @@ +#!/usr/bin/env python3 +# encoding: utf-8 +# +# ** header v3.0 +# This file is a part of the CaosDB Project. +# +# Copyright (C) 2021 Indiscale GmbH <info@indiscale.com> +# Copyright (C) 2021 Henrik tom Wörden <h.tomwoerden@indiscale.com> +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see <https://www.gnu.org/licenses/>. 
+# +# ** end header +# + +""" +Tests for crawling and validating JSON files. +""" +import json +import os + +from pytest import raises + +import caosdb as db + +from caoscrawler.converters import JSONFileConverter, DictConverter +from caoscrawler.crawl import Crawler +from caoscrawler.structure_elements import File, JSONFile +from test_tool import rfp, dircheckstr + + +def test_json(): + crawler_definition_path = rfp("test_directories", "examples_json", + "jsontest_cfood.yml") + json_file_path = rfp("test_directories", "examples_json", "testjson.json") + + crawler = Crawler(debug=True) + crawler_definition = crawler.load_definition(crawler_definition_path) + # Load and register converter packages: + converter_registry = crawler.load_converters(crawler_definition) + + records = crawler.start_crawling( + JSONFile(os.path.basename(json_file_path), json_file_path), + crawler_definition, + converter_registry + ) + + rec = [r for r in records if r.name == "DEMO"] + assert len(rec) == 1 + rec = rec[0] + assert len(rec.parents) == 1 + assert rec.parents[0].name == "Project" + assert rec.get_property("url") is not None + assert rec.get_property("url").value == "https://site.de/index.php/" + assert rec.get_property("Person") is not None + assert isinstance(rec.get_property("Person").value, list) + assert len(rec.get_property("Person").value) == 2 + + +def test_broken_validation(): + crawler_definition_path = rfp( + "broken_cfoods", "broken_validation_path.yml") + crawler = Crawler() + with raises(FileNotFoundError) as err: + crawler_definition = crawler.load_definition(crawler_definition_path) + + assert str(err.value).startswith("Couldn't find validation file") diff --git a/unittests/test_schema.py b/unittests/test_schema.py new file mode 100644 index 0000000000000000000000000000000000000000..0736698eb32146fb3cfbee6acbcf11f5436df27e --- /dev/null +++ b/unittests/test_schema.py @@ -0,0 +1,31 @@ +#!/bin/python +# Tests for schema validation +# A. Schlemmer, 06/2021 + +from importlib_resources import files +import caosdb as db + +from os.path import join, dirname +from caoscrawler import Crawler + +import pytest +from pytest import raises + +from jsonschema.exceptions import ValidationError + + +def rfp(*pathcomponents): + """ + Return full path. + Shorthand convenience function. + """ + return join(dirname(__file__), *pathcomponents) + + +def test_schema_validation(): + cr = Crawler() + cr.load_definition(rfp("scifolder_cfood.yml")) + cr.load_definition(rfp("scifolder_extended.yml")) + + with raises(ValidationError, match=".*enum.*"): + cr.load_definition(rfp("broken_cfoods", "broken1.yml")) diff --git a/unittests/test_tool.py b/unittests/test_tool.py index e26b0b0ce6a8b22121ee29f1a1c021a025644ac9..5fc0ea9de7bf8a99740c69aca947f233d086e4c7 100755 --- a/unittests/test_tool.py +++ b/unittests/test_tool.py @@ -3,9 +3,9 @@ # Adapted from check-sfs # A. Schlemmer, 06/2021 -from newcrawler import Crawler -from newcrawler.structure_elements import File, DictTextElement, DictListElement -from newcrawler.identifiable_adapters import IdentifiableAdapter, LocalStorageIdentifiableAdapter +from caoscrawler import Crawler +from caoscrawler.structure_elements import File, DictTextElement, DictListElement +from caoscrawler.identifiable_adapters import IdentifiableAdapter, LocalStorageIdentifiableAdapter from functools import partial from copy import deepcopy from unittest.mock import MagicMock, Mock @@ -37,7 +37,7 @@ def dircheckstr(*pathcomponents): """ Return the debug tree identifier for a given path.
""" - return "newcrawler.structure_elements.Directory: " + basename(join(*pathcomponents)) + ", " + rfp("test_directories", "examples_article", *pathcomponents) + return "caoscrawler.structure_elements.Directory: " + basename(join(*pathcomponents)) + ", " + rfp("test_directories", "examples_article", *pathcomponents) @pytest.fixture @@ -81,13 +81,20 @@ def test_record_structure_generation(crawler): subd = crawler.debug_tree[dircheckstr("DataAnalysis")] subc = crawler.debug_metadata["copied"][dircheckstr("DataAnalysis")] assert len(subd) == 2 - assert len(subd[0]) == 0 + # variables store on Data Analysis node of debug tree + assert len(subd[0]) == 2 + # record store on Data Analysis node of debug tree assert len(subd[1]) == 0 assert len(subc) == 2 - assert len(subc[0]) == 0 + assert len(subc[0]) == 2 assert len(subc[1]) == 0 - subd = crawler.debug_tree[dircheckstr("DataAnalysis", "2020_climate-model-predict")] + # The data analysis node creates one variable for the node itself: + assert subd[0]["DataAnalysis"] == "examples_article/DataAnalysis" + assert subc[0]["DataAnalysis"] == False + + subd = crawler.debug_tree[dircheckstr( + "DataAnalysis", "2020_climate-model-predict")] subc = crawler.debug_metadata["copied"][dircheckstr( "DataAnalysis", "2020_climate-model-predict")] @@ -95,15 +102,21 @@ def test_record_structure_generation(crawler): assert len(subd[1]["Project"].get_parents()) == 1 assert subd[1]["Project"].get_parents()[0].name == "Project" assert subd[1]["Project"].get_property("date").value == "2020" - assert subd[1]["Project"].get_property("identifier").value == "climate-model-predict" + assert subd[1]["Project"].get_property( + "identifier").value == "climate-model-predict" - assert len(subd[0]) == 3 + assert len(subd[0]) == 6 assert subd[0]["date"] == "2020" assert subd[0]["identifier"] == "climate-model-predict" assert subd[0]["Project"].__class__ == db.Record + assert subd[0]["DataAnalysis"] == "examples_article/DataAnalysis" + assert subc[0]["DataAnalysis"] == True + assert subd[0]["project_dir"] == "examples_article/DataAnalysis/2020_climate-model-predict" + assert subc[0]["project_dir"] == False + # Check the copy flags for the first level in the hierarchy: - assert len(subc[0]) == 3 + assert len(subc[0]) == 6 assert len(subc[1]) == 1 assert subc[1]["Project"] is False assert subc[0]["Project"] is False @@ -116,7 +129,7 @@ def test_record_structure_generation(crawler): subc = crawler.debug_metadata["copied"][dircheckstr("DataAnalysis", "2020_climate-model-predict", "2020-02-08_prediction-errors")] - assert len(subd[0]) == 4 + assert len(subd[0]) == 8 assert subd[0]["date"] == "2020-02-08" assert subd[0]["identifier"] == "prediction-errors" assert subd[0]["Project"].__class__ == db.Record @@ -127,15 +140,19 @@ def test_record_structure_generation(crawler): assert len(subd[1]["Project"].get_parents()) == 1 assert subd[1]["Project"].get_parents()[0].name == "Project" assert subd[1]["Project"].get_property("date").value == "2020" - assert subd[1]["Project"].get_property("identifier").value == "climate-model-predict" + assert subd[1]["Project"].get_property( + "identifier").value == "climate-model-predict" assert len(subd[1]["Measurement"].get_parents()) == 1 assert subd[1]["Measurement"].get_parents()[0].name == "Measurement" assert subd[1]["Measurement"].get_property("date").value == "2020-02-08" - assert subd[1]["Measurement"].get_property("identifier").value == "prediction-errors" + assert subd[1]["Measurement"].get_property( + "identifier").value == "prediction-errors" 
assert subd[1]["Measurement"].get_property("project").value != "$Project" - assert subd[1]["Measurement"].get_property("project").value.__class__ == db.Record - assert subd[1]["Measurement"].get_property("project").value == subd[0]["Project"] + assert subd[1]["Measurement"].get_property( + "project").value.__class__ == db.Record + assert subd[1]["Measurement"].get_property( + "project").value == subd[0]["Project"] # Check the copy flags for the second level in the hierarchy: assert subc[1]["Project"] is True @@ -163,24 +180,29 @@ def test_record_structure_generation(crawler): def test_ambigious_records(crawler, ident): ident.get_records().clear() - ident.get_records().extend(crawler.updateList) + ident.get_records().extend(crawler.target_data) r = ident.get_records() id_r0 = ident.get_identifiable(r[0]) with raises(RuntimeError, match=".*unambigiously.*"): - ident.retrieve_identified_record(id_r0) + ident.retrieve_identified_record_for_identifiable(id_r0) def test_crawler_update_list(crawler, ident): - crawler.copy_attributes = Mock() # If the following assertions fail, that is a hint, that the test file records.xml has changed # and this needs to be updated: assert len(ident.get_records()) == 18 - assert len([r for r in ident.get_records() if r.parents[0].name == "Person"]) == 5 - assert len([r for r in ident.get_records() if r.parents[0].name == "Measurement"]) == 11 - assert len([r for r in ident.get_records() if r.parents[0].name == "Project"]) == 2 + assert len( + [r for r in ident.get_records() if r.parents[0].name == "Person"] + ) == 5 + assert len( + [r for r in ident.get_records() if r.parents[0].name == "Measurement"] + ) == 11 + assert len( + [r for r in ident.get_records() if r.parents[0].name == "Project"] + ) == 2 # The crawler contains lots of duplicates, because identifiables have not been resolved yet: - assert len(ident.get_records()) != len(crawler.updateList) + assert len(ident.get_records()) != len(crawler.target_data) # Check consistency: # Check whether identifiables retrieved from current identifiable store return the same results. 
@@ -193,15 +215,17 @@ def test_crawler_update_list(crawler, ident): id_r0 = ident.get_identifiable(r_cur) assert r_cur.parents[0].name == id_r0.parents[0].name - assert r_cur.get_property("first_name").value == id_r0.get_property("first_name").value - assert r_cur.get_property("last_name").value == id_r0.get_property("last_name").value + assert r_cur.get_property( + "first_name").value == id_r0.get_property("first_name").value + assert r_cur.get_property( + "last_name").value == id_r0.get_property("last_name").value assert len(r_cur.parents) == 1 assert len(id_r0.parents) == 1 assert len(r_cur.properties) == 2 assert len(id_r0.properties) == 2 - idr_r0_test = ident.retrieve_identified_record(id_r0) - idr_r0 = ident.retrieve_identifiable(r_cur) + idr_r0_test = ident.retrieve_identified_record_for_identifiable(id_r0) + idr_r0 = ident.retrieve_identified_record_for_record(r_cur) assert idr_r0 == idr_r0_test # take the first measurement in the list of records: @@ -212,32 +236,35 @@ def test_crawler_update_list(crawler, ident): id_r1 = ident.get_identifiable(r_cur) assert r_cur.parents[0].name == id_r1.parents[0].name - assert r_cur.get_property("identifier").value == id_r1.get_property("identifier").value + assert r_cur.get_property( + "identifier").value == id_r1.get_property("identifier").value assert r_cur.get_property("date").value == id_r1.get_property("date").value - assert r_cur.get_property("project").value == id_r1.get_property("project").value + assert r_cur.get_property( + "project").value == id_r1.get_property("project").value assert len(r_cur.parents) == 1 assert len(id_r1.parents) == 1 - assert len(r_cur.properties) == 5 + assert len(r_cur.properties) == 4 assert len(id_r1.properties) == 3 - idr_r1_test = ident.retrieve_identified_record(id_r1) - idr_r1 = ident.retrieve_identifiable(r_cur) + idr_r1_test = ident.retrieve_identified_record_for_identifiable(id_r1) + idr_r1 = ident.retrieve_identified_record_for_record(r_cur) assert idr_r1 == idr_r1_test assert idr_r1 != idr_r0 assert idr_r1_test != idr_r0_test - assert len(idr_r1.properties) == 5 - assert r_cur.get_property("responsible").value == idr_r1.get_property("responsible").value - assert r_cur.get_property("description").value == idr_r1.get_property("description").value + assert len(idr_r1.properties) == 4 + assert r_cur.get_property( + "responsible").value == idr_r1.get_property("responsible").value + assert r_cur.description == idr_r1.description # test whether the compare_entities function works in this context: comp = compare_entities(r_cur, id_r1) assert len(comp[0]["parents"]) == 0 assert len(comp[1]["parents"]) == 0 - assert len(comp[0]["properties"]) == 2 + assert len(comp[0]["properties"]) == 1 assert len(comp[1]["properties"]) == 0 assert "responsible" in comp[0]["properties"] - assert "description" in comp[0]["properties"] + assert "description" in comp[0] comp = compare_entities(r_cur, idr_r1) assert len(comp[0]["parents"]) == 0 @@ -257,6 +284,14 @@ def test_crawler_update_list(crawler, ident): assert len(updl) == 0 +def test_identifiable_adapter(): + query = IdentifiableAdapter.create_query_for_identifiable( + db.Record().add_parent("Person") + .add_property("first_name", value="A") + .add_property("last_name", value="B")) + assert query.lower() == "find record person with 'first_name'='a' and 'last_name'='b' " + + def test_remove_unnecessary_updates(): # test trivial case upl = [db.Record().add_parent("A")] @@ -266,10 +301,10 @@ def test_remove_unnecessary_updates(): # test property difference case # TODO this
should work, right? - #upl = [db.Record().add_parent("A").add_property("a", 3)] + # upl = [db.Record().add_parent("A").add_property("a", 3)] # irs = [db.Record().add_parent("A")] # ID should be s - #Crawler.remove_unnecessary_updates(upl, irs) - #assert len(upl) == 1 + # Crawler.remove_unnecessary_updates(upl, irs) + # assert len(upl) == 1 # test value difference case upl = [db.Record().add_parent("A").add_property("a", 5)] @@ -294,13 +329,17 @@ test_remove_unnecessary_updates(): assert len(upl) == 1 +# Current status: +# TODO: currently, this test fails, because non-identifiable records cannot +# be inserted into the cache. A solution might be to simply not add them +# to the local cache, probably in split_into_inserts_and_updates. @pytest.mark.xfail def test_identifiable_adapter_no_identifiable(crawler, ident): del ident._registered_identifiables["Person"] insl, updl = crawler.synchronize() assert len(updl) == 0 - pers = [r for r in crawler.updateList if r.parents[0].name == "Person"] + pers = [r for r in crawler.target_data if r.parents[0].name == "Person"] # All persons are inserted, because they are not identifiable: assert len(insl) == len(pers) @@ -320,24 +359,24 @@ def test_provenance_debug_data(crawler): assert check_key_count("Person") == 14 +def basic_retrieve_by_name_mock_up(rec, known): + """Return a stored Record if rec.name is an existing key, None otherwise.""" + if rec.name in known: + return known[rec.name] + else: + return None + + @pytest.fixture -def mock_retrieve(crawler): - # simulate remote server content by using the names to identify records - def base_mocked_lookup(rec, known): - if rec.name in known: - return known[rec.name] - else: - return None - - crawler.copy_attributes = Mock() - - # a record that is found remotely and should be added to the update list and one that is not - # found and should be added to the insert one - remote_known = {"A": db.Record(id=1111, name="A")} - crawler.identifiableAdapter.retrieve_identifiable = Mock(side_effect=partial( - base_mocked_lookup, known=remote_known)) - crawler.identifiableAdapter.get_registered_identifiable = ( - lambda x: db.Record().add_parent(x.parents[0].name)) +def crawler_mocked_identifiable_retrieve(crawler): + # mock retrieval of registered identifiables: return a Record with just a parent + crawler.identifiableAdapter.get_registered_identifiable = Mock( + side_effect=lambda x: db.Record().add_parent(x.parents[0].name)) + + # Simulate remote server content by using the names to identify records + # There is only a single known Record with name A + crawler.identifiableAdapter.retrieve_identified_record_for_record = Mock(side_effect=partial( + basic_retrieve_by_name_mock_up, known={"A": db.Record(id=1111, name="A")})) return crawler @@ -346,27 +385,32 @@ def test_split_into_inserts_and_updates_trivial(crawler): crawler.split_into_inserts_and_updates([]) -def test_split_into_inserts_and_updates_single(mock_retrieve): - crawler = mock_retrieve - - entlist = [db.Record(name="A").add_parent("C"), db.Record(name="B").add_parent("C")] +def test_split_into_inserts_and_updates_single(crawler_mocked_identifiable_retrieve): + crawler = crawler_mocked_identifiable_retrieve + entlist = [db.Record(name="A").add_parent( + "C"), db.Record(name="B").add_parent("C")] assert crawler.get_identified_record_from_local_cache(entlist[0]) is None assert crawler.get_identified_record_from_local_cache(entlist[1]) is None assert crawler.can_be_checked_externally(entlist[0]) assert crawler.can_be_checked_externally(entlist[1]) - assert
crawler.identifiableAdapter.retrieve_identifiable(entlist[0]).id == 1111 - assert crawler.identifiableAdapter.retrieve_identifiable(entlist[1]) is None + assert crawler.identifiableAdapter.retrieve_identified_record_for_record( + entlist[0]).id == 1111 + assert crawler.identifiableAdapter.retrieve_identified_record_for_record( + entlist[1]) is None insert, update = crawler.split_into_inserts_and_updates(deepcopy(entlist)) assert len(insert) == 1 assert insert[0].name == "B" assert len(update) == 1 assert update[0].name == "A" + # if this ever fails, the mock up may be removed + crawler.identifiableAdapter.get_registered_identifiable.assert_called() + crawler.identifiableAdapter.retrieve_identified_record_for_record.assert_called() -def test_split_into_inserts_and_updates_with_duplicate(mock_retrieve): - crawler = mock_retrieve +def test_split_into_inserts_and_updates_with_duplicate(crawler_mocked_identifiable_retrieve): + crawler = crawler_mocked_identifiable_retrieve a = db.Record(name="A").add_parent("C") b = db.Record(name="B").add_parent("C") b.add_property("A", a) @@ -378,10 +422,13 @@ def test_split_into_inserts_and_updates_with_duplicate(mock_retrieve): assert insert[0].name == "B" assert len(update) == 1 assert update[0].name == "A" + # if this ever fails, the mock up may be removed + crawler.identifiableAdapter.get_registered_identifiable.assert_called() + crawler.identifiableAdapter.retrieve_identified_record_for_record.assert_called() -def test_split_into_inserts_and_updates_with_ref(mock_retrieve): - crawler = mock_retrieve +def test_split_into_inserts_and_updates_with_ref(crawler_mocked_identifiable_retrieve): + crawler = crawler_mocked_identifiable_retrieve # try it with a reference a = db.Record(name="A").add_parent("C") b = db.Record(name="B").add_parent("C") @@ -392,25 +439,29 @@ def test_split_into_inserts_and_updates_with_ref(mock_retrieve): assert insert[0].name == "B" assert len(update) == 1 assert update[0].name == "A" + # if this ever fails, the mock up may be removed + crawler.identifiableAdapter.retrieve_identified_record_for_record.assert_called() + crawler.identifiableAdapter.get_registered_identifiable.assert_called() -def test_split_into_inserts_and_updates_with_circ(mock_retrieve): +def test_split_into_inserts_and_updates_with_circ(crawler): # try circular - crawler = mock_retrieve a = db.Record(name="A").add_parent("C") b = db.Record(name="B").add_parent("C") b.add_property("A", a) a.add_property("B", b) entlist = [a, b] + # TODO this does not seem to be complete! 
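The circular-reference test above ends without assertions (see the TODO). A hypothetical completion, not part of this diff, could look like the following, assuming the crawler_mocked_identifiable_retrieve fixture used by the neighbouring tests, where only "A" is known remotely; the mutual references should not prevent "A" from landing in the update list and "B" in the insert list:

    # Hypothetical continuation of test_split_into_inserts_and_updates_with_circ:
    insert, update = crawler.split_into_inserts_and_updates(deepcopy(entlist))
    assert [el.name for el in insert] == ["B"]
    assert [el.name for el in update] == ["A"]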
-def test_split_into_inserts_and_updates_with_complex(mock_retrieve): - crawler = mock_retrieve +def test_split_into_inserts_and_updates_with_complex(crawler_mocked_identifiable_retrieve): + crawler = crawler_mocked_identifiable_retrieve # A # ^ # | # F <- B <- G - a = db.Record(name="A").add_parent("C").add_property('d', 13).add_property('e', "lskdjlsfdj") + a = db.Record(name="A").add_parent("C").add_property( + 'd', 13).add_property('e', "lskdjlsfdj") b = db.Record(name="B").add_parent("C") g = db.Record(name="G").add_parent("C") f = db.Record(name="F").add_parent("C") @@ -423,13 +474,15 @@ def test_split_into_inserts_and_updates_with_complex(mock_retrieve): assert "B" in [el.name for el in insert] assert len(update) == 1 assert update[0].name == "A" + # if this ever fails, the mock up may be removed + crawler.identifiableAdapter.get_registered_identifiable.assert_called() + crawler.identifiableAdapter.retrieve_identified_record_for_record.assert_called() # TODO write a test where the unresolved entity is not part of the identifiable -@pytest.mark.xfail -def test_split_into_inserts_and_updates_with_copy_attr(mock_retrieve): - crawler = mock_retrieve +def test_split_into_inserts_and_updates_with_copy_attr(crawler_mocked_identifiable_retrieve): + crawler = crawler_mocked_identifiable_retrieve # assume identifiable is only the name a = db.Record(name="A").add_parent("C") a.add_property("foo", 1) @@ -438,12 +491,14 @@ def test_split_into_inserts_and_updates_with_copy_attr(mock_retrieve): entlist = [a, b] insert, update = crawler.split_into_inserts_and_updates(entlist) - # expected TODO assert update[0].get_property("bar").value == 2 assert update[0].get_property("foo").value == 1 + # if this ever fails, the mock up may be removed + crawler.identifiableAdapter.get_registered_identifiable.assert_called() + crawler.identifiableAdapter.retrieve_identified_record_for_record.assert_called() -def test_all_references_are_existing_already(crawler): +def test_all_references_are_existing_already2(crawler): registered_identifiables = { "C": db.Record().add_parent("C").add_property("a"), "D": db.Record().add_parent("D").add_property("a").add_property("b")} @@ -453,11 +508,34 @@ def test_all_references_are_existing_already(crawler): assert crawler.all_references_are_existing_already( db.Record().add_parent("C").add_property('a', 123)) assert crawler.all_references_are_existing_already(db.Record().add_parent("C") + assert crawler.all_references_are_existing_already(db.Record().add_parent("D") + .add_property('a', 123) + .add_property('b', db.Record(id=123))) + a=db.Record(name="A").add_parent("C").add_property("a", 12311) + assert not crawler.all_references_are_existing_already(db.Record().add_parent("D") + .add_property('a', 123) + .add_property('b', a)) + crawler.add_identified_record_to_local_cache(a) + assert crawler.all_references_are_existing_already(db.Record().add_parent("D") + .add_property('a', 123) + .add_property('b', a)) + # if this ever fails, the mock up may be removed + crawler.identifiableAdapter.get_registered_identifiable.assert_called() +def test_all_references_are_existing_already(crawler): + # Simulate remote server content by using the names to identify records + # There are only two known Records with name A and B + crawler.identifiableAdapter.get_registered_identifiable=Mock(side_effect=partial( + basic_retrieve_by_name_mock_up, known={"A": db.Record(name="A").add_parent("C"), + "B": db.Record(name="B").add_parent("C")})) + + assert crawler.all_references_are_existing_already( +
db.Record().add_property('a', 123)) + assert crawler.all_references_are_existing_already(db.Record() .add_property('a', db.Record(id=123))) assert crawler.all_references_are_existing_already(db.Record().add_parent("D") .add_property('a', 123) .add_property('b', db.Record(id=123))) - a = db.Record(name="A").add_parent("C").add_property("a", 12311) + a=db.Record(name="A").add_parent("C").add_property("a", 12311) assert not crawler.all_references_are_existing_already(db.Record().add_parent("D") .add_property('a', 123) .add_property('b', a)) @@ -465,13 +543,15 @@ def test_all_references_are_existing_already(crawler): assert crawler.all_references_are_existing_already(db.Record().add_parent("D") .add_property('a', 123) .add_property('b', a)) + # if this ever fails, the mock up may be removed + crawler.identifiableAdapter.get_registered_identifiable.assert_called() -def test_can_be_checked_externally(crawler): - registered_identifiables = { +def test_can_be_checked_externally2(crawler): + registered_identifiables={ "C": db.Record().add_parent("C").add_property("a"), "D": db.Record().add_parent("D").add_property("a").add_property("b")} - crawler.identifiableAdapter.get_registered_identifiable = Mock(side_effect=partial( + crawler.identifiableAdapter.get_registered_identifiable=Mock(side_effect=partial( basic_ident_lookup, idents=registered_identifiables)) assert crawler.can_be_checked_externally(db.Record().add_parent("C").add_property('a', 123)) assert crawler.can_be_checked_externally(db.Record().add_parent("C") @@ -483,14 +563,26 @@ def test_can_be_checked_externally(crawler): assert not crawler.can_be_checked_externally(db.Record().add_parent("D") .add_property('a', 123) .add_property('b', db.Record())) +def test_can_be_checked_externally(crawler): + assert crawler.can_be_checked_externally( + db.Record().add_property('a', 123)) + assert crawler.can_be_checked_externally(db.Record() + .add_property('a', db.Record(id=123))) + assert crawler.can_be_checked_externally(db.Record().add_parent("D") + .add_property('a', 123) + .add_property('b', db.Record(id=123))) + + assert not crawler.can_be_checked_externally(db.Record().add_parent("D") + .add_property('a', 123) + .add_property('b', db.Record())) -def test_replace_entities_by_ids(crawler): - a = (db.Record().add_parent("B").add_property("A", 12345) +def test_replace_entities_with_ids(crawler): + a=(db.Record().add_parent("B").add_property("A", 12345) .add_property("B", db.Record(id=12345)) .add_property("C", [db.Record(id=12345), 233324])) - crawler.replace_entities_by_ids(a) + crawler.replace_entities_with_ids(a) assert a.get_property("A").value == 12345 assert a.get_property("B").value == 12345 assert a.get_property("C").value == [12345, 233324] diff --git a/unittests/test_tool_extended.py b/unittests/test_tool_extended.py new file mode 100644 index 0000000000000000000000000000000000000000..d0b431a539a15e3e83906540c69becff437742ec --- /dev/null +++ b/unittests/test_tool_extended.py @@ -0,0 +1,78 @@ +#!/bin/python +# Tests for the tool using pytest +# Adapted from check-sfs +# A. 
Schlemmer, 06/2021 + +from caoscrawler import Crawler +from caoscrawler.structure_elements import File, DictTextElement, DictListElement +from caoscrawler.identifiable_adapters import IdentifiableAdapter, LocalStorageIdentifiableAdapter +from functools import partial +from copy import deepcopy +from unittest.mock import MagicMock, Mock +from os.path import join, dirname, basename +import yaml +import caosdb as db +from caosdb.apiutils import compare_entities + +import pytest +from pytest import raises + + +def rfp(*pathcomponents): + """ + Return full path. + Shorthand convenience function. + """ + return join(dirname(__file__), *pathcomponents) + + +def dircheckstr(*pathcomponents, structure_element_type="Directory"): + """ + Return the debug tree identifier for a given path. + """ + return ("caoscrawler.structure_elements." + structure_element_type + ": " + + basename(join(*pathcomponents)) + ", " + + rfp("test_directories", "examples_article", *pathcomponents)) + + +@pytest.fixture +def crawler(): + crawler = Crawler(debug=True) + crawler.crawl_directory(rfp("test_directories", "examples_article"), + rfp("scifolder_extended.yml")) + return crawler + + +# @pytest.fixture +# def ident(crawler): +# ident = LocalStorageIdentifiableAdapter() +# crawler.identifiableAdapter = ident + +# ident.restore_state(rfp("records.xml")) + +# ident.register_identifiable( +# "Person", db.RecordType() +# .add_parent(name="Person") +# .add_property(name="first_name") +# .add_property(name="last_name")) +# ident.register_identifiable( +# "Measurement", db.RecordType() +# .add_parent(name="Measurement") +# .add_property(name="identifier") +# .add_property(name="date") +# .add_property(name="project")) +# ident.register_identifiable( +# "Project", db.RecordType() +# .add_parent(name="Project") +# .add_property(name="date") +# .add_property(name="identifier")) +# return ident + + +def test_file_structure_generation(crawler): + sd = crawler.debug_tree[dircheckstr("SimulationData", + "2020_climate-model-predict", "2020-02-01", + "README.md", structure_element_type="File")] + assert sd[1]["ReadmeFile"].role == "File" + assert len(sd[1]["ReadmeFile"].path) > 0 + assert len(sd[1]["ReadmeFile"].file) > 0 diff --git a/unittests/test_variable_substitutions.py b/unittests/test_variable_substitutions.py new file mode 100644 index 0000000000000000000000000000000000000000..071bf4646d20e35ed05dafaf5fabf786dc182dcc --- /dev/null +++ b/unittests/test_variable_substitutions.py @@ -0,0 +1,61 @@ +#!/bin/python +# Tests for variable substitutions +# A. Schlemmer, 05/2022 + +from caoscrawler import Crawler +from caoscrawler.structure_elements import File, DictTextElement, DictListElement +from caoscrawler.identifiable_adapters import IdentifiableAdapter, LocalStorageIdentifiableAdapter +from functools import partial +from copy import deepcopy +from unittest.mock import MagicMock, Mock +from os.path import join, dirname, basename +import yaml +import caosdb as db +from caosdb.apiutils import compare_entities + +import pytest +from pytest import raises + + +def rfp(*pathcomponents): + """ + Return full path. + Shorthand convenience function. + """ + return join(dirname(__file__), *pathcomponents) + + +def dircheckstr(element_type, *pathcomponents): + """ + Return the debug tree identifier for a given path. + """ + return "caoscrawler.structure_elements." 
+ element_type + ": " + basename(join(*pathcomponents)) + ", " + rfp("test_directories", "example_substitutions", *pathcomponents) + + +@pytest.fixture +def crawler(): + crawler = Crawler(debug=True) + crawler.crawl_directory(rfp("test_directories", "example_substitutions", "ExperimentalData"), + rfp("test_directories", "example_substitutions", "substitutions.yml")) + return crawler + + +def test_substitutions(crawler): + # @review Florian Spreckelsen 2022-05-13 + for i in range(2): + subd = crawler.debug_tree[dircheckstr( + "File", "ExperimentalData", "220512_data.dat")] + assert subd[i]["Experiment"].get_property("date").value == "2022-05-12" + assert isinstance(subd[i]["ExperimentSeries"].get_property( + "Experiment").value, db.Record) + + subd = crawler.debug_tree[dircheckstr("Directory", "ExperimentalData")] + assert subd[i]["Project"].name == "project" + assert isinstance(subd[i]["Project"].get_property( + "Experiments").value, list) + assert isinstance(subd[i]["Project"].get_property( + "Experiments").value[0], db.Record) + + assert isinstance(subd[i]["Project"].get_property("dates").value, list) + assert subd[i]["Project"].get_property( + "dates").value[0] == "2022-05-12"
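For reference, the date substitution checked by test_substitutions above is driven by the named groups in the SimpleFile converter's match expression from substitutions.yml. The following is a standalone sketch of the mechanism, simplified compared to the crawler's own substitution logic:

    import re
    from string import Template

    m = re.match(r"(?P<year>[0-9]{2,2})(?P<month>[0-9]{2,2})(?P<day>[0-9]{2,2})_data.dat",
                 "220512_data.dat")
    values = m.groupdict()  # {'year': '22', 'month': '05', 'day': '12'}
    # The template "20$year-$month-$day" from substitutions.yml becomes "2022-05-12":
    assert Template("20$year-$month-$day").substitute(values) == "2022-05-12"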