diff --git a/.gitignore b/.gitignore index 39be17dc807dc3e2f5c505dc9341027457403421..06c8c148f1e9f45493f574a11c6789398246defd 100644 --- a/.gitignore +++ b/.gitignore @@ -29,3 +29,6 @@ log/ OUTBOX ConsistencyTest.xml testlog/ + +# python +__pycache__ diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index bb05c265f11d0fcb3a72cee54af5314a5ca4c284..34e93fd70f3252e3dd04b2df14e06a63299d940c 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -38,6 +38,7 @@ test: tags: [ docker ] stage: test script: + - make test_misc - make easy-units - mvn dependency:purge-local-repository - mvn antlr4:antlr4 diff --git a/CHANGELOG.md b/CHANGELOG.md index ed30234ef57ec758fa3dee597087292951c9b182..a74ca5b36a0ca36939cd8e7a74fcbce227162841 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,13 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Scripting is simplified by adding a `home` directory, of which a copy is created for each called script and set as the `HOME` environment variable. +- [bend_symlinks.sh](misc/bend_symlinks/bend_symlinks.sh) (version 0.1, experimental) + fix broken symlinks in the internal file system. See + [README.md](misc/bend_symlinks/README.md) +- [move_files.py](misc/move_files/move_files.py) (version 0.1, experimental) + Script for moving files (change their path) in the internal file system based + on a two-column tsv file (with columns "from" and "to"). See + [README.md](misc/move_files/README.md). 
### Changed @@ -23,7 +30,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - -### Removed ### +### Removed - diff --git a/makefile b/makefile index 53c822deac4faaa058d7b1a04329abf2eb03d0bb..ea03f993e5de1dbc19606e3dae4804b0e4c6bf7a 100644 --- a/makefile +++ b/makefile @@ -55,6 +55,9 @@ test: easy-units MAVEN_DEBUG_OPTS="-Xdebug -Xnoagent -Djava.compiler=NONE -Dcaosdb.debug=true -Xrunjdwp:transport=dt_socket,server=y,suspend=y,address=0.0.0.0:9000" mvn test -X +test_misc: + cd misc/bend_symlinks/ && /bin/bash -c /usr/bin/shunit2 test/test_suite.sh + clean: clean-antlr mvn clean rm -rf .m2-local diff --git a/misc/bend_symlinks/README.md b/misc/bend_symlinks/README.md new file mode 100644 index 0000000000000000000000000000000000000000..f72357f8636a2cf961434108a52e98ef05181e62 --- /dev/null +++ b/misc/bend_symlinks/README.md @@ -0,0 +1,106 @@ +# About + +./bend_symlinks.sh - fix broken symlinks in the internal file system + +# Copyright and License Disclaimer: + + Copyright (C) 2019 Timm Fitschen (t.fitschen@indiscale.com) + Copyright (C) 2019 IndiScale (info@indiscale.com) + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as + published by the Free Software Foundation, either version 3 of the + License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <https://www.gnu.org/licenses/>. 
+ + +# Version + 0.1 + +# Usage + + ./bend_symlinks.sh [-D] FILE_SYSTEM_DIR TARGET_PATTERN TARGET_REPLACEMENT + + Find all broken symlinks below FILE_SYSTEM_DIR whose targets match + TARGET_PATTERN and replace the targets (i.e. overwrite the symlinks) + with TARGET_REPLACEMENT according to the rules of sed's replace + function. + + Print OLD_TARGET<tab>NEW_TARGET to stdout for each replaced symlink. + + ./bend_symlinks.sh -d [-D] FILE_SYSTEM_DIR OLD_TARGET_PREFIX NEW_TARGET_PREFIX + + Find all broken symlinks below FILE_SYSTEM_DIR which target a + path prefixed by OLD_TARGET_PREFIX and replace the targets (i.e. + overwrite the symlinks) by changing only the directory prefix to + NEW_TARGET_PREFIX. This is the preferred way to fix symlinks whose + targets have just been moved to another directory while the structure + under this directory stayed the same. + + Print OLD_TARGET<tab>NEW_TARGET to stdout for each replaced symlink. + + ./bend_symlinks.sh (-h|-v) + +# Parameters + + FILE_SYSTEM_DIR A directory of the internal file system's back-end + storage. + All symlinks below this directory are being processed by + this script. + E.g. '/mnt/caosdb_fs/ExperimentalData/' + TARGET_PATTERN A (extended sed-style) regular expression for matching + broken symlink targets. + TARGET_REPLACEMENT A (sed-style) replacement string for the new (fixed) + symlink targets. + OLD_TARGET_PREFIX The directory of old and broken symlink targets. + E.g. '/mnt/data/current/experiments/' + NEW_TARGET_PREFIX The directory of the new symlink targets. + E.g. '/mnt/data/archive/experiments/2019/' + -d Bend all symlinks under FILE_SYSTEM_DIR which target a + file prefixed by OLD_TARGET_PREFIX to point to + NEW_TARGET_PREFIX and keep the substructure the same. + This is the most useful special case for the scenario + where original data files have just been moved from one + folder into another and the symlinks need to be updated + accordingly. + -D Dry-run: Only print what would happen. 
+ -h Print this help message and exit. + -v Print the version of this script and exit. + +# Examples + + 1. Files have been moved from '/mnt/data/current/experiments/' to + '/mnt/data/archive/experiments/2019/'. Execute the script in the root + directory of the caosdb server's internal file system: + + $ ./bend_symlinks.sh -d ./ /mnt/data/current/experiments /mnt/data/archive/experiments/2019 + + 2. A file was renamed from '/mnt/data/procotol.pdf' to + '/mnt/data/protocol.pdf'. The symlink is located at + '/mnt/caosdb_fs/procotol.pdf'. Execute the script in the root directory + of the caosdb server's internal file system: + + $ ./bend_symlinks.sh ./ procotol\.pdf$ protocol.pdf + + 3. In order to print a table which contains the corrected name from example 2 + and which can be understood by the + [move_files.py](../move_files/move_files.py) script pipe the standard + output like this. + + $ ./bend_symlinks.sh ./ procotol\.pdf$ protocol.pdf | sed -e 's/\/mnt\/data\///g' > changes.tsv + + Then the changes.tsv file contains 'procotol.pdf<tab>protocol.pdf<EOF>'. + + +# Tests + +Run test suite with + + $ shunit2 test/test_suite.sh diff --git a/misc/bend_symlinks/bend_symlinks.sh b/misc/bend_symlinks/bend_symlinks.sh new file mode 100755 index 0000000000000000000000000000000000000000..5a296fcdf7c447619751b958b6dc411273e9c951 --- /dev/null +++ b/misc/bend_symlinks/bend_symlinks.sh @@ -0,0 +1,132 @@ +#!/bin/bash +# +# ** header v3.0 +# This file is a part of the CaosDB Project. +# +# Copyright (C) 2019 Timm Fitschen (t.fitschen@indiscale.com) +# Copyright (C) 2019 IndiScale (info@indiscale.com) +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. 
+# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see <https://www.gnu.org/licenses/>. +# +# ** end header + +set -o errexit -o noclobber -o nounset -o pipefail + +VERSION=0.1 + +LICENCE="# Copyright and License Disclaimer: + + Copyright (C) 2019 Timm Fitschen (t.fitschen@indiscale.com) + Copyright (C) 2019 IndiScale (info@indiscale.com) + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as + published by the Free Software Foundation, either version 3 of the + License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <https://www.gnu.org/licenses/>. +" + +USAGE="# Usage + + $0 [-D] FILE_SYSTEM_DIR TARGET_PATTERN TARGET_REPLACEMENT + + Find all broken symlinks below FILE_SYSTEM_DIR which targets match + TARGET_PATTERN and replace the targets (i.e. overwrite the symlinks) + with TARGET_REPLACEMENT according to the rules of sed's replace + function. + + Print OLD_TARGET<tab>NEW_TARGET to stdout for each replaced symlink. + + $0 -d [-D] FILE_SYSTEM_DIR OLD_TARGET_PREFIX NEW_TARGET_PREFIX + + Find all broken symlinks below FILE_SYSTEM_DIR which targets a + path prefixed by OLD_TARGET_PREFIX and replace the targets (i.e. + overwrite the symlinks) by changing only the directory prefix to + NEW_TARGET_PREFIX. 
This is the preferred way to fix symlinks which + targets have just been moved to another directory while the structure + under this directory stayed the same. + + Print OLD_TARGET<tab>NEW_TARGET to stdout for each replaced symlink. + + $0 (-h|-v) + +# Parameters + + FILE_SYSTEM_DIR A directory of the internal file system's back-end + storage. + All symlinks below this directory are being process by + this script. + E.g. '/mnt/caosdb_fs/ExperimentalData/' + TARGET_PATTERN A (extended sed-style) regular expression for matching + broken symlink targets. + TARGET_REPLACEMENT A (sed-style) replacement string for the new (fixed) + symlink targets. + OLD_TARGET_PREFIX The directory of old and broken symlink targets. + E.g. '/mnt/data/current/experiments/' + NEW_TARGET_PREFIX The directory of the new symlink targets. + E.g. '/mnt/data/archive/experiments/2019/' + -d Bend all symlinks under FILE_SYSTEM_DIR which target a + file prefixed by OLD_TARGET_PREFIX to point to + NEW_TARGET_PREFIX and keep the substructure the same. + This is the most useful special case for the scenario + where orginal data files have just been moved from one + folder into another and the symlinks need to updated + accordingly. + -D Dry-run: Only print what would happen. + -h Print this help message and exit. + -v Print the version of this script and ext. + +# Examples + + 1. Files have been moved from '/mnt/data/current/experiments/' to + /mnt/data/archive/experiments/2019/'. Execute the script in the root + directory of the caosdb server's internal file system: + + $ $0 -d ./ /mnt/data/current/experiments /mnt/data/archive/experiments/2019 + + 2. A File was renamed from '/mnt/data/procotol.pdf' to + '/mnt/data/protocol.pdf'. The symlink is located at + '/mnt/caosdb_fs/procotol.pdf'. Execute the script in the root directory + of the caosdb server's internal file system: + + $ $0 ./ procotol\\.pdf$ protocol.pdf + + 3. 
In order to print a table which contains the corrected name from example 2 + and which can be understood by the the + [move_files.py](../move_files/move_files.py) script pipe the standard + output like this. + + $ $0 ./ procotol\\.pdf$ protocol.pdf | sed -e 's/\/mnt\/data// > changes.tsv + + Then the changes.tsv file contains 'procotol.pdf<tab>protocol.pdf<EOF>'. +" + +HELP="$0 - fix broken symlinks in the internal file system + +$LICENCE + +# Version + $VERSION + +$USAGE +" + +source src/main.sh diff --git a/misc/bend_symlinks/src/main.sh b/misc/bend_symlinks/src/main.sh new file mode 100644 index 0000000000000000000000000000000000000000..c2a6a94766d437e41619c2602d1c62417e09ee42 --- /dev/null +++ b/misc/bend_symlinks/src/main.sh @@ -0,0 +1,98 @@ +# ** header v3.0 +# This file is a part of the CaosDB Project. +# +# Copyright (C) 2019 Timm Fitschen (t.fitschen@indiscale.com) +# Copyright (C) 2019 IndiScale (info@indiscale.com) +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see <https://www.gnu.org/licenses/>. +# +# ** end header + +source "src/utils.sh" + +### PARSE COMMAND LINE ARGUMENTS ### + +# OPTIONS -v -h +IS_DRY_RUN=0 +IS_MOVE=0 +while getopts ":hvdD" opt; do + case ${opt} in + h ) + echo "$HELP" + exit 0 + ;; + v ) + echo "$VERSION" + exit 0 + ;; + d ) + IS_MOVE=1 + ;; + D ) + IS_DRY_RUN=1 + ;; + \? ) + echo "Invalid option. 
See '$0 -h' for more information" 1>&2 + exit 1 + ;; + esac +done +shift $((OPTIND -1)) + + +# POSTIONAL ARGUMENTS +if [ $# -ne 3 ] ; then + echo "Illegal number of positional parameters. See '$0 -h' for more information" 1>&2 + exit 1 +fi +FILE_SYSTEM_ROOT=$1 +REGEX_OLD=$2 +REPLACEMENT=$3 + + +if [ $IS_MOVE -eq 1 ] ; then + REGEX_OLD=$(old_dir "$REGEX_OLD") + REPLACEMENT=$(new_dir "$REPLACEMENT") +fi + +set -o noglob +for syml in $(find -P $(realpath $FILE_SYSTEM_ROOT) -type l) ; do + OLD_TARGET=$(realpath -m "$syml" | sed -n -r "/$REGEX_OLD/p") + if [ -z "$OLD_TARGET" ] ; then + # filter non matching + continue + fi + + if [ -e "$syml" ] ; then + echo "#IGNORING (not broken): $syml" 1>&2 + continue + fi + + NEW_TARGET=$(echo "$OLD_TARGET" | sed -r "s/$REGEX_OLD/$REPLACEMENT/g") + if [ ! -e "$NEW_TARGET" ] ; then + echo "#IGNORING (broken new): $NEW_TARGET" 1>&2 + continue + fi + + echo -e "$OLD_TARGET\t$NEW_TARGET" + if [ $IS_DRY_RUN -eq 1 ] ; then + continue + fi + + # -f means force overwriting + ln -fs "$NEW_TARGET" "$syml" ; + +done +set +o noglob + diff --git a/misc/bend_symlinks/src/utils.sh b/misc/bend_symlinks/src/utils.sh new file mode 100644 index 0000000000000000000000000000000000000000..0e10fe9acc0c1c27fa3f2f58add8e7daf845e24c --- /dev/null +++ b/misc/bend_symlinks/src/utils.sh @@ -0,0 +1,58 @@ +#!/bin/bash +# +# ** header v3.0 +# This file is a part of the CaosDB Project. +# +# Copyright (C) 2019-2020 Timm Fitschen (t.fitschen@indiscale.com) +# Copyright (C) 2019-2020 IndiScale GmbH (info@indiscale.com) +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see <https://www.gnu.org/licenses/>. +# +# ** end header +# +set -o errexit -o noclobber -o nounset -o pipefail + +function escape_simple_path () { + SPATH=$(escape_slash "$1") + # ? (a plain '?' is literal in sed's basic syntax; emit '\?' for the extended regex) + SPATH=$(echo "$SPATH" | sed "s/?/\\\\?/g") + # . + SPATH=$(echo "$SPATH" | sed "s/\./\\\\./g") + # $ + SPATH=$(echo "$SPATH" | sed "s/\\$/\\\\$/g") + # [ + SPATH=$(echo "$SPATH" | sed -r "s/\[/\\\\[/g") + # ( + SPATH=$(echo "$SPATH" | sed -r "s/\(/\\\\(/g") + # { + SPATH=$(echo "$SPATH" | sed -r "s/\{/\\\\{/g") + echo "$SPATH" +} + +function escape_slash () { + echo "${1//\//\\\/}" +} + + +function old_dir () { + OLD_DIR=$(realpath -m "$1") + OLD_DIR=$(escape_simple_path "$OLD_DIR") + echo "^$OLD_DIR\/(.*)$" +} + +function new_dir () { + NEW_DIR=$(realpath -m "$1") + NEW_DIR=$(escape_slash "$NEW_DIR") + echo "$NEW_DIR\/\1" +} diff --git a/misc/bend_symlinks/test/test_suite.sh b/misc/bend_symlinks/test/test_suite.sh new file mode 100644 index 0000000000000000000000000000000000000000..781e477e1ffacda36b12687ec05ebf5c9d021e15 --- /dev/null +++ b/misc/bend_symlinks/test/test_suite.sh @@ -0,0 +1,229 @@ + + +source "./src/utils.sh" +set +o errexit + +BEND=./bend_symlinks.sh +FILE_SYSTEM_ROOT=test_dir/links +DATA_DIR=test_dir/original + +oneTimeSetUp () { + mkdir -p $FILE_SYSTEM_ROOT $DATA_DIR +} + +oneTimeTearDown () { + rm -rf test_dir +} + +tearDown () { + rm -rf $FILE_SYSTEM_ROOT/* + rm -rf $DATA_DIR/* +} + +_make_test_file () { + touch "$DATA_DIR/$1" + ln -s $(realpath "$DATA_DIR/$1") "$FILE_SYSTEM_ROOT/$1" + assertEquals "initial target $1" $(realpath "$FILE_SYSTEM_ROOT/$1") $(realpath "$DATA_DIR/$1") +} + +_break_link_move_file () { + set -o noglob + OLD_PATH="$DATA_DIR/$1" + OLD_PATH_REAL=$(realpath "$OLD_PATH") + NEW_PATH="$DATA_DIR/$2" + NEW_PATH_REAL=$(realpath "$NEW_PATH") + LINK="$FILE_SYSTEM_ROOT/$1" + 
mv "$OLD_PATH_REAL" "$NEW_PATH_REAL" + assertEquals "still target $OLD_PATH_REAL" $(realpath "$LINK") "$OLD_PATH_REAL" + assertFalse "$LINK link is broken" "[ -f '$LINK' ]" + assertFalse "$OLD_PATH_REAL was moved" "[ -f '$OLD_PATH_REAL' ]" + assertTrue "$NEW_PATH_REAL is there" "[ -f '$NEW_PATH_REAL' ]" + set +o noglob +} + +testVersion () { + assertEquals "version 0.1" "0.1" "$($BEND -v)" +} + +assertLinkOk () { + set -o noglob + LINK=$(realpath "$FILE_SYSTEM_ROOT/$1") + TARGET=$(realpath "$DATA_DIR/$2") + assertTrue "target exists $LINK" "[ -f '$LINK' ]" + assertEquals "target matches $TARGET" $TARGET "$LINK" + set +o noglob +} + +testIgnoreUnbroken () { + _make_test_file "fileA" + RESULTS=$($BEND $FILE_SYSTEM_ROOT "fileA" "fileA.new" 2>&1) # attempt to rename + + assertEquals "ignoring not broken" "#IGNORING (not broken): $(realpath "$PWD")/test_dir/links/fileA" "$RESULTS" + + assertLinkOk "fileA" "fileA" +} + +testIgnoreMissingNew () { + _make_test_file "fileA" + + _break_link_move_file "fileA" "fileA.new" + + RESULTS=$($BEND $FILE_SYSTEM_ROOT "fileA" "fileA.non" 2>&1) + + assertEquals "ignoring broken new" "#IGNORING (broken new): $(realpath $DATA_DIR)/fileA.non" "$RESULTS" + + TARGET=$(realpath -m "$DATA_DIR/fileA") + LINK=$(realpath -m "$FILE_SYSTEM_ROOT/fileA") + assertFalse "symlink still broken" "[ -e '$LINK' ]" + assertEquals "target still old" "$TARGET" "$LINK" +} + + +testFileName () { + _make_test_file "fileA" + _make_test_file "fileB" + + _break_link_move_file "fileA" "fileA.new" + + RESULTS=$($BEND $FILE_SYSTEM_ROOT "fileA" "fileA.new" 2>&1) # rename all fileA to fileA.new + + assertLinkOk "fileA" "fileA.new" + assertLinkOk "fileB" "fileB" +} + +testFullPath () { + _make_test_file "fileA" + _make_test_file "fileB" + + _break_link_move_file "fileA" "fileA.new" + + REGEX_OLD=$(escape_simple_path "$DATA_DIR/fileA") + REPLACEMENT=$(escape_slash "$DATA_DIR/fileA.new") + RESULTS=$($BEND $FILE_SYSTEM_ROOT "$REGEX_OLD" "$REPLACEMENT" 2>&1) + + assertLinkOk 
"fileA" "fileA.new" + assertLinkOk "fileB" "fileB" +} + +_testFullPathWithStrageChars () { + file_name="fileA$1bla" + _make_test_file "$file_name" + + _break_link_move_file "$file_name" "$file_name.new" + + REGEX_OLD=$(escape_simple_path "$DATA_DIR/$file_name") + REPLACEMENT=$(escape_slash "$DATA_DIR/$file_name.new") + RESULTS=$($BEND "$FILE_SYSTEM_ROOT" "$REGEX_OLD" "$REPLACEMENT" 2>&1) + + assertLinkOk "$file_name" "$file_name.new" + assertLinkOk "fileB" "fileB" +} + +testFullPathWithStrangeChars () { + _make_test_file "fileB" + _testFullPathWithStrageChars "A" + _testFullPathWithStrageChars "#" + _testFullPathWithStrageChars "0" + _testFullPathWithStrageChars "!" + _testFullPathWithStrageChars "." + _testFullPathWithStrageChars ";" + _testFullPathWithStrageChars "," + _testFullPathWithStrageChars "$" + _testFullPathWithStrageChars "[" + _testFullPathWithStrageChars "(" + _testFullPathWithStrageChars "{" + _testFullPathWithStrageChars "]" + _testFullPathWithStrageChars "[.]" +} + +testRegex () { + _make_test_file "fileA.0" + _make_test_file "dataA.1" + _make_test_file "fileA.0ok" + _make_test_file "fileB" + + _break_link_move_file "fileA.0" "file0-A" + _break_link_move_file "dataA.1" "data1-A" + + RESULTS=$($BEND "$FILE_SYSTEM_ROOT" "([a-z]+)([A-Z])+\.([01])$" "\1\3-\2" 2>&1) + + assertLinkOk "fileA.0" "file0-A" + assertLinkOk "dataA.1" "data1-A" + assertLinkOk "fileB" "fileB" + assertLinkOk "fileA.0ok" "fileA.0ok" +} + + +testDryRun () { + _make_test_file "fileA" + + OLD_TARGET=$(realpath "$DATA_DIR/fileA") + NEW_TARGET=$(realpath "$DATA_DIR/fileA.new") + + SYMLINK=$(realpath "$FILE_SYSTEM_ROOT/fileA") + assertTrue "1 target exists $SYMLINK" "[ -f '$SYMLINK' ]" + assertEquals "1 target matches $OLD_TARGET" "$OLD_TARGET" "$SYMLINK" + + _break_link_move_file "fileA" "fileA.new" + + SYMLINK=$(realpath "$FILE_SYSTEM_ROOT/fileA") + assertFalse "2 target does not exist $SYMLINK" "[ -f '$SYMLINK' ]" + assertEquals "2 target matches $OLD_TARGET" "$OLD_TARGET" "$SYMLINK" 
+ + RESULTS=$($BEND -D "$FILE_SYSTEM_ROOT" "fileA" "fileA.new" 2>&1) + + SYMLINK=$(realpath "$FILE_SYSTEM_ROOT/fileA") + assertFalse "3 target does not exist $SYMLINK" "[ -f '$SYMLINK' ]" + assertEquals "3 target matches $OLD_TARGET" $OLD_TARGET "$SYMLINK" + + RESULTS=$($BEND "$FILE_SYSTEM_ROOT" "fileA" "fileA.new") + + SYMLINK=$(realpath "$FILE_SYSTEM_ROOT/fileA") + assertTrue "4 target exists $SYMLINK" "[ -f '$SYMLINK' ]" + assertEquals "4 target matches $NEW_TARGET" $NEW_TARGET "$SYMLINK" + +} + +testUtilsOldAndNewDir () { + OLD_DIR="/root/to/old/" + assertTrue "old root does not exist" "[ ! -e '$OLD_DIR' ]" + NEW_DIR=$(realpath -m "$FILE_SYSTEM_ROOT/root/to/new/") + # new_dir must exist + mkdir -p "$NEW_DIR" + + OLD_DIR=$(old_dir "$OLD_DIR") + NEW_DIR=$(new_dir "$NEW_DIR") + PWD_ESC=$(escape_simple_path $(realpath "$FILE_SYSTEM_ROOT")) + + assertEquals "OLD_DIR correct" "^\/root\/to\/old\/(.*)$" "$OLD_DIR" + assertEquals "NEW_DIR correct" "$PWD_ESC\/root\/to\/new\/\1" "$NEW_DIR" + + RESULT=$(echo "/root/to/old/subdir/fileA" | sed -r "s/$OLD_DIR/$NEW_DIR/g") + assertEquals "result" $(realpath -m "$FILE_SYSTEM_ROOT/root/to/new/subdir/fileA") "$RESULT" +} + + +testSymlinkToSymlink () { + touch "$DATA_DIR/fileA" + ln -s $(realpath "$DATA_DIR/fileA") "$DATA_DIR/symlinkA" + ln -s $(realpath -s "$DATA_DIR/symlinkA") "$FILE_SYSTEM_ROOT/symlinkA" + assertLinkOk "symlinkA" "symlinkA" + assertLinkOk "symlinkA" "fileA" + + + # move only the symlink in data_dir + mv $DATA_DIR/symlinkA $DATA_DIR/symlinkA.new + assertFalse "symlink in fs broken" "[ -e '$FILE_SYSTEM_ROOT/symlinkA' ]" + assertTrue "symlink in data ok ($TARGET)" "[ -e '$DATA_DIR/symlinkA.new' ]" + + RESULT=$($BEND "$FILE_SYSTEM_ROOT" "symlinkA" "symlinkA.new" 2>&1) + + assertTrue "simlink is fixed" "[ -e '$FILE_SYSTEM_ROOT/symlinkA' ]" + + + assertLinkOk "symlinkA" "symlinkA.new" + assertLinkOk "symlinkA" "fileA" + +} + + diff --git a/misc/check_symlinks/check_symlinks b/misc/check_symlinks/check_symlinks index 
5545613982e7772189697eb596d9786af0d29b1b..0f6eb0d1d3626ed32fa99aa6a31891c6ddf90635 100755 --- a/misc/check_symlinks/check_symlinks +++ b/misc/check_symlinks/check_symlinks @@ -77,7 +77,6 @@ function check_recursively() { else echo "$subdir is empty" fi - done } diff --git a/misc/move_files/README.md b/misc/move_files/README.md new file mode 100644 index 0000000000000000000000000000000000000000..df67f2025b8627a09e6886038646db4ec4436941 --- /dev/null +++ b/misc/move_files/README.md @@ -0,0 +1,24 @@ +# About + +Version: 0.1 + +Usage: `./move_files.py [-h] changes` + +This script moves files in the internal file system of the CaosDB server. It +reads file paths from a tsv file with columns from and to. For each line it +creates an update of a caosdb file object where the path that equals "from" is +changed to "to". + +positional arguments: + changes The file that defines the renames + +optional arguments: + -h, --help show this help message and exit + +# Tests + +The tests are integration tests which require a running test database and a sufficiently configured caosdb-pylib. + +Run `pytest test_move_files.py` to insert a bunch of test files, rename and subsequently delete them. + + diff --git a/misc/move_files/move_files.py b/misc/move_files/move_files.py new file mode 100755 index 0000000000000000000000000000000000000000..44cc3f4f7e61027b833a7774705577c59e756e72 --- /dev/null +++ b/misc/move_files/move_files.py @@ -0,0 +1,101 @@ +#!/usr/bin/env python3 +# encoding: utf-8 +# +# ** header v3.0 +# This file is a part of the CaosDB Project. +# +# Copyright (C) 2019 Henrik tom Wörden +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. 
+# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see <https://www.gnu.org/licenses/>. +# +# ** end header +# + +""" +This script moves files in the internal file system of the CaosDB server. It +reads file paths form a tsv file with columns from and to. For each line it +creates an update of a caosdb file object where the path that equals "from" is +changed to "to". + +Version: 0.1 +""" + +import argparse +import sys +import time +from argparse import ArgumentParser + +import pandas as pd +from tqdm import tqdm + +import caosdb as db + + +def rename(changes, chunksize=10): + """change the path of files based on a two-column table (from, to). + + Parameters + ---------- + + changes : pd.DataFrame + A table with two columns, the old path and the new path. + chunksize : int, optional + How many files are being moved in one go (default is 10). 
+ """ + i = 0 + + for i in tqdm(range(changes.shape[0]//chunksize+1)): + chunk = changes.iloc[i*chunksize:(i+1)*chunksize] + + if chunk.shape[0] == 0: + continue + cont = db.Container() + + for _, (old, new) in chunk.iterrows(): + cont.append(db.File(path=old)) + + cont.retrieve() + + for fi, (_, (old, new)) in zip(cont, chunk.iterrows()): + assert fi.path == old + fi.path = new + cont.update() + i += 1 + + +def main(argv=None): + '''Command line options.''' + + if argv is None: + argv = sys.argv + else: + sys.argv.extend(argv) + + # Setup argument parser + parser = ArgumentParser(description=__doc__) + parser.add_argument("changes", help="The file that defines the renames") + args = parser.parse_args() + + changes = pd.read_csv(args.changes, sep="\t") + + if ("to" not in changes.columns or "from" not in changes.columns): + raise ValueError("The file supplied under changes shall have a 'to'" + " and a 'from' column.") + + assert 0 == pd.isnull(changes).sum().sum() + rename(changes) + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/misc/move_files/test_move_files.py b/misc/move_files/test_move_files.py new file mode 100644 index 0000000000000000000000000000000000000000..2edb76509dc6a350057997cc6f894abd4a4bb163 --- /dev/null +++ b/misc/move_files/test_move_files.py @@ -0,0 +1,78 @@ +# encoding: utf-8 +# +# This file is a part of the CaosDB Project. +# +# Copyright (C) 2019 Henrik tom Wörden +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. 
+# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see <https://www.gnu.org/licenses/>. + + +import os +import random +import unittest + +import pandas as pd + +import caosdb as db +from move_files import rename + + +def create_filename(): + name = os.path.normpath("".join([ + random.choice("qwertyuiopkkhgfdsxcvbnm/") for el in range(30)])) + + if not name.startswith("/"): + name = "/" + name + + return name + + +class TestMoveFiles(unittest.TestCase): + """ + Files are being created, changed and then it is checked whether the changes + were correct. + """ + + def setUp(self): + self.files_to_be_changed = [create_filename() for i in range(40)] + self.files_not_to_be_changed = [create_filename() for i in range(20)] + self.new_names = [f+"new" for f in self.files_to_be_changed] + + table = pd.DataFrame([self.files_to_be_changed, self.new_names]) + table = table.T + table.columns = ["from", "to"] + self.table = table + self.cont = db.Container() + self.cont.extend([db.File(path=f, file=__file__) + for f in self.files_to_be_changed + + self.files_not_to_be_changed]) + self.cont.insert() + print("inserted") + + def test_move(self): + rename(self.table) + self.cont.retrieve() + + for i, (fi, name) in enumerate(zip( + self.cont, + self.files_to_be_changed + self.files_not_to_be_changed)): + + if i < len(self.files_to_be_changed): + self.assertEqual(fi.path, self.new_names[i]) + else: + self.assertEqual(fi.path, name) + + def tearDown(self): + self.cont.delete() + print("deleted") diff --git a/misc/mv_unknown_files_script/mv_unknown_files b/misc/mv_unknown_files_script/mv_unknown_files deleted file mode 100755 index 8b26b464d8dd40a15f3418895dd440903a6da8a6..0000000000000000000000000000000000000000 --- a/misc/mv_unknown_files_script/mv_unknown_files +++ /dev/null @@ -1,27 +0,0 @@ -#!/bin/bash -# -# ** header v3.0 -# This file is a part of the CaosDB Project. 
-# -# Copyright (C) 2018 Research Group Biomedical Physics, -# Max-Planck-Institute for Dynamics and Self-Organization Göttingen -# -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU Affero General Public License as -# published by the Free Software Foundation, either version 3 of the -# License, or (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Affero General Public License for more details. -# -# You should have received a copy of the GNU Affero General Public License -# along with this program. If not, see <https://www.gnu.org/licenses/>. -# -# ** end header -# - -grep ": Unknown file." $1 | sed 's/<Warning.*tion="//' | sed 's/:\sUnknown.*//' | awk '{ print "move ", $1 }' - - diff --git a/src/main/java/caosdb/server/FileSystem.java b/src/main/java/caosdb/server/FileSystem.java index b87b25ea682dba78afaf49f99c9665ac70d75764..799ef5c1bcc1f6d92a8a78acce7ce352671d7254 100644 --- a/src/main/java/caosdb/server/FileSystem.java +++ b/src/main/java/caosdb/server/FileSystem.java @@ -37,6 +37,7 @@ import org.apache.commons.fileupload.FileItemStream; import com.google.common.io.Files; import caosdb.server.database.access.Access; import caosdb.server.database.backend.transaction.GetFileRecordByPath; +import caosdb.server.database.exceptions.EntityDoesNotExistException; import caosdb.server.database.misc.TransactionBenchmark; import caosdb.server.entity.EntityInterface; import caosdb.server.entity.FileProperties; @@ -325,15 +326,22 @@ public class FileSystem { final GetFileRecordByPath t = new GetFileRecordByPath(file.getPath()); t.setAccess(access); t.setTransactionBenchmark(b); - t.executeTransaction(); + try { + t.executeTransaction(); + } catch (EntityDoesNotExistException e) { + // could not determine which entity owns this 
path + // this is usually the case when target is a directory + } if (t.getEntity() != null) { final Integer foreign = t.getId(); if (foreign != null && foreign.equals(entity.getId())) { + // entity already owns this path return true; } - throw ServerMessages.TARGET_PATH_EXISTS; } } + // another entity owns this path + throw ServerMessages.TARGET_PATH_EXISTS; } return true; diff --git a/src/main/java/caosdb/server/FileSystem.java.orig b/src/main/java/caosdb/server/FileSystem.java.orig new file mode 100644 index 0000000000000000000000000000000000000000..15f5e8770617d0ae90a18a9339493d37417c8f74 --- /dev/null +++ b/src/main/java/caosdb/server/FileSystem.java.orig @@ -0,0 +1,428 @@ +/* + * ** header v3.0 + * This file is a part of the CaosDB Project. + * + * Copyright (C) 2018 Research Group Biomedical Physics, + * Max-Planck-Institute for Dynamics and Self-Organization Göttingen + * Copyright (c) 2019 IndiScale GmbH + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as + * published by the Free Software Foundation, either version 3 of the + * License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see <https://www.gnu.org/licenses/>. 
+ * + * ** end header + */ + +package caosdb.server; + +import java.io.File; +import java.io.FileOutputStream; +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.nio.file.Path; +import java.security.MessageDigest; +import java.security.NoSuchAlgorithmException; +import java.util.regex.Pattern; +import org.apache.commons.fileupload.FileItemStream; +import com.google.common.io.Files; +import caosdb.server.database.access.Access; +import caosdb.server.database.backend.transaction.GetFileRecordByPath; +<<<<<<< HEAD +======= +import caosdb.server.database.exceptions.EntityDoesNotExistException; +>>>>>>> dev +import caosdb.server.database.misc.TransactionBenchmark; +import caosdb.server.entity.EntityInterface; +import caosdb.server.entity.FileProperties; +import caosdb.server.entity.Message; +import caosdb.server.utils.FileUtils; +import caosdb.server.utils.ServerMessages; +import caosdb.server.utils.Utils; + +public class FileSystem { + private static String filesystem = null; + private static String dropOffBox = null; + private static String tmpdir = null; + private static String sharedDir = null; + public static final Pattern base32Pattern = Pattern.compile("^[-A-Z2-7]+$"); + + private static void check() { + try { + File f = new File(filesystem); + if (!f.exists()) { + f.mkdirs(); + } + filesystem = f.getCanonicalPath() + "/"; + + f = new File(dropOffBox); + if (!f.exists()) { + f.mkdirs(); + } + dropOffBox = f.getCanonicalPath() + "/"; + + f = new File(tmpdir); + if (!f.exists()) { + f.mkdirs(); + } + tmpdir = f.getCanonicalPath() + "/"; + } catch (final IOException e) { + e.printStackTrace(); + System.exit(1); + } + } + + public static String getBasepath() { + if (filesystem == null) { + init(); + } + return filesystem; + } + + public static String getDropOffBox() { + if (dropOffBox == null) { + init(); + } + return dropOffBox; + } + + public static String getTmp() { + if (tmpdir == null) { + init(); + } + return 
tmpdir; + } + + public static String getShared() { + if (sharedDir == null) { + init(); + } + return sharedDir; + } + + public static void init() { + filesystem = CaosDBServer.getServerProperty(ServerProperties.KEY_FILE_SYSTEM_ROOT); + dropOffBox = CaosDBServer.getServerProperty(ServerProperties.KEY_DROP_OFF_BOX); + tmpdir = CaosDBServer.getServerProperty(ServerProperties.KEY_TMP_FILES); + sharedDir = CaosDBServer.getServerProperty(ServerProperties.KEY_SHARED_FOLDER); + check(); + } + + private FileSystem() {} + + /** + * Asserts that a temporary directory for this session exists, creating it if necessary. + * + * @param sessionString The session string for which the directory is guaranteed to exist after + * calling this function. If `session` is Null, a random directory will be created. + * @return A String with the existing directory. + */ + public static final String assertDir(String sessionString) throws IOException { + + if (sessionString == null) { + sessionString = Utils.getSecureFilename(15); + } + + // Name of the temporary directory + final File tempDir = new File(getTmp(), sessionString); + + if (!tempDir.exists()) { + tempDir.mkdirs(); + } + + if (!tempDir.isDirectory()) { + throw new IOException("File " + tempDir.toString() + " is not a directory."); + } + + return tempDir.toString(); + } + + /** + * Reads a FileItemStream and stores the file into the tmpfolder. Generates FileProperties. + * + * @param item + * @param session + * @return FileProperties of the uploaded File. + * @throws NoSuchAlgorithmException + * @throws IOException + */ + public static final FileProperties upload(final FileItemStream item, final String session) + throws NoSuchAlgorithmException, IOException { + String checksum = null; + + // Name of the temporary FILE object. 
+ final String tempPath = FileSystem.getTmp() + session + item.getName(); + + final InputStream stream = item.openStream(); + final File tmpFile = new File(tempPath); + + if (tempPath.endsWith("/")) { + // this is a directory, not a file + + stream.close(); + assertDir(session); + } else { + // this is actually a file + + tmpFile.getParentFile().mkdirs(); + if (tmpFile.isDirectory()) { + // TODO this should generate an error. This means that the + // tmpIdentifyers are inconsistent + } + final OutputStream outputStream = new FileOutputStream(tmpFile); + final MessageDigest md = MessageDigest.getInstance("SHA-512"); + final byte[] buf = new byte[1024]; + int bufSize = 0; + + // store temporary file and calculate hash + while ((bufSize = stream.read(buf)) != -1) { + md.update(buf, 0, bufSize); + outputStream.write(buf, 0, bufSize); + } + outputStream.close(); + + // get hash result + final byte[] result = md.digest(); + checksum = FileUtils.toHex(result); + } + + final FileProperties file = new FileProperties(checksum, tempPath, tmpFile.length()); + file.setFile(tmpFile); + file.removeOnCleanUp(tempPath); + return file; + } + + /** + * Pick up file from DropOffBox and load it into tmp dir. + * + * @param path + * @param session + * @return A FileProperties Object which represents the object in the tmp dir. + * @throws NoSuchAlgorithmException + * @throws IOException + * @throws Message + * @throws CaosDBException + * @throws InterruptedException + */ + public static final FileProperties pickUp(final String path, final String session) + throws Message { + + File file = new File(getDropOffBox() + path); + + // is the file in the DropOffBox? 
+ boolean inDropOffBox = true; + if (!file.exists()) { + if (path.startsWith("/")) { + file = new File(path); + if (!file.exists()) { + throw ServerMessages.FILE_NOT_FOUND; + } + } else { + throw ServerMessages.FILE_NOT_IN_DROPOFFBOX; + } + inDropOffBox = false; + } + File thumbnail = + new File(file.getParentFile().getAbsolutePath() + "/.thumbnails/" + file.getName()); + + if (inDropOffBox) { + // chown + FileUtils.runChownScript(file); + + // mv to tmp directory. + final File tmp = new File(getTmp() + session + path); + if (!file.renameTo(tmp)) { + throw ServerMessages.CANNOT_MOVE_FILE_TO_TMP; + } + if (thumbnail.exists()) { + final File tmpThumbnail = + new File(tmp.getParentFile().getAbsolutePath() + "/.thumbnails/" + tmp.getName()); + if (!thumbnail.renameTo(tmpThumbnail)) { + throw ServerMessages.CANNOT_MOVE_FILE_TO_TMP; + } + thumbnail = tmpThumbnail; + } else { + thumbnail = null; + } + + file = tmp; + } else { + // copy to tmp dir + final File tmp = new File(getTmp() + session + file.getName()); + if (!file.canRead()) { + throw ServerMessages.CANNOT_READ_FILE; + } + try { + Files.copy(file, tmp); + } catch (final IOException e) { + throw ServerMessages.CANNOT_MOVE_FILE_TO_TMP; + } + + if (thumbnail.exists()) { + final File tmpThumbnail = + new File(tmp.getParentFile().getAbsolutePath() + "/.thumbnails/" + tmp.getName()); + if (!thumbnail.canRead()) { + throw ServerMessages.CANNOT_READ_THUMBNAIL; + } + try { + Files.copy(thumbnail, tmpThumbnail); + } catch (final IOException e) { + throw ServerMessages.CANNOT_MOVE_FILE_TO_TMP; + } + thumbnail = tmpThumbnail; + } else { + thumbnail = null; + } + + file = tmp; + } + + // get checksum and size + final Long size = file.length(); + final String checksum = FileUtils.getChecksum(file); + + final FileProperties ret = new FileProperties(checksum, null, size); + ret.setFile(file); + ret.setThumbnail(thumbnail); + return ret; + } + + /** + * Checks if a target path is consistent with the server's configuration and the 
file system. + * + * @param entity + * @return + * @throws Message + */ + public static boolean checkTarget( + final EntityInterface entity, final Access access, TransactionBenchmark b) throws Message { + + final FileProperties file = entity.getFileProperties(); + // target file name = where the file is to be stored. + final String targetName = getPath(file.getPath()); + final File target = new File(targetName); + + // check if target's directory is a sub directory of the file + // system's root + if (!target.getAbsolutePath().matches("^" + getBasepath() + ".*$")) { + throw ServerMessages.TARGET_PATH_NOT_ALLOWED; + } + + // check if the target exists. + if (target.exists()) { + // if the file already is located at its target location, the + // target is correct. + if (file.getFile() != null && file.getFile().equals(target)) { + return true; + } else if (target.isDirectory()) { + throw ServerMessages.TARGET_PATH_EXISTS; + } else { + final GetFileRecordByPath t = new GetFileRecordByPath(file.getPath()); + t.setAccess(access); + t.setTransactionBenchmark(b); + try { + t.executeTransaction(); + } catch (EntityDoesNotExistException e) { + // could not determine which entity owns this path + // this is usually the case when target is a directory + } + if (t.getEntity() != null) { + final Integer foreign = t.getId(); + if (foreign != null && foreign.equals(entity.getId())) { + // entity already owns this path + return true; + } + } + } + // another entity owns this path + throw ServerMessages.TARGET_PATH_EXISTS; + } + + return true; + } + + /** + * Get the file from the internal file system. Returns null if the file does not exist. + * + * @param path + * @return + */ + public static File getFromFileSystem(final String path) { + File ret; + ret = getFile(path); + if (ret.exists()) { + return ret; + } + return null; + } + + /** + * Get the file from the shared files folder. + * + * <p>Conditions under which null is returned: + * <li>The file does not exist. 
+ * <li>The file is a folder. + * <li>The requested path is just a file, without parent folders. + * <li>The requested path is not normalized. + * <li>The first component of the path does not match the base32 pattern for shared folders. + * + * @param path The path to the requested file. + * @return File + */ + public static File getFromShared(final String path) { + String basePath = getTmp(); + Path pathObj = (new File(path)).toPath(); + + // Must have more than one component + if (pathObj.getNameCount() < 2) { + return null; + } + // Check for normalization + if (!pathObj.equals(pathObj.normalize())) { + return null; + } + // The first component of `path` must follow the Base32 pattern. + String firstElement = pathObj.getName(0).toString(); + if (!base32Pattern.matcher(firstElement).matches()) { + return null; + } + + // All safe, let's get the file already. + File ret = new File(basePath, path); + // Does the file exist and is it a regular file? + if (!ret.exists() || !ret.isFile()) { + return null; + } + return ret; + } + + /** + * Return the canonical path on the native file system of the server's host which is guaranteed to + * be a valid path under the server's internal file system. + * + * @param location + * @return A canonical path. + * @throws Message TARGET_PATH_NOT_ALLOWED if location does not belong to the file system. 
+ */ + public static String getPath(final String location) throws Message { + String canonicalPath; + canonicalPath = getFile(location).getAbsolutePath(); + if (canonicalPath.startsWith(getBasepath().replaceFirst("/$", ""))) { + return canonicalPath; + } + throw ServerMessages.TARGET_PATH_NOT_ALLOWED; + } + + private static File getFile(final String path) { + return new File(getBasepath() + path); + } +} diff --git a/src/main/java/caosdb/server/database/backend/implementation/UnixFileSystem/UnixFileSystemGetFileIterator.java b/src/main/java/caosdb/server/database/backend/implementation/UnixFileSystem/UnixFileSystemGetFileIterator.java index 5bd6cd56fa40dc8696a8471e33381f1ab2584639..78fcac09e9746374d8e4af85368720e512d6a3b4 100644 --- a/src/main/java/caosdb/server/database/backend/implementation/UnixFileSystem/UnixFileSystemGetFileIterator.java +++ b/src/main/java/caosdb/server/database/backend/implementation/UnixFileSystem/UnixFileSystemGetFileIterator.java @@ -22,14 +22,14 @@ */ package caosdb.server.database.backend.implementation.UnixFileSystem; +import java.io.File; +import java.util.Arrays; +import java.util.Iterator; import caosdb.server.FileSystem; import caosdb.server.database.access.Access; import caosdb.server.database.backend.interfaces.GetFileIteratorImpl; import caosdb.server.database.exceptions.TransactionException; import caosdb.server.entity.Message; -import java.io.File; -import java.util.Arrays; -import java.util.Iterator; public class UnixFileSystemGetFileIterator extends UnixFileSystemTransaction implements GetFileIteratorImpl { diff --git a/src/main/java/caosdb/server/jobs/core/InsertFilesInDir.java b/src/main/java/caosdb/server/jobs/core/InsertFilesInDir.java index 7db04f124aa45b81d3484db3a9d0ea33ca74a5ba..7e0517b0001c18ac4d3aa5577985b87228cfc7d9 100644 --- a/src/main/java/caosdb/server/jobs/core/InsertFilesInDir.java +++ b/src/main/java/caosdb/server/jobs/core/InsertFilesInDir.java @@ -22,6 +22,12 @@ */ package caosdb.server.jobs.core; +import 
java.io.File; +import java.io.IOException; +import java.util.LinkedList; +import java.util.List; +import java.util.regex.Matcher; +import java.util.regex.Pattern; import caosdb.server.CaosDBException; import caosdb.server.CaosDBServer; import caosdb.server.FileSystem; @@ -42,12 +48,6 @@ import caosdb.server.utils.EntityStatus; import caosdb.server.utils.FileUtils; import caosdb.server.utils.Undoable; import caosdb.server.utils.Utils; -import java.io.File; -import java.io.IOException; -import java.util.LinkedList; -import java.util.List; -import java.util.regex.Matcher; -import java.util.regex.Pattern; @JobAnnotation( flag = "InsertFilesInDir", @@ -62,6 +62,9 @@ public class InsertFilesInDir extends FlagJob { private Pattern include = null; private Pattern exclude = null; private boolean forceSymLinks = false; + private Pattern valueParser = + Pattern.compile( + "(?:(?:-p\\s*([^\\s]*?)\\s+)|(?:-i\\s*([^\\s]*?)\\s+)|(?:-e\\s*([^\\s]*?)\\s+)|(--force-allow-symlinks\\s+))|([^-].*)"); /** * @return a List of directories which subdirs are allowed to be batch-added. 
Needs to be @@ -86,14 +89,10 @@ public class InsertFilesInDir extends FlagJob { return ret; } - @Override - protected void job(final String value) { + public String parseValue(String value) { - String dirStr = value; - final Pattern pattern = - Pattern.compile( - "(?:(?:-p\\s*([^\\s]*?)\\s+)|(?:-i\\s*([^\\s]*?)\\s+)|(?:-e\\s*([^\\s]*?)\\s+)|(--force-allow-symlinks\\s+))|([^-].*)"); - final Matcher matcher = pattern.matcher(value); + String ret = value; + final Matcher matcher = valueParser.matcher(value); while (matcher.find()) { if (matcher.group(1) != null) { this.prefix = matcher.group(1).replaceFirst("/$", "") + "/"; @@ -108,9 +107,16 @@ public class InsertFilesInDir extends FlagJob { this.forceSymLinks = true; } if (matcher.group(5) != null) { - dirStr = matcher.group(5); + ret = matcher.group(5); } } + return ret; + } + + @Override + protected void job(final String value) { + + String dirStr = parseValue(value); final File dir = new File(dirStr); @@ -247,24 +253,34 @@ public class InsertFilesInDir extends FlagJob { return i; } + boolean isExcluded(File f) throws IOException { + return this.exclude != null && this.exclude.matcher(f.getCanonicalPath()).find(); + } + + boolean isNotIncluded(File f) throws IOException { + return this.include != null && !this.include.matcher(f.getCanonicalPath()).find(); + } + private boolean shouldBeProcessed(final File sub) throws IOException { - if (this.include != null && !this.include.matcher(sub.getCanonicalPath()).matches()) { - getContainer() - .addMessage( - new Message( - MessageType.Warning, - 1, - "Not explicitly included directory or file: " + sub.getCanonicalPath())); - return false; - } - if (this.exclude != null && this.exclude.matcher(sub.getCanonicalPath()).matches()) { - getContainer() - .addMessage( - new Message( - MessageType.Warning, - 2, - "Explicitly excluded directory or file: " + sub.getCanonicalPath())); - return false; + if (sub.isFile()) { + if (this.isNotIncluded(sub)) { + getContainer() + 
.addMessage( + new Message( + MessageType.Warning, + 1, + "Not explicitly included file: " + sub.getCanonicalPath())); + return false; + } + if (this.isExcluded(sub)) { + getContainer() + .addMessage( + new Message( + MessageType.Warning, + 2, + "Explicitly excluded file: " + sub.getCanonicalPath())); + return false; + } } if (sub.isHidden()) { getContainer() diff --git a/src/main/java/caosdb/server/transaction/FileStorageConsistencyCheck.java b/src/main/java/caosdb/server/transaction/FileStorageConsistencyCheck.java index 4e3a604a9dc5f7fb71bfabbf83785b84ce75b008..01afe0fe13d0f50b648db85bb9496554f321584a 100644 --- a/src/main/java/caosdb/server/transaction/FileStorageConsistencyCheck.java +++ b/src/main/java/caosdb/server/transaction/FileStorageConsistencyCheck.java @@ -82,30 +82,30 @@ public class FileStorageConsistencyCheck extends Thread } final String path = iterator.next(); - // FIXME this prevents all files with ".thumbnail" from being checked. - if (path.contains(".thumbnail")) { + // this prevents all thumbnails from being checked. 
+ if (path.contains(".thumbnails/")) { continue; } try { final GetFileRecordByPath t = execute(new GetFileRecordByPath(path), this.access); - final int result = - execute( - new FileConsistencyCheck( - path, t.getSize(), t.getHash(), t.getLastConsistencyCheck(), new SHA512()), - this.access) - .getResult(); - - if (result != FileConsistencyCheck.OK) { - this.results.put(path, result); - } - - execute(new SetFileCheckedTimestamp(t.getId(), this.ts), this.access); - + final int result = + execute( + new FileConsistencyCheck( + path, t.getSize(), t.getHash(), t.getLastConsistencyCheck(), new SHA512()), + this.access) + .getResult(); + + if (result != FileConsistencyCheck.OK) { + this.results.put(path, result); + } + + execute(new SetFileCheckedTimestamp(t.getId(), this.ts), this.access); } catch (EntityDoesNotExistException e) { this.results.put(path, FileConsistencyCheck.UNKNOWN_FILE); continue; } + } // test all remaining file records diff --git a/src/main/java/caosdb/server/transaction/FileStorageConsistencyCheck.java.orig b/src/main/java/caosdb/server/transaction/FileStorageConsistencyCheck.java.orig new file mode 100644 index 0000000000000000000000000000000000000000..5e6c35a041c4436fa4181cc595be57566fad1854 --- /dev/null +++ b/src/main/java/caosdb/server/transaction/FileStorageConsistencyCheck.java.orig @@ -0,0 +1,246 @@ +/* + * ** header v3.0 + * This file is a part of the CaosDB Project. + * + * Copyright (C) 2018 Research Group Biomedical Physics, + * Max-Planck-Institute for Dynamics and Self-Organization Göttingen + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as + * published by the Free Software Foundation, either version 3 of the + * License, or (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see <https://www.gnu.org/licenses/>. + * + * ** end header + */ +package caosdb.server.transaction; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.Iterator; +import java.util.List; +import java.util.Map.Entry; +import java.util.TimeZone; +import org.jdom2.Element; +import caosdb.datetime.UTCDateTime; +import caosdb.server.database.DatabaseMonitor; +import caosdb.server.database.access.Access; +import caosdb.server.database.backend.transaction.FileConsistencyCheck; +import caosdb.server.database.backend.transaction.GetFileIterator; +import caosdb.server.database.backend.transaction.GetFileRecordByPath; +import caosdb.server.database.backend.transaction.RetrieveAllUncheckedFiles; +import caosdb.server.database.backend.transaction.SetFileCheckedTimestamp; +import caosdb.server.database.exceptions.EntityDoesNotExistException; +import caosdb.server.database.proto.SparseEntity; +import caosdb.server.entity.Message; +import caosdb.server.entity.xml.ToElementable; +import caosdb.server.utils.SHA512; + +public class FileStorageConsistencyCheck extends Thread + implements ToElementable, TransactionInterface { + + private Access access = null; + private final HashMap<String, Integer> results = new HashMap<String, Integer>(); + private Exception exception = null; + private Runnable finishRunnable = null; + private final String location; + private Long ts = null; + + public Exception getException() { + return this.exception; + } + + public FileStorageConsistencyCheck(final String location) { + setDaemon(true); + this.location = location.startsWith("/") ? 
location.replaceFirst("^/", "") : location; + } + + @Override + public void run() { + this.access = DatabaseMonitor.getInstance().acquiredWeakAccess(this); + try { + + // test all files in file system. + final Iterator<String> iterator = + execute(new GetFileIterator(this.location), this.access).getIterator(); + + this.ts = System.currentTimeMillis(); + while (iterator != null && iterator.hasNext()) { + if (DatabaseMonitor.whoHasAllocatedStrongAccess() != null) { + // there is a thread waiting to write. pause this one and + // apply for a new weak access which will be granted when + // the write thread is ready. + this.access.release(); + this.access = DatabaseMonitor.getInstance().acquiredWeakAccess(this); + } + + final String path = iterator.next(); + // this prevents all thumbnails from being checked. + if (path.contains(".thumbnails/")) { + continue; + } + + try { + final GetFileRecordByPath t = execute(new GetFileRecordByPath(path), this.access); +<<<<<<< HEAD + final int result = + execute( + new FileConsistencyCheck( + path, t.getSize(), t.getHash(), t.getLastConsistencyCheck(), new SHA512()), + this.access) + .getResult(); + + if (result != FileConsistencyCheck.OK) { + this.results.put(path, result); + } + + execute(new SetFileCheckedTimestamp(t.getId(), this.ts), this.access); + + } catch (EntityDoesNotExistException e) { + this.results.put(path, FileConsistencyCheck.UNKNOWN_FILE); + continue; + } +======= + final int result = + execute( + new FileConsistencyCheck( + path, t.getSize(), t.getHash(), t.getLastConsistencyCheck(), new SHA512()), + this.access) + .getResult(); + + if (result != FileConsistencyCheck.OK) { + this.results.put(path, result); + } + + execute(new SetFileCheckedTimestamp(t.getId(), this.ts), this.access); + } catch (EntityDoesNotExistException e) { + this.results.put(path, FileConsistencyCheck.UNKNOWN_FILE); + continue; + } + +>>>>>>> dev + } + + // test all remaining file records + final Iterator<SparseEntity> iterator2 = + 
execute(new RetrieveAllUncheckedFiles(this.ts, this.location), this.access).getIterator(); + while (iterator2 != null && iterator2.hasNext()) { + + final SparseEntity entity = iterator2.next(); + final int result = + execute( + new FileConsistencyCheck( + entity.filePath, + entity.fileSize, + entity.fileHash, + entity.fileChecked, + new SHA512()), + this.access) + .getResult(); + + if (result != FileConsistencyCheck.OK) { + this.results.put(entity.filePath, result); + } + + execute(new SetFileCheckedTimestamp(entity.id, this.ts), this.access); + } + + } catch (final Exception e) { + this.exception = e; + } finally { + this.access.release(); + } + + synchronized (this.results) { + if (this.finishRunnable != null) { + this.finishRunnable.run(); + } + } + } + + public HashMap<String, Integer> getResults() { + return this.results; + } + + public void setOnFinish(final Runnable r) { + synchronized (this.results) { + this.finishRunnable = r; + } + } + + @Override + public void addToElement(final Element e) { + if (this.ts != null) { + e.setAttribute( + "timestamp", + UTCDateTime.SystemMillisToUTCDateTime(this.ts).toDateTimeString(TimeZone.getDefault())); + } + if (this.location != null) { + e.setAttribute("location", this.location); + } + + if (getException() != null) { + final StringBuilder sb = new StringBuilder(); + sb.append(getException().toString()); + for (final StackTraceElement t : getException().getStackTrace()) { + sb.append('\n').append(t.toString()); + } + + e.addContent(new Message("Error", 0, "An exception was thrown.", sb.toString()).toElement()); + } + + final List<Message> results2Messages = results2Messages(getResults(), this.location); + for (final Message m : results2Messages) { + e.addContent(m.toElement()); + } + } + + public Element toElement() { + final Element results = new Element("Results"); + addToElement(results); + return results; + } + + private static List<Message> results2Messages( + final HashMap<String, Integer> results, final String 
location) { + final ArrayList<Message> ret = new ArrayList<Message>(); + if (results.isEmpty()) { + if (location.length() > 0) { + ret.add(new Message("Info", 0, "File system below " + location + " is consistent.")); + } else { + ret.add(new Message("Info", 0, "File system is consistent.")); + } + } + for (final Entry<String, Integer> r : results.entrySet()) { + switch (r.getValue()) { + case FileConsistencyCheck.FILE_DOES_NOT_EXIST: + ret.add(new Message("Error", 0, r.getKey() + ": File does not exist.")); + break; + case FileConsistencyCheck.FILE_MODIFIED: + ret.add(new Message("Error", 0, r.getKey() + ": File was modified.")); + break; + case FileConsistencyCheck.UNKNOWN_FILE: + ret.add(new Message("Warning", 0, r.getKey() + ": Unknown file.")); + break; + case FileConsistencyCheck.NONE: + ret.add(new Message("Warning", 0, r.getKey() + ": Test result not available.")); + break; + default: + break; + } + } + return ret; + } + + @Override + public void execute() throws Exception { + run(); + } +} diff --git a/src/test/docker/Dockerfile b/src/test/docker/Dockerfile index 9e7bd9a73a3a52e052f4b5953adf1178254f6f03..d8b831eebbda33a3143f89a8da1edb770788bce5 100644 --- a/src/test/docker/Dockerfile +++ b/src/test/docker/Dockerfile @@ -1,4 +1,4 @@ FROM debian:stretch RUN apt-get update && \ apt-get install git make mariadb-server maven openjdk-8-jdk-headless \ - python3-pip screen libpam0g-dev unzip curl -y + python3-pip screen libpam0g-dev unzip curl shunit2 -y diff --git a/src/test/java/caosdb/server/jobs/core/TestInsertFilesInDir.java b/src/test/java/caosdb/server/jobs/core/TestInsertFilesInDir.java new file mode 100644 index 0000000000000000000000000000000000000000..37cad0eca28a50b006985a73edb07e45b60fdf9e --- /dev/null +++ b/src/test/java/caosdb/server/jobs/core/TestInsertFilesInDir.java @@ -0,0 +1,19 @@ +package caosdb.server.jobs.core; + +import static org.junit.Assert.assertTrue; + +import java.io.File; +import java.io.IOException; +import org.junit.Test; + +public 
class TestInsertFilesInDir { + + @Test + public void testExclude() throws IOException { + InsertFilesInDir job = new InsertFilesInDir(); + job.init(null, null, null); + job.parseValue("-e ^.*test.*$ test"); + File testFile = new File("test.dat"); + assertTrue(job.isExcluded(testFile)); + } +}