diff --git a/.gitignore b/.gitignore index 39be17dc807dc3e2f5c505dc9341027457403421..06c8c148f1e9f45493f574a11c6789398246defd 100644 --- a/.gitignore +++ b/.gitignore @@ -29,3 +29,6 @@ log/ OUTBOX ConsistencyTest.xml testlog/ + +# python +__pycache__ diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index bb05c265f11d0fcb3a72cee54af5314a5ca4c284..34e93fd70f3252e3dd04b2df14e06a63299d940c 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -38,6 +38,7 @@ test: tags: [ docker ] stage: test script: + - make test_misc - make easy-units - mvn dependency:purge-local-repository - mvn antlr4:antlr4 diff --git a/CHANGELOG.md b/CHANGELOG.md index 6898772f8d7299037aa4806c036e9a0839e74ae3..8b65bafc9f260dabecd592fea4f08829f08db7c4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,13 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Scripting is simplified by adding a `home` directory, of which a copy is created for each called script and set as the `HOME` environment variable. +- [bend_symlinks.sh](misc/bend_symlinks/bend_symlinks.sh) (version 0.1, experimental) + fix broken symlinks in the internal file system. See + [README.md](misc/bend_symlinks/README.md) +- [move_files.py](misc/move_files/move_files.py) (version 0.1, experimental) + Script for moving files (change their path) in the internal file system based + on a two-column tsv file (with columns "from" and "to"). See + [README.md](misc/move_files/README.md). 
### Changed @@ -22,7 +29,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - -### Removed ### +### Removed - diff --git a/makefile b/makefile index 53c822deac4faaa058d7b1a04329abf2eb03d0bb..f031083a3fa0827d0015eb42b0db6a8de61baacb 100644 --- a/makefile +++ b/makefile @@ -55,6 +55,9 @@ test: easy-units MAVEN_DEBUG_OPTS="-Xdebug -Xnoagent -Djava.compiler=NONE -Dcaosdb.debug=true -Xrunjdwp:transport=dt_socket,server=y,suspend=y,address=0.0.0.0:9000" mvn test -X +test_misc: + cd misc/bend_symlinks/ && /usr/bin/shunit2 test/test_suite.sh + clean: clean-antlr mvn clean rm -rf .m2-local diff --git a/misc/bend_symlinks/README.md b/misc/bend_symlinks/README.md new file mode 100644 index 0000000000000000000000000000000000000000..f72357f8636a2cf961434108a52e98ef05181e62 --- /dev/null +++ b/misc/bend_symlinks/README.md @@ -0,0 +1,106 @@ +# About + +./bend_symlinks.sh - fix broken symlinks in the internal file system + +# Copyright and License Disclaimer: + + Copyright (C) 2019 Timm Fitschen (t.fitschen@indiscale.com) + Copyright (C) 2019 IndiScale (info@indiscale.com) + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as + published by the Free Software Foundation, either version 3 of the + License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <https://www.gnu.org/licenses/>. + + +# Version + 0.1 + +# Usage + + ./bend_symlinks.sh [-D] FILE_SYSTEM_DIR TARGET_PATTERN TARGET_REPLACEMENT + + Find all broken symlinks below FILE_SYSTEM_DIR which targets match + TARGET_PATTERN and replace the targets (i.e. 
overwrite the symlinks) + with TARGET_REPLACEMENT according to the rules of sed's replace + function. + + Print OLD_TARGET<tab>NEW_TARGET to stdout for each replaced symlink. + + ./bend_symlinks.sh -d [-D] FILE_SYSTEM_DIR OLD_TARGET_PREFIX NEW_TARGET_PREFIX + + Find all broken symlinks below FILE_SYSTEM_DIR which target a + path prefixed by OLD_TARGET_PREFIX and replace the targets (i.e. + overwrite the symlinks) by changing only the directory prefix to + NEW_TARGET_PREFIX. This is the preferred way to fix symlinks whose + targets have just been moved to another directory while the structure + under this directory stayed the same. + + Print OLD_TARGET<tab>NEW_TARGET to stdout for each replaced symlink. + + ./bend_symlinks.sh (-h|-v) + +# Parameters + + FILE_SYSTEM_DIR A directory of the internal file system's back-end + storage. + All symlinks below this directory are being processed by + this script. + E.g. '/mnt/caosdb_fs/ExperimentalData/' + TARGET_PATTERN A (extended sed-style) regular expression for matching + broken symlink targets. + TARGET_REPLACEMENT A (sed-style) replacement string for the new (fixed) + symlink targets. + OLD_TARGET_PREFIX The directory of old and broken symlink targets. + E.g. '/mnt/data/current/experiments/' + NEW_TARGET_PREFIX The directory of the new symlink targets. + E.g. '/mnt/data/archive/experiments/2019/' + -d Bend all symlinks under FILE_SYSTEM_DIR which target a + file prefixed by OLD_TARGET_PREFIX to point to + NEW_TARGET_PREFIX and keep the substructure the same. + This is the most useful special case for the scenario + where original data files have just been moved from one + folder into another and the symlinks need to be updated + accordingly. + -D Dry-run: Only print what would happen. + -h Print this help message and exit. + -v Print the version of this script and exit. + +# Examples + + 1. Files have been moved from '/mnt/data/current/experiments/' to + '/mnt/data/archive/experiments/2019/'.
Execute the script in the root + directory of the caosdb server's internal file system: + + $ ./bend_symlinks.sh -d ./ /mnt/data/current/experiments /mnt/data/archive/experiments/2019 + + 2. A file was renamed from '/mnt/data/procotol.pdf' to + '/mnt/data/protocol.pdf'. The symlink is located at + '/mnt/caosdb_fs/procotol.pdf'. Execute the script in the root directory + of the caosdb server's internal file system: + + $ ./bend_symlinks.sh ./ procotol\.pdf$ protocol.pdf + + 3. In order to print a table which contains the corrected name from example 2 + and which can be understood by the + [move_files.py](../move_files/move_files.py) script, pipe the standard + output like this. + + $ ./bend_symlinks.sh ./ procotol\.pdf$ protocol.pdf | sed -e 's/\/mnt\/data\///g' > changes.tsv + + Then the changes.tsv file contains 'procotol.pdf<tab>protocol.pdf<EOF>'. + + +# Tests + +Run test suite with + + $ shunit2 test/test_suite.sh diff --git a/misc/bend_symlinks/bend_symlinks.sh b/misc/bend_symlinks/bend_symlinks.sh new file mode 100755 index 0000000000000000000000000000000000000000..5a296fcdf7c447619751b958b6dc411273e9c951 --- /dev/null +++ b/misc/bend_symlinks/bend_symlinks.sh @@ -0,0 +1,132 @@ +#!/bin/bash +# +# ** header v3.0 +# This file is a part of the CaosDB Project. +# +# Copyright (C) 2019 Timm Fitschen (t.fitschen@indiscale.com) +# Copyright (C) 2019 IndiScale (info@indiscale.com) +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details.
+# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see <https://www.gnu.org/licenses/>. +# +# ** end header + +set -o errexit -o noclobber -o nounset -o pipefail + +VERSION=0.1 + +LICENCE="# Copyright and License Disclaimer: + + Copyright (C) 2019 Timm Fitschen (t.fitschen@indiscale.com) + Copyright (C) 2019 IndiScale (info@indiscale.com) + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as + published by the Free Software Foundation, either version 3 of the + License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <https://www.gnu.org/licenses/>. +" + +USAGE="# Usage + + $0 [-D] FILE_SYSTEM_DIR TARGET_PATTERN TARGET_REPLACEMENT + + Find all broken symlinks below FILE_SYSTEM_DIR which targets match + TARGET_PATTERN and replace the targets (i.e. overwrite the symlinks) + with TARGET_REPLACEMENT according to the rules of sed's replace + function. + + Print OLD_TARGET<tab>NEW_TARGET to stdout for each replaced symlink. + + $0 -d [-D] FILE_SYSTEM_DIR OLD_TARGET_PREFIX NEW_TARGET_PREFIX + + Find all broken symlinks below FILE_SYSTEM_DIR which targets a + path prefixed by OLD_TARGET_PREFIX and replace the targets (i.e. + overwrite the symlinks) by changing only the directory prefix to + NEW_TARGET_PREFIX. This is the preferred way to fix symlinks which + targets have just been moved to another directory while the structure + under this directory stayed the same. + + Print OLD_TARGET<tab>NEW_TARGET to stdout for each replaced symlink. 
+ + $0 (-h|-v) + +# Parameters + + FILE_SYSTEM_DIR A directory of the internal file system's back-end + storage. + All symlinks below this directory are being process by + this script. + E.g. '/mnt/caosdb_fs/ExperimentalData/' + TARGET_PATTERN A (extended sed-style) regular expression for matching + broken symlink targets. + TARGET_REPLACEMENT A (sed-style) replacement string for the new (fixed) + symlink targets. + OLD_TARGET_PREFIX The directory of old and broken symlink targets. + E.g. '/mnt/data/current/experiments/' + NEW_TARGET_PREFIX The directory of the new symlink targets. + E.g. '/mnt/data/archive/experiments/2019/' + -d Bend all symlinks under FILE_SYSTEM_DIR which target a + file prefixed by OLD_TARGET_PREFIX to point to + NEW_TARGET_PREFIX and keep the substructure the same. + This is the most useful special case for the scenario + where orginal data files have just been moved from one + folder into another and the symlinks need to updated + accordingly. + -D Dry-run: Only print what would happen. + -h Print this help message and exit. + -v Print the version of this script and ext. + +# Examples + + 1. Files have been moved from '/mnt/data/current/experiments/' to + /mnt/data/archive/experiments/2019/'. Execute the script in the root + directory of the caosdb server's internal file system: + + $ $0 -d ./ /mnt/data/current/experiments /mnt/data/archive/experiments/2019 + + 2. A File was renamed from '/mnt/data/procotol.pdf' to + '/mnt/data/protocol.pdf'. The symlink is located at + '/mnt/caosdb_fs/procotol.pdf'. Execute the script in the root directory + of the caosdb server's internal file system: + + $ $0 ./ procotol\\.pdf$ protocol.pdf + + 3. In order to print a table which contains the corrected name from example 2 + and which can be understood by the the + [move_files.py](../move_files/move_files.py) script pipe the standard + output like this. 
+ + $ $0 ./ procotol\\.pdf$ protocol.pdf | sed -e 's/\/mnt\/data// > changes.tsv + + Then the changes.tsv file contains 'procotol.pdf<tab>protocol.pdf<EOF>'. +" + +HELP="$0 - fix broken symlinks in the internal file system + +$LICENCE + +# Version + $VERSION + +$USAGE +" + +source src/main.sh diff --git a/misc/bend_symlinks/src/main.sh b/misc/bend_symlinks/src/main.sh new file mode 100644 index 0000000000000000000000000000000000000000..c2a6a94766d437e41619c2602d1c62417e09ee42 --- /dev/null +++ b/misc/bend_symlinks/src/main.sh @@ -0,0 +1,98 @@ +# ** header v3.0 +# This file is a part of the CaosDB Project. +# +# Copyright (C) 2019 Timm Fitschen (t.fitschen@indiscale.com) +# Copyright (C) 2019 IndiScale (info@indiscale.com) +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see <https://www.gnu.org/licenses/>. +# +# ** end header + +source "src/utils.sh" + +### PARSE COMMAND LINE ARGUMENTS ### + +# OPTIONS -v -h +IS_DRY_RUN=0 +IS_MOVE=0 +while getopts ":hvdD" opt; do + case ${opt} in + h ) + echo "$HELP" + exit 0 + ;; + v ) + echo "$VERSION" + exit 0 + ;; + d ) + IS_MOVE=1 + ;; + D ) + IS_DRY_RUN=1 + ;; + \? ) + echo "Invalid option. See '$0 -h' for more information" 1>&2 + exit 1 + ;; + esac +done +shift $((OPTIND -1)) + + +# POSTIONAL ARGUMENTS +if [ $# -ne 3 ] ; then + echo "Illegal number of positional parameters. 
See '$0 -h' for more information" 1>&2 + exit 1 +fi +FILE_SYSTEM_ROOT=$1 +REGEX_OLD=$2 +REPLACEMENT=$3 + + +if [ $IS_MOVE -eq 1 ] ; then + REGEX_OLD=$(old_dir "$REGEX_OLD") + REPLACEMENT=$(new_dir "$REPLACEMENT") +fi + +set -o noglob +for syml in $(find -P $(realpath $FILE_SYSTEM_ROOT) -type l) ; do + OLD_TARGET=$(realpath -m "$syml" | sed -n -r "/$REGEX_OLD/p") + if [ -z "$OLD_TARGET" ] ; then + # filter non matching + continue + fi + + if [ -e "$syml" ] ; then + echo "#IGNORING (not broken): $syml" 1>&2 + continue + fi + + NEW_TARGET=$(echo "$OLD_TARGET" | sed -r "s/$REGEX_OLD/$REPLACEMENT/g") + if [ ! -e "$NEW_TARGET" ] ; then + echo "#IGNORING (broken new): $NEW_TARGET" 1>&2 + continue + fi + + echo -e "$OLD_TARGET\t$NEW_TARGET" + if [ $IS_DRY_RUN -eq 1 ] ; then + continue + fi + + # -f means force overwriting + ln -fs "$NEW_TARGET" "$syml" ; + +done +set +o noglob + diff --git a/misc/bend_symlinks/src/utils.sh b/misc/bend_symlinks/src/utils.sh new file mode 100644 index 0000000000000000000000000000000000000000..0e10fe9acc0c1c27fa3f2f58add8e7daf845e24c --- /dev/null +++ b/misc/bend_symlinks/src/utils.sh @@ -0,0 +1,58 @@ +#!/bin/bash +# +# ** header v3.0 +# This file is a part of the CaosDB Project. +# +# Copyright (C) 2019-2020 Timm Fitschen (t.fitschen@indiscale.com) +# Copyright (C) 2019-2020 IndiScale GmbH (info@indiscale.com) +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. 
If not, see <https://www.gnu.org/licenses/>. +# +# ** end header +# +set -o errexit -o noclobber -o nounset -o pipefail + +function escape_simple_path () { + SPATH=$(escape_slash "$1") + # ? + SPATH=$(echo "$SPATH" | sed "s/\?/\\?/g") + # . + SPATH=$(echo "$SPATH" | sed "s/\./\\\\./g") + # $ + SPATH=$(echo "$SPATH" | sed "s/\\$/\\\\$/g") + # [ + SPATH=$(echo "$SPATH" | sed -r "s/\[/\\\\[/g") + # ( + SPATH=$(echo "$SPATH" | sed -r "s/\(/\\\\(/g") + # { + SPATH=$(echo "$SPATH" | sed -r "s/\{/\\\\{/g") + echo "$SPATH" +} + +function escape_slash () { + echo "${1//\//\\\/}" +} + + +function old_dir () { + OLD_DIR=$(realpath -m "$1") + OLD_DIR=$(escape_simple_path "$OLD_DIR") + echo "^$OLD_DIR\/(.*)$" +} + +function new_dir () { + NEW_DIR=$(realpath -m "$1") + NEW_DIR=$(escape_slash "$NEW_DIR") + echo "$NEW_DIR\/\1" +} diff --git a/misc/bend_symlinks/test/test_suite.sh b/misc/bend_symlinks/test/test_suite.sh new file mode 100644 index 0000000000000000000000000000000000000000..781e477e1ffacda36b12687ec05ebf5c9d021e15 --- /dev/null +++ b/misc/bend_symlinks/test/test_suite.sh @@ -0,0 +1,229 @@ + + +source "./src/utils.sh" +set +o errexit + +BEND=./bend_symlinks.sh +FILE_SYSTEM_ROOT=test_dir/links +DATA_DIR=test_dir/original + +oneTimeSetUp () { + mkdir -p $FILE_SYSTEM_ROOT $DATA_DIR +} + +oneTimeTearDown () { + rm -rf test_dir +} + +tearDown () { + rm -rf $FILE_SYSTEM_ROOT/* + rm -rf $DATA_DIR/* +} + +_make_test_file () { + touch "$DATA_DIR/$1" + ln -s $(realpath "$DATA_DIR/$1") "$FILE_SYSTEM_ROOT/$1" + assertEquals "initial target $1" $(realpath "$FILE_SYSTEM_ROOT/$1") $(realpath "$DATA_DIR/$1") +} + +_break_link_move_file () { + set -o noglob + OLD_PATH="$DATA_DIR/$1" + OLD_PATH_REAL=$(realpath "$OLD_PATH") + NEW_PATH="$DATA_DIR/$2" + NEW_PATH_REAL=$(realpath "$NEW_PATH") + LINK="$FILE_SYSTEM_ROOT/$1" + mv "$OLD_PATH_REAL" "$NEW_PATH_REAL" + assertEquals "still target $OLD_PATH_REAL" $(realpath "$LINK") "$OLD_PATH_REAL" + assertFalse "$LINK link is broken" "[ -f '$LINK' 
]" + assertFalse "$OLD_PATH_REAL was moved" "[ -f '$OLD_PATH_REAL' ]" + assertTrue "$NEW_PATH_REAL is there" "[ -f '$NEW_PATH_REAL' ]" + set +o noglob +} + +testVersion () { + assertEquals "version 0.1" "0.1" "$($BEND -v)" +} + +assertLinkOk () { + set -o noglob + LINK=$(realpath "$FILE_SYSTEM_ROOT/$1") + TARGET=$(realpath "$DATA_DIR/$2") + assertTrue "target exists $LINK" "[ -f '$LINK' ]" + assertEquals "target matches $TARGET" $TARGET "$LINK" + set +o noglob +} + +testIgnoreUnbroken () { + _make_test_file "fileA" + RESULTS=$($BEND $FILE_SYSTEM_ROOT "fileA" "fileA.new" 2>&1) # attempt to rename + + assertEquals "ignoring not broken" "#IGNORING (not broken): $(realpath "$PWD")/test_dir/links/fileA" "$RESULTS" + + assertLinkOk "fileA" "fileA" +} + +testIgnoreMissingNew () { + _make_test_file "fileA" + + _break_link_move_file "fileA" "fileA.new" + + RESULTS=$($BEND $FILE_SYSTEM_ROOT "fileA" "fileA.non" 2>&1) + + assertEquals "ignoring broken new" "#IGNORING (broken new): $(realpath $DATA_DIR)/fileA.non" "$RESULTS" + + TARGET=$(realpath -m "$DATA_DIR/fileA") + LINK=$(realpath -m "$FILE_SYSTEM_ROOT/fileA") + assertFalse "symlink still broken" "[ -e '$LINK' ]" + assertEquals "target still old" "$TARGET" "$LINK" +} + + +testFileName () { + _make_test_file "fileA" + _make_test_file "fileB" + + _break_link_move_file "fileA" "fileA.new" + + RESULTS=$($BEND $FILE_SYSTEM_ROOT "fileA" "fileA.new" 2>&1) # rename all fileA to fileA.new + + assertLinkOk "fileA" "fileA.new" + assertLinkOk "fileB" "fileB" +} + +testFullPath () { + _make_test_file "fileA" + _make_test_file "fileB" + + _break_link_move_file "fileA" "fileA.new" + + REGEX_OLD=$(escape_simple_path "$DATA_DIR/fileA") + REPLACEMENT=$(escape_slash "$DATA_DIR/fileA.new") + RESULTS=$($BEND $FILE_SYSTEM_ROOT "$REGEX_OLD" "$REPLACEMENT" 2>&1) + + assertLinkOk "fileA" "fileA.new" + assertLinkOk "fileB" "fileB" +} + +_testFullPathWithStrageChars () { + file_name="fileA$1bla" + _make_test_file "$file_name" + + 
_break_link_move_file "$file_name" "$file_name.new" + + REGEX_OLD=$(escape_simple_path "$DATA_DIR/$file_name") + REPLACEMENT=$(escape_slash "$DATA_DIR/$file_name.new") + RESULTS=$($BEND "$FILE_SYSTEM_ROOT" "$REGEX_OLD" "$REPLACEMENT" 2>&1) + + assertLinkOk "$file_name" "$file_name.new" + assertLinkOk "fileB" "fileB" +} + +testFullPathWithStrangeChars () { + _make_test_file "fileB" + _testFullPathWithStrageChars "A" + _testFullPathWithStrageChars "#" + _testFullPathWithStrageChars "0" + _testFullPathWithStrageChars "!" + _testFullPathWithStrageChars "." + _testFullPathWithStrageChars ";" + _testFullPathWithStrageChars "," + _testFullPathWithStrageChars "$" + _testFullPathWithStrageChars "[" + _testFullPathWithStrageChars "(" + _testFullPathWithStrageChars "{" + _testFullPathWithStrageChars "]" + _testFullPathWithStrageChars "[.]" +} + +testRegex () { + _make_test_file "fileA.0" + _make_test_file "dataA.1" + _make_test_file "fileA.0ok" + _make_test_file "fileB" + + _break_link_move_file "fileA.0" "file0-A" + _break_link_move_file "dataA.1" "data1-A" + + RESULTS=$($BEND "$FILE_SYSTEM_ROOT" "([a-z]+)([A-Z])+\.([01])$" "\1\3-\2" 2>&1) + + assertLinkOk "fileA.0" "file0-A" + assertLinkOk "dataA.1" "data1-A" + assertLinkOk "fileB" "fileB" + assertLinkOk "fileA.0ok" "fileA.0ok" +} + + +testDryRun () { + _make_test_file "fileA" + + OLD_TARGET=$(realpath "$DATA_DIR/fileA") + NEW_TARGET=$(realpath "$DATA_DIR/fileA.new") + + SYMLINK=$(realpath "$FILE_SYSTEM_ROOT/fileA") + assertTrue "1 target exists $SYMLINK" "[ -f '$SYMLINK' ]" + assertEquals "1 target matches $OLD_TARGET" "$OLD_TARGET" "$SYMLINK" + + _break_link_move_file "fileA" "fileA.new" + + SYMLINK=$(realpath "$FILE_SYSTEM_ROOT/fileA") + assertFalse "2 target does not exist $SYMLINK" "[ -f '$SYMLINK' ]" + assertEquals "2 target matches $OLD_TARGET" "$OLD_TARGET" "$SYMLINK" + + RESULTS=$($BEND -D "$FILE_SYSTEM_ROOT" "fileA" "fileA.new" 2>&1) + + SYMLINK=$(realpath "$FILE_SYSTEM_ROOT/fileA") + assertFalse "3 target does 
not exist $SYMLINK" "[ -f '$SYMLINK' ]" + assertEquals "3 target matches $OLD_TARGET" $OLD_TARGET "$SYMLINK" + + RESULTS=$($BEND "$FILE_SYSTEM_ROOT" "fileA" "fileA.new") + + SYMLINK=$(realpath "$FILE_SYSTEM_ROOT/fileA") + assertTrue "4 target exists $SYMLINK" "[ -f '$SYMLINK' ]" + assertEquals "4 target matches $NEW_TARGET" $NEW_TARGET "$SYMLINK" + +} + +testUtilsOldAndNewDir () { + OLD_DIR="/root/to/old/" + assertTrue "old root does not exist" "[ ! -e '$OLD_DIR' ]" + NEW_DIR=$(realpath -m "$FILE_SYSTEM_ROOT/root/to/new/") + # new_dir must exist + mkdir -p "$NEW_DIR" + + OLD_DIR=$(old_dir "$OLD_DIR") + NEW_DIR=$(new_dir "$NEW_DIR") + PWD_ESC=$(escape_simple_path $(realpath "$FILE_SYSTEM_ROOT")) + + assertEquals "OLD_DIR correct" "^\/root\/to\/old\/(.*)$" "$OLD_DIR" + assertEquals "NEW_DIR correct" "$PWD_ESC\/root\/to\/new\/\1" "$NEW_DIR" + + RESULT=$(echo "/root/to/old/subdir/fileA" | sed -r "s/$OLD_DIR/$NEW_DIR/g") + assertEquals "result" $(realpath -m "$FILE_SYSTEM_ROOT/root/to/new/subdir/fileA") "$RESULT" +} + + +testSymlinkToSymlink () { + touch "$DATA_DIR/fileA" + ln -s $(realpath "$DATA_DIR/fileA") "$DATA_DIR/symlinkA" + ln -s $(realpath -s "$DATA_DIR/symlinkA") "$FILE_SYSTEM_ROOT/symlinkA" + assertLinkOk "symlinkA" "symlinkA" + assertLinkOk "symlinkA" "fileA" + + + # move only the symlink in data_dir + mv $DATA_DIR/symlinkA $DATA_DIR/symlinkA.new + assertFalse "symlink in fs broken" "[ -e '$FILE_SYSTEM_ROOT/symlinkA' ]" + assertTrue "symlink in data ok ($TARGET)" "[ -e '$DATA_DIR/symlinkA.new' ]" + + RESULT=$($BEND "$FILE_SYSTEM_ROOT" "symlinkA" "symlinkA.new" 2>&1) + + assertTrue "simlink is fixed" "[ -e '$FILE_SYSTEM_ROOT/symlinkA' ]" + + + assertLinkOk "symlinkA" "symlinkA.new" + assertLinkOk "symlinkA" "fileA" + +} + + diff --git a/misc/check_symlinks/check_symlinks b/misc/check_symlinks/check_symlinks index 5545613982e7772189697eb596d9786af0d29b1b..0f6eb0d1d3626ed32fa99aa6a31891c6ddf90635 100755 --- a/misc/check_symlinks/check_symlinks +++ 
b/misc/check_symlinks/check_symlinks @@ -77,7 +77,6 @@ function check_recursively() { else echo "$subdir is empty" fi - done } diff --git a/misc/move_files/README.md b/misc/move_files/README.md new file mode 100644 index 0000000000000000000000000000000000000000..df67f2025b8627a09e6886038646db4ec4436941 --- /dev/null +++ b/misc/move_files/README.md @@ -0,0 +1,24 @@ +# About + +Version: 0.1 + +Usage: `./move_files.py [-h] changes` + +This script moves files in the internal file system of the CaosDB server. It +reads file paths from a tsv file with columns from and to. For each line it +creates an update of a caosdb file object where the path that equals "from" is +changed to "to". + +positional arguments: + changes The file that defines the renames + +optional arguments: + -h, --help show this help message and exit + +# Tests + +The tests are integration tests which require a running test database and a sufficiently configured caosdb-pylib. + +Run `pytest test_move_files.py` to insert a bunch of test files, rename and subsequently delete them. + + diff --git a/misc/move_files/move_files.py b/misc/move_files/move_files.py new file mode 100755 index 0000000000000000000000000000000000000000..44cc3f4f7e61027b833a7774705577c59e756e72 --- /dev/null +++ b/misc/move_files/move_files.py @@ -0,0 +1,101 @@ +#!/usr/bin/env python3 +# encoding: utf-8 +# +# ** header v3.0 +# This file is a part of the CaosDB Project. +# +# Copyright (C) 2019 Henrik tom Wörden +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details.
+# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see <https://www.gnu.org/licenses/>. +# +# ** end header +# + +""" +This script moves files in the internal file system of the CaosDB server. It +reads file paths form a tsv file with columns from and to. For each line it +creates an update of a caosdb file object where the path that equals "from" is +changed to "to". + +Version: 0.1 +""" + +import argparse +import sys +import time +from argparse import ArgumentParser + +import pandas as pd +from tqdm import tqdm + +import caosdb as db + + +def rename(changes, chunksize=10): + """change the path of files based on a two-column table (from, to). + + Parameters + ---------- + + changes : pd.DataFrame + A table with two columns, the old path and the new path. + chunksize : int, optional + How many files are being moved in one go (default is 10). + """ + i = 0 + + for i in tqdm(range(changes.shape[0]//chunksize+1)): + chunk = changes.iloc[i*chunksize:(i+1)*chunksize] + + if chunk.shape[0] == 0: + continue + cont = db.Container() + + for _, (old, new) in chunk.iterrows(): + cont.append(db.File(path=old)) + + cont.retrieve() + + for fi, (_, (old, new)) in zip(cont, chunk.iterrows()): + assert fi.path == old + fi.path = new + cont.update() + i += 1 + + +def main(argv=None): + '''Command line options.''' + + if argv is None: + argv = sys.argv + else: + sys.argv.extend(argv) + + # Setup argument parser + parser = ArgumentParser(description=__doc__) + parser.add_argument("changes", help="The file that defines the renames") + args = parser.parse_args() + + changes = pd.read_csv(args.changes, sep="\t") + + if ("to" not in changes.columns or "from" not in changes.columns): + raise ValueError("The file supplied under changes shall have a 'to'" + " and a 'from' column.") + + assert 0 == pd.isnull(changes).sum().sum() + rename(changes) + + +if __name__ == "__main__": + sys.exit(main()) diff --git 
a/misc/move_files/test_move_files.py b/misc/move_files/test_move_files.py new file mode 100644 index 0000000000000000000000000000000000000000..2edb76509dc6a350057997cc6f894abd4a4bb163 --- /dev/null +++ b/misc/move_files/test_move_files.py @@ -0,0 +1,78 @@ +# encoding: utf-8 +# +# This file is a part of the CaosDB Project. +# +# Copyright (C) 2019 Henrik tom Wörden +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see <https://www.gnu.org/licenses/>. + + +import os +import random +import unittest + +import pandas as pd + +import caosdb as db +from move_files import rename + + +def create_filename(): + name = os.path.normpath("".join([ + random.choice("qwertyuiopkkhgfdsxcvbnm/") for el in range(30)])) + + if not name.startswith("/"): + name = "/" + name + + return name + + +class TestMoveFiles(unittest.TestCase): + """ + Files are being created, changed and then it is checked whether the changes + were correct. 
+ """ + + def setUp(self): + self.files_to_be_changed = [create_filename() for i in range(40)] + self.files_not_to_be_changed = [create_filename() for i in range(20)] + self.new_names = [f+"new" for f in self.files_to_be_changed] + + table = pd.DataFrame([self.files_to_be_changed, self.new_names]) + table = table.T + table.columns = ["from", "to"] + self.table = table + self.cont = db.Container() + self.cont.extend([db.File(path=f, file=__file__) + for f in self.files_to_be_changed + + self.files_not_to_be_changed]) + self.cont.insert() + print("inserted") + + def test_move(self): + rename(self.table) + self.cont.retrieve() + + for i, (fi, name) in enumerate(zip( + self.cont, + self.files_to_be_changed + self.files_not_to_be_changed)): + + if i < len(self.files_to_be_changed): + self.assertEqual(fi.path, self.new_names[i]) + else: + self.assertEqual(fi.path, name) + + def tearDown(self): + self.cont.delete() + print("deleted") diff --git a/misc/mv_unknown_files_script/mv_unknown_files b/misc/mv_unknown_files_script/mv_unknown_files deleted file mode 100755 index 8b26b464d8dd40a15f3418895dd440903a6da8a6..0000000000000000000000000000000000000000 --- a/misc/mv_unknown_files_script/mv_unknown_files +++ /dev/null @@ -1,27 +0,0 @@ -#!/bin/bash -# -# ** header v3.0 -# This file is a part of the CaosDB Project. -# -# Copyright (C) 2018 Research Group Biomedical Physics, -# Max-Planck-Institute for Dynamics and Self-Organization Göttingen -# -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU Affero General Public License as -# published by the Free Software Foundation, either version 3 of the -# License, or (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Affero General Public License for more details. 
-# -# You should have received a copy of the GNU Affero General Public License -# along with this program. If not, see <https://www.gnu.org/licenses/>. -# -# ** end header -# - -grep ": Unknown file." $1 | sed 's/<Warning.*tion="//' | sed 's/:\sUnknown.*//' | awk '{ print "move ", $1 }' - - diff --git a/src/test/docker/Dockerfile b/src/test/docker/Dockerfile index 9e7bd9a73a3a52e052f4b5953adf1178254f6f03..d8b831eebbda33a3143f89a8da1edb770788bce5 100644 --- a/src/test/docker/Dockerfile +++ b/src/test/docker/Dockerfile @@ -1,4 +1,4 @@ FROM debian:stretch RUN apt-get update && \ apt-get install git make mariadb-server maven openjdk-8-jdk-headless \ - python3-pip screen libpam0g-dev unzip curl -y + python3-pip screen libpam0g-dev unzip curl shunit2 -y