diff --git a/src/main/java/org/caosdb/server/ServerProperties.java b/src/main/java/org/caosdb/server/ServerProperties.java index 2d849c0e004b8c48d7e07f13d41fdad82735863e..03ffadcc4f48d9d8b1f941de102c6d6c03abd6f8 100644 --- a/src/main/java/org/caosdb/server/ServerProperties.java +++ b/src/main/java/org/caosdb/server/ServerProperties.java @@ -46,7 +46,6 @@ public class ServerProperties extends Properties implements Observable { public static final String KEY_FILE_SYSTEM_ROOT = "FILE_SYSTEM_ROOT"; public static final String KEY_TMP_FILES = "TMP_FILES"; public static final String KEY_SHARED_FOLDER = "SHARED_FOLDER"; - public static final String KEY_USER_FOLDERS = "USER_FOLDERS"; public static final String KEY_AUTH_OPTIONAL = "AUTH_OPTIONAL"; public static final String KEY_MYSQL_HOST = "MYSQL_HOST"; @@ -56,11 +55,7 @@ public class ServerProperties extends Properties implements Observable { public static final String KEY_MYSQL_USER_PASSWORD = "MYSQL_USER_PASSWORD"; public static final String KEY_MYSQL_SCHEMA_VERSION = "MYSQL_SCHEMA_VERSION"; - public static final String KEY_BASE_PATH = "BASE_PATH"; - public static final String KEY_FILE_POLICY = "FILE_POLICY"; - public static final String KEY_FILE_MESSAGES = "FILE_MESSAGES"; public static final String KEY_CONTEXT_ROOT = "CONTEXT_ROOT"; - public static final String KEY_POLICY_COMPONENT = "POLICY_COMPONENT"; public static final String KEY_SERVER_BIND_ADDRESS = "SERVER_BIND_ADDRESS"; public static final String KEY_SERVER_PORT_HTTPS = "SERVER_PORT_HTTPS"; @@ -100,10 +95,6 @@ public class ServerProperties extends Properties implements Observable { public static final String KEY_TRANSACTION_BENCHMARK_ENABLED = "TRANSACTION_BENCHMARK_ENABLED"; - public static final String KEY_INSERT_FILES_IN_DIR_ALLOWED_DIRS = - "INSERT_FILES_IN_DIR_ALLOWED_DIRS"; // see - // server/jobs/core/InsertFilesInDir.java - public static final String KEY_USER_SOURCES_INI_FILE = "USER_SOURCES_INI_FILE"; public static final String KEY_NEW_USER_DEFAULT_ACTIVITY = "NEW_USER_DEFAULT_ACTIVITY"; diff --git a/src/main/java/org/caosdb/server/jobs/core/InsertFilesInDir.java b/src/main/java/org/caosdb/server/jobs/core/InsertFilesInDir.java deleted file mode 100644 index ff1a4cfc9b32f20a408e43fddc30b74b845c7fdc..0000000000000000000000000000000000000000 --- a/src/main/java/org/caosdb/server/jobs/core/InsertFilesInDir.java +++ /dev/null @@ -1,430 +0,0 @@ -/* - * ** header v3.0 - * This file is a part of the CaosDB Project. - * - * Copyright (C) 2018 Research Group Biomedical Physics, - * Max-Planck-Institute for Dynamics and Self-Organization Göttingen - * Copyright (C) 2021 Indiscale GmbH <info@indiscale.com> - * Copyright (C) 2021 Timm Fitschen <t.fitschen@indiscale.com> - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as - * published by the Free Software Foundation, either version 3 of the - * License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Affero General Public License for more details. - * - * You should have received a copy of the GNU Affero General Public License - * along with this program. If not, see <https://www.gnu.org/licenses/>. - * - * ** end header - */ -package org.caosdb.server.jobs.core; - -import java.io.File; -import java.io.IOException; -import java.util.LinkedList; -import java.util.List; -import java.util.regex.Matcher; -import java.util.regex.Pattern; -import org.caosdb.api.entity.v1.MessageCode; -import org.caosdb.server.CaosDBException; -import org.caosdb.server.CaosDBServer; -import org.caosdb.server.FileSystem; -import org.caosdb.server.ServerProperties; -import org.caosdb.server.database.exceptions.TransactionException; -import org.caosdb.server.database.misc.RollBackHandler; -import org.caosdb.server.entity.EntityInterface; -import org.caosdb.server.entity.FileProperties; -import org.caosdb.server.entity.InsertEntity; -import org.caosdb.server.entity.Message; -import org.caosdb.server.entity.Message.MessageType; -import org.caosdb.server.entity.RetrieveEntity; -import org.caosdb.server.entity.Role; -import org.caosdb.server.filesystem.Hasher; -import org.caosdb.server.jobs.FlagJob; -import org.caosdb.server.jobs.Job; -import org.caosdb.server.jobs.JobAnnotation; -import org.caosdb.server.jobs.TransactionStage; -import org.caosdb.server.transaction.Retrieve; -import org.caosdb.server.transaction.WriteTransactionInterface; -import org.caosdb.server.utils.EntityStatus; -import org.caosdb.server.utils.FileUtils; -import org.caosdb.server.utils.Undoable; -import org.caosdb.server.utils.Utils; - -@JobAnnotation( - flag = "InsertFilesInDir", - loadOnDefault = false, - stage = TransactionStage.INIT, - description = - "For expert users only! Risk of creating spam records!\nValue of this flag might be any directory on the servers local file system which is part of the server's back-end file storage. This job will insert every readable, nonhidden file in said directory into the database and link the file with a symlink. This is useful to add a huge amount of files without actully copying them to the back-end file storage. If you call this job on a directory more than once every file that was recently added to the source directory is inserted. Every yet known file is left untouched. \nOptional parameter -e EXCLUDE: A regular expression of files which are to be ignored. \n Optional parameter -i INCLUDE: a regular expression of files which are to be included. By default, all files are included. The -e takes precedence. \nOptional parameter -p PREFIX: Stores all new files into the directory PREFIX in the server's file system.\nOptional parameter --force-allow-symlinks: Simlinks in your data are a source of problems for the database. Therefore, simlinks are ignored by default. This option allows symlinks (but still generates simlink warnings). \nPrepend/Dry run: Call this flag with a retrieve transaction (HTTP GET) and it will only count all files and list them without actually inserting them.") -public class InsertFilesInDir extends FlagJob { - - private File tmp = null; - private String prefix = ""; - private Pattern include = null; - private Pattern exclude = null; - private boolean forceSymLinks = false; - private Pattern valueParser = - Pattern.compile( - "(?:(?:-p\\s*([^\\s]*?)\\s+)|(?:-i\\s*([^\\s]*?)\\s+)|(?:-e\\s*([^\\s]*?)\\s+)|(--force-allow-symlinks\\s+))|([^-].*)"); - - /** - * @return a List of directories which subdirs are allowed to be batch-added. Needs to be - * configured in server.conf. - * @throws IOException - */ - private static LinkedList<File> getAllowedFolders() { - - final String[] dirs = - CaosDBServer.getServerProperty(ServerProperties.KEY_INSERT_FILES_IN_DIR_ALLOWED_DIRS) - .split("\\s*,?\\s*"); - final LinkedList<File> ret = new LinkedList<File>(); - for (final String dir : dirs) { - if (dir.length() > 0) { - try { - ret.add((new File(dir)).getCanonicalFile()); - } catch (final IOException e) { - throw new TransactionException(e); - } - } - } - return ret; - } - - public String parseValue(String value) { - - String ret = value; - final Matcher matcher = valueParser.matcher(value); - while (matcher.find()) { - if (matcher.group(1) != null) { - this.prefix = matcher.group(1).replaceFirst("/$", "") + "/"; - } - if (matcher.group(2) != null) { - this.include = Pattern.compile(matcher.group(2)); - } - if (matcher.group(3) != null) { - this.exclude = Pattern.compile(matcher.group(3)); - } - if (matcher.group(4) != null) { - this.forceSymLinks = true; - } - if (matcher.group(5) != null) { - ret = matcher.group(5); - } - } - return ret; - } - - @Override - protected void job(final String value) { - - String dirStr = parseValue(value); - - final File dir = new File(dirStr); - - if (dir.exists()) { - try { - checkDirIsAllowed(dir); - if (shouldBeProcessed(dir)) { - getContainer() - .addMessage( - new Message( - "Files count in " - + dir.getName() - + "/: " - + Integer.toString( - processFiles(this.prefix + dir.getName() + "/", dir)))); - } - } catch (final Message m) { - getContainer().addMessage(m); - return; - } catch (final IOException e) { - throw new TransactionException(e); - } - } else { - getContainer() - .addMessage( - new Message( - MessageType.Error, - MessageCode.MESSAGE_CODE_UNKNOWN, - "No such directory: " + dirStr)); - return; - } - } - - private String getTmpDir() { - if (this.tmp == null) { - this.tmp = new File(FileSystem.getTmp() + "InsertFileInDirLinks" + Utils.getUID()); - this.tmp.mkdirs(); - ((RollBackHandler) getTransaction().getAccess().getHelper("RollBack")) - .append( - new Undoable() { - - @Override - public void undo() { - try { - final Undoable delete = FileUtils.delete(InsertFilesInDir.this.tmp, true); - delete.cleanUp(); - } catch (final IOException | CaosDBException | InterruptedException e) { - e.printStackTrace(); - } - } - - @Override - public void cleanUp() { - try { - final Undoable delete = FileUtils.delete(InsertFilesInDir.this.tmp, true); - delete.cleanUp(); - } catch (final IOException | CaosDBException | InterruptedException e) { - e.printStackTrace(); - } - } - }); - } - try { - return this.tmp.getCanonicalPath() + "/"; - } catch (final IOException e) { - throw new TransactionException(e); - } - } - - /** - * Creates a symlink for each readable, nonhidden file in 'dir' in a tmp directory and adds a new - * file entity to the container. If 'dir' is a directory this method is called recursively. - * - * @param dir All files in this directory are to be processed. - * @param root A path to 'dir' which is relative to said root directory to the root directory of - * all files which are to be processed - * @return Number of processed files. - * @throws Exception - */ - private int processFiles(final String root, final File dir) { - // count files - int i = 0; - - try { - for (final File sub : dir.listFiles()) { - if (!shouldBeProcessed(sub)) { - continue; - } - - if (sub.isDirectory()) { - i += processFiles(root + sub.getName() + "/", sub); - } else { - i++; - final String targetPath = root + sub.getName(); - final EntityInterface newFileEntity = createInsertFileEntity(sub.getName()); - final long size = sub.length(); - final FileProperties fp = new FileProperties(null, targetPath, size); - newFileEntity.setFileProperties(fp); - - // we only want the checksums here during a dry run. - if (size < 1000L && getTransaction() instanceof Retrieve) { - fp.setChecksum(Hasher.SHA512(sub)); - } - - try { - final File existingFileAtTargetPath = FileSystem.getFromFileSystem(targetPath); - if (existingFileAtTargetPath != null - && FileUtils.isSymlinkPointingTo(existingFileAtTargetPath, sub)) { - // sub is already correctly linked. - // ignore it. - continue; - } - - // add create symlink and add file record to this - // container if the target - // path is allowed - if (FileSystem.checkTarget( - newFileEntity, - getTransaction().getAccess(), - getTransaction().getTransactionBenchmark()) - && newFileEntity.getEntityStatus() != EntityStatus.UNQUALIFIED) { - - final File link = - FileUtils.createSymlink(new File(getTmpDir() + Utils.getUID()), sub); - fp.setFile(link); - getContainer().add(newFileEntity); - loadJobs(newFileEntity); - } - } catch (final Message m) { - throw new TransactionException(m); - } - } - } - } catch (final IOException e) { - throw new TransactionException(e); - } - return i; - } - - /** - * Create a new InsertEntity (if this is an actual run) or a new RetrieveEntity (in dry-run mode) - * with {@link Role.File}. - * - * @param name the file name - * @return new File entity - */ - private EntityInterface createInsertFileEntity(String name) { - if (getTransaction() instanceof WriteTransactionInterface) { - return new InsertEntity(name, Role.File); - } - EntityInterface result = new RetrieveEntity(name); - result.setRole(Role.File); - return result; - } - - boolean isExcluded(File f) throws IOException { - return this.exclude != null && this.exclude.matcher(f.getCanonicalPath()).find(); - } - - boolean isNotIncluded(File f) throws IOException { - return this.include != null && !this.include.matcher(f.getCanonicalPath()).find(); - } - - private boolean shouldBeProcessed(final File sub) throws IOException { - if (sub.isFile()) { - if (this.isNotIncluded(sub)) { - getContainer() - .addMessage( - new Message( - MessageType.Warning, - MessageCode.MESSAGE_CODE_UNKNOWN, - "Not explicitly included file: " + sub.getCanonicalPath())); - return false; - } - if (this.isExcluded(sub)) { - getContainer() - .addMessage( - new Message( - MessageType.Warning, - MessageCode.MESSAGE_CODE_ENTITY_DOES_NOT_EXIST, - "Explicitly excluded file: " + sub.getCanonicalPath())); - return false; - } - } - if (sub.isHidden()) { - getContainer() - .addMessage( - new Message( - MessageType.Warning, - MessageCode.MESSAGE_CODE_UNKNOWN, - "Hidden directory or file: " + sub.getCanonicalPath())); - return false; - } - if (sub.isDirectory() && !sub.canExecute()) { - getContainer() - .addMessage( - new Message( - MessageType.Warning, - MessageCode.MESSAGE_CODE_UNKNOWN, - "Unaccessible directory: " + sub.getCanonicalPath())); - return false; - } - if (!sub.canRead()) { - getContainer() - .addMessage( - new Message( - MessageType.Warning, - MessageCode.MESSAGE_CODE_UNKNOWN, - "Unreadable directory or file: " + sub.getCanonicalPath())); - return false; - } - if (FileUtils.isSymlink(sub)) { - getContainer() - .addMessage( - new Message( - MessageType.Warning, - MessageCode.MESSAGE_CODE_ENTITY_HAS_UNQUALIFIED_PARENTS, - "Directory or file is symbolic link: " + sub.getAbsolutePath())); - if (!this.forceSymLinks) { - return false; - } - } - return true; - } - - private void loadJobs(final EntityInterface e) { - final List<Job> loadJobs = loadJobs(e, getTransaction()); - getTransaction().getSchedule().addAll(loadJobs); - } - - /** - * Checks if 'dir' can be batch-added to the database. - * - * @param dir - * @throws Message if 'dir' is not allowed with details why not. - * @throws IOException - */ - private void checkDirIsAllowed(final File dir) throws Message { - // check if server's working dir, file system, or tmp dir - // overlaps the directory to be inserted. - - if (!dir.isDirectory()) { - throw new Message( - MessageType.Error, MessageCode.MESSAGE_CODE_UNKNOWN, "Dir is not a directory."); - } - - if (!dir.canRead() || !dir.canExecute()) { - throw new Message( - MessageType.Error, - MessageCode.MESSAGE_CODE_UNKNOWN, - "Cannot read or enter the desired directory."); - } - - final File base = new File(FileSystem.getBasepath()); - final File tmp = new File(FileSystem.getTmp()); - final File root = new File("."); - - if (isSubDir(dir, base) - || isSubDir(base, dir) - || isSubDir(dir, tmp) - || isSubDir(tmp, dir) - || isSubDir(dir, root) - || isSubDir(root, dir)) { - throw new Message( - MessageType.Error, - MessageCode.MESSAGE_CODE_UNKNOWN, - "Dir is not allowed: " + dir.toString()); - } - - for (final File f : getAllowedFolders()) { - // check if dir is a subdirectory of an allowed directory - if (isSubDir(f, dir)) { - return; - } - } - throw new Message( - MessageType.Error, - MessageCode.MESSAGE_CODE_UNKNOWN, - "Dir is not allowed: " - + dir.toString() - + " Allowed directories: " - + getAllowedFolders().toString()); - } - - /** - * Checks if sub is a subdirectory of dir. - * - * @param dir - * @param sub - * @throws Message - */ - private boolean isSubDir(final File dir, final File sub) throws Message { - if (sub == null) { - // probably, sub was root in the last iteration. - return false; - } - - if (dir.equals(sub)) { - return true; - } - - return isSubDir(dir, sub.getParentFile()); - } -} diff --git a/src/test/java/org/caosdb/server/jobs/core/TestInsertFilesInDir.java b/src/test/java/org/caosdb/server/jobs/core/TestInsertFilesInDir.java deleted file mode 100644 index a7202be9e7f8a4cfea5c050f403f15025511d09c..0000000000000000000000000000000000000000 --- a/src/test/java/org/caosdb/server/jobs/core/TestInsertFilesInDir.java +++ /dev/null @@ -1,26 +0,0 @@ -package org.caosdb.server.jobs.core; - -import static org.junit.jupiter.api.Assertions.assertTrue; - -import java.io.File; -import java.io.IOException; -import org.caosdb.server.CaosDBServer; -import org.junit.jupiter.api.BeforeAll; -import org.junit.jupiter.api.Test; - -public class TestInsertFilesInDir { - - @BeforeAll - public static void setup() throws IOException { - CaosDBServer.initServerProperties(); - } - - @Test - public void testExclude() throws IOException { - final InsertFilesInDir job = new InsertFilesInDir(); - job.init(null, null, null); - job.parseValue("-e ^.*test.*$ test"); - final File testFile = new File("test.dat"); - assertTrue(job.isExcluded(testFile)); - } -}