diff --git a/patches/patch20210219-5.0.0-rc1/patch.sql b/patches/patch20210219-5.0.0-rc1/patch.sql index e73bd209f52b640605492399138589483b0097fc..c7426fe9cfb466fb9a43643fa62115e2829a619e 100644 --- a/patches/patch20210219-5.0.0-rc1/patch.sql +++ b/patches/patch20210219-5.0.0-rc1/patch.sql @@ -1,46 +1,131 @@ -- USE _caosdb_schema_unit_tests; --- REMOVE SQLITE datatype +-- REMOVE SQLITE datatype (obsolete) DELETE FROM name_data WHERE entity_id = 50; DELETE FROM entities WHERE id = 50; -ALTER TABLE files ADD UNIQUE (`path`); +/* +-- Create file_hashes table. +-- This table is necessary because not all FSO objects have hashes (e.g. +-- directories) and files may have more than only one hash stored. +CREATE TABLE IF NOT EXISTS file_hashes ( + file_id INT UNSIGNED, + digest VARBINARY(255) NOT NULL, + algorithm VARBINARY(255) NOT NULL DEFAULT "SHA-512", + checked_timestamp BIGINT NOT NULL DEFAULT 0, + PRIMARY KEY (file_id, algorithm), + FOREIGN KEY (file_id) REFERENCES files (file_id) +); + +-- ... and create the corresponding archive_file_hashes tables as usual. +CREATE TABLE IF NOT EXISTS archive_file_hashes ( + file_id INT UNSIGNED NOT NULL, + digest VARBINARY(255) NOT NULL, + algorithm VARBINARY(255) NOT NULL DEFAULT "SHA-512", + checked_timestamp BIGINT NOT NULL DEFAULT 0, + _iversion INT UNSIGNED NOT NULL, + PRIMARY KEY (file_id, _iversion, algorithm), + FOREIGN KEY (file_id, _iversion) REFERENCES archive_files (file_id, _iversion) ON DELETE CASCADE +); + +-- Move existing hash data to new tables... +INSERT INTO file_hashes (file_id, digest, checked_timestamp) SELECT file_id, hash, checked_timestamp FROM files WHERE hash IS NOT NULL; +INSERT INTO archive_file_hashes (file_id, _iversion, digest, checked_timestamp) SELECT file_id, _iversion, hash, checked_timestamp FROM archive_files WHERE hash IS NOT NULL; +-- and remove hash colums from the files and archive_files tables. +ALTER TABLE files DROP COLUMN hash; +ALTER TABLE archive_files DROP COLUMN hash; +*/ +-- Add new fields to files table... ALTER TABLE files ADD COLUMN IF NOT EXISTS ( parent_directory INT UNSIGNED DEFAULT NULL, mimetype VARBINARY(255) DEFAULT NULL, file_storage_id VARBINARY(255) NOT NULL DEFAULT "DEFAULT", - file_key VARBINARY(255) DEFAULT NULL, - hash_algorithm VARBINARY(255) DEFAULT NULL + file_key VARBINARY(255) DEFAULT NULL ); +-- ... and to the corresponding archive_files table. ALTER TABLE archive_files ADD COLUMN IF NOT EXISTS ( parent_directory INT UNSIGNED DEFAULT NULL, mimetype VARBINARY(255) DEFAULT NULL, file_storage_id VARBINARY(255) NOT NULL DEFAULT "DEFAULT", file_key VARBINARY(40000) DEFAULT NULL, - checked_timestamp BIGINT NOT NULL DEFAULT 0, - hash_algorithm VARBINARY(255) DEFAULT NULL + checked_timestamp BIGINT NOT NULL DEFAULT 0 ); +-- no two files at the same path are allowed. +ALTER TABLE files ADD UNIQUE (`path`); +-- directories cannot be deleted without deleting all the content first. +ALTER TABLE files ADD FOREIGN KEY (parent_directory) REFERENCES entities (id); +ALTER TABLE archive_files ADD FOREIGN KEY (parent_directory) REFERENCES entities (id) ON DELETE CASCADE; + + ALTER TABLE entities MODIFY COLUMN `role` enum('RECORDTYPE','RECORD','FILE','DOMAIN','PROPERTY','DATATYPE','ROLE','QUERYTEMPLATE', 'DIRECTORY') COLLATE utf8_unicode_ci NOT NULL; -INSERT INTO entities (id, description, role, acl) VALUES (9, "The directory role.", "ROLE", 0); -INSERT INTO name_data (domain_id, entity_id, property_id, value, status, pidx) VALUES (0, 9, 20, "DIRECTORY", "FIX", 0); +-- Add a new special role "DIRECTORY" +INSERT IGNORE INTO entities (id, description, role, acl) VALUES (9, "The directory role.", "ROLE", 0); +INSERT IGNORE INTO name_data (domain_id, entity_id, property_id, value, status, pidx) VALUES (0, 9, 20, "DIRECTORY", "FIX", 0); +-- Add a special nameless directory which is used as the root directory of the +-- internal file system. +INSERT IGNORE INTO entities (id, description, role, acl) VALUES (51, "The root directory of the internal files system", "DIRECTORY", 0); +INSERT IGNORE INTO files + (file_id, path, size, hash, checked_timestamp, mimetype, file_storage_id, + file_key, parent_directory) + VALUES (51, "", 0, NULL, 0, "inode/directory", "DEFAULT", "", NULL); + +-- In the default file storage back-end the key is just the path. UPDATE files SET file_key=path; UPDATE archive_files SET file_key=path; +-- and now, set NOT NULL for the file_key column +ALTER TABLE files MODIFY COLUMN file_key VARBINARY(40000) NOT NULL; +ALTER TABLE archive_files MODIFY COLUMN file_key VARBINARY(40000) NOT NULL; +-- Size may be NULL for directories ALTER TABLE files MODIFY COLUMN size BIGINT NULL DEFAULT NULL; +ALTER TABLE archive_files MODIFY COLUMN size BIGINT NULL DEFAULT NULL; + +-- More characters for long names ALTER TABLE files MODIFY COLUMN path VARCHAR(5000) NOT NULL; ALTER TABLE archive_files MODIFY COLUMN path VARCHAR(5000) NOT NULL; -ALTER TABLE files MODIFY COLUMN file_key VARBINARY(40000) NOT NULL; -ALTER TABLE archive_files MODIFY COLUMN file_key VARBINARY(40000) NOT NULL; + +-- Test data for the _create_dirs procedure... +-- INSERT INTO entities (id, role, acl) VALUES + -- (52, "FILE", 0), + -- (53, "FILE", 0), + -- (54, "FILE", 0), + -- (55, "FILE", 0), + -- (56, "FILE", 0), + -- (57, "FILE", 0), + -- (58, "FILE", 0), + -- (59, "FILE", 0), + -- (60, "FILE", 0); +-- INSERT INTO files + -- (file_id, path, size, hash, checked_timestamp, mimetype, file_storage_id, + -- file_key, parent_directory) + -- VALUES + -- (52, "file1", 0, NULL, 0, "bla", "DEFAULT", "", NULL), + -- (53, "file2", 0, NULL, 0, "bla", "DEFAULT", "", NULL), + -- (54, "A/B/file3", 0, NULL, 0, "bla", "DEFAULT", "", NULL), + -- (55, "A/B/file4", 0, NULL, 0, "bla", "DEFAULT", "", NULL), + -- (56, "A/B/C/file5", 0, NULL, 0, "bla", "DEFAULT", "", NULL), + -- (57, "A/B/C/file6", 0, NULL, 0, "bla", "DEFAULT", "", NULL), + -- (58, "B/C/D/file7", 0, NULL, 0, "bla", "DEFAULT", "", NULL), + -- (59, "B/C/D/file8", 0, NULL, 0, "bla", "DEFAULT", "", NULL), + -- (60, "B/C/file9", 0, NULL, 0, "bla", "DEFAULT", "", NULL); + DROP PROCEDURE IF EXISTS _create_dirs; DELIMITER // +/** + * Create all parent directories (from the root of the file system to the + * direct parents) for all existing files. + * + * This procedure is only intended for patching the content of the database and + * should not be used after the patch has been applied. + */ CREATE PROCEDURE _create_dirs() BEGIN DECLARE done BOOLEAN DEFAULT FALSE; @@ -50,41 +135,72 @@ BEGIN DECLARE dir_path VARCHAR(255) DEFAULT ""; DECLARE dir_path_no_trailing_slash VARCHAR(255) DEFAULT ""; DECLARE dir_exists BOOLEAN DEFAULT FALSE; - DECLARE cur CURSOR FOR SELECT file_id, path FROM files; + DECLARE parent_dir_id INT UNSIGNED DEFAULT 51; + DECLARE cur CURSOR FOR SELECT file_id, path FROM files WHERE mimetype != "inode/directory"; DECLARE CONTINUE HANDLER FOR NOT FOUND SET done = TRUE; OPEN cur; + UPDATE files SET parent_directory = 51 WHERE mimetype != "inode/directory"; + -- loop over all FILE entities in the table (not DIRECTORY) loop1: LOOP FETCH cur INTO _file_id, _path; IF done THEN LEAVE loop1; END IF; - -- TODO + -- for each file, these are the initial values. The creation of the + -- parent directories beginns at the root path... + SET dir_path = ""; + -- ... and thus the parent of the first new directory is also the root. + SET parent_dir_id = 51; + + -- create all parent directories (from the root to the direct parent + -- directory) of the current file. loop2: LOOP + -- calculate the next parent directory, beginning at the root of + -- the file system for each of the files. E.g. when the _path is + -- A/B/C the loop come here 3 times and dir_path is "A/", "A/B/", + -- and "", because A/B/C/ is not a substring of "A/B/C". SELECT REGEXP_SUBSTR(_path, CONCAT("(?U)^", dir_path, ".*/" )) INTO dir_path; IF dir_path = "" THEN + -- stopping condition of the inner loop!!! + -- this is the case when all parent directories of the current file have been processed. SET dir_exists = FALSE; SET done = FALSE; LEAVE loop2; END IF; + + -- remove the trailing slash for convenience. SET dir_path_no_trailing_slash = LEFT(dir_path, CHAR_LENGTH(dir_path)-1); - SELECT TRUE INTO dir_exists FROM files WHERE path = dir_path_no_trailing_slash; + + -- check if the directory has already been inserted (because there + -- could be several files under "A/B/" and "A" and "A/B" have + -- only to be created once). + SELECT file_id, TRUE INTO parent_dir_id, dir_exists FROM files WHERE path = dir_path_no_trailing_slash; IF dir_exists IS TRUE THEN + -- continue from the top of the inner loop and ignore the rest + -- this is the case when the directory exists yet and doesn't need to be inserted again. SET dir_exists = FALSE; ITERATE loop2; END IF; + + -- Here, the parent directories are created. First insert a new Entity... INSERT INTO entities (description, role, acl) VALUES (NULL, "DIRECTORY", 0); SET dir_id = LAST_INSERT_ID(); + -- ... then insert the FSODescriptor INSERT INTO files (file_id, path, size, hash, checked_timestamp, - mimetype, file_storage_id, file_key) + mimetype, file_storage_id, file_key, parent_directory) VALUES (dir_id, dir_path_no_trailing_slash, 0, NULL, 0, - "inode/directory", "DEFAULT", dir_path_no_trailing_slash - ); - UPDATE files SET parent_directory = dir_id WHERE path LIKE CONCAT(dir_path, "%"); - UPDATE files SET parent_directory = 0 WHERE path LIKE CONCAT(dir_path, "%"); + "inode/directory", "DEFAULT", dir_path_no_trailing_slash, parent_dir_id); + + -- set the parent_dir_id for the next iteration + SET parent_dir_id = dir_id; + + -- set parent dir all children of current directory + UPDATE files SET parent_directory = parent_dir_id WHERE path LIKE CONCAT(dir_path, '%'); END LOOP; + END LOOP; CLOSE cur; @@ -94,3 +210,6 @@ END // DELIMITER ; CALL _create_dirs(); + +-- not to be uses after this patch has been applied. +DROP PROCEDURE _create_dirs;