Skip to content
Snippets Groups Projects
Verified Commit 1a3d03d4 authored by Timm Fitschen's avatar Timm Fitschen
Browse files

fixed _create_dirs

parent 765c3634
No related branches found
No related tags found
1 merge request!12DRAFT: ENH: file system: core
Pipeline #7934 failed
-- USE _caosdb_schema_unit_tests;
-- REMOVE SQLITE datatype
-- REMOVE SQLITE datatype (obsolete)
DELETE FROM name_data WHERE entity_id = 50;
DELETE FROM entities WHERE id = 50;
ALTER TABLE files ADD UNIQUE (`path`);
/*
-- Create file_hashes table.
-- This table is necessary because not all FSO objects have hashes (e.g.
-- directories) and files may have more than only one hash stored.
CREATE TABLE IF NOT EXISTS file_hashes (
file_id INT UNSIGNED,
digest VARBINARY(255) NOT NULL,
algorithm VARBINARY(255) NOT NULL DEFAULT "SHA-512",
checked_timestamp BIGINT NOT NULL DEFAULT 0,
PRIMARY KEY (file_id, algorithm),
FOREIGN KEY (file_id) REFERENCES files (file_id)
);
-- ... and create the corresponding archive_file_hashes tables as usual.
CREATE TABLE IF NOT EXISTS archive_file_hashes (
file_id INT UNSIGNED NOT NULL,
digest VARBINARY(255) NOT NULL,
algorithm VARBINARY(255) NOT NULL DEFAULT "SHA-512",
checked_timestamp BIGINT NOT NULL DEFAULT 0,
_iversion INT UNSIGNED NOT NULL,
PRIMARY KEY (file_id, _iversion, algorithm),
FOREIGN KEY (file_id, _iversion) REFERENCES archive_files (file_id, _iversion) ON DELETE CASCADE
);
-- Move existing hash data to new tables...
INSERT INTO file_hashes (file_id, digest, checked_timestamp) SELECT file_id, hash, checked_timestamp FROM files WHERE hash IS NOT NULL;
INSERT INTO archive_file_hashes (file_id, _iversion, digest, checked_timestamp) SELECT file_id, _iversion, hash, checked_timestamp FROM archive_files WHERE hash IS NOT NULL;
-- and remove hash colums from the files and archive_files tables.
ALTER TABLE files DROP COLUMN hash;
ALTER TABLE archive_files DROP COLUMN hash;
*/
-- Add new fields to files table...
ALTER TABLE files ADD COLUMN IF NOT EXISTS (
parent_directory INT UNSIGNED DEFAULT NULL,
mimetype VARBINARY(255) DEFAULT NULL,
file_storage_id VARBINARY(255) NOT NULL DEFAULT "DEFAULT",
file_key VARBINARY(255) DEFAULT NULL,
hash_algorithm VARBINARY(255) DEFAULT NULL
file_key VARBINARY(255) DEFAULT NULL
);
-- ... and to the corresponding archive_files table.
ALTER TABLE archive_files ADD COLUMN IF NOT EXISTS (
parent_directory INT UNSIGNED DEFAULT NULL,
mimetype VARBINARY(255) DEFAULT NULL,
file_storage_id VARBINARY(255) NOT NULL DEFAULT "DEFAULT",
file_key VARBINARY(40000) DEFAULT NULL,
checked_timestamp BIGINT NOT NULL DEFAULT 0,
hash_algorithm VARBINARY(255) DEFAULT NULL
checked_timestamp BIGINT NOT NULL DEFAULT 0
);
-- no two files at the same path are allowed.
ALTER TABLE files ADD UNIQUE (`path`);
-- directories cannot be deleted without deleting all the content first.
ALTER TABLE files ADD FOREIGN KEY (parent_directory) REFERENCES entities (id);
ALTER TABLE archive_files ADD FOREIGN KEY (parent_directory) REFERENCES entities (id) ON DELETE CASCADE;
ALTER TABLE entities MODIFY COLUMN `role` enum('RECORDTYPE','RECORD','FILE','DOMAIN','PROPERTY','DATATYPE','ROLE','QUERYTEMPLATE', 'DIRECTORY') COLLATE utf8_unicode_ci NOT NULL;
INSERT INTO entities (id, description, role, acl) VALUES (9, "The directory role.", "ROLE", 0);
INSERT INTO name_data (domain_id, entity_id, property_id, value, status, pidx) VALUES (0, 9, 20, "DIRECTORY", "FIX", 0);
-- Add a new special role "DIRECTORY"
INSERT IGNORE INTO entities (id, description, role, acl) VALUES (9, "The directory role.", "ROLE", 0);
INSERT IGNORE INTO name_data (domain_id, entity_id, property_id, value, status, pidx) VALUES (0, 9, 20, "DIRECTORY", "FIX", 0);
-- Add a special nameless directory which is used as the root directory of the
-- internal file system.
INSERT IGNORE INTO entities (id, description, role, acl) VALUES (51, "The root directory of the internal files system", "DIRECTORY", 0);
INSERT IGNORE INTO files
(file_id, path, size, hash, checked_timestamp, mimetype, file_storage_id,
file_key, parent_directory)
VALUES (51, "", 0, NULL, 0, "inode/directory", "DEFAULT", "", NULL);
-- In the default file storage back-end the key is just the path.
UPDATE files SET file_key=path;
UPDATE archive_files SET file_key=path;
-- and now, set NOT NULL for the file_key column
ALTER TABLE files MODIFY COLUMN file_key VARBINARY(40000) NOT NULL;
ALTER TABLE archive_files MODIFY COLUMN file_key VARBINARY(40000) NOT NULL;
-- Size may be NULL for directories
ALTER TABLE files MODIFY COLUMN size BIGINT NULL DEFAULT NULL;
ALTER TABLE archive_files MODIFY COLUMN size BIGINT NULL DEFAULT NULL;
-- More characters for long names
ALTER TABLE files MODIFY COLUMN path VARCHAR(5000) NOT NULL;
ALTER TABLE archive_files MODIFY COLUMN path VARCHAR(5000) NOT NULL;
ALTER TABLE files MODIFY COLUMN file_key VARBINARY(40000) NOT NULL;
ALTER TABLE archive_files MODIFY COLUMN file_key VARBINARY(40000) NOT NULL;
-- Test data for the _create_dirs procedure...
-- INSERT INTO entities (id, role, acl) VALUES
-- (52, "FILE", 0),
-- (53, "FILE", 0),
-- (54, "FILE", 0),
-- (55, "FILE", 0),
-- (56, "FILE", 0),
-- (57, "FILE", 0),
-- (58, "FILE", 0),
-- (59, "FILE", 0),
-- (60, "FILE", 0);
-- INSERT INTO files
-- (file_id, path, size, hash, checked_timestamp, mimetype, file_storage_id,
-- file_key, parent_directory)
-- VALUES
-- (52, "file1", 0, NULL, 0, "bla", "DEFAULT", "", NULL),
-- (53, "file2", 0, NULL, 0, "bla", "DEFAULT", "", NULL),
-- (54, "A/B/file3", 0, NULL, 0, "bla", "DEFAULT", "", NULL),
-- (55, "A/B/file4", 0, NULL, 0, "bla", "DEFAULT", "", NULL),
-- (56, "A/B/C/file5", 0, NULL, 0, "bla", "DEFAULT", "", NULL),
-- (57, "A/B/C/file6", 0, NULL, 0, "bla", "DEFAULT", "", NULL),
-- (58, "B/C/D/file7", 0, NULL, 0, "bla", "DEFAULT", "", NULL),
-- (59, "B/C/D/file8", 0, NULL, 0, "bla", "DEFAULT", "", NULL),
-- (60, "B/C/file9", 0, NULL, 0, "bla", "DEFAULT", "", NULL);
DROP PROCEDURE IF EXISTS _create_dirs;
DELIMITER //
/**
* Create all parent directories (from the root of the file system to the
* direct parents) for all existing files.
*
* This procedure is only intended for patching the content of the database and
* should not be used after the patch has been applied.
*/
CREATE PROCEDURE _create_dirs()
BEGIN
DECLARE done BOOLEAN DEFAULT FALSE;
......@@ -50,41 +135,72 @@ BEGIN
DECLARE dir_path VARCHAR(255) DEFAULT "";
DECLARE dir_path_no_trailing_slash VARCHAR(255) DEFAULT "";
DECLARE dir_exists BOOLEAN DEFAULT FALSE;
DECLARE cur CURSOR FOR SELECT file_id, path FROM files;
DECLARE parent_dir_id INT UNSIGNED DEFAULT 51;
DECLARE cur CURSOR FOR SELECT file_id, path FROM files WHERE mimetype != "inode/directory";
DECLARE CONTINUE HANDLER FOR NOT FOUND SET done = TRUE;
OPEN cur;
UPDATE files SET parent_directory = 51 WHERE mimetype != "inode/directory";
-- loop over all FILE entities in the table (not DIRECTORY)
loop1: LOOP
FETCH cur INTO _file_id, _path;
IF done THEN
LEAVE loop1;
END IF;
-- TODO
-- for each file, these are the initial values. The creation of the
-- parent directories beginns at the root path...
SET dir_path = "";
-- ... and thus the parent of the first new directory is also the root.
SET parent_dir_id = 51;
-- create all parent directories (from the root to the direct parent
-- directory) of the current file.
loop2: LOOP
-- calculate the next parent directory, beginning at the root of
-- the file system for each of the files. E.g. when the _path is
-- A/B/C the loop come here 3 times and dir_path is "A/", "A/B/",
-- and "", because A/B/C/ is not a substring of "A/B/C".
SELECT REGEXP_SUBSTR(_path, CONCAT("(?U)^", dir_path, ".*/" )) INTO dir_path;
IF dir_path = "" THEN
-- stopping condition of the inner loop!!!
-- this is the case when all parent directories of the current file have been processed.
SET dir_exists = FALSE;
SET done = FALSE;
LEAVE loop2;
END IF;
-- remove the trailing slash for convenience.
SET dir_path_no_trailing_slash = LEFT(dir_path, CHAR_LENGTH(dir_path)-1);
SELECT TRUE INTO dir_exists FROM files WHERE path = dir_path_no_trailing_slash;
-- check if the directory has already been inserted (because there
-- could be several files under "A/B/" and "A" and "A/B" have
-- only to be created once).
SELECT file_id, TRUE INTO parent_dir_id, dir_exists FROM files WHERE path = dir_path_no_trailing_slash;
IF dir_exists IS TRUE THEN
-- continue from the top of the inner loop and ignore the rest
-- this is the case when the directory exists yet and doesn't need to be inserted again.
SET dir_exists = FALSE;
ITERATE loop2;
END IF;
-- Here, the parent directories are created. First insert a new Entity...
INSERT INTO entities (description, role, acl) VALUES (NULL, "DIRECTORY", 0);
SET dir_id = LAST_INSERT_ID();
-- ... then insert the FSODescriptor
INSERT INTO files (file_id, path, size, hash, checked_timestamp,
mimetype, file_storage_id, file_key)
mimetype, file_storage_id, file_key, parent_directory)
VALUES (dir_id, dir_path_no_trailing_slash, 0, NULL, 0,
"inode/directory", "DEFAULT", dir_path_no_trailing_slash
);
UPDATE files SET parent_directory = dir_id WHERE path LIKE CONCAT(dir_path, "%");
UPDATE files SET parent_directory = 0 WHERE path LIKE CONCAT(dir_path, "%");
"inode/directory", "DEFAULT", dir_path_no_trailing_slash, parent_dir_id);
-- set the parent_dir_id for the next iteration
SET parent_dir_id = dir_id;
-- set parent dir all children of current directory
UPDATE files SET parent_directory = parent_dir_id WHERE path LIKE CONCAT(dir_path, '%');
END LOOP;
END LOOP;
CLOSE cur;
......@@ -94,3 +210,6 @@ END //
DELIMITER ;
CALL _create_dirs();
-- not to be uses after this patch has been applied.
DROP PROCEDURE _create_dirs;
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment