From 1db5c9a0b7d594dbc178aed7764907cb803e3226 Mon Sep 17 00:00:00 2001
From: Timm Fitschen <t.fitschen@indiscale.com>
Date: Mon, 28 Nov 2022 21:52:04 +0100
Subject: [PATCH] WIP: file storage refactoring

---
 patches/patch20220110-6.0-SNAPSHOT/patch.sql  |  2 +
 procedures/insertFSODescriptor.sql            | 82 +++++++++++++++++++
 .../listFSODescriptorByParentDirectory.sql    | 61 ++++++++++++++
 ...FileIdByPath.sql => retrieveEntityACL.sql} | 38 ++++++---
 procedures/retrieveFSODescriptorByPath.sql    | 67 +++++++++++++++
 5 files changed, 239 insertions(+), 11 deletions(-)
 create mode 100644 procedures/insertFSODescriptor.sql
 create mode 100644 procedures/listFSODescriptorByParentDirectory.sql
 rename procedures/{getFileIdByPath.sql => retrieveEntityACL.sql} (56%)
 create mode 100644 procedures/retrieveFSODescriptorByPath.sql

diff --git a/patches/patch20220110-6.0-SNAPSHOT/patch.sql b/patches/patch20220110-6.0-SNAPSHOT/patch.sql
index 60db57d..7ad3403 100644
--- a/patches/patch20220110-6.0-SNAPSHOT/patch.sql
+++ b/patches/patch20220110-6.0-SNAPSHOT/patch.sql
@@ -73,6 +73,8 @@ ALTER TABLE files ADD UNIQUE (`path`);
 -- In the default file storage back-end the key is just the path.
 UPDATE files SET file_key=path;
 UPDATE archive_files SET file_key=path;
+UPDATE files SET hash_algorithm = 'SHA-512' WHERE hash IS NOT NULL; -- label pre-existing hashes
+UPDATE archive_files SET hash_algorithm = 'SHA-512' WHERE hash IS NOT NULL;
 
 -- and now, set NOT NULL for the file_key column
 ALTER TABLE files MODIFY COLUMN file_key VARBINARY(16000) NOT NULL;
diff --git a/procedures/insertFSODescriptor.sql b/procedures/insertFSODescriptor.sql
new file mode 100644
index 0000000..cdbd6bd
--- /dev/null
+++ b/procedures/insertFSODescriptor.sql
@@ -0,0 +1,82 @@
+/*
+ * This file is a part of the CaosDB Project.
+ *
+ * Copyright (C) 2022 IndiScale GmbH <info@indiscale.com>
+ * Copyright (C) 2022 Timm Fitschen <t.fitschen@indiscale.com>
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as
+ * published by the Free Software Foundation, either version 3 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program. If not, see <https://www.gnu.org/licenses/>.
+ */
+
+
+
+DELIMITER //
+
+DROP PROCEDURE IF EXISTS db_5_0.insertFSODescriptor //
+
+/**
+ * Store the descriptor of a file system object (FSO) as a new row of the
+ * files table, i.e. only the entity id and the file-specific properties.
+ *
+ * Parameters
+ * ----------
+ * EntityID : INT UNSIGNED
+ *   The entity id; written to the file_id column.
+ * FileHash : VARCHAR(255)
+ *   The hash value as a hexadecimal string; decoded with UNHEX on insert.
+ * FileHashAlgo : VARCHAR(255)
+ *   The algorithm used to calculate the hash value.
+ * FileCheckedTimestamp : BIGINT
+ *   When the FSO has been checked for consistency.
+ * FileSize : BIGINT UNSIGNED
+ *   Byte size of the file.
+ * FilePath : VARCHAR(16000)
+ *   The path of the FSO in the virtual file system.
+ * FileMimeType : VARCHAR(255)
+ *   The MIME type of the file.
+ * FileStorageId : VARCHAR(255)
+ *   The id of the file storage where the blob of the file is stored.
+ * FileKey : VARCHAR(16000)
+ *   The key of the file in the file storage.
+ *
+ */
+CREATE PROCEDURE db_5_0.insertFSODescriptor(
+    IN EntityID             INT UNSIGNED,
+    IN FileHash             VARCHAR(255),
+    IN FileHashAlgo         VARCHAR(255),
+    IN FileCheckedTimestamp BIGINT,
+    IN FileSize             BIGINT UNSIGNED,
+    IN FilePath             VARCHAR(16000),
+    IN FileMimeType         VARCHAR(255),
+    IN FileStorageId        VARCHAR(255),
+    IN FileKey              VARCHAR(16000))
+insertFSODescriptorBody: BEGIN
+    -- NOTE(review): parent_directory is not populated by this insert yet.
+    INSERT INTO files
+        (file_id, hash, hash_algorithm, checked_timestamp, size,
+         path, mimetype, file_storage_id, file_key)
+    VALUES
+        (EntityID,
+         UNHEX(FileHash), -- hex string -> binary string
+         FileHashAlgo,
+         FileCheckedTimestamp,
+         FileSize,
+         FilePath,
+         FileMimeType,
+         FileStorageId,
+         FileKey);
+END;
+//
+
+
+DELIMITER ;
diff --git a/procedures/listFSODescriptorByParentDirectory.sql b/procedures/listFSODescriptorByParentDirectory.sql
new file mode 100644
index 0000000..17769bf
--- /dev/null
+++ b/procedures/listFSODescriptorByParentDirectory.sql
@@ -0,0 +1,61 @@
+/*
+ * This file is a part of the CaosDB Project.
+ *
+ * Copyright (C) 2022 IndiScale GmbH <info@indiscale.com>
+ * Copyright (C) 2022 Timm Fitschen <t.fitschen@indiscale.com>
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as
+ * published by the Free Software Foundation, either version 3 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program. If not, see <https://www.gnu.org/licenses/>.
+ */
+
+
+DELIMITER //
+
+DROP PROCEDURE IF EXISTS db_5_0.listFSODescriptorByParentDirectory //
+
+/**
+ * List the descriptors of all file system objects (FSOs) whose
+ * parent_directory column equals the given directory id.
+ *
+ * Parameters
+ * ----------
+ * ParentDirectory : INT UNSIGNED
+ *   The entity id of the parent directory.
+ *
+ * ResultSet
+ * ---------
+ * Tuples of (FileHashAlgo, FileId, FileParentID, FilePath, FileSize, FileHash,
+ *            FileHashChecked, FileMimetype, FileStorageID, FileKey)
+ */
+CREATE PROCEDURE db_5_0.listFSODescriptorByParentDirectory(IN ParentDirectory INT UNSIGNED)
+listFSODescriptorByParentDirectoryBody: BEGIN
+
+    SELECT
+        f.hash_algorithm    AS FileHashAlgo,
+        f.file_id           AS FileId,
+        f.parent_directory  AS FileParentID,
+        f.path              AS FilePath,
+        f.size              AS FileSize,
+        HEX(f.hash)         AS FileHash,
+        f.checked_timestamp AS FileHashChecked,
+        f.mimetype          AS FileMimetype,
+        f.file_storage_id   AS FileStorageID,
+        f.file_key          AS FileKey
+    FROM files AS f
+    WHERE f.parent_directory = ParentDirectory;
+
+END;
+//
+
+
+DELIMITER ;
diff --git a/procedures/getFileIdByPath.sql b/procedures/retrieveEntityACL.sql
similarity index 56%
rename from procedures/getFileIdByPath.sql
rename to procedures/retrieveEntityACL.sql
index e565ece..5e0958b 100644
--- a/procedures/getFileIdByPath.sql
+++ b/procedures/retrieveEntityACL.sql
@@ -1,9 +1,8 @@
 /*
- * ** header v3.0
  * This file is a part of the CaosDB Project.
  *
- * Copyright (C) 2018 Research Group Biomedical Physics,
- * Max-Planck-Institute for Dynamics and Self-Organization Göttingen
+ * Copyright (C) 2022 IndiScale GmbH <info@indiscale.com>
+ * Copyright (C) 2022 Timm Fitschen <t.fitschen@indiscale.com>
  *
  * This program is free software: you can redistribute it and/or modify
  * it under the terms of the GNU Affero General Public License as
@@ -17,17 +16,34 @@
  *
  * You should have received a copy of the GNU Affero General Public License
  * along with this program. If not, see <https://www.gnu.org/licenses/>.
- *
- * ** end header
  */
 
-Drop Procedure if exists db_5_0.getFileIdByPath;
-Delimiter //
-Create Procedure db_5_0.getFileIdByPath (in FilePath VARCHAR(255))
-BEGIN 
 
-Select file_id as FileID from files where path=FilePath LIMIT 1;
+
+DELIMITER //
+
+DROP PROCEDURE IF EXISTS db_5_0.retrieveEntityACL //
+
+/**
+ * Look up the ACL which is assigned to an entity.
+ *
+ * Parameters
+ * ----------
+ * EntityID : INT UNSIGNED
+ *   The id of the entity.
+ *
+ * ResultSet
+ * ---------
+ * Tuple of (ACL); empty when no entity/entity_acl pair matches the id.
+ */
+CREATE PROCEDURE db_5_0.retrieveEntityACL(IN EntityID INT UNSIGNED)
+retrieveEntityACLBody: BEGIN
+    SELECT a.acl AS ACL
+        FROM entities AS e INNER JOIN entity_acl AS a ON a.id = e.acl
+        WHERE e.id = EntityID;
 
 END;
 //
-delimiter ;
+
+
+DELIMITER ;
diff --git a/procedures/retrieveFSODescriptorByPath.sql b/procedures/retrieveFSODescriptorByPath.sql
new file mode 100644
index 0000000..f63572b
--- /dev/null
+++ b/procedures/retrieveFSODescriptorByPath.sql
@@ -0,0 +1,67 @@
+/*
+ * This file is a part of the CaosDB Project.
+ *
+ * Copyright (C) 2022 IndiScale GmbH <info@indiscale.com>
+ * Copyright (C) 2022 Timm Fitschen <t.fitschen@indiscale.com>
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as
+ * published by the Free Software Foundation, either version 3 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program. If not, see <https://www.gnu.org/licenses/>.
+ */
+
+
+
+DELIMITER //
+-- Statements below are terminated by // (not ;) until the delimiter is reset.
+DROP PROCEDURE IF EXISTS db_5_0.retrieveFSODescriptorByPath //
+
+/**
+ * Retrieve the VirtualFSODescriptor, i.e. only the id and special properties
+ * of such an entity.
+ *
+ * Parameters
+ * ----------
+ * byPath : VARBINARY(16000)
+ *   The path of the FSO in the virtual file system.
+ *
+ *   NOTE(review): the path is matched with LIKE, so % and _ inside byPath
+ *   act as wildcards and the LIKE escape character is interpreted. Confirm
+ *   that pattern matching is intended; otherwise compare with = or escape
+ *   the argument in the caller.
+ *
+ * ResultSet
+ * ---------
+ * Tuples of (FileHashAlgo, Param, FileId, FilePath, FileSize, FileHash,
+ *            FileHashChecked, FileMimetype, FileStorageID, FileKey)
+ */
+CREATE PROCEDURE db_5_0.retrieveFSODescriptorByPath(
+    IN byPath VARBINARY(16000))
+retrieveFSODescriptorByPathBody: BEGIN
+
+    SELECT hash_algorithm AS FileHashAlgo,
+            byPath AS Param, -- echoes the argument back to the caller
+            file_id AS FileId,
+            path AS FilePath,
+            size AS FileSize,
+            hex(hash) AS FileHash,
+            checked_timestamp AS FileHashChecked,
+            mimetype AS FileMimetype,
+            file_storage_id AS FileStorageID,
+            file_key AS FileKey
+        FROM files
+        WHERE path LIKE byPath;
+
+END;
+//
+
+
+DELIMITER ;
-- 
GitLab