From 0801b11082b6d9e3e1491488a08113175f7b863f Mon Sep 17 00:00:00 2001
From: Daniel <d.hornung@indiscale.com>
Date: Wed, 2 Oct 2024 13:38:46 +0200
Subject: [PATCH] ENH: Update script for outdated dumps.

Also updated the MariaDB server image for test pipeline.
---
 .docker/Dockerfile                            |  7 +-
 .gitlab-ci.yml                                |  3 +-
 .../2024-10-02.dump_fix_mariadb_10_6.sh       | 44 ++++++++++++
 dump_updates/README.md                        | 21 ++++++
 dump_updates/test/test_all.py                 | 70 +++++++++++++++++++
 ...ump_fix_mariadb_10_6.example1.expected.sql | 36 ++++++++++
 ...4-10-02.dump_fix_mariadb_10_6.example1.sql | 36 ++++++++++
 7 files changed, 213 insertions(+), 4 deletions(-)
 create mode 100755 dump_updates/2024-10-02.dump_fix_mariadb_10_6.sh
 create mode 100644 dump_updates/README.md
 create mode 100644 dump_updates/test/test_all.py
 create mode 100644 dump_updates/test/test_data/2024-10-02.dump_fix_mariadb_10_6.example1.expected.sql
 create mode 100644 dump_updates/test/test_data/2024-10-02.dump_fix_mariadb_10_6.example1.sql

diff --git a/.docker/Dockerfile b/.docker/Dockerfile
index 558c145..f9a4145 100644
--- a/.docker/Dockerfile
+++ b/.docker/Dockerfile
@@ -8,8 +8,11 @@ RUN apt-get update \
 		unzip \
         python3-lxml \
         python3-pip \
-        python3-pytest \
 		python3-sqlparse \
 		python3-sphinx \
 		doxygen
-RUN pip3 install breathe sphinx-rtd-theme recommonmark
+RUN pip3 install --break-system-packages \
+  breathe \
+  sphinx-rtd-theme \
+  pytest \
+  recommonmark
diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index 7db3e23..eefdc3f 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -59,8 +59,7 @@ unittests-mariadb:
   tags: [ docker ]
   stage: test
   services:
-    - mariadb:10.4
-
+    - mariadb:10.11
   script:
     - make pipeline-test SQL_HOST=mariadb
 
diff --git a/dump_updates/2024-10-02.dump_fix_mariadb_10_6.sh b/dump_updates/2024-10-02.dump_fix_mariadb_10_6.sh
new file mode 100755
index 0000000..4450b83
--- /dev/null
+++ b/dump_updates/2024-10-02.dump_fix_mariadb_10_6.sh
@@ -0,0 +1,44 @@
+#!/usr/bin/env bash
+
+# This file is a part of the LinkAhead Project.
+#
+# Copyright (C) 2024 IndiScale GmbH <info@indiscale.com>
+# Copyright (C) 2024 Daniel Hornung <d.hornung@indiscale.com>
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as
+# published by the Free Software Foundation, either version 3 of the
+# License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+
+# About this script
+# =================
+#
+# Fix the use of "Offset" as a function parameter name.  "OFFSET" became a reserved keyword in
+# MariaDB 10.6.
+#
+# This script takes an SQL dump from stdin and prints the fixed SQL to stdout.
+#
+# Usage
+# -----
+#
+# 2024-10-02.dump_fix_mariadb_10_6.sh < yourdump.sql > yourdump.fixed.sql
+
+set -euo pipefail
+IFS=$'\n\t'
+
+script='
+s/Offset INT UNSIGNED) RETURNS varbinary(255)/HeadOffset INT UNSIGNED) RETURNS varbinary(255)/
+s/LIMIT 1 OFFSET Offset/LIMIT 1 OFFSET HeadOffset/
+'
+
+sed -e "$script"
+
+unset script
diff --git a/dump_updates/README.md b/dump_updates/README.md
new file mode 100644
index 0000000..704ea80
--- /dev/null
+++ b/dump_updates/README.md
@@ -0,0 +1,21 @@
+# SQL dump updates #
+
+## Dump ##
+
+This directory contains scripts to update database dumps to newer versions of the MariaDB server.
+
+## Background ##
+
+In some cases, restoring the database content from existing SQL dumps may not be possible in a
+straightforward manner.  For those cases, this directory contains scripts to help with known issues.
+
+Examples for problems include:
+
+- New reserved keywords in MariaDB, that were previously used as identifiers in code.
+
+# Known issues and their fix #
+
+- SQL syntax error near `Offset INT UNSIGNED`: If a dump was made before MariaDB 10.6 (LinkAhead <
+  0.15) and with the SQL backend before 7.0.3, there was a parameter named `Offset`.  With MariaDB
+  10.6 however, `OFFSET` became a reserved keyword.  This can be fixed by running  
+  `2024-10-02.dump_fix_mariadb_10_6.sh < yourdump.sql > yourdump.fixed.sql`
diff --git a/dump_updates/test/test_all.py b/dump_updates/test/test_all.py
new file mode 100644
index 0000000..8e97d2a
--- /dev/null
+++ b/dump_updates/test/test_all.py
@@ -0,0 +1,70 @@
+#!/usr/bin/env python3
+
+# This file is a part of the LinkAhead Project.
+#
+# Copyright (C) 2024 IndiScale GmbH <www.indiscale.com>
+# Copyright (C) 2024 Daniel Hornung <d.hornung@indiscale.com>
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as
+# published by the Free Software Foundation, either version 3 of the
+# License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+
+"""Testing the dump update scripts
+"""
+
+import filecmp
+from pathlib import Path
+from subprocess import run
+from tempfile import NamedTemporaryFile
+
+
+def get_basedir() -> str:
+    """Return the assumped base dir for the dump updates.
+    """
+    path = Path(__file__).parents[1]
+    return str(path)
+
+
+def get_test_data(basename: str) -> list[tuple[str, str]]:
+    """Return a list of [input, expectedoutput] tuples.
+
+    The output may be an empty string if no corresponding file can be found.
+    """
+    basedir = get_basedir()
+    datadir = Path(basedir) / "test" / "test_data"
+    results = []
+    for input_path in datadir.glob(f"{basename}.example*[0-9].sql"):
+        expected_path = datadir / f"{input_path.name[:-4]}.expected.sql"
+        if expected_path.exists():
+            expected = str(expected_path)
+        else:
+            expected = ""
+        results.append((str(input_path), expected))
+    return results
+
+
+def test_2024_10_02(tmpdir):
+    """``Offset`` became a reserved keyword in MariaDB 10.6.
+    """
+    script = "2024-10-02.dump_fix_mariadb_10_6.sh"
+    script_fullname = str(Path(get_basedir()) / script)
+    test_data = get_test_data(script[:-3])
+    for infile, expectedfile in test_data:
+        with (NamedTemporaryFile(dir=tmpdir, suffix=".sql", delete=True) as output,
+              open(infile, mode="rb") as infile_stream
+              ):
+            run([script_fullname],
+                stdin=infile_stream,
+                stdout=output,
+                check=True
+                )
+            assert filecmp.cmp(output.name, expectedfile), "Output does not match expected output."
diff --git a/dump_updates/test/test_data/2024-10-02.dump_fix_mariadb_10_6.example1.expected.sql b/dump_updates/test/test_data/2024-10-02.dump_fix_mariadb_10_6.example1.expected.sql
new file mode 100644
index 0000000..1a58223
--- /dev/null
+++ b/dump_updates/test/test_data/2024-10-02.dump_fix_mariadb_10_6.example1.expected.sql
@@ -0,0 +1,36 @@
+/* Just a short snippt with the problem */
+
+
+/*!50003 SET @saved_col_connection = @@collation_connection */ ;
+/*!50003 SET character_set_client  = utf8 */ ;
+/*!50003 SET character_set_results = utf8 */ ;
+/*!50003 SET collation_connection  = utf8_general_ci */ ;
+DELIMITER ;;
+CREATE DEFINER=`root`@`%` FUNCTION `get_head_relative`(EntityID VARCHAR(255),
+    HeadOffset INT UNSIGNED) RETURNS varbinary(255)
+    READS SQL DATA
+BEGIN
+    DECLARE InternalEntityID INT UNSIGNED DEFAULT NULL;
+
+    SELECT internal_id INTO InternalEntityID FROM entity_ids WHERE id = EntityID;
+
+    
+    
+    
+    
+    RETURN (
+        SELECT e.version
+            FROM entity_version AS e
+            WHERE e.entity_id = InternalEntityID
+            ORDER BY e._iversion DESC
+            LIMIT 1 OFFSET HeadOffset
+        );
+END ;;
+DELIMITER ;
+/*!50003 SET sql_mode              = @saved_sql_mode */ ;
+/*!50003 SET character_set_client  = @saved_cs_client */ ;
+/*!50003 SET character_set_results = @saved_cs_results */ ;
+/*!50003 SET collation_connection  = @saved_col_connection */ ;
+/*!50003 SET @saved_sql_mode       = @@sql_mode */ ;
+/*!50003 SET sql_mode              = 'STRICT_TRANS_TABLES,ERROR_FOR_DIVISION_BY_ZERO,NO_AUTO_CREATE_USER,NO_ENGINE_SUBSTITUTION' */ ;
+/*!50003 D */
diff --git a/dump_updates/test/test_data/2024-10-02.dump_fix_mariadb_10_6.example1.sql b/dump_updates/test/test_data/2024-10-02.dump_fix_mariadb_10_6.example1.sql
new file mode 100644
index 0000000..a5476af
--- /dev/null
+++ b/dump_updates/test/test_data/2024-10-02.dump_fix_mariadb_10_6.example1.sql
@@ -0,0 +1,36 @@
+/* Just a short snippt with the problem */
+
+
+/*!50003 SET @saved_col_connection = @@collation_connection */ ;
+/*!50003 SET character_set_client  = utf8 */ ;
+/*!50003 SET character_set_results = utf8 */ ;
+/*!50003 SET collation_connection  = utf8_general_ci */ ;
+DELIMITER ;;
+CREATE DEFINER=`root`@`%` FUNCTION `get_head_relative`(EntityID VARCHAR(255),
+    Offset INT UNSIGNED) RETURNS varbinary(255)
+    READS SQL DATA
+BEGIN
+    DECLARE InternalEntityID INT UNSIGNED DEFAULT NULL;
+
+    SELECT internal_id INTO InternalEntityID FROM entity_ids WHERE id = EntityID;
+
+    
+    
+    
+    
+    RETURN (
+        SELECT e.version
+            FROM entity_version AS e
+            WHERE e.entity_id = InternalEntityID
+            ORDER BY e._iversion DESC
+            LIMIT 1 OFFSET Offset
+        );
+END ;;
+DELIMITER ;
+/*!50003 SET sql_mode              = @saved_sql_mode */ ;
+/*!50003 SET character_set_client  = @saved_cs_client */ ;
+/*!50003 SET character_set_results = @saved_cs_results */ ;
+/*!50003 SET collation_connection  = @saved_col_connection */ ;
+/*!50003 SET @saved_sql_mode       = @@sql_mode */ ;
+/*!50003 SET sql_mode              = 'STRICT_TRANS_TABLES,ERROR_FOR_DIVISION_BY_ZERO,NO_AUTO_CREATE_USER,NO_ENGINE_SUBSTITUTION' */ ;
+/*!50003 D */
-- 
GitLab