From 79f18160e7a7d4172acdece2c7b22ad710a3b275 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Henrik=20tom=20W=C3=B6rden?= <h.tomwoerden@indiscale.com>
Date: Thu, 9 Jan 2025 11:23:34 +0100
Subject: [PATCH 1/4] TST: add performance test

---
 performance-tests/test.py | 121 ++++++++++++++++++++++++++++++++++++++
 pyproject.toml            |   2 +
 2 files changed, 123 insertions(+)
 create mode 100644 performance-tests/test.py

diff --git a/performance-tests/test.py b/performance-tests/test.py
new file mode 100644
index 0000000..6409fb9
--- /dev/null
+++ b/performance-tests/test.py
@@ -0,0 +1,121 @@
+"""
+Tests CPU and Memory usage of RuQaD
+"""
+import cProfile, pstats, io
+from pstats import SortKey
+from time import sleep,time
+from tempfile import TemporaryDirectory
+from datetime import datetime, timezone
+from pathlib import Path
+
+from memory_profiler import profile as mprofile
+
+from ruqad.qualitycheck import QualityChecker
+from ruqad.kadi import collect_records_created_after, download_eln_for, KadiManager
+from ruqad.crawler import trigger_crawler
+import os
+import shutil
+from memory_profiler import memory_usage
+
+SKIP_QUALITY_CHECK = os.getenv("SKIP_QUALITY_CHECK") is not None
+KADIARGS = {
+    "host": os.environ['KADIHOST'],
+    "pat": os.environ['KADITOKEN'],
+}
+
+"""
+Run this file directly to check the memory and CPU usage of the RuQaD demonstrator.
+"""
+
+def _run(n=-1):
+    """Download, optionally quality-check, and crawl Kadi records.
+
+    ``n`` limits the run to the first ``n`` record ids; ``-1`` processes all.
+    """
+    cut_off_date = datetime.fromisoformat("1990-01-01 02:34:42.484312+00:00")
+    with KadiManager(**KADIARGS) as manager:
+        print(f"Checking for records created after {cut_off_date}...")
+        rec_ids = collect_records_created_after(manager, cut_off_date)
+        if len(rec_ids) == 0:
+            print("no new recs")
+        if n != -1:
+            rec_ids = rec_ids[:n]
+        for rid in rec_ids:
+            with TemporaryDirectory(delete=False) as cdir:
+                eln_file = os.path.join(cdir, "export.eln")
+                download_eln_for(manager, rid, path=eln_file)
+                print(f"Downloaded {eln_file}")
+                if SKIP_QUALITY_CHECK:
+                    print("Found env 'SKIP_QUALITY_CHECK', skipping quality check")
+                else:
+                    QualityChecker().check(filename=eln_file, target_dir=cdir)
+                    print(f"Quality check done. {os.listdir(cdir)}")
+                # trigger crawler on dir
+                remote_dir_path = os.path.join(cdir, "ruqad", str(rid))
+                os.makedirs(remote_dir_path)
+                if os.path.exists(os.path.join(cdir, "artifacts.zip")):
+                    shutil.move(os.path.join(cdir, "artifacts.zip"),
+                                os.path.join(remote_dir_path, "report.zip"))
+                shutil.move(os.path.join(cdir, "export.eln"),
+                            os.path.join(remote_dir_path, "export.eln"))
+                trigger_crawler(target_dir=cdir)
+
+def test_memory():
+    # test that maximum memory usage is below 1GB
+    assert 1000>max(memory_usage((_run, [10], {})))
+
+def test_cpu():
+    pr = cProfile.Profile()
+    pr.enable()
+    _run(n=1)
+    pr.disable()
+    s = io.StringIO()
+    sortby = SortKey.CUMULATIVE
+    ps = pstats.Stats(pr, stream=s).sort_stats(sortby)
+    ps.print_stats(10)
+    print(s.getvalue())
+    ps.print_stats("ruqad", 10)
+    ps.print_stats("crawler", 10)
+    print(s.getvalue())
+
+def test_runtime_eln_download():
+    cut_off_date = datetime.fromisoformat("1990-01-01 02:34:42.484312+00:00")
+    with KadiManager(**KADIARGS) as manager:
+        rec_ids = collect_records_created_after(manager, cut_off_date)
+
+        if len(rec_ids) == 0:
+            print("no new recs")
+        with TemporaryDirectory(delete=False) as cdir:
+            eln_file = os.path.join(cdir, "export.eln")
+            start = time()
+            download_eln_for(manager, rec_ids[0], path=eln_file)
+            stop = time()
+
+    print(f"time for downloading eln: {stop-start:.2f} s")
+
+def test_runtime_crawler():
+    cut_off_date = datetime.fromisoformat("1990-01-01 02:34:42.484312+00:00")
+    with KadiManager(**KADIARGS) as manager:
+        rec_ids = collect_records_created_after(manager, cut_off_date)
+
+        if len(rec_ids) == 0:
+            print("no new recs")
+        with TemporaryDirectory(delete=False) as cdir:
+            eln_file = os.path.join(cdir, "export.eln")
+            download_eln_for(manager, rec_ids[0], path=eln_file)
+            # trigger crawler on dir
+            remote_dir_path = os.path.join(cdir, "ruqad", str(rec_ids[0]))
+            os.makedirs(remote_dir_path)
+            shutil.move(os.path.join(cdir, "export.eln"),
+                        os.path.join(remote_dir_path, "export.eln"))
+            start = time()
+            trigger_crawler(target_dir=cdir)
+            stop = time()
+
+    print(f"time for crawling eln: {stop-start:.2f} s")
+
+if __name__ == "__main__":
+    test_memory()
+    test_runtime_eln_download()
+    test_cpu()
+    test_runtime_crawler()
diff --git a/pyproject.toml b/pyproject.toml
index fd35267..c81a9cf 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -47,6 +47,8 @@ test = [
     "tox",
     "pytest",
     "pytest-cov",
+    "guppy3",
+    "memory_profiler",
 ]
 all = [
     "ruqad[dev]",
-- 
GitLab


From 42a4f27e5ed28a3166c0b8418daabeaa012efc4e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Henrik=20tom=20W=C3=B6rden?= <h.tomwoerden@indiscale.com>
Date: Thu, 9 Jan 2025 11:34:54 +0100
Subject: [PATCH 2/4] FIX: add ini for docker

---
 docker/Dockerfile      |  2 +-
 docker/pylinkahead.ini | 36 ++++++++++++++++++++++++++++++++++++
 2 files changed, 37 insertions(+), 1 deletion(-)
 create mode 100644 docker/pylinkahead.ini

diff --git a/docker/Dockerfile b/docker/Dockerfile
index 638ffac..37b0aec 100644
--- a/docker/Dockerfile
+++ b/docker/Dockerfile
@@ -8,7 +8,7 @@ COPY ./pyproject.toml /ruqad/
 
 # static configuration
 COPY ./qualitycheck_config.toml /ruqad/
-COPY ./pylinkahead.ini /ruqad/
+COPY ./docker/pylinkahead.ini /ruqad/
 
 # Installing the package
 WORKDIR /ruqad/
diff --git a/docker/pylinkahead.ini b/docker/pylinkahead.ini
new file mode 100644
index 0000000..a7a4fc0
--- /dev/null
+++ b/docker/pylinkahead.ini
@@ -0,0 +1,36 @@
+# The INI file must be located either in
+# - $CWD/pylinkahead.ini
+# - $HOME/.pylinkahead.ini
+# - the location given in the env variable PYLINKAHEADINI
+
+[Connection]
+# url=https://demo.indiscale.com/
+url = http://localhost:80/provider/linkahead/
+
+## If this option is set, the SSL certificate will be ignored.  Use with care!
+#ssl_insecure=1
+
+username=admin
+
+## The password input method can be chosen with the `password_method` setting,
+## which by default is set to `plain`.
+##
+## DEFAULT: the password method is `plain`, now the password must be saved as
+## plain text.
+password_method=input
+# password=caosdb
+
+
+## OR: `input`: username is optional, password is entered by the user directly
+# password_method=input
+
+## OR: `pass`: password is retrieved from the "pass" password manager
+# password_method=pass
+# password_identifier=...
+
+## OR: `keyring`: using the system keyring/wallet (macOS, GNOME, KDE, Windows)
+## requires installation of the keyring python package:
+## pip install keyring
+# password_method=keyring
+
+timeout=10000
-- 
GitLab


From 655d9468d00c0ce37a1bac0ffd91e45bdcd488d5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Henrik=20tom=20W=C3=B6rden?= <h.tomwoerden@indiscale.com>
Date: Thu, 9 Jan 2025 11:39:55 +0100
Subject: [PATCH 3/4] MAINT: remove unused file

---
 docker/pylinkahead.ini | 36 ------------------------------------
 1 file changed, 36 deletions(-)
 delete mode 100644 docker/pylinkahead.ini

diff --git a/docker/pylinkahead.ini b/docker/pylinkahead.ini
deleted file mode 100644
index a7a4fc0..0000000
--- a/docker/pylinkahead.ini
+++ /dev/null
@@ -1,36 +0,0 @@
-# The INI file must be located either in
-# - $CWD/pylinkahead.ini
-# - $HOME/.pylinkahead.ini
-# - the location given in the env variable PYLINKAHEADINI
-
-[Connection]
-# url=https://demo.indiscale.com/
-url = http://localhost:80/provider/linkahead/
-
-## If this option is set, the SSL certificate will be ignored.  Use with care!
-#ssl_insecure=1
-
-username=admin
-
-## The password input method can be chosen with the `password_method` setting,
-## which by default is set to `plain`.
-##
-## DEFAULT: the password method is `plain`, now the password must be saved as
-## plain text.
-password_method=input
-# password=caosdb
-
-
-## OR: `input`: username is optional, password is entered by the user directly
-# password_method=input
-
-## OR: `pass`: password is retrieved from the "pass" password manager
-# password_method=pass
-# password_identifier=...
-
-## OR: `keyring`: using the system keyring/wallet (macOS, GNOME, KDE, Windows)
-## requires installation of the keyring python package:
-## pip install keyring
-# password_method=keyring
-
-timeout=10000
-- 
GitLab


From e92628537ce3d970394536a7a0ad2b7317a66c12 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Henrik=20tom=20W=C3=B6rden?= <h.tomwoerden@indiscale.com>
Date: Fri, 10 Jan 2025 13:19:09 +0100
Subject: [PATCH 4/4] TST: write output to file

---
 performance-tests/test.py | 41 ++++++++++++++++++++-------------------
 1 file changed, 21 insertions(+), 20 deletions(-)

diff --git a/performance-tests/test.py b/performance-tests/test.py
index 6409fb9..82d5b70 100644
--- a/performance-tests/test.py
+++ b/performance-tests/test.py
@@ -1,21 +1,22 @@
 """
 Tests CPU and Memory usage of RuQaD
 """
-import cProfile, pstats, io
-from pstats import SortKey
-from time import sleep,time
-from tempfile import TemporaryDirectory
+import cProfile
+import io
+import os
+import pstats
+import shutil
 from datetime import datetime, timezone
 from pathlib import Path
+from pstats import SortKey
+from tempfile import TemporaryDirectory
+from time import sleep, time
 
+from memory_profiler import memory_usage
 from memory_profiler import profile as mprofile
-
-from ruqad.qualitycheck import QualityChecker
-from ruqad.kadi import collect_records_created_after, download_eln_for, KadiManager
 from ruqad.crawler import trigger_crawler
-import os
-import shutil
-from memory_profiler import memory_usage
+from ruqad.kadi import KadiManager, collect_records_created_after, download_eln_for
+from ruqad.qualitycheck import QualityChecker
 
 SKIP_QUALITY_CHECK = os.getenv("SKIP_QUALITY_CHECK") is not None
 KADIARGS = {
@@ -69,14 +70,12 @@ def test_cpu():
     pr.enable()
     _run(n=1)
     pr.disable()
-    s = io.StringIO()
-    sortby = SortKey.CUMULATIVE
-    ps = pstats.Stats(pr, stream=s).sort_stats(sortby)
-    ps.print_stats(10)
-    print(s.getvalue())
-    ps.print_stats("ruqad", 10)
-    ps.print_stats("crawler", 10)
-    print(s.getvalue())
+    with open("performance.txt", 'a') as s:
+        sortby = SortKey.CUMULATIVE
+        ps = pstats.Stats(pr, stream=s).sort_stats(sortby)
+        ps.print_stats(10)
+        ps.print_stats("ruqad", 10)
+        ps.print_stats("crawler", 10)
 
 def test_runtime_eln_download():
     cut_off_date = datetime.fromisoformat("1990-01-01 02:34:42.484312+00:00")
@@ -91,7 +90,8 @@ def test_runtime_eln_download():
             download_eln_for(manager, rec_ids[0], path=eln_file)
             stop = time()
 
-    print(f"time for downloading eln: {stop-start:.2f} s")
+    with open("performance.txt", 'a') as s:
+        s.write(f"time for downloading eln: {stop-start:.2f} s\n")
 
 def test_runtime_crawler():
     cut_off_date = datetime.fromisoformat("1990-01-01 02:34:42.484312+00:00")
@@ -112,7 +112,8 @@ def test_runtime_crawler():
             trigger_crawler(target_dir=cdir)
             stop = time()
 
-    print(f"time for crawling eln: {stop-start:.2f} s")
+    with open("performance.txt", 'a') as s:
+        s.write(f"time for crawling eln: {stop-start:.2f} s\n")
 
 if __name__ == "__main__":
     test_memory()
-- 
GitLab