From 79f18160e7a7d4172acdece2c7b22ad710a3b275 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20tom=20W=C3=B6rden?= <h.tomwoerden@indiscale.com> Date: Thu, 9 Jan 2025 11:23:34 +0100 Subject: [PATCH 1/4] TST: add performance test --- performance-tests/test.py | 121 ++++++++++++++++++++++++++++++++++++++ pyproject.toml | 2 + 2 files changed, 123 insertions(+) create mode 100644 performance-tests/test.py diff --git a/performance-tests/test.py b/performance-tests/test.py new file mode 100644 index 0000000..6409fb9 --- /dev/null +++ b/performance-tests/test.py @@ -0,0 +1,121 @@ +""" +Tests CPU and Memory usage of RuQaD +""" +import cProfile, pstats, io +from pstats import SortKey +from time import sleep,time +from tempfile import TemporaryDirectory +from datetime import datetime, timezone +from pathlib import Path + +from memory_profiler import profile as mprofile + +from ruqad.qualitycheck import QualityChecker +from ruqad.kadi import collect_records_created_after, download_eln_for, KadiManager +from ruqad.crawler import trigger_crawler +import os +import shutil +from memory_profiler import memory_usage + +SKIP_QUALITY_CHECK = os.getenv("SKIP_QUALITY_CHECK") is not None +KADIARGS = { + "host": os.environ['KADIHOST'], + "pat": os.environ['KADITOKEN'], +} + +""" +call this file to check memory and cpu usage of the RuQaD demonstrator +""" + +def _run(n=-1): + cut_off_date = datetime.fromisoformat("1990-01-01 02:34:42.484312+00:00") + with KadiManager(**KADIARGS) as manager: + print(f"Checking for records created after {cut_off_date}...") + rec_ids = collect_records_created_after(manager, cut_off_date) + + if len(rec_ids) == 0: + print("no new recs") + if n!=-1: + rec_ids = rec_ids[:n] + for rid in rec_ids: + with TemporaryDirectory(delete=False) as cdir: + eln_file = os.path.join(cdir, "export.eln") + download_eln_for(manager, rid, path=eln_file) + print(f"Downlaoded {eln_file}") + if SKIP_QUALITY_CHECK: + print("Found env 'SKIP_QUALITY_CHECK', skipping quality check") + else: + qc = QualityChecker() + qc.check(filename=eln_file, target_dir=cdir) + print(f"Quality check done. {os.listdir(cdir)}") + # trigger crawler on dir + remote_dir_path = os.path.join(cdir, "ruqad", str(rid)) + os.makedirs(remote_dir_path) + if os.path.exists(os.path.join(cdir, "artifacts.zip")): + shutil.move(os.path.join(cdir, "artifacts.zip"), + os.path.join(remote_dir_path, "report.zip")) + #else: + # Path(os.path.join(remote_dir_path, "report.zip")).touch() + shutil.move(os.path.join(cdir, "export.eln"), + os.path.join(remote_dir_path, "export.eln")) + trigger_crawler(target_dir=cdir) + +def test_memory(): + # test that maximum memory usage is below 1GB + assert 1000>max(memory_usage((_run, [10], {}))) + +def test_cpu(): + pr = cProfile.Profile() + pr.enable() + _run(n=1) + pr.disable() + s = io.StringIO() + sortby = SortKey.CUMULATIVE + ps = pstats.Stats(pr, stream=s).sort_stats(sortby) + ps.print_stats(10) + print(s.getvalue()) + ps.print_stats("ruqad", 10) + ps.print_stats("crawler", 10) + print(s.getvalue()) + +def test_runtime_eln_download(): + cut_off_date = datetime.fromisoformat("1990-01-01 02:34:42.484312+00:00") + with KadiManager(**KADIARGS) as manager: + rec_ids = collect_records_created_after(manager, cut_off_date) + + if len(rec_ids) == 0: + print("no new recs") + with TemporaryDirectory(delete=False) as cdir: + eln_file = os.path.join(cdir, "export.eln") + start = time() + download_eln_for(manager, rec_ids[0], path=eln_file) + stop = time() + + print(f"time for downloading eln: {stop-start:.2f} s") + +def test_runtime_crawler(): + cut_off_date = datetime.fromisoformat("1990-01-01 02:34:42.484312+00:00") + with KadiManager(**KADIARGS) as manager: + rec_ids = collect_records_created_after(manager, cut_off_date) + + if len(rec_ids) == 0: + print("no new recs") + with TemporaryDirectory(delete=False) as cdir: + eln_file = os.path.join(cdir, "export.eln") + download_eln_for(manager, rec_ids[0], path=eln_file) + # trigger crawler on dir + remote_dir_path = os.path.join(cdir, "ruqad", str(rec_ids[0])) + os.makedirs(remote_dir_path) + shutil.move(os.path.join(cdir, "export.eln"), + os.path.join(remote_dir_path, "export.eln")) + start = time() + trigger_crawler(target_dir=cdir) + stop = time() + + print(f"time for crawling eln: {stop-start:.2f} s") + +if __name__ == "__main__": + test_memory() + test_runtime_eln_download() + test_cpu() + test_runtime_crawler() diff --git a/pyproject.toml b/pyproject.toml index fd35267..c81a9cf 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -47,6 +47,8 @@ test = [ "tox", "pytest", "pytest-cov", + "guppy3", + "memory_profiler", ] all = [ "ruqad[dev]", -- GitLab From 42a4f27e5ed28a3166c0b8418daabeaa012efc4e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20tom=20W=C3=B6rden?= <h.tomwoerden@indiscale.com> Date: Thu, 9 Jan 2025 11:34:54 +0100 Subject: [PATCH 2/4] FIX: add ini for docker --- docker/Dockerfile | 2 +- docker/pylinkahead.ini | 36 ++++++++++++++++++++++++++++++++++++ 2 files changed, 37 insertions(+), 1 deletion(-) create mode 100644 docker/pylinkahead.ini diff --git a/docker/Dockerfile b/docker/Dockerfile index 638ffac..37b0aec 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -8,7 +8,7 @@ COPY ./pyproject.toml /ruqad/ # static configuration COPY ./qualitycheck_config.toml /ruqad/ -COPY ./pylinkahead.ini /ruqad/ +COPY ./docker/pylinkahead.ini /ruqad/ # Installing the package WORKDIR /ruqad/ diff --git a/docker/pylinkahead.ini b/docker/pylinkahead.ini new file mode 100644 index 0000000..a7a4fc0 --- /dev/null +++ b/docker/pylinkahead.ini @@ -0,0 +1,36 @@ +# The INI file must be located either in +# - $CWD/pylinkahead.ini +# - $HOME/.pylinkahead.ini +# - the location given in the env variable PYLINKAHEADINI + +[Connection] +# url=https://demo.indiscale.com/ +url = http://localhost:80/provider/linkahead/ + +## If this option is set, the SSL certificate will be ignored. Use with care! +#ssl_insecure=1 + +username=admin + +## The password input method can be chosen with the `password_method` setting, +## which by default is set to `plain`. +## +## DEFAULT: the password method is `plain`, now the password must be saved as +## plain text. +password_method=input +# password=caosdb + + +## OR: `input`: username is optional, password is entered by the user directly +# password_method=input + +## OR: `pass`: password is retrieved from the "pass" password manager +# password_method=pass +# password_identifier=... + +## OR: `keyring`: using the system keyring/wallet (macOS, GNOME, KDE, Windows) +## requires installation of the keyring python package: +## pip install keyring +# password_method=keyring + +timeout=10000 -- GitLab From 655d9468d00c0ce37a1bac0ffd91e45bdcd488d5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20tom=20W=C3=B6rden?= <h.tomwoerden@indiscale.com> Date: Thu, 9 Jan 2025 11:39:55 +0100 Subject: [PATCH 3/4] MAINT: remove unused file --- docker/pylinkahead.ini | 36 ------------------------------------ 1 file changed, 36 deletions(-) delete mode 100644 docker/pylinkahead.ini diff --git a/docker/pylinkahead.ini b/docker/pylinkahead.ini deleted file mode 100644 index a7a4fc0..0000000 --- a/docker/pylinkahead.ini +++ /dev/null @@ -1,36 +0,0 @@ -# The INI file must be located either in -# - $CWD/pylinkahead.ini -# - $HOME/.pylinkahead.ini -# - the location given in the env variable PYLINKAHEADINI - -[Connection] -# url=https://demo.indiscale.com/ -url = http://localhost:80/provider/linkahead/ - -## If this option is set, the SSL certificate will be ignored. Use with care! -#ssl_insecure=1 - -username=admin - -## The password input method can be chosen with the `password_method` setting, -## which by default is set to `plain`. -## -## DEFAULT: the password method is `plain`, now the password must be saved as -## plain text. -password_method=input -# password=caosdb - - -## OR: `input`: username is optional, password is entered by the user directly -# password_method=input - -## OR: `pass`: password is retrieved from the "pass" password manager -# password_method=pass -# password_identifier=... - -## OR: `keyring`: using the system keyring/wallet (macOS, GNOME, KDE, Windows) -## requires installation of the keyring python package: -## pip install keyring -# password_method=keyring - -timeout=10000 -- GitLab From e92628537ce3d970394536a7a0ad2b7317a66c12 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20tom=20W=C3=B6rden?= <h.tomwoerden@indiscale.com> Date: Fri, 10 Jan 2025 13:19:09 +0100 Subject: [PATCH 4/4] TST: write output to file --- performance-tests/test.py | 41 ++++++++++++++++++++------------------- 1 file changed, 21 insertions(+), 20 deletions(-) diff --git a/performance-tests/test.py b/performance-tests/test.py index 6409fb9..82d5b70 100644 --- a/performance-tests/test.py +++ b/performance-tests/test.py @@ -1,21 +1,22 @@ """ Tests CPU and Memory usage of RuQaD """ -import cProfile, pstats, io -from pstats import SortKey -from time import sleep,time -from tempfile import TemporaryDirectory +import cProfile +import io +import os +import pstats +import shutil from datetime import datetime, timezone from pathlib import Path +from pstats import SortKey +from tempfile import TemporaryDirectory +from time import sleep, time +from memory_profiler import memory_usage from memory_profiler import profile as mprofile - -from ruqad.qualitycheck import QualityChecker -from ruqad.kadi import collect_records_created_after, download_eln_for, KadiManager from ruqad.crawler import trigger_crawler -import os -import shutil -from memory_profiler import memory_usage +from ruqad.kadi import KadiManager, collect_records_created_after, download_eln_for +from ruqad.qualitycheck import QualityChecker SKIP_QUALITY_CHECK = os.getenv("SKIP_QUALITY_CHECK") is not None KADIARGS = { @@ -69,14 +70,12 @@ def test_cpu(): pr.enable() _run(n=1) pr.disable() - s = io.StringIO() - sortby = SortKey.CUMULATIVE - ps = pstats.Stats(pr, stream=s).sort_stats(sortby) - ps.print_stats(10) - print(s.getvalue()) - ps.print_stats("ruqad", 10) - ps.print_stats("crawler", 10) - print(s.getvalue()) + with open("performance.txt", 'a') as s: + sortby = SortKey.CUMULATIVE + ps = pstats.Stats(pr, stream=s).sort_stats(sortby) + ps.print_stats(10) + ps.print_stats("ruqad", 10) + ps.print_stats("crawler", 10) def test_runtime_eln_download(): cut_off_date = datetime.fromisoformat("1990-01-01 02:34:42.484312+00:00") @@ -91,7 +90,8 @@ def test_runtime_eln_download(): download_eln_for(manager, rec_ids[0], path=eln_file) stop = time() - print(f"time for downloading eln: {stop-start:.2f} s") + with open("performance.txt", 'a') as s: + s.write(f"time for downloading eln: {stop-start:.2f} s") def test_runtime_crawler(): cut_off_date = datetime.fromisoformat("1990-01-01 02:34:42.484312+00:00") @@ -112,7 +112,8 @@ def test_runtime_crawler(): trigger_crawler(target_dir=cdir) stop = time() - print(f"time for crawling eln: {stop-start:.2f} s") + with open("performance.txt", 'a') as s: + s.write(f"time for crawling eln: {stop-start:.2f} s") if __name__ == "__main__": test_memory() -- GitLab