From 356bf44b5f4c2a95fbfdf72a2f361f8a3b0b63c4 Mon Sep 17 00:00:00 2001 From: Timm Fitschen <t.fitschen@indiscale.com> Date: Fri, 20 Dec 2024 14:53:56 +0100 Subject: [PATCH] DOC: update README; FIX: application root for kadi --- README.md | 7 +++++ pylinkahead.ini | 5 ++-- pyproject.toml | 3 ++- src/ruqad/crawler.py | 1 + src/ruqad/kadi.py | 8 +++++- src/ruqad/monitor.py | 26 ++++++++++++------- .../resources/crawler-settings/cfood.yaml | 2 +- 7 files changed, 36 insertions(+), 16 deletions(-) diff --git a/README.md b/README.md index 8756abb..053fe9b 100644 --- a/README.md +++ b/README.md @@ -16,8 +16,15 @@ Simply install with: Note: You can safely ignore the `requirements.txt`, this file is used as a lock file for components analysis. For more information, look at the section "SCA" below. +Additional runtime requirements: + +* curl + ### Run locally ### +- Configure your linkahead connection at [pylinkahead.ini](./pylinkahead.ini) +- Insert/synchronize the linkahead datamodel: + `python -m caosadvancedtools.models.parser src/ruqad/resources/crawler-settings/datamodel.yaml --sync` - Make sure that `qualitycheck_config.toml` and `secrets.sh` are filled with valied values. - Run `(set -a && . secrets.sh && rq_monitor)`, a short explanation follows: - `(...)`: Putting the parentheses prevents pollution of your shell with the variables defined in diff --git a/pylinkahead.ini b/pylinkahead.ini index fb782ac..a7a4fc0 100644 --- a/pylinkahead.ini +++ b/pylinkahead.ini @@ -5,10 +5,10 @@ [Connection] # url=https://demo.indiscale.com/ -url=https://localhost:10443/ +url = http://localhost:80/provider/linkahead/ ## If this option is set, the SSL certificate will be ignored. Use with care! -ssl_insecure=1 +#ssl_insecure=1 username=admin @@ -34,4 +34,3 @@ password_method=input # password_method=keyring timeout=10000 - diff --git a/pyproject.toml b/pyproject.toml index bb94531..b0739c1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -24,7 +24,7 @@ classifiers = [ requires-python = ">= 3.8" dependencies = [ "linkahead", - "caoscrawler[rocrate] >= 0.10.2", + "caoscrawler[rocrate] @ file:///home/tf/src/caosdb-crawler", "kadi-apy", "boto3>=1.35", "toml>=0.10", @@ -44,6 +44,7 @@ dev = [ "ruqad[test]", ] test = [ + "tox", "pytest", "pytest-cov", ] diff --git a/src/ruqad/crawler.py b/src/ruqad/crawler.py index cf3284f..aa64335 100644 --- a/src/ruqad/crawler.py +++ b/src/ruqad/crawler.py @@ -83,6 +83,7 @@ def trigger_crawler(target_dir: str) -> tuple[bool, list[db.Entity]]: print("crawl", target_dir) crawler_main(crawled_directory_path=target_dir, + debug=True, cfood_file_name=ruqad_crawler_settings.joinpath('cfood.yaml'), identifiables_definition_file=ruqad_crawler_settings.joinpath( 'identifiables.yaml'), diff --git a/src/ruqad/kadi.py b/src/ruqad/kadi.py index f4ea823..539f00f 100644 --- a/src/ruqad/kadi.py +++ b/src/ruqad/kadi.py @@ -18,7 +18,7 @@ utilities to create .eln exports for certain records hosted in a Kadi instance """ from __future__ import annotations -from kadi_apy import KadiManager +from kadi_apy import KadiManager as _KadiManager from datetime import datetime PAGE_SIZE = 100 @@ -89,6 +89,12 @@ def download_eln_for(manager: KadiManager, rid: int, path: str) -> None: rec = manager.record(id=rid) rec.export(path=path, export_type='ro-crate') +class KadiManager(_KadiManager): + """Fix KadiManager to respect context root in url.""" + + def __init__(self, **kwargs): + super().__init__(**kwargs) + self.host = f'{kwargs["host"]}/api/v1' def main(): with KadiManager(instance='demo') as manager: diff --git a/src/ruqad/monitor.py b/src/ruqad/monitor.py index 70a463c..d67dab3 100755 --- a/src/ruqad/monitor.py +++ b/src/ruqad/monitor.py @@ -29,18 +29,18 @@ import os from time import sleep from tempfile import TemporaryDirectory from datetime import datetime, timezone +from pathlib import Path from ruqad.qualitycheck import QualityChecker -from ruqad.kadi import collect_records_created_after, download_eln_for +from ruqad.kadi import collect_records_created_after, download_eln_for, KadiManager from ruqad.crawler import trigger_crawler -from kadi_apy import KadiManager - KADIARGS = { "host": os.environ['KADIHOST'], "pat": os.environ['KADITOKEN'], } +SKIP_QUALITY_CHECK = os.getenv("SKIP_QUALITY_CHECK") is not None def monitor(): """Continuously monitor the Kadi instance given in the environment variables. @@ -56,28 +56,34 @@ def monitor(): try: timestamp = datetime.now(timezone.utc) with KadiManager(**KADIARGS) as manager: - qc = QualityChecker() print(f"Checking for records created after {cut_off_date}...") rec_ids = collect_records_created_after(manager, cut_off_date) cut_off_date = timestamp - if len(rec_ids) > 5: + if len(rec_ids) > 25: print("skipping, too many recs: ", len(rec_ids)) continue if len(rec_ids) == 0: print("no new recs") for rid in rec_ids: - with TemporaryDirectory() as cdir: + with TemporaryDirectory(delete=False) as cdir: eln_file = os.path.join(cdir, "export.eln") download_eln_for(manager, rid, path=eln_file) print(f"Downlaoded {eln_file}") - qc.check(filename=eln_file, target_dir=cdir) - print(f"Quality check done. {os.listdir(cdir)}") + if SKIP_QUALITY_CHECK: + print("Found env 'SKIP_QUALITY_CHECK', skipping quality check") + else: + qc = QualityChecker() + qc.check(filename=eln_file, target_dir=cdir) + print(f"Quality check done. {os.listdir(cdir)}") # trigger crawler on dir remote_dir_path = os.path.join(cdir, "ruqad", str(rid)) os.makedirs(remote_dir_path) - shutil.move(os.path.join(cdir, "artifacts.zip"), - os.path.join(remote_dir_path, "report.zip")) + if os.path.exists(os.path.join(cdir, "artifacts.zip")): + shutil.move(os.path.join(cdir, "artifacts.zip"), + os.path.join(remote_dir_path, "report.zip")) + #else: + # Path(os.path.join(remote_dir_path, "report.zip")).touch() shutil.move(os.path.join(cdir, "export.eln"), os.path.join(remote_dir_path, "export.eln")) trigger_crawler(target_dir=cdir) diff --git a/src/ruqad/resources/crawler-settings/cfood.yaml b/src/ruqad/resources/crawler-settings/cfood.yaml index 3590556..8d634d8 100644 --- a/src/ruqad/resources/crawler-settings/cfood.yaml +++ b/src/ruqad/resources/crawler-settings/cfood.yaml @@ -181,7 +181,7 @@ DataDir: MetaData: type: DictElement match_properties: - propertyID: (?P<propid>.*)$ + propertyID: (?P<propid>(voltage|rating))$ value: (?P<propvalue>.*)$ transform: -- GitLab