From 356bf44b5f4c2a95fbfdf72a2f361f8a3b0b63c4 Mon Sep 17 00:00:00 2001
From: Timm Fitschen <t.fitschen@indiscale.com>
Date: Fri, 20 Dec 2024 14:53:56 +0100
Subject: [PATCH] DOC: update README; FIX: application root for kadi

---
 README.md                                     |  7 +++++
 pylinkahead.ini                               |  5 ++--
 pyproject.toml                                |  3 ++-
 src/ruqad/crawler.py                          |  1 +
 src/ruqad/kadi.py                             |  8 +++++-
 src/ruqad/monitor.py                          | 26 ++++++++++++-------
 .../resources/crawler-settings/cfood.yaml     |  2 +-
 7 files changed, 36 insertions(+), 16 deletions(-)

diff --git a/README.md b/README.md
index 8756abb..053fe9b 100644
--- a/README.md
+++ b/README.md
@@ -16,8 +16,15 @@ Simply install with:
 Note: You can safely ignore the `requirements.txt`, this file is used as a lock file for components
 analysis.  For more information, look at the section "SCA" below.
 
+Additional runtime requirements:
+
+* curl
+
 ### Run locally ###
 
+- Configure your LinkAhead connection in [pylinkahead.ini](./pylinkahead.ini).
+- Insert/synchronize the LinkAhead data model:
+  `python -m caosadvancedtools.models.parser src/ruqad/resources/crawler-settings/datamodel.yaml --sync`
 - Make sure that `qualitycheck_config.toml` and `secrets.sh` are filled with valied values.
 - Run `(set -a && . secrets.sh && rq_monitor)`, a short explanation follows:
   - `(...)`: Putting the parentheses prevents pollution of your shell with the variables defined in
diff --git a/pylinkahead.ini b/pylinkahead.ini
index fb782ac..a7a4fc0 100644
--- a/pylinkahead.ini
+++ b/pylinkahead.ini
@@ -5,10 +5,10 @@
 
 [Connection]
 # url=https://demo.indiscale.com/
-url=https://localhost:10443/
+url = http://localhost:80/provider/linkahead/
 
 ## If this option is set, the SSL certificate will be ignored.  Use with care!
-ssl_insecure=1
+#ssl_insecure=1
 
 username=admin
 
@@ -34,4 +34,3 @@ password_method=input
 # password_method=keyring
 
 timeout=10000
-
diff --git a/pyproject.toml b/pyproject.toml
index bb94531..b0739c1 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -24,7 +24,7 @@ classifiers = [
 requires-python = ">= 3.8"
 dependencies = [
              "linkahead",
-             "caoscrawler[rocrate] >= 0.10.2",
+             "caoscrawler[rocrate] @ file:///home/tf/src/caosdb-crawler",
              "kadi-apy",
              "boto3>=1.35",
              "toml>=0.10",
@@ -44,6 +44,7 @@ dev = [
     "ruqad[test]",
 ]
 test = [
+    "tox",
     "pytest",
     "pytest-cov",
 ]
diff --git a/src/ruqad/crawler.py b/src/ruqad/crawler.py
index cf3284f..aa64335 100644
--- a/src/ruqad/crawler.py
+++ b/src/ruqad/crawler.py
@@ -83,6 +83,7 @@ def trigger_crawler(target_dir: str) -> tuple[bool, list[db.Entity]]:
 
     print("crawl", target_dir)
     crawler_main(crawled_directory_path=target_dir,
+                 debug=True,
                  cfood_file_name=ruqad_crawler_settings.joinpath('cfood.yaml'),
                  identifiables_definition_file=ruqad_crawler_settings.joinpath(
                      'identifiables.yaml'),
diff --git a/src/ruqad/kadi.py b/src/ruqad/kadi.py
index f4ea823..539f00f 100644
--- a/src/ruqad/kadi.py
+++ b/src/ruqad/kadi.py
@@ -18,7 +18,7 @@
 utilities to create .eln exports for certain records hosted in a Kadi instance
 """
 from __future__ import annotations
-from kadi_apy import KadiManager
+from kadi_apy import KadiManager as _KadiManager
 from datetime import datetime
 
 PAGE_SIZE = 100
@@ -89,6 +89,12 @@ def download_eln_for(manager: KadiManager, rid: int, path: str) -> None:
     rec = manager.record(id=rid)
     rec.export(path=path, export_type='ro-crate')
 
+class KadiManager(_KadiManager):
+    """KadiManager that keeps the context root of an explicit ``host`` url."""
+    def __init__(self, **kwargs):
+        super().__init__(**kwargs)
+        if kwargs.get("host"):  # no "host" kwarg when created via ``instance=...``
+            self.host = f'{kwargs["host"].rstrip("/")}/api/v1'
 
 def main():
     with KadiManager(instance='demo') as manager:
diff --git a/src/ruqad/monitor.py b/src/ruqad/monitor.py
index 70a463c..d67dab3 100755
--- a/src/ruqad/monitor.py
+++ b/src/ruqad/monitor.py
@@ -29,18 +29,18 @@ import os
 from time import sleep
 from tempfile import TemporaryDirectory
 from datetime import datetime, timezone
+from pathlib import Path
 
 from ruqad.qualitycheck import QualityChecker
-from ruqad.kadi import collect_records_created_after, download_eln_for
+from ruqad.kadi import collect_records_created_after, download_eln_for, KadiManager
 from ruqad.crawler import trigger_crawler
-from kadi_apy import KadiManager
-
 
 KADIARGS = {
     "host": os.environ['KADIHOST'],
     "pat": os.environ['KADITOKEN'],
 }
 
+SKIP_QUALITY_CHECK = os.getenv("SKIP_QUALITY_CHECK") is not None
 
 def monitor():
     """Continuously monitor the Kadi instance given in the environment variables.
@@ -56,28 +56,34 @@ def monitor():
         try:
             timestamp = datetime.now(timezone.utc)
             with KadiManager(**KADIARGS) as manager:
-                qc = QualityChecker()
                 print(f"Checking for records created after {cut_off_date}...")
                 rec_ids = collect_records_created_after(manager, cut_off_date)
                 cut_off_date = timestamp
 
-                if len(rec_ids) > 5:
+                if len(rec_ids) > 25:
                     print("skipping, too many recs: ", len(rec_ids))
                     continue
                 if len(rec_ids) == 0:
                     print("no new recs")
                 for rid in rec_ids:
-                    with TemporaryDirectory() as cdir:
+                    with TemporaryDirectory(delete=False) as cdir:
                         eln_file = os.path.join(cdir, "export.eln")
                         download_eln_for(manager, rid, path=eln_file)
                         print(f"Downlaoded {eln_file}")
-                        qc.check(filename=eln_file, target_dir=cdir)
-                        print(f"Quality check done. {os.listdir(cdir)}")
+                        if SKIP_QUALITY_CHECK:
+                            print("Found env 'SKIP_QUALITY_CHECK', skipping quality check")
+                        else:
+                            qc = QualityChecker()
+                            qc.check(filename=eln_file, target_dir=cdir)
+                            print(f"Quality check done. {os.listdir(cdir)}")
                         # trigger crawler on dir
                         remote_dir_path = os.path.join(cdir, "ruqad", str(rid))
                         os.makedirs(remote_dir_path)
-                        shutil.move(os.path.join(cdir, "artifacts.zip"),
-                                    os.path.join(remote_dir_path, "report.zip"))
+                        if os.path.exists(os.path.join(cdir, "artifacts.zip")):
+                            shutil.move(os.path.join(cdir, "artifacts.zip"),
+                                        os.path.join(remote_dir_path, "report.zip"))
+                        #else:
+                        #    Path(os.path.join(remote_dir_path, "report.zip")).touch()
                         shutil.move(os.path.join(cdir, "export.eln"),
                                     os.path.join(remote_dir_path, "export.eln"))
                         trigger_crawler(target_dir=cdir)
diff --git a/src/ruqad/resources/crawler-settings/cfood.yaml b/src/ruqad/resources/crawler-settings/cfood.yaml
index 3590556..8d634d8 100644
--- a/src/ruqad/resources/crawler-settings/cfood.yaml
+++ b/src/ruqad/resources/crawler-settings/cfood.yaml
@@ -181,7 +181,7 @@ DataDir:
                             MetaData:
                               type: DictElement
                               match_properties:
-                                propertyID: (?P<propid>.*)$
+                                propertyID: (?P<propid>(voltage|rating))$
                                 value: (?P<propvalue>.*)$
 
                               transform:
-- 
GitLab