diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 34c94e38249b7354f6feea661fd79a435336ea8d..b0bebf43b3e34818ea9a33d9dcc5c2a446fa78c1 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -19,7 +19,7 @@ # along with this program. If not, see <https://www.gnu.org/licenses/>. variables: - CI_REGISTRY_IMAGE: $CI_REGISTRY/caosdb/customers/f-fit/ruqad/ruqad-demonstrator-fair-ds:$CI_COMMIT_REF_NAME + CI_REGISTRY_IMAGE: $CI_REGISTRY/caosdb/src/fair-data-spaces/ruqad/ruqad-demonstrator-fair-ds:$CI_COMMIT_REF_NAME # Taken from: https://forum.gitlab.com/t/clarification/54346 GITLAB_FEATURES: "$GITLAB_FEATURES,dependency_scanning" diff --git a/README.md b/README.md index 8756abb32e00a6f81e44d9e538470c5882ebee90..053fe9bd6a427868bb8024456f811344a699ec25 100644 --- a/README.md +++ b/README.md @@ -16,8 +16,15 @@ Simply install with: Note: You can safely ignore the `requirements.txt`, this file is used as a lock file for components analysis. For more information, look at the section "SCA" below. +Additional runtime requirements: + +* curl + ### Run locally ### +- Configure your linkahead connection at [pylinkahead.ini](./pylinkahead.ini) +- Insert/synchronize the linkahead datamodel: + `python -m caosadvancedtools.models.parser src/ruqad/resources/crawler-settings/datamodel.yaml --sync` - Make sure that `qualitycheck_config.toml` and `secrets.sh` are filled with valied values. - Run `(set -a && . 
secrets.sh && rq_monitor)`, a short explanation follows: - `(...)`: Putting the parentheses prevents pollution of your shell with the variables defined in diff --git a/pyproject.toml b/pyproject.toml index 04100d9269366490069a65efa6ae4ce55e2d09a8..fd35267d689c7dae0660a05306cbc524475f8635 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -24,7 +24,7 @@ classifiers = [ requires-python = ">= 3.8" dependencies = [ "linkahead", - "caoscrawler[rocrate] >= 0.10.2", + "caoscrawler[rocrate] @ file:///home/tf/src/caosdb-crawler", "kadi-apy", "boto3>=1.35", "toml>=0.10", @@ -44,6 +44,7 @@ dev = [ "ruqad[test]", ] test = [ + "tox", "pytest", "pytest-cov", ] diff --git a/src/ruqad/crawler.py b/src/ruqad/crawler.py index cf3284f506e84657faaca2be1fad016893c384a1..aa643354baade438cfa02b00af8579a99554a8e7 100644 --- a/src/ruqad/crawler.py +++ b/src/ruqad/crawler.py @@ -83,6 +83,7 @@ def trigger_crawler(target_dir: str) -> tuple[bool, list[db.Entity]]: print("crawl", target_dir) crawler_main(crawled_directory_path=target_dir, + debug=True, cfood_file_name=ruqad_crawler_settings.joinpath('cfood.yaml'), identifiables_definition_file=ruqad_crawler_settings.joinpath( 'identifiables.yaml'), diff --git a/src/ruqad/kadi.py b/src/ruqad/kadi.py index f4ea8231df01c97b5b8b03dd5a61fd213ef60bfe..539f00f638707bad55048c5b3986299b1d6a9f44 100644 --- a/src/ruqad/kadi.py +++ b/src/ruqad/kadi.py @@ -18,7 +18,7 @@ utilities to create .eln exports for certain records hosted in a Kadi instance """ from __future__ import annotations -from kadi_apy import KadiManager +from kadi_apy import KadiManager as _KadiManager from datetime import datetime PAGE_SIZE = 100 @@ -89,6 +89,19 @@ def download_eln_for(manager: KadiManager, rid: int, path: str) -> None: rec = manager.record(id=rid) rec.export(path=path, export_type='ro-crate') +class KadiManager(_KadiManager): + """Fix KadiManager to respect context root in url. + + The API URL is only rewritten when ``host`` is passed explicitly. Without it + (e.g. ``KadiManager(instance='demo')``) the value set by the parent is kept. + """ + + def __init__(self, **kwargs): + super().__init__(**kwargs) + # "host" is optional: main() below passes only instance=..., so do not index kwargs. + host = kwargs.get("host") + if host is not None: + self.host = 
f'{host}/api/v1' def main(): with KadiManager(instance='demo') as manager: diff --git a/src/ruqad/monitor.py b/src/ruqad/monitor.py index 70a463c5c70a3f7e8acde06988f40d8fe8c06990..d67dab350761ff0df9d5be35b80e7a6959883c7f 100755 --- a/src/ruqad/monitor.py +++ b/src/ruqad/monitor.py @@ -29,18 +29,18 @@ import os from time import sleep from tempfile import TemporaryDirectory from datetime import datetime, timezone +from pathlib import Path from ruqad.qualitycheck import QualityChecker -from ruqad.kadi import collect_records_created_after, download_eln_for +from ruqad.kadi import collect_records_created_after, download_eln_for, KadiManager from ruqad.crawler import trigger_crawler -from kadi_apy import KadiManager - KADIARGS = { "host": os.environ['KADIHOST'], "pat": os.environ['KADITOKEN'], } +SKIP_QUALITY_CHECK = os.getenv("SKIP_QUALITY_CHECK") is not None def monitor(): """Continuously monitor the Kadi instance given in the environment variables. @@ -56,28 +56,34 @@ def monitor(): try: timestamp = datetime.now(timezone.utc) with KadiManager(**KADIARGS) as manager: - qc = QualityChecker() print(f"Checking for records created after {cut_off_date}...") rec_ids = collect_records_created_after(manager, cut_off_date) cut_off_date = timestamp - if len(rec_ids) > 5: + if len(rec_ids) > 25: print("skipping, too many recs: ", len(rec_ids)) continue if len(rec_ids) == 0: print("no new recs") for rid in rec_ids: - with TemporaryDirectory() as cdir: + with TemporaryDirectory(delete=False) as cdir: eln_file = os.path.join(cdir, "export.eln") download_eln_for(manager, rid, path=eln_file) print(f"Downlaoded {eln_file}") - qc.check(filename=eln_file, target_dir=cdir) - print(f"Quality check done. {os.listdir(cdir)}") + if SKIP_QUALITY_CHECK: + print("Found env 'SKIP_QUALITY_CHECK', skipping quality check") + else: + qc = QualityChecker() + qc.check(filename=eln_file, target_dir=cdir) + print(f"Quality check done. 
{os.listdir(cdir)}") # trigger crawler on dir remote_dir_path = os.path.join(cdir, "ruqad", str(rid)) os.makedirs(remote_dir_path) - shutil.move(os.path.join(cdir, "artifacts.zip"), - os.path.join(remote_dir_path, "report.zip")) + if os.path.exists(os.path.join(cdir, "artifacts.zip")): + shutil.move(os.path.join(cdir, "artifacts.zip"), + os.path.join(remote_dir_path, "report.zip")) + #else: + # Path(os.path.join(remote_dir_path, "report.zip")).touch() shutil.move(os.path.join(cdir, "export.eln"), os.path.join(remote_dir_path, "export.eln")) trigger_crawler(target_dir=cdir) diff --git a/src/ruqad/resources/crawler-settings/cfood.yaml b/src/ruqad/resources/crawler-settings/cfood.yaml index 3590556fe7a91c2ce4388fef2e219773116bbc1a..8d634d84de6fffa6e92e836acbab975d6897868a 100644 --- a/src/ruqad/resources/crawler-settings/cfood.yaml +++ b/src/ruqad/resources/crawler-settings/cfood.yaml @@ -181,7 +181,7 @@ DataDir: MetaData: type: DictElement match_properties: - propertyID: (?P<propid>.*)$ + propertyID: (?P<propid>(voltage|rating))$ value: (?P<propvalue>.*)$ transform: