Skip to content
Snippets Groups Projects
Commit 6774b1f1 authored by Alexander Schlemmer's avatar Alexander Schlemmer
Browse files

Merge branch 'dev' of gitlab.indiscale.com:caosdb/customers/f-fit/ruqad into dev

parents ba85c5da 356bf44b
No related branches found
No related tags found
No related merge requests found
Pipeline #59458 failed
...@@ -19,7 +19,7 @@ ...@@ -19,7 +19,7 @@
# along with this program. If not, see <https://www.gnu.org/licenses/>. # along with this program. If not, see <https://www.gnu.org/licenses/>.
variables: variables:
CI_REGISTRY_IMAGE: $CI_REGISTRY/caosdb/customers/f-fit/ruqad/ruqad-demonstrator-fair-ds:$CI_COMMIT_REF_NAME CI_REGISTRY_IMAGE: $CI_REGISTRY/caosdb/src/fair-data-spaces/ruqad/ruqad-demonstrator-fair-ds:$CI_COMMIT_REF_NAME
# Taken from: https://forum.gitlab.com/t/clarification/54346 # Taken from: https://forum.gitlab.com/t/clarification/54346
GITLAB_FEATURES: "$GITLAB_FEATURES,dependency_scanning" GITLAB_FEATURES: "$GITLAB_FEATURES,dependency_scanning"
......
...@@ -16,8 +16,15 @@ Simply install with: ...@@ -16,8 +16,15 @@ Simply install with:
Note: You can safely ignore the `requirements.txt`, this file is used as a lock file for components Note: You can safely ignore the `requirements.txt`, this file is used as a lock file for components
analysis. For more information, look at the section "SCA" below. analysis. For more information, look at the section "SCA" below.
Additional runtime requirements:
* curl
### Run locally ### ### Run locally ###
- Configure your linkahead connection at [pylinkahead.ini](./pylinkahead.ini)
- Insert/synchronize the linkahead datamodel:
`python -m caosadvancedtools.models.parser src/ruqad/resources/crawler-settings/datamodel.yaml --sync`
- Make sure that `qualitycheck_config.toml` and `secrets.sh` are filled with valid values. - Make sure that `qualitycheck_config.toml` and `secrets.sh` are filled with valid values.
- Run `(set -a && . secrets.sh && rq_monitor)`, a short explanation follows: - Run `(set -a && . secrets.sh && rq_monitor)`, a short explanation follows:
- `(...)`: Putting the parentheses prevents pollution of your shell with the variables defined in - `(...)`: Putting the parentheses prevents pollution of your shell with the variables defined in
......
...@@ -24,7 +24,7 @@ classifiers = [ ...@@ -24,7 +24,7 @@ classifiers = [
requires-python = ">= 3.8" requires-python = ">= 3.8"
dependencies = [ dependencies = [
"linkahead", "linkahead",
"caoscrawler[rocrate] >= 0.10.2", "caoscrawler[rocrate] @ file:///home/tf/src/caosdb-crawler",
"kadi-apy", "kadi-apy",
"boto3>=1.35", "boto3>=1.35",
"toml>=0.10", "toml>=0.10",
...@@ -44,6 +44,7 @@ dev = [ ...@@ -44,6 +44,7 @@ dev = [
"ruqad[test]", "ruqad[test]",
] ]
test = [ test = [
"tox",
"pytest", "pytest",
"pytest-cov", "pytest-cov",
] ]
......
...@@ -83,6 +83,7 @@ def trigger_crawler(target_dir: str) -> tuple[bool, list[db.Entity]]: ...@@ -83,6 +83,7 @@ def trigger_crawler(target_dir: str) -> tuple[bool, list[db.Entity]]:
print("crawl", target_dir) print("crawl", target_dir)
crawler_main(crawled_directory_path=target_dir, crawler_main(crawled_directory_path=target_dir,
debug=True,
cfood_file_name=ruqad_crawler_settings.joinpath('cfood.yaml'), cfood_file_name=ruqad_crawler_settings.joinpath('cfood.yaml'),
identifiables_definition_file=ruqad_crawler_settings.joinpath( identifiables_definition_file=ruqad_crawler_settings.joinpath(
'identifiables.yaml'), 'identifiables.yaml'),
......
...@@ -18,7 +18,7 @@ ...@@ -18,7 +18,7 @@
utilities to create .eln exports for certain records hosted in a Kadi instance utilities to create .eln exports for certain records hosted in a Kadi instance
""" """
from __future__ import annotations from __future__ import annotations
from kadi_apy import KadiManager from kadi_apy import KadiManager as _KadiManager
from datetime import datetime from datetime import datetime
PAGE_SIZE = 100 PAGE_SIZE = 100
...@@ -89,6 +89,12 @@ def download_eln_for(manager: KadiManager, rid: int, path: str) -> None: ...@@ -89,6 +89,12 @@ def download_eln_for(manager: KadiManager, rid: int, path: str) -> None:
rec = manager.record(id=rid) rec = manager.record(id=rid)
rec.export(path=path, export_type='ro-crate') rec.export(path=path, export_type='ro-crate')
class KadiManager(_KadiManager):
    """Fix KadiManager to respect context root in url.

    All keyword arguments are forwarded unchanged to the upstream
    ``kadi_apy.KadiManager``.  If an explicit ``host`` keyword was given, the
    ``host`` attribute is afterwards overwritten with ``<host>/api/v1`` so that
    any path component (context root) contained in ``host`` is kept.

    If no ``host`` keyword is given (e.g. ``KadiManager(instance='demo')``),
    the value set by the upstream constructor is left untouched instead of
    failing with a ``KeyError``.
    """

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        # Only override when the caller actually supplied a host; otherwise
        # the upstream class has resolved it by its own means.
        host = kwargs.get("host")
        if host is not None:
            self.host = f'{host}/api/v1'
def main(): def main():
with KadiManager(instance='demo') as manager: with KadiManager(instance='demo') as manager:
......
...@@ -29,18 +29,18 @@ import os ...@@ -29,18 +29,18 @@ import os
from time import sleep from time import sleep
from tempfile import TemporaryDirectory from tempfile import TemporaryDirectory
from datetime import datetime, timezone from datetime import datetime, timezone
from pathlib import Path
from ruqad.qualitycheck import QualityChecker from ruqad.qualitycheck import QualityChecker
from ruqad.kadi import collect_records_created_after, download_eln_for from ruqad.kadi import collect_records_created_after, download_eln_for, KadiManager
from ruqad.crawler import trigger_crawler from ruqad.crawler import trigger_crawler
from kadi_apy import KadiManager
KADIARGS = { KADIARGS = {
"host": os.environ['KADIHOST'], "host": os.environ['KADIHOST'],
"pat": os.environ['KADITOKEN'], "pat": os.environ['KADITOKEN'],
} }
SKIP_QUALITY_CHECK = os.getenv("SKIP_QUALITY_CHECK") is not None
def monitor(): def monitor():
"""Continuously monitor the Kadi instance given in the environment variables. """Continuously monitor the Kadi instance given in the environment variables.
...@@ -56,28 +56,34 @@ def monitor(): ...@@ -56,28 +56,34 @@ def monitor():
try: try:
timestamp = datetime.now(timezone.utc) timestamp = datetime.now(timezone.utc)
with KadiManager(**KADIARGS) as manager: with KadiManager(**KADIARGS) as manager:
qc = QualityChecker()
print(f"Checking for records created after {cut_off_date}...") print(f"Checking for records created after {cut_off_date}...")
rec_ids = collect_records_created_after(manager, cut_off_date) rec_ids = collect_records_created_after(manager, cut_off_date)
cut_off_date = timestamp cut_off_date = timestamp
if len(rec_ids) > 5: if len(rec_ids) > 25:
print("skipping, too many recs: ", len(rec_ids)) print("skipping, too many recs: ", len(rec_ids))
continue continue
if len(rec_ids) == 0: if len(rec_ids) == 0:
print("no new recs") print("no new recs")
for rid in rec_ids: for rid in rec_ids:
with TemporaryDirectory() as cdir: with TemporaryDirectory(delete=False) as cdir:
eln_file = os.path.join(cdir, "export.eln") eln_file = os.path.join(cdir, "export.eln")
download_eln_for(manager, rid, path=eln_file) download_eln_for(manager, rid, path=eln_file)
print(f"Downlaoded {eln_file}") print(f"Downlaoded {eln_file}")
qc.check(filename=eln_file, target_dir=cdir) if SKIP_QUALITY_CHECK:
print(f"Quality check done. {os.listdir(cdir)}") print("Found env 'SKIP_QUALITY_CHECK', skipping quality check")
else:
qc = QualityChecker()
qc.check(filename=eln_file, target_dir=cdir)
print(f"Quality check done. {os.listdir(cdir)}")
# trigger crawler on dir # trigger crawler on dir
remote_dir_path = os.path.join(cdir, "ruqad", str(rid)) remote_dir_path = os.path.join(cdir, "ruqad", str(rid))
os.makedirs(remote_dir_path) os.makedirs(remote_dir_path)
shutil.move(os.path.join(cdir, "artifacts.zip"), if os.path.exists(os.path.join(cdir, "artifacts.zip")):
os.path.join(remote_dir_path, "report.zip")) shutil.move(os.path.join(cdir, "artifacts.zip"),
os.path.join(remote_dir_path, "report.zip"))
#else:
# Path(os.path.join(remote_dir_path, "report.zip")).touch()
shutil.move(os.path.join(cdir, "export.eln"), shutil.move(os.path.join(cdir, "export.eln"),
os.path.join(remote_dir_path, "export.eln")) os.path.join(remote_dir_path, "export.eln"))
trigger_crawler(target_dir=cdir) trigger_crawler(target_dir=cdir)
......
...@@ -181,7 +181,7 @@ DataDir: ...@@ -181,7 +181,7 @@ DataDir:
MetaData: MetaData:
type: DictElement type: DictElement
match_properties: match_properties:
propertyID: (?P<propid>.*)$ propertyID: (?P<propid>(voltage|rating))$
value: (?P<propvalue>.*)$ value: (?P<propvalue>.*)$
transform: transform:
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment