diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index b0bebf43b3e34818ea9a33d9dcc5c2a446fa78c1..9630e0df00132cf46d445a1a80345630f3fc10ac 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -29,6 +29,7 @@ stages:
   - style
   - code-analysis
   - test
+  - pages
   - deploy
 
 
@@ -52,15 +53,6 @@ info:
   script:
     - *env
 
-e2e_tests:
-  tags: [cached-dind]
-  stage: test
-  image: $CI_REGISTRY_IMAGE
-  #image: python:3.13
-  script:
-    - echo $KADITOKEN
-    - python -m pytest end-to-end-tests/test_kadi.py
-
 build-testenv:
   tags: [cached-dind]
   image: docker:20.10
@@ -119,6 +111,26 @@ gemnasium-python-dependency_scanning:
   needs: [info]
   stage: code-analysis
 
+##################
+### Test stage ###
+##################
+
+e2e_tests:
+  tags: [cached-dind]
+  needs: [build-testenv]
+  stage: test
+  image: $CI_REGISTRY_IMAGE
+  #image: python:3.13
+  allow_failure: true
+  script:
+    - test -n "$KADITOKEN" && echo "KADITOKEN is set" || echo "KADITOKEN is NOT set"  # never print the token
+    - pytest --cov=ruqad end-to-end-tests/test_kadi.py
+    - mkdir coverage
+    - mv .coverage coverage/e2e
+  artifacts:
+    paths:
+      - coverage/
+
 unittest_py3.9:
   tags: [cached-dind]
   needs: [build-testenv]
@@ -128,8 +140,15 @@ unittest_py3.9:
     # TODO Remove this manual crawler installation after the crawler has been released in version 0.10.2
     - pip install --root-user-action=ignore git+https://gitlab.indiscale.com/caosdb/src/caosdb-crawler.git@dev
     - pip install .[all]
+    # Setting up the configuration
+    - cp qualitycheck_config.example.toml qualitycheck_config.toml
     # actual test
     - pytest --cov=ruqad -vv ./unittests
+    - mkdir coverage
+    - mv .coverage coverage/"${CI_JOB_NAME}"
+  artifacts:
+    paths:
+      - coverage
 
 unittest_py3.10:
   tags: [cached-dind]
@@ -158,3 +177,33 @@ unittest_py3.13:
   stage: test
   image: python:3.13
   script: *python_test_script
+  artifacts:
+    paths:
+      - coverage
+
+# Collect coverage reports from multiple tests, combine them and generate a web page
+coverage_job:
+  tags: [cached-dind]
+  image: python:3.13
+  stage: pages
+  needs:
+    - e2e_tests
+    - unittest_py3.13
+  script:
+    # Setup
+    - ls -alrth . 
coverage
+    - pip install --root-user-action=ignore pytest-cov
+    - pip install --no-deps .
+    # Merging and report generation
+    - coverage combine coverage/*
+    - coverage html
+    - mv htmlcov public
+    # Output for the pipeline follows
+    - echo -e "To look at the coverage report, either download the artifact or go to:\n > https://docs.indiscale.com/customers/f-fit/ruqad/"
+    - coverage report
+  # Regex taken from https://docs.gitlab.com/ee/ci/testing/code_coverage.html#test-coverage-examples
+  coverage: '/TOTAL.*? (100(?:\.0+)?\%|[1-9]?\d(?:\.\d+)?\%)$/'
+  pages: true
+  artifacts:
+    paths:
+      - public
diff --git a/README.md b/README.md
index 053fe9bd6a427868bb8024456f811344a699ec25..0e450f7ccb0e9ec907e16ef7d912f96d292be630 100644
--- a/README.md
+++ b/README.md
@@ -5,6 +5,8 @@ populating FAIR data spaces. Ruqad connects to [Kadi4Mat](https://kadi.iam.kit.
 the data, stores the results in a [LinkAhead](https://getlinkahead.com) instance and makes the data
 available via an [EDC (Eclipse Dataspace Components)](https://projects.eclipse.org/projects/technology.edc)
 instance.
+
+
 ## Usage
 
 ### Installation ###
diff --git a/docker/Dockerfile b/docker/Dockerfile
index 638ffac54845811cdb761d827e80c57e7eb4b584..211e8560f4cd7bd200da9e88b99f65fdaaadb4ef 100644
--- a/docker/Dockerfile
+++ b/docker/Dockerfile
@@ -8,7 +8,7 @@ COPY ./pyproject.toml /ruqad/
 
 # static configuration
 COPY ./qualitycheck_config.toml /ruqad/
-COPY ./pylinkahead.ini /ruqad/
+COPY ./pylinkahead.template.ini /ruqad/pylinkahead.ini
 
 # Installing the package
 WORKDIR /ruqad/
diff --git a/pylinkahead.template.ini b/pylinkahead.template.ini
new file mode 100644
index 0000000000000000000000000000000000000000..e5491096d07658719eb28e8ebc6cef48959986d5
--- /dev/null
+++ b/pylinkahead.template.ini
@@ -0,0 +1,35 @@
+# The INI file must be located either in
+# - $CWD/pylinkahead.ini
+# - $HOME/.pylinkahead.ini
+# - the location given in the env variable PYLINKAHEADINI
+
+[Connection]
+# url=https://demo.indiscale.com/
+url = http://localhost:80/provider/linkahead/
+
+## If this option is set, the SSL certificate will be ignored. Use with care!
+#ssl_insecure=1
+
+username=admin
+
+## The password input method can be chosen with the `password_method` setting,
+## which by default is set to `plain`.
+##
+## DEFAULT: the password method is `plain`, now the password must be saved as
+## plain text.
+password=caosdb
+
+
+## OR: `input`: username is optional, password is entered by the user directly
+# password_method=input
+
+## OR: `pass`: password is retrieved from the "pass" password manager
+# password_method=pass
+# password_identifier=...
+
+## OR: `keyring`: using the system keyring/wallet (macOS, GNOME, KDE, Windows)
+## requires installation of the keyring python package:
+## pip install keyring
+# password_method=keyring
+
+timeout=10000
diff --git a/pyproject.toml b/pyproject.toml
index fd35267d689c7dae0660a05306cbc524475f8635..7b280af375a61968ad0e335d1f5abc1dbe453aae 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -24,7 +24,7 @@ classifiers = [
 requires-python = ">= 3.8"
 dependencies = [
     "linkahead",
-    "caoscrawler[rocrate] @ file:///home/tf/src/caosdb-crawler",
+    "caoscrawler[rocrate] @ git+https://gitlab.indiscale.com/caosdb/src/caosdb-crawler.git@96ae0ada880049eec7673637816b20360a0d63cf",
     "kadi-apy",
     "boto3>=1.35",
     "toml>=0.10",
@@ -46,6 +46,7 @@ dev = [
 test = [
     "tox",
     "pytest",
+    "pytest-env",
     "pytest-cov",
 ]
 all = [
@@ -58,3 +59,11 @@ rq_qualitycheck = "ruqad.qualitycheck:main"
 
 [tool.setuptools.package-data]
 ruqad = ["resources/**/*"]
+
+[tool.pytest.ini_options]
+env = [
+    "S3_ACCESS_KEY_ID=1234",
+    "S3_SECRET_ACCESS_KEY=1234",
+    "GITLAB_PIPELINE_TOKEN=1234",
+    "GITLAB_API_TOKEN=1234",
+]
diff --git a/src/ruqad/crawler.py b/src/ruqad/crawler.py
index aa643354baade438cfa02b00af8579a99554a8e7..ba459fa3bb439c2fa1c75bbf189627ee59bc448f 100644
--- a/src/ruqad/crawler.py
+++ b/src/ruqad/crawler.py
@@ -87,6 +87,6 @@ def trigger_crawler(target_dir: str) -> tuple[bool, list[db.Entity]]:
         cfood_file_name=ruqad_crawler_settings.joinpath('cfood.yaml'),
         identifiables_definition_file=ruqad_crawler_settings.joinpath(
             'identifiables.yaml'),
-        remove_prefix="/"+os.path.basename(target_dir))
+        remove_prefix="/" + os.path.basename(target_dir))
 
     return (True, ent_qc)
diff --git a/src/ruqad/kadi.py b/src/ruqad/kadi.py
index 539f00f638707bad55048c5b3986299b1d6a9f44..2e9f67523a63b1ca8c31a2362f3a3e33dd82f1d4 100644
--- a/src/ruqad/kadi.py
+++ b/src/ruqad/kadi.py
@@ -44,11 +44,12 @@ def _generate_pages(manager) -> dict:
                         + str(response['code']))
     n_pages = response["_pagination"]["total_pages"]
     for ii in 
range(n_pages):
-        query_params.update({"page": ii+1})
+        query_params.update({"page": ii + 1})
         yield manager.search.search_resources("record", **query_params).json()
 
 
-def collect_records_created_after(manager: KadiManager, cut_off_date: datetime.datetime) -> list(int):
+def collect_records_created_after(manager: KadiManager,
+                                  cut_off_date: datetime.datetime) -> list[int]:
     """
     Iterates page-wise over the responses of the Kadi API until records are reached that are older
     than the given cut_off_date.
@@ -89,6 +90,7 @@ def download_eln_for(manager: KadiManager, rid: int, path: str) -> None:
     rec = manager.record(id=rid)
     rec.export(path=path, export_type='ro-crate')
 
+
 class KadiManager(_KadiManager):
     """Fix KadiManager to respect context root in url."""
 
@@ -96,6 +98,7 @@ class KadiManager(_KadiManager):
         super().__init__(**kwargs)
         self.host = f'{kwargs["host"]}/api/v1'
 
+
 def main():
     with KadiManager(instance='demo') as manager:
         cut_off_date = datetime.fromisoformat(
diff --git a/src/ruqad/monitor.py b/src/ruqad/monitor.py
index d67dab350761ff0df9d5be35b80e7a6959883c7f..9d549d0d235dae61d9df4ddaf2e17f5d15e4c45f 100755
--- a/src/ruqad/monitor.py
+++ b/src/ruqad/monitor.py
@@ -42,6 +42,7 @@ KADIARGS = {
 
 SKIP_QUALITY_CHECK = os.getenv("SKIP_QUALITY_CHECK") is not None
 
+
 def monitor():
     """Continuously monitor the Kadi instance given in the environment variables.
 
@@ -82,7 +83,7 @@ def monitor():
                     if os.path.exists(os.path.join(cdir, "artifacts.zip")):
                         shutil.move(os.path.join(cdir, "artifacts.zip"),
                                     os.path.join(remote_dir_path, "report.zip"))
-                    #else:
+                    # else:
                     #    Path(os.path.join(remote_dir_path, "report.zip")).touch()
                     shutil.move(os.path.join(cdir, "export.eln"),
                                 os.path.join(remote_dir_path, "export.eln"))
diff --git a/src/ruqad/qualitycheck.py b/src/ruqad/qualitycheck.py
index 9ba034590f94b12f9d2756be0eafb8fcc38fbb66..b846e593158aaf9b582006455e80bbcc98061e5e 100755
--- a/src/ruqad/qualitycheck.py
+++ b/src/ruqad/qualitycheck.py
@@ -155,7 +155,7 @@ This deletes all the objects in the bucket.
         zipf = ZipFile(filename)
         zipf.extractall(path=tmp)  # TODO Zip bomb detection and prevention.
         for name in zipf.namelist():
-            if name.endswith(".json"):
+            if name.endswith((".json", "/")):  # zip member names always use "/"
                 continue
             if upload:
                 self._upload(os.path.join(tmp, name), remove_prefix=tmp)
diff --git a/unittests/test_qualitycheck.py b/unittests/test_qualitycheck.py
new file mode 100644
index 0000000000000000000000000000000000000000..dca6a761607aa5a3dd2ff51d18dbfaeb6e429665
--- /dev/null
+++ b/unittests/test_qualitycheck.py
@@ -0,0 +1,43 @@
+# Copyright (C) 2024 IndiScale GmbH <info@indiscale.com>
+# Copyright (C) 2024 Daniel Hornung <d.hornung@indiscale.com>
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as
+# published by the Free Software Foundation, either version 3 of the
+# License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+
+"""Unit tests for the QualityChecker."""
+
+from datetime import datetime
+from pathlib import Path
+from unittest.mock import patch, Mock
+
+from ruqad import qualitycheck
+
+
+@patch("boto3.Session.client")
+def test_qc_internal(mock_s3_client):
+    """Extracting the sample ELN export must trigger the expected ``upload_file`` call.
+
+    ``boto3.Session.client`` is patched; only the calls recorded on the mock are inspected.
+    """
+    eln_file = (Path(__file__).parents[1] / "end-to-end-tests" / "data" / "crawler_data"
+                / "ruqad" / "1223" / "export.eln")
+    qualitycheck.QualityChecker()._extract_content(eln_file, upload=True)
+
+    expected_target = "data/test-crawler-second/test-crawler-second/files/abalone2.csv"
+    # Positional arguments of every ``upload_file`` call made on the mocked client.
+    uploads = [call.args for call in mock_s3_client.mock_calls if call[0] == "().upload_file"]
+    assert any(
+        len(args) == 3 and args[0].endswith("abalone2.csv") and args[1] == "ruqad"
+        and args[2] == expected_target
+        for args in uploads
+    ), f"no matching upload_file call among: {uploads}"