diff --git a/pyproject.toml b/pyproject.toml index 7b280af375a61968ad0e335d1f5abc1dbe453aae..6d406b476db9900bf68ea341f9c801580a994466 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -24,6 +24,7 @@ classifiers = [ requires-python = ">= 3.8" dependencies = [ "linkahead", + "caosadvancedtools @ git+https://gitlab.indiscale.com/caosdb/src/caosdb-advanced-user-tools.git@ef70481ee9e0dde914cf974288e8e1b4ed32ec2e", "caoscrawler[rocrate] @ git+https://gitlab.indiscale.com/caosdb/src/caosdb-crawler.git@96ae0ada880049eec7673637816b20360a0d63cf", "kadi-apy", "boto3>=1.35", diff --git a/src/ruqad/crawler_extensions/converters.py b/src/ruqad/crawler_extensions/converters.py deleted file mode 100644 index 8ed4646a7be8b209ffa58d8887375063bb191770..0000000000000000000000000000000000000000 --- a/src/ruqad/crawler_extensions/converters.py +++ /dev/null @@ -1,19 +0,0 @@ -# encoding: utf-8 -# -# This file is a part of the LinkAhead Project. -# -# Copyright (C) 2024 Indiscale GmbH <info@indiscale.com> -# Copyright (C) 2024 Alexander Schlemmer <a.schlemmer@indiscale.com> -# -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU Affero General Public License as -# published by the Free Software Foundation, either version 3 of the -# License, or (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Affero General Public License for more details. -# -# You should have received a copy of the GNU Affero General Public License -# along with this program. If not, see <https://www.gnu.org/licenses/>. diff --git a/src/ruqad/qualitycheck.py b/src/ruqad/qualitycheck.py index b846e593158aaf9b582006455e80bbcc98061e5e..b7fe2f2347e7be0a7d7e327e2e3c19252354f8e2 100755 --- a/src/ruqad/qualitycheck.py +++ b/src/ruqad/qualitycheck.py @@ -119,7 +119,7 @@ out : bool self._download_result(job_id=job_id, target_dir=target_dir) except self.CheckFailed as cfe: print(f"Check failed:\nStatus: {cfe.reason['status']}") - breakpoint() + #breakpoint() check_ok = False @@ -228,6 +228,10 @@ remove_prefix : Optional[str] while True: cmd_result = run(cmd, check=True, capture_output=True) result = json.loads(cmd_result.stdout) + if "error" in result: + print("Pipeline terminated unsuccessfully: ", result["error_description"]) + result["status"] = result["error_description"] + raise self.CheckFailed(result) if result["status"] != "running" and result["finished_at"] is not None: break time.sleep(1) diff --git a/src/ruqad/resources/crawler-settings/cfood.yaml b/src/ruqad/resources/crawler-settings/cfood.yaml index 8d634d84de6fffa6e92e836acbab975d6897868a..582d298d34ae6276896f0f5c4afe574d01db101c 100644 --- a/src/ruqad/resources/crawler-settings/cfood.yaml +++ b/src/ruqad/resources/crawler-settings/cfood.yaml @@ -151,10 +151,11 @@ DataDir: name: (?P<name>.*) dateCreated: (?P<dateCreated>.*)$ dateModified: (?P<dateModified>.*)$ + #description: (?P<description>.*)$ records: Dataset: name: $name - description: $description + #description: $description dateModified: $dateModified dateCreated: $dateCreated