diff --git a/CHANGELOG.md b/CHANGELOG.md index 7c458aed9609b71662800d4d69b73755ed263718..0cef3c6b7b997ecdcfdfa90ed3694b928b0fd8a9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -19,6 +19,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - new name parsing - new test for software folder structure - new assure_name_is function +- two utility functions when working with files: NameCollector and + get_file_via_download ### Changed ### diff --git a/integrationtests/crawl.py b/integrationtests/crawl.py index 2bec8b0acc98fdc7dd54c10cb360aac209e30978..e4bf311e6700448aab0ebf1a5ab72bad6bf1296e 100755 --- a/integrationtests/crawl.py +++ b/integrationtests/crawl.py @@ -32,8 +32,8 @@ import caosdb as db from caosadvancedtools.cfood import fileguide from caosadvancedtools.crawler import FileCrawler from caosadvancedtools.guard import INSERT, UPDATE -from scifolder import (AnalysisCFood, ExperimentCFood, ProjectCFood, - PublicationCFood, SimulationCFood, SoftwareCFood) +from scifolder import (AnalysisCFood, ExperimentCFood, PublicationCFood, + SimulationCFood, SoftwareCFood) try: from sss_helper import get_argument_parser, print_success @@ -86,8 +86,7 @@ if __name__ == "__main__": config = db.configuration.get_config() c = FileCrawler(files=files, use_cache=True, interactive=False, hideKnown=False, - cfood_types=[ProjectCFood, - ExperimentCFood, AnalysisCFood, SoftwareCFood, + cfood_types=[ExperimentCFood, AnalysisCFood, SoftwareCFood, PublicationCFood, SimulationCFood, ]) diff --git a/integrationtests/model.yml b/integrationtests/model.yml index 241762abc51bde2449a28f0e6583e12bd5b5f47b..0a4ad381bfc119dd65d2c192f8de823deda525ae 100644 --- a/integrationtests/model.yml +++ b/integrationtests/model.yml @@ -40,7 +40,6 @@ sources: datatype: REFERENCE scripts: datatype: REFERENCE -SourceCode: Simulation: obligatory_properties: date: diff --git a/integrationtests/test_crawl_with_datamodel_problems.py 
b/integrationtests/test_crawl_with_datamodel_problems.py index daf794b55002247d019e3f7bdd73e9313274eb8c..3089bf4ce60093206e42477d740ead5854a9debc 100644 --- a/integrationtests/test_crawl_with_datamodel_problems.py +++ b/integrationtests/test_crawl_with_datamodel_problems.py @@ -25,16 +25,14 @@ problems caused by a faulty model. """ import caosdb as db - from caosadvancedtools import loadFiles from caosadvancedtools.cfood import fileguide from caosadvancedtools.crawler import FileCrawler from caosadvancedtools.datamodel_problems import DataModelProblems from caosadvancedtools.guard import INSERT -from scifolder import (AnalysisCFood, ExperimentCFood, ProjectCFood, - PublicationCFood, SimulationCFood) - from caosmodels.parser import parse_model_from_yaml +from scifolder import (AnalysisCFood, ExperimentCFood, PublicationCFood, + SimulationCFood) def setup_module(): @@ -63,6 +61,7 @@ def test_crawler_with_data_model_problems(): pathlist = [basepath+dirname for dirname in ["ExperimentalData", "DataAnalysis", "SimulationData", "Publications"]] + for path in pathlist: loadFiles.loadpath(path=path, include="", exclude="", prefix="", dryrun=False, forceAllowSymlinks=False) @@ -71,6 +70,7 @@ def test_crawler_with_data_model_problems(): model = parse_model_from_yaml("model.yml") model.sync_data_model(noquestion=True) deleted_entities = {"Experiment", "Poster", "results"} + for ent in deleted_entities: db.execute_query("FIND "+ent).delete() @@ -81,8 +81,9 @@ def test_crawler_with_data_model_problems(): files = FileCrawler.query_files(crawl_path) c = FileCrawler(files=files, use_cache=False, abort_on_exception=False, interactive=False, - hideKnown=False, cfood_types=[ProjectCFood, - ExperimentCFood, AnalysisCFood, PublicationCFood, + hideKnown=False, cfood_types=[ExperimentCFood, + AnalysisCFood, + PublicationCFood, SimulationCFood, ]) c.crawl(security_level=INSERT, path=crawl_path) diff --git a/setup.py b/setup.py index 
85d7d6449f8914ea3e61b37a5e363c1e45e7a4d5..8b31cfb3a8893155e942ec66a8531b1cee2471ee 100755 --- a/setup.py +++ b/setup.py @@ -154,7 +154,10 @@ def setup_package(): long_description_content_type="text/markdown", author='Henrik tom Wörden', author_email='h.tomwoerden@indiscale.com', - install_requires=["caosdb>=0.4.0", "openpyxl"], + install_requires=["caosdb>=0.4.0", + "openpyxl>=3.0.0", + "xlrd>=1.2.0", + ], packages=find_packages('src'), package_dir={'': 'src'}, setup_requires=["pytest-runner>=2.0,<3dev"], diff --git a/src/caosadvancedtools/serverside/helper.py b/src/caosadvancedtools/serverside/helper.py index f2dd96da18a0cbd5f3864b6a3d6b2a6a12fa3cc6..19efc9ed2b3e99e17eb28f5c87b0a6dbc0c47499 100644 --- a/src/caosadvancedtools/serverside/helper.py +++ b/src/caosadvancedtools/serverside/helper.py @@ -25,9 +25,10 @@ import datetime import json import logging import os -import sys import subprocess +import sys from email import message, policy, utils +from tempfile import NamedTemporaryFile import caosdb as db @@ -211,10 +212,12 @@ def init_data_model(entities): local_datatype = e.datatype local_role = e.role e.retrieve() + if local_datatype is not None and local_datatype != e.datatype: info = ("The remote entity has a {} data type while it should " "have a {}.".format(e.datatype, local_datatype)) raise DataModelError(e.name, info) + if local_role is not None and local_role != e.role: info = ("The remote entity has is a {} while it should " "be a {}.".format(e.role, local_role)) @@ -376,14 +379,17 @@ def send_mail(from_addr, to, subject, body, cc=None, bcc=None, if cc is not None: mail["CC"] = cc if isinstance(cc, str) else ", ".join(cc) + if bcc is not None: mail["BCC"] = bcc if isinstance(cc, str) else ", ".join(cc) # construct the call + if send_mail_bin is not None: sendmail = send_mail_bin else: caosdb_config = db.configuration.get_config() + if not "Misc" in caosdb_config or not "sendmail" in caosdb_config["Misc"]: err_msg = ("No sendmail executable configured. 
def get_file_via_download(ent, logger=logging.getLogger(__name__)):
    """Download the given file entity and return the path of the local copy.

    Entities whose ``size`` is not greater than zero are not downloaded;
    instead an empty temporary file is created and its name is returned.
    Download errors are logged and then re-raised unchanged.

    Parameters
    ----------
    ent
        The file entity. Its ``size`` and ``id`` attributes and its
        ``download()`` method are used.
    logger : logging.Logger, optional
        Logger that receives the error messages. Defaults to the logger of
        this module.

    Returns
    -------
    The value returned by ``ent.download()`` or, for empty files, the name
    of a newly created empty temporary file.

    Raises
    ------
    db.ConsistencyError
        Re-raised after logging that the checksum did not match.
    db.CaosDBException
        Re-raised after logging that the file cannot be downloaded.
    """
    try:
        # TODO remove the following treatment of size=0 when the
        # following issue is resolved:
        # https://gitlab.com/caosdb/caosdb-server/-/issues/107

        if ent.size > 0:
            val_file = ent.download()
        else:
            # delete=False: the empty file has to outlive this function so
            # that the returned path is usable; removing it is up to the
            # caller.
            ntf = NamedTemporaryFile(delete=False)
            ntf.close()
            val_file = ntf.name
    except db.ConsistencyError:
        logger.error("The checksum of the downloaded file with id={} did not "
                     "match.".format(ent.id))
        raise
    except db.CaosDBException:
        logger.error("Cannot download the file with id={}.".format(ent.id))
        raise

    return val_file


class NameCollector(object):
    """Hand out unique save names by numbering repeated requests."""

    def __init__(self):
        # All names exactly as they were requested (without any suffix).
        self.names = []
        # All names that have been returned so far.
        self._issued = set()

    def get_unique_savename(self, name):
        """Return a variant of ``name`` that has not been returned before.

        This is for example useful if multiple files shall be saved into one
        directory but their names are not unique.

        The first request for a name returns it unchanged. The n-th request
        for the same name returns ``name_n`` (``name_2``, ``name_3``, ...).
        If that candidate was already handed out, e.g. because ``name_2``
        was itself requested earlier, the number is increased until the
        result is unused.

        Parameters
        ----------
        name : str
            The desired name.

        Returns
        -------
        str
            A name that differs from every name returned by this instance
            before.
        """
        candidate = name
        # Suffix numbering starts at 2; the n-th request tries "_n" first.
        counter = max(self.names.count(name) + 1, 2)

        while candidate in self._issued:
            candidate = "{}_{}".format(name, counter)
            counter += 1

        self.names.append(name)
        self._issued.add(candidate)

        return candidate
def test_get_file_via_download():
    """Check the empty-file shortcut and both error paths of the download."""
    class DummyFile():
        size = 5
        id = 5

    # size == 0: nothing is downloaded, an empty temporary file is returned
    tmp = DummyFile()
    tmp.size = 0
    tmp_path = get_file_via_download(tmp)
    assert isinstance(tmp_path, str)
    # the helper creates the file with delete=False, so clean up here
    remove(tmp_path)

    # TODO test whether something ends up in the logger
    class Inconsistent(DummyFile):
        def download(*args, **kwargs):
            raise db.ConsistencyError()

    with raises(db.ConsistencyError):
        get_file_via_download(Inconsistent())

    # TODO test whether something ends up in the logger
    class NotThere(DummyFile):
        def download(*args, **kwargs):
            raise db.CaosDBException()

    # use NotThere here so that the CaosDBException path is what is tested
    with raises(db.CaosDBException):
        get_file_via_download(NotThere())


def test_get_unique_savename():
    """Repeated names get increasing numeric suffixes per collector."""
    nc = NameCollector()
    assert nc.get_unique_savename("ha") == "ha"
    assert nc.get_unique_savename("ho") == "ho"
    assert nc.get_unique_savename("ho") == "ho_2"
    assert nc.get_unique_savename("ha") == "ha_2"
    assert nc.get_unique_savename("ha") == "ha_3"
    assert nc.get_unique_savename("hi") == "hi"
    # check reset
    nc = NameCollector()
    assert nc.get_unique_savename("ha") == "ha"