From 45fc690352977037e70e084a321ed95eb9fbbf9a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Henrik=20tom=20W=C3=B6rden?= <h.tomwoerden@indiscale.com>
Date: Fri, 13 Nov 2020 16:29:05 +0000
Subject: [PATCH] Add file utility tools

---
 CHANGELOG.md                                  |  2 +
 integrationtests/crawl.py                     |  7 +--
 integrationtests/model.yml                    |  1 -
 .../test_crawl_with_datamodel_problems.py     | 13 +++--
 setup.py                                      |  5 +-
 src/caosadvancedtools/serverside/helper.py    | 56 ++++++++++++++++++-
 unittests/test_sss_helper.py                  | 54 ++++++++++++++++--
 7 files changed, 119 insertions(+), 19 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 7c458aed..0cef3c6b 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -19,6 +19,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - new name parsing
 - new test for software folder structure
 - new assure_name_is function
+- two utilities for working with files: NameCollector and
+  get_file_via_download
 
 ### Changed ###
 
diff --git a/integrationtests/crawl.py b/integrationtests/crawl.py
index 2bec8b0a..e4bf311e 100755
--- a/integrationtests/crawl.py
+++ b/integrationtests/crawl.py
@@ -32,8 +32,8 @@ import caosdb as db
 from caosadvancedtools.cfood import fileguide
 from caosadvancedtools.crawler import FileCrawler
 from caosadvancedtools.guard import INSERT, UPDATE
-from scifolder import (AnalysisCFood, ExperimentCFood, ProjectCFood,
-                       PublicationCFood, SimulationCFood, SoftwareCFood)
+from scifolder import (AnalysisCFood, ExperimentCFood, PublicationCFood,
+                       SimulationCFood, SoftwareCFood)
 
 try:
     from sss_helper import get_argument_parser, print_success
@@ -86,8 +86,7 @@ if __name__ == "__main__":
     config = db.configuration.get_config()
     c = FileCrawler(files=files, use_cache=True,
                     interactive=False, hideKnown=False,
-                    cfood_types=[ProjectCFood,
-                                 ExperimentCFood, AnalysisCFood, SoftwareCFood,
+                    cfood_types=[ExperimentCFood, AnalysisCFood, SoftwareCFood,
                                  PublicationCFood, SimulationCFood,
                                  ])
 
diff --git a/integrationtests/model.yml b/integrationtests/model.yml
index 241762ab..0a4ad381 100644
--- a/integrationtests/model.yml
+++ b/integrationtests/model.yml
@@ -40,7 +40,6 @@ sources:
   datatype: REFERENCE
 scripts:
   datatype: REFERENCE
-SourceCode:
 Simulation:
   obligatory_properties:
     date:
diff --git a/integrationtests/test_crawl_with_datamodel_problems.py b/integrationtests/test_crawl_with_datamodel_problems.py
index daf794b5..3089bf4c 100644
--- a/integrationtests/test_crawl_with_datamodel_problems.py
+++ b/integrationtests/test_crawl_with_datamodel_problems.py
@@ -25,16 +25,14 @@ problems caused by a faulty model.
 
 """
 import caosdb as db
-
 from caosadvancedtools import loadFiles
 from caosadvancedtools.cfood import fileguide
 from caosadvancedtools.crawler import FileCrawler
 from caosadvancedtools.datamodel_problems import DataModelProblems
 from caosadvancedtools.guard import INSERT
-from scifolder import (AnalysisCFood, ExperimentCFood, ProjectCFood,
-                       PublicationCFood, SimulationCFood)
-
 from caosmodels.parser import parse_model_from_yaml
+from scifolder import (AnalysisCFood, ExperimentCFood, PublicationCFood,
+                       SimulationCFood)
 
 
 def setup_module():
@@ -63,6 +61,7 @@ def test_crawler_with_data_model_problems():
     pathlist = [basepath+dirname for dirname in
                 ["ExperimentalData", "DataAnalysis",
                  "SimulationData", "Publications"]]
+
     for path in pathlist:
         loadFiles.loadpath(path=path, include="", exclude="",
                            prefix="", dryrun=False, forceAllowSymlinks=False)
@@ -71,6 +70,7 @@ def test_crawler_with_data_model_problems():
     model = parse_model_from_yaml("model.yml")
     model.sync_data_model(noquestion=True)
     deleted_entities = {"Experiment", "Poster", "results"}
+
     for ent in deleted_entities:
         db.execute_query("FIND "+ent).delete()
 
@@ -81,8 +81,9 @@ def test_crawler_with_data_model_problems():
     files = FileCrawler.query_files(crawl_path)
     c = FileCrawler(files=files, use_cache=False,
                     abort_on_exception=False, interactive=False,
-                    hideKnown=False, cfood_types=[ProjectCFood,
-                                                  ExperimentCFood, AnalysisCFood, PublicationCFood,
+                    hideKnown=False, cfood_types=[ExperimentCFood,
+                                                  AnalysisCFood,
+                                                  PublicationCFood,
                                                   SimulationCFood, ])
     c.crawl(security_level=INSERT, path=crawl_path)
 
diff --git a/setup.py b/setup.py
index 85d7d644..8b31cfb3 100755
--- a/setup.py
+++ b/setup.py
@@ -154,7 +154,10 @@ def setup_package():
         long_description_content_type="text/markdown",
         author='Henrik tom Wörden',
         author_email='h.tomwoerden@indiscale.com',
-        install_requires=["caosdb>=0.4.0", "openpyxl"],
+        install_requires=["caosdb>=0.4.0",
+                          "openpyxl>=3.0.0",
+                          "xlrd>=1.2.0",
+                          ],
         packages=find_packages('src'),
         package_dir={'': 'src'},
         setup_requires=["pytest-runner>=2.0,<3dev"],
diff --git a/src/caosadvancedtools/serverside/helper.py b/src/caosadvancedtools/serverside/helper.py
index f2dd96da..19efc9ed 100644
--- a/src/caosadvancedtools/serverside/helper.py
+++ b/src/caosadvancedtools/serverside/helper.py
@@ -25,9 +25,10 @@ import datetime
 import json
 import logging
 import os
-import sys
 import subprocess
+import sys
 from email import message, policy, utils
+from tempfile import NamedTemporaryFile
 
 import caosdb as db
 
@@ -211,10 +212,12 @@ def init_data_model(entities):
             local_datatype = e.datatype
             local_role = e.role
             e.retrieve()
+
             if local_datatype is not None and local_datatype != e.datatype:
                 info = ("The remote entity has a {} data type while it should "
                         "have a {}.".format(e.datatype, local_datatype))
                 raise DataModelError(e.name, info)
+
             if local_role is not None and local_role != e.role:
                 info = ("The remote entity has is a {} while it should "
                         "be a {}.".format(e.role, local_role))
@@ -376,14 +379,17 @@ def send_mail(from_addr, to, subject, body, cc=None, bcc=None,
 
     if cc is not None:
         mail["CC"] = cc if isinstance(cc, str) else ", ".join(cc)
+
     if bcc is not None:
         mail["BCC"] = bcc if isinstance(cc, str) else ", ".join(cc)
 
     # construct the call
+
     if send_mail_bin is not None:
         sendmail = send_mail_bin
     else:
         caosdb_config = db.configuration.get_config()
+
         if not "Misc" in caosdb_config or not "sendmail" in caosdb_config["Misc"]:
             err_msg = ("No sendmail executable configured. "
                        "Please configure `Misc.sendmail` "
@@ -416,7 +422,55 @@ def send_mail(from_addr, to, subject, body, cc=None, bcc=None,
                          stderr=subprocess.PIPE)
     stdout, stderr = p.communicate(mail.as_bytes())
     return_code = p.wait()
+
     if return_code != 0:
         raise subprocess.CalledProcessError(return_code, command,
                                             output=stdout.decode("utf8"),
                                             stderr=stderr.decode("utf8"))
+
+
+def get_file_via_download(ent, logger=logging.getLogger(__name__)):
+    """Download the given file entity and return the result of the download.
+
+    If the size of the entity is not greater than 0, the name of a new and
+    empty temporary file is returned.  CaosDB errors are logged and re-raised.
+    """
+    try:
+        # TODO remove the following treatment of size=0 when the
+        # following issue is resolved:
+        # https://gitlab.com/caosdb/caosdb-server/-/issues/107
+
+        if not ent.size > 0:
+            with NamedTemporaryFile(delete=False) as empty_file:
+                return empty_file.name
+
+        return ent.download()
+    except db.ConsistencyError as err:
+        message = ("The checksum of the downloaded file with id={} did not "
+                   "match.")
+        logger.error(message.format(ent.id))
+        raise err
+    except db.CaosDBException as err:
+        message = "Cannot download the file with id={}."
+        logger.error(message.format(ent.id))
+        raise err
+
+
+class NameCollector(object):
+    """Hands out save names; repeated names get a running number."""
+
+    def __init__(self):
+        self.names = []
+
+    def get_unique_savename(self, name):
+        """Return `name`, suffixed with "_<n>" from its second use onwards.
+
+        This is for example useful if multiple files shall be saved into one
+        directory but the names of them are not unique.
+        """
+
+        occurrence = self.names.count(name) + 1
+        unique = name if occurrence == 1 else name + "_{}".format(occurrence)
+        self.names.append(name)
+
+        return unique
diff --git a/unittests/test_sss_helper.py b/unittests/test_sss_helper.py
index e4577359..71408fa6 100644
--- a/unittests/test_sss_helper.py
+++ b/unittests/test_sss_helper.py
@@ -1,12 +1,16 @@
-from os.path import abspath, dirname, join, isfile, exists
-from os import listdir, remove
 import subprocess
 from email import message_from_file, policy
+from os import listdir, remove
+from os.path import abspath, dirname, exists, isfile, join
+
+import caosdb as db
+from caosadvancedtools.serverside.helper import (NameCollector, get_data,
+                                                 get_file_via_download,
+                                                 init_data_model,
+                                                 parse_arguments, send_mail)
+from caosdb import RecordType, configure_connection, get_config
+from caosdb.connection.mockup import MockUpResponse, MockUpServerConnection
 from pytest import mark, raises
-from caosdb import configure_connection, RecordType, get_config
-from caosdb.connection.mockup import (MockUpServerConnection, MockUpResponse)
-from caosadvancedtools.serverside.helper import (parse_arguments, get_data,
-                                                 init_data_model, send_mail)
 
 
 def get_data_example():
@@ -30,10 +34,12 @@ def teardown_module():
 
 def get_tmp_mails():
     tmpmail = "/tmp/mail"
+
     if not exists(tmpmail):
         return []
     mails = [join(tmpmail, f) for f in listdir(tmpmail) if isfile(join(tmpmail,
                                                                        f))]
+
     return mails
 
 
@@ -84,3 +90,39 @@ def test_send_mail_error():
     with raises(subprocess.CalledProcessError):
         send_mail("me@example.com", "you@example.com", "the subject", "hello!",
                   send_mail_bin="/bin/cat")
+
+
+def test_get_file_via_download():
+    """Check the size=0 shortcut and that download errors are re-raised."""
+    class DummyFile():
+        size = 5
+        id = 5
+    tmp = DummyFile()
+    tmp.size = 0
+    assert isinstance(get_file_via_download(tmp), str)
+
+    # TODO test whether something ends up in the logger (both error cases)
+    class Inconsistent(DummyFile):
+        def download(*args, **kwargs):
+            raise db.ConsistencyError()
+    with raises(db.ConsistencyError):
+        get_file_via_download(Inconsistent())
+
+    class NotThere(DummyFile):
+        def download(*args, **kwargs):
+            raise db.CaosDBException()
+    with raises(db.CaosDBException):
+        get_file_via_download(NotThere())
+
+
+def test_get_unique_savename():
+    """Repeated names are numbered per name; a new collector starts over."""
+    collector = NameCollector()
+    requested = ["ha", "ho", "ho", "ha", "ha", "hi"]
+    expected = ["ha", "ho", "ho_2", "ha_2", "ha_3", "hi"]
+
+    for name, savename in zip(requested, expected):
+        assert collector.get_unique_savename(name) == savename
+
+    # check reset
+    assert NameCollector().get_unique_savename("ha") == "ha"
-- 
GitLab