From e778c88dfb258eac0a005139370dbfa2c8f4d8e8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Henrik=20tom=20W=C3=B6rden?= <henrik@trineo.org>
Date: Mon, 22 Nov 2021 17:12:02 +0100
Subject: [PATCH] WIP: add the possibility to run generic scripts and add unit
 tests

---
 .../serverside/example_script.py              |  51 +++++
 .../serverside/generic_analysis.py            | 179 ++++++++++++++++--
 unittests/test_generic_analysis.py            |  70 +++++++
 3 files changed, 286 insertions(+), 14 deletions(-)
 create mode 100644 src/caosadvancedtools/serverside/example_script.py
 create mode 100644 unittests/test_generic_analysis.py

diff --git a/src/caosadvancedtools/serverside/example_script.py b/src/caosadvancedtools/serverside/example_script.py
new file mode 100644
index 00000000..3f8e6d1d
--- /dev/null
+++ b/src/caosadvancedtools/serverside/example_script.py
@@ -0,0 +1,51 @@
+#!/usr/bin/env python3
+# encoding: utf-8
+#
+# ** header v3.0
+# This file is a part of the CaosDB Project.
+#
+# Copyright (C) 2021 Indiscale GmbH <info@indiscale.com>
+# Copyright (C) 2021 Henrik tom Wörden <h.tomwoerden@indiscale.com>
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as
+# published by the Free Software Foundation, either version 3 of the
+# License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+#
+# ** end header
+#
+
+"""
+An exemplary script that illustrates how scripts can be used in conjunction
+with the generic_analysis module.
+"""
+
+import argparse
+import sys
+from argparse import RawTextHelpFormatter
+
+
+def main(args):
+    """Print the ID of the DataAnalysis Record given on the command line."""
+    print(args.entityid)
+
+
+def parse_args():
+    """Parse the command line, which consists of the integer Record ID."""
+    cli = argparse.ArgumentParser(formatter_class=RawTextHelpFormatter,
+                                  description=__doc__)
+    cli.add_argument("entityid", type=int,
+                     help="The ID of the DataAnalysis Record.")
+    return cli.parse_args()
+
+
+if __name__ == "__main__":
+    sys.exit(main(parse_args()))
diff --git a/src/caosadvancedtools/serverside/generic_analysis.py b/src/caosadvancedtools/serverside/generic_analysis.py
index 8b02326e..a9751da9 100644
--- a/src/caosadvancedtools/serverside/generic_analysis.py
+++ b/src/caosadvancedtools/serverside/generic_analysis.py
@@ -22,10 +22,164 @@
 
 # This source file is work in progress and currently untested.
 
-import importlib
+
+"""
+Variante I: Python module implementiert eine 'main' function, die einen Record
+als Argument entgegen nimmt und diesen um z.B. 'results' ergänzt. Das Update
+wird von einer äußeren Funktion übernommen.
+
+Variante II: Ein Skript erhält eine ID als Argument (z.B. auf der command line)
+und updated das Objekt selbstständig.
+
+
+Idealfall: Idempotenz; I.e. es ist egal, ob das Skript schon aufgerufen wurde.
+Ein weiterer Aufruf führt ggf. zu einem Update (aber nur bei Änderungen von
+z.B. Parametern)
+
+
+Das aufgerufene Skript kann beliebige Eigenschaften benutzen und erstellen.
+ABER wenn die Standardeigenschaften (InputDataSet, etc) verwendet werden, kann
+der Record leicht erzeugt werden.
+
+
+
+      "Analyze"       "Perform Analysis"
+   Knopf an Record     Form im WebUI
+   im WebUI
+         |               |
+         |               |
+         v               v
+     Winzskript, das einen
+     DataAnalysis-Stub erzeugt
+          |
+          |
+          v
+    execute_script Routine -->  AnalysisSkript
+    erhält den Stub und ggf.    Nutzt Funktionen um Updates durchzuführen falls
+    den Pythonmodulenamen       notwendig, Email
+         ^
+         |
+         |
+    Cronjob findet outdated
+    DataAnalysis
+
+
+Analyseskript macht update:
+    - flexibel welche Änderungen vorgenommen werden (z.B. mehrere Records)
+    - spezielle Funktionen sollten verwendet werden
+    - Logging und informieren muss im Skript passieren
+    - Skript kann mit subprocess aufgerufen werden (alternative unvollständige
+      DataAnalysis einfügen)
+
+
+# Features
+    - Emailversand bei Insert oder Update
+    - Kurze Info: "Create XY Analysis" kann vmtl automatisch erzeugt werden
+    - Debug Info: müsste optional/bei Fehler zur Verfügung stehen.
+    - Skript/Software version sollte gespeichert werden
+
+
+Outlook: the part of the called scripts that interact with LinkAhead might in
+future be replaced by the Crawler. The working directory would be copied to the
+file server and then crawled.
+"""
+
 import argparse
+import importlib
+import logging
+import os
+import subprocess
+import sys
+
 import caosdb as db
 
+logger = logging.getLogger(__name__)
+
+
+def check_referenced_script(record: db.Record):
+    """Return the file name of the script referenced by `record`.
+
+    The 'scripts' Property must reference a File entity by its integer ID.
+    Otherwise a warning is logged and None is returned.
+    """
+
+    if record.get_property("scripts") is None:
+        logger.warning("The follwing changed Record is missing the 'scripts' "
+                       "Property:\n{}".format(str(record)))
+
+        return
+
+    script_prop = record.get_property("scripts")
+    # NOTE(review): a str ID such as "1234" fails the int check below.
+    if (not db.apiutils.is_reference(script_prop) or
+            not isinstance(script_prop.value, int)):
+        logger.warning("The 'scripts' Property of the following Record should "
+                       "reference a File:\n{}".format(str(record)))
+
+        return
+    # Fetch the referenced entity to verify that it really is a File.
+    script = db.execute_query("FIND ENTITY WITH id={}".format(
+        script_prop.value), unique=True)
+
+    if (not isinstance(script, db.File)):
+        logger.warning("The 'scripts' Property of the Record {} should "
+                       "reference a File. Entity {} is not a File".format(
+                           record.id, script_prop.value))
+
+        return
+    # Only the base name of the File's path is returned.
+    script_name = os.path.basename(script.path)
+
+    return script_name
+
+
+def call_script(script_name, record_id):
+    """Run `script_name` with `record_id` as sole argument; warn (log) if it
+    is not in the current directory or exits with a non-zero status."""
+    if script_name not in os.listdir("."):
+        logger.warning("No script with the name '{}' is installed.".format(
+            script_name))
+        return
+    # argv entries must be strings, the Record ID usually is an int.
+    ret = subprocess.run([script_name, str(record_id)], stdout=subprocess.PIPE,
+                         stderr=subprocess.PIPE)
+    if ret.returncode != 0:  # CompletedProcess has no 'returnvalue' attribute
+        logger.warning("Skript failed")
+
+
+def run(dataAnalysisRecord: db.Record):
+    """Run the data analysis attached to `dataAnalysisRecord`.
+
+    Two mechanisms are supported; both are tried if both Properties exist:
+    1. 'scripts' Property: a generic script executed on the command line.
+    2. 'Software' Property: an importable Python module (e.g. pip-installed).
+
+    Using a generic script:
+    The referenced File's base name is resolved and the script is called
+    with the ID of the dataAnalysisRecord as its only argument. If the
+    reference cannot be resolved, a warning is logged and no script is run.
+
+    Using a Python module:
+    The value of 'Software' is imported as a module; it must provide a
+    main function that takes the dataAnalysisRecord as a single argument.
+    The module may change the Record and insert and update Entities; the
+    same holds for a generic script.
+    """
+
+    # TODO enhance datamodel? 'Software' Property that references 'Script' or
+    # Python module?
+
+    if dataAnalysisRecord.get_property("scripts") is not None:
+        script_name = check_referenced_script(dataAnalysisRecord)
+        # None means the reference was invalid and has already been logged.
+        if script_name is not None:
+            call_script(script_name, dataAnalysisRecord.id)
+
+    software = dataAnalysisRecord.get_property("Software")
+    if software is not None:
+        importlib.import_module(software.value).main(dataAnalysisRecord)
+
+
 def _parse_arguments():
     """Parses the command line arguments.
     """
@@ -39,20 +193,17 @@ def _parse_arguments():
     return parser.parse_args()
 
 
-def run(plugin: str):
-    """
-    Generically run a data analysis script.
-    This script should be installed as a pip package.
-    It should be located in package plugin and implement at least
-    a main function that takes a DataAnalysisRecord as a single argument.
-    """
-    m = importlib.import_module(plugin)
+def main():
     args = _parse_arguments()
-    input_dataset = db.Record(id=args.entity)
-    parameters = db.Record(id=args.parameter)
 
     dataAnalysisRecord = db.Record()
-    dataAnalysisRecord.add_property(input_dataset)
-    dataAnalysisRecord.add_property(parameters)
+    dataAnalysisRecord.add_property(name="InputDataSet", value=args.entity)
+    dataAnalysisRecord.add_property(name="ParameterSet", value=args.parameter)
 
-    m.main(dataAnalysisRecord) 
+    # TODO: should the stub Record be inserted here? No parent is set yet.
+    dataAnalysisRecord.insert()
+
+
+if __name__ == "__main__":
+    # main() takes no arguments and parses the command line itself.
+    sys.exit(main())
diff --git a/unittests/test_generic_analysis.py b/unittests/test_generic_analysis.py
new file mode 100644
index 00000000..a1077b97
--- /dev/null
+++ b/unittests/test_generic_analysis.py
@@ -0,0 +1,70 @@
+#!/usr/bin/env python3
+# encoding: utf-8
+#
+# ** header v3.0
+# This file is a part of the CaosDB Project.
+#
+# Copyright (C) 2021 Indiscale GmbH <info@indiscale.com>
+# Copyright (C) 2021 Henrik tom Wörden <h.tomwoerden@indiscale.com>
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as
+# published by the Free Software Foundation, either version 3 of the
+# License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+#
+# ** end header
+#
+
+"""
+Unit tests for check_referenced_script from the generic_analysis module.
+"""
+
+import caosdb as db
+from caosadvancedtools.serverside.generic_analysis import \
+    check_referenced_script
+
+from test_utils import BaseMockUpTest
+
+
+class TestGAnalysisNoFile(BaseMockUpTest):
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        self.entities = (  # presumably the response BaseMockUpTest replays
+            '<Response><Record name="script.py" path="/some/path/script.py'
+            '" id="1234"/><Query string="find record" results="1">'
+            '</Query></Response>')
+
+    def test_check_referenced_script(self):
+        # no 'scripts' Property at all
+        self.assertIsNone(check_referenced_script(db.Record()))
+        # 'scripts' has datatype TEXT, i.e. it is not a reference
+        self.assertIsNone(check_referenced_script(db.Record().add_property(
+            "scripts", datatype=db.TEXT)))
+        # reference whose value is not an ID
+        self.assertIsNone(check_referenced_script(db.Record().add_property(
+            "scripts", datatype=db.REFERENCE, value="hallo")))
+        # entity 1234 is a Record in the response above, not a File
+        self.assertIsNone(check_referenced_script(db.Record().add_property(
+            "scripts", datatype=db.REFERENCE, value="1234")))
+
+
+class TestGAnalysisFile(BaseMockUpTest):
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        self.entities = (  # presumably the response BaseMockUpTest replays
+            '<Response><File name="script.py" path="/some/path/script.py'
+            '" id="1234"/><Query string="find record" results="1">'
+            '</Query></Response>')
+
+    def test_check_referenced_script(self):
+        # entity 1234 is a File here; the base name of its path is expected
+        self.assertEqual(check_referenced_script(db.Record().add_property(
+            "scripts", datatype=db.REFERENCE, value="1234")), "script.py")
-- 
GitLab