WIP: add the possibility to run generic scripts and add unit tests

e778c88d · Henrik tom Wörden · 051dfdd1 · e778c88d · e778c88d · e778c88d
Commit e778c88d authored 3 years ago by Henrik tom Wörden
--- a/src/caosadvancedtools/serverside/example_script.py
+++ b/src/caosadvancedtools/serverside/example_script.py
+#!/usr/bin/env python3
+# encoding: utf-8
+#
+# ** header v3.0
+# This file is a part of the CaosDB Project.
+#
+# Copyright (C) 2021 Indiscale GmbH <info@indiscale.com>
+# Copyright (C) 2021 Henrik tom Wörden <h.tomwoerden@indiscale.com>
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as
+# published by the Free Software Foundation, either version 3 of the
+# License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+#
+# ** end header
+#
+"""
+An exemplary script that illustrates how scripts can be used in conjunction
+with the generic_analysis module.
+"""
+import argparse
+import sys
+from argparse import RawTextHelpFormatter
+def main(args):
+    """Print the ID of the DataAnalysis Record given on the command line.
+
+    ``args`` is the namespace produced by ``parse_args``; only its
+    ``entityid`` attribute is read. Nothing is returned.
+    """
+    entity_id = args.entityid
+    print(entity_id)
+def parse_args():
+    """Parse the command line of the example script.
+
+    Returns the argparse namespace; its ``entityid`` attribute holds the
+    integer ID of the DataAnalysis Record.
+    """
+    cli = argparse.ArgumentParser(
+        description=__doc__,
+        formatter_class=RawTextHelpFormatter,
+    )
+    cli.add_argument(
+        "entityid",
+        type=int,
+        help="The ID of the DataAnalysis Record.",
+    )
+    return cli.parse_args()
+if __name__ == "__main__":
+    # Use the return value of main() as exit status (None means success).
+    cli_args = parse_args()
+    sys.exit(main(cli_args))
--- a/src/caosadvancedtools/serverside/generic_analysis.py
+++ b/src/caosadvancedtools/serverside/generic_analysis.py
@@ -22,10 +22,164 @@
 # This source file is work in progress and currently untested.
-import importlib
+"""
+Variant I: A Python module implements a 'main' function that takes a Record
+as its argument and extends it, e.g. by adding 'results'. The update is
+performed by an outer function.
+Variant II: A script receives an ID as its argument (e.g. on the command line)
+and updates the object on its own.
+Ideal case: idempotence, i.e. it does not matter whether the script has
+already been called. Another call may lead to an update (but only if
+something, e.g. the parameters, has changed).
+The called script may use and create arbitrary Properties.
+BUT if the standard Properties (InputDataSet, etc.) are used, the Record can
+be created easily.
+      "Analyze"       "Perform Analysis"
+   button on Record    form in the WebUI
+   in the WebUI
+         |               |
+         |               |
+         v               v
+     tiny script that creates
+     a DataAnalysis stub
+          |
+          |
+          v
+    execute_script routine -->  analysis script
+    gets the stub and possibly  uses functions to perform updates if
+    the Python module name      necessary, email
+         ^
+         |
+         |
+    cron job finds outdated
+    DataAnalysis
+The analysis script performs the update:
+    - flexible regarding which changes are made (e.g. several Records)
+    - dedicated functions should be used
+    - logging and notification must happen in the script
+    - the script can be called with subprocess (alternatively, insert an
+      incomplete DataAnalysis)
+# Features
+    - sending of emails on insert or update
+    - short info: "Create XY Analysis" can probably be generated automatically
+    - debug info: should be available optionally / on error
+    - the script/software version should be stored
+Outlook: the part of the called scripts that interact with LinkAhead might in
+future be replaced by the Crawler. The working directory would be copied to the
+file server and then crawled.
+"""
 import argparse
+import importlib
+import logging
+import os
+import subprocess
+import sys
 import caosdb as db
+logger = logging.getLogger(__name__)
+def check_referenced_script(record: db.Record):
+    """Return the file name of the script referenced by ``record``.
+
+    The 'scripts' Property of ``record`` must be a reference whose value is
+    the integer ID of a File entity. If the Property is missing, is not such
+    a reference, or the referenced entity is not a File, a warning is logged
+    and None is returned.
+
+    Parameters
+    ----------
+    record : db.Record
+        The Record whose 'scripts' Property is inspected.
+
+    Returns
+    -------
+    str or None
+        Base name of the referenced File's path, or None if a check failed.
+    """
+    script_prop = record.get_property("scripts")
+
+    # Guard 1: the Property has to exist at all.
+    if script_prop is None:
+        logger.warning("The follwing changed Record is missing the 'scripts' "
+                       "Property:\n{}".format(str(record)))
+        return None
+
+    # Guard 2: it has to be a reference given as an integer ID.
+    if not (db.apiutils.is_reference(script_prop)
+            and isinstance(script_prop.value, int)):
+        logger.warning("The 'scripts' Property of the following Record should "
+                       "reference a File:\n{}".format(str(record)))
+        return None
+
+    query = "FIND ENTITY WITH id={}".format(script_prop.value)
+    referenced = db.execute_query(query, unique=True)
+
+    # Guard 3: the referenced entity has to be a File.
+    if not isinstance(referenced, db.File):
+        logger.warning("The 'scripts' Property of the Record {} should "
+                       "reference a File. Entity {} is not a File".format(
+                           record.id, script_prop.value))
+        return None
+
+    return os.path.basename(referenced.path)
+def call_script(script_name, record_id):
+    """Execute an installed analysis script as a subprocess.
+
+    The script has to be located in the current working directory. It is
+    called with the ID of the DataAnalysis Record as its only argument.
+    A warning is logged if the script is not installed or if it exits with
+    a non-zero status.
+
+    Parameters
+    ----------
+    script_name : str
+        File name of the script, looked up in the current directory.
+    record_id : int or str
+        ID of the DataAnalysis Record that is passed to the script.
+    """
+    if script_name not in os.listdir("."):
+        logger.warning("No script with the name '{}' is installed.".format(
+            script_name))
+        return
+    # Address the script explicitly in the directory that was just checked
+    # instead of relying on a PATH lookup, and pass the ID as a string since
+    # subprocess only accepts str, bytes or path-like arguments.
+    script_path = os.path.join(os.curdir, script_name)
+    ret = subprocess.run([script_path, str(record_id)],
+                         stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+    # CompletedProcess exposes the exit status as 'returncode'.
+    if ret.returncode != 0:
+        logger.warning("Skript failed")
+        logger.debug("Exit status %s, stderr:\n%s", ret.returncode,
+                     ret.stderr.decode(errors="replace"))
+def run(dataAnalysisRecord: db.Record):
+    """Run a data analysis script.
+
+    There are two options:
+    1. A python script installed as a pip package.
+    2. A generic script that can be executed on the command line.
+
+    Using a python package:
+    The Record's 'Software' Property holds the name of a module that is
+    imported. The module has to implement at least a main function that
+    takes a DataAnalysisRecord as a single argument.
+    The script may perform changes to the Record and insert and update
+    Entities.
+
+    Using a generic script:
+    The Record's 'scripts' Property references the script File. The only
+    argument that is supplied to the script is the ID of the
+    dataAnalysisRecord. Apart from the different argument everything that is
+    said for the python package holds here.
+
+    Parameters
+    ----------
+    dataAnalysisRecord : db.Record
+        The Record describing the analysis that shall be run.
+    """
+    # TODO enhance datamodel? 'Software' Property that references 'Script' or
+    # Python module?
+    if dataAnalysisRecord.get_property("scripts") is not None:
+        script_name = check_referenced_script(dataAnalysisRecord)
+        # check_referenced_script returns None (and has already logged the
+        # reason) when the reference is unusable; do not try to call a
+        # script named None in that case.
+        if script_name is not None:
+            call_script(script_name, dataAnalysisRecord.id)
+    if dataAnalysisRecord.get_property("Software") is not None:
+        m = importlib.import_module(
+            dataAnalysisRecord.get_property("Software").value)
+        m.main(dataAnalysisRecord)
 def _parse_arguments():
    """Parses the command line arguments.
    """
@@ -39,20 +193,17 @@ def _parse_arguments():
    return parser.parse_args()
-def run(plugin: str):
+def main():
+    """Insert a Record built from the command line arguments.
+
+    The Record gets the Properties 'InputDataSet' and 'ParameterSet' with
+    the values of the 'entity' and 'parameter' arguments.
+    """
-    """
-    Generically run a data analysis script.
-    This script should be installed as a pip package.
-    It should be located in package plugin and implement at least
-    a main function that takes a DataAnalysisRecord as a single argument.
-    """
-    m = importlib.import_module(plugin)
    args = _parse_arguments()
-    input_dataset = db.Record(id=args.entity)
-    parameters = db.Record(id=args.parameter)
    dataAnalysisRecord = db.Record()
-    dataAnalysisRecord.add_property(input_dataset)
+    dataAnalysisRecord.add_property(name="InputDataSet", value=args.entity)
-    dataAnalysisRecord.add_property(parameters)
+    dataAnalysisRecord.add_property(name="ParameterSet", value=args.parameter)
-    m.main(dataAnalysisRecord) 
+    # TODO: should this be done?
+    dataAnalysisRecord.insert()
+if __name__ == "__main__":
+    # main() parses the command line itself and takes no parameters, so it
+    # must be called without arguments.
+    sys.exit(main())
--- a/unittests/test_generic_analysis.py
+++ b/unittests/test_generic_analysis.py
+#!/usr/bin/env python3
+# encoding: utf-8
+#
+# ** header v3.0
+# This file is a part of the CaosDB Project.
+#
+# Copyright (C) 2021 Indiscale GmbH <info@indiscale.com>
+# Copyright (C) 2021 Henrik tom Wörden <h.tomwoerden@indiscale.com>
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as
+# published by the Free Software Foundation, either version 3 of the
+# License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+#
+# ** end header
+#
+"""
+Unit tests for check_referenced_script of the generic_analysis module.
+"""
+import caosdb as db
+from caosadvancedtools.serverside.generic_analysis import \
+    check_referenced_script
+from test_utils import BaseMockUpTest
+class TestGAnalysisNoFile(BaseMockUpTest):
+    """check_referenced_script returns None if no usable File is referenced.
+
+    ``self.entities`` describes a Record (not a File) with ID 1234;
+    presumably BaseMockUpTest serves it as the mocked server response.
+    """
+
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        self.entities = (
+            '<Response>'
+            '<Record name="script.py" path="/some/path/script.py" id="1234"/>'
+            '<Query string="find record" results="1"></Query>'
+            '</Response>')
+
+    def test_check_referenced_script(self):
+        # the 'scripts' Property is missing
+        no_property = db.Record()
+        self.assertIsNone(check_referenced_script(no_property))
+
+        # the Property has the wrong datatype
+        text_property = db.Record().add_property(
+            "scripts", datatype=db.TEXT)
+        self.assertIsNone(check_referenced_script(text_property))
+
+        # the reference value is not an ID
+        bad_value = db.Record().add_property(
+            "scripts", datatype=db.REFERENCE, value="hallo")
+        self.assertIsNone(check_referenced_script(bad_value))
+
+        # the referenced entity is not a File
+        not_a_file = db.Record().add_property(
+            "scripts", datatype=db.REFERENCE, value="1234")
+        self.assertIsNone(check_referenced_script(not_a_file))
+class TestGAnalysisFile(BaseMockUpTest):
+    """check_referenced_script returns the file name of a referenced File.
+
+    ``self.entities`` describes a File with ID 1234; presumably
+    BaseMockUpTest serves it as the mocked server response.
+    """
+
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        self.entities = (
+            '<Response>'
+            '<File name="script.py" path="/some/path/script.py" id="1234"/>'
+            '<Query string="find record" results="1"></Query>'
+            '</Response>')
+
+    def test_check_referenced_script(self):
+        # everything is correct: the base name of the File path is returned
+        with_file = db.Record().add_property(
+            "scripts", datatype=db.REFERENCE, value="1234")
+        self.assertEqual(check_referenced_script(with_file), "script.py")