From e778c88dfb258eac0a005139370dbfa2c8f4d8e8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20tom=20W=C3=B6rden?= <henrik@trineo.org> Date: Mon, 22 Nov 2021 17:12:02 +0100 Subject: [PATCH] WIP: add the possibility to run generic scripts and add unit tests --- .../serverside/example_script.py | 51 +++++ .../serverside/generic_analysis.py | 179 ++++++++++++++++-- unittests/test_generic_analysis.py | 70 +++++++ 3 files changed, 286 insertions(+), 14 deletions(-) create mode 100644 src/caosadvancedtools/serverside/example_script.py create mode 100644 unittests/test_generic_analysis.py diff --git a/src/caosadvancedtools/serverside/example_script.py b/src/caosadvancedtools/serverside/example_script.py new file mode 100644 index 00000000..3f8e6d1d --- /dev/null +++ b/src/caosadvancedtools/serverside/example_script.py @@ -0,0 +1,51 @@ +#!/usr/bin/env python3 +# encoding: utf-8 +# +# ** header v3.0 +# This file is a part of the CaosDB Project. +# +# Copyright (C) 2021 Indiscale GmbH <info@indiscale.com> +# Copyright (C) 2021 Henrik tom Wörden <h.tomwoerden@indiscale.com> +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see <https://www.gnu.org/licenses/>. +# +# ** end header +# + +""" +An exemplary script that illustrates how scripts can be used in conjunction +with the generic_analysis module. 
def main(args):
    """Print the ID of the DataAnalysis Record that was passed in.

    ``args`` is the namespace produced by ``parse_args``; only its
    ``entityid`` attribute is read.
    """
    record_id = args.entityid
    print(record_id)


def parse_args():
    """Parse the command line and return the resulting namespace.

    Exactly one positional argument is expected: the integer ID of the
    DataAnalysis Record.
    """
    cli = argparse.ArgumentParser(
        formatter_class=RawTextHelpFormatter,
        description=__doc__,
    )
    cli.add_argument(
        "entityid",
        type=int,
        help="The ID of the DataAnalysis Record.",
    )

    return cli.parse_args()


if __name__ == "__main__":
    sys.exit(main(parse_args()))
# Module-level logger used by all functions below.
logger = logging.getLogger(__name__)


def check_referenced_script(record: "db.Record"):
    """Return the file name of the script referenced by ``record``.

    The 'scripts' Property of ``record`` has to be a reference with an
    integer value, and the entity with that ID has to be a File. If one of
    these conditions is not fulfilled, a warning is logged and None is
    returned.

    Returns the basename of the referenced File's path, or None.
    """
    script_prop = record.get_property("scripts")

    if script_prop is None:
        logger.warning("The follwing changed Record is missing the 'scripts' "
                       "Property:\n{}".format(str(record)))

        return None

    if (not db.apiutils.is_reference(script_prop) or
            not isinstance(script_prop.value, int)):
        logger.warning("The 'scripts' Property of the following Record should "
                       "reference a File:\n{}".format(str(record)))

        return None

    script = db.execute_query("FIND ENTITY WITH id={}".format(
        script_prop.value), unique=True)

    if not isinstance(script, db.File):
        logger.warning("The 'scripts' Property of the Record {} should "
                       "reference a File. Entity {} is not a File".format(
                           record.id, script_prop.value))

        return None

    return os.path.basename(script.path)


def call_script(script_name, record_id):
    """Execute the script ``script_name`` from the current working directory.

    The script receives ``record_id`` as its only command line argument.
    A warning is logged if no file with that name exists in the current
    working directory or if the script exits with a non-zero return code.
    Nothing is returned.
    """
    if script_name not in os.listdir("."):
        logger.warning("No script with the name '{}' is installed.".format(
            script_name))

        return

    # The existence check above looks at the current directory, so the script
    # is addressed by an explicit relative path; a bare name would be looked
    # up via PATH instead. Command line arguments have to be strings.
    command = [os.path.join(os.curdir, script_name), str(record_id)]
    ret = subprocess.run(command, stdout=subprocess.PIPE,
                         stderr=subprocess.PIPE)

    if ret.returncode != 0:
        logger.warning("Skript failed")


def run(dataAnalysisRecord: "db.Record"):
    """Run a data analysis script.

    There are two options:
    1. A python script installed as a pip package.
    2. A generic script that can be executed on the command line.

    Using a python package:
    The module named by the 'Software' Property is imported and its ``main``
    function is called with the dataAnalysisRecord as single argument.
    The script may perform changes to the Record and insert and update
    Entities.

    Using a generic script:
    The only argument that is supplied to the script is the ID of the
    dataAnalysisRecord. Apart from the different argument everything that is
    said for the python package holds here.
    """

    # TODO enhance datamodel? 'Software' Property that references 'Script' or
    # Python module?

    if dataAnalysisRecord.get_property("scripts") is not None:
        script_name = check_referenced_script(dataAnalysisRecord)

        # check_referenced_script already logged why the reference is
        # unusable; do not try to execute a script named None.
        if script_name is not None:
            call_script(script_name, dataAnalysisRecord.id)

    software = dataAnalysisRecord.get_property("Software")

    if software is not None:
        module = importlib.import_module(software.value)

        module.main(dataAnalysisRecord)
def main(args=None):
    """Create and insert a Record built from the command line arguments.

    ``args`` is the namespace returned by ``_parse_arguments``; if it is
    omitted, the command line is parsed here. Its ``entity`` and
    ``parameter`` attributes become the values of the 'InputDataSet' and
    'ParameterSet' Properties of a new Record, which is then inserted.
    """
    if args is None:
        args = _parse_arguments()

    dataAnalysisRecord = db.Record()
    dataAnalysisRecord.add_property(name="InputDataSet", value=args.entity)
    dataAnalysisRecord.add_property(name="ParameterSet", value=args.parameter)

    # TODO: should this be done?
    dataAnalysisRecord.insert()


if __name__ == "__main__":
    args = _parse_arguments()
    sys.exit(main(args))
class TestGAnalysisNoFile(BaseMockUpTest):
    """check_referenced_script yields None if no usable File is referenced."""

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Presumably served by BaseMockUpTest as the mocked server response;
        # here the entity with id 1234 is a Record, not a File.
        self.entities = "".join([
            '<Response>',
            '<Record name="script.py" path="/some/path/script.py" id="1234"/>',
            '<Query string="find record" results="1"></Query>',
            '</Response>',
        ])

    def test_check_referenced_script(self):
        def record_with_scripts(**prop_kwargs):
            return db.Record().add_property("scripts", **prop_kwargs)

        # missing scripts
        result = check_referenced_script(db.Record())
        self.assertIsNone(result)
        # wrong datatype
        result = check_referenced_script(
            record_with_scripts(datatype=db.TEXT))
        self.assertIsNone(result)
        # wrong value
        result = check_referenced_script(
            record_with_scripts(datatype=db.REFERENCE, value="hallo"))
        self.assertIsNone(result)
        # no file
        result = check_referenced_script(
            record_with_scripts(datatype=db.REFERENCE, value="1234"))
        self.assertIsNone(result)


class TestGAnalysisFile(BaseMockUpTest):
    """check_referenced_script yields the file name of a referenced File."""

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Presumably served by BaseMockUpTest as the mocked server response;
        # here the entity with id 1234 is a File.
        self.entities = "".join([
            '<Response>',
            '<File name="script.py" path="/some/path/script.py" id="1234"/>',
            '<Query string="find record" results="1"></Query>',
            '</Response>',
        ])

    def test_check_referenced_script(self):
        # all correct
        record = db.Record().add_property(
            "scripts", datatype=db.REFERENCE, value="1234")
        self.assertEqual(check_referenced_script(record), "script.py")