Skip to content
Snippets Groups Projects
Commit e778c88d authored by Henrik tom Wörden's avatar Henrik tom Wörden
Browse files

WIP: add the possibility to run generic scripts and add unit tests

parent 051dfdd1
No related branches found
No related tags found
2 merge requests!39Release 0.4.0,!20created draft for generic analysis method
Pipeline #16390 passed
#!/usr/bin/env python3
# encoding: utf-8
#
# ** header v3.0
# This file is a part of the CaosDB Project.
#
# Copyright (C) 2021 Indiscale GmbH <info@indiscale.com>
# Copyright (C) 2021 Henrik tom Wörden <h.tomwoerden@indiscale.com>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
#
# ** end header
#
"""
An exemplary script that illustrates how scripts can be used in conjunction
with the generic_analysis module.
"""
import argparse
import sys
from argparse import RawTextHelpFormatter
def main(args):
    """Print the ID of the DataAnalysis Record that was passed to the script.

    Parameters
    ----------
    args : argparse.Namespace
        Parsed command line arguments; only ``entityid`` is read.

    Returns
    -------
    None
    """
    entity_id = args.entityid
    print(entity_id)
def parse_args():
    """Build the command line parser and parse ``sys.argv``.

    The parser uses the module docstring as its description and expects a
    single positional integer argument, ``entityid``.

    Returns
    -------
    argparse.Namespace
        The parsed arguments with the attribute ``entityid`` (int).
    """
    cli = argparse.ArgumentParser(
        formatter_class=RawTextHelpFormatter,
        description=__doc__,
    )
    cli.add_argument(
        "entityid",
        type=int,
        help="The ID of the DataAnalysis Record.",
    )
    namespace = cli.parse_args()
    return namespace
if __name__ == "__main__":
    # Script entry point: parse the command line, run main and hand its
    # return value to sys.exit as the exit status of the process.
    args = parse_args()
    exit_status = main(args)
    sys.exit(exit_status)
...@@ -22,10 +22,164 @@ ...@@ -22,10 +22,164 @@
# This source file is work in progress and currently untested. # This source file is work in progress and currently untested.
import importlib
"""
Variante I: Python module implementiert eine 'main' function, die einen Record
als Argument entgegen nimmt und diesen um z.B. 'results' ergänzt. Das Update
wird von einer äußeren Funktion übernommen.
Variante II: Ein skript erhält eine ID als Argument (z.B. auf der command line)
und updated das Objekt selbstständig.
Idealfall: Idempotenz; I.e. es ist egal, ob das Skript schon aufgerufen wurde.
Ein weiterer Aufruf führt ggf. zu einem Update (aber nur bei Änderungen von
z.B. Parametern)
Das aufgerufene Skript kann beliebige Eigenschaften benutzen und erstellen.
ABER wenn die Standardeigenschaften (InputDataSet, etc) verwendet werden, kann
der Record leicht erzeugt werden.
"Analyze" "Perform Analysis"
Knopf an Record Form im WebUI
im WebUI
| |
| |
v v
Winzskript, das einen
DataAnalysis-Stub erzeugt
|
|
v
execute_script Routine --> AnalysisSkript
erhält den Stub und ggf. Nutzt Funktionen um Updates durchzuführen falls
den Pythonmodulenamen notwendig, Email
^
|
|
Cronjob findet outdated
DataAnalysis
Analyseskript macht update:
- flexibel welche Änderungen vorgenommen werden (z.B. mehrere Records)
- spezielle Funktionen sollten verwendet werden
- Logging und informieren muss im Skript passieren
- Skript kann mit subprocess aufgerufen werden (alternative unvollständige
DataAnalysis einfügen)
# Features
- Emailversand bei Insert oder Update
- Kurze Info: "Create XY Analysis" kann vmtl automatisch erzeugt werden
- Debug Info: müsste optional/bei Fehler zur Verfügung stehen.
- Skript/Software version sollte gespeichert werden
Outlook: the part of the called scripts that interact with LinkAhead might in
future be replaced by the Crawler. The working directory would be copied to the
file server and then crawled.
"""
import argparse import argparse
import importlib
import logging
import os
import subprocess
import sys
import caosdb as db import caosdb as db
logger = logging.getLogger(__name__)
def check_referenced_script(record: db.Record):
    """Return the file name of the script referenced by ``record``.

    The 'scripts' Property of the Record is inspected and the entity it
    points to is retrieved with a query. Whenever the Record does not
    reference a File appropriately, a warning is logged and None is
    returned.

    Parameters
    ----------
    record : db.Record
        The Record whose 'scripts' Property is checked.

    Returns
    -------
    str or None
        The basename of the path of the referenced File, or None if the
        Property is missing, is not a reference with an integer value, or
        the referenced entity is not a File.
    """
    script_prop = record.get_property("scripts")

    if script_prop is None:
        logger.warning("The follwing changed Record is missing the 'scripts' "
                       "Property:\n{}".format(str(record)))
        return None

    # Only references that carry an integer ID can be resolved below.
    is_id_reference = (db.apiutils.is_reference(script_prop)
                       and isinstance(script_prop.value, int))
    if not is_id_reference:
        logger.warning("The 'scripts' Property of the following Record should "
                       "reference a File:\n{}".format(str(record)))
        return None

    query = "FIND ENTITY WITH id={}".format(script_prop.value)
    script = db.execute_query(query, unique=True)

    if isinstance(script, db.File):
        # Only the file name is of interest, not the directory of the File.
        return os.path.basename(script.path)

    logger.warning("The 'scripts' Property of the Record {} should "
                   "reference a File. Entity {} is not a File".format(
                       record.id, script_prop.value))
    return None
def call_script(script_name, record_id):
    """Execute a script from the current working directory.

    The script is called as a subprocess with the ID of the Record as its
    only command line argument. stdout and stderr of the script are
    captured.

    Parameters
    ----------
    script_name : str
        File name of the script; it must be listed in the current working
        directory.
    record_id : int or str
        ID of the Record that is passed to the script.

    Returns
    -------
    None
        Problems (script not found, non-zero exit status) are logged as
        warnings instead of being raised.
    """
    if script_name not in os.listdir("."):
        logger.warning("No script with the name '{}' is installed.".format(
            script_name))
        return

    # The existence check above looks into the current directory, so the
    # script must be addressed relative to it; a bare name would be looked up
    # on PATH instead. Arguments of a subprocess must be strings, hence the
    # explicit conversion of the ID.
    command = [os.path.join(os.curdir, script_name), str(record_id)]
    ret = subprocess.run(command, stdout=subprocess.PIPE,
                         stderr=subprocess.PIPE)

    # CompletedProcess exposes the exit status as 'returncode'.
    if ret.returncode != 0:
        logger.warning("Skript failed")
        logger.debug("stderr of '%s':\n%s", script_name,
                     ret.stderr.decode(errors="replace"))
def run(dataAnalysisRecord: db.Record):
    """Run a data analysis script.

    There are two options:
    1. A python script installed as a pip package.
    2. A generic script that can be executed on the command line.

    Using a python package:
    It should be located in package plugin and implement at least
    a main function that takes a DataAnalysisRecord as a single argument.
    The script may perform changes to the Record and insert and update
    Entities.

    Using a generic script:
    The only argument that is supplied to the script is the ID of the
    dataAnalysisRecord. Apart from the different argument, everything that is
    said for the python package holds here.

    Parameters
    ----------
    dataAnalysisRecord : db.Record
        The Record describing the analysis. Its 'scripts' Property selects a
        generic script, its 'Software' Property names a python module to
        import; both are handled if both are present.
    """
    # TODO enhance datamodel? 'Software' Property that references 'Script' or
    # Python module?
    if dataAnalysisRecord.get_property("scripts") is not None:
        script_name = check_referenced_script(dataAnalysisRecord)

        # check_referenced_script returns None (after logging a warning) when
        # the reference is not usable; there is nothing to execute then.
        if script_name is not None:
            call_script(script_name, dataAnalysisRecord.id)

    if dataAnalysisRecord.get_property("Software") is not None:
        m = importlib.import_module(
            dataAnalysisRecord.get_property("Software").value)
        m.main(dataAnalysisRecord)
def _parse_arguments(): def _parse_arguments():
"""Parses the command line arguments. """Parses the command line arguments.
""" """
...@@ -39,20 +193,17 @@ def _parse_arguments(): ...@@ -39,20 +193,17 @@ def _parse_arguments():
return parser.parse_args() return parser.parse_args()
def run(plugin: str): def main():
"""
Generically run a data analysis script.
This script should be installed as a pip package.
It should be located in package plugin and implement at least
a main function that takes a DataAnalysisRecord as a single argument.
"""
m = importlib.import_module(plugin)
args = _parse_arguments() args = _parse_arguments()
input_dataset = db.Record(id=args.entity)
parameters = db.Record(id=args.parameter)
dataAnalysisRecord = db.Record() dataAnalysisRecord = db.Record()
dataAnalysisRecord.add_property(input_dataset) dataAnalysisRecord.add_property(name="InputDataSet", value=args.entity)
dataAnalysisRecord.add_property(parameters) dataAnalysisRecord.add_property(name="ParameterSet", value=args.parameter)
m.main(dataAnalysisRecord) # TODO: should this be done?
dataAnalysisRecord.insert()
if __name__ == "__main__":
    # main() takes no parameters and parses the command line itself, so it
    # must be called without arguments; passing the parsed arguments would
    # raise a TypeError.
    sys.exit(main())
#!/usr/bin/env python3
# encoding: utf-8
#
# ** header v3.0
# This file is a part of the CaosDB Project.
#
# Copyright (C) 2021 Indiscale GmbH <info@indiscale.com>
# Copyright (C) 2021 Henrik tom Wörden <h.tomwoerden@indiscale.com>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
#
# ** end header
#
"""
Unit tests for the check_referenced_script function of the generic_analysis
module.
"""
import caosdb as db
from caosadvancedtools.serverside.generic_analysis import \
check_referenced_script
from test_utils import BaseMockUpTest
class TestGAnalysisNoFile(BaseMockUpTest):
    """check_referenced_script must return None for unsuitable Records.

    ``entities`` holds an XML response that contains a Record (not a File)
    with the ID 1234; presumably BaseMockUpTest serves it as the mocked
    server answer -- confirm in test_utils.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.entities = (
            '<Response>'
            '<Record name="script.py" path="/some/path/script.py" id="1234"/>'
            '<Query string="find record" results="1"></Query>'
            '</Response>')

    def test_check_referenced_script(self):
        # missing scripts
        without_scripts = db.Record()
        self.assertIsNone(check_referenced_script(without_scripts))

        # wrong datatype
        text_property = db.Record().add_property(
            "scripts", datatype=db.TEXT)
        self.assertIsNone(check_referenced_script(text_property))

        # wrong value
        bad_value = db.Record().add_property(
            "scripts", datatype=db.REFERENCE, value="hallo")
        self.assertIsNone(check_referenced_script(bad_value))

        # no file
        not_a_file = db.Record().add_property(
            "scripts", datatype=db.REFERENCE, value="1234")
        self.assertIsNone(check_referenced_script(not_a_file))
class TestGAnalysisFile(BaseMockUpTest):
    """check_referenced_script must return the file name of a referenced File.

    ``entities`` holds an XML response that contains a File with the ID 1234
    and the path /some/path/script.py; presumably BaseMockUpTest serves it as
    the mocked server answer -- confirm in test_utils.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.entities = (
            '<Response>'
            '<File name="script.py" path="/some/path/script.py" id="1234"/>'
            '<Query string="find record" results="1"></Query>'
            '</Response>')

    def test_check_referenced_script(self):
        # all correct
        record = db.Record().add_property(
            "scripts", datatype=db.REFERENCE, value="1234")
        script_name = check_referenced_script(record)
        self.assertEqual(script_name, "script.py")
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment