diff --git a/CHANGELOG.md b/CHANGELOG.md index 2172ea15043615dc90f61f0d34b5928c76d7283a..bd2a02040cdad31deb9738947269867e1209c9fc 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Added ### +- `generic_analysis.py` makes it easy to call scripts that perform analyses via + server-side scripting [EXPERIMENTAL] + ### Changed ### ### Deprecated ### diff --git a/integrationtests/create_analysis.py b/integrationtests/create_analysis.py new file mode 100644 index 0000000000000000000000000000000000000000..1b7aa0d2d6671f14a3c65cf5ed135dfecb0aa69c --- /dev/null +++ b/integrationtests/create_analysis.py @@ -0,0 +1,65 @@ +#!/usr/bin/env python3 +# encoding: utf-8 +# +# ** header v3.0 +# This file is a part of the CaosDB Project. +# +# Copyright (C) 2021 Indiscale GmbH <info@indiscale.com> +# Copyright (C) 2021 Henrik tom Wörden <h.tomwoerden@indiscale.com> +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see <https://www.gnu.org/licenses/>. +# +# ** end header +# + +""" +A small script that creates an Analysis Record that can be used for testing the +automated analysis pipeline.
"""
A small script that creates an Analysis Record that can be used for testing the
automated analysis pipeline.
"""

import sys
from datetime import datetime

import caosdb as db


def main():
    """Insert the example script and an ``Analysis`` Record that references it.

    Steps, in order:

    1. Insert the example script as a File below
       ``AutomatedAnalysis/scripts/``; the current timestamp is prepended to
       the file name.
    2. Build an ``Analysis`` Record that references the script, the
       ``timeseries.npy`` File, a date and the identifier ``TEST``.
    3. Give the Person with ``firstname=Only`` an ``Email`` property, update
       that Person and reference it as ``responsible``.
    4. Insert the ``Analysis`` Record.
    """
    timestamp = str(datetime.now())
    script_file = db.File(
        file="../src/caosadvancedtools/serverside/examples/example_script.py",
        path="AutomatedAnalysis/scripts/" + timestamp + "example_script.py",
    )
    script_file.insert()

    analysis = db.Record()
    analysis.add_parent("Analysis")
    analysis.add_property(
        "scripts", value=[script_file], datatype=db.LIST(db.FILE))

    timeseries = db.execute_query(
        "FIND FILE which is stored at '**/timeseries.npy'", unique=True)
    analysis.add_property("sources", value=timeseries)
    analysis.add_property("date", "2020-01-01")
    analysis.add_property("identifier", "TEST")

    person = db.execute_query(
        "FIND RECORD Person WITH firstname=Only", unique=True)
    email_property_id = db.Property("Email").retrieve().id
    person.add_property(email_property_id, "only@example.com")
    person.update()

    analysis.add_property("responsible", person)
    analysis.insert()


if __name__ == "__main__":
    sys.exit(main())
+51,9 @@ Analysis: date: identifier: responsible: + suggested_properties: + mean_value: + datatype: DOUBLE Publication: Thesis: inherit_from_suggested: diff --git a/integrationtests/test.sh b/integrationtests/test.sh index 71af543643a35cb082f10a24440c5ea87df946c9..1c0357e265eec770069166e614fc0a3aa6ecc548 100755 --- a/integrationtests/test.sh +++ b/integrationtests/test.sh @@ -65,6 +65,12 @@ python3 test_table.py # TODO the following test deletes lots of the data inserted by the crawler echo "Testing im and export" python3 test_im_und_export.py + +# automated analysis +# for some reason the loadFiles of sim data has to be called again +python3 -m caosadvancedtools.loadFiles /opt/caosdb/mnt/extroot/SimulationData +python3 create_analysis.py + # Better safe than sorry: python3 clear_database.py diff --git a/integrationtests/update_analysis.py b/integrationtests/update_analysis.py new file mode 100644 index 0000000000000000000000000000000000000000..bd18ab375437bec02320dcfd269896c2ba7e2bb0 --- /dev/null +++ b/integrationtests/update_analysis.py @@ -0,0 +1,47 @@ +#!/usr/bin/env python3 +# encoding: utf-8 +# +# ** header v3.0 +# This file is a part of the CaosDB Project. +# +# Copyright (C) 2021 Indiscale GmbH <info@indiscale.com> +# Copyright (C) 2021 Henrik tom Wörden <h.tomwoerden@indiscale.com> +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see <https://www.gnu.org/licenses/>. 
#
# ** end header
#

"""Example update script.

An analysis Record is retrieved and passed to the generic ``run`` function,
which then calls the appropriate script based on the Record.

The simple query used here could be replaced with something that, e.g.,
retrieves all entities that were changed within a certain period of time.
"""

import sys

import caosdb as db
from caosadvancedtools.serverside.generic_analysis import run


def main():
    """Retrieve the Analysis Record with identifier ``TEST`` and run it."""
    analysis_record = db.execute_query(
        "FIND Analysis with identifier=TEST",
        unique=True,
    )
    run(analysis_record)


if __name__ == "__main__":
    sys.exit(main())
#!/usr/bin/env python3
# encoding: utf-8
#
# ** header v3.0
# This file is a part of the CaosDB Project.
#
# Copyright (C) 2021 Indiscale GmbH <info@indiscale.com>
# Copyright (C) 2021 Henrik tom Wörden <h.tomwoerden@indiscale.com>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
#
# ** end header
#

"""An example script that illustrates how scripts can be used in conjunction
with the generic_analysis module.

The data model needed for this script is:

Analysis:
    sources: REFERENCE
    scripts: FILE
    results: REFERENCE
    mean_value: DOUBLE

Person:
    Email: TEXT

"""

import argparse
import logging
import sys
from argparse import RawTextHelpFormatter
from datetime import datetime

import caosdb as db
import matplotlib.pyplot as plt
import numpy as np
from caosadvancedtools.cfood import assure_property_is
from caosadvancedtools.crawler import apply_list_of_updates
from caosadvancedtools.guard import INSERT, UPDATE
from caosadvancedtools.guard import global_guard as guard
from caosadvancedtools.serverside.helper import send_mail as main_send_mail

# logging should be done like this in order to allow the caller script to
# direct the output.
logger = logging.getLogger(__name__)

# allow updates of existing entities
guard.set_level(level=UPDATE)


def send_mail(changes: list, receipient: str):
    """Send a mail that lists automatically applied changes.

    The sender address is read from the option
    ``automated_updates.from_mail`` in the ``advancedtools`` section of the
    client configuration. If that option (or section) is missing, an error is
    logged and no mail is sent.

    Parameters
    ----------
    changes : list of str
        String representations of the entities in the version after the
        update. They are joined with newlines to form the mail body.
    receipient : str
        Address of the person who shall receive the mail.
    """

    caosdb_config = db.configuration.get_config()
    text = """Dear Curator,
The following changes where done automatically.

{changes}
    """.format(changes="\n".join(changes))
    try:
        fro = caosdb_config["advancedtools"]["automated_updates.from_mail"]
    except KeyError:
        logger.error("Server Configuration is missing a setting for "
                     "sending mails. The administrator should check "
                     "'from_mail'.")

        return

    main_send_mail(
        from_addr=fro,
        to=receipient,
        subject="Automated Update",
        body=text)


def main(args):
    """Run the example analysis for the Record given by ``args.entityid``.

    The numpy file referenced by the ``sources`` property is downloaded, a
    histogram is plotted and the mean is computed. The plot is inserted as a
    File; ``mean_value`` and ``results`` of the analysis Record are updated
    if they differ. When something was updated, every referenced Person that
    has an ``Email`` property is notified.

    Parameters
    ----------
    args : argparse.Namespace
        Needs ``entityid``; ``auth_token`` is optional.

    Returns
    -------
    int or None
        ``1`` if the analysis Record could not be retrieved, otherwise
        ``None``. The value is handed to ``sys.exit`` by the entry guard.

    Raises
    ------
    RuntimeError
        If the Record has no ``sources`` property that is a reference.
    """

    # auth_token is provided by the server side scripting API
    # use this token for authentication when creating a new connection
    if getattr(args, "auth_token", None):
        db.configure_connection(auth_token=args.auth_token)
        logger.debug("Established connection")

    try:
        dataAnalysisRecord = db.Record(id=args.entityid).retrieve()
    except db.TransactionError:
        logger.error("Cannot retrieve Record with id ={}".format(
            args.entityid
        ))

        # Without the Record nothing below can work; stop with a non-zero
        # exit status instead of failing later on an unbound variable.
        return 1

    # The script may require certain information to exist. Here, we expect that
    # a sources Property exists that references a numpy file.
    # Similarly an InputDataSet could be used.
    sources = dataAnalysisRecord.get_property("sources")

    if sources is None or not db.apiutils.is_reference(sources):
        raise RuntimeError("sources Refenrence must exist.")

    logger.debug("Found required data.")

    # ####### this core might be replaced by a call to another script ####### #
    # Download the data
    source_val = sources.value
    npobj = db.File(
        id=(source_val[0]
            if isinstance(source_val, list)
            else source_val)).retrieve()
    npfile = npobj.download()
    logger.debug("Downloaded data.")
    data = np.load(npfile)

    # Plot data
    filename = "hist.png"
    plt.hist(data)
    plt.savefig(filename)

    mean = data.mean()
    # ####################################################################### #

    # Insert the result plot
    fig = db.File(file=filename,
                  path="/Analysis/results/"+str(datetime.now())+"/"+filename)
    fig.insert()

    # Add the mean value to the analysis Record
    # If such a property existed before, it is changed if necessary. The old
    # value will persist in the versioning of LinkAhead
    to_be_updated = db.Container()
    assure_property_is(
        dataAnalysisRecord,
        "mean_value",
        mean,
        to_be_updated=to_be_updated
    )

    # Add the file with the plot to the analysis Record
    # If a file was already referenced, the new one will be referenced instead.
    # The old file is being kept and is still referenced in an old version of
    # the analysis Record.
    assure_property_is(
        dataAnalysisRecord,
        "results",
        [fig.id],
        to_be_updated=to_be_updated
    )

    if len(to_be_updated) > 0:
        # Use the logger (not print) so that the caller controls the output.
        logger.debug("Entities to be updated:\n%s", to_be_updated)
        apply_list_of_updates(to_be_updated, update_flags={})
        logger.debug("Update sucessful.")
        logger.info("The following Entities were changed:\n{}.".format(
            [el.id for el in to_be_updated])
        )

        # Send mails to people that are referenced.
        people = db.execute_query("FIND RECORD Person WHICH IS REFERENCED BY "
                                  "{}".format(dataAnalysisRecord.id))
        changes = [str(el) for el in to_be_updated]

        for person in people:
            email = person.get_property("Email")

            if email is not None:
                send_mail(changes, receipient=email.value)
        logger.debug("Mails send.")


def parse_args():
    """Parse the command line: optional ``--auth-token`` and the entity id.

    The module docstring is shown verbatim as the description in ``--help``.
    """
    parser = argparse.ArgumentParser(description=__doc__,
                                     formatter_class=RawTextHelpFormatter)
    parser.add_argument("--auth-token",
                        help="Token provided by the server for authentication")
    parser.add_argument("entityid",
                        help="The ID of the DataAnalysis Record.", type=int)

    return parser.parse_args()


if __name__ == "__main__":
    args = parse_args()
    sys.exit(main(args))
# encoding: utf-8
#
# Copyright (C) 2021 Alexander Schlemmer <alexander.schlemmer@ds.mpg.de>
# Copyright (C) 2021 IndiScale GmbH <info@indiscale.com>
# Copyright (C) 2021 Henrik tom Wörden <h.tomwoerden@indiscale.com>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
#
#
# See: https://gitlab.indiscale.com/caosdb/src/caosdb-advanced-user-tools/-/issues/55

# This source file is work in progress and currently untested.


"""
Variant I: A Python module implements a 'main' function that takes a Record
as its argument and, e.g., adds 'results' to it and updates it.

Variant II: A script receives an ID as its argument (e.g. on the command line)
and updates the object on its own.

Ideal case: idempotence, i.e. it does not matter whether the script has been
called before. A further call may lead to an update (but only if, e.g.,
parameters changed).

The called script may use and create arbitrary properties.
BUT if the standard properties (InputDataSet, etc.) are used, the Record can
be created easily.



      "Analyze"       "Perform Analysis"
   button on a Record   form in the WebUI
   in the WebUI
         |               |
         |               |
         v               v
     tiny script that creates a
     DataAnalysis stub
          |
          |
          v
    execute_script routine  -->  analysis script
    receives the stub and,       uses functions to perform updates if
    if applicable, the Python    necessary, email
    module name
         ^
         |
         |
    cron job finds outdated
    DataAnalysis


The analysis script performs the update:
    - flexible regarding which changes are made (e.g. several Records)
    - dedicated functions should be used
    - logging and notification have to happen in the script
    - the script can be called with subprocess (alternatively, insert an
      incomplete DataAnalysis)


# Features
    - send an email on insert or update
    - short info: "Create XY Analysis" can probably be generated automatically
    - debug info: should be available optionally / in case of an error
    - the script/software version should be stored


Outlook: the part of the called scripts that interact with LinkAhead might in
future be replaced by the Crawler. The working directory would be copied to the
file server and then crawled.
"""

import argparse
import importlib
import logging
import os
import sys

import caosdb as db
from caosdb.utils.server_side_scripting import run_server_side_script

logger = logging.getLogger(__name__)


def check_referenced_script(record: db.Record):
    """Return the file name of the script referenced by ``record``.

    The ``scripts`` property of ``record`` is inspected; if its value is a
    list, only the first element is used. The referenced entity is retrieved
    by id and the base name of its ``path`` is returned.

    Parameters
    ----------
    record : db.Record
        The Record whose ``scripts`` property is checked.

    Returns
    -------
    str or None
        The base name of the referenced File's path. ``None`` (after logging
        a warning) if the property is missing, is not a reference, has no
        value, or references something that is not a File.
    """

    script_prop = record.get_property("scripts")

    if script_prop is None:
        logger.warning("The follwing changed Record is missing the 'scripts' "
                       "Property:\n{}".format(str(record)))

        return None

    if not db.apiutils.is_reference(script_prop):
        logger.warning("The 'scripts' Property of the following Record should "
                       "reference a File:\n{}".format(str(record)))

        return None

    value = script_prop.value

    # An unset value or an empty list cannot be resolved; querying for it
    # would either raise an IndexError or search for "id=None".
    if value is None or (isinstance(value, list) and not value):
        logger.warning("The 'scripts' Property of the Record {} does not "
                       "reference anything.".format(record.id))

        return None

    script_id = value[0] if isinstance(value, list) else value
    script = db.execute_query("FIND ENTITY WITH id={}".format(script_id),
                              unique=True)

    if not isinstance(script, db.File):
        logger.warning("The 'scripts' Property of the Record {} should "
                       "reference a File. Entity {} is not a File".format(
                           record.id, script_prop.value))

        return None

    return os.path.basename(script.path)


def call_script(script_name: str, record_id: int):
    """Run ``script_name`` via server side scripting and log its output.

    ``record_id`` is passed to the script as its only argument. The script's
    stdout is logged at debug level. stderr is logged as an error if the
    script returned a non-zero code, otherwise as a warning (and only if it
    is not empty).
    """
    ret = run_server_side_script(script_name, record_id)

    if ret.code != 0:
        logger.error("Script failed!")
        logger.debug(ret.stdout)
        logger.error(ret.stderr)
    else:
        logger.debug(ret.stdout)

        # A successful run is not an error; only report stderr if there is
        # something to report.
        if ret.stderr:
            logger.warning(ret.stderr)


def run(dataAnalysisRecord: db.Record):
    """Run the data analysis that belongs to ``dataAnalysisRecord``.

    There are two options, both are executed if both properties exist:

    1. A generic script that can be executed on the command line
       (property ``scripts``).
    2. A Python module installed as a pip package (property ``Software``).

    Using a generic script:
        The only argument that is supplied to the script is the ID of the
        dataAnalysisRecord. If the ``scripts`` property does not reference a
        File, nothing is called.

    Using a Python package:
        The value of ``Software`` is imported as a module. It has to
        implement at least a ``main`` function that takes the
        dataAnalysisRecord as its single argument. The function may perform
        changes to the Record and insert and update Entities.
    """

    if dataAnalysisRecord.get_property("scripts") is not None:
        script_name = check_referenced_script(dataAnalysisRecord)

        if script_name is None:
            # check_referenced_script already logged the reason.
            logger.warning(
                "The 'scripts' Property of Record {} cannot be used; no "
                "script is called.".format(dataAnalysisRecord.id))
        else:
            logger.debug(
                "Found 'scripts'. Call script '{}' in separate process".format(
                    script_name)
            )
            call_script(script_name, dataAnalysisRecord.id)
            logger.debug(
                "Script '{}' done.\n-----------------------------------".format(
                    script_name))

    if dataAnalysisRecord.get_property("Software") is not None:
        mod = dataAnalysisRecord.get_property("Software").value
        logger.debug(
            "Found 'Software'. Call '{}' as Python module".format(
                mod)
        )
        m = importlib.import_module(mod)

        m.main(dataAnalysisRecord)
        logger.debug(
            "'main' function of Python module '{}' done"
            ".\n-----------------------------------".format(mod))


def _parse_arguments():
    """Parse the command line arguments.

    Returns a namespace with the attributes ``module``, ``inputset`` and
    ``parameterset`` (each ``None`` if not given).
    """
    parser = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawTextHelpFormatter)
    parser.add_argument("--module",
                        help="Name of the Python module to be called.")
    parser.add_argument("--inputset", help="An id an input dataset.")
    parser.add_argument("--parameterset", help="An id of a parameter record.")

    return parser.parse_args()


def main(args=None):
    """This is for testing only.

    Insert a Record built from the command line arguments and run it.

    Parameters
    ----------
    args : argparse.Namespace, optional
        Parsed arguments as returned by ``_parse_arguments``. If omitted, the
        command line is parsed here.
    """

    if args is None:
        args = _parse_arguments()

    dataAnalysisRecord = db.Record()
    dataAnalysisRecord.add_property(name="InputDataSet", value=args.inputset)
    dataAnalysisRecord.add_property(name="ParameterSet",
                                    value=args.parameterset)
    dataAnalysisRecord.add_property(name="Software", value=args.module)

    dataAnalysisRecord.insert()
    run(dataAnalysisRecord)


if __name__ == "__main__":
    args = _parse_arguments()
    sys.exit(main(args))
#!/usr/bin/env python3
# Sync data model for generic data analysis method
# A. Schlemmer, 09/2021

from caosadvancedtools.models import parser

# Parse the data model from "model.yml" (a relative path, so it is resolved
# against the current working directory) and synchronize it.
model = parser.parse_model_from_yaml(
    "model.yml",
)
model.sync_data_model()
#!/usr/bin/env python3
# encoding: utf-8
#
# ** header v3.0
# This file is a part of the CaosDB Project.
#
# Copyright (C) 2021 Indiscale GmbH <info@indiscale.com>
# Copyright (C) 2021 Henrik tom Wörden <h.tomwoerden@indiscale.com>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
#
# ** end header
#

"""
Unit tests for ``check_referenced_script`` of the generic_analysis module.
"""

import caosdb as db
from caosadvancedtools.serverside.generic_analysis import \
    check_referenced_script

from test_utils import BaseMockUpTest


def _mock_response(tag):
    """Return the canned server response holding one ``tag`` entity."""

    return (
        '<Response><' + tag + ' name="script.py" path="/some/path/script.py"'
        ' id="1234"/><Query string="find record" results="1">'
        '</Query></Response>')


class TestGAnalysisNoFile(BaseMockUpTest):
    """The mocked response contains a Record, i.e. not a File."""

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.entities = _mock_response("Record")

    def test_check_referenced_script(self):
        # missing scripts
        without_scripts = db.Record()
        self.assertIsNone(check_referenced_script(without_scripts))

        # wrong datatype
        text_typed = db.Record().add_property("scripts", datatype=db.TEXT)
        self.assertIsNone(check_referenced_script(text_typed))

        # wrong value
        bad_value = db.Record().add_property(
            "scripts", datatype=db.REFERENCE, value="hallo")
        self.assertIsNone(check_referenced_script(bad_value))

        # no file
        not_a_file = db.Record().add_property(
            "scripts", datatype=db.REFERENCE, value="1234")
        self.assertIsNone(check_referenced_script(not_a_file))


class TestGAnalysisFile(BaseMockUpTest):
    """The mocked response contains a File."""

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.entities = _mock_response("File")

    def test_check_referenced_script(self):
        # all correct
        with_file = db.Record().add_property(
            "scripts", datatype=db.REFERENCE, value="1234")
        self.assertEqual(check_referenced_script(with_file), "script.py")