diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 78c3cb0438d3529f2d24b91e04e8fbe95a3cc64e..f620aeffd5146254bf630645eaded34d69f35f1c 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -46,7 +46,7 @@ test: image: $CI_REGISTRY_IMAGE_BASE script: - if [[ "$CAOSDB_TAG" == "" ]]; then - CAOSDB_TAG=dev-latest; + CAOSDB_TAG=dev; fi - echo $CAOSDB_TAG - time docker load < /image-cache/caosdb-advanced-testenv.tar || true diff --git a/CHANGELOG.md b/CHANGELOG.md index 76f8350ea556d710d6b9f17cb76f64631412c268..44629bd9b80b9bfd6a8d6a991fe52c8ce5ed3919 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,10 +7,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] ## ### Added ### -- `check_reference_field` function to check whether entities with provided ids - exits (for example when importing data from a table) -- added the `datatypes` argument to `TableImporter` for columns that do not - need a special conversion function + +- CFood that creates a Record for each line in a csv file +- `generic_analysis.py` allows to easily call scripts to perform analyses in + server side scripting [EXPERIMENTAL] ### Changed ### @@ -22,6 +22,14 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Security ### +## [0.3.1] - 2021-12-06 ## + +### Added ### +- `check_reference_field` function to check whether entities with provided ids + exits (for example when importing data from a table) +- added the `datatypes` argument to `TableImporter` for columns that do not + need a special conversion function + ## [0.3.0] - 2021-11-02 ## ### Added ### diff --git a/integrationtests/crawl.py b/integrationtests/crawl.py index 79ed3b5ffe52d276677e2a7914f70923e5c9e70c..defed2cb4f5fb0a0f349898e555c5d25924e2f9b 100755 --- a/integrationtests/crawl.py +++ b/integrationtests/crawl.py @@ -34,7 +34,7 @@ from caosadvancedtools.crawler import FileCrawler from caosadvancedtools.guard import INSERT, UPDATE from 
caosadvancedtools.scifolder import (AnalysisCFood, ExperimentCFood, PublicationCFood, SimulationCFood, - SoftwareCFood) + SoftwareCFood, ResultTableCFood) from example_hdf5cfood import ExampleH5CFood @@ -91,6 +91,7 @@ if __name__ == "__main__": interactive=False, hideKnown=False, cfood_types=[ExperimentCFood, AnalysisCFood, SoftwareCFood, PublicationCFood, SimulationCFood, + ResultTableCFood, ExampleH5CFood ]) diff --git a/integrationtests/create_analysis.py b/integrationtests/create_analysis.py new file mode 100644 index 0000000000000000000000000000000000000000..1b7aa0d2d6671f14a3c65cf5ed135dfecb0aa69c --- /dev/null +++ b/integrationtests/create_analysis.py @@ -0,0 +1,65 @@ +#!/usr/bin/env python3 +# encoding: utf-8 +# +# ** header v3.0 +# This file is a part of the CaosDB Project. +# +# Copyright (C) 2021 Indiscale GmbH <info@indiscale.com> +# Copyright (C) 2021 Henrik tom Wörden <h.tomwoerden@indiscale.com> +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see <https://www.gnu.org/licenses/>. +# +# ** end header +# + +""" +A small script that creates an Analysis Record that can be used for testing the +automated analysis pipeline. 
+""" + +import sys +from datetime import datetime + +import caosdb as db + + +def main(): + script = db.File( + file="../src/caosadvancedtools/serverside/examples/example_script.py", + path=("AutomatedAnalysis/scripts/" + + str(datetime.now())+"example_script.py"), + ) + script.insert() + + da = db.Record() + da.add_parent("Analysis") + da.add_property("scripts", value=[script], datatype=db.LIST(db.FILE)) + da.add_property("sources", + value=db.execute_query( + "FIND FILE which is stored at '**/timeseries.npy'", + unique=True), + ) + da.add_property("date", "2020-01-01") + da.add_property("identifier", "TEST") + only = db.execute_query( + "FIND RECORD Person WITH firstname=Only", + unique=True) + only.add_property(db.Property("Email").retrieve().id, "only@example.com") + only.update() + da.add_property("responsible", only) + da.insert() + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/integrationtests/example_script.py b/integrationtests/example_script.py new file mode 120000 index 0000000000000000000000000000000000000000..f6e9b498ff97638cb4105e019424c0c677a7f414 --- /dev/null +++ b/integrationtests/example_script.py @@ -0,0 +1 @@ +../src/caosadvancedtools/serverside/examples/example_script.py \ No newline at end of file diff --git a/integrationtests/extroot/DataAnalysis/2010_TestProject/2019-02-03/plot.py b/integrationtests/extroot/DataAnalysis/2010_TestProject/2019-02-03/plot.py index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..2c99b82a33e496eb31cf7fdc354767fe31919033 100644 --- a/integrationtests/extroot/DataAnalysis/2010_TestProject/2019-02-03/plot.py +++ b/integrationtests/extroot/DataAnalysis/2010_TestProject/2019-02-03/plot.py @@ -0,0 +1 @@ +import plot diff --git a/integrationtests/extroot/DataAnalysis/2010_TestProject/2019-02-03/results.pdf b/integrationtests/extroot/DataAnalysis/2010_TestProject/2019-02-03/results.pdf index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..09157f2c0961d412efea36ea0e56db5aac03fd36 100644 Binary files 
a/integrationtests/extroot/DataAnalysis/2010_TestProject/2019-02-03/results.pdf and b/integrationtests/extroot/DataAnalysis/2010_TestProject/2019-02-03/results.pdf differ diff --git a/integrationtests/extroot/ExperimentalData/2010_TestProject/2019-02-03/datafile.dat b/integrationtests/extroot/ExperimentalData/2010_TestProject/2019-02-03/datafile.dat index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..e29553fe01c8706e15a042e5ac6f85ed1a2cc8ce 100644 --- a/integrationtests/extroot/ExperimentalData/2010_TestProject/2019-02-03/datafile.dat +++ b/integrationtests/extroot/ExperimentalData/2010_TestProject/2019-02-03/datafile.dat @@ -0,0 +1 @@ +datadatadata diff --git a/integrationtests/extroot/ExperimentalData/2010_TestProject/2019-02-03/result_table_DepthTest.csv b/integrationtests/extroot/ExperimentalData/2010_TestProject/2019-02-03/result_table_DepthTest.csv new file mode 100644 index 0000000000000000000000000000000000000000..a29679afce78089f3cdd4e5e388262456668cd90 --- /dev/null +++ b/integrationtests/extroot/ExperimentalData/2010_TestProject/2019-02-03/result_table_DepthTest.csv @@ -0,0 +1,3 @@ +temperature [°C] ,depth +234.4,3.0 +344.6,5.1 diff --git a/integrationtests/extroot/Publications/Posters/2019-02-03_really_cool_finding/poster.pdf b/integrationtests/extroot/Publications/Posters/2019-02-03_really_cool_finding/poster.pdf index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..09157f2c0961d412efea36ea0e56db5aac03fd36 100644 Binary files a/integrationtests/extroot/Publications/Posters/2019-02-03_really_cool_finding/poster.pdf and b/integrationtests/extroot/Publications/Posters/2019-02-03_really_cool_finding/poster.pdf differ diff --git a/integrationtests/extroot/SimulationData/2010_TestProject/2019-02-03_something/timeseries.npy b/integrationtests/extroot/SimulationData/2010_TestProject/2019-02-03_something/timeseries.npy index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..18da9b18cda23d411d0f2666629377dd7991ac8f 100644 Binary files 
a/integrationtests/extroot/SimulationData/2010_TestProject/2019-02-03_something/timeseries.npy and b/integrationtests/extroot/SimulationData/2010_TestProject/2019-02-03_something/timeseries.npy differ diff --git a/integrationtests/extroot/Software/2010_TestSoftware/2019-02-03_v0.1/plot.py b/integrationtests/extroot/Software/2010_TestSoftware/2019-02-03_v0.1/plot.py index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..2c99b82a33e496eb31cf7fdc354767fe31919033 100644 --- a/integrationtests/extroot/Software/2010_TestSoftware/2019-02-03_v0.1/plot.py +++ b/integrationtests/extroot/Software/2010_TestSoftware/2019-02-03_v0.1/plot.py @@ -0,0 +1 @@ +import plot diff --git a/integrationtests/extroot/Software/2020NewProject0X/2020-02-03/plot.py b/integrationtests/extroot/Software/2020NewProject0X/2020-02-03/plot.py index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..2c99b82a33e496eb31cf7fdc354767fe31919033 100644 --- a/integrationtests/extroot/Software/2020NewProject0X/2020-02-03/plot.py +++ b/integrationtests/extroot/Software/2020NewProject0X/2020-02-03/plot.py @@ -0,0 +1 @@ +import plot diff --git a/integrationtests/extroot/Software/2020NewProject0X/2020-02-04/plot.py b/integrationtests/extroot/Software/2020NewProject0X/2020-02-04/plot.py index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..2c99b82a33e496eb31cf7fdc354767fe31919033 100644 --- a/integrationtests/extroot/Software/2020NewProject0X/2020-02-04/plot.py +++ b/integrationtests/extroot/Software/2020NewProject0X/2020-02-04/plot.py @@ -0,0 +1 @@ +import plot diff --git a/integrationtests/model.yml b/integrationtests/model.yml index cad7958803edf1a01f0649353443ffab350cc5e5..9f7a62d1d0befbc7225353380c79db2f368c969c 100644 --- a/integrationtests/model.yml +++ b/integrationtests/model.yml @@ -19,6 +19,14 @@ SoftwareVersion: binaries: sourceCode: Software: +DepthTest: + obligatory_properties: + temperature: + datatype: DOUBLE + description: 'temp' + depth: + datatype: DOUBLE + description: 'temp' Person: obligatory_properties: firstName: 
@@ -51,6 +59,9 @@ Analysis: date: identifier: responsible: + suggested_properties: + mean_value: + datatype: DOUBLE Publication: Thesis: inherit_from_suggested: diff --git a/integrationtests/test.sh b/integrationtests/test.sh index 71af543643a35cb082f10a24440c5ea87df946c9..1c0357e265eec770069166e614fc0a3aa6ecc548 100755 --- a/integrationtests/test.sh +++ b/integrationtests/test.sh @@ -65,6 +65,12 @@ python3 test_table.py # TODO the following test deletes lots of the data inserted by the crawler echo "Testing im and export" python3 test_im_und_export.py + +# automated analysis +# for some reason the loadFiles of sim data has to be called again +python3 -m caosadvancedtools.loadFiles /opt/caosdb/mnt/extroot/SimulationData +python3 create_analysis.py + # Better safe than sorry: python3 clear_database.py diff --git a/integrationtests/test_crawler_with_cfoods.py b/integrationtests/test_crawler_with_cfoods.py index 05bb581058a964d76ab78583cc290c348e8c4566..4efef87cef52e4a2a20a615afe210c32f52a276a 100755 --- a/integrationtests/test_crawler_with_cfoods.py +++ b/integrationtests/test_crawler_with_cfoods.py @@ -66,6 +66,17 @@ class CrawlerTest(unittest.TestCase): datfile.description) assert os.path.basename(datfile.path) == "datafile.dat" + # There should be two DepthTest Properties + depthtests = exp.get_property("DepthTest") + assert depthtests is not None + assert len(depthtests.value) == 2 + depthtest = db.Record(id=depthtests.value[0]) + depthtest.retrieve() + assert "DepthTest" in [p.name for p in depthtest.get_parents()] + assert 234.4 == depthtest.get_property("temperature").value + assert "°C" == depthtest.get_property("temperature").unit + assert 3.0 == depthtest.get_property("depth").value + # Should have a responsible person self.assertIsNotNone(exp.get_property("responsible")) person = db.Record(id=exp.get_property("responsible").value[0]) diff --git a/integrationtests/test_im_und_export.py b/integrationtests/test_im_und_export.py index 
27995080aa5cbeeb6f562226d4f0c0ca19c64d83..8ea45fd2cebbcb2c3be6c8cb79805204486f7862 100644 --- a/integrationtests/test_im_und_export.py +++ b/integrationtests/test_im_und_export.py @@ -12,10 +12,12 @@ if __name__ == "__main__": directory = TemporaryDirectory() export_related_to(rec.id, directory=directory.name) # delete everything + print("Clearing database") recs = db.execute_query("FIND entity with id>99") recs.delete() assert 0 == len(db.execute_query("FIND File which is stored at " "**/poster.pdf")) + print("Importing stored elements") import_xml(os.path.join(directory.name, "caosdb_data.xml"), interactive=False) # The following tests the existence of some required entities. @@ -26,3 +28,4 @@ if __name__ == "__main__": db.execute_query("FIND RecordType Person", unique=True) db.execute_query("FIND Record Person with firstname=Only", unique=True) db.execute_query("FIND File which is stored at **/poster.pdf", unique=True) + print("Found all required Records and Files.") diff --git a/integrationtests/update_analysis.py b/integrationtests/update_analysis.py new file mode 100644 index 0000000000000000000000000000000000000000..bd18ab375437bec02320dcfd269896c2ba7e2bb0 --- /dev/null +++ b/integrationtests/update_analysis.py @@ -0,0 +1,47 @@ +#!/usr/bin/env python3 +# encoding: utf-8 +# +# ** header v3.0 +# This file is a part of the CaosDB Project. +# +# Copyright (C) 2021 Indiscale GmbH <info@indiscale.com> +# Copyright (C) 2021 Henrik tom Wörden <h.tomwoerden@indiscale.com> +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see <https://www.gnu.org/licenses/>. +# +# ** end header +# + +"""Example update script. An analysis Record is retrieved and passed to the +generic run function which then calls the appropriate script based on the +Record. + +The simple query here could be replaced with something that e.g. retrieves all +entities that were changed within a certain period of time. + +""" + +import sys + +import caosdb as db +from caosadvancedtools.serverside.generic_analysis import run + + +def main(): + da = db.execute_query("FIND Analysis with identifier=TEST", unique=True) + run(da) + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/setup.py b/setup.py index 81695d77156a2225b12cb67b4c9d8a4677de5bb5..8a2dcee8d0f4fb80986efa99d91176e653993c45 100755 --- a/setup.py +++ b/setup.py @@ -47,7 +47,7 @@ from setuptools import find_packages, setup MAJOR = 0 MINOR = 3 -MICRO = 1 +MICRO = 2 PRE = "" # e.g. rc0, alpha.1, 0.beta-23 ISRELEASED = False diff --git a/src/caosadvancedtools/crawler.py b/src/caosadvancedtools/crawler.py index 82ada2fa02cd3b87415493e1c1d5499fc3dc8a82..5d91d85cbbbff5b6f64ce9a9de1f29ca603d3b8a 100644 --- a/src/caosadvancedtools/crawler.py +++ b/src/caosadvancedtools/crawler.py @@ -66,6 +66,82 @@ def separated(text): return "-"*60 + "\n" + text +def apply_list_of_updates(to_be_updated, update_flags={}, + update_cache=None, run_id=None): + """Updates the `to_be_updated` Container, i.e., pushes the changes to CaosDB + after removing possible duplicates. If a cache is provided, unauthorized + updates can be cached for further authorization. + + Parameters: + ----------- + to_be_updated : db.Container + Container with the entities that will be updated. + update_flags : dict, optional + Dictionary of CaosDB server flags that will be used for the + update. Default is an empty dict. 
+ update_cache : UpdateCache or None, optional + Cache in which the intended updates will be stored so they can be + authorized afterwards. Default is None. + run_id : String or None, optional + Id with which the pending updates are cached. Only meaningful if + `update_cache` is provided. Default is None. + """ + + if len(to_be_updated) == 0: + return + + get_ids_for_entities_with_names(to_be_updated) + + # remove duplicates + tmp = db.Container() + + for el in to_be_updated: + if el not in tmp: + tmp.append(el) + + to_be_updated = tmp + + info = "UPDATE: updating the following entities\n" + + baseurl = db.configuration.get_config()["Connection"]["url"] + + def make_clickable(txt, id): + return "<a href='{}/Entity/{}'>{}</a>".format(baseurl, id, txt) + + for el in to_be_updated: + info += str("\t" + make_clickable(el.name, el.id) + if el.name is not None + else "\t" + make_clickable(str(el.id), el.id)) + info += "\n" + logger.info(info) + + logger.debug(to_be_updated) + try: + if len(to_be_updated) > 0: + logger.info( + "Updating {} Records...".format( + len(to_be_updated))) + guard.safe_update(to_be_updated, unique=False, + flags=update_flags) + except FileNotFoundError as e: + logger.info("Cannot access {}. 
However, it might be needed for" + " the correct execution".format(e.filename)) + except ProhibitedException: + try: + update_cache.insert(to_be_updated, run_id) + except IntegrityError as e: + logger.warning( + "There were problems with the update of {}.".format( + to_be_updated), + extra={"identifier": str(to_be_updated), + "category": "update-cache"} + ) + logger.debug(traceback.format_exc()) + logger.debug(e) + except Exception as e: + DataModelProblems.evaluate_exception(e) + + class Crawler(object): def __init__(self, cfood_types, use_cache=False, abort_on_exception=True, interactive=True, hideKnown=False, @@ -318,7 +394,11 @@ class Crawler(object): self._cached_find_or_insert_identifiables(cfood.identifiables) cfood.update_identifiables() - self.push_identifiables_to_CaosDB(cfood) + apply_list_of_updates( + cfood.to_be_updated, + cfood.update_flags, + update_cache=self.update_cache, + run_id=self.run_id) except FileNotFoundError as e: logger.info("Cannot access {}. However, it might be needed for" " the correct execution".format(e.filename)) @@ -516,64 +596,8 @@ carefully and if the changes are ok, click on the following link: subject="Crawler Update", body=text) - def push_identifiables_to_CaosDB(self, cfood): - """ - Updates the to_be_updated Container, i.e. 
pushes the changes to CaosDB - """ - - if len(cfood.to_be_updated) == 0: - return - - get_ids_for_entities_with_names(cfood.to_be_updated) - - # remove duplicates - tmp = db.Container() - - for el in cfood.to_be_updated: - if el not in tmp: - tmp.append(el) - - cfood.to_be_updated = tmp - - info = "UPDATE: updating the following entities\n" - - baseurl = db.configuration.get_config()["Connection"]["url"] - for el in cfood.to_be_updated: - def make_clickable(txt, id): - return "<a href='{}/Entity/{}'>{}</a>".format(baseurl, id, txt) - info += str("\t" + make_clickable(el.name, el.id) - if el.name is not None - else "\t" + make_clickable(str(el.id), el.id)) - info += "\n" - logger.info(info) - - logger.debug(cfood.to_be_updated) - try: - if len(cfood.to_be_updated) > 0: - logger.info( - "Updating {} Records...".format( - len(cfood.to_be_updated))) - guard.safe_update(cfood.to_be_updated, unique=False, - flags=cfood.update_flags) - except FileNotFoundError as e: - logger.info("Cannot access {}. However, it might be needed for" - " the correct execution".format(e.filename)) - except ProhibitedException: - try: - self.update_cache.insert(cfood.to_be_updated, self.run_id) - except IntegrityError as e: - logger.warning( - "There were problems with the update of {}.".format( - cfood.to_be_updated), - extra={"identifier": str(cfood.to_be_updated), - "category": "update-cache"} - ) - logger.debug(traceback.format_exc()) - logger.debug(e) - except Exception as e: - DataModelProblems.evaluate_exception(e) - # TODO remove static? 
+ @staticmethod def find_or_insert_identifiables(identifiables): """ Sets the ids of identifiables (that do not have already an id from the diff --git a/src/caosadvancedtools/scifolder/__init__.py b/src/caosadvancedtools/scifolder/__init__.py index d7d67937b42ca23173fc93d4e704411f33d80bc4..cf753cfc0b72bf95e34edea1301b96ed18f040d0 100644 --- a/src/caosadvancedtools/scifolder/__init__.py +++ b/src/caosadvancedtools/scifolder/__init__.py @@ -3,3 +3,4 @@ from .experiment_cfood import ExperimentCFood from .publication_cfood import PublicationCFood from .simulation_cfood import SimulationCFood from .software_cfood import SoftwareCFood +from .result_table_cfood import ResultTableCFood diff --git a/src/caosadvancedtools/scifolder/result_table_cfood.py b/src/caosadvancedtools/scifolder/result_table_cfood.py new file mode 100644 index 0000000000000000000000000000000000000000..deaa2d00118659a9b177a05fe40b19a1793a16fb --- /dev/null +++ b/src/caosadvancedtools/scifolder/result_table_cfood.py @@ -0,0 +1,86 @@ +#!/usr/bin/env python +# encoding: utf-8 +# +# Copyright (C) 2019 Henrik tom Wörden +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see <https://www.gnu.org/licenses/>. 
+ +import re + +import caosdb as db +import pandas as pd +from caosadvancedtools.cfood import (AbstractFileCFood, assure_has_description, + assure_has_parent, assure_has_property, + assure_object_is_in_list, get_entity) +from caosadvancedtools.read_md_header import get_header + +from ..cfood import assure_property_is, fileguide +from .experiment_cfood import ExperimentCFood +from .generic_pattern import date_pattern, date_suffix_pattern, project_pattern +from .utils import parse_responsibles, reference_records_corresponding_to_files +from .withreadme import DATAMODEL as dm +from .withreadme import RESULTS, REVISIONOF, SCRIPTS, WithREADME, get_glob + + +# TODO similarities with TableCrawler +class ResultTableCFood(AbstractFileCFood): + + # win_paths can be used to define fields that will contain windows style + # path instead of the default unix ones. Possible fields are: + # ["results", "revisionOf"] + win_paths = [] + table_re = r"result_table_(?P<recordtype>.*).csv$" + property_name_re = re.compile(r"^(?P<pname>.+?)\s*(\[\s?(?P<unit>.*?)\s?\] *)?$") + + @staticmethod + def name_beautifier(x): return x + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self.table = pd.read_csv(fileguide.access(self.crawled_path)) + + @staticmethod + def get_re(): + return (".*/ExperimentalData/"+project_pattern + date_pattern + + date_suffix_pattern + ResultTableCFood.table_re) + + def create_identifiables(self): + self.recs = [] + self.experiment, self.project = ( + ExperimentCFood.create_identifiable_experiment(self.match)) + + for idx, row in self.table.iterrows(): + rec = db.Record() + rec.add_parent(self.match.group("recordtype")) + + for col in self.table.columns[:2]: + match = re.match(ResultTableCFood.property_name_re, col) + + if match.group("unit"): + rec.add_property(match.group("pname"), row.loc[col], unit=match.group("unit")) + else: + rec.add_property(match.group("pname"), row.loc[col]) + self.identifiables.append(rec) + 
self.recs.append(rec) + + self.identifiables.extend([self.project, self.experiment]) + + def update_identifiables(self): + for ii, (idx, row) in enumerate(self.table.iterrows()): + for col in row.index: + match = re.match(ResultTableCFood.property_name_re, col) + assure_property_is(self.recs[ii], match.group("pname"), row.loc[col], to_be_updated=self.to_be_updated) + assure_property_is(self.experiment, self.match.group("recordtype"), + self.recs, to_be_updated=self.to_be_updated, + datatype=db.LIST(self.match.group("recordtype"))) diff --git a/src/caosadvancedtools/serverside/examples/example_script.py b/src/caosadvancedtools/serverside/examples/example_script.py new file mode 100755 index 0000000000000000000000000000000000000000..d97d2d0d1f936b1c12e857d38fce043f0b514340 --- /dev/null +++ b/src/caosadvancedtools/serverside/examples/example_script.py @@ -0,0 +1,200 @@ +#!/usr/bin/env python3 +# encoding: utf-8 +# +# ** header v3.0 +# This file is a part of the CaosDB Project. +# +# Copyright (C) 2021 Indiscale GmbH <info@indiscale.com> +# Copyright (C) 2021 Henrik tom Wörden <h.tomwoerden@indiscale.com> +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see <https://www.gnu.org/licenses/>. +# +# ** end header +# + +"""An example script that illustrates how scripts can be used in conjunction +with the generic_analysis module. 
+ +The data model needed for this script is: + +Analysis: + sources: REFERENCE + scripts: FILE + results: REFERENCE + mean_value: DOUBLE + +Person: + Email: TEXT + +""" + +import argparse +import logging +import sys +from argparse import RawTextHelpFormatter +from datetime import datetime + +import caosdb as db +import matplotlib.pyplot as plt +import numpy as np +from caosadvancedtools.cfood import assure_property_is +from caosadvancedtools.crawler import apply_list_of_updates +from caosadvancedtools.guard import INSERT, UPDATE +from caosadvancedtools.guard import global_guard as guard +from caosadvancedtools.serverside.helper import send_mail as main_send_mail + +# logging should be done like this in order to allow the caller script to +# direct the output. +logger = logging.getLogger(__name__) + +# allow updates of existing entities +guard.set_level(level=UPDATE) + + +def send_mail(changes: [db.Entity], receipient: str): + """ calls sendmail in order to send a mail to the curator about pending + changes + + Parameters: + ----------- + changes: The CaosDB entities in the version after the update. + receipient: The person who shall receive the mail. + """ + + caosdb_config = db.configuration.get_config() + text = """Dear Curator, +The following changes where done automatically. + +{changes} + """.format(changes="\n".join(changes)) + try: + fro = caosdb_config["advancedtools"]["automated_updates.from_mail"] + except KeyError: + logger.error("Server Configuration is missing a setting for " + "sending mails. 
The administrator should check " + "'from_mail'.") + return + + main_send_mail( + from_addr=fro, + to=receipient, + subject="Automated Update", + body=text) + + +def main(args): + + # auth_token is provided by the server side scripting API + # use this token for authentication when creating a new connection + if hasattr(args, "auth_token") and args.auth_token: + db.configure_connection(auth_token=args.auth_token) + logger.debug("Established connection") + + try: + dataAnalysisRecord = db.Record(id=args.entityid).retrieve() + except db.TransactionError: + logger.error("Cannot retrieve Record with id ={}".format( + args.entityid + )) + + # The script may require certain information to exist. Here, we expect that + # a sources Property exists that references a numpy file. + # Similarly an InputDataSet could be used. + + if (dataAnalysisRecord.get_property("sources") is None + or not db.apiutils.is_reference( + dataAnalysisRecord.get_property("sources"))): + + raise RuntimeError("sources Refenrence must exist.") + + logger.debug("Found required data.") + + # ####### this core might be replaced by a call to another script ####### # + # Download the data + source_val = dataAnalysisRecord.get_property("sources").value + npobj = db.File( + id=(source_val[0] + if isinstance(source_val, list) + else source_val)).retrieve() + npfile = npobj.download() + logger.debug("Downloaded data.") + data = np.load(npfile) + + # Plot data + filename = "hist.png" + plt.hist(data) + plt.savefig(filename) + + mean = data.mean() + # ####################################################################### # + + # Insert the result plot + fig = db.File(file=filename, + path="/Analysis/results/"+str(datetime.now())+"/"+filename) + fig.insert() + + # Add the mean value to the analysis Record + # If such a property existed before, it is changed if necessary. 
The old + # value will persist in the versioning of LinkAhead + to_be_updated = db.Container() + assure_property_is( + dataAnalysisRecord, + "mean_value", + mean, + to_be_updated=to_be_updated + ) + + # Add the file with the plot to the analysis Record + # If a file was already referenced, the new one will be referenced instead. + # The old file is being kept and is still referenced in an old version of + # the analysis Record. + assure_property_is( + dataAnalysisRecord, + "results", + [fig.id], + to_be_updated=to_be_updated + ) + + if len(to_be_updated) > 0: + print(to_be_updated) + apply_list_of_updates(to_be_updated, update_flags={}) + logger.debug("Update sucessful.") + logger.info("The following Entities were changed:\n{}.".format( + [el.id for el in to_be_updated]) + ) + + # Send mails to people that are referenced. + people = db.execute_query("FIND RECORD Person WHICH IS REFERENCED BY " + "{}".format(dataAnalysisRecord.id)) + for person in people: + if person.get_property("Email") is not None: + send_mail([str(el) for el in to_be_updated], + receipient=person.get_property("Email").value) + logger.debug("Mails send.") + + +def parse_args(): + parser = argparse.ArgumentParser(description=__doc__, + formatter_class=RawTextHelpFormatter) + parser.add_argument("--auth-token", + help="Token provided by the server for authentication") + parser.add_argument("entityid", + help="The ID of the DataAnalysis Record.", type=int) + + return parser.parse_args() + + +if __name__ == "__main__": + args = parse_args() + sys.exit(main(args)) diff --git a/src/caosadvancedtools/serverside/generic_analysis.py b/src/caosadvancedtools/serverside/generic_analysis.py new file mode 100644 index 0000000000000000000000000000000000000000..66bec8a77e55709434b4285699e2cc2f8f804894 --- /dev/null +++ b/src/caosadvancedtools/serverside/generic_analysis.py @@ -0,0 +1,214 @@ +# encoding: utf-8 +# +# Copyright (C) 2021 Alexander Schlemmer <alexander.schlemmer@ds.mpg.de> +# Copyright (C) 2021 
IndiScale GmbH <info@indiscale.com> +# Copyright (C) 2021 Henrik tom Wörden <h.tomwoerden@indiscale.com> +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see <https://www.gnu.org/licenses/>. +# +# +# See: https://gitlab.indiscale.com/caosdb/src/caosdb-advanced-user-tools/-/issues/55 + +# This source file is work in progress and currently untested. + + +""" +Variante I: Python module implementiert eine 'main' function, die einen Record +als Argument entgegennimmt und diesen um z.B. 'results' ergänzt und updated. + +Variante II: Ein skript erhält eine ID als Argument (z.B. auf der command line) +und updated das Objekt selbstständig. + +Idealfall: Idempotenz; I.e. es ist egal, ob das Skript schon aufgerufen wurde. +Ein weiterer Aufruf führt ggf. zu einem Update (aber nur bei Änderungen von +z.B. Parametern) + +Das aufgerufene Skript kann beliebige Eigenschaften benutzen und erstellen. +ABER wenn die Standardeigenschaften (InputDataSet, etc) verwendet werden, kann +der Record leicht erzeugt werden. + + + + "Analyze" "Perform Anlysis" + Knopf an Record Form im WebUI + im WebUI + | | + | | + v v + Winzskript, dass einen + DataAnalysis-Stub erzeugt + | + | + v + execute_script Routine --> AnalysisSkript + erhält den Stub und ggf. 
Nutzt Funktionen um Updates durchzuführen falls + den Pythonmodulenamen notwendig, Email + ^ + | + | + Cronjob findet outdated + DataAnalysis + + +Analyseskript macht update: + - flexibel welche Änderungen vorgenommen werden (z.B. mehrere Records) + - spezielle Funktionen sollten verwendet werden + - Logging und informieren muss im Skript passieren + - Skript kann mit subprocess aufgerufen werden (alternative unvollständige + DataAnalysis einfügen) + + +# Features + - Emailversand bei Insert oder Update + - Kurze Info: "Create XY Analysis" kann vmtl automatisch erzeugt werden + - Debug Info: müsste optional/bei Fehler zur Verfügung stehen. + - Skript/Software version sollte gespeichert werden + + +Outlook: the part of the called scripts that interact with LinkAhead might in +future be replaced by the Crawler. The working directory would be copied to the +file server and then crawled. +""" + +import argparse +import importlib +import logging +import os +import sys + +import caosdb as db +from caosdb.utils.server_side_scripting import run_server_side_script + +logger = logging.getLogger(__name__) + + +def check_referenced_script(record: db.Record): + """ return the name of a referenced script + + If the supplied record does not have an appropriate Property warnings are + logged. + """ + + if record.get_property("scripts") is None: + logger.warning("The follwing changed Record is missing the 'scripts' " + "Property:\n{}".format(str(record))) + + return + + script_prop = record.get_property("scripts") + + if not db.apiutils.is_reference(script_prop): + logger.warning("The 'scripts' Property of the following Record should " + "reference a File:\n{}".format(str(record))) + + return + + script = db.execute_query("FIND ENTITY WITH id={}".format( + script_prop.value[0] if isinstance(script_prop.value, list) + else script_prop.value), unique=True) + + if (not isinstance(script, db.File)): + logger.warning("The 'scripts' Property of the Record {} should " + "reference a File.
Entity {} is not a File".format( + record.id, script_prop.value)) + + return + + script_name = os.path.basename(script.path) + + return script_name + + +def call_script(script_name: str, record_id: int): + ret = run_server_side_script(script_name, record_id) + + if ret.code != 0: + logger.error("Script failed!") + logger.debug(ret.stdout) + logger.error(ret.stderr) + else: + logger.debug(ret.stdout) + logger.error(ret.stderr) + + +def run(dataAnalysisRecord: db.Record): + """run a data analysis script. + + There are two options: + 1. A python script installed as a pip package. + 2. A generic script that can be executed on the command line. + + Using a python package: + It should be located in package plugin and implement at least + a main function that takes a DataAnalysisRecord as a single argument. + The script may perform changes to the Record and insert and update + Entities. + + Using a generic script: + The only argument that is supplied to the script is the ID of the + dataAnalysisRecord. Apart from the different Argument everything that is + said for the python package holds here. + """ + + if dataAnalysisRecord.get_property("scripts") is not None: + script_name = check_referenced_script(dataAnalysisRecord) + logger.debug( + "Found 'scripts'. Call script '{}' in separate process".format( + script_name) + ) + call_script(script_name, dataAnalysisRecord.id) + logger.debug( + "Script '{}' done.\n-----------------------------------".format( + script_name)) + + if dataAnalysisRecord.get_property("Software") is not None: + mod = dataAnalysisRecord.get_property("Software").value + logger.debug( + "Found 'Software'. Call '{}' as Python module".format( + mod) + ) + m = importlib.import_module(mod) + + m.main(dataAnalysisRecord) + logger.debug( + "'main' function of Python module '{}' done" + ".\n-----------------------------------".format(mod)) + + +def _parse_arguments(): + """ Parses the command line arguments. 
""" + parser = argparse.ArgumentParser(description='__doc__') + parser.add_argument("--module", help="An id an input dataset.") + parser.add_argument("--inputset", help="An id an input dataset.") + parser.add_argument("--parameterset", help="An id of a parameter record.") + + return parser.parse_args() + + +def main(): + """ This is for testing only. """ + args = _parse_arguments() + + dataAnalysisRecord = db.Record() + dataAnalysisRecord.add_property(name="InputDataSet", value=args.entity) + dataAnalysisRecord.add_property(name="ParameterSet", value=args.parameter) + dataAnalysisRecord.add_property(name="Software", value=args.module) + + dataAnalysisRecord.insert() + run(dataAnalysisRecord) + + +if __name__ == "__main__": + args = _parse_arguments() + sys.exit(main(args)) diff --git a/src/caosadvancedtools/serverside/model.yml b/src/caosadvancedtools/serverside/model.yml new file mode 100644 index 0000000000000000000000000000000000000000..2f5a9634a97e39da4c5b3a6dfe1bf0c587863231 --- /dev/null +++ b/src/caosadvancedtools/serverside/model.yml @@ -0,0 +1,15 @@ +# Parent of all datasets which are used as input to or output from +# analysis scripts +Dataset: + +# Parent of all parametersets which are used as input for analysis scripts +ParameterSet: + +DataAnalysis: + recommended_properties: + InputDataset: + datatype: Dataset + OutputDataset: + datatype: Dataset + ParameterSet: + date: \ No newline at end of file diff --git a/src/caosadvancedtools/serverside/sync.py b/src/caosadvancedtools/serverside/sync.py new file mode 100755 index 0000000000000000000000000000000000000000..04283a15ba7919af6027b53217ffb69355ddfc6f --- /dev/null +++ b/src/caosadvancedtools/serverside/sync.py @@ -0,0 +1,7 @@ +#!/usr/bin/env python3 +# Sync data model for generic data analysis method +# A. 
Schlemmer, 09/2021 + +from caosadvancedtools.models import parser +model = parser.parse_model_from_yaml("model.yml") +model.sync_data_model() diff --git a/src/caosadvancedtools/table_importer.py b/src/caosadvancedtools/table_importer.py index fee39ced2ed47da9d85d9430cce2d18e9a4023ca..0b55252bbf4d65cde1ffdf0711f396dda0f29546 100755 --- a/src/caosadvancedtools/table_importer.py +++ b/src/caosadvancedtools/table_importer.py @@ -156,6 +156,9 @@ def win_path_converter(val): checks whether the value looks like a windows path and converts it to posix """ + if val == "": + return val + if not check_win_path(val): raise ValueError( "Field should be a Windows path, but is\n'{}'.".format(val)) @@ -367,6 +370,8 @@ class TableImporter(object): if len(self.unique_keys) > 0: df = self.check_unique(df, filename=filename) + return df + class XLSImporter(TableImporter): def read_file(self, filename, **kwargs): @@ -405,7 +410,7 @@ class XLSImporter(TableImporter): 'category': "inconsistency"}) raise DataInconsistencyError(*e.args) - self.check_dataframe(df, filename) + df = self.check_dataframe(df, filename) return df @@ -422,7 +427,7 @@ class CSVImporter(TableImporter): 'category': "inconsistency"}) raise DataInconsistencyError(*ve.args) - self.check_dataframe(df, filename) + df = self.check_dataframe(df, filename) return df @@ -439,6 +444,6 @@ class TSVImporter(TableImporter): 'category': "inconsistency"}) raise DataInconsistencyError(*ve.args) - self.check_dataframe(df, filename) + df = self.check_dataframe(df, filename) return df diff --git a/unittests/test.csv b/unittests/test.csv new file mode 100644 index 0000000000000000000000000000000000000000..a29679afce78089f3cdd4e5e388262456668cd90 --- /dev/null +++ b/unittests/test.csv @@ -0,0 +1,3 @@ +temperature [°C] ,depth +234.4,3.0 +344.6,5.1 diff --git a/unittests/test_generic_analysis.py b/unittests/test_generic_analysis.py new file mode 100644 index 
0000000000000000000000000000000000000000..a1077b97ec58f80c8534c89d5fa5f57d8d815cb9 --- /dev/null +++ b/unittests/test_generic_analysis.py @@ -0,0 +1,70 @@ +#!/usr/bin/env python3 +# encoding: utf-8 +# +# ** header v3.0 +# This file is a part of the CaosDB Project. +# +# Copyright (C) 2021 Indiscale GmbH <info@indiscale.com> +# Copyright (C) 2021 Henrik tom Wörden <h.tomwoerden@indiscale.com> +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see <https://www.gnu.org/licenses/>. 
+# +# ** end header +# + +""" +module description +""" + +import caosdb as db +from caosadvancedtools.serverside.generic_analysis import \ + check_referenced_script + +from test_utils import BaseMockUpTest + + +class TestGAnalysisNoFile(BaseMockUpTest): + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self.entities = ( + '<Response><Record name="script.py" path="/some/path/script.py' + '" id="1234"/><Query string="find record" results="1">' + '</Query></Response>') + + def test_check_referenced_script(self): + # missing scripts + self.assertIsNone(check_referenced_script(db.Record())) + # wrong datatype + self.assertIsNone(check_referenced_script(db.Record().add_property( + "scripts", datatype=db.TEXT))) + # wrong value + self.assertIsNone(check_referenced_script(db.Record().add_property( + "scripts", datatype=db.REFERENCE, value="hallo"))) + # no file + self.assertIsNone(check_referenced_script(db.Record().add_property( + "scripts", datatype=db.REFERENCE, value="1234"))) + + +class TestGAnalysisFile(BaseMockUpTest): + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self.entities = ( + '<Response><File name="script.py" path="/some/path/script.py' + '" id="1234"/><Query string="find record" results="1">' + '</Query></Response>') + + def test_check_referenced_script(self): + # all correct + self.assertEqual(check_referenced_script(db.Record().add_property( + "scripts", datatype=db.REFERENCE, value="1234")), "script.py") diff --git a/unittests/test_result_table_cfood.py b/unittests/test_result_table_cfood.py new file mode 100644 index 0000000000000000000000000000000000000000..3341a2394cc9ef15ae172bb8992445d87c60d063 --- /dev/null +++ b/unittests/test_result_table_cfood.py @@ -0,0 +1,67 @@ +#!/usr/bin/env python3 +# encoding: utf-8 +# +# ** header v3.0 +# This file is a part of the CaosDB Project. 
+# +# Copyright (C) 2018 Research Group Biomedical Physics, +# Max-Planck-Institute for Dynamics and Self-Organization Göttingen +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see <https://www.gnu.org/licenses/>. +# +# ** end header +# + +""" +test module for ResultTableCFood +""" + + +import os +import re +import unittest + +import caosdb as db +from caosadvancedtools.scifolder.result_table_cfood import ResultTableCFood + + +class CFoodTest(unittest.TestCase): + def test_re(self): + self.assertIsNotNone(re.match(ResultTableCFood.table_re, "result_table_Hallo.csv")) + self.assertEqual(re.match(ResultTableCFood.table_re, "result_table_Hallo.csv").group("recordtype"), + "Hallo") + self.assertIsNotNone(re.match(ResultTableCFood.table_re, + "result_table_Cool RecordType.csv")) + self.assertEqual(re.match(ResultTableCFood.table_re, "result_table_Cool RecordType.csv").group("recordtype"), + "Cool RecordType") + self.assertIsNone(re.match(ResultTableCFood.table_re, "result_tableCool RecordType.csv")) + + self.assertIsNotNone(re.match(ResultTableCFood.property_name_re, + "temperature [C]")) + self.assertEqual(re.match(ResultTableCFood.property_name_re, + "temperature [C]").group("pname"), + "temperature") + self.assertEqual(re.match(ResultTableCFood.property_name_re, + "temperature [C]").group("unit"), "C") + self.assertEqual(re.match(ResultTableCFood.property_name_re, + "temperature [ C 
]").group("unit"), "C") + self.assertEqual(re.match(ResultTableCFood.property_name_re, + "temperature").group("pname"), "temperature") + + def test_ident(self): + rtc = ResultTableCFood(os.path.join(os.path.dirname(__file__), "test.csv")) + rtc.match = re.match(ResultTableCFood.get_re(), + "/ExperimentalData/2010_TestProject/2019-02-03_something/result_table_RT.csv") + rtc.create_identifiables() + rtc.update_identifiables()