diff --git a/README_SETUP.md b/README_SETUP.md index 0ac69cf928ae0267e60a8a5ec576b5117236e24c..e5ebd969462f7d2c28a329e2c6b6e1bab1252775 100644 --- a/README_SETUP.md +++ b/README_SETUP.md @@ -39,9 +39,11 @@ Optional h5-crawler: extroot. E.g. `sudo mount -o bind extroot ../../caosdb-deploy/profiles/empty/paths/extroot` (or whatever path the extroot of the empty profile to be used is located at). -3. Start an empty (!) CaosDB instance (with the mounted extroot). The - database will be cleared during testing, so it's important to use +3. Start (or restart) an empty (!) CaosDB instance (with the mounted extroot). + The database will be cleared during testing, so it's important to use an empty instance. + Make sure your configuration for the python caosdb module is correct and + allows you to connect to the server. 4. Run `test.sh`. Note that this may modify content of the `integrationtest/extroot/` directory. ## Code Formatting diff --git a/integrationtests/test.sh b/integrationtests/test.sh index 5efd549551670d3a4581380271ac2aba4b80a10f..71af543643a35cb082f10a24440c5ea87df946c9 100755 --- a/integrationtests/test.sh +++ b/integrationtests/test.sh @@ -34,11 +34,11 @@ echo "Filling the database" echo "Testing the crawler database" python3 -m pytest test_crawler_with_cfoods.py echo "make a change" -pushd extroot +cd extroot egrep -liRZ 'A description of another example' . | xargs -0 -l sed -i -e 's/A description of another example/A description of this example/g' # remove a file to check that this does not lead to a crawler crash mv DataAnalysis/2010_TestProject/2019-02-03_something/README.xlsx DataAnalysis/2010_TestProject/2019-02-03_something/README.xlsx_back -popd +cd .. echo "run crawler" ./crawl.py / | tee $OUT # rename the moved file @@ -58,9 +58,9 @@ then fi set -e echo "undo changes" -pushd extroot +cd extroot egrep -liRZ 'A description of this example' . | xargs -0 -l sed -i -e 's/A description of this example/A description of another example/g' -popd +cd .. 
python3 test_table.py # TODO the following test deletes lots of the data inserted by the crawler echo "Testing im and export" diff --git a/integrationtests/test_data_model.py b/integrationtests/test_data_model.py index 6f530719a810d76e5cc5a2c59fcd2d0325ff5268..2949fa81727a6c61a8646a48c249204fa87542d8 100644 --- a/integrationtests/test_data_model.py +++ b/integrationtests/test_data_model.py @@ -33,13 +33,6 @@ class DataModelTest(unittest.TestCase): rt = db.execute_query("FIND RECORDTYPE TestRecord", unique=True) assert rt.get_property("test") is not None - def tearDown(self): - try: - tests = db.execute_query("FIND test*") - tests.delete() - except Exception: - pass - def test_missing(self): # Test sync with missing prop # insert propt @@ -52,3 +45,19 @@ class DataModelTest(unittest.TestCase): dm.sync_data_model(noquestion=True) rt = db.execute_query("FIND RECORDTYPE TestRecord", unique=True) assert rt.get_property("testproperty") is not None + + def test_get_existing_entities(self): + db.RecordType(name="TestRecord").insert() + c = db.Container().extend([ + db.Property(name="test"), + db.RecordType(name="TestRecord")]) + exist = DataModel.get_existing_entities(c) + assert len(exist) == 1 + assert exist[0].name == "TestRecord" + + def tearDown(self): + try: + tests = db.execute_query("FIND test*") + tests.delete() + except Exception: + pass diff --git a/integrationtests/test_im_und_export.py b/integrationtests/test_im_und_export.py index db26249b14d3d547db8dcea4e49de2aa07479e5b..27995080aa5cbeeb6f562226d4f0c0ca19c64d83 100644 --- a/integrationtests/test_im_und_export.py +++ b/integrationtests/test_im_und_export.py @@ -3,15 +3,14 @@ import os from tempfile import TemporaryDirectory import caosdb as db - -from caosadvancedtools.export_related import export +from caosadvancedtools.export_related import export_related_to from caosadvancedtools.import_from_xml import import_xml if __name__ == "__main__": print("Conducting im- and export tests") rec = 
db.execute_query("FIND 2019-02-03_really_cool_finding", unique=True) directory = TemporaryDirectory() - export(rec.id, directory=directory.name) + export_related_to(rec.id, directory=directory.name) # delete everything recs = db.execute_query("FIND entity with id>99") recs.delete() diff --git a/src/caosadvancedtools/cfoods/h5.py b/src/caosadvancedtools/cfoods/h5.py index 543ac327871fb7f5c79d68e638af1a47b62d83f6..9defe77115db7687d3a6c5f27bf7f3d268e605fc 100644 --- a/src/caosadvancedtools/cfoods/h5.py +++ b/src/caosadvancedtools/cfoods/h5.py @@ -6,6 +6,8 @@ # Copyright (C) 2020 Daniel Hornung <d.hornung@indiscale.com> # Copyright (C) 2021 Henrik tom Wörden <h.tomwoerden@indiscale.com> # Copyright (C) 2021 Alexander Kreft +# Copyright (C) 2021 Laboratory for Fluid Physics and Biocomplexity, +# Max-Planck-Institut für Dynamik und Selbstorganisation <www.lfpn.ds.mpg.de> # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU Affero General Public License as @@ -85,13 +87,13 @@ def h5_attr_to_property(val): if isinstance(val, np.ndarray): if val.ndim > 1: return None, None - - if val.ndim == 0: - raise NotImplementedError( - "Code assumes that scalar values " - "will not be given as np.ndarray objects") - val = list(val) + # The tolist method is on both numpy.ndarray and numpy.generic + # and properly converts scalars (including 0-dimensional + # numpy.ndarray) to Python scalars and 1D arrays to lists of + # Python scalars. 
+ if val.ndim != 0: dtype = db.LIST(dtype) + val = val.tolist() # TODO this can eventually be removed diff --git a/src/caosadvancedtools/collect_datamodel.py b/src/caosadvancedtools/collect_datamodel.py index 1ca68068e713dd34ebc3368ad760461578dee4ef..806d15333cac7f745ce2fb82a02e0214ad2b6616 100644 --- a/src/caosadvancedtools/collect_datamodel.py +++ b/src/caosadvancedtools/collect_datamodel.py @@ -26,14 +26,19 @@ import argparse import os import caosdb as db +from caosdb.apiutils import retrieve_entities_with_ids + +from export_related import export def get_dm(): - rts = set([r.name for r in db.execute_query("SELECT name FROM RECORDTYPE")]) + rts = set([(r.id, r.name) for r + in db.execute_query("SELECT name FROM RECORDTYPE")]) if None in rts: rts.remove(None) - ps = set([r.name for r in db.execute_query("SELECT name FROM PROPERTY")]) + ps = set([(r.id, r.name) for r + in db.execute_query("SELECT name FROM PROPERTY")]) if None in ps: ps.remove(None) @@ -47,18 +52,26 @@ def get_parser(): "be stored") p.add_argument("-c", "--compare", help="directory where the datamodel that" " shall be compared is stored") + p.add_argument("-x", "--xml", action="store_true", + help="store xml as well") return p -def store(directory): +def store(directory, xml=False): rts, ps = get_dm() os.makedirs(directory, exist_ok=True) with open(os.path.join(directory, "recordtypes.txt"), "w") as fi: - fi.write(",".join(rts)) + fi.write(",".join([el[1] for el in rts])) with open(os.path.join(directory, "properties.txt"), "w") as fi: - fi.write(",".join(ps)) + fi.write(",".join([el[1] for el in ps])) + + if xml: + cont = retrieve_entities_with_ids( + [el[0] for el in rts]+[el[0] for el in ps]) + + export(cont, directory) def load_dm(directory): @@ -104,7 +117,7 @@ if __name__ == "__main__": args = p.parse_args() if args.store: - store(args.store) + store(args.store, xml=args.xml) if args.compare: compare(args.compare) diff --git a/src/caosadvancedtools/crawler.py 
b/src/caosadvancedtools/crawler.py index 33a732e89b179b9580b914e4c640baec3d307ce8..5a8d428655791169557f5c292d30698f6ad69798 100644 --- a/src/caosadvancedtools/crawler.py +++ b/src/caosadvancedtools/crawler.py @@ -56,6 +56,7 @@ from .datainconsistency import DataInconsistencyError from .datamodel_problems import DataModelProblems from .guard import RETRIEVE, ProhibitedException from .guard import global_guard as guard +from .serverside.helper import send_mail as main_send_mail from .suppressKnown import SuppressKnown logger = logging.getLogger(__name__) @@ -500,7 +501,6 @@ carefully and if the changes are ok, click on the following link: """.format(url=caosdb_config["Connection"]["url"], filename=filename, changes="\n".join(changes)) - sendmail = caosdb_config["Misc"]["sendmail"] try: fro = caosdb_config["advancedtools"]["crawler.from_mail"] to = caosdb_config["advancedtools"]["crawler.to_mail"] @@ -510,8 +510,11 @@ carefully and if the changes are ok, click on the following link: "'from_mail' and 'to_mail'.") return - p = subprocess.Popen([sendmail, "-f", fro, to], stdin=subprocess.PIPE) - p.communicate(input=text.encode()) + main_send_mail( + from_addr=fro, + to=to, + subject="Crawler Update", + body=text) def push_identifiables_to_CaosDB(self, cfood): """ diff --git a/src/caosadvancedtools/export_related.py b/src/caosadvancedtools/export_related.py index 00f440d28a2ae1da14132083e4b8d3c5003d1b65..69b588c34cc7c8123ab4291f6d8f76f06e7400be 100755 --- a/src/caosadvancedtools/export_related.py +++ b/src/caosadvancedtools/export_related.py @@ -96,12 +96,15 @@ def invert_ids(entities): apply_to_ids(entities, lambda x: x*-1) -def export(rec_id, directory="."): +def export_related_to(rec_id, directory="."): if not isinstance(rec_id, int): raise ValueError("rec_id needs to be an integer") ent = db.execute_query("FIND {}".format(rec_id), unique=True) cont = recursively_collect_related(ent) + export(cont, directory=directory) + +def export(cont, directory="."): directory = 
os.path.abspath(directory) dl_dir = os.path.join(directory, "downloads") @@ -119,6 +122,9 @@ def export(rec_id, directory="."): print("Failed download of:", target) invert_ids(cont) + + for el in cont: + el.version = None xml = etree.tounicode(cont.to_xml( local_serialization=True), pretty_print=True) @@ -147,4 +153,4 @@ if __name__ == "__main__": parser = defineParser() args = parser.parse_args() - export(args.id, directory=args.directory) + export_related_to(args.id, directory=args.directory) diff --git a/src/caosadvancedtools/models/parser.py b/src/caosadvancedtools/models/parser.py index a81855f2e10b5c267dc0915111547bcf2af766fa..1eda090757336506436621f9f430cf852a8a2348 100644 --- a/src/caosadvancedtools/models/parser.py +++ b/src/caosadvancedtools/models/parser.py @@ -62,6 +62,8 @@ def _get_listdatatype(dtype): # Taken from https://stackoverflow.com/a/53647080, CC-BY-SA, 2018 by # https://stackoverflow.com/users/2572431/augurar + + class SafeLineLoader(yaml.SafeLoader): """Load a line and keep meta-information. 
@@ -347,19 +349,22 @@ class Parser(object): self.model[name].description = prop elif prop_name == "recommended_properties": - self._add_to_recordtype(name, prop, importance=db.RECOMMENDED) + self._add_to_recordtype( + name, prop, importance=db.RECOMMENDED) for n, e in prop.items(): self._treat_entity(n, e) elif prop_name == "obligatory_properties": - self._add_to_recordtype(name, prop, importance=db.OBLIGATORY) + self._add_to_recordtype( + name, prop, importance=db.OBLIGATORY) for n, e in prop.items(): self._treat_entity(n, e) elif prop_name == "suggested_properties": - self._add_to_recordtype(name, prop, importance=db.SUGGESTED) + self._add_to_recordtype( + name, prop, importance=db.SUGGESTED) for n, e in prop.items(): self._treat_entity(n, e) @@ -392,6 +397,7 @@ class Parser(object): name that was defined in the model (or extern part) the string representations are replaced with caosdb objects + """ for key, value in self.model.items(): @@ -427,7 +433,7 @@ class Parser(object): continue - raise ValueError("Unknown Datatype.") + raise ValueError("Property {} has an unknown datatype: {}".format(value.name, value.datatype)) def _set_recordtypes(self): """ properties are defined in first iteration; set remaining as RTs """ diff --git a/src/doc/index.rst b/src/doc/index.rst index 309624d5f6fe083cbcbf0287735d88f10e90dfba..9aa045349ab05d3f5130a7f33b38c7eca0c4f32e 100644 --- a/src/doc/index.rst +++ b/src/doc/index.rst @@ -13,7 +13,9 @@ This documentation helps you to :doc:`get started<getting_started>`, explains th Getting started <README_SETUP> Concepts <concepts> - CaosDB Crawler <crawler> + tutorials + Caosdb-Crawler <crawler> + YAML Interface <yaml_interface> _apidoc/modules diff --git a/src/doc/yaml_interface.rst b/src/doc/yaml_interface.rst new file mode 100644 index 0000000000000000000000000000000000000000..06248f2b5c17f40b6f15f5f55664c5a4a5530a86 --- /dev/null +++ b/src/doc/yaml_interface.rst @@ -0,0 +1,117 @@ +YAML-Interface +-------------- + +The yaml interface 
is a module in caosadvancedtools that can be used to create and update +CaosDB models using a simplified definition in YAML format. + +Let's start with an example taken from https://gitlab.indiscale.com/caosdb/src/caosdb-advanced-user-tools/-/blob/dev/unittests/model.yml. + +.. code-block:: yaml + + Project: + obligatory_properties: + projectId: + datatype: INTEGER + description: 'UID of this project' + Person: + recommended_properties: + firstName: + datatype: TEXT + description: 'first name' + lastName: + datatype: TEXT + description: 'last name' + LabbookEntry: + recommended_properties: + Project: + entryId: + datatype: INTEGER + description: 'UID of this entry' + responsible: + datatype: Person + description: 'the person responsible for these notes' + textElement: + datatype: TEXT + description: 'a text element of a labbook recording' + associatedFile: + datatype: FILE + description: 'A file associated with this recording' + table: + datatype: FILE + description: 'A table document associated with this recording' + + +This example defines three ``RecordType``\ s: + +- A ``Project`` with one obligatory property ``projectId`` +- A ``Person`` with a ``firstName`` and a ``lastName`` (as recommended properties) +- A ``LabbookEntry`` with multiple recommended properties of different data types + +One major advantage of using this interface (in contrast to the standard python interface) is that properties can be defined and added to record types "on-the-fly". E.g. the three lines for ``firstName`` as sub entries of ``Person`` have two effects on CaosDB: + +- A new property with name ``firstName``, datatype ``TEXT`` and description ``first name`` is inserted (or updated, if already present) into CaosDB. +- The new property is added as a recommended property to record type ``Person``. + +Any further occurrences of ``firstName`` in the yaml file will reuse the definition provided for ``Person``. 
+ +Note the difference between the following three property declarations (two in ``LabbookEntry``, one in ``Person``): + +- ``Project``: This record type is added directly as a property of ``LabbookEntry``. Therefore it does not specify any further attributes. Compare to the original declaration of record type ``Project``. +- ``responsible``: This defines and adds a property with name "responsible" to ``LabbookEntry``, which has a datatype ``Person``. ``Person`` is defined above. +- ``firstName``: This defines and adds a property with the standard data type ``TEXT`` to record type ``Person``. + +Datatypes +--------- + +You can use any data type understood by CaosDB as datatype attribute in the yaml model. + +List attributes are a bit special: + +.. code-block:: yaml + + datatype: LIST<DOUBLE> + +would declare a list datatype of DOUBLE elements. + +.. code-block:: yaml + + datatype: LIST<Project> + +would declare a list of elements with datatype Project. + + +Keywords +-------- + +- **parent**: Parent of this entity. +- **importance**: Importance of this entity. Possible values: "recommended", "obligatory", "suggested" +- **datatype**: The datatype of this property, e.g. TEXT, INTEGER or Project. +- **unit**: The unit of the property, e.g. "m/s". +- **description**: A description for this entity. +- **recommended_properties**: Add properties to this entity with importance "recommended". +- **obligatory_properties**: Add properties to this entity with importance "obligatory". +- **suggested_properties**: Add properties to this entity with importance "suggested". +- **inherit_from_recommended**: Inherit from another entity using the specified importance level including the higher importance level "obligatory". This would add a corresponding parent and add all obligatory and recommended properties from the parent. +- **inherit_from_suggested**: Inherit from another entity using the specified importance level including the higher importance levels "obligatory" and "recommended". 
This would add a corresponding parent and add all obligatory, recommended and suggested properties from the parent. +- **inherit_from_obligatory**: Inherit from another entity using the specified importance level. This would add a corresponding parent and add all obligatory properties from the parent. + +Usage +----- + +You can use the yaml parser directly in python as follows: + + +.. code-block:: python + + from caosadvancedtools.models import parser as parser + model = parser.parse_model_from_yaml("model.yml") + + +This creates a DataModel object containing all entities defined in the yaml file. + +You can then use the functions from caosadvancedtools.models.data_model.DataModel to synchronize +the model with a CaosDB instance, e.g.: + +.. code-block:: python + + model.sync_data_model() diff --git a/unittests/test_data_model.py b/unittests/test_data_model.py index 074239399002833e8500af6369f1b2c7bcc8a3ac..159adfca1d589bb092b6f59110828b5868401e25 100644 --- a/unittests/test_data_model.py +++ b/unittests/test_data_model.py @@ -1,19 +1,11 @@ import unittest import caosdb as db -import pytest from caosadvancedtools.models.data_model import DataModel class DataModelTest(unittest.TestCase): - def tearDown(self): - try: - tests = db.execute_query("FIND test*") - tests.delete() - except Exception: - pass - def test_collecting(self): maintained = {"one": db.RecordType(name="TestRecord").add_property( name="testproperty"), @@ -24,17 +16,6 @@ class DataModelTest(unittest.TestCase): assert "TestRecord" in names assert "testproperty" in names - # TODO this seems to require integration test - @pytest.mark.xfail - def test_get_existing_entities(self): - db.RecordType(name="TestRecord").insert() - c = db.Container().extend([ - db.Property(name="testproperty"), - db.RecordType(name="TestRecord")]) - exist = DataModel.get_existing_entities(c) - assert len(exist) == 1 - assert exist[0].name == "TestRecord" - def test_sync_ids_by_name(self): container = 
db.Container().extend([db.RecordType(name="TestRecord"), db.RecordType(name="TestRecord2"), diff --git a/unittests/test_h5.py b/unittests/test_h5.py index 12b04844e173ac2f778b34daafcd876fdf527a49..e5ae94686fe4542f6833e21e9a80f01e4257538d 100644 --- a/unittests/test_h5.py +++ b/unittests/test_h5.py @@ -181,4 +181,7 @@ class H5CFoodTest(unittest.TestCase): self.assertTupleEqual((None, None), h5_attr_to_property(test_integers_2d)) self.assertTupleEqual((None, None), h5_attr_to_property(test_floats_2d)) - self.assertRaises(NotImplementedError, h5_attr_to_property, np.array(1)) + # Test scalar values given as np.array + self.assertTupleEqual((1, db.INTEGER), h5_attr_to_property(np.array(1))) + self.assertTupleEqual((1.123, db.DOUBLE), h5_attr_to_property(np.array(1.123))) + self.assertTupleEqual(('Hello World', db.TEXT), h5_attr_to_property(np.array("Hello World"))) diff --git a/unittests/test_table_importer.py b/unittests/test_table_importer.py index 51b4803d4db00f1b04fdfc4b78792e6a9de61bb8..60ca940b0471eaddf6e8c47b1cacbbfe90d22cac 100644 --- a/unittests/test_table_importer.py +++ b/unittests/test_table_importer.py @@ -64,7 +64,6 @@ class ConverterTest(unittest.TestCase): r"\this\computer,\this\computer"), ["/this/computer", "/this/computer"]) - @pytest.mark.xfail def test_datetime(self): test_file = os.path.join(os.path.dirname(__file__), "date.xlsx") self.importer = XLSImporter(converters={'d': datetime_converter, @@ -165,7 +164,7 @@ class XLSImporterTest(unittest.TestCase): df_new = self.importer.check_unique(df) self.assertEqual(df_new.shape[0], 1) - @pytest.mark.xfail + @pytest.mark.xfail(reason="Wrong Error is raised. See Issue #52") def test_raise(self): tmp = NamedTemporaryFile(delete=False, suffix=".lol") tmp.close()