Commit 211cea7c authored by Henrik tom Wörden

Merge branch 'dev' into f-fix-list

parents 28fbcf8b 6f28bffe
......@@ -39,9 +39,11 @@ Optional h5-crawler:
extroot. E.g. `sudo mount -o bind extroot
../../caosdb-deploy/profiles/empty/paths/extroot` (or whatever path
the extroot of the empty profile to be used is located at).
3. Start an empty (!) CaosDB instance (with the mounted extroot). The
   database will be cleared during testing, so it's important to use
3. Start (or restart) an empty (!) CaosDB instance (with the mounted extroot).
   The database will be cleared during testing, so it's important to use
   an empty instance.
   Make sure your configuration for the Python caosdb module is correct and
   allows you to connect to the server (a minimal connectivity check is sketched below).
4. Run `test.sh`. Note that this may modify content of the `integrationtest/extroot/` directory.
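
A minimal connectivity check (an editorial sketch, not part of the original instructions; it only assumes that the `caosdb` Python module is configured, e.g. via a `pycaosdb.ini`, and uses `execute_query` as the tests do):

```python
# Hypothetical quick check that the configured caosdb connection works
# before running test.sh; any cheap query against the server will do.
import caosdb as db

print(db.execute_query("COUNT Record"))
```
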
## Code Formatting
......
......@@ -34,11 +34,11 @@ echo "Filling the database"
echo "Testing the crawler database"
python3 -m pytest test_crawler_with_cfoods.py
echo "make a change"
pushd extroot
cd extroot
egrep -liRZ 'A description of another example' . | xargs -0 -l sed -i -e 's/A description of another example/A description of this example/g'
# remove a file to check that this does not lead to a crawler crash
mv DataAnalysis/2010_TestProject/2019-02-03_something/README.xlsx DataAnalysis/2010_TestProject/2019-02-03_something/README.xlsx_back
popd
cd ..
echo "run crawler"
./crawl.py / | tee $OUT
# rename the moved file
......@@ -58,9 +58,9 @@ then
fi
set -e
echo "undo changes"
pushd extroot
cd extroot
egrep -liRZ 'A description of this example' . | xargs -0 -l sed -i -e 's/A description of this example/A description of another example/g'
popd
cd ..
python3 test_table.py
# TODO the following test deletes lots of the data inserted by the crawler
echo "Testing im and export"
......
......@@ -33,13 +33,6 @@ class DataModelTest(unittest.TestCase):
rt = db.execute_query("FIND RECORDTYPE TestRecord", unique=True)
assert rt.get_property("test") is not None
def tearDown(self):
try:
tests = db.execute_query("FIND test*")
tests.delete()
except Exception:
pass
def test_missing(self):
# Test sync with missing prop
# insert propt
......@@ -52,3 +45,19 @@ class DataModelTest(unittest.TestCase):
        dm.sync_data_model(noquestion=True)
        rt = db.execute_query("FIND RECORDTYPE TestRecord", unique=True)
        assert rt.get_property("testproperty") is not None

    def test_get_existing_entities(self):
        db.RecordType(name="TestRecord").insert()
        c = db.Container().extend([
            db.Property(name="test"),
            db.RecordType(name="TestRecord")])
        exist = DataModel.get_existing_entities(c)
        assert len(exist) == 1
        assert exist[0].name == "TestRecord"

    def tearDown(self):
        try:
            tests = db.execute_query("FIND test*")
            tests.delete()
        except Exception:
            pass
......@@ -3,15 +3,14 @@ import os
from tempfile import TemporaryDirectory
import caosdb as db
from caosadvancedtools.export_related import export
from caosadvancedtools.export_related import export_related_to
from caosadvancedtools.import_from_xml import import_xml
if __name__ == "__main__":
print("Conducting im- and export tests")
rec = db.execute_query("FIND 2019-02-03_really_cool_finding", unique=True)
directory = TemporaryDirectory()
export(rec.id, directory=directory.name)
export_related_to(rec.id, directory=directory.name)
# delete everything
recs = db.execute_query("FIND entity with id>99")
recs.delete()
......
......@@ -6,6 +6,8 @@
# Copyright (C) 2020 Daniel Hornung <d.hornung@indiscale.com>
# Copyright (C) 2021 Henrik tom Wörden <h.tomwoerden@indiscale.com>
# Copyright (C) 2021 Alexander Kreft
# Copyright (C) 2021 Laboratory for Fluid Physics and Biocomplexity,
# Max-Planck-Institut für Dynamik und Selbstorganisation <www.lfpn.ds.mpg.de>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as
......@@ -85,13 +87,13 @@ def h5_attr_to_property(val):
    if isinstance(val, np.ndarray):
        if val.ndim > 1:
            return None, None
        if val.ndim == 0:
            raise NotImplementedError(
                "Code assumes that scalar values "
                "will not be given as np.ndarray objects")
        val = list(val)
        # The tolist method is on both numpy.ndarray and numpy.generic
        # and properly converts scalars (including 0-dimensional
        # numpy.ndarray) to Python scalars and 1D arrays to lists of
        # Python scalars.
        if val.ndim != 0:
            dtype = db.LIST(dtype)
        val = val.tolist()

    # TODO this can eventually be removed
......
......@@ -26,14 +26,19 @@ import argparse
import os
import caosdb as db
from caosdb.apiutils import retrieve_entities_with_ids
from export_related import export
def get_dm():
    rts = set([r.name for r in db.execute_query("SELECT name FROM RECORDTYPE")])
    rts = set([(r.id, r.name) for r
               in db.execute_query("SELECT name FROM RECORDTYPE")])
    if None in rts:
        rts.remove(None)
    ps = set([r.name for r in db.execute_query("SELECT name FROM PROPERTY")])
    ps = set([(r.id, r.name) for r
              in db.execute_query("SELECT name FROM PROPERTY")])
    if None in ps:
        ps.remove(None)
......@@ -47,18 +52,26 @@ def get_parser():
"be stored")
p.add_argument("-c", "--compare", help="directory where the datamodel that"
" shall be compared is stored")
p.add_argument("-x", "--xml", action="store_true",
help="store xml as well")
return p
def store(directory):
def store(directory, xml=False):
    rts, ps = get_dm()
    os.makedirs(directory, exist_ok=True)
    with open(os.path.join(directory, "recordtypes.txt"), "w") as fi:
        fi.write(",".join(rts))
        fi.write(",".join([el[1] for el in rts]))
    with open(os.path.join(directory, "properties.txt"), "w") as fi:
        fi.write(",".join(ps))
        fi.write(",".join([el[1] for el in ps]))
    if xml:
        cont = retrieve_entities_with_ids(
            [el[0] for el in rts]+[el[0] for el in ps])
        export(cont, directory)
def load_dm(directory):
......@@ -104,7 +117,7 @@ if __name__ == "__main__":
    args = p.parse_args()

    if args.store:
        store(args.store)
        store(args.store, xml=args.xml)
    if args.compare:
        compare(args.compare)
......@@ -56,6 +56,7 @@ from .datainconsistency import DataInconsistencyError
from .datamodel_problems import DataModelProblems
from .guard import RETRIEVE, ProhibitedException
from .guard import global_guard as guard
from .serverside.helper import send_mail as main_send_mail
from .suppressKnown import SuppressKnown
logger = logging.getLogger(__name__)
......@@ -500,7 +501,6 @@ carefully and if the changes are ok, click on the following link:
""".format(url=caosdb_config["Connection"]["url"],
filename=filename,
changes="\n".join(changes))
sendmail = caosdb_config["Misc"]["sendmail"]
try:
fro = caosdb_config["advancedtools"]["crawler.from_mail"]
to = caosdb_config["advancedtools"]["crawler.to_mail"]
......@@ -510,8 +510,11 @@ carefully and if the changes are ok, click on the following link:
"'from_mail' and 'to_mail'.")
return
p = subprocess.Popen([sendmail, "-f", fro, to], stdin=subprocess.PIPE)
p.communicate(input=text.encode())
main_send_mail(
from_addr=fro,
to=to,
subject="Crawler Update",
body=text)
def push_identifiables_to_CaosDB(self, cfood):
"""
......
......@@ -96,12 +96,15 @@ def invert_ids(entities):
    apply_to_ids(entities, lambda x: x*-1)


def export(rec_id, directory="."):
def export_related_to(rec_id, directory="."):
    if not isinstance(rec_id, int):
        raise ValueError("rec_id needs to be an integer")
    ent = db.execute_query("FIND {}".format(rec_id), unique=True)
    cont = recursively_collect_related(ent)
    export(cont, directory=directory)


def export(cont, directory="."):
    directory = os.path.abspath(directory)
    dl_dir = os.path.join(directory, "downloads")
......@@ -119,6 +122,9 @@ def export(rec_id, directory="."):
print("Failed download of:", target)
invert_ids(cont)
for el in cont:
el.version = None
xml = etree.tounicode(cont.to_xml(
local_serialization=True), pretty_print=True)
......@@ -147,4 +153,4 @@ if __name__ == "__main__":
    parser = defineParser()
    args = parser.parse_args()

    export(args.id, directory=args.directory)
    export_related_to(args.id, directory=args.directory)
......@@ -62,6 +62,8 @@ def _get_listdatatype(dtype):
# Taken from https://stackoverflow.com/a/53647080, CC-BY-SA, 2018 by
# https://stackoverflow.com/users/2572431/augurar
class SafeLineLoader(yaml.SafeLoader):
"""Load a line and keep meta-information.
......@@ -347,19 +349,22 @@ class Parser(object):
                self.model[name].description = prop
            elif prop_name == "recommended_properties":
                self._add_to_recordtype(name, prop, importance=db.RECOMMENDED)
                self._add_to_recordtype(
                    name, prop, importance=db.RECOMMENDED)
                for n, e in prop.items():
                    self._treat_entity(n, e)
            elif prop_name == "obligatory_properties":
                self._add_to_recordtype(name, prop, importance=db.OBLIGATORY)
                self._add_to_recordtype(
                    name, prop, importance=db.OBLIGATORY)
                for n, e in prop.items():
                    self._treat_entity(n, e)
            elif prop_name == "suggested_properties":
                self._add_to_recordtype(name, prop, importance=db.SUGGESTED)
                self._add_to_recordtype(
                    name, prop, importance=db.SUGGESTED)
                for n, e in prop.items():
                    self._treat_entity(n, e)
......@@ -392,6 +397,7 @@ class Parser(object):
        name that was defined in the model (or extern part)
        the string representations are replaced with caosdb objects
        """

        for key, value in self.model.items():
......@@ -427,7 +433,7 @@ class Parser(object):
                    continue
                raise ValueError("Unknown Datatype.")
                raise ValueError("Property {} has an unknown datatype: {}".format(value.name, value.datatype))

    def _set_recordtypes(self):
        """ properties are defined in first iteration; set remaining as RTs """
......
......@@ -13,7 +13,9 @@ This documentation helps you to :doc:`get started<getting_started>`, explains th
   Getting started <README_SETUP>
   Concepts <concepts>
   CaosDB Crawler <crawler>
   tutorials
   Caosdb-Crawler <crawler>
   YAML Interface <yaml_interface>
   _apidoc/modules
......
YAML-Interface
--------------
The YAML interface is a module in caosdb-advanced-user-tools that can be used to create and update
CaosDB data models using a simplified definition in YAML format.

Let's start with an example taken from https://gitlab.indiscale.com/caosdb/src/caosdb-advanced-user-tools/-/blob/dev/unittests/model.yml:

.. code-block:: yaml

   Project:
      obligatory_properties:
         projectId:
            datatype: INTEGER
            description: 'UID of this project'
   Person:
      recommended_properties:
         firstName:
            datatype: TEXT
            description: 'first name'
         lastName:
            datatype: TEXT
            description: 'last name'
   LabbookEntry:
      recommended_properties:
         Project:
         entryId:
            datatype: INTEGER
            description: 'UID of this entry'
         responsible:
            datatype: Person
            description: 'the person responsible for these notes'
         textElement:
            datatype: TEXT
            description: 'a text element of a labbook recording'
         associatedFile:
            datatype: FILE
            description: 'A file associated with this recording'
         table:
            datatype: FILE
            description: 'A table document associated with this recording'

This example defines three ``RecordType``s:

- A ``Project`` with one obligatory property ``projectId``
- A ``Person`` with a ``firstName`` and a ``lastName`` (as recommended properties)
- A ``LabbookEntry`` with multiple recommended properties of different data types

One major advantage of using this interface (in contrast to the standard Python interface) is that properties can be defined and added to record types "on the fly". E.g. the three lines for ``firstName`` as sub-entries of ``Person`` have two effects on CaosDB:

- A new property with name ``firstName``, datatype ``TEXT`` and description ``first name`` is inserted (or updated, if already present) into CaosDB.
- The new property is added as a recommended property to the record type ``Person``.

Any further occurrences of ``firstName`` in the YAML file will reuse the definition provided under ``Person``.

Note the difference between the following three kinds of property declarations (the first two appear under ``LabbookEntry``):

- ``Project``: This record type is added directly as a property of ``LabbookEntry``. Therefore it does not specify any further attributes. Compare to the original declaration of record type ``Project``.
- ``responsible``: This defines and adds a property with name "responsible" to ``LabbookEntry``, which has the datatype ``Person``. ``Person`` is defined above.
- ``firstName``: This defines and adds a property with the standard datatype ``TEXT`` to record type ``Person``.

Datatypes
---------

You can use any datatype understood by CaosDB as the ``datatype`` attribute in the YAML model.
List datatypes are a bit special:

.. code-block:: yaml

   datatype: LIST<DOUBLE>

would declare a list datatype with DOUBLE elements, and

.. code-block:: yaml

   datatype: LIST<Project>

would declare a list of elements with datatype Project.

Keywords
--------

- **parent**: Parent of this entity.
- **importance**: Importance of this entity. Possible values: "recommended", "obligatory", "suggested".
- **datatype**: The datatype of this property, e.g. TEXT, INTEGER or Project.
- **unit**: The unit of the property, e.g. "m/s".
- **description**: A description for this entity.
- **recommended_properties**: Add properties to this entity with importance "recommended".
- **obligatory_properties**: Add properties to this entity with importance "obligatory".
- **suggested_properties**: Add properties to this entity with importance "suggested".
- **inherit_from_recommended**: Inherit from another entity at the "recommended" importance level and above. This adds the corresponding parent and all obligatory and recommended properties of that parent.
- **inherit_from_suggested**: Inherit from another entity at the "suggested" importance level and above. This adds the corresponding parent and all obligatory, recommended and suggested properties of that parent.
- **inherit_from_obligatory**: Inherit from another entity at the "obligatory" importance level. This adds the corresponding parent and all obligatory properties of that parent. (A short sketch of the inheritance keywords follows below.)
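
The inheritance keywords are easiest to see in action. The following is a minimal sketch (an editorial addition, not part of the original documentation): it assumes that ``inherit_from_obligatory`` takes a list of parent names and that ``parse_model_from_yaml`` accepts a path to any YAML model file; it only uses functions shown elsewhere on this page.

.. code-block:: python

   # Minimal sketch (assumptions: inherit_from_obligatory takes a list of
   # parent names; parse_model_from_yaml accepts any YAML model file path).
   from tempfile import NamedTemporaryFile

   from caosadvancedtools.models import parser as parser

   model_definition = """
   Project:
     obligatory_properties:
       projectId:
         datatype: INTEGER
   SpecialProject:
     inherit_from_obligatory:
     - Project
   """

   with NamedTemporaryFile(mode="w", suffix=".yml", delete=False) as tmp:
       tmp.write(model_definition)

   # SpecialProject should now carry Project as a parent together with the
   # obligatory property projectId.
   model = parser.parse_model_from_yaml(tmp.name)
   print(model)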

Usage
-----

You can use the YAML parser directly in Python as follows:

.. code-block:: python

   from caosadvancedtools.models import parser as parser
   model = parser.parse_model_from_yaml("model.yml")

This creates a ``DataModel`` object containing all entities defined in the YAML file.
You can then use the functions from ``caosadvancedtools.models.data_model.DataModel`` to synchronize
the model with a CaosDB instance, e.g.:

.. code-block:: python

   model.sync_data_model()
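
Putting both steps together, a compact end-to-end sketch (an editorial addition; ``noquestion=True`` is taken from the integration tests above and skips the interactive confirmation before changes are applied):

.. code-block:: python

   from caosadvancedtools.models import parser

   # Parse the YAML definition and synchronize it with the connected CaosDB
   # instance without asking for confirmation (as in the integration tests).
   model = parser.parse_model_from_yaml("model.yml")
   model.sync_data_model(noquestion=True)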
import unittest

import caosdb as db
import pytest
from caosadvancedtools.models.data_model import DataModel


class DataModelTest(unittest.TestCase):
    def tearDown(self):
        try:
            tests = db.execute_query("FIND test*")
            tests.delete()
        except Exception:
            pass

    def test_collecting(self):
        maintained = {"one": db.RecordType(name="TestRecord").add_property(
            name="testproperty"),
......@@ -24,17 +16,6 @@ class DataModelTest(unittest.TestCase):
assert "TestRecord" in names
assert "testproperty" in names
# TODO this seems to require integration test
@pytest.mark.xfail
def test_get_existing_entities(self):
db.RecordType(name="TestRecord").insert()
c = db.Container().extend([
db.Property(name="testproperty"),
db.RecordType(name="TestRecord")])
exist = DataModel.get_existing_entities(c)
assert len(exist) == 1
assert exist[0].name == "TestRecord"
def test_sync_ids_by_name(self):
container = db.Container().extend([db.RecordType(name="TestRecord"),
db.RecordType(name="TestRecord2"),
......
......@@ -181,4 +181,7 @@ class H5CFoodTest(unittest.TestCase):
        self.assertTupleEqual((None, None), h5_attr_to_property(test_integers_2d))
        self.assertTupleEqual((None, None), h5_attr_to_property(test_floats_2d))
        self.assertRaises(NotImplementedError, h5_attr_to_property, np.array(1))

        # Test scalar values given as np.array
        self.assertTupleEqual((1, db.INTEGER), h5_attr_to_property(np.array(1)))
        self.assertTupleEqual((1.123, db.DOUBLE), h5_attr_to_property(np.array(1.123)))
        self.assertTupleEqual(('Hello World', db.TEXT), h5_attr_to_property(np.array("Hello World")))
......@@ -64,7 +64,6 @@ class ConverterTest(unittest.TestCase):
r"\this\computer,\this\computer"),
["/this/computer", "/this/computer"])
@pytest.mark.xfail
def test_datetime(self):
test_file = os.path.join(os.path.dirname(__file__), "date.xlsx")
self.importer = XLSImporter(converters={'d': datetime_converter,
......@@ -165,7 +164,7 @@ class XLSImporterTest(unittest.TestCase):
        df_new = self.importer.check_unique(df)
        self.assertEqual(df_new.shape[0], 1)

    @pytest.mark.xfail
    @pytest.mark.xfail(reason="Wrong Error is raised. See Issue #52")
    def test_raise(self):
        tmp = NamedTemporaryFile(delete=False, suffix=".lol")
        tmp.close()
......