Commit 211cea7c authored by Henrik tom Wörden

Merge branch 'dev' into f-fix-list

parents 28fbcf8b 6f28bffe
......@@ -39,9 +39,11 @@ Optional h5-crawler:
extroot. E.g. `sudo mount -o bind extroot
../../caosdb-deploy/profiles/empty/paths/extroot` (or whatever path
the extroot of the empty profile to be used is located at).
3. Start an empty (!) CaosDB instance (with the mounted extroot). The
   database will be cleared during testing, so it's important to use
3. Start (or restart) an empty (!) CaosDB instance (with the mounted extroot).
   The database will be cleared during testing, so it's important to use
   an empty instance.
   Make sure your configuration for the Python caosdb module is correct and
   allows you to connect to the server (a minimal connectivity check is sketched below).
4. Run `test.sh`. Note that this may modify content of the `integrationtest/extroot/` directory.
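
A minimal connectivity check (an editorial sketch, not part of the original instructions; it only assumes that the `caosdb` Python module is configured, e.g. via a `pycaosdb.ini`, and uses `execute_query` as the tests do):

```python
# Hypothetical quick check that the configured caosdb connection works
# before running test.sh; any cheap query against the server will do.
import caosdb as db

print(db.execute_query("COUNT Record"))
```
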
## Code Formatting
......
......@@ -34,11 +34,11 @@ echo "Filling the database"
echo "Testing the crawler database"
python3 -m pytest test_crawler_with_cfoods.py
echo "make a change"
pushd extroot
cd extroot
egrep -liRZ 'A description of another example' . | xargs -0 -l sed -i -e 's/A description of another example/A description of this example/g'
# remove a file to check that this does not lead to a crawler crash
mv DataAnalysis/2010_TestProject/2019-02-03_something/README.xlsx DataAnalysis/2010_TestProject/2019-02-03_something/README.xlsx_back
popd
cd ..
echo "run crawler"
./crawl.py / | tee $OUT
# rename the moved file
......@@ -58,9 +58,9 @@ then
fi
set -e
echo "undo changes"
pushd extroot
cd extroot
egrep -liRZ 'A description of this example' . | xargs -0 -l sed -i -e 's/A description of this example/A description of another example/g'
popd
cd ..
python3 test_table.py
# TODO the following test deletes lots of the data inserted by the crawler
echo "Testing im and export"
......
......@@ -33,13 +33,6 @@ class DataModelTest(unittest.TestCase):
rt = db.execute_query("FIND RECORDTYPE TestRecord", unique=True)
assert rt.get_property("test") is not None
def tearDown(self):
try:
tests = db.execute_query("FIND test*")
tests.delete()
except Exception:
pass
def test_missing(self):
# Test sync with missing prop
# insert propt
......@@ -52,3 +45,19 @@ class DataModelTest(unittest.TestCase):
        dm.sync_data_model(noquestion=True)
        rt = db.execute_query("FIND RECORDTYPE TestRecord", unique=True)
        assert rt.get_property("testproperty") is not None

    def test_get_existing_entities(self):
        db.RecordType(name="TestRecord").insert()
        c = db.Container().extend([
            db.Property(name="test"),
            db.RecordType(name="TestRecord")])
        exist = DataModel.get_existing_entities(c)
        assert len(exist) == 1
        assert exist[0].name == "TestRecord"

    def tearDown(self):
        try:
            tests = db.execute_query("FIND test*")
            tests.delete()
        except Exception:
            pass
......@@ -3,15 +3,14 @@ import os
from tempfile import TemporaryDirectory
import caosdb as db
from caosadvancedtools.export_related import export
from caosadvancedtools.export_related import export_related_to
from caosadvancedtools.import_from_xml import import_xml
if __name__ == "__main__":
print("Conducting im- and export tests")
rec = db.execute_query("FIND 2019-02-03_really_cool_finding", unique=True)
directory = TemporaryDirectory()
export(rec.id, directory=directory.name)
export_related_to(rec.id, directory=directory.name)
# delete everything
recs = db.execute_query("FIND entity with id>99")
recs.delete()
......
......@@ -6,6 +6,8 @@
# Copyright (C) 2020 Daniel Hornung <d.hornung@indiscale.com>
# Copyright (C) 2021 Henrik tom Wörden <h.tomwoerden@indiscale.com>
# Copyright (C) 2021 Alexander Kreft
# Copyright (C) 2021 Laboratory for Fluid Physics and Biocomplexity,
# Max-Planck-Institut für Dynamik und Selbstorganisation <www.lfpn.ds.mpg.de>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as
......@@ -85,13 +87,13 @@ def h5_attr_to_property(val):
    if isinstance(val, np.ndarray):
        if val.ndim > 1:
            return None, None
        if val.ndim == 0:
            raise NotImplementedError(
                "Code assumes that scalar values "
                "will not be given as np.ndarray objects")
        val = list(val)
        # The tolist method is on both numpy.ndarray and numpy.generic
        # and properly converts scalars (including 0-dimensional
        # numpy.ndarray) to Python scalars and 1D arrays to lists of
        # Python scalars.
        if val.ndim != 0:
            dtype = db.LIST(dtype)
        val = val.tolist()

    # TODO this can eventually be removed
......
......@@ -26,14 +26,19 @@ import argparse
import os
import caosdb as db
from caosdb.apiutils import retrieve_entities_with_ids
from export_related import export
def get_dm():
    rts = set([r.name for r in db.execute_query("SELECT name FROM RECORDTYPE")])
    rts = set([(r.id, r.name) for r
               in db.execute_query("SELECT name FROM RECORDTYPE")])
    if None in rts:
        rts.remove(None)
    ps = set([r.name for r in db.execute_query("SELECT name FROM PROPERTY")])
    ps = set([(r.id, r.name) for r
              in db.execute_query("SELECT name FROM PROPERTY")])
    if None in ps:
        ps.remove(None)
......@@ -47,18 +52,26 @@ def get_parser():
"be stored")
p.add_argument("-c", "--compare", help="directory where the datamodel that"
" shall be compared is stored")
p.add_argument("-x", "--xml", action="store_true",
help="store xml as well")
return p
def store(directory):
def store(directory, xml=False):
    rts, ps = get_dm()
    os.makedirs(directory, exist_ok=True)
    with open(os.path.join(directory, "recordtypes.txt"), "w") as fi:
        fi.write(",".join(rts))
        fi.write(",".join([el[1] for el in rts]))
    with open(os.path.join(directory, "properties.txt"), "w") as fi:
        fi.write(",".join(ps))
        fi.write(",".join([el[1] for el in ps]))
    if xml:
        cont = retrieve_entities_with_ids(
            [el[0] for el in rts]+[el[0] for el in ps])
        export(cont, directory)
def load_dm(directory):
......@@ -104,7 +117,7 @@ if __name__ == "__main__":
    args = p.parse_args()

    if args.store:
        store(args.store)
        store(args.store, xml=args.xml)
    if args.compare:
        compare(args.compare)
......@@ -56,6 +56,7 @@ from .datainconsistency import DataInconsistencyError
from .datamodel_problems import DataModelProblems
from .guard import RETRIEVE, ProhibitedException
from .guard import global_guard as guard
from .serverside.helper import send_mail as main_send_mail
from .suppressKnown import SuppressKnown
logger = logging.getLogger(__name__)
......@@ -500,7 +501,6 @@ carefully and if the changes are ok, click on the following link:
""".format(url=caosdb_config["Connection"]["url"],
filename=filename,
changes="\n".join(changes))
sendmail = caosdb_config["Misc"]["sendmail"]
try:
fro = caosdb_config["advancedtools"]["crawler.from_mail"]
to = caosdb_config["advancedtools"]["crawler.to_mail"]
......@@ -510,8 +510,11 @@ carefully and if the changes are ok, click on the following link:
"'from_mail' and 'to_mail'.")
return
p = subprocess.Popen([sendmail, "-f", fro, to], stdin=subprocess.PIPE)
p.communicate(input=text.encode())
main_send_mail(
from_addr=fro,
to=to,
subject="Crawler Update",
body=text)
def push_identifiables_to_CaosDB(self, cfood):
"""
......
......@@ -96,12 +96,15 @@ def invert_ids(entities):
    apply_to_ids(entities, lambda x: x*-1)


def export(rec_id, directory="."):
def export_related_to(rec_id, directory="."):
    if not isinstance(rec_id, int):
        raise ValueError("rec_id needs to be an integer")
    ent = db.execute_query("FIND {}".format(rec_id), unique=True)
    cont = recursively_collect_related(ent)
    export(cont, directory=directory)


def export(cont, directory="."):
    directory = os.path.abspath(directory)
    dl_dir = os.path.join(directory, "downloads")
......@@ -119,6 +122,9 @@ def export(rec_id, directory="."):
print("Failed download of:", target)
invert_ids(cont)
for el in cont:
el.version = None
xml = etree.tounicode(cont.to_xml(
local_serialization=True), pretty_print=True)
......@@ -147,4 +153,4 @@ if __name__ == "__main__":
    parser = defineParser()
    args = parser.parse_args()

    export(args.id, directory=args.directory)
    export_related_to(args.id, directory=args.directory)
......@@ -62,6 +62,8 @@ def _get_listdatatype(dtype):
# Taken from https://stackoverflow.com/a/53647080, CC-BY-SA, 2018 by
# https://stackoverflow.com/users/2572431/augurar
class SafeLineLoader(yaml.SafeLoader):
"""Load a line and keep meta-information.
......@@ -347,19 +349,22 @@ class Parser(object):
                self.model[name].description = prop
            elif prop_name == "recommended_properties":
                self._add_to_recordtype(name, prop, importance=db.RECOMMENDED)
                self._add_to_recordtype(
                    name, prop, importance=db.RECOMMENDED)
                for n, e in prop.items():
                    self._treat_entity(n, e)
            elif prop_name == "obligatory_properties":
                self._add_to_recordtype(name, prop, importance=db.OBLIGATORY)
                self._add_to_recordtype(
                    name, prop, importance=db.OBLIGATORY)
                for n, e in prop.items():
                    self._treat_entity(n, e)
            elif prop_name == "suggested_properties":
                self._add_to_recordtype(name, prop, importance=db.SUGGESTED)
                self._add_to_recordtype(
                    name, prop, importance=db.SUGGESTED)
                for n, e in prop.items():
                    self._treat_entity(n, e)
......@@ -392,6 +397,7 @@ class Parser(object):
        name that was defined in the model (or extern part)
        the string representations are replaced with caosdb objects
        """

        for key, value in self.model.items():
......@@ -427,7 +433,7 @@ class Parser(object):
                    continue
                raise ValueError("Unknown Datatype.")
                raise ValueError("Property {} has an unknown datatype: {}".format(value.name, value.datatype))

    def _set_recordtypes(self):
        """ properties are defined in first iteration; set remaining as RTs """
......
......@@ -13,7 +13,9 @@ This documentation helps you to :doc:`get started<getting_started>`, explains th
   Getting started <README_SETUP>
   Concepts <concepts>
   CaosDB Crawler <crawler>
   tutorials
   Caosdb-Crawler <crawler>
   YAML Interface <yaml_interface>
   _apidoc/modules
......
YAML-Interface
--------------
The YAML interface is a module in caosdb-advanced-user-tools that can be used to create and update
CaosDB data models using a simplified definition in YAML format.

Let's start with an example taken from https://gitlab.indiscale.com/caosdb/src/caosdb-advanced-user-tools/-/blob/dev/unittests/model.yml:

.. code-block:: yaml

   Project:
      obligatory_properties:
         projectId:
            datatype: INTEGER
            description: 'UID of this project'
   Person:
      recommended_properties:
         firstName:
            datatype: TEXT
            description: 'first name'
         lastName:
            datatype: TEXT
            description: 'last name'
   LabbookEntry:
      recommended_properties:
         Project:
         entryId:
            datatype: INTEGER
            description: 'UID of this entry'
         responsible:
            datatype: Person
            description: 'the person responsible for these notes'
         textElement:
            datatype: TEXT
            description: 'a text element of a labbook recording'
         associatedFile:
            datatype: FILE
            description: 'A file associated with this recording'
         table:
            datatype: FILE
            description: 'A table document associated with this recording'

This example defines three ``RecordType``s:

- A ``Project`` with one obligatory property ``projectId``
- A ``Person`` with a ``firstName`` and a ``lastName`` (as recommended properties)
- A ``LabbookEntry`` with multiple recommended properties of different data types

One major advantage of using this interface (in contrast to the standard Python interface) is that properties can be defined and added to record types "on the fly". E.g. the three lines for ``firstName`` as sub-entries of ``Person`` have two effects on CaosDB:

- A new property with name ``firstName``, datatype ``TEXT`` and description ``first name`` is inserted (or updated, if already present) into CaosDB.
- The new property is added as a recommended property to the record type ``Person``.

Any further occurrences of ``firstName`` in the YAML file will reuse the definition provided under ``Person``.

Note the difference between the following three kinds of property declarations (the first two appear under ``LabbookEntry``):

- ``Project``: This record type is added directly as a property of ``LabbookEntry``. Therefore it does not specify any further attributes. Compare to the original declaration of record type ``Project``.
- ``responsible``: This defines and adds a property with name "responsible" to ``LabbookEntry``, which has the datatype ``Person``. ``Person`` is defined above.
- ``firstName``: This defines and adds a property with the standard datatype ``TEXT`` to record type ``Person``.

Datatypes
---------

You can use any datatype understood by CaosDB as the ``datatype`` attribute in the YAML model.
List datatypes are a bit special:

.. code-block:: yaml

   datatype: LIST<DOUBLE>

would declare a list datatype with DOUBLE elements, and

.. code-block:: yaml

   datatype: LIST<Project>

would declare a list of elements with datatype Project.

Keywords
--------

- **parent**: Parent of this entity.
- **importance**: Importance of this entity. Possible values: "recommended", "obligatory", "suggested".
- **datatype**: The datatype of this property, e.g. TEXT, INTEGER or Project.
- **unit**: The unit of the property, e.g. "m/s".
- **description**: A description for this entity.
- **recommended_properties**: Add properties to this entity with importance "recommended".
- **obligatory_properties**: Add properties to this entity with importance "obligatory".
- **suggested_properties**: Add properties to this entity with importance "suggested".
- **inherit_from_recommended**: Inherit from another entity at the "recommended" importance level and above. This adds the corresponding parent and all obligatory and recommended properties of that parent.
- **inherit_from_suggested**: Inherit from another entity at the "suggested" importance level and above. This adds the corresponding parent and all obligatory, recommended and suggested properties of that parent.
- **inherit_from_obligatory**: Inherit from another entity at the "obligatory" importance level. This adds the corresponding parent and all obligatory properties of that parent. (A short sketch of the inheritance keywords follows below.)
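
The inheritance keywords are easiest to see in action. The following is a minimal sketch (an editorial addition, not part of the original documentation): it assumes that ``inherit_from_obligatory`` takes a list of parent names and that ``parse_model_from_yaml`` accepts a path to any YAML model file; it only uses functions shown elsewhere on this page.

.. code-block:: python

   # Minimal sketch (assumptions: inherit_from_obligatory takes a list of
   # parent names; parse_model_from_yaml accepts any YAML model file path).
   from tempfile import NamedTemporaryFile

   from caosadvancedtools.models import parser as parser

   model_definition = """
   Project:
     obligatory_properties:
       projectId:
         datatype: INTEGER
   SpecialProject:
     inherit_from_obligatory:
     - Project
   """

   with NamedTemporaryFile(mode="w", suffix=".yml", delete=False) as tmp:
       tmp.write(model_definition)

   # SpecialProject should now carry Project as a parent together with the
   # obligatory property projectId.
   model = parser.parse_model_from_yaml(tmp.name)
   print(model)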

Usage
-----

You can use the YAML parser directly in Python as follows:

.. code-block:: python

   from caosadvancedtools.models import parser as parser
   model = parser.parse_model_from_yaml("model.yml")

This creates a ``DataModel`` object containing all entities defined in the YAML file.
You can then use the functions from ``caosadvancedtools.models.data_model.DataModel`` to synchronize
the model with a CaosDB instance, e.g.:

.. code-block:: python

   model.sync_data_model()
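
Putting both steps together, a compact end-to-end sketch (an editorial addition; ``noquestion=True`` is taken from the integration tests above and skips the interactive confirmation before changes are applied):

.. code-block:: python

   from caosadvancedtools.models import parser

   # Parse the YAML definition and synchronize it with the connected CaosDB
   # instance without asking for confirmation (as in the integration tests).
   model = parser.parse_model_from_yaml("model.yml")
   model.sync_data_model(noquestion=True)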
import unittest

import caosdb as db
import pytest
from caosadvancedtools.models.data_model import DataModel


class DataModelTest(unittest.TestCase):
    def tearDown(self):
        try:
            tests = db.execute_query("FIND test*")
            tests.delete()
        except Exception:
            pass

    def test_collecting(self):
        maintained = {"one": db.RecordType(name="TestRecord").add_property(
            name="testproperty"),
......@@ -24,17 +16,6 @@ class DataModelTest(unittest.TestCase):
assert "TestRecord" in names
assert "testproperty" in names
# TODO this seems to require integration test
@pytest.mark.xfail
def test_get_existing_entities(self):
db.RecordType(name="TestRecord").insert()
c = db.Container().extend([
db.Property(name="testproperty"),
db.RecordType(name="TestRecord")])
exist = DataModel.get_existing_entities(c)
assert len(exist) == 1
assert exist[0].name == "TestRecord"
def test_sync_ids_by_name(self):
container = db.Container().extend([db.RecordType(name="TestRecord"),
db.RecordType(name="TestRecord2"),
......
......@@ -181,4 +181,7 @@ class H5CFoodTest(unittest.TestCase):
        self.assertTupleEqual((None, None), h5_attr_to_property(test_integers_2d))
        self.assertTupleEqual((None, None), h5_attr_to_property(test_floats_2d))
        self.assertRaises(NotImplementedError, h5_attr_to_property, np.array(1))

        # Test scalar values given as np.array
        self.assertTupleEqual((1, db.INTEGER), h5_attr_to_property(np.array(1)))
        self.assertTupleEqual((1.123, db.DOUBLE), h5_attr_to_property(np.array(1.123)))
        self.assertTupleEqual(('Hello World', db.TEXT), h5_attr_to_property(np.array("Hello World")))
......@@ -64,7 +64,6 @@ class ConverterTest(unittest.TestCase):
r"\this\computer,\this\computer"),
["/this/computer", "/this/computer"])
@pytest.mark.xfail
def test_datetime(self):
test_file = os.path.join(os.path.dirname(__file__), "date.xlsx")
self.importer = XLSImporter(converters={'d': datetime_converter,
......@@ -165,7 +164,7 @@ class XLSImporterTest(unittest.TestCase):
        df_new = self.importer.check_unique(df)
        self.assertEqual(df_new.shape[0], 1)

    @pytest.mark.xfail
    @pytest.mark.xfail(reason="Wrong Error is raised. See Issue #52")
    def test_raise(self):
        tmp = NamedTemporaryFile(delete=False, suffix=".lol")
        tmp.close()
......