Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found

Target

Select target project
  • caosdb/src/caosdb-advanced-user-tools
1 result
Show changes
Commits on Source (7)
......@@ -29,6 +29,6 @@ RUN pip3 install recommonmark sphinx-rtd-theme
COPY . /git
RUN rm -r /git/.git \
&& mv /git/.docker/pycaosdb.ini /git/integrationtests
RUN cd /git && pip3 install .
RUN cd /git && pip3 install .[h5-crawler]
WORKDIR /git/integrationtests
CMD /wait-for-it.sh caosdb-server:10443 -t 500 -- ./test.sh
CMD /wait-for-it.sh caosdb-server:10443 -t 500 -- ./test.sh --force
......@@ -28,6 +28,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Crawler documentation
- Proof-of-concept integration with Bloxberg.
- Introduce a cfood that can create a Record structure based on the contents of an hdf5 file
h5py is now an optional dependency
### Changed ###
......
......@@ -12,6 +12,11 @@ Dependencies will be installed automatically if you use the below described proc
- `caosdb>=0.4.0`
- `openpyxl>=3.0.0`
- `xlrd>=1.2.0`
- `pandas>=1.2.0`
- `numpy>=1.17.3`
If you want to use the optional h5-crawler, the following dependencies will be installed additionally:
- `h5py>=3.3.0`
For testing:
- `tox`
......@@ -21,6 +26,9 @@ For testing:
- `pip install . --user`
- `pip install tox --user`
Optional h5-crawler:
- `pip install .[h5-crawler] --user`
## Run Unit Tests
`tox`
......
......@@ -34,7 +34,9 @@ from caosadvancedtools.scifolder.generic_pattern import readme_pattern
class ExampleH5CFood(H5CFood):
root_name = "ExampleH5"
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
self.root_name = "ExampleH5"
@staticmethod
def get_re():
......
......@@ -15,8 +15,8 @@ if len(db.execute_query("FIND Property alias")) == 0:
h5model = db.Container()
h5file = h5py.File('extroot/ExperimentalData/2010_TestProject/2019-02-03/hdf5_dummy_file.hdf5', 'r')
H5CFood.create_structure(h5file, create_recordTypes=True, collection=h5model)
h5model[0].name = "ExampleH5"
H5CFood.create_structure(h5file, create_recordTypes=True, collection=h5model,
root_name="ExampleH5")
print(h5model)
h5model = DataModel(h5model)
h5model.sync_data_model(noquestion=True)
#!/bin/bash
# Integration-test entry point.  These tests delete the whole database,
# so the script refuses to run unless invoked with --force or the user
# explicitly confirms with "yes".
if [ "$1" != "--force" ]
then
echo "Warning: For these tests, the whole database will be deleted. Do you want to proceed? (yes/Exit)"
read safety
# An empty reply (just pressing Enter) aborts.
# NOTE(review): $safety is unquoted in the tests below; a reply
# containing whitespace would break the [ ] expression — consider
# quoting as "$safety".
if [ -z $safety ]
then
echo "Exiting..."
exit 0
# Any reply other than the literal string "yes" also aborts.
elif [ $safety != "yes" ]
then
echo "Exiting..."
exit 0
fi
fi
# Location where crawler output is collected by the later test steps.
OUT=/tmp/crawler.output
ls
# Print the effective configuration so CI logs record what was used.
cat pycaosdb.ini
......
......@@ -159,8 +159,9 @@ def setup_package():
"pandas>=1.2.0",
"numpy>=1.17.3",
"xlrd>=2.0",
"h5py",
],
extras_require={"h5-crawler": ["h5py>=3.3.0", ],
},
packages=find_packages('src'),
package_dir={'': 'src'},
setup_requires=["pytest-runner>=2.0,<3dev"],
......
......@@ -152,9 +152,19 @@ fileguide = FileGuide()
class AbstractCFood(object, metaclass=ABCMeta):
""" Abstract base class for Crawler food (CFood)."""
def __init__(self, item):
""" Abstract base class for Crawler food (CFood)."""
"""A CFood has two main methods which must be customized:
1. `create_identifiables`
This method defines (and inserts if necessary) the identifiables which may be updated at a
later stage. After calling this method, the `identifiables` Container contains those
Records which will be updated at a later time.
2. `update_identifiables`
This method updates the stored identifiables as necessary.
"""
self.to_be_updated = db.Container()
self.identifiables = db.Container()
self.item = item
......
......@@ -112,20 +112,17 @@ class H5CFood(AbstractFileCFood):
Attributes
----------
root_name : str, default "root"
Type of the root Record (the Record corresponding to the root node in
the HDF5 file)
h5file : h5py.File, default None
Name of the hdf5-file to read
"""
# to be overwritten by subclasses
root_name = "root"
def __init__(self, *args, **kwargs):
"""CFood which consumes HDF5 files."""
super().__init__(*args, **kwargs)
self.h5file = None
self.root_name = "root"
self.hdf5Container = db.Container()
self.em = EntityMapping()
......@@ -152,7 +149,8 @@ class H5CFood(AbstractFileCFood):
"""
# manually create the identifiable root element: self.identifiable_root
self.structure = self.create_structure(self.h5file,
special_treatment=self.special_treatment)
special_treatment=self.special_treatment,
root_name=self.root_name)
def update_identifiables(self):
"""Check if the identifiables need to be updated.
......@@ -198,7 +196,7 @@ class H5CFood(AbstractFileCFood):
@classmethod
def create_structure(cls, h5obj, create_recordTypes=False, collection=None,
special_treatment=None):
special_treatment=None, root_name="root"):
"""Create Records and Record types from a given hdf5-object for all
items in the tree. Attributes are added as properties, the
values only if the dimension < 2.
......@@ -206,7 +204,11 @@ class H5CFood(AbstractFileCFood):
Parameters
----------
h5obj : h5py.File
a hdf5-file object
a hdf5-file object
root_name : name that is used instead of '/'
Type of the root Record (the Record corresponding to
the root node in the HDF5 file)
Returns
-------
......@@ -223,7 +225,7 @@ class H5CFood(AbstractFileCFood):
def special_treatment(x, y, z): return x, y, z
if h5obj.name == "/":
name_without_path = cls.root_name
name_without_path = root_name
else:
name_without_path = h5obj.name.split("/")[-1]
......
../../README_SETUP.md
\ No newline at end of file
......@@ -71,7 +71,7 @@ indicated in the messages).
Invocation as Python Script
---------------------------
The crawler can be executed directly via a python script (usually called
The crawler can be executed directly via a Python script (usually called
``crawl.py``). The script prints the progress and reports potential
problems. The exact behavior depends on your setup. However, you can
have a look at the example in the
......@@ -84,7 +84,7 @@ have a look at the example in the
Call ``python3 crawl.py --help`` to see what parameters can be provided.
Typically, an invocation looks like:
.. code:: python
.. code:: sh
python3 crawl.py /someplace/
......
......@@ -13,8 +13,7 @@ This documentation helps you to :doc:`get started<getting_started>`, explains th
Getting started <README_SETUP>
Concepts <concepts>
tutorials
Caosdb-Crawler <crawler>
CaosDB Crawler <crawler>
_apidoc/modules
......
......@@ -9,4 +9,5 @@ deps=nose
pytest-cov
openpyxl
xlrd == 1.2
h5py
commands=py.test --cov=caosadvancedtools -vv {posargs}