Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found

Target

Select target project
  • caosdb/src/caosdb-advanced-user-tools
1 result
Show changes
Commits on Source (7)
...@@ -29,6 +29,6 @@ RUN pip3 install recommonmark sphinx-rtd-theme ...@@ -29,6 +29,6 @@ RUN pip3 install recommonmark sphinx-rtd-theme
COPY . /git COPY . /git
RUN rm -r /git/.git \ RUN rm -r /git/.git \
&& mv /git/.docker/pycaosdb.ini /git/integrationtests && mv /git/.docker/pycaosdb.ini /git/integrationtests
RUN cd /git && pip3 install . RUN cd /git && pip3 install .[h5-crawler]
WORKDIR /git/integrationtests WORKDIR /git/integrationtests
CMD /wait-for-it.sh caosdb-server:10443 -t 500 -- ./test.sh CMD /wait-for-it.sh caosdb-server:10443 -t 500 -- ./test.sh --force
...@@ -28,6 +28,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ...@@ -28,6 +28,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Crawler documentation - Crawler documentation
- Proof-of-concept integration with Bloxberg. - Proof-of-concept integration with Bloxberg.
- Introduce a cfood that can create a Record structure based on the contents of a hdf5 file - Introduce a cfood that can create a Record structure based on the contents of a hdf5 file
h5py is now an optional dependency
### Changed ### ### Changed ###
......
...@@ -12,6 +12,11 @@ Dependencies will be installed automatically if you use the below described proc ...@@ -12,6 +12,11 @@ Dependencies will be installed automatically if you use the below described proc
- `caosdb>=0.4.0` - `caosdb>=0.4.0`
- `openpyxl>=3.0.0` - `openpyxl>=3.0.0`
- `xlrd>=1.2.0` - `xlrd>=1.2.0`
- `pandas>=1.2.0`
- `numpy>=1.17.3`
If you want to use the optional h5-crawler, the following dependencies will be installed additionally:
- `h5py>=3.3.0`
For testing: For testing:
- `tox` - `tox`
...@@ -21,6 +26,9 @@ For testing: ...@@ -21,6 +26,9 @@ For testing:
- `pip install . --user` - `pip install . --user`
- `pip install tox --user` - `pip install tox --user`
Optional h5-crawler:
- `pip install .[h5-crawler] --user`
## Run Unit Tests ## Run Unit Tests
`tox` `tox`
......
...@@ -34,7 +34,9 @@ from caosadvancedtools.scifolder.generic_pattern import readme_pattern ...@@ -34,7 +34,9 @@ from caosadvancedtools.scifolder.generic_pattern import readme_pattern
class ExampleH5CFood(H5CFood): class ExampleH5CFood(H5CFood):
root_name = "ExampleH5" def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
self.root_name = "ExampleH5"
@staticmethod @staticmethod
def get_re(): def get_re():
......
...@@ -15,8 +15,8 @@ if len(db.execute_query("FIND Property alias")) == 0: ...@@ -15,8 +15,8 @@ if len(db.execute_query("FIND Property alias")) == 0:
h5model = db.Container() h5model = db.Container()
h5file = h5py.File('extroot/ExperimentalData/2010_TestProject/2019-02-03/hdf5_dummy_file.hdf5', 'r') h5file = h5py.File('extroot/ExperimentalData/2010_TestProject/2019-02-03/hdf5_dummy_file.hdf5', 'r')
H5CFood.create_structure(h5file, create_recordTypes=True, collection=h5model) H5CFood.create_structure(h5file, create_recordTypes=True, collection=h5model,
h5model[0].name = "ExampleH5" root_name="ExampleH5")
print(h5model) print(h5model)
h5model = DataModel(h5model) h5model = DataModel(h5model)
h5model.sync_data_model(noquestion=True) h5model.sync_data_model(noquestion=True)
#!/bin/bash #!/bin/bash
if [ "$1" != "--force" ]
then
echo "Warning: For these tests, the whole database will be deleted. Do you want to proceed? (yes/Exit)"
read safety
if [ -z $safety ]
then
echo "Exiting..."
exit 0
elif [ $safety != "yes" ]
then
echo "Exiting..."
exit 0
fi
fi
OUT=/tmp/crawler.output OUT=/tmp/crawler.output
ls ls
cat pycaosdb.ini cat pycaosdb.ini
......
...@@ -159,8 +159,9 @@ def setup_package(): ...@@ -159,8 +159,9 @@ def setup_package():
"pandas>=1.2.0", "pandas>=1.2.0",
"numpy>=1.17.3", "numpy>=1.17.3",
"xlrd>=2.0", "xlrd>=2.0",
"h5py",
], ],
extras_require={"h5-crawler": ["h5py>=3.3.0", ],
},
packages=find_packages('src'), packages=find_packages('src'),
package_dir={'': 'src'}, package_dir={'': 'src'},
setup_requires=["pytest-runner>=2.0,<3dev"], setup_requires=["pytest-runner>=2.0,<3dev"],
......
...@@ -152,9 +152,19 @@ fileguide = FileGuide() ...@@ -152,9 +152,19 @@ fileguide = FileGuide()
class AbstractCFood(object, metaclass=ABCMeta): class AbstractCFood(object, metaclass=ABCMeta):
""" Abstract base class for Crawler food (CFood)."""
def __init__(self, item): def __init__(self, item):
""" Abstract base class for Crawler food (CFood).""" """A CFood has two main methods which must be customized:
1. `create_identifiables`
This method defines (and inserts if necessary) the identifiables which may be updated at a
later stage. After calling this method, the `identifiables` Container contains those
Records which will be updated at a later time.
2. `update_identifiables`
This method updates the stored identifiables as necessary.
"""
self.to_be_updated = db.Container() self.to_be_updated = db.Container()
self.identifiables = db.Container() self.identifiables = db.Container()
self.item = item self.item = item
......
...@@ -112,20 +112,17 @@ class H5CFood(AbstractFileCFood): ...@@ -112,20 +112,17 @@ class H5CFood(AbstractFileCFood):
Attributes Attributes
---------- ----------
root_name : str, default "root"
Type of the root Record (the Record corresponding to the root node in
the HDF5 file)
h5file : h5py.File, default None h5file : h5py.File, default None
Name of the hdf5-file to read Name of the hdf5-file to read
""" """
# to be overwritten by subclasses # to be overwritten by subclasses
root_name = "root"
def __init__(self, *args, **kwargs): def __init__(self, *args, **kwargs):
"""CFood which consumes HDF5 files.""" """CFood which consumes HDF5 files."""
super().__init__(*args, **kwargs) super().__init__(*args, **kwargs)
self.h5file = None self.h5file = None
self.root_name = "root"
self.hdf5Container = db.Container() self.hdf5Container = db.Container()
self.em = EntityMapping() self.em = EntityMapping()
...@@ -152,7 +149,8 @@ class H5CFood(AbstractFileCFood): ...@@ -152,7 +149,8 @@ class H5CFood(AbstractFileCFood):
""" """
# manually create the identifiable root element: self.identifiable_root # manually create the identifiable root element: self.identifiable_root
self.structure = self.create_structure(self.h5file, self.structure = self.create_structure(self.h5file,
special_treatment=self.special_treatment) special_treatment=self.special_treatment,
root_name=self.root_name)
def update_identifiables(self): def update_identifiables(self):
"""Check if the identifiables need to be updated. """Check if the identifiables need to be updated.
...@@ -198,7 +196,7 @@ class H5CFood(AbstractFileCFood): ...@@ -198,7 +196,7 @@ class H5CFood(AbstractFileCFood):
@classmethod @classmethod
def create_structure(cls, h5obj, create_recordTypes=False, collection=None, def create_structure(cls, h5obj, create_recordTypes=False, collection=None,
special_treatment=None): special_treatment=None, root_name="root"):
"""Create Records and Record types from a given hdf5-object for all """Create Records and Record types from a given hdf5-object for all
items in the tree. Attributes are added as properties, the items in the tree. Attributes are added as properties, the
values only if the dimension < 2. values only if the dimension < 2.
...@@ -206,7 +204,11 @@ class H5CFood(AbstractFileCFood): ...@@ -206,7 +204,11 @@ class H5CFood(AbstractFileCFood):
Parameters Parameters
---------- ----------
h5obj : h5py.File h5obj : h5py.File
a hdf5-file object a hdf5-file object
root_name : str, default "root"
Name that is used instead of '/'; it is the type of the root Record
(the Record corresponding to the root node in the HDF5 file).
Returns Returns
------- -------
...@@ -223,7 +225,7 @@ class H5CFood(AbstractFileCFood): ...@@ -223,7 +225,7 @@ class H5CFood(AbstractFileCFood):
def special_treatment(x, y, z): return x, y, z def special_treatment(x, y, z): return x, y, z
if h5obj.name == "/": if h5obj.name == "/":
name_without_path = cls.root_name name_without_path = root_name
else: else:
name_without_path = h5obj.name.split("/")[-1] name_without_path = h5obj.name.split("/")[-1]
......
../../README_SETUP.md
\ No newline at end of file
...@@ -71,7 +71,7 @@ indicated in the messages). ...@@ -71,7 +71,7 @@ indicated in the messages).
Invocation as Python Script Invocation as Python Script
--------------------------- ---------------------------
The crawler can be executed directly via a python script (usually called The crawler can be executed directly via a Python script (usually called
``crawl.py``). The script prints the progress and reports potential ``crawl.py``). The script prints the progress and reports potential
problems. The exact behavior depends on your setup. However, you can problems. The exact behavior depends on your setup. However, you can
have a look at the example in the have a look at the example in the
...@@ -84,7 +84,7 @@ have a look at the example in the ...@@ -84,7 +84,7 @@ have a look at the example in the
Call ``python3 crawl.py --help`` to see what parameters can be provided. Call ``python3 crawl.py --help`` to see what parameters can be provided.
Typically, an invocation looks like: Typically, an invocation looks like:
.. code:: python .. code:: sh
python3 crawl.py /someplace/ python3 crawl.py /someplace/
......
...@@ -13,8 +13,7 @@ This documentation helps you to :doc:`get started<getting_started>`, explains th ...@@ -13,8 +13,7 @@ This documentation helps you to :doc:`get started<getting_started>`, explains th
Getting started <README_SETUP> Getting started <README_SETUP>
Concepts <concepts> Concepts <concepts>
tutorials CaosDB Crawler <crawler>
Caosdb-Crawler <crawler>
_apidoc/modules _apidoc/modules
......
...@@ -9,4 +9,5 @@ deps=nose ...@@ -9,4 +9,5 @@ deps=nose
pytest-cov pytest-cov
openpyxl openpyxl
xlrd == 1.2 xlrd == 1.2
h5py
commands=py.test --cov=caosadvancedtools -vv {posargs} commands=py.test --cov=caosadvancedtools -vv {posargs}