diff --git a/.docker/Dockerfile b/.docker/Dockerfile index e7bc28acad38aaf299d7427117510e10f57a903f..43e5eff1171da8d69eb8897bea678bf90572570a 100644 --- a/.docker/Dockerfile +++ b/.docker/Dockerfile @@ -29,6 +29,6 @@ RUN pip3 install recommonmark sphinx-rtd-theme COPY . /git RUN rm -r /git/.git \ && mv /git/.docker/pycaosdb.ini /git/integrationtests -RUN cd /git && pip3 install . +RUN cd /git && pip3 install .[h5-crawler] WORKDIR /git/integrationtests -CMD /wait-for-it.sh caosdb-server:10443 -t 500 -- ./test.sh +CMD /wait-for-it.sh caosdb-server:10443 -t 500 -- ./test.sh --force diff --git a/CHANGELOG.md b/CHANGELOG.md index eda225cc2e280c7ca326fe563ef94c0122684eda..7844075f1d7cd745b96e004c7f72dfcf98b6e8a4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -28,6 +28,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Crawler documentation - Proof-of-concept integration with Bloxberg. - Introduce a cfood that can create a Record structure based on the contents of a hdf5 file + h5py is now an optional dependency ### Changed ### diff --git a/README_SETUP.md b/README_SETUP.md index 19f051636952945fe76b2ab752264031ac43378d..0ac69cf928ae0267e60a8a5ec576b5117236e24c 100644 --- a/README_SETUP.md +++ b/README_SETUP.md @@ -12,6 +12,11 @@ Dependencies will be installed automatically if you use the below described proc - `caosdb>=0.4.0` - `openpyxl>=3.0.0` - `xlrd>=1.2.0` +- `pandas>=1.2.0` +- `numpy>=1.17.3` + +If you want to use the optional h5-crawler the following dependencies will be installed additionally: +- `h5py>=3.3.0` For testing: - `tox` @@ -21,6 +26,9 @@ For testing: - `pip install . 
--user` - `pip install tox --user` +Optional h5-crawler: +- `pip install .[h5-crawler] --user` + ## Run Unit Tests `tox` diff --git a/integrationtests/example_hdf5cfood.py b/integrationtests/example_hdf5cfood.py index f369f852a294d8819720e74ad4f849082b108653..5485402d2042b2055a087b99abcba409095a7c70 100644 --- a/integrationtests/example_hdf5cfood.py +++ b/integrationtests/example_hdf5cfood.py @@ -34,7 +34,9 @@ from caosadvancedtools.scifolder.generic_pattern import readme_pattern class ExampleH5CFood(H5CFood): - root_name = "ExampleH5" + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self.root_name = "ExampleH5" @staticmethod def get_re(): diff --git a/integrationtests/insert_model.py b/integrationtests/insert_model.py index f57ea440d4a7343a5a33c8deeaa8fa79b62d1e8e..ae3dd7701b44f5008bd976d81f8ecc8d9a02bf89 100755 --- a/integrationtests/insert_model.py +++ b/integrationtests/insert_model.py @@ -15,8 +15,8 @@ if len(db.execute_query("FIND Property alias")) == 0: h5model = db.Container() h5file = h5py.File('extroot/ExperimentalData/2010_TestProject/2019-02-03/hdf5_dummy_file.hdf5', 'r') -H5CFood.create_structure(h5file, create_recordTypes=True, collection=h5model) -h5model[0].name = "ExampleH5" +H5CFood.create_structure(h5file, create_recordTypes=True, collection=h5model, + root_name="ExampleH5") print(h5model) h5model = DataModel(h5model) h5model.sync_data_model(noquestion=True) diff --git a/integrationtests/test.sh b/integrationtests/test.sh index 80a2afe307ab1da628faa39d4ba8ef93bc6959e4..5efd549551670d3a4581380271ac2aba4b80a10f 100755 --- a/integrationtests/test.sh +++ b/integrationtests/test.sh @@ -1,4 +1,18 @@ #!/bin/bash +if [ "$1" != "--force" ] +then + echo "Warning: For these tests, the whole database will be deleted. Do you want to proceed? (yes/Exit)" + read safety + if [ -z "$safety" ] + then + echo "Exiting..." + exit 0 + elif [ "$safety" != "yes" ] + then + echo "Exiting..." 
+ exit 0 + fi +fi OUT=/tmp/crawler.output ls cat pycaosdb.ini diff --git a/setup.py b/setup.py index 89b7f10674c4871c8eaedaad0355782d92a09125..772866537d02b71adddfab2a351a3e3372b05ab2 100755 --- a/setup.py +++ b/setup.py @@ -159,8 +159,9 @@ def setup_package(): "pandas>=1.2.0", "numpy>=1.17.3", "xlrd>=2.0", - "h5py", ], + extras_require={"h5-crawler": ["h5py>=3.3.0", ], + }, packages=find_packages('src'), package_dir={'': 'src'}, setup_requires=["pytest-runner>=2.0,<3dev"], diff --git a/src/caosadvancedtools/cfood.py b/src/caosadvancedtools/cfood.py index fcdd2b83769e847bd2f00066ce1442a03b74e0fc..48b423e01894220d2bd31dab5784932d601f9f62 100644 --- a/src/caosadvancedtools/cfood.py +++ b/src/caosadvancedtools/cfood.py @@ -152,9 +152,19 @@ fileguide = FileGuide() class AbstractCFood(object, metaclass=ABCMeta): + """ Abstract base class for Crawler food (CFood).""" def __init__(self, item): - """ Abstract base class for Crawler food (CFood).""" + """A CFood has two main methods which must be customized: + + 1. `create_identifiables` + This method defines (and inserts if necessary) the identifiables which may be updated at a + later stage. After calling this method, the `identifiables` Container contains those + Records which will be updated at a later time. + + 2. `update_identifiables` + This method updates the stored identifiables as necessary. 
+ """ self.to_be_updated = db.Container() self.identifiables = db.Container() self.item = item diff --git a/src/caosadvancedtools/cfoods/h5.py b/src/caosadvancedtools/cfoods/h5.py index 0e56da71d14d0ce643caab16a1846a36e5917c06..543ac327871fb7f5c79d68e638af1a47b62d83f6 100644 --- a/src/caosadvancedtools/cfoods/h5.py +++ b/src/caosadvancedtools/cfoods/h5.py @@ -112,20 +112,17 @@ class H5CFood(AbstractFileCFood): Attributes ---------- - root_name : str, default "root" - Type of the root Record (the Record corresponding to the root node in - the HDF5 file) h5file : h5py.File, default None Name of the hdf5-file to read """ # to be overwritten by subclasses - root_name = "root" def __init__(self, *args, **kwargs): """CFood which consumes HDF5 files.""" super().__init__(*args, **kwargs) self.h5file = None + self.root_name = "root" self.hdf5Container = db.Container() self.em = EntityMapping() @@ -152,7 +149,8 @@ class H5CFood(AbstractFileCFood): """ # manually create the identifiable root element: self.identifiable_root self.structure = self.create_structure(self.h5file, - special_treatment=self.special_treatment) + special_treatment=self.special_treatment, + root_name=self.root_name) def update_identifiables(self): """Check if the identifiables need to be updated. @@ -198,7 +196,7 @@ class H5CFood(AbstractFileCFood): @classmethod def create_structure(cls, h5obj, create_recordTypes=False, collection=None, - special_treatment=None): + special_treatment=None, root_name="root"): """Create Records and Record types from a given hdf5-object for all items in the tree. Attributes are added as properties, the values only if the dimension < 2. 
@@ -206,7 +204,11 @@ class H5CFood(AbstractFileCFood): Parameters ---------- h5obj : h5py.File - a hdf5-file object + a hdf5-file object + + root_name : name that is used instead of '/' + Type of the root Record (the Record corresponding to + the root node in the HDF5 file) Returns ------- @@ -223,7 +225,7 @@ class H5CFood(AbstractFileCFood): def special_treatment(x, y, z): return x, y, z if h5obj.name == "/": - name_without_path = cls.root_name + name_without_path = root_name else: name_without_path = h5obj.name.split("/")[-1] diff --git a/src/doc/README_SETUP.md b/src/doc/README_SETUP.md new file mode 120000 index 0000000000000000000000000000000000000000..88332e357f5e06f3de522768ccdcd9e513c15f62 --- /dev/null +++ b/src/doc/README_SETUP.md @@ -0,0 +1 @@ +../../README_SETUP.md \ No newline at end of file diff --git a/src/doc/crawler.rst b/src/doc/crawler.rst index 37c0f054de029afd10c07023f2d2e17c8aec0cb5..c52bbf2fe9b9f5fd77805e45ec85d195f5aa95f3 100644 --- a/src/doc/crawler.rst +++ b/src/doc/crawler.rst @@ -71,7 +71,7 @@ indicated in the messages). Invocation as Python Script --------------------------- -The crawler can be executed directly via a python script (usually called +The crawler can be executed directly via a Python script (usually called ``crawl.py``). The script prints the progress and reports potential problems. The exact behavior depends on your setup. However, you can have a look at the example in the @@ -84,7 +84,7 @@ have a look at the example in the Call ``python3 crawl.py --help`` to see what parameters can be provided. Typically, an invocation looks like: -.. code:: python +.. code:: sh python3 crawl.py /someplace/ diff --git a/tox.ini b/tox.ini index d41e9930870390ea52f447bc91fbcff3c4e32a0f..1b3cd4ef0d39955197448ace9fdf5d26ea6749b4 100644 --- a/tox.ini +++ b/tox.ini @@ -9,4 +9,5 @@ deps=nose pytest-cov openpyxl xlrd == 1.2 + h5py commands=py.test --cov=caosadvancedtools -vv {posargs}