Skip to content
Snippets Groups Projects
Commit 2f063d83 authored by Florian Spreckelsen
Browse files

DOC: Add docstrings

parent af88bb22
Branches
Tags
2 merge requests: !160 "STY: styling", !143 "ENH: HDF5 Converter"
Pipeline #47456 passed
......@@ -39,6 +39,20 @@ from .structure_elements import DictElement, File, FloatElement, IntegerElement,
def convert_attributes(elt: Union[h5py.File, h5py.Group, h5py.Dataset]):
"""Convert hdf5 attributes to a list of either basic scalar structure elements or ndarrays.
Parameters
----------
elt : Union[h5py.File, h5py.Group, h5py.Dataset]
The hdf5 element the attributes of which will be converted to structure
elements.
Returns
-------
converted : list[StructureElement]
A list of the attributes converted to StructureElements (either basic
scalar elements or ndarray).
"""
converted = []
for name, value in elt.attrs.items():
......@@ -49,6 +63,25 @@ def convert_attributes(elt: Union[h5py.File, h5py.Group, h5py.Dataset]):
def convert_h5_element(elt: Union[h5py.Group, h5py.Dataset], name: str):
"""Convert a given HDF5 element to the corresponding StructureElement.
Parameters
----------
elt : Union[h5py.Group, h5py.Dataset]
The hdf5 element to be converted.
name : str
The name of the StructureElement that the hdf5 element is converted to.
Raises
------
ValueError
In case of anything that is not convertible to a HDF5 structure element.
Returns
-------
StructureElement
The converted StructureElement.
"""
if isinstance(elt, h5py.Group):
......@@ -63,6 +96,34 @@ def convert_h5_element(elt: Union[h5py.Group, h5py.Dataset], name: str):
def convert_basic_element_with_nd_array(value, name: str = None,
internal_path: str = None, msg_prefix: str = ""):
"""Convert a given object either to an ndarray structure element or to a
basic scalar structure element.
This function extends :func:`~caoscrawler.converters.convert_basic_element`
by a special treatment for certain numpy objects, most importantly
ndarrays. They are converted to a scalar in case of a size-1 array, to a
list in case of a 1-d array, and to a ``H5NdarrayElement`` in all other
cases. In addition, numpy integers and floats are also converted to
IntegerElements and FloatElements, respectively.
Parameters
----------
value
The object to be converted.
name : str, optional
The name of the structure element ``value`` is being converted
to. Default is None.
internal_path : str, optional
The internal path of ``value`` within the HDF5 file. Default is None.
msg_prefix : str, optional
The prefix of the error message that will be raised. Default is ``""``.
Returns
-------
StructureElement
The StructureElement ``value`` was converted to.
"""
if isinstance(value, np.ndarray):
......@@ -92,24 +153,43 @@ def convert_basic_element_with_nd_array(value, name: str = None,
class H5GroupElement(DictElement):
    """StructureElement specific for HDF5 groups.

    Parameters
    ----------
    name : str
        The name of this structure element. Passed unchanged to the parent
        constructor.
    value : h5py.Group
        The HDF5 group wrapped by this element. Passed unchanged to the parent
        constructor.
    """

    def __init__(self, name: str, value: h5py.Group):
        super().__init__(name, value)
class H5DatasetElement(DictElement):
    """StructureElement specific for HDF5 datasets.

    Parameters
    ----------
    name : str
        The name of this structure element. Passed unchanged to the parent
        constructor.
    value : h5py.Dataset
        The HDF5 dataset wrapped by this element. Passed unchanged to the
        parent constructor.
    """

    def __init__(self, name: str, value: h5py.Dataset):
        super().__init__(name, value)
class H5NdarrayElement(DictElement):
    """StructureElement specific for NDArrays within HDF5 files.

    Also store the internal path of the array within the HDF5 file in its
    ``internal_path`` attribute.

    Parameters
    ----------
    name : str
        The name of this structure element. Passed unchanged to the parent
        constructor.
    value
        The array data wrapped by this element. Passed unchanged to the parent
        constructor. The type is not annotated here; presumably a numpy
        ndarray -- TODO confirm against the callers.
    internal_path : str
        The internal path of the array within the HDF5 file.
    """

    def __init__(self, name: str, value, internal_path: str):
        super().__init__(name, value)
        # Kept on the element itself so that it is available wherever the
        # element is processed later on.
        self.internal_path = internal_path
class H5FileConverter(SimpleFileConverter):
"""Converter for HDF5 files that creates children for the contained
attributes, groups, and datasets.
"""
def create_children(self, generalStore: GeneralStore, element: StructureElement):
"""Create children from root-level file attributes and contained hdf5
elements.
"""
if not isinstance(element, File):
......@@ -129,12 +209,20 @@ class H5FileConverter(SimpleFileConverter):
class H5GroupConverter(DictElementConverter):
"""Converter for HDF5 groups that creates children from the group-level
attributes and the contained subgroups and datasets.
"""
def typecheck(self, element: StructureElement):
    """Return True if ``element`` is an ``H5GroupElement``, False otherwise."""
    is_group_element = isinstance(element, H5GroupElement)
    return is_group_element
def create_children(self, generalStore: GeneralStore, element: StructureElement):
"""Create children from group attributes and hdf5 elements contained in
this group.
"""
if not isinstance(element.value, h5py.Group):
......@@ -152,12 +240,20 @@ class H5GroupConverter(DictElementConverter):
class H5DatasetConverter(DictElementConverter):
"""Converter for HDF5 datasets that creates children from the dataset
attributes and the contained array data.
"""
def typecheck(self, element: StructureElement):
    """Return True if ``element`` is an ``H5DatasetElement``, False otherwise."""
    is_dataset_element = isinstance(element, H5DatasetElement)
    return is_dataset_element
def create_children(self, generalStore: GeneralStore, element: StructureElement):
"""Create children from the dataset attributes and append the array data
contained in this dataset.
"""
if not isinstance(element.value, h5py.Dataset):
......@@ -176,6 +272,10 @@ class H5DatasetConverter(DictElementConverter):
class H5NdarrayConverter(Converter):
"""Converter for ndarrays contained in HDF5 files. Creates the wrapper
record for this ndarray.
"""
def __init__(self, definition: dict, name: str, converter_registry: dict):
......@@ -188,11 +288,17 @@ class H5NdarrayConverter(Converter):
super().__init__(definition, name, converter_registry)
def create_children(self, values: GeneralStore, element: StructureElement):
    """Return an empty list of children.

    The ndarray doesn't have any further children, so neither ``values`` nor
    ``element`` is inspected.
    """
    children = []
    return children
def create_records(self, values: GeneralStore, records: RecordStore, element: StructureElement):
"""Create a wrapper record with name ``recordname``, type
``array_recordtype_name`` (default ``H5Ndarray``) and the internal path
stored in a property with name ``internal_path_property_name`` (default
``internal_hdf5_path``).
"""
rname = self.definition["recordname"]
if "array_recordtype_name" in self.definition:
......
......@@ -278,7 +278,7 @@ tests
<https://gitlab.com/linkahead/linkahead-crawler/-/blob/main/unittests/h5_cfood.yml?ref_type=heads>`_
and shows how the individual converters are used in order to crawl a `simple
example file
<https://gitlab.com/linkahead/linkahead-crawler/-/blob/main/unittests/hdf5_dummy_file.hdf5?ref_type=heads`_
<https://gitlab.com/linkahead/linkahead-crawler/-/blob/main/unittests/hdf5_dummy_file.hdf5?ref_type=heads>`_
containing groups, subgroups, and datasets, together with their respective
attributes.
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment