Skip to content
Snippets Groups Projects
Commit 2f063d83 authored by Florian Spreckelsen
Browse files

DOC: Add docstrings

parent af88bb22
No related branches found
No related tags found
2 merge requests: !160 "STY: styling", !143 "ENH: HDF5 Converter"
Pipeline #47456 passed
...@@ -39,6 +39,20 @@ from .structure_elements import DictElement, File, FloatElement, IntegerElement, ...@@ -39,6 +39,20 @@ from .structure_elements import DictElement, File, FloatElement, IntegerElement,
def convert_attributes(elt: Union[h5py.File, h5py.Group, h5py.Dataset]): def convert_attributes(elt: Union[h5py.File, h5py.Group, h5py.Dataset]):
"""Convert hdf5 attributes to a list of either basic scalar structure elements or ndarrays.
Parameters
----------
elt : Union[h5py.File, h5py.Group, h5py.Dataset]
The hdf5 element the attributes of which will be converted to structure
elements.
Returns
-------
converted : list[StructureElement]
A list of the attributes converted to StructureElements (either basic
scalar elements or ndarray).
"""
converted = [] converted = []
for name, value in elt.attrs.items(): for name, value in elt.attrs.items():
...@@ -49,6 +63,25 @@ def convert_attributes(elt: Union[h5py.File, h5py.Group, h5py.Dataset]): ...@@ -49,6 +63,25 @@ def convert_attributes(elt: Union[h5py.File, h5py.Group, h5py.Dataset]):
def convert_h5_element(elt: Union[h5py.Group, h5py.Dataset], name: str): def convert_h5_element(elt: Union[h5py.Group, h5py.Dataset], name: str):
"""Convert a given HDF5 element to the corresponding StructureElement.
Parameters
----------
elt : Union[h5py.Group, h5py.Dataset]
The hdf5 element to be converted.
name : str
The name of the StructureElement that the hdf5 element is converted to.
Raises
------
ValueError
In case of anything that is not convertible to an HDF5 structure element.
Returns
-------
StructureElement
The converted StructureElement.
"""
if isinstance(elt, h5py.Group): if isinstance(elt, h5py.Group):
...@@ -63,6 +96,34 @@ def convert_h5_element(elt: Union[h5py.Group, h5py.Dataset], name: str): ...@@ -63,6 +96,34 @@ def convert_h5_element(elt: Union[h5py.Group, h5py.Dataset], name: str):
def convert_basic_element_with_nd_array(value, name: str = None, def convert_basic_element_with_nd_array(value, name: str = None,
internal_path: str = None, msg_prefix: str = ""): internal_path: str = None, msg_prefix: str = ""):
"""Convert a given object either to an ndarray structure element or to a
basic scalar structure element.
This function extends :func:`~caoscrawler.converters.convert_basic_element`
by a special treatment for certain numpy objects, most importantly
ndarrays. They are converted to a scalar in case of a size-1 array, to a
list in case of a 1-d array, and to a ``H5NdarrayElement`` in all other
cases. In addition, numpy integers and floats are also converted to
IntegerElements and FloatElements, respectively.
Parameters
----------
value
The object to be converted.
name : str, optional
The name of the structure element ``value`` is being converted
to. Default is None.
internal_path : str, optional
The internal path of ``value`` within the HDF5 file. Default is None.
msg_prefix : str, optional
The prefix of the error message that will be raised. Default is ``""``.
Returns
-------
StructureElement
The StructureElement ``value`` was converted to.
"""
if isinstance(value, np.ndarray): if isinstance(value, np.ndarray):
...@@ -92,24 +153,43 @@ def convert_basic_element_with_nd_array(value, name: str = None, ...@@ -92,24 +153,43 @@ def convert_basic_element_with_nd_array(value, name: str = None,
class H5GroupElement(DictElement): class H5GroupElement(DictElement):
"""StructureElement specific for HDF5 groups"""
def __init__(self, name: str, value: h5py.Group): def __init__(self, name: str, value: h5py.Group):
super().__init__(name, value) super().__init__(name, value)
class H5DatasetElement(DictElement): class H5DatasetElement(DictElement):
"""StructureElement specific for HDF5 datasets."""
def __init__(self, name: str, value: h5py.Dataset): def __init__(self, name: str, value: h5py.Dataset):
super().__init__(name, value) super().__init__(name, value)
class H5NdarrayElement(DictElement): class H5NdarrayElement(DictElement):
"""StructureElement specific for NDArrays within HDF5 files.
Also store the internal path of the array within the HDF5 file in its
``internal_path`` attribute.
"""
def __init__(self, name: str, value, internal_path: str): def __init__(self, name: str, value, internal_path: str):
super().__init__(name, value) super().__init__(name, value)
self.internal_path = internal_path self.internal_path = internal_path
class H5FileConverter(SimpleFileConverter): class H5FileConverter(SimpleFileConverter):
"""Converter for HDF5 files that creates children for the contained
attributes, groups, and datasets.
"""
def create_children(self, generalStore: GeneralStore, element: StructureElement): def create_children(self, generalStore: GeneralStore, element: StructureElement):
"""Create children from root-level file attributes and contained hdf5
elements.
"""
if not isinstance(element, File): if not isinstance(element, File):
...@@ -129,12 +209,20 @@ class H5FileConverter(SimpleFileConverter): ...@@ -129,12 +209,20 @@ class H5FileConverter(SimpleFileConverter):
class H5GroupConverter(DictElementConverter): class H5GroupConverter(DictElementConverter):
"""Converter for HDF5 groups that creates children from the group-level
attributes and the contained subgroups and datasets.
"""
def typecheck(self, element: StructureElement): def typecheck(self, element: StructureElement):
return isinstance(element, H5GroupElement) return isinstance(element, H5GroupElement)
def create_children(self, generalStore: GeneralStore, element: StructureElement): def create_children(self, generalStore: GeneralStore, element: StructureElement):
"""Create children from group attributes and hdf5 elements contained in
this group.
"""
if not isinstance(element.value, h5py.Group): if not isinstance(element.value, h5py.Group):
...@@ -152,12 +240,20 @@ class H5GroupConverter(DictElementConverter): ...@@ -152,12 +240,20 @@ class H5GroupConverter(DictElementConverter):
class H5DatasetConverter(DictElementConverter): class H5DatasetConverter(DictElementConverter):
"""Converter for HDF5 datasets that creates children from the dataset
attributes and the contained array data.
"""
def typecheck(self, element: StructureElement): def typecheck(self, element: StructureElement):
return isinstance(element, H5DatasetElement) return isinstance(element, H5DatasetElement)
def create_children(self, generalStore: GeneralStore, element: StructureElement): def create_children(self, generalStore: GeneralStore, element: StructureElement):
"""Create children from the dataset attributes and append the array data
contained in this dataset.
"""
if not isinstance(element.value, h5py.Dataset): if not isinstance(element.value, h5py.Dataset):
...@@ -176,6 +272,10 @@ class H5DatasetConverter(DictElementConverter): ...@@ -176,6 +272,10 @@ class H5DatasetConverter(DictElementConverter):
class H5NdarrayConverter(Converter): class H5NdarrayConverter(Converter):
"""Converter for ndarrays contained in HDF5 files. Creates the wrapper
record for this ndarray.
"""
def __init__(self, definition: dict, name: str, converter_registry: dict): def __init__(self, definition: dict, name: str, converter_registry: dict):
...@@ -188,11 +288,17 @@ class H5NdarrayConverter(Converter): ...@@ -188,11 +288,17 @@ class H5NdarrayConverter(Converter):
super().__init__(definition, name, converter_registry) super().__init__(definition, name, converter_registry)
def create_children(self, values: GeneralStore, element: StructureElement): def create_children(self, values: GeneralStore, element: StructureElement):
"""The ndarray doesn't have any further children."""
# The ndarray doesn't have any further children.
return [] return []
def create_records(self, values: GeneralStore, records: RecordStore, element: StructureElement): def create_records(self, values: GeneralStore, records: RecordStore, element: StructureElement):
"""Create a wrapper record with name ``recordname``, type
``array_recordtype_name`` (default ``H5Ndarray``) and the internal path
stored in a property with name ``internal_path_property_name`` (default
``internal_hdf5_path``).
"""
rname = self.definition["recordname"] rname = self.definition["recordname"]
if "array_recordtype_name" in self.definition: if "array_recordtype_name" in self.definition:
......
...@@ -278,7 +278,7 @@ tests ...@@ -278,7 +278,7 @@ tests
<https://gitlab.com/linkahead/linkahead-crawler/-/blob/main/unittests/h5_cfood.yml?ref_type=heads>`_ <https://gitlab.com/linkahead/linkahead-crawler/-/blob/main/unittests/h5_cfood.yml?ref_type=heads>`_
and shows how the individual converters are used in order to crawl a `simple and shows how the individual converters are used in order to crawl a `simple
example file example file
<https://gitlab.com/linkahead/linkahead-crawler/-/blob/main/unittests/hdf5_dummy_file.hdf5?ref_type=heads`_ <https://gitlab.com/linkahead/linkahead-crawler/-/blob/main/unittests/hdf5_dummy_file.hdf5?ref_type=heads>`_
containing groups, subgroups, and datasets, together with their respective containing groups, subgroups, and datasets, together with their respective
attributes. attributes.
......
0% Loading. Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment.