# Reconstructed from the patch to src/caoscrawler/hdf5_converter.py (hunk
# "@@ -25,3 +25,65 @@"). The h5py import guard that precedes this code in the
# file starts outside the visible hunk and is therefore not reproduced here.

from typing import Union

from .converters import convert_basic_element, DictElementConverter, SimpleFileConverter
from .stores import GeneralStore
from .structure_elements import File, StructureElement


class H5GroupElement(StructureElement):
    """Structure element that carries an HDF5 group in ``self.value``."""

    def __init__(self, name: str, value: h5py.Group):
        super().__init__(name)
        self.value = value


class H5DatasetElement(StructureElement):
    """Structure element that carries an HDF5 dataset in ``self.value``."""

    def __init__(self, name: str, value: h5py.Dataset):
        super().__init__(name)
        self.value = value


def _convert_attributes(elt: Union[h5py.File, h5py.Group, h5py.Dataset]) -> list:
    """Convert all HDF5 attributes of ``elt`` with ``convert_basic_element``.

    Parameters
    ----------
    elt : h5py.File, h5py.Group or h5py.Dataset
        Object whose ``attrs`` mapping is converted.

    Returns
    -------
    list
        One entry per attribute, in the iteration order of ``elt.attrs``; each
        entry is whatever ``convert_basic_element`` returns for that value.
    """
    return [
        convert_basic_element(
            value, name,
            f"The value of attribute {name} has an unknown type: {type(value)}.")
        for name, value in elt.attrs.items()
    ]


def _convert_h5_elements(elt: Union[h5py.Group, h5py.Dataset], name: str) -> StructureElement:
    """Wrap an HDF5 group or dataset in the matching structure element.

    Parameters
    ----------
    elt : h5py.Group or h5py.Dataset
        The HDF5 object to wrap.
    name : str
        Name given to the resulting structure element.

    Returns
    -------
    H5GroupElement or H5DatasetElement

    Raises
    ------
    ValueError
        If ``elt`` is neither a ``h5py.Group`` nor a ``h5py.Dataset``.
    """
    # The group check comes first, exactly as in the original implementation.
    for h5_type, element_class in ((h5py.Group, H5GroupElement),
                                   (h5py.Dataset, H5DatasetElement)):
        if isinstance(elt, h5_type):
            return element_class(name, elt)

    raise ValueError("The given element must be either a HDF5 Group or Dataset object.")


# Backward-compatible aliases for the original double-underscore names. Those
# names must not be referenced from inside a class body: Python's private name
# mangling rewrites ``__name`` there to ``_ClassName__name``, which does not
# exist at module level.
__convert_attributes = _convert_attributes
__convert_h5_elements = _convert_h5_elements


class H5FileConverter(SimpleFileConverter):
    """Converter that turns the top level of an HDF5 file into structure elements."""

    def create_children(self, generalStore: GeneralStore, element: StructureElement):
        """Return the children of the HDF5 file referenced by ``element``.

        Parameters
        ----------
        generalStore : GeneralStore
            Not used by this method.
        element : StructureElement
            Must be a ``File``; its ``path`` is opened read-only with h5py.

        Returns
        -------
        list
            First one element per top-level item of the file, then the
            converted attributes of the file itself.

        Raises
        ------
        ValueError
            If ``element`` is not a ``File`` object.
        """
        if not isinstance(element, File):
            raise ValueError("create_children should have been called with a File object.")

        # NOTE(review): the handle is not closed here because the returned
        # children wrap live h5py objects belonging to this file; closing it
        # would presumably invalidate them -- confirm who owns the handle.
        ff = h5py.File(element.path, 'r')

        # Single-underscore helpers are used on purpose: double-underscore
        # names would be mangled inside this class and raise NameError.
        children = [_convert_h5_elements(value, name) for name, value in ff.items()]
        children.extend(_convert_attributes(ff))

        return children