diff --git a/unittests/h5_cfood.yml b/unittests/h5_cfood.yml index b2c4387e282f182fa1034617bdf2cf61a01f624f..0c1195b6536ab925fe23a58fb166b1cb1a698648 100644 --- a/unittests/h5_cfood.yml +++ b/unittests/h5_cfood.yml @@ -2,38 +2,55 @@ metadata: crawler-version: 0.6.1 --- +Converters: + H5Dataset: + converter: H5DatasetConverter + package: caoscrawler.hdf5_converter + H5File: + converter: H5FileConverter + package: caoscrawler.hdf5_converter + H5Group: + converter: H5GroupConverter + package: caoscrawler.hdf5_converter + H5Ndarray: + converter: H5NdarrayConverter + package: caoscrawler.hdf5_converter # Top-level, we have just the HDF5 file. -H5FileElement: - type: H5File - match: (.*)\.(hdf5|h5)$ +ParentDirectory: + type: Directory + match: (.*) subtree: - # Here, we have the groups, the top-level dataset, and possible attributes - # (empty for now). - RootIntegerElement: - type: H5Dataset - match: ^root_integers$ - records: - H5Dataset: - parents: - - H5Dataset + H5FileElement: + type: H5File + match: (.*)\.(hdf5|h5)$ subtree: - # included NDArray in this dataset - TopLevelIntNDElement: - type: H5Ndarray - match: (.*) - recordname: this + # Here, we have the groups, the top-level dataset, and possible + # attributes (empty for now). + RootIntegerElement: + type: H5Dataset + match_name: ^root_integers$ records: - H5Dataset: +$this - # There is one more list-valued attribute to this dataset. - TopLevelDataAttribute: - type: ListElement - match: ^attr_data_root$ + H5Dataset: + parents: + - H5Dataset subtree: - AttributeListEntries: - type: FloatElement - match: (?P<value>.*) + # included NDArray in this dataset + TopLevelIntNDElement: + type: H5Ndarray + match_name: (.*) + recordname: this records: - H5Dataset: - attr_data_root: +$value + H5Dataset: +$this + # There is one more list-valued attribute to this dataset. + TopLevelDataAttribute: + type: ListElement + match_name: ^attr_data_root$ + subtree: + AttributeListEntries: + type: FloatElement + match: (?P<value>.*) + records: + H5Dataset: + attr_data_root: +$value diff --git a/unittests/test_h5_converter.py b/unittests/test_h5_converter.py index bf69eee94fb0e6fa1fb59d2db24211d6afe0fc56..f3b6564eee575d1206b1d81452d7627a92cd6311 100644 --- a/unittests/test_h5_converter.py +++ b/unittests/test_h5_converter.py @@ -19,14 +19,16 @@ # import numpy as np +from pathlib import Path +from pytest import fixture, importorskip -from caoscrawler.hdf5_converter import (__convert_basic_element_with_nd_array, - __convert_h5_element, H5GroupElement, +from caoscrawler.debug_tree import DebugTree +from caoscrawler.hdf5_converter import (convert_basic_element_with_nd_array, + convert_h5_element, H5GroupElement, H5DatasetElement, H5NdarrayElement) +from caoscrawler.scanner import scan_directory from caoscrawler.structure_elements import (FloatElement, ListElement, TextElement) -from pathlib import Path -from pytest import fixture, importorskip # Skip the whole module if h5py hasn't been installed h5py = importorskip("h5py") @@ -45,10 +47,10 @@ def h5_dummy_file(): def test_h5_elements(h5_dummy_file): - elt = __convert_h5_element(h5_dummy_file["group_level1_a"], "test") + elt = convert_h5_element(h5_dummy_file["group_level1_a"], "test") assert isinstance(elt, H5GroupElement) - elt = __convert_h5_element(h5_dummy_file["root_integers"], "test") + elt = convert_h5_element(h5_dummy_file["root_integers"], "test") assert isinstance(elt, H5DatasetElement) @@ -57,30 +59,38 @@ def test_nd_array_conversion(): # Only test array handling here, `convert_basic_element` is tested # elsewhere. arr = np.array([[["something"]]]) - elt = __convert_basic_element_with_nd_array(arr) + elt = convert_basic_element_with_nd_array(arr) assert isinstance(elt, TextElement) assert elt.value == "something" arr = np.zeros((1, 1)) - elt = __convert_basic_element_with_nd_array(arr) + elt = convert_basic_element_with_nd_array(arr) assert isinstance(elt, FloatElement) assert elt.value == 0 arr = np.zeros((1, 3, 1)) - elt = __convert_basic_element_with_nd_array(arr) + elt = convert_basic_element_with_nd_array(arr) assert isinstance(elt, ListElement) assert elt.value == [0, 0, 0] arr = np.array([[1, 2, 3], [4, 5, 6]]) - elt = __convert_basic_element_with_nd_array(arr, internal_path="some/path") + elt = convert_basic_element_with_nd_array(arr, internal_path="some/path") assert isinstance(elt, H5NdarrayElement) assert elt.internal_path == "some/path" # Non-arrays should be forwarded correctly - elt = __convert_basic_element_with_nd_array("something") + elt = convert_basic_element_with_nd_array("something") assert isinstance(elt, TextElement) assert elt.value == "something" - elt = __convert_basic_element_with_nd_array([0, 0, 0]) + elt = convert_basic_element_with_nd_array([0, 0, 0]) assert isinstance(elt, ListElement) assert elt.value == [0, 0, 0] + +def test_record_creation(): + + dbtr = DebugTree() + records = scan_directory(UNITTESTDIR, UNITTESTDIR / "h5_cfood.yml", debug_tree=dbtr) + print(dbtr.debug_tree) + print(dbtr.debug_metadata) + assert False