diff --git a/integrationtests/example_hdf5cfood.py b/integrationtests/example_hdf5cfood.py
index 820c43103fda6ea20dc2208045fece543c9d5c17..7ef57072010a549ebf4ecfc6fca61d3d3da8c3f4 100644
--- a/integrationtests/example_hdf5cfood.py
+++ b/integrationtests/example_hdf5cfood.py
@@ -41,9 +41,14 @@ class ExampleH5CFood(H5CFood):
         return ExperimentCFood.get_re()[:-len(readme_pattern)] + r".*\.hdf5"
 
     def create_identifiables(self):
-        self.experiment, project = ExperimentCFood.create_identifiable_experiment(self.match)
-
         self.identifiable_root = db.Record()
         self.identifiable_root.add_property("hdf5File", self.crawled_file)
         self.identifiable_root.add_parent("ExampleH5")
         self.identifiables.append(self.identifiable_root)
+
+    def special_treatment(self, key, value, dtype):
+        """Store the ``attr_data_root`` attribute as ``single_attribute``."""
+        if key == "attr_data_root":
+            return "single_attribute", value, dtype
+
+        return key, value, dtype
diff --git a/integrationtests/test_crawler_with_cfoods.py b/integrationtests/test_crawler_with_cfoods.py
index 7dc19240cbd27f0e6e7cd6cc145c7e12704df0d2..01452fe9eae1759167c943d90fe00a71fe105db9 100755
--- a/integrationtests/test_crawler_with_cfoods.py
+++ b/integrationtests/test_crawler_with_cfoods.py
@@ -486,3 +486,18 @@ class CrawlerTest(unittest.TestCase):
 
         # Should have a description
         self.assertIsNotNone(ana.description)
+
+    def test_exampleh5(self):
+        """Check the entities referenced by the ExampleH5 Record."""
+        examp = db.execute_query("FIND Record ExampleH5", unique=True)
+
+        for ent in [p.value for p in examp.properties]:
+            if ent.parents[0].name == 'group_level1_a':
+                self.assertIsNotNone(ent.get_property("group_level2_aa"))
+                self.assertIsNone(ent.get_property("group_level1_a"))
+            elif ent.parents[0].name == 'group_level1_b':
+                self.assertIsNotNone(ent.get_property("level1_b_floats"))
+            elif ent.parents[0].name == 'group_level1_c':
+                self.assertIsNotNone(ent.get_property("level1_c_floats"))
+            elif ent.parents[0].name == 'root_integers':
+                self.assertIsNotNone(ent.get_property("single_attribute"))
diff --git a/src/caosadvancedtools/cfoods/h5.py b/src/caosadvancedtools/cfoods/h5.py
index 57c6e1307b93cb6d35757f8245f60da21ce3cec1..f40d9c69461f09cc845300109710f74deaef3a66 100644
--- a/src/caosadvancedtools/cfoods/h5.py
+++ b/src/caosadvancedtools/cfoods/h5.py
@@ -130,7 +130,8 @@ class H5CFood(AbstractFileCFood):
 
     def collect_information(self):
         self.h5file = h5py.File(fileguide.access(self.crawled_path), 'r')
-        self.structure = self.create_structure(self.h5file)
+        self.structure = self.create_structure(
+            self.h5file, special_treatment=self.special_treatment)
 
     @staticmethod
     def get_re():
@@ -183,8 +184,24 @@ class H5CFood(AbstractFileCFood):
         # self.update_structure(self.structure)
         update_structure(self.em, self.to_be_updated, self.structure)
 
+    def special_treatment(self, key, value, dtype):
+        """Define special treatment of attributes.
+
+        To be overwritten by child classes. This default implementation
+        returns its arguments unchanged.
+
+        key: attribute name
+        value: attribute value
+        dtype: datatype that is passed on to ``add_property``
+
+        Returns the (possibly changed) tuple ``(key, value, dtype)``.
+        """
+
+        return key, value, dtype
+
     @classmethod
-    def create_structure(cls, h5obj, create_recordTypes=False, collection=None):
+    def create_structure(cls, h5obj, create_recordTypes=False, collection=None,
+                         special_treatment=None):
         """Create Records and Record types from a given hdf5-object for all
         items in the tree. Attributes are added as properties, the
         values only if the dimension < 2.
@@ -198,6 +215,10 @@ class H5CFood(AbstractFileCFood):
         if collection is None:
             collection = []
 
+        if special_treatment is None:
+            def special_treatment(key, value, dtype):
+                return key, value, dtype
+
         if h5obj.name == "/":
             name_without_path = cls.root_name
         else:
@@ -215,7 +236,8 @@ class H5CFood(AbstractFileCFood):
 
                 sub = H5CFood.create_structure(h5obj[subgroup],
                                                create_recordTypes=create_recordTypes,
-                                               collection=collection)
+                                               collection=collection,
+                                               special_treatment=special_treatment)
 
                 if create_recordTypes:
                     rec.add_property(subgroup_name)
@@ -239,7 +261,10 @@ class H5CFood(AbstractFileCFood):
                 collection.append(prop)
                 rec.add_property(name=key)
             else:
-                rec.add_property(name=key, value=val, datatype=dtype)
+                treated_k, treated_v, treated_dtype = special_treatment(
+                    key, val, dtype)
+                rec.add_property(name=treated_k, value=treated_v,
+                                 datatype=treated_dtype)
 
         return rec
 