Skip to content
Snippets Groups Projects
Commit d029a263 authored by Alexander Schlemmer's avatar Alexander Schlemmer
Browse files

Merge branch 'f-fix-rocrate' into 'dev'

Fix issues in rocrate support

See merge request !215
parents b34c2178 a20b309f
No related branches found
No related tags found
2 merge requests: !217 "TST: Make NamedTemporaryFiles Windows-compatible", !215 "Fix issues in rocrate support"
Pipeline #60849 passed
...@@ -39,6 +39,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ...@@ -39,6 +39,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Registered identifiables can also be used by children of the given RecordType - Registered identifiables can also be used by children of the given RecordType
if no registered identifiable is defined for them. if no registered identifiable is defined for them.
- ROCrate converter supports dereferencing property values with a single "@id"-property during
subtree generation.
- ROCrate converter supports the special property "variableMeasured" in addition to "hasPart".
- `None` and other NA values (i.e., values where `pandas.isna` is - `None` and other NA values (i.e., values where `pandas.isna` is
`True`) are now interpreted as empty strings in `True`) are now interpreted as empty strings in
`converters.match_name_and_value` instead of being cast to string naïvely `converters.match_name_and_value` instead of being cast to string naïvely
......
...@@ -51,4 +51,4 @@ h5-crawler = ...@@ -51,4 +51,4 @@ h5-crawler =
spss = spss =
pandas[spss] pandas[spss]
rocrate = rocrate =
rocrate @ git+https://github.com/salexan2001/ro-crate-py.git@f-automatic-dummy-ids rocrate
...@@ -196,7 +196,21 @@ class ROCrateEntityConverter(Converter): ...@@ -196,7 +196,21 @@ class ROCrateEntityConverter(Converter):
# Add the properties: # Add the properties:
for name, value in eprops.items(): for name, value in eprops.items():
children.append(convert_basic_element(value, name)) if isinstance(value, dict):
# According to the standard, a dict value is only allowed if it is flat, i.e.
# if it contains a single element with the key "@id" whose value is the id
# that is supposed to be dereferenced:
if not (len(value) == 1 and "@id" in value):
raise RuntimeError("The JSON-LD is not flat.")
dereferenced = element.entity.crate.dereference(value["@id"])
if dereferenced is not None:
children.append(
ROCrateEntity(element.folder, dereferenced))
else:
# This is just an external ID and will be added as simple DictElement
children.append(convert_basic_element(value, name))
else:
children.append(convert_basic_element(value, name))
# Add the files: # Add the files:
if isinstance(element.entity, rocrate.model.file.File): if isinstance(element.entity, rocrate.model.file.File):
...@@ -204,10 +218,12 @@ class ROCrateEntityConverter(Converter): ...@@ -204,10 +218,12 @@ class ROCrateEntityConverter(Converter):
children.append(File(name, os.path.join(element.folder, path, name))) children.append(File(name, os.path.join(element.folder, path, name)))
# Parts of this entity are added as child entities: # Parts of this entity are added as child entities:
if "hasPart" in eprops: for sublist in ("hasPart", "variableMeasured"):
for p in eprops["hasPart"]: if sublist in eprops:
children.append( for p in eprops[sublist]:
ROCrateEntity(element.folder, element.entity.crate.dereference( children.append(
p["@id"]))) ROCrateEntity(element.folder, element.entity.crate.dereference(
p["@id"])))
# TODO: See https://gitlab.indiscale.com/caosdb/src/caosdb-crawler/-/issues/195 for discussion.
return children return children
...@@ -26,11 +26,18 @@ DataDir: ...@@ -26,11 +26,18 @@ DataDir:
"@id": records-example/$ "@id": records-example/$
name: (?P<name>.*) name: (?P<name>.*)
keywords: (?P<keywords>.*) keywords: (?P<keywords>.*)
description: (?P<description>.*)
dateModified: (?P<dateModified>.*) dateModified: (?P<dateModified>.*)
records: records:
Dataset: Dataset:
name: $name name: $name
keywords: $keywords keywords: $keywords
description: $description
dateModified: $dateModified dateModified: $dateModified
subtree:
Description:
type: ROCrateEntity
match_type: TextObject
match_properties:
text: (?P<description>.*)
records:
Dataset:
description: $description
File deleted
No preview for this file type
...@@ -76,6 +76,12 @@ def eln_entities(basic_eln_converter): ...@@ -76,6 +76,12 @@ def eln_entities(basic_eln_converter):
return entities return entities
@pytest.mark.xfail(
reason="The example files for PASTA have not yet been updated in:"
"https://github.com/TheELNConsortium/TheELNFileFormat/tree/master/examples/PASTA"
"However, there was the announcement that these files are going to follow the"
"flattened structure soon: https://github.com/TheELNConsortium/TheELNFileFormat/issues/98"
)
def test_load_pasta(basic_eln_converter): def test_load_pasta(basic_eln_converter):
""" """
Test for loading the .eln example export from PASTA. Test for loading the .eln example export from PASTA.
...@@ -99,7 +105,7 @@ def test_load_kadi4mat(basic_eln_converter): ...@@ -99,7 +105,7 @@ def test_load_kadi4mat(basic_eln_converter):
match = basic_eln_converter.match(f_k4mat) match = basic_eln_converter.match(f_k4mat)
assert match is not None assert match is not None
entities = basic_eln_converter.create_children(GeneralStore(), f_k4mat) entities = basic_eln_converter.create_children(GeneralStore(), f_k4mat)
assert len(entities) == 10 assert len(entities) == 17
assert isinstance(entities[0], ROCrateEntity) assert isinstance(entities[0], ROCrateEntity)
assert isinstance(entities[0].folder, str) assert isinstance(entities[0].folder, str)
assert isinstance(entities[0].entity, Entity) assert isinstance(entities[0].entity, Entity)
...@@ -131,15 +137,15 @@ match_properties: ...@@ -131,15 +137,15 @@ match_properties:
match = ds2.match(eln_entities[1]) match = ds2.match(eln_entities[1])
assert match is not None assert match is not None
assert match["dateCreated"] == "2024-08-21T12:07:45.115990+00:00" assert match["dateCreated"] == "2024-11-19T13:44:35.476888+00:00"
children = ds2.create_children(GeneralStore(), eln_entities[1]) children = ds2.create_children(GeneralStore(), eln_entities[1])
assert len(children) == 8 assert len(children) == 8
assert isinstance(children[0], TextElement) assert isinstance(children[0], TextElement)
assert children[0].name == "@id" assert children[0].name == "@id"
assert children[0].value == "ro-crate-metadata.json" assert children[0].value == "ro-crate-metadata.json"
assert isinstance(children[5], DictElement) assert isinstance(children[5], ROCrateEntity)
assert children[5].value == {'@id': 'https://kadi.iam.kit.edu'} assert children[5].name == "https://kadi.iam.kit.edu"
def test_file(eln_entities): def test_file(eln_entities):
...@@ -178,13 +184,20 @@ match_properties: ...@@ -178,13 +184,20 @@ match_properties:
assert match is not None assert match is not None
children = ds_parts.create_children(GeneralStore(), ent_parts) children = ds_parts.create_children(GeneralStore(), ent_parts)
# Number of children = number of properties + number of parts +
# number of variables measured (there is no separate term for files):
assert len(children) == (len(ent_parts.entity.properties()) +
len(ent_parts.entity.properties()["hasPart"]) +
len(ent_parts.entity.properties()["variableMeasured"]))
# Number of children = number of properties + number of parts:
assert len(children) == len(ent_parts.entity.properties()) + 4
entity_children = [f for f in children if isinstance(f, ROCrateEntity)] entity_children = [f for f in children if isinstance(f, ROCrateEntity)]
assert len(entity_children) == 4 assert len(entity_children) == 13
file_counter = 0
for f in entity_children: for f in entity_children:
assert isinstance(f.entity, rocrate.model.file.File) if isinstance(f.entity, rocrate.model.file.File):
file_counter += 1
assert file_counter == 4
def test_scanner(): def test_scanner():
...@@ -193,7 +206,14 @@ def test_scanner(): ...@@ -193,7 +206,14 @@ def test_scanner():
assert len(rlist) == 1 assert len(rlist) == 1
assert isinstance(rlist[0], db.Record) assert isinstance(rlist[0], db.Record)
assert rlist[0].name == "records-example" assert rlist[0].name == "records-example"
assert rlist[0].description == "This is a sample record." # This assertion was moved to a different test, see below:
# assert rlist[0].description == "This is a sample record."
assert rlist[0].parents[0].name == "Dataset" assert rlist[0].parents[0].name == "Dataset"
assert rlist[0].get_property("keywords").value == "sample" assert rlist[0].get_property("keywords").value == "sample"
assert rlist[0].get_property("dateModified").value == "2024-08-21T11:43:17.626965+00:00" assert rlist[0].get_property("dateModified").value == "2024-08-21T11:43:17.626965+00:00"
def test_description_reference():
    """Scan the ELN test files and check the description of the first record.

    The scan uses the ``eln_files/`` directory and the ``eln_cfood.yaml``
    definition below ``UNITTESTDIR``.
    """
    data_dir = os.path.join(UNITTESTDIR, "eln_files/")
    cfood_path = os.path.join(UNITTESTDIR, "eln_cfood.yaml")
    records = scanner.scan_directory(data_dir, cfood_path)
    assert records[0].description == "This is a sample record."
...@@ -50,6 +50,12 @@ def converter_registry(): ...@@ -50,6 +50,12 @@ def converter_registry():
return converter_registry return converter_registry
@pytest.mark.xfail(
reason="The example files for PASTA have not yet been updated in:"
"https://github.com/TheELNConsortium/TheELNFileFormat/tree/master/examples/PASTA"
"However, there was the announcement that these files are going to follow the"
"flattened structure soon: https://github.com/TheELNConsortium/TheELNFileFormat/issues/98"
)
def test_zipfile_converter(converter_registry): def test_zipfile_converter(converter_registry):
zipfile = File("PASTA.eln", os.path.join(UNITTESTDIR, "eln_files", "PASTA.eln")) zipfile = File("PASTA.eln", os.path.join(UNITTESTDIR, "eln_files", "PASTA.eln"))
zip_conv = ZipFileConverter(yaml.safe_load(""" zip_conv = ZipFileConverter(yaml.safe_load("""
...@@ -77,3 +83,40 @@ match: ^PASTA$ ...@@ -77,3 +83,40 @@ match: ^PASTA$
assert isinstance(children[i], Directory) assert isinstance(children[i], Directory)
for i in range(2, 5): for i in range(2, 5):
assert isinstance(children[i], File) assert isinstance(children[i], File)
def test_zipfile_minimal(converter_registry):
    """Run the zip and directory converters on the minimal ``empty.zip`` fixture.

    The zip converter is expected to yield exactly one ``File`` (named
    ``empty.txt``) and one ``Directory``; the directory converter is then
    expected to yield three ``File`` children for that directory.
    """
    archive = File("empty.zip", os.path.join(UNITTESTDIR, "zip_minimal", "empty.zip"))
    zip_definition = yaml.safe_load("""
type: ZipFile
match: .*$
""")
    zip_conv = ZipFileConverter(zip_definition, "TestZipFileConverter", converter_registry)
    assert zip_conv.match(archive) is not None

    zip_children = zip_conv.create_children(GeneralStore(), archive)
    assert len(zip_children) == 2

    # Sort the two children by type; any other element type is a failure.
    file_obj = None
    dir_obj = None
    for child in zip_children:
        if isinstance(child, File):
            file_obj = child
            continue
        if isinstance(child, Directory):
            dir_obj = child
            continue
        assert False

    assert file_obj is not None
    assert dir_obj is not None
    assert file_obj.name == "empty.txt"

    dir_definition = yaml.safe_load("""
type: Directory
match: ^folder$
""")
    dir_conv = DirectoryConverter(dir_definition, "TestDirectory", converter_registry)
    assert dir_conv.match(dir_obj) is not None

    dir_children = dir_conv.create_children(GeneralStore(), dir_obj)
    assert len(dir_children) == 3
    for entry in dir_children:
        assert isinstance(entry, File)
File added
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment