Select Git revision
test_rocrate_converter.py
-
Alexander Schlemmer authoredAlexander Schlemmer authored
Code owners
Assign users and groups as approvers for specific file changes. Learn more.
test_rocrate_converter.py 7.47 KiB
#!/usr/bin/env python3
# encoding: utf-8
#
# This file is a part of the LinkAhead Project.
#
# Copyright (C) 2024 Indiscale GmbH <info@indiscale.com>
# Copyright (C) 2024 Alexander Schlemmer <a.schlemmer@indiscale.com>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
#
"""
Test the ELN file and RO-Crate entity converters.
"""
import importlib
import os
from pathlib import Path
import linkahead as db
import pytest
import rocrate
import yaml
from caoscrawler import scanner
from caoscrawler.converters import ELNFileConverter, ROCrateEntityConverter
from caoscrawler.stores import GeneralStore
from caoscrawler.structure_elements import (DictElement, File, ROCrateEntity,
TextElement)
from rocrate.model.entity import Entity
# Directory containing this test module; test data paths are built relative to it.
UNITTESTDIR = Path(__file__).parent
@pytest.fixture
def converter_registry():
    """Provide a converter registry for the ELN file and RO-Crate converters.

    Each entry names a converter class and the package that provides it.
    The class object is resolved by importing the package and is stored
    under the additional ``"class"`` key of the entry.
    """
    registry: dict[str, dict[str, str]] = {
        "ELNFile": {
            "converter": "ELNFileConverter",
            "package": "caoscrawler.converters"},
        "ROCrateEntity": {
            "converter": "ROCrateEntityConverter",
            "package": "caoscrawler.converters",
        }
    }

    # Resolve every converter name to the actual class object.
    for entry in registry.values():
        package = importlib.import_module(entry["package"])
        entry["class"] = getattr(package, entry["converter"])

    return registry
@pytest.fixture
def basic_eln_converter(converter_registry):
    """Provide an ``ELNFileConverter`` with a match pattern for ``.eln`` files."""
    definition = yaml.safe_load("""
type: ELNFile
match: .*\\.eln
""")
    return ELNFileConverter(definition, "TestELNConverter", converter_registry)
@pytest.fixture
def eln_entities(basic_eln_converter):
    """Provide the children the ELN converter creates for ``records-example.eln``."""
    eln_path = os.path.join(UNITTESTDIR, "eln_files", "records-example.eln")
    eln_file = File("records-example.eln", eln_path)
    return basic_eln_converter.create_children(GeneralStore(), eln_file)
# The reason is built from adjacent string literals; each fragment ends with a
# space so that the concatenated message stays readable.
@pytest.mark.xfail(
    reason="The example files for PASTA have not yet been updated in: "
    "https://github.com/TheELNConsortium/TheELNFileFormat/tree/master/examples/PASTA "
    "However, there was the announcement that these files are going to follow the "
    "flattened structure soon: https://github.com/TheELNConsortium/TheELNFileFormat/issues/98"
)
def test_load_pasta(basic_eln_converter):
    """
    Test for loading the .eln example export from PASTA.

    Checks that the converter matches the file and that the created
    children are ``ROCrateEntity`` elements wrapping rocrate entities.
    """
    f_pasta = File("PASTA.eln", os.path.join(UNITTESTDIR, "eln_files", "PASTA.eln"))
    match = basic_eln_converter.match(f_pasta)
    assert match is not None

    entities = basic_eln_converter.create_children(GeneralStore(), f_pasta)
    assert len(entities) == 20
    assert isinstance(entities[0], ROCrateEntity)
    assert isinstance(entities[0].folder, str)
    assert isinstance(entities[0].entity, Entity)
def test_load_kadi4mat(basic_eln_converter):
    """
    Test for loading the .eln example export ``records-example.eln``.

    Checks that the converter matches the file and that the created
    children are ``ROCrateEntity`` elements wrapping rocrate entities.
    """
    f_k4mat = File("records-example.eln",
                   os.path.join(UNITTESTDIR, "eln_files", "records-example.eln"))
    match = basic_eln_converter.match(f_k4mat)
    assert match is not None

    entities = basic_eln_converter.create_children(GeneralStore(), f_k4mat)
    assert len(entities) == 17
    assert isinstance(entities[0], ROCrateEntity)
    assert isinstance(entities[0].folder, str)
    assert isinstance(entities[0].entity, Entity)
def test_match_rocrate_entities(eln_entities, converter_registry):
    """
    Test matching of RO-Crate entities by type and by properties.

    Two converters are defined: ``ds1`` matches on the ``@id`` and
    ``datePublished`` properties, ``ds2`` on the type ``CreativeWork``
    together with the ``@id`` and ``dateCreated`` properties.  Each one
    must match exactly one of the first two entities.
    """
    # The registry is requested as a fixture argument so that the converters
    # receive the registry dict rather than the fixture function object.
    ds1 = ROCrateEntityConverter(yaml.safe_load("""
type: ROCrateEntity
match_properties:
  "@id": \\./
  datePublished: (?P<datePublished>.*)
"""), "TestELNConverter", converter_registry)

    match = ds1.match(eln_entities[0])
    assert match is not None

    ds2 = ROCrateEntityConverter(yaml.safe_load("""
type: ROCrateEntity
match_type: CreativeWork
match_properties:
  "@id": ro-crate-metadata.json
  dateCreated: (?P<dateCreated>.*)
"""), "TestELNConverter", converter_registry)

    # Cross-check: each converter must reject the other converter's entity.
    match = ds2.match(eln_entities[0])
    assert match is None
    match = ds1.match(eln_entities[1])
    assert match is None

    match = ds2.match(eln_entities[1])
    assert match is not None
    assert match["dateCreated"] == "2024-11-19T13:44:35.476888+00:00"

    children = ds2.create_children(GeneralStore(), eln_entities[1])
    assert len(children) == 8
    assert isinstance(children[0], TextElement)
    assert children[0].name == "@id"
    assert children[0].value == "ro-crate-metadata.json"
    assert isinstance(children[5], ROCrateEntity)
    assert children[5].name == "https://kadi.iam.kit.edu"
def test_file(eln_entities, converter_registry):
    """
    Test that a ``File`` entity yields a readable file among its children.

    The converter matches entities of type ``File`` whose ``@id`` ends in
    ``.csv``; the resulting ``File`` child is opened and its content checked.
    """
    # The backslash is doubled because the YAML lives in a non-raw Python
    # string; the regex seen by the converter is unchanged.
    ds_csv = ROCrateEntityConverter(yaml.safe_load("""
type: ROCrateEntity
match_type: File
match_properties:
  "@id": .*\\.csv$
"""), "TestELNConverter", converter_registry)

    ent_csv = eln_entities[5]
    match = ds_csv.match(ent_csv)
    assert match is not None

    children = ds_csv.create_children(GeneralStore(), ent_csv)

    # Number of children = number of properties + number of files:
    assert len(children) == len(ent_csv.entity.properties()) + 1

    # Get the file:
    f_csv = [child for child in children if isinstance(child, File)][0]
    with open(f_csv.path) as csv_handle:
        text = csv_handle.read()
    assert "Ultrasound Transducer" in text
def test_has_part(eln_entities, converter_registry):
    """
    Test that the entries of a ``Dataset`` entity are expanded into children.

    The converter matches the ``Dataset`` with ``@id`` ``records-example/``
    and its children are counted by kind.
    """
    ds_parts = ROCrateEntityConverter(yaml.safe_load("""
type: ROCrateEntity
match_type: Dataset
match_properties:
  "@id": records-example/
"""), "TestELNConverter", converter_registry)

    ent_parts = eln_entities[2]
    match = ds_parts.match(ent_parts)
    assert match is not None

    children = ds_parts.create_children(GeneralStore(), ent_parts)
    properties = ent_parts.entity.properties()

    # Number of children = number of properties + number of parts +
    # number of variables measured
    assert len(children) == (len(properties) +
                             len(properties["hasPart"]) +
                             len(properties["variableMeasured"]))

    entity_children = [child for child in children if isinstance(child, ROCrateEntity)]
    assert len(entity_children) == 13

    # Count the entity children that wrap a rocrate file entity.
    file_counter = sum(isinstance(child.entity, rocrate.model.file.File)
                       for child in entity_children)
    assert file_counter == 4
def test_scanner():
    """Scan the ``eln_files`` directory with ``eln_cfood.yaml`` and check the record."""
    data_dir = os.path.join(UNITTESTDIR, "eln_files/")
    cfood = os.path.join(UNITTESTDIR, "eln_cfood.yaml")
    records = scanner.scan_directory(data_dir, cfood)
    assert len(records) == 1

    record = records[0]
    assert isinstance(record, db.Record)
    assert record.name == "records-example"
    # The description is checked separately in test_description_reference.
    assert record.parents[0].name == "Dataset"
    assert record.get_property("keywords").value == "sample"
    assert record.get_property("dateModified").value == "2024-08-21T11:43:17.626965+00:00"
def test_description_reference():
    """Scan the ``eln_files`` directory and check the first record's description."""
    data_dir = os.path.join(UNITTESTDIR, "eln_files/")
    cfood = os.path.join(UNITTESTDIR, "eln_cfood.yaml")
    records = scanner.scan_directory(data_dir, cfood)
    first_record = records[0]
    assert first_record.description == "This is a sample record."