diff --git a/.docker/docker-compose.yml b/.docker/docker-compose.yml index 02ccac5c48e039a3374a0d169f3b355f897e45fc..97f70320e37b1c1d8623e5fc1d98b6d72916e2b8 100644 --- a/.docker/docker-compose.yml +++ b/.docker/docker-compose.yml @@ -1,7 +1,7 @@ version: '3.7' services: sqldb: - image: mariadb:10.4 + image: mariadb:11.4 environment: MYSQL_ROOT_PASSWORD: caosdb1234 networks: diff --git a/CHANGELOG.md b/CHANGELOG.md index d04329ed20eb29f51a4f9d149c2dca7a4cf6f240..00252658b4fe2117100a695f270bc66b752431bd 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -39,6 +39,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Registered identifiables can also be used by children of the given RecordType if no registered identifiable is defined for them. +- ROCrate converter supports dereferencing property values with a single "@id"-property during + subtree generation. +- ROCrate converter supports the special property "variableMeasured" in addition to "hasPart". - `None` and other NA values (i.e., values where `pandas.isna` is `True`) are now interpreted as empty strings in `converters.match_name_and_value` instead of being cast to string naïvely diff --git a/src/caoscrawler/converters/rocrate.py b/src/caoscrawler/converters/rocrate.py index 8a45af753312a2bf29c1ddb9e6bcb15458c3ebde..7dcad86589961f03f1e755ddbc0b60742cf4ed4a 100644 --- a/src/caoscrawler/converters/rocrate.py +++ b/src/caoscrawler/converters/rocrate.py @@ -196,7 +196,21 @@ class ROCrateEntityConverter(Converter): # Add the properties: for name, value in eprops.items(): - children.append(convert_basic_element(value, name)) + if isinstance(value, dict): + # This is - according to the standard - only allowed, if it's flat, i.e. 
+ # it contains a single element with key == "@id" and the id as value which + # is supposed to be dereferenced: + if not (len(value) == 1 and "@id" in value): + raise RuntimeError("The JSON-LD is not flat.") + dereferenced = element.entity.crate.dereference(value["@id"]) + if dereferenced is not None: + children.append( + ROCrateEntity(element.folder, dereferenced)) + else: + # This is just an external ID and will be added as simple DictElement + children.append(convert_basic_element(value, name)) + else: + children.append(convert_basic_element(value, name)) # Add the files: if isinstance(element.entity, rocrate.model.file.File): @@ -204,10 +218,12 @@ class ROCrateEntityConverter(Converter): children.append(File(name, os.path.join(element.folder, path, name))) # Parts of this entity are added as child entities: - if "hasPart" in eprops: - for p in eprops["hasPart"]: - children.append( - ROCrateEntity(element.folder, element.entity.crate.dereference( - p["@id"]))) + for sublist in ("hasPart", "variableMeasured"): + if sublist in eprops: + for p in eprops[sublist]: + children.append( + ROCrateEntity(element.folder, element.entity.crate.dereference( + p["@id"]))) + # TODO: See https://gitlab.indiscale.com/caosdb/src/caosdb-crawler/-/issues/195 for discussion. 
return children diff --git a/unittests/eln_cfood.yaml b/unittests/eln_cfood.yaml index ab8e7108f511b0450d37c3e60162e412d4a1bf3b..bb29b7da7c1e6c3fc555038412f42ff2ab4d28fa 100644 --- a/unittests/eln_cfood.yaml +++ b/unittests/eln_cfood.yaml @@ -26,11 +26,18 @@ DataDir: "@id": records-example/$ name: (?P<name>.*) keywords: (?P<keywords>.*) - description: (?P<description>.*) dateModified: (?P<dateModified>.*) records: Dataset: name: $name keywords: $keywords - description: $description dateModified: $dateModified + subtree: + Description: + type: ROCrateEntity + match_type: TextObject + match_properties: + text: (?P<description>.*) + records: + Dataset: + description: $description diff --git a/unittests/test_rocrate_converter.py b/unittests/test_rocrate_converter.py index ebf585fb1ccbf245e597ba412ab414bbfdd80b6b..4b6bde171c789017e95a38729ae93f49ecf3f97b 100644 --- a/unittests/test_rocrate_converter.py +++ b/unittests/test_rocrate_converter.py @@ -144,8 +144,8 @@ match_properties: assert isinstance(children[0], TextElement) assert children[0].name == "@id" assert children[0].value == "ro-crate-metadata.json" - assert isinstance(children[5], DictElement) - assert children[5].value == {'@id': 'https://kadi.iam.kit.edu'} + assert isinstance(children[5], ROCrateEntity) + assert children[5].name == "https://kadi.iam.kit.edu" def test_file(eln_entities): @@ -184,13 +184,20 @@ match_properties: assert match is not None children = ds_parts.create_children(GeneralStore(), ent_parts) + # Number of children = number of properties + number of parts + + # number of variables measured + number of files + assert len(children) == (len(ent_parts.entity.properties()) + + len(ent_parts.entity.properties()["hasPart"]) + + len(ent_parts.entity.properties()["variableMeasured"])) - # Number of children = number of properties + number of parts: - assert len(children) == len(ent_parts.entity.properties()) + 4 entity_children = [f for f in children if isinstance(f, ROCrateEntity)] - assert 
len(entity_children) == 4 + assert len(entity_children) == 13 + file_counter = 0 + for f in entity_children: - assert isinstance(f.entity, rocrate.model.file.File) + if isinstance(f.entity, rocrate.model.file.File): + file_counter += 1 + assert file_counter == 4 def test_scanner(): @@ -206,12 +213,6 @@ def test_scanner(): assert rlist[0].get_property("dateModified").value == "2024-08-21T11:43:17.626965+00:00" -@pytest.mark.xfail( - reason="The description is no longer a simple string, but a reference to another record." - "The rocrate converter will be able to dereference this as soon as this feature is implemented:" - "https://gitlab.indiscale.com/caosdb/src/caosdb-crawler/-/issues/191" - "This test might need changes in the yaml definition." -) def test_description_reference(): rlist = scanner.scan_directory(os.path.join(UNITTESTDIR, "eln_files/"), os.path.join(UNITTESTDIR, "eln_cfood.yaml")) diff --git a/unittests/test_zipfile_converter.py b/unittests/test_zipfile_converter.py index 22e4f2eab90563f6639c2197346a6c11987a89c9..451d23c93bfc15889d5b7a9f97ef1f157aece6ee 100644 --- a/unittests/test_zipfile_converter.py +++ b/unittests/test_zipfile_converter.py @@ -97,15 +97,26 @@ match: .*$ children = zip_conv.create_children(GeneralStore(), zipfile) assert len(children) == 2 - assert children[1].name == "empty.txt" + + file_obj = None + dir_obj = None + for ch in children: + if isinstance(ch, File): + file_obj = ch + elif isinstance(ch, Directory): + dir_obj = ch + else: + assert False + assert file_obj is not None and dir_obj is not None + assert file_obj.name == "empty.txt" dir_conv = DirectoryConverter(yaml.safe_load(""" type: Directory match: ^folder$ """), "TestDirectory", converter_registry) - match = dir_conv.match(children[0]) + match = dir_conv.match(dir_obj) assert match is not None - children = dir_conv.create_children(GeneralStore(), children[0]) + children = dir_conv.create_children(GeneralStore(), dir_obj) assert len(children) == 3 for i in range(3): 
assert isinstance(children[i], File)