Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found

Target

Select target project
  • caosdb/src/caosdb-crawler
1 result
Show changes
Showing
with 462 additions and 45 deletions
src/doc/img/properties-from-dict-records-author.png

59.2 KiB

src/doc/img/properties-from-dict-records-person.png

56.2 KiB

RecordFromDictElement:
type: PropertiesFromDictElement
match: "(.*)"
subtree:
AnotherElement:
type: Text
match_name: "(.*)"
RecordFromDictElement:
type: PropertiesFromDictElement
record_from_dict:
parents:
- MyType1
- MyType2
match: "(.*)"
subtree:
AnotherElement:
type: Text
match_name: "(.*)"
---
metadata:
crawler-version: 0.3.1
crawler-version: 0.7.2
---
Definitions:
type: Definitions
......
---
metadata:
crawler-version: 0.6.1
crawler-version: 0.7.2
---
Converters:
H5Dataset:
......
PropertiesFromDictElement:
type: PropertiesFromDictElement
match: ".*"
record_from_dict:
variable_name: MyRec
parents:
- MyType1
- MyType2
references:
author:
parents:
- Person
......@@ -4,7 +4,7 @@
---
metadata:
crawler-version: 0.3.1
crawler-version: 0.7.2
---
Definitions:
type: Definitions
......
......@@ -29,26 +29,32 @@ import importlib
import json
import logging
import os
import pytest
import sys
import yaml
from itertools import product
from pathlib import Path
import pytest
import yaml
import linkahead as db
from caoscrawler.converters import (Converter, ConverterValidationError,
DateElementConverter, DictElementConverter,
DictIntegerElementConverter,
DirectoryConverter, FloatElementConverter,
IntegerElementConverter, JSONFileConverter,
ListElementConverter,
MarkdownFileConverter, YAMLFileConverter,
ListElementConverter, MarkdownFileConverter,
PropertiesFromDictConverter,
YAMLFileConverter,
_AbstractScalarValueElementConverter,
handle_value, replace_variables)
from caoscrawler.crawl import Crawler
from caoscrawler.scanner import (_load_definition_from_yaml_dict,
create_converter_registry,
create_transformer_registry, load_definition)
from caoscrawler.stores import GeneralStore
create_transformer_registry,
load_definition,
scan_structure_elements)
from caoscrawler.stores import GeneralStore, RecordStore
from caoscrawler.structure_elements import (BooleanElement, DictElement,
Directory, File, FloatElement,
IntegerElement, ListElement,
......@@ -73,6 +79,10 @@ def converter_registry():
"DictElement": {
"converter": "DictElementConverter",
"package": "caoscrawler.converters"},
"PropertiesFromDictElement": {
"converter": "PropertiesFromDictConverter",
"package": "caoscrawler.converters"
},
"TextElement": {
"converter": "TextElementConverter",
"package": "caoscrawler.converters"},
......@@ -497,7 +507,7 @@ MyElement:
two_doc_yaml = """
---
metadata:
crawler-version: 0.3.1
crawler-version: 0.7.2
Converters:
MyNewType:
converter: MyNewTypeConverter
......@@ -633,7 +643,7 @@ def test_load_converters():
# converter classes can be loaded from their respective packages.
# Please adapt, if defaults change!
assert len(converter_registry) == 23
assert len(converter_registry) == 25
# All of them are contained in caoscrawler.converters
for conv_key, conv in converter_registry.items():
......@@ -660,3 +670,342 @@ def test_create_path_value(converter_registry):
dc.create_values(values, Directory("a", "/a"))
assert "Test.path" in values
assert values["Test.path"] == "/a"
def test_properties_from_dict_basic(converter_registry):
    """Exercise ``PropertiesFromDictConverter.create_records`` on a mixed dict.

    The input dict holds scalars, a list, nested dicts, a list of dicts and
    blacklisted keys.  The test verifies that the record stored as ``MyRec``
    carries the configured parents, has one property per non-blacklisted
    key, and that nested dicts were turned into referenced ``db.Record``
    objects.
    """

    def parent_names(record):
        # Names of all parents attached to ``record``.
        return [par.name for par in record.parents]

    # Definition with a blacklist and one named reference ("authors").
    definition = {
        "type": "PropertiesFromDictElement",
        "match": ".*",
        "record_from_dict": {
            "variable_name": "MyRec",
            "parents": ["DictRT1", "DictRT2"],
            "properties_blacklist": ["blacklisted_int", "blacklisted_ref"],
            "references": {
                "authors": {
                    "parents": ["Person"]
                }
            }
        }
    }
    converter = PropertiesFromDictConverter(
        definition=definition, name="Test", converter_registry=converter_registry)

    # Covers: dict with scalars, dict with lists, dict with reference, dict
    # with list of references, dict with reference with reference, and a
    # named reference.
    payload = {
        "a": 5,
        "b": ["a", "b", "c"],
        "scalar_ref": {
            "name": "Scalar Ref",
            "a": 23,
            "blacklisted_int": 42
        },
        "list_ref": [
            {
                "c": True
            },
            {
                "c": False
            }
        ],
        "ref_with_ref": {
            "a": 789,
            "ref_in_ref": {
                "b": "something"
            }
        },
        "blacklisted_int": -123,
        "blacklisted_ref": {
            "a": 25
        },
        "authors": {
            "full_name": "Some Author"
        }
    }
    general_store = GeneralStore()
    record_store = RecordStore()
    element = DictElement("TestDictElement", payload)

    converter.create_records(values=general_store, records=record_store, element=element)

    assert "MyRec" in record_store
    my_rec = record_store["MyRec"]
    assert isinstance(my_rec, db.Record)
    assert len(my_rec.parents) == 2
    assert "DictRT1" in parent_names(my_rec)
    assert "DictRT2" in parent_names(my_rec)

    # Scalar property
    a_prop = my_rec.get_property("a")
    assert a_prop is not None
    assert a_prop.value == 5

    # List property
    b_prop = my_rec.get_property("b")
    assert b_prop is not None
    assert len(b_prop.value) == 3
    for expected in ["a", "b", "c"]:
        assert expected in b_prop.value

    # Scalar reference
    scalar_ref_prop = my_rec.get_property("scalar_ref")
    assert scalar_ref_prop is not None
    referenced = scalar_ref_prop.value
    assert isinstance(referenced, db.Record)
    assert referenced.name == "Scalar Ref"
    assert len(referenced.parents) == 1
    assert "scalar_ref" in parent_names(referenced)
    assert referenced.get_property("a") is not None
    assert referenced.get_property("a").value == 23
    # The blacklist is also checked on the referenced record
    assert referenced.get_property("blacklisted_int") is None

    # List of references
    list_ref_prop = my_rec.get_property("list_ref")
    assert list_ref_prop is not None
    list_ref_records = list_ref_prop.value
    assert isinstance(list_ref_records, list)
    assert len(list_ref_records) == 2
    for item in list_ref_records:
        assert isinstance(item, db.Record)
        assert len(item.parents) == 1
        assert "list_ref" in parent_names(item)
        assert item.get_property("c") is not None
        assert type(item.get_property("c").value) is bool
    c_values = [item.get_property("c").value for item in list_ref_records]
    assert True in c_values
    assert False in c_values

    # Reference that itself contains a reference
    ref_with_ref_prop = my_rec.get_property("ref_with_ref")
    assert ref_with_ref_prop is not None
    outer_rec = ref_with_ref_prop.value
    assert isinstance(outer_rec, db.Record)
    assert len(outer_rec.parents) == 1
    assert "ref_with_ref" in parent_names(outer_rec)
    assert outer_rec.get_property("a") is not None
    assert outer_rec.get_property("a").value == 789
    assert outer_rec.get_property("ref_in_ref") is not None
    inner_rec = outer_rec.get_property("ref_in_ref").value
    assert isinstance(inner_rec, db.Record)
    assert len(inner_rec.parents) == 1
    assert "ref_in_ref" in parent_names(inner_rec)
    assert inner_rec.get_property("b") is not None
    assert inner_rec.get_property("b").value == "something"

    # Blacklisted keys must not show up as properties
    assert my_rec.get_property("blacklisted_int") is None
    assert my_rec.get_property("blacklisted_ref") is None

    # Named reference property: parents come from the "references" definition
    authors_prop = my_rec.get_property("authors")
    assert authors_prop is not None
    author_rec = authors_prop.value
    assert isinstance(author_rec, db.Record)
    assert len(author_rec.parents) == 1
    assert "Person" in parent_names(author_rec)
    assert author_rec.get_property("full_name") is not None
    assert author_rec.get_property("full_name").value == "Some Author"
def test_properties_from_dict_callable(converter_registry):
    """Check that ``referenced_record_callback`` is applied to created records.

    The callback prefixes ``url`` property values that do not already start
    with ``http``.  The test verifies the effect on the top-level record and
    on records referenced from it.
    """

    def convert_some_values(rec: db.Record, records: RecordStore, values: GeneralStore):
        """Add a URL prefix to the ``url`` property value if applicable."""
        url_prop = rec.get_property("url")
        if url_prop is None:
            return rec
        old_val = url_prop.value
        if old_val is None or old_val.startswith("http"):
            # Nothing to do: no value, or it already looks like a URL.
            return rec
        url_prop.value = f"https://test.com/{old_val}"
        return rec

    definition = {
        "record_from_dict": {
            "variable_name": "MyRec",
            "name": "My New Record"
        }
    }
    converter = PropertiesFromDictConverter(
        definition=definition,
        name="TestConverter",
        converter_registry=converter_registry,
        referenced_record_callback=convert_some_values
    )

    payload = {
        "url": "something",
        "referenced1": {
            "url": "referenced"
        },
        "referenced2": {
            "nourl": "something else",
            "url": "https://indiscale.com"
        }
    }
    general_store = GeneralStore()
    record_store = RecordStore()
    element = DictElement("TestDictElement", payload)

    converter.create_records(values=general_store, records=record_store, element=element)

    assert "MyRec" in record_store
    my_rec = record_store["MyRec"]
    assert isinstance(my_rec, db.Record)
    assert len(my_rec.parents) == 1
    assert "MyRec" in [par.name for par in my_rec.parents]
    assert my_rec.name == "My New Record"

    # Simple conversion on the top-level record
    top_url = my_rec.get_property("url")
    assert top_url is not None
    assert top_url.value == "https://test.com/something"

    # The callback also works in referenced records ...
    ref1_prop = my_rec.get_property("referenced1")
    assert ref1_prop is not None
    referenced1 = ref1_prop.value
    assert isinstance(referenced1, db.Record)
    assert referenced1.get_property("url") is not None
    assert referenced1.get_property("url").value == "https://test.com/referenced"

    # ... and leaves values alone that already look like a URL
    ref2_prop = my_rec.get_property("referenced2")
    assert ref2_prop is not None
    referenced2 = ref2_prop.value
    assert isinstance(referenced2, db.Record)
    assert referenced2.get_property("nourl") is not None
    assert referenced2.get_property("nourl").value == "something else"
    assert referenced2.get_property("url") is not None
    assert referenced2.get_property("url").value == "https://indiscale.com"
def test_properties_from_dict_nested(converter_registry):
    """Scan a nested dict with ``PropertiesFromDictElement`` and ``DictElement``.

    The crawler definition creates ``MyRec`` at the top level and then
    modifies it from several subtree levels, both through the
    PropertiesFromDictConverter and through regular DictElement/TextElement
    converters.  A second record, ``BLRec``, is created from the blacklisted
    sub-dict.
    """
    root_dict_element = DictElement("RootDict", {
        "TopLevelRec": "MyRec",
        "propertiesDict": {
            "a": 5,
            "blacklisted": {
                "bl_name": "BlackList",
                "date": "2023-12-31"
            }
        },
        "otherDict": {
            "additional_from_other": "other"
        }
    })

    # Top-level text element that supplies the Record name
    name_element = {
        "type": "TextElement",
        "match_name": "^TopLevelRec$",
        "match_value": "(?P<name>.*)",
        "records": {
            "MyRec": {
                "name": "$name"
            }
        }
    }

    # Regular DictElement handling the key that the PFD converter blacklists
    blacklisted_element = {
        "type": "DictElement",
        "match_name": "^blacklisted$",
        "records": {
            "BLRec": {
                "parents": ["BlackListedType"],
                "MyRec": "$MyRec"
            }
        },
        "subtree": {
            "BLNameElt": {
                "type": "TextElement",
                "match_name": "^bl_name$",
                "match_value": "(?P<name>.*)",
                "records": {
                    "BLRec": {
                        "name": "$name"
                    }
                }
            },
            "BLDateElt": {
                "type": "TextElement",
                "match_name": "^date$",
                "match_value": "(?P<date>.*)",
                "records": {
                    "BLRec": {
                        "creation_date": "$date"
                    }
                }
            }
        }
    }

    properties_element = {
        "type": "PropertiesFromDictElement",
        "match_name": "^propertiesDict$",
        "record_from_dict": {
            "variable_name": "MyRec",
            "properties_blacklist": ["blacklisted"]
        },
        "subtree": {
            "BLElement": blacklisted_element
        }
    }

    # Other dict which uses the DictElementConverter
    other_dict_element = {
        "type": "DictElement",
        "match_name": "^otherDict$",
        "subtree": {
            "additionalElt": {
                "type": "TextElement",
                "match_name": "^additional_from_other$",
                "match_value": "(?P<val>.*)",
                "records": {
                    "MyRec": {
                        "additional_from_other": "$val"
                    }
                }
            }
        }
    }

    def_dict = {
        "RootElt": {
            # Root dictionary
            "type": "DictElement",
            "match": ".*",
            "records": {
                # Defined at top level, used below in the subtrees
                "MyRec": {
                    "parents": ["MyType"]
                }
            },
            "subtree": {
                "NameElt": name_element,
                "PFDElement": properties_element,
                "OtherDictElement": other_dict_element
            }
        }
    }

    records = scan_structure_elements(root_dict_element, def_dict, converter_registry)

    # All records need to be there
    assert len(records) == 2
    records_by_name = {rec.name: rec for rec in records}
    myrec = records_by_name.get("MyRec")
    blrec = records_by_name.get("BlackList")
    assert myrec is not None
    assert blrec is not None

    # Parent is set from top level
    assert len(myrec.parents) == 1
    assert "MyType" in [par.name for par in myrec.parents]

    # Set automatically, with blacklist
    a_prop = myrec.get_property("a")
    assert a_prop is not None
    assert a_prop.value == 5
    assert myrec.get_property("blacklisted") is None

    # Now check blacklisted record from subtree
    assert len(blrec.parents) == 1
    assert "BlackListedType" in [par.name for par in blrec.parents]
    backref_prop = blrec.get_property("MyRec")
    assert backref_prop is not None
    assert backref_prop.value == myrec
    date_prop = blrec.get_property("creation_date")
    assert date_prop is not None
    assert date_prop.value == "2023-12-31"

    # The "old" DictConverter should have added the additional property:
    additional_prop = myrec.get_property("additional_from_other")
    assert additional_prop is not None
    assert additional_prop.value == "other"
......@@ -173,7 +173,15 @@ A:
model.get_deep("A").id = 2
return result + [model.get_deep("B")]
print(query_string)
raise NotImplementedError("Mock for this case is missing")
raise NotImplementedError(f"Mock for this case is missing: {query_string}")
def mock_cached_only_rt_allow_empty(query_string: str):
    """Wrap ``mock_cached_only_rt`` so that unmocked queries yield no results.

    Returns whatever ``mock_cached_only_rt`` returns for ``query_string``;
    if that call raises ``NotImplementedError``, an empty ``db.Container``
    is returned instead.
    """
    try:
        return mock_cached_only_rt(query_string)
    except NotImplementedError:
        return db.Container()
@pytest.fixture(autouse=True)
......
Experiment:
date:
- 1
- 2
Experiment:
- date
- 23
Experiment:
- date
Event:
- is_referenced_by: Experiment
- event_id
{
"projectId": 10002,
"archived": false,
"coordinator": {
"firstname": "Miri",
"lastname": "Mueller",
"email": "miri.mueller@science.de"
},
"start_date": "2022-03-01",
"candidates": ["Mouse", "Penguine"],
"rvalue": 0.4444,
"url": "https://site.de/index.php/"
"projectId": 10002,
"archived": false,
"coordinator": {
"firstname": "Miri",
"lastname": "Mueller",
"email": "miri.mueller@science.de"
},
"start_date": "2022-03-01",
"candidates": ["Mouse", "Penguine"],
"rvalue": 0.4444,
"url": "https://site.de/index.php/"
}
{
"name": "DEMO",
"projectId": 10002,
"archived": false,
"Person": [
{
"firstname": "Miri",
"lastname": "Mueller",
"other": null,
"email": "miri.mueller@science.de"
},
"name": "DEMO",
"projectId": 10002,
"archived": false,
"Person": [{
"firstname": "Miri",
"lastname": "Mueller",
"other": null,
"email": "miri.mueller@science.de"
},
{
"firstname": "Mara",
"lastname": "Mueller",
"email": "mara.mueller@science.de"
"email": "mara.mueller@science.de"
}
],
"start_date": "2022-03-01",
"candidates": ["Mouse", "Penguine"],
"rvalue": 0.4444,
"url": "https://site.de/index.php/"
"start_date": "2022-03-01",
"candidates": ["Mouse", "Penguine"],
"rvalue": 0.4444,
"url": "https://site.de/index.php/"
}
......@@ -2,7 +2,7 @@
# Tests for entity comparison
# A. Schlemmer, 06/2021
import caosdb as db
import linkahead as db
import pytest
from pytest import raises
......
......@@ -23,7 +23,7 @@ from functools import partial
from pathlib import Path
from pytest import fixture, importorskip
import caosdb as db
import linkahead as db
from caoscrawler.debug_tree import DebugTree
from caoscrawler.hdf5_converter import (convert_basic_element_with_nd_array,
......
......@@ -24,7 +24,7 @@
test identifiable module
"""
import caosdb as db
import linkahead as db
import pytest
from caoscrawler.identifiable import Identifiable
from caoscrawler.sync_node import SyncNode
......
......@@ -32,8 +32,10 @@ from datetime import datetime
from unittest.mock import MagicMock, Mock, patch
from pathlib import Path
import caosdb as db
import linkahead as db
import pytest
from caoscrawler.exceptions import (InvalidIdentifiableYAML,
)
from caoscrawler.identifiable import Identifiable
from caoscrawler.identifiable_adapters import (CaosDBIdentifiableAdapter,
IdentifiableAdapter,
......@@ -122,6 +124,23 @@ def test_load_from_yaml_file():
assert project_i.get_property("title") is not None
def test_invalid_yaml():
    """Each invalid identifiable YAML file must raise ``InvalidIdentifiableYAML``.

    For every file the exact error message is compared as well.
    """
    ident = CaosDBIdentifiableAdapter()
    invalid_dir = UNITTESTDIR / "test_data" / "invalid_identifiable"

    # (file name, expected error message)
    cases = [
        ("identifiable_content_no_list.yaml",
         "Identifiable contents must be lists, but this was not: Experiment"),
        ("identifiable_referenced_no_list.yaml",
         "'is_referenced_by' must be a list. Found in: Event"),
        ("identifiable_no_str_or_dict.yaml",
         "Identifiable properties must be str or dict, but this one was not:\n"
         " Experiment/23"),
    ]
    for filename, message in cases:
        with pytest.raises(InvalidIdentifiableYAML) as exc:
            ident.load_from_yaml_definition(invalid_dir / filename)
        assert str(exc.value) == message
def test_non_default_name():
ident = CaosDBIdentifiableAdapter()
identifiable = ident.get_identifiable(SyncNode(db.Record(name="don't touch it")
......@@ -141,8 +160,8 @@ def test_wildcard_ref():
dummy.id = 1
identifiable = ident.get_identifiable(SyncNode(rec, db.RecordType()
.add_parent(name="Person")
.add_property(name="is_referenced_by", value=["*"])),
.add_property(name="is_referenced_by",
value=["*"])),
[dummy]
)
assert identifiable.backrefs[0] == 1
......
......@@ -31,7 +31,7 @@ import os
from pytest import raises
import caosdb as db
import linkahead as db
from caoscrawler.converters import JSONFileConverter
from pathlib import Path
......