diff --git a/CHANGELOG.md b/CHANGELOG.md index 354024f9be37fc102f035a5d6562b6d522aaa915..e0589ec4e056a494e79762ef048cf2e644f4f40a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -44,6 +44,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Fixed ### - `spss_to_datamodel` script works again. +- The cfood now supports bi-directional references when defining records on the same level. + (See: https://gitlab.indiscale.com/caosdb/src/caosdb-crawler/-/issues/175) ### Security ### diff --git a/integrationtests/test_issues.py b/integrationtests/test_issues.py index c699e0ab84a0d928c1f84a1b421d97e1b2d848b6..0506fa4db03e9b3638051e6ec4fa132bd348a988 100644 --- a/integrationtests/test_issues.py +++ b/integrationtests/test_issues.py @@ -16,20 +16,22 @@ # You should have received a copy of the GNU Affero General Public License # along with this program. If not, see <https://www.gnu.org/licenses/>. # +import tempfile + import linkahead as db +import yaml from caosadvancedtools.models.parser import parse_model_from_string from caoscrawler.crawl import Crawler from caoscrawler.identifiable import Identifiable from caoscrawler.identifiable_adapters import CaosDBIdentifiableAdapter -from caoscrawler.scanner import (create_converter_registry, +from caoscrawler.scanner import (_load_definition_from_yaml_dict, + create_converter_registry, scan_structure_elements) from caoscrawler.structure_elements import DictElement from linkahead.cached import cache_clear from linkahead.utils.register_tests import clear_database, set_test_key from pytest import fixture, mark, raises -import tempfile - set_test_key("10b128cf8a1372f30aa3697466bb55e76974e0c16a599bb44ace88f19c8f61e2") @@ -332,6 +334,64 @@ def test_indiscale_87(clear_database): print("---") +def test_issue_16(clear_database): + """ + This is another a test for: + https://gitlab.indiscale.com/caosdb/src/caosdb-crawler/-/issues/16 + + In addition to the two unit tests for recursive definition in 
`test_scanner.py` this system test + tests whether recursively defined records can be synchronized correctly using the crawler. + """ + recursive_yaml = """ +FirstConverter: + type: DictElement + records: + Experiment: + subtree: + Converter: + type: DictElement + records: + Block: + name: block 1 + Experiment: $Experiment + Experiment: + name: experiment 1 + Block: $Block + """ + + crawler_definition = _load_definition_from_yaml_dict( + [yaml.load(recursive_yaml, Loader=yaml.SafeLoader)]) + converter_registry = create_converter_registry(crawler_definition) + + # Nested DictElements that match the yaml structure in recursive_yaml: + data = {"data": { + }} + records = scan_structure_elements(DictElement(name="", value=data), crawler_definition, + converter_registry) + + rt_exp = db.RecordType(name="Experiment").insert() + rt_block = db.RecordType(name="Block").insert() + + ident = CaosDBIdentifiableAdapter() + ident.load_from_yaml_object(yaml.safe_load(""" +Experiment: +- name +Block: +- name +""")) + + crawler = Crawler(identifiableAdapter=ident) + crawler.synchronize(crawled_data=records) + + exp_res = db.execute_query("FIND Experiment") + assert len(exp_res) == 1 + exp_block = db.execute_query("FIND Block") + assert len(exp_block) == 1 + + assert exp_res[0].get_property("Block").value == exp_block[0].id + assert exp_block[0].get_property("Experiment").value == exp_res[0].id + + def test_issue_14(clear_database): """ Issue title: Some parent updates are required before inserts diff --git a/src/caoscrawler/converters/converters.py b/src/caoscrawler/converters/converters.py index f95862a900b46d9d92a2d3389d41487266a790dc..09942918a3818978b1b4b0c3ded1635f5f9053fc 100644 --- a/src/caoscrawler/converters/converters.py +++ b/src/caoscrawler/converters/converters.py @@ -249,11 +249,35 @@ out: tuple return (propvalue, propunit, collection_mode) -def create_records(values: GeneralStore, records: RecordStore, def_records: dict): - # list of keys to identify, which variables 
have been set by which paths: - # the items are tuples: - # 0: record name - # 1: property name +def create_records(values: GeneralStore, + records: RecordStore, + def_records: dict) -> list[tuple[str, str]]: + """ + Create records in GeneralStore `values` and RecordStore `records` as given + by the definition in `def_records`. + + This function will be called during scanning using the cfood definition. + It also should be used by CustomConverters to set records as automatic substitution + and other crawler features are applied automatically. + + Parameters + ---------- + values: GeneralStore + This GeneralStore will be used to access variables that are needed during variable substitution + in setting the properties of records and files. + Furthermore, the records that are generated in this function will be stored in this GeneralStore + **additionally to** storing them in the RecordStore given as the second argument to this function. + + records: RecordStore + The RecordStore where the generated records will be stored. + + Returns + ------- + : list[tuple[str, str]] + A list of tuples containing the record names (1st element of tuple) and respective property names + as 2nd element of the tuples. This list will be used by the scanner for creating the debug tree. + + """ keys_modified = [] for name, record in def_records.items(): @@ -286,11 +310,22 @@ def create_records(values: GeneralStore, records: RecordStore, def_records: dict if (role == "Record" and "parents" not in record): c_record.add_parent(name) - c_record = records[name] - if isinstance(record, str): raise RuntimeError( "dict expected, but found str: {}".format(record)) + + # We do a second run over the def_records, here. Having finished the first run + # for creating the records (in the variable and records stores) makes sure that + # records, that are defined on this level can already be accessed during variable substitution + # in the properties that will be set in the next block. 
+ for name, record in def_records.items(): + # See above: + if record is None: + record = {} + + c_record = records[name] + + # Set the properties: for key, value in record.items(): if key == "parents" or key == "role": continue @@ -320,7 +355,8 @@ def create_records(values: GeneralStore, records: RecordStore, def_records: dict c_record.add_property(name=key, value=propvalue, unit=propunit) else: if collection_mode == "list": - if propunit and c_record.get_property(key).unit and propunit != c_record.get_property(key).unit: + if (propunit and c_record.get_property(key).unit + and propunit != c_record.get_property(key).unit): raise RuntimeError( f"Property '{key}' has contradictory units: " f"{propunit} and {c_record.get_property(key).unit}" diff --git a/unittests/test_scanner.py b/unittests/test_scanner.py index 120d804c7895b8411b4f051b6ac8a08495f71359..c531f66fd38a714ba4f6f538d41c9fbaeb364d44 100644 --- a/unittests/test_scanner.py +++ b/unittests/test_scanner.py @@ -406,3 +406,92 @@ def test_units(): assert rec.get_property("may_be_overwritten") is not None assert rec.get_property("may_be_overwritten").value == "400" assert rec.get_property("may_be_overwritten").unit == "°C" + + +def test_recursive_definition(): + """ + This is basically a test for: + https://gitlab.indiscale.com/caosdb/src/caosdb-crawler/-/issues/16 + """ + + recursive_yaml = """ +Converter: + type: DictElement + records: + Block: + Experiment: $Experiment + Experiment: + Block: $Block + """ + + crawler_definition = _load_definition_from_yaml_dict( + [yaml.load(recursive_yaml, Loader=yaml.SafeLoader)]) + converter_registry = create_converter_registry(crawler_definition) + + data = { + "value_with_unit": "1.1 m", + "array_with_units": [ + "1.1 cm", + "2.2 cm" + ] + } + records = scan_structure_elements(DictElement(name="", value=data), crawler_definition, + converter_registry) + + assert len(records) == 2 + assert len(records[0].parents) == 1 + assert records[0].parents[0].name == "Block" + assert 
len(records[1].parents) == 1
+    assert records[1].parents[0].name == "Experiment"
+
+    assert records[0].get_property("Experiment").value == records[1]
+    assert records[1].get_property("Block").value == records[0]
+
+
+def test_recursive_definition_2():
+    """
+    This is another test for:
+    https://gitlab.indiscale.com/caosdb/src/caosdb-crawler/-/issues/16
+
+    It defines Experiment on a different level, therefore allowing the recursive definition.
+    This is, however, no workaround for test_recursive_definition as a bidirectional link on the
+    same level is still not achieved.
+    """
+
+    recursive_yaml = """
+FirstConverter:
+  type: DictElement
+  records:
+    Experiment:
+  subtree:
+    Converter:
+      type: DictElement
+      records:
+        Block:
+          Experiment: $Experiment
+        Experiment:
+          Block: $Block
+    """
+
+    crawler_definition = _load_definition_from_yaml_dict(
+        [yaml.load(recursive_yaml, Loader=yaml.SafeLoader)])
+    converter_registry = create_converter_registry(crawler_definition)
+
+    data = {"data": {
+        "value_with_unit": "1.1 m",
+        "array_with_units": [
+            "1.1 cm",
+            "2.2 cm"
+        ]
+    }}
+    records = scan_structure_elements(DictElement(name="", value=data), crawler_definition,
+                                      converter_registry)
+
+    assert len(records) == 2
+    assert len(records[0].parents) == 1
+    assert records[0].parents[0].name == "Block"
+    assert len(records[1].parents) == 1
+    assert records[1].parents[0].name == "Experiment"
+
+    assert records[0].get_property("Experiment").value == records[1]
+    assert records[1].get_property("Block").value == records[0]