Skip to content
Snippets Groups Projects
Commit 15eb6a2d authored by Florian Spreckelsen's avatar Florian Spreckelsen
Browse files

Merge branch 'f-use-substitution-templates' into 'dev'

F use substitution templates

See merge request caosdb/src/crawler2.0!21
parents d9d09def a2baf4e6
Branches
Tags
2 merge requests!53Release 0.1,!21F use substitution templates
Pipeline #22941 passed
...@@ -37,6 +37,7 @@ from .structure_elements import (StructureElement, Directory, File, Dict, JSONFi ...@@ -37,6 +37,7 @@ from .structure_elements import (StructureElement, Directory, File, Dict, JSONFi
TextElement, DictTextElement, DictElement, DictListElement) TextElement, DictTextElement, DictElement, DictListElement)
from typing import Optional, Union from typing import Optional, Union
from abc import abstractmethod from abc import abstractmethod
from string import Template
import yaml_header_tools import yaml_header_tools
import yaml import yaml
...@@ -63,6 +64,7 @@ def handle_value(value: Union[dict, str], values: GeneralStore): ...@@ -63,6 +64,7 @@ def handle_value(value: Union[dict, str], values: GeneralStore):
- the final value of the property - the final value of the property
- the collection mode (can be single, list or multiproperty) - the collection mode (can be single, list or multiproperty)
""" """
# @review Florian Spreckelsen 2022-05-13
if type(value) == dict: if type(value) == dict:
if "value" not in value: if "value" not in value:
...@@ -90,12 +92,20 @@ def handle_value(value: Union[dict, str], values: GeneralStore): ...@@ -90,12 +92,20 @@ def handle_value(value: Union[dict, str], values: GeneralStore):
propvalue = value propvalue = value
return (propvalue, collection_mode) return (propvalue, collection_mode)
if propvalue.startswith("$"): # Check if the replacement is a single variable containing a record:
propvalue = values[propvalue[1:]] match = re.match(r"^\$(\{)?(?P<varname>[0-9a-zA-Z_]+)(\})?$", propvalue)
# Allow the insertion of $ signs at the beginning if match is not None:
if type(propvalue) == str and propvalue.startswith("$$"): varname = match.group("varname")
propvalue = propvalue[1:] if varname in values:
if values[varname] is None:
propvalue = None
return (propvalue, collection_mode)
if isinstance(values[varname], db.Entity):
propvalue = values[varname]
return (propvalue, collection_mode)
propvalue_template = Template(propvalue)
propvalue = propvalue_template.safe_substitute(**values.get_storage())
return (propvalue, collection_mode) return (propvalue, collection_mode)
......
...@@ -769,6 +769,7 @@ class Crawler(object): ...@@ -769,6 +769,7 @@ class Crawler(object):
updateList) updateList)
# remove unnecessary updates from list # remove unnecessary updates from list
# TODO: refactoring of typo
for el in to_be_updated: for el in to_be_updated:
self.replace_entities_by_ids(el) self.replace_entities_by_ids(el)
......
# Crawler definition (cfood) demonstrating $-variable substitution templates.
# NOTE(review): indentation reconstructed from context — the original nesting
# was lost in extraction; verify against the repository file.
ExperimentalData: # name of the converter
  type: Directory
  match: ExperimentalData
  records:
    Project:
      # Literal record name assigned on the directory node.
      name: project
  subtree:
    File: # name of the converter
      type: SimpleFile
      # Named groups (year/month/day) become substitution variables below.
      match: (?P<year>[0-9]{2,2})(?P<month>[0-9]{2,2})(?P<day>[0-9]{2,2})_data.dat
      records:
        Experiment:
          # Template string: $year/$month/$day are filled from the match above.
          date: 20$year-$month-$day
        ExperimentSeries:
          # A lone $-variable naming a record resolves to that record itself.
          Experiment: $Experiment
        Project:
          # Leading "+" marks list/collection mode for the property.
          Experiments: +$Experiment
          dates: +20$year-$month-$day
...@@ -74,8 +74,10 @@ def test_record_structure_generation(crawler): ...@@ -74,8 +74,10 @@ def test_record_structure_generation(crawler):
subd = crawler.debug_tree[dircheckstr("DataAnalysis")] subd = crawler.debug_tree[dircheckstr("DataAnalysis")]
subc = crawler.debug_metadata["copied"][dircheckstr("DataAnalysis")] subc = crawler.debug_metadata["copied"][dircheckstr("DataAnalysis")]
assert len(subd) == 2 assert len(subd) == 2
assert len(subd[0]) == 2 # variables store on Data Analysis node of debug tree # variables store on Data Analysis node of debug tree
assert len(subd[1]) == 0 # record store on Data Analysis node of debug tree assert len(subd[0]) == 2
# record store on Data Analysis node of debug tree
assert len(subd[1]) == 0
assert len(subc) == 2 assert len(subc) == 2
assert len(subc[0]) == 2 assert len(subc[0]) == 2
assert len(subc[1]) == 0 assert len(subc[1]) == 0
...@@ -84,7 +86,8 @@ def test_record_structure_generation(crawler): ...@@ -84,7 +86,8 @@ def test_record_structure_generation(crawler):
assert subd[0]["DataAnalysis"] == "examples_article/DataAnalysis" assert subd[0]["DataAnalysis"] == "examples_article/DataAnalysis"
assert subc[0]["DataAnalysis"] == False assert subc[0]["DataAnalysis"] == False
subd = crawler.debug_tree[dircheckstr("DataAnalysis", "2020_climate-model-predict")] subd = crawler.debug_tree[dircheckstr(
"DataAnalysis", "2020_climate-model-predict")]
subc = crawler.debug_metadata["copied"][dircheckstr( subc = crawler.debug_metadata["copied"][dircheckstr(
"DataAnalysis", "2020_climate-model-predict")] "DataAnalysis", "2020_climate-model-predict")]
...@@ -92,7 +95,8 @@ def test_record_structure_generation(crawler): ...@@ -92,7 +95,8 @@ def test_record_structure_generation(crawler):
assert len(subd[1]["Project"].get_parents()) == 1 assert len(subd[1]["Project"].get_parents()) == 1
assert subd[1]["Project"].get_parents()[0].name == "Project" assert subd[1]["Project"].get_parents()[0].name == "Project"
assert subd[1]["Project"].get_property("date").value == "2020" assert subd[1]["Project"].get_property("date").value == "2020"
assert subd[1]["Project"].get_property("identifier").value == "climate-model-predict" assert subd[1]["Project"].get_property(
"identifier").value == "climate-model-predict"
assert len(subd[0]) == 6 assert len(subd[0]) == 6
assert subd[0]["date"] == "2020" assert subd[0]["date"] == "2020"
...@@ -129,15 +133,19 @@ def test_record_structure_generation(crawler): ...@@ -129,15 +133,19 @@ def test_record_structure_generation(crawler):
assert len(subd[1]["Project"].get_parents()) == 1 assert len(subd[1]["Project"].get_parents()) == 1
assert subd[1]["Project"].get_parents()[0].name == "Project" assert subd[1]["Project"].get_parents()[0].name == "Project"
assert subd[1]["Project"].get_property("date").value == "2020" assert subd[1]["Project"].get_property("date").value == "2020"
assert subd[1]["Project"].get_property("identifier").value == "climate-model-predict" assert subd[1]["Project"].get_property(
"identifier").value == "climate-model-predict"
assert len(subd[1]["Measurement"].get_parents()) == 1 assert len(subd[1]["Measurement"].get_parents()) == 1
assert subd[1]["Measurement"].get_parents()[0].name == "Measurement" assert subd[1]["Measurement"].get_parents()[0].name == "Measurement"
assert subd[1]["Measurement"].get_property("date").value == "2020-02-08" assert subd[1]["Measurement"].get_property("date").value == "2020-02-08"
assert subd[1]["Measurement"].get_property("identifier").value == "prediction-errors" assert subd[1]["Measurement"].get_property(
"identifier").value == "prediction-errors"
assert subd[1]["Measurement"].get_property("project").value != "$Project" assert subd[1]["Measurement"].get_property("project").value != "$Project"
assert subd[1]["Measurement"].get_property("project").value.__class__ == db.Record assert subd[1]["Measurement"].get_property(
assert subd[1]["Measurement"].get_property("project").value == subd[0]["Project"] "project").value.__class__ == db.Record
assert subd[1]["Measurement"].get_property(
"project").value == subd[0]["Project"]
# Check the copy flags for the second level in the hierarchy: # Check the copy flags for the second level in the hierarchy:
assert subc[1]["Project"] is True assert subc[1]["Project"] is True
...@@ -176,9 +184,15 @@ def test_crawler_update_list(crawler, ident): ...@@ -176,9 +184,15 @@ def test_crawler_update_list(crawler, ident):
# If the following assertions fail, that is a hint, that the test file records.xml has changed # If the following assertions fail, that is a hint, that the test file records.xml has changed
# and this needs to be updated: # and this needs to be updated:
assert len(ident.get_records()) == 18 assert len(ident.get_records()) == 18
assert len([r for r in ident.get_records() if r.parents[0].name == "Person"]) == 5 assert len(
assert len([r for r in ident.get_records() if r.parents[0].name == "Measurement"]) == 11 [r for r in ident.get_records() if r.parents[0].name == "Person"]
assert len([r for r in ident.get_records() if r.parents[0].name == "Project"]) == 2 ) == 5
assert len(
[r for r in ident.get_records() if r.parents[0].name == "Measurement"]
) == 11
assert len(
[r for r in ident.get_records() if r.parents[0].name == "Project"]
) == 2
# The crawler contains lots of duplicates, because identifiables have not been resolved yet: # The crawler contains lots of duplicates, because identifiables have not been resolved yet:
assert len(ident.get_records()) != len(crawler.updateList) assert len(ident.get_records()) != len(crawler.updateList)
...@@ -194,8 +208,10 @@ def test_crawler_update_list(crawler, ident): ...@@ -194,8 +208,10 @@ def test_crawler_update_list(crawler, ident):
id_r0 = ident.get_identifiable(r_cur) id_r0 = ident.get_identifiable(r_cur)
assert r_cur.parents[0].name == id_r0.parents[0].name assert r_cur.parents[0].name == id_r0.parents[0].name
assert r_cur.get_property("first_name").value == id_r0.get_property("first_name").value assert r_cur.get_property(
assert r_cur.get_property("last_name").value == id_r0.get_property("last_name").value "first_name").value == id_r0.get_property("first_name").value
assert r_cur.get_property(
"last_name").value == id_r0.get_property("last_name").value
assert len(r_cur.parents) == 1 assert len(r_cur.parents) == 1
assert len(id_r0.parents) == 1 assert len(id_r0.parents) == 1
assert len(r_cur.properties) == 2 assert len(r_cur.properties) == 2
...@@ -213,9 +229,11 @@ def test_crawler_update_list(crawler, ident): ...@@ -213,9 +229,11 @@ def test_crawler_update_list(crawler, ident):
id_r1 = ident.get_identifiable(r_cur) id_r1 = ident.get_identifiable(r_cur)
assert r_cur.parents[0].name == id_r1.parents[0].name assert r_cur.parents[0].name == id_r1.parents[0].name
assert r_cur.get_property("identifier").value == id_r1.get_property("identifier").value assert r_cur.get_property(
"identifier").value == id_r1.get_property("identifier").value
assert r_cur.get_property("date").value == id_r1.get_property("date").value assert r_cur.get_property("date").value == id_r1.get_property("date").value
assert r_cur.get_property("project").value == id_r1.get_property("project").value assert r_cur.get_property(
"project").value == id_r1.get_property("project").value
assert len(r_cur.parents) == 1 assert len(r_cur.parents) == 1
assert len(id_r1.parents) == 1 assert len(id_r1.parents) == 1
assert len(r_cur.properties) == 4 assert len(r_cur.properties) == 4
...@@ -228,7 +246,8 @@ def test_crawler_update_list(crawler, ident): ...@@ -228,7 +246,8 @@ def test_crawler_update_list(crawler, ident):
assert idr_r1_test != idr_r0_test assert idr_r1_test != idr_r0_test
assert len(idr_r1.properties) == 4 assert len(idr_r1.properties) == 4
assert r_cur.get_property("responsible").value == idr_r1.get_property("responsible").value assert r_cur.get_property(
"responsible").value == idr_r1.get_property("responsible").value
assert r_cur.description == idr_r1.description assert r_cur.description == idr_r1.description
# test whether compare_entites function works in this context: # test whether compare_entites function works in this context:
...@@ -355,14 +374,17 @@ def test_split_into_inserts_and_updates_trivial(crawler): ...@@ -355,14 +374,17 @@ def test_split_into_inserts_and_updates_trivial(crawler):
def test_split_into_inserts_and_updates_single(mock_retrieve): def test_split_into_inserts_and_updates_single(mock_retrieve):
crawler = mock_retrieve crawler = mock_retrieve
entlist = [db.Record(name="A").add_parent("C"), db.Record(name="B").add_parent("C")] entlist = [db.Record(name="A").add_parent(
"C"), db.Record(name="B").add_parent("C")]
assert crawler.get_identified_record_from_local_cache(entlist[0]) is None assert crawler.get_identified_record_from_local_cache(entlist[0]) is None
assert crawler.get_identified_record_from_local_cache(entlist[1]) is None assert crawler.get_identified_record_from_local_cache(entlist[1]) is None
assert crawler.can_be_checked_externally(entlist[0]) assert crawler.can_be_checked_externally(entlist[0])
assert crawler.can_be_checked_externally(entlist[1]) assert crawler.can_be_checked_externally(entlist[1])
assert crawler.identifiableAdapter.retrieve_identified_record_for_record(entlist[0]).id == 1111 assert crawler.identifiableAdapter.retrieve_identified_record_for_record(
assert crawler.identifiableAdapter.retrieve_identified_record_for_record(entlist[1]) is None entlist[0]).id == 1111
assert crawler.identifiableAdapter.retrieve_identified_record_for_record(
entlist[1]) is None
insert, update = crawler.split_into_inserts_and_updates(deepcopy(entlist)) insert, update = crawler.split_into_inserts_and_updates(deepcopy(entlist))
assert len(insert) == 1 assert len(insert) == 1
...@@ -416,7 +438,8 @@ def test_split_into_inserts_and_updates_with_complex(mock_retrieve): ...@@ -416,7 +438,8 @@ def test_split_into_inserts_and_updates_with_complex(mock_retrieve):
# ^ # ^
# | # |
# F <- B <- G # F <- B <- G
a = db.Record(name="A").add_parent("C").add_property('d', 13).add_property('e', "lskdjlsfdj") a = db.Record(name="A").add_parent("C").add_property(
'd', 13).add_property('e', "lskdjlsfdj")
b = db.Record(name="B").add_parent("C") b = db.Record(name="B").add_parent("C")
g = db.Record(name="G").add_parent("C") g = db.Record(name="G").add_parent("C")
f = db.Record(name="F").add_parent("C") f = db.Record(name="F").add_parent("C")
...@@ -457,7 +480,8 @@ def test_all_references_are_existing_already(crawler): ...@@ -457,7 +480,8 @@ def test_all_references_are_existing_already(crawler):
base_mocked_lookup, known={"A": db.Record(name="A").add_parent("C"), base_mocked_lookup, known={"A": db.Record(name="A").add_parent("C"),
"B": db.Record(name="B").add_parent("C")})) "B": db.Record(name="B").add_parent("C")}))
assert crawler.all_references_are_existing_already(db.Record().add_property('a', 123)) assert crawler.all_references_are_existing_already(
db.Record().add_property('a', 123))
assert crawler.all_references_are_existing_already(db.Record() assert crawler.all_references_are_existing_already(db.Record()
.add_property('a', db.Record(id=123))) .add_property('a', db.Record(id=123)))
assert crawler.all_references_are_existing_already(db.Record() assert crawler.all_references_are_existing_already(db.Record()
...@@ -475,7 +499,8 @@ def test_all_references_are_existing_already(crawler): ...@@ -475,7 +499,8 @@ def test_all_references_are_existing_already(crawler):
def test_can_be_checked_externally(crawler): def test_can_be_checked_externally(crawler):
assert crawler.can_be_checked_externally(db.Record().add_property('a', 123)) assert crawler.can_be_checked_externally(
db.Record().add_property('a', 123))
assert crawler.can_be_checked_externally(db.Record() assert crawler.can_be_checked_externally(db.Record()
.add_property('a', db.Record(id=123))) .add_property('a', db.Record(id=123)))
assert crawler.can_be_checked_externally(db.Record() assert crawler.can_be_checked_externally(db.Record()
......
#!/bin/python
# Tests for variable substitutions
# A. Schlemmer, 05/2022
from newcrawler import Crawler
from newcrawler.structure_elements import File, DictTextElement, DictListElement
from newcrawler.identifiable_adapters import IdentifiableAdapter, LocalStorageIdentifiableAdapter
from functools import partial
from copy import deepcopy
from unittest.mock import MagicMock, Mock
from os.path import join, dirname, basename
import yaml
import caosdb as db
from caosdb.apiutils import compare_entities
import pytest
from pytest import raises
def rfp(*pathcomponents):
    """Build a path relative to this test file's directory.

    Shorthand convenience function used throughout the tests.
    """
    base = dirname(__file__)
    return join(base, *pathcomponents)
def dircheckstr(element_type, *pathcomponents):
    """Return the debug tree identifier for a given path.

    The identifier string joins the structure-element class path, the
    final path component, and the full example-directory path.
    """
    leaf = basename(join(*pathcomponents))
    full = rfp("test_directories", "example_substitutions", *pathcomponents)
    return "newcrawler.structure_elements." + element_type + ": " + leaf + ", " + full
@pytest.fixture
def crawler():
    """Fixture: a debug-mode Crawler run over the substitution example tree."""
    data_dir = rfp("test_directories", "example_substitutions", "ExperimentalData")
    cfood = rfp("test_directories", "example_substitutions", "substitutions.yml")
    instance = Crawler(debug=True)
    instance.crawl_directory(data_dir, cfood)
    return instance
def test_substitutions(crawler):
    """Check the records produced by $-variable substitution.

    Both entries of each debug-tree node (index 0 and 1) are checked
    in the loop.
    """
    # @review Florian Spreckelsen 2022-05-13
    for store_idx in range(2):
        file_node = crawler.debug_tree[dircheckstr(
            "File", "ExperimentalData", "220512_data.dat")]
        # Date template "20$year-$month-$day" filled from the filename match.
        experiment = file_node[store_idx]["Experiment"]
        assert experiment.get_property("date").value == "2022-05-12"
        # "$Experiment" resolved to the record itself, not a string.
        series = file_node[store_idx]["ExperimentSeries"]
        assert isinstance(series.get_property("Experiment").value, db.Record)

        dir_node = crawler.debug_tree[dircheckstr("Directory", "ExperimentalData")]
        project = dir_node[store_idx]["Project"]
        assert project.name == "project"
        # "+$Experiment" collects records into a list property.
        experiments = project.get_property("Experiments").value
        assert isinstance(experiments, list)
        assert isinstance(experiments[0], db.Record)
        dates = project.get_property("dates").value
        assert isinstance(dates, list)
        assert dates[0] == "2022-05-12"
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment