Select Git revision
test_file.py
-
Timm Fitschen authored
Code owners
Assign users and groups as approvers for specific file changes. Learn more.
test_tool.py 10.29 KiB
#!/bin/python
# Tests for the tool using pytest
# Adapted from check-sfs
# A. Schlemmer, 06/2021
from newcrawler import Crawler
from newcrawler.converters import MarkdownFileConverter
from newcrawler.structure_elements import File, DictTextElement, DictListElement
from newcrawler.identifiable_adapters import LocalStorageIdentifiableAdapter
from os.path import join, dirname, basename
import yaml
import caosdb as db
from caosdb.apiutils import compare_entities
from pytest import raises
# Some notes:
# Track provenance information in two ways:
# - DONE: provenance in structure elements and converters for properties of records
# - TODO: list whether information from structure elements and converters was used
def rfp(*pathcomponents):
    """
    Resolve path components relative to the directory of this file.

    Convenience shorthand: the given components are appended to the
    directory that contains this module and the joined path is returned.
    """
    here = dirname(__file__)
    return join(here, *pathcomponents)
def dircheckstr(*pathcomponents):
    """
    Build the debug tree identifier for a directory of the example data.

    The identifier consists of the Directory structure element prefix, the
    last component of the given path, and the full path of that directory
    below ``test_directories/examples_article`` next to this file.
    """
    leaf = basename(join(*pathcomponents))
    location = rfp("test_directories", "examples_article", *pathcomponents)
    return "newcrawler.structure_elements.Directory: " + leaf + ", " + location
def test_crawler():
    """
    Crawl the example article tree and inspect the collected debug data.

    For three nesting levels below ``DataAnalysis`` this checks the entry in
    ``crawler.debug_tree`` and the matching entry of the "copied" flags in
    ``crawler.debug_metadata``.
    """
    crawler = Crawler(debug=True)
    crawler.crawl_directory(rfp("test_directories", "examples_article"),
                            rfp("scifolder_cfood.yml"))

    # Level 0: the DataAnalysis directory itself carries two empty stores.
    key = dircheckstr("DataAnalysis")
    tree_entry = crawler.debug_tree[key]
    copy_entry = crawler.debug_metadata["copied"][key]
    for entry in (tree_entry, copy_entry):
        assert len(entry) == 2
        assert len(entry[0]) == 0
        assert len(entry[1]) == 0

    # Level 1: the project directory.
    key = dircheckstr("DataAnalysis", "2020_climate-model-predict")
    tree_entry = crawler.debug_tree[key]
    copy_entry = crawler.debug_metadata["copied"][key]

    # NOTE(review): index 1 appears to hold the records and index 0 the
    # plain values plus records -- confirm against the Crawler internals.
    assert len(tree_entry[1]) == 1
    project = tree_entry[1]["Project"]
    assert len(project.get_parents()) == 1
    assert project.get_parents()[0].name == "Project"
    assert project.get_property("date").value == "2020"
    assert project.get_property("identifier").value == "climate-model-predict"

    assert len(tree_entry[0]) == 3
    assert tree_entry[0]["date"] == "2020"
    assert tree_entry[0]["identifier"] == "climate-model-predict"
    assert tree_entry[0]["Project"].__class__ == db.Record

    # Copy flags of the first level: nothing is flagged as copied here.
    assert len(copy_entry[0]) == 3
    assert len(copy_entry[1]) == 1
    assert copy_entry[1]["Project"] is False
    for name in ("Project", "date", "identifier"):
        assert copy_entry[0][name] is False

    # Level 2: the measurement directory inside the project.
    key = dircheckstr("DataAnalysis",
                      "2020_climate-model-predict",
                      "2020-02-08_prediction-errors")
    tree_entry = crawler.debug_tree[key]
    copy_entry = crawler.debug_metadata["copied"][key]

    assert len(tree_entry[0]) == 4
    assert tree_entry[0]["date"] == "2020-02-08"
    assert tree_entry[0]["identifier"] == "prediction-errors"
    assert tree_entry[0]["Project"].__class__ == db.Record
    assert tree_entry[0]["Measurement"].__class__ == db.Record

    assert len(tree_entry[1]) == 2
    project = tree_entry[1]["Project"]
    assert len(project.get_parents()) == 1
    assert project.get_parents()[0].name == "Project"
    assert project.get_property("date").value == "2020"
    assert project.get_property("identifier").value == "climate-model-predict"

    measurement = tree_entry[1]["Measurement"]
    assert len(measurement.get_parents()) == 1
    assert measurement.get_parents()[0].name == "Measurement"
    assert measurement.get_property("date").value == "2020-02-08"
    assert measurement.get_property("identifier").value == "prediction-errors"

    # The "$Project" reference must have been replaced by the actual record.
    linked_project = measurement.get_property("project").value
    assert linked_project != "$Project"
    assert linked_project.__class__ == db.Record
    assert linked_project == tree_entry[0]["Project"]

    # Copy flags of the second level: only the Project is flagged as copied
    # (presumably because it stems from the level above -- confirm).
    assert copy_entry[1]["Project"] is True
    assert copy_entry[0]["Project"] is True
    assert copy_entry[1]["Measurement"] is False
    assert copy_entry[0]["Measurement"] is False
    assert copy_entry[0]["date"] is False
    assert copy_entry[0]["identifier"] is False
def test_markdown_converter():
    """
    Exercise MarkdownFileConverter on two README files of the example data.

    Covers matching (including a file that is expected not to match) and the
    structure elements produced by ``create_children``.
    """
    readme_path = rfp(
        "test_directories", "examples_article", "DataAnalysis",
        "2020_climate-model-predict", "2020-02-08_prediction-errors", "README.md")
    test_readme = File("README.md", readme_path)

    # Catch-all pattern: this python file is still expected not to match.
    converter = MarkdownFileConverter({"match": "(.*)"},
                                      "TestMarkdownFileConverter")
    python_file = File("test_tool.py", rfp("test_tool.py"))
    m = converter.match(python_file)
    assert m is None

    m = converter.match(test_readme)
    assert m is not None
    assert m.__class__ == dict
    assert len(m) == 0

    # Explicit file name pattern.
    converter = MarkdownFileConverter({"match": "README.md"},
                                      "TestMarkdownFileConverter")
    m = converter.match(test_readme)
    assert m is not None
    assert len(m) == 0

    # First README: both checked children are text elements.
    children = converter.create_children(None, test_readme)
    assert len(children) == 5
    description = children[1]
    assert description.__class__ == DictTextElement
    assert description.name == "description"
    assert description.value.__class__ == str
    responsible = children[0]
    assert responsible.__class__ == DictTextElement
    assert responsible.name == "responsible"
    assert responsible.value.__class__ == str

    # Second README: "responsible" is expected to be a list element.
    readme2_path = rfp("test_directories", "examples_article",
                       "ExperimentalData", "2020_SpeedOfLight",
                       "2020-01-01_TimeOfFlight", "README.md")
    test_readme2 = File("README.md", readme2_path)
    m = converter.match(test_readme2)
    assert m is not None
    assert len(m) == 0

    children = converter.create_children(None, test_readme2)
    assert len(children) == 2
    description = children[1]
    assert description.__class__ == DictTextElement
    assert description.name == "description"
    assert description.value.__class__ == str
    responsible = children[0]
    assert responsible.__class__ == DictListElement
    assert responsible.name == "responsible"
    assert responsible.value.__class__ == list
def test_crawler_update_list():
    """
    Compare the crawler's update list with a stored set of records.

    The records are restored from ``records.xml`` into a
    LocalStorageIdentifiableAdapter. The test then checks identifiables and
    retrieval for the first two records and finally the results of
    ``compare_entities`` for the second one.
    """
    crawler = Crawler(debug=True)
    crawler.crawl_directory(rfp("test_directories", "examples_article"),
                            rfp("scifolder_cfood.yml"))

    ident = LocalStorageIdentifiableAdapter()
    # records.xml is a static snapshot. It was generated once by extending
    # ident.get_records() with crawler.updateList and then calling
    # ident.store_state(rfp("records.xml")).
    ident.restore_state(rfp("records.xml"))
    assert len(ident.get_records()) == len(crawler.updateList)

    # Register, per record type, the properties that identify a record.
    identifying_properties = {
        "Person": ("first_name", "last_name"),
        "Measurement": ("identifier", "date", "project"),
        "Project": ("date", "identifier"),
    }
    for type_name, property_names in identifying_properties.items():
        definition = db.RecordType().add_parent(name=type_name)
        for property_name in property_names:
            definition = definition.add_property(name=property_name)
        ident.register_identifiable(type_name, definition)

    # --- first record (identified by first_name and last_name) ---
    index = 0
    records = ident.get_records()
    person_identifiable = ident.get_identifiable(records[0])
    person = records[index]
    assert person.parents[0].name == person_identifiable.parents[0].name
    assert (person.get_property("first_name").value
            == person_identifiable.get_property("first_name").value)
    assert (person.get_property("last_name").value
            == person_identifiable.get_property("last_name").value)
    assert len(person.parents) == 1
    assert len(person_identifiable.parents) == 1
    assert len(person.properties) == 2
    assert len(person_identifiable.properties) == 2

    # Retrieval is expected to fail while the stored records are ambiguous.
    # The pattern is matched against the raised message; keep its spelling.
    with raises(RuntimeError, match=".*unambigiously.*"):
        ident.retrieve_identified_record(person_identifiable)

    # Clean the record list: drop every Person from index 2 onwards. Going
    # backwards keeps the remaining indices valid while deleting.
    stored = ident.get_records()
    for position in reversed(range(2, len(stored))):
        if stored[position].parents[0].name == "Person":
            del stored[position]

    person_by_identifiable = ident.retrieve_identified_record(person_identifiable)
    person_retrieved = ident.retrieve_identifiable(records[index])
    assert person_retrieved == person_by_identifiable

    # --- second record (identified by identifier, date and project) ---
    index += 1
    records = ident.get_records()
    measurement = records[index]
    measurement_identifiable = ident.get_identifiable(measurement)
    assert (measurement.parents[0].name
            == measurement_identifiable.parents[0].name)
    for property_name in ("identifier", "date", "project"):
        assert (measurement.get_property(property_name).value
                == measurement_identifiable.get_property(property_name).value)
    assert len(measurement.parents) == 1
    assert len(measurement_identifiable.parents) == 1
    assert len(measurement.properties) == 5
    assert len(measurement_identifiable.properties) == 3

    measurement_by_identifiable = ident.retrieve_identified_record(
        measurement_identifiable)
    measurement_retrieved = ident.retrieve_identifiable(records[index])
    assert measurement_retrieved == measurement_by_identifiable
    assert measurement_retrieved != person_retrieved
    assert measurement_by_identifiable != person_by_identifiable

    assert len(measurement_retrieved.properties) == 5
    for property_name in ("responsible", "description"):
        assert (records[index].get_property(property_name).value
                == measurement_retrieved.get_property(property_name).value)

    # Check that compare_entities works in this context. Against the
    # identifiable, only the two non-identifying properties differ.
    comp = compare_entities(records[index], measurement_identifiable)
    assert len(comp[0]["parents"]) == 0
    assert len(comp[1]["parents"]) == 0
    assert len(comp[0]["properties"]) == 2
    assert len(comp[1]["properties"]) == 0
    assert "responsible" in comp[0]["properties"]
    assert "description" in comp[0]["properties"]

    # Against the retrieved record there must be no difference at all.
    comp = compare_entities(records[index], measurement_retrieved)
    assert len(comp[0]["parents"]) == 0
    assert len(comp[1]["parents"]) == 0
    assert len(comp[0]["properties"]) == 0
    assert len(comp[1]["properties"]) == 0
def test_provenance_debug_data():
    """
    Save the crawler's debug data and count the provenance entries.

    The debug data is written to ``provenance.yml`` next to this file, read
    back with the safe YAML loader, and the keys of its "provenance" section
    are counted per record type prefix.
    """
    crawler = Crawler(debug=True)
    crawler.crawl_directory(rfp("test_directories", "examples_article"),
                            rfp("scifolder_cfood.yml"))

    provenance_file = rfp("provenance.yml")
    crawler.save_debug_data(provenance_file)

    with open(provenance_file, "r") as stream:
        provenance = yaml.load(stream, Loader=yaml.SafeLoader)

    pr = provenance["provenance"]

    def check_key_count(prefix):
        # Number of provenance keys that start with the given prefix.
        return len([key for key in pr if key.startswith(prefix)])

    assert check_key_count("Measurement") == 11
    assert check_key_count("Project") == 5
    assert check_key_count("Person") == 14