Select Git revision
test_crawler_with_cfoods.py
-
Daniel Hornung authored
Code owners
Assign users and groups as approvers for specific file changes. Learn more.
test_crawler_with_cfoods.py 20.44 KiB
#!/usr/bin/env python3
# encoding: utf-8
#
# ** header v3.0
# This file is a part of the LinkAhead Project.
#
# Copyright (C) 2018 Research Group Biomedical Physics,
# Max-Planck-Institute for Dynamics and Self-Organization Göttingen
# Copyright (C) 2019,2020 Indiscale GmbH <info@indiscale.com>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
#
# ** end header
import os
import unittest
import linkahead as db
from linkahead.apiutils import retrieve_entity_with_id
def get_entity_with_id(eid):
    """Return the single entity that the query ``FIND ENTITY <eid>`` yields.

    ``eid`` is converted with ``str()`` before it is put into the query.
    The query is executed with ``unique=True``.
    """
    query = f"FIND ENTITY {eid!s}"
    return db.execute_query(query, unique=True)
class LoadFilesTest(unittest.TestCase):
    """Checks on the File entities that are present on the server."""

    def test_lol(self):
        """Files that should have been ignored must not have been inserted."""
        ignored_file_queries = (
            "FIND FILE WHICH IS STORED AT '**/lol'",
            "FIND FILE WHICH IS STORED AT '**/~README.md'",
        )
        for query in ignored_file_queries:
            matches = db.execute_query(query)
            assert len(matches) == 0
class CrawlerTest(unittest.TestCase):
    """Integration tests that inspect records stored on the LinkAhead server.

    Every test sends queries through ``db`` and compares the returned
    entities with fixed expected values.  The data set that produces these
    entities is not part of this file.
    """

    # ------------------------------------------------------------------
    # Private helpers.  Their names do not start with ``test``, so they
    # are not collected as test cases.
    # ------------------------------------------------------------------

    @staticmethod
    def _file_basename(entity_id):
        """Return the last path component of the entity with ``entity_id``."""
        return os.path.basename(get_entity_with_id(entity_id).path)

    def _assert_first_responsible(self, entity, firstname):
        """Check the first ``responsible`` reference of ``entity``.

        Asserts that the property exists and that the first referenced
        record has the given ``firstname``.  Returns the retrieved person
        record so that callers can check further properties.
        """
        responsible = entity.get_property("responsible")
        self.assertIsNotNone(responsible)
        person = db.Record(id=responsible.value[0])
        person.retrieve()
        self.assertEqual(firstname, person.get_property("firstname").value)
        return person

    def _assert_two_responsible(self, entity, firstnames):
        """Check that each of ``firstnames`` occurs among the first two
        ``responsible`` references of ``entity``.
        """
        responsible = entity.get_property("responsible")
        self.assertIsNotNone(responsible)
        found_names = []
        # Index explicitly: a missing second reference must be an error.
        for index in (0, 1):
            person = db.Record(id=responsible.value[index])
            person.retrieve()
            found_names.append(person.get_property("firstname").value)
        for firstname in firstnames:
            self.assertIn(firstname, found_names, "Did not find person")

    def _assert_experiment_sources(self, entity, n_sources):
        """Check the ``sources`` references of ``entity``.

        Exactly one source must have ``Experiment`` as its first parent and
        there must be ``n_sources`` sources in total.
        """
        sources = [get_entity_with_id(el)
                   for el in entity.get_property("sources").value]
        n_experiments = sum(
            1 for src in sources
            if len(src.parents) > 0 and src.parents[0].name == "Experiment")
        self.assertEqual(n_experiments, 1)
        self.assertEqual(len(sources), n_sources)

    def _assert_source_descriptions(self, software, expected):
        """Check the descriptions of the first two ``sourceCode`` files.

        ``expected`` maps a path suffix to the description that a file whose
        path ends with that suffix must have.  A file that matches no suffix
        fails the test.
        """
        file_ids = software.get_property("sourceCode").value
        source_files = [get_entity_with_id(file_ids[0]),
                        get_entity_with_id(file_ids[1])]
        for source_file in source_files:
            # Check the file of the current iteration.  The original loop
            # always looked at the first file and never at the second.
            for suffix, description in expected.items():
                if source_file.path.endswith(suffix):
                    self.assertEqual(source_file.description, description)
                    break
            else:
                self.fail("unknown file: {}".format(source_file.path))

    # ------------------------------------------------------------------
    # Tests
    # ------------------------------------------------------------------

    def test_experiment(self):
        """Check the Experiment records and what they reference."""
        # Dummy experiment for the dependency test.  Only the query itself
        # is the check here (presumably ``unique=True`` fails unless exactly
        # one entity matches -- confirm against the LinkAhead client docs).
        db.execute_query(
            "FIND ENTITY Experiment with date=2019-02-04 and identifier=empty_identifier",
            unique=True)

        # --- first experiment ---
        # TODO saving an empty string as value in a text property leads to a
        # vanishing of the property
        # thus an x is used here. Needs to be fixed.
        exp = db.execute_query(
            "FIND ENTITY Experiment with date=2019-02-03 and identifier=empty_identifier",
            unique=True)

        # The referenced project must be the test project.
        project_id = exp.get_property("Project").value
        project = get_entity_with_id(project_id)
        self.assertEqual(project.name, "2010_TestProject")
        self.assertIn("Project", [p.name for p in project.get_parents()])

        # The first result is the described data file datafile.dat.
        datfile = get_entity_with_id(exp.get_property("results").value[0])
        self.assertEqual("an example reference to a results file",
                         datfile.description)
        self.assertEqual(os.path.basename(datfile.path), "datafile.dat")

        # There should be two DepthTest references.
        depthtests = exp.get_property("DepthTest")
        self.assertIsNotNone(depthtests)
        self.assertEqual(len(depthtests.value), 2)
        depthtest = db.Record(id=depthtests.value[0])
        depthtest.retrieve()
        self.assertIn("DepthTest", [p.name for p in depthtest.get_parents()])
        temperature = depthtest.get_property("temperature")
        self.assertEqual(234.4, temperature.value)
        self.assertEqual("°C", temperature.unit)
        self.assertEqual(3.0, depthtest.get_property("depth").value)

        # Should have a responsible person and a description.
        self._assert_first_responsible(exp, "Only")
        self.assertIsNotNone(exp.description)

        # --- second experiment ---
        exp = db.execute_query(
            "FIND ENTITY Experiment with date=2019-02-03 and identifier='something'",
            unique=True)

        # Should be the same project.
        self.assertEqual(project_id, exp.get_property("Project").value)

        # Both spreadsheet files must be among the results.
        datafile_names = [self._file_basename(did)
                          for did in exp.get_property("results").value]
        self.assertIn("usefull.xlsx", datafile_names)
        self.assertIn("useless.xlsx", datafile_names)

    def test_analysis(self):
        """Check the Analysis records and what they reference."""
        # --- first analysis ---
        ana = db.execute_query(
            "FIND ENTITY Analysis with date=2019-02-03 and identifier='empty_identifier'",
            unique=True)

        # The referenced entity must be a Project.
        project_id = ana.get_property("Project").value
        project = get_entity_with_id(project_id)
        self.assertIn("Project", [p.name for p in project.get_parents()])

        # First result file is results.pdf, first script is plot.py.
        self.assertEqual(
            self._file_basename(ana.get_property("results").value[0]),
            "results.pdf")
        self.assertEqual(
            self._file_basename(ana.get_property("scripts").value[0]),
            "plot.py")

        # Should have a responsible person and a description.
        self._assert_first_responsible(ana, "Only")
        self.assertIsNotNone(ana.description)

        # Two sources, exactly one of them an Experiment.
        self._assert_experiment_sources(ana, 2)

        first_ana = ana

        # --- second analysis ---
        ana = db.execute_query(
            "FIND ENTITY Analysis with date=2019-02-03 and identifier='something'",
            unique=True)

        # Should be the same project.
        self.assertEqual(project_id, ana.get_property("Project").value)

        # First script is analyse.py.
        self.assertEqual(
            self._file_basename(ana.get_property("scripts").value[0]),
            "analyse.py")

        # One source, which is an Experiment.
        self._assert_experiment_sources(ana, 1)

        # Should reference the first analysis as the one it revises.
        revision_of = ana.get_property("revisionOf")
        self.assertIsNotNone(revision_of)
        revised = db.Record(id=revision_of.value[0])
        revised.retrieve()
        self.assertEqual(revised.id, first_ana.id)

    def test_simulation(self):
        """Check the Simulation records and what they reference."""
        # --- first simulation ---
        sim = db.execute_query(
            "FIND ENTITY Simulation with date=2019-02-03 and identifier='empty_identifier'",
            unique=True)

        # The referenced entity must be a Project.
        project_id = sim.get_property("Project").value
        project = get_entity_with_id(project_id)
        self.assertIn("Project", [p.name for p in project.get_parents()])

        # First result file is snapshots.dat, first script is sim.py.
        self.assertEqual(
            self._file_basename(sim.get_property("results").value[0]),
            "snapshots.dat")
        self.assertEqual(
            self._file_basename(sim.get_property("scripts").value[0]),
            "sim.py")

        # Should have a responsible person and a description.
        self._assert_first_responsible(sim, "Only")
        self.assertIsNotNone(sim.description)

        # --- second simulation ---
        sim = db.execute_query(
            "FIND ENTITY Simulation with date=2019-02-03 and identifier='something'",
            unique=True)

        # One source, which is an Experiment.
        self._assert_experiment_sources(sim, 1)

        # Should be the same project.
        self.assertEqual(project_id, sim.get_property("Project").value)

        # Should have two responsible persons.
        responsible = sim.get_property("responsible")
        self.assertIsNotNone(responsible)
        self.assertEqual(len(responsible.value), 2)

        # TODO enable revisionOf
        # Should have revision
        # osim = get_entity_with_id(sim.get_property("revisionOf").value[0])
        # self.assertEqual(osim.parents[0].name, "Simulation")
        # self.assertNotEqual(osim.id, sim.id)

        # First result file is timeseries.npy.
        self.assertEqual(
            self._file_basename(sim.get_property("results").value[0]),
            "timeseries.npy")

        # Both script files must be present.
        script_names = [self._file_basename(did)
                        for did in sim.get_property("scripts").value]
        self.assertIn("parameters.p", script_names)
        self.assertIn("large_sim.py", script_names)

    def test_publication(self):
        """Check the two publication records."""
        # --- first publication ---
        pub = db.execute_query("FIND ENTITY *really_cool_finding", unique=True)

        # First result file is poster.pdf.
        self.assertEqual(
            self._file_basename(pub.get_property("results").value[0]),
            "poster.pdf")

        # The second source is the file results.pdf.
        self.assertEqual(
            self._file_basename(pub.get_property("sources").value[1]),
            "results.pdf")

        # Test type
        self.assertEqual(pub.parents[0].name, "Poster")

        # --- second publication ---
        pub = db.execute_query("FIND ENTITY *paper_on_exciting_stuff ", unique=True)

        # Test type
        self.assertEqual(pub.parents[0].name, "Thesis")

    def test_software(self):
        """Check the Software record types and the five version records."""
        # --- overall software ---
        records = db.execute_query("FIND Record Software")
        self.assertEqual(len(records), 5)

        record_types = db.execute_query(
            "FIND RecordType Software with name!='Software'")
        self.assertEqual(len(record_types), 2)

        # --- first software version ---
        ana = db.execute_query(
            "FIND ENTITY Software with version='V1.0-rc1'", unique=True)
        sw = db.execute_query(
            "FIND ENTITY Software with name='2010_TestSoftware'", unique=True)
        self.assertEqual(sw.get_property("alias").value, "TestSoftware")

        # The software record should inherit from the correct software.
        self.assertEqual(sw.id, ana.get_parents()[0].id)
        self.assertEqual(ana.name, "TestSoftware_V1.0-rc1")

        # There should not be a file as binary.
        self.assertIsNone(ana.get_property("binaries"))

        # Both source files must carry their description.
        self._assert_source_descriptions(ana, {
            "analyse.py": "a simple script",
            "calc.py": "some calculation",
        })

        # Should have two responsible persons and a description.
        self._assert_two_responsible(ana, ["Second", "First"])
        self.assertIsNotNone(ana.description)

        # --- second software version ---
        ana = db.execute_query(
            "FIND ENTITY Software with version='v0.1'", unique=True)
        sw = db.execute_query(
            "FIND ENTITY Software with name='2010_TestSoftware'", unique=True)

        # The software record should inherit from the correct software.
        self.assertEqual(sw.id, ana.get_parents()[0].id)

        # The software should have the date.
        self.assertEqual("2019-02-03", ana.get_property("date").value)

        # First binary is example.deb, first source file is plot.py.
        self.assertEqual(
            self._file_basename(ana.get_property("binaries").value[0]),
            "example.deb")
        self.assertEqual(
            self._file_basename(ana.get_property("sourceCode").value[0]),
            "plot.py")

        # Should have a responsible person and a description.
        self._assert_first_responsible(ana, "Only")
        self.assertIn("example", ana.description)

        # --- third software version ---
        ana = db.execute_query(
            "FIND ENTITY Software with date='2020-02-04' and not version",
            unique=True)
        sw = db.execute_query(
            "FIND ENTITY Software with name='2020NewProject0X'", unique=True)

        # The software record should inherit from the correct software.
        self.assertEqual(sw.id, ana.get_parents()[0].id)

        # The software should have the date.
        self.assertEqual("2020-02-04", ana.get_property("date").value)

        # First binary is example.deb, first source file is plot.py.
        self.assertEqual(
            self._file_basename(ana.get_property("binaries").value[0]),
            "example.deb")
        self.assertEqual(
            self._file_basename(ana.get_property("sourceCode").value[0]),
            "plot.py")

        # Should have two responsible persons and a description.
        self._assert_two_responsible(ana, ["Some", "No"])
        self.assertIn("example", ana.description)

        # --- fourth software version ---
        ana = db.execute_query(
            "FIND ENTITY Software with date='2020-02-03' and not version",
            unique=True)
        sw = db.execute_query(
            "FIND ENTITY Software with name='2020NewProject0X'", unique=True)
        self.assertEqual(sw.get_property("alias").value, "NewProject0X")

        # The software record should inherit from the correct software.
        self.assertEqual(sw.id, ana.get_parents()[0].id)

        # The software should have the date.
        self.assertEqual("2020-02-03", ana.get_property("date").value)

        # There should not be a file as binary.
        self.assertIsNone(ana.get_property("binaries"))

        # Both source files must carry their description.
        self._assert_source_descriptions(ana, {
            "plot.py": "a plotting script",
            "calc.py": "a calc script",
        })

        # Should have a responsible person with first and last name.
        person = self._assert_first_responsible(ana, "Only")
        self.assertEqual("Responsible", person.get_property("lastname").value)

        # Should have a description.
        self.assertIn("example", ana.description)

        # --- fifth software version ---
        ana = db.execute_query(
            "FIND ENTITY Software with version='second'", unique=True)
        sw = db.execute_query(
            "FIND ENTITY Software with name='2020NewProject0X'", unique=True)
        self.assertEqual(sw.get_property("alias").value, "NewProject0X")

        # The software record should inherit from the correct software.
        self.assertEqual(sw.id, ana.get_parents()[0].id)
        self.assertEqual(ana.name, "NewProject0X_second")

        # First binary is release.deb, first source file is analyse.py.
        self.assertEqual(
            self._file_basename(ana.get_property("binaries").value[0]),
            "release.deb")
        self.assertEqual(
            self._file_basename(ana.get_property("sourceCode").value[0]),
            "analyse.py")

        # Should have a responsible person and a description.
        self._assert_first_responsible(ana, "First")
        self.assertIsNotNone(ana.description)

    def test_exampleh5(self):
        """Check the entities referenced by the ExampleH5 record."""
        examp = db.execute_query("FIND Record ExampleH5", unique=True)

        for prop in examp.properties:
            if prop.name == 'group_level1_a':
                # Retrieve once and run both checks on the same entity.
                referenced = retrieve_entity_with_id(prop.value)
                self.assertIsNotNone(
                    referenced.get_property("group_level2_aa"))
                self.assertIsNone(referenced.get_property("group_level1_a"))
            elif prop.name == 'group_level1_b':
                referenced = retrieve_entity_with_id(prop.value)
                self.assertIsNotNone(
                    referenced.get_property("level1_b_floats"))
            elif prop.name == 'group_level1_c':
                referenced = retrieve_entity_with_id(prop.value)
                self.assertIsNotNone(
                    referenced.get_property("level1_c_floats"))
            elif prop.name == 'root_integers':
                referenced = retrieve_entity_with_id(prop.value)
                self.assertIsNotNone(
                    referenced.get_property("single_attribute"))