Skip to content
Snippets Groups Projects
Select Git revision
  • 2266975978a761d8b38174d79ce160c4161c5084
  • main default protected
  • f-yaml-parser-enums
  • dev protected
  • f-fix-paths
  • f-fix-validate-to-dict
  • f-labfolder-converter
  • f-state-machine-script
  • f-xlsx-converter-warnings-errors
  • f-rename
  • f-extra-deps
  • f-more-jsonschema-export
  • f-henrik
  • f-fix-89
  • f-trigger-advanced-user-tools
  • f-real-rename-test
  • f-linkahead-rename
  • f-register-integrationtests
  • f-fix-id
  • f-h5-files
  • f-json-schema
  • v0.14.0
  • v0.13.0
  • v0.12.0
  • v0.11.0
  • v0.10.0-numpy2
  • v0.10.0
  • v0.9.0
  • v0.8.0
  • v0.7.0
  • v0.6.1
  • v0.6.0
  • v0.5.0
  • v0.4.1
  • v0.4.0
  • v0.3.1
  • v0.3.0
37 results

test_crawler_with_cfoods.py

Blame
  • Code owners
    Assign users and groups as approvers for specific file changes. Learn more.
    test_crawler_with_cfoods.py 20.44 KiB
    #!/usr/bin/env python3
    # encoding: utf-8
    #
    # ** header v3.0
    # This file is a part of the LinkAhead Project.
    #
    # Copyright (C) 2018 Research Group Biomedical Physics,
    # Max-Planck-Institute for Dynamics and Self-Organization Göttingen
    # Copyright (C) 2019,2020 Indiscale GmbH <info@indiscale.com>
    #
    # This program is free software: you can redistribute it and/or modify
    # it under the terms of the GNU Affero General Public License as
    # published by the Free Software Foundation, either version 3 of the
    # License, or (at your option) any later version.
    #
    # This program is distributed in the hope that it will be useful,
    # but WITHOUT ANY WARRANTY; without even the implied warranty of
    # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    # GNU Affero General Public License for more details.
    #
    # You should have received a copy of the GNU Affero General Public License
    # along with this program. If not, see <https://www.gnu.org/licenses/>.
    #
    # ** end header
    import os
    import unittest
    
    import linkahead as db
    from linkahead.apiutils import retrieve_entity_with_id
    
    
    def get_entity_with_id(eid):
        """Return the result of the unique query ``FIND ENTITY <eid>``.

        ``eid`` is converted with ``str()`` and appended to the query text; the
        query is executed with ``unique=True``.
        """
        query = "FIND ENTITY " + str(eid)
        return db.execute_query(query, unique=True)
    
    
    class LoadFilesTest(unittest.TestCase):
        """Checks on the File entities that are present on the server."""

        def test_lol(self):
            """No File entity may be stored at one of the ignored paths."""
            # Check whether ignored files were inserted.
            ignored_file_queries = (
                "FIND FILE WHICH IS STORED AT '**/lol'",
                "FIND FILE WHICH IS STORED AT '**/~README.md'",
            )
            for query in ignored_file_queries:
                hits = db.execute_query(query)
                assert len(hits) == 0
    
    
    class CrawlerTest(unittest.TestCase):
        def test_experiment(self):
            """Check the Experiment records dated 2019-02-03 and what they reference."""

            # --- dummy for the dependency test experiment ---
            # The result is not inspected; the query is only executed with
            # unique=True.
            db.execute_query(
                "FIND ENTITY Experiment with date=2019-02-04 and identifier=empty_identifier",
                unique=True)

            # --- first experiment ---
            # TODO saving an empty string as value in a text property leads to a
            # vanishing of the property
            # thus an x is used here. Needs to be fixed.
            first = db.execute_query(
                "FIND ENTITY Experiment with date=2019-02-03 and identifier=empty_identifier",
                unique=True)

            # The referenced project is named 2010_TestProject and has a parent
            # called Project.
            project_id = first.get_property("Project").value
            project = get_entity_with_id(project_id)
            assert project.name == "2010_TestProject"
            assert any(parent.name == "Project" for parent in project.get_parents())

            # The first entry of "results" is the described file datafile.dat.
            result_file = get_entity_with_id(first.get_property("results").value[0])
            self.assertEqual("an example reference to a results file",
                             result_file.description)
            assert os.path.basename(result_file.path) == "datafile.dat"

            # There are two DepthTest values; the first one is inspected in detail.
            depth_prop = first.get_property("DepthTest")
            assert depth_prop is not None
            assert len(depth_prop.value) == 2
            depth_record = db.Record(id=depth_prop.value[0])
            depth_record.retrieve()
            assert any(parent.name == "DepthTest" for parent in depth_record.get_parents())
            assert 234.4 == depth_record.get_property("temperature").value
            assert "°C" == depth_record.get_property("temperature").unit
            assert 3.0 == depth_record.get_property("depth").value

            # The first responsible person has the first name "Only".
            self.assertIsNotNone(first.get_property("responsible"))
            responsible = db.Record(id=first.get_property("responsible").value[0])
            responsible.retrieve()
            self.assertEqual("Only", responsible.get_property("firstname").value)

            # A description is set.
            self.assertIsNotNone(first.description)

            # --- second experiment ---
            second = db.execute_query(
                "FIND ENTITY Experiment with date=2019-02-03 and identifier='something'",
                unique=True)

            # It references the same project as the first experiment.
            assert project_id == second.get_property("Project").value

            # Both spreadsheets are among the result files.
            result_names = [
                os.path.basename(get_entity_with_id(file_id).path)
                for file_id in second.get_property("results").value
            ]
            assert "usefull.xlsx" in result_names
            assert "useless.xlsx" in result_names
    
        def test_analysis(self):
            """Check the Analysis records dated 2019-02-03 and what they reference."""
            # --- first analysis ---
            first = db.execute_query(
                "FIND ENTITY Analysis with date=2019-02-03 and identifier='empty_identifier'",
                unique=True)

            # The referenced project has a parent called Project.
            project_id = first.get_property("Project").value
            project = get_entity_with_id(project_id)
            assert any(parent.name == "Project" for parent in project.get_parents())

            # The first entry of "results" is the file results.pdf.
            result_file = get_entity_with_id(first.get_property("results").value[0])
            assert os.path.basename(result_file.path) == "results.pdf"

            # The first entry of "scripts" is the file plot.py.
            script_file = get_entity_with_id(first.get_property("scripts").value[0])
            assert os.path.basename(script_file.path) == "plot.py"

            # The first responsible person has the first name "Only".
            self.assertIsNotNone(first.get_property("responsible"))
            responsible = db.Record(id=first.get_property("responsible").value[0])
            responsible.retrieve()
            self.assertEqual("Only", responsible.get_property("firstname").value)

            # A description is set.
            self.assertIsNotNone(first.description)

            # Two sources, exactly one of which has Experiment as first parent.
            sources = [get_entity_with_id(source_id)
                       for source_id in first.get_property("sources").value]
            experiment_count = sum(
                1 for source in sources
                if len(source.parents) > 0 and source.parents[0].name == "Experiment")
            self.assertEqual(experiment_count, 1)
            self.assertEqual(len(sources), 2)

            # --- second analysis ---
            second = db.execute_query(
                "FIND ENTITY Analysis with date=2019-02-03 and identifier='something'",
                unique=True)

            # It references the same project as the first analysis.
            assert project_id == second.get_property("Project").value

            # The first entry of "scripts" is the file analyse.py.
            script_file = get_entity_with_id(second.get_property("scripts").value[0])
            assert os.path.basename(script_file.path) == "analyse.py"

            # A single source, which has Experiment as first parent.
            sources = [get_entity_with_id(source_id)
                       for source_id in second.get_property("sources").value]
            experiment_count = sum(
                1 for source in sources
                if len(source.parents) > 0 and source.parents[0].name == "Experiment")
            self.assertEqual(experiment_count, 1)
            self.assertEqual(len(sources), 1)

            # "revisionOf" points at the first analysis.
            self.assertIsNotNone(second.get_property("revisionOf"))
            revised = db.Record(id=second.get_property("revisionOf").value[0])
            revised.retrieve()
            self.assertEqual(revised.id, first.id)
    
        def test_simulation(self):
            """Check the Simulation records dated 2019-02-03 and what they reference."""
            # --- first simulation ---
            first = db.execute_query(
                "FIND ENTITY Simulation with date=2019-02-03 and identifier='empty_identifier'",
                unique=True)

            # The referenced project has a parent called Project.
            project_id = first.get_property("Project").value
            project = get_entity_with_id(project_id)
            assert any(parent.name == "Project" for parent in project.get_parents())

            # The first entry of "results" is the file snapshots.dat.
            result_file = get_entity_with_id(first.get_property("results").value[0])
            assert os.path.basename(result_file.path) == "snapshots.dat"

            # The first entry of "scripts" is the file sim.py.
            script_file = get_entity_with_id(first.get_property("scripts").value[0])
            assert os.path.basename(script_file.path) == "sim.py"

            # The first responsible person has the first name "Only".
            self.assertIsNotNone(first.get_property("responsible"))
            responsible = db.Record(id=first.get_property("responsible").value[0])
            responsible.retrieve()
            self.assertEqual("Only", responsible.get_property("firstname").value)

            # A description is set.
            self.assertIsNotNone(first.description)

            # --- second simulation ---
            second = db.execute_query(
                "FIND ENTITY Simulation with date=2019-02-03 and identifier='something'",
                unique=True)

            # A single source, which has Experiment as first parent.
            sources = [get_entity_with_id(source_id)
                       for source_id in second.get_property("sources").value]
            experiment_count = sum(
                1 for source in sources
                if len(source.parents) > 0 and source.parents[0].name == "Experiment")
            self.assertEqual(experiment_count, 1)
            self.assertEqual(len(sources), 1)

            # It references the same project as the first simulation.
            assert project_id == second.get_property("Project").value

            # Two responsible persons are listed.
            self.assertIsNotNone(second.get_property("responsible"))
            self.assertEqual(len(second.get_property("responsible").value), 2)

            # TODO enable revisionOf
            # Should have revision
            # osim = get_entity_with_id(second.get_property("revisionOf").value[0])
            # self.assertEqual(osim.parents[0].name, "Simulation")
            # self.assertNotEqual(osim.id, second.id)

            # The first entry of "results" is the file timeseries.npy.
            result_file = get_entity_with_id(second.get_property("results").value[0])
            assert os.path.basename(result_file.path) == "timeseries.npy"

            # Both script files are among the entries of "scripts".
            script_names = [
                os.path.basename(get_entity_with_id(file_id).path)
                for file_id in second.get_property("scripts").value
            ]
            assert "parameters.p" in script_names
            assert "large_sim.py" in script_names
    
        def test_publication(self):
            """Check the attachments and the first parent of the two publications."""
            # --- first publication ---
            poster = db.execute_query("FIND ENTITY *really_cool_finding", unique=True)

            # The first entry of "results" is the file poster.pdf.
            result_file = get_entity_with_id(poster.get_property("results").value[0])
            assert os.path.basename(result_file.path) == "poster.pdf"

            # The second entry of "sources" is the file results.pdf.
            source_file = get_entity_with_id(poster.get_property("sources").value[1])
            assert os.path.basename(source_file.path) == "results.pdf"

            # The first parent gives the publication type.
            self.assertEqual(poster.parents[0].name, "Poster")

            # --- second publication ---
            thesis = db.execute_query("FIND ENTITY *paper_on_exciting_stuff ", unique=True)

            # The first parent gives the publication type.
            self.assertEqual(thesis.parents[0].name, "Thesis")
    
        def test_software(self):
            """Check the five Software records, their parents and their attachments.

            For every software version the test looks up the version record and
            the Software entity it should inherit from, then checks the attached
            binaries, source code files, responsible persons and the description.
            """
            # --- overall software ---
            records = db.execute_query("FIND Record Software")
            assert len(records) == 5

            record_types = db.execute_query("FIND RecordType Software with name!='Software'")
            assert len(record_types) == 2

            # --- first software version ---
            version = db.execute_query(
                "FIND ENTITY Software with version='V1.0-rc1'", unique=True)

            software = db.execute_query(
                "FIND ENTITY Software with name='2010_TestSoftware'", unique=True)
            assert software.get_property("alias").value == "TestSoftware"

            # The software record should inherit from the correct software
            assert software.id == version.get_parents()[0].id
            assert version.name == "TestSoftware_V1.0-rc1"

            # There should not be a file as binary
            self.assertIsNone(version.get_property("binaries"))

            # Both source code files must carry the description that belongs to
            # them (analyse.py and calc.py).
            source_files = [
                get_entity_with_id(version.get_property("sourceCode").value[0]),
                get_entity_with_id(version.get_property("sourceCode").value[1]),
            ]
            self._check_source_descriptions(source_files, [
                ("analyse.py", "a simple script"),
                ("calc.py", "some calculation"),
            ])

            # Should have two responsible persons
            self.assertIsNotNone(version.get_property("responsible"))
            person = db.Record(id=version.get_property("responsible").value[0])
            person.retrieve()
            person2 = db.Record(id=version.get_property("responsible").value[1])
            person2.retrieve()
            self._require_firstnames([person, person2], ["Second", "First"])

            # Should have a description
            self.assertIsNotNone(version.description)

            # --- second software version ---
            version = db.execute_query(
                "FIND ENTITY Software with version='v0.1'", unique=True)

            software = db.execute_query(
                "FIND ENTITY Software with name='2010_TestSoftware'", unique=True)

            # The software record should inherit from the correct software
            assert software.id == version.get_parents()[0].id
            # The software should have the date
            assert "2019-02-03" == version.get_property("date").value

            # The first binary is the file example.deb
            binary = get_entity_with_id(version.get_property("binaries").value[0])
            assert os.path.basename(binary.path) == "example.deb"

            # The first source code file is plot.py
            source_file = get_entity_with_id(version.get_property("sourceCode").value[0])
            assert os.path.basename(source_file.path) == "plot.py"

            # Should have a responsible person
            self.assertIsNotNone(version.get_property("responsible"))
            person = db.Record(id=version.get_property("responsible").value[0])
            person.retrieve()
            self.assertEqual("Only", person.get_property("firstname").value)

            # Should have a description
            assert "example" in version.description

            # --- third software version ---
            version = db.execute_query(
                "FIND ENTITY Software with date='2020-02-04' and not version",
                unique=True)

            software = db.execute_query(
                "FIND ENTITY Software with name='2020NewProject0X'", unique=True)

            # The software record should inherit from the correct software
            assert software.id == version.get_parents()[0].id
            # The software should have the date
            assert "2020-02-04" == version.get_property("date").value

            # The first binary is the file example.deb
            binary = get_entity_with_id(version.get_property("binaries").value[0])
            assert os.path.basename(binary.path) == "example.deb"

            # The first source code file is plot.py
            source_file = get_entity_with_id(version.get_property("sourceCode").value[0])
            assert os.path.basename(source_file.path) == "plot.py"

            # Should have two responsible persons
            self.assertIsNotNone(version.get_property("responsible"))
            person = db.Record(id=version.get_property("responsible").value[0])
            person.retrieve()
            person2 = db.Record(id=version.get_property("responsible").value[1])
            person2.retrieve()
            self._require_firstnames([person, person2], ["Some", "No"])

            # Should have a description
            assert "example" in version.description

            # --- fourth software version ---
            version = db.execute_query(
                "FIND ENTITY Software with date='2020-02-03' and not version",
                unique=True)

            software = db.execute_query(
                "FIND ENTITY Software with name='2020NewProject0X'", unique=True)
            assert software.get_property("alias").value == "NewProject0X"

            # The software record should inherit from the correct software
            assert software.id == version.get_parents()[0].id
            # The software should have the date
            assert "2020-02-03" == version.get_property("date").value

            # There should not be a file as binary
            self.assertIsNone(version.get_property("binaries"))

            # Both source code files must carry the description that belongs to
            # them (plot.py and calc.py).
            source_files = [
                get_entity_with_id(version.get_property("sourceCode").value[0]),
                get_entity_with_id(version.get_property("sourceCode").value[1]),
            ]
            self._check_source_descriptions(source_files, [
                ("plot.py", "a plotting script"),
                ("calc.py", "a calc script"),
            ])

            # Should have a responsible person
            self.assertIsNotNone(version.get_property("responsible"))
            person = db.Record(id=version.get_property("responsible").value[0])
            person.retrieve()
            self.assertEqual("Only", person.get_property("firstname").value)
            self.assertEqual("Responsible", person.get_property("lastname").value)

            # Should have a description
            assert "example" in version.description

            # --- fifth software version ---
            version = db.execute_query(
                "FIND ENTITY Software with version='second'", unique=True)

            software = db.execute_query(
                "FIND ENTITY Software with name='2020NewProject0X'", unique=True)
            assert software.get_property("alias").value == "NewProject0X"

            # The software record should inherit from the correct software
            assert software.id == version.get_parents()[0].id
            assert version.name == "NewProject0X_second"

            # The first binary is the file release.deb
            binary = get_entity_with_id(version.get_property("binaries").value[0])
            assert os.path.basename(binary.path) == "release.deb"

            # The first source code file is analyse.py
            source_file = get_entity_with_id(version.get_property("sourceCode").value[0])
            assert os.path.basename(source_file.path) == "analyse.py"

            # Should have a responsible person
            self.assertIsNotNone(version.get_property("responsible"))
            person = db.Record(id=version.get_property("responsible").value[0])
            person.retrieve()
            self.assertEqual("First", person.get_property("firstname").value)

            # Should have a description
            self.assertIsNotNone(version.description)

        @staticmethod
        def _check_source_descriptions(source_files, expected):
            """Check the description of every file in ``source_files``.

            ``expected`` is a sequence of ``(path_suffix, description)`` pairs.
            Each file is compared against the first pair whose suffix its path
            ends with; a file that matches no suffix raises ``Exception``.
            """
            for source_file in source_files:
                # Inspect the current file of the iteration, so that every
                # entry of ``source_files`` is actually checked.
                for suffix, description in expected:
                    if source_file.path.endswith(suffix):
                        assert source_file.description == description
                        break
                else:
                    raise Exception("unkown file")

        @staticmethod
        def _require_firstnames(persons, firstnames):
            """Raise ``Exception`` unless each first name belongs to one of ``persons``."""
            for firstname in firstnames:
                found = False

                for person in persons:
                    if person.get_property("firstname").value == firstname:
                        found = True

                if not found:
                    raise Exception("Did not find person")
    
        def test_exampleh5(self):
            """Check the entities referenced by selected ExampleH5 properties.

            For each property of the ExampleH5 record whose name is listed
            below, the entity with the id stored in the property value is
            retrieved and must have the associated property.  The entity
            referenced by ``group_level1_a`` must additionally not have a
            property called ``group_level1_a`` itself.  Properties with other
            names are ignored.
            """
            examp = db.execute_query("FIND Record ExampleH5", unique=True)

            # Name of a property of the ExampleH5 record -> name of the property
            # that the entity it references has to provide.
            required_property = {
                'group_level1_a': "group_level2_aa",
                'group_level1_b': "level1_b_floats",
                'group_level1_c': "level1_c_floats",
                'root_integers': "single_attribute",
            }

            for prop in examp.properties:
                if prop.name not in required_property:
                    continue

                # Retrieve the referenced entity only once per property.
                referenced = retrieve_entity_with_id(prop.value)
                self.assertIsNotNone(
                    referenced.get_property(required_property[prop.name]))

                if prop.name == 'group_level1_a':
                    self.assertIsNone(referenced.get_property("group_level1_a"))