Skip to content
Snippets Groups Projects
Select Git revision
  • bb7cb3a88eb6e38a7a1216a009911a4345f0bcb6
  • main default protected
  • f-pipeline-timeout
  • dev protected
  • f-fix-accent-sensitivity
  • f-filesystem-import
  • f-update-acl
  • f-filesystem-link
  • f-filesystem-directory
  • f-filesystem-core
  • f-filesystem-cleanup
  • f-string-ids
  • f-filesystem-main
  • f-multipart-encoding
  • f-trigger-advanced-user-tools
  • f-real-rename-test-pylibsolo2
  • f-real-rename-test-pylibsolo
  • f-real-rename-test
  • f-linkahead-rename
  • f-reference-record
  • f-xml-serialization
  • linkahead-pylib-v0.18.0
  • linkahead-control-v0.16.0
  • linkahead-pylib-v0.17.0
  • linkahead-mariadbbackend-v8.0.0
  • linkahead-server-v0.13.0
  • caosdb-pylib-v0.15.0
  • caosdb-pylib-v0.14.0
  • caosdb-pylib-v0.13.2
  • caosdb-server-v0.12.1
  • caosdb-pylib-v0.13.1
  • caosdb-pylib-v0.12.0
  • caosdb-server-v0.10.0
  • caosdb-pylib-v0.11.1
  • caosdb-pylib-v0.11.0
  • caosdb-server-v0.9.0
  • caosdb-pylib-v0.10.0
  • caosdb-server-v0.8.1
  • caosdb-pylib-v0.8.0
  • caosdb-server-v0.8.0
  • caosdb-pylib-v0.7.2
41 results

test_file.py

Blame
  • Code owners
    Assign users and groups as approvers for specific file changes. Learn more.
    test_tool.py 10.29 KiB
    #!/bin/python
    # Tests for the tool using pytest
    # Adapted from check-sfs
    # A. Schlemmer, 06/2021
    
    from newcrawler import Crawler
    from newcrawler.converters import MarkdownFileConverter
    from newcrawler.structure_elements import File, DictTextElement, DictListElement
    from newcrawler.identifiable_adapters import LocalStorageIdentifiableAdapter
    from os.path import join, dirname, basename
    import yaml
    import caosdb as db
    from caosdb.apiutils import compare_entities
    
    from pytest import raises
    
    # Some notes:
    # Track provenance information in two ways:
    # - DONE: provenance in structure elements and converters for properties of records
    # - TODO: list whether information from structure elements and converters was used
    
    def rfp(*pathcomponents):
        """Return an absolute path relative to this test file's directory.

        Shorthand convenience function used throughout these tests.
        """
        base = dirname(__file__)
        return join(base, *pathcomponents)
    
    def dircheckstr(*pathcomponents):
        """Return the debug-tree key identifying a crawled Directory element.

        The key combines the structure-element class, the directory's base
        name, and its full path below ``test_directories/examples_article``.
        """
        dirname_part = basename(join(*pathcomponents))
        fullpath = rfp("test_directories", "examples_article", *pathcomponents)
        return "newcrawler.structure_elements.Directory: " + dirname_part + ", " + fullpath
    
    def test_crawler():
        """Crawl the example tree and verify the debug tree at each level."""
        crawler = Crawler(debug=True)
        crawler.crawl_directory(rfp("test_directories", "examples_article"),
                                rfp("scifolder_cfood.yml"))

        # Top level: the DataAnalysis directory yields no values or records yet.
        tree_entry = crawler.debug_tree[dircheckstr("DataAnalysis")]
        copy_flags = crawler.debug_metadata["copied"][dircheckstr("DataAnalysis")]
        assert len(tree_entry) == 2
        assert len(copy_flags) == 2
        for part in (0, 1):
            assert len(tree_entry[part]) == 0
            assert len(copy_flags[part]) == 0

        # Project level: one Project record with date and identifier.
        key = dircheckstr("DataAnalysis", "2020_climate-model-predict")
        tree_entry = crawler.debug_tree[key]
        copy_flags = crawler.debug_metadata["copied"][key]

        records = tree_entry[1]
        assert len(records) == 1
        project = records["Project"]
        assert len(project.get_parents()) == 1
        assert project.get_parents()[0].name == "Project"
        assert project.get_property("date").value == "2020"
        assert project.get_property("identifier").value == "climate-model-predict"

        values = tree_entry[0]
        assert len(values) == 3
        assert values["date"] == "2020"
        assert values["identifier"] == "climate-model-predict"
        assert values["Project"].__class__ == db.Record

        # Check the copy flags for the first level in the hierarchy:
        assert len(copy_flags[0]) == 3
        assert len(copy_flags[1]) == 1
        assert copy_flags[1]["Project"] is False
        assert copy_flags[0]["Project"] is False
        assert copy_flags[0]["date"] is False
        assert copy_flags[0]["identifier"] is False

        # Measurement level: Project is inherited, Measurement is new.
        key = dircheckstr("DataAnalysis", "2020_climate-model-predict",
                          "2020-02-08_prediction-errors")
        tree_entry = crawler.debug_tree[key]
        copy_flags = crawler.debug_metadata["copied"][key]

        values = tree_entry[0]
        assert len(values) == 4
        assert values["date"] == "2020-02-08"
        assert values["identifier"] == "prediction-errors"
        assert values["Project"].__class__ == db.Record
        assert values["Measurement"].__class__ == db.Record

        records = tree_entry[1]
        assert len(records) == 2

        project = records["Project"]
        assert len(project.get_parents()) == 1
        assert project.get_parents()[0].name == "Project"
        assert project.get_property("date").value == "2020"
        assert project.get_property("identifier").value == "climate-model-predict"

        measurement = records["Measurement"]
        assert len(measurement.get_parents()) == 1
        assert measurement.get_parents()[0].name == "Measurement"
        assert measurement.get_property("date").value == "2020-02-08"
        assert measurement.get_property("identifier").value == "prediction-errors"
        # The "$Project" placeholder must have been replaced by the record:
        project_ref = measurement.get_property("project").value
        assert project_ref != "$Project"
        assert project_ref.__class__ == db.Record
        assert project_ref == values["Project"]

        # Check the copy flags for the second level in the hierarchy:
        assert copy_flags[1]["Project"] is True
        assert copy_flags[0]["Project"] is True
        assert copy_flags[1]["Measurement"] is False
        assert copy_flags[0]["Measurement"] is False
        assert copy_flags[0]["date"] is False
        assert copy_flags[0]["identifier"] is False
    
    def test_markdown_converter():
        """Verify matching and child creation of the MarkdownFileConverter."""
        readme_path = rfp("test_directories", "examples_article", "DataAnalysis",
                          "2020_climate-model-predict",
                          "2020-02-08_prediction-errors", "README.md")
        test_readme = File("README.md", readme_path)

        # A wildcard pattern: does not match a Python source file.
        converter = MarkdownFileConverter({"match": "(.*)"},
                                          "TestMarkdownFileConverter")
        assert converter.match(File("test_tool.py", rfp("test_tool.py"))) is None

        # But the README matches, producing an empty match dict.
        m = converter.match(test_readme)
        assert m is not None
        assert m.__class__ == dict
        assert len(m) == 0

        # An exact-name pattern also matches the README.
        converter = MarkdownFileConverter({"match": "README.md"},
                                          "TestMarkdownFileConverter")
        m = converter.match(test_readme)
        assert m is not None
        assert len(m) == 0

        # The README's YAML header yields five children; "responsible" here
        # is a single text element.
        children = converter.create_children(None, test_readme)
        assert len(children) == 5
        assert children[0].__class__ == DictTextElement
        assert children[0].name == "responsible"
        assert children[0].value.__class__ == str
        assert children[1].__class__ == DictTextElement
        assert children[1].name == "description"
        assert children[1].value.__class__ == str

        # A second README where "responsible" is a list of people.
        test_readme2 = File("README.md",
                            rfp("test_directories", "examples_article",
                                "ExperimentalData", "2020_SpeedOfLight",
                                "2020-01-01_TimeOfFlight", "README.md"))
        m = converter.match(test_readme2)
        assert m is not None
        assert len(m) == 0

        children = converter.create_children(None, test_readme2)
        assert len(children) == 2
        assert children[0].__class__ == DictListElement
        assert children[0].name == "responsible"
        assert children[0].value.__class__ == list
        assert children[1].__class__ == DictTextElement
        assert children[1].name == "description"
        assert children[1].value.__class__ == str
    
    def test_crawler_update_list():
        """Match crawled records against a stored state via identifiables."""
        crawler = Crawler(debug=True)
        crawler.crawl_directory(rfp("test_directories", "examples_article"),
                                rfp("scifolder_cfood.yml"))

        ident = LocalStorageIdentifiableAdapter()

        # A static snapshot of the expected records was generated once with:
        #   ident.get_records().extend(crawler.updateList)
        #   ident.store_state(rfp("records.xml"))
        ident.restore_state(rfp("records.xml"))
        assert len(ident.get_records()) == len(crawler.updateList)

        # Register the identifying properties for each record type:
        registrations = {
            "Person": ("first_name", "last_name"),
            "Measurement": ("identifier", "date", "project"),
            "Project": ("date", "identifier"),
        }
        for rt_name, prop_names in registrations.items():
            rt = db.RecordType().add_parent(name=rt_name)
            for prop_name in prop_names:
                rt.add_property(name=prop_name)
            ident.register_identifiable(rt_name, rt)

        # First record: a Person whose identifiable carries both name parts.
        records = ident.get_records()
        person = records[0]
        id_person = ident.get_identifiable(person)
        assert person.parents[0].name == id_person.parents[0].name
        for prop_name in ("first_name", "last_name"):
            assert (person.get_property(prop_name).value
                    == id_person.get_property(prop_name).value)
        assert len(person.parents) == 1
        assert len(id_person.parents) == 1
        assert len(person.properties) == 2
        assert len(id_person.properties) == 2

        # Multiple stored Persons share these identifying values, so
        # retrieval must fail.  (NOTE: the regex must match the message
        # raised by the adapter, typo included.)
        with raises(RuntimeError, match=".*unambigiously.*"):
            ident.retrieve_identified_record(id_person)

        # Remove duplicate Person records; indices 0 and 1 are kept.
        recordlist = ident.get_records()
        for i in reversed(range(2, len(recordlist))):
            if recordlist[i].parents[0].name == "Person":
                del recordlist[i]

        idr_person_test = ident.retrieve_identified_record(id_person)
        idr_person = ident.retrieve_identifiable(records[0])
        assert idr_person == idr_person_test

        # Second record: a Measurement; its identifiable only carries the
        # three registered properties, the record itself has five.
        records = ident.get_records()
        measurement = records[1]
        id_meas = ident.get_identifiable(measurement)
        assert measurement.parents[0].name == id_meas.parents[0].name
        for prop_name in ("identifier", "date", "project"):
            assert (measurement.get_property(prop_name).value
                    == id_meas.get_property(prop_name).value)
        assert len(measurement.parents) == 1
        assert len(id_meas.parents) == 1
        assert len(measurement.properties) == 5
        assert len(id_meas.properties) == 3

        idr_meas_test = ident.retrieve_identified_record(id_meas)
        idr_meas = ident.retrieve_identifiable(records[1])
        assert idr_meas == idr_meas_test
        assert idr_meas != idr_person
        assert idr_meas_test != idr_person_test

        # The retrieved record carries the full property set:
        assert len(idr_meas.properties) == 5
        for prop_name in ("responsible", "description"):
            assert (records[1].get_property(prop_name).value
                    == idr_meas.get_property(prop_name).value)

        # Test whether the compare_entities function works in this context:
        comp = compare_entities(records[1], id_meas)
        assert len(comp[0]["parents"]) == 0
        assert len(comp[1]["parents"]) == 0
        assert len(comp[0]["properties"]) == 2
        assert len(comp[1]["properties"]) == 0
        assert "responsible" in comp[0]["properties"]
        assert "description" in comp[0]["properties"]

        # Against the fully retrieved record there are no differences:
        comp = compare_entities(records[1], idr_meas)
        for side in (0, 1):
            assert len(comp[side]["parents"]) == 0
            assert len(comp[side]["properties"]) == 0
        
    
    
    def test_provenance_debug_data():
        """Save provenance debug data to YAML and count keys per record type."""
        crawler = Crawler(debug=True)
        crawler.crawl_directory(rfp("test_directories", "examples_article"),
                                rfp("scifolder_cfood.yml"))
        crawler.save_debug_data(rfp("provenance.yml"))

        with open(rfp("provenance.yml"), "r") as f:
            provenance = yaml.load(f, Loader=yaml.SafeLoader)

        pr = provenance["provenance"]

        def count_keys_with(prefix):
            """Number of provenance keys starting with *prefix*."""
            return sum(key.startswith(prefix) for key in pr)

        expected_counts = {"Measurement": 11, "Project": 5, "Person": 14}
        for prefix, count in expected_counts.items():
            assert count_keys_with(prefix) == count