Skip to content
Snippets Groups Projects
Select Git revision
  • 2b33eb617616f5aa76bc455b84c209fa4f446e40
  • main default protected
  • dev
  • f-spss-value-label-name
  • f-unmod
  • f-checkidentical
  • f-simple-breakpoint
  • f-new-debug-tree
  • f-existing-file-id
  • f-no-ident
  • f-collect-problems
  • f-refactor-debug-tree
  • v0.13.0
  • v0.12.0
  • v0.11.0
  • v0.10.1
  • v0.10.0
  • v0.9.1
  • v0.9.0
  • v0.8.0
  • v0.7.1
  • v0.7.0
  • v0.6.0
  • v0.5.0
  • v0.4.0
  • v0.3.0
  • v0.2.0
  • v0.1.0
28 results

test_scanner.py

Blame
  • Code owners
    Assign users and groups as approvers for specific file changes. Learn more.
    test_scanner.py 7.17 KiB
    
    import json
    import logging
    import os
    import warnings
    from copy import deepcopy
    from functools import partial
    from os.path import basename, dirname, join
    from pathlib import Path
    from tempfile import NamedTemporaryFile
    from unittest.mock import MagicMock, Mock, patch
    
    import caosdb as db
    import caosdb.common.models as dbmodels
    import pytest
    import yaml
    from caoscrawler.crawl import (Crawler, SecurityMode, _treat_deprecated_prefix,
                                   crawler_main, split_restricted_path)
    from caoscrawler.debug_tree import DebugTree
    from caoscrawler.identifiable import Identifiable
    from caoscrawler.identifiable_adapters import (CaosDBIdentifiableAdapter,
                                                   IdentifiableAdapter,
                                                   LocalStorageIdentifiableAdapter)
    from caoscrawler.scanner import (create_converter_registry, load_definition,
                                     scan_directory, scan_structure_elements)
    from caoscrawler.stores import GeneralStore, RecordStore
    from caoscrawler.structure_elements import (DictElement, DictListElement,
                                                DictTextElement, File)
    from caosdb.apiutils import compare_entities
    from caosdb.cached import cache_clear
    from caosdb.exceptions import EmptyUniqueQueryError
    from pytest import raises
    
    from utils import dircheckstr as dircheck_base
    
    # Directory that contains this test module; the tests below address their
    # fixture files (YAML data, cfoods, example directories) relative to it.
    UNITTESTDIR = Path(__file__).parent
    
    # ``dircheck_base`` with its first argument pre-bound to the ``examples_article``
    # test directory; called below with path components to obtain debug-tree keys.
    dircheckstr = partial(dircheck_base, UNITTESTDIR/"test_directories" / "examples_article")
    
    
    def test_scan_structure_elements():
        """Scan the example data structure and check how many entries are returned.

        The content of ``example_datastructure.yml`` is wrapped in a root
        ``DictElement`` and scanned with the definition loaded from
        ``example_cfood.yml``. The scan is expected to return four entries.
        """
        with open(UNITTESTDIR/"example_datastructure.yml", "r") as f:
            data = yaml.load(f, Loader=yaml.SafeLoader)

        crawler_definition = load_definition(UNITTESTDIR/"example_cfood.yml")
        converter_registry = create_converter_registry(crawler_definition)
        recs = scan_structure_elements(DictElement(name="", value=data), crawler_definition,
                                       converter_registry)
        assert len(recs) == 4
    
    
    def test_provenance_debug_data():
        """Check the provenance section of the debug data written by the crawler.

        The example data structure is scanned with a ``DebugTree`` passed in, the
        debug data is saved to a temporary YAML file, and the ``"provenance"``
        mapping read back from that file must contain exactly four keys that
        start with ``"Ent"``.
        """
        # TODO rewrite the test to use a smaller example setup
        debug_tree = DebugTree()
        with open(UNITTESTDIR/"example_datastructure.yml", "r") as f:
            data = yaml.load(f, Loader=yaml.SafeLoader)

        crawler_definition = load_definition(UNITTESTDIR/"example_cfood.yml")
        converter_registry = create_converter_registry(crawler_definition)
        # The scan is run with ``debug_tree`` passed in; its return value is not
        # needed by this test.
        scan_structure_elements(DictElement(name="", value=data), crawler_definition,
                                converter_registry, debug_tree=debug_tree)

        # ``delete=False`` keeps the file around so it can be written and read by
        # name. Close our own handle immediately and remove the file explicitly
        # afterwards so that the test does not leave temporary files behind.
        tmpfi = NamedTemporaryFile(delete=False)
        tmpfi.close()
        try:
            crawler = Crawler()
            crawler.save_debug_data(tmpfi.name, debug_tree)
            with open(tmpfi.name, "r") as f:
                provenance = yaml.load(f, Loader=yaml.SafeLoader)
        finally:
            os.remove(tmpfi.name)

        pr = provenance["provenance"]

        def check_key_count(prefix):
            """Return the number of provenance keys starting with ``prefix``."""
            return sum(1 for key in pr if key.startswith(prefix))
        assert check_key_count("Ent") == 4
    
    
    def test_record_structure_generation():
        """Scan ``examples_article`` and inspect the debug tree on three directory levels.

        For the ``DataAnalysis`` directory, the project directory below it and the
        measurement directory below that, the debug tree entry (index 0: variable
        store, index 1: record store) and the matching ``"copied"`` flags from the
        debug metadata are compared against the expected values.
        """
        # TODO create a test from this that tests scan_structure
        # the cfood should be minimal but cover typical scenarios (e.g. children)
        # add also a minimal test for scan_directory; it can be very basic since the only difference
        # to scan_structure is the kind of starting structure_element (check this statement)
        # The test should not check debug tree output but actual created records

        # TODO test creation of debug information in a separate test

        debug = DebugTree()
        scan_directory(UNITTESTDIR/"test_directories" / "examples_article",
                       UNITTESTDIR/"scifolder_cfood.yml",
                       debug_tree=debug)

        # ---- Level 0: the DataAnalysis directory itself ----
        analysis_key = dircheckstr("DataAnalysis")
        node = debug.debug_tree[analysis_key]
        flags = debug.debug_metadata["copied"][analysis_key]
        assert len(node) == 2
        # index 0 holds the variable store of the node
        assert len(node[0]) == 4
        # index 1 holds the record store of the node
        assert len(node[1]) == 0
        assert len(flags) == 2
        assert len(flags[0]) == 4
        assert len(flags[1]) == 0

        # The node defines one variable for itself, which is not a copy:
        assert node[0]["DataAnalysis"] == "examples_article/DataAnalysis"
        assert flags[0]["DataAnalysis"] is False

        # ---- Level 1: the project directory ----
        project_key = dircheckstr("DataAnalysis", "2020_climate-model-predict")
        node = debug.debug_tree[project_key]
        flags = debug.debug_metadata["copied"][project_key]

        assert len(node[1]) == 1
        project_rec = node[1]["Project"]
        assert len(project_rec.get_parents()) == 1
        assert project_rec.get_parents()[0].name == "Project"
        assert project_rec.get_property("date").value == "2020"
        assert project_rec.get_property("identifier").value == "climate-model-predict"

        assert len(node[0]) == 9
        assert node[0]["date"] == "2020"
        assert node[0]["identifier"] == "climate-model-predict"
        assert node[0]["Project"].__class__ == db.Record

        assert node[0]["DataAnalysis"] == "examples_article/DataAnalysis"
        assert flags[0]["DataAnalysis"] is True
        assert node[0]["project_dir"] == "examples_article/DataAnalysis/2020_climate-model-predict"
        assert flags[0]["project_dir"] is False

        # Copy flags on the first level of the hierarchy:
        assert len(flags[0]) == 9
        assert len(flags[1]) == 1
        assert flags[1]["Project"] is False
        for fresh_name in ("Project", "date", "identifier"):
            assert flags[0][fresh_name] is False

        # ---- Level 2: the measurement directory ----
        measurement_key = dircheckstr("DataAnalysis",
                                      "2020_climate-model-predict",
                                      "2020-02-08_prediction-errors")
        node = debug.debug_tree[measurement_key]
        flags = debug.debug_metadata["copied"][measurement_key]

        assert len(node[0]) == 12
        assert node[0]["date"] == "2020-02-08"
        assert node[0]["identifier"] == "prediction-errors"
        for record_name in ("Project", "Measurement"):
            assert node[0][record_name].__class__ == db.Record

        assert len(node[1]) == 2

        project_rec = node[1]["Project"]
        assert len(project_rec.get_parents()) == 1
        assert project_rec.get_parents()[0].name == "Project"
        assert project_rec.get_property("date").value == "2020"
        assert project_rec.get_property("identifier").value == "climate-model-predict"

        measurement_rec = node[1]["Measurement"]
        assert len(measurement_rec.get_parents()) == 1
        assert measurement_rec.get_parents()[0].name == "Measurement"
        assert measurement_rec.get_property("date").value == "2020-02-08"
        assert measurement_rec.get_property("identifier").value == "prediction-errors"
        # The project reference must hold the record itself, not the "$Project" string.
        linked_project = measurement_rec.get_property("project").value
        assert linked_project != "$Project"
        assert linked_project.__class__ == db.Record
        assert linked_project == node[0]["Project"]

        # Copy flags on the second level of the hierarchy:
        assert flags[1]["Project"] is True
        assert flags[0]["Project"] is True
        assert flags[1]["Measurement"] is False
        for fresh_name in ("Measurement", "date", "identifier"):
            assert flags[0][fresh_name] is False