Select Git revision
test_scanner.py
-
Henrik tom Wörden authored
- remove rfp - consolidate dircheckstr - remove usage of relatively large dataset that is controlled by LocalStorageIdentifiableAdapter for unittests
Henrik tom Wörden authored - remove rfp - consolidate dircheckstr - remove usage of relatively large dataset that is controlled by LocalStorageIdentifiableAdapter for unittests
Code owners
Assign users and groups as approvers for specific file changes. Learn more.
test_scanner.py 7.17 KiB
import json
import logging
import os
import warnings
from copy import deepcopy
from functools import partial
from os.path import basename, dirname, join
from pathlib import Path
from tempfile import NamedTemporaryFile
from unittest.mock import MagicMock, Mock, patch
import caosdb as db
import caosdb.common.models as dbmodels
import pytest
import yaml
from caoscrawler.crawl import (Crawler, SecurityMode, _treat_deprecated_prefix,
crawler_main, split_restricted_path)
from caoscrawler.debug_tree import DebugTree
from caoscrawler.identifiable import Identifiable
from caoscrawler.identifiable_adapters import (CaosDBIdentifiableAdapter,
IdentifiableAdapter,
LocalStorageIdentifiableAdapter)
from caoscrawler.scanner import (create_converter_registry, load_definition,
scan_directory, scan_structure_elements)
from caoscrawler.stores import GeneralStore, RecordStore
from caoscrawler.structure_elements import (DictElement, DictListElement,
DictTextElement, File)
from caosdb.apiutils import compare_entities
from caosdb.cached import cache_clear
from caosdb.exceptions import EmptyUniqueQueryError
from pytest import raises
from utils import dircheckstr as dircheck_base
# Directory that contains this test module; all fixture paths are built from it.
UNITTESTDIR = Path(__file__).parent

# ``dircheck_base`` with its first argument pre-bound to the example article
# directory, so tests only pass the remaining path components.
dircheckstr = partial(
    dircheck_base,
    UNITTESTDIR.joinpath("test_directories", "examples_article"),
)
def test_scan_structure_elements():
    """Scan an in-memory dict structure and check how many results come back.

    Loads ``example_datastructure.yml`` as plain data, wraps it in a
    ``DictElement`` and scans it with the definition from
    ``example_cfood.yml``. The returned collection must have four entries.
    """
    # NOTE: the former ``NamedTemporaryFile(delete=False)`` was never used here
    # and left an open handle plus a stray file behind on every run.
    with open(UNITTESTDIR / "example_datastructure.yml", "r") as f:
        data = yaml.safe_load(f)

    crawler_definition = load_definition(UNITTESTDIR / "example_cfood.yml")
    converter_registry = create_converter_registry(crawler_definition)

    recs = scan_structure_elements(DictElement(name="", value=data), crawler_definition,
                                   converter_registry)
    assert len(recs) == 4
def test_provenance_debug_data():
    """Check the provenance section of the saved debug data.

    Scans ``example_datastructure.yml`` with ``example_cfood.yml`` while
    collecting a ``DebugTree``, writes the debug data to a temporary file via
    ``Crawler.save_debug_data`` and reads it back as YAML. The ``provenance``
    mapping must contain four keys starting with ``"Ent"``.
    """
    # TODO rewrite the test to use a smaller example setup
    debug_tree = DebugTree()
    with open(UNITTESTDIR / "example_datastructure.yml", "r") as f:
        data = yaml.safe_load(f)

    crawler_definition = load_definition(UNITTESTDIR / "example_cfood.yml")
    converter_registry = create_converter_registry(crawler_definition)

    # Only the side effect on ``debug_tree`` matters; the return value is unused.
    scan_structure_elements(DictElement(name="", value=data), crawler_definition,
                            converter_registry, debug_tree=debug_tree)

    # Reserve a file name, then close our handle right away: the file is
    # written and re-read by name below, and must not stay open meanwhile.
    tmpfi = NamedTemporaryFile(delete=False)
    tmpfi.close()
    try:
        crawler = Crawler()
        crawler.save_debug_data(tmpfi.name, debug_tree)

        with open(tmpfi.name, "r") as f:
            provenance = yaml.safe_load(f)
    finally:
        # ``delete=False`` means nobody else cleans this file up.
        os.unlink(tmpfi.name)

    pr = provenance["provenance"]

    def check_key_count(prefix):
        """Return the number of provenance keys that start with ``prefix``."""
        return sum(1 for key in pr if key.startswith(prefix))

    assert check_key_count("Ent") == 4
def test_record_structure_generation():
    """Scan the ``examples_article`` directory and inspect the debug tree.

    For three nesting levels (``DataAnalysis``, the project directory and the
    measurement directory) the test looks at the debug tree entry, whose first
    element is the variable store and whose second is the record store, and at
    the matching ``"copied"`` metadata flags.
    """
    # TODO create a test from this that tests scan_structure
    # the cfood should be minimal but cover typical scenarios (e.g. children)
    # add also a minimal test for scan_directory; it can be very basic since the only difference
    # to scan_structure is the kind of starting structure_element (check this statement)
    # The test should not check debug tree output but actual created records

    # TODO test creation of debug information in a separate test

    tree = DebugTree()
    article_dir = UNITTESTDIR / "test_directories" / "examples_article"
    cfood = UNITTESTDIR / "scifolder_cfood.yml"
    scan_directory(article_dir, cfood, debug_tree=tree)
    copy_flags = tree.debug_metadata["copied"]

    # ------------------------------------------------------------------
    # Top level: the DataAnalysis node
    # ------------------------------------------------------------------
    node_key = dircheckstr("DataAnalysis")
    stores = tree.debug_tree[node_key]
    copied = copy_flags[node_key]

    assert len(stores) == 2
    variables, records = stores[0], stores[1]
    # variables store on Data Analysis node of debug tree
    assert len(variables) == 4
    # record store on Data Analysis node of debug tree
    assert len(records) == 0
    assert len(copied) == 2
    assert len(copied[0]) == 4
    assert len(copied[1]) == 0

    # The data analysis node creates one variable for the node itself:
    assert variables["DataAnalysis"] == "examples_article/DataAnalysis"
    assert copied[0]["DataAnalysis"] is False

    # ------------------------------------------------------------------
    # First level: the project directory
    # ------------------------------------------------------------------
    node_key = dircheckstr("DataAnalysis", "2020_climate-model-predict")
    stores = tree.debug_tree[node_key]
    copied = copy_flags[node_key]
    variables, records = stores[0], stores[1]

    assert len(records) == 1
    project = records["Project"]
    assert len(project.get_parents()) == 1
    assert project.get_parents()[0].name == "Project"
    assert project.get_property("date").value == "2020"
    assert project.get_property("identifier").value == "climate-model-predict"

    assert len(variables) == 9
    assert variables["date"] == "2020"
    assert variables["identifier"] == "climate-model-predict"
    assert variables["Project"].__class__ == db.Record
    assert variables["DataAnalysis"] == "examples_article/DataAnalysis"
    assert copied[0]["DataAnalysis"] is True
    assert variables["project_dir"] == "examples_article/DataAnalysis/2020_climate-model-predict"
    assert copied[0]["project_dir"] is False

    # Check the copy flags for the first level in the hierarchy:
    assert len(copied[0]) == 9
    assert len(copied[1]) == 1
    assert copied[1]["Project"] is False
    assert copied[0]["Project"] is False
    assert copied[0]["date"] is False
    assert copied[0]["identifier"] is False

    # ------------------------------------------------------------------
    # Second level: the measurement directory
    # ------------------------------------------------------------------
    node_key = dircheckstr("DataAnalysis",
                           "2020_climate-model-predict",
                           "2020-02-08_prediction-errors")
    stores = tree.debug_tree[node_key]
    copied = copy_flags[node_key]
    variables, records = stores[0], stores[1]

    assert len(variables) == 12
    assert variables["date"] == "2020-02-08"
    assert variables["identifier"] == "prediction-errors"
    assert variables["Project"].__class__ == db.Record
    assert variables["Measurement"].__class__ == db.Record

    assert len(records) == 2
    project = records["Project"]
    assert len(project.get_parents()) == 1
    assert project.get_parents()[0].name == "Project"
    assert project.get_property("date").value == "2020"
    assert project.get_property("identifier").value == "climate-model-predict"

    measurement = records["Measurement"]
    assert len(measurement.get_parents()) == 1
    assert measurement.get_parents()[0].name == "Measurement"
    assert measurement.get_property("date").value == "2020-02-08"
    assert measurement.get_property("identifier").value == "prediction-errors"
    # The "$Project" placeholder must have been replaced by the actual record.
    assert measurement.get_property("project").value != "$Project"
    assert measurement.get_property("project").value.__class__ == db.Record
    assert measurement.get_property("project").value == variables["Project"]

    # Check the copy flags for the second level in the hierarchy:
    assert copied[1]["Project"] is True
    assert copied[0]["Project"] is True
    assert copied[1]["Measurement"] is False
    assert copied[0]["Measurement"] is False
    assert copied[0]["date"] is False
    assert copied[0]["identifier"] is False