#!/usr/bin/env python3
# encoding: utf-8
#
# This file is a part of the CaosDB Project.
#
# Copyright (C) 2022 Indiscale GmbH <info@indiscale.com>
# Copyright (C) 2022 Henrik tom Wörden <h.tomwoerden@indiscale.com>
# Copyright (C) 2022 Florian Spreckelsen <f.spreckelsen@indiscale.com>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
#
"""
module description
"""
import json
import os
import caosdb as db
from caoscrawler.crawl import Crawler, crawler_main
from caoscrawler.converters import JSONFileConverter, DictConverter
from caoscrawler.identifiable_adapters import CaosDBIdentifiableAdapter
from caoscrawler.structure_elements import File, JSONFile, Directory
import pytest
from caosadvancedtools.models.parser import parse_model_from_json_schema, parse_model_from_yaml
#from caosadvancedtools.testutils import clear_database, set_test_key
import sys
# TODO is not yet merged in caosadvancedtools
# set_test_key("10b128cf8a1372f30aa3697466bb55e76974e0c16a599bb44ace88f19c8f61e2")


def rfp(*pathcomponents):
"""
Return full path.
Shorthand convenience function.
"""
return os.path.join(os.path.dirname(__file__), *pathcomponents)


DATADIR = rfp("test_data", "extroot", "realworld_example")


@pytest.fixture
def usemodel():
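    """Load the data models from the JSON schemata and the YAML inheritance
    definition and synchronize them with the server."""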
# First load dataspace data model
dataspace_definitions = parse_model_from_json_schema(
os.path.join(DATADIR, "schema", "dataspace.schema.json"))
dataspace_definitions.sync_data_model(noquestion=True)
# Then general dataset definitions
dataset_definitions = parse_model_from_json_schema(
os.path.join(DATADIR, "schema", "dataset.schema.json"))
dataset_definitions.sync_data_model(noquestion=True)
# Finally, add inheritances as defined in yaml
dataset_inherits = parse_model_from_yaml(
os.path.join(DATADIR, "schema", "dataset-inheritance.yml"))
dataset_inherits.sync_data_model(noquestion=True)


@pytest.fixture
def clear_database():
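    """Delete all entities with an ID larger than 99 before a test."""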
# TODO(fspreck): Remove once the corresponding advancedtools function can
# be used.
ents = db.execute_query("FIND ENTITY WITH ID>99")
if ents:
ents.delete()


def create_identifiable_adapter():
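    """Create a CaosDBIdentifiableAdapter from the identifiables definition
    in the test data directory."""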
ident = CaosDBIdentifiableAdapter()
ident.load_from_yaml_definition(os.path.join(DATADIR, "identifiables.yml"))
return ident


def test_dataset(clear_database, usemodel):
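    """Crawl the example directory and check that the Dataspace, Person,
    Dataset and Event records have been inserted with the expected properties
    and references."""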
ident = create_identifiable_adapter()
crawler = Crawler(identifiableAdapter=ident)
crawler_definition = crawler.load_definition(
os.path.join(DATADIR, "dataset_cfoods.yml"))
# print(json.dumps(crawler_definition, indent=3))
# Load and register converter packages:
converter_registry = crawler.load_converters(crawler_definition)
# print("DictIntegerElement" in converter_registry)
records = crawler.start_crawling(
Directory("data", os.path.join(DATADIR, 'data')),
crawler_definition,
converter_registry
)
crawler.synchronize()
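    # Check that the crawled Dataspace, Person and Dataset records were
    # inserted with the expected properties.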
dataspace = db.execute_query("FIND RECORD Dataspace WITH name=35 AND dataspace_id=20002 AND "
"archived=FALSE AND url='https://datacloud.de/index.php/f/7679'"
" AND Person", unique=True)
assert dataspace.get_property("start_date").value == "2022-03-01"
db.execute_query("FIND RECORD Person with full_name='Max Schmitt' AND"
" given_name='Max'", unique=True)
dataset = db.execute_query(f"FIND RECORD Dataset with Dataspace={dataspace.id} AND title="
"'Random numbers created on a random autumn day in a random person\\'s office'"
"", unique=True)
assert db.execute_query(f"COUNT RECORD with id={dataset.id} AND WHICH REFERENCES Person WITH full_name="
"'Alexa Nozone' AND WHICH REFERENCES Person WITH full_name='Max Schmitt'"
"") == 1
assert db.execute_query(f"COUNT RECORD with id={dataset.id} AND WHICH REFERENCES Event WITH "
"start_datetime='2022-02-10T16:36:48+01:00'") == 1


def test_event_update(clear_database, usemodel):
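    """Crawl the example data once via crawler_main, then crawl it again with
    modified Event properties and check that the Dataset's Event reference
    has been updated accordingly."""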
identifiable_path = os.path.join(DATADIR, "identifiables.yml")
crawler_definition_path = os.path.join(DATADIR, "dataset_cfoods.yml")
# TODO(fspreck): Use crawler_main
crawler_main(
os.path.join(DATADIR, 'data'),
crawler_definition_path,
identifiable_path,
True,
os.path.join(DATADIR, "provenance.yml"),
False,
""
)
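    # The first crawler run should have inserted exactly one Dataset that
    # references an Event located in Bremen.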
old_dataset_rec = db.execute_query(
"FIND RECORD Dataset WHICH HAS AN EVENT WITH location='Bremen, Germany'")
assert len(old_dataset_rec) == 1
old_dataset_rec = old_dataset_rec[0]
assert old_dataset_rec.get_property("Event").datatype == db.LIST("Event")
assert len(old_dataset_rec.get_property("Event").value) == 1
old_event_rec = db.Record(
id=old_dataset_rec.get_property("Event").value[0]).retrieve()
# TODO(fspreck): crawl again manually, edit the event records in the update
# list, synchronize, and test whether the events have been updated.
ident = CaosDBIdentifiableAdapter()
ident.load_from_yaml_definition(identifiable_path)
second_crawler = Crawler(identifiableAdapter=ident)
crawler_definition = second_crawler.load_definition(
crawler_definition_path)
converter_registry = second_crawler.load_converters(crawler_definition)
records = second_crawler.start_crawling(
Directory("data", os.path.join(DATADIR, "data")),
crawler_definition,
converter_registry
)
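    # Modify the Event properties in the freshly crawled records so that the
    # synchronization updates the Event.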
for rec in records:
if rec.parents[0].name == "Event":
rec.get_property("longitude").value = 0.0
rec.get_property("latitude").value = 0.0
rec.get_property("location").value = "Origin"
elif rec.parents[0].name == "Dataset":
rec.get_property("Event").value[0].get_property(
"longitude").value = 0.0
rec.get_property("Event").value[0].get_property(
"latitude").value = 0.0
rec.get_property("Event").value[0].get_property(
"location").value = "Origin"
second_crawler.synchronize()
# Dataset is still the same Record, but with an updated event
new_dataset_rec = db.Record(id=old_dataset_rec.id).retrieve()
for prop in old_dataset_rec.get_properties():
if not prop.name == "Event":
assert new_dataset_rec.get_property(
prop.name).datatype == prop.datatype
assert new_dataset_rec.get_property(
prop.name).value == prop.value
assert new_dataset_rec.get_property("Event").datatype == db.LIST("Event")
assert new_dataset_rec.get_property("Event").value is not None
assert len(new_dataset_rec.get_property("Event").value) == 1
assert new_dataset_rec.get_property("Event").value[0] != old_event_rec.id
# The event has new properties
new_event_rec = db.Record(
id=new_dataset_rec.get_property("Event").value[0]).retrieve()
assert new_event_rec.get_property("longitude").value == 0.0
assert new_event_rec.get_property("latitude").value == 0.0
assert new_event_rec.get_property("location").value == "Origin"
assert new_event_rec.get_property(
"start_datetime").value == old_event_rec.get_property("start_datetime").value