test_dataset_crawler.py
#!/usr/bin/env python3
# encoding: utf-8
#
# This file is a part of the CaosDB Project.
#
# Copyright (C) 2022 Indiscale GmbH <info@indiscale.com>
# Copyright (C) 2022 Henrik tom Wörden <h.tomwoerden@indiscale.com>
# Copyright (C) 2022 Florian Spreckelsen <f.spreckelsen@indiscale.com>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
#
"""
module description
"""
import json
import os
import sys

import caosdb as db
import pytest
from caosadvancedtools.models.parser import (parse_model_from_json_schema,
                                              parse_model_from_yaml)
from caosadvancedtools.testutils import clear_database, set_test_key
from newcrawler.converters import DictConverter, JSONFileConverter
from newcrawler.crawl import Crawler
from newcrawler.identifiable_adapters import CaosDBIdentifiableAdapter
from newcrawler.structure_elements import Directory, File, JSONFile
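
# Key checked by the clear_database fixture; it is meant to ensure that only a
# dedicated test instance is wiped between tests (behaviour assumed from
# caosadvancedtools.testutils).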
set_test_key("10b128cf8a1372f30aa3697466bb55e76974e0c16a599bb44ace88f19c8f61e2")
def rfp(*pathcomponents):
    """
    Return full path.
    Shorthand convenience function.
    """
    return os.path.join(os.path.dirname(__file__), *pathcomponents)


DATADIR = rfp("..", "test_data", "extroot", "realworld_example")


@pytest.fixture
def usemodel():
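    """Synchronise the data models needed by the test: the dataspace and
    dataset JSON schemata and the inheritance definitions from the YAML file.
    """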
    # First load dataspace data model
    dataspace_definitions = parse_model_from_json_schema(
        os.path.join(DATADIR, "schema", "dataspace.schema.json"))
    dataspace_definitions.sync_data_model(noquestion=True)

    # Then general dataset definitions
    dataset_definitions = parse_model_from_json_schema(
        os.path.join(DATADIR, "schema", "dataset.schema.json"))
    dataset_definitions.sync_data_model(noquestion=True)

    # Finally, add inheritances as defined in yaml
    dataset_inherits = parse_model_from_yaml(
        os.path.join(DATADIR, "schema", "dataset-inheritance.yml"))
    dataset_inherits.sync_data_model(noquestion=True)


def test_dataset(clear_database, usemodel):
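    """Crawl the example data directory with the dataset cfood and synchronise
    the resulting records with the test instance.
    """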
    # json_file_path = rfp("test_directories", "single_file_test_data", "testjson.json")
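    # Register which properties identify records of these types, so that
    # crawled records can be matched against existing entities: licenses and
    # project types by name, persons by their full_name.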
    ident = CaosDBIdentifiableAdapter()
    ident.register_identifiable(
        "license", db.RecordType().add_parent("license").add_property("name"))
    ident.register_identifiable("project_type", db.RecordType(
    ).add_parent("project_type").add_property("name"))
    ident.register_identifiable("Person", db.RecordType(
    ).add_parent("Person").add_property("full_name"))
    crawler = Crawler(debug=True, identifiableAdapter=ident)
    crawler_definition = crawler.load_definition(os.path.join(DATADIR, "dataset_cfoods.yml"))
    # print(json.dumps(crawler_definition, indent=3))

    # Load and register converter packages:
    converter_registry = crawler.load_converters(crawler_definition)
    # print("DictIntegerElement" in converter_registry)
    records = crawler.start_crawling(
        Directory(os.path.join(DATADIR, 'data'), "data"),
        crawler_definition,
        converter_registry
    )
    subd = crawler.debug_tree
    subc = crawler.debug_metadata
    # print(json.dumps(subc, indent=3))
    # print(subd)
    # print(subc)
    # print(records)
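    # Write the crawled records to the server: ins holds the newly inserted
    # entities, ups the updated ones.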
    ins, ups = crawler.synchronize()