Skip to content
Snippets Groups Projects
Select Git revision
  • bebf2a0c00c63ab27298d9ecd2ff0266fb4809dc
  • main default protected
  • dev protected
  • f-render-html-properties
  • f-vishesh0932-ext-cosmetics
  • f-table-references
  • f-update-legacy-adapter
  • f-refactor-refs
  • f-fix-caosadvancedtools-refs
  • f-linkahead-rename
  • f-citation-cff
  • f-map-resolve-reference
  • dev-bmpg
  • f-form-select
  • f-doc-extention
  • f-geo-position-records
  • f-data-analysis
  • f-area-folder-drop
  • f-fix-get-parents
  • f-fix-110
  • f-entity-state
  • v0.16.0
  • v0.15.2
  • v0.15.1
  • v0.15.0
  • v0.14.0
  • v0.13.3
  • v0.13.2
  • v0.13.1
  • v0.13.0
  • v0.12.0
  • v0.11.1
  • v0.11.0
  • v0.10.1
  • v0.10.0
  • v0.9.0
  • v0.8.0
  • v0.7.0
  • v0.6.0
  • v0.5.0
  • v0.4.2
41 results

ext_bottom_line_test_data.py

Blame
  • Code owners
    Assign users and groups as approvers for specific file changes. Learn more.
    test_dataset_crawler.py 3.61 KiB
    #!/usr/bin/env python3
    # encoding: utf-8
    #
    # This file is a part of the CaosDB Project.
    #
    # Copyright (C) 2022 Indiscale GmbH <info@indiscale.com>
    # Copyright (C) 2022 Henrik tom Wörden <h.tomwoerden@indiscale.com>
    # Copyright (C) 2022 Florian Spreckelsen <f.spreckelsen@indiscale.com>
    #
    # This program is free software: you can redistribute it and/or modify
    # it under the terms of the GNU Affero General Public License as
    # published by the Free Software Foundation, either version 3 of the
    # License, or (at your option) any later version.
    #
    # This program is distributed in the hope that it will be useful,
    # but WITHOUT ANY WARRANTY; without even the implied warranty of
    # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    # GNU Affero General Public License for more details.
    #
    # You should have received a copy of the GNU Affero General Public License
    # along with this program. If not, see <https://www.gnu.org/licenses/>.
    #
    
    """
    module description
    """
    import json
    import os
    import sys

    import pytest

    import caosdb as db
    from caosadvancedtools.models.parser import (parse_model_from_json_schema,
                                                 parse_model_from_yaml)
    from caosadvancedtools.testutils import clear_database, set_test_key

    from newcrawler.crawl import Crawler
    from newcrawler.converters import JSONFileConverter, DictConverter
    from newcrawler.identifiable_adapters import CaosDBIdentifiableAdapter
    from newcrawler.structure_elements import File, JSONFile, Directory
    set_test_key("10b128cf8a1372f30aa3697466bb55e76974e0c16a599bb44ace88f19c8f61e2")
    
    
    def rfp(*pathcomponents):
        """
        Return full path.
        Shorthand convenience function.
        """
        return os.path.join(os.path.dirname(__file__), *pathcomponents)
    
    
    DATADIR = rfp("..", "test_data", "extroot", "realworld_example")
    
    
    @pytest.fixture
    def usemodel():
        # First load dataspace data model
        dataspace_definitions = parse_model_from_json_schema(
            os.path.join(DATADIR, "schema", "dataspace.schema.json"))
        dataspace_definitions.sync_data_model(noquestion=True)
    
        # Then general dataset definitions
        dataset_definitions = parse_model_from_json_schema(
            os.path.join(DATADIR, "schema", "dataset.schema.json"))
        dataset_definitions.sync_data_model(noquestion=True)
    
        # Finally, add inheritances as defined in yaml
        dataset_inherits = parse_model_from_yaml(
            os.path.join(DATADIR, "schema", "dataset-inheritance.yml"))
        dataset_inherits.sync_data_model(noquestion=True)
    
    
    def test_dataset(clear_database, usemodel):
        # json_file_path = rfp("test_directories", "single_file_test_data", "testjson.json")
    
        ident = CaosDBIdentifiableAdapter()
        ident.register_identifiable(
            "license", db.RecordType().add_parent("license").add_property("name"))
        ident.register_identifiable("project_type", db.RecordType(
        ).add_parent("project_type").add_property("name"))
        ident.register_identifiable("Person", db.RecordType(
        ).add_parent("Person").add_property("full_name"))
    
        crawler = Crawler(debug=True, identifiableAdapter=ident)
        crawler_definition = crawler.load_definition(os.path.join(DATADIR, "dataset_cfoods.yml"))
        # print(json.dumps(crawler_definition, indent=3))
        # Load and register converter packages:
        converter_registry = crawler.load_converters(crawler_definition)
        # print("DictIntegerElement" in converter_registry)
    
        records = crawler.start_crawling(
            Directory(os.path.join(DATADIR, 'data'), "data"),
            crawler_definition,
            converter_registry
        )
        subd = crawler.debug_tree
        subc = crawler.debug_metadata
        # print(json.dumps(subc, indent=3))
        # print(subd)
        # print(subc)
        # print(records)
        ins, ups = crawler.synchronize()