Skip to content
Snippets Groups Projects
Commit ce5ea287 authored by Henrik tom Wörden's avatar Henrik tom Wörden
Browse files

TST: fix yml and json

parent 4c0d720d
Branches
Tags
2 merge requests!53Release 0.1,!9ENH: introduce JSON converter
src/newcrawler.egg-info/
.coverage
__pycache__
TAGS
src/.coverage
build/
......@@ -10,4 +11,4 @@ provenance.yml
*.pem
*.jks
*.tar.gz
*.sql
\ No newline at end of file
*.sql
JSONTest: # name of the converter
type: JSONFile
match: '(.*)'
subtree:
element: # name of the first subtree element which is a converter
type: Dict
match: ".*"
subtree:
element2: # name of the first subtree element which is a converter
type: DictTextElement
match_value: "(?P<url>.*)"
match_name: "url"
records:
Project: # this is an identifiable in this case
parents:
- Project # not needed as the name is equivalent
identifier: $url
{
"name": "DEMO",
"id": 10002,
"archived": false,
"coordinator": {
"firstname": "Miri",
"lastname": "Mueller",
"email": "miri.mueller@science.de"
},
"start_date": "2022-03-01",
"candidates": ["Mouse", "Penguine"],
"rvalue": 0.4444,
"url": "https://site.de/index.php/"
}
#!/usr/bin/env python3
# encoding: utf-8
#
# ** header v3.0
# This file is a part of the CaosDB Project.
#
# Copyright (C) 2021 Indiscale GmbH <info@indiscale.com>
# Copyright (C) 2021 Henrik tom Wörden <h.tomwoerden@indiscale.com>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
#
# ** end header
#
"""
Test crawling a single JSON file using the JSON file converter.
"""
from newcrawler.crawl import Crawler
from newcrawler.converters import JSONFileConverter, DictConverter
import os
from newcrawler.structure_elements import File, JSONFile
import json
from test_tool import rfp, dircheckstr
def test_json():
    """Crawl a single JSON test file and print the crawler's debug output.

    The crawler definition is loaded from ``jsontest_cfood.yml`` and the
    converters it declares are registered.  Crawling then starts on
    ``testjson.json``, wrapped in a ``JSONFile`` structure element, with
    ``JSONFileConverter`` passed as the final argument.

    NOTE(review): this test has no assertions yet; it only checks that
    crawling completes without raising.  The printed debug data is visible
    when pytest is run with ``-s``.
    """
    crawler_definition_path = rfp("jsontest_cfood.yml")
    json_file_path = rfp("test_directories", "single_file_test_data", "testjson.json")

    crawler = Crawler(debug=True)
    crawler_definition = crawler.load_definition(crawler_definition_path)
    # Load and register converter packages:
    converter_registry = crawler.load_converters(crawler_definition)

    crawler.start_crawling(
        JSONFile(os.path.basename(json_file_path), json_file_path),
        crawler_definition,
        converter_registry,
        JSONFileConverter
    )

    # Debug data kept by the crawler (it was created with debug=True).
    subd = crawler.debug_tree
    subc = crawler.debug_metadata
    print(subd)
    print(subc)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment