diff --git a/.gitignore b/.gitignore index 905a0ea8ea6816a80ce75de95045b2f12946e2d0..a317a7844ce52677efdceeb7d826ed9941cac3f0 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,6 @@ src/newcrawler.egg-info/ .coverage +__pycache__ TAGS src/.coverage build/ @@ -10,4 +11,4 @@ provenance.yml *.pem *.jks *.tar.gz -*.sql \ No newline at end of file +*.sql diff --git a/unittests/jsontest_cfood.yml b/unittests/jsontest_cfood.yml new file mode 100644 index 0000000000000000000000000000000000000000..f8a99bcedf2865d1c8f9c3280022d76bbea97e6f --- /dev/null +++ b/unittests/jsontest_cfood.yml @@ -0,0 +1,18 @@ + +JSONTest: # name of the converter + type: JSONFile + match: '(.*)' + subtree: + element: # name of the first subtree element which is a converter + type: Dict + match: ".*" + subtree: + element2: # name of the first subtree element which is a converter + type: DictTextElement + match_value: "(?P<url>.*)" + match_name: "url" + records: + Project: # this is an identifiable in this case + parents: + - Project # not needed as the name is equivalent + identifier: $url diff --git a/unittests/test_directories/single_file_test_data/testjson.json b/unittests/test_directories/single_file_test_data/testjson.json new file mode 100644 index 0000000000000000000000000000000000000000..7b7361c15f6857846bc05edfe4db14316bba5ce8 --- /dev/null +++ b/unittests/test_directories/single_file_test_data/testjson.json @@ -0,0 +1,14 @@ +{ + "name": "DEMO", + "id": 10002, + "archived": false, + "coordinator": { + "firstname": "Miri", + "lastname": "Mueller", + "email": "miri.mueller@science.de" + }, + "start_date": "2022-03-01", + "candidates": ["Mouse", "Penguine"], + "rvalue": 0.4444, + "url": "https://site.de/index.php/" +} diff --git a/unittests/test_json.py b/unittests/test_json.py new file mode 100644 index 0000000000000000000000000000000000000000..ee2454eed5b4517638a4acaf209e3fb1050e2447 --- /dev/null +++ b/unittests/test_json.py @@ -0,0 +1,60 @@ +#!/usr/bin/env python3 +# encoding: utf-8 +# +# 
** header v3.0
+# This file is a part of the CaosDB Project.
+#
+# Copyright (C) 2021 Indiscale GmbH <info@indiscale.com>
+# Copyright (C) 2021 Henrik tom Wörden <h.tomwoerden@indiscale.com>
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as
+# published by the Free Software Foundation, either version 3 of the
+# License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+#
+# ** end header
+#
+
+"""
+Test crawling a single JSON file with the JSONFile converter.
+"""
+
+from newcrawler.crawl import Crawler
+from newcrawler.converters import JSONFileConverter, DictConverter
+import os
+
+from newcrawler.structure_elements import File, JSONFile
+
+import json
+from test_tool import rfp, dircheckstr
+
+
+def test_json():
+    """Crawl the JSON fixture with the JSON cfood and print the debug output.
+
+    Loads the crawler definition ``jsontest_cfood.yml``, registers its
+    converters and starts crawling ``testjson.json`` wrapped in a
+    ``JSONFile`` structure element, using ``JSONFileConverter`` as the
+    initial converter. Afterwards the crawler's ``debug_tree`` and
+    ``debug_metadata`` are printed.
+    """
+    crawler_definition_path = rfp("jsontest_cfood.yml")
+    json_file_path = rfp("test_directories", "single_file_test_data", "testjson.json")
+
+    crawler = Crawler(debug=True)
+    crawler_definition = crawler.load_definition(crawler_definition_path)
+    # Load and register converter packages:
+    converter_registry = crawler.load_converters(crawler_definition)
+
+    crawler.start_crawling(
+        JSONFile(os.path.basename(json_file_path), json_file_path),
+        crawler_definition,
+        converter_registry,
+        JSONFileConverter
+    )
+    subd = crawler.debug_tree
+    subc = crawler.debug_metadata
+    # The test used to end with an undefined bare name, which made it fail
+    # with a NameError on every run. Run pytest with ``-s`` to see the
+    # output below instead of forcing a failure.
+    # TODO: replace the prints with assertions on the expected contents of
+    # the debug tree and the debug metadata.
+    print(subd)
+    print(subc)