From ce5ea287abcef18a6691a31f8aecdee850568321 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20tom=20W=C3=B6rden?= <henrik@trineo.org> Date: Wed, 13 Apr 2022 18:35:18 +0200 Subject: [PATCH] TST: fix yml and json --- .gitignore | 3 +- unittests/jsontest_cfood.yml | 18 ++++++ .../single_file_test_data/testjson.json | 14 +++++ unittests/test_json.py | 60 +++++++++++++++++++ 4 files changed, 94 insertions(+), 1 deletion(-) create mode 100644 unittests/jsontest_cfood.yml create mode 100644 unittests/test_directories/single_file_test_data/testjson.json create mode 100644 unittests/test_json.py diff --git a/.gitignore b/.gitignore index 905a0ea8..a317a784 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,6 @@ src/newcrawler.egg-info/ .coverage +__pycache__ TAGS src/.coverage build/ @@ -10,4 +11,4 @@ provenance.yml *.pem *.jks *.tar.gz -*.sql \ No newline at end of file +*.sql diff --git a/unittests/jsontest_cfood.yml b/unittests/jsontest_cfood.yml new file mode 100644 index 00000000..f8a99bce --- /dev/null +++ b/unittests/jsontest_cfood.yml @@ -0,0 +1,18 @@ + +JSONTest: # name of the converter + type: JSONFile + match: '(.*)' + subtree: + element: # name of the first subtree element which is a converter + type: Dict + match: ".*" + subtree: + element2: # name of the first subtree element which is a converter + type: DictTextElement + match_value: "(?P<url>.*)" + match_name: "url" + records: + Project: # this is an identifiable in this case + parents: + - Project # not needed as the name is equivalent + identifier: $url diff --git a/unittests/test_directories/single_file_test_data/testjson.json b/unittests/test_directories/single_file_test_data/testjson.json new file mode 100644 index 00000000..7b7361c1 --- /dev/null +++ b/unittests/test_directories/single_file_test_data/testjson.json @@ -0,0 +1,14 @@ +{ + "name": "DEMO", + "id": 10002, + "archived": false, + "coordinator": { + "firstname": "Miri", + "lastname": "Mueller", + "email": "miri.mueller@science.de" + }, + 
"start_date": "2022-03-01", + "candidates": ["Mouse", "Penguine"], + "rvalue": 0.4444, + "url": "https://site.de/index.php/" +} diff --git a/unittests/test_json.py b/unittests/test_json.py new file mode 100644 index 00000000..ee2454ee --- /dev/null +++ b/unittests/test_json.py @@ -0,0 +1,60 @@ +#!/usr/bin/env python3 +# encoding: utf-8 +# +# ** header v3.0 +# This file is a part of the CaosDB Project. +# +# Copyright (C) 2021 Indiscale GmbH <info@indiscale.com> +# Copyright (C) 2021 Henrik tom Wörden <h.tomwoerden@indiscale.com> +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see <https://www.gnu.org/licenses/>. 
#
# ** end header
#

"""
Unit test that crawls a single JSON file (``testjson.json``) using the
crawler definition stored in ``jsontest_cfood.yml``.
"""

from newcrawler.crawl import Crawler
from newcrawler.converters import JSONFileConverter, DictConverter
import os

from newcrawler.structure_elements import File, JSONFile

import json
from test_tool import rfp, dircheckstr


def test_json():
    """Run the crawler on the JSON test file and print its debug output.

    The crawler definition is loaded from ``jsontest_cfood.yml`` and the
    crawl starts at a ``JSONFile`` structure element wrapping
    ``test_directories/single_file_test_data/testjson.json``, with
    ``JSONFileConverter`` passed as the last argument of
    ``start_crawling``.

    The test currently only checks that the crawl runs without raising;
    the debug tree and debug metadata are printed for manual inspection.
    """
    crawler_definition_path = rfp("jsontest_cfood.yml")
    json_file_path = rfp("test_directories", "single_file_test_data", "testjson.json")

    # debug=True: the debug_tree / debug_metadata attributes are read below.
    crawler = Crawler(debug=True)
    crawler_definition = crawler.load_definition(crawler_definition_path)
    # Load and register converter packages:
    converter_registry = crawler.load_converters(crawler_definition)

    crawler.start_crawling(
        JSONFile(os.path.basename(json_file_path), json_file_path),
        crawler_definition,
        converter_registry,
        JSONFileConverter
    )

    # TODO(review): replace these prints with real assertions on the debug
    # tree / metadata once their expected content for testjson.json is
    # settled.  A stray undefined name used to sit here and made the test
    # fail unconditionally with a NameError; it has been removed.
    print(crawler.debug_tree)
    print(crawler.debug_metadata)