diff --git a/.docker/Dockerfile b/.docker/Dockerfile index 1ba73a5b3ea0e4969e882ab11990303aa432f3eb..b300a1a97aa22b3eafc91ef89c01bbd7111edd62 100644 --- a/.docker/Dockerfile +++ b/.docker/Dockerfile @@ -24,7 +24,7 @@ COPY . /git RUN rm -r /git/.git # Install pycaosdb.ini for the tests -RUN mv /git/.docker/tester_pycaosdb.ini /git/pycaosdb.ini +RUN mv /git/.docker/tester_pycaosdb.ini /git/integrationtests/pycaosdb.ini RUN cd /git/ && pip3 install . @@ -34,4 +34,4 @@ CMD /wait-for-it.sh caosdb-server:10443 -t 500 -- \ # ... install pycaosdb.ini the server-side scripts cp /git/.docker/sss_pycaosdb.ini /scripting/home/.pycaosdb.ini && \ # ... and run tests - pytest . + pytest-3 . diff --git a/integrationtests/README.md b/integrationtests/README.md index 5c308f51a332d5a930f91eb30f0d93032ae47627..96789ed9f02036a0c7cc25ca1a60d9f0042a5557 100644 --- a/integrationtests/README.md +++ b/integrationtests/README.md @@ -1,3 +1,2 @@ -1. Clear database (see clear_database.py) -2. Insert model (see insert_model.py) -3. Run test.py +1. Mount test_data/extroot as extroot folder in the CaosDB server +2. use an empty server diff --git a/integrationtests/model.yml b/integrationtests/basic_example/model.yml similarity index 100% rename from integrationtests/model.yml rename to integrationtests/basic_example/model.yml diff --git a/integrationtests/test.py b/integrationtests/basic_example/test.py similarity index 99% rename from integrationtests/test.py rename to integrationtests/basic_example/test.py index 2d8818005ee4350bd3b1403ae71c315c6d5c1a14..35ee991808a0194357bf9cdc6e8f6c990d282a10 100755 --- a/integrationtests/test.py +++ b/integrationtests/basic_example/test.py @@ -40,6 +40,7 @@ import pytest from caosadvancedtools.models.parser import parse_model_from_yaml import yaml +# TODO is not yet merged in caosadvancedtools from caosadvancedtools.testutils import clear_database, set_test_key set_test_key("10b128cf8a1372f30aa3697466bb55e76974e0c16a599bb44ace88f19c8f61e2") diff --git a/integrationtests/realworld_example/crawl.sh b/integrationtests/realworld_example/crawl.sh new file mode 100755 index 0000000000000000000000000000000000000000..55a2a331fe517a539e2dd937ac35605c72b496c9 --- /dev/null +++ b/integrationtests/realworld_example/crawl.sh @@ -0,0 +1,4 @@ +#!/bin/bash +python -m caosadvancedtools.loadFiles /opt/caosdb/mnt/extroot/data +python load_and_insert_json_models.py +python test_dataset_crawler.py diff --git a/integrationtests/realworld_example/load_and_insert_json_models.py b/integrationtests/realworld_example/load_and_insert_json_models.py new file mode 100644 index 0000000000000000000000000000000000000000..682fd9c77531e63ed18dd13417399ad0d18a8de2 --- /dev/null +++ b/integrationtests/realworld_example/load_and_insert_json_models.py @@ -0,0 +1,47 @@ +#!/usr/bin/env python3 +# encoding: utf-8 +# +# This file is a part of the CaosDB Project. +# +# Copyright (C) 2022 Indiscale GmbH <info@indiscale.com> +# Copyright (C) 2022 Henrik tom Wörden <h.tomwoerden@indiscale.com> +# Copyright (C) 2022 Florian Spreckelsen <f.spreckelsen@indiscale.com> +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+#
+import sys
+
+from caosadvancedtools.models.parser import parse_model_from_json_schema, parse_model_from_yaml
+
+
+def main():
+    # First load dataspace data model
+    dataspace_definitions = parse_model_from_json_schema(
+        "schema/dataspace.schema.json")
+    dataspace_definitions.sync_data_model(noquestion=True)
+
+    # Then general dataset definitions
+    dataset_definitions = parse_model_from_json_schema(
+        "schema/dataset.schema.json")
+    dataset_definitions.sync_data_model(noquestion=True)
+
+    # Finally, add inheritances as defined in yaml
+    dataset_inherits = parse_model_from_yaml(
+        "schema/dataset-inheritance.yml")
+    dataset_inherits.sync_data_model(noquestion=True)
+
+
+if __name__ == "__main__":
+
+    sys.exit(main())
diff --git a/integrationtests/realworld_example/test_dataset_crawler.py b/integrationtests/realworld_example/test_dataset_crawler.py
new file mode 100644
index 0000000000000000000000000000000000000000..206020cef67b20716bacfa99399d63bb8e06360e
--- /dev/null
+++ b/integrationtests/realworld_example/test_dataset_crawler.py
@@ -0,0 +1,122 @@
+#!/usr/bin/env python3
+# encoding: utf-8
+#
+# This file is a part of the CaosDB Project.
+#
+# Copyright (C) 2022 Indiscale GmbH <info@indiscale.com>
+# Copyright (C) 2022 Henrik tom Wörden <h.tomwoerden@indiscale.com>
+# Copyright (C) 2022 Florian Spreckelsen <f.spreckelsen@indiscale.com>
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as
+# published by the Free Software Foundation, either version 3 of the
+# License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+#
+
+"""
+Integration test for the dataset crawler using the realworld_example test data.
+"""
+import json
+import os
+
+import caosdb as db
+
+from newcrawler.crawl import Crawler
+from newcrawler.converters import JSONFileConverter, DictConverter
+from newcrawler.identifiable_adapters import CaosDBIdentifiableAdapter
+from newcrawler.structure_elements import File, JSONFile, Directory
+import pytest
+from caosadvancedtools.models.parser import parse_model_from_json_schema, parse_model_from_yaml
+
+#from caosadvancedtools.testutils import clear_database, set_test_key
+import sys
+
+# TODO: clear_database and set_test_key are not yet merged into caosadvancedtools
+# set_test_key("10b128cf8a1372f30aa3697466bb55e76974e0c16a599bb44ace88f19c8f61e2")
+
+
+def rfp(*pathcomponents):
+    """
+    Return full path.
+    Shorthand convenience function.
+ """ + return os.path.join(os.path.dirname(__file__), *pathcomponents) + + +DATADIR = rfp("..", "test_data", "extroot", "realworld_example") + + +@pytest.fixture +def usemodel(): + # First load dataspace data model + dataspace_definitions = parse_model_from_json_schema( + os.path.join(DATADIR, "schema", "dataspace.schema.json")) + dataspace_definitions.sync_data_model(noquestion=True) + + # Then general dataset definitions + dataset_definitions = parse_model_from_json_schema( + os.path.join(DATADIR, "schema", "dataset.schema.json")) + dataset_definitions.sync_data_model(noquestion=True) + + # Finally, add inheritances as defined in yaml + dataset_inherits = parse_model_from_yaml( + os.path.join(DATADIR, "schema", "dataset-inheritance.yml")) + dataset_inherits.sync_data_model(noquestion=True) + + +def test_dataset( + # clear_database, + usemodel): + # json_file_path = rfp("test_directories", "single_file_test_data", "testjson.json") + + ident = CaosDBIdentifiableAdapter() + ident.register_identifiable( + "license", db.RecordType().add_parent("license").add_property("name")) + ident.register_identifiable("project_type", db.RecordType( + ).add_parent("project_type").add_property("name")) + ident.register_identifiable("Person", db.RecordType( + ).add_parent("Person").add_property("full_name")) + + crawler = Crawler(debug=True, identifiableAdapter=ident) + crawler_definition = crawler.load_definition(os.path.join(DATADIR, "dataset_cfoods.yml")) + # print(json.dumps(crawler_definition, indent=3)) + # Load and register converter packages: + converter_registry = crawler.load_converters(crawler_definition) + # print("DictIntegerElement" in converter_registry) + + records = crawler.start_crawling( + Directory("data", os.path.join(DATADIR, 'data')), + crawler_definition, + converter_registry + ) + subd = crawler.debug_tree + subc = crawler.debug_metadata + # print(json.dumps(subc, indent=3)) + # print(subd) + # print(subc) + # print(records) + ins, ups = crawler.synchronize() + + dataspace = db.execute_query("FIND RECORD Dataspace WITH name=35 AND dataspace_id=20002 AND " + "archived=FALSE AND url='https://datacloud.de/index.php/f/7679'" + " AND Person", unique=True) + assert dataspace.get_property("start_date").value == "2022-03-01" + db.execute_query("FIND RECORD Person with full_name='Max Schmitt' AND" + " given_name='Max'", unique=True) + + dataset = db.execute_query(f"FIND RECORD Dataset with Dataspace={dataspace.id} AND title=" + "'Random numbers created on a random autumn day in a random office'" + "", unique=True) + assert db.execute_query(f"COUNT RECORD with id={dataset.id} AND WHICH REFERENCES Person WITH full_name=" + "'Alexa Nozone' AND WHICH REFERENCES Person WITH full_name='Max Schmitt'" + "") == 1 + assert db.execute_query(f"COUNT RECORD with id={dataset.id} AND WHICH REFERENCES Event WITH " + "start_datetime='2022-02-10T16:36:48+01:00'") == 1 diff --git a/integrationtests/test_data/extroot/realworld_example/data/35/.dataspace.json b/integrationtests/test_data/extroot/realworld_example/data/35/.dataspace.json new file mode 100644 index 0000000000000000000000000000000000000000..26e11e4e16081b8b5b64a83889bc1f4d160ef0e7 --- /dev/null +++ b/integrationtests/test_data/extroot/realworld_example/data/35/.dataspace.json @@ -0,0 +1,15 @@ +{ + "name": "DEMO", + "dataspace_id": 20002, + "archived": false, + "coordinator": { + "full_name": "Max Schmitt", + "given_name": "Max", + "family_name": "Schmitt", + "email": "max.schmitt@email.de" + }, + "start_date": "2022-03-01", + "end_date": 
"2032-02-28", + "comment": "Demonstration data space for DataCloud", + "url": "https://datacloud.de/index.php/f/7679" +} diff --git a/integrationtests/test_data/extroot/realworld_example/data/35/03_raw_data/001_dataset1/demo-dataset.csv b/integrationtests/test_data/extroot/realworld_example/data/35/03_raw_data/001_dataset1/demo-dataset.csv new file mode 100644 index 0000000000000000000000000000000000000000..7a4d684e50cf4fa0699c66d27661d0d54055ec8b --- /dev/null +++ b/integrationtests/test_data/extroot/realworld_example/data/35/03_raw_data/001_dataset1/demo-dataset.csv @@ -0,0 +1,101 @@ +index,A[kg],B[s],pH,Temp.[C] +0,2.1209183975976957,-0.5658499891692009,-0.8391639362482752,0.6210332089995103 +1,-1.2155508955759597,-1.0141121577750831,0.2503340429095144,0.7560156296323594 +2,-1.0191141299527218,-1.5870495901656396,0.6811842117478961,-0.25776671384531147 +3,-0.8235788683146266,1.1688759819188137,-0.15841036014621737,0.24351773490785233 +4,-2.028210212186099,-0.15000944869896093,0.7344551834722798,-1.0594635581726441 +5,0.8578345931586077,-1.0478958942647336,-0.5059960285526023,0.6141193812881873 +6,-0.7585068400011461,-0.45812334415522366,-0.6299981228519985,-0.072295788065162 +7,-0.34875455645064296,-0.49936600901639105,0.08492189470338947,0.24398792231786676 +8,-0.491473523786921,-1.1815449374689073,-0.23631388788457763,0.8801868915647684 +9,-1.291852196630842,0.4956544058017087,1.7176555991727498,1.8889309443940632 +10,-0.974327795079914,-0.6002779223325445,1.4950878953418667,-0.4750187681874636 +11,0.863708396863823,0.4867513929363103,-1.2500529683835453,2.1711592870838112 +12,-1.0518542498779602,-0.6800136223939168,-0.5593377295003794,-0.23451862458342732 +13,0.013421028872223972,-1.7652967848993042,0.302518679323854,1.124258888392337 +14,1.1387734213591119,-0.5602347718731282,-0.6908747870526222,0.905906598269778 +15,-1.8032949181114486,0.18858416406523845,1.0083249532267977,0.6969475009127225 +16,-0.42755813629599176,-1.0354063212247375,-0.24666198541039489,-1.2245102779938972 +17,-0.558268266895522,-1.4564784210249142,1.6162446783371565,-0.6109432350045504 +18,-0.9759505344957924,-2.780175134826593,3.039543722358096,-1.258487109407404 +19,-0.042261223623348665,0.7827311969447484,0.8902139085357877,0.33130889065513175 +20,-0.43764310886282315,-0.8338864816830261,0.8545198929035823,-0.8330242660029193 +21,0.2910454990578444,0.40786200750721635,-0.8115126892604917,0.7108997766944964 +22,0.41446462010439317,-1.0965365861313923,-0.1816041240266455,-0.18304466068648742 +23,-0.5187715545823834,-0.46490147833949275,-0.5059346590831783,0.6998562249774912 +24,2.4491154744839005,-0.3554192977203785,-0.6604902675826654,-0.9434392815439072 +25,-0.5083188860395834,0.2781724921583019,-0.4340136020292349,0.02629089617543565 +26,-0.9854213292611846,-1.398313530263303,0.05552818415139104,-0.20282242071816114 +27,1.0808664341388348,-0.681501179909626,0.6492258431774035,-0.41774069067997716 +28,-1.1767497846165254,1.0817469159915034,-1.524089495721789,0.703812702135731 +29,0.19626402088297137,-1.731421126100085,0.33753714074823216,1.167207071332792 +30,-1.1808345594828473,-0.2820078693924212,-0.8720833031493173,0.8643708946275879 +31,0.8284163458216123,0.20722015645321426,0.29071068457985955,2.6180265991342315 +32,-0.08203655784081282,0.060308831720906446,0.9519485488783623,0.7350446746473065 +33,-0.9071581669506105,0.6088044300190749,1.0099718941738625,0.002799079788086574 +34,-0.42977850177492904,1.2900375327057412,0.32028642454115197,0.8301665482611077 
+35,1.0852695299159272,-0.7040390830488096,0.7964627034904589,0.5892571532287761 +36,-1.5667114288837196,0.19932071915614016,-1.0037399027933205,0.5715977614420107 +37,1.3367378436097728,-0.4584285824179284,-0.4435084312392094,-1.3448283883056802 +38,-0.03788754387000687,-0.37288494267798383,-0.5643391975832854,0.43485956543590193 +39,1.0634390535750102,1.0881233131592658,1.2921865320956318,-0.07293734130819148 +40,1.6786504380461766,-0.03043290400609124,2.66472625811549,-0.016638240963738466 +41,-1.657581538683817,0.8240214695327108,0.32914391919723984,0.08007211199118686 +42,0.04171224685709963,-0.9854865121535178,-0.3195510216437891,-0.42540430453161987 +43,0.6506526831507736,-1.159358101323352,-1.2789107289085737,0.10499609768025624 +44,0.7402635450212406,-0.44202303578095753,-0.5748164371395315,0.5600113473434154 +45,-0.9809738202025933,0.16868168368656386,-1.5883259666916558,-2.6955712214488345 +46,-1.8495816486925372,-1.6954982682847552,1.387648046113911,0.8455399256972358 +47,1.0442607146494682,0.44438084748213075,-0.6547675875380801,-0.5557546828614935 +48,0.32905474147201974,-0.7323591467714324,0.8555098512789541,2.4647371835928196 +49,-2.5131333956577557,1.4005121469909907,-2.162216422615549,-0.3797761578463441 +50,-1.406182674849582,-0.33037485118390236,-0.30720520090625775,0.3765108318500068 +51,1.4315461764484496,0.4490657382715407,0.14688708820540236,-1.3363710028523919 +52,-1.3546100448551868,0.35309094153560083,1.1314974294901488,-0.8299500202669722 +53,-0.7668372422803534,1.3427856896905794,0.11144680945543838,0.5488627384438831 +54,2.6377507721791997,1.86851303077989,0.1358347611054535,0.0021631807468969044 +55,-0.2814604476092987,-0.8110890245372547,0.2914246407211869,1.3009776744589592 +56,-0.08220515064674763,0.06131679740379093,1.2240755323078314,1.6651435947789437 +57,-1.5833977101974248,-1.0390852809695386,0.9472604405151627,-1.1238493594739791 +58,0.060801913229076375,-1.1395369395697963,-0.6773504352336591,-0.7823844332031786 +59,0.3719151864023754,-2.6589573541115885,0.9138765623088898,1.9179285751965107 +60,0.16875501543121765,-0.21075290840365637,-0.15712808326461272,-1.095263810678978 +61,-0.6676220651512094,-2.392512574657398,-0.1970135407082481,1.1303688380560521 +62,-0.3508037371211798,0.37103055819752395,0.1319143246551687,-0.8442765717512588 +63,0.5744187610995448,0.2824163982139891,-0.23250484081352427,-0.009480528299369923 +64,-1.033847039653939,-0.6062251775571341,0.8745680740327043,0.10611431160660695 +65,0.8616095853453857,-0.7902852788672261,0.9924735544245377,-0.39017477285341734 +66,-0.25797403501959537,0.9776756368066659,-0.1774701795502288,0.8457628045096433 +67,0.1879011473947124,0.4880410431165719,0.33941695573743247,-0.3098695458944371 +68,0.12908240475251795,-0.3929831705571321,-0.9815115481276334,-0.6596680503662373 +69,0.47271005121390686,-0.27585706457228726,0.659750762879994,-1.621655291178758 +70,1.2805576221047092,1.255690982276119,0.8893506172744224,0.36843763617254915 +71,-1.8222077377715007,-1.2618097663744718,-1.2393746501949852,0.22742537143827415 +72,-0.7670935921671362,0.6632357605887813,-1.8652052380554516,-0.3566398262186697 +73,0.368513682832951,0.22484190975093934,0.7207761550523548,-0.4607733151206031 +74,-1.6353304746550132,-1.0835890398703607,0.6240782484796151,1.497716990815385 +75,1.2631082191418077,1.9388688317848526,0.43069457351954177,-0.1240852286700612 +76,1.4229945541316606,1.685287372911636,0.282616738427184,1.6075806781661712 +77,0.15907038463344916,-1.1862747951875707,-2.162241163696355,0.9048269906929861 
+78,0.8724544719304812,-0.06423147646568356,0.28403221059939265,0.7315950326908587 +79,-0.5099002924982818,0.8674753935115029,0.0015306969822590103,-0.793334121698815 +80,0.16756755106838742,-0.8374595440291756,1.871547652925694,-0.019948470822079158 +81,0.5333319586985659,-1.6076411272904392,0.4676478392958759,0.35245743045221734 +82,-0.5292514883314576,-1.2708056558247538,-1.7043012586370947,0.3391676901971921 +83,1.8042184317880245,1.2058943020996364,-2.3228385290614084,1.2008461670776127 +84,0.8671835774935015,0.9953640415286719,-1.4439272409362103,0.9410085688802767 +85,-0.118043369635042,0.41649838899300184,-1.2993225013700294,1.9232397286356342 +86,-0.32517525711392864,0.062481999278585824,-0.27679161049236684,0.06555334954413516 +87,-0.39336711632154264,0.0790516633124132,-0.600204351381406,1.321653482130525 +88,-0.9789171222367312,0.30688902979967303,0.10346158693798674,0.3160642853129814 +89,0.4332454768673768,-0.620828990252391,-1.0710192139922268,0.15027972939295933 +90,3.1092106995021096,0.354640404873306,1.8164064530643516,1.8911595405760606 +91,0.7027212216033006,-1.9367414347582559,-0.26797308254438235,1.1063820286927997 +92,0.6665636818250888,0.7953561614160027,1.8164132351496374,1.5760380002772454 +93,-1.4931006068027144,0.2680846074746922,-0.30697269318261355,-0.5300118028948997 +94,0.9258476710590248,0.15464742730214845,0.5847769923450901,-0.8405562302565793 +95,0.3015957125126854,2.9697978560379323,2.2793789547159338,0.13951152352691706 +96,0.4109127837045091,0.04501972229381512,0.5969781411176205,1.6443498245829686 +97,0.07956221270863263,0.009072464866011773,-0.6905847540574735,-0.9639714900867246 +98,2.9172401959670817,0.43571229891911717,-0.903738601954934,0.08343820441617454 +99,0.5501333973314503,-0.2511364474548299,1.4945524498890597,-1.1608586317841827 diff --git a/integrationtests/test_data/extroot/realworld_example/data/35/03_raw_data/001_dataset1/metadata.json b/integrationtests/test_data/extroot/realworld_example/data/35/03_raw_data/001_dataset1/metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..64df90e55eff065b1cc249a634444a72f9fd00d2 --- /dev/null +++ b/integrationtests/test_data/extroot/realworld_example/data/35/03_raw_data/001_dataset1/metadata.json @@ -0,0 +1,50 @@ +{ + "title": "Random numbers created on a random autumn day in a random office", + "abstract": "For demonstration purposes we created random numbers on a computer in an office of the CLOUD. 
This demonstration dataset is used in the DataCloud, a curated cloud storage for scientific data.",
+  "Event": [
+    {
+      "longitude": 18.445078548041533,
+      "start_datetime": "2022-02-10T16:36:48+01:00",
+      "latitude": 53.10833068997861,
+      "elevation": 2,
+      "location": "Bremen, Germany"
+    }
+  ],
+  "license": "CC-BY",
+  "authors": [
+    {
+      "firstname": "Max",
+      "lastname": "Schmitt",
+      "full_name": "Max Schmitt",
+      "affiliation": "CLOUD",
+      "ORCID": "0000-0001-6233-1866",
+      "email": "max.schmitt@email.de"
+    },
+    {
+      "firstname": "Alexa",
+      "lastname": "Nozone",
+      "full_name": "Alexa Nozone",
+      "affiliation": "CLOUD",
+      "email": "alexa.nozone@email.de"
+    }
+  ],
+  "comment": "For questions about the DataCloud or this demonstration dataset, contact research-data@email.de",
+  "project": {
+    "name": "Demonstration of Extremly important Metadata in Folders",
+    "full_name": "Project",
+    "project_acronym": "DEMO",
+    "project_type": "national",
+    "institute": "CLOUD",
+    "start_date": "2021-10-01",
+    "end_date": "2031-12-31",
+    "url": "https://www.cloud.de/de/forschung-infrastruktur/forschungsdaten-services.html",
+    "coordinator": {
+      "firstname": "Max",
+      "lastname": "Schmitt",
+      "email": "max.schmitt@email.de"
+    }
+  },
+  "method": {
+    "name": "Random Number Generator"
+  }
+}
diff --git a/integrationtests/test_data/extroot/realworld_example/data/35/03_raw_data/README_RawData.md b/integrationtests/test_data/extroot/realworld_example/data/35/03_raw_data/README_RawData.md
new file mode 100644
index 0000000000000000000000000000000000000000..2317ff8616c43e75f52637ff581017bf4a50d468
--- /dev/null
+++ b/integrationtests/test_data/extroot/realworld_example/data/35/03_raw_data/README_RawData.md
@@ -0,0 +1,25 @@
+# Raw Data
+
+The `03_raw_data` folder is here to store all raw data of each dataset
+associated with the project – the data that has not been edited by you yet but
+which you plan to use in your research. It can be e.g. your unprocessed field
+sampling records, or useful data from an online repository. Organize your data
+in this folder in the following way:
+
+- Each dataset should reside inside a subfolder. It is recommended to number and name these folders clearly, e.g. `03_raw_data/001_precipitationgermany2017`.
+
+- **IMPORTANT**: provide the folder with information about your raw data by
+  filling out a metadata form for each of your datasets! For this,
+
+  - either copy the `metadata-template.json` file and put it into your dataset
+    folder. Open the copy with a text editor and fill out the fields.
+  - or use the metadata editor in the DataCloud web client (press the "+" button
+    and use "New metadata.json" file)
+
+  If you can’t find information about your data to fill in here, you should
+  reconsider using it - it is important to be able to trace your data sources to
+  ensure a FAIR scientific process!
+
+- For processing any of the data, make a copy of the dataset and paste it into
+  the `04_data_processing` folder. This way, you make sure to keep your raw data
+  in its original state.
\ No newline at end of file
diff --git a/integrationtests/test_data/extroot/realworld_example/data/35/03_raw_data/metadata-template.json b/integrationtests/test_data/extroot/realworld_example/data/35/03_raw_data/metadata-template.json
new file mode 100644
index 0000000000000000000000000000000000000000..7f457d239321b232fb2db7d46f4e1576c85911b0
--- /dev/null
+++ b/integrationtests/test_data/extroot/realworld_example/data/35/03_raw_data/metadata-template.json
@@ -0,0 +1,52 @@
+{
+  "dataset": {
+    "title": "",
+    "abstract": "See https://github.com/CLOUD/metadata-schema for schema specification",
+    "license": "CC-BY",
+    "authors": [
+      {
+        "firstname": "",
+        "lastname": "",
+        "affiliation": "",
+        "ORCID": "XXXX-XXXX-XXXX-XXXX",
+        "email": "name@domain.de"
+      },
+      {
+        "firstname": "",
+        "lastname": "",
+        "affiliation": "",
+        "email": "name@domain.de",
+        "ORCID": "XXXX-XXXX-XXXX-XXXX"
+      }
+    ],
+    "project": {
+      "name": "",
+      "acronym": "",
+      "type": "DFG/",
+      "institute": "CLOUD",
+      "start_date": "YYYY-MM-DD",
+      "end_date": "YYYY-MM-DD",
+      "url": "",
+      "coordinator": {
+        "lastname": "",
+        "email": "",
+        "firstname": ""
+      }
+    },
+    "events_in_data": false,
+    "events": [
+      {
+        "longitude": 0,
+        "latitude": 0,
+        "elevation": 0,
+        "location": "",
+        "datetime": "YYYY-MM-DDTHH:mm:ss"
+      }
+    ],
+    "method": {
+      "name": "",
+      "url": ""
+    },
+    "max_files": 100
+  }
+}
diff --git a/integrationtests/test_data/extroot/realworld_example/data/35/04_data_processing/README_ProcessedData.md b/integrationtests/test_data/extroot/realworld_example/data/35/04_data_processing/README_ProcessedData.md
new file mode 100644
index 0000000000000000000000000000000000000000..ce1b002b18772b85f4bba3a222574f438a6ed0e3
--- /dev/null
+++ b/integrationtests/test_data/extroot/realworld_example/data/35/04_data_processing/README_ProcessedData.md
@@ -0,0 +1,10 @@
+# Data Processing
+
+The actual work is done in this `04_data_processing` folder. Depending on your
+field and the type and size of your project, you can organize this folder in
+the way that fits your process best. Here, a bit of chaos can happen ;) Keep in
+mind to document your processing steps in the `02_materials_and_methods` folder
+and to put your final results into the `05_results` folder. At the end of your
+project, it should be possible to delete everything in this folder and
+reconstruct the working process using the documentation and raw data from
+previous folders.
diff --git a/integrationtests/test_data/extroot/realworld_example/data/35/04_data_processing/metadata-template.json b/integrationtests/test_data/extroot/realworld_example/data/35/04_data_processing/metadata-template.json new file mode 100644 index 0000000000000000000000000000000000000000..05f9394dfbfa9a0b2b4844c7080a340585a9050f --- /dev/null +++ b/integrationtests/test_data/extroot/realworld_example/data/35/04_data_processing/metadata-template.json @@ -0,0 +1,52 @@ +{ + "dataset": { + "title": "", + "abstract": "See https://github.com/cloud/metadata-schema for schema specification", + "license": "CC-BY", + "authors": [ + { + "firstname": "", + "lastname": "", + "affiliation": "", + "ORCID": "XXXX-XXXX-XXXX-XXXX", + "email": "name@domain.de" + }, + { + "firstname": "", + "lastname": "", + "affiliation": "", + "email": "name@domain.de", + "ORCID": "XXXX-XXXX-XXXX-XXXX" + } + ], + "project": { + "name": "", + "acronym": "", + "type": "DFG/", + "institute": "CLOUD", + "start_date": "YYYY-MM-DD", + "end_date": "YYYY-MM-DD", + "url": "", + "coordinator": { + "lastname": "", + "email": "", + "firstname": "" + } + }, + "events_in_data": false, + "events": [ + { + "longitude": 0, + "latitude": 0, + "elevation": 0, + "location": "", + "datetime": "YYYY-MM-DDTHH:mm:ss" + } + ], + "method": { + "name": "", + "url": "" + }, + "max_files": 100 + } +} diff --git a/integrationtests/test_data/extroot/realworld_example/data/35/05_results/README_Results.md b/integrationtests/test_data/extroot/realworld_example/data/35/05_results/README_Results.md new file mode 100644 index 0000000000000000000000000000000000000000..ae0ab6571c52c0ec9a1cdc8aba27b31fd3be6fcc --- /dev/null +++ b/integrationtests/test_data/extroot/realworld_example/data/35/05_results/README_Results.md @@ -0,0 +1,7 @@ +# Results + +All the results that are final versions of your data analysis or processing, +should be copied into this `05_results` folder. Organize your results folder in +the way most fitting to your project. + +Provide metadata to your results files. diff --git a/integrationtests/test_data/extroot/realworld_example/data/35/README.md b/integrationtests/test_data/extroot/realworld_example/data/35/README.md new file mode 100644 index 0000000000000000000000000000000000000000..809d699c462d064ff5193add8e23677bec84b0e0 --- /dev/null +++ b/integrationtests/test_data/extroot/realworld_example/data/35/README.md @@ -0,0 +1,5 @@ +# Dataspace: DEMO + +This is a Dataspace in the CLOUD DataCloud providing safe, curated cloud storage +for all of CLOUD's research data. + diff --git a/integrationtests/test_data/extroot/realworld_example/dataset_cfoods.yml b/integrationtests/test_data/extroot/realworld_example/dataset_cfoods.yml new file mode 100644 index 0000000000000000000000000000000000000000..1589cba2b44afc3e2645b0ee72f91bf83b327032 --- /dev/null +++ b/integrationtests/test_data/extroot/realworld_example/dataset_cfoods.yml @@ -0,0 +1,528 @@ +# This file is a part of the CaosDB Project. +# +# Copyright (C) 2022 IndiScale GmbH <info@indiscale.com> +# Copyright (C) 2022 Florian Spreckelsen <f.spreckelsen@indiscale.com> +# +# This program is free software: you can redistribute it and/or modify it under +# the terms of the GNU Affero General Public License as published by the Free +# Software Foundation, either version 3 of the License, or (at your option) any +# later version. 
+# +# This program is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +# FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more +# details. +# +# You should have received a copy of the GNU Affero General Public License along +# with this program. If not, see <https://www.gnu.org/licenses/>. +# +Data: + type: Directory + match: data + subtree: + dataspace_dir: + type: Directory + match: (?P<dataspace_dir_number>[0-9]+) + records: + Dataspace: + name: $dataspace_dir_number + subtree: + dataspace_json: + type: JSONFile + match: .dataspace.json + validate: schema/dataspace.schema.json + subtree: + dataspace_id_element: + type: DictIntegerElement + match_name: "dataspace_id" + match_value: "(?P<id>[0-9]+)" + records: + Dataspace: + dataspace_id: $id + archived_element: + type: DictBooleanElement + match_name: "archived" + match_value: "(?P<archived>.*)" + records: + Dataspace: + archived: $archived + url_element: + type: DictTextElement + match_name: "url" + match_value: "(?P<url>.*)" + records: + Dataspace: + url: $url + coordinator_element: + type: DictDictElement + match_name: "coordinator" + records: + Person: + parents: + - Person + Dataspace: + Person: $Person + subtree: &person_subtree + full_name_element: + type: DictTextElement + match_name: "full_name" + match_value: "(?P<full_name>.*)" + records: + Person: + full_name: $full_name + full_name_nonlatin_element: + type: DictTextElement + match_name: "full_name_nonlatin" + match_value: "(?P<full_name_nonlatin>.*)" + records: + Person: + full_name_nonlatin: $full_name_nonlatin + family_name_element: + type: DictTextElement + match_name: "family_name" + match_value: "(?P<family_name>.*)" + records: + Person: + family_name: $family_name + given_name_element: + type: DictTextElement + match_name: "given_name" + match_value: "(?P<given_name>.*)" + records: + Person: + given_name: $given_name + email_element: + type: DictTextElement + match_name: "email" + match_value: "(?P<email>.*)" + records: + Person: + email: $email + affiliation_element: + type: DictTextElement + match_name: "affiliation" + match_value: "(?P<affiliation>.*)" + records: + Person: + affiliation: $affiliation + ORCID_element: + type: DictTextElement + match_name: "ORCID" + match_value: "(?P<ORCID>.*)" + records: + Person: + ORCID: $ORCID + start_date_element: + type: DictTextElement + match_name: "start_date" + match_value: "(?P<start_date>.*)" + records: + Dataspace: + start_date: $start_date + end_date_element: + type: DictTextElement + match_name: "end_date" + match_value: "(?P<end_date>.*)" + records: + Dataspace: + end_date: $end_date + comment: + type: DictTextElement + match_name: "comment" + match_value: "(?P<comment>.*)" + records: + Dataspace: + comment: $comment + raw_data_dir: + type: Directory + match: 03_raw_data + subtree: &template + # TODO collect info from metadata.json and look into sub-directories + # (only one level) for metadata.json + dataset_dir: + match: (?P<dataset_dir_name>.*) + type: Directory + records: + Dataset: + Dataspace: $Dataspace + subtree: + metadata_json: &metadata_json_template + type: JSONFile + match: metadata.json + validate: schema/dataset.schema.json + subtree: + title_element: + type: DictTextElement + match_name: "title" + match_value: "(?P<title>.*)" + records: + Dataset: + title: $title + authors_element: + type: DictListElement + match_name: "authors" + subtree: + author_element: + type: Dict + records: + 
Person: + parents: + - Person + Dataset: + authors: +$Person + subtree: *person_subtree + abstract_element: + type: DictTextElement + match_name: "abstract" + match_value: "(?P<abstract>.*)" + records: + Dataset: + abstract: $abstract + comment_element: + type: DictTextElement + match_name: "comment" + match_value: "(?P<comment>.*)" + records: + Dataset: + comment: $comment + license_element: + type: DictTextElement + match_name: "license" + match_value: "(?P<license_name>.*)" + records: + license: + # TODO: As soon as such things can be validated, a + # creation of a new license has to be forbidden here + # (although this is effectively done already by + # validating against the above schema.) + name: $license_name + Dataset: + license: $license + dataset_doi_element: + type: DictTextElement + match_name: "dataset_doi" + match_value: "(?P<dataset_doi>.*)" + records: + Dataset: + dataset_doi: $dataset_doi + related_to_dois_element: + type: DictListElement + match_name: "related_to_dois" + subtree: + related_to_doi_element: + type: TextElement + match: "(?P<related_to_doi>).*" + records: + Dataset: + related_to_dois: +$related_to_doi + Keywords_element: + type: DictListElement + match_name: "Keyword" + Events_element: + type: DictListElement + match_name: "Event" + subtree: + Event_element: + type: Dict + records: + Event: + parents: + - Event + Dataset: + Event: +$Event + subtree: + label_element: + type: DictTextElement + match_name: "label" + match_value: "(?P<label>.*)" + records: + Event: + label: $label + comment_element: + type: DictTextElement + match_name: "comment" + match_value: "(?P<comment>.*)" + records: + Event: + comment: $comment + start_datetime_element: + type: DictTextElement + match_name: start_datetime + match_value: "(?P<start_datetime>.*)" + records: + Event: + start_datetime: $start_datetime + end_datetime_element: + type: DictTextElement + match_name: end_datetime + match_value: "(?P<end_datetime>.*)" + records: + Event: + end_datetime: $end_datetime + longitude_element: + type: DictFloatElement + match_name: "longitude" + match_value: "(?P<longitude>.*)" + records: + Event: + longitude: $longitude + latitude_element: + type: DictFloatElement + match_name: "latitude" + match_value: "(?P<latitude>.*)" + records: + Event: + latitude: $latitude + elevation_element: + type: DictFloatElement + match_name: "elevation" + match_value: "(?P<elevation>.*)" + records: + Event: + elevation: $elevation + location_element: + type: DictTextElement + match_name: location + match_value: "(?P<location>.*)" + records: + Event: + location: $location + igsn_element: + type: DictTextElement + match_name: igsn + match_value: "(?P<igsn>.*)" + records: + Event: + igsn: $igsn + events_in_data_element: + type: DictBooleanElement + match_name: "events_in_data" + match_value: "(?P<events_in_data>.*)" + records: + Dataset: + events_in_data: $events_in_data + geojson_element: + type: DictTextElement + match_name: "geojson" + match_value: "(?P<geojson>.*)" + records: + Dataset: + geojson: $geojson + project_element: + type: DictDictElement + match_name: "project" + records: + Project: + parents: + - Project + Dataset: + Project: $Project + subtree: + full_name_element: + type: DictTextElement + match_name: "full_name" + match_value: "(?P<full_name>.*)" + records: + Project: + full_name: $full_name + project_id_element: + type: DictTextElement + match_name: "project_id" + match_value: "(?P<project_id>.*)" + records: + Project: + project_id: $project_id + project_type_element: + type: 
DictTextElement + match_name: "project_type" + match_value: "(?P<project_type_name>.*)" + records: + project_type: + name: $project_type_name + Project: + project_type: $project_type + institute_element: + type: DictTextElement + match_name: "institute" + match_value: "(?P<institute>.*)" + records: + Project: + institute: $institute + start_date_element: + type: DictTextElement + match_name: "start_date" + match_value: "(?P<start_date>.*)" + records: + Project: + start_date: $start_date + end_date_element: + type: DictTextElement + match_name: "end_date" + match_value: "(?P<end_date>.*)" + records: + Project: + end_date: $end_date + url_element: + type: DictTextElement + match_name: "url" + match_value: "(?P<url>.*)" + records: + Project: + url: $url + coordinators_element: + type: DictListElement + match_name: "coordinators" + subtree: + coordinator_element: + type: Dict + records: + Person: + parents: + - Person + Project: + coordinators: +$Person + subtree: *person_subtree + campaign_element: + type: DictDictElement + match_name: "campaign" + records: + Campaign: + parents: + - Campaign + Dataset: + Campaign: $Campaign + subtree: + label_element: + type: DictTextElement + match_name: "label" + match_value: "(?P<label>.*)" + records: + Campaign: + label: $label + optional_label_element: + type: DictTextElement + match_name: "optional_label" + match_value: "(?P<optional_label>.*)" + records: + Campaign: + optional_label: $optional_label + start_date_element: + type: DictTextElement + match_name: "start_date" + match_value: "(?P<start_date>.*)" + records: + Campaign: + start_date: $start_date + end_date_element: + type: DictTextElement + match_name: "end_date" + match_value: "(?P<end_date>.*)" + records: + Campaign: + end_date: $end_date + responsible_scientists_element: + type: DictListElement + match_name: "responsible_scientists" + subtree: + responsible_scientist_element: + type: Dict + records: + Person: + parents: + - Person + Campaign: + responsible_scientists: +$Person + subtree: *person_subtree + Methods_element: + type: DictListElement + match_name: "Method" + subtree: + Method_element: + type: Dict + records: + Method: + parents: + - Method + Dataset: + Method: +$Method + subtree: + method_name_element: + type: DictTextElement + match_name: "method_name" + match_value: "(?P<method_name>.*)" + records: + Method: + name: $method_name + abbreviation_element: + type: DictTextElement + match_name: "abbreviation" + match_value: "(?P<abbreviation>.*)" + records: + Method: + abbreviation: $abbreviation + url_element: + type: DictTextElement + match_name: "url" + match_value: "(?P<url>.*)" + records: + Method: + url: $url + Taxa_element: + type: DictListElement + match_name: "Taxon" + subtree: + Taxon_element: + type: Dict + records: + Taxon: + parents: + - Taxon + Dataset: + Taxon: +$Taxon + subtree: + taxon_name_element: + type: DictTextElement + match_name: "taxon_name" + match_value: "(?P<taxon_name>.*)" + records: + Taxon: + name: $taxon_name + archived_element: + type: DictBooleanElement + match_name: "archived" + match_value: "(P<archived>.*)" + records: + Dataset: + archived: $archived + publication_date_element: + type: DictTextElement + match_name: "publication_date" + match_value: "(P<publication_date>.*)" + records: + Dataset: + publication_date: $publication_date + max_files_element: + type: DictIntegerElement + match_name: "max_files" + match_value: "(P<max_files>.*)" + records: + Dataset: + max_files: $max_files + auxiliary_file: &aux_file_template + type: File + match: 
"(?P<aux_file_name>(?!metadata.json).*)" + # TODO File, path and reference dataset in file record + child_dataset_dir: + type: Directory + match: (?P<child_dataset_dir_name>.*) + subtree: + metadata_json: *metadata_json_template + auxiliary_file: *aux_file_template + data_processing_dir: + type: Directory + match: 04_data_processing + subtree: *template + results_dir: + type: Directory + match: 05_results + subtree: *template diff --git a/integrationtests/test_data/extroot/realworld_example/schema/README.md b/integrationtests/test_data/extroot/realworld_example/schema/README.md new file mode 100644 index 0000000000000000000000000000000000000000..e0bb95f8b844374bba72c7c6989ac57cfa5fc305 --- /dev/null +++ b/integrationtests/test_data/extroot/realworld_example/schema/README.md @@ -0,0 +1,37 @@ +# Dataset Schemas + +These schema's are derived from the [metadata +schemas](https://github.com/leibniz-zmt/zmt-metadata-schema) used at the Leibniz +Center for Tropical Marine Research (Leibniz ZMT). + +# Copyright + +BSD 3-Clause License + +Copyright (c) 2022 ZMT +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +* Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +* Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +* Neither the name of the copyright holder nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
diff --git a/integrationtests/test_data/extroot/realworld_example/schema/dataset-inheritance.yml b/integrationtests/test_data/extroot/realworld_example/schema/dataset-inheritance.yml new file mode 100644 index 0000000000000000000000000000000000000000..3d12053a0007cdea1005e7673db69f46b35a063d --- /dev/null +++ b/integrationtests/test_data/extroot/realworld_example/schema/dataset-inheritance.yml @@ -0,0 +1,18 @@ +extern: +- Keyword +- Taxon +- full_name +- full_name_nonlatin +- name + +full_name: + inherit_from_obligatory: + - name + +full_name_nonlatin: + inherit_from_obligatory: + - name + +Taxon: + inherit_from_obligatory: + - Keyword diff --git a/integrationtests/test_data/extroot/realworld_example/schema/dataset.schema.json b/integrationtests/test_data/extroot/realworld_example/schema/dataset.schema.json new file mode 100644 index 0000000000000000000000000000000000000000..83d6a60d857349772c960af637671cb21c8abd5d --- /dev/null +++ b/integrationtests/test_data/extroot/realworld_example/schema/dataset.schema.json @@ -0,0 +1,365 @@ +{ + "title": "Dataset", + "description": "", + "type": "object", + "properties": { + "title": { + "type": "string", + "description": "full dataset title" + }, + "authors": { + "type": "array", + "items": { + "type": "object", + "title": "Person", + "properties": { + "full_name": { + "type": "string", + "description": "Full name (latin transcription, all UFT-8 characters allowed)" + }, + "full_name_nonlatin": { + "type": "string", + "description": "Full name (non-latin alphabet)" + }, + "family_name": { + "type": "string", + "description": "Family name (latin transcription)" + }, + "given_name": { + "type": "string", + "description": "Given/other names (latin transcription)" + }, + "affiliation": { + "type": "string" + }, + "ORCID": { + "type": "string", + "description": "ORCID identifier as 16-digit number, e.g. 0000-0001-6233-1866", + "pattern": "^\\d{4}-\\d{4}-\\d{4}-\\d{4}$" + }, + "email": { + "type": "string", + "format": "email" + } + }, + "required": [ + "full_name", + "email" + ] + }, + "minItems": 1, + "uniqueItems": true + }, + "abstract": { + "type": "string", + "minLength": 80, + "maxLength": 1000, + "description": "Abstract with at least 80 characters" + }, + "comment": { + "type": "string" + }, + "license": { + "type": "string", + "enum": [ + "CC-BY", + "CC-BY-SA", + "CC0", + "restricted access" + ] + }, + "dataset_doi": { + "type": "string", + "pattern": "(10[.][0-9]{4,}(?:[.][0-9]+)*/(?:(?![%\"#? ])\\S)+)", + "description": "Dataset DOI, e.g. 10.1594/PANGAEA.938740" + }, + "related_to_dois": { + "type": "array", + "items": { + "type": "string", + "pattern": "(10[.][0-9]{4,}(?:[.][0-9]+)*/(?:(?![%\"#? ])\\S)+)" + }, + "description": "DOIs of related publications and/or datasets, e.g. 
10.1000/182" + }, + "Keyword": { + "type": "array", + "items": { + "type": "object", + "properties": { + "name": { + "type": "string" + } + } + } + }, + "Event": { + "type": "array", + "description": "https://wiki.pangaea.de/wiki/Event", + "items": { + "type": "object", + "properties": { + "label": { + "type": "string" + }, + "comment": { + "type": "string" + }, + "start_datetime": { + "type": "string", + "format": "date-time" + }, + "end_datetime": { + "type": "string", + "format": "date-time" + }, + "longitude": { + "type": "number", + "minimum": -180, + "maximum": 180, + "description": "longitude (W/E) in decimal degree (-180 to 180)" + }, + "latitude": { + "type": "number", + "minimum": -90, + "maximum": 90, + "description": "latitude (N/S) in decimal degree (-90 to 90)" + }, + "elevation": { + "type": "number", + "minimum": -10000, + "maximum": 20000, + "description": "elevation in m" + }, + "location": { + "type": "string", + "description": "geographical location as text (e.g., North Sea; Espoo, Finland)" + }, + "igsn": { + "type": "string", + "description": "International Geo Sample Number (http://www.geosamples.org/aboutigsn)" + } + }, + "required": [ + "longitude", + "latitude", + "start_datetime" + ] + } + }, + "events_in_data": { + "type": "boolean", + "description": "Does the data contain additional information about timepoints and locations?" + }, + "geojson": { + "type": "string", + "pattern": "", + "description": "GeoJSON for complex geographic structures" + }, + "project": { + "title": "Project", + "description": "https://wiki.pangaea.de/wiki/Project", + "type": "object", + "properties": { + "name": { + "type": "string", + "description": "short name of project" + }, + "full_name": { + "type": "string", + "description": "Full name (latin transcription, all UTF-8 characters allowed)" + }, + "project_id": { + "type": "string", + "description": "Project ID" + }, + "project_type": { + "type": "string", + "enum": [ + "DFG", + "EU", + "BMBF", + "national", + "international" + ] + }, + "institute": { + "type": "string", + "description": "place of coordination or project office", + "default": "Centre for Research" + }, + "start_date": { + "type": "string", + "format": "date" + }, + "end_date": { + "type": "string", + "format": "date" + }, + "url": { + "type": "string", + "format": "uri" + }, + "coordinators": { + "type": "array", + "items": { + "type": "object", + "title": "Person", + "properties": { + "full_name": { + "type": "string", + "description": "Full name (latin transcription, all UTF-8 characters allowed)" + }, + "full_name_nonlatin": { + "type": "string", + "description": "Full name (non-latin alphabet)" + }, + "family_name": { + "type": "string", + "description": "Family name (latin transcription)" + }, + "given_name": { + "type": "string", + "description": "Given/other names (latin transcription)" + }, + "affiliation": { + "type": "string" + }, + "ORCID": { + "type": "string", + "description": "ORCID identifier as 16-digit number, e.g. 
0000-0001-6233-1866", + "pattern": "^\\d{4}-\\d{4}-\\d{4}-\\d{4}$" + }, + "email": { + "type": "string", + "format": "email" + } + }, + "required": [ + "full_name", + "email" + ] + }, + "minItems": 1, + "uniqueItems": true + } + }, + "required": ["name", "full_name"] + }, + "campaign": { + "title": "Campaign", + "description": "https://wiki.pangaea.de/wiki/Campaign, synonyms: cruise, expedition, leg, ", + "type": "object", + "properties": { + "label": { + "type": "string", + "description": "is unique and does not contain blanks; uses abbreviations instead of full names" + }, + "optional_label": { + "type": "string" + }, + "start_date": { + "type": "string", + "format": "date" + }, + "end_date": { + "type": "string", + "format": "date" + }, + "responsible_scientists": { + "type": "array", + "items": { + "type": "object", + "title": "Person", + "properties": { + "full_name": { + "type": "string", + "description": "Full name (latin transcription, all UFT-8 characters allowed)" + }, + "full_name_nonlatin": { + "type": "string", + "description": "Full name (non-latin alphabet)" + }, + "family_name": { + "type": "string", + "description": "Family name (latin transcription)" + }, + "given_name": { + "type": "string", + "description": "Given/other names (latin transcription)" + }, + "affiliation": { + "type": "string" + }, + "ORCID": { + "type": "string", + "description": "ORCID identifier as 16-digit number, e.g. 0000-0001-6233-1866", + "pattern": "^\\d{4}-\\d{4}-\\d{4}-\\d{4}$" + }, + "email": { + "type": "string", + "format": "email" + } + }, + "required": [ + "full_name", + "email" + ] + }, + "minItems": 1, + "uniqueItems": true + } + } + }, + "Method": { + "type": "array", + "items": { + "type": "object", + "description": "https://wiki.pangaea.de/wiki/Method", + "properties": { + "method_name": { + "type": "string", + "description": "full official name of tool/instrument/device/gear" + }, + "abbreviation": { + "type": "string", + "description": "may be used for import in an event list to avoid misspellings" + }, + "url": { + "type": "string", + "description": "should contain a web address, where an official description of the device can be found" + } + } + } + }, + "Taxon": { + "type": "array", + "items": { + "type": "object", + "properties": { + "name": { + "type": "string" + } + } + } + }, + "archived": { + "type": "boolean", + "description": "Has the dataset been archived?", + "default": false + }, + "publication_date": { + "type": "string", + "format": "date" + }, + "max_files": { + "type": "integer", + "description": "Maximum number of files to included by the CaosDB crawler", + "default": 100 + } + }, + "required": [ + "title", + "authors", + "abstract" + ] +} diff --git a/integrationtests/test_data/extroot/realworld_example/schema/dataspace.schema.json b/integrationtests/test_data/extroot/realworld_example/schema/dataspace.schema.json new file mode 100644 index 0000000000000000000000000000000000000000..01653bfa821e0a0acbb5a481bfd458e2ed784fb9 --- /dev/null +++ b/integrationtests/test_data/extroot/realworld_example/schema/dataspace.schema.json @@ -0,0 +1,45 @@ +{ + "title": "Dataspace", + "description": "A Dataspace is a folder in the DataCloud with a pre-defined structure", + "type": "object", + "properties": { + "dataspace_id": { + "type": "integer", + "description": "Integer ID of Dataspace (matches LDAP GID)", + "minimum": 20000 + }, + "archived": { "type": "boolean" }, + "url": { + "type": "string", + "description": "link to folder on file system (CaosDB or cloud folder)" + }, + 
"coordinator": { + "type": "object", + "title": "Person", + "properties": { + "full_name": { + "type": "string", + "description": "Full name (latin transcription, all UFT-8 characters allowed)" + }, + "full_name_nonlatin": { + "type": "string", + "description": "Full name (non-latin alphabet)" + }, + "family_name": { + "type": "string", + "description": "Family name (latin transcription)" + }, + "given_name": { + "type": "string", + "description": "Given/other names (latin transcription)" + }, + "email": { "type": "string", "format": "email" } + }, + "required": ["full_name", "email"] + }, + "start_date": { "type": "string", "format": "date" }, + "end_date": { "type": "string", "format": "date" }, + "comment": { "type": "string" } + }, + "required": ["dataspace_id", "url", "coordinator"] +} diff --git a/integrationtests/test_data/extroot/realworld_example/schema/zmt-organisation.yml b/integrationtests/test_data/extroot/realworld_example/schema/zmt-organisation.yml new file mode 100644 index 0000000000000000000000000000000000000000..7e251eeced7bf626e77364fc5555b1cb10dd3afb --- /dev/null +++ b/integrationtests/test_data/extroot/realworld_example/schema/zmt-organisation.yml @@ -0,0 +1,26 @@ +extern: +- name +- url +- Dataset + +german_name: + datatype: TEXT + inherit_from_obligatory: + - name + +Department: + recommended_properties: + url: + german_name: + + +WorkingGroup: + recommended_properties: + Department: + german_name: + url: + +Dataset: + recommended_properties: + WorkingGroup: +