From d2ce1ec9178d38e7838ee91445f7808b52858e2a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20tom=20W=C3=B6rden?= <h.tomwoerden@indiscale.com> Date: Thu, 5 May 2022 13:43:35 +0200 Subject: [PATCH] data and styling --- integrationtests/data/35/.dataspace.json | 15 +++ .../03_raw_data/001_dataset1/demo-dataset.csv | 101 ++++++++++++++++++ .../35/03_raw_data/001_dataset1/metadata.json | 51 +++++++++ .../data/35/03_raw_data/README_RawData.md | 25 +++++ .../35/03_raw_data/metadata-template.json | 52 +++++++++ .../README_ProcessedData.md | 10 ++ .../04_data_processing/metadata-template.json | 52 +++++++++ .../data/35/05_results/README_Results.md | 7 ++ integrationtests/data/35/README.md | 5 + integrationtests/test.py | 26 ++--- 10 files changed, 331 insertions(+), 13 deletions(-) create mode 100644 integrationtests/data/35/.dataspace.json create mode 100644 integrationtests/data/35/03_raw_data/001_dataset1/demo-dataset.csv create mode 100644 integrationtests/data/35/03_raw_data/001_dataset1/metadata.json create mode 100644 integrationtests/data/35/03_raw_data/README_RawData.md create mode 100644 integrationtests/data/35/03_raw_data/metadata-template.json create mode 100644 integrationtests/data/35/04_data_processing/README_ProcessedData.md create mode 100644 integrationtests/data/35/04_data_processing/metadata-template.json create mode 100644 integrationtests/data/35/05_results/README_Results.md create mode 100644 integrationtests/data/35/README.md diff --git a/integrationtests/data/35/.dataspace.json b/integrationtests/data/35/.dataspace.json new file mode 100644 index 00000000..7473a604 --- /dev/null +++ b/integrationtests/data/35/.dataspace.json @@ -0,0 +1,15 @@ +{ + "name": "DEMO", + "dataspace_id": 20002, + "archived": false, + "coordinator": { + "full_name": "Max Schmitt", + "firstname": "Max", + "lastname": "Schmitt", + "email": "max.schmitt@email.de" + }, + "start_date": "2022-03-01", + "end_date": "2032-02-28", + "comment": "Demonstration data space for 
DataCloud", + "url": "https://datacloud.de/index.php/f/7679" +} diff --git a/integrationtests/data/35/03_raw_data/001_dataset1/demo-dataset.csv b/integrationtests/data/35/03_raw_data/001_dataset1/demo-dataset.csv new file mode 100644 index 00000000..7a4d684e --- /dev/null +++ b/integrationtests/data/35/03_raw_data/001_dataset1/demo-dataset.csv @@ -0,0 +1,101 @@ +index,A[kg],B[s],pH,Temp.[C] +0,2.1209183975976957,-0.5658499891692009,-0.8391639362482752,0.6210332089995103 +1,-1.2155508955759597,-1.0141121577750831,0.2503340429095144,0.7560156296323594 +2,-1.0191141299527218,-1.5870495901656396,0.6811842117478961,-0.25776671384531147 +3,-0.8235788683146266,1.1688759819188137,-0.15841036014621737,0.24351773490785233 +4,-2.028210212186099,-0.15000944869896093,0.7344551834722798,-1.0594635581726441 +5,0.8578345931586077,-1.0478958942647336,-0.5059960285526023,0.6141193812881873 +6,-0.7585068400011461,-0.45812334415522366,-0.6299981228519985,-0.072295788065162 +7,-0.34875455645064296,-0.49936600901639105,0.08492189470338947,0.24398792231786676 +8,-0.491473523786921,-1.1815449374689073,-0.23631388788457763,0.8801868915647684 +9,-1.291852196630842,0.4956544058017087,1.7176555991727498,1.8889309443940632 +10,-0.974327795079914,-0.6002779223325445,1.4950878953418667,-0.4750187681874636 +11,0.863708396863823,0.4867513929363103,-1.2500529683835453,2.1711592870838112 +12,-1.0518542498779602,-0.6800136223939168,-0.5593377295003794,-0.23451862458342732 +13,0.013421028872223972,-1.7652967848993042,0.302518679323854,1.124258888392337 +14,1.1387734213591119,-0.5602347718731282,-0.6908747870526222,0.905906598269778 +15,-1.8032949181114486,0.18858416406523845,1.0083249532267977,0.6969475009127225 +16,-0.42755813629599176,-1.0354063212247375,-0.24666198541039489,-1.2245102779938972 +17,-0.558268266895522,-1.4564784210249142,1.6162446783371565,-0.6109432350045504 +18,-0.9759505344957924,-2.780175134826593,3.039543722358096,-1.258487109407404 
+19,-0.042261223623348665,0.7827311969447484,0.8902139085357877,0.33130889065513175 +20,-0.43764310886282315,-0.8338864816830261,0.8545198929035823,-0.8330242660029193 +21,0.2910454990578444,0.40786200750721635,-0.8115126892604917,0.7108997766944964 +22,0.41446462010439317,-1.0965365861313923,-0.1816041240266455,-0.18304466068648742 +23,-0.5187715545823834,-0.46490147833949275,-0.5059346590831783,0.6998562249774912 +24,2.4491154744839005,-0.3554192977203785,-0.6604902675826654,-0.9434392815439072 +25,-0.5083188860395834,0.2781724921583019,-0.4340136020292349,0.02629089617543565 +26,-0.9854213292611846,-1.398313530263303,0.05552818415139104,-0.20282242071816114 +27,1.0808664341388348,-0.681501179909626,0.6492258431774035,-0.41774069067997716 +28,-1.1767497846165254,1.0817469159915034,-1.524089495721789,0.703812702135731 +29,0.19626402088297137,-1.731421126100085,0.33753714074823216,1.167207071332792 +30,-1.1808345594828473,-0.2820078693924212,-0.8720833031493173,0.8643708946275879 +31,0.8284163458216123,0.20722015645321426,0.29071068457985955,2.6180265991342315 +32,-0.08203655784081282,0.060308831720906446,0.9519485488783623,0.7350446746473065 +33,-0.9071581669506105,0.6088044300190749,1.0099718941738625,0.002799079788086574 +34,-0.42977850177492904,1.2900375327057412,0.32028642454115197,0.8301665482611077 +35,1.0852695299159272,-0.7040390830488096,0.7964627034904589,0.5892571532287761 +36,-1.5667114288837196,0.19932071915614016,-1.0037399027933205,0.5715977614420107 +37,1.3367378436097728,-0.4584285824179284,-0.4435084312392094,-1.3448283883056802 +38,-0.03788754387000687,-0.37288494267798383,-0.5643391975832854,0.43485956543590193 +39,1.0634390535750102,1.0881233131592658,1.2921865320956318,-0.07293734130819148 +40,1.6786504380461766,-0.03043290400609124,2.66472625811549,-0.016638240963738466 +41,-1.657581538683817,0.8240214695327108,0.32914391919723984,0.08007211199118686 +42,0.04171224685709963,-0.9854865121535178,-0.3195510216437891,-0.42540430453161987 
+43,0.6506526831507736,-1.159358101323352,-1.2789107289085737,0.10499609768025624 +44,0.7402635450212406,-0.44202303578095753,-0.5748164371395315,0.5600113473434154 +45,-0.9809738202025933,0.16868168368656386,-1.5883259666916558,-2.6955712214488345 +46,-1.8495816486925372,-1.6954982682847552,1.387648046113911,0.8455399256972358 +47,1.0442607146494682,0.44438084748213075,-0.6547675875380801,-0.5557546828614935 +48,0.32905474147201974,-0.7323591467714324,0.8555098512789541,2.4647371835928196 +49,-2.5131333956577557,1.4005121469909907,-2.162216422615549,-0.3797761578463441 +50,-1.406182674849582,-0.33037485118390236,-0.30720520090625775,0.3765108318500068 +51,1.4315461764484496,0.4490657382715407,0.14688708820540236,-1.3363710028523919 +52,-1.3546100448551868,0.35309094153560083,1.1314974294901488,-0.8299500202669722 +53,-0.7668372422803534,1.3427856896905794,0.11144680945543838,0.5488627384438831 +54,2.6377507721791997,1.86851303077989,0.1358347611054535,0.0021631807468969044 +55,-0.2814604476092987,-0.8110890245372547,0.2914246407211869,1.3009776744589592 +56,-0.08220515064674763,0.06131679740379093,1.2240755323078314,1.6651435947789437 +57,-1.5833977101974248,-1.0390852809695386,0.9472604405151627,-1.1238493594739791 +58,0.060801913229076375,-1.1395369395697963,-0.6773504352336591,-0.7823844332031786 +59,0.3719151864023754,-2.6589573541115885,0.9138765623088898,1.9179285751965107 +60,0.16875501543121765,-0.21075290840365637,-0.15712808326461272,-1.095263810678978 +61,-0.6676220651512094,-2.392512574657398,-0.1970135407082481,1.1303688380560521 +62,-0.3508037371211798,0.37103055819752395,0.1319143246551687,-0.8442765717512588 +63,0.5744187610995448,0.2824163982139891,-0.23250484081352427,-0.009480528299369923 +64,-1.033847039653939,-0.6062251775571341,0.8745680740327043,0.10611431160660695 +65,0.8616095853453857,-0.7902852788672261,0.9924735544245377,-0.39017477285341734 +66,-0.25797403501959537,0.9776756368066659,-0.1774701795502288,0.8457628045096433 
+67,0.1879011473947124,0.4880410431165719,0.33941695573743247,-0.3098695458944371 +68,0.12908240475251795,-0.3929831705571321,-0.9815115481276334,-0.6596680503662373 +69,0.47271005121390686,-0.27585706457228726,0.659750762879994,-1.621655291178758 +70,1.2805576221047092,1.255690982276119,0.8893506172744224,0.36843763617254915 +71,-1.8222077377715007,-1.2618097663744718,-1.2393746501949852,0.22742537143827415 +72,-0.7670935921671362,0.6632357605887813,-1.8652052380554516,-0.3566398262186697 +73,0.368513682832951,0.22484190975093934,0.7207761550523548,-0.4607733151206031 +74,-1.6353304746550132,-1.0835890398703607,0.6240782484796151,1.497716990815385 +75,1.2631082191418077,1.9388688317848526,0.43069457351954177,-0.1240852286700612 +76,1.4229945541316606,1.685287372911636,0.282616738427184,1.6075806781661712 +77,0.15907038463344916,-1.1862747951875707,-2.162241163696355,0.9048269906929861 +78,0.8724544719304812,-0.06423147646568356,0.28403221059939265,0.7315950326908587 +79,-0.5099002924982818,0.8674753935115029,0.0015306969822590103,-0.793334121698815 +80,0.16756755106838742,-0.8374595440291756,1.871547652925694,-0.019948470822079158 +81,0.5333319586985659,-1.6076411272904392,0.4676478392958759,0.35245743045221734 +82,-0.5292514883314576,-1.2708056558247538,-1.7043012586370947,0.3391676901971921 +83,1.8042184317880245,1.2058943020996364,-2.3228385290614084,1.2008461670776127 +84,0.8671835774935015,0.9953640415286719,-1.4439272409362103,0.9410085688802767 +85,-0.118043369635042,0.41649838899300184,-1.2993225013700294,1.9232397286356342 +86,-0.32517525711392864,0.062481999278585824,-0.27679161049236684,0.06555334954413516 +87,-0.39336711632154264,0.0790516633124132,-0.600204351381406,1.321653482130525 +88,-0.9789171222367312,0.30688902979967303,0.10346158693798674,0.3160642853129814 +89,0.4332454768673768,-0.620828990252391,-1.0710192139922268,0.15027972939295933 +90,3.1092106995021096,0.354640404873306,1.8164064530643516,1.8911595405760606 
+91,0.7027212216033006,-1.9367414347582559,-0.26797308254438235,1.1063820286927997 +92,0.6665636818250888,0.7953561614160027,1.8164132351496374,1.5760380002772454 +93,-1.4931006068027144,0.2680846074746922,-0.30697269318261355,-0.5300118028948997 +94,0.9258476710590248,0.15464742730214845,0.5847769923450901,-0.8405562302565793 +95,0.3015957125126854,2.9697978560379323,2.2793789547159338,0.13951152352691706 +96,0.4109127837045091,0.04501972229381512,0.5969781411176205,1.6443498245829686 +97,0.07956221270863263,0.009072464866011773,-0.6905847540574735,-0.9639714900867246 +98,2.9172401959670817,0.43571229891911717,-0.903738601954934,0.08343820441617454 +99,0.5501333973314503,-0.2511364474548299,1.4945524498890597,-1.1608586317841827 diff --git a/integrationtests/data/35/03_raw_data/001_dataset1/metadata.json b/integrationtests/data/35/03_raw_data/001_dataset1/metadata.json new file mode 100644 index 00000000..f263bc3c --- /dev/null +++ b/integrationtests/data/35/03_raw_data/001_dataset1/metadata.json @@ -0,0 +1,51 @@ +{ + "title": "Random numbers created on a random autumn day in a random office", + "abstract": "For demonstration purposes we created random numbers on a computer in an office of the CLOUD. 
This demonstration dataset is used in the DataCloud, a curated cloud storage for scientific data.", + "Event": [ + { + "longitude": 18.445078548041533, + "datetime": "2022-02-10T16:36:48+01:00", + "start_date": "2021-10-01", + "latitude": 53.10833068997861, + "elevation": 2, + "location": "Bremen, Germany" + } + ], + "license": "CC-BY", + "authors": [ + { + "firstname": "Max", + "lastname": "Schmitt", + "full_name": "Max Schmitt", + "affiliation": "CLOUD", + "ORCID": "0000-0001-6233-1866", + "email": "max.schmitt@email.de" + }, + { + "firstname": "Alexa", + "lastname": "Nozone", + "full_name": "Alexa Nozone", + "affiliation": "CLOUD", + "email": "alexa.nozone@email.de" + } + ], + "comment": "For questions about the DataCloud or this demonstration dataset, contact research-data@email.de", + "project": { + "name": "Demonstration of Extremly important Metadata in Folders", + "full_name": "Project", + "project_acronym": "DEMO", + "project_type": "national", + "institute": "CLOUD", + "start_date": "2021-10-01", + "end_date": "2031-12-31", + "url": "https://www.cloud.de/de/forschung-infrastruktur/forschungsdaten-services.html", + "coordinator": { + "firstname": "Max", + "lastname": "Schmitt", + "email": "max.schmitt@email.de" + } + }, + "method": { + "name": "Random Number Generator" + } +} diff --git a/integrationtests/data/35/03_raw_data/README_RawData.md b/integrationtests/data/35/03_raw_data/README_RawData.md new file mode 100644 index 00000000..2317ff86 --- /dev/null +++ b/integrationtests/data/35/03_raw_data/README_RawData.md @@ -0,0 +1,25 @@ +# Raw Data + +The `03_raw_data` folder is here to store all raw data of each dataset +associated with the project – the data that has not been edited by you yet but +which you plan to use in your research. It can be e.g. your unprocessed field +sampling records, or useful data from an online repository. Organize your data +in this folder in the following way: + +- Each dataset should reside inside a subfolder. 
It is recommended to number and name these folders clearly, e.g. `03_raw_data/001_precipitationgermany2017`. + +- **IMPORTANT**: provide the folder with information about your raw data by + filling out a metadata form for each of your datasets! For this, + + - either copy the `metadata-template.json` file and put it into your dataset + folder. Open the copy with a text editor and fill out the fields. + - or use the metadata editor in the DataCloud web client (press the "+" button + and use "New metadata.json" file) + + If you can’t find information about your data to fill in here, you should + reconsider using it - it is important to be able to trace your data sources to + ensure a FAIR scientific process! + +- For processing any of the data, make a copy of the dataset and paste it into + the `04_data_processing` folder. This way, you make sure to keep your raw data + in its original state. \ No newline at end of file diff --git a/integrationtests/data/35/03_raw_data/metadata-template.json b/integrationtests/data/35/03_raw_data/metadata-template.json new file mode 100644 index 00000000..7f457d23 --- /dev/null +++ b/integrationtests/data/35/03_raw_data/metadata-template.json @@ -0,0 +1,52 @@ +{ + "dataset": { + "title": "", + "abstract": "See https://github.com/CLOUD/metadata-schema for schema specification", + "license": "CC-BY", + "authors": [ + { + "firstname": "", + "lastname": "", + "affiliation": "", + "ORCID": "XXXX-XXXX-XXXX-XXXX", + "email": "name@domain.de" + }, + { + "firstname": "", + "lastname": "", + "affiliation": "", + "email": "name@domain.de", + "ORCID": "XXXX-XXXX-XXXX-XXXX" + } + ], + "project": { + "name": "", + "acronym": "", + "type": "DFG/", + "institute": "CLOUD", + "start_date": "YYYY-MM-DD", + "end_date": "YYYY-MM-DD", + "url": "", + "coordinator": { + "lastname": "", + "email": "", + "firstname": "" + } + }, + "events_in_data": false, + "events": [ + { + "longitude": 0, + "latitude": 0, + "elevation": 0, + "location": "", + "datetime": 
"YYYY-MM-DDTHH:mm:ss" + } + ], + "method": { + "name": "", + "url": "" + }, + "max_files": 100 + } +} diff --git a/integrationtests/data/35/04_data_processing/README_ProcessedData.md b/integrationtests/data/35/04_data_processing/README_ProcessedData.md new file mode 100644 index 00000000..ce1b002b --- /dev/null +++ b/integrationtests/data/35/04_data_processing/README_ProcessedData.md @@ -0,0 +1,10 @@ +# Data Processing + +The actual work is done in this `04_data_processing` folder. Depending on your +field and type and size of project, you can organize this folder in the way that +fits your process best. Here, a bit of chaos can happen ;) Keep in mind to +document your processing steps in the `02_materials_and_methods` folder and to +put in your final results into the `05_results` folder. In the end of your +project, it should be possible to delete everything in this folder and +reconstruct the working process using the documentation and raw data from +previous folders. diff --git a/integrationtests/data/35/04_data_processing/metadata-template.json b/integrationtests/data/35/04_data_processing/metadata-template.json new file mode 100644 index 00000000..05f9394d --- /dev/null +++ b/integrationtests/data/35/04_data_processing/metadata-template.json @@ -0,0 +1,52 @@ +{ + "dataset": { + "title": "", + "abstract": "See https://github.com/cloud/metadata-schema for schema specification", + "license": "CC-BY", + "authors": [ + { + "firstname": "", + "lastname": "", + "affiliation": "", + "ORCID": "XXXX-XXXX-XXXX-XXXX", + "email": "name@domain.de" + }, + { + "firstname": "", + "lastname": "", + "affiliation": "", + "email": "name@domain.de", + "ORCID": "XXXX-XXXX-XXXX-XXXX" + } + ], + "project": { + "name": "", + "acronym": "", + "type": "DFG/", + "institute": "CLOUD", + "start_date": "YYYY-MM-DD", + "end_date": "YYYY-MM-DD", + "url": "", + "coordinator": { + "lastname": "", + "email": "", + "firstname": "" + } + }, + "events_in_data": false, + "events": [ + { + 
"longitude": 0, + "latitude": 0, + "elevation": 0, + "location": "", + "datetime": "YYYY-MM-DDTHH:mm:ss" + } + ], + "method": { + "name": "", + "url": "" + }, + "max_files": 100 + } +} diff --git a/integrationtests/data/35/05_results/README_Results.md b/integrationtests/data/35/05_results/README_Results.md new file mode 100644 index 00000000..ae0ab657 --- /dev/null +++ b/integrationtests/data/35/05_results/README_Results.md @@ -0,0 +1,7 @@ +# Results + +All the results that are final versions of your data analysis or processing, +should be copied into this `05_results` folder. Organize your results folder in +the way most fitting to your project. + +Provide metadata to your results files. diff --git a/integrationtests/data/35/README.md b/integrationtests/data/35/README.md new file mode 100644 index 00000000..809d699c --- /dev/null +++ b/integrationtests/data/35/README.md @@ -0,0 +1,5 @@ +# Dataspace: DEMO + +This is a Dataspace in the CLOUD DataCloud providing safe, curated cloud storage +for all of CLOUD's research data. 
+ diff --git a/integrationtests/test.py b/integrationtests/test.py index efff6430..2d881800 100755 --- a/integrationtests/test.py +++ b/integrationtests/test.py @@ -28,6 +28,7 @@ module description """ +import os from caosdb import EmptyUniqueQueryError import argparse import sys @@ -42,7 +43,6 @@ import yaml from caosadvancedtools.testutils import clear_database, set_test_key set_test_key("10b128cf8a1372f30aa3697466bb55e76974e0c16a599bb44ace88f19c8f61e2") -import os def rfp(*pathcomponents): """ @@ -52,14 +52,12 @@ def rfp(*pathcomponents): return os.path.join(os.path.dirname(__file__), *pathcomponents) - - @pytest.fixture def usemodel(): model = parse_model_from_yaml(rfp("model.yml")) model.sync_data_model(noquestion=True, verbose=False) - + @pytest.fixture def ident(): ident = CaosDBIdentifiableAdapter() @@ -68,12 +66,12 @@ def ident(): ident.register_identifiable( "Person", db.RecordType() .add_parent(name="Person") - #.add_property(name="first_name") + # .add_property(name="first_name") .add_property(name="last_name")) ident.register_identifiable( "Measurement", db.RecordType() .add_parent(name="Measurement") - #.add_property(name="identifier") + # .add_property(name="identifier") .add_property(name="date") .add_property(name="project")) ident.register_identifiable( @@ -150,6 +148,7 @@ def test_multiple_insertions(clear_database, usemodel, ident, crawler): assert len(ins) == 0 assert len(ups) == 0 + def test_insertion(clear_database, usemodel, ident, crawler): ins, ups = crawler.synchronize() @@ -169,6 +168,7 @@ def test_insertion(clear_database, usemodel, ident, crawler): assert len(ins) == 0 assert len(ups) == 0 + def test_insertion_and_update(clear_database, usemodel, ident, crawler): ins, ups = crawler.synchronize() @@ -184,7 +184,8 @@ def test_insertion_and_update(clear_database, usemodel, ident, crawler): ins, ups = cr.synchronize() assert len(ins) == 0 assert len(ups) == 1 - + + def test_identifiable_update(clear_database, usemodel, ident, crawler): 
ins, ups = crawler.synchronize() @@ -197,7 +198,7 @@ def test_identifiable_update(clear_database, usemodel, ident, crawler): l = cr.updateList for record in l: if (record.parents[0].name == "Measurement" and - record.get_property("date").value == "2020-01-03"): + record.get_property("date").value == "2020-01-03"): # maybe a bit weird, but add an email address to a measurement record.add_property(name="email", value="testperson@testaccount.test") print("one change") @@ -206,14 +207,13 @@ def test_identifiable_update(clear_database, usemodel, ident, crawler): assert len(ins) == 0 assert len(ups) == 1 - # Test the change within one property: cr = Crawler(debug=True, identifiableAdapter=ident) crawl_standard_test_directory(cr) l = cr.updateList for record in l: if (record.parents[0].name == "Measurement" and - record.get_property("date").value == "2020-01-03"): + record.get_property("date").value == "2020-01-03"): record.add_property(name="email", value="testperson@coolmail.test") print("one change") break @@ -227,7 +227,7 @@ def test_identifiable_update(clear_database, usemodel, ident, crawler): l = cr.updateList for record in l: if (record.parents[0].name == "Measurement" and - record.get_property("date").value == "2020-01-03"): + record.get_property("date").value == "2020-01-03"): record.add_property(name="email", value="testperson@coolmail.test") record.get_property("date").value = "2012-01-02" print("one change") @@ -269,6 +269,7 @@ def test_file_insertion(clear_database, usemodel, ident, crawler_extended): assert len(r) == 1 assert r[0].get_property("ReadmeFile").value == f.id + def test_file_update(clear_database, usemodel, ident, crawler_extended): ins1, ups1 = crawler_extended.synchronize(commit_changes=True) fileList_ins = [r for r in ins1 if r.role == "File"] @@ -288,7 +289,7 @@ def test_file_update(clear_database, usemodel, ident, crawler_extended): res = db.execute_query("Find File") assert len(res) == 11 assert len(res[0].parents) == 0 - + cr2 = 
Crawler(debug=True, identifiableAdapter=ident) crawl_standard_test_directory(cr2, cfood="scifolder_extended2.yml") @@ -302,7 +303,6 @@ def test_file_update(clear_database, usemodel, ident, crawler_extended): res = db.execute_query("Find File") assert len(res) == 11 assert res[0].parents[0].name == "ProjectMarkdownReadme" - # TODO: Implement file update checks (based on checksum) # Add test with actual file update: -- GitLab