Skip to content
Snippets Groups Projects
Commit d2ce1ec9 authored by Henrik tom Wörden's avatar Henrik tom Wörden
Browse files

data and styling

parent 7a178ca1
Branches
Tags
2 merge requests!53Release 0.1,!18Add integrationtests based on a real world example
{
"name": "DEMO",
"dataspace_id": 20002,
"archived": false,
"coordinator": {
"full_name": "Max Schmitt",
"firstname": "Max",
"lastname": "Schmitt",
"email": "max.schmitt@email.de"
},
"start_date": "2022-03-01",
"end_date": "2032-02-28",
"comment": "Demonstration data space for DataCloud",
"url": "https://datacloud.de/index.php/f/7679"
}
index,A[kg],B[s],pH,Temp.[C]
0,2.1209183975976957,-0.5658499891692009,-0.8391639362482752,0.6210332089995103
1,-1.2155508955759597,-1.0141121577750831,0.2503340429095144,0.7560156296323594
2,-1.0191141299527218,-1.5870495901656396,0.6811842117478961,-0.25776671384531147
3,-0.8235788683146266,1.1688759819188137,-0.15841036014621737,0.24351773490785233
4,-2.028210212186099,-0.15000944869896093,0.7344551834722798,-1.0594635581726441
5,0.8578345931586077,-1.0478958942647336,-0.5059960285526023,0.6141193812881873
6,-0.7585068400011461,-0.45812334415522366,-0.6299981228519985,-0.072295788065162
7,-0.34875455645064296,-0.49936600901639105,0.08492189470338947,0.24398792231786676
8,-0.491473523786921,-1.1815449374689073,-0.23631388788457763,0.8801868915647684
9,-1.291852196630842,0.4956544058017087,1.7176555991727498,1.8889309443940632
10,-0.974327795079914,-0.6002779223325445,1.4950878953418667,-0.4750187681874636
11,0.863708396863823,0.4867513929363103,-1.2500529683835453,2.1711592870838112
12,-1.0518542498779602,-0.6800136223939168,-0.5593377295003794,-0.23451862458342732
13,0.013421028872223972,-1.7652967848993042,0.302518679323854,1.124258888392337
14,1.1387734213591119,-0.5602347718731282,-0.6908747870526222,0.905906598269778
15,-1.8032949181114486,0.18858416406523845,1.0083249532267977,0.6969475009127225
16,-0.42755813629599176,-1.0354063212247375,-0.24666198541039489,-1.2245102779938972
17,-0.558268266895522,-1.4564784210249142,1.6162446783371565,-0.6109432350045504
18,-0.9759505344957924,-2.780175134826593,3.039543722358096,-1.258487109407404
19,-0.042261223623348665,0.7827311969447484,0.8902139085357877,0.33130889065513175
20,-0.43764310886282315,-0.8338864816830261,0.8545198929035823,-0.8330242660029193
21,0.2910454990578444,0.40786200750721635,-0.8115126892604917,0.7108997766944964
22,0.41446462010439317,-1.0965365861313923,-0.1816041240266455,-0.18304466068648742
23,-0.5187715545823834,-0.46490147833949275,-0.5059346590831783,0.6998562249774912
24,2.4491154744839005,-0.3554192977203785,-0.6604902675826654,-0.9434392815439072
25,-0.5083188860395834,0.2781724921583019,-0.4340136020292349,0.02629089617543565
26,-0.9854213292611846,-1.398313530263303,0.05552818415139104,-0.20282242071816114
27,1.0808664341388348,-0.681501179909626,0.6492258431774035,-0.41774069067997716
28,-1.1767497846165254,1.0817469159915034,-1.524089495721789,0.703812702135731
29,0.19626402088297137,-1.731421126100085,0.33753714074823216,1.167207071332792
30,-1.1808345594828473,-0.2820078693924212,-0.8720833031493173,0.8643708946275879
31,0.8284163458216123,0.20722015645321426,0.29071068457985955,2.6180265991342315
32,-0.08203655784081282,0.060308831720906446,0.9519485488783623,0.7350446746473065
33,-0.9071581669506105,0.6088044300190749,1.0099718941738625,0.002799079788086574
34,-0.42977850177492904,1.2900375327057412,0.32028642454115197,0.8301665482611077
35,1.0852695299159272,-0.7040390830488096,0.7964627034904589,0.5892571532287761
36,-1.5667114288837196,0.19932071915614016,-1.0037399027933205,0.5715977614420107
37,1.3367378436097728,-0.4584285824179284,-0.4435084312392094,-1.3448283883056802
38,-0.03788754387000687,-0.37288494267798383,-0.5643391975832854,0.43485956543590193
39,1.0634390535750102,1.0881233131592658,1.2921865320956318,-0.07293734130819148
40,1.6786504380461766,-0.03043290400609124,2.66472625811549,-0.016638240963738466
41,-1.657581538683817,0.8240214695327108,0.32914391919723984,0.08007211199118686
42,0.04171224685709963,-0.9854865121535178,-0.3195510216437891,-0.42540430453161987
43,0.6506526831507736,-1.159358101323352,-1.2789107289085737,0.10499609768025624
44,0.7402635450212406,-0.44202303578095753,-0.5748164371395315,0.5600113473434154
45,-0.9809738202025933,0.16868168368656386,-1.5883259666916558,-2.6955712214488345
46,-1.8495816486925372,-1.6954982682847552,1.387648046113911,0.8455399256972358
47,1.0442607146494682,0.44438084748213075,-0.6547675875380801,-0.5557546828614935
48,0.32905474147201974,-0.7323591467714324,0.8555098512789541,2.4647371835928196
49,-2.5131333956577557,1.4005121469909907,-2.162216422615549,-0.3797761578463441
50,-1.406182674849582,-0.33037485118390236,-0.30720520090625775,0.3765108318500068
51,1.4315461764484496,0.4490657382715407,0.14688708820540236,-1.3363710028523919
52,-1.3546100448551868,0.35309094153560083,1.1314974294901488,-0.8299500202669722
53,-0.7668372422803534,1.3427856896905794,0.11144680945543838,0.5488627384438831
54,2.6377507721791997,1.86851303077989,0.1358347611054535,0.0021631807468969044
55,-0.2814604476092987,-0.8110890245372547,0.2914246407211869,1.3009776744589592
56,-0.08220515064674763,0.06131679740379093,1.2240755323078314,1.6651435947789437
57,-1.5833977101974248,-1.0390852809695386,0.9472604405151627,-1.1238493594739791
58,0.060801913229076375,-1.1395369395697963,-0.6773504352336591,-0.7823844332031786
59,0.3719151864023754,-2.6589573541115885,0.9138765623088898,1.9179285751965107
60,0.16875501543121765,-0.21075290840365637,-0.15712808326461272,-1.095263810678978
61,-0.6676220651512094,-2.392512574657398,-0.1970135407082481,1.1303688380560521
62,-0.3508037371211798,0.37103055819752395,0.1319143246551687,-0.8442765717512588
63,0.5744187610995448,0.2824163982139891,-0.23250484081352427,-0.009480528299369923
64,-1.033847039653939,-0.6062251775571341,0.8745680740327043,0.10611431160660695
65,0.8616095853453857,-0.7902852788672261,0.9924735544245377,-0.39017477285341734
66,-0.25797403501959537,0.9776756368066659,-0.1774701795502288,0.8457628045096433
67,0.1879011473947124,0.4880410431165719,0.33941695573743247,-0.3098695458944371
68,0.12908240475251795,-0.3929831705571321,-0.9815115481276334,-0.6596680503662373
69,0.47271005121390686,-0.27585706457228726,0.659750762879994,-1.621655291178758
70,1.2805576221047092,1.255690982276119,0.8893506172744224,0.36843763617254915
71,-1.8222077377715007,-1.2618097663744718,-1.2393746501949852,0.22742537143827415
72,-0.7670935921671362,0.6632357605887813,-1.8652052380554516,-0.3566398262186697
73,0.368513682832951,0.22484190975093934,0.7207761550523548,-0.4607733151206031
74,-1.6353304746550132,-1.0835890398703607,0.6240782484796151,1.497716990815385
75,1.2631082191418077,1.9388688317848526,0.43069457351954177,-0.1240852286700612
76,1.4229945541316606,1.685287372911636,0.282616738427184,1.6075806781661712
77,0.15907038463344916,-1.1862747951875707,-2.162241163696355,0.9048269906929861
78,0.8724544719304812,-0.06423147646568356,0.28403221059939265,0.7315950326908587
79,-0.5099002924982818,0.8674753935115029,0.0015306969822590103,-0.793334121698815
80,0.16756755106838742,-0.8374595440291756,1.871547652925694,-0.019948470822079158
81,0.5333319586985659,-1.6076411272904392,0.4676478392958759,0.35245743045221734
82,-0.5292514883314576,-1.2708056558247538,-1.7043012586370947,0.3391676901971921
83,1.8042184317880245,1.2058943020996364,-2.3228385290614084,1.2008461670776127
84,0.8671835774935015,0.9953640415286719,-1.4439272409362103,0.9410085688802767
85,-0.118043369635042,0.41649838899300184,-1.2993225013700294,1.9232397286356342
86,-0.32517525711392864,0.062481999278585824,-0.27679161049236684,0.06555334954413516
87,-0.39336711632154264,0.0790516633124132,-0.600204351381406,1.321653482130525
88,-0.9789171222367312,0.30688902979967303,0.10346158693798674,0.3160642853129814
89,0.4332454768673768,-0.620828990252391,-1.0710192139922268,0.15027972939295933
90,3.1092106995021096,0.354640404873306,1.8164064530643516,1.8911595405760606
91,0.7027212216033006,-1.9367414347582559,-0.26797308254438235,1.1063820286927997
92,0.6665636818250888,0.7953561614160027,1.8164132351496374,1.5760380002772454
93,-1.4931006068027144,0.2680846074746922,-0.30697269318261355,-0.5300118028948997
94,0.9258476710590248,0.15464742730214845,0.5847769923450901,-0.8405562302565793
95,0.3015957125126854,2.9697978560379323,2.2793789547159338,0.13951152352691706
96,0.4109127837045091,0.04501972229381512,0.5969781411176205,1.6443498245829686
97,0.07956221270863263,0.009072464866011773,-0.6905847540574735,-0.9639714900867246
98,2.9172401959670817,0.43571229891911717,-0.903738601954934,0.08343820441617454
99,0.5501333973314503,-0.2511364474548299,1.4945524498890597,-1.1608586317841827
{
"title": "Random numbers created on a random autumn day in a random office",
"abstract": "For demonstration purposes we created random numbers on a computer in an office of the CLOUD. This demonstration dataset is used in the DataCloud, a curated cloud storage for scientific data.",
"Event": [
{
"longitude": 18.445078548041533,
"datetime": "2022-02-10T16:36:48+01:00",
"start_date": "2021-10-01",
"latitude": 53.10833068997861,
"elevation": 2,
"location": "Bremen, Germany"
}
],
"license": "CC-BY",
"authors": [
{
"firstname": "Max",
"lastname": "Schmitt",
"full_name": "Max Schmitt",
"affiliation": "CLOUD",
"ORCID": "0000-0001-6233-1866",
"email": "max.schmitt@email.de"
},
{
"firstname": "Alexa",
"lastname": "Nozone",
"full_name": "Alexa Nozone",
"affiliation": "CLOUD",
"email": "alexa.nozone@email.de"
}
],
"comment": "For questions about the DataCloud or this demonstration dataset, contact research-data@email.de",
"project": {
"name": "Demonstration of Extremly important Metadata in Folders",
"full_name": "Project",
"project_acronym": "DEMO",
"project_type": "national",
"institute": "CLOUD",
"start_date": "2021-10-01",
"end_date": "2031-12-31",
"url": "https://www.cloud.de/de/forschung-infrastruktur/forschungsdaten-services.html",
"coordinator": {
"firstname": "Max",
"lastname": "Schmitt",
"email": "max.schmitt@email.de"
}
},
"method": {
"name": "Random Number Generator"
}
}
# Raw Data
The `03_raw_data` folder is here to store all raw data of each dataset
associated with the project – the data that has not been edited by you yet but
which you plan to use in your research. It can be e.g. your unprocessed field
sampling records, or useful data from an online repository. Organize your data
in this folder in the following way:
- Each dataset should reside inside a subfolder. It is recommended to number and name these folders clearly, e.g. `03_raw_data/001_precipitationgermany2017`.
- **IMPORTANT**: provide the folder with information about your raw data by
filling out a metadata form for each of your datasets! For this,
- either copy the `metadata-template.json` file and put it into your dataset
folder. Open the copy with a text editor and fill out the fields.
- or use the metadata editor in the DataCloud web client (press the "+" button
and use "New metadata.json" file)
If you can’t find information about your data to fill in here, you should
reconsider using it - it is important to be able to trace your data sources to
ensure a FAIR scientific process!
- For processing any of the data, make a copy of the dataset and paste it into
the `04_data_processing` folder. This way, you make sure to keep your raw data
in its original state.
\ No newline at end of file
{
"dataset": {
"title": "",
"abstract": "See https://github.com/CLOUD/metadata-schema for schema specification",
"license": "CC-BY",
"authors": [
{
"firstname": "",
"lastname": "",
"affiliation": "",
"ORCID": "XXXX-XXXX-XXXX-XXXX",
"email": "name@domain.de"
},
{
"firstname": "",
"lastname": "",
"affiliation": "",
"email": "name@domain.de",
"ORCID": "XXXX-XXXX-XXXX-XXXX"
}
],
"project": {
"name": "",
"acronym": "",
"type": "DFG/",
"institute": "CLOUD",
"start_date": "YYYY-MM-DD",
"end_date": "YYYY-MM-DD",
"url": "",
"coordinator": {
"lastname": "",
"email": "",
"firstname": ""
}
},
"events_in_data": false,
"events": [
{
"longitude": 0,
"latitude": 0,
"elevation": 0,
"location": "",
"datetime": "YYYY-MM-DDTHH:mm:ss"
}
],
"method": {
"name": "",
"url": ""
},
"max_files": 100
}
}
# Data Processing
The actual work is done in this `04_data_processing` folder. Depending on your
field and type and size of project, you can organize this folder in the way that
fits your process best. Here, a bit of chaos can happen ;) Keep in mind to
document your processing steps in the `02_materials_and_methods` folder and to
put your final results into the `05_results` folder. At the end of your
project, it should be possible to delete everything in this folder and
reconstruct the working process using the documentation and raw data from
previous folders.
{
"dataset": {
"title": "",
"abstract": "See https://github.com/cloud/metadata-schema for schema specification",
"license": "CC-BY",
"authors": [
{
"firstname": "",
"lastname": "",
"affiliation": "",
"ORCID": "XXXX-XXXX-XXXX-XXXX",
"email": "name@domain.de"
},
{
"firstname": "",
"lastname": "",
"affiliation": "",
"email": "name@domain.de",
"ORCID": "XXXX-XXXX-XXXX-XXXX"
}
],
"project": {
"name": "",
"acronym": "",
"type": "DFG/",
"institute": "CLOUD",
"start_date": "YYYY-MM-DD",
"end_date": "YYYY-MM-DD",
"url": "",
"coordinator": {
"lastname": "",
"email": "",
"firstname": ""
}
},
"events_in_data": false,
"events": [
{
"longitude": 0,
"latitude": 0,
"elevation": 0,
"location": "",
"datetime": "YYYY-MM-DDTHH:mm:ss"
}
],
"method": {
"name": "",
"url": ""
},
"max_files": 100
}
}
# Results
All results that are final versions of your data analysis or processing
should be copied into this `05_results` folder. Organize your results folder in
the way most fitting to your project.
Provide metadata for your result files.
# Dataspace: DEMO
This is a Dataspace in the CLOUD DataCloud providing safe, curated cloud storage
for all of CLOUD's research data.
...@@ -28,6 +28,7 @@ ...@@ -28,6 +28,7 @@
module description module description
""" """
import os
from caosdb import EmptyUniqueQueryError from caosdb import EmptyUniqueQueryError
import argparse import argparse
import sys import sys
...@@ -42,7 +43,6 @@ import yaml ...@@ -42,7 +43,6 @@ import yaml
from caosadvancedtools.testutils import clear_database, set_test_key from caosadvancedtools.testutils import clear_database, set_test_key
set_test_key("10b128cf8a1372f30aa3697466bb55e76974e0c16a599bb44ace88f19c8f61e2") set_test_key("10b128cf8a1372f30aa3697466bb55e76974e0c16a599bb44ace88f19c8f61e2")
import os
def rfp(*pathcomponents): def rfp(*pathcomponents):
""" """
...@@ -52,14 +52,12 @@ def rfp(*pathcomponents): ...@@ -52,14 +52,12 @@ def rfp(*pathcomponents):
return os.path.join(os.path.dirname(__file__), *pathcomponents) return os.path.join(os.path.dirname(__file__), *pathcomponents)
@pytest.fixture @pytest.fixture
def usemodel(): def usemodel():
model = parse_model_from_yaml(rfp("model.yml")) model = parse_model_from_yaml(rfp("model.yml"))
model.sync_data_model(noquestion=True, verbose=False) model.sync_data_model(noquestion=True, verbose=False)
@pytest.fixture @pytest.fixture
def ident(): def ident():
ident = CaosDBIdentifiableAdapter() ident = CaosDBIdentifiableAdapter()
...@@ -68,12 +66,12 @@ def ident(): ...@@ -68,12 +66,12 @@ def ident():
ident.register_identifiable( ident.register_identifiable(
"Person", db.RecordType() "Person", db.RecordType()
.add_parent(name="Person") .add_parent(name="Person")
#.add_property(name="first_name") # .add_property(name="first_name")
.add_property(name="last_name")) .add_property(name="last_name"))
ident.register_identifiable( ident.register_identifiable(
"Measurement", db.RecordType() "Measurement", db.RecordType()
.add_parent(name="Measurement") .add_parent(name="Measurement")
#.add_property(name="identifier") # .add_property(name="identifier")
.add_property(name="date") .add_property(name="date")
.add_property(name="project")) .add_property(name="project"))
ident.register_identifiable( ident.register_identifiable(
...@@ -150,6 +148,7 @@ def test_multiple_insertions(clear_database, usemodel, ident, crawler): ...@@ -150,6 +148,7 @@ def test_multiple_insertions(clear_database, usemodel, ident, crawler):
assert len(ins) == 0 assert len(ins) == 0
assert len(ups) == 0 assert len(ups) == 0
def test_insertion(clear_database, usemodel, ident, crawler): def test_insertion(clear_database, usemodel, ident, crawler):
ins, ups = crawler.synchronize() ins, ups = crawler.synchronize()
...@@ -169,6 +168,7 @@ def test_insertion(clear_database, usemodel, ident, crawler): ...@@ -169,6 +168,7 @@ def test_insertion(clear_database, usemodel, ident, crawler):
assert len(ins) == 0 assert len(ins) == 0
assert len(ups) == 0 assert len(ups) == 0
def test_insertion_and_update(clear_database, usemodel, ident, crawler): def test_insertion_and_update(clear_database, usemodel, ident, crawler):
ins, ups = crawler.synchronize() ins, ups = crawler.synchronize()
...@@ -184,7 +184,8 @@ def test_insertion_and_update(clear_database, usemodel, ident, crawler): ...@@ -184,7 +184,8 @@ def test_insertion_and_update(clear_database, usemodel, ident, crawler):
ins, ups = cr.synchronize() ins, ups = cr.synchronize()
assert len(ins) == 0 assert len(ins) == 0
assert len(ups) == 1 assert len(ups) == 1
def test_identifiable_update(clear_database, usemodel, ident, crawler): def test_identifiable_update(clear_database, usemodel, ident, crawler):
ins, ups = crawler.synchronize() ins, ups = crawler.synchronize()
...@@ -197,7 +198,7 @@ def test_identifiable_update(clear_database, usemodel, ident, crawler): ...@@ -197,7 +198,7 @@ def test_identifiable_update(clear_database, usemodel, ident, crawler):
l = cr.updateList l = cr.updateList
for record in l: for record in l:
if (record.parents[0].name == "Measurement" and if (record.parents[0].name == "Measurement" and
record.get_property("date").value == "2020-01-03"): record.get_property("date").value == "2020-01-03"):
# maybe a bit weird, but add an email address to a measurement # maybe a bit weird, but add an email address to a measurement
record.add_property(name="email", value="testperson@testaccount.test") record.add_property(name="email", value="testperson@testaccount.test")
print("one change") print("one change")
...@@ -206,14 +207,13 @@ def test_identifiable_update(clear_database, usemodel, ident, crawler): ...@@ -206,14 +207,13 @@ def test_identifiable_update(clear_database, usemodel, ident, crawler):
assert len(ins) == 0 assert len(ins) == 0
assert len(ups) == 1 assert len(ups) == 1
# Test the change within one property: # Test the change within one property:
cr = Crawler(debug=True, identifiableAdapter=ident) cr = Crawler(debug=True, identifiableAdapter=ident)
crawl_standard_test_directory(cr) crawl_standard_test_directory(cr)
l = cr.updateList l = cr.updateList
for record in l: for record in l:
if (record.parents[0].name == "Measurement" and if (record.parents[0].name == "Measurement" and
record.get_property("date").value == "2020-01-03"): record.get_property("date").value == "2020-01-03"):
record.add_property(name="email", value="testperson@coolmail.test") record.add_property(name="email", value="testperson@coolmail.test")
print("one change") print("one change")
break break
...@@ -227,7 +227,7 @@ def test_identifiable_update(clear_database, usemodel, ident, crawler): ...@@ -227,7 +227,7 @@ def test_identifiable_update(clear_database, usemodel, ident, crawler):
l = cr.updateList l = cr.updateList
for record in l: for record in l:
if (record.parents[0].name == "Measurement" and if (record.parents[0].name == "Measurement" and
record.get_property("date").value == "2020-01-03"): record.get_property("date").value == "2020-01-03"):
record.add_property(name="email", value="testperson@coolmail.test") record.add_property(name="email", value="testperson@coolmail.test")
record.get_property("date").value = "2012-01-02" record.get_property("date").value = "2012-01-02"
print("one change") print("one change")
...@@ -269,6 +269,7 @@ def test_file_insertion(clear_database, usemodel, ident, crawler_extended): ...@@ -269,6 +269,7 @@ def test_file_insertion(clear_database, usemodel, ident, crawler_extended):
assert len(r) == 1 assert len(r) == 1
assert r[0].get_property("ReadmeFile").value == f.id assert r[0].get_property("ReadmeFile").value == f.id
def test_file_update(clear_database, usemodel, ident, crawler_extended): def test_file_update(clear_database, usemodel, ident, crawler_extended):
ins1, ups1 = crawler_extended.synchronize(commit_changes=True) ins1, ups1 = crawler_extended.synchronize(commit_changes=True)
fileList_ins = [r for r in ins1 if r.role == "File"] fileList_ins = [r for r in ins1 if r.role == "File"]
...@@ -288,7 +289,7 @@ def test_file_update(clear_database, usemodel, ident, crawler_extended): ...@@ -288,7 +289,7 @@ def test_file_update(clear_database, usemodel, ident, crawler_extended):
res = db.execute_query("Find File") res = db.execute_query("Find File")
assert len(res) == 11 assert len(res) == 11
assert len(res[0].parents) == 0 assert len(res[0].parents) == 0
cr2 = Crawler(debug=True, identifiableAdapter=ident) cr2 = Crawler(debug=True, identifiableAdapter=ident)
crawl_standard_test_directory(cr2, cfood="scifolder_extended2.yml") crawl_standard_test_directory(cr2, cfood="scifolder_extended2.yml")
...@@ -302,7 +303,6 @@ def test_file_update(clear_database, usemodel, ident, crawler_extended): ...@@ -302,7 +303,6 @@ def test_file_update(clear_database, usemodel, ident, crawler_extended):
res = db.execute_query("Find File") res = db.execute_query("Find File")
assert len(res) == 11 assert len(res) == 11
assert res[0].parents[0].name == "ProjectMarkdownReadme" assert res[0].parents[0].name == "ProjectMarkdownReadme"
# TODO: Implement file update checks (based on checksum) # TODO: Implement file update checks (based on checksum)
# Add test with actual file update: # Add test with actual file update:
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment