diff --git a/.docker/Dockerfile b/.docker/Dockerfile
index 1ba73a5b3ea0e4969e882ab11990303aa432f3eb..b300a1a97aa22b3eafc91ef89c01bbd7111edd62 100644
--- a/.docker/Dockerfile
+++ b/.docker/Dockerfile
@@ -24,7 +24,7 @@ COPY . /git
 RUN rm -r /git/.git
 
 # Install pycaosdb.ini for the tests
-RUN mv /git/.docker/tester_pycaosdb.ini /git/pycaosdb.ini
+RUN mv /git/.docker/tester_pycaosdb.ini /git/integrationtests/pycaosdb.ini
 
 RUN cd /git/ && pip3 install .
 
@@ -34,4 +34,4 @@ CMD /wait-for-it.sh caosdb-server:10443 -t 500 -- \
     # ... install pycaosdb.ini the server-side scripts
     cp /git/.docker/sss_pycaosdb.ini /scripting/home/.pycaosdb.ini && \
     # ... and run tests
-    pytest .
+    pytest-3 .
diff --git a/integrationtests/README.md b/integrationtests/README.md
index 5c308f51a332d5a930f91eb30f0d93032ae47627..96789ed9f02036a0c7cc25ca1a60d9f0042a5557 100644
--- a/integrationtests/README.md
+++ b/integrationtests/README.md
@@ -1,3 +1,2 @@
-1. Clear database (see clear_database.py)
-2. Insert model (see insert_model.py)
-3. Run test.py
+1. Mount test_data/extroot as extroot folder in the CaosDB server
+2. Use an empty CaosDB server
diff --git a/integrationtests/model.yml b/integrationtests/basic_example/model.yml
similarity index 100%
rename from integrationtests/model.yml
rename to integrationtests/basic_example/model.yml
diff --git a/integrationtests/test.py b/integrationtests/basic_example/test.py
similarity index 99%
rename from integrationtests/test.py
rename to integrationtests/basic_example/test.py
index 2d8818005ee4350bd3b1403ae71c315c6d5c1a14..35ee991808a0194357bf9cdc6e8f6c990d282a10 100755
--- a/integrationtests/test.py
+++ b/integrationtests/basic_example/test.py
@@ -40,6 +40,7 @@ import pytest
 from caosadvancedtools.models.parser import parse_model_from_yaml
 import yaml
 
+# TODO: caosadvancedtools.testutils is not yet merged in caosadvancedtools
 from caosadvancedtools.testutils import clear_database, set_test_key
 set_test_key("10b128cf8a1372f30aa3697466bb55e76974e0c16a599bb44ace88f19c8f61e2")
 
diff --git a/integrationtests/realworld_example/crawl.sh b/integrationtests/realworld_example/crawl.sh
new file mode 100755
index 0000000000000000000000000000000000000000..55a2a331fe517a539e2dd937ac35605c72b496c9
--- /dev/null
+++ b/integrationtests/realworld_example/crawl.sh
@@ -0,0 +1,4 @@
+#!/bin/bash
+python -m caosadvancedtools.loadFiles /opt/caosdb/mnt/extroot/data
+python load_and_insert_json_models.py
+python test_dataset_crawler.py
diff --git a/integrationtests/realworld_example/load_and_insert_json_models.py b/integrationtests/realworld_example/load_and_insert_json_models.py
new file mode 100644
index 0000000000000000000000000000000000000000..682fd9c77531e63ed18dd13417399ad0d18a8de2
--- /dev/null
+++ b/integrationtests/realworld_example/load_and_insert_json_models.py
@@ -0,0 +1,47 @@
+#!/usr/bin/env python3
+# encoding: utf-8
+#
+# This file is a part of the CaosDB Project.
+#
+# Copyright (C) 2022 Indiscale GmbH <info@indiscale.com>
+# Copyright (C) 2022 Henrik tom Wörden <h.tomwoerden@indiscale.com>
+# Copyright (C) 2022 Florian Spreckelsen <f.spreckelsen@indiscale.com>
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as
+# published by the Free Software Foundation, either version 3 of the
+# License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+#
+import sys
+
+from caosadvancedtools.models.parser import parse_model_from_json_schema, parse_model_from_yaml
+
+
+def main():
+    # First parse the dataspace data model from its JSON schema and sync it
+    dataspace_definitions = parse_model_from_json_schema(
+        "schema/dataspace.schema.json")
+    dataspace_definitions.sync_data_model(noquestion=True)
+
+    # Then parse and sync the general dataset definitions (also a JSON schema)
+    dataset_definitions = parse_model_from_json_schema(
+        "schema/dataset.schema.json")
+    dataset_definitions.sync_data_model(noquestion=True)
+
+    # Finally, add inheritances as defined in yaml (all schema/ paths are relative to the cwd)
+    dataset_inherits = parse_model_from_yaml(
+        "schema/dataset-inheritance.yml")
+    dataset_inherits.sync_data_model(noquestion=True)
+
+
+if __name__ == "__main__":
+
+    sys.exit(main())
diff --git a/integrationtests/realworld_example/test_dataset_crawler.py b/integrationtests/realworld_example/test_dataset_crawler.py
new file mode 100644
index 0000000000000000000000000000000000000000..206020cef67b20716bacfa99399d63bb8e06360e
--- /dev/null
+++ b/integrationtests/realworld_example/test_dataset_crawler.py
@@ -0,0 +1,122 @@
+#!/usr/bin/env python3
+# encoding: utf-8
+#
+# This file is a part of the CaosDB Project.
+#
+# Copyright (C) 2022 Indiscale GmbH <info@indiscale.com>
+# Copyright (C) 2022 Henrik tom Wörden <h.tomwoerden@indiscale.com>
+# Copyright (C) 2022 Florian Spreckelsen <f.spreckelsen@indiscale.com>
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as
+# published by the Free Software Foundation, either version 3 of the
+# License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+#
+
+"""
+Integration test: crawl the realworld_example test data and verify the synchronized records.
+"""
+import json
+import os
+
+import caosdb as db
+
+from newcrawler.crawl import Crawler
+from newcrawler.converters import JSONFileConverter, DictConverter
+from newcrawler.identifiable_adapters import CaosDBIdentifiableAdapter
+from newcrawler.structure_elements import File, JSONFile, Directory
+import pytest
+from caosadvancedtools.models.parser import parse_model_from_json_schema, parse_model_from_yaml
+
+#from caosadvancedtools.testutils import clear_database, set_test_key
+import sys
+
+# TODO: caosadvancedtools.testutils is not yet merged in caosadvancedtools
+# set_test_key("10b128cf8a1372f30aa3697466bb55e76974e0c16a599bb44ace88f19c8f61e2")
+
+
+def rfp(*pathcomponents):
+    """
+    Join ``pathcomponents`` onto the directory containing this file and return the result.
+    Shorthand convenience function.
+    """
+    return os.path.join(os.path.dirname(__file__), *pathcomponents)
+
+
+DATADIR = rfp("..", "test_data", "extroot", "realworld_example")
+
+
+@pytest.fixture
+def usemodel():
+    # First parse the dataspace data model from its JSON schema below DATADIR and sync it
+    dataspace_definitions = parse_model_from_json_schema(
+        os.path.join(DATADIR, "schema", "dataspace.schema.json"))
+    dataspace_definitions.sync_data_model(noquestion=True)
+
+    # Then parse and sync the general dataset definitions (also a JSON schema)
+    dataset_definitions = parse_model_from_json_schema(
+        os.path.join(DATADIR, "schema", "dataset.schema.json"))
+    dataset_definitions.sync_data_model(noquestion=True)
+
+    # Finally, parse and sync the inheritances, which are defined in a YAML file
+    dataset_inherits = parse_model_from_yaml(
+        os.path.join(DATADIR, "schema", "dataset-inheritance.yml"))
+    dataset_inherits.sync_data_model(noquestion=True)
+
+
+def test_dataset(
+        # clear_database,
+        usemodel):
+    # Crawl DATADIR/data with the cfood definition, synchronize, then check records via queries
+
+    ident = CaosDBIdentifiableAdapter()
+    ident.register_identifiable(
+        "license", db.RecordType().add_parent("license").add_property("name"))
+    ident.register_identifiable("project_type", db.RecordType(
+    ).add_parent("project_type").add_property("name"))
+    ident.register_identifiable("Person", db.RecordType(
+    ).add_parent("Person").add_property("full_name"))
+
+    crawler = Crawler(debug=True, identifiableAdapter=ident)
+    crawler_definition = crawler.load_definition(os.path.join(DATADIR, "dataset_cfoods.yml"))
+    # print(json.dumps(crawler_definition, indent=3))
+    # Load and register converter packages:
+    converter_registry = crawler.load_converters(crawler_definition)
+    # print("DictIntegerElement" in converter_registry)
+
+    records = crawler.start_crawling(
+        Directory("data", os.path.join(DATADIR, 'data')),
+        crawler_definition,
+        converter_registry
+    )
+    subd = crawler.debug_tree
+    subc = crawler.debug_metadata
+    # print(json.dumps(subc, indent=3))
+    # print(subd)
+    # print(subc)
+    # NOTE(review): ins/ups presumably hold the inserted/updated records - confirm; unused below
+    ins, ups = crawler.synchronize()
+
+    dataspace = db.execute_query("FIND RECORD Dataspace WITH name=35 AND dataspace_id=20002 AND "
+                                 "archived=FALSE AND url='https://datacloud.de/index.php/f/7679'"
+                                 " AND Person", unique=True)
+    assert dataspace.get_property("start_date").value == "2022-03-01"
+    db.execute_query("FIND RECORD Person with full_name='Max Schmitt' AND"
+                     " given_name='Max'", unique=True)
+
+    dataset = db.execute_query(f"FIND RECORD Dataset with Dataspace={dataspace.id} AND title="
+                               "'Random numbers created on a random autumn day in a random office'"
+                               "", unique=True)
+    assert db.execute_query(f"COUNT RECORD with id={dataset.id} AND WHICH REFERENCES Person WITH full_name="
+                            "'Alexa Nozone' AND WHICH REFERENCES Person WITH full_name='Max Schmitt'"
+                            "") == 1
+    assert db.execute_query(f"COUNT RECORD with id={dataset.id} AND WHICH REFERENCES Event WITH "
+                            "start_datetime='2022-02-10T16:36:48+01:00'") == 1
diff --git a/integrationtests/test_data/extroot/realworld_example/data/35/.dataspace.json b/integrationtests/test_data/extroot/realworld_example/data/35/.dataspace.json
new file mode 100644
index 0000000000000000000000000000000000000000..26e11e4e16081b8b5b64a83889bc1f4d160ef0e7
--- /dev/null
+++ b/integrationtests/test_data/extroot/realworld_example/data/35/.dataspace.json
@@ -0,0 +1,15 @@
+{
+	"name": "DEMO",
+	"dataspace_id": 20002,
+	"archived": false,
+	"coordinator": {
+        "full_name": "Max Schmitt",
+		"given_name": "Max",
+		"family_name": "Schmitt",
+		"email": "max.schmitt@email.de"
+	},
+	"start_date": "2022-03-01",
+	"end_date": "2032-02-28",
+	"comment": "Demonstration data space for DataCloud",
+	"url": "https://datacloud.de/index.php/f/7679"
+}
diff --git a/integrationtests/test_data/extroot/realworld_example/data/35/03_raw_data/001_dataset1/demo-dataset.csv b/integrationtests/test_data/extroot/realworld_example/data/35/03_raw_data/001_dataset1/demo-dataset.csv
new file mode 100644
index 0000000000000000000000000000000000000000..7a4d684e50cf4fa0699c66d27661d0d54055ec8b
--- /dev/null
+++ b/integrationtests/test_data/extroot/realworld_example/data/35/03_raw_data/001_dataset1/demo-dataset.csv
@@ -0,0 +1,101 @@
+index,A[kg],B[s],pH,Temp.[C]
+0,2.1209183975976957,-0.5658499891692009,-0.8391639362482752,0.6210332089995103
+1,-1.2155508955759597,-1.0141121577750831,0.2503340429095144,0.7560156296323594
+2,-1.0191141299527218,-1.5870495901656396,0.6811842117478961,-0.25776671384531147
+3,-0.8235788683146266,1.1688759819188137,-0.15841036014621737,0.24351773490785233
+4,-2.028210212186099,-0.15000944869896093,0.7344551834722798,-1.0594635581726441
+5,0.8578345931586077,-1.0478958942647336,-0.5059960285526023,0.6141193812881873
+6,-0.7585068400011461,-0.45812334415522366,-0.6299981228519985,-0.072295788065162
+7,-0.34875455645064296,-0.49936600901639105,0.08492189470338947,0.24398792231786676
+8,-0.491473523786921,-1.1815449374689073,-0.23631388788457763,0.8801868915647684
+9,-1.291852196630842,0.4956544058017087,1.7176555991727498,1.8889309443940632
+10,-0.974327795079914,-0.6002779223325445,1.4950878953418667,-0.4750187681874636
+11,0.863708396863823,0.4867513929363103,-1.2500529683835453,2.1711592870838112
+12,-1.0518542498779602,-0.6800136223939168,-0.5593377295003794,-0.23451862458342732
+13,0.013421028872223972,-1.7652967848993042,0.302518679323854,1.124258888392337
+14,1.1387734213591119,-0.5602347718731282,-0.6908747870526222,0.905906598269778
+15,-1.8032949181114486,0.18858416406523845,1.0083249532267977,0.6969475009127225
+16,-0.42755813629599176,-1.0354063212247375,-0.24666198541039489,-1.2245102779938972
+17,-0.558268266895522,-1.4564784210249142,1.6162446783371565,-0.6109432350045504
+18,-0.9759505344957924,-2.780175134826593,3.039543722358096,-1.258487109407404
+19,-0.042261223623348665,0.7827311969447484,0.8902139085357877,0.33130889065513175
+20,-0.43764310886282315,-0.8338864816830261,0.8545198929035823,-0.8330242660029193
+21,0.2910454990578444,0.40786200750721635,-0.8115126892604917,0.7108997766944964
+22,0.41446462010439317,-1.0965365861313923,-0.1816041240266455,-0.18304466068648742
+23,-0.5187715545823834,-0.46490147833949275,-0.5059346590831783,0.6998562249774912
+24,2.4491154744839005,-0.3554192977203785,-0.6604902675826654,-0.9434392815439072
+25,-0.5083188860395834,0.2781724921583019,-0.4340136020292349,0.02629089617543565
+26,-0.9854213292611846,-1.398313530263303,0.05552818415139104,-0.20282242071816114
+27,1.0808664341388348,-0.681501179909626,0.6492258431774035,-0.41774069067997716
+28,-1.1767497846165254,1.0817469159915034,-1.524089495721789,0.703812702135731
+29,0.19626402088297137,-1.731421126100085,0.33753714074823216,1.167207071332792
+30,-1.1808345594828473,-0.2820078693924212,-0.8720833031493173,0.8643708946275879
+31,0.8284163458216123,0.20722015645321426,0.29071068457985955,2.6180265991342315
+32,-0.08203655784081282,0.060308831720906446,0.9519485488783623,0.7350446746473065
+33,-0.9071581669506105,0.6088044300190749,1.0099718941738625,0.002799079788086574
+34,-0.42977850177492904,1.2900375327057412,0.32028642454115197,0.8301665482611077
+35,1.0852695299159272,-0.7040390830488096,0.7964627034904589,0.5892571532287761
+36,-1.5667114288837196,0.19932071915614016,-1.0037399027933205,0.5715977614420107
+37,1.3367378436097728,-0.4584285824179284,-0.4435084312392094,-1.3448283883056802
+38,-0.03788754387000687,-0.37288494267798383,-0.5643391975832854,0.43485956543590193
+39,1.0634390535750102,1.0881233131592658,1.2921865320956318,-0.07293734130819148
+40,1.6786504380461766,-0.03043290400609124,2.66472625811549,-0.016638240963738466
+41,-1.657581538683817,0.8240214695327108,0.32914391919723984,0.08007211199118686
+42,0.04171224685709963,-0.9854865121535178,-0.3195510216437891,-0.42540430453161987
+43,0.6506526831507736,-1.159358101323352,-1.2789107289085737,0.10499609768025624
+44,0.7402635450212406,-0.44202303578095753,-0.5748164371395315,0.5600113473434154
+45,-0.9809738202025933,0.16868168368656386,-1.5883259666916558,-2.6955712214488345
+46,-1.8495816486925372,-1.6954982682847552,1.387648046113911,0.8455399256972358
+47,1.0442607146494682,0.44438084748213075,-0.6547675875380801,-0.5557546828614935
+48,0.32905474147201974,-0.7323591467714324,0.8555098512789541,2.4647371835928196
+49,-2.5131333956577557,1.4005121469909907,-2.162216422615549,-0.3797761578463441
+50,-1.406182674849582,-0.33037485118390236,-0.30720520090625775,0.3765108318500068
+51,1.4315461764484496,0.4490657382715407,0.14688708820540236,-1.3363710028523919
+52,-1.3546100448551868,0.35309094153560083,1.1314974294901488,-0.8299500202669722
+53,-0.7668372422803534,1.3427856896905794,0.11144680945543838,0.5488627384438831
+54,2.6377507721791997,1.86851303077989,0.1358347611054535,0.0021631807468969044
+55,-0.2814604476092987,-0.8110890245372547,0.2914246407211869,1.3009776744589592
+56,-0.08220515064674763,0.06131679740379093,1.2240755323078314,1.6651435947789437
+57,-1.5833977101974248,-1.0390852809695386,0.9472604405151627,-1.1238493594739791
+58,0.060801913229076375,-1.1395369395697963,-0.6773504352336591,-0.7823844332031786
+59,0.3719151864023754,-2.6589573541115885,0.9138765623088898,1.9179285751965107
+60,0.16875501543121765,-0.21075290840365637,-0.15712808326461272,-1.095263810678978
+61,-0.6676220651512094,-2.392512574657398,-0.1970135407082481,1.1303688380560521
+62,-0.3508037371211798,0.37103055819752395,0.1319143246551687,-0.8442765717512588
+63,0.5744187610995448,0.2824163982139891,-0.23250484081352427,-0.009480528299369923
+64,-1.033847039653939,-0.6062251775571341,0.8745680740327043,0.10611431160660695
+65,0.8616095853453857,-0.7902852788672261,0.9924735544245377,-0.39017477285341734
+66,-0.25797403501959537,0.9776756368066659,-0.1774701795502288,0.8457628045096433
+67,0.1879011473947124,0.4880410431165719,0.33941695573743247,-0.3098695458944371
+68,0.12908240475251795,-0.3929831705571321,-0.9815115481276334,-0.6596680503662373
+69,0.47271005121390686,-0.27585706457228726,0.659750762879994,-1.621655291178758
+70,1.2805576221047092,1.255690982276119,0.8893506172744224,0.36843763617254915
+71,-1.8222077377715007,-1.2618097663744718,-1.2393746501949852,0.22742537143827415
+72,-0.7670935921671362,0.6632357605887813,-1.8652052380554516,-0.3566398262186697
+73,0.368513682832951,0.22484190975093934,0.7207761550523548,-0.4607733151206031
+74,-1.6353304746550132,-1.0835890398703607,0.6240782484796151,1.497716990815385
+75,1.2631082191418077,1.9388688317848526,0.43069457351954177,-0.1240852286700612
+76,1.4229945541316606,1.685287372911636,0.282616738427184,1.6075806781661712
+77,0.15907038463344916,-1.1862747951875707,-2.162241163696355,0.9048269906929861
+78,0.8724544719304812,-0.06423147646568356,0.28403221059939265,0.7315950326908587
+79,-0.5099002924982818,0.8674753935115029,0.0015306969822590103,-0.793334121698815
+80,0.16756755106838742,-0.8374595440291756,1.871547652925694,-0.019948470822079158
+81,0.5333319586985659,-1.6076411272904392,0.4676478392958759,0.35245743045221734
+82,-0.5292514883314576,-1.2708056558247538,-1.7043012586370947,0.3391676901971921
+83,1.8042184317880245,1.2058943020996364,-2.3228385290614084,1.2008461670776127
+84,0.8671835774935015,0.9953640415286719,-1.4439272409362103,0.9410085688802767
+85,-0.118043369635042,0.41649838899300184,-1.2993225013700294,1.9232397286356342
+86,-0.32517525711392864,0.062481999278585824,-0.27679161049236684,0.06555334954413516
+87,-0.39336711632154264,0.0790516633124132,-0.600204351381406,1.321653482130525
+88,-0.9789171222367312,0.30688902979967303,0.10346158693798674,0.3160642853129814
+89,0.4332454768673768,-0.620828990252391,-1.0710192139922268,0.15027972939295933
+90,3.1092106995021096,0.354640404873306,1.8164064530643516,1.8911595405760606
+91,0.7027212216033006,-1.9367414347582559,-0.26797308254438235,1.1063820286927997
+92,0.6665636818250888,0.7953561614160027,1.8164132351496374,1.5760380002772454
+93,-1.4931006068027144,0.2680846074746922,-0.30697269318261355,-0.5300118028948997
+94,0.9258476710590248,0.15464742730214845,0.5847769923450901,-0.8405562302565793
+95,0.3015957125126854,2.9697978560379323,2.2793789547159338,0.13951152352691706
+96,0.4109127837045091,0.04501972229381512,0.5969781411176205,1.6443498245829686
+97,0.07956221270863263,0.009072464866011773,-0.6905847540574735,-0.9639714900867246
+98,2.9172401959670817,0.43571229891911717,-0.903738601954934,0.08343820441617454
+99,0.5501333973314503,-0.2511364474548299,1.4945524498890597,-1.1608586317841827
diff --git a/integrationtests/test_data/extroot/realworld_example/data/35/03_raw_data/001_dataset1/metadata.json b/integrationtests/test_data/extroot/realworld_example/data/35/03_raw_data/001_dataset1/metadata.json
new file mode 100644
index 0000000000000000000000000000000000000000..64df90e55eff065b1cc249a634444a72f9fd00d2
--- /dev/null
+++ b/integrationtests/test_data/extroot/realworld_example/data/35/03_raw_data/001_dataset1/metadata.json
@@ -0,0 +1,50 @@
+{
+        "title": "Random numbers created on a random autumn day in a random office",
+        "abstract": "For demonstration purposes we created random numbers on a computer in an office of the CLOUD. This demonstration dataset is used in the DataCloud, a curated cloud storage for scientific data.",
+        "Event": [
+            {
+                "longitude": 18.445078548041533,
+                "start_datetime": "2022-02-10T16:36:48+01:00",
+                "latitude": 53.10833068997861,
+                "elevation": 2,
+                "location": "Bremen, Germany"
+            }
+        ],
+        "license": "CC-BY",
+        "authors": [
+            {
+                "firstname": "Max",
+                "lastname": "Schmitt",
+                "full_name": "Max Schmitt",
+                "affiliation": "CLOUD",
+                "ORCID": "0000-0001-6233-1866",
+                "email": "max.schmitt@email.de"
+            },
+            {
+                "firstname": "Alexa",
+                "lastname": "Nozone",
+                "full_name": "Alexa Nozone",
+                "affiliation": "CLOUD",
+                "email": "alexa.nozone@email.de"
+            }
+        ],
+        "comment": "For questions about the DataCloud or this demonstration dataset, contact research-data@email.de",
+        "project": {
+            "name": "Demonstration of Extremly important Metadata in Folders",
+			"full_name": "Project",
+            "project_acronym": "DEMO",
+            "project_type": "national",
+            "institute": "CLOUD",
+            "start_date": "2021-10-01",
+            "end_date": "2031-12-31",
+            "url": "https://www.cloud.de/de/forschung-infrastruktur/forschungsdaten-services.html",
+            "coordinator": {
+                "firstname": "Max",
+                "lastname": "Schmitt",
+                "email": "max.schmitt@email.de"
+            }
+        },
+        "method": {
+            "name": "Random Number Generator"
+        }
+}
diff --git a/integrationtests/test_data/extroot/realworld_example/data/35/03_raw_data/README_RawData.md b/integrationtests/test_data/extroot/realworld_example/data/35/03_raw_data/README_RawData.md
new file mode 100644
index 0000000000000000000000000000000000000000..2317ff8616c43e75f52637ff581017bf4a50d468
--- /dev/null
+++ b/integrationtests/test_data/extroot/realworld_example/data/35/03_raw_data/README_RawData.md
@@ -0,0 +1,25 @@
+# Raw Data
+
+The `03_raw_data` folder is here to store all raw data of each dataset
+associated with the project – the data that has not been edited by you yet but
+which you plan to use in your research. It can be e.g. your unprocessed field
+sampling records, or useful data from an online repository. Organize your data
+in this folder in the following way:
+
+- Each dataset should reside inside a subfolder. It is recommended to number and name these folders clearly, e.g. `03_raw_data/001_precipitationgermany2017`.
+
+- **IMPORTANT**: provide the folder with information about your raw data by
+  filling out a metadata form for each of your datasets! For this, 
+  
+  - either copy the `metadata-template.json` file and put it into your dataset
+    folder. Open the copy with a text editor and fill out the fields. 
+  - or use the metadata editor in the DataCloud web client (press the "+" button
+    and use "New metadata.json" file)
+
+  If you can’t find information about your data to fill in here, you should
+  reconsider using it - it is important to be able to trace your data sources to
+  ensure a FAIR scientific process!
+
+- For processing any of the data, make a copy of the dataset and paste it into
+  the `04_data_processing` folder. This way, you make sure to keep your raw data
+  in its original state.
\ No newline at end of file
diff --git a/integrationtests/test_data/extroot/realworld_example/data/35/03_raw_data/metadata-template.json b/integrationtests/test_data/extroot/realworld_example/data/35/03_raw_data/metadata-template.json
new file mode 100644
index 0000000000000000000000000000000000000000..7f457d239321b232fb2db7d46f4e1576c85911b0
--- /dev/null
+++ b/integrationtests/test_data/extroot/realworld_example/data/35/03_raw_data/metadata-template.json
@@ -0,0 +1,52 @@
+{
+  "dataset": {
+    "title": "",
+    "abstract": "See https://github.com/CLOUD/metadata-schema for schema specification",
+    "license": "CC-BY",
+    "authors": [
+      {
+        "firstname": "",
+        "lastname": "",
+        "affiliation": "",
+        "ORCID": "XXXX-XXXX-XXXX-XXXX",
+        "email": "name@domain.de"
+      },
+      {
+        "firstname": "",
+        "lastname": "",
+        "affiliation": "",
+        "email": "name@domain.de",
+        "ORCID": "XXXX-XXXX-XXXX-XXXX"
+      }
+    ],
+    "project": {
+      "name": "",
+      "acronym": "",
+      "type": "DFG/",
+      "institute": "CLOUD",
+      "start_date": "YYYY-MM-DD",
+      "end_date": "YYYY-MM-DD",
+      "url": "",
+      "coordinator": {
+        "lastname": "",
+        "email": "",
+        "firstname": ""
+      }
+    },
+    "events_in_data": false,
+    "events": [
+      {
+        "longitude": 0,
+        "latitude": 0,
+        "elevation": 0,
+        "location": "",
+        "datetime": "YYYY-MM-DDTHH:mm:ss"
+      }
+    ],
+    "method": {
+      "name": "",
+      "url": ""
+    },
+    "max_files": 100
+  }
+}
diff --git a/integrationtests/test_data/extroot/realworld_example/data/35/04_data_processing/README_ProcessedData.md b/integrationtests/test_data/extroot/realworld_example/data/35/04_data_processing/README_ProcessedData.md
new file mode 100644
index 0000000000000000000000000000000000000000..ce1b002b18772b85f4bba3a222574f438a6ed0e3
--- /dev/null
+++ b/integrationtests/test_data/extroot/realworld_example/data/35/04_data_processing/README_ProcessedData.md
@@ -0,0 +1,10 @@
+# Data Processing
+
+The actual work is done in this `04_data_processing` folder. Depending on your
+field and type and size of project, you can organize this folder in the way that
+fits your process best. Here, a bit of chaos can happen ;) Keep in mind to
+document your processing steps in the `02_materials_and_methods` folder and to
+put your final results into the `05_results` folder. At the end of your
+project, it should be possible to delete everything in this folder and
+reconstruct the working process using the documentation and raw data from
+previous folders.
diff --git a/integrationtests/test_data/extroot/realworld_example/data/35/04_data_processing/metadata-template.json b/integrationtests/test_data/extroot/realworld_example/data/35/04_data_processing/metadata-template.json
new file mode 100644
index 0000000000000000000000000000000000000000..05f9394dfbfa9a0b2b4844c7080a340585a9050f
--- /dev/null
+++ b/integrationtests/test_data/extroot/realworld_example/data/35/04_data_processing/metadata-template.json
@@ -0,0 +1,52 @@
+{
+  "dataset": {
+    "title": "",
+    "abstract": "See https://github.com/cloud/metadata-schema for schema specification",
+    "license": "CC-BY",
+    "authors": [
+      {
+        "firstname": "",
+        "lastname": "",
+        "affiliation": "",
+        "ORCID": "XXXX-XXXX-XXXX-XXXX",
+        "email": "name@domain.de"
+      },
+      {
+        "firstname": "",
+        "lastname": "",
+        "affiliation": "",
+        "email": "name@domain.de",
+        "ORCID": "XXXX-XXXX-XXXX-XXXX"
+      }
+    ],
+    "project": {
+      "name": "",
+      "acronym": "",
+      "type": "DFG/",
+      "institute": "CLOUD",
+      "start_date": "YYYY-MM-DD",
+      "end_date": "YYYY-MM-DD",
+      "url": "",
+      "coordinator": {
+        "lastname": "",
+        "email": "",
+        "firstname": ""
+      }
+    },
+    "events_in_data": false,
+    "events": [
+      {
+        "longitude": 0,
+        "latitude": 0,
+        "elevation": 0,
+        "location": "",
+        "datetime": "YYYY-MM-DDTHH:mm:ss"
+      }
+    ],
+    "method": {
+      "name": "",
+      "url": ""
+    },
+    "max_files": 100
+  }
+}
diff --git a/integrationtests/test_data/extroot/realworld_example/data/35/05_results/README_Results.md b/integrationtests/test_data/extroot/realworld_example/data/35/05_results/README_Results.md
new file mode 100644
index 0000000000000000000000000000000000000000..ae0ab6571c52c0ec9a1cdc8aba27b31fd3be6fcc
--- /dev/null
+++ b/integrationtests/test_data/extroot/realworld_example/data/35/05_results/README_Results.md
@@ -0,0 +1,7 @@
+# Results
+
+All the results that are final versions of your data analysis or processing,
+should be copied into this `05_results` folder. Organize your results folder in
+the way most fitting to your project.
+
+Provide metadata to your results files.
diff --git a/integrationtests/test_data/extroot/realworld_example/data/35/README.md b/integrationtests/test_data/extroot/realworld_example/data/35/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..809d699c462d064ff5193add8e23677bec84b0e0
--- /dev/null
+++ b/integrationtests/test_data/extroot/realworld_example/data/35/README.md
@@ -0,0 +1,5 @@
+# Dataspace: DEMO
+
+This is a Dataspace in the CLOUD DataCloud providing safe, curated cloud storage
+for all of CLOUD's research data.
+
diff --git a/integrationtests/test_data/extroot/realworld_example/dataset_cfoods.yml b/integrationtests/test_data/extroot/realworld_example/dataset_cfoods.yml
new file mode 100644
index 0000000000000000000000000000000000000000..1589cba2b44afc3e2645b0ee72f91bf83b327032
--- /dev/null
+++ b/integrationtests/test_data/extroot/realworld_example/dataset_cfoods.yml
@@ -0,0 +1,528 @@
+# This file is a part of the CaosDB Project.
+#
+# Copyright (C) 2022 IndiScale GmbH <info@indiscale.com>
+# Copyright (C) 2022 Florian Spreckelsen <f.spreckelsen@indiscale.com>
+#
+# This program is free software: you can redistribute it and/or modify it under
+# the terms of the GNU Affero General Public License as published by the Free
+# Software Foundation, either version 3 of the License, or (at your option) any
+# later version.
+#
+# This program is distributed in the hope that it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+# FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
+# details.
+#
+# You should have received a copy of the GNU Affero General Public License along
+# with this program. If not, see <https://www.gnu.org/licenses/>.
+#
+Data:
+  type: Directory
+  match: data
+  subtree:
+    dataspace_dir:
+      type: Directory
+      match: (?P<dataspace_dir_number>[0-9]+)
+      records:
+        Dataspace:
+          name: $dataspace_dir_number
+      subtree:
+        dataspace_json:
+          type: JSONFile
+          match: .dataspace.json
+          validate: schema/dataspace.schema.json
+          subtree:
+            dataspace_id_element:
+              type: DictIntegerElement
+              match_name: "dataspace_id"
+              match_value: "(?P<id>[0-9]+)"
+              records:
+                Dataspace:
+                  dataspace_id: $id
+            archived_element:
+              type: DictBooleanElement
+              match_name: "archived"
+              match_value: "(?P<archived>.*)"
+              records:
+                Dataspace:
+                  archived: $archived
+            url_element:
+              type: DictTextElement
+              match_name: "url"
+              match_value: "(?P<url>.*)"
+              records:
+                Dataspace:
+                  url: $url
+            coordinator_element:
+              type: DictDictElement
+              match_name: "coordinator"
+              records:
+                Person:
+                  parents:
+                    - Person
+                Dataspace:
+                  Person: $Person
+              subtree: &person_subtree
+                full_name_element:
+                  type: DictTextElement
+                  match_name: "full_name"
+                  match_value: "(?P<full_name>.*)"
+                  records:
+                    Person:
+                      full_name: $full_name
+                full_name_nonlatin_element:
+                  type: DictTextElement
+                  match_name: "full_name_nonlatin"
+                  match_value: "(?P<full_name_nonlatin>.*)"
+                  records:
+                    Person:
+                      full_name_nonlatin: $full_name_nonlatin
+                family_name_element:
+                  type: DictTextElement
+                  match_name: "family_name"
+                  match_value: "(?P<family_name>.*)"
+                  records:
+                    Person:
+                      family_name: $family_name
+                given_name_element:
+                  type: DictTextElement
+                  match_name: "given_name"
+                  match_value: "(?P<given_name>.*)"
+                  records:
+                    Person:
+                      given_name: $given_name
+                email_element:
+                  type: DictTextElement
+                  match_name: "email"
+                  match_value: "(?P<email>.*)"
+                  records:
+                    Person:
+                      email: $email
+                affiliation_element:
+                  type: DictTextElement
+                  match_name: "affiliation"
+                  match_value: "(?P<affiliation>.*)"
+                  records:
+                    Person:
+                      affiliation: $affiliation
+                ORCID_element:
+                  type: DictTextElement
+                  match_name: "ORCID"
+                  match_value: "(?P<ORCID>.*)"
+                  records:
+                    Person:
+                      ORCID: $ORCID
+            start_date_element:
+              type: DictTextElement
+              match_name: "start_date"
+              match_value: "(?P<start_date>.*)"
+              records:
+                Dataspace:
+                  start_date: $start_date
+            end_date_element:
+              type: DictTextElement
+              match_name: "end_date"
+              match_value: "(?P<end_date>.*)"
+              records:
+                Dataspace:
+                  end_date: $end_date
+            comment:
+              type: DictTextElement
+              match_name: "comment"
+              match_value: "(?P<comment>.*)"
+              records:
+                Dataspace:
+                  comment: $comment
+        raw_data_dir:
+          type: Directory
+          match: 03_raw_data
+          subtree: &template
+            # TODO collect info from metadata.json and look into sub-directories
+            # (only one level) for metadata.json
+            dataset_dir:
+              match: (?P<dataset_dir_name>.*)
+              type: Directory
+              records:
+                Dataset:
+                  Dataspace: $Dataspace
+              subtree:
+                metadata_json: &metadata_json_template
+                  type: JSONFile
+                  match: metadata.json
+                  validate: schema/dataset.schema.json
+                  subtree:
+                    title_element:
+                      type: DictTextElement
+                      match_name: "title"
+                      match_value: "(?P<title>.*)"
+                      records:
+                        Dataset:
+                          title: $title
+                    authors_element:
+                      type: DictListElement
+                      match_name: "authors"
+                      subtree:
+                        author_element:
+                          type: Dict
+                          records:
+                            Person:
+                              parents:
+                                - Person
+                            Dataset:
+                              authors: +$Person
+                          subtree: *person_subtree
+                    abstract_element:
+                      type: DictTextElement
+                      match_name: "abstract"
+                      match_value: "(?P<abstract>.*)"
+                      records:
+                        Dataset:
+                          abstract: $abstract
+                    comment_element:
+                      type: DictTextElement
+                      match_name: "comment"
+                      match_value: "(?P<comment>.*)"
+                      records:
+                        Dataset:
+                          comment: $comment
+                    license_element:
+                      type: DictTextElement
+                      match_name: "license"
+                      match_value: "(?P<license_name>.*)"
+                      records:
+                        license:
+                          # TODO: As soon as such things can be validated, a
+                          # creation of a new license has to be forbidden here
+                          # (although this is effectively done already by
+                          # validating against the above schema.)
+                          name: $license_name
+                        Dataset:
+                          license: $license
+                    dataset_doi_element:
+                      type: DictTextElement
+                      match_name: "dataset_doi"
+                      match_value: "(?P<dataset_doi>.*)"
+                      records:
+                        Dataset:
+                          dataset_doi: $dataset_doi
+                    related_to_dois_element:
+                      type: DictListElement
+                      match_name: "related_to_dois"
+                      subtree:
+                        related_to_doi_element:
+                          type: TextElement
+                          match: "(?P<related_to_doi>.*)"
+                          records:
+                            Dataset:
+                              related_to_dois: +$related_to_doi
+                    Keywords_element:
+                      type: DictListElement
+                      match_name: "Keyword"
+                    Events_element:
+                      type: DictListElement
+                      match_name: "Event"
+                      subtree:
+                        Event_element:
+                          type: Dict
+                          records:
+                            Event:
+                              parents:
+                                - Event
+                            Dataset:
+                              Event: +$Event
+                          subtree:
+                            label_element:
+                              type: DictTextElement
+                              match_name: "label"
+                              match_value: "(?P<label>.*)"
+                              records:
+                                Event:
+                                  label: $label
+                            comment_element:
+                              type: DictTextElement
+                              match_name: "comment"
+                              match_value: "(?P<comment>.*)"
+                              records:
+                                Event:
+                                  comment: $comment
+                            start_datetime_element:
+                              type: DictTextElement
+                              match_name: start_datetime
+                              match_value: "(?P<start_datetime>.*)"
+                              records:
+                                Event:
+                                  start_datetime: $start_datetime
+                            end_datetime_element:
+                              type: DictTextElement
+                              match_name: end_datetime
+                              match_value: "(?P<end_datetime>.*)"
+                              records:
+                                Event:
+                                  end_datetime: $end_datetime
+                            longitude_element:
+                              type: DictFloatElement
+                              match_name: "longitude"
+                              match_value: "(?P<longitude>.*)"
+                              records:
+                                Event:
+                                  longitude: $longitude
+                            latitude_element:
+                              type: DictFloatElement
+                              match_name: "latitude"
+                              match_value: "(?P<latitude>.*)"
+                              records:
+                                Event:
+                                  latitude: $latitude
+                            elevation_element:
+                              type: DictFloatElement
+                              match_name: "elevation"
+                              match_value: "(?P<elevation>.*)"
+                              records:
+                                Event:
+                                  elevation: $elevation
+                            location_element:
+                              type: DictTextElement
+                              match_name: location
+                              match_value: "(?P<location>.*)"
+                              records:
+                                Event:
+                                  location: $location
+                            igsn_element:
+                              type: DictTextElement
+                              match_name: igsn
+                              match_value: "(?P<igsn>.*)"
+                              records:
+                                Event:
+                                  igsn: $igsn
+                    events_in_data_element:
+                      type: DictBooleanElement
+                      match_name: "events_in_data"
+                      match_value: "(?P<events_in_data>.*)"
+                      records:
+                        Dataset:
+                          events_in_data: $events_in_data
+                    geojson_element:
+                      type: DictTextElement
+                      match_name: "geojson"
+                      match_value: "(?P<geojson>.*)"
+                      records:
+                        Dataset:
+                          geojson: $geojson
+                    project_element:
+                      type: DictDictElement
+                      match_name: "project"
+                      records:
+                        Project:
+                          parents:
+                            - Project
+                        Dataset:
+                          Project: $Project
+                      subtree:
+                        full_name_element:
+                          type: DictTextElement
+                          match_name: "full_name"
+                          match_value: "(?P<full_name>.*)"
+                          records:
+                            Project:
+                              full_name: $full_name
+                        project_id_element:
+                          type: DictTextElement
+                          match_name: "project_id"
+                          match_value: "(?P<project_id>.*)"
+                          records:
+                            Project:
+                              project_id: $project_id
+                        project_type_element:
+                          type: DictTextElement
+                          match_name: "project_type"
+                          match_value: "(?P<project_type_name>.*)"
+                          records:
+                            project_type:
+                              name: $project_type_name
+                            Project:
+                              project_type: $project_type
+                        institute_element:
+                          type: DictTextElement
+                          match_name: "institute"
+                          match_value: "(?P<institute>.*)"
+                          records:
+                            Project:
+                              institute: $institute
+                        start_date_element:
+                          type: DictTextElement
+                          match_name: "start_date"
+                          match_value: "(?P<start_date>.*)"
+                          records:
+                            Project:
+                              start_date: $start_date
+                        end_date_element:
+                          type: DictTextElement
+                          match_name: "end_date"
+                          match_value: "(?P<end_date>.*)"
+                          records:
+                            Project:
+                              end_date: $end_date
+                        url_element:
+                          type: DictTextElement
+                          match_name: "url"
+                          match_value: "(?P<url>.*)"
+                          records:
+                            Project:
+                              url: $url
+                        coordinators_element:
+                          type: DictListElement
+                          match_name: "coordinators"
+                          subtree:
+                            coordinator_element:
+                              type: Dict
+                              records:
+                                Person:
+                                  parents:
+                                    - Person
+                                Project:
+                                  coordinators: +$Person
+                              subtree: *person_subtree
+                        campaign_element:
+                          type: DictDictElement
+                          match_name: "campaign"
+                          records:
+                            Campaign:
+                              parents:
+                                - Campaign
+                            Dataset:
+                              Campaign: $Campaign
+                          subtree:
+                            label_element:
+                              type: DictTextElement
+                              match_name: "label"
+                              match_value: "(?P<label>.*)"
+                              records:
+                                Campaign:
+                                  label: $label
+                            optional_label_element:
+                              type: DictTextElement
+                              match_name: "optional_label"
+                              match_value: "(?P<optional_label>.*)"
+                              records:
+                                Campaign:
+                                  optional_label: $optional_label
+                            start_date_element:
+                              type: DictTextElement
+                              match_name: "start_date"
+                              match_value: "(?P<start_date>.*)"
+                              records:
+                                Campaign:
+                                  start_date: $start_date
+                            end_date_element:
+                              type: DictTextElement
+                              match_name: "end_date"
+                              match_value: "(?P<end_date>.*)"
+                              records:
+                                Campaign:
+                                  end_date: $end_date
+                            responsible_scientists_element:
+                              type: DictListElement
+                              match_name: "responsible_scientists"
+                              subtree:
+                                responsible_scientist_element:
+                                  type: Dict
+                                  records:
+                                    Person:
+                                      parents:
+                                        - Person
+                                    Campaign:
+                                      responsible_scientists: +$Person
+                                  subtree: *person_subtree
+                        Methods_element:
+                          type: DictListElement
+                          match_name: "Method"
+                          subtree:
+                            Method_element:
+                              type: Dict
+                              records:
+                                Method:
+                                  parents:
+                                    - Method
+                                Dataset:
+                                  Method: +$Method
+                              subtree:
+                                method_name_element:
+                                  type: DictTextElement
+                                  match_name: "method_name"
+                                  match_value: "(?P<method_name>.*)"
+                                  records:
+                                    Method:
+                                      name: $method_name
+                                abbreviation_element:
+                                  type: DictTextElement
+                                  match_name: "abbreviation"
+                                  match_value: "(?P<abbreviation>.*)"
+                                  records:
+                                    Method:
+                                      abbreviation: $abbreviation
+                                url_element:
+                                  type: DictTextElement
+                                  match_name: "url"
+                                  match_value: "(?P<url>.*)"
+                                  records:
+                                    Method:
+                                      url: $url
+                        Taxa_element:
+                          type: DictListElement
+                          match_name: "Taxon"
+                          subtree:
+                            Taxon_element:
+                              type: Dict
+                              records:
+                                Taxon:
+                                  parents:
+                                    - Taxon
+                                Dataset:
+                                  Taxon: +$Taxon
+                              subtree:
+                                taxon_name_element:
+                                  type: DictTextElement
+                                  match_name: "taxon_name"
+                                  match_value: "(?P<taxon_name>.*)"
+                                  records:
+                                    Taxon:
+                                      name: $taxon_name
+                        archived_element:
+                          type: DictBooleanElement
+                          match_name: "archived"
+                          match_value: "(?P<archived>.*)"
+                          records:
+                            Dataset:
+                              archived: $archived
+                        publication_date_element:
+                          type: DictTextElement
+                          match_name: "publication_date"
+                          match_value: "(?P<publication_date>.*)"
+                          records:
+                            Dataset:
+                              publication_date: $publication_date
+                        max_files_element:
+                          type: DictIntegerElement
+                          match_name: "max_files"
+                          match_value: "(?P<max_files>.*)"
+                          records:
+                            Dataset:
+                              max_files: $max_files
+                auxiliary_file: &aux_file_template
+                  type: File
+                  match: "(?P<aux_file_name>(?!metadata.json).*)"
+                  # TODO File, path and reference dataset in file record
+                child_dataset_dir:
+                  type: Directory
+                  match: (?P<child_dataset_dir_name>.*)
+                  subtree:
+                    metadata_json: *metadata_json_template
+                    auxiliary_file: *aux_file_template
+        data_processing_dir:
+          type: Directory
+          match: 04_data_processing
+          subtree: *template
+        results_dir:
+          type: Directory
+          match: 05_results
+          subtree: *template
diff --git a/integrationtests/test_data/extroot/realworld_example/schema/README.md b/integrationtests/test_data/extroot/realworld_example/schema/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..e0bb95f8b844374bba72c7c6989ac57cfa5fc305
--- /dev/null
+++ b/integrationtests/test_data/extroot/realworld_example/schema/README.md
@@ -0,0 +1,37 @@
+# Dataset Schemas
+
+These schemas are derived from the [metadata
+schemas](https://github.com/leibniz-zmt/zmt-metadata-schema) used at the Leibniz
+Center for Tropical Marine Research (Leibniz ZMT).
+
+# Copyright
+
+BSD 3-Clause License
+
+Copyright (c) 2022 ZMT
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+* Redistributions of source code must retain the above copyright notice, this
+  list of conditions and the following disclaimer.
+
+* Redistributions in binary form must reproduce the above copyright notice,
+  this list of conditions and the following disclaimer in the documentation
+  and/or other materials provided with the distribution.
+
+* Neither the name of the copyright holder nor the names of its
+  contributors may be used to endorse or promote products derived from
+  this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/integrationtests/test_data/extroot/realworld_example/schema/dataset-inheritance.yml b/integrationtests/test_data/extroot/realworld_example/schema/dataset-inheritance.yml
new file mode 100644
index 0000000000000000000000000000000000000000..3d12053a0007cdea1005e7673db69f46b35a063d
--- /dev/null
+++ b/integrationtests/test_data/extroot/realworld_example/schema/dataset-inheritance.yml
@@ -0,0 +1,18 @@
+extern:
+- Keyword
+- Taxon
+- full_name
+- full_name_nonlatin
+- name
+
+full_name:
+  inherit_from_obligatory:
+  - name
+
+full_name_nonlatin:
+  inherit_from_obligatory:
+  - name
+
+Taxon:
+  inherit_from_obligatory:
+  - Keyword
diff --git a/integrationtests/test_data/extroot/realworld_example/schema/dataset.schema.json b/integrationtests/test_data/extroot/realworld_example/schema/dataset.schema.json
new file mode 100644
index 0000000000000000000000000000000000000000..83d6a60d857349772c960af637671cb21c8abd5d
--- /dev/null
+++ b/integrationtests/test_data/extroot/realworld_example/schema/dataset.schema.json
@@ -0,0 +1,365 @@
+{
+  "title": "Dataset",
+  "description": "",
+  "type": "object",
+  "properties": {
+    "title": {
+      "type": "string",
+      "description": "full dataset title"
+    },
+    "authors": {
+      "type": "array",
+      "items": {
+        "type": "object",
+        "title": "Person",
+        "properties": {
+          "full_name": {
+            "type": "string",
+            "description": "Full name (latin transcription, all UTF-8 characters allowed)"
+          },
+          "full_name_nonlatin": {
+            "type": "string",
+            "description": "Full name (non-latin alphabet)"
+          },
+          "family_name": {
+            "type": "string",
+            "description": "Family name (latin transcription)"
+          },
+          "given_name": {
+            "type": "string",
+            "description": "Given/other names (latin transcription)"
+          },
+          "affiliation": {
+            "type": "string"
+          },
+          "ORCID": {
+            "type": "string",
+            "description": "ORCID identifier as 16-digit number, e.g. 0000-0001-6233-1866",
+            "pattern": "^\\d{4}-\\d{4}-\\d{4}-\\d{4}$"
+          },
+          "email": {
+            "type": "string",
+            "format": "email"
+          }
+        },
+        "required": [
+          "full_name",
+          "email"
+        ]
+      },
+      "minItems": 1,
+      "uniqueItems": true
+    },
+    "abstract": {
+      "type": "string",
+      "minLength": 80,
+      "maxLength": 1000,
+      "description": "Abstract with at least 80 characters"
+    },
+    "comment": {
+      "type": "string"
+    },
+    "license": {
+      "type": "string",
+      "enum": [
+        "CC-BY",
+        "CC-BY-SA",
+        "CC0",
+        "restricted access"
+      ]
+    },
+    "dataset_doi": {
+      "type": "string",
+      "pattern": "(10[.][0-9]{4,}(?:[.][0-9]+)*/(?:(?![%\"#? ])\\S)+)",
+      "description": "Dataset DOI, e.g. 10.1594/PANGAEA.938740"
+    },
+    "related_to_dois": {
+      "type": "array",
+      "items": {
+        "type": "string",
+        "pattern": "(10[.][0-9]{4,}(?:[.][0-9]+)*/(?:(?![%\"#? ])\\S)+)"
+      },
+      "description": "DOIs of related publications and/or datasets, e.g. 10.1000/182"
+    },
+    "Keyword": {
+      "type": "array",
+      "items": {
+        "type": "object",
+        "properties": {
+          "name": {
+            "type": "string"
+          }
+        }
+      }
+    },
+    "Event": {
+      "type": "array",
+      "description": "https://wiki.pangaea.de/wiki/Event",
+      "items": {
+        "type": "object",
+        "properties": {
+          "label": {
+            "type": "string"
+          },
+          "comment": {
+            "type": "string"
+          },
+          "start_datetime": {
+            "type": "string",
+            "format": "date-time"
+          },
+          "end_datetime": {
+            "type": "string",
+            "format": "date-time"
+          },
+          "longitude": {
+            "type": "number",
+            "minimum": -180,
+            "maximum": 180,
+            "description": "longitude (W/E) in decimal degree (-180 to 180)"
+          },
+          "latitude": {
+            "type": "number",
+            "minimum": -90,
+            "maximum": 90,
+            "description": "latitude (N/S) in decimal degree (-90 to 90)"
+          },
+          "elevation": {
+            "type": "number",
+            "minimum": -10000,
+            "maximum": 20000,
+            "description": "elevation in m"
+          },
+          "location": {
+            "type": "string",
+            "description": "geographical location as text (e.g., North Sea; Espoo, Finland)"
+          },
+          "igsn": {
+            "type": "string",
+            "description": "International Geo Sample Number (http://www.geosamples.org/aboutigsn)"
+          }
+        },
+        "required": [
+          "longitude",
+          "latitude",
+          "start_datetime"
+        ]
+      }
+    },
+    "events_in_data": {
+      "type": "boolean",
+      "description": "Does the data contain additional information about timepoints and locations?"
+    },
+    "geojson": {
+      "type": "string",
+      "pattern": "",
+      "description": "GeoJSON for complex geographic structures"
+    },
+    "project": {
+      "title": "Project",
+      "description": "https://wiki.pangaea.de/wiki/Project",
+      "type": "object",
+      "properties": {
+        "name": {
+          "type": "string", 
+          "description": "short name of project"
+        },
+        "full_name": {
+          "type": "string",
+          "description": "Full name (latin transcription, all UTF-8 characters allowed)"
+        },
+        "project_id": {
+          "type": "string",
+          "description": "Project ID"
+        },
+        "project_type": {
+          "type": "string",
+          "enum": [
+            "DFG",
+            "EU",
+            "BMBF",
+            "national",
+            "international"
+          ]
+        },
+        "institute": {
+          "type": "string",
+          "description": "place of coordination or project office",
+          "default": "Centre for Research"
+        },
+        "start_date": {
+          "type": "string",
+          "format": "date"
+        },
+        "end_date": {
+          "type": "string",
+          "format": "date"
+        },
+        "url": {
+          "type": "string",
+          "format": "uri"
+        },
+        "coordinators": {
+          "type": "array",
+          "items": {
+            "type": "object",
+            "title": "Person",
+            "properties": {
+              "full_name": {
+                "type": "string",
+                "description": "Full name (latin transcription, all UTF-8 characters allowed)"
+              },
+              "full_name_nonlatin": {
+                "type": "string",
+                "description": "Full name (non-latin alphabet)"
+              },
+              "family_name": {
+                "type": "string",
+                "description": "Family name (latin transcription)"
+              },
+              "given_name": {
+                "type": "string",
+                "description": "Given/other names (latin transcription)"
+              },
+              "affiliation": {
+                "type": "string"
+              },
+              "ORCID": {
+                "type": "string",
+                "description": "ORCID identifier as 16-digit number, e.g. 0000-0001-6233-1866",
+                "pattern": "^\\d{4}-\\d{4}-\\d{4}-\\d{4}$"
+              },
+              "email": {
+                "type": "string",
+                "format": "email"
+              }
+            },
+            "required": [
+              "full_name",
+              "email"
+            ]
+          },
+          "minItems": 1,
+          "uniqueItems": true
+        }
+      },
+      "required": ["name", "full_name"]
+    },
+    "campaign": {
+      "title": "Campaign",
+      "description": "https://wiki.pangaea.de/wiki/Campaign, synonyms: cruise, expedition, leg",
+      "type": "object",
+      "properties": {
+        "label": {
+          "type": "string",
+          "description": "is unique and does not contain blanks; uses abbreviations instead of full names"
+        },
+        "optional_label": {
+          "type": "string"
+        },
+        "start_date": {
+          "type": "string",
+          "format": "date"
+        },
+        "end_date": {
+          "type": "string",
+          "format": "date"
+        },
+        "responsible_scientists": {
+          "type": "array",
+          "items": {
+            "type": "object",
+            "title": "Person",
+            "properties": {
+              "full_name": {
+                "type": "string",
+                "description": "Full name (latin transcription, all UTF-8 characters allowed)"
+              },
+              "full_name_nonlatin": {
+                "type": "string",
+                "description": "Full name (non-latin alphabet)"
+              },
+              "family_name": {
+                "type": "string",
+                "description": "Family name (latin transcription)"
+              },
+              "given_name": {
+                "type": "string",
+                "description": "Given/other names (latin transcription)"
+              },
+              "affiliation": {
+                "type": "string"
+              },
+              "ORCID": {
+                "type": "string",
+                "description": "ORCID identifier as 16-digit number, e.g. 0000-0001-6233-1866",
+                "pattern": "^\\d{4}-\\d{4}-\\d{4}-\\d{4}$"
+              },
+              "email": {
+                "type": "string",
+                "format": "email"
+              }
+            },
+            "required": [
+              "full_name",
+              "email"
+            ]
+          },
+          "minItems": 1,
+          "uniqueItems": true
+        }
+      }
+    },
+    "Method": {
+      "type": "array",
+      "items": {
+        "type": "object",
+        "description": "https://wiki.pangaea.de/wiki/Method",
+        "properties": {
+          "method_name": {
+            "type": "string",
+            "description": "full official name of tool/instrument/device/gear"
+          },
+          "abbreviation": {
+            "type": "string",
+            "description": "may be used for import in an event list to avoid misspellings"
+          },
+          "url": {
+            "type": "string",
+            "description": "should contain a web address, where an official description of the device can be found"
+          }
+        }
+      }
+    },
+    "Taxon": {
+      "type": "array",
+      "items": {
+        "type": "object",
+        "properties": {
+          "name": {
+            "type": "string"
+          }
+        }
+      }
+    },
+    "archived": {
+      "type": "boolean",
+      "description": "Has the dataset been archived?",
+      "default": false
+    },
+    "publication_date": {
+      "type": "string",
+      "format": "date"
+    },
+    "max_files": {
+      "type": "integer",
+      "description": "Maximum number of files to be included by the CaosDB crawler",
+      "default": 100
+    }
+  },
+  "required": [
+    "title",
+    "authors",
+    "abstract"
+  ]
+}
diff --git a/integrationtests/test_data/extroot/realworld_example/schema/dataspace.schema.json b/integrationtests/test_data/extroot/realworld_example/schema/dataspace.schema.json
new file mode 100644
index 0000000000000000000000000000000000000000..01653bfa821e0a0acbb5a481bfd458e2ed784fb9
--- /dev/null
+++ b/integrationtests/test_data/extroot/realworld_example/schema/dataspace.schema.json
@@ -0,0 +1,45 @@
+{
+  "title": "Dataspace",
+  "description": "A Dataspace is a folder in the DataCloud with a pre-defined structure",
+  "type": "object",
+  "properties": {
+    "dataspace_id": {
+      "type": "integer",
+      "description": "Integer ID of Dataspace (matches LDAP GID)",
+      "minimum": 20000
+    },
+    "archived": { "type": "boolean" },
+    "url": {
+      "type": "string",
+      "description": "link to folder on file system (CaosDB or cloud folder)"
+    },
+    "coordinator": {
+      "type": "object",
+      "title": "Person",
+      "properties": {
+        "full_name": {
+          "type": "string",
+          "description": "Full name (latin transcription, all UTF-8 characters allowed)"
+        },
+        "full_name_nonlatin": {
+          "type": "string",
+          "description": "Full name (non-latin alphabet)"
+        },
+        "family_name": {
+          "type": "string",
+          "description": "Family name (latin transcription)"
+        },
+        "given_name": {
+          "type": "string",
+          "description": "Given/other names (latin transcription)"
+        },
+        "email": { "type": "string", "format": "email" }
+      },
+      "required": ["full_name", "email"]
+    },
+    "start_date": { "type": "string", "format": "date" },
+    "end_date": { "type": "string", "format": "date" },
+    "comment": { "type": "string" }
+  },
+  "required": ["dataspace_id", "url", "coordinator"]
+}
diff --git a/integrationtests/test_data/extroot/realworld_example/schema/zmt-organisation.yml b/integrationtests/test_data/extroot/realworld_example/schema/zmt-organisation.yml
new file mode 100644
index 0000000000000000000000000000000000000000..7e251eeced7bf626e77364fc5555b1cb10dd3afb
--- /dev/null
+++ b/integrationtests/test_data/extroot/realworld_example/schema/zmt-organisation.yml
@@ -0,0 +1,26 @@
+extern:
+- name
+- url
+- Dataset
+
+german_name:
+  datatype: TEXT
+  inherit_from_obligatory:
+  - name
+
+Department:
+  recommended_properties:
+    url:
+    german_name:
+
+
+WorkingGroup:
+  recommended_properties:
+    Department:
+    german_name:
+    url:
+
+Dataset:
+  recommended_properties:
+    WorkingGroup:
+