diff --git a/.docker/Dockerfile b/.docker/Dockerfile
index 1651fa08f7fb157e007cf5c4a992f548b7d411ba..43e5eff1171da8d69eb8897bea678bf90572570a 100644
--- a/.docker/Dockerfile
+++ b/.docker/Dockerfile
@@ -2,6 +2,8 @@ FROM debian:10
 RUN apt-get update && \
     apt-get install \
     curl \
+    libhdf5-dev \
+    pkgconf \
     python3 \
     python3-pip \
     python3-requests \
@@ -27,6 +29,6 @@ RUN pip3 install recommonmark sphinx-rtd-theme
 COPY . /git
 RUN rm -r /git/.git \
     && mv /git/.docker/pycaosdb.ini /git/integrationtests
-RUN cd /git && pip3 install .
+RUN cd /git && pip3 install .[h5-crawler]
 WORKDIR /git/integrationtests
-CMD /wait-for-it.sh caosdb-server:10443 -t 500 -- ./test.sh
+CMD /wait-for-it.sh caosdb-server:10443 -t 500 -- ./test.sh --force
diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index 2a80211839ae3db85765c99629247f06e2c6778b..c9cd5b631cea84f44c5296edf4b789d83982d074 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -58,8 +58,8 @@ test:
       - cd .docker 
       - /bin/sh ./run.sh
       - cd .. 
-      - docker logs docker_caosdb-server_1 &> ../caosdb_log.txt
-      - docker logs docker_sqldb_1 &> ../mariadb_log.txt
+      - docker logs docker_caosdb-server_1 &> caosdb_log.txt
+      - docker logs docker_sqldb_1 &> mariadb_log.txt
       - docker-compose -f .docker/docker-compose.yml down
       - rc=`cat .docker/result`  
       - exit $rc
diff --git a/CHANGELOG.md b/CHANGELOG.md
index e885b7c5d389d05af2f48f3b184355e4e53ad157..3e4f6f8eeddbf62f599bdd4f3fd230cfc3beb9d0 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -28,9 +28,16 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - Automated documentation builds: `make doc`
 - Crawler documentation
 - Proof-of-concept integration with Bloxberg.
+- Introduce a CFood that can create a Record structure based on the contents
+  of an HDF5 file; h5py is now an optional dependency
+- Table importer implementations for CSV and TSV
+- String-in-list check for table imports
 
 ### Changed ###
 
+- identifiables of a single CFood are now treated one after the other. This
+  allows them to have dependencies among each other if they are ordered
+  correctly
 - identifiables must have at least one property or a name
 * `caosadvancedtools.serverside.helper.init_data_model` also checks the role
   and data type of entities.
@@ -61,6 +68,11 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
   cause an `sqlite3.IntegrityError` if more than one change was cached
   for the same entity.
 * #40 Insertion of identifiables with missing obligatory properties
+- Before, a Property with the datatype "LIST(TEXT)" would lead to the creation
+  of a RecordType. This is fixed now.
+* #52 `XLSimporter.read_xls` threw a wrong error when reading from a file with
+  a wrong ending. Now, a `DataInconsistencyError` is raised instead of a
+  `ValueError`.
+* List properties are no longer updated unnecessarily by the crawler.
 
 ### Security ###
 
diff --git a/Makefile b/Makefile
index cbac0ea0a77e5523529ef181d83ffb9738d72faf..7609444bd4fd3a8ce980eca0bc3993b3cf2e168f 100644
--- a/Makefile
+++ b/Makefile
@@ -21,7 +21,7 @@
 
 # This Makefile is a wrapper for several other scripts.
 
-.PHONY: help doc install
+.PHONY: help doc install unittest
 
 help:
 	@echo 'Type `make doc` for documentation, or `make install` for (local) installation.'
@@ -30,4 +30,7 @@ doc:
 	$(MAKE) -C src/doc html
 
 install:
-	@echo "Not implemented yet, use pip for installation."
+	pip3 install .
+
+unittest:
+	pytest-3 unittests
diff --git a/README_SETUP.md b/README_SETUP.md
index 19f051636952945fe76b2ab752264031ac43378d..e5ebd969462f7d2c28a329e2c6b6e1bab1252775 100644
--- a/README_SETUP.md
+++ b/README_SETUP.md
@@ -12,6 +12,11 @@ Dependencies will be installed automatically if you use the below described proc
 - `caosdb>=0.4.0`                                      
 - `openpyxl>=3.0.0`
 - `xlrd>=1.2.0`
+- `pandas>=1.2.0`
+- `numpy>=1.17.3`
+
+If you want to use the optional h5-crawler, the following dependencies will be installed additionally:
+- `h5py>=3.3.0`
 
 For testing:
 - `tox`
@@ -21,6 +26,9 @@ For testing:
 - `pip install . --user`
 - `pip install tox --user`
 
+Optional h5-crawler:
+- `pip install .[h5-crawler] --user`
+
 ## Run Unit Tests
 `tox`
 
@@ -31,9 +39,11 @@ For testing:
    extroot. E.g. `sudo mount -o bind extroot
    ../../caosdb-deploy/profiles/empty/paths/extroot` (or whatever path
    the extroot of the empty profile to be used is located at).
-3. Start an empty (!) CaosDB instance (with the mounted extroot). The
-   database will be cleared during testing, so it's important to use
+3. Start (or restart) an empty (!) CaosDB instance (with the mounted extroot).
+   The database will be cleared during testing, so it's important to use
    an empty instance.
+   Make sure your configuration for the Python caosdb module is correct and
+   allows connecting to the server.
 4. Run `test.sh`.  Note that this may modify content of the `integrationtest/extroot/` directory.
 
 ## Code Formatting
diff --git a/integrationtests/crawl.py b/integrationtests/crawl.py
index 61f51c297bc1fafa686a334031f772e095ab3896..defed2cb4f5fb0a0f349898e555c5d25924e2f9b 100755
--- a/integrationtests/crawl.py
+++ b/integrationtests/crawl.py
@@ -36,6 +36,8 @@ from caosadvancedtools.scifolder import (AnalysisCFood, ExperimentCFood,
                                          PublicationCFood, SimulationCFood,
                                          SoftwareCFood, ResultTableCFood)
 
+from example_hdf5cfood import ExampleH5CFood
+
 try:
     from sss_helper import get_argument_parser, print_success
 except ModuleNotFoundError:
@@ -89,7 +91,9 @@ if __name__ == "__main__":
                     interactive=False, hideKnown=False,
                     cfood_types=[ExperimentCFood, AnalysisCFood, SoftwareCFood,
                                  PublicationCFood, SimulationCFood,
-                                 ResultTableCFood])
+                                 ResultTableCFood,
+                                 ExampleH5CFood
+                                 ])
 
     if args.authorize_run:
         for run_id in args.authorize_run:
diff --git a/integrationtests/example_hdf5cfood.py b/integrationtests/example_hdf5cfood.py
new file mode 100644
index 0000000000000000000000000000000000000000..5485402d2042b2055a087b99abcba409095a7c70
--- /dev/null
+++ b/integrationtests/example_hdf5cfood.py
@@ -0,0 +1,56 @@
+#!/usr/bin/env python3
+# encoding: utf-8
+#
+# ** header v3.0
+# This file is a part of the CaosDB Project.
+#
+# Copyright (C) 2021 IndiScale GmbH <www.indiscale.com>
+# Copyright (C) 2021 Henrik tom Wörden <h.tomwoerden@indiscale.com>
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as
+# published by the Free Software Foundation, either version 3 of the
+# License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+#
+# ** end header
+#
+
+"""
+An exemplary definition of an HDF5 CFood for integration testing
+"""
+
+import caosdb as db
+from caosadvancedtools.cfoods.h5 import H5CFood
+from caosadvancedtools.scifolder import ExperimentCFood
+from caosadvancedtools.scifolder.generic_pattern import readme_pattern
+
+
+class ExampleH5CFood(H5CFood):
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        self.root_name = "ExampleH5"
+
+    @staticmethod
+    def get_re():
+        return ExperimentCFood.get_re()[:-len(readme_pattern)] + r".*\.hdf5"
+
+    def create_identifiables(self):
+        super().create_identifiables()
+        self.identifiable_root = db.Record()
+        self.identifiable_root.add_property("hdf5File", self.crawled_file)
+        self.identifiable_root.add_parent("ExampleH5")
+        self.identifiables.append(self.identifiable_root)
+
+    def special_treatment(self, key, value, dtype):
+        if key == "attr_data_root":
+            return "single_attribute", value, dtype
+
+        return key, value, dtype
diff --git a/integrationtests/extroot/ExperimentalData/2010_TestProject/2019-02-03/hdf5_dummy_file.hdf5 b/integrationtests/extroot/ExperimentalData/2010_TestProject/2019-02-03/hdf5_dummy_file.hdf5
new file mode 100644
index 0000000000000000000000000000000000000000..41bfb7ab3bcac19d90fd4f018cdd8118ae806eaf
Binary files /dev/null and b/integrationtests/extroot/ExperimentalData/2010_TestProject/2019-02-03/hdf5_dummy_file.hdf5 differ
diff --git a/integrationtests/extroot/ExperimentalData/2010_TestProject/2019-02-04/README.md b/integrationtests/extroot/ExperimentalData/2010_TestProject/2019-02-04/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..7de3bd15d29b93085322250a06adb9b8f389f8e4
--- /dev/null
+++ b/integrationtests/extroot/ExperimentalData/2010_TestProject/2019-02-04/README.md
@@ -0,0 +1,5 @@
+---
+responsible:
+- Tom Wood
+description: Something.
+...
diff --git a/integrationtests/filldb.sh b/integrationtests/filldb.sh
index 98d22347bd2d40e8384a2a217452fd3ba5bc445f..9f55365eb595537b43caa9b197c8bc31ea1e69cb 100755
--- a/integrationtests/filldb.sh
+++ b/integrationtests/filldb.sh
@@ -7,4 +7,5 @@ python3 -m caosadvancedtools.loadFiles /opt/caosdb/mnt/extroot/SimulationData
 python3 -m caosadvancedtools.loadFiles /opt/caosdb/mnt/extroot/Publications
 python3 -m caosadvancedtools.loadFiles /opt/caosdb/mnt/extroot/Software
 python3 insert_model.py 
+python3 insert_some.py
 python3 crawl.py /
diff --git a/integrationtests/insert_model.py b/integrationtests/insert_model.py
index 270a08a36d7512a8642c2ca08a9ec6ea93b81bd9..ae3dd7701b44f5008bd976d81f8ecc8d9a02bf89 100755
--- a/integrationtests/insert_model.py
+++ b/integrationtests/insert_model.py
@@ -1,5 +1,8 @@
 #!/usr/bin/env python3
 import caosdb as db
+import h5py
+from caosadvancedtools.cfoods.h5 import H5CFood
+from caosadvancedtools.models.data_model import DataModel
 from caosadvancedtools.models.parser import parse_model_from_yaml
 
 model = parse_model_from_yaml("model.yml")
@@ -9,3 +12,11 @@ if len(db.execute_query("FIND Property alias")) == 0:
     al = db.Property(name="alias")
     al.add_parent(name="name")
     al.insert()
+
+h5model = db.Container()
+h5file = h5py.File('extroot/ExperimentalData/2010_TestProject/2019-02-03/'
+                   'hdf5_dummy_file.hdf5', 'r')
+H5CFood.create_structure(h5file, create_recordTypes=True, collection=h5model,
+                         root_name="ExampleH5")
+h5file.close()
+print(h5model)
+h5model = DataModel(h5model)
+h5model.sync_data_model(noquestion=True)
diff --git a/integrationtests/insert_some.py b/integrationtests/insert_some.py
new file mode 100644
index 0000000000000000000000000000000000000000..cf16a45ddf1f95ed261af1d9f18edfa1cbf4b450
--- /dev/null
+++ b/integrationtests/insert_some.py
@@ -0,0 +1,28 @@
+#!/usr/bin/env python3
+import caosdb as db
+from caosadvancedtools.scifolder.experiment_cfood import dm
+
+# This inserts two identifiables which depend on each other. If no
+# dependencies were possible among identifiables, it would not be possible to
+# find both: the experiment identifiable, for example, would not reference the
+# correct Project Record.
+project = db.Record(name='2010_TestProject')
+project.add_parent(name=dm.Project)
+project.insert()
+
+pers = db.Record()
+pers.add_parent("Person")
+pers.add_property("lastname", "Wood")
+pers.add_property("firstname", "Tom")
+pers.insert()
+
+experiment = db.Record()
+experiment.add_parent(name=dm.Experiment)
+experiment.description = "Something."
+experiment.add_property(
+    name=dm.date, value='2019-02-04')
+experiment.add_property(name=dm.Project, value=project)
+experiment.add_property(
+        name="identifier", value="empty_identifier")
+experiment.add_property(
+        name="responsible", value=pers)
+experiment.insert(flags={"force-missing-obligatory": "ignore"})
diff --git a/integrationtests/model.yml b/integrationtests/model.yml
index 357adfc7b6618f427297105f722b2d333c34c792..eaf1c084787fb8ed181db9abbdc05ae74d6a212f 100644
--- a/integrationtests/model.yml
+++ b/integrationtests/model.yml
@@ -9,6 +9,7 @@ Experiment:
   # TODO empty  recommended_properties is a problem
   #recommended_properties:
     responsible:
+      datatype: LIST<Person>
 Project:
 SoftwareVersion:
   recommended_properties:
@@ -38,16 +39,16 @@ Person:
     email:
       datatype: TEXT 
       description: 'Email of a Person.'
-responsible:
-  datatype: REFERENCE
 revisionOf:
   datatype: REFERENCE
 results:
-  datatype: REFERENCE
+  datatype: LIST<REFERENCE>
 sources:
-  datatype: REFERENCE
+  datatype: LIST<REFERENCE>
 scripts:
-  datatype: REFERENCE
+  datatype: LIST<REFERENCE>
+single_attribute:
+  datatype: LIST<INTEGER>
 Simulation:
   obligatory_properties:
     date:
@@ -74,3 +75,5 @@ Presentation:
 Report:
   inherit_from_suggested:
   - Publication
+hdf5File:
+  datatype: REFERENCE
diff --git a/integrationtests/test.sh b/integrationtests/test.sh
index a56b758421a059a0cc3461c08600c13ffd93705c..71af543643a35cb082f10a24440c5ea87df946c9 100755
--- a/integrationtests/test.sh
+++ b/integrationtests/test.sh
@@ -1,8 +1,23 @@
 #!/bin/bash
+if [ "$1" != "--force" ]
+then
+    echo "Warning: For these tests, the whole database will be deleted. Do you want to proceed? (yes/Exit)"
+    read -r safety
+    if [ -z "$safety" ]
+    then
+        echo "Exiting..."
+        exit 0
+    elif [ "$safety" != "yes" ]
+    then
+        echo "Exiting..."
+        exit 0
+    fi
+fi
 OUT=/tmp/crawler.output
 ls 
 cat pycaosdb.ini
 rm -rf cache.db
+set -e
 echo "Clearing database"
 python3 clear_database.py
 echo "Testing crawler without cfoods"
@@ -19,17 +34,16 @@ echo "Filling the database"
 echo "Testing the crawler database"
 python3 -m pytest test_crawler_with_cfoods.py
 echo "make a change"
-pushd extroot
+cd extroot
 egrep -liRZ 'A description of another example' . | xargs -0 -l sed -i -e 's/A description of another example/A description of this example/g'
 # remove a file to check that this does not lead to a crawler crash
 mv DataAnalysis/2010_TestProject/2019-02-03_something/README.xlsx DataAnalysis/2010_TestProject/2019-02-03_something/README.xlsx_back
-popd
+cd ..
 echo "run crawler"
 ./crawl.py  / | tee $OUT
 # rename the moved file
 mv extroot/DataAnalysis/2010_TestProject/2019-02-03_something/README.xlsx_back extroot/DataAnalysis/2010_TestProject/2019-02-03_something/README.xlsx
 # check whether there was something UNAUTHORIZED
-set -e
 grep "There where unauthorized changes" $OUT
 # get the id of the run which is the last field of the output string
 RUN_ID=$(grep "run id:" $OUT | awk '{ print $NF }')
@@ -44,9 +58,9 @@ then
 fi
 set -e
 echo "undo changes"
-pushd extroot
+cd extroot
 egrep -liRZ 'A description of this example' . | xargs -0 -l sed -i -e 's/A description of this example/A description of another example/g'
-popd
+cd ..
 python3 test_table.py
 # TODO the following test deletes lots of the data inserted by the crawler
 echo "Testing im and export"
diff --git a/integrationtests/test_crawler_with_cfoods.py b/integrationtests/test_crawler_with_cfoods.py
index 18aa4847845ca2353d82a0439102211f1072e77e..fc07b6bde0ec5f0462cc6f51c27b875ff3a22b5c 100755
--- a/integrationtests/test_crawler_with_cfoods.py
+++ b/integrationtests/test_crawler_with_cfoods.py
@@ -26,6 +26,7 @@ import os
 import unittest
 
 import caosdb as db
+from caosdb.apiutils import retrieve_entity_with_id
 
 
 def get_entity_with_id(eid):
@@ -34,6 +35,14 @@ def get_entity_with_id(eid):
 
 class CrawlerTest(unittest.TestCase):
     def test_experiment(self):
+
+        ########################
+        # # dummy for dependency test experiment # #
+        ########################
+        exp = db.execute_query(
+            "FIND Experiment with date=2019-02-04 and identifier=empty_identifier",
+            unique=True)
+
         ########################
         # # first experiment # #
         ########################
@@ -489,3 +498,17 @@ class CrawlerTest(unittest.TestCase):
 
         # Should have a description
         self.assertIsNotNone(ana.description)
+
+    def test_exampleh5(self):
+        examp = db.execute_query("FIND Record ExampleH5", unique=True)
+
+        for prop in examp.properties:
+            if prop.name == 'group_level1_a':
+                self.assertIsNotNone(retrieve_entity_with_id(
+                    prop.value).get_property("group_level2_aa"))
+                self.assertIsNone(retrieve_entity_with_id(
+                    prop.value).get_property("group_level1_a"))
+            elif prop.name == 'group_level1_b':
+                self.assertIsNotNone(retrieve_entity_with_id(
+                    prop.value).get_property("level1_b_floats"))
+            elif prop.name == 'group_level1_c':
+                self.assertIsNotNone(retrieve_entity_with_id(
+                    prop.value).get_property("level1_c_floats"))
+            elif prop.name == 'root_integers':
+                self.assertIsNotNone(retrieve_entity_with_id(
+                    prop.value).get_property("single_attribute"))
diff --git a/integrationtests/test_data_model.py b/integrationtests/test_data_model.py
index 6f530719a810d76e5cc5a2c59fcd2d0325ff5268..2949fa81727a6c61a8646a48c249204fa87542d8 100644
--- a/integrationtests/test_data_model.py
+++ b/integrationtests/test_data_model.py
@@ -33,13 +33,6 @@ class DataModelTest(unittest.TestCase):
         rt = db.execute_query("FIND RECORDTYPE TestRecord", unique=True)
         assert rt.get_property("test") is not None
 
-    def tearDown(self):
-        try:
-            tests = db.execute_query("FIND test*")
-            tests.delete()
-        except Exception:
-            pass
-
     def test_missing(self):
         # Test sync with missing prop
         # insert propt
@@ -52,3 +45,19 @@ class DataModelTest(unittest.TestCase):
         dm.sync_data_model(noquestion=True)
         rt = db.execute_query("FIND RECORDTYPE TestRecord", unique=True)
         assert rt.get_property("testproperty") is not None
+
+    def test_get_existing_entities(self):
+        db.RecordType(name="TestRecord").insert()
+        c = db.Container().extend([
+            db.Property(name="test"),
+            db.RecordType(name="TestRecord")])
+        exist = DataModel.get_existing_entities(c)
+        assert len(exist) == 1
+        assert exist[0].name == "TestRecord"
+
+    def tearDown(self):
+        try:
+            tests = db.execute_query("FIND test*")
+            tests.delete()
+        except Exception:
+            pass
diff --git a/integrationtests/test_im_und_export.py b/integrationtests/test_im_und_export.py
index db26249b14d3d547db8dcea4e49de2aa07479e5b..27995080aa5cbeeb6f562226d4f0c0ca19c64d83 100644
--- a/integrationtests/test_im_und_export.py
+++ b/integrationtests/test_im_und_export.py
@@ -3,15 +3,14 @@ import os
 from tempfile import TemporaryDirectory
 
 import caosdb as db
-
-from caosadvancedtools.export_related import export
+from caosadvancedtools.export_related import export_related_to
 from caosadvancedtools.import_from_xml import import_xml
 
 if __name__ == "__main__":
     print("Conducting im- and export tests")
     rec = db.execute_query("FIND 2019-02-03_really_cool_finding", unique=True)
     directory = TemporaryDirectory()
-    export(rec.id, directory=directory.name)
+    export_related_to(rec.id, directory=directory.name)
     # delete everything
     recs = db.execute_query("FIND entity with id>99")
     recs.delete()
diff --git a/setup.py b/setup.py
index f26b126c2a589554ace736661aa3a685b3f671d3..772866537d02b71adddfab2a351a3e3372b05ab2 100755
--- a/setup.py
+++ b/setup.py
@@ -157,12 +157,15 @@ def setup_package():
         install_requires=["caosdb>=0.4.0",
                           "openpyxl>=3.0.0",
                           "pandas>=1.2.0",
+                          "numpy>=1.17.3",
                           "xlrd>=2.0",
                           ],
+        extras_require={"h5-crawler": ["h5py>=3.3.0", ],
+                        },
         packages=find_packages('src'),
         package_dir={'': 'src'},
         setup_requires=["pytest-runner>=2.0,<3dev"],
-        tests_require=["pytest", "pytest-cov", "coverage>=4.4.2"],
+        tests_require=["pytest", "pytest-pythonpath", "pytest-cov", "coverage>=4.4.2"],
     )
     try:
         setup(**metadata)
diff --git a/src/caosadvancedtools/cache.py b/src/caosadvancedtools/cache.py
index 3dac86ec328944303c629c8de721fb1a2f6a7bef..ff807f2aba6210d643e675e7e3dd91d7c3b30906 100644
--- a/src/caosadvancedtools/cache.py
+++ b/src/caosadvancedtools/cache.py
@@ -32,6 +32,8 @@ from hashlib import sha256
 import caosdb as db
 from lxml import etree
 
+import tempfile
+
 
 def put_in_container(stuff):
     if isinstance(stuff, list):
@@ -154,7 +156,9 @@ class UpdateCache(Cache):
 
     def __init__(self, db_file=None):
         if db_file is None:
-            db_file = "/tmp/crawler_update_cache.db"
+            tmppath = tempfile.gettempdir()
+            tmpf = os.path.join(tmppath, "crawler_update_cache.db")
+            db_file = tmpf
         super().__init__(db_file=db_file)
 
     @staticmethod
diff --git a/src/caosadvancedtools/cfood.py b/src/caosadvancedtools/cfood.py
index 8ce1dced48ba12e62717fe5bd788178e1e5a9488..c818792c79440dc1fcc78f3c0b1ed1b9bd215cb8 100644
--- a/src/caosadvancedtools/cfood.py
+++ b/src/caosadvancedtools/cfood.py
@@ -47,6 +47,7 @@ from abc import ABCMeta, abstractmethod
 from datetime import datetime
 
 import caosdb as db
+from caosdb.common.models import Entity
 from caosdb.exceptions import (BadQueryError, EmptyUniqueQueryError,
                                QueryNotUniqueError, TransactionError)
 
@@ -152,9 +153,19 @@ fileguide = FileGuide()
 
 
 class AbstractCFood(object, metaclass=ABCMeta):
+    """ Abstract base class for Crawler food (CFood)."""
 
     def __init__(self, item):
-        """ Abstract base class for Crawler food (CFood)."""
+        """A CFood has two main methods which must be customized:
+
+    1. `create_identifiables`
+        This method defines (and inserts if necessary) the identifiables which may be updated at a
+        later stage.  After calling this method, the `identifiables` Container contains those
+        Records which will be updated at a later time.
+
+    2. `update_identifiables`
+        This method updates the stored identifiables as necessary.
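+
+        A minimal sketch of a subclass (names are hypothetical)::
+
+            class MyCFood(AbstractFileCFood):
+                @classmethod
+                def get_re(cls):
+                    return AbstractFileCFood.re_from_extensions(["csv"])
+
+                def create_identifiables(self):
+                    self.identifiables.append(
+                        db.Record().add_parent("MyType"))
+
+                def update_identifiables(self):
+                    pass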
+        """
         self.to_be_updated = db.Container()
         self.identifiables = db.Container()
         self.item = item
@@ -298,7 +309,7 @@ class AbstractFileCFood(AbstractCFood):
         super().__init__(*args, item=crawled_path, **kwargs)
         self._crawled_file = None
         self.crawled_path = crawled_path
-        self.match = re.match(type(self).get_re(), crawled_path)
+        self.match = re.match(self.get_re(), crawled_path)
         self.attached_filenames = []
 
     @property
@@ -309,7 +320,31 @@ class AbstractFileCFood(AbstractCFood):
         return self._crawled_file
 
     @staticmethod
-    def get_re():
+    def re_from_extensions(extensions):
+        """Return a regular expression which matches the given file extensions.
+
+        Useful for inheriting classes.
+
+        Parameters
+        ----------
+        extensions : iterable<str>
+            An iterable with the allowed extensions.
+
+        Returns
+        -------
+        out : str
+            The regular expression, starting with ``.*\\.`` and ending with
+            the EOL dollar character.  The actual extension will be accessible
+            in the pattern group ``ext`` (cf.
+            :py:attr:`python:re.Pattern.groupindex`).
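+
+            For example (illustrative), ``re_from_extensions(["h5", "hdf5"])``
+            returns ``r".*\.(?P<ext>h5|hdf5)$"``.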
+        """
+
+        if not extensions:
+            return None
+
+        return r".*\.(?P<ext>" + "|".join(extensions) + ")$"
+
+    @classmethod
+    def get_re(cls):
         """ Returns the regular expression used to identify files that shall be
         processed
 
@@ -377,6 +412,7 @@ def assure_object_is_in_list(obj, containing_object, property_name,
     if containing_object.get_property(property_name) is None:
         containing_object.add_property(property_name, value=[],
                                        datatype=datatype)
+    # TODO: the case where the same property exists multiple times is not treated
 
     if not isinstance(containing_object.get_property(property_name).value, list):
         containing_object.get_property(property_name).value = [
@@ -627,8 +663,19 @@ def assure_has_property(entity, name, value, to_be_updated=None,
     if isinstance(value, db.Entity):
         value = value.id
 
+    if isinstance(value, list):
+        value = [i.id if isinstance(i, db.Entity) else i for i in value]
+
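+    # Compare values via entity ids: both the given value and each candidate
+    # property value are normalized (entities and lists of entities are
+    # replaced by their ids) before checking equality below.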
     for el in possible_properties:
-        if el.value == value:
+        tmp_value = el.value
+
+        if isinstance(tmp_value, db.Entity):
+            tmp_value = el.value.id
+
+        if isinstance(tmp_value, list):
+            tmp_value = [i.id if isinstance(i, db.Entity) else i for i in tmp_value]
+
+        if tmp_value == value:
             contained = True
 
             break
diff --git a/src/caosadvancedtools/cfoods/__init__.py b/src/caosadvancedtools/cfoods/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..30ce05add09a223c2f65dbe187a6cfb1768d7a22
--- /dev/null
+++ b/src/caosadvancedtools/cfoods/__init__.py
@@ -0,0 +1,21 @@
+#!/usr/bin/env python3
+
+# This file is a part of the CaosDB Project.
+#
+# Copyright (C) 2020 IndiScale GmbH <www.indiscale.com>
+# Copyright (C) 2020 Daniel Hornung <d.hornung@indiscale.com>
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as
+# published by the Free Software Foundation, either version 3 of the
+# License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+
+"""Specialized CFoods."""
diff --git a/src/caosadvancedtools/cfoods/h5.py b/src/caosadvancedtools/cfoods/h5.py
new file mode 100644
index 0000000000000000000000000000000000000000..6c68edd3668fec957126aa3234a830aab98fcd25
--- /dev/null
+++ b/src/caosadvancedtools/cfoods/h5.py
@@ -0,0 +1,289 @@
+#!/usr/bin/env python3
+
+# This file is a part of the CaosDB Project.
+#
+# Copyright (C) 2020,2021 IndiScale GmbH <www.indiscale.com>
+# Copyright (C) 2020 Daniel Hornung <d.hornung@indiscale.com>
+# Copyright (C) 2021 Henrik tom Wörden <h.tomwoerden@indiscale.com>
+# Copyright (C) 2021 Alexander Kreft
+# Copyright (C) 2021 Laboratory for Fluid Physics and Biocomplexity,
+# Max-Planck-Insitute für Dynamik und Selbstorganisation <www.lfpn.ds.mpg.de>
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as
+# published by the Free Software Foundation, either version 3 of the
+# License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+
+"""A CFood for hdf5 files
+
+
+This module allows to parse hdf5 files and reproduce their structure in form
+of Records that reference each other.
+
+hdf5 files are composed of groups and datasets. Both of which can have
+attributes. Groups and datasets are mapped to Records and attributes to
+Properties.
+"""
+
+import re
+from copy import deepcopy
+
+import caosdb as db
+import h5py
+import numpy as np
+from caosadvancedtools.cfood import fileguide
+from caosdb.common.datatype import is_reference
+from caosdb.common.utils import uuid
+
+from ..cfood import (AbstractFileCFood, assure_has_description,
+                     assure_has_parent, assure_has_property,
+                     assure_property_is)
+from ..structure_mapping import (EntityMapping, collect_existing_structure,
+                                 update_structure)
+
+
+def h5_attr_to_property(val):
+    """ returns the value and datatype of a CaosDB Property for the given value
+
+
+    1d arrays are converted to lists
+    If no suitable Property can be created (None, None) is returned.
+
+    2d and higher dimensionality arrays are being ignored.
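+
+    For example (illustrative), a 1D integer array yields
+    ``([1, 2, 3], db.LIST(db.INTEGER))``, while a plain string yields
+    ``(value, db.TEXT)``.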
+    """
+
+    if isinstance(val, str):
+        return val, db.TEXT
+    elif isinstance(val, complex):
+        return val, db.TEXT
+    else:
+        if not hasattr(val, 'dtype'):
+            raise NotImplementedError("Code assumes only str are missing the"
+                                      "dtype attribute")
+
+        if issubclass(val.dtype.type, np.floating):
+            dtype = db.DOUBLE
+        elif issubclass(val.dtype.type, np.integer):
+            dtype = db.INTEGER
+        elif val.dtype.kind in ['S', 'U']:
+            dtype = db.TEXT
+            val = val.astype(str)
+        elif val.dtype.kind == 'O':
+            if not np.all([isinstance(el, str) for el in val]):
+                raise NotImplementedError("Cannot convert arbitrary objects")
+            dtype = db.TEXT
+            val = val.astype(str)
+        else:
+            raise NotImplementedError("Unknown dtype used")
+
+        if isinstance(val, np.ndarray):
+            if val.ndim > 1:
+                return None, None
+        # The tolist method is on both numpy.ndarray and numpy.generic
+        # and properly converts scalars (including 0-dimensional
+        # numpy.ndarray) to Python scalars and 1D arrays to lists of
+        # Python scalars.
+        if val.ndim != 0:
+            dtype = db.LIST(dtype)
+        val = val.tolist()
+
+        # TODO this can eventually be removed
+
+        if hasattr(val, 'ndim'):
+            if not isinstance(val, np.ndarray) and val.ndim != 0:
+                print(val, val.ndim)
+                raise Exception(
+                    "Implementation assumes that only np.arrays have ndim.")
+
+        return val, dtype
+
+
+class H5CFood(AbstractFileCFood):
+    """ H5CFood which consumes a HDF5 file.
+
+    The structure is mapped onto an equivalent structure of interconnected
+    Records.
+
+    Attributes
+    ----------
+    h5file : h5py.File, default None
+        The HDF5 file object to be read.
+    """
+
+    # root_name and special_treatment are meant to be overwritten by subclasses
+
+    def __init__(self, *args, **kwargs):
+        """CFood which consumes HDF5 files."""
+        super().__init__(*args, **kwargs)
+        self.h5file = None
+        self.root_name = "root"
+        self.hdf5Container = db.Container()
+        self.em = EntityMapping()
+
+    def collect_information(self):
+        self.h5file = h5py.File(fileguide.access(self.crawled_path), 'r')
+
+    @staticmethod
+    def get_re():
+        """Return a regular expression string to match *.h5, *.nc, *.hdf, *.hdf5."""
+        extensions = [
+            "h5",
+            "nc",
+            "hdf",
+            "hdf5",
+        ]
+
+        return AbstractFileCFood.re_from_extensions(extensions)
+
+    def create_identifiables(self):
+        """Create identifiables out of groups in the HDF5 file.
+
+        This method will call is_identifiable(h5path, h5object) and create_identifiable(h5path,
+        h5object) on each HDF5 object to decide and actually create the identifiables.
+        """
+        # manually create the identifiable root element: self.identifiable_root
+        self.structure = self.create_structure(self.h5file,
+                                               special_treatment=self.special_treatment,
+                                               root_name=self.root_name)
+
+    def update_identifiables(self):
+        """Check if the identifiables need to be updated.
+
+        In that case also add the updated entities to the list of updateables.
+
+        Missing parts of the structure are inserted first; existing parts are
+        then updated via ``update_structure``.
+
+        """
+
+        self.structure._cuid = "root element"
+        self.em.add(self.structure, self.identifiable_root)
+        collect_existing_structure(self.structure, self.identifiable_root,
+                                   self.em)
+        self.to_be_inserted = db.Container()
+        self.insert_missing_structure(self.structure)
+
+        # TODO this is a workaround due to the fact that the caosdb library
+        # changes the objects in the Container if it is inserted. The graph
+        # structure is flattened. I.e. references to other entity objects are
+        # replaced with their IDs. However this code depends on this graph.
+        tmp_copy = deepcopy(self.to_be_inserted)
+        tmp_copy.insert()
+
+        for e1, e2 in zip(tmp_copy, self.to_be_inserted):
+            e2.id = e1.id
+        # End workaround
+
+        # self.update_structure(self.structure)
+        update_structure(self.em, self.to_be_updated, self.structure)
+
+    def special_treatment(self, key, value, dtype):
+        """define special treatment of attributes
+
+        to be overwritten by child classes.
+
+        key: attribute name
+        value: attribute value
+        """
+
+        return key, value, dtype
+
+    @classmethod
+    def create_structure(cls, h5obj, create_recordTypes=False, collection=None,
+                         special_treatment=None, root_name="root"):
+        """Create Records and Record types from a given hdf5-object for all
+        items in the tree. Attributes are added as properties, the
+        values only if the dimension < 2.
+
+        Parameters
+        ----------
+        h5obj : h5py.File
+                a hdf5-file object
+
+        root_name : name that is used instead of '/'
+                    Type of the root Record (the Record corresponding to
+                    the root node in the HDF5 file)
+
+        Returns
+        -------
+        rec : db.Container
+            Contains the Record Types, Records and Properties for the
+            input-tree
+
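+        A typical use (cf. ``integrationtests/insert_model.py``)::
+
+            h5file = h5py.File("file.hdf5", "r")
+            collection = db.Container()
+            H5CFood.create_structure(h5file, create_recordTypes=True,
+                                     collection=collection,
+                                     root_name="ExampleH5")
+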
+        """
+
+        if collection is None:
+            collection = []
+
+        if special_treatment is None:
+            def special_treatment(x, y, z):
+                return x, y, z
+
+        if h5obj.name == "/":
+            name_without_path = root_name
+        else:
+            name_without_path = h5obj.name.split("/")[-1]
+
+        if create_recordTypes:
+            rec = db.RecordType(name=name_without_path)
+        else:
+            rec = db.Record().add_parent(name=name_without_path)
+        collection.append(rec)
+
+        if isinstance(h5obj, h5py.Group):
+            for subgroup in h5obj.keys():
+                subgroup_name = h5obj[subgroup].name.split("/")[-1]
+
+                sub = H5CFood.create_structure(h5obj[subgroup],
+                                               create_recordTypes=create_recordTypes,
+                                               collection=collection,
+                                               special_treatment=special_treatment)
+
+                if create_recordTypes:
+                    rec.add_property(subgroup_name)
+                else:
+                    rec.add_property(subgroup_name, value=sub)
+
+        for key, val in h5obj.attrs.items():
+            # ignored
+
+            if key in ["REFERENCE_LIST", "DIMENSION_LIST", "NAME", "CLASS"]:
+                continue
+
+            val, dtype = h5_attr_to_property(val)
+
+            if val is None and dtype is None:
+                continue
+
+            if create_recordTypes and key.lower() not in ['description']:
+                treated_k, _, treated_dtype = special_treatment(
+                    key, val, dtype)
+
+                if treated_k is not None:
+                    prop = db.Property(name=treated_k, datatype=treated_dtype)
+                    collection.append(prop)
+                    rec.add_property(name=treated_k)
+            else:
+                treated_k, treated_v, treated_dtype = special_treatment(
+                    key, val, dtype)
+
+                if treated_k is not None:
+                    rec.add_property(name=treated_k, value=treated_v,
+                                     datatype=treated_dtype)
+
+        return rec
+
+    def insert_missing_structure(self, target_structure: db.Record):
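+        """Recursively add those entities of the target structure that have
+        no existing counterpart (according to self.em) to
+        self.to_be_inserted."""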
+        if target_structure._cuid not in self.em.to_existing:
+            self.to_be_inserted.append(target_structure)
+
+        for prop in target_structure.get_properties():
+            if prop.is_reference(server_retrieval=True):
+                self.insert_missing_structure(prop.value)
diff --git a/src/caosadvancedtools/collect_datamodel.py b/src/caosadvancedtools/collect_datamodel.py
index 1ca68068e713dd34ebc3368ad760461578dee4ef..806d15333cac7f745ce2fb82a02e0214ad2b6616 100644
--- a/src/caosadvancedtools/collect_datamodel.py
+++ b/src/caosadvancedtools/collect_datamodel.py
@@ -26,14 +26,19 @@ import argparse
 import os
 
 import caosdb as db
+from caosdb.apiutils import retrieve_entities_with_ids
+
+from caosadvancedtools.export_related import export
 
 
 def get_dm():
-    rts = set([r.name for r in db.execute_query("SELECT name FROM RECORDTYPE")])
+    rts = set([(r.id, r.name) for r
+               in db.execute_query("SELECT name FROM RECORDTYPE")
+               if r.name is not None])
 
     if None in rts:
         rts.remove(None)
-    ps = set([r.name for r in db.execute_query("SELECT name FROM PROPERTY")])
+    ps = set([(r.id, r.name) for r
+              in db.execute_query("SELECT name FROM PROPERTY")
+              if r.name is not None])
 
     if None in ps:
         ps.remove(None)
@@ -47,18 +52,26 @@ def get_parser():
                    "be stored")
     p.add_argument("-c", "--compare", help="directory where the datamodel that"
                    " shall be compared is stored")
+    p.add_argument("-x", "--xml", action="store_true",
+                   help="store xml as well")
 
     return p
 
 
-def store(directory):
+def store(directory, xml=False):
     rts, ps = get_dm()
 
     os.makedirs(directory, exist_ok=True)
     with open(os.path.join(directory, "recordtypes.txt"), "w") as fi:
-        fi.write(",".join(rts))
+        fi.write(",".join([el[1] for el in rts]))
     with open(os.path.join(directory, "properties.txt"), "w") as fi:
-        fi.write(",".join(ps))
+        fi.write(",".join([el[1] for el in ps]))
+
+    if xml:
+        cont = retrieve_entities_with_ids(
+            [el[0] for el in rts]+[el[0] for el in ps])
+
+        export(cont, directory)
 
 
 def load_dm(directory):
@@ -104,7 +117,7 @@ if __name__ == "__main__":
     args = p.parse_args()
 
     if args.store:
-        store(args.store)
+        store(args.store, xml=args.xml)
 
     if args.compare:
         compare(args.compare)
diff --git a/src/caosadvancedtools/crawler.py b/src/caosadvancedtools/crawler.py
index 747c533d7a4652434f967147d8a53d1847cfbb4e..5a8d428655791169557f5c292d30698f6ad69798 100644
--- a/src/caosadvancedtools/crawler.py
+++ b/src/caosadvancedtools/crawler.py
@@ -56,6 +56,7 @@ from .datainconsistency import DataInconsistencyError
 from .datamodel_problems import DataModelProblems
 from .guard import RETRIEVE, ProhibitedException
 from .guard import global_guard as guard
+from .serverside.helper import send_mail as main_send_mail
 from .suppressKnown import SuppressKnown
 
 logger = logging.getLogger(__name__)
@@ -500,7 +501,6 @@ carefully and if the changes are ok, click on the following link:
         """.format(url=caosdb_config["Connection"]["url"],
                    filename=filename,
                    changes="\n".join(changes))
-        sendmail = caosdb_config["Misc"]["sendmail"]
         try:
             fro = caosdb_config["advancedtools"]["crawler.from_mail"]
             to = caosdb_config["advancedtools"]["crawler.to_mail"]
@@ -510,8 +510,11 @@ carefully and if the changes are ok, click on the following link:
                          "'from_mail' and 'to_mail'.")
             return
 
-        p = subprocess.Popen([sendmail, "-f", fro, to], stdin=subprocess.PIPE)
-        p.communicate(input=text.encode())
+        main_send_mail(
+            from_addr=fro,
+            to=to,
+            subject="Crawler Update",
+            body=text)
 
     def push_identifiables_to_CaosDB(self, cfood):
         """
@@ -576,44 +579,51 @@ carefully and if the changes are ok, click on the following link:
         # looking for matching entities in CaosDB when there is no valid id
         # i.e. there was none set from a cache
 
+        existing = []
+        inserted = []
+
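+        # Identifiables are treated one after the other so that one
+        # identifiable may reference an identifiable that was just inserted
+        # (dependencies among identifiables, cf. CHANGELOG).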
         for ent in identifiables:
             if ent.id is None or ent.id < 0:
                 logger.debug("Looking for: {}".format(
                     ent.id if ent.id is not None else ent.name))
-                existing = Crawler.find_existing(ent)
+                found = Crawler.find_existing(ent)
 
-                if existing is not None:
-                    ent.id = existing.id
+                if found is not None:
+                    ent.id = found.id
             else:
                 logger.debug("Id is known of: {}".format(ent))
 
-        # insert missing, i.e. those which are not valid
-        missing_identifiables = db.Container()
-        missing_identifiables.extend([ent for ent in identifiables
-                                      if ent.id is None or ent.id < 0])
-        # TODO the following should not be necessary. Fix it
-
-        for ent in missing_identifiables:
-            ent.id = None
+            # insert missing, i.e. those which are not valid
+            if ent.id is None or ent.id < 0:
+                missing = ent
+                ent.id = None
+            else:
+                missing = None
+                existing.append(ent)
 
-        if len(missing_identifiables) > 0:
-            info = "Going to insert the following entities:\n"
+            if missing:
+                try:
+                    guard.safe_insert(missing, unique=False,
+                                      flags={"force-missing-obligatory": "ignore"})
+                    inserted.append(ent)
+                except Exception as e:
+                    DataModelProblems.evaluate_exception(e)
+        if len(existing) > 0:
+            info = "Identified the following existing entities:\n"
 
-            for ent in missing_identifiables:
+            for ent in existing:
                 info += str(ent)+"\n"
             logger.debug(info)
+        else:
+            logger.debug("Did not identify any existing entities")
+        if len(inserted) > 0:
+            info = "Inserted the following entities:\n"
 
-        if len(missing_identifiables) == 0:
-            logger.debug("No new entities to be inserted.")
+            for ent in inserted:
+                info += str(ent)+"\n"
+            logger.debug(info)
         else:
-            try:
-                logger.info(
-                    "Inserting {} Records...".format(
-                        len(missing_identifiables)))
-                guard.safe_insert(missing_identifiables, unique=False,
-                                  flags={"force-missing-obligatory": "ignore"})
-            except Exception as e:
-                DataModelProblems.evaluate_exception(e)
+            logger.debug("Did not insert any new entities")
 
         logger.debug("Retrieving entities from CaosDB...")
         identifiables.retrieve(unique=True, raise_exception_on_error=False)
@@ -693,8 +703,8 @@ class FileCrawler(Crawler):
 
     @staticmethod
     def query_files(path):
-        query_str = "FIND FILE WHICH IS STORED AT " + (
-            path if path.endswith("/") else path + "/") + "**"
+        query_str = "FIND FILE WHICH IS STORED AT '" + (
+            path if path.endswith("/") else path + "/") + "**'"
         q_info = "Sending the following query: '" + query_str + "'\n"
         files = db.execute_query(query_str)
         logger.info(
diff --git a/src/caosadvancedtools/example_cfood.py b/src/caosadvancedtools/example_cfood.py
index 6111d95defc37bbb6d836feec3fa3d2e4e3d91ab..2e395d5c3030508087e25a7156d35c8954d223d7 100644
--- a/src/caosadvancedtools/example_cfood.py
+++ b/src/caosadvancedtools/example_cfood.py
@@ -26,8 +26,8 @@ from .cfood import AbstractFileCFood, assure_has_property
 
 
 class ExampleCFood(AbstractFileCFood):
-    @staticmethod
-    def get_re():
+    @classmethod
+    def get_re(cls):
         return (r".*/(?P<species>[^/]+)/"
                 r"(?P<date>\d{4}-\d{2}-\d{2})/README.md")
 
diff --git a/src/caosadvancedtools/export_related.py b/src/caosadvancedtools/export_related.py
index 00f440d28a2ae1da14132083e4b8d3c5003d1b65..69b588c34cc7c8123ab4291f6d8f76f06e7400be 100755
--- a/src/caosadvancedtools/export_related.py
+++ b/src/caosadvancedtools/export_related.py
@@ -96,12 +96,15 @@ def invert_ids(entities):
     apply_to_ids(entities, lambda x: x*-1)
 
 
-def export(rec_id, directory="."):
+def export_related_to(rec_id, directory="."):
     if not isinstance(rec_id, int):
         raise ValueError("rec_id needs to be an integer")
     ent = db.execute_query("FIND {}".format(rec_id), unique=True)
     cont = recursively_collect_related(ent)
+    export(cont, directory=directory)
 
+
+def export(cont, directory="."):
     directory = os.path.abspath(directory)
     dl_dir = os.path.join(directory, "downloads")
 
@@ -119,6 +122,9 @@ def export(rec_id, directory="."):
                 print("Failed download of:", target)
 
     invert_ids(cont)
+
+    for el in cont:
+        el.version = None
     xml = etree.tounicode(cont.to_xml(
         local_serialization=True), pretty_print=True)
 
@@ -147,4 +153,4 @@ if __name__ == "__main__":
     parser = defineParser()
     args = parser.parse_args()
 
-    export(args.id, directory=args.directory)
+    export_related_to(args.id, directory=args.directory)
diff --git a/src/caosadvancedtools/models/parser.py b/src/caosadvancedtools/models/parser.py
index 5e1532e03690e753b8926b87b01db4e3a89f2c4c..e56a492fa3e9199a312d374a622770e7836f42cb 100644
--- a/src/caosadvancedtools/models/parser.py
+++ b/src/caosadvancedtools/models/parser.py
@@ -43,8 +43,27 @@ KEYWORDS_IGNORED = [
 ]
 
 
+def _get_listdatatype(dtype):
+    """matches a string to check whether the type definition is a list
+
+    returns the type within the list or None, if it cannot be matched with a
+    list definition
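+
+    For example (illustrative), ``_get_listdatatype("LIST<TEXT>")`` and
+    ``_get_listdatatype("LIST(TEXT)")`` both return ``"TEXT"``, while
+    ``_get_listdatatype("TEXT")`` returns None.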
+    """
+    # TODO: string representation should be the same as used by the server:
+    # e.g. LIST<TEXT>
+    # this should be changed in the module and the old behaviour should be
+    # marked as deprecated
+    match = re.match(r"^LIST[(<](?P<dt>.*)[)>]$", dtype)
+
+    if match is None:
+        return None
+    else:
+        return match.group("dt")
+
 # Taken from https://stackoverflow.com/a/53647080, CC-BY-SA, 2018 by
 # https://stackoverflow.com/users/2572431/augurar
+
+
 class SafeLineLoader(yaml.SafeLoader):
     """Load a line and keep meta-information.
 
@@ -56,6 +75,7 @@ class SafeLineLoader(yaml.SafeLoader):
         mapping = super().construct_mapping(node, deep=deep)
         # Add 1 so line numbering starts at 1
         mapping['__line__'] = node.start_mark.line + 1
+
         return mapping
 # End of https://stackoverflow.com/a/53647080
 
@@ -76,12 +96,14 @@ class YamlDefinitionError(RuntimeError):
 def parse_model_from_yaml(filename):
     """Shortcut if the Parser object is not needed."""
     parser = Parser()
+
     return parser.parse_model_from_yaml(filename)
 
 
 def parse_model_from_string(string):
     """Shortcut if the Parser object is not needed."""
     parser = Parser()
+
     return parser.parse_model_from_string(string)
 
 
@@ -105,6 +127,7 @@ class Parser(object):
         """
         with open(filename, 'r') as outfile:
             ymlmodel = yaml.load(outfile, Loader=SafeLineLoader)
+
         return self._create_model_from_dict(ymlmodel)
 
     def parse_model_from_string(self, string):
@@ -121,6 +144,7 @@ class Parser(object):
           The created DataModel
         """
         ymlmodel = yaml.load(string, Loader=SafeLineLoader)
+
         return self._create_model_from_dict(ymlmodel)
 
     def _create_model_from_dict(self, ymlmodel):
@@ -148,6 +172,7 @@ class Parser(object):
         # a record type with the name of the element.
         # The retrieved entity will be added to the model.
         # If no entity with that name is found an exception is raised.
+
         if "extern" not in ymlmodel:
             ymlmodel["extern"] = []
 
@@ -170,7 +195,7 @@ class Parser(object):
             self._add_entity_to_model(name, entity)
         # initialize recordtypes
         self._set_recordtypes()
-        self._check_datatypes()
+        self._check_and_convert_datatypes()
 
         for name, entity in ymlmodel.items():
             self._treat_entity(name, entity, line=ymlmodel["__line__"])
@@ -196,11 +221,14 @@ class Parser(object):
         out : str
           If `name` was a string, return it. Else return str(`name`).
         """
+
         if name is None:
             print("WARNING: Name of this context is None: {}".format(context),
                   file=sys.stderr)
+
         if not isinstance(name, str):
             name = str(name)
+
         return name
 
     def _add_entity_to_model(self, name, definition):
@@ -208,9 +236,11 @@ class Parser(object):
 
         Properties are also initialized.
         """
+
         if name == "__line__":
             return
         name = self._stringify(name)
+
         if name not in self.model:
             self.model[name] = None
 
@@ -221,8 +251,9 @@ class Parser(object):
                 and isinstance(definition, dict)
                 # is it a property
                 and "datatype" in definition
-                # but not a list
-                and not definition["datatype"].startswith("LIST")):
+                # but not simply an RT of the model
+                and not (_get_listdatatype(definition["datatype"]) == name and
+                         _get_listdatatype(definition["datatype"]) in self.model)):
 
             # and create the new property
             self.model[name] = db.Property(name=name,
@@ -235,6 +266,7 @@ class Parser(object):
 
             if prop_type in definition:
                 # Empty property mapping should be allowed.
+
                 if definition[prop_type] is None:
                     definition[prop_type] = {}
                 try:
@@ -245,6 +277,7 @@ class Parser(object):
                 except AttributeError as ate:
                     if ate.args[0].endswith("'items'"):
                         line = definition["__line__"]
+
                         if isinstance(definition[prop_type], list):
                             line = definition[prop_type][0]["__line__"]
                         raise YamlDefinitionError(line) from None
@@ -252,26 +285,24 @@ class Parser(object):
 
     def _add_to_recordtype(self, ent_name, props, importance):
         """Add properties to a RecordType."""
+
         for n, e in props.items():
             if n in KEYWORDS:
                 if n in KEYWORDS_IGNORED:
                     continue
                 raise YamlDefinitionError("Unexpected keyword in line {}: {}".format(
                     props["__line__"], n))
+
             if n == "__line__":
                 continue
             n = self._stringify(n)
 
-            if isinstance(e, dict) and "datatype" in e and e["datatype"].startswith("LIST"):
-                match = re.match(r"LIST[(](.*)[)]", e["datatype"])
-
-                if match is None:
-                    raise ValueError("List datatype definition is wrong")
-                dt = db.LIST(match.group(1))
-                self.model[ent_name].add_property(name=n,
-                                                  importance=importance,
-                                                  datatype=dt
-                                                  )
+            if (isinstance(e, dict) and "datatype" in e
+                    and (_get_listdatatype(e["datatype"]) is not None)):
+                self.model[ent_name].add_property(
+                    name=n,
+                    importance=importance,
+                    datatype=db.LIST(_get_listdatatype(e["datatype"])))
             else:
                 self.model[ent_name].add_property(name=n,
                                                   importance=importance)
@@ -288,6 +319,7 @@ class Parser(object):
 
     def _treat_entity(self, name, definition, line=None):
         """Parse the definition and the information to the entity."""
+
         if name == "__line__":
             return
         name = self._stringify(name)
@@ -316,19 +348,22 @@ class Parser(object):
                     self.model[name].description = prop
 
                 elif prop_name == "recommended_properties":
-                    self._add_to_recordtype(name, prop, importance=db.RECOMMENDED)
+                    self._add_to_recordtype(
+                        name, prop, importance=db.RECOMMENDED)
 
                     for n, e in prop.items():
                         self._treat_entity(n, e)
 
                 elif prop_name == "obligatory_properties":
-                    self._add_to_recordtype(name, prop, importance=db.OBLIGATORY)
+                    self._add_to_recordtype(
+                        name, prop, importance=db.OBLIGATORY)
 
                     for n, e in prop.items():
                         self._treat_entity(n, e)
 
                 elif prop_name == "suggested_properties":
-                    self._add_to_recordtype(name, prop, importance=db.SUGGESTED)
+                    self._add_to_recordtype(
+                        name, prop, importance=db.SUGGESTED)
 
                     for n, e in prop.items():
                         self._treat_entity(n, e)
@@ -354,21 +389,50 @@ class Parser(object):
             raise e
         self.treated.append(name)
 
-    def _check_datatypes(self):
+    def _check_and_convert_datatypes(self):
         """ checks if datatype is valid.
-        datatype of properties is simply initialized with string. Here over
-        properties is iterated and datatype is corrected. """
+        datatype of properties is simply initialized with string. Here, we
+        iterate over the properties and check whether the datatype is a base
+        datatype or a name that was defined in the model (or the extern part).
+
+        The string representations are then replaced with caosdb objects.
+
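+        For example, the string ``"LIST<Person>"`` becomes
+        ``db.LIST(self.model["Person"])`` if ``Person`` was defined in the
+        model.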
+        """
 
         for key, value in self.model.items():
+
             if isinstance(value, db.Property):
-                if value.datatype in self.model:
-                    value.datatype = self.model[value.datatype]
-                else:
-                    # get the datatype
-                    try:
-                        value.datatype = db.__getattribute__(value.datatype)
-                    except AttributeError:
-                        raise ValueError("Unknown Datatype.")
+                dtype = value.datatype
+                is_list = False
+
+                if _get_listdatatype(value.datatype) is not None:
+                    dtype = _get_listdatatype(value.datatype)
+                    is_list = True
+
+                if dtype in self.model:
+                    if is_list:
+                        value.datatype = db.LIST(self.model[dtype])
+                    else:
+                        value.datatype = self.model[dtype]
+
+                    continue
+
+                if dtype in [db.DOUBLE,
+                             db.REFERENCE,
+                             db.TEXT,
+                             db.DATETIME,
+                             db.INTEGER,
+                             db.FILE,
+                             db.BOOLEAN]:
+
+                    if is_list:
+                        value.datatype = db.LIST(db.__getattribute__(dtype))
+                    else:
+                        value.datatype = db.__getattribute__(dtype)
+
+                    continue
+
+                raise ValueError("Property {} has an unknown datatype: {}".format(value.name, value.datatype))
 
     def _set_recordtypes(self):
         """ properties are defined in first iteration; set remaining as RTs """
diff --git a/src/caosadvancedtools/structure_mapping.py b/src/caosadvancedtools/structure_mapping.py
new file mode 100644
index 0000000000000000000000000000000000000000..50e57ac4d84f2034fbdb6da6c7159f450a993c3a
--- /dev/null
+++ b/src/caosadvancedtools/structure_mapping.py
@@ -0,0 +1,144 @@
+#!/usr/bin/env python3
+
+# This file is a part of the CaosDB Project.
+#
+# Copyright (C) 2021 IndiScale GmbH <www.indiscale.com>
+# Copyright (C) 2021 Henrik tom Wörden <h.tomwoerden@indiscale.com>
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as
+# published by the Free Software Foundation, either version 3 of the
+# License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+
+import caosdb as db
+from caosdb.apiutils import resolve_reference
+from caosdb.common.utils import uuid
+
+from .cfood import (assure_has_description, assure_has_parent,
+                    assure_property_is)
+
+
+class EntityMapping(object):
+    """
+    map local entities to entities on the server
+
+    the dict to_existing maps the _cuid of a target (local) entity to the
+    corresponding existing entity on the server
+    the dict to_target maps the id of an existing entity to the
+    corresponding target (local) entity
+    """
+
+    def __init__(self):
+        self.to_existing = {}
+        self.to_target = {}
+
+    def add(self, target, existing):
+        if target._cuid is None:
+            target._cuid = str(uuid())
+        self.to_existing[str(target._cuid)] = existing
+        self.to_target[existing.id] = target
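+
+    # Usage sketch (record names are illustrative):
+    #
+    #   em = EntityMapping()
+    #   em.add(target_rec, existing_rec)  # target_rec gets a _cuid if needed
+    #   em.to_existing[target_rec._cuid]  # -> existing_rec
+    #   em.to_target[existing_rec.id]     # -> target_rec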
+
+
+def collect_existing_structure(target_structure, existing_root, em):
+    """ recursively collects existing entities
+
+    The collected entities are those that correspond to the ones in
+    target_structure.
+
+    target_structure: db.Record
+        the local record tree whose counterparts shall be collected
+    existing_root: db.Record
+        the retrieved entity that corresponds to the root of target_structure
+    em: EntityMapping
+        the mapping to which the detected correspondences are added
+    """
+
+    for prop in target_structure.properties:
+        if prop.value is None:
+            continue
+
+        if not prop.is_reference(server_retrieval=True):
+            continue
+
+        if (len([p for p in target_structure.properties if p.name == prop.name])
+                != 1):
+            raise ValueError("Current implementation allows only one property "
+                             "for each property name")
+
+        if (existing_root.get_property(prop.name) is not None and
+                existing_root.get_property(prop.name).value is not None):
+            resolve_reference(prop)
+
+            resolve_reference(existing_root.get_property(prop.name))
+            referenced = existing_root.get_property(prop.name).value
+
+            if not isinstance(referenced, list):
+                referenced = [referenced]
+            target_value = prop.value
+
+            if not isinstance(target_value, list):
+                target_value = [target_value]
+
+            if len(target_value) != len(referenced):
+                raise ValueError("Target value list and existing reference "
+                                 "list differ in length.")
+
+            for tent, eent in zip(target_value, referenced):
+                em.add(tent, eent)
+                collect_existing_structure(tent, eent, em)
+
+
+def update_structure(em, updating: db.Container, target_structure: db.Record):
+    """compare the existing records with the target record tree created
+    from the h5 object
+
+    Parameters
+    ----------
+
+    em : EntityMapping
+        The mapping between target and existing entities.
+
+    updating : db.Container
+        Container into which entities that need an update are collected.
+
+    target_structure : db.Record
+        A record which may have references to other records.  Must be a DAG.
+    """
+
+    if target_structure._cuid in em.to_existing:
+        update_matched_entity(em,
+                              updating,
+                              target_structure,
+                              em.to_existing[target_structure._cuid])
+
+    for prop in target_structure.get_properties():
+        if prop.is_reference(server_retrieval=True):
+            update_structure(em, updating, prop.value)
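+
+# Sketch of the intended workflow (variable names are illustrative):
+#
+#   em = EntityMapping()
+#   collect_existing_structure(target_root, existing_root, em)
+#   updating = db.Container()
+#   update_structure(em, updating, target_root)
+#   # `updating` now collects the entities that require an update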
+
+
+def update_matched_entity(em, updating, target_record, existing_record):
+    """
+    update the Record existing in the server according to the Record
+    supplied as target_record
+    """
+
+    for parent in target_record.get_parents():
+        if parent.name == "":
+            raise ValueError("Parent name must not be empty.")
+        assure_has_parent(existing_record, parent.name, force=True)
+
+    if target_record.description is not None:
+        # check whether description is equal
+        assure_has_description(existing_record, target_record.description,
+                               to_be_updated=updating)
+
+    for prop in target_record.get_properties():
+        # check for remaining property types
+
+        if isinstance(prop.value, db.Entity):
+            if prop.value._cuid in em.to_existing:
+                value = em.to_existing[prop.value._cuid].id
+            else:
+                value = prop.value.id
+        else:
+            value = prop.value
+        assure_property_is(existing_record, prop.name, value,
+                           to_be_updated=updating)
diff --git a/src/caosadvancedtools/suppressKnown.py b/src/caosadvancedtools/suppressKnown.py
index c15f0e06fa7d126937497aeb877dd5d2991b6ff7..c4b57039c5184f2443e4dbb91cf11f5e59ae6790 100644
--- a/src/caosadvancedtools/suppressKnown.py
+++ b/src/caosadvancedtools/suppressKnown.py
@@ -5,6 +5,8 @@ import os
 import sqlite3
 from hashlib import sha256
 
+import tempfile
+
 
 class SuppressKnown(logging.Filter):
     """
@@ -26,8 +28,9 @@ class SuppressKnown(logging.Filter):
         if db_file:
             self.db_file = db_file
         else:
-            self.db_file = "/tmp/caosadvanced_suppressed_cache.db"
-
+            tmppath = tempfile.gettempdir()
+            tmpf = os.path.join(tmppath, "caosadvanced_suppressed_cache.db")
+            self.db_file = tmpf
         if not os.path.exists(self.db_file):
             self.create_cache()
 
diff --git a/src/caosadvancedtools/table_importer.py b/src/caosadvancedtools/table_importer.py
index 04c8ea23b19ee0cc055dc58b69f1b3d6fecd1b55..cb61e8389de69a2d0d0527ad01cb8b9991b19ece 100755
--- a/src/caosadvancedtools/table_importer.py
+++ b/src/caosadvancedtools/table_importer.py
@@ -88,7 +88,10 @@ def date_converter(val, fmt="%Y-%m-%d"):
     converts it using format string
     """
 
-    return datetime_converter(val, fmt=fmt).date()
+    if val is None:
+        return None
+    else:
+        return datetime_converter(val, fmt=fmt).date()
 
 
 def incomplete_date_converter(val, fmts={"%Y-%m-%d": "%Y-%m-%d",
@@ -145,12 +148,44 @@ def win_path_converter(val):
     return path.as_posix()
 
 
-class TSVImporter(object):
-    def __init__(self, converters, obligatory_columns=[], unique_columns=[]):
-        raise NotImplementedError()
+def string_in_list(val, options, ignore_case=True):
+    """Return the given value if it is contained in options, raise an
+    error otherwise.
 
+    Parameters
+    ----------
+    val : str
+        String value to be checked.
+    options : list<str>
+        List of possible values that val may obtain
+    ignore_case : bool, optional
+        Specify whether the comparison of val and the possible options
+        should ignor capitalization. Default is True.
+
+    Returns
+    -------
+    val : str
+       The value (lower-cased if ignore_case is True) if it is contained
+       in options.
 
-class XLSImporter(object):
+    Raises
+    ------
+    ValueError
+       If val is not contained in options.
+    """
+    if ignore_case:
+        val = val.lower()
+        options = [o.lower() for o in options]
+
+    if val not in options:
+        raise ValueError(
+            "Field value is '{}', but it should be one of the following "
+            "values:  {}.".format(val, ", ".join(
+                ["'{}'".format(o) for o in options])))
+
+    return val
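+
+# A quick usage sketch (mirrors the unit tests):
+#
+#   string_in_list("false", ["FALSE", "TRUE"])         # -> "false"
+#   string_in_list("FALSE", ["FALSE", "TRUE"], False)  # -> "FALSE"
+#   string_in_list("maybe", ["FALSE", "TRUE"])         # raises ValueError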
+
+
+class TableImporter(object):
     def __init__(self, converters, obligatory_columns=None, unique_keys=None):
         """
         converters: dict with column names as keys and converter functions as
@@ -168,50 +203,14 @@ class XLSImporter(object):
         """
         self.sup = SuppressKnown()
         self.required_columns = list(converters.keys())
-        self.obligatory_columns = [] if obligatory_columns is None else obligatory_columns
+        self.obligatory_columns = ([]
+                                   if obligatory_columns is None
+                                   else obligatory_columns)
         self.unique_keys = [] if unique_keys is None else unique_keys
         self.converters = converters
 
-    def read_xls(self, filename, **kwargs):
-        """
-        converts an xls file into a Pandas DataFrame.
-
-        The converters of the XLSImporter object are used.
-
-        Raises: DataInconsistencyError
-        """
-        try:
-            xls_file = pd.io.excel.ExcelFile(filename)
-        except XLRDError as e:
-            logger.warning(
-                "Cannot read \n{}.\nError:{}".format(filename,
-                                                     str(e)),
-                extra={'identifier': str(filename),
-                       'category': "inconsistency"})
-            raise DataInconsistencyError(*e.args)
-
-        if len(xls_file.sheet_names) > 1:
-            # Multiple sheets is the default now. Only show in debug
-            logger.debug(
-                "Excel file {} contains multiple sheets. "
-                "All but the first are being ignored.".format(filename))
-
-        try:
-            df = xls_file.parse(converters=self.converters, **kwargs)
-        except Exception as e:
-            logger.warning(
-                "Cannot parse {}.".format(filename),
-                extra={'identifier': str(filename),
-                       'category': "inconsistency"})
-            raise DataInconsistencyError(*e.args)
-
-        self.check_columns(df, filename=filename)
-        df = self.check_missing(df, filename=filename)
-
-        if len(self.unique_keys) > 0:
-            df = self.check_unique(df, filename=filename)
-
-        return df
+    def read_file(self, filename, **kwargs):
+        raise NotImplementedError()
 
     def check_columns(self, df, filename=None):
         """
@@ -306,3 +305,70 @@ class XLSImporter(object):
                     okay = False
 
         return df
+
+    def check_dataframe(self, df, filename):
+        """Run all checks on the DataFrame and return the possibly
+        filtered result."""
+        self.check_columns(df, filename=filename)
+        df = self.check_missing(df, filename=filename)
+
+        if len(self.unique_keys) > 0:
+            df = self.check_unique(df, filename=filename)
+
+        return df
+
+
+class XLSImporter(TableImporter):
+    def read_file(self, filename, **kwargs):
+        return self.read_xls(filename=filename, **kwargs)
+
+    def read_xls(self, filename, **kwargs):
+        """
+        converts an xls file into a Pandas DataFrame.
+
+        The converters of the XLSImporter object are used.
+
+        Raises: DataInconsistencyError
+        """
+        try:
+            xls_file = pd.io.excel.ExcelFile(filename)
+        except (XLRDError, ValueError) as e:
+            logger.warning(
+                "Cannot read \n{}.\nError:{}".format(filename,
+                                                     str(e)),
+                extra={'identifier': str(filename),
+                       'category': "inconsistency"})
+            raise DataInconsistencyError(*e.args)
+
+        if len(xls_file.sheet_names) > 1:
+            # Multiple sheets is the default now. Only show in debug
+            logger.debug(
+                "Excel file {} contains multiple sheets. "
+                "All but the first are being ignored.".format(filename))
+
+        try:
+            df = xls_file.parse(converters=self.converters, **kwargs)
+        except Exception as e:
+            logger.warning(
+                "Cannot parse {}.".format(filename),
+                extra={'identifier': str(filename),
+                       'category': "inconsistency"})
+            raise DataInconsistencyError(*e.args)
+
+        df = self.check_dataframe(df, filename)
+
+        return df
+
+
+class CSVImporter(TableImporter):
+    def read_file(self, filename, sep=",", **kwargs):
+        df = pd.read_csv(filename, sep=sep, converters=self.converters,
+                         **kwargs)
+        df = self.check_dataframe(df, filename)
+
+        return df
+
+
+class TSVImporter(TableImporter):
+    def read_file(self, filename, **kwargs):
+        df = pd.read_csv(filename, sep="\t", converters=self.converters,
+                         **kwargs)
+        df = self.check_dataframe(df, filename)
+
+        return df
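+
+
+# Typical usage -- a sketch with made-up column names and converters:
+#
+#   importer = CSVImporter(converters={"a": str, "b": int},
+#                          obligatory_columns=["a"],
+#                          unique_keys=[("a", "b")])
+#   df = importer.read_file("measurements.csv")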
diff --git a/src/doc/README_SETUP.md b/src/doc/README_SETUP.md
new file mode 120000
index 0000000000000000000000000000000000000000..88332e357f5e06f3de522768ccdcd9e513c15f62
--- /dev/null
+++ b/src/doc/README_SETUP.md
@@ -0,0 +1 @@
+../../README_SETUP.md
\ No newline at end of file
diff --git a/src/doc/crawler.rst b/src/doc/crawler.rst
index 0a710fa1e3ed2c8115f7209be30de758c0c23ec3..c52bbf2fe9b9f5fd77805e45ec85d195f5aa95f3 100644
--- a/src/doc/crawler.rst
+++ b/src/doc/crawler.rst
@@ -71,7 +71,7 @@ indicated in the messages).
 Invocation as Python Script
 ---------------------------
 
-The crawler can be executed directly via a python script (usually called
+The crawler can be executed directly via a Python script (usually called
 ``crawl.py``). The script prints the progress and reports potential
 problems. The exact behavior depends on your setup. However, you can
 have a look at the example in the
@@ -84,7 +84,7 @@ have a look at the example in the
 Call ``python3 crawl.py --help`` to see what parameters can be provided.
 Typically, an invocation looks like:
 
-.. code:: python
+.. code:: sh
 
    python3 crawl.py /someplace/
 
@@ -392,7 +392,7 @@ shows how a set of CFoods can be defined to deal with a complex file structure.
 
 You can find detailed information on how files need to be structured `here
 <https://gitlab.com/salexan/check-sfs/-/blob/f-software/filesystem_structure.md>`__ and the source
-code of the CFoods `here <https://gitlab.com/henrik_indiscale/scifolder>`__.
+code of the CFoods `here <https://gitlab.com/caosdb/caosdb-advanced-user-tools>`__.
 
 Sources
 =======
diff --git a/src/doc/index.rst b/src/doc/index.rst
index ee266598cd6cfbcfaa6f54b8e39aa32e4c2b6915..9aa045349ab05d3f5130a7f33b38c7eca0c4f32e 100644
--- a/src/doc/index.rst
+++ b/src/doc/index.rst
@@ -15,6 +15,7 @@ This documentation helps you to :doc:`get started<getting_started>`, explains th
    Concepts <concepts>
    tutorials
    Caosdb-Crawler <crawler>
+   YAML Interface <yaml_interface>
    _apidoc/modules
 
 
diff --git a/src/doc/yaml_interface.rst b/src/doc/yaml_interface.rst
new file mode 100644
index 0000000000000000000000000000000000000000..06248f2b5c17f40b6f15f5f55664c5a4a5530a86
--- /dev/null
+++ b/src/doc/yaml_interface.rst
@@ -0,0 +1,117 @@
+YAML-Interface
+--------------
+
+The YAML interface is a module in caosadvancedtools that can be used to create
+and update CaosDB models using a simplified definition in YAML format.
+
+Let's start with an example taken from https://gitlab.indiscale.com/caosdb/src/caosdb-advanced-user-tools/-/blob/dev/unittests/model.yml.
+
+.. code-block:: yaml
+
+    Project:
+       obligatory_properties:
+          projectId:
+             datatype: INTEGER
+             description: 'UID of this project'
+    Person:
+       recommended_properties:
+          firstName:
+             datatype: TEXT 
+             description: 'first name'
+          lastName:
+             datatype: TEXT 
+             description: 'last name'
+    LabbookEntry:
+       recommended_properties:
+          Project:
+          entryId:
+             datatype: INTEGER
+             description: 'UID of this entry'
+          responsible:
+             datatype: Person
+             description: 'the person responsible for these notes'
+          textElement:
+             datatype: TEXT
+             description: 'a text element of a labbook recording'
+          associatedFile:
+             datatype: FILE
+             description: 'A file associated with this recording'
+          table:
+             datatype: FILE
+             description: 'A table document associated with this recording'
+
+
+This example defines 3 ``RecordType``s:
+
+- A ``Project`` with one obligatory property ``projectId``
+- A ``Person`` with a ``firstName`` and a ``lastName`` (as recommended properties)
+- A ``LabbookEntry`` with multiple recommended properties of different data types
+
+One major advantage of using this interface (in contrast to the standard Python interface) is that properties can be defined and added to record types "on-the-fly". E.g. the three lines for ``firstName`` as sub entries of ``Person`` have two effects on CaosDB:
+
+- A new property with name ``firstName``, datatype ``TEXT`` and description ``first name`` is inserted (or updated, if already present) into CaosDB.
+- The new property is added as a recommended property to record type ``Person``.
+
+Any further occurrences of ``firstName`` in the YAML file will reuse the definition provided under ``Person``.
+
+Note the difference between the three property declarations of ``LabbookEntry``:
+
+- ``Project``: This record type is added directly as a property of ``LabbookEntry``. Therefore it does not specify any further attributes. Compare to the original declaration of record type ``Project``.
+- ``responsible``: This defines and adds a property with name "responsible" to ``LabbookEntry``, which has the datatype ``Person``. ``Person`` is defined above.
+- ``entryId``: This defines and adds a property with the standard data type ``INTEGER`` to ``LabbookEntry``.
+
+Datatypes
+---------
+
+You can use any data type understood by CaosDB as datatype attribute in the yaml model.
+
+List attributes are a bit special:
+
+.. code-block:: yaml
+
+  datatype: LIST<DOUBLE>
+
+would declare a list datatype of DOUBLE elements.
+
+.. code-block:: yaml
+
+  datatype: LIST<Project>
+
+would declare a list of elements with datatype Project.
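+
+Besides the angle-bracket notation, the parser also accepts the parenthesis
+notation, e.g. ``LIST(TEXT)``, as exercised by the unit tests.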
+
+
+Keywords
+--------
+
+- **parent**: Parent of this entity.
+- **importance**: Importance of this entity. Possible values: "recommended", "obligatory", "suggested"
+- **datatype**: The datatype of this property, e.g. TEXT, INTEGER or Project.
+- **unit**: The unit of the property, e.g. "m/s".
+- **description**: A description for this entity.
+- **recommended_properties**: Add properties to this entity with importance "recommended".
+- **obligatory_properties**: Add properties to this entity with importance "obligatory".
+- **suggested_properties**: Add properties to this entity with importance "suggested".
+- **inherit_from_recommended**: Inherit from another entity, i.e. add it as a parent, and add all of its properties with importance "obligatory" or "recommended" to this entity (see the example below).
+- **inherit_from_suggested**: Inherit from another entity and add all of its properties with importance "obligatory", "recommended" or "suggested" to this entity.
+- **inherit_from_obligatory**: Inherit from another entity and add only its properties with importance "obligatory" to this entity.
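+
+As a sketch of the inheritance keywords (the entity names are made up and the
+keyword is assumed to take a list of parent names):
+
+.. code-block:: yaml
+
+  Publication:
+     obligatory_properties:
+        title:
+           datatype: TEXT
+  Article:
+     inherit_from_obligatory:
+        - Publication
+
+Here, ``Article`` gets ``Publication`` as a parent and inherits its obligatory
+property ``title``.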
+
+Usage
+-----
+
+You can use the yaml parser directly in python as follows:
+
+
+.. code-block:: python
+   
+  from caosadvancedtools.models import parser as parser
+  model = parser.parse_model_from_yaml("model.yml")
+
+
+This creates a ``DataModel`` object containing all entities defined in the YAML file.
+
+You can then use the functions from caosadvancedtools.models.data_model.DataModel to synchronize
+the model with a CaosDB instance, e.g.:
+
+.. code-block:: python
+   
+  model.sync_data_model()
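+
+A model can also be parsed directly from a string; a short sketch based on the
+usage in the unit tests:
+
+.. code-block:: python
+
+  from caosadvancedtools.models.parser import parse_model_from_string
+
+  model = parse_model_from_string("""
+  Experiment:
+    obligatory_properties:
+      date:
+        datatype: DATETIME
+  """)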
diff --git a/tox.ini b/tox.ini
index 3d7f652203ed0caf9cdfaebbb159784e6f9b2835..1b3cd4ef0d39955197448ace9fdf5d26ea6749b4 100644
--- a/tox.ini
+++ b/tox.ini
@@ -4,9 +4,10 @@ skip_missing_interpreters = true
 [testenv]
 deps=nose
     pandas
-    caosdb
+    git+https://gitlab.indiscale.com/caosdb/src/caosdb-pylib.git@dev
     pytest
     pytest-cov
     openpyxl
     xlrd == 1.2
+    h5py
 commands=py.test --cov=caosadvancedtools -vv {posargs}
diff --git a/unittests/create_dummy_hdf5file.py b/unittests/create_dummy_hdf5file.py
new file mode 100644
index 0000000000000000000000000000000000000000..ce04030154c70e1d533f67aeec12321b86ddf305
--- /dev/null
+++ b/unittests/create_dummy_hdf5file.py
@@ -0,0 +1,70 @@
+import h5py
+import numpy as np
+
+
+def create_hdf5_file(filename="hdf5_dummy_file.hdf5"):
+    '''
+    Create a dummy hdf5-file for testing.
+    Structure:
+
+    root:-->root
+        group_level1_a:-->group
+            group_level2_aa:-->group
+                group_level3_aaa:-->group
+                    level3_aaa_floats_2d = float64(100x100)
+                group_level3_aab:-->group
+            group_level2_ab:-->group
+                group_level3_aba:-->group
+                    level3_aba_floats_2d = float64(100x100)
+            group_level2_ac:-->group
+                level2_ac_integers_2d = int32(100x100)
+        group_level1_b:-->group
+            group_level2_ba:-->group
+                level2_ba_integers_2d = int32(100x100)
+            level1_b_floats = float64(10000)
+        group_level1_c:-->group
+            level1_c_floats = float64(10000)
+        root_integers = int32(10000)
+    '''
+
+    with h5py.File(filename, mode="w") as hdf5:
+        # Create toplevel groups
+        group_lvl1_a = hdf5.create_group("group_level1_a")
+        group_lvl1_b = hdf5.create_group("group_level1_b")
+        group_lvl1_c = hdf5.create_group("group_level1_c")
+
+        # Create level 2 groups
+        group_lvl2_aa = group_lvl1_a.create_group("group_level2_aa")
+        group_lvl2_ab = group_lvl1_a.create_group("group_level2_ab")
+        group_lvl2_ac = group_lvl1_a.create_group("group_level2_ac")
+        group_lvl2_ba = group_lvl1_b.create_group("group_level2_ba")
+
+        # Create level 3 groups
+        group_lvl3_aaa = group_lvl2_aa.create_group("group_level3_aaa")
+        group_lvl3_aab = group_lvl2_aa.create_group("group_level3_aab")
+        group_lvl3_aba = group_lvl2_ab.create_group("group_level3_aba")
+
+        # Create datasets
+        integers = np.arange(10000)
+        floats = np.arange(0, 1000, 0.1)
+        integers_2d = np.diag(np.arange(100))
+        floats_2d = np.eye(100)
+        data_root = hdf5.create_dataset("root_integers", data=integers)
+        data_lvl1_b = group_lvl1_b.create_dataset("level1_b_floats", data=floats)
+        data_lvl2_c = group_lvl1_c.create_dataset("level1_c_floats", data=floats)
+        data_lvl2_ac = group_lvl2_ac.create_dataset("level2_ac_integers_2d", data=integers_2d)
+        data_lvl2_ba = group_lvl2_ba.create_dataset("level2_ba_integers_2d", data=integers_2d)
+        data_lvl3_aaa = group_lvl3_aaa.create_dataset("level3_aaa_floats_2d", data=floats_2d)
+        data_lvl3_aba = group_lvl3_aba.create_dataset("level3_aba_floats_2d", data=floats_2d)
+
+        # Create attributes
+        attr_group_lvl1_a = group_lvl1_a.attrs.create("attr_group_lvl1_a", 1)
+        attr_group_lvl2_aa = group_lvl2_aa.attrs.create("attr_group_lvl2_aa", -2)
+        attr_group_lvl3_aaa = group_lvl3_aaa.attrs.create("attr_group_lvl3_aaa", 1.0)
+        attr_data_root = data_root.attrs.create("attr_data_root", -2.0)
+        attr_data_lvl2_ac = data_lvl2_ac.attrs.create("attr_data_lvl2_ac", np.diag(np.arange(10)))
+        attr_data_lvl3_aaa = data_lvl3_aaa.attrs.create("attr_data_lvl3_aaa", np.eye(10))
+
+
+if __name__ == "__main__":
+    create_hdf5_file()
diff --git a/unittests/hdf5_dummy_file.hdf5 b/unittests/hdf5_dummy_file.hdf5
new file mode 100644
index 0000000000000000000000000000000000000000..41bfb7ab3bcac19d90fd4f018cdd8118ae806eaf
Binary files /dev/null and b/unittests/hdf5_dummy_file.hdf5 differ
diff --git a/unittests/test_cfood.py b/unittests/test_cfood.py
index 1bad508a2c22cf1ee1e29be11c3342d2115dd5a2..f5125166106c4bace21121d58a025886f9b132b9 100644
--- a/unittests/test_cfood.py
+++ b/unittests/test_cfood.py
@@ -112,6 +112,36 @@ class CFoodReTest(unittest.TestCase):
         self.assertTrue(SimpleCFood.match_item("hallo"))
         self.assertFalse(SimpleCFood.match_item("allo"))
 
+    def test_extensions(self):
+        """Test the RE generation."""
+        empty_extensions = []
+        extensions = ["foo", "bar"]
+
+        self.assertIsNone(AbstractFileCFood.re_from_extensions(empty_extensions))
+        self.assertIsNotNone(SimpleCFood.re_from_extensions(extensions))
+
+        class ExtCFood(AbstractFileCFood):
+
+            @staticmethod
+            def get_re():
+                return AbstractFileCFood.re_from_extensions(extensions)
+            create_identifiables = None
+            update_identifiables = None
+
+        # test which paths are matched
+        print(ExtCFood.re_from_extensions(extensions))
+        self.assertTrue(ExtCFood.match_item("hello/world.foo"))
+        self.assertTrue(ExtCFood.match_item("hello/world.bar"))
+        self.assertFalse(ExtCFood.match_item("hello/world.baz"))
+        self.assertFalse(ExtCFood.match_item("hello/world.foo "))  # Mind the space.
+        self.assertFalse(ExtCFood.match_item("hello/world.foobar"))
+        self.assertFalse(ExtCFood.match_item("hello/world.foo|bar"))
+        self.assertFalse(ExtCFood.match_item("hello/world.fobar"))
+        self.assertFalse(ExtCFood.match_item("hello/world.fooar"))
+
+        # Test stored extension
+        self.assertEqual(ExtCFood("hello/world.foo").match["ext"], "foo")
+
 
 class InsertionTest(unittest.TestCase):
     def test_contained_in_list(self):
@@ -160,6 +190,35 @@ class InsertionTest(unittest.TestCase):
                             value=new_int, to_be_updated=to_be_updated)
         assert to_be_updated[0] is entity
 
+        """Test properties with lists"""
+        rec1 = db.Record(id=12345)
+        rec1.add_property("Exp", value=[98765], datatype=db.LIST("Exp"))
+        rec2 = db.Record(id=98765)
+        update = []
+        # compare Entity with id
+        assure_has_property(rec1, "Exp", [rec2], to_be_updated=update)
+        assert len(update) == 0
+        update = []
+        # compare id with id
+        assure_has_property(rec1, "Exp", [98765], to_be_updated=update)
+        assert len(update) == 0
+        update = []
+        # compare id with different list of ids
+        assure_has_property(rec1, "Exp2", [98765, 444, 555],
+                            to_be_updated=update)
+        assert len(update) == 1
+
+        rec = db.Record(id=666666)
+        rec3 = db.Record(id=777777)
+        rec.add_property("Exp", value=[888888, rec3], datatype=db.LIST("Exp"))
+        rec2 = db.Record(id=888888)
+        update = []
+        # compare id and Entity with id and Entity
+        # i.e. check that conversion from Entity to id works in both
+        # directions.
+        assure_has_property(rec, "Exp", [rec2, 777777], to_be_updated=update)
+        assert len(update) == 0
+
     def test_property_is(self):
         """Test properties with string, int, float, and Boolean values"""
         entity = db.Record()
diff --git a/unittests/test_data_model.py b/unittests/test_data_model.py
index 074239399002833e8500af6369f1b2c7bcc8a3ac..159adfca1d589bb092b6f59110828b5868401e25 100644
--- a/unittests/test_data_model.py
+++ b/unittests/test_data_model.py
@@ -1,19 +1,11 @@
 import unittest
 
 import caosdb as db
-import pytest
 from caosadvancedtools.models.data_model import DataModel
 
 
 class DataModelTest(unittest.TestCase):
 
-    def tearDown(self):
-        try:
-            tests = db.execute_query("FIND test*")
-            tests.delete()
-        except Exception:
-            pass
-
     def test_collecting(self):
         maintained = {"one": db.RecordType(name="TestRecord").add_property(
             name="testproperty"),
@@ -24,17 +16,6 @@ class DataModelTest(unittest.TestCase):
         assert "TestRecord" in names
         assert "testproperty" in names
 
-    # TODO this seems to require integration test
-    @pytest.mark.xfail
-    def test_get_existing_entities(self):
-        db.RecordType(name="TestRecord").insert()
-        c = db.Container().extend([
-            db.Property(name="testproperty"),
-            db.RecordType(name="TestRecord")])
-        exist = DataModel.get_existing_entities(c)
-        assert len(exist) == 1
-        assert exist[0].name == "TestRecord"
-
     def test_sync_ids_by_name(self):
         container = db.Container().extend([db.RecordType(name="TestRecord"),
                                            db.RecordType(name="TestRecord2"),
diff --git a/unittests/test_h5.py b/unittests/test_h5.py
new file mode 100644
index 0000000000000000000000000000000000000000..e5ae94686fe4542f6833e21e9a80f01e4257538d
--- /dev/null
+++ b/unittests/test_h5.py
@@ -0,0 +1,187 @@
+import unittest
+from tempfile import NamedTemporaryFile
+
+import caosdb as db
+import caosdb.apiutils
+import h5py
+import numpy as np
+from caosadvancedtools.cfoods import h5
+from caosadvancedtools.cfoods.h5 import h5_attr_to_property
+
+from create_dummy_hdf5file import create_hdf5_file
+
+ENTS = {
+    101: db.Record(id=101),
+    102: db.Record(id=102),
+    103: db.Record(id=103).add_property("test", value=101,
+                                        datatype=db.REFERENCE),
+}
+
+
+def dummy_get(eid):
+    return ENTS[eid]
+
+
+class H5CFoodTest(unittest.TestCase):
+    def setUp(self):
+        self.h5file = NamedTemporaryFile(delete=False, suffix=".h5")
+        self.h5file.close()
+        create_hdf5_file(self.h5file.name)
+        self.h5obj = h5py.File(self.h5file.name, mode="a")
+
+    def test_create_record_records(self):
+        result = h5.H5CFood.create_structure(self.h5obj)
+
+        record_list = []
+        parents = ['group_level1_a', 'group_level1_b', 'group_level1_c', 'root_integers']
+
+        for i in parents:
+            record_list.append(db.Record().add_parent(name=i))
+
+        found_parents = []
+
+        for ent in [p.value for p in result.properties]:
+            if ent.parents[0].name == 'group_level1_a':
+                found_parents.append('group_level1_a')
+                self.assertTrue(ent.get_property("group_level2_aa") is not None)
+                self.assertTrue(ent.get_property("group_level1_a") is None)
+            elif ent.parents[0].name == 'group_level1_b':
+                found_parents.append('group_level1_b')
+            elif ent.parents[0].name == 'group_level1_c':
+                found_parents.append('group_level1_c')
+            elif ent.parents[0].name == 'root_integers':
+                found_parents.append('root_integers')
+
+        for p in parents:
+            self.assertTrue(p in found_parents)
+
+        for i in range(len(result.properties)):
+            for j in result.properties[i].value.get_parents():
+                for k in record_list[i].get_parents():
+                    self.assertEqual(j.name, k.name)
+
+        result1 = h5.H5CFood.create_structure(self.h5obj["group_level1_a"])
+
+        for i in result1.get_parents():
+            self.assertEqual(i.name, "group_level1_a")
+
+        result2 = h5.H5CFood.create_structure(self.h5obj["group_level1_a/group_level2_aa"])
+
+        for i in result2.get_parents():
+            self.assertEqual(i.name, "group_level2_aa")
+
+    def test_collect_existing_structure(self):
+        real_retrieve = caosdb.apiutils.retrieve_entity_with_id
+        caosdb.apiutils.retrieve_entity_with_id = dummy_get
+
+        # should run without problem
+        h5.collect_existing_structure(db.Record(), db.Record(id=234), h5.EntityMapping())
+
+        # test with retrieval: both Records have one test Property with one
+        # value -> The referenced Entities are matched
+        r_exist = db.Record(id=234)
+        r_exist.add_property("test", value=101, datatype=db.REFERENCE)
+        r_target = db.Record()
+        r_child = db.Record()
+        r_target.add_property("test", value=r_child, datatype=db.REFERENCE)
+        em = h5.EntityMapping()
+        h5.collect_existing_structure(r_target, r_exist, em)
+        self.assertTrue(em.to_existing[r_child._cuid] is ENTS[101])
+        self.assertTrue(em.to_target[101] is r_child)
+
+        # test with retrieval: the existing Record has another Property
+        # -> The referenced Entities are matched
+        r_exist = db.Record(id=234)
+        r_exist.add_property("test_other", value=101, datatype=db.REFERENCE)
+        r_target = db.Record()
+        r_child = db.Record()
+        r_target.add_property("test", value=r_child, datatype=db.REFERENCE)
+        em = h5.EntityMapping()
+        h5.collect_existing_structure(r_target, r_exist, em)
+        self.assertEqual(em.to_existing, {})
+        self.assertEqual(em.to_target, {})
+
+        # test with retrieval: both Records have one test Property; the
+        # existing is missing the value -> The referenced Entities are matched
+        r_exist = db.Record(id=234)
+        r_exist.add_property("test", value=None, datatype=db.REFERENCE)
+        r_target = db.Record()
+        r_child = db.Record()
+        r_target.add_property("test", value=r_child, datatype=db.REFERENCE)
+        em = h5.EntityMapping()
+        h5.collect_existing_structure(r_target, r_exist, em)
+        self.assertEqual(em.to_existing, {})
+        self.assertEqual(em.to_target, {})
+
+        # test with retrieval: both Records have one test Property with
+        # multiple values -> The referenced Entities are matched
+        r_exist = db.Record(id=234)
+        r_exist.add_property("test", value=[101, 102], datatype=db.LIST(db.REFERENCE))
+        r_target = db.Record()
+        r_child = db.Record()
+        r_child2 = db.Record()
+        r_target.add_property("test", value=[r_child, r_child2],
+                              datatype=db.LIST(db.REFERENCE))
+        em = h5.EntityMapping()
+        h5.collect_existing_structure(r_target, r_exist, em)
+        self.assertEqual(em.to_existing[r_child._cuid], ENTS[101])
+        self.assertEqual(em.to_existing[r_child2._cuid], ENTS[102])
+        self.assertEqual(em.to_target[101], r_child)
+        self.assertEqual(em.to_target[102], r_child2)
+
+        # test with retrieval: both Records have one test Property with one
+        # value; Add another recursion level -> The referenced Entities are matched
+        r_exist = db.Record(id=234)
+        r_exist.add_property("test", value=103, datatype=db.REFERENCE)
+        r_target = db.Record()
+        r_child = db.Record()
+        r_child2 = db.Record()
+        r_target.add_property("test", value=r_child, datatype=db.REFERENCE)
+        r_child.add_property("test", value=r_child2, datatype=db.REFERENCE)
+        em = h5.EntityMapping()
+        h5.collect_existing_structure(r_target, r_exist, em)
+        self.assertEqual(em.to_existing[r_child._cuid], ENTS[103])
+        self.assertEqual(em.to_target[103], r_child)
+        self.assertEqual(em.to_existing[r_child2._cuid], ENTS[101])
+        self.assertEqual(em.to_target[101], r_child2)
+
+        caosdb.apiutils.retrieve_entity_with_id = real_retrieve
+
+    def test_h5_attr_to_property(self):
+
+        test_int: int = 1
+        test_integer = np.int_(1)
+        test_float = np.float_(1.0)
+        test_str = "Test"
+        test_complex: complex = 2+3j
+        # plain Python ints are not handled, only numpy integers
+        self.assertRaises(NotImplementedError, h5_attr_to_property, test_int)
+        self.assertTupleEqual((1, db.INTEGER), h5_attr_to_property(test_integer))
+        self.assertTupleEqual((1.0, db.DOUBLE), h5_attr_to_property(test_float))
+        self.assertTupleEqual(("Test", db.TEXT), h5_attr_to_property(test_str))
+        self.assertTupleEqual((2+3j, db.TEXT), h5_attr_to_property(test_complex))
+        # strings are often represented using a binary format
+        self.assertTupleEqual(("yeti", db.TEXT), h5_attr_to_property(
+            np.array(["yeti"], dtype=h5py.string_dtype(r'utf-8', 8))[0]))
+
+        test_integer_1d = np.arange(10)
+        test_float_1d = np.arange(0, 1, 0.1)
+        test_str_1d = np.array(["a", "b", "c"])
+        self.assertTrue((np.arange(10) == h5_attr_to_property(test_integer_1d)[0]).all())
+        self.assertTrue(db.LIST(db.INTEGER) == h5_attr_to_property(test_integer_1d)[1])
+        self.assertTrue((np.arange(0, 1, 0.1) == h5_attr_to_property(test_float_1d)[0]).all())
+        self.assertTrue(db.LIST(db.DOUBLE) == h5_attr_to_property(test_float_1d)[1])
+        self.assertTrue((np.array(["a", "b", "c"]) == h5_attr_to_property(test_str_1d)[0]).all())
+        self.assertTrue(db.LIST(db.TEXT) == h5_attr_to_property(test_str_1d)[1])
+
+        test_integers_2d = np.diag(np.arange(100))
+        test_floats_2d = np.eye(100)
+        self.assertTupleEqual((None, None), h5_attr_to_property(test_integers_2d))
+        self.assertTupleEqual((None, None), h5_attr_to_property(test_floats_2d))
+
+        # Test scalar values given as np.array
+        self.assertTupleEqual((1, db.INTEGER), h5_attr_to_property(np.array(1)))
+        self.assertTupleEqual((1.123, db.DOUBLE), h5_attr_to_property(np.array(1.123)))
+        self.assertTupleEqual(('Hello World', db.TEXT), h5_attr_to_property(np.array("Hello World")))
diff --git a/unittests/test_parser.py b/unittests/test_parser.py
index 852577a471ba15e3afc163bd8e1e6fd97abd0c0a..161e2873a9c01f9ce415818116b9e4cf9aeadb5c 100644
--- a/unittests/test_parser.py
+++ b/unittests/test_parser.py
@@ -168,7 +168,6 @@ RT1:
     RT5:
 """
         model = parse_model_from_yaml(to_file(string))
-        print(model["RT1"])
         assert has_property(model["RT1"], "RT2")
         assert model["RT1"].get_importance("RT2") == db.RECOMMENDED
         assert has_property(model["RT1"], "RT3")
@@ -190,7 +189,7 @@ p1:
 p2:
   datatype: TXT
 """
-        self.assertRaises(ValueError, lambda: parse_model_from_yaml(to_file(string)))
+        self.assertRaises(ValueError, parse_model_from_yaml, to_file(string))
 
 
 class ListTest(unittest.TestCase):
@@ -200,10 +199,19 @@ RT1:
   recommended_properties:
     a:
       datatype: LIST(RT2)
+    b:
+      datatype: LIST(TEXT)
+    c:
+      datatype: LIST<TEXT>
 RT2:
 """
         model = parse_model_from_yaml(to_file(string))
 
+        self.assertTrue(isinstance(model['b'], db.Property))
+        self.assertEqual(model['b'].datatype, db.LIST(db.TEXT))
+        self.assertTrue(isinstance(model['c'], db.Property))
+        self.assertEqual(model['c'].datatype, db.LIST(db.TEXT))
+
         # This failed for an older version of caosdb-models
         string_list = """
 A:
@@ -216,16 +224,8 @@ B:
       datatype: INTEGER
 """
         model = parse_model_from_yaml(to_file(string_list))
-
-    def test_dmgd_list(self):
-        string = """
-RT1:
-  recommended_properties:
-    a:
-      datatype: LIST(T2
-RT2:
-"""
-        self.assertRaises(ValueError, lambda: parse_model_from_yaml(to_file(string)))
+        self.assertTrue(isinstance(model['A'], db.RecordType))
+        self.assertEqual(model['A'].properties[0].datatype, db.LIST("B"))
 
 
 class ParserTest(unittest.TestCase):
@@ -274,6 +274,22 @@ A:
             parse_model_from_string(yaml)
         self.assertIn("line 3", yde.exception.args[0])
 
+    def test_reference_property(self):
+        """Test correct creation of reference property using an RT."""
+        modeldef = """A:
+  recommended_properties:
+    ref:
+      datatype: LIST<A>
+"""
+        model = parse_model_from_string(modeldef)
+        self.assertEqual(len(model), 2)
+        for key in model.keys():
+            if key == "A":
+                self.assertTrue(isinstance(model[key], db.RecordType))
+            elif key == "ref":
+                self.assertTrue(isinstance(model[key], db.Property))
+                self.assertEqual(model[key].datatype, "LIST<A>")
+
 
 class ExternTest(unittest.TestCase):
     """TODO Testing the "extern" keyword in the YAML."""
diff --git a/unittests/test_structure_mapping.py b/unittests/test_structure_mapping.py
new file mode 100644
index 0000000000000000000000000000000000000000..5cc4114fc7f92c580f53dd8855bda659082e2b46
--- /dev/null
+++ b/unittests/test_structure_mapping.py
@@ -0,0 +1,135 @@
+#!/usr/bin/env python3
+
+# This file is a part of the CaosDB Project.
+#
+# Copyright (C) 2021 IndiScale GmbH <www.indiscale.com>
+# Copyright (C) 2021 Henrik tom Wörden <h.tomwoerden@indiscale.com>
+# Copyright (C) 2021 Alexander Kreft <akreft@trineo.org>
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as
+# published by the Free Software Foundation, either version 3 of the
+# License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+
+import unittest
+
+import caosdb as db
+from caosadvancedtools.structure_mapping import (EntityMapping,
+                                                 collect_existing_structure)
+from caosdb.common import datatype
+
+
+class structureMappingTest(unittest.TestCase):
+    def test_Entitymapping(self):
+        ex = db.Record(id=100)  # existing Record
+        tar = db.Record()  # target Record
+        em = EntityMapping()
+        em.add(tar, ex)
+
+        for key, val in em.to_existing.items():
+            self.assertEqual(key, tar._cuid)
+            self.assertEqual(val, ex)
+
+        for key, val in em.to_target.items():
+            self.assertEqual(key, ex.id)
+            self.assertEqual(val, tar)
+
+    def test_collect_existing_structure(self):
+        emap = EntityMapping()
+        reca1 = db.Record(name="Animals", id=100)
+        reca2 = db.Record(name="Dogs", id=200)
+        reca3 = db.Record(name="Husky", id=300)
+        reca1.add_property(id=101, name="Cute Animals", datatype=db.REFERENCE, value=reca2)
+        reca2.add_property(id=201, name="Cute Dogs", datatype=db.REFERENCE, value=reca3)
+
+        recb1 = db.Record(name="Animals")
+        recb2 = db.Record(name="Dogs")
+        recb3 = db.Record(name="Husky")
+        recb1.add_property(name="Cute Animals", datatype=db.REFERENCE, value=recb2)
+        recb2.add_property(name="Cute Dogs", datatype=db.REFERENCE, value=recb3)
+
+        collect_existing_structure(recb1, reca1, emap)
+
+        # Test if the two dicts of the entity mapping correctly depend on each other
+
+        for i in emap.to_existing.keys():
+            self.assertEqual(i, emap.to_target[emap.to_existing[i].id]._cuid)
+
+        for j in emap.to_target.keys():
+            self.assertEqual(j, emap.to_existing[emap.to_target[j]._cuid].id)
+
+        # Test if only the right Properties are in the dicts
+        self.assertTrue((reca2 in emap.to_existing.values()) and
+                        (reca3 in emap.to_existing.values()) and
+                        (reca1 not in emap.to_existing.values()))
+        self.assertTrue((recb2 in emap.to_target.values()) and
+                        (recb3 in emap.to_target.values()) and
+                        (recb1 not in emap.to_target.values()))
+
+        # Test the correct assignment of the properties
+        self.assertTrue(reca2 is emap.to_existing[recb2._cuid])
+        self.assertTrue(reca3 is emap.to_existing[recb3._cuid])
+
+        self.assertTrue(recb2 is emap.to_target[reca2.id])
+        self.assertTrue(recb3 is emap.to_target[reca3.id])
+
+        """Test with one additional Property and Properties, which are not Records"""
+        emap2 = EntityMapping()
+        recc1 = db.Record(name="Transportation", id=100)
+        recc2 = db.Record(name="Cars", id=200)
+        recc3 = db.Record(name="Volvo", id=300)
+        recc1.add_property(id=101, name="Type", datatype=db.REFERENCE, value=recc2)
+        recc2.add_property(id=201, name="Brand", datatype=db.REFERENCE, value=recc3)
+        # other datatypes
+        recc3.add_property(id=301, name="max_speed", value=200.2, datatype=db.DOUBLE)
+        recc3.add_property(id=302, name="doors", value=3, datatype=db.INTEGER)
+
+        recd1 = db.Record(name="Transportation")
+        recd2 = db.Record(name="Cars")
+        recd3 = db.Record(name="Volvo")
+        recd4 = db.Record(name="VW")
+        recd1.add_property(name="Type", datatype=db.REFERENCE, value=recd2)
+        recd2.add_property(name="Brand", datatype=db.REFERENCE, value=recd3)
+        # additional Property
+        recd2.add_property(name="Another Brand", datatype=db.REFERENCE, value=recd4)
+        # other datatypes
+        recd3.add_property(name="max_speed", value=200.2, datatype=db.DOUBLE)
+        recd3.add_property(name="doors", value=3, datatype=db.INTEGER)
+        recd4.add_property(name="max_speed", value=210.4, datatype=db.DOUBLE)
+        recd4.add_property(name="doors", value=5, datatype=db.INTEGER)
+        recd4.add_property(name="Warp engine", value=None)
+
+        collect_existing_structure(recd1, recc1, emap2)
+
+        # Test the correct assignment of the properties
+        self.assertTrue(recc2 is emap2.to_existing[recd2._cuid])
+        self.assertTrue(recc3 is emap2.to_existing[recd3._cuid])
+
+        self.assertTrue(recd2 is emap2.to_target[recc2.id])
+        self.assertTrue(recd3 is emap2.to_target[recc3.id])
+
+        """ Test, if the Record `Cars` in `target_structure` have one additional Property """
+        # Test existing structure
+        self.assertEqual(len(recc2.get_properties()), 1)  # number of properties stays unchanged
+        self.assertEqual(len(recd2.get_properties()), 2)  # number of properties stays unchanged
+
+        for prop_record, prop_em in zip(recc2.get_properties(), recd2.get_properties()):
+            self.assertTrue(prop_record.value is emap2.to_existing[prop_em.value._cuid])
+
+        # Test target structure
+        self.assertEqual(len(recc3.get_properties()), 2)  # number of properties stays unchanged
+        self.assertEqual(len(recd3.get_properties()), 2)  # number of properties stays unchanged
+
+        """ Test if the Properties that are not References show up in the entity map """
+        for rec_existing, rec_target in zip(emap2.to_existing.values(), emap2.to_target.values()):
+            self.assertTrue(isinstance(rec_existing, db.Record))
+            self.assertTrue(isinstance(rec_target, db.Record))
diff --git a/unittests/test_table_importer.py b/unittests/test_table_importer.py
index 51b4803d4db00f1b04fdfc4b78792e6a9de61bb8..b574c867881141928ac59c2b002fb7f185dac7bb 100644
--- a/unittests/test_table_importer.py
+++ b/unittests/test_table_importer.py
@@ -30,9 +30,13 @@ from caosadvancedtools.datainconsistency import DataInconsistencyError
 from caosadvancedtools.table_importer import (XLSImporter, assure_name_format,
                                               date_converter,
                                               datetime_converter,
+                                              TableImporter,
+                                              TSVImporter,
+                                              CSVImporter,
                                               incomplete_date_converter,
                                               win_path_converter,
                                               win_path_list_converter,
+                                              string_in_list,
                                               yes_no_converter)
 
 
@@ -49,6 +53,16 @@ class ConverterTest(unittest.TestCase):
         self.assertRaises(ValueError, yes_no_converter, "True")
         self.assertRaises(ValueError, yes_no_converter, "true")
 
+    def test_string_in_list(self):
+        self.assertEqual("false", string_in_list("false",
+                                                 ["FALSE", "TRUE"]))
+        self.assertEqual("FALSE", string_in_list("FALSE",
+                                                 ["FALSE", "TRUE"], False))
+        self.assertRaises(ValueError, string_in_list, "FALSE", [])
+        self.assertRaises(ValueError, string_in_list, "FALSE", ["fals"])
+        self.assertRaises(ValueError, string_in_list,
+                          "FALSE", ["false"], False)
+
     def test_assure_name_format(self):
         self.assertEqual(assure_name_format("Müstermann, Max"),
                          "Müstermann, Max")
@@ -62,17 +76,17 @@ class ConverterTest(unittest.TestCase):
                          ["/this/computer"])
         self.assertEqual(win_path_list_converter(
             r"\this\computer,\this\computer"),
-                         ["/this/computer", "/this/computer"])
+            ["/this/computer", "/this/computer"])
 
-    @pytest.mark.xfail
+    @pytest.mark.xfail(reason="To be fixed, see Issue #34")
     def test_datetime(self):
         test_file = os.path.join(os.path.dirname(__file__), "date.xlsx")
-        self.importer = XLSImporter(converters={'d': datetime_converter,
-                                                }, obligatory_columns=['d'])
+        importer = XLSImporter(converters={'d': datetime_converter,
+                                           }, obligatory_columns=['d'])
 
         xls_file = pd.io.excel.ExcelFile(test_file)
         df = xls_file.parse()
-        df = self.importer.read_xls(test_file)
+        df = importer.read_xls(test_file)
         assert df.shape[0] == 2
         # TODO datatypes are different; fix it
         assert df.d.iloc[0] == datetime.datetime(1980, 12, 31, 13, 24, 23)
@@ -80,30 +94,30 @@ class ConverterTest(unittest.TestCase):
     def test_date_xlsx(self):
         """Test with .xlsx in order to check openpyxl engine."""
         test_file = os.path.join(os.path.dirname(__file__), "date.xlsx")
-        self.importer = XLSImporter(converters={'a': date_converter,
-                                                'b': date_converter,
-                                                'c': partial(date_converter,
-                                                             fmt="%d.%m.%y")
-                                                }, obligatory_columns=['a'])
+        importer = XLSImporter(converters={'a': date_converter,
+                                           'b': date_converter,
+                                           'c': partial(date_converter,
+                                                        fmt="%d.%m.%y")
+                                           }, obligatory_columns=['a'])
 
         xls_file = pd.io.excel.ExcelFile(test_file)
         df = xls_file.parse()
-        df = self.importer.read_xls(test_file)
+        df = importer.read_xls(test_file)
         assert df.shape[0] == 2
         assert df.a.iloc[0] == df.b.iloc[0] == df.c.iloc[0]
 
     def test_date_xls(self):
         """Test with .xls in order to check xlrd engine."""
         test_file = os.path.join(os.path.dirname(__file__), "date.xls")
-        self.importer = XLSImporter(converters={'a': date_converter,
-                                                'b': date_converter,
-                                                'c': partial(date_converter,
-                                                             fmt="%d.%m.%y")
-                                                }, obligatory_columns=['a'])
+        importer = XLSImporter(converters={'a': date_converter,
+                                           'b': date_converter,
+                                           'c': partial(date_converter,
+                                                        fmt="%d.%m.%y")
+                                           }, obligatory_columns=['a'])
 
         xls_file = pd.io.excel.ExcelFile(test_file)
         df = xls_file.parse()
-        df = self.importer.read_xls(test_file)
+        df = importer.read_xls(test_file)
         assert df.shape[0] == 2
         assert df.a.iloc[0] == df.b.iloc[0] == df.c.iloc[0]
 
@@ -126,9 +140,9 @@ class ConverterTest(unittest.TestCase):
                           fmts={"%Y": "%Y"})
 
 
-class XLSImporterTest(unittest.TestCase):
+class TableImporterTest(unittest.TestCase):
     def setUp(self):
-        self.importer = XLSImporter(
+        self.importer_kwargs = dict(
             converters={'a': str, 'b': int, 'c': float, 'd': yes_no_converter},
             obligatory_columns=['a', 'b'], unique_keys=[('a', 'b')])
         self.valid_df = pd.DataFrame(
@@ -136,39 +150,64 @@ class XLSImporterTest(unittest.TestCase):
 
     def test_missing_col(self):
         df = pd.DataFrame(columns=['a', 'b'])
-        self.assertRaises(ValueError, self.importer.check_columns, df)
-        self.importer.check_columns(self.valid_df)
+        importer = TableImporter(**self.importer_kwargs)
+        self.assertRaises(ValueError, importer.check_columns, df)
+        importer.check_columns(self.valid_df)
 
     def test_missing_val(self):
-        self.importer.check_missing(self.valid_df)
+        importer = TableImporter(**self.importer_kwargs)
+        importer.check_missing(self.valid_df)
         df = pd.DataFrame([[None, np.nan, 2.0, 'yes'],
                            [None, 1, 2.0, 'yes'],
                            ['a', np.nan, 2.0, 'yes'],
                            ['b', 5, 3.0, 'no']],
                           columns=['a', 'b', 'c', 'd'])
-        df_new = self.importer.check_missing(df)
+        df_new = importer.check_missing(df)
         self.assertEqual(df_new.shape[0], 1)
         self.assertEqual(df_new.shape[1], 4)
         self.assertEqual(df_new.iloc[0].b, 5)
 
+    def test_unique(self):
+        importer = TableImporter(**self.importer_kwargs)
+        importer.check_missing(self.valid_df)
+        df = pd.DataFrame([['b', 5, 3.0, 'no'], ['b', 5, 3.0, 'no']],
+                          columns=['a', 'b', 'c', 'd'])
+        df_new = importer.check_unique(df)
+        self.assertEqual(df_new.shape[0], 1)
+
+
+class XLSImporterTest(TableImporterTest):
     def test_full(self):
         """ test full run with example data """
         tmp = NamedTemporaryFile(delete=False, suffix=".xlsx")
         tmp.close()
         self.valid_df.to_excel(tmp.name)
-        self.importer.read_xls(tmp.name)
-
-    def test_unique(self):
-        self.importer.check_missing(self.valid_df)
-        df = pd.DataFrame([['b', 5, 3.0, 'no'], ['b', 5, 3.0, 'no']],
-                          columns=['a', 'b', 'c', 'd'])
-        df_new = self.importer.check_unique(df)
-        self.assertEqual(df_new.shape[0], 1)
+        importer = XLSImporter(**self.importer_kwargs)
+        importer.read_file(tmp.name)
 
-    @pytest.mark.xfail
     def test_raise(self):
+        importer = XLSImporter(**self.importer_kwargs)
         tmp = NamedTemporaryFile(delete=False, suffix=".lol")
         tmp.close()
-        # TODO ValueError is raised instead
-        self.assertRaises(DataInconsistencyError, self.importer.read_xls,
+        self.assertRaises(DataInconsistencyError, importer.read_xls,
                           tmp.name)
+
+
+class CSVImporterTest(TableImporterTest):
+    def test_full(self):
+        """ test full run with example data """
+        tmp = NamedTemporaryFile(delete=False, suffix=".csv")
+        tmp.close()
+        self.valid_df.to_csv(tmp.name)
+        importer = CSVImporter(**self.importer_kwargs)
+        importer.read_file(tmp.name)
+
+
+class TSVImporterTest(TableImporterTest):
+    def test_full(self):
+        """ test full run with example data """
+        tmp = NamedTemporaryFile(delete=False, suffix=".tsv")
+        tmp.close()
+        self.valid_df.to_csv(tmp.name, sep="\t")
+        importer = TSVImporter(**self.importer_kwargs)
+        importer.read_file(tmp.name)