diff --git a/.docker/Dockerfile b/.docker/Dockerfile index 2188dae2ed495eb2dab789cf8865f795380a2d6f..6fbde525e8995b615e66e15521103507e4d31013 100644 --- a/.docker/Dockerfile +++ b/.docker/Dockerfile @@ -7,12 +7,13 @@ RUN git clone https://gitlab.gwdg.de/bmp-caosdb/caosdb-pylib.git && \ cd caosdb-pylib && pip3 install . RUN git clone https://gitlab.gwdg.de/bmp-caosdb/caosdb-models.git && \ cd caosdb-models && pip3 install . -ADD https://gitlab.com/api/v4/projects/13601752/repository/branches \ - scifolder_version.txt -RUN git clone https://gitlab.com/henrik_indiscale/scifolder.git && \ - cd scifolder && pip3 install . +ADD https://gitlab.com/api/v4/projects/13601752/repository/branches/restructure_cfood scifolder_version.json +RUN git clone -b restructure_cfood \ + https://gitlab.com/henrik_indiscale/scifolder.git && \ + cd scifolder && git checkout 403793fdfde511d53 && pip3 install . COPY . /git -RUN rm -r /git/.git && mv /git/.docker/pycaosdb.ini /git/integrationtests +RUN rm -r /git/.git \ + && mv /git/.docker/pycaosdb.ini /git/integrationtests/full_test RUN cd /git && pip3 install . -WORKDIR /git/integrationtests/ +WORKDIR /git/integrationtests/full_test CMD /wait-for-it.sh caosdb-server:10443 -t 120 -- ./test.sh diff --git a/.docker/docker-compose.yml b/.docker/docker-compose.yml index 97fe0c6429a7f65de7ba5a56d456ffa61abf09cf..ab20a0788ba2a70d2770af5863826d7b674e7db5 100644 --- a/.docker/docker-compose.yml +++ b/.docker/docker-compose.yml @@ -18,7 +18,7 @@ services: source: "$EXEPATH/.docker/cert" target: /opt/caosdb/cert - type: bind - source: "$EXEPATH/integrationtests/extroot" + source: "$EXEPATH/integrationtests/full_test/extroot" target: /opt/caosdb/mnt/extroot read_only: true ports: diff --git a/.docker/pycaosdb.ini b/.docker/pycaosdb.ini index b57f2ebd13a37b75a5da3386ff8ded5d1bc9d2f8..eb6d4ca6c2d05b1977555dfc460ebfbef72a02e8 100644 --- a/.docker/pycaosdb.ini +++ b/.docker/pycaosdb.ini @@ -4,7 +4,7 @@ test_server_side_scripting.bin_dir=../caosdb-server/test_scripting/bin/ [Connection] url=https://caosdb-server:10443 username=admin -cacert=../.docker/cert/caosdb.cert.pem +cacert=../../.docker/cert/caosdb.cert.pem #cacert=/etc/ssl/cert.pem debug=0 diff --git a/integrationtests/crawl.py b/integrationtests/full_test/crawl.py similarity index 86% rename from integrationtests/crawl.py rename to integrationtests/full_test/crawl.py index df76f6cf5d19001f2f5bcb120a6db9f7bf2f16cd..d5f31789bc2f6760699491345cd53324fa56146e 100755 --- a/integrationtests/crawl.py +++ b/integrationtests/full_test/crawl.py @@ -54,10 +54,7 @@ if __name__ == "__main__": files = Crawler.query_files(args.path) print("Query done...") config = db.configuration.get_config() - c = Crawler(food=[ - AnalysisCFood(use_cache=True, access=access), - ExperimentCFood(use_cache=True, access=access), - PublicationCFood(use_cache=True, access=access), - SimulationCFood(use_cache=True, access=access), - ]) + c = Crawler(use_cache=True, access=access, + food=[AnalysisCFood, ExperimentCFood, + PublicationCFood, SimulationCFood, ]) c.crawl(files) diff --git a/integrationtests/extroot/.cerate_dir b/integrationtests/full_test/extroot/.cerate_dir similarity index 100% rename from integrationtests/extroot/.cerate_dir rename to integrationtests/full_test/extroot/.cerate_dir diff --git a/integrationtests/extroot/DataAnalysis/TestProject/2019-02-03/README.md b/integrationtests/full_test/extroot/DataAnalysis/TestProject/2019-02-03/README.md similarity index 100% rename from 
integrationtests/extroot/DataAnalysis/TestProject/2019-02-03/README.md rename to integrationtests/full_test/extroot/DataAnalysis/TestProject/2019-02-03/README.md diff --git a/integrationtests/extroot/DataAnalysis/TestProject/2019-02-03/plot.py b/integrationtests/full_test/extroot/DataAnalysis/TestProject/2019-02-03/plot.py similarity index 100% rename from integrationtests/extroot/DataAnalysis/TestProject/2019-02-03/plot.py rename to integrationtests/full_test/extroot/DataAnalysis/TestProject/2019-02-03/plot.py diff --git a/integrationtests/extroot/DataAnalysis/TestProject/2019-02-03/results.pdf b/integrationtests/full_test/extroot/DataAnalysis/TestProject/2019-02-03/results.pdf similarity index 100% rename from integrationtests/extroot/DataAnalysis/TestProject/2019-02-03/results.pdf rename to integrationtests/full_test/extroot/DataAnalysis/TestProject/2019-02-03/results.pdf diff --git a/integrationtests/extroot/DataAnalysis/TestProject/2019-02-03_something/README.md b/integrationtests/full_test/extroot/DataAnalysis/TestProject/2019-02-03_something/README.md similarity index 100% rename from integrationtests/extroot/DataAnalysis/TestProject/2019-02-03_something/README.md rename to integrationtests/full_test/extroot/DataAnalysis/TestProject/2019-02-03_something/README.md diff --git a/integrationtests/extroot/DataAnalysis/TestProject/2019-02-03_something/analyse.py b/integrationtests/full_test/extroot/DataAnalysis/TestProject/2019-02-03_something/analyse.py similarity index 100% rename from integrationtests/extroot/DataAnalysis/TestProject/2019-02-03_something/analyse.py rename to integrationtests/full_test/extroot/DataAnalysis/TestProject/2019-02-03_something/analyse.py diff --git a/integrationtests/extroot/DataAnalysis/TestProject/2019-02-03_something/images/lol1.png b/integrationtests/full_test/extroot/DataAnalysis/TestProject/2019-02-03_something/images/lol1.png similarity index 100% rename from integrationtests/extroot/DataAnalysis/TestProject/2019-02-03_something/images/lol1.png rename to integrationtests/full_test/extroot/DataAnalysis/TestProject/2019-02-03_something/images/lol1.png diff --git a/integrationtests/extroot/DataAnalysis/TestProject/2019-02-03_something/images/lol2.png b/integrationtests/full_test/extroot/DataAnalysis/TestProject/2019-02-03_something/images/lol2.png similarity index 100% rename from integrationtests/extroot/DataAnalysis/TestProject/2019-02-03_something/images/lol2.png rename to integrationtests/full_test/extroot/DataAnalysis/TestProject/2019-02-03_something/images/lol2.png diff --git a/integrationtests/extroot/ExperimentalData/TestProject/2019-02-03/README.md b/integrationtests/full_test/extroot/ExperimentalData/TestProject/2019-02-03/README.md similarity index 100% rename from integrationtests/extroot/ExperimentalData/TestProject/2019-02-03/README.md rename to integrationtests/full_test/extroot/ExperimentalData/TestProject/2019-02-03/README.md diff --git a/integrationtests/extroot/ExperimentalData/TestProject/2019-02-03/datafile.dat b/integrationtests/full_test/extroot/ExperimentalData/TestProject/2019-02-03/datafile.dat similarity index 100% rename from integrationtests/extroot/ExperimentalData/TestProject/2019-02-03/datafile.dat rename to integrationtests/full_test/extroot/ExperimentalData/TestProject/2019-02-03/datafile.dat diff --git a/integrationtests/extroot/ExperimentalData/TestProject/2019-02-03_something/README.md b/integrationtests/full_test/extroot/ExperimentalData/TestProject/2019-02-03_something/README.md similarity index 100% rename from 
integrationtests/extroot/ExperimentalData/TestProject/2019-02-03_something/README.md rename to integrationtests/full_test/extroot/ExperimentalData/TestProject/2019-02-03_something/README.md diff --git a/integrationtests/extroot/ExperimentalData/TestProject/2019-02-03_something/usefull.xlsx b/integrationtests/full_test/extroot/ExperimentalData/TestProject/2019-02-03_something/usefull.xlsx similarity index 100% rename from integrationtests/extroot/ExperimentalData/TestProject/2019-02-03_something/usefull.xlsx rename to integrationtests/full_test/extroot/ExperimentalData/TestProject/2019-02-03_something/usefull.xlsx diff --git a/integrationtests/extroot/ExperimentalData/TestProject/2019-02-03_something/useless.xlsx b/integrationtests/full_test/extroot/ExperimentalData/TestProject/2019-02-03_something/useless.xlsx similarity index 100% rename from integrationtests/extroot/ExperimentalData/TestProject/2019-02-03_something/useless.xlsx rename to integrationtests/full_test/extroot/ExperimentalData/TestProject/2019-02-03_something/useless.xlsx diff --git a/integrationtests/extroot/Publications/Posters/2019-02-03_really_cool_finding/README.md b/integrationtests/full_test/extroot/Publications/Posters/2019-02-03_really_cool_finding/README.md similarity index 100% rename from integrationtests/extroot/Publications/Posters/2019-02-03_really_cool_finding/README.md rename to integrationtests/full_test/extroot/Publications/Posters/2019-02-03_really_cool_finding/README.md diff --git a/integrationtests/extroot/Publications/Posters/2019-02-03_really_cool_finding/datafile.dat b/integrationtests/full_test/extroot/Publications/Posters/2019-02-03_really_cool_finding/datafile.dat similarity index 100% rename from integrationtests/extroot/Publications/Posters/2019-02-03_really_cool_finding/datafile.dat rename to integrationtests/full_test/extroot/Publications/Posters/2019-02-03_really_cool_finding/datafile.dat diff --git a/integrationtests/extroot/Publications/Theses/2019_paper_on_exciting_stuff/README.md b/integrationtests/full_test/extroot/Publications/Theses/2019_paper_on_exciting_stuff/README.md similarity index 100% rename from integrationtests/extroot/Publications/Theses/2019_paper_on_exciting_stuff/README.md rename to integrationtests/full_test/extroot/Publications/Theses/2019_paper_on_exciting_stuff/README.md diff --git a/integrationtests/extroot/Publications/Theses/2019_paper_on_exciting_stuff/usefull.xlsx b/integrationtests/full_test/extroot/Publications/Theses/2019_paper_on_exciting_stuff/usefull.xlsx similarity index 100% rename from integrationtests/extroot/Publications/Theses/2019_paper_on_exciting_stuff/usefull.xlsx rename to integrationtests/full_test/extroot/Publications/Theses/2019_paper_on_exciting_stuff/usefull.xlsx diff --git a/integrationtests/extroot/Publications/Theses/2019_paper_on_exciting_stuff/useless.xlsx b/integrationtests/full_test/extroot/Publications/Theses/2019_paper_on_exciting_stuff/useless.xlsx similarity index 100% rename from integrationtests/extroot/Publications/Theses/2019_paper_on_exciting_stuff/useless.xlsx rename to integrationtests/full_test/extroot/Publications/Theses/2019_paper_on_exciting_stuff/useless.xlsx diff --git a/integrationtests/extroot/SimulationData/TestProject/2019-02-03/README.md b/integrationtests/full_test/extroot/SimulationData/TestProject/2019-02-03/README.md similarity index 100% rename from integrationtests/extroot/SimulationData/TestProject/2019-02-03/README.md rename to 
integrationtests/full_test/extroot/SimulationData/TestProject/2019-02-03/README.md diff --git a/integrationtests/extroot/SimulationData/TestProject/2019-02-03/sim.py b/integrationtests/full_test/extroot/SimulationData/TestProject/2019-02-03/sim.py similarity index 100% rename from integrationtests/extroot/SimulationData/TestProject/2019-02-03/sim.py rename to integrationtests/full_test/extroot/SimulationData/TestProject/2019-02-03/sim.py diff --git a/integrationtests/extroot/SimulationData/TestProject/2019-02-03/snapshots.dat b/integrationtests/full_test/extroot/SimulationData/TestProject/2019-02-03/snapshots.dat similarity index 100% rename from integrationtests/extroot/SimulationData/TestProject/2019-02-03/snapshots.dat rename to integrationtests/full_test/extroot/SimulationData/TestProject/2019-02-03/snapshots.dat diff --git a/integrationtests/extroot/SimulationData/TestProject/2019-02-03_something/README.md b/integrationtests/full_test/extroot/SimulationData/TestProject/2019-02-03_something/README.md similarity index 100% rename from integrationtests/extroot/SimulationData/TestProject/2019-02-03_something/README.md rename to integrationtests/full_test/extroot/SimulationData/TestProject/2019-02-03_something/README.md diff --git a/integrationtests/extroot/SimulationData/TestProject/2019-02-03_something/large_sim.py b/integrationtests/full_test/extroot/SimulationData/TestProject/2019-02-03_something/large_sim.py similarity index 100% rename from integrationtests/extroot/SimulationData/TestProject/2019-02-03_something/large_sim.py rename to integrationtests/full_test/extroot/SimulationData/TestProject/2019-02-03_something/large_sim.py diff --git a/integrationtests/extroot/SimulationData/TestProject/2019-02-03_something/parameters.p b/integrationtests/full_test/extroot/SimulationData/TestProject/2019-02-03_something/parameters.p similarity index 100% rename from integrationtests/extroot/SimulationData/TestProject/2019-02-03_something/parameters.p rename to integrationtests/full_test/extroot/SimulationData/TestProject/2019-02-03_something/parameters.p diff --git a/integrationtests/extroot/SimulationData/TestProject/2019-02-03_something/timeseries.npy b/integrationtests/full_test/extroot/SimulationData/TestProject/2019-02-03_something/timeseries.npy similarity index 100% rename from integrationtests/extroot/SimulationData/TestProject/2019-02-03_something/timeseries.npy rename to integrationtests/full_test/extroot/SimulationData/TestProject/2019-02-03_something/timeseries.npy diff --git a/integrationtests/filldb.sh b/integrationtests/full_test/filldb.sh similarity index 100% rename from integrationtests/filldb.sh rename to integrationtests/full_test/filldb.sh diff --git a/integrationtests/insert_model.py b/integrationtests/full_test/insert_model.py similarity index 100% rename from integrationtests/insert_model.py rename to integrationtests/full_test/insert_model.py diff --git a/integrationtests/insert_record.py b/integrationtests/full_test/insert_record.py similarity index 100% rename from integrationtests/insert_record.py rename to integrationtests/full_test/insert_record.py diff --git a/integrationtests/model.yml b/integrationtests/full_test/model.yml similarity index 100% rename from integrationtests/model.yml rename to integrationtests/full_test/model.yml diff --git a/integrationtests/test.sh b/integrationtests/full_test/test.sh similarity index 100% rename from integrationtests/test.sh rename to integrationtests/full_test/test.sh diff --git a/integrationtests/test_crawler.py 
b/integrationtests/full_test/test_crawler.py similarity index 96% rename from integrationtests/test_crawler.py rename to integrationtests/full_test/test_crawler.py index 5797ba42cb795b362ba7271a2bfced8109b402aa..d3952f09bde9a930b5b9bc28d196f1178b5a78bc 100755 --- a/integrationtests/test_crawler.py +++ b/integrationtests/full_test/test_crawler.py @@ -104,11 +104,11 @@ class CrawlerTest(unittest.TestCase): ######################### # # first publication # # ######################### - pub = db.execute_query("FIND really_cool_finding", unique=True) + pub = db.execute_query("FIND *really_cool_finding", unique=True) # There should be a Project with name TestProject which is referenced ########################## # # second publication # # ########################## - pub = db.execute_query("FIND paper_on_exciting_stuff ", unique=True) + pub = db.execute_query("FIND *paper_on_exciting_stuff ", unique=True) diff --git a/integrationtests/single_tests/test_cfood.py b/integrationtests/single_tests/test_cfood.py new file mode 100644 index 0000000000000000000000000000000000000000..a489b9b68a028ce273c20b44d3545b973d54c0b8 --- /dev/null +++ b/integrationtests/single_tests/test_cfood.py @@ -0,0 +1,37 @@ +#!/usr/bin/env python +# encoding: utf-8 +# +# ** header v3.0 +# This file is a part of the CaosDB Project. +# +# Copyright (C) 2018 Research Group Biomedical Physics, +# Max-Planck-Institute for Dynamics and Self-Organization Göttingen +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see <https://www.gnu.org/licenses/>. +# +# ** end header +import unittest +from tempfile import NamedTemporaryFile + +import caosdb as db + +from caosadvancedtools.cfood import AbstractCFood + + +class CFoodTest(unittest.TestCase): + def setUp(self): + pass + + def test_check_existence(self): + pass diff --git a/integrationtests/single_tests/test_crawler.py b/integrationtests/single_tests/test_crawler.py new file mode 100644 index 0000000000000000000000000000000000000000..1647b8ccc9a61e371a00c563f08fb36bb3bab979 --- /dev/null +++ b/integrationtests/single_tests/test_crawler.py @@ -0,0 +1,108 @@ +#!/usr/bin/env python +# encoding: utf-8 +# +# This file is a part of the CaosDB Project. +# +# Copyright (C) 2018 Research Group Biomedical Physics, +# Max-Planck-Institute for Dynamics and Self-Organization Göttingen +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. 
If not, see <https://www.gnu.org/licenses/>. +# +import unittest +from copy import deepcopy +from tempfile import NamedTemporaryFile + +import caosdb as db + +from caosadvancedtools.crawler import Crawler + + +def seek_and_destroy(names): + for name in names: + db.execute_query("FIND "+name).delete(raise_exception_on_error=False) + + +class CrawlerTest(unittest.TestCase): + def setUp(self): + # TODO replace by something more reasonable + seek_and_destroy(["Experiment", "Analysis", "Publication", "species"]) + self.rts = db.Container().extend([ + db.RecordType(name="Experiment").insert(), + db.RecordType(name="Analysis").insert(), + db.RecordType(name="Publication").insert(), + db.Property(name="species", datatype=db.TEXT).insert(), + ]) + self.exp = db.Record() + self.exp.add_parent(name="Experiment") + self.exp.add_property(name="species", value="microunicorn") + self.ana = db.Record() + self.ana.add_parent(name="Analysis") + self.pub = db.Record() + self.pub.add_parent(name="Publication") + + def test_check_existence(self): + assert Crawler.find_existing(self.exp) is None + + def test_find_or_insert_identifiables(self): + tmpexp = db.Record() + tmpexp.add_parent(name="Experiment") + tmpexp.add_property(name="species", value="microunicorn") + tmpana = db.Record() + tmpana.add_parent(name="Analysis") + tmpexp.insert() + tmpana.insert() + self.ana.id = tmpana.id + # exp inserted/no id; ana inserted/id; pub missing + identifiables = db.Container().extend([self.exp, self.ana, self.pub]) + old_id = id(identifiables[0]) + reference_to_first = identifiables[0] + assert reference_to_first is identifiables[0] + Crawler.find_or_insert_identifiables(identifiables) + + for el in identifiables: + assert el.is_valid() + + # check whether instance is the same + assert reference_to_first is identifiables[0] + assert old_id == id(identifiables[0]) + # order must not be changed + assert identifiables[0].get_parents()[0].name == "Experiment" + assert identifiables[1].get_parents()[0].name == "Analysis" + assert identifiables[2].get_parents()[0].name == "Publication" + + def tearDown(self): + for el in [self.exp, self.ana, self.pub, self.rts]: + try: + el.delete() + except: + pass + + +class CrawlerTestExist(CrawlerTest): + def setUp(self): + super().setUp() + self.exp.insert() + self.ana.insert() + self.pub.insert() + + def test_check_existence(self): + res = Crawler.find_existing(self.exp) + assert res.id == self.exp.id + + def tearDown(self): + for el in [self.exp, self.ana, self.pub, self.rts]: + try: + el.delete() + except: + pass diff --git a/src/caosadvancedtools/cache.py b/src/caosadvancedtools/cache.py index 932af01b2d3963b501dfbf1f101d95a55a766712..af434eb0e7a2cc73276934051c725e3a50b8181e 100644 --- a/src/caosadvancedtools/cache.py +++ b/src/caosadvancedtools/cache.py @@ -64,3 +64,30 @@ class Cache(object): return res else: return res[1] + + def update_ids_from_cache(self, entities): + """ sets ids of those entities that are in cache + + A list of hashes corresponding to the entities is returned + """ + hashes = [] + + for ent in entities: + ehash = Cache.hash_entity(ent) + hashes.append(ehash) + eid = self.check_existing(ehash) + + if eid is not None: + ent.id = eid + + return hashes + + def insert_list(self, hashes, entities): + """ Insert the ids of entities into the cache + + The hashes must correspond to the entities in the list + """ + + for ehash, ent in zip(hashes, entities): + if self.check_existing(ehash) is None: + self.insert(ehash, ent.id) diff --git a/src/caosadvancedtools/cfood.py 
b/src/caosadvancedtools/cfood.py
index 1f5602a01c32587d59f78a37cbc22e463bebb8c5..ac830012e950b756df0eafce5703eaf90040e126 100644
--- a/src/caosadvancedtools/cfood.py
+++ b/src/caosadvancedtools/cfood.py
@@ -22,7 +22,17 @@
 # along with this program. If not, see <https://www.gnu.org/licenses/>.
 #
 # ** end header
-"""does something"""
+""" Defines how something that shall be inserted into CaosDB is treated.
+
+CaosDB can automatically be filled with Records based on some file structure.
+The Crawler will iterate over the files and test for each file whether a CFood
+exists that matches the file path. If one does, it is instantiated to treat the
+match. This occurs in basically three steps:
+1. create a list of identifiables, i.e. unique representations of CaosDB Records
+(such as an experiment belonging to a project and a date/time)
+2. the identifiables are either found in CaosDB or they are created.
+3. the identifiables are updated based on the data in the file structure
+"""
 
 import argparse
 import re
@@ -31,32 +41,12 @@ from copy import deepcopy
 from datetime import datetime
 
 import caosdb as db
-from caosdb.exceptions import TransactionError
 
 from caosadvancedtools.cache import Cache
 
 ENTITIES = {}
 
 
-def get_value(prop):
-    """ Returns the value of a Property
-
-    Parameters
-    ----------
-    prop : The property of which the value shall be returned.
-
-    Returns
-    -------
-    out : The value of the property; if the value is an entity, its ID.
-
-    """
-
-    if isinstance(prop.value, db.Entity):
-        return prop.value.id
-    else:
-        return prop.value
-
-
 def get_entity(name):
     """ Returns the entity with a given name, preferably from a local cache.
 
@@ -72,89 +62,57 @@ class AbstractCFood(object):
-    # TODO restructure this class such that no instance is needed to check for
-    # a match
-    # instances shall be used to keep track of a match; i.e. entities can be
-    # object variable
+    # contains the compiled regular expression after the first execution of the
+    # function match()
+    _pattern = None
 
-    def __init__(self, pattern, use_cache=False, access=lambda x: x):
-        """Abstract base class for Crawler food (CFood).
+    def __init__(self, match, access=lambda x: x):
+        """ Abstract base class for Crawler food (CFood).
 
         Parameters
         ----------
-        pattern : str
-            The regex pattern for matching against file names.
-
-        use_cache : bool, optional
-            Whether to use caching (not re-inserting probably existing
-            objects into CaosDB), defaults to False.
+        match : match object of a regular expression match
+            the result from matching a path against the pattern of this
+            class
 
         access : callable, optional
-            Only used by child classes?
-
+            A function that takes a CaosDB path and returns a local path
         """
-        self.pattern = re.compile(pattern)
-        self.use_cache = use_cache
         self.access = access
+        self.crawled_file = match.string
+        self.match = match
 
-        if self.use_cache:
-            self.identifiable_cache = Cache()
-    def treat_match(self, crawled_file, match):
-        print(crawled_file)
-
-        entities = self.create_identifiables(crawled_file, match)
-
-        for key, identifiable in entities.items():
-
-            if identifiable is None:
-                print("THIS IS STRANGE. No identifiables found in {}.".format(
-                    crawled_file))
-
-                continue
-            existing = None
-
-            print("Looking for \n", identifiable)
-
-            if self.use_cache:
-                identifiable_cache = Cache()
-                identifier = Cache.hash_entity(identifiable)
-                cached_id = self.identifiable_cache.check_existing(identifier)
-
-                # retrieve entity for the cached id
-
-                if cached_id is not None:
-                    existing = db.execute_query("FIND {}".format(cached_id),
-                                                unique=True)
-                    print("Found Entity in cache; Id:", cached_id)
-
-            # Nothing in cache or cache not used. Check in CaosDB
-
-            if existing is None:
-                existing = AbstractCFood.find_existing(identifiable)
-
-            # No record matching the identifiable was found. Insert the record
+    @staticmethod
+    def get_re():
+        """ Returns the regular expression used to identify files that shall be
+        processed
 
-            if existing is None:
-                identifiable.insert()
-                entities[key] = identifiable
-            else:
-                entities[key] = existing
+        This function shall be implemented by subclasses.
+        """
+        raise NotImplementedError()
 
-            print("Got\n", identifiable)
+    @classmethod
+    def match(cls, string):
+        """ Matches the regular expression of this class against file names
 
-            if self.use_cache:
-                print("cid", cached_id)
+        Parameters
+        ----------
+        string : str
+            The path of the file that shall be matched.
+        """
 
-            if self.use_cache and cached_id is None:
-                identifiable_cache.insert(identifier, entities[key].id)
+        if cls._pattern is None:
+            cls._pattern = re.compile(cls.get_re())
 
-        self.update_identifiables(entities, crawled_file, match)
+        return cls._pattern.match(string)
 
-    def create_identifiables(self, crawled_file, match):
+    def create_identifiables(self):
+        """
+        must return a Container with the identifiables
+        """
         raise NotImplementedError()
 
-    def update_identifiables(self, entities, crawled_file, match):
+    def update_identifiables(self):
         raise NotImplementedError()
 
     @staticmethod
@@ -187,48 +145,3 @@ class AbstractCFood(object):
             entity.add_property(prop, value, datatype=datatype)
         else:
             entity.add_property(prop, value)
-
-    @staticmethod
-    def find_existing(entity):
-        """searches for an entity that matches the identifiable in CaosDB
-
-        Characteristics of the identifiable like, properties, name or id are
-        used for the match.
-        """
-
-        if entity.name is None:
-            # TODO multiple parents are ignored! Sufficient?
-            query_string = "FIND Record " + entity.get_parents()[0].name
-            query_string += " WITH " + " AND ".join(
-                ["'" + p.name + "'='"
-                 + str(get_value(p)) + "'" for p in entity.get_properties()])
-        else:
-            query_string = "FIND '{}'".format(entity.name)
-
-        print(query_string)
-        q = db.Query(query_string)
-        # the identifiable should identify an object uniquely. Thus the query
-        # is using the unique keyword
-        try:
-            r = q.execute(unique=True)
-        except TransactionError:
-            r = None
-
-        if r is not None:
-            print("Found Entity with id:", r.id)
-        else:
-            print("Did not find an existing entity.")
-
-        return r
-
-
-def get_parser():
-    parser = argparse.ArgumentParser(description=__doc__,
-                                     formatter_class=RawTextHelpFormatter)
-
-    return parser
-
-
-if __name__ == "__main__":
-    parser = get_parser()
-    args = parser.parse_args()
diff --git a/src/caosadvancedtools/crawler.py b/src/caosadvancedtools/crawler.py
index d908b8285f184f6207e185f2d96594ce482c92b7..783767247221e9ede556868a1941c5bea00328ba 100644
--- a/src/caosadvancedtools/crawler.py
+++ b/src/caosadvancedtools/crawler.py
@@ -22,16 +22,48 @@
 #
 # ** end header
 #
-"""does something"""
+""" Crawls a file structure and inserts Records into CaosDB based on what is
+found.
+
+CaosDB can automatically be filled with Records based on some file structure.
+The Crawler will iterate over the files and test for each file whether a CFood
+exists that matches the file path. If one does, it is instantiated to treat the
+match. This occurs in basically three steps:
+1. create a list of identifiables, i.e. unique representations of CaosDB Records
+(such as an experiment belonging to a project and a date/time)
+2. the identifiables are either found in CaosDB or they are created.
+3. the identifiables are updated based on the data in the file structure
+"""
 
 import caosdb as db
+from caosdb.exceptions import TransactionError
+
+from .cache import Cache
+
 
 class Crawler(object):
-    def __init__(self, food):
+    def __init__(self, food, access=lambda x: x, use_cache=False):
+        """
+        Parameters
+        ----------
+        food : list
+            The CFood classes that shall be used to treat the crawled files.
+
+        use_cache : bool, optional
+            Whether to use caching (not re-inserting probably existing
+            objects into CaosDB), defaults to False.
+
+        access : callable, optional
+            A function that takes a CaosDB path and returns a local path
+        """
         self.food = food
+        self.access = access
         self.report = db.Container()
+        self.use_cache = use_cache
+
+        if self.use_cache:
+            self.cache = Cache()
 
     def crawl(self, files):
         for crawled_file in files:
@@ -42,11 +74,82 @@
             # continue
 
-            for cfood in self.food:
-                match = cfood.pattern.match(crawled_file.path)
+            for Cfood in self.food:
+                match = Cfood.match(crawled_file.path)
 
                 if match is not None:
-                    cfood.treat_match(crawled_file, match)
+                    cfood = Cfood(match, access=self.access)
+                    identifiables = cfood.create_identifiables()
+
+                    if self.use_cache:
+                        hashes = self.cache.update_ids_from_cache(
+                            identifiables)
+
+                    self.find_or_insert_identifiables(identifiables)
+
+                    if self.use_cache:
+                        self.cache.insert_list(hashes, identifiables)
+
+                    cfood.update_identifiables()
+
+    @staticmethod
+    def find_or_insert_identifiables(identifiables):
+        """ Sets the ids of identifiables (that do not already have an id from
+        the cache) based on searching CaosDB and retrieves those entities.
+        The remaining entities (those which cannot be retrieved) have no
+        correspondence in CaosDB and are thus inserted.
+        """
+        # looking for matching entities in CaosDB when there is no valid id
+        # i.e. there was none set from a cache
+
+        for ent in identifiables:
+            if ent.id is None or ent.id < 0:
+                existing = Crawler.find_existing(ent)
+
+                if existing is not None:
+                    ent.id = existing.id
+
+        # this makes entities with existing ids valid
+        # identifiables.retrieve(unique=True, raise_exception_on_error=False)
+
+        # insert missing, i.e. those which are not valid
+        missing_identifiables = db.Container()
+        missing_identifiables.extend([ent for ent in identifiables
+                                      if ent.id is None or ent.id < 0])
+        missing_identifiables.insert()
+        identifiables.retrieve(unique=True, raise_exception_on_error=False)
+
+    @staticmethod
+    def find_existing(entity):
+        """searches for an entity that matches the identifiable in CaosDB
+
+        Characteristics of the identifiable, like properties, name or id, are
+        used for the match.
+        """
+
+        if entity.name is None:
+            # TODO multiple parents are ignored! Sufficient?
+            query_string = "FIND Record " + entity.get_parents()[0].name
+            query_string += " WITH " + " AND ".join(
+                ["'" + p.name + "'='"
+                 + str(get_value(p)) + "'" for p in entity.get_properties()])
+        else:
+            query_string = "FIND '{}'".format(entity.name)
+
+        q = db.Query(query_string)
+        # the identifiable should identify an object uniquely. Thus the query
+        # is using the unique keyword
+        try:
+            r = q.execute(unique=True)
+        except TransactionError:
+            r = None
+
+        # if r is not None:
+        #     print("Found Entity with id:", r.id)
+        # else:
+        #     print("Did not find an existing entity.")
+
+        return r
 
     @staticmethod
     def query_files(path):
@@ -57,3 +160,22 @@
         print("{} FILES TO BE PROCESSED.".format(len(files)))
 
         return files
+
+
+def get_value(prop):
+    """ Returns the value of a Property
+
+    Parameters
+    ----------
+    prop : The property of which the value shall be returned.
+
+    Returns
+    -------
+    out : The value of the property; if the value is an entity, its ID.
+
+    """
+
+    if isinstance(prop.value, db.Entity):
+        return prop.value.id
+    else:
+        return prop.value
diff --git a/src/caosadvancedtools/utils.py b/src/caosadvancedtools/utils.py
index b030a6c29d98388fae4e17fef37a60d8fa2e980c..64f828f94a09ef22daef3606490580e7fb8f0c52 100644
--- a/src/caosadvancedtools/utils.py
+++ b/src/caosadvancedtools/utils.py
@@ -118,3 +118,35 @@ def return_field_or_property(value, prop=None):
         return value[prop]
     else:
         return value
+
+
+def find_records_that_reference_ids(referenced_ids, rt="", step_size=50):
+    """ Returns a list with the ids of Records that reference entities with the
+    supplied ids
+
+    Sometimes a file or folder is referenced in a README.md (e.g. in an
+    Analysis), but it is not those files that shall be referenced but the
+    corresponding object (e.g. the Experiment). Thus, the ids of all Records
+    (of a suitable type) that reference one or more of the supplied ids are
+    collected.
+    This is done in chunks as the ids are passed in the header of the http
+    request.
+ """ + record_ids = set() + index = 0 + + while index < len(referenced_ids): + subset = referenced_ids[index:min( + index+step_size, len(referenced_ids))] + try: + q_string = ("FIND Record {} which references \n".format(rt) + + " or which references \n".join( + [str(el) for el in subset])) + exps = db.execute_query(q_string) + record_ids.update([exp.id for exp in exps]) + except Exception as e: + print(e) + pass + + index += step_size + + return list(record_ids) diff --git a/unittests/test_cache.py b/unittests/test_cache.py index 9e26cadde4217311ce7a6195cfeda3185d2b8363..c1c92330b5fba47b0a19a89913ded43ef59d3197 100644 --- a/unittests/test_cache.py +++ b/unittests/test_cache.py @@ -26,6 +26,7 @@ from copy import deepcopy from tempfile import NamedTemporaryFile import caosdb as db + from caosadvancedtools.cache import Cache @@ -45,9 +46,7 @@ class CacheTest(unittest.TestCase): ent2 = db.Record() ent2.add_parent(name="Experiment") ent_hash = Cache.hash_entity(ent) - print(ent_hash) ent2_hash = Cache.hash_entity(ent2) - print(ent2_hash) self.cache.insert(ent2_hash, 1235) assert type(self.cache.check_existing(ent2_hash)) is int assert self.cache.check_existing(ent_hash) is None @@ -57,3 +56,22 @@ class CacheTest(unittest.TestCase): def tearDown(self): os.remove(self.cache.db_file) + + def test_update_ids_from_cache(self): + ent = db.Record() + ent2 = db.Record() + ent2.add_parent(name="Experiment") + ent3 = db.Record() + ent3.add_parent(name="Analysis") + test_id = 2353243 + self.cache.insert(Cache.hash_entity(ent2), test_id) + entities = [ent, ent2, ent3] + hashes = self.cache.update_ids_from_cache(entities) + self.assertEqual(ent2.id, test_id) + + # test + ent.id = 1001 + ent3.id = 1003 + self.cache.insert_list(hashes, entities) + self.assertEqual(self.cache.check_existing(hashes[0]), 1001) + self.assertEqual(self.cache.check_existing(hashes[2]), 1003) diff --git a/unittests/test_cfood.py b/unittests/test_cfood.py index 8894e1514e28dbed9bdafe148dbaff39d07d1fd6..b69e4f0ee7df79e20e732a22c30f667ca318d4d0 100644 --- a/unittests/test_cfood.py +++ b/unittests/test_cfood.py @@ -25,29 +25,25 @@ import unittest from tempfile import NamedTemporaryFile import caosdb as db -from caosadvancedtools.cfood import AbstractCFood -# TODO this is more like an integration test. 
should be moved +from caosadvancedtools.cfood import AbstractCFood -class CFoodTest(unittest.TestCase): - def setUp(self): - self.exp = db.Record() - self.exp.add_parent(name="Experiment") - self.exp.add_property(name="species", value="microunicorn") +PATTERN = "h.*" - def test_check_existence(self): - assert AbstractCFood.find_existing(self.exp) is None +class TestCFood(AbstractCFood): -class CFoodTestExist(CFoodTest): - def setUp(self): - super().setUp() - self.exp.insert() + @staticmethod + def get_re(): + return PATTERN - def test_check_existence(self): - res = AbstractCFood.find_existing(self.exp) - assert res.id == self.exp.id - def tearDown(self): - self.exp.delete() +class CFoodReTest(unittest.TestCase): + def test(self): + self.assertEquals(TestCFood.get_re(), PATTERN) + self.assertEqual(TestCFood._pattern, None) + self.assertIsNotNone(TestCFood.match("hallo")) + self.assertIsNotNone(TestCFood._pattern) + self.assertIsNotNone(TestCFood.match("hallo")) + self.assertIsNone(TestCFood.match("allo")) diff --git a/unittests/test_table_converter.py b/unittests/test_table_converter.py index 9f0d618c1d7eba9dd688fe8022d34ec5282acf6f..dbf593de4b63e031777c109c26b971171e660638 100644 --- a/unittests/test_table_converter.py +++ b/unittests/test_table_converter.py @@ -26,6 +26,8 @@ from tempfile import NamedTemporaryFile import caosdb as db import pandas as pd +from caosdb.apiutils import compare_entities + from caosadvancedtools.table_converter import (from_table, from_tsv, to_table, to_tsv) @@ -78,14 +80,16 @@ class ToTsvTest(unittest.TestCase): c.append(r) to_tsv(NamedTemporaryFile().name, c) - -class IntegrationTest(unittest.TestCase): - """ converts tsv to a container and back and compares origin with - result """ - - def test_backandforth(self): - cont = from_tsv(TEST_TABLE, "Measurement") - tempfile = NamedTemporaryFile(delete=False) - to_tsv(tempfile.name, cont) - with open(TEST_TABLE, "r") as no1, open(tempfile.name, "r") as no2: - assert no1.read() == no2.read() +# TODO reactivate this test +# class IntegrationTest(unittest.TestCase): +# """ converts tsv to a container and back and compares origin with +# result """ +# +# def test_backandforth(self): +# cont = from_tsv(TEST_TABLE, "Measurement") +# tempfile = NamedTemporaryFile(delete=False) +# to_tsv(tempfile.name, cont) +# cont_new = from_tsv(tempfile.name, "Measurement") +# +# for ent1, ent2 in zip(cont_new, cont): +# assert compare_entities(ent1, ent2) == ([], [])
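
For reference, a minimal CFood under the restructured interface introduced in cfood.py could look like the following sketch. The class name, the regular expression and the property names are illustrative only and are not part of this changeset; get_re(), create_identifiables() and update_identifiables() are the hooks a subclass now has to provide, while match() is inherited from AbstractCFood.

import caosdb as db

from caosadvancedtools.cfood import AbstractCFood


class ToyExperimentCFood(AbstractCFood):
    """Hypothetical CFood for files below ExperimentalData/<project>/<date>/."""

    @staticmethod
    def get_re():
        # compiled lazily by AbstractCFood.match() and cached in cls._pattern
        return r".*/ExperimentalData/(?P<project>[^/]+)/(?P<date>\d{4}-\d{2}-\d{2})/.*"

    def create_identifiables(self):
        # step 1: build the unique representation of the Record(s)
        self.experiment = db.Record()
        self.experiment.add_parent(name="Experiment")
        self.experiment.add_property(name="date", value=self.match.group("date"))

        return db.Container().extend([self.experiment])

    def update_identifiables(self):
        # step 3: runs after the Crawler has found or inserted the
        # identifiables, i.e. self.experiment.id is set at this point
        pass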
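
Driving the crawler then mirrors the updated integrationtests/full_test/crawl.py: the CFood classes themselves (not instances) are handed to the Crawler, which instantiates one per matching file. The query path and the access mapping below are placeholders.

from caosadvancedtools.crawler import Crawler

# collect the File entities below a CaosDB path (placeholder path)
files = Crawler.query_files("/ExperimentalData")

c = Crawler(use_cache=True,
            access=lambda path: "/local/extroot" + path,  # CaosDB path -> local path
            food=[ToyExperimentCFood])                    # the sketch class from above
c.crawl(files)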
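
The two new Cache methods are meant to be used as a pair around the lookup/insertion step, as crawler.py now does. A rough sketch, assuming the Cache can be constructed with its default cache file and using invented ids:

import caosdb as db

from caosadvancedtools.cache import Cache

cache = Cache()  # assumption: the default cache file location is fine here

rec = db.Record()
rec.add_parent(name="Experiment")
rec.add_property(name="date", value="2019-02-03")
identifiables = db.Container().extend([rec])

# sets rec.id if an entity with the same hash was cached earlier
hashes = cache.update_ids_from_cache(identifiables)

# in the crawler, find_or_insert_identifiables() would set the missing ids here;
# for this sketch we fake that step with an invented id
rec.id = 1234

# store the now known ids; hashes[i] corresponds to identifiables[i]
cache.insert_list(hashes, identifiables)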
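
Finally, find_records_that_reference_ids() from utils.py can be used to map the ids of referenced File entities back to the Records of a given type that reference them. The ids and the RecordType below are invented:

from caosadvancedtools.utils import find_records_that_reference_ids

# ids of File entities that a README.md points to (invented values)
analysis_ids = find_records_that_reference_ids([2001, 2002, 2003], rt="Analysis")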