diff --git a/.docker/Dockerfile b/.docker/Dockerfile
index ca59395a90c747fc60a155c3fb5f8f264c60d42a..d5d2fe66770b2d37f7ecbb718a2260cdd7f501c1 100644
--- a/.docker/Dockerfile
+++ b/.docker/Dockerfile
@@ -20,15 +20,6 @@ ADD https://gitlab.com/api/v4/projects/13656973/repository/branches/dev \
    pylib_version.json
 RUN git clone https://gitlab.com/caosdb/caosdb-pylib.git && \
    cd caosdb-pylib && git checkout dev && pip3 install .
-ADD https://gitlab.com/api/v4/projects/13656965/repository/branches/master \
-   model_version.json
-RUN git clone https://gitlab.com/caosdb/caosdb-models.git && \
-   cd caosdb-models && pip3 install .
-ADD https://gitlab.com/api/v4/projects/13601752/repository/branches/master \
-   scifolder_version.json
-RUN git clone \
-    https://gitlab.com/henrik_indiscale/scifolder.git && \
-    cd scifolder && pip3 install .
 COPY . /git
 RUN rm -r /git/.git \
     && mv /git/.docker/pycaosdb.ini /git/integrationtests
diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index 9f746e473a799c027e40b84e3f3a6b36e7539c62..9b573a53f424ccdbe3d47c426e497df15dbc1257 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -78,6 +78,7 @@ build-testenv:
   stage: setup
   only:
       - schedules
+      - web
   script: 
       - df -h
       - docker login -u gitlab-ci-token -p $CI_JOB_TOKEN $CI_REGISTRY
diff --git a/CHANGELOG.md b/CHANGELOG.md
index cfebbbcf981a7e96c18ea5a12bfd8c515f37759b..106d703caba5475999ce57a488be03677f84e547 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -8,6 +8,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ### Added ###
 
+- included the scifolder module
 - included the caosmodels module
 * `send_mail` function in `caosadvancedtools.serverside.helper` module
 - New class to collect possible problems with the data model
diff --git a/README_SETUP.md b/README_SETUP.md
index b9db16a9feba246aeae8e59574047ba0f9380a38..243fba2dd1259aaefbe6c7163a242b700eb5a66e 100644
--- a/README_SETUP.md
+++ b/README_SETUP.md
@@ -15,17 +15,12 @@ Dependencies will be installed automatically if you use the below described proc
 
 For testing:
 - `tox` 
-- `scifolder`from https://gitlab.com/henrik_indiscale/scifolder
 
 
 ## Installation
 - `pip install . --user`
 - `pip install tox --user`
 
-In order to run the tests you need to install the [scifolder
-package](https://gitlab.com/henrik_indiscale/scifolder) by Henrik tom
-Wörden.
-
 ## Run Unit Tests
 `tox`
 
diff --git a/integrationtests/crawl.py b/integrationtests/crawl.py
index e4bf311e6700448aab0ebf1a5ab72bad6bf1296e..bf72b5f74b463f9ece2bd047548dcb22e8d71dac 100755
--- a/integrationtests/crawl.py
+++ b/integrationtests/crawl.py
@@ -32,8 +32,9 @@ import caosdb as db
 from caosadvancedtools.cfood import fileguide
 from caosadvancedtools.crawler import FileCrawler
 from caosadvancedtools.guard import INSERT, UPDATE
-from scifolder import (AnalysisCFood, ExperimentCFood, PublicationCFood,
-                       SimulationCFood, SoftwareCFood)
+from caosadvancedtools.scifolder import (AnalysisCFood, ExperimentCFood,
+                                         PublicationCFood, SimulationCFood,
+                                         SoftwareCFood)
 
 try:
     from sss_helper import get_argument_parser, print_success
diff --git a/integrationtests/insert_model.py b/integrationtests/insert_model.py
index 2289f72e83545db0e7eacedfa52868507b6c4760..270a08a36d7512a8642c2ca08a9ec6ea93b81bd9 100755
--- a/integrationtests/insert_model.py
+++ b/integrationtests/insert_model.py
@@ -1,6 +1,6 @@
 #!/usr/bin/env python3
 import caosdb as db
-from caosmodels.parser import parse_model_from_yaml
+from caosadvancedtools.models.parser import parse_model_from_yaml
 
 model = parse_model_from_yaml("model.yml")
 model.sync_data_model(noquestion=True)
diff --git a/integrationtests/test_crawl_with_datamodel_problems.py b/integrationtests/test_crawl_with_datamodel_problems.py
index 3089bf4ce60093206e42477d740ead5854a9debc..6c212e36084430e5f7c3362a04e78565561019b2 100644
--- a/integrationtests/test_crawl_with_datamodel_problems.py
+++ b/integrationtests/test_crawl_with_datamodel_problems.py
@@ -30,9 +30,9 @@ from caosadvancedtools.cfood import fileguide
 from caosadvancedtools.crawler import FileCrawler
 from caosadvancedtools.datamodel_problems import DataModelProblems
 from caosadvancedtools.guard import INSERT
-from caosmodels.parser import parse_model_from_yaml
-from scifolder import (AnalysisCFood, ExperimentCFood, PublicationCFood,
-                       SimulationCFood)
+from caosadvancedtools.models.parser import parse_model_from_yaml
+from caosadvancedtools.scifolder import (AnalysisCFood, ExperimentCFood,
+                                         PublicationCFood, SimulationCFood)
 
 
 def setup_module():
diff --git a/src/caosadvancedtools/import_from_xml.py b/src/caosadvancedtools/import_from_xml.py
index 9942a9a9f38de90d62471cc86d32c25d55c9cba9..0bf9b1c0cbb478bb75687f9f3e41ca2d4960d2c0 100755
--- a/src/caosadvancedtools/import_from_xml.py
+++ b/src/caosadvancedtools/import_from_xml.py
@@ -33,7 +33,7 @@ from tempfile import NamedTemporaryFile
 
 import caosdb as db
 from caosdb.apiutils import apply_to_ids
-from caosmodels.data_model import DataModel
+from caosadvancedtools.models.data_model import DataModel
 
 
 def create_dummy_file(text="Please ask the administrator for this file."):
diff --git a/src/caosadvancedtools/scifolder/__init__.py b/src/caosadvancedtools/scifolder/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..d7d67937b42ca23173fc93d4e704411f33d80bc4
--- /dev/null
+++ b/src/caosadvancedtools/scifolder/__init__.py
@@ -0,0 +1,5 @@
+from .analysis_cfood import AnalysisCFood
+from .experiment_cfood import ExperimentCFood
+from .publication_cfood import PublicationCFood
+from .simulation_cfood import SimulationCFood
+from .software_cfood import SoftwareCFood
diff --git a/src/caosadvancedtools/scifolder/analysis_cfood.py b/src/caosadvancedtools/scifolder/analysis_cfood.py
new file mode 100644
index 0000000000000000000000000000000000000000..27cb871aed08f41531c367567ea36ea9a3faaf69
--- /dev/null
+++ b/src/caosadvancedtools/scifolder/analysis_cfood.py
@@ -0,0 +1,129 @@
+#!/usr/bin/env python
+# encoding: utf-8
+#
+# Copyright (C) 2019 Henrik tom Wörden
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as
+# published by the Free Software Foundation, either version 3 of the
+# License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+
+import os
+from itertools import chain
+
+import caosdb as db
+from caosadvancedtools.cfood import (AbstractFileCFood, assure_has_parent,
+                                     assure_has_property,
+                                     assure_object_is_in_list, get_entity)
+from caosadvancedtools.read_md_header import get_header
+
+from .generic_pattern import full_pattern
+from .utils import (get_files_referenced_by_field, parse_responsibles,
+                    reference_records_corresponding_to_files)
+from .withreadme import DATAMODEL as dm
+from .withreadme import (RESULTS, REVISIONOF, SCRIPTS, SOURCES, WithREADME,
+                         get_glob)
+
+
+class AnalysisCFood(AbstractFileCFood, WithREADME):
+    _prefix = ".*/DataAnalysis/"
+
+    # win_paths can be used to define fields that will contain windows style
+    # path instead of the default unix ones. Possible fields are:
+    # ["results", "sources", "scripts","revisionOf"]
+    win_paths = []
+
+    def __init__(self,  *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        WithREADME.__init__(self)
+
+    def collect_information(self):
+        self.find_referenced_files([RESULTS, SOURCES, SCRIPTS])
+
+    @staticmethod
+    def name_beautifier(name):
+        """ a function that can be used to rename the project. I.e. if
+        the project in CaosDB shall be named differently than in the folder
+        structure.
+        Use discouraged.
+        """
+
+        return name
+
+    @staticmethod
+    def get_re():
+        return AnalysisCFood._prefix + full_pattern
+
+    def create_identifiables(self):
+        """Create the project, analysis and person identifiables."""
+        # create the project identifiable
+        name = AnalysisCFood.name_beautifier(
+            self.match.group("project_identifier"))
+        self.project = db.Record(name=name)
+        self.project.add_parent(name=dm.Project)
+        self.identifiables.append(self.project)
+
+        # create the Analysis identifiable
+        self.analysis = db.Record()
+        self.analysis.add_parent(name=dm.Analysis)
+        self.analysis.add_property(name=dm.date, value=self.match.group("date"))
+        self.analysis.add_property(name=dm.Project, value=self.project)
+        self.identifiables.append(self.analysis)
+
+        if self.match.group("suffix") is not None:
+            self.analysis.add_property(name=dm.identifier,
+                                       value=self.match.group("suffix"))
+        else:
+            # TODO empty string causes an error in search
+            self.analysis.add_property(name=dm.identifier,
+                                       value="empty_identifier")
+
+        # parse people and add them to identifiables
+        # TODO People are currently 'identifiable' due to their first and last
+        # names. There will be conflicts
+        self.people = parse_responsibles(self.header)
+        self.identifiables.extend(self.people)
+
+    def update_identifiables(self):
+        assure_has_property(self.analysis, "description",
+                            self.header["description"][0],
+                            to_be_updated=self.to_be_updated)
+        assure_object_is_in_list(obj=self.people,
+                                 containing_object=self.analysis,
+                                 property_name=dm.responsible,
+                                 to_be_updated=self.to_be_updated,
+                                 datatype=db.LIST(db.REFERENCE)
+                                 )
+        self.reference_included_records(self.analysis,
+                                        [RESULTS, SOURCES, SCRIPTS],
+                                        to_be_updated=self.to_be_updated
+                                        )
+
+        if SOURCES.key in self.header:
+            reference_records_corresponding_to_files(
+                    record=self.analysis,
+                    recordtypes=[dm.Experiment, dm.Publication, dm.Simulation,
+                                 dm.Analysis],
+                    globs=get_glob(self.header[SOURCES.key]),
+                    property_name=dm.sources,
+                    path=self.crawled_path,
+                    to_be_updated=self.to_be_updated)
+
+        self.reference_files_from_header(record=self.analysis)
+
+        if REVISIONOF.key in self.header:
+            reference_records_corresponding_to_files(
+                record=self.analysis,
+                recordtypes=[dm.Analysis],
+                property_name=dm.revisionOf,
+                globs=get_glob(self.header[REVISIONOF.key]),
+                path=self.crawled_path,
+                to_be_updated=self.to_be_updated)
diff --git a/src/caosadvancedtools/scifolder/experiment_cfood.py b/src/caosadvancedtools/scifolder/experiment_cfood.py
new file mode 100644
index 0000000000000000000000000000000000000000..0eccd18d9481b0bbb91c75d63c849e69e0c6572b
--- /dev/null
+++ b/src/caosadvancedtools/scifolder/experiment_cfood.py
@@ -0,0 +1,105 @@
+#!/usr/bin/env python
+# encoding: utf-8
+#
+# Copyright (C) 2019 Henrik tom Wörden
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as
+# published by the Free Software Foundation, either version 3 of the
+# License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+
+import caosdb as db
+from caosadvancedtools.cfood import (AbstractFileCFood, assure_has_description,
+                                     assure_has_parent, assure_has_property,
+                                     assure_object_is_in_list, get_entity)
+from caosadvancedtools.read_md_header import get_header
+
+from .generic_pattern import full_pattern
+from .utils import parse_responsibles, reference_records_corresponding_to_files
+from .withreadme import DATAMODEL as dm
+from .withreadme import RESULTS, REVISIONOF, SCRIPTS, WithREADME, get_glob
+
+
+class ExperimentCFood(AbstractFileCFood, WithREADME):
+
+    # win_paths can be used to define fields that will contain windows style
+    # path instead of the default unix ones. Possible fields are:
+    # ["results", "revisionOf"]
+    win_paths = []
+
+    @staticmethod
+    def name_beautifier(x): return x
+
+    def __init__(self,  *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        WithREADME.__init__(self)
+        # An empty dict; a trailing comma here would create the tuple ({},).
+        self.name_map = {}
+
+    @staticmethod
+    def get_re():
+        return ".*/ExperimentalData/"+full_pattern
+
+    def collect_information(self):
+        self.find_referenced_files([RESULTS])
+
+    @staticmethod
+    def create_identifiable_experiment(match):
+        # create the project identifiable
+        name = ExperimentCFood.name_beautifier(
+            match.group("project_identifier"))
+        project = db.Record(name=name)
+        project.add_parent(name=dm.Project)
+
+        experiment = db.Record()
+        experiment.add_parent(name=dm.Experiment)
+        experiment.add_property(
+            name=dm.date, value=match.group("date"))
+        experiment.add_property(name=dm.Project, value=project)
+
+        if match.group("suffix") is None:
+            experiment.add_property(
+                name="identifier", value="empty_identifier")
+        else:
+            experiment.add_property(name="identifier",
+                                    value=match.group("suffix"))
+
+        return [experiment, project]
+
+    def create_identifiables(self):
+        self.experiment, self.project = (
+            ExperimentCFood.create_identifiable_experiment(self.match))
+
+        self.identifiables.extend([self.experiment, self.project])
+        self.people = parse_responsibles(self.header)
+        self.identifiables.extend(self.people)
+
+    def update_identifiables(self):
+        """Set description, responsible persons and references."""
+        assure_has_property(self.experiment, "description",
+                            self.header["description"][0],
+                            to_be_updated=self.to_be_updated)
+
+        # set responsible people
+        assure_object_is_in_list(self.people, self.experiment, dm.responsible,
+                                 to_be_updated=self.to_be_updated,
+                                 datatype=db.LIST(db.REFERENCE))
+
+        self.reference_files_from_header(record=self.experiment)
+        # check for the same key that is used for the header lookup below
+        if REVISIONOF.key in self.header:
+            reference_records_corresponding_to_files(
+                record=self.experiment,
+                recordtypes=[dm.Experiment],
+                globs=get_glob(self.header[REVISIONOF.key]),
+                path=self.crawled_path,
+                property_name=dm.revisionOf,
+                to_be_updated=self.to_be_updated)
diff --git a/src/caosadvancedtools/scifolder/generic_pattern.py b/src/caosadvancedtools/scifolder/generic_pattern.py
new file mode 100644
index 0000000000000000000000000000000000000000..0b5a4df2063b9639ee6fd018e241d98df8c583d1
--- /dev/null
+++ b/src/caosadvancedtools/scifolder/generic_pattern.py
@@ -0,0 +1,35 @@
+#!/usr/bin/env python
+# encoding: utf-8
+#
+# Copyright (C) 2020 IndiScale GmbH <info@indiscale.com>
+# Copyright (C) 2020 Henrik tom Wörden <h.tomwoerden@indiscale.com>
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as
+# published by the Free Software Foundation, either version 3 of the
+# License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+
+""" this module contains regular expressions needed for the standard file
+structure """
+
+
+project_pattern = (r"(?P<project_identifier>"
+                   r"(?P<project_year>\d{4})_?(?P<project_name>((?!/).)*))/")
+date_pattern = r"(?P<date>\d{2,4}[-_]\d{1,2}[-_]\d{1,2})"
+date_suffix_pattern = r"(_(?P<suffix>(((?!/).)*)))?/"
+# the dots are escaped so that only a literal "." is accepted
+readme_pattern = r"(readme\.md|README\.md|readme\.xlsx|README\.xlsx)$"
+full_pattern = (project_pattern + date_pattern + date_suffix_pattern
+                # TODO: Additional levels are not allowed according to the
+                # specification. This should be removed or enabled via a
+                # configuration
+                + "(.*)"
+                + readme_pattern)
diff --git a/src/caosadvancedtools/scifolder/publication_cfood.py b/src/caosadvancedtools/scifolder/publication_cfood.py
new file mode 100644
index 0000000000000000000000000000000000000000..fc78e5b759e98e8989c952ccbafeef117e2ed33d
--- /dev/null
+++ b/src/caosadvancedtools/scifolder/publication_cfood.py
@@ -0,0 +1,112 @@
+#!/usr/bin/env python
+# encoding: utf-8
+#
+# Copyright (C) 2019 Henrik tom Wörden
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as
+# published by the Free Software Foundation, either version 3 of the
+# License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+
+import os
+from itertools import chain
+
+import caosdb as db
+from caosadvancedtools.cfood import (AbstractFileCFood,
+                                     assure_object_is_in_list, fileguide,
+                                     get_entity)
+from caosadvancedtools.read_md_header import get_header
+from caosadvancedtools.utils import find_records_that_reference_ids
+
+from .generic_pattern import date_suffix_pattern, readme_pattern
+from .utils import (get_files_referenced_by_field, parse_responsibles,
+                    reference_records_corresponding_to_files)
+from .withreadme import DATAMODEL as dm
+from .withreadme import (RESULTS, REVISIONOF, SCRIPTS, SOURCES, WithREADME,
+                         get_glob)
+
+
+def folder_to_type(name):
+    """Map a publication folder name to its singular type name.
+
+    Raises a ValueError if the folder name is not one of the known ones.
+    """
+    known = (("Theses", "Thesis"), ("Articles", "Article"),
+             ("Posters", "Poster"), ("Presentations", "Presentation"),
+             ("Reports", "Report"))
+
+    for folder, publication_type in known:
+        if name == folder:
+            return publication_type
+    raise ValueError()
+
+
+class PublicationCFood(AbstractFileCFood, WithREADME):
+    # win_paths can be used to define fields that will contain windows style
+    # path instead of the default unix ones. Possible fields are:
+    # ["results", "sources", "scripts", "revisionOf"]
+    win_paths = []
+
+    def __init__(self,  *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        WithREADME.__init__(self)
+
+    def collect_information(self):
+        self.find_referenced_files([RESULTS, SOURCES, SCRIPTS])
+
+    @staticmethod
+    def get_re():
+        # matches anything but "/", i.e. a folder name
+        _prefix = ".*/Publications/"
+        _type = r"(?P<type>Theses|Articles|Posters|Presentations|Reports)/"
+        _partial_date = r"(?P<date>\d{2,4}([-_]\d{1,2}[-_]\d{1,2})?)"
+
+        return _prefix+_type+_partial_date+date_suffix_pattern+readme_pattern
+
+    def create_identifiables(self):
+        header = get_header(fileguide.access(self.crawled_path))
+        parts = [self.match.group(g) for g in ("date", "suffix")]
+        self.publication = db.Record(  # the suffix group is optional (None)
+            name="_".join(p for p in parts if p is not None))
+        self.publication.add_parent(name=folder_to_type(
+            self.match.group("type")))
+        self.identifiables.append(self.publication)
+        self.people = parse_responsibles(header)
+        self.identifiables.extend(self.people)
+
+    def update_identifiables(self):
+        header = get_header(fileguide.access(self.crawled_path))
+        self.publication.description = header["description"][0]
+
+        assure_object_is_in_list(self.people, self.publication,
+                                 "responsible",
+                                 self.to_be_updated,
+                                 datatype=db.LIST(db.REFERENCE))
+
+        if SOURCES.key in self.header:
+            reference_records_corresponding_to_files(
+                    record=self.publication,
+                    recordtypes=[dm.Experiment, dm.Publication, dm.Simulation,
+                                 dm.Analysis],
+                    globs=get_glob(self.header[SOURCES.key]),
+                    property_name=dm.sources,
+                    path=self.crawled_path,
+                    to_be_updated=self.to_be_updated)
+        self.reference_files_from_header(record=self.publication)
+
+        if REVISIONOF.key in self.header:
+            reference_records_corresponding_to_files(
+                record=self.publication,
+                recordtypes=[dm.Publication],
+                property_name=dm.revisionOf,
+                globs=get_glob(self.header[REVISIONOF.key]),
+                path=self.crawled_path,
+                to_be_updated=self.to_be_updated)
diff --git a/src/caosadvancedtools/scifolder/simulation_cfood.py b/src/caosadvancedtools/scifolder/simulation_cfood.py
new file mode 100644
index 0000000000000000000000000000000000000000..ae129e6a69ce25c6698b98124e81f8bc2921b472
--- /dev/null
+++ b/src/caosadvancedtools/scifolder/simulation_cfood.py
@@ -0,0 +1,109 @@
+#!/usr/bin/env python
+# encoding: utf-8
+#
+# Copyright (C) 2019 Henrik tom Wörden
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as
+# published by the Free Software Foundation, either version 3 of the
+# License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+
+import os
+from itertools import chain
+
+import caosdb as db
+from caosadvancedtools.cfood import (AbstractFileCFood, assure_has_parent,
+                                     assure_has_property,
+                                     assure_object_is_in_list, get_entity)
+from caosadvancedtools.read_md_header import get_header
+
+from .generic_pattern import full_pattern
+from .utils import (get_files_referenced_by_field, parse_responsibles,
+                    reference_records_corresponding_to_files)
+from .withreadme import DATAMODEL as dm
+from .withreadme import (RESULTS, REVISIONOF, SCRIPTS, SOURCES, WithREADME,
+                         get_glob)
+
+
+class SimulationCFood(AbstractFileCFood, WithREADME):
+    # win_paths can be used to define fields that will contain windows style
+    # path instead of the default unix ones. Possible fields are:
+    # ["results", "sources", "scripts", "revisionOf"]
+    win_paths = []
+
+    def __init__(self,  *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        WithREADME.__init__(self)
+
+    def collect_information(self):
+        self.find_referenced_files([RESULTS, SOURCES, SCRIPTS])
+
+    @staticmethod
+    def get_re():
+        return ".*/SimulationData/" + full_pattern
+
+    def create_identifiables(self):
+        """Create the project, simulation and person identifiables."""
+        # create the project identifiable
+        self.project = db.Record(name=self.match.group("project_identifier"))
+        self.project.add_parent(name="Project")
+        self.identifiables.append(self.project)
+
+        # create the simulation identifiable; it is made from parent, date,
+        # project and identifier
+        self.simulation = db.Record()
+        self.simulation.add_parent(name="Simulation")
+        self.simulation.add_property(
+            name="date", value=self.match.group("date"))
+        self.simulation.add_property(name="Project", value=self.project)
+
+        if self.match.group("suffix") is not None:
+            self.simulation.add_property(
+                name="identifier", value=self.match.group("suffix"))
+        else:
+            # TODO empty string causes an error in search
+            self.simulation.add_property(name="identifier",
+                                              value="empty_identifier")
+        self.identifiables.append(self.simulation)
+        self.people = parse_responsibles(self.header)
+        self.identifiables.extend(self.people)
+
+    def update_identifiables(self):
+        """Set description, responsible persons and referenced records."""
+        assure_has_property(self.simulation, "description",
+                            self.header["description"][0],
+                            to_be_updated=self.to_be_updated)
+        # TODO why is here no db.LIST("Person") possible?
+
+        assure_object_is_in_list(self.people, self.simulation,
+                                 "responsible",
+                                 self.to_be_updated,
+                                 datatype=db.LIST(db.REFERENCE))
+
+        if SOURCES.key in self.header:
+            reference_records_corresponding_to_files(
+                    record=self.simulation,
+                    recordtypes=["Experiment", "Publication", "Simulation",
+                                 "Analysis"],
+                    globs=get_glob(self.header[SOURCES.key]),
+                    property_name=dm.sources,
+                    path=self.crawled_path,
+                    to_be_updated=self.to_be_updated)
+        self.reference_files_from_header(record=self.simulation)
+        # a revision refers to a record of the same type (here: Simulation)
+        if REVISIONOF.key in self.header:
+            reference_records_corresponding_to_files(
+                record=self.simulation,
+                recordtypes=[dm.Simulation],
+                property_name=dm.revisionOf,
+                globs=get_glob(self.header[REVISIONOF.key]),
+                path=self.crawled_path,
+                to_be_updated=self.to_be_updated)
diff --git a/src/caosadvancedtools/scifolder/software_cfood.py b/src/caosadvancedtools/scifolder/software_cfood.py
new file mode 100644
index 0000000000000000000000000000000000000000..77fb46521e9aab875b6f99d0a1ee4ac44177e09c
--- /dev/null
+++ b/src/caosadvancedtools/scifolder/software_cfood.py
@@ -0,0 +1,115 @@
+#!/usr/bin/env python
+# encoding: utf-8
+#
+# Copyright (C) 2019 IndiScale GmbH <info@indiscale.com>
+# Copyright (C) 2019 Henrik tom Wörden <h.tomwoerden@indiscale.com>
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as
+# published by the Free Software Foundation, either version 3 of the
+# License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+
+import os
+from itertools import chain
+
+import caosdb as db
+from caosadvancedtools.cfood import (AbstractFileCFood, assure_has_parent,
+                                     assure_has_property, assure_name_is,
+                                     assure_object_is_in_list, get_entity)
+from caosadvancedtools.guard import global_guard as guard
+from caosadvancedtools.read_md_header import get_header
+
+from .generic_pattern import full_pattern
+from .utils import get_files_referenced_by_field, parse_responsibles
+from .withreadme import BINARIES
+from .withreadme import DATAMODEL as dm
+from .withreadme import SOURCECODE, WithREADME
+
+
+class SoftwareCFood(AbstractFileCFood, WithREADME):
+    _prefix = ".*/Software/"
+    # win_paths can be used to define fields that will contain windows style
+    # path instead of the default unix ones. Possible fields are:
+    # ["binaries", "sourceCode","revisionOf"]
+    win_paths = []
+
+    def __init__(self,  *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        WithREADME.__init__(self)
+
+    def collect_information(self):
+        self.find_referenced_files([BINARIES, SOURCECODE])
+
+    @staticmethod
+    def get_re():
+
+        return SoftwareCFood._prefix + full_pattern
+
+    def create_identifiables(self):
+        """Find (or insert) the software RecordType; create its version."""
+        # The software is a record type. Let's try to find it.
+        self.software = db.execute_query(
+            "FIND RecordType Software with name = {}".format(
+                self.match.group("project_identifier")))
+        if len(self.software) == 0:
+            # Software not found insert if allowed
+            self.software = db.RecordType(
+                name=self.match.group("project_identifier"))
+            self.software.add_parent(name="Software")
+            self.software.add_property(name="alias",
+                                       value=self.match.group("project_name"))
+            guard.safe_insert(self.software)
+        elif len(self.software) == 1:
+            self.software = self.software[0]
+        else:
+            raise RuntimeError("Cannot identify software record type. Multiple"
+                               " matches for {}".format(
+                                   self.match.group("project_identifier")))
+
+        # create the software version
+        # identifiable is made from parent and date and suffix
+        self.softwareversion = db.Record()
+        self.softwareversion.add_parent(self.software)
+        self.softwareversion.add_property("date", self.match.group("date"))
+
+        if self.match.group("suffix"):
+            self.softwareversion.add_property(
+                "version", self.match.group("suffix"))
+
+        self.identifiables.append(self.softwareversion)
+
+        # parse people and add them to identifiables
+        # TODO People are currently 'identifiable' with their first and last
+        # names. There will be conflicts
+        self.people = parse_responsibles(self.header)
+        self.identifiables.extend(self.people)
+
+    def update_identifiables(self):
+        version_name = self.match.group("project_name")
+
+        if self.match.group("suffix"):
+            version_name += "_"+self.match.group("suffix")
+        else:
+            version_name += "_"+self.match.group("date")
+
+        assure_name_is(self.softwareversion, version_name,
+                       to_be_updated=self.to_be_updated)
+        assure_has_property(self.softwareversion, "description",
+                            self.header["description"][0],
+                            to_be_updated=self.to_be_updated)
+        assure_object_is_in_list(obj=self.people,
+                                 containing_object=self.softwareversion,
+                                 property_name="responsible",
+                                 to_be_updated=self.to_be_updated,
+                                 datatype=db.LIST(db.REFERENCE)
+                                 )
+
+        self.reference_files_from_header(record=self.softwareversion)
diff --git a/src/caosadvancedtools/scifolder/utils.py b/src/caosadvancedtools/scifolder/utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..3241764fd08b7bdd8509f9b0a11239996a1995fb
--- /dev/null
+++ b/src/caosadvancedtools/scifolder/utils.py
@@ -0,0 +1,204 @@
+#!/usr/bin/env python
+# encoding: utf-8
+#
+# Copyright (C) 2020 IndiScale GmbH <info@indiscale.com>
+# Copyright (C) 2020 Henrik tom Wörden <h.tomwoerden@indiscale.com>
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as
+# published by the Free Software Foundation, either version 3 of the
+# License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+
+import os
+from itertools import chain
+
+import caosdb as db
+import pandas as pd
+from caosadvancedtools.cfood import assure_object_is_in_list, fileguide
+from caosadvancedtools.utils import (find_records_that_reference_ids,
+                                     read_field_as_list,
+                                     return_field_or_property,
+                                     string_to_person)
+
+
+def parse_responsibles(header):
+    """Build the list of people named in the ``responsible`` header field.
+
+    The field may hold a single string or a list of strings; it is read with
+    ``read_field_as_list`` and every entry is converted with
+    ``string_to_person``.
+
+    Currently only the format <Firstname> <Lastname> <*> is supported.
+
+    A header without a ``responsible`` field fails on the lookup (KeyError
+    for a plain dict).
+    """
+    entries = read_field_as_list(header["responsible"])
+
+    return [string_to_person(entry) for entry in entries]
+
+
+def get_files_referenced_by_field(globs, prefix="", final_glob=None):
+    """
+    Query the file entities stored at the paths described by ``globs``.
+
+    ``globs`` is a list of filenames, directories or globs; ``None`` entries
+    are skipped. Entries that do not start with "/" are joined to ``prefix``
+    and every resulting path is normalized. If ``final_glob`` is given, it
+    is first appended to each entry that does not already end with it.
+
+    Returns a list with one query result per remaining entry.
+    """
+
+    query_template = "FIND file which is stored at {}"
+    results = []
+
+    for pattern in globs:
+        if pattern is None:
+            continue
+        # TODO extract glob manipulation
+
+        if not (final_glob is None or pattern.endswith(final_glob)):
+            pattern = pattern + final_glob
+
+        if not pattern.startswith("/"):
+            pattern = os.path.join(prefix, pattern)
+        stored_at = os.path.normpath(pattern)
+
+        results.append(
+            db.execute_query(query_template.format(stored_at)))
+
+    return results
+
+
+def is_filename_allowed(path, recordtype):
+    """Tell whether ``path`` contains the folder name of ``recordtype``.
+
+    The record type is compared case-insensitively; the folder name must
+    occur somewhere in ``path``. Unknown record types are never allowed.
+    """
+    folder_by_type = {
+        "experiment": "ExperimentalData",
+        "analysis": "DataAnalysis",
+        "publication": "Publication",
+        "simulation": "Simulation",
+    }
+    folder = folder_by_type.get(recordtype.lower())
+
+    return folder is not None and folder in path
+
+
+def get_entity_ids_from_include_file(prefix, file_path):
+    """Return the values of the ID column of a tab separated include file.
+
+    A relative ``file_path`` is joined to ``prefix``; ``#`` starts a comment
+    in the file. Raises a ValueError if the file has no ID column.
+    """
+    if not file_path.startswith("/"):
+        file_path = os.path.join(prefix, file_path)
+    local_file = fileguide.access(os.path.normpath(file_path))
+    table = pd.read_csv(local_file, sep="\t", comment="#")
+    if "ID" not in table.columns:
+        raise ValueError("Include file must have an ID column")
+    return list(table.ID)
+
+
+def reference_records_corresponding_to_files(record, recordtypes, globs, path,
+                                             to_be_updated, property_name):
+    """Reference records of ``recordtypes`` that reference the given files.
+
+    The files are those found for ``globs`` as given and with ``**``
+    appended; relative globs refer to the folder of ``path``.
+    For each record type the records referencing allowed files are added to
+    the list property ``property_name`` of ``record``.
+    """
+    # TODO this function needs to be refactored:
+    # the treatment of keys like 'results' should be separated from searching
+    # entities (see setting of globs and includes below).
+    if not recordtypes:
+        return
+    # The queries do not depend on the record type: run them only once.
+    folder = os.path.dirname(path)
+    candidates = list(chain.from_iterable(
+        get_files_referenced_by_field(globs, prefix=folder)
+        + get_files_referenced_by_field(globs, prefix=folder,
+                                        final_glob="**")))
+
+    for recordtype in recordtypes:
+        file_ids = list({fi.id for fi in candidates
+                         if is_filename_allowed(fi.path, recordtype)})
+        entities = find_records_that_reference_ids(file_ids, rt=recordtype)
+
+        if len(entities) == 0:
+            continue
+        assure_object_is_in_list(entities, record, property_name,
+                                 to_be_updated,
+                                 datatype=db.LIST(db.REFERENCE))
+
+
+def create_files_list(df, ftype):
+    """Collect the non-null cells of row ``ftype`` as file entries.
+
+    A cell with a non-null ``ftype + " description"`` cell in the same
+    column becomes a {'file': ..., 'description': ...} dict.
+    """
+    files = []
+    # Series.items() replaces iteritems(), which pandas 2.0 removed.
+    for col, src in df.loc[ftype, pd.notnull(df.loc[ftype])].items():
+        desc = df.loc[ftype + " description", col]
+        files.append({'file': src, 'description': desc}
+                     if pd.notnull(desc) else src)
+    return files
+
+
+def add_value_list(header, df, name):
+    if name in df.index:  # a row that is missing leaves header untouched
+        header[name] = list(df.loc[name, pd.notnull(df.loc[name])])
+
+
+def get_xls_header(filepath):
+    """
+    Read an xlsx file and create a dictionary analogous to the one created
+    from the yaml header of a README.md file by the get_header function of
+    caosdb-advancedtools.
+
+    As xlsx files lack the hierarchical structure, the information that can
+    be provided is less complex. The xlsx files are a less powerful
+    alternative for people who are not comfortable with README.md files.
+
+    The xlsx file has a defined set of rows. In each row a list of entries can
+    be given. This structure is converted to a dictionary with a fixed
+    structure. Rows that are absent from the file, including "description",
+    are left out of the dictionary.
+    """
+    header = {}
+    df = pd.read_excel(filepath, index_col=0, header=None)
+    for name in ("responsible", "description"):
+        add_value_list(header, df, name)
+    # The description row is optional like all others: tolerate its absence.
+    assert len(header.get("description", [])) <= 1, \
+        "there must be at most one description"
+
+    for ftype in ["sources", "scripts", "results", "sourceCode", "binaries"]:
+        if ftype not in df.index:
+            continue
+        files = create_files_list(df, ftype)
+
+        if len(files) > 0:
+            header[ftype] = files
+
+    add_value_list(header, df, "revisionOf")
+    # there should be only one revisionOf: keep the first entry
+    if header.get("revisionOf"):
+        header["revisionOf"] = header["revisionOf"][0]
+    add_value_list(header, df, "tags")
+
+    return header
diff --git a/src/caosadvancedtools/scifolder/withreadme.py b/src/caosadvancedtools/scifolder/withreadme.py
new file mode 100644
index 0000000000000000000000000000000000000000..b3eb1095f9af74f11d7045a6096f5ca372913b4a
--- /dev/null
+++ b/src/caosadvancedtools/scifolder/withreadme.py
@@ -0,0 +1,270 @@
+#!/usr/bin/env python
+# encoding: utf-8
+#
+# Copyright (C) 2020 IndiScale GmbH <info@indiscale.com>
+# Copyright (C) 2020 Henrik tom Wörden <h.tomwoerden@indiscale.com>
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as
+# published by the Free Software Foundation, either version 3 of the
+# License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+
+
+import logging
+import os
+from dataclasses import dataclass
+
+import caosdb as db
+from caosadvancedtools.cfood import (assure_has_description, assure_has_parent,
+                                     assure_object_is_in_list, fileguide)
+from caosadvancedtools.read_md_header import get_header as get_md_header
+from caosadvancedtools.table_importer import (win_path_converter,
+                                              win_path_list_converter)
+from caosadvancedtools.utils import return_field_or_property
+
+from .utils import (get_entity_ids_from_include_file,
+                    get_files_referenced_by_field, get_xls_header)
+
+LOGGER = logging.getLogger("withreadme")
+LOGGER.setLevel(level=logging.ERROR)  # messages below ERROR are ignored
+
+
+@dataclass
+class DataModel(object):
+    """Default names used in the data model; override them per instance."""
+
+    results: str = "results"
+    scripts: str = "scripts"
+    sources: str = "sources"
+    date: str = "date"
+    Project: str = "Project"
+    Analysis: str = "Analysis"
+    identifier: str = "identifier"
+    responsible: str = "responsible"
+    revisionOf: str = "revisionOf"
+    Experiment: str = "Experiment"
+    Publication: str = "Publication"
+    Simulation: str = "Simulation"
+    binaries: str = "binaries"
+    sourcecode: str = "sourceCode"
+    description: str = "description"
+
+
+DATAMODEL = DataModel()  # instance with the default names
+dm = DATAMODEL  # short alias used for the header fields below
+
+
+class HeaderField(object):
+    def __init__(self, key, model):
+        self.key = key  # key under which the field appears in the header
+        self.model = model  # name in the data model (None for some fields)
+
+
+RESULTS = HeaderField("results", dm.results)
+SCRIPTS = HeaderField("scripts", dm.scripts)
+SOURCES = HeaderField("sources", dm.sources)
+FILE = HeaderField("file", None)  # entry key; no model name
+INCLUDE = HeaderField("include", None)  # entry key; no model name
+REVISIONOF = HeaderField("revisionOf", dm.revisionOf)
+BINARIES = HeaderField("binaries", dm.binaries)
+SOURCECODE = HeaderField("sourceCode", dm.sourcecode)
+DESCRIPTION = HeaderField("description", dm.description)
+RECORDTYPE = HeaderField("recordtype", None)  # entry key; no model name
+
+
+def get_glob(field):
+    """Collect the globs of a header field.
+
+    ``field`` must be a list whose entries are globs or dicts. A dict must
+    have either an include or a file key; dicts with an include key do not
+    contribute a glob. All other entries are passed through
+    ``return_field_or_property`` with the file key.
+    """
+
+    def is_include(entry):
+        return isinstance(entry, dict) and INCLUDE.key in entry
+
+    return [return_field_or_property(entry, FILE.key)
+            for entry in field if not is_include(entry)]
+
+
+def get_description(value):
+    """Return the description of a dict entry; None if there is none."""
+    if not isinstance(value, dict):
+        return None
+    return value.get(DESCRIPTION.key)
+
+
+def get_rt(value):
+    """Return the record type of a dict entry; None if there is none."""
+    if not isinstance(value, dict):
+        return None
+    return value.get(RECORDTYPE.key)
+
+
+class WithREADME(object):
+    """Base class for crawler classes that read a README with a header."""
+
+    def __init__(self):
+        self._header = None
+        self.ref_files = {}
+
+    @property
+    def header(self):
+        """The header of the crawled file, read and converted on first use."""
+        if self._header is None:
+            if self.crawled_path.lower().endswith(".md"):
+                self._header = get_md_header(
+                    fileguide.access(self.crawled_path))
+            elif self.crawled_path.lower().endswith(".xlsx"):
+                self._header = get_xls_header(
+                    fileguide.access(self.crawled_path))
+            else:
+                raise RuntimeError("Readme format not recognized.")
+            self.convert_win_paths()
+
+        return self._header
+
+    def find_referenced_files(self, fields):
+        """Search the file entities that the given header fields reference.
+
+        The results are stored in ``ref_files`` and ``attached_filenames``.
+        """
+        for field in fields:
+            if field.key not in self.header:
+                continue
+            # Include entries and entries without a glob yield no query
+            # result; drop them so that files and entries stay aligned.
+            entries = [val for val in self.header[field.key]
+                       if not (isinstance(val, dict) and INCLUDE.key in val)]
+            pairs = [(glob, val)
+                     for glob, val in zip(get_glob(entries), entries)
+                     if glob is not None]
+            files = get_files_referenced_by_field(
+                [glob for glob, _ in pairs],
+                prefix=os.path.dirname(self.crawled_path))
+            self.ref_files[field.model] = [
+                (found, get_description(val), get_rt(val))
+                for found, (_, val) in zip(files, pairs)]
+            # flatten returned list of file lists
+            flat_list = [f.path for sublist in files for f in sublist]
+
+            if len(flat_list) == 0:
+                LOGGER.warning("ATTENTION: the field {} does not reference "
+                               "any known files".format(field.key))
+
+            self.attached_filenames.extend(flat_list)
+
+    def convert_path(self, el):
+        """Convert the path in ``el`` to unix style.
+
+        ``el`` is a string or a dict with an include or a file key.
+        returns: same type as el
+        """
+        if not isinstance(el, dict):
+            return win_path_converter(el)
+
+        if INCLUDE.key in el:
+            path_key = INCLUDE.key
+        elif FILE.key in el:
+            path_key = FILE.key
+        else:
+            raise ValueError("field should have a 'file' attribute")
+        el[path_key] = win_path_converter(el[path_key])
+        return el
+
+    def convert_win_paths(self):
+        """Convert the paths of all ``win_paths`` fields of the header."""
+        for field in self.win_paths:
+            if field not in self.header:
+                continue
+            value = self.header[field]
+            if isinstance(value, list):
+                self.header[field] = [self.convert_path(el) for el in value]
+            else:
+                self.header[field] = self.convert_path(value)
+
+    def reference_files_from_header(self, record):
+        """Add properties that reference the files collected in ref_files.
+
+        ref_files is expected to map property names to lists of
+        (files, description, recordtype) tuples, where files is the list of
+        file entities, description the description that shall be added to
+        each and recordtype the recordtype that the files shall get as
+        parent. files may be an empty list and description and recordtype
+        may be None.
+
+        The files will be grouped according to the keys used in ref_files and
+        the record types. The record types take precedence.
+        """
+        references = {}
+
+        for prop_name, ref_tuples in self.ref_files.items():
+            generic_references = []
+
+            for files, description, recordtype in ref_tuples:
+                if len(files) == 0:
+                    continue
+
+                if description is not None:
+                    for fi in files:
+                        assure_has_description(fi, description, force=True)
+
+                if recordtype is None:
+                    generic_references.extend(files)
+                    continue
+
+                for fi in files:
+                    # fix parent
+                    assure_has_parent(fi, recordtype, force=True,
+                                      unique=False)
+                references.setdefault(recordtype, []).extend(files)
+
+            if len(generic_references) > 0:
+                assure_object_is_in_list(
+                    generic_references,
+                    record,
+                    prop_name,
+                    to_be_updated=self.to_be_updated,
+                    datatype=db.LIST(db.REFERENCE),
+                )
+
+        for ref_type, referenced in references.items():
+            assure_object_is_in_list(
+                referenced,
+                record,
+                ref_type,
+                to_be_updated=self.to_be_updated,
+            )
+
+    def reference_included_records(self, record, fields, to_be_updated):
+        """Reference the records listed in the include files of the fields.
+
+        The IDs are added to the property named by the model of each field.
+        """
+        for field in fields:
+            if field.key not in self.header:
+                continue
+            included = []
+
+            for item in self.header[field.key]:
+                # a plain path may contain the word, too; only dicts count
+                if isinstance(item, dict) and INCLUDE.key in item:
+                    included.extend(
+                        get_entity_ids_from_include_file(
+                            os.path.dirname(self.crawled_path),
+                            item[INCLUDE.key]))
+
+            assure_object_is_in_list(included,
+                                     record,
+                                     field.model,
+                                     to_be_updated,
+                                     datatype=db.LIST(db.REFERENCE))
diff --git a/src/doc/crawler.rst b/src/doc/crawler.rst
index 2380cdbdbe44989855adb42afd391467502b5809..92a624bb59f4c0fba8d46076d6df0e0e30bbab75 100644
--- a/src/doc/crawler.rst
+++ b/src/doc/crawler.rst
@@ -36,9 +36,8 @@ different components of the CaosDB Crawler can be found in the
 `developers’ information <#extending-the-crawlers>`__ below.
 
 In case you are happy with our suggestion of a standard crawler, feel
-free to use the standard crawler. The standard crawler lives in this git
-repository maintained by Henrik tom Wörden:
-https://gitlab.com/henrik_indiscale/scifolder
+free to use the standard crawler. The standard crawler lives in the submodule
+``caosadvancedtools.scifolder``.
 
 Usage
 =====
diff --git a/unittests/data/DataAnalysis/2010_TestProject/2019-02-03_something/README.md b/unittests/data/DataAnalysis/2010_TestProject/2019-02-03_something/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..71454e8909393b432ca74fa01e77b33d8b0644d5
--- /dev/null
+++ b/unittests/data/DataAnalysis/2010_TestProject/2019-02-03_something/README.md
@@ -0,0 +1,15 @@
+---
+responsible:	
+- Only Responsible
+description: 	A description of another example analysis.
+
+sources:
+- file:	"/ExperimentalData/2010_TestProject/2019-02-03/*.dat"
+  description:  an example reference to a results file
+
+scripts:
+- file: plot.py
+  description: a plotting script
+results:
+- file: results.pdf
+...
diff --git a/unittests/data/ExperimentalData/2010_TestProject/2019-02-03_something/README.md b/unittests/data/ExperimentalData/2010_TestProject/2019-02-03_something/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..b7e5051c7bdbcdafb1bbd3a870b00feecfb109ff
--- /dev/null
+++ b/unittests/data/ExperimentalData/2010_TestProject/2019-02-03_something/README.md
@@ -0,0 +1,9 @@
+---
+responsible:	
+- Only Responsible
+description: 	A description of another example experiment.
+
+results:
+- file:	"/ExperimentalData/2010_TestProject/2019-02-03/*.dat"
+  description:  an example reference to a results file
+...
diff --git a/unittests/data/Publications/Posters/2019-02-03_something/README.md b/unittests/data/Publications/Posters/2019-02-03_something/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..c95e37ecc569103d8c3a812e45f1a5110781ea26
--- /dev/null
+++ b/unittests/data/Publications/Posters/2019-02-03_something/README.md
@@ -0,0 +1,11 @@
+---
+responsible:	
+- Only Responsible
+description: 	A description of another example experiment.
+
+sources:
+- /DataAnalysis/2010_TestProject/2019-02-03/results.pdf
+
+results:
+- "*.pdf"
+...
diff --git a/unittests/data/README.md b/unittests/data/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..a2e0ce6e319219c10bc61653510ad53dd2ab958a
--- /dev/null
+++ b/unittests/data/README.md
@@ -0,0 +1,14 @@
+---
+responsible: Ana Lytic
+description: An examplary analysis of very exciting research. The analysis was conducted following state of the art best practices of scientific methodology.
+sources: 
+  - /ExperimentalData/2010_TestProject/2019-02-03_something/
+  - file: /ExperimentalData/2010_TestProject/2019-02-03_something/
+    description: An example reference to an experiment. The experimental data was analysed with statistical methods using proper error calculations.
+scripts: 
+  - file: scripts
+    description: all the files needed to run the analysis
+results: 
+  - file: results.pdf
+    description: a plot of the statistical analysis
+...
diff --git a/unittests/data/README.xlsx b/unittests/data/README.xlsx
new file mode 100644
index 0000000000000000000000000000000000000000..a909347789edc1d5a1bbaacd998744cee83d5f6b
Binary files /dev/null and b/unittests/data/README.xlsx differ
diff --git a/unittests/data/SimulationData/2010_TestProject/2019-02-03_something/README.md b/unittests/data/SimulationData/2010_TestProject/2019-02-03_something/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..fba1bd48a89514cbff92f9d8bd518484ecaa624b
--- /dev/null
+++ b/unittests/data/SimulationData/2010_TestProject/2019-02-03_something/README.md
@@ -0,0 +1,12 @@
+---
+responsible:	
+- Only Responsible
+description: 	A description of another example experiment.
+
+results:
+- file:	"*.dat"
+  description:  an example reference to a results file
+
+scripts:
+- sim.py
+...
diff --git a/unittests/test_cfoods.py b/unittests/test_cfoods.py
new file mode 100644
index 0000000000000000000000000000000000000000..87e6d6d2da0254e134def92c098b1568c26863ab
--- /dev/null
+++ b/unittests/test_cfoods.py
@@ -0,0 +1,54 @@
+#!/usr/bin/env python
+# encoding: utf-8
+#
+# Copyright (C) 2019 Henrik tom Wörden
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as
+# published by the Free Software Foundation, either version 3 of the
+# License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+import os
+import unittest
+
+from caosadvancedtools.scifolder import (AnalysisCFood, ExperimentCFood,
+                                         PublicationCFood, SimulationCFood)
+
+data_path = os.path.join(os.path.dirname(os.path.realpath(__file__)),
+                         "data")  # "data" folder next to this test module
+
+
+class CFoodTest(unittest.TestCase):  # one match/construct check per CFood
+    def test_analysis(self):
+        self.assertFalse(AnalysisCFood.match_item("nopath"))
+        path = (data_path+"/DataAnalysis/2010_TestProject/"
+                "2019-02-03_something/README.md")
+        self.assertTrue(AnalysisCFood.match_item(path))
+        AnalysisCFood(path)  # construction must not raise
+
+    def test_experiment(self):
+        self.assertFalse(ExperimentCFood.match_item("nopath"))
+        path = (data_path+"/ExperimentalData/2010_TestProject/"
+                "2019-02-03_something/README.md")
+        self.assertTrue(ExperimentCFood.match_item(path))
+        ExperimentCFood(path)  # construction must not raise
+
+    def test_publication(self):
+        self.assertFalse(PublicationCFood.match_item("nopath"))
+        path = data_path+"/Publications/Posters/2019-02-03_something/README.md"
+        self.assertTrue(PublicationCFood.match_item(path))
+        PublicationCFood(path)  # construction must not raise
+
+    def test_simulation(self):
+        self.assertFalse(SimulationCFood.match_item("nopath"))
+        path = (data_path + "/SimulationData/2010_TestProject/"
+                "2019-02-03_something/README.md")
+        self.assertTrue(SimulationCFood.match_item(path))
+        SimulationCFood(path)  # construction must not raise
diff --git a/unittests/test_scifolder_utils.py b/unittests/test_scifolder_utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..30e211d9daf8da8f831bff4580efbc63d6bdf6fb
--- /dev/null
+++ b/unittests/test_scifolder_utils.py
@@ -0,0 +1,67 @@
+#!/usr/bin/env python
+# encoding: utf-8
+#
+# Copyright (C) 2020 IndiScale GmbH <info@indiscale.com>
+# Copyright (C) 2020 Henrik tom Wörden <h.tomwoerden@indiscale.com>
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as
+# published by the Free Software Foundation, either version 3 of the
+# License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+
+import os
+import unittest
+
+from caosadvancedtools.scifolder.utils import get_xls_header
+
+
+class XLSTest(unittest.TestCase):
+    def test_read(self):
+        filename = os.path.join(os.path.dirname(__file__),
+                                "data/README.xlsx")
+        assert os.path.exists(filename)
+
+        header = get_xls_header(filename)
+        assert header is not None
+        assert isinstance(header, dict)
+
+        # responsible: exactly one person is listed
+        assert header['responsible'] == ["Ana Lytic"]
+
+        # description: a one-element list holding a longer text
+        assert len(header['description']) == 1
+        assert isinstance(header['description'][0], str)
+        assert len(header['description'][0]) > 20
+        assert "exciting" in header['description'][0]
+
+        # sources: every entry is a dict with file and description
+        assert isinstance(header['sources'], list)
+
+        for el in header['sources']:
+            assert isinstance(el, dict)
+            assert "TestProject" in el["file"]
+            assert "example" in el["description"]
+
+        # scripts: every entry is a dict with file and description
+        assert isinstance(header['scripts'], list)
+
+        for el in header['scripts']:
+            assert isinstance(el, dict)
+            assert "scripts" == el["file"]
+            assert "all the files" in el["description"]
+
+        # results; NOTE(review): README.md names results.pdf, confirm the xlsx
+        assert isinstance(header['results'], list)
+
+        for el in header['results']:
+            assert isinstance(el, dict)
+            assert "result.pdf" == el["file"]
+            assert "plot" in el["description"]