diff --git a/.gitignore b/.gitignore
index 67d5d78d3d3d86a0b9d601a3d9ccc9354f472e2b..daa9defe713083b48fb6501c9195a45dd2617300 100644
--- a/.gitignore
+++ b/.gitignore
@@ -2,4 +2,7 @@ src/newcrawler.egg-info/
 unittests/provenance.yml
 .coverage
 TAGS
-src/.coverage
\ No newline at end of file
+src/.coverage
+build/
+*~
+.pdbrc
\ No newline at end of file
diff --git a/src/doc/Makefile b/src/doc/Makefile
new file mode 100644
index 0000000000000000000000000000000000000000..dc1690a8f7f74815b25a51e519e4712c7c92b7ec
--- /dev/null
+++ b/src/doc/Makefile
@@ -0,0 +1,49 @@
+# ** header v3.0
+# This file is a part of the CaosDB Project.
+#
+# Copyright (C) 2020 IndiScale GmbH <info@indiscale.com>
+# Copyright (C) 2020 Daniel Hornung <d.hornung@indiscale.com>
+# Copyright (C) 2021 Alexander Schlemmer <alexander.schlemmer@ds.mpg.de>
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as
+# published by the Free Software Foundation, either version 3 of the
+# License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+#
+# ** end header
+
+# This Makefile is a wrapper for sphinx scripts.
+#
+# It is based upon the autocreated makefile for Sphinx documentation.
+
+# You can set these variables from the command line, and also
+# from the environment for the first three.
+SPHINXOPTS    ?= -a
+SPHINXBUILD   ?= sphinx-build
+SPHINXAPIDOC  ?= sphinx-apidoc
+PY_BASEDIR    = ../newcrawler
+SOURCEDIR     = .
+BUILDDIR      = ../../build/doc
+
+# "apidoc" is a command, not a file: declare it phony so that it always runs.
+.PHONY: doc-help Makefile apidoc
+
+# Put it first so that "make" without argument is like "make doc-help".
+doc-help:
+	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
+
+# Catch-all target: route all unknown targets to Sphinx using the new
+# "make mode" option.  $(O) is meant as a shortcut for $(SPHINXOPTS).
+%: Makefile apidoc
+	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
+
+apidoc:
+	@$(SPHINXAPIDOC) -o _apidoc --separate $(PY_BASEDIR)
diff --git a/src/doc/conf.py b/src/doc/conf.py
new file mode 100644
index 0000000000000000000000000000000000000000..75731285a77f8a30fcb4bfc6be0483c4bba0052a
--- /dev/null
+++ b/src/doc/conf.py
@@ -0,0 +1,217 @@
+# -*- coding: utf-8 -*-
+#
+# Configuration file for the Sphinx documentation builder.
+#
+# Based on the configuration for caosdb-pylib.
+#
+# Copyright (C) 2021 Alexander Schlemmer <alexander.schlemmer@ds.mpg.de>
+#
+# This file only contains a selection of the most common options. For a full
+# list see the documentation:
+# https://www.sphinx-doc.org/en/master/usage/configuration.html
+
+# -- Path setup --------------------------------------------------------------
+
+# If extensions (or modules to document with autodoc) are in another directory, add these
+# directories to sys.path here. This is particularly necessary if this package is installed at a
+# different version, for example via `pip install`.
+#
+# If the directory is relative to the documentation root, use os.path.abspath to make it absolute,
+# like shown here.
+#
+import os
+import sys
+sys.path.insert(0, os.path.abspath('..'))
+
+import sphinx_rtd_theme  # noqa: E402
+
+
+# -- Project information -----------------------------------------------------
+
+project = 'caosdb-newcrawler'
+copyright = '2021, MPIDS'
+author = 'Alexander Schlemmer'
+
+# The short X.Y version
+version = '0.1'
+# The full version, including alpha/beta/rc tags
+# release = '0.5.2-rc2'
+release = '0.1'
+
+
+# -- General configuration ---------------------------------------------------
+
+# If your documentation needs a minimal Sphinx version, state it here.
+#
+# needs_sphinx = '1.0'
+
+# Add any Sphinx extension module names here, as strings. They can be
+# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
+# ones.
+extensions = [
+    'sphinx.ext.autodoc',
+    'sphinx.ext.autosectionlabel',
+    'sphinx.ext.intersphinx',
+    'sphinx.ext.napoleon',     # For Google style docstrings
+    "sphinx_rtd_theme",
+]
+
+# Add any paths that contain templates here, relative to this directory.
+templates_path = ['_templates']
+
+# The suffix(es) of source filenames.
+# You can specify multiple suffix as a list of string:
+source_suffix = ['.rst']
+
+# The master toctree document.
+master_doc = 'index'
+
+# The language for content autogenerated by Sphinx. Refer to documentation
+# for a list of supported languages.
+#
+# This is also used if you do content translation via gettext catalogs.
+# Usually you set "language" from the command line for these cases.
+language = None
+
+# List of patterns, relative to source directory, that match files and
+# directories to ignore when looking for source files.
+# This pattern also affects html_static_path and html_extra_path.
+exclude_patterns = []
+
+# The name of the Pygments (syntax highlighting) style to use.
+pygments_style = None
+
+
+# -- Options for HTML output -------------------------------------------------
+
+# The theme to use for HTML and HTML Help pages.  See the documentation for
+# a list of builtin themes.
+#
+
+html_theme = "sphinx_rtd_theme"
+
+# Theme options are theme-specific and customize the look and feel of a theme
+# further.  For a list of options available for each theme, see the
+# documentation.
+#
+# html_theme_options = {}
+
+# Add any paths that contain custom static files (such as style sheets) here,
+# relative to this directory. They are copied after the builtin static files,
+# so a file named "default.css" will overwrite the builtin "default.css".
+html_static_path = ['_static']
+
+# Custom sidebar templates, must be a dictionary that maps document names
+# to template names.
+#
+# The default sidebars (for documents that don't match any pattern) are
+# defined by theme itself.  Builtin themes are using these templates by
+# default: ``['localtoc.html', 'relations.html', 'sourcelink.html',
+# 'searchbox.html']``.
+#
+# html_sidebars = {}
+
+
+# -- Options for HTMLHelp output ---------------------------------------------
+
+# Output file base name for HTML help builder.
+htmlhelp_basename = 'caosdb-newcrawlerdoc'
+
+
+# -- Options for LaTeX output ------------------------------------------------
+
+latex_elements = {
+    # The paper size ('letterpaper' or 'a4paper').
+    #
+    # 'papersize': 'letterpaper',
+
+    # The font size ('10pt', '11pt' or '12pt').
+    #
+    # 'pointsize': '10pt',
+
+    # Additional stuff for the LaTeX preamble.
+    #
+    # 'preamble': '',
+
+    # Latex figure (float) alignment
+    #
+    # 'figure_align': 'htbp',
+}
+
+# Grouping the document tree into LaTeX files. List of tuples
+# (source start file, target name, title,
+#  author, documentclass [howto, manual, or own class]).
+latex_documents = [
+    (master_doc, 'caosdb-newcrawler.tex', 'caosdb-newcrawler Documentation',
+     'MPIDS', 'manual'),
+]
+
+
+# -- Options for manual page output ------------------------------------------
+
+# One entry per manual page. List of tuples
+# (source start file, name, description, authors, manual section).
+man_pages = [
+    (master_doc, 'caosdb-newcrawler', 'caosdb-newcrawler documentation',
+     [author], 1)
+]
+
+
+# -- Options for Texinfo output ----------------------------------------------
+
+# Grouping the document tree into Texinfo files. List of tuples
+# (source start file, target name, title, author,
+#  dir menu entry, description, category)
+texinfo_documents = [
+    (master_doc, 'caosdb-newcrawler', 'caosdb-newcrawler documentation',
+     author, 'caosdb-newcrawler', 'One line description of project.',
+     'Miscellaneous'),
+]
+
+
+# -- Options for Epub output -------------------------------------------------
+
+# Bibliographic Dublin Core info.
+epub_title = project
+
+# The unique identifier of the text. This can be a ISBN number
+# or the project homepage.
+#
+# epub_identifier = ''
+
+# A unique identification for the text.
+#
+# epub_uid = ''
+
+# A list of files that should not be packed into the epub file.
+epub_exclude_files = ['search.html']
+
+
+# -- Extension configuration -------------------------------------------------
+
+# True to prefix each section label with the name of the document it is in, followed by a colon. For
+# example, index:Introduction for a section called Introduction that appears in document
+# index.rst. Useful for avoiding ambiguity when the same section heading appears in different
+# documents.
+#
+# Note: This stops "normal" links from working, so it should be kept at False.
+# autosectionlabel_prefix_document = True
+
+# -- Options for intersphinx -------------------------------------------------
+
+# https://www.sphinx-doc.org/en/master/usage/extensions/intersphinx.html#confval-intersphinx_mapping
+intersphinx_mapping = {
+    "python": ("https://docs.python.org/", None),
+    "caosdb-mysqlbackend": ("https://docs.indiscale.com/caosdb-mysqlbackend/",
+                            None),
+    "caosdb-server": ("https://docs.indiscale.com/caosdb-server/", None),
+    "caosdb-pylib": ("https://docs.indiscale.com/caosdb-pylib/", None),
+    "caosdb-advanced-user-tools": ("https://docs.indiscale.com/caosdb-advanced-user-tools/", None),
+}
+
+
+# TODO Which options do we want?
+autodoc_default_options = {
+    'members': None,
+    'undoc-members': None,
+}
diff --git a/src/doc/index.rst b/src/doc/index.rst
new file mode 100644
index 0000000000000000000000000000000000000000..5579b93fd406c0169c410b8a0d237586df504a36
--- /dev/null
+++ b/src/doc/index.rst
@@ -0,0 +1,75 @@
+Crawler 2.0 Documentation
+=========================
+
+Introduction
+------------
+
+The crawler is the main data integration tool for CaosDB.
+Its task is to automatically synchronize data found on file systems or in other
+sources of data with the semantic data model of CaosDB.
+
+More specifically, data that is contained in a hierarchical structure is converted to a data
+structure that is consistent with a predefined semantic data model.
+
+The hierarchical structure can be, for example, a file tree. However, it can
+also be something different, like the contents of a json file or a file tree with
+json files.
+
+Concepts
+--------
+
+Structure Elements
+++++++++++++++++++
+
+This hierarchical structure is assumed to be constituted of a tree of
+StructureElements. The tree is created on the fly by so-called Converters which
+are defined in a yaml file. The tree of StructureElements is a model
+of the existing data (for example, a tree of Python file objects
+(StructureElements) could represent a file tree that exists on some file server).
+
+Relevant sources in:
+src/structure_elements.py
+
+Converters
+++++++++++
+
+Converters treat StructureElements and thereby create the StructureElements that
+are the children of the treated StructureElement. Converters therefore create
+the above named tree. The definition of a Converter also contains what
+Converters shall be used to treat the generated child-StructureElements. The
+definition is therefore a tree itself. (Question: Should there be global Converters
+that are always checked when treating a StructureElement? Should Converters be
+associated with generated child-StructureElements? Currently, all children are
+created and checked against all Converters. It could be that one would like to
+check file-StructureElements against one set of Converters and
+directory-StructureElements against another)
+
+Each StructureElement in the tree has a set of data values, i.e. a dictionary of
+key value pairs.
+Some of those values are set due to the kind of StructureElement. For example,
+a file could have the file name as such a key value pair: 'filename': <sth>.
+Converters may define additional functions that create further values. For
+example, a regular expression could be used to get a date from a file name.
+
+Relevant sources in:
+src/converters.py
+
+Identifiables
++++++++++++++
+
+Relevant sources in:
+src/identifiable_adapters.py
+
+The Crawler
++++++++++++
+
+The crawler can be considered the main program doing the synchronization in basically two steps:
+1. Based on a yaml-specification scan the file system (or other sources) and create a set
+   of CaosDB Entities that are supposed to be inserted or updated in a CaosDB instance.
+2. Compare the current state of the CaosDB instance with the set of CaosDB Entities created in
+   step 1, taking into account the :ref:`registered identifiables<Identifiables>`. Insert or
+   update entities accordingly.
+
+Relevant sources in:
+src/crawl.py
+
diff --git a/src/newcrawler/crawl.py b/src/newcrawler/crawl.py
index 141558f3c64f0aefa9289dd95bf880e761b486f7..b448dd9169ebd3291b0b0a8b5602d4329025d202 100644
--- a/src/newcrawler/crawl.py
+++ b/src/newcrawler/crawl.py
@@ -23,41 +23,6 @@
 # ** end header
 #
 
-"""
-Data that is contained in a hierarchical structure is converted to a data
-structure that is consistent with a predefined semantic data model.
-
-The hierarchical sturcture can be for example a file tree. However it can be
-also something different like the contents of a json file or a file tree with
-json files.
-
-
-This hierarchical structure is assumed to be consituted of a tree of
-StructureElements. The tree is created on the fly by so called Converters which
-are defined in a yaml file. The tree of StructureElements is a model
-of the existing data (For example could a tree of Python file objects
-(StructureElements) represent a file tree that exists on some file server).
-
-Converters treat StructureElements and thereby create the StructureElement that
-are the children of the treated StructureElement. Converters therefore create
-the above named tree. The definition of a Converter also contains what
-Converters shall be used to treat the generated child-StructureElements. The
-definition is there a tree itself. (Question: Should there be global Converters
-that are always checked when treating a StructureElement? Should Converters be
-associated with generated child-StructureElements? Currently, all children are
-created and checked against all Converters. It could be that one would like to
-check file-StructureElements against one set of Converters and
-directory-StructureElements against another)
-
-Each StructureElement in the tree has a set of data values, i.e a dictionary of
-key value pairs.
-Some of those values are set due to the kind of StructureElement. For example,
-a file could have the file name as such a key value pair: 'filename': <sth>.
-Converters may define additional functions that create further values. For
-example, a regular expresion could be used to get a date from a file name.
-
-
-"""
 
 import sys
 import os