From 376359887248d6665fcd0d441c14f75336579411 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Henrik=20tom=20W=C3=B6rden?= <h.tomwoerden@indiscale.com>
Date: Wed, 15 Feb 2023 21:16:51 +0100
Subject: [PATCH] DOC: add and enhance documentation

---
 CHANGELOG.md                              |  5 ++
 INSTALL.md                                | 56 +++++++++++++++++++++
 src/doc/README_SETUP.md                   | 59 ++---------------------
 src/doc/cfood.rst                         | 42 ++++++++++++++--
 src/doc/concepts.rst                      |  4 ++
 src/doc/getting_started/INSTALL.md        |  1 +
 src/doc/getting_started/helloworld.rst    |  5 ++
 src/doc/getting_started/index.rst         | 15 ++++++
 src/doc/getting_started/prerequisites.rst |  6 +++
 src/doc/index.rst                         |  4 +-
 src/doc/macros.rst                        |  2 +-
 src/doc/tutorials/index.rst               |  4 +-
 unittests/test_converters.py              |  2 +-
 13 files changed, 141 insertions(+), 64 deletions(-)
 create mode 100644 INSTALL.md
 create mode 120000 src/doc/getting_started/INSTALL.md
 create mode 100644 src/doc/getting_started/helloworld.rst
 create mode 100644 src/doc/getting_started/index.rst
 create mode 100644 src/doc/getting_started/prerequisites.rst

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 7df6439d..8544b7cb 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -10,12 +10,17 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 ### Added ###
 - DateElementConverter: allows to interpret text as a date object
 - the restricted_path argument allows to crawl only a subtree
+- You can now access the file system path of a structure element (if it has one) using the variable
+  name "<converter name>\_path"
 
 ### Changed ###
 
 - The definitions for the default converters were removed from crawl.py and placed into
   a separate yaml file called `default_converters.yml`. There is a new test testing for
   the correct loading behavior of that file.
+- JSONFileConverter, YAMLFileConverter and MarkdownFileConverter now inherit from
+  SimpleFileConverter. Behavior is unchanged, except that the MarkdownFileConverter now raises a
+  ConverterValidationError when the YAML header cannot be read instead of silently not matching.
 
 ### Deprecated ###
 
diff --git a/INSTALL.md b/INSTALL.md
new file mode 100644
index 00000000..7d21b4b5
--- /dev/null
+++ b/INSTALL.md
@@ -0,0 +1,56 @@
+
+# Installation ##
+
+
+## Linux ####
+
+Make sure that Python (at least version 3.8) and pip are installed, using your system tools and
+documentation.
+
+Then open a terminal and continue in the [Generic installation](#generic-installation) section.
+
+## Windows ####
+
+If a Python distribution is not yet installed, we recommend Anaconda Python, which you can download
+for free from [https://www.anaconda.com](https://www.anaconda.com).  The "Anaconda Individual Edition" provides most of the
+packages you will ever need out of the box.  If you prefer, you may also install the leaner
+"Miniconda" installer, which allows you to install packages as you need them.
+
+After installation, open an Anaconda prompt from the Windows menu and continue in the [Generic
+installation](#generic-installation) section.
+
+## MacOS ####
+
+If there is no Python 3 installed yet, there are two main ways to
+obtain it: Either get the binary package from
+[python.org](https://www.python.org/downloads/) or, for advanced
+users, install via [Homebrew](https://brew.sh/). After installation
+from python.org, it is recommended to also update the TLS certificates
+for Python (this requires administrator rights for your user):
+
+```sh
+# Replace this with your Python version number:
+cd /Applications/Python\ 3.9/
+
+# This needs administrator rights:
+sudo ./Install\ Certificates.command
+```
+
+After these steps, you may continue with the [Generic
+installation](#generic-installation).
+
+## Generic installation ####
+
+---
+
+Obtain the sources from GitLab and install from there (`git` must be installed for
+this option):
+
+```sh
+git clone https://gitlab.com/caosdb/caosdb-crawler
+cd caosdb-crawler
+pip3 install --user .
+```
+
+**Note**: In the near future, this package will also be made available on PyPI.
+
diff --git a/src/doc/README_SETUP.md b/src/doc/README_SETUP.md
index 1f6e15d4..952a8c94 100644
--- a/src/doc/README_SETUP.md
+++ b/src/doc/README_SETUP.md
@@ -1,63 +1,10 @@
 # Getting started with the CaosDB Crawler #
 
-## Installation ##
-
-### How to install ###
-
-#### Linux ####
-
-Make sure that Python (at least version 3.8) and pip is installed, using your system tools and
-documentation.
-
-Then open a terminal and continue in the [Generic installation](#generic-installation) section.
-
-#### Windows ####
-
-If a Python distribution is not yet installed, we recommend Anaconda Python, which you can download
-for free from [https://www.anaconda.com](https://www.anaconda.com).  The "Anaconda Individual Edition" provides most of all
-packages you will ever need out of the box.  If you prefer, you may also install the leaner
-"Miniconda" installer, which allows you to install packages as you need them.
-
-After installation, open an Anaconda prompt from the Windows menu and continue in the [Generic
-installation](#generic-installation) section.
-
-#### MacOS ####
-
-If there is no Python 3 installed yet, there are two main ways to
-obtain it: Either get the binary package from
-[python.org](https://www.python.org/downloads/) or, for advanced
-users, install via [Homebrew](https://brew.sh/). After installation
-from python.org, it is recommended to also update the TLS certificates
-for Python (this requires administrator rights for your user):
-
-```sh
-# Replace this with your Python version number:
-cd /Applications/Python\ 3.9/
-
-# This needs administrator rights:
-sudo ./Install\ Certificates.command
-```
-
-After these steps, you may continue with the [Generic
-installation](#generic-installation).
-
-#### Generic installation ####
-
----
-
-Obtain the sources from GitLab and install from there (`git` must be installed for
-this option):
-
-```sh
-git clone https://gitlab.com/caosdb/caosdb-crawler
-cd caosdb-crawler
-pip3 install --user .
-```
-
-**Note**: In the near future, this package will also be made available on PyPi.
-
+## Installation
+See INSTALL.md in the repository root.
 
 ## Run Unit Tests
+Run `pytest unittests`.
 
 ## Documentation ##
 We use sphinx to create the documentation. Docstrings in the code should comply
diff --git a/src/doc/cfood.rst b/src/doc/cfood.rst
index 37f6a8c7..882660af 100644
--- a/src/doc/cfood.rst
+++ b/src/doc/cfood.rst
@@ -149,6 +149,42 @@ create lists or multi properties instead of single values:
 .. code-block:: yaml
                 
         Experiment1:
-            Measurement: +Measurement <- Element in List (list is cleared before run)
-                         *Measurement <- Multi Property (properties are removed before run)
-                         Measurement  <- Overwrite
+            Measurement: +Measurement #  Element in List (list is cleared before run)
+                         *Measurement #  Multi Property (properties are removed before run)
+                         Measurement  #  Overwrite
+
+
+File Entities
+-------------
+
+In order to use File Entities, you must set the appropriate ``role: File``.
+Additionally, the path and file keys have to be given, with values that set the
+paths remotely and locally, respectively. You can use the variable
+``<converter name>_path`` that is automatically created by converters that deal
+with file system related StructureElements. The file object itself is stored
+in a variable with the same name (as is the case for other Records).
+
+
+.. code-block:: yaml
+
+      somefile:
+        type: SimpleFile
+        match: ^params.*$  # match any file that starts with "params"
+        records:
+          fileEntity:
+            role: File           # necessary to create a File Entity
+            path: somefile_path  # defines the path in CaosDB
+            file: somefile_path  # path where the file is found locally
+          SomeRecord:
+            ParameterFile: $fileEntity  # creates a reference to the file
+
+Automatically generated keys
+++++++++++++++++++++++++++++
+
+Some variable names are automatically generated and can be used via ``$<variable name>``. Those include:
+
+- ``<converter name>``: access the path of converter names to the current converter
+- ``<converter name>_path``: the file system path to the structure element
+  (file system related converters only)
+- ``<Record key>``: all entities that are created in the ``records`` section
+  are available under the same key
diff --git a/src/doc/concepts.rst b/src/doc/concepts.rst
index 89757f21..0881d930 100644
--- a/src/doc/concepts.rst
+++ b/src/doc/concepts.rst
@@ -1,6 +1,10 @@
 Concepts
 ))))))))
 
+The CaosDB Crawler can handle any kind of hierarchical data structure. The typical use case is
+a directory tree that is traversed. We use the following terms/concepts to describe how the CaosDB
+Crawler works.
+
 Structure Elements
 ++++++++++++++++++
 
diff --git a/src/doc/getting_started/INSTALL.md b/src/doc/getting_started/INSTALL.md
new file mode 120000
index 00000000..95b6037c
--- /dev/null
+++ b/src/doc/getting_started/INSTALL.md
@@ -0,0 +1 @@
+../../../INSTALL.md
\ No newline at end of file
diff --git a/src/doc/getting_started/helloworld.rst b/src/doc/getting_started/helloworld.rst
new file mode 100644
index 00000000..ef4a1398
--- /dev/null
+++ b/src/doc/getting_started/helloworld.rst
@@ -0,0 +1,5 @@
+
+Hello World
+)))))))))))
+
+TODO Describe the smallest possible crawler run
diff --git a/src/doc/getting_started/index.rst b/src/doc/getting_started/index.rst
new file mode 100644
index 00000000..74ffa7da
--- /dev/null
+++ b/src/doc/getting_started/index.rst
@@ -0,0 +1,15 @@
+Getting Started
++++++++++++++++
+
+.. toctree::
+   :maxdepth: 2
+   :caption: Contents:
+   :hidden:
+
+   Installation<INSTALL>
+   prerequisites
+   helloworld
+
+This section will help you get going! From the first installation steps to the first simple crawl.
+
+Let's go!
diff --git a/src/doc/getting_started/prerequisites.rst b/src/doc/getting_started/prerequisites.rst
new file mode 100644
index 00000000..dc8022b6
--- /dev/null
+++ b/src/doc/getting_started/prerequisites.rst
@@ -0,0 +1,6 @@
+
+Prerequisites
+)))))))))))))
+
+TODO Describe what you need to actually do a crawler run: data, CaosDB, ...
+
diff --git a/src/doc/index.rst b/src/doc/index.rst
index b4e30e47..d319bf4d 100644
--- a/src/doc/index.rst
+++ b/src/doc/index.rst
@@ -7,12 +7,12 @@ CaosDB-Crawler Documentation
    :caption: Contents:
    :hidden:
 
-   Getting started<README_SETUP>
+   Getting started<getting_started/index>
+   Tutorials<tutorials/index>
    Concepts<concepts>
    Converters<converters>
    CFoods (Crawler Definitions)<cfood>
    Macros<macros>
-   Tutorials<tutorials/index>
    How to upgrade<how-to-upgrade>
    API documentation<_apidoc/modules>
 
diff --git a/src/doc/macros.rst b/src/doc/macros.rst
index d3a3e9b9..7685731d 100644
--- a/src/doc/macros.rst
+++ b/src/doc/macros.rst
@@ -195,7 +195,7 @@ The example will be expanded to:
 
 
 Limitation
-----------
+==========
 
 Currently it is not possible to use the same macro twice in the same yaml node, but in different
 positions. Consider:
diff --git a/src/doc/tutorials/index.rst b/src/doc/tutorials/index.rst
index 88d598ec..02371de1 100644
--- a/src/doc/tutorials/index.rst
+++ b/src/doc/tutorials/index.rst
@@ -1,9 +1,11 @@
 Tutorials
 +++++++++
 
+This chapter contains a collection of tutorials.
+
 .. toctree::
    :maxdepth: 2
    :caption: Contents:
-   :hidden:
 
    Example CFood<example>
+
diff --git a/unittests/test_converters.py b/unittests/test_converters.py
index f4643bd0..d0d2e23a 100644
--- a/unittests/test_converters.py
+++ b/unittests/test_converters.py
@@ -613,7 +613,7 @@ def test_load_converters():
 
 
 def test_create_path_value(converter_registry):
-    """ test using the "test_directories" folder"""
+    """ test whether the variable containing the path is added to the general store"""
     dc = Converter.converter_factory(
         definition={
             "type": "Directory",
-- 
GitLab