From fbdf229364159a0eef3e239bef1698d9d49e9603 Mon Sep 17 00:00:00 2001
From: Daniel <d.hornung@indiscale.com>
Date: Mon, 16 Oct 2023 09:09:56 +0200
Subject: [PATCH] DOC: More documentation changes.

---
 src/caoscrawler/crawl.py              |  8 ++++----
 src/caoscrawler/scanner.py            |  5 ++---
 src/caoscrawler/structure_elements.py | 24 +++++++++++++++++++++--
 src/doc/converters.rst                | 28 ++++++++++++++++++++-------
 4 files changed, 49 insertions(+), 16 deletions(-)

diff --git a/src/caoscrawler/crawl.py b/src/caoscrawler/crawl.py
index da721ec3..f9ecfd77 100644
--- a/src/caoscrawler/crawl.py
+++ b/src/caoscrawler/crawl.py
@@ -1254,7 +1254,7 @@ def crawler_main(crawled_directory_path: str,
                  dry_run: bool = False,
                  prefix: str = "",
                  securityMode: SecurityMode = SecurityMode.UPDATE,
-                 unique_names=True,
+                 unique_names: bool = True,
                  restricted_path: Optional[list[str]] = None,
                  remove_prefix: Optional[str] = None,
                  add_prefix: Optional[str] = None,
@@ -1270,9 +1270,9 @@ def crawler_main(crawled_directory_path: str,
     identifiables_definition_file : str
         filename of an identifiable definition yaml file
     debug : bool
-        DEPRECATED, whether or not to run in debug mode
+        DEPRECATED, use a provenance file instead.
     provenance_file : str
-        provenance information will be stored in a file with given filename
+        Provenance information will be stored in a file with given filename
     dry_run : bool
         do not commit any chnages to the server
     prefix : str
@@ -1312,7 +1312,7 @@ def crawler_main(crawled_directory_path: str,
         _fix_file_paths(crawled_data, add_prefix, remove_prefix)
         _check_record_types(crawled_data)
 
-        if provenance_file is not None and debug:
+        if provenance_file is not None:
             crawler.save_debug_data(debug_tree=debug_tree, filename=provenance_file)
 
         if identifiables_definition_file is not None:
diff --git a/src/caoscrawler/scanner.py b/src/caoscrawler/scanner.py
index f1cad055..53fb7ccc 100644
--- a/src/caoscrawler/scanner.py
+++ b/src/caoscrawler/scanner.py
@@ -288,7 +288,6 @@ Parameters
                 # extracts values from structure element and stores them in the
                 # variable store
                 converter.create_values(general_store_copy, element)
-
                 keys_modified = converter.create_records(
                     general_store_copy, record_store_copy, element)
 
@@ -396,7 +395,7 @@ def scan_structure_elements(items: Union[list[StructureElement], StructureElemen
                             crawler_definition: dict,
                             converter_registry: dict,
                             restricted_path: Optional[list[str]] = None,
-                            debug_tree: Optional[DebugTree] = None):
+                            debug_tree: Optional[DebugTree] = None) -> list[db.Record]:
     """
     Start point of the crawler recursion.
 
@@ -417,7 +416,7 @@ def scan_structure_elements(items: Union[list[StructureElement], StructureElemen
 
     Returns
     -------
-    crawled_data : list
+    crawled_data : list[db.Record]
         the final list with the target state of Records.
     """
 
diff --git a/src/caoscrawler/structure_elements.py b/src/caoscrawler/structure_elements.py
index 952f29d0..ff070626 100644
--- a/src/caoscrawler/structure_elements.py
+++ b/src/caoscrawler/structure_elements.py
@@ -28,9 +28,16 @@ import warnings
 
 
 class StructureElement(object):
-    """ base class for elements in the hierarchical data structure """
+    """Base class for elements in the hierarchical data structure.
 
-    def __init__(self, name):
+Parameters
+----------
+
+name: str
+  The name of the StructureElement.  May be used for pattern matching by CFood rules.
+    """
+
+    def __init__(self, name: str):
         # Used to store usage information for debugging:
         self.metadata: tDict[str, set[str]] = {
             "usage": set()
@@ -46,6 +53,18 @@ class StructureElement(object):
 
 
 class FileSystemStructureElement(StructureElement):
+    """StructureElement representing an element of a file system, like a directory or a simple file.
+
+Parameters
+----------
+
+name: str
+  The name of the StructureElement.  May be used for pattern matching by CFood rules.
+
+path: str
+  The path to the file or directory.
+    """
+
     def __init__(self, name: str, path: str):
         super().__init__(name)
         self.path = path
@@ -65,6 +84,7 @@ class Directory(FileSystemStructureElement):
 
 
 class File(FileSystemStructureElement):
+    """StructureElement representing a file."""
     pass
 
 
diff --git a/src/doc/converters.rst b/src/doc/converters.rst
index 119cee84..0dde61d5 100644
--- a/src/doc/converters.rst
+++ b/src/doc/converters.rst
@@ -58,13 +58,16 @@ to generate records (see :py:meth:`~caoscrawler.converters.Converter.create_reco
 **records** is a dict of definitions that define the semantic structure
 (see details below).
 
-Subtree contains a list of Converter defnitions that look like the one
-described here.
+**subtree** makes the yaml recursive: It contains a list of new Converter definitions, which work on
+the StructureElements that are returned by the current Converter.
 
 
 Standard Converters
 +++++++++++++++++++
 
+These are the standard converters that exist in a default installation.  For writing and applying
+*custom converters*, see :ref:`below <Custom Converters>`.
+
 Directory Converter
 ===================
 The Directory Converter creates StructureElements for each File and Directory
@@ -189,11 +192,15 @@ CSV File → DictElement
 Custom Converters
 +++++++++++++++++
 
-It was previously mentioned that it is possible to create custom converters.
+As mentioned before it is possible to create custom converters.
 These custom converters can be used to integrate arbitrary data extraction and ETL capabilities
-into the caosdb-crawler and make these extensions available to any yaml specification.
+into the LinkAhead crawler and make these extensions available to any yaml specification.
+
+Tell the crawler about a custom converter
+=========================================
 
-The basic syntax for adding a custom converter to a yaml cfood definition file is:
+To use a custom converter, it must be defined in the ``Converters`` section of the CFood yaml file.
+The basic syntax for adding a custom converter to a definition file is:
 
 .. code-block:: yaml
 
@@ -202,7 +209,7 @@ The basic syntax for adding a custom converter to a yaml cfood definition file i
        package: <python>.<module>.<name>
        converter: <PythonClassName>
 
-The Converters-section can be either put into the first or second document of the cfood yaml file.
+The Converters section can be either put into the first or the second document of the cfood yaml file.
 It can be also part of a single-document yaml cfood file. Please refer to :doc:`the cfood documentation<cfood>` for more details.
 
 Details:
@@ -211,9 +218,16 @@ Details:
 - **<python>.<module>.<name>**: The name of the module where the converter class resides.
 - **<PythonClassName>**: Within this specified module there must be a class inheriting from base class :py:class:`caoscrawler.converters.Converter`.
 
+Implementing a custom converter
+===============================
+
+Converters inherit from the :py:class:`~caoscrawler.converters.Converter` class.
+
 The following methods are abstract and need to be overwritten by your custom converter to make it work:
 
-- :py:meth:`~caoscrawler.converters.Converter.create_children`
+:py:meth:`~caoscrawler.converters.Converter.create_children`:
+    Return a list of child StructureElement objects.
+
 - :py:meth:`~caoscrawler.converters.Converter.match`
 - :py:meth:`~caoscrawler.converters.Converter.typecheck`
 
-- 
GitLab