From bd1c2d9fa454bef8dd20f6cd63a992ec446cea9f Mon Sep 17 00:00:00 2001
From: Daniel <d.hornung@indiscale.com>
Date: Tue, 10 Dec 2024 14:29:29 +0100
Subject: [PATCH] MAINT: Tests, config files, documentation.

---
 README.md                                       | 10 ++++++++++
 end-to-end-tests/test_crawler.py                |  8 +++++++-
 pylinkahead.ini                                 |  8 ++++----
 src/ruqad/crawler.py                            | 13 ++++++++-----
 src/ruqad/resources/crawler-settings/cfood.yaml |  2 +-
 5 files changed, 30 insertions(+), 11 deletions(-)

diff --git a/README.md b/README.md
index a09687e..7f35de5 100644
--- a/README.md
+++ b/README.md
@@ -40,6 +40,16 @@ analysis.  For more information, look at the section "SCA" below.
 - Then run `make unittest` or `pytest unittests/`.
 
 ### E2E Tests
+
+#### LinkAhead setup ####
+
+- Start an (empty) LinkAhead instance.
+- Set up `pylinkahead.ini` to use this instance.
+- Call `./insert_datamodel.sh` in the `linkahead_setup` directory to set up the data model in
+  the LinkAhead instance.
+
+#### KADI token ####
+
 In order to run the E2E test, you need to create a personal access token (pat) in the public 
 [demo instance](https://demo-kadi4mat.iam.kit.edu). You can then run the test as follows:
 `KADITOKEN=<token> python -m pytest end-to-end-tests/test_kadi.py`
diff --git a/end-to-end-tests/test_crawler.py b/end-to-end-tests/test_crawler.py
index 42639cd..66e73a6 100644
--- a/end-to-end-tests/test_crawler.py
+++ b/end-to-end-tests/test_crawler.py
@@ -30,7 +30,9 @@ def test_crawl(capsys):
     """
     crawl a directory as it would be created by export from kadi and running a data quality check
     """
+    print(f"\nData directory: {DATADIR}")
     print(os.listdir(DATADIR))
+
     retval, ent_qc = trigger_crawler(os.fspath(DATADIR))
 
     stdout, stderr = capsys.readouterr()
@@ -46,6 +48,7 @@ def test_crawl(capsys):
     qc = {}
     for ent in ent_qc:
         pth = ent.get_property("ELNFile").value.path
+        # Get folder name ("1222" or "1223")
         match = re.match("/.*/.*/(?P<folder>[0-9]+)/.*\\.eln", pth)
         assert match is not None
         qc[match.group("folder")] = ent
@@ -54,6 +57,9 @@ def test_crawl(capsys):
     assert not qc["1222"].get_property("FAIRLicenseCheck").value
 
     # Check whether the information from "report.zip" is present:
-    for d in ("1222", "1223"):
+    for total, passed, d in ((20, 18, "1222"),
+                             (20, 18, "1223")):
         assert type(qc[d].get_property("numTotalChecks").value) == int
         assert type(qc[d].get_property("numPassingChecks").value) == int
+        assert qc[d].get_property("numTotalChecks").value == total
+        assert qc[d].get_property("numPassingChecks").value == passed
diff --git a/pylinkahead.ini b/pylinkahead.ini
index 326f8b6..fb782ac 100644
--- a/pylinkahead.ini
+++ b/pylinkahead.ini
@@ -4,8 +4,8 @@
 # - the location given in the env variable PYLINKAHEADINI
 
 [Connection]
-url=https://demo.indiscale.com/
-#url=https://localhost:10443/
+# url=https://demo.indiscale.com/
+url=https://localhost:10443/
 
 ## If this option is set, the SSL certificate will be ignored.  Use with care!
 ssl_insecure=1
@@ -17,8 +17,8 @@ username=admin
 ##
 ## DEFAULT: the password method is `plain`, now the password must be saved as
 ## plain text.
-password_method=plain
-password=caosdb
+password_method=input
+# password=caosdb
 
 
 ## OR: `input`: username is optional, password is entered by the user directly
diff --git a/src/ruqad/crawler.py b/src/ruqad/crawler.py
index 2dc803a..cf3284f 100644
--- a/src/ruqad/crawler.py
+++ b/src/ruqad/crawler.py
@@ -26,10 +26,13 @@ def trigger_crawler(target_dir: str) -> tuple[bool, list[db.Entity]]:
     caosdb-crawler -i crawler/identifiables.yaml -s update crawler/cfood.yaml <target_dir>
     ```
 
-    A tuple:
-    - 1st element of tuple: Return False in case of unsuccessful metadata validation
-      and True otherwise.
-    - 2nd element of tuple: list of quality check records.
+    Returns
+    -------
+
+    out : tuple[bool, list[db.Entity]]
+      - 1st element of tuple: ``False`` in case of unsuccessful metadata validation
+        and ``True`` otherwise.
+      - 2nd element of tuple: list of quality check records.
     """
 
     # insert all .zip and .eln files, if they do not yet exist
@@ -56,7 +59,7 @@ def trigger_crawler(target_dir: str) -> tuple[bool, list[db.Entity]]:
     entities = scan_directory(target_dir,
                               ruqad_crawler_settings.joinpath('cfood.yaml'))
 
-    ent_qc = []
+    ent_qc = []                 # Quality check result records
 
     # Show warning if license is not present in an eln file:
     for ent in entities:
diff --git a/src/ruqad/resources/crawler-settings/cfood.yaml b/src/ruqad/resources/crawler-settings/cfood.yaml
index 11e7925..3590556 100644
--- a/src/ruqad/resources/crawler-settings/cfood.yaml
+++ b/src/ruqad/resources/crawler-settings/cfood.yaml
@@ -1,6 +1,6 @@
 ---
 metadata:
-  crawler-version: 0.9.2
+  crawler-version: 0.10.2
   macros:
 ---
 Converters:
-- 
GitLab