From 8e6b4fba8b2442a8fb265aeb95e57103dd164f24 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Henrik=20tom=20W=C3=B6rden?= <h.tomwoerden@indiscale.com>
Date: Thu, 8 Sep 2022 18:07:20 +0200
Subject: [PATCH] ENH: include authorization of inserts

---
 integrationtests/basic_example/test.py | 25 ++++++++++++++++++++++++-
 src/caoscrawler/crawl.py               | 23 ++++++++++++-----------
 unittests/test_tool.py                 |  6 ++----
 3 files changed, 38 insertions(+), 16 deletions(-)

diff --git a/integrationtests/basic_example/test.py b/integrationtests/basic_example/test.py
index cecd6533..08b0d83f 100755
--- a/integrationtests/basic_example/test.py
+++ b/integrationtests/basic_example/test.py
@@ -28,12 +28,13 @@
 module description
 """
 
+from caosadvancedtools.crawler import Crawler as OldCrawler
 import os
 from caosdb import EmptyUniqueQueryError
 import argparse
 import sys
 from argparse import RawTextHelpFormatter
-from caoscrawler import Crawler
+from caoscrawler import Crawler, SecurityMode
 import caosdb as db
 from caoscrawler.identifiable_adapters import CaosDBIdentifiableAdapter
 import pytest
@@ -170,6 +171,28 @@ def test_insertion(clear_database, usemodel, ident, crawler):
     assert len(ups) == 0
 
 
+def test_insert_auth(clear_database, usemodel, ident, crawler):
+    ins, ups = crawler.synchronize()
+
+    # Do a second run on the same data, there should be a new insert:
+    cr = Crawler(debug=True, identifiableAdapter=ident, securityMode=SecurityMode.RETRIEVE)
+    crawl_standard_test_directory(cr, "example_insert")
+    assert len(cr.target_data) == 3
+    ins, ups = cr.synchronize()
+    assert len(ins) == 1
+    assert not ins[0].is_valid()
+    nins, nups = OldCrawler.update_authorized_changes(cr.run_id)
+    assert nins == 1
+
+    # Do it again to check whether nothing is changed:
+    cr = Crawler(debug=True, identifiableAdapter=ident)
+    crawl_standard_test_directory(cr, "example_insert")
+    assert len(cr.target_data) == 3
+    ins, ups = cr.synchronize()
+    assert len(ins) == 0
+    assert len(ups) == 0
+
+
 def test_insertion_and_update(clear_database, usemodel, ident, crawler):
     ins, ups = crawler.synchronize()
 
diff --git a/src/caoscrawler/crawl.py b/src/caoscrawler/crawl.py
index e5b0e6ba..8886c5f8 100644
--- a/src/caoscrawler/crawl.py
+++ b/src/caoscrawler/crawl.py
@@ -777,12 +777,8 @@ class Crawler(object):
             if securityMode.value > SecurityMode.RETRIEVE.value:
                 db.Container().extend(to_be_inserted).insert()
             elif run_id is not None:
-
-                raise RuntimeError("You must not insert Entities since the Crawler was startet "
-                                   "with RETRIEVE only mode.")
-                # Caching forbidden inserts is currently not implemented
-                # cache = Cache()
-                # cache.insert(to_be_inserted, run_id)
+                update_cache = UpdateCache()
+                update_cache.insert(to_be_inserted, run_id, insert=True)
 
     @staticmethod
     def set_ids_and_datatype_of_parents_and_properties(rec_list):
@@ -845,16 +841,20 @@ class Crawler(object):
             self.execute_updates_in_list(to_be_updated, self.securityMode, self.run_id)
 
         update_cache = UpdateCache()
-        pending_changes = update_cache.get_updates(self.run_id)
+        pending_inserts = update_cache.get_inserts(self.run_id)
+        if pending_inserts:
+            Crawler.inform_about_pending_changes(
+                pending_inserts, self.run_id, self.crawled_directory, inserts=True)
 
-        if pending_changes:
+        pending_updates = update_cache.get_updates(self.run_id)
+        if pending_updates:
             Crawler.inform_about_pending_changes(
-                pending_changes, self.run_id, self.crawled_directory)
+                pending_updates, self.run_id, self.crawled_directory)
 
         return (to_be_inserted, to_be_updated)
 
     @staticmethod
-    def inform_about_pending_changes(pending_changes, run_id, path):
+    def inform_about_pending_changes(pending_changes, run_id, path, inserts=False):
         # Sending an Email with a link to a form to authorize updates is
         # only done in SSS mode
 
@@ -870,7 +870,8 @@ UNAUTHORIZED UPDATE ({} of {}):
 ____________________\n""".format(i + 1, len(pending_changes)) + str(el[3]))
         logger.info("There were unauthorized changes (see above). An "
                     "email was sent to the curator.\n"
-                    "You can authorize the updates by invoking the crawler"
+                    "You can authorize the " + ("inserts" if inserts else "updates")
+                    + " by invoking the crawler"
                     " with the run id: {rid}\n".format(rid=run_id))
 
     @staticmethod
diff --git a/unittests/test_tool.py b/unittests/test_tool.py
index b86bc7b8..59573756 100755
--- a/unittests/test_tool.py
+++ b/unittests/test_tool.py
@@ -605,13 +605,11 @@ def test_security_mode(updateCacheMock, upmock, insmock, ident):
     # remove one element
     del ident._records[-1]
     # insert forbidden
-    with pytest.raises(RuntimeError) as excinfo:
-        crawler.synchronize(commit_changes=True)
+    crawler.synchronize(commit_changes=True)
     assert crawler.run_id is not None
     insmock.assert_not_called()
     upmock.assert_not_called()
-    # as long as caching of inserts is not implemented this is not called
-    updateCacheMock.assert_not_called()
+    assert updateCacheMock.call_count == 1
     # reset counts
     reset_mocks([updateCacheMock, insmock, upmock])
     # restore original ident
-- 
GitLab