From e2c34f315336c28cd3c850cfdefa09163e69f06c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Henrik=20tom=20W=C3=B6rden?= <h.tomwoerden@indiscale.com>
Date: Wed, 6 Jan 2021 15:41:33 +0000
Subject: [PATCH] ENH: Catch FileNotFoundErrors that occur during crawling

---
 CHANGELOG.md                     |  1 +
 integrationtests/test.sh         |  6 +++++-
 src/caosadvancedtools/crawler.py | 13 +++++++++++++
 3 files changed, 19 insertions(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 106d703c..f4aa6af8 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -8,6 +8,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ### Added ###
 
+- Error handling for missing files when using the crawler
 - included the scifolder module
 - included the caosmodels module
 * `send_mail` function in `caosadvancedtools.serverside.helper` module
diff --git a/integrationtests/test.sh b/integrationtests/test.sh
index 27ec1c54..74ca823f 100755
--- a/integrationtests/test.sh
+++ b/integrationtests/test.sh
@@ -19,9 +19,13 @@ python3 -m pytest test_crawler_with_cfoods.py
 echo "make a change"
 pushd extroot
 egrep -liRZ 'A description of another example' . | xargs -0 -l sed -i -e 's/A description of another example/A description of this example/g'
+# temporarily move a file away to check that a missing file does not crash the crawler
+mv DataAnalysis/2010_TestProject/2019-02-03_something/README.xlsx DataAnalysis/2010_TestProject/2019-02-03_something/README.xlsx_back
 popd
 echo "run crawler"
-./crawl.py  / &> $OUT
+./crawl.py  / | tee $OUT
+# restore the file that was moved away before the crawler run
+mv extroot/DataAnalysis/2010_TestProject/2019-02-03_something/README.xlsx_back extroot/DataAnalysis/2010_TestProject/2019-02-03_something/README.xlsx
 # check whether there was something UNAUTHORIZED
 set -e
 grep "There where unauthorized changes" $OUT
diff --git a/src/caosadvancedtools/crawler.py b/src/caosadvancedtools/crawler.py
index 0207c8e3..ccabff5f 100644
--- a/src/caosadvancedtools/crawler.py
+++ b/src/caosadvancedtools/crawler.py
@@ -196,6 +196,9 @@ class Crawler(object):
                         logger.debug("{} matched\n{}.".format(
                             Cfood.__name__,
                             item))
+                    except FileNotFoundError as e:
+                        logger.info("Cannot access {}. However, it might be needed for"
+                                    " the correct execution".format(e.filename))
                     except DataInconsistencyError as e:
                         logger.debug(traceback.format_exc())
                         logger.debug(e)
@@ -227,6 +230,10 @@ class Crawler(object):
                 logger.debug(traceback.format_exc())
                 logger.debug(e)
                 remove_cfoods.append(cfood)
+            except FileNotFoundError as e:
+                logger.info("Cannot access {}. However, it might be needed for"
+                            " the correct execution".format(e.filename))
+                remove_cfoods.append(cfood)
             except Exception as e:
                 try:
                     DataModelProblems.evaluate_exception(e)
@@ -313,6 +320,9 @@ class Crawler(object):
 
                 cfood.update_identifiables()
                 self.push_identifiables_to_CaosDB(cfood)
+            except FileNotFoundError as e:
+                logger.info("Cannot access {}. However, it might be needed for"
+                            " the correct execution".format(e.filename))
             except DataInconsistencyError as e:
                 logger.debug(traceback.format_exc())
                 logger.debug(e)
@@ -534,6 +544,9 @@ carefully and if the changes are ok, click on the following link:
         logger.debug(cfood.to_be_updated)
         try:
             guard.safe_update(cfood.to_be_updated, unique=False)
+        except FileNotFoundError as e:
+            logger.info("Cannot access {}. However, it might be needed for"
+                        " the correct execution".format(e.filename))
         except ProhibitedException:
             try:
                 self.update_cache.insert(cfood.to_be_updated, self.run_id)
-- 
GitLab