From e2c34f315336c28cd3c850cfdefa09163e69f06c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20tom=20W=C3=B6rden?= <h.tomwoerden@indiscale.com> Date: Wed, 6 Jan 2021 15:41:33 +0000 Subject: [PATCH] ENH: Catch FileNotFoundErrors that occur during crawling --- CHANGELOG.md | 1 + integrationtests/test.sh | 6 +++++- src/caosadvancedtools/crawler.py | 13 +++++++++++++ 3 files changed, 19 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 106d703c..f4aa6af8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Added ### +- Error handling for missing files when using the crawler - included the scifolder module - included the caosmodels module * `send_mail` function in `caosadvancedtools.serverside.helper` module diff --git a/integrationtests/test.sh b/integrationtests/test.sh index 27ec1c54..74ca823f 100755 --- a/integrationtests/test.sh +++ b/integrationtests/test.sh @@ -19,9 +19,13 @@ python3 -m pytest test_crawler_with_cfoods.py echo "make a change" pushd extroot egrep -liRZ 'A description of another example' . | xargs -0 -l sed -i -e 's/A description of another example/A description of this example/g' +# remove a file to check that this does not lead to a crawler crash +mv DataAnalysis/2010_TestProject/2019-02-03_something/README.xlsx DataAnalysis/2010_TestProject/2019-02-03_something/README.xlsx_back popd echo "run crawler" -./crawl.py / &> $OUT +./crawl.py / | tee $OUT +# rename the moved file +mv extroot/DataAnalysis/2010_TestProject/2019-02-03_something/README.xlsx_back extroot/DataAnalysis/2010_TestProject/2019-02-03_something/README.xlsx # check whether there was something UNAUTHORIZED set -e grep "There where unauthorized changes" $OUT diff --git a/src/caosadvancedtools/crawler.py b/src/caosadvancedtools/crawler.py index 0207c8e3..ccabff5f 100644 --- a/src/caosadvancedtools/crawler.py +++ b/src/caosadvancedtools/crawler.py @@ -196,6 +196,9 @@ class Crawler(object): logger.debug("{} matched\n{}.".format( Cfood.__name__, item)) + except FileNotFoundError as e: + logger.info("Cannot access {}. However, it might be needed for" + " the correct execution".format(e.filename)) except DataInconsistencyError as e: logger.debug(traceback.format_exc()) logger.debug(e) @@ -227,6 +230,10 @@ class Crawler(object): logger.debug(traceback.format_exc()) logger.debug(e) remove_cfoods.append(cfood) + except FileNotFoundError as e: + logger.info("Cannot access {}. However, it might be needed for" + " the correct execution".format(e.filename)) + remove_cfoods.append(cfood) except Exception as e: try: DataModelProblems.evaluate_exception(e) @@ -313,6 +320,9 @@ class Crawler(object): cfood.update_identifiables() self.push_identifiables_to_CaosDB(cfood) + except FileNotFoundError as e: + logger.info("Cannot access {}. However, it might be needed for" + " the correct execution".format(e.filename)) except DataInconsistencyError as e: logger.debug(traceback.format_exc()) logger.debug(e) @@ -534,6 +544,9 @@ carefully and if the changes are ok, click on the following link: logger.debug(cfood.to_be_updated) try: guard.safe_update(cfood.to_be_updated, unique=False) + except FileNotFoundError as e: + logger.info("Cannot access {}. However, it might be needed for" + " the correct execution".format(e.filename)) except ProhibitedException: try: self.update_cache.insert(cfood.to_be_updated, self.run_id) -- GitLab