From 9bbb91e929f9543503ade21c3a9b8b468f4e3d5b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20tom=20W=C3=B6rden?= <h.tomwoerden@indiscale.com> Date: Tue, 1 Jun 2021 11:06:33 +0000 Subject: [PATCH] MAINT: make output of crawler a bit more precise --- README_SETUP.md | 2 +- integrationtests/crawl.py | 2 +- .../extroot/{.cerate_dir => .create_dir} | 0 .../2010_TestProject/2019-02-03/README.md | 2 +- .../2010_TestProject/2019-02-03/README.md | 2 +- .../2019-02-03_really_cool_finding/README.md | 2 +- .../2010_TestProject/2019-02-03/README.md | 2 +- setup.py | 4 ++-- src/caosadvancedtools/crawler.py | 9 +++++++-- src/doc/crawler.rst | 14 +++++++++----- 10 files changed, 24 insertions(+), 15 deletions(-) rename integrationtests/extroot/{.cerate_dir => .create_dir} (100%) diff --git a/README_SETUP.md b/README_SETUP.md index 9b7b27ec..19f05163 100644 --- a/README_SETUP.md +++ b/README_SETUP.md @@ -34,7 +34,7 @@ For testing: 3. Start an empty (!) CaosDB instance (with the mounted extroot). The database will be cleared during testing, so it's important to use an empty instance. -4. Run `test.sh`. +4. Run `test.sh`. Note that this may modify content of the `integrationtests/extroot/` directory. 
## Code Formatting `autopep8 -i -r ./` diff --git a/integrationtests/crawl.py b/integrationtests/crawl.py index bf72b5f7..65600016 100755 --- a/integrationtests/crawl.py +++ b/integrationtests/crawl.py @@ -43,7 +43,7 @@ except ModuleNotFoundError: return argparse.ArgumentParser() def print_success(text): - print("Success: "+text) + print("Success: " + text) def get_parser(): diff --git a/integrationtests/extroot/.cerate_dir b/integrationtests/extroot/.create_dir similarity index 100% rename from integrationtests/extroot/.cerate_dir rename to integrationtests/extroot/.create_dir diff --git a/integrationtests/extroot/DataAnalysis/2010_TestProject/2019-02-03/README.md b/integrationtests/extroot/DataAnalysis/2010_TestProject/2019-02-03/README.md index 71454e89..ce0111b8 100644 --- a/integrationtests/extroot/DataAnalysis/2010_TestProject/2019-02-03/README.md +++ b/integrationtests/extroot/DataAnalysis/2010_TestProject/2019-02-03/README.md @@ -1,7 +1,7 @@ --- responsible: - Only Responsible -description: A description of another example analysis. +description: A description of this example analysis. sources: - file: "/ExperimentalData/2010_TestProject/2019-02-03/*.dat" diff --git a/integrationtests/extroot/ExperimentalData/2010_TestProject/2019-02-03/README.md b/integrationtests/extroot/ExperimentalData/2010_TestProject/2019-02-03/README.md index b7e5051c..b9d7ef5e 100644 --- a/integrationtests/extroot/ExperimentalData/2010_TestProject/2019-02-03/README.md +++ b/integrationtests/extroot/ExperimentalData/2010_TestProject/2019-02-03/README.md @@ -1,7 +1,7 @@ --- responsible: - Only Responsible -description: A description of another example experiment. +description: A description of this example experiment. 
results: - file: "/ExperimentalData/2010_TestProject/2019-02-03/*.dat" diff --git a/integrationtests/extroot/Publications/Posters/2019-02-03_really_cool_finding/README.md b/integrationtests/extroot/Publications/Posters/2019-02-03_really_cool_finding/README.md index c95e37ec..6d378818 100644 --- a/integrationtests/extroot/Publications/Posters/2019-02-03_really_cool_finding/README.md +++ b/integrationtests/extroot/Publications/Posters/2019-02-03_really_cool_finding/README.md @@ -1,7 +1,7 @@ --- responsible: - Only Responsible -description: A description of another example experiment. +description: A description of this example experiment. sources: - /DataAnalysis/2010_TestProject/2019-02-03/results.pdf diff --git a/integrationtests/extroot/SimulationData/2010_TestProject/2019-02-03/README.md b/integrationtests/extroot/SimulationData/2010_TestProject/2019-02-03/README.md index fba1bd48..f978bb77 100644 --- a/integrationtests/extroot/SimulationData/2010_TestProject/2019-02-03/README.md +++ b/integrationtests/extroot/SimulationData/2010_TestProject/2019-02-03/README.md @@ -1,7 +1,7 @@ --- responsible: - Only Responsible -description: A description of another example experiment. +description: A description of this example experiment. results: - file: "*.dat" diff --git a/setup.py b/setup.py index 8902af8c..1d824f51 100755 --- a/setup.py +++ b/setup.py @@ -155,8 +155,8 @@ def setup_package(): author='Henrik tom Wörden', author_email='h.tomwoerden@indiscale.com', install_requires=["caosdb>=0.4.0", - "openpyxl>=3.0.0", - "xlrd==1.2.0", + "pandas>=1.2.0", + "xlrd>=2.0", ], packages=find_packages('src'), package_dir={'': 'src'}, diff --git a/src/caosadvancedtools/crawler.py b/src/caosadvancedtools/crawler.py index 1684c970..0a36284a 100644 --- a/src/caosadvancedtools/crawler.py +++ b/src/caosadvancedtools/crawler.py @@ -310,8 +310,6 @@ class Crawler(object): if self.interactive and "y" != input("Do you want to continue? 
(y)"): return - logger.info("Inserting or updating Records...") - for cfood in cfoods: try: cfood.create_identifiables() @@ -544,6 +542,10 @@ carefully and if the changes are ok, click on the following link: logger.debug(cfood.to_be_updated) try: + if len(cfood.to_be_updated) > 0: + logger.info( + "Updating {} Records...".format( + len(cfood.to_be_updated))) guard.safe_update(cfood.to_be_updated, unique=False) except FileNotFoundError as e: logger.info("Cannot access {}. However, it might be needed for" @@ -605,6 +607,9 @@ carefully and if the changes are ok, click on the following link: logger.debug("No new entities to be inserted.") else: try: + logger.info( + "Inserting {} Records...".format( + len(missing_identifiables))) guard.safe_insert(missing_identifiables, unique=False) except Exception as e: DataModelProblems.evaluate_exception(e) diff --git a/src/doc/crawler.rst b/src/doc/crawler.rst index 7c95dad9..0a710fa1 100644 --- a/src/doc/crawler.rst +++ b/src/doc/crawler.rst @@ -75,7 +75,7 @@ The crawler can be executed directly via a python script (usually called ``crawl.py``). The script prints the progress and reports potential problems. The exact behavior depends on your setup. However, you can have a look at the example in the -`tests <https://gitlab.com/caosdb/caosdb-advanced-user-tools/-/blob/main/integrationtests/full_test/crawl.py>`__. +`tests <https://gitlab.indiscale.com/caosdb/src/caosdb-advanced-user-tools/-/blob/main/integrationtests/crawl.py>`__. .. Note:: The crawler depends on the CaosDB Python client, so make sure to install :doc:`pycaosdb <caosdb-pylib:getting_started>`. @@ -86,14 +86,18 @@ Typically, an invocation looks like: .. code:: python - python3 crawl.py "/TestData/" + python3 crawl.py /someplace/ -In this case ``/TestData/`` identifies the path to be crawled **within -the CaosDB file system**. You can browse the CaosDB file system by +.. 
Note:: For trying out the above-mentioned example crawler from the integration tests, + make sure that the ``extroot`` directory in the ``integrationtests`` folder is used as + CaosDB's extroot directory, and call the crawler indirectly via ``./test.sh``. + +In this case ``/someplace/`` identifies the path to be crawled **within +CaosDB's file system**. You can browse the CaosDB file system by opening the WebUI of your CaosDB instance and clicking on “File System”. In the backend, ``crawl.py`` starts a CQL query -``FIND File WHICH IS STORED AT /TestData/**`` and crawls the resulting +``FIND File WHICH IS STORED AT /someplace/**`` and crawls the resulting files according to your customized ``CFoods``. Crawling may consist of two distinct steps: 1. Insertion of files (use -- GitLab