diff --git a/README_SETUP.md b/README_SETUP.md index 9b7b27ec056583708a8773ebac49f37ff45d9fd4..19f051636952945fe76b2ab752264031ac43378d 100644 --- a/README_SETUP.md +++ b/README_SETUP.md @@ -34,7 +34,7 @@ For testing: 3. Start an empty (!) CaosDB instance (with the mounted extroot). The database will be cleared during testing, so it's important to use an empty instance. -4. Run `test.sh`. +4. Run `test.sh`. Note that this may modify content of the `integrationtests/extroot/` directory. ## Code Formatting `autopep8 -i -r ./` diff --git a/integrationtests/crawl.py b/integrationtests/crawl.py index bf72b5f74b463f9ece2bd047548dcb22e8d71dac..65600016ed5dff97d3794b61cf540b9d0505698d 100755 --- a/integrationtests/crawl.py +++ b/integrationtests/crawl.py @@ -43,7 +43,7 @@ except ModuleNotFoundError: return argparse.ArgumentParser() def print_success(text): - print("Success: "+text) + print("Success: " + text) def get_parser(): diff --git a/integrationtests/extroot/.cerate_dir b/integrationtests/extroot/.create_dir similarity index 100% rename from integrationtests/extroot/.cerate_dir rename to integrationtests/extroot/.create_dir diff --git a/integrationtests/extroot/DataAnalysis/2010_TestProject/2019-02-03/README.md b/integrationtests/extroot/DataAnalysis/2010_TestProject/2019-02-03/README.md index 71454e8909393b432ca74fa01e77b33d8b0644d5..ce0111b893a41a0e086b2bbf98e30a8ca7af3102 100644 --- a/integrationtests/extroot/DataAnalysis/2010_TestProject/2019-02-03/README.md +++ b/integrationtests/extroot/DataAnalysis/2010_TestProject/2019-02-03/README.md @@ -1,7 +1,7 @@ --- responsible: - Only Responsible -description: A description of another example analysis. +description: A description of this example analysis. 
sources: - file: "/ExperimentalData/2010_TestProject/2019-02-03/*.dat" diff --git a/integrationtests/extroot/ExperimentalData/2010_TestProject/2019-02-03/README.md b/integrationtests/extroot/ExperimentalData/2010_TestProject/2019-02-03/README.md index b7e5051c7bdbcdafb1bbd3a870b00feecfb109ff..b9d7ef5e43334fb3191d671921b94200f39a8f7b 100644 --- a/integrationtests/extroot/ExperimentalData/2010_TestProject/2019-02-03/README.md +++ b/integrationtests/extroot/ExperimentalData/2010_TestProject/2019-02-03/README.md @@ -1,7 +1,7 @@ --- responsible: - Only Responsible -description: A description of another example experiment. +description: A description of this example experiment. results: - file: "/ExperimentalData/2010_TestProject/2019-02-03/*.dat" diff --git a/integrationtests/extroot/Publications/Posters/2019-02-03_really_cool_finding/README.md b/integrationtests/extroot/Publications/Posters/2019-02-03_really_cool_finding/README.md index c95e37ecc569103d8c3a812e45f1a5110781ea26..6d378818385593b482fa80ef76c77e1c5460a061 100644 --- a/integrationtests/extroot/Publications/Posters/2019-02-03_really_cool_finding/README.md +++ b/integrationtests/extroot/Publications/Posters/2019-02-03_really_cool_finding/README.md @@ -1,7 +1,7 @@ --- responsible: - Only Responsible -description: A description of another example experiment. +description: A description of this example experiment. 
sources: - /DataAnalysis/2010_TestProject/2019-02-03/results.pdf diff --git a/integrationtests/extroot/SimulationData/2010_TestProject/2019-02-03/README.md b/integrationtests/extroot/SimulationData/2010_TestProject/2019-02-03/README.md index fba1bd48a89514cbff92f9d8bd518484ecaa624b..f978bb773f6a8e0ebf0d56f3b302425fe536cb9e 100644 --- a/integrationtests/extroot/SimulationData/2010_TestProject/2019-02-03/README.md +++ b/integrationtests/extroot/SimulationData/2010_TestProject/2019-02-03/README.md @@ -1,7 +1,7 @@ --- responsible: - Only Responsible -description: A description of another example experiment. +description: A description of this example experiment. results: - file: "*.dat" diff --git a/setup.py b/setup.py index 36b474ad7188ef06096c46c43aacf1d0135fe2d3..f26b126c2a589554ace736661aa3a685b3f671d3 100755 --- a/setup.py +++ b/setup.py @@ -156,6 +156,7 @@ def setup_package(): author_email='h.tomwoerden@indiscale.com', install_requires=["caosdb>=0.4.0", "openpyxl>=3.0.0", + "pandas>=1.2.0", "xlrd>=2.0", ], packages=find_packages('src'), diff --git a/src/caosadvancedtools/crawler.py b/src/caosadvancedtools/crawler.py index 1684c9709bfb7810231ffae7251cdc5805950c6e..0a36284a7aac3a2775d598ab6816cb974e0a87ce 100644 --- a/src/caosadvancedtools/crawler.py +++ b/src/caosadvancedtools/crawler.py @@ -310,8 +310,6 @@ class Crawler(object): if self.interactive and "y" != input("Do you want to continue? (y)"): return - logger.info("Inserting or updating Records...") - for cfood in cfoods: try: cfood.create_identifiables() @@ -544,6 +542,10 @@ carefully and if the changes are ok, click on the following link: logger.debug(cfood.to_be_updated) try: + if len(cfood.to_be_updated) > 0: + logger.info( + "Updating {} Records...".format( + len(cfood.to_be_updated))) guard.safe_update(cfood.to_be_updated, unique=False) except FileNotFoundError as e: logger.info("Cannot access {}. 
However, it might be needed for" @@ -605,6 +607,9 @@ carefully and if the changes are ok, click on the following link: logger.debug("No new entities to be inserted.") else: try: + logger.info( + "Inserting {} Records...".format( + len(missing_identifiables))) guard.safe_insert(missing_identifiables, unique=False) except Exception as e: DataModelProblems.evaluate_exception(e) diff --git a/src/doc/crawler.rst b/src/doc/crawler.rst index 7c95dad9ed10c025bc1baf811feb4534fc994175..0a710fa1e3ed2c8115f7209be30de758c0c23ec3 100644 --- a/src/doc/crawler.rst +++ b/src/doc/crawler.rst @@ -75,7 +75,7 @@ The crawler can be executed directly via a python script (usually called ``crawl.py``). The script prints the progress and reports potential problems. The exact behavior depends on your setup. However, you can have a look at the example in the -`tests <https://gitlab.com/caosdb/caosdb-advanced-user-tools/-/blob/main/integrationtests/full_test/crawl.py>`__. +`tests <https://gitlab.indiscale.com/caosdb/src/caosdb-advanced-user-tools/-/blob/main/integrationtests/crawl.py>`__. .. Note:: The crawler depends on the CaosDB Python client, so make sure to install :doc:`pycaosdb <caosdb-pylib:getting_started>`. @@ -86,14 +86,18 @@ Typically, an invocation looks like: .. code:: python - python3 crawl.py "/TestData/" + python3 crawl.py /someplace/ -In this case ``/TestData/`` identifies the path to be crawled **within -the CaosDB file system**. You can browse the CaosDB file system by +.. Note:: For trying out the above mentioned example crawler from the integration tests, + make sure that the ``extroot`` directory in the ``integrationtests`` folder is used as + CaosDB's extroot directory, and call the crawler indirectly via ``./test.sh``. + +In this case ``/someplace/`` identifies the path to be crawled **within +CaosDB's file system**. You can browse the CaosDB file system by opening the WebUI of your CaosDB instance and clicking on “File System”. 
In the backend, ``crawl.py`` starts a CQL query -``FIND File WHICH IS STORED AT /TestData/**`` and crawls the resulting +``FIND File WHICH IS STORED AT /someplace/**`` and crawls the resulting files according to your customized ``CFoods``. Crawling may consist of two distinct steps: 1. Insertion of files (use