diff --git a/.docker-base/Dockerfile b/.docker-base/Dockerfile index 2152183a410302df34d35ec6f514399678e0baaf..923924e75e03c6ca8346b17cdf87eda78efd766f 100644 --- a/.docker-base/Dockerfile +++ b/.docker-base/Dockerfile @@ -9,6 +9,34 @@ RUN apk add --no-cache py3-pip python3 python3-dev gcc make \ git bash curl gettext py3-requests RUN apk add --no-cache libffi-dev openssl-dev libc-dev libxslt libxslt-dev \ libxml2 libxml2-dev + +# install rust (needed for compiling a docker-compose dependency) +# This is necessary until alpine comes with an up to date RUST +# copied from https://github.com/rust-lang/docker-rust/blob/bbc7feb12033da3909dced4e88ddbb6964fbc328/1.50.0/alpine3.13/Dockerfile + +ENV RUSTUP_HOME=/usr/local/rustup \ + CARGO_HOME=/usr/local/cargo \ + PATH=/usr/local/cargo/bin:$PATH \ + RUST_VERSION=1.50.0 + +RUN set -eux; \ + apkArch="$(apk --print-arch)"; \ + case "$apkArch" in \ + x86_64) rustArch='x86_64-unknown-linux-musl'; rustupSha256='05c5c05ec76671d73645aac3afbccf2187352fce7e46fc85be859f52a42797f6' ;; \ + aarch64) rustArch='aarch64-unknown-linux-musl'; rustupSha256='6a8a480d8d9e7f8c6979d7f8b12bc59da13db67970f7b13161ff409f0a771213' ;; \ + *) echo >&2 "unsupported architecture: $apkArch"; exit 1 ;; \ + esac; \ + url="https://static.rust-lang.org/rustup/archive/1.23.1/${rustArch}/rustup-init"; \ + wget "$url"; \ + echo "${rustupSha256} *rustup-init" | sha256sum -c -; \ + chmod +x rustup-init; \ + ./rustup-init -y --no-modify-path --profile minimal --default-toolchain $RUST_VERSION --default-host ${rustArch}; \ + rm rustup-init; \ + chmod -R a+w $RUSTUP_HOME $CARGO_HOME; \ + rustup --version; \ + cargo --version; \ + rustc --version; + RUN pip3 install docker-compose==1.25 # Script for waiting on LA server diff --git a/.docker/Dockerfile b/.docker/Dockerfile index ca59395a90c747fc60a155c3fb5f8f264c60d42a..876f252299991f2fa4410994b73259c3593c2198 100644 --- a/.docker/Dockerfile +++ b/.docker/Dockerfile @@ -1,7 +1,9 @@ -FROM debian:10 +FROM debian:11 RUN apt-get update && \ apt-get install \ curl \ + libhdf5-dev \ + pkgconf \ python3 \ python3-pip \ python3-requests \ @@ -11,29 +13,22 @@ RUN apt-get update && \ tox \ git \ openjdk-11-jdk-headless \ - python-autopep8 \ + python3-autopep8 \ python3-pytest \ libxml2 \ -y + + COPY .docker/wait-for-it.sh /wait-for-it.sh ADD https://gitlab.com/api/v4/projects/13656973/repository/branches/dev \ pylib_version.json RUN git clone https://gitlab.com/caosdb/caosdb-pylib.git && \ cd caosdb-pylib && git checkout dev && pip3 install . -ADD https://gitlab.com/api/v4/projects/13656965/repository/branches/master \ - model_version.json -RUN git clone https://gitlab.com/caosdb/caosdb-models.git && \ - cd caosdb-models && pip3 install . -ADD https://gitlab.com/api/v4/projects/13601752/repository/branches/master \ - scifolder_version.json -RUN git clone \ - https://gitlab.com/henrik_indiscale/scifolder.git && \ - cd scifolder && pip3 install . +# At least recommonmark 0.6 required. +RUN pip3 install -U html2text pycodestyle pylint recommonmark sphinx-rtd-theme COPY . /git RUN rm -r /git/.git \ && mv /git/.docker/pycaosdb.ini /git/integrationtests -RUN cd /git && pip3 install . +RUN cd /git && pip3 install .[h5-crawler] WORKDIR /git/integrationtests -CMD /wait-for-it.sh caosdb-server:10443 -t 500 -- ./test.sh -# At least recommonmark 0.6 required. 
-RUN pip3 install recommonmark sphinx-rtd-theme
+CMD /wait-for-it.sh caosdb-server:10443 -t 500 -- ./test.sh --force
diff --git a/.docker/docker-compose.yml b/.docker/docker-compose.yml
index e859e4337653a41dd2e17a819760b18fe2185c5e..36964ee68b7e384267a08484524de1f72cdfad6d 100644
--- a/.docker/docker-compose.yml
+++ b/.docker/docker-compose.yml
@@ -7,7 +7,7 @@ services:
     networks:
       - caosnet
   caosdb-server:
-    image: "$CI_REGISTRY_INDISCALE/caosdb/src/caosdb-deploy:$CAOSDB_TAG"
+    image: "$CI_REGISTRY/caosdb/src/caosdb-deploy:$CAOSDB_TAG"
     user: 999:999
     depends_on:
       - sqldb
diff --git a/.gitignore b/.gitignore
index bea8a04f8e93b7659fdc4d7b8d5246a19b8759ad..e2526574b37539d054397d49bbefcadcc9dce654 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,4 +1,6 @@
 # -*- mode:conf; -*-
+# generated
+src/caosadvancedtools/version.py

 # compiled python and dist stuff
 *.egg
diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index 6f16302268efdc594c4ef9213830e62b4c06c9da..8ea83fb605ad4cb8bf451c495c78dd5e58952688 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -21,13 +21,9 @@
 # along with this program. If not, see <https://www.gnu.org/licenses/>.

 variables:
-  CI_REGISTRY_IMAGE: $CI_REGISTRY/caosdb/caosdb-advanced-user-tools/testenv:latest
-  CI_REGISTRY_IMAGE_BASE: $CI_REGISTRY/caosdb/caosdb-advanced-user-tools/base:latest
-  # When using dind, it's wise to use the overlayfs driver for
-  # improved performance.
+  CI_REGISTRY_IMAGE: $CI_REGISTRY/caosdb/src/caosdb-advanced-user-tools/testenv:latest
+  CI_REGISTRY_IMAGE_BASE: $CI_REGISTRY/caosdb/src/caosdb-advanced-user-tools/base:latest

-services:
-  - docker:19.03.0-dind

 stages:
   - setup
@@ -38,33 +34,37 @@ stages:
   - deploy

 test:
-  tags: [cached-dind]
+  tags: [docker]
+  services:
+    - docker:20.10.5-dind
+  variables:
+    # This is a workaround for the gitlab-runner health check mechanism when
+    # using the docker-dind service. The runner will otherwise guess the port
+    # wrong and the health check will time out.
+    SERVICE_PORT_2376_TCP_PORT: 2375
   stage: integrationtest
   image: $CI_REGISTRY_IMAGE_BASE
   script:
     - if [[ "$CAOSDB_TAG" == "" ]]; then
-        CAOSDB_TAG=dev-latest;
+        CAOSDB_TAG=dev;
       fi
     - echo $CAOSDB_TAG
     - time docker load < /image-cache/caosdb-advanced-testenv.tar || true
     - time docker load < /image-cache/mariadb.tar || true
-    - time docker load < /image-cache/caosdb.tar || true
+    - time docker load < /image-cache/caosdb-dev.tar || true
     - docker login -u gitlab-ci-token -p $CI_JOB_TOKEN $CI_REGISTRY
-    - docker login -u gitlab+deploy-token-ci-pull -p $TOKEN_CI_PULL $CI_REGISTRY_INDISCALE
-    - time docker pull $CI_REGISTRY_IMAGE
-    - time docker pull mariadb:10.4
-    - time docker pull $CI_REGISTRY_INDISCALE/caosdb/src/caosdb-deploy:$CAOSDB_TAG
     - EXEPATH=`pwd` CAOSDB_TAG=$CAOSDB_TAG docker-compose -f .docker/docker-compose.yml up -d
     - cd .docker
     - /bin/sh ./run.sh
    - cd ..
-    - docker logs docker_caosdb-server_1 &> ../caosdb_log.txt
-    - docker logs docker_sqldb_1 &> ../mariadb_log.txt
+    - docker logs docker_caosdb-server_1 &> caosdb_log.txt
+    - docker logs docker_sqldb_1 &> mariadb_log.txt
     - docker-compose -f .docker/docker-compose.yml down
     - rc=`cat .docker/result`
     - exit $rc
   dependencies: [cert]
+  needs: [cert]
   artifacts:
     paths:
       - caosdb_log.txt
@@ -76,11 +76,10 @@ build-testenv:
   tags: [cached-dind]
   image: docker:18.09
   stage: setup
-  only:
-    - schedules
-    - web
+  # Hint: do not use `only` here; the image always needs to be built since it
+  # contains the repo code
+  #only:
   script:
-    - df -h
     - docker login -u gitlab-ci-token -p $CI_JOB_TOKEN $CI_REGISTRY
     # use here general latest or specific branch latest...
     - docker build
@@ -97,6 +96,7 @@ cert:
   tags: [docker]
   stage: cert
   image: $CI_REGISTRY_IMAGE
+  needs: [build-testenv]
   artifacts:
     paths:
       - .docker/cert/
@@ -104,35 +104,43 @@ cert:
   script:
     - cd .docker
     - CAOSHOSTNAME=caosdb-server ./cert.sh
+
 style:
   tags: [docker]
   stage: style
   image: $CI_REGISTRY_IMAGE
+  needs: [build-testenv]
+  script:
+    - make style
+  allow_failure: true
+
+linting:
+  tags: [docker]
+  stage: style
+  image: $CI_REGISTRY_IMAGE
+  needs: [build-testenv]
   script:
-    # For the moment, ignore type comparisons in datamodel_problems.py
-    - autopep8 -ar --diff --exit-code --exclude ./src/caosadvancedtools/datamodel_problems.py .
-    - autopep8 -ar --diff --exit-code --ignore E721 ./src/caosadvancedtools/datamodel_problems.py
+    - make lint
   allow_failure: true

unittest:
  tags: [docker]
  stage: unittest
  image: $CI_REGISTRY_IMAGE
+  needs: [build-testenv]
  script:
    - python3 -c "import caosdb; print('CaosDB Version:', caosdb.__version__)"
    - tox

# Build the sphinx documentation and make it ready for deployment by Gitlab Pages
-# documentation:
-#   stage: deploy

# Special job for serving a static website. See https://docs.gitlab.com/ee/ci/yaml/README.html#pages
-pages:
-  stage: deploy
-  image: $CI_REGISTRY_IMAGE
+pages_prepare: &pages_prepare
   tags: [docker]
+  image: $CI_REGISTRY_IMAGE
+  stage: deploy
   only:
-    - dev
+    refs:
+      - /^release-.*$/
   script:
     - echo "Deploying"
     - make doc
@@ -140,3 +148,9 @@ pages:
   artifacts:
     paths:
       - public
+pages:
+  <<: *pages_prepare
+  only:
+    refs:
+      # version tags: v0.1.1
+      - /^v(\d+\.\d+\.\d+)$/
diff --git a/.gitlab/merge_request_templates/Default.md b/.gitlab/merge_request_templates/Default.md
deleted file mode 100644
index 77a95da1cc40c815e4952a1283d345af56e80461..0000000000000000000000000000000000000000
--- a/.gitlab/merge_request_templates/Default.md
+++ /dev/null
@@ -1,49 +0,0 @@
-# Summary
-
-    Insert a meaningful description for this merge request here. What is the
-    new/changed behavior? Which bug has been fixed? Are there related Issues?
-
-# Focus
-
-    Point the reviewer to the core of the code change. Where should they start
-    reading? What should they focus on (e.g. security, performance,
-    maintainability, user-friendliness, compliance with the specs, finding more
-    corner cases, concrete questions)?
-
-# Test Environment
-
-    How to set up a test environment for manual testing?
-
-# Check List for the Author
-
-Please, prepare your MR for a review. Be sure to write a summary and a
-focus and create gitlab comments for the reviewer. They should guide the
-reviewer through the changes, explain your changes and also point out open
-questions.
For further good practices have a look at [our review -guidelines](https://gitlab.com/caosdb/caosdb/-/blob/dev/REVIEW_GUIDELINES.md) - -- [ ] All automated tests pass -- [ ] Reference related Issues -- [ ] Up-to-date CHANGELOG.md -- [ ] Annotations in code (Gitlab comments) - - Intent of new code - - Problems with old code - - Why this implementation? - - -# Check List for the Reviewer - - -- [ ] I understand the intent of this MR -- [ ] All automated tests pass -- [ ] Up-to-date CHANGELOG.md -- [ ] The test environment setup works and the intended behavior is - reproducible in the test environment -- [ ] In-code documentation and comments are up-to-date. -- [ ] Check: Are there spezifications? Are they satisfied? - -For further good practices have a look at [our review guidelines](https://gitlab.com/caosdb/caosdb/-/blob/dev/REVIEW_GUIDELINES.md). - - -/assign me -/target_branch dev diff --git a/CHANGELOG.md b/CHANGELOG.md index 337e9265e4e291674a906b06d3ec79fc5e0dac1c..be44a47d1a0c79c8a4fa39f382d4d3a0e22439f6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,27 +8,105 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Added ### -* `send_mail` function in `caosadvancedtools.serverside.helper` module +### Changed ### + +### Deprecated ### + +### Removed ### + +### Fixed ### + +### Security ### + +## [0.4.1] - 2022-05-03 ## +(Henrik tom Wörden) + +### Changed ### + +- `JsonSchemaParser` now identifies `name` properties in the schema with the + CaosDB name property. + +### Fixed ### + +- [#40](https://gitlab.com/caosdb/caosdb-advanced-user-tools/-/issues/40) + `assure_object_is_in_list` now handles adding objects to an initially empty list correctly. + +## [0.4.0] - 2022-04-05 ## + +### Added ### + +- CFood that creates a Record for each line in a csv file +- `generic_analysis.py` allows to easily call scripts to perform analyses in + server side scripting [EXPERIMENTAL] +- **EXPERIMENTAL:** Models parser can import from Json Schema files now: + `models.parser.parse_model_from_json_schema(...)`. See the documentation of + `models.parser.JsonSchemaParser` for the limitations of the current + implementation. +- New keyword "role" in yaml data model that allows creation of Records and Files. +- It is now possible to set values of properties and default values of properties + directly in the yaml model. + +### Changed ### + +- `TableConverter` now converts int to float and vice versa to match the desired dtype. + +### Deprecated ### + +### Removed ### + +### Fixed ### + +- CaosDB internal properties `name`, `unit` and `description` can now be used via the `extern` + keyword in YAML datamodel specifications. 
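> Reviewer's aside on the 0.4.0 entries above: a minimal sketch of how the new `role` keyword and direct value-setting in the yaml data model might be combined. This is an assumption-laden illustration, not taken from this diff: `parse_model_from_string` and the exact `role`/`value` keyword spellings are inferred from the changelog and the `models.parser` yaml interface, and the model name `DefaultSettings` is made up.

```python
# Hedged sketch only: "role" and "value" usage is inferred from the changelog
# entries above; the model name "DefaultSettings" is a made-up example.
from caosadvancedtools.models.parser import parse_model_from_string

model_def = """
DefaultSettings:
  role: Record                 # new in 0.4.0: create a Record, not a RecordType
  obligatory_properties:
    temperature:
      datatype: DOUBLE
      value: 25.0              # new in 0.4.0: set a value directly in the model
"""
model = parse_model_from_string(model_def)
model.sync_data_model(noquestion=True)
```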
+
+### Security ###
+
+## [0.3.1] - 2021-12-06 ##
+
+### Added ###
+- `check_reference_field` function to check whether entities with provided ids
+  exist (for example when importing data from a table)
+- added the `datatypes` argument to `TableImporter` for columns that do not
+  need a special conversion function
+
+## [0.3.0] - 2021-11-02 ##
+
+### Added ###
+
+- Error handling for missing files when using the crawler
+- included the scifolder module
+- included the caosmodels module
+- `send_mail` function in `caosadvancedtools.serverside.helper` module
 - New class to collect possible problems with the data model
 - New class for checking and importing tables
 - Function to get a file path to a shared resource directory
-- Function to setup logging appropriate for server side scripts with webui
+- Function to setup logging appropriate for server side scripts with webui
+  output
 - New class for collecting information for exporting tables, e.g., to
   metadata repositories
 - new name parsing
 - new test for software folder structure
 - new assure_name_is function
-- two utility functions when working with files: NameCollector and
+- two utility functions when working with files: NameCollector and
+  get_file_via_download
 - Automated documentation builds: `make doc`
 - Crawler documentation
+- Proof-of-concept integration with Bloxberg.
+- Introduce a cfood that can create a Record structure based on the contents of an hdf5 file
+  h5py is now an optional dependency
+- table importer implementations for csv and tsv
+- string-in-list check for table imports
+- AbstractCFood has a new property, `update_flags`.

 ### Changed ###

-* `caosadvancedtools.serverside.helper.init_data_model` also checks the role
+- identifiables of single CFoods are now treated one after the other. This
+  allows them to have dependencies among each other if they are ordered
+  correctly
+- identifiables must have at least one property or a name
+- `caosadvancedtools.serverside.helper.init_data_model` also checks the role
   and data type of entities.
-* The `caosadvancedtools.table_importer.date_converter` now actually returns
+- The `caosadvancedtools.table_importer.date_converter` now actually returns
   `datetime.date` instance. A new
   `caosadvancedtools.table_importer.datetime_converter` replaces the old
   `date_converter` and returns a `datetime.datetime` instance.
@@ -37,18 +115,25 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - instead of `get_entity`, type-specific functions are used in `cfood.py`
   when the type of the entity in question is known.
 - Logger is used instead of `print` for errors in `crawler.py`.
-
-### Deprecated ###
-
-### Removed ###
+- complies with new exception handling, i.e., TransactionErrors with
+  children being raised in all cases of EntityErrors (see
+  [#32](https://gitlab.com/caosdb/caosdb-pylib/-/issues/32) in
+  caosdb-pylib)
+- `caosadvancedtools.cfood.assure_object_is_in_list` conducts in-place
+  updates if no `to_be_updated` object is supplied.

 ### Fixed ###

 - An exception in collect_information does no longer lead to a break down.
-* Fixed an issue where `caosadvancedtools.cache.UpdateCache` would
+- Removed dependency on discontinued xlrd version
+- Fixed an issue where `caosadvancedtools.cache.UpdateCache` would
   cause an `sqlite3.IntegrityError` if more than one change was cached
   for the same entity.
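> Reviewer's aside: the in-place behaviour of `assure_object_is_in_list` noted under Changed above is exercised by `integrationtests/test_assure_functions.py` later in this diff. A minimal sketch, reusing that test's entity names and its guard handling; treat it as an illustration, not the canonical usage.

```python
# Minimal sketch of the in-place update described above: with to_be_updated=None
# the record itself is modified (and, guard permitting, updated on the server).
# Entity names are borrowed from test_assure_functions.py further down.
import caosdb as db
from caosadvancedtools.cfood import assure_object_is_in_list
from caosadvancedtools.guard import global_guard, UPDATE

global_guard.level = UPDATE  # in-place updates require update permission
rec = db.execute_query("FIND TestRecord1", unique=True)
assure_object_is_in_list(2, rec, "TestIntListProperty", to_be_updated=None)
# "TestIntListProperty" of rec now contains 2; no separate update step needed.
```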
-
-### Security ###
+- #40 Insertion of identifiables with missing obligatory properties
+- Before, a Property with the datatype "LIST(TEXT)" would lead to the creation
+  of a RecordType. This is fixed now.
+- #52 `XLSimporter.read_xls` threw a wrong error when reading from a file with a wrong file extension.
+  Now, a `DataInconsistencyError` is raised instead of a ValueError.
+- List properties are no longer updated unnecessarily by the crawler.

 ## [0.2.0] - 2020-05-28 ##

diff --git a/FEATURES.md b/FEATURES.md
new file mode 100644
index 0000000000000000000000000000000000000000..44b2a5de7b1ff48da8e190a8b0f9a50ef58733cb
--- /dev/null
+++ b/FEATURES.md
@@ -0,0 +1,13 @@
+# Features
+
+## Stable
+To be filled.
+
+## Experimental
+
+- `generic_analysis.py` allows to easily call scripts to perform analyses in
+  server side scripting
+- Models parser can import from Json Schema files:
+  `models.parser.parse_model_from_json_schema(...)`. See the documentation of
+  `models.parser.JsonSchemaParser` for the limitations of the current
+  implementation.
diff --git a/Makefile b/Makefile
index cbac0ea0a77e5523529ef181d83ffb9738d72faf..d9b182cbd0b17490e9d81b900d6ba8cefadb1b64 100644
--- a/Makefile
+++ b/Makefile
@@ -21,7 +21,7 @@

 # This Makefile is a wrapper for several other scripts.

-.PHONY: help doc install
+.PHONY: help doc install unittest

 help:
	@echo 'Type `make doc` for documentation, or `make install` for (local) installation.'
@@ -30,4 +30,16 @@ doc:
	$(MAKE) -C src/doc html

 install:
-	@echo "Not implemented yet, use pip for installation."
+	pip3 install .
+
+unittest:
+	pytest-3 unittests
+
+style:
+	pycodestyle --count src unittests --exclude=swagger_client
+	autopep8 -ar --diff --exit-code --exclude swagger_client .
+.PHONY: style
+
+lint:
+	pylint --unsafe-load-any-extension=y -d all -e E,F --ignore=swagger_client src/caosadvancedtools
+.PHONY: lint
diff --git a/README.md b/README.md
index 5208a711f72a3daa919e9195a5a0b05413e3de3a..83a767476286acba98d113b8fa7ab6b482751230 100644
--- a/README.md
+++ b/README.md
@@ -1,30 +1,57 @@
-[](https://gitlab.com/caosdb/caosdb-advanced-user-tools/commits/master)
+# README

-Project migrated to https://gitlab.com/caosdb
-
-# Welcome
+## Welcome

 This is the **CaosDB Advanced User Tools** repository and a part of the
-CaosDB project. This project contains tools that are beyond the typical use of
+CaosDB project.
+This project contains tools that are beyond the typical use of
 the CaosDB python client. Especially, this includes the crawler which will
 typically be used by a data curator.

-# Setup
+## Setup

 Please read the [README_SETUP.md](README_SETUP.md) for instructions on how to
 setup this code.

+## Further Reading
+
+Please refer to the [official
+documentation](https://docs.indiscale.com/caosdb-advanced-user-tools/) for more
+information.
+
+## Contributing
+
+Thank you very much to all contributors—[past,
+present](https://gitlab.com/caosdb/caosdb/-/blob/dev/HUMANS.md), and prospective
+ones.

-# Further Reading
+### Code of Conduct

-Please refer to the [official gitlab repository of the CaosDB
-project](https://gitlab.com/caosdb/caosdb) for more information.
+By participating, you are expected to uphold our [Code of
+Conduct](https://gitlab.com/caosdb/caosdb/-/blob/dev/CODE_OF_CONDUCT.md).

-# License
+### How to Contribute

-Copyright (C) 2018 Research Group Biomedical Physics, Max Planck Institute for
-Dynamics and Self-Organization Göttingen.
+- You found a bug, have a question, or want to request a feature?
Please [create
+  an issue](https://gitlab.com/caosdb/caosdb-advanced-user-tools/-/issues).
+- You want to contribute code? Please fork the repository and create a merge
+  request in GitLab and choose this repository as target. Make sure to select
+  "Allow commits from members who can merge the target branch" under
+  Contribution when creating the merge request. This allows our team to work
+  with you on your request.
+- If you have a suggestion for the
+  [documentation](https://docs.indiscale.com/caosdb-advanced-user-tools/), the
+  preferred way is also a merge request as described above (the documentation
+  resides in `src/doc`). However, you can also create an issue for it.
+- You can also contact us at **info (AT) caosdb.de** and join the CaosDB
+  community on
+  [#caosdb:matrix.org](https://matrix.to/#/!unwwlTfOznjEnMMXxf:matrix.org).
+
+## License
+
+* Copyright (C) 2018 Research Group Biomedical Physics, Max Planck Institute
+  for Dynamics and Self-Organization Göttingen.
+* Copyright (C) 2020-2021 Indiscale GmbH <info@indiscale.com>

 All files in this repository are licensed under a [GNU Affero General Public
 License](LICENCE.md) (version 3 or later).
-
diff --git a/README_SETUP.md b/README_SETUP.md
index b9db16a9feba246aeae8e59574047ba0f9380a38..43047d554afbe8ffba11aef67b20dde44d29bdcf 100644
--- a/README_SETUP.md
+++ b/README_SETUP.md
@@ -8,23 +8,28 @@ git clone 'https://gitlab.com/caosdb/caosdb-advanced-user-tools'
 ```

 ## Dependencies
-Dependencies will be installed automatically if you use the below described procedure.
-- `caosdb>=0.4.0`
+Dependencies will be installed automatically if you use the procedure
+described below.
+- `caosdb>=0.6.0`
 - `openpyxl>=3.0.0`
 - `xlrd>=1.2.0`
+- `pandas>=1.2.0`
+- `numpy>=1.17.3`
+
+If you want to use the optional h5-crawler, the following dependencies will be
+installed additionally:
+- `h5py>=3.3.0`

 For testing:
-- `tox`
-- `scifolder`from https://gitlab.com/henrik_indiscale/scifolder
+- `tox`

 ## Installation
 - `pip install . --user`
 - `pip install tox --user`

-In order to run the tests you need to install the [scifolder
-package](https://gitlab.com/henrik_indiscale/scifolder) by Henrik tom
-Wörden.
+Optional h5-crawler:
+- `pip install .[h5-crawler] --user`

 ## Run Unit Tests
 `tox`
@@ -32,17 +37,19 @@ Wörden.
 ## Run Integration Tests Locally

 1. Change directory to `integrationtests/`.
-2. Mount `extroot` to the folder that will be used as
-   extroot. E.g. `sudo mount -o bind extroot
-   ../../caosdb-deploy/profiles/empty/paths/extroot` (or whatever path
-   the extroot of the empty profile to be used is located at).
-3. Start an empty (!) CaosDB instance (with the mounted extroot). The
-   database will be cleared during testing, so it's important to use
-   an empty insctance.
-4. Run `test.sh`.
+2. Mount `extroot` to the folder that will be used as extroot. E.g. `sudo mount
+   -o bind extroot ../../caosdb-deploy/profiles/debug/paths/extroot` (or
+   whatever path the extroot of the empty profile to be used is located at).
+3. Start (or restart) an empty (!) CaosDB instance (with the mounted
+   extroot). The database will be cleared during testing, so it's important to
+   use an empty instance. Make sure your configuration for the python caosdb
+   module is correct and allows connecting to the server.
+4. Run `test.sh`. Note that this may modify the content of the
+   `integrationtests/extroot/` directory.
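> Reviewer's aside: a quick way to verify the configuration requirement from step 3 before running `test.sh`. The version check mirrors the one used in `.gitlab-ci.yml` and `test.sh`; that `db.Info()` raises on an unreachable server or a misconfigured `pycaosdb.ini` is an assumption of this sketch.

```python
# Connection smoke test before running test.sh: the version check mirrors the
# one in the CI script; db.Info() is assumed to fail if the server is
# unreachable or pycaosdb.ini is misconfigured.
import caosdb as db

print("CaosDB Version:", db.__version__)
print(db.Info())  # server info; raises on connection problems
```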
 ## Code Formatting
-`autopep8 -i -r ./`
+
+`make style`

 ## Documentation
 #
diff --git a/RELEASE_GUIDELINES.md b/RELEASE_GUIDELINES.md
new file mode 100644
index 0000000000000000000000000000000000000000..7592b02d8084d3a5e6419ae66b61331026f2766c
--- /dev/null
+++ b/RELEASE_GUIDELINES.md
@@ -0,0 +1,43 @@
+# Release Guidelines for the CaosDB Python Client Library
+
+This document specifies release guidelines in addition to the general release
+guidelines of the CaosDB Project
+([RELEASE_GUIDELINES.md](https://gitlab.com/caosdb/caosdb/blob/dev/RELEASE_GUIDELINES.md)).
+
+## General Prerequisites
+
+* All tests are passing.
+* FEATURES.md is up-to-date and a public API is being declared in that document.
+* CHANGELOG.md is up-to-date.
+* dependencies in `setup.py` are up-to-date.
+
+## Steps
+
+1. Create a release branch from the dev branch. This prevents further changes
+   to the code base and a never-ending release process. Naming: `release-<VERSION>`
+
+2. Update CHANGELOG.md
+
+3. Check all general prerequisites.
+
+4. Update the version:
+   - `version` variables in `src/doc/conf.py`
+   - Version in [setup.py](./setup.py): Check the `MAJOR`, `MINOR`, `MICRO`, `PRE` variables and set
+     `ISRELEASED` to `True`. Use the possibility to issue pre-release versions for testing.
+
+5. Merge the release branch into the main branch.
+
+6. Tag the latest commit of the main branch with `v<VERSION>`.
+
+7. Delete the release branch.
+
+8. Remove possibly existing `./dist` directory with old release.
+
+9. Publish the release by executing `./release.sh`, which uploads the caosdb
+   module to the Python Package Index [pypi.org](https://pypi.org).
+
+10. Merge the main branch back into the dev branch.
+
+11. After the merge of main to dev, start a new development version by
+    setting `ISRELEASED` to `False` and by increasing at least the `MICRO`
+    version in [setup.py](./setup.py) and preparing CHANGELOG.md.
diff --git a/integrationtests/crawl.py b/integrationtests/crawl.py
index e4bf311e6700448aab0ebf1a5ab72bad6bf1296e..defed2cb4f5fb0a0f349898e555c5d25924e2f9b 100755
--- a/integrationtests/crawl.py
+++ b/integrationtests/crawl.py
@@ -32,8 +32,11 @@ import caosdb as db
 from caosadvancedtools.cfood import fileguide
 from caosadvancedtools.crawler import FileCrawler
 from caosadvancedtools.guard import INSERT, UPDATE
-from scifolder import (AnalysisCFood, ExperimentCFood, PublicationCFood,
-                       SimulationCFood, SoftwareCFood)
+from caosadvancedtools.scifolder import (AnalysisCFood, ExperimentCFood,
+                                         PublicationCFood, SimulationCFood,
+                                         SoftwareCFood, ResultTableCFood)
+
+from example_hdf5cfood import ExampleH5CFood

 try:
     from sss_helper import get_argument_parser, print_success
 except ModuleNotFoundError:
         return argparse.ArgumentParser()

     def print_success(text):
-        print("Success: "+text)
+        print("Success: " + text)


 def get_parser():
@@ -88,6 +91,8 @@ if __name__ == "__main__":
                           interactive=False, hideKnown=False,
                           cfood_types=[ExperimentCFood, AnalysisCFood, SoftwareCFood,
                                        PublicationCFood, SimulationCFood,
+                                       ResultTableCFood,
+                                       ExampleH5CFood
                                        ])

     if args.authorize_run:
diff --git a/integrationtests/create_analysis.py b/integrationtests/create_analysis.py
new file mode 100644
index 0000000000000000000000000000000000000000..1b7aa0d2d6671f14a3c65cf5ed135dfecb0aa69c
--- /dev/null
+++ b/integrationtests/create_analysis.py
@@ -0,0 +1,65 @@
+#!/usr/bin/env python3
+# encoding: utf-8
+#
+# ** header v3.0
+# This file is a part of the CaosDB Project.
+# +# Copyright (C) 2021 Indiscale GmbH <info@indiscale.com> +# Copyright (C) 2021 Henrik tom Wörden <h.tomwoerden@indiscale.com> +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see <https://www.gnu.org/licenses/>. +# +# ** end header +# + +""" +A small script that creates an Analysis Record that can be used for testing the +automated analysis pipeline. +""" + +import sys +from datetime import datetime + +import caosdb as db + + +def main(): + script = db.File( + file="../src/caosadvancedtools/serverside/examples/example_script.py", + path=("AutomatedAnalysis/scripts/" + + str(datetime.now())+"example_script.py"), + ) + script.insert() + + da = db.Record() + da.add_parent("Analysis") + da.add_property("scripts", value=[script], datatype=db.LIST(db.FILE)) + da.add_property("sources", + value=db.execute_query( + "FIND FILE which is stored at '**/timeseries.npy'", + unique=True), + ) + da.add_property("date", "2020-01-01") + da.add_property("identifier", "TEST") + only = db.execute_query( + "FIND RECORD Person WITH firstname=Only", + unique=True) + only.add_property(db.Property("Email").retrieve().id, "only@example.com") + only.update() + da.add_property("responsible", only) + da.insert() + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/integrationtests/example_hdf5cfood.py b/integrationtests/example_hdf5cfood.py new file mode 100644 index 0000000000000000000000000000000000000000..5485402d2042b2055a087b99abcba409095a7c70 --- /dev/null +++ b/integrationtests/example_hdf5cfood.py @@ -0,0 +1,56 @@ +#!/usr/bin/env python3 +# encoding: utf-8 +# +# ** header v3.0 +# This file is a part of the CaosDB Project. +# +# Copyright (C) 2021 IndiScale GmbH <www.indiscale.com> +# Copyright (C) 2021 Henrik tom Wörden <h.tomwoerden@indiscale.com> +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see <https://www.gnu.org/licenses/>. 
+# +# ** end header +# + +""" +An exemplary definition of a HDF5 CFood for integration testing +""" + +import caosdb as db +from caosadvancedtools.cfoods.h5 import H5CFood +from caosadvancedtools.scifolder import ExperimentCFood +from caosadvancedtools.scifolder.generic_pattern import readme_pattern + + +class ExampleH5CFood(H5CFood): + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self.root_name = "ExampleH5" + + @staticmethod + def get_re(): + return ExperimentCFood.get_re()[:-len(readme_pattern)] + r".*\.hdf5" + + def create_identifiables(self): + super().create_identifiables() + self.identifiable_root = db.Record() + self.identifiable_root.add_property("hdf5File", self.crawled_file) + self.identifiable_root.add_parent("ExampleH5") + self.identifiables.append(self.identifiable_root) + + def special_treatment(self, key, value, dtype): + if key == "attr_data_root": + return "single_attribute", value, dtype + + return key, value, dtype diff --git a/integrationtests/example_script.py b/integrationtests/example_script.py new file mode 120000 index 0000000000000000000000000000000000000000..f6e9b498ff97638cb4105e019424c0c677a7f414 --- /dev/null +++ b/integrationtests/example_script.py @@ -0,0 +1 @@ +../src/caosadvancedtools/serverside/examples/example_script.py \ No newline at end of file diff --git a/integrationtests/extroot/.cerate_dir b/integrationtests/extroot/.create_dir similarity index 100% rename from integrationtests/extroot/.cerate_dir rename to integrationtests/extroot/.create_dir diff --git a/integrationtests/extroot/DataAnalysis/2010_TestProject/2019-02-03/plot.py b/integrationtests/extroot/DataAnalysis/2010_TestProject/2019-02-03/plot.py index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..2c99b82a33e496eb31cf7fdc354767fe31919033 100644 --- a/integrationtests/extroot/DataAnalysis/2010_TestProject/2019-02-03/plot.py +++ b/integrationtests/extroot/DataAnalysis/2010_TestProject/2019-02-03/plot.py @@ -0,0 +1 @@ +import plot diff --git a/integrationtests/extroot/DataAnalysis/2010_TestProject/2019-02-03/results.pdf b/integrationtests/extroot/DataAnalysis/2010_TestProject/2019-02-03/results.pdf index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..09157f2c0961d412efea36ea0e56db5aac03fd36 100644 Binary files a/integrationtests/extroot/DataAnalysis/2010_TestProject/2019-02-03/results.pdf and b/integrationtests/extroot/DataAnalysis/2010_TestProject/2019-02-03/results.pdf differ diff --git a/integrationtests/extroot/ExperimentalData/2010_TestProject/2019-02-03/datafile.dat b/integrationtests/extroot/ExperimentalData/2010_TestProject/2019-02-03/datafile.dat index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..e29553fe01c8706e15a042e5ac6f85ed1a2cc8ce 100644 --- a/integrationtests/extroot/ExperimentalData/2010_TestProject/2019-02-03/datafile.dat +++ b/integrationtests/extroot/ExperimentalData/2010_TestProject/2019-02-03/datafile.dat @@ -0,0 +1 @@ +datadatadata diff --git a/integrationtests/extroot/ExperimentalData/2010_TestProject/2019-02-03/hdf5_dummy_file.hdf5 b/integrationtests/extroot/ExperimentalData/2010_TestProject/2019-02-03/hdf5_dummy_file.hdf5 new file mode 100644 index 0000000000000000000000000000000000000000..41bfb7ab3bcac19d90fd4f018cdd8118ae806eaf Binary files /dev/null and b/integrationtests/extroot/ExperimentalData/2010_TestProject/2019-02-03/hdf5_dummy_file.hdf5 differ diff --git a/integrationtests/extroot/ExperimentalData/2010_TestProject/2019-02-03/result_table_DepthTest.csv 
b/integrationtests/extroot/ExperimentalData/2010_TestProject/2019-02-03/result_table_DepthTest.csv new file mode 100644 index 0000000000000000000000000000000000000000..a29679afce78089f3cdd4e5e388262456668cd90 --- /dev/null +++ b/integrationtests/extroot/ExperimentalData/2010_TestProject/2019-02-03/result_table_DepthTest.csv @@ -0,0 +1,3 @@ +temperature [°C] ,depth +234.4,3.0 +344.6,5.1 diff --git a/integrationtests/extroot/ExperimentalData/2010_TestProject/2019-02-04/README.md b/integrationtests/extroot/ExperimentalData/2010_TestProject/2019-02-04/README.md new file mode 100644 index 0000000000000000000000000000000000000000..7de3bd15d29b93085322250a06adb9b8f389f8e4 --- /dev/null +++ b/integrationtests/extroot/ExperimentalData/2010_TestProject/2019-02-04/README.md @@ -0,0 +1,5 @@ +--- +responsible: +- Tom Wood +description: Something. +... diff --git a/integrationtests/extroot/Publications/Posters/2019-02-03_really_cool_finding/poster.pdf b/integrationtests/extroot/Publications/Posters/2019-02-03_really_cool_finding/poster.pdf index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..09157f2c0961d412efea36ea0e56db5aac03fd36 100644 Binary files a/integrationtests/extroot/Publications/Posters/2019-02-03_really_cool_finding/poster.pdf and b/integrationtests/extroot/Publications/Posters/2019-02-03_really_cool_finding/poster.pdf differ diff --git a/integrationtests/extroot/README.md b/integrationtests/extroot/README.md new file mode 100644 index 0000000000000000000000000000000000000000..4d45129ba23fffc825b2631e0eaa39f3d048427d --- /dev/null +++ b/integrationtests/extroot/README.md @@ -0,0 +1,3 @@ +This directory is mounted into the LinkAhead docker container, to allow the +inclusion of external file systems. For production use, please set the +`paths:extroot` option in the profile. diff --git a/integrationtests/extroot/SimulationData/2010_TestProject/2019-02-03_something/timeseries.npy b/integrationtests/extroot/SimulationData/2010_TestProject/2019-02-03_something/timeseries.npy index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..18da9b18cda23d411d0f2666629377dd7991ac8f 100644 Binary files a/integrationtests/extroot/SimulationData/2010_TestProject/2019-02-03_something/timeseries.npy and b/integrationtests/extroot/SimulationData/2010_TestProject/2019-02-03_something/timeseries.npy differ diff --git a/integrationtests/extroot/Software/2010_TestSoftware/2019-02-03_v0.1/README.md b/integrationtests/extroot/Software/2010_TestSoftware/2019-02-03_v0.1/README.md index d844a2ddf0d87d303c69b9107a366f2e34b6d03c..2057703d18dad94127037e05b3180603e9e37380 100644 --- a/integrationtests/extroot/Software/2010_TestSoftware/2019-02-03_v0.1/README.md +++ b/integrationtests/extroot/Software/2010_TestSoftware/2019-02-03_v0.1/README.md @@ -1,6 +1,6 @@ --- responsible: Responsible, Only -description: A description of this example analysis. +description: A description of another example analysis. 
sources: - file: "/ExperimentalData/2010_TestProject/2019-02-03/*.dat" diff --git a/integrationtests/extroot/Software/2010_TestSoftware/2019-02-03_v0.1/plot.py b/integrationtests/extroot/Software/2010_TestSoftware/2019-02-03_v0.1/plot.py index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..2c99b82a33e496eb31cf7fdc354767fe31919033 100644 --- a/integrationtests/extroot/Software/2010_TestSoftware/2019-02-03_v0.1/plot.py +++ b/integrationtests/extroot/Software/2010_TestSoftware/2019-02-03_v0.1/plot.py @@ -0,0 +1 @@ +import plot diff --git a/integrationtests/extroot/Software/2020NewProject0X/2020-02-03/README.md b/integrationtests/extroot/Software/2020NewProject0X/2020-02-03/README.md index a47ea6e105c20d050ddf2fdc8cd29d4685ba30bf..bd57ffe2c43fe6406672db2dd18902b8269569d4 100644 --- a/integrationtests/extroot/Software/2020NewProject0X/2020-02-03/README.md +++ b/integrationtests/extroot/Software/2020NewProject0X/2020-02-03/README.md @@ -1,7 +1,7 @@ --- responsible: - Only Responsible MPI DS -description: A description of this example analysis. +description: A description of another example analysis. sources: - file: "/ExperimentalData/2010_TestProject/2019-02-03/*.dat" diff --git a/integrationtests/extroot/Software/2020NewProject0X/2020-02-03/plot.py b/integrationtests/extroot/Software/2020NewProject0X/2020-02-03/plot.py index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..2c99b82a33e496eb31cf7fdc354767fe31919033 100644 --- a/integrationtests/extroot/Software/2020NewProject0X/2020-02-03/plot.py +++ b/integrationtests/extroot/Software/2020NewProject0X/2020-02-03/plot.py @@ -0,0 +1 @@ +import plot diff --git a/integrationtests/extroot/Software/2020NewProject0X/2020-02-04/README.md b/integrationtests/extroot/Software/2020NewProject0X/2020-02-04/README.md index 97b7137af372c127ee01458c9844b5ff10fd464b..b55907aaa2bb3794dbe04484c025146c3c7cd101 100644 --- a/integrationtests/extroot/Software/2020NewProject0X/2020-02-04/README.md +++ b/integrationtests/extroot/Software/2020NewProject0X/2020-02-04/README.md @@ -2,7 +2,7 @@ responsible: - Some Responsible - Responsible, No, MPI DS -description: A description of this example analysis. +description: A description of another example analysis. 
sources: - file: "/ExperimentalData/2010_TestProject/2019-02-03/*.dat" diff --git a/integrationtests/extroot/Software/2020NewProject0X/2020-02-04/plot.py b/integrationtests/extroot/Software/2020NewProject0X/2020-02-04/plot.py index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..2c99b82a33e496eb31cf7fdc354767fe31919033 100644 --- a/integrationtests/extroot/Software/2020NewProject0X/2020-02-04/plot.py +++ b/integrationtests/extroot/Software/2020NewProject0X/2020-02-04/plot.py @@ -0,0 +1 @@ +import plot diff --git a/integrationtests/filldb.sh b/integrationtests/filldb.sh index 98d22347bd2d40e8384a2a217452fd3ba5bc445f..9f55365eb595537b43caa9b197c8bc31ea1e69cb 100755 --- a/integrationtests/filldb.sh +++ b/integrationtests/filldb.sh @@ -7,4 +7,5 @@ python3 -m caosadvancedtools.loadFiles /opt/caosdb/mnt/extroot/SimulationData python3 -m caosadvancedtools.loadFiles /opt/caosdb/mnt/extroot/Publications python3 -m caosadvancedtools.loadFiles /opt/caosdb/mnt/extroot/Software python3 insert_model.py +python3 insert_some.py python3 crawl.py / diff --git a/integrationtests/insert_model.py b/integrationtests/insert_model.py index 2289f72e83545db0e7eacedfa52868507b6c4760..26bf478cdf0d3709e7c0c086fecf722b8c7f90fa 100755 --- a/integrationtests/insert_model.py +++ b/integrationtests/insert_model.py @@ -1,11 +1,33 @@ #!/usr/bin/env python3 import caosdb as db -from caosmodels.parser import parse_model_from_yaml +import h5py +from caosadvancedtools.cfoods.h5 import H5CFood +from caosadvancedtools.models.data_model import DataModel +from caosadvancedtools.models.parser import parse_model_from_yaml -model = parse_model_from_yaml("model.yml") -model.sync_data_model(noquestion=True) -if len(db.execute_query("FIND Property alias")) == 0: - al = db.Property(name="alias") - al.add_parent(name="name") - al.insert() +def main(): + + # for testing existing data model parts with the YAML Interface + db.RecordType(name="TestRT1", datatype=db.TEXT).insert() + db.Property(name="TestP1", datatype=db.TEXT).insert() + + model = parse_model_from_yaml("model.yml") + model.sync_data_model(noquestion=True) + + if len(db.execute_query("FIND Property alias")) == 0: + al = db.Property(name="alias") + al.add_parent(name="name") + al.insert() + + h5model = db.Container() + h5file = h5py.File( + 'extroot/ExperimentalData/2010_TestProject/2019-02-03/hdf5_dummy_file.hdf5', 'r') + H5CFood.create_structure(h5file, create_recordTypes=True, collection=h5model, + root_name="ExampleH5") + h5model = DataModel(h5model) + h5model.sync_data_model(noquestion=True) + + +if __name__ == "__main__": + main() diff --git a/integrationtests/insert_some.py b/integrationtests/insert_some.py new file mode 100644 index 0000000000000000000000000000000000000000..cf16a45ddf1f95ed261af1d9f18edfa1cbf4b450 --- /dev/null +++ b/integrationtests/insert_some.py @@ -0,0 +1,28 @@ +#!/usr/bin/env python3 +import caosdb as db +from caosadvancedtools.scifolder.experiment_cfood import dm + +# This inserts two identifiables. When no dependencies are possible among +# identifiables, it should not be possible to find both: the experiment +# identifiable would for example not reference the correct project Record +project = db.Record(name='2010_TestProject') +project.add_parent(name=dm.Project) +project.insert() + +pers = db.Record() +pers.add_parent("Person") +pers.add_property("lastname", "Wood") +pers.add_property("firstname", "Tom") +pers.insert() + +experiment = db.Record() +experiment.add_parent(name=dm.Experiment) +experiment.description = "Something." 
+experiment.add_property( + name=dm.date, value='2019-02-04') +experiment.add_property(name=dm.Project, value=project) +experiment.add_property( + name="identifier", value="empty_identifier") +experiment.add_property( + name="responsible", value=pers) +experiment.insert(flags={"force-missing-obligatory": "ignore"}) diff --git a/integrationtests/model.yml b/integrationtests/model.yml index 0a4ad381bfc119dd65d2c192f8de823deda525ae..9f7a62d1d0befbc7225353380c79db2f368c969c 100644 --- a/integrationtests/model.yml +++ b/integrationtests/model.yml @@ -9,6 +9,7 @@ Experiment: # TODO empty recommended_properties is a problem #recommended_properties: responsible: + datatype: LIST<Person> Project: SoftwareVersion: recommended_properties: @@ -18,6 +19,14 @@ SoftwareVersion: binaries: sourceCode: Software: +DepthTest: + obligatory_properties: + temperature: + datatype: DOUBLE + description: 'temp' + depth: + datatype: DOUBLE + description: 'temp' Person: obligatory_properties: firstName: @@ -30,16 +39,16 @@ Person: email: datatype: TEXT description: 'Email of a Person.' -responsible: - datatype: REFERENCE revisionOf: datatype: REFERENCE results: - datatype: REFERENCE + datatype: LIST<REFERENCE> sources: - datatype: REFERENCE + datatype: LIST<REFERENCE> scripts: - datatype: REFERENCE + datatype: LIST<REFERENCE> +single_attribute: + datatype: LIST<INTEGER> Simulation: obligatory_properties: date: @@ -50,6 +59,9 @@ Analysis: date: identifier: responsible: + suggested_properties: + mean_value: + datatype: DOUBLE Publication: Thesis: inherit_from_suggested: @@ -66,3 +78,8 @@ Presentation: Report: inherit_from_suggested: - Publication +hdf5File: + datatype: REFERENCE +extern: + - TestRT1 + - TestP1 diff --git a/integrationtests/test.sh b/integrationtests/test.sh index 51a05356cadcafba9efdb5cdde3807476548e275..700d88160b08652b0c5257d8ba819e277edb2971 100755 --- a/integrationtests/test.sh +++ b/integrationtests/test.sh @@ -1,50 +1,82 @@ #!/bin/bash +if [ "$1" != "--force" ] +then + echo "Warning: For these tests, the whole database will be deleted. Do you want to proceed? (yes/Exit)" + read safety + if [ -z $safety ] + then + echo "Exiting..." + exit 0 + elif [ $safety != "yes" ] + then + echo "Exiting..." + exit 0 + fi +fi OUT=/tmp/crawler.output ls cat pycaosdb.ini python3 -c "import caosdb; print('CaosDB Version:', caosdb.__version__)" rm -rf cache.db +set -e echo "Clearing database" python3 clear_database.py echo "Testing crawler without cfoods" python3 -m pytest test_crawler_basics.py echo "Testing caching" python3 -m pytest test_cache.py +echo "Testing models" +python3 -m pytest test_data_model.py +echo "Testing cfood functionality" +python3 -m pytest test_assure_functions.py echo "Filling the database" ./filldb.sh echo "Testing the crawler database" python3 -m pytest test_crawler_with_cfoods.py echo "make a change" -pushd extroot -egrep -liRZ 'A description of another example' . | xargs -0 -l sed -i -e 's/A description of another example/A description of this example/g' -popd +cd extroot +egrep -liRZ 'A description of another example' . \ + | xargs -0 -l sed -i -e 's/A description of another example/A description of this example/g' +# remove a file to check that this does not lead to a crawler crash +mv DataAnalysis/2010_TestProject/2019-02-03_something/README.xlsx \ + DataAnalysis/2010_TestProject/2019-02-03_something/README.xlsx_back +cd .. 
echo "run crawler" ./crawl.py / | tee $OUT +# rename the moved file +mv extroot/DataAnalysis/2010_TestProject/2019-02-03_something/README.xlsx_back \ + extroot/DataAnalysis/2010_TestProject/2019-02-03_something/README.xlsx # check whether there was something UNAUTHORIZED -cat $OUT -set -e grep "There where unauthorized changes" $OUT # get the id of the run which is the last field of the output string RUN_ID=$(grep "run id:" $OUT | awk '{ print $NF }') echo $RUN_ID echo "run crawler again" echo "./crawl.py -a $RUN_ID /" -./crawl.py -a $RUN_ID / > $OUT +./crawl.py -a $RUN_ID / | tee $OUT set +e if grep "There where unauthorized changes" $OUT then exit 1 fi set -e -echo "undo changes" -pushd extroot -egrep -liRZ 'A description of this example' . | xargs -0 -l sed -i -e 's/A description of this example/A description of another example/g' -popd +echo "Undoing previous changes to extroot content..." +cd extroot +egrep -liRZ 'A description of this example' . \ + | xargs -0 -l sed -i -e 's/A description of this example/A description of another example/g' +cd .. +echo "Done." python3 test_table.py # TODO the following test deletes lots of the data inserted by the crawler echo "Testing im and export" python3 test_im_und_export.py + +# automated analysis +# for some reason the loadFiles of sim data has to be called again +python3 -m caosadvancedtools.loadFiles /opt/caosdb/mnt/extroot/SimulationData +python3 create_analysis.py + # Better safe than sorry: python3 clear_database.py @@ -56,5 +88,11 @@ python3 -m pytest test_crawl_with_datamodel_problems.py echo "Testing table export" python3 -m pytest test_base_table_exporter_integration.py +echo "Testing json-schema datamodel parser" +python3 -m pytest test_json_schema_datamodel_parser.py + +echo "Testing yaml datamodel parser" +python3 -m pytest test_yaml_parser.py + # Obsolete due to teardown in the above test. # echo "/n/n/n YOU NEED TO RESTART THE SERVER TO REDO TESTS!!!" diff --git a/integrationtests/test_assure_functions.py b/integrationtests/test_assure_functions.py new file mode 100644 index 0000000000000000000000000000000000000000..b1c731dbbf25f33b54fc3a005402f292525d2d05 --- /dev/null +++ b/integrationtests/test_assure_functions.py @@ -0,0 +1,117 @@ +#!/usr/bin/env python +# encoding: utf-8 +# +# This file is a part of the CaosDB Project. +# +# Copyright (C) 2022 IndiScale GmbH <info@indiscale.com> +# Copyright (C) 2021 University Medical Center Göttingen, Institute for Medical Informatics +# Copyright (C) 2021 Florian Spreckelsen <florian.spreckelsen@med.uni-goettingen.de> +# Copyright (C) 2022 Florian Spreckelsen <f.spreckelsen@indiscale.com> +# +# This program is free software: you can redistribute it and/or modify it under +# the terms of the GNU Affero General Public License as published by the Free +# Software Foundation, either version 3 of the License, or (at your option) any +# later version. +# +# This program is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +# FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more +# details. +# +# You should have received a copy of the GNU Affero General Public License along +# with this program. If not, see <https://www.gnu.org/licenses/>. +"""Integration tests for the `assure_...` functions from +`caosadvancedtools.cfood`. They mainly test the in-place updates when +no `to_be_updated` is specified. 
+ +""" +import caosdb as db + +from caosadvancedtools.cfood import (assure_object_is_in_list) +from caosadvancedtools.guard import (global_guard, RETRIEVE, UPDATE) + + +def setup(): + """Delete all test entities.""" + db.execute_query("FIND Test*").delete(raise_exception_on_error=False) + + +def setup_module(): + """Allow all updates and delete test data""" + global_guard.level = UPDATE + setup() + + +def teardown_module(): + """Reset guard level and delete test data.""" + global_guard.level = RETRIEVE + setup() + + +def test_assure_list_in_place(): + """Test an in-place update with `assure_object_is_in_list`.""" + int_list_prop = db.Property(name="TestIntListProperty", + datatype=db.LIST(db.INTEGER)).insert() + rt1 = db.RecordType(name="TestType1").add_property( + name=int_list_prop.name).insert() + rec1 = db.Record(name="TestRecord1").add_parent(rt1) + rec1.add_property(name=int_list_prop.name, value=[1]).insert() + + # Nothing should happen: + assure_object_is_in_list(1, rec1, int_list_prop.name, to_be_updated=None) + assert len(rec1.get_property(int_list_prop.name).value) == 1 + assert 1 in rec1.get_property(int_list_prop.name).value + + # Insertion should happen in-place + assure_object_is_in_list(2, rec1, int_list_prop.name, to_be_updated=None) + assert len(rec1.get_property(int_list_prop.name).value) == 2 + assert 2 in rec1.get_property(int_list_prop.name).value + + # Better safe than sorry -- test for reference properties, too. + ref_rt = db.RecordType(name="TestRefType").insert() + ref_rec1 = db.Record(name="TestRefRec1").add_parent(ref_rt).insert() + ref_rec2 = db.Record(name="TestRefRec2").add_parent(ref_rt).insert() + ref_rec3 = db.Record(name="TestRefRec3").add_parent(ref_rt).insert() + rt2 = db.RecordType(name="TestType2").add_property( + name=ref_rt.name, datatype=db.LIST(ref_rt.name)).insert() + rec2 = db.Record(name="TestRecord2").add_parent(rt2) + rec2.add_property(name=ref_rt.name, value=[ref_rec1], + datatype=db.LIST(ref_rt.name)).insert() + + # Again, nothing should happen + assure_object_is_in_list(ref_rec1, rec2, ref_rt.name, to_be_updated=None) + assert len(rec2.get_property(ref_rt.name).value) == 1 + assert ref_rec1.id in rec2.get_property(ref_rt.name).value + + # In-place update with two additional references + assure_object_is_in_list([ref_rec2, ref_rec3], + rec2, ref_rt.name, to_be_updated=None) + assert len(rec2.get_property(ref_rt.name).value) == 3 + assert ref_rec2.id in rec2.get_property(ref_rt.name).value + assert ref_rec3.id in rec2.get_property(ref_rt.name).value + + +def test_add_to_empty_list(): + """See https://gitlab.com/caosdb/caosdb-advanced-user-tools/-/issues/40.""" + # @author Florian Spreckelsen + # @date 2022-04-19 + referenced_rt = db.RecordType(name="TestReferencedType").insert() + list_prop = db.Property(name="TestListProp", + datatype=db.LIST(referenced_rt)).insert() + referencing_rt = db.RecordType( + name="TestReferencingType").add_property(list_prop).insert() + + db.Record(name="TestReferencedRecord").add_parent(referenced_rt).insert() + db.Record(name="TestReferencingRecord").add_parent( + referencing_rt).add_property(list_prop, value=[]).insert() + + referenced_rec = db.execute_query("FIND TestReferencedRecord", unique=True) + referencing_rec = db.execute_query( + "FIND TestReferencingRecord", unique=True) + + assure_object_is_in_list(referenced_rec, referencing_rec, list_prop.name) + + referencing_rec = db.execute_query( + "FIND TestReferencingRecord", unique=True) + assert referencing_rec.get_property(list_prop.name).value == [ + 
referenced_rec.id] diff --git a/integrationtests/test_base_table_exporter_integration.py b/integrationtests/test_base_table_exporter_integration.py index 1c9158bd1d9600884571957d4916939f82c1a9ca..9d79e857fe706d78103ade3b92ee38498a2a1607 100644 --- a/integrationtests/test_base_table_exporter_integration.py +++ b/integrationtests/test_base_table_exporter_integration.py @@ -23,6 +23,7 @@ # ** end header # import caosdb as db +import pytest from caosadvancedtools import table_export as te @@ -85,8 +86,11 @@ def setup_module(): pass +@pytest.fixture(autouse=True) def setup(): - """No further setup""" + """Same as module setup.""" + setup_module() + yield None setup_module() diff --git a/integrationtests/test_crawl_with_datamodel_problems.py b/integrationtests/test_crawl_with_datamodel_problems.py index 3089bf4ce60093206e42477d740ead5854a9debc..0c6a145afdab682f82af09a17fb9aa0770769959 100644 --- a/integrationtests/test_crawl_with_datamodel_problems.py +++ b/integrationtests/test_crawl_with_datamodel_problems.py @@ -20,19 +20,22 @@ # along with this program. If not, see <https://www.gnu.org/licenses/>. # # ** end header -"""Test whether the crawler correctly identifies the data model -problems caused by a faulty model. + +"""Test whether the crawler correctly identifies the data model problems caused by a faulty model. """ + import caosdb as db from caosadvancedtools import loadFiles from caosadvancedtools.cfood import fileguide from caosadvancedtools.crawler import FileCrawler from caosadvancedtools.datamodel_problems import DataModelProblems from caosadvancedtools.guard import INSERT -from caosmodels.parser import parse_model_from_yaml -from scifolder import (AnalysisCFood, ExperimentCFood, PublicationCFood, - SimulationCFood) +from caosadvancedtools.models.parser import parse_model_from_yaml +from caosadvancedtools.scifolder import (AnalysisCFood, ExperimentCFood, + PublicationCFood, SimulationCFood) + +from insert_model import main as insert_model def setup_module(): @@ -67,8 +70,7 @@ def test_crawler_with_data_model_problems(): prefix="", dryrun=False, forceAllowSymlinks=False) # load and damage the model - model = parse_model_from_yaml("model.yml") - model.sync_data_model(noquestion=True) + insert_model() deleted_entities = {"Experiment", "Poster", "results"} for ent in deleted_entities: @@ -89,5 +91,6 @@ def test_crawler_with_data_model_problems(): # There should be datamodel problems assert len(DataModelProblems.missing) > 0 + # Deleted entities should have been identified: - assert deleted_entities.issubset(DataModelProblems.missing) + assert DataModelProblems.missing.issubset(deleted_entities) diff --git a/integrationtests/test_crawler_basics.py b/integrationtests/test_crawler_basics.py index 85fca282c8546ad1e7f6a708a2eaf46e374a528f..7da90844f14cf0d1eaded9d4fc8f37320da46aad 100644 --- a/integrationtests/test_crawler_basics.py +++ b/integrationtests/test_crawler_basics.py @@ -65,6 +65,7 @@ class CrawlerTest(unittest.TestCase): self.rec2.add_parent(name="Test_Type_2") self.rec3 = db.Record() self.rec3.add_parent(name="Test_Type_3") + self.rec3.add_property(name="Test_Prop", value="Test") def test_check_existence(self): # This hasn't been inserted yet: @@ -92,6 +93,7 @@ class CrawlerTest(unittest.TestCase): old_id = id(identifiables[0]) reference_to_first = identifiables[0] assert reference_to_first is identifiables[0] + Crawler.find_or_insert_identifiables(identifiables) for el in identifiables: @@ -107,6 +109,7 @@ class CrawlerTest(unittest.TestCase): def tearDown(self): setup_module() + 
# Delete nameless entities for el in [self.rec1, self.rec2, self.rec3]: try: diff --git a/integrationtests/test_crawler_with_cfoods.py b/integrationtests/test_crawler_with_cfoods.py index c39c3fc67d7ca30e3d013ac205ef398de216ad9c..4efef87cef52e4a2a20a615afe210c32f52a276a 100755 --- a/integrationtests/test_crawler_with_cfoods.py +++ b/integrationtests/test_crawler_with_cfoods.py @@ -26,6 +26,7 @@ import os import unittest import caosdb as db +from caosdb.apiutils import retrieve_entity_with_id def get_entity_with_id(eid): @@ -34,6 +35,14 @@ def get_entity_with_id(eid): class CrawlerTest(unittest.TestCase): def test_experiment(self): + + ######################## + # # dummy for dependency test experiment # # + ######################## + exp = db.execute_query( + "FIND Experiment with date=2019-02-04 and identifier=empty_identifier", + unique=True) + ######################## # # first experiment # # ######################## @@ -57,6 +66,17 @@ class CrawlerTest(unittest.TestCase): datfile.description) assert os.path.basename(datfile.path) == "datafile.dat" + # There should be two DepthTest Properties + depthtests = exp.get_property("DepthTest") + assert depthtests is not None + assert len(depthtests.value) == 2 + depthtest = db.Record(id=depthtests.value[0]) + depthtest.retrieve() + assert "DepthTest" in [p.name for p in depthtest.get_parents()] + assert 234.4 == depthtest.get_property("temperature").value + assert "°C" == depthtest.get_property("temperature").unit + assert 3.0 == depthtest.get_property("depth").value + # Should have a responsible person self.assertIsNotNone(exp.get_property("responsible")) person = db.Record(id=exp.get_property("responsible").value[0]) @@ -478,3 +498,17 @@ class CrawlerTest(unittest.TestCase): # Should have a description self.assertIsNotNone(ana.description) + + def test_exampleh5(self): + examp = db.execute_query("FIND Record ExampleH5", unique=True) + + for prop in examp.properties: + if prop.name == 'group_level1_a': + self.assertTrue(retrieve_entity_with_id(prop.value).get_property("group_level2_aa") is not None) + self.assertTrue(retrieve_entity_with_id(prop.value).get_property("group_level1_a") is None) + elif prop.name == 'group_level1_b': + self.assertTrue(retrieve_entity_with_id(prop.value).get_property("level1_b_floats") is not None) + elif prop.name == 'group_level1_c': + self.assertTrue(retrieve_entity_with_id(prop.value).get_property("level1_c_floats") is not None) + elif prop.name == 'root_integers': + self.assertTrue(retrieve_entity_with_id(prop.value).get_property("single_attribute") is not None) diff --git a/integrationtests/test_data_model.py b/integrationtests/test_data_model.py new file mode 100644 index 0000000000000000000000000000000000000000..2949fa81727a6c61a8646a48c249204fa87542d8 --- /dev/null +++ b/integrationtests/test_data_model.py @@ -0,0 +1,63 @@ +import unittest + +import caosdb as db +from caosadvancedtools.models.data_model import DataModel + + +class DataModelTest(unittest.TestCase): + def test_creation(self): + # create RT and one property + dm = DataModel() + dm.append(db.RecordType(name="TestRecord")) + dm.append(db.Property(name="testproperty", datatype=db.INTEGER)) + + dm.sync_data_model(noquestion=True) + db.execute_query("FIND RECORDTYPE TestRecord", unique=True) + db.execute_query("FIND PROPERTY testproperty", unique=True) + + # add the property to the RT + dm = DataModel() + dm.extend([ + db.RecordType(name="TestRecord").add_property(name="testproperty"), + db.Property(name="testproperty", datatype=db.INTEGER)]) + 
+        dm.sync_data_model(noquestion=True)
+        rt = db.execute_query("FIND RECORDTYPE TestRecord", unique=True)
+        assert rt.get_property("testproperty") is not None
+
+        # replace the one property
+        dm = DataModel([
+            db.RecordType(name="TestRecord").add_property(name="test"),
+            db.Property(name="test", datatype=db.INTEGER)])
+        dm.sync_data_model(noquestion=True)
+        rt = db.execute_query("FIND RECORDTYPE TestRecord", unique=True)
+        assert rt.get_property("test") is not None
+
+    def test_missing(self):
+        # Test sync with missing property
+        # insert the property
+        dm = DataModel([db.Property(name="testproperty", datatype=db.INTEGER)])
+        dm.sync_data_model(noquestion=True)
+        # insert RT using the property separately
+        maintained = {"one": db.RecordType(name="TestRecord").add_property(
+            name="testproperty")}
+        dm = DataModel(maintained.values())
+        dm.sync_data_model(noquestion=True)
+        rt = db.execute_query("FIND RECORDTYPE TestRecord", unique=True)
+        assert rt.get_property("testproperty") is not None
+
+    def test_get_existing_entities(self):
+        db.RecordType(name="TestRecord").insert()
+        c = db.Container().extend([
+            db.Property(name="test"),
+            db.RecordType(name="TestRecord")])
+        exist = DataModel.get_existing_entities(c)
+        assert len(exist) == 1
+        assert exist[0].name == "TestRecord"
+
+    def tearDown(self):
+        try:
+            tests = db.execute_query("FIND test*")
+            tests.delete()
+        except Exception:
+            pass
diff --git a/integrationtests/test_datamodel.schema.json b/integrationtests/test_datamodel.schema.json
new file mode 100644
index 0000000000000000000000000000000000000000..356964702dd83a8c81edf1e8d72bf4a30468e6f2
--- /dev/null
+++ b/integrationtests/test_datamodel.schema.json
@@ -0,0 +1,85 @@
+[
+  {
+    "title": "TestTypeWithAtomicProps",
+    "description": "RecordType with scalar atomic properties",
+    "type": "object",
+    "properties": {
+      "simple_text_prop": { "type": "string" },
+      "int_prop_with_name": { "type": "integer", "title": "IntegerProperty" },
+      "double_prop": {
+        "type": "number",
+        "description": "Some generic double-valued property"
+      },
+      "bool_prop": { "type": "boolean" },
+      "datetime_prop": { "type": "string", "format": "date-time" },
+      "date_prop": { "type": "string", "format": "date" }
+    },
+    "required": [ "simple_text_prop", "double_prop" ]
+  },
+  {
+    "title": "TestTypeWithReferencesAndEnum",
+    "type": "object",
+    "properties": {
+      "TestTypeWithAtomicProps": {},
+      "OtherReference": {
+        "type": "object",
+        "description": "Some generic referenced RecordType",
+        "properties": {}
+      },
+      "named_reference": {
+        "type": "object",
+        "title": "NamedReference",
+        "properties": {
+          "simple_text_prop": {}
+        }
+      },
+      "string_enum": {
+        "type": "string",
+        "enum": [ "StringEnumA", "StringEnumB", "StringEnumC" ]
+      },
+      "named_enum": {
+        "type": "string",
+        "title": "NamedEnum",
+        "enum": [ "NameA", "NameB", "NameC" ]
+      }
+    }
+  },
+  {
+    "title": "TestTypeWithLists",
+    "type": "object",
+    "properties": {
+      "string_list": {
+        "type": "array",
+        "description": "A list of words",
+        "items": { "type": "string" }
+      },
+      "named_int_list": {
+        "type": "array",
+        "title": "NamedIntList",
+        "items": { "type": "integer" }
+      },
+      "ListRecordType": {
+        "type": "array",
+        "items": { "type": "object", "properties": {} }
+      },
+      "NamedReferenceList": {
+        "type": "array",
+        "items": {
+          "title": "ReferencedListTypeWithName",
+          "type": "object",
+          "description": "Referenced by a named list-of-references property",
+          "properties": {
+            "double_prop": {}
+          }
+        }
+      },
+      "ListNumberEnum":
{ + "type": "array", + "items": { + "type": "number", + "enum": [ 1.1, 2.2, 3.3 ] + } + } + } + } +] diff --git a/integrationtests/test_datamodel_problems.py b/integrationtests/test_datamodel_problems.py index d9af69c79007bae5212f16d18363ff18b4ba9d32..3bca302dd2a337cee7fd023ee6a64c5185bc99f5 100644 --- a/integrationtests/test_datamodel_problems.py +++ b/integrationtests/test_datamodel_problems.py @@ -30,7 +30,8 @@ class in crawler.py and cfood.py can be found in full-tests. import caosdb as db import pytest from caosadvancedtools.datamodel_problems import DataModelProblems -from caosdb.exceptions import (UnqualifiedParentsError, +from caosdb.exceptions import (TransactionError, + UnqualifiedParentsError, UnqualifiedPropertiesError) @@ -43,12 +44,15 @@ def setup_module(): print(delete_exc) +@pytest.fixture(autouse=True) def setup(): - """No further setup""" + """Same as module setup.""" + setup_module() + yield None setup_module() -def teardown(): +def teardown_module(): """Clear and delete again.""" setup_module() @@ -65,8 +69,9 @@ def test_missing_parent(): missing_name = "TestType" rec = db.Record(name="TestRecord") rec.add_parent(name=missing_name) - with pytest.raises(UnqualifiedParentsError): + with pytest.raises(TransactionError) as te: _insert_and_evaluate_exception(rec) + assert te.value.has_error(UnqualifiedParentsError) assert missing_name in DataModelProblems.missing @@ -74,8 +79,9 @@ def test_missing_property(): """Test if missing Property is in datamodel problems.""" missing_name = "TestProp" rec = db.Record(name="TestRecord").add_property(name=missing_name) - with pytest.raises(UnqualifiedPropertiesError): + with pytest.raises(TransactionError) as te: _insert_and_evaluate_exception(rec) + assert te.value.has_error(UnqualifiedPropertiesError) assert missing_name in DataModelProblems.missing @@ -89,8 +95,9 @@ def test_missing_property_existing_type(): db.RecordType(name=existing_rt).insert() rec = db.Record(name="TestRecord").add_parent(name=existing_rt) rec.add_property(name=missing_prop) - with pytest.raises(UnqualifiedPropertiesError): + with pytest.raises(TransactionError) as te: _insert_and_evaluate_exception(rec) + assert te.value.has_error(UnqualifiedPropertiesError) assert missing_prop in DataModelProblems.missing assert existing_rt not in DataModelProblems.missing @@ -107,7 +114,8 @@ def test_wrong_property_value(): prop = db.Property(name=prop_name, datatype=prop_dtype).insert() rec = db.Record(name="TestRecord").add_parent( name=rt_name).add_property(name=prop_name, value="bla") - with pytest.raises(UnqualifiedPropertiesError): + with pytest.raises(TransactionError) as te: _insert_and_evaluate_exception(rec) + assert te.value.has_error(UnqualifiedPropertiesError) # Should be empty assert not DataModelProblems.missing diff --git a/integrationtests/test_im_und_export.py b/integrationtests/test_im_und_export.py index 5c7584e6f98ee792789f144d89f13ef84a7467fc..8ea45fd2cebbcb2c3be6c8cb79805204486f7862 100644 --- a/integrationtests/test_im_und_export.py +++ b/integrationtests/test_im_und_export.py @@ -3,25 +3,21 @@ import os from tempfile import TemporaryDirectory import caosdb as db - -from caosadvancedtools.export_related import export +from caosadvancedtools.export_related import export_related_to from caosadvancedtools.import_from_xml import import_xml if __name__ == "__main__": print("Conducting im- and export tests") rec = db.execute_query("FIND 2019-02-03_really_cool_finding", unique=True) directory = TemporaryDirectory() - export(rec.id, directory=directory.name) + 
export_related_to(rec.id, directory=directory.name) # delete everything - rec = db.execute_query("FIND record which was inserted by me") - prop = db.execute_query("FIND property which was inserted by me") - rt = db.execute_query("FIND recordtype which was inserted by me") - fi = db.execute_query("FIND file which was inserted by me") - c = db.Container() - c.extend(rec+prop+rt+fi) - c.delete() + print("Clearing database") + recs = db.execute_query("FIND entity with id>99") + recs.delete() assert 0 == len(db.execute_query("FIND File which is stored at " "**/poster.pdf")) + print("Importing stored elements") import_xml(os.path.join(directory.name, "caosdb_data.xml"), interactive=False) # The following tests the existence of some required entities. @@ -32,3 +28,4 @@ if __name__ == "__main__": db.execute_query("FIND RecordType Person", unique=True) db.execute_query("FIND Record Person with firstname=Only", unique=True) db.execute_query("FIND File which is stored at **/poster.pdf", unique=True) + print("Found all required Records and Files.") diff --git a/integrationtests/test_json_schema_datamodel_parser.py b/integrationtests/test_json_schema_datamodel_parser.py new file mode 100644 index 0000000000000000000000000000000000000000..21ae8d2d7bad5527a7a314220b38af8ff816475f --- /dev/null +++ b/integrationtests/test_json_schema_datamodel_parser.py @@ -0,0 +1,174 @@ +# +# This file is a part of the CaosDB Project. +# +# Copyright (C) 2022 IndiScale GmbH <info@indiscale.com> +# Copyright (C) 2022 Florian Spreckelsen <f.spreckelsen@indiscale.com> +# +# This program is free software: you can redistribute it and/or modify it under +# the terms of the GNU Affero General Public License as published by the Free +# Software Foundation, either version 3 of the License, or (at your option) any +# later version. +# +# This program is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +# FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more +# details. +# +# You should have received a copy of the GNU Affero General Public License along +# with this program. If not, see <https://www.gnu.org/licenses/>. +# + +import os + +import caosdb as db +from caosadvancedtools.models.parser import parse_model_from_json_schema + + +def _clear_db(): + ents = db.execute_query("FIND ENTITY WITH ID>99") + if ents: + ents.delete() + + +def setup_module(): + _clear_db() + + +def teardown_module(): + _clear_db() + + +def _load_and_sync(fname): + """Load datamodel from json schema in fname and synchronize it without asking. 
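+
+    The file name is interpreted relative to this test module's directory.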
+
+    """
+    # @author Florian Spreckelsen
+    # @date 2022-03-23
+    fpath = os.path.join(os.path.dirname(os.path.abspath(__file__)), fname)
+    model = parse_model_from_json_schema(fpath)
+    model.sync_data_model(noquestion=True)
+
+
+def test_json_parsed_datamodel():
+    # @author Florian Spreckelsen
+    # @date 2022-03-23
+
+    _load_and_sync("test_datamodel.schema.json")
+
+    # RecordType with atomic properties
+    rt1 = db.execute_query(
+        "FIND RECORDTYPE TestTypeWithAtomicProps", unique=True)
+    assert rt1.description == "RecordType with scalar atomic properties"
+    assert rt1.get_property("simple_text_prop") is not None
+    assert rt1.get_property("simple_text_prop").datatype == db.TEXT
+    assert rt1.get_importance("simple_text_prop") == db.OBLIGATORY
+
+    assert rt1.get_property("IntegerProperty") is not None
+    assert rt1.get_property("IntegerProperty").datatype == db.INTEGER
+    assert rt1.get_importance("IntegerProperty") == db.RECOMMENDED
+
+    assert rt1.get_property("double_prop") is not None
+    assert rt1.get_property("double_prop").datatype == db.DOUBLE
+    assert rt1.get_importance("double_prop") == db.OBLIGATORY
+    assert (db.Property(name="double_prop").retrieve().description ==
+            "Some generic double-valued property")
+
+    further_props = [
+        ("bool_prop", db.BOOLEAN),
+        ("datetime_prop", db.DATETIME),
+        ("date_prop", db.DATETIME)
+    ]
+    for name, dtype in further_props:
+        assert rt1.get_property(name) is not None
+        assert rt1.get_property(name).datatype == dtype
+        assert rt1.get_importance(name) == db.RECOMMENDED
+
+    # RecordType with references and enums
+    rt2 = db.execute_query(
+        "FIND RECORDTYPE TestTypeWithReferencesAndEnum", unique=True)
+    assert rt2.get_property(rt1.name) is not None
+    assert rt2.get_property(rt1.name).is_reference()
+    assert rt2.get_property(rt1.name).name == rt1.name
+    assert rt2.get_property(rt1.name).id == rt1.id
+
+    other_ref_type = db.execute_query(
+        "FIND RECORDTYPE OtherReference", unique=True)
+    assert rt2.get_property(other_ref_type.name) is not None
+    assert rt2.get_property(other_ref_type.name).is_reference()
+    assert rt2.get_property(other_ref_type.name).name == other_ref_type.name
+    assert rt2.get_property(other_ref_type.name).id == other_ref_type.id
+    assert other_ref_type.description == "Some generic referenced RecordType"
+    assert len(other_ref_type.properties) == 0
+
+    named_ref_type = db.execute_query(
+        "FIND RECORDTYPE NamedReference", unique=True)
+    assert rt2.get_property(named_ref_type.name) is not None
+    assert rt2.get_property(named_ref_type.name).is_reference()
+    assert rt2.get_property(named_ref_type.name).name == named_ref_type.name
+    assert rt2.get_property(named_ref_type.name).id == named_ref_type.id
+    assert named_ref_type.get_property("simple_text_prop") is not None
+    assert (named_ref_type.get_property("simple_text_prop").id ==
+            rt1.get_property("simple_text_prop").id)
+    assert (named_ref_type.get_property("simple_text_prop").datatype ==
+            rt1.get_property("simple_text_prop").datatype)
+
+    enums = {
+        "string_enum": ["StringEnumA", "StringEnumB", "StringEnumC"],
+        "NamedEnum": ["NameA", "NameB", "NameC"]
+    }
+    for enum_type_name, enum_names in enums.items():
+        enum_type = db.execute_query(
+            f"FIND RECORDTYPE {enum_type_name}", unique=True)
+        assert len(enum_type.properties) == 0
+        enum_records = db.execute_query(f"FIND RECORD {enum_type_name}")
+        assert len(enum_records) == len(enum_names)
+        for rec in enum_records:
+            assert rec.name in enum_names
+        assert rt2.get_property(enum_type_name) is not None
+        assert
rt2.get_property(enum_type_name).is_reference() + assert rt2.get_property(enum_type_name).name == enum_type.name + assert rt2.get_property(enum_type_name).id == enum_type.id + + # Recordtype with lists + rt3 = db.execute_query("FIND RECORDTYPE TestTypeWithLists", unique=True) + assert rt3.get_property("string_list") is not None + assert rt3.get_property("string_list").datatype == db.LIST(db.TEXT) + string_list_prop = db.Property(name="string_list").retrieve() + assert string_list_prop.description == "A list of words" + assert string_list_prop.datatype == db.LIST(db.TEXT) + assert string_list_prop.id == rt3.get_property("string_list").id + + assert rt3.get_property("NamedIntList") is not None + assert rt3.get_property("NamedIntList").datatype == db.LIST(db.INTEGER) + + # This is a list of a plain references to a specific type + list_rt = db.execute_query("FIND RECORDTYPE ListRecordType", unique=True) + assert len(list_rt.properties) == 0 + assert rt3.get_property(list_rt.name) is not None + assert rt3.get_property(list_rt.name).is_reference() + assert rt3.get_property(list_rt.name).datatype == db.LIST(list_rt) + assert rt3.get_property(list_rt.name).id == list_rt.id + + # This is a list property of its own, referencing another separate RT + referenced_list_rt = db.execute_query( + "FIND RECORDTYPE ReferencedListTypeWithName", unique=True) + assert referenced_list_rt.description == "Referenced by a named list-of-references property" + assert referenced_list_rt.get_property("double_prop") is not None + assert (referenced_list_rt.get_property("double_prop").id == + rt1.get_property("double_prop").id) + assert rt3.get_property("NamedReferenceList") is not None + assert rt3.get_property("NamedReferenceList").is_reference() + assert rt3.get_property( + "NamedReferenceList").datatype == db.LIST(referenced_list_rt) + assert rt3.get_property("NamedReferenceList").id != referenced_list_rt.id + + enum_type = db.execute_query("FIND RECORDTYPE ListNumberEnum", unique=True) + assert len(enum_type.properties) == 0 + enum_names = ["1.1", "2.2", "3.3"] + enum_records = db.execute_query("FIND RECORD ListNumberEnum") + assert len(enum_records) == len(enum_names) + for rec in enum_records: + assert rec.name in enum_names + assert rt3.get_property(enum_type.name) is not None + assert rt3.get_property(enum_type.name).datatype == db.LIST(enum_type) + assert rt3.get_property(enum_type.name).id == enum_type.id diff --git a/integrationtests/test_yaml_parser.py b/integrationtests/test_yaml_parser.py new file mode 100644 index 0000000000000000000000000000000000000000..e2a2c4c056ced56d2605d93914186c2cba97e137 --- /dev/null +++ b/integrationtests/test_yaml_parser.py @@ -0,0 +1,69 @@ +# encoding: utf-8 +# +# This file is a part of the CaosDB Project. +# +# Copyright (C) 2022 IndiScale GmbH <info@indiscale.com> +# Copyright (C) 2022 Florian Spreckelsen <f.spreckelsen@indiscale.com> +# +# This program is free software: you can redistribute it and/or modify it under +# the terms of the GNU Affero General Public License as published by the Free +# Software Foundation, either version 3 of the License, or (at your option) any +# later version. +# +# This program is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +# FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more +# details. +# +# You should have received a copy of the GNU Affero General Public License along +# with this program. 
If not, see <https://www.gnu.org/licenses/>.
+#
+
+import caosdb as db
+from caosadvancedtools.models.parser import parse_model_from_string
+
+
+def _delete_everything():
+    ents = db.execute_query("FIND ENTITY WITH ID > 99")
+    if ents:
+        ents.delete()
+
+
+def setup_module():
+    _delete_everything()
+
+
+def teardown_module():
+    _delete_everything()
+
+
+def test_internal_props_in_extern():
+    """Test adding the internal `name` property as a parent to an existing
+    property.
+
+    """
+
+    model = """
+extern:
+- name
+- test_name
+- description
+- unit
+test_name:
+  inherit_from_suggested:
+  - name
+  - description
+  - unit
+"""
+    db.Property(name="test_name", datatype=db.TEXT).insert()
+    ents = parse_model_from_string(model)
+    ents.sync_data_model(noquestion=True)
+
+    test_prop = db.Property(name="test_name").retrieve()
+    assert len(test_prop.parents) == 3
+    desc_prop = db.Property(name="description").retrieve()
+    name_prop = db.Property(name="name").retrieve()
+    unit_prop = db.Property(name="unit").retrieve()
+    assert test_prop.has_parent(desc_prop)
+    assert test_prop.has_parent(name_prop)
+    assert test_prop.has_parent(unit_prop)
diff --git a/integrationtests/update_analysis.py b/integrationtests/update_analysis.py
new file mode 100644
index 0000000000000000000000000000000000000000..bd18ab375437bec02320dcfd269896c2ba7e2bb0
--- /dev/null
+++ b/integrationtests/update_analysis.py
@@ -0,0 +1,47 @@
+#!/usr/bin/env python3
+# encoding: utf-8
+#
+# ** header v3.0
+# This file is a part of the CaosDB Project.
+#
+# Copyright (C) 2021 Indiscale GmbH <info@indiscale.com>
+# Copyright (C) 2021 Henrik tom Wörden <h.tomwoerden@indiscale.com>
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as
+# published by the Free Software Foundation, either version 3 of the
+# License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+#
+# ** end header
+#
+
+"""Example update script. An analysis Record is retrieved and passed to the
+generic run function which then calls the appropriate script based on the
+Record.
+
+The simple query here could be replaced with something that e.g. retrieves all
+entities that were changed within a certain period of time.
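+
+A time-based variant could, for instance, restrict the query with a
+transaction filter to entities that were updated after a given date.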
+ +""" + +import sys + +import caosdb as db +from caosadvancedtools.serverside.generic_analysis import run + + +def main(): + da = db.execute_query("FIND Analysis with identifier=TEST", unique=True) + run(da) + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/pylintrc b/pylintrc new file mode 100644 index 0000000000000000000000000000000000000000..625f83ce950841f7a239538123ef7b5812fc5c5f --- /dev/null +++ b/pylintrc @@ -0,0 +1,19 @@ +# -*- mode:conf; -*- + +[FORMAT] +# Good variable names which should always be accepted, separated by a comma +good-names=ii,rt,df + +[TYPECHECK] +# List of module names for which member attributes should not be checked +# (useful for modules/projects where namespaces are manipulated during runtime +# and thus existing member attributes cannot be deduced by static analysis +ignored-modules=etree,h5py,labfolder + +[MASTER] +# TODO: The max_inferred size is necessary for https://github.com/PyCQA/pylint/issues/4577, +# otherwise pandas.read_csv's return value would be inferred as TextFileReader. +init-hook= + import sys; sys.path.extend(["src/caosadvancedtools"]); + import astroid; astroid.context.InferenceContext.max_inferred = 500; + diff --git a/pytest.ini b/pytest.ini index 211913fa06d4e0a46c9c9024e147c5313e4746e1..e65efaf9aaf061a8a1ec0040f87d682536fac4c2 100644 --- a/pytest.ini +++ b/pytest.ini @@ -1,4 +1,3 @@ [pytest] testpaths = unittests addopts = -vv -python_paths = src diff --git a/release.sh b/release.sh new file mode 100755 index 0000000000000000000000000000000000000000..1af097f014de6cd9eb3d3e8ba5da34aea0fe1671 --- /dev/null +++ b/release.sh @@ -0,0 +1,4 @@ +#!/bin/bash +rm -rf dist/ build/ .eggs/ +python setup.py sdist bdist_wheel +python -m twine upload -s dist/* diff --git a/setup.py b/setup.py index 8902af8c2f03b4e5972beeb85fbf4cc05d66d730..929613de35de01da98b02c77cd76b17b04784bd8 100755 --- a/setup.py +++ b/setup.py @@ -46,8 +46,8 @@ from setuptools import find_packages, setup ######################################################################## MAJOR = 0 -MINOR = 2 -MICRO = 0 +MINOR = 4 +MICRO = 2 PRE = "" # e.g. rc0, alpha.1, 0.beta-23 ISRELEASED = False @@ -154,14 +154,19 @@ def setup_package(): long_description_content_type="text/markdown", author='Henrik tom Wörden', author_email='h.tomwoerden@indiscale.com', - install_requires=["caosdb>=0.4.0", + install_requires=["caosdb>=0.7.0", + "jsonschema>=4.4.0", + "numpy>=1.17.3", "openpyxl>=3.0.0", - "xlrd==1.2.0", + "pandas>=1.2.0", + "xlrd>=2.0", ], + extras_require={"h5-crawler": ["h5py>=3.3.0", ], + }, packages=find_packages('src'), package_dir={'': 'src'}, setup_requires=["pytest-runner>=2.0,<3dev"], - tests_require=["pytest", "pytest-cov", "coverage>=4.4.2"], + tests_require=["pytest", "pytest-pythonpath", "pytest-cov", "coverage>=4.4.2"], ) try: setup(**metadata) diff --git a/src/caosadvancedtools/bloxberg/__init__.py b/src/caosadvancedtools/bloxberg/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..5ca50276b8fd48370fd84bd0f5358dd1e48d6b8e --- /dev/null +++ b/src/caosadvancedtools/bloxberg/__init__.py @@ -0,0 +1,4 @@ +"""Integration with the Bloxberg proof-of-existence blockchain. 
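+
+The `Bloxberg` class obtains certificates for CaosDB entities;
+`ensure_data_model` creates the RecordType needed to store them.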
+""" + +print("Warning: The Bloxberg module is still experimental and under active development.") diff --git a/src/caosadvancedtools/bloxberg/bloxberg.py b/src/caosadvancedtools/bloxberg/bloxberg.py new file mode 100644 index 0000000000000000000000000000000000000000..42af1e11a23a37214ec294b8032517bb5c70bb5b --- /dev/null +++ b/src/caosadvancedtools/bloxberg/bloxberg.py @@ -0,0 +1,197 @@ +# This file is a part of the CaosDB Project. +# +# Copyright (C) 2021 IndiScale GmbH <info@indiscale.com> +# Copyright (C) 2021 Daniel Hornung <d.hornung@indiscale.com> +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see <https://www.gnu.org/licenses/>. +"""Interaction with the Bloxberg blockchain. +""" + + +import hashlib +import json +import secrets + +import caosdb as db + +from ..models.parser import parse_model_from_string +from . import swagger_client + + +__model_yaml = """ +BloxbergCertificate: + obligatory_properties: + pepper: + datatype: TEXT + hash: + datatype: TEXT + proofValue: + datatype: TEXT + certificateJSON: + datatype: TEXT + recommended_properties: + certified: + datatype: REFERENCE +""" +__model = parse_model_from_string(__model_yaml) + + +class Bloxberg: + """A Bloxberg instance can be used to obtain or verify certificates.""" + + def __init__(self, connection=None): + """A Bloxberg instance can be used to obtain or verify certificates. + +Parameters +---------- +connection : dict +A dict with the following keys: + - url : The bloxberg URL. Default is "https://qa.certify.bloxberg.org" + """ + self._create_conf(connection) + self._api_client = swagger_client.ApiClient(configuration=self._conf) + self._api = swagger_client.CertificateApi(self._api_client) + + def _create_conf(self, connection=None): + """Generate a Swagger configuration object.""" + self._conf = swagger_client.Configuration() + if connection: + if "URL" in connection: + self._conf.host = connection["URL"] + + def certify(self, entity): + """Attempt to certify the given `entity` and return a certificate Record. + +Parameters +---------- +entity : caosdb.Entity +The entity to be certified + +Returns +------- +out : caosdb.Record +A BloxbergCertificate Record with all the necessary Properties. +""" + # Calculate hash + pepper = str(secrets.randbits(1024)) + entity.retrieve() + hasher = hashlib.sha256() + hasher.update(pepper.encode(encoding="utf8")) + hasher.update(str(entity).encode(encoding="utf8")) + entity_hash = "0x" + hasher.hexdigest() + print(entity_hash) + pubkey = "0x9858eC18a269EE69ebfD7C38eb297996827DDa98" # TODO The key of the API server? 
+        # Create body
+        body = swagger_client.Batch(public_key=pubkey, crid=[entity_hash], crid_type="sha2-256",
+                                    enable_ipfs=False)
+        # Submit hash & obtain response
+        result = self._api.create_bloxberg_certificate_create_bloxberg_certificate_post(body=body)
+        attribute_map = result[0].attribute_map
+        cert = result[0].to_dict()
+        for old, new in attribute_map.items():
+            if old == new:
+                continue
+            cert[new] = cert.pop(old)
+        json_s = json.dumps(cert)
+        # Generate result Record
+        cert_rec = db.Record().add_parent("BloxbergCertificate")
+        # Extract information and put into result
+        cert_rec.add_property(property="certified", value=entity)
+        cert_rec.add_property(property="pepper", value=pepper)
+        cert_rec.add_property(property="hash", value=entity_hash)
+        cert_rec.add_property(property="proofvalue", value=cert["proof"]["proofValue"])
+        cert_rec.add_property(property="certificateJSON", value=json_s)
+        # Return result
+        return cert_rec
+
+    def verify(self, certificate):
+        """Attempt to verify the certificate.
+
+A certificate passes verification if the Bloxberg instance says it is good. Typical use cases may
+also include the `validate` step to make sure that the certificate's original data exists and
+contains what it claimed to contain when the certificate was created.
+
+This method does nothing if the verification passes, else it raises an exception.
+
+Parameters
+----------
+certificate : caosdb.Record
+The BloxbergCertificate Record which shall be verified.
+
+        """
+        raise NotImplementedError("Bloxberg first needs to implement a verification API method.")
+
+    @staticmethod
+    def json_from_certificate(certificate, filename=None):
+        """Generate a qa.certify.bloxberg.org JSON string, optionally writing it to a file.
+
+Parameters
+----------
+certificate : caosdb.Record
+The BloxbergCertificate Record for which the JSON is generated.
+
+filename : str
+Write the JSON to this file.
+"""
+        # TODO: Generating the certificate JSON is not implemented yet.
+        content = {}
+
+        return content
+
+
+def ensure_data_model(force=False):
+    """Make sure that the data model fits our needs.
+
+    Most importantly, this means that a suitable RecordType "BloxbergCertificate" must exist.
+    """
+    __model.sync_data_model(noquestion=force)
+
+
+def certify_entity(entity, json_filename=None):
+    """Certify the given entity and store the result in the CaosDB.
+
+Parameters
+----------
+entity : caosdb.Entity
+    The Entity to be certified.
+
+json_filename : str
+    If given, store the JSON here.
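+
+The `entity` argument may also be given as an integer entity id, in which
+case it is wrapped in a `caosdb.Entity` automatically.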
+""" + if isinstance(entity, int): + entity = db.Entity(id=entity) + + blx = Bloxberg() + print("Obtaining certificate...") + certificate = blx.certify(entity) + print("Certificate was successfully obtained.") + certificate.insert() + print("Certificate was stored in CaosDB.") + + if json_filename: + with open(json_filename, "w") as json_file: + json_file.write(certificate.get_property("certificateJSON").value) + + +def demo_run(): + """Run the core functions for demonstration purposes.""" + print("Making sure that the remote data model is up to date.") + ensure_data_model() + print("Data model is up to date.") + import caosdb as db + CertRT = db.RecordType(name="BloxbergCertificate").retrieve() + print("Certifying the `BloxbergCertificate` RecordType...") + json_filename = "/tmp/cert.json" + certify_entity(CertRT, json_filename=json_filename) + print("Certificate json file can be found here: {}".format(json_filename)) + print("You can verify the certificate here: https://certify.bloxberg.org/verify") diff --git a/src/caosadvancedtools/bloxberg/swagger_client/__init__.py b/src/caosadvancedtools/bloxberg/swagger_client/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..136c5b27a37cfbd9135230468ae5a29cb0eb2b77 --- /dev/null +++ b/src/caosadvancedtools/bloxberg/swagger_client/__init__.py @@ -0,0 +1,34 @@ +# coding: utf-8 + +# flake8: noqa + +""" + Research Object Certification + + No description provided (generated by Swagger Codegen https://github.com/swagger-api/swagger-codegen) # noqa: E501 + + OpenAPI spec version: 0.2.0 + + Generated by: https://github.com/swagger-api/swagger-codegen.git +""" + +from __future__ import absolute_import + +# Fake the installation +import sys, pathlib +__this_dir = str(pathlib.Path(__file__).parent.parent) +if __this_dir not in sys.path: + sys.path.append(__this_dir) + +# import apis into sdk package +from swagger_client.api.certificate_api import CertificateApi +from swagger_client.api.pdf_api import PdfApi +# import ApiClient +from swagger_client.api_client import ApiClient +from swagger_client.configuration import Configuration +# import models into sdk package +from swagger_client.models.batch import Batch +from swagger_client.models.controller_cert_tools_generate_pdf_json_certificate import ControllerCertToolsGeneratePdfJsonCertificate +from swagger_client.models.controller_cert_tools_generate_unsigned_certificate_json_certificate import ControllerCertToolsGenerateUnsignedCertificateJsonCertificate +from swagger_client.models.http_validation_error import HTTPValidationError +from swagger_client.models.validation_error import ValidationError diff --git a/src/caosadvancedtools/bloxberg/swagger_client/api/__init__.py b/src/caosadvancedtools/bloxberg/swagger_client/api/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..d33c26ea8bc245108934d5e0e9fdcd046da3232e --- /dev/null +++ b/src/caosadvancedtools/bloxberg/swagger_client/api/__init__.py @@ -0,0 +1,7 @@ +from __future__ import absolute_import + +# flake8: noqa + +# import apis into api package +from swagger_client.api.certificate_api import CertificateApi +from swagger_client.api.pdf_api import PdfApi diff --git a/src/caosadvancedtools/bloxberg/swagger_client/api/certificate_api.py b/src/caosadvancedtools/bloxberg/swagger_client/api/certificate_api.py new file mode 100644 index 0000000000000000000000000000000000000000..0f0f1c6a5a51ff4d2338df4c6e233b93fc2a950a --- /dev/null +++ b/src/caosadvancedtools/bloxberg/swagger_client/api/certificate_api.py 
@@ -0,0 +1,132 @@ +# coding: utf-8 + +""" + Research Object Certification + + No description provided (generated by Swagger Codegen https://github.com/swagger-api/swagger-codegen) # noqa: E501 + + OpenAPI spec version: 0.2.0 + + Generated by: https://github.com/swagger-api/swagger-codegen.git +""" + +from __future__ import absolute_import + +import re # noqa: F401 + +# python 2 and python 3 compatibility library +import six + +from swagger_client.api_client import ApiClient + + +class CertificateApi(object): + """NOTE: This class is auto generated by the swagger code generator program. + + Do not edit the class manually. + Ref: https://github.com/swagger-api/swagger-codegen + """ + + def __init__(self, api_client=None): + if api_client is None: + api_client = ApiClient() + self.api_client = api_client + + def create_bloxberg_certificate_create_bloxberg_certificate_post(self, body, **kwargs): # noqa: E501 + """Createbloxbergcertificate # noqa: E501 + + Creates, transacts, and signs a research object certificate on the bloxberg blockchain. Hashes must be generated client side for each desired file and provided in an array. Each hash corresponds to one research object certificate returned in a JSON object array. # noqa: E501 + This method makes a synchronous HTTP request by default. To make an + asynchronous HTTP request, please pass async_req=True + >>> thread = api.create_bloxberg_certificate_create_bloxberg_certificate_post(body, async_req=True) + >>> result = thread.get() + + :param async_req bool + :param Batch body: (required) + :return: list[ControllerCertToolsGenerateUnsignedCertificateJsonCertificate] + If the method is called asynchronously, + returns the request thread. + """ + kwargs['_return_http_data_only'] = True + if kwargs.get('async_req'): + return self.create_bloxberg_certificate_create_bloxberg_certificate_post_with_http_info(body, **kwargs) # noqa: E501 + else: + (data) = self.create_bloxberg_certificate_create_bloxberg_certificate_post_with_http_info(body, **kwargs) # noqa: E501 + return data + + def create_bloxberg_certificate_create_bloxberg_certificate_post_with_http_info(self, body, **kwargs): # noqa: E501 + """Createbloxbergcertificate # noqa: E501 + + Creates, transacts, and signs a research object certificate on the bloxberg blockchain. Hashes must be generated client side for each desired file and provided in an array. Each hash corresponds to one research object certificate returned in a JSON object array. # noqa: E501 + This method makes a synchronous HTTP request by default. To make an + asynchronous HTTP request, please pass async_req=True + >>> thread = api.create_bloxberg_certificate_create_bloxberg_certificate_post_with_http_info(body, async_req=True) + >>> result = thread.get() + + :param async_req bool + :param Batch body: (required) + :return: list[ControllerCertToolsGenerateUnsignedCertificateJsonCertificate] + If the method is called asynchronously, + returns the request thread. 
+ """ + + all_params = ['body'] # noqa: E501 + all_params.append('async_req') + all_params.append('_return_http_data_only') + all_params.append('_preload_content') + all_params.append('_request_timeout') + + params = locals() + for key, val in six.iteritems(params['kwargs']): + if key not in all_params: + raise TypeError( + "Got an unexpected keyword argument '%s'" + " to method create_bloxberg_certificate_create_bloxberg_certificate_post" % key + ) + params[key] = val + del params['kwargs'] + # verify the required parameter 'body' is set + if ('body' not in params or + params['body'] is None): + raise ValueError("Missing the required parameter `body` when calling `create_bloxberg_certificate_create_bloxberg_certificate_post`") # noqa: E501 + + collection_formats = {} + + path_params = {} + + query_params = [] + + header_params = {} + + form_params = [] + local_var_files = {} + + body_params = None + if 'body' in params: + body_params = params['body'] + # HTTP header `Accept` + header_params['Accept'] = self.api_client.select_header_accept( + ['application/json']) # noqa: E501 + + # HTTP header `Content-Type` + header_params['Content-Type'] = self.api_client.select_header_content_type( # noqa: E501 + ['application/json']) # noqa: E501 + + # Authentication setting + auth_settings = [] # noqa: E501 + + return self.api_client.call_api( + '/createBloxbergCertificate', 'POST', + path_params, + query_params, + header_params, + body=body_params, + post_params=form_params, + files=local_var_files, + response_type='list[ControllerCertToolsGenerateUnsignedCertificateJsonCertificate]', # noqa: E501 + auth_settings=auth_settings, + async_req=params.get('async_req'), + _return_http_data_only=params.get('_return_http_data_only'), + _preload_content=params.get('_preload_content', True), + _request_timeout=params.get('_request_timeout'), + collection_formats=collection_formats) diff --git a/src/caosadvancedtools/bloxberg/swagger_client/api/pdf_api.py b/src/caosadvancedtools/bloxberg/swagger_client/api/pdf_api.py new file mode 100644 index 0000000000000000000000000000000000000000..a5a279de21e45735be31eed1ce18fd7c275cf6cb --- /dev/null +++ b/src/caosadvancedtools/bloxberg/swagger_client/api/pdf_api.py @@ -0,0 +1,132 @@ +# coding: utf-8 + +""" + Research Object Certification + + No description provided (generated by Swagger Codegen https://github.com/swagger-api/swagger-codegen) # noqa: E501 + + OpenAPI spec version: 0.2.0 + + Generated by: https://github.com/swagger-api/swagger-codegen.git +""" + +from __future__ import absolute_import + +import re # noqa: F401 + +# python 2 and python 3 compatibility library +import six + +from swagger_client.api_client import ApiClient + + +class PdfApi(object): + """NOTE: This class is auto generated by the swagger code generator program. + + Do not edit the class manually. + Ref: https://github.com/swagger-api/swagger-codegen + """ + + def __init__(self, api_client=None): + if api_client is None: + api_client = ApiClient() + self.api_client = api_client + + def generate_pdf_generate_pdf_post(self, body, **kwargs): # noqa: E501 + """Generatepdf # noqa: E501 + + Accepts as input the response from the createBloxbergCertificate endpoint, for example a research object JSON array. Returns as response a zip archive with PDF files that correspond to the number of cryptographic identifiers provided. PDF files are embedded with the Research Object Certification which is used for verification. # noqa: E501 + This method makes a synchronous HTTP request by default. 
To make an + asynchronous HTTP request, please pass async_req=True + >>> thread = api.generate_pdf_generate_pdf_post(body, async_req=True) + >>> result = thread.get() + + :param async_req bool + :param list[ControllerCertToolsGeneratePdfJsonCertificate] body: (required) + :return: Object + If the method is called asynchronously, + returns the request thread. + """ + kwargs['_return_http_data_only'] = True + if kwargs.get('async_req'): + return self.generate_pdf_generate_pdf_post_with_http_info(body, **kwargs) # noqa: E501 + else: + (data) = self.generate_pdf_generate_pdf_post_with_http_info(body, **kwargs) # noqa: E501 + return data + + def generate_pdf_generate_pdf_post_with_http_info(self, body, **kwargs): # noqa: E501 + """Generatepdf # noqa: E501 + + Accepts as input the response from the createBloxbergCertificate endpoint, for example a research object JSON array. Returns as response a zip archive with PDF files that correspond to the number of cryptographic identifiers provided. PDF files are embedded with the Research Object Certification which is used for verification. # noqa: E501 + This method makes a synchronous HTTP request by default. To make an + asynchronous HTTP request, please pass async_req=True + >>> thread = api.generate_pdf_generate_pdf_post_with_http_info(body, async_req=True) + >>> result = thread.get() + + :param async_req bool + :param list[ControllerCertToolsGeneratePdfJsonCertificate] body: (required) + :return: Object + If the method is called asynchronously, + returns the request thread. + """ + + all_params = ['body'] # noqa: E501 + all_params.append('async_req') + all_params.append('_return_http_data_only') + all_params.append('_preload_content') + all_params.append('_request_timeout') + + params = locals() + for key, val in six.iteritems(params['kwargs']): + if key not in all_params: + raise TypeError( + "Got an unexpected keyword argument '%s'" + " to method generate_pdf_generate_pdf_post" % key + ) + params[key] = val + del params['kwargs'] + # verify the required parameter 'body' is set + if ('body' not in params or + params['body'] is None): + raise ValueError("Missing the required parameter `body` when calling `generate_pdf_generate_pdf_post`") # noqa: E501 + + collection_formats = {} + + path_params = {} + + query_params = [] + + header_params = {} + + form_params = [] + local_var_files = {} + + body_params = None + if 'body' in params: + body_params = params['body'] + # HTTP header `Accept` + header_params['Accept'] = self.api_client.select_header_accept( + ['application/json']) # noqa: E501 + + # HTTP header `Content-Type` + header_params['Content-Type'] = self.api_client.select_header_content_type( # noqa: E501 + ['application/json']) # noqa: E501 + + # Authentication setting + auth_settings = [] # noqa: E501 + + return self.api_client.call_api( + '/generatePDF', 'POST', + path_params, + query_params, + header_params, + body=body_params, + post_params=form_params, + files=local_var_files, + response_type='Object', # noqa: E501 + auth_settings=auth_settings, + async_req=params.get('async_req'), + _return_http_data_only=params.get('_return_http_data_only'), + _preload_content=params.get('_preload_content', True), + _request_timeout=params.get('_request_timeout'), + collection_formats=collection_formats) diff --git a/src/caosadvancedtools/bloxberg/swagger_client/api_client.py b/src/caosadvancedtools/bloxberg/swagger_client/api_client.py new file mode 100644 index 0000000000000000000000000000000000000000..25e6501a4e36b09bca266f2eb375807053a58870 --- 
/dev/null +++ b/src/caosadvancedtools/bloxberg/swagger_client/api_client.py @@ -0,0 +1,628 @@ +# coding: utf-8 +""" + Research Object Certification + + No description provided (generated by Swagger Codegen https://github.com/swagger-api/swagger-codegen) # noqa: E501 + + OpenAPI spec version: 0.2.0 + + Generated by: https://github.com/swagger-api/swagger-codegen.git +""" +from __future__ import absolute_import + +import datetime +import json +import mimetypes +from multiprocessing.pool import ThreadPool +import os +import re +import tempfile + +# python 2 and python 3 compatibility library +import six +from six.moves.urllib.parse import quote + +from swagger_client.configuration import Configuration +import swagger_client.models +from swagger_client import rest + + +class ApiClient(object): + """Generic API client for Swagger client library builds. + + Swagger generic API client. This client handles the client- + server communication, and is invariant across implementations. Specifics of + the methods and models for each application are generated from the Swagger + templates. + + NOTE: This class is auto generated by the swagger code generator program. + Ref: https://github.com/swagger-api/swagger-codegen + Do not edit the class manually. + + :param configuration: .Configuration object for this client + :param header_name: a header to pass when making calls to the API. + :param header_value: a header value to pass when making calls to + the API. + :param cookie: a cookie to include in the header when making calls + to the API + """ + + PRIMITIVE_TYPES = (float, bool, bytes, six.text_type) + six.integer_types + NATIVE_TYPES_MAPPING = { + 'int': int, + 'long': int if six.PY3 else long, # noqa: F821 + 'float': float, + 'str': str, + 'bool': bool, + 'date': datetime.date, + 'datetime': datetime.datetime, + 'object': object, + } + + def __init__(self, configuration=None, header_name=None, header_value=None, + cookie=None): + if configuration is None: + configuration = Configuration() + self.configuration = configuration + + self.pool = ThreadPool() + self.rest_client = rest.RESTClientObject(configuration) + self.default_headers = {} + if header_name is not None: + self.default_headers[header_name] = header_value + self.cookie = cookie + # Set default User-Agent. 
+ self.user_agent = 'Swagger-Codegen/1.0.0/python' + + def __del__(self): + self.pool.close() + self.pool.join() + + @property + def user_agent(self): + """User agent for this API client""" + return self.default_headers['User-Agent'] + + @user_agent.setter + def user_agent(self, value): + self.default_headers['User-Agent'] = value + + def set_default_header(self, header_name, header_value): + self.default_headers[header_name] = header_value + + def __call_api( + self, resource_path, method, path_params=None, + query_params=None, header_params=None, body=None, post_params=None, + files=None, response_type=None, auth_settings=None, + _return_http_data_only=None, collection_formats=None, + _preload_content=True, _request_timeout=None): + + config = self.configuration + + # header parameters + header_params = header_params or {} + header_params.update(self.default_headers) + if self.cookie: + header_params['Cookie'] = self.cookie + if header_params: + header_params = self.sanitize_for_serialization(header_params) + header_params = dict(self.parameters_to_tuples(header_params, + collection_formats)) + + # path parameters + if path_params: + path_params = self.sanitize_for_serialization(path_params) + path_params = self.parameters_to_tuples(path_params, + collection_formats) + for k, v in path_params: + # specified safe chars, encode everything + resource_path = resource_path.replace( + '{%s}' % k, + quote(str(v), safe=config.safe_chars_for_path_param) + ) + + # query parameters + if query_params: + query_params = self.sanitize_for_serialization(query_params) + query_params = self.parameters_to_tuples(query_params, + collection_formats) + + # post parameters + if post_params or files: + post_params = self.prepare_post_parameters(post_params, files) + post_params = self.sanitize_for_serialization(post_params) + post_params = self.parameters_to_tuples(post_params, + collection_formats) + + # auth setting + self.update_params_for_auth(header_params, query_params, auth_settings) + + # body + if body: + body = self.sanitize_for_serialization(body) + + # request url + url = self.configuration.host + resource_path + + # perform request and return response + response_data = self.request( + method, url, query_params=query_params, headers=header_params, + post_params=post_params, body=body, + _preload_content=_preload_content, + _request_timeout=_request_timeout) + + self.last_response = response_data + + return_data = response_data + if _preload_content: + # deserialize response data + if response_type: + return_data = self.deserialize(response_data, response_type) + else: + return_data = None + + if _return_http_data_only: + return (return_data) + else: + return (return_data, response_data.status, + response_data.getheaders()) + + def sanitize_for_serialization(self, obj): + """Builds a JSON POST object. + + If obj is None, return None. + If obj is str, int, long, float, bool, return directly. + If obj is datetime.datetime, datetime.date + convert to string in iso8601 format. + If obj is list, sanitize each element in the list. + If obj is dict, return the dict. + If obj is swagger model, return the properties dict. + + :param obj: The data to serialize. + :return: The serialized form of data. 
+ """ + if obj is None: + return None + elif isinstance(obj, self.PRIMITIVE_TYPES): + return obj + elif isinstance(obj, list): + return [self.sanitize_for_serialization(sub_obj) + for sub_obj in obj] + elif isinstance(obj, tuple): + return tuple(self.sanitize_for_serialization(sub_obj) + for sub_obj in obj) + elif isinstance(obj, (datetime.datetime, datetime.date)): + return obj.isoformat() + + if isinstance(obj, dict): + obj_dict = obj + else: + # Convert model obj to dict except + # attributes `swagger_types`, `attribute_map` + # and attributes which value is not None. + # Convert attribute name to json key in + # model definition for request. + obj_dict = {obj.attribute_map[attr]: getattr(obj, attr) + for attr, _ in six.iteritems(obj.swagger_types) + if getattr(obj, attr) is not None} + + return {key: self.sanitize_for_serialization(val) + for key, val in six.iteritems(obj_dict)} + + def deserialize(self, response, response_type): + """Deserializes response into an object. + + :param response: RESTResponse object to be deserialized. + :param response_type: class literal for + deserialized object, or string of class name. + + :return: deserialized object. + """ + # handle file downloading + # save response body into a tmp file and return the instance + if response_type == "file": + return self.__deserialize_file(response) + + # fetch data from response object + try: + data = json.loads(response.data) + except ValueError: + data = response.data + + return self.__deserialize(data, response_type) + + def __deserialize(self, data, klass): + """Deserializes dict, list, str into an object. + + :param data: dict, list or str. + :param klass: class literal, or string of class name. + + :return: object. + """ + if data is None: + return None + + if type(klass) == str: + if klass.startswith('list['): + sub_kls = re.match(r'list\[(.*)\]', klass).group(1) + return [self.__deserialize(sub_data, sub_kls) + for sub_data in data] + + if klass.startswith('dict('): + sub_kls = re.match(r'dict\(([^,]*), (.*)\)', klass).group(2) + return {k: self.__deserialize(v, sub_kls) + for k, v in six.iteritems(data)} + + # convert str to class + if klass in self.NATIVE_TYPES_MAPPING: + klass = self.NATIVE_TYPES_MAPPING[klass] + else: + klass = getattr(swagger_client.models, klass) + + if klass in self.PRIMITIVE_TYPES: + return self.__deserialize_primitive(data, klass) + elif klass == object: + return self.__deserialize_object(data) + elif klass == datetime.date: + return self.__deserialize_date(data) + elif klass == datetime.datetime: + return self.__deserialize_datatime(data) + else: + return self.__deserialize_model(data, klass) + + def call_api(self, resource_path, method, + path_params=None, query_params=None, header_params=None, + body=None, post_params=None, files=None, + response_type=None, auth_settings=None, async_req=None, + _return_http_data_only=None, collection_formats=None, + _preload_content=True, _request_timeout=None): + """Makes the HTTP request (synchronous) and returns deserialized data. + + To make an async request, set the async_req parameter. + + :param resource_path: Path to method endpoint. + :param method: Method to call. + :param path_params: Path parameters in the url. + :param query_params: Query parameters in the url. + :param header_params: Header parameters to be + placed in the request header. + :param body: Request body. + :param post_params dict: Request post form parameters, + for `application/x-www-form-urlencoded`, `multipart/form-data`. 
+ :param auth_settings list: Auth Settings names for the request. + :param response: Response data type. + :param files dict: key -> filename, value -> filepath, + for `multipart/form-data`. + :param async_req bool: execute request asynchronously + :param _return_http_data_only: response data without head status code + and headers + :param collection_formats: dict of collection formats for path, query, + header, and post parameters. + :param _preload_content: if False, the urllib3.HTTPResponse object will + be returned without reading/decoding response + data. Default is True. + :param _request_timeout: timeout setting for this request. If one + number provided, it will be total request + timeout. It can also be a pair (tuple) of + (connection, read) timeouts. + :return: + If async_req parameter is True, + the request will be called asynchronously. + The method will return the request thread. + If parameter async_req is False or missing, + then the method will return the response directly. + """ + if not async_req: + return self.__call_api(resource_path, method, + path_params, query_params, header_params, + body, post_params, files, + response_type, auth_settings, + _return_http_data_only, collection_formats, + _preload_content, _request_timeout) + else: + thread = self.pool.apply_async(self.__call_api, (resource_path, + method, path_params, query_params, + header_params, body, + post_params, files, + response_type, auth_settings, + _return_http_data_only, + collection_formats, + _preload_content, _request_timeout)) + return thread + + def request(self, method, url, query_params=None, headers=None, + post_params=None, body=None, _preload_content=True, + _request_timeout=None): + """Makes the HTTP request using RESTClient.""" + if method == "GET": + return self.rest_client.GET(url, + query_params=query_params, + _preload_content=_preload_content, + _request_timeout=_request_timeout, + headers=headers) + elif method == "HEAD": + return self.rest_client.HEAD(url, + query_params=query_params, + _preload_content=_preload_content, + _request_timeout=_request_timeout, + headers=headers) + elif method == "OPTIONS": + return self.rest_client.OPTIONS(url, + query_params=query_params, + headers=headers, + post_params=post_params, + _preload_content=_preload_content, + _request_timeout=_request_timeout, + body=body) + elif method == "POST": + return self.rest_client.POST(url, + query_params=query_params, + headers=headers, + post_params=post_params, + _preload_content=_preload_content, + _request_timeout=_request_timeout, + body=body) + elif method == "PUT": + return self.rest_client.PUT(url, + query_params=query_params, + headers=headers, + post_params=post_params, + _preload_content=_preload_content, + _request_timeout=_request_timeout, + body=body) + elif method == "PATCH": + return self.rest_client.PATCH(url, + query_params=query_params, + headers=headers, + post_params=post_params, + _preload_content=_preload_content, + _request_timeout=_request_timeout, + body=body) + elif method == "DELETE": + return self.rest_client.DELETE(url, + query_params=query_params, + headers=headers, + _preload_content=_preload_content, + _request_timeout=_request_timeout, + body=body) + else: + raise ValueError( + "http method must be `GET`, `HEAD`, `OPTIONS`," + " `POST`, `PATCH`, `PUT` or `DELETE`." + ) + + def parameters_to_tuples(self, params, collection_formats): + """Get parameters as list of tuples, formatting collections. 
+ + :param params: Parameters as dict or list of two-tuples + :param dict collection_formats: Parameter collection formats + :return: Parameters as list of tuples, collections formatted + """ + new_params = [] + if collection_formats is None: + collection_formats = {} + for k, v in six.iteritems(params) if isinstance(params, dict) else params: # noqa: E501 + if k in collection_formats: + collection_format = collection_formats[k] + if collection_format == 'multi': + new_params.extend((k, value) for value in v) + else: + if collection_format == 'ssv': + delimiter = ' ' + elif collection_format == 'tsv': + delimiter = '\t' + elif collection_format == 'pipes': + delimiter = '|' + else: # csv is the default + delimiter = ',' + new_params.append( + (k, delimiter.join(str(value) for value in v))) + else: + new_params.append((k, v)) + return new_params + + def prepare_post_parameters(self, post_params=None, files=None): + """Builds form parameters. + + :param post_params: Normal form parameters. + :param files: File parameters. + :return: Form parameters with files. + """ + params = [] + + if post_params: + params = post_params + + if files: + for k, v in six.iteritems(files): + if not v: + continue + file_names = v if type(v) is list else [v] + for n in file_names: + with open(n, 'rb') as f: + filename = os.path.basename(f.name) + filedata = f.read() + mimetype = (mimetypes.guess_type(filename)[0] or + 'application/octet-stream') + params.append( + tuple([k, tuple([filename, filedata, mimetype])])) + + return params + + def select_header_accept(self, accepts): + """Returns `Accept` based on an array of accepts provided. + + :param accepts: List of headers. + :return: Accept (e.g. application/json). + """ + if not accepts: + return + + accepts = [x.lower() for x in accepts] + + if 'application/json' in accepts: + return 'application/json' + else: + return ', '.join(accepts) + + def select_header_content_type(self, content_types): + """Returns `Content-Type` based on an array of content_types provided. + + :param content_types: List of content-types. + :return: Content-Type (e.g. application/json). + """ + if not content_types: + return 'application/json' + + content_types = [x.lower() for x in content_types] + + if 'application/json' in content_types or '*/*' in content_types: + return 'application/json' + else: + return content_types[0] + + def update_params_for_auth(self, headers, querys, auth_settings): + """Updates header and query params based on authentication setting. + + :param headers: Header parameters dict to be updated. + :param querys: Query parameters tuple list to be updated. + :param auth_settings: Authentication setting identifiers list. + """ + if not auth_settings: + return + + for auth in auth_settings: + auth_setting = self.configuration.auth_settings().get(auth) + if auth_setting: + if not auth_setting['value']: + continue + elif auth_setting['in'] == 'header': + headers[auth_setting['key']] = auth_setting['value'] + elif auth_setting['in'] == 'query': + querys.append((auth_setting['key'], auth_setting['value'])) + else: + raise ValueError( + 'Authentication token must be in `query` or `header`' + ) + + def __deserialize_file(self, response): + """Deserializes body to file + + Saves response body into a file in a temporary folder, + using the filename from the `Content-Disposition` header if provided. + + :param response: RESTResponse. + :return: file path. 
+ """ + fd, path = tempfile.mkstemp(dir=self.configuration.temp_folder_path) + os.close(fd) + os.remove(path) + + content_disposition = response.getheader("Content-Disposition") + if content_disposition: + filename = re.search(r'filename=[\'"]?([^\'"\s]+)[\'"]?', + content_disposition).group(1) + path = os.path.join(os.path.dirname(path), filename) + + with open(path, "wb") as f: + f.write(response.data) + + return path + + def __deserialize_primitive(self, data, klass): + """Deserializes string to primitive type. + + :param data: str. + :param klass: class literal. + + :return: int, long, float, str, bool. + """ + try: + return klass(data) + except UnicodeEncodeError: + return six.text_type(data) + except TypeError: + return data + + def __deserialize_object(self, value): + """Return a original value. + + :return: object. + """ + return value + + def __deserialize_date(self, string): + """Deserializes string to date. + + :param string: str. + :return: date. + """ + try: + from dateutil.parser import parse + return parse(string).date() + except ImportError: + return string + except ValueError: + raise rest.ApiException( + status=0, + reason="Failed to parse `{0}` as date object".format(string) + ) + + def __deserialize_datatime(self, string): + """Deserializes string to datetime. + + The string should be in iso8601 datetime format. + + :param string: str. + :return: datetime. + """ + try: + from dateutil.parser import parse + return parse(string) + except ImportError: + return string + except ValueError: + raise rest.ApiException( + status=0, + reason=( + "Failed to parse `{0}` as datetime object" + .format(string) + ) + ) + + def __hasattr(self, object, name): + return name in object.__class__.__dict__ + + def __deserialize_model(self, data, klass): + """Deserializes list or dict to model. + + :param data: dict, list. + :param klass: class literal. + :return: model object. 
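# The date helpers above delegate parsing to python-dateutil and fall back
# to returning the raw string when that package is missing. A standalone
# sketch of the expected round-trip (requires python-dateutil):

from dateutil.parser import parse

assert parse("2021-03-15").date().isoformat() == "2021-03-15"
assert parse("2021-03-15T12:30:00+00:00").isoformat() == "2021-03-15T12:30:00+00:00"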
+ """ + + if not klass.swagger_types and not self.__hasattr(klass, 'get_real_child_model'): + return data + + kwargs = {} + if klass.swagger_types is not None: + for attr, attr_type in six.iteritems(klass.swagger_types): + if (data is not None and + klass.attribute_map[attr] in data and + isinstance(data, (list, dict))): + value = data[klass.attribute_map[attr]] + kwargs[attr] = self.__deserialize(value, attr_type) + + instance = klass(**kwargs) + + if (isinstance(instance, dict) and + klass.swagger_types is not None and + isinstance(data, dict)): + for key, value in data.items(): + if key not in klass.swagger_types: + instance[key] = value + if self.__hasattr(instance, 'get_real_child_model'): + klass_name = instance.get_real_child_model(data) + if klass_name: + instance = self.__deserialize(data, klass_name) + return instance diff --git a/src/caosadvancedtools/bloxberg/swagger_client/configuration.py b/src/caosadvancedtools/bloxberg/swagger_client/configuration.py new file mode 100644 index 0000000000000000000000000000000000000000..2be9f6a733a030d0dea2ab43b9e85f6ed15085d8 --- /dev/null +++ b/src/caosadvancedtools/bloxberg/swagger_client/configuration.py @@ -0,0 +1,244 @@ +# coding: utf-8 + +""" + Research Object Certification + + No description provided (generated by Swagger Codegen https://github.com/swagger-api/swagger-codegen) # noqa: E501 + + OpenAPI spec version: 0.2.0 + + Generated by: https://github.com/swagger-api/swagger-codegen.git +""" + +from __future__ import absolute_import + +import copy +import logging +import multiprocessing +import sys +import urllib3 + +import six +from six.moves import http_client as httplib + + +class TypeWithDefault(type): + def __init__(cls, name, bases, dct): + super(TypeWithDefault, cls).__init__(name, bases, dct) + cls._default = None + + def __call__(cls): + if cls._default is None: + cls._default = type.__call__(cls) + return copy.copy(cls._default) + + def set_default(cls, default): + cls._default = copy.copy(default) + + +class Configuration(six.with_metaclass(TypeWithDefault, object)): + """NOTE: This class is auto generated by the swagger code generator program. + + Ref: https://github.com/swagger-api/swagger-codegen + Do not edit the class manually. + """ + + def __init__(self): + """Constructor""" + # Default Base url + self.host = "https://qa.certify.bloxberg.org" + # Temp file folder for downloading files + self.temp_folder_path = None + + # Authentication Settings + # dict to store API key(s) + self.api_key = {} + # dict to store API prefix (e.g. Bearer) + self.api_key_prefix = {} + # function to refresh API key if expired + self.refresh_api_key_hook = None + # Username for HTTP basic authentication + self.username = "" + # Password for HTTP basic authentication + self.password = "" + # Logging Settings + self.logger = {} + self.logger["package_logger"] = logging.getLogger("swagger_client") + self.logger["urllib3_logger"] = logging.getLogger("urllib3") + # Log format + self.logger_format = '%(asctime)s %(levelname)s %(message)s' + # Log stream handler + self.logger_stream_handler = None + # Log file handler + self.logger_file_handler = None + # Debug file location + self.logger_file = None + # Debug switch + self.debug = False + + # SSL/TLS verification + # Set this to false to skip verifying SSL certificate when calling API + # from https server. + self.verify_ssl = True + # Set this to customize the certificate file to verify the peer. 
+ self.ssl_ca_cert = None + # client certificate file + self.cert_file = None + # client key file + self.key_file = None + # Set this to True/False to enable/disable SSL hostname verification. + self.assert_hostname = None + + # urllib3 connection pool's maximum number of connections saved + # per pool. urllib3 uses 1 connection as default value, but this is + # not the best value when you are making a lot of possibly parallel + # requests to the same host, which is often the case here. + # cpu_count * 5 is used as default value to increase performance. + self.connection_pool_maxsize = multiprocessing.cpu_count() * 5 + + # Proxy URL + self.proxy = None + # Safe chars for path_param + self.safe_chars_for_path_param = '' + + @property + def logger_file(self): + """The logger file. + + If the logger_file is None, then add stream handler and remove file + handler. Otherwise, add file handler and remove stream handler. + + :param value: The logger_file path. + :type: str + """ + return self.__logger_file + + @logger_file.setter + def logger_file(self, value): + """The logger file. + + If the logger_file is None, then add stream handler and remove file + handler. Otherwise, add file handler and remove stream handler. + + :param value: The logger_file path. + :type: str + """ + self.__logger_file = value + if self.__logger_file: + # If set logging file, + # then add file handler and remove stream handler. + self.logger_file_handler = logging.FileHandler(self.__logger_file) + self.logger_file_handler.setFormatter(self.logger_formatter) + for _, logger in six.iteritems(self.logger): + logger.addHandler(self.logger_file_handler) + if self.logger_stream_handler: + logger.removeHandler(self.logger_stream_handler) + else: + # If not set logging file, + # then add stream handler and remove file handler. + self.logger_stream_handler = logging.StreamHandler() + self.logger_stream_handler.setFormatter(self.logger_formatter) + for _, logger in six.iteritems(self.logger): + logger.addHandler(self.logger_stream_handler) + if self.logger_file_handler: + logger.removeHandler(self.logger_file_handler) + + @property + def debug(self): + """Debug status + + :param value: The debug status, True or False. + :type: bool + """ + return self.__debug + + @debug.setter + def debug(self, value): + """Debug status + + :param value: The debug status, True or False. + :type: bool + """ + self.__debug = value + if self.__debug: + # if debug status is True, turn on debug logging + for _, logger in six.iteritems(self.logger): + logger.setLevel(logging.DEBUG) + # turn on httplib debug + httplib.HTTPConnection.debuglevel = 1 + else: + # if debug status is False, turn off debug logging, + # setting log level to default `logging.WARNING` + for _, logger in six.iteritems(self.logger): + logger.setLevel(logging.WARNING) + # turn off httplib debug + httplib.HTTPConnection.debuglevel = 0 + + @property + def logger_format(self): + """The logger format. + + The logger_formatter will be updated when sets logger_format. + + :param value: The format string. + :type: str + """ + return self.__logger_format + + @logger_format.setter + def logger_format(self, value): + """The logger format. + + The logger_formatter will be updated when sets logger_format. + + :param value: The format string. + :type: str + """ + self.__logger_format = value + self.logger_formatter = logging.Formatter(self.__logger_format) + + def get_api_key_with_prefix(self, identifier): + """Gets API key (with prefix if set). + + :param identifier: The identifier of apiKey. 
+ :return: The token for api key authentication. + """ + if self.refresh_api_key_hook: + self.refresh_api_key_hook(self) + + key = self.api_key.get(identifier) + if key: + prefix = self.api_key_prefix.get(identifier) + if prefix: + return "%s %s" % (prefix, key) + else: + return key + + def get_basic_auth_token(self): + """Gets HTTP basic authentication header (string). + + :return: The token for basic HTTP authentication. + """ + return urllib3.util.make_headers( + basic_auth=self.username + ':' + self.password + ).get('authorization') + + def auth_settings(self): + """Gets Auth Settings dict for api client. + + :return: The Auth Settings information dict. + """ + return { + } + + def to_debug_report(self): + """Gets the essential information for debugging. + + :return: The report for debugging. + """ + return "Python SDK Debug Report:\n"\ + "OS: {env}\n"\ + "Python Version: {pyversion}\n"\ + "Version of the API: 0.2.0\n"\ + "SDK Package Version: 1.0.0".\ + format(env=sys.platform, pyversion=sys.version) diff --git a/src/caosadvancedtools/bloxberg/swagger_client/models/__init__.py b/src/caosadvancedtools/bloxberg/swagger_client/models/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..55b01c66f4f68f86ea6fd8bc34e61fc534d3902f --- /dev/null +++ b/src/caosadvancedtools/bloxberg/swagger_client/models/__init__.py @@ -0,0 +1,21 @@ +# coding: utf-8 + +# flake8: noqa +""" + Research Object Certification + + No description provided (generated by Swagger Codegen https://github.com/swagger-api/swagger-codegen) # noqa: E501 + + OpenAPI spec version: 0.2.0 + + Generated by: https://github.com/swagger-api/swagger-codegen.git +""" + +from __future__ import absolute_import + +# import models into model package +from swagger_client.models.batch import Batch +from swagger_client.models.controller_cert_tools_generate_pdf_json_certificate import ControllerCertToolsGeneratePdfJsonCertificate +from swagger_client.models.controller_cert_tools_generate_unsigned_certificate_json_certificate import ControllerCertToolsGenerateUnsignedCertificateJsonCertificate +from swagger_client.models.http_validation_error import HTTPValidationError +from swagger_client.models.validation_error import ValidationError diff --git a/src/caosadvancedtools/bloxberg/swagger_client/models/batch.py b/src/caosadvancedtools/bloxberg/swagger_client/models/batch.py new file mode 100644 index 0000000000000000000000000000000000000000..7a347cf7ac9148df8ec9a43200f4058f127447b9 --- /dev/null +++ b/src/caosadvancedtools/bloxberg/swagger_client/models/batch.py @@ -0,0 +1,227 @@ +# coding: utf-8 + +""" + Research Object Certification + + No description provided (generated by Swagger Codegen https://github.com/swagger-api/swagger-codegen) # noqa: E501 + + OpenAPI spec version: 0.2.0 + + Generated by: https://github.com/swagger-api/swagger-codegen.git +""" + +import pprint +import re # noqa: F401 + +import six + +class Batch(object): + """NOTE: This class is auto generated by the swagger code generator program. + + Do not edit the class manually. + """ + """ + Attributes: + swagger_types (dict): The key is attribute name + and the value is attribute type. + attribute_map (dict): The key is attribute name + and the value is json key in definition. 
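# Two behaviour sketches for the Configuration class above. First, the
# TypeWithDefault metaclass: Configuration() always returns a copy of one
# shared default instance, so mutating a copy only becomes visible after
# promoting it with set_default() (the alternative host is hypothetical):

c1 = Configuration()
c1.host = "https://certify.example.org"
assert Configuration().host == "https://qa.certify.bloxberg.org"
Configuration.set_default(c1)
assert Configuration().host == "https://certify.example.org"

# Second, get_basic_auth_token() simply wraps urllib3's header builder
# (credentials hypothetical):

cfg = Configuration()
cfg.username = "alice"
cfg.password = "secret"
assert cfg.get_basic_auth_token() == "Basic YWxpY2U6c2VjcmV0"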
+ """ + swagger_types = { + 'public_key': 'str', + 'crid': 'list[str]', + 'crid_type': 'str', + 'enable_ipfs': 'bool', + 'metadata_json': 'str' + } + + attribute_map = { + 'public_key': 'publicKey', + 'crid': 'crid', + 'crid_type': 'cridType', + 'enable_ipfs': 'enableIPFS', + 'metadata_json': 'metadataJson' + } + + def __init__(self, public_key=None, crid=None, crid_type=None, enable_ipfs=None, metadata_json=None): # noqa: E501 + """Batch - a model defined in Swagger""" # noqa: E501 + self._public_key = None + self._crid = None + self._crid_type = None + self._enable_ipfs = None + self._metadata_json = None + self.discriminator = None + self.public_key = public_key + self.crid = crid + if crid_type is not None: + self.crid_type = crid_type + self.enable_ipfs = enable_ipfs + if metadata_json is not None: + self.metadata_json = metadata_json + + @property + def public_key(self): + """Gets the public_key of this Batch. # noqa: E501 + + Public bloxberg address where the Research Object Certificate token will be minted # noqa: E501 + + :return: The public_key of this Batch. # noqa: E501 + :rtype: str + """ + return self._public_key + + @public_key.setter + def public_key(self, public_key): + """Sets the public_key of this Batch. + + Public bloxberg address where the Research Object Certificate token will be minted # noqa: E501 + + :param public_key: The public_key of this Batch. # noqa: E501 + :type: str + """ + if public_key is None: + raise ValueError("Invalid value for `public_key`, must not be `None`") # noqa: E501 + + self._public_key = public_key + + @property + def crid(self): + """Gets the crid of this Batch. # noqa: E501 + + Cryptographic Identifier of each file you wish to certify. One certificate will be generated per hash up to a maximum of 1001 in a single request # noqa: E501 + + :return: The crid of this Batch. # noqa: E501 + :rtype: list[str] + """ + return self._crid + + @crid.setter + def crid(self, crid): + """Sets the crid of this Batch. + + Cryptographic Identifier of each file you wish to certify. One certificate will be generated per hash up to a maximum of 1001 in a single request # noqa: E501 + + :param crid: The crid of this Batch. # noqa: E501 + :type: list[str] + """ + if crid is None: + raise ValueError("Invalid value for `crid`, must not be `None`") # noqa: E501 + + self._crid = crid + + @property + def crid_type(self): + """Gets the crid_type of this Batch. # noqa: E501 + + If crid is not self-describing, provide the type of cryptographic function you used to generate the cryptographic identifier. Please use the name field from the multihash list to ensure compatibility: https://github.com/multiformats/multicodec/blob/master/table.csv # noqa: E501 + + :return: The crid_type of this Batch. # noqa: E501 + :rtype: str + """ + return self._crid_type + + @crid_type.setter + def crid_type(self, crid_type): + """Sets the crid_type of this Batch. + + If crid is not self-describing, provide the type of cryptographic function you used to generate the cryptographic identifier. Please use the name field from the multihash list to ensure compatibility: https://github.com/multiformats/multicodec/blob/master/table.csv # noqa: E501 + + :param crid_type: The crid_type of this Batch. # noqa: E501 + :type: str + """ + + self._crid_type = crid_type + + @property + def enable_ipfs(self): + """Gets the enable_ipfs of this Batch. # noqa: E501 + + EXPERIMENTAL: Set to true to enable posting certificate to IPFS. If set to false, will simply return certificates in the response. 
By default, this is disabled on the server due to performance and storage problems with IPFS # noqa: E501 + + :return: The enable_ipfs of this Batch. # noqa: E501 + :rtype: bool + """ + return self._enable_ipfs + + @enable_ipfs.setter + def enable_ipfs(self, enable_ipfs): + """Sets the enable_ipfs of this Batch. + + EXPERIMENTAL: Set to true to enable posting certificate to IPFS. If set to false, will simply return certificates in the response. By default, this is disabled on the server due to performance and storage problems with IPFS # noqa: E501 + + :param enable_ipfs: The enable_ipfs of this Batch. # noqa: E501 + :type: bool + """ + if enable_ipfs is None: + raise ValueError("Invalid value for `enable_ipfs`, must not be `None`") # noqa: E501 + + self._enable_ipfs = enable_ipfs + + @property + def metadata_json(self): + """Gets the metadata_json of this Batch. # noqa: E501 + + Provide optional metadata to describe the research object batch in more detail that will be included in the certificate. # noqa: E501 + + :return: The metadata_json of this Batch. # noqa: E501 + :rtype: str + """ + return self._metadata_json + + @metadata_json.setter + def metadata_json(self, metadata_json): + """Sets the metadata_json of this Batch. + + Provide optional metadata to describe the research object batch in more detail that will be included in the certificate. # noqa: E501 + + :param metadata_json: The metadata_json of this Batch. # noqa: E501 + :type: str + """ + + self._metadata_json = metadata_json + + def to_dict(self): + """Returns the model properties as a dict""" + result = {} + + for attr, _ in six.iteritems(self.swagger_types): + value = getattr(self, attr) + if isinstance(value, list): + result[attr] = list(map( + lambda x: x.to_dict() if hasattr(x, "to_dict") else x, + value + )) + elif hasattr(value, "to_dict"): + result[attr] = value.to_dict() + elif isinstance(value, dict): + result[attr] = dict(map( + lambda item: (item[0], item[1].to_dict()) + if hasattr(item[1], "to_dict") else item, + value.items() + )) + else: + result[attr] = value + if issubclass(Batch, dict): + for key, value in self.items(): + result[key] = value + + return result + + def to_str(self): + """Returns the string representation of the model""" + return pprint.pformat(self.to_dict()) + + def __repr__(self): + """For `print` and `pprint`""" + return self.to_str() + + def __eq__(self, other): + """Returns true if both objects are equal""" + if not isinstance(other, Batch): + return False + + return self.__dict__ == other.__dict__ + + def __ne__(self, other): + """Returns true if both objects are not equal""" + return not self == other diff --git a/src/caosadvancedtools/bloxberg/swagger_client/models/controller_cert_tools_generate_pdf_json_certificate.py b/src/caosadvancedtools/bloxberg/swagger_client/models/controller_cert_tools_generate_pdf_json_certificate.py new file mode 100644 index 0000000000000000000000000000000000000000..2d7fd2d763ba40c9a384203301aa3e70efdf7783 --- /dev/null +++ b/src/caosadvancedtools/bloxberg/swagger_client/models/controller_cert_tools_generate_pdf_json_certificate.py @@ -0,0 +1,379 @@ +# coding: utf-8 + +""" + Research Object Certification + + No description provided (generated by Swagger Codegen https://github.com/swagger-api/swagger-codegen) # noqa: E501 + + OpenAPI spec version: 0.2.0 + + Generated by: https://github.com/swagger-api/swagger-codegen.git +""" + +import pprint +import re # noqa: F401 + +import six + +class ControllerCertToolsGeneratePdfJsonCertificate(object): + """NOTE: 
This class is auto generated by the swagger code generator program. + + Do not edit the class manually. + """ + """ + Attributes: + swagger_types (dict): The key is attribute name + and the value is attribute type. + attribute_map (dict): The key is attribute name + and the value is json key in definition. + """ + swagger_types = { + 'context': 'list[str]', + 'id': 'str', + 'type': 'list[str]', + 'issuer': 'str', + 'issuance_date': 'str', + 'credential_subject': 'object', + 'display_html': 'str', + 'crid': 'str', + 'crid_type': 'str', + 'metadata_json': 'str', + 'proof': 'object' + } + + attribute_map = { + 'context': '@context', + 'id': 'id', + 'type': 'type', + 'issuer': 'issuer', + 'issuance_date': 'issuanceDate', + 'credential_subject': 'credentialSubject', + 'display_html': 'displayHtml', + 'crid': 'crid', + 'crid_type': 'cridType', + 'metadata_json': 'metadataJson', + 'proof': 'proof' + } + + def __init__(self, context=None, id=None, type=None, issuer=None, issuance_date=None, credential_subject=None, display_html=None, crid=None, crid_type=None, metadata_json=None, proof=None): # noqa: E501 + """ControllerCertToolsGeneratePdfJsonCertificate - a model defined in Swagger""" # noqa: E501 + self._context = None + self._id = None + self._type = None + self._issuer = None + self._issuance_date = None + self._credential_subject = None + self._display_html = None + self._crid = None + self._crid_type = None + self._metadata_json = None + self._proof = None + self.discriminator = None + if context is not None: + self.context = context + self.id = id + self.type = type + self.issuer = issuer + self.issuance_date = issuance_date + self.credential_subject = credential_subject + if display_html is not None: + self.display_html = display_html + self.crid = crid + if crid_type is not None: + self.crid_type = crid_type + if metadata_json is not None: + self.metadata_json = metadata_json + self.proof = proof + + @property + def context(self): + """Gets the context of this ControllerCertToolsGeneratePdfJsonCertificate. # noqa: E501 + + Relevant JSON-LD context links in order to validate Verifiable Credentials according to their spec. # noqa: E501 + + :return: The context of this ControllerCertToolsGeneratePdfJsonCertificate. # noqa: E501 + :rtype: list[str] + """ + return self._context + + @context.setter + def context(self, context): + """Sets the context of this ControllerCertToolsGeneratePdfJsonCertificate. + + Relevant JSON-LD context links in order to validate Verifiable Credentials according to their spec. # noqa: E501 + + :param context: The context of this ControllerCertToolsGeneratePdfJsonCertificate. # noqa: E501 + :type: list[str] + """ + + self._context = context + + @property + def id(self): + """Gets the id of this ControllerCertToolsGeneratePdfJsonCertificate. # noqa: E501 + + + :return: The id of this ControllerCertToolsGeneratePdfJsonCertificate. # noqa: E501 + :rtype: str + """ + return self._id + + @id.setter + def id(self, id): + """Sets the id of this ControllerCertToolsGeneratePdfJsonCertificate. + + + :param id: The id of this ControllerCertToolsGeneratePdfJsonCertificate. # noqa: E501 + :type: str + """ + if id is None: + raise ValueError("Invalid value for `id`, must not be `None`") # noqa: E501 + + self._id = id + + @property + def type(self): + """Gets the type of this ControllerCertToolsGeneratePdfJsonCertificate. # noqa: E501 + + + :return: The type of this ControllerCertToolsGeneratePdfJsonCertificate. 
# noqa: E501 + :rtype: list[str] + """ + return self._type + + @type.setter + def type(self, type): + """Sets the type of this ControllerCertToolsGeneratePdfJsonCertificate. + + + :param type: The type of this ControllerCertToolsGeneratePdfJsonCertificate. # noqa: E501 + :type: list[str] + """ + if type is None: + raise ValueError("Invalid value for `type`, must not be `None`") # noqa: E501 + + self._type = type + + @property + def issuer(self): + """Gets the issuer of this ControllerCertToolsGeneratePdfJsonCertificate. # noqa: E501 + + + :return: The issuer of this ControllerCertToolsGeneratePdfJsonCertificate. # noqa: E501 + :rtype: str + """ + return self._issuer + + @issuer.setter + def issuer(self, issuer): + """Sets the issuer of this ControllerCertToolsGeneratePdfJsonCertificate. + + + :param issuer: The issuer of this ControllerCertToolsGeneratePdfJsonCertificate. # noqa: E501 + :type: str + """ + if issuer is None: + raise ValueError("Invalid value for `issuer`, must not be `None`") # noqa: E501 + + self._issuer = issuer + + @property + def issuance_date(self): + """Gets the issuance_date of this ControllerCertToolsGeneratePdfJsonCertificate. # noqa: E501 + + + :return: The issuance_date of this ControllerCertToolsGeneratePdfJsonCertificate. # noqa: E501 + :rtype: str + """ + return self._issuance_date + + @issuance_date.setter + def issuance_date(self, issuance_date): + """Sets the issuance_date of this ControllerCertToolsGeneratePdfJsonCertificate. + + + :param issuance_date: The issuance_date of this ControllerCertToolsGeneratePdfJsonCertificate. # noqa: E501 + :type: str + """ + if issuance_date is None: + raise ValueError("Invalid value for `issuance_date`, must not be `None`") # noqa: E501 + + self._issuance_date = issuance_date + + @property + def credential_subject(self): + """Gets the credential_subject of this ControllerCertToolsGeneratePdfJsonCertificate. # noqa: E501 + + + :return: The credential_subject of this ControllerCertToolsGeneratePdfJsonCertificate. # noqa: E501 + :rtype: object + """ + return self._credential_subject + + @credential_subject.setter + def credential_subject(self, credential_subject): + """Sets the credential_subject of this ControllerCertToolsGeneratePdfJsonCertificate. + + + :param credential_subject: The credential_subject of this ControllerCertToolsGeneratePdfJsonCertificate. # noqa: E501 + :type: object + """ + if credential_subject is None: + raise ValueError("Invalid value for `credential_subject`, must not be `None`") # noqa: E501 + + self._credential_subject = credential_subject + + @property + def display_html(self): + """Gets the display_html of this ControllerCertToolsGeneratePdfJsonCertificate. # noqa: E501 + + + :return: The display_html of this ControllerCertToolsGeneratePdfJsonCertificate. # noqa: E501 + :rtype: str + """ + return self._display_html + + @display_html.setter + def display_html(self, display_html): + """Sets the display_html of this ControllerCertToolsGeneratePdfJsonCertificate. + + + :param display_html: The display_html of this ControllerCertToolsGeneratePdfJsonCertificate. # noqa: E501 + :type: str + """ + + self._display_html = display_html + + @property + def crid(self): + """Gets the crid of this ControllerCertToolsGeneratePdfJsonCertificate. # noqa: E501 + + + :return: The crid of this ControllerCertToolsGeneratePdfJsonCertificate. # noqa: E501 + :rtype: str + """ + return self._crid + + @crid.setter + def crid(self, crid): + """Sets the crid of this ControllerCertToolsGeneratePdfJsonCertificate. 
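# Validation sketch for the generated model above: the setters of required
# attributes reject None, so an argument-less construction fails at the
# first required field, doctest-style:
#
#     >>> ControllerCertToolsGeneratePdfJsonCertificate()
#     Traceback (most recent call last):
#     ...
#     ValueError: Invalid value for `id`, must not be `None`
#
# Optional attributes (context, display_html, crid_type, metadata_json)
# are only assigned by __init__ when a value is given.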
+ + + :param crid: The crid of this ControllerCertToolsGeneratePdfJsonCertificate. # noqa: E501 + :type: str + """ + if crid is None: + raise ValueError("Invalid value for `crid`, must not be `None`") # noqa: E501 + + self._crid = crid + + @property + def crid_type(self): + """Gets the crid_type of this ControllerCertToolsGeneratePdfJsonCertificate. # noqa: E501 + + + :return: The crid_type of this ControllerCertToolsGeneratePdfJsonCertificate. # noqa: E501 + :rtype: str + """ + return self._crid_type + + @crid_type.setter + def crid_type(self, crid_type): + """Sets the crid_type of this ControllerCertToolsGeneratePdfJsonCertificate. + + + :param crid_type: The crid_type of this ControllerCertToolsGeneratePdfJsonCertificate. # noqa: E501 + :type: str + """ + + self._crid_type = crid_type + + @property + def metadata_json(self): + """Gets the metadata_json of this ControllerCertToolsGeneratePdfJsonCertificate. # noqa: E501 + + + :return: The metadata_json of this ControllerCertToolsGeneratePdfJsonCertificate. # noqa: E501 + :rtype: str + """ + return self._metadata_json + + @metadata_json.setter + def metadata_json(self, metadata_json): + """Sets the metadata_json of this ControllerCertToolsGeneratePdfJsonCertificate. + + + :param metadata_json: The metadata_json of this ControllerCertToolsGeneratePdfJsonCertificate. # noqa: E501 + :type: str + """ + + self._metadata_json = metadata_json + + @property + def proof(self): + """Gets the proof of this ControllerCertToolsGeneratePdfJsonCertificate. # noqa: E501 + + + :return: The proof of this ControllerCertToolsGeneratePdfJsonCertificate. # noqa: E501 + :rtype: object + """ + return self._proof + + @proof.setter + def proof(self, proof): + """Sets the proof of this ControllerCertToolsGeneratePdfJsonCertificate. + + + :param proof: The proof of this ControllerCertToolsGeneratePdfJsonCertificate. 
# noqa: E501 + :type: object + """ + if proof is None: + raise ValueError("Invalid value for `proof`, must not be `None`") # noqa: E501 + + self._proof = proof + + def to_dict(self): + """Returns the model properties as a dict""" + result = {} + + for attr, _ in six.iteritems(self.swagger_types): + value = getattr(self, attr) + if isinstance(value, list): + result[attr] = list(map( + lambda x: x.to_dict() if hasattr(x, "to_dict") else x, + value + )) + elif hasattr(value, "to_dict"): + result[attr] = value.to_dict() + elif isinstance(value, dict): + result[attr] = dict(map( + lambda item: (item[0], item[1].to_dict()) + if hasattr(item[1], "to_dict") else item, + value.items() + )) + else: + result[attr] = value + if issubclass(ControllerCertToolsGeneratePdfJsonCertificate, dict): + for key, value in self.items(): + result[key] = value + + return result + + def to_str(self): + """Returns the string representation of the model""" + return pprint.pformat(self.to_dict()) + + def __repr__(self): + """For `print` and `pprint`""" + return self.to_str() + + def __eq__(self, other): + """Returns true if both objects are equal""" + if not isinstance(other, ControllerCertToolsGeneratePdfJsonCertificate): + return False + + return self.__dict__ == other.__dict__ + + def __ne__(self, other): + """Returns true if both objects are not equal""" + return not self == other diff --git a/src/caosadvancedtools/bloxberg/swagger_client/models/controller_cert_tools_generate_unsigned_certificate_json_certificate.py b/src/caosadvancedtools/bloxberg/swagger_client/models/controller_cert_tools_generate_unsigned_certificate_json_certificate.py new file mode 100644 index 0000000000000000000000000000000000000000..4a6d2d3f0e15faa8672f001e964d66c6e0a27780 --- /dev/null +++ b/src/caosadvancedtools/bloxberg/swagger_client/models/controller_cert_tools_generate_unsigned_certificate_json_certificate.py @@ -0,0 +1,379 @@ +# coding: utf-8 + +""" + Research Object Certification + + No description provided (generated by Swagger Codegen https://github.com/swagger-api/swagger-codegen) # noqa: E501 + + OpenAPI spec version: 0.2.0 + + Generated by: https://github.com/swagger-api/swagger-codegen.git +""" + +import pprint +import re # noqa: F401 + +import six + +class ControllerCertToolsGenerateUnsignedCertificateJsonCertificate(object): + """NOTE: This class is auto generated by the swagger code generator program. + + Do not edit the class manually. + """ + """ + Attributes: + swagger_types (dict): The key is attribute name + and the value is attribute type. + attribute_map (dict): The key is attribute name + and the value is json key in definition. 
+ """ + swagger_types = { + 'context': 'list[str]', + 'id': 'str', + 'type': 'list[str]', + 'issuer': 'str', + 'issuance_date': 'str', + 'credential_subject': 'object', + 'display_html': 'str', + 'crid': 'str', + 'crid_type': 'str', + 'metadata_json': 'str', + 'proof': 'object' + } + + attribute_map = { + 'context': '@context', + 'id': 'id', + 'type': 'type', + 'issuer': 'issuer', + 'issuance_date': 'issuanceDate', + 'credential_subject': 'credentialSubject', + 'display_html': 'displayHtml', + 'crid': 'crid', + 'crid_type': 'cridType', + 'metadata_json': 'metadataJson', + 'proof': 'proof' + } + + def __init__(self, context=None, id=None, type=None, issuer=None, issuance_date=None, credential_subject=None, display_html=None, crid=None, crid_type=None, metadata_json=None, proof=None): # noqa: E501 + """ControllerCertToolsGenerateUnsignedCertificateJsonCertificate - a model defined in Swagger""" # noqa: E501 + self._context = None + self._id = None + self._type = None + self._issuer = None + self._issuance_date = None + self._credential_subject = None + self._display_html = None + self._crid = None + self._crid_type = None + self._metadata_json = None + self._proof = None + self.discriminator = None + if context is not None: + self.context = context + self.id = id + self.type = type + self.issuer = issuer + self.issuance_date = issuance_date + self.credential_subject = credential_subject + if display_html is not None: + self.display_html = display_html + self.crid = crid + if crid_type is not None: + self.crid_type = crid_type + if metadata_json is not None: + self.metadata_json = metadata_json + self.proof = proof + + @property + def context(self): + """Gets the context of this ControllerCertToolsGenerateUnsignedCertificateJsonCertificate. # noqa: E501 + + Relevant JSON-LD context links in order to validate Verifiable Credentials according to their spec. # noqa: E501 + + :return: The context of this ControllerCertToolsGenerateUnsignedCertificateJsonCertificate. # noqa: E501 + :rtype: list[str] + """ + return self._context + + @context.setter + def context(self, context): + """Sets the context of this ControllerCertToolsGenerateUnsignedCertificateJsonCertificate. + + Relevant JSON-LD context links in order to validate Verifiable Credentials according to their spec. # noqa: E501 + + :param context: The context of this ControllerCertToolsGenerateUnsignedCertificateJsonCertificate. # noqa: E501 + :type: list[str] + """ + + self._context = context + + @property + def id(self): + """Gets the id of this ControllerCertToolsGenerateUnsignedCertificateJsonCertificate. # noqa: E501 + + + :return: The id of this ControllerCertToolsGenerateUnsignedCertificateJsonCertificate. # noqa: E501 + :rtype: str + """ + return self._id + + @id.setter + def id(self, id): + """Sets the id of this ControllerCertToolsGenerateUnsignedCertificateJsonCertificate. + + + :param id: The id of this ControllerCertToolsGenerateUnsignedCertificateJsonCertificate. # noqa: E501 + :type: str + """ + if id is None: + raise ValueError("Invalid value for `id`, must not be `None`") # noqa: E501 + + self._id = id + + @property + def type(self): + """Gets the type of this ControllerCertToolsGenerateUnsignedCertificateJsonCertificate. # noqa: E501 + + + :return: The type of this ControllerCertToolsGenerateUnsignedCertificateJsonCertificate. # noqa: E501 + :rtype: list[str] + """ + return self._type + + @type.setter + def type(self, type): + """Sets the type of this ControllerCertToolsGenerateUnsignedCertificateJsonCertificate. 
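# Serialization note for the model above: to_dict() keys use the *python*
# attribute names from swagger_types, while the wire names come from
# attribute_map and are only applied when the ApiClient serializes a
# request body. Doctest-style, for any instance `cert` of this model:
#
#     >>> 'issuance_date' in cert.to_dict()
#     True
#     >>> 'issuanceDate' in cert.to_dict()
#     False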
+ + + :param type: The type of this ControllerCertToolsGenerateUnsignedCertificateJsonCertificate. # noqa: E501 + :type: list[str] + """ + if type is None: + raise ValueError("Invalid value for `type`, must not be `None`") # noqa: E501 + + self._type = type + + @property + def issuer(self): + """Gets the issuer of this ControllerCertToolsGenerateUnsignedCertificateJsonCertificate. # noqa: E501 + + + :return: The issuer of this ControllerCertToolsGenerateUnsignedCertificateJsonCertificate. # noqa: E501 + :rtype: str + """ + return self._issuer + + @issuer.setter + def issuer(self, issuer): + """Sets the issuer of this ControllerCertToolsGenerateUnsignedCertificateJsonCertificate. + + + :param issuer: The issuer of this ControllerCertToolsGenerateUnsignedCertificateJsonCertificate. # noqa: E501 + :type: str + """ + if issuer is None: + raise ValueError("Invalid value for `issuer`, must not be `None`") # noqa: E501 + + self._issuer = issuer + + @property + def issuance_date(self): + """Gets the issuance_date of this ControllerCertToolsGenerateUnsignedCertificateJsonCertificate. # noqa: E501 + + + :return: The issuance_date of this ControllerCertToolsGenerateUnsignedCertificateJsonCertificate. # noqa: E501 + :rtype: str + """ + return self._issuance_date + + @issuance_date.setter + def issuance_date(self, issuance_date): + """Sets the issuance_date of this ControllerCertToolsGenerateUnsignedCertificateJsonCertificate. + + + :param issuance_date: The issuance_date of this ControllerCertToolsGenerateUnsignedCertificateJsonCertificate. # noqa: E501 + :type: str + """ + if issuance_date is None: + raise ValueError("Invalid value for `issuance_date`, must not be `None`") # noqa: E501 + + self._issuance_date = issuance_date + + @property + def credential_subject(self): + """Gets the credential_subject of this ControllerCertToolsGenerateUnsignedCertificateJsonCertificate. # noqa: E501 + + + :return: The credential_subject of this ControllerCertToolsGenerateUnsignedCertificateJsonCertificate. # noqa: E501 + :rtype: object + """ + return self._credential_subject + + @credential_subject.setter + def credential_subject(self, credential_subject): + """Sets the credential_subject of this ControllerCertToolsGenerateUnsignedCertificateJsonCertificate. + + + :param credential_subject: The credential_subject of this ControllerCertToolsGenerateUnsignedCertificateJsonCertificate. # noqa: E501 + :type: object + """ + if credential_subject is None: + raise ValueError("Invalid value for `credential_subject`, must not be `None`") # noqa: E501 + + self._credential_subject = credential_subject + + @property + def display_html(self): + """Gets the display_html of this ControllerCertToolsGenerateUnsignedCertificateJsonCertificate. # noqa: E501 + + + :return: The display_html of this ControllerCertToolsGenerateUnsignedCertificateJsonCertificate. # noqa: E501 + :rtype: str + """ + return self._display_html + + @display_html.setter + def display_html(self, display_html): + """Sets the display_html of this ControllerCertToolsGenerateUnsignedCertificateJsonCertificate. + + + :param display_html: The display_html of this ControllerCertToolsGenerateUnsignedCertificateJsonCertificate. # noqa: E501 + :type: str + """ + + self._display_html = display_html + + @property + def crid(self): + """Gets the crid of this ControllerCertToolsGenerateUnsignedCertificateJsonCertificate. # noqa: E501 + + + :return: The crid of this ControllerCertToolsGenerateUnsignedCertificateJsonCertificate. 
# noqa: E501 + :rtype: str + """ + return self._crid + + @crid.setter + def crid(self, crid): + """Sets the crid of this ControllerCertToolsGenerateUnsignedCertificateJsonCertificate. + + + :param crid: The crid of this ControllerCertToolsGenerateUnsignedCertificateJsonCertificate. # noqa: E501 + :type: str + """ + if crid is None: + raise ValueError("Invalid value for `crid`, must not be `None`") # noqa: E501 + + self._crid = crid + + @property + def crid_type(self): + """Gets the crid_type of this ControllerCertToolsGenerateUnsignedCertificateJsonCertificate. # noqa: E501 + + + :return: The crid_type of this ControllerCertToolsGenerateUnsignedCertificateJsonCertificate. # noqa: E501 + :rtype: str + """ + return self._crid_type + + @crid_type.setter + def crid_type(self, crid_type): + """Sets the crid_type of this ControllerCertToolsGenerateUnsignedCertificateJsonCertificate. + + + :param crid_type: The crid_type of this ControllerCertToolsGenerateUnsignedCertificateJsonCertificate. # noqa: E501 + :type: str + """ + + self._crid_type = crid_type + + @property + def metadata_json(self): + """Gets the metadata_json of this ControllerCertToolsGenerateUnsignedCertificateJsonCertificate. # noqa: E501 + + + :return: The metadata_json of this ControllerCertToolsGenerateUnsignedCertificateJsonCertificate. # noqa: E501 + :rtype: str + """ + return self._metadata_json + + @metadata_json.setter + def metadata_json(self, metadata_json): + """Sets the metadata_json of this ControllerCertToolsGenerateUnsignedCertificateJsonCertificate. + + + :param metadata_json: The metadata_json of this ControllerCertToolsGenerateUnsignedCertificateJsonCertificate. # noqa: E501 + :type: str + """ + + self._metadata_json = metadata_json + + @property + def proof(self): + """Gets the proof of this ControllerCertToolsGenerateUnsignedCertificateJsonCertificate. # noqa: E501 + + + :return: The proof of this ControllerCertToolsGenerateUnsignedCertificateJsonCertificate. # noqa: E501 + :rtype: object + """ + return self._proof + + @proof.setter + def proof(self, proof): + """Sets the proof of this ControllerCertToolsGenerateUnsignedCertificateJsonCertificate. + + + :param proof: The proof of this ControllerCertToolsGenerateUnsignedCertificateJsonCertificate. 
# noqa: E501 + :type: object + """ + if proof is None: + raise ValueError("Invalid value for `proof`, must not be `None`") # noqa: E501 + + self._proof = proof + + def to_dict(self): + """Returns the model properties as a dict""" + result = {} + + for attr, _ in six.iteritems(self.swagger_types): + value = getattr(self, attr) + if isinstance(value, list): + result[attr] = list(map( + lambda x: x.to_dict() if hasattr(x, "to_dict") else x, + value + )) + elif hasattr(value, "to_dict"): + result[attr] = value.to_dict() + elif isinstance(value, dict): + result[attr] = dict(map( + lambda item: (item[0], item[1].to_dict()) + if hasattr(item[1], "to_dict") else item, + value.items() + )) + else: + result[attr] = value + if issubclass(ControllerCertToolsGenerateUnsignedCertificateJsonCertificate, dict): + for key, value in self.items(): + result[key] = value + + return result + + def to_str(self): + """Returns the string representation of the model""" + return pprint.pformat(self.to_dict()) + + def __repr__(self): + """For `print` and `pprint`""" + return self.to_str() + + def __eq__(self, other): + """Returns true if both objects are equal""" + if not isinstance(other, ControllerCertToolsGenerateUnsignedCertificateJsonCertificate): + return False + + return self.__dict__ == other.__dict__ + + def __ne__(self, other): + """Returns true if both objects are not equal""" + return not self == other diff --git a/src/caosadvancedtools/bloxberg/swagger_client/models/http_validation_error.py b/src/caosadvancedtools/bloxberg/swagger_client/models/http_validation_error.py new file mode 100644 index 0000000000000000000000000000000000000000..21c9e467311c596499f3f408c5ac670b5852c6fa --- /dev/null +++ b/src/caosadvancedtools/bloxberg/swagger_client/models/http_validation_error.py @@ -0,0 +1,110 @@ +# coding: utf-8 + +""" + Research Object Certification + + No description provided (generated by Swagger Codegen https://github.com/swagger-api/swagger-codegen) # noqa: E501 + + OpenAPI spec version: 0.2.0 + + Generated by: https://github.com/swagger-api/swagger-codegen.git +""" + +import pprint +import re # noqa: F401 + +import six + +class HTTPValidationError(object): + """NOTE: This class is auto generated by the swagger code generator program. + + Do not edit the class manually. + """ + """ + Attributes: + swagger_types (dict): The key is attribute name + and the value is attribute type. + attribute_map (dict): The key is attribute name + and the value is json key in definition. + """ + swagger_types = { + 'detail': 'list[ValidationError]' + } + + attribute_map = { + 'detail': 'detail' + } + + def __init__(self, detail=None): # noqa: E501 + """HTTPValidationError - a model defined in Swagger""" # noqa: E501 + self._detail = None + self.discriminator = None + if detail is not None: + self.detail = detail + + @property + def detail(self): + """Gets the detail of this HTTPValidationError. # noqa: E501 + + + :return: The detail of this HTTPValidationError. # noqa: E501 + :rtype: list[ValidationError] + """ + return self._detail + + @detail.setter + def detail(self, detail): + """Sets the detail of this HTTPValidationError. + + + :param detail: The detail of this HTTPValidationError. 
# noqa: E501 + :type: list[ValidationError] + """ + + self._detail = detail + + def to_dict(self): + """Returns the model properties as a dict""" + result = {} + + for attr, _ in six.iteritems(self.swagger_types): + value = getattr(self, attr) + if isinstance(value, list): + result[attr] = list(map( + lambda x: x.to_dict() if hasattr(x, "to_dict") else x, + value + )) + elif hasattr(value, "to_dict"): + result[attr] = value.to_dict() + elif isinstance(value, dict): + result[attr] = dict(map( + lambda item: (item[0], item[1].to_dict()) + if hasattr(item[1], "to_dict") else item, + value.items() + )) + else: + result[attr] = value + if issubclass(HTTPValidationError, dict): + for key, value in self.items(): + result[key] = value + + return result + + def to_str(self): + """Returns the string representation of the model""" + return pprint.pformat(self.to_dict()) + + def __repr__(self): + """For `print` and `pprint`""" + return self.to_str() + + def __eq__(self, other): + """Returns true if both objects are equal""" + if not isinstance(other, HTTPValidationError): + return False + + return self.__dict__ == other.__dict__ + + def __ne__(self, other): + """Returns true if both objects are not equal""" + return not self == other diff --git a/src/caosadvancedtools/bloxberg/swagger_client/models/validation_error.py b/src/caosadvancedtools/bloxberg/swagger_client/models/validation_error.py new file mode 100644 index 0000000000000000000000000000000000000000..7ae6bf0900449ff3612798a4503692c4e38e1c11 --- /dev/null +++ b/src/caosadvancedtools/bloxberg/swagger_client/models/validation_error.py @@ -0,0 +1,165 @@ +# coding: utf-8 + +""" + Research Object Certification + + No description provided (generated by Swagger Codegen https://github.com/swagger-api/swagger-codegen) # noqa: E501 + + OpenAPI spec version: 0.2.0 + + Generated by: https://github.com/swagger-api/swagger-codegen.git +""" + +import pprint +import re # noqa: F401 + +import six + +class ValidationError(object): + """NOTE: This class is auto generated by the swagger code generator program. + + Do not edit the class manually. + """ + """ + Attributes: + swagger_types (dict): The key is attribute name + and the value is attribute type. + attribute_map (dict): The key is attribute name + and the value is json key in definition. + """ + swagger_types = { + 'loc': 'list[str]', + 'msg': 'str', + 'type': 'str' + } + + attribute_map = { + 'loc': 'loc', + 'msg': 'msg', + 'type': 'type' + } + + def __init__(self, loc=None, msg=None, type=None): # noqa: E501 + """ValidationError - a model defined in Swagger""" # noqa: E501 + self._loc = None + self._msg = None + self._type = None + self.discriminator = None + self.loc = loc + self.msg = msg + self.type = type + + @property + def loc(self): + """Gets the loc of this ValidationError. # noqa: E501 + + + :return: The loc of this ValidationError. # noqa: E501 + :rtype: list[str] + """ + return self._loc + + @loc.setter + def loc(self, loc): + """Sets the loc of this ValidationError. + + + :param loc: The loc of this ValidationError. # noqa: E501 + :type: list[str] + """ + if loc is None: + raise ValueError("Invalid value for `loc`, must not be `None`") # noqa: E501 + + self._loc = loc + + @property + def msg(self): + """Gets the msg of this ValidationError. # noqa: E501 + + + :return: The msg of this ValidationError. # noqa: E501 + :rtype: str + """ + return self._msg + + @msg.setter + def msg(self, msg): + """Sets the msg of this ValidationError. + + + :param msg: The msg of this ValidationError. 
# noqa: E501 + :type: str + """ + if msg is None: + raise ValueError("Invalid value for `msg`, must not be `None`") # noqa: E501 + + self._msg = msg + + @property + def type(self): + """Gets the type of this ValidationError. # noqa: E501 + + + :return: The type of this ValidationError. # noqa: E501 + :rtype: str + """ + return self._type + + @type.setter + def type(self, type): + """Sets the type of this ValidationError. + + + :param type: The type of this ValidationError. # noqa: E501 + :type: str + """ + if type is None: + raise ValueError("Invalid value for `type`, must not be `None`") # noqa: E501 + + self._type = type + + def to_dict(self): + """Returns the model properties as a dict""" + result = {} + + for attr, _ in six.iteritems(self.swagger_types): + value = getattr(self, attr) + if isinstance(value, list): + result[attr] = list(map( + lambda x: x.to_dict() if hasattr(x, "to_dict") else x, + value + )) + elif hasattr(value, "to_dict"): + result[attr] = value.to_dict() + elif isinstance(value, dict): + result[attr] = dict(map( + lambda item: (item[0], item[1].to_dict()) + if hasattr(item[1], "to_dict") else item, + value.items() + )) + else: + result[attr] = value + if issubclass(ValidationError, dict): + for key, value in self.items(): + result[key] = value + + return result + + def to_str(self): + """Returns the string representation of the model""" + return pprint.pformat(self.to_dict()) + + def __repr__(self): + """For `print` and `pprint`""" + return self.to_str() + + def __eq__(self, other): + """Returns true if both objects are equal""" + if not isinstance(other, ValidationError): + return False + + return self.__dict__ == other.__dict__ + + def __ne__(self, other): + """Returns true if both objects are not equal""" + return not self == other diff --git a/src/caosadvancedtools/bloxberg/swagger_client/rest.py b/src/caosadvancedtools/bloxberg/swagger_client/rest.py new file mode 100644 index 0000000000000000000000000000000000000000..c42e720c284832da70996e0eb885f6ffdcbb52d2 --- /dev/null +++ b/src/caosadvancedtools/bloxberg/swagger_client/rest.py @@ -0,0 +1,322 @@ +# coding: utf-8 + +""" + Research Object Certification + + No description provided (generated by Swagger Codegen https://github.com/swagger-api/swagger-codegen) # noqa: E501 + + OpenAPI spec version: 0.2.0 + + Generated by: https://github.com/swagger-api/swagger-codegen.git +""" + +from __future__ import absolute_import + +import io +import json +import logging +import re +import ssl + +import certifi +# python 2 and python 3 compatibility library +import six +from six.moves.urllib.parse import urlencode + +try: + import urllib3 +except ImportError: + raise ImportError('Swagger python client requires urllib3.') + + +logger = logging.getLogger(__name__) + + +class RESTResponse(io.IOBase): + + def __init__(self, resp): + self.urllib3_response = resp + self.status = resp.status + self.reason = resp.reason + self.data = resp.data + + def getheaders(self): + """Returns a dictionary of the response headers.""" + return self.urllib3_response.getheaders() + + def getheader(self, name, default=None): + """Returns a given response header.""" + return self.urllib3_response.getheader(name, default) + + +class RESTClientObject(object): + + def __init__(self, configuration, pools_size=4, maxsize=None): + # urllib3.PoolManager will pass all kw parameters to connectionpool + # https://github.com/shazow/urllib3/blob/f9409436f83aeb79fbaf090181cd81b784f1b8ce/urllib3/poolmanager.py#L75 # noqa: E501 + # 
https://github.com/shazow/urllib3/blob/f9409436f83aeb79fbaf090181cd81b784f1b8ce/urllib3/connectionpool.py#L680 # noqa: E501 + # maxsize is the number of requests to host that are allowed in parallel # noqa: E501 + # Custom SSL certificates and client certificates: http://urllib3.readthedocs.io/en/latest/advanced-usage.html # noqa: E501 + + # cert_reqs + if configuration.verify_ssl: + cert_reqs = ssl.CERT_REQUIRED + else: + cert_reqs = ssl.CERT_NONE + + # ca_certs + if configuration.ssl_ca_cert: + ca_certs = configuration.ssl_ca_cert + else: + # if not set certificate file, use Mozilla's root certificates. + ca_certs = certifi.where() + + addition_pool_args = {} + if configuration.assert_hostname is not None: + addition_pool_args['assert_hostname'] = configuration.assert_hostname # noqa: E501 + + if maxsize is None: + if configuration.connection_pool_maxsize is not None: + maxsize = configuration.connection_pool_maxsize + else: + maxsize = 4 + + # https pool manager + if configuration.proxy: + self.pool_manager = urllib3.ProxyManager( + num_pools=pools_size, + maxsize=maxsize, + cert_reqs=cert_reqs, + ca_certs=ca_certs, + cert_file=configuration.cert_file, + key_file=configuration.key_file, + proxy_url=configuration.proxy, + **addition_pool_args + ) + else: + self.pool_manager = urllib3.PoolManager( + num_pools=pools_size, + maxsize=maxsize, + cert_reqs=cert_reqs, + ca_certs=ca_certs, + cert_file=configuration.cert_file, + key_file=configuration.key_file, + **addition_pool_args + ) + + def request(self, method, url, query_params=None, headers=None, + body=None, post_params=None, _preload_content=True, + _request_timeout=None): + """Perform requests. + + :param method: http request method + :param url: http request url + :param query_params: query parameters in the url + :param headers: http request headers + :param body: request json body, for `application/json` + :param post_params: request post parameters, + `application/x-www-form-urlencoded` + and `multipart/form-data` + :param _preload_content: if False, the urllib3.HTTPResponse object will + be returned without reading/decoding response + data. Default is True. + :param _request_timeout: timeout setting for this request. If one + number provided, it will be total request + timeout. It can also be a pair (tuple) of + (connection, read) timeouts. + """ + method = method.upper() + assert method in ['GET', 'HEAD', 'DELETE', 'POST', 'PUT', + 'PATCH', 'OPTIONS'] + + if post_params and body: + raise ValueError( + "body parameter cannot be used with post_params parameter." + ) + + post_params = post_params or {} + headers = headers or {} + + timeout = None + if _request_timeout: + if isinstance(_request_timeout, (int, ) if six.PY3 else (int, long)): # noqa: E501,F821 + timeout = urllib3.Timeout(total=_request_timeout) + elif (isinstance(_request_timeout, tuple) and + len(_request_timeout) == 2): + timeout = urllib3.Timeout( + connect=_request_timeout[0], read=_request_timeout[1]) + + if 'Content-Type' not in headers: + headers['Content-Type'] = 'application/json' + + try: + # For `POST`, `PUT`, `PATCH`, `OPTIONS`, `DELETE` + if method in ['POST', 'PUT', 'PATCH', 'OPTIONS', 'DELETE']: + if query_params: + url += '?' 
+ urlencode(query_params) + if re.search('json', headers['Content-Type'], re.IGNORECASE): + request_body = '{}' + if body is not None: + request_body = json.dumps(body) + r = self.pool_manager.request( + method, url, + body=request_body, + preload_content=_preload_content, + timeout=timeout, + headers=headers) + elif headers['Content-Type'] == 'application/x-www-form-urlencoded': # noqa: E501 + r = self.pool_manager.request( + method, url, + fields=post_params, + encode_multipart=False, + preload_content=_preload_content, + timeout=timeout, + headers=headers) + elif headers['Content-Type'] == 'multipart/form-data': + # must del headers['Content-Type'], or the correct + # Content-Type which generated by urllib3 will be + # overwritten. + del headers['Content-Type'] + r = self.pool_manager.request( + method, url, + fields=post_params, + encode_multipart=True, + preload_content=_preload_content, + timeout=timeout, + headers=headers) + # Pass a `string` parameter directly in the body to support + # other content types than Json when `body` argument is + # provided in serialized form + elif isinstance(body, str): + request_body = body + r = self.pool_manager.request( + method, url, + body=request_body, + preload_content=_preload_content, + timeout=timeout, + headers=headers) + else: + # Cannot generate the request from given parameters + msg = """Cannot prepare a request message for provided + arguments. Please check that your arguments match + declared content type.""" + raise ApiException(status=0, reason=msg) + # For `GET`, `HEAD` + else: + r = self.pool_manager.request(method, url, + fields=query_params, + preload_content=_preload_content, + timeout=timeout, + headers=headers) + except urllib3.exceptions.SSLError as e: + msg = "{0}\n{1}".format(type(e).__name__, str(e)) + raise ApiException(status=0, reason=msg) + + if _preload_content: + r = RESTResponse(r) + + # In the python 3, the response.data is bytes. + # we need to decode it to string. 
+ if six.PY3: + r.data = r.data.decode('utf8') + + # log response body + logger.debug("response body: %s", r.data) + + if not 200 <= r.status <= 299: + raise ApiException(http_resp=r) + + return r + + def GET(self, url, headers=None, query_params=None, _preload_content=True, + _request_timeout=None): + return self.request("GET", url, + headers=headers, + _preload_content=_preload_content, + _request_timeout=_request_timeout, + query_params=query_params) + + def HEAD(self, url, headers=None, query_params=None, _preload_content=True, + _request_timeout=None): + return self.request("HEAD", url, + headers=headers, + _preload_content=_preload_content, + _request_timeout=_request_timeout, + query_params=query_params) + + def OPTIONS(self, url, headers=None, query_params=None, post_params=None, + body=None, _preload_content=True, _request_timeout=None): + return self.request("OPTIONS", url, + headers=headers, + query_params=query_params, + post_params=post_params, + _preload_content=_preload_content, + _request_timeout=_request_timeout, + body=body) + + def DELETE(self, url, headers=None, query_params=None, body=None, + _preload_content=True, _request_timeout=None): + return self.request("DELETE", url, + headers=headers, + query_params=query_params, + _preload_content=_preload_content, + _request_timeout=_request_timeout, + body=body) + + def POST(self, url, headers=None, query_params=None, post_params=None, + body=None, _preload_content=True, _request_timeout=None): + return self.request("POST", url, + headers=headers, + query_params=query_params, + post_params=post_params, + _preload_content=_preload_content, + _request_timeout=_request_timeout, + body=body) + + def PUT(self, url, headers=None, query_params=None, post_params=None, + body=None, _preload_content=True, _request_timeout=None): + return self.request("PUT", url, + headers=headers, + query_params=query_params, + post_params=post_params, + _preload_content=_preload_content, + _request_timeout=_request_timeout, + body=body) + + def PATCH(self, url, headers=None, query_params=None, post_params=None, + body=None, _preload_content=True, _request_timeout=None): + return self.request("PATCH", url, + headers=headers, + query_params=query_params, + post_params=post_params, + _preload_content=_preload_content, + _request_timeout=_request_timeout, + body=body) + + +class ApiException(Exception): + + def __init__(self, status=None, reason=None, http_resp=None): + if http_resp: + self.status = http_resp.status + self.reason = http_resp.reason + self.body = http_resp.data + self.headers = http_resp.getheaders() + else: + self.status = status + self.reason = reason + self.body = None + self.headers = None + + def __str__(self): + """Custom error messages for exception""" + error_message = "({0})\n"\ + "Reason: {1}\n".format(self.status, self.reason) + if self.headers: + error_message += "HTTP response headers: {0}\n".format( + self.headers) + + if self.body: + error_message += "HTTP response body: {0}\n".format(self.body) + + return error_message diff --git a/src/caosadvancedtools/cache.py b/src/caosadvancedtools/cache.py index eb9678ac73b0c49a84ad606fc085a60cf140b35a..62096a926603de844fb2581616ffdac33080f368 100644 --- a/src/caosadvancedtools/cache.py +++ b/src/caosadvancedtools/cache.py @@ -33,6 +33,8 @@ from hashlib import sha256 import caosdb as db from lxml import etree +import tempfile + def put_in_container(stuff): if isinstance(stuff, list): @@ -44,11 +46,23 @@ def put_in_container(stuff): return stuff +def cleanXML(xml): + # remove 
transaction benchmark + props = xml.findall('TransactionBenchmark') + + for prop in props: + parent = prop.find("..") + parent.remove(prop) + + return xml + + def get_pretty_xml(cont): cont = put_in_container(cont) + xml = cont.to_xml(local_serialization=True) + cleanXML(xml) - return etree.tounicode(cont.to_xml( - local_serialization=True), pretty_print=True) + return etree.tounicode(xml, pretty_print=True) class AbstractCache(ABC): @@ -110,7 +124,8 @@ class AbstractCache(ABC): current_schema = 1 if current_schema > self.get_cache_schema_version(): - raise RuntimeError("Cache is corrupt or was created with a future version of this program.") + raise RuntimeError( + "Cache is corrupt or was created with a future version of this program.") elif current_schema < self.get_cache_schema_version(): raise RuntimeError("Cache version too old.") @@ -319,7 +334,11 @@ class UpdateCache(AbstractCache): return "/tmp/crawler_update_cache.db" def __init__(self, db_file=None, force_creation=False): - super().__init__(db_file, force_creation) + if db_file is None: + tmppath = tempfile.gettempdir() + tmpf = os.path.join(tmppath, "crawler_update_cache.db") + db_file = tmpf + super().__init__(db_file=db_file, force_creation=force_creation) @staticmethod def get_previous_version(cont): @@ -335,13 +354,16 @@ class UpdateCache(AbstractCache): return old_ones def insert(self, cont, run_id): - """ insert a pending, unauthorized update + """Insert a pending, unauthorized update - Parameters: - ----------- + + Parameters + ---------- cont: Container with the records to be updated containing the desired version, i.e. the state after the update. - run_id: the id of the crawler run + + run_id: int + The id of the crawler run """ cont = put_in_container(cont) old_ones = UpdateCache.get_previous_version(cont) diff --git a/src/caosadvancedtools/cfood.py b/src/caosadvancedtools/cfood.py index 54a6b8093748a2e9ee5986bf5efe2e9f0dbdb078..4a9f955a17fc429deb6cdd10c3645700e579b4df 100644 --- a/src/caosadvancedtools/cfood.py +++ b/src/caosadvancedtools/cfood.py @@ -1,13 +1,15 @@ #!/usr/bin/env python # encoding: utf-8 # -# ** header v3.0 # This file is a part of the CaosDB Project. # # Copyright (C) 2018 Research Group Biomedical Physics, # Max-Planck-Institute for Dynamics and Self-Organization Göttingen +# Copyright (C) 2019-2022 IndiScale GmbH <info@indiscale.com> # Copyright (C) 2019,2020 Henrik tom Wörden -# Copyright (C) 2020 Florian Spreckelsen <f.spreckelsen@indiscale.com> +# Copyright (C) 2020-2022 Florian Spreckelsen <f.spreckelsen@indiscale.com> +# Copyright (C) 2021 University Medical Center Göttingen, Institute for Medical Informatics +# Copyright (C) 2021 Florian Spreckelsen <florian.spreckelsen@med.uni-goettingen.de> # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU Affero General Public License as @@ -21,8 +23,6 @@ # # You should have received a copy of the GNU Affero General Public License # along with this program. If not, see <https://www.gnu.org/licenses/>. -# -# ** end header """ Defines how something that shall be inserted into CaosDB is treated.
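The cleanXML helper added above strips the server-generated TransactionBenchmark elements before a container is serialized for the cache. A minimal standalone sketch of the same idea, using plain lxml on a hand-written XML snippet (only the element name is taken from the code above; everything else is illustrative):

from lxml import etree

# Illustrative serialized container with benchmark noise in it.
xml = etree.fromstring(
    "<Response><Record name='r1'/>"
    "<TransactionBenchmark><Measurement/></TransactionBenchmark></Response>")

for bench in xml.findall("TransactionBenchmark"):
    # lxml elements know their parent, so removal is straightforward.
    bench.getparent().remove(bench)

print(etree.tounicode(xml, pretty_print=True))  # the benchmark element is gone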
CaosDB can automatically be filled with Records based on some structure, a file @@ -44,7 +44,9 @@ from abc import ABCMeta, abstractmethod from datetime import datetime import caosdb as db -from caosdb.exceptions import AmbiguityException, EntityDoesNotExistError +from caosdb.common.models import Entity +from caosdb.exceptions import (BadQueryError, EmptyUniqueQueryError, + QueryNotUniqueError, TransactionError) from .datamodel_problems import DataModelProblems from .guard import global_guard as guard @@ -73,12 +75,12 @@ def get_entity(name): def get_property(name): - """Returns the record type with a given name, preferably from a local + """Returns the property with a given name, preferably from a local cache. If the local cache does not contain the record type, try to - retrieve it from CaosDB. If it does not exist, add it to the data - model problems + retrieve it from CaosDB. If it does not exist, see whether it + could be a record type used as a property. """ @@ -87,7 +89,7 @@ def get_property(name): prop = db.execute_query("FIND Property with name='{}'".format( name), unique=True) - except (EntityDoesNotExistError, AmbiguityException): + except (EmptyUniqueQueryError, QueryNotUniqueError): # Property might actually be a RecordTypes prop = get_recordtype(name) PROPERTIES[name] = prop @@ -125,7 +127,7 @@ def get_recordtype(name): try: rec = db.execute_query("FIND RecordType WITH name='{}'".format(name), unique=True) - except (EntityDoesNotExistError, AmbiguityException) as e: + except (EmptyUniqueQueryError, QueryNotUniqueError) as e: DataModelProblems.add(name) raise e RECORDTYPES[name] = rec @@ -148,13 +150,24 @@ fileguide = FileGuide() class AbstractCFood(object, metaclass=ABCMeta): + """ Abstract base class for Crawler food (CFood).""" def __init__(self, item): - """ Abstract base class for Crawler food (CFood).""" + """A CFood has two main methods which must be customized: + + 1. `create_identifiables` + This method defines (and inserts if necessary) the identifiables which may be updated at a + later stage. After calling this method, the `identifiables` Container contains those + Records which will be updated at a later time. + + 2. `update_identifiables` + This method updates the stored identifiables as necessary. + """ self.to_be_updated = db.Container() self.identifiables = db.Container() self.item = item self.attached_items = [] + self.update_flags = {} @abstractmethod def create_identifiables(self): @@ -264,7 +277,7 @@ def get_entity_for_path(path): FILES[path] = db.execute_query(q, unique=True) return FILES[path] - except EntityDoesNotExistError: + except BadQueryError: path_prefix = "**" if not path.startswith("/"): @@ -294,7 +307,7 @@ class AbstractFileCFood(AbstractCFood): super().__init__(*args, item=crawled_path, **kwargs) self._crawled_file = None self.crawled_path = crawled_path - self.match = re.match(type(self).get_re(), crawled_path) + self.match = re.match(self.get_re(), crawled_path) self.attached_filenames = [] @property @@ -305,7 +318,31 @@ class AbstractFileCFood(AbstractCFood): return self._crawled_file @staticmethod - def get_re(): + def re_from_extensions(extensions): + """Return a regular expression which matches the given file extensions. + + Useful for inheriting classes. + + Parameters + ---------- + extensions : iterable<str> + An iterable with the allowed extensions. + + Returns + ------- + out : str + The regular expression, starting with ``.*\\.`` and ending with the EOL dollar + character. 
The actual extension will be accessible in the + :py:attribute:`pattern group name<python:re.Pattern.groupindex>` ``ext``. + """ + + if not extensions: + return None + + return r".*\.(?P<ext>" + "|".join(extensions) + ")$" + + @classmethod + def get_re(cls): """ Returns the regular expression used to identify files that shall be processed @@ -349,19 +386,22 @@ class AbstractFileCFood(AbstractCFood): def assure_object_is_in_list(obj, containing_object, property_name, - to_be_updated, datatype=None): - """ - Checks whether `obj` is one of the values in the list property - `property_name` of the supplied entity containing_object`. + to_be_updated=None, datatype=None): + """Checks whether `obj` is one of the values in the list property + `property_name` of the supplied entity `containing_object`. - If this is the case this function returns. Otherwise the entity is added to - the property `property_name` and the entity `containing_object` is added to - the supplied list to_be_updated in order to indicate, that the entity - `containing_object` should be updated. + If this is the case, this function returns. Otherwise the entity is + added to the property `property_name` and the entity + `containing_object` is added to the supplied list to_be_updated in + order to indicate that the entity `containing_object` should be + updated. If None is submitted, the update will be conducted + in-place. - If the property is missing, it is added first and then the entity is added. + If the property is missing, it is added first and then the entity + is added/updated. If obj is a list, every element is added. + + """ if datatype is None: @@ -370,12 +410,15 @@ def assure_object_is_in_list(obj, containing_object, property_name, if containing_object.get_property(property_name) is None: containing_object.add_property(property_name, value=[], datatype=datatype) + # TODO: case where multiple times the same property exists is not treated - if not isinstance(containing_object.get_property(property_name).value, list): - containing_object.get_property(property_name).value = [ - containing_object.get_property(property_name).value] - containing_object.get_property(property_name).datatype = datatype - current_list = containing_object.get_property(property_name).value + list_prop = containing_object.get_property(property_name) + if list_prop.value is None: + list_prop.value = [] + elif not isinstance(list_prop.value, list): + list_prop.value = [list_prop.value] + list_prop.datatype = datatype + current_list = list_prop.value if not isinstance(obj, list): objects = [obj] @@ -409,7 +452,12 @@ def assure_object_is_in_list(obj, containing_object, property_name, update = True if update: - to_be_updated.append(containing_object) + if to_be_updated is not None: + to_be_updated.append(containing_object) + else: + get_ids_for_entities_with_names([containing_object]) + + guard.safe_update(containing_object) def assure_special_is(entity, value, kind, to_be_updated=None, force=False): @@ -615,8 +663,20 @@ def assure_has_property(entity, name, value, to_be_updated=None, if isinstance(value, db.Entity): value = value.id + if isinstance(value, list): + value = [i.id if isinstance(i, db.Entity) else i for i in value] + for el in possible_properties: - if el.value == value: + tmp_value = el.value + + if isinstance(tmp_value, db.Entity): + tmp_value = el.value.id + + if isinstance(tmp_value, list): + tmp_value = [i.id if isinstance( + i, db.Entity) else i for i in tmp_value] + + if tmp_value == value: contained = True break @@ -750,10 +810,12 @@
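To make the helper's output concrete, here is a self-contained check of the pattern that re_from_extensions builds (pure re, no CaosDB needed; the file names are invented):

import re

extensions = ["h5", "nc", "hdf", "hdf5"]
pattern = r".*\.(?P<ext>" + "|".join(extensions) + ")$"
# pattern == r".*\.(?P<ext>h5|nc|hdf|hdf5)$"

match = re.match(pattern, "/data/2021-05-06/measurement.hdf5")
assert match is not None and match.group("ext") == "hdf5"
# Files with other extensions do not match:
assert re.match(pattern, "/data/2021-05-06/notes.txt") is None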
class RowCFood(AbstractCFood): for key, value in self.item.iteritems(): if key in self.unique_cols: continue - rec.add_property(key, value) + assure_property_is(rec, key, + value, + to_be_updated=self.to_be_updated) -class CMeal(object): +class CMeal(): """ CMeal groups equivalent items and allow their collected insertion. @@ -781,12 +843,23 @@ class CMeal(object): matching_groups = [] def __init__(self): + self.item = None + # FIXME is this only necessary because of inconsistent use of super().__init__()? + if "match" not in self.__dict__: + self.match = None self.__class__.existing_instances.append(self) + @staticmethod + def get_re(): + raise NotImplementedError("Subclasses must implement this function.") + @classmethod def all_groups_equal(cls, m1, m2): equal = True + if m2 is None: + return False + for group in cls.matching_groups: if (group not in m1.groupdict() or group not in m2.groupdict() or @@ -818,5 +891,5 @@ class CMeal(object): if match is None: return False - else: - return self.all_groups_equal(match, self.match) + + return self.all_groups_equal(match, self.match) diff --git a/src/caosadvancedtools/cfoods/__init__.py b/src/caosadvancedtools/cfoods/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..30ce05add09a223c2f65dbe187a6cfb1768d7a22 --- /dev/null +++ b/src/caosadvancedtools/cfoods/__init__.py @@ -0,0 +1,21 @@ +#!/usr/bin/env python3 + +# This file is a part of the CaosDB Project. +# +# Copyright (C) 2020 IndiScale GmbH <www.indiscale.com> +# Copyright (C) 2020 Daniel Hornung <d.hornung@indiscale.com> +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see <https://www.gnu.org/licenses/>. + +"""Specialized CFoods.""" diff --git a/src/caosadvancedtools/cfoods/h5.py b/src/caosadvancedtools/cfoods/h5.py new file mode 100644 index 0000000000000000000000000000000000000000..cbf9d0baefa435b71eeaeefe63a9b018faabe7ea --- /dev/null +++ b/src/caosadvancedtools/cfoods/h5.py @@ -0,0 +1,290 @@ +#!/usr/bin/env python3 + +# This file is a part of the CaosDB Project. +# +# Copyright (C) 2020,2021 IndiScale GmbH <www.indiscale.com> +# Copyright (C) 2020 Daniel Hornung <d.hornung@indiscale.com> +# Copyright (C) 2021 Henrik tom Wörden <h.tomwoerden@indiscale.com> +# Copyright (C) 2021 Alexander Kreft +# Copyright (C) 2021 Laboratory for Fluid Physics and Biocomplexity, +# Max-Planck-Institut für Dynamik und Selbstorganisation <www.lfpn.ds.mpg.de> +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details.
+# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see <https://www.gnu.org/licenses/>. + +"""A CFood for hdf5 files + + +This module allows parsing hdf5 files and reproducing their structure in the +form of Records that reference each other. + +hdf5 files are composed of groups and datasets, both of which can have +attributes. Groups and datasets are mapped to Records and attributes to +Properties. +""" + +import re +from copy import deepcopy + +import caosdb as db +import h5py +import numpy as np +from caosadvancedtools.cfood import fileguide +from caosdb.common.datatype import is_reference +from caosdb.common.utils import uuid + +from ..cfood import (AbstractFileCFood, assure_has_description, + assure_has_parent, assure_has_property, + assure_property_is) +from ..structure_mapping import (EntityMapping, collect_existing_structure, + update_structure) + + +def h5_attr_to_property(val): + """Returns the value and datatype of a CaosDB Property for the given value. + + + 1d arrays are converted to lists. + If no suitable Property can be created, (None, None) is returned. + + 2d and higher dimensionality arrays are ignored. + """ + + if isinstance(val, str): + return val, db.TEXT + elif isinstance(val, complex): + return val, db.TEXT + else: + if not hasattr(val, 'dtype'): + raise NotImplementedError("Code assumes only str are missing the " + "dtype attribute") + + if issubclass(val.dtype.type, np.floating): + dtype = db.DOUBLE + elif issubclass(val.dtype.type, np.integer): + dtype = db.INTEGER + elif val.dtype.kind in ['S', 'U']: + dtype = db.TEXT + val = val.astype(str) + elif val.dtype.kind == 'O': + if not np.all([isinstance(el, str) for el in val]): + raise NotImplementedError("Cannot convert arbitrary objects") + dtype = db.TEXT + val = val.astype(str) + else: + raise NotImplementedError("Unknown dtype used") + + if isinstance(val, np.ndarray): + if val.ndim > 1: + return None, None + # The tolist method is on both numpy.ndarray and numpy.generic + # and properly converts scalars (including 0-dimensional + # numpy.ndarray) to Python scalars and 1D arrays to lists of + # Python scalars. + if val.ndim != 0: + dtype = db.LIST(dtype) + val = val.tolist() + + # TODO this can eventually be removed + + if hasattr(val, 'ndim'): + if not isinstance(val, np.ndarray) and val.ndim != 0: + print(val, val.ndim) + raise Exception( + "Implementation assumes that only np.arrays have ndim.") + + return val, dtype + + +class H5CFood(AbstractFileCFood): + """ H5CFood which consumes an HDF5 file. + + The structure is mapped onto an equivalent structure of interconnected + Records. + + Attributes + ---------- + h5file : h5py.File, default None + The hdf5 file that is read + """ + + # to be overwritten by subclasses + + def __init__(self, *args, **kwargs): + """CFood which consumes HDF5 files.""" + super().__init__(*args, **kwargs) + self.h5file = None + self.identifiable_root = None + self.root_name = "root" + self.hdf5Container = db.Container() + self.em = EntityMapping() + + def collect_information(self): + self.h5file = h5py.File(fileguide.access(self.crawled_path), 'r') + + @staticmethod + def get_re(): + """Return a regular expression string to match *.h5, *.nc, *.hdf, *.hdf5.""" + extensions = [ + "h5", + "nc", + "hdf", + "hdf5", + ] + + return AbstractFileCFood.re_from_extensions(extensions) + + def create_identifiables(self): + """Create identifiables out of groups in the HDF5 file.
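A quick illustration of the value/datatype mapping that h5_attr_to_property performs, assuming numpy, caosdb and this new module are importable (the attribute values are invented):

import numpy as np
import caosdb as db
from caosadvancedtools.cfoods.h5 import h5_attr_to_property

# A scalar numpy float becomes a plain value with DOUBLE datatype.
val, dtype = h5_attr_to_property(np.float64(1.5))
assert val == 1.5 and dtype == db.DOUBLE

# A 1d integer array becomes a Python list with LIST<INTEGER> datatype.
val, dtype = h5_attr_to_property(np.array([1, 2, 3]))
assert val == [1, 2, 3] and dtype == db.LIST(db.INTEGER)

# 2d and higher-dimensional arrays are ignored.
assert h5_attr_to_property(np.zeros((2, 2))) == (None, None)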
+ + This method will call is_identifiable(h5path, h5object) and create_identifiable(h5path, + h5object) on each HDF5 object to decide and actually create the identifiables. + """ + # manually create the identifiable root element: self.identifiable_root + self.structure = self.create_structure(self.h5file, + special_treatment=self.special_treatment, + root_name=self.root_name) + + def update_identifiables(self): + """Check if the identifiables need to be updated. + + In that case also add the updated entities to the list of updateables. + + This method will iterate over the groups and datasets governed by this CFood's identifiables + and call ``update_object(path, h5object)`` on each object. + + """ + + self.structure._cuid = "root element" + self.em.add(self.structure, self.identifiable_root) + collect_existing_structure(self.structure, self.identifiable_root, + self.em) + self.to_be_inserted = db.Container() + self.insert_missing_structure(self.structure) + + # TODO this is a workaround due to the fact that the caosdb library + # changes the objects in the Container if it is inserted. The graph + # structure is flattened. I.e. references to other entity objects are + # replaced with their IDs. However this code depends on this graph. + tmp_copy = deepcopy(self.to_be_inserted) + tmp_copy.insert() + + for e1, e2 in zip(tmp_copy, self.to_be_inserted): + e2.id = e1.id + # End workaround + + # self.update_structure(self.structure) + update_structure(self.em, self.to_be_updated, self.structure) + + def special_treatment(self, key, value, dtype): + """define special treatment of attributes + + to be overwritten by child classes. + + key: attribute name + value: attribute value + """ + + return key, value, dtype + + @classmethod + def create_structure(cls, h5obj, create_recordTypes=False, collection=None, + special_treatment=None, root_name="root"): + """Create Records and Record types from a given hdf5-object for all + items in the tree. Attributes are added as properties, the + values only if the dimension < 2. 
+ + Parameters + ---------- + h5obj : h5py.File + a hdf5-file object + + root_name : name that is used instead of '/' + Type of the root Record (the Record corresponding to + the root node in the HDF5 file) + + Returns + ------- + rec : db.Container + Contains the Record Types, Records and Properties for the + input-tree + + """ + + if collection is None: + collection = [] + + if special_treatment is None: + def special_treatment(x, y, z): return x, y, z + + if h5obj.name == "/": + name_without_path = root_name + else: + name_without_path = h5obj.name.split("/")[-1] + + if create_recordTypes: + rec = db.RecordType(name=name_without_path) + else: + rec = db.Record().add_parent(name=name_without_path) + collection.append(rec) + + if isinstance(h5obj, h5py.Group): + for subgroup in h5obj.keys(): + subgroup_name = h5obj[subgroup].name.split("/")[-1] + + sub = H5CFood.create_structure(h5obj[subgroup], + create_recordTypes=create_recordTypes, + collection=collection, + special_treatment=special_treatment) + + if create_recordTypes: + rec.add_property(subgroup_name) + else: + rec.add_property(subgroup_name, value=sub) + + for key, val in h5obj.attrs.items(): + # ignored + + if key in ["REFERENCE_LIST", "DIMENSION_LIST", "NAME", "CLASS"]: + continue + + val, dtype = h5_attr_to_property(val) + + if val is None and dtype is None: + continue + + if create_recordTypes and key.lower() not in ['description']: + treated_k, _, treated_dtype = special_treatment( + key, val, dtype) + + if treated_k is not None: + prop = db.Property(name=treated_k, datatype=treated_dtype) + collection.append(prop) + rec.add_property(name=treated_k) + else: + treated_k, treated_v, treated_dtype = special_treatment( + key, val, dtype) + + if treated_k is not None: + rec.add_property(name=treated_k, value=treated_v, + datatype=treated_dtype) + + return rec + + def insert_missing_structure(self, target_structure: db.Record): + if target_structure._cuid not in self.em.to_existing: + self.to_be_inserted.append(target_structure) + + for prop in target_structure.get_properties(): + if prop.is_reference(server_retrieval=True): + self.insert_missing_structure(prop.value) diff --git a/src/caosadvancedtools/collect_datamodel.py b/src/caosadvancedtools/collect_datamodel.py index 1ca68068e713dd34ebc3368ad760461578dee4ef..806d15333cac7f745ce2fb82a02e0214ad2b6616 100644 --- a/src/caosadvancedtools/collect_datamodel.py +++ b/src/caosadvancedtools/collect_datamodel.py @@ -26,14 +26,19 @@ import argparse import os import caosdb as db +from caosdb.apiutils import retrieve_entities_with_ids + +from export_related import export def get_dm(): - rts = set([r.name for r in db.execute_query("SELECT name FROM RECORDTYPE")]) + rts = set([(r.id, r.name) for r + in db.execute_query("SELECT name FROM RECORDTYPE")]) if None in rts: rts.remove(None) - ps = set([r.name for r in db.execute_query("SELECT name FROM PROPERTY")]) + ps = set([(r.id, r.name) for r + in db.execute_query("SELECT name FROM PROPERTY")]) if None in ps: ps.remove(None) @@ -47,18 +52,26 @@ def get_parser(): "be stored") p.add_argument("-c", "--compare", help="directory where the datamodel that" " shall be compared is stored") + p.add_argument("-x", "--xml", action="store_true", + help="store xml as well") return p -def store(directory): +def store(directory, xml=False): rts, ps = get_dm() os.makedirs(directory, exist_ok=True) with open(os.path.join(directory, "recordtypes.txt"), "w") as fi: - fi.write(",".join(rts)) + fi.write(",".join([el[1] for el in rts])) with 
open(os.path.join(directory, "properties.txt"), "w") as fi: - fi.write(",".join(ps)) + fi.write(",".join([el[1] for el in ps])) + + if xml: + cont = retrieve_entities_with_ids( + [el[0] for el in rts]+[el[0] for el in ps]) + + export(cont, directory) def load_dm(directory): @@ -104,7 +117,7 @@ if __name__ == "__main__": args = p.parse_args() if args.store: - store(args.store) + store(args.store, xml=args.xml) if args.compare: compare(args.compare) diff --git a/src/caosadvancedtools/converter/labfolder_api.py b/src/caosadvancedtools/converter/labfolder_api.py index 567ee5a8aa7fdb1176fcbcc9bff96dfb6a19b821..cf57c0155a3b3970834abb2fc1058215ef7ecba8 100644 --- a/src/caosadvancedtools/converter/labfolder_api.py +++ b/src/caosadvancedtools/converter/labfolder_api.py @@ -28,7 +28,7 @@ import time import html2text import caosdb as db -from labfolder.connection import configure_connection +from labfolder.connection import configure_connection # pylint: disable=import-error class Importer(object): @@ -82,7 +82,7 @@ class Importer(object): p = db.Property(name=element['title'], unit=element['unit'], datatype=db.DOUBLE) try: p.insert() - except db.exceptions.EntityError as e: + except db.exceptions.TransactionError as e: print(e) return diff --git a/src/caosadvancedtools/crawler.py b/src/caosadvancedtools/crawler.py index b676f708567e39226e53c69b066da7210f2ff34b..0cd692fcd49cc44ba5dc5294fa973061c2e85c29 100644 --- a/src/caosadvancedtools/crawler.py +++ b/src/caosadvancedtools/crawler.py @@ -48,7 +48,7 @@ from datetime import datetime from sqlite3 import IntegrityError import caosdb as db -from caosdb.exceptions import TransactionError +from caosdb.exceptions import BadQueryError from .cache import Cache, UpdateCache, get_pretty_xml from .cfood import RowCFood, add_files, get_ids_for_entities_with_names @@ -56,6 +56,7 @@ from .datainconsistency import DataInconsistencyError from .datamodel_problems import DataModelProblems from .guard import RETRIEVE, ProhibitedException from .guard import global_guard as guard +from .serverside.helper import send_mail as main_send_mail from .suppressKnown import SuppressKnown logger = logging.getLogger(__name__) @@ -65,6 +66,82 @@ def separated(text): return "-"*60 + "\n" + text +def apply_list_of_updates(to_be_updated, update_flags={}, + update_cache=None, run_id=None): + """Updates the `to_be_updated` Container, i.e., pushes the changes to CaosDB + after removing possible duplicates. If a cache is provided, unauthorized + updates can be cached for later authorization. + + Parameters + ---------- + to_be_updated : db.Container + Container with the entities that will be updated. + update_flags : dict, optional + Dictionary of CaosDB server flags that will be used for the + update. Default is an empty dict. + update_cache : UpdateCache or None, optional + Cache in which the intended updates will be stored so they can be + authorized afterwards. Default is None. + run_id : String or None, optional + Id with which the pending updates are cached. Only meaningful if + `update_cache` is provided. Default is None.
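A usage sketch for apply_list_of_updates; the record and its change are hypothetical, and a configured CaosDB connection is assumed:

import caosdb as db
from caosadvancedtools.cache import UpdateCache
from caosadvancedtools.crawler import apply_list_of_updates

# Hypothetical: a record that was modified locally and now needs pushing.
to_be_updated = db.Container().extend(
    [db.Record(id=1234).add_property("comment", "updated by the crawler")])

apply_list_of_updates(to_be_updated,
                      update_flags={},
                      update_cache=UpdateCache(),  # collects prohibited updates
                      run_id="some-run-id")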
+ """ + + if len(to_be_updated) == 0: + return + + get_ids_for_entities_with_names(to_be_updated) + + # remove duplicates + tmp = db.Container() + + for el in to_be_updated: + if el not in tmp: + tmp.append(el) + + to_be_updated = tmp + + info = "UPDATE: updating the following entities\n" + + baseurl = db.configuration.get_config()["Connection"]["url"] + + def make_clickable(txt, id): + return "<a href='{}/Entity/{}'>{}</a>".format(baseurl, id, txt) + + for el in to_be_updated: + info += str("\t" + make_clickable(el.name, el.id) + if el.name is not None + else "\t" + make_clickable(str(el.id), el.id)) + info += "\n" + logger.info(info) + + logger.debug(to_be_updated) + try: + if len(to_be_updated) > 0: + logger.info( + "Updating {} Records...".format( + len(to_be_updated))) + guard.safe_update(to_be_updated, unique=False, + flags=update_flags) + except FileNotFoundError as e: + logger.info("Cannot access {}. However, it might be needed for" + " the correct execution".format(e.filename)) + except ProhibitedException: + try: + update_cache.insert(to_be_updated, run_id) + except IntegrityError as e: + logger.warning( + "There were problems with the update of {}.".format( + to_be_updated), + extra={"identifier": str(to_be_updated), + "category": "update-cache"} + ) + logger.debug(traceback.format_exc()) + logger.debug(e) + except Exception as e: + DataModelProblems.evaluate_exception(e) + + class Crawler(object): def __init__(self, cfood_types, use_cache=False, abort_on_exception=True, interactive=True, hideKnown=False, @@ -197,9 +274,14 @@ class Crawler(object): logger.debug("{} matched\n{}.".format( Cfood.__name__, item)) + except FileNotFoundError as e: + logger.info("Cannot access {}. However, it might be needed for" + " the correct execution".format(e.filename)) except DataInconsistencyError as e: logger.debug(traceback.format_exc()) logger.debug(e) + # TODO: Generally: in which cases should exceptions be raised? When is + # errors_occured set to True? The expected behavior must be documented. except Exception as e: try: DataModelProblems.evaluate_exception(e) @@ -228,6 +310,10 @@ class Crawler(object): logger.debug(traceback.format_exc()) logger.debug(e) remove_cfoods.append(cfood) + except FileNotFoundError as e: + logger.info("Cannot access {}. However, it might be needed for" + " the correct execution".format(e.filename)) + remove_cfoods.append(cfood) except Exception as e: try: DataModelProblems.evaluate_exception(e) @@ -304,16 +390,20 @@ class Crawler(object): if self.interactive and "y" != input("Do you want to continue? (y)"): return - logger.info("Inserting or updating Records...") - for cfood in cfoods: try: cfood.create_identifiables() - self._cached_find_or_insert_identifiables(cfood.identifiables) cfood.update_identifiables() - self.push_identifiables_to_CaosDB(cfood) + apply_list_of_updates( + cfood.to_be_updated, + cfood.update_flags, + update_cache=self.update_cache, + run_id=self.run_id) + except FileNotFoundError as e: + logger.info("Cannot access {}. 
However, it might be needed for" + " the correct execution".format(e.filename)) except DataInconsistencyError as e: logger.debug(traceback.format_exc()) logger.debug(e) @@ -407,6 +497,7 @@ ____________________\n""".format(i+1, len(pending_changes)) + str(el[3])) <script src="{url}/webinterface/js/utif.js"></script> <script src="{url}/webinterface/js/loglevel.js"></script> <script src="{url}/webinterface/js/bootstrap.js"></script> + <script src="{url}/webinterface/js/ext_table_preview.js"></script> <script src="{url}/webinterface/js/webcaosdb.js"></script> <script src="{url}/webinterface/js/plotly.js"></script> <script src="{url}/webinterface/js/caosdb.js"></script> @@ -492,7 +583,6 @@ carefully and if the changes are ok, click on the following link: """.format(url=caosdb_config["Connection"]["url"], filename=filename, changes="\n".join(changes)) - sendmail = caosdb_config["Misc"]["sendmail"] try: fro = caosdb_config["advancedtools"]["crawler.from_mail"] to = caosdb_config["advancedtools"]["crawler.to_mail"] @@ -502,55 +592,14 @@ carefully and if the changes are ok, click on the following link: "'from_mail' and 'to_mail'.") return - p = subprocess.Popen([sendmail, "-f", fro, to], stdin=subprocess.PIPE) - p.communicate(input=text.encode()) - - def push_identifiables_to_CaosDB(self, cfood): - """ - Updates the to_be_updated Container, i.e. pushes the changes to CaosDB - """ - - if len(cfood.to_be_updated) == 0: - return - - get_ids_for_entities_with_names(cfood.to_be_updated) - - # remove duplicates - tmp = db.Container() - - for el in cfood.to_be_updated: - if el not in tmp: - tmp.append(el) - - cfood.to_be_updated = tmp - - info = "UPDATE: updating the following entities\n" - - for el in cfood.to_be_updated: - info += str("\t" + el.name if el.name is not None else "\t" + - str(el.id)) - info += "\n" - logger.info(info) - - logger.debug(cfood.to_be_updated) - try: - guard.safe_update(cfood.to_be_updated, unique=False) - except ProhibitedException: - try: - self.update_cache.insert(cfood.to_be_updated, self.run_id) - except IntegrityError as e: - logger.warning( - "There were problems with the update of {}.".format( - cfood.to_be_updated), - extra={"identifier": str(cfood.to_be_updated), - "category": "update-cache"} - ) - logger.debug(traceback.format_exc()) - logger.debug(e) - except Exception as e: - DataModelProblems.evaluate_exception(e) + main_send_mail( + from_addr=fro, + to=to, + subject="Crawler Update", + body=text) # TODO remove static? + @staticmethod def find_or_insert_identifiables(identifiables): """ Sets the ids of identifiables (that do not have already an id from the @@ -561,40 +610,51 @@ carefully and if the changes are ok, click on the following link: # looking for matching entities in CaosDB when there is no valid id # i.e. there was none set from a cache + existing = [] + inserted = [] + for ent in identifiables: if ent.id is None or ent.id < 0: logger.debug("Looking for: {}".format( ent.id if ent.id is not None else ent.name)) - existing = Crawler.find_existing(ent) + found = Crawler.find_existing(ent) - if existing is not None: - ent.id = existing.id + if found is not None: + ent.id = found.id else: logger.debug("Id is known of: {}".format(ent)) - # insert missing, i.e. those which are not valid - missing_identifiables = db.Container() - missing_identifiables.extend([ent for ent in identifiables - if ent.id is None or ent.id < 0]) - # TODO the following should not be necessary. Fix it - - for ent in missing_identifiables: - ent.id = None + # insert missing, i.e. 
those which are not valid + if ent.id is None or ent.id < 0: + missing = ent + ent.id = None + else: + missing = None + existing.append(ent) - if len(missing_identifiables) > 0: - info = "Going to insert the following entities:\n" + if missing: + try: + guard.safe_insert(missing, unique=False, + flags={"force-missing-obligatory": "ignore"}) + inserted.append(ent) + except Exception as e: + DataModelProblems.evaluate_exception(e) + if len(existing) > 0: + info = "Identified the following existing entities:\n" - for ent in missing_identifiables: + for ent in existing: info += str(ent)+"\n" logger.debug(info) + else: + logger.debug("Did not identify any existing entities") + if len(inserted) > 0: + info = "Inserted the following entities:\n" - if len(missing_identifiables) == 0: - logger.debug("No new entities to be inserted.") + for ent in inserted: + info += str(ent)+"\n" + logger.debug(info) else: - try: - guard.safe_insert(missing_identifiables, unique=False) - except Exception as e: - DataModelProblems.evaluate_exception(e) + logger.debug("Did not insert any new entities") logger.debug("Retrieving entities from CaosDB...") identifiables.retrieve(unique=True, raise_exception_on_error=False) @@ -610,6 +670,10 @@ carefully and if the changes are ok, click on the following link: raise ValueError("The identifiable must have at least one parent.") query_string = "FIND Record " + ident.get_parents()[0].name query_string += " WITH " + if ident.name is None and len(ident.get_properties()) == 0: + raise ValueError( + "The identifiable must have features to identify it.") + if ident.name is not None: query_string += "name='{}' AND".format(ident.name) @@ -640,7 +704,8 @@ carefully and if the changes are ok, click on the following link: # is using the unique keyword try: r = q.execute(unique=True) - except TransactionError: + except BadQueryError: + # either empty or ambiguous response r = None # if r is not None: @@ -669,8 +734,8 @@ class FileCrawler(Crawler): @staticmethod def query_files(path): - query_str = "FIND FILE WHICH IS STORED AT " + ( - path if path.endswith("/") else path + "/") + "**" + query_str = "FIND FILE WHICH IS STORED AT '" + ( + path if path.endswith("/") else path + "/") + "**'" q_info = "Sending the following query: '" + query_str + "'\n" files = db.execute_query(query_str) logger.info( diff --git a/src/caosadvancedtools/datamodel_problems.py b/src/caosadvancedtools/datamodel_problems.py index 224744ea763de97b5710e81da93ad9f638c82ea5..df5b7e56dfcc939e2eabf6454cb6e81b22a37727 100644 --- a/src/caosadvancedtools/datamodel_problems.py +++ b/src/caosadvancedtools/datamodel_problems.py @@ -28,6 +28,7 @@ or updating entities with missing parents and/or properties. """ from caosdb.exceptions import (EntityDoesNotExistError, + TransactionError, UnqualifiedParentsError, UnqualifiedPropertiesError) @@ -42,48 +43,51 @@ class DataModelProblems(object): DataModelProblems.missing.add(ent) @staticmethod - def evaluate_exception(e): - """Take an exception, see whether it was caused by datamodel problems, - and update missing parents and/or properties if this was the - case. Otherwise, raise the exception. + def _evaluate_unqualified(e): + """Evaluate all UnqualifiedParentsErrors and + UnqualifiedPropertiesErrors and check for possible datamodel + problems. """ - # type(e) == type(Exception()) seems to be necessary because - # of EntityMultiErrors that are instances of (all? some of?) - # theirchild errors. So isinstance doesn't show the desired - # behavior. 
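The reworked evaluation below relies on TransactionError.has_error from caosdb-pylib. A sketch of how a caller is expected to use it; the failing insert is invented and needs a running server to actually fail this way:

import caosdb as db
from caosdb.exceptions import TransactionError
from caosadvancedtools.datamodel_problems import DataModelProblems

try:
    # Hypothetical insert failing because 'MissingType' is not defined.
    db.Record(name="r1").add_parent(name="MissingType").insert()
except TransactionError as te:
    try:
        DataModelProblems.evaluate_exception(te)  # records, then re-raises
    except TransactionError:
        pass

print(DataModelProblems.missing)  # expected to contain 'MissingType'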
+ # UnqualifiedParentsErrors are always possible problems: + if isinstance(e, UnqualifiedParentsError): + for err in e.errors: + DataModelProblems.add(err.entity.name) + elif isinstance(e, UnqualifiedPropertiesError): + # Only those UnqualifiedPropertiesErrors that were caused + # by (at least) an EntityDoesNotExistError are possible + # datamodel problems + for err in e.errors: + if isinstance(err, EntityDoesNotExistError): + DataModelProblems.add(err.entity.name) + # If there is at least one UnqualifiedParentsError or at least + # one UnqualifiedPropertiesError on some level below, go + # through the children. + elif (e.has_error(UnqualifiedParentsError) or + e.has_error(UnqualifiedPropertiesError)): + for err in e.errors: + DataModelProblems._evaluate_unqualified(err) + + @staticmethod + def evaluate_exception(e): + """Take a TransactionError, see whether it was caused by datamodel + problems, and update missing parents and/or properties if this + was the case. Afterwards, raise the exception. - if type(e) == type(UnqualifiedPropertiesError()): - for err in e.get_errors(): - # Here, it doesn't matter if there is an actual - # EntityDoesNotExistError or a MultiEntityError - # including an EntityDoesNotExistError. The latter - # case happens when a wrong entity with a value is - # given since then, an EntityHasNoDatatypeError is - # raised as well. Still, the problem is the missing - # property, so this is okay. + Parameters + ---------- + e : TransactionError + TransactionError, the children of which are checked for + possible datamodel problems. - if isinstance(err, EntityDoesNotExistError): - property_missing = True - DataModelProblems.add(err.get_entity().name) - raise e - elif type(e) == type(UnqualifiedParentsError()): - # This is always caused by missing/wrong parents + """ + if not isinstance(e, TransactionError): + raise ValueError( + "Only TransactionErrors can be checked for datamodel problems") + + if (e.has_error(UnqualifiedParentsError) or + e.has_error(UnqualifiedPropertiesError)): + for err in e.errors: + DataModelProblems._evaluate_unqualified(err) - for err in e.get_errors(): - DataModelProblems.add(err.get_entity().name) - raise e - # This is the ugly workaround for a MultiEntityError that - # stems from a UnqualifiedParentsError: an - # EntityDoesNotExistError is raised AND the causing entity has - # type PARENT. 
- elif ((type(e) == type(EntityDoesNotExistError())) and - ((str(type(e.get_entity()).__name__).upper() == "PARENT"))): - DataModelProblems.add(e.get_entity().name) - raise e - # Evaluate children of real MultiEntityErrors: - elif hasattr(e, "errors") and len(e.get_errors()) > 0: - for err in e.get_errors(): - DataModelProblems.evaluate_exception(err) - else: - raise e + raise e diff --git a/src/caosadvancedtools/example_cfood.py b/src/caosadvancedtools/example_cfood.py index 6111d95defc37bbb6d836feec3fa3d2e4e3d91ab..2e395d5c3030508087e25a7156d35c8954d223d7 100644 --- a/src/caosadvancedtools/example_cfood.py +++ b/src/caosadvancedtools/example_cfood.py @@ -26,8 +26,8 @@ from .cfood import AbstractFileCFood, assure_has_property class ExampleCFood(AbstractFileCFood): - @staticmethod - def get_re(): + @classmethod + def get_re(cls): return (r".*/(?P<species>[^/]+)/" r"(?P<date>\d{4}-\d{2}-\d{2})/README.md") diff --git a/src/caosadvancedtools/export_related.py b/src/caosadvancedtools/export_related.py index 47fe2f4900add818e940fa81466bb9c98a2f0223..69b588c34cc7c8123ab4291f6d8f76f06e7400be 100755 --- a/src/caosadvancedtools/export_related.py +++ b/src/caosadvancedtools/export_related.py @@ -47,6 +47,9 @@ def get_ids_of_related_entities(entity): """ entities = [] + if isinstance(entity, int): + entity = db.Entity(id=entity).retrieve() + for par in entity.parents: entities.append(par.id) @@ -76,32 +79,32 @@ def recursively_collect_related(entity): """ all_entities = db.Container() all_entities.append(entity) - ids = set([entity.id]) - new_entities = [entity] + ids = set() + new_ids = set([entity.id]) - while new_entities: - new_ids = set() + while new_ids: + ids.update(new_ids) - for ent in new_entities: - new_ids.update(get_ids_of_related_entities(ent)) + for eid in list(new_ids): + new_ids.update(get_ids_of_related_entities(eid)) new_ids = new_ids - ids - new_entities = retrieve_entities_with_ids(list(new_ids)) - ids.update([e.id for e in new_entities]) - all_entities.extend(new_entities) - return all_entities + return retrieve_entities_with_ids(list(ids)) def invert_ids(entities): apply_to_ids(entities, lambda x: x*-1) -def export(rec_id, directory="."): +def export_related_to(rec_id, directory="."): if not isinstance(rec_id, int): raise ValueError("rec_id needs to be an integer") ent = db.execute_query("FIND {}".format(rec_id), unique=True) cont = recursively_collect_related(ent) + export(cont, directory=directory) + +def export(cont, directory="."): directory = os.path.abspath(directory) dl_dir = os.path.join(directory, "downloads") @@ -119,6 +122,9 @@ def export(rec_id, directory="."): print("Failed download of:", target) invert_ids(cont) + + for el in cont: + el.version = None xml = etree.tounicode(cont.to_xml( local_serialization=True), pretty_print=True) @@ -147,4 +153,4 @@ if __name__ == "__main__": parser = defineParser() args = parser.parse_args() - export(args.id, directory=args.directory) + export_related_to(args.id, directory=args.directory) diff --git a/src/caosadvancedtools/import_from_xml.py b/src/caosadvancedtools/import_from_xml.py index 9942a9a9f38de90d62471cc86d32c25d55c9cba9..9d0e03f649db771147915740cabf201fae910760 100755 --- a/src/caosadvancedtools/import_from_xml.py +++ b/src/caosadvancedtools/import_from_xml.py @@ -33,7 +33,7 @@ from tempfile import NamedTemporaryFile import caosdb as db from caosdb.apiutils import apply_to_ids -from caosmodels.data_model import DataModel +from caosadvancedtools.models.data_model import DataModel def create_dummy_file(text="Please ask 
the administrator for this file."): @@ -57,7 +57,7 @@ def import_xml(filename, rerun=False, interactive=True): tmpfile = create_dummy_file() model = [] - files = [] + files = {} # add files to files list and properties and record types to model @@ -70,19 +70,19 @@ def import_xml(filename, rerun=False, interactive=True): el.file = target else: el.file = tmpfile - files.append(el) + files[el.path] = el if (isinstance(el, db.Property) or isinstance(el, db.RecordType)): model.append(el) # remove entities of the model from the container - for el in model+files: + for el in model+list(files.values()): cont.remove(el) id_mapping = {} - for el in model+files: + for el in model+list(files.values()): id_mapping[el.id] = el # insert/update the model @@ -93,10 +93,10 @@ # insert files if not rerun: - for _, el in enumerate(files): + for _, el in enumerate(files.values()): r = el.insert(unique=False) else: - for _, el in enumerate(files): + for _, el in enumerate(files.values()): el.id = None el.retrieve() diff --git a/src/caosadvancedtools/models/__init__.py b/src/caosadvancedtools/models/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..d70cb810488ba846e5311cbb50991c3eb32bdfad --- /dev/null +++ b/src/caosadvancedtools/models/__init__.py @@ -0,0 +1,2 @@ +"""Submodule for working with data models. +""" diff --git a/src/caosadvancedtools/models/data_model.py b/src/caosadvancedtools/models/data_model.py new file mode 100644 index 0000000000000000000000000000000000000000..d9079e6196b4751ca86ba41275108330b946d57c --- /dev/null +++ b/src/caosadvancedtools/models/data_model.py @@ -0,0 +1,263 @@ +# encoding: utf-8 +# +# ** header v3.0 +# This file is a part of the CaosDB Project. +# +# Copyright (C) 2018 Research Group Biomedical Physics, +# Max-Planck-Institute for Dynamics and Self-Organization Göttingen +# Copyright (C) 2019 Henrik tom Wörden +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see <https://www.gnu.org/licenses/>. +# +# ** end header +# +from copy import deepcopy +# TODO(fspreck) for backwards compatibility with Python < 3.9 but this is +# actually +# [deprecated](https://docs.python.org/3/library/typing.html#typing.List), so +# remove this, when we drop support for old Python versions. +from typing import List + +import caosdb as db +from caosdb.apiutils import compare_entities, describe_diff + + +CAOSDB_INTERNAL_PROPERTIES = [ + "description", + "name", + "unit", +] + + +class DataModel(dict): + """Provides tools for managing a data model. + + When constructing a data model, the CaosDB representation can easily be + created using the classes RecordType and Property, storing them in a + Container and inserting it in CaosDB. However, this has one drawback: You + cannot simply change something and update the container. The container will + insist on having valid ids for all contained Entities.
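Since DataModel inherits from dict and keys every entity by its name, the resulting bookkeeping looks like this (a sketch; no server connection needed, the names are invented):

import caosdb as db
from caosadvancedtools.models.data_model import DataModel

dm = DataModel([db.RecordType(name="Experiment"), db.Property(name="date")])
assert set(dm.keys()) == {"Experiment", "date"}

dm.append(db.Property(name="temperature"))
assert "temperature" in dm  # dict-style membership test by entity name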
+ + This class allows you to define your model as easily but also provides you + with a method (`sync_data_model`) that will sync with the data model in an + existing CaosDB instance. + + This is possible because entities, defined in this model, are identified + with entities in CaosDB using names. I.e. a RecordType "Experiment" in this + model will update an existing RecordType with name "Experiment" in CaosDB. + Thus, be careful not to change existing Entities that were created for a + different purpose (e.g. someone else's experiment). + + DataModel inherits from dict. The keys are always the names of the + entities. Thus you cannot have unnamed entities in your model. + + Example: + + # Create a DataModel with a RecordType and a Property, not assuming any + # relation between the two. + dm = DataModel([db.RecordType(name="myRecordType"), + db.Property(name="myProperty")]) + # Sync the DataModel with the server, so that the server state is consistent + # with this DataModel's content. + dm.sync_data_model() + # Now the DataModel's IDs are the same as on the server. + """ + + def __init__(self, *args): + if len(args) == 1 and hasattr(args[0], '__iter__'): + super().__init__([(e.name, e) for e in args[0]]) + else: + super().__init__(args) + + def append(self, entity: db.Entity): + self[entity.name] = entity + + def extend(self, entities: List[db.Entity]): + for entity in entities: + self.append(entity) + + def sync_data_model(self, noquestion: bool = False, verbose: bool = True): + """Synchronize this DataModel with a CaosDB instance. + + Updates existing entities from the CaosDB instance and inserts + non-existing entities into the instance. Note: This allows you to easily + overwrite changes that were made to an existing data model. Use this + function with care and double check its effect. + + Raises + ------ + TransactionError + If one of the involved transactions fails. + + """ + all_entities = self.collect_entities() + tmp_exist = self.get_existing_entities(all_entities) + non_existing_entities = db.Container().extend( + DataModel.entities_without( + self.values(), [e.name.lower() for e in tmp_exist])) + existing_entities = db.Container().extend( + DataModel.entities_without( + self.values(), [e.name.lower() for e in non_existing_entities])) + self.sync_ids_by_name(tmp_exist) + + if len(non_existing_entities) > 0: + if verbose: + print("New entities:") + + for ent in non_existing_entities: + print(ent.name) + + if noquestion or str(input("Do you really want to insert those " + "entities? [y/N] ")).lower() == "y": + non_existing_entities.insert() + self.sync_ids_by_name(non_existing_entities) + if verbose: + print("Updated entities.") + else: + return + else: + if verbose: + print("No new entities.") + + if len(existing_entities) > 0: + if verbose: + print("Inspecting changes that will be made...") + any_change = False + + for ent in existing_entities: + if ent.name in CAOSDB_INTERNAL_PROPERTIES: + # Workaround for the usage of internal properties like name + # via the extern keyword: + ref = db.Property(name=ent.name).retrieve() + else: + query = db.Query(f"FIND * with id={ent.id}") + ref = query.execute(unique=True) + diff = (describe_diff(*compare_entities(ent, ref + ), name=ent.name)) + + if diff != "": + if verbose: + print(diff) + any_change = True + + if any_change: + if noquestion or input("Do you really want to apply the above " + "changes?
[y/N]") == "y": + existing_entities.update() + if verbose: + print("Synchronized existing entities.") + else: + if verbose: + print("No differences found. No update") + else: + if verbose: + print("No existing entities updated.") + + @staticmethod + def get_existing_entities(entities): + """ Return a list with those entities of the supplied iterable that + exist in the CaosDB instance. + + Args + ---- + entities : iterable + The entities to be retrieved. This object will not be moidified. + + Raises + ------ + TransactionError + If the retrieval fails. + """ + container = db.Container().extend(deepcopy(entities)) + valid_entities = [e for e in container.retrieve( + sync=False, raise_exception_on_error=False) if e.is_valid()] + + return valid_entities + + @staticmethod + def entities_without(entities, names): + """ Return a new list with all entities which do *not* have + certain names. + + Parameters + ---------- + entities : iterable + A iterable with entities. + names : iterable of str + Only entities which do *not* have one of these names will end up in + the returned iterable. + + Returns + ------- + list + A list with entities. + """ + newc = [] + + for e in entities: + if e.name.lower() not in names: + newc.append(e) + + return newc + + def sync_ids_by_name(self, valid_entities): + """Add IDs from valid_entities to the entities in this DataModel. + + "By name" means that the valid IDs (from the valid_entities) are + assigned to the entities, their properties in this DataModel by their + names, also parents are replaced by equally named entities in + valid_entities. These changes happen in place to this DataModel! + + Parameters + ---------- + valid_entities : list of Entity + A list (e.g. a Container) of valid entities. + + Returns + ------- + None + + """ + + for valid_e in valid_entities: + for entity in self.values(): + if entity.name.lower() == valid_e.name.lower(): + entity.id = valid_e.id + + # sync properties + + for prop in entity.get_properties(): + + if prop.name.lower() == valid_e.name.lower(): + prop.id = valid_e.id + + # sync parents + + for par in entity.get_parents(): + if par.name.lower() == valid_e.name.lower(): + par._wrap(valid_e) + + def collect_entities(self): + """ Collects all entities: explicitly defined RecordTypes and + Properties and those mentioned as Properties + """ + all_ents = {} + + for ent in self.values(): + all_ents[ent.name] = ent + + for prop in ent.get_properties(): + all_ents[prop.name] = prop + + return list(all_ents.values()) diff --git a/src/caosadvancedtools/models/parser.py b/src/caosadvancedtools/models/parser.py new file mode 100644 index 0000000000000000000000000000000000000000..ad149222b5b90671a50943dc00bc9de8074a42f1 --- /dev/null +++ b/src/caosadvancedtools/models/parser.py @@ -0,0 +1,800 @@ +# This file is a part of the CaosDB Project. +# +# Copyright (C) 2022 IndiScale GmbH <info@indiscale.com> +# Copyright (C) 2022 Florian Spreckelsen <f.spreckelsen@indiscale.com> +# Copyright (C) 2022 Daniel Hornung <d.hornung@indiscale.com> +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see <https://www.gnu.org/licenses/>. + +""" +This module (and script) provides methods to read a DataModel from a YAML file. + +If a file name is passed to parse_model_from_yaml, it is parsed and a DataModel +is created. The yaml file needs to be structured in a certain way which will be +described in the following. + +The file should only contain a dictionary. The keys are the names of +RecordTypes or Properties. The values are again dictionaries describing the +entities. This information can be defined via the keys listed in KEYWORDS. +Notably, properties can be given in a dictionary under the xxxx_properties keys +and will be added with the respective importance. These properties can be +RecordTypes or Properties and can be defined right there. +Every Property or RecordType only needs to be defined once anywhere. When it is +not defined, simply the name can be supplied with no value. +Parents can be provided under the 'inherit_from_xxxx' keywords. The value needs +to be a list with the names. Here, NO NEW entities can be defined. +""" +import json +import re +import sys +import yaml + +from typing import List + +import jsonschema +import caosdb as db + +from .data_model import CAOSDB_INTERNAL_PROPERTIES, DataModel + +# Keywords which are allowed in data model descriptions. +KEYWORDS = ["parent", # deprecated, use inherit_from_* instead: + # https://gitlab.com/caosdb/caosdb-advanced-user-tools/-/issues/36 + "importance", + "datatype", # for example TEXT, INTEGER or REFERENCE + "unit", + "description", + "recommended_properties", + "obligatory_properties", + "suggested_properties", + "inherit_from_recommended", + "inherit_from_suggested", + "inherit_from_obligatory", + "role", + "value", + ] + +# TODO: check whether it's really ignored +# These KEYWORDS are not forbidden as properties, but merely ignored. +KEYWORDS_IGNORED = [ + "unit", +] + +JSON_SCHEMA_ATOMIC_TYPES = [ + "string", + "boolean", + "integer", + "number" +] + + +def _get_listdatatype(dtype): + """matches a string to check whether the type definition is a list + + returns the type within the list or None, if it cannot be matched with a + list definition + """ + # TODO: string representation should be the same as used by the server: + # e.g. LIST<TEXT> + # this should be changed in the module and the old behaviour should be + # marked as deprecated + match = re.match(r"^LIST[(<](?P<dt>.*)[)>]$", dtype) + + if match is None: + return None + else: + return match.group("dt") + +# Taken from https://stackoverflow.com/a/53647080, CC-BY-SA, 2018 by +# https://stackoverflow.com/users/2572431/augurar + + +class SafeLineLoader(yaml.SafeLoader): + """Load a line and keep meta-information. + + Note that this will add a `__line__` element to all the dicts. + """ + + def construct_mapping(self, node, deep=False): + """Overwriting the parent method.""" + mapping = super().construct_mapping(node, deep=deep) + # Add 1 so line numbering starts at 1 + mapping['__line__'] = node.start_mark.line + 1 + + return mapping +# End of https://stackoverflow.com/a/53647080 + + +class TwiceDefinedException(Exception): + def __init__(self, name): + super().__init__("The Entity '{}' was defined multiple times!".format( + name)) + + +class YamlDefinitionError(RuntimeError): + def __init__(self, line, template=None): + if not template: + template = "Error in YAML definition in line {}."
+ super().__init__(template.format(line)) + + +class JsonSchemaDefinitionError(RuntimeError): + # @author Florian Spreckelsen + # @date 2022-02-17 + # @review Daniel Hornung 2022-02-18 + def __init__(self, msg): + super().__init__(msg) + + +def parse_model_from_yaml(filename): + """Shortcut if the Parser object is not needed.""" + parser = Parser() + + return parser.parse_model_from_yaml(filename) + + +def parse_model_from_string(string): + """Shortcut if the Parser object is not needed.""" + parser = Parser() + + return parser.parse_model_from_string(string) + + +def parse_model_from_json_schema(filename: str): + """Return a datamodel parsed from a json schema definition. + + Parameters + ---------- + filename : str + The path of the json schema file that is to be parsed + + Returns + ------- + out : Datamodel + The datamodel generated from the input schema which then can be used for + synchronizing with CaosDB. + + Note + ---- + This is an experimental feature, see ``JsonSchemaParser`` for information + about the limitations of the current implementation. + + """ + # @author Florian Spreckelsen + # @date 2022-02-17 + # @review Daniel Hornung 2022-02-18 + parser = JsonSchemaParser() + + return parser.parse_model_from_json_schema(filename) + + +class Parser(object): + def __init__(self): + """Initialize an empty parser object and initialize the dictionary of entities and the list of + treated elements. + + """ + self.model = {} + self.treated = [] + + def parse_model_from_yaml(self, filename): + """Create and return a data model from the given file. + + Parameters + ---------- + filename : str + The path to the YAML file. + + Returns + ------- + out : DataModel + The created DataModel + """ + with open(filename, 'r') as outfile: + ymlmodel = yaml.load(outfile, Loader=SafeLineLoader) + + return self._create_model_from_dict(ymlmodel) + + def parse_model_from_string(self, string): + """Create and return a data model from the given YAML string. + + Parameters + ---------- + string : str + The YAML string. + + Returns + ------- + out : DataModel + The created DataModel + """ + ymlmodel = yaml.load(string, Loader=SafeLineLoader) + + return self._create_model_from_dict(ymlmodel) + + def _create_model_from_dict(self, ymlmodel): + """Create and return a data model out of the YAML dict `ymlmodel`. + + Parameters + ---------- + ymlmodel : dict + The dictionary parsed from a YAML file. + + Returns + ------- + out : DataModel + The created DataModel + """ + + if not isinstance(ymlmodel, dict): + raise ValueError("Yaml file should only contain one dictionary!") + + # Extern keyword: + # The extern keyword can be used to include Properties and RecordTypes + # from existing CaosDB datamodels into the current model. + # Any name included in the list specified by the extern keyword + # will be used in queries to retrieve a property or (if no property exists) + # a record type with the name of the element. + # The retrieved entity will be added to the model. + # If no entity with that name is found an exception is raised. 
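A small end-to-end illustration of the YAML conventions described in the module docstring, using parse_model_from_string; the model is invented, and no server connection is needed for parsing (only for a later sync_data_model()):

from caosadvancedtools.models.parser import parse_model_from_string

model = parse_model_from_string("""
Experiment:
  obligatory_properties:
    date:
      datatype: DATETIME
  recommended_properties:
    participant:
      datatype: TEXT
""")
# The DataModel contains the RecordType and both Properties, keyed by name.
assert "Experiment" in model and "date" in model and "participant" in model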
+ + if "extern" not in ymlmodel: + ymlmodel["extern"] = [] + + for name in ymlmodel["extern"]: + if name in CAOSDB_INTERNAL_PROPERTIES: + self.model[name] = db.Property(name=name).retrieve() + continue + for role in ("Property", "RecordType", "Record", "File"): + if db.execute_query("COUNT {} {}".format(role, name)) > 0: + self.model[name] = db.execute_query( + "FIND {} WITH name={}".format(role, name), unique=True) + break + else: + raise Exception("Did not find {}".format(name)) + + ymlmodel.pop("extern") + + # add all names to ymlmodel; initialize properties + + for name, entity in ymlmodel.items(): + self._add_entity_to_model(name, entity) + # initialize recordtypes + self._set_recordtypes() + self._check_and_convert_datatypes() + + for name, entity in ymlmodel.items(): + self._treat_entity(name, entity, line=ymlmodel["__line__"]) + + return DataModel(self.model.values()) + + @staticmethod + def _stringify(name, context=None): + """Make a string out of `name`. + + Warnings are emitted for difficult values of `name`. + + Parameters + ---------- + name : + The value to be converted to a string. + + context : obj + Will be printed in the case of warnings. + + Returns + ------- + out : str + If `name` was a string, return it. Else return str(`name`). + """ + + if name is None: + print("WARNING: Name of this context is None: {}".format(context), + file=sys.stderr) + + if not isinstance(name, str): + name = str(name) + + return name + + def _add_entity_to_model(self, name, definition): + """ adds names of Properties and RecordTypes to the model dictionary + + Properties are also initialized. + + name is the key of the yaml element and definition the value. + """ + + if name == "__line__": + return + name = self._stringify(name) + + if name not in self.model: + self.model[name] = None + + if definition is None: + return + + if (self.model[name] is None + and isinstance(definition, dict) + # is it a property + and "datatype" in definition + # but not simply an RT of the model + and not (_get_listdatatype(definition["datatype"]) == name and + _get_listdatatype(definition["datatype"]) in self.model)): + + # and create the new property + self.model[name] = db.Property(name=name, + datatype=definition["datatype"]) + elif (self.model[name] is None and isinstance(definition, dict) + and "role" in definition): + if definition["role"] == "RecordType": + self.model[name] = db.RecordType(name=name) + elif definition["role"] == "Record": + self.model[name] = db.Record(name=name) + elif definition["role"] == "File": + # TODO(fspreck) Implement files at some later point in time + raise NotImplementedError( + "The definition of file objects is not yet implemented.") + + # self.model[name] = db.File(name=name) + elif definition["role"] == "Property": + self.model[name] = db.Property(name=name) + else: + raise RuntimeError("Unknown role {} in definition of entity.".format( + definition["role"])) + + # for setting values of properties directly: + if not isinstance(definition, dict): + return + + # add other definitions recursively + for prop_type in ["recommended_properties", + "suggested_properties", "obligatory_properties"]: + + if prop_type in definition: + # Empty property mapping should be allowed. 
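+                # e.g. (illustrative) a RecordType declared with an empty
+                # section:
+                #
+                #   Experiment:
+                #     obligatory_properties:
+                #
+                # YAML loads the empty section as None; it is normalized to
+                # an empty dict below.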
+ + if definition[prop_type] is None: + definition[prop_type] = {} + try: + for n, e in definition[prop_type].items(): + if n == "__line__": + continue + self._add_entity_to_model(n, e) + except AttributeError as ate: + if ate.args[0].endswith("'items'"): + line = definition["__line__"] + + if isinstance(definition[prop_type], list): + line = definition[prop_type][0]["__line__"] + raise YamlDefinitionError(line) from None + raise + + def _add_to_recordtype(self, ent_name, props, importance): + """Add properties to a RecordType. + + Parameters + ---------- + ent_name : str + The name of the entity to which the properties shall be added. + + props : dict [str -> dict or :doc:`Entity`] + The properties, indexed by their names. Properties may be given as :doc:`Entity` objects + or as dictionaries. + + importance + The importance as used in :doc:`Entity.add_property`. + + Returns + ------- + None + + """ + + for n, e in props.items(): + if n in KEYWORDS: + if n in KEYWORDS_IGNORED: + continue + raise YamlDefinitionError("Unexpected keyword in line {}: {}".format( + props["__line__"], n)) + + if n == "__line__": + continue + n = self._stringify(n) + + if isinstance(e, dict): + if "datatype" in e and _get_listdatatype(e["datatype"]) is not None: + # Reuse the existing datatype for lists. + datatype = db.LIST(_get_listdatatype(e["datatype"])) + else: + # Ignore a possible e["datatype"] here if it's not a list + # since it has been treated in the definition of the + # property (entity) already + datatype = None + if "value" in e: + value = e["value"] + else: + value = None + + else: + value = e + datatype = None + + self.model[ent_name].add_property(name=n, + value=value, + importance=importance, + datatype=datatype) + + def _inherit(self, name, prop, inheritance): + if not isinstance(prop, list): + raise YamlDefinitionError("Parents must be a list, error in line {}".format( + prop["__line__"])) + + for pname in prop: + if not isinstance(pname, str): + raise ValueError("Only provide the names of parents.") + self.model[name].add_parent(name=pname, inheritance=inheritance) + + def _treat_entity(self, name, definition, line=None): + """Parse the definition and the information to the entity.""" + + if name == "__line__": + return + name = self._stringify(name) + + try: + if definition is None: + return + + # for setting values of properties directly: + if not isinstance(definition, dict): + return + + if ("datatype" in definition + and definition["datatype"].startswith("LIST")): + + return + + if name in self.treated: + raise TwiceDefinedException(name) + + for prop_name, prop in definition.items(): + if prop_name == "__line__": + continue + line = definition["__line__"] + + if prop_name == "unit": + self.model[name].unit = prop + + elif prop_name == "value": + self.model[name].value = prop + + elif prop_name == "description": + self.model[name].description = prop + + elif prop_name == "recommended_properties": + self._add_to_recordtype( + name, prop, importance=db.RECOMMENDED) + + for n, e in prop.items(): + self._treat_entity(n, e) + + elif prop_name == "obligatory_properties": + self._add_to_recordtype( + name, prop, importance=db.OBLIGATORY) + + for n, e in prop.items(): + self._treat_entity(n, e) + + elif prop_name == "suggested_properties": + self._add_to_recordtype( + name, prop, importance=db.SUGGESTED) + + for n, e in prop.items(): + self._treat_entity(n, e) + + # datatype is already set + elif prop_name == "datatype": + continue + + # role has already been used + elif prop_name == "role": + 
continue + + elif prop_name == "inherit_from_obligatory": + self._inherit(name, prop, db.OBLIGATORY) + elif prop_name == "inherit_from_recommended": + self._inherit(name, prop, db.RECOMMENDED) + elif prop_name == "inherit_from_suggested": + self._inherit(name, prop, db.SUGGESTED) + + else: + raise ValueError("invalid keyword: {}".format(prop_name)) + except AttributeError as ate: + if ate.args[0].endswith("'items'"): + raise YamlDefinitionError(line) from None + except Exception as e: + print("Error in treating: "+name) + raise e + self.treated.append(name) + + def _check_and_convert_datatypes(self): + """ checks if datatype is valid. + datatype of properties is simply initialized with string. Here, we + iterate over properties and check whether it is a base datatype or a + name that was defined in the model (or extern part) + + the string representations are replaced with caosdb objects + + """ + + for key, value in self.model.items(): + + if isinstance(value, db.Property): + dtype = value.datatype + is_list = False + + if _get_listdatatype(value.datatype) is not None: + dtype = _get_listdatatype(value.datatype) + is_list = True + + if dtype in self.model: + if is_list: + value.datatype = db.LIST(self.model[dtype]) + else: + value.datatype = self.model[dtype] + + continue + + if dtype in [db.DOUBLE, + db.REFERENCE, + db.TEXT, + db.DATETIME, + db.INTEGER, + db.FILE, + db.BOOLEAN]: + + if is_list: + value.datatype = db.LIST(db.__getattribute__( # pylint: disable=no-member + dtype)) + else: + value.datatype = db.__getattribute__( # pylint: disable=no-member + dtype) + + continue + + raise ValueError("Property {} has an unknown datatype: {}".format( + value.name, value.datatype)) + + def _set_recordtypes(self): + """ properties are defined in first iteration; set remaining as RTs """ + + for key, value in self.model.items(): + if value is None: + self.model[key] = db.RecordType(name=key) + + +class JsonSchemaParser(Parser): + """Extends the yaml parser to read in datamodels defined in a json schema. + + **EXPERIMENTAL:** While this class can already be used to create data models + from basic json schemas, there are the following limitations and missing + features: + + * Due to limitations of json-schema itself, we currently do not support + inheritance in the imported data models + * The same goes for suggested properties of RecordTypes + * Currently, ``$defs`` and ``$ref`` in the input schema are not resolved. + * Already defined RecordTypes and (scalar) Properties can't be re-used as + list properties + * Reference properties that are different from the referenced RT. (although + this is possible for lists of references) + * Values + * Roles + * The extern keyword from the yaml parser + * Currently, a json-schema cannot be transformed into a data model if its + root element isn't a RecordType (or Property) with ``title`` and ``type``. + + """ + # @author Florian Spreckelsen + # @date 2022-02-17 + # @review Timm Fitschen 2022-02-30 + + def parse_model_from_json_schema(self, filename: str): + """Return a datamodel created from the definition in the json schema in + `filename`.
+ + Parameters + ---------- + filename : str + The path to the json-schema file containing the datamodel definition + + Returns + ------- + out : DataModel + The created DataModel + """ + # @author Florian Spreckelsen + # @date 2022-02-17 + # @review Timm Fitschen 2022-02-30 + with open(filename, 'r') as schema_file: + model_dict = json.load(schema_file) + + return self._create_model_from_dict(model_dict) + + def _create_model_from_dict(self, model_dict: Union[dict, List[dict]]): + """Parse a dictionary and return the DataModel created from it. + + The dictionary was typically created from the model definition in a json schema file. + + Parameters + ---------- + model_dict : dict or list[dict] + One or several dictionaries read in from a json-schema file + + Returns + ------- + out : DataModel + The datamodel defined in `model_dict` + """ + # @review Timm Fitschen 2022-02-30 + if isinstance(model_dict, dict): + model_dict = [model_dict] + + for ii, elt in enumerate(model_dict): + if "title" not in elt: + raise JsonSchemaDefinitionError( + f"Object {ii+1} is lacking the `title` keyword") + if "type" not in elt: + raise JsonSchemaDefinitionError( + f"Object {ii+1} is lacking the `type` keyword") + # Check if this is a valid Json Schema + try: + jsonschema.Draft202012Validator.check_schema(elt) + except jsonschema.SchemaError as err: + raise JsonSchemaDefinitionError( + f"Json Schema error in {elt['title']}:\n{str(err)}") from err + name = self._stringify(elt["title"], context=elt) + self._treat_element(elt, name) + + return DataModel(self.model.values()) + + def _get_atomic_datatype(self, elt): + # @review Timm Fitschen 2022-02-30 + if elt["type"] == "string": + if "format" in elt and elt["format"] in ["date", "date-time"]: + return db.DATETIME + else: + return db.TEXT + elif elt["type"] == "integer": + return db.INTEGER + elif elt["type"] == "number": + return db.DOUBLE + elif elt["type"] == "boolean": + return db.BOOLEAN + else: + raise JsonSchemaDefinitionError(f"Unknown atomic type in {elt}.") + + def _treat_element(self, elt: dict, name: str): + # @review Timm Fitschen 2022-02-30 + force_list = False + if name in self.model: + return self.model[name], force_list + if "type" not in elt: + # Each element must have a specific type + raise JsonSchemaDefinitionError( + f"`type` is missing in element {name}.") + if name == "name": + # This is identified with the CaosDB name property as long as the + # type is correct. + if not elt["type"] == "string": + raise JsonSchemaDefinitionError( + "The 'name' property must be string-typed, otherwise it cannot " + "be identified with CaosDB's name property."
+ ) + return None, force_list + if "enum" in elt: + ent = self._treat_enum(elt, name) + elif elt["type"] in JSON_SCHEMA_ATOMIC_TYPES: + ent = db.Property( + name=name, datatype=self._get_atomic_datatype(elt)) + elif elt["type"] == "object": + ent = self._treat_record_type(elt, name) + elif elt["type"] == "array": + ent, force_list = self._treat_list(elt, name) + else: + raise NotImplementedError( + f"Cannot parse items of type '{elt['type']}' (yet).") + if "description" in elt and ent.description is None: + # There is a description and it hasn't been set by another + # treat_something function + ent.description = elt["description"] + + self.model[name] = ent + return ent, force_list + + def _treat_record_type(self, elt: dict, name: str): + # @review Timm Fitschen 2022-02-30 + rt = db.RecordType(name=name) + if "required" in elt: + required = elt["required"] + else: + required = [] + if "properties" in elt: + for key, prop in elt["properties"].items(): + if "title" in prop: + name = self._stringify(prop["title"]) + else: + name = self._stringify(key) + prop_ent, force_list = self._treat_element(prop, name) + if prop_ent is None: + # Nothing to be appended since the property has to be + # treated specially. + continue + importance = db.OBLIGATORY if key in required else db.RECOMMENDED + if not force_list: + rt.add_property(prop_ent, importance=importance) + else: + # Special case of rt used as a list property + rt.add_property(prop_ent, importance=importance, + datatype=db.LIST(prop_ent)) + + if "description" in elt: + rt.description = elt["description"] + return rt + + def _treat_enum(self, elt: dict, name: str): + # @review Timm Fitschen 2022-02-30 + if "type" in elt and elt["type"] == "integer": + raise NotImplementedError( + "Integer-enums are not allowed until " + "https://gitlab.indiscale.com/caosdb/src/caosdb-server/-/issues/224 " + "has been fixed." + ) + rt = db.RecordType(name=name) + for enum_elt in elt["enum"]: + rec = db.Record(name=self._stringify(enum_elt)) + rec.add_parent(rt) + self.model[enum_elt] = rec + + return rt + + def _treat_list(self, elt: dict, name: str): + # @review Timm Fitschen 2022-02-30 + + if "items" not in elt: + raise JsonSchemaDefinitionError( + f"The definition of the list items is missing in {elt}.") + items = elt["items"] + if "enum" in items: + return self._treat_enum(items, name), True + if items["type"] in JSON_SCHEMA_ATOMIC_TYPES: + datatype = db.LIST(self._get_atomic_datatype(items)) + return db.Property(name=name, datatype=datatype), False + if items["type"] == "object": + if "title" not in items or self._stringify(items["title"]) == name: + # Property is RecordType + return self._treat_record_type(items, name), True + else: + # List property will be an entity of its own with a name + # different from the referenced RT + ref_rt = self._treat_record_type( + items, self._stringify(items["title"])) + self.model[ref_rt.name] = ref_rt + return db.Property(name=name, datatype=db.LIST(ref_rt)), False + + +if __name__ == "__main__": + model = parse_model_from_yaml('data_model.yml') + print(model) diff --git a/src/caosadvancedtools/pandoc_header_tools.py b/src/caosadvancedtools/pandoc_header_tools.py index 262defd2e46ea1a6fbe80ab6c476bb8f311cc9a5..e746a26ac19c00de4ee7785399ef98478472340c 100644 --- a/src/caosadvancedtools/pandoc_header_tools.py +++ b/src/caosadvancedtools/pandoc_header_tools.py @@ -136,10 +136,10 @@ it is not at the beginning, it must be preceded by a blank line.
# If a header section was found: if state == 2: headerlines = [] - for l in textlines[found_1:found_2]: - l = l.replace("\t", " ") - l = l.rstrip() - headerlines.append(l) + for line in textlines[found_1:found_2]: + line = line.replace("\t", " ") + line = line.rstrip() + headerlines.append(line) # try: try: yaml_part = yaml.load("\n".join(headerlines), Loader=yaml.BaseLoader) @@ -156,7 +156,7 @@ it is not at the beginning, it must be preceded by a blank line. else: print("Adding header in: {fn}".format(fn=filename)) add_header(filename) - return _get_header(filename) + return get_header(filename) def save_header(filename, header_data): diff --git a/src/caosadvancedtools/scifolder/__init__.py b/src/caosadvancedtools/scifolder/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..cf753cfc0b72bf95e34edea1301b96ed18f040d0 --- /dev/null +++ b/src/caosadvancedtools/scifolder/__init__.py @@ -0,0 +1,6 @@ +from .analysis_cfood import AnalysisCFood +from .experiment_cfood import ExperimentCFood +from .publication_cfood import PublicationCFood +from .simulation_cfood import SimulationCFood +from .software_cfood import SoftwareCFood +from .result_table_cfood import ResultTableCFood diff --git a/src/caosadvancedtools/scifolder/analysis_cfood.py b/src/caosadvancedtools/scifolder/analysis_cfood.py new file mode 100644 index 0000000000000000000000000000000000000000..27cb871aed08f41531c367567ea36ea9a3faaf69 --- /dev/null +++ b/src/caosadvancedtools/scifolder/analysis_cfood.py @@ -0,0 +1,129 @@ +#!/usr/bin/env python +# encoding: utf-8 +# +# Copyright (C) 2019 Henrik tom Wörden +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see <https://www.gnu.org/licenses/>. + +import os +from itertools import chain + +import caosdb as db +from caosadvancedtools.cfood import (AbstractFileCFood, assure_has_parent, + assure_has_property, + assure_object_is_in_list, get_entity) +from caosadvancedtools.read_md_header import get_header + +from .generic_pattern import full_pattern +from .utils import (get_files_referenced_by_field, parse_responsibles, + reference_records_corresponding_to_files) +from .withreadme import DATAMODEL as dm +from .withreadme import (RESULTS, REVISIONOF, SCRIPTS, SOURCES, WithREADME, + get_glob) + + +class AnalysisCFood(AbstractFileCFood, WithREADME): + _prefix = ".*/DataAnalysis/" + + # win_paths can be used to define fields that will contain windows style + # path instead of the default unix ones. Possible fields are: + # ["results", "sources", "scripts","revisionOf"] + win_paths = [] + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + WithREADME.__init__(self) + + def collect_information(self): + self.find_referenced_files([RESULTS, SOURCES, SCRIPTS]) + + @staticmethod + def name_beautifier(name): + """ A function that can be used to rename the project, e.g. if + the project in CaosDB shall be named differently than in the folder + structure. + Its use is discouraged.
+ """ + + return name + + @staticmethod + def get_re(): + return AnalysisCFood._prefix + full_pattern + + def create_identifiables(self): + # create the project identifiable + name = AnalysisCFood.name_beautifier( + self.match.group("project_identifier")) + self.project = db.Record(name=name) + self.project.add_parent(name=dm.Project) + self.identifiables.append(self.project) + + # create the Analysis identifiable + self.analysis = db.Record() + self.analysis.add_parent(name=dm.Analysis) + self.analysis.add_property(name=dm.date, value=self.match.group("date")) + + self.analysis.add_property(name=dm.Project, value=self.project) + self.identifiables.append(self.analysis) + + if self.match.group("suffix") is not None: + self.analysis.add_property(name=dm.identifier, + value=self.match.group("suffix")) + else: + # TODO empty string causes an error in search + self.analysis.add_property(name=dm.identifier, + value="empty_identifier") + + # parse people and add them to identifiables + # TODO People are currently 'identifiable' due to their first and last + # names. There will be conflicts + self.people = parse_responsibles(self.header) + self.identifiables.extend(self.people) + + def update_identifiables(self): + assure_has_property(self.analysis, "description", + self.header["description"][0], + to_be_updated=self.to_be_updated) + assure_object_is_in_list(obj=self.people, + containing_object=self.analysis, + property_name=dm.responsible, + to_be_updated=self.to_be_updated, + datatype=db.LIST(db.REFERENCE) + ) + self.reference_included_records(self.analysis, + [RESULTS, SOURCES, SCRIPTS], + to_be_updated=self.to_be_updated + ) + + if SOURCES.key in self.header: + reference_records_corresponding_to_files( + record=self.analysis, + recordtypes=[dm.Experiment, dm.Publication, dm.Simulation, + dm.Analysis], + globs=get_glob(self.header[SOURCES.key]), + property_name=dm.sources, + path=self.crawled_path, + to_be_updated=self.to_be_updated) + + self.reference_files_from_header(record=self.analysis) + + if REVISIONOF.key in self.header: + reference_records_corresponding_to_files( + record=self.analysis, + recordtypes=[dm.Analysis], + property_name=dm.revisionOf, + globs=get_glob(self.header[REVISIONOF.key]), + path=self.crawled_path, + to_be_updated=self.to_be_updated) diff --git a/src/caosadvancedtools/scifolder/experiment_cfood.py b/src/caosadvancedtools/scifolder/experiment_cfood.py new file mode 100644 index 0000000000000000000000000000000000000000..38606b5f8ffd372d7bf6f507ed96738d9345f16c --- /dev/null +++ b/src/caosadvancedtools/scifolder/experiment_cfood.py @@ -0,0 +1,105 @@ +#!/usr/bin/env python +# encoding: utf-8 +# +# Copyright (C) 2019 Henrik tom Wörden +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see <https://www.gnu.org/licenses/>.
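+# A path matched by this CFood looks like this (illustrative name, assuming
+# the standard SciFolder layout):
+#
+#   /ExperimentalData/2020_climate-model/2020-01-01_TimeOfFlight/README.md
+#
+# The regular expression from get_re() then yields the groups
+# project_identifier="2020_climate-model", project_year="2020",
+# project_name="climate-model", date="2020-01-01" and suffix="TimeOfFlight".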
+ +import caosdb as db +from caosadvancedtools.cfood import (AbstractFileCFood, assure_has_description, + assure_has_parent, assure_has_property, + assure_object_is_in_list, get_entity) +from caosadvancedtools.read_md_header import get_header + +from .generic_pattern import full_pattern +from .utils import parse_responsibles, reference_records_corresponding_to_files +from .withreadme import DATAMODEL as dm +from .withreadme import RESULTS, REVISIONOF, SCRIPTS, WithREADME, get_glob + + +class ExperimentCFood(AbstractFileCFood, WithREADME): + + # win_paths can be used to define fields that will contain windows style + # path instead of the default unix ones. Possible fields are: + # ["results", "revisionOf"] + win_paths = [] + + @staticmethod + def name_beautifier(x): return x + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + WithREADME.__init__(self) + + self.name_map = {} + + @staticmethod + def get_re(): + return ".*/ExperimentalData/"+full_pattern + + def collect_information(self): + self.find_referenced_files([RESULTS]) + + @staticmethod + def create_identifiable_experiment(match): + # create the project identifiable + name = ExperimentCFood.name_beautifier( + match.group("project_identifier")) + project = db.Record(name=name) + project.add_parent(name=dm.Project) + + experiment = db.Record() + experiment.add_parent(name=dm.Experiment) + experiment.add_property( + name=dm.date, value=match.group("date")) + experiment.add_property(name=dm.Project, value=project) + + if match.group("suffix") is None: + experiment.add_property( + name="identifier", value="empty_identifier") + else: + experiment.add_property(name="identifier", + value=match.group("suffix")) + + return [experiment, project] + + def create_identifiables(self): + self.experiment, self.project = ( + ExperimentCFood.create_identifiable_experiment(self.match)) + + self.identifiables.extend([self.project, self.experiment]) + self.people = parse_responsibles(self.header) + self.identifiables.extend(self.people) + + def update_identifiables(self): + # set description + assure_has_property(self.experiment, "description", + self.header["description"][0], + to_be_updated=self.to_be_updated) + + # set responsible people + assure_object_is_in_list(self.people, self.experiment, dm.responsible, + to_be_updated=self.to_be_updated, + datatype=db.LIST(db.REFERENCE)) + + self.reference_files_from_header(record=self.experiment) + + if "revisionOf" in self.header: + reference_records_corresponding_to_files( + record=self.experiment, + recordtypes=[dm.Experiment], + globs=get_glob(self.header[REVISIONOF.key]), + path=self.crawled_path, + property_name=dm.revisionOf, + to_be_updated=self.to_be_updated) diff --git a/src/caosadvancedtools/scifolder/generic_pattern.py b/src/caosadvancedtools/scifolder/generic_pattern.py new file mode 100644 index 0000000000000000000000000000000000000000..0b5a4df2063b9639ee6fd018e241d98df8c583d1 --- /dev/null +++ b/src/caosadvancedtools/scifolder/generic_pattern.py @@ -0,0 +1,35 @@ +#!/usr/bin/env python +# encoding: utf-8 +# +# Copyright (C) 2020 IndiScale GmbH <info@indiscale.com> +# Copyright (C) 2020 Henrik tom Wörden <h.tomwoerden@indiscale.com> +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version.
+# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see <https://www.gnu.org/licenses/>. + +""" this module contains regular expressions needed for the standard file +structure """ + + +project_pattern = (r"(?P<project_identifier>" + r"(?P<project_year>\d{4})_?(?P<project_name>((?!/).)*))/") +date_pattern = r"(?P<date>\d{2,4}[-_]\d{1,2}[-_]\d{1,2})" +date_suffix_pattern = r"(_(?P<suffix>(((?!/).)*)))?/" +readme_pattern = r"(readme.md|README.md|readme.xlsx|README.xlsx)$" + +full_pattern = (project_pattern + date_pattern + date_suffix_pattern + # TODO: Additional levels are not allowed according to the + # specification. This should be removed or enabled via a + # configuration + + "(.*)" + + readme_pattern) diff --git a/src/caosadvancedtools/scifolder/publication_cfood.py b/src/caosadvancedtools/scifolder/publication_cfood.py new file mode 100644 index 0000000000000000000000000000000000000000..fc78e5b759e98e8989c952ccbafeef117e2ed33d --- /dev/null +++ b/src/caosadvancedtools/scifolder/publication_cfood.py @@ -0,0 +1,112 @@ +#!/usr/bin/env python +# encoding: utf-8 +# +# Copyright (C) 2019 Henrik tom Wörden +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version.
a folder name + _prefix = ".*/Publications/" + _type = r"(?P<type>Theses|Articles|Posters|Presentations|Reports)/" + _partial_date = r"(?P<date>\d{2,4}([-_]\d{1,2}[-_]\d{1,2})?)" + + return _prefix+_type+_partial_date+date_suffix_pattern+readme_pattern + + def create_identifiables(self): + header = get_header(fileguide.access(self.crawled_path)) + self.publication = db.Record(name=self.match.group("date") + + "_"+self.match.group("suffix")) + self.publication.add_parent(name=folder_to_type( + self.match.group("type"))) + self.identifiables.append(self.publication) + + self.people = parse_responsibles(header) + self.identifiables.extend(self.people) + + def update_identifiables(self): + header = get_header(fileguide.access(self.crawled_path)) + self.publication.description = header["description"][0] + + assure_object_is_in_list(self.people, self.publication, + "responsible", + self.to_be_updated, + datatype=db.LIST(db.REFERENCE)) + + if SOURCES.key in self.header: + reference_records_corresponding_to_files( + record=self.publication, + recordtypes=[dm.Experiment, dm.Publication, dm.Simulation, + dm.Analysis], + globs=get_glob(self.header[SOURCES.key]), + property_name=dm.sources, + path=self.crawled_path, + to_be_updated=self.to_be_updated) + self.reference_files_from_header(record=self.publication) + + if REVISIONOF.key in self.header: + reference_records_corresponding_to_files( + record=self.publication, + recordtypes=[dm.Publication], + property_name=dm.revisionOf, + globs=get_glob(self.header[REVISIONOF.key]), + path=self.crawled_path, + to_be_updated=self.to_be_updated) diff --git a/src/caosadvancedtools/scifolder/result_table_cfood.py b/src/caosadvancedtools/scifolder/result_table_cfood.py new file mode 100644 index 0000000000000000000000000000000000000000..deaa2d00118659a9b177a05fe40b19a1793a16fb --- /dev/null +++ b/src/caosadvancedtools/scifolder/result_table_cfood.py @@ -0,0 +1,86 @@ +#!/usr/bin/env python +# encoding: utf-8 +# +# Copyright (C) 2019 Henrik tom Wörden +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see <https://www.gnu.org/licenses/>. + +import re + +import caosdb as db +import pandas as pd +from caosadvancedtools.cfood import (AbstractFileCFood, assure_has_description, + assure_has_parent, assure_has_property, + assure_object_is_in_list, get_entity) +from caosadvancedtools.read_md_header import get_header + +from ..cfood import assure_property_is, fileguide +from .experiment_cfood import ExperimentCFood +from .generic_pattern import date_pattern, date_suffix_pattern, project_pattern +from .utils import parse_responsibles, reference_records_corresponding_to_files +from .withreadme import DATAMODEL as dm +from .withreadme import RESULTS, REVISIONOF, SCRIPTS, WithREADME, get_glob + + +# TODO similarities with TableCrawler +class ResultTableCFood(AbstractFileCFood): + + # win_paths can be used to define fields that will contain windows style + # path instead of the default unix ones. 
Possible fields are: + # ["results", "revisionOf"] + win_paths = [] + table_re = r"result_table_(?P<recordtype>.*).csv$" + property_name_re = re.compile(r"^(?P<pname>.+?)\s*(\[\s?(?P<unit>.*?)\s?\] *)?$") + + @staticmethod + def name_beautifier(x): return x + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self.table = pd.read_csv(fileguide.access(self.crawled_path)) + + @staticmethod + def get_re(): + return (".*/ExperimentalData/"+project_pattern + date_pattern + + date_suffix_pattern + ResultTableCFood.table_re) + + def create_identifiables(self): + self.recs = [] + self.experiment, self.project = ( + ExperimentCFood.create_identifiable_experiment(self.match)) + + for idx, row in self.table.iterrows(): + rec = db.Record() + rec.add_parent(self.match.group("recordtype")) + + for col in self.table.columns[:2]: + match = re.match(ResultTableCFood.property_name_re, col) + + if match.group("unit"): + rec.add_property(match.group("pname"), row.loc[col], unit=match.group("unit")) + else: + rec.add_property(match.group("pname"), row.loc[col]) + self.identifiables.append(rec) + self.recs.append(rec) + + self.identifiables.extend([self.project, self.experiment]) + + def update_identifiables(self): + for ii, (idx, row) in enumerate(self.table.iterrows()): + for col in row.index: + match = re.match(ResultTableCFood.property_name_re, col) + assure_property_is(self.recs[ii], match.group("pname"), row.loc[col], to_be_updated=self.to_be_updated) + assure_property_is(self.experiment, self.match.group("recordtype"), + self.recs, to_be_updated=self.to_be_updated, + datatype=db.LIST(self.match.group("recordtype"))) diff --git a/src/caosadvancedtools/scifolder/simulation_cfood.py b/src/caosadvancedtools/scifolder/simulation_cfood.py new file mode 100644 index 0000000000000000000000000000000000000000..c8f23f1485d7a1f64dcd940552051d2e1ec5bb07 --- /dev/null +++ b/src/caosadvancedtools/scifolder/simulation_cfood.py @@ -0,0 +1,109 @@ +#!/usr/bin/env python +# encoding: utf-8 +# +# Copyright (C) 2019 Henrik tom Wörden +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see <https://www.gnu.org/licenses/>. + +import os +from itertools import chain + +import caosdb as db +from caosadvancedtools.cfood import (AbstractFileCFood, assure_has_parent, + assure_has_property, + assure_object_is_in_list, get_entity) +from caosadvancedtools.read_md_header import get_header + +from .generic_pattern import full_pattern +from .utils import (get_files_referenced_by_field, parse_responsibles, + reference_records_corresponding_to_files) +from .withreadme import DATAMODEL as dm +from .withreadme import (RESULTS, REVISIONOF, SCRIPTS, SOURCES, WithREADME, + get_glob) + + +class SimulationCFood(AbstractFileCFood, WithREADME): + # win_paths can be used to define fields that will contain windows style + # path instead of the default unix ones. 
Possible fields are: + # ["results", "sources", "scripts", "revisionOf"] + win_paths = [] + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + WithREADME.__init__(self) + + def collect_information(self): + self.find_referenced_files([RESULTS, SOURCES, SCRIPTS]) + + @staticmethod + def get_re(): + return ".*/SimulationData/" + full_pattern + + def create_identifiables(self): + # create the project identifiable + self.project = db.Record(name=self.match.group("project_identifier")) + self.project.add_parent(name="Project") + self.identifiables.append(self.project) + + self.simulation = db.Record() + self.simulation.add_parent(name="Simulation") + self.simulation.add_property( + name="date", value=self.match.group("date")) + + self.simulation.add_property(name="Project", value=self.project) + + if self.match.group("suffix") is not None: + self.simulation.add_property( + name="identifier", value=self.match.group("suffix")) + else: + # TODO empty string causes an error in search + self.simulation.add_property(name="identifier", + value="empty_identifier") + self.identifiables.append(self.simulation) + self.people = parse_responsibles(self.header) + self.identifiables.extend(self.people) + + def update_identifiables(self): + assure_has_property(self.simulation, "description", + self.header["description"][0], + to_be_updated=self.to_be_updated) + + # TODO why is db.LIST("Person") not possible here? + + assure_object_is_in_list(self.people, self.simulation, + "responsible", + self.to_be_updated, + datatype=db.LIST(db.REFERENCE)) + + if SOURCES.key in self.header: # pylint: disable=unsupported-membership-test + reference_records_corresponding_to_files( + record=self.simulation, + recordtypes=["Experiment", "Publication", "Simulation", + "Analysis"], + globs=get_glob(self.header[SOURCES.key]), # pylint: disable=unsubscriptable-object + property_name=dm.sources, + path=self.crawled_path, + to_be_updated=self.to_be_updated) + self.reference_files_from_header(record=self.simulation) + + if REVISIONOF.key in self.header: # pylint: disable=unsupported-membership-test + reference_records_corresponding_to_files( + record=self.simulation, + recordtypes=[dm.Software], # pylint: disable=no-member + property_name=dm.revisionOf, + globs=get_glob(self.header[dm.revisionOf]), # pylint: disable=unsubscriptable-object + path=self.crawled_path, + to_be_updated=self.to_be_updated) diff --git a/src/caosadvancedtools/scifolder/software_cfood.py b/src/caosadvancedtools/scifolder/software_cfood.py new file mode 100644 index 0000000000000000000000000000000000000000..77fb46521e9aab875b6f99d0a1ee4ac44177e09c --- /dev/null +++ b/src/caosadvancedtools/scifolder/software_cfood.py @@ -0,0 +1,115 @@ +#!/usr/bin/env python +# encoding: utf-8 +# +# Copyright (C) 2019 IndiScale GmbH <info@indiscale.com> +# Copyright (C) 2019 Henrik tom Wörden <h.tomwoerden@indiscale.com> +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details.
+# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see <https://www.gnu.org/licenses/>. + +import os +from itertools import chain + +import caosdb as db +from caosadvancedtools.cfood import (AbstractFileCFood, assure_has_parent, + assure_has_property, assure_name_is, + assure_object_is_in_list, get_entity) +from caosadvancedtools.guard import global_guard as guard +from caosadvancedtools.read_md_header import get_header + +from .generic_pattern import full_pattern +from .utils import get_files_referenced_by_field, parse_responsibles +from .withreadme import BINARIES +from .withreadme import DATAMODEL as dm +from .withreadme import SOURCECODE, WithREADME + + +class SoftwareCFood(AbstractFileCFood, WithREADME): + _prefix = ".*/Software/" + # win_paths can be used to define fields that will contain windows style + # path instead of the default unix ones. Possible fields are: + # ["binaries", "sourceCode","revisionOf"] + win_paths = [] + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + WithREADME.__init__(self) + + def collect_information(self): + self.find_referenced_files([BINARIES, SOURCECODE]) + + @staticmethod + def get_re(): + + return SoftwareCFood._prefix + full_pattern + + def create_identifiables(self): + # The software is a record type. Let's try to find it. + self.software = db.execute_query( + "FIND RecordType Software with name = {}".format( + self.match.group("project_identifier"))) + + if len(self.software) == 0: + # Software not found; insert if allowed + self.software = db.RecordType( + name=self.match.group("project_identifier")) + self.software.add_parent(name="Software") + self.software.add_property(name="alias", + value=self.match.group("project_name")) + guard.safe_insert(self.software) + elif len(self.software) == 1: + self.software = self.software[0] + else: + raise RuntimeError("Cannot identify software record type. Multiple " "matches for {}".format( + self.match.group("project_identifier"))) + + # create the software version + # identifiable is made from parent and date and suffix + self.softwareversion = db.Record() + self.softwareversion.add_parent(self.software) + self.softwareversion.add_property("date", self.match.group("date")) + + if self.match.group("suffix"): + self.softwareversion.add_property( + "version", self.match.group("suffix")) + + self.identifiables.append(self.softwareversion) + + # parse people and add them to identifiables + # TODO People are currently 'identifiable' with their first and last + # names.
There will be conflicts + self.people = parse_responsibles(self.header) + self.identifiables.extend(self.people) + + def update_identifiables(self): + version_name = self.match.group("project_name") + + if self.match.group("suffix"): + version_name += "_"+self.match.group("suffix") + else: + version_name += "_"+self.match.group("date") + + assure_name_is(self.softwareversion, version_name, + to_be_updated=self.to_be_updated) + assure_has_property(self.softwareversion, "description", + self.header["description"][0], + to_be_updated=self.to_be_updated) + assure_object_is_in_list(obj=self.people, + containing_object=self.softwareversion, + property_name="responsible", + to_be_updated=self.to_be_updated, + datatype=db.LIST(db.REFERENCE) + ) + + self.reference_files_from_header(record=self.softwareversion) diff --git a/src/caosadvancedtools/scifolder/utils.py b/src/caosadvancedtools/scifolder/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..afa671af85506a57a06ad5198bec4495823c76f1 --- /dev/null +++ b/src/caosadvancedtools/scifolder/utils.py @@ -0,0 +1,210 @@ +#!/usr/bin/env python +# encoding: utf-8 +# +# Copyright (C) 2020 IndiScale GmbH <info@indiscale.com> +# Copyright (C) 2020 Henrik tom Wörden <h.tomwoerden@indiscale.com> +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see <https://www.gnu.org/licenses/>. + +import logging +import os +from itertools import chain + +import caosdb as db +import pandas as pd +from caosadvancedtools.cfood import assure_object_is_in_list, fileguide +from caosadvancedtools.utils import (find_records_that_reference_ids, + read_field_as_list, + return_field_or_property, + string_to_person) + +logger = logging.getLogger("caosadvancedtools") + + +def parse_responsibles(header): + """ + Extract the responsible person(s) from the yaml header. + + If field responsible is a list every entry from that list will be added as + a person. + Currently only the format <Firstname> <Lastname> <*> is supported. + If it is a simple string, it is added as the only person. + """ + people = [] + + for person in read_field_as_list(header["responsible"]): + people.append(string_to_person(person)) + + return people + + +def get_files_referenced_by_field(globs, prefix="", final_glob=None): + """ + returns all file entities at paths described by given globs + + This function assumes that the supplied globs is a list of + filenames, directories or globs. + + prefix should be the path of the crawled file to supply a context for + relative paths. 
+ """ + referenced_files = [] + globs = [g for g in globs if g is not None] + + for glob in globs: + # TODO extract glob manipulation + + if final_glob is not None and not glob.endswith(final_glob): + glob += final_glob + + if not glob.startswith("/"): + glob = os.path.normpath(os.path.join(prefix, glob)) + else: + glob = os.path.normpath(glob) + + query_string = "FIND file which is stored at {}".format(glob) + logger.debug(query_string) + + el = db.execute_query(query_string) + + referenced_files.append(el) + + return referenced_files + + +def is_filename_allowed(path, recordtype): + if recordtype.lower() == "experiment": + if "ExperimentalData" in path: + return True + elif recordtype.lower() == "analysis": + if "DataAnalysis" in path: + return True + elif recordtype.lower() == "publication": + if "Publication" in path: + return True + elif recordtype.lower() == "simulation": + if "Simulation" in path: + return True + + return False + + +def get_entity_ids_from_include_file(prefix, file_path): + """reads version ids from include file """ + + if not file_path.startswith("/"): + file_path = os.path.normpath(os.path.join(prefix, file_path)) + else: + file_path = os.path.normpath(file_path) + df = pd.read_csv(fileguide.access(file_path), sep="\t", comment="#") + + if "ID" not in df.columns: + raise ValueError("Include file must have an ID column") + + return list(df.ID) + + +def reference_records_corresponding_to_files(record, recordtypes, globs, path, + to_be_updated, property_name): + # TODO this function needs to be refactored: + # the treatment of keys like 'results' should be separated from searching + # entities (see setting of globs and includes below). + + for recordtype in recordtypes: + + directly_named_files = list(chain(*get_files_referenced_by_field( + globs, + prefix=os.path.dirname(path)))) + + files_in_folders = list(chain(*get_files_referenced_by_field( + globs, + prefix=os.path.dirname(path), + final_glob="/**"))) + files = [f for f in directly_named_files + files_in_folders if + is_filename_allowed(f.path, recordtype=recordtype)] + logger.debug("Referenced files:\n" + str(files)) + entities = find_records_that_reference_ids( + list(set([ + fi.id for fi in files])), + rt=recordtype) + logger.debug("Referencing entities:\n" + str(entities)) + + if len(entities) == 0: + continue + else: + assure_object_is_in_list(entities, + record, + property_name, + to_be_updated, + datatype=db.LIST(db.REFERENCE)) + + +def create_files_list(df, ftype): + files = [] + + for indx, src in df.loc[ftype, + pd.notnull(df.loc[ftype])].iteritems(): + desc = df.loc[ftype+" description", indx] + + if pd.notnull(desc): + files.append({'file': src, 'description': desc}) + else: + files.append(src) + + return files + + +def add_value_list(header, df, name): + if name in df.index: + header[name] = list(df.loc[name, pd.notnull(df.loc[name])]) + + +def get_xls_header(filepath): + """ + This function reads an xlsx file and creates a dictionary analogous to the + one created by the yaml headers in README.md files read with the get_header + function of caosdb-advancedtools. + As xlsx files lack the hierarchical structure, the information that can be + provided is less complex. The xlsx files can thus be used as a + less powerful alternative for people who are not comfortable with the + README.md files. + + The xlsx file has a defined set of rows. In each row a list of entries can + be given. This structure is converted to a dictionary with a fixed structure.
+ """ + + header = {} + + df = pd.read_excel(filepath, index_col=0, header=None) + add_value_list(header, df, "responsible") + add_value_list(header, df, "description") + assert len(header["description"]) <= 1 + + for ftype in ["sources", "scripts", "results", "sourceCode", "binaries"]: + if ftype not in df.index: + continue + files = create_files_list(df, ftype) + + if len(files) > 0: + header[ftype] = files + + add_value_list(header, df, "revisionOf") + # there should be only one revision of + + if "revisionOf" in header: + if len(header["revisionOf"]) > 0: + header["revisionOf"] = header["revisionOf"][0] + add_value_list(header, df, "tags") + + return header diff --git a/src/caosadvancedtools/scifolder/withreadme.py b/src/caosadvancedtools/scifolder/withreadme.py new file mode 100644 index 0000000000000000000000000000000000000000..e1968ba49799827467c7ef93a7070b7f090010fb --- /dev/null +++ b/src/caosadvancedtools/scifolder/withreadme.py @@ -0,0 +1,280 @@ +#!/usr/bin/env python +# encoding: utf-8 +# +# Copyright (C) 2020 IndiScale GmbH <info@indiscale.com> +# Copyright (C) 2020 Henrik tom Wörden <h.tomwoerden@indiscale.com> +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see <https://www.gnu.org/licenses/>. 
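+# Sketch of a README header that this module can consume (illustrative
+# values; the keys correspond to the HeaderField instances defined below):
+#
+#   responsible:
+#   - Some Person
+#   results:
+#   - file: plots/*.png
+#     description: result figures
+#   sources:
+#   - include: sources.tsv
+#
+# Entries with a "file" key (or plain strings) are resolved as globs via
+# get_glob(); entries with an "include" key point to tab-separated files
+# with an ID column and are handled by reference_included_records().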
+ + +import logging +import os +from dataclasses import dataclass + +import caosdb as db +from caosadvancedtools.cfood import (assure_has_description, assure_has_parent, + assure_object_is_in_list, fileguide) +from caosadvancedtools.read_md_header import get_header as get_md_header +from caosadvancedtools.table_importer import (win_path_converter, + win_path_list_converter) +from caosadvancedtools.utils import return_field_or_property + +from .utils import (get_entity_ids_from_include_file, + get_files_referenced_by_field, get_xls_header) + +LOGGER = logging.getLogger("withreadme") +LOGGER.setLevel(level=logging.ERROR) + + +@dataclass +class DataModel(object): + results: str = "results" + scripts: str = "scripts" + sources: str = "sources" + date: str = "date" + Project: str = "Project" + Analysis: str = "Analysis" + identifier: str = "identifier" + responsible: str = "responsible" + revisionOf: str = "revisionOf" + Experiment: str = "Experiment" + Publication: str = "Publication" + Simulation: str = "Simulation" + Software: str = "Software" + binaries: str = "binaries" + sourcecode: str = "sourceCode" + description: str = "description" + + +DATAMODEL = DataModel() +dm = DATAMODEL + + +class HeaderField(object): + def __init__(self, key, model): + self.key = key + self.model = model + + +RESULTS = HeaderField("results", dm.results) +SCRIPTS = HeaderField("scripts", dm.scripts) +SOURCES = HeaderField("sources", dm.sources) +FILE = HeaderField("file", None) +INCLUDE = HeaderField("include", None) +REVISIONOF = HeaderField("revisionOf", dm.revisionOf) +BINARIES = HeaderField("binaries", dm.binaries) +SOURCECODE = HeaderField("sourceCode", dm.sourcecode) +DESCRIPTION = HeaderField("description", dm.description) +RECORDTYPE = HeaderField("recordtype", None) + + +def get_glob(field): + """ takes a field which must be a list of globs or dicts. + + If an element is a dict, it must have either an 'include' or a 'file' key.""" + globs = [] + + if not isinstance(field, list): + field = [field] + + for value in field: + + if isinstance(value, dict) and INCLUDE.key in value: + continue + + globs.append(return_field_or_property(value, FILE.key)) + + return globs + + +def get_description(value): + if isinstance(value, dict) and DESCRIPTION.key in value: + return value[DESCRIPTION.key] + else: + return None + + +def get_rt(value): + if isinstance(value, dict) and RECORDTYPE.key in value: + return value[RECORDTYPE.key] + else: + return None + + +class WithREADME(object): + def __init__(self): + self._header = None + self.ref_files = {} + + @property + def header(self): + if self._header is None: + if self.crawled_path.lower().endswith(".md"): # pylint: disable=no-member + self._header = get_md_header( + fileguide.access(self.crawled_path)) # pylint: disable=no-member + elif self.crawled_path.lower().endswith(".xlsx"): # pylint: disable=no-member + self._header = get_xls_header( + fileguide.access(self.crawled_path)) # pylint: disable=no-member + else: + raise RuntimeError("Readme format not recognized.") + self.convert_win_paths() + + return self._header + + def find_referenced_files(self, fields): + """ iterates over given fields in the header and searches for files + + if the field contains a glob.
The file entities are attached.""" + + for field in fields: + + if field.key not in self.header: + continue + + globs = get_glob(self.header[field.key]) + files = get_files_referenced_by_field( + globs, prefix=os.path.dirname(self.crawled_path)) # pylint: disable=no-member + + description = [get_description(val) for val in + self.header[field.key]] + recordtype = [get_rt(val) for val in self.header[field.key]] + self.ref_files[field.model] = [ + (f, d, r) for f, d, r in zip(files, description, recordtype)] + # flatten returned list of file lists + flat_list = [f.path for sublist in files + for f in sublist] + + if len(flat_list) == 0: + LOGGER.warning("ATTENTION: the field {} does not reference any " + "known files".format(field.key)) + + self.attached_filenames.extend(flat_list) # pylint: disable=no-member + + def convert_path(self, el): + """ converts the path in el to unix style + + el can be a dict or a string. If el is a dict it must have a 'file' key + + returns: same type as el + """ + + if isinstance(el, dict): + if INCLUDE.key in el: + el[INCLUDE.key] = win_path_converter(el[INCLUDE.key]) + + return el + + if FILE.key not in el: + raise ValueError("field should have a 'file' attribute") + el[FILE.key] = win_path_converter(el[FILE.key]) + + return el + else: + return win_path_converter(el) + + def convert_win_paths(self): + for field in self.win_paths: # pylint: disable=no-member + if field in self.header: + + if isinstance(self.header[field], list): + self.header[field] = [ + self.convert_path(el) for el in self.header[field]] + else: + self.header[field] = self.convert_path(self.header[field]) + + def reference_files_from_header(self, record): + """adds properties that reference the files collected in ref_files + + ref_files is expected to map property names to lists of (files, + description, recordtype) tuples, where files is the list of file entities, + description the description that shall be added to each and recordtype + the recordtype that the files shall get as parent. files may be an empty + list and description and recordtype may be None. + + The files will be grouped according to the keys used in ref_files and + the record types. The record types take precedence. + """ + references = {} + + for prop_name, ref_tuple in self.ref_files.items(): + generic_references = [] + + for files, description, recordtype in ref_tuple: + if len(files) == 0: + continue + + if description is not None: + for fi in files: + assure_has_description(fi, description, force=True) + + if recordtype is None: + generic_references.extend(files) + else: + for fi in files: + # fix parent + assure_has_parent(fi, recordtype, force=True, + unique=False) + + if recordtype not in references: + references[recordtype] = [] + references[recordtype].extend(files) + + if len(generic_references) > 0: + assure_object_is_in_list( + generic_references, + record, + prop_name, + to_be_updated=self.to_be_updated, + datatype=db.LIST(db.REFERENCE), + ) + + for ref_type in references.keys(): + assure_object_is_in_list( + references[ref_type], + record, + ref_type, + to_be_updated=self.to_be_updated, # pylint: disable=no-member + ) + + def reference_included_records(self, record, fields, to_be_updated): + """ iterates over given fields in the header and collects entity ids + + from the include files referenced there.
+
+    def reference_included_records(self, record, fields, to_be_updated):
+        """ iterates over the given fields in the header, collects entity
+        ids from include files and attaches the included records to the
+        given record."""
+
+        for field in fields:
+
+            if field.key not in self.header:  # pylint: disable=no-member
+                continue
+            included = []
+
+            for item in self.header[field.key]:  # pylint: disable=no-member
+                if INCLUDE.key in item:
+                    try:
+                        included.extend(
+                            get_entity_ids_from_include_file(
+                                os.path.dirname(self.crawled_path),  # pylint: disable=no-member
+                                item[INCLUDE.key]))
+                    except ValueError:
+                        al = logging.getLogger("caosadvancedtools")
+                        al.warning("The include file cannot be read. Please "
+                                   "make sure, it contains an 'ID' column."
+                                   " The file is ignored."
+                                   "\n{}".format(item[INCLUDE.key]))
+
+            assure_object_is_in_list(included,
+                                     record,
+                                     field.model,
+                                     to_be_updated,
+                                     datatype=db.LIST(db.REFERENCE))
diff --git a/src/caosadvancedtools/serverside/examples/example_script.py b/src/caosadvancedtools/serverside/examples/example_script.py
new file mode 100755
index 0000000000000000000000000000000000000000..d97d2d0d1f936b1c12e857d38fce043f0b514340
--- /dev/null
+++ b/src/caosadvancedtools/serverside/examples/example_script.py
@@ -0,0 +1,200 @@
+#!/usr/bin/env python3
+# encoding: utf-8
+#
+# ** header v3.0
+# This file is a part of the CaosDB Project.
+#
+# Copyright (C) 2021 Indiscale GmbH <info@indiscale.com>
+# Copyright (C) 2021 Henrik tom Wörden <h.tomwoerden@indiscale.com>
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as
+# published by the Free Software Foundation, either version 3 of the
+# License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+#
+# ** end header
+#
+
+"""An example script that illustrates how scripts can be used in conjunction
+with the generic_analysis module.
+
+The data model needed for this script is:
+
+Analysis:
+  sources: REFERENCE
+  scripts: FILE
+  results: REFERENCE
+  mean_value: DOUBLE
+
+Person:
+  Email: TEXT
+
+"""
+
+import argparse
+import logging
+import sys
+from argparse import RawTextHelpFormatter
+from datetime import datetime
+from typing import List
+
+import caosdb as db
+import matplotlib.pyplot as plt
+import numpy as np
+from caosadvancedtools.cfood import assure_property_is
+from caosadvancedtools.crawler import apply_list_of_updates
+from caosadvancedtools.guard import INSERT, UPDATE
+from caosadvancedtools.guard import global_guard as guard
+from caosadvancedtools.serverside.helper import send_mail as main_send_mail
+
+# logging should be done like this in order to allow the caller script to
+# direct the output.
+logger = logging.getLogger(__name__)
+
+# allow updates of existing entities
+guard.set_level(level=UPDATE)
+
+
+def send_mail(changes: List[db.Entity], recipient: str):
+    """ calls sendmail in order to send a mail to the curator about pending
+    changes
+
+    Parameters:
+    -----------
+    changes: The CaosDB entities in the version after the update.
+    recipient: The person who shall receive the mail.
+    """
+
+    caosdb_config = db.configuration.get_config()
+    text = """Dear Curator,
+The following changes were done automatically.
+
+{changes}
+    """.format(changes="\n".join(changes))
+    try:
+        fro = caosdb_config["advancedtools"]["automated_updates.from_mail"]
+    except KeyError:
+        logger.error("Server Configuration is missing a setting for "
+                     "sending mails. The administrator should check "
+                     "'from_mail'.")
+        return
+
+    main_send_mail(
+        from_addr=fro,
+        to=recipient,
+        subject="Automated Update",
+        body=text)
+
+
+def main(args):
+
+    # auth_token is provided by the server side scripting API
+    # use this token for authentication when creating a new connection
+    if hasattr(args, "auth_token") and args.auth_token:
+        db.configure_connection(auth_token=args.auth_token)
+        logger.debug("Established connection")
+
+    try:
+        dataAnalysisRecord = db.Record(id=args.entityid).retrieve()
+    except db.TransactionError:
+        logger.error("Cannot retrieve Record with id={}".format(
+            args.entityid
+        ))
+        raise
+
+    # The script may require certain information to exist. Here, we expect that
+    # a sources Property exists that references a numpy file.
+    # Similarly an InputDataSet could be used.
+
+    if (dataAnalysisRecord.get_property("sources") is None
+            or not db.apiutils.is_reference(
+                dataAnalysisRecord.get_property("sources"))):
+
+        raise RuntimeError("sources Reference must exist.")
+
+    logger.debug("Found required data.")
+
+    # ####### this core might be replaced by a call to another script ####### #
+    # Download the data
+    source_val = dataAnalysisRecord.get_property("sources").value
+    npobj = db.File(
+        id=(source_val[0]
+            if isinstance(source_val, list)
+            else source_val)).retrieve()
+    npfile = npobj.download()
+    logger.debug("Downloaded data.")
+    data = np.load(npfile)
+
+    # Plot data
+    filename = "hist.png"
+    plt.hist(data)
+    plt.savefig(filename)
+
+    mean = data.mean()
+    # ####################################################################### #
+
+    # Insert the result plot
+    fig = db.File(file=filename,
+                  path="/Analysis/results/"+str(datetime.now())+"/"+filename)
+    fig.insert()
+
+    # Add the mean value to the analysis Record
+    # If such a property existed before, it is changed if necessary. The old
+    # value will persist in the versioning of LinkAhead
+    to_be_updated = db.Container()
+    assure_property_is(
+        dataAnalysisRecord,
+        "mean_value",
+        mean,
+        to_be_updated=to_be_updated
+    )
+
+    # Add the file with the plot to the analysis Record
+    # If a file was already referenced, the new one will be referenced instead.
+    # The old file is being kept and is still referenced in an old version of
+    # the analysis Record.
+    assure_property_is(
+        dataAnalysisRecord,
+        "results",
+        [fig.id],
+        to_be_updated=to_be_updated
+    )
+
+    if len(to_be_updated) > 0:
+        print(to_be_updated)
+        apply_list_of_updates(to_be_updated, update_flags={})
+        logger.debug("Update successful.")
+        logger.info("The following Entities were changed:\n{}.".format(
+            [el.id for el in to_be_updated])
+        )
+
+    # Send mails to people that are referenced.
+    people = db.execute_query("FIND RECORD Person WHICH IS REFERENCED BY "
+                              "{}".format(dataAnalysisRecord.id))
+    for person in people:
+        if person.get_property("Email") is not None:
+            send_mail([str(el) for el in to_be_updated],
+                      recipient=person.get_property("Email").value)
+    logger.debug("Mails sent.")
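+
+# A hypothetical manual invocation for testing (outside the server-side
+# scripting machinery, which normally supplies the auth token) could look
+# like:
+#
+#   python3 example_script.py --auth-token=<token> 1234
+#
+# where 1234 is the ID of the DataAnalysis Record to be processed.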
+
+
+def parse_args():
+    parser = argparse.ArgumentParser(description=__doc__,
+                                     formatter_class=RawTextHelpFormatter)
+    parser.add_argument("--auth-token",
+                        help="Token provided by the server for authentication")
+    parser.add_argument("entityid",
+                        help="The ID of the DataAnalysis Record.", type=int)
+
+    return parser.parse_args()
+
+
+if __name__ == "__main__":
+    args = parse_args()
+    sys.exit(main(args))
diff --git a/src/caosadvancedtools/serverside/generic_analysis.py b/src/caosadvancedtools/serverside/generic_analysis.py
new file mode 100644
index 0000000000000000000000000000000000000000..85d0c860df75fce205c5eaad77731fc04eee9e40
--- /dev/null
+++ b/src/caosadvancedtools/serverside/generic_analysis.py
@@ -0,0 +1,213 @@
+# encoding: utf-8
+#
+# Copyright (C) 2021 Alexander Schlemmer <alexander.schlemmer@ds.mpg.de>
+# Copyright (C) 2021 IndiScale GmbH <info@indiscale.com>
+# Copyright (C) 2021 Henrik tom Wörden <h.tomwoerden@indiscale.com>
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as
+# published by the Free Software Foundation, either version 3 of the
+# License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+#
+#
+# See: https://gitlab.indiscale.com/caosdb/src/caosdb-advanced-user-tools/-/issues/55
+
+# This source file is work in progress and currently untested.
+
+
+"""
+Variant I: A Python module implements a 'main' function which receives a
+Record as its argument, amends it (e.g. with 'results') and updates it.
+
+Variant II: A script receives an ID as argument (e.g. on the command line)
+and updates the object on its own.
+
+Ideal case: idempotency; i.e. it does not matter whether the script has
+already been called. Calling it again may lead to an update (but only if
+e.g. parameters changed).
+
+The called script may use and create arbitrary properties. BUT if the
+standard properties (InputDataSet, etc.) are used, the Record can be
+created easily.
+
+
+
+
+        "Analyze"              "Perform Analysis"
+   button on a Record          form in the WebUI
+   in the WebUI
+        |                      |
+        |                      |
+        v                      v
+     tiny script that creates
+     a DataAnalysis stub
+                |
+                |
+                v
+     execute_script routine --> analysis script
+     receives the stub and,     uses functions to perform updates where
+     if given, the Python       necessary, sends email
+     module name
+                ^
+                |
+                |
+     cron job finds outdated
+     DataAnalysis
+
+
+The analysis script performs the update:
+  - flexible in which changes are made (e.g. multiple Records)
+  - special helper functions should be used
+  - logging and notifying must happen within the script
+  - the script can be called via subprocess (alternatively, insert an
+    incomplete DataAnalysis)
+
+
+# Features
+  - sending email on insert or update
+  - short info: "Create XY Analysis" can presumably be generated automatically
+  - debug info: should be available optionally/in case of errors
+  - script/software version should be stored
+
+
+Outlook: the part of the called scripts that interacts with LinkAhead might in
+future be replaced by the Crawler. The working directory would be copied to the
+file server and then crawled.
+"""
+
+import argparse
+import importlib
+import logging
+import os
+import sys
+
+import caosdb as db
+from caosdb.utils.server_side_scripting import run_server_side_script
+
+logger = logging.getLogger(__name__)
+
+
+def check_referenced_script(record: db.Record):
+    """ return the name of a referenced script
+
+    If the supplied record does not have an appropriate Property, warnings
+    are logged.
+    """
+
+    if record.get_property("scripts") is None:
+        logger.warning("The following changed Record is missing the 'scripts' "
+                       "Property:\n{}".format(str(record)))
+
+        return
+
+    script_prop = record.get_property("scripts")
+
+    if not db.apiutils.is_reference(script_prop):
+        logger.warning("The 'scripts' Property of the following Record should "
+                       "reference a File:\n{}".format(str(record)))
+
+        return
+
+    script = db.execute_query("FIND ENTITY WITH id={}".format(
+        script_prop.value[0] if isinstance(script_prop.value, list)
+        else script_prop.value), unique=True)
+
+    if (not isinstance(script, db.File)):
+        logger.warning("The 'scripts' Property of the Record {} should "
+                       "reference a File. Entity {} is not a File".format(
+                           record.id, script_prop.value))
+
+        return
+
+    script_name = os.path.basename(script.path)
+
+    return script_name
+
+
+def call_script(script_name: str, record_id: int):
+    ret = run_server_side_script(script_name, record_id)
+
+    if ret.code != 0:
+        logger.error("Script failed!")
+        logger.debug(ret.stdout)
+        logger.error(ret.stderr)
+    else:
+        logger.debug(ret.stdout)
+        logger.error(ret.stderr)
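+
+# For illustration, a minimal sketch of a Python-module analysis (the first
+# option handled by ``run`` below); the module name ``my_analysis`` and its
+# content are hypothetical:
+#
+#   # my_analysis.py -- installed as a pip package and referenced by the
+#   # "Software" Property of a DataAnalysis Record
+#   import caosdb as db
+#
+#   def main(data_analysis_record: db.Record):
+#       # inspect the Record, compute results, then insert or update
+#       # entities, e.g. using the helper functions of caosadvancedtools
+#       ...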
+
+
+def run(dataAnalysisRecord: db.Record):
+    """run a data analysis script.
+
+    There are two options:
+    1. A Python script installed as a pip package.
+    2. A generic script that can be executed on the command line.
+
+    Using a Python package:
+    It should be located in the package plugin and implement at least
+    a main function that takes a DataAnalysisRecord as a single argument.
+    The script may perform changes to the Record and insert and update
+    Entities.
+
+    Using a generic script:
+    The only argument that is supplied to the script is the ID of the
+    dataAnalysisRecord. Apart from the different argument, everything that
+    is said for the Python package holds here.
+    """
+
+    if dataAnalysisRecord.get_property("scripts") is not None:
+        script_name = check_referenced_script(dataAnalysisRecord)
+        logger.debug(
+            "Found 'scripts'. Call script '{}' in separate process".format(
+                script_name)
+        )
+        call_script(script_name, dataAnalysisRecord.id)
+        logger.debug(
+            "Script '{}' done.\n-----------------------------------".format(
+                script_name))
+
+    if dataAnalysisRecord.get_property("Software") is not None:
+        mod = dataAnalysisRecord.get_property("Software").value
+        logger.debug(
+            "Found 'Software'. Call '{}' as Python module".format(
+                mod)
+        )
+        m = importlib.import_module(mod)
+
+        m.main(dataAnalysisRecord)
+        logger.debug(
+            "'main' function of Python module '{}' done"
+            ".\n-----------------------------------".format(mod))
+
+
+def _parse_arguments():
+    """ Parses the command line arguments. """
+    parser = argparse.ArgumentParser(description=__doc__)
+    parser.add_argument("--module",
+                        help="The name of the Python module that performs "
+                        "the analysis.")
+    parser.add_argument("--inputset", help="An id of an input dataset.")
+    parser.add_argument("--parameterset", help="An id of a parameter record.")
+
+    return parser.parse_args()
+
+
+def main():
+    """ This is for testing only. """
+    args = _parse_arguments()
+
+    dataAnalysisRecord = db.Record()
+    dataAnalysisRecord.add_property(name="InputDataSet", value=args.inputset)
+    dataAnalysisRecord.add_property(name="ParameterSet",
+                                    value=args.parameterset)
+    dataAnalysisRecord.add_property(name="Software", value=args.module)
+
+    dataAnalysisRecord.insert()
+    run(dataAnalysisRecord)
+
+
+if __name__ == "__main__":
+    sys.exit(main())
diff --git a/src/caosadvancedtools/serverside/helper.py b/src/caosadvancedtools/serverside/helper.py
index 19efc9ed2b3e99e17eb28f5c87b0a6dbc0c47499..ba75739e0fdc0a83f235db6920471afb196f4246 100644
--- a/src/caosadvancedtools/serverside/helper.py
+++ b/src/caosadvancedtools/serverside/helper.py
@@ -390,11 +390,11 @@ def send_mail(from_addr, to, subject, body, cc=None, bcc=None,
     else:
         caosdb_config = db.configuration.get_config()
 
-        if not "Misc" in caosdb_config or not "sendmail" in caosdb_config["Misc"]:
+        if "Misc" not in caosdb_config or "sendmail" not in caosdb_config["Misc"]:
             err_msg = ("No sendmail executable configured. "
                        "Please configure `Misc.sendmail` "
                        "in your pycaosdb.ini.")
-            raise db.ConfigurationException(err_msg)
+            raise db.ConfigurationError(err_msg)
 
         sendmail = caosdb_config["Misc"]["sendmail"]
 
     # construct sendmail command
diff --git a/src/caosadvancedtools/serverside/model.yml b/src/caosadvancedtools/serverside/model.yml
new file mode 100644
index 0000000000000000000000000000000000000000..2f5a9634a97e39da4c5b3a6dfe1bf0c587863231
--- /dev/null
+++ b/src/caosadvancedtools/serverside/model.yml
@@ -0,0 +1,15 @@
+# Parent of all datasets which are used as input to or output from
+# analysis scripts
+Dataset:
+
+# Parent of all parametersets which are used as input for analysis scripts
+ParameterSet:
+
+DataAnalysis:
+  recommended_properties:
+    InputDataset:
+      datatype: Dataset
+    OutputDataset:
+      datatype: Dataset
+    ParameterSet:
+    date:
\ No newline at end of file
diff --git a/src/caosadvancedtools/serverside/sync.py b/src/caosadvancedtools/serverside/sync.py
new file mode 100755
index 0000000000000000000000000000000000000000..04283a15ba7919af6027b53217ffb69355ddfc6f
--- /dev/null
+++ b/src/caosadvancedtools/serverside/sync.py
@@ -0,0 +1,7 @@
+#!/usr/bin/env python3
+# Sync data model for generic data analysis method
+# A. Schlemmer, 09/2021
+
+from caosadvancedtools.models import parser
+model = parser.parse_model_from_yaml("model.yml")
+model.sync_data_model()
diff --git a/src/caosadvancedtools/structure_mapping.py b/src/caosadvancedtools/structure_mapping.py
new file mode 100644
index 0000000000000000000000000000000000000000..50e57ac4d84f2034fbdb6da6c7159f450a993c3a
--- /dev/null
+++ b/src/caosadvancedtools/structure_mapping.py
@@ -0,0 +1,144 @@
+#!/usr/bin/env python3
+
+# This file is a part of the CaosDB Project.
+#
+# Copyright (C) 2021 IndiScale GmbH <www.indiscale.com>
+# Copyright (C) 2021 Henrik tom Wörden <h.tomwoerden@indiscale.com>
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as
+# published by the Free Software Foundation, either version 3 of the
+# License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+
+import caosdb as db
+from caosdb.apiutils import resolve_reference
+from caosdb.common.utils import uuid
+
+from .cfood import (assure_has_description, assure_has_parent,
+                    assure_property_is)
+
+
+class EntityMapping(object):
+    """
+    map local entities to entities on the server
+
+    the dict to_existing maps _cuid property to entity objects
+    the dict to_target maps id property to entity objects
+    """
+
+    def __init__(self):
+        self.to_existing = {}
+        self.to_target = {}
+
+    def add(self, target, existing):
+        if target._cuid is None:
+            target._cuid = str(uuid())
+        self.to_existing[str(target._cuid)] = existing
+        self.to_target[existing.id] = target
+
+
+def collect_existing_structure(target_structure, existing_root, em):
+    """ recursively collects existing entities
+
+    The collected entities are those that correspond to the ones in
+    target_structure.
+
+
+    em: EntityMapping
+    """
+
+    for prop in target_structure.properties:
+        if prop.value is None:
+            continue
+
+        if not prop.is_reference(server_retrieval=True):
+            continue
+
+        if (len([p for p in target_structure.properties if p.name == prop.name])
+                != 1):
+            raise ValueError("Current implementation allows only one property "
+                             "for each property name")
+
+        if (existing_root.get_property(prop.name) is not None and
+                existing_root.get_property(prop.name).value is not None):
+            resolve_reference(prop)
+
+            resolve_reference(existing_root.get_property(prop.name))
+            referenced = existing_root.get_property(prop.name).value
+
+            if not isinstance(referenced, list):
+                referenced = [referenced]
+            target_value = prop.value
+
+            if not isinstance(target_value, list):
+                target_value = [target_value]
+
+            if len(target_value) != len(referenced):
+                raise ValueError("The number of target values and referenced "
+                                 "entities do not match.")
+
+            for tent, eent in zip(target_value, referenced):
+                em.add(tent, eent)
+                collect_existing_structure(tent, eent, em)
+
+
+def update_structure(em, updating: db.Container, target_structure: db.Record):
+    """compare the existing records with the target record tree created
+    from the h5 object
+
+    Parameters
+    ----------
+
+    em : EntityMapping
+        The mapping between the target records and the records existing on
+        the server (e.g. via the top level identifiable).
+
+    updating : db.Container
+        Container into which entities that need to be updated are collected.
+
+    target_structure : db.Record
+        A record which may have references to other records. Must be a DAG.
+ """ + + if target_structure._cuid in em.to_existing: + update_matched_entity(em, + updating, + target_structure, + em.to_existing[target_structure._cuid]) + + for prop in target_structure.get_properties(): + if prop.is_reference(server_retrieval=True): + update_structure(em, updating, prop.value) + + +def update_matched_entity(em, updating, target_record, existing_record): + """ + update the Record existing in the server according to the Record + supplied as target_record + """ + + for parent in target_record.get_parents(): + if parent.name == "": + raise ValueError("Parent name must not be empty.") + assure_has_parent(existing_record, parent.name, force=True) + + if target_record.description is not None: + # check whether description is equal + assure_has_description(existing_record, target_record.description, + to_be_updated=updating) + + for prop in target_record.get_properties(): + # check for remaining property types + + if isinstance(prop.value, db.Entity): + if prop.value._cuid in em.to_existing: + value = em.to_existing[prop.value._cuid].id + else: + value = prop.value.id + else: + value = prop.value + assure_property_is(existing_record, prop.name, value, + to_be_updated=updating) diff --git a/src/caosadvancedtools/suppressKnown.py b/src/caosadvancedtools/suppressKnown.py index c15f0e06fa7d126937497aeb877dd5d2991b6ff7..c4b57039c5184f2443e4dbb91cf11f5e59ae6790 100644 --- a/src/caosadvancedtools/suppressKnown.py +++ b/src/caosadvancedtools/suppressKnown.py @@ -5,6 +5,8 @@ import os import sqlite3 from hashlib import sha256 +import tempfile + class SuppressKnown(logging.Filter): """ @@ -26,8 +28,9 @@ class SuppressKnown(logging.Filter): if db_file: self.db_file = db_file else: - self.db_file = "/tmp/caosadvanced_suppressed_cache.db" - + tmppath = tempfile.gettempdir() + tmpf = os.path.join(tmppath, "caosadvanced_suppressed_cache.db") + self.db_file = tmpf if not os.path.exists(self.db_file): self.create_cache() diff --git a/src/caosadvancedtools/table_export.py b/src/caosadvancedtools/table_export.py index bed0edc97a794dd83b2bdd7b1c0449c710c18d3f..056207a76fa01357e2269cd4cb8e9a09905d5d90 100644 --- a/src/caosadvancedtools/table_export.py +++ b/src/caosadvancedtools/table_export.py @@ -308,7 +308,7 @@ class BaseTableExporter(object): " was specified but no record is given." ) else: - if not "selector" in d: + if "selector" not in d: d["selector"] = d[QUERY].strip().split(" ")[1] # guess find function and insert if existing else: diff --git a/src/caosadvancedtools/table_importer.py b/src/caosadvancedtools/table_importer.py index 04c8ea23b19ee0cc055dc58b69f1b3d6fecd1b55..1f515e78e3ddbd198fa0336589a359ba9154f038 100755 --- a/src/caosadvancedtools/table_importer.py +++ b/src/caosadvancedtools/table_importer.py @@ -31,6 +31,7 @@ import logging import pathlib from datetime import datetime +import caosdb as db import numpy as np import pandas as pd from xlrd import XLRDError @@ -50,12 +51,27 @@ def assure_name_format(name): name = str(name) if len(name.split(",")) != 2: - raise ValueError("Name field should be 'LastName, FirstName'." + raise ValueError("The field value should be 'LastName, FirstName'. " "The supplied value was '{}'.".format(name)) return name +def check_reference_field(ent_id, recordtype): + if 1 != db.execute_query("COUNT {} WITH id={}".format( + recordtype, + ent_id), + unique=True): + raise ValueError( + "No {} with the supplied id={} exists. 
\n" + "Please supply a valid ID.".format( + recordtype, + ent_id + )) + + return ent_id + + def yes_no_converter(val): """ converts a string to True or False if possible. @@ -88,7 +104,10 @@ def date_converter(val, fmt="%Y-%m-%d"): converts it using format string """ - return datetime_converter(val, fmt=fmt).date() + if val is None: + return None + else: + return datetime_converter(val, fmt=fmt).date() def incomplete_date_converter(val, fmts={"%Y-%m-%d": "%Y-%m-%d", @@ -137,6 +156,9 @@ def win_path_converter(val): checks whether the value looks like a windows path and converts it to posix """ + if val == "": + return val + if not check_win_path(val): raise ValueError( "Field should be a Windows path, but is\n'{}'.".format(val)) @@ -145,80 +167,100 @@ def win_path_converter(val): return path.as_posix() -class TSVImporter(object): - def __init__(self, converters, obligatory_columns=[], unique_columns=[]): - raise NotImplementedError() +def string_in_list(val, options, ignore_case=True): + """Return the given value if it is contained in options, raise an + error otherwise. + Parameters + ---------- + val : str + String value to be checked. + options : list<str> + List of possible values that val may obtain + ignore_case : bool, optional + Specify whether the comparison of val and the possible options + should ignor capitalization. Default is True. + + Returns + ------- + val : str + The original value if it is contained in options -class XLSImporter(object): - def __init__(self, converters, obligatory_columns=None, unique_keys=None): - """ - converters: dict with column names as keys and converter functions as - values - This dict also defines what columns are required to exist - throught the existing keys. The converter functions are - applied to the cell values. They should also check for - ValueErrors, such that a separate value check is not - necessary. - obligatory_columns: list of column names, optional - each listed column must not have missing values - unique_columns : list of column names that in - combination must be unique; i.e. each row has a - unique combination of values in those columns. - """ - self.sup = SuppressKnown() - self.required_columns = list(converters.keys()) - self.obligatory_columns = [] if obligatory_columns is None else obligatory_columns - self.unique_keys = [] if unique_keys is None else unique_keys - self.converters = converters + Raises + ------ + ValueError + If val is not contained in options. + """ - def read_xls(self, filename, **kwargs): - """ - converts an xls file into a Pandas DataFrame. + if ignore_case: + val = val.lower() + options = [o.lower() for o in options] - The converters of the XLSImporter object are used. + if val not in options: + raise ValueError( + "Field value is '{}', but it should be one of the following " + "values: {}.".format(val, ", ".join( + ["'{}'".format(o) for o in options]))) - Raises: DataInconsistencyError + return val + + +class TableImporter(): + """Abstract base class for importing data from tables. + """ + + def __init__(self, converters, obligatory_columns=None, unique_keys=None, + datatypes=None): """ - try: - xls_file = pd.io.excel.ExcelFile(filename) - except XLRDError as e: - logger.warning( - "Cannot read \n{}.\nError:{}".format(filename, - str(e)), - extra={'identifier': str(filename), - 'category': "inconsistency"}) - raise DataInconsistencyError(*e.args) + Parameters + ---------- + converters : dict + Dict with column names as keys and converter functions as values. 
This dict also defines
+            what columns are required to exist through the existing keys. The converter functions are
+            applied to the cell values. They should also check for ValueErrors, such that a separate
+            value check is not necessary.
+
+        obligatory_columns : list, optional
+            List of column names, each listed column must not have missing values.
+
+        unique_keys : list, optional
+            List of column names that in combination must be unique: each row has a unique
+            combination of values in those columns.
+
+        datatypes : dict, optional
+            Dict with column names as keys and datatypes as values. All non-null values will be
+            checked whether they have the provided datatype. This dict also defines what columns are
+            required to exist through the existing keys.
 
-        if len(xls_file.sheet_names) > 1:
-            # Multiple sheets is the default now. Only show in debug
-            logger.debug(
-                "Excel file {} contains multiple sheets. "
-                "All but the first are being ignored.".format(filename))
+        """
 
-        try:
-            df = xls_file.parse(converters=self.converters, **kwargs)
-        except Exception as e:
-            logger.warning(
-                "Cannot parse {}.".format(filename),
-                extra={'identifier': str(filename),
-                       'category': "inconsistency"})
-            raise DataInconsistencyError(*e.args)
+        if converters is None:
+            converters = {}
 
-        self.check_columns(df, filename=filename)
-        df = self.check_missing(df, filename=filename)
+        if datatypes is None:
+            datatypes = {}
 
-        if len(self.unique_keys) > 0:
-            df = self.check_unique(df, filename=filename)
+        self.sup = SuppressKnown()
+        self.required_columns = list(converters.keys())+list(datatypes.keys())
+        self.obligatory_columns = ([]
+                                   if obligatory_columns is None
+                                   else obligatory_columns)
+        self.unique_keys = [] if unique_keys is None else unique_keys
+        self.converters = converters
+        self.datatypes = datatypes
 
-        return df
+    def read_file(self, filename, **kwargs):
+        raise NotImplementedError()
 
     def check_columns(self, df, filename=None):
-        """
-        checks whether all required columns, i.e. columns for which converters
-        were defined exist.
+        """Check whether all required columns exist.
+
+        Required columns are columns for which converters are defined.
+
+        Raises
+        ------
+        DataInconsistencyError
 
-        Raises: DataInconsistencyError
         """
 
         for col in self.required_columns:
@@ -234,12 +276,11 @@
             raise DataInconsistencyError(errmsg)
 
     def check_unique(self, df, filename=None):
-        """
-        Check whether value combinations that shall be unique for each row are
-        unique.
+        """Check whether value combinations that shall be unique for each row are unique.
 
         If a second row is found, that uses the same combination of values as a
         previous one, the second one is removed.
+
         """
         df = df.copy()
         uniques = []
@@ -266,11 +307,57 @@
 
         return df
 
+    def check_datatype(self, df, filename=None, strict=False):
+        """Check for each column whether non-null fields have the correct datatype.
+
+        .. note::
+
+          If columns are integer, but should be float, this method converts the respective columns
+          in place.
+
+        Parameters
+        ----------
+
+        strict: boolean, optional
+          If False (the default), try to convert columns, otherwise raise an error.
+
+        """
+        for key, datatype in self.datatypes.items():
+            # Check for castable numeric types first: We unconditionally cast int to the default
+            # float, because CaosDB does not have different sizes anyway.
+            col_dtype = df.dtypes[key]
+            if not strict and not np.issubdtype(col_dtype, datatype):
+                issub = np.issubdtype
+                # These special cases should be fine.
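+                # (For example, an integer column that was declared as float
+                # is cast to the float datatype here; CaosDB DOUBLE
+                # properties do not distinguish sizes anyway.)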
+ if issub(col_dtype, np.integer) and issub(datatype, np.floating): + df[key] = df[key].astype(datatype) + + # Now check each element + for idx, val in df.loc[ + pd.notnull(df.loc[:, key]), key].iteritems(): + + if not isinstance(val, datatype): + msg = ( + "In row no. {rn} and column '{c}' of file '{fi}' the " + "datatype was {was} but it should be " + "{expected}".format(rn=idx, c=key, fi=filename, + was=str(type(val)).strip("<>"), + expected=str(datatype).strip("<>")) + ) + logger.warning(msg, extra={'identifier': filename, + 'category': "inconsistency"}) + raise DataInconsistencyError(msg) + def check_missing(self, df, filename=None): """ Check in each row whether obligatory fields are empty or null. Rows that have missing values are removed. + + Returns + ------- + out : pandas.DataFrame + The input DataFrame with incomplete rows removed. """ df = df.copy() @@ -306,3 +393,104 @@ class XLSImporter(object): okay = False return df + + def check_dataframe(self, df, filename=None, strict=False): + """Check if the dataframe conforms to the restrictions. + + Checked restrictions are: Columns, data types, uniqueness requirements. + + Parameters + ---------- + + df: pandas.DataFrame + The dataframe to be checked. + + filename: string, optional + The file name, only used for output in case of problems. + + strict: boolean, optional + If False (the default), try to convert columns, otherwise raise an error. + """ + self.check_columns(df, filename=filename) + df = self.check_missing(df, filename=filename) + self.check_datatype(df, filename=filename, strict=strict) + + if len(self.unique_keys) > 0: + df = self.check_unique(df, filename=filename) + + return df + + +class XLSImporter(TableImporter): + def read_file(self, filename, **kwargs): + return self.read_xls(filename=filename, **kwargs) + + def read_xls(self, filename, **kwargs): + """Convert an xls file into a Pandas DataFrame. + + The converters of the XLSImporter object are used. + + Raises: DataInconsistencyError + """ + try: + xls_file = pd.io.excel.ExcelFile(filename) + except (XLRDError, ValueError) as e: + logger.warning( + "Cannot read \n{}.\nError:{}".format(filename, + str(e)), + extra={'identifier': str(filename), + 'category': "inconsistency"}) + raise DataInconsistencyError(*e.args) + + if len(xls_file.sheet_names) > 1: + # Multiple sheets is the default now. Only show in debug + logger.debug( + "Excel file {} contains multiple sheets. 
" + "All but the first are being ignored.".format(filename)) + + try: + df = xls_file.parse(converters=self.converters, **kwargs) + except Exception as e: + logger.warning( + "Cannot parse {}.\n{}".format(filename, e), + extra={'identifier': str(filename), + 'category': "inconsistency"}) + raise DataInconsistencyError(*e.args) + + df = self.check_dataframe(df, filename) + + return df + + +class CSVImporter(TableImporter): + def read_file(self, filename, sep=",", **kwargs): + try: + df = pd.read_csv(filename, sep=sep, converters=self.converters, + **kwargs) + except ValueError as ve: + logger.warning( + "Cannot parse {}.\n{}".format(filename, ve), + extra={'identifier': str(filename), + 'category': "inconsistency"}) + raise DataInconsistencyError(*ve.args) + + df = self.check_dataframe(df, filename) + + return df + + +class TSVImporter(TableImporter): + def read_file(self, filename, **kwargs): + try: + df = pd.read_csv(filename, sep="\t", converters=self.converters, + **kwargs) + except ValueError as ve: + logger.warning( + "Cannot parse {}.\n{}".format(filename, ve), + extra={'identifier': str(filename), + 'category': "inconsistency"}) + raise DataInconsistencyError(*ve.args) + + df = self.check_dataframe(df, filename) + + return df diff --git a/src/doc/Makefile b/src/doc/Makefile index d28503eb0e883e6c879898c12dac07f91bd2df68..7a1bec105f4b0fe1d70cabd7e3cf5f1ceff93bee 100644 --- a/src/doc/Makefile +++ b/src/doc/Makefile @@ -45,4 +45,4 @@ doc-help: @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) apidoc: - @$(SPHINXAPIDOC) -o _apidoc $(PY_BASEDIR) + @$(SPHINXAPIDOC) --force -o _apidoc $(PY_BASEDIR) diff --git a/src/doc/README_SETUP.md b/src/doc/README_SETUP.md new file mode 120000 index 0000000000000000000000000000000000000000..88332e357f5e06f3de522768ccdcd9e513c15f62 --- /dev/null +++ b/src/doc/README_SETUP.md @@ -0,0 +1 @@ +../../README_SETUP.md \ No newline at end of file diff --git a/src/doc/conf.py b/src/doc/conf.py index 0a4ef217b1dea1c7504c69b85150d563d5037bf8..c7f82a99d3b287ca72ca57430b2d4b868539d39e 100644 --- a/src/doc/conf.py +++ b/src/doc/conf.py @@ -17,18 +17,19 @@ # sys.path.insert(0, os.path.abspath('../caosdb')) -# -- Project information ----------------------------------------------------- - import sphinx_rtd_theme + +# -- Project information ----------------------------------------------------- + project = 'caosadvancedtools' -copyright = '2020, IndiScale GmbH' +copyright = '2021, IndiScale GmbH' author = 'Daniel Hornung' # The short X.Y version -version = '0.2.0' +version = '0.4.1' # The full version, including alpha/beta/rc tags -release = '0.2.0' +release = '0.4.1' # -- General configuration --------------------------------------------------- @@ -92,6 +93,9 @@ html_theme = "sphinx_rtd_theme" # relative to this directory. They are copied after the builtin static files, # so a file named "default.css" will overwrite the builtin "default.css". html_static_path = ['_static'] +# Disable static path to remove warning. +html_static_path = [] + # Custom sidebar templates, must be a dictionary that maps document names # to template names. @@ -185,10 +189,11 @@ epub_exclude_files = ['search.html'] # Example configuration for intersphinx: refer to the Python standard library. intersphinx_mapping = { - 'https://docs.python.org/': None, + "python": ("https://docs.python.org/", None), "caosdb-pylib": ("https://caosdb.gitlab.io/caosdb-pylib/", None), } + # TODO Which options do we want? 
autodoc_default_options = {
    'members': None,
diff --git a/src/doc/crawler.rst b/src/doc/crawler.rst
index 2380cdbdbe44989855adb42afd391467502b5809..4b99c97e6db16e5691f373fa5fb4903e4d078155 100644
--- a/src/doc/crawler.rst
+++ b/src/doc/crawler.rst
@@ -3,7 +3,7 @@ CaosDB Crawler
 ==============
 
 The `CaosDB
-crawler <https://gitlab.com/caosdb/caosdb-advanced-user-tools/blob/master/src/caosadvancedtools/crawler.py>`__
+crawler <https://gitlab.com/caosdb/caosdb-advanced-user-tools/blob/main/src/caosadvancedtools/crawler.py>`__
 is a tool for the automated insertion or update of entities in CaosDB.
 Typically, a file structure is crawled, but other things can be crawled
 as well. For example tables or HDF5 files.
@@ -36,9 +36,8 @@ different components of the CaosDB Crawler can be found in the `developers’
 information <#extending-the-crawlers>`__ below.
 
 In case you are happy with our suggestion of a standard crawler, feel
-free to use the standard crawler. The standard crawler lives in this git
-repository maintained by Henrik tom Wörden:
-https://gitlab.com/henrik_indiscale/scifolder
+free to use the standard crawler. The standard crawler lives in the submodule
+`caosadvancedtools.scifolder`
 
 Usage
 =====
@@ -72,11 +71,11 @@ indicated in the messages).
 Invocation as Python Script
 ---------------------------
 
-The crawler can be executed directly via a python script (usually called
+The crawler can be executed directly via a Python script (usually called
 ``crawl.py``). The script prints the progress and reports potential
 problems. The exact behavior depends on your setup. However, you can
 have a look at the example in the
-`tests <https://gitlab.com/caosdb/caosdb-advanced-user-tools/-/blob/master/integrationtests/full_test/crawl.py>`__.
+`tests <https://gitlab.indiscale.com/caosdb/src/caosdb-advanced-user-tools/-/blob/main/integrationtests/crawl.py>`__.
 
 .. Note:: The crawler depends on the CaosDB Python client, so make sure to install :doc:`pycaosdb <caosdb-pylib:getting_started>`.
 
@@ -85,16 +84,20 @@
 Call ``python3 crawl.py --help`` to see what parameters can be provided.
 Typically, an invocation looks like:
 
-.. code:: python
+.. code:: sh
+
+   python3 crawl.py /someplace/
 
-   python3 crawl.py "/TestData/"
+.. Note:: For trying out the above mentioned example crawler from the integration tests,
+   make sure that the ``extroot`` directory in the ``integrationtests`` folder is used as
+   CaosDB's extroot directory, and call the crawler indirectly via ``./test.sh``.
 
-In this case ``/TestData/`` identifies the path to be crawled **within
-the CaosDB file system**. You can browse the CaosDB file system by
+In this case ``/someplace/`` identifies the path to be crawled **within
+CaosDB's file system**. You can browse the CaosDB file system by
 opening the WebUI of your CaosDB instance and clicking on “File System”.
 
 In the backend, ``crawl.py`` starts a CQL query
-``FIND File WHICH IS STORED AT /TestData/**`` and crawls the resulting
+``FIND File WHICH IS STORED AT /someplace/**`` and crawls the resulting
 files according to your customized ``CFoods``.
 
 Crawling may consist of two distinct steps: 1. Insertion of files (use
@@ -128,6 +131,10 @@
 The behavior and rules of the crawler are defined in logical units
 called CFoods. In order to extend the crawler you need to extend an
 existing CFood or create new one.
 
+.. Note:: A crawler always needs a corresponding data model to exist in the
+   server. The following does not cover this aspect. Please refer,
+   for example, to the documentation of the YAML Interface.
+
 .. _c-food-introduction:
 
 CFood -- Introduction
 ---------------------
@@ -389,7 +396,7 @@
 shows how a set of CFoods can be defined to deal with a complex file
 structure. You can find detailed information on files need to be
 structured `here <https://gitlab.com/salexan/check-sfs/-/blob/f-software/filesystem_structure.md>`__ and the source
-code of the CFoods `here <https://gitlab.com/henrik_indiscale/scifolder>`__.
+code of the CFoods `here <https://gitlab.com/caosdb/caosdb-advanced-user-tools>`__.
 
 Sources
 =======
diff --git a/src/doc/index.rst b/src/doc/index.rst
index ee266598cd6cfbcfaa6f54b8e39aa32e4c2b6915..9aa045349ab05d3f5130a7f33b38c7eca0c4f32e 100644
--- a/src/doc/index.rst
+++ b/src/doc/index.rst
@@ -15,6 +15,7 @@ This documentation helps you to :doc:`get started<getting_started>`, explains th
    Concepts <concepts>
    tutorials
    Caosdb-Crawler <crawler>
+   YAML Interface <yaml_interface>
    _apidoc/modules
diff --git a/src/doc/yaml_interface.rst b/src/doc/yaml_interface.rst
new file mode 100644
index 0000000000000000000000000000000000000000..476e92829238a0fc9dac851c61790c022e9fcde9
--- /dev/null
+++ b/src/doc/yaml_interface.rst
@@ -0,0 +1,126 @@
+YAML-Interface
+--------------
+
+The yaml interface is a module in caosadvancedtools that can be used to create and update
+CaosDB models using a simplified definition in YAML format.
+
+Let's start with an example taken from https://gitlab.indiscale.com/caosdb/src/caosdb-advanced-user-tools/-/blob/dev/unittests/model.yml.
+
+.. code-block:: yaml
+
+   Project:
+      obligatory_properties:
+         projectId:
+            datatype: INTEGER
+            description: 'UID of this project'
+   Person:
+      recommended_properties:
+         firstName:
+            datatype: TEXT
+            description: 'first name'
+         lastName:
+            datatype: TEXT
+            description: 'last name'
+   LabbookEntry:
+      recommended_properties:
+         Project:
+         entryId:
+            datatype: INTEGER
+            description: 'UID of this entry'
+         responsible:
+            datatype: Person
+            description: 'the person responsible for these notes'
+         textElement:
+            datatype: TEXT
+            description: 'a text element of a labbook recording'
+         associatedFile:
+            datatype: FILE
+            description: 'A file associated with this recording'
+         table:
+            datatype: FILE
+            description: 'A table document associated with this recording'
+   extern:
+      - Textfile
+
+
+
+This example defines 3 ``RecordType``s:
+
+- A ``Project`` with one obligatory property ``projectId``
+- A ``Person`` with a ``firstName`` and a ``lastName`` (as recommended properties)
+- A ``LabbookEntry`` with multiple recommended properties of different data types
+- It is assumed that the server knows a RecordType or Property with the name
+  ``Textfile``.
+
+
+One major advantage of using this interface (in contrast to the standard python interface) is that properties can be defined and added to record types "on-the-fly". E.g. the three lines for ``firstName`` as sub entries of ``Person`` have two effects on CaosDB:
+
+- A new property with name ``firstName``, datatype ``TEXT`` and description ``first name`` is inserted (or updated, if already present) into CaosDB.
+- The new property is added as a recommended property to record type ``Person``.
+
+Any further occurrences of ``firstName`` in the yaml file will reuse the definition provided for ``Person``.
+
+Note the difference between the three property declarations of ``LabbookEntry``:
+
+- ``Project``: This record type is added directly as a property of ``LabbookEntry``. Therefore it does not specify any further attributes. Compare to the original declaration of record type ``Project``.
+- ``responsible``: This defines and adds a property with name "responsible" to ``LabbookEntry``, which has a datatype ``Person``. ``Person`` is defined above.
+- ``textElement``: This defines and adds a property with the standard data type ``TEXT`` to record type ``LabbookEntry``.
+
+If the data model depends on record types or properties which already exist in CaosDB, those can be
+added using the ``extern`` keyword: ``extern`` takes a list of previously defined names.
+
+Datatypes
+---------
+
+You can use any data type understood by CaosDB as datatype attribute in the yaml model.
+
+List attributes are a bit special:
+
+.. code-block:: yaml
+
+  datatype: LIST<DOUBLE>
+
+would declare a list datatype of DOUBLE elements.
+
+.. code-block:: yaml
+
+  datatype: LIST<Project>
+
+would declare a list of elements with datatype Project.
+
+
+Keywords
+--------
+
+- **parent**: Parent of this entity.
+- **importance**: Importance of this entity. Possible values: "recommended", "obligatory", "suggested"
+- **datatype**: The datatype of this property, e.g. TEXT, INTEGER or Project.
+- **unit**: The unit of the property, e.g. "m/s".
+- **description**: A description for this entity.
+- **recommended_properties**: Add properties to this entity with importance "recommended".
+- **obligatory_properties**: Add properties to this entity with importance "obligatory".
+- **suggested_properties**: Add properties to this entity with importance "suggested".
+- **inherit_from_recommended**: Inherit from another entity using the specified importance level, including the higher importance level "obligatory". This adds a corresponding parent and all obligatory and recommended properties from the parent.
+- **inherit_from_suggested**: Inherit from another entity using the specified importance level, including the higher importance levels. This adds a corresponding parent and all obligatory, recommended and suggested properties from the parent.
+- **inherit_from_obligatory**: Inherit from another entity using the specified importance level. This adds a corresponding parent and all obligatory properties from the parent.
+
+Usage
+-----
+
+You can use the yaml parser directly in Python as follows:
+
+
+.. code-block:: python
+
+  from caosadvancedtools.models import parser as parser
+  model = parser.parse_model_from_yaml("model.yml")
+
+
+This creates a DataModel object containing all entities defined in the yaml file.
+
+You can then use the functions from caosadvancedtools.models.data_model.DataModel to synchronize
+the model with a CaosDB instance, e.g.:
+
+.. 
code-block:: python + + model.sync_data_model() diff --git a/tox.ini b/tox.ini index 3386d09aeeedcb33bcdb1191231043b0f7575140..dde34b987b9b08bfdfc51a06dd46a9a0e0494f28 100644 --- a/tox.ini +++ b/tox.ini @@ -1,13 +1,16 @@ [tox] -envlist=py36, py37, py38, py39 +envlist=py36, py37, py38, py39, py310 skip_missing_interpreters = true [testenv] deps=nose pandas -#TODO remove this once versioning is released in pylib - git+https://gitlab.com/caosdb/caosdb-pylib.git@dev + git+https://gitlab.indiscale.com/caosdb/src/caosdb-pylib.git@dev pytest pytest-cov openpyxl xlrd == 1.2 + h5py commands=py.test --cov=caosadvancedtools -vv {posargs} + +[flake8] +max-line-length=100 diff --git a/unittests/create_dummy_hdf5file.py b/unittests/create_dummy_hdf5file.py new file mode 100644 index 0000000000000000000000000000000000000000..ce04030154c70e1d533f67aeec12321b86ddf305 --- /dev/null +++ b/unittests/create_dummy_hdf5file.py @@ -0,0 +1,70 @@ +import h5py +import numpy as np + + +def create_hdf5_file(filename="hdf5_dummy_file.hdf5"): + ''' + Create a dummy hdf5-file for testing. + Structure: + + root:-->root + group_level1_a:-->group + group_level2_aa:-->group + group_level3_aaa:-->group + level3_aaa_floats_2d = float64(100x100) + group_level3_aab:-->group + group_level2_ab:-->group + group_level3_aba:-->group + level3_aba_floats_2d = float64(100x100) + group_level2_ac:-->group + level2_ac_integers_2d = int32(100x100) + group_level1_b:-->group + group_level2_ba:-->group + level2_ba_integers_2d = int32(100x100) + level1_b_floats = float64(10000) + group_level1_c:-->group + level1_c_floats = float64(10000) + root_integers = int32(10000) + ''' + + with h5py.File(filename, mode="w") as hdf5: + '''Create toplevel groups''' + group_lvl1_a = hdf5.create_group("group_level1_a") + group_lvl1_b = hdf5.create_group("group_level1_b") + group_lvl1_c = hdf5.create_group("group_level1_c") + + '''Create level 2 groups''' + group_lvl2_aa = group_lvl1_a.create_group("group_level2_aa") + group_lvl2_ab = group_lvl1_a.create_group("group_level2_ab") + group_lvl2_ac = group_lvl1_a.create_group("group_level2_ac") + group_lvl2_ba = group_lvl1_b.create_group("group_level2_ba") + + '''Create level 3 groups''' + group_lvl3_aaa = group_lvl2_aa.create_group("group_level3_aaa") + group_lvl3_aab = group_lvl2_aa.create_group("group_level3_aab") + group_lvl3_aba = group_lvl2_ab.create_group("group_level3_aba") + + '''Create datasets''' + integers = np.arange(10000) + floats = np.arange(0, 1000, 0.1) + integers_2d = np.diag(np.arange(100)) + floats_2d = np.eye(100) + data_root = hdf5.create_dataset("root_integers", data=integers) + data_lvl1_b = group_lvl1_b.create_dataset("level1_b_floats", data=floats) + data_lvl2_c = group_lvl1_c.create_dataset("level1_c_floats", data=floats) + data_lvl2_ac = group_lvl2_ac.create_dataset("level2_ac_integers_2d", data=integers_2d) + data_lvl2_ba = group_lvl2_ba.create_dataset("level2_ba_integers_2d", data=integers_2d) + data_lvl3_aaa = group_lvl3_aaa.create_dataset("level3_aaa_floats_2d", data=floats_2d) + data_lvl3_aba = group_lvl3_aba.create_dataset("level3_aba_floats_2d", data=floats_2d) + + '''Create attributes''' + attr_group_lvl1_a = group_lvl1_a.attrs.create("attr_group_lvl1_a", 1) + attr_group_lvl2_aa = group_lvl2_aa.attrs.create("attr_group_lvl2_aa", -2) + attr_group_lvl3_aaa = group_lvl3_aaa.attrs.create("attr_group_lvl3_aaa", 1.0) + attr_data_root = data_root.attrs.create("attr_data_root", -2.0) + attr_data_lvl2_ac = data_lvl2_ac.attrs.create("attr_data_lvl2_ac", np.diag(np.arange(10))) + 
attr_data_lvl3_aaa = data_lvl3_aaa.attrs.create("attr_data_lvl3_aaa", np.eye(10)) + + +if __name__ == "__main__": + create_hdf5_file() diff --git a/unittests/create_filetree.py b/unittests/create_filetree.py index 6f95618dbc834c3bc140163efdc90aa51c8d5248..f80b9681163859027bb8f8c7cd6b1387bf2d378d 100644 --- a/unittests/create_filetree.py +++ b/unittests/create_filetree.py @@ -42,8 +42,6 @@ def main(folder, dry=True): if not dry: os.mkdir(series_path) for date in [datetime.today()-timedelta(days=i)-timedelta(weeks=50*ii) for i in range(10)]: - #import IPython - # IPython.embed() exp_path = os.path.join(series_path, "Exp_"+str(date.date())) print("Exp: "+os.path.basename(exp_path)) if not dry: diff --git a/unittests/data/DataAnalysis/2010_TestProject/2019-02-03_something/README.md b/unittests/data/DataAnalysis/2010_TestProject/2019-02-03_something/README.md new file mode 100644 index 0000000000000000000000000000000000000000..71454e8909393b432ca74fa01e77b33d8b0644d5 --- /dev/null +++ b/unittests/data/DataAnalysis/2010_TestProject/2019-02-03_something/README.md @@ -0,0 +1,15 @@ +--- +responsible: +- Only Responsible +description: A description of another example analysis. + +sources: +- file: "/ExperimentalData/2010_TestProject/2019-02-03/*.dat" + description: an example reference to a results file + +scripts: +- file: plot.py + description: a plotting script +results: +- file: results.pdf +... diff --git a/unittests/data/ExperimentalData/2010_TestProject/2019-02-03_something/README.md b/unittests/data/ExperimentalData/2010_TestProject/2019-02-03_something/README.md new file mode 100644 index 0000000000000000000000000000000000000000..b7e5051c7bdbcdafb1bbd3a870b00feecfb109ff --- /dev/null +++ b/unittests/data/ExperimentalData/2010_TestProject/2019-02-03_something/README.md @@ -0,0 +1,9 @@ +--- +responsible: +- Only Responsible +description: A description of another example experiment. + +results: +- file: "/ExperimentalData/2010_TestProject/2019-02-03/*.dat" + description: an example reference to a results file +... diff --git a/unittests/data/Publications/Posters/2019-02-03_something/README.md b/unittests/data/Publications/Posters/2019-02-03_something/README.md new file mode 100644 index 0000000000000000000000000000000000000000..c95e37ecc569103d8c3a812e45f1a5110781ea26 --- /dev/null +++ b/unittests/data/Publications/Posters/2019-02-03_something/README.md @@ -0,0 +1,11 @@ +--- +responsible: +- Only Responsible +description: A description of another example experiment. + +sources: +- /DataAnalysis/2010_TestProject/2019-02-03/results.pdf + +results: +- "*.pdf" +... diff --git a/unittests/data/README.md b/unittests/data/README.md new file mode 100644 index 0000000000000000000000000000000000000000..a2e0ce6e319219c10bc61653510ad53dd2ab958a --- /dev/null +++ b/unittests/data/README.md @@ -0,0 +1,14 @@ +--- +responsible: Ana Lytic +description: An examplary analysis of very exciting research. The analysis was conducted following state of the art best practices of scientific methodology. +sources: + - /ExperimentalData/2010_TestProject/2019-02-03_something/ + - file: /ExperimentalData/2010_TestProject/2019-02-03_something/ + description: An example reference to an experiment. The experimental data was analysed with statistical methods using proper error calculations. +scripts: + - file: scripts + description: all the files needed to run the analysis +results: + - file: results.pdf + description: a plot of the statistical analysis +... 
diff --git a/unittests/data/README.xlsx b/unittests/data/README.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..a909347789edc1d5a1bbaacd998744cee83d5f6b Binary files /dev/null and b/unittests/data/README.xlsx differ diff --git a/unittests/data/SimulationData/2010_TestProject/2019-02-03_something/README.md b/unittests/data/SimulationData/2010_TestProject/2019-02-03_something/README.md new file mode 100644 index 0000000000000000000000000000000000000000..fba1bd48a89514cbff92f9d8bd518484ecaa624b --- /dev/null +++ b/unittests/data/SimulationData/2010_TestProject/2019-02-03_something/README.md @@ -0,0 +1,12 @@ +--- +responsible: +- Only Responsible +description: A description of another example experiment. + +results: +- file: "*.dat" + description: an example reference to a results file + +scripts: +- sim.py +... diff --git a/unittests/data/datatypes.xlsx b/unittests/data/datatypes.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..34fc4cf43092a68b630e0e04ebc43609b8a0b17b Binary files /dev/null and b/unittests/data/datatypes.xlsx differ diff --git a/unittests/date.xls b/unittests/date.xls new file mode 100644 index 0000000000000000000000000000000000000000..966ad4dc1d04055d75b455c8d0f9a5ac6f36200d Binary files /dev/null and b/unittests/date.xls differ diff --git a/unittests/hdf5_dummy_file.hdf5 b/unittests/hdf5_dummy_file.hdf5 new file mode 100644 index 0000000000000000000000000000000000000000..41bfb7ab3bcac19d90fd4f018cdd8118ae806eaf Binary files /dev/null and b/unittests/hdf5_dummy_file.hdf5 differ diff --git a/unittests/json-schema-models/datamodel_atomic_properties.schema.json b/unittests/json-schema-models/datamodel_atomic_properties.schema.json new file mode 100644 index 0000000000000000000000000000000000000000..3828f131180a839d5c9b8bc5aa1a1285717da723 --- /dev/null +++ b/unittests/json-schema-models/datamodel_atomic_properties.schema.json @@ -0,0 +1,24 @@ +[ + { + "title": "Dataset1", + "description": "Some description", + "type": "object", + "properties": { + "title": { "type": "string", "description": "full dataset title" }, + "campaign": { "type": "string", "description": "FIXME" }, + "number_prop": { "type": "number", "description": "Some float property" } + }, + "required": [ "title", "number_prop" ] + }, + { + "title": "Dataset2", + "type": "object", + "properties": { + "date_time": { "type": "string", "format": "date-time" }, + "date": { "type": "string", "format": "date" }, + "integer": { "type": "integer", "description": "Some integer property" }, + "boolean": { "type": "boolean" }, + "number_prop": { "type": "number", "description": "Some float property" } + } + } +] diff --git a/unittests/json-schema-models/datamodel_enum_prop.schema.json b/unittests/json-schema-models/datamodel_enum_prop.schema.json new file mode 100644 index 0000000000000000000000000000000000000000..a14008d141606368519c0caadc30b16a1dc9d16d --- /dev/null +++ b/unittests/json-schema-models/datamodel_enum_prop.schema.json @@ -0,0 +1,16 @@ +{ + "title": "Dataset", + "description": "Some description", + "type": "object", + "properties": { + "license": { + "type": "string", + "enum": ["CC-BY", "CC-BY-SA", "CC0", "restricted access"] + }, + "number_enum": { + "type": "number", + "enum": [1.1, 2.2, 3.3] + } + }, + "required": ["license"] +} diff --git a/unittests/json-schema-models/datamodel_int_enum_broken.schema.json b/unittests/json-schema-models/datamodel_int_enum_broken.schema.json new file mode 100644 index 
0000000000000000000000000000000000000000..159b84ac36c26325b59cdd25d2830152c4acdaaa --- /dev/null +++ b/unittests/json-schema-models/datamodel_int_enum_broken.schema.json @@ -0,0 +1,11 @@ +{ + "title": "Dataset", + "description": "Some description", + "type": "object", + "properties": { + "int_enum": { + "type": "integer", + "enum": [1, 2, 3] + } + } +} diff --git a/unittests/json-schema-models/datamodel_list_properties.schema.json b/unittests/json-schema-models/datamodel_list_properties.schema.json new file mode 100644 index 0000000000000000000000000000000000000000..b95f468a1c13f1912266e65f029654077ce6a14e --- /dev/null +++ b/unittests/json-schema-models/datamodel_list_properties.schema.json @@ -0,0 +1,46 @@ +{ + "title": "Dataset", + "description": "Dataset with list (array) properties", + "type": "object", + "properties": { + "keywords": { + "type": "array", + "items": { "type": "string" } + }, + "booleans": { + "type": "array", + "items": { "type": "boolean" } + }, + "integers": { + "type": "array", + "items": { "type": "integer" } + }, + "floats": { + "type": "array", + "items": { "type": "number" } + }, + "datetimes": { + "type": "array", + "items": { "type": "string", "format": "date-time" } + }, + "dates": { + "type": "array", + "items": { "type": "string", "format": "date" } + }, + "reference": { + "type": "array", + "items": { "type": "object", "properties": {} } + }, + "reference_with_name": { + "type": "array", + "items": { "type": "object", "title": "event", "properties": {} } + }, + "license": { + "type": "array", + "items": { + "type": "string", + "enum": ["CC-BY", "CC-BY-SA", "CC0", "restricted access"] + } + } + } +} diff --git a/unittests/json-schema-models/datamodel_missing_property_type.schema.json b/unittests/json-schema-models/datamodel_missing_property_type.schema.json new file mode 100644 index 0000000000000000000000000000000000000000..eac3cc563df587568c4e9610d72618610566beef --- /dev/null +++ b/unittests/json-schema-models/datamodel_missing_property_type.schema.json @@ -0,0 +1,7 @@ +{ + "title": "Dataset", + "type": "object", + "properties": { + "method": { "description": "Missing property type" } + } +} diff --git a/unittests/json-schema-models/datamodel_name.schema.json b/unittests/json-schema-models/datamodel_name.schema.json new file mode 100644 index 0000000000000000000000000000000000000000..c0e86028c36172d27a4523f2c08db1b413b5c19f --- /dev/null +++ b/unittests/json-schema-models/datamodel_name.schema.json @@ -0,0 +1,12 @@ +{ + "title": "Dataset", + "type": "object", + "properties": { + "name": { "type": "string", "description": "Name of this dataset" }, + "date_time": { "type": "string", "format": "date-time" }, + "date": { "type": "string", "format": "date" }, + "integer": { "type": "integer", "description": "Some integer property" }, + "boolean": { "type": "boolean" }, + "number_prop": { "type": "number", "description": "Some float property" } + } +} diff --git a/unittests/json-schema-models/datamodel_name_wrong_type.schema.json b/unittests/json-schema-models/datamodel_name_wrong_type.schema.json new file mode 100644 index 0000000000000000000000000000000000000000..1988ad3d8cd613def36df69f5ad30fedd0a26e48 --- /dev/null +++ b/unittests/json-schema-models/datamodel_name_wrong_type.schema.json @@ -0,0 +1,12 @@ +{ + "title": "Dataset", + "type": "object", + "properties": { + "name": { "type": "boolean", "description": "Name of this dataset" }, + "date_time": { "type": "string", "format": "date-time" }, + "date": { "type": "string", "format": "date" }, + 
"integer": { "type": "integer", "description": "Some integer property" }, + "boolean": { "type": "boolean" }, + "number_prop": { "type": "number", "description": "Some float property" } + } +} diff --git a/unittests/json-schema-models/datamodel_references.schema.json b/unittests/json-schema-models/datamodel_references.schema.json new file mode 100644 index 0000000000000000000000000000000000000000..6b79a9bcdbbd8beaf9974a600e9c5ff30cb513f4 --- /dev/null +++ b/unittests/json-schema-models/datamodel_references.schema.json @@ -0,0 +1,24 @@ +{ + "title": "Dataset", + "description": "", + "type": "object", + "properties": { + "event": { + "type": "object", + "properties": { + "longitude": { + "type": "number" + }, + "latitude": { + "type": "number" + }, + "location": { + "type": "string", + "description": "geographical location (e.g., North Sea; Espoo, Finland)" + } + }, + "required": ["longitude", "latitude"] + } + }, + "required": ["event"] +} diff --git a/unittests/json-schema-models/datamodel_required_no_list.schema.json b/unittests/json-schema-models/datamodel_required_no_list.schema.json new file mode 100644 index 0000000000000000000000000000000000000000..f3697a71320bc8baf05156bec2c71f3915378654 --- /dev/null +++ b/unittests/json-schema-models/datamodel_required_no_list.schema.json @@ -0,0 +1,7 @@ +{ + "title": "Dataset", + "description": "", + "type": "object", + + "required": "Dataset" +} diff --git a/unittests/json-schema-models/datamodel_string_properties.schema.json b/unittests/json-schema-models/datamodel_string_properties.schema.json new file mode 100644 index 0000000000000000000000000000000000000000..62bc0a2a4250050e5433038bf61e7c9692bb0200 --- /dev/null +++ b/unittests/json-schema-models/datamodel_string_properties.schema.json @@ -0,0 +1,14 @@ +{ + "title": "Dataset", + "description": "", + "type": "object", + + "properties": { + "title": { "type": "string", "description": "full dataset title" }, + "campaign": { "type": "string", "description": "FIXME" }, + "method": { "type": "string", "description": "FIXME" }, + "titled": { "title": "The title", "type": "string", "description": "None" } + }, + + "required": ["title"] +} diff --git a/unittests/test.csv b/unittests/test.csv new file mode 100644 index 0000000000000000000000000000000000000000..a29679afce78089f3cdd4e5e388262456668cd90 --- /dev/null +++ b/unittests/test.csv @@ -0,0 +1,3 @@ +temperature [°C] ,depth +234.4,3.0 +344.6,5.1 diff --git a/unittests/test_cache.py b/unittests/test_cache.py index d3eb93a448d6efed07bf967873f74f882f99db48..f0f6a8f4f5920f2ac5a07fa334673158d30e9e5b 100644 --- a/unittests/test_cache.py +++ b/unittests/test_cache.py @@ -27,8 +27,8 @@ from tempfile import NamedTemporaryFile import sqlite3 import caosdb as db - -from caosadvancedtools.cache import Cache +from caosadvancedtools.cache import Cache, cleanXML +from lxml import etree import pytest @@ -94,6 +94,22 @@ class CacheTest(unittest.TestCase): self.assertEqual(self.cache.check_existing(hashes[0])[0], 1001) self.assertEqual(self.cache.check_existing(hashes[2])[0], 1003) + def test_clean(self): + xml = etree.XML( + """\ + <Entities> + <TransactionBenchmark> + </TransactionBenchmark> + <RecordType id="110" name="Guitar"> + <Version id="eb8c7527980e598b887e84d055db18cfc3806ce6" head="true"/> + <Parent id="108" name="MusicalInstrument" flag="inheritance:OBLIGATORY,"/> + <Property id="106" name="electric" datatype="BOOLEAN" importance="RECOMMENDED" flag="inheritance:FIX"/> + </RecordType> + </Entities> +""") + cleanXML(xml) + assert 
len(xml.findall('TransactionBenchmark')) == 0 + def create_sqlite_file(commands): """ diff --git a/unittests/test_cfood.py b/unittests/test_cfood.py index 1bad508a2c22cf1ee1e29be11c3342d2115dd5a2..7055bc7c51962c0cbc487f29bcdacb391218a7d3 100644 --- a/unittests/test_cfood.py +++ b/unittests/test_cfood.py @@ -48,13 +48,14 @@ class ExampleCFoodMeal(AbstractFileCFood, CMeal): CMeal.__init__(self) @classmethod - def match_item(cls, item): + def match_item(cls, path): """ standard match_match, but returns False if a suitable cfood exists """ - if cls.has_suitable_cfood(item): + print(path) + if cls.has_suitable_cfood(path): return False - return re.match(cls.get_re(), item) is not None + return re.match(cls.get_re(), path) is not None def looking_for(self, crawled_file): """ standard looking_for, but returns True if the file matches all @@ -112,6 +113,36 @@ class CFoodReTest(unittest.TestCase): self.assertTrue(SimpleCFood.match_item("hallo")) self.assertFalse(SimpleCFood.match_item("allo")) + def test_extensions(self): + """Test the RE generation.""" + empty_extensions = [] + extensions = ["foo", "bar"] + + self.assertIsNone(AbstractFileCFood.re_from_extensions(empty_extensions)) + self.assertIsNotNone(SimpleCFood.re_from_extensions(extensions)) + + class ExtCFood(AbstractFileCFood): + + @staticmethod + def get_re(): + return AbstractFileCFood.re_from_extensions(extensions) + create_identifiables = None + update_identifiables = None + + # test which paths are matched + print(ExtCFood.re_from_extensions(extensions)) + self.assertTrue(ExtCFood.match_item("hello/world.foo")) + self.assertTrue(ExtCFood.match_item("hello/world.bar")) + self.assertFalse(ExtCFood.match_item("hello/world.baz")) + self.assertFalse(ExtCFood.match_item("hello/world.foo ")) # Mind the space. + self.assertFalse(ExtCFood.match_item("hello/world.foobar")) + self.assertFalse(ExtCFood.match_item("hello/world.foo|bar")) + self.assertFalse(ExtCFood.match_item("hello/world.fobar")) + self.assertFalse(ExtCFood.match_item("hello/world.fooar")) + + # Test stored extension + self.assertEqual(ExtCFood("hello/world.foo").match["ext"], "foo") + class InsertionTest(unittest.TestCase): def test_contained_in_list(self): @@ -160,6 +191,35 @@ class InsertionTest(unittest.TestCase): value=new_int, to_be_updated=to_be_updated) assert to_be_updated[0] is entity + """Test properties with lists""" + rec1 = db.Record(id=12345) + rec1.add_property("Exp", value=[98765], datatype=db.LIST("Exp")) + rec2 = db.Record(id=98765) + update = [] + # compare Entity with id + assure_has_property(rec1, "Exp", [rec2], to_be_updated=update) + assert len(update) == 0 + update = [] + # compare id with id + assure_has_property(rec1, "Exp", [98765], to_be_updated=update) + assert len(update) == 0 + update = [] + # compare id with different list of ids + assure_has_property(rec1, "Exp2", [98765, 444, 555], + to_be_updated=update) + assert len(update) == 1 + + rec = db.Record(id=666666) + rec3 = db.Record(id=777777) + rec.add_property("Exp", value=[888888, rec3], datatype=db.LIST("Exp")) + rec2 = db.Record(id=888888) + update = [] + # compare id and Entity with id and Entity + # i.e. check that conversion from Entity to id works in both + # directions. 
+ assure_has_property(rec, "Exp", [rec2, 777777], to_be_updated=update) + assert len(update) == 0 + def test_property_is(self): """Test properties with string, int, float, and Boolean values""" entity = db.Record() diff --git a/unittests/test_cfoods.py b/unittests/test_cfoods.py new file mode 100644 index 0000000000000000000000000000000000000000..87e6d6d2da0254e134def92c098b1568c26863ab --- /dev/null +++ b/unittests/test_cfoods.py @@ -0,0 +1,54 @@ +#!/usr/bin/env python +# encoding: utf-8 +# +# Copyright (C) 2019 Henrik tom Wörden +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see <https://www.gnu.org/licenses/>. +import os +import unittest + +from caosadvancedtools.scifolder import (AnalysisCFood, ExperimentCFood, + PublicationCFood, SimulationCFood) + +data_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), + "data") + + +class CFoodTest(unittest.TestCase): + def test_analysis(self): + self.assertFalse(AnalysisCFood.match_item("nopath")) + path = (data_path+"/DataAnalysis/2010_TestProject/" + "2019-02-03_something/README.md") + self.assertTrue(AnalysisCFood.match_item(path)) + AnalysisCFood(path) + + def test_experiment(self): + self.assertFalse(ExperimentCFood.match_item("nopath")) + path = (data_path+"/ExperimentalData/2010_TestProject/" + "2019-02-03_something/README.md") + self.assertTrue(ExperimentCFood.match_item(path)) + ExperimentCFood(path) + + def test_publication(self): + self.assertFalse(PublicationCFood.match_item("nopath")) + path = data_path+"/Publications/Posters/2019-02-03_something/README.md" + self.assertTrue(PublicationCFood.match_item(path)) + PublicationCFood(path) + + def test_simulation(self): + self.assertFalse(SimulationCFood.match_item("nopath")) + path = (data_path + "/SimulationData/2010_TestProject/" + "2019-02-03_something/README.md") + self.assertTrue(SimulationCFood.match_item(path)) + SimulationCFood(path) diff --git a/unittests/test_crawler.py b/unittests/test_crawler.py index f603031eddbcf1e10c2842ec4e89ca591700b94f..64bf291c1181d901ac39a4d2535dcd6eddf39f70 100644 --- a/unittests/test_crawler.py +++ b/unittests/test_crawler.py @@ -45,3 +45,7 @@ class CrawlerTest(unittest.TestCase): datatype=db.LIST("RT2")) qs = Crawler.create_query_for_identifiable(ident) assert qs == "FIND Record RT WITH references 2345 AND references 234567 " + ident = db.Record() + ident.add_parent(name="RT") + self.assertRaises(ValueError, Crawler.create_query_for_identifiable, + ident) diff --git a/unittests/test_data_model.py b/unittests/test_data_model.py new file mode 100644 index 0000000000000000000000000000000000000000..159adfca1d589bb092b6f59110828b5868401e25 --- /dev/null +++ b/unittests/test_data_model.py @@ -0,0 +1,35 @@ +import unittest + +import caosdb as db +from caosadvancedtools.models.data_model import DataModel + + +class DataModelTest(unittest.TestCase): + + def test_collecting(self): + maintained = {"one": db.RecordType(name="TestRecord").add_property( + name="testproperty"), 
+ "two": db.Property(name="testproperty", datatype=db.INTEGER)} + dm = DataModel(maintained.values()) + col = dm.collect_entities() + names = [e.name for e in col] + assert "TestRecord" in names + assert "testproperty" in names + + def test_sync_ids_by_name(self): + container = db.Container().extend([db.RecordType(name="TestRecord"), + db.RecordType(name="TestRecord2"), + ]) + + # assign negative ids + container.to_xml() + l1 = DataModel(container) + + rt = db.RecordType(name="TestRecord") + rt.id = 1002 + rt2 = db.RecordType(name="TestRecordNonono") + rt2.id = 1000 + l2 = [rt2, rt] + DataModel.sync_ids_by_name(l1, l2) + assert l1["TestRecord"].id == rt.id + assert l1["TestRecord2"].id < 0 diff --git a/unittests/test_generic_analysis.py b/unittests/test_generic_analysis.py new file mode 100644 index 0000000000000000000000000000000000000000..a1077b97ec58f80c8534c89d5fa5f57d8d815cb9 --- /dev/null +++ b/unittests/test_generic_analysis.py @@ -0,0 +1,70 @@ +#!/usr/bin/env python3 +# encoding: utf-8 +# +# ** header v3.0 +# This file is a part of the CaosDB Project. +# +# Copyright (C) 2021 Indiscale GmbH <info@indiscale.com> +# Copyright (C) 2021 Henrik tom Wörden <h.tomwoerden@indiscale.com> +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see <https://www.gnu.org/licenses/>. 
+#
+# ** end header
+#
+
+"""
+Unit tests for caosadvancedtools.serverside.generic_analysis, in particular
+for the check_referenced_script helper.
+"""
+
+import caosdb as db
+from caosadvancedtools.serverside.generic_analysis import \
+ check_referenced_script
+
+from test_utils import BaseMockUpTest
+
+
+class TestGAnalysisNoFile(BaseMockUpTest):
+ def __init__(self, *args, **kwargs):
+ super().__init__(*args, **kwargs)
+ self.entities = (
+ '<Response><Record name="script.py" path="/some/path/script.py'
+ '" id="1234"/><Query string="find record" results="1">'
+ '</Query></Response>')
+
+ def test_check_referenced_script(self):
+ # missing scripts
+ self.assertIsNone(check_referenced_script(db.Record()))
+ # wrong datatype
+ self.assertIsNone(check_referenced_script(db.Record().add_property(
+ "scripts", datatype=db.TEXT)))
+ # wrong value
+ self.assertIsNone(check_referenced_script(db.Record().add_property(
+ "scripts", datatype=db.REFERENCE, value="hallo")))
+ # no file
+ self.assertIsNone(check_referenced_script(db.Record().add_property(
+ "scripts", datatype=db.REFERENCE, value="1234")))
+
+
+class TestGAnalysisFile(BaseMockUpTest):
+ def __init__(self, *args, **kwargs):
+ super().__init__(*args, **kwargs)
+ self.entities = (
+ '<Response><File name="script.py" path="/some/path/script.py'
+ '" id="1234"/><Query string="find record" results="1">'
+ '</Query></Response>')
+
+ def test_check_referenced_script(self):
+ # all correct
+ self.assertEqual(check_referenced_script(db.Record().add_property(
+ "scripts", datatype=db.REFERENCE, value="1234")), "script.py")
diff --git a/unittests/test_h5.py b/unittests/test_h5.py
new file mode 100644
index 0000000000000000000000000000000000000000..360d4b28938492d0f2af6d696e39dffb1cc3fead
--- /dev/null
+++ b/unittests/test_h5.py
@@ -0,0 +1,190 @@
+import unittest
+from tempfile import NamedTemporaryFile
+
+import caosdb as db
+import caosdb.apiutils
+import h5py
+import numpy as np
+from caosadvancedtools.cfoods import h5
+from caosadvancedtools.cfoods.h5 import h5_attr_to_property
+
+from create_dummy_hdf5file import create_hdf5_file
+
+ENTS = {
+ 101: db.Record(id=101),
+ 102: db.Record(id=102),
+ 103: db.Record(id=103).add_property("test", value=101,
+ datatype=db.REFERENCE),
+}
+
+
+def dummy_get(eid):
+ return ENTS[eid]
+
+
+class H5CFoodTest(unittest.TestCase):
+ def setUp(self):
+ self.h5file = NamedTemporaryFile(delete=False, suffix=".h5")
+ self.h5file.close()
+ create_hdf5_file(self.h5file.name)
+ self.h5obj = h5py.File(self.h5file.name, mode="a")
+
+ def test_create_record_records(self):
+ result = h5.H5CFood.create_structure(self.h5obj)
+
+ record_list = []
+ parents = ['group_level1_a', 'group_level1_b', 'group_level1_c', 'root_integers']
+
+ for i in parents:
+ record_list.append(db.Record().add_parent(name=i))
+
+ found_parents = []
+
+ for ent in [p.value for p in result.properties]:
+ if ent.parents[0].name == 'group_level1_a':
+ found_parents.append('group_level1_a')
+ self.assertTrue(ent.get_property("group_level2_aa") is not None)
+ self.assertTrue(ent.get_property("group_level1_a") is None)
+ elif ent.parents[0].name == 'group_level1_b':
+ found_parents.append('group_level1_b')
+ pass
+ elif ent.parents[0].name == 'group_level1_c':
+ found_parents.append('group_level1_c')
+ pass
+ elif ent.parents[0].name == 'root_integers':
+ found_parents.append('root_integers')
+ pass
+
+ for p in parents:
+ self.assertTrue(p in found_parents)
+
+ for i in range(len(result.properties)):
+ for j in result.properties[i].value.get_parents():
+ for k in record_list[i].get_parents():
+ self.assertEqual(j.name, k.name)
+
+ result1 =
h5.H5CFood.create_structure(self.h5obj["group_level1_a"]) + + for i in result1.get_parents(): + self.assertEqual(i.name, "group_level1_a") + + result2 = h5.H5CFood.create_structure(self.h5obj["group_level1_a/group_level2_aa"]) + + for i in result2.get_parents(): + self.assertEqual(i.name, "group_level2_aa") + + def test_collect_existing_structure(self): + # TODO this does probably break the code: The function will not be + # restored correctly. + # Change it to use the BaseMockUpTest + real_retrieve = caosdb.apiutils.retrieve_entity_with_id + caosdb.apiutils.retrieve_entity_with_id = dummy_get + + # should run without problem + h5.collect_existing_structure(db.Record(), db.Record(id=234), h5.EntityMapping()) + + # test with retrieval: both Records have one test Property with one + # value -> The referenced Entities are matched + r_exist = db.Record(id=234) + r_exist.add_property("test", value=101, datatype=db.REFERENCE) + r_target = db.Record() + r_child = db.Record() + r_target.add_property("test", value=r_child, datatype=db.REFERENCE) + em = h5.EntityMapping() + h5.collect_existing_structure(r_target, r_exist, em) + self.assertTrue(em.to_existing[r_child._cuid] is ENTS[101]) + self.assertTrue(em.to_target[101] is r_child) + + # test with retrieval: the existing Record has another Property + # -> The referenced Entities are matched + r_exist = db.Record(id=234) + r_exist.add_property("test_other", value=101, datatype=db.REFERENCE) + r_target = db.Record() + r_child = db.Record() + r_target.add_property("test", value=r_child, datatype=db.REFERENCE) + em = h5.EntityMapping() + h5.collect_existing_structure(r_target, r_exist, em) + self.assertEqual(em.to_existing, {}) + self.assertEqual(em.to_target, {}) + + # test with retrieval: both Records have one test Property; the + # existing is missing the value -> The referenced Entities are matched + r_exist = db.Record(id=234) + r_exist.add_property("test", value=None, datatype=db.REFERENCE) + r_target = db.Record() + r_child = db.Record() + r_target.add_property("test", value=r_child, datatype=db.REFERENCE) + em = h5.EntityMapping() + h5.collect_existing_structure(r_target, r_exist, em) + self.assertEqual(em.to_existing, {}) + self.assertEqual(em.to_target, {}) + + # test with retrieval: both Records have one test Property with + # multiple values -> The referenced Entities are matched + r_exist = db.Record(id=234) + r_exist.add_property("test", value=[101, 102], datatype=db.LIST(db.REFERENCE)) + r_target = db.Record() + r_child = db.Record() + r_child2 = db.Record() + r_target.add_property("test", value=[r_child, r_child2], + datatype=db.LIST(db.REFERENCE)) + em = h5.EntityMapping() + h5.collect_existing_structure(r_target, r_exist, em) + self.assertEqual(em.to_existing[r_child._cuid], ENTS[101]) + self.assertEqual(em.to_existing[r_child2._cuid], ENTS[102]) + self.assertEqual(em.to_target[101], r_child) + self.assertEqual(em.to_target[102], r_child2) + + # test with retrieval: both Records have one test Property with one + # value; Add another recursion level -> The referenced Entities are matched + r_exist = db.Record(id=234) + r_exist.add_property("test", value=103, datatype=db.REFERENCE) + r_target = db.Record() + r_child = db.Record() + r_child2 = db.Record() + r_target.add_property("test", value=r_child, datatype=db.REFERENCE) + r_child.add_property("test", value=r_child2, datatype=db.REFERENCE) + em = h5.EntityMapping() + h5.collect_existing_structure(r_target, r_exist, em) + self.assertEqual(em.to_existing[r_child._cuid], ENTS[103]) + 
self.assertEqual(em.to_target[103], r_child) + self.assertEqual(em.to_existing[r_child2._cuid], ENTS[101]) + self.assertEqual(em.to_target[101], r_child2) + + caosdb.apiutils.retrieve_entity_with_id = real_retrieve + + def test_h5_attr_to_property(self): + + test_int: int = 1 + test_integer = np.int_(1) + test_float = np.float_(1.0) + test_str = "Test" + test_complex: complex = 2+3j + self.assertRaises(NotImplementedError, h5_attr_to_property, test_int) # only numpy-integers processed? + self.assertTupleEqual((1, db.INTEGER), h5_attr_to_property(test_integer)) + self.assertTupleEqual((1.0, db.DOUBLE), h5_attr_to_property(test_float)) + self.assertTupleEqual(("Test", db.TEXT), h5_attr_to_property(test_str)) + self.assertTupleEqual((2+3j, db.TEXT), h5_attr_to_property(test_complex)) + # strings are often represented using a binary format + self.assertTupleEqual(("yeti", db.TEXT), h5_attr_to_property( + np.array(["yeti"], dtype=h5py.string_dtype(r'utf-8', 8))[0])) + + test_integer_1d = np.arange(10) + test_float_1d = np.arange(0, 1, 0.1) + test_str_1d = np.array(["a", "b", "c"]) + self.assertTrue((np.arange(10) == h5_attr_to_property(test_integer_1d)[0]).all()) + self.assertTrue(db.LIST(db.INTEGER) == h5_attr_to_property(test_integer_1d)[1]) + self.assertTrue((np.arange(0, 1, 0.1) == h5_attr_to_property(test_float_1d)[0]).all()) + self.assertTrue(db.LIST(db.DOUBLE) == h5_attr_to_property(test_float_1d)[1]) + self.assertTrue((np.array(["a", "b", "c"]) == h5_attr_to_property(test_str_1d)[0]).all()) + self.assertTrue(db.LIST(db.TEXT) == h5_attr_to_property(test_str_1d)[1]) + + test_integers_2d = np.diag(np.arange(100)) + test_floats_2d = np.eye(100) + self.assertTupleEqual((None, None), h5_attr_to_property(test_integers_2d)) + self.assertTupleEqual((None, None), h5_attr_to_property(test_floats_2d)) + + # Test scalar values given as np.array + self.assertTupleEqual((1, db.INTEGER), h5_attr_to_property(np.array(1))) + self.assertTupleEqual((1.123, db.DOUBLE), h5_attr_to_property(np.array(1.123))) + self.assertTupleEqual(('Hello World', db.TEXT), h5_attr_to_property(np.array("Hello World"))) diff --git a/unittests/test_json_schema_model_parser.py b/unittests/test_json_schema_model_parser.py new file mode 100644 index 0000000000000000000000000000000000000000..7f47890f413dce5511cd498fe802e03a1af3be70 --- /dev/null +++ b/unittests/test_json_schema_model_parser.py @@ -0,0 +1,358 @@ +# +# This file is a part of the CaosDB Project. +# +# Copyright (C) 2022 IndiScale GmbH <info@indiscale.com> +# Copyright (C) 2022 Florian Spreckelsen <f.spreckelsen@indiscale.com> +# Copyright (C) 2022 Daniel Hornung <d.hornung@indiscale.com> +# +# This program is free software: you can redistribute it and/or modify it under +# the terms of the GNU Affero General Public License as published by the Free +# Software Foundation, either version 3 of the License, or (at your option) any +# later version. +# +# This program is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +# FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more +# details. +# +# You should have received a copy of the GNU Affero General Public License along +# with this program. If not, see <https://www.gnu.org/licenses/>. 
+#
+
+# @review Daniel Hornung 2022-02-18
+
+import os
+import pytest
+
+import caosdb as db
+from caosadvancedtools.models.parser import (parse_model_from_json_schema,
+ JsonSchemaDefinitionError)
+
+FILEPATH = os.path.join(os.path.dirname(
+ os.path.abspath(__file__)), 'json-schema-models')
+
+
+def test_rt_with_string_properties():
+ """Test datamodel parsing of datamodel_string_properties.schema.json"""
+ # @author Florian Spreckelsen
+ # @date 2022-02-17
+
+ model = parse_model_from_json_schema(
+ os.path.join(FILEPATH,
+ "datamodel_string_properties.schema.json"))
+ assert "Dataset" in model
+ dataset_rt = model["Dataset"]
+ assert isinstance(dataset_rt, db.RecordType)
+ assert dataset_rt.name == "Dataset"
+ assert dataset_rt.description == ""
+ assert len(dataset_rt.get_properties()) == 4
+
+ assert dataset_rt.get_property("title") is not None
+ assert dataset_rt.get_property("campaign") is not None
+ assert dataset_rt.get_property("method") is not None
+
+ assert dataset_rt.get_property("The title") is not None
+ assert dataset_rt.get_property("titled") is None
+
+ title_prop = dataset_rt.get_property("title")
+ assert title_prop.datatype == db.TEXT
+ assert dataset_rt.get_importance(title_prop.name) == db.OBLIGATORY
+
+ campaign_prop = dataset_rt.get_property("campaign")
+ assert campaign_prop.datatype == db.TEXT
+ assert dataset_rt.get_importance(campaign_prop.name) == db.RECOMMENDED
+
+ method_prop = dataset_rt.get_property("method")
+ assert method_prop.datatype == db.TEXT
+ assert dataset_rt.get_importance(method_prop.name) == db.RECOMMENDED
+
+
+def test_datamodel_with_atomic_properties():
+ """Test read-in of two separate record types with atomic-typed properties."""
+ # @author Florian Spreckelsen
+ # @date 2022-02-18
+
+ model = parse_model_from_json_schema(os.path.join(
+ FILEPATH, "datamodel_atomic_properties.schema.json"))
+ assert "Dataset1" in model
+ assert "Dataset2" in model
+
+ rt1 = model["Dataset1"]
+ assert isinstance(rt1, db.RecordType)
+ assert rt1.name == "Dataset1"
+ assert rt1.description == "Some description"
+ assert len(rt1.get_properties()) == 3
+
+ assert rt1.get_property("title") is not None
+ assert rt1.get_property("campaign") is not None
+ assert rt1.get_property("number_prop") is not None
+
+ title_prop = rt1.get_property("title")
+ assert title_prop.datatype == db.TEXT
+ assert rt1.get_importance(title_prop.name) == db.OBLIGATORY
+
+ campaign_prop = rt1.get_property("campaign")
+ assert campaign_prop.datatype == db.TEXT
+ assert rt1.get_importance(campaign_prop.name) == db.RECOMMENDED
+
+ float_prop = rt1.get_property("number_prop")
+ assert float_prop.datatype == db.DOUBLE
+ assert rt1.get_importance(float_prop.name) == db.OBLIGATORY
+
+ rt2 = model["Dataset2"]
+ assert isinstance(rt2, db.RecordType)
+ assert rt2.name == "Dataset2"
+ assert not rt2.description
+ assert len(rt2.get_properties()) == 5
+
+ date_prop = rt2.get_property("date")
+ assert date_prop.datatype == db.DATETIME
+
+ datetime_prop = rt2.get_property("date_time")
+ assert datetime_prop.datatype == db.DATETIME
+
+ int_prop = rt2.get_property("integer")
+ assert int_prop.datatype == db.INTEGER
+ assert int_prop.description == "Some integer property"
+
+ bool_prop = rt2.get_property("boolean")
+ assert bool_prop.datatype == db.BOOLEAN
+
+ float_prop2 = rt2.get_property("number_prop")
+ assert float_prop.datatype == float_prop2.datatype
+
+
+def test_required_no_list():
+ """Exception must be raised when "required" is not a list."""
+ # @author Daniel Hornung
+ # @date 2022-02-18
+
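+ # ("required" must be a list in JSON Schema; the schema under test sets
+ # "required": "Dataset", a plain string, so parsing should fail)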
+ with pytest.raises(JsonSchemaDefinitionError) as err:
+ parse_model_from_json_schema(
+ os.path.join(FILEPATH,
+ "datamodel_required_no_list.schema.json"))
+ assert "'Dataset' is not of type 'array'" in str(err.value)
+
+
+def test_missing_property_type():
+ """Exception must be raised when "type" is missing."""
+ with pytest.raises(JsonSchemaDefinitionError) as err:
+ parse_model_from_json_schema(
+ os.path.join(FILEPATH,
+ "datamodel_missing_property_type.schema.json"))
+ assert "`type` is missing" in str(err.value)
+
+
+def test_enum():
+ """Enums are represented in references to records of a specific type."""
+ # @author Florian Spreckelsen
+ # @date 2022-03-16
+
+ model = parse_model_from_json_schema(os.path.join(
+ FILEPATH, "datamodel_enum_prop.schema.json"))
+ licenses = ["CC-BY", "CC-BY-SA", "CC0", "restricted access"]
+ for name in ["Dataset", "license"] + licenses:
+ assert name in model
+
+ assert isinstance(model["Dataset"], db.RecordType)
+ assert model["Dataset"].get_property("license") is not None
+ assert model["Dataset"].get_property("license").is_reference()
+ assert model["Dataset"].get_property("license").datatype.name == "license"
+ assert isinstance(model["license"], db.RecordType)
+
+ for name in licenses:
+ assert isinstance(model[name], db.Record)
+ assert model[name].name == name
+ assert len(model[name].parents) == 1
+ assert model[name].has_parent(model["license"])
+
+ # Also allow enums with non-string types
+ number_enums = ["1.1", "2.2", "3.3"]
+ for name in ["number_enum"] + number_enums:
+ assert name in model
+
+ assert isinstance(model["number_enum"], db.RecordType)
+ assert model["Dataset"].get_property("number_enum") is not None
+ assert model["Dataset"].get_property("number_enum").is_reference()
+ assert model["Dataset"].get_property(
+ "number_enum").datatype.name == "number_enum"
+
+ for name in number_enums:
+ assert isinstance(model[name], db.Record)
+ assert model[name].name == name
+ assert len(model[name].parents) == 1
+ assert model[name].has_parent(model["number_enum"])
+
+
+@pytest.mark.xfail(reason="Don't allow integer enums until https://gitlab.indiscale.com/caosdb/src/caosdb-server/-/issues/224 has been fixed")
+def test_int_enum():
+ """Check an enum property with type: integer"""
+ # @author Florian Spreckelsen
+ # @date 2022-03-22
+
+ model = parse_model_from_json_schema(os.path.join(
+ FILEPATH, "datamodel_int_enum_broken.schema.json"))
+ int_enums = ["1", "2", "3"]
+ for name in ["Dataset", "int_enum"] + int_enums:
+ assert name in model
+
+ assert isinstance(model["Dataset"], db.RecordType)
+ assert model["Dataset"].get_property("int_enum") is not None
+ assert model["Dataset"].get_property("int_enum").is_reference()
+ assert model["Dataset"].get_property(
+ "int_enum").datatype.name == "int_enum"
+ assert isinstance(model["int_enum"], db.RecordType)
+
+ for name in int_enums:
+ assert isinstance(model[name], db.Record)
+ assert model[name].name == name
+ assert len(model[name].parents) == 1
+ assert model[name].has_parent(model["int_enum"])
+
+
+def test_references():
+ """Test reference properties"""
+ # @author Florian Spreckelsen
+ # @date 2022-03-17
+
+ model = parse_model_from_json_schema(os.path.join(
+ FILEPATH, "datamodel_references.schema.json"))
+ for name in ["Dataset", "event", "longitude", "latitude", "location"]:
+ assert name in model
+
+ assert isinstance(model["Dataset"], db.RecordType)
+ assert model["Dataset"].get_property("event") is not None
+ assert model["Dataset"].get_importance("event") == db.OBLIGATORY
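+ # (the nested "event" object should become a reference property whose
+ # datatype is a separate "event" RecordType)
+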
assert model["Dataset"].get_property("event").is_reference() + assert model["Dataset"].get_property("event").datatype.name == "event" + + assert isinstance(model["event"], db.RecordType) + assert model["event"].get_property("longitude") is not None + assert model["event"].get_importance("longitude") == db.OBLIGATORY + assert model["event"].get_property("longitude").datatype == db.DOUBLE + + assert model["event"].get_property("latitude") is not None + assert model["event"].get_importance("latitude") == db.OBLIGATORY + assert model["event"].get_property("latitude").datatype == db.DOUBLE + + assert model["event"].get_property("location") is not None + assert model["event"].get_importance("location") == db.RECOMMENDED + assert model["event"].get_property("location").datatype == db.TEXT + + assert isinstance(model["longitude"], db.Property) + assert model["longitude"].datatype == db.DOUBLE + + assert isinstance(model["latitude"], db.Property) + assert model["latitude"].datatype == db.DOUBLE + + assert isinstance(model["location"], db.Property) + assert model["location"].datatype == db.TEXT + assert model["location"].description == "geographical location (e.g., North Sea; Espoo, Finland)" + + +def test_list(): + """Test list properties with all possible datatypes.""" + # @author Florian Spreckelsen + # @date 2022-03-17 + + model = parse_model_from_json_schema(os.path.join( + FILEPATH, "datamodel_list_properties.schema.json")) + licenses = ["CC-BY", "CC-BY-SA", "CC0", "restricted access"] + names = ["Dataset", "keywords", "booleans", "integers", "floats", + "datetimes", "dates", "reference", "reference_with_name", "event", + "license"] + for name in names + licenses: + assert name in model + + dataset_rt = model["Dataset"] + assert dataset_rt.get_property("keywords") is not None + assert dataset_rt.get_property("keywords").datatype == db.LIST(db.TEXT) + assert isinstance(model["keywords"], db.Property) + assert model["keywords"].name == "keywords" + assert model["keywords"].datatype == db.LIST(db.TEXT) + + assert dataset_rt.get_property("booleans") is not None + assert dataset_rt.get_property("booleans").datatype == db.LIST(db.BOOLEAN) + assert isinstance(model["booleans"], db.Property) + assert model["booleans"].name == "booleans" + assert model["booleans"].datatype == db.LIST(db.BOOLEAN) + + assert dataset_rt.get_property("integers") is not None + assert dataset_rt.get_property("integers").datatype == db.LIST(db.INTEGER) + assert isinstance(model["integers"], db.Property) + assert model["integers"].name == "integers" + assert model["integers"].datatype == db.LIST(db.INTEGER) + + assert dataset_rt.get_property("floats") is not None + assert dataset_rt.get_property("floats").datatype == db.LIST(db.DOUBLE) + assert isinstance(model["floats"], db.Property) + assert model["floats"].name == "floats" + assert model["floats"].datatype == db.LIST(db.DOUBLE) + + assert dataset_rt.get_property("datetimes") is not None + assert dataset_rt.get_property( + "datetimes").datatype == db.LIST(db.DATETIME) + assert isinstance(model["datetimes"], db.Property) + assert model["datetimes"].name == "datetimes" + assert model["datetimes"].datatype == db.LIST(db.DATETIME) + + assert dataset_rt.get_property("dates") is not None + assert dataset_rt.get_property( + "dates").datatype == db.LIST(db.DATETIME) + assert isinstance(model["dates"], db.Property) + assert model["dates"].name == "dates" + assert model["dates"].datatype == db.LIST(db.DATETIME) + + # Simple reference list property + assert 
dataset_rt.get_property("reference") is not None + assert dataset_rt.get_property("reference").is_reference() + assert dataset_rt.get_property( + "reference").datatype == db.LIST("reference") + assert isinstance(model["reference"], db.RecordType) + assert model["reference"].name == "reference" + assert dataset_rt.get_property( + "reference").datatype == db.LIST(model["reference"]) + + # Reference list with name + assert dataset_rt.get_property("reference_with_name") is not None + assert dataset_rt.get_property("reference_with_name").is_reference() + assert dataset_rt.get_property( + "reference_with_name").datatype == db.LIST("event") + assert isinstance(model["event"], db.RecordType) + assert model["event"].name == "event" + assert dataset_rt.get_property( + "reference_with_name").datatype == db.LIST(model["event"]) + assert isinstance(model["reference_with_name"], db.Property) + assert model["reference_with_name"].name == "reference_with_name" + assert model["reference_with_name"].datatype == db.LIST(model["event"]) + + # References to enum types + assert dataset_rt.get_property("license") is not None + assert dataset_rt.get_property("license").is_reference() + assert dataset_rt.get_property("license").datatype == db.LIST("license") + assert isinstance(model["license"], db.RecordType) + assert model["license"].name == "license" + assert dataset_rt.get_property( + "license").datatype == db.LIST(model["license"]) + + for name in licenses: + assert isinstance(model[name], db.Record) + assert model[name].name == name + assert len(model[name].parents) == 1 + assert model[name].has_parent(model["license"]) + + +def test_name_property(): + model = parse_model_from_json_schema(os.path.join( + FILEPATH, "datamodel_name.schema.json")) + + dataset_rt = model["Dataset"] + assert dataset_rt.get_property("name") is None + assert "name" not in model + + with pytest.raises(JsonSchemaDefinitionError) as err: + broken = parse_model_from_json_schema(os.path.join( + FILEPATH, "datamodel_name_wrong_type.schema.json")) + assert str(err.value).startswith( + "The 'name' property must be string-typed, otherwise it cannot be identified with CaosDB's " + "name property.") diff --git a/unittests/test_result_table_cfood.py b/unittests/test_result_table_cfood.py new file mode 100644 index 0000000000000000000000000000000000000000..3341a2394cc9ef15ae172bb8992445d87c60d063 --- /dev/null +++ b/unittests/test_result_table_cfood.py @@ -0,0 +1,67 @@ +#!/usr/bin/env python3 +# encoding: utf-8 +# +# ** header v3.0 +# This file is a part of the CaosDB Project. +# +# Copyright (C) 2018 Research Group Biomedical Physics, +# Max-Planck-Institute for Dynamics and Self-Organization Göttingen +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see <https://www.gnu.org/licenses/>. 
+# +# ** end header +# + +""" +test module for ResultTableCFood +""" + + +import os +import re +import unittest + +import caosdb as db +from caosadvancedtools.scifolder.result_table_cfood import ResultTableCFood + + +class CFoodTest(unittest.TestCase): + def test_re(self): + self.assertIsNotNone(re.match(ResultTableCFood.table_re, "result_table_Hallo.csv")) + self.assertEqual(re.match(ResultTableCFood.table_re, "result_table_Hallo.csv").group("recordtype"), + "Hallo") + self.assertIsNotNone(re.match(ResultTableCFood.table_re, + "result_table_Cool RecordType.csv")) + self.assertEqual(re.match(ResultTableCFood.table_re, "result_table_Cool RecordType.csv").group("recordtype"), + "Cool RecordType") + self.assertIsNone(re.match(ResultTableCFood.table_re, "result_tableCool RecordType.csv")) + + self.assertIsNotNone(re.match(ResultTableCFood.property_name_re, + "temperature [C]")) + self.assertEqual(re.match(ResultTableCFood.property_name_re, + "temperature [C]").group("pname"), + "temperature") + self.assertEqual(re.match(ResultTableCFood.property_name_re, + "temperature [C]").group("unit"), "C") + self.assertEqual(re.match(ResultTableCFood.property_name_re, + "temperature [ C ]").group("unit"), "C") + self.assertEqual(re.match(ResultTableCFood.property_name_re, + "temperature").group("pname"), "temperature") + + def test_ident(self): + rtc = ResultTableCFood(os.path.join(os.path.dirname(__file__), "test.csv")) + rtc.match = re.match(ResultTableCFood.get_re(), + "/ExperimentalData/2010_TestProject/2019-02-03_something/result_table_RT.csv") + rtc.create_identifiables() + rtc.update_identifiables() diff --git a/unittests/test_scifolder_utils.py b/unittests/test_scifolder_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..30e211d9daf8da8f831bff4580efbc63d6bdf6fb --- /dev/null +++ b/unittests/test_scifolder_utils.py @@ -0,0 +1,67 @@ +#!/usr/bin/env python +# encoding: utf-8 +# +# Copyright (C) 2020 IndiScale GmbH <info@indiscale.com> +# Copyright (C) 2020 Henrik tom Wörden <h.tomwoerden@indiscale.com> +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see <https://www.gnu.org/licenses/>. 
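+#
+# Tests reading the YAML header from README.xlsx files via get_xls_header.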
+ +import os +import unittest + +from caosadvancedtools.scifolder.utils import get_xls_header + + +class XLSTest(unittest.TestCase): + def test_read(self): + filename = os.path.join(os.path.dirname(__file__), + "data/README.xlsx") + assert os.path.exists(filename) + + header = get_xls_header(filename) + assert header is not None + assert isinstance(header, dict) + + # responsible + assert header['responsible'] == ["Ana Lytic"] + + # description + assert len(header['description']) == 1 + assert isinstance(header['description'][0], str) + assert len(header['description'][0]) > 20 + assert "exciting" in header['description'][0] + + # sources + assert isinstance(header['sources'], list) + + for el in header['sources']: + assert isinstance(el, dict) + assert "TestProject" in el["file"] + assert "example" in el["description"] + + # scripts + assert isinstance(header['scripts'], list) + + for el in header['scripts']: + assert isinstance(el, dict) + assert "scripts" == el["file"] + assert "all the files" in el["description"] + + # results + assert isinstance(header['results'], list) + + for el in header['results']: + assert isinstance(el, dict) + assert "result.pdf" == el["file"] + assert "plot" in el["description"] diff --git a/unittests/test_structure_mapping.py b/unittests/test_structure_mapping.py new file mode 100644 index 0000000000000000000000000000000000000000..5cc4114fc7f92c580f53dd8855bda659082e2b46 --- /dev/null +++ b/unittests/test_structure_mapping.py @@ -0,0 +1,135 @@ +#!/usr/bin/env python3 + +# This file is a part of the CaosDB Project. +# +# Copyright (C) 2021 IndiScale GmbH <www.indiscale.com> +# Copyright (C) 2021 Henrik tom Wörden <h.tomwoerden@indiscale.com> +# Copyright (C) 2021 Alexander Kreft <akreft@trineo.org> +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see <https://www.gnu.org/licenses/>. 
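+#
+# Tests for EntityMapping and collect_existing_structure from
+# caosadvancedtools.structure_mapping.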
+ +import unittest +from os import name + +import caosdb as db +from caosadvancedtools.structure_mapping import (EntityMapping, + collect_existing_structure) +from caosdb.common import datatype + + +class structureMappingTest(unittest.TestCase): + def test_Entitymapping(self): + ex = db.Record(id=100) # existing Record + tar = db.Record() # target Record + em = EntityMapping() + em.add(tar, ex) + + for key, val in em.to_existing.items(): + self.assertEqual(key, tar._cuid) + self.assertEqual(val, ex) + + for key, val in em.to_target.items(): + self.assertEqual(key, ex.id) + self.assertEqual(val, tar) + + def test_collect_existing_structure(self): + emap = EntityMapping() + reca1 = db.Record(name="Animals", id=100) + reca2 = db.Record(name="Dogs", id=200) + reca3 = db.Record(name="Husky", id=300) + reca1.add_property(id=101, name="Cute Animals", datatype=db.REFERENCE, value=reca2) + reca2.add_property(id=201, name="Cute Dogs", datatype=db.REFERENCE, value=reca3) + + recb1 = db.Record(name="Animals") + recb2 = db.Record(name="Dogs") + recb3 = db.Record(name="Husky") + recb1.add_property(name="Cute Animals", datatype=db.REFERENCE, value=recb2) + recb2.add_property(name="Cute Dogs", datatype=db.REFERENCE, value=recb3) + + collect_existing_structure(recb1, reca1, emap) + + # Test if the two dicts of the entity mapping correctly depend on each other + + for i in emap.to_existing.keys(): + self.assertEqual(i, emap.to_target[emap.to_existing[i].id]._cuid) + + for j in emap.to_target.keys(): + self.assertEqual(j, emap.to_existing[emap.to_target[j]._cuid].id) + + # Test if only the right Properties are in the dicts + self.assertTrue((reca2 in emap.to_existing.values()) and + (reca3 in emap.to_existing.values()) and + (reca1 not in emap.to_existing.values())) + self.assertTrue((recb2 in emap.to_target.values()) and + (recb3 in emap.to_target.values()) and + (recb1 not in emap.to_target.values())) + + # Test the correct assignment of the properties + self.assertTrue(reca2 is emap.to_existing[recb2._cuid]) + self.assertTrue(reca3 is emap.to_existing[recb3._cuid]) + + self.assertTrue(recb2 is emap.to_target[reca2.id]) + self.assertTrue(recb3 is emap.to_target[reca3.id]) + + """Test with one additional Property and Properties, which are not Records""" + emap2 = EntityMapping() + recc1 = db.Record(name="Transportation", id=100) + recc2 = db.Record(name="Cars", id=200) + recc3 = db.Record(name="Volvo", id=300) + recc1.add_property(id=101, name="Type", datatype=db.REFERENCE, value=recc2) + recc2.add_property(id=201, name="Brand", datatype=db.REFERENCE, value=recc3) + # other datatypes + recc3.add_property(id=301, name="max_speed", value=200.2, datatype=db.DOUBLE) + recc3.add_property(id=302, name="doors", value=3, datatype=db.INTEGER) + + recd1 = db.Record(name="Transportation") + recd2 = db.Record(name="Cars") + recd3 = db.Record(name="Volvo") + recd4 = db.Record(name="VW") + recd1.add_property(name="Type", datatype=db.REFERENCE, value=recd2) + recd2.add_property(name="Brand", datatype=db.REFERENCE, value=recd3) + # additional Property + recd2.add_property(name="Another Brand", datatype=db.REFERENCE, value=recd4) + # other datatypes + recd3.add_property(name="max_speed", value=200.2, datatype=db.DOUBLE) + recd3.add_property(name="doors", value=3, datatype=db.INTEGER) + recd4.add_property(name="max_speed", value=210.4, datatype=db.DOUBLE) + recd4.add_property(name="doors", value=5, datatype=db.INTEGER) + recd4.add_property(name="Warp engine", value=None) + + collect_existing_structure(recd1, recc1, emap2) + 
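+ # recd4 ("VW") has no counterpart in the existing structure, so it
+ # should not appear in the mapping that is checked below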
+ # Test the correct assignment of the properties + self.assertTrue(recc2 is emap2.to_existing[recd2._cuid]) + self.assertTrue(recc3 is emap2.to_existing[recd3._cuid]) + + self.assertTrue(recd2 is emap2.to_target[recc2.id]) + self.assertTrue(recd3 is emap2.to_target[recc3.id]) + + """ Test, if the Record `Cars` in `target_structure` have one additional Property """ + # Test existing structure + self.assertEqual(len(recc2.get_properties()), 1) # number of properties stay unchanged + self.assertEqual(len(recd2.get_properties()), 2) # number of properties stay unchanged + + for prop_record, prop_em in zip(recc2.get_properties(), recd2.get_properties()): + self.assertTrue(prop_record.value is emap2.to_existing[prop_em.value._cuid]) + + # Test target structure + self.assertEqual(len(recc3.get_properties()), 2) # number of properties stay unchanged + self.assertEqual(len(recd3.get_properties()), 2) # number of properties stay unchanged + + """ Test if the Properties that are not References show up in the entity map """ + for rec_existing, rec_target in zip(emap2.to_existing.values(), emap2.to_target.values()): + self.assertTrue(isinstance(rec_existing, db.Record)) + self.assertTrue(isinstance(rec_target, db.Record)) diff --git a/unittests/test_table_importer.py b/unittests/test_table_importer.py index 69983017a77082887181d14ea12f4f876e42aa3d..70f0f87f8706d72c386b18f54b7a9a10908eb477 100644 --- a/unittests/test_table_importer.py +++ b/unittests/test_table_importer.py @@ -25,15 +25,22 @@ from tempfile import NamedTemporaryFile import numpy as np import pandas as pd +import pytest from caosadvancedtools.datainconsistency import DataInconsistencyError -from caosadvancedtools.table_importer import (XLSImporter, assure_name_format, +from caosadvancedtools.table_importer import (CSVImporter, TableImporter, + TSVImporter, XLSImporter, + assure_name_format, + check_reference_field, date_converter, datetime_converter, incomplete_date_converter, + string_in_list, win_path_converter, win_path_list_converter, yes_no_converter) +from test_utils import BaseMockUpTest + class ConverterTest(unittest.TestCase): def test_yes_no(self): @@ -48,6 +55,16 @@ class ConverterTest(unittest.TestCase): self.assertRaises(ValueError, yes_no_converter, "True") self.assertRaises(ValueError, yes_no_converter, "true") + def test_string_in_list(self): + self.assertEqual("false", string_in_list("false", + ["FALSE", "TRUE"])) + self.assertEqual("FALSE", string_in_list("FALSE", + ["FALSE", "TRUE"], False)) + self.assertRaises(ValueError, string_in_list, "FALSE", []) + self.assertRaises(ValueError, string_in_list, "FALSE", ["fals"]) + self.assertRaises(ValueError, string_in_list, + "FALSE", ["false"], False) + def test_assure_name_format(self): self.assertEqual(assure_name_format("Müstermann, Max"), "Müstermann, Max") @@ -61,30 +78,47 @@ class ConverterTest(unittest.TestCase): ["/this/computer"]) self.assertEqual(win_path_list_converter( r"\this\computer,\this\computer"), - ["/this/computer", "/this/computer"]) + ["/this/computer", "/this/computer"]) def test_datetime(self): test_file = os.path.join(os.path.dirname(__file__), "date.xlsx") - self.importer = XLSImporter(converters={'d': datetime_converter, - }, obligatory_columns=['d']) + importer = XLSImporter(converters={'d': datetime_converter, + }, obligatory_columns=['d']) xls_file = pd.io.excel.ExcelFile(test_file) df = xls_file.parse() - df = self.importer.read_xls(test_file) + df = importer.read_xls(test_file) assert df.shape[0] == 2 + # TODO datatypes are different; fix it 
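+ # (e.g. the cell value may come back as a pandas.Timestamp rather than
+ # a datetime.datetime)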
assert df.d.iloc[0] == datetime.datetime(1980, 12, 31, 13, 24, 23) - def test_date(self): + def test_date_xlsx(self): + """Test with .xlsx in order to check openpyxl engine.""" test_file = os.path.join(os.path.dirname(__file__), "date.xlsx") - self.importer = XLSImporter(converters={'a': date_converter, - 'b': date_converter, - 'c': partial(date_converter, - fmt="%d.%m.%y") - }, obligatory_columns=['a']) + importer = XLSImporter(converters={'a': date_converter, + 'b': date_converter, + 'c': partial(date_converter, + fmt="%d.%m.%y") + }, obligatory_columns=['a']) + + xls_file = pd.io.excel.ExcelFile(test_file) + df = xls_file.parse() + df = importer.read_xls(test_file) + assert df.shape[0] == 2 + assert df.a.iloc[0] == df.b.iloc[0] == df.c.iloc[0] + + def test_date_xls(self): + """Test with .xls in order to check xlrd engine.""" + test_file = os.path.join(os.path.dirname(__file__), "date.xls") + importer = XLSImporter(converters={'a': date_converter, + 'b': date_converter, + 'c': partial(date_converter, + fmt="%d.%m.%y") + }, obligatory_columns=['a']) xls_file = pd.io.excel.ExcelFile(test_file) df = xls_file.parse() - df = self.importer.read_xls(test_file) + df = importer.read_xls(test_file) assert df.shape[0] == 2 assert df.a.iloc[0] == df.b.iloc[0] == df.c.iloc[0] @@ -107,47 +141,135 @@ class ConverterTest(unittest.TestCase): fmts={"%Y": "%Y"}) -class XLSImporterTest(unittest.TestCase): +class TableImporterTest(unittest.TestCase): def setUp(self): - self.importer = XLSImporter( - converters={'a': str, 'b': int, 'c': float, 'd': yes_no_converter}, + self.importer_kwargs = dict( + converters={'c': float, 'd': yes_no_converter}, + datatypes={'a': str, 'b': int}, obligatory_columns=['a', 'b'], unique_keys=[('a', 'b')]) self.valid_df = pd.DataFrame( [['a', 1, 2.0, 'yes']], columns=['a', 'b', 'c', 'd']) def test_missing_col(self): - df = pd.DataFrame(columns=['a', 'b']) - self.assertRaises(ValueError, self.importer.check_columns, df) - self.importer.check_columns(self.valid_df) + # check missing from converters + df = pd.DataFrame(columns=['a', 'b', 'c']) + importer = TableImporter(**self.importer_kwargs) + self.assertRaises(ValueError, importer.check_columns, df) + # check missing from datatypes + df = pd.DataFrame(columns=['a', 'd', 'c']) + importer = TableImporter(**self.importer_kwargs) + self.assertRaises(ValueError, importer.check_columns, df) + # check valid + importer.check_columns(self.valid_df) def test_missing_val(self): - self.importer.check_missing(self.valid_df) + importer = TableImporter(**self.importer_kwargs) + # check valid + importer.check_missing(self.valid_df) + # check invalid df = pd.DataFrame([[None, np.nan, 2.0, 'yes'], [None, 1, 2.0, 'yes'], ['a', np.nan, 2.0, 'yes'], ['b', 5, 3.0, 'no']], columns=['a', 'b', 'c', 'd']) - df_new = self.importer.check_missing(df) + df_new = importer.check_missing(df) self.assertEqual(df_new.shape[0], 1) self.assertEqual(df_new.shape[1], 4) self.assertEqual(df_new.iloc[0].b, 5) - def test_full(self): - """ test full run with example data """ - tmp = NamedTemporaryFile(delete=False, suffix=".xlsx") - tmp.close() - self.valid_df.to_excel(tmp.name) - self.importer.read_xls(tmp.name) + def test_wrong_datatype(self): + importer = TableImporter(**self.importer_kwargs) + df = pd.DataFrame([[None, np.nan, 2.0, 'yes'], + [5, 1, 2.0, 'yes']], + columns=['a', 'b', 'c', 'd']) + self.assertRaises(DataInconsistencyError, importer.check_datatype, df) def test_unique(self): - self.importer.check_missing(self.valid_df) + importer = 
TableImporter(**self.importer_kwargs)
+ importer.check_missing(self.valid_df)
 df = pd.DataFrame([['b', 5, 3.0, 'no'],
 ['b', 5, 3.0, 'no']],
 columns=['a', 'b', 'c', 'd'])
- df_new = self.importer.check_unique(df)
+ df_new = importer.check_unique(df)
 self.assertEqual(df_new.shape[0], 1)
+
+class XLSImporterTest(TableImporterTest):
+ def test_full(self):
+ """ test full run with example data """
+ tmp = NamedTemporaryFile(delete=False, suffix=".xlsx")
+ tmp.close()
+ self.valid_df.to_excel(tmp.name)
+ importer = XLSImporter(**self.importer_kwargs)
+ importer.read_file(tmp.name)
+
 def test_raise(self):
+ importer = XLSImporter(**self.importer_kwargs)
 tmp = NamedTemporaryFile(delete=False, suffix=".lol")
 tmp.close()
- self.assertRaises(DataInconsistencyError, self.importer.read_xls,
+ self.assertRaises(DataInconsistencyError, importer.read_xls,
 tmp.name)
+
+ def test_datatypes(self):
+ """Test datatypes in columns."""
+ importer = XLSImporter(converters={},
+ obligatory_columns=["float_as_float"],
+ datatypes={
+ "float_as_float": float,
+ "int_as_float": float,
+ "int_as_int": int,
+ }
+ )
+ df = importer.read_xls(os.path.join(
+ os.path.dirname(__file__), "data", "datatypes.xlsx"))
+ assert np.issubdtype(df.loc[0, "int_as_float"], float)
+
+
+class CSVImporterTest(TableImporterTest):
+ def test_full(self):
+ """ test full run with example data """
+ tmp = NamedTemporaryFile(delete=False, suffix=".csv")
+ tmp.close()
+ self.valid_df.to_csv(tmp.name)
+ importer = CSVImporter(**self.importer_kwargs)
+ importer.read_file(tmp.name)
+
+
+class TSVImporterTest(TableImporterTest):
+ def test_full(self):
+ """ test full run with example data """
+ tmp = NamedTemporaryFile(delete=False, suffix=".tsv")
+ tmp.close()
+ self.valid_df.to_csv(tmp.name, sep="\t")
+ importer = TSVImporter(**self.importer_kwargs)
+ importer.read_file(tmp.name)
+
+
+class CountQueryNoneConverterTest(BaseMockUpTest):
+ def __init__(self, *args, **kwargs):
+ super().__init__(*args, **kwargs)
+ # simulate that no entity exists
+ self.entities = (
+ '<Response count="0">'
+ '<Query string="count record" results="0">'
+ '</Query>'
+ '</Response>'
+ )
+
+ def test_check_reference_field(self):
+ self.assertRaises(ValueError, check_reference_field, "1232", "Max")
+
+
+class CountQuerySingleConverterTest(BaseMockUpTest):
+ def __init__(self, *args, **kwargs):
+ super().__init__(*args, **kwargs)
+ # simulate that exactly one entity exists
+ self.entities = (
+ '<Response count="1">'
+ '<Query string="count record" results="1">'
+ '</Query>'
+ '</Response>'
+ )
+
+ def test_check_reference_field(self):
+ self.assertEqual(check_reference_field("1232", "Max"),
+ "1232")
diff --git a/unittests/test_utils.py b/unittests/test_utils.py
index 054d7c99069f294e9975742c1c0261fd7ebc768c..7369931799b00eba5a835458a6fad474de1d9039 100644
--- a/unittests/test_utils.py
+++ b/unittests/test_utils.py
@@ -32,14 +32,7 @@ from caosdb.connection.mockup import MockUpResponse, MockUpServerConnection
 from caosdb.exceptions import TransactionError
-class ReferencesBaseTest(unittest.TestCase):
- def __init__(self, *args, **kwargs):
- super().__init__(*args, **kwargs)
- self.entities = (
- '<Response><File name="test.npy" path="/some/path/test.npy'
- '" id="1234"/><Query string="find record" results="1">'
- '</Query></Response>')
-
+class BaseMockUpTest(unittest.TestCase):
 def setUp(self):
 conlogger = logging.getLogger("connection")
 conlogger.setLevel(level=logging.ERROR)
@@ -70,6 +63,15 @@ class ReferencesBaseTest(unittest.TestCase):
 return log
+
+class
ReferencesBaseTest(BaseMockUpTest): + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self.entities = ( + '<Response><File name="test.npy" path="/some/path/test.npy' + '" id="1234"/><Query string="find record" results="1">' + '</Query></Response>') + def test_ref(self): self.clear_log() files = get_referenced_files("test.npy", prefix=None, filename=None, diff --git a/unittests/test_yaml_model_parser.py b/unittests/test_yaml_model_parser.py new file mode 100644 index 0000000000000000000000000000000000000000..a9f072b754618e38237cbf70e74c7944551f1045 --- /dev/null +++ b/unittests/test_yaml_model_parser.py @@ -0,0 +1,476 @@ +import unittest +from datetime import date +from tempfile import NamedTemporaryFile +from pytest import raises + +import caosdb as db +from caosadvancedtools.models.parser import (TwiceDefinedException, + YamlDefinitionError, + parse_model_from_string, + parse_model_from_yaml) + + +def to_file(string): + f = NamedTemporaryFile(mode="w", delete=False) + f.write(string) + f.close() + + return f.name + +# TODO: check purpose of this function... add documentation + + +def parse_str(string): + parse_model_from_yaml(to_file(string)) + + +def has_property(el, name): + for p in el.get_properties(): + if p.name == name: + return True + + return False + + +def has_parent(el, name): + for p in el.get_parents(): + if p.name == name: + return True + + return False + + +class TwiceTest(unittest.TestCase): + def test_defined_once(self): + string = """ +RT1: + recommended_properties: + a: +RT2: + recommended_properties: + RT1: +RT3: + recommended_properties: + RT4: + recommended_properties: + a: +RT4: +""" + model = parse_model_from_yaml(to_file(string)) + assert has_property(model["RT1"], "a") + assert has_property(model["RT4"], "a") + + def test_defined_twice(self): + string = """ +RT1: + recommended_properties: + a: +RT2: + recommended_properties: + RT1: + recommended_properties: + a: +""" + + self.assertRaises(TwiceDefinedException, + lambda: parse_model_from_yaml(to_file(string))) + + def test_typical_case(self): + string = """ +RT1: + recommended_properties: + p1: + datatype: TEXT + description: shiet egal + obligatory_properties: + p2: + datatype: TEXT +RT2: + description: "This is awesome" + inherit_from_suggested: + - RT1 + - RT4 + obligatory_properties: + RT1: + p3: + datatype: DATETIME + recommended_properties: + p4: + RT4: +p1: +p5: +RT5: + """ + parse_model_from_yaml(to_file(string)) + + def test_wrong_kind(self): + string = """ +- RT1: +- RT2: +""" + self.assertRaises( + ValueError, lambda: parse_model_from_yaml(to_file(string))) + + def test_unknown_kwarg(self): + string = """ +RT1: + datetime: + p1: +""" + self.assertRaises( + ValueError, lambda: parse_model_from_yaml(to_file(string))) + + def test_definition_in_inheritance(self): + string = """ +RT2: + description: "This is awesome" + inherit_from_suggested: + - RT1: + description: "tach" +""" + self.assertRaises( + ValueError, lambda: parse_model_from_yaml(to_file(string))) + + def test_inheritance(self): + string = """ +RT1: + description: "This is awesome" + inherit_from_suggested: + - RT2 + inherit_from_recommended: + - RT3 + inherit_from_obligatory: + - RT4 + - RT5 +RT2: +RT3: +RT4: +RT5: +""" + model = parse_model_from_yaml(to_file(string)) + assert has_parent(model["RT1"], "RT2") + assert (model["RT1"].get_parent( + "RT2")._flags["inheritance"] == db.SUGGESTED) + assert has_parent(model["RT1"], "RT3") + assert (model["RT1"].get_parent( + "RT3")._flags["inheritance"] == db.RECOMMENDED) + 
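# RT4 and RT5 are both listed under inherit_from_obligatory above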
+        assert has_parent(model["RT1"], "RT4")
+        assert (model["RT1"].get_parent(
+            "RT4")._flags["inheritance"] == db.OBLIGATORY)
+        assert has_parent(model["RT1"], "RT5")
+        assert (model["RT1"].get_parent(
+            "RT5")._flags["inheritance"] == db.OBLIGATORY)
+
+    def test_properties(self):
+        string = """
+RT1:
+  description: "This is awesome"
+  recommended_properties:
+    RT2:
+  suggested_properties:
+    RT3:
+  obligatory_properties:
+    RT4:
+      recommended_properties:
+        RT2:
+    RT5:
+"""
+        model = parse_model_from_yaml(to_file(string))
+        assert has_property(model["RT1"], "RT2")
+        assert model["RT1"].get_importance("RT2") == db.RECOMMENDED
+        assert has_property(model["RT1"], "RT3")
+        assert model["RT1"].get_importance("RT3") == db.SUGGESTED
+        assert has_property(model["RT1"], "RT4")
+        assert model["RT1"].get_importance("RT4") == db.OBLIGATORY
+        assert has_property(model["RT1"], "RT5")
+        assert model["RT1"].get_importance("RT5") == db.OBLIGATORY
+        assert has_property(model["RT4"], "RT2")
+        assert model["RT4"].get_importance("RT2") == db.RECOMMENDED
+
+    def test_datatype(self):
+        string = """
+p1:
+  datatype: TEXT
+"""
+        parse_model_from_yaml(to_file(string))
+        string = """
+p2:
+  datatype: TXT
+"""
+        self.assertRaises(ValueError, parse_model_from_yaml, to_file(string))
+
+
+class ListTest(unittest.TestCase):
+    def test_list(self):
+        string = """
+RT1:
+  recommended_properties:
+    a:
+      datatype: LIST(RT2)
+    b:
+      datatype: LIST(TEXT)
+    c:
+      datatype: LIST<TEXT>
+RT2:
+"""
+        model = parse_model_from_yaml(to_file(string))
+
+        self.assertTrue(isinstance(model['b'], db.Property))
+        self.assertEqual(model['b'].datatype, db.LIST(db.TEXT))
+        self.assertTrue(isinstance(model['c'], db.Property))
+        self.assertEqual(model['c'].datatype, db.LIST(db.TEXT))
+
+        # This failed for an older version of caosdb-models
+        string_list = """
+A:
+  obligatory_properties:
+    B:
+      datatype: LIST(B)
+B:
+  obligatory_properties:
+    c:
+      datatype: INTEGER
+"""
+        model = parse_model_from_yaml(to_file(string_list))
+        self.assertTrue(isinstance(model['A'], db.RecordType))
+        self.assertEqual(model['A'].properties[0].datatype, db.LIST("B"))
+
+
+class ParserTest(unittest.TestCase):
+    """Generic tests for good and bad syntax."""
+
+    def test_empty_property_list(self):
+        """Empty property lists are allowed now."""
+        empty = """
+A:
+  obligatory_properties:
+"""
+        parse_str(empty)
+
+    def test_non_string_name(self):
+        """Test for when the name does not look like a string to YAML."""
+        name_int = """1:
+  recommended_properties:
+    1.2:
+    Null:
+    0x0:
+    010:
+"""
+        model = parse_model_from_string(name_int)
+        self.assertEqual(len(model), 5)
+        for key in model.keys():
+            self.assertIsInstance(key, str)
+
+    def test_unexpected_keyword(self):
+        """Test for keywords that appear where they should not."""
+        yaml = """A:
+  obligatory_properties:
+    recommended_properties:
+"""
+        with self.assertRaises(YamlDefinitionError) as yde:
+            parse_model_from_string(yaml)
+        self.assertIn("line 3", yde.exception.args[0])
+        self.assertIn("recommended_properties", yde.exception.args[0])
+
+    def test_parents_list(self):
+        """Parents must be a list."""
+        yaml = """A:
+  inherit_from_obligatory:
+    A:
+"""
+        with self.assertRaises(YamlDefinitionError) as yde:
+            parse_model_from_string(yaml)
+        self.assertIn("line 3", yde.exception.args[0])
+
+    def test_reference_property(self):
+        """Test correct creation of a reference property using an RT."""
+        modeldef = """A:
+  recommended_properties:
+    ref:
+      datatype: LIST<A>
+"""
+        model = parse_model_from_string(modeldef)
+        self.assertEqual(len(model), 2)
+        for key, value in model.items():
+            if key == "A":
+                self.assertTrue(isinstance(value, db.RecordType))
+            elif key == "ref":
+                self.assertTrue(isinstance(value, db.Property))
+                self.assertEqual(value.datatype, "LIST<A>")
+
+
+class ExternTest(unittest.TestCase):
+    """TODO Testing the "extern" keyword in the YAML."""
+    @unittest.expectedFailure
+    def test_extern(self):
+        raise NotImplementedError("Extern testing is not implemented yet.")
+
+
+class ErrorMessageTest(unittest.TestCase):
+    """Tests for understandable error messages."""
+
+    # Note: This was changed with the implementation of the role keyword
+    @unittest.expectedFailure
+    def test_non_dict(self):
+        """When a value is given where a list or mapping is expected."""
+        recordtype_value = """
+A: "some class"
+"""
+        recommended_value = """
+A:
+  recommended_properties: 23
+"""
+        property_value = """
+prop:
+  datatype: DOUBLE
+A:
+  recommended_properties:
+    - prop: 3.14
+"""
+        # Failing strings and the lines where they fail
+        failing = {
+            recordtype_value: 2,
+            recommended_value: 3,
+            property_value: 6
+        }
+        for string, line in failing.items():
+            with self.assertRaises(YamlDefinitionError) as yde:
+                parse_str(string)
+            assert "line {}".format(line) in yde.exception.args[0]
+
+
+def test_define_role():
+    model = """
+A:
+  role: Record
+"""
+    entities = parse_model_from_string(model)
+    assert "A" in entities
+    assert isinstance(entities["A"], db.Record)
+    assert entities["A"].role == "Record"
+
+    model = """
+A:
+  role: Record
+  inherit_from_obligatory:
+    - C
+  obligatory_properties:
+    b:
+b:
+  datatype: INTEGER
+C:
+  obligatory_properties:
+    b:
+D:
+  role: RecordType
+"""
+    entities = parse_model_from_string(model)
+    for name, ent in (("A", "Record"), ("b", "Property"),
+                      ("C", "RecordType"), ("D", "RecordType")):
+        assert name in entities
+        assert isinstance(entities[name], getattr(db, ent))
+        assert entities[name].role == ent
+
+    assert entities["A"].parents[0].name == "C"
+    assert entities["A"].name == "A"
+
+    assert entities["A"].properties[0].name == "b"
+    assert entities["A"].properties[0].value is None
+
+    assert entities["C"].properties[0].name == "b"
+    assert entities["C"].properties[0].value is None
+
+    model = """
+A:
+  role: Record
+  obligatory_properties:
+    b: 42
+b:
+  datatype: INTEGER
+"""
+
+    entities = parse_model_from_string(model)
+    assert entities["A"].get_property("b").value == 42
+    assert entities["b"].value is None
+
+    model = """
+b:
+  datatype: INTEGER
+  value: 18
+"""
+    entities = parse_model_from_string(model)
+    assert entities["b"].value == 18
+
+
+def test_issue_72():
+    """Tests for
+    https://gitlab.indiscale.com/caosdb/src/caosdb-advanced-user-tools/-/issues/72
+
+    In some cases, faulty values would be read in for properties without a
+    specified value.
+
+    """
+    model = """
+Experiment:
+  obligatory_properties:
+    date:
+      datatype: DATETIME
+      description: 'date of the experiment'
+    identifier:
+      datatype: TEXT
+      description: 'identifier of the experiment'
+    temperature:
+      datatype: DOUBLE
+      description: 'temp'
+TestExperiment:
+  role: Record
+  inherit_from_obligatory:
+    - Experiment
+  obligatory_properties:
+    date: 2022-03-02
+    identifier: Test
+    temperature: 23
+  recommended_properties:
+    additional_prop:
+      datatype: INTEGER
+      value: 7
+"""
+    entities = parse_model_from_string(model)
+    assert "Experiment" in entities
+    assert "date" in entities
+    assert "identifier" in entities
+    assert "temperature" in entities
+    assert "TestExperiment" in entities
+    assert "additional_prop" in entities
+    assert isinstance(entities["Experiment"], db.RecordType)
+
+    assert entities["Experiment"].get_property("date") is not None
+    # No value is set, so this has to be None
+    assert entities["Experiment"].get_property("date").value is None
+
+    assert entities["Experiment"].get_property("identifier") is not None
+    assert entities["Experiment"].get_property("identifier").value is None
+
+    assert entities["Experiment"].get_property("temperature") is not None
+    assert entities["Experiment"].get_property("temperature").value is None
+
+    test_rec = entities["TestExperiment"]
+    assert isinstance(test_rec, db.Record)
+    assert test_rec.get_property("date").value == date(2022, 3, 2)
+    assert test_rec.get_property("identifier").value == "Test"
+    assert test_rec.get_property("temperature").value == 23
+    assert test_rec.get_property("additional_prop").value == 7
+
+
+def test_file_role():
+    """Not implemented for now, see
+    https://gitlab.indiscale.com/caosdb/src/caosdb-advanced-user-tools/-/issues/74.
+
+    """
+    model = """
+F:
+  role: File
+"""
+    with raises(NotImplementedError):
+        parse_model_from_string(model)
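For reviewers of the new test file: the parser API exercised above boils down to a few calls. A minimal, self-contained sketch (illustrative only, not part of the patch; the model string is made up, but the calls and the value-stays-unset behaviour come from the tests above):

from caosadvancedtools.models.parser import parse_model_from_string

# A tiny data model: one RecordType with one obligatory property.
model = """
Experiment:
  obligatory_properties:
    date:
      datatype: DATETIME
"""
# The parser returns a dict-like mapping from entity names to caosdb
# entities (RecordTypes and Properties).
entities = parse_model_from_string(model)
assert "Experiment" in entities and "date" in entities
# A property defined without an explicit `value:` must stay None.
assert entities["Experiment"].get_property("date").value is None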