diff --git a/.docker-base/Dockerfile b/.docker-base/Dockerfile
index 2152183a410302df34d35ec6f514399678e0baaf..923924e75e03c6ca8346b17cdf87eda78efd766f 100644
--- a/.docker-base/Dockerfile
+++ b/.docker-base/Dockerfile
@@ -9,6 +9,34 @@ RUN apk add --no-cache py3-pip python3 python3-dev gcc make \
     git bash curl gettext py3-requests
 RUN apk add --no-cache libffi-dev openssl-dev libc-dev libxslt libxslt-dev \
     libxml2 libxml2-dev
+
+# Install Rust (needed for compiling a docker-compose dependency).
+# This is necessary until Alpine ships an up-to-date Rust.
+# Copied from https://github.com/rust-lang/docker-rust/blob/bbc7feb12033da3909dced4e88ddbb6964fbc328/1.50.0/alpine3.13/Dockerfile
+
+ENV RUSTUP_HOME=/usr/local/rustup \
+    CARGO_HOME=/usr/local/cargo \
+    PATH=/usr/local/cargo/bin:$PATH \
+    RUST_VERSION=1.50.0
+
+RUN set -eux; \
+    apkArch="$(apk --print-arch)"; \
+    case "$apkArch" in \
+        x86_64) rustArch='x86_64-unknown-linux-musl'; rustupSha256='05c5c05ec76671d73645aac3afbccf2187352fce7e46fc85be859f52a42797f6' ;; \
+        aarch64) rustArch='aarch64-unknown-linux-musl'; rustupSha256='6a8a480d8d9e7f8c6979d7f8b12bc59da13db67970f7b13161ff409f0a771213' ;; \
+        *) echo >&2 "unsupported architecture: $apkArch"; exit 1 ;; \
+    esac; \
+    url="https://static.rust-lang.org/rustup/archive/1.23.1/${rustArch}/rustup-init"; \
+    wget "$url"; \
+    echo "${rustupSha256} *rustup-init" | sha256sum -c -; \
+    chmod +x rustup-init; \
+    ./rustup-init -y --no-modify-path --profile minimal --default-toolchain $RUST_VERSION --default-host ${rustArch}; \
+    rm rustup-init; \
+    chmod -R a+w $RUSTUP_HOME $CARGO_HOME; \
+    rustup --version; \
+    cargo --version; \
+    rustc --version;
+
 RUN pip3 install docker-compose==1.25
 
 # Script for waiting on LA server
diff --git a/.docker/Dockerfile b/.docker/Dockerfile
index d5d2fe66770b2d37f7ecbb718a2260cdd7f501c1..876f252299991f2fa4410994b73259c3593c2198 100644
--- a/.docker/Dockerfile
+++ b/.docker/Dockerfile
@@ -1,7 +1,9 @@
-FROM debian:10
+FROM debian:11
 RUN apt-get update && \
     apt-get install \
     curl \
+    libhdf5-dev \
+    pkgconf \
     python3 \
     python3-pip \
     python3-requests \
@@ -11,20 +13,22 @@ RUN apt-get update && \
     tox \
     git \
     openjdk-11-jdk-headless \
-    python-autopep8 \
+    python3-autopep8 \
    python3-pytest \
     libxml2 \
     -y
+
+
 COPY .docker/wait-for-it.sh /wait-for-it.sh
 ADD https://gitlab.com/api/v4/projects/13656973/repository/branches/dev \
     pylib_version.json
 RUN git clone https://gitlab.com/caosdb/caosdb-pylib.git && \
     cd caosdb-pylib && git checkout dev && pip3 install .
+# At least recommonmark 0.6 required.
+RUN pip3 install -U html2text pycodestyle pylint recommonmark sphinx-rtd-theme
 COPY . /git
 RUN rm -r /git/.git \
     && mv /git/.docker/pycaosdb.ini /git/integrationtests
-RUN cd /git && pip3 install .
+RUN cd /git && pip3 install .[h5-crawler]
 WORKDIR /git/integrationtests
-CMD /wait-for-it.sh caosdb-server:10443 -t 500 -- ./test.sh
-# At least recommonmark 0.6 required.
-RUN pip3 install recommonmark sphinx-rtd-theme
+CMD /wait-for-it.sh caosdb-server:10443 -t 500 -- ./test.sh --force
diff --git a/.docker/docker-compose.yml b/.docker/docker-compose.yml
index e859e4337653a41dd2e17a819760b18fe2185c5e..36964ee68b7e384267a08484524de1f72cdfad6d 100644
--- a/.docker/docker-compose.yml
+++ b/.docker/docker-compose.yml
@@ -7,7 +7,7 @@ services:
     networks:
       - caosnet
   caosdb-server:
-    image: "$CI_REGISTRY_INDISCALE/caosdb/src/caosdb-deploy:$CAOSDB_TAG"
+    image: "$CI_REGISTRY/caosdb/src/caosdb-deploy:$CAOSDB_TAG"
     user: 999:999
     depends_on:
       - sqldb
diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index 8ad682bf6bac6b50ed6a98ffe42b94f2c96aabb0..8ebbefaa39650ddaff45b856a8a4d44a2ac495d1 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -21,13 +21,9 @@
 # along with this program. If not, see <https://www.gnu.org/licenses/>.
 
 variables:
-  CI_REGISTRY_IMAGE: $CI_REGISTRY/caosdb/caosdb-advanced-user-tools/testenv:latest
-  CI_REGISTRY_IMAGE_BASE: $CI_REGISTRY/caosdb/caosdb-advanced-user-tools/base:latest
-  # When using dind, it's wise to use the overlayfs driver for
-  # improved performance.
+  CI_REGISTRY_IMAGE: $CI_REGISTRY/caosdb/src/caosdb-advanced-user-tools/testenv:latest
+  CI_REGISTRY_IMAGE_BASE: $CI_REGISTRY/caosdb/src/caosdb-advanced-user-tools/base:latest
 
-services:
-  - docker:19.03.0-dind
 
 stages:
   - setup
@@ -38,33 +34,37 @@ stages:
   - deploy
 
 test:
-  tags: [cached-dind]
+  tags: [docker]
+  services:
+    - docker:20.10.5-dind
+  variables:
+    # This is a workaround for the gitlab-runner health check mechanism when
+    # using the docker-dind service. The runner will otherwise guess the port
+    # wrong and the health check will time out.
+    SERVICE_PORT_2376_TCP_PORT: 2375
   stage: integrationtest
   image: $CI_REGISTRY_IMAGE_BASE
   script:
     - if [[ "$CAOSDB_TAG" == "" ]]; then
-        CAOSDB_TAG=dev-latest;
+        CAOSDB_TAG=dev;
       fi
     - echo $CAOSDB_TAG
     - time docker load < /image-cache/caosdb-advanced-testenv.tar || true
     - time docker load < /image-cache/mariadb.tar || true
-    - time docker load < /image-cache/caosdb.tar || true
+    - time docker load < /image-cache/caosdb-dev.tar || true
     - docker login -u gitlab-ci-token -p $CI_JOB_TOKEN $CI_REGISTRY
-    - docker login -u gitlab+deploy-token-ci-pull -p $TOKEN_CI_PULL $CI_REGISTRY_INDISCALE
-    - time docker pull $CI_REGISTRY_IMAGE
-    - time docker pull mariadb:10.4
-    - time docker pull $CI_REGISTRY_INDISCALE/caosdb/src/caosdb-deploy:$CAOSDB_TAG
     - EXEPATH=`pwd` CAOSDB_TAG=$CAOSDB_TAG docker-compose -f .docker/docker-compose.yml up -d
     - cd .docker
     - /bin/sh ./run.sh
     - cd ..
-    - docker logs docker_caosdb-server_1 &> ../caosdb_log.txt
-    - docker logs docker_sqldb_1 &> ../mariadb_log.txt
+    - docker logs docker_caosdb-server_1 &> caosdb_log.txt
+    - docker logs docker_sqldb_1 &> mariadb_log.txt
    - docker-compose -f .docker/docker-compose.yml down
     - rc=`cat .docker/result`
     - exit $rc
   dependencies: [cert]
+  needs: [cert]
   artifacts:
     paths:
       - caosdb_log.txt
@@ -76,11 +76,10 @@ build-testenv:
   tags: [cached-dind]
   image: docker:18.09
   stage: setup
-  only:
-    - schedules
-    - web
+  # Hint: do not use "only" here; the image always needs to be built, since it
+  # contains the repo code.
+  #only:
   script:
-    - df -h
     - docker login -u gitlab-ci-token -p $CI_JOB_TOKEN $CI_REGISTRY
     # use here general latest or specific branch latest...
    - docker build
@@ -97,6 +96,7 @@ cert:
   tags: [docker]
   stage: cert
   image: $CI_REGISTRY_IMAGE
+  needs: [build-testenv]
   artifacts:
     paths:
       - .docker/cert/
@@ -104,32 +104,42 @@ cert:
   script:
     - cd .docker
     - CAOSHOSTNAME=caosdb-server ./cert.sh
+
 style:
   tags: [docker]
   stage: style
   image: $CI_REGISTRY_IMAGE
+  needs: [build-testenv]
+  script:
+    - make style
+  allow_failure: true
+
+linting:
+  tags: [docker]
+  stage: style
+  image: $CI_REGISTRY_IMAGE
+  needs: [build-testenv]
   script:
-    - autopep8 -ar --diff --exit-code .
+    - make lint
   allow_failure: true
 
 unittest:
   tags: [docker]
   stage: unittest
   image: $CI_REGISTRY_IMAGE
+  needs: [build-testenv]
   script:
     - tox
 
 # Build the sphinx documentation and make it ready for deployment by Gitlab Pages
-# documentation:
-#   stage: deploy
 
 # Special job for serving a static website. See https://docs.gitlab.com/ee/ci/yaml/README.html#pages
-pages:
-  stage: deploy
-  image: $CI_REGISTRY_IMAGE
+pages_prepare: &pages_prepare
   tags: [docker]
+  image: $CI_REGISTRY_IMAGE
+  stage: deploy
   only:
-    - dev
+    refs:
+      - /^release-.*$/
   script:
     - echo "Deploying"
     - make doc
@@ -137,3 +147,9 @@ pages:
   artifacts:
     paths:
       - public
+pages:
+  <<: *pages_prepare
+  only:
+    refs:
+      # version tags: v0.1.1
+      - /^v(\d+\.\d+\.\d+)$/
diff --git a/.gitlab/merge_request_templates/Default.md b/.gitlab/merge_request_templates/Default.md
deleted file mode 100644
index 77a95da1cc40c815e4952a1283d345af56e80461..0000000000000000000000000000000000000000
--- a/.gitlab/merge_request_templates/Default.md
+++ /dev/null
@@ -1,49 +0,0 @@
-# Summary
-
- Insert a meaningful description for this merge request here. What is the
- new/changed behavior? Which bug has been fixed? Are there related Issues?
-
-# Focus
-
- Point the reviewer to the core of the code change. Where should they start
- reading? What should they focus on (e.g. security, performance,
- maintainability, user-friendliness, compliance with the specs, finding more
- corner cases, concrete questions)?
-
-# Test Environment
-
- How to set up a test environment for manual testing?
-
-# Check List for the Author
-
-Please, prepare your MR for a review. Be sure to write a summary and a
-focus and create gitlab comments for the reviewer. They should guide the
-reviewer through the changes, explain your changes and also point out open
-questions. For further good practices have a look at [our review
-guidelines](https://gitlab.com/caosdb/caosdb/-/blob/dev/REVIEW_GUIDELINES.md)
-
-- [ ] All automated tests pass
-- [ ] Reference related Issues
-- [ ] Up-to-date CHANGELOG.md
-- [ ] Annotations in code (Gitlab comments)
-  - Intent of new code
-  - Problems with old code
-  - Why this implementation?
-
-
-# Check List for the Reviewer
-
-
-- [ ] I understand the intent of this MR
-- [ ] All automated tests pass
-- [ ] Up-to-date CHANGELOG.md
-- [ ] The test environment setup works and the intended behavior is
-  reproducible in the test environment
-- [ ] In-code documentation and comments are up-to-date.
-- [ ] Check: Are there spezifications? Are they satisfied?
-
-For further good practices have a look at [our review guidelines](https://gitlab.com/caosdb/caosdb/-/blob/dev/REVIEW_GUIDELINES.md).
-
-
-/assign me
-/target_branch dev
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 3973a4b3f6b0098b871abf6394e5b9158b3e43c2..be44a47d1a0c79c8a4fa39f382d4d3a0e22439f6 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -8,30 +8,105 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ### Added ###
 
+### Changed ###
+
+### Deprecated ###
+
+### Removed ###
+
+### Fixed ###
+
+### Security ###
+
+## [0.4.1] - 2022-05-03 ##
+(Henrik tom Wörden)
+
+### Changed ###
+
+- `JsonSchemaParser` now identifies `name` properties in the schema with the
+  CaosDB name property.
+
+### Fixed ###
+
+- [#40](https://gitlab.com/caosdb/caosdb-advanced-user-tools/-/issues/40)
+  `assure_object_is_in_list` now handles adding objects to an initially empty list correctly.
+
+## [0.4.0] - 2022-04-05 ##
+
+### Added ###
+
+- CFood that creates a Record for each line in a CSV file
+- `generic_analysis.py` makes it easy to call scripts that perform analyses in
+  server-side scripting [EXPERIMENTAL]
+- **EXPERIMENTAL:** The models parser can now import from JSON Schema files:
+  `models.parser.parse_model_from_json_schema(...)`. See the documentation of
+  `models.parser.JsonSchemaParser` for the limitations of the current
+  implementation.
+- New keyword "role" in the yaml data model that allows the creation of Records and Files.
+- It is now possible to set values of properties and default values of properties
+  directly in the yaml model.
+
+### Changed ###
+
+- `TableConverter` now converts int to float and vice versa to match the desired dtype.
+
+### Deprecated ###
+
+### Removed ###
+
+### Fixed ###
+
+- CaosDB internal properties `name`, `unit` and `description` can now be used via the `extern`
+  keyword in YAML datamodel specifications.
+
+### Security ###
+
+## [0.3.1] - 2021-12-06 ##
+
+### Added ###
+- `check_reference_field` function to check whether entities with provided ids
+  exist (for example when importing data from a table)
+- added the `datatypes` argument to `TableImporter` for columns that do not
+  need a special conversion function
+
+## [0.3.0] - 2021-11-02 ##
+
+### Added ###
+
 - Error handling for missing files when using the crawler
 - included the scifolder module
 - included the caosmodels module
-* `send_mail` function in `caosadvancedtools.serverside.helper` module
+- `send_mail` function in `caosadvancedtools.serverside.helper` module
 - New class to collect possible problems with the data model
 - New class for checking and importing tables
 - Function to get a file path to a shared resource directory
-- Function to setup logging appropriate for server side scripts with webui
+- Function to set up logging appropriate for server-side scripts with webui
+  output
 - New class for collecting information for exporting tables, e.g., to
   metadata repositories
 - new name parsing
 - new test for software folder structure
 - new assure_name_is function
-- two utility functions when working with files: NameCollector and
+- two utility functions when working with files: NameCollector and
+  get_file_via_download
 - Automated documentation builds: `make doc`
 - Crawler documentation
+- Proof-of-concept integration with Bloxberg.
+- Introduce a cfood that can create a Record structure based on the contents of an HDF5 file;
+  h5py is now an optional dependency
+- table importer implementations for csv and tsv
+- string-in-list check for table imports
+- `AbstractCFood` has a new property, `update_flags`.
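The experimental JSON Schema import listed under 0.4.0 above takes only two calls in practice. The sketch below mirrors the usage in `integrationtests/test_json_schema_datamodel_parser.py` further down in this patch; the schema file name is the one added there.

```python
# Usage sketch for the experimental JSON Schema import (0.4.0); the calls
# mirror integrationtests/test_json_schema_datamodel_parser.py below.
from caosadvancedtools.models.parser import parse_model_from_json_schema

model = parse_model_from_json_schema("test_datamodel.schema.json")
# Create/update the parsed RecordTypes and Properties on the server
# without interactive confirmation:
model.sync_data_model(noquestion=True)
```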
 ### Changed ###
 
-* `caosadvancedtools.serverside.helper.init_data_model` also checks the role
+- identifiables of single CFoods are now treated one after the other. This
+  allows them to have dependencies among each other if they are ordered
+  correctly
+- identifiables must have at least one property or a name
+- `caosadvancedtools.serverside.helper.init_data_model` also checks the role
   and data type of entities.
-* The `caosadvancedtools.table_importer.date_converter` now actually returns a
+- The `caosadvancedtools.table_importer.date_converter` now actually returns a
   `datetime.date` instance. A new
   `caosadvancedtools.table_importer.datetime_converter` replaces the old
   `date_converter` and returns a `datetime.datetime` instance.
@@ -47,17 +122,18 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - `caosadvancedtools.cfood.assure_object_is_in_list` conducts in-place updates
   if no `to_be_updated` object is supplied.
 
-### Deprecated ###
-
-### Removed ###
-
 ### Fixed ###
 
 - An exception in collect_information does no longer lead to a break down.
-* Fixed an issue where `caosadvancedtools.cache.UpdateCache` would
+- Removed dependency on the discontinued xlrd version
+- Fixed an issue where `caosadvancedtools.cache.UpdateCache` would
   cause an `sqlite3.IntegrityError` if more than one change was cached
   for the same entity.
-
-### Security ###
+- #40 Insertion of identifiables with missing obligatory properties
+- Before, a Property with the datatype "LIST(TEXT)" would lead to the creation
+  of a RecordType. This is fixed now.
+- #52 `XLSImporter.read_xls` raised the wrong error when reading from a file with a wrong ending.
+  Now, a `DataInconsistencyError` is raised instead of a ValueError.
+- List properties are no longer updated unnecessarily by the crawler.
 
 ## [0.2.0] - 2020-05-28 ##
diff --git a/FEATURES.md b/FEATURES.md
new file mode 100644
index 0000000000000000000000000000000000000000..44b2a5de7b1ff48da8e190a8b0f9a50ef58733cb
--- /dev/null
+++ b/FEATURES.md
@@ -0,0 +1,13 @@
+# Features
+
+## Stable
+To be filled.
+
+## Experimental
+
+- `generic_analysis.py` makes it easy to call scripts that perform analyses in
+  server-side scripting
+- The models parser can import from JSON Schema files:
+  `models.parser.parse_model_from_json_schema(...)`. See the documentation of
+  `models.parser.JsonSchemaParser` for the limitations of the current
+  implementation.
diff --git a/Makefile b/Makefile
index cbac0ea0a77e5523529ef181d83ffb9738d72faf..d9b182cbd0b17490e9d81b900d6ba8cefadb1b64 100644
--- a/Makefile
+++ b/Makefile
@@ -21,7 +21,7 @@
 
 # This Makefile is a wrapper for several other scripts.
 
-.PHONY: help doc install
+.PHONY: help doc install unittest
 
 help:
	@echo 'Type `make doc` for documentation, or `make install` for (local) installation.'
@@ -30,4 +30,16 @@ doc:
	$(MAKE) -C src/doc html
 
 install:
-	@echo "Not implemented yet, use pip for installation."
+	pip3 install .
+
+unittest:
+	pytest-3 unittests
+
+style:
+	pycodestyle --count src unittests --exclude=swagger_client
+	autopep8 -ar --diff --exit-code --exclude swagger_client .
+.PHONY: style
+
+lint:
+	pylint --unsafe-load-any-extension=y -d all -e E,F --ignore=swagger_client src/caosadvancedtools
+.PHONY: lint
diff --git a/README.md b/README.md
index 5208a711f72a3daa919e9195a5a0b05413e3de3a..83a767476286acba98d113b8fa7ab6b482751230 100644
--- a/README.md
+++ b/README.md
@@ -1,30 +1,57 @@
-[](https://gitlab.com/caosdb/caosdb-advanced-user-tools/commits/master)
+# README
 
-Project migrated to https://gitlab.com/caosdb
-
-# Welcome
+## Welcome
 
 This is the **CaosDB Advanced User Tools** repository and a part of the
-CaosDB project. This project contains tools that are beyond the typical use of
+CaosDB project.
+This project contains tools that are beyond the typical use of
 the CaosDB python client. Especially, this includes the crawler which will
 typically be used by a data curator.
 
-# Setup
+## Setup
 
 Please read the [README_SETUP.md](README_SETUP.md) for instructions on how to
 setup this code.
 
+## Further Reading
+
+Please refer to the [official
+documentation](https://docs.indiscale.com/caosdb-advanced-user-tools/) for more
+information.
+
+## Contributing
+
+Thank you very much to all contributors—[past,
+present](https://gitlab.com/caosdb/caosdb/-/blob/dev/HUMANS.md), and prospective
+ones.
 
-# Further Reading
+### Code of Conduct
 
-Please refer to the [official gitlab repository of the CaosDB
-project](https://gitlab.com/caosdb/caosdb) for more information.
+By participating, you are expected to uphold our [Code of
+Conduct](https://gitlab.com/caosdb/caosdb/-/blob/dev/CODE_OF_CONDUCT.md).
 
-# License
+### How to Contribute
 
-Copyright (C) 2018 Research Group Biomedical Physics, Max Planck Institute for
-Dynamics and Self-Organization Göttingen.
+- You found a bug, have a question, or want to request a feature? Please [create
+  an issue](https://gitlab.com/caosdb/caosdb-advanced-user-tools/-/issues).
+- You want to contribute code? Please fork the repository and create a merge
+  request in GitLab and choose this repository as target. Make sure to select
+  "Allow commits from members who can merge the target branch" under
+  Contribution when creating the merge request. This allows our team to work
+  with you on your request.
+- If you have a suggestion for the
+  [documentation](https://docs.indiscale.com/caosdb-advanced-user-tools/), the
+  preferred way is also a merge request as described above (the documentation
+  resides in `src/doc`). However, you can also create an issue for it.
+- You can also contact us at **info (AT) caosdb.de** and join the CaosDB
+  community on
+  [#caosdb:matrix.org](https://matrix.to/#/!unwwlTfOznjEnMMXxf:matrix.org).
+
+## License
+
+* Copyright (C) 2018 Research Group Biomedical Physics, Max Planck Institute
+  for Dynamics and Self-Organization Göttingen.
+* Copyright (C) 2020-2021 Indiscale GmbH <info@indiscale.com>
 
 All files in this repository are licensed under a [GNU Affero General Public
 License](LICENCE.md) (version 3 or later).
-
diff --git a/README_SETUP.md b/README_SETUP.md
index 243fba2dd1259aaefbe6c7163a242b700eb5a66e..43047d554afbe8ffba11aef67b20dde44d29bdcf 100644
--- a/README_SETUP.md
+++ b/README_SETUP.md
@@ -8,36 +8,48 @@ git clone 'https://gitlab.com/caosdb/caosdb-advanced-user-tools'
 ```
 
 ## Dependencies
-Dependencies will be installed automatically if you use the below described procedure.
-- `caosdb>=0.4.0`
+Dependencies will be installed automatically if you use the procedure
+described below.
+- `caosdb>=0.6.0`
 - `openpyxl>=3.0.0`
 - `xlrd>=1.2.0`
+- `pandas>=1.2.0`
+- `numpy>=1.17.3`
+
+If you want to use the optional h5-crawler, the following dependencies will be
+installed additionally:
+- `h5py>=3.3.0`
 
 For testing:
-- `tox`
+- `tox`
 
 ## Installation
 - `pip install . --user`
 - `pip install tox --user`
 
+Optional h5-crawler:
+- `pip install .[h5-crawler] --user`
+
 ## Run Unit Tests
 `tox`
 
 ## Run Integration Tests Locally
 1. Change directory to `integrationtests/`.
-2. Mount `extroot` to the folder that will be used as
-   extroot. E.g. `sudo mount -o bind extroot
-   ../../caosdb-deploy/profiles/empty/paths/extroot` (or whatever path
-   the extroot of the empty profile to be used is located at).
-3. Start an empty (!) CaosDB instance (with the mounted extroot). The
-   database will be cleared during testing, so it's important to use
-   an empty insctance.
-4. Run `test.sh`.
+2. Mount `extroot` to the folder that will be used as extroot. E.g. `sudo mount
+   -o bind extroot ../../caosdb-deploy/profiles/debug/paths/extroot` (or
+   whatever path the extroot of the empty profile to be used is located at).
+3. Start (or restart) an empty (!) CaosDB instance (with the mounted
+   extroot). The database will be cleared during testing, so it's important to
+   use an empty instance. Make sure your configuration for the python caosdb
+   module is correct and allows you to connect to the server.
+4. Run `test.sh`. Note that this may modify content of the
+   `integrationtest/extroot/` directory.
 
 ## Code Formatting
-
-`autopep8 -i -r ./`
+
+`make style`
 
 ## Documentation
 #
diff --git a/RELEASE_GUIDELINES.md b/RELEASE_GUIDELINES.md
new file mode 100644
index 0000000000000000000000000000000000000000..7592b02d8084d3a5e6419ae66b61331026f2766c
--- /dev/null
+++ b/RELEASE_GUIDELINES.md
@@ -0,0 +1,43 @@
+# Release Guidelines for CaosDB Advanced User Tools
+
+This document specifies release guidelines in addition to the general release
+guidelines of the CaosDB Project
+([RELEASE_GUIDELINES.md](https://gitlab.com/caosdb/caosdb/blob/dev/RELEASE_GUIDELINES.md)).
+
+## General Prerequisites
+
+* All tests are passing.
+* FEATURES.md is up-to-date and a public API is declared in that document.
+* CHANGELOG.md is up-to-date.
+* Dependencies in `setup.py` are up-to-date.
+
+## Steps
+
+1. Create a release branch from the dev branch. This prevents further changes
+   to the code base and a never-ending release process. Naming: `release-<VERSION>`
+
+2. Update CHANGELOG.md.
+
+3. Check all general prerequisites.
+
+4. Update the version:
+   - `version` variables in `src/doc/conf.py`
+   - Version in [setup.py](./setup.py): Check the `MAJOR`, `MINOR`, `MICRO`, `PRE` variables and set
+     `ISRELEASED` to `True`. Use the possibility to issue pre-release versions for testing.
+
+5. Merge the release branch into the main branch.
+
+6. Tag the latest commit of the main branch with `v<VERSION>`.
+
+7. Delete the release branch.
+
+8. Remove a possibly existing `./dist` directory with an old release.
+
+9. Publish the release by executing `./release.sh`, which uploads the caosdb
+   module to the Python Package Index [pypi.org](https://pypi.org).
+
+10. Merge the main branch back into the dev branch.
+
+11. After the merge of main to dev, start a new development version by
+    setting `ISRELEASED` to `False` and by increasing at least the `MICRO`
+    version in [setup.py](./setup.py) and preparing CHANGELOG.md.
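Steps 4 and 11 above refer to version variables in `setup.py`. For orientation only, such a block typically looks like the following minimal sketch; the variable names are the ones named in the guidelines, while the surrounding logic is an assumption for illustration, not this repository's actual code.

```python
# Minimal sketch of the setup.py version block handled in steps 4 and 11.
# MAJOR/MINOR/MICRO/PRE/ISRELEASED are the names used in the guidelines;
# everything else here is assumed, not copied from the real file.
MAJOR = 0
MINOR = 4
MICRO = 1
PRE = ""           # e.g. "rc2" for a pre-release, "" for a final release
ISRELEASED = True  # step 11: set back to False right after the release

if ISRELEASED:
    __version__ = f"{MAJOR}.{MINOR}.{MICRO}{PRE}"
else:
    # Development builds are marked so they cannot be mistaken for releases.
    __version__ = f"{MAJOR}.{MINOR}.{MICRO}{PRE}.dev"
```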
diff --git a/integrationtests/crawl.py b/integrationtests/crawl.py
index bf72b5f74b463f9ece2bd047548dcb22e8d71dac..defed2cb4f5fb0a0f349898e555c5d25924e2f9b 100755
--- a/integrationtests/crawl.py
+++ b/integrationtests/crawl.py
@@ -34,7 +34,9 @@ from caosadvancedtools.crawler import FileCrawler
 from caosadvancedtools.guard import INSERT, UPDATE
 from caosadvancedtools.scifolder import (AnalysisCFood, ExperimentCFood,
                                          PublicationCFood, SimulationCFood,
-                                         SoftwareCFood)
+                                         SoftwareCFood, ResultTableCFood)
+
+from example_hdf5cfood import ExampleH5CFood
 
 try:
     from sss_helper import get_argument_parser, print_success
@@ -43,7 +45,7 @@ except ModuleNotFoundError:
         return argparse.ArgumentParser()
 
     def print_success(text):
-        print("Success: "+text)
+        print("Success: " + text)
 
 
 def get_parser():
@@ -89,6 +91,8 @@ if __name__ == "__main__":
                           interactive=False, hideKnown=False,
                           cfood_types=[ExperimentCFood, AnalysisCFood, SoftwareCFood,
                                        PublicationCFood, SimulationCFood,
+                                       ResultTableCFood,
+                                       ExampleH5CFood
                                        ])
 
     if args.authorize_run:
diff --git a/integrationtests/create_analysis.py b/integrationtests/create_analysis.py
new file mode 100644
index 0000000000000000000000000000000000000000..1b7aa0d2d6671f14a3c65cf5ed135dfecb0aa69c
--- /dev/null
+++ b/integrationtests/create_analysis.py
@@ -0,0 +1,65 @@
+#!/usr/bin/env python3
+# encoding: utf-8
+#
+# ** header v3.0
+# This file is a part of the CaosDB Project.
+#
+# Copyright (C) 2021 Indiscale GmbH <info@indiscale.com>
+# Copyright (C) 2021 Henrik tom Wörden <h.tomwoerden@indiscale.com>
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as
+# published by the Free Software Foundation, either version 3 of the
+# License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+#
+# ** end header
+#
+
+"""
+A small script that creates an Analysis Record that can be used for testing the
+automated analysis pipeline.
+""" + +import sys +from datetime import datetime + +import caosdb as db + + +def main(): + script = db.File( + file="../src/caosadvancedtools/serverside/examples/example_script.py", + path=("AutomatedAnalysis/scripts/" + + str(datetime.now())+"example_script.py"), + ) + script.insert() + + da = db.Record() + da.add_parent("Analysis") + da.add_property("scripts", value=[script], datatype=db.LIST(db.FILE)) + da.add_property("sources", + value=db.execute_query( + "FIND FILE which is stored at '**/timeseries.npy'", + unique=True), + ) + da.add_property("date", "2020-01-01") + da.add_property("identifier", "TEST") + only = db.execute_query( + "FIND RECORD Person WITH firstname=Only", + unique=True) + only.add_property(db.Property("Email").retrieve().id, "only@example.com") + only.update() + da.add_property("responsible", only) + da.insert() + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/integrationtests/example_hdf5cfood.py b/integrationtests/example_hdf5cfood.py new file mode 100644 index 0000000000000000000000000000000000000000..5485402d2042b2055a087b99abcba409095a7c70 --- /dev/null +++ b/integrationtests/example_hdf5cfood.py @@ -0,0 +1,56 @@ +#!/usr/bin/env python3 +# encoding: utf-8 +# +# ** header v3.0 +# This file is a part of the CaosDB Project. +# +# Copyright (C) 2021 IndiScale GmbH <www.indiscale.com> +# Copyright (C) 2021 Henrik tom Wörden <h.tomwoerden@indiscale.com> +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see <https://www.gnu.org/licenses/>. 
+#
+# ** end header
+#
+
+"""
+An exemplary definition of an HDF5 CFood for integration testing
+"""
+
+import caosdb as db
+from caosadvancedtools.cfoods.h5 import H5CFood
+from caosadvancedtools.scifolder import ExperimentCFood
+from caosadvancedtools.scifolder.generic_pattern import readme_pattern
+
+
+class ExampleH5CFood(H5CFood):
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        self.root_name = "ExampleH5"
+
+    @staticmethod
+    def get_re():
+        return ExperimentCFood.get_re()[:-len(readme_pattern)] + r".*\.hdf5"
+
+    def create_identifiables(self):
+        super().create_identifiables()
+        self.identifiable_root = db.Record()
+        self.identifiable_root.add_property("hdf5File", self.crawled_file)
+        self.identifiable_root.add_parent("ExampleH5")
+        self.identifiables.append(self.identifiable_root)
+
+    def special_treatment(self, key, value, dtype):
+        if key == "attr_data_root":
+            return "single_attribute", value, dtype
+
+        return key, value, dtype
diff --git a/integrationtests/example_script.py b/integrationtests/example_script.py
new file mode 120000
index 0000000000000000000000000000000000000000..f6e9b498ff97638cb4105e019424c0c677a7f414
--- /dev/null
+++ b/integrationtests/example_script.py
@@ -0,0 +1 @@
+../src/caosadvancedtools/serverside/examples/example_script.py
\ No newline at end of file
diff --git a/integrationtests/extroot/.cerate_dir b/integrationtests/extroot/.create_dir
similarity index 100%
rename from integrationtests/extroot/.cerate_dir
rename to integrationtests/extroot/.create_dir
diff --git a/integrationtests/extroot/DataAnalysis/2010_TestProject/2019-02-03/plot.py b/integrationtests/extroot/DataAnalysis/2010_TestProject/2019-02-03/plot.py
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..2c99b82a33e496eb31cf7fdc354767fe31919033 100644
--- a/integrationtests/extroot/DataAnalysis/2010_TestProject/2019-02-03/plot.py
+++ b/integrationtests/extroot/DataAnalysis/2010_TestProject/2019-02-03/plot.py
@@ -0,0 +1 @@
+import plot
diff --git a/integrationtests/extroot/DataAnalysis/2010_TestProject/2019-02-03/results.pdf b/integrationtests/extroot/DataAnalysis/2010_TestProject/2019-02-03/results.pdf
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..09157f2c0961d412efea36ea0e56db5aac03fd36 100644
Binary files a/integrationtests/extroot/DataAnalysis/2010_TestProject/2019-02-03/results.pdf and b/integrationtests/extroot/DataAnalysis/2010_TestProject/2019-02-03/results.pdf differ
diff --git a/integrationtests/extroot/ExperimentalData/2010_TestProject/2019-02-03/datafile.dat b/integrationtests/extroot/ExperimentalData/2010_TestProject/2019-02-03/datafile.dat
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..e29553fe01c8706e15a042e5ac6f85ed1a2cc8ce 100644
--- a/integrationtests/extroot/ExperimentalData/2010_TestProject/2019-02-03/datafile.dat
+++ b/integrationtests/extroot/ExperimentalData/2010_TestProject/2019-02-03/datafile.dat
@@ -0,0 +1 @@
+datadatadata
diff --git a/integrationtests/extroot/ExperimentalData/2010_TestProject/2019-02-03/hdf5_dummy_file.hdf5 b/integrationtests/extroot/ExperimentalData/2010_TestProject/2019-02-03/hdf5_dummy_file.hdf5
new file mode 100644
index 0000000000000000000000000000000000000000..41bfb7ab3bcac19d90fd4f018cdd8118ae806eaf
Binary files /dev/null and b/integrationtests/extroot/ExperimentalData/2010_TestProject/2019-02-03/hdf5_dummy_file.hdf5 differ
diff --git a/integrationtests/extroot/ExperimentalData/2010_TestProject/2019-02-03/result_table_DepthTest.csv b/integrationtests/extroot/ExperimentalData/2010_TestProject/2019-02-03/result_table_DepthTest.csv
new file mode 100644
index 0000000000000000000000000000000000000000..a29679afce78089f3cdd4e5e388262456668cd90
--- /dev/null
+++ b/integrationtests/extroot/ExperimentalData/2010_TestProject/2019-02-03/result_table_DepthTest.csv
@@ -0,0 +1,3 @@
+temperature [°C] ,depth
+234.4,3.0
+344.6,5.1
diff --git a/integrationtests/extroot/ExperimentalData/2010_TestProject/2019-02-04/README.md b/integrationtests/extroot/ExperimentalData/2010_TestProject/2019-02-04/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..7de3bd15d29b93085322250a06adb9b8f389f8e4
--- /dev/null
+++ b/integrationtests/extroot/ExperimentalData/2010_TestProject/2019-02-04/README.md
@@ -0,0 +1,5 @@
+---
+responsible:
+- Tom Wood
+description: Something.
+...
diff --git a/integrationtests/extroot/Publications/Posters/2019-02-03_really_cool_finding/poster.pdf b/integrationtests/extroot/Publications/Posters/2019-02-03_really_cool_finding/poster.pdf
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..09157f2c0961d412efea36ea0e56db5aac03fd36 100644
Binary files a/integrationtests/extroot/Publications/Posters/2019-02-03_really_cool_finding/poster.pdf and b/integrationtests/extroot/Publications/Posters/2019-02-03_really_cool_finding/poster.pdf differ
diff --git a/integrationtests/extroot/README.md b/integrationtests/extroot/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..4d45129ba23fffc825b2631e0eaa39f3d048427d
--- /dev/null
+++ b/integrationtests/extroot/README.md
@@ -0,0 +1,3 @@
+This directory is mounted into the LinkAhead docker container, to allow the
+inclusion of external file systems. For production use, please set the
+`paths:extroot` option in the profile.
diff --git a/integrationtests/extroot/SimulationData/2010_TestProject/2019-02-03_something/timeseries.npy b/integrationtests/extroot/SimulationData/2010_TestProject/2019-02-03_something/timeseries.npy
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..18da9b18cda23d411d0f2666629377dd7991ac8f 100644
Binary files a/integrationtests/extroot/SimulationData/2010_TestProject/2019-02-03_something/timeseries.npy and b/integrationtests/extroot/SimulationData/2010_TestProject/2019-02-03_something/timeseries.npy differ
diff --git a/integrationtests/extroot/Software/2010_TestSoftware/2019-02-03_v0.1/README.md b/integrationtests/extroot/Software/2010_TestSoftware/2019-02-03_v0.1/README.md
index d844a2ddf0d87d303c69b9107a366f2e34b6d03c..2057703d18dad94127037e05b3180603e9e37380 100644
--- a/integrationtests/extroot/Software/2010_TestSoftware/2019-02-03_v0.1/README.md
+++ b/integrationtests/extroot/Software/2010_TestSoftware/2019-02-03_v0.1/README.md
@@ -1,6 +1,6 @@
 ---
 responsible: Responsible, Only
-description: A description of this example analysis.
+description: A description of another example analysis.
 sources:
 - file: "/ExperimentalData/2010_TestProject/2019-02-03/*.dat"
diff --git a/integrationtests/extroot/Software/2010_TestSoftware/2019-02-03_v0.1/plot.py b/integrationtests/extroot/Software/2010_TestSoftware/2019-02-03_v0.1/plot.py
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..2c99b82a33e496eb31cf7fdc354767fe31919033 100644
--- a/integrationtests/extroot/Software/2010_TestSoftware/2019-02-03_v0.1/plot.py
+++ b/integrationtests/extroot/Software/2010_TestSoftware/2019-02-03_v0.1/plot.py
@@ -0,0 +1 @@
+import plot
diff --git a/integrationtests/extroot/Software/2020NewProject0X/2020-02-03/README.md b/integrationtests/extroot/Software/2020NewProject0X/2020-02-03/README.md
index a47ea6e105c20d050ddf2fdc8cd29d4685ba30bf..bd57ffe2c43fe6406672db2dd18902b8269569d4 100644
--- a/integrationtests/extroot/Software/2020NewProject0X/2020-02-03/README.md
+++ b/integrationtests/extroot/Software/2020NewProject0X/2020-02-03/README.md
@@ -1,7 +1,7 @@
 ---
 responsible:
 - Only Responsible MPI DS
-description: A description of this example analysis.
+description: A description of another example analysis.
 sources:
 - file: "/ExperimentalData/2010_TestProject/2019-02-03/*.dat"
diff --git a/integrationtests/extroot/Software/2020NewProject0X/2020-02-04/plot.py b/integrationtests/extroot/Software/2020NewProject0X/2020-02-04/plot.py
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..2c99b82a33e496eb31cf7fdc354767fe31919033 100644
--- a/integrationtests/extroot/Software/2020NewProject0X/2020-02-04/plot.py
+++ b/integrationtests/extroot/Software/2020NewProject0X/2020-02-04/plot.py
@@ -0,0 +1 @@
+import plot
diff --git a/integrationtests/filldb.sh b/integrationtests/filldb.sh
index 98d22347bd2d40e8384a2a217452fd3ba5bc445f..9f55365eb595537b43caa9b197c8bc31ea1e69cb 100755
--- a/integrationtests/filldb.sh
+++ b/integrationtests/filldb.sh
@@ -7,4 +7,5 @@ python3 -m caosadvancedtools.loadFiles /opt/caosdb/mnt/extroot/SimulationData
 python3 -m caosadvancedtools.loadFiles /opt/caosdb/mnt/extroot/Publications
 python3 -m caosadvancedtools.loadFiles /opt/caosdb/mnt/extroot/Software
 python3 insert_model.py
+python3 insert_some.py
 python3 crawl.py /
diff --git a/integrationtests/insert_model.py b/integrationtests/insert_model.py
index 270a08a36d7512a8642c2ca08a9ec6ea93b81bd9..26bf478cdf0d3709e7c0c086fecf722b8c7f90fa 100755
--- a/integrationtests/insert_model.py
+++ b/integrationtests/insert_model.py
@@ -1,11 +1,33 @@
 #!/usr/bin/env python3
 import caosdb as db
+import h5py
+from caosadvancedtools.cfoods.h5 import H5CFood
+from caosadvancedtools.models.data_model import DataModel
 from caosadvancedtools.models.parser import parse_model_from_yaml
 
-model = parse_model_from_yaml("model.yml")
-model.sync_data_model(noquestion=True)
-if len(db.execute_query("FIND Property alias")) == 0:
-    al = db.Property(name="alias")
-    al.add_parent(name="name")
-    al.insert()
+
+def main():
+
+    # for testing existing data model parts with the YAML Interface
+    db.RecordType(name="TestRT1", datatype=db.TEXT).insert()
+    db.Property(name="TestP1", datatype=db.TEXT).insert()
+
+    model = parse_model_from_yaml("model.yml")
+    model.sync_data_model(noquestion=True)
+
+    if len(db.execute_query("FIND Property alias")) == 0:
+        al = db.Property(name="alias")
+        al.add_parent(name="name")
+        al.insert()
+
+    h5model = db.Container()
+    h5file = h5py.File(
+        'extroot/ExperimentalData/2010_TestProject/2019-02-03/hdf5_dummy_file.hdf5', 'r')
+    H5CFood.create_structure(h5file, create_recordTypes=True, collection=h5model,
+                             root_name="ExampleH5")
+    h5model = DataModel(h5model)
+    h5model.sync_data_model(noquestion=True)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/integrationtests/insert_some.py b/integrationtests/insert_some.py
new file mode 100644
index 0000000000000000000000000000000000000000..cf16a45ddf1f95ed261af1d9f18edfa1cbf4b450
--- /dev/null
+++ b/integrationtests/insert_some.py
@@ -0,0 +1,28 @@
+#!/usr/bin/env python3
+import caosdb as db
+from caosadvancedtools.scifolder.experiment_cfood import dm
+
+# This inserts two identifiables. When no dependencies are possible among
+# identifiables, it should not be possible to find both: the experiment
+# identifiable would for example not reference the correct project Record
+project = db.Record(name='2010_TestProject')
+project.add_parent(name=dm.Project)
+project.insert()
+
+pers = db.Record()
+pers.add_parent("Person")
+pers.add_property("lastname", "Wood")
+pers.add_property("firstname", "Tom")
+pers.insert()
+
+experiment = db.Record()
+experiment.add_parent(name=dm.Experiment)
+experiment.description = "Something."
+experiment.add_property(
+    name=dm.date, value='2019-02-04')
+experiment.add_property(name=dm.Project, value=project)
+experiment.add_property(
+    name="identifier", value="empty_identifier")
+experiment.add_property(
+    name="responsible", value=pers)
+experiment.insert(flags={"force-missing-obligatory": "ignore"})
diff --git a/integrationtests/model.yml b/integrationtests/model.yml
index 0a4ad381bfc119dd65d2c192f8de823deda525ae..9f7a62d1d0befbc7225353380c79db2f368c969c 100644
--- a/integrationtests/model.yml
+++ b/integrationtests/model.yml
@@ -9,6 +9,7 @@ Experiment:
   # TODO empty recommended_properties is a problem
   #recommended_properties:
     responsible:
+      datatype: LIST<Person>
 Project:
 SoftwareVersion:
   recommended_properties:
@@ -18,6 +19,14 @@ SoftwareVersion:
     binaries:
     sourceCode:
 Software:
+DepthTest:
+  obligatory_properties:
+    temperature:
+      datatype: DOUBLE
+      description: 'temp'
+    depth:
+      datatype: DOUBLE
+      description: 'temp'
 Person:
   obligatory_properties:
     firstName:
@@ -30,16 +39,16 @@ Person:
     email:
       datatype: TEXT
       description: 'Email of a Person.'
-responsible:
-  datatype: REFERENCE
 revisionOf:
   datatype: REFERENCE
 results:
-  datatype: REFERENCE
+  datatype: LIST<REFERENCE>
 sources:
-  datatype: REFERENCE
+  datatype: LIST<REFERENCE>
 scripts:
-  datatype: REFERENCE
+  datatype: LIST<REFERENCE>
+single_attribute:
+  datatype: LIST<INTEGER>
 Simulation:
   obligatory_properties:
     date:
@@ -50,6 +59,9 @@ Analysis:
     date:
     identifier:
     responsible:
+  suggested_properties:
+    mean_value:
+      datatype: DOUBLE
 Publication:
 Thesis:
   inherit_from_suggested:
@@ -66,3 +78,8 @@ Presentation:
 Report:
   inherit_from_suggested:
     - Publication
+hdf5File:
+  datatype: REFERENCE
+extern:
+  - TestRT1
+  - TestP1
diff --git a/integrationtests/test.sh b/integrationtests/test.sh
index a56b758421a059a0cc3461c08600c13ffd93705c..5bb013db6e70a3a8393e7e3b7c7993a6da6bf9b9 100755
--- a/integrationtests/test.sh
+++ b/integrationtests/test.sh
@@ -1,8 +1,23 @@
 #!/bin/bash
+if [ "$1" != "--force" ]
+then
+    echo "Warning: For these tests, the whole database will be deleted. Do you want to proceed? (yes/Exit)"
+    read safety
+    if [ -z $safety ]
+    then
+        echo "Exiting..."
+        exit 0
+    elif [ $safety != "yes" ]
+    then
+        echo "Exiting..."
+        exit 0
+    fi
+fi
 OUT=/tmp/crawler.output
 ls
 cat pycaosdb.ini
 rm -rf cache.db
+set -e
 echo "Clearing database"
 python3 clear_database.py
 echo "Testing crawler without cfoods"
@@ -19,17 +34,19 @@ echo "Filling the database"
 echo "Testing the crawler database"
 python3 -m pytest test_crawler_with_cfoods.py
 echo "make a change"
-pushd extroot
-egrep -liRZ 'A description of another example' . | xargs -0 -l sed -i -e 's/A description of another example/A description of this example/g'
+cd extroot
+egrep -liRZ 'A description of another example' . \
+    | xargs -0 -l sed -i -e 's/A description of another example/A description of this example/g'
 # remove a file to check that this does not lead to a crawler crash
-mv DataAnalysis/2010_TestProject/2019-02-03_something/README.xlsx DataAnalysis/2010_TestProject/2019-02-03_something/README.xlsx_back
-popd
+mv DataAnalysis/2010_TestProject/2019-02-03_something/README.xlsx \
+   DataAnalysis/2010_TestProject/2019-02-03_something/README.xlsx_back
+cd ..
echo "run crawler" ./crawl.py / | tee $OUT # rename the moved file -mv extroot/DataAnalysis/2010_TestProject/2019-02-03_something/README.xlsx_back extroot/DataAnalysis/2010_TestProject/2019-02-03_something/README.xlsx +mv extroot/DataAnalysis/2010_TestProject/2019-02-03_something/README.xlsx_back \ + extroot/DataAnalysis/2010_TestProject/2019-02-03_something/README.xlsx # check whether there was something UNAUTHORIZED -set -e grep "There where unauthorized changes" $OUT # get the id of the run which is the last field of the output string RUN_ID=$(grep "run id:" $OUT | awk '{ print $NF }') @@ -43,14 +60,22 @@ then exit 1 fi set -e -echo "undo changes" -pushd extroot -egrep -liRZ 'A description of this example' . | xargs -0 -l sed -i -e 's/A description of this example/A description of another example/g' -popd +echo "Undoing previous changes to extroot content..." +cd extroot +egrep -liRZ 'A description of this example' . \ + | xargs -0 -l sed -i -e 's/A description of this example/A description of another example/g' +cd .. +echo "Done." python3 test_table.py # TODO the following test deletes lots of the data inserted by the crawler echo "Testing im and export" python3 test_im_und_export.py + +# automated analysis +# for some reason the loadFiles of sim data has to be called again +python3 -m caosadvancedtools.loadFiles /opt/caosdb/mnt/extroot/SimulationData +python3 create_analysis.py + # Better safe than sorry: python3 clear_database.py @@ -62,5 +87,11 @@ python3 -m pytest test_crawl_with_datamodel_problems.py echo "Testing table export" python3 -m pytest test_base_table_exporter_integration.py +echo "Testing json-schema datamodel parser" +python3 -m pytest test_json_schema_datamodel_parser.py + +echo "Testing yaml datamodel parser" +python3 -m pytest test_yaml_parser.py + # Obsolete due to teardown in the above test. # echo "/n/n/n YOU NEED TO RESTART THE SERVER TO REDO TESTS!!!" diff --git a/integrationtests/test_assure_functions.py b/integrationtests/test_assure_functions.py index 56f9767a0f436201ab6003ffd88f631bdb089544..b1c731dbbf25f33b54fc3a005402f292525d2d05 100644 --- a/integrationtests/test_assure_functions.py +++ b/integrationtests/test_assure_functions.py @@ -1,26 +1,25 @@ #!/usr/bin/env python # encoding: utf-8 # -# ** header v3.0 # This file is a part of the CaosDB Project. # +# Copyright (C) 2022 IndiScale GmbH <info@indiscale.com> # Copyright (C) 2021 University Medical Center Göttingen, Institute for Medical Informatics # Copyright (C) 2021 Florian Spreckelsen <florian.spreckelsen@med.uni-goettingen.de> +# Copyright (C) 2022 Florian Spreckelsen <f.spreckelsen@indiscale.com> # -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU Affero General Public License as -# published by the Free Software Foundation, either version 3 of the -# License, or (at your option) any later version. +# This program is free software: you can redistribute it and/or modify it under +# the terms of the GNU Affero General Public License as published by the Free +# Software Foundation, either version 3 of the License, or (at your option) any +# later version. # -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Affero General Public License for more details. 
+# This program is distributed in the hope that it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+# FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
+# details.
 #
-# You should have received a copy of the GNU Affero General Public License
-# along with this program. If not, see <https://www.gnu.org/licenses/>.
-#
-# ** end header
+# You should have received a copy of the GNU Affero General Public License along
+# with this program. If not, see <https://www.gnu.org/licenses/>.
 
 """Integration tests for the `assure_...` functions from
 `caosadvancedtools.cfood`. They mainly test the in-place updates when no
 `to_be_updated` is specified.
@@ -32,26 +31,25 @@ from caosadvancedtools.cfood import (assure_object_is_in_list)
 from caosadvancedtools.guard import (global_guard, RETRIEVE, UPDATE)
 
 
-def setup_module():
+def setup():
     """Delete all test entities."""
     db.execute_query("FIND Test*").delete(raise_exception_on_error=False)
 
 
-def setup():
+def setup_module():
     """Allow all updates and delete test data"""
     global_guard.level = UPDATE
-    setup_module()
+    setup()
 
 
-def teardown():
+def teardown_module():
     """Reset guard level and delete test data."""
     global_guard.level = RETRIEVE
-    setup_module()
+    setup()
 
 
 def test_assure_list_in_place():
     """Test an in-place update with `assure_object_is_in_list`."""
-
     int_list_prop = db.Property(name="TestIntListProperty",
                                 datatype=db.LIST(db.INTEGER)).insert()
     rt1 = db.RecordType(name="TestType1").add_property(
@@ -91,3 +89,29 @@ def test_assure_list_in_place():
     assert len(rec2.get_property(ref_rt.name).value) == 3
     assert ref_rec2.id in rec2.get_property(ref_rt.name).value
     assert ref_rec3.id in rec2.get_property(ref_rt.name).value
+
+
+def test_add_to_empty_list():
+    """See https://gitlab.com/caosdb/caosdb-advanced-user-tools/-/issues/40."""
+    # @author Florian Spreckelsen
+    # @date 2022-04-19
+    referenced_rt = db.RecordType(name="TestReferencedType").insert()
+    list_prop = db.Property(name="TestListProp",
+                            datatype=db.LIST(referenced_rt)).insert()
+    referencing_rt = db.RecordType(
+        name="TestReferencingType").add_property(list_prop).insert()
+
+    db.Record(name="TestReferencedRecord").add_parent(referenced_rt).insert()
+    db.Record(name="TestReferencingRecord").add_parent(
+        referencing_rt).add_property(list_prop, value=[]).insert()
+
+    referenced_rec = db.execute_query("FIND TestReferencedRecord", unique=True)
+    referencing_rec = db.execute_query(
+        "FIND TestReferencingRecord", unique=True)
+
+    assure_object_is_in_list(referenced_rec, referencing_rec, list_prop.name)
+
+    referencing_rec = db.execute_query(
+        "FIND TestReferencingRecord", unique=True)
+    assert referencing_rec.get_property(list_prop.name).value == [
+        referenced_rec.id]
diff --git a/integrationtests/test_base_table_exporter_integration.py b/integrationtests/test_base_table_exporter_integration.py
index 1c9158bd1d9600884571957d4916939f82c1a9ca..9d79e857fe706d78103ade3b92ee38498a2a1607 100644
--- a/integrationtests/test_base_table_exporter_integration.py
+++ b/integrationtests/test_base_table_exporter_integration.py
@@ -23,6 +23,7 @@
 # ** end header
 #
 import caosdb as db
+import pytest
 
 from caosadvancedtools import table_export as te
 
@@ -85,8 +86,11 @@ def setup_module():
         pass
 
 
+@pytest.fixture(autouse=True)
 def setup():
-    """No further setup"""
+    """Same as module setup."""
+    setup_module()
+    yield None
     setup_module()
diff --git a/integrationtests/test_crawl_with_datamodel_problems.py b/integrationtests/test_crawl_with_datamodel_problems.py
index 6c212e36084430e5f7c3362a04e78565561019b2..0c6a145afdab682f82af09a17fb9aa0770769959 100644
--- a/integrationtests/test_crawl_with_datamodel_problems.py
+++ b/integrationtests/test_crawl_with_datamodel_problems.py
@@ -20,10 +20,11 @@
 # along with this program. If not, see <https://www.gnu.org/licenses/>.
 #
 # ** end header
-"""Test whether the crawler correctly identifies the data model
-problems caused by a faulty model.
+
+"""Test whether the crawler correctly identifies the data model problems caused by a faulty model.
 """
+
 import caosdb as db
 from caosadvancedtools import loadFiles
 from caosadvancedtools.cfood import fileguide
@@ -34,6 +35,8 @@ from caosadvancedtools.models.parser import parse_model_from_yaml
 from caosadvancedtools.scifolder import (AnalysisCFood, ExperimentCFood,
                                          PublicationCFood, SimulationCFood)
 
+from insert_model import main as insert_model
+
 
 def setup_module():
     """Clear problems and remove all entities except for built-in ones."""
@@ -67,8 +70,7 @@ def test_crawler_with_data_model_problems():
                        prefix="", dryrun=False, forceAllowSymlinks=False)
 
     # load and damage the model
-    model = parse_model_from_yaml("model.yml")
-    model.sync_data_model(noquestion=True)
+    insert_model()
 
     deleted_entities = {"Experiment", "Poster", "results"}
 
     for ent in deleted_entities:
@@ -89,5 +91,6 @@ def test_crawler_with_data_model_problems():
 
     # There should be datamodel problems
     assert len(DataModelProblems.missing) > 0
+
     # Deleted entities should have been identified:
-    assert deleted_entities.issubset(DataModelProblems.missing)
+    assert DataModelProblems.missing.issubset(deleted_entities)
diff --git a/integrationtests/test_crawler_basics.py b/integrationtests/test_crawler_basics.py
index 85fca282c8546ad1e7f6a708a2eaf46e374a528f..7da90844f14cf0d1eaded9d4fc8f37320da46aad 100644
--- a/integrationtests/test_crawler_basics.py
+++ b/integrationtests/test_crawler_basics.py
@@ -65,6 +65,7 @@ class CrawlerTest(unittest.TestCase):
         self.rec2.add_parent(name="Test_Type_2")
         self.rec3 = db.Record()
         self.rec3.add_parent(name="Test_Type_3")
+        self.rec3.add_property(name="Test_Prop", value="Test")
 
     def test_check_existence(self):
         # This hasn't been inserted yet:
@@ -92,6 +93,7 @@ class CrawlerTest(unittest.TestCase):
         old_id = id(identifiables[0])
         reference_to_first = identifiables[0]
         assert reference_to_first is identifiables[0]
+
         Crawler.find_or_insert_identifiables(identifiables)
 
         for el in identifiables:
@@ -107,6 +109,7 @@ class CrawlerTest(unittest.TestCase):
 
     def tearDown(self):
         setup_module()
+
         # Delete nameless entities
         for el in [self.rec1, self.rec2, self.rec3]:
             try:
diff --git a/integrationtests/test_crawler_with_cfoods.py b/integrationtests/test_crawler_with_cfoods.py
index c39c3fc67d7ca30e3d013ac205ef398de216ad9c..4efef87cef52e4a2a20a615afe210c32f52a276a 100755
--- a/integrationtests/test_crawler_with_cfoods.py
+++ b/integrationtests/test_crawler_with_cfoods.py
@@ -26,6 +26,7 @@ import os
 import unittest
 
 import caosdb as db
+from caosdb.apiutils import retrieve_entity_with_id
 
 
 def get_entity_with_id(eid):
@@ -34,6 +35,14 @@ def get_entity_with_id(eid):
 
 class CrawlerTest(unittest.TestCase):
     def test_experiment(self):
+
+        ########################
+        # # dummy for dependency test experiment # #
+        ########################
+        exp = db.execute_query(
+            "FIND Experiment with date=2019-02-04 and identifier=empty_identifier",
+            unique=True)
+
         ########################
         # # first experiment # #
         ########################
@@ -57,6 +66,17 @@ class CrawlerTest(unittest.TestCase):
                          datfile.description)
         assert os.path.basename(datfile.path) == "datafile.dat"
 
+        # There should be two DepthTest Properties
+        depthtests = exp.get_property("DepthTest")
+        assert depthtests is not None
+        assert len(depthtests.value) == 2
+        depthtest = db.Record(id=depthtests.value[0])
+        depthtest.retrieve()
+        assert "DepthTest" in [p.name for p in depthtest.get_parents()]
+        assert 234.4 == depthtest.get_property("temperature").value
+        assert "°C" == depthtest.get_property("temperature").unit
+        assert 3.0 == depthtest.get_property("depth").value
+
         # Should have a responsible person
         self.assertIsNotNone(exp.get_property("responsible"))
         person = db.Record(id=exp.get_property("responsible").value[0])
@@ -478,3 +498,17 @@ class CrawlerTest(unittest.TestCase):
 
         # Should have a description
         self.assertIsNotNone(ana.description)
+
+    def test_exampleh5(self):
+        examp = db.execute_query("FIND Record ExampleH5", unique=True)
+
+        for prop in examp.properties:
+            if prop.name == 'group_level1_a':
+                self.assertTrue(retrieve_entity_with_id(prop.value).get_property("group_level2_aa") is not None)
+                self.assertTrue(retrieve_entity_with_id(prop.value).get_property("group_level1_a") is None)
+            elif prop.name == 'group_level1_b':
+                self.assertTrue(retrieve_entity_with_id(prop.value).get_property("level1_b_floats") is not None)
+            elif prop.name == 'group_level1_c':
+                self.assertTrue(retrieve_entity_with_id(prop.value).get_property("level1_c_floats") is not None)
+            elif prop.name == 'root_integers':
+                self.assertTrue(retrieve_entity_with_id(prop.value).get_property("single_attribute") is not None)
diff --git a/integrationtests/test_data_model.py b/integrationtests/test_data_model.py
index 6f530719a810d76e5cc5a2c59fcd2d0325ff5268..2949fa81727a6c61a8646a48c249204fa87542d8 100644
--- a/integrationtests/test_data_model.py
+++ b/integrationtests/test_data_model.py
@@ -33,13 +33,6 @@ class DataModelTest(unittest.TestCase):
         rt = db.execute_query("FIND RECORDTYPE TestRecord", unique=True)
         assert rt.get_property("test") is not None
 
-    def tearDown(self):
-        try:
-            tests = db.execute_query("FIND test*")
-            tests.delete()
-        except Exception:
-            pass
-
     def test_missing(self):
         # Test sync with missing prop
         # insert propt
@@ -52,3 +45,19 @@ class DataModelTest(unittest.TestCase):
         dm.sync_data_model(noquestion=True)
         rt = db.execute_query("FIND RECORDTYPE TestRecord", unique=True)
         assert rt.get_property("testproperty") is not None
+
+    def test_get_existing_entities(self):
+        db.RecordType(name="TestRecord").insert()
+        c = db.Container().extend([
+            db.Property(name="test"),
+            db.RecordType(name="TestRecord")])
+        exist = DataModel.get_existing_entities(c)
+        assert len(exist) == 1
+        assert exist[0].name == "TestRecord"
+
+    def tearDown(self):
+        try:
+            tests = db.execute_query("FIND test*")
+            tests.delete()
+        except Exception:
+            pass
diff --git a/integrationtests/test_datamodel.schema.json b/integrationtests/test_datamodel.schema.json
new file mode 100644
index 0000000000000000000000000000000000000000..356964702dd83a8c81edf1e8d72bf4a30468e6f2
--- /dev/null
+++ b/integrationtests/test_datamodel.schema.json
@@ -0,0 +1,85 @@
+[
+  {
+    "title": "TestTypeWithAtomicProps",
+    "description": "RecordType with scalar atomic properties",
+    "type": "object",
+    "properties": {
+      "simple_text_prop": { "type": "string" },
+      "int_prop_with_name": { "type": "integer", "title": "IntegerProperty" },
+      "double_prop": {
+        "type": "number",
+        "description": "Some generic double-valued property"
+      },
"boolean" }, + "datetime_prop": { "type": "string", "format": "date-time" }, + "date_prop": { "type": "string", "format": "date" } + }, + "required": [ "simple_text_prop", "double_prop" ] + }, + { + "title": "TestTypeWithReferencesAndEnum", + "type": "object", + "properties": { + "TestTypeWithAtomicProps": {}, + "OtherReference": { + "type": "object", + "description": "Some generic refernced RecordType", + "properties": {} + }, + "named_refernce": { + "type": "object", + "title": "NamedReference", + "properties": { + "simple_text_prop": {} + } + }, + "string_enum": { + "type": "string", + "enum": [ "StringEnumA", "StringEnumB", "StringEnumC" ] + }, + "named_enum": { + "type": "string", + "title": "NamedEnum", + "enum": [ "NameA", "NameB", "NameC" ] + } + } + }, + { + "title": "TestTypeWithLists", + "type": "object", + "properties": { + "string_list": { + "type": "array", + "description": "A list of words", + "items": { "type": "string" } + }, + "named_int_list": { + "type": "array", + "title": "NamedIntList", + "items": { "type": "integer" } + }, + "ListRecordType": { + "type": "array", + "items": { "type": "object", "properties": {} } + }, + "NamedReferenceList": { + "type": "array", + "items": { + "title": "ReferencedListTypeWithName", + "type": "object", + "description": "Referenced by a named list-of-references property", + "properties": { + "double_prop": {} + } + } + }, + "ListNumberEnum": { + "type": "array", + "items": { + "type": "number", + "enum": [ 1.1, 2.2, 3.3 ] + } + } + } + } +] diff --git a/integrationtests/test_datamodel_problems.py b/integrationtests/test_datamodel_problems.py index 7d56f4da8eea34604ed1c820e14555f087c353bd..3bca302dd2a337cee7fd023ee6a64c5185bc99f5 100644 --- a/integrationtests/test_datamodel_problems.py +++ b/integrationtests/test_datamodel_problems.py @@ -44,12 +44,15 @@ def setup_module(): print(delete_exc) +@pytest.fixture(autouse=True) def setup(): - """No further setup""" + """Same as module setup.""" + setup_module() + yield None setup_module() -def teardown(): +def teardown_module(): """Clear and delete again.""" setup_module() diff --git a/integrationtests/test_im_und_export.py b/integrationtests/test_im_und_export.py index 5c7584e6f98ee792789f144d89f13ef84a7467fc..8ea45fd2cebbcb2c3be6c8cb79805204486f7862 100644 --- a/integrationtests/test_im_und_export.py +++ b/integrationtests/test_im_und_export.py @@ -3,25 +3,21 @@ import os from tempfile import TemporaryDirectory import caosdb as db - -from caosadvancedtools.export_related import export +from caosadvancedtools.export_related import export_related_to from caosadvancedtools.import_from_xml import import_xml if __name__ == "__main__": print("Conducting im- and export tests") rec = db.execute_query("FIND 2019-02-03_really_cool_finding", unique=True) directory = TemporaryDirectory() - export(rec.id, directory=directory.name) + export_related_to(rec.id, directory=directory.name) # delete everything - rec = db.execute_query("FIND record which was inserted by me") - prop = db.execute_query("FIND property which was inserted by me") - rt = db.execute_query("FIND recordtype which was inserted by me") - fi = db.execute_query("FIND file which was inserted by me") - c = db.Container() - c.extend(rec+prop+rt+fi) - c.delete() + print("Clearing database") + recs = db.execute_query("FIND entity with id>99") + recs.delete() assert 0 == len(db.execute_query("FIND File which is stored at " "**/poster.pdf")) + print("Importing stored elements") import_xml(os.path.join(directory.name, "caosdb_data.xml"), 
interactive=False) # The following tests the existence of some required entities. @@ -32,3 +28,4 @@ if __name__ == "__main__": db.execute_query("FIND RecordType Person", unique=True) db.execute_query("FIND Record Person with firstname=Only", unique=True) db.execute_query("FIND File which is stored at **/poster.pdf", unique=True) + print("Found all required Records and Files.") diff --git a/integrationtests/test_json_schema_datamodel_parser.py b/integrationtests/test_json_schema_datamodel_parser.py new file mode 100644 index 0000000000000000000000000000000000000000..21ae8d2d7bad5527a7a314220b38af8ff816475f --- /dev/null +++ b/integrationtests/test_json_schema_datamodel_parser.py @@ -0,0 +1,174 @@ +# +# This file is a part of the CaosDB Project. +# +# Copyright (C) 2022 IndiScale GmbH <info@indiscale.com> +# Copyright (C) 2022 Florian Spreckelsen <f.spreckelsen@indiscale.com> +# +# This program is free software: you can redistribute it and/or modify it under +# the terms of the GNU Affero General Public License as published by the Free +# Software Foundation, either version 3 of the License, or (at your option) any +# later version. +# +# This program is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +# FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more +# details. +# +# You should have received a copy of the GNU Affero General Public License along +# with this program. If not, see <https://www.gnu.org/licenses/>. +# + +import os + +import caosdb as db +from caosadvancedtools.models.parser import parse_model_from_json_schema + + +def _clear_db(): + ents = db.execute_query("FIND ENTITY WITH ID>99") + if ents: + ents.delete() + + +def setup_module(): + _clear_db() + + +def teardown_module(): + _clear_db() + + +def _load_and_sync(fname): + """Load datamodel from json schema in fname and synchronize it without asking. 
+ + """ + # @author Florian Spreckelsen + # @date 2022-03-23 + fpath = os.path.join(os.path.dirname(os.path.abspath(__file__)), fname) + model = parse_model_from_json_schema(fpath) + model.sync_data_model(noquestion=True) + + +def test_json_parsed_datamodel(): + # @author Florian Spreckelsen + # @date 2022-03-23 + + _load_and_sync("test_datamodel.schema.json") + + # RecordType with atomic properties + rt1 = db.execute_query( + "FIND RECORDTYPE TestTypeWithAtomicProps", unique=True) + assert rt1.description == "RecordType with scalar atomic properties" + assert rt1.get_property("simple_text_prop") is not None + assert rt1.get_property("simple_text_prop").datatype == db.TEXT + assert rt1.get_importance("simple_text_prop") == db.OBLIGATORY + + assert rt1.get_property("IntegerProperty") is not None + assert rt1.get_property("IntegerProperty").datatype == db.INTEGER + assert rt1.get_importance("IntegerProperty") == db.RECOMMENDED + + assert rt1.get_property("double_prop") is not None + assert rt1.get_property("double_prop").datatype == db.DOUBLE + assert rt1.get_importance("double_prop") == db.OBLIGATORY + assert (db.Property(name="double_prop").retrieve().description == + "Some generic double-valued property") + + further_props = [ + ("bool_prop", db.BOOLEAN), + ("datetime_prop", db.DATETIME), + ("date_prop", db.DATETIME) + ] + for name, dtype in further_props: + assert rt1.get_property(name) is not None + assert rt1.get_property(name).datatype == dtype + assert rt1.get_importance(name) == db.RECOMMENDED + + # RecordType with references and enums + rt2 = db.execute_query( + "FIND RECORDTYPE TestTypeWithReferencesAndEnum", unique=True) + assert rt2.get_property(rt1.name) is not None + assert rt2.get_property(rt1.name).is_reference() + assert rt2.get_property(rt1.name).name == rt1.name + assert rt2.get_property(rt1.name).id == rt1.id + + other_ref_type = db.execute_query( + "FIND RECORDTYPE OtherReference", unique=True) + assert rt2.get_property(other_ref_type.name) is not None + assert rt2.get_property(other_ref_type.name).is_reference() + assert rt2.get_property(other_ref_type.name).name == other_ref_type.name + assert rt2.get_property(other_ref_type.name).id == other_ref_type.id + assert other_ref_type.description == "Some generic refernced RecordType" + assert len(other_ref_type.properties) == 0 + + named_ref_type = db.execute_query( + "FIND RECORDTYPE NamedReference", unique=True) + assert rt2.get_property(named_ref_type.name) is not None + assert rt2.get_property(named_ref_type.name).is_reference() + assert rt2.get_property(named_ref_type.name).name == named_ref_type.name + assert rt2.get_property(named_ref_type.name).id == named_ref_type.id + assert named_ref_type.get_property("simple_text_prop") is not None + assert (named_ref_type.get_property("simple_text_prop").id == + rt1.get_property("simple_text_prop").id) + assert (named_ref_type.get_property("simple_text_prop").datatype == + rt1.get_property("simple_text_prop").datatype) + + enums = { + "string_enum": ["StringEnumA", "StringEnumB", "StringEnumC"], + "NamedEnum": ["NameA", "NameB", "NameC"] + } + for enum_type_name, enum_names in enums.items(): + enum_type = db.execute_query( + f"FIND RECORDTYPE {enum_type_name}", unique=True) + assert len(enum_type.properties) == 0 + enum_records = db.execute_query(f"FIND RECORD {enum_type_name}") + assert len(enum_records) == len(enum_names) + for rec in enum_records: + assert rec.name in enum_names + assert rt2.get_property(enum_type_name) is not None + assert 
rt2.get_property(enum_type_name).is_reference() + assert rt2.get_property(enum_type_name).name == enum_type.name + assert rt2.get_property(enum_type_name).id == enum_type.id + + # Recordtype with lists + rt3 = db.execute_query("FIND RECORDTYPE TestTypeWithLists", unique=True) + assert rt3.get_property("string_list") is not None + assert rt3.get_property("string_list").datatype == db.LIST(db.TEXT) + string_list_prop = db.Property(name="string_list").retrieve() + assert string_list_prop.description == "A list of words" + assert string_list_prop.datatype == db.LIST(db.TEXT) + assert string_list_prop.id == rt3.get_property("string_list").id + + assert rt3.get_property("NamedIntList") is not None + assert rt3.get_property("NamedIntList").datatype == db.LIST(db.INTEGER) + + # This is a list of a plain references to a specific type + list_rt = db.execute_query("FIND RECORDTYPE ListRecordType", unique=True) + assert len(list_rt.properties) == 0 + assert rt3.get_property(list_rt.name) is not None + assert rt3.get_property(list_rt.name).is_reference() + assert rt3.get_property(list_rt.name).datatype == db.LIST(list_rt) + assert rt3.get_property(list_rt.name).id == list_rt.id + + # This is a list property of its own, referencing another separate RT + referenced_list_rt = db.execute_query( + "FIND RECORDTYPE ReferencedListTypeWithName", unique=True) + assert referenced_list_rt.description == "Referenced by a named list-of-references property" + assert referenced_list_rt.get_property("double_prop") is not None + assert (referenced_list_rt.get_property("double_prop").id == + rt1.get_property("double_prop").id) + assert rt3.get_property("NamedReferenceList") is not None + assert rt3.get_property("NamedReferenceList").is_reference() + assert rt3.get_property( + "NamedReferenceList").datatype == db.LIST(referenced_list_rt) + assert rt3.get_property("NamedReferenceList").id != referenced_list_rt.id + + enum_type = db.execute_query("FIND RECORDTYPE ListNumberEnum", unique=True) + assert len(enum_type.properties) == 0 + enum_names = ["1.1", "2.2", "3.3"] + enum_records = db.execute_query("FIND RECORD ListNumberEnum") + assert len(enum_records) == len(enum_names) + for rec in enum_records: + assert rec.name in enum_names + assert rt3.get_property(enum_type.name) is not None + assert rt3.get_property(enum_type.name).datatype == db.LIST(enum_type) + assert rt3.get_property(enum_type.name).id == enum_type.id diff --git a/integrationtests/test_yaml_parser.py b/integrationtests/test_yaml_parser.py new file mode 100644 index 0000000000000000000000000000000000000000..e2a2c4c056ced56d2605d93914186c2cba97e137 --- /dev/null +++ b/integrationtests/test_yaml_parser.py @@ -0,0 +1,69 @@ +# encoding: utf-8 +# +# This file is a part of the CaosDB Project. +# +# Copyright (C) 2022 IndiScale GmbH <info@indiscale.com> +# Copyright (C) 2022 Florian Spreckelsen <f.spreckelsen@indiscale.com> +# +# This program is free software: you can redistribute it and/or modify it under +# the terms of the GNU Affero General Public License as published by the Free +# Software Foundation, either version 3 of the License, or (at your option) any +# later version. +# +# This program is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +# FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more +# details. +# +# You should have received a copy of the GNU Affero General Public License along +# with this program. 
If not, see <https://www.gnu.org/licenses/>. +# + +import caosdb as db +from caosadvancedtools.models.parser import parse_model_from_string + + +def _delete_everything(): + ents = db.execute_query("FIND ENTITY WITH ID > 99") + if ents: + ents.delete() + + +def setup_module(): + _delete_everything() + + +def teardown_module(): + _delete_everything() + + +def test_internal_props_in_extern(): + """Test adding the internal `name` property as a parent to an existing + property. + + """ + + model = """ +extern: +- name +- test_name +- description +- unit +test_name: + inherit_from_suggested: + - name + - description + - unit +""" + db.Property(name="test_name", datatype=db.TEXT).insert() + ents = parse_model_from_string(model) + ents.sync_data_model(noquestion=True) + + test_prop = db.Property(name="test_name").retrieve() + assert len(test_prop.parents) == 3 + desc_prop = db.Property(name="description").retrieve() + name_prop = db.Property(name="name").retrieve() + unit_prop = db.Property(name="unit").retrieve() + assert test_prop.has_parent(desc_prop) + assert test_prop.has_parent(name_prop) + assert test_prop.has_parent(unit_prop) diff --git a/integrationtests/update_analysis.py b/integrationtests/update_analysis.py new file mode 100644 index 0000000000000000000000000000000000000000..bd18ab375437bec02320dcfd269896c2ba7e2bb0 --- /dev/null +++ b/integrationtests/update_analysis.py @@ -0,0 +1,47 @@ +#!/usr/bin/env python3 +# encoding: utf-8 +# +# ** header v3.0 +# This file is a part of the CaosDB Project. +# +# Copyright (C) 2021 Indiscale GmbH <info@indiscale.com> +# Copyright (C) 2021 Henrik tom Wörden <h.tomwoerden@indiscale.com> +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see <https://www.gnu.org/licenses/>. +# +# ** end header +# + +"""Example update script. An anlysis Record is retrieved and passed to the +generic run function which then calls the appropriate script based on the +Record. + +The simple query here could be replaced with something that e.g. retrieves all +entities that where changed within a certain period of time. 
+ +""" + +import sys + +import caosdb as db +from caosadvancedtools.serverside.generic_analysis import run + + +def main(): + da = db.execute_query("FIND Analysis with identifier=TEST", unique=True) + run(da) + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/pylintrc b/pylintrc new file mode 100644 index 0000000000000000000000000000000000000000..625f83ce950841f7a239538123ef7b5812fc5c5f --- /dev/null +++ b/pylintrc @@ -0,0 +1,19 @@ +# -*- mode:conf; -*- + +[FORMAT] +# Good variable names which should always be accepted, separated by a comma +good-names=ii,rt,df + +[TYPECHECK] +# List of module names for which member attributes should not be checked +# (useful for modules/projects where namespaces are manipulated during runtime +# and thus existing member attributes cannot be deduced by static analysis +ignored-modules=etree,h5py,labfolder + +[MASTER] +# TODO: The max_inferred size is necessary for https://github.com/PyCQA/pylint/issues/4577, +# otherwise pandas.read_csv's return value would be inferred as TextFileReader. +init-hook= + import sys; sys.path.extend(["src/caosadvancedtools"]); + import astroid; astroid.context.InferenceContext.max_inferred = 500; + diff --git a/pytest.ini b/pytest.ini index 211913fa06d4e0a46c9c9024e147c5313e4746e1..e65efaf9aaf061a8a1ec0040f87d682536fac4c2 100644 --- a/pytest.ini +++ b/pytest.ini @@ -1,4 +1,3 @@ [pytest] testpaths = unittests addopts = -vv -python_paths = src diff --git a/release.sh b/release.sh new file mode 100755 index 0000000000000000000000000000000000000000..1af097f014de6cd9eb3d3e8ba5da34aea0fe1671 --- /dev/null +++ b/release.sh @@ -0,0 +1,4 @@ +#!/bin/bash +rm -rf dist/ build/ .eggs/ +python setup.py sdist bdist_wheel +python -m twine upload -s dist/* diff --git a/setup.py b/setup.py index 8902af8c2f03b4e5972beeb85fbf4cc05d66d730..929613de35de01da98b02c77cd76b17b04784bd8 100755 --- a/setup.py +++ b/setup.py @@ -46,8 +46,8 @@ from setuptools import find_packages, setup ######################################################################## MAJOR = 0 -MINOR = 2 -MICRO = 0 +MINOR = 4 +MICRO = 2 PRE = "" # e.g. rc0, alpha.1, 0.beta-23 ISRELEASED = False @@ -154,14 +154,19 @@ def setup_package(): long_description_content_type="text/markdown", author='Henrik tom Wörden', author_email='h.tomwoerden@indiscale.com', - install_requires=["caosdb>=0.4.0", + install_requires=["caosdb>=0.7.0", + "jsonschema>=4.4.0", + "numpy>=1.17.3", "openpyxl>=3.0.0", - "xlrd==1.2.0", + "pandas>=1.2.0", + "xlrd>=2.0", ], + extras_require={"h5-crawler": ["h5py>=3.3.0", ], + }, packages=find_packages('src'), package_dir={'': 'src'}, setup_requires=["pytest-runner>=2.0,<3dev"], - tests_require=["pytest", "pytest-cov", "coverage>=4.4.2"], + tests_require=["pytest", "pytest-pythonpath", "pytest-cov", "coverage>=4.4.2"], ) try: setup(**metadata) diff --git a/src/caosadvancedtools/bloxberg/__init__.py b/src/caosadvancedtools/bloxberg/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..5ca50276b8fd48370fd84bd0f5358dd1e48d6b8e --- /dev/null +++ b/src/caosadvancedtools/bloxberg/__init__.py @@ -0,0 +1,4 @@ +"""Integration with the Bloxberg proof-of-existence blockchain. 
+""" + +print("Warning: The Bloxberg module is still experimental and under active development.") diff --git a/src/caosadvancedtools/bloxberg/bloxberg.py b/src/caosadvancedtools/bloxberg/bloxberg.py new file mode 100644 index 0000000000000000000000000000000000000000..42af1e11a23a37214ec294b8032517bb5c70bb5b --- /dev/null +++ b/src/caosadvancedtools/bloxberg/bloxberg.py @@ -0,0 +1,197 @@ +# This file is a part of the CaosDB Project. +# +# Copyright (C) 2021 IndiScale GmbH <info@indiscale.com> +# Copyright (C) 2021 Daniel Hornung <d.hornung@indiscale.com> +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see <https://www.gnu.org/licenses/>. +"""Interaction with the Bloxberg blockchain. +""" + + +import hashlib +import json +import secrets + +import caosdb as db + +from ..models.parser import parse_model_from_string +from . import swagger_client + + +__model_yaml = """ +BloxbergCertificate: + obligatory_properties: + pepper: + datatype: TEXT + hash: + datatype: TEXT + proofValue: + datatype: TEXT + certificateJSON: + datatype: TEXT + recommended_properties: + certified: + datatype: REFERENCE +""" +__model = parse_model_from_string(__model_yaml) + + +class Bloxberg: + """A Bloxberg instance can be used to obtain or verify certificates.""" + + def __init__(self, connection=None): + """A Bloxberg instance can be used to obtain or verify certificates. + +Parameters +---------- +connection : dict +A dict with the following keys: + - url : The bloxberg URL. Default is "https://qa.certify.bloxberg.org" + """ + self._create_conf(connection) + self._api_client = swagger_client.ApiClient(configuration=self._conf) + self._api = swagger_client.CertificateApi(self._api_client) + + def _create_conf(self, connection=None): + """Generate a Swagger configuration object.""" + self._conf = swagger_client.Configuration() + if connection: + if "URL" in connection: + self._conf.host = connection["URL"] + + def certify(self, entity): + """Attempt to certify the given `entity` and return a certificate Record. + +Parameters +---------- +entity : caosdb.Entity +The entity to be certified + +Returns +------- +out : caosdb.Record +A BloxbergCertificate Record with all the necessary Properties. +""" + # Calculate hash + pepper = str(secrets.randbits(1024)) + entity.retrieve() + hasher = hashlib.sha256() + hasher.update(pepper.encode(encoding="utf8")) + hasher.update(str(entity).encode(encoding="utf8")) + entity_hash = "0x" + hasher.hexdigest() + print(entity_hash) + pubkey = "0x9858eC18a269EE69ebfD7C38eb297996827DDa98" # TODO The key of the API server? 
+        # Create body
+        body = swagger_client.Batch(public_key=pubkey, crid=[entity_hash], crid_type="sha2-256",
+                                    enable_ipfs=False)
+        # Submit hash & obtain response
+        result = self._api.create_bloxberg_certificate_create_bloxberg_certificate_post(body=body)
+        attribute_map = result[0].attribute_map
+        cert = result[0].to_dict()
+        for old, new in attribute_map.items():
+            if old == new:
+                continue
+            cert[new] = cert.pop(old)
+        json_s = json.dumps(cert)
+        # Generate result Record
+        cert_rec = db.Record().add_parent("BloxbergCertificate")
+        # Extract information and put into result
+        cert_rec.add_property(property="certified", value=entity)
+        cert_rec.add_property(property="pepper", value=pepper)
+        cert_rec.add_property(property="hash", value=entity_hash)
+        cert_rec.add_property(property="proofvalue", value=cert["proof"]["proofValue"])
+        cert_rec.add_property(property="certificateJSON", value=json_s)
+        # Return result
+        return cert_rec
+
+    def verify(self, certificate):
+        """Attempt to verify the certificate.
+
+A certificate passes verification if the Bloxberg instance says it is good. Typical use cases may
+also include the `validate` step to make sure that the certificate's original data exists and
+contains what it claimed to contain when the certificate was created.
+
+This method does nothing if the verification passes, else it raises an exception.
+
+Parameters
+----------
+certificate : caosdb.Record
+The BloxbergCertificate Record which shall be verified.
+
+        """
+        raise NotImplementedError("Bloxberg first needs to implement a verification API method.")
+
+    @staticmethod
+    def json_from_certificate(certificate, filename=None):
+        """Generate a qa.certify.bloxberg.org JSON string, optionally writing it to a file.
+
+Parameters
+----------
+certificate : caosdb.Record
+The BloxbergCertificate Record for which the JSON is generated.
+
+filename : str
+Write the JSON to this file.
+"""
+        content = {}
+
+        return content
+
+
+def ensure_data_model(force=False):
+    """Make sure that the data model fits our needs.
+
+    Most importantly, this means that a suitable RecordType "BloxbergCertificate" must exist.
+    """
+    __model.sync_data_model(noquestion=force)
+
+
+def certify_entity(entity, json_filename=None):
+    """Certify the given entity and store the result in CaosDB.
+
+Parameters
+----------
+entity : caosdb.Entity
+    The Entity to be certified.
+
+json_filename : str
+    If given, store the JSON here.
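+
+Note: The certificate Record is inserted into CaosDB before the optional JSON
+file is written.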
+""" + if isinstance(entity, int): + entity = db.Entity(id=entity) + + blx = Bloxberg() + print("Obtaining certificate...") + certificate = blx.certify(entity) + print("Certificate was successfully obtained.") + certificate.insert() + print("Certificate was stored in CaosDB.") + + if json_filename: + with open(json_filename, "w") as json_file: + json_file.write(certificate.get_property("certificateJSON").value) + + +def demo_run(): + """Run the core functions for demonstration purposes.""" + print("Making sure that the remote data model is up to date.") + ensure_data_model() + print("Data model is up to date.") + import caosdb as db + CertRT = db.RecordType(name="BloxbergCertificate").retrieve() + print("Certifying the `BloxbergCertificate` RecordType...") + json_filename = "/tmp/cert.json" + certify_entity(CertRT, json_filename=json_filename) + print("Certificate json file can be found here: {}".format(json_filename)) + print("You can verify the certificate here: https://certify.bloxberg.org/verify") diff --git a/src/caosadvancedtools/bloxberg/swagger_client/__init__.py b/src/caosadvancedtools/bloxberg/swagger_client/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..136c5b27a37cfbd9135230468ae5a29cb0eb2b77 --- /dev/null +++ b/src/caosadvancedtools/bloxberg/swagger_client/__init__.py @@ -0,0 +1,34 @@ +# coding: utf-8 + +# flake8: noqa + +""" + Research Object Certification + + No description provided (generated by Swagger Codegen https://github.com/swagger-api/swagger-codegen) # noqa: E501 + + OpenAPI spec version: 0.2.0 + + Generated by: https://github.com/swagger-api/swagger-codegen.git +""" + +from __future__ import absolute_import + +# Fake the installation +import sys, pathlib +__this_dir = str(pathlib.Path(__file__).parent.parent) +if __this_dir not in sys.path: + sys.path.append(__this_dir) + +# import apis into sdk package +from swagger_client.api.certificate_api import CertificateApi +from swagger_client.api.pdf_api import PdfApi +# import ApiClient +from swagger_client.api_client import ApiClient +from swagger_client.configuration import Configuration +# import models into sdk package +from swagger_client.models.batch import Batch +from swagger_client.models.controller_cert_tools_generate_pdf_json_certificate import ControllerCertToolsGeneratePdfJsonCertificate +from swagger_client.models.controller_cert_tools_generate_unsigned_certificate_json_certificate import ControllerCertToolsGenerateUnsignedCertificateJsonCertificate +from swagger_client.models.http_validation_error import HTTPValidationError +from swagger_client.models.validation_error import ValidationError diff --git a/src/caosadvancedtools/bloxberg/swagger_client/api/__init__.py b/src/caosadvancedtools/bloxberg/swagger_client/api/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..d33c26ea8bc245108934d5e0e9fdcd046da3232e --- /dev/null +++ b/src/caosadvancedtools/bloxberg/swagger_client/api/__init__.py @@ -0,0 +1,7 @@ +from __future__ import absolute_import + +# flake8: noqa + +# import apis into api package +from swagger_client.api.certificate_api import CertificateApi +from swagger_client.api.pdf_api import PdfApi diff --git a/src/caosadvancedtools/bloxberg/swagger_client/api/certificate_api.py b/src/caosadvancedtools/bloxberg/swagger_client/api/certificate_api.py new file mode 100644 index 0000000000000000000000000000000000000000..0f0f1c6a5a51ff4d2338df4c6e233b93fc2a950a --- /dev/null +++ b/src/caosadvancedtools/bloxberg/swagger_client/api/certificate_api.py 
@@ -0,0 +1,132 @@ +# coding: utf-8 + +""" + Research Object Certification + + No description provided (generated by Swagger Codegen https://github.com/swagger-api/swagger-codegen) # noqa: E501 + + OpenAPI spec version: 0.2.0 + + Generated by: https://github.com/swagger-api/swagger-codegen.git +""" + +from __future__ import absolute_import + +import re # noqa: F401 + +# python 2 and python 3 compatibility library +import six + +from swagger_client.api_client import ApiClient + + +class CertificateApi(object): + """NOTE: This class is auto generated by the swagger code generator program. + + Do not edit the class manually. + Ref: https://github.com/swagger-api/swagger-codegen + """ + + def __init__(self, api_client=None): + if api_client is None: + api_client = ApiClient() + self.api_client = api_client + + def create_bloxberg_certificate_create_bloxberg_certificate_post(self, body, **kwargs): # noqa: E501 + """Createbloxbergcertificate # noqa: E501 + + Creates, transacts, and signs a research object certificate on the bloxberg blockchain. Hashes must be generated client side for each desired file and provided in an array. Each hash corresponds to one research object certificate returned in a JSON object array. # noqa: E501 + This method makes a synchronous HTTP request by default. To make an + asynchronous HTTP request, please pass async_req=True + >>> thread = api.create_bloxberg_certificate_create_bloxberg_certificate_post(body, async_req=True) + >>> result = thread.get() + + :param async_req bool + :param Batch body: (required) + :return: list[ControllerCertToolsGenerateUnsignedCertificateJsonCertificate] + If the method is called asynchronously, + returns the request thread. + """ + kwargs['_return_http_data_only'] = True + if kwargs.get('async_req'): + return self.create_bloxberg_certificate_create_bloxberg_certificate_post_with_http_info(body, **kwargs) # noqa: E501 + else: + (data) = self.create_bloxberg_certificate_create_bloxberg_certificate_post_with_http_info(body, **kwargs) # noqa: E501 + return data + + def create_bloxberg_certificate_create_bloxberg_certificate_post_with_http_info(self, body, **kwargs): # noqa: E501 + """Createbloxbergcertificate # noqa: E501 + + Creates, transacts, and signs a research object certificate on the bloxberg blockchain. Hashes must be generated client side for each desired file and provided in an array. Each hash corresponds to one research object certificate returned in a JSON object array. # noqa: E501 + This method makes a synchronous HTTP request by default. To make an + asynchronous HTTP request, please pass async_req=True + >>> thread = api.create_bloxberg_certificate_create_bloxberg_certificate_post_with_http_info(body, async_req=True) + >>> result = thread.get() + + :param async_req bool + :param Batch body: (required) + :return: list[ControllerCertToolsGenerateUnsignedCertificateJsonCertificate] + If the method is called asynchronously, + returns the request thread. 
+ """ + + all_params = ['body'] # noqa: E501 + all_params.append('async_req') + all_params.append('_return_http_data_only') + all_params.append('_preload_content') + all_params.append('_request_timeout') + + params = locals() + for key, val in six.iteritems(params['kwargs']): + if key not in all_params: + raise TypeError( + "Got an unexpected keyword argument '%s'" + " to method create_bloxberg_certificate_create_bloxberg_certificate_post" % key + ) + params[key] = val + del params['kwargs'] + # verify the required parameter 'body' is set + if ('body' not in params or + params['body'] is None): + raise ValueError("Missing the required parameter `body` when calling `create_bloxberg_certificate_create_bloxberg_certificate_post`") # noqa: E501 + + collection_formats = {} + + path_params = {} + + query_params = [] + + header_params = {} + + form_params = [] + local_var_files = {} + + body_params = None + if 'body' in params: + body_params = params['body'] + # HTTP header `Accept` + header_params['Accept'] = self.api_client.select_header_accept( + ['application/json']) # noqa: E501 + + # HTTP header `Content-Type` + header_params['Content-Type'] = self.api_client.select_header_content_type( # noqa: E501 + ['application/json']) # noqa: E501 + + # Authentication setting + auth_settings = [] # noqa: E501 + + return self.api_client.call_api( + '/createBloxbergCertificate', 'POST', + path_params, + query_params, + header_params, + body=body_params, + post_params=form_params, + files=local_var_files, + response_type='list[ControllerCertToolsGenerateUnsignedCertificateJsonCertificate]', # noqa: E501 + auth_settings=auth_settings, + async_req=params.get('async_req'), + _return_http_data_only=params.get('_return_http_data_only'), + _preload_content=params.get('_preload_content', True), + _request_timeout=params.get('_request_timeout'), + collection_formats=collection_formats) diff --git a/src/caosadvancedtools/bloxberg/swagger_client/api/pdf_api.py b/src/caosadvancedtools/bloxberg/swagger_client/api/pdf_api.py new file mode 100644 index 0000000000000000000000000000000000000000..a5a279de21e45735be31eed1ce18fd7c275cf6cb --- /dev/null +++ b/src/caosadvancedtools/bloxberg/swagger_client/api/pdf_api.py @@ -0,0 +1,132 @@ +# coding: utf-8 + +""" + Research Object Certification + + No description provided (generated by Swagger Codegen https://github.com/swagger-api/swagger-codegen) # noqa: E501 + + OpenAPI spec version: 0.2.0 + + Generated by: https://github.com/swagger-api/swagger-codegen.git +""" + +from __future__ import absolute_import + +import re # noqa: F401 + +# python 2 and python 3 compatibility library +import six + +from swagger_client.api_client import ApiClient + + +class PdfApi(object): + """NOTE: This class is auto generated by the swagger code generator program. + + Do not edit the class manually. + Ref: https://github.com/swagger-api/swagger-codegen + """ + + def __init__(self, api_client=None): + if api_client is None: + api_client = ApiClient() + self.api_client = api_client + + def generate_pdf_generate_pdf_post(self, body, **kwargs): # noqa: E501 + """Generatepdf # noqa: E501 + + Accepts as input the response from the createBloxbergCertificate endpoint, for example a research object JSON array. Returns as response a zip archive with PDF files that correspond to the number of cryptographic identifiers provided. PDF files are embedded with the Research Object Certification which is used for verification. # noqa: E501 + This method makes a synchronous HTTP request by default. 
To make an + asynchronous HTTP request, please pass async_req=True + >>> thread = api.generate_pdf_generate_pdf_post(body, async_req=True) + >>> result = thread.get() + + :param async_req bool + :param list[ControllerCertToolsGeneratePdfJsonCertificate] body: (required) + :return: Object + If the method is called asynchronously, + returns the request thread. + """ + kwargs['_return_http_data_only'] = True + if kwargs.get('async_req'): + return self.generate_pdf_generate_pdf_post_with_http_info(body, **kwargs) # noqa: E501 + else: + (data) = self.generate_pdf_generate_pdf_post_with_http_info(body, **kwargs) # noqa: E501 + return data + + def generate_pdf_generate_pdf_post_with_http_info(self, body, **kwargs): # noqa: E501 + """Generatepdf # noqa: E501 + + Accepts as input the response from the createBloxbergCertificate endpoint, for example a research object JSON array. Returns as response a zip archive with PDF files that correspond to the number of cryptographic identifiers provided. PDF files are embedded with the Research Object Certification which is used for verification. # noqa: E501 + This method makes a synchronous HTTP request by default. To make an + asynchronous HTTP request, please pass async_req=True + >>> thread = api.generate_pdf_generate_pdf_post_with_http_info(body, async_req=True) + >>> result = thread.get() + + :param async_req bool + :param list[ControllerCertToolsGeneratePdfJsonCertificate] body: (required) + :return: Object + If the method is called asynchronously, + returns the request thread. + """ + + all_params = ['body'] # noqa: E501 + all_params.append('async_req') + all_params.append('_return_http_data_only') + all_params.append('_preload_content') + all_params.append('_request_timeout') + + params = locals() + for key, val in six.iteritems(params['kwargs']): + if key not in all_params: + raise TypeError( + "Got an unexpected keyword argument '%s'" + " to method generate_pdf_generate_pdf_post" % key + ) + params[key] = val + del params['kwargs'] + # verify the required parameter 'body' is set + if ('body' not in params or + params['body'] is None): + raise ValueError("Missing the required parameter `body` when calling `generate_pdf_generate_pdf_post`") # noqa: E501 + + collection_formats = {} + + path_params = {} + + query_params = [] + + header_params = {} + + form_params = [] + local_var_files = {} + + body_params = None + if 'body' in params: + body_params = params['body'] + # HTTP header `Accept` + header_params['Accept'] = self.api_client.select_header_accept( + ['application/json']) # noqa: E501 + + # HTTP header `Content-Type` + header_params['Content-Type'] = self.api_client.select_header_content_type( # noqa: E501 + ['application/json']) # noqa: E501 + + # Authentication setting + auth_settings = [] # noqa: E501 + + return self.api_client.call_api( + '/generatePDF', 'POST', + path_params, + query_params, + header_params, + body=body_params, + post_params=form_params, + files=local_var_files, + response_type='Object', # noqa: E501 + auth_settings=auth_settings, + async_req=params.get('async_req'), + _return_http_data_only=params.get('_return_http_data_only'), + _preload_content=params.get('_preload_content', True), + _request_timeout=params.get('_request_timeout'), + collection_formats=collection_formats) diff --git a/src/caosadvancedtools/bloxberg/swagger_client/api_client.py b/src/caosadvancedtools/bloxberg/swagger_client/api_client.py new file mode 100644 index 0000000000000000000000000000000000000000..25e6501a4e36b09bca266f2eb375807053a58870 --- 
/dev/null +++ b/src/caosadvancedtools/bloxberg/swagger_client/api_client.py @@ -0,0 +1,628 @@ +# coding: utf-8 +""" + Research Object Certification + + No description provided (generated by Swagger Codegen https://github.com/swagger-api/swagger-codegen) # noqa: E501 + + OpenAPI spec version: 0.2.0 + + Generated by: https://github.com/swagger-api/swagger-codegen.git +""" +from __future__ import absolute_import + +import datetime +import json +import mimetypes +from multiprocessing.pool import ThreadPool +import os +import re +import tempfile + +# python 2 and python 3 compatibility library +import six +from six.moves.urllib.parse import quote + +from swagger_client.configuration import Configuration +import swagger_client.models +from swagger_client import rest + + +class ApiClient(object): + """Generic API client for Swagger client library builds. + + Swagger generic API client. This client handles the client- + server communication, and is invariant across implementations. Specifics of + the methods and models for each application are generated from the Swagger + templates. + + NOTE: This class is auto generated by the swagger code generator program. + Ref: https://github.com/swagger-api/swagger-codegen + Do not edit the class manually. + + :param configuration: .Configuration object for this client + :param header_name: a header to pass when making calls to the API. + :param header_value: a header value to pass when making calls to + the API. + :param cookie: a cookie to include in the header when making calls + to the API + """ + + PRIMITIVE_TYPES = (float, bool, bytes, six.text_type) + six.integer_types + NATIVE_TYPES_MAPPING = { + 'int': int, + 'long': int if six.PY3 else long, # noqa: F821 + 'float': float, + 'str': str, + 'bool': bool, + 'date': datetime.date, + 'datetime': datetime.datetime, + 'object': object, + } + + def __init__(self, configuration=None, header_name=None, header_value=None, + cookie=None): + if configuration is None: + configuration = Configuration() + self.configuration = configuration + + self.pool = ThreadPool() + self.rest_client = rest.RESTClientObject(configuration) + self.default_headers = {} + if header_name is not None: + self.default_headers[header_name] = header_value + self.cookie = cookie + # Set default User-Agent. 
+ self.user_agent = 'Swagger-Codegen/1.0.0/python' + + def __del__(self): + self.pool.close() + self.pool.join() + + @property + def user_agent(self): + """User agent for this API client""" + return self.default_headers['User-Agent'] + + @user_agent.setter + def user_agent(self, value): + self.default_headers['User-Agent'] = value + + def set_default_header(self, header_name, header_value): + self.default_headers[header_name] = header_value + + def __call_api( + self, resource_path, method, path_params=None, + query_params=None, header_params=None, body=None, post_params=None, + files=None, response_type=None, auth_settings=None, + _return_http_data_only=None, collection_formats=None, + _preload_content=True, _request_timeout=None): + + config = self.configuration + + # header parameters + header_params = header_params or {} + header_params.update(self.default_headers) + if self.cookie: + header_params['Cookie'] = self.cookie + if header_params: + header_params = self.sanitize_for_serialization(header_params) + header_params = dict(self.parameters_to_tuples(header_params, + collection_formats)) + + # path parameters + if path_params: + path_params = self.sanitize_for_serialization(path_params) + path_params = self.parameters_to_tuples(path_params, + collection_formats) + for k, v in path_params: + # specified safe chars, encode everything + resource_path = resource_path.replace( + '{%s}' % k, + quote(str(v), safe=config.safe_chars_for_path_param) + ) + + # query parameters + if query_params: + query_params = self.sanitize_for_serialization(query_params) + query_params = self.parameters_to_tuples(query_params, + collection_formats) + + # post parameters + if post_params or files: + post_params = self.prepare_post_parameters(post_params, files) + post_params = self.sanitize_for_serialization(post_params) + post_params = self.parameters_to_tuples(post_params, + collection_formats) + + # auth setting + self.update_params_for_auth(header_params, query_params, auth_settings) + + # body + if body: + body = self.sanitize_for_serialization(body) + + # request url + url = self.configuration.host + resource_path + + # perform request and return response + response_data = self.request( + method, url, query_params=query_params, headers=header_params, + post_params=post_params, body=body, + _preload_content=_preload_content, + _request_timeout=_request_timeout) + + self.last_response = response_data + + return_data = response_data + if _preload_content: + # deserialize response data + if response_type: + return_data = self.deserialize(response_data, response_type) + else: + return_data = None + + if _return_http_data_only: + return (return_data) + else: + return (return_data, response_data.status, + response_data.getheaders()) + + def sanitize_for_serialization(self, obj): + """Builds a JSON POST object. + + If obj is None, return None. + If obj is str, int, long, float, bool, return directly. + If obj is datetime.datetime, datetime.date + convert to string in iso8601 format. + If obj is list, sanitize each element in the list. + If obj is dict, return the dict. + If obj is swagger model, return the properties dict. + + :param obj: The data to serialize. + :return: The serialized form of data. 
+ """ + if obj is None: + return None + elif isinstance(obj, self.PRIMITIVE_TYPES): + return obj + elif isinstance(obj, list): + return [self.sanitize_for_serialization(sub_obj) + for sub_obj in obj] + elif isinstance(obj, tuple): + return tuple(self.sanitize_for_serialization(sub_obj) + for sub_obj in obj) + elif isinstance(obj, (datetime.datetime, datetime.date)): + return obj.isoformat() + + if isinstance(obj, dict): + obj_dict = obj + else: + # Convert model obj to dict except + # attributes `swagger_types`, `attribute_map` + # and attributes which value is not None. + # Convert attribute name to json key in + # model definition for request. + obj_dict = {obj.attribute_map[attr]: getattr(obj, attr) + for attr, _ in six.iteritems(obj.swagger_types) + if getattr(obj, attr) is not None} + + return {key: self.sanitize_for_serialization(val) + for key, val in six.iteritems(obj_dict)} + + def deserialize(self, response, response_type): + """Deserializes response into an object. + + :param response: RESTResponse object to be deserialized. + :param response_type: class literal for + deserialized object, or string of class name. + + :return: deserialized object. + """ + # handle file downloading + # save response body into a tmp file and return the instance + if response_type == "file": + return self.__deserialize_file(response) + + # fetch data from response object + try: + data = json.loads(response.data) + except ValueError: + data = response.data + + return self.__deserialize(data, response_type) + + def __deserialize(self, data, klass): + """Deserializes dict, list, str into an object. + + :param data: dict, list or str. + :param klass: class literal, or string of class name. + + :return: object. + """ + if data is None: + return None + + if type(klass) == str: + if klass.startswith('list['): + sub_kls = re.match(r'list\[(.*)\]', klass).group(1) + return [self.__deserialize(sub_data, sub_kls) + for sub_data in data] + + if klass.startswith('dict('): + sub_kls = re.match(r'dict\(([^,]*), (.*)\)', klass).group(2) + return {k: self.__deserialize(v, sub_kls) + for k, v in six.iteritems(data)} + + # convert str to class + if klass in self.NATIVE_TYPES_MAPPING: + klass = self.NATIVE_TYPES_MAPPING[klass] + else: + klass = getattr(swagger_client.models, klass) + + if klass in self.PRIMITIVE_TYPES: + return self.__deserialize_primitive(data, klass) + elif klass == object: + return self.__deserialize_object(data) + elif klass == datetime.date: + return self.__deserialize_date(data) + elif klass == datetime.datetime: + return self.__deserialize_datatime(data) + else: + return self.__deserialize_model(data, klass) + + def call_api(self, resource_path, method, + path_params=None, query_params=None, header_params=None, + body=None, post_params=None, files=None, + response_type=None, auth_settings=None, async_req=None, + _return_http_data_only=None, collection_formats=None, + _preload_content=True, _request_timeout=None): + """Makes the HTTP request (synchronous) and returns deserialized data. + + To make an async request, set the async_req parameter. + + :param resource_path: Path to method endpoint. + :param method: Method to call. + :param path_params: Path parameters in the url. + :param query_params: Query parameters in the url. + :param header_params: Header parameters to be + placed in the request header. + :param body: Request body. + :param post_params dict: Request post form parameters, + for `application/x-www-form-urlencoded`, `multipart/form-data`. 
+ :param auth_settings list: Auth Settings names for the request. + :param response: Response data type. + :param files dict: key -> filename, value -> filepath, + for `multipart/form-data`. + :param async_req bool: execute request asynchronously + :param _return_http_data_only: response data without head status code + and headers + :param collection_formats: dict of collection formats for path, query, + header, and post parameters. + :param _preload_content: if False, the urllib3.HTTPResponse object will + be returned without reading/decoding response + data. Default is True. + :param _request_timeout: timeout setting for this request. If one + number provided, it will be total request + timeout. It can also be a pair (tuple) of + (connection, read) timeouts. + :return: + If async_req parameter is True, + the request will be called asynchronously. + The method will return the request thread. + If parameter async_req is False or missing, + then the method will return the response directly. + """ + if not async_req: + return self.__call_api(resource_path, method, + path_params, query_params, header_params, + body, post_params, files, + response_type, auth_settings, + _return_http_data_only, collection_formats, + _preload_content, _request_timeout) + else: + thread = self.pool.apply_async(self.__call_api, (resource_path, + method, path_params, query_params, + header_params, body, + post_params, files, + response_type, auth_settings, + _return_http_data_only, + collection_formats, + _preload_content, _request_timeout)) + return thread + + def request(self, method, url, query_params=None, headers=None, + post_params=None, body=None, _preload_content=True, + _request_timeout=None): + """Makes the HTTP request using RESTClient.""" + if method == "GET": + return self.rest_client.GET(url, + query_params=query_params, + _preload_content=_preload_content, + _request_timeout=_request_timeout, + headers=headers) + elif method == "HEAD": + return self.rest_client.HEAD(url, + query_params=query_params, + _preload_content=_preload_content, + _request_timeout=_request_timeout, + headers=headers) + elif method == "OPTIONS": + return self.rest_client.OPTIONS(url, + query_params=query_params, + headers=headers, + post_params=post_params, + _preload_content=_preload_content, + _request_timeout=_request_timeout, + body=body) + elif method == "POST": + return self.rest_client.POST(url, + query_params=query_params, + headers=headers, + post_params=post_params, + _preload_content=_preload_content, + _request_timeout=_request_timeout, + body=body) + elif method == "PUT": + return self.rest_client.PUT(url, + query_params=query_params, + headers=headers, + post_params=post_params, + _preload_content=_preload_content, + _request_timeout=_request_timeout, + body=body) + elif method == "PATCH": + return self.rest_client.PATCH(url, + query_params=query_params, + headers=headers, + post_params=post_params, + _preload_content=_preload_content, + _request_timeout=_request_timeout, + body=body) + elif method == "DELETE": + return self.rest_client.DELETE(url, + query_params=query_params, + headers=headers, + _preload_content=_preload_content, + _request_timeout=_request_timeout, + body=body) + else: + raise ValueError( + "http method must be `GET`, `HEAD`, `OPTIONS`," + " `POST`, `PATCH`, `PUT` or `DELETE`." + ) + + def parameters_to_tuples(self, params, collection_formats): + """Get parameters as list of tuples, formatting collections. 
+ + :param params: Parameters as dict or list of two-tuples + :param dict collection_formats: Parameter collection formats + :return: Parameters as list of tuples, collections formatted + """ + new_params = [] + if collection_formats is None: + collection_formats = {} + for k, v in six.iteritems(params) if isinstance(params, dict) else params: # noqa: E501 + if k in collection_formats: + collection_format = collection_formats[k] + if collection_format == 'multi': + new_params.extend((k, value) for value in v) + else: + if collection_format == 'ssv': + delimiter = ' ' + elif collection_format == 'tsv': + delimiter = '\t' + elif collection_format == 'pipes': + delimiter = '|' + else: # csv is the default + delimiter = ',' + new_params.append( + (k, delimiter.join(str(value) for value in v))) + else: + new_params.append((k, v)) + return new_params + + def prepare_post_parameters(self, post_params=None, files=None): + """Builds form parameters. + + :param post_params: Normal form parameters. + :param files: File parameters. + :return: Form parameters with files. + """ + params = [] + + if post_params: + params = post_params + + if files: + for k, v in six.iteritems(files): + if not v: + continue + file_names = v if type(v) is list else [v] + for n in file_names: + with open(n, 'rb') as f: + filename = os.path.basename(f.name) + filedata = f.read() + mimetype = (mimetypes.guess_type(filename)[0] or + 'application/octet-stream') + params.append( + tuple([k, tuple([filename, filedata, mimetype])])) + + return params + + def select_header_accept(self, accepts): + """Returns `Accept` based on an array of accepts provided. + + :param accepts: List of headers. + :return: Accept (e.g. application/json). + """ + if not accepts: + return + + accepts = [x.lower() for x in accepts] + + if 'application/json' in accepts: + return 'application/json' + else: + return ', '.join(accepts) + + def select_header_content_type(self, content_types): + """Returns `Content-Type` based on an array of content_types provided. + + :param content_types: List of content-types. + :return: Content-Type (e.g. application/json). + """ + if not content_types: + return 'application/json' + + content_types = [x.lower() for x in content_types] + + if 'application/json' in content_types or '*/*' in content_types: + return 'application/json' + else: + return content_types[0] + + def update_params_for_auth(self, headers, querys, auth_settings): + """Updates header and query params based on authentication setting. + + :param headers: Header parameters dict to be updated. + :param querys: Query parameters tuple list to be updated. + :param auth_settings: Authentication setting identifiers list. + """ + if not auth_settings: + return + + for auth in auth_settings: + auth_setting = self.configuration.auth_settings().get(auth) + if auth_setting: + if not auth_setting['value']: + continue + elif auth_setting['in'] == 'header': + headers[auth_setting['key']] = auth_setting['value'] + elif auth_setting['in'] == 'query': + querys.append((auth_setting['key'], auth_setting['value'])) + else: + raise ValueError( + 'Authentication token must be in `query` or `header`' + ) + + def __deserialize_file(self, response): + """Deserializes body to file + + Saves response body into a file in a temporary folder, + using the filename from the `Content-Disposition` header if provided. + + :param response: RESTResponse. + :return: file path. 
+ """ + fd, path = tempfile.mkstemp(dir=self.configuration.temp_folder_path) + os.close(fd) + os.remove(path) + + content_disposition = response.getheader("Content-Disposition") + if content_disposition: + filename = re.search(r'filename=[\'"]?([^\'"\s]+)[\'"]?', + content_disposition).group(1) + path = os.path.join(os.path.dirname(path), filename) + + with open(path, "wb") as f: + f.write(response.data) + + return path + + def __deserialize_primitive(self, data, klass): + """Deserializes string to primitive type. + + :param data: str. + :param klass: class literal. + + :return: int, long, float, str, bool. + """ + try: + return klass(data) + except UnicodeEncodeError: + return six.text_type(data) + except TypeError: + return data + + def __deserialize_object(self, value): + """Return a original value. + + :return: object. + """ + return value + + def __deserialize_date(self, string): + """Deserializes string to date. + + :param string: str. + :return: date. + """ + try: + from dateutil.parser import parse + return parse(string).date() + except ImportError: + return string + except ValueError: + raise rest.ApiException( + status=0, + reason="Failed to parse `{0}` as date object".format(string) + ) + + def __deserialize_datatime(self, string): + """Deserializes string to datetime. + + The string should be in iso8601 datetime format. + + :param string: str. + :return: datetime. + """ + try: + from dateutil.parser import parse + return parse(string) + except ImportError: + return string + except ValueError: + raise rest.ApiException( + status=0, + reason=( + "Failed to parse `{0}` as datetime object" + .format(string) + ) + ) + + def __hasattr(self, object, name): + return name in object.__class__.__dict__ + + def __deserialize_model(self, data, klass): + """Deserializes list or dict to model. + + :param data: dict, list. + :param klass: class literal. + :return: model object. 
+ """ + + if not klass.swagger_types and not self.__hasattr(klass, 'get_real_child_model'): + return data + + kwargs = {} + if klass.swagger_types is not None: + for attr, attr_type in six.iteritems(klass.swagger_types): + if (data is not None and + klass.attribute_map[attr] in data and + isinstance(data, (list, dict))): + value = data[klass.attribute_map[attr]] + kwargs[attr] = self.__deserialize(value, attr_type) + + instance = klass(**kwargs) + + if (isinstance(instance, dict) and + klass.swagger_types is not None and + isinstance(data, dict)): + for key, value in data.items(): + if key not in klass.swagger_types: + instance[key] = value + if self.__hasattr(instance, 'get_real_child_model'): + klass_name = instance.get_real_child_model(data) + if klass_name: + instance = self.__deserialize(data, klass_name) + return instance diff --git a/src/caosadvancedtools/bloxberg/swagger_client/configuration.py b/src/caosadvancedtools/bloxberg/swagger_client/configuration.py new file mode 100644 index 0000000000000000000000000000000000000000..2be9f6a733a030d0dea2ab43b9e85f6ed15085d8 --- /dev/null +++ b/src/caosadvancedtools/bloxberg/swagger_client/configuration.py @@ -0,0 +1,244 @@ +# coding: utf-8 + +""" + Research Object Certification + + No description provided (generated by Swagger Codegen https://github.com/swagger-api/swagger-codegen) # noqa: E501 + + OpenAPI spec version: 0.2.0 + + Generated by: https://github.com/swagger-api/swagger-codegen.git +""" + +from __future__ import absolute_import + +import copy +import logging +import multiprocessing +import sys +import urllib3 + +import six +from six.moves import http_client as httplib + + +class TypeWithDefault(type): + def __init__(cls, name, bases, dct): + super(TypeWithDefault, cls).__init__(name, bases, dct) + cls._default = None + + def __call__(cls): + if cls._default is None: + cls._default = type.__call__(cls) + return copy.copy(cls._default) + + def set_default(cls, default): + cls._default = copy.copy(default) + + +class Configuration(six.with_metaclass(TypeWithDefault, object)): + """NOTE: This class is auto generated by the swagger code generator program. + + Ref: https://github.com/swagger-api/swagger-codegen + Do not edit the class manually. + """ + + def __init__(self): + """Constructor""" + # Default Base url + self.host = "https://qa.certify.bloxberg.org" + # Temp file folder for downloading files + self.temp_folder_path = None + + # Authentication Settings + # dict to store API key(s) + self.api_key = {} + # dict to store API prefix (e.g. Bearer) + self.api_key_prefix = {} + # function to refresh API key if expired + self.refresh_api_key_hook = None + # Username for HTTP basic authentication + self.username = "" + # Password for HTTP basic authentication + self.password = "" + # Logging Settings + self.logger = {} + self.logger["package_logger"] = logging.getLogger("swagger_client") + self.logger["urllib3_logger"] = logging.getLogger("urllib3") + # Log format + self.logger_format = '%(asctime)s %(levelname)s %(message)s' + # Log stream handler + self.logger_stream_handler = None + # Log file handler + self.logger_file_handler = None + # Debug file location + self.logger_file = None + # Debug switch + self.debug = False + + # SSL/TLS verification + # Set this to false to skip verifying SSL certificate when calling API + # from https server. + self.verify_ssl = True + # Set this to customize the certificate file to verify the peer. 
+ self.ssl_ca_cert = None + # client certificate file + self.cert_file = None + # client key file + self.key_file = None + # Set this to True/False to enable/disable SSL hostname verification. + self.assert_hostname = None + + # urllib3 connection pool's maximum number of connections saved + # per pool. urllib3 uses 1 connection as default value, but this is + # not the best value when you are making a lot of possibly parallel + # requests to the same host, which is often the case here. + # cpu_count * 5 is used as default value to increase performance. + self.connection_pool_maxsize = multiprocessing.cpu_count() * 5 + + # Proxy URL + self.proxy = None + # Safe chars for path_param + self.safe_chars_for_path_param = '' + + @property + def logger_file(self): + """The logger file. + + If the logger_file is None, then add stream handler and remove file + handler. Otherwise, add file handler and remove stream handler. + + :param value: The logger_file path. + :type: str + """ + return self.__logger_file + + @logger_file.setter + def logger_file(self, value): + """The logger file. + + If the logger_file is None, then add stream handler and remove file + handler. Otherwise, add file handler and remove stream handler. + + :param value: The logger_file path. + :type: str + """ + self.__logger_file = value + if self.__logger_file: + # If set logging file, + # then add file handler and remove stream handler. + self.logger_file_handler = logging.FileHandler(self.__logger_file) + self.logger_file_handler.setFormatter(self.logger_formatter) + for _, logger in six.iteritems(self.logger): + logger.addHandler(self.logger_file_handler) + if self.logger_stream_handler: + logger.removeHandler(self.logger_stream_handler) + else: + # If not set logging file, + # then add stream handler and remove file handler. + self.logger_stream_handler = logging.StreamHandler() + self.logger_stream_handler.setFormatter(self.logger_formatter) + for _, logger in six.iteritems(self.logger): + logger.addHandler(self.logger_stream_handler) + if self.logger_file_handler: + logger.removeHandler(self.logger_file_handler) + + @property + def debug(self): + """Debug status + + :param value: The debug status, True or False. + :type: bool + """ + return self.__debug + + @debug.setter + def debug(self, value): + """Debug status + + :param value: The debug status, True or False. + :type: bool + """ + self.__debug = value + if self.__debug: + # if debug status is True, turn on debug logging + for _, logger in six.iteritems(self.logger): + logger.setLevel(logging.DEBUG) + # turn on httplib debug + httplib.HTTPConnection.debuglevel = 1 + else: + # if debug status is False, turn off debug logging, + # setting log level to default `logging.WARNING` + for _, logger in six.iteritems(self.logger): + logger.setLevel(logging.WARNING) + # turn off httplib debug + httplib.HTTPConnection.debuglevel = 0 + + @property + def logger_format(self): + """The logger format. + + The logger_formatter will be updated when sets logger_format. + + :param value: The format string. + :type: str + """ + return self.__logger_format + + @logger_format.setter + def logger_format(self, value): + """The logger format. + + The logger_formatter will be updated when sets logger_format. + + :param value: The format string. + :type: str + """ + self.__logger_format = value + self.logger_formatter = logging.Formatter(self.__logger_format) + + def get_api_key_with_prefix(self, identifier): + """Gets API key (with prefix if set). + + :param identifier: The identifier of apiKey. 
+ :return: The token for api key authentication. + """ + if self.refresh_api_key_hook: + self.refresh_api_key_hook(self) + + key = self.api_key.get(identifier) + if key: + prefix = self.api_key_prefix.get(identifier) + if prefix: + return "%s %s" % (prefix, key) + else: + return key + + def get_basic_auth_token(self): + """Gets HTTP basic authentication header (string). + + :return: The token for basic HTTP authentication. + """ + return urllib3.util.make_headers( + basic_auth=self.username + ':' + self.password + ).get('authorization') + + def auth_settings(self): + """Gets Auth Settings dict for api client. + + :return: The Auth Settings information dict. + """ + return { + } + + def to_debug_report(self): + """Gets the essential information for debugging. + + :return: The report for debugging. + """ + return "Python SDK Debug Report:\n"\ + "OS: {env}\n"\ + "Python Version: {pyversion}\n"\ + "Version of the API: 0.2.0\n"\ + "SDK Package Version: 1.0.0".\ + format(env=sys.platform, pyversion=sys.version) diff --git a/src/caosadvancedtools/bloxberg/swagger_client/models/__init__.py b/src/caosadvancedtools/bloxberg/swagger_client/models/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..55b01c66f4f68f86ea6fd8bc34e61fc534d3902f --- /dev/null +++ b/src/caosadvancedtools/bloxberg/swagger_client/models/__init__.py @@ -0,0 +1,21 @@ +# coding: utf-8 + +# flake8: noqa +""" + Research Object Certification + + No description provided (generated by Swagger Codegen https://github.com/swagger-api/swagger-codegen) # noqa: E501 + + OpenAPI spec version: 0.2.0 + + Generated by: https://github.com/swagger-api/swagger-codegen.git +""" + +from __future__ import absolute_import + +# import models into model package +from swagger_client.models.batch import Batch +from swagger_client.models.controller_cert_tools_generate_pdf_json_certificate import ControllerCertToolsGeneratePdfJsonCertificate +from swagger_client.models.controller_cert_tools_generate_unsigned_certificate_json_certificate import ControllerCertToolsGenerateUnsignedCertificateJsonCertificate +from swagger_client.models.http_validation_error import HTTPValidationError +from swagger_client.models.validation_error import ValidationError diff --git a/src/caosadvancedtools/bloxberg/swagger_client/models/batch.py b/src/caosadvancedtools/bloxberg/swagger_client/models/batch.py new file mode 100644 index 0000000000000000000000000000000000000000..7a347cf7ac9148df8ec9a43200f4058f127447b9 --- /dev/null +++ b/src/caosadvancedtools/bloxberg/swagger_client/models/batch.py @@ -0,0 +1,227 @@ +# coding: utf-8 + +""" + Research Object Certification + + No description provided (generated by Swagger Codegen https://github.com/swagger-api/swagger-codegen) # noqa: E501 + + OpenAPI spec version: 0.2.0 + + Generated by: https://github.com/swagger-api/swagger-codegen.git +""" + +import pprint +import re # noqa: F401 + +import six + +class Batch(object): + """NOTE: This class is auto generated by the swagger code generator program. + + Do not edit the class manually. + """ + """ + Attributes: + swagger_types (dict): The key is attribute name + and the value is attribute type. + attribute_map (dict): The key is attribute name + and the value is json key in definition. 
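+
+ Example (an illustrative sketch only; the address and hash strings are
+ placeholders, and "sha2-256" is one of the multihash names)::
+
+     batch = Batch(public_key="0x<bloxberg-address>",
+                   crid=["<hash-of-file>"],
+                   crid_type="sha2-256",
+                   enable_ipfs=False)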
+ """ + swagger_types = { + 'public_key': 'str', + 'crid': 'list[str]', + 'crid_type': 'str', + 'enable_ipfs': 'bool', + 'metadata_json': 'str' + } + + attribute_map = { + 'public_key': 'publicKey', + 'crid': 'crid', + 'crid_type': 'cridType', + 'enable_ipfs': 'enableIPFS', + 'metadata_json': 'metadataJson' + } + + def __init__(self, public_key=None, crid=None, crid_type=None, enable_ipfs=None, metadata_json=None): # noqa: E501 + """Batch - a model defined in Swagger""" # noqa: E501 + self._public_key = None + self._crid = None + self._crid_type = None + self._enable_ipfs = None + self._metadata_json = None + self.discriminator = None + self.public_key = public_key + self.crid = crid + if crid_type is not None: + self.crid_type = crid_type + self.enable_ipfs = enable_ipfs + if metadata_json is not None: + self.metadata_json = metadata_json + + @property + def public_key(self): + """Gets the public_key of this Batch. # noqa: E501 + + Public bloxberg address where the Research Object Certificate token will be minted # noqa: E501 + + :return: The public_key of this Batch. # noqa: E501 + :rtype: str + """ + return self._public_key + + @public_key.setter + def public_key(self, public_key): + """Sets the public_key of this Batch. + + Public bloxberg address where the Research Object Certificate token will be minted # noqa: E501 + + :param public_key: The public_key of this Batch. # noqa: E501 + :type: str + """ + if public_key is None: + raise ValueError("Invalid value for `public_key`, must not be `None`") # noqa: E501 + + self._public_key = public_key + + @property + def crid(self): + """Gets the crid of this Batch. # noqa: E501 + + Cryptographic Identifier of each file you wish to certify. One certificate will be generated per hash up to a maximum of 1001 in a single request # noqa: E501 + + :return: The crid of this Batch. # noqa: E501 + :rtype: list[str] + """ + return self._crid + + @crid.setter + def crid(self, crid): + """Sets the crid of this Batch. + + Cryptographic Identifier of each file you wish to certify. One certificate will be generated per hash up to a maximum of 1001 in a single request # noqa: E501 + + :param crid: The crid of this Batch. # noqa: E501 + :type: list[str] + """ + if crid is None: + raise ValueError("Invalid value for `crid`, must not be `None`") # noqa: E501 + + self._crid = crid + + @property + def crid_type(self): + """Gets the crid_type of this Batch. # noqa: E501 + + If crid is not self-describing, provide the type of cryptographic function you used to generate the cryptographic identifier. Please use the name field from the multihash list to ensure compatibility: https://github.com/multiformats/multicodec/blob/master/table.csv # noqa: E501 + + :return: The crid_type of this Batch. # noqa: E501 + :rtype: str + """ + return self._crid_type + + @crid_type.setter + def crid_type(self, crid_type): + """Sets the crid_type of this Batch. + + If crid is not self-describing, provide the type of cryptographic function you used to generate the cryptographic identifier. Please use the name field from the multihash list to ensure compatibility: https://github.com/multiformats/multicodec/blob/master/table.csv # noqa: E501 + + :param crid_type: The crid_type of this Batch. # noqa: E501 + :type: str + """ + + self._crid_type = crid_type + + @property + def enable_ipfs(self): + """Gets the enable_ipfs of this Batch. # noqa: E501 + + EXPERIMENTAL: Set to true to enable posting certificate to IPFS. If set to false, will simply return certificates in the response. 
By default, this is disabled on the server due to performance and storage problems with IPFS # noqa: E501 + + :return: The enable_ipfs of this Batch. # noqa: E501 + :rtype: bool + """ + return self._enable_ipfs + + @enable_ipfs.setter + def enable_ipfs(self, enable_ipfs): + """Sets the enable_ipfs of this Batch. + + EXPERIMENTAL: Set to true to enable posting certificate to IPFS. If set to false, will simply return certificates in the response. By default, this is disabled on the server due to performance and storage problems with IPFS # noqa: E501 + + :param enable_ipfs: The enable_ipfs of this Batch. # noqa: E501 + :type: bool + """ + if enable_ipfs is None: + raise ValueError("Invalid value for `enable_ipfs`, must not be `None`") # noqa: E501 + + self._enable_ipfs = enable_ipfs + + @property + def metadata_json(self): + """Gets the metadata_json of this Batch. # noqa: E501 + + Provide optional metadata to describe the research object batch in more detail that will be included in the certificate. # noqa: E501 + + :return: The metadata_json of this Batch. # noqa: E501 + :rtype: str + """ + return self._metadata_json + + @metadata_json.setter + def metadata_json(self, metadata_json): + """Sets the metadata_json of this Batch. + + Provide optional metadata to describe the research object batch in more detail that will be included in the certificate. # noqa: E501 + + :param metadata_json: The metadata_json of this Batch. # noqa: E501 + :type: str + """ + + self._metadata_json = metadata_json + + def to_dict(self): + """Returns the model properties as a dict""" + result = {} + + for attr, _ in six.iteritems(self.swagger_types): + value = getattr(self, attr) + if isinstance(value, list): + result[attr] = list(map( + lambda x: x.to_dict() if hasattr(x, "to_dict") else x, + value + )) + elif hasattr(value, "to_dict"): + result[attr] = value.to_dict() + elif isinstance(value, dict): + result[attr] = dict(map( + lambda item: (item[0], item[1].to_dict()) + if hasattr(item[1], "to_dict") else item, + value.items() + )) + else: + result[attr] = value + if issubclass(Batch, dict): + for key, value in self.items(): + result[key] = value + + return result + + def to_str(self): + """Returns the string representation of the model""" + return pprint.pformat(self.to_dict()) + + def __repr__(self): + """For `print` and `pprint`""" + return self.to_str() + + def __eq__(self, other): + """Returns true if both objects are equal""" + if not isinstance(other, Batch): + return False + + return self.__dict__ == other.__dict__ + + def __ne__(self, other): + """Returns true if both objects are not equal""" + return not self == other diff --git a/src/caosadvancedtools/bloxberg/swagger_client/models/controller_cert_tools_generate_pdf_json_certificate.py b/src/caosadvancedtools/bloxberg/swagger_client/models/controller_cert_tools_generate_pdf_json_certificate.py new file mode 100644 index 0000000000000000000000000000000000000000..2d7fd2d763ba40c9a384203301aa3e70efdf7783 --- /dev/null +++ b/src/caosadvancedtools/bloxberg/swagger_client/models/controller_cert_tools_generate_pdf_json_certificate.py @@ -0,0 +1,379 @@ +# coding: utf-8 + +""" + Research Object Certification + + No description provided (generated by Swagger Codegen https://github.com/swagger-api/swagger-codegen) # noqa: E501 + + OpenAPI spec version: 0.2.0 + + Generated by: https://github.com/swagger-api/swagger-codegen.git +""" + +import pprint +import re # noqa: F401 + +import six + +class ControllerCertToolsGeneratePdfJsonCertificate(object): + """NOTE: 
This class is auto generated by the swagger code generator program. + + Do not edit the class manually. + """ + """ + Attributes: + swagger_types (dict): The key is attribute name + and the value is attribute type. + attribute_map (dict): The key is attribute name + and the value is json key in definition. + """ + swagger_types = { + 'context': 'list[str]', + 'id': 'str', + 'type': 'list[str]', + 'issuer': 'str', + 'issuance_date': 'str', + 'credential_subject': 'object', + 'display_html': 'str', + 'crid': 'str', + 'crid_type': 'str', + 'metadata_json': 'str', + 'proof': 'object' + } + + attribute_map = { + 'context': '@context', + 'id': 'id', + 'type': 'type', + 'issuer': 'issuer', + 'issuance_date': 'issuanceDate', + 'credential_subject': 'credentialSubject', + 'display_html': 'displayHtml', + 'crid': 'crid', + 'crid_type': 'cridType', + 'metadata_json': 'metadataJson', + 'proof': 'proof' + } + + def __init__(self, context=None, id=None, type=None, issuer=None, issuance_date=None, credential_subject=None, display_html=None, crid=None, crid_type=None, metadata_json=None, proof=None): # noqa: E501 + """ControllerCertToolsGeneratePdfJsonCertificate - a model defined in Swagger""" # noqa: E501 + self._context = None + self._id = None + self._type = None + self._issuer = None + self._issuance_date = None + self._credential_subject = None + self._display_html = None + self._crid = None + self._crid_type = None + self._metadata_json = None + self._proof = None + self.discriminator = None + if context is not None: + self.context = context + self.id = id + self.type = type + self.issuer = issuer + self.issuance_date = issuance_date + self.credential_subject = credential_subject + if display_html is not None: + self.display_html = display_html + self.crid = crid + if crid_type is not None: + self.crid_type = crid_type + if metadata_json is not None: + self.metadata_json = metadata_json + self.proof = proof + + @property + def context(self): + """Gets the context of this ControllerCertToolsGeneratePdfJsonCertificate. # noqa: E501 + + Relevant JSON-LD context links in order to validate Verifiable Credentials according to their spec. # noqa: E501 + + :return: The context of this ControllerCertToolsGeneratePdfJsonCertificate. # noqa: E501 + :rtype: list[str] + """ + return self._context + + @context.setter + def context(self, context): + """Sets the context of this ControllerCertToolsGeneratePdfJsonCertificate. + + Relevant JSON-LD context links in order to validate Verifiable Credentials according to their spec. # noqa: E501 + + :param context: The context of this ControllerCertToolsGeneratePdfJsonCertificate. # noqa: E501 + :type: list[str] + """ + + self._context = context + + @property + def id(self): + """Gets the id of this ControllerCertToolsGeneratePdfJsonCertificate. # noqa: E501 + + + :return: The id of this ControllerCertToolsGeneratePdfJsonCertificate. # noqa: E501 + :rtype: str + """ + return self._id + + @id.setter + def id(self, id): + """Sets the id of this ControllerCertToolsGeneratePdfJsonCertificate. + + + :param id: The id of this ControllerCertToolsGeneratePdfJsonCertificate. # noqa: E501 + :type: str + """ + if id is None: + raise ValueError("Invalid value for `id`, must not be `None`") # noqa: E501 + + self._id = id + + @property + def type(self): + """Gets the type of this ControllerCertToolsGeneratePdfJsonCertificate. # noqa: E501 + + + :return: The type of this ControllerCertToolsGeneratePdfJsonCertificate. 
# noqa: E501 + :rtype: list[str] + """ + return self._type + + @type.setter + def type(self, type): + """Sets the type of this ControllerCertToolsGeneratePdfJsonCertificate. + + + :param type: The type of this ControllerCertToolsGeneratePdfJsonCertificate. # noqa: E501 + :type: list[str] + """ + if type is None: + raise ValueError("Invalid value for `type`, must not be `None`") # noqa: E501 + + self._type = type + + @property + def issuer(self): + """Gets the issuer of this ControllerCertToolsGeneratePdfJsonCertificate. # noqa: E501 + + + :return: The issuer of this ControllerCertToolsGeneratePdfJsonCertificate. # noqa: E501 + :rtype: str + """ + return self._issuer + + @issuer.setter + def issuer(self, issuer): + """Sets the issuer of this ControllerCertToolsGeneratePdfJsonCertificate. + + + :param issuer: The issuer of this ControllerCertToolsGeneratePdfJsonCertificate. # noqa: E501 + :type: str + """ + if issuer is None: + raise ValueError("Invalid value for `issuer`, must not be `None`") # noqa: E501 + + self._issuer = issuer + + @property + def issuance_date(self): + """Gets the issuance_date of this ControllerCertToolsGeneratePdfJsonCertificate. # noqa: E501 + + + :return: The issuance_date of this ControllerCertToolsGeneratePdfJsonCertificate. # noqa: E501 + :rtype: str + """ + return self._issuance_date + + @issuance_date.setter + def issuance_date(self, issuance_date): + """Sets the issuance_date of this ControllerCertToolsGeneratePdfJsonCertificate. + + + :param issuance_date: The issuance_date of this ControllerCertToolsGeneratePdfJsonCertificate. # noqa: E501 + :type: str + """ + if issuance_date is None: + raise ValueError("Invalid value for `issuance_date`, must not be `None`") # noqa: E501 + + self._issuance_date = issuance_date + + @property + def credential_subject(self): + """Gets the credential_subject of this ControllerCertToolsGeneratePdfJsonCertificate. # noqa: E501 + + + :return: The credential_subject of this ControllerCertToolsGeneratePdfJsonCertificate. # noqa: E501 + :rtype: object + """ + return self._credential_subject + + @credential_subject.setter + def credential_subject(self, credential_subject): + """Sets the credential_subject of this ControllerCertToolsGeneratePdfJsonCertificate. + + + :param credential_subject: The credential_subject of this ControllerCertToolsGeneratePdfJsonCertificate. # noqa: E501 + :type: object + """ + if credential_subject is None: + raise ValueError("Invalid value for `credential_subject`, must not be `None`") # noqa: E501 + + self._credential_subject = credential_subject + + @property + def display_html(self): + """Gets the display_html of this ControllerCertToolsGeneratePdfJsonCertificate. # noqa: E501 + + + :return: The display_html of this ControllerCertToolsGeneratePdfJsonCertificate. # noqa: E501 + :rtype: str + """ + return self._display_html + + @display_html.setter + def display_html(self, display_html): + """Sets the display_html of this ControllerCertToolsGeneratePdfJsonCertificate. + + + :param display_html: The display_html of this ControllerCertToolsGeneratePdfJsonCertificate. # noqa: E501 + :type: str + """ + + self._display_html = display_html + + @property + def crid(self): + """Gets the crid of this ControllerCertToolsGeneratePdfJsonCertificate. # noqa: E501 + + + :return: The crid of this ControllerCertToolsGeneratePdfJsonCertificate. # noqa: E501 + :rtype: str + """ + return self._crid + + @crid.setter + def crid(self, crid): + """Sets the crid of this ControllerCertToolsGeneratePdfJsonCertificate. 
+ + + :param crid: The crid of this ControllerCertToolsGeneratePdfJsonCertificate. # noqa: E501 + :type: str + """ + if crid is None: + raise ValueError("Invalid value for `crid`, must not be `None`") # noqa: E501 + + self._crid = crid + + @property + def crid_type(self): + """Gets the crid_type of this ControllerCertToolsGeneratePdfJsonCertificate. # noqa: E501 + + + :return: The crid_type of this ControllerCertToolsGeneratePdfJsonCertificate. # noqa: E501 + :rtype: str + """ + return self._crid_type + + @crid_type.setter + def crid_type(self, crid_type): + """Sets the crid_type of this ControllerCertToolsGeneratePdfJsonCertificate. + + + :param crid_type: The crid_type of this ControllerCertToolsGeneratePdfJsonCertificate. # noqa: E501 + :type: str + """ + + self._crid_type = crid_type + + @property + def metadata_json(self): + """Gets the metadata_json of this ControllerCertToolsGeneratePdfJsonCertificate. # noqa: E501 + + + :return: The metadata_json of this ControllerCertToolsGeneratePdfJsonCertificate. # noqa: E501 + :rtype: str + """ + return self._metadata_json + + @metadata_json.setter + def metadata_json(self, metadata_json): + """Sets the metadata_json of this ControllerCertToolsGeneratePdfJsonCertificate. + + + :param metadata_json: The metadata_json of this ControllerCertToolsGeneratePdfJsonCertificate. # noqa: E501 + :type: str + """ + + self._metadata_json = metadata_json + + @property + def proof(self): + """Gets the proof of this ControllerCertToolsGeneratePdfJsonCertificate. # noqa: E501 + + + :return: The proof of this ControllerCertToolsGeneratePdfJsonCertificate. # noqa: E501 + :rtype: object + """ + return self._proof + + @proof.setter + def proof(self, proof): + """Sets the proof of this ControllerCertToolsGeneratePdfJsonCertificate. + + + :param proof: The proof of this ControllerCertToolsGeneratePdfJsonCertificate. 
# noqa: E501 + :type: object + """ + if proof is None: + raise ValueError("Invalid value for `proof`, must not be `None`") # noqa: E501 + + self._proof = proof + + def to_dict(self): + """Returns the model properties as a dict""" + result = {} + + for attr, _ in six.iteritems(self.swagger_types): + value = getattr(self, attr) + if isinstance(value, list): + result[attr] = list(map( + lambda x: x.to_dict() if hasattr(x, "to_dict") else x, + value + )) + elif hasattr(value, "to_dict"): + result[attr] = value.to_dict() + elif isinstance(value, dict): + result[attr] = dict(map( + lambda item: (item[0], item[1].to_dict()) + if hasattr(item[1], "to_dict") else item, + value.items() + )) + else: + result[attr] = value + if issubclass(ControllerCertToolsGeneratePdfJsonCertificate, dict): + for key, value in self.items(): + result[key] = value + + return result + + def to_str(self): + """Returns the string representation of the model""" + return pprint.pformat(self.to_dict()) + + def __repr__(self): + """For `print` and `pprint`""" + return self.to_str() + + def __eq__(self, other): + """Returns true if both objects are equal""" + if not isinstance(other, ControllerCertToolsGeneratePdfJsonCertificate): + return False + + return self.__dict__ == other.__dict__ + + def __ne__(self, other): + """Returns true if both objects are not equal""" + return not self == other diff --git a/src/caosadvancedtools/bloxberg/swagger_client/models/controller_cert_tools_generate_unsigned_certificate_json_certificate.py b/src/caosadvancedtools/bloxberg/swagger_client/models/controller_cert_tools_generate_unsigned_certificate_json_certificate.py new file mode 100644 index 0000000000000000000000000000000000000000..4a6d2d3f0e15faa8672f001e964d66c6e0a27780 --- /dev/null +++ b/src/caosadvancedtools/bloxberg/swagger_client/models/controller_cert_tools_generate_unsigned_certificate_json_certificate.py @@ -0,0 +1,379 @@ +# coding: utf-8 + +""" + Research Object Certification + + No description provided (generated by Swagger Codegen https://github.com/swagger-api/swagger-codegen) # noqa: E501 + + OpenAPI spec version: 0.2.0 + + Generated by: https://github.com/swagger-api/swagger-codegen.git +""" + +import pprint +import re # noqa: F401 + +import six + +class ControllerCertToolsGenerateUnsignedCertificateJsonCertificate(object): + """NOTE: This class is auto generated by the swagger code generator program. + + Do not edit the class manually. + """ + """ + Attributes: + swagger_types (dict): The key is attribute name + and the value is attribute type. + attribute_map (dict): The key is attribute name + and the value is json key in definition. 
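+
+ Note (added for clarity): the constructor and attributes use the Python
+ names, which are mapped to the JSON wire-format keys via ``attribute_map``
+ below, e.g. ``issuance_date`` <-> ``issuanceDate``.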
+ """ + swagger_types = { + 'context': 'list[str]', + 'id': 'str', + 'type': 'list[str]', + 'issuer': 'str', + 'issuance_date': 'str', + 'credential_subject': 'object', + 'display_html': 'str', + 'crid': 'str', + 'crid_type': 'str', + 'metadata_json': 'str', + 'proof': 'object' + } + + attribute_map = { + 'context': '@context', + 'id': 'id', + 'type': 'type', + 'issuer': 'issuer', + 'issuance_date': 'issuanceDate', + 'credential_subject': 'credentialSubject', + 'display_html': 'displayHtml', + 'crid': 'crid', + 'crid_type': 'cridType', + 'metadata_json': 'metadataJson', + 'proof': 'proof' + } + + def __init__(self, context=None, id=None, type=None, issuer=None, issuance_date=None, credential_subject=None, display_html=None, crid=None, crid_type=None, metadata_json=None, proof=None): # noqa: E501 + """ControllerCertToolsGenerateUnsignedCertificateJsonCertificate - a model defined in Swagger""" # noqa: E501 + self._context = None + self._id = None + self._type = None + self._issuer = None + self._issuance_date = None + self._credential_subject = None + self._display_html = None + self._crid = None + self._crid_type = None + self._metadata_json = None + self._proof = None + self.discriminator = None + if context is not None: + self.context = context + self.id = id + self.type = type + self.issuer = issuer + self.issuance_date = issuance_date + self.credential_subject = credential_subject + if display_html is not None: + self.display_html = display_html + self.crid = crid + if crid_type is not None: + self.crid_type = crid_type + if metadata_json is not None: + self.metadata_json = metadata_json + self.proof = proof + + @property + def context(self): + """Gets the context of this ControllerCertToolsGenerateUnsignedCertificateJsonCertificate. # noqa: E501 + + Relevant JSON-LD context links in order to validate Verifiable Credentials according to their spec. # noqa: E501 + + :return: The context of this ControllerCertToolsGenerateUnsignedCertificateJsonCertificate. # noqa: E501 + :rtype: list[str] + """ + return self._context + + @context.setter + def context(self, context): + """Sets the context of this ControllerCertToolsGenerateUnsignedCertificateJsonCertificate. + + Relevant JSON-LD context links in order to validate Verifiable Credentials according to their spec. # noqa: E501 + + :param context: The context of this ControllerCertToolsGenerateUnsignedCertificateJsonCertificate. # noqa: E501 + :type: list[str] + """ + + self._context = context + + @property + def id(self): + """Gets the id of this ControllerCertToolsGenerateUnsignedCertificateJsonCertificate. # noqa: E501 + + + :return: The id of this ControllerCertToolsGenerateUnsignedCertificateJsonCertificate. # noqa: E501 + :rtype: str + """ + return self._id + + @id.setter + def id(self, id): + """Sets the id of this ControllerCertToolsGenerateUnsignedCertificateJsonCertificate. + + + :param id: The id of this ControllerCertToolsGenerateUnsignedCertificateJsonCertificate. # noqa: E501 + :type: str + """ + if id is None: + raise ValueError("Invalid value for `id`, must not be `None`") # noqa: E501 + + self._id = id + + @property + def type(self): + """Gets the type of this ControllerCertToolsGenerateUnsignedCertificateJsonCertificate. # noqa: E501 + + + :return: The type of this ControllerCertToolsGenerateUnsignedCertificateJsonCertificate. # noqa: E501 + :rtype: list[str] + """ + return self._type + + @type.setter + def type(self, type): + """Sets the type of this ControllerCertToolsGenerateUnsignedCertificateJsonCertificate. 
+ + + :param type: The type of this ControllerCertToolsGenerateUnsignedCertificateJsonCertificate. # noqa: E501 + :type: list[str] + """ + if type is None: + raise ValueError("Invalid value for `type`, must not be `None`") # noqa: E501 + + self._type = type + + @property + def issuer(self): + """Gets the issuer of this ControllerCertToolsGenerateUnsignedCertificateJsonCertificate. # noqa: E501 + + + :return: The issuer of this ControllerCertToolsGenerateUnsignedCertificateJsonCertificate. # noqa: E501 + :rtype: str + """ + return self._issuer + + @issuer.setter + def issuer(self, issuer): + """Sets the issuer of this ControllerCertToolsGenerateUnsignedCertificateJsonCertificate. + + + :param issuer: The issuer of this ControllerCertToolsGenerateUnsignedCertificateJsonCertificate. # noqa: E501 + :type: str + """ + if issuer is None: + raise ValueError("Invalid value for `issuer`, must not be `None`") # noqa: E501 + + self._issuer = issuer + + @property + def issuance_date(self): + """Gets the issuance_date of this ControllerCertToolsGenerateUnsignedCertificateJsonCertificate. # noqa: E501 + + + :return: The issuance_date of this ControllerCertToolsGenerateUnsignedCertificateJsonCertificate. # noqa: E501 + :rtype: str + """ + return self._issuance_date + + @issuance_date.setter + def issuance_date(self, issuance_date): + """Sets the issuance_date of this ControllerCertToolsGenerateUnsignedCertificateJsonCertificate. + + + :param issuance_date: The issuance_date of this ControllerCertToolsGenerateUnsignedCertificateJsonCertificate. # noqa: E501 + :type: str + """ + if issuance_date is None: + raise ValueError("Invalid value for `issuance_date`, must not be `None`") # noqa: E501 + + self._issuance_date = issuance_date + + @property + def credential_subject(self): + """Gets the credential_subject of this ControllerCertToolsGenerateUnsignedCertificateJsonCertificate. # noqa: E501 + + + :return: The credential_subject of this ControllerCertToolsGenerateUnsignedCertificateJsonCertificate. # noqa: E501 + :rtype: object + """ + return self._credential_subject + + @credential_subject.setter + def credential_subject(self, credential_subject): + """Sets the credential_subject of this ControllerCertToolsGenerateUnsignedCertificateJsonCertificate. + + + :param credential_subject: The credential_subject of this ControllerCertToolsGenerateUnsignedCertificateJsonCertificate. # noqa: E501 + :type: object + """ + if credential_subject is None: + raise ValueError("Invalid value for `credential_subject`, must not be `None`") # noqa: E501 + + self._credential_subject = credential_subject + + @property + def display_html(self): + """Gets the display_html of this ControllerCertToolsGenerateUnsignedCertificateJsonCertificate. # noqa: E501 + + + :return: The display_html of this ControllerCertToolsGenerateUnsignedCertificateJsonCertificate. # noqa: E501 + :rtype: str + """ + return self._display_html + + @display_html.setter + def display_html(self, display_html): + """Sets the display_html of this ControllerCertToolsGenerateUnsignedCertificateJsonCertificate. + + + :param display_html: The display_html of this ControllerCertToolsGenerateUnsignedCertificateJsonCertificate. # noqa: E501 + :type: str + """ + + self._display_html = display_html + + @property + def crid(self): + """Gets the crid of this ControllerCertToolsGenerateUnsignedCertificateJsonCertificate. # noqa: E501 + + + :return: The crid of this ControllerCertToolsGenerateUnsignedCertificateJsonCertificate. 
# noqa: E501 + :rtype: str + """ + return self._crid + + @crid.setter + def crid(self, crid): + """Sets the crid of this ControllerCertToolsGenerateUnsignedCertificateJsonCertificate. + + + :param crid: The crid of this ControllerCertToolsGenerateUnsignedCertificateJsonCertificate. # noqa: E501 + :type: str + """ + if crid is None: + raise ValueError("Invalid value for `crid`, must not be `None`") # noqa: E501 + + self._crid = crid + + @property + def crid_type(self): + """Gets the crid_type of this ControllerCertToolsGenerateUnsignedCertificateJsonCertificate. # noqa: E501 + + + :return: The crid_type of this ControllerCertToolsGenerateUnsignedCertificateJsonCertificate. # noqa: E501 + :rtype: str + """ + return self._crid_type + + @crid_type.setter + def crid_type(self, crid_type): + """Sets the crid_type of this ControllerCertToolsGenerateUnsignedCertificateJsonCertificate. + + + :param crid_type: The crid_type of this ControllerCertToolsGenerateUnsignedCertificateJsonCertificate. # noqa: E501 + :type: str + """ + + self._crid_type = crid_type + + @property + def metadata_json(self): + """Gets the metadata_json of this ControllerCertToolsGenerateUnsignedCertificateJsonCertificate. # noqa: E501 + + + :return: The metadata_json of this ControllerCertToolsGenerateUnsignedCertificateJsonCertificate. # noqa: E501 + :rtype: str + """ + return self._metadata_json + + @metadata_json.setter + def metadata_json(self, metadata_json): + """Sets the metadata_json of this ControllerCertToolsGenerateUnsignedCertificateJsonCertificate. + + + :param metadata_json: The metadata_json of this ControllerCertToolsGenerateUnsignedCertificateJsonCertificate. # noqa: E501 + :type: str + """ + + self._metadata_json = metadata_json + + @property + def proof(self): + """Gets the proof of this ControllerCertToolsGenerateUnsignedCertificateJsonCertificate. # noqa: E501 + + + :return: The proof of this ControllerCertToolsGenerateUnsignedCertificateJsonCertificate. # noqa: E501 + :rtype: object + """ + return self._proof + + @proof.setter + def proof(self, proof): + """Sets the proof of this ControllerCertToolsGenerateUnsignedCertificateJsonCertificate. + + + :param proof: The proof of this ControllerCertToolsGenerateUnsignedCertificateJsonCertificate. 
# noqa: E501 + :type: object + """ + if proof is None: + raise ValueError("Invalid value for `proof`, must not be `None`") # noqa: E501 + + self._proof = proof + + def to_dict(self): + """Returns the model properties as a dict""" + result = {} + + for attr, _ in six.iteritems(self.swagger_types): + value = getattr(self, attr) + if isinstance(value, list): + result[attr] = list(map( + lambda x: x.to_dict() if hasattr(x, "to_dict") else x, + value + )) + elif hasattr(value, "to_dict"): + result[attr] = value.to_dict() + elif isinstance(value, dict): + result[attr] = dict(map( + lambda item: (item[0], item[1].to_dict()) + if hasattr(item[1], "to_dict") else item, + value.items() + )) + else: + result[attr] = value + if issubclass(ControllerCertToolsGenerateUnsignedCertificateJsonCertificate, dict): + for key, value in self.items(): + result[key] = value + + return result + + def to_str(self): + """Returns the string representation of the model""" + return pprint.pformat(self.to_dict()) + + def __repr__(self): + """For `print` and `pprint`""" + return self.to_str() + + def __eq__(self, other): + """Returns true if both objects are equal""" + if not isinstance(other, ControllerCertToolsGenerateUnsignedCertificateJsonCertificate): + return False + + return self.__dict__ == other.__dict__ + + def __ne__(self, other): + """Returns true if both objects are not equal""" + return not self == other diff --git a/src/caosadvancedtools/bloxberg/swagger_client/models/http_validation_error.py b/src/caosadvancedtools/bloxberg/swagger_client/models/http_validation_error.py new file mode 100644 index 0000000000000000000000000000000000000000..21c9e467311c596499f3f408c5ac670b5852c6fa --- /dev/null +++ b/src/caosadvancedtools/bloxberg/swagger_client/models/http_validation_error.py @@ -0,0 +1,110 @@ +# coding: utf-8 + +""" + Research Object Certification + + No description provided (generated by Swagger Codegen https://github.com/swagger-api/swagger-codegen) # noqa: E501 + + OpenAPI spec version: 0.2.0 + + Generated by: https://github.com/swagger-api/swagger-codegen.git +""" + +import pprint +import re # noqa: F401 + +import six + +class HTTPValidationError(object): + """NOTE: This class is auto generated by the swagger code generator program. + + Do not edit the class manually. + """ + """ + Attributes: + swagger_types (dict): The key is attribute name + and the value is attribute type. + attribute_map (dict): The key is attribute name + and the value is json key in definition. + """ + swagger_types = { + 'detail': 'list[ValidationError]' + } + + attribute_map = { + 'detail': 'detail' + } + + def __init__(self, detail=None): # noqa: E501 + """HTTPValidationError - a model defined in Swagger""" # noqa: E501 + self._detail = None + self.discriminator = None + if detail is not None: + self.detail = detail + + @property + def detail(self): + """Gets the detail of this HTTPValidationError. # noqa: E501 + + + :return: The detail of this HTTPValidationError. # noqa: E501 + :rtype: list[ValidationError] + """ + return self._detail + + @detail.setter + def detail(self, detail): + """Sets the detail of this HTTPValidationError. + + + :param detail: The detail of this HTTPValidationError. 
# noqa: E501 + :type: list[ValidationError] + """ + + self._detail = detail + + def to_dict(self): + """Returns the model properties as a dict""" + result = {} + + for attr, _ in six.iteritems(self.swagger_types): + value = getattr(self, attr) + if isinstance(value, list): + result[attr] = list(map( + lambda x: x.to_dict() if hasattr(x, "to_dict") else x, + value + )) + elif hasattr(value, "to_dict"): + result[attr] = value.to_dict() + elif isinstance(value, dict): + result[attr] = dict(map( + lambda item: (item[0], item[1].to_dict()) + if hasattr(item[1], "to_dict") else item, + value.items() + )) + else: + result[attr] = value + if issubclass(HTTPValidationError, dict): + for key, value in self.items(): + result[key] = value + + return result + + def to_str(self): + """Returns the string representation of the model""" + return pprint.pformat(self.to_dict()) + + def __repr__(self): + """For `print` and `pprint`""" + return self.to_str() + + def __eq__(self, other): + """Returns true if both objects are equal""" + if not isinstance(other, HTTPValidationError): + return False + + return self.__dict__ == other.__dict__ + + def __ne__(self, other): + """Returns true if both objects are not equal""" + return not self == other diff --git a/src/caosadvancedtools/bloxberg/swagger_client/models/validation_error.py b/src/caosadvancedtools/bloxberg/swagger_client/models/validation_error.py new file mode 100644 index 0000000000000000000000000000000000000000..7ae6bf0900449ff3612798a4503692c4e38e1c11 --- /dev/null +++ b/src/caosadvancedtools/bloxberg/swagger_client/models/validation_error.py @@ -0,0 +1,165 @@ +# coding: utf-8 + +""" + Research Object Certification + + No description provided (generated by Swagger Codegen https://github.com/swagger-api/swagger-codegen) # noqa: E501 + + OpenAPI spec version: 0.2.0 + + Generated by: https://github.com/swagger-api/swagger-codegen.git +""" + +import pprint +import re # noqa: F401 + +import six + +class ValidationError(object): + """NOTE: This class is auto generated by the swagger code generator program. + + Do not edit the class manually. + """ + """ + Attributes: + swagger_types (dict): The key is attribute name + and the value is attribute type. + attribute_map (dict): The key is attribute name + and the value is json key in definition. + """ + swagger_types = { + 'loc': 'list[str]', + 'msg': 'str', + 'type': 'str' + } + + attribute_map = { + 'loc': 'loc', + 'msg': 'msg', + 'type': 'type' + } + + def __init__(self, loc=None, msg=None, type=None): # noqa: E501 + """ValidationError - a model defined in Swagger""" # noqa: E501 + self._loc = None + self._msg = None + self._type = None + self.discriminator = None + self.loc = loc + self.msg = msg + self.type = type + + @property + def loc(self): + """Gets the loc of this ValidationError. # noqa: E501 + + + :return: The loc of this ValidationError. # noqa: E501 + :rtype: list[str] + """ + return self._loc + + @loc.setter + def loc(self, loc): + """Sets the loc of this ValidationError. + + + :param loc: The loc of this ValidationError. # noqa: E501 + :type: list[str] + """ + if loc is None: + raise ValueError("Invalid value for `loc`, must not be `None`") # noqa: E501 + + self._loc = loc + + @property + def msg(self): + """Gets the msg of this ValidationError. # noqa: E501 + + + :return: The msg of this ValidationError. # noqa: E501 + :rtype: str + """ + return self._msg + + @msg.setter + def msg(self, msg): + """Sets the msg of this ValidationError. + + + :param msg: The msg of this ValidationError. 
# noqa: E501 + :type: str + """ + if msg is None: + raise ValueError("Invalid value for `msg`, must not be `None`") # noqa: E501 + + self._msg = msg + + @property + def type(self): + """Gets the type of this ValidationError. # noqa: E501 + + + :return: The type of this ValidationError. # noqa: E501 + :rtype: str + """ + return self._type + + @type.setter + def type(self, type): + """Sets the type of this ValidationError. + + + :param type: The type of this ValidationError. # noqa: E501 + :type: str + """ + if type is None: + raise ValueError("Invalid value for `type`, must not be `None`") # noqa: E501 + + self._type = type + + def to_dict(self): + """Returns the model properties as a dict""" + result = {} + + for attr, _ in six.iteritems(self.swagger_types): + value = getattr(self, attr) + if isinstance(value, list): + result[attr] = list(map( + lambda x: x.to_dict() if hasattr(x, "to_dict") else x, + value + )) + elif hasattr(value, "to_dict"): + result[attr] = value.to_dict() + elif isinstance(value, dict): + result[attr] = dict(map( + lambda item: (item[0], item[1].to_dict()) + if hasattr(item[1], "to_dict") else item, + value.items() + )) + else: + result[attr] = value + if issubclass(ValidationError, dict): + for key, value in self.items(): + result[key] = value + + return result + + def to_str(self): + """Returns the string representation of the model""" + return pprint.pformat(self.to_dict()) + + def __repr__(self): + """For `print` and `pprint`""" + return self.to_str() + + def __eq__(self, other): + """Returns true if both objects are equal""" + if not isinstance(other, ValidationError): + return False + + return self.__dict__ == other.__dict__ + + def __ne__(self, other): + """Returns true if both objects are not equal""" + return not self == other diff --git a/src/caosadvancedtools/bloxberg/swagger_client/rest.py b/src/caosadvancedtools/bloxberg/swagger_client/rest.py new file mode 100644 index 0000000000000000000000000000000000000000..c42e720c284832da70996e0eb885f6ffdcbb52d2 --- /dev/null +++ b/src/caosadvancedtools/bloxberg/swagger_client/rest.py @@ -0,0 +1,322 @@ +# coding: utf-8 + +""" + Research Object Certification + + No description provided (generated by Swagger Codegen https://github.com/swagger-api/swagger-codegen) # noqa: E501 + + OpenAPI spec version: 0.2.0 + + Generated by: https://github.com/swagger-api/swagger-codegen.git +""" + +from __future__ import absolute_import + +import io +import json +import logging +import re +import ssl + +import certifi +# python 2 and python 3 compatibility library +import six +from six.moves.urllib.parse import urlencode + +try: + import urllib3 +except ImportError: + raise ImportError('Swagger python client requires urllib3.') + + +logger = logging.getLogger(__name__) + + +class RESTResponse(io.IOBase): + + def __init__(self, resp): + self.urllib3_response = resp + self.status = resp.status + self.reason = resp.reason + self.data = resp.data + + def getheaders(self): + """Returns a dictionary of the response headers.""" + return self.urllib3_response.getheaders() + + def getheader(self, name, default=None): + """Returns a given response header.""" + return self.urllib3_response.getheader(name, default) + + +class RESTClientObject(object): + + def __init__(self, configuration, pools_size=4, maxsize=None): + # urllib3.PoolManager will pass all kw parameters to connectionpool + # https://github.com/shazow/urllib3/blob/f9409436f83aeb79fbaf090181cd81b784f1b8ce/urllib3/poolmanager.py#L75 # noqa: E501 + # 
https://github.com/shazow/urllib3/blob/f9409436f83aeb79fbaf090181cd81b784f1b8ce/urllib3/connectionpool.py#L680 # noqa: E501 + # maxsize is the number of requests to host that are allowed in parallel # noqa: E501 + # Custom SSL certificates and client certificates: http://urllib3.readthedocs.io/en/latest/advanced-usage.html # noqa: E501 + + # cert_reqs + if configuration.verify_ssl: + cert_reqs = ssl.CERT_REQUIRED + else: + cert_reqs = ssl.CERT_NONE + + # ca_certs + if configuration.ssl_ca_cert: + ca_certs = configuration.ssl_ca_cert + else: + # if not set certificate file, use Mozilla's root certificates. + ca_certs = certifi.where() + + addition_pool_args = {} + if configuration.assert_hostname is not None: + addition_pool_args['assert_hostname'] = configuration.assert_hostname # noqa: E501 + + if maxsize is None: + if configuration.connection_pool_maxsize is not None: + maxsize = configuration.connection_pool_maxsize + else: + maxsize = 4 + + # https pool manager + if configuration.proxy: + self.pool_manager = urllib3.ProxyManager( + num_pools=pools_size, + maxsize=maxsize, + cert_reqs=cert_reqs, + ca_certs=ca_certs, + cert_file=configuration.cert_file, + key_file=configuration.key_file, + proxy_url=configuration.proxy, + **addition_pool_args + ) + else: + self.pool_manager = urllib3.PoolManager( + num_pools=pools_size, + maxsize=maxsize, + cert_reqs=cert_reqs, + ca_certs=ca_certs, + cert_file=configuration.cert_file, + key_file=configuration.key_file, + **addition_pool_args + ) + + def request(self, method, url, query_params=None, headers=None, + body=None, post_params=None, _preload_content=True, + _request_timeout=None): + """Perform requests. + + :param method: http request method + :param url: http request url + :param query_params: query parameters in the url + :param headers: http request headers + :param body: request json body, for `application/json` + :param post_params: request post parameters, + `application/x-www-form-urlencoded` + and `multipart/form-data` + :param _preload_content: if False, the urllib3.HTTPResponse object will + be returned without reading/decoding response + data. Default is True. + :param _request_timeout: timeout setting for this request. If one + number provided, it will be total request + timeout. It can also be a pair (tuple) of + (connection, read) timeouts. + """ + method = method.upper() + assert method in ['GET', 'HEAD', 'DELETE', 'POST', 'PUT', + 'PATCH', 'OPTIONS'] + + if post_params and body: + raise ValueError( + "body parameter cannot be used with post_params parameter." + ) + + post_params = post_params or {} + headers = headers or {} + + timeout = None + if _request_timeout: + if isinstance(_request_timeout, (int, ) if six.PY3 else (int, long)): # noqa: E501,F821 + timeout = urllib3.Timeout(total=_request_timeout) + elif (isinstance(_request_timeout, tuple) and + len(_request_timeout) == 2): + timeout = urllib3.Timeout( + connect=_request_timeout[0], read=_request_timeout[1]) + + if 'Content-Type' not in headers: + headers['Content-Type'] = 'application/json' + + try: + # For `POST`, `PUT`, `PATCH`, `OPTIONS`, `DELETE` + if method in ['POST', 'PUT', 'PATCH', 'OPTIONS', 'DELETE']: + if query_params: + url += '?' 
+ urlencode(query_params) + if re.search('json', headers['Content-Type'], re.IGNORECASE): + request_body = '{}' + if body is not None: + request_body = json.dumps(body) + r = self.pool_manager.request( + method, url, + body=request_body, + preload_content=_preload_content, + timeout=timeout, + headers=headers) + elif headers['Content-Type'] == 'application/x-www-form-urlencoded': # noqa: E501 + r = self.pool_manager.request( + method, url, + fields=post_params, + encode_multipart=False, + preload_content=_preload_content, + timeout=timeout, + headers=headers) + elif headers['Content-Type'] == 'multipart/form-data': + # must del headers['Content-Type'], or the correct + # Content-Type which generated by urllib3 will be + # overwritten. + del headers['Content-Type'] + r = self.pool_manager.request( + method, url, + fields=post_params, + encode_multipart=True, + preload_content=_preload_content, + timeout=timeout, + headers=headers) + # Pass a `string` parameter directly in the body to support + # other content types than Json when `body` argument is + # provided in serialized form + elif isinstance(body, str): + request_body = body + r = self.pool_manager.request( + method, url, + body=request_body, + preload_content=_preload_content, + timeout=timeout, + headers=headers) + else: + # Cannot generate the request from given parameters + msg = """Cannot prepare a request message for provided + arguments. Please check that your arguments match + declared content type.""" + raise ApiException(status=0, reason=msg) + # For `GET`, `HEAD` + else: + r = self.pool_manager.request(method, url, + fields=query_params, + preload_content=_preload_content, + timeout=timeout, + headers=headers) + except urllib3.exceptions.SSLError as e: + msg = "{0}\n{1}".format(type(e).__name__, str(e)) + raise ApiException(status=0, reason=msg) + + if _preload_content: + r = RESTResponse(r) + + # In the python 3, the response.data is bytes. + # we need to decode it to string. 
+ if six.PY3: + r.data = r.data.decode('utf8') + + # log response body + logger.debug("response body: %s", r.data) + + if not 200 <= r.status <= 299: + raise ApiException(http_resp=r) + + return r + + def GET(self, url, headers=None, query_params=None, _preload_content=True, + _request_timeout=None): + return self.request("GET", url, + headers=headers, + _preload_content=_preload_content, + _request_timeout=_request_timeout, + query_params=query_params) + + def HEAD(self, url, headers=None, query_params=None, _preload_content=True, + _request_timeout=None): + return self.request("HEAD", url, + headers=headers, + _preload_content=_preload_content, + _request_timeout=_request_timeout, + query_params=query_params) + + def OPTIONS(self, url, headers=None, query_params=None, post_params=None, + body=None, _preload_content=True, _request_timeout=None): + return self.request("OPTIONS", url, + headers=headers, + query_params=query_params, + post_params=post_params, + _preload_content=_preload_content, + _request_timeout=_request_timeout, + body=body) + + def DELETE(self, url, headers=None, query_params=None, body=None, + _preload_content=True, _request_timeout=None): + return self.request("DELETE", url, + headers=headers, + query_params=query_params, + _preload_content=_preload_content, + _request_timeout=_request_timeout, + body=body) + + def POST(self, url, headers=None, query_params=None, post_params=None, + body=None, _preload_content=True, _request_timeout=None): + return self.request("POST", url, + headers=headers, + query_params=query_params, + post_params=post_params, + _preload_content=_preload_content, + _request_timeout=_request_timeout, + body=body) + + def PUT(self, url, headers=None, query_params=None, post_params=None, + body=None, _preload_content=True, _request_timeout=None): + return self.request("PUT", url, + headers=headers, + query_params=query_params, + post_params=post_params, + _preload_content=_preload_content, + _request_timeout=_request_timeout, + body=body) + + def PATCH(self, url, headers=None, query_params=None, post_params=None, + body=None, _preload_content=True, _request_timeout=None): + return self.request("PATCH", url, + headers=headers, + query_params=query_params, + post_params=post_params, + _preload_content=_preload_content, + _request_timeout=_request_timeout, + body=body) + + +class ApiException(Exception): + + def __init__(self, status=None, reason=None, http_resp=None): + if http_resp: + self.status = http_resp.status + self.reason = http_resp.reason + self.body = http_resp.data + self.headers = http_resp.getheaders() + else: + self.status = status + self.reason = reason + self.body = None + self.headers = None + + def __str__(self): + """Custom error messages for exception""" + error_message = "({0})\n"\ + "Reason: {1}\n".format(self.status, self.reason) + if self.headers: + error_message += "HTTP response headers: {0}\n".format( + self.headers) + + if self.body: + error_message += "HTTP response body: {0}\n".format(self.body) + + return error_message diff --git a/src/caosadvancedtools/cache.py b/src/caosadvancedtools/cache.py index c255a00d0216c2b944e54158de6910538f3da1ae..ff807f2aba6210d643e675e7e3dd91d7c3b30906 100644 --- a/src/caosadvancedtools/cache.py +++ b/src/caosadvancedtools/cache.py @@ -32,6 +32,8 @@ from hashlib import sha256 import caosdb as db from lxml import etree +import tempfile + def put_in_container(stuff): if isinstance(stuff, list): @@ -154,7 +156,9 @@ class UpdateCache(Cache): def __init__(self, db_file=None): if db_file is None: - 
db_file = "/tmp/crawler_update_cache.db" + tmppath = tempfile.gettempdir() + tmpf = os.path.join(tmppath, "crawler_update_cache.db") + db_file = tmpf super().__init__(db_file=db_file) @staticmethod @@ -171,13 +175,16 @@ class UpdateCache(Cache): return old_ones def insert(self, cont, run_id): - """ insert a pending, unauthorized update + """Insert a pending, unauthorized update - Parameters: - ----------- + + Parameters + ---------- cont: Container with the records to be updated containing the desired version, i.e. the state after the update. - run_id: the id of the crawler run + + run_id: int + The id of the crawler run """ cont = put_in_container(cont) old_ones = UpdateCache.get_previous_version(cont) diff --git a/src/caosadvancedtools/cfood.py b/src/caosadvancedtools/cfood.py index 8ce1dced48ba12e62717fe5bd788178e1e5a9488..4a9f955a17fc429deb6cdd10c3645700e579b4df 100644 --- a/src/caosadvancedtools/cfood.py +++ b/src/caosadvancedtools/cfood.py @@ -1,14 +1,13 @@ #!/usr/bin/env python # encoding: utf-8 # -# ** header v3.0 # This file is a part of the CaosDB Project. # # Copyright (C) 2018 Research Group Biomedical Physics, # Max-Planck-Institute for Dynamics and Self-Organization Göttingen -# Copyright (C) 2019,2020 IndiScale GmbH <info@indiscale.com> +# Copyright (C) 2019-2022 IndiScale GmbH <info@indiscale.com> # Copyright (C) 2019,2020 Henrik tom Wörden -# Copyright (C) 2020 Florian Spreckelsen <f.spreckelsen@indiscale.com> +# Copyright (C) 2020-2022 Florian Spreckelsen <f.spreckelsen@indiscale.com> # Copyright (C) 2021 University Medical Center Göttingen, Institute for Medical Informatics # Copyright (C) 2021 Florian Spreckelsen <florian.spreckelsen@med.uni-goettingen.de> # @@ -24,8 +23,6 @@ # # You should have received a copy of the GNU Affero General Public License # along with this program. If not, see <https://www.gnu.org/licenses/>. -# -# ** end header """ Defines how something that shall be inserted into CaosDB is treated. CaosDB can automatically be filled with Records based on some structure, a file @@ -47,6 +44,7 @@ from abc import ABCMeta, abstractmethod from datetime import datetime import caosdb as db +from caosdb.common.models import Entity from caosdb.exceptions import (BadQueryError, EmptyUniqueQueryError, QueryNotUniqueError, TransactionError) @@ -152,13 +150,24 @@ fileguide = FileGuide() class AbstractCFood(object, metaclass=ABCMeta): + """ Abstract base class for Crawler food (CFood).""" def __init__(self, item): - """ Abstract base class for Crawler food (CFood).""" + """A CFood has two main methods which must be customized: + + 1. `create_identifiables` + This method defines (and inserts if necessary) the identifiables which may be updated at a + later stage. After calling this method, the `identifiables` Container contains those + Records which will be updated at a later time. + + 2. `update_identifiables` + This method updates the stored identifiables as necessary. 
+ """ self.to_be_updated = db.Container() self.identifiables = db.Container() self.item = item self.attached_items = [] + self.update_flags = {} @abstractmethod def create_identifiables(self): @@ -298,7 +307,7 @@ class AbstractFileCFood(AbstractCFood): super().__init__(*args, item=crawled_path, **kwargs) self._crawled_file = None self.crawled_path = crawled_path - self.match = re.match(type(self).get_re(), crawled_path) + self.match = re.match(self.get_re(), crawled_path) self.attached_filenames = [] @property @@ -309,7 +318,31 @@ class AbstractFileCFood(AbstractCFood): return self._crawled_file @staticmethod - def get_re(): + def re_from_extensions(extensions): + """Return a regular expression which matches the given file extensions. + + Useful for inheriting classes. + + Parameters + ---------- + extensions : iterable<str> + An iterable with the allowed extensions. + + Returns + ------- + out : str + The regular expression, starting with ``.*\\.`` and ending with the EOL dollar + character. The actual extension will be accessible in the + :py:attribute:`pattern group name<python:re.Pattern.groupindexe>` ``ext``. + """ + + if not extensions: + return None + + return r".*\.(?P<ext>" + "|".join(extensions) + ")$" + + @classmethod + def get_re(cls): """ Returns the regular expression used to identify files that shall be processed @@ -355,7 +388,7 @@ class AbstractFileCFood(AbstractCFood): def assure_object_is_in_list(obj, containing_object, property_name, to_be_updated=None, datatype=None): """Checks whether `obj` is one of the values in the list property - `property_name` of the supplied entity containing_object`. + `property_name` of the supplied entity `containing_object`. If this is the case this function returns. Otherwise the entity is added to the property `property_name` and the entity @@ -377,12 +410,15 @@ def assure_object_is_in_list(obj, containing_object, property_name, if containing_object.get_property(property_name) is None: containing_object.add_property(property_name, value=[], datatype=datatype) + # TODO: case where multiple times the same property exists is not treated - if not isinstance(containing_object.get_property(property_name).value, list): - containing_object.get_property(property_name).value = [ - containing_object.get_property(property_name).value] - containing_object.get_property(property_name).datatype = datatype - current_list = containing_object.get_property(property_name).value + list_prop = containing_object.get_property(property_name) + if list_prop.value is None: + list_prop.value = [] + elif not isinstance(list_prop.value, list): + list_prop.value = [list_prop.value] + list_prop.datatype = datatype + current_list = list_prop.value if not isinstance(obj, list): objects = [obj] @@ -627,8 +663,20 @@ def assure_has_property(entity, name, value, to_be_updated=None, if isinstance(value, db.Entity): value = value.id + if isinstance(value, list): + value = [i.id if isinstance(i, db.Entity) else i for i in value] + for el in possible_properties: - if el.value == value: + tmp_value = el.value + + if isinstance(tmp_value, db.Entity): + tmp_value = el.value.id + + if isinstance(tmp_value, list): + tmp_value = [i.id if isinstance( + i, db.Entity) else i for i in tmp_value] + + if tmp_value == value: contained = True break @@ -762,10 +810,12 @@ class RowCFood(AbstractCFood): for key, value in self.item.iteritems(): if key in self.unique_cols: continue - rec.add_property(key, value) + assure_property_is(rec, key, + value, + to_be_updated=self.to_be_updated) -class 
-class CMeal(object):
+class CMeal():
"""
CMeal groups equivalent items and allow their collected insertion.
@@ -793,12 +843,23 @@ class CMeal(object):
matching_groups = []

def __init__(self):
+ self.item = None
+ # FIXME: is this only necessary because of inconsistent use of super().__init__()?
+ if "match" not in self.__dict__:
+ self.match = None
self.__class__.existing_instances.append(self)

+ @staticmethod
+ def get_re():
+ raise NotImplementedError("Subclasses must implement this function.")
+
@classmethod
def all_groups_equal(cls, m1, m2):
equal = True

+ if m2 is None:
+ return False
+
for group in cls.matching_groups:
if (group not in m1.groupdict() or
group not in m2.groupdict() or
@@ -830,5 +891,5 @@ class CMeal(object):
if match is None:
return False
- else:
- return self.all_groups_equal(match, self.match)
+
+ return self.all_groups_equal(match, self.match)
diff --git a/src/caosadvancedtools/models/version.py b/src/caosadvancedtools/cfoods/__init__.py
similarity index 65%
rename from src/caosadvancedtools/models/version.py
rename to src/caosadvancedtools/cfoods/__init__.py
index 29c67c6877a6531adc0fe337d497e26d15825006..30ce05add09a223c2f65dbe187a6cfb1768d7a22 100644
--- a/src/caosadvancedtools/models/version.py
+++ b/src/caosadvancedtools/cfoods/__init__.py
@@ -1,9 +1,9 @@
-#
-# ** header v3.0
+#!/usr/bin/env python3
+
# This file is a part of the CaosDB Project.
#
-# Copyright (C) 2018 Research Group Biomedical Physics,
-# Max-Planck-Institute for Dynamics and Self-Organization Göttingen
+# Copyright (C) 2020 IndiScale GmbH <www.indiscale.com>
+# Copyright (C) 2020 Daniel Hornung <d.hornung@indiscale.com>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as
@@ -17,16 +17,5 @@
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
-#
-# ** end header
-#
-
-# THIS FILE IS GENERATED FROM SETUP.PY
-short_version = '0.1.0'
-version = '0.1.0'
-full_version = '0.1.0.dev-Unknown'
-git_revision = 'Unknown'
-release = False
-if not release:
- version = full_version
+"""Specialized CFoods."""
diff --git a/src/caosadvancedtools/cfoods/h5.py b/src/caosadvancedtools/cfoods/h5.py
new file mode 100644
index 0000000000000000000000000000000000000000..cbf9d0baefa435b71eeaeefe63a9b018faabe7ea
--- /dev/null
+++ b/src/caosadvancedtools/cfoods/h5.py
@@ -0,0 +1,290 @@
+#!/usr/bin/env python3
+
+# This file is a part of the CaosDB Project.
+#
+# Copyright (C) 2020,2021 IndiScale GmbH <www.indiscale.com>
+# Copyright (C) 2020 Daniel Hornung <d.hornung@indiscale.com>
+# Copyright (C) 2021 Henrik tom Wörden <h.tomwoerden@indiscale.com>
+# Copyright (C) 2021 Alexander Kreft
+# Copyright (C) 2021 Laboratory for Fluid Physics and Biocomplexity,
+# Max-Planck-Institute für Dynamik und Selbstorganisation <www.lfpn.ds.mpg.de>
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as
+# published by the Free Software Foundation, either version 3 of the
+# License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
diff --git a/src/caosadvancedtools/models/version.py b/src/caosadvancedtools/cfoods/__init__.py
similarity index 65%
rename from src/caosadvancedtools/models/version.py
rename to src/caosadvancedtools/cfoods/__init__.py
index 29c67c6877a6531adc0fe337d497e26d15825006..30ce05add09a223c2f65dbe187a6cfb1768d7a22 100644
--- a/src/caosadvancedtools/models/version.py
+++ b/src/caosadvancedtools/cfoods/__init__.py
@@ -1,9 +1,9 @@
-#
-# ** header v3.0
+#!/usr/bin/env python3
+
 # This file is a part of the CaosDB Project.
 #
-# Copyright (C) 2018 Research Group Biomedical Physics,
-# Max-Planck-Institute for Dynamics and Self-Organization Göttingen
+# Copyright (C) 2020 IndiScale GmbH <www.indiscale.com>
+# Copyright (C) 2020 Daniel Hornung <d.hornung@indiscale.com>
 #
 # This program is free software: you can redistribute it and/or modify
 # it under the terms of the GNU Affero General Public License as
@@ -17,16 +17,5 @@
 #
 # You should have received a copy of the GNU Affero General Public License
 # along with this program. If not, see <https://www.gnu.org/licenses/>.
-#
-# ** end header
-#
-
-# THIS FILE IS GENERATED FROM SETUP.PY
-short_version = '0.1.0'
-version = '0.1.0'
-full_version = '0.1.0.dev-Unknown'
-git_revision = 'Unknown'
-release = False
-if not release:
-    version = full_version
+"""Specialized CFoods."""
diff --git a/src/caosadvancedtools/cfoods/h5.py b/src/caosadvancedtools/cfoods/h5.py
new file mode 100644
index 0000000000000000000000000000000000000000..cbf9d0baefa435b71eeaeefe63a9b018faabe7ea
--- /dev/null
+++ b/src/caosadvancedtools/cfoods/h5.py
@@ -0,0 +1,290 @@
+#!/usr/bin/env python3
+
+# This file is a part of the CaosDB Project.
+#
+# Copyright (C) 2020,2021 IndiScale GmbH <www.indiscale.com>
+# Copyright (C) 2020 Daniel Hornung <d.hornung@indiscale.com>
+# Copyright (C) 2021 Henrik tom Wörden <h.tomwoerden@indiscale.com>
+# Copyright (C) 2021 Alexander Kreft
+# Copyright (C) 2021 Laboratory for Fluid Physics and Biocomplexity,
+# Max-Planck-Institut für Dynamik und Selbstorganisation <www.lfpn.ds.mpg.de>
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as
+# published by the Free Software Foundation, either version 3 of the
+# License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+
+"""A CFood for HDF5 files.
+
+
+This module allows parsing HDF5 files and reproducing their structure in the
+form of Records that reference each other.
+
+HDF5 files are composed of groups and datasets, both of which can have
+attributes.  Groups and datasets are mapped to Records and attributes to
+Properties.
+"""
+
+import re
+from copy import deepcopy
+
+import caosdb as db
+import h5py
+import numpy as np
+from caosadvancedtools.cfood import fileguide
+from caosdb.common.datatype import is_reference
+from caosdb.common.utils import uuid
+
+from ..cfood import (AbstractFileCFood, assure_has_description,
+                     assure_has_parent, assure_has_property,
+                     assure_property_is)
+from ..structure_mapping import (EntityMapping, collect_existing_structure,
+                                 update_structure)
+
+
+def h5_attr_to_property(val):
+    """Return the value and datatype of a CaosDB Property for the given value.
+
+
+    1d arrays are converted to lists.
+    If no suitable Property can be created, (None, None) is returned.
+
+    2d and higher dimensionality arrays are ignored.
+    """
+
+    if isinstance(val, str):
+        return val, db.TEXT
+    elif isinstance(val, complex):
+        return val, db.TEXT
+    else:
+        if not hasattr(val, 'dtype'):
+            raise NotImplementedError("Code assumes only str are missing the"
+                                      " dtype attribute")
+
+        if issubclass(val.dtype.type, np.floating):
+            dtype = db.DOUBLE
+        elif issubclass(val.dtype.type, np.integer):
+            dtype = db.INTEGER
+        elif val.dtype.kind in ['S', 'U']:
+            dtype = db.TEXT
+            val = val.astype(str)
+        elif val.dtype.kind == 'O':
+            if not np.all([isinstance(el, str) for el in val]):
+                raise NotImplementedError("Cannot convert arbitrary objects")
+            dtype = db.TEXT
+            val = val.astype(str)
+        else:
+            raise NotImplementedError("Unknown dtype used")
+
+        if isinstance(val, np.ndarray):
+            if val.ndim > 1:
+                return None, None
+            # The tolist method is on both numpy.ndarray and numpy.generic
+            # and properly converts scalars (including 0-dimensional
+            # numpy.ndarray) to Python scalars and 1D arrays to lists of
+            # Python scalars.
+            if val.ndim != 0:
+                dtype = db.LIST(dtype)
+            val = val.tolist()
+
+        # TODO this can eventually be removed
+
+        if hasattr(val, 'ndim'):
+            if not isinstance(val, np.ndarray) and val.ndim != 0:
+                print(val, val.ndim)
+                raise Exception(
+                    "Implementation assumes that only np.arrays have ndim.")
+
+        return val, dtype
+
+
+class H5CFood(AbstractFileCFood):
+    """H5CFood which consumes an HDF5 file.
+
+    The structure is mapped onto an equivalent structure of interconnected
+    Records.
+
+    Attributes
+    ----------
+    h5file : h5py.File, default None
+        Name of the hdf5-file to read
+    """
+
+    # to be overwritten by subclasses
+
+    def __init__(self, *args, **kwargs):
+        """CFood which consumes HDF5 files."""
+        super().__init__(*args, **kwargs)
+        self.h5file = None
+        self.identifiable_root = None
+        self.root_name = "root"
+        self.hdf5Container = db.Container()
+        self.em = EntityMapping()
+
+    def collect_information(self):
+        self.h5file = h5py.File(fileguide.access(self.crawled_path), 'r')
+
+    @staticmethod
+    def get_re():
+        """Return a regular expression string to match *.h5, *.nc, *.hdf, *.hdf5."""
+        extensions = [
+            "h5",
+            "nc",
+            "hdf",
+            "hdf5",
+        ]
+
+        return AbstractFileCFood.re_from_extensions(extensions)
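# --- Editor's note: behaviour sketch (not part of the patch) ----------------
# Expected conversions of ``h5_attr_to_property`` for typical HDF5 attribute
# values, following its docstring; the exact return values shown here are the
# editor's assumptions.
import numpy as np
import caosdb as db
from caosadvancedtools.cfoods.h5 import h5_attr_to_property

assert h5_attr_to_property("some text") == ("some text", db.TEXT)
assert h5_attr_to_property(np.float64(1.2)) == (1.2, db.DOUBLE)
assert h5_attr_to_property(np.arange(3)) == ([0, 1, 2], db.LIST(db.INTEGER))
assert h5_attr_to_property(np.zeros((2, 2))) == (None, None)  # 2d is ignored
# -----------------------------------------------------------------------------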
+    def create_identifiables(self):
+        """Create identifiables out of groups in the HDF5 file.
+
+        This method will call is_identifiable(h5path, h5object) and create_identifiable(h5path,
+        h5object) on each HDF5 object to decide and actually create the identifiables.
+        """
+        # manually create the identifiable root element: self.identifiable_root
+        self.structure = self.create_structure(self.h5file,
+                                               special_treatment=self.special_treatment,
+                                               root_name=self.root_name)
+
+    def update_identifiables(self):
+        """Check if the identifiables need to be updated.
+
+        In that case also add the updated entities to the list of updatables.
+
+        This method will iterate over the groups and datasets governed by this CFood's
+        identifiables and call ``update_object(path, h5object)`` on each object.
+
+        """
+
+        self.structure._cuid = "root element"
+        self.em.add(self.structure, self.identifiable_root)
+        collect_existing_structure(self.structure, self.identifiable_root,
+                                   self.em)
+        self.to_be_inserted = db.Container()
+        self.insert_missing_structure(self.structure)
+
+        # TODO this is a workaround due to the fact that the caosdb library
+        # changes the objects in the Container if it is inserted. The graph
+        # structure is flattened. I.e. references to other entity objects are
+        # replaced with their IDs. However this code depends on this graph.
+        tmp_copy = deepcopy(self.to_be_inserted)
+        tmp_copy.insert()
+
+        for e1, e2 in zip(tmp_copy, self.to_be_inserted):
+            e2.id = e1.id
+        # End workaround
+
+        # self.update_structure(self.structure)
+        update_structure(self.em, self.to_be_updated, self.structure)
+
+    def special_treatment(self, key, value, dtype):
+        """Define special treatment of attributes.
+
+        To be overwritten by child classes.
+
+        key: attribute name
+        value: attribute value
+        """
+
+        return key, value, dtype
+
+    @classmethod
+    def create_structure(cls, h5obj, create_recordTypes=False, collection=None,
+                         special_treatment=None, root_name="root"):
+        """Create Records and RecordTypes from a given HDF5 object for all
+        items in the tree.  Attributes are added as properties, the
+        values only if the dimension < 2.
+
+        Parameters
+        ----------
+        h5obj : h5py.File
+            a hdf5-file object
+
+        root_name : str, optional
+            Name of the root Record (the Record corresponding to the root
+            node in the HDF5 file); this name is used instead of '/'.
+
+        Returns
+        -------
+        rec : db.Container
+            Contains the Record Types, Records and Properties for the
+            input-tree
+
+        """
+
+        if collection is None:
+            collection = []
+
+        if special_treatment is None:
+            def special_treatment(x, y, z): return x, y, z
+
+        if h5obj.name == "/":
+            name_without_path = root_name
+        else:
+            name_without_path = h5obj.name.split("/")[-1]
+
+        if create_recordTypes:
+            rec = db.RecordType(name=name_without_path)
+        else:
+            rec = db.Record().add_parent(name=name_without_path)
+        collection.append(rec)
+
+        if isinstance(h5obj, h5py.Group):
+            for subgroup in h5obj.keys():
+                subgroup_name = h5obj[subgroup].name.split("/")[-1]
+
+                sub = H5CFood.create_structure(h5obj[subgroup],
+                                               create_recordTypes=create_recordTypes,
+                                               collection=collection,
+                                               special_treatment=special_treatment)
+
+                if create_recordTypes:
+                    rec.add_property(subgroup_name)
+                else:
+                    rec.add_property(subgroup_name, value=sub)
+
+        for key, val in h5obj.attrs.items():
+            # these HDF5-internal bookkeeping attributes are ignored
+
+            if key in ["REFERENCE_LIST", "DIMENSION_LIST", "NAME", "CLASS"]:
+                continue
+
+            val, dtype = h5_attr_to_property(val)
+
+            if val is None and dtype is None:
+                continue
+
+            if create_recordTypes and key.lower() not in ['description']:
+                treated_k, _, treated_dtype = special_treatment(
+                    key, val, dtype)
+
+                if treated_k is not None:
+                    prop = db.Property(name=treated_k, datatype=treated_dtype)
+                    collection.append(prop)
+                    rec.add_property(name=treated_k)
+            else:
+                treated_k, treated_v, treated_dtype = special_treatment(
+                    key, val, dtype)
+
+                if treated_k is not None:
+                    rec.add_property(name=treated_k, value=treated_v,
+                                     datatype=treated_dtype)
+
+        return rec
+
+    def insert_missing_structure(self, target_structure: db.Record):
+        if target_structure._cuid not in self.em.to_existing:
+            self.to_be_inserted.append(target_structure)
+
+        for prop in target_structure.get_properties():
+            if prop.is_reference(server_retrieval=True):
+                self.insert_missing_structure(prop.value)
diff --git a/src/caosadvancedtools/collect_datamodel.py b/src/caosadvancedtools/collect_datamodel.py
index 1ca68068e713dd34ebc3368ad760461578dee4ef..806d15333cac7f745ce2fb82a02e0214ad2b6616 100644
--- a/src/caosadvancedtools/collect_datamodel.py
+++ b/src/caosadvancedtools/collect_datamodel.py
@@ -26,14 +26,19 @@ import argparse
 import os
 
 import caosdb as db
+from caosdb.apiutils import retrieve_entities_with_ids
+
+from export_related import export
 
 
 def get_dm():
-    rts = set([r.name for r in db.execute_query("SELECT name FROM RECORDTYPE")])
+    rts = set([(r.id, r.name) for r
+               in db.execute_query("SELECT name FROM RECORDTYPE")])
 
     if None in rts:
         rts.remove(None)
-    ps = set([r.name for r in db.execute_query("SELECT name FROM PROPERTY")])
+    ps = set([(r.id, r.name) for r
+              in db.execute_query("SELECT name FROM PROPERTY")])
 
     if None in ps:
         ps.remove(None)
@@ -47,18 +52,26 @@ def get_parser():
                    "be stored")
     p.add_argument("-c", "--compare", help="directory where the datamodel that"
                    " shall be compared is stored")
+    p.add_argument("-x", "--xml", action="store_true",
+                   help="store xml as well")
 
     return p
 
 
-def store(directory):
+def store(directory, xml=False):
     rts, ps = get_dm()
     os.makedirs(directory, exist_ok=True)
 
     with open(os.path.join(directory, "recordtypes.txt"), "w") as fi:
-        fi.write(",".join(rts))
+        fi.write(",".join([el[1] for el in rts]))
     with open(os.path.join(directory, "properties.txt"), "w") as fi:
-        fi.write(",".join(ps))
+        fi.write(",".join([el[1] for el in ps]))
+
+    if xml:
+        cont = retrieve_entities_with_ids(
+            [el[0] for el in rts]+[el[0] for el in ps])
+
+        export(cont, directory)
 
 
 def load_dm(directory):
@@ -104,7 +117,7 @@ if __name__ == "__main__":
     args = p.parse_args()
 
     if args.store:
-        store(args.store)
+        store(args.store, xml=args.xml)
 
     if args.compare:
         compare(args.compare)
diff --git a/src/caosadvancedtools/converter/labfolder_api.py b/src/caosadvancedtools/converter/labfolder_api.py
index a29d965b1598285105a06871ee1017adfdf4e222..cf57c0155a3b3970834abb2fc1058215ef7ecba8 100644
--- a/src/caosadvancedtools/converter/labfolder_api.py
+++ b/src/caosadvancedtools/converter/labfolder_api.py
@@ -28,7 +28,7 @@ import time
 
 import html2text
 import caosdb as db
-from labfolder.connection import configure_connection
+from labfolder.connection import configure_connection  # pylint: disable=import-error
 
 
 class Importer(object):
diff --git a/src/caosadvancedtools/crawler.py b/src/caosadvancedtools/crawler.py
index 5dccdd8ce04daf6b6c15c676d195ce02c8d6ae12..0159688c7c7d59e779d576aed54b176e802fca85 100644
--- a/src/caosadvancedtools/crawler.py
+++ b/src/caosadvancedtools/crawler.py
@@ -56,6 +56,7 @@ from .datainconsistency import DataInconsistencyError
 from .datamodel_problems import DataModelProblems
 from .guard import RETRIEVE, ProhibitedException
 from .guard import global_guard as guard
+from .serverside.helper import send_mail as main_send_mail
 from .suppressKnown import SuppressKnown
 
 logger = logging.getLogger(__name__)
@@ -65,6 +66,82 @@ def separated(text):
     return "-"*60 + "\n" + text
 
 
+def apply_list_of_updates(to_be_updated, update_flags={},
+                          update_cache=None, run_id=None):
+    """Update the `to_be_updated` Container, i.e., push the changes to CaosDB
+    after removing possible duplicates.  If a cache is provided, unauthorized
+    updates can be cached for further authorization.
+
+    Parameters
+    ----------
+    to_be_updated : db.Container
+        Container with the entities that will be updated.
+    update_flags : dict, optional
+        Dictionary of CaosDB server flags that will be used for the
+        update. Default is an empty dict.
+    update_cache : UpdateCache or None, optional
+        Cache in which the intended updates will be stored so they can be
+        authorized afterwards. Default is None.
+    run_id : String or None, optional
+        Id with which the pending updates are cached. Only meaningful if
+        `update_cache` is provided. Default is None.
+    """
+
+    if len(to_be_updated) == 0:
+        return
+
+    get_ids_for_entities_with_names(to_be_updated)
+
+    # remove duplicates
+    tmp = db.Container()
+
+    for el in to_be_updated:
+        if el not in tmp:
+            tmp.append(el)
+
+    to_be_updated = tmp
+
+    info = "UPDATE: updating the following entities\n"
+
+    baseurl = db.configuration.get_config()["Connection"]["url"]
+
+    def make_clickable(txt, id):
+        return "<a href='{}/Entity/{}'>{}</a>".format(baseurl, id, txt)
+
+    for el in to_be_updated:
+        info += str("\t" + make_clickable(el.name, el.id)
+                    if el.name is not None
+                    else "\t" + make_clickable(str(el.id), el.id))
+        info += "\n"
+    logger.info(info)
+
+    logger.debug(to_be_updated)
+    try:
+        if len(to_be_updated) > 0:
+            logger.info(
+                "Updating {} Records...".format(
+                    len(to_be_updated)))
+        guard.safe_update(to_be_updated, unique=False,
+                          flags=update_flags)
+    except FileNotFoundError as e:
+        logger.info("Cannot access {}. However, it might be needed for"
+                    " the correct execution".format(e.filename))
+    except ProhibitedException:
+        try:
+            update_cache.insert(to_be_updated, run_id)
+        except IntegrityError as e:
+            logger.warning(
+                "There were problems with the update of {}.".format(
+                    to_be_updated),
+                extra={"identifier": str(to_be_updated),
+                       "category": "update-cache"}
+            )
+            logger.debug(traceback.format_exc())
+            logger.debug(e)
+    except Exception as e:
+        DataModelProblems.evaluate_exception(e)
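# --- Editor's note: usage sketch (not part of the patch) --------------------
# Minimal use of the new module-level ``apply_list_of_updates``, which
# replaces the former ``Crawler.push_identifiables_to_CaosDB`` method.  The
# Record id and Property name below are made up; a running CaosDB server is
# required.
import caosdb as db
from caosadvancedtools.crawler import apply_list_of_updates
from caosadvancedtools.guard import UPDATE
from caosadvancedtools.guard import global_guard as guard

guard.set_level(level=UPDATE)         # otherwise the update is prohibited

rec = db.Record(id=1234)              # hypothetical existing Record
rec.add_property("mean_value", 42.0)  # hypothetical Property
to_be_updated = db.Container()
to_be_updated.append(rec)

apply_list_of_updates(to_be_updated, update_flags={})
# -----------------------------------------------------------------------------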
+
+
 class Crawler(object):
     def __init__(self, cfood_types, use_cache=False,
                  abort_on_exception=True, interactive=True, hideKnown=False,
@@ -202,6 +279,8 @@ class Crawler(object):
             except DataInconsistencyError as e:
                 logger.debug(traceback.format_exc())
                 logger.debug(e)
+            # TODO: Generally: in which cases should exceptions be raised? When is
+            # errors_occured set to True? The expected behavior must be documented.
             except Exception as e:
                 try:
                     DataModelProblems.evaluate_exception(e)
@@ -310,16 +389,17 @@ class Crawler(object):
 
         if self.interactive and "y" != input("Do you want to continue? (y)"):
             return
 
-        logger.info("Inserting or updating Records...")
-
         for cfood in cfoods:
             try:
                 cfood.create_identifiables()
-
                 self._cached_find_or_insert_identifiables(cfood.identifiables)
 
                 cfood.update_identifiables()
-                self.push_identifiables_to_CaosDB(cfood)
+                apply_list_of_updates(
+                    cfood.to_be_updated,
+                    cfood.update_flags,
+                    update_cache=self.update_cache,
+                    run_id=self.run_id)
             except FileNotFoundError as e:
                 logger.info("Cannot access {}. However, it might be needed for"
                             " the correct execution".format(e.filename))
@@ -502,7 +582,6 @@ carefully and if the changes are ok, click on the following link:
 """.format(url=caosdb_config["Connection"]["url"],
            filename=filename,
            changes="\n".join(changes))
-        sendmail = caosdb_config["Misc"]["sendmail"]
         try:
             fro = caosdb_config["advancedtools"]["crawler.from_mail"]
             to = caosdb_config["advancedtools"]["crawler.to_mail"]
@@ -512,58 +591,14 @@ carefully and if the changes are ok, click on the following link:
                          "'from_mail' and 'to_mail'.")
 
             return
 
-        p = subprocess.Popen([sendmail, "-f", fro, to], stdin=subprocess.PIPE)
-        p.communicate(input=text.encode())
-
-    def push_identifiables_to_CaosDB(self, cfood):
-        """
-        Updates the to_be_updated Container, i.e. pushes the changes to CaosDB
-        """
-
-        if len(cfood.to_be_updated) == 0:
-            return
-
-        get_ids_for_entities_with_names(cfood.to_be_updated)
-
-        # remove duplicates
-        tmp = db.Container()
-
-        for el in cfood.to_be_updated:
-            if el not in tmp:
-                tmp.append(el)
-
-        cfood.to_be_updated = tmp
-
-        info = "UPDATE: updating the following entities\n"
-
-        for el in cfood.to_be_updated:
-            info += str("\t" + el.name if el.name is not None else "\t" +
-                        str(el.id))
-            info += "\n"
-        logger.info(info)
-
-        logger.debug(cfood.to_be_updated)
-        try:
-            guard.safe_update(cfood.to_be_updated, unique=False)
-        except FileNotFoundError as e:
-            logger.info("Cannot access {}.
However, it might be needed for" - " the correct execution".format(e.filename)) - except ProhibitedException: - try: - self.update_cache.insert(cfood.to_be_updated, self.run_id) - except IntegrityError as e: - logger.warning( - "There were problems with the update of {}.".format( - cfood.to_be_updated), - extra={"identifier": str(cfood.to_be_updated), - "category": "update-cache"} - ) - logger.debug(traceback.format_exc()) - logger.debug(e) - except Exception as e: - DataModelProblems.evaluate_exception(e) + main_send_mail( + from_addr=fro, + to=to, + subject="Crawler Update", + body=text) # TODO remove static? + @staticmethod def find_or_insert_identifiables(identifiables): """ Sets the ids of identifiables (that do not have already an id from the @@ -574,40 +609,51 @@ carefully and if the changes are ok, click on the following link: # looking for matching entities in CaosDB when there is no valid id # i.e. there was none set from a cache + existing = [] + inserted = [] + for ent in identifiables: if ent.id is None or ent.id < 0: logger.debug("Looking for: {}".format( ent.id if ent.id is not None else ent.name)) - existing = Crawler.find_existing(ent) + found = Crawler.find_existing(ent) - if existing is not None: - ent.id = existing.id + if found is not None: + ent.id = found.id else: logger.debug("Id is known of: {}".format(ent)) - # insert missing, i.e. those which are not valid - missing_identifiables = db.Container() - missing_identifiables.extend([ent for ent in identifiables - if ent.id is None or ent.id < 0]) - # TODO the following should not be necessary. Fix it - - for ent in missing_identifiables: - ent.id = None + # insert missing, i.e. those which are not valid + if ent.id is None or ent.id < 0: + missing = ent + ent.id = None + else: + missing = None + existing.append(ent) - if len(missing_identifiables) > 0: - info = "Going to insert the following entities:\n" + if missing: + try: + guard.safe_insert(missing, unique=False, + flags={"force-missing-obligatory": "ignore"}) + inserted.append(ent) + except Exception as e: + DataModelProblems.evaluate_exception(e) + if len(existing) > 0: + info = "Identified the following existing entities:\n" - for ent in missing_identifiables: + for ent in existing: info += str(ent)+"\n" logger.debug(info) + else: + logger.debug("Did not identify any existing entities") + if len(inserted) > 0: + info = "Inserted the following entities:\n" - if len(missing_identifiables) == 0: - logger.debug("No new entities to be inserted.") + for ent in inserted: + info += str(ent)+"\n" + logger.debug(info) else: - try: - guard.safe_insert(missing_identifiables, unique=False) - except Exception as e: - DataModelProblems.evaluate_exception(e) + logger.debug("Did not insert any new entities") logger.debug("Retrieving entities from CaosDB...") identifiables.retrieve(unique=True, raise_exception_on_error=False) @@ -623,6 +669,10 @@ carefully and if the changes are ok, click on the following link: raise ValueError("The identifiable must have at least one parent.") query_string = "FIND Record " + ident.get_parents()[0].name query_string += " WITH " + if ident.name is None and len(ident.get_properties()) == 0: + raise ValueError( + "The identifiable must have features to identify it.") + if ident.name is not None: query_string += "name='{}' AND".format(ident.name) @@ -683,8 +733,8 @@ class FileCrawler(Crawler): @staticmethod def query_files(path): - query_str = "FIND FILE WHICH IS STORED AT " + ( - path if path.endswith("/") else path + "/") + "**" + query_str = 
"FIND FILE WHICH IS STORED AT '" + ( + path if path.endswith("/") else path + "/") + "**'" q_info = "Sending the following query: '" + query_str + "'\n" files = db.execute_query(query_str) logger.info( diff --git a/src/caosadvancedtools/example_cfood.py b/src/caosadvancedtools/example_cfood.py index 6111d95defc37bbb6d836feec3fa3d2e4e3d91ab..2e395d5c3030508087e25a7156d35c8954d223d7 100644 --- a/src/caosadvancedtools/example_cfood.py +++ b/src/caosadvancedtools/example_cfood.py @@ -26,8 +26,8 @@ from .cfood import AbstractFileCFood, assure_has_property class ExampleCFood(AbstractFileCFood): - @staticmethod - def get_re(): + @classmethod + def get_re(cls): return (r".*/(?P<species>[^/]+)/" r"(?P<date>\d{4}-\d{2}-\d{2})/README.md") diff --git a/src/caosadvancedtools/export_related.py b/src/caosadvancedtools/export_related.py index 47fe2f4900add818e940fa81466bb9c98a2f0223..69b588c34cc7c8123ab4291f6d8f76f06e7400be 100755 --- a/src/caosadvancedtools/export_related.py +++ b/src/caosadvancedtools/export_related.py @@ -47,6 +47,9 @@ def get_ids_of_related_entities(entity): """ entities = [] + if isinstance(entity, int): + entity = db.Entity(id=entity).retrieve() + for par in entity.parents: entities.append(par.id) @@ -76,32 +79,32 @@ def recursively_collect_related(entity): """ all_entities = db.Container() all_entities.append(entity) - ids = set([entity.id]) - new_entities = [entity] + ids = set() + new_ids = set([entity.id]) - while new_entities: - new_ids = set() + while new_ids: + ids.update(new_ids) - for ent in new_entities: - new_ids.update(get_ids_of_related_entities(ent)) + for eid in list(new_ids): + new_ids.update(get_ids_of_related_entities(eid)) new_ids = new_ids - ids - new_entities = retrieve_entities_with_ids(list(new_ids)) - ids.update([e.id for e in new_entities]) - all_entities.extend(new_entities) - return all_entities + return retrieve_entities_with_ids(list(ids)) def invert_ids(entities): apply_to_ids(entities, lambda x: x*-1) -def export(rec_id, directory="."): +def export_related_to(rec_id, directory="."): if not isinstance(rec_id, int): raise ValueError("rec_id needs to be an integer") ent = db.execute_query("FIND {}".format(rec_id), unique=True) cont = recursively_collect_related(ent) + export(cont, directory=directory) + +def export(cont, directory="."): directory = os.path.abspath(directory) dl_dir = os.path.join(directory, "downloads") @@ -119,6 +122,9 @@ def export(rec_id, directory="."): print("Failed download of:", target) invert_ids(cont) + + for el in cont: + el.version = None xml = etree.tounicode(cont.to_xml( local_serialization=True), pretty_print=True) @@ -147,4 +153,4 @@ if __name__ == "__main__": parser = defineParser() args = parser.parse_args() - export(args.id, directory=args.directory) + export_related_to(args.id, directory=args.directory) diff --git a/src/caosadvancedtools/import_from_xml.py b/src/caosadvancedtools/import_from_xml.py index 0bf9b1c0cbb478bb75687f9f3e41ca2d4960d2c0..9d0e03f649db771147915740cabf201fae910760 100755 --- a/src/caosadvancedtools/import_from_xml.py +++ b/src/caosadvancedtools/import_from_xml.py @@ -57,7 +57,7 @@ def import_xml(filename, rerun=False, interactive=True): tmpfile = create_dummy_file() model = [] - files = [] + files = {} # add files to files list and properties and record types to model @@ -70,19 +70,19 @@ def import_xml(filename, rerun=False, interactive=True): el.file = target else: el.file = tmpfile - files.append(el) + files[el.path] = el if (isinstance(el, db.Property) or isinstance(el, db.RecordType)): 
model.append(el) # remove entities of the model from the container - for el in model+files: + for el in model+list(files.values()): cont.remove(el) id_mapping = {} - for el in model+files: + for el in model+list(files.values()): id_mapping[el.id] = el # insert/update the model @@ -93,10 +93,10 @@ def import_xml(filename, rerun=False, interactive=True): # insert files if not rerun: - for _, el in enumerate(files): + for _, el in enumerate(files.values()): r = el.insert(unique=False) else: - for _, el in enumerate(files): + for _, el in enumerate(files.values()): el.id = None el.retrieve() diff --git a/src/caosadvancedtools/models/__init__.py b/src/caosadvancedtools/models/__init__.py index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..d70cb810488ba846e5311cbb50991c3eb32bdfad 100644 --- a/src/caosadvancedtools/models/__init__.py +++ b/src/caosadvancedtools/models/__init__.py @@ -0,0 +1,2 @@ +"""Submodule for working with data models. +""" diff --git a/src/caosadvancedtools/models/data_model.py b/src/caosadvancedtools/models/data_model.py index f4fd7c7e311d6e0d798bac5054b5614f8525ae83..d9079e6196b4751ca86ba41275108330b946d57c 100644 --- a/src/caosadvancedtools/models/data_model.py +++ b/src/caosadvancedtools/models/data_model.py @@ -23,11 +23,23 @@ # ** end header # from copy import deepcopy +# TODO(fspreck) for backwards compatibility with Python < 3.9 but this is +# actually +# [deprecated](https://docs.python.org/3/library/typing.html#typing.List), so +# remove this, when we drop support for old Python versions. +from typing import List import caosdb as db from caosdb.apiutils import compare_entities, describe_diff +CAOSDB_INTERNAL_PROPERTIES = [ + "description", + "name", + "unit", +] + + class DataModel(dict): """Provides tools for managing a data model. @@ -68,14 +80,14 @@ class DataModel(dict): else: super().__init__(args) - def append(self, entity): + def append(self, entity: db.Entity): self[entity.name] = entity - def extend(self, entities): + def extend(self, entities: List[db.Entity]): for entity in entities: self.append(entity) - def sync_data_model(self, noquestion=False): + def sync_data_model(self, noquestion: bool = False, verbose: bool = True): """Synchronize this DataModel with a CaosDB instance. Updates existing entities from the CaosDB instance and inserts @@ -100,44 +112,57 @@ class DataModel(dict): self.sync_ids_by_name(tmp_exist) if len(non_existing_entities) > 0: - print("New entities:") + if verbose: + print("New entities:") - for ent in non_existing_entities: - print(ent.name) + for ent in non_existing_entities: + print(ent.name) if noquestion or str(input("Do you really want to insert those " - "entities? [y] ")).lower() == "y": + "entities? 
[y/N] ")).lower() == "y": non_existing_entities.insert() self.sync_ids_by_name(non_existing_entities) - print("Updated entities.") + if verbose: + print("Updated entities.") else: return else: - print("No new entities.") + if verbose: + print("No new entities.") if len(existing_entities) > 0: - print("Inspecting changes that will be made...") + if verbose: + print("Inspecting changes that will be made...") any_change = False for ent in existing_entities: - q = db.Query("FIND * with id={}".format(ent.id)) - ref = q.execute(unique=True) + if ent.name in CAOSDB_INTERNAL_PROPERTIES: + # Workaround for the usage of internal properties like name + # in via the extern keyword: + ref = db.Property(name=ent.name).retrieve() + else: + query = db.Query(f"FIND * with id={ent.id}") + ref = query.execute(unique=True) diff = (describe_diff(*compare_entities(ent, ref ), name=ent.name)) if diff != "": - print(diff) + if verbose: + print(diff) any_change = True if any_change: if noquestion or input("Do you really want to apply the above " - "changes? [y]") == "y": + "changes? [y/N]") == "y": existing_entities.update() - print("Synchronized existing entities.") + if verbose: + print("Synchronized existing entities.") else: - print("No differences found. No update") + if verbose: + print("No differences found. No update") else: - print("No existing entities updated.") + if verbose: + print("No existing entities updated.") @staticmethod def get_existing_entities(entities): @@ -171,9 +196,7 @@ class DataModel(dict): A iterable with entities. names : iterable of str Only entities which do *not* have one of these names will end up in - the - - returned iterable. + the returned iterable. Returns ------- diff --git a/src/caosadvancedtools/models/parser.py b/src/caosadvancedtools/models/parser.py index d2fbf506a6f1435481ab25de29e664722f71c46a..ad149222b5b90671a50943dc00bc9de8074a42f1 100644 --- a/src/caosadvancedtools/models/parser.py +++ b/src/caosadvancedtools/models/parser.py @@ -1,5 +1,24 @@ +# This file is a part of the CaosDB Project. +# +# Copyright (C) 2022 IndiScale GmbH <info@indiscale.com> +# Copyright (C) 2022 Florian Spreckelsen <f.spreckelsen@indiscale.com> +# Copyright (C) 2022 Daniel Hornung <d.hornung@indiscale.com> +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see <https://www.gnu.org/licenses/>. + """ -This script provides the a function to read a DataModel from a yaml file. +This module (and script) provides methods to read a DataModel from a YAML file. If a file name is passed to parse_model_from_yaml it is parsed and a DataModel is created. The yaml file needs to be structured in a certain way which will be @@ -13,20 +32,26 @@ and will be added with the respective importance. These properties can be RecordTypes or Properties and can be defined right there. Every Property or RecordType only needs to be defined once anywhere. When it is not defined, simply the name can be supplied with no value. 
 
 
 # Taken from https://stackoverflow.com/a/53647080, CC-BY-SA, 2018 by
 # https://stackoverflow.com/users/2572431/augurar
+
+
 class SafeLineLoader(yaml.SafeLoader):
     """Load a line and keep meta-information.
 
@@ -55,6 +110,7 @@ class SafeLineLoader(yaml.SafeLoader):
         mapping = super().construct_mapping(node, deep=deep)
         # Add 1 so line numbering starts at 1
         mapping['__line__'] = node.start_mark.line + 1
+
         return mapping
 # End of https://stackoverflow.com/a/53647080
 
@@ -72,20 +128,62 @@ class YamlDefinitionError(RuntimeError):
         super().__init__(template.format(line))
 
 
+class JsonSchemaDefinitionError(RuntimeError):
+    # @author Florian Spreckelsen
+    # @date 2022-02-17
+    # @review Daniel Hornung 2022-02-18
+    def __init__(self, msg):
+        super().__init__(msg)
+
+
 def parse_model_from_yaml(filename):
     """Shortcut if the Parser object is not needed."""
     parser = Parser()
+
     return parser.parse_model_from_yaml(filename)
 
 
 def parse_model_from_string(string):
     """Shortcut if the Parser object is not needed."""
     parser = Parser()
+
     return parser.parse_model_from_string(string)
 
 
+def parse_model_from_json_schema(filename: str):
+    """Return a datamodel parsed from a json schema definition.
+
+    Parameters
+    ----------
+    filename : str
+        The path of the json schema file that is to be parsed
+
+    Returns
+    -------
+    out : DataModel
+        The datamodel generated from the input schema which then can be used for
+        synchronizing with CaosDB.
+
+    Note
+    ----
+    This is an experimental feature, see ``JsonSchemaParser`` for information
+    about the limitations of the current implementation.
+ + """ + # @author Florian Spreckelsen + # @date 2022-02-17 + # @review Daniel Hornung 2022-02-18 + parser = JsonSchemaParser() + + return parser.parse_model_from_json_schema(filename) + + class Parser(object): def __init__(self): + """Initialize an empty parser object and initialize the dictionary of entities and the list of + treated elements. + + """ self.model = {} self.treated = [] @@ -95,15 +193,16 @@ class Parser(object): Parameters ---------- filename : str - The path to the YAML file. + The path to the YAML file. Returns ------- out : DataModel - The created DataModel + The created DataModel """ with open(filename, 'r') as outfile: ymlmodel = yaml.load(outfile, Loader=SafeLineLoader) + return self._create_model_from_dict(ymlmodel) def parse_model_from_string(self, string): @@ -112,14 +211,15 @@ class Parser(object): Parameters ---------- string : str - The YAML string. + The YAML string. Returns ------- out : DataModel - The created DataModel + The created DataModel """ ymlmodel = yaml.load(string, Loader=SafeLineLoader) + return self._create_model_from_dict(ymlmodel) def _create_model_from_dict(self, ymlmodel): @@ -128,12 +228,12 @@ class Parser(object): Parameters ---------- ymlmodel : dict - The dictionary parsed from a YAML file. + The dictionary parsed from a YAML file. Returns ------- out : DataModel - The created DataModel + The created DataModel """ if not isinstance(ymlmodel, dict): @@ -147,17 +247,19 @@ class Parser(object): # a record type with the name of the element. # The retrieved entity will be added to the model. # If no entity with that name is found an exception is raised. + if "extern" not in ymlmodel: ymlmodel["extern"] = [] for name in ymlmodel["extern"]: - if db.execute_query("COUNT Property {}".format(name)) > 0: - self.model[name] = db.execute_query( - "FIND Property WITH name={}".format(name), unique=True) - - elif db.execute_query("COUNT RecordType {}".format(name)) > 0: - self.model[name] = db.execute_query( - "FIND RecordType WITH name={}".format(name), unique=True) + if name in CAOSDB_INTERNAL_PROPERTIES: + self.model[name] = db.Property(name=name).retrieve() + continue + for role in ("Property", "RecordType", "Record", "File"): + if db.execute_query("COUNT {} {}".format(role, name)) > 0: + self.model[name] = db.execute_query( + "FIND {} WITH name={}".format(role, name), unique=True) + break else: raise Exception("Did not find {}".format(name)) @@ -169,7 +271,7 @@ class Parser(object): self._add_entity_to_model(name, entity) # initialize recordtypes self._set_recordtypes() - self._check_datatypes() + self._check_and_convert_datatypes() for name, entity in ymlmodel.items(): self._treat_entity(name, entity, line=ymlmodel["__line__"]) @@ -185,31 +287,38 @@ class Parser(object): Parameters ---------- name : - The value to be converted to a string. + The value to be converted to a string. context : obj - Will be printed in the case of warnings. + Will be printed in the case of warnings. Returns ------- out : str - If `name` was a string, return it. Else return str(`name`). + If `name` was a string, return it. Else return str(`name`). """ + if name is None: print("WARNING: Name of this context is None: {}".format(context), file=sys.stderr) + if not isinstance(name, str): name = str(name) + return name def _add_entity_to_model(self, name, definition): """ adds names of Properties and RecordTypes to the model dictionary Properties are also initialized. + + name is the key of the yaml element and definition the value. 
""" + if name == "__line__": return name = self._stringify(name) + if name not in self.model: self.model[name] = None @@ -220,20 +329,42 @@ class Parser(object): and isinstance(definition, dict) # is it a property and "datatype" in definition - # but not a list - and not definition["datatype"].startswith("LIST")): + # but not simply an RT of the model + and not (_get_listdatatype(definition["datatype"]) == name and + _get_listdatatype(definition["datatype"]) in self.model)): # and create the new property self.model[name] = db.Property(name=name, datatype=definition["datatype"]) + elif (self.model[name] is None and isinstance(definition, dict) + and "role" in definition): + if definition["role"] == "RecordType": + self.model[name] = db.RecordType(name=name) + elif definition["role"] == "Record": + self.model[name] = db.Record(name=name) + elif definition["role"] == "File": + # TODO(fspreck) Implement files at some later point in time + raise NotImplementedError( + "The definition of file objects is not yet implemented.") + + # self.model[name] = db.File(name=name) + elif definition["role"] == "Property": + self.model[name] = db.Property(name=name) + else: + raise RuntimeError("Unknown role {} in definition of entity.".format( + definition["role"])) - # add other definitions recursively + # for setting values of properties directly: + if not isinstance(definition, dict): + return + # add other definitions recursively for prop_type in ["recommended_properties", "suggested_properties", "obligatory_properties"]: if prop_type in definition: # Empty property mapping should be allowed. + if definition[prop_type] is None: definition[prop_type] = {} try: @@ -244,36 +375,66 @@ class Parser(object): except AttributeError as ate: if ate.args[0].endswith("'items'"): line = definition["__line__"] + if isinstance(definition[prop_type], list): line = definition[prop_type][0]["__line__"] raise YamlDefinitionError(line) from None raise def _add_to_recordtype(self, ent_name, props, importance): - """Add properties to a RecordType.""" + """Add properties to a RecordType. + + Parameters + ---------- + ent_name : str + The name of the entity to which the properties shall be added. + + props : dict [str -> dict or :doc:`Entity`] + The properties, indexed by their names. Properties may be given as :doc:`Entity` objects + or as dictionaries. + + importance + The importance as used in :doc:`Entity.add_property`. + + Returns + ------- + None + + """ + for n, e in props.items(): if n in KEYWORDS: if n in KEYWORDS_IGNORED: continue raise YamlDefinitionError("Unexpected keyword in line {}: {}".format( props["__line__"], n)) + if n == "__line__": continue n = self._stringify(n) - if isinstance(e, dict) and "datatype" in e and e["datatype"].startswith("LIST"): - match = re.match(r"LIST[(](.*)[)]", e["datatype"]) + if isinstance(e, dict): + if "datatype" in e and _get_listdatatype(e["datatype"]) is not None: + # Reuse the existing datatype for lists. 
+                    datatype = db.LIST(_get_listdatatype(e["datatype"]))
+                else:
+                    # Ignore a possible e["datatype"] here if it's not a list
+                    # since it has been treated in the definition of the
+                    # property (entity) already
+                    datatype = None
+                if "value" in e:
+                    value = e["value"]
+                else:
+                    value = None
 
-            if match is None:
-                raise ValueError("List datatype definition is wrong")
-            dt = db.LIST(match.group(1))
-            self.model[ent_name].add_property(name=n,
-                                              importance=importance,
-                                              datatype=dt
-                                              )
             else:
-                self.model[ent_name].add_property(name=n,
-                                                  importance=importance)
+                value = e
+                datatype = None
+
+            self.model[ent_name].add_property(name=n,
+                                              value=value,
+                                              importance=importance,
+                                              datatype=datatype)
 
     def _inherit(self, name, prop, inheritance):
         if not isinstance(prop, list):
@@ -287,6 +448,7 @@ class Parser(object):
 
     def _treat_entity(self, name, definition, line=None):
         """Parse the definition and the information to the entity."""
+
         if name == "__line__":
             return
         name = self._stringify(name)
@@ -295,6 +457,10 @@ class Parser(object):
         if definition is None:
             return
 
+        # for setting values of properties directly:
+        if not isinstance(definition, dict):
+            return
+
         if ("datatype" in definition
                 and definition["datatype"].startswith("LIST")):
 
@@ -311,23 +477,29 @@ class Parser(object):
                 if prop_name == "unit":
                     self.model[name].unit = prop
 
+                elif prop_name == "value":
+                    self.model[name].value = prop
+
                 elif prop_name == "description":
                     self.model[name].description = prop
 
                 elif prop_name == "recommended_properties":
-                    self._add_to_recordtype(name, prop, importance=db.RECOMMENDED)
+                    self._add_to_recordtype(
+                        name, prop, importance=db.RECOMMENDED)
 
                     for n, e in prop.items():
                         self._treat_entity(n, e)
 
                 elif prop_name == "obligatory_properties":
-                    self._add_to_recordtype(name, prop, importance=db.OBLIGATORY)
+                    self._add_to_recordtype(
+                        name, prop, importance=db.OBLIGATORY)
 
                     for n, e in prop.items():
                         self._treat_entity(n, e)
 
                 elif prop_name == "suggested_properties":
-                    self._add_to_recordtype(name, prop, importance=db.SUGGESTED)
+                    self._add_to_recordtype(
+                        name, prop, importance=db.SUGGESTED)
 
                     for n, e in prop.items():
                         self._treat_entity(n, e)
 
@@ -336,6 +508,10 @@ class Parser(object):
                 elif prop_name == "datatype":
                     continue
 
+                # role has already been used
+                elif prop_name == "role":
+                    continue
+
                 elif prop_name == "inherit_from_obligatory":
                     self._inherit(name, prop, db.OBLIGATORY)
                 elif prop_name == "inherit_from_recommended":
@@ -353,21 +529,53 @@ class Parser(object):
                 raise e
         self.treated.append(name)
 
-    def _check_datatypes(self):
+    def _check_and_convert_datatypes(self):
         """ checks if datatype is valid.
-        datatype of properties is simply initialized with string. Here over
-        properties is iterated and datatype is corrected. """
+
+        datatype of properties is simply initialized with string.  Here, we
+        iterate over properties and check whether it is a base datatype or
+        a name that was defined in the model (or extern part).
+
+        The string representations are replaced with CaosDB objects.
+
+        """
 
         for key, value in self.model.items():
+
             if isinstance(value, db.Property):
-                if value.datatype in self.model:
-                    value.datatype = self.model[value.datatype]
-                else:
-                    # get the datatype
-                    try:
-                        value.datatype = db.__getattribute__(value.datatype)
-                    except AttributeError:
-                        raise ValueError("Unknown Datatype.")
+                dtype = value.datatype
+                is_list = False
+
+                if _get_listdatatype(value.datatype) is not None:
+                    dtype = _get_listdatatype(value.datatype)
+                    is_list = True
+
+                if dtype in self.model:
+                    if is_list:
+                        value.datatype = db.LIST(self.model[dtype])
+                    else:
+                        value.datatype = self.model[dtype]
+
+                    continue
+
+                if dtype in [db.DOUBLE,
+                             db.REFERENCE,
+                             db.TEXT,
+                             db.DATETIME,
+                             db.INTEGER,
+                             db.FILE,
+                             db.BOOLEAN]:
+
+                    if is_list:
+                        value.datatype = db.LIST(db.__getattribute__(  # pylint: disable=no-member
+                            dtype))
+                    else:
+                        value.datatype = db.__getattribute__(  # pylint: disable=no-member
+                            dtype)
+
+                    continue
+
+                raise ValueError("Property {} has an unknown datatype: {}".format(
+                    value.name, value.datatype))
 
     def _set_recordtypes(self):
         """ properties are defined in first iteration; set remaining as RTs """
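# --- Editor's note: usage sketch (not part of the patch) --------------------
# The new ``role`` and ``value`` keywords in a YAML model: ``role: Record``
# creates a Record instead of a RecordType, and ``value`` assigns a fixed
# property value.  The entity names below are made up for illustration.
from caosadvancedtools.models.parser import parse_model_from_string

model = parse_model_from_string("""
DefaultPixelSize:
  role: Record
  obligatory_properties:
    pixel_size:
      datatype: DOUBLE
      value: 0.25
""")
# model["DefaultPixelSize"] is now a db.Record with pixel_size = 0.25
# -----------------------------------------------------------------------------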
@@ -377,6 +585,216 @@ class Parser(object):
                 self.model[key] = db.RecordType(name=key)
 
 
+class JsonSchemaParser(Parser):
+    """Extends the yaml parser to read in datamodels defined in a json schema.
+
+    **EXPERIMENTAL:** While this class can already be used to create data models
+    from basic json schemas, there are the following limitations and missing
+    features:
+
+    * Due to limitations of json-schema itself, we currently do not support
+      inheritance in the imported data models
+    * The same goes for suggested properties of RecordTypes
+    * Currently, ``$defs`` and ``$ref`` in the input schema are not resolved.
+    * Already defined RecordTypes and (scalar) Properties can't be re-used as
+      list properties
+    * Reference properties that are different from the referenced RT
+      (although this is possible for lists of references)
+    * Values
+    * Roles
+    * The extern keyword from the yaml parser
+    * Currently, a json-schema cannot be transformed into a data model if its
+      root element isn't a RecordType (or Property) with ``title`` and ``type``.
+
+    """
+    # @author Florian Spreckelsen
+    # @date 2022-02-17
+    # @review Timm Fitschen 2022-02-30
+
+    def parse_model_from_json_schema(self, filename: str):
+        """Return a datamodel created from the definition in the json schema in
+        `filename`.
+
+        Parameters
+        ----------
+        filename : str
+            The path to the json-schema file containing the datamodel definition
+
+        Returns
+        -------
+        out : DataModel
+            The created DataModel
+        """
+        # @author Florian Spreckelsen
+        # @date 2022-02-17
+        # @review Timm Fitschen 2022-02-30
+        with open(filename, 'r') as schema_file:
+            model_dict = json.load(schema_file)
+
+        return self._create_model_from_dict(model_dict)
+
+    def _create_model_from_dict(self, model_dict: Union[dict, List[dict]]):
+        """Parse a dictionary and return the Datamodel created from it.
+
+        The dictionary was typically created from the model definition in a
+        json schema file.
+
+        Parameters
+        ----------
+        model_dict : dict or list[dict]
+            One or several dictionaries read in from a json-schema file
+
+        Returns
+        -------
+        out : DataModel
+            The datamodel defined in `model_dict`
+        """
+        # @review Timm Fitschen 2022-02-30
+        if isinstance(model_dict, dict):
+            model_dict = [model_dict]
+
+        for ii, elt in enumerate(model_dict):
+            if "title" not in elt:
+                raise JsonSchemaDefinitionError(
+                    f"Object {ii+1} is lacking the `title` key word")
+            if "type" not in elt:
+                raise JsonSchemaDefinitionError(
+                    f"Object {ii+1} is lacking the `type` key word")
+            # Check if this is a valid Json Schema
+            try:
+                jsonschema.Draft202012Validator.check_schema(elt)
+            except jsonschema.SchemaError as err:
+                raise JsonSchemaDefinitionError(
+                    f"Json Schema error in {elt['title']}:\n{str(err)}") from err
+            name = self._stringify(elt["title"], context=elt)
+            self._treat_element(elt, name)
+
+        return DataModel(self.model.values())
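# --- Editor's note: usage sketch (not part of the patch) --------------------
# Feeding a minimal JSON Schema to ``parse_model_from_json_schema``.  The
# schema and its contents are made up; "title" becomes the RecordType name,
# "required" maps to OBLIGATORY importance, and "format": "date" yields a
# DATETIME Property.
import json
import tempfile
from caosadvancedtools.models.parser import parse_model_from_json_schema

schema = {
    "title": "Experiment",
    "type": "object",
    "required": ["date"],
    "properties": {
        "date": {"type": "string", "format": "date"},
        "temperature": {"type": "number"},
    },
}

with tempfile.NamedTemporaryFile("w", suffix=".json") as schema_file:
    json.dump(schema, schema_file)
    schema_file.flush()
    model = parse_model_from_json_schema(schema_file.name)

# model now maps "Experiment" to a RecordType with an obligatory DATETIME
# Property "date" and a recommended DOUBLE Property "temperature".
# -----------------------------------------------------------------------------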
+
+    def _get_atomic_datatype(self, elt):
+        # @review Timm Fitschen 2022-02-30
+        if elt["type"] == "string":
+            if "format" in elt and elt["format"] in ["date", "date-time"]:
+                return db.DATETIME
+            else:
+                return db.TEXT
+        elif elt["type"] == "integer":
+            return db.INTEGER
+        elif elt["type"] == "number":
+            return db.DOUBLE
+        elif elt["type"] == "boolean":
+            return db.BOOLEAN
+        else:
+            raise JsonSchemaDefinitionError(f"Unknown atomic type in {elt}.")
+
+    def _treat_element(self, elt: dict, name: str):
+        # @review Timm Fitschen 2022-02-30
+        force_list = False
+        if name in self.model:
+            return self.model[name], force_list
+        if "type" not in elt:
+            # Each element must have a specific type
+            raise JsonSchemaDefinitionError(
+                f"`type` is missing in element {name}.")
+        if name == "name":
+            # This is identified with the CaosDB name property as long as the
+            # type is correct.
+            if not elt["type"] == "string":
+                raise JsonSchemaDefinitionError(
+                    "The 'name' property must be string-typed, otherwise it cannot "
+                    "be identified with CaosDB's name property."
+                )
+            return None, force_list
+        if "enum" in elt:
+            ent = self._treat_enum(elt, name)
+        elif elt["type"] in JSON_SCHEMA_ATOMIC_TYPES:
+            ent = db.Property(
+                name=name, datatype=self._get_atomic_datatype(elt))
+        elif elt["type"] == "object":
+            ent = self._treat_record_type(elt, name)
+        elif elt["type"] == "array":
+            ent, force_list = self._treat_list(elt, name)
+        else:
+            raise NotImplementedError(
+                f"Cannot parse items of type '{elt['type']}' (yet).")
+        if "description" in elt and ent.description is None:
+            # There is a description and it hasn't been set by another
+            # treat_something function
+            ent.description = elt["description"]
+
+        self.model[name] = ent
+        return ent, force_list
+
+    def _treat_record_type(self, elt: dict, name: str):
+        # @review Timm Fitschen 2022-02-30
+        rt = db.RecordType(name=name)
+        if "required" in elt:
+            required = elt["required"]
+        else:
+            required = []
+        if "properties" in elt:
+            for key, prop in elt["properties"].items():
+                if "title" in prop:
+                    name = self._stringify(prop["title"])
+                else:
+                    name = self._stringify(key)
+                prop_ent, force_list = self._treat_element(prop, name)
+                if prop_ent is None:
+                    # Nothing to be appended since the property has to be
+                    # treated specially.
+                    continue
+                importance = db.OBLIGATORY if key in required else db.RECOMMENDED
+                if not force_list:
+                    rt.add_property(prop_ent, importance=importance)
+                else:
+                    # Special case of rt used as a list property
+                    rt.add_property(prop_ent, importance=importance,
+                                    datatype=db.LIST(prop_ent))
+
+        if "description" in elt:
+            rt.description = elt["description"]
+        return rt
+
+    def _treat_enum(self, elt: dict, name: str):
+        # @review Timm Fitschen 2022-02-30
+        if "type" in elt and elt["type"] == "integer":
+            raise NotImplementedError(
+                "Integer-enums are not allowed until "
+                "https://gitlab.indiscale.com/caosdb/src/caosdb-server/-/issues/224 "
+                "has been fixed."
+            )
+        rt = db.RecordType(name=name)
+        for enum_elt in elt["enum"]:
+            rec = db.Record(name=self._stringify(enum_elt))
+            rec.add_parent(rt)
+            self.model[enum_elt] = rec
+
+        return rt
+
+    def _treat_list(self, elt: dict, name: str):
+        # @review Timm Fitschen 2022-02-30
+
+        if "items" not in elt:
+            raise JsonSchemaDefinitionError(
+                f"The definition of the list items is missing in {elt}.")
+        items = elt["items"]
+        if "enum" in items:
+            return self._treat_enum(items, name), True
+        if items["type"] in JSON_SCHEMA_ATOMIC_TYPES:
+            datatype = db.LIST(self._get_atomic_datatype(items))
+            return db.Property(name=name, datatype=datatype), False
+        if items["type"] == "object":
+            if "title" not in items or self._stringify(items["title"]) == name:
+                # Property is RecordType
+                return self._treat_record_type(items, name), True
+            else:
+                # List property will be an entity of its own with a name
+                # different from the referenced RT
+                ref_rt = self._treat_record_type(
+                    items, self._stringify(items["title"]))
+                self.model[ref_rt.name] = ref_rt
+                return db.Property(name=name, datatype=db.LIST(ref_rt)), False
+
+
 if __name__ == "__main__":
     model = parse_model_from_yaml('data_model.yml')
     print(model)
diff --git a/src/caosadvancedtools/pandoc_header_tools.py b/src/caosadvancedtools/pandoc_header_tools.py
index 262defd2e46ea1a6fbe80ab6c476bb8f311cc9a5..e746a26ac19c00de4ee7785399ef98478472340c 100644
--- a/src/caosadvancedtools/pandoc_header_tools.py
+++ b/src/caosadvancedtools/pandoc_header_tools.py
@@ -136,10 +136,10 @@ it is not at the beginning, it must be preceded by a blank line.
     # If a header section was found:
     if state == 2:
         headerlines = []
-        for l in textlines[found_1:found_2]:
-            l = l.replace("\t", " ")
-            l = l.rstrip()
-            headerlines.append(l)
+        for line in textlines[found_1:found_2]:
+            line = line.replace("\t", " ")
+            line = line.rstrip()
+            headerlines.append(line)
         # try:
         try:
             yaml_part = yaml.load("\n".join(headerlines), Loader=yaml.BaseLoader)
@@ -156,7 +156,7 @@
else: print("Adding header in: {fn}".format(fn=filename)) add_header(filename) - return _get_header(filename) + return get_header(filename) def save_header(filename, header_data): diff --git a/src/caosadvancedtools/scifolder/__init__.py b/src/caosadvancedtools/scifolder/__init__.py index d7d67937b42ca23173fc93d4e704411f33d80bc4..cf753cfc0b72bf95e34edea1301b96ed18f040d0 100644 --- a/src/caosadvancedtools/scifolder/__init__.py +++ b/src/caosadvancedtools/scifolder/__init__.py @@ -3,3 +3,4 @@ from .experiment_cfood import ExperimentCFood from .publication_cfood import PublicationCFood from .simulation_cfood import SimulationCFood from .software_cfood import SoftwareCFood +from .result_table_cfood import ResultTableCFood diff --git a/src/caosadvancedtools/scifolder/experiment_cfood.py b/src/caosadvancedtools/scifolder/experiment_cfood.py index 540f9dac854968071deafadac253ef1e373e5cfb..c3c8a18031b81f945a55504653f89b3577fcf0bf 100644 --- a/src/caosadvancedtools/scifolder/experiment_cfood.py +++ b/src/caosadvancedtools/scifolder/experiment_cfood.py @@ -84,7 +84,7 @@ class ExperimentCFood(AbstractFileCFood, WithREADME): self.experiment, self.project = ( ExperimentCFood.create_identifiable_experiment(self.match)) - self.identifiables.extend([self.experiment, self.project]) + self.identifiables.extend([self.project, self.experiment]) self.people = parse_responsibles(self.header) self.identifiables.extend(self.people) diff --git a/src/caosadvancedtools/scifolder/result_table_cfood.py b/src/caosadvancedtools/scifolder/result_table_cfood.py new file mode 100644 index 0000000000000000000000000000000000000000..deaa2d00118659a9b177a05fe40b19a1793a16fb --- /dev/null +++ b/src/caosadvancedtools/scifolder/result_table_cfood.py @@ -0,0 +1,86 @@ +#!/usr/bin/env python +# encoding: utf-8 +# +# Copyright (C) 2019 Henrik tom Wörden +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see <https://www.gnu.org/licenses/>. + +import re + +import caosdb as db +import pandas as pd +from caosadvancedtools.cfood import (AbstractFileCFood, assure_has_description, + assure_has_parent, assure_has_property, + assure_object_is_in_list, get_entity) +from caosadvancedtools.read_md_header import get_header + +from ..cfood import assure_property_is, fileguide +from .experiment_cfood import ExperimentCFood +from .generic_pattern import date_pattern, date_suffix_pattern, project_pattern +from .utils import parse_responsibles, reference_records_corresponding_to_files +from .withreadme import DATAMODEL as dm +from .withreadme import RESULTS, REVISIONOF, SCRIPTS, WithREADME, get_glob + + +# TODO similarities with TableCrawler +class ResultTableCFood(AbstractFileCFood): + + # win_paths can be used to define fields that will contain windows style + # path instead of the default unix ones. 
Possible fields are:
    # ["results", "revisionOf"]
+    win_paths = []
+    table_re = r"result_table_(?P<recordtype>.*).csv$"
+    property_name_re = re.compile(r"^(?P<pname>.+?)\s*(\[\s?(?P<unit>.*?)\s?\] *)?$")
+
+    @staticmethod
+    def name_beautifier(x): return x
+
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        self.table = pd.read_csv(fileguide.access(self.crawled_path))
+
+    @staticmethod
+    def get_re():
+        return (".*/ExperimentalData/"+project_pattern + date_pattern +
+                date_suffix_pattern + ResultTableCFood.table_re)
+
+    def create_identifiables(self):
+        self.recs = []
+        self.experiment, self.project = (
+            ExperimentCFood.create_identifiable_experiment(self.match))
+
+        for idx, row in self.table.iterrows():
+            rec = db.Record()
+            rec.add_parent(self.match.group("recordtype"))
+
+            for col in self.table.columns[:2]:
+                match = re.match(ResultTableCFood.property_name_re, col)
+
+                if match.group("unit"):
+                    rec.add_property(match.group("pname"), row.loc[col], unit=match.group("unit"))
+                else:
+                    rec.add_property(match.group("pname"), row.loc[col])
+            self.identifiables.append(rec)
+            self.recs.append(rec)
+
+        self.identifiables.extend([self.project, self.experiment])
+
+    def update_identifiables(self):
+        for ii, (idx, row) in enumerate(self.table.iterrows()):
+            for col in row.index:
+                match = re.match(ResultTableCFood.property_name_re, col)
+                assure_property_is(self.recs[ii], match.group("pname"), row.loc[col],
+                                   to_be_updated=self.to_be_updated)
+        assure_property_is(self.experiment, self.match.group("recordtype"),
+                           self.recs, to_be_updated=self.to_be_updated,
+                           datatype=db.LIST(self.match.group("recordtype")))
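# --- Editor's note: behaviour sketch (not part of the patch) ----------------
# What ``ResultTableCFood.property_name_re`` extracts from CSV column
# headers: a property name plus an optional unit in square brackets.
import re

property_name_re = re.compile(r"^(?P<pname>.+?)\s*(\[\s?(?P<unit>.*?)\s?\] *)?$")

m = property_name_re.match("temperature [K]")
assert m.group("pname") == "temperature" and m.group("unit") == "K"

m = property_name_re.match("length")
assert m.group("pname") == "length" and m.group("unit") is None
# -----------------------------------------------------------------------------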
self.crawled_path.lower().endswith(".md"): + if self.crawled_path.lower().endswith(".md"): # pylint: disable=no-member self._header = get_md_header( - fileguide.access(self.crawled_path)) - elif self.crawled_path.lower().endswith(".xlsx"): + fileguide.access(self.crawled_path)) # pylint: disable=no-member + elif self.crawled_path.lower().endswith(".xlsx"): # pylint: disable=no-member self._header = get_xls_header( - fileguide.access(self.crawled_path)) + fileguide.access(self.crawled_path)) # pylint: disable=no-member else: raise RuntimeError("Readme format not recognized.") self.convert_win_paths() @@ -145,7 +145,7 @@ class WithREADME(object): globs = get_glob(self.header[field.key]) files = get_files_referenced_by_field( - globs, prefix=os.path.dirname(self.crawled_path)) + globs, prefix=os.path.dirname(self.crawled_path)) # pylint: disable=no-member description = [get_description(val) for val in self.header[field.key]] @@ -160,7 +160,7 @@ class WithREADME(object): LOGGER.warn("ATTENTION: the field {} does not reference any " "known files".format(field.key)) - self.attached_filenames.extend(flat_list) + self.attached_filenames.extend(flat_list) # pylint: disable=no-member def convert_path(self, el): """ converts the path in el to unix type @@ -185,7 +185,7 @@ class WithREADME(object): return win_path_converter(el) def convert_win_paths(self): - for field in self.win_paths: + for field in self.win_paths: # pylint: disable=no-member if field in self.header: if isinstance(self.header[field], list): @@ -245,7 +245,7 @@ class WithREADME(object): references[ref_type], record, ref_type, - to_be_updated=self.to_be_updated, + to_be_updated=self.to_be_updated, # pylint: disable=no-member ) def reference_included_records(self, record, fields, to_be_updated): @@ -255,16 +255,16 @@ class WithREADME(object): for field in fields: - if field.key not in self.header: + if field.key not in self.header: # pylint: disable=no-member continue included = [] - for item in self.header[field.key]: + for item in self.header[field.key]: # pylint: disable=no-member if INCLUDE.key in item: try: included.extend( get_entity_ids_from_include_file( - os.path.dirname(self.crawled_path), + os.path.dirname(self.crawled_path), # pylint: disable=no-member item[INCLUDE.key])) except ValueError: al = logging.getLogger("caosadvancedtools") diff --git a/src/caosadvancedtools/serverside/examples/example_script.py b/src/caosadvancedtools/serverside/examples/example_script.py new file mode 100755 index 0000000000000000000000000000000000000000..d97d2d0d1f936b1c12e857d38fce043f0b514340 --- /dev/null +++ b/src/caosadvancedtools/serverside/examples/example_script.py @@ -0,0 +1,200 @@ +#!/usr/bin/env python3 +# encoding: utf-8 +# +# ** header v3.0 +# This file is a part of the CaosDB Project. +# +# Copyright (C) 2021 Indiscale GmbH <info@indiscale.com> +# Copyright (C) 2021 Henrik tom Wörden <h.tomwoerden@indiscale.com> +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. 
If not, see <https://www.gnu.org/licenses/>.
+#
+# ** end header
+#
+
+"""An example script that illustrates how scripts can be used in conjunction
+with the generic_analysis module.
+
+The data model needed for this script is:
+
+Analysis:
+   sources: REFERENCE
+   scripts: FILE
+   results: REFERENCE
+   mean_value: DOUBLE
+
+Person:
+   Email: TEXT
+
+"""
+
+import argparse
+import logging
+import sys
+from argparse import RawTextHelpFormatter
+from datetime import datetime
+from typing import List
+
+import caosdb as db
+import matplotlib.pyplot as plt
+import numpy as np
+from caosadvancedtools.cfood import assure_property_is
+from caosadvancedtools.crawler import apply_list_of_updates
+from caosadvancedtools.guard import INSERT, UPDATE
+from caosadvancedtools.guard import global_guard as guard
+from caosadvancedtools.serverside.helper import send_mail as main_send_mail
+
+# logging should be done like this in order to allow the caller script to
+# direct the output.
+logger = logging.getLogger(__name__)
+
+# allow updates of existing entities
+guard.set_level(level=UPDATE)
+
+
+def send_mail(changes: List[db.Entity], recipient: str):
+    """ calls sendmail in order to send a mail to the curator about pending
+    changes
+
+    Parameters:
+    -----------
+    changes: The CaosDB entities in the version after the update.
+    recipient: The person who shall receive the mail.
+    """
+
+    caosdb_config = db.configuration.get_config()
+    text = """Dear Curator,
+The following changes were done automatically.
+
+{changes}
+    """.format(changes="\n".join(changes))
+    try:
+        fro = caosdb_config["advancedtools"]["automated_updates.from_mail"]
+    except KeyError:
+        logger.error("Server Configuration is missing a setting for "
+                     "sending mails. The administrator should check "
+                     "'from_mail'.")
+        return
+
+    main_send_mail(
+        from_addr=fro,
+        to=recipient,
+        subject="Automated Update",
+        body=text)
+
+
+def main(args):
+
+    # auth_token is provided by the server side scripting API
+    # use this token for authentication when creating a new connection
+    if hasattr(args, "auth_token") and args.auth_token:
+        db.configure_connection(auth_token=args.auth_token)
+        logger.debug("Established connection")
+
+    try:
+        dataAnalysisRecord = db.Record(id=args.entityid).retrieve()
+    except db.TransactionError:
+        logger.error("Cannot retrieve Record with id={}".format(
+            args.entityid
+        ))
+        # re-raise: without the Record the script cannot continue
+        raise
+
+    # The script may require certain information to exist. Here, we expect that
+    # a sources Property exists that references a numpy file.
+    # Similarly an InputDataSet could be used.
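+    # A minimal sketch of such a Record (hypothetical ids, for illustration
+    # only):
+    #
+    #   rec = db.Record(id=1234)
+    #   rec.add_parent("Analysis")
+    #   rec.add_property("sources", value=[4321])  # references a numpy File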
+
+    if (dataAnalysisRecord.get_property("sources") is None
+            or not db.apiutils.is_reference(
+                dataAnalysisRecord.get_property("sources"))):
+
+        raise RuntimeError("sources Reference must exist.")
+
+    logger.debug("Found required data.")
+
+    # ####### this core might be replaced by a call to another script ####### #
+    # Download the data
+    source_val = dataAnalysisRecord.get_property("sources").value
+    npobj = db.File(
+        id=(source_val[0]
+            if isinstance(source_val, list)
+            else source_val)).retrieve()
+    npfile = npobj.download()
+    logger.debug("Downloaded data.")
+    data = np.load(npfile)
+
+    # Plot data
+    filename = "hist.png"
+    plt.hist(data)
+    plt.savefig(filename)
+
+    mean = data.mean()
+    # ####################################################################### #
+
+    # Insert the result plot
+    fig = db.File(file=filename,
+                  path="/Analysis/results/"+str(datetime.now())+"/"+filename)
+    fig.insert()
+
+    # Add the mean value to the analysis Record
+    # If such a property existed before, it is changed if necessary. The old
+    # value will persist in the versioning of LinkAhead
+    to_be_updated = db.Container()
+    assure_property_is(
+        dataAnalysisRecord,
+        "mean_value",
+        mean,
+        to_be_updated=to_be_updated
+    )
+
+    # Add the file with the plot to the analysis Record
+    # If a file was already referenced, the new one will be referenced instead.
+    # The old file is being kept and is still referenced in an old version of
+    # the analysis Record.
+    assure_property_is(
+        dataAnalysisRecord,
+        "results",
+        [fig.id],
+        to_be_updated=to_be_updated
+    )
+
+    if len(to_be_updated) > 0:
+        print(to_be_updated)
+        apply_list_of_updates(to_be_updated, update_flags={})
+        logger.debug("Update successful.")
+        logger.info("The following Entities were changed:\n{}.".format(
+            [el.id for el in to_be_updated])
+        )
+
+        # Send mails to people that are referenced.
+        people = db.execute_query("FIND RECORD Person WHICH IS REFERENCED BY "
+                                  "{}".format(dataAnalysisRecord.id))
+        for person in people:
+            if person.get_property("Email") is not None:
+                send_mail([str(el) for el in to_be_updated],
+                          recipient=person.get_property("Email").value)
+        logger.debug("Mails sent.")
+
+
+def parse_args():
+    parser = argparse.ArgumentParser(description=__doc__,
+                                     formatter_class=RawTextHelpFormatter)
+    parser.add_argument("--auth-token",
+                        help="Token provided by the server for authentication")
+    parser.add_argument("entityid",
+                        help="The ID of the DataAnalysis Record.", type=int)
+
+    return parser.parse_args()
+
+
+if __name__ == "__main__":
+    args = parse_args()
+    sys.exit(main(args))
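+
+# Usage sketch (hypothetical entity id; on a real server this script is
+# usually invoked via the server side scripting API, which also supplies
+# --auth-token):
+#
+#   python3 example_script.py --auth-token <token> 1234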
diff --git a/src/caosadvancedtools/serverside/generic_analysis.py b/src/caosadvancedtools/serverside/generic_analysis.py
new file mode 100644
index 0000000000000000000000000000000000000000..85d0c860df75fce205c5eaad77731fc04eee9e40
--- /dev/null
+++ b/src/caosadvancedtools/serverside/generic_analysis.py
@@ -0,0 +1,213 @@
+# encoding: utf-8
+#
+# Copyright (C) 2021 Alexander Schlemmer <alexander.schlemmer@ds.mpg.de>
+# Copyright (C) 2021 IndiScale GmbH <info@indiscale.com>
+# Copyright (C) 2021 Henrik tom Wörden <h.tomwoerden@indiscale.com>
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as
+# published by the Free Software Foundation, either version 3 of the
+# License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+#
+#
+# See: https://gitlab.indiscale.com/caosdb/src/caosdb-advanced-user-tools/-/issues/55
+
+# This source file is work in progress and currently untested.
+
+
+"""
+Variant I: A Python module implements a 'main' function which takes a Record
+as its argument, amends it (e.g. with 'results') and updates it.
+
+Variant II: A script receives an ID as its argument (e.g. on the command
+line) and updates the object on its own.
+
+Ideal case: idempotency; i.e. it does not matter whether the script has
+already been called. A further call may lead to an update (but only if,
+e.g., parameters changed).
+
+The called script may use and create arbitrary Properties. BUT if the
+standard Properties (InputDataSet, etc.) are used, the Record can be created
+easily.
+
+
+
+     "Analyze"                   "Perform Analysis"
+ Button on a Record              Form in the WebUI
+ in the WebUI
+         |                                |
+         |                                |
+         v                                v
+      Tiny script that creates a
+         DataAnalysis stub
+                  |
+                  |
+                  v
+      execute_script routine  -->  analysis script
+      receives the stub and,       uses functions to perform updates
+      if applicable, the           where necessary, sends emails
+      Python module name
+                  ^
+                  |
+                  |
+      cron job finds outdated
+          DataAnalysis
+
+
+The analysis script performs the update:
+    - it is flexible in which changes are made (e.g. multiple Records)
+    - special functions should be used
+    - logging and notification must happen inside the script
+    - the script can be called via subprocess (alternatively, an incomplete
+      DataAnalysis can be inserted)
+
+
+# Features
+    - emails are sent on insert or update
+    - a short info such as "Create XY Analysis" can presumably be generated
+      automatically
+    - debug info should be available optionally/on errors
+    - the script/software version should be stored
+
+
+Outlook: the part of the called scripts that interacts with LinkAhead might in
+future be replaced by the Crawler. The working directory would be copied to the
+file server and then crawled.
+"""
+
+import argparse
+import importlib
+import logging
+import os
+import sys
+
+import caosdb as db
+from caosdb.utils.server_side_scripting import run_server_side_script
+
+logger = logging.getLogger(__name__)
+
+
+def check_referenced_script(record: db.Record):
+    """ return the name of a referenced script
+
+    If the supplied record does not have an appropriate Property, warnings
+    are logged.
+    """
+
+    if record.get_property("scripts") is None:
+        logger.warning("The following changed Record is missing the 'scripts' "
+                       "Property:\n{}".format(str(record)))
+
+        return
+
+    script_prop = record.get_property("scripts")
+
+    if not db.apiutils.is_reference(script_prop):
+        logger.warning("The 'scripts' Property of the following Record should "
+                       "reference a File:\n{}".format(str(record)))
+
+        return
+
+    script = db.execute_query("FIND ENTITY WITH id={}".format(
+        script_prop.value[0] if isinstance(script_prop.value, list)
+        else script_prop.value), unique=True)
+
+    if (not isinstance(script, db.File)):
+        logger.warning("The 'scripts' Property of the Record {} should "
+                       "reference a File. Entity {} is not a File".format(
+                           record.id, script_prop.value))
+
+        return
+
+    script_name = os.path.basename(script.path)
+
+    return script_name
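+
+
+# A minimal usage sketch (hypothetical Record id, for illustration only):
+#
+#   rec = db.Record(id=1234).retrieve()
+#   script_name = check_referenced_script(rec)
+#   if script_name is not None:
+#       call_script(script_name, rec.id)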
+
+
+def call_script(script_name: str, record_id: int):
+    ret = run_server_side_script(script_name, record_id)
+
+    if ret.code != 0:
+        logger.error("Script failed!")
+        logger.debug(ret.stdout)
+        logger.error(ret.stderr)
+    else:
+        logger.debug(ret.stdout)
+        # on success, stderr is only interesting for debugging
+        logger.debug(ret.stderr)
+
+
+def run(dataAnalysisRecord: db.Record):
+    """run a data analysis script.
+
+    There are two options:
+    1. A python script installed as a pip package.
+    2. A generic script that can be executed on the command line.
+
+    Using a python package:
+    It should be located in the plugin package and implement at least
+    a main function that takes a DataAnalysisRecord as a single argument.
+    The script may perform changes to the Record and insert and update
+    Entities.
+
+    Using a generic script:
+    The only argument that is supplied to the script is the ID of the
+    dataAnalysisRecord. Apart from the different argument, everything that
+    is said about the python package holds here as well.
+    """
+
+    if dataAnalysisRecord.get_property("scripts") is not None:
+        script_name = check_referenced_script(dataAnalysisRecord)
+        logger.debug(
+            "Found 'scripts'. Call script '{}' in separate process".format(
+                script_name)
+        )
+        call_script(script_name, dataAnalysisRecord.id)
+        logger.debug(
+            "Script '{}' done.\n-----------------------------------".format(
+                script_name))
+
+    if dataAnalysisRecord.get_property("Software") is not None:
+        mod = dataAnalysisRecord.get_property("Software").value
+        logger.debug(
+            "Found 'Software'. Call '{}' as Python module".format(
+                mod)
+        )
+        m = importlib.import_module(mod)
+
+        m.main(dataAnalysisRecord)
+        logger.debug(
+            "'main' function of Python module '{}' done"
+            ".\n-----------------------------------".format(mod))
+
+
+def _parse_arguments():
+    """ Parses the command line arguments. """
+    parser = argparse.ArgumentParser(description=__doc__)
+    parser.add_argument("--module", help="The name of the Python module "
+                        "to be called.")
+    parser.add_argument("--inputset", help="The id of an input dataset.")
+    parser.add_argument("--parameterset", help="The id of a parameter record.")
+
+    return parser.parse_args()
+
+
+def main():
+    """ This is for testing only. """
+    args = _parse_arguments()
+
+    dataAnalysisRecord = db.Record()
+    dataAnalysisRecord.add_property(name="InputDataSet", value=args.inputset)
+    dataAnalysisRecord.add_property(name="ParameterSet",
+                                    value=args.parameterset)
+    dataAnalysisRecord.add_property(name="Software", value=args.module)
+
+    dataAnalysisRecord.insert()
+    run(dataAnalysisRecord)
+
+
+if __name__ == "__main__":
+    sys.exit(main())
diff --git a/src/caosadvancedtools/serverside/helper.py b/src/caosadvancedtools/serverside/helper.py
index 19efc9ed2b3e99e17eb28f5c87b0a6dbc0c47499..ba75739e0fdc0a83f235db6920471afb196f4246 100644
--- a/src/caosadvancedtools/serverside/helper.py
+++ b/src/caosadvancedtools/serverside/helper.py
@@ -390,11 +390,11 @@ def send_mail(from_addr, to, subject, body, cc=None, bcc=None,
     else:
         caosdb_config = db.configuration.get_config()
 
-        if not "Misc" in caosdb_config or not "sendmail" in caosdb_config["Misc"]:
+        if "Misc" not in caosdb_config or "sendmail" not in caosdb_config["Misc"]:
            err_msg = ("No sendmail executable configured. 
" "Please configure `Misc.sendmail` " "in your pycaosdb.ini.") - raise db.ConfigurationException(err_msg) + raise db.ConfigurationError(err_msg) sendmail = caosdb_config["Misc"]["sendmail"] # construct sendmail command diff --git a/src/caosadvancedtools/serverside/model.yml b/src/caosadvancedtools/serverside/model.yml new file mode 100644 index 0000000000000000000000000000000000000000..2f5a9634a97e39da4c5b3a6dfe1bf0c587863231 --- /dev/null +++ b/src/caosadvancedtools/serverside/model.yml @@ -0,0 +1,15 @@ +# Parent of all datasets which are used as input to or output from +# analysis scripts +Dataset: + +# Parent of all parametersets which are used as input for analysis scripts +ParameterSet: + +DataAnalysis: + recommended_properties: + InputDataset: + datatype: Dataset + OutputDataset: + datatype: Dataset + ParameterSet: + date: \ No newline at end of file diff --git a/src/caosadvancedtools/serverside/sync.py b/src/caosadvancedtools/serverside/sync.py new file mode 100755 index 0000000000000000000000000000000000000000..04283a15ba7919af6027b53217ffb69355ddfc6f --- /dev/null +++ b/src/caosadvancedtools/serverside/sync.py @@ -0,0 +1,7 @@ +#!/usr/bin/env python3 +# Sync data model for generic data analysis method +# A. Schlemmer, 09/2021 + +from caosadvancedtools.models import parser +model = parser.parse_model_from_yaml("model.yml") +model.sync_data_model() diff --git a/src/caosadvancedtools/structure_mapping.py b/src/caosadvancedtools/structure_mapping.py new file mode 100644 index 0000000000000000000000000000000000000000..50e57ac4d84f2034fbdb6da6c7159f450a993c3a --- /dev/null +++ b/src/caosadvancedtools/structure_mapping.py @@ -0,0 +1,144 @@ +#!/usr/bin/env python3 + +# This file is a part of the CaosDB Project. +# +# Copyright (C) 2021 IndiScale GmbH <www.indiscale.com> +# Copyright (C) 2021 Henrik tom Wörden <h.tomwoerden@indiscale.com> +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see <https://www.gnu.org/licenses/>. + +import caosdb as db +from caosdb.apiutils import resolve_reference +from caosdb.common.utils import uuid + +from .cfood import (assure_has_description, assure_has_parent, + assure_property_is) + + +class EntityMapping(object): + """ + map local entities to entities on the server + + the dict to_existing maps _cuid property to entity objects + the dict to_target maps id property to entity objects + """ + + def __init__(self): + self.to_existing = {} + self.to_target = {} + + def add(self, target, existing): + if target._cuid is None: + target._cuid = str(uuid()) + self.to_existing[str(target._cuid)] = existing + self.to_target[existing.id] = target + + +def collect_existing_structure(target_structure, existing_root, em): + """ recursively collects existing entities + + The collected entities are those that correspond to the ones in + target_structure. 
+
+
+    em: EntityMapping
+        The mapping to which pairs of corresponding target and existing
+        entities are added.
+    """
+
+    for prop in target_structure.properties:
+        if prop.value is None:
+            continue
+
+        if not prop.is_reference(server_retrieval=True):
+            continue
+
+        if (len([p for p in target_structure.properties if p.name == prop.name])
+                != 1):
+            raise ValueError("Current implementation allows only one property "
+                             "for each property name")
+
+        if (existing_root.get_property(prop.name) is not None and
+                existing_root.get_property(prop.name).value is not None):
+            resolve_reference(prop)
+
+            resolve_reference(existing_root.get_property(prop.name))
+            referenced = existing_root.get_property(prop.name).value
+
+            if not isinstance(referenced, list):
+                referenced = [referenced]
+            target_value = prop.value
+
+            if not isinstance(target_value, list):
+                target_value = [target_value]
+
+            if len(target_value) != len(referenced):
+                raise ValueError("The number of referenced entities differs "
+                                 "between target and existing structure.")
+
+            for tent, eent in zip(target_value, referenced):
+                em.add(tent, eent)
+                collect_existing_structure(tent, eent, em)
+
+
+def update_structure(em, updating: db.Container, target_structure: db.Record):
+    """compare the existing records with the target record tree created
+    from the h5 object
+
+    Parameters
+    ----------
+
+    em : EntityMapping
+        The mapping between target records and the records retrieved from
+        the server.
+
+    updating : db.Container
+        Container into which entities that need to be updated are collected.
+
+    target_structure : db.Record
+        A record which may have references to other records. Must be a DAG.
+    """
+
+    if target_structure._cuid in em.to_existing:
+        update_matched_entity(em,
+                              updating,
+                              target_structure,
+                              em.to_existing[target_structure._cuid])
+
+    for prop in target_structure.get_properties():
+        if prop.is_reference(server_retrieval=True):
+            update_structure(em, updating, prop.value)
+
+
+def update_matched_entity(em, updating, target_record, existing_record):
+    """
+    update the Record existing in the server according to the Record
+    supplied as target_record
+    """
+
+    for parent in target_record.get_parents():
+        if parent.name == "":
+            raise ValueError("Parent name must not be empty.")
+        assure_has_parent(existing_record, parent.name, force=True)
+
+    if target_record.description is not None:
+        # check whether description is equal
+        assure_has_description(existing_record, target_record.description,
+                               to_be_updated=updating)
+
+    for prop in target_record.get_properties():
+        # check for remaining property types
+
+        if isinstance(prop.value, db.Entity):
+            if prop.value._cuid in em.to_existing:
+                value = em.to_existing[prop.value._cuid].id
+            else:
+                value = prop.value.id
+        else:
+            value = prop.value
+        assure_property_is(existing_record, prop.name, value,
+                           to_be_updated=updating)
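+
+
+# Usage sketch (hypothetical records, for illustration only):
+#
+#   em = EntityMapping()
+#   collect_existing_structure(target_root, existing_root, em)
+#   updates = db.Container()
+#   update_structure(em, updates, target_root)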
diff --git a/src/caosadvancedtools/suppressKnown.py b/src/caosadvancedtools/suppressKnown.py
index c15f0e06fa7d126937497aeb877dd5d2991b6ff7..c4b57039c5184f2443e4dbb91cf11f5e59ae6790 100644
--- a/src/caosadvancedtools/suppressKnown.py
+++ b/src/caosadvancedtools/suppressKnown.py
@@ -5,6 +5,8 @@ import os
 import sqlite3
 from hashlib import sha256
 
+import tempfile
+
 
 class SuppressKnown(logging.Filter):
     """
@@ -26,8 +28,9 @@ class SuppressKnown(logging.Filter):
         if db_file:
             self.db_file = db_file
         else:
-            self.db_file = "/tmp/caosadvanced_suppressed_cache.db"
-
+            tmppath = tempfile.gettempdir()
+            tmpf = os.path.join(tmppath, "caosadvanced_suppressed_cache.db")
+            self.db_file = tmpf
         if not os.path.exists(self.db_file):
             self.create_cache()
diff --git a/src/caosadvancedtools/table_export.py b/src/caosadvancedtools/table_export.py
index bed0edc97a794dd83b2bdd7b1c0449c710c18d3f..056207a76fa01357e2269cd4cb8e9a09905d5d90 100644
--- a/src/caosadvancedtools/table_export.py
+++ b/src/caosadvancedtools/table_export.py
@@ -308,7 +308,7 @@ class BaseTableExporter(object):
                         " was specified but no record is given."
                     )
             else:
-                if not "selector" in d:
+                if "selector" not in d:
                     d["selector"] = d[QUERY].strip().split(" ")[1]
         # guess find function and insert if existing
         else:
diff --git a/src/caosadvancedtools/table_importer.py b/src/caosadvancedtools/table_importer.py
index 04c8ea23b19ee0cc055dc58b69f1b3d6fecd1b55..1f515e78e3ddbd198fa0336589a359ba9154f038 100755
--- a/src/caosadvancedtools/table_importer.py
+++ b/src/caosadvancedtools/table_importer.py
@@ -31,6 +31,7 @@ import logging
 import pathlib
 from datetime import datetime
 
+import caosdb as db
 import numpy as np
 import pandas as pd
 from xlrd import XLRDError
@@ -50,12 +51,27 @@ def assure_name_format(name):
 
     name = str(name)
 
     if len(name.split(",")) != 2:
-        raise ValueError("Name field should be 'LastName, FirstName'."
+        raise ValueError("The field value should be 'LastName, FirstName'. "
                          "The supplied value was '{}'.".format(name))
 
     return name
 
 
+def check_reference_field(ent_id, recordtype):
+    if 1 != db.execute_query("COUNT {} WITH id={}".format(
+            recordtype,
+            ent_id),
+            unique=True):
+        raise ValueError(
+            "No {} with the supplied id={} exists. \n"
+            "Please supply a valid ID.".format(
+                recordtype,
+                ent_id
+            ))
+
+    return ent_id
+
+
 def yes_no_converter(val):
     """
     converts a string to True or False if possible.
@@ -88,7 +104,10 @@ def date_converter(val, fmt="%Y-%m-%d"):
     converts it using format string
     """
 
-    return datetime_converter(val, fmt=fmt).date()
+    if val is None:
+        return None
+    else:
+        return datetime_converter(val, fmt=fmt).date()
 
 
 def incomplete_date_converter(val, fmts={"%Y-%m-%d": "%Y-%m-%d",
@@ -137,6 +156,9 @@ def win_path_converter(val):
     checks whether the value looks like a windows path and converts it to
     posix
     """
+    if val == "":
+        return val
+
     if not check_win_path(val):
         raise ValueError(
             "Field should be a Windows path, but is\n'{}'.".format(val))
@@ -145,80 +167,100 @@ def win_path_converter(val):
     return path.as_posix()
 
 
-class TSVImporter(object):
-    def __init__(self, converters, obligatory_columns=[], unique_columns=[]):
-        raise NotImplementedError()
+def string_in_list(val, options, ignore_case=True):
+    """Return the given value if it is contained in options, raise an
+    error otherwise.
 
+    Parameters
+    ----------
+    val : str
+        String value to be checked.
+    options : list<str>
+        List of possible values that val may take.
+    ignore_case : bool, optional
+        Specify whether the comparison of val and the possible options
+        should ignore capitalization. Default is True.
+
+    Returns
+    -------
+    val : str
+        The original value if it is contained in options
 
-class XLSImporter(object):
-    def __init__(self, converters, obligatory_columns=None, unique_keys=None):
-        """
-        converters: dict with column names as keys and converter functions as
-                    values
-                    This dict also defines what columns are required to exist
-                    throught the existing keys. The converter functions are
-                    applied to the cell values. They should also check for
-                    ValueErrors, such that a separate value check is not
-                    necessary.
-        obligatory_columns: list of column names, optional
-                            each listed column must not have missing values
-        unique_columns : list of column names that in
-                        combination must be unique; i.e. each row has a
-                        unique combination of values in those columns.
-        """
-        self.sup = SuppressKnown()
-        self.required_columns = list(converters.keys())
-        self.obligatory_columns = [] if obligatory_columns is None else obligatory_columns
-        self.unique_keys = [] if unique_keys is None else unique_keys
-        self.converters = converters
+    Raises
+    ------
+    ValueError
+        If val is not contained in options.
+    """
 
-    def read_xls(self, filename, **kwargs):
-        """
-        converts an xls file into a Pandas DataFrame.
+    if ignore_case:
+        val = val.lower()
+        options = [o.lower() for o in options]
 
-        The converters of the XLSImporter object are used.
+    if val not in options:
+        raise ValueError(
+            "Field value is '{}', but it should be one of the following "
+            "values: {}.".format(val, ", ".join(
+                ["'{}'".format(o) for o in options])))
 
-        Raises: DataInconsistencyError
+    return val
+
+
+class TableImporter():
+    """Abstract base class for importing data from tables.
+    """
+
+    def __init__(self, converters, obligatory_columns=None, unique_keys=None,
+                 datatypes=None):
         """
-        try:
-            xls_file = pd.io.excel.ExcelFile(filename)
-        except XLRDError as e:
-            logger.warning(
-                "Cannot read \n{}.\nError:{}".format(filename,
-                                                     str(e)),
-                extra={'identifier': str(filename),
-                       'category': "inconsistency"})
-            raise DataInconsistencyError(*e.args)
+        Parameters
+        ----------
+        converters : dict
+            Dict with column names as keys and converter functions as values. This dict also defines
+            what columns are required to exist through the existing keys. The converter functions are
+            applied to the cell values. They should also check for ValueErrors, such that a separate
+            value check is not necessary.
+
+        obligatory_columns : list, optional
+            List of column names, each listed column must not have missing values.
+
+        unique_keys : list, optional
+            List of column names that in combination must be unique: each row has a unique
+            combination of values in those columns.
+
+        datatypes : dict, optional
+            Dict with column names as keys and datatypes as values. All non-null values will be
+            checked whether they have the provided datatype. This dict also defines what columns are
+            required to exist through the existing keys.
 
-        if len(xls_file.sheet_names) > 1:
-            # Multiple sheets is the default now. Only show in debug
-            logger.debug(
-                "Excel file {} contains multiple sheets. "
-                "All but the first are being ignored.".format(filename))
+        """
 
-        try:
-            df = xls_file.parse(converters=self.converters, **kwargs)
-        except Exception as e:
-            logger.warning(
-                "Cannot parse {}.".format(filename),
-                extra={'identifier': str(filename),
-                       'category': "inconsistency"})
-            raise DataInconsistencyError(*e.args)
+        if converters is None:
+            converters = {}
 
-        self.check_columns(df, filename=filename)
-        df = self.check_missing(df, filename=filename)
+        if datatypes is None:
+            datatypes = {}
 
-        if len(self.unique_keys) > 0:
-            df = self.check_unique(df, filename=filename)
+        self.sup = SuppressKnown()
+        self.required_columns = list(converters.keys())+list(datatypes.keys())
+        self.obligatory_columns = ([]
+                                   if obligatory_columns is None
+                                   else obligatory_columns)
+        self.unique_keys = [] if unique_keys is None else unique_keys
+        self.converters = converters
+        self.datatypes = datatypes
 
-        return df
+    def read_file(self, filename, **kwargs):
+        raise NotImplementedError()
 
     def check_columns(self, df, filename=None):
-        """
-        checks whether all required columns, i.e. columns for which converters
-        were defined exist.
+        """Check whether all required columns exist.
+
+        Required columns are columns for which converters are defined.
+ + Raises + ------ + DataInconsistencyError - Raises: DataInconsistencyError """ for col in self.required_columns: @@ -234,12 +276,11 @@ class XLSImporter(object): raise DataInconsistencyError(errmsg) def check_unique(self, df, filename=None): - """ - Check whether value combinations that shall be unique for each row are - unique. + """Check whether value combinations that shall be unique for each row are unique. If a second row is found, that uses the same combination of values as a previous one, the second one is removed. + """ df = df.copy() uniques = [] @@ -266,11 +307,57 @@ class XLSImporter(object): return df + def check_datatype(self, df, filename=None, strict=False): + """Check for each column whether non-null fields have the correct datatype. + + .. note:: + + If columns are integer, but should be float, this method converts the respective columns + in place. + + Parameters + ---------- + + strict: boolean, optional + If False (the default), try to convert columns, otherwise raise an error. + + """ + for key, datatype in self.datatypes.items(): + # Check for castable numeric types first: We unconditionally cast int to the default + # float, because CaosDB does not have different sizes anyway. + col_dtype = df.dtypes[key] + if not strict and not np.issubdtype(col_dtype, datatype): + issub = np.issubdtype + # These special cases should be fine. + if issub(col_dtype, np.integer) and issub(datatype, np.floating): + df[key] = df[key].astype(datatype) + + # Now check each element + for idx, val in df.loc[ + pd.notnull(df.loc[:, key]), key].iteritems(): + + if not isinstance(val, datatype): + msg = ( + "In row no. {rn} and column '{c}' of file '{fi}' the " + "datatype was {was} but it should be " + "{expected}".format(rn=idx, c=key, fi=filename, + was=str(type(val)).strip("<>"), + expected=str(datatype).strip("<>")) + ) + logger.warning(msg, extra={'identifier': filename, + 'category': "inconsistency"}) + raise DataInconsistencyError(msg) + def check_missing(self, df, filename=None): """ Check in each row whether obligatory fields are empty or null. Rows that have missing values are removed. + + Returns + ------- + out : pandas.DataFrame + The input DataFrame with incomplete rows removed. """ df = df.copy() @@ -306,3 +393,104 @@ class XLSImporter(object): okay = False return df + + def check_dataframe(self, df, filename=None, strict=False): + """Check if the dataframe conforms to the restrictions. + + Checked restrictions are: Columns, data types, uniqueness requirements. + + Parameters + ---------- + + df: pandas.DataFrame + The dataframe to be checked. + + filename: string, optional + The file name, only used for output in case of problems. + + strict: boolean, optional + If False (the default), try to convert columns, otherwise raise an error. + """ + self.check_columns(df, filename=filename) + df = self.check_missing(df, filename=filename) + self.check_datatype(df, filename=filename, strict=strict) + + if len(self.unique_keys) > 0: + df = self.check_unique(df, filename=filename) + + return df + + +class XLSImporter(TableImporter): + def read_file(self, filename, **kwargs): + return self.read_xls(filename=filename, **kwargs) + + def read_xls(self, filename, **kwargs): + """Convert an xls file into a Pandas DataFrame. + + The converters of the XLSImporter object are used. 
+ + Raises: DataInconsistencyError + """ + try: + xls_file = pd.io.excel.ExcelFile(filename) + except (XLRDError, ValueError) as e: + logger.warning( + "Cannot read \n{}.\nError:{}".format(filename, + str(e)), + extra={'identifier': str(filename), + 'category': "inconsistency"}) + raise DataInconsistencyError(*e.args) + + if len(xls_file.sheet_names) > 1: + # Multiple sheets is the default now. Only show in debug + logger.debug( + "Excel file {} contains multiple sheets. " + "All but the first are being ignored.".format(filename)) + + try: + df = xls_file.parse(converters=self.converters, **kwargs) + except Exception as e: + logger.warning( + "Cannot parse {}.\n{}".format(filename, e), + extra={'identifier': str(filename), + 'category': "inconsistency"}) + raise DataInconsistencyError(*e.args) + + df = self.check_dataframe(df, filename) + + return df + + +class CSVImporter(TableImporter): + def read_file(self, filename, sep=",", **kwargs): + try: + df = pd.read_csv(filename, sep=sep, converters=self.converters, + **kwargs) + except ValueError as ve: + logger.warning( + "Cannot parse {}.\n{}".format(filename, ve), + extra={'identifier': str(filename), + 'category': "inconsistency"}) + raise DataInconsistencyError(*ve.args) + + df = self.check_dataframe(df, filename) + + return df + + +class TSVImporter(TableImporter): + def read_file(self, filename, **kwargs): + try: + df = pd.read_csv(filename, sep="\t", converters=self.converters, + **kwargs) + except ValueError as ve: + logger.warning( + "Cannot parse {}.\n{}".format(filename, ve), + extra={'identifier': str(filename), + 'category': "inconsistency"}) + raise DataInconsistencyError(*ve.args) + + df = self.check_dataframe(df, filename) + + return df diff --git a/src/doc/Makefile b/src/doc/Makefile index d28503eb0e883e6c879898c12dac07f91bd2df68..7a1bec105f4b0fe1d70cabd7e3cf5f1ceff93bee 100644 --- a/src/doc/Makefile +++ b/src/doc/Makefile @@ -45,4 +45,4 @@ doc-help: @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) apidoc: - @$(SPHINXAPIDOC) -o _apidoc $(PY_BASEDIR) + @$(SPHINXAPIDOC) --force -o _apidoc $(PY_BASEDIR) diff --git a/src/doc/README_SETUP.md b/src/doc/README_SETUP.md new file mode 120000 index 0000000000000000000000000000000000000000..88332e357f5e06f3de522768ccdcd9e513c15f62 --- /dev/null +++ b/src/doc/README_SETUP.md @@ -0,0 +1 @@ +../../README_SETUP.md \ No newline at end of file diff --git a/src/doc/conf.py b/src/doc/conf.py index 29b790d4d445f2f9c155a0858b00a1a289e0ec4e..c7f82a99d3b287ca72ca57430b2d4b868539d39e 100644 --- a/src/doc/conf.py +++ b/src/doc/conf.py @@ -17,18 +17,19 @@ # sys.path.insert(0, os.path.abspath('../caosdb')) -# -- Project information ----------------------------------------------------- - import sphinx_rtd_theme + +# -- Project information ----------------------------------------------------- + project = 'caosadvancedtools' -copyright = '2020, IndiScale GmbH' +copyright = '2021, IndiScale GmbH' author = 'Daniel Hornung' # The short X.Y version -version = '0.X.Y' +version = '0.4.1' # The full version, including alpha/beta/rc tags -release = '0.x.y-beta-rc2' +release = '0.4.1' # -- General configuration --------------------------------------------------- @@ -92,6 +93,9 @@ html_theme = "sphinx_rtd_theme" # relative to this directory. They are copied after the builtin static files, # so a file named "default.css" will overwrite the builtin "default.css". html_static_path = ['_static'] +# Disable static path to remove warning. 
+html_static_path = []
+
 
 # Custom sidebar templates, must be a dictionary that maps document names
 # to template names.
@@ -185,10 +189,11 @@ epub_exclude_files = ['search.html']
 
 # Example configuration for intersphinx: refer to the Python standard library.
 intersphinx_mapping = {
-    'https://docs.python.org/': None,
+    "python": ("https://docs.python.org/", None),
     "caosdb-pylib": ("https://caosdb.gitlab.io/caosdb-pylib/", None),
 }
 
+
 # TODO Which options do we want?
 autodoc_default_options = {
     'members': None,
diff --git a/src/doc/crawler.rst b/src/doc/crawler.rst
index 92a624bb59f4c0fba8d46076d6df0e0e30bbab75..4b99c97e6db16e5691f373fa5fb4903e4d078155 100644
--- a/src/doc/crawler.rst
+++ b/src/doc/crawler.rst
@@ -3,7 +3,7 @@ CaosDB Crawler
 ==============
 
 The `CaosDB
-crawler <https://gitlab.com/caosdb/caosdb-advanced-user-tools/blob/master/src/caosadvancedtools/crawler.py>`__
+crawler <https://gitlab.com/caosdb/caosdb-advanced-user-tools/blob/main/src/caosadvancedtools/crawler.py>`__
 is a tool for the automated insertion or update of entities in CaosDB.
 Typically, a file structure is crawled, but other things can be crawled
 as well. For example tables or HDF5 files.
@@ -71,11 +71,11 @@ indicated in the messages).
 Invocation as Python Script
 ---------------------------
 
-The crawler can be executed directly via a python script (usually called
+The crawler can be executed directly via a Python script (usually called
 ``crawl.py``). The script prints the progress and reports potential
 problems. The exact behavior depends on your setup. However, you can
 have a look at the example in the
-`tests <https://gitlab.com/caosdb/caosdb-advanced-user-tools/-/blob/master/integrationtests/full_test/crawl.py>`__.
+`tests <https://gitlab.indiscale.com/caosdb/src/caosdb-advanced-user-tools/-/blob/main/integrationtests/crawl.py>`__.
 
 .. Note:: The crawler depends on the CaosDB Python client, so make sure to
           install :doc:`pycaosdb <caosdb-pylib:getting_started>`.
@@ -84,16 +84,20 @@ have a look at the example in the
 Call ``python3 crawl.py --help`` to see what parameters can be provided.
 Typically, an invocation looks like:
 
-.. code:: python
+.. code:: sh
+
+   python3 crawl.py /someplace/
 
-   python3 crawl.py "/TestData/"
+.. Note:: For trying out the above-mentioned example crawler from the integration tests,
+          make sure that the ``extroot`` directory in the ``integrationtests`` folder is used as
+          CaosDB's extroot directory, and call the crawler indirectly via ``./test.sh``.
 
-In this case ``/TestData/`` identifies the path to be crawled **within
-the CaosDB file system**. You can browse the CaosDB file system by
+In this case ``/someplace/`` identifies the path to be crawled **within
+CaosDB's file system**. You can browse the CaosDB file system by
 opening the WebUI of your CaosDB instance and clicking on “File System”.
 
 In the backend, ``crawl.py`` starts a CQL query
-``FIND File WHICH IS STORED AT /TestData/**`` and crawls the resulting
+``FIND File WHICH IS STORED AT /someplace/**`` and crawls the resulting
 files according to your customized ``CFoods``.
 
 Crawling may consist of two distinct steps: 1. Insertion of files (use
@@ -127,6 +131,10 @@ The behavior and rules of the crawler are defined in logical units
 called CFoods. In order to extend the crawler you need to extend an
 existing CFood or create new one.
 
+.. Note:: A crawler always needs a corresponding data model to exist in the
+          server. The following does not cover this aspect. Please refer,
+          for example, to the documentation of the YAML Interface.
+
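+The following is a minimal sketch of a custom CFood (the class name and the
+matched extension are purely illustrative):
+
+.. code-block:: python
+
+   from caosadvancedtools.cfood import AbstractFileCFood
+
+   class ExampleCFood(AbstractFileCFood):
+       @staticmethod
+       def get_re():
+           # files this CFood is responsible for
+           return r".*\.example$"
+
+       def create_identifiables(self):
+           # define the Records that identify the crawled file
+           ...
+
+       def update_identifiables(self):
+           # add or update properties of the identified Records
+           ...
+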
 .. _c-food-introduction:
 
 CFood -- Introduction
@@ -388,7 +396,7 @@ shows how a set of CFoods can be defined to deal with a complex file
 structure. You can find detailed information on files need to be
 structured
 `here <https://gitlab.com/salexan/check-sfs/-/blob/f-software/filesystem_structure.md>`__
 and the source
-code of the CFoods `here <https://gitlab.com/henrik_indiscale/scifolder>`__.
+code of the CFoods `here <https://gitlab.com/caosdb/caosdb-advanced-user-tools>`__.
 
 Sources
 =======
diff --git a/src/doc/index.rst b/src/doc/index.rst
index ee266598cd6cfbcfaa6f54b8e39aa32e4c2b6915..9aa045349ab05d3f5130a7f33b38c7eca0c4f32e 100644
--- a/src/doc/index.rst
+++ b/src/doc/index.rst
@@ -15,6 +15,7 @@ This documentation helps you to :doc:`get started<getting_started>`, explains th
    Concepts <concepts>
    tutorials
    Caosdb-Crawler <crawler>
+   YAML Interface <yaml_interface>
    _apidoc/modules
diff --git a/src/doc/yaml_interface.rst b/src/doc/yaml_interface.rst
new file mode 100644
index 0000000000000000000000000000000000000000..476e92829238a0fc9dac851c61790c022e9fcde9
--- /dev/null
+++ b/src/doc/yaml_interface.rst
@@ -0,0 +1,126 @@
+YAML-Interface
+--------------
+
+The YAML interface is a module in caosadvancedtools that can be used to create and update
+CaosDB models using a simplified definition in YAML format.
+
+Let's start with an example taken from https://gitlab.indiscale.com/caosdb/src/caosdb-advanced-user-tools/-/blob/dev/unittests/model.yml.
+
+.. code-block:: yaml
+
+   Project:
+     obligatory_properties:
+       projectId:
+         datatype: INTEGER
+         description: 'UID of this project'
+   Person:
+     recommended_properties:
+       firstName:
+         datatype: TEXT
+         description: 'first name'
+       lastName:
+         datatype: TEXT
+         description: 'last name'
+   LabbookEntry:
+     recommended_properties:
+       Project:
+       entryId:
+         datatype: INTEGER
+         description: 'UID of this entry'
+       responsible:
+         datatype: Person
+         description: 'the person responsible for these notes'
+       textElement:
+         datatype: TEXT
+         description: 'a text element of a labbook recording'
+       associatedFile:
+         datatype: FILE
+         description: 'A file associated with this recording'
+       table:
+         datatype: FILE
+         description: 'A table document associated with this recording'
+   extern:
+     - Textfile
+
+
+
+This example defines 3 ``RecordType``s:
+
+- A ``Project`` with one obligatory property ``projectId``
+- A ``Person`` with a ``firstName`` and a ``lastName`` (as recommended properties)
+- A ``LabbookEntry`` with multiple recommended properties of different data types
+- It is assumed that the server knows a RecordType or Property with the name
+  ``Textfile``.
+
+
+One major advantage of using this interface (in contrast to the standard python interface) is that properties can be defined and added to record types "on-the-fly". E.g. the three lines for ``firstName`` as sub entries of ``Person`` have two effects on CaosDB:
+
+- A new property with name ``firstName``, datatype ``TEXT`` and description ``first name`` is inserted (or updated, if already present) into CaosDB.
+- The new property is added as a recommended property to record type ``Person``.
+
+Any further occurrences of ``firstName`` in the yaml file will reuse the definition provided for ``Person``.
+
+Note the difference between the three property declarations of ``LabbookEntry``:
+
+- ``Project``: This record type is added directly as a property of ``LabbookEntry``. Therefore it does not specify any further attributes. Compare to the original declaration of record type ``Project``.
+- ``responsible``: This defines and adds a property with name "responsible" to ``LabbookEntry``, which has a datatype ``Person``. ``Person`` is defined above.
+- ``textElement``: This defines and adds a property with the standard data type ``TEXT`` to record type ``LabbookEntry``.
+
+If the data model depends on record types or properties which already exist in CaosDB, those can be
+added using the ``extern`` keyword: ``extern`` takes a list of previously defined names.
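+
+A minimal sketch of a model that only attaches such an external name to a new
+RecordType could look like this (it assumes that ``Textfile`` already exists
+on the server):
+
+.. code-block:: yaml
+
+   extern:
+     - Textfile
+
+   LabbookEntry:
+     recommended_properties:
+       Textfile: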
+
+Datatypes
+---------
+
+You can use any data type understood by CaosDB as datatype attribute in the yaml model.
+
+List attributes are a bit special:
+
+.. code-block:: yaml
+
+  datatype: LIST<DOUBLE>
+
+would declare a list datatype of DOUBLE elements.
+
+.. code-block:: yaml
+
+  datatype: LIST<Project>
+
+would declare a list of elements with datatype Project.
+
+
+Keywords
+--------
+
+- **parent**: Parent of this entity.
+- **importance**: Importance of this entity. Possible values: "recommended", "obligatory", "suggested"
+- **datatype**: The datatype of this property, e.g. TEXT, INTEGER or Project.
+- **unit**: The unit of the property, e.g. "m/s".
+- **description**: A description for this entity.
+- **recommended_properties**: Add properties to this entity with importance "recommended".
+- **obligatory_properties**: Add properties to this entity with importance "obligatory".
+- **suggested_properties**: Add properties to this entity with importance "suggested".
+- **inherit_from_recommended**: Inherit from another entity using the specified importance level, including the higher importance level "obligatory". This would add a corresponding parent and add all obligatory and recommended properties from the parent.
+- **inherit_from_suggested**: Inherit from another entity using the specified importance level, including the higher importance levels "obligatory" and "recommended". This would add a corresponding parent and add all obligatory, recommended and suggested properties from the parent.
+- **inherit_from_obligatory**: Inherit from another entity using the specified importance level. This would add a corresponding parent and add all obligatory properties from the parent.
+
+Usage
+-----
+
+You can use the yaml parser directly in python as follows:
+
+
+.. code-block:: python
+
+   from caosadvancedtools.models import parser as parser
+   model = parser.parse_model_from_yaml("model.yml")
+
+
+This creates a DataModel object containing all entities defined in the yaml file.
+
+You can then use the functions from caosadvancedtools.models.data_model.DataModel to synchronize
+the model with a CaosDB instance, e.g.:
+
+.. 
code-block:: python + + model.sync_data_model() diff --git a/tox.ini b/tox.ini index 3d7f652203ed0caf9cdfaebbb159784e6f9b2835..dde34b987b9b08bfdfc51a06dd46a9a0e0494f28 100644 --- a/tox.ini +++ b/tox.ini @@ -1,12 +1,16 @@ [tox] -envlist=py36, py37, py38, py39 +envlist=py36, py37, py38, py39, py310 skip_missing_interpreters = true [testenv] deps=nose pandas - caosdb + git+https://gitlab.indiscale.com/caosdb/src/caosdb-pylib.git@dev pytest pytest-cov openpyxl xlrd == 1.2 + h5py commands=py.test --cov=caosadvancedtools -vv {posargs} + +[flake8] +max-line-length=100 diff --git a/unittests/create_dummy_hdf5file.py b/unittests/create_dummy_hdf5file.py new file mode 100644 index 0000000000000000000000000000000000000000..ce04030154c70e1d533f67aeec12321b86ddf305 --- /dev/null +++ b/unittests/create_dummy_hdf5file.py @@ -0,0 +1,70 @@ +import h5py +import numpy as np + + +def create_hdf5_file(filename="hdf5_dummy_file.hdf5"): + ''' + Create a dummy hdf5-file for testing. + Structure: + + root:-->root + group_level1_a:-->group + group_level2_aa:-->group + group_level3_aaa:-->group + level3_aaa_floats_2d = float64(100x100) + group_level3_aab:-->group + group_level2_ab:-->group + group_level3_aba:-->group + level3_aba_floats_2d = float64(100x100) + group_level2_ac:-->group + level2_ac_integers_2d = int32(100x100) + group_level1_b:-->group + group_level2_ba:-->group + level2_ba_integers_2d = int32(100x100) + level1_b_floats = float64(10000) + group_level1_c:-->group + level1_c_floats = float64(10000) + root_integers = int32(10000) + ''' + + with h5py.File(filename, mode="w") as hdf5: + '''Create toplevel groups''' + group_lvl1_a = hdf5.create_group("group_level1_a") + group_lvl1_b = hdf5.create_group("group_level1_b") + group_lvl1_c = hdf5.create_group("group_level1_c") + + '''Create level 2 groups''' + group_lvl2_aa = group_lvl1_a.create_group("group_level2_aa") + group_lvl2_ab = group_lvl1_a.create_group("group_level2_ab") + group_lvl2_ac = group_lvl1_a.create_group("group_level2_ac") + group_lvl2_ba = group_lvl1_b.create_group("group_level2_ba") + + '''Create level 3 groups''' + group_lvl3_aaa = group_lvl2_aa.create_group("group_level3_aaa") + group_lvl3_aab = group_lvl2_aa.create_group("group_level3_aab") + group_lvl3_aba = group_lvl2_ab.create_group("group_level3_aba") + + '''Create datasets''' + integers = np.arange(10000) + floats = np.arange(0, 1000, 0.1) + integers_2d = np.diag(np.arange(100)) + floats_2d = np.eye(100) + data_root = hdf5.create_dataset("root_integers", data=integers) + data_lvl1_b = group_lvl1_b.create_dataset("level1_b_floats", data=floats) + data_lvl2_c = group_lvl1_c.create_dataset("level1_c_floats", data=floats) + data_lvl2_ac = group_lvl2_ac.create_dataset("level2_ac_integers_2d", data=integers_2d) + data_lvl2_ba = group_lvl2_ba.create_dataset("level2_ba_integers_2d", data=integers_2d) + data_lvl3_aaa = group_lvl3_aaa.create_dataset("level3_aaa_floats_2d", data=floats_2d) + data_lvl3_aba = group_lvl3_aba.create_dataset("level3_aba_floats_2d", data=floats_2d) + + '''Create attributes''' + attr_group_lvl1_a = group_lvl1_a.attrs.create("attr_group_lvl1_a", 1) + attr_group_lvl2_aa = group_lvl2_aa.attrs.create("attr_group_lvl2_aa", -2) + attr_group_lvl3_aaa = group_lvl3_aaa.attrs.create("attr_group_lvl3_aaa", 1.0) + attr_data_root = data_root.attrs.create("attr_data_root", -2.0) + attr_data_lvl2_ac = data_lvl2_ac.attrs.create("attr_data_lvl2_ac", np.diag(np.arange(10))) + attr_data_lvl3_aaa = data_lvl3_aaa.attrs.create("attr_data_lvl3_aaa", np.eye(10)) + + +if __name__ == 
"__main__": + create_hdf5_file() diff --git a/unittests/create_filetree.py b/unittests/create_filetree.py index 6f95618dbc834c3bc140163efdc90aa51c8d5248..f80b9681163859027bb8f8c7cd6b1387bf2d378d 100644 --- a/unittests/create_filetree.py +++ b/unittests/create_filetree.py @@ -42,8 +42,6 @@ def main(folder, dry=True): if not dry: os.mkdir(series_path) for date in [datetime.today()-timedelta(days=i)-timedelta(weeks=50*ii) for i in range(10)]: - #import IPython - # IPython.embed() exp_path = os.path.join(series_path, "Exp_"+str(date.date())) print("Exp: "+os.path.basename(exp_path)) if not dry: diff --git a/unittests/data/datatypes.xlsx b/unittests/data/datatypes.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..34fc4cf43092a68b630e0e04ebc43609b8a0b17b Binary files /dev/null and b/unittests/data/datatypes.xlsx differ diff --git a/unittests/date.xls b/unittests/date.xls new file mode 100644 index 0000000000000000000000000000000000000000..966ad4dc1d04055d75b455c8d0f9a5ac6f36200d Binary files /dev/null and b/unittests/date.xls differ diff --git a/unittests/hdf5_dummy_file.hdf5 b/unittests/hdf5_dummy_file.hdf5 new file mode 100644 index 0000000000000000000000000000000000000000..41bfb7ab3bcac19d90fd4f018cdd8118ae806eaf Binary files /dev/null and b/unittests/hdf5_dummy_file.hdf5 differ diff --git a/unittests/json-schema-models/datamodel_atomic_properties.schema.json b/unittests/json-schema-models/datamodel_atomic_properties.schema.json new file mode 100644 index 0000000000000000000000000000000000000000..3828f131180a839d5c9b8bc5aa1a1285717da723 --- /dev/null +++ b/unittests/json-schema-models/datamodel_atomic_properties.schema.json @@ -0,0 +1,24 @@ +[ + { + "title": "Dataset1", + "description": "Some description", + "type": "object", + "properties": { + "title": { "type": "string", "description": "full dataset title" }, + "campaign": { "type": "string", "description": "FIXME" }, + "number_prop": { "type": "number", "description": "Some float property" } + }, + "required": [ "title", "number_prop" ] + }, + { + "title": "Dataset2", + "type": "object", + "properties": { + "date_time": { "type": "string", "format": "date-time" }, + "date": { "type": "string", "format": "date" }, + "integer": { "type": "integer", "description": "Some integer property" }, + "boolean": { "type": "boolean" }, + "number_prop": { "type": "number", "description": "Some float property" } + } + } +] diff --git a/unittests/json-schema-models/datamodel_enum_prop.schema.json b/unittests/json-schema-models/datamodel_enum_prop.schema.json new file mode 100644 index 0000000000000000000000000000000000000000..a14008d141606368519c0caadc30b16a1dc9d16d --- /dev/null +++ b/unittests/json-schema-models/datamodel_enum_prop.schema.json @@ -0,0 +1,16 @@ +{ + "title": "Dataset", + "description": "Some description", + "type": "object", + "properties": { + "license": { + "type": "string", + "enum": ["CC-BY", "CC-BY-SA", "CC0", "restricted access"] + }, + "number_enum": { + "type": "number", + "enum": [1.1, 2.2, 3.3] + } + }, + "required": ["license"] +} diff --git a/unittests/json-schema-models/datamodel_int_enum_broken.schema.json b/unittests/json-schema-models/datamodel_int_enum_broken.schema.json new file mode 100644 index 0000000000000000000000000000000000000000..159b84ac36c26325b59cdd25d2830152c4acdaaa --- /dev/null +++ b/unittests/json-schema-models/datamodel_int_enum_broken.schema.json @@ -0,0 +1,11 @@ +{ + "title": "Dataset", + "description": "Some description", + "type": "object", + "properties": { + "int_enum": { + 
"type": "integer", + "enum": [1, 2, 3] + } + } +} diff --git a/unittests/json-schema-models/datamodel_list_properties.schema.json b/unittests/json-schema-models/datamodel_list_properties.schema.json new file mode 100644 index 0000000000000000000000000000000000000000..b95f468a1c13f1912266e65f029654077ce6a14e --- /dev/null +++ b/unittests/json-schema-models/datamodel_list_properties.schema.json @@ -0,0 +1,46 @@ +{ + "title": "Dataset", + "description": "Dataset with list (array) properties", + "type": "object", + "properties": { + "keywords": { + "type": "array", + "items": { "type": "string" } + }, + "booleans": { + "type": "array", + "items": { "type": "boolean" } + }, + "integers": { + "type": "array", + "items": { "type": "integer" } + }, + "floats": { + "type": "array", + "items": { "type": "number" } + }, + "datetimes": { + "type": "array", + "items": { "type": "string", "format": "date-time" } + }, + "dates": { + "type": "array", + "items": { "type": "string", "format": "date" } + }, + "reference": { + "type": "array", + "items": { "type": "object", "properties": {} } + }, + "reference_with_name": { + "type": "array", + "items": { "type": "object", "title": "event", "properties": {} } + }, + "license": { + "type": "array", + "items": { + "type": "string", + "enum": ["CC-BY", "CC-BY-SA", "CC0", "restricted access"] + } + } + } +} diff --git a/unittests/json-schema-models/datamodel_missing_property_type.schema.json b/unittests/json-schema-models/datamodel_missing_property_type.schema.json new file mode 100644 index 0000000000000000000000000000000000000000..eac3cc563df587568c4e9610d72618610566beef --- /dev/null +++ b/unittests/json-schema-models/datamodel_missing_property_type.schema.json @@ -0,0 +1,7 @@ +{ + "title": "Dataset", + "type": "object", + "properties": { + "method": { "description": "Missing property type" } + } +} diff --git a/unittests/json-schema-models/datamodel_name.schema.json b/unittests/json-schema-models/datamodel_name.schema.json new file mode 100644 index 0000000000000000000000000000000000000000..c0e86028c36172d27a4523f2c08db1b413b5c19f --- /dev/null +++ b/unittests/json-schema-models/datamodel_name.schema.json @@ -0,0 +1,12 @@ +{ + "title": "Dataset", + "type": "object", + "properties": { + "name": { "type": "string", "description": "Name of this dataset" }, + "date_time": { "type": "string", "format": "date-time" }, + "date": { "type": "string", "format": "date" }, + "integer": { "type": "integer", "description": "Some integer property" }, + "boolean": { "type": "boolean" }, + "number_prop": { "type": "number", "description": "Some float property" } + } +} diff --git a/unittests/json-schema-models/datamodel_name_wrong_type.schema.json b/unittests/json-schema-models/datamodel_name_wrong_type.schema.json new file mode 100644 index 0000000000000000000000000000000000000000..1988ad3d8cd613def36df69f5ad30fedd0a26e48 --- /dev/null +++ b/unittests/json-schema-models/datamodel_name_wrong_type.schema.json @@ -0,0 +1,12 @@ +{ + "title": "Dataset", + "type": "object", + "properties": { + "name": { "type": "boolean", "description": "Name of this dataset" }, + "date_time": { "type": "string", "format": "date-time" }, + "date": { "type": "string", "format": "date" }, + "integer": { "type": "integer", "description": "Some integer property" }, + "boolean": { "type": "boolean" }, + "number_prop": { "type": "number", "description": "Some float property" } + } +} diff --git a/unittests/json-schema-models/datamodel_references.schema.json 
b/unittests/json-schema-models/datamodel_references.schema.json new file mode 100644 index 0000000000000000000000000000000000000000..6b79a9bcdbbd8beaf9974a600e9c5ff30cb513f4 --- /dev/null +++ b/unittests/json-schema-models/datamodel_references.schema.json @@ -0,0 +1,24 @@ +{ + "title": "Dataset", + "description": "", + "type": "object", + "properties": { + "event": { + "type": "object", + "properties": { + "longitude": { + "type": "number" + }, + "latitude": { + "type": "number" + }, + "location": { + "type": "string", + "description": "geographical location (e.g., North Sea; Espoo, Finland)" + } + }, + "required": ["longitude", "latitude"] + } + }, + "required": ["event"] +} diff --git a/unittests/json-schema-models/datamodel_required_no_list.schema.json b/unittests/json-schema-models/datamodel_required_no_list.schema.json new file mode 100644 index 0000000000000000000000000000000000000000..f3697a71320bc8baf05156bec2c71f3915378654 --- /dev/null +++ b/unittests/json-schema-models/datamodel_required_no_list.schema.json @@ -0,0 +1,7 @@ +{ + "title": "Dataset", + "description": "", + "type": "object", + + "required": "Dataset" +} diff --git a/unittests/json-schema-models/datamodel_string_properties.schema.json b/unittests/json-schema-models/datamodel_string_properties.schema.json new file mode 100644 index 0000000000000000000000000000000000000000..62bc0a2a4250050e5433038bf61e7c9692bb0200 --- /dev/null +++ b/unittests/json-schema-models/datamodel_string_properties.schema.json @@ -0,0 +1,14 @@ +{ + "title": "Dataset", + "description": "", + "type": "object", + + "properties": { + "title": { "type": "string", "description": "full dataset title" }, + "campaign": { "type": "string", "description": "FIXME" }, + "method": { "type": "string", "description": "FIXME" }, + "titled": { "title": "The title", "type": "string", "description": "None" } + }, + + "required": ["title"] +} diff --git a/unittests/test.csv b/unittests/test.csv new file mode 100644 index 0000000000000000000000000000000000000000..a29679afce78089f3cdd4e5e388262456668cd90 --- /dev/null +++ b/unittests/test.csv @@ -0,0 +1,3 @@ +temperature [°C] ,depth +234.4,3.0 +344.6,5.1 diff --git a/unittests/test_cfood.py b/unittests/test_cfood.py index 1bad508a2c22cf1ee1e29be11c3342d2115dd5a2..7055bc7c51962c0cbc487f29bcdacb391218a7d3 100644 --- a/unittests/test_cfood.py +++ b/unittests/test_cfood.py @@ -48,13 +48,14 @@ class ExampleCFoodMeal(AbstractFileCFood, CMeal): CMeal.__init__(self) @classmethod - def match_item(cls, item): + def match_item(cls, path): """ standard match_match, but returns False if a suitable cfood exists """ - if cls.has_suitable_cfood(item): + print(path) + if cls.has_suitable_cfood(path): return False - return re.match(cls.get_re(), item) is not None + return re.match(cls.get_re(), path) is not None def looking_for(self, crawled_file): """ standard looking_for, but returns True if the file matches all @@ -112,6 +113,36 @@ class CFoodReTest(unittest.TestCase): self.assertTrue(SimpleCFood.match_item("hallo")) self.assertFalse(SimpleCFood.match_item("allo")) + def test_extensions(self): + """Test the RE generation.""" + empty_extensions = [] + extensions = ["foo", "bar"] + + self.assertIsNone(AbstractFileCFood.re_from_extensions(empty_extensions)) + self.assertIsNotNone(SimpleCFood.re_from_extensions(extensions)) + + class ExtCFood(AbstractFileCFood): + + @staticmethod + def get_re(): + return AbstractFileCFood.re_from_extensions(extensions) + create_identifiables = None + update_identifiables = None + + # test which paths 
are matched + print(ExtCFood.re_from_extensions(extensions)) + self.assertTrue(ExtCFood.match_item("hello/world.foo")) + self.assertTrue(ExtCFood.match_item("hello/world.bar")) + self.assertFalse(ExtCFood.match_item("hello/world.baz")) + self.assertFalse(ExtCFood.match_item("hello/world.foo ")) # Mind the space. + self.assertFalse(ExtCFood.match_item("hello/world.foobar")) + self.assertFalse(ExtCFood.match_item("hello/world.foo|bar")) + self.assertFalse(ExtCFood.match_item("hello/world.fobar")) + self.assertFalse(ExtCFood.match_item("hello/world.fooar")) + + # Test stored extension + self.assertEqual(ExtCFood("hello/world.foo").match["ext"], "foo") + class InsertionTest(unittest.TestCase): def test_contained_in_list(self): @@ -160,6 +191,35 @@ class InsertionTest(unittest.TestCase): value=new_int, to_be_updated=to_be_updated) assert to_be_updated[0] is entity + """Test properties with lists""" + rec1 = db.Record(id=12345) + rec1.add_property("Exp", value=[98765], datatype=db.LIST("Exp")) + rec2 = db.Record(id=98765) + update = [] + # compare Entity with id + assure_has_property(rec1, "Exp", [rec2], to_be_updated=update) + assert len(update) == 0 + update = [] + # compare id with id + assure_has_property(rec1, "Exp", [98765], to_be_updated=update) + assert len(update) == 0 + update = [] + # compare id with different list of ids + assure_has_property(rec1, "Exp2", [98765, 444, 555], + to_be_updated=update) + assert len(update) == 1 + + rec = db.Record(id=666666) + rec3 = db.Record(id=777777) + rec.add_property("Exp", value=[888888, rec3], datatype=db.LIST("Exp")) + rec2 = db.Record(id=888888) + update = [] + # compare id and Entity with id and Entity + # i.e. check that conversion from Entity to id works in both + # directions. + assure_has_property(rec, "Exp", [rec2, 777777], to_be_updated=update) + assert len(update) == 0 + def test_property_is(self): """Test properties with string, int, float, and Boolean values""" entity = db.Record() diff --git a/unittests/test_crawler.py b/unittests/test_crawler.py index f603031eddbcf1e10c2842ec4e89ca591700b94f..64bf291c1181d901ac39a4d2535dcd6eddf39f70 100644 --- a/unittests/test_crawler.py +++ b/unittests/test_crawler.py @@ -45,3 +45,7 @@ class CrawlerTest(unittest.TestCase): datatype=db.LIST("RT2")) qs = Crawler.create_query_for_identifiable(ident) assert qs == "FIND Record RT WITH references 2345 AND references 234567 " + ident = db.Record() + ident.add_parent(name="RT") + self.assertRaises(ValueError, Crawler.create_query_for_identifiable, + ident) diff --git a/unittests/test_data_model.py b/unittests/test_data_model.py index 074239399002833e8500af6369f1b2c7bcc8a3ac..159adfca1d589bb092b6f59110828b5868401e25 100644 --- a/unittests/test_data_model.py +++ b/unittests/test_data_model.py @@ -1,19 +1,11 @@ import unittest import caosdb as db -import pytest from caosadvancedtools.models.data_model import DataModel class DataModelTest(unittest.TestCase): - def tearDown(self): - try: - tests = db.execute_query("FIND test*") - tests.delete() - except Exception: - pass - def test_collecting(self): maintained = {"one": db.RecordType(name="TestRecord").add_property( name="testproperty"), @@ -24,17 +16,6 @@ class DataModelTest(unittest.TestCase): assert "TestRecord" in names assert "testproperty" in names - # TODO this seems to require integration test - @pytest.mark.xfail - def test_get_existing_entities(self): - db.RecordType(name="TestRecord").insert() - c = db.Container().extend([ - db.Property(name="testproperty"), - db.RecordType(name="TestRecord")]) - 
exist = DataModel.get_existing_entities(c)
-        assert len(exist) == 1
-        assert exist[0].name == "TestRecord"
-
     def test_sync_ids_by_name(self):
         container = db.Container().extend([db.RecordType(name="TestRecord"),
                                            db.RecordType(name="TestRecord2"),
diff --git a/unittests/test_generic_analysis.py b/unittests/test_generic_analysis.py
new file mode 100644
index 0000000000000000000000000000000000000000..a1077b97ec58f80c8534c89d5fa5f57d8d815cb9
--- /dev/null
+++ b/unittests/test_generic_analysis.py
@@ -0,0 +1,70 @@
+#!/usr/bin/env python3
+# encoding: utf-8
+#
+# ** header v3.0
+# This file is a part of the CaosDB Project.
+#
+# Copyright (C) 2021 Indiscale GmbH <info@indiscale.com>
+# Copyright (C) 2021 Henrik tom Wörden <h.tomwoerden@indiscale.com>
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as
+# published by the Free Software Foundation, either version 3 of the
+# License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+#
+# ** end header
+#
+
+"""
+Tests for caosadvancedtools.serverside.generic_analysis, in particular for
+the check_referenced_script helper.
+"""
+
+import caosdb as db
+from caosadvancedtools.serverside.generic_analysis import \
+    check_referenced_script
+
+from test_utils import BaseMockUpTest
+
+
+class TestGAnalysisNoFile(BaseMockUpTest):
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        self.entities = (
+            '<Response><Record name="script.py" path="/some/path/script.py'
+            '" id="1234"/><Query string="find record" results="1">'
+            '</Query></Response>')
+
+    def test_check_referenced_script(self):
+        # missing scripts
+        self.assertIsNone(check_referenced_script(db.Record()))
+        # wrong datatype
+        self.assertIsNone(check_referenced_script(db.Record().add_property(
+            "scripts", datatype=db.TEXT)))
+        # wrong value
+        self.assertIsNone(check_referenced_script(db.Record().add_property(
+            "scripts", datatype=db.REFERENCE, value="hallo")))
+        # no file
+        self.assertIsNone(check_referenced_script(db.Record().add_property(
+            "scripts", datatype=db.REFERENCE, value="1234")))
+
+
+class TestGAnalysisFile(BaseMockUpTest):
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        self.entities = (
+            '<Response><File name="script.py" path="/some/path/script.py'
+            '" id="1234"/><Query string="find record" results="1">'
+            '</Query></Response>')
+
+    def test_check_referenced_script(self):
+        # all correct
+        self.assertEqual(check_referenced_script(db.Record().add_property(
+            "scripts", datatype=db.REFERENCE, value="1234")), "script.py")
diff --git a/unittests/test_h5.py b/unittests/test_h5.py
new file mode 100644
index 0000000000000000000000000000000000000000..360d4b28938492d0f2af6d696e39dffb1cc3fead
--- /dev/null
+++ b/unittests/test_h5.py
@@ -0,0 +1,190 @@
+import unittest
+from tempfile import NamedTemporaryFile
+
+import caosdb as db
+import caosdb.apiutils
+import h5py
+import numpy as np
+from caosadvancedtools.cfoods import h5
+from caosadvancedtools.cfoods.h5 import h5_attr_to_property
+
+from create_dummy_hdf5file import create_hdf5_file
+
+ENTS = {
+    101: db.Record(id=101),
+    102: db.Record(id=102),
+    103: db.Record(id=103).add_property("test", value=101,
+                                        datatype=db.REFERENCE),
+}
+
+
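+# dummy_get below stands in for caosdb.apiutils.retrieve_entity_with_id in
+# test_collect_existing_structure: it resolves entity ids against the ENTS
+# fixture instead of querying a server.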
+def dummy_get(eid):
+    return ENTS[eid]
+
+
+class H5CFoodTest(unittest.TestCase):
+    def setUp(self):
+        self.h5file = NamedTemporaryFile(delete=False, suffix=".h5")
+        self.h5file.close()
+        create_hdf5_file(self.h5file.name)
+        self.h5obj = h5py.File(self.h5file.name, mode="a")
+
+    def test_create_record_records(self):
+        result = h5.H5CFood.create_structure(self.h5obj)
+
+        record_list = []
+        parents = ['group_level1_a', 'group_level1_b', 'group_level1_c', 'root_integers']
+
+        for i in parents:
+            record_list.append(db.Record().add_parent(name=i))
+
+        found_parents = []
+
+        for ent in [p.value for p in result.properties]:
+            if ent.parents[0].name == 'group_level1_a':
+                found_parents.append('group_level1_a')
+                self.assertTrue(ent.get_property("group_level2_aa") is not None)
+                self.assertTrue(ent.get_property("group_level1_a") is None)
+            elif ent.parents[0].name == 'group_level1_b':
+                found_parents.append('group_level1_b')
+                pass
+            elif ent.parents[0].name == 'group_level1_c':
+                found_parents.append('group_level1_c')
+                pass
+            elif ent.parents[0].name == 'root_integers':
+                found_parents.append('root_integers')
+                pass
+
+        for p in parents:
+            self.assertTrue(p in found_parents)
+
+        for i in range(len(result.properties)):
+            for j in result.properties[i].value.get_parents():
+                for k in record_list[i].get_parents():
+                    self.assertEqual(j.name, k.name)
+
+        result1 = h5.H5CFood.create_structure(self.h5obj["group_level1_a"])
+
+        for i in result1.get_parents():
+            self.assertEqual(i.name, "group_level1_a")
+
+        result2 = h5.H5CFood.create_structure(self.h5obj["group_level1_a/group_level2_aa"])
+
+        for i in result2.get_parents():
+            self.assertEqual(i.name, "group_level2_aa")
+
+    def test_collect_existing_structure(self):
+        # TODO this probably breaks the code: the function will not be
+        # restored correctly.
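+        # A sketch of a safer pattern (assuming only the standard library's
+        # unittest.mock), which would restore the patched function even if
+        # the test fails:
+        #     from unittest import mock
+        #     with mock.patch("caosdb.apiutils.retrieve_entity_with_id",
+        #                     dummy_get):
+        #         ...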
+ # Change it to use the BaseMockUpTest + real_retrieve = caosdb.apiutils.retrieve_entity_with_id + caosdb.apiutils.retrieve_entity_with_id = dummy_get + + # should run without problem + h5.collect_existing_structure(db.Record(), db.Record(id=234), h5.EntityMapping()) + + # test with retrieval: both Records have one test Property with one + # value -> The referenced Entities are matched + r_exist = db.Record(id=234) + r_exist.add_property("test", value=101, datatype=db.REFERENCE) + r_target = db.Record() + r_child = db.Record() + r_target.add_property("test", value=r_child, datatype=db.REFERENCE) + em = h5.EntityMapping() + h5.collect_existing_structure(r_target, r_exist, em) + self.assertTrue(em.to_existing[r_child._cuid] is ENTS[101]) + self.assertTrue(em.to_target[101] is r_child) + + # test with retrieval: the existing Record has another Property + # -> The referenced Entities are matched + r_exist = db.Record(id=234) + r_exist.add_property("test_other", value=101, datatype=db.REFERENCE) + r_target = db.Record() + r_child = db.Record() + r_target.add_property("test", value=r_child, datatype=db.REFERENCE) + em = h5.EntityMapping() + h5.collect_existing_structure(r_target, r_exist, em) + self.assertEqual(em.to_existing, {}) + self.assertEqual(em.to_target, {}) + + # test with retrieval: both Records have one test Property; the + # existing is missing the value -> The referenced Entities are matched + r_exist = db.Record(id=234) + r_exist.add_property("test", value=None, datatype=db.REFERENCE) + r_target = db.Record() + r_child = db.Record() + r_target.add_property("test", value=r_child, datatype=db.REFERENCE) + em = h5.EntityMapping() + h5.collect_existing_structure(r_target, r_exist, em) + self.assertEqual(em.to_existing, {}) + self.assertEqual(em.to_target, {}) + + # test with retrieval: both Records have one test Property with + # multiple values -> The referenced Entities are matched + r_exist = db.Record(id=234) + r_exist.add_property("test", value=[101, 102], datatype=db.LIST(db.REFERENCE)) + r_target = db.Record() + r_child = db.Record() + r_child2 = db.Record() + r_target.add_property("test", value=[r_child, r_child2], + datatype=db.LIST(db.REFERENCE)) + em = h5.EntityMapping() + h5.collect_existing_structure(r_target, r_exist, em) + self.assertEqual(em.to_existing[r_child._cuid], ENTS[101]) + self.assertEqual(em.to_existing[r_child2._cuid], ENTS[102]) + self.assertEqual(em.to_target[101], r_child) + self.assertEqual(em.to_target[102], r_child2) + + # test with retrieval: both Records have one test Property with one + # value; Add another recursion level -> The referenced Entities are matched + r_exist = db.Record(id=234) + r_exist.add_property("test", value=103, datatype=db.REFERENCE) + r_target = db.Record() + r_child = db.Record() + r_child2 = db.Record() + r_target.add_property("test", value=r_child, datatype=db.REFERENCE) + r_child.add_property("test", value=r_child2, datatype=db.REFERENCE) + em = h5.EntityMapping() + h5.collect_existing_structure(r_target, r_exist, em) + self.assertEqual(em.to_existing[r_child._cuid], ENTS[103]) + self.assertEqual(em.to_target[103], r_child) + self.assertEqual(em.to_existing[r_child2._cuid], ENTS[101]) + self.assertEqual(em.to_target[101], r_child2) + + caosdb.apiutils.retrieve_entity_with_id = real_retrieve + + def test_h5_attr_to_property(self): + + test_int: int = 1 + test_integer = np.int_(1) + test_float = np.float_(1.0) + test_str = "Test" + test_complex: complex = 2+3j + self.assertRaises(NotImplementedError, h5_attr_to_property, 
test_int) # only numpy-integers processed? + self.assertTupleEqual((1, db.INTEGER), h5_attr_to_property(test_integer)) + self.assertTupleEqual((1.0, db.DOUBLE), h5_attr_to_property(test_float)) + self.assertTupleEqual(("Test", db.TEXT), h5_attr_to_property(test_str)) + self.assertTupleEqual((2+3j, db.TEXT), h5_attr_to_property(test_complex)) + # strings are often represented using a binary format + self.assertTupleEqual(("yeti", db.TEXT), h5_attr_to_property( + np.array(["yeti"], dtype=h5py.string_dtype(r'utf-8', 8))[0])) + + test_integer_1d = np.arange(10) + test_float_1d = np.arange(0, 1, 0.1) + test_str_1d = np.array(["a", "b", "c"]) + self.assertTrue((np.arange(10) == h5_attr_to_property(test_integer_1d)[0]).all()) + self.assertTrue(db.LIST(db.INTEGER) == h5_attr_to_property(test_integer_1d)[1]) + self.assertTrue((np.arange(0, 1, 0.1) == h5_attr_to_property(test_float_1d)[0]).all()) + self.assertTrue(db.LIST(db.DOUBLE) == h5_attr_to_property(test_float_1d)[1]) + self.assertTrue((np.array(["a", "b", "c"]) == h5_attr_to_property(test_str_1d)[0]).all()) + self.assertTrue(db.LIST(db.TEXT) == h5_attr_to_property(test_str_1d)[1]) + + test_integers_2d = np.diag(np.arange(100)) + test_floats_2d = np.eye(100) + self.assertTupleEqual((None, None), h5_attr_to_property(test_integers_2d)) + self.assertTupleEqual((None, None), h5_attr_to_property(test_floats_2d)) + + # Test scalar values given as np.array + self.assertTupleEqual((1, db.INTEGER), h5_attr_to_property(np.array(1))) + self.assertTupleEqual((1.123, db.DOUBLE), h5_attr_to_property(np.array(1.123))) + self.assertTupleEqual(('Hello World', db.TEXT), h5_attr_to_property(np.array("Hello World"))) diff --git a/unittests/test_json_schema_model_parser.py b/unittests/test_json_schema_model_parser.py new file mode 100644 index 0000000000000000000000000000000000000000..7f47890f413dce5511cd498fe802e03a1af3be70 --- /dev/null +++ b/unittests/test_json_schema_model_parser.py @@ -0,0 +1,358 @@ +# +# This file is a part of the CaosDB Project. +# +# Copyright (C) 2022 IndiScale GmbH <info@indiscale.com> +# Copyright (C) 2022 Florian Spreckelsen <f.spreckelsen@indiscale.com> +# Copyright (C) 2022 Daniel Hornung <d.hornung@indiscale.com> +# +# This program is free software: you can redistribute it and/or modify it under +# the terms of the GNU Affero General Public License as published by the Free +# Software Foundation, either version 3 of the License, or (at your option) any +# later version. +# +# This program is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +# FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more +# details. +# +# You should have received a copy of the GNU Affero General Public License along +# with this program. If not, see <https://www.gnu.org/licenses/>. 
+#
+
+# @review Daniel Hornung 2022-02-18
+
+import os
+import pytest
+
+import caosdb as db
+from caosadvancedtools.models.parser import (parse_model_from_json_schema,
+                                             JsonSchemaDefinitionError)
+
+FILEPATH = os.path.join(os.path.dirname(
+    os.path.abspath(__file__)), 'json-schema-models')
+
+
+def test_rt_with_string_properties():
+    """Test datamodel parsing of datamodel_string_properties.schema.json"""
+    # @author Florian Spreckelsen
+    # @date 2022-02-17
+
+    model = parse_model_from_json_schema(
+        os.path.join(FILEPATH,
+                     "datamodel_string_properties.schema.json"))
+    assert "Dataset" in model
+    dataset_rt = model["Dataset"]
+    assert isinstance(dataset_rt, db.RecordType)
+    assert dataset_rt.name == "Dataset"
+    assert dataset_rt.description == ""
+    assert len(dataset_rt.get_properties()) == 4
+
+    assert dataset_rt.get_property("title") is not None
+    assert dataset_rt.get_property("campaign") is not None
+    assert dataset_rt.get_property("method") is not None
+
+    assert dataset_rt.get_property("The title") is not None
+    assert dataset_rt.get_property("titled") is None
+
+    title_prop = dataset_rt.get_property("title")
+    assert title_prop.datatype == db.TEXT
+    assert dataset_rt.get_importance(title_prop.name) == db.OBLIGATORY
+
+    campaign_prop = dataset_rt.get_property("campaign")
+    assert campaign_prop.datatype == db.TEXT
+    assert dataset_rt.get_importance(campaign_prop.name) == db.RECOMMENDED
+
+    method_prop = dataset_rt.get_property("method")
+    assert method_prop.datatype == db.TEXT
+    assert dataset_rt.get_importance(method_prop.name) == db.RECOMMENDED
+
+
+def test_datamodel_with_atomic_properties():
+    """Test read-in of two separate record types with atomic-typed properties."""
+    # @author Florian Spreckelsen
+    # @date 2022-02-18
+
+    model = parse_model_from_json_schema(os.path.join(
+        FILEPATH, "datamodel_atomic_properties.schema.json"))
+    assert "Dataset1" in model
+    assert "Dataset2" in model
+
+    rt1 = model["Dataset1"]
+    assert isinstance(rt1, db.RecordType)
+    assert rt1.name == "Dataset1"
+    assert rt1.description == "Some description"
+    assert len(rt1.get_properties()) == 3
+
+    assert rt1.get_property("title") is not None
+    assert rt1.get_property("campaign") is not None
+    assert rt1.get_property("number_prop") is not None
+
+    title_prop = rt1.get_property("title")
+    assert title_prop.datatype == db.TEXT
+    assert rt1.get_importance(title_prop.name) == db.OBLIGATORY
+
+    campaign_prop = rt1.get_property("campaign")
+    assert campaign_prop.datatype == db.TEXT
+    assert rt1.get_importance(campaign_prop.name) == db.RECOMMENDED
+
+    float_prop = rt1.get_property("number_prop")
+    assert float_prop.datatype == db.DOUBLE
+    assert rt1.get_importance(float_prop.name) == db.OBLIGATORY
+
+    rt2 = model["Dataset2"]
+    assert isinstance(rt2, db.RecordType)
+    assert rt2.name == "Dataset2"
+    assert not rt2.description
+    assert len(rt2.get_properties()) == 5
+
+    date_prop = rt2.get_property("date")
+    assert date_prop.datatype == db.DATETIME
+
+    datetime_prop = rt2.get_property("date_time")
+    assert datetime_prop.datatype == db.DATETIME
+
+    int_prop = rt2.get_property("integer")
+    assert int_prop.datatype == db.INTEGER
+    assert int_prop.description == "Some integer property"
+
+    bool_prop = rt2.get_property("boolean")
+    assert bool_prop.datatype == db.BOOLEAN
+
+    float_prop2 = rt2.get_property("number_prop")
+    assert float_prop.datatype == float_prop2.datatype
+
+
+def test_required_no_list():
+    """Exception must be raised when "required" is not a list."""
+    # @author Daniel Hornung
+    # @date 2022-02-18
+
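+    # datamodel_required_no_list.schema.json declares "required": "Dataset"
+    # (a string instead of a list), which the parser must reject.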
+    with pytest.raises(JsonSchemaDefinitionError) as err:
+        parse_model_from_json_schema(
+            os.path.join(FILEPATH,
+                         "datamodel_required_no_list.schema.json"))
+    assert "'Dataset' is not of type 'array'" in str(err.value)
+
+
+def test_missing_property_type():
+    """Exception must be raised when "type" is missing."""
+    with pytest.raises(JsonSchemaDefinitionError) as err:
+        parse_model_from_json_schema(
+            os.path.join(FILEPATH,
+                         "datamodel_missing_property_type.schema.json"))
+    assert "`type` is missing" in str(err.value)
+
+
+def test_enum():
+    """Enums are represented as references to records of a specific type."""
+    # @author Florian Spreckelsen
+    # @date 2022-03-16
+
+    model = parse_model_from_json_schema(os.path.join(
+        FILEPATH, "datamodel_enum_prop.schema.json"))
+    licenses = ["CC-BY", "CC-BY-SA", "CC0", "restricted access"]
+    for name in ["Dataset", "license"] + licenses:
+        assert name in model
+
+    assert isinstance(model["Dataset"], db.RecordType)
+    assert model["Dataset"].get_property("license") is not None
+    assert model["Dataset"].get_property("license").is_reference()
+    assert model["Dataset"].get_property("license").datatype.name == "license"
+    assert isinstance(model["license"], db.RecordType)
+
+    for name in licenses:
+        assert isinstance(model[name], db.Record)
+        assert model[name].name == name
+        assert len(model[name].parents) == 1
+        assert model[name].has_parent(model["license"])
+
+    # Also allow enums with non-string types
+    number_enums = ["1.1", "2.2", "3.3"]
+    for name in ["number_enum"] + number_enums:
+        assert name in model
+
+    assert isinstance(model["number_enum"], db.RecordType)
+    assert model["Dataset"].get_property("number_enum") is not None
+    assert model["Dataset"].get_property("number_enum").is_reference()
+    assert model["Dataset"].get_property(
+        "number_enum").datatype.name == "number_enum"
+
+    for name in number_enums:
+        assert isinstance(model[name], db.Record)
+        assert model[name].name == name
+        assert len(model[name].parents) == 1
+        assert model[name].has_parent(model["number_enum"])
+
+
+@pytest.mark.xfail(reason="Don't allow integer enums until https://gitlab.indiscale.com/caosdb/src/caosdb-server/-/issues/224 has been fixed")
+def test_int_enum():
+    """Check an enum property with type: integer"""
+    # @author Florian Spreckelsen
+    # @date 2022-03-22
+
+    model = parse_model_from_json_schema(os.path.join(
+        FILEPATH, "datamodel_int_enum_broken.schema.json"))
+    int_enums = ["1", "2", "3"]
+    for name in ["Dataset", "int_enum"] + int_enums:
+        assert name in model
+
+    assert isinstance(model["Dataset"], db.RecordType)
+    assert model["Dataset"].get_property("int_enum") is not None
+    assert model["Dataset"].get_property("int_enum").is_reference()
+    assert model["Dataset"].get_property(
+        "int_enum").datatype.name == "int_enum"
+    assert isinstance(model["int_enum"], db.RecordType)
+
+    for name in int_enums:
+        assert isinstance(model[name], db.Record)
+        assert model[name].name == name
+        assert len(model[name].parents) == 1
+        assert model[name].has_parent(model["int_enum"])
+
+
+def test_references():
+    """Test reference properties"""
+    # @author Florian Spreckelsen
+    # @date 2022-03-17
+
+    model = parse_model_from_json_schema(os.path.join(
+        FILEPATH, "datamodel_references.schema.json"))
+    for name in ["Dataset", "event", "longitude", "latitude", "location"]:
+        assert name in model
+
+    assert isinstance(model["Dataset"], db.RecordType)
+    assert model["Dataset"].get_property("event") is not None
+    assert model["Dataset"].get_importance("event") == db.OBLIGATORY
+
assert model["Dataset"].get_property("event").is_reference() + assert model["Dataset"].get_property("event").datatype.name == "event" + + assert isinstance(model["event"], db.RecordType) + assert model["event"].get_property("longitude") is not None + assert model["event"].get_importance("longitude") == db.OBLIGATORY + assert model["event"].get_property("longitude").datatype == db.DOUBLE + + assert model["event"].get_property("latitude") is not None + assert model["event"].get_importance("latitude") == db.OBLIGATORY + assert model["event"].get_property("latitude").datatype == db.DOUBLE + + assert model["event"].get_property("location") is not None + assert model["event"].get_importance("location") == db.RECOMMENDED + assert model["event"].get_property("location").datatype == db.TEXT + + assert isinstance(model["longitude"], db.Property) + assert model["longitude"].datatype == db.DOUBLE + + assert isinstance(model["latitude"], db.Property) + assert model["latitude"].datatype == db.DOUBLE + + assert isinstance(model["location"], db.Property) + assert model["location"].datatype == db.TEXT + assert model["location"].description == "geographical location (e.g., North Sea; Espoo, Finland)" + + +def test_list(): + """Test list properties with all possible datatypes.""" + # @author Florian Spreckelsen + # @date 2022-03-17 + + model = parse_model_from_json_schema(os.path.join( + FILEPATH, "datamodel_list_properties.schema.json")) + licenses = ["CC-BY", "CC-BY-SA", "CC0", "restricted access"] + names = ["Dataset", "keywords", "booleans", "integers", "floats", + "datetimes", "dates", "reference", "reference_with_name", "event", + "license"] + for name in names + licenses: + assert name in model + + dataset_rt = model["Dataset"] + assert dataset_rt.get_property("keywords") is not None + assert dataset_rt.get_property("keywords").datatype == db.LIST(db.TEXT) + assert isinstance(model["keywords"], db.Property) + assert model["keywords"].name == "keywords" + assert model["keywords"].datatype == db.LIST(db.TEXT) + + assert dataset_rt.get_property("booleans") is not None + assert dataset_rt.get_property("booleans").datatype == db.LIST(db.BOOLEAN) + assert isinstance(model["booleans"], db.Property) + assert model["booleans"].name == "booleans" + assert model["booleans"].datatype == db.LIST(db.BOOLEAN) + + assert dataset_rt.get_property("integers") is not None + assert dataset_rt.get_property("integers").datatype == db.LIST(db.INTEGER) + assert isinstance(model["integers"], db.Property) + assert model["integers"].name == "integers" + assert model["integers"].datatype == db.LIST(db.INTEGER) + + assert dataset_rt.get_property("floats") is not None + assert dataset_rt.get_property("floats").datatype == db.LIST(db.DOUBLE) + assert isinstance(model["floats"], db.Property) + assert model["floats"].name == "floats" + assert model["floats"].datatype == db.LIST(db.DOUBLE) + + assert dataset_rt.get_property("datetimes") is not None + assert dataset_rt.get_property( + "datetimes").datatype == db.LIST(db.DATETIME) + assert isinstance(model["datetimes"], db.Property) + assert model["datetimes"].name == "datetimes" + assert model["datetimes"].datatype == db.LIST(db.DATETIME) + + assert dataset_rt.get_property("dates") is not None + assert dataset_rt.get_property( + "dates").datatype == db.LIST(db.DATETIME) + assert isinstance(model["dates"], db.Property) + assert model["dates"].name == "dates" + assert model["dates"].datatype == db.LIST(db.DATETIME) + + # Simple reference list property + assert 
dataset_rt.get_property("reference") is not None + assert dataset_rt.get_property("reference").is_reference() + assert dataset_rt.get_property( + "reference").datatype == db.LIST("reference") + assert isinstance(model["reference"], db.RecordType) + assert model["reference"].name == "reference" + assert dataset_rt.get_property( + "reference").datatype == db.LIST(model["reference"]) + + # Reference list with name + assert dataset_rt.get_property("reference_with_name") is not None + assert dataset_rt.get_property("reference_with_name").is_reference() + assert dataset_rt.get_property( + "reference_with_name").datatype == db.LIST("event") + assert isinstance(model["event"], db.RecordType) + assert model["event"].name == "event" + assert dataset_rt.get_property( + "reference_with_name").datatype == db.LIST(model["event"]) + assert isinstance(model["reference_with_name"], db.Property) + assert model["reference_with_name"].name == "reference_with_name" + assert model["reference_with_name"].datatype == db.LIST(model["event"]) + + # References to enum types + assert dataset_rt.get_property("license") is not None + assert dataset_rt.get_property("license").is_reference() + assert dataset_rt.get_property("license").datatype == db.LIST("license") + assert isinstance(model["license"], db.RecordType) + assert model["license"].name == "license" + assert dataset_rt.get_property( + "license").datatype == db.LIST(model["license"]) + + for name in licenses: + assert isinstance(model[name], db.Record) + assert model[name].name == name + assert len(model[name].parents) == 1 + assert model[name].has_parent(model["license"]) + + +def test_name_property(): + model = parse_model_from_json_schema(os.path.join( + FILEPATH, "datamodel_name.schema.json")) + + dataset_rt = model["Dataset"] + assert dataset_rt.get_property("name") is None + assert "name" not in model + + with pytest.raises(JsonSchemaDefinitionError) as err: + broken = parse_model_from_json_schema(os.path.join( + FILEPATH, "datamodel_name_wrong_type.schema.json")) + assert str(err.value).startswith( + "The 'name' property must be string-typed, otherwise it cannot be identified with CaosDB's " + "name property.") diff --git a/unittests/test_result_table_cfood.py b/unittests/test_result_table_cfood.py new file mode 100644 index 0000000000000000000000000000000000000000..3341a2394cc9ef15ae172bb8992445d87c60d063 --- /dev/null +++ b/unittests/test_result_table_cfood.py @@ -0,0 +1,67 @@ +#!/usr/bin/env python3 +# encoding: utf-8 +# +# ** header v3.0 +# This file is a part of the CaosDB Project. +# +# Copyright (C) 2018 Research Group Biomedical Physics, +# Max-Planck-Institute for Dynamics and Self-Organization Göttingen +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see <https://www.gnu.org/licenses/>. 
+# +# ** end header +# + +""" +test module for ResultTableCFood +""" + + +import os +import re +import unittest + +import caosdb as db +from caosadvancedtools.scifolder.result_table_cfood import ResultTableCFood + + +class CFoodTest(unittest.TestCase): + def test_re(self): + self.assertIsNotNone(re.match(ResultTableCFood.table_re, "result_table_Hallo.csv")) + self.assertEqual(re.match(ResultTableCFood.table_re, "result_table_Hallo.csv").group("recordtype"), + "Hallo") + self.assertIsNotNone(re.match(ResultTableCFood.table_re, + "result_table_Cool RecordType.csv")) + self.assertEqual(re.match(ResultTableCFood.table_re, "result_table_Cool RecordType.csv").group("recordtype"), + "Cool RecordType") + self.assertIsNone(re.match(ResultTableCFood.table_re, "result_tableCool RecordType.csv")) + + self.assertIsNotNone(re.match(ResultTableCFood.property_name_re, + "temperature [C]")) + self.assertEqual(re.match(ResultTableCFood.property_name_re, + "temperature [C]").group("pname"), + "temperature") + self.assertEqual(re.match(ResultTableCFood.property_name_re, + "temperature [C]").group("unit"), "C") + self.assertEqual(re.match(ResultTableCFood.property_name_re, + "temperature [ C ]").group("unit"), "C") + self.assertEqual(re.match(ResultTableCFood.property_name_re, + "temperature").group("pname"), "temperature") + + def test_ident(self): + rtc = ResultTableCFood(os.path.join(os.path.dirname(__file__), "test.csv")) + rtc.match = re.match(ResultTableCFood.get_re(), + "/ExperimentalData/2010_TestProject/2019-02-03_something/result_table_RT.csv") + rtc.create_identifiables() + rtc.update_identifiables() diff --git a/unittests/test_structure_mapping.py b/unittests/test_structure_mapping.py new file mode 100644 index 0000000000000000000000000000000000000000..5cc4114fc7f92c580f53dd8855bda659082e2b46 --- /dev/null +++ b/unittests/test_structure_mapping.py @@ -0,0 +1,135 @@ +#!/usr/bin/env python3 + +# This file is a part of the CaosDB Project. +# +# Copyright (C) 2021 IndiScale GmbH <www.indiscale.com> +# Copyright (C) 2021 Henrik tom Wörden <h.tomwoerden@indiscale.com> +# Copyright (C) 2021 Alexander Kreft <akreft@trineo.org> +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see <https://www.gnu.org/licenses/>. 
+
+import unittest
+
+import caosdb as db
+from caosadvancedtools.structure_mapping import (EntityMapping,
+                                                 collect_existing_structure)
+
+
+class structureMappingTest(unittest.TestCase):
+    def test_Entitymapping(self):
+        ex = db.Record(id=100)  # existing Record
+        tar = db.Record()  # target Record
+        em = EntityMapping()
+        em.add(tar, ex)
+
+        for key, val in em.to_existing.items():
+            self.assertEqual(key, tar._cuid)
+            self.assertEqual(val, ex)
+
+        for key, val in em.to_target.items():
+            self.assertEqual(key, ex.id)
+            self.assertEqual(val, tar)
+
+    def test_collect_existing_structure(self):
+        emap = EntityMapping()
+        reca1 = db.Record(name="Animals", id=100)
+        reca2 = db.Record(name="Dogs", id=200)
+        reca3 = db.Record(name="Husky", id=300)
+        reca1.add_property(id=101, name="Cute Animals", datatype=db.REFERENCE, value=reca2)
+        reca2.add_property(id=201, name="Cute Dogs", datatype=db.REFERENCE, value=reca3)
+
+        recb1 = db.Record(name="Animals")
+        recb2 = db.Record(name="Dogs")
+        recb3 = db.Record(name="Husky")
+        recb1.add_property(name="Cute Animals", datatype=db.REFERENCE, value=recb2)
+        recb2.add_property(name="Cute Dogs", datatype=db.REFERENCE, value=recb3)
+
+        collect_existing_structure(recb1, reca1, emap)
+
+        # Test if the two dicts of the entity mapping correctly depend on each other
+
+        for i in emap.to_existing.keys():
+            self.assertEqual(i, emap.to_target[emap.to_existing[i].id]._cuid)
+
+        for j in emap.to_target.keys():
+            self.assertEqual(j, emap.to_existing[emap.to_target[j]._cuid].id)
+
+        # Test if only the right Properties are in the dicts
+        self.assertTrue((reca2 in emap.to_existing.values()) and
+                        (reca3 in emap.to_existing.values()) and
+                        (reca1 not in emap.to_existing.values()))
+        self.assertTrue((recb2 in emap.to_target.values()) and
+                        (recb3 in emap.to_target.values()) and
+                        (recb1 not in emap.to_target.values()))
+
+        # Test the correct assignment of the properties
+        self.assertTrue(reca2 is emap.to_existing[recb2._cuid])
+        self.assertTrue(reca3 is emap.to_existing[recb3._cuid])
+
+        self.assertTrue(recb2 is emap.to_target[reca2.id])
+        self.assertTrue(recb3 is emap.to_target[reca3.id])
+
+        """Test with one additional Property, and with Properties that are not Records"""
+        emap2 = EntityMapping()
+        recc1 = db.Record(name="Transportation", id=100)
+        recc2 = db.Record(name="Cars", id=200)
+        recc3 = db.Record(name="Volvo", id=300)
+        recc1.add_property(id=101, name="Type", datatype=db.REFERENCE, value=recc2)
+        recc2.add_property(id=201, name="Brand", datatype=db.REFERENCE, value=recc3)
+        # other datatypes
+        recc3.add_property(id=301, name="max_speed", value=200.2, datatype=db.DOUBLE)
+        recc3.add_property(id=302, name="doors", value=3, datatype=db.INTEGER)
+
+        recd1 = db.Record(name="Transportation")
+        recd2 = db.Record(name="Cars")
+        recd3 = db.Record(name="Volvo")
+        recd4 = db.Record(name="VW")
+        recd1.add_property(name="Type", datatype=db.REFERENCE, value=recd2)
+        recd2.add_property(name="Brand", datatype=db.REFERENCE, value=recd3)
+        # additional Property
+        recd2.add_property(name="Another Brand", datatype=db.REFERENCE, value=recd4)
+        # other datatypes
+        recd3.add_property(name="max_speed", value=200.2, datatype=db.DOUBLE)
+        recd3.add_property(name="doors", value=3, datatype=db.INTEGER)
+        recd4.add_property(name="max_speed", value=210.4, datatype=db.DOUBLE)
+        recd4.add_property(name="doors", value=5, datatype=db.INTEGER)
+        recd4.add_property(name="Warp engine", value=None)
+
+        collect_existing_structure(recd1, recc1, emap2)
+
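+        # recd4 ("VW") has no counterpart in the existing structure, so only
+        # recd2 and recd3 are expected to be matched to recc2 and recc3.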
+        # Test the correct assignment of the properties
+        self.assertTrue(recc2 is emap2.to_existing[recd2._cuid])
+        self.assertTrue(recc3 is emap2.to_existing[recd3._cuid])
+
+        self.assertTrue(recd2 is emap2.to_target[recc2.id])
+        self.assertTrue(recd3 is emap2.to_target[recc3.id])
+
+        """Test if the Record `Cars` in `target_structure` has one additional Property"""
+        # Test existing structure
+        self.assertEqual(len(recc2.get_properties()), 1)  # number of properties stays unchanged
+        self.assertEqual(len(recd2.get_properties()), 2)  # number of properties stays unchanged
+
+        for prop_record, prop_em in zip(recc2.get_properties(), recd2.get_properties()):
+            self.assertTrue(prop_record.value is emap2.to_existing[prop_em.value._cuid])
+
+        # Test target structure
+        self.assertEqual(len(recc3.get_properties()), 2)  # number of properties stays unchanged
+        self.assertEqual(len(recd3.get_properties()), 2)  # number of properties stays unchanged
+
+        """Test if the Properties that are not References show up in the entity map"""
+        for rec_existing, rec_target in zip(emap2.to_existing.values(), emap2.to_target.values()):
+            self.assertTrue(isinstance(rec_existing, db.Record))
+            self.assertTrue(isinstance(rec_target, db.Record))
diff --git a/unittests/test_table_importer.py b/unittests/test_table_importer.py
index 6681ed2cd0d79bda9e0e03de7f24c3cb50557395..70f0f87f8706d72c386b18f54b7a9a10908eb477
--- a/unittests/test_table_importer.py
+++ b/unittests/test_table_importer.py
@@ -27,14 +27,20 @@ import numpy as np
 import pandas as pd
 import pytest
 from caosadvancedtools.datainconsistency import DataInconsistencyError
-from caosadvancedtools.table_importer import (XLSImporter, assure_name_format,
+from caosadvancedtools.table_importer import (CSVImporter, TableImporter,
+                                              TSVImporter, XLSImporter,
+                                              assure_name_format,
+                                              check_reference_field,
                                               date_converter,
                                               datetime_converter,
                                               incomplete_date_converter,
+                                              string_in_list,
                                               win_path_converter,
                                               win_path_list_converter,
                                               yes_no_converter)
+
+from test_utils import BaseMockUpTest
+

 class ConverterTest(unittest.TestCase):
     def test_yes_no(self):
@@ -49,6 +55,16 @@ class ConverterTest(unittest.TestCase):
         self.assertRaises(ValueError, yes_no_converter, "True")
         self.assertRaises(ValueError, yes_no_converter, "true")

+    def test_string_in_list(self):
+        self.assertEqual("false", string_in_list("false",
+                                                 ["FALSE", "TRUE"]))
+        self.assertEqual("FALSE", string_in_list("FALSE",
+                                                 ["FALSE", "TRUE"], False))
+        self.assertRaises(ValueError, string_in_list, "FALSE", [])
+        self.assertRaises(ValueError, string_in_list, "FALSE", ["fals"])
+        self.assertRaises(ValueError, string_in_list,
+                          "FALSE", ["false"], False)
+
     def test_assure_name_format(self):
         self.assertEqual(assure_name_format("Müstermann, Max"),
                          "Müstermann, Max")
@@ -62,32 +78,47 @@ class ConverterTest(unittest.TestCase):
                          ["/this/computer"])
         self.assertEqual(win_path_list_converter(
             r"\this\computer,\this\computer"),
-                         ["/this/computer", "/this/computer"])
+            ["/this/computer", "/this/computer"])

-    @pytest.mark.xfail
     def test_datetime(self):
         test_file = os.path.join(os.path.dirname(__file__), "date.xlsx")
-        self.importer = XLSImporter(converters={'d': datetime_converter,
-                                                }, obligatory_columns=['d'])
+        importer = XLSImporter(converters={'d': datetime_converter,
+                                           }, obligatory_columns=['d'])

         xls_file = pd.io.excel.ExcelFile(test_file)
         df = xls_file.parse()
-        df = self.importer.read_xls(test_file)
+        df = importer.read_xls(test_file)
         assert df.shape[0] == 2
         # TODO datatypes are different; fix it
         assert df.d.iloc[0] ==
datetime.datetime(1980, 12, 31, 13, 24, 23) - def test_date(self): + def test_date_xlsx(self): + """Test with .xlsx in order to check openpyxl engine.""" test_file = os.path.join(os.path.dirname(__file__), "date.xlsx") - self.importer = XLSImporter(converters={'a': date_converter, - 'b': date_converter, - 'c': partial(date_converter, - fmt="%d.%m.%y") - }, obligatory_columns=['a']) + importer = XLSImporter(converters={'a': date_converter, + 'b': date_converter, + 'c': partial(date_converter, + fmt="%d.%m.%y") + }, obligatory_columns=['a']) + + xls_file = pd.io.excel.ExcelFile(test_file) + df = xls_file.parse() + df = importer.read_xls(test_file) + assert df.shape[0] == 2 + assert df.a.iloc[0] == df.b.iloc[0] == df.c.iloc[0] + + def test_date_xls(self): + """Test with .xls in order to check xlrd engine.""" + test_file = os.path.join(os.path.dirname(__file__), "date.xls") + importer = XLSImporter(converters={'a': date_converter, + 'b': date_converter, + 'c': partial(date_converter, + fmt="%d.%m.%y") + }, obligatory_columns=['a']) xls_file = pd.io.excel.ExcelFile(test_file) df = xls_file.parse() - df = self.importer.read_xls(test_file) + df = importer.read_xls(test_file) assert df.shape[0] == 2 assert df.a.iloc[0] == df.b.iloc[0] == df.c.iloc[0] @@ -110,49 +141,135 @@ class ConverterTest(unittest.TestCase): fmts={"%Y": "%Y"}) -class XLSImporterTest(unittest.TestCase): +class TableImporterTest(unittest.TestCase): def setUp(self): - self.importer = XLSImporter( - converters={'a': str, 'b': int, 'c': float, 'd': yes_no_converter}, + self.importer_kwargs = dict( + converters={'c': float, 'd': yes_no_converter}, + datatypes={'a': str, 'b': int}, obligatory_columns=['a', 'b'], unique_keys=[('a', 'b')]) self.valid_df = pd.DataFrame( [['a', 1, 2.0, 'yes']], columns=['a', 'b', 'c', 'd']) def test_missing_col(self): - df = pd.DataFrame(columns=['a', 'b']) - self.assertRaises(ValueError, self.importer.check_columns, df) - self.importer.check_columns(self.valid_df) + # check missing from converters + df = pd.DataFrame(columns=['a', 'b', 'c']) + importer = TableImporter(**self.importer_kwargs) + self.assertRaises(ValueError, importer.check_columns, df) + # check missing from datatypes + df = pd.DataFrame(columns=['a', 'd', 'c']) + importer = TableImporter(**self.importer_kwargs) + self.assertRaises(ValueError, importer.check_columns, df) + # check valid + importer.check_columns(self.valid_df) def test_missing_val(self): - self.importer.check_missing(self.valid_df) + importer = TableImporter(**self.importer_kwargs) + # check valid + importer.check_missing(self.valid_df) + # check invalid df = pd.DataFrame([[None, np.nan, 2.0, 'yes'], [None, 1, 2.0, 'yes'], ['a', np.nan, 2.0, 'yes'], ['b', 5, 3.0, 'no']], columns=['a', 'b', 'c', 'd']) - df_new = self.importer.check_missing(df) + df_new = importer.check_missing(df) self.assertEqual(df_new.shape[0], 1) self.assertEqual(df_new.shape[1], 4) self.assertEqual(df_new.iloc[0].b, 5) - def test_full(self): - """ test full run with example data """ - tmp = NamedTemporaryFile(delete=False, suffix=".xlsx") - tmp.close() - self.valid_df.to_excel(tmp.name) - self.importer.read_xls(tmp.name) + def test_wrong_datatype(self): + importer = TableImporter(**self.importer_kwargs) + df = pd.DataFrame([[None, np.nan, 2.0, 'yes'], + [5, 1, 2.0, 'yes']], + columns=['a', 'b', 'c', 'd']) + self.assertRaises(DataInconsistencyError, importer.check_datatype, df) def test_unique(self): - self.importer.check_missing(self.valid_df) + importer = TableImporter(**self.importer_kwargs) + 
importer.check_missing(self.valid_df)
         df = pd.DataFrame([['b', 5, 3.0, 'no'],
                            ['b', 5, 3.0, 'no']],
                           columns=['a', 'b', 'c', 'd'])
-        df_new = self.importer.check_unique(df)
+        df_new = importer.check_unique(df)
         self.assertEqual(df_new.shape[0], 1)

-    @pytest.mark.xfail
+
+class XLSImporterTest(TableImporterTest):
+    def test_full(self):
+        """ test full run with example data """
+        tmp = NamedTemporaryFile(delete=False, suffix=".xlsx")
+        tmp.close()
+        self.valid_df.to_excel(tmp.name)
+        importer = XLSImporter(**self.importer_kwargs)
+        importer.read_file(tmp.name)
+
     def test_raise(self):
+        importer = XLSImporter(**self.importer_kwargs)
         tmp = NamedTemporaryFile(delete=False, suffix=".lol")
         tmp.close()
-        # TODO ValueError is raised instead
-        self.assertRaises(DataInconsistencyError, self.importer.read_xls,
+        self.assertRaises(DataInconsistencyError, importer.read_xls,
                           tmp.name)
+
+    def test_datatypes(self):
+        """Test datatypes in columns."""
+        importer = XLSImporter(converters={},
+                               obligatory_columns=["float_as_float"],
+                               datatypes={
+                                   "float_as_float": float,
+                                   "int_as_float": float,
+                                   "int_as_int": int,
+                               }
+                               )
+        df = importer.read_xls(os.path.join(
+            os.path.dirname(__file__), "data", "datatypes.xlsx"))
+        assert np.issubdtype(df.loc[0, "int_as_float"], float)
+
+
+class CSVImporterTest(TableImporterTest):
+    def test_full(self):
+        """ test full run with example data """
+        tmp = NamedTemporaryFile(delete=False, suffix=".csv")
+        tmp.close()
+        self.valid_df.to_csv(tmp.name)
+        importer = CSVImporter(**self.importer_kwargs)
+        importer.read_file(tmp.name)
+
+
+class TSVImporterTest(TableImporterTest):
+    def test_full(self):
+        """ test full run with example data """
+        tmp = NamedTemporaryFile(delete=False, suffix=".tsv")
+        tmp.close()
+        self.valid_df.to_csv(tmp.name, sep="\t")
+        importer = TSVImporter(**self.importer_kwargs)
+        importer.read_file(tmp.name)
+
+
+class CountQueryNoneConverterTest(BaseMockUpTest):
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        # simulate that no entity exists
+        self.entities = (
+            '<Response count="0">'
+            '<Query string="count record" results="0">'
+            '</Query>'
+            '</Response>'
+        )
+
+    def test_check_reference_field(self):
+        self.assertRaises(ValueError, check_reference_field, "1232", "Max")
+
+
+class CountQuerySingleConverterTest(BaseMockUpTest):
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        # simulate that exactly one entity exists
+        self.entities = (
+            '<Response count="1">'
+            '<Query string="count record" results="1">'
+            '</Query>'
+            '</Response>'
+        )
+
+    def test_check_reference_field(self):
+        self.assertEqual(check_reference_field("1232", "Max"),
+                         "1232")
diff --git a/unittests/test_utils.py b/unittests/test_utils.py
index 054d7c99069f294e9975742c1c0261fd7ebc768c..7369931799b00eba5a835458a6fad474de1d9039
--- a/unittests/test_utils.py
+++ b/unittests/test_utils.py
@@ -32,14 +32,7 @@ from caosdb.connection.mockup import MockUpResponse, MockUpServerConnection
 from caosdb.exceptions import TransactionError


-class ReferencesBaseTest(unittest.TestCase):
-    def __init__(self, *args, **kwargs):
-        super().__init__(*args, **kwargs)
-        self.entities = (
-            '<Response><File name="test.npy" path="/some/path/test.npy'
-            '" id="1234"/><Query string="find record" results="1">'
-            '</Query></Response>')
-
+class BaseMockUpTest(unittest.TestCase):
     def setUp(self):
         conlogger = logging.getLogger("connection")
         conlogger.setLevel(level=logging.ERROR)
@@ -70,6 +63,15 @@

         return log

+
+class 
ReferencesBaseTest(BaseMockUpTest): + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self.entities = ( + '<Response><File name="test.npy" path="/some/path/test.npy' + '" id="1234"/><Query string="find record" results="1">' + '</Query></Response>') + def test_ref(self): self.clear_log() files = get_referenced_files("test.npy", prefix=None, filename=None, diff --git a/unittests/test_parser.py b/unittests/test_yaml_model_parser.py similarity index 56% rename from unittests/test_parser.py rename to unittests/test_yaml_model_parser.py index 852577a471ba15e3afc163bd8e1e6fd97abd0c0a..a9f072b754618e38237cbf70e74c7944551f1045 100644 --- a/unittests/test_parser.py +++ b/unittests/test_yaml_model_parser.py @@ -1,5 +1,7 @@ import unittest +from datetime import date from tempfile import NamedTemporaryFile +from pytest import raises import caosdb as db from caosadvancedtools.models.parser import (TwiceDefinedException, @@ -15,6 +17,8 @@ def to_file(string): return f.name +# TODO: check purpose of this function... add documentation + def parse_str(string): parse_model_from_yaml(to_file(string)) @@ -68,7 +72,8 @@ RT2: a: """ - self.assertRaises(TwiceDefinedException, lambda: parse_model_from_yaml(to_file(string))) + self.assertRaises(TwiceDefinedException, + lambda: parse_model_from_yaml(to_file(string))) def test_typical_case(self): string = """ @@ -103,7 +108,8 @@ RT5: - RT1: - RT2: """ - self.assertRaises(ValueError, lambda: parse_model_from_yaml(to_file(string))) + self.assertRaises( + ValueError, lambda: parse_model_from_yaml(to_file(string))) def test_unknown_kwarg(self): string = """ @@ -111,7 +117,8 @@ RT1: datetime: p1: """ - self.assertRaises(ValueError, lambda: parse_model_from_yaml(to_file(string))) + self.assertRaises( + ValueError, lambda: parse_model_from_yaml(to_file(string))) def test_definition_in_inheritance(self): string = """ @@ -121,7 +128,8 @@ RT2: - RT1: description: "tach" """ - self.assertRaises(ValueError, lambda: parse_model_from_yaml(to_file(string))) + self.assertRaises( + ValueError, lambda: parse_model_from_yaml(to_file(string))) def test_inheritance(self): string = """ @@ -168,7 +176,6 @@ RT1: RT5: """ model = parse_model_from_yaml(to_file(string)) - print(model["RT1"]) assert has_property(model["RT1"], "RT2") assert model["RT1"].get_importance("RT2") == db.RECOMMENDED assert has_property(model["RT1"], "RT3") @@ -190,7 +197,7 @@ p1: p2: datatype: TXT """ - self.assertRaises(ValueError, lambda: parse_model_from_yaml(to_file(string))) + self.assertRaises(ValueError, parse_model_from_yaml, to_file(string)) class ListTest(unittest.TestCase): @@ -200,10 +207,19 @@ RT1: recommended_properties: a: datatype: LIST(RT2) + b: + datatype: LIST(TEXT) + c: + datatype: LIST<TEXT> RT2: """ model = parse_model_from_yaml(to_file(string)) + self.assertTrue(isinstance(model['b'], db.Property)) + self.assertEqual(model['b'].datatype, db.LIST(db.TEXT)) + self.assertTrue(isinstance(model['c'], db.Property)) + self.assertEqual(model['c'].datatype, db.LIST(db.TEXT)) + # This failed for an older version of caosdb-models string_list = """ A: @@ -216,16 +232,8 @@ B: datatype: INTEGER """ model = parse_model_from_yaml(to_file(string_list)) - - def test_dmgd_list(self): - string = """ -RT1: - recommended_properties: - a: - datatype: LIST(T2 -RT2: -""" - self.assertRaises(ValueError, lambda: parse_model_from_yaml(to_file(string))) + self.assertTrue(isinstance(model['A'], db.RecordType)) + self.assertEqual(model['A'].properties[0].datatype, db.LIST("B")) class 
ParserTest(unittest.TestCase): @@ -274,6 +282,22 @@ A: parse_model_from_string(yaml) self.assertIn("line 3", yde.exception.args[0]) + def test_reference_property(self): + """Test correct creation of reference property using an RT.""" + modeldef = """A: + recommended_properties: + ref: + datatype: LIST<A> +""" + model = parse_model_from_string(modeldef) + self.assertEqual(len(model), 2) + for key, value in model.items(): + if key == "A": + self.assertTrue(isinstance(value, db.RecordType)) + elif key == "ref": + self.assertTrue(isinstance(value, db.Property)) + self.assertEqual(value.datatype, "LIST<A>") + class ExternTest(unittest.TestCase): """TODO Testing the "extern" keyword in the YAML.""" @@ -285,6 +309,8 @@ class ExternTest(unittest.TestCase): class ErrorMessageTest(unittest.TestCase): """Tests for understandable error messages.""" + # Note: This was changed with implementation of role keyword + @unittest.expectedFailure def test_non_dict(self): """When a value is given, where a list or mapping is expected.""" recordtype_value = """ @@ -311,4 +337,140 @@ A: # parse_str(string) with self.assertRaises(YamlDefinitionError) as yde: parse_str(string) - assert("line {}".format(line) in yde.exception.args[0]) + assert "line {}".format(line) in yde.exception.args[0] + + +def test_define_role(): + model = """ +A: + role: Record +""" + entities = parse_model_from_string(model) + assert "A" in entities + assert isinstance(entities["A"], db.Record) + assert entities["A"].role == "Record" + + model = """ +A: + role: Record + inherit_from_obligatory: + - C + obligatory_properties: + b: +b: + datatype: INTEGER +C: + obligatory_properties: + b: +D: + role: RecordType +""" + entities = parse_model_from_string(model) + for name, ent in (("A", "Record"), ("b", "Property"), + ("C", "RecordType"), ("D", "RecordType")): + assert name in entities + assert isinstance(entities[name], getattr(db, ent)) + assert entities[name].role == ent + + assert entities["A"].parents[0].name == "C" + assert entities["A"].name == "A" + + assert entities["A"].properties[0].name == "b" + assert entities["A"].properties[0].value is None + + assert entities["C"].properties[0].name == "b" + assert entities["C"].properties[0].value is None + + model = """ +A: + role: Record + obligatory_properties: + b: 42 +b: + datatype: INTEGER +""" + + entities = parse_model_from_string(model) + assert entities["A"].get_property("b").value == 42 + assert entities["b"].value is None + + model = """ +b: + datatype: INTEGER + value: 18 +""" + entities = parse_model_from_string(model) + assert entities["b"].value == 18 + + +def test_issue_72(): + """Tests for + https://gitlab.indiscale.com/caosdb/src/caosdb-advanced-user-tools/-/issues/72 + + In some cases, faulty values would be read in for properties without a + specified value. 
+ + """ + model = """ +Experiment: + obligatory_properties: + date: + datatype: DATETIME + description: 'date of the experiment' + identifier: + datatype: TEXT + description: 'identifier of the experiment' + temperature: + datatype: DOUBLE + description: 'temp' +TestExperiment: + role: Record + inherit_from_obligatory: + - Experiment + obligatory_properties: + date: 2022-03-02 + identifier: Test + temperature: 23 + recommended_properties: + additional_prop: + datatype: INTEGER + value: 7 +""" + entities = parse_model_from_string(model) + assert "Experiment" in entities + assert "date" in entities + assert "identifier" in entities + assert "temperature" in entities + assert "TestExperiment" in entities + assert "additional_prop" in entities + assert isinstance(entities["Experiment"], db.RecordType) + + assert entities["Experiment"].get_property("date") is not None + # No value is set, so this has to be None + assert entities["Experiment"].get_property("date").value is None + + assert entities["Experiment"].get_property("identifier") is not None + assert entities["Experiment"].get_property("identifier").value is None + + assert entities["Experiment"].get_property("temperature") is not None + assert entities["Experiment"].get_property("temperature").value is None + + test_rec = entities["TestExperiment"] + assert isinstance(test_rec, db.Record) + assert test_rec.get_property("date").value == date(2022, 3, 2) + assert test_rec.get_property("identifier").value == "Test" + assert test_rec.get_property("temperature").value == 23 + assert test_rec.get_property("additional_prop").value == 7 + + +def test_file_role(): + """Not implemented for now, see + https://gitlab.indiscale.com/caosdb/src/caosdb-advanced-user-tools/-/issues/74. + + """ + model = """ +F: + role: File +""" + with raises(NotImplementedError): + entities = parse_model_from_string(model)