diff --git a/.docker/Dockerfile b/.docker/Dockerfile
index 9daad27ef1bdb4da6659b0ad5402f8356c820742..914e5f61138a664784ef1f8f323c101ff5f1c08a 100644
--- a/.docker/Dockerfile
+++ b/.docker/Dockerfile
@@ -7,6 +7,7 @@ RUN apt-get update && \
     python3-requests \
     python3-pandas \
     python3-html2text \
+    tox \
     git \
     openjdk-11-jdk-headless \
     python-autopep8 \
diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index 699b03e22f9b315eccdd9ba038cfb840da9cb516..5f1c0fc3faa17f7c7667104d9ea2e89bc09210de 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -113,5 +113,4 @@ unittest:
   stage: unittest
   image: $CI_REGISTRY_IMAGE
   script:
-    - cd src
-    - python3 -m pytest ../unittests
+    - tox
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 22e91a153055649e69428b898a02634daca0afd5..891c5ecfb94f471bd3acab122ee6a6dc7d4d3f4b 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -4,8 +4,21 @@ All notable changes to this project will be documented in this file.
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
-## [0.1.0] - ????-??-??
-Tag `v0.1` - Commit ????????
+## [Unreleased] ##
+
+### Added ###
+
+### Changed ###
+
+### Deprecated ###
+
+### Removed ###
+
+### Fixed ###
+
+### Security ###
+
+## [0.2.0] - 2020-05-28 ##
 
 ### Added
 - everything
diff --git a/integrationtests/full_test/crawl.py b/integrationtests/full_test/crawl.py
index 66005fd715ada01fac02a4e4e21e7a2501f21ad3..aaab4bb313dabc900170d48f40b30eb7bd361a8e 100755
--- a/integrationtests/full_test/crawl.py
+++ b/integrationtests/full_test/crawl.py
@@ -28,19 +28,33 @@ import logging
 from argparse import RawTextHelpFormatter
 
 import caosdb as db
-
 from caosadvancedtools.cfood import fileguide
 from caosadvancedtools.crawler import FileCrawler
-from caosadvancedtools.guard import UPDATE
+from caosadvancedtools.guard import INSERT, UPDATE
 from scifolder import (AnalysisCFood, ExperimentCFood, ProjectCFood,
                        PublicationCFood, SimulationCFood)
 
+try:
+    from sss_helper import get_argument_parser, print_success
+except ModuleNotFoundError:
+    def get_argument_parser():
+        return argparse.ArgumentParser()
 
-def get_parser():
-    parser = argparse.ArgumentParser(description=__doc__,
-                                     formatter_class=RawTextHelpFormatter)
+    def print_success(text):
+        print("Success: "+text)
 
-    parser.add_argument("path")
+
+def get_parser():
+    # TODO allow passing something here?
+    # description=__doc__, formatter_class=RawTextHelpFormatter
+    # with SSS this default parser will have SSS-appropriate arguments.
+    parser = get_argument_parser()
+    parser.add_argument("path",
+                        help="the subtree of files below the given path will "
+                        "be considered. Use '/' for everything.")
Use '/' for everything.") + parser.add_argument("-a", "--authorize-run", action='append', + help="supply the id of the run that you want to" + " authorize") return parser @@ -58,6 +72,12 @@ if __name__ == "__main__": parser = get_parser() args = parser.parse_args() + # assuming SSS + + if hasattr(args, "auth_token") and args.auth_token: + db.configure_connection(password_method="auth_token", + auth_token=args.auth_token) + logger.info("Starting query...") files = FileCrawler.query_files(args.path) logger.info("Query done...") @@ -68,4 +88,10 @@ if __name__ == "__main__": ExperimentCFood, AnalysisCFood, PublicationCFood, SimulationCFood, ]) - c.crawl(security_level=UPDATE) + + if args.authorize_run: + for run_id in args.authorize_run: + c.update_authorized_changes(run_id) + + c.crawl(security_level=INSERT, path=args.path) + print_success('Crawler finished') diff --git a/integrationtests/full_test/test.sh b/integrationtests/full_test/test.sh index 249f734275ad3965affe8daaa898db45d864f09b..2f7bc3d81ba49e75025ef1b0b23751c9952d22eb 100755 --- a/integrationtests/full_test/test.sh +++ b/integrationtests/full_test/test.sh @@ -1,10 +1,39 @@ +#!/bin/bash +OUT=/tmp/crawler.output ls rm -rf cache.db echo "Filling the database" ./filldb.sh echo "Testing the crawler database" python3 -m pytest test_crawler.py +echo "make a change" +pushd extroot +egrep -liRZ 'A description of another example' . | xargs -0 -l sed -i -e 's/A description of another example/A description of this example/g' +popd +echo "run crawler" +./crawl.py / > $OUT +# check whether there was something UNAUTHORIZED +set -e +grep "UNAUTHORIZED UPDATE" $OUT +# get the id of the run +RUN_ID=$(grep "./crawl.py -a " $OUT | awk '{ print $3 }') +echo $RUN_ID +echo "run crawler again" +echo "./crawl.py -a $RUN_ID /" +./crawl.py -a $RUN_ID / > $OUT +set +e +if grep "UNAUTHORIZED UPDATE" $OUT +then + exit 1 +fi +set -e +echo "undo changes" +pushd extroot +egrep -liRZ 'A description of this example' . | xargs -0 -l sed -i -e 's/A description of this example/A description of another example/g' +popd python3 test_table.py # TODO the following test deletes lots of the data inserted by the crawler echo "Testing im and export" python3 test_im_und_export.py + +echo "/n/n/n YOU NEED TO RESTART THE SERVER TO REDO TESTS!!!" diff --git a/setup.py b/setup.py index 117821aaa7e18e80aa3a2060bdd6995b5cdb2938..b7bb5f5f6a4264b75b67f52ba9eaff6368c3fa0c 100755 --- a/setup.py +++ b/setup.py @@ -1,38 +1,170 @@ #!/usr/bin/env python # -*- encoding: utf-8 -*- # -# ** header v3.0 -# This file is a part of the CaosDB Project. # -# Copyright (C) 2018 Research Group Biomedical Physics, -# Max-Planck-Institute for Dynamics and Self-Organization Göttingen +"""caosadvancedtools""" +import os +import subprocess +import sys + +from setuptools import find_packages, setup + +######################################################################## +# The following code is largely based on code in numpy +######################################################################## # -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU Affero General Public License as -# published by the Free Software Foundation, either version 3 of the -# License, or (at your option) any later version. +# Copyright (c) 2005-2019, NumPy Developers. +# All rights reserved. 
 #
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU Affero General Public License for more details.
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
 #
-# You should have received a copy of the GNU Affero General Public License
-# along with this program. If not, see <https://www.gnu.org/licenses/>.
+#     * Redistributions of source code must retain the above copyright
+#       notice, this list of conditions and the following disclaimer.
 #
-# ** end header
+#     * Redistributions in binary form must reproduce the above
+#       copyright notice, this list of conditions and the following
+#       disclaimer in the documentation and/or other materials provided
+#       with the distribution.
 #
-"""Install and (nose-)test caosdb."""
-from setuptools import find_packages, setup
+#     * Neither the name of the NumPy Developers nor the names of any
+#       contributors may be used to endorse or promote products derived
+#       from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+########################################################################
+
+MAJOR = 0
+MINOR = 2
+MICRO = 0
+PRE = ""  # e.g. rc0, alpha.1, 0.beta-23
+ISRELEASED = False
+
+if PRE:
+    VERSION = "{}.{}.{}-{}".format(MAJOR, MINOR, MICRO, PRE)
+else:
+    VERSION = "{}.{}.{}".format(MAJOR, MINOR, MICRO)
+
+
+# Return the git revision as a string
+def git_version():
+    def _minimal_ext_cmd(cmd):
+        # construct minimal environment
+        env = {}
+
+        for k in ['SYSTEMROOT', 'PATH', 'HOME']:
+            v = os.environ.get(k)
+
+            if v is not None:
+                env[k] = v
+        # LANGUAGE is used on win32
+        env['LANGUAGE'] = 'C'
+        env['LANG'] = 'C'
+        env['LC_ALL'] = 'C'
+        out = subprocess.check_output(cmd, stderr=subprocess.STDOUT, env=env)
+
+        return out
+
+    try:
+        out = _minimal_ext_cmd(['git', 'rev-parse', 'HEAD'])
+        GIT_REVISION = out.strip().decode('ascii')
+    except (subprocess.SubprocessError, OSError):
+        GIT_REVISION = "Unknown"
+
+    return GIT_REVISION
+
+
+def get_version_info():
+    # Adding the git rev number needs to be done inside write_version_py(),
+    # otherwise the import of caosadvancedtools.version messes up the build under
+    # Python 3.
+    FULLVERSION = VERSION
+
+    if os.path.exists('.git'):
+        GIT_REVISION = git_version()
+    elif os.path.exists('src/caosadvancedtools/version.py'):
+        # must be a source distribution, use existing version file
+        try:
+            from caosadvancedtools.version import git_revision as GIT_REVISION
+        except ImportError:
+            raise ImportError("Unable to import git_revision. Try removing "
Try removing " + "src/caosadvancedtools/version.py and the build directory " + "before building.") + else: + GIT_REVISION = "Unknown" + + if not ISRELEASED: + FULLVERSION += '.dev0+' + GIT_REVISION[:7] + + return FULLVERSION, GIT_REVISION + + +def write_version_py(filename='src/caosadvancedtools/version.py'): + cnt = """ +# THIS FILE IS GENERATED FROM caosadvancedtools SETUP.PY +# +short_version = '%(version)s' +version = '%(version)s' +full_version = '%(full_version)s' +git_revision = '%(git_revision)s' +release = %(isrelease)s + +if not release: + version = full_version +""" + FULLVERSION, GIT_REVISION = get_version_info() + + a = open(filename, 'w') + try: + a.write(cnt % {'version': VERSION, + 'full_version': FULLVERSION, + 'git_revision': GIT_REVISION, + 'isrelease': str(ISRELEASED)}) + finally: + a.close() + + +def setup_package(): + # load README + with open("README.md", "r") as fh: + long_description = fh.read() + + src_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "src") + sys.path.insert(0, src_path) + + # Rewrite the version file everytime + write_version_py() + + metadata = dict( + name='caosadvancedtools', + version=get_version_info()[0], + description='advanced utilities for caosdb', + long_description=long_description, + long_description_content_type="text/markdown", + author='Henrik tom Wörden', + author_email='h.tomwoerden@indiscale.com', + packages=find_packages('src'), + package_dir={'': 'src'}, + setup_requires=["pytest-runner>=2.0,<3dev"], + tests_require=["pytest", "pytest-cov", "coverage>=4.4.2"], + ) + try: + setup(**metadata) + finally: + del sys.path[0] + return + -setup(name='caosadvancedtools', - version='0.1.0', - description='Advanced tools to interact with CaosDB', - author='Henrik tom Wörden', - author_email='henrik.tom-woerden@ds.mpg.de', - packages=find_packages('src'), - package_dir={'': 'src'}, - install_requires=["caosdb>=0.3.0", "caosmodels>=0.1.0"], - extras_require={}, - tests_require=["pytest"], - ) +if __name__ == '__main__': + setup_package() diff --git a/src/caosadvancedtools/cache.py b/src/caosadvancedtools/cache.py index af434eb0e7a2cc73276934051c725e3a50b8181e..1dfdeb7f4b39dbe1f8a5c25a9b3178871a71674b 100644 --- a/src/caosadvancedtools/cache.py +++ b/src/caosadvancedtools/cache.py @@ -9,18 +9,31 @@ import caosdb as db from lxml import etree -def get_pretty_xml(cont): - if isinstance(cont, list): - cont = db.Container().extend(cont) +def put_in_container(stuff): + if isinstance(stuff, list): + stuff = db.Container().extend(stuff) + + if not isinstance(stuff, db.Container): + stuff = db.Container().append(stuff) + + return stuff - if not isinstance(cont, db.Container): - cont = db.Container().append(cont) + +def get_pretty_xml(cont): + cont = put_in_container(cont) return etree.tounicode(cont.to_xml( local_serialization=True), pretty_print=True) class Cache(object): + """ + stores identifiables (as a hash of xml) and their respective ID. + + This allows to retrieve the Record corresponding to an indentifiable + without querying. 
+ """ + def __init__(self, db_file=None): if db_file is None: self.db_file = "cache.db" @@ -37,6 +50,7 @@ class Cache(object): conn.commit() conn.close() + @staticmethod def hash_entity(ent): xml = get_pretty_xml(ent) digest = sha256(xml.encode("utf-8")).hexdigest() @@ -91,3 +105,81 @@ class Cache(object): for ehash, ent in zip(hashes, entities): if self.check_existing(ehash) is None: self.insert(ehash, ent.id) + + +class UpdateCache(Cache): + """ + stores unauthorized updates + + If the Guard is set to a mode that does not allow an update, the update can + be stored in this cache such that it can be authorized and done later. + """ + + def __init__(self, db_file=None): + if db_file is None: + db_file = "/tmp/crawler_update_cache.db" + super().__init__(db_file=db_file) + + @staticmethod + def get_previous_version(cont): + """ Retrieve the current, unchanged version of the entities that shall + be updated, i.e. the version before the update """ + + old_ones = db.Container() + + for ent in cont: + old_ones.append(db.execute_query("FIND {}".format(ent.id), + unique=True)) + + return old_ones + + def insert(self, cont, run_id): + """ insert a pending, unauthorized update + + Parameters: + ----------- + cont: Container with the records to be updated containing the desired + version, i.e. the state after the update. + run_id: the id of the crawler run + """ + cont = put_in_container(cont) + old_ones = UpdateCache.get_previous_version(cont) + new_ones = cont + + old_hash = Cache.hash_entity(old_ones) + new_hash = Cache.hash_entity(new_ones) + conn = sqlite3.connect(self.db_file) + c = conn.cursor() + c.execute('''INSERT INTO updates VALUES (?, ?, ?, ?, ?)''', + (old_hash, new_hash, str(old_ones), str(new_ones), + str(run_id))) + conn.commit() + conn.close() + + def create_cache(self): + """ initialize the cache """ + conn = sqlite3.connect(self.db_file) + c = conn.cursor() + c.execute('''CREATE TABLE updates (olddigest text, newdigest text, + oldrep text, newrep text, run_id text, + primary key (olddigest, run_id))''') + conn.commit() + conn.close() + + def get_updates(self, run_id): + """ returns the pending updates for a given run id + + Parameters: + ----------- + run_id: the id of the crawler run + """ + + conn = sqlite3.connect(self.db_file) + c = conn.cursor() + c.execute('''Select * FROM updates WHERE run_id=?''', + (str(run_id),)) + res = c.fetchall() + conn.commit() + conn.close() + + return res diff --git a/src/caosadvancedtools/cfood.py b/src/caosadvancedtools/cfood.py index 1bfb7c39ab7da7931906e698911c6123d18d35c1..47852fdf345d223a9caa4c043f2a353b04f5aa37 100644 --- a/src/caosadvancedtools/cfood.py +++ b/src/caosadvancedtools/cfood.py @@ -100,33 +100,6 @@ class AbstractCFood(object, metaclass=ABCMeta): to self.to_be_updated """ - def push_identifiables_to_CaosDB(self): - """ Updates the self.to_be_updated Container, i.e. pushes the changes - to CaosDB - """ - - if len(self.to_be_updated) == 0: - return - - get_ids_for_entities_with_names(self.to_be_updated) - - # remove duplicates - tmp = db.Container() - - for el in self.to_be_updated: - if el not in tmp: - tmp.append(el) - - self.to_be_updated = tmp - - logger.info("UPDATE: updating the following entities") - - for el in self.to_be_updated: - logger.info("\t" + el.name if el.name is not None else el.id) - - logger.debug(self.to_be_updated) - guard.safe_update(self.to_be_updated) - @classmethod def match_item(cls, item): """ Matches an item found by the crawler against this class. 
@@ -496,6 +469,8 @@ def insert_id_based_on_name(entity):
 
 
 def get_ids_for_entities_with_names(entities):
+    # TODO how to deal with name conflicts?
+
     for ent in entities:
         insert_id_based_on_name(ent)
 
@@ -596,4 +571,7 @@ class CMeal(object):
             return False
         match = re.match(self.get_re(), item)
 
-        return self.all_groups_equal(match, self.match)
+        if match is None:
+            return False
+        else:
+            return self.all_groups_equal(match, self.match)
diff --git a/src/caosadvancedtools/crawler.py b/src/caosadvancedtools/crawler.py
index 2877cd7fcf2016bf338409f03e0b31989f745580..496688af27ae73b4c40a91afa39462fe46c42242 100644
--- a/src/caosadvancedtools/crawler.py
+++ b/src/caosadvancedtools/crawler.py
@@ -39,15 +39,17 @@ match. This occurs in basically three steps:
 import logging
 import os
+import subprocess
 import traceback
+import uuid
 from datetime import datetime
 
 import caosdb as db
 from caosdb.exceptions import TransactionError
 
-from .cache import Cache
-from .cfood import RowCFood
-from .guard import RETRIEVE
+from .cache import Cache, UpdateCache, get_pretty_xml
+from .cfood import RowCFood, get_ids_for_entities_with_names
+from .guard import RETRIEVE, ProhibitedException
 from .guard import global_guard as guard
 
 logger = logging.getLogger(__name__)
@@ -106,6 +108,7 @@ class Crawler(object):
         self.use_cache = use_cache
         self.hideKnown = hideKnown
         self.abort_on_exception = abort_on_exception
+        self.update_cache = UpdateCache()
 
         if self.use_cache:
             self.cache = Cache()
@@ -114,6 +117,36 @@
         """ generates items to be crawled with an index"""
         yield 0, None
 
+    def update_authorized_changes(self, run_id):
+        """
+        execute the pending updates of a specific run id.
+
+        This should be called if the updates of a certain run were authorized.
+
+        Parameters:
+        -----------
+        run_id: the id of the crawler run
+        """
+        changes = self.update_cache.get_updates(run_id)
+
+        for _, _, old, new, _ in changes:
+            current = db.Container()
+            new_cont = db.Container()
+            new_cont = new_cont.from_xml(new)
+
+            for ent in new_cont:
+                current.append(db.execute_query("FIND {}".format(ent.id),
+                                                unique=True))
+            current_xml = get_pretty_xml(current)
+
+            # check whether previous version equals current version
+            # if not, the update must not be done
+
+            if current_xml != old:
+                continue
+
+            new_cont.update()
+
     def collect_cfoods(self):
         """ This is the first phase of the crawl. It collects all cfoods that shall
@@ -227,7 +260,9 @@ class Crawler(object):
         if self.use_cache:
             self.cache.insert_list(hashes, identifiables)
 
-    def crawl(self, security_level=RETRIEVE):
+    def crawl(self, security_level=RETRIEVE, path=None):
+        self.run_id = uuid.uuid1()
+        logger.info("Run Id: " + str(self.run_id))
         guard.set_level(level=security_level)
 
         cfoods, tbs, errors_occured = self.collect_cfoods()
@@ -244,7 +279,7 @@ class Crawler(object):
                 self.cached_find_identifiables(cfood.identifiables)
 
                 cfood.update_identifiables()
-                cfood.push_identifiables_to_CaosDB()
+                self.push_identifiables_to_CaosDB(cfood)
             except Exception as e:
                 traceback.print_exc()
                 print(e)
@@ -254,12 +289,140 @@ class Crawler(object):
                 errors_occured = True
                 tbs.append(e)
 
+        pending_changes = self.update_cache.get_updates(self.run_id)
+
+        if pending_changes:
+            # Sending an email with a link to a form to authorize updates is
+            # only done in SSS mode
+
+            if "SHARED_DIR" in os.environ:
+                filename = self.save_form([el[3] for el in pending_changes], path)
+                self.send_mail([el[3] for el in pending_changes], filename)
+
+            for i, el in enumerate(pending_changes):
+
+                logger.info(
+                    """
+UNAUTHORIZED UPDATE ({} of {}):
+____________________\n""".format(i+1, len(pending_changes)) + str(el[3]))
+            logger.info("There were unauthorized changes (see above). An "
+                        "email was sent to the curator.\n"
+                        "You can authorize the updates by invoking the crawler"
+                        " with the run id:\n\n"
+                        "./crawl.py -a {rid} {path}\n".format(rid=self.run_id,
+                                                              path=path))
+
         if errors_occured:
             logger.warning("Crawler terminated with failures!")
             logger.warning(tbs)
         else:
             logger.info("Crawler terminated successfully!")
 
+    def save_form(self, changes, path):
+        """
+        Saves an html website to a file that contains a form with a button to
+        authorize the given changes.
+
+        The button will call the crawler with the same path that was used for
+        the current run and with a parameter to authorize the changes of the
+        current run.
+
+        Parameters:
+        -----------
+        changes: The CaosDB entities in the version after the update.
+        path: the path defining the subtree that is crawled
+
+        """
+        from xml.sax.saxutils import escape
+        # TODO move path related stuff to sss_helper
+        form = """
+<form method="post" action="{url}/scripting">
+  <input type="hidden" name="call" value="crawl.py"/>
+  <input type="hidden" name="-p0" value=""/>
+  <input type="hidden" name="-p1" value="{path}"/>
+  <input type="hidden" name="-Oauthorize-run" value="{rid}"/>
+  <input type="submit" value="Authorize"/>
+</form>
+<pre>
+<code>
+{changes}
+</code>
+</pre>
+""".format(url=db.configuration.get_config()["Connection"]["url"],
+           rid=self.run_id,
+           changes=escape("\n".join(changes)),
+           path=path)
+
+        if "SHARED_DIR" in os.environ:
+            directory = os.environ["SHARED_DIR"]
+            filename = str(self.run_id)+".html"
+            randname = os.path.basename(os.path.abspath(directory))
+            filepath = os.path.abspath(os.path.join(directory, filename))
+            filename = os.path.join(randname, filename)
+            with open(filepath, "w") as f:
+                f.write(form)
+
+        return filename
+
+    def send_mail(self, changes, filename):
+        """ calls sendmail in order to send a mail to the curator about pending
+        changes
+
+        Parameters:
+        -----------
+        changes: The CaosDB entities in the version after the update.
+        filename: path to the html site that allows the authorization
+        """
+
+        caosdb_config = db.configuration.get_config()
+        text = """Dear Curator,
+there were changes that need your authorization. Please check the following
+carefully and if the changes are ok, click on the following link:
+
+{url}/Shared/{filename}
+
+{changes}
+        """.format(url=caosdb_config["Connection"]["url"],
+                   filename=filename,
+                   changes="\n".join(changes))
+        sendmail = caosdb_config["advancedtools"]["sendmail"]
+        p = subprocess.Popen(
+            [
+                sendmail, "-f",
+                caosdb_config["advancedtools"]["crawler.from_mail"],
+                caosdb_config["advancedtools"]["crawler.to_mail"]],
+            stdin=subprocess.PIPE)
+        p.communicate(input=text.encode())
+
+    def push_identifiables_to_CaosDB(self, cfood):
+        """
+        Updates the to_be_updated Container, i.e. pushes the changes to CaosDB
+        """
+
+        if len(cfood.to_be_updated) == 0:
+            return
+
+        get_ids_for_entities_with_names(cfood.to_be_updated)
+
+        # remove duplicates
+        tmp = db.Container()
+
+        for el in cfood.to_be_updated:
+            if el not in tmp:
+                tmp.append(el)
+
+        cfood.to_be_updated = tmp
+
+        logger.info("UPDATE: updating the following entities")
+
+        for el in cfood.to_be_updated:
+            logger.info("\t" + el.name if el.name is not None else el.id)
+
+        logger.debug(cfood.to_be_updated)
+        try:
+            guard.safe_update(cfood.to_be_updated)
+        except ProhibitedException:
+            self.update_cache.insert(cfood.to_be_updated, self.run_id)
+
     # TODO remove static?
     @staticmethod
     def find_or_insert_identifiables(identifiables):
diff --git a/src/caosadvancedtools/guard.py b/src/caosadvancedtools/guard.py
index 269ef77b38bf42435aae42761688614e7fe54bcd..85c91b60a9f45e121b68c51804745b9bd793c201 100644
--- a/src/caosadvancedtools/guard.py
+++ b/src/caosadvancedtools/guard.py
@@ -1,19 +1,44 @@
+#!/usr/bin/env python
+# encoding: utf-8
+#
+# This file is a part of the CaosDB Project.
+#
+# Copyright (C) 2020 Henrik tom Wörden
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as
+# published by the Free Software Foundation, either version 3 of the
+# License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+
 import caosdb as db
 
+
 RETRIEVE = 0
 INSERT = 1
 UPDATE = 2
 
 
+class ProhibitedException(Exception):
+    pass
+
+
 class Guard(object):
 
     def __init__(self, level=RETRIEVE):
         self.freshly_created = []
         self.level = level
 
-    def safe_insert(self, obj, ):
+    def safe_insert(self, obj):
         if self.level < INSERT:
-            raise Exception("not allowed")
+            raise ProhibitedException("not allowed")
         obj.insert()
 
         if isinstance(obj, db.Container):
@@ -31,12 +56,12 @@ class Guard(object):
                     all_fresh = False
 
             if self.level < UPDATE and not all_fresh:
-                raise Exception("not allowed")
+                raise ProhibitedException("not allowed")
             else:
                 obj.update(**kwargs)
         else:
             if self.level < UPDATE and obj.id not in self.freshly_created:
-                raise Exception("not allowed")
+                raise ProhibitedException("not allowed")
             else:
                 obj.update(**kwargs)
diff --git a/src/programs/crawl.py b/src/programs/crawl.py
deleted file mode 100755
index 9962434d2d345e0d6e3ddc1440af90af3a8266ab..0000000000000000000000000000000000000000
--- a/src/programs/crawl.py
+++ /dev/null
@@ -1,47 +0,0 @@
-#!/usr/bin/env python3
-# encoding: utf-8
-#
-# ** header v3.0
-# This file is a part of the CaosDB Project.
-#
-# Copyright (C) 2018 Research Group Biomedical Physics,
-# Max-Planck-Institute for Dynamics and Self-Organization Göttingen
-#
-# This program is free software: you can redistribute it and/or modify
-# it under the terms of the GNU Affero General Public License as
-# published by the Free Software Foundation, either version 3 of the
-# License, or (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU Affero General Public License for more details.
-#
-# You should have received a copy of the GNU Affero General Public License
-# along with this program. If not, see <https://www.gnu.org/licenses/>.
-#
-# ** end header
-#
-
-from caosadvancedtools.crawler import Crawler
-from caosadvancedtools.example_cfood import ExampleCFood
-import argparse
-import caosdb as db
-from argparse import RawTextHelpFormatter
-
-
-def get_parser():
-    parser = argparse.ArgumentParser(description=__doc__,
-                                     formatter_class=RawTextHelpFormatter)
-
-    return parser
-
-
-if __name__ == "__main__":
-    parser = get_parser()
-    args = parser.parse_args()
-
-    fi = db.File(file=__file__, path="filepath")
-    f = ExampleCFood(pattern="(.*)craw(.*)")
-    c = Crawler(food=[f])
-    c.crawl([fi])
diff --git a/tox.ini b/tox.ini
index 5b51a29caaabb3a196d94d375b8ca65293e01223..57cda000e9853362917c1e8830d9ebc589d0e9e4 100644
--- a/tox.ini
+++ b/tox.ini
@@ -1,8 +1,10 @@
 [tox]
-envlist= py37
+envlist=py36, py37, py38
 skip_missing_interpreters = true
 [testenv]
 deps=nose
+    pandas
+    caosdb
     pytest
     pytest-cov
-commands=py.test {posargs}
+commands=py.test --cov=caosadvancedtools -vv {posargs}
diff --git a/unittests/test_cfood.py b/unittests/test_cfood.py
index bfa3443aa22e59f37f6fd5f764b2fce9860eb520..27365b2a1dc8d40b5df072e042311af468179200 100644
--- a/unittests/test_cfood.py
+++ b/unittests/test_cfood.py
@@ -25,8 +25,9 @@ import re
 import unittest
 
 import caosdb as db
-from caosadvancedtools.cfood import (AbstractCFood, AbstractFileCFood, CMeal,
-                                     assure_has_parent,
+from caosadvancedtools.cfood import (AbstractCFood, AbstractFileCFood,
+                                     CMeal, assure_has_parent,
+                                     assure_has_property,
                                      assure_object_is_in_list)
 from caosadvancedtools.example_cfood import ExampleCFood
 
@@ -131,6 +132,28 @@ class InsertionTest(unittest.TestCase):
         assure_has_parent(entity, "parent", to_be_updated)
         assert len(to_be_updated) == 0
 
+    def test_has_property(self):
+        """Test properties with string, int, float, and Boolean values"""
+        entity = db.Record()
+        to_be_updated = []
+        int_name = "Test int"
+        types_and_values = {
+            int_name: ("INT", 5),
+            "Test float": ("FLOAT", 3.14),
+            "Test bool": ("BOOLEAN", True),
+            "Test string": ("STRING", "bla")
+        }
+        for name, ty_val in types_and_values.items():
+            entity.add_property(name=name, datatype=ty_val[0],
+                                value=ty_val[1])
+            assure_has_property(entity=entity, name=name,
+                                value=ty_val[1], to_be_updated=to_be_updated)
+        assert len(to_be_updated) == 0
+        new_int = 6
+        assure_has_property(entity=entity, name=int_name,
+                            value=new_int, to_be_updated=to_be_updated)
+        assert to_be_updated[0] is entity
+
 
 class DependendTest(unittest.TestCase):
     def test(self):
diff --git a/unittests/test_update_cache.py b/unittests/test_update_cache.py
new file mode 100644
index 0000000000000000000000000000000000000000..4720f23de0b651b90e3b74ee13e06088462c5e31
--- /dev/null
+++ b/unittests/test_update_cache.py
@@ -0,0 +1,67 @@
+#!/usr/bin/env python
+# encoding: utf-8
+#
+# ** header v3.0
+# This file is a part of the CaosDB Project.
+#
+# Copyright (C) 2020 Henrik tom Wörden
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as
+# published by the Free Software Foundation, either version 3 of the
+# License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+#
+# ** end header
+
+import os
+import unittest
+from copy import deepcopy
+from tempfile import NamedTemporaryFile
+
+import caosdb as db
+from caosadvancedtools.cache import UpdateCache, get_pretty_xml
+
+
+class CacheTest(unittest.TestCase):
+    def get_cont(self, *args):
+        ent = db.Record()
+        ent2 = db.Record()
+        ent2.add_parent(name="Experiment")
+        ent2.add_property("lol", ent)
+        c = db.Container()
+        c.extend([ent, ent2])
+
+        return c
+
+    def setUp(self):
+        self.cache = UpdateCache(db_file=NamedTemporaryFile(delete=False).name)
+        self.cache.create_cache()
+        self.run_id = "235234"
+
+    def test_insert(self):
+        c = self.get_cont()
+        # we do not want to check the database in this unit test and thus replace
+        # the lookup
+        UpdateCache.get_previous_version = self.get_cont
+        c[0].add_property("hallo", "21235")
+        c[1].add_property("hallo", "235")
+        self.cache.insert(c, self.run_id)
+
+        saved_stuff = self.cache.get_updates(self.run_id)
+        assert len(saved_stuff) == 1
+        # old version
+        assert saved_stuff[0][2] == get_pretty_xml(self.get_cont())
+        # new version
+        assert saved_stuff[0][3] == get_pretty_xml(c)
+        assert len(self.cache.get_updates("sldkfjsldfjsldjf")) == 0
+
+    def tearDown(self):
+        os.remove(self.cache.db_file)