Skip to content
Snippets Groups Projects
Commit 9446c6a6 authored by Henrik tom Wörden's avatar Henrik tom Wörden
Browse files

Merge branch 'f-update-auth' into 'dev'

Create Authorization Mechanism

See merge request caosdb/caosdb-advanced-user-tools!20

This implements a mechanism for authorizing the crawler's changes after a run (see https://gitlab.com/caosdb/caosdb-advanced-user-tools/-/issues/5).

Furthermore, changes are included that allow the crawler to run as SSS.

This MR shall precede the first release. In order to prevent confusion with unreleased versions, I increased the version number to 0.2.0.
parents 8b004129 8b70f1c3
No related branches found
No related tags found
1 merge request!22Release 0.3
...@@ -7,6 +7,7 @@ RUN apt-get update && \ ...@@ -7,6 +7,7 @@ RUN apt-get update && \
python3-requests \ python3-requests \
python3-pandas \ python3-pandas \
python3-html2text \ python3-html2text \
tox \
git \ git \
openjdk-11-jdk-headless \ openjdk-11-jdk-headless \
python-autopep8 \ python-autopep8 \
......
...@@ -113,5 +113,4 @@ unittest: ...@@ -113,5 +113,4 @@ unittest:
stage: unittest stage: unittest
image: $CI_REGISTRY_IMAGE image: $CI_REGISTRY_IMAGE
script: script:
- cd src - tox
- python3 -m pytest ../unittests
...@@ -4,8 +4,21 @@ All notable changes to this project will be documented in this file. ...@@ -4,8 +4,21 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
## [0.1.0] - ????-??-?? ## [Unreleased] ##
Tag `v0.1` - Commit ????????
### Added ###
### Changed ###
### Deprecated ###
### Removed ###
### Fixed ###
### Security ###
## [0.2.0] - 2020-05-28 ##
### Added ### Added
- everything - everything
...@@ -28,19 +28,33 @@ import logging ...@@ -28,19 +28,33 @@ import logging
from argparse import RawTextHelpFormatter from argparse import RawTextHelpFormatter
import caosdb as db import caosdb as db
from caosadvancedtools.cfood import fileguide from caosadvancedtools.cfood import fileguide
from caosadvancedtools.crawler import FileCrawler from caosadvancedtools.crawler import FileCrawler
from caosadvancedtools.guard import UPDATE from caosadvancedtools.guard import INSERT, UPDATE
from scifolder import (AnalysisCFood, ExperimentCFood, ProjectCFood, from scifolder import (AnalysisCFood, ExperimentCFood, ProjectCFood,
PublicationCFood, SimulationCFood) PublicationCFood, SimulationCFood)
try:
from sss_helper import get_argument_parser, print_success
except ModuleNotFoundError:
def get_argument_parser():
return argparse.ArgumentParser()
def get_parser(): def print_success(text):
parser = argparse.ArgumentParser(description=__doc__, print("Success: "+text)
formatter_class=RawTextHelpFormatter)
parser.add_argument("path")
def get_parser():
# TODO allow to pass something here?
# description=__doc__, formatter_class=RawTextHelpFormatter
# with SSS this default parser will have SSS apropriate arguments.
parser = get_argument_parser()
parser.add_argument("path",
help="the subtree of files below the given path will "
"be considered. Use '/' for everything.")
parser.add_argument("-a", "--authorize-run", action='append',
help="supply the id of the run that you want to"
" authorize")
return parser return parser
...@@ -58,6 +72,12 @@ if __name__ == "__main__": ...@@ -58,6 +72,12 @@ if __name__ == "__main__":
parser = get_parser() parser = get_parser()
args = parser.parse_args() args = parser.parse_args()
# assuming SSS
if hasattr(args, "auth_token") and args.auth_token:
db.configure_connection(password_method="auth_token",
auth_token=args.auth_token)
logger.info("Starting query...") logger.info("Starting query...")
files = FileCrawler.query_files(args.path) files = FileCrawler.query_files(args.path)
logger.info("Query done...") logger.info("Query done...")
...@@ -68,4 +88,10 @@ if __name__ == "__main__": ...@@ -68,4 +88,10 @@ if __name__ == "__main__":
ExperimentCFood, AnalysisCFood, ExperimentCFood, AnalysisCFood,
PublicationCFood, SimulationCFood, PublicationCFood, SimulationCFood,
]) ])
c.crawl(security_level=UPDATE)
if args.authorize_run:
for run_id in args.authorize_run:
c.update_authorized_changes(run_id)
c.crawl(security_level=INSERT, path=args.path)
print_success('Crawler finished')
#!/bin/bash
OUT=/tmp/crawler.output
ls ls
rm -rf cache.db rm -rf cache.db
echo "Filling the database" echo "Filling the database"
./filldb.sh ./filldb.sh
echo "Testing the crawler database" echo "Testing the crawler database"
python3 -m pytest test_crawler.py python3 -m pytest test_crawler.py
echo "make a change"
pushd extroot
egrep -liRZ 'A description of another example' . | xargs -0 -l sed -i -e 's/A description of another example/A description of this example/g'
popd
echo "run crawler"
./crawl.py / > $OUT
# check whether there was something UNAUTHORIZED
set -e
grep "UNAUTHORIZED UPDATE" $OUT
# get the id of the run
RUN_ID=$(grep "./crawl.py -a " $OUT | awk '{ print $3 }')
echo $RUN_ID
echo "run crawler again"
echo "./crawl.py -a $RUN_ID /"
./crawl.py -a $RUN_ID / > $OUT
set +e
if grep "UNAUTHORIZED UPDATE" $OUT
then
exit 1
fi
set -e
echo "undo changes"
pushd extroot
egrep -liRZ 'A description of this example' . | xargs -0 -l sed -i -e 's/A description of this example/A description of another example/g'
popd
python3 test_table.py python3 test_table.py
# TODO the following test deletes lots of the data inserted by the crawler # TODO the following test deletes lots of the data inserted by the crawler
echo "Testing im and export" echo "Testing im and export"
python3 test_im_und_export.py python3 test_im_und_export.py
echo "/n/n/n YOU NEED TO RESTART THE SERVER TO REDO TESTS!!!"
#!/usr/bin/env python #!/usr/bin/env python
# -*- encoding: utf-8 -*- # -*- encoding: utf-8 -*-
# #
# ** header v3.0
# This file is a part of the CaosDB Project.
# #
# Copyright (C) 2018 Research Group Biomedical Physics, """caosadvancedtools"""
# Max-Planck-Institute for Dynamics and Self-Organization Göttingen import os
import subprocess
import sys
from setuptools import find_packages, setup
########################################################################
# The following code is largely based on code in numpy
########################################################################
# #
# This program is free software: you can redistribute it and/or modify # Copyright (c) 2005-2019, NumPy Developers.
# it under the terms of the GNU Affero General Public License as # All rights reserved.
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
# #
# This program is distributed in the hope that it will be useful, # Redistribution and use in source and binary forms, with or without
# but WITHOUT ANY WARRANTY; without even the implied warranty of # modification, are permitted provided that the following conditions are
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # met:
# GNU Affero General Public License for more details.
# #
# You should have received a copy of the GNU Affero General Public License # * Redistributions of source code must retain the above copyright
# along with this program. If not, see <https://www.gnu.org/licenses/>. # notice, this list of conditions and the following disclaimer.
# #
# ** end header # * Redistributions in binary form must reproduce the above
# copyright notice, this list of conditions and the following
# disclaimer in the documentation and/or other materials provided
# with the distribution.
# #
"""Install and (nose-)test caosdb.""" # * Neither the name of the NumPy Developers nor the names of any
from setuptools import find_packages, setup # contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
########################################################################
MAJOR = 0
MINOR = 2
MICRO = 0
PRE = "" # e.g. rc0, alpha.1, 0.beta-23
ISRELEASED = False
if PRE:
VERSION = "{}.{}.{}-{}".format(MAJOR, MINOR, MICRO, PRE)
else:
VERSION = "{}.{}.{}".format(MAJOR, MINOR, MICRO)
# Return the git revision as a string
def git_version():
    """Return the git revision of the current checkout as a string.

    ``git rev-parse HEAD`` is executed with a minimal, locale-neutral
    environment. ``"Unknown"`` is returned when git cannot be started or
    the command fails.
    """
    def _minimal_ext_cmd(cmd):
        # construct minimal environment
        env = {}
        for k in ['SYSTEMROOT', 'PATH', 'HOME']:
            v = os.environ.get(k)
            if v is not None:
                env[k] = v
        # LANGUAGE is used on win32
        env['LANGUAGE'] = 'C'
        env['LANG'] = 'C'
        env['LC_ALL'] = 'C'
        # Discard stderr instead of merging it into stdout: anything git
        # prints there would otherwise become part of the revision string.
        out = subprocess.check_output(cmd, stderr=subprocess.DEVNULL, env=env)
        return out

    try:
        out = _minimal_ext_cmd(['git', 'rev-parse', 'HEAD'])
        GIT_REVISION = out.strip().decode('ascii')
    except (subprocess.SubprocessError, OSError):
        GIT_REVISION = "Unknown"
    return GIT_REVISION
setup(name='caosadvancedtools', def get_version_info():
version='0.1.0', # Adding the git rev number needs to be done inside write_version_py(),
description='Advanced tools to interact with CaosDB', # otherwise the import of caosadvancedtools.version messes up the build under
# Python 3.
FULLVERSION = VERSION
if os.path.exists('.git'):
GIT_REVISION = git_version()
elif os.path.exists('src/caosadvancedtools/version.py'):
# must be a source distribution, use existing version file
try:
from caosadvancedtools.version import git_revision as GIT_REVISION
except ImportError:
raise ImportError("Unable to import git_revision. Try removing "
"src/caosadvancedtools/version.py and the build directory "
"before building.")
else:
GIT_REVISION = "Unknown"
if not ISRELEASED:
FULLVERSION += '.dev0+' + GIT_REVISION[:7]
return FULLVERSION, GIT_REVISION
def write_version_py(filename='src/caosadvancedtools/version.py'):
    """Write the generated version module to ``filename``.

    The file content is built from the module-level ``VERSION`` and
    ``ISRELEASED`` values and from the result of ``get_version_info()``.
    An existing file is overwritten.
    """
    # The body of ``if not release:`` must be indented, otherwise the
    # generated module is not valid Python.
    cnt = """
# THIS FILE IS GENERATED FROM caosadvancedtools SETUP.PY
#
short_version = '%(version)s'
version = '%(version)s'
full_version = '%(full_version)s'
git_revision = '%(git_revision)s'
release = %(isrelease)s
if not release:
    version = full_version
"""
    FULLVERSION, GIT_REVISION = get_version_info()
    # ``with`` closes the file even if formatting or writing fails.
    with open(filename, 'w') as version_file:
        version_file.write(cnt % {'version': VERSION,
                                  'full_version': FULLVERSION,
                                  'git_revision': GIT_REVISION,
                                  'isrelease': str(ISRELEASED)})
def setup_package():
# load README
with open("README.md", "r") as fh:
long_description = fh.read()
src_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "src")
sys.path.insert(0, src_path)
# Rewrite the version file everytime
write_version_py()
metadata = dict(
name='caosadvancedtools',
version=get_version_info()[0],
description='advanced utilities for caosdb',
long_description=long_description,
long_description_content_type="text/markdown",
author='Henrik tom Wörden', author='Henrik tom Wörden',
author_email='henrik.tom-woerden@ds.mpg.de', author_email='h.tomwoerden@indiscale.com',
packages=find_packages('src'), packages=find_packages('src'),
package_dir={'': 'src'}, package_dir={'': 'src'},
install_requires=["caosdb>=0.3.0", "caosmodels>=0.1.0"], setup_requires=["pytest-runner>=2.0,<3dev"],
extras_require={}, tests_require=["pytest", "pytest-cov", "coverage>=4.4.2"],
tests_require=["pytest"],
) )
try:
setup(**metadata)
finally:
del sys.path[0]
return
if __name__ == '__main__':
setup_package()
...@@ -9,18 +9,31 @@ import caosdb as db ...@@ -9,18 +9,31 @@ import caosdb as db
from lxml import etree from lxml import etree
def get_pretty_xml(cont): def put_in_container(stuff):
if isinstance(cont, list): if isinstance(stuff, list):
cont = db.Container().extend(cont) stuff = db.Container().extend(stuff)
if not isinstance(stuff, db.Container):
stuff = db.Container().append(stuff)
return stuff
if not isinstance(cont, db.Container):
cont = db.Container().append(cont) def get_pretty_xml(cont):
cont = put_in_container(cont)
return etree.tounicode(cont.to_xml( return etree.tounicode(cont.to_xml(
local_serialization=True), pretty_print=True) local_serialization=True), pretty_print=True)
class Cache(object): class Cache(object):
"""
stores identifiables (as a hash of xml) and their respective ID.
This allows to retrieve the Record corresponding to an indentifiable
without querying.
"""
def __init__(self, db_file=None): def __init__(self, db_file=None):
if db_file is None: if db_file is None:
self.db_file = "cache.db" self.db_file = "cache.db"
...@@ -37,6 +50,7 @@ class Cache(object): ...@@ -37,6 +50,7 @@ class Cache(object):
conn.commit() conn.commit()
conn.close() conn.close()
@staticmethod
def hash_entity(ent): def hash_entity(ent):
xml = get_pretty_xml(ent) xml = get_pretty_xml(ent)
digest = sha256(xml.encode("utf-8")).hexdigest() digest = sha256(xml.encode("utf-8")).hexdigest()
...@@ -91,3 +105,81 @@ class Cache(object): ...@@ -91,3 +105,81 @@ class Cache(object):
for ehash, ent in zip(hashes, entities): for ehash, ent in zip(hashes, entities):
if self.check_existing(ehash) is None: if self.check_existing(ehash) is None:
self.insert(ehash, ent.id) self.insert(ehash, ent.id)
class UpdateCache(Cache):
    """
    stores unauthorized updates

    If the Guard is set to a mode that does not allow an update, the update can
    be stored in this cache such that it can be authorized and done later.
    """

    def __init__(self, db_file=None):
        # Use a separate default file from the one of the base class.
        if db_file is None:
            db_file = "/tmp/crawler_update_cache.db"
        super().__init__(db_file=db_file)

    @staticmethod
    def get_previous_version(cont):
        """ Retrieve the current, unchanged version of the entities that shall
        be updated, i.e. the version before the update """
        old_ones = db.Container()

        for ent in cont:
            old_ones.append(db.execute_query("FIND {}".format(ent.id),
                                             unique=True))

        return old_ones

    def insert(self, cont, run_id):
        """ insert a pending, unauthorized update

        Parameters:
        -----------
        cont: Container with the records to be updated containing the desired
              version, i.e. the state after the update.
        run_id: the id of the crawler run
        """
        cont = put_in_container(cont)
        old_ones = UpdateCache.get_previous_version(cont)
        new_ones = cont

        old_hash = Cache.hash_entity(old_ones)
        new_hash = Cache.hash_entity(new_ones)
        conn = sqlite3.connect(self.db_file)
        try:
            c = conn.cursor()
            c.execute('''INSERT INTO updates VALUES (?, ?, ?, ?, ?)''',
                      (old_hash, new_hash, str(old_ones), str(new_ones),
                       str(run_id)))
            conn.commit()
        finally:
            # close the connection even if the INSERT fails, e.g. because
            # of a primary key conflict on (olddigest, run_id)
            conn.close()

    def create_cache(self):
        """ initialize the cache """
        conn = sqlite3.connect(self.db_file)
        try:
            c = conn.cursor()
            c.execute('''CREATE TABLE updates (olddigest text, newdigest text,
                      oldrep text, newrep text, run_id text,
                      primary key (olddigest, run_id))''')
            conn.commit()
        finally:
            conn.close()

    def get_updates(self, run_id):
        """ returns the pending updates for a given run id

        Each returned row is a tuple
        (olddigest, newdigest, oldrep, newrep, run_id).

        Parameters:
        -----------
        run_id: the id of the crawler run
        """
        conn = sqlite3.connect(self.db_file)
        try:
            c = conn.cursor()
            c.execute('''Select * FROM updates WHERE run_id=?''',
                      (str(run_id),))
            # read-only query: nothing to commit
            res = c.fetchall()
        finally:
            conn.close()

        return res
...@@ -100,33 +100,6 @@ class AbstractCFood(object, metaclass=ABCMeta): ...@@ -100,33 +100,6 @@ class AbstractCFood(object, metaclass=ABCMeta):
to self.to_be_updated to self.to_be_updated
""" """
def push_identifiables_to_CaosDB(self):
""" Updates the self.to_be_updated Container, i.e. pushes the changes
to CaosDB
"""
if len(self.to_be_updated) == 0:
return
get_ids_for_entities_with_names(self.to_be_updated)
# remove duplicates
tmp = db.Container()
for el in self.to_be_updated:
if el not in tmp:
tmp.append(el)
self.to_be_updated = tmp
logger.info("UPDATE: updating the following entities")
for el in self.to_be_updated:
logger.info("\t" + el.name if el.name is not None else el.id)
logger.debug(self.to_be_updated)
guard.safe_update(self.to_be_updated)
@classmethod @classmethod
def match_item(cls, item): def match_item(cls, item):
""" Matches an item found by the crawler against this class. Returns """ Matches an item found by the crawler against this class. Returns
...@@ -496,6 +469,8 @@ def insert_id_based_on_name(entity): ...@@ -496,6 +469,8 @@ def insert_id_based_on_name(entity):
def get_ids_for_entities_with_names(entities): def get_ids_for_entities_with_names(entities):
# TODO how to deal with name conflicts?
for ent in entities: for ent in entities:
insert_id_based_on_name(ent) insert_id_based_on_name(ent)
...@@ -596,4 +571,7 @@ class CMeal(object): ...@@ -596,4 +571,7 @@ class CMeal(object):
return False return False
match = re.match(self.get_re(), item) match = re.match(self.get_re(), item)
if match is None:
return False
else:
return self.all_groups_equal(match, self.match) return self.all_groups_equal(match, self.match)
...@@ -39,15 +39,17 @@ match. This occurs in basically three steps: ...@@ -39,15 +39,17 @@ match. This occurs in basically three steps:
import logging import logging
import os import os
import subprocess
import traceback import traceback
import uuid
from datetime import datetime from datetime import datetime
import caosdb as db import caosdb as db
from caosdb.exceptions import TransactionError from caosdb.exceptions import TransactionError
from .cache import Cache from .cache import Cache, UpdateCache, get_pretty_xml
from .cfood import RowCFood from .cfood import RowCFood, get_ids_for_entities_with_names
from .guard import RETRIEVE from .guard import RETRIEVE, ProhibitedException
from .guard import global_guard as guard from .guard import global_guard as guard
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
...@@ -106,6 +108,7 @@ class Crawler(object): ...@@ -106,6 +108,7 @@ class Crawler(object):
self.use_cache = use_cache self.use_cache = use_cache
self.hideKnown = hideKnown self.hideKnown = hideKnown
self.abort_on_exception = abort_on_exception self.abort_on_exception = abort_on_exception
self.update_cache = UpdateCache()
if self.use_cache: if self.use_cache:
self.cache = Cache() self.cache = Cache()
...@@ -114,6 +117,36 @@ class Crawler(object): ...@@ -114,6 +117,36 @@ class Crawler(object):
""" generates items to be crawled with an index""" """ generates items to be crawled with an index"""
yield 0, None yield 0, None
def update_authorized_changes(self, run_id):
    """
    execute the pending updates of a specific run id.

    This should be called if the updates of a certain run were authorized.
    A pending update is only executed if the entities currently stored on
    the server still equal the version that was recorded when the update
    was cached; otherwise it is skipped and a warning is logged.

    Parameters:
    -----------
    run_id: the id of the crawler run
    """
    changes = self.update_cache.get_updates(run_id)

    for _, _, old, new, _ in changes:
        new_cont = db.Container().from_xml(new)
        current = db.Container()

        for ent in new_cont:
            current.append(db.execute_query("FIND {}".format(ent.id),
                                            unique=True))
        current_xml = get_pretty_xml(current)

        # check whether previous version equals current version
        # if not, the update must not be done

        if current_xml != old:
            logger.warning(
                "Skipping an authorized update of run %s: the entities "
                "changed since the update was recorded.", run_id)

            continue

        new_cont.update()
def collect_cfoods(self): def collect_cfoods(self):
""" """
This is the first phase of the crawl. It collects all cfoods that shall This is the first phase of the crawl. It collects all cfoods that shall
...@@ -227,7 +260,9 @@ class Crawler(object): ...@@ -227,7 +260,9 @@ class Crawler(object):
if self.use_cache: if self.use_cache:
self.cache.insert_list(hashes, identifiables) self.cache.insert_list(hashes, identifiables)
def crawl(self, security_level=RETRIEVE): def crawl(self, security_level=RETRIEVE, path=None):
self.run_id = uuid.uuid1()
logger.info("Run Id: " + str(self.run_id))
guard.set_level(level=security_level) guard.set_level(level=security_level)
cfoods, tbs, errors_occured = self.collect_cfoods() cfoods, tbs, errors_occured = self.collect_cfoods()
...@@ -244,7 +279,7 @@ class Crawler(object): ...@@ -244,7 +279,7 @@ class Crawler(object):
self.cached_find_identifiables(cfood.identifiables) self.cached_find_identifiables(cfood.identifiables)
cfood.update_identifiables() cfood.update_identifiables()
cfood.push_identifiables_to_CaosDB() self.push_identifiables_to_CaosDB(cfood)
except Exception as e: except Exception as e:
traceback.print_exc() traceback.print_exc()
print(e) print(e)
...@@ -254,12 +289,140 @@ class Crawler(object): ...@@ -254,12 +289,140 @@ class Crawler(object):
errors_occured = True errors_occured = True
tbs.append(e) tbs.append(e)
pending_changes = self.update_cache.get_updates(self.run_id)
if pending_changes:
# Sending an Email with a link to a form to authorize updates is
# only done in SSS mode
if "SHARED_DIR" in os.environ:
filename = self.save_form([el[3] for el in pending_changes], path)
self.send_mail([el[3] for el in pending_changes], filename)
for i, el in enumerate(pending_changes):
logger.info(
"""
UNAUTHORIZED UPDATE ({} of {}):
____________________\n""".format(i+1, len(pending_changes)) + str(el[3]))
logger.info("There where unauthorized changes (see above). An "
"email was sent to the curator.\n"
"You can authorize the updates by invoking the crawler"
" with the run id:\n\n"
"./crawl.py -a {rid} {path}\n".format(rid=self.run_id,
path=path))
if errors_occured: if errors_occured:
logger.warning("Crawler terminated with failures!") logger.warning("Crawler terminated with failures!")
logger.warning(tbs) logger.warning(tbs)
else: else:
logger.info("Crawler terminated successfully!") logger.info("Crawler terminated successfully!")
def save_form(self, changes, path):
    """
    Saves an html website to a file that contains a form with a button to
    authorize the given changes.

    The button will call the crawler with the same path that was used for
    the current run and with a parameter to authorize the changes of the
    current run.

    Parameters:
    -----------
    changes: The CaosDB entities in the version after the update.
    path: the path defining the subtree that is crawled

    Returns the file name of the form relative to the parent of the
    directory given by the SHARED_DIR environment variable.

    Raises a RuntimeError if SHARED_DIR is not set.
    """
    from xml.sax.saxutils import escape

    if "SHARED_DIR" not in os.environ:
        raise RuntimeError("The environment variable SHARED_DIR must be "
                           "set in order to save the authorization form.")
    directory = os.environ["SHARED_DIR"]

    # Values placed inside double-quoted attributes additionally need the
    # double quote escaped; escape() alone only handles &, < and >.
    attr_entities = {'"': "&quot;"}

    # TODO move path related stuff to sss_helper
    form = """
<form method="post" action="{url}/scripting">
<input type="hidden" name="call" value="crawl.py"/>
<input type="hidden" name="-p0" value=""/>
<input type="hidden" name="-p1" value="{path}"/>
<input type="hidden" name="-Oauthorize-run" value="{rid}"/>
<input type="submit" value="Authorize"/>
</form>
<pre>
<code>
{changes}
</code>
</pre>
""".format(
        url=escape(db.configuration.get_config()["Connection"]["url"],
                   attr_entities),
        rid=escape(str(self.run_id), attr_entities),
        changes=escape("\n".join(changes)),
        path=escape(str(path), attr_entities))

    filename = str(self.run_id)+".html"
    # name of the shared directory itself; it becomes part of the returned
    # relative file name
    randname = os.path.basename(os.path.abspath(directory))
    filepath = os.path.abspath(os.path.join(directory, filename))
    filename = os.path.join(randname, filename)

    with open(filepath, "w") as f:
        f.write(form)

    return filename
def send_mail(self, changes, filename):
    """ Notify the curator about pending changes by piping a mail text
    into the configured sendmail program.

    Parameters:
    -----------
    changes: The CaosDB entities in the version after the update.
    filename: path to the html site that allow the authorization
    """
    config = db.configuration.get_config()
    body = """Dear Curator,
there where changes that need your authorization. Please check the following
carefully and if the changes are ok, click on the following link:
{url}/Shared/{filename}
{changes}
""".format(url=config["Connection"]["url"],
           filename=filename,
           changes="\n".join(changes))

    # program, sender and recipient are taken from the "advancedtools"
    # section of the configuration
    command = [
        config["advancedtools"]["sendmail"],
        "-f",
        config["advancedtools"]["crawler.from_mail"],
        config["advancedtools"]["crawler.to_mail"],
    ]
    process = subprocess.Popen(command, stdin=subprocess.PIPE)
    process.communicate(input=body.encode())
def push_identifiables_to_CaosDB(self, cfood):
    """
    Updates the to_be_updated Container, i.e. pushes the changes to CaosDB

    If the guard prohibits the update, the update is stored in the update
    cache under the id of the current run instead.

    Parameters:
    -----------
    cfood: the cfood whose to_be_updated Container shall be pushed
    """

    if len(cfood.to_be_updated) == 0:
        return

    get_ids_for_entities_with_names(cfood.to_be_updated)

    # remove duplicates
    tmp = db.Container()

    for el in cfood.to_be_updated:
        if el not in tmp:
            tmp.append(el)

    cfood.to_be_updated = tmp

    logger.info("UPDATE: updating the following entities")

    for el in cfood.to_be_updated:
        # The conditional has to be evaluated before the concatenation;
        # otherwise entities without a name are logged as a bare id
        # without the leading tab.
        label = el.name if el.name is not None else str(el.id)
        logger.info("\t" + label)

    logger.debug(cfood.to_be_updated)
    try:
        guard.safe_update(cfood.to_be_updated)
    except ProhibitedException:
        self.update_cache.insert(cfood.to_be_updated, self.run_id)
# TODO remove static? # TODO remove static?
@staticmethod @staticmethod
def find_or_insert_identifiables(identifiables): def find_or_insert_identifiables(identifiables):
......
#!/usr/bin/env python
# encoding: utf-8
#
# This file is a part of the CaosDB Project.
#
# Copyright (C) 2020 Henrik tom Wörden
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
import caosdb as db import caosdb as db
RETRIEVE = 0 RETRIEVE = 0
INSERT = 1 INSERT = 1
UPDATE = 2 UPDATE = 2
class ProhibitedException(Exception):
    """Raised by the Guard when an operation exceeds the allowed level."""
class Guard(object): class Guard(object):
def __init__(self, level=RETRIEVE): def __init__(self, level=RETRIEVE):
self.freshly_created = [] self.freshly_created = []
self.level = level self.level = level
def safe_insert(self, obj, ): def safe_insert(self, obj):
if self.level < INSERT: if self.level < INSERT:
raise Exception("not allowed") raise ProhibitedException("not allowed")
obj.insert() obj.insert()
if isinstance(obj, db.Container): if isinstance(obj, db.Container):
...@@ -31,12 +56,12 @@ class Guard(object): ...@@ -31,12 +56,12 @@ class Guard(object):
all_fresh = False all_fresh = False
if self.level < UPDATE and not all_fresh: if self.level < UPDATE and not all_fresh:
raise Exception("not allowed") raise ProhibitedException("not allowed")
else: else:
obj.update(**kwargs) obj.update(**kwargs)
else: else:
if self.level < UPDATE and obj.id not in self.freshly_created: if self.level < UPDATE and obj.id not in self.freshly_created:
raise Exception("not allowed") raise ProhibitedException("not allowed")
else: else:
obj.update(**kwargs) obj.update(**kwargs)
......
[tox] [tox]
envlist= py37 envlist=py36, py37, py38
skip_missing_interpreters = true skip_missing_interpreters = true
[testenv] [testenv]
deps=nose deps=nose
pandas
caosdb
pytest pytest
pytest-cov pytest-cov
commands=py.test {posargs} commands=py.test --cov=caosadvancedtools -vv {posargs}
...@@ -25,8 +25,9 @@ import re ...@@ -25,8 +25,9 @@ import re
import unittest import unittest
import caosdb as db import caosdb as db
from caosadvancedtools.cfood import (AbstractCFood, AbstractFileCFood, CMeal, from caosadvancedtools.cfood import (AbstractCFood, AbstractFileCFood,
assure_has_parent, CMeal, assure_has_parent,
assure_has_property,
assure_object_is_in_list) assure_object_is_in_list)
from caosadvancedtools.example_cfood import ExampleCFood from caosadvancedtools.example_cfood import ExampleCFood
...@@ -131,6 +132,28 @@ class InsertionTest(unittest.TestCase): ...@@ -131,6 +132,28 @@ class InsertionTest(unittest.TestCase):
assure_has_parent(entity, "parent", to_be_updated) assure_has_parent(entity, "parent", to_be_updated)
assert len(to_be_updated) == 0 assert len(to_be_updated) == 0
def test_has_property(self):
    """Check assure_has_property for string, int, float and Boolean values."""
    entity = db.Record()
    to_be_updated = []
    int_name = "Test int"
    # (property name, datatype, value)
    cases = [
        (int_name, "INT", 5),
        ("Test float", "FLOAT", 3.14),
        ("Test bool", "BOOLEAN", True),
        ("Test string", "STRING", "bla"),
    ]

    for prop_name, datatype, value in cases:
        entity.add_property(name=prop_name, datatype=datatype, value=value)
        # the property is already present with this value, so nothing
        # must be scheduled for an update
        assure_has_property(entity=entity, name=prop_name, value=value,
                            to_be_updated=to_be_updated)
        assert len(to_be_updated) == 0

    # a differing value must schedule the entity for an update
    assure_has_property(entity=entity, name=int_name, value=6,
                        to_be_updated=to_be_updated)
    assert to_be_updated[0] is entity
class DependendTest(unittest.TestCase): class DependendTest(unittest.TestCase):
def test(self): def test(self):
......
#!/usr/bin/env python3 #!/usr/bin/env python
# encoding: utf-8 # encoding: utf-8
# #
# ** header v3.0 # ** header v3.0
# This file is a part of the CaosDB Project. # This file is a part of the CaosDB Project.
# #
# Copyright (C) 2018 Research Group Biomedical Physics, # Copyright (C) 2020 Henrik tom Wörden
# Max-Planck-Institute for Dynamics and Self-Organization Göttingen
# #
# This program is free software: you can redistribute it and/or modify # This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as # it under the terms of the GNU Affero General Public License as
...@@ -21,27 +20,48 @@ ...@@ -21,27 +20,48 @@
# along with this program. If not, see <https://www.gnu.org/licenses/>. # along with this program. If not, see <https://www.gnu.org/licenses/>.
# #
# ** end header # ** end header
#
from caosadvancedtools.crawler import Crawler import os
from caosadvancedtools.example_cfood import ExampleCFood import unittest
import argparse from copy import deepcopy
from tempfile import NamedTemporaryFile
import caosdb as db import caosdb as db
from argparse import RawTextHelpFormatter from caosadvancedtools.cache import UpdateCache, get_pretty_xml
class CacheTest(unittest.TestCase):
def get_cont(self, *args):
ent = db.Record()
ent2 = db.Record()
ent2.add_parent(name="Experiment")
ent2.add_property("lol", ent)
c = db.Container()
c.extend([ent, ent2])
def get_parser(): return c
parser = argparse.ArgumentParser(description=__doc__,
formatter_class=RawTextHelpFormatter)
return parser def setUp(self):
self.cache = UpdateCache(db_file=NamedTemporaryFile(delete=False).name)
self.cache.create_cache()
self.run_id = "235234"
def test_insert(self):
c = self.get_cont()
# we do not want to check database in this unit test and thus replace
# the lookup
UpdateCache.get_previous_version = self.get_cont
c[0].add_property("hallo", "21235")
c[1].add_property("hallo", "235")
self.cache.insert(c, self.run_id)
if __name__ == "__main__": saved_stuff = self.cache.get_updates(self.run_id)
parser = get_parser() assert len(saved_stuff) == 1
args = parser.parse_args() # old version
assert saved_stuff[0][2] == get_pretty_xml(self.get_cont())
# new version
assert saved_stuff[0][3] == get_pretty_xml(c)
assert len(self.cache.get_updates("sldkfjsldfjsldjf")) == 0
fi = db.File(file=__file__, path="filepath") def tearDown(self):
f = ExampleCFood(pattern="(.*)craw(.*)") os.remove(self.cache.db_file)
c = Crawler(food=[f])
c.crawl([fi])
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment