Skip to content
Snippets Groups Projects
Commit 970d7520 authored by Henrik tom Wörden
Browse files

Merge branch 'guard' into 'master'

Guard

See merge request caosdb/caosdb-advanced-user-tools!9
parents 7949241c fd63ccc4
No related branches found
No related tags found
No related merge requests found
......@@ -29,6 +29,7 @@ from argparse import RawTextHelpFormatter
import caosdb as db
from caosadvancedtools.crawler import Crawler
from caosadvancedtools.verbosity import DEBUG, INFO, QUIET, VERBOSE
from caosadvancedtools.guard import INSERT, RETRIEVE, UPDATE, Guard
from scifolder import (AnalysisCFood, ExperimentCFood, ProjectCFood,
PublicationCFood, SimulationCFood)
......@@ -60,4 +61,4 @@ if __name__ == "__main__":
AnalysisCFood, ExperimentCFood,
PublicationCFood, SimulationCFood,
])
c.crawl(files)
c.crawl(files, security_level=UPDATE)
......@@ -62,7 +62,8 @@ class AbstractCFood(object):
# function match()
_pattern = None
def __init__(self, crawled_file, access=lambda x: x, verbosity=INFO):
def __init__(self, crawled_file, access=lambda x: x, verbosity=INFO,
guard=None):
""" Abstract base class for Crawler food (CFood).
Parameters
......@@ -81,6 +82,7 @@ class AbstractCFood(object):
self.identifiables = db.Container()
self.verbosity = verbosity
self.attached_ones = []
self.guard = guard
@staticmethod
def get_re():
......@@ -167,7 +169,7 @@ class AbstractCFood(object):
for el in self.to_be_updated:
print(el.name if el.name is not None else el.id)
print("/"*60)
self.to_be_updated.update()
# `guard` is optional in __init__ (defaults to None); without one, update
# directly instead of failing with AttributeError on None.
if self.guard is None:
    self.to_be_updated.update()
else:
    self.guard.safe_update(self.to_be_updated)
def attach(self, crawled_file):
self.attached_ones.append(crawled_file)
......@@ -242,6 +244,11 @@ def assure_object_is_in_list(obj, containing_object, property_name,
else:
objects = obj
# use ids if values are entities
if all([isinstance(el, db.Entity) for el in objects]):
objects = [el.id for el in objects]
update = False
for o in objects:
......@@ -271,7 +278,7 @@ def assure_object_is_in_list(obj, containing_object, property_name,
to_be_updated.append(containing_object)
def assure_has_parent(entity, parent, to_be_updated=None, verbosity=INFO):
def assure_has_parent(entity, parent, to_be_updated=None, guard=None, verbosity=INFO):
"""
Checks whether `entity` has a parent with name `parent`.
......@@ -304,13 +311,17 @@ def assure_has_parent(entity, parent, to_be_updated=None, verbosity=INFO):
if to_be_updated is None:
get_ids_for_entities_with_names([entity])
entity.update(unique=False)
if guard is None:
    # No guard supplied: write the change directly.
    entity.update(unique=False)
else:
    # Let the guard decide whether this update is permitted.
    # Guard's method is `safe_update`.
    guard.safe_update(entity, unique=False)
else:
to_be_updated.append(entity)
def assure_has_property(entity, name, value, to_be_updated=None,
verbosity=INFO, datatype=None):
verbosity=INFO, guard=None, datatype=None):
"""
Checks whether `entity` has a property `name` with the value `value`.
......@@ -319,6 +330,26 @@ def assure_has_property(entity, name, value, to_be_updated=None,
the list in order to indicate, that the entity `entity` should be updated.
Otherwise it is directly updated
"""
if name.lower() == "description":
if entity.description == value:
return
else:
print("/"*60)
print("UPDATE: Adding description with value {} to "
"entity {}".format(
value, entity.id))
print("/"*60)
entity.description = value
if to_be_updated is None:
get_ids_for_entities_with_names([entity])
if guard is None:
entity.update(unique=False)
return
properties = entity.get_properties()
possible_properties = [prop for prop in properties if prop.name == name]
contained = False
......@@ -351,7 +382,11 @@ def assure_has_property(entity, name, value, to_be_updated=None,
if to_be_updated is None:
get_ids_for_entities_with_names([entity])
entity.update(unique=False)
if guard is None:
    # No guard supplied: write the change directly.
    entity.update(unique=False)
else:
    # Let the guard decide whether this update is permitted.
    # Guard's method is `safe_update`.
    guard.safe_update(entity, unique=False)
else:
to_be_updated.append(entity)
......
......@@ -42,6 +42,7 @@ import caosdb as db
from caosdb.exceptions import TransactionError
from .cache import Cache
from .guard import INSERT, RETRIEVE, UPDATE, Guard
from .verbosity import DEBUG, INFO, VERBOSE
......@@ -70,7 +71,11 @@ class Crawler(object):
if self.use_cache:
self.cache = Cache()
def crawl(self, files):
def crawl(self, files, security_level=RETRIEVE):
errors_occured = False
tbs = []
self.guard = Guard(level=security_level)
files = sorted(files, key=lambda x: x.path)
cfoods = []
matches = {f.path: 0 for f in files}
......@@ -96,10 +101,15 @@ class Crawler(object):
crawled_file.path))
try:
cfoods.append(Cfood(crawled_file, access=self.access,
verbosity=self.verbosity))
verbosity=self.verbosity,
guard=self.guard))
except Exception as e:
traceback.print_exc()
print(e)
# Record the failure so the final summary can report it.
errors_occured = True
tbs.append(e)
if self.verbosity >= INFO:
print("-"*60)
......@@ -144,7 +154,9 @@ class Crawler(object):
cfood.identifiables)
self.find_or_insert_identifiables(cfood.identifiables,
self.verbosity)
self.verbosity,
guard=self.guard
)
if self.use_cache:
self.cache.insert_list(hashes, cfood.identifiables)
......@@ -154,14 +166,22 @@ class Crawler(object):
except Exception as e:
traceback.print_exc()
print(e)
errors_occured = True
tbs.append(e)
if self.verbosity >= INFO:
print("-"*60)
print("Crawler terminated successfully!")
if errors_occured:
print("Crawler terminated with failures!")
print(tbs)
else:
print("Crawler terminated successfully!")
print("-"*60)
# TODO remove static?
@staticmethod
def find_or_insert_identifiables(identifiables, verbosity=INFO):
def find_or_insert_identifiables(identifiables, verbosity=INFO, guard=None):
""" Sets the ids of identifiables (that do not have already an id from the
cache) based on searching CaosDB and retrieves those entities.
The remaining entities (those which can not be retrieved) have no
......@@ -202,10 +222,11 @@ class Crawler(object):
for ent in missing_identifiables:
print(ent)
if verbosity >= VERBOSE and len(missing_identifiables) == 0:
print("No new entities to be inserted.")
missing_identifiables.insert()
if len(missing_identifiables) == 0:
    if verbosity >= VERBOSE:
        print("No new entities to be inserted.")
elif guard is None:
    # `guard` is an optional parameter (default None); without one,
    # insert directly instead of failing with AttributeError on None.
    missing_identifiables.insert()
else:
    guard.safe_insert(missing_identifiables)
if verbosity >= VERBOSE:
print("Retrieving entities from CaosDB...")
......
......@@ -19,28 +19,27 @@
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
#
# ** end header
#
import caosdb as db
from caosadvancedtools.cfood import AbstractCFood
from .cfood import AbstractCFood, assure_has_property
class ExampleCFood(AbstractCFood):
def create_identifiables(self, crawled_file, match):
print("create_identifiables")
entities = {}
entities["exp"] = db.Record()
#import IPython
# IPython.embed()
entities["exp"].add_parent(name="Experiment")
entities["exp"].add_property(name="species", value=match.group)
@staticmethod
def get_re():
return (r".*/(?P<species>[^/]+)/"
r"(?P<date>\d{4}-\d{2}-\d{2})/README.md")
return entities
def create_identifiables(self):
self.experiment = db.Record()
self.experiment.add_parent(name="Experiment")
self.experiment.add_property(name="date",
value=self.match.group('date'))
self.identifiables.append(self.experiment)
def update_identifiables(self, entities, crawled_file, match):
entities["exp"].add_property(name="date",
value=datetime.today().isoformat())
db.Container().extend(entities.values).update()
def update_identifiables(self):
assure_has_property(
self.experiment,
"species",
self.match.group('species'))
import caosdb as db
# Security levels used by Guard. Guard compares its own level against INSERT
# and UPDATE with `<`, so the numeric ordering is significant.
# RETRIEVE is Guard's default level and lies below both thresholds.
RETRIEVE = 0
# Minimum level at which Guard.safe_insert performs an insert.
INSERT = 1
# Level at which Guard.safe_update no longer requires the target to have been
# inserted through the same Guard.
UPDATE = 2
class Guard(object):
    """Gate inserts and updates behind a numeric security level.

    The guard remembers the ids of everything inserted through it. Those
    entities may later be updated even when the level is below UPDATE.
    """

    def __init__(self, level=RETRIEVE):
        """Create a guard.

        Parameters
        ----------
        level : int
            Security level compared against INSERT and UPDATE.
        """
        self.level = level
        # ids of the entities that were inserted via safe_insert
        self.freshly_created = []

    def safe_insert(self, obj):
        """Insert `obj` if the level permits it and remember the new ids.

        `obj` is either a db.Container or a single object providing
        `insert()` and `id`. Raises Exception("not allowed") when the level
        is below INSERT.
        """
        if self.level < INSERT:
            raise Exception("not allowed")

        obj.insert()

        # Collect the ids only after the insert has been carried out.
        if isinstance(obj, db.Container):
            new_ids = [member.id for member in obj]
        else:
            new_ids = [obj.id]
        self.freshly_created += new_ids

    def safe_update(self, obj, **kwargs):
        """Update `obj` if the level permits it.

        Below UPDATE level, the update is only carried out when `obj` (or,
        for a db.Container, every member of it) was inserted through this
        guard; otherwise Exception("not allowed") is raised. Keyword
        arguments are forwarded to `obj.update`.
        """
        if isinstance(obj, db.Container):
            # Members that were not created through this guard.
            foreign = [member for member in obj
                       if member.id not in self.freshly_created]
            denied = self.level < UPDATE and len(foreign) > 0
        else:
            denied = (self.level < UPDATE
                      and obj.id not in self.freshly_created)

        if denied:
            raise Exception("not allowed")

        obj.update(**kwargs)
......@@ -26,6 +26,7 @@ import unittest
import caosdb as db
from caosadvancedtools.cfood import (AbstractCFood, assure_has_parent,
assure_object_is_in_list)
from caosadvancedtools.example_cfood import ExampleCFood
PATTERN = "h.*"
......@@ -71,3 +72,14 @@ class InsertionTest(unittest.TestCase):
to_be_updated = []
assure_has_parent(entity, "parent", to_be_updated)
assert len(to_be_updated) == 0
class ExampleTest(unittest.TestCase):
    """Checks ExampleCFood's path pattern against a sample README path."""

    def test(self):
        """The sample path must match and yield the species and date groups."""
        readme = db.File(path="/data/rabbit/2019-03-03/README.md")
        # Diagnostic output: the path under test and the pattern used.
        print(readme.path)
        print(ExampleCFood.get_re())

        cfood = ExampleCFood(crawled_file=readme)
        self.assertIsNotNone(ExampleCFood.match(readme.path))

        expected_groups = (
            ('species', 'rabbit'),
            ('date', '2019-03-03'),
        )
        for group_name, expected in expected_groups:
            self.assertEqual(cfood.match.group(group_name), expected)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment