Skip to content
Snippets Groups Projects
Commit 970d7520 authored by Henrik tom Wörden's avatar Henrik tom Wörden
Browse files

Merge branch 'guard' into 'master'

Guard

See merge request caosdb/caosdb-advanced-user-tools!9
parents 7949241c fd63ccc4
No related branches found
No related tags found
No related merge requests found
...@@ -29,6 +29,7 @@ from argparse import RawTextHelpFormatter ...@@ -29,6 +29,7 @@ from argparse import RawTextHelpFormatter
import caosdb as db import caosdb as db
from caosadvancedtools.crawler import Crawler from caosadvancedtools.crawler import Crawler
from caosadvancedtools.verbosity import DEBUG, INFO, QUIET, VERBOSE from caosadvancedtools.verbosity import DEBUG, INFO, QUIET, VERBOSE
from caosadvancedtools.guard import INSERT, RETRIEVE, UPDATE, Guard
from scifolder import (AnalysisCFood, ExperimentCFood, ProjectCFood, from scifolder import (AnalysisCFood, ExperimentCFood, ProjectCFood,
PublicationCFood, SimulationCFood) PublicationCFood, SimulationCFood)
...@@ -60,4 +61,4 @@ if __name__ == "__main__": ...@@ -60,4 +61,4 @@ if __name__ == "__main__":
AnalysisCFood, ExperimentCFood, AnalysisCFood, ExperimentCFood,
PublicationCFood, SimulationCFood, PublicationCFood, SimulationCFood,
]) ])
c.crawl(files) c.crawl(files, security_level=UPDATE)
...@@ -62,7 +62,8 @@ class AbstractCFood(object): ...@@ -62,7 +62,8 @@ class AbstractCFood(object):
# function match() # function match()
_pattern = None _pattern = None
def __init__(self, crawled_file, access=lambda x: x, verbosity=INFO): def __init__(self, crawled_file, access=lambda x: x, verbosity=INFO,
guard=None):
""" Abstract base class for Crawler food (CFood). """ Abstract base class for Crawler food (CFood).
Parameters Parameters
...@@ -81,6 +82,7 @@ class AbstractCFood(object): ...@@ -81,6 +82,7 @@ class AbstractCFood(object):
self.identifiables = db.Container() self.identifiables = db.Container()
self.verbosity = verbosity self.verbosity = verbosity
self.attached_ones = [] self.attached_ones = []
self.guard = guard
@staticmethod @staticmethod
def get_re(): def get_re():
...@@ -167,7 +169,7 @@ class AbstractCFood(object): ...@@ -167,7 +169,7 @@ class AbstractCFood(object):
for el in self.to_be_updated: for el in self.to_be_updated:
print(el.name if el.name is not None else el.id) print(el.name if el.name is not None else el.id)
print("/"*60) print("/"*60)
self.to_be_updated.update() self.guard.safe_update(self.to_be_updated)
def attach(self, crawled_file): def attach(self, crawled_file):
self.attached_ones.append(crawled_file) self.attached_ones.append(crawled_file)
...@@ -242,6 +244,11 @@ def assure_object_is_in_list(obj, containing_object, property_name, ...@@ -242,6 +244,11 @@ def assure_object_is_in_list(obj, containing_object, property_name,
else: else:
objects = obj objects = obj
# use ids if values are entities
if all([isinstance(el, db.Entity) for el in objects]):
objects = [el.id for el in objects]
update = False update = False
for o in objects: for o in objects:
...@@ -271,7 +278,7 @@ def assure_object_is_in_list(obj, containing_object, property_name, ...@@ -271,7 +278,7 @@ def assure_object_is_in_list(obj, containing_object, property_name,
to_be_updated.append(containing_object) to_be_updated.append(containing_object)
def assure_has_parent(entity, parent, to_be_updated=None, verbosity=INFO): def assure_has_parent(entity, parent, to_be_updated=None, guard=None, verbosity=INFO):
""" """
Checks whether `entity` has a parent with name `parent`. Checks whether `entity` has a parent with name `parent`.
...@@ -304,13 +311,17 @@ def assure_has_parent(entity, parent, to_be_updated=None, verbosity=INFO): ...@@ -304,13 +311,17 @@ def assure_has_parent(entity, parent, to_be_updated=None, verbosity=INFO):
if to_be_updated is None: if to_be_updated is None:
get_ids_for_entities_with_names([entity]) get_ids_for_entities_with_names([entity])
entity.update(unique=False)
if guard is None:
entity.update(unique=False)
else:
guard.safe_updat(entity, unique=False)
else: else:
to_be_updated.append(entity) to_be_updated.append(entity)
def assure_has_property(entity, name, value, to_be_updated=None, def assure_has_property(entity, name, value, to_be_updated=None,
verbosity=INFO, datatype=None): verbosity=INFO, guard=None, datatype=None):
""" """
Checks whether `entity` has a property `name` with the value `value`. Checks whether `entity` has a property `name` with the value `value`.
...@@ -319,6 +330,26 @@ def assure_has_property(entity, name, value, to_be_updated=None, ...@@ -319,6 +330,26 @@ def assure_has_property(entity, name, value, to_be_updated=None,
the list in order to indicate, that the entity `entity` should be updated. the list in order to indicate, that the entity `entity` should be updated.
Otherwise it is directly updated Otherwise it is directly updated
""" """
if name.lower() == "description":
if entity.description == value:
return
else:
print("/"*60)
print("UPDATE: Adding description with value {} to "
"entity {}".format(
value, entity.id))
print("/"*60)
entity.description = value
if to_be_updated is None:
get_ids_for_entities_with_names([entity])
if guard is None:
entity.update(unique=False)
return
properties = entity.get_properties() properties = entity.get_properties()
possible_properties = [prop for prop in properties if prop.name == name] possible_properties = [prop for prop in properties if prop.name == name]
contained = False contained = False
...@@ -351,7 +382,11 @@ def assure_has_property(entity, name, value, to_be_updated=None, ...@@ -351,7 +382,11 @@ def assure_has_property(entity, name, value, to_be_updated=None,
if to_be_updated is None: if to_be_updated is None:
get_ids_for_entities_with_names([entity]) get_ids_for_entities_with_names([entity])
entity.update(unique=False)
if guard is None:
entity.update(unique=False)
else:
guard.safe_updat(entity, unique=False)
else: else:
to_be_updated.append(entity) to_be_updated.append(entity)
......
...@@ -42,6 +42,7 @@ import caosdb as db ...@@ -42,6 +42,7 @@ import caosdb as db
from caosdb.exceptions import TransactionError from caosdb.exceptions import TransactionError
from .cache import Cache from .cache import Cache
from .guard import INSERT, RETRIEVE, UPDATE, Guard
from .verbosity import DEBUG, INFO, VERBOSE from .verbosity import DEBUG, INFO, VERBOSE
...@@ -70,7 +71,11 @@ class Crawler(object): ...@@ -70,7 +71,11 @@ class Crawler(object):
if self.use_cache: if self.use_cache:
self.cache = Cache() self.cache = Cache()
def crawl(self, files): def crawl(self, files, security_level=RETRIEVE):
errors_occured = False
tbs = []
self.guard = Guard(level=security_level)
files = sorted(files, key=lambda x: x.path) files = sorted(files, key=lambda x: x.path)
cfoods = [] cfoods = []
matches = {f.path: 0 for f in files} matches = {f.path: 0 for f in files}
...@@ -96,10 +101,15 @@ class Crawler(object): ...@@ -96,10 +101,15 @@ class Crawler(object):
crawled_file.path)) crawled_file.path))
try: try:
cfoods.append(Cfood(crawled_file, access=self.access, cfoods.append(Cfood(crawled_file, access=self.access,
verbosity=self.verbosity)) verbosity=self.verbosity,
guard=self.guard))
except Exception as e: except Exception as e:
traceback.print_exc() traceback.print_exc()
print(e) print(e)
errors_occured = True
+ tbs.append(e)
if self.verbosity >= INFO: if self.verbosity >= INFO:
print("-"*60) print("-"*60)
...@@ -144,7 +154,9 @@ class Crawler(object): ...@@ -144,7 +154,9 @@ class Crawler(object):
cfood.identifiables) cfood.identifiables)
self.find_or_insert_identifiables(cfood.identifiables, self.find_or_insert_identifiables(cfood.identifiables,
self.verbosity) self.verbosity,
guard=self.guard
)
if self.use_cache: if self.use_cache:
self.cache.insert_list(hashes, cfood.identifiables) self.cache.insert_list(hashes, cfood.identifiables)
...@@ -154,14 +166,22 @@ class Crawler(object): ...@@ -154,14 +166,22 @@ class Crawler(object):
except Exception as e: except Exception as e:
traceback.print_exc() traceback.print_exc()
print(e) print(e)
errors_occured = True
tbs.append(e)
if self.verbosity >= INFO: if self.verbosity >= INFO:
print("-"*60) print("-"*60)
print("Crawler terminated successfully!")
if errors_occured:
print("Crawler terminated with failures!")
print(tbs)
else:
print("Crawler terminated successfully!")
print("-"*60) print("-"*60)
# TODO remove static?
@staticmethod @staticmethod
def find_or_insert_identifiables(identifiables, verbosity=INFO): def find_or_insert_identifiables(identifiables, verbosity=INFO, guard=None):
""" Sets the ids of identifiables (that do not have already an id from the """ Sets the ids of identifiables (that do not have already an id from the
cache) based on searching CaosDB and retrieves those entities. cache) based on searching CaosDB and retrieves those entities.
The remaining entities (those which can not be retrieved) have no The remaining entities (those which can not be retrieved) have no
...@@ -202,10 +222,11 @@ class Crawler(object): ...@@ -202,10 +222,11 @@ class Crawler(object):
for ent in missing_identifiables: for ent in missing_identifiables:
print(ent) print(ent)
if verbosity >= VERBOSE and len(missing_identifiables) == 0: if len(missing_identifiables) == 0:
print("No new entities to be inserted.") if verbosity >= VERBOSE:
print("No new entities to be inserted.")
missing_identifiables.insert() else:
guard.safe_insert(missing_identifiables)
if verbosity >= VERBOSE: if verbosity >= VERBOSE:
print("Retrieving entities from CaosDB...") print("Retrieving entities from CaosDB...")
......
...@@ -19,28 +19,27 @@ ...@@ -19,28 +19,27 @@
# #
# You should have received a copy of the GNU Affero General Public License # You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>. # along with this program. If not, see <https://www.gnu.org/licenses/>.
#
# ** end header
#
import caosdb as db import caosdb as db
from caosadvancedtools.cfood import AbstractCFood from .cfood import AbstractCFood, assure_has_property
class ExampleCFood(AbstractCFood): class ExampleCFood(AbstractCFood):
def create_identifiables(self, crawled_file, match): @staticmethod
print("create_identifiables") def get_re():
entities = {} return (r".*/(?P<species>[^/]+)/"
entities["exp"] = db.Record() r"(?P<date>\d{4}-\d{2}-\d{2})/README.md")
#import IPython
# IPython.embed()
entities["exp"].add_parent(name="Experiment")
entities["exp"].add_property(name="species", value=match.group)
return entities def create_identifiables(self):
self.experiment = db.Record()
self.experiment.add_parent(name="Experiment")
self.experiment.add_property(name="date",
value=self.match.group('date'))
self.identifiables.append(self.experiment)
def update_identifiables(self, entities, crawled_file, match): def update_identifiables(self):
entities["exp"].add_property(name="date", assure_has_property(
value=datetime.today().isoformat()) self.experiment,
db.Container().extend(entities.values).update() "species",
self.match.group('species'))
import caosdb as db
# Security levels, ordered from most to least restrictive. Each level
# includes the permissions of the ones before it.
RETRIEVE = 0
INSERT = 1
UPDATE = 2


class ProhibitedException(Exception):
    """Raised when a Guard refuses a transaction at its security level.

    It derives from ``Exception`` so that existing ``except Exception``
    handlers keep catching it.
    """


class Guard(object):
    """Gate insert and update transactions behind a security level.

    Parameters
    ----------
    level : int
        One of RETRIEVE, INSERT or UPDATE. Inserts need at least INSERT.
        Updates need UPDATE, unless every entity being updated was inserted
        through this very guard.

    Attributes
    ----------
    freshly_created : list
        Ids of the entities that were inserted via `safe_insert`.
    level : int
        The security level given at construction.
    """

    def __init__(self, level=RETRIEVE):
        self.freshly_created = []
        self.level = level

    def safe_insert(self, obj, **kwargs):
        """Insert `obj` if the security level permits it.

        Parameters
        ----------
        obj : object
            Anything with an ``insert`` method and either an ``id`` attribute
            or, for a ``db.Container``, elements that have one.
        **kwargs
            Passed on to ``obj.insert`` unchanged.

        Raises
        ------
        ProhibitedException
            If the level is below INSERT. Nothing is inserted in that case.
        """
        if self.level < INSERT:
            raise ProhibitedException("not allowed")
        obj.insert(**kwargs)

        if isinstance(obj, db.Container):
            new_ids = [el.id for el in obj]
        else:
            new_ids = [obj.id]
        # Never remember a missing id: a None in the list would make every
        # entity without an id pass the freshness check in safe_update.
        self.freshly_created.extend(
            new_id for new_id in new_ids if new_id is not None)

    def safe_update(self, obj, **kwargs):
        """Update `obj` if the security level permits it.

        Below the UPDATE level the update is still carried out when every
        affected entity was inserted through this guard, since changing
        freshly created entities cannot damage pre-existing data.

        Parameters
        ----------
        obj : object
            Anything with an ``update`` method and either an ``id`` attribute
            or, for a ``db.Container``, elements that have one.
        **kwargs
            Passed on to ``obj.update`` unchanged.

        Raises
        ------
        ProhibitedException
            If the level is below UPDATE and at least one affected entity was
            not inserted through this guard. Nothing is updated in that case.
        """
        if self.level < UPDATE:
            if isinstance(obj, db.Container):
                affected_ids = [el.id for el in obj]
            else:
                affected_ids = [obj.id]

            if not all(el_id in self.freshly_created
                       for el_id in affected_ids):
                raise ProhibitedException("not allowed")
        obj.update(**kwargs)
...@@ -26,6 +26,7 @@ import unittest ...@@ -26,6 +26,7 @@ import unittest
import caosdb as db import caosdb as db
from caosadvancedtools.cfood import (AbstractCFood, assure_has_parent, from caosadvancedtools.cfood import (AbstractCFood, assure_has_parent,
assure_object_is_in_list) assure_object_is_in_list)
from caosadvancedtools.example_cfood import ExampleCFood
PATTERN = "h.*" PATTERN = "h.*"
...@@ -71,3 +72,14 @@ class InsertionTest(unittest.TestCase): ...@@ -71,3 +72,14 @@ class InsertionTest(unittest.TestCase):
to_be_updated = [] to_be_updated = []
assure_has_parent(entity, "parent", to_be_updated) assure_has_parent(entity, "parent", to_be_updated)
assert len(to_be_updated) == 0 assert len(to_be_updated) == 0
class ExampleTest(unittest.TestCase):
    """Check the groups that ExampleCFood's pattern captures from a path."""

    def test(self):
        # A README below <species>/<date>/ is what the example pattern
        # is expected to match.
        readme = db.File(path="/data/rabbit/2019-03-03/README.md")
        print(readme.path)
        print(ExampleCFood.get_re())
        cfood = ExampleCFood(crawled_file=readme)
        self.assertIsNotNone(ExampleCFood.match(readme.path))

        expected_groups = (
            ("species", "rabbit"),
            ("date", "2019-03-03"),
        )
        for group_name, expected in expected_groups:
            self.assertEqual(cfood.match.group(group_name), expected)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment