Skip to content
Snippets Groups Projects
Commit 23445e6e authored by Henrik tom Wörden's avatar Henrik tom Wörden
Browse files

ENH: output can now be controlled by verbosity more specifically

parent 8f96c9cb
No related branches found
No related tags found
No related merge requests found
...@@ -38,6 +38,8 @@ import re ...@@ -38,6 +38,8 @@ import re
import caosdb as db import caosdb as db
from .verbosity import INFO, VERBOSE
ENTITIES = {} ENTITIES = {}
...@@ -60,7 +62,7 @@ class AbstractCFood(object): ...@@ -60,7 +62,7 @@ class AbstractCFood(object):
# function match() # function match()
_pattern = None _pattern = None
def __init__(self, crawled_file, access=lambda x: x, verbose=True): def __init__(self, crawled_file, access=lambda x: x, verbosity=INFO):
""" Abstract base class for Crawler food (CFood). """ Abstract base class for Crawler food (CFood).
Parameters Parameters
...@@ -77,7 +79,7 @@ class AbstractCFood(object): ...@@ -77,7 +79,7 @@ class AbstractCFood(object):
self.match = type(self).match(crawled_file.path) self.match = type(self).match(crawled_file.path)
self.to_be_updated = db.Container() self.to_be_updated = db.Container()
self.identifiables = db.Container() self.identifiables = db.Container()
self.verbose = verbose self.verbosity = verbosity
self.attached_ones = [] self.attached_ones = []
@staticmethod @staticmethod
...@@ -127,7 +129,16 @@ class AbstractCFood(object): ...@@ -127,7 +129,16 @@ class AbstractCFood(object):
if len(self.to_be_updated) == 0: if len(self.to_be_updated) == 0:
return return
get_ids_for_entities_with_names(self.to_be_updated) get_ids_for_entities_with_names(self.to_be_updated)
if self.verbosity >= INFO:
print("/"*60)
print("UPDATE: updating the following entities")
for el in self.to_be_updated:
print(el.name if el.name is not None else el.id)
print("/"*60)
self.to_be_updated.update() self.to_be_updated.update()
def attach(self, crawled_file): def attach(self, crawled_file):
...@@ -173,7 +184,7 @@ class AbstractCFood(object): ...@@ -173,7 +184,7 @@ class AbstractCFood(object):
def assure_object_is_in_list(obj, containing_object, property_name, def assure_object_is_in_list(obj, containing_object, property_name,
to_be_updated, verbose=True): to_be_updated, verbosity=INFO):
""" """
Checks whether `obj` is one of the values in the list property Checks whether `obj` is one of the values in the list property
`property_name` of the supplied entity `containing_object`. `property_name` of the supplied entity `containing_object`.
...@@ -212,15 +223,17 @@ def assure_object_is_in_list(obj, containing_object, property_name, ...@@ -212,15 +223,17 @@ def assure_object_is_in_list(obj, containing_object, property_name,
break break
if verbose: if contained:
if contained: if verbosity >= VERBOSE:
print("{} is in {} of entity {}".format( print("{} is in {} of entity {}".format(
o, property_name, containing_object.id)) o, property_name, containing_object.id))
else:
print("Appending {} to {} of entity {}".format(
o, property_name, containing_object.id))
if not contained: else:
if verbosity >= INFO:
print("/"*60)
print("UPDATE: Appending {} to {} of entity {}".format(
o, property_name, containing_object.id))
print("/"*60)
current_list.append(o) current_list.append(o)
update = True update = True
...@@ -228,7 +241,7 @@ def assure_object_is_in_list(obj, containing_object, property_name, ...@@ -228,7 +241,7 @@ def assure_object_is_in_list(obj, containing_object, property_name,
to_be_updated.append(containing_object) to_be_updated.append(containing_object)
def assure_has_parent(entity, parent, to_be_updated=None, verbose=True): def assure_has_parent(entity, parent, to_be_updated=None, verbosity=INFO):
""" """
Checks whether `entity` has a parent with name `parent`. Checks whether `entity` has a parent with name `parent`.
...@@ -247,13 +260,16 @@ def assure_has_parent(entity, parent, to_be_updated=None, verbose=True): ...@@ -247,13 +260,16 @@ def assure_has_parent(entity, parent, to_be_updated=None, verbose=True):
break break
if contained: if contained:
if verbose: if verbosity >= VERBOSE:
print("entity {} has parent {}".format(entity.id, parent)) print("entity {} has parent {}".format(entity.id, parent))
return return
if verbose: if verbosity >= INFO:
print("Adding parent {} to entity {}".format(parent, entity.id)) print("/"*60)
print("UPDATE: Adding parent {} to entity {}".format(parent,
entity.id))
print("/"*60)
entity.add_parent(parent) entity.add_parent(parent)
if to_be_updated is None: if to_be_updated is None:
...@@ -262,7 +278,8 @@ def assure_has_parent(entity, parent, to_be_updated=None, verbose=True): ...@@ -262,7 +278,8 @@ def assure_has_parent(entity, parent, to_be_updated=None, verbose=True):
to_be_updated.append(entity) to_be_updated.append(entity)
def assure_has_property(entity, name, value, to_be_updated=None, verbose=True): def assure_has_property(entity, name, value, to_be_updated=None,
verbosity=INFO):
""" """
Checks whether `entity` has a property `name` with the value `value`. Checks whether `entity` has a property `name` with the value `value`.
...@@ -282,17 +299,19 @@ def assure_has_property(entity, name, value, to_be_updated=None, verbose=True): ...@@ -282,17 +299,19 @@ def assure_has_property(entity, name, value, to_be_updated=None, verbose=True):
break break
if contained: if contained:
if verbose: if verbosity >= VERBOSE:
print("entity {} has property {} with value {}".format( print("entity {} has property {} with value {}".format(
entity.id, entity.id,
name, value)) name, value))
return return
if verbose: if verbosity >= INFO:
print("Adding property {} with value {} to entity {}".format( print("/"*60)
print("UPDATE: Adding property {} with value {} to entity {}".format(
name, name,
value, entity.id)) value, entity.id))
print("/"*60)
entity.add_property(name=name, value=value) entity.add_property(name=name, value=value)
if to_be_updated is None: if to_be_updated is None:
......
...@@ -42,10 +42,12 @@ import caosdb as db ...@@ -42,10 +42,12 @@ import caosdb as db
from caosdb.exceptions import TransactionError from caosdb.exceptions import TransactionError
from .cache import Cache from .cache import Cache
from .verbosity import DEBUG, INFO, VERBOSE
class Crawler(object): class Crawler(object):
def __init__(self, food, access=lambda x: x, use_cache=False, verbose=True): def __init__(self, food, access=lambda x: x, use_cache=False,
verbosity=INFO):
""" """
Parameters Parameters
---------- ----------
...@@ -63,15 +65,23 @@ class Crawler(object): ...@@ -63,15 +65,23 @@ class Crawler(object):
self.access = access self.access = access
self.report = db.Container() self.report = db.Container()
self.use_cache = use_cache self.use_cache = use_cache
self.verbose = verbose self.verbosity = verbosity
if self.use_cache: if self.use_cache:
self.cache = Cache() self.cache = Cache()
def crawl(self, files): def crawl(self, files):
files = sorted(files, key=lambda x: x.path)
cfoods = [] cfoods = []
matches = {f.path: 0 for f in files}
for crawled_file in sorted(files, key=lambda x: x.path): if self.verbosity >= INFO:
print("-"*60)
print("Matching files against CFoods")
for crawled_file in files:
if self.verbosity >= VERBOSE:
print("Matching {}...".format(crawled_file.path))
# if crawled_file.size == 0: # if crawled_file.size == 0:
# crawled_file.add_message( # crawled_file.add_message(
# type="Warning", description="This file is empty. Shouldn't we delete it?") # type="Warning", description="This file is empty. Shouldn't we delete it?")
...@@ -79,37 +89,47 @@ class Crawler(object): ...@@ -79,37 +89,47 @@ class Crawler(object):
# continue # continue
if self.verbose:
msg = "Matching {}".format(crawled_file.path)
print("="*len(msg))
print(msg)
print("="*len(msg))
matches = 0
for Cfood in self.food: for Cfood in self.food:
if Cfood.match(crawled_file.path) is not None: if Cfood.match(crawled_file.path) is not None:
matches += 1 matches[crawled_file.path] += 1
if self.verbose: if self.verbosity >= VERBOSE:
print("{} matched.".format(Cfood.__name__)) print("{} matched.".format(Cfood.__name__))
try: try:
cfoods.append(Cfood(crawled_file, access=self.access, cfoods.append(Cfood(crawled_file, access=self.access,
verbose=self.verbose)) verbosity=self.verbosity))
except Exception as e: except Exception as e:
traceback.print_exc() traceback.print_exc()
print(e) print(e)
if self.verbose and matches == 0: if self.verbosity >= INFO:
print("ATTENTION: No matching cfood!") print("-"*60)
print("Trying to attach files to created CFoods")
if self.verbose and matches > 1: for crawled_file in files:
print("Attention: More than one matching cfood!") if self.verbosity >= VERBOSE:
print("Matching {}...".format(crawled_file.path))
for crawled_file in sorted(files, key=lambda x: x.path):
for cfood in cfoods: for cfood in cfoods:
if cfood.looking_for(crawled_file): if cfood.looking_for(crawled_file):
if self.verbosity >= VERBOSE:
print("{} matched.".format(cfood.__class__.__name__))
cfood.attach(crawled_file) cfood.attach(crawled_file)
matches[crawled_file.path] += 1
if self.verbosity >= INFO:
for crawled_file in files:
if matches[crawled_file.path] == 0:
print("ATTENTION: No matching cfood!")
print("Tried to match {}".format(crawled_file.path))
if matches[crawled_file.path] > 1:
print("Attention: More than one matching cfood!")
print("Tried to match {}".format(crawled_file.path))
if self.verbosity >= INFO:
print("-"*60)
print("Creating and updating Identifiables")
for cfood in cfoods: for cfood in cfoods:
try: try:
...@@ -120,7 +140,7 @@ class Crawler(object): ...@@ -120,7 +140,7 @@ class Crawler(object):
cfood.identifiables) cfood.identifiables)
self.find_or_insert_identifiables(cfood.identifiables, self.find_or_insert_identifiables(cfood.identifiables,
self.verbose) self.verbosity)
if self.use_cache: if self.use_cache:
self.cache.insert_list(hashes, cfood.identifiables) self.cache.insert_list(hashes, cfood.identifiables)
...@@ -131,8 +151,13 @@ class Crawler(object): ...@@ -131,8 +151,13 @@ class Crawler(object):
traceback.print_exc() traceback.print_exc()
print(e) print(e)
if self.verbosity >= INFO:
print("-"*60)
print("Crawler terminated successfully!")
print("-"*60)
@staticmethod @staticmethod
def find_or_insert_identifiables(identifiables, verbose=True): def find_or_insert_identifiables(identifiables, verbosity=INFO):
""" Sets the ids of identifiables (that do not have already an id from the """ Sets the ids of identifiables (that do not have already an id from the
cache) based on searching CaosDB and retrieves those entities. cache) based on searching CaosDB and retrieves those entities.
The remaining entities (those which can not be retrieved) have no The remaining entities (those which can not be retrieved) have no
...@@ -141,20 +166,20 @@ class Crawler(object): ...@@ -141,20 +166,20 @@ class Crawler(object):
# looking for matching entities in CaosDB when there is no valid id # looking for matching entities in CaosDB when there is no valid id
# i.e. there was none set from a cache # i.e. there was none set from a cache
if verbose: if verbosity >= VERBOSE:
print("-----------------------------------------------------") print("-----------------------------------------------------")
for ent in identifiables: for ent in identifiables:
if ent.id is None or ent.id < 0: if ent.id is None or ent.id < 0:
if verbose: if verbosity >= VERBOSE:
print("Looking for:") print("Looking for:")
print(ent) print(ent)
existing = Crawler.find_existing(ent) existing = Crawler.find_existing(ent, verbosity=verbosity)
if existing is not None: if existing is not None:
ent.id = existing.id ent.id = existing.id
else: else:
if verbose: if verbosity >= DEBUG:
print("Id is known of:") print("Id is known of:")
print(ent) print(ent)
...@@ -167,22 +192,26 @@ class Crawler(object): ...@@ -167,22 +192,26 @@ class Crawler(object):
for ent in missing_identifiables: for ent in missing_identifiables:
ent.id = None ent.id = None
if verbose: if verbosity >= INFO and len(missing_identifiables) > 0:
print("Going to insert the following entities:") print("Going to insert the following entities:")
for ent in missing_identifiables: for ent in missing_identifiables:
print(ent) print(ent)
if verbosity >= VERBOSE and len(missing_identifiables) == 0:
print("No new entities to be inserted.")
missing_identifiables.insert() missing_identifiables.insert()
if verbose: if verbosity >= VERBOSE:
print("Updating entities from CaosDB...") print("Retrieving entities from CaosDB...")
identifiables.retrieve(unique=True, raise_exception_on_error=False) identifiables.retrieve(unique=True, raise_exception_on_error=False)
if verbose: if verbosity >= VERBOSE:
print("-----------------------------------------------------") print("-----------------------------------------------------")
@staticmethod @staticmethod
def find_existing(entity): def find_existing(entity, verbosity=INFO):
"""searches for an entity that matches the identifiable in CaosDB """searches for an entity that matches the identifiable in CaosDB
Characteristics of the identifiable, like properties, name or id, are Characteristics of the identifiable, like properties, name or id, are
...@@ -205,7 +234,8 @@ class Crawler(object): ...@@ -205,7 +234,8 @@ class Crawler(object):
else: else:
query_string = "FIND '{}'".format(entity.name) query_string = "FIND '{}'".format(entity.name)
print(query_string) if verbosity >= VERBOSE:
print(query_string)
q = db.Query(query_string) q = db.Query(query_string)
# the identifiable should identify an object uniquely. Thus the query # the identifiable should identify an object uniquely. Thus the query
# is using the unique keyword # is using the unique keyword
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment