Skip to content
Snippets Groups Projects
Commit a7f2d477 authored by Florian Spreckelsen
Browse files

Merge branch 'f-output' into 'dev'

ENH: fine tune the output during crawling

See merge request caosdb/caosdb-advanced-user-tools!26
parents a3748259 4f579ab1
Branches
Tags
1 merge request: !22 "Release 0.3"
...@@ -105,7 +105,9 @@ style: ...@@ -105,7 +105,9 @@ style:
stage: style stage: style
image: $CI_REGISTRY_IMAGE image: $CI_REGISTRY_IMAGE
script: script:
- autopep8 -ar --diff --exit-code . # For the moment, ignore type comparisons in datamodel_problems.py
- autopep8 -ar --diff --exit-code --exclude ./src/caosadvancedtools/datamodel_problems.py .
- autopep8 -ar --diff --exit-code --ignore E721 ./src/caosadvancedtools/datamodel_problems.py
allow_failure: true allow_failure: true
unittest: unittest:
......
...@@ -78,6 +78,7 @@ def get_property(name): ...@@ -78,6 +78,7 @@ def get_property(name):
model problems model problems
""" """
if name not in PROPERTIES: if name not in PROPERTIES:
try: try:
prop = db.execute_query("FIND Property "+name, prop = db.execute_query("FIND Property "+name,
...@@ -97,6 +98,7 @@ def get_record(name): ...@@ -97,6 +98,7 @@ def get_record(name):
from CaosDB. from CaosDB.
""" """
if name not in RECORDS: if name not in RECORDS:
rec = db.execute_query("FIND Record "+name, unique=True) rec = db.execute_query("FIND Record "+name, unique=True)
RECORDS[name] = rec RECORDS[name] = rec
...@@ -113,6 +115,7 @@ def get_recordtype(name): ...@@ -113,6 +115,7 @@ def get_recordtype(name):
model problems model problems
""" """
if name not in RECORDTYPES: if name not in RECORDTYPES:
try: try:
rec = db.execute_query("FIND RecordType "+name, unique=True) rec = db.execute_query("FIND RecordType "+name, unique=True)
...@@ -382,7 +385,7 @@ def assure_object_is_in_list(obj, containing_object, property_name, ...@@ -382,7 +385,7 @@ def assure_object_is_in_list(obj, containing_object, property_name,
o, property_name, containing_object.id)) o, property_name, containing_object.id))
else: else:
logger.info("UPDATE: Appending {} to {} of entity {}".format( logger.debug("UPDATE: Appending {} to {} of entity {}".format(
o, property_name, containing_object.id)) o, property_name, containing_object.id))
current_list.append(o) current_list.append(o)
update = True update = True
...@@ -405,7 +408,7 @@ def assure_has_description(entity, description, to_be_updated=None, ...@@ -405,7 +408,7 @@ def assure_has_description(entity, description, to_be_updated=None,
if entity.description == description: if entity.description == description:
return return
logger.info("UPDATE: set description of entity {}".format(entity.id)) logger.debug("UPDATE: set description of entity {}".format(entity.id))
entity.description = description entity.description = description
if to_be_updated is None: if to_be_updated is None:
...@@ -441,8 +444,8 @@ def assure_has_parent(entity, parent, to_be_updated=None, ...@@ -441,8 +444,8 @@ def assure_has_parent(entity, parent, to_be_updated=None,
return return
logger.info("UPDATE: Adding parent {} to entity {}".format(parent, logger.debug("UPDATE: Adding parent {} to entity {}".format(parent,
entity.id)) entity.id))
entity.add_parent(parent) entity.add_parent(parent)
if to_be_updated is None: if to_be_updated is None:
...@@ -474,8 +477,8 @@ def assure_has_property(entity, name, value, to_be_updated=None, ...@@ -474,8 +477,8 @@ def assure_has_property(entity, name, value, to_be_updated=None,
if entity.description == value: if entity.description == value:
return return
else: else:
logger.info("UPDATE: Adding description with value {} to " logger.debug("UPDATE: Adding description with value {} to "
"entity {}".format(value, entity.id)) "entity {}".format(value, entity.id))
entity.description = value entity.description = value
if to_be_updated is None: if to_be_updated is None:
...@@ -505,7 +508,7 @@ def assure_has_property(entity, name, value, to_be_updated=None, ...@@ -505,7 +508,7 @@ def assure_has_property(entity, name, value, to_be_updated=None,
return return
logger.info( logger.debug(
"UPDATE: Adding property {} with value {} to entity {}".format( "UPDATE: Adding property {} with value {} to entity {}".format(
name, value, entity.id)) name, value, entity.id))
......
...@@ -179,7 +179,7 @@ class Crawler(object): ...@@ -179,7 +179,7 @@ class Crawler(object):
errors_occured = False errors_occured = False
matches = {idx: [] for idx, _ in self.iteritems()} matches = {idx: [] for idx, _ in self.iteritems()}
logger.info(separated("Matching files against CFoods")) logger.debug(separated("Matching files against CFoods"))
for Cfood in self.cfood_types: for Cfood in self.cfood_types:
logger.debug("Matching against {}...".format(Cfood.__name__)) logger.debug("Matching against {}...".format(Cfood.__name__))
...@@ -193,9 +193,9 @@ class Crawler(object): ...@@ -193,9 +193,9 @@ class Crawler(object):
Cfood.__name__, Cfood.__name__,
item)) item))
except Exception as e: except Exception as e:
logger.info("Failed during execution of {}!".format( logger.debug("Failed during execution of {}!".format(
cfood.__class__)) Cfood.__name__))
traceback.print_exc() # traceback.print_exc()
logger.debug(e) logger.debug(e)
if self.abort_on_exception: if self.abort_on_exception:
...@@ -204,12 +204,12 @@ class Crawler(object): ...@@ -204,12 +204,12 @@ class Crawler(object):
errors_occured = True errors_occured = True
tbs.append(e) tbs.append(e)
logger.info(separated("CFoods are collecting information...")) logger.debug(separated("CFoods are collecting information..."))
for cfood in cfoods: for cfood in cfoods:
cfood.collect_information() cfood.collect_information()
logger.info(separated("Trying to attach further items to created CFoods")) logger.debug(separated("Trying to attach further items to created CFoods"))
for cfood in cfoods: for cfood in cfoods:
logger.debug("Matching against {}...".format(Cfood.__name__)) logger.debug("Matching against {}...".format(Cfood.__name__))
...@@ -233,8 +233,8 @@ class Crawler(object): ...@@ -233,8 +233,8 @@ class Crawler(object):
for idx, item in self.iteritems(): for idx, item in self.iteritems():
if len(matches[idx]) == 0: if len(matches[idx]) == 0:
msg = ("ATTENTION: No matching cfood!\n" msg = ("The crawler has no matching rules for and is thus "
"Tried to match {}\n".format(item)) "ignoring:\n{}".format(item))
if item in ucache.filenames: if item in ucache.filenames:
logger.debug(msg) logger.debug(msg)
...@@ -271,12 +271,13 @@ class Crawler(object): ...@@ -271,12 +271,13 @@ class Crawler(object):
logger.info("Run Id: " + str(self.run_id)) logger.info("Run Id: " + str(self.run_id))
guard.set_level(level=security_level) guard.set_level(level=security_level)
logger.info("Scanning the objects to be treated...")
cfoods, tbs, errors_occured = self.collect_cfoods() cfoods, tbs, errors_occured = self.collect_cfoods()
if self.interactive and "y" != input("Do you want to continue? (y)"): if self.interactive and "y" != input("Do you want to continue? (y)"):
return return
logger.info(separated("Creating and updating Identifiables")) logger.info("Inserting or updating Records...")
for cfood in cfoods: for cfood in cfoods:
try: try:
...@@ -288,8 +289,8 @@ class Crawler(object): ...@@ -288,8 +289,8 @@ class Crawler(object):
self.push_identifiables_to_CaosDB(cfood) self.push_identifiables_to_CaosDB(cfood)
except Exception as e: except Exception as e:
logger.info("Failed during execution of {}!".format( logger.info("Failed during execution of {}!".format(
cfood.__class__)) cfood.__class__.__name__))
traceback.print_exc() # traceback.print_exc()
logger.debug(e) logger.debug(e)
if self.abort_on_exception: if self.abort_on_exception:
...@@ -309,20 +310,29 @@ class Crawler(object): ...@@ -309,20 +310,29 @@ class Crawler(object):
for i, el in enumerate(pending_changes): for i, el in enumerate(pending_changes):
logger.info( logger.debug(
""" """
UNAUTHORIZED UPDATE ({} of {}): UNAUTHORIZED UPDATE ({} of {}):
____________________\n""".format(i+1, len(pending_changes)) + str(el[3])) ____________________\n""".format(i+1, len(pending_changes)) + str(el[3]))
logger.info("There where unauthorized changes (see above). An " logger.info("There where unauthorized changes (see above). An "
"email was sent to the curator.\n" "email was sent to the curator.\n"
"You can authorize the updates by invoking the crawler" "You can authorize the updates by invoking the crawler"
" with the run id:\n\n" " with the run id: {rid}\n".format(rid=self.run_id,
"./crawl.py -a {rid} {path}\n".format(rid=self.run_id, path=path))
path=path))
if len(DataModelProblems.missing) > 0:
if errors_occured: err_msg = ("There were problems with one or more RecordType or "
logger.warning("Crawler terminated with failures!") "Property. Do they exist in the data model?\n")
logger.warning(tbs)
for ent in DataModelProblems.missing:
err_msg += str(ent) + "\n"
logger.error(err_msg)
logger.error('Crawler finished with Datamodel Errors')
elif errors_occured:
logger.error("There were fatal errors during execution, please "
"contact the system administrator!")
logger.error("Crawler terminated with failures!")
logger.debug(tbs)
else: else:
logger.info("Crawler terminated successfully!") logger.info("Crawler terminated successfully!")
...@@ -393,12 +403,16 @@ carefully and if the changes are ok, click on the following link: ...@@ -393,12 +403,16 @@ carefully and if the changes are ok, click on the following link:
filename=filename, filename=filename,
changes="\n".join(changes)) changes="\n".join(changes))
sendmail = caosdb_config["advancedtools"]["sendmail"] sendmail = caosdb_config["advancedtools"]["sendmail"]
p = subprocess.Popen( try:
[ fro = caosdb_config["advancedtools"]["crawler.from_mail"]
sendmail, "-f", to = caosdb_config["advancedtools"]["crawler.to_mail"]
caosdb_config["advancedtools"]["crawler.from_mail"], except KeyError:
caosdb_config["advancedtools"]["crawler.to_mail"]], logger.error("Server Configuration is missing a setting for "
stdin=subprocess.PIPE) "sending mails. The administrator should check "
"'from_mail' and 'to_mail'.")
return
p = subprocess.Popen([sendmail, "-f", fro, to], stdin=subprocess.PIPE)
p.communicate(input=text.encode()) p.communicate(input=text.encode())
def push_identifiables_to_CaosDB(self, cfood): def push_identifiables_to_CaosDB(self, cfood):
...@@ -420,10 +434,13 @@ carefully and if the changes are ok, click on the following link: ...@@ -420,10 +434,13 @@ carefully and if the changes are ok, click on the following link:
cfood.to_be_updated = tmp cfood.to_be_updated = tmp
logger.info("UPDATE: updating the following entities") info = "UPDATE: updating the following entities\n"
for el in cfood.to_be_updated: for el in cfood.to_be_updated:
logger.info("\t" + el.name if el.name is not None else el.id) info += str("\t" + el.name if el.name is not None else "\t" +
str(el.id))
info += "\n"
logger.info(info)
logger.debug(cfood.to_be_updated) logger.debug(cfood.to_be_updated)
try: try:
...@@ -464,10 +481,11 @@ carefully and if the changes are ok, click on the following link: ...@@ -464,10 +481,11 @@ carefully and if the changes are ok, click on the following link:
ent.id = None ent.id = None
if len(missing_identifiables) > 0: if len(missing_identifiables) > 0:
logger.info("Going to insert the following entities:") info = "Going to insert the following entities:\n"
for ent in missing_identifiables: for ent in missing_identifiables:
logger.info(ent) info += str(ent)+"\n"
logger.debug(info)
if len(missing_identifiables) == 0: if len(missing_identifiables) == 0:
logger.debug("No new entities to be inserted.") logger.debug("No new entities to be inserted.")
...@@ -535,10 +553,13 @@ class FileCrawler(Crawler): ...@@ -535,10 +553,13 @@ class FileCrawler(Crawler):
@staticmethod @staticmethod
def query_files(path): def query_files(path):
query_str = "FIND FILE WHICH IS STORED AT " + (path if path.endswith("/") else path + "/") + "**" query_str = "FIND FILE WHICH IS STORED AT " + (
logger.info("FILES QUERY: " + query_str) path if path.endswith("/") else path + "/") + "**"
q_info = "Sending the following query: '" + query_str + "'\n"
files = db.execute_query(query_str) files = db.execute_query(query_str)
logger.info("{} FILES TO BE PROCESSED.".format(len(files))) logger.info(
q_info + "Found {} files that need to be processed.".format(
len(files)))
return files return files
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment