Commit a7f2d477 authored by Florian Spreckelsen

Merge branch 'f-output' into 'dev'

ENH: fine tune the output during crawling

See merge request caosdb/caosdb-advanced-user-tools!26
parents a3748259 4f579ab1
Part of merge request !22 (Release 0.3)
@@ -105,7 +105,9 @@ style:
stage: style
image: $CI_REGISTRY_IMAGE
script:
- autopep8 -ar --diff --exit-code .
# For the moment, ignore type comparisons in datamodel_problems.py
- autopep8 -ar --diff --exit-code --exclude ./src/caosadvancedtools/datamodel_problems.py .
- autopep8 -ar --diff --exit-code --ignore E721 ./src/caosadvancedtools/datamodel_problems.py
allow_failure: true
unittest:
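For context, E721 is the pycodestyle warning about comparing types directly instead of using isinstance(). A minimal, hypothetical illustration of the pattern that is deliberately tolerated in datamodel_problems.py (the snippet is not taken from that file):

# Hypothetical example of what autopep8/pycodestyle reports as E721
value = 3
if type(value) == int:       # flagged: "do not compare types"
    print("direct type comparison, tolerated in datamodel_problems.py")
if isinstance(value, int):   # the form the check recommends instead
    print("isinstance check")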
@@ -78,6 +78,7 @@ def get_property(name):
model problems
"""
if name not in PROPERTIES:
try:
prop = db.execute_query("FIND Property "+name,
@@ -97,6 +98,7 @@ def get_record(name):
from CaosDB.
"""
if name not in RECORDS:
rec = db.execute_query("FIND Record "+name, unique=True)
RECORDS[name] = rec
@@ -113,6 +115,7 @@ def get_recordtype(name):
model problems
"""
if name not in RECORDTYPES:
try:
rec = db.execute_query("FIND RecordType "+name, unique=True)
@@ -382,7 +385,7 @@ def assure_object_is_in_list(obj, containing_object, property_name,
o, property_name, containing_object.id))
else:
logger.info("UPDATE: Appending {} to {} of entity {}".format(
logger.debug("UPDATE: Appending {} to {} of entity {}".format(
o, property_name, containing_object.id))
current_list.append(o)
update = True
@@ -405,7 +408,7 @@ def assure_has_description(entity, description, to_be_updated=None,
if entity.description == description:
return
logger.info("UPDATE: set description of entity {}".format(entity.id))
logger.debug("UPDATE: set description of entity {}".format(entity.id))
entity.description = description
if to_be_updated is None:
@@ -441,8 +444,8 @@ def assure_has_parent(entity, parent, to_be_updated=None,
return
logger.info("UPDATE: Adding parent {} to entity {}".format(parent,
entity.id))
logger.debug("UPDATE: Adding parent {} to entity {}".format(parent,
entity.id))
entity.add_parent(parent)
if to_be_updated is None:
@@ -474,8 +477,8 @@ def assure_has_property(entity, name, value, to_be_updated=None,
if entity.description == value:
return
else:
logger.info("UPDATE: Adding description with value {} to "
"entity {}".format(value, entity.id))
logger.debug("UPDATE: Adding description with value {} to "
"entity {}".format(value, entity.id))
entity.description = value
if to_be_updated is None:
@@ -505,7 +508,7 @@ def assure_has_property(entity, name, value, to_be_updated=None,
return
logger.info(
logger.debug(
"UPDATE: Adding property {} with value {} to entity {}".format(
name, value, entity.id))
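For orientation, the assure_* helpers touched above are typically called from CFood implementations together with a list that collects entities needing an update. A rough, hypothetical usage sketch based only on the signatures visible in this diff; the import path and the Record used here are assumptions:

import caosdb as db
# Assumed import path; adjust if the helpers live in a different module.
from caosadvancedtools.cfood import assure_has_description, assure_has_property

to_be_updated = []
rec = db.Record(name="ExampleRecord")  # placeholder entity for illustration
assure_has_description(rec, "An example record", to_be_updated=to_be_updated)
assure_has_property(rec, "frequency", 42, to_be_updated=to_be_updated)
# Entities collected in to_be_updated would later be pushed back to CaosDB.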
@@ -179,7 +179,7 @@ class Crawler(object):
errors_occured = False
matches = {idx: [] for idx, _ in self.iteritems()}
logger.info(separated("Matching files against CFoods"))
logger.debug(separated("Matching files against CFoods"))
for Cfood in self.cfood_types:
logger.debug("Matching against {}...".format(Cfood.__name__))
@@ -193,9 +193,9 @@ class Crawler(object):
Cfood.__name__,
item))
except Exception as e:
logger.info("Failed during execution of {}!".format(
cfood.__class__))
traceback.print_exc()
logger.debug("Failed during execution of {}!".format(
Cfood.__name__))
# traceback.print_exc()
logger.debug(e)
if self.abort_on_exception:
@@ -204,12 +204,12 @@ class Crawler(object):
errors_occured = True
tbs.append(e)
logger.info(separated("CFoods are collecting information..."))
logger.debug(separated("CFoods are collecting information..."))
for cfood in cfoods:
cfood.collect_information()
logger.info(separated("Trying to attach further items to created CFoods"))
logger.debug(separated("Trying to attach further items to created CFoods"))
for cfood in cfoods:
logger.debug("Matching against {}...".format(Cfood.__name__))
@@ -233,8 +233,8 @@ class Crawler(object):
for idx, item in self.iteritems():
if len(matches[idx]) == 0:
msg = ("ATTENTION: No matching cfood!\n"
"Tried to match {}\n".format(item))
msg = ("The crawler has no matching rules for and is thus "
"ignoring:\n{}".format(item))
if item in ucache.filenames:
logger.debug(msg)
@@ -271,12 +271,13 @@ class Crawler(object):
logger.info("Run Id: " + str(self.run_id))
guard.set_level(level=security_level)
logger.info("Scanning the objects to be treated...")
cfoods, tbs, errors_occured = self.collect_cfoods()
if self.interactive and "y" != input("Do you want to continue? (y)"):
return
logger.info(separated("Creating and updating Identifiables"))
logger.info("Inserting or updating Records...")
for cfood in cfoods:
try:
@@ -288,8 +289,8 @@ class Crawler(object):
self.push_identifiables_to_CaosDB(cfood)
except Exception as e:
logger.info("Failed during execution of {}!".format(
cfood.__class__))
traceback.print_exc()
cfood.__class__.__name__))
# traceback.print_exc()
logger.debug(e)
if self.abort_on_exception:
@@ -309,20 +310,29 @@ class Crawler(object):
for i, el in enumerate(pending_changes):
logger.info(
logger.debug(
"""
UNAUTHORIZED UPDATE ({} of {}):
____________________\n""".format(i+1, len(pending_changes)) + str(el[3]))
logger.info("There where unauthorized changes (see above). An "
"email was sent to the curator.\n"
"You can authorize the updates by invoking the crawler"
" with the run id:\n\n"
"./crawl.py -a {rid} {path}\n".format(rid=self.run_id,
path=path))
if errors_occured:
logger.warning("Crawler terminated with failures!")
logger.warning(tbs)
" with the run id: {rid}\n".format(rid=self.run_id,
path=path))
if len(DataModelProblems.missing) > 0:
err_msg = ("There were problems with one or more RecordType or "
"Property. Do they exist in the data model?\n")
for ent in DataModelProblems.missing:
err_msg += str(ent) + "\n"
logger.error(err_msg)
logger.error('Crawler finished with Datamodel Errors')
elif errors_occured:
logger.error("There were fatal errors during execution, please "
"contact the system administrator!")
logger.error("Crawler terminated with failures!")
logger.debug(tbs)
else:
logger.info("Crawler terminated successfully!")
@@ -393,12 +403,16 @@ carefully and if the changes are ok, click on the following link:
filename=filename,
changes="\n".join(changes))
sendmail = caosdb_config["advancedtools"]["sendmail"]
p = subprocess.Popen(
[
sendmail, "-f",
caosdb_config["advancedtools"]["crawler.from_mail"],
caosdb_config["advancedtools"]["crawler.to_mail"]],
stdin=subprocess.PIPE)
try:
fro = caosdb_config["advancedtools"]["crawler.from_mail"]
to = caosdb_config["advancedtools"]["crawler.to_mail"]
except KeyError:
logger.error("Server Configuration is missing a setting for "
"sending mails. The administrator should check "
"'from_mail' and 'to_mail'.")
return
p = subprocess.Popen([sendmail, "-f", fro, to], stdin=subprocess.PIPE)
p.communicate(input=text.encode())
def push_identifiables_to_CaosDB(self, cfood):
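The new error handling expects the mail-related settings in the "advancedtools" section of the CaosDB configuration that the crawler reads. A sketch of what such a section might look like; the example values are assumptions, only the keys (sendmail, crawler.from_mail, crawler.to_mail) come from the code above:

[advancedtools]
sendmail = /usr/sbin/sendmail
crawler.from_mail = crawler@example.org
crawler.to_mail = curator@example.org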
@@ -420,10 +434,13 @@ carefully and if the changes are ok, click on the following link:
cfood.to_be_updated = tmp
logger.info("UPDATE: updating the following entities")
info = "UPDATE: updating the following entities\n"
for el in cfood.to_be_updated:
logger.info("\t" + el.name if el.name is not None else el.id)
info += str("\t" + el.name if el.name is not None else "\t" +
str(el.id))
info += "\n"
logger.info(info)
logger.debug(cfood.to_be_updated)
try:
@@ -464,10 +481,11 @@ carefully and if the changes are ok, click on the following link:
ent.id = None
if len(missing_identifiables) > 0:
logger.info("Going to insert the following entities:")
info = "Going to insert the following entities:\n"
for ent in missing_identifiables:
logger.info(ent)
info += str(ent)+"\n"
logger.debug(info)
if len(missing_identifiables) == 0:
logger.debug("No new entities to be inserted.")
@@ -535,10 +553,13 @@ class FileCrawler(Crawler):
@staticmethod
def query_files(path):
query_str = "FIND FILE WHICH IS STORED AT " + (path if path.endswith("/") else path + "/") + "**"
logger.info("FILES QUERY: " + query_str)
query_str = "FIND FILE WHICH IS STORED AT " + (
path if path.endswith("/") else path + "/") + "**"
q_info = "Sending the following query: '" + query_str + "'\n"
files = db.execute_query(query_str)
logger.info("{} FILES TO BE PROCESSED.".format(len(files)))
logger.info(
q_info + "Found {} files that need to be processed.".format(
len(files)))
return files
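For illustration, the query assembled by query_files() just ensures a trailing slash on the given path and appends "**"; a hypothetical call (the path is made up for the example):

# Hypothetical illustration of the query string built in query_files()
path = "/ExperimentalData/2020_SpeedTest"
query_str = "FIND FILE WHICH IS STORED AT " + (
    path if path.endswith("/") else path + "/") + "**"
print(query_str)  # FIND FILE WHICH IS STORED AT /ExperimentalData/2020_SpeedTest/**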