Skip to content
Snippets Groups Projects
Commit 10b876b6 authored by Henrik tom Wörden's avatar Henrik tom Wörden
Browse files

Merge branch 'f-apply-updates' into f-generic-analysis

parents f0e7434e 3d2e2fe5
Branches
Tags
2 merge requests!39Release 0.4.0,!20created draft for generic analysis method
Pipeline #16681 failed
......@@ -66,6 +66,66 @@ def separated(text):
return "-"*60 + "\n" + text
def apply_list_of_updates(to_be_updated, update_flags,
                          update_cache=None, run_id=None):
    """Update the ``to_be_updated`` Container, i.e. push the changes to CaosDB.

    If the permission guard prohibits the update, the entities are stored in
    ``update_cache`` (when one is supplied) so they can be authorized and
    applied later.

    Parameters
    ----------
    to_be_updated : db.Container
        Entities whose changes shall be pushed to the server. Duplicates are
        removed before the update request is sent.
    update_flags : dict
        Flags passed on to the update request.
    update_cache : optional
        Cache object with an ``insert(entities, run_id)`` method; used to
        store prohibited updates. May be ``None``.
    run_id : optional
        Identifier of the current crawler run; stored with cached updates.
    """
    if len(to_be_updated) == 0:
        return

    get_ids_for_entities_with_names(to_be_updated)

    # remove duplicates
    tmp = db.Container()
    for el in to_be_updated:
        if el not in tmp:
            tmp.append(el)
    to_be_updated = tmp

    baseurl = db.configuration.get_config()["Connection"]["url"]

    # Hoisted out of the loop below: the helper only depends on baseurl,
    # so there is no need to re-create the closure per entity.
    def make_clickable(txt, eid):
        # Render an HTML link pointing at the entity in the web UI.
        return "<a href='{}/Entity/{}'>{}</a>".format(baseurl, eid, txt)

    info = "UPDATE: updating the following entities\n"
    for el in to_be_updated:
        # Prefer the human-readable name; fall back to the numeric id.
        info += str("\t" + make_clickable(el.name, el.id)
                    if el.name is not None
                    else "\t" + make_clickable(str(el.id), el.id))
        info += "\n"
    logger.info(info)
    logger.debug(to_be_updated)

    try:
        # The early return above guarantees to_be_updated is non-empty here,
        # so the previous `if len(...) > 0` re-check was redundant.
        logger.info("Updating {} Records...".format(len(to_be_updated)))
        guard.safe_update(to_be_updated, unique=False,
                          flags=update_flags)
    except FileNotFoundError as e:
        logger.info("Cannot access {}. However, it might be needed for"
                    " the correct execution".format(e.filename))
    except ProhibitedException:
        if update_cache is None:
            # Bug fix: previously this branch dereferenced update_cache
            # unconditionally, raising AttributeError when the default
            # None was used. Log instead of crashing.
            logger.warning(
                "Update of {} was prohibited and no update cache was "
                "supplied; the update is lost.".format(to_be_updated))
        else:
            try:
                update_cache.insert(to_be_updated, run_id)
            except IntegrityError as e:
                logger.warning(
                    "There were problems with the update of {}.".format(
                        to_be_updated),
                    extra={"identifier": str(to_be_updated),
                           "category": "update-cache"}
                )
                logger.debug(traceback.format_exc())
                logger.debug(e)
    except Exception as e:
        DataModelProblems.evaluate_exception(e)
class Crawler(object):
def __init__(self, cfood_types, use_cache=False,
abort_on_exception=True, interactive=True, hideKnown=False,
......@@ -318,7 +378,11 @@ class Crawler(object):
self._cached_find_or_insert_identifiables(cfood.identifiables)
cfood.update_identifiables()
self.push_identifiables_to_CaosDB(cfood)
self.apply_list_of_updates(
cfood.to_be_updated,
cfood.update_flags,
update_cache=self.update_cache,
run_id=self.run_id)
except FileNotFoundError as e:
logger.info("Cannot access {}. However, it might be needed for"
" the correct execution".format(e.filename))
......@@ -516,64 +580,8 @@ carefully and if the changes are ok, click on the following link:
subject="Crawler Update",
body=text)
def push_identifiables_to_CaosDB(self, cfood):
    """Update the cfood's ``to_be_updated`` Container, i.e. push the changes
    to CaosDB.

    Consistency fix: this method was a verbatim duplicate of the
    module-level ``apply_list_of_updates``; it now delegates to that
    single implementation so the update logic exists in exactly one place.

    Parameters
    ----------
    cfood : cfood object
        Provides ``to_be_updated`` (entities to push) and ``update_flags``
        (flags for the update request).
    """
    # NOTE(review): unlike the old inline copy, delegation no longer
    # reassigns the de-duplicated container back to cfood.to_be_updated;
    # no caller visible here relies on that side effect — confirm.
    apply_list_of_updates(cfood.to_be_updated,
                          cfood.update_flags,
                          update_cache=self.update_cache,
                          run_id=self.run_id)
# TODO remove static?
@staticmethod
def find_or_insert_identifiables(identifiables):
""" Sets the ids of identifiables (that do not have already an id from the
......
......@@ -51,7 +51,7 @@ def assure_name_format(name):
name = str(name)
if len(name.split(",")) != 2:
raise ValueError("Name field should be 'LastName, FirstName'."
raise ValueError("The field value should be 'LastName, FirstName'. "
"The supplied value was '{}'.".format(name))
return name
......@@ -303,14 +303,20 @@ class TableImporter(object):
"""
for key, datatype in self.datatypes.items():
for idx, val in df.loc[pd.notnull(df.loc[:, key]), key].iteritems():
for idx, val in df.loc[
pd.notnull(df.loc[:, key]), key].iteritems():
if not isinstance(val, datatype):
raise DataInconsistencyError(
"In row no. {rn} and column {c} of file '{fi}' the "
msg = (
"In row no. {rn} and column '{c}' of file '{fi}' the "
"datatype was {was} but it should be "
"{expected}".format(rn=idx, c=key, fi=filename,
was=type(val), expected=datatype)
was=str(type(val)).strip("<>"),
expected=str(datatype).strip("<>"))
)
logger.warning(msg, extra={'identifier': filename,
'category': "inconsistency"})
raise DataInconsistencyError(msg)
def check_missing(self, df, filename=None):
"""
......@@ -394,7 +400,7 @@ class XLSImporter(TableImporter):
df = xls_file.parse(converters=self.converters, **kwargs)
except Exception as e:
logger.warning(
"Cannot parse {}.".format(filename),
"Cannot parse {}.\n{}".format(filename, e),
extra={'identifier': str(filename),
'category': "inconsistency"})
raise DataInconsistencyError(*e.args)
......@@ -411,7 +417,7 @@ class CSVImporter(TableImporter):
**kwargs)
except ValueError as ve:
logger.warning(
"Cannot parse {}.".format(filename),
"Cannot parse {}.\n{}".format(filename, ve),
extra={'identifier': str(filename),
'category': "inconsistency"})
raise DataInconsistencyError(*ve.args)
......@@ -428,7 +434,7 @@ class TSVImporter(TableImporter):
**kwargs)
except ValueError as ve:
logger.warning(
"Cannot parse {}.".format(filename),
"Cannot parse {}.\n{}".format(filename, ve),
extra={'identifier': str(filename),
'category': "inconsistency"})
raise DataInconsistencyError(*ve.args)
......
......@@ -131,6 +131,10 @@ The behavior and rules of the crawler are defined in logical units
called CFoods. In order to extend the crawler you need to extend an
existing CFood or create a new one.
.. Note:: A crawler always needs a corresponding data model to exist on the
          server. This aspect is not covered here. Please refer, for
          example, to the documentation of the YAML Interface.
.. _c-food-introduction:
CFood -- Introduction
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment