Commit 0cadc8df authored by Florian Spreckelsen

ENH: Remove GEOMAR-specific date treatment

parent 6615797a
1 merge request: !1 F awi sams
@@ -105,114 +105,6 @@ def _update_property(entity: db.Record, property_id: int, value, property_name="
    return entity


def _treat_date(date_val: str):
    date_pattern = r"^[0-9]{4,4}-[0-9]{2,2}-[0-9]{2,2}$"
    # Check general pattern since dateutil.parse is unreliable with incomplete
    # dates (e.g., 2024-01) or wrong formats (e.g., 01.12.2024 is parsed as
    # 2024-01-12).
    if re.match(date_pattern, date_val) is None:
        # ParserError for simplified error handling down the line.
        raise dateparser.ParserError(f"{date_val} is not of the format YYYY-MM-DD.")
    # Use dateutil.parser despite having checked the pattern to exclude
    # nonsense dates like 2024-13-54.
    return str(dateparser.parse(date_val).date())
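

# Why _treat_date() checks the pattern before parsing (illustrative values,
# assuming `dateparser` is `dateutil.parser`, as the ParserError usage suggests):
#   dateparser.parse("01.12.2024").date()  -> 2024-01-12 (day and month swapped)
#   dateparser.parse("2024-01").date()     -> incomplete date silently completed
#   dateparser.parse("2024-13-54")         -> raises ParserError (nonsense date)
# The regex rejects the first two cases; the final parse() call catches the third.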


def _append_times_to_entity(ent, data, propname_prefix="Time", colname_time_prefix="Time collected", colname_date_prefix="Date collected"):
    propname_start = f"{propname_prefix} start"
    propname_stop = f"{propname_prefix} stop"
    prop_start = cached_get_entity_by(name=propname_start)
    prop_stop = cached_get_entity_by(name=propname_stop)
    colname_time_start = f"{colname_time_prefix} start"
    colname_time_stop = f"{colname_time_prefix} stop"
    colname_date_start = f"{colname_date_prefix} start"
    colname_date_stop = f"{colname_date_prefix} stop"
    date_start = None
    date_stop = None
    time_start = None
    time_stop = None
    timezone = None
    # Time start
    if colname_date_start in data and return_value_if_not_none(data[colname_date_start]) is not None:
        date_start = return_value_if_not_none(data[colname_date_start])
        try:
            date_start = _treat_date(date_start)
        except dateparser.ParserError as perr:
            logger.error(
                f"There is a problem in '{colname_date_start}': {date_start}"
                f" of sample {data[get_column_header_name('entity_id')]}: {perr}"
            )
            raise DataInconsistencyError
    if colname_time_start in data and return_value_if_not_none(data[colname_time_start]) is not None:
        if not get_column_header_name("Timezone") in data or return_value_if_not_none(data[get_column_header_name("Timezone")]) is None:
            logger.error(f"{colname_time_start} but no timezone given for sample "
                         f"{data[get_column_header_name('entity_id')]}.")
            raise DataInconsistencyError
        time_start = return_value_if_not_none(data[colname_time_start])
        timezone = return_value_if_not_none(data[get_column_header_name("Timezone")])
        if date_start is None:
            logger.error(
                f"{colname_time_start} is given but {colname_date_start} is missing for "
                f"sample {data[get_column_header_name('entity_id')]}.")
            raise DataInconsistencyError
        try:
            _val = str(dateparser.parse(f"{date_start}T{time_start}{timezone}"))
            ent = _update_property(ent, prop_start.id, property_name=prop_start.name, value=_val)
        except dateparser.ParserError as perr:
            logger.error(
                f"Couldn't parse {colname_time_start}: {time_start} with timezone {timezone} "
                f"of sample {data[get_column_header_name('entity_id')]}: {perr}"
            )
            raise DataInconsistencyError
    elif date_start is not None:
        ent = _update_property(ent, prop_start.id, value=date_start, property_name=prop_start.name)
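
    # At this point "Time start" holds, for hypothetical row values, the result
    # of str(dateparser.parse("2024-01-12T08:30+01:00")) when "Date collected
    # start", "Time collected start" and the timezone column were all given, or
    # just the plain date string "2024-01-12" when only the date was present;
    # nothing is set when both are missing.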
    # Time stop; raise error in case of stop without start
    if colname_date_stop in data and return_value_if_not_none(data[colname_date_stop]) is not None:
        date_stop = return_value_if_not_none(data[colname_date_stop])
    if date_stop is not None and date_start is None:
        logger.error(
            f"{colname_date_stop} is given but {colname_date_start} is missing for "
            f"sample {data[get_column_header_name('entity_id')]}.")
        raise DataInconsistencyError
    if date_stop is None:
        _date_stop = date_start
    else:
        try:
            _date_stop = _treat_date(date_stop)
        except dateparser.ParserError as perr:
            logger.error(
                f"There is a problem in '{colname_date_stop}': {date_stop}"
                f" of sample {data[get_column_header_name('entity_id')]}: {perr}"
            )
            raise DataInconsistencyError
    if colname_time_stop in data and return_value_if_not_none(data[colname_time_stop]) is not None:
        time_stop = return_value_if_not_none(data[colname_time_stop])
        if time_start is None:
            logger.error(
                f"{colname_time_stop} is given but {colname_time_start} is missing for "
                f"sample {data[get_column_header_name('entity_id')]}.")
            raise DataInconsistencyError
        # timezone is set by time start; if it hadn't been there, we would already have an error.
        try:
            _val = str(dateparser.parse(f"{_date_stop}T{time_stop}{timezone}"))
        except dateparser.ParserError as perr:
            logger.error(
                f"Couldn't parse {colname_time_stop}: {time_stop} with timezone {timezone} "
                f"of sample {data[get_column_header_name('entity_id')]}: {perr}"
            )
            raise DataInconsistencyError
        ent = _update_property(ent, prop_stop.id, property_name=prop_stop.name, value=_val)
    elif date_stop is not None:
        # We checked date_stop, but we use the cleaned-up _date_stop as the value.
        ent = _update_property(ent, prop_stop.id, property_name=prop_stop.name, value=_date_stop)
    return ent
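
# Hypothetical example of the stop handling above: a row with only
# "Time collected stop" = "16:45" and no "Date collected stop" falls back to
# the start date (_date_stop = date_start), so "Time stop" is set to
# str(dateparser.parse(f"{date_start}T16:45{timezone}")); with neither stop
# column present, no "Time stop" property is written at all.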


def get_container(data):
    """
    Return the BIS ID of the Container Record that is identified by 'Storage container' in data.
......
@@ -60,10 +60,6 @@ DATATYPE_DEFINITIONS = _use_custom_names({
    "AphiaID": int,
    "entity_id": str,
    "Campaign": str,
    "Date collected start": str,
    "Date collected stop": str,
    "Date sampled start": str,
    "Date sampled stop": str,
    "Fixation": str,
    "Gear configuration": str,
    "Gear": str,
@@ -104,7 +100,6 @@ DATATYPE_DEFINITIONS = _use_custom_names({
# Must exist
OBLIGATORY_COLUMNS = _use_custom_names([
    "entity_id",
    "Date collected start",
    "Device",
    "Latitude start",
    "Longitude start",
@@ -119,7 +114,6 @@ OBLIGATORY_COLUMNS = _use_custom_names([
OBLIGATORY_COLUMNS_CHILD = _use_custom_names([
    "entity_id",
    "Date sampled start",
    "Main User",
    "Parent LinkAhead ID",
    "Sample container",
@@ -145,10 +139,6 @@ COLUMN_CONVERTER = _use_custom_names({
SPECIAL_TREATMENT_SAMPLE = _use_custom_names([
    "entity_id",
    "Collection",
    "Date collected start",
    "Date collected stop",
    "Date sampled start",
    "Date sampled stop",
    "Gear configuration",
    "Gear",
    "Hol",
@@ -182,7 +172,6 @@ SPECIAL_TREATMENT_SAMPLE = _use_custom_names([
IGNORED_COLUMN_NAMES_SAMPLE = _use_custom_names([
    "LinkAhead URL",
    "Date",
    "IGSN URL",
    "IGSN",  # TODO This will be relevant for external IGSNs in the future.
    "Parent Sample",
......