From 0cadc8dfa4f34bb016ea8a9f05781a7c56de0cee Mon Sep 17 00:00:00 2001
From: Florian Spreckelsen <f.spreckelsen@indiscale.com>
Date: Tue, 4 Feb 2025 16:02:19 +0100
Subject: [PATCH] ENH: Remove GEOMAR-specific date treatment

---
 .../scripting/bin/crawl_sample_data_async.py  | 108 ------------------
 .../sample_upload_column_definitions.py       |  11 --
 2 files changed, 119 deletions(-)

diff --git a/sample-management-custom/caosdb-server/scripting/bin/crawl_sample_data_async.py b/sample-management-custom/caosdb-server/scripting/bin/crawl_sample_data_async.py
index 3c3c093..e538e64 100755
--- a/sample-management-custom/caosdb-server/scripting/bin/crawl_sample_data_async.py
+++ b/sample-management-custom/caosdb-server/scripting/bin/crawl_sample_data_async.py
@@ -105,114 +105,6 @@ def _update_property(entity: db.Record, property_id: int, value, property_name="
     return entity
 
 
-def _treat_date(date_val: str):
-
-    date_pattern = r"^[0-9]{4,4}-[0-9]{2,2}-[0-9]{2,2}$"
-    # Check general pattern since dateutil.parse is unreliable with incomplete
-    # dates (e.g., 2024-01) or wrong formats (e.g., 01.12.2024 is parsed as
-    # 2024-01-12).
-    if re.match(date_pattern, date_val) is None:
-        # ParserError for simplified error handling down the line.
-        raise dateparser.ParserError(f"{date_val} is not of the format YYYY-MM-DD.")
-    # Use dateutils.parser despite having checked the pattern to exclude
-    # nonsense dates like 2024-13-54.
-    return str(dateparser.parse(date_val).date())
-
-
-def _append_times_to_entity(ent, data, propname_prefix="Time", colname_time_prefix="Time collected", colname_date_prefix="Date collected"):
-    propname_start = f"{propname_prefix} start"
-    propname_stop = f"{propname_prefix} stop"
-    prop_start = cached_get_entity_by(name=propname_start)
-    prop_stop = cached_get_entity_by(name=propname_stop)
-    colname_time_start = f"{colname_time_prefix} start"
-    colname_time_stop = f"{colname_time_prefix} stop"
-    colname_date_start = f"{colname_date_prefix} start"
-    colname_date_stop = f"{colname_date_prefix} stop"
-
-    date_start = None
-    date_stop = None
-    time_start = None
-    time_stop = None
-    timezone = None
-    # Time start
-    if colname_date_start in data and return_value_if_not_none(data[colname_date_start]) is not None:
-        date_start = return_value_if_not_none(data[colname_date_start])
-        try:
-            date_start = _treat_date(date_start)
-        except dateparser.ParserError as perr:
-            logger.error(
-                f"There is a problem in '{colname_date_start}': {date_start}"
-                f" of sample {data[get_column_header_name('entity_id')]}: {perr}"
-            )
-            raise DataInconsistencyError
-    if colname_time_start in data and return_value_if_not_none(data[colname_time_start]) is not None:
-        if not get_column_header_name("Timezone") in data or return_value_if_not_none(data[get_column_header_name("Timezone")]) is None:
-            logger.error(f"{colname_time_start} but no timezone given for sample "
-                         f"{data[get_column_header_name('entity_id')]}.")
-            raise DataInconsistencyError
-        time_start = return_value_if_not_none(data[colname_time_start])
-        timezone = return_value_if_not_none(data[get_column_header_name("Timezone")])
-        if date_start is None:
-            logger.error(
-                f"{colname_time_start} is given but {colname_date_start} is missing for "
-                f"sample {data[get_column_header_name('entity_id')]}.")
-            raise DataInconsistencyError
-        try:
-            _val = str(dateparser.parse(f"{date_start}T{time_start}{timezone}"))
-            ent = _update_property(ent, prop_start.id, property_name=prop_start.name, value=_val)
-        except dateparser.ParserError as perr:
-            logger.error(
-                f"Couldn't parse {colname_time_start}: {time_start} with timezone {timezone} "
-                f"of sample {data[get_column_header_name('entity_id')]}: {perr}"
-            )
-            raise DataInconsistencyError
-    elif date_start is not None:
-        ent = _update_property(ent, prop_start.id, value=date_start, property_name=prop_start.name)
-
-    # Time stop; raise error in case of stop without start
-    if colname_date_stop in data and return_value_if_not_none(data[colname_date_stop]) is not None:
-        date_stop = return_value_if_not_none(data[colname_date_stop])
-    if date_stop is not None and date_start is None:
-        logger.error(
-            f"{colname_date_stop} is given but {colname_date_start} is missing for "
-            f"sample {data[get_column_header_name('entity_id')]}.")
-        raise DataInconsistencyError
-    if date_stop is None:
-        _date_stop = date_start
-    else:
-        try:
-            _date_stop = _treat_date(date_stop)
-        except dateparser.ParserError as perr:
-            logger.error(
-                f"There is a problem in '{colname_date_stop}': {date_stop}"
-                f" of sample {data[get_column_header_name('entity_id')]}: {perr}"
-            )
-            raise DataInconsistencyError
-
-    if colname_time_stop in data and return_value_if_not_none(data[colname_time_stop]) is not None:
-        time_stop = return_value_if_not_none(data[colname_time_stop])
-        if time_start is None:
-            logger.error(
-                f"{colname_time_stop} is given but {colname_time_start} is missing for "
-                f"sample {data[get_column_header_name('entity_id')]}.")
-            raise DataInconsistencyError
-        # timezone is set by time start; if it hadn't been there, we would already have an error.
-        try:
-            _val = str(dateparser.parse(f"{_date_stop}T{time_stop}{timezone}"))
-        except dateparser.ParserError as perr:
-            logger.error(
-                f"Couldn't parse {colname_time_stop}: {time_stop} with timezone {timezone} "
-                f"of sample {data[get_column_header_name('entity_id')]}: {perr}"
-            )
-            raise DataInconsistencyError
-        ent = _update_property(ent, prop_stop.id, property_name=prop_stop.name, value=_val)
-    elif date_stop is not None:
-        # We check date_stop but we used the cleaned-up _date_stop as value
-        ent = _update_property(ent, prop_stop.id, property_name=prop_stop.name, value=_date_stop)
-
-    return ent
-
-
 def get_container(data):
     """
     Retrun the BIS ID of the Container Record that is identified by 'Storage contianer' in data.
diff --git a/sample-management-custom/caosdb-server/scripting/bin/sample_helpers/sample_upload_column_definitions.py b/sample-management-custom/caosdb-server/scripting/bin/sample_helpers/sample_upload_column_definitions.py
index c1155d7..2f74f2e 100644
--- a/sample-management-custom/caosdb-server/scripting/bin/sample_helpers/sample_upload_column_definitions.py
+++ b/sample-management-custom/caosdb-server/scripting/bin/sample_helpers/sample_upload_column_definitions.py
@@ -60,10 +60,6 @@ DATATYPE_DEFINITIONS = _use_custom_names({
     "AphiaID": int,
     "entity_id": str,
     "Campaign": str,
-    "Date collected start": str,
-    "Date collected stop": str,
-    "Date sampled start": str,
-    "Date sampled stop": str,
     "Fixation": str,
     "Gear configuration": str,
     "Gear": str,
@@ -104,7 +100,6 @@ DATATYPE_DEFINITIONS = _use_custom_names({
 # Must exist
 OBLIGATORY_COLUMNS = _use_custom_names([
     "entity_id",
-    "Date collected start",
     "Device",
     "Latitude start",
     "Longitude start",
@@ -119,7 +114,6 @@ OBLIGATORY_COLUMNS = _use_custom_names([
 
 OBLIGATORY_COLUMNS_CHILD = _use_custom_names([
     "entity_id",
-    "Date sampled start",
     "Main User",
     "Parent LinkAhead ID",
     "Sample container",
@@ -145,10 +139,6 @@ COLUMN_CONVERTER = _use_custom_names({
 SPECIAL_TREATMENT_SAMPLE = _use_custom_names([
     "entity_id",
     "Collection",
-    "Date collected start",
-    "Date collected stop",
-    "Date sampled start",
-    "Date sampled stop",
     "Gear configuration",
     "Gear",
     "Hol",
@@ -182,7 +172,6 @@ SPECIAL_TREATMENT_SAMPLE = _use_custom_names([
 
 IGNORED_COLUMN_NAMES_SAMPLE = _use_custom_names([
     "LinkAhead URL",
-    "Date",
     "IGSN URL",
     "IGSN",  # TODO This will be relevant for external IGSNs in the future.
     "Parent Sample",
-- 
GitLab