From 0cadc8dfa4f34bb016ea8a9f05781a7c56de0cee Mon Sep 17 00:00:00 2001
From: Florian Spreckelsen <f.spreckelsen@indiscale.com>
Date: Tue, 4 Feb 2025 16:02:19 +0100
Subject: [PATCH] ENH: Remove GEOMAR-specific date treatment

---
 .../scripting/bin/crawl_sample_data_async.py | 108 ------------------
 .../sample_upload_column_definitions.py      |  11 --
 2 files changed, 119 deletions(-)

diff --git a/sample-management-custom/caosdb-server/scripting/bin/crawl_sample_data_async.py b/sample-management-custom/caosdb-server/scripting/bin/crawl_sample_data_async.py
index 3c3c093..e538e64 100755
--- a/sample-management-custom/caosdb-server/scripting/bin/crawl_sample_data_async.py
+++ b/sample-management-custom/caosdb-server/scripting/bin/crawl_sample_data_async.py
@@ -105,114 +105,6 @@ def _update_property(entity: db.Record, property_id: int, value, property_name="
     return entity
 
 
-def _treat_date(date_val: str):
-
-    date_pattern = r"^[0-9]{4,4}-[0-9]{2,2}-[0-9]{2,2}$"
-    # Check general pattern since dateutil.parse is unreliable with incomplete
-    # dates (e.g., 2024-01) or wrong formats (e.g., 01.12.2024 is parsed as
-    # 2024-01-12).
-    if re.match(date_pattern, date_val) is None:
-        # ParserError for simplified error handling down the line.
-        raise dateparser.ParserError(f"{date_val} is not of the format YYYY-MM-DD.")
-    # Use dateutils.parser despite having checked the pattern to exclude
-    # nonsense dates like 2024-13-54.
-    return str(dateparser.parse(date_val).date())
-
-
-def _append_times_to_entity(ent, data, propname_prefix="Time", colname_time_prefix="Time collected", colname_date_prefix="Date collected"):
-    propname_start = f"{propname_prefix} start"
-    propname_stop = f"{propname_prefix} stop"
-    prop_start = cached_get_entity_by(name=propname_start)
-    prop_stop = cached_get_entity_by(name=propname_stop)
-    colname_time_start = f"{colname_time_prefix} start"
-    colname_time_stop = f"{colname_time_prefix} stop"
-    colname_date_start = f"{colname_date_prefix} start"
-    colname_date_stop = f"{colname_date_prefix} stop"
-
-    date_start = None
-    date_stop = None
-    time_start = None
-    time_stop = None
-    timezone = None
-    # Time start
-    if colname_date_start in data and return_value_if_not_none(data[colname_date_start]) is not None:
-        date_start = return_value_if_not_none(data[colname_date_start])
-        try:
-            date_start = _treat_date(date_start)
-        except dateparser.ParserError as perr:
-            logger.error(
-                f"There is a problem in '{colname_date_start}': {date_start}"
-                f" of sample {data[get_column_header_name('entity_id')]}: {perr}"
-            )
-            raise DataInconsistencyError
-    if colname_time_start in data and return_value_if_not_none(data[colname_time_start]) is not None:
-        if not get_column_header_name("Timezone") in data or return_value_if_not_none(data[get_column_header_name("Timezone")]) is None:
-            logger.error(f"{colname_time_start} but no timezone given for sample "
-                         f"{data[get_column_header_name('entity_id')]}.")
-            raise DataInconsistencyError
-        time_start = return_value_if_not_none(data[colname_time_start])
-        timezone = return_value_if_not_none(data[get_column_header_name("Timezone")])
-        if date_start is None:
-            logger.error(
-                f"{colname_time_start} is given but {colname_date_start} is missing for "
-                f"sample {data[get_column_header_name('entity_id')]}.")
-            raise DataInconsistencyError
-        try:
-            _val = str(dateparser.parse(f"{date_start}T{time_start}{timezone}"))
-            ent = _update_property(ent, prop_start.id, property_name=prop_start.name, value=_val)
-        except dateparser.ParserError as perr:
-            logger.error(
f"Couldn't parse {colname_time_start}: {time_start} with timezone {timezone} " - f"of sample {data[get_column_header_name('entity_id')]}: {perr}" - ) - raise DataInconsistencyError - elif date_start is not None: - ent = _update_property(ent, prop_start.id, value=date_start, property_name=prop_start.name) - - # Time stop; raise error in case of stop without start - if colname_date_stop in data and return_value_if_not_none(data[colname_date_stop]) is not None: - date_stop = return_value_if_not_none(data[colname_date_stop]) - if date_stop is not None and date_start is None: - logger.error( - f"{colname_date_stop} is given but {colname_date_start} is missing for " - f"sample {data[get_column_header_name('entity_id')]}.") - raise DataInconsistencyError - if date_stop is None: - _date_stop = date_start - else: - try: - _date_stop = _treat_date(date_stop) - except dateparser.ParserError as perr: - logger.error( - f"There is a problem in '{colname_date_stop}': {date_stop}" - f" of sample {data[get_column_header_name('entity_id')]}: {perr}" - ) - raise DataInconsistencyError - - if colname_time_stop in data and return_value_if_not_none(data[colname_time_stop]) is not None: - time_stop = return_value_if_not_none(data[colname_time_stop]) - if time_start is None: - logger.error( - f"{colname_time_stop} is given but {colname_time_start} is missing for " - f"sample {data[get_column_header_name('entity_id')]}.") - raise DataInconsistencyError - # timezone is set by time start; if it hadn't been there, we would already have an error. - try: - _val = str(dateparser.parse(f"{_date_stop}T{time_stop}{timezone}")) - except dateparser.ParserError as perr: - logger.error( - f"Couldn't parse {colname_time_stop}: {time_stop} with timezone {timezone} " - f"of sample {data[get_column_header_name('entity_id')]}: {perr}" - ) - raise DataInconsistencyError - ent = _update_property(ent, prop_stop.id, property_name=prop_stop.name, value=_val) - elif date_stop is not None: - # We check date_stop but we used the cleaned-up _date_stop as value - ent = _update_property(ent, prop_stop.id, property_name=prop_stop.name, value=_date_stop) - - return ent - - def get_container(data): """ Retrun the BIS ID of the Container Record that is identified by 'Storage contianer' in data. 
diff --git a/sample-management-custom/caosdb-server/scripting/bin/sample_helpers/sample_upload_column_definitions.py b/sample-management-custom/caosdb-server/scripting/bin/sample_helpers/sample_upload_column_definitions.py
index c1155d7..2f74f2e 100644
--- a/sample-management-custom/caosdb-server/scripting/bin/sample_helpers/sample_upload_column_definitions.py
+++ b/sample-management-custom/caosdb-server/scripting/bin/sample_helpers/sample_upload_column_definitions.py
@@ -60,10 +60,6 @@ DATATYPE_DEFINITIONS = _use_custom_names({
     "AphiaID": int,
     "entity_id": str,
     "Campaign": str,
-    "Date collected start": str,
-    "Date collected stop": str,
-    "Date sampled start": str,
-    "Date sampled stop": str,
     "Fixation": str,
     "Gear configuration": str,
     "Gear": str,
@@ -104,7 +100,6 @@ DATATYPE_DEFINITIONS = _use_custom_names({
 # Must exist
 OBLIGATORY_COLUMNS = _use_custom_names([
     "entity_id",
-    "Date collected start",
     "Device",
     "Latitude start",
     "Longitude start",
@@ -119,7 +114,6 @@ OBLIGATORY_COLUMNS = _use_custom_names([
 
 OBLIGATORY_COLUMNS_CHILD = _use_custom_names([
     "entity_id",
-    "Date sampled start",
     "Main User",
     "Parent LinkAhead ID",
     "Sample container",
@@ -145,10 +139,6 @@ COLUMN_CONVERTER = _use_custom_names({
 SPECIAL_TREATMENT_SAMPLE = _use_custom_names([
     "entity_id",
     "Collection",
-    "Date collected start",
-    "Date collected stop",
-    "Date sampled start",
-    "Date sampled stop",
     "Gear configuration",
     "Gear",
     "Hol",
@@ -182,7 +172,6 @@ SPECIAL_TREATMENT_SAMPLE = _use_custom_names([
 
 IGNORED_COLUMN_NAMES_SAMPLE = _use_custom_names([
     "LinkAhead URL",
-    "Date",
     "IGSN URL",
     "IGSN",  # TODO This will be relevant for external IGSNs in the future.
     "Parent Sample",
-- 
GitLab
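For reference, the strict date validation that this patch removes worked as sketched below. This is a minimal, self-contained approximation of the deleted _treat_date helper, assuming python-dateutil is installed (the script imports dateutil's parser as "dateparser"); it is illustrative, not code taken from the repository:

    import re

    from dateutil import parser as dateparser


    def treat_date(date_val: str) -> str:
        """Normalize a YYYY-MM-DD string; raise ParserError otherwise."""
        # dateutil alone is too lenient: it completes partial dates such
        # as "2024-01" and guesses at ambiguous formats ("01.12.2024"
        # parses as 2024-01-12), so the shape is checked with a regex first.
        if re.fullmatch(r"[0-9]{4}-[0-9]{2}-[0-9]{2}", date_val) is None:
            raise dateparser.ParserError(f"{date_val} is not of the format YYYY-MM-DD.")
        # dateutil still runs afterwards to reject well-shaped nonsense
        # such as 2024-13-54 (month 13, day 54).
        return str(dateparser.parse(date_val).date())


    print(treat_date("2024-02-29"))  # valid leap day -> "2024-02-29"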
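Similarly, the deleted _append_times_to_entity combined a validated date with the corresponding time and timezone columns into one timestamp before storing it on the entity. A sketch of just that combination step, with hypothetical values standing in for the "Date collected start", "Time collected start", and "Timezone" columns:

    from dateutil import parser as dateparser

    date_start = "2024-02-04"  # validated date, YYYY-MM-DD
    time_start = "16:02:19"    # time-of-day column
    timezone = "+01:00"        # UTC-offset column

    # Concatenating into ISO 8601 form yields a timezone-aware datetime;
    # malformed input raises dateparser.ParserError instead.
    value = str(dateparser.parse(f"{date_start}T{time_start}{timezone}"))
    print(value)  # 2024-02-04 16:02:19+01:00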