diff --git a/sample-management-custom/caosdb-server/scripting/bin/export_sample_csv.py b/sample-management-custom/caosdb-server/scripting/bin/export_sample_csv.py
index 41ca25f59f281b514c74a94dab87c3de30453c71..169575aca15084c9545230027158e3daad85019b 100755
--- a/sample-management-custom/caosdb-server/scripting/bin/export_sample_csv.py
+++ b/sample-management-custom/caosdb-server/scripting/bin/export_sample_csv.py
@@ -42,8 +42,10 @@ from bis_utils import (create_email_with_link_text,
                        get_options_row, send_mail_with_defaults)
 from export_container_csv import (generate_label_text,
                                   extract_storage_chain as container_storage_chain)
+from sample_helpers.sample_registration_get_person_identifier import get_person_identifier_from_rec
 from sample_helpers.sample_upload_column_definitions import (
-    DATATYPE_DEFINITIONS, SPECIAL_TREATMENT_SAMPLE as SPECIAL_TREATMENT)
+    DATATYPE_DEFINITIONS, SPECIAL_TREATMENT_SAMPLE as
+    SPECIAL_TREATMENT, use_custom_names)
 from sample_helpers.utils import (CONSTANTS, get_column_header_name,
                                   get_entity_name)
 
@@ -126,7 +128,7 @@ def extract_value_as_list(record, key):
 
 
 def extract_storage_id(record, key):
-    return extract_value_as_list(record, "Container")
+    return extract_value_as_list(record, get_entity_name("container_rt"))
 
 
 def extract_pdf_id(record, key):
@@ -134,19 +136,20 @@ def extract_pdf_id(record, key):
     return prop.value if prop is not None else None
 
 
-def extract_storage_container_label(record, key):
-    ids = extract_value_as_list(record, "Container")
-    return retrieve_values(ids, 'BIS Label')
-
-
-def extract_nagoya_case_number(record, key):
-    ids = extract_value_as_list(record, "NagoyaCase")
-    return retrieve_values(ids, key)
-
-
 def extract_person(record, key):
     ids = extract_value_as_list(record, key)
-    return retrieve_values(ids, 'Abbreviation')
+    person_recs = [cached_record(i) for i in ids]
+    return [get_person_identifier_from_rec(r) for r in person_recs]
+
+
+def extract_event_responsible(record, key):
+    evt = retrieve_event(record)
+    if len(evt) == 0:
+        return None
+    elif len(evt) > 1:
+        logger.debug(f"Sample {record.id} references more than one event.")
+        return None
+    return extract_person(evt[0], get_entity_name("responsible_person_event"))
 
 
 def extract_parent_sample(record, key):
@@ -161,51 +164,14 @@ def extract_reference_name(record, key):
             for i in ids if i is not None]
 
 
-def retrieve_source_event(record):
-    ids = extract_value_as_list(record, 'SourceEvent')
-    if record.get_property("SourceEvent") is None:
+def retrieve_event(record):
+    ids = extract_value_as_list(record, get_entity_name("event_rt"))
+    if record.get_property(get_entity_name("event_rt")) is None:
         # there are cases where this property is named "Event"
         ids = extract_value_as_list(record, 'Event')
 
     return [cached_record(i) for i in ids]
 
 
-def retrieve_gear(record):
-    ids = [e.get_property("Gear").value for e in retrieve_source_event(record)
-           if e.get_property("Gear") is not None]
-    return [cached_query(f"SELECT 'parent', 'Configuration' FROM ENTITY WITH id = '{i}'", unique=True) for i in ids]
-
-
-def extract_gear(record, key):
-    return [e.get_parents()[0].name for e in retrieve_gear(record)]
-
-
-def extract_gear_configuration(record, key):
-    return [e.get_property("Configuration").value for e in
-            retrieve_gear(record)
-            if e.get_property("Configuration") is not None]
-
-
-def extract_date_time(record, p):
-    if p.lower() == "time start" or p.lower() == "time stop":
-        # these are attached to the source event directly
-        return [e.get_property(p).value for e in retrieve_source_event(record) if
-                e.get_property(p) is not None and e.get_property(p).value is not None]
-    else:
-        return extract_value_as_list(record, p)
-
-
-def extract_station_number(record, key):
-    source_ev = retrieve_source_event(record)
-    return [e.get_property(key).value for e in source_ev if
-            e.get_property(key) is not None]
-
-
-def extract_station_id(record, key):
-    source_ev = retrieve_source_event(record)
-    return [e.get_property(key).value for e in source_ev if
-            e.get_property(key) is not None]
-
-
 def retrieve_positions(source_ev):
     pos_ids = extract_value_as_list(source_ev, "Position")
     return [cached_record(i) for i in pos_ids]
@@ -217,7 +183,7 @@ def has_parent(r, par):
 
 
 def extract_position(record, position, component):
-    source_evs = retrieve_source_event(record)
+    source_evs = retrieve_event(record)
     result = []
     for ev in source_evs:
         _pos = [pos for pos in retrieve_positions(ev)]
@@ -234,83 +200,108 @@ def extract_position(record, position, component):
     return [pos.get_property(component).value for pos in result
             if pos.get_property(component) is not None]
 
+def extract_ele_start(record, key):
+    return extract_position(record, get_entity_name("StartPosition"), get_entity_name("elevation"))
+
+
+def extract_ele_stop(record, key):
+    return extract_position(record, get_entity_name("StopPosition"), get_entity_name("elevation"))
+
+
 def extract_lat_start(record, key):
-    return extract_position(record, "StartPosition", "Latitude")
+    return extract_position(record, get_entity_name("StartPosition"), get_entity_name("latitude"))
 
 
 def extract_lat_stop(record, key):
-    return extract_position(record, "StopPosition", "Latitude")
+    return extract_position(record, get_entity_name("StopPosition"), get_entity_name("latitude"))
 
 
 def extract_lng_start(record, key):
-    return extract_position(record, "StartPosition", "Longitude")
+    return extract_position(record, get_entity_name("StartPosition"), get_entity_name("longitude"))
 
 
 def extract_lng_stop(record, key):
-    return extract_position(record, "StopPosition", "Longitude")
+    return extract_position(record, get_entity_name("StopPosition"), get_entity_name("longitude"))
 
 
-def extract_sampling_depth_start(record, key):
-    return extract_position(record, "StartPosition", "Sampling depth")
+def extract_linkahead_url(record, key):
+    # base_uri = db.get_config().get("Connection", "url")
+    base_uri = get_config_setting("public_host_url")
+    return urllib.parse.urljoin(base_uri, f"Entity/{record.id}")
 
 
-def extract_sampling_depth_stop(record, key):
-    return extract_position(record, "StopPosition", "Sampling depth")
+def extract_doi(record, key):
+    source_evs = retrieve_event(record)
+    if len(source_evs) > 1:
+        logger.error(
+            f"Sample {record.id} references more than one event so no unique DOI can be exported.")
+        return None
+    elif len(source_evs) == 0:
+        return None
+    ev = source_evs[0]
+    return ev.get_property(get_entity_name("igsn_doi_prop")).value if ev.get_property(get_entity_name("igsn_doi_prop")) is not None else None
 
 
-def extract_water_depth_start(record, key):
-    return extract_position(record, "StartPosition", "Water depth")
+def _extract_event_prop(record, key, ref=False):
+    evt = retrieve_event(record)
+    if len(evt) == 0:
+        return None
+    elif len(evt) > 1:
+        logger.debug(f"Sample {record.id} references more than one event.")
+        return None
 
-def extract_water_depth_stop(record, key):
-    return extract_position(record, "StopPosition", "Water depth")
+    if ref:
+        return extract_reference_name(evt[0], key)
+    return extract_value_as_list(evt[0], key)
 
-def 
extract_source_event_name(record, key): - return [e.name for e in retrieve_source_event(record)] +def extract_biome(record, key): -def extract_hol(record, key): - source_ev = retrieve_source_event(record) - return [e.get_property(key).value for e in source_ev if - e.get_property(key) is not None] + return _extract_event_prop(record, get_entity_name("Biome"), ref=True) -def extract_bis_url(record, key): - # base_uri = db.get_config().get("Connection", "url") - base_uri = get_config_setting("public_host_url") - return urllib.parse.urljoin(base_uri, f"Entity/{record.id}") +def extract_campaign(record, key): + return _extract_event_prop(record, get_entity_name("Campaign"), ref=True) -def extract_igsn(record, key): - source_evs = retrieve_source_event(record) - if len(source_evs) > 1: - logger.error( - f"Sample {record.id} references more than one SourceEvent so no unique IGSN can be exported.") - return None - elif len(source_evs) == 0: - return None - ev = source_evs[0] - return ev.get_property(key).value if ev.get_property(key) is not None else None +def extract_device(record, key): -def extract_doi(record, key): - source_evs = retrieve_source_event(record) - if len(source_evs) > 1: - logger.error( - f"Sample {record.id} references more than one SourceEvent so no unique DOI can be exported.") - return None - elif len(source_evs) == 0: - return None - ev = source_evs[0] - return ev.get_property("DOI").value if ev.get_property("DOI") is not None else None + return _extract_event_prop(record, get_entity_name("Device"), ref=True) + + +def extract_end_date(record, key): + + return _extract_event_prop(record, get_entity_name("end_date_prop")) + + +def extract_level(record, key): + + return _extract_event_prop(record, get_entity_name("level")) + + +def extract_sphere(record, key): + + return _extract_event_prop(record, get_entity_name("Sphere"), ref=True) + + +def extract_locality_descr(record, key): + + return _extract_event_prop(record, get_entity_name("locality_description_prop")) + + +def extract_locality_name(record, key): + + return _extract_event_prop(record, get_entity_name("locality_name_prop")) def extract_storage_chain(record, key): - if record.get_property("Container") is not None and record.get_property("Container").value: + if record.get_property(get_entity_name("container_rt")) is not None and record.get_property(get_entity_name("container_rt")).value: - cont_id = record.get_property("Container").value + cont_id = record.get_property(get_entity_name("container_rt")).value if isinstance(cont_id, list): if len(cont_id) > 1: logger.debug(f"Sample {record.id} has multiple containers.") @@ -327,7 +318,7 @@ def extract_storage_chain(record, key): def extract_event_url(record, key): - events = retrieve_source_event(record) + events = retrieve_event(record) if not events: return None if len(events) == 1: @@ -337,72 +328,61 @@ def extract_event_url(record, key): # must include all keys from SPECIAL_TREATMENT -EXTRACTORS = { - "BIS ID": lambda record, key: record.id, - "Parent BIS ID": extract_parent_sample, - "AphiaID": default_find, - "Collection": extract_reference_name, +EXTRACTORS = use_custom_names({ + "entity_id": lambda record, key: record.id, "Main User": extract_person, - "Sampling Person": extract_person, - "PI": extract_person, - "Person": extract_person, - "Gear": extract_gear, - "Gear configuration": extract_gear_configuration, + "Biome": extract_biome, + "Campaign": extract_campaign, + "Collector": extract_person, + "Curator": extract_person, + "Device": extract_device, + 
"Elevation start": extract_ele_start, + "Elevation stop": extract_ele_stop, + "Embargo": default_find, + "End date": extract_end_date, "Latitude start": extract_lat_start, - "Longitude start": extract_lng_start, - "Storage ID": extract_storage_id, - "Nagoya case number": extract_nagoya_case_number, - "PDFReport": extract_pdf_id, - "Subevent": extract_source_event_name, - "Station ID": extract_station_id, - "Station number": extract_station_number, - "Sampling depth start": extract_sampling_depth_start, - "Sampling depth stop": extract_sampling_depth_stop, - "Water depth start": extract_water_depth_start, - "Water depth stop": extract_water_depth_stop, "Latitude stop": extract_lat_stop, + "Level": extract_level, + "LinkAhead URL": extract_linkahead_url, + "Longitude start": extract_lng_start, "Longitude stop": extract_lng_stop, - "Storage chain": extract_storage_chain, - "Storage Container Label": extract_storage_container_label, - "Hol": extract_hol, + "PDFReport": extract_pdf_id, "Sampling method": default_find, - # "Publications": TODO never used - # "NCBI BioProject": TODO never used - # "NCBI BioSample": TODO never used - # "NCBI Accession": TODO never used - "BIS URL": extract_bis_url, - "IGSN": extract_igsn, - "IGSN URL": extract_doi, - "Sphere": default_find, - "URL SourceEvent": extract_event_url, -} - -REVERSE_COLUMN_CONVERTER = { - "Collection": collection_value, - "PI": person_value, - "Person": person_value, -} + "Sphere": extract_sphere, + "Start date": extract_end_date, + "Storage ID": extract_storage_id, + "Storage chain": extract_storage_chain, + "URL Event": extract_event_url, + "igsn_doi_prop": extract_doi, + "locality_description_prop": extract_locality_descr, + "locality_name_prop": extract_locality_name, + "parent_sample_prop": extract_parent_sample, + "responsible_person_event": extract_event_responsible +}) + +REVERSE_COLUMN_CONVERTER = use_custom_names({ +}) # List of sample properties to be ignored because they are treated # otherwise. Similar, but not identical to SPECIAL TREATMENT. -IGNORE_KEYS = [ - "Parent Sample", - "Container", - "Event", -] +IGNORE_KEYS = use_custom_names([ + "parent_sample_prop", + "container_rt", + "event_rt", +]) # Additional list of keys to be ignored when extracting parent sample information -IGNORE_KEYS_PARENT = IGNORE_KEYS + [ - "LinkAhead ID", -] +IGNORE_KEYS_PARENT = IGNORE_KEYS + use_custom_names([ + "entity_id", +]) # List of columns to be exported although they are not known to or ignored by # the import. -ADDITIONAL_EXPORTS = [ +ADDITIONAL_EXPORTS = use_custom_names([ "LinkAhead URL", "Parent LinkAhead ID", "Storage chain", -] +]) def extract_value(r, e): diff --git a/sample-management-custom/caosdb-server/scripting/bin/sample_helpers/default_constants.yml b/sample-management-custom/caosdb-server/scripting/bin/sample_helpers/default_constants.yml index c7f1baa9c119ab1996eb642a0efbf3e1a2f5475e..f7521803ad3ec7696e2b71d83241a31d2bdebf38 100644 --- a/sample-management-custom/caosdb-server/scripting/bin/sample_helpers/default_constants.yml +++ b/sample-management-custom/caosdb-server/scripting/bin/sample_helpers/default_constants.yml @@ -24,6 +24,7 @@ csv_column_names: locality_description_prop: "Locality description" locality_name_prop: "Locality name" responsible_person_event: "Event responsible" + parent_sample_prop: "Parent LinkAhead ID" csv_column_descriptions: LinkAhead ID: "An ID generated by LinkAhead (either integer or URL to this entity). Do not change this column!" 
diff --git a/sample-management-custom/caosdb-server/scripting/bin/sample_helpers/sample_registration_get_person_identifier.py b/sample-management-custom/caosdb-server/scripting/bin/sample_helpers/sample_registration_get_person_identifier.py index 329dc3d9c464832feb0540acc9be62fc28140559..396bf0ef054275f901fb6fa6056dc95e6a624516 100644 --- a/sample-management-custom/caosdb-server/scripting/bin/sample_helpers/sample_registration_get_person_identifier.py +++ b/sample-management-custom/caosdb-server/scripting/bin/sample_helpers/sample_registration_get_person_identifier.py @@ -26,6 +26,11 @@ def get_person_identifier(form_data: dict) -> str: """ person_rec = db.cached.cached_get_entity_by(eid=form_data["responsible_person"]) + + return get_person_identifier_from_rec(person_rec) + + +def get_person_identifier_from_rec(person_rec: db.Record) -> str: # Use abbreviation if present if (person_rec.get_property(get_entity_name("abbreviation_prop")) is not None and person_rec.get_property(get_entity_name("abbreviation_prop")).value): diff --git a/sample-management-custom/caosdb-server/scripting/bin/sample_helpers/sample_upload_column_definitions.py b/sample-management-custom/caosdb-server/scripting/bin/sample_helpers/sample_upload_column_definitions.py index d6a62cb72d10698c5842b031c38f30145ac969da..3912595be60fbe9f672b579a8df538f287aeaef3 100644 --- a/sample-management-custom/caosdb-server/scripting/bin/sample_helpers/sample_upload_column_definitions.py +++ b/sample-management-custom/caosdb-server/scripting/bin/sample_helpers/sample_upload_column_definitions.py @@ -43,7 +43,7 @@ def _embargo_converter(text: str): f"The embargo should be either a date in YYYY-MM-DD format, or 'true'/'yes' or 'false'/'no', but is {text}.") -def _use_custom_names(definition: Union[list, dict]): +def use_custom_names(definition: Union[list, dict]): """Replace names in list or dict keys by custom names with `utils.get_column_header_name`. @@ -56,7 +56,7 @@ def _use_custom_names(definition: Union[list, dict]): raise ValueError(f"Expected dict or list, but got {type(definition)}.") -DATATYPE_DEFINITIONS = _use_custom_names({ +DATATYPE_DEFINITIONS = use_custom_names({ "Campaign": str, "Elevation start": float, "Elevation stop": float, @@ -75,16 +75,16 @@ DATATYPE_DEFINITIONS = _use_custom_names({ # Obligatory columns: Must exist and must not be empty # Must exist -OBLIGATORY_COLUMNS = _use_custom_names([ +OBLIGATORY_COLUMNS = use_custom_names([ "entity_id", ]) -OBLIGATORY_COLUMNS_CHILD = _use_custom_names([ +OBLIGATORY_COLUMNS_CHILD = use_custom_names([ "entity_id", "Parent LinkAhead ID", ]) -COLUMN_CONVERTER = _use_custom_names({ +COLUMN_CONVERTER = use_custom_names({ "Collector": semicolon_separated_list, "Curator": semicolon_separated_list, "Embargo": _embargo_converter, @@ -92,7 +92,7 @@ COLUMN_CONVERTER = _use_custom_names({ "Sphere": semicolon_separated_list, }) -SPECIAL_TREATMENT_SAMPLE = _use_custom_names([ +SPECIAL_TREATMENT_SAMPLE = use_custom_names([ "Biome", "Campaign", "Collector", @@ -102,7 +102,7 @@ SPECIAL_TREATMENT_SAMPLE = _use_custom_names([ "Elevation stop", "Embargo", "End date", - "Event responsible", + "responsible_person_event", "igsn_doi_prop", "Latitude start", "Latitude stop", @@ -120,7 +120,7 @@ SPECIAL_TREATMENT_SAMPLE = _use_custom_names([ "entity_id", ]) -IGNORED_COLUMN_NAMES_SAMPLE = _use_custom_names([ +IGNORED_COLUMN_NAMES_SAMPLE = use_custom_names([ "LinkAhead URL", "Parent Sample", "Storage chain",
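
Note for reviewers (not part of the patch): get_person_identifier_from_rec() is split out of get_person_identifier() so that the CSV export (extract_person above) and the registration form derive person identifiers the same way. A minimal stand-alone sketch of the intended behaviour follows; only the "prefer the abbreviation if present" branch is visible in the hunk, the fallback to the record name is an assumption, and FakeProperty/FakePersonRecord are stand-ins for LinkAhead records.

class FakeProperty:
    def __init__(self, value):
        self.value = value

class FakePersonRecord:
    """Stand-in for a LinkAhead person Record; only get_property() is needed here."""
    def __init__(self, name, abbreviation=None):
        self.name = name
        self._props = {"Abbreviation": FakeProperty(abbreviation)} if abbreviation else {}

    def get_property(self, key):
        return self._props.get(key)

def get_person_identifier_from_rec(person_rec) -> str:
    # Prefer the abbreviation if it is present and non-empty (as in the patch;
    # the real code resolves the property name via get_entity_name("abbreviation_prop")).
    prop = person_rec.get_property("Abbreviation")
    if prop is not None and prop.value:
        return prop.value
    # Otherwise fall back to something human readable (assumption, not shown in the hunk).
    return person_rec.name

print(get_person_identifier_from_rec(FakePersonRecord("Jane Doe", "JD")))  # -> JD
print(get_person_identifier_from_rec(FakePersonRecord("John Smith")))      # -> John Smith

On the export side, extract_person() now maps each referenced person record through this helper, so exported CSVs show the same identifier that the registration form produces.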