diff --git a/sample-management-custom/caosdb-server/scripting/bin/crawl_sample_data_async.py b/sample-management-custom/caosdb-server/scripting/bin/crawl_sample_data_async.py index 804e8e44afad2012b9af6d72bcf8bf12107df680..ace23eef4c481fdc20057fce83ef44d4c45dbca5 100755 --- a/sample-management-custom/caosdb-server/scripting/bin/crawl_sample_data_async.py +++ b/sample-management-custom/caosdb-server/scripting/bin/crawl_sample_data_async.py @@ -51,7 +51,7 @@ from sample_helpers.sample_upload_add_special_properties import add_special_prop from sample_helpers.sample_upload_get_event import add_event_to_sample from sample_helpers.sample_upload_get_person import get_person from sample_helpers.sample_upload_post_processing import post_process_samples -from sample_helpers.utils import get_column_header_name, get_entity_name +from sample_helpers.utils import (get_column_header_name, get_entity_name, update_property) # suppress warning of diff function apilogger = logging.getLogger("linkahead.apiutils") @@ -71,226 +71,6 @@ def _is_ignored_column_name(name, parent_suffix="_parent"): return name in IGNORED_COLUMN_NAMES_SAMPLE or name.endswith(parent_suffix) -def _update_property(entity: db.Record, property_id: int, value, property_name="", datatype=None): - """ - Set the property of an entity. - - If the entity already has the property, just the value is set. - Else, the property is added to the entity - """ - # TODO: Replace by assure_property_is etc. 
- - # If the value in the spreadsheet is empty (nan) - if ((isinstance(value, list) and len(value) == 0) - or (not isinstance(value, list) and pd.isna(value))): - # Remove the property from te Entity if it has it - try: - entity.get_properties().get_by_name(property_name) - entity.remove_property(property_name) - except KeyError: - pass - return entity - if entity.get_property(property_id) is None: - if datatype: - entity.add_property(id=property_id, value=value, name=property_name, datatype=datatype) - else: - entity.add_property(id=property_id, value=value, name=property_name) - logger.debug("{}: Adding {} = {}".format(entity.id, property_id, value.id if - isinstance(value, db.Entity) else value)) - else: - if isinstance(value, list) and not entity.get_property(property_id).datatype.startswith("LIST"): - entity.get_property(property_id).datatype = db.LIST( - entity.get_property(property_id).datatype) - entity.get_property(property_id).value = value - logger.debug("{}: Setting {} = {}".format(entity.id, property_id, value.id if - isinstance(value, db.Entity) else value)) - return entity - - -def get_container(data): - """ - Retrun the BIS ID of the Container Record that is identified by 'Storage contianer' in data. - A Container can either be identified via a BIS ID or via a BIS Label. - - If no Container can be identified, an Error is raised, since creating/registering new - Containers has to be done before registering samples. 
- """ - identified_by_label = False - container_identifier = data["Storage ID"] - # If the ID is not spcified, try to get the label - if "Storage Container Label" in data and pd.isnull(container_identifier): - container_identifier = data["Storage Container Label"] - identified_by_label = True - - if identified_by_label: - container = _get_container_by_label(container_identifier) - else: - container = _get_container_by_id(container_identifier) - - if container is not None: - return container - else: - msg = "Container: '{}' could not be identified.".format(container_identifier) - raise DataInconsistencyError(msg) - - -def _get_container_by_id(id): - res = db.execute_query("FIND RECORD Container WITH id = '{}'".format(id)) - if len(res) > 0: - return res[0] - else: - return None - - -def _get_container_by_label(label): - res = db.execute_query("FIND RECORD Container WITH 'BIS Label' = '{}'".format(label)) - if len(res) > 0: - return res[0] - else: - return None - - -def get_event(data, gear_id): - # Events only have names if they have the subevent property. 
- if "Subevent" in data and return_value_if_not_none(data["Subevent"]) is not None: - event_name = f"{data['Subevent']}" - return _create_new_source_event(event_name, data, gear_id) - - return _create_new_source_event(name=None, data=data, gear_id=gear_id) - - -def _create_new_source_event(name, data, gear_id) -> db.Record: - event = db.Record(name) - event.add_parent("SourceEvent") - event = _append_times_to_entity(event, data) - - event.add_property(name="Gear", value=gear_id) - event.add_property(name="Position", value=_get_positions( - data), datatype=db.common.datatype.LIST("Position")) # type: ignore - if "Station ID" in data and return_value_if_not_none(data["Station ID"]) is not None: - event.add_property(name="Station ID", value=str(data["Station ID"])) - if "Station number" in data and return_value_if_not_none(data["Station number"]) is not None: - event.add_property(name="Station number", value=str(data["Station number"])) - if "Hol" in data and return_value_if_not_none(data["Hol"]) is not None: - event.add_property(name="Hol", value=str(data["Hol"])) - return event - - -def _get_positions(data): - # TODO: Refactor - if "Latitude start" in data: - latitude_start = return_value_if_not_none(data["Latitude start"]) - else: - latitude_start = None - if "Latitude stop" in data: - latitude_stop = return_value_if_not_none(data["Latitude stop"]) - else: - latitude_stop = None - if "Longitude start" in data: - longitude_start = return_value_if_not_none(data["Longitude start"]) - else: - longitude_start = None - if "Longitude stop" in data: - longitude_stop = return_value_if_not_none(data["Longitude stop"]) - else: - longitude_stop = None - if "Sampling depth start" in data: - sampling_depth_start = return_value_if_not_none(data["Sampling depth start"]) - else: - sampling_depth_start = None - if "Sampling depth stop" in data: - sampling_depth_stop = return_value_if_not_none(data["Sampling depth stop"]) - else: - sampling_depth_stop = None - if "Water depth start" 
in data: - water_depth_start = return_value_if_not_none(data["Water depth start"]) - else: - water_depth_start = None - if "Water depth stop" in data: - water_depth_stop = return_value_if_not_none(data["Water depth stop"]) - else: - water_depth_stop = None - # insert start position - position_start = db.Record() - position_start.add_parent("StartPosition") - position_start.add_property(name="Latitude", value=latitude_start) - position_start.add_property(name="Longitude", value=longitude_start) - if not pd.isna(sampling_depth_start): - if sampling_depth_start < 0.0: - sampling_depth_start *= -1.0 - # identifiable, so add even if it is None - position_start.add_property(name="Sampling depth", value=sampling_depth_start) - if not pd.isna(water_depth_start): - if water_depth_start < 0: - water_depth_start *= -1 - # identifiable, so add even if it is None - position_start.add_property(name="Water depth", value=water_depth_start) - - # A stop position may be specified by depth stop alone: - if not (pd.isna(sampling_depth_stop) and pd.isna(water_depth_stop)): - # Copy all empty info from start position - if pd.isna(latitude_stop) and pd.isna(longitude_stop): - latitude_stop = latitude_start - longitude_stop = longitude_start - if pd.isna(sampling_depth_stop): - sampling_depth_stop = sampling_depth_start - if pd.isna(water_depth_stop): - water_depth_stop = water_depth_start - # If there is an endposition: insert endposition - if not (pd.isna(latitude_stop) or pd.isna(longitude_stop)): - - position_end = db.Record() - # position_end = db.Record("({}, {})".format(latitude_stop, longitude_stop)) - position_end.add_parent("StopPosition") - position_end.add_property(name="Latitude", value=latitude_stop) - position_end.add_property(name="Longitude", value=longitude_stop) - if not pd.isna(sampling_depth_stop): - if sampling_depth_stop < 0: - sampling_depth_stop *= -1 - # position_end.name = position_end.name + " at -{}m".format(sampling_depth_stop) - # identifiable, so add even 
if it is None - position_end.add_property(name="Sampling depth", value=sampling_depth_stop) - if not pd.isna(water_depth_stop): - if water_depth_stop < 0: - water_depth_stop *= -1 - # identifiable, so add even if it is None - position_end.add_property(name="Water depth", value=water_depth_stop) - # position_end.insert(unique=False) - return [position_start, position_end] - else: - return [position_start] - - -def get_gear(data): - """ - Return the BIS ID of the Gear that is specified by 'Gear' and 'Gear configuration' in data. - - If no Such Gear Record exists, a new Gear Record is created. - """ - - qtext = f"FIND RECORD '{data['Gear']}'" - if "Gear configuration" in data and pd.notnull(data["Gear configuration"]): - qtext += f" WITH 'Configuration'='{data['Gear configuration']}'" - try: - res = db.execute_query(qtext, unique=True) - except db.exceptions.EmptyUniqueQueryError: - raise DataInconsistencyError(f"The query\n{qtext}\nreturned no results.") - except db.exceptions.QueryNotUniqueError: - raise DataInconsistencyError(f"The query\n{qtext}\nreturned more than one result.") - return res - - -def get_nagoya_case(data): - """Create and retrun a NagoyaCase Record.""" - nagoya_case_number = return_value_if_not_none(data["Nagoya case number"]) - - nagoya_case = db.Record(nagoya_case_number) - nagoya_case.add_parent(name="NagoyaCase") - nagoya_case.add_property(name="Nagoya Case Number", value=nagoya_case_number) - - return nagoya_case - - def synchroize(records, additional_property_ents, htmluserlog_public): crawler = Crawler(securityMode=SecurityMode.UPDATE) identifiables_definition_file = os.path.expanduser("~/identifiables.yml") @@ -326,15 +106,6 @@ def update_sample_records(data, htmluserlog_public): # TODO Check data first and if there are Errors in the data: Provide the user with a download # link to a template with Error descriptions. 
- # Get property ids: - person_property_id = db.get_entity_by_name("Main User").id - sampling_person_property_id = db.get_entity_by_name("Sampling Person").id - nagoya_case_property_id = get_id_of_datatype("NagoyaCase") - container_property_id = get_id_of_datatype("Container") - event_property_id = get_id_of_datatype("SourceEvent") - pdfreport_property_id = get_id_of_datatype("PDFReport") - parent_sample_property_id = db.get_entity_by_name("Parent Sample").id - additional_properties = data.keys().to_list() additional_property_ids = {} # name-> id additional_property_ents = {} # name-> Entity @@ -360,14 +131,17 @@ def update_sample_records(data, htmluserlog_public): for index, row in data.iterrows(): - sample_id_exists = not pd.isnull(row["BIS ID"]) + sample_id_exists = not pd.isnull(row[get_column_header_name("entity_id")]) if not sample_id_exists: raise DataInconsistencyError(f"Missing sample ID in row {index}") try: sample = db.execute_query( - "FIND RECORD Sample WITH id = {}".format(row["BIS ID"]), unique=True) + "FIND RECORD Sample WITH id = {}".format( + row[get_column_header_name("entity_id")]), + unique=True) except db.exceptions.EmptyUniqueQueryError: - msg = "There is no Sample with ID = {} in the system.".format(row["BIS ID"]) + msg = "There is no Sample with ID = {} in the system.".format( + row[get_column_header_name("entity_id")]) raise DataInconsistencyError(msg) # All special properties are added here @@ -392,13 +166,15 @@ def update_sample_records(data, htmluserlog_public): name=ent.properties[0].name, value=return_value_if_not_none(row[property_name])) else: value = return_value_if_not_none(row[property_name]) - sample = _update_property( + sample = update_property( entity=sample, property_id=additional_property_ids[property_name], value=value, property_name=property_name) + # Now, treat events and event data sample = add_event_to_sample(sample, row) samples.append(sample) + # Samples might need additional post processing samples = 
def update_property(entity: db.Record, property_id: int, value, property_name="", datatype=None):
    """
    Set, add or remove a single property of an entity.

    What happens depends on ``value`` and on the current state of ``entity``:

    - If ``value`` is empty (an empty list, or a non-list value for which
      ``pd.isna`` is true), the property called ``property_name`` is removed
      from the entity if it has one, and nothing else is done.
    - Else, if the entity has no property with id ``property_id``, the
      property is added (with ``datatype`` only if one was given).
    - Else, only the value of the existing property is replaced.  If the new
      value is a list and the existing datatype is a string that does not
      start with ``"LIST"``, the datatype is wrapped in ``db.LIST`` first.

    Parameters
    ----------
    entity : db.Record
        The entity to modify; it is changed in place.
    property_id : int
        Id used to look up the property and to add it if missing.
    value
        New value; a list, a ``db.Entity`` or a scalar.
    property_name : str, optional
        Name used when adding the property and when removing it.
    datatype : optional
        Datatype passed to ``add_property`` when the property is added.

    Returns
    -------
    db.Record
        The same ``entity`` object that was passed in.
    """
    # TODO: Replace by assure_property_is etc.

    # Imported locally because this function was moved here from the crawler
    # script, and the visible part of this module does not import pandas or
    # define a logger.
    import logging

    import pandas as pd

    logger = logging.getLogger(__name__)

    # If the value in the spreadsheet is empty (nan or an empty list)
    if isinstance(value, list):
        is_empty = not value
    else:
        is_empty = pd.isna(value)

    if is_empty:
        # Remove the property from the Entity if it has it.  Best effort:
        # a KeyError from the lookup means there is nothing to remove.
        try:
            entity.get_properties().get_by_name(property_name)
            entity.remove_property(property_name)
        except KeyError:
            pass
        return entity

    # Log the id of referenced entities instead of their full representation.
    logged_value = value.id if isinstance(value, db.Entity) else value

    existing = entity.get_property(property_id)
    if existing is None:
        add_kwargs = {"id": property_id, "value": value, "name": property_name}
        if datatype:
            add_kwargs["datatype"] = datatype
        entity.add_property(**add_kwargs)
        logger.debug("%s: Adding %s = %s", entity.id, property_id, logged_value)
        return entity

    current_datatype = existing.datatype
    # Only string datatypes can be checked with startswith; a missing (None)
    # or non-string datatype is left untouched instead of raising
    # AttributeError.
    if (isinstance(value, list)
            and isinstance(current_datatype, str)
            and not current_datatype.startswith("LIST")):
        existing.datatype = db.LIST(current_datatype)
    existing.value = value
    logger.debug("%s: Setting %s = %s", entity.id, property_id, logged_value)
    return entity