Skip to content

Sketch Upload API with Tus Hooks

Sketch API for

Current File Transmission gRPC API

caosdb/entity/v1/main.proto
// Stores a single chunk of a file, tagged with the transmission it belongs
// to.
message FileChunk {
  // Temporary identifier carrying the file_id and the registration_id.
  FileTransmissionId file_transmission_id = 1;
  // Binary data of the chunk.
  bytes data = 2;
}

// Temporary identifier of a single file during a transmission.
message FileTransmissionId {
  // The registration id which has been issued by the target of the
  // transmission.
  string registration_id = 1;
  // A temporary identifier which identifies the file of this chunk. The
  // file_id is also used by transactions to associate entities (which are to
  // be inserted or updated) with a binary blob.
  string file_id = 2;
}

// Settings for the file transmission.
message FileTransmissionSettings {
  // The maximum chunk size.
  // NOTE(review): the unit is not stated here; presumably bytes - confirm.
  int64 max_chunk_size = 1;
  // The maximum file size.
  // NOTE(review): the unit is not stated here; presumably bytes - confirm.
  int64 max_file_size = 2;
}

// Indicates whether a registration (for upload or download) has been accepted
// or rejected.
enum RegistrationStatus {
  // The registration status is unspecified. As the zero value this is also
  // the default of a status field which has not been set.
  REGISTRATION_STATUS_UNSPECIFIED = 0;
  // The registration has been accepted and the client may proceed with the
  // actual transmissions.
  REGISTRATION_STATUS_ACCEPTED = 1;
  // The registration has been rejected and the client should not try to
  // proceed with the transmission.
  REGISTRATION_STATUS_REJECTED = 2;
}

// Indicates the state of an upload or a download (a stream of chunks).
enum TransmissionStatus {
  // The transmission status is unspecified. As the zero value this is also
  // the default of a status field which has not been set.
  TRANSMISSION_STATUS_UNSPECIFIED = 0;
  // The transmission has been successful.
  TRANSMISSION_STATUS_SUCCESS = 1;
  // The transmission terminated with errors.
  TRANSMISSION_STATUS_ERROR = 2;
  // The transmission is incomplete and the client may send/request the next
  // chunk.
  TRANSMISSION_STATUS_GO_ON = 3;
}

// Register a file upload. The request currently carries no fields.
message RegisterFileUploadRequest {}

// Response of the file server upon an upload registration request.
message RegisterFileUploadResponse {
  // Whether the server accepted or rejected the registration.
  RegistrationStatus status = 1;
  // The registration id is used to identify chunks and files which belong to
  // the same upload.
  string registration_id = 2;
  // The server's transmission settings for the upload.
  // NOTE(review): field number 3 is not used in this message. If it belonged
  // to a removed field, declare `reserved 3;` so that it cannot be reused.
  FileTransmissionSettings upload_settings = 4;
}

// Request for a file upload which has been registered previously. Chunks may be
// sent in any order.
message FileUploadRequest {
  // A single file chunk.
  FileChunk chunk = 1;
}

// Response of the server upon a finished FileUpload.
message FileUploadResponse {
  // Status of the upload (see TransmissionStatus).
  TransmissionStatus status = 1;
}

// Request for a file download which has been registered previously.
message FileDownloadRequest {
  // Identifies the file (file_id) and the registration (registration_id) for
  // which the next chunk is requested.
  FileTransmissionId file_transmission_id = 1;
}

// Response containing a chunk of a file.
message FileDownloadResponse {
  // Status of the download (see TransmissionStatus).
  TransmissionStatus status = 1;
  // A single file chunk.
  FileChunk chunk = 2;
}

// File transmission service: registration, upload and download of files.
service FileTransmissionService {
  // Register a file upload. This needs to be done prior to the actual upload
  // and prior to the transaction request which uses the uploaded files.
  rpc RegisterFileUpload(RegisterFileUploadRequest)
      returns (RegisterFileUploadResponse);
  // The actual file upload. The upload has to be registered prior to this rpc.
  // The client streams its requests and receives a single response.
  rpc FileUpload(stream FileUploadRequest) returns (FileUploadResponse);
  // A file download. The download has to be registered prior to this rpc in
  // the RetrieveRequest. The server answers with a stream of responses.
  rpc FileDownload(FileDownloadRequest) returns (stream FileDownloadResponse);
}

Available tusd hooks

| Hook name | Blocking? | Triggered … | Useful for … | Enabled by default? |
|---|---|---|---|---|
| pre-create | Yes | before a new upload is created. | validation of meta data, user authentication, specification of custom upload ID | Yes |
| post-create | No | after a new upload is created. | registering the upload with the main application, logging of upload begin | Yes |
| post-receive | No | regularly while data is being transmitted. | logging upload progress, stopping running uploads | Yes |
| pre-finish | Yes | after all upload data has been received but before a response is sent. | sending custom data when an upload is finished | No |
| post-finish | No | after all upload data has been received and after a response is sent. | post-processing of upload, logging of upload end | Yes |
| post-terminate | No | after an upload has been terminated. | clean up of allocated resources | Yes |

Suggested new API

// A checksum together with the algorithm which produced it.
message Checksum {
  // The checksum value.
  // NOTE(review): the textual encoding (e.g. hex) is not specified - confirm.
  string checksum = 1;
  // Name of the algorithm used to compute the checksum.
  // NOTE(review): the set of accepted names is not specified - confirm.
  string checksum_algorithm = 2;
}

// Describes a single file which the client wants to upload.
message FileInformation {
  // Name of the file on the client side.
  // NOTE(review): presumably the path the client reads from - confirm.
  string local_filename = 1;
  // Name of the file at its target.
  // NOTE(review): presumably the name/path on the server - confirm.
  string target_filename = 2;
  // Size of the file in bytes. int64 is used because `int` is not a protobuf
  // scalar type and FileTransmissionSettings.max_file_size is an int64, too.
  int64 size_bytes = 3;
  // Checksum of the file.
  Checksum checksum = 4;
}

// Register a file upload, announcing the files which are to be uploaded.
message RegisterFileUploadRequest {
  // Information about each file of this upload. The field needs a name to be
  // valid protobuf; `files` matches the `files=[...]` argument used by the
  // pseudo Python client.
  repeated FileInformation files = 1;
}

// A location to which a file can be uploaded.
message UploadLocation {
  // URL of the upload location.
  string url = 1;
}

// Version of the upload protocol, split into its three numeric components.
// `int` is not a protobuf scalar type; version components are never negative,
// hence uint32.
message UploadProtocolVersion {
  // Major version component.
  uint32 major = 1;
  // Minor version component.
  uint32 minor = 2;
  // Patch version component.
  uint32 patch = 3;
}

// Response of the file server upon an upload registration request.
message RegisterFileUploadResponse {
  // Whether the server accepted or rejected the registration.
  RegistrationStatus status = 1;
  // Identifier of this registration.
  string registration_id = 2;
  // The server's transmission settings for the upload.
  // NOTE(review): field number 3 is not used in this message. If it belonged
  // to a removed field, declare `reserved 3;` so that it cannot be reused.
  FileTransmissionSettings upload_settings = 4;
  // Version of the upload protocol.
  // NOTE(review): presumably the version the server speaks - confirm.
  UploadProtocolVersion version = 5;
  // Locations (URLs) to which the client uploads the files.
  // NOTE(review): presumably one entry per registered file, in request
  // order - confirm.
  repeated UploadLocation location = 6;
}


...

// File transmission service: registration, upload and download of files.
service FileTransmissionService {
  // Register a file upload. The response carries the registration status and
  // the upload locations.
  rpc RegisterFileUpload(RegisterFileUploadRequest)
      returns (RegisterFileUploadResponse);

  // Upload of a file as a client-side stream of requests, answered by a
  // single response.
  rpc FileUpload(stream FileUploadRequest) returns (FileUploadResponse);
  // Download of a file; the server answers with a stream of responses.
  rpc FileDownload(FileDownloadRequest) returns (stream FileDownloadResponse);
}

Pseudo Python Client:


# Pseudo client: register an upload via gRPC, then push the file with tus.
# `grpc_client` and `REGISTRATION_STATUS_ACCEPTED` are placeholders which are
# expected to come from the generated gRPC client code.
from tusclient.uploader import Uploader


filename = "bla.foo"
metadata = {}
# Ask the server for permission to upload and for the upload location(s).
response = grpc_client.register_file_upload(files=[filename], metadata=metadata)

# Stop here unless the server accepted the registration. (`return` is not
# allowed at module level, so the script exits instead.)
if response.status != REGISTRATION_STATUS_ACCEPTED:
    raise SystemExit("file upload registration was not accepted")

# `location` is a repeated field. Exactly one file was registered above, so
# the first entry is used.
# NOTE(review): assumes locations are returned in request order - confirm.
uploader = Uploader(filename,
                    url=response.location[0].url,
                    chunk_size=200)
# Creating the Uploader does not transfer anything yet; start the upload.
uploader.upload()

Example pseudo-Python LinkAhead server: the server must implement the `InvokeHook` RPC from tusd's `hook.proto`. It can then react to finished uploads (move/rename files, clean up temporary files, verify checksums, ...).

class HookHandler(pb2_grpc.HookHandlerServicer):
    """Example gRPC servicer which answers hook requests sent by tusd.

    Only the ``pre-create`` and ``post-finish`` hooks are acted upon; every
    other hook type is answered with an empty ``HookResponse``.
    """

    def __init__(self, *args, **kwargs):
        # The handler keeps no state; constructor arguments are ignored.
        pass

    def InvokeHook(self, hook_request, context):
        """Handle a single hook invocation.

        :param hook_request: the hook request; ``type`` selects the hook and
            ``event.upload`` describes the upload.
        :param context: the gRPC call context (unused).
        :returns: the ``HookResponse`` which is sent back to tusd.
        """
        # Print data from hook request for debugging
        print('Received hook request:')
        print(hook_request)

        # Prepare hook response structure
        hook_response = pb2.HookResponse()

        # A request carries exactly one hook type, so the branches are
        # mutually exclusive.
        if hook_request.type == 'pre-create':
            self._handle_pre_create(hook_request, hook_response)
        elif hook_request.type == 'post-finish':
            self._handle_post_finish(hook_request)

        # Print data of hook response for debugging
        print('Responding with hook response:')
        print(hook_response)
        print('------')
        print('')

        # Return the hook response to send back to tusd
        return hook_response

    @staticmethod
    def _handle_pre_create(hook_request, hook_response):
        """Reject uploads without a filename, otherwise set id and metadata.

        If no ``filename`` was supplied via metadata, the upload is rejected
        with a custom HTTP response. Otherwise a custom upload ID with a
        prefix is supplied and the metadata is reduced to the filename and
        the creation time.
        """
        meta_data = hook_request.event.upload.metaData
        if 'filename' not in meta_data:
            hook_response.rejectUpload = True
            hook_response.httpResponse.statusCode = 400
            hook_response.httpResponse.body = 'no filename provided'
            hook_response.httpResponse.headers['X-Some-Header'] = 'yes'
            return

        file_info = hook_response.changeFileInfo
        file_info.id = f'prefix-{uuid.uuid4()}'
        file_info.metaData['filename'] = meta_data['filename']
        file_info.metaData['creation_time'] = time.ctime()

    @staticmethod
    def _handle_post_finish(hook_request):
        """Print information about a completed upload, including its storage."""
        upload = hook_request.event.upload
        # Named `upload_id` to avoid shadowing the builtin `id`.
        upload_id = upload.id
        size = upload.size

        print(f'Upload {upload_id} ({size} bytes) is finished. Find the file at:')
        print(upload.storage)

Happy Path of an Upload via gRPC

  1. Authenticated LinkAhead client sends request to upload files (via gRPC or HTTP).
  2. LinkAhead server (internally) requests upload locations from tusd.
  3. (optional: tusd validates upload request against LinkAhead server via pre-create hook)
  4. LinkAhead client receives upload locations from LinkAhead server.
  5. LinkAhead client uses tus client to upload files.
  6. LinkAhead server moves the files to their target location and inserts them into the database after being informed via the post-finish hook.
Edited by Joscha Schmiedt