diff --git a/cudnn-sys/build.rs b/cudnn-sys/build.rs index 57a8433..494e2c6 100644 --- a/cudnn-sys/build.rs +++ b/cudnn-sys/build.rs @@ -62,7 +62,7 @@ fn main() { // requires a nightly rustc and enabling // unstable features. .rust_target(bindgen::RustTarget::Stable_1_19) - .hide_type("max_align_t") // https://github.com/servo/rust-bindgen/issues/550 + .blacklist_type("max_align_t") // https://github.com/servo/rust-bindgen/issues/550 .raw_line( r" //! Defines the FFI for CUDA cuDNN. diff --git a/cudnn/src/api/rnn.rs b/cudnn/src/api/rnn.rs index eee0f11..9899917 100644 --- a/cudnn/src/api/rnn.rs +++ b/cudnn/src/api/rnn.rs @@ -4,13 +4,21 @@ use crate::ffi::*; use crate::{Error, API}; +use utils::DataType; +// Workspace impl API { - // /// - // /// cuDNN RNN Configuration - // /// - /// Returns the workspace size in byte, which are needed for the given rnnal algorithm. + /// + /// # Arguments + /// * `rnn_desc` Previously initialised RNN Descriptor + /// * `unroll_sequence_length` Length of iterations + /// * `x_desc` An array of tensor descriptors describing the input to each recurrent iteration + /// (one descriptor per iteration). The first dimension (batch size) of the tensors may decrease + /// from element n to element n+1 but may not increase. For example, if you have multiple + /// time series in a batch, they can be different lengths. + /// This dimension is the batch size for the particular iteration of the sequence, + /// and so it should decrease when a sequence in the batch has been terminated. 
pub fn get_rnn_workspace_size( handle: cudnnHandle_t, rnn_desc: cudnnRNNDescriptor_t, @@ -26,7 +34,6 @@ impl API { ) } } - unsafe fn ffi_get_rnn_workspace_size( handle: cudnnHandle_t, rnn_desc: cudnnRNNDescriptor_t, @@ -37,38 +44,105 @@ impl API { let size_ptr: *mut ::libc::size_t = &mut size; match cudnnGetRNNWorkspaceSize(handle, rnn_desc, unroll_sequence_length, x_desc.as_ptr(), size_ptr) { cudnnStatus_t::CUDNN_STATUS_SUCCESS => Ok(size), - cudnnStatus_t::CUDNN_STATUS_BAD_PARAM => Err(Error::BadParam("At least one of the following conditions are met: One of the parameters `handle`, `x_desc`, `rnn_desc` is NULL. The tensors in `x_desc` are not of the same data type. The batch size of the tensors `x_desc` are not decreasing or staying constant.")), + cudnnStatus_t::CUDNN_STATUS_BAD_PARAM => Err(Error::BadParam("At least one of the following conditions are met: One of the parameters `x_desc`, `rnn_desc` is NULL. The tensors in `x_desc` are not of the same data type. The batch size of the tensors `x_desc` are not decreasing or staying constant.")), cudnnStatus_t::CUDNN_STATUS_NOT_SUPPORTED => Err(Error::NotSupported("The data type used in `src_desc` is not supported for RNN.")), _ => Err(Error::Unknown("Unable to get CUDA cuDNN RNN Forward Workspace size.")), } } - // cudnnStatus_t - // cudnnGetRNNParamsSize( cudnnHandle_t - // const cudnnRNNDescriptor_t - // const cudnnTensorDescriptor_t - // size_t - // cudnnDataType_t dataType) - - // cudnnStatus_t - // cudnnGetRNNTrainingReserveSize( cudnnHandle_t - // const cudnnRNNDescriptor_t - // const int seqLength, - // const cudnnTensorDescriptor_t - // size_t - // handle, - // rnnDesc, - // *xDesc, - // *sizeInBytes) - - // - // cuDNN RNN - // + /// Size of Reserve Space for RNN Training [cudnnGetRNNTrainingReserveSize][1] + /// # Arguments + /// * `handle` Handle to cudNN Library Descriptor + /// * `rnn_desc` Previously initialised RNN Descriptor + /// * `seq_length` Number of iterations to unroll over - must not exceed 
workspace size seq_len + /// * `x_desc` Array of tensor descriptors describing each recurrent iteration - one per element + /// in the RNN sequence + /// [1]: https://docs.nvidia.com/deeplearning/sdk/cudnn-api/index.html#cudnnGetRNNTrainingReserveSize + pub fn get_rnn_training_reserve_size( + handle: cudnnHandle_t, + rnn_desc: cudnnRNNDescriptor_t, + seq_length: ::libc::c_int, + x_desc: Vec + ) -> Result { + unsafe { + API::ffi_get_rnn_training_reserve_size( + handle, rnn_desc, seq_length, x_desc.as_slice() + ) + } + } + unsafe fn ffi_get_rnn_training_reserve_size( + handle: cudnnHandle_t, + rnn_desc: cudnnRNNDescriptor_t, + seq_length: ::libc::c_int, + x_desc: &[cudnnTensorDescriptor_t] + ) -> Result<::libc::size_t, Error> { + let mut size: ::libc::size_t = 0; + let size_ptr : *mut ::libc::size_t = &mut size; + match cudnnGetRNNTrainingReserveSize(handle, rnn_desc,seq_length, x_desc.as_ptr(), size_ptr) { + cudnnStatus_t::CUDNN_STATUS_SUCCESS => Ok(size), + cudnnStatus_t::CUDNN_STATUS_BAD_PARAM => Err(Error::BadParam("At least one of the following conditions are met: One of the parameters `handle`, `x_desc`, `rnn_desc` is NULL. The tensors in `x_desc` are not of the same data type. 
The batch size of the tensors `x_desc` are not decreasing or staying constant.")), + cudnnStatus_t::CUDNN_STATUS_NOT_SUPPORTED => Err(Error::NotSupported("The data type used in `src_desc` is not supported for RNN.")), + _ => Err(Error::Unknown("Unable to get CUDA cuDNN RNN Training Reserve size.")), + } + } + /// cudnnGetRNNParamsSize[1] + /// Query the amount of parameter space needed to execute the RNN for rnnDesc, given xDesc + /// # Parameters + /// * `handle` CUDNN Handle + /// * `rnn_desc` Descriptor for the RNN + /// * `x_desc` Input Tensor + /// * `dataType` Data Type for the Input Tensor + /// [1]: https://docs.nvidia.com/deeplearning/sdk/cudnn-api/index.html#cudnnGetRNNParamsSize + pub fn get_rnn_params_size( + handle: cudnnHandle_t, + rnn_desc: cudnnRNNDescriptor_t, + x_desc: cudnnTensorDescriptor_t, + data_type: DataType + ) -> Result { + unsafe { + API::ffi_get_rnn_params_size( + handle, + rnn_desc, + x_desc, + API::to_cudnn_data_type(data_type) + ) + } + } + unsafe fn ffi_get_rnn_params_size( + handle: cudnnHandle_t, + rnn_desc: cudnnRNNDescriptor_t, + x_desc: cudnnTensorDescriptor_t, + data_type: cudnnDataType_t + ) -> Result<::libc::size_t, Error> { + let mut size: ::libc::size_t = 0; + let size_ptr: *mut ::libc::size_t = &mut size; + match cudnnGetRNNParamsSize(handle, rnn_desc,x_desc, size_ptr, data_type) { + cudnnStatus_t::CUDNN_STATUS_SUCCESS => Ok(size), + cudnnStatus_t::CUDNN_STATUS_BAD_PARAM => Err(Error::BadParam("One of the following; rnnDesc is invalid, x_desc is invalid, x_desc isn't fully packed, dataType & tensor Description type don't match")), + cudnnStatus_t::CUDNN_STATUS_NOT_SUPPORTED => Err(Error::NotSupported("The data type used in `rnn_desc` is not supported for RNN.")), + _ => Err(Error::Unknown("Unable to get CUDA cuDNN RNN Params Size")), + } + } +} +// Descriptors +impl API { /// Creates a generic CUDA cuDNN RNN Descriptor. 
pub fn create_rnn_descriptor() -> Result { unsafe { API::ffi_create_rnn_descriptor() } } + unsafe fn ffi_create_rnn_descriptor() -> Result { + let mut rnn_desc: cudnnRNNDescriptor_t = ::std::ptr::null_mut(); + match cudnnCreateRNNDescriptor(&mut rnn_desc) { + cudnnStatus_t::CUDNN_STATUS_SUCCESS => Ok(rnn_desc), + cudnnStatus_t::CUDNN_STATUS_ALLOC_FAILED => { + Err(Error::AllocFailed("The resources could not be allocated")) + } + _ => Err(Error::Unknown( + "Unable to create generic CUDA cuDNN RNN Descriptor", + )), + } + } /// Destroys a CUDA cuDNN RNN Descriptor. /// @@ -76,6 +150,14 @@ impl API { pub fn destroy_rnn_descriptor(desc: cudnnRNNDescriptor_t) -> Result<(), Error> { unsafe { API::ffi_destroy_rnn_descriptor(desc) } } + unsafe fn ffi_destroy_rnn_descriptor(rnn_desc: cudnnRNNDescriptor_t) -> Result<(), Error> { + match cudnnDestroyRNNDescriptor(rnn_desc) { + cudnnStatus_t::CUDNN_STATUS_SUCCESS => Ok(()), + _ => Err(Error::Unknown( + "Unable to destroy CUDA cuDNN RNN Descriptor", + )), + } + } /// Initializes a generic CUDA cuDNN RNN Descriptor with specific properties. 
pub fn set_rnn_descriptor( @@ -88,8 +170,14 @@ impl API { direction: cudnnDirectionMode_t, mode: cudnnRNNMode_t, algorithm: cudnnRNNAlgo_t, - data_type: cudnnDataType_t, + data_type: DataType, ) -> Result<(), Error> { + let data_type = match data_type { + DataType::Float => cudnnDataType_t::CUDNN_DATA_FLOAT, + DataType::Double => cudnnDataType_t::CUDNN_DATA_DOUBLE, + DataType::Half => cudnnDataType_t::CUDNN_DATA_HALF + }; + unsafe { API::ffi_set_rnn_descriptor( handle, @@ -105,7 +193,6 @@ impl API { ) } } - unsafe fn ffi_set_rnn_descriptor( handle: cudnnHandle_t, desc: cudnnRNNDescriptor_t, @@ -118,6 +205,7 @@ impl API { algorithm: cudnnRNNAlgo_t, data_type: cudnnDataType_t, ) -> Result<(), Error> { + match cudnnSetRNNDescriptor( handle, desc, @@ -137,245 +225,573 @@ impl API { } } - unsafe fn ffi_create_rnn_descriptor() -> Result { - let mut rnn_desc: cudnnRNNDescriptor_t = ::std::ptr::null_mut(); - match cudnnCreateRNNDescriptor(&mut rnn_desc) { - cudnnStatus_t::CUDNN_STATUS_SUCCESS => Ok(rnn_desc), - cudnnStatus_t::CUDNN_STATUS_ALLOC_FAILED => { - Err(Error::AllocFailed("The resources could not be allocated")) - } - _ => Err(Error::Unknown( - "Unable create generic CUDA cuDNN RNN Descriptor", - )), + /// Set RNN Matrix Math Type [cudnnSetRNNMatrixMathType][1] + /// Required for RNN Operations[2] + /// + /// [1]: https://docs.nvidia.com/deeplearning/sdk/cudnn-api/index.html#cudnnSetRNNMatrixMathType + /// [2]: https://docs.nvidia.com/deeplearning/sdk/cudnn-developer-guide/index.html#tensor-ops-rnn-functions-pre-req + pub fn set_rnn_matrix_math_type(rnn_desc : cudnnRNNDescriptor_t, math_type: cudnnMathType_t) -> Result<(), Error> { + unsafe{ + API::ffi_set_rnn_matrix_math_type(rnn_desc, math_type) } } + unsafe fn ffi_set_rnn_matrix_math_type(rnn_desc: cudnnRNNDescriptor_t, math_type: cudnnMathType_t) -> Result<(), Error> { + match cudnnSetRNNMatrixMathType( + rnn_desc, + math_type + ) { + cudnnStatus_t::CUDNN_STATUS_SUCCESS => Ok(()), + 
cudnnStatus_t::CUDNN_STATUS_BAD_PARAM => Err(Error::BadParam("Either `rnn_desc` is invalid, or the requested math type is invalid for the data type of `rnn_desc`.")), + cudnnStatus_t::CUDNN_STATUS_NOT_SUPPORTED => Err(Error::NotSupported("The requested math type is not supported for this RNN descriptor configuration.")), + _ => Err(Error::Unknown("Unable to set CUDA cuDNN RNN Matrix Math Type.")), + } + } +} - unsafe fn ffi_destroy_rnn_descriptor(rnn_desc: cudnnRNNDescriptor_t) -> Result<(), Error> { - match cudnnDestroyRNNDescriptor(rnn_desc) { +// Forward Training & Inference +impl API { + /// Trains an RNN through the Forward Process + /// + /// # Arguments + /// `handle` Handle to a previously created cuDNN context [0] + /// `rnn_desc` A previously initialised RNN descriptor [1] + /// `seq_length` Number of iterations for the RNN to unroll over. + /// `x_desc` Array of seqLength packed tensor descriptors [1]. Each descriptor should have + /// 3D that describe the input data format to one recurrent iterator - one descriptor per + /// RNN time-step. ```[Batch Size, Input Size, 1]``` + /// Input vectors should be column-major, so should be set + /// ```strideA[0]=inputSize, strideA[1]=1, strideA[2]=1``` + /// `x` Data Pointer to GPU memory associated with the input. + /// `hx_desc` Fully packed tensor descriptor for the initial hidden state of the RNN. + /// `hx` Data pointer for initial hidden state - if null will initialize state to zero. + /// `cx_desc` Tensor descriptor for the initial cell state for an LSTM network. + /// `cx` Data pointer for initial cell state - if null will initialize state to zero. + /// `w_desc` Handle to descriptors for weights + /// `w` Data Pointer to weights + /// `y_desc` Output for each recurrent iteration. Second dimension should match size of the + /// hidden layer. First dimension should match the first dimension of the tensor in input. + /// `y` Output Memory + /// `hy_desc` Final hidden state of the RNN + /// `hy` Memory for final hidden state + /// `cy_desc` Final cell state for the RNN + /// `cy` Memory for the final cell state - can be NULL. 
+ /// `workspace` Data pointer to GPU memory to be used as a workspace for this call + /// `workspace_in_bytes` Size in bytes of the provided workspace + /// `reserve_space` Data pointer for GPU memory to be used as a reserve space for this call + /// `reserve_space_in_bytes` Size in bytes for `reserve_space` + /// [0] https://docs.nvidia.com/deeplearning/sdk/cudnn-api/index.html#cudnnHandle_t + /// [1] https://docs.nvidia.com/deeplearning/sdk/cudnn-api/index.html#cudnnRNNDescriptor_t + /// [2] https://docs.nvidia.com/deeplearning/sdk/cudnn-api/index.html#cudnnFilterDescriptor_t + pub fn rnn_forward_training( + handle: cudnnHandle_t, + rnn_desc: cudnnRNNDescriptor_t, + seq_length: ::libc::c_int, + x_desc: Vec, + x: *const ::libc::c_void, + hx_desc: cudnnTensorDescriptor_t, + hx: *const ::libc::c_void, + cx_desc: cudnnTensorDescriptor_t, + cx: *const ::libc::c_void, + w_desc: cudnnFilterDescriptor_t, + w: *const ::libc::c_void, + y_desc: Vec, + y: *mut ::libc::c_void, + hy_desc: cudnnTensorDescriptor_t, + hy: *mut ::libc::c_void, + cy_desc: cudnnTensorDescriptor_t, + cy: *mut ::libc::c_void, + workspace: *mut ::libc::c_void, + work_space_size_in_bytes: usize, + reserve_space: *mut ::libc::c_void, + reserve_space_size_in_bytes: usize, + ) -> Result<(), Error> { + unsafe { + API::ffi_rnn_forward_training( + handle, + rnn_desc, + seq_length, + x_desc.as_slice(), + x, + hx_desc, + hx, + cx_desc, + cx, + w_desc, + w, + y_desc.as_slice(), + y, + hy_desc, + hy, + cy_desc, + cy, + workspace, + work_space_size_in_bytes, + reserve_space, + reserve_space_size_in_bytes, + ) + } + } + unsafe fn ffi_rnn_forward_training( + handle: cudnnHandle_t, + rnn_desc: cudnnRNNDescriptor_t, + seq_length: ::libc::c_int, + x_desc: &[cudnnTensorDescriptor_t], + x: *const ::libc::c_void, + hx_desc: cudnnTensorDescriptor_t, + hx: *const ::libc::c_void, + cx_desc: cudnnTensorDescriptor_t, + cx: *const ::libc::c_void, + w_desc: cudnnFilterDescriptor_t, + w: *const ::libc::c_void, + y_desc: 
&[cudnnTensorDescriptor_t], + y: *mut ::libc::c_void, + hy_desc: cudnnTensorDescriptor_t, + hy: *mut ::libc::c_void, + cy_desc: cudnnTensorDescriptor_t, + cy: *mut ::libc::c_void, + workspace: *mut ::libc::c_void, + work_space_size_in_bytes: usize, + reserve_space: *mut ::libc::c_void, + reserve_space_size_in_bytes: usize, + ) -> Result<(), Error> { + let status = cudnnRNNForwardTraining( + handle, + rnn_desc, + seq_length, + x_desc.as_ptr(), + x, + hx_desc, + hx, + cx_desc, + cx, + w_desc, + w, + y_desc.as_ptr(), + y, + hy_desc, + hy, + cy_desc, + cy, + workspace, + work_space_size_in_bytes, + reserve_space, + reserve_space_size_in_bytes, + ); + match status { cudnnStatus_t::CUDNN_STATUS_SUCCESS => Ok(()), - _ => Err(Error::Unknown( - "Unable to destroy CUDA cuDNN Dropout Descriptor", - )), + cudnnStatus_t::CUDNN_STATUS_BAD_PARAM => Err(Error::BadParam("At least one of the following conditions was met: rnnDesc is invalid, hx_desc, w_desc, hy_desc, cy_desc, or one of the x_desc or y_desc is invalid. The descriptors for x_desc, cx_desc, hx_desc, w_desc, y_desc, hy_desc, cy_desc have incorrect strides/dimensions. Workspace size is too small. Reserve space size is too small.")), + cudnnStatus_t::CUDNN_STATUS_NOT_SUPPORTED => Err(Error::NotSupported("At least one of the following conditions are met: `src_desc` or `dest_desc` have negative tensor striding. `src_desc`, `rnn_desc` or `dest_desc` has a number of dimensions that is not 4 or 5. 
The chosen algo does not support the parameters provided; see the reference for exhaustive list of parameter support for each algo")), + _ => Err(Error::Unknown("Unable to compute CUDA cuDNN rnnal forward.")), } } -} - -// cudnnStatus_t -// cudnnRNNForwardInference( cudnnHandle_t handle, -// const cudnnRNNDescriptor_t rnnDesc, -// const int seqLength, -// const cudnnTensorDescriptor_t * xDesc, -// const void * x, -// const cudnnTensorDescriptor_t hxDesc, -// const void * hx, -// const cudnnTensorDescriptor_t cxDesc, -// const void * cx, -// const cudnnFilterDescriptor_t wDesc, -// const void * w, -// const cudnnTensorDescriptor_t *yDesc, -// void * y, -// const cudnnTensorDescriptor_t hyDesc, -// void * hy, -// const cudnnTensorDescriptor_t cyDesc, -// void * cy, -// void * workspace, -// size_t workSpaceSizeInBytes) - -// cudnnStatus_t -// cudnnRNNForwardTraining( cudnnHandle_t handle, -// const cudnnRNNDescriptor_t rnnDesc, -// const int seqLength, -// const cudnnTensorDescriptor_t *xDesc, -// const void * x, -// const cudnnTensorDescriptor_t hxDesc, -// const void * hx, -// const cudnnTensorDescriptor_t cxDesc, -// const void * cx, -// const cudnnFilterDescriptor_t wDesc, -// const void * w, -// const cudnnTensorDescriptor_t *yDesc, -// void * y, -// const cudnnTensorDescriptor_t hyDesc, -// void * hy, -// const cudnnTensorDescriptor_t cyDesc, -// void * cy, -// void * workspace, -// size_t workSpaceSizeInBytes, -// void * reserveSpace, -// size_t reserveSpaceSizeInBytes) - -// /// Computes a rnn forward function. 
-// pub fn rnn_forward( -// handle: cudnnHandle_t, -// algo: cudnnRNNFwdAlgo_t, -// conv_desc: cudnnRNNDescriptor_t, -// work_space: *mut ::libc::c_void, -// work_size_in_bytes: ::libc::size_t, -// alpha: *const ::libc::c_void, -// src_desc: cudnnTensorDescriptor_t, -// src_data: *const ::libc::c_void, -// rnn_desc: cudnnRNNDescriptor_t, -// rnn_data: *const ::libc::c_void, -// beta: *const ::libc::c_void, -// dest_desc: cudnnTensorDescriptor_t, -// dest_data: *mut ::libc::c_void, -// ) -> Result<(), Error> { -// unsafe { API::ffi_rnn_forward(handle, alpha, src_desc, src_data, rnn_desc, rnn_data, conv_desc, algo, work_space, work_size_in_bytes, beta, dest_desc, dest_data) } -// } - -// /// Computes a rnn backward function w.r.t the bias. -// pub fn rnn_backward_bias( -// handle: cudnnHandle_t, -// alpha: *const ::libc::c_void, -// src_desc: cudnnTensorDescriptor_t, -// src_data: *const ::libc::c_void, -// beta: *const ::libc::c_void, -// dest_desc: cudnnTensorDescriptor_t, -// dest_data: *mut ::libc::c_void, -// ) -> Result<(), Error> { -// unsafe { API::ffi_rnn_backward_bias(handle, alpha, src_desc, src_data, beta, dest_desc, dest_data) } -// } - -// /// Computes a rnn backward function w.r.t rnn coefficient. 
-// pub fn rnn_backward_rnn( -// handle: cudnnHandle_t, -// algo: cudnnRNNBwdRNNAlgo_t, -// conv_desc: cudnnRNNDescriptor_t, -// work_space: *mut ::libc::c_void, -// work_size_in_bytes: ::libc::size_t, -// alpha: *const ::libc::c_void, -// src_desc: cudnnTensorDescriptor_t, -// src_data: *const ::libc::c_void, -// diff_desc: cudnnTensorDescriptor_t, -// diff_data: *const ::libc::c_void, -// beta: *const ::libc::c_void, -// grad_desc: cudnnRNNDescriptor_t, -// grad_data: *mut ::libc::c_void, -// ) -> Result<(), Error> { -// unsafe { API::ffi_rnn_backward_rnn(handle, alpha, src_desc, src_data, diff_desc, diff_data, conv_desc, algo, work_space, work_size_in_bytes, beta, grad_desc, grad_data) } -// } - -// /// Computes a rnn backward function w.r.t the output tensor. -// pub fn rnn_backward_data( -// handle: cudnnHandle_t, -// algo: cudnnRNNBwdDataAlgo_t, -// conv_desc: cudnnRNNDescriptor_t, -// work_space: *mut ::libc::c_void, -// work_size_in_bytes: ::libc::size_t, -// alpha: *const ::libc::c_void, -// rnn_desc: cudnnRNNDescriptor_t, -// rnn_data: *const ::libc::c_void, -// diff_desc: cudnnTensorDescriptor_t, -// diff_data: *const ::libc::c_void, -// beta: *const ::libc::c_void, -// grad_desc: cudnnTensorDescriptor_t, -// grad_data: *mut ::libc::c_void, -// ) -> Result<(), Error> { -// unsafe { API::ffi_rnn_backward_data(handle, alpha, rnn_desc, rnn_data, diff_desc, diff_data, conv_desc, algo, work_space, work_size_in_bytes, beta, grad_desc, grad_data) } -// } - -// unsafe fn ffi_create_rnn_descriptor() -> Result { -// let mut desc: cudnnRNNDescriptor_t = ::std::ptr::null_mut(); -// match cudnnCreateRNNDescriptor(&mut desc) { -// cudnnStatus_t::CUDNN_STATUS_SUCCESS => Ok(desc), -// cudnnStatus_t::CUDNN_STATUS_ALLOC_FAILED => Err(Error::AllocFailed("The resources could not be allocated.")), -// _ => Err(Error::Unknown("Unable to create generic CUDA cuDNN RNN Descriptor.")), -// } -// } - -// unsafe fn ffi_destroy_rnn_descriptor(desc: cudnnRNNDescriptor_t) -> 
Result<(), Error> { -// match cudnnDestroyRNNDescriptor(desc) { -// cudnnStatus_t::CUDNN_STATUS_SUCCESS => Ok(()), -// _ => Err(Error::Unknown("Unable to destroy CUDA cuDNN RNN Descriptor.")), -// } -// } -// unsafe fn ffi_rnn_forward( -// handle: cudnnHandle_t, -// alpha: *const ::libc::c_void, -// src_desc: cudnnTensorDescriptor_t, -// src_data: *const ::libc::c_void, -// rnn_desc: cudnnRNNDescriptor_t, -// rnn_data: *const ::libc::c_void, -// conv_desc: cudnnRNNDescriptor_t, -// algo: cudnnRNNFwdAlgo_t, -// work_space: *mut ::libc::c_void, -// work_size_in_bytes: ::libc::size_t, -// beta: *const ::libc::c_void, -// dest_desc: cudnnTensorDescriptor_t, -// dest_data: *mut ::libc::c_void, -// ) -> Result<(), Error> { -// let status = cudnnRNNForward(handle, alpha, src_desc, src_data, rnn_desc, rnn_data, conv_desc, algo, work_space, work_size_in_bytes, beta, dest_desc, dest_data); -// match status { -// cudnnStatus_t::CUDNN_STATUS_SUCCESS => Ok(()), -// cudnnStatus_t::CUDNN_STATUS_BAD_PARAM => Err(Error::BadParam("At least one of the following conditions are met: At least one of the following is NULL: `handle`, `src_desc`, `rnn_desc`, `conv_desc`, `dest_desc`, `src_data`, `alpha`, `beta`. `src_desc` and `dest_desc` have a non-matching number of dimensions. `src_desc` and `rnn_desc` have a non-matching number of dimensions. `src_desc` has fewer than three number of dimensions. `src_desc`s number of dimensions is not equal to `conv_desc`s `array_length` + 2. `src_desc` and `rnn_desc` have a non-matching number of input feature maps per image. `src_desc`, `rnn_desc` and `dest_desc` have a non-matching data type. For some spatial dimension, `rnn_desc` has a spatial size that is larger than the input spatial size (including zero-padding size).")), -// cudnnStatus_t::CUDNN_STATUS_NOT_SUPPORTED => Err(Error::NotSupported("At least one of the following conditions are met: `src_desc` or `dest_desc` have negative tensor striding. 
`src_desc`, `rnn_desc` or `dest_desc` has a number of dimensions that is not 4 or 5. The chosen algo does not support the parameters provided; see the reference for exhaustive list of parameter support for each algo")), -// _ => Err(Error::Unknown("Unable to compute CUDA cuDNN rnnal forward.")), -// } -// } + /// Execute a RNN without Training + /// This routine executes the recurrent neural network described by rnnDesc with inputs x, hx, + /// and cx, weights w and outputs y, hy, and cy. workspace is required for intermediate storage. + /// This function does not store intermediate data required for training; + /// cudnnRNNForwardTraining() should be used for that purpose + /// + /// # Arguments + /// `handle` Handle to a previously created cudNN context [0] + /// `rnn_desc` A previously initialised RNN descriptor [1] + /// `seq_length` Number of iterations for the RNN to unroll over. + /// `x_desc` Array of seqLength packed tensor descriptors [1]. Each descriptor should have + /// 3D that describe the input data format to one recurrent iterator - one descriptor per + /// RNN time-step. ```[Batch Size, Input Size, 1]``` + /// Input vectors should be column-major, so should be set + /// strideA 0 = inputSize, strideA 1 = 1, strideA 2 =1 + /// `x` Data Pointer to GPU memory associated with the input. + /// `hx_desc` Fully packed tensor descriptor for the initial hidden state of the RNN. + /// `hx` Data pointer for initial hidden state - if null will initialize state to zero. + /// `cx_desc` Tensor descriptor for the initial cell state for an LSTM network. + /// `cx` Data pointer for initial cell state - if null will initialize state to zero.A + /// `w_desc` Handle to descriptors for weights + /// `w` Data Pointer to weights + /// `y_desc` Output for each recurrent iteration. Second dimension should match size of the + /// hidden layer. First dimension should match the first dimension of the tensor in input. 
+ /// `y` Output Memory + /// `hy_desc` Final hidden state of the RNN + /// `hy` Memory for final hidden state + /// `cy_desc` Final cell state for the RNN + /// `cy` Memory for the final cell state - can be NULL. + /// `workspace` Data pointer to GPU memory to be used as a workspace for this call + /// `workspace_in_bytes` Size in bytes of the provided workspace + /// [0] https://docs.nvidia.com/deeplearning/sdk/cudnn-api/index.html#cudnnHandle_t + /// [1] https://docs.nvidia.com/deeplearning/sdk/cudnn-api/index.html#cudnnRNNDescriptor_t + pub fn rnn_forward_inference( + handle: cudnnHandle_t, + rnn_desc: cudnnRNNDescriptor_t, + seq_length: ::libc::c_int, + x_desc: *const cudnnTensorDescriptor_t, + x: *mut ::libc::c_void, + hx_desc: cudnnTensorDescriptor_t, + hx: *mut ::libc::c_void, + cx_desc: cudnnTensorDescriptor_t, + cx: *mut ::libc::c_void, + w_desc: cudnnFilterDescriptor_t, + w: *mut ::libc::c_void, + y_desc: *const cudnnTensorDescriptor_t, + y: *mut ::libc::c_void, + hy_desc: cudnnTensorDescriptor_t, + hy: *mut ::libc::c_void, + cy_desc: cudnnTensorDescriptor_t, + cy: *mut ::libc::c_void, + work_space: *mut ::libc::c_void, + work_size_in_bytes: ::libc::size_t, + ) -> Result<(), Error> { + unsafe { + API::ffi_rnn_forward_inference( + handle, + rnn_desc, + seq_length, + x_desc, + x, + hx_desc, + hx, + cx_desc, + cx, + w_desc, + w, + y_desc, + y, + hy_desc, + hy, + cy_desc, + cy, + work_space, + work_size_in_bytes, -// unsafe fn ffi_rnn_backward_bias( -// handle: cudnnHandle_t, -// alpha: *const ::libc::c_void, -// src_desc: cudnnTensorDescriptor_t, -// src_data: *const ::libc::c_void, -// beta: *const ::libc::c_void, -// dest_desc: cudnnTensorDescriptor_t, -// dest_data: *mut ::libc::c_void, -// ) -> Result<(), Error> { -// match cudnnRNNBackwardBias(handle, alpha, src_desc, src_data, beta, dest_desc, dest_data) { -// cudnnStatus_t::CUDNN_STATUS_SUCCESS => Ok(()), -// cudnnStatus_t::CUDNN_STATUS_BAD_PARAM => Err(Error::BadParam("At least one of the following 
conditions are met: One of the parameters n,h,w of the output tensor is not 1. The numbers of feature maps of the input tensor and output tensor differ. The dataType of the two tensor descriptors are different.")), -// _ => Err(Error::Unknown("Unable to compute CUDA cuDNN rnnal backward bias.")), -// } -// } + ) + } + } + unsafe fn ffi_rnn_forward_inference( + handle: cudnnHandle_t, + rnn_desc: cudnnRNNDescriptor_t, + seq_length: ::libc::c_int, + x_desc: *const cudnnTensorDescriptor_t, + x: *const ::libc::c_void, + hx_desc: cudnnTensorDescriptor_t, + hx: *const ::libc::c_void, + cx_desc: cudnnTensorDescriptor_t, + cx: *const ::libc::c_void, + w_desc: cudnnFilterDescriptor_t, + w: *const ::libc::c_void, + y_desc: *const cudnnTensorDescriptor_t, + y: *mut ::libc::c_void, + hy_desc: cudnnTensorDescriptor_t, + hy: *mut ::libc::c_void, + cy_desc: cudnnTensorDescriptor_t, + cy: *mut ::libc::c_void, + workspace: *mut ::libc::c_void, + work_space_size_in_bytes: usize, + ) -> Result<(), Error> { + let status = cudnnRNNForwardInference( + handle, + rnn_desc, + seq_length, + x_desc, + x, + hx_desc, + hx, + cx_desc, + cx, + w_desc, + w, + y_desc, + y, + hy_desc, + hy, + cy_desc, + cy, + workspace, + work_space_size_in_bytes, + ); + match status { + cudnnStatus_t::CUDNN_STATUS_SUCCESS => Ok(()), + cudnnStatus_t::CUDNN_STATUS_BAD_PARAM => Err(Error::BadParam("At least one of the following conditions are met: At least one of the following is NULL: `handle`, `src_desc`, `rnn_desc`, `conv_desc`, `dest_desc`, `src_data`, `alpha`, `beta`. `src_desc` and `dest_desc` have a non-matching number of dimensions. `src_desc` and `rnn_desc` have a non-matching number of dimensions. `src_desc` has fewer than three number of dimensions. `src_desc`s number of dimensions is not equal to `conv_desc`s `array_length` + 2. `src_desc` and `rnn_desc` have a non-matching number of input feature maps per image. `src_desc`, `rnn_desc` and `dest_desc` have a non-matching data type. 
For some spatial dimension, `rnn_desc` has a spatial size that is larger than the input spatial size (including zero-padding size).")), + cudnnStatus_t::CUDNN_STATUS_NOT_SUPPORTED => Err(Error::NotSupported("At least one of the following conditions are met: `src_desc` or `dest_desc` have negative tensor striding. `src_desc`, `rnn_desc` or `dest_desc` has a number of dimensions that is not 4 or 5. The chosen algo does not support the parameters provided; see the reference for exhaustive list of parameter support for each algo")), + _ => Err(Error::Unknown("Unable to compute CUDA cuDNN rnnal forward.")), + } + } +} -// unsafe fn ffi_rnn_backward_rnn( -// handle: cudnnHandle_t, -// alpha: *const ::libc::c_void, -// src_desc: cudnnTensorDescriptor_t, -// src_data: *const ::libc::c_void, -// diff_desc: cudnnTensorDescriptor_t, -// diff_data: *const ::libc::c_void, -// conv_desc: cudnnRNNDescriptor_t, -// algo: cudnnRNNBwdRNNAlgo_t, -// work_space: *mut ::libc::c_void, -// work_size_in_bytes: ::libc::size_t, -// beta: *const ::libc::c_void, -// grad_desc: cudnnRNNDescriptor_t, -// grad_data: *mut ::libc::c_void, -// ) -> Result<(), Error> { -// match cudnnRNNBackwardRNN(handle, alpha, src_desc, src_data, diff_desc, diff_data, conv_desc, algo, work_space, work_size_in_bytes, beta, grad_desc, grad_data) { -// cudnnStatus_t::CUDNN_STATUS_SUCCESS => Ok(()), -// cudnnStatus_t::CUDNN_STATUS_BAD_PARAM => Err(Error::BadParam("At least one of the following conditions are met: At least one of the following is NULL: `handle`, `src_desc`, `diff_desc`, `conv_desc`, `grad_desc`, `src_data`, `diff_data`, `grad_data`, `alpha`, `beta`. `src_desc` and `diff_desc` have a non-matching number of dimensions. `src_desc` and `grad_desc` have a non-matching number of dimensions. `src_desc` has fewer than three number of dimensions. `src_desc`, `diff_desc` and `grad_desc` have a non-matching data type. 
`src_desc` and `grad_desc` have a non-matching number of input feature maps per image.")), -// cudnnStatus_t::CUDNN_STATUS_NOT_SUPPORTED => Err(Error::NotSupported("At least one of the following conditions are met: `src_desc` or `diff_desc` have negative tensor striding. `src_desc`, `diff_desc` or `grad_desc` has a number of dimensions that is not 4 or 5. The chosen algo does not support the parameters provided; see the reference for exhaustive list of parameter support for each algo")), -// cudnnStatus_t::CUDNN_STATUS_MAPPING_ERROR => Err(Error::MappingError("An error occurs during the texture binding of the rnn data.")), -// cudnnStatus_t::CUDNN_STATUS_EXECUTION_FAILED => Err(Error::ExecutionFailed("Execution failed to launch on GPU.")), -// _ => Err(Error::Unknown("Unable to compute CUDA cuDNN rnnal backward rnn.")), -// } -// } +// Backward Training, Bias, Weights, and Inference +impl API { + /// CUDNN Rnn Backward Data + /// This routine executes the recurrent neural network described by rnnDesc with output + /// gradients dy, dhy, and dcy, weights w and input gradients dx, dhx, and dcx. + /// Workspace is required for intermediate storage. + /// The data in reserveSpace must have previously been generated by cudnnRNNForwardTraining(). + /// The same reserveSpace data must be used for future calls to cudnnRNNBackwardWeights() + /// if they execute on the same input data. + /// + /// # Arguments + /// `handle` Handle to a previously created [cuDNN context][0] + /// `rnn_desc` A previously initialised [RNN descriptor][1] + /// `seq_length` Number of iterations for the RNN to unroll over. + /// `y_desc` Array of packed [tensor descriptors][1] describing the *output* from each recurrent + /// iteration. + /// `y` Data pointer to GPU memory for output at each iteration + /// `dy_desc` Array of packed [tensor descriptors][1] describing the *gradient* at the output + /// from each recurrent iteration. 
+ /// `dy` Data pointer to GPU memory for gradient at output iterations + /// `dhy_desc` Array of packed [tensor descriptors][1] describing the *gradients* at the final *hidden* + /// state of the RNN. + /// `dhy` Data pointer to GPU memory for gradient at the final hidden state of the network. + /// If this is a NULL pointer, the gradients at the final hidden state of the network will be + /// initialised to zero. + /// `dcy_desc` Array of packed [tensor descriptors][1] describing the *gradients* at the final *cell* + /// state of the RNN. + /// `dcy` Data pointer to GPU memory for gradients at the final cell state of the RNN. + /// `w_desc` Handle to a previously initialized filter descriptor for the weights in the RNN + /// `w` Data pointer to GPU memory for the filter descriptor for the weights. + /// `hx_desc` Fully packed tensor descriptor for the initial hidden state of the RNN. + /// `hx` Data pointer for initial hidden state - if null will initialize state to zero. + /// `cx_desc` Tensor descriptor for the initial cell state for an LSTM network. + /// `cx` Data pointer for initial cell state - if null will initialize state to zero. + /// `dx_desc` Array of fully packed tensor descriptors for the gradient at the input of each + /// iteration. + /// `dx` Data pointer for the gradient of the input of each recurrent iteration. + /// `dhx_desc` Fully packed tensor for the gradient of the initial hidden state of the RNN. + /// `dhx` Data pointer for gradient of the initial hidden state of the RNN. 
+ /// `workspace` Data pointer to GPU memory to be used as a workspace for this call + /// `workspace_in_bytes` Size in bytes of the provided workspace + /// `reserve_space` Data pointer for GPU memory to be used as a reserve space for this call + /// `reserve_space_in_bytes` Size in bytes for `reserve_space` + /// [0]:https://docs.nvidia.com/deeplearning/sdk/cudnn-api/index.html#cudnnHandle_t + /// [1]:https://docs.nvidia.com/deeplearning/sdk/cudnn-api/index.html#cudnnRNNDescriptor_t + pub fn rnn_backward_data( + handle: cudnnHandle_t, + rnn_desc: cudnnRNNDescriptor_t, + seq_length: ::libc::c_int, + y_desc: *const cudnnTensorDescriptor_t, + y: *const ::libc::c_void, + dy_desc: *const cudnnTensorDescriptor_t, + dy: *const ::libc::c_void, + dhy_desc: cudnnTensorDescriptor_t, + dhy: *const ::libc::c_void, + dcy_desc: cudnnTensorDescriptor_t, + dcy: *const ::libc::c_void, + w_desc: cudnnFilterDescriptor_t, + w: *const ::libc::c_void, + hx_desc: cudnnTensorDescriptor_t, + hx: *const ::libc::c_void, + cx_desc: cudnnTensorDescriptor_t, + cx: *const ::libc::c_void, + dx_desc: *const cudnnTensorDescriptor_t, + dx: *mut ::libc::c_void, + dhx_desc: cudnnTensorDescriptor_t, + dhx: *mut ::libc::c_void, + dcx_desc: cudnnTensorDescriptor_t, + dcx: *mut ::libc::c_void, + workspace: *mut ::libc::c_void, + workspace_size_in_bytes: usize, + reserve_space: *mut ::libc::c_void, + reserve_space_size_in_bytes: usize, + ) -> Result<(), Error> { + unsafe { + API::ffi_rnn_backward_data( + handle, + rnn_desc, + seq_length, + y_desc, + y, + dy_desc, + dy, + dhy_desc, + dhy, + dcy_desc, + dcy, + w_desc, + w, + hx_desc, + hx, + cx_desc, + cx, + dx_desc, + dx, + dhx_desc, + dhx, + dcx_desc, + dcx, + workspace, + workspace_size_in_bytes, + reserve_space, + reserve_space_size_in_bytes, + ) + } + } + unsafe fn ffi_rnn_backward_data( + handle: cudnnHandle_t, + rnn_desc: cudnnRNNDescriptor_t, + seq_length: ::libc::c_int, + y_desc: *const cudnnTensorDescriptor_t, + y: *const ::libc::c_void, + 
dy_desc: *const cudnnTensorDescriptor_t, + dy: *const ::libc::c_void, + dhy_desc: cudnnTensorDescriptor_t, + dhy: *const ::libc::c_void, + dcy_desc: cudnnTensorDescriptor_t, + dcy: *const ::libc::c_void, + w_desc: cudnnFilterDescriptor_t, + w: *const ::libc::c_void, + hx_desc: cudnnTensorDescriptor_t, + hx: *const ::libc::c_void, + cx_desc: cudnnTensorDescriptor_t, + cx: *const ::libc::c_void, + dx_desc: *const cudnnTensorDescriptor_t, + dx: *mut ::libc::c_void, + dhx_desc: cudnnTensorDescriptor_t, + dhx: *mut ::libc::c_void, + dcx_desc: cudnnTensorDescriptor_t, + dcx: *mut ::libc::c_void, + workspace: *mut ::libc::c_void, + workspace_size_in_bytes: usize, + reserve_space: *mut ::libc::c_void, + reserve_space_size_in_bytes: usize, + ) -> Result<(), Error> { + match cudnnRNNBackwardData( + handle, + rnn_desc, + seq_length, + y_desc, + y, + dy_desc, + dy, + dhy_desc, + dhy, + dcy_desc, + dcy, + w_desc, + w, + hx_desc, + hx, + cx_desc, + cx, + dx_desc, + dx, + dhx_desc, + dhx, + dcx_desc, + dcx, + workspace, + workspace_size_in_bytes, + reserve_space, + reserve_space_size_in_bytes + ) { + cudnnStatus_t::CUDNN_STATUS_SUCCESS => Ok(()), + cudnnStatus_t::CUDNN_STATUS_BAD_PARAM => Err(Error::BadParam("At least one of the following conditions are met: At least one of the following is NULL: `handle`, `diff_desc`, `rnn_desc`, `conv_desc`, `grad_desc`, `diff_data`, `rnn_data`, `grad_data`, `alpha`, `beta`. `rnn_desc` and `diff_desc` have a non-matching number of dimensions. `rnn_desc` and `grad_desc` have a non-matching number of dimensions. `rnn_desc has fewer than three number of dimensions. `rnn_desc`, `grad_desc` and `diff_desc` have a non-matching data type. `rnn_desc` and `grad_desc` have a non-matching number of input feature maps per image. 
`diff_desc`s spatial sizes do not match with the expected size as determined by `cudnnGetRNNNdForwardOutputDim()`.")), + cudnnStatus_t::CUDNN_STATUS_NOT_SUPPORTED => Err(Error::NotSupported("At least one of the following conditions are met: `diff_desc` or `grad_desc` have negative tensor striding. `diff_desc`, `rnn_desc` or `grad_desc` has a number of dimensions that is not 4 or 5. The chosen algo does not support the parameters provided; see the reference for exhaustive list of parameter support for each algo")), + cudnnStatus_t::CUDNN_STATUS_MAPPING_ERROR => Err(Error::MappingError("An error occurs during the texture binding of the rnn data or the input differential tensor data.")), + cudnnStatus_t::CUDNN_STATUS_EXECUTION_FAILED => Err(Error::ExecutionFailed("Execution failed to launch on GPU.")), + _ => Err(Error::Unknown("Unable to compute CUDA cuDNN rnnal backward data.")), + } + } -// unsafe fn ffi_rnn_backward_data( -// handle: cudnnHandle_t, -// alpha: *const ::libc::c_void, -// rnn_desc: cudnnRNNDescriptor_t, -// rnn_data: *const ::libc::c_void, -// diff_desc: cudnnTensorDescriptor_t, -// diff_data: *const ::libc::c_void, -// conv_desc: cudnnRNNDescriptor_t, -// algo: cudnnRNNBwdDataAlgo_t, -// work_space: *mut ::libc::c_void, -// work_size_in_bytes: ::libc::size_t, -// beta: *const ::libc::c_void, -// grad_desc: cudnnTensorDescriptor_t, -// grad_data: *mut ::libc::c_void, -// ) -> Result<(), Error> { -// match cudnnRNNBackwardData(handle, alpha, rnn_desc, rnn_data, diff_desc, diff_data, conv_desc, algo, work_space, work_size_in_bytes, beta, grad_desc, grad_data) { -// cudnnStatus_t::CUDNN_STATUS_SUCCESS => Ok(()), -// cudnnStatus_t::CUDNN_STATUS_BAD_PARAM => Err(Error::BadParam("At least one of the following conditions are met: At least one of the following is NULL: `handle`, `diff_desc`, `rnn_desc`, `conv_desc`, `grad_desc`, `diff_data`, `rnn_data`, `grad_data`, `alpha`, `beta`. `rnn_desc` and `diff_desc` have a non-matching number of dimensions. 
`rnn_desc` and `grad_desc` have a non-matching number of dimensions. `rnn_desc has fewer than three number of dimensions. `rnn_desc`, `grad_desc` and `diff_desc` have a non-matching data type. `rnn_desc` and `grad_desc` have a non-matching number of input feature maps per image. `diff_desc`s spatial sizes do not match with the expected size as determined by `cudnnGetRNNNdForwardOutputDim()`.")), -// cudnnStatus_t::CUDNN_STATUS_NOT_SUPPORTED => Err(Error::NotSupported("At least one of the following conditions are met: `diff_desc` or `grad_desc` have negative tensor striding. `diff_desc`, `rnn_desc` or `grad_desc` has a number of dimensions that is not 4 or 5. The chosen algo does not support the parameters provided; see the reference for exhaustive list of parameter support for each algo")), -// cudnnStatus_t::CUDNN_STATUS_MAPPING_ERROR => Err(Error::MappingError("An error occurs during the texture binding of the rnn data or the input differential tensor data.")), -// cudnnStatus_t::CUDNN_STATUS_EXECUTION_FAILED => Err(Error::ExecutionFailed("Execution failed to launch on GPU.")), -// _ => Err(Error::Unknown("Unable to compute CUDA cuDNN rnnal backward data.")), -// } -// } + /// CUDNN Rnn Backward Weights + /// This routine accumulates weight gradients `dw` from the recurrent neural network described by + /// rnnDesc with inputs `x`, `hx` and outputs `y`. The mode of operation in this case is additive, + /// the weight gradients calculated will be added to those already existing in `dw`. + /// Workspace is required for intermediate storage. + /// The data in reserveSpace must have previously been generated by cudnnRNNBackwardData(). + /// + /// # Arguments + /// `handle` Handle to a previously created [cudNN context][0] + /// `rnn_desc` A previously initialised [RNN descriptor][1] + /// `seq_length` Number of iterations for the RNN to unroll over. + /// `x_desc` Array of packed tensor descriptors. 
+ /// `x` Data pointer for Input + /// `hx_desc` Fully packed tensor descriptor for the initial hidden state of the RNN. + /// `hx` Data pointer for initial hidden state - if null will initialize state to zero. + /// `y_desc` Array of packed [tensor descriptors][1] describing the *output* from each recurrent + /// iteration. + /// `y` Data pointer to GPU memory for output at each iteration + /// `dw_desc` Handle to previously initialized filter descriptor for the gradient of the + /// weights. + /// `dw` Data pointer to GPU memory for the descriptor of the gradient of the weights. + /// `workspace` Data pointer to GPU memory to be used as a workspace for this call + /// `workspace_in_bytes` Size in bytes of the provided workspace + /// `reserve_space` Data pointer for GPU memory to be used as a reserve space for this call + /// `reserve_space_in_bytes` Size in bytes for `reserve_space` + /// [0]:https://docs.nvidia.com/deeplearning/sdk/cudnn-api/index.html#cudnnHandle_t + /// [1]:https://docs.nvidia.com/deeplearning/sdk/cudnn-api/index.html#cudnnRNNDescriptor_t + pub fn rnn_backward_weights( + handle: cudnnHandle_t, + rnn_desc: cudnnRNNDescriptor_t, + seq_length: ::libc::c_int, + x_desc: *const cudnnTensorDescriptor_t, + x: *const ::libc::c_void, + hx_desc: cudnnTensorDescriptor_t, + hx: *const ::libc::c_void, + y_desc: *const cudnnTensorDescriptor_t, + y: *const ::libc::c_void, + workspace: *const ::libc::c_void, + work_space_size_in_bytes: usize, + dw_desc: cudnnFilterDescriptor_t, + dw: *mut ::libc::c_void, + reserve_space: *const ::libc::c_void, + reserve_space_size_in_bytes: usize, + ) -> Result<(), Error> { + unsafe { + API::ffi_rnn_backward_weights( + handle, + rnn_desc, + seq_length, + x_desc, + x, + hx_desc, + hx, + y_desc, + y, + workspace, + work_space_size_in_bytes, + dw_desc, + dw, + reserve_space, + reserve_space_size_in_bytes, + ) + } + } + unsafe fn ffi_rnn_backward_weights( + handle: cudnnHandle_t, + rnn_desc: cudnnRNNDescriptor_t, + seq_length: 
::libc::c_int, + x_desc: *const cudnnTensorDescriptor_t, + x: *const ::libc::c_void, + hx_desc: cudnnTensorDescriptor_t, + hx: *const ::libc::c_void, + y_desc: *const cudnnTensorDescriptor_t, + y: *const ::libc::c_void, + workspace: *const ::libc::c_void, + work_space_size_in_bytes: usize, + dw_desc: cudnnFilterDescriptor_t, + dw: *mut ::libc::c_void, + reserve_space: *const ::libc::c_void, + reserve_space_size_in_bytes: usize, + ) -> Result<(), Error> { + match cudnnRNNBackwardWeights( + handle, + rnn_desc, + seq_length, + x_desc, + x, + hx_desc, + hx, + y_desc, + y, + workspace, + work_space_size_in_bytes, + dw_desc, + dw, + reserve_space, + reserve_space_size_in_bytes, + ) { + cudnnStatus_t::CUDNN_STATUS_SUCCESS => Ok(()), + cudnnStatus_t::CUDNN_STATUS_BAD_PARAM => Err(Error::BadParam("At least one of the following conditions are met: At least one of the following is NULL: `handle`, `src_desc`, `diff_desc`, `conv_desc`, `grad_desc`, `src_data`, `diff_data`, `grad_data`, `alpha`, `beta`. `src_desc` and `diff_desc` have a non-matching number of dimensions. `src_desc` and `grad_desc` have a non-matching number of dimensions. `src_desc` has fewer than three number of dimensions. `src_desc`, `diff_desc` and `grad_desc` have a non-matching data type. `src_desc` and `grad_desc` have a non-matching number of input feature maps per image.")), + cudnnStatus_t::CUDNN_STATUS_NOT_SUPPORTED => Err(Error::NotSupported("At least one of the following conditions are met: `src_desc` or `diff_desc` have negative tensor striding. `src_desc`, `diff_desc` or `grad_desc` has a number of dimensions that is not 4 or 5. 
The chosen algo does not support the parameters provided; see the reference for exhaustive list of parameter support for each algo")), + cudnnStatus_t::CUDNN_STATUS_MAPPING_ERROR => Err(Error::MappingError("An error occurs during the texture binding of the rnn data.")), + cudnnStatus_t::CUDNN_STATUS_EXECUTION_FAILED => Err(Error::ExecutionFailed("Execution failed to launch on GPU.")), + _ => Err(Error::Unknown("Unable to compute CUDA cuDNN rnnal backward rnn.")), + } + } +} diff --git a/cudnn/src/api/utils.rs b/cudnn/src/api/utils.rs index 4cc9b99..88a1eae 100644 --- a/cudnn/src/api/utils.rs +++ b/cudnn/src/api/utils.rs @@ -3,6 +3,7 @@ use crate::ffi::*; use std::ptr; use crate::{Error, API}; +use utils::DataType; impl API { /// Initialize the CUDA cuDNN API with needed context and resources. @@ -53,3 +54,15 @@ impl API { } } } + +impl API { + /// Convert to CUDNN Data Type + pub fn to_cudnn_data_type(data_type: DataType) -> cudnnDataType_t { + let data_type = match data_type { + DataType::Float => cudnnDataType_t::CUDNN_DATA_FLOAT, + DataType::Double => cudnnDataType_t::CUDNN_DATA_DOUBLE, + DataType::Half => cudnnDataType_t::CUDNN_DATA_HALF + }; + data_type + } +} diff --git a/cudnn/src/cuda.rs b/cudnn/src/cuda.rs index d9d05f5..0bc0f83 100644 --- a/cudnn/src/cuda.rs +++ b/cudnn/src/cuda.rs @@ -38,8 +38,8 @@ impl CudaDeviceMemory { } /// Returns the size of the CUDA Device Memory chunk. 
- pub fn size(&self) -> &usize { - &self.size + pub fn size(&self) -> usize { + self.size } } diff --git a/cudnn/src/cudnn.rs b/cudnn/src/cudnn.rs index d5c5767..4210bd7 100644 --- a/cudnn/src/cudnn.rs +++ b/cudnn/src/cudnn.rs @@ -6,13 +6,15 @@ use super::utils::{ ActivationConfig, ConvolutionConfig, DataTypeInfo, DropoutConfig, NormalizationConfig, - PoolingConfig, ScalParams, + PoolingConfig, ScalParams, RnnConfig }; use super::*; use crate::cuda::CudaDeviceMemory; use num::traits::Float; use std::mem::transmute_copy; +use utils::DataType; +use tensor_descriptor::tensor_vec_id_c; #[derive(Debug, Clone)] /// Provides a the high-level interface to CUDA's cuDNN. @@ -178,6 +180,116 @@ impl Cudnn { Ok(DropoutConfig::new(dropout, reserve)) } + /// Initialize RNN + pub fn init_rnn( + &self, + x_desc: &[TensorDescriptor], + rnn_desc: RnnDescriptor, + hidden_size: i32, + num_layers: i32, + seq_length: i32, + dropout_desc: &DropoutDescriptor, + input_mode: cudnnRNNInputMode_t, + direction_mode: cudnnDirectionMode_t, + network_mode: cudnnRNNMode_t, + algorithm: cudnnRNNAlgo_t, + data_type: DataType, + math_type: cudnnMathType_t + ) -> Result { + + let data_type = match data_type { + DataType::Float => cudnnDataType_t::CUDNN_DATA_FLOAT, + DataType::Double => cudnnDataType_t::CUDNN_DATA_DOUBLE, + DataType::Half => cudnnDataType_t::CUDNN_DATA_HALF + }; + + API::set_rnn_matrix_math_type( + *rnn_desc.id_c(), + math_type + )?; + + let workspace_size : usize = API::get_rnn_workspace_size( + *self.id_c(), + *rnn_desc.id_c(), + seq_length, + tensor_vec_id_c(x_desc) + )?; + + let training_reserve_size : usize = API::get_rnn_training_reserve_size( + *self.id_c(), + *rnn_desc.id_c(), + seq_length, + tensor_vec_id_c(x_desc) + )?; + + let training_reserve : CudaDeviceMemory = CudaDeviceMemory::new(training_reserve_size)?; + + Ok(RnnConfig::new( + rnn_desc, + hidden_size, + num_layers, + seq_length, + *dropout_desc.id_c(), + input_mode, + direction_mode, + network_mode, + algorithm, + 
data_type, + workspace_size, + training_reserve_size, + training_reserve + )) + } + + /// Train & Return Results for RNN + pub fn rnn_forward( + &self, + rnn_config: &RnnConfig, + src_desc: Vec, + src: *const ::libc::c_void, + output_desc: Vec, + output: *mut ::libc::c_void, + hidden_desc: &TensorDescriptor, + // Planning to initially pass NULLs to this + hidden: *const ::libc::c_void, + cell_desc: &TensorDescriptor, + // Planning to initially pass NULLs to this + cell: *const ::libc::c_void, + weight_desc: &FilterDescriptor, + weight: *const ::libc::c_void, + hidden_output_desc: &TensorDescriptor, + hidden_output: *mut ::libc::c_void, + cell_output_desc: &TensorDescriptor, + cell_output: *mut ::libc::c_void, + workspace: *mut ::libc::c_void, + reserve_data: *mut ::libc::c_void + ) -> Result<(), Error> + where T: Float + DataTypeInfo { + API::rnn_forward_training( + *self.id_c(), + *(rnn_config.rnn_desc().id_c()), + *rnn_config.sequence_length(), + tensor_vec_id_c(&src_desc), + src, + *hidden_desc.id_c(), + hidden, + *cell_desc.id_c(), + cell, + *weight_desc.id_c(), + weight, + tensor_vec_id_c(&output_desc), + output, + *hidden_output_desc.id_c(), + hidden_output, + *cell_output_desc.id_c(), + cell_output, + workspace, + rnn_config.rnn_workspace_size(), + reserve_data, + rnn_config.training_reserve_size() + ) + } + /// Computes the forward Sigmoid Activation function. /// /// Writes the result of the computation to `dest_data`. 
@@ -386,7 +498,7 @@ impl Cudnn { *conv_config.forward_algo(), *conv_config.conv_desc().id_c(), workspace, - *conv_config.forward_workspace_size(), + conv_config.forward_workspace_size(), unsafe { transmute_copy(&&scale.a) }, *src_desc.id_c(), src_data, @@ -445,7 +557,7 @@ impl Cudnn { *conv_config.backward_filter_algo(), *conv_config.conv_desc().id_c(), workspace, - *conv_config.backward_filter_workspace_size(), + conv_config.backward_filter_workspace_size(), unsafe { transmute_copy(&&scale.a) }, *src_desc.id_c(), src_data, @@ -479,7 +591,7 @@ impl Cudnn { *conv_config.backward_data_algo(), *conv_config.conv_desc().id_c(), workspace, - *conv_config.backward_data_workspace_size(), + conv_config.backward_data_workspace_size(), unsafe { transmute_copy(&&scale.a) }, *conv_config.filter_desc().id_c(), filter_data, @@ -817,7 +929,7 @@ impl Cudnn { *dest_desc.id_c(), dest_data, *dropout_conf.reserved_space().id_c(), - *dropout_conf.reserved_space().size(), + dropout_conf.reserved_space().size(), ) } @@ -843,7 +955,7 @@ impl Cudnn { *dest_desc.id_c(), dest_data, *dropout_conf.reserved_space().id_c(), - *dropout_conf.reserved_space().size(), + dropout_conf.reserved_space().size(), ) } } diff --git a/cudnn/src/dropout_descriptor.rs b/cudnn/src/dropout_descriptor.rs index 3faf328..b74a984 100644 --- a/cudnn/src/dropout_descriptor.rs +++ b/cudnn/src/dropout_descriptor.rs @@ -36,7 +36,7 @@ impl DropoutDescriptor { *handle.id_c(), dropout, *reserve.id_c(), - *reserve.size(), + reserve.size(), seed, )?; diff --git a/cudnn/src/lib.rs b/cudnn/src/lib.rs index ecfdef3..fee6daf 100644 --- a/cudnn/src/lib.rs +++ b/cudnn/src/lib.rs @@ -83,6 +83,7 @@ pub use self::filter_descriptor::FilterDescriptor; pub use self::normalization_descriptor::NormalizationDescriptor; pub use self::pooling_descriptor::PoolingDescriptor; pub use self::tensor_descriptor::TensorDescriptor; +pub use self::rnn_descriptor::RnnDescriptor; pub use crate::ffi::*; #[derive(Debug, Copy, Clone)] @@ -100,4 +101,5 @@ mod 
filter_descriptor; mod normalization_descriptor; mod pooling_descriptor; mod tensor_descriptor; +mod rnn_descriptor; pub mod utils; diff --git a/cudnn/src/rnn_descriptor.rs b/cudnn/src/rnn_descriptor.rs index 7201a8a..8616f8f 100644 --- a/cudnn/src/rnn_descriptor.rs +++ b/cudnn/src/rnn_descriptor.rs @@ -6,11 +6,12 @@ use super::{API, Error}; use super::utils::DataType; use ffi::*; +use ::{Cudnn, DropoutDescriptor}; #[derive(Debug, Clone)] /// Describes a Recurrent Descriptor. pub struct RnnDescriptor { - id: cudnnRnnDescriptor_t, + id: cudnnRNNDescriptor_t, } impl Drop for RnnDescriptor { @@ -26,48 +27,39 @@ impl RnnDescriptor { handle: &Cudnn, hidden_size: i32, num_layers: i32, - dropout_desc: DropoutDescriptor, + dropout_desc: &DropoutDescriptor, input_mode: cudnnRNNInputMode_t, direction: cudnnDirectionMode_t, mode: cudnnRNNMode_t, algorithm: cudnnRNNAlgo_t, data_type: DataType, ) -> Result { - let array_length = pad.len() as i32; - let upscale: Vec = ::std::iter::repeat(1i32) - .take(array_length as usize) - .collect(); - let generic_rnn_desc = API::create_rnn_descriptor()?; - let data_type = match data_type { - DataType::Float => cudnnDataType_t::CUDNN_DATA_FLOAT, - DataType::Double => cudnnDataType_t::CUDNN_DATA_DOUBLE, - DataType::Half => cudnnDataType_t::CUDNN_DATA_HALF, - _ => return Err(Error::InvalidValue("Invalid data type value passed")), - }; - API::set_rnn_descriptor( - handle.id_c(), + API::set_rnn_descriptor( + *handle.id_c(), generic_rnn_desc, hidden_size, num_layers, - dropout_desc, + *dropout_desc.id_c(), input_mode, direction, mode, algorithm, data_type, - ); - Ok(RnnDescriptor::from_c(generic_rnn_desc)) + )?; + Ok(RnnDescriptor { + id: generic_rnn_desc + }) } /// Initializes a new CUDA cuDNN RnnDescriptor from its C type. - pub fn from_c(id: cudnnRnnDescriptor_t) -> RnnDescriptor { + pub fn from_c(id: cudnnRNNDescriptor_t) -> RnnDescriptor { RnnDescriptor { id: id } } /// Returns the CUDA cuDNN RnnDescriptor as its C type. 
- pub fn id_c(&self) -> &cudnnRnnDescriptor_t { + pub fn id_c(&self) -> &cudnnRNNDescriptor_t { &self.id } } diff --git a/cudnn/src/tensor_descriptor.rs b/cudnn/src/tensor_descriptor.rs index 0e22ccf..ce83322 100644 --- a/cudnn/src/tensor_descriptor.rs +++ b/cudnn/src/tensor_descriptor.rs @@ -14,6 +14,13 @@ pub struct TensorDescriptor { id: cudnnTensorDescriptor_t, } +/// Return C Handle for a Vector of Tensor Descriptors +pub fn tensor_vec_id_c(tensor_vec: &[TensorDescriptor]) -> Vec { + tensor_vec.iter().map(|tensor| { + *(*tensor).id_c() + }).collect() +} + impl Drop for TensorDescriptor { #[allow(unused_must_use)] fn drop(&mut self) { @@ -85,3 +92,4 @@ impl TensorDescriptor { &self.id } } + diff --git a/cudnn/src/utils.rs b/cudnn/src/utils.rs index 43b4800..5aedae9 100644 --- a/cudnn/src/utils.rs +++ b/cudnn/src/utils.rs @@ -2,7 +2,7 @@ use super::{ ActivationDescriptor, ConvolutionDescriptor, DropoutDescriptor, FilterDescriptor, - NormalizationDescriptor, PoolingDescriptor, + NormalizationDescriptor, PoolingDescriptor, RnnDescriptor }; use crate::cuda::CudaDeviceMemory; @@ -81,7 +81,7 @@ impl ConvolutionConfig { /// Returns the largest workspace size out of the three. /// /// Useful for creating a shared workspace. - pub fn largest_workspace_size(&self) -> &usize { + pub fn largest_workspace_size(&self) -> usize { if self.backward_data_workspace_size() >= self.backward_filter_workspace_size() && self.backward_data_workspace_size() >= self.forward_workspace_size() { @@ -101,8 +101,8 @@ impl ConvolutionConfig { } /// Returns `forward_workspace_size`. - pub fn forward_workspace_size(&self) -> &usize { - &self.forward_workspace_size + pub fn forward_workspace_size(&self) -> usize { + self.forward_workspace_size } /// Returns `backward_filter_algo`. @@ -111,8 +111,8 @@ impl ConvolutionConfig { } /// Returns `backward_filter_workspace_size`. 
- pub fn backward_filter_workspace_size(&self) -> &usize { - &self.backward_filter_workspace_size + pub fn backward_filter_workspace_size(&self) -> usize { + self.backward_filter_workspace_size } /// Returns `backward_data_algo`. @@ -121,8 +121,8 @@ impl ConvolutionConfig { } /// Returns `backward_data_workspace_size`. - pub fn backward_data_workspace_size(&self) -> &usize { - &self.backward_data_workspace_size + pub fn backward_data_workspace_size(&self) -> usize { + self.backward_data_workspace_size } /// Returns `conv_desc`. @@ -261,10 +261,114 @@ impl DropoutConfig { } } +#[allow(missing_debug_implementations, missing_copy_implementations)] +/// Provides an interfaces for CUDNN's Rnn Descriptor +/// # Arguments +/// * `rnn_desc` Previously created descriptor +/// * `hidden_size` Size of the hidden layer +/// * `num_layers` Number of layers +/// * `dropout_desc` Descriptor to a previously created & initialized dropout descriptor, applied +/// between layers. +/// * `input_mode` Specifies behaviour at the input to the first layer +/// * `direction_mode` Specifies the recurrence pattern - i.e bidirectional +/// * `rnn_mode` Type of network used in routines ForwardInference, ForwardTraining, BackwardData, +/// BackwardWeights. Can be ReLU, tanh, LSTM (Long Short Term Memory), or GRU (Gated Recurrent Unit). +/// * `algo` - Only required in v6 implementation FIXME: Should this be checked in compilation? +/// * `data_type` Math Precision - default f32 +/// +/// The LSTM network offered by CUDNN is a four-gate network that does not use peephole connections. +/// Greff, et al. (2015)[1] suggests it doesn't matter what kind of network it is, although +/// Jozefowicz, et al. (2015)[2] suggests that the most important gates are the forget and input, +/// followed by the output gate, so the peephole connection isn't as important to be concerned with. 
+/// A positive bias, as encouraged in the paper, can be achieved by setting `bias_mode` to +/// CUDNN_RNN_DOUBLE_BIAS, which is the default, or CUDN_RNN_SINGLE_INP_BIAS or +/// CUDNN_RNN_SINGLE_REC_BIAS +/// +/// [1]: arxiv.org/pdf/1503.04069.pdf +/// [2]: jmlr.org/proceedings/papers/v37/jozefowicz15.pdf +pub struct RnnConfig { + rnn_desc: RnnDescriptor, + /// Size of Hidden Layer + pub hidden_size: ::libc::c_int, + num_layers: ::libc::c_int, + /// Length of Sequence + pub sequence_length: ::libc::c_int, + dropout_desc: cudnnDropoutDescriptor_t, + input_mode: cudnnRNNInputMode_t, + direction_mode: cudnnDirectionMode_t, + rnn_mode: cudnnRNNMode_t, + algo: cudnnRNNAlgo_t, + data_type: cudnnDataType_t, + workspace_size: usize, + training_reserve_size: usize, + training_reserve: CudaDeviceMemory, +} + +impl RnnConfig { + /// Initialise a RNN Config + pub fn new( + rnn_desc: RnnDescriptor, + hidden_size: i32, + num_layers: i32, + sequence_length: i32, + dropout_desc: cudnnDropoutDescriptor_t, + input_mode: cudnnRNNInputMode_t, + direction_mode: cudnnDirectionMode_t, + rnn_mode: cudnnRNNMode_t, + // Requires v6 + algo: cudnnRNNAlgo_t, + data_type: cudnnDataType_t, + workspace_size: usize, + training_reserve_size: usize, + training_reserve: CudaDeviceMemory + ) -> RnnConfig { + RnnConfig { + rnn_desc, + hidden_size, + num_layers, + sequence_length, + dropout_desc, + input_mode, + direction_mode, + rnn_mode, + algo, + data_type, + workspace_size, + training_reserve_size, + training_reserve + } + } + + /// Workspace Size required for RNN Operations + pub fn rnn_workspace_size(&self) -> usize { + self.workspace_size + } + /// Largest Workspace Size for RNN + pub fn largest_workspace_size(&self) -> usize { + self.rnn_workspace_size() + } + /// Training Reserve Size for RNN + pub fn training_reserve_size(&self) -> usize { self.training_reserve_size } + /// Training Reserve Space on GPU for RNN + pub fn training_reserve(&self) -> &CudaDeviceMemory { + &self.training_reserve + 
} + + /// Accessor function for Rnn Descriptor + pub fn rnn_desc(&self) -> &RnnDescriptor { + &self.rnn_desc + } + + /// Accessor function for Sequence Length + pub fn sequence_length(&self) -> &i32 { + &self.sequence_length + } +} + #[allow(missing_debug_implementations, missing_copy_implementations)] /// Provides a convenient interface for cuDNN's scaling parameters `alpha` and `beta`. /// -/// Scaling paramarters lend the source value with prior value in the destination +/// Scaling parameters lend the source value with prior value in the destination /// tensor as follows: dstValue = alpha[0]*srcValue + beta[0]*priorDstValue. When beta[0] is /// zero, the output is not read and can contain any uninitialized data (including NaN). The /// storage data type for alpha[0], beta[0] is float for HALF and SINGLE tensors, and double