Skip to content

Commit

Permalink
Add and validate test cases for CUDA ConvTranspose support.
Browse files Browse the repository at this point in the history
  • Loading branch information
liuliu committed Jan 23, 2024
1 parent 7fcd66f commit cd44618
Show file tree
Hide file tree
Showing 6 changed files with 379 additions and 4 deletions.
2 changes: 1 addition & 1 deletion lib/nnc/ccv_nnc.h
Original file line number Diff line number Diff line change
Expand Up @@ -110,8 +110,8 @@ typedef struct {
struct {
int count; /**< [convolution_transpose.count] The number of filters for convolutional layer. */
int groups; /**< [convolution_transpose.groups] The number of groups for convolutional layer. */
int output_padding; /**< [convolution_transpose.output_padding] The output padding to resolve ambiguity when treating this as the inverse of convolution. */
int dilation[CCV_NNC_MAX_DIM_ALLOC]; /**< [convolution_transpose.dilation[]] The dilation factor for convolutional layer. Default to 1. */
int output_padding; /**< [convolution_transpose.output_padding] The output padding to resolve ambiguity when treating this as the inverse of convolution. */
} convolution_transpose;
struct {
int hidden_size; /**< [rnn.hidden_size] The number of features in the hidden state h. */
Expand Down
4 changes: 4 additions & 0 deletions lib/nnc/cmd/ccv_nnc_cmd.inc
Original file line number Diff line number Diff line change
Expand Up @@ -484,6 +484,8 @@ void _register_command_CCV_NNC_COMPRESSION_LSSC_FORWARD_backend_CCV_NNC_BACKEND_
void _register_command_CCV_NNC_COMPRESSION_LSSC_BACKWARD_backend_CCV_NNC_BACKEND_GPU_REF(ccv_nnc_cmd_backend_registry_t* const registry);
void _register_command_CCV_NNC_CONVOLUTION_FORWARD_backend_CCV_NNC_BACKEND_GPU_CUDNN(ccv_nnc_cmd_backend_registry_t* const registry);
void _register_command_CCV_NNC_CONVOLUTION_BACKWARD_backend_CCV_NNC_BACKEND_GPU_CUDNN(ccv_nnc_cmd_backend_registry_t* const registry);
void _register_command_CCV_NNC_CONVOLUTION_TRANSPOSE_FORWARD_backend_CCV_NNC_BACKEND_GPU_CUDNN(ccv_nnc_cmd_backend_registry_t* const registry);
void _register_command_CCV_NNC_CONVOLUTION_TRANSPOSE_BACKWARD_backend_CCV_NNC_BACKEND_GPU_CUDNN(ccv_nnc_cmd_backend_registry_t* const registry);
void _register_command_CCV_NNC_DROPOUT_FORWARD_backend_CCV_NNC_BACKEND_GPU_CUDNN(ccv_nnc_cmd_backend_registry_t* const registry);
void _register_command_CCV_NNC_DROPOUT_BACKWARD_backend_CCV_NNC_BACKEND_GPU_CUDNN(ccv_nnc_cmd_backend_registry_t* const registry);
void _register_command_CCV_NNC_EWSUM_FORWARD_backend_CCV_NNC_BACKEND_GPU_CUDNN(ccv_nnc_cmd_backend_registry_t* const registry);
Expand Down Expand Up @@ -950,6 +952,8 @@ static inline void _ccv_nnc_cmd_init(void)
_register_command_CCV_NNC_COMPRESSION_LSSC_BACKWARD_backend_CCV_NNC_BACKEND_GPU_REF(&(init_map[13].backends[5]));
_register_command_CCV_NNC_CONVOLUTION_FORWARD_backend_CCV_NNC_BACKEND_GPU_CUDNN(&(init_map[116].backends[3]));
_register_command_CCV_NNC_CONVOLUTION_BACKWARD_backend_CCV_NNC_BACKEND_GPU_CUDNN(&(init_map[117].backends[3]));
_register_command_CCV_NNC_CONVOLUTION_TRANSPOSE_FORWARD_backend_CCV_NNC_BACKEND_GPU_CUDNN(&(init_map[48].backends[3]));
_register_command_CCV_NNC_CONVOLUTION_TRANSPOSE_BACKWARD_backend_CCV_NNC_BACKEND_GPU_CUDNN(&(init_map[49].backends[3]));
_register_command_CCV_NNC_DROPOUT_FORWARD_backend_CCV_NNC_BACKEND_GPU_CUDNN(&(init_map[4].backends[3]));
_register_command_CCV_NNC_DROPOUT_BACKWARD_backend_CCV_NNC_BACKEND_GPU_CUDNN(&(init_map[5].backends[3]));
_register_command_CCV_NNC_EWSUM_FORWARD_backend_CCV_NNC_BACKEND_GPU_CUDNN(&(init_map[98].backends[3]));
Expand Down
2 changes: 1 addition & 1 deletion lib/nnc/cmd/config.mk
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
CMD_SRCS := ./adam/ccv_nnc_adam_cpu_ref.c ./adam/ccv_nnc_adamw_cpu_ref.c ./blas/ccv_nnc_gemm_cpu_ref.c ./blas/ccv_nnc_gemm_cpu_opt.c ./blas/ccv_nnc_add_cpu_ref.c ./blas/ccv_nnc_mul_cpu_ref.c ./blas/ccv_nnc_cmul_cpu_ref.c ./compare/ccv_nnc_min_cpu_ref.c ./compare/ccv_nnc_max_cpu_ref.c ./compression/ccv_nnc_lssc_cpu_ref.c ./convolution/ccv_nnc_conv_cpu_ref.c ./convolution/ccv_nnc_conv_cpu_opt.c ./convolution/ccv_nnc_conv_transpose_cpu_ref.c ./dropout/ccv_nnc_dropout_cpu_ref.c ./ew/ccv_nnc_ew_cpu_ref.c ./gelu/ccv_nnc_gelu_cpu_ref.c ./histogram/ccv_nnc_histogram_cpu_ref.c ./index/ccv_nnc_index_select_cpu_ref.c ./isnan/ccv_nnc_reduce_isnan_cpu_ref.c ./lamb/ccv_nnc_lamb_cpu_ref.c ./leaky_relu/ccv_nnc_leaky_relu_cpu_ref.c ./loss/ccv_nnc_binary_crossentropy_cpu_ref.c ./loss/ccv_nnc_categorical_crossentropy_cpu_ref.c ./loss/ccv_nnc_mse_cpu_ref.c ./loss/ccv_nnc_smooth_l1_cpu_ref.c ./nms/ccv_nnc_nms_cpu_ref.c ./norm/ccv_nnc_batch_norm_cpu_ref.c ./norm/ccv_nnc_layer_norm_cpu_ref.c ./norm/ccv_nnc_group_norm_cpu_ref.c ./norm/ccv_nnc_rmsnorm_cpu_ref.c ./pool/ccv_nnc_max_pool_cpu_ref.c ./pool/ccv_nnc_avg_pool_cpu_ref.c ./rand/ccv_nnc_rand_uniform_cpu_ref.c ./rand/ccv_nnc_rand_normal_cpu_ref.c ./reduce/ccv_nnc_reduce_sum_cpu_ref.c ./reduce/ccv_nnc_reduce_mean_cpu_ref.c ./reduce/ccv_nnc_reduce_max_cpu_ref.c ./reduce/ccv_nnc_reduce_min_cpu_ref.c ./reduce/ccv_nnc_reduce_norm2_cpu_ref.c ./reduce/ccv_nnc_argmax_cpu_ref.c ./reduce/ccv_nnc_argmin_cpu_ref.c ./relu/ccv_nnc_relu_cpu_ref.c ./rmsprop/ccv_nnc_rmsprop_cpu_ref.c ./roi/ccv_nnc_roi_align_cpu_ref.c ./scaled_dot_product_attention/ccv_nnc_scaled_dot_product_attention_cpu_ref.c ./sgd/ccv_nnc_sgd_cpu_ref.c ./sigmoid/ccv_nnc_sigmoid_cpu_ref.c ./sigmoid_loss/ccv_nnc_sigmoid_binary_crossentropy_cpu_ref.c ./softmax/ccv_nnc_softmax_cpu_ref.c ./softmax_loss/ccv_nnc_softmax_crossentropy_cpu_ref.c ./swish/ccv_nnc_swish_cpu_ref.c ./tanh/ccv_nnc_tanh_cpu_ref.c ./upsample/ccv_nnc_upsample_cpu_ref.c ./util/ccv_nnc_util_cpu_ref.c 
./adam/ccv_nnc_adam.c ./blas/ccv_nnc_blas.c ./blas/cpu_opt/_ccv_nnc_gemm_cpu_opt.c ./blas/cpu_sys/_ccv_nnc_gemm_cpu_sys.c ./comm/ccv_nnc_comm.c ./compare/ccv_nnc_cmp.c ./compression/ccv_nnc_compression.c ./convolution/cpu_opt/_ccv_nnc_conv_cpu_4x4_3x3_winograd.c ./convolution/cpu_opt/_ccv_nnc_conv_cpu_fft.c ./convolution/cpu_opt/_ccv_nnc_conv_cpu_gemm.c ./convolution/cpu_opt/_ccv_nnc_conv_cpu_opt.c ./convolution/ccv_nnc_convolution.c ./dropout/ccv_nnc_dropout.c ./ew/ccv_nnc_ew.c ./gelu/ccv_nnc_gelu.c ./histogram/ccv_nnc_histogram.c ./index/ccv_nnc_index_select.c ./isnan/ccv_nnc_reduce_isnan.c ./lamb/ccv_nnc_lamb.c ./leaky_relu/ccv_nnc_leaky_relu.c ./loss/ccv_nnc_binary_crossentropy.c ./loss/ccv_nnc_categorical_crossentropy.c ./loss/ccv_nnc_mse.c ./loss/ccv_nnc_smooth_l1.c ./nms/ccv_nnc_nms.c ./norm/ccv_nnc_norm.c ./pool/ccv_nnc_pool.c ./rand/ccv_nnc_rand.c ./reduce/ccv_nnc_reduce.c ./relu/ccv_nnc_relu.c ./rmsprop/ccv_nnc_rmsprop.c ./rnn/ccv_nnc_lstm.c ./roi/ccv_nnc_roi_align.c ./scaled_dot_product_attention/ccv_nnc_scaled_dot_product_attention.c ./sgd/ccv_nnc_sgd.c ./sigmoid/ccv_nnc_sigmoid.c ./sigmoid_loss/ccv_nnc_sigmoid_binary_crossentropy.c ./softmax/ccv_nnc_softmax.c ./softmax_loss/ccv_nnc_softmax_crossentropy.c ./swish/ccv_nnc_swish.c ./tanh/ccv_nnc_tanh.c ./upsample/ccv_nnc_upsample.c ./util/ccv_nnc_util.c
CUDA_CMD_SRCS := ./adam/gpu/ccv_nnc_adam_gpu_ref.cu ./adam/gpu/ccv_nnc_adamw_gpu_ref.cu ./blas/gpu/ccv_nnc_gemm_gpu_cublas.cu ./blas/gpu/ccv_nnc_add_gpu_cudnn.cu ./blas/gpu/ccv_nnc_mul_gpu_cudnn.cu ./blas/gpu/ccv_nnc_cmul_gpu_ref.cu ./comm/gpu/ccv_nnc_comm_gpu_nccl.cu ./compare/gpu/ccv_nnc_min_gpu_ref.cu ./compare/gpu/ccv_nnc_max_gpu_ref.cu ./compression/gpu/ccv_nnc_lssc_gpu_ref.cu ./convolution/gpu/ccv_nnc_conv_gpu_cudnn.cu ./dropout/gpu/ccv_nnc_dropout_gpu_cudnn.cu ./ew/gpu/ccv_nnc_ew_gpu_cudnn.cu ./ew/gpu/ccv_nnc_ew_gpu_ref.cu ./gelu/gpu/ccv_nnc_gelu_gpu_ref.cu ./index/gpu/ccv_nnc_index_select_gpu_ref.cu ./isnan/gpu/ccv_nnc_reduce_isnan_gpu_cudnn.cu ./lamb/gpu/ccv_nnc_lamb_gpu_ref.cu ./leaky_relu/gpu/ccv_nnc_leaky_relu_gpu_ref.cu ./loss/gpu/ccv_nnc_binary_crossentropy_gpu_ref.cu ./loss/gpu/ccv_nnc_categorical_crossentropy_gpu_ref.cu ./loss/gpu/ccv_nnc_mse_gpu_ref.cu ./loss/gpu/ccv_nnc_smooth_l1_gpu_ref.cu ./nms/gpu/ccv_nnc_nms_gpu_ref.cu ./norm/gpu/ccv_nnc_batch_norm_gpu_cudnn.cu ./norm/gpu/ccv_nnc_layer_norm_gpu_cudnn.cu ./norm/gpu/ccv_nnc_group_norm_gpu_cudnn.cu ./norm/gpu/ccv_nnc_rmsnorm_gpu_cudnn.cu ./pool/gpu/ccv_nnc_max_pool_gpu_cudnn.cu ./pool/gpu/ccv_nnc_avg_pool_gpu_cudnn.cu ./rand/gpu/ccv_nnc_rand_uniform_gpu_ref.cu ./rand/gpu/ccv_nnc_rand_normal_gpu_ref.cu ./reduce/gpu/ccv_nnc_reduce_sum_gpu_cudnn.cu ./reduce/gpu/ccv_nnc_reduce_mean_gpu_cudnn.cu ./reduce/gpu/ccv_nnc_reduce_norm2_gpu_cudnn.cu ./reduce/gpu/ccv_nnc_argmax_gpu_ref.cu ./reduce/gpu/ccv_nnc_argmin_gpu_ref.cu ./relu/gpu/ccv_nnc_relu_gpu_cudnn.cu ./rmsprop/gpu/ccv_nnc_rmsprop_gpu_ref.cu ./rnn/gpu/ccv_nnc_lstm_gpu_cudnn.cu ./roi/gpu/ccv_nnc_roi_align_gpu_ref.cu ./scaled_dot_product_attention/gpu/ccv_nnc_scaled_dot_product_attention_flash_attn.cu ./sgd/gpu/ccv_nnc_sgd_gpu_ref.cu ./sigmoid/gpu/ccv_nnc_sigmoid_gpu_cudnn.cu ./sigmoid_loss/gpu/ccv_nnc_sigmoid_binary_crossentropy_gpu_ref.cu ./softmax/gpu/ccv_nnc_softmax_gpu_cudnn.cu ./softmax_loss/gpu/ccv_nnc_softmax_crossentropy_gpu_cudnn.cu 
./swish/gpu/ccv_nnc_swish_gpu_ref.cu ./tanh/gpu/ccv_nnc_tanh_gpu_cudnn.cu ./upsample/gpu/ccv_nnc_upsample_gpu_ref.cu ./util/gpu/ccv_nnc_util_gpu_cudnn.cu ./util/gpu/ccv_nnc_util_gpu_ref.cu
CUDA_CMD_SRCS := ./adam/gpu/ccv_nnc_adam_gpu_ref.cu ./adam/gpu/ccv_nnc_adamw_gpu_ref.cu ./blas/gpu/ccv_nnc_gemm_gpu_cublas.cu ./blas/gpu/ccv_nnc_add_gpu_cudnn.cu ./blas/gpu/ccv_nnc_mul_gpu_cudnn.cu ./blas/gpu/ccv_nnc_cmul_gpu_ref.cu ./comm/gpu/ccv_nnc_comm_gpu_nccl.cu ./compare/gpu/ccv_nnc_min_gpu_ref.cu ./compare/gpu/ccv_nnc_max_gpu_ref.cu ./compression/gpu/ccv_nnc_lssc_gpu_ref.cu ./convolution/gpu/ccv_nnc_conv_gpu_cudnn.cu ./convolution/gpu/ccv_nnc_conv_transpose_gpu_cudnn.cu ./dropout/gpu/ccv_nnc_dropout_gpu_cudnn.cu ./ew/gpu/ccv_nnc_ew_gpu_cudnn.cu ./ew/gpu/ccv_nnc_ew_gpu_ref.cu ./gelu/gpu/ccv_nnc_gelu_gpu_ref.cu ./index/gpu/ccv_nnc_index_select_gpu_ref.cu ./isnan/gpu/ccv_nnc_reduce_isnan_gpu_cudnn.cu ./lamb/gpu/ccv_nnc_lamb_gpu_ref.cu ./leaky_relu/gpu/ccv_nnc_leaky_relu_gpu_ref.cu ./loss/gpu/ccv_nnc_binary_crossentropy_gpu_ref.cu ./loss/gpu/ccv_nnc_categorical_crossentropy_gpu_ref.cu ./loss/gpu/ccv_nnc_mse_gpu_ref.cu ./loss/gpu/ccv_nnc_smooth_l1_gpu_ref.cu ./nms/gpu/ccv_nnc_nms_gpu_ref.cu ./norm/gpu/ccv_nnc_batch_norm_gpu_cudnn.cu ./norm/gpu/ccv_nnc_layer_norm_gpu_cudnn.cu ./norm/gpu/ccv_nnc_group_norm_gpu_cudnn.cu ./norm/gpu/ccv_nnc_rmsnorm_gpu_cudnn.cu ./pool/gpu/ccv_nnc_max_pool_gpu_cudnn.cu ./pool/gpu/ccv_nnc_avg_pool_gpu_cudnn.cu ./rand/gpu/ccv_nnc_rand_uniform_gpu_ref.cu ./rand/gpu/ccv_nnc_rand_normal_gpu_ref.cu ./reduce/gpu/ccv_nnc_reduce_sum_gpu_cudnn.cu ./reduce/gpu/ccv_nnc_reduce_mean_gpu_cudnn.cu ./reduce/gpu/ccv_nnc_reduce_norm2_gpu_cudnn.cu ./reduce/gpu/ccv_nnc_argmax_gpu_ref.cu ./reduce/gpu/ccv_nnc_argmin_gpu_ref.cu ./relu/gpu/ccv_nnc_relu_gpu_cudnn.cu ./rmsprop/gpu/ccv_nnc_rmsprop_gpu_ref.cu ./rnn/gpu/ccv_nnc_lstm_gpu_cudnn.cu ./roi/gpu/ccv_nnc_roi_align_gpu_ref.cu ./scaled_dot_product_attention/gpu/ccv_nnc_scaled_dot_product_attention_flash_attn.cu ./sgd/gpu/ccv_nnc_sgd_gpu_ref.cu ./sigmoid/gpu/ccv_nnc_sigmoid_gpu_cudnn.cu ./sigmoid_loss/gpu/ccv_nnc_sigmoid_binary_crossentropy_gpu_ref.cu ./softmax/gpu/ccv_nnc_softmax_gpu_cudnn.cu 
./softmax_loss/gpu/ccv_nnc_softmax_crossentropy_gpu_cudnn.cu ./swish/gpu/ccv_nnc_swish_gpu_ref.cu ./tanh/gpu/ccv_nnc_tanh_gpu_cudnn.cu ./upsample/gpu/ccv_nnc_upsample_gpu_ref.cu ./util/gpu/ccv_nnc_util_gpu_cudnn.cu ./util/gpu/ccv_nnc_util_gpu_ref.cu
MPS_CMD_SRCS := ./adam/mps/ccv_nnc_adam_mps.m ./adam/mps/ccv_nnc_adamw_mps.m ./blas/mps/ccv_nnc_gemm_mps.m ./blas/mps/ccv_nnc_add_mps.m ./blas/mps/ccv_nnc_mul_mps.m ./blas/mps/ccv_nnc_cmul_mps.m ./convolution/mps/ccv_nnc_conv_mps.m ./ew/mps/ccv_nnc_ew_mps.m ./gelu/mps/ccv_nnc_gelu_mps.m ./index/mps/ccv_nnc_index_select_mps.m ./isnan/mps/ccv_nnc_reduce_isnan_mps.m ./leaky_relu/mps/ccv_nnc_leaky_relu_mps.m ./loss/mps/ccv_nnc_mse_mps.m ./norm/mps/ccv_nnc_layer_norm_mps.m ./norm/mps/ccv_nnc_group_norm_mps.m ./norm/mps/ccv_nnc_rmsnorm_mps.m ./pool/mps/ccv_nnc_max_pool_mps.m ./pool/mps/ccv_nnc_avg_pool_mps.m ./rand/mps/ccv_nnc_rand_uniform_mps.m ./rand/mps/ccv_nnc_rand_normal_mps.m ./reduce/mps/ccv_nnc_reduce_sum_mps.m ./reduce/mps/ccv_nnc_reduce_mean_mps.m ./reduce/mps/ccv_nnc_reduce_max_mps.m ./reduce/mps/ccv_nnc_reduce_min_mps.m ./reduce/mps/ccv_nnc_argmax_mps.m ./reduce/mps/ccv_nnc_argmin_mps.m ./relu/mps/ccv_nnc_relu_mps.m ./scaled_dot_product_attention/mps/ccv_nnc_scaled_dot_product_attention_mps.m ./sigmoid/mps/ccv_nnc_sigmoid_mps.m ./softmax/mps/ccv_nnc_softmax_mps.m ./swish/mps/ccv_nnc_swish_mps.m ./upsample/mps/ccv_nnc_upsample_mps.m ./util/mps/ccv_nnc_util_mps.m
4 changes: 2 additions & 2 deletions lib/nnc/cmd/convolution/ccv_nnc_convolution.c
Original file line number Diff line number Diff line change
Expand Up @@ -102,14 +102,14 @@ static void _ccv_nnc_conv_transpose_tensor_auto_forw(const ccv_nnc_cmd_param_t c
}

REGISTER_COMMAND(CCV_NNC_CONVOLUTION_TRANSPOSE_FORWARD)(ccv_nnc_cmd_registry_t* const registry)
FIND_BACKEND(ccv_nnc_conv_transpose_cpu_ref.c)
FIND_BACKEND(ccv_nnc_conv_transpose_cpu_ref.c, gpu/ccv_nnc_conv_transpose_gpu_cudnn.cu)
{
registry->bitmask = _ccv_nnc_conv_forw_bitmask;
registry->tensor_auto = _ccv_nnc_conv_transpose_tensor_auto_forw;
}

REGISTER_COMMAND(CCV_NNC_CONVOLUTION_TRANSPOSE_BACKWARD)(ccv_nnc_cmd_registry_t* const registry)
FIND_BACKEND(ccv_nnc_conv_transpose_cpu_ref.c)
FIND_BACKEND(ccv_nnc_conv_transpose_cpu_ref.c, gpu/ccv_nnc_conv_transpose_gpu_cudnn.cu)
{
registry->bitmask = _ccv_nnc_conv_back_bitmask;
registry->tensor_auto = ccv_nnc_hint_tensor_auto_backward_from_inputs;
Expand Down
Loading

0 comments on commit cd44618

Please sign in to comment.