Skip to content

Commit

Permalink
Add convolution transpose draft impl. Need to add more test cases.
Browse files Browse the repository at this point in the history
  • Loading branch information
liuliu committed Jan 23, 2024
1 parent 02156f7 commit 8e461ea
Show file tree
Hide file tree
Showing 7 changed files with 967 additions and 674 deletions.
4 changes: 3 additions & 1 deletion lib/nnc/cmd/ccv_nnc_cmd.h
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,8 @@ enum {
CCV_NNC_COMPRESSION_LSSC_BACKWARD = 0x17ea8f73,
CCV_NNC_CONVOLUTION_FORWARD = 0x254d05f4,
CCV_NNC_CONVOLUTION_BACKWARD = 0x254d05f5,
CCV_NNC_CONVOLUTION_TRANSPOSE_FORWARD = 0xd691f78e,
CCV_NNC_CONVOLUTION_TRANSPOSE_BACKWARD = 0xd691f78f,
CCV_NNC_DATATYPE_CONVERSION_FORWARD = 0xd873e38c,
CCV_NNC_DATATYPE_CONVERSION_BACKWARD = 0xd873e38d,
CCV_NNC_DATA_TRANSFER_FORWARD = 0x12d21e1a,
Expand Down Expand Up @@ -142,6 +144,6 @@ enum {
CCV_NNC_TRANSPOSE_BACKWARD = 0xb4d506e1,
CCV_NNC_UPSAMPLE_FORWARD = 0x73875556,
CCV_NNC_UPSAMPLE_BACKWARD = 0x73875557,
CCV_NNC_COUNT = 137,
CCV_NNC_COUNT = 139,
};
/** @} */
1,354 changes: 682 additions & 672 deletions lib/nnc/cmd/ccv_nnc_cmd.inc

Large diffs are not rendered by default.

4 changes: 4 additions & 0 deletions lib/nnc/cmd/ccv_nnc_cmd_easy.h
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,10 @@
// Expands to a ccv_nnc_cmd(...) call for CCV_NNC_CONVOLUTION_FORWARD: the variadic arguments populate .size.dim, and the two leading arguments populate .convolution.groups and .convolution.count respectively.
#define CMD_CONVOLUTION_FORWARD(groups_, count_, ...) \
	ccv_nnc_cmd( \
		CCV_NNC_CONVOLUTION_FORWARD, \
		0, \
		((ccv_nnc_cmd_param_t){ \
			.size = {.dim = {__VA_ARGS__}}, \
			.convolution = {.count = count_, .groups = groups_} \
		}), \
		0)
// CCV_NNC_CONVOLUTION_BACKWARD
// Expands to a ccv_nnc_cmd(...) call for CCV_NNC_CONVOLUTION_BACKWARD: the variadic arguments populate .size.dim, and the two leading arguments populate .convolution.groups and .convolution.count respectively.
#define CMD_CONVOLUTION_BACKWARD(groups_, count_, ...) \
	ccv_nnc_cmd( \
		CCV_NNC_CONVOLUTION_BACKWARD, \
		0, \
		((ccv_nnc_cmd_param_t){ \
			.size = {.dim = {__VA_ARGS__}}, \
			.convolution = {.count = count_, .groups = groups_} \
		}), \
		0)
// CCV_NNC_CONVOLUTION_TRANSPOSE_FORWARD
// Expands to a ccv_nnc_cmd(...) call for CCV_NNC_CONVOLUTION_TRANSPOSE_FORWARD. It takes the same arguments as CMD_CONVOLUTION_FORWARD: the variadic arguments populate .size.dim, and the two leading arguments populate .convolution.groups and .convolution.count respectively.
#define CMD_CONVOLUTION_TRANSPOSE_FORWARD(groups_, count_, ...) \
	ccv_nnc_cmd( \
		CCV_NNC_CONVOLUTION_TRANSPOSE_FORWARD, \
		0, \
		((ccv_nnc_cmd_param_t){ \
			.size = {.dim = {__VA_ARGS__}}, \
			.convolution = {.count = count_, .groups = groups_} \
		}), \
		0)
// CCV_NNC_CONVOLUTION_TRANSPOSE_BACKWARD
// Expands to a ccv_nnc_cmd(...) call for CCV_NNC_CONVOLUTION_TRANSPOSE_BACKWARD. It takes the same arguments as CMD_CONVOLUTION_BACKWARD: the variadic arguments populate .size.dim, and the two leading arguments populate .convolution.groups and .convolution.count respectively.
#define CMD_CONVOLUTION_TRANSPOSE_BACKWARD(groups_, count_, ...) \
	ccv_nnc_cmd( \
		CCV_NNC_CONVOLUTION_TRANSPOSE_BACKWARD, \
		0, \
		((ccv_nnc_cmd_param_t){ \
			.size = {.dim = {__VA_ARGS__}}, \
			.convolution = {.count = count_, .groups = groups_} \
		}), \
		0)
// CCV_NNC_DROPOUT_FORWARD
// NOTE(review): the _X_F / _X_1 variants below are presumably selected by argument count from a CMD_DROPOUT_FORWARD dispatcher that is not visible in this hunk; confirm against the full header.
// Fallback variant: ignores its arguments and expands to a parenthesized diagnostic string literal instead of a command.
#define CMD_DROPOUT_FORWARD_X_F(...) ("This should not be used, you should have either 1 parameter or 2 parameters for CMD_DROPOUT_FORWARD")
// One-argument variant: expands to a ccv_nnc_cmd(...) call for CCV_NNC_DROPOUT_FORWARD with .size.dim = {1,1,1}, .dropout.p = _p and .dropout.entirety = 0.
#define CMD_DROPOUT_FORWARD_X_1(_p) ccv_nnc_cmd(CCV_NNC_DROPOUT_FORWARD, 0, ((ccv_nnc_cmd_param_t){.size={.dim={1,1,1}},.dropout={.p=_p,.entirety=0}}), 0)
Expand Down
2 changes: 1 addition & 1 deletion lib/nnc/cmd/config.mk
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
CMD_SRCS := ./adam/ccv_nnc_adam_cpu_ref.c ./adam/ccv_nnc_adamw_cpu_ref.c ./blas/ccv_nnc_gemm_cpu_ref.c ./blas/ccv_nnc_gemm_cpu_opt.c ./blas/ccv_nnc_add_cpu_ref.c ./blas/ccv_nnc_mul_cpu_ref.c ./blas/ccv_nnc_cmul_cpu_ref.c ./compare/ccv_nnc_min_cpu_ref.c ./compare/ccv_nnc_max_cpu_ref.c ./compression/ccv_nnc_lssc_cpu_ref.c ./convolution/ccv_nnc_conv_cpu_ref.c ./convolution/ccv_nnc_conv_cpu_opt.c ./dropout/ccv_nnc_dropout_cpu_ref.c ./ew/ccv_nnc_ew_cpu_ref.c ./gelu/ccv_nnc_gelu_cpu_ref.c ./histogram/ccv_nnc_histogram_cpu_ref.c ./index/ccv_nnc_index_select_cpu_ref.c ./isnan/ccv_nnc_reduce_isnan_cpu_ref.c ./lamb/ccv_nnc_lamb_cpu_ref.c ./leaky_relu/ccv_nnc_leaky_relu_cpu_ref.c ./loss/ccv_nnc_binary_crossentropy_cpu_ref.c ./loss/ccv_nnc_categorical_crossentropy_cpu_ref.c ./loss/ccv_nnc_mse_cpu_ref.c ./loss/ccv_nnc_smooth_l1_cpu_ref.c ./nms/ccv_nnc_nms_cpu_ref.c ./norm/ccv_nnc_batch_norm_cpu_ref.c ./norm/ccv_nnc_layer_norm_cpu_ref.c ./norm/ccv_nnc_group_norm_cpu_ref.c ./norm/ccv_nnc_rmsnorm_cpu_ref.c ./pool/ccv_nnc_max_pool_cpu_ref.c ./pool/ccv_nnc_avg_pool_cpu_ref.c ./rand/ccv_nnc_rand_uniform_cpu_ref.c ./rand/ccv_nnc_rand_normal_cpu_ref.c ./reduce/ccv_nnc_reduce_sum_cpu_ref.c ./reduce/ccv_nnc_reduce_mean_cpu_ref.c ./reduce/ccv_nnc_reduce_max_cpu_ref.c ./reduce/ccv_nnc_reduce_min_cpu_ref.c ./reduce/ccv_nnc_reduce_norm2_cpu_ref.c ./reduce/ccv_nnc_argmax_cpu_ref.c ./reduce/ccv_nnc_argmin_cpu_ref.c ./relu/ccv_nnc_relu_cpu_ref.c ./rmsprop/ccv_nnc_rmsprop_cpu_ref.c ./roi/ccv_nnc_roi_align_cpu_ref.c ./scaled_dot_product_attention/ccv_nnc_scaled_dot_product_attention_cpu_ref.c ./sgd/ccv_nnc_sgd_cpu_ref.c ./sigmoid/ccv_nnc_sigmoid_cpu_ref.c ./sigmoid_loss/ccv_nnc_sigmoid_binary_crossentropy_cpu_ref.c ./softmax/ccv_nnc_softmax_cpu_ref.c ./softmax_loss/ccv_nnc_softmax_crossentropy_cpu_ref.c ./swish/ccv_nnc_swish_cpu_ref.c ./tanh/ccv_nnc_tanh_cpu_ref.c ./upsample/ccv_nnc_upsample_cpu_ref.c ./util/ccv_nnc_util_cpu_ref.c ./adam/ccv_nnc_adam.c ./blas/ccv_nnc_blas.c 
./blas/cpu_opt/_ccv_nnc_gemm_cpu_opt.c ./blas/cpu_sys/_ccv_nnc_gemm_cpu_sys.c ./comm/ccv_nnc_comm.c ./compare/ccv_nnc_cmp.c ./compression/ccv_nnc_compression.c ./convolution/cpu_opt/_ccv_nnc_conv_cpu_4x4_3x3_winograd.c ./convolution/cpu_opt/_ccv_nnc_conv_cpu_fft.c ./convolution/cpu_opt/_ccv_nnc_conv_cpu_gemm.c ./convolution/cpu_opt/_ccv_nnc_conv_cpu_opt.c ./convolution/ccv_nnc_convolution.c ./dropout/ccv_nnc_dropout.c ./ew/ccv_nnc_ew.c ./gelu/ccv_nnc_gelu.c ./histogram/ccv_nnc_histogram.c ./index/ccv_nnc_index_select.c ./isnan/ccv_nnc_reduce_isnan.c ./lamb/ccv_nnc_lamb.c ./leaky_relu/ccv_nnc_leaky_relu.c ./loss/ccv_nnc_binary_crossentropy.c ./loss/ccv_nnc_categorical_crossentropy.c ./loss/ccv_nnc_mse.c ./loss/ccv_nnc_smooth_l1.c ./nms/ccv_nnc_nms.c ./norm/ccv_nnc_norm.c ./pool/ccv_nnc_pool.c ./rand/ccv_nnc_rand.c ./reduce/ccv_nnc_reduce.c ./relu/ccv_nnc_relu.c ./rmsprop/ccv_nnc_rmsprop.c ./rnn/ccv_nnc_lstm.c ./roi/ccv_nnc_roi_align.c ./scaled_dot_product_attention/ccv_nnc_scaled_dot_product_attention.c ./sgd/ccv_nnc_sgd.c ./sigmoid/ccv_nnc_sigmoid.c ./sigmoid_loss/ccv_nnc_sigmoid_binary_crossentropy.c ./softmax/ccv_nnc_softmax.c ./softmax_loss/ccv_nnc_softmax_crossentropy.c ./swish/ccv_nnc_swish.c ./tanh/ccv_nnc_tanh.c ./upsample/ccv_nnc_upsample.c ./util/ccv_nnc_util.c
CMD_SRCS := ./adam/ccv_nnc_adam_cpu_ref.c ./adam/ccv_nnc_adamw_cpu_ref.c ./blas/ccv_nnc_gemm_cpu_ref.c ./blas/ccv_nnc_gemm_cpu_opt.c ./blas/ccv_nnc_add_cpu_ref.c ./blas/ccv_nnc_mul_cpu_ref.c ./blas/ccv_nnc_cmul_cpu_ref.c ./compare/ccv_nnc_min_cpu_ref.c ./compare/ccv_nnc_max_cpu_ref.c ./compression/ccv_nnc_lssc_cpu_ref.c ./convolution/ccv_nnc_conv_cpu_ref.c ./convolution/ccv_nnc_conv_cpu_opt.c ./convolution/ccv_nnc_conv_transpose_cpu_ref.c ./dropout/ccv_nnc_dropout_cpu_ref.c ./ew/ccv_nnc_ew_cpu_ref.c ./gelu/ccv_nnc_gelu_cpu_ref.c ./histogram/ccv_nnc_histogram_cpu_ref.c ./index/ccv_nnc_index_select_cpu_ref.c ./isnan/ccv_nnc_reduce_isnan_cpu_ref.c ./lamb/ccv_nnc_lamb_cpu_ref.c ./leaky_relu/ccv_nnc_leaky_relu_cpu_ref.c ./loss/ccv_nnc_binary_crossentropy_cpu_ref.c ./loss/ccv_nnc_categorical_crossentropy_cpu_ref.c ./loss/ccv_nnc_mse_cpu_ref.c ./loss/ccv_nnc_smooth_l1_cpu_ref.c ./nms/ccv_nnc_nms_cpu_ref.c ./norm/ccv_nnc_batch_norm_cpu_ref.c ./norm/ccv_nnc_layer_norm_cpu_ref.c ./norm/ccv_nnc_group_norm_cpu_ref.c ./norm/ccv_nnc_rmsnorm_cpu_ref.c ./pool/ccv_nnc_max_pool_cpu_ref.c ./pool/ccv_nnc_avg_pool_cpu_ref.c ./rand/ccv_nnc_rand_uniform_cpu_ref.c ./rand/ccv_nnc_rand_normal_cpu_ref.c ./reduce/ccv_nnc_reduce_sum_cpu_ref.c ./reduce/ccv_nnc_reduce_mean_cpu_ref.c ./reduce/ccv_nnc_reduce_max_cpu_ref.c ./reduce/ccv_nnc_reduce_min_cpu_ref.c ./reduce/ccv_nnc_reduce_norm2_cpu_ref.c ./reduce/ccv_nnc_argmax_cpu_ref.c ./reduce/ccv_nnc_argmin_cpu_ref.c ./relu/ccv_nnc_relu_cpu_ref.c ./rmsprop/ccv_nnc_rmsprop_cpu_ref.c ./roi/ccv_nnc_roi_align_cpu_ref.c ./scaled_dot_product_attention/ccv_nnc_scaled_dot_product_attention_cpu_ref.c ./sgd/ccv_nnc_sgd_cpu_ref.c ./sigmoid/ccv_nnc_sigmoid_cpu_ref.c ./sigmoid_loss/ccv_nnc_sigmoid_binary_crossentropy_cpu_ref.c ./softmax/ccv_nnc_softmax_cpu_ref.c ./softmax_loss/ccv_nnc_softmax_crossentropy_cpu_ref.c ./swish/ccv_nnc_swish_cpu_ref.c ./tanh/ccv_nnc_tanh_cpu_ref.c ./upsample/ccv_nnc_upsample_cpu_ref.c ./util/ccv_nnc_util_cpu_ref.c 
./adam/ccv_nnc_adam.c ./blas/ccv_nnc_blas.c ./blas/cpu_opt/_ccv_nnc_gemm_cpu_opt.c ./blas/cpu_sys/_ccv_nnc_gemm_cpu_sys.c ./comm/ccv_nnc_comm.c ./compare/ccv_nnc_cmp.c ./compression/ccv_nnc_compression.c ./convolution/cpu_opt/_ccv_nnc_conv_cpu_4x4_3x3_winograd.c ./convolution/cpu_opt/_ccv_nnc_conv_cpu_fft.c ./convolution/cpu_opt/_ccv_nnc_conv_cpu_gemm.c ./convolution/cpu_opt/_ccv_nnc_conv_cpu_opt.c ./convolution/ccv_nnc_convolution.c ./dropout/ccv_nnc_dropout.c ./ew/ccv_nnc_ew.c ./gelu/ccv_nnc_gelu.c ./histogram/ccv_nnc_histogram.c ./index/ccv_nnc_index_select.c ./isnan/ccv_nnc_reduce_isnan.c ./lamb/ccv_nnc_lamb.c ./leaky_relu/ccv_nnc_leaky_relu.c ./loss/ccv_nnc_binary_crossentropy.c ./loss/ccv_nnc_categorical_crossentropy.c ./loss/ccv_nnc_mse.c ./loss/ccv_nnc_smooth_l1.c ./nms/ccv_nnc_nms.c ./norm/ccv_nnc_norm.c ./pool/ccv_nnc_pool.c ./rand/ccv_nnc_rand.c ./reduce/ccv_nnc_reduce.c ./relu/ccv_nnc_relu.c ./rmsprop/ccv_nnc_rmsprop.c ./rnn/ccv_nnc_lstm.c ./roi/ccv_nnc_roi_align.c ./scaled_dot_product_attention/ccv_nnc_scaled_dot_product_attention.c ./sgd/ccv_nnc_sgd.c ./sigmoid/ccv_nnc_sigmoid.c ./sigmoid_loss/ccv_nnc_sigmoid_binary_crossentropy.c ./softmax/ccv_nnc_softmax.c ./softmax_loss/ccv_nnc_softmax_crossentropy.c ./swish/ccv_nnc_swish.c ./tanh/ccv_nnc_tanh.c ./upsample/ccv_nnc_upsample.c ./util/ccv_nnc_util.c
CUDA_CMD_SRCS := ./adam/gpu/ccv_nnc_adam_gpu_ref.cu ./adam/gpu/ccv_nnc_adamw_gpu_ref.cu ./blas/gpu/ccv_nnc_gemm_gpu_cublas.cu ./blas/gpu/ccv_nnc_add_gpu_cudnn.cu ./blas/gpu/ccv_nnc_mul_gpu_cudnn.cu ./blas/gpu/ccv_nnc_cmul_gpu_ref.cu ./comm/gpu/ccv_nnc_comm_gpu_nccl.cu ./compare/gpu/ccv_nnc_min_gpu_ref.cu ./compare/gpu/ccv_nnc_max_gpu_ref.cu ./compression/gpu/ccv_nnc_lssc_gpu_ref.cu ./convolution/gpu/ccv_nnc_conv_gpu_cudnn.cu ./dropout/gpu/ccv_nnc_dropout_gpu_cudnn.cu ./ew/gpu/ccv_nnc_ew_gpu_cudnn.cu ./ew/gpu/ccv_nnc_ew_gpu_ref.cu ./gelu/gpu/ccv_nnc_gelu_gpu_ref.cu ./index/gpu/ccv_nnc_index_select_gpu_ref.cu ./isnan/gpu/ccv_nnc_reduce_isnan_gpu_cudnn.cu ./lamb/gpu/ccv_nnc_lamb_gpu_ref.cu ./leaky_relu/gpu/ccv_nnc_leaky_relu_gpu_ref.cu ./loss/gpu/ccv_nnc_binary_crossentropy_gpu_ref.cu ./loss/gpu/ccv_nnc_categorical_crossentropy_gpu_ref.cu ./loss/gpu/ccv_nnc_mse_gpu_ref.cu ./loss/gpu/ccv_nnc_smooth_l1_gpu_ref.cu ./nms/gpu/ccv_nnc_nms_gpu_ref.cu ./norm/gpu/ccv_nnc_batch_norm_gpu_cudnn.cu ./norm/gpu/ccv_nnc_layer_norm_gpu_cudnn.cu ./norm/gpu/ccv_nnc_group_norm_gpu_cudnn.cu ./norm/gpu/ccv_nnc_rmsnorm_gpu_cudnn.cu ./pool/gpu/ccv_nnc_max_pool_gpu_cudnn.cu ./pool/gpu/ccv_nnc_avg_pool_gpu_cudnn.cu ./rand/gpu/ccv_nnc_rand_uniform_gpu_ref.cu ./rand/gpu/ccv_nnc_rand_normal_gpu_ref.cu ./reduce/gpu/ccv_nnc_reduce_sum_gpu_cudnn.cu ./reduce/gpu/ccv_nnc_reduce_mean_gpu_cudnn.cu ./reduce/gpu/ccv_nnc_reduce_norm2_gpu_cudnn.cu ./reduce/gpu/ccv_nnc_argmax_gpu_ref.cu ./reduce/gpu/ccv_nnc_argmin_gpu_ref.cu ./relu/gpu/ccv_nnc_relu_gpu_cudnn.cu ./rmsprop/gpu/ccv_nnc_rmsprop_gpu_ref.cu ./rnn/gpu/ccv_nnc_lstm_gpu_cudnn.cu ./roi/gpu/ccv_nnc_roi_align_gpu_ref.cu ./scaled_dot_product_attention/gpu/ccv_nnc_scaled_dot_product_attention_flash_attn.cu ./sgd/gpu/ccv_nnc_sgd_gpu_ref.cu ./sigmoid/gpu/ccv_nnc_sigmoid_gpu_cudnn.cu ./sigmoid_loss/gpu/ccv_nnc_sigmoid_binary_crossentropy_gpu_ref.cu ./softmax/gpu/ccv_nnc_softmax_gpu_cudnn.cu ./softmax_loss/gpu/ccv_nnc_softmax_crossentropy_gpu_cudnn.cu 
./swish/gpu/ccv_nnc_swish_gpu_ref.cu ./tanh/gpu/ccv_nnc_tanh_gpu_cudnn.cu ./upsample/gpu/ccv_nnc_upsample_gpu_ref.cu ./util/gpu/ccv_nnc_util_gpu_cudnn.cu ./util/gpu/ccv_nnc_util_gpu_ref.cu
MPS_CMD_SRCS := ./adam/mps/ccv_nnc_adam_mps.m ./adam/mps/ccv_nnc_adamw_mps.m ./blas/mps/ccv_nnc_gemm_mps.m ./blas/mps/ccv_nnc_add_mps.m ./blas/mps/ccv_nnc_mul_mps.m ./blas/mps/ccv_nnc_cmul_mps.m ./convolution/mps/ccv_nnc_conv_mps.m ./ew/mps/ccv_nnc_ew_mps.m ./gelu/mps/ccv_nnc_gelu_mps.m ./index/mps/ccv_nnc_index_select_mps.m ./isnan/mps/ccv_nnc_reduce_isnan_mps.m ./leaky_relu/mps/ccv_nnc_leaky_relu_mps.m ./loss/mps/ccv_nnc_mse_mps.m ./norm/mps/ccv_nnc_layer_norm_mps.m ./norm/mps/ccv_nnc_group_norm_mps.m ./norm/mps/ccv_nnc_rmsnorm_mps.m ./pool/mps/ccv_nnc_max_pool_mps.m ./pool/mps/ccv_nnc_avg_pool_mps.m ./rand/mps/ccv_nnc_rand_uniform_mps.m ./rand/mps/ccv_nnc_rand_normal_mps.m ./reduce/mps/ccv_nnc_reduce_sum_mps.m ./reduce/mps/ccv_nnc_reduce_mean_mps.m ./reduce/mps/ccv_nnc_reduce_max_mps.m ./reduce/mps/ccv_nnc_reduce_min_mps.m ./reduce/mps/ccv_nnc_argmax_mps.m ./reduce/mps/ccv_nnc_argmin_mps.m ./relu/mps/ccv_nnc_relu_mps.m ./scaled_dot_product_attention/mps/ccv_nnc_scaled_dot_product_attention_mps.m ./sigmoid/mps/ccv_nnc_sigmoid_mps.m ./softmax/mps/ccv_nnc_softmax_mps.m ./swish/mps/ccv_nnc_swish_mps.m ./upsample/mps/ccv_nnc_upsample_mps.m ./util/mps/ccv_nnc_util_mps.m
Loading

0 comments on commit 8e461ea

Please sign in to comment.