@@ -263,7 +263,7 @@ void __MATX_INLINE__ reduce(OutType dest, const InType &in, ReduceOp op,
263263 */
264264template <typename OutType, typename InType>
265265void __MATX_INLINE__ mean_impl (OutType dest, const InType &in,
266- cudaExecutor exec = 0 )
266+ const cudaExecutor & exec)
267267{
268268#ifdef __CUDACC__
269269 MATX_NVTX_START (" mean_impl(" + get_type_str (in) + " )" , matx::MATX_NVTX_LOG_API)
@@ -468,7 +468,7 @@ void __MATX_INLINE__ softmax_impl(OutType dest, const InType &in, PermDims dims,
468468 */
469469template <typename OutType, typename InType>
470470void __MATX_INLINE__ median_impl (OutType dest,
471- const InType &in, cudaExecutor exec = 0 )
471+ const InType &in, const cudaExecutor & exec)
472472{
473473#ifdef __CUDACC__
474474 if constexpr ( OutType::Rank () <= 1 && InType::Rank () <=2 ) {
@@ -640,7 +640,7 @@ void __MATX_INLINE__ median_impl(OutType dest, const InType &in, [[maybe_unused]
640640 * CUDA executor
641641 */
642642template <typename OutType, typename InType>
643- void __MATX_INLINE__ sum_impl (OutType dest, const InType &in, cudaExecutor exec = 0 )
643+ void __MATX_INLINE__ sum_impl (OutType dest, const InType &in, const cudaExecutor & exec)
644644{
645645#ifdef __CUDACC__
646646 MATX_NVTX_START (" sum_impl(" + get_type_str (in) + " )" , matx::MATX_NVTX_LOG_API)
@@ -706,7 +706,7 @@ void __MATX_INLINE__ sum_impl(OutType dest, const InType &in, [[maybe_unused]] c
706706 * CUDA executor
707707 */
708708template <typename OutType, typename InType>
709- void __MATX_INLINE__ prod_impl (OutType dest, const InType &in, cudaExecutor exec = 0 )
709+ void __MATX_INLINE__ prod_impl (OutType dest, const InType &in, const cudaExecutor & exec)
710710{
711711#ifdef __CUDACC__
712712 MATX_NVTX_START (" prod_impl(" + get_type_str (in) + " )" , matx::MATX_NVTX_LOG_API)
@@ -778,7 +778,7 @@ void __MATX_INLINE__ prod_impl(OutType dest, const InType &in, [[maybe_unused]]
778778 * CUDA executor or stream ID
779779 */
780780template <typename OutType, typename InType>
781- void __MATX_INLINE__ max_impl (OutType dest, const InType &in, cudaExecutor exec = 0 )
781+ void __MATX_INLINE__ max_impl (OutType dest, const InType &in, const cudaExecutor & exec)
782782{
783783#ifdef __CUDACC__
784784 MATX_NVTX_START (" max_impl(" + get_type_str (in) + " )" , matx::MATX_NVTX_LOG_API)
@@ -848,7 +848,7 @@ void __MATX_INLINE__ max_impl(OutType dest, const InType &in, [[maybe_unused]] c
848848 * CUDA executor or stream ID
849849 */
850850template <typename OutType, typename TensorIndexType, typename InType>
851- void __MATX_INLINE__ argmax_impl (OutType dest, TensorIndexType &idest, const InType &in, cudaExecutor exec = 0 )
851+ void __MATX_INLINE__ argmax_impl (OutType dest, TensorIndexType &idest, const InType &in, const cudaExecutor & exec)
852852{
853853#ifdef __CUDACC__
854854 MATX_NVTX_START (" argmax_impl(" + get_type_str (in) + " )" , matx::MATX_NVTX_LOG_API)
@@ -926,7 +926,7 @@ void __MATX_INLINE__ argmax_impl(OutType dest, TensorIndexType &idest, const InT
926926 * CUDA executor or stream ID
927927 */
928928template <typename OutType, typename InType>
929- void __MATX_INLINE__ min_impl (OutType dest, const InType &in, cudaExecutor exec = 0 )
929+ void __MATX_INLINE__ min_impl (OutType dest, const InType &in, const cudaExecutor & exec)
930930{
931931#ifdef __CUDACC__
932932 MATX_NVTX_START (" min_impl(" + get_type_str (in) + " )" , matx::MATX_NVTX_LOG_API)
@@ -995,7 +995,7 @@ void __MATX_INLINE__ min_impl(OutType dest, const InType &in, [[maybe_unused]] c
995995 * CUDA executor or stream ID
996996 */
997997template <typename OutType, typename TensorIndexType, typename InType>
998- void __MATX_INLINE__ argmin_impl (OutType dest, TensorIndexType &idest, const InType &in, cudaExecutor exec = 0 )
998+ void __MATX_INLINE__ argmin_impl (OutType dest, TensorIndexType &idest, const InType &in, const cudaExecutor & exec)
999999{
10001000 static_assert (OutType::Rank () == TensorIndexType::Rank ());
10011001#ifdef __CUDACC__
@@ -1082,7 +1082,7 @@ void __MATX_INLINE__ argmin_impl(OutType dest, TensorIndexType &idest, const InT
10821082 * CUDA executor or stream ID
10831083 */
10841084template <typename OutType, typename TensorIndexType, typename InType>
1085- void __MATX_INLINE__ argminmax_impl (OutType destmin, TensorIndexType &idestmin, OutType destmax, TensorIndexType &idestmax, const InType &in, cudaExecutor exec = 0 )
1085+ void __MATX_INLINE__ argminmax_impl (OutType destmin, TensorIndexType &idestmin, OutType destmax, TensorIndexType &idestmax, const InType &in, const cudaExecutor & exec)
10861086{
10871087 static_assert (OutType::Rank () == TensorIndexType::Rank ());
10881088#ifdef __CUDACC__
@@ -1162,7 +1162,7 @@ void __MATX_INLINE__ argminmax_impl(OutType destmin, TensorIndexType &idestmin,
11621162 * CUDA executor or stream ID
11631163 */
11641164template <typename OutType, typename InType>
1165- void __MATX_INLINE__ any_impl (OutType dest, const InType &in, cudaExecutor exec = 0 )
1165+ void __MATX_INLINE__ any_impl (OutType dest, const InType &in, const cudaExecutor & exec)
11661166{
11671167#ifdef __CUDACC__
11681168 MATX_NVTX_START (" any_impl(" + get_type_str (in) + " )" , matx::MATX_NVTX_LOG_API)
@@ -1235,7 +1235,7 @@ void __MATX_INLINE__ any_impl(OutType dest, const InType &in, [[maybe_unused]] c
12351235 * CUDA executor or stream ID
12361236 */
12371237template <typename OutType, typename InType>
1238- void __MATX_INLINE__ all_impl (OutType dest, const InType &in, cudaExecutor exec = 0 )
1238+ void __MATX_INLINE__ all_impl (OutType dest, const InType &in, const cudaExecutor & exec)
12391239{
12401240#ifdef __CUDACC__
12411241 MATX_NVTX_START (" all_impl(" + get_type_str (in) + " )" , matx::MATX_NVTX_LOG_API)
@@ -1313,7 +1313,7 @@ void __MATX_INLINE__ all_impl(OutType dest, const InType &in, [[maybe_unused]] c
13131313 * CUDA executor or stream ID
13141314 */
13151315template <typename OutType, typename InType1, typename InType2>
1316- void __MATX_INLINE__ allclose (OutType dest, const InType1 &in1, const InType2 &in2, double rtol, double atol, cudaExecutor exec = 0 )
1316+ void __MATX_INLINE__ allclose (OutType dest, const InType1 &in1, const InType2 &in2, double rtol, double atol, const cudaExecutor & exec)
13171317{
13181318#ifdef __CUDACC__
13191319 MATX_NVTX_START (" allclose(" + get_type_str (in1) + " , " + get_type_str (in2) + " )" , matx::MATX_NVTX_LOG_API)
0 commit comments