Skip to content

Commit 2d5fb4f

Browse files
authored
Changed cudaExecutor to be const& (#1104)
1 parent 02c4f47 commit 2d5fb4f

File tree

5 files changed: +26 additions, −26 deletions

docs_input/api/logic/truth/allclose.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ from allclose is an ``int`` value since boolean reductions are not available in
88

99

1010
.. doxygenfunction:: allclose(OutType dest, const InType1 &in1, const InType2 &in2, double rtol, double atol, const HostExecutor<MODE> &exec)
11-
.. doxygenfunction:: allclose(OutType dest, const InType1 &in1, const InType2 &in2, double rtol, double atol, cudaExecutor exec = 0)
11+
.. doxygenfunction:: allclose(OutType dest, const InType1 &in1, const InType2 &in2, double rtol, double atol, const cudaExecutor &exec)
1212

1313
Examples
1414
~~~~~~~~

include/matx/transforms/chol/chol_cuda.h

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,7 @@ struct DnCholCUDAParams_t {
5858
size_t batch_size;
5959
cublasFillMode_t uplo;
6060
MatXDataType_t dtype;
61-
cudaExecutor exec;
61+
cudaStream_t stream;
6262
};
6363

6464
template <typename OutputTensor, typename ATensor>
@@ -128,7 +128,7 @@ class matxDnCholCUDAPlan_t : matxDnCUDASolver_t {
128128
params.n = a.Size(RANK - 1);
129129
params.A = a.Data();
130130
params.uplo = uplo;
131-
params.exec = exec;
131+
params.stream = exec.getStream();
132132
params.dtype = TypeToInt<T1>();
133133

134134
return params;
@@ -208,7 +208,7 @@ struct DnCholCUDAParamsKeyHash {
208208
{
209209
return (std::hash<uint64_t>()(k.n)) +
210210
(std::hash<uint64_t>()(k.batch_size)) +
211-
(std::hash<uint64_t>()((uint64_t)(k.exec.getStream())));
211+
(std::hash<uint64_t>()((uint64_t)(k.stream)));
212212
}
213213
};
214214

@@ -223,7 +223,7 @@ struct DnCholCUDAParamsKeyEq {
223223
return l.n == t.n &&
224224
l.batch_size == t.batch_size &&
225225
l.dtype == t.dtype &&
226-
l.exec.getStream() == t.exec.getStream();
226+
l.stream == t.stream;
227227
}
228228
};
229229

include/matx/transforms/cub.h

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1523,7 +1523,7 @@ using cub_cache_t = std::unordered_map<CubParams_t, std::any, CubParamsKeyHash,
15231523
template <typename OutputTensor, typename InputOperator>
15241524
void sort_impl_inner(OutputTensor &a_out, const InputOperator &a,
15251525
const SortDirection_t dir,
1526-
cudaExecutor exec = 0)
1526+
const cudaExecutor &exec)
15271527
{
15281528
#ifdef __CUDACC__
15291529
MATX_NVTX_START("", matx::MATX_NVTX_LOG_API)
@@ -1569,7 +1569,7 @@ template <typename OutputIndexTensor, typename InputIndexTensor, typename Output
15691569
void sort_pairs_impl_inner(OutputIndexTensor &idx_out, const InputIndexTensor &idx_in,
15701570
OutputKeyTensor &a_out, const InputKeyTensor &a_in,
15711571
const SortDirection_t dir,
1572-
[[maybe_unused]] cudaExecutor exec = 0)
1572+
const cudaExecutor &exec)
15731573
{
15741574
#ifdef __CUDACC__
15751575
MATX_NVTX_START("", matx::MATX_NVTX_LOG_API)
@@ -2074,7 +2074,7 @@ void cub_dualargreduce(OutputTensor &a1_out,
20742074
template <typename OutputTensor, typename InputOperator>
20752075
void sort_impl(OutputTensor &a_out, const InputOperator &a,
20762076
const SortDirection_t dir,
2077-
cudaExecutor exec = 0)
2077+
const cudaExecutor &exec)
20782078
{
20792079
#ifdef __CUDACC__
20802080
MATX_NVTX_START("", matx::MATX_NVTX_LOG_API)
@@ -2139,7 +2139,7 @@ void sort_impl(OutputTensor &a_out, const InputOperator &a,
21392139
template <typename OutputTensor, typename InputOperator>
21402140
void argsort_impl(OutputTensor &idx_out, const InputOperator &a,
21412141
const SortDirection_t dir,
2142-
cudaExecutor exec = 0)
2142+
const cudaExecutor &exec)
21432143
{
21442144
#ifdef __CUDACC__
21452145
MATX_NVTX_START("", matx::MATX_NVTX_LOG_API)
@@ -2294,7 +2294,7 @@ void sort_impl(OutputTensor &a_out, const InputOperator &a,
22942294
*/
22952295
template <typename OutputTensor, typename InputOperator>
22962296
void cumsum_impl(OutputTensor &a_out, const InputOperator &a,
2297-
cudaExecutor exec = 0)
2297+
const cudaExecutor &exec)
22982298
{
22992299
#ifdef __CUDACC__
23002300
MATX_NVTX_START("", matx::MATX_NVTX_LOG_API)
@@ -2531,7 +2531,7 @@ struct GTE
25312531
* CUDA executor or stream
25322532
*/
25332533
template <typename SelectType, typename CountTensor, typename OutputTensor, typename InputOperator>
2534-
void find_impl(OutputTensor &a_out, CountTensor &num_found, const InputOperator &a, SelectType sel, cudaExecutor exec = 0)
2534+
void find_impl(OutputTensor &a_out, CountTensor &num_found, const InputOperator &a, SelectType sel, const cudaExecutor &exec)
25352535
{
25362536
#ifdef __CUDACC__
25372537
static_assert(CountTensor::Rank() == 0, "Num found output tensor rank must be 0");
@@ -2652,7 +2652,7 @@ void find_impl(OutputTensor &a_out, CountTensor &num_found, const InputOperator
26522652
* CUDA executor stream
26532653
*/
26542654
template <typename SelectType, typename CountTensor, typename OutputTensor, typename InputOperator>
2655-
void find_idx_impl(OutputTensor &a_out, CountTensor &num_found, const InputOperator &a, SelectType sel, cudaExecutor exec = 0)
2655+
void find_idx_impl(OutputTensor &a_out, CountTensor &num_found, const InputOperator &a, SelectType sel, const cudaExecutor &exec)
26562656
{
26572657
#ifdef __CUDACC__
26582658
static_assert(CountTensor::Rank() == 0, "Num found output tensor rank must be 0");
@@ -2767,7 +2767,7 @@ void find_idx_impl(OutputTensor &a_out, CountTensor &num_found, const InputOpera
27672767
* CUDA executor
27682768
*/
27692769
template <typename CountTensor, typename OutputTensor, typename InputOperator>
2770-
void unique_impl(OutputTensor &a_out, CountTensor &num_found, const InputOperator &a, cudaExecutor exec = 0)
2770+
void unique_impl(OutputTensor &a_out, CountTensor &num_found, const InputOperator &a, const cudaExecutor &exec)
27712771
{
27722772
#ifdef __CUDACC__
27732773
static_assert(CountTensor::Rank() == 0, "Num found output tensor rank must be 0");

include/matx/transforms/reduce.h

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -263,7 +263,7 @@ void __MATX_INLINE__ reduce(OutType dest, const InType &in, ReduceOp op,
263263
*/
264264
template <typename OutType, typename InType>
265265
void __MATX_INLINE__ mean_impl(OutType dest, const InType &in,
266-
cudaExecutor exec = 0)
266+
const cudaExecutor &exec)
267267
{
268268
#ifdef __CUDACC__
269269
MATX_NVTX_START("mean_impl(" + get_type_str(in) + ")", matx::MATX_NVTX_LOG_API)
@@ -468,7 +468,7 @@ void __MATX_INLINE__ softmax_impl(OutType dest, const InType &in, PermDims dims,
468468
*/
469469
template <typename OutType, typename InType>
470470
void __MATX_INLINE__ median_impl(OutType dest,
471-
const InType &in, cudaExecutor exec = 0)
471+
const InType &in, const cudaExecutor &exec)
472472
{
473473
#ifdef __CUDACC__
474474
if constexpr ( OutType::Rank() <= 1 && InType::Rank() <=2 ) {
@@ -640,7 +640,7 @@ void __MATX_INLINE__ median_impl(OutType dest, const InType &in, [[maybe_unused]
640640
* CUDA executor
641641
*/
642642
template <typename OutType, typename InType>
643-
void __MATX_INLINE__ sum_impl(OutType dest, const InType &in, cudaExecutor exec = 0)
643+
void __MATX_INLINE__ sum_impl(OutType dest, const InType &in, const cudaExecutor &exec)
644644
{
645645
#ifdef __CUDACC__
646646
MATX_NVTX_START("sum_impl(" + get_type_str(in) + ")", matx::MATX_NVTX_LOG_API)
@@ -706,7 +706,7 @@ void __MATX_INLINE__ sum_impl(OutType dest, const InType &in, [[maybe_unused]] c
706706
* CUDA executor
707707
*/
708708
template <typename OutType, typename InType>
709-
void __MATX_INLINE__ prod_impl(OutType dest, const InType &in, cudaExecutor exec = 0)
709+
void __MATX_INLINE__ prod_impl(OutType dest, const InType &in, const cudaExecutor &exec)
710710
{
711711
#ifdef __CUDACC__
712712
MATX_NVTX_START("prod_impl(" + get_type_str(in) + ")", matx::MATX_NVTX_LOG_API)
@@ -778,7 +778,7 @@ void __MATX_INLINE__ prod_impl(OutType dest, const InType &in, [[maybe_unused]]
778778
* CUDA executor or stream ID
779779
*/
780780
template <typename OutType, typename InType>
781-
void __MATX_INLINE__ max_impl(OutType dest, const InType &in, cudaExecutor exec = 0)
781+
void __MATX_INLINE__ max_impl(OutType dest, const InType &in, const cudaExecutor &exec)
782782
{
783783
#ifdef __CUDACC__
784784
MATX_NVTX_START("max_impl(" + get_type_str(in) + ")", matx::MATX_NVTX_LOG_API)
@@ -848,7 +848,7 @@ void __MATX_INLINE__ max_impl(OutType dest, const InType &in, [[maybe_unused]] c
848848
* CUDA executor or stream ID
849849
*/
850850
template <typename OutType, typename TensorIndexType, typename InType>
851-
void __MATX_INLINE__ argmax_impl(OutType dest, TensorIndexType &idest, const InType &in, cudaExecutor exec = 0)
851+
void __MATX_INLINE__ argmax_impl(OutType dest, TensorIndexType &idest, const InType &in, const cudaExecutor &exec)
852852
{
853853
#ifdef __CUDACC__
854854
MATX_NVTX_START("argmax_impl(" + get_type_str(in) + ")", matx::MATX_NVTX_LOG_API)
@@ -926,7 +926,7 @@ void __MATX_INLINE__ argmax_impl(OutType dest, TensorIndexType &idest, const InT
926926
* CUDA executor or stream ID
927927
*/
928928
template <typename OutType, typename InType>
929-
void __MATX_INLINE__ min_impl(OutType dest, const InType &in, cudaExecutor exec = 0)
929+
void __MATX_INLINE__ min_impl(OutType dest, const InType &in, const cudaExecutor &exec)
930930
{
931931
#ifdef __CUDACC__
932932
MATX_NVTX_START("min_impl(" + get_type_str(in) + ")", matx::MATX_NVTX_LOG_API)
@@ -995,7 +995,7 @@ void __MATX_INLINE__ min_impl(OutType dest, const InType &in, [[maybe_unused]] c
995995
* CUDA executor or stream ID
996996
*/
997997
template <typename OutType, typename TensorIndexType, typename InType>
998-
void __MATX_INLINE__ argmin_impl(OutType dest, TensorIndexType &idest, const InType &in, cudaExecutor exec = 0)
998+
void __MATX_INLINE__ argmin_impl(OutType dest, TensorIndexType &idest, const InType &in, const cudaExecutor &exec)
999999
{
10001000
static_assert(OutType::Rank() == TensorIndexType::Rank());
10011001
#ifdef __CUDACC__
@@ -1082,7 +1082,7 @@ void __MATX_INLINE__ argmin_impl(OutType dest, TensorIndexType &idest, const InT
10821082
* CUDA executor or stream ID
10831083
*/
10841084
template <typename OutType, typename TensorIndexType, typename InType>
1085-
void __MATX_INLINE__ argminmax_impl(OutType destmin, TensorIndexType &idestmin, OutType destmax, TensorIndexType &idestmax, const InType &in, cudaExecutor exec = 0)
1085+
void __MATX_INLINE__ argminmax_impl(OutType destmin, TensorIndexType &idestmin, OutType destmax, TensorIndexType &idestmax, const InType &in, const cudaExecutor &exec)
10861086
{
10871087
static_assert(OutType::Rank() == TensorIndexType::Rank());
10881088
#ifdef __CUDACC__
@@ -1162,7 +1162,7 @@ void __MATX_INLINE__ argminmax_impl(OutType destmin, TensorIndexType &idestmin,
11621162
* CUDA executor or stream ID
11631163
*/
11641164
template <typename OutType, typename InType>
1165-
void __MATX_INLINE__ any_impl(OutType dest, const InType &in, cudaExecutor exec = 0)
1165+
void __MATX_INLINE__ any_impl(OutType dest, const InType &in, const cudaExecutor &exec)
11661166
{
11671167
#ifdef __CUDACC__
11681168
MATX_NVTX_START("any_impl(" + get_type_str(in) + ")", matx::MATX_NVTX_LOG_API)
@@ -1235,7 +1235,7 @@ void __MATX_INLINE__ any_impl(OutType dest, const InType &in, [[maybe_unused]] c
12351235
* CUDA executor or stream ID
12361236
*/
12371237
template <typename OutType, typename InType>
1238-
void __MATX_INLINE__ all_impl(OutType dest, const InType &in, cudaExecutor exec = 0)
1238+
void __MATX_INLINE__ all_impl(OutType dest, const InType &in, const cudaExecutor &exec)
12391239
{
12401240
#ifdef __CUDACC__
12411241
MATX_NVTX_START("all_impl(" + get_type_str(in) + ")", matx::MATX_NVTX_LOG_API)
@@ -1313,7 +1313,7 @@ void __MATX_INLINE__ all_impl(OutType dest, const InType &in, [[maybe_unused]] c
13131313
* CUDA executor or stream ID
13141314
*/
13151315
template <typename OutType, typename InType1, typename InType2>
1316-
void __MATX_INLINE__ allclose(OutType dest, const InType1 &in1, const InType2 &in2, double rtol, double atol, cudaExecutor exec = 0)
1316+
void __MATX_INLINE__ allclose(OutType dest, const InType1 &in1, const InType2 &in2, double rtol, double atol, const cudaExecutor &exec)
13171317
{
13181318
#ifdef __CUDACC__
13191319
MATX_NVTX_START("allclose(" + get_type_str(in1) + ", " + get_type_str(in2) + ")", matx::MATX_NVTX_LOG_API)

include/matx/transforms/transpose.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,7 @@ namespace matx
6060
*/
6161
template <typename OutputTensor, typename InputTensor>
6262
__MATX_INLINE__ void transpose_matrix_impl([[maybe_unused]] OutputTensor &out,
63-
const InputTensor &in, cudaExecutor exec)
63+
const InputTensor &in, const cudaExecutor &exec)
6464
{
6565
MATX_NVTX_START("", matx::MATX_NVTX_LOG_API)
6666

0 commit comments

Comments
 (0)