@@ -24,13 +24,13 @@ int gpu_aware_alltoall(alltoall_ftn f,
2424
2525 char * cpu_sendbuf ;
2626 char * cpu_recvbuf ;
27- cudaMallocHost ((void * * )& cpu_sendbuf , total_bytes_s );
28- cudaMallocHost ((void * * )& cpu_recvbuf , total_bytes_r );
27+ gpuMallocHost ((void * * )& cpu_sendbuf , total_bytes_s );
28+ gpuMallocHost ((void * * )& cpu_recvbuf , total_bytes_r );
2929
3030 int ierr = f (sendbuf , sendcount , sendtype , recvbuf , recvcount , recvtype , comm );
3131
32- cudaFreeHost (cpu_sendbuf );
33- cudaFreeHost (cpu_recvbuf );
32+ gpuFreeHost (cpu_sendbuf );
33+ gpuFreeHost (cpu_recvbuf );
3434
3535 return ierr ;
3636}
@@ -94,8 +94,8 @@ int copy_to_cpu_alltoall(alltoall_ftn f,
9494
9595 char * cpu_sendbuf ;
9696 char * cpu_recvbuf ;
97- cudaMallocHost ((void * * )& cpu_sendbuf , total_bytes_s );
98- cudaMallocHost ((void * * )& cpu_recvbuf , total_bytes_r );
97+ gpuMallocHost ((void * * )& cpu_sendbuf , total_bytes_s );
98+ gpuMallocHost ((void * * )& cpu_recvbuf , total_bytes_r );
9999
100100 // Copy from GPU to CPU
101101 ierr += gpuMemcpy (cpu_sendbuf , sendbuf , total_bytes_s , gpuMemcpyDeviceToHost );
@@ -106,8 +106,8 @@ int copy_to_cpu_alltoall(alltoall_ftn f,
106106 // Copy from CPU to GPU
107107 ierr += gpuMemcpy (recvbuf , cpu_recvbuf , total_bytes_r , gpuMemcpyHostToDevice );
108108
109- cudaFreeHost (cpu_sendbuf );
110- cudaFreeHost (cpu_recvbuf );
109+ gpuFreeHost (cpu_sendbuf );
110+ gpuFreeHost (cpu_recvbuf );
111111
112112 return ierr ;
113113}
@@ -174,8 +174,8 @@ int threaded_alltoall_pairwise(const void* sendbuf,
174174
175175 char * cpu_sendbuf ;
176176 char * cpu_recvbuf ;
177- cudaMallocHost ((void * * )& cpu_sendbuf , total_bytes_s );
178- cudaMallocHost ((void * * )& cpu_recvbuf , total_bytes_r );
177+ gpuMallocHost ((void * * )& cpu_sendbuf , total_bytes_s );
178+ gpuMallocHost ((void * * )& cpu_recvbuf , total_bytes_r );
179179
180180 // Copy from GPU to CPU
181181 ierr += gpuMemcpy (cpu_sendbuf , sendbuf , total_bytes_s , gpuMemcpyDeviceToHost );
@@ -236,8 +236,8 @@ int threaded_alltoall_pairwise(const void* sendbuf,
236236
237237 ierr += gpuMemcpy (recvbuf , cpu_recvbuf , total_bytes_r , gpuMemcpyHostToDevice );
238238
239- cudaFreeHost (cpu_sendbuf );
240- cudaFreeHost (cpu_recvbuf );
239+ gpuFreeHost (cpu_sendbuf );
240+ gpuFreeHost (cpu_recvbuf );
241241
242242 return ierr ;
243243}
@@ -263,8 +263,8 @@ int threaded_alltoall_nonblocking(const void* sendbuf,
263263
264264 char * cpu_sendbuf ;
265265 char * cpu_recvbuf ;
266- cudaMallocHost ((void * * )& cpu_sendbuf , total_bytes_s );
267- cudaMallocHost ((void * * )& cpu_recvbuf , total_bytes_r );
266+ gpuMallocHost ((void * * )& cpu_sendbuf , total_bytes_s );
267+ gpuMallocHost ((void * * )& cpu_recvbuf , total_bytes_r );
268268
269269 int ierr = 0 ;
270270 ierr += gpuMemcpy (cpu_sendbuf , sendbuf , total_bytes_s , gpuMemcpyDeviceToHost );
@@ -330,8 +330,8 @@ int threaded_alltoall_nonblocking(const void* sendbuf,
330330}
331331
332332 ierr += gpuMemcpy (recvbuf , cpu_recvbuf , total_bytes_r , gpuMemcpyHostToDevice );
333- cudaFreeHost (cpu_sendbuf );
334- cudaFreeHost (cpu_recvbuf );
333+ gpuFreeHost (cpu_sendbuf );
334+ gpuFreeHost (cpu_recvbuf );
335335
336336 return ierr ;
337337}
0 commit comments