Skip to content

Commit bcf92c2

Browse files
committed
Replaced cudaMallocHost/cudaFreeHost with gpuMallocHost/gpuFreeHost to allow for compiling on HIP
1 parent 09768f2 commit bcf92c2

2 files changed

Lines changed: 28 additions & 28 deletions

File tree

src/heterogeneous/gpu_alltoall.c

Lines changed: 16 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -24,13 +24,13 @@ int gpu_aware_alltoall(alltoall_ftn f,
2424

2525
char* cpu_sendbuf;
2626
char* cpu_recvbuf;
27-
cudaMallocHost((void**)&cpu_sendbuf, total_bytes_s);
28-
cudaMallocHost((void**)&cpu_recvbuf, total_bytes_r);
27+
gpuMallocHost((void**)&cpu_sendbuf, total_bytes_s);
28+
gpuMallocHost((void**)&cpu_recvbuf, total_bytes_r);
2929

3030
int ierr = f(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, comm);
3131

32-
cudaFreeHost(cpu_sendbuf);
33-
cudaFreeHost(cpu_recvbuf);
32+
gpuFreeHost(cpu_sendbuf);
33+
gpuFreeHost(cpu_recvbuf);
3434

3535
return ierr;
3636
}
@@ -94,8 +94,8 @@ int copy_to_cpu_alltoall(alltoall_ftn f,
9494

9595
char* cpu_sendbuf;
9696
char* cpu_recvbuf;
97-
cudaMallocHost((void**)&cpu_sendbuf, total_bytes_s);
98-
cudaMallocHost((void**)&cpu_recvbuf, total_bytes_r);
97+
gpuMallocHost((void**)&cpu_sendbuf, total_bytes_s);
98+
gpuMallocHost((void**)&cpu_recvbuf, total_bytes_r);
9999

100100
// Copy from GPU to CPU
101101
ierr += gpuMemcpy(cpu_sendbuf, sendbuf, total_bytes_s, gpuMemcpyDeviceToHost);
@@ -106,8 +106,8 @@ int copy_to_cpu_alltoall(alltoall_ftn f,
106106
// Copy from CPU to GPU
107107
ierr += gpuMemcpy(recvbuf, cpu_recvbuf, total_bytes_r, gpuMemcpyHostToDevice);
108108

109-
cudaFreeHost(cpu_sendbuf);
110-
cudaFreeHost(cpu_recvbuf);
109+
gpuFreeHost(cpu_sendbuf);
110+
gpuFreeHost(cpu_recvbuf);
111111

112112
return ierr;
113113
}
@@ -174,8 +174,8 @@ int threaded_alltoall_pairwise(const void* sendbuf,
174174

175175
char* cpu_sendbuf;
176176
char* cpu_recvbuf;
177-
cudaMallocHost((void**)&cpu_sendbuf, total_bytes_s);
178-
cudaMallocHost((void**)&cpu_recvbuf, total_bytes_r);
177+
gpuMallocHost((void**)&cpu_sendbuf, total_bytes_s);
178+
gpuMallocHost((void**)&cpu_recvbuf, total_bytes_r);
179179

180180
// Copy from GPU to CPU
181181
ierr += gpuMemcpy(cpu_sendbuf, sendbuf, total_bytes_s, gpuMemcpyDeviceToHost);
@@ -236,8 +236,8 @@ int threaded_alltoall_pairwise(const void* sendbuf,
236236

237237
ierr += gpuMemcpy(recvbuf, cpu_recvbuf, total_bytes_r, gpuMemcpyHostToDevice);
238238

239-
cudaFreeHost(cpu_sendbuf);
240-
cudaFreeHost(cpu_recvbuf);
239+
gpuFreeHost(cpu_sendbuf);
240+
gpuFreeHost(cpu_recvbuf);
241241

242242
return ierr;
243243
}
@@ -263,8 +263,8 @@ int threaded_alltoall_nonblocking(const void* sendbuf,
263263

264264
char* cpu_sendbuf;
265265
char* cpu_recvbuf;
266-
cudaMallocHost((void**)&cpu_sendbuf, total_bytes_s);
267-
cudaMallocHost((void**)&cpu_recvbuf, total_bytes_r);
266+
gpuMallocHost((void**)&cpu_sendbuf, total_bytes_s);
267+
gpuMallocHost((void**)&cpu_recvbuf, total_bytes_r);
268268

269269
int ierr = 0;
270270
ierr += gpuMemcpy(cpu_sendbuf, sendbuf, total_bytes_s, gpuMemcpyDeviceToHost);
@@ -330,8 +330,8 @@ int threaded_alltoall_nonblocking(const void* sendbuf,
330330
}
331331

332332
ierr += gpuMemcpy(recvbuf, cpu_recvbuf, total_bytes_r, gpuMemcpyHostToDevice);
333-
cudaFreeHost(cpu_sendbuf);
334-
cudaFreeHost(cpu_recvbuf);
333+
gpuFreeHost(cpu_sendbuf);
334+
gpuFreeHost(cpu_recvbuf);
335335

336336
return ierr;
337337
}

src/heterogeneous/gpu_alltoallv.c

Lines changed: 12 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -144,8 +144,8 @@ int copy_to_cpu_alltoallv(alltoallv_ftn f,
144144

145145
char* cpu_sendbuf;
146146
char* cpu_recvbuf;
147-
cudaMallocHost((void**)&cpu_sendbuf, total_bytes_s);
148-
cudaMallocHost((void**)&cpu_recvbuf, total_bytes_r);
147+
gpuMallocHost((void**)&cpu_sendbuf, total_bytes_s);
148+
gpuMallocHost((void**)&cpu_recvbuf, total_bytes_r);
149149

150150
// Copy from GPU to CPU
151151
ierr += gpuMemcpy(cpu_sendbuf, sendbuf, total_bytes_s, gpuMemcpyDeviceToHost);
@@ -157,8 +157,8 @@ int copy_to_cpu_alltoallv(alltoallv_ftn f,
157157
// Copy from CPU to GPU
158158
ierr += gpuMemcpy(recvbuf, cpu_recvbuf, total_bytes_r, gpuMemcpyHostToDevice);
159159

160-
cudaFreeHost(cpu_sendbuf);
161-
cudaFreeHost(cpu_recvbuf);
160+
gpuFreeHost(cpu_sendbuf);
161+
gpuFreeHost(cpu_recvbuf);
162162

163163
return ierr;
164164
}
@@ -286,8 +286,8 @@ int threaded_alltoallv_pairwise(const void* sendbuf,
286286

287287
char* cpu_sendbuf;
288288
char* cpu_recvbuf;
289-
cudaMallocHost((void**)&cpu_sendbuf, total_bytes_s);
290-
cudaMallocHost((void**)&cpu_recvbuf, total_bytes_r);
289+
gpuMallocHost((void**)&cpu_sendbuf, total_bytes_s);
290+
gpuMallocHost((void**)&cpu_recvbuf, total_bytes_r);
291291

292292
// Copy from GPU to CPU
293293
ierr += gpuMemcpy(cpu_sendbuf, sendbuf, total_bytes_s, gpuMemcpyDeviceToHost);
@@ -348,8 +348,8 @@ int threaded_alltoallv_pairwise(const void* sendbuf,
348348

349349
ierr += gpuMemcpy(recvbuf, cpu_recvbuf, total_bytes_r, gpuMemcpyHostToDevice);
350350

351-
cudaFreeHost(cpu_sendbuf);
352-
cudaFreeHost(cpu_recvbuf);
351+
gpuFreeHost(cpu_sendbuf);
352+
gpuFreeHost(cpu_recvbuf);
353353

354354
return ierr;
355355
}
@@ -387,8 +387,8 @@ int threaded_alltoallv_nonblocking(const void* sendbuf,
387387

388388
char* cpu_sendbuf;
389389
char* cpu_recvbuf;
390-
cudaMallocHost((void**)&cpu_sendbuf, total_bytes_s);
391-
cudaMallocHost((void**)&cpu_recvbuf, total_bytes_r);
390+
gpuMallocHost((void**)&cpu_sendbuf, total_bytes_s);
391+
gpuMallocHost((void**)&cpu_recvbuf, total_bytes_r);
392392

393393
// Copy from GPU to CPU
394394
ierr += gpuMemcpy(cpu_sendbuf, sendbuf, total_bytes_s, gpuMemcpyDeviceToHost);
@@ -455,8 +455,8 @@ int threaded_alltoallv_nonblocking(const void* sendbuf,
455455

456456
ierr += gpuMemcpy(recvbuf, cpu_recvbuf, total_bytes_r, gpuMemcpyHostToDevice);
457457

458-
cudaFreeHost(cpu_sendbuf);
459-
cudaFreeHost(cpu_recvbuf);
458+
gpuFreeHost(cpu_sendbuf);
459+
gpuFreeHost(cpu_recvbuf);
460460

461461
return ierr;
462462
}

0 commit comments

Comments (0)