-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathnine_nine_game.cu
57 lines (45 loc) · 1.33 KB
/
nine_nine_game.cu
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
/*
multiplication table using CUDA
refer : http://blog.daum.net/heoly/7 (Thank you)
*/
#include <stdio.h>
#include <malloc.h>
#include <cuda_runtime.h>
#define BLOCK_SIZE 8
#define THREAD_SIZE 9
// Device code
/*
 * Fills one entry of the multiplication table per thread.
 *
 * The block index selects the table (block b handles the factor b + 2) and
 * the thread index selects the multiplier (thread t handles t + 1). The
 * product is written to result[THREAD_SIZE * b + t], so each block owns a
 * run of THREAD_SIZE consecutive ints.
 *
 * result : device buffer receiving the products. There is no bounds check;
 *          the caller must size it for the launch configuration (main in
 *          this file allocates BLOCK_SIZE * THREAD_SIZE ints and launches
 *          BLOCK_SIZE blocks of THREAD_SIZE threads).
 */
__global__ void test(int *result)
{
    const int row = blockIdx.x;   // which table this block computes
    const int col = threadIdx.x;  // position inside that table

    const int multiplicand = row + 2;
    const int multiplier = col + 1;

    result[row * THREAD_SIZE + col] = multiplicand * multiplier;
}
// Host code
/*
 * Computes the multiplication tables for 2 .. BLOCK_SIZE + 1 on the GPU and
 * prints them to stdout.
 *
 * Steps: allocate a host and a device buffer of BLOCK_SIZE * THREAD_SIZE
 * ints, launch the `test` kernel with BLOCK_SIZE blocks of THREAD_SIZE
 * threads, copy the products back and print one table per block.
 *
 * Returns 0 on success and 1 if a host allocation or any CUDA call fails
 * (the failure is reported on stderr and no table is printed).
 */
int main()
{
    const size_t count = (size_t)BLOCK_SIZE * THREAD_SIZE;
    const size_t bytes = count * sizeof(int);

    int *host_Result = NULL;    // Result data on the host
    int *device_Result = NULL;  // Result data on the device
    const char *failedStep = NULL;
    cudaError_t err = cudaSuccess;
    int exitCode = 1;           // Stays 1 unless every step succeeds

    // Allocate host memory
    host_Result = (int *)malloc(bytes);
    if (host_Result == NULL)
    {
        fprintf(stderr, "Host allocation of %lu bytes failed\n", (unsigned long)bytes);
        return 1;
    }

    // Allocate device memory
    err = cudaMalloc((void **)&device_Result, bytes);
    if (err != cudaSuccess)
    {
        failedStep = "cudaMalloc";
    }

    if (err == cudaSuccess)
    {
        // Kernel name <<<number of blocks, threads per block>>> (parameters)
        test<<<BLOCK_SIZE, THREAD_SIZE>>>(device_Result);

        // A launch does not return a status; query it explicitly so a bad
        // launch configuration is not silently ignored.
        err = cudaGetLastError();
        if (err != cudaSuccess)
        {
            failedStep = "kernel launch";
        }
    }

    if (err == cudaSuccess)
    {
        // Copy device result to host result. This blocking copy also waits
        // for the kernel to finish and reports errors raised while it ran.
        err = cudaMemcpy(host_Result, device_Result, bytes, cudaMemcpyDeviceToHost);
        if (err != cudaSuccess)
        {
            failedStep = "cudaMemcpy";
        }
    }

    if (err == cudaSuccess)
    {
        // Print result
        for (int j = 0; j < BLOCK_SIZE; j++)
        {
            printf("%3d step\n", (j + 2));
            for (int i = 0; i < THREAD_SIZE; i++)
            {
                printf("%3d X %3d = %3d\n", j + 2, i + 1, host_Result[j * THREAD_SIZE + i]);
            }
            printf("\n");
        }
        exitCode = 0;
    }
    else
    {
        fprintf(stderr, "CUDA error during %s: %s\n", failedStep, cudaGetErrorString(err));
    }

    // Release resources on every path.
    if (device_Result != NULL)
    {
        cudaFree(device_Result);  // Free device memory
    }
    free(host_Result);            // Free host memory

    // 0 signals success to the calling shell; the original returned 1 even
    // when everything worked.
    return exitCode;
}