Skip to content

Commit

Permalink
task07 solution
Browse files Browse the repository at this point in the history
  • Loading branch information
koufesser committed Jan 13, 2025
1 parent 286de39 commit aaa577e
Show file tree
Hide file tree
Showing 2 changed files with 45 additions and 5 deletions.
20 changes: 19 additions & 1 deletion src/cl/prefix_sum.cl
Original file line number Diff line number Diff line change
@@ -1 +1,19 @@
// TODO
#ifdef __CLION_IDE__
#include <libgpu/opencl/cl/clion_defines.cl>
#endif

#line 6

// Up-sweep (reduce) pass of an in-place prefix sum over `sum`.
//
// Work item `gid` owns the last element of the gid-th block of `rate`
// consecutive elements and adds to it the element `rate / 2` positions
// to its left:
//     i = (gid + 1) * rate - 1
//     sum[i] += sum[i - rate / 2]
//
// sum  - buffer of n values, updated in place
// n    - number of elements in sum; work items whose index is >= n do nothing
// rate - block size handled by this pass
__kernel void prefix_sum_binary(__global unsigned int* sum, unsigned int n, unsigned int rate)
{
    // The index is kept in 64 bits: the launch size may be rounded up past
    // the number of blocks, so (gid + 1) * rate can exceed the 32-bit range.
    // Truncating it to int (as before) could wrap it back below n and make a
    // padding work item write into the buffer.
    const ulong i = ((ulong) get_global_id(0) + 1) * rate - 1;
    if (i < n)
        sum[i] += sum[i - rate / 2];
}

// Down-sweep pass of an in-place prefix sum over `sum`.
//
// Work item `gid` targets the element half a block past the end of the
// gid-th block of `rate` elements and adds to it the element `rate / 2`
// positions to its left (i.e. the end of that block):
//     i = (gid + 1) * rate - 1 + rate / 2
//     sum[i] += sum[i - rate / 2]
//
// sum  - buffer of n values, updated in place
// n    - number of elements in sum; work items whose index is >= n do nothing
// rate - block size handled by this pass
__kernel void prefix_sum_second_part(__global unsigned int* sum, unsigned int n, unsigned int rate)
{
    const unsigned int half_rate = rate / 2;

    // The index is kept in 64 bits: the launch size may be rounded up past
    // the number of blocks, so (gid + 1) * rate can exceed the 32-bit range.
    // Truncating it to int (as before) could wrap it back below n and make a
    // padding work item write into the buffer.
    const ulong i = ((ulong) get_global_id(0) + 1) * rate - 1 + half_rate;
    if (i < n)
        sum[i] += sum[i - half_rate];
}
30 changes: 26 additions & 4 deletions src/main_prefix_sum.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,12 @@ std::vector<unsigned int> computeCPU(const std::vector<unsigned int> &as)

int main(int argc, char **argv)
{
gpu::Device device = gpu::chooseGPUDevice(argc, argv);

gpu::Context context;
context.init(device.device_id_opencl);
context.activate();

for (unsigned int n = 4096; n <= max_n; n *= 4) {
std::cout << "______________________________________________" << std::endl;
unsigned int values_range = std::min<unsigned int>(1023, std::numeric_limits<int>::max() / n);
Expand Down Expand Up @@ -83,25 +89,41 @@ int main(int argc, char **argv)
#endif

// work-efficient prefix sum
#if 0
// Benchmark and validate the two-kernel, in-place GPU prefix sum for the current n.
{
// Host-side buffer that receives the GPU result for comparison.
std::vector<unsigned int> res(n);
// Both kernels are built from the same embedded source, selected by kernel name.
ocl::Kernel prefix_sum_binary(prefix_sum_kernel, prefix_sum_kernel_length, "prefix_sum_binary");
ocl::Kernel prefix_sum_second_part(prefix_sum_kernel, prefix_sum_kernel_length, "prefix_sum_second_part");
prefix_sum_binary.compile();
prefix_sum_second_part.compile();

// Device buffer of n 32-bit values; the kernels update it in place.
gpu::gpu_mem_32u gpu;
gpu.resizeN(n);

timer t;
for (int iter = 0; iter < benchmarkingIters; ++iter) {
// Re-upload the input every iteration because the previous run overwrote the buffer.
gpu.writeN(as.data(), as.size());
// The timer is restarted after the upload call, so the lap covers the kernel launches below.
t.restart();
// NOTE(review): presumably writeN has completed by the time it returns - confirm.

// First pass: one launch per block size rate = 2, 4, ..., n.
// NOTE(review): WorkSize(64, x) presumably means work-group size 64 and x work items - confirm.
for (unsigned int rate = 2; rate <= n; rate *= 2) {
prefix_sum_binary.exec(gpu::WorkSize(64, n / rate), gpu, n, rate);

}

// Second pass: one launch per block size rate = n / 2, n / 4, ..., 2.
for (unsigned int rate = n / 2; rate >= 2; rate /= 2) {
prefix_sum_second_part.exec(gpu::WorkSize(64, (n + rate - 1) / rate), gpu, n, rate);
}

t.nextLap();
}

// Only the result left in the buffer by the last iteration is read back.
gpu.readN(res.data(), as.size());

std::cout << "GPU [work-efficient]: " << t.lapAvg() << "+-" << t.lapStd() << " s" << std::endl;
std::cout << "GPU [work-efficient]: " << (n / 1000.0 / 1000.0) / t.lapAvg() << " millions/s" << std::endl;

// Element-wise comparison against the CPU reference.
// NOTE(review): `int i` is compared with the unsigned `n` (signed/unsigned mix).
for (int i = 0; i < n; ++i) {
EXPECT_THE_SAME(cpu_reference[i], res[i], "GPU result should be consistent!");
}
}
#endif
}
}

0 comments on commit aaa577e

Please sign in to comment.