From c858d6cb53cbe33e562568e449b4ba753b5fae6f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=D0=9D=D0=B8=D0=BD=D0=B0=20=D0=A7=D0=B5=D0=BA=D0=B0=D0=BB?= =?UTF-8?q?=D0=B8=D0=BD=D0=B0?=
Date: Tue, 7 Jan 2025 10:37:37 +0300
Subject: [PATCH] Add files: bitonic.cl, main_bitonic.cpp

---
 src/cl/bitonic.cl    | 28 +++++++++++++++++++++++++++-
 src/main_bitonic.cpp | 17 +++++++++++++----
 2 files changed, 40 insertions(+), 5 deletions(-)

diff --git a/src/cl/bitonic.cl b/src/cl/bitonic.cl
index ecf336b9..8cefbd59 100644
--- a/src/cl/bitonic.cl
+++ b/src/cl/bitonic.cl
@@ -1,4 +1,30 @@
-__kernel void bitonic()
+// Performs one compare-exchange pass of a bitonic sort over `data` (n ints).
+// Each work-item handles one pair of elements that are sub_step_size apart;
+// step_size is the half-size of the block currently being merged and selects
+// the sort direction, which alternates from block to block.
+__kernel void bitonic(__global int *data, unsigned int step_size, unsigned int sub_step_size, unsigned int n)
 {
+    unsigned int thread_id = get_global_id(0);
+
+    // Every run of step_size consecutive work-items covers one block of
+    // 2 * step_size elements; even blocks sort ascending, odd ones descending.
+    bool ascending = (thread_id / step_size) % 2 == 0;
+
+    // Lower element of the pair, and its partner sub_step_size further on.
+    unsigned int low_idx = thread_id / sub_step_size * (sub_step_size * 2) + thread_id % sub_step_size;
+    unsigned int high_idx = low_idx + sub_step_size;
+
+    // The host rounds the global size up to a multiple of the work-group
+    // size, so surplus work-items must not touch memory past the array end.
+    if (high_idx >= n) {
+        return;
+    }
+
+    int low = data[low_idx];
+    int high = data[high_idx];
+    if ((ascending && low > high) || (!ascending && low < high)) {
+        data[low_idx] = high;
+        data[high_idx] = low;
+    }
 }
 
diff --git a/src/main_bitonic.cpp b/src/main_bitonic.cpp
index 9b508a6c..0513ee7e 100644
--- a/src/main_bitonic.cpp
+++ b/src/main_bitonic.cpp
@@ -68,16 +68,25 @@ int main(int argc, char **argv) {
         ocl::Kernel bitonic(bitonic_kernel, bitonic_kernel_length, "bitonic");
         bitonic.compile();
 
+        // One work-item per compared pair; the global size is rounded up to a
+        // whole number of work-groups and the kernel discards the surplus items.
+        const unsigned int workGroupSize = 64;
+        const unsigned int globalWorkSize = (n / 2 + workGroupSize - 1) / workGroupSize * workGroupSize;
+        const unsigned int elementCount = n;
         timer t;
         for (int iter = 0; iter < benchmarkingIters; ++iter) {
             as_gpu.writeN(as.data(), n);
             t.restart();// Запускаем секундомер после прогрузки данных, чтобы замерять время работы кернела, а не трансфер данных
-
-            /*TODO*/
-
+            // Bitonic network: merge blocks of doubling size, each merge running
+            // passes with a halving stride. Assumes n is a power of two.
+            for (unsigned int blockHalfSize = 1; blockHalfSize <= n / 2; blockHalfSize *= 2) {
+                for (unsigned int subBlockHalfSize = blockHalfSize; subBlockHalfSize >= 1; subBlockHalfSize /= 2) {
+                    bitonic.exec(gpu::WorkSize(workGroupSize, globalWorkSize),
+                                 as_gpu, blockHalfSize, subBlockHalfSize, elementCount);
+                }
+            }
             t.nextLap();
         }
-
         std::cout << "GPU: " << t.lapAvg() << "+-" << t.lapStd() << " s" << std::endl;
         std::cout << "GPU: " << (n / 1000 / 1000) / t.lapAvg() << " millions/s" << std::endl;
     }