Skip to content

Commit

Permalink
CWBVH traversal kernel added to traverse.cl.
Browse files Browse the repository at this point in the history
  • Loading branch information
jbikker committed Nov 20, 2024
1 parent bbf9a48 commit 6a4fef6
Show file tree
Hide file tree
Showing 3 changed files with 367 additions and 1 deletion.
5 changes: 4 additions & 1 deletion tiny_bvh.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/

// Nov 20, '24: version 0.9.5 : CWBVH traversal on GPU.
// Nov 19, '24: version 0.9.2 : 4-way traversal on GPU.
// Nov 18, '24: version 0.9.1 : Added custom alloc/free (tcantenot).
// Nov 16, '24: version 0.9.0 : (external) OpenCL in speedtest.
// Nov 15, '24: version 0.8.3 : Incremental update / bugfixes.
Expand Down Expand Up @@ -69,6 +71,7 @@ THE SOFTWARE.
// Aras Pranckevičius: non-Intel architecture support
// Jefferson Amstutz: CMake support
// Christian Oliveros: WASM / EMSCRIPTEN support
// Thierry Cantenot: user-defined alloc & free

#ifndef TINY_BVH_H_
#define TINY_BVH_H_
Expand Down Expand Up @@ -96,7 +99,7 @@ THE SOFTWARE.
// library version
#define TINY_BVH_VERSION_MAJOR 0
#define TINY_BVH_VERSION_MINOR 9
#define TINY_BVH_VERSION_SUB 3
#define TINY_BVH_VERSION_SUB 5

// ============================================================================
//
Expand Down
43 changes: 43 additions & 0 deletions tiny_bvh_speedtest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -171,6 +171,7 @@ int main()
// This also triggers OpenCL init and device identification.
tinyocl::Kernel ailalaine_kernel( "traverse.cl", "traverse_ailalaine" );
tinyocl::Kernel gpu4way_kernel( "traverse.cl", "traverse_gpu4way" );
tinyocl::Kernel cwbvh_kernel( "traverse.cl", "traverse_cwbvh" );
printf( "----------------------------------------------------------------\n" );

#endif
Expand Down Expand Up @@ -424,6 +425,48 @@ int main()

#endif

#ifdef GPU_CWBVH

// Benchmark: trace the ray batch on the GPU through OpenCL, using the CWBVH layout.
// The kernel is run 8 times and the average kernel time is reported.
printf( "- GPU, coherent, CWBVH layout, " );
// Convert the BVH in two steps: WALD_32BYTE -> BASIC_BVH8 -> CWBVH.
bvh.Convert( BVH::WALD_32BYTE, BVH::BASIC_BVH8 );
bvh.Convert( BVH::BASIC_BVH8, BVH::CWBVH );
printf( "ocl: " );
// Create OpenCL buffers for the BVH data calculated by tiny_bvh.h.
// Buffer sizes are expressed in bvhvec4 units: usedCWBVHBlocks of them for the nodes,
// and three per index for the triangle data.
tinyocl::Buffer cwbvhNodes( bvh.usedCWBVHBlocks * sizeof( tinybvh::bvhvec4 ), bvh.bvh8Compact );
tinyocl::Buffer cwbvhTris( bvh.idxCount * 3 * sizeof( tinybvh::bvhvec4 ), bvh.bvh8Tris );
// Synchronize the host-side data to the GPU side.
cwbvhNodes.CopyToDevice();
cwbvhTris.CopyToDevice();
#if !defined GPU_2WAY && !defined GPU_4WAY // otherwise these already exist.
// Create an event and timestamps to time the OpenCL kernel.
cl_event event;
cl_ulong startTime, endTime;
// Create the ray buffer and send it to the GPU side.
tinyocl::Buffer rayData( N * sizeof( tinybvh::Ray ), rays );
rayData.CopyToDevice();
#endif
// Reset the host timer and start the kernel on the GPU. The figures reported below are
// taken from OpenCL event profiling; the host timer is not read in this section.
t.reset();
float traceTimeGPU8 = 0; // accumulated kernel time over all passes, in seconds
cwbvh_kernel.SetArguments( &cwbvhNodes, &cwbvhTris, &rayData );
for (int pass = 0; pass < 8; pass++)
{
// NOTE(review): 64 is presumably the work-group size - confirm against tinyocl::Kernel::Run.
cwbvh_kernel.Run( N, 64, 0, &event ); // for now, todo.
// OpenCL kernels run asynchronously, so block until this pass has finished.
clWaitForEvents( 1, &event );
clGetEventProfilingInfo( event, CL_PROFILING_COMMAND_START, sizeof( cl_ulong ), &startTime, 0 );
clGetEventProfilingInfo( event, CL_PROFILING_COMMAND_END, sizeof( cl_ulong ), &endTime, 0 );
// Event timing is in nanoseconds; convert to seconds while accumulating.
traceTimeGPU8 += (endTime - startTime) * 1e-9f;
// NOTE(review): the event is not released in this block - confirm whether tinyocl
// releases it or whether a clReleaseEvent call is needed per pass.
}
// Get results from GPU - this also syncs the queue.
rayData.CopyFromDevice();
// Report on timing: average time per pass, then rays per second.
traceTimeGPU8 /= 8.0f;
mrays = (float)N / traceTimeGPU8;
printf( "%8.1fms for %6.2fM rays => %6.2fMRay/s\n", traceTimeGPU8 * 1000, (float)N * 1e-6f, mrays * 1e-6f );

#endif

#endif

#ifdef TRAVERSE_SOA2WAY_ST
Expand Down
Loading

0 comments on commit 6a4fef6

Please sign in to comment.