Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merging dev to main #137

Merged
merged 10 commits into from
Mar 3, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,10 @@ project(tiny_bvh LANGUAGES CXX)

if (APPLE)
find_library(COCOA_LIBRARY Cocoa)
find_library(OPENCL_LIBRARY OpenCL)
if (OPENCL_LIBRARY)
set(OPENCL_FRAMEWORK "-framework OpenCL")
endif()
elseif (UNIX AND NOT EMSCRIPTEN)
find_package(X11)
elseif (EMSCRIPTEN)
Expand Down Expand Up @@ -118,6 +122,8 @@ if (NOT MSVC)
set(tiny_bvh_speedtest_link_flags ${tiny_bvh_speedtest_link_flags} -sPROXY_TO_PTHREAD=1)
endif()
endif()
elseif (APPLE)
target_link_libraries(tiny_bvh_speedtest ${COCOA_LIBRARY} ${OPENCL_FRAMEWORK})
endif()
target_compile_options(tiny_bvh_speedtest PRIVATE ${tiny_bvh_speedtest_cxx_flags})
target_link_options(tiny_bvh_speedtest PRIVATE ${tiny_bvh_speedtest_link_flags})
Expand Down
6 changes: 5 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ The **performance measurement tool** can be compiled with:

````g++ -std=c++20 -mavx2 -mfma -Ofast tiny_bvh_speedtest.cpp -o tiny_bvh_speedtest````

# Version 1.4.1
# Version 1.4.2

Version 1.4.0 introduces a new BVH layout for fast single-ray traversal on CPU: BVH8_CPU. This supersedes the previous fastest scheme, BVH4_CPU.

Expand Down Expand Up @@ -125,6 +125,7 @@ This version of the library includes the following functionality:
* Fast AVX2 ray tracing: Implements the 2017 paper by [Fuetterling et al.](https://web.cs.ucdavis.edu/~hamann/FuetterlingLojewskiPfreundtHamannEbertHPG2017PaperFinal06222017.pdf)
* Fast triangle intersection: Implements the 2016 paper by [Baldwin & Weber](https://jcgt.org/published/0005/03/03/paper.pdf)
* OpenCL traversal example code: Aila & Laine, 4-way quantized, CWBVH
* OpenCL support for macOS, by [wuyakuma](https://github.com/wuyakuma)
* Support for WASM / EMSCRIPTEN, g++, clang, Visual Studio
* Optional user-defined memory allocation, by [Thierry Cantenot](https://github.com/tcantenot)
* Vertex array can now have a custom stride, by [David Peicho](https://github.com/DavidPeicho)
Expand All @@ -142,6 +143,9 @@ Plans, ordered by priority:
* Speed improvements:
* Faster optimizer for AVX-capable CPUs
* Improve speed of SBVH builder
* Features & outstanding issues:
* 'Watertight' triangle intersection option
* Load/Save/Optimize/Refit for BVH8_CPU
* Demo of tinybvh on GPU using other APIs:
* Ray tracing in pure OpenGL
* Ray tracing in pure DirectX
Expand Down
59 changes: 31 additions & 28 deletions tiny_bvh.h
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
/*
The MIT License (MIT)

Copyright (c) 2024, Jacco Bikker / Breda University of Applied Sciences.
Copyright (c) 2024-2025, Jacco Bikker / Breda University of Applied Sciences.

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
Expand Down Expand Up @@ -170,7 +170,7 @@ THE SOFTWARE.
// library version
#define TINY_BVH_VERSION_MAJOR 1
#define TINY_BVH_VERSION_MINOR 4
#define TINY_BVH_VERSION_SUB 1
#define TINY_BVH_VERSION_SUB 2

// ============================================================================
//
Expand Down Expand Up @@ -351,7 +351,7 @@ inline bvhvec4 tinybvh_max( const bvhvec4& a, const bvhvec4& b ) { return bvhvec
// Clamp x to the range [a, b]. Any x that does not compare greater than a
// (which includes NaN) yields a.
inline float tinybvh_clamp( const float x, const float a, const float b )
{
	if (!(x > a)) return a;
	return x < b ? x : b;
}
// Integer clamp: values at or below a yield a, values at or above b yield b,
// anything in between is returned unchanged.
inline int32_t tinybvh_clamp( const int32_t x, const int32_t a, const int32_t b )
{
	if (x <= a) return a;
	return x < b ? x : b;
}
// Exchange the values of a and b by way of a temporary copy.
template <class T> inline static void tinybvh_swap( T& a, T& b )
{
	T held = a;
	a = b;
	b = held;
}
// Half the surface area of a box with extent v, used in SAH calculations.
// An extent whose x component lies below -BVH_FAR yields 0.
inline float tinybvh_halfAreaf( const bvhvec3& v )
{
	if (v.x < -BVH_FAR) return 0;
	return v.x * v.y + v.y * v.z + v.z * v.x;
}
// Half the surface area of a box with extent v, used in SAH calculations.
// An extent whose x component lies below -BVH_FAR yields 0.
inline float tinybvh_half_area( const bvhvec3& v )
{
	if (v.x < -BVH_FAR) return 0;
	return v.x * v.y + v.y * v.z + v.z * v.x;
}

// Operator overloads.
// Only a minimal set is provided.
Expand Down Expand Up @@ -483,7 +483,7 @@ inline bvhdbl3 tinybvh_cross( const bvhdbl3& a, const bvhdbl3& b )
}
// Dot product of two double-precision 3-component vectors.
inline double tinybvh_dot( const bvhdbl3& a, const bvhdbl3& b )
{
	return a.x * b.x + a.y * b.y + a.z * b.z;
}

// Double-precision half surface area of a box with extent v, used in SAH
// calculations. An extent whose x component lies below -BVH_FAR yields 0.
inline double tinybvh_halfAread( const bvhdbl3& v )
{
	if (v.x < -BVH_FAR) return 0;
	return v.x * v.y + v.y * v.z + v.z * v.x;
}
// Double-precision half surface area of a box with extent v, used in SAH
// calculations. An extent whose x component lies below -BVH_FAR yields 0.
inline double tinybvh_half_area( const bvhdbl3& v )
{
	if (v.x < -BVH_FAR) return 0;
	return v.x * v.y + v.y * v.z + v.z * v.x;
}

#endif // DOUBLE_PRECISION_SUPPORT

Expand Down Expand Up @@ -1142,9 +1142,9 @@ class BVH8_CPU : public BVHBase
struct BVHNodeCompact
{
// Novel 8-way BVH node, with quantized child node bounds, similar to CWBVH.
// The number that starts each member comment is the combined size in bytes
// of the member(s) declared on that line.
uint64_t cbminx8; // 8, stores aabbMin.x for 8 children, quantized.
float bminx, bminy, bminz; // 12, actually: bmin - ext.
float bextx, bexty, bextz; // 12, extent of the node, scaled conservatively.
uint64_t cbminx8; // 8, stores aabbMin.x for 8 children, quantized.
__m256i cbminmaxyz8; // 32, stores cbminy8, cbminz8, cbmaxy8, cbmaxz8
__m256i child8, perm8; // 64, includes cbmaxx8<<24 in perm8.
};
Expand Down Expand Up @@ -1831,8 +1831,8 @@ void BVH::Build()
lBMax[i] = l2 = tinybvh_max( l2, binMax[a][i] );
rBMax[BVHBINS - 2 - i] = r2 = tinybvh_max( r2, binMax[a][BVHBINS - 1 - i] );
lN += count[a][i], rN += count[a][BVHBINS - 1 - i];
ANL[i] = lN == 0 ? BVH_FAR : (tinybvh_halfAreaf(l2 - l1) * (float)lN);
ANR[BVHBINS - 2 - i] = rN == 0 ? BVH_FAR : (tinybvh_halfAreaf(r2 - r1) * (float)rN);
ANL[i] = lN == 0 ? BVH_FAR : (tinybvh_half_area( l2 - l1 ) * (float)lN);
ANR[BVHBINS - 2 - i] = rN == 0 ? BVH_FAR : (tinybvh_half_area( r2 - r1 ) * (float)rN);
}
// evaluate bin totals to find best position for object split
for (uint32_t i = 0; i < BVHBINS - 1; i++)
Expand Down Expand Up @@ -1982,7 +1982,7 @@ void BVH::BuildHQ()
uint32_t nextFrag = triCount;
// subdivide recursively
BVHNode& root = bvhNode[0];
const float rootArea = tinybvh_halfAreaf(root.aabbMax - root.aabbMin);
const float rootArea = tinybvh_half_area( root.aabbMax - root.aabbMin );
struct Task { uint32_t node, sliceStart, sliceEnd, dummy; };
ALIGNED( 64 ) Task task[1024];
uint32_t taskCount = 0, nodeIdx = 0, sliceStart = 0, sliceEnd = triCount + slack;
Expand Down Expand Up @@ -2028,8 +2028,8 @@ void BVH::BuildHQ()
lBMax[i] = l2 = tinybvh_max( l2, binMax[a][i] );
rBMax[HQBVHBINS - 2 - i] = r2 = tinybvh_max( r2, binMax[a][HQBVHBINS - 1 - i] );
lN += count[a][i], rN += count[a][HQBVHBINS - 1 - i];
ANL[i] = lN == 0 ? BVH_FAR : (tinybvh_halfAreaf(l2 - l1) * (float)lN);
ANR[HQBVHBINS - 2 - i] = rN == 0 ? BVH_FAR : (tinybvh_halfAreaf(r2 - r1) * (float)rN);
ANL[i] = lN == 0 ? BVH_FAR : (tinybvh_half_area( l2 - l1 ) * (float)lN);
ANR[HQBVHBINS - 2 - i] = rN == 0 ? BVH_FAR : (tinybvh_half_area( r2 - r1 ) * (float)rN);
}
// evaluate bin totals to find best position for object split
for (uint32_t i = 0; i < HQBVHBINS - 1; i++)
Expand All @@ -2044,7 +2044,7 @@ void BVH::BuildHQ()
bool spatial = false;
uint32_t NL[HQBVHBINS - 1], NR[HQBVHBINS - 1], budget = sliceEnd - sliceStart, bestNL = 0, bestNR = 0;
bvhvec3 spatialUnion = bestLMax - bestRMin;
float spatialOverlap = (tinybvh_halfAreaf(spatialUnion)) / rootArea;
float spatialOverlap = (tinybvh_half_area( spatialUnion )) / rootArea;
if (budget > node.triCount && splitCost < 1e30f && spatialOverlap > 1e-5f)
{
for (uint32_t a = 0; a < 3; a++) if ((node.aabbMax[a] - node.aabbMin[a]) > minDim[a])
Expand Down Expand Up @@ -2087,8 +2087,8 @@ void BVH::BuildHQ()
lBMin[i] = l1 = tinybvh_min( l1, binaMin[i] ), rBMin[HQBVHBINS - 2 - i] = r1 = tinybvh_min( r1, binaMin[HQBVHBINS - 1 - i] );
lBMax[i] = l2 = tinybvh_max( l2, binaMax[i] ), rBMax[HQBVHBINS - 2 - i] = r2 = tinybvh_max( r2, binaMax[HQBVHBINS - 1 - i] );
lN += countIn[i], rN += countOut[HQBVHBINS - 1 - i], NL[i] = lN, NR[HQBVHBINS - 2 - i] = rN;
ANL[i] = lN == 0 ? BVH_FAR : (tinybvh_halfAreaf(l2 - l1) * (float)lN);
ANR[HQBVHBINS - 2 - i] = rN == 0 ? BVH_FAR : (tinybvh_halfAreaf(r2 - r1) * (float)rN);
ANL[i] = lN == 0 ? BVH_FAR : (tinybvh_half_area( l2 - l1 ) * (float)lN);
ANR[HQBVHBINS - 2 - i] = rN == 0 ? BVH_FAR : (tinybvh_half_area( r2 - r1 ) * (float)rN);
}
// find best position for spatial split
for (uint32_t i = 0; i < HQBVHBINS - 1; i++)
Expand Down Expand Up @@ -2132,8 +2132,8 @@ void BVH::BuildHQ()
{
bvhvec3 unsplitLMin = tinybvh_min( bestLMin, fragment[fragIdx].bmin );
bvhvec3 unsplitLMax = tinybvh_max( bestLMax, fragment[fragIdx].bmax );
float AL = tinybvh_halfAreaf(unsplitLMax - unsplitLMin);
float AR = tinybvh_halfAreaf(bestRMax - bestRMin);
float AL = tinybvh_half_area( unsplitLMax - unsplitLMin );
float AR = tinybvh_half_area( bestRMax - bestRMin );
float CunsplitLeft = c_trav + c_int * rSAV * (AL * bestNL + AR * (bestNR - 1));
if (CunsplitLeft < splitCost)
{
Expand All @@ -2147,8 +2147,8 @@ void BVH::BuildHQ()
{
const bvhvec3 unsplitRMin = tinybvh_min( bestRMin, fragment[fragIdx].bmin );
const bvhvec3 unsplitRMax = tinybvh_max( bestRMax, fragment[fragIdx].bmax );
const float AL = tinybvh_halfAreaf(bestLMax - bestLMin);
const float AR = tinybvh_halfAreaf(unsplitRMax - unsplitRMin);
const float AL = tinybvh_half_area( bestLMax - bestLMin );
const float AR = tinybvh_half_area( unsplitRMax - unsplitRMin );
const float CunsplitRight = c_trav + c_int * rSAV * (AL * (bestNL - 1) + AR * bestNR);
if (CunsplitRight < splitCost)
{
Expand Down Expand Up @@ -4329,10 +4329,6 @@ void BVH8_CPU::ConvertFrom( const MBVH<8>& original, bool compact )
{
const MBVH<8>::MBVHNode& orig = bvh8.mbvhNode[nodeIdx];
BVHNode& newNode = bvh8Node[newAlt8Ptr++];
if (newAlt8Ptr == 4940)
{
int w= 0;
}
memset( &newNode, 0, sizeof( BVHNode ) );
// calculate the permutation offsets for the node
for (uint32_t q = 0; q < 8; q++)
Expand Down Expand Up @@ -5858,11 +5854,14 @@ int32_t BVH8_CPU::Intersect( Ray& ray ) const
__m256 ox8 = _mm256_set1_ps( ray.O.x ), rdx8 = _mm256_set1_ps( ray.rD.x );
__m256 oy8 = _mm256_set1_ps( ray.O.y ), rdy8 = _mm256_set1_ps( ray.rD.y );
__m256 oz8 = _mm256_set1_ps( ray.O.z ), rdz8 = _mm256_set1_ps( ray.rD.z );
__m256 t8 = _mm256_set1_ps( ray.hit.t ), zero8 = _mm256_setzero_ps();
const __m256i permMask8 = _mm256_set1_epi32( 7 );
const __m256i signShift8 = _mm256_set1_epi32( (ray.D.x > 0 ? 3 : 0) + (ray.D.y > 0 ? 6 : 0) + (ray.D.z > 0 ? 12 : 0) );
__m256 t8 = _mm256_set1_ps( ray.hit.t );
#ifdef BVH8_CPU_COMPACT
const __m256 zero8 = _mm256_setzero_ps();
const __m256i mantissa8 = _mm256_set1_epi32( 255 << 15 );
const __m256i exponent8 = _mm256_set1_epi32( 0x3f800000 );
#endif
const __m256i permMask8 = _mm256_set1_epi32( 7 );
const __m256i signShift8 = _mm256_set1_epi32( (ray.D.x > 0 ? 3 : 0) + (ray.D.y > 0 ? 6 : 0) + (ray.D.z > 0 ? 12 : 0) );
__m128 dx4 = _mm_set1_ps( ray.D.x ), dy4 = _mm_set1_ps( ray.D.y ), dz4 = _mm_set1_ps( ray.D.z );
const __m128 epsNeg4 = _mm_set1_ps( -0.000001f ), eps4 = _mm_set1_ps( 0.000001f ), one4 = _mm_set1_ps( 1.0f );
uint32_t stackPtr = 0, nodeIdx = 0, steps = 0;
Expand Down Expand Up @@ -6022,9 +6021,12 @@ bool BVH8_CPU::IsOccluded( const Ray& ray ) const
__m256 ox8 = _mm256_set1_ps( ray.O.x ), rdx8 = _mm256_set1_ps( ray.rD.x );
__m256 oy8 = _mm256_set1_ps( ray.O.y ), rdy8 = _mm256_set1_ps( ray.rD.y );
__m256 oz8 = _mm256_set1_ps( ray.O.z ), rdz8 = _mm256_set1_ps( ray.rD.z );
const __m256 t8 = _mm256_set1_ps( ray.hit.t ), zero8 = _mm256_setzero_ps();
const __m256 t8 = _mm256_set1_ps( ray.hit.t );
#ifdef BVH8_CPU_COMPACT
const __m256 zero8 = _mm256_setzero_ps();
const __m256i mantissa8 = _mm256_set1_epi32( 255 << 15 );
const __m256i exponent8 = _mm256_set1_epi32( 0x3f800000 );
#endif
__m128 dx4 = _mm_set1_ps( ray.D.x ), dy4 = _mm_set1_ps( ray.D.y ), dz4 = _mm_set1_ps( ray.D.z );
const __m128 epsNeg4 = _mm_set1_ps( -0.000001f ), eps4 = _mm_set1_ps( 0.000001f ), t4 = _mm_set1_ps( ray.hit.t );
const __m128 one4 = _mm_set1_ps( 1.0f ), zero4 = _mm_setzero_ps();
Expand All @@ -6036,9 +6038,10 @@ bool BVH8_CPU::IsOccluded( const Ray& ray ) const
#ifdef BVH8_CPU_COMPACT
const BVHNodeCompact& n = bvh8Small[nodeIdx & 0x1fffffff /* bits 0..28 */];
const __m256i c8 = n.child8;
const __m256i perm8 = n.perm8;
const __m256i cbminmax8 = n.cbminmaxyz8;
const __m256i bminx8i = _mm256_or_si256( exponent8, _mm256_slli_epi32( _mm256_cvtepu8_epi32( _mm_cvtsi64_si128( n.cbminx8 ) ), 15 ) );
const __m256i bmaxx8i = _mm256_or_si256( exponent8, _mm256_and_si256( _mm256_srli_epi32( n.perm8, 9 ), mantissa8 ) );
const __m256i bmaxx8i = _mm256_or_si256( exponent8, _mm256_and_si256( _mm256_srli_epi32( perm8, 9 ), mantissa8 ) );
const __m256i bminy8i = _mm256_or_si256( exponent8, _mm256_and_si256( _mm256_srli_epi32( cbminmax8, 9 ), mantissa8 ) );
const __m256i bmaxy8i = _mm256_or_si256( exponent8, _mm256_and_si256( _mm256_srli_epi32( cbminmax8, 1 ), mantissa8 ) );
const __m256i bminz8i = _mm256_or_si256( exponent8, _mm256_and_si256( _mm256_slli_epi32( cbminmax8, 7 ), mantissa8 ) );
Expand Down Expand Up @@ -7097,8 +7100,8 @@ void BVH_Double::Build()
lBMax[i] = l2 = tinybvh_max( l2, binMax[a][i] );
rBMax[BVHBINS - 2 - i] = r2 = tinybvh_max( r2, binMax[a][BVHBINS - 1 - i] );
lN += count[a][i], rN += count[a][BVHBINS - 1 - i];
ANL[i] = lN == 0 ? BVH_DBL_FAR : (tinybvh_halfAread(l2 - l1) * (double)lN);
ANR[BVHBINS - 2 - i] = rN == 0 ? BVH_DBL_FAR : (tinybvh_halfAread(r2 - r1) * (double)rN);
ANL[i] = lN == 0 ? BVH_DBL_FAR : (tinybvh_half_area( l2 - l1 ) * (double)lN);
ANR[BVHBINS - 2 - i] = rN == 0 ? BVH_DBL_FAR : (tinybvh_half_area( r2 - r1 ) * (double)rN);
}
// evaluate bin totals to find best position for object split
for (uint32_t i = 0; i < BVHBINS - 1; i++)
Expand Down
28 changes: 17 additions & 11 deletions tiny_bvh_speedtest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
#define BUILD_REFERENCE
#define BUILD_DOUBLE
#define BUILD_AVX
#define BUILD_NEON
// #define BUILD_NEON
#define BUILD_SBVH
#define REFIT_BVH2
#define REFIT_MBVH4
Expand All @@ -25,7 +25,7 @@
#define TRAVERSE_4WAY
#define TRAVERSE_WIVE
#define TRAVERSE_2WAY_DBL
#define TRAVERSE_CWBVH
// #define TRAVERSE_CWBVH
#define TRAVERSE_2WAY_MT
#define TRAVERSE_2WAY_MT_PACKET
#define TRAVERSE_OPTIMIZED_ST
Expand All @@ -48,6 +48,8 @@ using namespace tinybvh;
#endif
#ifdef _WIN32
#include <intrin.h> // for __cpuidex
#elif defined(__APPLE__) && defined(__MACH__)
// Keep ENABLE_OPENCL for APPLE
#elif defined ENABLE_OPENCL
#undef ENABLE_OPENCL
#endif
Expand Down Expand Up @@ -208,9 +210,9 @@ float TestShadowRays( uint32_t layout, unsigned N, unsigned passes )
#endif
case _GPU2: for (unsigned i = 0; i < N; i++) occluded += bvh_gpu->IsOccluded( batch[i] ); break;
case _CPU4: for (unsigned i = 0; i < N; i++) occluded += bvh4_cpu->IsOccluded( batch[i] ); break;
#ifdef BVH_USEAVX2
#ifdef BVH_USEAVX2
case _CPU8: for (unsigned i = 0; i < N; i++) occluded += bvh8_cpu->IsOccluded( batch[i] ); break;
#endif
#endif
default: break;
}
}
Expand All @@ -221,7 +223,7 @@ float TestShadowRays( uint32_t layout, unsigned N, unsigned passes )
if (abs( (int)occluded - (int)refOccluded[0] ) > 500) // allow some slack, we're using various tri intersectors
{
fprintf( stderr, "\nValidation for shadow rays failed (%i != %i).\n", (int)occluded, (int)refOccluded[0] );
exit( 1 );
// exit( 1 ); // don't terminate, just warn.
}
return t.elapsed() / passes;
}
Expand Down Expand Up @@ -733,6 +735,7 @@ int main()
#endif

#ifdef TRAVERSE_CWBVH
#ifdef BVH_USEAVX

// CWBVH - Not efficient on CPU.
if (!cwbvh)
Expand All @@ -746,6 +749,7 @@ int main()
printf( "%4.2fM rays in %5.1fms (%7.2fMRays/s)\n", (float)Nsmall * 1e-6f, traceTime * 1000, (float)Nsmall / traceTime * 1e-6f );

#endif
#endif

#if defined TRAVERSE_OPTIMIZED_ST || defined TRAVERSE_4WAY_OPTIMIZED

Expand All @@ -766,21 +770,23 @@ int main()
#endif

#ifdef TRAVERSE_OPTIMIZED_ST
#ifdef BVH_USEAVX

// ALT_SOA
delete bvh_soa;
// Building a BVH_SoA over an optimized BVH: Careful, do not delete the
// passed BVH; we use some of its data in the BVH_SoA.
bvh_soa = new BVH_SoA();
bvh_soa->ConvertFrom( *bvh );
printf( "- ALT_SOA - primary: " );
printf( "- BVH_SOA - primary: " );
traceTime = TestPrimaryRays( _SOA, Nsmall, 3 );
ValidateTraceResult( refDist, Nsmall, __LINE__ );
printf( "%4.2fM rays in %5.1fms (%7.2fMRays/s), ", (float)Nsmall * 1e-6f, traceTime * 1000, (float)Nsmall / traceTime * 1e-6f );
traceTime = TestShadowRays( _SOA, Nsmall, 3 );
printf( "shadow: %5.1fms (%7.2fMRays/s)\n", traceTime * 1000, (float)Nsmall / traceTime * 1e-6f );

#endif
#endif

#ifdef TRAVERSE_4WAY_OPTIMIZED

Expand All @@ -793,7 +799,7 @@ int main()
bvh4_cpu = new BVH4_CPU();
bvh4->ConvertFrom( *bvh );
bvh4_cpu->ConvertFrom( *bvh4 );
printf( "- BVH4_AFRA - primary: " );
printf( "- BVH4_CPU - primary: " );
traceTime = TestPrimaryRays( _CPU4, Nsmall, 3 );
ValidateTraceResult( refDist, Nsmall, __LINE__ );
printf( "%4.2fM rays in %5.1fms (%7.2fMRays/s), ", (float)Nsmall * 1e-6f, traceTime * 1000, (float)Nsmall / traceTime * 1e-6f );
Expand Down Expand Up @@ -821,7 +827,7 @@ int main()
#ifdef GPU_2WAY

// trace the rays on GPU using OpenCL
printf( "- AILA_LAINE - primary: " );
printf( "- BVH_GPU - primary: " );
if (!bvh_gpu)
{
bvh_gpu = new BVH_GPU();
Expand Down Expand Up @@ -917,7 +923,7 @@ int main()
#ifdef GPU_CWBVH

// trace the rays on GPU using OpenCL
printf( "- BVH8/CWBVH - primary: " );
printf( "- BVH8_CWBVH - primary: " );
if (!cwbvh)
{
cwbvh = new BVH8_CWBVH();
Expand Down Expand Up @@ -974,7 +980,7 @@ int main()
#ifdef TRAVERSE_2WAY_MT

// using OpenMP and batches of 10,000 rays
printf( "- WALD_32BYTE - primary: " );
printf( "- BVH (plain) - primary: " );
for (int pass = 0; pass < 4; pass++)
{
if (pass == 1) t.reset(); // first pass is cache warming
Expand Down Expand Up @@ -1082,4 +1088,4 @@ int main()

printf( "all done." );
return 0;
}
}
Loading