Skip to content

Commit

Permalink
Merge pull request #137 from jbikker/dev
Browse files Browse the repository at this point in the history
Merging dev to main
  • Loading branch information
jbikker authored Mar 3, 2025
2 parents 11d6df0 + 95fbc2e commit f72924e
Show file tree
Hide file tree
Showing 6 changed files with 107 additions and 44 deletions.
6 changes: 6 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,10 @@ project(tiny_bvh LANGUAGES CXX)

if (APPLE)
find_library(COCOA_LIBRARY Cocoa)
find_library(OPENCL_LIBRARY OpenCL)
if (OPENCL_LIBRARY)
set(OPENCL_FRAMEWORK "-framework OpenCL")
endif()
elseif (UNIX AND NOT EMSCRIPTEN)
find_package(X11)
elseif (EMSCRIPTEN)
Expand Down Expand Up @@ -118,6 +122,8 @@ if (NOT MSVC)
set(tiny_bvh_speedtest_link_flags ${tiny_bvh_speedtest_link_flags} -sPROXY_TO_PTHREAD=1)
endif()
endif()
elseif (APPLE)
target_link_libraries(tiny_bvh_speedtest ${COCOA_LIBRARY} ${OPENCL_FRAMEWORK})
endif()
target_compile_options(tiny_bvh_speedtest PRIVATE ${tiny_bvh_speedtest_cxx_flags})
target_link_options(tiny_bvh_speedtest PRIVATE ${tiny_bvh_speedtest_link_flags})
Expand Down
6 changes: 5 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ The **performance measurement tool** can be compiled with:

````g++ -std=c++20 -mavx2 -mfma -Ofast tiny_bvh_speedtest.cpp -o tiny_bvh_speedtest````

# Version 1.4.1
# Version 1.4.2

Version 1.4.0 introduces a new BVH layout for fast single-ray traversal on CPU: BVH8_CPU. This supersedes the previous fastest scheme, BVH4_CPU.

Expand Down Expand Up @@ -125,6 +125,7 @@ This version of the library includes the following functionality:
* Fast AVX2 ray tracing: Implements the 2017 paper by [Fuetterling et al.](https://web.cs.ucdavis.edu/~hamann/FuetterlingLojewskiPfreundtHamannEbertHPG2017PaperFinal06222017.pdf)
* Fast triangle intersection: Implements the 2016 paper by [Baldwin & Weber](https://jcgt.org/published/0005/03/03/paper.pdf)
* OpenCL traversal example code: Aila & Laine, 4-way quantized, CWBVH
* OpenCL support for MacOS, by [wuyakuma](https://github.com/wuyakuma)
* Support for WASM / EMSCRIPTEN, g++, clang, Visual Studio
* Optional user-defined memory allocation, by [Thierry Cantenot](https://github.com/tcantenot)
* Vertex array can now have a custom stride, by [David Peicho](https://github.com/DavidPeicho)
Expand All @@ -142,6 +143,9 @@ Plans, ordered by priority:
* Speed improvements:
* Faster optimizer for AVX-capable CPUs
* Improve speed of SBVH builder
* Features & outstanding issues:
* 'Watertight' triangle intersection option
* Load/save/Optimize/Refit for BVH8_CPU
* Demo of tinybvh on GPU using other apis:
* Ray tracing in pure OpenGL
* Ray tracing in pure DirectX
Expand Down
59 changes: 31 additions & 28 deletions tiny_bvh.h
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
/*
The MIT License (MIT)
Copyright (c) 2024, Jacco Bikker / Breda University of Applied Sciences.
Copyright (c) 2024-2025, Jacco Bikker / Breda University of Applied Sciences.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
Expand Down Expand Up @@ -170,7 +170,7 @@ THE SOFTWARE.
// library version
#define TINY_BVH_VERSION_MAJOR 1
#define TINY_BVH_VERSION_MINOR 4
#define TINY_BVH_VERSION_SUB 1
#define TINY_BVH_VERSION_SUB 2

// ============================================================================
//
Expand Down Expand Up @@ -351,7 +351,7 @@ inline bvhvec4 tinybvh_max( const bvhvec4& a, const bvhvec4& b ) { return bvhvec
inline float tinybvh_clamp( const float x, const float a, const float b ) { return x > a ? (x < b ? x : b) : a; /* NaN safe */ }
inline int32_t tinybvh_clamp( const int32_t x, const int32_t a, const int32_t b ) { return x > a ? (x < b ? x : b) : a; /* NaN safe */ }
template <class T> inline static void tinybvh_swap( T& a, T& b ) { T t = a; a = b; b = t; }
inline float tinybvh_halfAreaf(const bvhvec3& v) { return v.x < -BVH_FAR ? 0 : (v.x * v.y + v.y * v.z + v.z * v.x); } // for SAH calculations
inline float tinybvh_half_area( const bvhvec3& v ) { return v.x < -BVH_FAR ? 0 : (v.x * v.y + v.y * v.z + v.z * v.x); } // for SAH calculations

// Operator overloads.
// Only a minimal set is provided.
Expand Down Expand Up @@ -483,7 +483,7 @@ inline bvhdbl3 tinybvh_cross( const bvhdbl3& a, const bvhdbl3& b )
}
inline double tinybvh_dot( const bvhdbl3& a, const bvhdbl3& b ) { return a.x * b.x + a.y * b.y + a.z * b.z; }

inline double tinybvh_halfAread(const bvhdbl3& v) { return v.x < -BVH_FAR ? 0 : (v.x * v.y + v.y * v.z + v.z * v.x); } // for SAH calculations
inline double tinybvh_half_area( const bvhdbl3& v ) { return v.x < -BVH_FAR ? 0 : (v.x * v.y + v.y * v.z + v.z * v.x); } // for SAH calculations

#endif // DOUBLE_PRECISION_SUPPORT

Expand Down Expand Up @@ -1142,9 +1142,9 @@ class BVH8_CPU : public BVHBase
struct BVHNodeCompact
{
// Novel 8-way BVH node, with quantized child node bounds, similar to CWBVH.
uint64_t cbminx8; // 8, stores aabbMin.x for 8 children, quantized.
float bminx, bminy, bminz; // 12, actually: bmin - ext.
float bextx, bexty, bextz; // 12, extend of the node, scaled conversatively.
uint64_t cbminx8; // 8, stores aabbMin.x for 8 children, quantized.
__m256i cbminmaxyz8; // 32, stores cbminy8, cbminz8, cbmaxy8, cbmaxz8
__m256i child8, perm8; // 64, includes cbmaxx8<<24 in perm8.
};
Expand Down Expand Up @@ -1831,8 +1831,8 @@ void BVH::Build()
lBMax[i] = l2 = tinybvh_max( l2, binMax[a][i] );
rBMax[BVHBINS - 2 - i] = r2 = tinybvh_max( r2, binMax[a][BVHBINS - 1 - i] );
lN += count[a][i], rN += count[a][BVHBINS - 1 - i];
ANL[i] = lN == 0 ? BVH_FAR : (tinybvh_halfAreaf(l2 - l1) * (float)lN);
ANR[BVHBINS - 2 - i] = rN == 0 ? BVH_FAR : (tinybvh_halfAreaf(r2 - r1) * (float)rN);
ANL[i] = lN == 0 ? BVH_FAR : (tinybvh_half_area( l2 - l1 ) * (float)lN);
ANR[BVHBINS - 2 - i] = rN == 0 ? BVH_FAR : (tinybvh_half_area( r2 - r1 ) * (float)rN);
}
// evaluate bin totals to find best position for object split
for (uint32_t i = 0; i < BVHBINS - 1; i++)
Expand Down Expand Up @@ -1982,7 +1982,7 @@ void BVH::BuildHQ()
uint32_t nextFrag = triCount;
// subdivide recursively
BVHNode& root = bvhNode[0];
const float rootArea = tinybvh_halfAreaf(root.aabbMax - root.aabbMin);
const float rootArea = tinybvh_half_area( root.aabbMax - root.aabbMin );
struct Task { uint32_t node, sliceStart, sliceEnd, dummy; };
ALIGNED( 64 ) Task task[1024];
uint32_t taskCount = 0, nodeIdx = 0, sliceStart = 0, sliceEnd = triCount + slack;
Expand Down Expand Up @@ -2028,8 +2028,8 @@ void BVH::BuildHQ()
lBMax[i] = l2 = tinybvh_max( l2, binMax[a][i] );
rBMax[HQBVHBINS - 2 - i] = r2 = tinybvh_max( r2, binMax[a][HQBVHBINS - 1 - i] );
lN += count[a][i], rN += count[a][HQBVHBINS - 1 - i];
ANL[i] = lN == 0 ? BVH_FAR : (tinybvh_halfAreaf(l2 - l1) * (float)lN);
ANR[HQBVHBINS - 2 - i] = rN == 0 ? BVH_FAR : (tinybvh_halfAreaf(r2 - r1) * (float)rN);
ANL[i] = lN == 0 ? BVH_FAR : (tinybvh_half_area( l2 - l1 ) * (float)lN);
ANR[HQBVHBINS - 2 - i] = rN == 0 ? BVH_FAR : (tinybvh_half_area( r2 - r1 ) * (float)rN);
}
// evaluate bin totals to find best position for object split
for (uint32_t i = 0; i < HQBVHBINS - 1; i++)
Expand All @@ -2044,7 +2044,7 @@ void BVH::BuildHQ()
bool spatial = false;
uint32_t NL[HQBVHBINS - 1], NR[HQBVHBINS - 1], budget = sliceEnd - sliceStart, bestNL = 0, bestNR = 0;
bvhvec3 spatialUnion = bestLMax - bestRMin;
float spatialOverlap = (tinybvh_halfAreaf(spatialUnion)) / rootArea;
float spatialOverlap = (tinybvh_half_area( spatialUnion )) / rootArea;
if (budget > node.triCount && splitCost < 1e30f && spatialOverlap > 1e-5f)
{
for (uint32_t a = 0; a < 3; a++) if ((node.aabbMax[a] - node.aabbMin[a]) > minDim[a])
Expand Down Expand Up @@ -2087,8 +2087,8 @@ void BVH::BuildHQ()
lBMin[i] = l1 = tinybvh_min( l1, binaMin[i] ), rBMin[HQBVHBINS - 2 - i] = r1 = tinybvh_min( r1, binaMin[HQBVHBINS - 1 - i] );
lBMax[i] = l2 = tinybvh_max( l2, binaMax[i] ), rBMax[HQBVHBINS - 2 - i] = r2 = tinybvh_max( r2, binaMax[HQBVHBINS - 1 - i] );
lN += countIn[i], rN += countOut[HQBVHBINS - 1 - i], NL[i] = lN, NR[HQBVHBINS - 2 - i] = rN;
ANL[i] = lN == 0 ? BVH_FAR : (tinybvh_halfAreaf(l2 - l1) * (float)lN);
ANR[HQBVHBINS - 2 - i] = rN == 0 ? BVH_FAR : (tinybvh_halfAreaf(r2 - r1) * (float)rN);
ANL[i] = lN == 0 ? BVH_FAR : (tinybvh_half_area( l2 - l1 ) * (float)lN);
ANR[HQBVHBINS - 2 - i] = rN == 0 ? BVH_FAR : (tinybvh_half_area( r2 - r1 ) * (float)rN);
}
// find best position for spatial split
for (uint32_t i = 0; i < HQBVHBINS - 1; i++)
Expand Down Expand Up @@ -2132,8 +2132,8 @@ void BVH::BuildHQ()
{
bvhvec3 unsplitLMin = tinybvh_min( bestLMin, fragment[fragIdx].bmin );
bvhvec3 unsplitLMax = tinybvh_max( bestLMax, fragment[fragIdx].bmax );
float AL = tinybvh_halfAreaf(unsplitLMax - unsplitLMin);
float AR = tinybvh_halfAreaf(bestRMax - bestRMin);
float AL = tinybvh_half_area( unsplitLMax - unsplitLMin );
float AR = tinybvh_half_area( bestRMax - bestRMin );
float CunsplitLeft = c_trav + c_int * rSAV * (AL * bestNL + AR * (bestNR - 1));
if (CunsplitLeft < splitCost)
{
Expand All @@ -2147,8 +2147,8 @@ void BVH::BuildHQ()
{
const bvhvec3 unsplitRMin = tinybvh_min( bestRMin, fragment[fragIdx].bmin );
const bvhvec3 unsplitRMax = tinybvh_max( bestRMax, fragment[fragIdx].bmax );
const float AL = tinybvh_halfAreaf(bestLMax - bestLMin);
const float AR = tinybvh_halfAreaf(unsplitRMax - unsplitRMin);
const float AL = tinybvh_half_area( bestLMax - bestLMin );
const float AR = tinybvh_half_area( unsplitRMax - unsplitRMin );
const float CunsplitRight = c_trav + c_int * rSAV * (AL * (bestNL - 1) + AR * bestNR);
if (CunsplitRight < splitCost)
{
Expand Down Expand Up @@ -4329,10 +4329,6 @@ void BVH8_CPU::ConvertFrom( const MBVH<8>& original, bool compact )
{
const MBVH<8>::MBVHNode& orig = bvh8.mbvhNode[nodeIdx];
BVHNode& newNode = bvh8Node[newAlt8Ptr++];
if (newAlt8Ptr == 4940)
{
int w= 0;
}
memset( &newNode, 0, sizeof( BVHNode ) );
// calculate the permutation offsets for the node
for (uint32_t q = 0; q < 8; q++)
Expand Down Expand Up @@ -5858,11 +5854,14 @@ int32_t BVH8_CPU::Intersect( Ray& ray ) const
__m256 ox8 = _mm256_set1_ps( ray.O.x ), rdx8 = _mm256_set1_ps( ray.rD.x );
__m256 oy8 = _mm256_set1_ps( ray.O.y ), rdy8 = _mm256_set1_ps( ray.rD.y );
__m256 oz8 = _mm256_set1_ps( ray.O.z ), rdz8 = _mm256_set1_ps( ray.rD.z );
__m256 t8 = _mm256_set1_ps( ray.hit.t ), zero8 = _mm256_setzero_ps();
const __m256i permMask8 = _mm256_set1_epi32( 7 );
const __m256i signShift8 = _mm256_set1_epi32( (ray.D.x > 0 ? 3 : 0) + (ray.D.y > 0 ? 6 : 0) + (ray.D.z > 0 ? 12 : 0) );
__m256 t8 = _mm256_set1_ps( ray.hit.t );
#ifdef BVH8_CPU_COMPACT
const __m256 zero8 = _mm256_setzero_ps();
const __m256i mantissa8 = _mm256_set1_epi32( 255 << 15 );
const __m256i exponent8 = _mm256_set1_epi32( 0x3f800000 );
#endif
const __m256i permMask8 = _mm256_set1_epi32( 7 );
const __m256i signShift8 = _mm256_set1_epi32( (ray.D.x > 0 ? 3 : 0) + (ray.D.y > 0 ? 6 : 0) + (ray.D.z > 0 ? 12 : 0) );
__m128 dx4 = _mm_set1_ps( ray.D.x ), dy4 = _mm_set1_ps( ray.D.y ), dz4 = _mm_set1_ps( ray.D.z );
const __m128 epsNeg4 = _mm_set1_ps( -0.000001f ), eps4 = _mm_set1_ps( 0.000001f ), one4 = _mm_set1_ps( 1.0f );
uint32_t stackPtr = 0, nodeIdx = 0, steps = 0;
Expand Down Expand Up @@ -6022,9 +6021,12 @@ bool BVH8_CPU::IsOccluded( const Ray& ray ) const
__m256 ox8 = _mm256_set1_ps( ray.O.x ), rdx8 = _mm256_set1_ps( ray.rD.x );
__m256 oy8 = _mm256_set1_ps( ray.O.y ), rdy8 = _mm256_set1_ps( ray.rD.y );
__m256 oz8 = _mm256_set1_ps( ray.O.z ), rdz8 = _mm256_set1_ps( ray.rD.z );
const __m256 t8 = _mm256_set1_ps( ray.hit.t ), zero8 = _mm256_setzero_ps();
const __m256 t8 = _mm256_set1_ps( ray.hit.t );
#ifdef BVH8_CPU_COMPACT
const __m256 zero8 = _mm256_setzero_ps();
const __m256i mantissa8 = _mm256_set1_epi32( 255 << 15 );
const __m256i exponent8 = _mm256_set1_epi32( 0x3f800000 );
#endif
__m128 dx4 = _mm_set1_ps( ray.D.x ), dy4 = _mm_set1_ps( ray.D.y ), dz4 = _mm_set1_ps( ray.D.z );
const __m128 epsNeg4 = _mm_set1_ps( -0.000001f ), eps4 = _mm_set1_ps( 0.000001f ), t4 = _mm_set1_ps( ray.hit.t );
const __m128 one4 = _mm_set1_ps( 1.0f ), zero4 = _mm_setzero_ps();
Expand All @@ -6036,9 +6038,10 @@ bool BVH8_CPU::IsOccluded( const Ray& ray ) const
#ifdef BVH8_CPU_COMPACT
const BVHNodeCompact& n = bvh8Small[nodeIdx & 0x1fffffff /* bits 0..28 */];
const __m256i c8 = n.child8;
const __m256i perm8 = n.perm8;
const __m256i cbminmax8 = n.cbminmaxyz8;
const __m256i bminx8i = _mm256_or_si256( exponent8, _mm256_slli_epi32( _mm256_cvtepu8_epi32( _mm_cvtsi64_si128( n.cbminx8 ) ), 15 ) );
const __m256i bmaxx8i = _mm256_or_si256( exponent8, _mm256_and_si256( _mm256_srli_epi32( n.perm8, 9 ), mantissa8 ) );
const __m256i bmaxx8i = _mm256_or_si256( exponent8, _mm256_and_si256( _mm256_srli_epi32( perm8, 9 ), mantissa8 ) );
const __m256i bminy8i = _mm256_or_si256( exponent8, _mm256_and_si256( _mm256_srli_epi32( cbminmax8, 9 ), mantissa8 ) );
const __m256i bmaxy8i = _mm256_or_si256( exponent8, _mm256_and_si256( _mm256_srli_epi32( cbminmax8, 1 ), mantissa8 ) );
const __m256i bminz8i = _mm256_or_si256( exponent8, _mm256_and_si256( _mm256_slli_epi32( cbminmax8, 7 ), mantissa8 ) );
Expand Down Expand Up @@ -7097,8 +7100,8 @@ void BVH_Double::Build()
lBMax[i] = l2 = tinybvh_max( l2, binMax[a][i] );
rBMax[BVHBINS - 2 - i] = r2 = tinybvh_max( r2, binMax[a][BVHBINS - 1 - i] );
lN += count[a][i], rN += count[a][BVHBINS - 1 - i];
ANL[i] = lN == 0 ? BVH_DBL_FAR : (tinybvh_halfAread(l2 - l1) * (double)lN);
ANR[BVHBINS - 2 - i] = rN == 0 ? BVH_DBL_FAR : (tinybvh_halfAread(r2 - r1) * (double)rN);
ANL[i] = lN == 0 ? BVH_DBL_FAR : (tinybvh_half_area( l2 - l1 ) * (double)lN);
ANR[BVHBINS - 2 - i] = rN == 0 ? BVH_DBL_FAR : (tinybvh_half_area( r2 - r1 ) * (double)rN);
}
// evaluate bin totals to find best position for object split
for (uint32_t i = 0; i < BVHBINS - 1; i++)
Expand Down
28 changes: 17 additions & 11 deletions tiny_bvh_speedtest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
#define BUILD_REFERENCE
#define BUILD_DOUBLE
#define BUILD_AVX
#define BUILD_NEON
// #define BUILD_NEON
#define BUILD_SBVH
#define REFIT_BVH2
#define REFIT_MBVH4
Expand All @@ -25,7 +25,7 @@
#define TRAVERSE_4WAY
#define TRAVERSE_WIVE
#define TRAVERSE_2WAY_DBL
#define TRAVERSE_CWBVH
// #define TRAVERSE_CWBVH
#define TRAVERSE_2WAY_MT
#define TRAVERSE_2WAY_MT_PACKET
#define TRAVERSE_OPTIMIZED_ST
Expand All @@ -48,6 +48,8 @@ using namespace tinybvh;
#endif
#ifdef _WIN32
#include <intrin.h> // for __cpuidex
#elif defined(__APPLE__) && defined(__MACH__)
// Keep ENABLE_OPENCL for APPLE
#elif defined ENABLE_OPENCL
#undef ENABLE_OPENCL
#endif
Expand Down Expand Up @@ -208,9 +210,9 @@ float TestShadowRays( uint32_t layout, unsigned N, unsigned passes )
#endif
case _GPU2: for (unsigned i = 0; i < N; i++) occluded += bvh_gpu->IsOccluded( batch[i] ); break;
case _CPU4: for (unsigned i = 0; i < N; i++) occluded += bvh4_cpu->IsOccluded( batch[i] ); break;
#ifdef BVH_USEAVX2
#ifdef BVH_USEAVX2
case _CPU8: for (unsigned i = 0; i < N; i++) occluded += bvh8_cpu->IsOccluded( batch[i] ); break;
#endif
#endif
default: break;
}
}
Expand All @@ -221,7 +223,7 @@ float TestShadowRays( uint32_t layout, unsigned N, unsigned passes )
if (abs( (int)occluded - (int)refOccluded[0] ) > 500) // allow some slack, we're using various tri intersectors
{
fprintf( stderr, "\nValidation for shadow rays failed (%i != %i).\n", (int)occluded, (int)refOccluded[0] );
exit( 1 );
// exit( 1 ); // don't terminate, just warn.
}
return t.elapsed() / passes;
}
Expand Down Expand Up @@ -733,6 +735,7 @@ int main()
#endif

#ifdef TRAVERSE_CWBVH
#ifdef BVH_USEAVX

// CWBVH - Not efficient on CPU.
if (!cwbvh)
Expand All @@ -746,6 +749,7 @@ int main()
printf( "%4.2fM rays in %5.1fms (%7.2fMRays/s)\n", (float)Nsmall * 1e-6f, traceTime * 1000, (float)Nsmall / traceTime * 1e-6f );

#endif
#endif

#if defined TRAVERSE_OPTIMIZED_ST || defined TRAVERSE_4WAY_OPTIMIZED

Expand All @@ -766,21 +770,23 @@ int main()
#endif

#ifdef TRAVERSE_OPTIMIZED_ST
#ifdef BVH_USEAVX

// ALT_SOA
delete bvh_soa;
// Building a BVH_SoA over an optimized BVH: Careful, do not delete the
// passed BVH; we use some of its data in the BVH_SoA.
bvh_soa = new BVH_SoA();
bvh_soa->ConvertFrom( *bvh );
printf( "- ALT_SOA - primary: " );
printf( "- BVH_SOA - primary: " );
traceTime = TestPrimaryRays( _SOA, Nsmall, 3 );
ValidateTraceResult( refDist, Nsmall, __LINE__ );
printf( "%4.2fM rays in %5.1fms (%7.2fMRays/s), ", (float)Nsmall * 1e-6f, traceTime * 1000, (float)Nsmall / traceTime * 1e-6f );
traceTime = TestShadowRays( _SOA, Nsmall, 3 );
printf( "shadow: %5.1fms (%7.2fMRays/s)\n", traceTime * 1000, (float)Nsmall / traceTime * 1e-6f );

#endif
#endif

#ifdef TRAVERSE_4WAY_OPTIMIZED

Expand All @@ -793,7 +799,7 @@ int main()
bvh4_cpu = new BVH4_CPU();
bvh4->ConvertFrom( *bvh );
bvh4_cpu->ConvertFrom( *bvh4 );
printf( "- BVH4_AFRA - primary: " );
printf( "- BVH4_CPU - primary: " );
traceTime = TestPrimaryRays( _CPU4, Nsmall, 3 );
ValidateTraceResult( refDist, Nsmall, __LINE__ );
printf( "%4.2fM rays in %5.1fms (%7.2fMRays/s), ", (float)Nsmall * 1e-6f, traceTime * 1000, (float)Nsmall / traceTime * 1e-6f );
Expand Down Expand Up @@ -821,7 +827,7 @@ int main()
#ifdef GPU_2WAY

// trace the rays on GPU using OpenCL
printf( "- AILA_LAINE - primary: " );
printf( "- BVH_GPU - primary: " );
if (!bvh_gpu)
{
bvh_gpu = new BVH_GPU();
Expand Down Expand Up @@ -917,7 +923,7 @@ int main()
#ifdef GPU_CWBVH

// trace the rays on GPU using OpenCL
printf( "- BVH8/CWBVH - primary: " );
printf( "- BVH8_CWBVH - primary: " );
if (!cwbvh)
{
cwbvh = new BVH8_CWBVH();
Expand Down Expand Up @@ -974,7 +980,7 @@ int main()
#ifdef TRAVERSE_2WAY_MT

// using OpenMP and batches of 10,000 rays
printf( "- WALD_32BYTE - primary: " );
printf( "- BVH (plain) - primary: " );
for (int pass = 0; pass < 4; pass++)
{
if (pass == 1) t.reset(); // first pass is cache warming
Expand Down Expand Up @@ -1082,4 +1088,4 @@ int main()

printf( "all done." );
return 0;
}
}
Loading

0 comments on commit f72924e

Please sign in to comment.