diff --git a/tiny_bvh.h b/tiny_bvh.h index 367a2ae..8ef1b01 100644 --- a/tiny_bvh.h +++ b/tiny_bvh.h @@ -163,7 +163,7 @@ THE SOFTWARE. // library version #define TINY_BVH_VERSION_MAJOR 1 #define TINY_BVH_VERSION_MINOR 3 -#define TINY_BVH_VERSION_SUB 4 +#define TINY_BVH_VERSION_SUB 5 // ============================================================================ // @@ -850,6 +850,8 @@ class BVH_GPU : public BVHBase void BuildHQ( const bvhvec4slice& vertices ); void BuildHQ( const bvhvec4* vertices, const uint32_t* indices, const uint32_t primCount ); void BuildHQ( const bvhvec4slice& vertices, const uint32_t* indices, const uint32_t primCount ); + void Optimize( const uint32_t iterations = 25, bool extreme = false ); + float SAHCost( const uint32_t nodeIdx = 0 ) const { return bvh.SAHCost( nodeIdx ); } void ConvertFrom( const BVH& original, bool compact = true ); int32_t Intersect( Ray& ray ) const; bool IsOccluded( const Ray& ray ) const { FALLBACK_SHADOW_QUERY( ray ); } @@ -881,6 +883,8 @@ class BVH_SoA : public BVHBase void BuildHQ( const bvhvec4slice& vertices ); void BuildHQ( const bvhvec4* vertices, const uint32_t* indices, const uint32_t primCount ); void BuildHQ( const bvhvec4slice& vertices, const uint32_t* indices, const uint32_t primCount ); + void Optimize( const uint32_t iterations = 25, bool extreme = false ); + float SAHCost( const uint32_t nodeIdx = 0 ) const { return bvh.SAHCost( nodeIdx ); } void Save( const char* fileName ); bool Load( const char* fileName, const bvhvec4* vertices, const uint32_t primCount ); bool Load( const char* fileName, const bvhvec4* vertices, const uint32_t* indices, const uint32_t primCount ); @@ -921,7 +925,7 @@ class BVH_Verbose : public BVHBase void Compact(); void SplitLeafs( const uint32_t maxPrims = 1 ); void MergeLeafs(); - void Optimize( const uint32_t iterations, const bool extreme = false ); + void Optimize( const uint32_t iterations = 25, bool extreme = false ); private: struct SortItem { uint32_t idx; float 
cost; }; void RefitUp( uint32_t nodeIdx ); @@ -961,7 +965,9 @@ template <int M> class MBVH : public BVHBase void BuildHQ( const bvhvec4slice& vertices ); void BuildHQ( const bvhvec4* vertices, const uint32_t* indices, const uint32_t primCount ); void BuildHQ( const bvhvec4slice& vertices, const uint32_t* indices, const uint32_t primCount ); + void Optimize( const uint32_t iterations = 25, bool extreme = false ); void Refit( const uint32_t nodeIdx = 0 ); + float SAHCost( const uint32_t nodeIdx = 0 ) const; void ConvertFrom( const BVH& original, bool compact = true ); void SplitBVHLeaf( const uint32_t nodeIdx, const uint32_t maxPrims ); // BVH data @@ -1003,7 +1009,9 @@ class BVH4_GPU : public BVHBase void BuildHQ( const bvhvec4slice& vertices ); void BuildHQ( const bvhvec4* vertices, const uint32_t* indices, const uint32_t primCount ); void BuildHQ( const bvhvec4slice& vertices, const uint32_t* indices, const uint32_t primCount ); + void Optimize( const uint32_t iterations = 25, bool extreme = false ); void ConvertFrom( const MBVH<4>& original, bool compact = true ); + float SAHCost( const uint32_t nodeIdx = 0 ) const { return bvh4.SAHCost( nodeIdx ); } int32_t Intersect( Ray& ray ) const; bool IsOccluded( const Ray& ray ) const { FALLBACK_SHADOW_QUERY( ray ); } // BVH data @@ -1038,8 +1046,10 @@ class BVH4_CPU : public BVHBase void BuildHQ( const bvhvec4slice& vertices ); void BuildHQ( const bvhvec4* vertices, const uint32_t* indices, const uint32_t primCount ); void BuildHQ( const bvhvec4slice& vertices, const uint32_t* indices, const uint32_t primCount ); + void Optimize( const uint32_t iterations = 25, bool extreme = false ); void Save( const char* fileName ); bool Load( const char* fileName, const uint32_t primCount ); + float SAHCost( const uint32_t nodeIdx = 0 ) const; void ConvertFrom( const MBVH<4>& original, bool compact = true ); int32_t Intersect( Ray& ray ) const; bool IsOccluded( const Ray& ray ) const; @@ -1066,7 +1076,9 @@ class BVH8_CWBVH : public BVHBase 
void BuildHQ( const bvhvec4slice& vertices ); void BuildHQ( const bvhvec4* vertices, const uint32_t* indices, const uint32_t primCount ); void BuildHQ( const bvhvec4slice& vertices, const uint32_t* indices, const uint32_t primCount ); + void Optimize( const uint32_t iterations = 25, bool extreme = false ); void ConvertFrom( MBVH<8>& original, bool compact = true ); + float SAHCost( const uint32_t nodeIdx = 0 ) const; int32_t Intersect( Ray& ray ) const; bool IsOccluded( const Ray& ray ) const { FALLBACK_SHADOW_QUERY( ray ); } // BVH8 data @@ -2801,11 +2813,6 @@ void BVH_Verbose::CheckFit( const uint32_t nodeIdx, bool skipLeafs ) bmin = tinybvh_min( bvhNode[node.left].aabbMin, bvhNode[node.right].aabbMin ); bmax = tinybvh_max( bvhNode[node.left].aabbMax, bvhNode[node.right].aabbMax ); } - if (node.aabbMin.x != bmin.x || node.aabbMin.y != bmin.y || node.aabbMin.z != bmin.z || - node.aabbMax.x != bmax.x || node.aabbMax.y != bmax.y || node.aabbMax.z != bmax.z) - { - int w = 0; - } } void BVH_Verbose::Compact() @@ -2854,15 +2861,9 @@ void BVH_Verbose::Optimize( const uint32_t iterations, const bool extreme ) sortList[interiorNodes].idx = j, sortList[interiorNodes++].cost = Mcomb; } // last couple of iterations we will process more nodes. - #if 0 - const int tail = extreme ? (tinybvh_min( 14u, iterations - 1 - i ) * 2) : 30; - const int limit = tinybvh_max( (uint32_t)(0.01f * (float)interiorNodes), interiorNodes >> tail ); - const int step = 1; - #else const float portion = extreme ? (0.01f + (0.6f * (float)i) / (float)iterations) : 0.01f; const int limit = (uint32_t)(portion * (float)interiorNodes); const int step = tinybvh_max( 1, (int)(portion / 0.02f) ); - #endif // sort list - partial quick sort. 
struct Task { uint32_t first, last; } stack[64]; int pivot, first = 0, last = (int)interiorNodes - 1, stackPtr = 0; @@ -3097,6 +3098,12 @@ void BVH_GPU::BuildHQ( const bvhvec4slice& vertices, const uint32_t* indices, ui ConvertFrom( bvh, false ); } +void BVH_GPU::Optimize( const uint32_t iterations, bool extreme ) +{ + bvh.Optimize( iterations, extreme ); + ConvertFrom( bvh, false ); +} + void BVH_GPU::ConvertFrom( const BVH& original, bool compact ) { // get a copy of the original bvh @@ -3251,6 +3258,12 @@ void BVH_SoA::BuildHQ( const bvhvec4slice& vertices, const uint32_t* indices, ui ConvertFrom( bvh, false ); } +void BVH_SoA::Optimize( const uint32_t iterations, bool extreme ) +{ + bvh.Optimize( iterations, extreme ); + ConvertFrom( bvh, false ); +} + void BVH_SoA::Save( const char* fileName ) { bvh.Save( fileName ); @@ -3382,6 +3395,12 @@ template<int M> void MBVH<M>::BuildHQ( const bvhvec4slice& vertices, const uint3 ConvertFrom( bvh, true ); } +template<int M> void MBVH<M>::Optimize( const uint32_t iterations, bool extreme ) +{ + bvh.Optimize( iterations, extreme ); + ConvertFrom( bvh, true ); +} + template<int M> void MBVH<M>::Refit( const uint32_t nodeIdx ) { MBVHNode& node = mbvhNode[nodeIdx]; @@ -3426,6 +3445,18 @@ template<int M> void MBVH<M>::Refit( const uint32_t nodeIdx ) if (nodeIdx == 0) aabbMin = node.aabbMin, aabbMax = node.aabbMax; } +template<int M> float MBVH<M>::SAHCost( const uint32_t nodeIdx ) const +{ + // Determine the SAH cost of the tree. This provides an indication + // of the quality of the BVH: Lower is better. + const MBVHNode& n = mbvhNode[nodeIdx]; + const float sa = BVH::SA( n.aabbMin, n.aabbMax ); + if (n.isLeaf()) return C_INT * sa * n.triCount; + float cost = C_TRAV * sa; + for (unsigned i = 0; i < M; i++) if (n.child[i] != 0) cost += SAHCost( n.child[i] ); + return nodeIdx == 0 ? 
(cost / sa) : cost; +} + template<int M> void MBVH<M>::ConvertFrom( const BVH& original, bool compact ) { // get a copy of the original bvh @@ -3600,6 +3631,17 @@ void BVH4_CPU::BuildHQ( const bvhvec4slice& vertices, const uint32_t* indices, u ConvertFrom( bvh4, true ); } +void BVH4_CPU::Optimize( const uint32_t iterations, bool extreme ) +{ + bvh4.Optimize( iterations, extreme ); + ConvertFrom( bvh4, true ); +} + +float BVH4_CPU::SAHCost( const uint32_t nodeIdx ) const +{ + return bvh4.SAHCost( nodeIdx ); +} + void BVH4_CPU::Save( const char* fileName ) { // saving is easy, it's the loadingn that will be complex. @@ -3788,6 +3830,12 @@ void BVH4_GPU::BuildHQ( const bvhvec4slice& vertices, const uint32_t* indices, u ConvertFrom( bvh4, true ); } +void BVH4_GPU::Optimize( const uint32_t iterations, bool extreme ) +{ + bvh4.Optimize( iterations, extreme ); + ConvertFrom( bvh4, true ); +} + void BVH4_GPU::ConvertFrom( const MBVH<4>& original, bool compact ) { // get a copy of the original bvh4 @@ -4038,6 +4086,17 @@ BVH8_CWBVH::~BVH8_CWBVH() AlignedFree( bvh8Tris ); } +void BVH8_CWBVH::Optimize( const uint32_t iterations, bool extreme ) +{ + bvh8.Optimize( iterations, extreme ); + ConvertFrom( bvh8, true ); +} + +float BVH8_CWBVH::SAHCost( const uint32_t nodeIdx ) const +{ + return bvh8.SAHCost( nodeIdx ); +} + void BVH8_CWBVH::Save( const char* fileName ) { std::fstream s{ fileName, s.binary | s.out }; @@ -6808,7 +6867,8 @@ float BVH_Verbose::SAHCostUp( uint32_t nodeIdx ) const // K.I.S.S. version with brute-force array search. 
uint32_t BVH_Verbose::FindBestNewPosition( const uint32_t Lid ) const { - ALIGNED( 64 ) struct Task { float ci; uint32_t node; } task[512]; + struct Task { float ci; uint32_t node; }; + ALIGNED( 64 ) Task task[512]; float Cbest = BVH_FAR; int tasks = 1 /* doesn't exceed 70 for Crytek Sponza */, Xbest = 0; const BVHNode& L = bvhNode[Lid]; @@ -6926,4 +6986,4 @@ void BVH_Verbose::MergeSubtree( const uint32_t nodeIdx, uint32_t* newIdx, uint32 #pragma GCC diagnostic pop #endif -#endif // TINYBVH_IMPLEMENTATION +#endif // TINYBVH_IMPLEMENTATION \ No newline at end of file diff --git a/tiny_bvh_fenster.cpp b/tiny_bvh_fenster.cpp index 8b9c1f0..7ce5db6 100644 --- a/tiny_bvh_fenster.cpp +++ b/tiny_bvh_fenster.cpp @@ -4,9 +4,9 @@ #include "external/fenster.h" // https://github.com/zserge/fenster //#define COLOR_PRIM // compute color as hashed triangle Index -//#define COLOR_DEPTH // compute color as depth of intersection +//#define COLOR_DEPTH // compute color as depth of intersection -// #define LOADSCENE +#define LOADSCENE #define TINYBVH_IMPLEMENTATION #include "tiny_bvh.h" @@ -14,7 +14,7 @@ using namespace tinybvh; -BVH4_CPU bvh; +BVH bvh; int frameIdx = 0; Ray* rays = 0; #ifdef COLOR_DEPTH @@ -180,7 +180,7 @@ void Tick( float delta_time_s, fenster& f, uint32_t* buf ) for (int i = 0; i < N; i++) { #ifdef COLOR_DEPTH depths[i] = bvh.Intersect( rays[i] ); - #else + #else bvh.Intersect( rays[i] ); #endif } @@ -192,7 +192,6 @@ void Tick( float delta_time_s, fenster& f, uint32_t* buf ) { int pixel_x = tx * 4 + x, pixel_y = ty * 4 + y, primIdx = rays[i].hit.prim; - #ifdef COLOR_DEPTH buf[pixel_x + pixel_y * SCRWIDTH] = depths[i] << 17; // render depth as red #elif defined COLOR_PRIM @@ -214,7 +213,7 @@ void Tick( float delta_time_s, fenster& f, uint32_t* buf ) char title[50]; sprintf( title, "tiny_bvh %.2f s %.2f Hz", delta_time_s, 1.0f / delta_time_s ); fenster_update_title( &f, title ); -} + } void Shutdown() { diff --git a/tmpl8/game.cpp b/tmpl8/game.cpp index 
7e28f70..ae49ab5 100644 --- a/tmpl8/game.cpp +++ b/tmpl8/game.cpp @@ -44,7 +44,7 @@ void Game::Init() AddMesh( "./testdata/dragon.bin", 1, float3( 0 ) ); swap( verts, dragonVerts ); swap( triCount, dragonTriCount ); - dragon.Build( dragonVerts, dragonTriCount ); + dragon.BuildHQ( dragonVerts, dragonTriCount ); // create dragon instances for (int d = 0; d < DRAGONS; d++) @@ -71,7 +71,8 @@ void Game::Init() AddMesh( "./testdata/bistro_ext_part2.bin", 1, float3( 0 ) ); // build bvh (here: 'compressed wide bvh', for efficient GPU rendering) - bistro.Build( verts, triCount ); + bistro.BuildHQ( verts, triCount ); + instance[0] = BLASInstance( 0 /* static geometry */ ); tlas.Build( instance, DRAGONS + 1, blasList, 2 ); diff --git a/tmpl8/template/template.cpp b/tmpl8/template/template.cpp index 519d201..9f0d649 100644 --- a/tmpl8/template/template.cpp +++ b/tmpl8/template/template.cpp @@ -93,7 +93,7 @@ void ErrorCallback( int, const char* description ) } // Application entry point -void main() +int main() { // open a window if (!glfwInit()) FatalError( "glfwInit failed." ); @@ -362,6 +362,7 @@ void main() app->Shutdown(); glfwDestroyWindow( window ); glfwTerminate(); + return 0; } // Jobmanager implementation