Skip to content

Commit

Permalink
New speed test tool added.
Browse files Browse the repository at this point in the history
  • Loading branch information
jbikker committed Nov 3, 2024
1 parent b82ca0f commit 0402288
Show file tree
Hide file tree
Showing 5 changed files with 344 additions and 4 deletions.
4 changes: 4 additions & 0 deletions tiny_bvh.h
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,10 @@ THE SOFTWARE.
#define BVH_USEAVX
#endif

// library version
#define TINY_BVH_VERSION_MAJOR 0
#define TINY_BVH_VERSION_MINOR 2

// ============================================================================
//
// P R E L I M I N A R I E S
Expand Down
12 changes: 8 additions & 4 deletions tiny_bvh_fenster.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -42,11 +42,13 @@ void Init()

void Tick( uint32_t* buf )
{
// setup camera
// setup view pyramid for a pinhole camera:
// eye, p1 (top-left), p2 (top-right) and p3 (bottom-left)
bvhvec3 eye( -3.5f, -1.5f, -6.5f ), view = normalize( bvhvec3( 3, 1.5f, 5 ) );
bvhvec3 right = normalize( cross( bvhvec3( 0, 1, 0 ), view ) );
bvhvec3 up = 0.8f * cross( view, right ), C = eye + 2 * view;
bvhvec3 p1 = C - right + up, p2 = C + right + up, p3 = C - right - up;

// generate primary rays in a buffer
int N = 0;
Ray* rays = new Ray[SCRWIDTH * SCRHEIGHT * 16];
Expand All @@ -60,18 +62,20 @@ void Tick( uint32_t* buf )
rays[N++] = Ray( eye, normalize( P - eye ) );
}
}

// trace primary rays
for (int i = 0; i < N; i++) bvh.Intersect( rays[i] );

// visualize result
for (int i = 0, y = 0; y < SCRHEIGHT; y++) for (int x = 0; x < SCRWIDTH; x++)
{
float avg = 0;
for (int s = 0; s < 16; s++, i++) if (rays[i].hit.t < 1000)
{
int primIdx = rays[i].hit.prim;
bvhvec3 v0 = *(bvhvec3*)&triangles[primIdx * 3 + 0];
bvhvec3 v1 = *(bvhvec3*)&triangles[primIdx * 3 + 1];
bvhvec3 v2 = *(bvhvec3*)&triangles[primIdx * 3 + 2];
bvhvec3 v0 = triangles[primIdx * 3 + 0];
bvhvec3 v1 = triangles[primIdx * 3 + 1];
bvhvec3 v2 = triangles[primIdx * 3 + 2];
bvhvec3 N = normalize( cross( v1 - v0, v2 - v0 ) );
avg += fabs( dot( N, normalize( bvhvec3( 1, 2, 3 ) ) ) );
}
Expand Down
178 changes: 178 additions & 0 deletions tiny_bvh_speedtest.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,178 @@
#define TINYBVH_IMPLEMENTATION
#include "tiny_bvh.h"
#ifdef _MSC_VER
#include "stdio.h" // for printf
#include "stdlib.h" // for rand
#else
#include <cstdio>
#endif

// 'screen resolution': see tiny_bvh_fenster.cpp; this program traces the
// same rays, but without visualization - just performance statistics.
#define SCRWIDTH 800
#define SCRHEIGHT 600

using namespace tinybvh;

bvhvec4 triangles[259 /* level 3 */ * 6 * 2 * 49 * 3]{};
int verts = 0;
BVH bvh;

// Returns rand() divided by RAND_MAX as a float, i.e. a pseudo-random
// value in the closed range [0,1].
float uniform_rand()
{
	const float sample = static_cast<float>( rand() );
	const float range = static_cast<float>( RAND_MAX );
	return sample / range;
}

#include <chrono>
// Minimal stopwatch built on std::chrono::high_resolution_clock.
// The timer starts running on construction; reset() restarts it and
// elapsed() reports the time since the last (re)start, in seconds.
struct Timer
{
	using Clock = std::chrono::high_resolution_clock;
	using Seconds = std::chrono::duration<float>;

	// moment of construction or of the most recent reset()
	Clock::time_point start;

	Timer() : start( Clock::now() ) {}

	// restart the measurement from 'now'
	void reset() { start = Clock::now(); }

	// seconds passed since 'start', as a float
	float elapsed() const
	{
		const Clock::time_point now = Clock::now();
		const Seconds passed = std::chrono::duration_cast<Seconds>( now - start );
		return passed.count();
	}
};

void sphere_flake( float x, float y, float z, float s, int d = 0 )
{
// procedural tesselated sphere flake object
#define P(F,a,b,c) p[i+F*64]={(float)a ,(float)b,(float)c}
bvhvec3 p[384], pos( x, y, z ), ofs( 3.5 );
for (int i = 0, u = 0; u < 8; u++) for (int v = 0; v < 8; v++, i++)
P( 0, u, v, 0 ), P( 1, u, 0, v ), P( 2, 0, u, v ),
P( 3, u, v, 7 ), P( 4, u, 7, v ), P( 5, 7, u, v );
for (int i = 0; i < 384; i++) p[i] = normalize( p[i] - ofs ) * s + pos;
for (int i = 0, side = 0; side < 6; side++, i += 8)
for (int u = 0; u < 7; u++, i++) for (int v = 0; v < 7; v++, i++)
triangles[verts++] = p[i], triangles[verts++] = p[i + 8],
triangles[verts++] = p[i + 1], triangles[verts++] = p[i + 1],
triangles[verts++] = p[i + 9], triangles[verts++] = p[i + 8];
if (d < 3) sphere_flake( x + s * 1.55f, y, z, s * 0.5f, d + 1 );
if (d < 3) sphere_flake( x - s * 1.5f, y, z, s * 0.5f, d + 1 );
if (d < 3) sphere_flake( x, y + s * 1.5f, z, s * 0.5f, d + 1 );
if (d < 3) sphere_flake( x, x - s * 1.5f, z, s * 0.5f, d + 1 );
if (d < 3) sphere_flake( x, y, z + s * 1.5f, s * 0.5f, d + 1 );
if (d < 3) sphere_flake( x, y, z - s * 1.5f, s * 0.5f, d + 1 );
}

int main()
{
// generate a sphere flake scene
sphere_flake( 0, 0, 0, 1.5f );

// setup view pyramid for a pinhole camera:
// eye, p1 (top-left), p2 (top-right) and p3 (bottom-left)
bvhvec3 eye( -3.5f, -1.5f, -6.5f ), view = normalize( bvhvec3( 3, 1.5f, 5 ) );
bvhvec3 right = normalize( cross( bvhvec3( 0, 1, 0 ), view ) );
bvhvec3 up = 0.8f * cross( view, right ), C = eye + 2 * view;
bvhvec3 p1 = C - right + up, p2 = C + right + up, p3 = C - right - up;

// generate primary rays in a cacheline-aligned buffer
int N = 0;
Ray* rays = (Ray*)ALIGNED_MALLOC( SCRWIDTH * SCRHEIGHT * 16 * sizeof( Ray ) );
for (int y = 0; y < SCRHEIGHT; y++) for (int x = 0; x < SCRWIDTH; x++)
{
for (int s = 0; s < 16; s++) // 16 samples per pixel
{
float u = (float)(x * 4 + (s & 3)) / (SCRWIDTH * 4);
float v = (float)(y * 4 + (s >> 2)) / (SCRHEIGHT * 4);
bvhvec3 P = p1 + u * (p2 - p1) + v * (p3 - p1);
rays[N++] = Ray( eye, normalize( P - eye ) );
}
}

// T I N Y _ B V H P E R F O R M A N C E M E A S U R E M E N T S

int minor = TINY_BVH_VERSION_MINOR, major = TINY_BVH_VERSION_MAJOR;
printf( "tiny_bvh version %i.%i performance statistics\n", major, minor );
printf( "----------------------------------------------------------------\n" );

Timer t;

// measure single-core bvh construction time - warming caches
printf( "BVH construction speed\n" );
printf( "warming caches...\n" );
bvh.Build( (bvhvec4*)triangles, verts / 3 );

// measure single-core bvh construction time - reference builder
t.reset();
printf( "- reference builder: " );
for (int pass = 0; pass < 3; pass++)
bvh.Build( (bvhvec4*)triangles, verts / 3 );
float buildTime = t.elapsed() / 3.0f;
printf( "%.2fms for %i triangles ", buildTime * 1000.0f, verts / 3 );
printf( "- %i nodes, SAH=%.2f\n", bvh.newNodePtr, bvh.SAHCost() );

#ifdef BVH_USEAVX
// measure single-core bvh construction time - AVX builder
t.reset();
printf( "- fast AVX builder: " );
for (int pass = 0; pass < 3; pass++) bvh.BuildAVX( (bvhvec4*)triangles, verts / 3 );
float buildTimeAVX = t.elapsed() / 3.0f;
printf( "%.2fms for %i triangles ", buildTimeAVX * 1000.0f, verts / 3 );
printf( "- %i nodes, SAH=%.2f\n", bvh.newNodePtr, bvh.SAHCost() );
#endif

// trace all rays once to warm the caches
printf( "BVH traversal speed\n" );
printf( "warming caches...\n" );
for (int i = 0; i < N; i++) bvh.Intersect( rays[i] );

// trace all rays three times to estimate average performance
// - single core version
t.reset();
printf( "- CPU, coherent, basic 2-way layout, ST: " );
for (int pass = 0; pass < 3; pass++)
for (int i = 0; i < N; i++) bvh.Intersect( rays[i] );
float traceTimeST = t.elapsed() / 3.0f;
float mrays = (float)N / traceTimeST;
printf( "%.2fms for %.2fM rays (%.2fMRays/s)\n", traceTimeST * 1000, (float)N * 1e-6f, mrays * 1e-6f );

// trace all rays three times to estimate average performance
// - multi-core version (using OpenMP and batches of 10,000 rays)
t.reset();
printf( "- CPU, coherent, basic 2-way layout, MT: " );
for (int j = 0; j < 3; j++)
{
const int batchCount = N / 10000;
#pragma omp parallel for schedule(dynamic)
for (int batch = 0; batch < batchCount; batch++)
{
const int batchStart = batch * 10000;
for (int i = 0; i < 10000; i++) bvh.Intersect( rays[batchStart + i] );
}
}
float traceTimeMT = t.elapsed() / 3.0f;
mrays = (float)N / traceTimeMT;
printf( "%.2fms for %.2fM rays (%.2fMRays/s)\n", traceTimeMT * 1000, (float)N * 1e-6f, mrays * 1e-6f );

// shuffle rays for the next experiment
for( int i = 0; i < N; i++ )
{
int j = (i + 17 * rand()) % N;
Ray t = rays[i];
rays[i] = rays[j];
rays[j] = t;
}

// trace all rays three times to estimate average performance
// - divergent distribution, multi-core
t.reset();
printf( "- CPU, incoherent, basic 2-way layout, MT: " );
for (int j = 0; j < 3; j++)
{
const int batchCount = N / 10000;
#pragma omp parallel for schedule(dynamic)
for (int batch = 0; batch < batchCount; batch++)
{
const int batchStart = batch * 10000;
for (int i = 0; i < 10000; i++) bvh.Intersect( rays[batchStart + i] );
}
}
float traceTimeMTI = t.elapsed() / 3.0f;
mrays = (float)N / traceTimeMTI;
printf( "%.2fms for %.2fM rays (%.2fMRays/s)\n", traceTimeMTI * 1000, (float)N * 1e-6f, mrays * 1e-6f );

// all done.
return 0;
}
10 changes: 10 additions & 0 deletions tiny_bvh_test.sln
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@ Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "tiny_bvh_renderer", "vcproj
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "tiny_bvh_test", "vcproj\tiny_bvh_test.vcxproj", "{0B5C86B2-9438-49E3-BF1A-4E1593BB436D}"
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "tiny_bvh_speedtest", "vcproj\tiny_bvh_speedtest.vcxproj", "{547F1A98-C394-46FB-AF15-3DB009D758FA}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|x64 = Debug|x64
Expand Down Expand Up @@ -41,6 +43,14 @@ Global
{0B5C86B2-9438-49E3-BF1A-4E1593BB436D}.Release|x64.Build.0 = Release|x64
{0B5C86B2-9438-49E3-BF1A-4E1593BB436D}.Release|x86.ActiveCfg = Release|Win32
{0B5C86B2-9438-49E3-BF1A-4E1593BB436D}.Release|x86.Build.0 = Release|Win32
{547F1A98-C394-46FB-AF15-3DB009D758FA}.Debug|x64.ActiveCfg = Debug|x64
{547F1A98-C394-46FB-AF15-3DB009D758FA}.Debug|x64.Build.0 = Debug|x64
{547F1A98-C394-46FB-AF15-3DB009D758FA}.Debug|x86.ActiveCfg = Debug|Win32
{547F1A98-C394-46FB-AF15-3DB009D758FA}.Debug|x86.Build.0 = Debug|Win32
{547F1A98-C394-46FB-AF15-3DB009D758FA}.Release|x64.ActiveCfg = Release|x64
{547F1A98-C394-46FB-AF15-3DB009D758FA}.Release|x64.Build.0 = Release|x64
{547F1A98-C394-46FB-AF15-3DB009D758FA}.Release|x86.ActiveCfg = Release|Win32
{547F1A98-C394-46FB-AF15-3DB009D758FA}.Release|x86.Build.0 = Release|Win32
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
Expand Down
144 changes: 144 additions & 0 deletions vcproj/tiny_bvh_speedtest.vcxproj
Original file line number Diff line number Diff line change
@@ -0,0 +1,144 @@
<?xml version="1.0" encoding="utf-8"?>
<Project DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<ItemGroup Label="ProjectConfigurations">
<ProjectConfiguration Include="Debug|Win32">
<Configuration>Debug</Configuration>
<Platform>Win32</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Release|Win32">
<Configuration>Release</Configuration>
<Platform>Win32</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Debug|x64">
<Configuration>Debug</Configuration>
<Platform>x64</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Release|x64">
<Configuration>Release</Configuration>
<Platform>x64</Platform>
</ProjectConfiguration>
</ItemGroup>
<ItemGroup>
<ClInclude Include="..\tiny_bvh.h" />
</ItemGroup>
<ItemGroup>
<ClCompile Include="..\tiny_bvh_speedtest.cpp" />
</ItemGroup>
<PropertyGroup Label="Globals">
<VCProjectVersion>17.0</VCProjectVersion>
<Keyword>Win32Proj</Keyword>
<ProjectGuid>{547F1A98-C394-46FB-AF15-3DB009D758FA}</ProjectGuid>
<RootNamespace>tinybvhspeedtest</RootNamespace>
<WindowsTargetPlatformVersion>10.0</WindowsTargetPlatformVersion>
</PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
<ConfigurationType>Application</ConfigurationType>
<UseDebugLibraries>true</UseDebugLibraries>
<PlatformToolset>v143</PlatformToolset>
<CharacterSet>Unicode</CharacterSet>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
<ConfigurationType>Application</ConfigurationType>
<UseDebugLibraries>false</UseDebugLibraries>
<PlatformToolset>v143</PlatformToolset>
<WholeProgramOptimization>true</WholeProgramOptimization>
<CharacterSet>Unicode</CharacterSet>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
<ConfigurationType>Application</ConfigurationType>
<UseDebugLibraries>true</UseDebugLibraries>
<PlatformToolset>v143</PlatformToolset>
<CharacterSet>Unicode</CharacterSet>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
<ConfigurationType>Application</ConfigurationType>
<UseDebugLibraries>false</UseDebugLibraries>
<PlatformToolset>v143</PlatformToolset>
<WholeProgramOptimization>true</WholeProgramOptimization>
<CharacterSet>Unicode</CharacterSet>
</PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
<ImportGroup Label="ExtensionSettings">
</ImportGroup>
<ImportGroup Label="Shared">
</ImportGroup>
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
</ImportGroup>
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
</ImportGroup>
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
</ImportGroup>
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
</ImportGroup>
<PropertyGroup Label="UserMacros" />
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
<ClCompile>
<WarningLevel>Level3</WarningLevel>
<SDLCheck>true</SDLCheck>
<PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<ConformanceMode>true</ConformanceMode>
</ClCompile>
<Link>
<SubSystem>Console</SubSystem>
<GenerateDebugInformation>true</GenerateDebugInformation>
</Link>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
<ClCompile>
<WarningLevel>Level3</WarningLevel>
<FunctionLevelLinking>true</FunctionLevelLinking>
<IntrinsicFunctions>true</IntrinsicFunctions>
<SDLCheck>true</SDLCheck>
<PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<ConformanceMode>true</ConformanceMode>
</ClCompile>
<Link>
<SubSystem>Console</SubSystem>
<EnableCOMDATFolding>true</EnableCOMDATFolding>
<OptimizeReferences>true</OptimizeReferences>
<GenerateDebugInformation>true</GenerateDebugInformation>
</Link>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
<!-- Debug|x64 compiler/linker settings. Unlike the Win32 configurations in
     this file, this one enables AVX code generation, the fast floating-point
     model and OpenMP; tiny_bvh_speedtest.cpp relies on OpenMP for its
     '#pragma omp parallel for' multi-threaded measurements. -->
<ClCompile>
<WarningLevel>Level3</WarningLevel>
<SDLCheck>true</SDLCheck>
<PreprocessorDefinitions>_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<ConformanceMode>true</ConformanceMode>
<EnableEnhancedInstructionSet>AdvancedVectorExtensions</EnableEnhancedInstructionSet>
<FloatingPointModel>Fast</FloatingPointModel>
<OpenMPSupport>true</OpenMPSupport>
</ClCompile>
<Link>
<SubSystem>Console</SubSystem>
<GenerateDebugInformation>true</GenerateDebugInformation>
</Link>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
<!-- Release|x64 compiler/linker settings. In addition to the usual release
     options this enables AVX code generation, the fast floating-point model
     and OpenMP. The Win32 configurations in this file do not set these, so
     the '#pragma omp parallel for' loops in tiny_bvh_speedtest.cpp are
     presumably only multi-threaded in the x64 builds (verify if Win32
     numbers matter). -->
<ClCompile>
<WarningLevel>Level3</WarningLevel>
<FunctionLevelLinking>true</FunctionLevelLinking>
<IntrinsicFunctions>true</IntrinsicFunctions>
<SDLCheck>true</SDLCheck>
<PreprocessorDefinitions>NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<ConformanceMode>true</ConformanceMode>
<EnableEnhancedInstructionSet>AdvancedVectorExtensions</EnableEnhancedInstructionSet>
<FloatingPointModel>Fast</FloatingPointModel>
<OpenMPSupport>true</OpenMPSupport>
</ClCompile>
<Link>
<SubSystem>Console</SubSystem>
<EnableCOMDATFolding>true</EnableCOMDATFolding>
<OptimizeReferences>true</OptimizeReferences>
<GenerateDebugInformation>true</GenerateDebugInformation>
</Link>
</ItemDefinitionGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets">
</ImportGroup>
</Project>

0 comments on commit 0402288

Please sign in to comment.