Skip to content

Commit

Permalink
Cleaned up new memory interface.
Browse files Browse the repository at this point in the history
  • Loading branch information
jbikker committed Nov 18, 2024
1 parent c8c3d3c commit 2eb739a
Show file tree
Hide file tree
Showing 3 changed files with 38 additions and 34 deletions.
40 changes: 21 additions & 19 deletions tiny_bvh.h
Original file line number Diff line number Diff line change
Expand Up @@ -124,51 +124,51 @@ inline size_t make_multiple_64( size_t x ) { return (x + 63) & ~0x3f; }
#ifdef _MSC_VER // Visual Studio / C11
#define ALIGNED( x ) __declspec( align( x ) )
namespace tinybvh {
inline void* default_aligned_malloc( size_t size, void* = nullptr )
inline void* malloc64( size_t size, void* = nullptr )
{
return size == 0 ? 0 : _aligned_malloc( make_multiple_64( size ), 64 );
}
inline void default_aligned_free( void* ptr, void* = nullptr ) { _aligned_free( ptr ); }
inline void free64( void* ptr, void* = nullptr ) { _aligned_free( ptr ); }
}
#elif defined(__EMSCRIPTEN__) // EMSCRIPTEN - needs to be before gcc and clang to avoid misdetection
#define ALIGNED( x ) __attribute__( ( aligned( x ) ) )
#if defined(__wasm_simd128__) || defined(__wasm_relaxed_simd__)
// https://emscripten.org/docs/porting/simd.html
#include <xmmintrin.h>
namespace tinybvh {
inline void* default_aligned_malloc( size_t size, void* = nullptr )
inline void* malloc64( size_t size, void* = nullptr )
{
return size == 0 ? 0 : _mm_malloc( size, 64 );
}
inline void default_aligned_free( void* ptr, void* = nullptr ) { _mm_free( ptr ); }
inline void free64( void* ptr, void* = nullptr ) { _mm_free( ptr ); }
}
#else
namespace tinybvh {
inline void* default_aligned_malloc( size_t size, void* = nullptr )
inline void* malloc64( size_t size, void* = nullptr )
{
return size == 0 ? 0 : aligned_alloc( 64, make_multiple_64( size ) );
}
inline void default_aligned_free( void* ptr, void* = nullptr ) { free( ptr ); }
inline void free64( void* ptr, void* = nullptr ) { free( ptr ); }
}
#endif
#else // gcc / clang
#define ALIGNED( x ) __attribute__( ( aligned( x ) ) )
#if defined(__x86_64__) || defined(_M_X64)
#include <xmmintrin.h>
namespace tinybvh {
inline void* default_aligned_malloc( size_t size, void* = nullptr )
inline void* malloc64( size_t size, void* = nullptr )
{
return size == 0 ? 0 : _mm_malloc( make_multiple_64( size ), 64 );
}
inline void default_aligned_free( void* ptr, void* = nullptr ) { _mm_free( ptr ); }
inline void free64( void* ptr, void* = nullptr ) { _mm_free( ptr ); }
}
#else
namespace tinybvh {
inline void* default_aligned_malloc( size_t size, void* = nullptr )
inline void* malloc64( size_t size, void* = nullptr )
{
return size == 0 ? 0 : aligned_alloc( 64, make_multiple_64( size ) );
}
inline void default_aligned_free( void* ptr, void* = nullptr ) { free( ptr ); }
inline void free64( void* ptr, void* = nullptr ) { free( ptr ); }
}
#endif
#endif
Expand Down Expand Up @@ -375,8 +375,8 @@ struct Ray

struct BVHContext
{
void* (*malloc)(size_t size, void* userdata) = default_aligned_malloc;
void (*free)(void* ptr, void* userdata) = default_aligned_free;
void* (*malloc)(size_t size, void* userdata) = malloc64;
void (*free)(void* ptr, void* userdata) = free64;
void* userdata = nullptr;
};

Expand Down Expand Up @@ -2480,11 +2480,12 @@ void BVH::BuildAVX( const bvhvec4* vertices, const unsigned primCount )
{
int test = BVHBINS;
if (test != 8) assert( false ); // AVX builders require BVHBINS == 8.
assert( ((long long)vertices & 63) == 0 ); // buffer must be cacheline-aligned
// aligned data
ALIGNED( 64 ) __m256 binbox[3 * BVHBINS]; // 768 bytes
ALIGNED( 64 ) __m256 binboxOrig[3 * BVHBINS]; // 768 bytes
ALIGNED( 64 ) __m256 binbox[3 * BVHBINS]; // 768 bytes
ALIGNED( 64 ) __m256 binboxOrig[3 * BVHBINS]; // 768 bytes
ALIGNED( 64 ) unsigned count[3][BVHBINS]{}; // 96 bytes
ALIGNED( 64 ) __m256 bestLBox, bestRBox; // 64 bytes
ALIGNED( 64 ) __m256 bestLBox, bestRBox; // 64 bytes
// some constants
static const __m128 max4 = _mm_set1_ps( -1e30f ), half4 = _mm_set1_ps( 0.5f );
static const __m128 two4 = _mm_set1_ps( 2.0f ), min1 = _mm_set1_ps( -1 );
Expand Down Expand Up @@ -3141,11 +3142,12 @@ void BVH::BuildNEON( const bvhvec4* vertices, const unsigned primCount )
{
int test = BVHBINS;
if (test != 8) assert( false ); // AVX builders require BVHBINS == 8.
assert( ((long long)vertices & 63) == 0 ); // buffer must be cacheline-aligned
// aligned data
ALIGNED( 64 ) float32x4x2_t binbox[3 * BVHBINS]; // 768 bytes
ALIGNED( 64 ) float32x4x2_t binboxOrig[3 * BVHBINS]; // 768 bytes
ALIGNED( 64 ) unsigned count[3][BVHBINS]{}; // 96 bytes
ALIGNED( 64 ) float32x4x2_t bestLBox, bestRBox; // 64 bytes
ALIGNED( 64 ) float32x4x2_t binbox[3 * BVHBINS]; // 768 bytes
ALIGNED( 64 ) float32x4x2_t binboxOrig[3 * BVHBINS]; // 768 bytes
ALIGNED( 64 ) unsigned count[3][BVHBINS]{}; // 96 bytes
ALIGNED( 64 ) float32x4x2_t bestLBox, bestRBox; // 64 bytes
// some constants
static const float32x4_t max4 = vdupq_n_f32( -1e30f ), half4 = vdupq_n_f32( 0.5f );
static const float32x4_t two4 = vdupq_n_f32( 2.0f ), min1 = vdupq_n_f32( -1 );
Expand Down
22 changes: 11 additions & 11 deletions tiny_bvh_speedtest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,16 +14,16 @@

// tests to perform
#define BUILD_REFERENCE
// #define BUILD_AVX
#define BUILD_AVX
#define BUILD_NEON
//#define BUILD_SBVH
//#define TRAVERSE_2WAY_ST
//#define TRAVERSE_ALT2WAY_ST
//#define TRAVERSE_SOA2WAY_ST
//#define TRAVERSE_2WAY_MT
//#define TRAVERSE_2WAY_MT_PACKET
//#define TRAVERSE_2WAY_MT_DIVERGENT
//#define TRAVERSE_OPTIMIZED_ST
#define BUILD_SBVH
#define TRAVERSE_2WAY_ST
#define TRAVERSE_ALT2WAY_ST
#define TRAVERSE_SOA2WAY_ST
#define TRAVERSE_2WAY_MT
#define TRAVERSE_2WAY_MT_PACKET
#define TRAVERSE_2WAY_MT_DIVERGENT
#define TRAVERSE_OPTIMIZED_ST
// #define EMBREE_BUILD // win64-only for now.
// #define EMBREE_TRAVERSE // win64-only for now.

Expand Down Expand Up @@ -164,7 +164,7 @@ int main()
s.seekp( 0 );
s.read( (char*)&verts, 4 );
printf( "Loading triangle data (%i tris).\n", verts );
verts *= 3, triangles = (bvhvec4*)tinybvh::default_aligned_malloc( verts * 16 );
verts *= 3, triangles = (bvhvec4*)tinybvh::malloc64( verts * 16 );
s.read( (char*)triangles, verts * 16 );
#else
// generate a sphere flake scene
Expand All @@ -186,7 +186,7 @@ int main()
// generate primary rays in a cacheline-aligned buffer - and, for data locality:
// organized in 4x4 pixel tiles, 16 samples per pixel, so 256 rays per tile.
int N = 0;
Ray* rays = (Ray*)tinybvh::default_aligned_malloc( SCRWIDTH * SCRHEIGHT * 16 * sizeof( Ray ) );
Ray* rays = (Ray*)tinybvh::malloc64( SCRWIDTH * SCRHEIGHT * 16 * sizeof( Ray ) );
for (int ty = 0; ty < SCRHEIGHT / 4; ty++) for (int tx = 0; tx < SCRWIDTH / 4; tx++)
{
for (int y = 0; y < 4; y++) for (int x = 0; x < 4; x++)
Expand Down
10 changes: 6 additions & 4 deletions tiny_ocl.h
Original file line number Diff line number Diff line change
Expand Up @@ -115,10 +115,11 @@ struct oclvec3 { float x, y, z; };
// ============================================================================

// OpenCL context
// You only need to explicitly instantiate this if you want to override the
// default memory allocator used by tinyocl::Buffer. In all other cases, the
// first use of tinyocl (creating a buffer, loading a kernel) will take care
// of this for you transparently.
// *Only!* in case you want to override the default memory allocator used by
// tinyocl::Buffer: call OpenCL::CreateInstance with OpenCLContext fields describing
// your allocator and deallocator, before using any tinyocl functionality.
// In all other cases, the first use of tinyocl (creating a buffer, loading a
// kernel) will take care of this for you transparently.
struct OpenCLContext
{
void* (*malloc)(size_t size, void* userdata) = default_aligned_malloc;
Expand All @@ -132,6 +133,7 @@ class OpenCL
OpenCLContext context;
void* AlignedAlloc( size_t size );
void AlignedFree( void* ptr );
static void CreateInstance( OpenCLContext ctx ) { ocl = new OpenCL( ctx ); }
static OpenCL* GetInstance() { return ocl; }
inline static OpenCL* ocl = 0;
};
Expand Down

0 comments on commit 2eb739a

Please sign in to comment.