Skip to content

Commit

Permalink
Update gpurt from commit ce49ca42
Browse files Browse the repository at this point in the history
Support pull UserMarker into RRA
[Continuations] Remove _cont_SetupRayGen
Report 0 update scratch size for non-updatable AS
Add HLSL version of static_assert
BatchedBuilder: Remove TLAS serialization workaround
Add ganged queue check
Separate CPS implementation per RtIp level
BatchedBuilder: Support enableInsertBarriersInBuildAS setting
Disable triangle pairing of inactive triangles
BatchedBuilder: Add debug option to check for buffer overlaps
Use packedInstanceContribution for RT_20
Shader Cleanliness Enforcement Script
Enable merged EncodeUpdate for VK
Update RayQuery intrinsic in GPURT library
Unifying dxc options
Unify RootSignature
Canonicalize paths used in validation comparison
Enable dxc optimizations for VK/SPV
Remove glslang compilation path
Fix HLSL root signature mapping for compile time CBV
Fix refit issues with fast LBVH enabled
[Continuations] Restrict RtIp 1_1 and 2_0 Traversal variant compilation
Unify triangle data fetching code, add indirect builds support to trivial builder and STGB, and fixes
[Continuations] Optimize opaque RtIp2_0 Traversal path
[Continuations] Add CpsCandidatePrimitiveMode option
[Continuations] Implement deferred anyhit shading mode
BatchedBuilder: minor fixups
Fix issues with cputraversal and update scratch size
  • Loading branch information
qiaojbao committed Aug 9, 2024
1 parent 6ca06bb commit c5b1079
Show file tree
Hide file tree
Showing 84 changed files with 2,350 additions and 3,028 deletions.
9 changes: 8 additions & 1 deletion backends/pal/gpurtPalBridge.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -85,13 +85,20 @@ size_t GPURT_API_ENTRY GetDeviceSize()

// =====================================================================================================================
// Exported entry point that returns the shader code for the GPURT shader library.
//
// This is a thin wrapper: it forwards its arguments unchanged to Internal::GetShaderLibraryCode and returns that
// result. Which overload is called depends on GPURT_CLIENT_INTERFACE_MAJOR_VERSION:
//   - >= 48: the ray tracing IP level is passed along with the feature flags.
//   - <  48: only the feature flags are passed.
//
// @param rayTracingIpLevel [in] Ray tracing IP level (only present for client interface major version >= 48)
// @param flags             [in] Shader library feature flags
//
// @return Shader code for the shader library
//
// NOTE(review): the first "ShaderLibraryFeatureFlags flags)" line directly below the function name duplicates the
// parameter that follows the #if/#endif block; it looks like the pre-change side of this diff hunk -- confirm.
PipelineShaderCode GPURT_API_ENTRY GetShaderLibraryCode(
ShaderLibraryFeatureFlags flags)
#if GPURT_CLIENT_INTERFACE_MAJOR_VERSION >= 48
const Pal::RayTracingIpLevel rayTracingIpLevel,
#endif
ShaderLibraryFeatureFlags flags)
{
#if GPURT_CLIENT_INTERFACE_MAJOR_VERSION < 40
// Only emitted for clients older than interface version 40.
// NOTE(review): presumably exports this function under an unmangled name on MSVC -- confirm against the macro.
GPURT_EXPORT_UNMANGLED_SYMBOL_MSVC
#endif

#if GPURT_CLIENT_INTERFACE_MAJOR_VERSION >= 48
// Newer clients select the library by ray tracing IP level as well as by feature flags.
return Internal::GetShaderLibraryCode(rayTracingIpLevel, flags);
#else
// Older clients select the library by feature flags only.
return Internal::GetShaderLibraryCode(flags);
#endif
}

// =====================================================================================================================
Expand Down
10 changes: 10 additions & 0 deletions cmake/GpuRtGenerateShaders.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,16 @@ if(GPURT_CLIENT_API STREQUAL "VULKAN")
${gpurtDxcCompiler}
${gpurtSpirvRemap}

COMMAND Python3::Interpreter "${gpurtCompileScript}"
--outputDir "${gpurtOutputDir}"
--validateShadersClean
${COMPILER_ARGUMENT}
--defines "\"${gpurtDefines}\""
--includePaths "\"${gpurtIncludeDirectories}\""
"${gpurtDxilBvhShader}"
"${gpurtShadersSourceDir}"
"${gpurtSscStrict}"

COMMAND Python3::Interpreter "${gpurtCompileScript}"
--vulkan
"${SPIRV_FLAG}"
Expand Down
22 changes: 21 additions & 1 deletion gpurt/gpurt.h
Original file line number Diff line number Diff line change
Expand Up @@ -373,6 +373,15 @@ enum class AccelStructBuilderType : uint32
static_assert(uint32(AccelStructBuilderType::Gpu) == 0, "Enums encoded in the acceleration structure must not change.");
static_assert(uint32(AccelStructBuilderType::Cpu) == 1, "Enums encoded in the acceleration structure must not change.");

// Selects for which acceleration structure levels a rebuild is performed instead of an update.
// TopLevel and BottomLevel occupy separate bits; All is the bitwise OR of the two.
enum ForceRebuildForUpdatesMode : uint32
{
    None        = 0,                      // Build flags are never overridden
    TopLevel    = 1u << 0,                // Only top level acceleration structures have their flags overridden
    BottomLevel = 1u << 1,                // Only bottom level acceleration structures have their flags overridden
    All         = TopLevel | BottomLevel, // Every acceleration structure update has its flags overridden
};

// Modes for which interior box nodes in BLAS are written as fp16
enum class Fp16BoxNodesInBlasMode : uint32
{
Expand Down Expand Up @@ -732,6 +741,8 @@ struct DeviceSettings
Fp16BoxNodesInBlasMode fp16BoxNodesInBlasMode; // Mode for which interior nodes in BLAS are FP16
float fp16BoxModeMixedSaThresh; // For fp16 mode "mixed", surface area threshold

ForceRebuildForUpdatesMode forceRebuildForUpdates; // When to perform a rebuild instead of an update

Pal::RayTracingIpLevel emulatedRtIpLevel; // Client request RTIP level, used to override IP level related GPURT settings.

struct
Expand Down Expand Up @@ -767,6 +778,7 @@ struct DeviceSettings
uint32 enableFastLBVH : 1; // Enable the Fast LBVH path

uint32 enableRemapScratchBuffer : 1; // Enable remapping bvh2 data from ScratchBuffer to ResultBuffer
uint32 checkBufferOverlapsInBatch : 1;
};

uint64 accelerationStructureUUID; // Acceleration Structure UUID
Expand Down Expand Up @@ -1019,6 +1031,8 @@ struct RtDispatchInfo
ShaderTable missShaderTable;
ShaderTable hitGroupTable;
ShaderTable callableShaderTable;

uint64 userMarkerContext;
};

#if GPURT_DEVELOPER
Expand Down Expand Up @@ -1389,7 +1403,10 @@ size_t GPURT_API_ENTRY GetDeviceSize();
// @return Shader code for the shader library
//
PipelineShaderCode GPURT_API_ENTRY GetShaderLibraryCode(
ShaderLibraryFeatureFlags flags);
#if GPURT_CLIENT_INTERFACE_MAJOR_VERSION >= 48
const Pal::RayTracingIpLevel rayTracingIpLevel,
#endif
ShaderLibraryFeatureFlags flags);

// =====================================================================================================================
// Returns GPURT shader library function table for input ray tracing IP level.
Expand Down Expand Up @@ -1601,6 +1618,9 @@ class IDevice
virtual const ClientCallbacks& GetClientCallbacks() const = 0;
virtual const DeviceInitInfo& GetInitInfo() const = 0;

// Check if a build is a good candidate for ACE offload (typically barrier-free cases)
virtual bool ShouldUseGangedAceForBuild(const AccelStructBuildInputs& inputs) const = 0;

protected:

/// Client must create objects by explicitly calling CreateDevice method
Expand Down
14 changes: 9 additions & 5 deletions gpurt/gpurtAccelStruct.h
Original file line number Diff line number Diff line change
Expand Up @@ -48,13 +48,11 @@
#define GPURT_ACCEL_STRUCT_MINOR_VERSION 3
#define GPURT_ACCEL_STRUCT_VERSION ((GPURT_ACCEL_STRUCT_MAJOR_VERSION << 16) | GPURT_ACCEL_STRUCT_MINOR_VERSION)

#include "../src/shared/assert.h"

#ifdef __cplusplus
namespace GpuRt
{

#define GPURT_STATIC_ASSERT(condition, message) static_assert(condition, message)
#else
#define GPURT_STATIC_ASSERT(condition, message)
#endif

typedef uint32_t uint32;
Expand All @@ -76,10 +74,12 @@ struct AccelStructDataOffsets
#define ACCEL_STRUCT_OFFSETS_PRIM_NODE_PTRS_OFFSET 12
#define ACCEL_STRUCT_OFFSETS_SIZE 16

#ifdef __cplusplus
GPURT_STATIC_ASSERT(ACCEL_STRUCT_OFFSETS_INTERNAL_NODES_OFFSET == offsetof(AccelStructDataOffsets, internalNodes), "");
GPURT_STATIC_ASSERT(ACCEL_STRUCT_OFFSETS_LEAF_NODES_OFFSET == offsetof(AccelStructDataOffsets, leafNodes), "");
GPURT_STATIC_ASSERT(ACCEL_STRUCT_OFFSETS_GEOMETRY_INFO_OFFSET == offsetof(AccelStructDataOffsets, geometryInfo), "");
GPURT_STATIC_ASSERT(ACCEL_STRUCT_OFFSETS_PRIM_NODE_PTRS_OFFSET == offsetof(AccelStructDataOffsets, primNodePtrs), "");
#endif
GPURT_STATIC_ASSERT(sizeof(AccelStructDataOffsets) == 16,
"AccelStructDataOffsets size cannot change because it is embedded in AccelStructHeader.");

Expand All @@ -105,10 +105,12 @@ struct AccelStructMetadataHeader
#define ACCEL_STRUCT_METADATA_HEADER_SIZE 84

GPURT_STATIC_ASSERT(ACCEL_STRUCT_METADATA_HEADER_SIZE == sizeof(AccelStructMetadataHeader), "Acceleration structure header mismatch");
#ifdef __cplusplus
GPURT_STATIC_ASSERT(ACCEL_STRUCT_METADATA_VA_LO_OFFSET == offsetof(AccelStructMetadataHeader, addressLo), "");
GPURT_STATIC_ASSERT(ACCEL_STRUCT_METADATA_VA_HI_OFFSET == offsetof(AccelStructMetadataHeader, addressHi), "");
GPURT_STATIC_ASSERT(ACCEL_STRUCT_METADATA_SIZE_OFFSET == offsetof(AccelStructMetadataHeader, sizeInBytes), "");
GPURT_STATIC_ASSERT(ACCEL_STRUCT_METADATA_TASK_COUNTER_OFFSET == offsetof(AccelStructMetadataHeader, taskCounter), "");
#endif

#ifdef __cplusplus
// =====================================================================================================================
Expand Down Expand Up @@ -287,13 +289,14 @@ struct AccelStructHeader

GPURT_STATIC_ASSERT(ACCEL_STRUCT_HEADER_SIZE == sizeof(AccelStructHeader),
"Acceleration structure header mismatch");
#ifdef __cplusplus
GPURT_STATIC_ASSERT(ACCEL_STRUCT_HEADER_INFO_OFFSET == offsetof(AccelStructHeader, info), "");
GPURT_STATIC_ASSERT(ACCEL_STRUCT_HEADER_METADATA_SIZE_OFFSET == offsetof(AccelStructHeader, metadataSizeInBytes), "");
GPURT_STATIC_ASSERT(ACCEL_STRUCT_HEADER_BYTE_SIZE_OFFSET == offsetof(AccelStructHeader, sizeInBytes), "");
GPURT_STATIC_ASSERT(ACCEL_STRUCT_HEADER_NUM_PRIMS_OFFSET == offsetof(AccelStructHeader, numPrimitives), "");
GPURT_STATIC_ASSERT(ACCEL_STRUCT_HEADER_NUM_ACTIVE_PRIMS_OFFSET == offsetof(AccelStructHeader, numActivePrims), "");

GPURT_STATIC_ASSERT(ACCEL_STRUCT_RESERVERED0_OFFSET == offsetof(AccelStructHeader, reserved0), "");
GPURT_STATIC_ASSERT(ACCEL_STRUCT_RESERVERED0_OFFSET == offsetof(AccelStructHeader, reserved0), "");

GPURT_STATIC_ASSERT(ACCEL_STRUCT_HEADER_NUM_DESCS_OFFSET == offsetof(AccelStructHeader, numDescs), "");
GPURT_STATIC_ASSERT(ACCEL_STRUCT_HEADER_GEOMETRY_TYPE_OFFSET == offsetof(AccelStructHeader, geometryType), "");
Expand All @@ -309,6 +312,7 @@ GPURT_STATIC_ASSERT(ACCEL_STRUCT_HEADER_FP32_ROOT_BOX_OFFSET == offset
GPURT_STATIC_ASSERT(ACCEL_STRUCT_HEADER_INFO_2_OFFSET == offsetof(AccelStructHeader, info2), "");
GPURT_STATIC_ASSERT(ACCEL_STRUCT_HEADER_PACKED_FLAGS_OFFSET == offsetof(AccelStructHeader, packedFlags), "");
GPURT_STATIC_ASSERT(ACCEL_STRUCT_HEADER_COMPACTED_BYTE_SIZE_OFFSET == offsetof(AccelStructHeader, compactedSizeInBytes), "");
#endif

#ifdef __cplusplus
// =====================================================================================================================
Expand Down
1 change: 1 addition & 0 deletions gpurt/gpurtBuildSettings.h
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,7 @@ struct CompileTimeBuildSettings
uint32 rebraidQualityHeuristic;
uint32 unused11;
uint32 unused12;
uint32 unused13;
};

#define BUILD_SETTINGS_DATA_TOP_LEVEL_BUILD_ID 0
Expand Down
1 change: 1 addition & 0 deletions gpurt/gpurtCounter.h
Original file line number Diff line number Diff line change
Expand Up @@ -531,6 +531,7 @@ enum class RayHistoryMetadataKind : uint32
CounterInfo = 1,
Unused = 2,
TraversalFlags = 3,
UserMarkerInfo = 4
};

// ====================================================================================================================
Expand Down
24 changes: 16 additions & 8 deletions gpurt/gpurtDispatch.h
Original file line number Diff line number Diff line change
Expand Up @@ -41,12 +41,18 @@ typedef uint64_t uint64;
#define constexpr static const
#endif

// Element count of the uint32 arrays that hold a buffer SRD in this header.
// Clients older than interface version 47 use 4 elements; version 47 and newer use 8.
#if GPURT_CLIENT_INTERFACE_MAJOR_VERSION < 47
constexpr uint32 MaxBufferSrdSize = 4;
#else
constexpr uint32 MaxBufferSrdSize = 8;
#endif

// Dispatch rays arguments top-level descriptor table (GPU structure)
// Holds the GPU virtual address of the DispatchRays constant buffer followed by two buffer SRDs stored as raw
// uint32 arrays.
struct DispatchRaysTopLevelData
{
// NOTE(review): the three members with a fixed [4] extent repeat the names of the MaxBufferSrdSize-sized members
// that follow them; they look like the pre-change side of this diff hunk -- confirm that only one set is intended.
uint64 dispatchRaysConstGpuVa; // DispatchRays info constant buffer GPU VA
uint32 internalUavBufferSrd[4]; // Internal UAV shader resource descriptor
uint32 accelStructTrackerSrd[4]; // Structured buffer SRD pointing to the accel struct tracker
uint64 dispatchRaysConstGpuVa; // DispatchRays info constant buffer GPU VA
uint32 internalUavBufferSrd[MaxBufferSrdSize]; // Internal UAV shader resource descriptor
uint32 accelStructTrackerSrd[MaxBufferSrdSize]; // Structured buffer SRD pointing to the accel struct tracker
};

// Dispatch rays constant buffer data (GPU structure). Note, using unaligned uint64_t in HLSL constant buffers requires
Expand Down Expand Up @@ -108,7 +114,6 @@ constexpr uint32 DispatchRaysConstantsDw = sizeof(DispatchRaysConstants) / sizeo
#endif

constexpr uint32 MaxSupportedIndirectCounters = 8;
constexpr uint32 MaxBufferSrdSize = 4;

// Resource bindings required for InitExecuteIndirect
struct InitExecuteIndirectUserData
Expand Down Expand Up @@ -153,9 +158,9 @@ struct InitExecuteIndirectConstants
uint32 counterMode; // Counter mode
uint32 counterRayIdRangeBegin; // Counter ray ID range begin
uint32 counterRayIdRangeEnd; // Counter ray ID range end
uint32 cpsBackendStackSize; // Scratch memory used by a compiler backend, start at offset 0
uint32 padding0; // Padding for 16-byte alignment
uint32 padding1; // Padding for 16-byte alignment
uint32 padding2; // Padding for 16-byte alignment

#if __cplusplus
// Internal counter buffer SRDs
Expand All @@ -164,16 +169,19 @@ struct InitExecuteIndirectConstants
// Internal acceleration structure tracker buffer SRD.
uint32 accelStructTrackerSrd[MaxBufferSrdSize];
#else
uint4 internalUavSrd[MaxSupportedIndirectCounters];
uint4 accelStructTrackerSrd;
uint4 internalUavSrd[MaxSupportedIndirectCounters][MaxBufferSrdSize / 4];
uint4 accelStructTrackerSrd[MaxBufferSrdSize / 4];
#endif

};

constexpr uint32 InitExecuteIndirectConstantsDw = sizeof(InitExecuteIndirectConstants) / sizeof(uint32);

#if __cplusplus
#if GPURT_CLIENT_INTERFACE_MAJOR_VERSION >= 47
static_assert((MaxBufferSrdSize == 8), "Buffer SRD size changed, affected shaders and constants need update");
#else
static_assert((MaxBufferSrdSize == 4), "Buffer SRD size changed, affected shaders and constants need update");
#endif
static_assert((sizeof(InitExecuteIndirectConstants) % sizeof(uint32)) == 0,
"InitExecuteIndirectConstants is not dword-aligned");
}
Expand Down
2 changes: 1 addition & 1 deletion gpurt/gpurtLib.h
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ namespace GpuRt
// update their definition of GPURT_CLIENT_INTERFACE_MAJOR_VERSION to indicate that they have made the required changes
// to support a new version. When the client version is updated, the old interface will be compiled out and only the
// new one will remain.
#define GPURT_INTERFACE_MAJOR_VERSION 46
#define GPURT_INTERFACE_MAJOR_VERSION 49

#if GPURT_CLIENT_INTERFACE_MAJOR_VERSION < 44
// Minor interface version. This number is incremented when a compatible interface change is made. Compatible changes
Expand Down
Loading

0 comments on commit c5b1079

Please sign in to comment.