Skip to content

Commit

Permalink
Update gpurt from commit 34a59e34
Browse files Browse the repository at this point in the history
[Continuations] Fix debug traversal path
Convert HwPipePoint to PipelineStageFlag
Update Trace Source interface
[Continuations] Use packed 32-bit shader IDs
[Continuations] BoxHeuristicMode changes in one _cont_TraceRay call
Consistently use EnqueueDead for dead lanes
[Continuations] Add RayHistory data for debug traversal path
[Continuations] Conditionally pass 32-bit addresses to the middle-end.
[Continuations] Fix debug traversal path compilation
Reduce GL2 flushes in AS-related barriers
Update Pal::CmdDispatch to PAL ver 909
Fix decodeAS with multiple procedural geometry descs
Fix Batch Builder when compiling to SPIRV
Use GetRayId() to handle rayquery in graphics shader
Make Intrinsics more robust
[llvmraytracing] Pass dummy return address to AwaitTraversal
[Continuations] Remove !PASS_32_BIT_CR cases
Simplify dead lanes workflow, cleanup Traversal
Fix for Hitman3 hang on Navi3
[Continuations] Do not set priority in LLPC path
Changed minimum scratch buffer size to 8 bytes
[Continuations] Simplify Vpc32::isValid()
  • Loading branch information
qiaojbao committed Dec 4, 2024
1 parent f2d96f1 commit aa5ba68
Show file tree
Hide file tree
Showing 31 changed files with 1,096 additions and 892 deletions.
5 changes: 5 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,11 @@ option(GPURT_BUILD_CONTINUATION "GpuRt uses continuation traversal" ON)
if (GPURT_BUILD_CONTINUATION)
gpurt_add_compile_definitions(GPURT_BUILD_CONTINUATION=1)
endif()

cmake_dependent_option(GPURT_DEBUG_CONTINUATION_TRAVERSAL "Debug continuation traversal on legacy indirect path" OFF "GPURT_BUILD_CONTINUATION" OFF)
if (GPURT_DEBUG_CONTINUATION_TRAVERSAL)
gpurt_add_compile_definitions(GPURT_DEBUG_CONTINUATION_TRAVERSAL=1)
endif()
#endif

# Disable run time type information
Expand Down
39 changes: 16 additions & 23 deletions backends/pal/gpurtPalBackend.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,25 +28,6 @@
namespace GpuRt
{

// =====================================================================================================================
// Translates a GpuRt::HwPipePoint into the Pal::HwPipePoint that carries the same numeric value. The input is matched
// against each supported PAL enumerator explicitly instead of being cast straight into the PAL enum, so a value with
// no PAL counterpart asserts and yields HwPipeTop.
static Pal::HwPipePoint GpuRtToPalHwPipePoint(
    HwPipePoint gpurtHwPipePoint)
{
    const uint32 rawPipePoint = static_cast<uint32>(gpurtHwPipePoint);

    if (rawPipePoint == static_cast<uint32>(Pal::HwPipePoint::HwPipeTop))
    {
        return Pal::HwPipePoint::HwPipeTop;
    }

    if (rawPipePoint == static_cast<uint32>(Pal::HwPipePoint::HwPipePreCs))
    {
        return Pal::HwPipePoint::HwPipePreCs;
    }

    if (rawPipePoint == static_cast<uint32>(Pal::HwPipePoint::HwPipeBottom))
    {
        return Pal::HwPipePoint::HwPipeBottom;
    }

    // Nothing matched: report the offending raw value, then fall back to HwPipeTop.
    PAL_ASSERT_ALWAYS_MSG("Unhandled HwPipePoint value in conversion: %u\n",
                          rawPipePoint);
    return Pal::HwPipePoint::HwPipeTop;
}

// =====================================================================================================================
static Pal::ImmediateDataWidth GpuRtToPalImmediateDataWidth(
ImmediateDataWidth gpurtImmediateDataWidth)
Expand Down Expand Up @@ -132,7 +113,11 @@ void PalBackend::Dispatch(
uint32 z
) const
{
#if PAL_INTERFACE_MAJOR_VERSION >= 909
GetCmdBuffer(cmdBuffer)->CmdDispatch({ x, y, z }, {});
#else
GetCmdBuffer(cmdBuffer)->CmdDispatch({ x, y, z });
#endif
}

// =====================================================================================================================
Expand Down Expand Up @@ -238,6 +223,7 @@ void PalBackend::InsertBarrier(
{
const bool syncDispatch = flags & BarrierFlagSyncDispatch;
const bool syncIndirectArgs = flags & BarrierFlagSyncIndirectArg;
const bool syncPreCpWrite = flags & BarrierFlagSyncPreCpWrite;
const bool syncPostCpWrite = flags & BarrierFlagSyncPostCpWrite;

Pal::ICmdBuffer* pCmdBuffer = GetCmdBuffer(cmdBuffer);
Expand All @@ -247,8 +233,16 @@ void PalBackend::InsertBarrier(

if (syncDispatch || syncIndirectArgs)
{
memoryBarrier.srcStageMask = Pal::PipelineStageCs;
memoryBarrier.srcAccessMask = Pal::CoherShader;
memoryBarrier.srcStageMask |= Pal::PipelineStageCs;
memoryBarrier.srcAccessMask |= Pal::CoherShader;
}

if (syncPreCpWrite)
{
memoryBarrier.srcStageMask |= Pal::PipelineStagePostPrefetch;
memoryBarrier.srcAccessMask |= Pal::CoherShader;
memoryBarrier.dstStageMask |= Pal::PipelineStagePostPrefetch;
memoryBarrier.dstAccessMask |= Pal::CoherCp;
}

if (syncPostCpWrite)
Expand Down Expand Up @@ -359,12 +353,11 @@ void PalBackend::UpdateMemory(
// =====================================================================================================================
void PalBackend::WriteTimestamp(
ClientCmdBufferHandle cmdBuffer,
HwPipePoint hwPipePoint,
const Pal::IGpuMemory& timeStampVidMem,
uint64 offset
) const
{
GetCmdBuffer(cmdBuffer)->CmdWriteTimestamp(GpuRtToPalHwPipePoint(hwPipePoint), timeStampVidMem, offset);
GetCmdBuffer(cmdBuffer)->CmdWriteTimestamp(Pal::PipelineStageBottomOfPipe, timeStampVidMem, offset);
}

// =====================================================================================================================
Expand Down
1 change: 0 additions & 1 deletion backends/pal/gpurtPalBackend.h
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,6 @@ class PalBackend : public IBackend

virtual void WriteTimestamp(
ClientCmdBufferHandle cmdBuffer,
HwPipePoint hwPipePoint,
const Pal::IGpuMemory& timeStampVidMem,
uint64 offset
) const override;
Expand Down
2 changes: 1 addition & 1 deletion gpurt/gpurt.h
Original file line number Diff line number Diff line change
Expand Up @@ -737,7 +737,7 @@ struct DeviceSettings
uint32 numRebraidIterations;
uint32 rebraidQualityHeuristic;

uint32 plocRadius; // PLOC Radius
uint32 plocRadius; // PLOC nearest neighbor search radius
uint32 maxTopDownBuildInstances; // Max instances allowed for top down build
uint32 parallelBuildWavesPerSimd; // Waves per SIMD to launch for parallel build

Expand Down
13 changes: 2 additions & 11 deletions gpurt/gpurtBackend.h
Original file line number Diff line number Diff line change
Expand Up @@ -75,21 +75,13 @@ struct BufferViewInfo
BufferViewSwizzle swizzle;
};

// =====================================================================================================================
// Copy of Pal::HwPipePoint with values we use.
//
// Only three pipe points are mirrored here. The GpuRt <-> PAL conversion helpers match enumerators by comparing raw
// uint32 values, so every value below has to stay equal to the Pal::HwPipePoint enumerator of the same name.
// NOTE(review): the numeric values are taken on trust from PAL; re-check them against the PAL headers on PAL updates.
enum class HwPipePoint : uint32
{
HwPipeTop = 0x0,    // Also the fallback the conversion helpers return for an unrecognized value
HwPipePreCs = 0x1,
HwPipeBottom = 0x7,
};

// =====================================================================================================================
enum BarrierFlags : uint32
{
BarrierFlagSyncDispatch = 0x1, // Stall the following dispatch until all previous dispatches are done
BarrierFlagSyncIndirectArg = 0x2, // Prepare previous shader output for indirect argument use
BarrierFlagSyncPostCpWrite = 0x4, // Prepare data set by CP for shader use
BarrierFlagSyncPreCpWrite = 0x4, // Prepare for CP write
BarrierFlagSyncPostCpWrite = 0x8, // Prepare data set by CP for shader use
};

// =====================================================================================================================
Expand Down Expand Up @@ -185,7 +177,6 @@ class IBackend
// Will eventually be replaced with a callback or other abstraction to avoid referencing video memory.
virtual void WriteTimestamp(
ClientCmdBufferHandle cmdBuffer,
HwPipePoint hwPipePoint,
const Pal::IGpuMemory& timeStampVidMem,
uint64 offset) const = 0;

Expand Down
4 changes: 2 additions & 2 deletions gpurt/gpurtBuildSettings.h
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ struct CompileTimeBuildSettings
uint32 radixSortScanLevel;
uint32 emitCompactSize;
uint32 enableBVHBuildDebugCounters;
uint32 plocRadius;
uint32 nnSearchRadius;
uint32 enablePairCostCheck;
uint32 enableVariableBitsMortonCode;
uint32 rebraidType;
Expand Down Expand Up @@ -112,7 +112,7 @@ struct CompileTimeBuildSettings
#define BUILD_SETTINGS_DATA_RADIX_SORT_SCAN_LEVEL_ID 7
#define BUILD_SETTINGS_DATA_EMIT_COMPACT_SIZE_ID 8
#define BUILD_SETTINGS_DATA_ENABLE_BVH_BUILD_DEBUG_COUNTERS_ID 9
#define BUILD_SETTINGS_DATA_PLOC_RADIUS_ID 10
#define BUILD_SETTINGS_DATA_NN_SEARCH_RADIUS_ID 10
#define BUILD_SETTINGS_DATA_ENABLE_PAIR_COST_CHECK_ID 11
#define BUILD_SETTINGS_DATA_ENABLE_VARIABLE_BITS_MC_ID 12
#define BUILD_SETTINGS_DATA_REBRAID_TYPE_ID 13
Expand Down
17 changes: 0 additions & 17 deletions gpurt/gpurtInlineFuncs.h
Original file line number Diff line number Diff line change
Expand Up @@ -156,23 +156,6 @@ inline BufferViewFormat GetSingleComponentFormatForFormat(BufferViewFormat forma
}
}

//=====================================================================================================================
// Maps the raw value of a Pal::HwPipePoint onto the GpuRt::HwPipePoint enumerator that has the same value. The raw
// value is compared against each mirrored enumerator rather than being cast directly, so a value outside the mirrored
// set asserts and yields HwPipeTop.
inline HwPipePoint PalToGpuRtHwPipePoint(uint32 palHwPipePoint)
{
    // Every pipe point GpuRt mirrors from PAL.
    constexpr HwPipePoint MirroredPipePoints[] =
    {
        HwPipePoint::HwPipeTop,
        HwPipePoint::HwPipePreCs,
        HwPipePoint::HwPipeBottom,
    };

    for (const HwPipePoint candidate : MirroredPipePoints)
    {
        if (palHwPipePoint == static_cast<uint32>(candidate))
        {
            return candidate;
        }
    }

    // No mirrored enumerator has this value: report it, then fall back to HwPipeTop.
    PAL_ASSERT_ALWAYS_MSG("Unhandled HwPipePoint value in conversion: %u\n", palHwPipePoint);
    return HwPipePoint::HwPipeTop;
}

//=====================================================================================================================
// Return the number of components for a buffer view format when it's used as a vertex format.
inline uint8 GetNumComponentsForVertexFormat(VertexFormat format)
Expand Down
19 changes: 8 additions & 11 deletions src/gpurtBvhBatcher.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,7 @@ void BvhBatcher::BuildAccelerationStructureBatch(
// but otherwise do not participate in the rest of the build.
if (isUpdate)
{
builder.EmitPostBuildInfo();
builder.EmitPostBuildInfoDispatch();
}
else
{
Expand Down Expand Up @@ -146,7 +146,11 @@ void BvhBatcher::BuildAccelerationStructureBatch(
{
RGP_PUSH_MARKER("Process Empty BVH builds");
DispatchInitAccelerationStructure<false>(emptyBuilders);
BuildPhase(emptyBuilders, &BvhBuilder::EmitPostBuildInfo);
if (PhaseEnabled(BuildPhaseFlags::SeparateEmitPostBuildInfoPass))
{
Barrier();
BuildPhase(emptyBuilders, &BvhBuilder::EmitPostBuildInfoDispatch);
}
RGP_POP_MARKER();
}

Expand Down Expand Up @@ -264,17 +268,10 @@ void BvhBatcher::BuildRaytracingAccelerationStructureBatch(
{
RGP_PUSH_MARKER("EmitPostBuildInfo");
Barrier();
BuildPhase("Updates", updaters, &BvhBuilder::EmitPostBuildInfo);
BuildPhase("Builds", builders, &BvhBuilder::EmitPostBuildInfo);

BuildPhase(BuildPhaseFlags::SeparateEmitPostBuildInfoPass, updaters, &BvhBuilder::EmitPostBuildInfoDispatch);
BuildPhase(BuildPhaseFlags::SeparateEmitPostBuildInfoPass, builders, &BvhBuilder::EmitPostBuildInfoDispatch);
RGP_POP_MARKER();
}
else
{
// Execute EmitPostBuildInfo without any RGP markers
BuildPhase(updaters, &BvhBuilder::EmitPostBuildInfo);
BuildPhase(builders, &BvhBuilder::EmitPostBuildInfo);
}

if (PhaseEnabled(BuildPhaseFlags::BuildDumpEvents))
{
Expand Down
Loading

0 comments on commit aa5ba68

Please sign in to comment.