-
Notifications
You must be signed in to change notification settings - Fork 27
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
HIGHLIGHTS: - added "NRILowLatency" extension offering latency reduction via REFLEX (D3D and VK supported) - added "NRIStreamer" extension offering comfortable constant, buffer and texture data streaming functionality - added COPY timestamps support - explicit "waitable" SwapChain (it improves behavior in multi-threaded environment, allowing to "wait for present" in one place and "acquire next image" in another) - bug fixes and improvements BREAKING CHANGES: - "SwapChainPresent" => "QueuePresent" to emphasize that it's a queue command - "QueueWait" and "QueueSignal" merged (again!) into "QueueSubmit": it maps better on "VK_KHR_synchronization2" and needed for "low latency" extension DETAILS: - Core: "QueueWait" and "QueueSignal" merged (again!) into "QueueSubmit" - Core: exposed "QueryType::TIMESTAMP_COPY_QUEUE" needed for timestamps issued in COPY queue - Core: slightly reworked "DeviceDesc", explicitly exposed NRI extension support status - SwapChain: added "SwapChainDesc::waitable" swapchain support for all APIs - SwapChain: explicitly exposed "WaitForPresent", which implicitly improves multi-threading behavior allowing to "wait" in one place and "acquire" in another - SwapChain: added "SwapChainDesc::allowLowLatency" - SwapChain: added "SwapChainDesc::queuedFrameNum" (use 0 for auto mode) - Wrappers: minor tweaks - D3D12: hooked up NVAPI and AMDAGS - D3D12: updated Agility SDK - VK: all CORE functions get queried by CORE or KHR names to improve compatibility with VK 1.2 - VK: VK_KHR_swapchain is an optional extension now - VK: command queues gathering improved by introducing a score system - VK: removed EXT suffixes from VK entities, which have been promoted to VK 1.2 - Validation: fixed a rare SwapChain related memory leak - Validation: hooked up Streamer - bug fixes and improvements - improved .clang-format
- Loading branch information
Showing
183 changed files
with
3,244 additions
and
1,610 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,63 @@ | ||
// © 2024 NVIDIA Corporation | ||
|
||
#pragma once | ||
|
||
NRI_NAMESPACE_BEGIN | ||
|
||
NRI_FORWARD_STRUCT(SwapChain); | ||
NRI_FORWARD_STRUCT(CommandQueue); | ||
|
||
// The "Us" suffix in names below means the value is in microseconds | ||
|
||
// Markers identifying points of a frame, passed to "SetLatencyMarker". Values are explicit and not contiguous (4 and 5 are not defined here) | ||
// NOTE(review): the gap presumably mirrors the marker numbering of the underlying vendor low latency API - confirm | ||
NRI_ENUM | ||
( | ||
LatencyMarker, uint8_t, | ||
|
||
// Should be called: | ||
SIMULATION_START = 0, // at the start of the simulation execution each frame, but after the call to "LatencySleep" | ||
SIMULATION_END = 1, // at the end of the simulation execution each frame | ||
RENDER_SUBMIT_START = 2, // at the beginning of the render submission execution each frame (must not span into asynchronous rendering) | ||
RENDER_SUBMIT_END = 3, // at the end of the render submission execution each frame | ||
INPUT_SAMPLE = 6, // just before the application gathers input data, but between SIMULATION_START and SIMULATION_END (yes, 6!) | ||
|
||
// NOTE(review): having no explicit value, this presumably evaluates to INPUT_SAMPLE + 1 = 7, not to the count of markers (5) - confirm before sizing arrays with it | ||
MAX_NUM | ||
); | ||
|
||
// Settings passed to "SetLatencySleepMode" for a given swap chain | ||
NRI_STRUCT(LatencySleepMode) | ||
{ | ||
uint32_t minIntervalUs; // minimum allowed frame interval in microseconds (0 - no frame rate limit) | ||
bool lowLatencyMode; // low latency mode enablement | ||
bool lowLatencyBoost; // hint to increase performance to provide additional latency savings at a cost of increased power consumption | ||
}; | ||
|
||
// Set of time stamps (in microseconds, see the "Us" suffix) passed by reference to "GetLatencyReport" | ||
NRI_STRUCT(LatencyReport) | ||
{ | ||
// The time stamp written: | ||
uint64_t inputSampleTimeUs; // when "SetLatencyMarker(INPUT_SAMPLE)" is called | ||
uint64_t simulationStartTimeUs; // when "SetLatencyMarker(SIMULATION_START)" is called | ||
uint64_t simulationEndTimeUs; // when "SetLatencyMarker(SIMULATION_END)" is called | ||
uint64_t renderSubmitStartTimeUs; // when "SetLatencyMarker(RENDER_SUBMIT_START)" is called | ||
uint64_t renderSubmitEndTimeUs; // when "SetLatencyMarker(RENDER_SUBMIT_END)" is called | ||
uint64_t presentStartTimeUs; // right before "Present" | ||
uint64_t presentEndTimeUs; // right after "Present" | ||
uint64_t driverStartTimeUs; // when the first "QueueSubmitTrackable" is called | ||
uint64_t driverEndTimeUs; // when the final "QueueSubmitTrackable" hands off from the driver | ||
// NOTE(review): the two fields below are undocumented; presumably the time the work enters / leaves the OS render queue - confirm | ||
uint64_t osRenderQueueStartTimeUs; | ||
uint64_t osRenderQueueEndTimeUs; | ||
uint64_t gpuRenderStartTimeUs; // when the first submission reaches the GPU | ||
uint64_t gpuRenderEndTimeUs; // when the final submission finishes on the GPU | ||
}; | ||
|
||
// Low latency extension interface: a table of function pointers, each operating on a swap chain | ||
// Multi-swapchain is supported only by VK | ||
NRI_STRUCT(LowLatencyInterface) | ||
{ | ||
// Apply "LatencySleepMode" settings to the swap chain | ||
NRI_NAME(Result) (NRI_CALL *SetLatencySleepMode)(NRI_NAME_REF(SwapChain) swapChain, const NRI_NAME_REF(LatencySleepMode) latencySleepMode); | ||
// Record one of the "LatencyMarker" points for the current frame (see the enum for when each one should be set) | ||
NRI_NAME(Result) (NRI_CALL *SetLatencyMarker)(NRI_NAME_REF(SwapChain) swapChain, NRI_NAME(LatencyMarker) latencyMarker); | ||
NRI_NAME(Result) (NRI_CALL *LatencySleep)(NRI_NAME_REF(SwapChain) swapChain); // call once before INPUT_SAMPLE | ||
// Takes the "LatencyReport" by non-const reference (the swap chain itself is const) | ||
NRI_NAME(Result) (NRI_CALL *GetLatencyReport)(const NRI_NAME_REF(SwapChain) swapChain, NRI_NAME_REF(LatencyReport) latencyReport); | ||
|
||
// This function must be used in "low latency" mode instead of "QueueSubmit" | ||
// Unlike the functions above it returns nothing, so there is no error code to check | ||
void (NRI_CALL *QueueSubmitTrackable)(NRI_NAME_REF(CommandQueue) commandQueue, const NRI_NAME_REF(QueueSubmitDesc) queueSubmitDesc, const NRI_NAME_REF(SwapChain) swapChain); | ||
}; | ||
|
||
NRI_NAMESPACE_END |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,67 @@ | ||
// © 2024 NVIDIA Corporation | ||
|
||
#pragma once | ||
|
||
NRI_NAMESPACE_BEGIN | ||
|
||
NRI_FORWARD_STRUCT(Streamer); | ||
|
||
// Creation parameters passed to "CreateStreamer" | ||
NRI_STRUCT(StreamerDesc) | ||
{ | ||
// Statically allocated ring-buffer for dynamic constants (optional) | ||
NRI_NAME(MemoryLocation) constantBufferMemoryLocation; // UPLOAD or DEVICE_UPLOAD | ||
// NOTE(review): presumably in bytes, with 0 meaning "no constant buffer" since this buffer is optional - confirm | ||
uint64_t constantBufferSize; | ||
|
||
// Dynamically (re)allocated ring-buffer for copying and rendering (mandatory) | ||
NRI_NAME(MemoryLocation) dynamicBufferMemoryLocation; // UPLOAD or DEVICE_UPLOAD | ||
NRI_NAME(BufferUsageBits) dynamicBufferUsageBits; | ||
// NOTE(review): undocumented; presumably the number of frames which can be in flight while the ring buffers are reused - confirm | ||
uint32_t frameInFlightNum; | ||
}; | ||
|
||
// Describes a single buffer upload, passed to "AddStreamerBufferUpdateRequest" | ||
NRI_STRUCT(BufferUpdateRequestDesc) | ||
{ | ||
// Data to upload | ||
const void* data; // pointer must be valid until "CopyStreamerUpdateRequests" call | ||
// NOTE(review): presumably the size of "data" in bytes - confirm | ||
uint64_t dataSize; | ||
|
||
// Destination (optional, ignored for constants) | ||
NRI_NAME(Buffer)* dstBuffer; | ||
// NOTE(review): presumably a byte offset inside "dstBuffer" - confirm | ||
uint64_t dstBufferOffset; | ||
}; | ||
|
||
// Describes a single texture upload, passed to "AddStreamerTextureUpdateRequest" | ||
NRI_STRUCT(TextureUpdateRequestDesc) | ||
{ | ||
// Data to upload | ||
const void* data; // pointer must be valid until "CopyStreamerUpdateRequests" call | ||
// NOTE(review): pitches presumably describe the layout of "data" in bytes (per row and per slice) - confirm | ||
uint32_t dataRowPitch; | ||
uint32_t dataSlicePitch; | ||
|
||
// Destination (mandatory) | ||
NRI_NAME(Texture)* dstTexture; | ||
NRI_NAME(TextureRegionDesc) dstRegionDesc; | ||
}; | ||
|
||
// Streamer extension interface: a table of function pointers for creating a streamer, queueing update requests and uploading them | ||
// Order implied by the comments below: add requests => "CopyStreamerUpdateRequests" (HOST) => "CmdUploadStreamerUpdateRequests" (DEVICE) | ||
NRI_STRUCT(StreamerInterface) | ||
{ | ||
// Create / destroy a streamer. The created object is returned via the "streamer" output parameter | ||
NRI_NAME(Result) (NRI_CALL *CreateStreamer)(NRI_NAME_REF(Device) device, const NRI_NAME_REF(StreamerDesc) streamerDesc, NRI_NAME_REF(Streamer*) streamer); | ||
void (NRI_CALL *DestroyStreamer)(NRI_NAME_REF(Streamer) streamer); | ||
|
||
// Get internal buffers | ||
NRI_NAME(Buffer*) (NRI_CALL *GetStreamerConstantBuffer)(NRI_NAME_REF(Streamer) streamer); // Never changes | ||
NRI_NAME(Buffer*) (NRI_CALL *GetStreamerDynamicBuffer)(NRI_NAME_REF(Streamer) streamer); // Valid only after "CopyStreamerUpdateRequests" | ||
|
||
// Add an update request. Returns the offset in the ring buffer and doesn't invoke any work | ||
uint64_t (NRI_CALL *AddStreamerBufferUpdateRequest)(NRI_NAME_REF(Streamer) streamer, const NRI_NAME_REF(BufferUpdateRequestDesc) bufferUpdateRequestDesc); | ||
uint64_t (NRI_CALL *AddStreamerTextureUpdateRequest)(NRI_NAME_REF(Streamer) streamer, const NRI_NAME_REF(TextureUpdateRequestDesc) textureUpdateRequestDesc); | ||
|
||
// (HOST) Copy data and get the offset in the dedicated ring buffer (for dynamic constant buffers) | ||
uint32_t (NRI_CALL *UpdateStreamerConstantBuffer)(NRI_NAME_REF(Streamer) streamer, const void* data, uint32_t dataSize); | ||
|
||
// (HOST) Copy gathered requests to the internal buffer, potentially a new one if the capacity is exceeded. Must be called once per frame | ||
NRI_NAME(Result) (NRI_CALL *CopyStreamerUpdateRequests)(NRI_NAME_REF(Streamer) streamer); | ||
|
||
// (DEVICE) Copy data to destinations (if any), barriers are externally controlled. Must be called after "CopyStreamerUpdateRequests" | ||
void (NRI_CALL *CmdUploadStreamerUpdateRequests)(NRI_NAME_REF(CommandBuffer) commandBuffer, NRI_NAME_REF(Streamer) streamer); | ||
}; | ||
|
||
NRI_NAMESPACE_END |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.