Skip to content
This repository has been archived by the owner on Dec 25, 2023. It is now read-only.

Commit

Permalink
Fix thread priority logic bugs. Evictions no longer unmap, which generally improves perf by halving the calls to UpdateTileMappings. Evicts more aggressively.
Browse files Browse the repository at this point in the history
  • Loading branch information
allenhux-intel committed Nov 18, 2022
1 parent 5c73edf commit 461a8c3
Show file tree
Hide file tree
Showing 9 changed files with 73 additions and 107 deletions.
1 change: 1 addition & 0 deletions TileUpdateManager/DataUploader.h
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,7 @@ namespace Streaming
float GetGpuStreamingTime() const { return m_gpuTimer.GetTimes()[0].first; }

UINT GetTotalNumUploads() const { return m_numTotalUploads; }
// Adds in_numEvictions to the running eviction total reported by GetTotalNumEvictions().
// NOTE(review): the visible caller is Streaming::TileUpdateManagerBase::ProcessFeedbackThread();
// confirm m_numTotalEvictions is safe to update from that thread (e.g. is atomic).
void AddEvictions(UINT in_numEvictions) { m_numTotalEvictions += in_numEvictions; }
UINT GetTotalNumEvictions() const { return m_numTotalEvictions; }
float GetApproximateTileCopyLatency() const { return m_pFenceThreadTimer->GetSecondsFromDelta(m_totalTileCopyLatency); } // sum of per-tile latencies so far

Expand Down
20 changes: 8 additions & 12 deletions TileUpdateManager/SamplerFeedbackStreaming.h
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,9 @@ struct StreamingResource
//=============================================================================
struct TileUpdateManagerDesc
{
// the Direct command queue the application is using to render, which TUM monitors to know when new feedback is ready
ID3D12CommandQueue* m_pDirectCommandQueue{ nullptr };

// maximum number of in-flight batches
UINT m_maxNumCopyBatches{ 128 };

Expand All @@ -130,11 +133,11 @@ struct TileUpdateManagerDesc

// applied to all internal threads: submit, fenceMonitor, processFeedback, updateResidency
// on hybrid systems: performance prefers P cores, efficiency prefers E cores, normal is OS default
enum class ThreadPriority
enum class ThreadPriority : int
{
Prefer_Normal,
Prefer_Performance,
Prefer_Efficiency
Prefer_Normal = 0,
Prefer_Performance = 1,
Prefer_Efficiency = -1
};
ThreadPriority m_threadPriority{ ThreadPriority::Prefer_Normal };

Expand All @@ -147,14 +150,7 @@ struct TileUpdateManagerDesc
//=============================================================================
struct TileUpdateManager
{
static TileUpdateManager* Create(
// query resource for tiling properties. use its device to create internal resources
ID3D12Device8* in_pDevice,

// the Direct command queue the application is using to render, which TUM monitors to know when new feedback is ready
ID3D12CommandQueue* in_pDirectCommandQueue,

const TileUpdateManagerDesc& in_desc);
static TileUpdateManager* Create(const TileUpdateManagerDesc& in_desc);

virtual void Destroy() = 0;

Expand Down
2 changes: 1 addition & 1 deletion TileUpdateManager/Streaming.h
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,7 @@ namespace Streaming

inline void SetThreadPriority(std::thread& in_thread, int in_priority)
{
if (in_priority)
if (in_priority) // 0 = default (do nothing). -1 = efficiency. otherwise, performance.
{
THREAD_POWER_THROTTLING_STATE throttlingState{ THREAD_POWER_THROTTLING_CURRENT_VERSION, THREAD_POWER_THROTTLING_EXECUTION_SPEED, 0 };
if (-1 == in_priority) { throttlingState.StateMask = THREAD_POWER_THROTTLING_EXECUTION_SPEED; } // speed, speed = prefer e cores
Expand Down
45 changes: 22 additions & 23 deletions TileUpdateManager/StreamingResourceBase.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -497,41 +497,29 @@ UINT Streaming::StreamingResourceBase::QueueTiles()
{
UINT uploadsRequested = 0;

UINT numEvictions = (UINT)m_pendingEvictions.GetReadyToEvict().size();
UINT numLoads = (UINT)m_pendingTileLoads.size();

// pushes as many tiles as it can into a single UpdateList
if ((numLoads && m_pHeap->GetAllocator().GetAvailable()) || numEvictions)
if (m_pendingTileLoads.size() && m_pHeap->GetAllocator().GetAvailable())
{
UpdateList scratchUL;
if (numEvictions)
{
QueuePendingTileEvictions(&scratchUL);
numEvictions = (UINT)m_pendingEvictions.GetReadyToEvict().size();
}

// queue as many new tiles as possible
if (numLoads && m_pHeap->GetAllocator().GetAvailable())
{
QueuePendingTileLoads(&scratchUL);
uploadsRequested = (UINT)scratchUL.m_coords.size(); // number of uploads in UpdateList
numLoads = (UINT)m_pendingTileLoads.size();
}
QueuePendingTileLoads(&scratchUL);
uploadsRequested = (UINT)scratchUL.m_coords.size(); // number of uploads in UpdateList

// only allocate an UpdateList if we have updates
if (scratchUL.m_coords.size() || scratchUL.m_evictCoords.size())
if (scratchUL.m_coords.size())
{
// calling function checked for availability, so UL allocation must succeed
UpdateList* pUpdateList = m_pTileUpdateManager->AllocateUpdateList(this);
ASSERT(pUpdateList);

pUpdateList->m_coords.swap(scratchUL.m_coords);
pUpdateList->m_heapIndices.swap(scratchUL.m_heapIndices);
pUpdateList->m_evictCoords.swap(scratchUL.m_evictCoords);

m_pTileUpdateManager->SubmitUpdateList(*pUpdateList);
}
}

return uploadsRequested;
}

Expand Down Expand Up @@ -575,14 +563,14 @@ Note that the multi-frame delay for evictions prevents allocation of an index th
// note there are only tiles to evict after processing feedback. evictions no longer need an UpdateList,
// but a tile whose load is still pending is delayed and retried on a later call.
//-----------------------------------------------------------------------------
void Streaming::StreamingResourceBase::QueuePendingTileEvictions(Streaming::UpdateList* out_pUpdateList)
UINT Streaming::StreamingResourceBase::QueuePendingTileEvictions()
{
ASSERT(out_pUpdateList);
ASSERT(m_pendingEvictions.GetReadyToEvict().size());
if (0 == m_pendingEvictions.GetReadyToEvict().size()) { return 0; }

auto& pendingEvictions = m_pendingEvictions.GetReadyToEvict();

UINT numDelayed = 0;
UINT numEvictions = 0;
for (auto& coord : pendingEvictions)
{
// if the heap index is valid, but the tile is not resident, there's a /pending load/
Expand All @@ -597,11 +585,17 @@ void Streaming::StreamingResourceBase::QueuePendingTileEvictions(Streaming::Upda
auto residency = m_tileMappingState.GetResidency(coord);
if (TileMappingState::Residency::Resident == residency)
{
m_tileMappingState.SetResidency(coord, TileMappingState::Residency::Evicting);
// NOTE: effectively removed "Evicting." Now remove tiles from data structure, not from memory mapping.
// result is improved perf from fewer UpdateTileMappings() calls.
// existing artifacts (cracks when sampler crosses tile boundaries) are "no worse"
// to put it back: set residency to evicting and add tiles to updatelist for eviction

m_tileMappingState.SetResidency(coord, TileMappingState::Residency::NotResident);
UINT& heapIndex = m_tileMappingState.GetHeapIndex(coord);
m_pHeap->GetAllocator().Free(heapIndex);
heapIndex = TileMappingState::InvalidIndex;
out_pUpdateList->m_evictCoords.push_back(coord);

numEvictions++;
}
// valid index but not resident means there is a pending load, do not evict
// try again later
Expand All @@ -615,8 +609,14 @@ void Streaming::StreamingResourceBase::QueuePendingTileEvictions(Streaming::Upda
// else: refcount positive or eviction already in progress? rescue this eviction (by not adding to pending evictions)
}

if (numEvictions)
{
SetResidencyChanged();
}

// replace the ready evictions with just the delayed evictions.
pendingEvictions.resize(numDelayed);
return numEvictions;
}

//-----------------------------------------------------------------------------
Expand All @@ -640,7 +640,6 @@ void Streaming::StreamingResourceBase::QueuePendingTileLoads(Streaming::UpdateLi

// if the heap index is not valid, but the tile is resident, there's a /pending eviction/
// a pending eviction might be streaming
// it will not be in the updatelist, because eviction happens before load, and we would have seen refcount == 0

// NOTE! assumes refcount is non-zero
// ProcessFeedback() clears all pending loads with refcount == 0
Expand Down
8 changes: 4 additions & 4 deletions TileUpdateManager/StreamingResourceBase.h
Original file line number Diff line number Diff line change
Expand Up @@ -132,10 +132,13 @@ namespace Streaming
// if a feedback buffer is ready, process it to generate lists of tiles to load/evict
void ProcessFeedback(UINT64 in_frameFenceCompletedValue);

// try to load/evict tiles. only queue evictions once per frame.
// try to load/evict tiles.
// returns # tiles requested for upload
UINT QueueTiles();

// returns # tiles evicted
UINT QueuePendingTileEvictions();

bool IsStale()
{
return (m_pendingTileLoads.size() || m_pendingEvictions.GetReadyToEvict().size());
Expand Down Expand Up @@ -322,9 +325,6 @@ namespace Streaming
// DecRef may decline
void DecTileRef(UINT in_x, UINT in_y, UINT in_s);

// only push evictions to DataUploader once per rendered frame (i.e. "on the next frame")
void QueuePendingTileEvictions(Streaming::UpdateList* out_pUpdateList);

void QueuePendingTileLoads(Streaming::UpdateList* out_pUpdateList); // appends queued tile loads to out_pUpdateList (no return value)

void LoadPackedMips();
Expand Down
13 changes: 4 additions & 9 deletions TileUpdateManager/TileUpdateManager.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -39,16 +39,11 @@
//--------------------------------------------
// instantiate streaming library
//--------------------------------------------
TileUpdateManager* TileUpdateManager::Create(
// query resource for tiling properties. use its device to create internal resources
ID3D12Device8* in_pDevice,

// the Direct command queue the application is using to render, which TUM monitors to know when new feedback is ready
ID3D12CommandQueue* in_pDirectCommandQueue,

const TileUpdateManagerDesc& in_desc)
TileUpdateManager* TileUpdateManager::Create(const TileUpdateManagerDesc& in_desc)
{
return new Streaming::TileUpdateManagerBase(in_pDevice, in_pDirectCommandQueue, in_desc);
Streaming::ComPtr<ID3D12Device8> device;
in_desc.m_pDirectCommandQueue->GetDevice(IID_PPV_ARGS(&device));
return new Streaming::TileUpdateManagerBase(in_desc, device.Get());
}

void Streaming::TileUpdateManagerBase::Destroy()
Expand Down
60 changes: 20 additions & 40 deletions TileUpdateManager/TileUpdateManagerBase.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -36,29 +36,19 @@
//=============================================================================
// constructor for streaming library base class
//=============================================================================
Streaming::TileUpdateManagerBase::TileUpdateManagerBase(
ID3D12Device8* in_pDevice,
ID3D12CommandQueue* in_pDirectCommandQueue, // application's Direct command queue. Used to know when new feedback is ready.
const TileUpdateManagerDesc& in_desc) :
Streaming::TileUpdateManagerBase::TileUpdateManagerBase(const TileUpdateManagerDesc& in_desc, ID3D12Device8* in_pDevice) :// required for constructor
m_numSwapBuffers(in_desc.m_swapChainBufferCount)
, m_gpuTimerResolve(in_pDevice, in_desc.m_swapChainBufferCount, D3D12GpuTimer::TimerType::Direct)
, m_renderFrameIndex(0)
, m_directCommandQueue(in_pDirectCommandQueue)
, m_directCommandQueue(in_desc.m_pDirectCommandQueue)
, m_device(in_pDevice)
, m_commandLists((UINT)CommandListName::Num)
, m_maxTileMappingUpdatesPerApiCall(in_desc.m_maxTileMappingUpdatesPerApiCall)
, m_addAliasingBarriers(in_desc.m_addAliasingBarriers)
, m_addAliasingBarriers(in_desc.m_addAliasingBarriers)
, m_minNumUploadRequests(in_desc.m_minNumUploadRequests)
, m_threadPriority((int)in_desc.m_threadPriority)
{
ASSERT(D3D12_COMMAND_LIST_TYPE_DIRECT == in_pDirectCommandQueue->GetDesc().Type);

switch (in_desc.m_threadPriority)
{
case TileUpdateManagerDesc::ThreadPriority::Prefer_Performance: m_threadPriority = 1; break;
case TileUpdateManagerDesc::ThreadPriority::Prefer_Efficiency: m_threadPriority = 1; break;
default:
m_threadPriority = 0;
}
ASSERT(D3D12_COMMAND_LIST_TYPE_DIRECT == m_directCommandQueue->GetDesc().Type);

ThrowIfFailed(in_pDevice->CreateFence(0, D3D12_FENCE_FLAG_NONE, IID_PPV_ARGS(&m_frameFence)));
m_frameFence->SetName(L"Streaming::TileUpdateManagerBase::m_frameFence");
Expand Down Expand Up @@ -212,42 +202,32 @@ void Streaming::TileUpdateManagerBase::ProcessFeedbackThread()

// push uploads and evictions for stale resources
{
UINT numEvictions = 0;
UINT newStaleSize = 0; // track number of stale resources, then resize the array to the updated number
UINT staleIndex = 0;

// loop over stale resources
for (; staleIndex < staleResources.size(); staleIndex++)
for (auto resourceIndex : staleResources)
{
if (m_pDataUploader->GetNumUpdateListsAvailable() && m_threadsRunning)
{
UINT resourceIndex = staleResources[staleIndex];
uploadsRequested += m_streamingResources[resourceIndex]->QueueTiles();
}

if (m_streamingResources[resourceIndex]->IsStale()) // still have work to do?
{
// keep stale resource in compacted array while retaining oldest-first ordering
staleResources[newStaleSize] = resourceIndex;
newStaleSize++;
}
else
{
pending[resourceIndex] = 0; // clear the flag that prevents duplicates
}
// tiles that are "loading" can't be evicted. as soon as they arrive, they can be.
// note: since we aren't unmapping evicted tiles, we can evict even if no UpdateLists are available
numEvictions += m_streamingResources[resourceIndex]->QueuePendingTileEvictions();

if (m_streamingResources[resourceIndex]->IsStale()) // still have work to do?
{
// keep stale resource in compacted array while retaining oldest-first ordering
staleResources[newStaleSize] = resourceIndex;
newStaleSize++;
}
else // compact the stale array with a memcpy and do no further processing.
else
{
UINT numNotUpdated = UINT(staleResources.size() - staleIndex);
if (0 != staleIndex) // no need to move contents if no changes made
{
// copy of overlapping memory region requires left-to-right safety (not safe for memcpy)
std::copy(staleResources.begin() + staleIndex, staleResources.end(), staleResources.begin() + newStaleSize);
}
newStaleSize += numNotUpdated;

break;
pending[resourceIndex] = 0; // clear the flag that prevents duplicates
}
}
staleResources.resize(newStaleSize); // compact array
if (numEvictions) { m_pDataUploader->AddEvictions(numEvictions); }
}

// if there are uploads, maybe signal depending on heuristic to minimize # signals
Expand Down
9 changes: 1 addition & 8 deletions TileUpdateManager/TileUpdateManagerBase.h
Original file line number Diff line number Diff line change
Expand Up @@ -101,14 +101,7 @@ namespace Streaming
//--------------------------------------------
void Finish();

TileUpdateManagerBase(
// query resource for tiling properties. use its device to create internal resources
ID3D12Device8* in_pDevice,

// the Direct command queue the application is using to render, which TUM monitors to know when new feedback is ready
ID3D12CommandQueue* in_pDirectCommandQueue,

const struct TileUpdateManagerDesc& in_desc);
TileUpdateManagerBase(const struct TileUpdateManagerDesc& in_desc, ID3D12Device8* in_pDevice); // required for constructor

virtual ~TileUpdateManagerBase();

Expand Down
22 changes: 12 additions & 10 deletions src/Scene.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -670,15 +670,17 @@ void Scene::WaitForGpu()
void Scene::StartStreamingLibrary()
{
TileUpdateManagerDesc tumDesc;
tumDesc.m_pDirectCommandQueue = m_commandQueue.Get();
tumDesc.m_maxNumCopyBatches = m_args.m_numStreamingBatches;
tumDesc.m_stagingBufferSizeMB = m_args.m_stagingSizeMB;
tumDesc.m_maxTileMappingUpdatesPerApiCall = m_args.m_maxTileUpdatesPerApiCall;
tumDesc.m_swapChainBufferCount = SharedConstants::SWAP_CHAIN_BUFFER_COUNT;
tumDesc.m_addAliasingBarriers = m_args.m_addAliasingBarriers;
tumDesc.m_minNumUploadRequests = m_args.m_minNumUploadRequests;
tumDesc.m_useDirectStorage = m_args.m_useDirectStorage;
tumDesc.m_threadPriority = (TileUpdateManagerDesc::ThreadPriority)m_args.m_threadPriority;

m_pTileUpdateManager = TileUpdateManager::Create(m_device.Get(), m_commandQueue.Get(), tumDesc);
m_pTileUpdateManager = TileUpdateManager::Create(tumDesc);

// create 1 or more heaps to contain our StreamingResources
for (UINT i = 0; i < m_args.m_numHeaps; i++)
Expand Down Expand Up @@ -1510,15 +1512,15 @@ void Scene::DrawUI()
if (m_args.m_showFeedbackMapVertical)
{
UINT areaHeight = UINT(m_viewport.Height - minDim);
UINT numMips = areaHeight / (UINT)minDim;
if (numMips > 1)
{
DirectX::XMFLOAT2 windowPos = DirectX::XMFLOAT2(m_viewport.Width - minDim, 0);
m_pTextureViewer->Draw(m_commandList.Get(), windowPos, windowSize,
m_viewport,
m_args.m_visualizationBaseMip, numMips - 1,
m_args.m_showFeedbackMapVertical);
}
UINT numMips = areaHeight / (UINT)minDim;
if (numMips > 1)
{
DirectX::XMFLOAT2 windowPos = DirectX::XMFLOAT2(m_viewport.Width - minDim, 0);
m_pTextureViewer->Draw(m_commandList.Get(), windowPos, windowSize,
m_viewport,
m_args.m_visualizationBaseMip, numMips - 1,
m_args.m_showFeedbackMapVertical);
}
}
else
{
Expand Down

0 comments on commit 461a8c3

Please sign in to comment.