diff --git a/TileUpdateManager/DataUploader.h b/TileUpdateManager/DataUploader.h index a08dd61..03ca414 100644 --- a/TileUpdateManager/DataUploader.h +++ b/TileUpdateManager/DataUploader.h @@ -89,6 +89,7 @@ namespace Streaming float GetGpuStreamingTime() const { return m_gpuTimer.GetTimes()[0].first; } UINT GetTotalNumUploads() const { return m_numTotalUploads; } + void AddEvictions(UINT in_numEvictions) { m_numTotalEvictions += in_numEvictions; } UINT GetTotalNumEvictions() const { return m_numTotalEvictions; } float GetApproximateTileCopyLatency() const { return m_pFenceThreadTimer->GetSecondsFromDelta(m_totalTileCopyLatency); } // sum of per-tile latencies so far diff --git a/TileUpdateManager/SamplerFeedbackStreaming.h b/TileUpdateManager/SamplerFeedbackStreaming.h index 0ba6c27..5624677 100644 --- a/TileUpdateManager/SamplerFeedbackStreaming.h +++ b/TileUpdateManager/SamplerFeedbackStreaming.h @@ -109,6 +109,9 @@ struct StreamingResource //============================================================================= struct TileUpdateManagerDesc { + // the Direct command queue the application is using to render, which TUM monitors to know when new feedback is ready + ID3D12CommandQueue* m_pDirectCommandQueue{ nullptr }; + // maximum number of in-flight batches UINT m_maxNumCopyBatches{ 128 }; @@ -130,11 +133,11 @@ struct TileUpdateManagerDesc // applied to all internal threads: submit, fenceMonitor, processFeedback, updateResidency // on hybrid systems: performance prefers P cores, efficiency prefers E cores, normal is OS default - enum class ThreadPriority + enum class ThreadPriority : int { - Prefer_Normal, - Prefer_Performance, - Prefer_Efficiency + Prefer_Normal = 0, + Prefer_Performance = 1, + Prefer_Efficiency = -1 }; ThreadPriority m_threadPriority{ ThreadPriority::Prefer_Normal }; @@ -147,14 +150,7 @@ struct TileUpdateManagerDesc //============================================================================= struct TileUpdateManager { - static TileUpdateManager* Create( - // query resource for tiling properties. use its device to create internal resources - ID3D12Device8* in_pDevice, - - // the Direct command queue the application is using to render, which TUM monitors to know when new feedback is ready - ID3D12CommandQueue* in_pDirectCommandQueue, - - const TileUpdateManagerDesc& in_desc); + static TileUpdateManager* Create(const TileUpdateManagerDesc& in_desc); virtual void Destroy() = 0; diff --git a/TileUpdateManager/Streaming.h b/TileUpdateManager/Streaming.h index 92ef76e..d345310 100644 --- a/TileUpdateManager/Streaming.h +++ b/TileUpdateManager/Streaming.h @@ -107,7 +107,7 @@ namespace Streaming inline void SetThreadPriority(std::thread& in_thread, int in_priority) { - if (in_priority) + if (in_priority) // 0 = default (do nothing). -1 = efficiency. otherwise, performance. { THREAD_POWER_THROTTLING_STATE throttlingState{ THREAD_POWER_THROTTLING_CURRENT_VERSION, THREAD_POWER_THROTTLING_EXECUTION_SPEED, 0 }; if (-1 == in_priority) { throttlingState.StateMask = THREAD_POWER_THROTTLING_EXECUTION_SPEED; } // speed, speed = prefer e cores diff --git a/TileUpdateManager/StreamingResourceBase.cpp b/TileUpdateManager/StreamingResourceBase.cpp index 07f2d9d..8e33f53 100644 --- a/TileUpdateManager/StreamingResourceBase.cpp +++ b/TileUpdateManager/StreamingResourceBase.cpp @@ -497,29 +497,17 @@ UINT Streaming::StreamingResourceBase::QueueTiles() { UINT uploadsRequested = 0; - UINT numEvictions = (UINT)m_pendingEvictions.GetReadyToEvict().size(); - UINT numLoads = (UINT)m_pendingTileLoads.size(); - // pushes as many tiles as it can into a single UpdateList - if ((numLoads && m_pHeap->GetAllocator().GetAvailable()) || numEvictions) + if (m_pendingTileLoads.size() && m_pHeap->GetAllocator().GetAvailable()) { UpdateList scratchUL; - if (numEvictions) - { - QueuePendingTileEvictions(&scratchUL); - numEvictions = (UINT)m_pendingEvictions.GetReadyToEvict().size(); - } // queue as many new tiles as possible - if (numLoads && m_pHeap->GetAllocator().GetAvailable()) - { - QueuePendingTileLoads(&scratchUL); - uploadsRequested = (UINT)scratchUL.m_coords.size(); // number of uploads in UpdateList - numLoads = (UINT)m_pendingTileLoads.size(); - } + QueuePendingTileLoads(&scratchUL); + uploadsRequested = (UINT)scratchUL.m_coords.size(); // number of uploads in UpdateList // only allocate an UpdateList if we have updates - if (scratchUL.m_coords.size() || scratchUL.m_evictCoords.size()) + if (scratchUL.m_coords.size()) { // calling function checked for availability, so UL allocation must succeed UpdateList* pUpdateList = m_pTileUpdateManager->AllocateUpdateList(this); @@ -527,11 +515,11 @@ UINT Streaming::StreamingResourceBase::QueueTiles() pUpdateList->m_coords.swap(scratchUL.m_coords); pUpdateList->m_heapIndices.swap(scratchUL.m_heapIndices); - pUpdateList->m_evictCoords.swap(scratchUL.m_evictCoords); m_pTileUpdateManager->SubmitUpdateList(*pUpdateList); } } + return uploadsRequested; } @@ -575,14 +563,14 @@ Note that the multi-frame delay for evictions prevents allocation of an index th // note there are only tiles to evict after processing feedback, but it's possible // there was no UpdateList available at the time, so they haven't been evicted yet. //----------------------------------------------------------------------------- -void Streaming::StreamingResourceBase::QueuePendingTileEvictions(Streaming::UpdateList* out_pUpdateList) +UINT Streaming::StreamingResourceBase::QueuePendingTileEvictions() { - ASSERT(out_pUpdateList); - ASSERT(m_pendingEvictions.GetReadyToEvict().size()); + if (0 == m_pendingEvictions.GetReadyToEvict().size()) { return 0; } auto& pendingEvictions = m_pendingEvictions.GetReadyToEvict(); UINT numDelayed = 0; + UINT numEvictions = 0; for (auto& coord : pendingEvictions) { // if the heap index is valid, but the tile is not resident, there's a /pending load/ @@ -597,11 +585,17 @@ void Streaming::StreamingResourceBase::QueuePendingTileEvictions(Streaming::Upda auto residency = m_tileMappingState.GetResidency(coord); if (TileMappingState::Residency::Resident == residency) { - m_tileMappingState.SetResidency(coord, TileMappingState::Residency::Evicting); + // NOTE: effectively removed "Evicting." Now remove tiles from data structure, not from memory mapping. + // result is improved perf from fewer UpdateTileMappings() calls. + // existing artifacts (cracks when sampler crosses tile boundaries) are "no worse" + // to put it back: set residency to evicting and add tiles to updatelist for eviction + + m_tileMappingState.SetResidency(coord, TileMappingState::Residency::NotResident); UINT& heapIndex = m_tileMappingState.GetHeapIndex(coord); m_pHeap->GetAllocator().Free(heapIndex); heapIndex = TileMappingState::InvalidIndex; - out_pUpdateList->m_evictCoords.push_back(coord); + + numEvictions++; } // valid index but not resident means there is a pending load, do not evict // try again later @@ -615,8 +609,14 @@ void Streaming::StreamingResourceBase::QueuePendingTileEvictions(Streaming::Upda // else: refcount positive or eviction already in progress? rescue this eviction (by not adding to pending evictions) } + if (numEvictions) + { + SetResidencyChanged(); + } + // replace the ready evictions with just the delayed evictions. pendingEvictions.resize(numDelayed); + return numEvictions; } //----------------------------------------------------------------------------- @@ -640,7 +640,6 @@ void Streaming::StreamingResourceBase::QueuePendingTileLoads(Streaming::UpdateLi // if the heap index is not valid, but the tile is resident, there's a /pending eviction/ // a pending eviction might be streaming - // it will not be in the updatelist, because eviction happens before load, and we would have seen refcount == 0 // NOTE! assumes refcount is non-zero // ProcessFeedback() clears all pending loads with refcount == 0 diff --git a/TileUpdateManager/StreamingResourceBase.h b/TileUpdateManager/StreamingResourceBase.h index e6d9d4a..d95ff01 100644 --- a/TileUpdateManager/StreamingResourceBase.h +++ b/TileUpdateManager/StreamingResourceBase.h @@ -132,10 +132,13 @@ namespace Streaming // if a feedback buffer is ready, process it to generate lists of tiles to load/evict void ProcessFeedback(UINT64 in_frameFenceCompletedValue); - // try to load/evict tiles. only queue evictions once per frame. + // try to load/evict tiles. // returns # tiles requested for upload UINT QueueTiles(); + // returns # tiles evicted + UINT QueuePendingTileEvictions(); + bool IsStale() { return (m_pendingTileLoads.size() || m_pendingEvictions.GetReadyToEvict().size()); @@ -322,9 +325,6 @@ namespace Streaming // DecRef may decline void DecTileRef(UINT in_x, UINT in_y, UINT in_s); - // only push evictions to DataUploader once per rendered frame (i.e. "on the next frame") - void QueuePendingTileEvictions(Streaming::UpdateList* out_pUpdateList); - void QueuePendingTileLoads(Streaming::UpdateList* out_pUpdateList); // returns # tiles queued void LoadPackedMips(); diff --git a/TileUpdateManager/TileUpdateManager.cpp b/TileUpdateManager/TileUpdateManager.cpp index 74c326f..7f21677 100644 --- a/TileUpdateManager/TileUpdateManager.cpp +++ b/TileUpdateManager/TileUpdateManager.cpp @@ -39,16 +39,11 @@ //-------------------------------------------- // instantiate streaming library //-------------------------------------------- -TileUpdateManager* TileUpdateManager::Create( - // query resource for tiling properties. use its device to create internal resources - ID3D12Device8* in_pDevice, - - // the Direct command queue the application is using to render, which TUM monitors to know when new feedback is ready - ID3D12CommandQueue* in_pDirectCommandQueue, - - const TileUpdateManagerDesc& in_desc) +TileUpdateManager* TileUpdateManager::Create(const TileUpdateManagerDesc& in_desc) { - return new Streaming::TileUpdateManagerBase(in_pDevice, in_pDirectCommandQueue, in_desc); + Streaming::ComPtr device; + in_desc.m_pDirectCommandQueue->GetDevice(IID_PPV_ARGS(&device)); + return new Streaming::TileUpdateManagerBase(in_desc, device.Get()); } void Streaming::TileUpdateManagerBase::Destroy() diff --git a/TileUpdateManager/TileUpdateManagerBase.cpp b/TileUpdateManager/TileUpdateManagerBase.cpp index 3e92e76..808f464 100644 --- a/TileUpdateManager/TileUpdateManagerBase.cpp +++ b/TileUpdateManager/TileUpdateManagerBase.cpp @@ -36,29 +36,19 @@ //============================================================================= // constructor for streaming library base class //============================================================================= -Streaming::TileUpdateManagerBase::TileUpdateManagerBase( - ID3D12Device8* in_pDevice, - ID3D12CommandQueue* in_pDirectCommandQueue, // application's Direct command queue. Used to know when new feedback is ready. - const TileUpdateManagerDesc& in_desc) : +Streaming::TileUpdateManagerBase::TileUpdateManagerBase(const TileUpdateManagerDesc& in_desc, ID3D12Device8* in_pDevice) :// required for constructor m_numSwapBuffers(in_desc.m_swapChainBufferCount) , m_gpuTimerResolve(in_pDevice, in_desc.m_swapChainBufferCount, D3D12GpuTimer::TimerType::Direct) , m_renderFrameIndex(0) -, m_directCommandQueue(in_pDirectCommandQueue) +, m_directCommandQueue(in_desc.m_pDirectCommandQueue) , m_device(in_pDevice) , m_commandLists((UINT)CommandListName::Num) , m_maxTileMappingUpdatesPerApiCall(in_desc.m_maxTileMappingUpdatesPerApiCall) -, m_addAliasingBarriers(in_desc.m_addAliasingBarriers) +, m_addAliasingBarriers(in_desc.m_addAliasingBarriers) , m_minNumUploadRequests(in_desc.m_minNumUploadRequests) +, m_threadPriority((int)in_desc.m_threadPriority) { - ASSERT(D3D12_COMMAND_LIST_TYPE_DIRECT == in_pDirectCommandQueue->GetDesc().Type); - - switch (in_desc.m_threadPriority) - { - case TileUpdateManagerDesc::ThreadPriority::Prefer_Performance: m_threadPriority = 1; break; - case TileUpdateManagerDesc::ThreadPriority::Prefer_Efficiency: m_threadPriority = 1; break; - default: - m_threadPriority = 0; - } + ASSERT(D3D12_COMMAND_LIST_TYPE_DIRECT == m_directCommandQueue->GetDesc().Type); ThrowIfFailed(in_pDevice->CreateFence(0, D3D12_FENCE_FLAG_NONE, IID_PPV_ARGS(&m_frameFence))); m_frameFence->SetName(L"Streaming::TileUpdateManagerBase::m_frameFence"); @@ -212,42 +202,32 @@ void Streaming::TileUpdateManagerBase::ProcessFeedbackThread() // push uploads and evictions for stale resources { + UINT numEvictions = 0; UINT newStaleSize = 0; // track number of stale resources, then resize the array to the updated number - UINT staleIndex = 0; - - // loop over stale resources - for (; staleIndex < staleResources.size(); staleIndex++) + for (auto resourceIndex : staleResources) { if (m_pDataUploader->GetNumUpdateListsAvailable() && m_threadsRunning) { - UINT resourceIndex = staleResources[staleIndex]; uploadsRequested += m_streamingResources[resourceIndex]->QueueTiles(); + } - if (m_streamingResources[resourceIndex]->IsStale()) // still have work to do? - { - // keep stale resource in compacted array while retaining oldest-first ordering - staleResources[newStaleSize] = resourceIndex; - newStaleSize++; - } - else - { - pending[resourceIndex] = 0; // clear the flag that prevents duplicates - } + // tiles that are "loading" can't be evicted. as soon as they arrive, they can be. + // note: since we aren't unmapping evicted tiles, we can evict even if no UpdateLists are available + numEvictions += m_streamingResources[resourceIndex]->QueuePendingTileEvictions(); + + if (m_streamingResources[resourceIndex]->IsStale()) // still have work to do? + { + // keep stale resource in compacted array while retaining oldest-first ordering + staleResources[newStaleSize] = resourceIndex; + newStaleSize++; } - else // compact the stale array with a memcpy and do no further processing. + else { - UINT numNotUpdated = UINT(staleResources.size() - staleIndex); - if (0 != staleIndex) // no need to move contents if no changes made - { - // copy of overlapping memory region requires left-to-right safety (not safe for memcpy) - std::copy(staleResources.begin() + staleIndex, staleResources.end(), staleResources.begin() + newStaleSize); - } - newStaleSize += numNotUpdated; - - break; + pending[resourceIndex] = 0; // clear the flag that prevents duplicates } } staleResources.resize(newStaleSize); // compact array + if (numEvictions) { m_pDataUploader->AddEvictions(numEvictions); } } // if there are uploads, maybe signal depending on heuristic to minimize # signals diff --git a/TileUpdateManager/TileUpdateManagerBase.h b/TileUpdateManager/TileUpdateManagerBase.h index a74ff4d..c979760 100644 --- a/TileUpdateManager/TileUpdateManagerBase.h +++ b/TileUpdateManager/TileUpdateManagerBase.h @@ -101,14 +101,7 @@ namespace Streaming //-------------------------------------------- void Finish(); - TileUpdateManagerBase( - // query resource for tiling properties. use its device to create internal resources - ID3D12Device8* in_pDevice, - - // the Direct command queue the application is using to render, which TUM monitors to know when new feedback is ready - ID3D12CommandQueue* in_pDirectCommandQueue, - - const struct TileUpdateManagerDesc& in_desc); + TileUpdateManagerBase(const struct TileUpdateManagerDesc& in_desc, ID3D12Device8* in_pDevice); // required for constructor virtual ~TileUpdateManagerBase(); diff --git a/src/Scene.cpp b/src/Scene.cpp index eb85e55..f8afa33 100644 --- a/src/Scene.cpp +++ b/src/Scene.cpp @@ -670,6 +670,7 @@ void Scene::WaitForGpu() void Scene::StartStreamingLibrary() { TileUpdateManagerDesc tumDesc; + tumDesc.m_pDirectCommandQueue = m_commandQueue.Get(); tumDesc.m_maxNumCopyBatches = m_args.m_numStreamingBatches; tumDesc.m_stagingBufferSizeMB = m_args.m_stagingSizeMB; tumDesc.m_maxTileMappingUpdatesPerApiCall = m_args.m_maxTileUpdatesPerApiCall; @@ -677,8 +678,9 @@ void Scene::StartStreamingLibrary() tumDesc.m_addAliasingBarriers = m_args.m_addAliasingBarriers; tumDesc.m_minNumUploadRequests = m_args.m_minNumUploadRequests; tumDesc.m_useDirectStorage = m_args.m_useDirectStorage; + tumDesc.m_threadPriority = (TileUpdateManagerDesc::ThreadPriority)m_args.m_threadPriority; - m_pTileUpdateManager = TileUpdateManager::Create(m_device.Get(), m_commandQueue.Get(), tumDesc); + m_pTileUpdateManager = TileUpdateManager::Create(tumDesc); // create 1 or more heaps to contain our StreamingResources for (UINT i = 0; i < m_args.m_numHeaps; i++) @@ -1510,15 +1512,15 @@ void Scene::DrawUI() if (m_args.m_showFeedbackMapVertical) { UINT areaHeight = UINT(m_viewport.Height - minDim); - UINT numMips = areaHeight / (UINT)minDim; - if (numMips > 1) - { - DirectX::XMFLOAT2 windowPos = DirectX::XMFLOAT2(m_viewport.Width - minDim, 0); - m_pTextureViewer->Draw(m_commandList.Get(), windowPos, windowSize, - m_viewport, - m_args.m_visualizationBaseMip, numMips - 1, - m_args.m_showFeedbackMapVertical); - } + UINT numMips = areaHeight / (UINT)minDim; + if (numMips > 1) + { + DirectX::XMFLOAT2 windowPos = DirectX::XMFLOAT2(m_viewport.Width - minDim, 0); + m_pTextureViewer->Draw(m_commandList.Get(), windowPos, windowSize, + m_viewport, + m_args.m_visualizationBaseMip, numMips - 1, + m_args.m_showFeedbackMapVertical); + } } else {