
Commit

Performance improvements, simplifications, and overdue re-org. Fix to -waitforassetload.
allenhux-intel committed Jun 21, 2022
1 parent 189a5cf commit f9a352e
Showing 44 changed files with 2,265 additions and 2,362 deletions.
76 changes: 41 additions & 35 deletions TileUpdateManager/DataUploader.cpp
@@ -34,7 +34,6 @@
 
 //=============================================================================
 // Internal class that uploads texture data into a reserved resource
-// Takes a streamer. creates a HeapAllocator sized to match texture atlas
 //=============================================================================
 Streaming::DataUploader::DataUploader(
     ID3D12Device* in_pDevice,
@@ -43,8 +42,8 @@ Streaming::DataUploader::DataUploader(
     UINT in_maxTileMappingUpdatesPerApiCall // some HW/drivers seem to have a limit
 ) :
     m_updateLists(in_maxCopyBatches)
+    , m_updateListAllocator(in_maxCopyBatches)
     , m_stagingBufferSizeMB(in_stagingBufferSizeMB)
-    , m_updateListFreeCount(in_maxCopyBatches)
     , m_gpuTimer(in_pDevice, in_maxCopyBatches, D3D12GpuTimer::TimerType::Copy)
     , m_mappingUpdater(in_maxTileMappingUpdatesPerApiCall)
 {
@@ -197,10 +196,10 @@ void Streaming::DataUploader::StartThreads()
 {
     FenceMonitorThread();
 
-    // check constructed this way so we can wake the thread to allow for exit
-    if (m_updateLists.size() == m_updateListFreeCount)
+    // if no outstanding work, sleep
+    if (0 == m_updateListAllocator.GetAllocated())
     {
-        m_monitorFenceFlag.Wait();
+        m_fenceMonitorFlag.Wait();
     }
 }
 DebugPrint(L"Destroyed Fence Monitor Thread\n");
@@ -215,7 +214,7 @@ void Streaming::DataUploader::StopThreads()
 
     // wake up threads so they can exit
     m_submitFlag.Set();
-    m_monitorFenceFlag.Set();
+    m_fenceMonitorFlag.Set();
 
     // stop submitting new work
     if (m_submitThread.joinable())
@@ -238,9 +237,10 @@ void Streaming::DataUploader::StopThreads()
 //-----------------------------------------------------------------------------
 void Streaming::DataUploader::FlushCommands()
 {
-    DebugPrint("DataUploader Flush ", m_updateListFreeCount.load(), "/", m_updateLists.size(), " batches freed\n");
-    while (m_updateListFreeCount.load() < m_updateLists.size())
+    DebugPrint("DataUploader waiting on ", m_updateListAllocator.GetAllocated(), " tasks to complete\n");
+    while (m_updateListAllocator.GetAllocated()) // wait so long as there is outstanding work
     {
+        m_fenceMonitorFlag.Set(); // (paranoia)
         _mm_pause();
     }
     // if this loop doesn't exit, then a race condition occurred while allocating/freeing updatelists
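For readers outside the codebase: the reworked flush is a pure spin-wait on the allocator's outstanding count, setting the fence-monitor flag each pass so the fence thread cannot sleep through the final completions. A minimal sketch of the idiom, with an assumed atomic counter standing in for AllocatorMT::GetAllocated():

    #include <atomic>
    #include <immintrin.h> // _mm_pause

    // spin until all in-flight batches retire; _mm_pause() hints to the CPU
    // that this is a polling loop, reducing power draw and contention with
    // the sibling hyper-thread compared to an empty busy loop
    inline void FlushSketch(const std::atomic<unsigned>& in_outstanding)
    {
        while (in_outstanding.load(std::memory_order_acquire))
        {
            _mm_pause();
        }
    }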
@@ -258,40 +258,44 @@
 //-----------------------------------------------------------------------------
 // tries to find an available UpdateList, may return null
 //-----------------------------------------------------------------------------
-Streaming::UpdateList* Streaming::DataUploader::AllocateUpdateList(Streaming::StreamingResourceBase* in_pStreamingResource)
+Streaming::UpdateList* Streaming::DataUploader::AllocateUpdateList(Streaming::StreamingResourceDU* in_pStreamingResource)
 {
     UpdateList* pUpdateList = nullptr;
 
-    // early out if there are none available
-    if (m_updateListFreeCount.load() > 0)
+    // Heuristic
+    // if all the updatelists are in-flight, do not allocate another updatelist until the free pool hits a watermark
+    if (m_updateListsEmpty)
     {
-        // there is definitely at least one updatelist that is STATE_FREE
-        m_updateListFreeCount.fetch_sub(1);
+        // FIXME: what should the watermark be? 25% seems to be a good trade-off of latency vs. BW
+        UINT w = (UINT)m_updateLists.size() / 4;
 
-        // treat the array as a ring buffer
-        // the next index is the most-likely to be available because it has had the most time to complete
-        const UINT numLists = (UINT)m_updateLists.size();
-        for (UINT i = 0; i < numLists; i++)
+        if (w > m_updateListAllocator.GetAvailable())
         {
-            m_updateListAllocIndex = (m_updateListAllocIndex + 1) % numLists;
-            auto& p = m_updateLists[m_updateListAllocIndex];
+            return nullptr;
+        }
+        else
+        {
+            m_updateListsEmpty = false;
+        }
+    }
 
-            UpdateList::State expected = UpdateList::State::STATE_FREE;
-            if (p.m_executionState.compare_exchange_weak(expected, UpdateList::State::STATE_ALLOCATED))
-            {
-                pUpdateList = &p;
-                // it is only safe to clear the state within the allocating thread
-                p.Reset((Streaming::StreamingResourceDU*)in_pStreamingResource);
+    if (m_updateListAllocator.GetAvailable())
+    {
+        UINT index = m_updateListAllocator.Allocate();
+        pUpdateList = &m_updateLists[index];
+        ASSERT(UpdateList::State::STATE_FREE == pUpdateList->m_executionState);
 
-                // start fence polling thread now
-                m_monitorFenceFlag.Set();
-                break;
-            }
-        }
-        // pUpdateList might be null: more than 1 thread can enter the loop with initial condition of 1 free updatelist
-        // m_updateListFreeCount > 0 is an optimization, not a guarantee.
-    }
+        pUpdateList->Reset(in_pStreamingResource);
+        pUpdateList->m_executionState = UpdateList::State::STATE_ALLOCATED;
+
+        // start fence polling thread now
+        m_fenceMonitorFlag.Set();
+    }
+    else
+    {
+        m_updateListsEmpty = true;
+    }
     // calling functions must handle nullptr returned
 
     return pUpdateList;
 }
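AllocatorMT, which replaces the compare-exchange scan above, is defined in SimpleAllocator.h and is not part of this diff. A hypothetical minimal version consistent with the four calls used in this commit (Allocate(), Free(), GetAvailable(), GetAllocated()) is a mutex-guarded stack of free indices; the real class may well be lock-free:

    #include <cassert>
    #include <mutex>
    #include <vector>

    using UINT = unsigned int; // as typedef'd by the Windows headers

    // hypothetical index pool matching the AllocatorMT interface used above:
    // Allocate() pops a free index, Free() returns one, and the counts allow
    // O(1) "anything available/outstanding?" checks without scanning states
    class IndexPoolSketch
    {
    public:
        explicit IndexPoolSketch(UINT in_numElements) : m_total(in_numElements)
        {
            m_free.reserve(in_numElements);
            for (UINT i = 0; i < in_numElements; i++) { m_free.push_back(i); }
        }
        UINT GetAvailable() { std::scoped_lock lock(m_mutex); return (UINT)m_free.size(); }
        UINT GetAllocated() { return m_total - GetAvailable(); }
        UINT Allocate() // callers check GetAvailable() first, as AllocateUpdateList() does
        {
            std::scoped_lock lock(m_mutex);
            assert(!m_free.empty());
            UINT i = m_free.back();
            m_free.pop_back();
            return i;
        }
        void Free(UINT in_index)
        {
            std::scoped_lock lock(m_mutex);
            m_free.push_back(in_index);
        }
    private:
        const UINT m_total;
        std::vector<UINT> m_free; // stack of free indices
        std::mutex m_mutex;
    };

Handing out indices rather than pointers is what lets FreeUpdateList() below recover the slot with simple pointer arithmetic, and it makes the new watermark heuristic cheap: once the pool empties, allocation is refused until GetAvailable() climbs back above size()/4, trading a little latency for better-batched submissions.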

@@ -303,8 +307,10 @@ void Streaming::DataUploader::FreeUpdateList(Streaming::UpdateList& in_updateLis
     // NOTE: updatelist is deliberately not cleared until after allocation
     // otherwise there can be a race with the mapping thread
     in_updateList.m_executionState = UpdateList::State::STATE_FREE;
-    m_updateListFreeCount.fetch_add(1);
-    ASSERT(m_updateListFreeCount.load() <= m_updateLists.size());
+
+    // return the index to this updatelist to the pool
+    UINT i = UINT(&in_updateList - m_updateLists.data());
+    m_updateListAllocator.Free(i);
 }
 
 //-----------------------------------------------------------------------------
25 changes: 13 additions & 12 deletions TileUpdateManager/DataUploader.h
@@ -34,7 +34,7 @@
 #include "D3D12GpuTimer.h"
 #include "Timer.h"
 
-#include "TileUpdateManager.h"
+#include "SimpleAllocator.h"
 
 //==================================================
 // UploadBuffer keeps an upload buffer per swapchain backbuffer
@@ -44,6 +44,8 @@
 //==================================================
 namespace Streaming
 {
+    class StreamingResourceDU;
+
     class DataUploader
     {
     public:
@@ -62,19 +64,18 @@
 
         ID3D12CommandQueue* GetMappingQueue() const { return m_mappingCommandQueue.Get(); }
 
+        // return true if there is at least one update list in the FREE state
+        bool UpdateListAvailable() { return (0 != m_updateListAllocator.GetAvailable()); }
+
         // may return null. called by StreamingResource.
-        UpdateList* AllocateUpdateList(StreamingResourceBase* in_pStreamingResource);
+        UpdateList* AllocateUpdateList(StreamingResourceDU* in_pStreamingResource);
 
         // StreamingResource requests tiles to be uploaded
         void SubmitUpdateList(Streaming::UpdateList& in_updateList);
 
         // TUM requests file streamer to signal its fence after StreamingResources have queued tile uploads
         void SignalFileStreamer() { m_pFileStreamer->Signal(); }
 
-        // free updatelist after processing
-        // Streaming resource may call this (via TUM) if it allocates but doesn't use an updatelist
-        void FreeUpdateList(Streaming::UpdateList& in_updateList);
-
         enum class StreamerType
         {
             Reference,
@@ -108,13 +109,13 @@
         // pool of all updatelists
         // copy thread loops over these
        std::vector<UpdateList> m_updateLists;
+        Streaming::AllocatorMT m_updateListAllocator;
 
-        // early out: don't bother trying to allocate if nothing is available
-        // that is, it's O(1) to determine there are none available
-        std::atomic<UINT> m_updateListFreeCount;
+        // only the fence thread (which looks for final completion) frees UpdateLists
+        void FreeUpdateList(Streaming::UpdateList& in_updateList);
 
-        // pointer to next address to attempt allocation from
-        UINT m_updateListAllocIndex{ 0 };
+        // flag that all UpdateLists were allocated to indicate mitigation heuristic should engage
+        bool m_updateListsEmpty{ false };
 
         // object that performs UpdateTileMappings() requests
         Streaming::MappingUpdater m_mappingUpdater;
@@ -132,7 +133,7 @@
         // compromise solution is to keep this thread awake so long as there are live UpdateLists.
         void FenceMonitorThread();
         std::thread m_fenceMonitorThread;
-        Streaming::SynchronizationFlag m_monitorFenceFlag;
+        Streaming::SynchronizationFlag m_fenceMonitorFlag;
         RawCpuTimer* m_pFenceThreadTimer{ nullptr }; // init timer on the thread that uses it. can't really worry about thread migration.
 
         void StartThreads();
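The renamed m_fenceMonitorFlag is a Streaming::SynchronizationFlag, whose definition also lies outside this diff. From its use here it behaves as a binary, auto-reset event: Set() wakes a waiter, and a Set() that arrives before Wait() is not lost, which is what lets StopThreads() wake a thread that has not gone to sleep yet. A condition-variable sketch under those assumptions:

    #include <condition_variable>
    #include <mutex>

    // binary auto-reset event: Wait() blocks until Set(); an early Set() is
    // remembered, so wake-for-exit works even if the waiter hasn't slept yet
    class SyncFlagSketch
    {
    public:
        void Set()
        {
            {
                std::scoped_lock lock(m_mutex);
                m_set = true;
            }
            m_cv.notify_all();
        }
        void Wait()
        {
            std::unique_lock<std::mutex> lock(m_mutex);
            m_cv.wait(lock, [&] { return m_set; });
            m_set = false; // auto-reset for the next Wait()
        }
    private:
        std::condition_variable m_cv;
        std::mutex m_mutex;
        bool m_set{ false };
    };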
8 changes: 5 additions & 3 deletions TileUpdateManager/FileStreamerReference.cpp
@@ -40,9 +40,11 @@ Streaming::FileStreamerReference::FileStreamerReference(ID3D12Device* in_pDevice
     UINT in_maxTileCopiesInFlight): // upload buffer size. 1024 would become a 64MB upload buffer
     Streaming::FileStreamer(in_pDevice),
     m_copyBatches(in_maxNumCopyBatches + 2) // padded by a couple to try to help with observed issue perhaps due to OS thread sched.
-    , m_uploadAllocator(in_pDevice, in_maxTileCopiesInFlight)
+    , m_uploadAllocator(in_maxTileCopiesInFlight)
     , m_requests(in_maxTileCopiesInFlight) // pre-allocate an array of event handles corresponding to # of tiles that can fit in the upload heap
 {
+    m_uploadBuffer.Allocate(in_pDevice, in_maxTileCopiesInFlight * D3D12_TILED_RESOURCE_TILE_SIZE_IN_BYTES);
+
     D3D12_COMMAND_QUEUE_DESC queueDesc = {};
     queueDesc.Flags = D3D12_COMMAND_QUEUE_FLAG_NONE;
     queueDesc.Type = D3D12_COMMAND_LIST_TYPE_COPY;
@@ -173,7 +175,7 @@ void Streaming::FileStreamerReference::LoadTexture(Streaming::FileStreamerRefere
 {
     Streaming::UpdateList* pUpdateList = in_copyBatch.m_pUpdateList;
 
-    BYTE* pStagingBaseAddress = (BYTE*)m_uploadAllocator.GetBuffer().m_pData;
+    BYTE* pStagingBaseAddress = (BYTE*)m_uploadBuffer.GetData();
 
     UINT startIndex = in_copyBatch.m_numEvents;
     UINT endIndex = startIndex + in_numtilesToLoad;
@@ -323,7 +325,7 @@ void Streaming::FileStreamerReference::CopyThread()
     ID3D12Resource* pAtlas = c.m_pUpdateList->m_pStreamingResource->GetHeap()->ComputeCoordFromTileIndex(coord, c.m_pUpdateList->m_heapIndices[i], textureFormat);
 
     m_copyCommandList->CopyTiles(pAtlas, &coord,
-        &tileRegionSize, m_uploadAllocator.GetBuffer().m_resource.Get(),
+        &tileRegionSize, m_uploadBuffer.GetResource(),
         D3D12_TILED_RESOURCE_TILE_SIZE_IN_BYTES * c.m_uploadIndices[i],
         D3D12_TILE_COPY_FLAG_LINEAR_BUFFER_TO_SWIZZLED_TILED_RESOURCE | D3D12_TILE_COPY_FLAG_NO_HAZARD);
 }
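The CopyTiles() offset arithmetic works because D3D12 fixes a tiled-resource tile at exactly 64KiB, so upload slot n always begins at byte n * 65536; this is also why the constructor comment above equates 1024 in-flight tiles with a 64MB upload buffer. As a worked check:

    #include <d3d12.h>

    // a D3D12 tile is exactly 64KiB; slot n of the upload buffer starts at
    // n * 65536 bytes, and 1024 slots make the 64MB buffer mentioned above
    static_assert(65536 == D3D12_TILED_RESOURCE_TILE_SIZE_IN_BYTES, "unexpected tile size");
    constexpr unsigned long long UploadSlotOffset(unsigned long long in_slot)
    {
        return in_slot * D3D12_TILED_RESOURCE_TILE_SIZE_IN_BYTES;
    }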
5 changes: 3 additions & 2 deletions TileUpdateManager/FileStreamerReference.h
@@ -28,7 +28,7 @@
 #include "FileStreamer.h"
 #include "Timer.h"
 
-#include "UploadAllocator.h"
+#include "SimpleAllocator.h"
 
 //=======================================================================================
 //=======================================================================================
@@ -118,7 +118,8 @@
         UINT m_batchAllocIndex{ 0 }; // allocation optimization
 
         // structure for finding space to upload tiles
-        Streaming::UploadAllocator m_uploadAllocator;
+        Streaming::SimpleAllocator m_uploadAllocator;
+        Streaming::UploadBuffer m_uploadBuffer;
 
         void CopyThread();
         std::atomic<bool> m_copyThreadRunning{ false };
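This split is the pattern of the whole commit: UploadAllocator owned both the free-slot bookkeeping and the GPU memory, and it is now decomposed into SimpleAllocator (indices only) plus UploadBuffer (memory only). UploadBuffer's implementation is not shown in this diff; given the Allocate()/GetData()/GetResource() calls in FileStreamerReference.cpp, it is presumably a persistently mapped committed buffer in the upload heap, roughly like this sketch (error handling omitted):

    #include <d3d12.h>
    #include <wrl/client.h>

    // sketch of an UploadBuffer consistent with the calls in this diff:
    // one committed buffer in the UPLOAD heap, mapped once and left mapped
    // for CPU writes (legal for D3D12 upload heaps)
    class UploadBufferSketch
    {
    public:
        void Allocate(ID3D12Device* in_pDevice, UINT64 in_numBytes)
        {
            D3D12_HEAP_PROPERTIES heapProps{};
            heapProps.Type = D3D12_HEAP_TYPE_UPLOAD;

            D3D12_RESOURCE_DESC desc{};
            desc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER;
            desc.Width = in_numBytes;
            desc.Height = 1;
            desc.DepthOrArraySize = 1;
            desc.MipLevels = 1;
            desc.SampleDesc.Count = 1;
            desc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR; // required for buffers

            in_pDevice->CreateCommittedResource(&heapProps, D3D12_HEAP_FLAG_NONE,
                &desc, D3D12_RESOURCE_STATE_GENERIC_READ, nullptr,
                IID_PPV_ARGS(&m_resource));

            // upload heaps may remain mapped for the lifetime of the resource
            m_resource->Map(0, nullptr, &m_pData);
        }
        void* GetData() const { return m_pData; }
        ID3D12Resource* GetResource() const { return m_resource.Get(); }
    private:
        Microsoft::WRL::ComPtr<ID3D12Resource> m_resource;
        void* m_pData{ nullptr };
    };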
105 changes: 0 additions & 105 deletions TileUpdateManager/HeapAllocator.cpp

This file was deleted.
