Skip to content
This repository has been archived by the owner on Dec 25, 2023. It is now read-only.

Commit

Permalink
misc. improvements. heuristic change can result in substantial bandwi…
Browse files Browse the repository at this point in the history
…dth & latency improvements under heavy load (benchmark).
  • Loading branch information
allenhux-intel committed Dec 2, 2022
1 parent 461a8c3 commit c378b3a
Show file tree
Hide file tree
Showing 19 changed files with 89 additions and 99 deletions.
23 changes: 12 additions & 11 deletions TileUpdateManager/DataUploader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -239,12 +239,15 @@ void Streaming::DataUploader::StopThreads()
//-----------------------------------------------------------------------------
void Streaming::DataUploader::FlushCommands()
{
DebugPrint("DataUploader waiting on ", m_updateListAllocator.GetAllocated(), " tasks to complete\n");
while (m_updateListAllocator.GetAllocated()) // wait so long as there is outstanding work
if (m_updateListAllocator.GetAllocated())
{
m_submitFlag.Set(); // (paranoia)
m_fenceMonitorFlag.Set(); // (paranoia)
_mm_pause();
DebugPrint("DataUploader waiting on ", m_updateListAllocator.GetAllocated(), " tasks to complete\n");
while (m_updateListAllocator.GetAllocated()) // wait so long as there is outstanding work
{
m_submitFlag.Set(); // (paranoia)
m_fenceMonitorFlag.Set(); // (paranoia)
_mm_pause();
}
}
// if this loop doesn't exit, then a race condition occurred while allocating/freeing updatelists

Expand Down Expand Up @@ -276,7 +279,7 @@ Streaming::UpdateList* Streaming::DataUploader::AllocateUpdateList(Streaming::St

// start fence polling thread now
{
m_monitorTasks[m_monitorTaskAlloc.GetWriteIndex()] = index;
m_monitorTasks[m_monitorTaskAlloc.GetWriteIndex()] = pUpdateList;
m_monitorTaskAlloc.Allocate();
m_fenceMonitorFlag.Set();
}
Expand Down Expand Up @@ -316,7 +319,7 @@ void Streaming::DataUploader::SubmitUpdateList(Streaming::UpdateList& in_updateL

// add to submit task queue
{
m_submitTasks[m_submitTaskAlloc.GetWriteIndex()] = UINT(&in_updateList - m_updateLists.data());
m_submitTasks[m_submitTaskAlloc.GetWriteIndex()] = &in_updateList;
m_submitTaskAlloc.Allocate();
m_submitFlag.Set();
}
Expand Down Expand Up @@ -345,7 +348,7 @@ void Streaming::DataUploader::FenceMonitorThread()
for (UINT i = startIndex; i < (startIndex + numTasks); i++)
{
ASSERT(numTasks != 0);
auto& updateList = m_updateLists[m_monitorTasks[i % m_monitorTasks.size()]];
auto& updateList = *m_monitorTasks[i % m_monitorTasks.size()];

bool freeUpdateList = false;

Expand Down Expand Up @@ -461,11 +464,9 @@ void Streaming::DataUploader::SubmitThread()
{
signalMap = true;

UINT index = m_submitTasks[m_submitTaskAlloc.GetReadIndex()]; // get the next task
auto& updateList = *m_submitTasks[m_submitTaskAlloc.GetReadIndex()]; // get the next task
m_submitTaskAlloc.Free(); // consume this task

auto& updateList = m_updateLists[index];

ASSERT(UpdateList::State::STATE_SUBMITTED == updateList.m_executionState);

// set to the fence value to be signaled next
Expand Down
9 changes: 4 additions & 5 deletions TileUpdateManager/DataUploader.h
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,6 @@ namespace Streaming
ComPtr<ID3D12CommandQueue> m_mappingCommandQueue;

// pool of all updatelists
// copy thread loops over these
std::vector<UpdateList> m_updateLists;
Streaming::AllocatorMT m_updateListAllocator;

Expand All @@ -125,18 +124,18 @@ namespace Streaming
// thread to handle UpdateList submissions
void SubmitThread();
std::thread m_submitThread;
Streaming::SynchronizationFlag m_submitFlag;
std::vector<UINT> m_submitTasks;
Streaming::SynchronizationFlag m_submitFlag; // sleeps until flag set
std::vector<UpdateList*> m_submitTasks;
RingBuffer m_submitTaskAlloc;

// thread to poll copy and mapping fences
// this thread could have been designed using WaitForMultipleObjects, but it was found that SetEventOnCompletion() was expensive in a tight thread loop
// compromise solution is to keep this thread awake so long as there are live UpdateLists.
void FenceMonitorThread();
std::thread m_fenceMonitorThread;
Streaming::SynchronizationFlag m_fenceMonitorFlag;
Streaming::SynchronizationFlag m_fenceMonitorFlag; // sleeps until flag set
RawCpuTimer* m_pFenceThreadTimer{ nullptr }; // init timer on the thread that uses it. can't really worry about thread migration.
std::vector<UINT> m_monitorTasks;
std::vector<UpdateList*> m_monitorTasks;
RingBuffer m_monitorTaskAlloc;

void StartThreads();
Expand Down
4 changes: 3 additions & 1 deletion TileUpdateManager/FileStreamerDS.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,9 @@ void Streaming::FileStreamerDS::StreamTexture(Streaming::UpdateList& in_updateLi
UINT numCoords = (UINT)in_updateList.m_coords.size();
for (UINT i = 0; i < numCoords; i++)
{
request.Source.File.Offset = pTextureFileInfo->GetFileOffset(in_updateList.m_coords[i], request.Source.File.Size);
auto fileOffset = pTextureFileInfo->GetFileOffset(in_updateList.m_coords[i]);
request.Source.File.Offset = fileOffset.offset;
request.Source.File.Size = fileOffset.numBytes;

D3D12_TILED_RESOURCE_COORDINATE coord{};
ID3D12Resource* pAtlas = pDstHeap->ComputeCoordFromTileIndex(coord, in_updateList.m_heapIndices[i], textureFormat);
Expand Down
7 changes: 3 additions & 4 deletions TileUpdateManager/FileStreamerReference.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -189,8 +189,7 @@ void Streaming::FileStreamerReference::LoadTexture(Streaming::FileStreamerRefere
for (UINT i = startIndex; i < endIndex; i++)
{
// get file offset to tile
UINT32 numBytes = 0;
UINT fileOffset = pTextureFileInfo->GetFileOffset(pUpdateList->m_coords[i], numBytes);
auto fileOffset = pTextureFileInfo->GetFileOffset(pUpdateList->m_coords[i]);

// convert tile index into byte offset
UINT byteOffset = D3D12_TILED_RESOURCE_TILE_SIZE_IN_BYTES * in_copyBatch.m_uploadIndices[i];
Expand All @@ -204,11 +203,11 @@ void Streaming::FileStreamerReference::LoadTexture(Streaming::FileStreamerRefere
o.Internal = 0;
o.InternalHigh = 0;
o.OffsetHigh = 0;
o.Offset = fileOffset;
o.Offset = fileOffset.offset;

// align # bytes read
UINT alignment = FileStreamerReference::MEDIA_SECTOR_SIZE - 1;
numBytes = (numBytes + alignment) & ~(alignment);
UINT numBytes = (fileOffset.numBytes + alignment) & ~(alignment);
o.Offset &= ~alignment; // rewind the offset to alignment

::ReadFile(pFileHandle, pDst, numBytes, nullptr, &o);
Expand Down
10 changes: 5 additions & 5 deletions TileUpdateManager/InternalResources.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,18 +33,18 @@
//-----------------------------------------------------------------------------
Streaming::InternalResources::InternalResources(
ID3D12Device8* in_pDevice,
XeTexture* m_pTextureFileInfo,
const XeTexture& m_textureFileInfo,
// need the swap chain count so we can create per-frame upload buffers
UINT in_swapChainBufferCount) :
m_packedMipInfo{}, m_tileShape{}, m_numTilesTotal(0)
{
// create reserved resource
{
D3D12_RESOURCE_DESC rd = CD3DX12_RESOURCE_DESC::Tex2D(
m_pTextureFileInfo->GetFormat(),
m_pTextureFileInfo->GetImageWidth(),
m_pTextureFileInfo->GetImageHeight(), 1,
(UINT16)m_pTextureFileInfo->GetMipCount()
m_textureFileInfo.GetFormat(),
m_textureFileInfo.GetImageWidth(),
m_textureFileInfo.GetImageHeight(), 1,
(UINT16)m_textureFileInfo.GetMipCount()
);

// Layout must be D3D12_TEXTURE_LAYOUT_64KB_UNDEFINED_SWIZZLE when creating reserved resources
Expand Down
2 changes: 1 addition & 1 deletion TileUpdateManager/InternalResources.h
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ namespace Streaming
class InternalResources
{
public:
InternalResources(ID3D12Device8* in_pDevice, class XeTexture* m_pTextureFileInfo,
InternalResources(ID3D12Device8* in_pDevice, const class XeTexture& m_textureFileInfo,
// need the swap chain count so we can create per-frame upload buffers
UINT in_swapChainBufferCount);

Expand Down
9 changes: 4 additions & 5 deletions TileUpdateManager/StreamingResourceBase.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,6 @@

#include "StreamingResourceBase.h"
#include "TileUpdateManagerSR.h"
#include "XeTexture.h"

#include "UpdateList.h"

Expand Down Expand Up @@ -61,9 +60,9 @@ Streaming::StreamingResourceBase::StreamingResourceBase(
, m_pHeap(in_pHeap)
, m_pFileHandle(in_pFileHandle)
, m_filename(in_filename)
, m_textureFileInfo(in_filename)
{
m_pTextureFileInfo = std::make_unique<Streaming::XeTexture>(in_filename);
m_resources = std::make_unique<Streaming::InternalResources>(in_pTileUpdateManager->GetDevice(), m_pTextureFileInfo.get(), (UINT)m_queuedFeedback.size());
m_resources = std::make_unique<Streaming::InternalResources>(in_pTileUpdateManager->GetDevice(), m_textureFileInfo, (UINT)m_queuedFeedback.size());
m_tileMappingState.Init(m_resources->GetPackedMipInfo().NumStandardMips, m_resources->GetTiling());

// no packed mips. odd, but possible. no need to check/update this variable again.
Expand All @@ -85,7 +84,7 @@ Streaming::StreamingResourceBase::StreamingResourceBase(
m_minMipMap.resize(m_tileReferences.size(), m_maxMip);

// make sure my heap has an atlas corresponding to my format
m_pHeap->AllocateAtlas(in_pTileUpdateManager->GetMappingQueue(), m_pTextureFileInfo->GetFormat());
m_pHeap->AllocateAtlas(in_pTileUpdateManager->GetMappingQueue(), m_textureFileInfo.GetFormat());

// Load packed mips. packed mips are not streamed or evicted.
LoadPackedMips();
Expand Down Expand Up @@ -837,7 +836,7 @@ void Streaming::StreamingResourceBase::EvictionDelay::Rescue(const Streaming::St
void Streaming::StreamingResourceBase::LoadPackedMips()
{
UINT numBytes = 0;
UINT offset = m_pTextureFileInfo->GetPackedMipFileOffset(&numBytes, &m_packedMipsUncompressedSize);
UINT offset = m_textureFileInfo.GetPackedMipFileOffset(&numBytes, &m_packedMipsUncompressedSize);
m_packedMips.resize(numBytes);
std::ifstream inFile(m_filename.c_str(), std::ios::binary);
inFile.seekg(offset);
Expand Down
4 changes: 2 additions & 2 deletions TileUpdateManager/StreamingResourceBase.h
Original file line number Diff line number Diff line change
Expand Up @@ -37,12 +37,12 @@ Base class for StreamingResource

#include "SamplerFeedbackStreaming.h"
#include "InternalResources.h"
#include "XeTexture.h"

namespace Streaming
{
class TileUpdateManagerSR;
struct UpdateList;
class XeTexture;
class Heap;
class FileHandle;

Expand Down Expand Up @@ -161,7 +161,7 @@ namespace Streaming
const std::wstring m_filename;

// object that streams data from a file
std::unique_ptr<Streaming::XeTexture> m_pTextureFileInfo;
const Streaming::XeTexture m_textureFileInfo;
std::unique_ptr<Streaming::InternalResources> m_resources;
std::unique_ptr<Streaming::FileHandle> m_pFileHandle;
Streaming::Heap* m_pHeap{ nullptr };
Expand Down
2 changes: 1 addition & 1 deletion TileUpdateManager/StreamingResourceDU.h
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ namespace Streaming
class StreamingResourceDU : private StreamingResourceBase
{
public:
XeTexture* GetTextureFileInfo() const { return m_pTextureFileInfo.get(); }
const XeTexture* GetTextureFileInfo() const { return &m_textureFileInfo; }
Streaming::Heap* GetHeap() const { return m_pHeap; }

// just for packed mips
Expand Down
20 changes: 10 additions & 10 deletions TileUpdateManager/TileUpdateManager.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ void Streaming::TileUpdateManagerBase::Destroy()
//--------------------------------------------
StreamingHeap* Streaming::TileUpdateManagerBase::CreateStreamingHeap(UINT in_maxNumTilesHeap)
{
auto pStreamingHeap = new Streaming::Heap(m_pDataUploader->GetMappingQueue(), in_maxNumTilesHeap);
auto pStreamingHeap = new Streaming::Heap(m_dataUploader.GetMappingQueue(), in_maxNumTilesHeap);
return (StreamingHeap*)pStreamingHeap;
}

Expand All @@ -69,7 +69,7 @@ StreamingResource* Streaming::TileUpdateManagerBase::CreateStreamingResource(con
// if threads are running, stop them. they have state that depends on knowing the # of StreamingResources
Finish();

Streaming::FileHandle* pFileHandle = m_pDataUploader->OpenFile(in_filename);
Streaming::FileHandle* pFileHandle = m_dataUploader.OpenFile(in_filename);
auto pRsrc = new Streaming::StreamingResourceBase(in_filename, pFileHandle, (Streaming::TileUpdateManagerSR*)this, (Streaming::Heap*)in_pHeap);
m_streamingResources.push_back(pRsrc);
m_numStreamingResourcesChanged = true;
Expand All @@ -92,11 +92,11 @@ void Streaming::TileUpdateManagerBase::UseDirectStorage(bool in_useDS)
streamerType = Streaming::DataUploader::StreamerType::DirectStorage;
}

auto pOldStreamer = m_pDataUploader->SetStreamer(streamerType);
auto pOldStreamer = m_dataUploader.SetStreamer(streamerType);

for (auto& s : m_streamingResources)
{
s->SetFileHandle(m_pDataUploader.get());
s->SetFileHandle(&m_dataUploader);
}

delete pOldStreamer;
Expand Down Expand Up @@ -142,13 +142,13 @@ float Streaming::TileUpdateManagerBase::GetCpuProcessFeedbackTime()
//-----------------------------------------------------------------------------
// performance and visualization
//-----------------------------------------------------------------------------
float Streaming::TileUpdateManagerBase::GetGpuStreamingTime() const { return m_pDataUploader->GetGpuStreamingTime(); }
float Streaming::TileUpdateManagerBase::GetTotalTileCopyLatency() const { return m_pDataUploader->GetApproximateTileCopyLatency(); }
float Streaming::TileUpdateManagerBase::GetGpuStreamingTime() const { return m_dataUploader.GetGpuStreamingTime(); }
float Streaming::TileUpdateManagerBase::GetTotalTileCopyLatency() const { return m_dataUploader.GetApproximateTileCopyLatency(); }

// the total time the GPU spent resolving feedback during the previous frame
float Streaming::TileUpdateManagerBase::GetGpuTime() const { return m_gpuTimerResolve.GetTimes()[m_renderFrameIndex].first; }
UINT Streaming::TileUpdateManagerBase::GetTotalNumUploads() const { return m_pDataUploader->GetTotalNumUploads(); }
UINT Streaming::TileUpdateManagerBase::GetTotalNumEvictions() const { return m_pDataUploader->GetTotalNumEvictions(); }
UINT Streaming::TileUpdateManagerBase::GetTotalNumUploads() const { return m_dataUploader.GetTotalNumUploads(); }
UINT Streaming::TileUpdateManagerBase::GetTotalNumEvictions() const { return m_dataUploader.GetTotalNumEvictions(); }
UINT Streaming::TileUpdateManagerBase::GetTotalNumSubmits() const { return m_numTotalSubmits; }

void Streaming::TileUpdateManagerBase::SetVisualizationMode(UINT in_mode)
Expand All @@ -160,12 +160,12 @@ void Streaming::TileUpdateManagerBase::SetVisualizationMode(UINT in_mode)
o->ClearAllocations();
}

m_pDataUploader->SetVisualizationMode(in_mode);
m_dataUploader.SetVisualizationMode(in_mode);
}

void Streaming::TileUpdateManagerBase::CaptureTraceFile(bool in_captureTrace)
{
m_pDataUploader->CaptureTraceFile(in_captureTrace);
m_dataUploader.CaptureTraceFile(in_captureTrace);
}

//-----------------------------------------------------------------------------
Expand Down
29 changes: 14 additions & 15 deletions TileUpdateManager/TileUpdateManagerBase.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,6 @@
#include "pch.h"

#include "TileUpdateManagerBase.h"
#include "DataUploader.h"
#include "StreamingResourceBase.h"
#include "XeTexture.h"
#include "StreamingHeap.h"
Expand All @@ -47,19 +46,13 @@ m_numSwapBuffers(in_desc.m_swapChainBufferCount)
, m_addAliasingBarriers(in_desc.m_addAliasingBarriers)
, m_minNumUploadRequests(in_desc.m_minNumUploadRequests)
, m_threadPriority((int)in_desc.m_threadPriority)
, m_dataUploader(in_pDevice, in_desc.m_maxNumCopyBatches, in_desc.m_stagingBufferSizeMB, in_desc.m_maxTileMappingUpdatesPerApiCall, m_threadPriority)
{
ASSERT(D3D12_COMMAND_LIST_TYPE_DIRECT == m_directCommandQueue->GetDesc().Type);

ThrowIfFailed(in_pDevice->CreateFence(0, D3D12_FENCE_FLAG_NONE, IID_PPV_ARGS(&m_frameFence)));
m_frameFence->SetName(L"Streaming::TileUpdateManagerBase::m_frameFence");

m_pDataUploader = std::make_unique<Streaming::DataUploader>(
in_pDevice,
in_desc.m_maxNumCopyBatches,
in_desc.m_stagingBufferSizeMB,
in_desc.m_maxTileMappingUpdatesPerApiCall,
m_threadPriority);

const UINT numAllocators = m_numSwapBuffers;
for (UINT c = 0; c < (UINT)CommandListName::Num; c++)
{
Expand Down Expand Up @@ -137,7 +130,7 @@ void Streaming::TileUpdateManagerBase::StartThreads()
//-----------------------------------------------------------------------------
void Streaming::TileUpdateManagerBase::SignalFileStreamer()
{
m_pDataUploader->SignalFileStreamer();
m_dataUploader.SignalFileStreamer();
m_numTotalSubmits.fetch_add(1, std::memory_order_relaxed);
}
void Streaming::TileUpdateManagerBase::ProcessFeedbackThread()
Expand Down Expand Up @@ -206,7 +199,14 @@ void Streaming::TileUpdateManagerBase::ProcessFeedbackThread()
UINT newStaleSize = 0; // track number of stale resources, then resize the array to the updated number
for (auto resourceIndex : staleResources)
{
if (m_pDataUploader->GetNumUpdateListsAvailable() && m_threadsRunning)
if (m_dataUploader.GetNumUpdateListsAvailable()
// with DirectStorage Queue::EnqueueRequest() can block.
// when there are many pending uploads, there can be multiple frames of waiting.
// if we wait too long in this loop, we miss calling ProcessFeedback() above which adds pending uploads & evictions
// this is a vicious feedback cycle that leads to even more pending requests, and even longer delays.
// the following check avoids enqueueing more uploads if the frame has changed:
&& (m_frameFence->GetCompletedValue() == previousFrameFenceValue)
&& m_threadsRunning) // don't add work while exiting
{
uploadsRequested += m_streamingResources[resourceIndex]->QueueTiles();
}
Expand All @@ -227,7 +227,7 @@ void Streaming::TileUpdateManagerBase::ProcessFeedbackThread()
}
}
staleResources.resize(newStaleSize); // compact array
if (numEvictions) { m_pDataUploader->AddEvictions(numEvictions); }
if (numEvictions) { m_dataUploader.AddEvictions(numEvictions); }
}

// if there are uploads, maybe signal depending on heuristic to minimize # signals
Expand All @@ -238,7 +238,7 @@ void Streaming::TileUpdateManagerBase::ProcessFeedbackThread()
(0 == staleResources.size()) || // flush because there's no more work to be done (no stale resources, all feedback has been processed)
// if we need updatelists and there is a minimum amount of pending work, go ahead and submit
// this minimum heuristic prevents "storms" of submits with too few tiles to sustain good throughput
((0 == m_pDataUploader->GetNumUpdateListsAvailable()) && (uploadsRequested > m_minNumUploadRequests)))
((0 == m_dataUploader.GetNumUpdateListsAvailable()) && (uploadsRequested > m_minNumUploadRequests)))
{
SignalFileStreamer();
uploadsRequested = 0;
Expand Down Expand Up @@ -286,10 +286,9 @@ void Streaming::TileUpdateManagerBase::Finish()
{
m_updateResidencyThread.join();
}

// now we are no longer producing work for the DataUploader, so its commands can be drained
m_pDataUploader->FlushCommands();
}
// now we are no longer producing work for the DataUploader, so its commands can be drained
m_dataUploader.FlushCommands();
}

//-----------------------------------------------------------------------------
Expand Down
Loading

0 comments on commit c378b3a

Please sign in to comment.