diff --git a/DdsToXet/DdsToXet.cpp b/DdsToXet/DdsToXet.cpp index 17adc8f..319b14b 100644 --- a/DdsToXet/DdsToXet.cpp +++ b/DdsToXet/DdsToXet.cpp @@ -25,16 +25,18 @@ //********************************************************* // Convert a DDS into a custom layout -// internally aligned (according to file header) and 64KB tiled +// also converts v2 XeT to latest format #include #include #include #include +#include #include "ArgParser.h" #include "d3dx12.h" +#include "XeTv2.h" #include "XetFileHeader.h" using Microsoft::WRL::ComPtr; @@ -50,7 +52,25 @@ struct SourceSubResourceData UINT m_rowPitch; UINT m_slicePitch; }; -std::vector m_subresourceData; +std::vector m_subresourceData; // read from source DDS + +// output subresource info +std::vector m_subresourceInfo; + +// offsets table +std::vector m_offsets; + +// texture bytes +std::vector m_textureData; + +// packed mip bytes +std::vector m_packedMipData; + +D3D12_RESOURCE_DESC m_resourceDesc; // will be created and re-used + +UINT32 m_compressionFormat{ 0 }; + +bool m_convertFromXet2{ false }; //----------------------------------------------------------------------------- //----------------------------------------------------------------------------- @@ -71,6 +91,52 @@ static DXGI_FORMAT GetFormatFromHeader(const DirectX::DDS_HEADER& in_ddsHeader) { return DXGI_FORMAT_BC1_UNORM; } + if (MAKEFOURCC('D', 'X', 'T', '3') == ddpf.fourCC) + { + return DXGI_FORMAT_BC2_UNORM; + } + if (MAKEFOURCC('D', 'X', 'T', '5') == ddpf.fourCC) + { + return DXGI_FORMAT_BC3_UNORM; + } + + // While pre-multiplied alpha isn't directly supported by the DXGI formats, + // they are basically the same as these BC formats so they can be mapped + if (MAKEFOURCC('D', 'X', 'T', '2') == ddpf.fourCC) + { + return DXGI_FORMAT_BC2_UNORM; + } + if (MAKEFOURCC('D', 'X', 'T', '4') == ddpf.fourCC) + { + return DXGI_FORMAT_BC3_UNORM; + } + + if (MAKEFOURCC('A', 'T', 'I', '1') == ddpf.fourCC) + { + return DXGI_FORMAT_BC4_UNORM; + } + if (MAKEFOURCC('B', 
'C', '4', 'U') == ddpf.fourCC)
+        {
+            return DXGI_FORMAT_BC4_UNORM;
+        }
+        if (MAKEFOURCC('B', 'C', '4', 'S') == ddpf.fourCC)
+        {
+            return DXGI_FORMAT_BC4_SNORM;
+        }
+
+        if (MAKEFOURCC('A', 'T', 'I', '2') == ddpf.fourCC)
+        {
+            return DXGI_FORMAT_BC5_UNORM;
+        }
+        if (MAKEFOURCC('B', 'C', '5', 'U') == ddpf.fourCC)
+        {
+            return DXGI_FORMAT_BC5_UNORM;
+        }
+        if (MAKEFOURCC('B', 'C', '5', 'S') == ddpf.fourCC)
+        {
+            return DXGI_FORMAT_BC5_SNORM;
+        }
+
         // other formats?
     }
@@ -80,12 +146,11 @@ static DXGI_FORMAT GetFormatFromHeader(const DirectX::DDS_HEADER& in_ddsHeader)
 }
 
 //-----------------------------------------------------------------------------
-// return aligned # bytes
-// e.g. if alignment is 4096, and input is 42, returned value will be 4096.
+// return aligned # bytes based on conservative 4KB alignment
 //-----------------------------------------------------------------------------
 UINT GetAlignedSize(UINT in_numBytes)
 {
-    UINT alignment = XetFileHeader::GetAlignment() - 1;
+    UINT alignment = 4096 - 1;
     UINT aligned = (in_numBytes + alignment) & (~alignment);
     return aligned;
 }
@@ -94,62 +159,62 @@ UINT GetAlignedSize(UINT in_numBytes)
 //-----------------------------------------------------------------------------
+// Creates a reserved (tiled) 2D texture that matches the DDS header in out_header,
+// queries its tiling, and records the result:
+// - m_resourceDesc is overwritten with the description used for the reserved resource
+// - m_subresourceInfo gets one StandardMipInfo per standard (non-packed) mip
+// - out_header.m_mipInfo gets the standard/packed mip and tile counts
+// A mipMapCount of 0 in the DDS header is rewritten to 1.
+// Error() is called if the device or the reserved resource cannot be created.
 void GetTiling(XetFileHeader& out_header)
 {
-    struct TiledResourceDesc
-    {
-        D3D12_PACKED_MIP_INFO m_mipInfo; // last n mips may be packed into a single tile
-        std::vector<D3D12_SUBRESOURCE_TILING> m_tiling;
-        D3D12_TILE_SHAPE m_tileShape; // e.g. a 64K tile may contain 128x128 texels @ 4B/pixel
-        UINT m_numTilesTotal;
-    };
-    TiledResourceDesc tiledDesc;
+    std::vector<D3D12_SUBRESOURCE_TILING> subresourceTiling;
+    D3D12_TILE_SHAPE tileShape{}; // e.g. a 64K tile may contain 128x128 texels @ 4B/pixel
+    UINT numTilesTotal = 0;
+
+    D3D12_PACKED_MIP_INFO packedMipInfo{}; // last n mips may be packed into a single tile
 
     UINT imageWidth = out_header.m_ddsHeader.width;
     UINT imageHeight = out_header.m_ddsHeader.height;
+
+    if (0 == out_header.m_ddsHeader.mipMapCount)
+    {
+        out_header.m_ddsHeader.mipMapCount = 1;
+    }
     UINT mipCount = out_header.m_ddsHeader.mipMapCount;
 
     ComPtr<ID3D12Device> device;
     D3D12CreateDevice(0, D3D_FEATURE_LEVEL_12_0, IID_PPV_ARGS(&device));
+    // device stays null when creation fails; stop before it is dereferenced below
+    if (nullptr == device.Get())
+    {
+        Error(L"Failed to create D3D12 device");
+    }
-
-    D3D12_RESOURCE_DESC rd = CD3DX12_RESOURCE_DESC::Tex2D(out_header.m_extensionHeader.dxgiFormat, imageWidth, imageHeight, 1, mipCount);
+    m_resourceDesc = CD3DX12_RESOURCE_DESC::Tex2D(out_header.m_extensionHeader.dxgiFormat, imageWidth, imageHeight, 1, mipCount);
 
     // Layout must be D3D12_TEXTURE_LAYOUT_64KB_UNDEFINED_SWIZZLE when creating reserved resources
-    rd.Layout = D3D12_TEXTURE_LAYOUT_64KB_UNDEFINED_SWIZZLE;
+    m_resourceDesc.Layout = D3D12_TEXTURE_LAYOUT_64KB_UNDEFINED_SWIZZLE;
 
     ComPtr<ID3D12Resource> resource;
-    device->CreateReservedResource(&rd, D3D12_RESOURCE_STATE_COMMON, nullptr, IID_PPV_ARGS(&resource));
+    device->CreateReservedResource(&m_resourceDesc, D3D12_RESOURCE_STATE_COMMON, nullptr, IID_PPV_ARGS(&resource));
+    // resource stays null when creation fails (e.g. unsupported format or no tiled resource support)
+    if (nullptr == resource.Get())
+    {
+        Error(L"Failed to create reserved resource");
+    }
 
     // query the reserved resource for its tile properties
     // allocate data structure according to tile properties
-    {
-        UINT subresourceCount = resource->GetDesc().MipLevels;
-        tiledDesc.m_tiling.resize(subresourceCount);
-        device->GetResourceTiling(resource.Get(),
-            &tiledDesc.m_numTilesTotal,
-            &tiledDesc.m_mipInfo,
-            &tiledDesc.m_tileShape, &subresourceCount, 0,
-            tiledDesc.m_tiling.data());
-    }
+    UINT subresourceCount = resource->GetDesc().MipLevels;
+    subresourceTiling.resize(subresourceCount);
+    device->GetResourceTiling(resource.Get(),
+        &numTilesTotal,
+        &packedMipInfo,
+        &tileShape, &subresourceCount, 0,
+        subresourceTiling.data());
 
     //--------------------------
     // pre-fill header information based on tiling
     //--------------------------
+    m_subresourceInfo.resize(subresourceCount);
     UINT subresourceTileIndex = 0;
-    for (UINT s = 0; s < tiledDesc.m_mipInfo.NumStandardMips; s++)
+    for (UINT s = 0; s < packedMipInfo.NumStandardMips; s++)
     {
-        out_header.m_subresourceInfo[s].m_standardMipInfo = XetFileHeader::StandardMipInfo{
-            tiledDesc.m_tiling[s].WidthInTiles,
-            tiledDesc.m_tiling[s].HeightInTiles,
+        m_subresourceInfo[s].m_standardMipInfo = XetFileHeader::StandardMipInfo{
+            subresourceTiling[s].WidthInTiles,
+            subresourceTiling[s].HeightInTiles,
             0, // FIXME? texture array not supported
             subresourceTileIndex };
 
-        subresourceTileIndex += tiledDesc.m_tiling[s].WidthInTiles * tiledDesc.m_tiling[s].HeightInTiles;
+        subresourceTileIndex += subresourceTiling[s].WidthInTiles * subresourceTiling[s].HeightInTiles;
     }
-    out_header.m_mipInfo = XetFileHeader::MipInfo{
-        tiledDesc.m_mipInfo.NumStandardMips,
-        tiledDesc.m_mipInfo.NumPackedMips,
-        tiledDesc.m_mipInfo.NumTilesForPackedMips,
-        tiledDesc.m_mipInfo.StartTileIndexInOverallResource
-    };
+
+    out_header.m_mipInfo.m_numStandardMips = packedMipInfo.NumStandardMips;
+    out_header.m_mipInfo.m_numTilesForStandardMips = numTilesTotal - packedMipInfo.NumTilesForPackedMips;
+    out_header.m_mipInfo.m_numPackedMips = packedMipInfo.NumPackedMips;
+    out_header.m_mipInfo.m_numTilesForPackedMips = packedMipInfo.NumTilesForPackedMips;
+    out_header.m_mipInfo.m_numUncompressedBytesForPackedMips = 0; // will be filled in later
 }
 
 //-----------------------------------------------------------------------------
@@ -197,15 +262,15 @@ void FillSubresourceData(std::vector& out_subresourceData
 //-----------------------------------------------------------------------------
 // convert standard dds into tiled layout
 //-----------------------------------------------------------------------------
-UINT WriteBits(BYTE* out_pDst,
+UINT WriteTile(BYTE* out_pDst,
     const D3D12_TILED_RESOURCE_COORDINATE& in_coord,
     const SourceSubResourceData& in_subresourceData,
     const BYTE* in_pSrc)
 {
-    // this is a 
BC7 decoder + // this is a BC7 or BC1 decoder // we know that tiles will be 64KB - // 1 tile of size 256x256 will have a row size of 1024 bytes, and 64 rows (4 texels per row) - // we will always be copying 64 rows, since every row is 4 texels + // 1 tile of BC7 size 256x256 will have a row size of 1024 bytes, and 64 rows (4 texels per row) + // 1 tile of BC1 size 512x256 will also have row size 1024 bytes and 64 rows const UINT tileRowBytes = 1024; const UINT numRowsPerTile = 64; @@ -230,11 +295,8 @@ UINT WriteBits(BYTE* out_pDst, //----------------------------------------------------------------------------- // builds offset table and fills tiled texture data -// returns # bytes written to texture data //----------------------------------------------------------------------------- -void WriteTiles( - std::vector& out_textureData, std::vector& out_offsets, - const XetFileHeader& in_header, const BYTE* in_pSrc) +void WriteTiles(const XetFileHeader& in_header, const BYTE* in_pSrc) { UINT imageWidth = in_header.m_ddsHeader.width; UINT imageHeight = in_header.m_ddsHeader.height; @@ -243,34 +305,107 @@ void WriteTiles( // texture data starts after the header, and after the table of offsets UINT offset = 0; - // note (uncompressed) tiles are naturally aligned at greater than file alignment granularity. 
- const UINT tileSizeBytes = GetAlignedSize(XetFileHeader::GetTileSize()); + std::vector tile; // scratch space for writing tiled texture data // find the base address of each /tiled/ mip level + for (UINT s = 0; s < in_header.m_mipInfo.m_numStandardMips; s++) { - for (UINT s = 0; s < in_header.m_mipInfo.m_numStandardMips; s++) + for (UINT y = 0; y < m_subresourceInfo[s].m_standardMipInfo.m_heightTiles; y++) { - for (UINT y = 0; y < in_header.m_subresourceInfo[s].m_standardMipInfo.m_heightTiles; y++) + for (UINT x = 0; x < m_subresourceInfo[s].m_standardMipInfo.m_widthTiles; x++) { - for (UINT x = 0; x < in_header.m_subresourceInfo[s].m_standardMipInfo.m_widthTiles; x++) + tile.resize(D3D12_TILED_RESOURCE_TILE_SIZE_IN_BYTES); // reset to standard tile size + + if (m_convertFromXet2) { - // write tile - out_textureData.resize(out_textureData.size() + tileSizeBytes); - WriteBits(&out_textureData[offset], D3D12_TILED_RESOURCE_COORDINATE{ x, y, 0, s }, m_subresourceData[s], in_pSrc); + memcpy(tile.data(), in_pSrc, D3D12_TILED_RESOURCE_TILE_SIZE_IN_BYTES); + in_pSrc += D3D12_TILED_RESOURCE_TILE_SIZE_IN_BYTES; + } + else + { + WriteTile(tile.data(), D3D12_TILED_RESOURCE_COORDINATE{ x, y, 0, s }, m_subresourceData[s], in_pSrc); + } - // add tileData to array - XetFileHeader::TileData outData{0}; - outData.m_offset = offset; - outData.m_numBytes = tileSizeBytes; - out_offsets.push_back(outData); + m_textureData.resize(m_textureData.size() + tile.size()); // grow the texture space to hold the new tile + memcpy(&m_textureData[offset], tile.data(), tile.size()); // copy bytes - offset = (UINT)out_textureData.size(); - } + // add tileData to array + XetFileHeader::TileData outData{0}; + outData.m_offset = offset; + outData.m_numBytes = (UINT)tile.size(); + m_offsets.push_back(outData); + + offset = (UINT)m_textureData.size(); } } } } +//----------------------------------------------------------------------------- +// pad packed mips according to copyable footprint requirements 
+// in_psrc points at the packed mips as stored in the source, with rows tightly packed.
+// out_paddedPackedMips receives the same rows laid out per GetCopyableFootprints():
+// each subresource starts at its reported placement offset and its rows are spaced
+// by the reported row pitch. Bytes between rows/subresources are zero.
+// Error() is called if the D3D12 device cannot be created.
+//-----------------------------------------------------------------------------
+void PadPackedMips(const XetFileHeader& in_header, const BYTE* in_psrc, std::vector<BYTE>& out_paddedPackedMips)
+{
+    const UINT firstSubresource = in_header.m_mipInfo.m_numStandardMips;
+    const UINT numSubresources = in_header.m_mipInfo.m_numPackedMips;
+
+    // a texture without packed mips has nothing to pad
+    out_paddedPackedMips.clear();
+    if (0 == numSubresources)
+    {
+        return;
+    }
+
+    UINT64 totalBytes = 0;
+    std::vector<D3D12_PLACED_SUBRESOURCE_FOOTPRINT> srcLayout(numSubresources);
+    std::vector<UINT> numRows(numSubresources);
+    std::vector<UINT64> rowSizeBytes(numSubresources);
+
+    ComPtr<ID3D12Device> device;
+    D3D12CreateDevice(0, D3D_FEATURE_LEVEL_12_0, IID_PPV_ARGS(&device));
+    // device stays null when creation fails; stop before it is dereferenced below
+    if (nullptr == device.Get())
+    {
+        Error(L"Failed to create D3D12 device");
+    }
+
+    device->GetCopyableFootprints(&m_resourceDesc, firstSubresource, numSubresources,
+        0, srcLayout.data(), numRows.data(), rowSizeBytes.data(), &totalBytes);
+
+    out_paddedPackedMips.assign((size_t)totalBytes, 0);
+
+    for (UINT i = 0; i < numSubresources; i++)
+    {
+        // Start every subresource at its own placement offset (relative to the base
+        // offset of 0 passed above). Accumulating RowPitch alone drifts from the
+        // reported layout whenever numRows * RowPitch is not the next placement offset.
+        BYTE* pDst = out_paddedPackedMips.data() + srcLayout[i].Offset;
+
+        for (UINT r = 0; r < numRows[i]; r++)
+        {
+            memcpy(pDst, in_psrc, (size_t)rowSizeBytes[i]);
+            pDst += srcLayout[i].Footprint.RowPitch;
+            in_psrc += rowSizeBytes[i];
+        }
+    }
+}
+
+//-----------------------------------------------------------------------------
+// stores padded packed mips. returns uncompressed size.
+// in_pBytes / in_numBytes describe the whole source file; the packed mips are read
+// from its last bytes. For each packed mip the source row pitch and slice pitch are
+// recorded in m_subresourceInfo. The padded result is stored in m_packedMipData and
+// one entry describing it is appended to m_offsets.
+//-----------------------------------------------------------------------------
+UINT WritePackedMips(const XetFileHeader& in_header, BYTE* in_pBytes, size_t in_numBytes)
+{
+    UINT numPackedMipBytes = 0;
+    for (UINT i = 0; i < in_header.m_mipInfo.m_numPackedMips; i++)
+    {
+        UINT s = in_header.m_mipInfo.m_numStandardMips + i;
+        m_subresourceInfo[s].m_packedMipInfo = XetFileHeader::PackedMipInfo{
+            m_subresourceData[s].m_rowPitch,
+            m_subresourceData[s].m_slicePitch,
+            0xbaadbaad, 0xbaadbaad }; // FIXME: include padded row pitch and slice pitch
+        numPackedMipBytes += m_subresourceData[s].m_slicePitch;
+    }
+
+    // packed mip data is at the end of the DDS file
+    // guard the subtraction below: a truncated file would otherwise wrap to a huge offset
+    if (numPackedMipBytes > in_numBytes)
+    {
+        Error(L"Source file is too small to contain its packed mips");
+    }
+    UINT srcOffset = UINT(in_numBytes - numPackedMipBytes);
+
+    BYTE* pSrc = &in_pBytes[srcOffset];
+    PadPackedMips(in_header, pSrc, m_packedMipData);
+    UINT numBytesPadded = (UINT)m_packedMipData.size(); // uncompressed and padded
+
+    // last offset structure points at the packed mips, which follow the last tile.
+    // with no standard mips there are no tile entries, so the packed mips start at offset 0
+    XetFileHeader::TileData outData{ 0 };
+    outData.m_offset = m_offsets.empty() ? 0 : (m_offsets.back().m_offset + m_offsets.back().m_numBytes);
+    outData.m_numBytes = numBytesPadded;
+    m_offsets.push_back(outData);
+
+    return numBytesPadded;
+}
+
 //-----------------------------------------------------------------------------
 //-----------------------------------------------------------------------------
 int main()
@@ -286,41 +421,74 @@ int main()
     //--------------------------
     // read dds file
     //--------------------------
-    std::ifstream inFile(inFileName, std::ios::in | std::ios::binary);
-    if (!inFile.is_open()) { Error(L"File not found"); }
+    HANDLE inFileHandle = CreateFile(inFileName.data(), GENERIC_READ, FILE_SHARE_READ, NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL);
+    // CreateFile signals failure with INVALID_HANDLE_VALUE, never NULL
+    if (INVALID_HANDLE_VALUE == inFileHandle)
+    {
+        Error(L"Failed to open file");
+    }
+
+    HANDLE inFileMapping = CreateFileMapping(inFileHandle, NULL, PAGE_READONLY, 0, 0, NULL);
+    if (NULL == inFileMapping)
+    {
+        Error(L"Failed to create mapping");
+    }
+    BYTE* pInFileBytes = 
(BYTE*)MapViewOfFile(inFileMapping, FILE_MAP_READ, 0, 0, 0); + if (NULL == pInFileBytes) + { + Error(L"Failed to map file"); + } - // pre-size an array to hold the bits, then load them all - std::vector bytes; - inFile.seekg(0, std::ios::end); - bytes.reserve(inFile.tellg()); - inFile.seekg(0, std::ios::beg); - bytes.insert(bytes.begin(), std::istreambuf_iterator(inFile), std::istreambuf_iterator()); - inFile.close(); + XetFileHeader header; + header.m_compressionFormat = m_compressionFormat; //-------------------------- // interpret contents based on dds header //-------------------------- - BYTE* pBits = bytes.data(); - - UINT32 magic = *(UINT32*)pBits; - pBits += sizeof(magic); - if (DirectX::DDS_MAGIC != magic) { Error(L"Not a valid DDS file"); } - - XetFileHeader header; - header.m_ddsHeader = *(DirectX::DDS_HEADER*)pBits; - pBits += header.m_ddsHeader.size; - - if ((header.m_ddsHeader.ddspf.flags & DDS_FOURCC) && (MAKEFOURCC('D', 'X', '1', '0') == header.m_ddsHeader.ddspf.fourCC)) + BYTE* pBits = pInFileBytes; + if (DirectX::DDS_MAGIC == *(UINT32*)pBits) { - header.m_extensionHeader = *(DirectX::DDS_HEADER_DXT10*)pBits; - pBits += sizeof(DirectX::DDS_HEADER_DXT10); + pBits += sizeof(UINT32); + header.m_ddsHeader = *(DirectX::DDS_HEADER*)pBits; + pBits += header.m_ddsHeader.size; + + if ((header.m_ddsHeader.ddspf.flags & DDS_FOURCC) && (MAKEFOURCC('D', 'X', '1', '0') == header.m_ddsHeader.ddspf.fourCC)) + { + header.m_extensionHeader = *(DirectX::DDS_HEADER_DXT10*)pBits; + pBits += sizeof(DirectX::DDS_HEADER_DXT10); + } + else + { + DirectX::DDS_HEADER_DXT10 extensionHeader{}; + header.m_extensionHeader = extensionHeader; + header.m_extensionHeader.dxgiFormat = GetFormatFromHeader(header.m_ddsHeader); + } } else { - DirectX::DDS_HEADER_DXT10 extensionHeader{}; - header.m_extensionHeader = extensionHeader; - header.m_extensionHeader.dxgiFormat = GetFormatFromHeader(header.m_ddsHeader); + XetFileHeaderV2 srcHeader = *(XetFileHeaderV2*)pBits; + if 
(XetFileHeaderV2::GetMagic() != srcHeader.m_magic) + { + Error(L"Not a valid DDS or XET file"); + } + + if (XetFileHeaderV2::GetVersion() != srcHeader.m_version) + { + Error(L"Not a valid XET version"); + } + + m_convertFromXet2 = true; // changes behavior within WriteTiles() + + header.m_ddsHeader = srcHeader.m_ddsHeader; + header.m_extensionHeader = srcHeader.m_extensionHeader; + + XetFileHeaderV2::TileData* pTileData = (XetFileHeaderV2::TileData*)pBits; + size_t numBytes = sizeof(XetFileHeaderV2); + numBytes += sizeof(XetFileHeaderV2::TileData) * srcHeader.m_mipInfo.m_numTilesForStandardMips; + size_t alignment = XetFileHeaderV2::GetAlignment() - 1; + auto aligned = (numBytes + alignment) & (~alignment); + pBits += aligned; } + // NOTE: pBits now points at beginning of DDS data GetTiling(header); @@ -330,90 +498,58 @@ int main() //-------------------------- // reserve output space //-------------------------- - std::vector textureData; - textureData.reserve(bytes.size()); - // offsets table - std::vector offsets; - offsets.reserve(header.m_mipInfo.m_numTilesForStandardMips); - - // offsets into metadata - // m : n relationship where n tiles may use m metadata blocks, where m <= n - std::vector metadataOffsets; - metadataOffsets.reserve(offsets.size()); - // metadata itself should take less space than tile data - std::vector metadata; - metadata.reserve(bytes.size()); + std::filesystem::path inFilePath(inFileName ); + auto fileSize = std::filesystem::file_size(inFilePath); + + m_textureData.reserve(fileSize); // reserve enough space to hold the whole uncompressed source + m_offsets.reserve(header.m_mipInfo.m_numTilesForStandardMips + 1); //-------------------------- // write tiles //-------------------------- - header.m_numMetadataBlobs = 0; - WriteTiles(textureData, offsets, header, pBits); - assert(textureData.size() == header.m_mipInfo.m_numTilesForStandardMips * D3D12_TILED_RESOURCE_TILE_SIZE_IN_BYTES); + WriteTiles(header, pBits); + 
header.m_mipInfo.m_numUncompressedBytesForPackedMips = WritePackedMips(header, pInFileBytes, fileSize); //------------------------------------------ - // compute gap between header and aligned texture data + // correct offsets to account for alignment after header //------------------------------------------ - UINT offsetsSize = UINT(sizeof(offsets[0]) * offsets.size()); - UINT metadataOffsetsSize = UINT(sizeof(metadataOffsets[0]) * metadataOffsets.size()); - // no gap between header and offets table - UINT numHeaderBytes = UINT(sizeof(XetFileHeader) + offsetsSize + metadataOffsetsSize); - - // alignment gap after header - UINT alignedHeaderSize = GetAlignedSize(numHeaderBytes); - UINT headerGapNumBytes = alignedHeaderSize - numHeaderBytes; - std::vector headerGap(headerGapNumBytes, 0); + UINT64 textureDataOffset = sizeof(header) + + (m_subresourceInfo.size() * sizeof(m_subresourceInfo[0])) + + (m_offsets.size() * sizeof(m_offsets[0])); - // note the metadata blobs are already in an aligned-size block - UINT alignedMetadataSize = UINT(sizeof(metadata[0]) * metadata.size()); - - UINT metadataBaseOffset = alignedHeaderSize; - UINT textureBaseOffset = metadataBaseOffset + alignedMetadataSize; - - //------------------------------------------ - // correct offsets to account for alignment after header or metadata - //------------------------------------------ - UINT packedMipBytes = 0; - UINT packedMipOffset = textureBaseOffset + (UINT)textureData.size(); - for (UINT i = 0; i < header.m_mipInfo.m_numPackedMips; i++) - { - UINT s = header.m_mipInfo.m_numStandardMips + i; - header.m_subresourceInfo[s].m_packedMipInfo = XetFileHeader::PackedMipInfo{ - m_subresourceData[s].m_rowPitch, - m_subresourceData[s].m_slicePitch, - packedMipBytes + packedMipOffset }; - packedMipBytes += m_subresourceData[s].m_slicePitch; - } - - for (auto& m : metadataOffsets) + // align only for legacy support for uncompressed file formats + std::vector alignedTextureDataGap; + if 
(!m_compressionFormat) { - m.m_offset += metadataBaseOffset; + UINT alignedTextureDataOffset = GetAlignedSize((UINT)textureDataOffset); + alignedTextureDataGap.resize(alignedTextureDataOffset - textureDataOffset, 0); + textureDataOffset += alignedTextureDataGap.size(); } - for (auto& o : offsets) + // correct the tile offsets to account for the preceding data + for (auto& o : m_offsets) { - o.m_offset += textureBaseOffset; + o.m_offset += (UINT)textureDataOffset; } std::ofstream outFile(outFileName, std::ios::out | std::ios::binary); outFile.write((char*)&header, sizeof(header)); - outFile.write((char*)offsets.data(), offsetsSize); - outFile.write((char*)metadataOffsets.data(), metadataOffsetsSize); - outFile.write((char*)headerGap.data(), headerGapNumBytes); + outFile.write((char*)m_subresourceInfo.data(), m_subresourceInfo.size() * sizeof(m_subresourceInfo[0])); + outFile.write((char*)m_offsets.data(), m_offsets.size() * sizeof(m_offsets[0])); - outFile.write((char*)metadata.data(), alignedMetadataSize); - - outFile.write((char*)textureData.data(), (UINT)textureData.size()); + // alignment is here only for legacy support for uncompressed file formats + if (alignedTextureDataGap.size()) + { + outFile.write((char*)alignedTextureDataGap.data(), alignedTextureDataGap.size()); + } - //------------------------------------------ - // copy packed mip bits directly from source dds - // packed mip data will be aligned - //------------------------------------------ - UINT srcOffset = (UINT)bytes.size() - packedMipBytes; - outFile.write((char*)&bytes[srcOffset], packedMipBytes); + outFile.write((char*)m_textureData.data(), (UINT)m_textureData.size()); + outFile.write((char*)m_packedMipData.data(), (UINT)m_packedMipData.size()); - outFile.close(); + UnmapViewOfFile(pInFileBytes); + CloseHandle(inFileMapping); + CloseHandle(inFileHandle); return 0; } diff --git a/DdsToXet/DdsToXet.vcxproj b/DdsToXet/DdsToXet.vcxproj index f2f65ee..5f8ecf5 100644 --- a/DdsToXet/DdsToXet.vcxproj 
+++ b/DdsToXet/DdsToXet.vcxproj @@ -58,11 +58,16 @@ true _DEBUG;_CONSOLE;%(PreprocessorDefinitions) true + stdcpp17 Console true + + cd /d $(TargetDir) +convert.bat $(SolutionDir)dds media + @@ -72,6 +77,7 @@ true NDEBUG;_CONSOLE;%(PreprocessorDefinitions) true + stdcpp17 Console @@ -79,6 +85,10 @@ true true + + cd /d $(TargetDir) +convert.bat $(SolutionDir)dds media + @@ -86,6 +96,11 @@ + + + Document + + \ No newline at end of file diff --git a/DdsToXet/DdsToXet.vcxproj.filters b/DdsToXet/DdsToXet.vcxproj.filters index 6875de2..c525b39 100644 --- a/DdsToXet/DdsToXet.vcxproj.filters +++ b/DdsToXet/DdsToXet.vcxproj.filters @@ -9,6 +9,9 @@ {93995380-89BD-4b04-88EB-625FBE52EBFB} h;hh;hpp;hxx;h++;hm;inl;inc;ipp;xsd + + {e864060a-b20d-4932-b5fe-ebfa3a47eda9} + @@ -20,4 +23,9 @@ Header Files + + + scripts + + \ No newline at end of file diff --git a/DdsToXet/DdsToXet_vs2022.vcxproj b/DdsToXet/DdsToXet_vs2022.vcxproj index c08587c..86fcab2 100644 --- a/DdsToXet/DdsToXet_vs2022.vcxproj +++ b/DdsToXet/DdsToXet_vs2022.vcxproj @@ -58,11 +58,16 @@ true _DEBUG;_CONSOLE;%(PreprocessorDefinitions) true + stdcpp17 Console true + + cd /d $(TargetDir) +convert.bat $(SolutionDir)dds media + @@ -72,6 +77,7 @@ true NDEBUG;_CONSOLE;%(PreprocessorDefinitions) true + stdcpp17 Console @@ -79,12 +85,22 @@ true true + + cd /d $(TargetDir) +convert.bat $(SolutionDir)dds media + + + + + + Document + diff --git a/DdsToXet/DdsToXet_vs2022.vcxproj.filters b/DdsToXet/DdsToXet_vs2022.vcxproj.filters index 6875de2..146a4fa 100644 --- a/DdsToXet/DdsToXet_vs2022.vcxproj.filters +++ b/DdsToXet/DdsToXet_vs2022.vcxproj.filters @@ -9,6 +9,9 @@ {93995380-89BD-4b04-88EB-625FBE52EBFB} h;hh;hpp;hxx;h++;hm;inl;inc;ipp;xsd + + {6d432c77-37cd-43cf-9691-b7787e8ae3f1} + @@ -19,5 +22,13 @@ Header Files + + Header Files + + + + + scripts + \ No newline at end of file diff --git a/DdsToXet/XeTv2.h b/DdsToXet/XeTv2.h new file mode 100644 index 0000000..b52d469 --- /dev/null +++ b/DdsToXet/XeTv2.h @@ -0,0 +1,115 @@ 
+//********************************************************* +// +// Copyright 2020 Intel Corporation +// +// Permission is hereby granted, free of charge, to any +// person obtaining a copy of this software and associated +// documentation files(the "Software"), to deal in the Software +// without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, +// and/or sell copies of the Software, and to permit persons to +// whom the Software is furnished to do so, subject to the +// following conditions : +// The above copyright notice and this permission notice shall +// be included in all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +// MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +// NONINFRINGEMENT.IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +// DEALINGS IN THE SOFTWARE. 
+//
+//*********************************************************
+
+#pragma once
+
+#include "DDS.h"
+
+/*-----------------------------------------------------------------------------
+File Layout:
+
+- Header
+- Array of Per-tile info: file offset, # bytes, index into metadata array
+- Array of Per-metadata blob info: file offset, size of metadata blob
+- Metadata blobs, each blob is aligned
+- Texture Data, each tile is aligned
+- packed mips, treat as not aligned
+
+-----------------------------------------------------------------------------*/
+// Header of a version 2 XeT file (GetVersion() returns 2).
+// The members describe bytes stored in the file, so their order and sizes are part of the format.
+struct XetFileHeaderV2
+{
+    static UINT GetMagic() { return 0x20544558; } // the bytes 'X','E','T',' ' when stored little-endian
+    static UINT GetAlignment() { return 4096; } // alignment, in bytes, used for the aligned sections of the layout above
+    static UINT GetTileSize() { return 65536; } // 64KB
+    static UINT GetVersion() { return 2; }
+
+    UINT m_magic{ GetMagic() };
+    UINT m_version{ GetVersion() };
+    DirectX::DDS_HEADER m_ddsHeader;
+    DirectX::DDS_HEADER_DXT10 m_extensionHeader;
+
+    UINT m_numMetadataBlobs; // number of MetaData entries (see MetaData below)
+
+    struct MipInfo
+    {
+        UINT m_numStandardMips;
+        UINT m_numPackedMips;
+        UINT m_numTilesForPackedMips;
+        UINT m_numTilesForStandardMips; // the number of TileData[] entries after the header
+    };
+    MipInfo m_mipInfo;
+
+    // use subresource tile dimensions to generate linear tile index
+    struct StandardMipInfo
+    {
+        UINT m_widthTiles;
+        UINT m_heightTiles;
+        UINT m_depthTiles;
+
+        // convenience value, can be computed from sum of previous subresource dimensions
+        UINT m_subresourceTileIndex;
+    };
+
+    // if required, compute dimensions by shifting mip 0 dimensions by mip level
+    struct PackedMipInfo
+    {
+        UINT m_rowPitch;
+        UINT m_slicePitch;
+        UINT m_fileOffset; // where this packed mip's data starts in the file
+    };
+
+    // one entry per subresource; the two descriptions share storage, and the
+    // subresource index decides which one applies (see m_subresourceInfo)
+    struct SubresourceInfo
+    {
+        union
+        {
+            StandardMipInfo m_standardMipInfo;
+            PackedMipInfo m_packedMipInfo;
+        };
+    };
+
+    // indices < m_numStandardMips are standard mips, >= are packed mips
+    // fixed-size table: room for 16 subresources
+    SubresourceInfo m_subresourceInfo[16];
+
+    // array TileData[m_numTilesForStandardMips] for each tile
+    struct TileData
+    {
+        UINT m_offset; // file offset to tile data
+        UINT m_numBytes; // # bytes for the tile
+        UINT m_metadataIndex; // index of metadata to use
+    };
+
+    // array of MetaData[m_numMetadataBlobs] (may be size 0)
+    struct MetaData
+    {
+        UINT m_offset; // file offset to metadata blob
+        UINT m_numBytes; // size of the metadata blob
+    };
+
+    // metadata here (aligned)
+    // tile data here (aligned)
+
+    // packed mip data from m_packedMipInfo.m_fileOffset until EOF
+};
diff --git a/README.md b/README.md
index e26c2b7..988d481 100644
--- a/README.md
+++ b/README.md
@@ -6,9 +6,11 @@ This repository contains an [MIT licensed](LICENSE) demo of _DirectX12 Sampler F
 
 The demo requires ***Windows 10 20H1 (aka May 2020 Update, build 19041)*** or later and a GPU with Sampler Feedback Support, such as Intel Iris Xe Graphics as found in 11th Generation Intel® Core™ processors and discrete GPUs (driver version **[30.0.100.9667](https://downloadcenter.intel.com/product/80939/Graphics) or later**).
 
-This repository has been updated with DirectStorage 1.0.0 for Windows® from https://www.nuget.org/packages/Microsoft.Direct3D.DirectStorage/
-- Note the legacy streaming code using ReadFile() had sector alignment constraints for the file internals that are not required by DirectStorage for Windows.
-- Documentation for accessing nuget packages may be added in the future, but there are many online resources.
+This repository has been updated with DirectStorage 1.0.2 for Windows® from https://www.nuget.org/packages/Microsoft.Direct3D.DirectStorage/
+
+Notes:
+- The file format has changed since large textures were provided as "releases." See the [log](#log) below.
+- The legacy streaming code using ReadFile() had sector alignment constraints for the file internals that are not required by DirectStorage for Windows.
See also: @@ -20,9 +22,9 @@ See also: Textures derived from [Hubble Images](https://www.nasa.gov/mission_pages/hubble/multimedia/index.html), see the [Hubble Copyright](https://hubblesite.org/copyright) -Note the textures shown above, which total over 13GB, are not part of the repo. A few 16k x 16k textures are available as a [release](https://github.com/GameTechDev/SamplerFeedbackStreaming/releases/tag/1) +Note the textures shown above, which total over 13GB, are not part of the repo. A few 16k x 16k textures are available as [release 1](https://github.com/GameTechDev/SamplerFeedbackStreaming/releases/tag/1) and [release 2](https://github.com/GameTechDev/SamplerFeedbackStreaming/releases/tag/2) -Test textures are provided, as is a mechanism to convert from BCx format DDS files into the custom .XET format. +Test textures are provided. At build time, BCx textures (BC7 and BC1 tested) in the dds/ directory are converted into the custom .XET format and placed in the $(TargetDir)/media directory (e.g. x64/Release/media). ## Build Instructions @@ -282,17 +284,13 @@ ID3D12CommandList* pCommandLists[] = { commandLists.m_beforeDrawCommands, m_comm ## Log -- 2021-06-21: initial commit -- 2021-07-23: use windows events to reduce cpu overhead -- 2021-08-10: use WaitOnAddress to further reduce cpu overhead. some 16k x 16k textures (BC7 format) posted as "release 1". -- 2021-08-28: proof-of-concept culling: textures for objects behind view are evicted -- 2021-09-20: fixed race condition that could result in hang on exit -- 2021-10-21: code refactor to improve sampler feedback streaming library API -- 2021-12-03: added BC1 asset collection as "release 2." All texture assets (.xet files) can reside in the same directory despite format differences, and can co-exist in the same GPU heap. Also minor source tweaks, including fix to not cull base "terrain" object. -- 2021-12-15: "-addAliasingBarriers" command line option to add an aliasing barrier to assist PIX analysis. 
Can also be enabled in config.json. -- 2022-01-19: eliminated aliasing barrier perf cost (still requires command line/config to enable). config file parser fixes. terrain texture not applied to planets for prettier out-of-box demo experience. -- 2022-03-14: DirectStorage integrated into mainline +- 2022-06-10: File format (.xet) change. DdsToXet can upgrade old Xet files to the new format. Assets in the DDS directory are exported at build time into media directory. Upgrade to DirectStorage v1.0.2. Many misc. improvements. - 2022-05-05: Workaround for rare race condition. Many tweaks and improvements. +- 2022-03-14: DirectStorage 1.0.0 integrated into mainline +- 2021-12-15: "-addAliasingBarriers" command line option to add an aliasing barrier to assist PIX analysis. Can also be enabled in config.json. +- 2021-12-03: added BC1 asset collection as "release 2." All texture assets (.xet files) can reside in the same directory despite format differences, and can co-exist in the same GPU heap. Also minor source tweaks, including fix to not cull base "terrain" object. +- 2021-10-21: code refactor to improve sampler feedback streaming library API +- 2021-08-10: Added some 16k x 16k textures (BC7 format) posted as "release 1". 
## License diff --git a/SamplerFeedbackStreaming_vs2022.sln b/SamplerFeedbackStreaming_vs2022.sln index 358aeb8..a41874c 100644 --- a/SamplerFeedbackStreaming_vs2022.sln +++ b/SamplerFeedbackStreaming_vs2022.sln @@ -13,8 +13,9 @@ Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "DdsToXet", "DdsToXet\DdsToX EndProject Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "Expanse", "src\Expanse_vs2022.vcxproj", "{E9BCC846-C1E9-402F-AC99-0E72195202AC}" ProjectSection(ProjectDependencies) = postProject - {45087328-C272-4BB6-BB09-95D899D2276A} = {45087328-C272-4BB6-BB09-95D899D2276A} {12A36A45-4A15-48E3-B886-257E81FD57C6} = {12A36A45-4A15-48E3-B886-257E81FD57C6} + {369039E2-4C18-40D9-A7FE-E3D87BA23149} = {369039E2-4C18-40D9-A7FE-E3D87BA23149} + {45087328-C272-4BB6-BB09-95D899D2276A} = {45087328-C272-4BB6-BB09-95D899D2276A} EndProjectSection EndProject Global diff --git a/TileUpdateManager/DataUploader.cpp b/TileUpdateManager/DataUploader.cpp index 508c909..197d139 100644 --- a/TileUpdateManager/DataUploader.cpp +++ b/TileUpdateManager/DataUploader.cpp @@ -39,13 +39,11 @@ Streaming::DataUploader::DataUploader( ID3D12Device* in_pDevice, UINT in_maxCopyBatches, // maximum number of batches - UINT in_maxTileCopiesPerBatch, // batch size. a small number, like 32 - UINT in_maxTileCopiesInFlight, // upload buffer size. 
1024 would become a 64MB upload buffer + UINT in_stagingBufferSizeMB, // upload buffer size UINT in_maxTileMappingUpdatesPerApiCall // some HW/drivers seem to have a limit ) : m_updateLists(in_maxCopyBatches) - , m_maxTileCopiesInFlight(in_maxTileCopiesInFlight) - , m_maxBatchSize(in_maxTileCopiesPerBatch) + , m_stagingBufferSizeMB(in_stagingBufferSizeMB) , m_updateListFreeCount(in_maxCopyBatches) , m_gpuTimer(in_pDevice, in_maxCopyBatches, D3D12GpuTimer::TimerType::Copy) , m_mappingUpdater(in_maxTileMappingUpdatesPerApiCall) @@ -94,7 +92,7 @@ void Streaming::DataUploader::InitDirectStorage(ID3D12Device* in_pDevice) #endif m_dsFactory->SetDebugFlags(debugFlags); - m_dsFactory->SetStagingBufferSize(DSTORAGE_STAGING_BUFFER_SIZE_32MB); + m_dsFactory->SetStagingBufferSize(m_stagingBufferSizeMB * 1024 * 1024); DSTORAGE_QUEUE_DESC queueDesc{}; queueDesc.Capacity = DSTORAGE_MAX_QUEUE_CAPACITY; @@ -110,17 +108,18 @@ void Streaming::DataUploader::InitDirectStorage(ID3D12Device* in_pDevice) // handle request to load a texture from cpu memory // used for packed mips, which don't participate in fine-grained streaming //----------------------------------------------------------------------------- -UINT64 Streaming::DataUploader::LoadTexture(ID3D12Resource* in_pResource, - const std::vector& in_paddedData, UINT in_firstSubresource) +UINT64 Streaming::DataUploader::LoadTexture(ID3D12Resource* in_pResource, UINT in_firstSubresource, + const std::vector& in_paddedData, UINT in_uncompressedSize, UINT32 in_compressionFormat) { DSTORAGE_REQUEST request = {}; request.Options.SourceType = DSTORAGE_REQUEST_SOURCE_MEMORY; request.Options.DestinationType = DSTORAGE_REQUEST_DESTINATION_MULTIPLE_SUBRESOURCES; request.Source.Memory.Source = in_paddedData.data(); request.Source.Memory.Size = (UINT32)in_paddedData.size(); - request.UncompressedSize = (UINT32)in_paddedData.size(); + request.UncompressedSize = in_uncompressedSize; request.Destination.MultipleSubresources.Resource = in_pResource; 
request.Destination.MultipleSubresources.FirstSubresource = in_firstSubresource; + request.Options.CompressionFormat = (DSTORAGE_COMPRESSION_FORMAT)in_compressionFormat; m_memoryQueue->EnqueueRequest(&request); return m_memoryFenceValue; @@ -151,12 +150,11 @@ Streaming::FileStreamer* Streaming::DataUploader::SetStreamer(StreamerType in_st if (StreamerType::Reference == in_streamerType) { - // the streamer must support least one fully loaded updatelist, or a full updatelist will never be able to complete - // it's really a user error for max in flight to be less than the max number in 1 update list - UINT minNumUploads = std::max(m_maxTileCopiesInFlight, m_maxBatchSize); + // buffer size in megabytes * 1024 * 1024 bytes / (tile size = 64 * 1024 bytes) + UINT maxTileCopiesInFlight = m_stagingBufferSizeMB * (1024 / 64); m_pFileStreamer = std::make_unique(device.Get(), - (UINT)m_updateLists.size(), m_maxBatchSize, minNumUploads); + (UINT)m_updateLists.size(), maxTileCopiesInFlight); } else { @@ -190,6 +188,10 @@ void Streaming::DataUploader::StartThreads() // launch thread to monitor fences m_fenceMonitorThread = std::thread([&] { + // initialize timer on the thread that will use it + RawCpuTimer fenceMonitorThread; + m_pFenceThreadTimer = &fenceMonitorThread; + DebugPrint(L"Created Fence Monitor Thread\n"); while (m_threadsRunning) { @@ -268,7 +270,7 @@ Streaming::UpdateList* Streaming::DataUploader::AllocateUpdateList(Streaming::St // treat the array as a ring buffer // the next index is the most-likely to be available because it has had the most time to complete - UINT numLists = (UINT)m_updateLists.size(); + const UINT numLists = (UINT)m_updateLists.size(); for (UINT i = 0; i < numLists; i++) { m_updateListAllocIndex = (m_updateListAllocIndex + 1) % numLists; @@ -311,13 +313,15 @@ void Streaming::DataUploader::SubmitUpdateList(Streaming::UpdateList& in_updateL { ASSERT(UpdateList::State::STATE_ALLOCATED == in_updateList.m_executionState); + // set to submitted, 
allowing mapping within submitThread + // fenceMonitorThread will wait for the copy fence to become valid before progressing state + in_updateList.m_executionState = UpdateList::State::STATE_SUBMITTED; + if (in_updateList.GetNumStandardUpdates()) { m_pFileStreamer->StreamTexture(in_updateList); } - in_updateList.m_executionState = UpdateList::State::STATE_SUBMITTED; - m_submitFlag.Set(); } @@ -335,17 +339,30 @@ void Streaming::DataUploader::FenceMonitorThread() bool loadTextures = false; for (auto& updateList : m_updateLists) { + // assign a start time to every in-flight update list. this will give us an upper bound on latency. + // latency is only measured for tile uploads + if ((UpdateList::State::STATE_FREE != updateList.m_executionState) && (0 == updateList.m_copyLatencyTimer)) + { + updateList.m_copyLatencyTimer = m_pFenceThreadTimer->GetTime(); + } + switch (updateList.m_executionState) { case UpdateList::State::STATE_PACKED_MAPPING: - ASSERT(updateList.GetNumPackedUpdates()); + ASSERT(0 == updateList.GetNumStandardUpdates()); + ASSERT(0 == updateList.GetNumEvictions()); + // wait for mapping complete before streaming packed tiles if (updateList.m_mappingFenceValue <= m_mappingFence->GetCompletedValue()) { - updateList.m_copyFenceValue = LoadTexture(updateList.m_pStreamingResource->GetTiledResource(), - updateList.m_pStreamingResource->GetPaddedPackedMips(), - updateList.m_pStreamingResource->GetPackedMipInfo().NumStandardMips); + UINT uncompressedSize = 0; + auto& data = updateList.m_pStreamingResource->GetPaddedPackedMips(uncompressedSize); + updateList.m_copyFenceValue = LoadTexture( + updateList.m_pStreamingResource->GetTiledResource(), + updateList.m_pStreamingResource->GetPackedMipInfo().NumStandardMips, + data, uncompressedSize, + updateList.m_pStreamingResource->GetTextureFileInfo()->GetCompressionFormat()); updateList.m_executionState = UpdateList::State::STATE_PACKED_COPY_PENDING; loadTextures = true; @@ -355,7 +372,6 @@ void 
Streaming::DataUploader::FenceMonitorThread() case UpdateList::State::STATE_PACKED_COPY_PENDING: ASSERT(0 == updateList.GetNumStandardUpdates()); ASSERT(0 == updateList.GetNumEvictions()); - ASSERT(updateList.GetNumPackedUpdates()); if (m_memoryFence->GetCompletedValue() >= updateList.m_copyFenceValue) { @@ -375,8 +391,6 @@ void Streaming::DataUploader::FenceMonitorThread() case UpdateList::State::STATE_COPY_PENDING: { - ASSERT(0 == updateList.GetNumPackedUpdates()); - // standard updates? check if copy complete if (updateList.GetNumStandardUpdates()) { @@ -396,19 +410,21 @@ void Streaming::DataUploader::FenceMonitorThread() // notify evictions if (updateList.GetNumEvictions()) { - m_numTotalEvictions.fetch_add(updateList.GetNumEvictions(), std::memory_order_relaxed); - updateList.m_pStreamingResource->NotifyEvicted(updateList.m_evictCoords); + + m_numTotalEvictions.fetch_add(updateList.GetNumEvictions(), std::memory_order_relaxed); } // notify regular tiles if (updateList.GetNumStandardUpdates()) { - // a gpu copy has completed, so we can update the corresponding timer - //timings.m_gpuTime = m_gpuTimer.MapReadBack(in_updateList.m_streamingTimeIndex); + updateList.m_pStreamingResource->NotifyCopyComplete(updateList.m_coords); + + auto updateLatency = m_pFenceThreadTimer->GetTime() - updateList.m_copyLatencyTimer; + m_totalTileCopyLatency.fetch_add(updateLatency * updateList.GetNumStandardUpdates(), std::memory_order_relaxed); + m_numTotalUploads.fetch_add(updateList.GetNumStandardUpdates(), std::memory_order_relaxed); - updateList.m_pStreamingResource->NotifyCopyComplete(updateList.m_coords); } // UpdateList complete @@ -467,12 +483,12 @@ void Streaming::DataUploader::SubmitThread() updateList.m_pStreamingResource->GetHeap()->GetHeap(), updateList.m_coords, updateList.m_heapIndices); } - else if (0 == updateList.GetNumPackedUpdates()) + else if (updateList.GetNumEvictions()) { // if no uploads, skip the uploading state updateList.m_executionState = 
UpdateList::State::STATE_COPY_PENDING; } - else + else // must be mapping packed mips { updateList.m_pStreamingResource->MapPackedMips(GetMappingQueue()); // special state for packed mips: mapping must happen before copying diff --git a/TileUpdateManager/DataUploader.h b/TileUpdateManager/DataUploader.h index c57fcb4..44ff3e9 100644 --- a/TileUpdateManager/DataUploader.h +++ b/TileUpdateManager/DataUploader.h @@ -50,8 +50,7 @@ namespace Streaming DataUploader( ID3D12Device* in_pDevice, UINT in_maxCopyBatches, // maximum number of batches - UINT in_maxTileCopiesPerBatch, // batch size. a small number, like 32 - UINT in_maxTileCopiesInFlight, // upload buffer size. 1024 would become a 64MB upload buffer + UINT in_stagingBufferSizeMB, // upload buffer size UINT in_maxTileMappingUpdatesPerApiCall // some HW/drivers seem to have a limit ); ~DataUploader(); @@ -90,12 +89,12 @@ namespace Streaming UINT GetTotalNumUploads() const { return m_numTotalUploads; } UINT GetTotalNumEvictions() const { return m_numTotalEvictions; } + float GetApproximateTileCopyLatency() const { return m_pFenceThreadTimer->GetSecondsFromDelta(m_totalTileCopyLatency); } // sum of per-tile latencies so far void SetVisualizationMode(UINT in_mode) { m_pFileStreamer->SetVisualizationMode(in_mode); } private: - // affects upload buffer size. 1024 would become a 64MB upload buffer - const UINT m_maxTileCopiesInFlight{ 0 }; - const UINT m_maxBatchSize{ 0 }; + // upload buffer size + const UINT m_stagingBufferSizeMB{ 0 }; D3D12GpuTimer m_gpuTimer; RawCpuTimer m_cpuTimer; @@ -134,6 +133,7 @@ namespace Streaming void FenceMonitorThread(); std::thread m_fenceMonitorThread; Streaming::SynchronizationFlag m_monitorFenceFlag; + RawCpuTimer* m_pFenceThreadTimer{ nullptr }; // init timer on the thread that uses it. can't really worry about thread migration. 
void StartThreads(); void StopThreads(); @@ -146,7 +146,8 @@ namespace Streaming ComPtr m_memoryQueue; ComPtr m_memoryFence; UINT64 m_memoryFenceValue{ 1 }; - UINT64 LoadTexture(ID3D12Resource* in_pResource, const std::vector& in_paddedData, UINT in_firstSubresource); + UINT64 LoadTexture(ID3D12Resource* in_pResource, UINT in_firstSubresource, + const std::vector& in_paddedData, UINT in_uncompressedSize, UINT32 in_compressionFormat); void SubmitTextureLoads(); //------------------------------------------- @@ -155,5 +156,6 @@ namespace Streaming std::atomic m_numTotalEvictions{ 0 }; std::atomic m_numTotalUploads{ 0 }; std::atomic m_numTotalUpdateListsProcessed{ 0 }; + std::atomic m_totalTileCopyLatency{ 0 }; // total approximate latency for all copies. divide by m_numTotalUploads then get the time with m_cpuTimer.GetSecondsFromDelta() }; } diff --git a/TileUpdateManager/FileStreamer.cpp b/TileUpdateManager/FileStreamer.cpp index 8f9af57..0d2d390 100644 --- a/TileUpdateManager/FileStreamer.cpp +++ b/TileUpdateManager/FileStreamer.cpp @@ -27,6 +27,15 @@ #include "pch.h" #include "FileStreamer.h" +#include "UpdateList.h" + +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- +bool Streaming::FileStreamer::GetCompleted(const Streaming::UpdateList& in_updateList) const +{ + return in_updateList.m_copyFenceValue <= m_copyFence->GetCompletedValue();; +} + //----------------------------------------------------------------------------- // color lookup table diff --git a/TileUpdateManager/FileStreamer.h b/TileUpdateManager/FileStreamer.h index 9223def..c5e1796 100644 --- a/TileUpdateManager/FileStreamer.h +++ b/TileUpdateManager/FileStreamer.h @@ -59,7 +59,7 @@ namespace Streaming }; void SetVisualizationMode(UINT in_mode) { m_visualizationMode = (VisualizationMode)in_mode; } - virtual bool GetCompleted(const UpdateList& in_updateList) const = 0; + bool 
GetCompleted(const UpdateList& in_updateList) const; protected: // copy queue fence ComPtr m_copyFence; diff --git a/TileUpdateManager/FileStreamerDS.cpp b/TileUpdateManager/FileStreamerDS.cpp index 805ff62..225d886 100644 --- a/TileUpdateManager/FileStreamerDS.cpp +++ b/TileUpdateManager/FileStreamerDS.cpp @@ -56,7 +56,6 @@ Streaming::FileHandle* Streaming::FileStreamerDS::OpenFile(const std::wstring& i //----------------------------------------------------------------------------- void Streaming::FileStreamerDS::StreamTexture(Streaming::UpdateList& in_updateList) { - ASSERT(0 == in_updateList.GetNumPackedUpdates()); ASSERT(in_updateList.GetNumStandardUpdates()); auto pTextureFileInfo = in_updateList.m_pStreamingResource->GetTextureFileInfo(); @@ -70,20 +69,20 @@ void Streaming::FileStreamerDS::StreamTexture(Streaming::UpdateList& in_updateLi if (VisualizationMode::DATA_VIZ_NONE == m_visualizationMode) { request.Options.SourceType = DSTORAGE_REQUEST_SOURCE_FILE; - request.Source.File.Size = D3D12_TILED_RESOURCE_TILE_SIZE_IN_BYTES; request.Source.File.Source = GetFileHandle(in_updateList.m_pStreamingResource->GetFileHandle()); request.UncompressedSize = D3D12_TILED_RESOURCE_TILE_SIZE_IN_BYTES; UINT numCoords = (UINT)in_updateList.m_coords.size(); for (UINT i = 0; i < numCoords; i++) { - request.Source.File.Offset = pTextureFileInfo->GetFileOffset(in_updateList.m_coords[i]); + request.Source.File.Offset = pTextureFileInfo->GetFileOffset(in_updateList.m_coords[i], request.Source.File.Size); D3D12_TILED_RESOURCE_COORDINATE coord; ID3D12Resource* pAtlas = pDstHeap->ComputeCoordFromTileIndex(coord, in_updateList.m_heapIndices[i], textureFormat); request.Destination.Tiles.Resource = pAtlas; request.Destination.Tiles.TiledRegionStartCoordinate = coord; + request.Options.CompressionFormat = (DSTORAGE_COMPRESSION_FORMAT)pTextureFileInfo->GetCompressionFormat(); m_fileQueue->EnqueueRequest(&request); } @@ -113,15 +112,6 @@ void 
Streaming::FileStreamerDS::StreamTexture(Streaming::UpdateList& in_updateLi in_updateList.m_copyFenceValid = true; } -//----------------------------------------------------------------------------- -//----------------------------------------------------------------------------- -inline bool Streaming::FileStreamerDS::GetCompleted(const Streaming::UpdateList& in_updateList) const -{ - ASSERT(0 == in_updateList.GetNumPackedUpdates()); - - return in_updateList.m_copyFenceValue <= m_copyFence->GetCompletedValue();; -} - //----------------------------------------------------------------------------- // signal to submit a set of batches // must be executed in the same thread as the load methods above to avoid atomic m_copyFenceValue diff --git a/TileUpdateManager/FileStreamerDS.h b/TileUpdateManager/FileStreamerDS.h index d486cad..e581116 100644 --- a/TileUpdateManager/FileStreamerDS.h +++ b/TileUpdateManager/FileStreamerDS.h @@ -18,8 +18,6 @@ namespace Streaming static IDStorageFile* GetFileHandle(const FileHandle* in_pHandle); - virtual bool GetCompleted(const Streaming::UpdateList& in_updateList) const override; - // for DS, we don't have a way to batch batches // this allows the calling thread to periodically request Submit() vs. every enqueue virtual void Signal() override; diff --git a/TileUpdateManager/FileStreamerReference.cpp b/TileUpdateManager/FileStreamerReference.cpp index 1e7d073..049481a 100644 --- a/TileUpdateManager/FileStreamerReference.cpp +++ b/TileUpdateManager/FileStreamerReference.cpp @@ -37,11 +37,11 @@ //----------------------------------------------------------------------------- Streaming::FileStreamerReference::FileStreamerReference(ID3D12Device* in_pDevice, UINT in_maxNumCopyBatches, // maximum number of in-flight batches - UINT in_maxTileCopiesPerBatch, // batch size. a small number, like 32 UINT in_maxTileCopiesInFlight): // upload buffer size. 
1024 would become a 64MB upload buffer Streaming::FileStreamer(in_pDevice), m_copyBatches(in_maxNumCopyBatches + 2) // padded by a couple to try to help with observed issue perhaps due to OS thread sched. , m_uploadAllocator(in_pDevice, in_maxTileCopiesInFlight) + , m_requests(in_maxTileCopiesInFlight) // pre-allocate an array of event handles corresponding to # of tiles that can fit in the upload heap { D3D12_COMMAND_QUEUE_DESC queueDesc = {}; queueDesc.Flags = D3D12_COMMAND_QUEUE_FLAG_NONE; @@ -55,7 +55,7 @@ Streaming::FileStreamerReference::FileStreamerReference(ID3D12Device* in_pDevice UINT copyBatchIndex = 0; for (auto& copyBatch : m_copyBatches) { - copyBatch.Init(in_maxTileCopiesPerBatch, in_pDevice); + copyBatch.Init(in_pDevice); std::wstringstream name; name << "CopyBatch[" << copyBatchIndex << "]::m_commandAllocator"; @@ -94,49 +94,11 @@ Streaming::FileStreamerReference::~FileStreamerReference() //============================================================================= //============================================================================= -void Streaming::FileStreamerReference::CopyBatch::Init(UINT in_maxNumCopies, ID3D12Device* in_pDevice) +void Streaming::FileStreamerReference::CopyBatch::Init(ID3D12Device* in_pDevice) { - m_requests.resize(in_maxNumCopies); - m_uploadIndices.reserve(in_maxNumCopies); ThrowIfFailed(in_pDevice->CreateCommandAllocator(D3D12_COMMAND_LIST_TYPE_COPY, IID_PPV_ARGS(&m_commandAllocator))); } -//----------------------------------------------------------------------------- -// start copying from a file -//----------------------------------------------------------------------------- -void Streaming::FileStreamerReference::CopyBatch::ReadFile(const HANDLE in_fileHandle, void* in_pDst, UINT in_numBytes, UINT in_fileOffset) -{ - ASSERT(m_numEvents < m_requests.size()); - - auto& o = m_requests[m_numEvents]; - m_numEvents++; - - o.Internal = 0; - o.InternalHigh = 0; - o.OffsetHigh = 0; - o.Offset = in_fileOffset; - - // 
align # bytes read - UINT alignment = FileStreamerReference::MEDIA_SECTOR_SIZE - 1; - UINT numBytes = (in_numBytes + alignment) & ~(alignment); - - ::ReadFile(in_fileHandle, in_pDst, numBytes, nullptr, &o); -} - -//----------------------------------------------------------------------------- -// all loads complete? check from oldest to newest -//----------------------------------------------------------------------------- -bool Streaming::FileStreamerReference::CopyBatch::GetReadsComplete() -{ - for (; m_lastSignaled < m_numEvents; m_lastSignaled++) - { - if (0 != WaitForSingleObject(m_requests[m_lastSignaled].hEvent, 0)) - { - return false; - } - } - return true; -} //----------------------------------------------------------------------------- // opening a file returns an opaque file handle @@ -172,8 +134,10 @@ Streaming::FileHandle* Streaming::FileStreamerReference::OpenFile(const std::wst // Best guess is OS pauses the thread delaying when the copybatch is released // very rarely, the result is a (very) long delay waiting for an available batch //----------------------------------------------------------------------------- -void Streaming::FileStreamerReference::AllocateCopyBatch(Streaming::UpdateList& in_updateList, CopyBatch::State in_desiredState) +void Streaming::FileStreamerReference::StreamTexture(Streaming::UpdateList& in_updateList) { + ASSERT(in_updateList.GetNumStandardUpdates()); + const UINT numBatches = (UINT)m_copyBatches.size(); while (1) @@ -187,46 +151,42 @@ void Streaming::FileStreamerReference::AllocateCopyBatch(Streaming::UpdateList& CopyBatch::State expected = CopyBatch::State::FREE; if (batch.m_state.compare_exchange_weak(expected, CopyBatch::State::ALLOCATED)) { - // set the update list while the CopyBatch is in the "allocated" state + // initialize while the CopyBatch is in the "allocated" state batch.m_pUpdateList = &in_updateList; - // as soon as this state changes, the - batch.m_state = in_desiredState; + 
batch.m_uploadIndices.resize(in_updateList.GetNumStandardUpdates()); + batch.m_copyStart = 0; + batch.m_copyEnd = 0; + batch.m_numEvents = 0; + batch.m_lastSignaled = 0; + + // as soon as this state changes, the copy thread can start executing copies + batch.m_state = CopyBatch::State::COPY_TILES; break; } } } -//----------------------------------------------------------------------------- -//----------------------------------------------------------------------------- -void Streaming::FileStreamerReference::StreamTexture(Streaming::UpdateList& in_updateList) -{ - ASSERT(0 == in_updateList.GetNumPackedUpdates()); - ASSERT(in_updateList.GetNumStandardUpdates()); - - AllocateCopyBatch(in_updateList, CopyBatch::State::LOAD_TILES); -} - //----------------------------------------------------------------------------- // Generate ReadFile()s for each tile in the texture //----------------------------------------------------------------------------- -void Streaming::FileStreamerReference::LoadTexture(Streaming::FileStreamerReference::CopyBatch& in_copyBatch) +void Streaming::FileStreamerReference::LoadTexture(Streaming::FileStreamerReference::CopyBatch& in_copyBatch, UINT in_numtilesToLoad) { - in_copyBatch.Reset(); - Streaming::UpdateList* pUpdateList = in_copyBatch.m_pUpdateList; BYTE* pStagingBaseAddress = (BYTE*)m_uploadAllocator.GetBuffer().m_pData; - const UINT numReads = (UINT)pUpdateList->GetNumStandardUpdates(); + UINT startIndex = in_copyBatch.m_numEvents; + UINT endIndex = startIndex + in_numtilesToLoad; if (VisualizationMode::DATA_VIZ_NONE == m_visualizationMode) { auto pTextureFileInfo = pUpdateList->m_pStreamingResource->GetTextureFileInfo(); auto pFileHandle = FileStreamerReference::GetFileHandle(pUpdateList->m_pStreamingResource->GetFileHandle()); - for (UINT i = 0; i < numReads; i++) + for (UINT i = startIndex; i < endIndex; i++) { // get file offset to tile - UINT fileOffset = pTextureFileInfo->GetFileOffset(pUpdateList->m_coords[i]); + UINT32 numBytes = 
0; + UINT fileOffset = pTextureFileInfo->GetFileOffset(pUpdateList->m_coords[i], numBytes); // convert tile index into byte offset UINT byteOffset = D3D12_TILED_RESOURCE_TILE_SIZE_IN_BYTES * in_copyBatch.m_uploadIndices[i]; @@ -234,12 +194,25 @@ void Streaming::FileStreamerReference::LoadTexture(Streaming::FileStreamerRefere // add to base address of upload buffer BYTE* pDst = pStagingBaseAddress + byteOffset; - in_copyBatch.ReadFile(pFileHandle, pDst, D3D12_TILED_RESOURCE_TILE_SIZE_IN_BYTES, fileOffset); + auto& o = m_requests[in_copyBatch.m_uploadIndices[i]]; + in_copyBatch.m_numEvents++; + + o.Internal = 0; + o.InternalHigh = 0; + o.OffsetHigh = 0; + o.Offset = fileOffset; + + // align # bytes read + UINT alignment = FileStreamerReference::MEDIA_SECTOR_SIZE - 1; + numBytes = (numBytes + alignment) & ~(alignment); + + ::ReadFile(pFileHandle, pDst, numBytes, nullptr, &o); } + ASSERT(in_copyBatch.m_numEvents == endIndex); } else // visualization enabled { - for (UINT i = 0; i < numReads; i++) + for (UINT i = startIndex; i < endIndex; i++) { // convert tile index into byte offset UINT byteOffset = D3D12_TILED_RESOURCE_TILE_SIZE_IN_BYTES * in_copyBatch.m_uploadIndices[i]; @@ -249,6 +222,8 @@ void Streaming::FileStreamerReference::LoadTexture(Streaming::FileStreamerRefere void* pSrc = GetVisualizationData(pUpdateList->m_coords[i], in_copyBatch.m_pUpdateList->m_pStreamingResource->GetTextureFileInfo()->GetFormat()); memcpy(pDst, pSrc, D3D12_TILED_RESOURCE_TILE_SIZE_IN_BYTES); } + // fast-forward last signaled to the end. 
there are no events to check because there are no file accesses + in_copyBatch.m_lastSignaled = endIndex; } } @@ -285,15 +260,6 @@ void Streaming::FileStreamerReference::ExecuteCopyCommandList(ID3D12GraphicsComm m_copyCommandQueue->Signal(m_copyFence.Get(), m_copyFenceValue); } -//----------------------------------------------------------------------------- -// updatelist complete if the copy fence has signaled -//----------------------------------------------------------------------------- -bool Streaming::FileStreamerReference::GetCompleted(const UpdateList& in_updateList) const -{ - bool completed = in_updateList.m_copyFenceValue <= m_copyFence->GetCompletedValue(); - return completed; -} - //----------------------------------------------------------------------------- // move through CopyBatch state machine //----------------------------------------------------------------------------- @@ -305,40 +271,83 @@ void Streaming::FileStreamerReference::CopyThread() { switch (c.m_state) { - case CopyBatch::State::LOAD_TILES: - ASSERT(c.m_pUpdateList->GetNumStandardUpdates()); - ASSERT(0 == c.m_pUpdateList->GetNumPackedUpdates()); - if (m_uploadAllocator.Allocate(c.m_uploadIndices, c.m_pUpdateList->GetNumStandardUpdates())) + case CopyBatch::State::COPY_TILES: + // have any copies completed? + // do this first to free some heap space for loading + if ((c.m_copyStart != c.m_copyEnd) && (c.m_copyFenceValue <= m_copyFence->GetCompletedValue())) { - LoadTexture(c); - c.m_state = CopyBatch::State::COPY_TILES; + m_uploadAllocator.Free(&c.m_uploadIndices[c.m_copyStart], c.m_copyEnd - c.m_copyStart); + c.m_copyStart = c.m_copyEnd; } - break; - case CopyBatch::State::COPY_TILES: - // file loads in progress - if (c.GetReadsComplete()) + // can we start new loads? 
+ if (c.m_numEvents < c.m_pUpdateList->GetNumStandardUpdates()) + { + UINT numtilesToLoad = c.m_pUpdateList->GetNumStandardUpdates() - c.m_numEvents; + numtilesToLoad = std::min(numtilesToLoad, m_uploadAllocator.GetAvailable()); + if (numtilesToLoad) + { + m_uploadAllocator.Allocate(&c.m_uploadIndices[c.m_numEvents], numtilesToLoad); + LoadTexture(c, numtilesToLoad); + ASSERT(c.m_numEvents <= c.m_pUpdateList->GetNumStandardUpdates()); + } + } + + // have any loads completed? + for (; c.m_lastSignaled < c.m_numEvents; c.m_lastSignaled++) + { + UINT requestIndex = c.m_uploadIndices[c.m_lastSignaled]; + if (0 != WaitForSingleObject(m_requests[requestIndex].hEvent, 0)) + { + break; + } + } + + // start copies for any completed events ONLY IF there are no in-flight copies + if ((c.m_copyEnd < c.m_lastSignaled) && (c.m_copyStart == c.m_copyEnd)) { c.m_copyFenceValue = m_copyFenceValue; - if (!submitCopyCommands) { submitCopyCommands = true; m_copyCommandList->Reset(c.GetCommandAllocator(), nullptr); } - CopyTiles(m_copyCommandList.Get(), m_uploadAllocator.GetBuffer().m_resource.Get(), c.m_pUpdateList, c.m_uploadIndices); + // generate copy commands + // copy from where we left off last time (copyEnd) until the last load that completed (lastSignaled) + D3D12_TILE_REGION_SIZE tileRegionSize{ 1, FALSE, 0, 0, 0 }; + DXGI_FORMAT textureFormat = c.m_pUpdateList->m_pStreamingResource->GetTextureFileInfo()->GetFormat(); + for (UINT i = c.m_copyEnd; i < c.m_lastSignaled; i++) + { + D3D12_TILED_RESOURCE_COORDINATE coord; + ID3D12Resource* pAtlas = c.m_pUpdateList->m_pStreamingResource->GetHeap()->ComputeCoordFromTileIndex(coord, c.m_pUpdateList->m_heapIndices[i], textureFormat); + + m_copyCommandList->CopyTiles(pAtlas, &coord, + &tileRegionSize, m_uploadAllocator.GetBuffer().m_resource.Get(), + D3D12_TILED_RESOURCE_TILE_SIZE_IN_BYTES * c.m_uploadIndices[i], + D3D12_TILE_COPY_FLAG_LINEAR_BUFFER_TO_SWIZZLED_TILED_RESOURCE | D3D12_TILE_COPY_FLAG_NO_HAZARD); + } + c.m_copyEnd = 
c.m_lastSignaled; + ASSERT(c.m_copyEnd <= c.m_pUpdateList->GetNumStandardUpdates()); + } + + // if the outstanding copy is for the rest of the tiles, we can hand this batch off... + if (c.m_pUpdateList->GetNumStandardUpdates() == c.m_copyEnd) + { c.m_pUpdateList->m_copyFenceValue = c.m_copyFenceValue; c.m_pUpdateList->m_copyFenceValid = true; + c.m_pUpdateList = nullptr; // clear for debugging purposes. the updatelist can be re-cycled before the copyBatch c.m_state = CopyBatch::State::WAIT_COMPLETE; } break; case CopyBatch::State::WAIT_COMPLETE: // can't recycle this command allocator until the corresponding fence has completed + // note that the updatelist pointer is invalid if (c.m_copyFenceValue <= m_copyFence->GetCompletedValue()) { - m_uploadAllocator.Free(c.m_uploadIndices); + ASSERT(nullptr == c.m_pUpdateList); + m_uploadAllocator.Free(&c.m_uploadIndices[c.m_copyStart], c.m_copyEnd - c.m_copyStart); c.m_state = CopyBatch::State::FREE; } break; diff --git a/TileUpdateManager/FileStreamerReference.h b/TileUpdateManager/FileStreamerReference.h index 9089b26..70b4a76 100644 --- a/TileUpdateManager/FileStreamerReference.h +++ b/TileUpdateManager/FileStreamerReference.h @@ -39,7 +39,6 @@ namespace Streaming public: FileStreamerReference(ID3D12Device* in_pDevice, UINT in_maxNumCopyBatches, // maximum number of in-flight batches - UINT in_maxTileCopiesPerBatch, // batch size. a small number, like 32 UINT in_maxTileCopiesInFlight); // upload buffer size. 
1024 would become a 64MB upload buffer virtual ~FileStreamerReference(); @@ -49,8 +48,6 @@ namespace Streaming virtual void Signal() override {} // reference auto-submits - virtual bool GetCompleted(const UpdateList& in_updateList) const override; - static HANDLE GetFileHandle(const FileHandle* in_pHandle) { return dynamic_cast(in_pHandle)->GetHandle(); } static const UINT MEDIA_SECTOR_SIZE = 4096; // see https://docs.microsoft.com/en-us/windows/win32/fileio/file-buffering @@ -78,7 +75,6 @@ namespace Streaming { FREE = 0, ALLOCATED, - LOAD_TILES, COPY_TILES, WAIT_COMPLETE, }; @@ -86,13 +82,9 @@ namespace Streaming std::atomic m_state{ State::FREE }; Streaming::UpdateList* m_pUpdateList{ nullptr }; - std::vector m_uploadIndices; // indices into upload buffer + std::vector m_uploadIndices; // indices into upload buffer. also serves as indices into the shared array of event handles. UINT64 m_copyFenceValue{ 0 }; // tracked independently from UpdateList so CopyBatch lifetime can be independent - void Reset() { m_lastSignaled = 0; m_numEvents = 0; } - void ReadFile(const HANDLE in_fileHandle, void* in_pDst, UINT in_numBytes, UINT in_fileOffset); - bool GetReadsComplete(); - ID3D12CommandAllocator* GetCommandAllocator() { m_commandAllocator->Reset(); @@ -100,20 +92,25 @@ namespace Streaming } // call only once - void Init(UINT in_maxNumCopies, ID3D12Device* in_pDevice); - private: - struct Request : public OVERLAPPED - { - Request() { hEvent = ::CreateEvent(nullptr, FALSE, FALSE, nullptr); } - ~Request() { ::CloseHandle(hEvent); } - }; - std::vector m_requests; + void Init(ID3D12Device* in_pDevice); + + UINT m_copyStart{ 0 }; + UINT m_copyEnd{ 0 }; + UINT m_numEvents{ 0 }; UINT m_lastSignaled{ 0 }; + private: ComPtr m_commandAllocator; }; + struct Request : public OVERLAPPED + { + Request() { hEvent = ::CreateEvent(nullptr, FALSE, FALSE, nullptr); } + ~Request() { ::CloseHandle(hEvent); } + }; + std::vector m_requests; + // close command list, execute on 
m_copyCommandQueue, signal fence, increment fence value void ExecuteCopyCommandList(ID3D12GraphicsCommandList* in_pCmdList); @@ -127,9 +124,7 @@ namespace Streaming std::atomic m_copyThreadRunning{ false }; std::thread m_copyThread; - void AllocateCopyBatch(Streaming::UpdateList& in_updateList, CopyBatch::State in_desiredState); - - void LoadTexture(CopyBatch& in_copyBatch); + void LoadTexture(CopyBatch& in_copyBatch, UINT in_numtilesToLoad); void CopyTiles(ID3D12GraphicsCommandList* out_pCopyCmdList, ID3D12Resource* in_pSrcResource, const UpdateList* in_pUpdateList, const std::vector& in_indices); }; diff --git a/TileUpdateManager/SamplerFeedbackStreaming.h b/TileUpdateManager/SamplerFeedbackStreaming.h index 38b36d0..4830d4b 100644 --- a/TileUpdateManager/SamplerFeedbackStreaming.h +++ b/TileUpdateManager/SamplerFeedbackStreaming.h @@ -134,13 +134,8 @@ struct TileUpdateManagerDesc // maximum number of in-flight batches UINT m_maxNumCopyBatches{ 128 }; - // limit the number of tile uploads per batch. Multiple batches can be submitted per frame - UINT m_maxTileCopiesPerBatch{ 32 }; - - // affects size of gpu upload buffer, that is, staging between file read and gpu copy - // uploads should move fast, so it should be hard to hit even a small value. 
- // 1024 would become a 64MB upload buffer - UINT m_maxTileCopiesInFlight{ 512 }; + // size of the staging buffer for DirectStorage or reference streaming code + UINT m_stagingBufferSizeMB{ 64 }; // the following is product dependent (some HW/drivers seem to have a limit) UINT m_maxTileMappingUpdatesPerApiCall{ 512 }; @@ -148,8 +143,6 @@ struct TileUpdateManagerDesc // need the swap chain count so we can create per-frame upload buffers UINT m_swapChainBufferCount{ 2 }; - UINT m_timingNumBatchesToCapture{ 512 }; - // Aliasing barriers are unnecessary, as draw commands only access modified resources after a fence has signaled on the copy queue // Note it is also theoretically possible for tiles to be re-assigned while a draw command is executing // However, performance analysis tools like to know about changes to resources @@ -243,12 +236,14 @@ class TileUpdateManager : private Streaming::TileUpdateManagerBase //-------------------------------------------- // for visualization //-------------------------------------------- + void SetVisualizationMode(UINT in_mode); + float GetGpuStreamingTime() const; float GetCpuProcessFeedbackTime(); // returns time since last query. expected usage is once per frame. 
UINT GetTotalNumUploads() const; UINT GetTotalNumEvictions() const; - void SetVisualizationMode(UINT in_mode); + float GetTotalTileCopyLatency() const; private: TileUpdateManager(const TileUpdateManager&) = delete; TileUpdateManager(TileUpdateManager&&) = delete; diff --git a/TileUpdateManager/StreamingResource.cpp b/TileUpdateManager/StreamingResource.cpp index 4225eaa..635038e 100644 --- a/TileUpdateManager/StreamingResource.cpp +++ b/TileUpdateManager/StreamingResource.cpp @@ -530,7 +530,6 @@ bool Streaming::StreamingResourceBase::QueueTiles() else { ASSERT(0 == pUpdateList->GetNumStandardUpdates()); - ASSERT(0 == pUpdateList->GetNumPackedUpdates()); ASSERT(0 == pUpdateList->m_evictCoords.size()); m_pTileUpdateManager->FreeEmptyUpdateList(*pUpdateList); @@ -592,10 +591,10 @@ void Streaming::StreamingResourceBase::QueuePendingTileEvictions(Streaming::Upda ASSERT(out_pUpdateList); ASSERT(m_pendingEvictions.GetReadyToEvict().size()); - std::vector& evictions = m_pendingEvictions.GetReadyToEvict(); + auto& pendingEvictions = m_pendingEvictions.GetReadyToEvict(); UINT numDelayed = 0; - for (auto& coord : m_pendingEvictions.GetReadyToEvict()) + for (auto& coord : pendingEvictions) { // if the heap index is valid, but the tile is not resident, there's a /pending load/ // a pending load might be streaming OR it might be in the pending list @@ -606,7 +605,7 @@ void Streaming::StreamingResourceBase::QueuePendingTileEvictions(Streaming::Upda // Hence, ProcessFeedback() must be called before this function ASSERT(0 == m_tileMappingState.GetRefCount(coord)); - auto residency = m_tileMappingState.GetResidency(coord); + auto residency = m_tileMappingState.GetResidency(coord); if (TileMappingState::Residency::Resident == residency) { m_tileMappingState.SetEvicting(coord); @@ -619,7 +618,7 @@ void Streaming::StreamingResourceBase::QueuePendingTileEvictions(Streaming::Upda // try again later else if (TileMappingState::Residency::Loading == residency) { - evictions[numDelayed] = 
coord; + pendingEvictions[numDelayed] = coord; numDelayed++; } // if evicting or not resident, drop @@ -628,7 +627,7 @@ void Streaming::StreamingResourceBase::QueuePendingTileEvictions(Streaming::Upda } // replace the ready evictions with just the delayed evictions. - evictions.resize(numDelayed); + pendingEvictions.resize(numDelayed); } //----------------------------------------------------------------------------- @@ -641,11 +640,8 @@ void Streaming::StreamingResourceBase::QueuePendingTileLoads(Streaming::UpdateLi ASSERT(out_pUpdateList); ASSERT(m_pHeap->GetAllocator().GetNumFree()); - // clamp to maximum allowed in a batch - UINT maxCopies = std::min((UINT)m_pendingTileLoads.size(), m_pTileUpdateManager->GetMaxTileCopiesPerBatch()); - // clamp to heap availability - maxCopies = std::min(maxCopies, m_pHeap->GetAllocator().GetNumFree()); + UINT maxCopies = std::min((UINT)m_pendingTileLoads.size(), m_pHeap->GetAllocator().GetNumFree()); UINT skippedIndex = 0; UINT numConsumed = 0; @@ -666,10 +662,13 @@ void Streaming::StreamingResourceBase::QueuePendingTileLoads(Streaming::UpdateLi // only load if definitely not resident if (TileMappingState::Residency::NotResident == residency) { + UINT heapIndex = m_pHeap->GetAllocator().Allocate(); + m_tileMappingState.SetLoading(coord); - UINT& heapIndex = m_tileMappingState.GetHeapIndex(coord); - heapIndex = m_pHeap->GetAllocator().Allocate(); - out_pUpdateList->AddUpdate(coord, heapIndex); + m_tileMappingState.GetHeapIndex(coord) = heapIndex; + + out_pUpdateList->m_coords.push_back(coord); + out_pUpdateList->m_heapIndices.push_back(heapIndex); // limit # of copies in a single updatelist maxCopies--; @@ -843,40 +842,6 @@ void Streaming::StreamingResourceBase::EvictionDelay::Rescue(const Streaming::St } } -//----------------------------------------------------------------------------- -// pad packed mips according to copyable footprint requirements -//----------------------------------------------------------------------------- 
-void Streaming::StreamingResourceBase::PadPackedMips() -{ - UINT firstSubresource = m_resources->GetPackedMipInfo().NumStandardMips; - UINT numSubresources = m_resources->GetPackedMipInfo().NumPackedMips; - D3D12_RESOURCE_DESC desc = m_resources->GetTiledResource()->GetDesc(); - UINT64 totalBytes = 0; - - std::vector srcLayout(numSubresources); - std::vector numRows(numSubresources); - std::vector rowSizeBytes(numSubresources); - - m_pTileUpdateManager->GetDevice()->GetCopyableFootprints(&desc, firstSubresource, numSubresources, - 0, srcLayout.data(), numRows.data(), rowSizeBytes.data(), &totalBytes); - - std::vector paddedPackedMips(totalBytes); - - BYTE* pDst = paddedPackedMips.data(); - BYTE* pSrc = m_packedMips.data(); - - for (UINT i = 0; i < numSubresources; i++) - { - for (UINT r = 0; r < numRows[i]; r++) - { - memcpy(pDst, pSrc, rowSizeBytes[i]); - pDst += srcLayout[i].Footprint.RowPitch; - pSrc += rowSizeBytes[i]; - } - } - m_packedMips.swap(paddedPackedMips); -} - //----------------------------------------------------------------------------- //----------------------------------------------------------------------------- void Streaming::StreamingResourceBase::LoadPackedMips() @@ -884,13 +849,12 @@ void Streaming::StreamingResourceBase::LoadPackedMips() // FIXME: future file format should contain padded packed mips to allow DS to load directly from disk to GPU UINT numBytes = 0; - UINT offset = m_pTextureFileInfo->GetPackedMipFileOffset(&numBytes); + UINT offset = m_pTextureFileInfo->GetPackedMipFileOffset(&numBytes, &m_packedMipsUncompressedSize); m_packedMips.resize(numBytes); std::ifstream inFile(m_filename.c_str(), std::ios::binary); inFile.seekg(offset); inFile.read((char*)m_packedMips.data(), numBytes); inFile.close(); - PadPackedMips(); } //----------------------------------------------------------------------------- @@ -942,7 +906,6 @@ bool Streaming::StreamingResourceBase::InitPackedMips() if (pUpdateList) { - 
pUpdateList->AddPackedMipRequest(m_resources->GetPackedMipInfo().NumPackedMips); pUpdateList->m_heapIndices = m_packedMipHeapIndices; m_pTileUpdateManager->SubmitUpdateList(*pUpdateList); diff --git a/TileUpdateManager/StreamingResource.h b/TileUpdateManager/StreamingResource.h index 940488a..a519f54 100644 --- a/TileUpdateManager/StreamingResource.h +++ b/TileUpdateManager/StreamingResource.h @@ -284,6 +284,7 @@ namespace Streaming // bytes for packed mips std::vector m_packedMips; + UINT m_packedMipsUncompressedSize{ 0 }; private: // non-packed mip copy complete notification std::atomic m_tileResidencyChanged{ false }; @@ -318,7 +319,6 @@ namespace Streaming void QueuePendingTileLoads(Streaming::UpdateList* out_pUpdateList); // returns # tiles queued void LoadPackedMips(); - void PadPackedMips(); // used by QueueEviction() bool m_refCountsZero{ true }; diff --git a/TileUpdateManager/StreamingResourceDU.cpp b/TileUpdateManager/StreamingResourceDU.cpp index a4a179b..37a6752 100644 --- a/TileUpdateManager/StreamingResourceDU.cpp +++ b/TileUpdateManager/StreamingResourceDU.cpp @@ -104,5 +104,5 @@ void Streaming::StreamingResourceDU::NotifyPackedMips() // don't need to hold on to packed mips any longer. 
std::vector empty; - GetPaddedPackedMips().swap(empty); + m_packedMips.swap(empty); } diff --git a/TileUpdateManager/StreamingResourceDU.h b/TileUpdateManager/StreamingResourceDU.h index c490519..790e1cb 100644 --- a/TileUpdateManager/StreamingResourceDU.h +++ b/TileUpdateManager/StreamingResourceDU.h @@ -51,7 +51,7 @@ namespace Streaming const FileHandle* GetFileHandle() const { return m_pFileHandle.get(); } - std::vector& GetPaddedPackedMips() { return m_packedMips; } + std::vector& GetPaddedPackedMips(UINT& out_uncompressedSize) { out_uncompressedSize = m_packedMipsUncompressedSize; return m_packedMips; } // packed mips are treated differently from regular tiles: they aren't tracked by the data structure, and share heap indices void MapPackedMips(ID3D12CommandQueue* in_pCommandQueue); diff --git a/TileUpdateManager/TileUpdateManager.cpp b/TileUpdateManager/TileUpdateManager.cpp index ea5c63b..f1fc30b 100644 --- a/TileUpdateManager/TileUpdateManager.cpp +++ b/TileUpdateManager/TileUpdateManager.cpp @@ -53,7 +53,6 @@ m_numSwapBuffers(in_desc.m_swapChainBufferCount) , m_device(in_pDevice) , m_commandLists((UINT)CommandListName::Num) , m_maxTileMappingUpdatesPerApiCall(in_desc.m_maxTileMappingUpdatesPerApiCall) -, m_maxTileCopiesPerBatch(in_desc.m_maxTileCopiesPerBatch) , m_addAliasingBarriers(in_desc.m_addAliasingBarriers) { ASSERT(D3D12_COMMAND_LIST_TYPE_DIRECT == in_pDirectCommandQueue->GetDesc().Type); @@ -64,8 +63,7 @@ m_numSwapBuffers(in_desc.m_swapChainBufferCount) m_pDataUploader = std::make_unique( in_pDevice, in_desc.m_maxNumCopyBatches, - in_desc.m_maxTileCopiesPerBatch, - in_desc.m_maxTileCopiesInFlight, + in_desc.m_stagingBufferSizeMB, in_desc.m_maxTileMappingUpdatesPerApiCall); const UINT numAllocators = m_numSwapBuffers; @@ -179,7 +177,7 @@ void Streaming::TileUpdateManagerBase::StartThreads() // continuously push uploads and evictions bool uploadRequested = false; - for (UINT i = 0; i < staleResources.size(); i++) + for (UINT i = 0; i < 
staleResources.size(); ) { if (!m_threadsRunning) { @@ -188,7 +186,8 @@ void Streaming::TileUpdateManagerBase::StartThreads() UINT resourceIndex = staleResources[i]; auto p = m_streamingResources[resourceIndex]; - uploadRequested = (uploadRequested || p->QueueTiles()); + bool tilesQueued = p->QueueTiles(); + uploadRequested = uploadRequested || tilesQueued; // if all loads/evictions handled, remove from staleResource list if (!p->IsStale()) @@ -198,6 +197,10 @@ void Streaming::TileUpdateManagerBase::StartThreads() staleResources[i] = staleResources.back(); staleResources.resize(staleResources.size() - 1); } + else + { + i++; + } } // if uploads were queued, tell the file streamer to signal the corresponding fence diff --git a/TileUpdateManager/TileUpdateManager.h b/TileUpdateManager/TileUpdateManager.h index 4831a74..5424bfb 100644 --- a/TileUpdateManager/TileUpdateManager.h +++ b/TileUpdateManager/TileUpdateManager.h @@ -72,7 +72,6 @@ namespace Streaming ID3D12Device8* GetDevice() const { return m_device.Get(); } UINT GetNumSwapBuffers() const { return m_numSwapBuffers; } - UINT GetMaxTileCopiesPerBatch() const { return m_maxTileCopiesPerBatch; } // stop tracking this StreamingResource. Called by its destructor void Remove(StreamingResourceBase* in_pResource) @@ -202,7 +201,6 @@ namespace Streaming std::vector m_commandLists; private: const UINT m_maxTileMappingUpdatesPerApiCall; - const UINT m_maxTileCopiesPerBatch; ComPtr m_device; diff --git a/TileUpdateManager/TileUpdateManager.vcxproj b/TileUpdateManager/TileUpdateManager.vcxproj index 980e394..0439cd8 100644 --- a/TileUpdateManager/TileUpdateManager.vcxproj +++ b/TileUpdateManager/TileUpdateManager.vcxproj @@ -134,12 +134,12 @@ - + This project references NuGet package(s) that are missing on this computer. Use NuGet Package Restore to download them. For more information, see http://go.microsoft.com/fwlink/?LinkID=322105. The missing file is {0}. 
- + \ No newline at end of file diff --git a/TileUpdateManager/TileUpdateManagerExt.cpp b/TileUpdateManager/TileUpdateManagerExt.cpp index c1ffaf6..36c49b4 100644 --- a/TileUpdateManager/TileUpdateManagerExt.cpp +++ b/TileUpdateManager/TileUpdateManagerExt.cpp @@ -169,6 +169,7 @@ float TileUpdateManager::GetCpuProcessFeedbackTime() // performance and visualization //----------------------------------------------------------------------------- float TileUpdateManager::GetGpuStreamingTime() const { return m_pDataUploader->GetGpuStreamingTime(); } +float TileUpdateManager::GetTotalTileCopyLatency() const { return m_pDataUploader->GetApproximateTileCopyLatency(); } // the total time the GPU spent resolving feedback during the previous frame float TileUpdateManager::GetGpuTime() const { return m_gpuTimerResolve.GetTimes()[m_renderFrameIndex].first; } @@ -304,7 +305,7 @@ TileUpdateManager::CommandLists TileUpdateManager::EndFrame() { m_gpuTimerResolve.BeginTimer(pCommandList, m_renderFrameIndex); - // transition all feeback resources UAV->RESOLVE_SOURCE + // transition all feedback resources UAV->RESOLVE_SOURCE // also transition the (non-opaque) resolved resources COPY_SOURCE->RESOLVE_DEST pCommandList->ResourceBarrier((UINT)m_barrierUavToResolveSrc.size(), m_barrierUavToResolveSrc.data()); m_barrierUavToResolveSrc.clear(); diff --git a/TileUpdateManager/TileUpdateManager_vs2022.vcxproj b/TileUpdateManager/TileUpdateManager_vs2022.vcxproj index 2fce60c..daee4b2 100644 --- a/TileUpdateManager/TileUpdateManager_vs2022.vcxproj +++ b/TileUpdateManager/TileUpdateManager_vs2022.vcxproj @@ -136,12 +136,12 @@ - + This project references NuGet package(s) that are missing on this computer. Use NuGet Package Restore to download them. For more information, see http://go.microsoft.com/fwlink/?LinkID=322105. The missing file is {0}. 
- + \ No newline at end of file diff --git a/TileUpdateManager/UpdateList.cpp b/TileUpdateManager/UpdateList.cpp index 5c40a4a..520c525 100644 --- a/TileUpdateManager/UpdateList.cpp +++ b/TileUpdateManager/UpdateList.cpp @@ -39,27 +39,5 @@ void Streaming::UpdateList::Reset(Streaming::StreamingResourceDU* in_pStreamingR m_coords.clear(); // indicates standard tile map & upload m_heapIndices.clear(); // because AddUpdate() does a push_back() m_evictCoords.clear(); // indicates tiles to un-map - m_numPackedMips = 0; // indicates to map & load packed mips -} - -//----------------------------------------------------------------------------- -//----------------------------------------------------------------------------- -void Streaming::UpdateList::AddPackedMipRequest(UINT in_numMips) -{ - ASSERT(UpdateList::State::STATE_ALLOCATED == m_executionState); - ASSERT(0 == GetNumStandardUpdates()); - - m_numPackedMips = in_numMips; -} - -//----------------------------------------------------------------------------- -//----------------------------------------------------------------------------- -void Streaming::UpdateList::AddUpdate( - const D3D12_TILED_RESOURCE_COORDINATE& in_coord, - UINT in_heapIndex) -{ - ASSERT(State::STATE_ALLOCATED == m_executionState); - - m_coords.push_back(in_coord); - m_heapIndices.push_back(in_heapIndex); + m_copyLatencyTimer = 0; // clear latency timer } diff --git a/TileUpdateManager/UpdateList.h b/TileUpdateManager/UpdateList.h index 41e80d4..9afbc76 100644 --- a/TileUpdateManager/UpdateList.h +++ b/TileUpdateManager/UpdateList.h @@ -77,16 +77,11 @@ namespace Streaming // tile evictions: std::vector m_evictCoords; - // packed mips: - UINT m_numPackedMips = 0; - UINT GetNumStandardUpdates() const { return (UINT)m_coords.size(); } - UINT GetNumPackedUpdates() const { return m_numPackedMips; } UINT GetNumEvictions() const { return (UINT)m_evictCoords.size(); } - void AddPackedMipRequest(UINT in_numMips); - void AddUpdate(const 
D3D12_TILED_RESOURCE_COORDINATE& in_coord, UINT in_heapIndex); - void Reset(Streaming::StreamingResourceDU* in_pStreamingResource); + + INT64 m_copyLatencyTimer{ 0 }; // used only to get an approximate latency for tile copies }; } diff --git a/TileUpdateManager/UploadAllocator.cpp b/TileUpdateManager/UploadAllocator.cpp index c45bcde..b3360ef 100644 --- a/TileUpdateManager/UploadAllocator.cpp +++ b/TileUpdateManager/UploadAllocator.cpp @@ -31,8 +31,8 @@ //----------------------------------------------------------------------------- // allocates simply by increasing/decreasing an index into an array of available indices //----------------------------------------------------------------------------- -Streaming::SimpleAllocator::SimpleAllocator(UINT in_maxNumTiles) : - m_index(0), m_heap(in_maxNumTiles) +Streaming::SimpleAllocator::SimpleAllocator(UINT in_maxNumElements) : + m_index(0), m_heap(in_maxNumElements) { for (auto& i : m_heap) { @@ -59,21 +59,31 @@ Streaming::SimpleAllocator::~SimpleAllocator() // input is array sized to receive tile indices // returns false and does no allocations if there wasn't space //----------------------------------------------------------------------------- -bool Streaming::SimpleAllocator::Allocate(std::vector& out_indices, UINT in_numTiles) +bool Streaming::SimpleAllocator::Allocate(std::vector& out_indices, UINT in_numIndices) { bool result = false; - if (m_index >= in_numTiles) + if (m_index >= in_numIndices) { - out_indices.resize(in_numTiles); - m_index -= in_numTiles; - memcpy(out_indices.data(), &m_heap[m_index], in_numTiles * sizeof(UINT)); + out_indices.resize(in_numIndices); + m_index -= in_numIndices; + memcpy(out_indices.data(), &m_heap[m_index], in_numIndices * sizeof(UINT)); result = true; } return result; } +//----------------------------------------------------------------------------- +// like above, but expects caller to have checked availability first and provided a safe destination 
+//----------------------------------------------------------------------------- +void Streaming::SimpleAllocator::Allocate(UINT* out_pIndices, UINT in_numIndices) +{ + ASSERT(m_index >= in_numIndices); + m_index -= in_numIndices; + memcpy(out_pIndices, &m_heap[m_index], in_numIndices * sizeof(UINT)); +} + //----------------------------------------------------------------------------- //----------------------------------------------------------------------------- void Streaming::SimpleAllocator::Free(const std::vector& in_indices) @@ -85,6 +95,16 @@ void Streaming::SimpleAllocator::Free(const std::vector& in_indices) m_index += numIndices; } +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- +void Streaming::SimpleAllocator::Free(const UINT* in_pIndices, UINT in_numIndices) +{ + ASSERT(in_numIndices); + ASSERT((m_index + in_numIndices) <= (UINT)m_heap.size()); + memcpy(&m_heap[m_index], in_pIndices, sizeof(UINT) * in_numIndices); + m_index += in_numIndices; +} + //----------------------------------------------------------------------------- // UploadAllocator tracks tiles in an upload buffer // relies on objects to return their indices when they are done diff --git a/TileUpdateManager/UploadAllocator.h b/TileUpdateManager/UploadAllocator.h index a409476..f05958f 100644 --- a/TileUpdateManager/UploadAllocator.h +++ b/TileUpdateManager/UploadAllocator.h @@ -36,16 +36,21 @@ namespace Streaming class SimpleAllocator { public: - SimpleAllocator(UINT in_maxNumTiles); + SimpleAllocator(UINT in_maxNumElements); virtual ~SimpleAllocator(); // output array will be sized to receive tile indices - bool Allocate(std::vector& out_indices, UINT in_numTiles); + bool Allocate(std::vector& out_indices, UINT in_numIndices); void Free(const std::vector& in_indices); + // assumes caller is doing due-diligence to allocate destination appropriately and check availability before 
calling + void Allocate(UINT* out_pIndices, UINT in_numIndices); + void Free(const UINT* in_pIndices, UINT in_numIndices); + // for debug static const UINT InvalidIndex{ UINT(-1) }; UINT GetAvailable() const { return m_index; } + UINT GetCapacity() const { return (UINT)m_heap.size(); } private: std::vector m_heap; UINT m_index; diff --git a/TileUpdateManager/XeTexture.cpp b/TileUpdateManager/XeTexture.cpp index 4095094..620dd85 100644 --- a/TileUpdateManager/XeTexture.cpp +++ b/TileUpdateManager/XeTexture.cpp @@ -49,30 +49,24 @@ Streaming::XeTexture::XeTexture(const std::wstring& in_fileName) if (!inFile.good()) { Error(in_fileName + L"Unexpected Error"); } if (m_fileHeader.m_magic != XetFileHeader::GetMagic()) { Error(in_fileName + L" Not a valid XET file"); } - if (m_fileHeader.m_version != XetFileHeader::GetVersion()) { Error(in_fileName + L"Incorrect XET version"); } + if (m_fileHeader.m_version != XetFileHeader::GetVersion()) { Error(in_fileName + L" Incorrect XET version"); } - m_tileOffsets.resize(m_fileHeader.m_mipInfo.m_numTilesForStandardMips); - inFile.read((char*)m_tileOffsets.data(), m_tileOffsets.size() * sizeof(m_tileOffsets[0])); + m_subresourceInfo.resize(m_fileHeader.m_ddsHeader.mipMapCount); + inFile.read((char*)m_subresourceInfo.data(), m_subresourceInfo.size() * sizeof(m_subresourceInfo[0])); if (!inFile.good()) { Error(in_fileName + L"Unexpected Error"); } - m_metadataOffsets.resize(m_fileHeader.m_numMetadataBlobs); - inFile.read((char*)m_metadataOffsets.data(), m_metadataOffsets.size() * sizeof(m_metadataOffsets[0])); + m_tileOffsets.resize(m_fileHeader.m_mipInfo.m_numTilesForStandardMips + 1); // plus 1 for the packed mips offset & size + inFile.read((char*)m_tileOffsets.data(), m_tileOffsets.size() * sizeof(m_tileOffsets[0])); if (!inFile.good()) { Error(in_fileName + L"Unexpected Error"); } - - inFile.seekg(0, std::ios::end); - m_fileSize = inFile.tellg(); - inFile.close(); } 
//----------------------------------------------------------------------------- //----------------------------------------------------------------------------- -UINT Streaming::XeTexture::GetPackedMipFileOffset(UINT* out_pNumBytesTotal) +UINT Streaming::XeTexture::GetPackedMipFileOffset(UINT* out_pNumBytesTotal, UINT* out_pNumBytesUncompressed) { - UINT packedOffset = m_fileHeader.m_subresourceInfo[m_fileHeader.m_mipInfo.m_numStandardMips].m_packedMipInfo.m_fileOffset; - if (out_pNumBytesTotal) - { - *out_pNumBytesTotal = UINT(m_fileSize - packedOffset); - } + UINT packedOffset = m_tileOffsets[m_fileHeader.m_mipInfo.m_numTilesForStandardMips].m_offset; + *out_pNumBytesTotal = m_tileOffsets[m_fileHeader.m_mipInfo.m_numTilesForStandardMips].m_numBytes; + *out_pNumBytesUncompressed = m_fileHeader.m_mipInfo.m_numUncompressedBytesForPackedMips; return packedOffset; } @@ -81,17 +75,18 @@ UINT Streaming::XeTexture::GetPackedMipFileOffset(UINT* out_pNumBytesTotal) //----------------------------------------------------------------------------- UINT Streaming::XeTexture::GetLinearIndex(const D3D12_TILED_RESOURCE_COORDINATE& in_coord) const { - const auto& data = m_fileHeader.m_subresourceInfo[in_coord.Subresource].m_standardMipInfo; + const auto& data = m_subresourceInfo[in_coord.Subresource].m_standardMipInfo; return data.m_subresourceTileIndex + (in_coord.Y * data.m_widthTiles) + in_coord.X; } //----------------------------------------------------------------------------- // return value is byte offset into file //----------------------------------------------------------------------------- -UINT Streaming::XeTexture::GetFileOffset(const D3D12_TILED_RESOURCE_COORDINATE& in_coord) const +UINT Streaming::XeTexture::GetFileOffset(const D3D12_TILED_RESOURCE_COORDINATE& in_coord, UINT32& out_numBytes) const { UINT index = GetLinearIndex(in_coord); // use index to look up file offset and number of bytes + out_numBytes = m_tileOffsets[index].m_numBytes; return 
m_tileOffsets[index].m_offset; } diff --git a/TileUpdateManager/XeTexture.h b/TileUpdateManager/XeTexture.h index cc9e766..f818a73 100644 --- a/TileUpdateManager/XeTexture.h +++ b/TileUpdateManager/XeTexture.h @@ -46,11 +46,12 @@ namespace Streaming UINT GetImageWidth() const { return m_fileHeader.m_ddsHeader.width; } UINT GetImageHeight() const { return m_fileHeader.m_ddsHeader.height; } UINT GetMipCount() const { return m_fileHeader.m_ddsHeader.mipMapCount; } + UINT32 GetCompressionFormat() const { return m_fileHeader.m_compressionFormat; } // return value is # bytes. out_offset is byte offset into file - UINT GetFileOffset(const D3D12_TILED_RESOURCE_COORDINATE& in_coord) const; + UINT GetFileOffset(const D3D12_TILED_RESOURCE_COORDINATE& in_coord, UINT32& out_numBytes) const; - UINT GetPackedMipFileOffset(UINT* out_pNumBytesTotal = nullptr); + UINT GetPackedMipFileOffset(UINT* out_pNumBytesTotal, UINT* out_pNumBytesUncompressed); XeTexture(const std::wstring& in_filename); protected: @@ -63,10 +64,9 @@ namespace Streaming static const UINT NUM_BYTES_PER_TILE{ D3D12_TILED_RESOURCE_TILE_SIZE_IN_BYTES }; // tiles are always 64KB in size XetFileHeader m_fileHeader; - size_t m_fileSize{ 0 }; + std::vector m_subresourceInfo; std::vector m_tileOffsets; - std::vector m_metadataOffsets; UINT GetLinearIndex(const D3D12_TILED_RESOURCE_COORDINATE& in_coord) const; }; diff --git a/TileUpdateManager/XetFileHeader.h b/TileUpdateManager/XetFileHeader.h index 0629724..4106e0e 100644 --- a/TileUpdateManager/XetFileHeader.h +++ b/TileUpdateManager/XetFileHeader.h @@ -32,55 +32,58 @@ File Layout: - Header -- Array of Per-tile info: file offset, # bytes, index into metadata array -- Array of Per-metadata blob info: file offset, size of metadata blob -- Metadata blobs, each blob is aligned -- Texture Data, each tile is aligned -- packed mips, treat as not aligned +- Array of Per-tile info: file offset, # bytes. + note all uncompressed tiles are 64KB. 
if the number of bytes = 64KB, then the tile is assumed uncompressed +- Texture Data. tiles are not aligned +- packed mips. the data is unaligned, but the contents have been pre-padded -----------------------------------------------------------------------------*/ struct XetFileHeader { static UINT GetMagic() { return 0x20544558; } - static UINT GetAlignment() { return 4096; } - static UINT GetTileSize() { return 65536; } - static UINT GetVersion() { return 2; } + static UINT GetTileSize() { return 65536; } // uncompressed size + static UINT GetVersion() { return 3; } UINT m_magic{ GetMagic() }; UINT m_version{ GetVersion() }; DirectX::DDS_HEADER m_ddsHeader; DirectX::DDS_HEADER_DXT10 m_extensionHeader; - UINT m_numMetadataBlobs; + UINT32 m_compressionFormat{ 0 }; // 0 is no compression struct MipInfo { - UINT m_numStandardMips; - UINT m_numPackedMips; - UINT m_numTilesForPackedMips; - UINT m_numTilesForStandardMips; // the number of TileData[] entries after the header + UINT32 m_numStandardMips; + UINT32 m_numTilesForStandardMips; // the TileData[] array has # entries = (m_numTilesForStandardMips + 1) + UINT32 m_numPackedMips; + UINT32 m_numTilesForPackedMips; // only 1 entry for all packed mips at TileData[m_numTilesForStandardMips] + UINT32 m_numUncompressedBytesForPackedMips; // if this equals the size at TileData[m_numTilesForStandardMips], then not compressed }; MipInfo m_mipInfo; // use subresource tile dimensions to generate linear tile index struct StandardMipInfo { - UINT m_widthTiles; - UINT m_heightTiles; - UINT m_depthTiles; + UINT32 m_widthTiles; + UINT32 m_heightTiles; + UINT32 m_depthTiles; // convenience value, can be computed from sum of previous subresource dimensions - UINT m_subresourceTileIndex; + UINT32 m_subresourceTileIndex; }; - // if required, compute dimensions by shifting mip 0 dimensions by mip level + // properties of the uncompressed packed mips + // all packed mips are padded and treated as a single entity struct PackedMipInfo { - 
UINT m_rowPitch; - UINT m_slicePitch; - UINT m_fileOffset; + UINT32 m_rowPitch; // before padding + UINT32 m_slicePitch; // before padding + + UINT32 m_rowPitchPadded; // after padding, from footprint + UINT32 m_slicePitchPadded; // after padding, from footprint }; + // array SubresourceInfo[m_ddsHeader.mipMapCount] struct SubresourceInfo { union @@ -90,26 +93,15 @@ struct XetFileHeader }; }; - // indices < m_numStandardMips are standard mips, >= are packed mips - SubresourceInfo m_subresourceInfo[16]; - - // array TileData[m_numTilesForStandardMips] for each tile + // array TileData[m_numTilesForStandardMips + 1], 1 entry for each tile plus a final entry for packed mips struct TileData { - UINT m_offset; // file offset to tile data - UINT m_numBytes; // # bytes for the tile - UINT m_metadataIndex; // index of metadata to use - }; - - // array of MetaData[m_numMetadataBlobs] (may be size 0) - struct MetaData - { - UINT m_offset; // file offset to metadata blob - UINT m_numBytes; // size of the metadata blob + UINT32 m_offset; // file offset to tile data + UINT32 m_numBytes; // # bytes for the tile }; - // metadata here (aligned) - // tile data here (aligned) - - // packed mip data from m_packedMipInfo.m_fileOffset until EOF + // arrays for file lookup start after sizeof(XetFileHeader) + // 1st: array SubresourceInfo[m_ddsHeader.mipMapCount] + // 2nd: array TileData[m_numTilesForStandardMips + 1] + // 3rd: packed mip data can be found at file offset TileData[m_numTilesForStandardMips].m_offset and is TileData[m_numTilesForStandardMips].m_numBytes bytes long }; diff --git a/TileUpdateManager/packages.config b/TileUpdateManager/packages.config index 9c064d0..220e8cc 100644 --- a/TileUpdateManager/packages.config +++ b/TileUpdateManager/packages.config @@ -1,4 +1,4 @@ ﻿ - + \ No newline at end of file diff --git a/TileUpdateManager/pch.h b/TileUpdateManager/pch.h index 3d5f633..1ab5e4c 100644 --- a/TileUpdateManager/pch.h +++ b/TileUpdateManager/pch.h @@ -46,5 +46,6 @@ #include #include #include
+#include #include "d3dx12.h" diff --git a/config/config.json b/config/config.json index cb5fe33..67c4943 100644 --- a/config/config.json +++ b/config/config.json @@ -8,6 +8,9 @@ "anisotropy": 4, // sampler anisotropy "directStorage": true, // use directstorage vs. dedicated thread with ReadFile() and CopyTiles() + "stagingSizeMB": 256, // size of the staging buffer for DirectStorage or reference streaming code + + "addAliasingBarriers": false, //// adds a barrier for each streaming resource: alias(nullptr, pResource) "adapter": "", // in a multi-adapter system, searches description for substring ignoring case e.g. "intel" @@ -24,17 +27,6 @@ "numSpheres": 0, // number of objects besides the terrain "lightFromView": false, // light direction is look direction - // sphere geometry - "sphereLong": 64, // # steps vertically. must be even - "sphereLat": 65, // # steps around. must be odd - - // terrain (heightmap) geometry - "terrainSideSize": 128, // resolution of terrain vertex buffer, e.g. 256x256 grid - "heightScale": 50, - "noiseScale": 25, - "octaves": 8, - "mountainSize": 4000, - "maxFeedbackTime": 5.0, // maximum milliseconds for GPU to resolve feedback "visualizeMinMip": false, // color overlayed onto texture by PS corresponding to mip level @@ -52,25 +44,23 @@ // note a command list can only target a single resource // based on guidance around maximum outstanding SSD read requests: drives aren't expected to benefit from lots of queueing // more queueing is counter-productive, as copies might arrive after the target is no longer visible - "numStreamingBatches": 128, - - // the maximum number of tile copies in a single command list - // a larger value allows the system to absorb a big change with a single UpdateList - "streamingBatchSize": 64, - - // affects size of gpu upload buffer, that is, staging between file read and gpu copy - // uploads should move fast, so it should be hard to hit even a small value. 
- // NOTE: do consider # tiles in flight, that is # command lists * avg # copies per command list - // e.g. 128 command lists * 32 copies each would be 4096 tiles in flight - // 1024 would be a 64MB upload buffer. Suggested 16-32MB. - "maxTilesInFlight": 512, + "numStreamingBatches": 64, "waitForAssetLoad": false, - "addAliasingBarriers": false, //// adds a barrier for each streaming resource: alias(nullptr, pResource) - "timingStart": 0, // start recording statistics this frame "timingStop": 0, // stop recording statistics this frame "timingFileFrames": "", // file name for per-frame statistics. no statistics unless set. ".csv" will be appended - "exitImageFile": "" // if set, outputs final image on exit. extension (e.g. .png) will be appended + "exitImageFile": "", // if set, outputs final image on exit. extension (e.g. .png) will be appended + + // sphere geometry + "sphereLong": 64, // # steps vertically. must be even + "sphereLat": 65, // # steps around. must be odd + + // terrain (heightmap) geometry + "terrainSideSize": 128, // resolution of terrain vertex buffer, e.g. 256x256 grid + "heightScale": 50, + "noiseScale": 25, + "octaves": 8, + "mountainSize": 4000 } diff --git a/config/fragmentationWA.json b/config/fragmentationWA.json index e7d2e80..dca5b9b 100644 --- a/config/fragmentationWA.json +++ b/config/fragmentationWA.json @@ -8,6 +8,9 @@ "anisotropy": 4, // sampler anisotropy "directStorage": true, // use directstorage vs. dedicated thread with ReadFile() and CopyTiles() + "stagingSizeMB": 256, // size of the staging buffer for DirectStorage or reference streaming code + + "addAliasingBarriers": false, //// adds a barrier for each streaming resource: alias(nullptr, pResource) "adapter": "", // in a multi-adapter system, searches description for substring ignoring case e.g. 
"intel" @@ -24,17 +27,6 @@ "numSpheres": 0, // number of objects besides the terrain "lightFromView": false, // light direction is look direction - // sphere geometry - "sphereLong": 64, // # steps vertically. must be even - "sphereLat": 65, // # steps around. must be odd - - // terrain (heightmap) geometry - "terrainSideSize": 128, // resolution of terrain vertex buffer, e.g. 256x256 grid - "heightScale": 50, - "noiseScale": 25, - "octaves": 8, - "mountainSize": 4000, - "maxFeedbackTime": 5.0, // maximum milliseconds for GPU to resolve feedback "visualizeMinMip": false, // color overlayed onto texture by PS corresponding to mip level @@ -52,11 +44,7 @@ // note a command list can only target a single resource // based on guidance around maximum outstanding SSD read requests: drives aren't expected to benefit from lots of queueing // more queueing is counter-productive, as copies might arrive after the target is no longer visible - "numStreamingBatches": 128, - - // the maximum number of tile copies in a single command list - // a larger value allows the system to absorb a big change with a single UpdateList - "streamingBatchSize": 64, + "numStreamingBatches": 64, // affects size of gpu upload buffer, that is, staging between file read and gpu copy // uploads should move fast, so it should be hard to hit even a small value. @@ -67,10 +55,19 @@ "waitForAssetLoad": false, - "addAliasingBarriers": false, //// adds a barrier for each streaming resource: alias(nullptr, pResource) - "timingStart": 0, // start recording statistics this frame "timingStop": 0, // stop recording statistics this frame "timingFileFrames": "", // file name for per-frame statistics. no statistics unless set. ".csv" will be appended - "exitImageFile": "" // if set, outputs final image on exit. extension (e.g. .png) will be appended + "exitImageFile": "", // if set, outputs final image on exit. extension (e.g. .png) will be appended + + // sphere geometry + "sphereLong": 64, // # steps vertically. 
must be even + "sphereLat": 65, // # steps around. must be odd + + // terrain (heightmap) geometry + "terrainSideSize": 128, // resolution of terrain vertex buffer, e.g. 256x256 grid + "heightScale": 50, + "noiseScale": 25, + "octaves": 8, + "mountainSize": 4000 } diff --git a/dds/4ktiles.DDS b/dds/4ktiles.DDS new file mode 100644 index 0000000..969bcf8 Binary files /dev/null and b/dds/4ktiles.DDS differ diff --git a/media/heic0707a-4k.xet b/dds/heic0707a-l.dds similarity index 82% rename from media/heic0707a-4k.xet rename to dds/heic0707a-l.dds index 2eb710a..8ab81bd 100644 Binary files a/media/heic0707a-4k.xet and b/dds/heic0707a-l.dds differ diff --git a/media/potw2039a-4k.xet b/dds/potw2039a.dds similarity index 86% rename from media/potw2039a-4k.xet rename to dds/potw2039a.dds index 45421bc..6ab1134 100644 Binary files a/media/potw2039a-4k.xet and b/dds/potw2039a.dds differ diff --git a/include/ArgParser.h b/include/ArgParser.h index 1bfc322..2329bb5 100644 --- a/include/ArgParser.h +++ b/include/ArgParser.h @@ -44,8 +44,8 @@ ArgParser argParser; argParser.AddArg(L"gravity", m_float); argParser.AddArg(L"upisdown", m_flipGravity); // inverts m_flipGravity argParser.AddArg(L"downisup", L"whoops!", m_flipGravity); // inverts current value, includes help message -argParser.AddArg(L"dothing", [=](std::wstring) { DoTheThing(); } ); // call custom function to handle param -argParser.AddArg(L"option", L"a function", [=](std::wstring) { DoOption(); } ); // custom function with help message +argParser.AddArg(L"dothing", [&]() { DoTheThing(); } ); // call custom function to handle param +argParser.AddArg(L"option", L"a function", [&]() { DoOption(GetNextArg()); } ); // custom function with help message that reads the next arg from the command line argParser.Parse(); after running, m_float=20.27 and m_flipGravity=true @@ -75,8 +75,10 @@ after running, m_float=20.27 and m_flipGravity=true class ArgParser { public: - // custom function to handle value passed to command line 
arg - typedef std::function ArgFunction; + // custom function to perform for a command line arg + // use GetNextArg() to read the subsequent command line argument(s) as needed + typedef std::function ArgFunction; + static std::wstring GetNextArg(); void AddArg(std::wstring token, ArgFunction f); void AddArg(std::wstring token, std::wstring description, ArgFunction f); @@ -89,46 +91,69 @@ class ArgParser class ArgPair { public: - ArgPair(std::wstring s, std::function f) : m_arg(s), m_func(f) + ArgPair(std::wstring s, ArgFunction f) : m_arg(s), m_func(f) { for (auto& c : m_arg) { c = ::towlower(c); } } - void TestEqual(std::wstring in_arg, const WCHAR* in_value) + void TestEqual(std::wstring in_arg) { for (auto& c : in_arg) { c = ::towlower(c); } if (m_arg == in_arg) { - m_func(in_value); + m_func(); + m_arg.clear(); // this argument has been consumed } } - const std::wstring& Get() { return m_arg; } private: std::wstring m_arg; - std::function m_func; + ArgFunction m_func; }; - std::vector < ArgPair > m_args; + std::vector m_args; std::wstringstream m_help; - template void AddArg(std::wstring s, std::wstring d, std::function f, T& value) + template void AddArg(std::wstring s, std::wstring d, ArgFunction f, T& value) { std::wstringstream w; w << ": " << d << " (default: " << value << ") "; AddArg(s, w.str().c_str(), f);; } - template<> void AddArg(std::wstring s, std::wstring d, std::function f, bool& value) + template<> void AddArg(std::wstring s, std::wstring d, ArgFunction f, bool& value) { std::wstringstream w; std::string b = value ? 
"True" : "False"; w << ": " << d << " (default: " << b.c_str() << ") "; AddArg(s, w.str().c_str(), f);; } + + // function to hold the static command line arguments array + static std::vector& GetCmdLine() + { + static std::vector m_commandLineArgs; + return m_commandLineArgs; + } }; +//----------------------------------------------------------------------------- +// from GetCommandLine(), reversed to make iteration easy +//----------------------------------------------------------------------------- +inline std::wstring ArgParser::GetNextArg() +{ + auto& args = GetCmdLine(); + if (0 == args.size()) + { + std::wcerr << "Not enough command line arguments\n"; + exit(0); + } + std::wstring t = args.back(); + args.resize(args.size() - 1); + return t; +} + //----------------------------------------------------------------------------- //----------------------------------------------------------------------------- -void ArgParser::AddArg(std::wstring token, ArgFunction f) +inline void ArgParser::AddArg(std::wstring token, ArgFunction f) { AddArg(token, L"", f); } @@ -146,6 +171,13 @@ inline void ArgParser::Parse() int numArgs = 0; LPWSTR* cmdLine = CommandLineToArgvW(GetCommandLineW(), &numArgs); + auto& args = GetCmdLine(); + args.resize(numArgs - 1); // don't need arg 0, that's just the exe path + for (int i = 1; i < numArgs; i++) + { + args[numArgs - i - 1] = cmdLine[i]; + } + if ((2 == numArgs) && (std::wstring(L"?") == cmdLine[1])) { BOOL allocConsole = AllocConsole(); // returns false for console applications @@ -166,21 +198,22 @@ inline void ArgParser::Parse() exit(0); } - for (int i = 0; i < numArgs; i++) + while (args.size()) { + std::wstring s = GetNextArg(); for (auto& arg : m_args) { - arg.TestEqual(cmdLine[i], (i < numArgs - 1) ? 
cmdLine[i + 1] : L""); + arg.TestEqual(s); } } } -template<> inline void ArgParser::AddArg(std::wstring arg, std::wstring desc, long& value) { AddArg(arg, desc, [&](std::wstring s) { value = std::stol(s); }, value); } -template<> inline void ArgParser::AddArg(std::wstring arg, std::wstring desc, UINT& value) { AddArg(arg, desc, [&](std::wstring s) { value = std::stoul(s); }, value); } -template<> inline void ArgParser::AddArg(std::wstring arg, std::wstring desc, int& value) { AddArg(arg, desc, [&](std::wstring s) { value = std::stoi(s); }, value); } -template<> inline void ArgParser::AddArg(std::wstring arg, std::wstring desc, float& value) { AddArg(arg, desc, [&](std::wstring s) { value = std::stof(s); }, value); } -template<> inline void ArgParser::AddArg(std::wstring arg, std::wstring desc, bool& value) { AddArg(arg, desc, [&](std::wstring s) { value = !value; }, value); } -template<> inline void ArgParser::AddArg(std::wstring arg, std::wstring desc, std::wstring& value) { AddArg(arg, desc, [&](std::wstring s) { value = s; }, value); } -template<> inline void ArgParser::AddArg(std::wstring arg, std::wstring desc, double& value) { AddArg(arg, desc, [&](std::wstring s) { value = std::stod(s); }, value); } -template<> inline void ArgParser::AddArg(std::wstring arg, std::wstring desc, INT64& value) { AddArg(arg, desc, [&](std::wstring s) { value = std::stoll(s); }, value); } -template<> inline void ArgParser::AddArg(std::wstring arg, std::wstring desc, UINT64& value) { AddArg(arg, desc, [&](std::wstring s) { value = std::stoull(s); }, value); } +template<> inline void ArgParser::AddArg(std::wstring arg, std::wstring desc, long& value) { AddArg(arg, desc, [&]() { value = std::stol(GetNextArg()); }, value); } +template<> inline void ArgParser::AddArg(std::wstring arg, std::wstring desc, UINT& value) { AddArg(arg, desc, [&]() { value = std::stoul(GetNextArg()); }, value); } +template<> inline void ArgParser::AddArg(std::wstring arg, std::wstring desc, int& value) { 
AddArg(arg, desc, [&]() { value = std::stoi(GetNextArg()); }, value); } +template<> inline void ArgParser::AddArg(std::wstring arg, std::wstring desc, float& value) { AddArg(arg, desc, [&]() { value = std::stof(GetNextArg()); }, value); } +template<> inline void ArgParser::AddArg(std::wstring arg, std::wstring desc, bool& value) { AddArg(arg, desc, [&]() { value = !value; }, value); } +template<> inline void ArgParser::AddArg(std::wstring arg, std::wstring desc, std::wstring& value) { AddArg(arg, desc, [&]() { value = GetNextArg(); }, value); } +template<> inline void ArgParser::AddArg(std::wstring arg, std::wstring desc, double& value) { AddArg(arg, desc, [&]() { value = std::stod(GetNextArg()); }, value); } +template<> inline void ArgParser::AddArg(std::wstring arg, std::wstring desc, INT64& value) { AddArg(arg, desc, [&]() { value = std::stoll(GetNextArg()); }, value); } +template<> inline void ArgParser::AddArg(std::wstring arg, std::wstring desc, UINT64& value) { AddArg(arg, desc, [&]() { value = std::stoull(GetNextArg()); }, value); } diff --git a/include/WindowCapture.h b/include/WindowCapture.h index e4218ae..1d6a45e 100644 --- a/include/WindowCapture.h +++ b/include/WindowCapture.h @@ -122,14 +122,15 @@ namespace WindowCapture in_pQ->ExecuteCommandLists(_countof(ppCommandLists), ppCommandLists); HANDLE renderFenceEvent = ::CreateEvent(nullptr, FALSE, FALSE, nullptr); - if (renderFenceEvent == nullptr) + if (NULL == renderFenceEvent) { ThrowIfFailed(HRESULT_FROM_WIN32(GetLastError())); + exit(-1); } ThrowIfFailed(in_pQ->Signal(renderFence.Get(), 1)); ThrowIfFailed(renderFence->SetEventOnCompletion(1, renderFenceEvent)); - WaitForSingleObject(renderFenceEvent, INFINITE); + ::WaitForSingleObject(renderFenceEvent, INFINITE); ::CloseHandle(renderFenceEvent); BYTE* pData = nullptr; diff --git a/media/4ktiles.xet b/media/4ktiles.xet deleted file mode 100644 index 33f35c7..0000000 Binary files a/media/4ktiles.xet and /dev/null differ diff --git 
a/scripts/convert.bat b/scripts/convert.bat index 98254e2..c4ab89d 100644 --- a/scripts/convert.bat +++ b/scripts/convert.bat @@ -1,6 +1,8 @@ echo usage: convert srcdir dstdir @echo off +if not exist %2 mkdir %2 + set exedir=%cd% pushd %2 set outdir=%cd% @@ -8,7 +10,7 @@ popd pushd %1 for /R %%f in (*.dds) do ( + echo %%~nf.dds %exedir%\DdsToXet.exe -in %%~nf.dds -out %outdir%\%%~nf.xet %3 ) popd - diff --git a/src/CommandLineArgs.h b/src/CommandLineArgs.h index a600083..a805622 100644 --- a/src/CommandLineArgs.h +++ b/src/CommandLineArgs.h @@ -81,6 +81,7 @@ struct CommandLineArgs std::wstring m_adapterDescription; // e.g. "intel", will pick the GPU with this substring in the adapter description (not case sensitive) bool m_useDirectStorage{ false }; + UINT m_stagingSizeMB{ 64 }; // size of the staging buffer for DirectStorage or reference streaming code //------------------------------------------------------- // state that is not settable from command line: @@ -102,9 +103,7 @@ struct CommandLineArgs bool m_showFeedbackViewer{ true }; // toggle just the raw feedback view in the feedback viewer UINT m_statisticsNumFrames{ 30 }; bool m_cameraUpLock{ true }; // navigation locks "up" to be y=1 - UINT m_numStreamingBatches{ 128 }; // # UpdateLists - UINT m_streamingBatchSize{ 32 }; // max tile copies per updatelist - UINT m_maxTilesInFlight{ 512 }; // size of upload buffer (in tiles), does not apply to DS + UINT m_numStreamingBatches{ 128 }; // number of in-flight batches of updates (UpdateLists) // planet parameters UINT m_sphereLong{ 128 }; // # steps vertically. 
must be even diff --git a/src/Expanse.vcxproj b/src/Expanse.vcxproj index a14fa5c..1e1e715 100644 --- a/src/Expanse.vcxproj +++ b/src/Expanse.vcxproj @@ -118,6 +118,7 @@ + @@ -134,26 +135,6 @@ - - Document - $(OutDir)media - $(OutDir)media - - - Document - $(OutDir)media - $(OutDir)media - - - Document - $(OutDir)media - $(OutDir)media - - - - - Document - Document @@ -236,12 +217,12 @@ - + This project references NuGet package(s) that are missing on this computer. Use NuGet Package Restore to download them. For more information, see http://go.microsoft.com/fwlink/?LinkID=322105. The missing file is {0}. - + \ No newline at end of file diff --git a/src/Expanse.vcxproj.filters b/src/Expanse.vcxproj.filters index f3e8943..6e367f5 100644 --- a/src/Expanse.vcxproj.filters +++ b/src/Expanse.vcxproj.filters @@ -15,9 +15,6 @@ {bcb9e86a-bb53-4591-9f4d-4b4750c97c11} - - {95fee67b-a66c-49dd-a19f-43e22f6ef6fb} - {62863df4-8463-4043-82e4-00ee2fa92921} @@ -32,34 +29,30 @@ config - - media - - - scripts - shaders shaders - - - - - Source Files + + config + + + shaders - Source Files + scripts + + + scripts - Source Files + scripts - Source Files + scripts - @@ -160,6 +153,9 @@ include + + Header Files + diff --git a/src/Expanse_vs2022.vcxproj b/src/Expanse_vs2022.vcxproj index 33b183e..3ff21d2 100644 --- a/src/Expanse_vs2022.vcxproj +++ b/src/Expanse_vs2022.vcxproj @@ -87,9 +87,6 @@ - - Document - Document @@ -102,28 +99,12 @@ Document - - Document - $(OutDir)media - $(OutDir)media - - - Document - $(OutDir)media - $(OutDir)media - - - Document - $(OutDir)media - $(OutDir)media - Document Document - @@ -228,14 +209,17 @@ Document + + + - + This project references NuGet package(s) that are missing on this computer. Use NuGet Package Restore to download them. For more information, see http://go.microsoft.com/fwlink/?LinkID=322105. The missing file is {0}. 
- + \ No newline at end of file diff --git a/src/Expanse_vs2022.vcxproj.filters b/src/Expanse_vs2022.vcxproj.filters index c8346ee..dece9a0 100644 --- a/src/Expanse_vs2022.vcxproj.filters +++ b/src/Expanse_vs2022.vcxproj.filters @@ -15,9 +15,6 @@ {23f41b89-5d6f-48b1-8016-ea5107d4976f} - - {6e46226a-0c46-4bfa-a464-5190adf0fe4e} - {aca759ef-33ea-4723-8a6f-84302ce25d77} @@ -25,9 +22,6 @@ {71df60be-5005-472b-a6c3-c52100ccae2b} - - - Source Files @@ -161,24 +155,12 @@ Shaders - - scripts - - - media - config config - - media - - - media - scripts @@ -192,4 +174,7 @@ scripts + + + \ No newline at end of file diff --git a/src/FrameEventTracing.h b/src/FrameEventTracing.h index 862f5e2..7297986 100644 --- a/src/FrameEventTracing.h +++ b/src/FrameEventTracing.h @@ -59,6 +59,9 @@ class FrameEventTracing : public WriteCSV FrameEventTracing(const std::wstring& in_fileName, const std::wstring& in_adapterDescription); virtual ~FrameEventTracing() {} + // pre-allocate the right amount of memory as an optimization when collecting statistics + void Reserve(UINT in_numExpectedEvents) { m_events.reserve(in_numExpectedEvents); } + void Append( const RenderEventList& in_renderList, const UpdateEventList& in_updateList, @@ -67,7 +70,8 @@ class FrameEventTracing : public WriteCSV float in_cpuProcessFeedbackTime, float in_gpuProcessFeedbackTime) { - m_events.push_back({ in_renderList.GetLatest(), in_updateList.GetLatest(), + m_events.push_back({ + in_renderList.GetLatest(), in_updateList.GetLatest(), in_numUploads, in_numEvictions, in_cpuProcessFeedbackTime, in_gpuProcessFeedbackTime, in_numFeedbackResolves }); } diff --git a/src/Scene.cpp b/src/Scene.cpp index 43a1410..ab39cae 100644 --- a/src/Scene.cpp +++ b/src/Scene.cpp @@ -207,6 +207,7 @@ Scene::Scene(const CommandLineArgs& in_args, HWND in_hwnd) : if (m_args.m_timingFrameFileName.size() && (m_args.m_timingStopFrame >= m_args.m_timingStartFrame)) { m_csvFile = std::make_unique(m_args.m_timingFrameFileName, adapterDescription); + 
m_csvFile->Reserve(m_args.m_timingStopFrame - m_args.m_timingStartFrame); } } @@ -658,8 +659,7 @@ void Scene::StartStreamingLibrary() { TileUpdateManagerDesc tumDesc; tumDesc.m_maxNumCopyBatches = m_args.m_numStreamingBatches; - tumDesc.m_maxTileCopiesPerBatch = m_args.m_streamingBatchSize; - tumDesc.m_maxTileCopiesInFlight = m_args.m_maxTilesInFlight; + tumDesc.m_stagingBufferSizeMB = m_args.m_stagingSizeMB; tumDesc.m_maxTileMappingUpdatesPerApiCall = m_args.m_maxTileUpdatesPerApiCall; tumDesc.m_swapChainBufferCount = SharedConstants::SWAP_CHAIN_BUFFER_COUNT; tumDesc.m_addAliasingBarriers = m_args.m_addAliasingBarriers; @@ -1275,11 +1275,13 @@ void Scene::GatherStatistics(float in_cpuProcessFeedbackTime, float in_gpuProces float tilesPerSecond = float(measuredNumUploads) / measuredTime; float bytesPerTileDivMega = float(64 * 1024) / (1000.f * 1000.f); float mbps = tilesPerSecond * bytesPerTileDivMega; + m_totalTileLatency = m_pTileUpdateManager->GetTotalTileCopyLatency() - m_totalTileLatency; + float approximatePerTileLatency = 1000.f * (m_totalTileLatency / measuredNumUploads); DebugPrint(L"Gathering final statistics before exiting\n"); m_csvFile->WriteEvents(m_hwnd, m_args); - *m_csvFile << measuredNumUploads << " " << measuredTime << " " << mbps << " uploads|seconds|bandwidth\n"; + *m_csvFile << measuredNumUploads << " " << measuredTime << " " << mbps << " " << approximatePerTileLatency << " uploads|seconds|bandwidth|latency_ms\n"; m_csvFile->close(); m_csvFile = nullptr; } @@ -1297,6 +1299,7 @@ void Scene::GatherStatistics(float in_cpuProcessFeedbackTime, float in_gpuProces if (m_args.m_timingFrameFileName.size() && (m_frameNumber == m_args.m_timingStartFrame)) { m_startUploadCount = m_pTileUpdateManager->GetTotalNumUploads(); + m_totalTileLatency = m_pTileUpdateManager->GetTotalTileCopyLatency(); m_cpuTimer.Start(); } } diff --git a/src/Scene.h b/src/Scene.h index fad4a98..040d087 100644 --- a/src/Scene.h +++ b/src/Scene.h @@ -248,6 +248,7 @@ class Scene void 
GatherStatistics(float in_cpuProcessFeedbackTime, float in_gpuProcessFeedbackTime); UINT m_startUploadCount{ 0 }; + float m_totalTileLatency{ 0 }; // per-tile upload latency. NOT the same as per-UpdateList Timer m_cpuTimer; void StartScene(); diff --git a/src/SceneObject.cpp b/src/SceneObject.cpp index 1023553..3211424 100644 --- a/src/SceneObject.cpp +++ b/src/SceneObject.cpp @@ -28,6 +28,7 @@ #include "pch.h" +#include #include #include #include @@ -236,10 +237,8 @@ std::wstring SceneObjects::BaseObject::GetAssetFullPath(const std::wstring& in_f { WCHAR buffer[MAX_PATH]; GetModuleFileName(nullptr, buffer, MAX_PATH); - std::wstring exePath(buffer); - exePath.resize(exePath.rfind('\\') + 1); - std::wstring path = exePath + in_filename; - return path; + std::filesystem::path exePath(buffer); + return exePath.remove_filename().append(in_filename); } //------------------------------------------------------------------------- @@ -369,68 +368,6 @@ void SceneObjects::BaseObject::Draw(ID3D12GraphicsCommandList1* in_pCommandList, } } -//----------------------------------------------------------------------------- -// helper class -//----------------------------------------------------------------------------- -class Staging -{ -public: - Staging(ID3D12Resource* out_pBuffer, const D3D12_RESOURCE_DESC& in_desc, - D3D12_RESOURCE_STATES in_finalState) : - m_finalState(in_finalState), m_pBuffer(out_pBuffer) - { - ComPtr device; - out_pBuffer->GetDevice(IID_PPV_ARGS(&device)); - - ThrowIfFailed(device->CreateCommandAllocator(D3D12_COMMAND_LIST_TYPE_DIRECT, IID_PPV_ARGS(&m_commandAllocator))); - device->CreateCommandList(0, D3D12_COMMAND_LIST_TYPE_DIRECT, m_commandAllocator.Get(), nullptr, IID_PPV_ARGS(&m_commandList)); - m_commandList->SetName(L"Staging::m_commandList"); - D3D12_COMMAND_QUEUE_DESC queueDesc = {}; - queueDesc.Flags = D3D12_COMMAND_QUEUE_FLAG_NONE; - queueDesc.Type = D3D12_COMMAND_LIST_TYPE_DIRECT; - ThrowIfFailed(device->CreateCommandQueue(&queueDesc, 
IID_PPV_ARGS(&m_commandQueue))); - ThrowIfFailed(device->CreateFence(0, D3D12_FENCE_FLAG_NONE, IID_PPV_ARGS(&m_renderFence))); - - const auto heapProperties = CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_UPLOAD); - ThrowIfFailed(device->CreateCommittedResource( - &heapProperties, D3D12_HEAP_FLAG_NONE, - &in_desc, D3D12_RESOURCE_STATE_GENERIC_READ, nullptr, IID_PPV_ARGS(&m_stagingResource))); - } - ID3D12Resource* GetResource() { return m_stagingResource.Get(); } - ID3D12GraphicsCommandList* GetCommandList() { return m_commandList.Get(); } - ~Staging() - { - D3D12_RESOURCE_BARRIER barrier = CD3DX12_RESOURCE_BARRIER::Transition(m_pBuffer, D3D12_RESOURCE_STATE_COPY_DEST, m_finalState); - m_commandList->ResourceBarrier(1, &barrier); - - // submit all our initialization commands - m_commandList->Close(); - ID3D12CommandList* ppCommandLists[] = { m_commandList.Get() }; - m_commandQueue->ExecuteCommandLists(_countof(ppCommandLists), ppCommandLists); - - HANDLE renderFenceEvent = ::CreateEvent(nullptr, FALSE, FALSE, nullptr); - if (renderFenceEvent == nullptr) - { - ThrowIfFailed(HRESULT_FROM_WIN32(GetLastError())); - } - - ThrowIfFailed(m_commandQueue->Signal(m_renderFence.Get(), 1)); - ThrowIfFailed(m_renderFence->SetEventOnCompletion(1, renderFenceEvent)); - WaitForSingleObject(renderFenceEvent, INFINITE); - ::CloseHandle(renderFenceEvent); - } -private: - template using ComPtr = Microsoft::WRL::ComPtr; - - ComPtr m_commandAllocator; - ComPtr m_commandList; - ComPtr m_commandQueue; - ComPtr m_renderFence; - ComPtr m_stagingResource; - ID3D12Resource* m_pBuffer; - D3D12_RESOURCE_STATES m_finalState; -}; - //----------------------------------------------------------------------------- //----------------------------------------------------------------------------- void SceneObjects::CreateSphereResources( diff --git a/src/TextureViewer.cpp b/src/TextureViewer.cpp index 2333df4..96076a9 100644 --- a/src/TextureViewer.cpp +++ b/src/TextureViewer.cpp @@ -26,6 +26,7 @@ #include 
"pch.h" +#include #include "TextureViewer.h" #include "DebugHelper.h" @@ -51,17 +52,14 @@ struct ConstantBuffer }; //----------------------------------------------------------------------------- - - // expect hlsl files to be next to the exe //----------------------------------------------------------------------------- inline std::wstring GetAssetFullPath(const std::wstring in_filename) { WCHAR buffer[MAX_PATH]; GetModuleFileName(nullptr, buffer, MAX_PATH); - std::wstring exePath(buffer); - exePath.resize(exePath.rfind('\\') + 1); - return (exePath + in_filename); + std::filesystem::path exePath(buffer); + return exePath.remove_filename().append(in_filename); } //----------------------------------------------------------------------------- diff --git a/src/packages.config b/src/packages.config index 9c064d0..220e8cc 100644 --- a/src/packages.config +++ b/src/packages.config @@ -1,4 +1,4 @@  - + \ No newline at end of file diff --git a/src/winMain.cpp b/src/winMain.cpp index 8902466..a2d0385 100644 --- a/src/winMain.cpp +++ b/src/winMain.cpp @@ -81,7 +81,7 @@ struct MouseState { POINT pos{}; POINT move{}; - bool m_dragging{false}; + bool m_dragging{ false }; } g_mouseState; //----------------------------------------------------------------------------- @@ -114,14 +114,17 @@ void AdjustArguments(CommandLineArgs& out_args) out_args.m_mediaDir += L'\\'; } - // if the desired media path doesn't exist, try looking relative to the current directory + // if the desired media path doesn't exist, try looking relative to the executable if (!std::filesystem::exists(out_args.m_mediaDir)) { WCHAR buffer[MAX_PATH]; GetModuleFileName(nullptr, buffer, MAX_PATH); - std::wstring exePath(buffer); - exePath.resize(exePath.rfind('\\') + 1); - out_args.m_mediaDir = exePath + out_args.m_mediaDir; + std::filesystem::path exePath(buffer); + exePath.remove_filename().append(out_args.m_mediaDir); + if (std::filesystem::exists(exePath)) + { + out_args.m_mediaDir = exePath; + } } if 
(std::filesystem::exists(out_args.m_mediaDir)) @@ -196,10 +199,10 @@ void ParseCommandLine(CommandLineArgs& out_args) argParser.AddArg(L"-rollerCoaster", out_args.m_cameraRollerCoaster); argParser.AddArg(L"-paintMixer", out_args.m_cameraPaintMixer); - argParser.AddArg(L"-visualizeMinMip", [&](std::wstring) { out_args.m_visualizeMinMip = true; }); + argParser.AddArg(L"-visualizeMinMip", [&]() { out_args.m_visualizeMinMip = true; }); argParser.AddArg(L"-hideFeedback", out_args.m_showFeedbackMaps); - argParser.AddArg(L"-hideUI", [&](std::wstring) { out_args.m_showUI = false; }); - argParser.AddArg(L"-miniUI", [&](std::wstring) { out_args.m_uiModeMini = true; }); + argParser.AddArg(L"-hideUI", [&]() { out_args.m_showUI = false; }); + argParser.AddArg(L"-miniUI", [&]() { out_args.m_uiModeMini = true; }); argParser.AddArg(L"-updateAll", out_args.m_updateEveryObjectEveryFrame); argParser.AddArg(L"-addAliasingBarriers", L"Add per-draw aliasing barriers to assist PIX analysis", out_args.m_addAliasingBarriers); @@ -211,9 +214,9 @@ void ParseCommandLine(CommandLineArgs& out_args) argParser.AddArg(L"-waitForAssetLoad", L"stall animation & statistics until assets have minimally loaded", out_args.m_waitForAssetLoad); argParser.AddArg(L"-adapter", L"find an adapter containing this string in the description, ignoring case", out_args.m_adapterDescription); - argParser.AddArg(L"-directStorage", L"force enable DirectStorage", [&](std::wstring) { out_args.m_useDirectStorage = true; }); - argParser.AddArg(L"-directStorageOff", L"force disable DirectStorage", [&](std::wstring) { out_args.m_useDirectStorage = false; }); - + argParser.AddArg(L"-directStorage", L"force enable DirectStorage", [&]() { out_args.m_useDirectStorage = true; }); + argParser.AddArg(L"-directStorageOff", L"force disable DirectStorage", [&]() { out_args.m_useDirectStorage = false; }); + argParser.AddArg(L"-stagingSizeMB", L"DirectStorage staging buffer size", out_args.m_stagingSizeMB); argParser.Parse(); } @@ -470,6 
+473,7 @@ void LoadConfigFile(CommandLineArgs& out_args) if (root.isMember("anisotropy")) out_args.m_anisotropy = root["anisotropy"].asUInt(); if (root.isMember("directStorage")) out_args.m_useDirectStorage = root["directStorage"].asBool(); + if (root.isMember("stagingSizeMB")) out_args.m_stagingSizeMB = root["stagingSizeMB"].asUInt(); if (root.isMember("animationrate")) out_args.m_animationRate = root["animationrate"].asFloat(); if (root.isMember("cameraRate")) out_args.m_cameraAnimationRate = root["cameraRate"].asFloat(); @@ -497,10 +501,7 @@ void LoadConfigFile(CommandLineArgs& out_args) if (root.isMember("heapSizeTiles")) out_args.m_streamingHeapSize = root["heapSizeTiles"].asUInt(); if (root.isMember("numHeaps")) out_args.m_numHeaps = root["numHeaps"].asUInt(); if (root.isMember("maxTileUpdatesPerApiCall")) out_args.m_maxTileUpdatesPerApiCall = root["maxTileUpdatesPerApiCall"].asUInt(); - if (root.isMember("numStreamingBatches")) out_args.m_numStreamingBatches = root["numStreamingBatches"].asUInt(); - if (root.isMember("streamingBatchSize")) out_args.m_streamingBatchSize = root["streamingBatchSize"].asUInt(); - if (root.isMember("maxTilesInFlight")) out_args.m_maxTilesInFlight = root["maxTilesInFlight"].asUInt(); if (root.isMember("maxFeedbackTime")) out_args.m_maxGpuFeedbackTimeMs = root["maxFeedbackTime"].asFloat();