diff --git a/vk_video_decoder/demos/vk-video-dec/Main.cpp b/vk_video_decoder/demos/vk-video-dec/Main.cpp index ead99589..3c147084 100644 --- a/vk_video_decoder/demos/vk-video-dec/Main.cpp +++ b/vk_video_decoder/demos/vk-video-dec/Main.cpp @@ -34,8 +34,10 @@ int main(int argc, const char **argv) { VulkanDeviceContext vkDevCtxt(programConfig.deviceId); if (programConfig.validate) { - vkDevCtxt.AddRequiredInstanceLayer("VK_LAYER_LUNARG_standard_validation"); - vkDevCtxt.AddRequiredInstanceLayer(VK_EXT_DEBUG_REPORT_EXTENSION_NAME); + // REVIEW: Should we make an effort to check for the old LUNARG validation layer name? + vkDevCtxt.AddRequiredInstanceLayer("VK_LAYER_KHRONOS_validation"); + //vkDevCtxt.AddRequiredInstanceLayer("VK_LAYER_LUNARG_standard_validation"); + vkDevCtxt.AddRequiredInstanceExtension(VK_EXT_DEBUG_REPORT_EXTENSION_NAME); } /********** Start WSI instance extensions support *******************************************/ diff --git a/vk_video_decoder/libs/VkCodecUtils/VulkanDeviceContext.cpp b/vk_video_decoder/libs/VkCodecUtils/VulkanDeviceContext.cpp index 54a7fa77..5c2f6498 100644 --- a/vk_video_decoder/libs/VkCodecUtils/VulkanDeviceContext.cpp +++ b/vk_video_decoder/libs/VkCodecUtils/VulkanDeviceContext.cpp @@ -271,11 +271,27 @@ bool VulkanDeviceContext::DebugReportCallback(VkDebugReportFlagsEXT flags, VkDeb else if (flags & VK_DEBUG_REPORT_DEBUG_BIT_EXT) prio = LOG_DEBUG; + std::vector<std::string> ignoredVUIDs = { + "VUID-VkBufferCreateInfo-usage-04813", // buffer profiles (they will be optional soon) + "VUID-vkCmdDecodeVideoKHR-pDecodeInfo-07135", // ditto + }; std::stringstream ss; - ss << layer_prefix << ": " << msg; - std::ostream &st = (prio >= LOG_ERR) ? 
std::cerr : std::cout; - st << msg << "\n"; + if (prio >= LOG_ERR) { + bool ignored = false; + for (const auto& ignoredVUID : ignoredVUIDs) { + if (strstr(msg, ignoredVUID.c_str()) != nullptr) { + ignored = true; + break; + } + } + if (ignored) + return false; + ss << layer_prefix << ": " << msg; + + std::ostream &st = (prio >= LOG_ERR) ? std::cerr : std::cout; + st << msg << "\n"; + } return false; } @@ -329,7 +345,7 @@ VkResult VulkanDeviceContext::InitPhysicalDevice(const VkQueueFlags requestQueue continue; } - if (!HasAllDeviceExtensions(physicalDevice)) { + if (!HasAllDeviceExtensions(physicalDevice, true)) { continue; } @@ -501,9 +517,47 @@ VkResult VulkanDeviceContext::CreateVulkanDevice(int32_t numDecodeQueues, devInfo.enabledExtensionCount = static_cast(m_reqDeviceExtensions.size()); devInfo.ppEnabledExtensionNames = m_reqDeviceExtensions.data(); - // disable all features - VkPhysicalDeviceFeatures features = {}; - devInfo.pEnabledFeatures = &features; + VkPhysicalDeviceDescriptorBufferFeaturesEXT descriptorBufferFeature = {}; + descriptorBufferFeature.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_BUFFER_FEATURES_EXT; + + VkPhysicalDeviceVulkan13Features features_13 = { + .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_3_FEATURES, + .pNext = &descriptorBufferFeature, + }; + VkPhysicalDeviceVulkan12Features features_12 = { + .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_FEATURES, + .pNext = &features_13, + }; + VkPhysicalDeviceVulkan11Features features_11 = { + .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_FEATURES, + .pNext = &features_12, + }; + VkPhysicalDeviceFeatures2 devFeatures = { + .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2, + .pNext = &features_11, + }; + + GetPhysicalDeviceFeatures2(m_physDevice, &devFeatures); + + VkPhysicalDeviceVulkan13Features chosen13 = {}; + chosen13.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_3_FEATURES; + VkPhysicalDeviceVulkan12Features chosen12 = {}; + chosen12.sType = 
VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_FEATURES; + chosen12.pNext = &chosen13; + VkPhysicalDeviceVulkan11Features chosen11 = {}; + chosen11.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_FEATURES; + chosen11.pNext = &chosen12; + + // TODO: Review all the usages of features and ensure they are checked here. Descriptor buffers, samplers, lots of things probably... + // if presenting: assert sampler conversion... + chosen11.samplerYcbcrConversion = features_11.samplerYcbcrConversion; + assert(features_13.synchronization2); + chosen13.synchronization2 = features_13.synchronization2; + + // Use all the supported core features, probably we should trim this down a bit. + devFeatures.pNext = &chosen11; + devInfo.pEnabledFeatures = nullptr; // use features2 + devInfo.pNext = &devFeatures; VkResult result = CreateDevice(m_physDevice, &devInfo, nullptr, &m_device); if (result != VK_SUCCESS) { @@ -512,8 +566,10 @@ VkResult VulkanDeviceContext::CreateVulkanDevice(int32_t numDecodeQueues, vk::InitDispatchTableBottom(m_instance,m_device, this); - GetDeviceQueue(m_device, GetGfxQueueFamilyIdx(), 0, &m_gfxQueue); - GetDeviceQueue(m_device, GetPresentQueueFamilyIdx(), 0, &m_presentQueue); + if (createGraphicsQueue) + GetDeviceQueue(m_device, GetGfxQueueFamilyIdx(), 0, &m_gfxQueue); + if (createPresentQueue) + GetDeviceQueue(m_device, GetPresentQueueFamilyIdx(), 0, &m_presentQueue); if (numDecodeQueues) { assert(GetVideoDecodeQueueFamilyIdx() != -1); diff --git a/vk_video_decoder/libs/VkCodecUtils/VulkanVideoProcessor.cpp b/vk_video_decoder/libs/VkCodecUtils/VulkanVideoProcessor.cpp index 4df3c17a..00ff6814 100644 --- a/vk_video_decoder/libs/VkCodecUtils/VulkanVideoProcessor.cpp +++ b/vk_video_decoder/libs/VkCodecUtils/VulkanVideoProcessor.cpp @@ -403,6 +403,8 @@ size_t VulkanVideoProcessor::ConvertFrameToNv12(DecodedFrame* pFrame, retryCount--; } while ((result == VK_TIMEOUT) && (retryCount > 0)); + // Transition the resource to the queue that has + // Map the image and 
read the image data. VkDeviceSize imageOffset = imageResource->GetImageDeviceMemoryOffset(); VkDeviceSize maxSize = 0; diff --git a/vk_video_decoder/libs/VkVideoDecoder/VkParserVideoPictureParameters.cpp b/vk_video_decoder/libs/VkVideoDecoder/VkParserVideoPictureParameters.cpp index e8fea0cd..dd6f993d 100644 --- a/vk_video_decoder/libs/VkVideoDecoder/VkParserVideoPictureParameters.cpp +++ b/vk_video_decoder/libs/VkVideoDecoder/VkParserVideoPictureParameters.cpp @@ -231,8 +231,11 @@ VkResult VkParserVideoPictureParameters::UpdateParametersObject(const StdVideoPi return VK_ERROR_INITIALIZATION_FAILED; } - updateInfo.updateSequenceCount = std::max(pStdVideoPictureParametersSet->GetUpdateSequenceCount(), updateInfo.updateSequenceCount); - + // The parser doesn't seem to be setting this correctly on the parameter objects. It keeps a per object count rather than a global + // one as the spec requires. Hack this for now, but check with NVIDIA what the best thing to do here is + static int updateCounter = 1; + // updateInfo.updateSequenceCount = std::max(pStdVideoPictureParametersSet->GetUpdateSequenceCount(), updateInfo.updateSequenceCount); + updateInfo.updateSequenceCount = updateCounter++; VkResult result = m_vkDevCtx->UpdateVideoSessionParametersKHR(*m_vkDevCtx, m_sessionParameters, diff --git a/vk_video_decoder/libs/VkVideoDecoder/VkVideoDecoder.cpp b/vk_video_decoder/libs/VkVideoDecoder/VkVideoDecoder.cpp index 1db08ff4..d5d324a0 100644 --- a/vk_video_decoder/libs/VkVideoDecoder/VkVideoDecoder.cpp +++ b/vk_video_decoder/libs/VkVideoDecoder/VkVideoDecoder.cpp @@ -26,6 +26,7 @@ #undef min #define GPU_ALIGN(x) (((x) + 0xff) & ~0xff) +#define ALIGN(addr, width) ((addr + (width - 1)) & ~(width - 1)) const uint64_t gFenceTimeout = 100 * 1000 * 1000 /* 100 mSec */; const uint64_t gLongTimeout = 1000 * 1000 * 1000 /* 1000 mSec */; @@ -174,6 +175,10 @@ int32_t VkVideoDecoder::StartVideoSequence(VkParserDetectedVideoFormat* pVideoFo assert(!"Could not get Video 
Capabilities!"); return -1; } + + m_minBitstreamBufferOffsetAlignment = videoCapabilities.minBitstreamBufferOffsetAlignment; + m_minBitstreamBufferSizeAlignment = videoCapabilities.minBitstreamBufferSizeAlignment; + m_capabilityFlags = videoDecodeCapabilities.flags; m_dpbAndOutputCoincide = (m_capabilityFlags & VK_VIDEO_DECODE_CAPABILITY_DPB_AND_OUTPUT_COINCIDE_BIT_KHR); VkFormat dpbImageFormat = VK_FORMAT_UNDEFINED; @@ -191,10 +196,8 @@ int32_t VkVideoDecoder::StartVideoSequence(VkParserDetectedVideoFormat* pVideoFo imageExtent.width = std::max(imageExtent.width, videoCapabilities.minCodedExtent.width); imageExtent.height = std::max(imageExtent.height, videoCapabilities.minCodedExtent.height); - uint32_t alignWidth = videoCapabilities.pictureAccessGranularity.width - 1; - imageExtent.width = ((imageExtent.width + alignWidth) & ~alignWidth); - uint32_t alignHeight = videoCapabilities.pictureAccessGranularity.height - 1; - imageExtent.height = ((imageExtent.height + alignHeight) & ~alignHeight); + imageExtent.width = ALIGN(imageExtent.width, videoCapabilities.pictureAccessGranularity.width); + imageExtent.height = ALIGN(imageExtent.height, videoCapabilities.pictureAccessGranularity.height); if (!m_videoSession || !m_videoSession->IsCompatible( m_vkDevCtx, @@ -222,9 +225,14 @@ int32_t VkVideoDecoder::StartVideoSequence(VkParserDetectedVideoFormat* pVideoFo } VkImageUsageFlags outImageUsage = (VK_IMAGE_USAGE_VIDEO_DECODE_DST_BIT_KHR | - VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT); + + if (!m_useLinearOutput) + { + outImageUsage |= VK_IMAGE_USAGE_SAMPLED_BIT; + } + VkImageUsageFlags dpbImageUsage = VK_IMAGE_USAGE_VIDEO_DECODE_DPB_BIT_KHR; if (m_dpbAndOutputCoincide) { @@ -424,8 +432,8 @@ int VkVideoDecoder::DecodePictureWithParameters(VkParserPerFrameDecodeParameters pPicParams->decodeFrameInfo.srcBuffer = pPicParams->bitstreamData->GetBuffer(); assert(pPicParams->bitstreamDataOffset == 0); 
assert(pPicParams->firstSliceIndex == 0); - pPicParams->decodeFrameInfo.srcBufferOffset = pPicParams->bitstreamDataOffset; - pPicParams->decodeFrameInfo.srcBufferRange = pPicParams->bitstreamDataLen; + pPicParams->decodeFrameInfo.srcBufferOffset = ALIGN(pPicParams->bitstreamDataOffset, m_minBitstreamBufferOffsetAlignment); + pPicParams->decodeFrameInfo.srcBufferRange = ALIGN(pPicParams->bitstreamDataLen, m_minBitstreamBufferSizeAlignment); // pPicParams->decodeFrameInfo.dstImageView = VkImageView(); VkVideoBeginCodingInfoKHR decodeBeginInfo = { VK_STRUCTURE_TYPE_VIDEO_BEGIN_CODING_INFO_KHR }; @@ -569,9 +577,25 @@ int VkVideoDecoder::DecodePictureWithParameters(VkParserPerFrameDecodeParameters } } + std::vector<VkVideoReferenceSlotInfoKHR> allReferenceSlots; + decodeBeginInfo.referenceSlotCount = pPicParams->decodeFrameInfo.referenceSlotCount; decodeBeginInfo.pReferenceSlots = pPicParams->decodeFrameInfo.pReferenceSlots; + if (pPicParams->decodeFrameInfo.pSetupReferenceSlot != nullptr) + { + for (int i = 0; i < decodeBeginInfo.referenceSlotCount; i++) + allReferenceSlots.push_back(decodeBeginInfo.pReferenceSlots[i]); + + VkVideoReferenceSlotInfoKHR setupActivationSlot = {}; + setupActivationSlot.sType = VK_STRUCTURE_TYPE_VIDEO_REFERENCE_SLOT_INFO_KHR; + setupActivationSlot.slotIndex = -1; + setupActivationSlot.pPictureResource = &pPicParams->dpbSetupPictureResource; + allReferenceSlots.push_back(setupActivationSlot); + decodeBeginInfo.referenceSlotCount++; + decodeBeginInfo.pReferenceSlots = allReferenceSlots.data(); + } + if (pDecodePictureInfo->flags.unpairedField) { // assert(pFrameSyncinfo->frameCompleteSemaphore == VkSemaphore()); pDecodePictureInfo->flags.syncFirstReady = true; diff --git a/vk_video_decoder/libs/VkVideoDecoder/VkVideoDecoder.h b/vk_video_decoder/libs/VkVideoDecoder/VkVideoDecoder.h index b4dc927c..a0cd1df5 100644 --- a/vk_video_decoder/libs/VkVideoDecoder/VkVideoDecoder.h +++ b/vk_video_decoder/libs/VkVideoDecoder/VkVideoDecoder.h @@ -211,6 +211,8 @@ class VkVideoDecoder 
: public IVulkanVideoDecoderHandler { , m_videoSession(nullptr) , m_videoFrameBuffer(videoFrameBuffer) , m_decodeFramesData(vkDevCtx) + , m_minBitstreamBufferSizeAlignment(0) + , m_minBitstreamBufferOffsetAlignment(0) , m_decodePicCount(0) , m_hwLoadBalancingTimelineSemaphore() , m_dpbAndOutputCoincide(true) @@ -299,6 +301,8 @@ class VkVideoDecoder : public IVulkanVideoDecoderHandler { VkSharedBaseObj m_videoSession; VkSharedBaseObj m_videoFrameBuffer; NvVkDecodeFrameData m_decodeFramesData; + uint32_t m_minBitstreamBufferSizeAlignment; + uint32_t m_minBitstreamBufferOffsetAlignment; uint64_t m_decodePicCount; // Also used for the HW load balancing timeline semaphore VkSharedBaseObj m_currentPictureParameters; diff --git a/vk_video_decoder/libs/VkVideoParser/VulkanVideoParser.cpp b/vk_video_decoder/libs/VkVideoParser/VulkanVideoParser.cpp index d17e4a9b..efc619d6 100644 --- a/vk_video_decoder/libs/VkVideoParser/VulkanVideoParser.cpp +++ b/vk_video_decoder/libs/VkVideoParser/VulkanVideoParser.cpp @@ -1794,8 +1794,19 @@ bool VulkanVideoParser::DecodePicture( // TODO: Remove it is for debugging only. Reserved fields must be set to "0". pout->stdPictureInfo.reserved1 = pCurrFrameDecParams->numGopReferenceSlots; assert(!pd->ref_pic_flag || (setupReferenceSlot.slotIndex >= 0)); + + // HACK: The DPB map doesn't take account of the setup slot for some reason, so we can't use the existing logic to set up + // the picture flags and frame number from the dpbEntry. REVIEW. Silences a validation warning. 
+ VkVideoDecodeH264DpbSlotInfoKHR h264SlotInfo = {}; + StdVideoDecodeH264ReferenceInfo h264RefInfo = {}; + + h264SlotInfo.sType =VK_STRUCTURE_TYPE_VIDEO_DECODE_H264_DPB_SLOT_INFO_KHR; + h264SlotInfo.pNext = nullptr; + h264SlotInfo.pStdReferenceInfo = &h264RefInfo; + if (setupReferenceSlot.slotIndex >= 0) { setupReferenceSlot.pPictureResource = &pCurrFrameDecParams->dpbSetupPictureResource; + setupReferenceSlot.pNext = &h264SlotInfo; pCurrFrameDecParams->decodeFrameInfo.pSetupReferenceSlot = &setupReferenceSlot; } if (pCurrFrameDecParams->numGopReferenceSlots) { diff --git a/vk_video_decoder/libs/VulkanVideoFrameBuffer/VulkanVideoFrameBuffer.cpp b/vk_video_decoder/libs/VulkanVideoFrameBuffer/VulkanVideoFrameBuffer.cpp index b93f7945..785ec55d 100644 --- a/vk_video_decoder/libs/VulkanVideoFrameBuffer/VulkanVideoFrameBuffer.cpp +++ b/vk_video_decoder/libs/VulkanVideoFrameBuffer/VulkanVideoFrameBuffer.cpp @@ -994,6 +994,28 @@ int32_t NvPerFrameDecodeImageSet::init(const VulkanDeviceContext* vkDevCtx, m_videoProfile.InitFromProfile(pDecodeProfile); + VkPhysicalDeviceVideoFormatInfoKHR videoFormatInfo = {}; + videoFormatInfo.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VIDEO_FORMAT_INFO_KHR; + videoFormatInfo.pNext = m_videoProfile.GetProfileListInfo(); + videoFormatInfo.imageUsage = dpbImageUsage; + + std::vector<VkVideoFormatPropertiesKHR> formatProperties; + uint32_t numFormats = 0; + vkDevCtx->GetPhysicalDeviceVideoFormatPropertiesKHR(vkDevCtx->getPhysicalDevice(), &videoFormatInfo, &numFormats, nullptr); + formatProperties.resize(numFormats); + for (auto& fp : formatProperties) + fp.sType = VK_STRUCTURE_TYPE_VIDEO_FORMAT_PROPERTIES_KHR; + vkDevCtx->GetPhysicalDeviceVideoFormatPropertiesKHR(vkDevCtx->getPhysicalDevice(), &videoFormatInfo, &numFormats, formatProperties.data()); + bool haveLinearOutput = false; + for (auto& fp : formatProperties) + if (fp.imageTiling == VK_IMAGE_TILING_LINEAR) + haveLinearOutput = true; + + m_usesLinearOutput = haveLinearOutput; + m_usesSeparateOutputImage = 
haveLinearOutput; + useLinearOutput = haveLinearOutput; + useSeparateOutputImage = haveLinearOutput; + m_queueFamilyIndex = queueFamilyIndex; m_dpbRequiredMemProps = dpbRequiredMemProps; m_outRequiredMemProps = outRequiredMemProps;