Skip to content
Closed
6 changes: 4 additions & 2 deletions vk_video_decoder/demos/vk-video-dec/Main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,10 @@ int main(int argc, const char **argv) {
VulkanDeviceContext vkDevCtxt(programConfig.deviceId);

if (programConfig.validate) {
vkDevCtxt.AddRequiredInstanceLayer("VK_LAYER_LUNARG_standard_validation");
vkDevCtxt.AddRequiredInstanceLayer(VK_EXT_DEBUG_REPORT_EXTENSION_NAME);
// REVIEW: Should we make an effort to check for the old LUNARG validation layer name?
vkDevCtxt.AddRequiredInstanceLayer("VK_LAYER_KHRONOS_validation");
//vkDevCtxt.AddRequiredInstanceLayer("VK_LAYER_LUNARG_standard_validation");
vkDevCtxt.AddRequiredInstanceExtension(VK_EXT_DEBUG_REPORT_EXTENSION_NAME);
}

/********** Start WSI instance extensions support *******************************************/
Expand Down
74 changes: 65 additions & 9 deletions vk_video_decoder/libs/VkCodecUtils/VulkanDeviceContext.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -271,11 +271,27 @@ bool VulkanDeviceContext::DebugReportCallback(VkDebugReportFlagsEXT flags, VkDeb
else if (flags & VK_DEBUG_REPORT_DEBUG_BIT_EXT)
prio = LOG_DEBUG;

std::vector<std::string> ignoredVUIDs = {
"VUID-VkBufferCreateInfo-usage-04813", // buffer profiles (they will be optional soon)
"VUID-vkCmdDecodeVideoKHR-pDecodeInfo-07135", // ditto
};
std::stringstream ss;
ss << layer_prefix << ": " << msg;

std::ostream &st = (prio >= LOG_ERR) ? std::cerr : std::cout;
st << msg << "\n";
if (prio >= LOG_ERR) {
bool ignored = false;
for (const auto& ignoredVUID : ignoredVUIDs) {
if (strstr(msg, ignoredVUID.c_str()) != nullptr) {
ignored = true;
break;
}
}
if (ignored)
return false;
ss << layer_prefix << ": " << msg;

std::ostream &st = (prio >= LOG_ERR) ? std::cerr : std::cout;
st << msg << "\n";
}

return false;
}
Expand Down Expand Up @@ -329,7 +345,7 @@ VkResult VulkanDeviceContext::InitPhysicalDevice(const VkQueueFlags requestQueue
continue;
}

if (!HasAllDeviceExtensions(physicalDevice)) {
if (!HasAllDeviceExtensions(physicalDevice, true)) {
continue;
}

Expand Down Expand Up @@ -501,9 +517,47 @@ VkResult VulkanDeviceContext::CreateVulkanDevice(int32_t numDecodeQueues,
devInfo.enabledExtensionCount = static_cast<uint32_t>(m_reqDeviceExtensions.size());
devInfo.ppEnabledExtensionNames = m_reqDeviceExtensions.data();

// disable all features
VkPhysicalDeviceFeatures features = {};
devInfo.pEnabledFeatures = &features;
VkPhysicalDeviceDescriptorBufferFeaturesEXT descriptorBufferFeature = {};
descriptorBufferFeature.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_BUFFER_FEATURES_EXT;

VkPhysicalDeviceVulkan13Features features_13 = {
.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_3_FEATURES,
.pNext = &descriptorBufferFeature,
};
VkPhysicalDeviceVulkan12Features features_12 = {
.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_FEATURES,
.pNext = &features_13,
};
VkPhysicalDeviceVulkan11Features features_11 = {
.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_FEATURES,
.pNext = &features_12,
};
VkPhysicalDeviceFeatures2 devFeatures = {
.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2,
.pNext = &features_11,
};

GetPhysicalDeviceFeatures2(m_physDevice, &devFeatures);

VkPhysicalDeviceVulkan13Features chosen13 = {};
chosen13.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_3_FEATURES;
VkPhysicalDeviceVulkan12Features chosen12 = {};
chosen12.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_FEATURES;
chosen12.pNext = &chosen13;
VkPhysicalDeviceVulkan11Features chosen11 = {};
chosen11.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_FEATURES;
chosen11.pNext = &chosen12;

// TODO: Review all the usages of features and ensure they are checked here. Descriptor buffers, samplers, lots of things probably...
// if presenting: assert sampler conversion...
chosen11.samplerYcbcrConversion = features_11.samplerYcbcrConversion;
assert(features_13.synchronization2);
chosen13.synchronization2 = features_13.synchronization2;

// Use all the supported core features, probably we should trim this down a bit.
devFeatures.pNext = &chosen11;
devInfo.pEnabledFeatures = nullptr; // use features2
devInfo.pNext = &devFeatures;

VkResult result = CreateDevice(m_physDevice, &devInfo, nullptr, &m_device);
if (result != VK_SUCCESS) {
Expand All @@ -512,8 +566,10 @@ VkResult VulkanDeviceContext::CreateVulkanDevice(int32_t numDecodeQueues,

vk::InitDispatchTableBottom(m_instance,m_device, this);

GetDeviceQueue(m_device, GetGfxQueueFamilyIdx(), 0, &m_gfxQueue);
GetDeviceQueue(m_device, GetPresentQueueFamilyIdx(), 0, &m_presentQueue);
if (createGraphicsQueue)
GetDeviceQueue(m_device, GetGfxQueueFamilyIdx(), 0, &m_gfxQueue);
if (createPresentQueue)
GetDeviceQueue(m_device, GetPresentQueueFamilyIdx(), 0, &m_presentQueue);

if (numDecodeQueues) {
assert(GetVideoDecodeQueueFamilyIdx() != -1);
Expand Down
2 changes: 2 additions & 0 deletions vk_video_decoder/libs/VkCodecUtils/VulkanVideoProcessor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -403,6 +403,8 @@ size_t VulkanVideoProcessor::ConvertFrameToNv12(DecodedFrame* pFrame,
retryCount--;
} while ((result == VK_TIMEOUT) && (retryCount > 0));

// TODO: Transition the resource to the queue that has ... (unfinished note; no transition is performed here yet).

// Map the image and read the image data.
VkDeviceSize imageOffset = imageResource->GetImageDeviceMemoryOffset();
VkDeviceSize maxSize = 0;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -231,8 +231,11 @@ VkResult VkParserVideoPictureParameters::UpdateParametersObject(const StdVideoPi
return VK_ERROR_INITIALIZATION_FAILED;
}

updateInfo.updateSequenceCount = std::max(pStdVideoPictureParametersSet->GetUpdateSequenceCount(), updateInfo.updateSequenceCount);

// The parser doesn't seem to set this correctly on the parameter objects: it keeps a per-object count rather than the global
// one the spec requires. Hack around this for now, but check with NVIDIA what the best thing to do here is.
static int updateCounter = 1;
// updateInfo.updateSequenceCount = std::max(pStdVideoPictureParametersSet->GetUpdateSequenceCount(), updateInfo.updateSequenceCount);
updateInfo.updateSequenceCount = updateCounter++;

VkResult result = m_vkDevCtx->UpdateVideoSessionParametersKHR(*m_vkDevCtx,
m_sessionParameters,
Expand Down
38 changes: 31 additions & 7 deletions vk_video_decoder/libs/VkVideoDecoder/VkVideoDecoder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
#undef min

// Rounds x up to the next multiple of 256. The mask is a signed int, so it
// sign-extends and stays correct when x is a 64-bit value.
#define GPU_ALIGN(x) (((x) + 255) & ~255)
// Rounds `addr` up to the next multiple of `width`. `width` must be a power of two.
//
// Both arguments are parenthesized so that expression arguments (e.g. `1 << n`)
// expand with the intended precedence. The mask is computed in the common type
// of `addr` and `width`, so a 32-bit `width` cannot clear the upper bits of a
// 64-bit `addr`.
//
// NOTE: `addr` and `width` are each evaluated more than once; do not pass
// expressions with side effects.
#define ALIGN(addr, width) \
    (((addr) + ((width) - 1)) & ~(static_cast<decltype((addr) + (width))>(width) - 1))

// Fence wait timeout: 100 mSec (the value is 100 * 10^6, i.e. counted in nanoseconds).
const uint64_t gFenceTimeout = 100000000ULL;
// Long wait timeout: 1000 mSec (the value is 1000 * 10^6, i.e. counted in nanoseconds).
const uint64_t gLongTimeout = 1000000000ULL;
Expand Down Expand Up @@ -174,6 +175,10 @@ int32_t VkVideoDecoder::StartVideoSequence(VkParserDetectedVideoFormat* pVideoFo
assert(!"Could not get Video Capabilities!");
return -1;
}

m_minBitstreamBufferOffsetAlignment = videoCapabilities.minBitstreamBufferOffsetAlignment;
m_minBitstreamBufferSizeAlignment = videoCapabilities.minBitstreamBufferSizeAlignment;

m_capabilityFlags = videoDecodeCapabilities.flags;
m_dpbAndOutputCoincide = (m_capabilityFlags & VK_VIDEO_DECODE_CAPABILITY_DPB_AND_OUTPUT_COINCIDE_BIT_KHR);
VkFormat dpbImageFormat = VK_FORMAT_UNDEFINED;
Expand All @@ -191,10 +196,8 @@ int32_t VkVideoDecoder::StartVideoSequence(VkParserDetectedVideoFormat* pVideoFo
imageExtent.width = std::max(imageExtent.width, videoCapabilities.minCodedExtent.width);
imageExtent.height = std::max(imageExtent.height, videoCapabilities.minCodedExtent.height);

uint32_t alignWidth = videoCapabilities.pictureAccessGranularity.width - 1;
imageExtent.width = ((imageExtent.width + alignWidth) & ~alignWidth);
uint32_t alignHeight = videoCapabilities.pictureAccessGranularity.height - 1;
imageExtent.height = ((imageExtent.height + alignHeight) & ~alignHeight);
imageExtent.width = ALIGN(imageExtent.width, videoCapabilities.pictureAccessGranularity.width);
imageExtent.height = ALIGN(imageExtent.height, videoCapabilities.pictureAccessGranularity.height);

if (!m_videoSession ||
!m_videoSession->IsCompatible( m_vkDevCtx,
Expand Down Expand Up @@ -222,9 +225,14 @@ int32_t VkVideoDecoder::StartVideoSequence(VkParserDetectedVideoFormat* pVideoFo
}

VkImageUsageFlags outImageUsage = (VK_IMAGE_USAGE_VIDEO_DECODE_DST_BIT_KHR |
VK_IMAGE_USAGE_SAMPLED_BIT |
VK_IMAGE_USAGE_TRANSFER_SRC_BIT |
VK_IMAGE_USAGE_TRANSFER_DST_BIT);

if (!m_useLinearOutput)
{
outImageUsage |= VK_IMAGE_USAGE_SAMPLED_BIT;
}

VkImageUsageFlags dpbImageUsage = VK_IMAGE_USAGE_VIDEO_DECODE_DPB_BIT_KHR;

if (m_dpbAndOutputCoincide) {
Expand Down Expand Up @@ -424,8 +432,8 @@ int VkVideoDecoder::DecodePictureWithParameters(VkParserPerFrameDecodeParameters
pPicParams->decodeFrameInfo.srcBuffer = pPicParams->bitstreamData->GetBuffer();
assert(pPicParams->bitstreamDataOffset == 0);
assert(pPicParams->firstSliceIndex == 0);
pPicParams->decodeFrameInfo.srcBufferOffset = pPicParams->bitstreamDataOffset;
pPicParams->decodeFrameInfo.srcBufferRange = pPicParams->bitstreamDataLen;
pPicParams->decodeFrameInfo.srcBufferOffset = ALIGN(pPicParams->bitstreamDataOffset, m_minBitstreamBufferOffsetAlignment);
pPicParams->decodeFrameInfo.srcBufferRange = ALIGN(pPicParams->bitstreamDataLen, m_minBitstreamBufferSizeAlignment);
// pPicParams->decodeFrameInfo.dstImageView = VkImageView();

VkVideoBeginCodingInfoKHR decodeBeginInfo = { VK_STRUCTURE_TYPE_VIDEO_BEGIN_CODING_INFO_KHR };
Expand Down Expand Up @@ -569,9 +577,25 @@ int VkVideoDecoder::DecodePictureWithParameters(VkParserPerFrameDecodeParameters
}
}

std::vector<VkVideoReferenceSlotInfoKHR> allReferenceSlots;

decodeBeginInfo.referenceSlotCount = pPicParams->decodeFrameInfo.referenceSlotCount;
decodeBeginInfo.pReferenceSlots = pPicParams->decodeFrameInfo.pReferenceSlots;

if (pPicParams->decodeFrameInfo.pSetupReferenceSlot != nullptr)
{
for (int i = 0; i < decodeBeginInfo.referenceSlotCount; i++)
allReferenceSlots.push_back(decodeBeginInfo.pReferenceSlots[i]);

VkVideoReferenceSlotInfoKHR setupActivationSlot = {};
setupActivationSlot.sType = VK_STRUCTURE_TYPE_VIDEO_REFERENCE_SLOT_INFO_KHR;
setupActivationSlot.slotIndex = -1;
setupActivationSlot.pPictureResource = &pPicParams->dpbSetupPictureResource;
allReferenceSlots.push_back(setupActivationSlot);
decodeBeginInfo.referenceSlotCount++;
decodeBeginInfo.pReferenceSlots = allReferenceSlots.data();
}

if (pDecodePictureInfo->flags.unpairedField) {
// assert(pFrameSyncinfo->frameCompleteSemaphore == VkSemaphore());
pDecodePictureInfo->flags.syncFirstReady = true;
Expand Down
4 changes: 4 additions & 0 deletions vk_video_decoder/libs/VkVideoDecoder/VkVideoDecoder.h
Original file line number Diff line number Diff line change
Expand Up @@ -211,6 +211,8 @@ class VkVideoDecoder : public IVulkanVideoDecoderHandler {
, m_videoSession(nullptr)
, m_videoFrameBuffer(videoFrameBuffer)
, m_decodeFramesData(vkDevCtx)
, m_minBitstreamBufferSizeAlignment(0)
, m_minBitstreamBufferOffsetAlignment(0)
, m_decodePicCount(0)
, m_hwLoadBalancingTimelineSemaphore()
, m_dpbAndOutputCoincide(true)
Expand Down Expand Up @@ -299,6 +301,8 @@ class VkVideoDecoder : public IVulkanVideoDecoderHandler {
VkSharedBaseObj<VulkanVideoSession> m_videoSession;
VkSharedBaseObj<VulkanVideoFrameBuffer> m_videoFrameBuffer;
NvVkDecodeFrameData m_decodeFramesData;
uint32_t m_minBitstreamBufferSizeAlignment;
uint32_t m_minBitstreamBufferOffsetAlignment;

uint64_t m_decodePicCount; // Also used for the HW load balancing timeline semaphore
VkSharedBaseObj<VkParserVideoPictureParameters> m_currentPictureParameters;
Expand Down
11 changes: 11 additions & 0 deletions vk_video_decoder/libs/VkVideoParser/VulkanVideoParser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1794,8 +1794,19 @@ bool VulkanVideoParser::DecodePicture(
// TODO: Remove it is for debugging only. Reserved fields must be set to "0".
pout->stdPictureInfo.reserved1 = pCurrFrameDecParams->numGopReferenceSlots;
assert(!pd->ref_pic_flag || (setupReferenceSlot.slotIndex >= 0));

// HACK: The DPB map doesn't take account of the setup slot for some reason, so we can't use the existing logic to set up
// the picture flags and frame number from the dpbEntry. REVIEW. Silences a validation warning.
VkVideoDecodeH264DpbSlotInfoKHR h264SlotInfo = {};
StdVideoDecodeH264ReferenceInfo h264RefInfo = {};

h264SlotInfo.sType =VK_STRUCTURE_TYPE_VIDEO_DECODE_H264_DPB_SLOT_INFO_KHR;
h264SlotInfo.pNext = nullptr;
h264SlotInfo.pStdReferenceInfo = &h264RefInfo;

if (setupReferenceSlot.slotIndex >= 0) {
setupReferenceSlot.pPictureResource = &pCurrFrameDecParams->dpbSetupPictureResource;
setupReferenceSlot.pNext = &h264SlotInfo;
pCurrFrameDecParams->decodeFrameInfo.pSetupReferenceSlot = &setupReferenceSlot;
}
if (pCurrFrameDecParams->numGopReferenceSlots) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -994,6 +994,28 @@ int32_t NvPerFrameDecodeImageSet::init(const VulkanDeviceContext* vkDevCtx,

m_videoProfile.InitFromProfile(pDecodeProfile);

VkPhysicalDeviceVideoFormatInfoKHR videoFormatInfo = {};
videoFormatInfo.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VIDEO_FORMAT_INFO_KHR;
videoFormatInfo.pNext = m_videoProfile.GetProfileListInfo();
videoFormatInfo.imageUsage = dpbImageUsage;

std::vector<VkVideoFormatPropertiesKHR> formatProperties;
uint32_t numFormats = 0;
vkDevCtx->GetPhysicalDeviceVideoFormatPropertiesKHR(vkDevCtx->getPhysicalDevice(), &videoFormatInfo, &numFormats, nullptr);
formatProperties.resize(numFormats);
for (auto& fp : formatProperties)
fp.sType = VK_STRUCTURE_TYPE_VIDEO_FORMAT_PROPERTIES_KHR;
vkDevCtx->GetPhysicalDeviceVideoFormatPropertiesKHR(vkDevCtx->getPhysicalDevice(), &videoFormatInfo, &numFormats, formatProperties.data());
bool haveLinearOutput = false;
for (auto& fp : formatProperties)
if (fp.imageTiling == VK_IMAGE_TILING_LINEAR)
haveLinearOutput = true;

m_usesLinearOutput = haveLinearOutput;
m_usesSeparateOutputImage = haveLinearOutput;
useLinearOutput = haveLinearOutput;
useSeparateOutputImage = haveLinearOutput;

m_queueFamilyIndex = queueFamilyIndex;
m_dpbRequiredMemProps = dpbRequiredMemProps;
m_outRequiredMemProps = outRequiredMemProps;
Expand Down