From 420d26c8adae85d5fbc2c3585d39b7c89ecf333a Mon Sep 17 00:00:00 2001 From: Yunus Berndt Date: Thu, 28 Aug 2025 11:30:10 +1000 Subject: [PATCH 01/25] Vulkan patch for 8bit,16bit storage and 8 Bit arithmetic --- taichi/codegen/spirv/spirv_ir_builder.cpp | 32 ++++- taichi/inc/rhi_constants.inc.h | 2 + taichi/rhi/vulkan/vulkan_device_creator.cpp | 122 +++++++++++++++++--- 3 files changed, 134 insertions(+), 22 deletions(-) diff --git a/taichi/codegen/spirv/spirv_ir_builder.cpp b/taichi/codegen/spirv/spirv_ir_builder.cpp index 8a9a696cf701f..eef1289d79c5b 100644 --- a/taichi/codegen/spirv/spirv_ir_builder.cpp +++ b/taichi/codegen/spirv/spirv_ir_builder.cpp @@ -80,6 +80,24 @@ void IRBuilder::init_header() { .add("SPV_KHR_storage_buffer_storage_class") .commit(&header_); + // === BEGIN: 8/16-bit storage support (emit only when device supports it) === + if (caps_->get(cap::spirv_has_8bit_storage)) { + // ib_.begin(spv::OpExtension).add("SPV_KHR_8bit_storage").commit(&header_); + // Pick only the storage caps your device has; these two are the common ones: + ib_.begin(spv::OpCapability).add(spv::CapabilityStorageBuffer8BitAccess).commit(&header_); + ib_.begin(spv::OpCapability).add(spv::CapabilityUniformAndStorageBuffer8BitAccess).commit(&header_); + ib_.begin(spv::OpCapability).add(spv::CapabilityStoragePushConstant8).commit(&header_); + } + + if (caps_->get(cap::spirv_has_16bit_storage)) { + ib_.begin(spv::OpExtension).add("SPV_KHR_16bit_storage").commit(&header_); + ib_.begin(spv::OpCapability).add(spv::CapabilityStorageBuffer16BitAccess).commit(&header_); + ib_.begin(spv::OpCapability).add(spv::CapabilityUniformAndStorageBuffer16BitAccess).commit(&header_); + ib_.begin(spv::OpCapability).add(spv::CapabilityStoragePushConstant16).commit(&header_); + ib_.begin(spv::OpCapability).add(spv::CapabilityStorageInputOutput16).commit(&header_); + } + // === END: 8/16-bit storage support === + if (caps_->get(cap::spirv_has_no_integer_wrap_decoration)) { 
ib_.begin(spv::OpExtension) .add("SPV_KHR_no_integer_wrap_decoration") @@ -153,12 +171,14 @@ void IRBuilder::init_pre_defs() { } t_bool_ = declare_primitive_type(get_data_type()); - if (caps_->get(cap::spirv_has_int8)) { - t_int8_ = declare_primitive_type(get_data_type()); + // 8-bit integers: declare if arithmetic *or* storage is available + if (caps_->get(cap::spirv_has_int8) || caps_->get(cap::spirv_has_8bit_storage)) { + t_int8_ = declare_primitive_type(get_data_type()); t_uint8_ = declare_primitive_type(get_data_type()); } - if (caps_->get(cap::spirv_has_int16)) { - t_int16_ = declare_primitive_type(get_data_type()); + // 16-bit integers: declare if arithmetic *or* storage is available + if (caps_->get(cap::spirv_has_int16) || caps_->get(cap::spirv_has_16bit_storage)) { + t_int16_ = declare_primitive_type(get_data_type()); t_uint16_ = declare_primitive_type(get_data_type()); } t_int32_ = declare_primitive_type(get_data_type()); @@ -313,11 +333,11 @@ SType IRBuilder::get_primitive_type(const DataType &dt) const { TI_ERROR("Type {} not supported.", dt->to_string()); return t_int64_; } else if (dt->is_primitive(PrimitiveTypeID::u8)) { - if (!caps_->get(cap::spirv_has_int8)) + if (!caps_->get(cap::spirv_has_int8) && !caps_->get(cap::spirv_has_8bit_storage)) TI_ERROR("Type {} not supported.", dt->to_string()); return t_uint8_; } else if (dt->is_primitive(PrimitiveTypeID::u16)) { - if (!caps_->get(cap::spirv_has_int16)) + if (!caps_->get(cap::spirv_has_int16) && !caps_->get(cap::spirv_has_16bit_storage)) TI_ERROR("Type {} not supported.", dt->to_string()); return t_uint16_; } else if (dt->is_primitive(PrimitiveTypeID::u32)) { diff --git a/taichi/inc/rhi_constants.inc.h b/taichi/inc/rhi_constants.inc.h index 4ba1c4af639b1..bd16f193e432f 100644 --- a/taichi/inc/rhi_constants.inc.h +++ b/taichi/inc/rhi_constants.inc.h @@ -14,6 +14,8 @@ PER_DEVICE_CAPABILITY(spirv_has_int16) PER_DEVICE_CAPABILITY(spirv_has_int64) PER_DEVICE_CAPABILITY(spirv_has_float16) 
PER_DEVICE_CAPABILITY(spirv_has_float64) +PER_DEVICE_CAPABILITY(spirv_has_8bit_storage) +PER_DEVICE_CAPABILITY(spirv_has_16bit_storage) PER_DEVICE_CAPABILITY(spirv_has_atomic_int64) PER_DEVICE_CAPABILITY(spirv_has_atomic_float16) // load, store, exchange PER_DEVICE_CAPABILITY(spirv_has_atomic_float16_add) diff --git a/taichi/rhi/vulkan/vulkan_device_creator.cpp b/taichi/rhi/vulkan/vulkan_device_creator.cpp index 46676843d487c..0b922e0e393a8 100644 --- a/taichi/rhi/vulkan/vulkan_device_creator.cpp +++ b/taichi/rhi/vulkan/vulkan_device_creator.cpp @@ -672,11 +672,10 @@ void VulkanDeviceCreator::create_logical_device(bool manual_create) { } } - create_info.pEnabledFeatures = &device_features; + create_info.pEnabledFeatures = nullptr; create_info.enabledExtensionCount = enabled_extensions.size(); create_info.ppEnabledExtensionNames = enabled_extensions.data(); - void **pNextEnd = (void **)&create_info.pNext; // Use physicalDeviceFeatures2 to features enabled by extensions VkPhysicalDeviceVariablePointersFeaturesKHR variable_ptr_feature{}; @@ -707,6 +706,21 @@ void VulkanDeviceCreator::create_logical_device(bool manual_create) { dynamic_rendering_feature.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DYNAMIC_RENDERING_FEATURES_KHR; +// BEGIN PATCH: enabled feature structs we will pass to vkCreateDevice +VkPhysicalDevice8BitStorageFeatures shader_8bit_storage_enable{}; +shader_8bit_storage_enable.sType = + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_8BIT_STORAGE_FEATURES; + +VkPhysicalDevice16BitStorageFeatures shader_16bit_storage_enable{}; +shader_16bit_storage_enable.sType = + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_16BIT_STORAGE_FEATURES; + +VkPhysicalDeviceFloat16Int8FeaturesKHR shader_f16_i8_enable{}; +shader_f16_i8_enable.sType = + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FLOAT16_INT8_FEATURES_KHR; +// END PATCH + + if (ti_device_->vk_caps().physical_device_features2) { VkPhysicalDeviceFeatures2KHR features2{}; features2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2; @@ -730,8 +744,6 
@@ void VulkanDeviceCreator::create_logical_device(bool manual_create) { variable_ptr_feature.variablePointersStorageBuffer) { caps.set(DeviceCapability::spirv_has_variable_ptr, true); } - *pNextEnd = &variable_ptr_feature; - pNextEnd = &variable_ptr_feature.pNext; } // Atomic float @@ -750,8 +762,6 @@ void VulkanDeviceCreator::create_logical_device(bool manual_create) { if (shader_atomic_float_feature.shaderBufferFloat64Atomics) { caps.set(DeviceCapability::spirv_has_atomic_float64, true); } - *pNextEnd = &shader_atomic_float_feature; - pNextEnd = &shader_atomic_float_feature.pNext; } // Atomic float 2 @@ -773,8 +783,6 @@ void VulkanDeviceCreator::create_logical_device(bool manual_create) { if (shader_atomic_float_2_feature.shaderBufferFloat64AtomicMinMax) { caps.set(DeviceCapability::spirv_has_atomic_float64_minmax, true); } - *pNextEnd = &shader_atomic_float_2_feature; - pNextEnd = &shader_atomic_float_2_feature.pNext; } // F16 / I8 @@ -785,12 +793,13 @@ void VulkanDeviceCreator::create_logical_device(bool manual_create) { if (shader_f16_i8_feature.shaderFloat16) { caps.set(DeviceCapability::spirv_has_float16, true); + shader_f16_i8_enable.shaderFloat16 = VK_TRUE; // enable if supported } if (shader_f16_i8_feature.shaderInt8) { caps.set(DeviceCapability::spirv_has_int8, true); + shader_f16_i8_enable.shaderInt8 = VK_TRUE; // enable if supported } - *pNextEnd = &shader_f16_i8_feature; - pNextEnd = &shader_f16_i8_feature.pNext; + } if (CHECK_VERSION(1, 1) || @@ -798,18 +807,53 @@ void VulkanDeviceCreator::create_logical_device(bool manual_create) { features2.pNext = &shader_8bit_storage_feature; vkGetPhysicalDeviceFeatures2KHR(physical_device_, &features2); - *pNextEnd = &shader_8bit_storage_feature; - pNextEnd = &shader_8bit_storage_feature.pNext; + // Enable only what the driver supports + shader_8bit_storage_enable.storageBuffer8BitAccess = + shader_8bit_storage_feature.storageBuffer8BitAccess ? 
VK_TRUE : VK_FALSE; + shader_8bit_storage_enable.uniformAndStorageBuffer8BitAccess = + shader_8bit_storage_feature.uniformAndStorageBuffer8BitAccess ? VK_TRUE : VK_FALSE; + shader_8bit_storage_enable.storagePushConstant8 = + shader_8bit_storage_feature.storagePushConstant8 ? VK_TRUE : VK_FALSE; + + // 8-bit storage present? + const bool has_8bit_storage = + (shader_8bit_storage_feature.storageBuffer8BitAccess == VK_TRUE) || + (shader_8bit_storage_feature.uniformAndStorageBuffer8BitAccess == VK_TRUE) || + (shader_8bit_storage_feature.storagePushConstant8 == VK_TRUE); + if (has_8bit_storage) { + caps.set(DeviceCapability::spirv_has_8bit_storage, true); + } + } + if (CHECK_VERSION(1, 1) || CHECK_EXTENSION(VK_KHR_16BIT_STORAGE_EXTENSION_NAME)) { features2.pNext = &shader_16bit_storage_feature; vkGetPhysicalDeviceFeatures2KHR(physical_device_, &features2); - *pNextEnd = &shader_16bit_storage_feature; - pNextEnd = &shader_16bit_storage_feature.pNext; + shader_16bit_storage_enable.storageBuffer16BitAccess = + shader_16bit_storage_feature.storageBuffer16BitAccess ? VK_TRUE : VK_FALSE; + shader_16bit_storage_enable.uniformAndStorageBuffer16BitAccess = + shader_16bit_storage_feature.uniformAndStorageBuffer16BitAccess ? VK_TRUE : VK_FALSE; + shader_16bit_storage_enable.storagePushConstant16 = + shader_16bit_storage_feature.storagePushConstant16 ? VK_TRUE : VK_FALSE; + shader_16bit_storage_enable.storageInputOutput16 = + shader_16bit_storage_feature.storageInputOutput16 ? VK_TRUE : VK_FALSE; + + + const bool has_16bit_storage = + (shader_16bit_storage_feature.storageBuffer16BitAccess == VK_TRUE) || + (shader_16bit_storage_feature.uniformAndStorageBuffer16BitAccess == VK_TRUE) || + (shader_16bit_storage_feature.storagePushConstant16 == VK_TRUE) || + (shader_16bit_storage_feature.storageInputOutput16 == VK_TRUE); + if (has_16bit_storage) { + // Tell Taichi it's OK to use 16-bit *types* (storage), even if shaderInt16 arithmetic is not supported. 
+ caps.set(DeviceCapability::spirv_has_16bit_storage, true); + } + } + // Buffer Device Address if (CHECK_VERSION(1, 2) || CHECK_EXTENSION(VK_KHR_BUFFER_DEVICE_ADDRESS_EXTENSION_NAME)) { @@ -827,8 +871,7 @@ void VulkanDeviceCreator::create_logical_device(bool manual_create) { #endif } } - *pNextEnd = &buffer_device_address_feature; - pNextEnd = &buffer_device_address_feature.pNext; + } // Dynamic rendering @@ -851,6 +894,53 @@ void VulkanDeviceCreator::create_logical_device(bool manual_create) { // TODO: add atomic min/max feature } + // ---- BEGIN: Build VkPhysicalDeviceFeatures2 enable chain ---- + + // Use the Features2 path (not pEnabledFeatures) + create_info.pEnabledFeatures = nullptr; + + // Root of the device features2 ENABLE chain + VkPhysicalDeviceFeatures2 features2_enable{}; + features2_enable.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2; + + // Carry over any core features you selected in device_features + features2_enable.features = device_features; + + // Helper to append under features2_enable + auto append2 = [&](VkBaseOutStructure *node) { + VkBaseOutStructure *tail = reinterpret_cast(&features2_enable); + while (tail->pNext) tail = tail->pNext; + tail->pNext = node; + }; + + // We only append an enable-struct if at least one bit was turned on in it + if (shader_f16_i8_enable.shaderInt8 || shader_f16_i8_enable.shaderFloat16) { + append2(reinterpret_cast(&shader_f16_i8_enable)); + } + if (shader_8bit_storage_enable.storageBuffer8BitAccess || + shader_8bit_storage_enable.uniformAndStorageBuffer8BitAccess || + shader_8bit_storage_enable.storagePushConstant8) { + append2(reinterpret_cast(&shader_8bit_storage_enable)); + } + if (shader_16bit_storage_enable.storageBuffer16BitAccess || + shader_16bit_storage_enable.uniformAndStorageBuffer16BitAccess || + shader_16bit_storage_enable.storagePushConstant16 || + shader_16bit_storage_enable.storageInputOutput16) { + append2(reinterpret_cast(&shader_16bit_storage_enable)); + } + + // Append 
features2_enable to the TAIL of whatever is already in create_info.pNext (validation etc.) + VkBaseOutStructure *tail = reinterpret_cast(create_info.pNext); + if (!tail) { + create_info.pNext = &features2_enable; // no validation chain present + } else { + while (tail->pNext) tail = tail->pNext; + tail->pNext = reinterpret_cast(&features2_enable); + } + + // ---- END: Build VkPhysicalDeviceFeatures2 enable chain ---- + + if (params_.enable_validation_layer) { create_info.enabledLayerCount = (uint32_t)kValidationLayers.size(); create_info.ppEnabledLayerNames = kValidationLayers.data(); From c4004c66550899b699fe5d9d716b2be1b8f71258 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 28 Aug 2025 02:01:07 +0000 Subject: [PATCH 02/25] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- taichi/codegen/spirv/spirv_ir_builder.cpp | 47 +++++++---- taichi/rhi/vulkan/vulkan_device_creator.cpp | 89 ++++++++++++--------- 2 files changed, 82 insertions(+), 54 deletions(-) diff --git a/taichi/codegen/spirv/spirv_ir_builder.cpp b/taichi/codegen/spirv/spirv_ir_builder.cpp index eef1289d79c5b..30e7274357834 100644 --- a/taichi/codegen/spirv/spirv_ir_builder.cpp +++ b/taichi/codegen/spirv/spirv_ir_builder.cpp @@ -83,18 +83,33 @@ void IRBuilder::init_header() { // === BEGIN: 8/16-bit storage support (emit only when device supports it) === if (caps_->get(cap::spirv_has_8bit_storage)) { // ib_.begin(spv::OpExtension).add("SPV_KHR_8bit_storage").commit(&header_); - // Pick only the storage caps your device has; these two are the common ones: - ib_.begin(spv::OpCapability).add(spv::CapabilityStorageBuffer8BitAccess).commit(&header_); - ib_.begin(spv::OpCapability).add(spv::CapabilityUniformAndStorageBuffer8BitAccess).commit(&header_); - ib_.begin(spv::OpCapability).add(spv::CapabilityStoragePushConstant8).commit(&header_); + // Pick only the storage caps your device has; 
these two are the common + // ones: + ib_.begin(spv::OpCapability) + .add(spv::CapabilityStorageBuffer8BitAccess) + .commit(&header_); + ib_.begin(spv::OpCapability) + .add(spv::CapabilityUniformAndStorageBuffer8BitAccess) + .commit(&header_); + ib_.begin(spv::OpCapability) + .add(spv::CapabilityStoragePushConstant8) + .commit(&header_); } if (caps_->get(cap::spirv_has_16bit_storage)) { ib_.begin(spv::OpExtension).add("SPV_KHR_16bit_storage").commit(&header_); - ib_.begin(spv::OpCapability).add(spv::CapabilityStorageBuffer16BitAccess).commit(&header_); - ib_.begin(spv::OpCapability).add(spv::CapabilityUniformAndStorageBuffer16BitAccess).commit(&header_); - ib_.begin(spv::OpCapability).add(spv::CapabilityStoragePushConstant16).commit(&header_); - ib_.begin(spv::OpCapability).add(spv::CapabilityStorageInputOutput16).commit(&header_); + ib_.begin(spv::OpCapability) + .add(spv::CapabilityStorageBuffer16BitAccess) + .commit(&header_); + ib_.begin(spv::OpCapability) + .add(spv::CapabilityUniformAndStorageBuffer16BitAccess) + .commit(&header_); + ib_.begin(spv::OpCapability) + .add(spv::CapabilityStoragePushConstant16) + .commit(&header_); + ib_.begin(spv::OpCapability) + .add(spv::CapabilityStorageInputOutput16) + .commit(&header_); } // === END: 8/16-bit storage support === @@ -172,13 +187,15 @@ void IRBuilder::init_pre_defs() { t_bool_ = declare_primitive_type(get_data_type()); // 8-bit integers: declare if arithmetic *or* storage is available - if (caps_->get(cap::spirv_has_int8) || caps_->get(cap::spirv_has_8bit_storage)) { - t_int8_ = declare_primitive_type(get_data_type()); + if (caps_->get(cap::spirv_has_int8) || + caps_->get(cap::spirv_has_8bit_storage)) { + t_int8_ = declare_primitive_type(get_data_type()); t_uint8_ = declare_primitive_type(get_data_type()); } // 16-bit integers: declare if arithmetic *or* storage is available - if (caps_->get(cap::spirv_has_int16) || caps_->get(cap::spirv_has_16bit_storage)) { - t_int16_ = 
declare_primitive_type(get_data_type()); + if (caps_->get(cap::spirv_has_int16) || + caps_->get(cap::spirv_has_16bit_storage)) { + t_int16_ = declare_primitive_type(get_data_type()); t_uint16_ = declare_primitive_type(get_data_type()); } t_int32_ = declare_primitive_type(get_data_type()); @@ -333,11 +350,13 @@ SType IRBuilder::get_primitive_type(const DataType &dt) const { TI_ERROR("Type {} not supported.", dt->to_string()); return t_int64_; } else if (dt->is_primitive(PrimitiveTypeID::u8)) { - if (!caps_->get(cap::spirv_has_int8) && !caps_->get(cap::spirv_has_8bit_storage)) + if (!caps_->get(cap::spirv_has_int8) && + !caps_->get(cap::spirv_has_8bit_storage)) TI_ERROR("Type {} not supported.", dt->to_string()); return t_uint8_; } else if (dt->is_primitive(PrimitiveTypeID::u16)) { - if (!caps_->get(cap::spirv_has_int16) && !caps_->get(cap::spirv_has_16bit_storage)) + if (!caps_->get(cap::spirv_has_int16) && + !caps_->get(cap::spirv_has_16bit_storage)) TI_ERROR("Type {} not supported.", dt->to_string()); return t_uint16_; } else if (dt->is_primitive(PrimitiveTypeID::u32)) { diff --git a/taichi/rhi/vulkan/vulkan_device_creator.cpp b/taichi/rhi/vulkan/vulkan_device_creator.cpp index 0b922e0e393a8..4c72ff5aff098 100644 --- a/taichi/rhi/vulkan/vulkan_device_creator.cpp +++ b/taichi/rhi/vulkan/vulkan_device_creator.cpp @@ -676,7 +676,6 @@ void VulkanDeviceCreator::create_logical_device(bool manual_create) { create_info.enabledExtensionCount = enabled_extensions.size(); create_info.ppEnabledExtensionNames = enabled_extensions.data(); - // Use physicalDeviceFeatures2 to features enabled by extensions VkPhysicalDeviceVariablePointersFeaturesKHR variable_ptr_feature{}; variable_ptr_feature.sType = @@ -706,20 +705,19 @@ void VulkanDeviceCreator::create_logical_device(bool manual_create) { dynamic_rendering_feature.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DYNAMIC_RENDERING_FEATURES_KHR; -// BEGIN PATCH: enabled feature structs we will pass to vkCreateDevice 
-VkPhysicalDevice8BitStorageFeatures shader_8bit_storage_enable{}; -shader_8bit_storage_enable.sType = - VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_8BIT_STORAGE_FEATURES; + // BEGIN PATCH: enabled feature structs we will pass to vkCreateDevice + VkPhysicalDevice8BitStorageFeatures shader_8bit_storage_enable{}; + shader_8bit_storage_enable.sType = + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_8BIT_STORAGE_FEATURES; -VkPhysicalDevice16BitStorageFeatures shader_16bit_storage_enable{}; -shader_16bit_storage_enable.sType = - VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_16BIT_STORAGE_FEATURES; + VkPhysicalDevice16BitStorageFeatures shader_16bit_storage_enable{}; + shader_16bit_storage_enable.sType = + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_16BIT_STORAGE_FEATURES; -VkPhysicalDeviceFloat16Int8FeaturesKHR shader_f16_i8_enable{}; -shader_f16_i8_enable.sType = - VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FLOAT16_INT8_FEATURES_KHR; -// END PATCH - + VkPhysicalDeviceFloat16Int8FeaturesKHR shader_f16_i8_enable{}; + shader_f16_i8_enable.sType = + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FLOAT16_INT8_FEATURES_KHR; + // END PATCH if (ti_device_->vk_caps().physical_device_features2) { VkPhysicalDeviceFeatures2KHR features2{}; @@ -797,9 +795,8 @@ shader_f16_i8_enable.sType = } if (shader_f16_i8_feature.shaderInt8) { caps.set(DeviceCapability::spirv_has_int8, true); - shader_f16_i8_enable.shaderInt8 = VK_TRUE; // enable if supported + shader_f16_i8_enable.shaderInt8 = VK_TRUE; // enable if supported } - } if (CHECK_VERSION(1, 1) || @@ -809,21 +806,24 @@ shader_f16_i8_enable.sType = // Enable only what the driver supports shader_8bit_storage_enable.storageBuffer8BitAccess = - shader_8bit_storage_feature.storageBuffer8BitAccess ? VK_TRUE : VK_FALSE; + shader_8bit_storage_feature.storageBuffer8BitAccess ? VK_TRUE + : VK_FALSE; shader_8bit_storage_enable.uniformAndStorageBuffer8BitAccess = - shader_8bit_storage_feature.uniformAndStorageBuffer8BitAccess ? 
VK_TRUE : VK_FALSE; + shader_8bit_storage_feature.uniformAndStorageBuffer8BitAccess + ? VK_TRUE + : VK_FALSE; shader_8bit_storage_enable.storagePushConstant8 = shader_8bit_storage_feature.storagePushConstant8 ? VK_TRUE : VK_FALSE; - // 8-bit storage present? + // 8-bit storage present? const bool has_8bit_storage = - (shader_8bit_storage_feature.storageBuffer8BitAccess == VK_TRUE) || - (shader_8bit_storage_feature.uniformAndStorageBuffer8BitAccess == VK_TRUE) || - (shader_8bit_storage_feature.storagePushConstant8 == VK_TRUE); + (shader_8bit_storage_feature.storageBuffer8BitAccess == VK_TRUE) || + (shader_8bit_storage_feature.uniformAndStorageBuffer8BitAccess == + VK_TRUE) || + (shader_8bit_storage_feature.storagePushConstant8 == VK_TRUE); if (has_8bit_storage) { caps.set(DeviceCapability::spirv_has_8bit_storage, true); } - } if (CHECK_VERSION(1, 1) || @@ -832,28 +832,32 @@ shader_f16_i8_enable.sType = vkGetPhysicalDeviceFeatures2KHR(physical_device_, &features2); shader_16bit_storage_enable.storageBuffer16BitAccess = - shader_16bit_storage_feature.storageBuffer16BitAccess ? VK_TRUE : VK_FALSE; + shader_16bit_storage_feature.storageBuffer16BitAccess ? VK_TRUE + : VK_FALSE; shader_16bit_storage_enable.uniformAndStorageBuffer16BitAccess = - shader_16bit_storage_feature.uniformAndStorageBuffer16BitAccess ? VK_TRUE : VK_FALSE; + shader_16bit_storage_feature.uniformAndStorageBuffer16BitAccess + ? VK_TRUE + : VK_FALSE; shader_16bit_storage_enable.storagePushConstant16 = - shader_16bit_storage_feature.storagePushConstant16 ? VK_TRUE : VK_FALSE; + shader_16bit_storage_feature.storagePushConstant16 ? VK_TRUE + : VK_FALSE; shader_16bit_storage_enable.storageInputOutput16 = - shader_16bit_storage_feature.storageInputOutput16 ? VK_TRUE : VK_FALSE; - + shader_16bit_storage_feature.storageInputOutput16 ? 
VK_TRUE + : VK_FALSE; const bool has_16bit_storage = (shader_16bit_storage_feature.storageBuffer16BitAccess == VK_TRUE) || - (shader_16bit_storage_feature.uniformAndStorageBuffer16BitAccess == VK_TRUE) || + (shader_16bit_storage_feature.uniformAndStorageBuffer16BitAccess == + VK_TRUE) || (shader_16bit_storage_feature.storagePushConstant16 == VK_TRUE) || (shader_16bit_storage_feature.storageInputOutput16 == VK_TRUE); if (has_16bit_storage) { - // Tell Taichi it's OK to use 16-bit *types* (storage), even if shaderInt16 arithmetic is not supported. + // Tell Taichi it's OK to use 16-bit *types* (storage), even if + // shaderInt16 arithmetic is not supported. caps.set(DeviceCapability::spirv_has_16bit_storage, true); } - } - // Buffer Device Address if (CHECK_VERSION(1, 2) || CHECK_EXTENSION(VK_KHR_BUFFER_DEVICE_ADDRESS_EXTENSION_NAME)) { @@ -871,7 +875,6 @@ shader_f16_i8_enable.sType = #endif } } - } // Dynamic rendering @@ -908,8 +911,10 @@ shader_f16_i8_enable.sType = // Helper to append under features2_enable auto append2 = [&](VkBaseOutStructure *node) { - VkBaseOutStructure *tail = reinterpret_cast(&features2_enable); - while (tail->pNext) tail = tail->pNext; + VkBaseOutStructure *tail = + reinterpret_cast(&features2_enable); + while (tail->pNext) + tail = tail->pNext; tail->pNext = node; }; @@ -920,27 +925,31 @@ shader_f16_i8_enable.sType = if (shader_8bit_storage_enable.storageBuffer8BitAccess || shader_8bit_storage_enable.uniformAndStorageBuffer8BitAccess || shader_8bit_storage_enable.storagePushConstant8) { - append2(reinterpret_cast(&shader_8bit_storage_enable)); + append2( + reinterpret_cast(&shader_8bit_storage_enable)); } if (shader_16bit_storage_enable.storageBuffer16BitAccess || shader_16bit_storage_enable.uniformAndStorageBuffer16BitAccess || shader_16bit_storage_enable.storagePushConstant16 || shader_16bit_storage_enable.storageInputOutput16) { - append2(reinterpret_cast(&shader_16bit_storage_enable)); + append2( + 
reinterpret_cast(&shader_16bit_storage_enable)); } - // Append features2_enable to the TAIL of whatever is already in create_info.pNext (validation etc.) - VkBaseOutStructure *tail = reinterpret_cast(create_info.pNext); + // Append features2_enable to the TAIL of whatever is already in + // create_info.pNext (validation etc.) + VkBaseOutStructure *tail = + reinterpret_cast(create_info.pNext); if (!tail) { create_info.pNext = &features2_enable; // no validation chain present } else { - while (tail->pNext) tail = tail->pNext; + while (tail->pNext) + tail = tail->pNext; tail->pNext = reinterpret_cast(&features2_enable); } // ---- END: Build VkPhysicalDeviceFeatures2 enable chain ---- - if (params_.enable_validation_layer) { create_info.enabledLayerCount = (uint32_t)kValidationLayers.size(); create_info.ppEnabledLayerNames = kValidationLayers.data(); From 3471ca51a6c3e0209dd72957a31cbe25d6c1f0ad Mon Sep 17 00:00:00 2001 From: Yunus Berndt Date: Thu, 28 Aug 2025 15:59:37 +1000 Subject: [PATCH 03/25] Better checks if vulkan SDK already exists --- .github/workflows/scripts/ti_build/vulkan.py | 24 ++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/.github/workflows/scripts/ti_build/vulkan.py b/.github/workflows/scripts/ti_build/vulkan.py index e6b532fd377b5..f861bd150061e 100644 --- a/.github/workflows/scripts/ti_build/vulkan.py +++ b/.github/workflows/scripts/ti_build/vulkan.py @@ -15,6 +15,30 @@ @banner("Setup Vulkan 1.3.296.0") def setup_vulkan(): u = platform.uname() + + # Check if Vulkan SDK is already available + if u.system == "Windows": + # Check common Vulkan SDK installation paths + possible_paths = [ + "C:\\VulkanSDK", + "C:\\Program Files\\VulkanSDK", + "C:\\Program Files (x86)\\VulkanSDK", + ] + + for path in possible_paths: + if os.path.exists(path): + # Find the latest version + versions = [d for d in os.listdir(path) if os.path.isdir(os.path.join(path, d))] + if versions: + latest_version = sorted(versions)[-1] + vulkan_sdk_path = 
os.path.join(path, latest_version) + os.environ["VULKAN_SDK"] = vulkan_sdk_path + os.environ["VK_SDK_PATH"] = vulkan_sdk_path + os.environ["VK_LAYER_PATH"] = os.path.join(vulkan_sdk_path, "Bin") + path_prepend("PATH", os.path.join(vulkan_sdk_path, "Bin")) + print(f"Using existing Vulkan SDK at: {vulkan_sdk_path}") + return + if u.system == "Linux": url = "https://sdk.lunarg.com/sdk/download/1.3.296.0/linux/vulkansdk-linux-x86_64-1.3.296.0.tar.xz" prefix = get_cache_home() / "vulkan-1.3.296.0" From 954879cfafef000e3d473aa2d89befc9d80c3bc4 Mon Sep 17 00:00:00 2001 From: Yunus Berndt Date: Fri, 29 Aug 2025 17:48:17 +1000 Subject: [PATCH 04/25] style(black): reformat ti_build/vulkan.py --- .github/workflows/scripts/ti_build/vulkan.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/.github/workflows/scripts/ti_build/vulkan.py b/.github/workflows/scripts/ti_build/vulkan.py index f861bd150061e..5454210c7fed7 100644 --- a/.github/workflows/scripts/ti_build/vulkan.py +++ b/.github/workflows/scripts/ti_build/vulkan.py @@ -15,7 +15,7 @@ @banner("Setup Vulkan 1.3.296.0") def setup_vulkan(): u = platform.uname() - + # Check if Vulkan SDK is already available if u.system == "Windows": # Check common Vulkan SDK installation paths @@ -24,11 +24,13 @@ def setup_vulkan(): "C:\\Program Files\\VulkanSDK", "C:\\Program Files (x86)\\VulkanSDK", ] - + for path in possible_paths: if os.path.exists(path): # Find the latest version - versions = [d for d in os.listdir(path) if os.path.isdir(os.path.join(path, d))] + versions = [ + d for d in os.listdir(path) if os.path.isdir(os.path.join(path, d)) + ] if versions: latest_version = sorted(versions)[-1] vulkan_sdk_path = os.path.join(path, latest_version) @@ -38,7 +40,7 @@ def setup_vulkan(): path_prepend("PATH", os.path.join(vulkan_sdk_path, "Bin")) print(f"Using existing Vulkan SDK at: {vulkan_sdk_path}") return - + if u.system == "Linux": url = 
"https://sdk.lunarg.com/sdk/download/1.3.296.0/linux/vulkansdk-linux-x86_64-1.3.296.0.tar.xz" prefix = get_cache_home() / "vulkan-1.3.296.0" From be8c7b3ab28bda4c934e728124bf9759b6221215 Mon Sep 17 00:00:00 2001 From: Yunus Berndt Date: Fri, 29 Aug 2025 18:04:04 +1000 Subject: [PATCH 05/25] style(pre-commit): match pinned Black formatting --- .github/workflows/scripts/ti_build/vulkan.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/.github/workflows/scripts/ti_build/vulkan.py b/.github/workflows/scripts/ti_build/vulkan.py index 5454210c7fed7..a3f978c995954 100644 --- a/.github/workflows/scripts/ti_build/vulkan.py +++ b/.github/workflows/scripts/ti_build/vulkan.py @@ -28,9 +28,7 @@ def setup_vulkan(): for path in possible_paths: if os.path.exists(path): # Find the latest version - versions = [ - d for d in os.listdir(path) if os.path.isdir(os.path.join(path, d)) - ] + versions = [d for d in os.listdir(path) if os.path.isdir(os.path.join(path, d))] if versions: latest_version = sorted(versions)[-1] vulkan_sdk_path = os.path.join(path, latest_version) From b7ee168e685dc73f1ade15b10b5a6e287e288630 Mon Sep 17 00:00:00 2001 From: Yunus Berndt Date: Sun, 31 Aug 2025 15:38:16 +1000 Subject: [PATCH 06/25] fix: resolve Vulkan build and compatibility issues for AMD GPUs - Fix const-correctness issue in vulkan_device_creator.cpp by adding const_cast for create_info.pNext pointer casting - Reorder SPIRV-Tools include directory in CMakeLists.txt to resolve build configuration issues - Add CHANGELOG.md to document recent Vulkan patch work and improvements These changes resolve compilation errors and improve Vulkan backend compatibility, particularly for AMD GPU support and 8-bit/16-bit operations. Part of Vulkan-patch-for-AMD branch development. 
--- cmake/TaichiCore.cmake | 3 ++- python/taichi/CHANGELOG.md | 8 ++++++++ taichi/rhi/vulkan/vulkan_device_creator.cpp | 2 +- 3 files changed, 11 insertions(+), 2 deletions(-) create mode 100644 python/taichi/CHANGELOG.md diff --git a/cmake/TaichiCore.cmake b/cmake/TaichiCore.cmake index b716b071d48fc..28f4e8ae04bbe 100644 --- a/cmake/TaichiCore.cmake +++ b/cmake/TaichiCore.cmake @@ -138,11 +138,12 @@ add_library(${CORE_LIBRARY_NAME} OBJECT ${TAICHI_CORE_SOURCE}) target_include_directories(${CORE_LIBRARY_NAME} PRIVATE ${CMAKE_SOURCE_DIR}) target_include_directories(${CORE_LIBRARY_NAME} PRIVATE external/include) -target_include_directories(${CORE_LIBRARY_NAME} PRIVATE external/SPIRV-Tools/include) target_include_directories(${CORE_LIBRARY_NAME} PRIVATE external/PicoSHA2) target_include_directories(${CORE_LIBRARY_NAME} PRIVATE external/eigen) target_include_directories(${CORE_LIBRARY_NAME} PRIVATE external/FP16/include) +target_include_directories(${CORE_LIBRARY_NAME} PRIVATE external/SPIRV-Tools/include) + target_link_libraries(${CORE_LIBRARY_NAME} PUBLIC ti_device_api) if(TI_WITH_LLVM) diff --git a/python/taichi/CHANGELOG.md b/python/taichi/CHANGELOG.md new file mode 100644 index 0000000000000..4945d4e304b48 --- /dev/null +++ b/python/taichi/CHANGELOG.md @@ -0,0 +1,8 @@ +Highlights: + +Full changelog: + - style(pre-commit): match pinned Black formatting (by **Yunus Berndt**) + - style(black): reformat ti_build/vulkan.py (by **Yunus Berndt**) + - Better checks if vulkan SDK already exists (by **Yunus Berndt**) + - [pre-commit.ci] auto fixes from pre-commit.com hooks (by **pre-commit-ci[bot]**) + - Vulkan patch for 8bit,16bit storage and 8 Bit arithmetic (by **Yunus Berndt**) diff --git a/taichi/rhi/vulkan/vulkan_device_creator.cpp b/taichi/rhi/vulkan/vulkan_device_creator.cpp index 4c72ff5aff098..e1771e099a624 100644 --- a/taichi/rhi/vulkan/vulkan_device_creator.cpp +++ b/taichi/rhi/vulkan/vulkan_device_creator.cpp @@ -939,7 +939,7 @@ void 
VulkanDeviceCreator::create_logical_device(bool manual_create) { // Append features2_enable to the TAIL of whatever is already in // create_info.pNext (validation etc.) VkBaseOutStructure *tail = - reinterpret_cast(create_info.pNext); + reinterpret_cast(const_cast(create_info.pNext)); if (!tail) { create_info.pNext = &features2_enable; // no validation chain present } else { From 6de05c025ff325db8b996e04dc9c25a3b9c39ddc Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sun, 31 Aug 2025 05:39:23 +0000 Subject: [PATCH 07/25] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- taichi/rhi/vulkan/vulkan_device_creator.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/taichi/rhi/vulkan/vulkan_device_creator.cpp b/taichi/rhi/vulkan/vulkan_device_creator.cpp index e1771e099a624..09394b5e648b3 100644 --- a/taichi/rhi/vulkan/vulkan_device_creator.cpp +++ b/taichi/rhi/vulkan/vulkan_device_creator.cpp @@ -938,8 +938,8 @@ void VulkanDeviceCreator::create_logical_device(bool manual_create) { // Append features2_enable to the TAIL of whatever is already in // create_info.pNext (validation etc.) - VkBaseOutStructure *tail = - reinterpret_cast(const_cast(create_info.pNext)); + VkBaseOutStructure *tail = reinterpret_cast( + const_cast(create_info.pNext)); if (!tail) { create_info.pNext = &features2_enable; // no validation chain present } else { From c0f15dbe87670dca5a10476d43c6b4a169f44e20 Mon Sep 17 00:00:00 2001 From: Yunus Berndt Date: Mon, 8 Sep 2025 13:28:50 +1000 Subject: [PATCH 08/25] Enhance Vulkan support for AMD GPUs by refining 16-bit storage capabilities - Updated Vulkan device creation logic to set granular 16-bit storage capabilities based on supported features. - Improved SPIRV IR builder to conditionally enable specific 16-bit capabilities for better compatibility. 
- Added logging during Python module initialization to aid in debugging. - Updated .gitignore to exclude new build artifacts. - BUILD_CONFIGURATION.md contains info on how to build wheels using the correct linker, and CMake flags in entry.py. More recent linkers and no flags seem to fail the build. These changes address compatibility issues and enhance the Vulkan backend for AMD GPU users. --- .github/workflows/scripts/ti_build/entry.py | 14 ++++++ .gitignore | 3 ++ BUILD_CONFIGURATION.md | 49 +++++++++++++++++++ python/taichi/CHANGELOG.md | 1 + taichi/codegen/spirv/spirv_ir_builder.cpp | 35 ++++++++----- taichi/inc/rhi_constants.inc.h | 5 ++ taichi/python/export.cpp | 54 +++++++++++++++++---- taichi/rhi/vulkan/vulkan_device_creator.cpp | 14 ++++++ 8 files changed, 154 insertions(+), 21 deletions(-) create mode 100644 BUILD_CONFIGURATION.md diff --git a/.github/workflows/scripts/ti_build/entry.py b/.github/workflows/scripts/ti_build/entry.py index 5c7613e65822b..5613f7a008c31 100644 --- a/.github/workflows/scripts/ti_build/entry.py +++ b/.github/workflows/scripts/ti_build/entry.py @@ -37,6 +37,20 @@ def build_wheel(python: Command, pip: Command) -> None: proj_tags = [] extra = [] + # Explicitly set flags to match official Windows release build EXACTLY + # This matches .github/workflows/release.yml lines 238-244 + cmake_args["TI_WITH_OPENGL"] = True + cmake_args["TI_WITH_VULKAN"] = True + cmake_args["TI_WITH_DX11"] = True + cmake_args["TI_WITH_DX12"] = True + cmake_args["TI_BUILD_TESTS"] = True + cmake_args["TI_WITH_C_API"] = True + + # NOTE: Official release does NOT include: + # - TI_WITH_GGUI=ON (this adds IMM32.dll dependency) + # - TI_WITH_LTO=ON (this affects linker behavior) + # - TI_GENERATE_PDB=ON (this affects DLL structure) + cmake_args.writeback() if misc.options.tag_local: wheel_tag = f"+{misc.options.tag_local}" diff --git a/.gitignore b/.gitignore index d2a3084363882..bd3a77c011d4b 100644 --- a/.gitignore +++ b/.gitignore @@ -90,3 +90,6 @@ imgui.ini 
.do-not-clean *.dylib *.ply +/taichi_env/ +/wheel_contents/ +/dist_official/ \ No newline at end of file diff --git a/BUILD_CONFIGURATION.md b/BUILD_CONFIGURATION.md new file mode 100644 index 0000000000000..0d64d5c8a309c --- /dev/null +++ b/BUILD_CONFIGURATION.md @@ -0,0 +1,49 @@ +# Taichi Build Configuration + +## **Environment Setup:** +```powershell +# Set LLVM_DIR to official Taichi LLVM (CRITICAL) +$env:LLVM_DIR = "$env:LOCALAPPDATA\ti-build-cache\llvm15" + +# Set CMake arguments to match official Windows release build +$env:TAICHI_CMAKE_ARGS = "-DTI_WITH_OPENGL=ON -DTI_WITH_VULKAN=ON -DTI_WITH_DX11=ON -DTI_WITH_DX12=ON -DTI_BUILD_TESTS=ON -DTI_WITH_C_API=ON" +``` + +## **Build Command:** +```powershell +# Use older linker version (14.34) to match official Taichi +cmd /c 'call "%ProgramFiles(x86)%\Microsoft Visual Studio\2022\BuildTools\VC\Auxiliary\Build\vcvarsall.bat" amd64 -vcvars_ver=14.34 && python build.py wheel --tag-local v174.granular16bit --python native --permissive' +``` + +## **Required Visual Studio Components:** +- **Desktop development with C++** workload +- **MSVC v143 - VS 2022 C++ x64/x86 build tools (Latest) - 14.34** (non-Spectre-mitigated) +- **C++ Clang Compiler for Windows** +- **C++ 2022 Redistributable MSM** +- **MS Build support for LLVM (clang-cl) toolset** + +## **Critical CMake Flags (matching official release):** +- **`TI_WITH_OPENGL=ON`** - OpenGL backend +- **`TI_WITH_VULKAN=ON`** - Vulkan backend +- **`TI_WITH_DX11=ON`** - DirectX 11 backend +- **`TI_WITH_DX12=ON`** - DirectX 12 backend +- **`TI_BUILD_TESTS=ON`** - Build test suite +- **`TI_WITH_C_API=ON`** - C API support + +## **Excluded Flags (not in official release):** +- **`TI_WITH_GGUI=ON`** - Adds IMM32.dll dependency (not in official) +- **`TI_WITH_LTO=ON`** - Link Time Code Generation (not in official) +- **`TI_GENERATE_PDB=ON`** - Debug symbols (not in official) + +## **Build System Configuration:** +- **Visual Studio 2022 Build Tools** with MSVC 14.34.31948.0 +- 
**Python 3.12.4** with native build +- **Official Taichi LLVM** from `%LOCALAPPDATA%\ti-build-cache\llvm15` +- **MSBuild** as the build system + +## **Key Dependencies:** +- **`d3d11.dll`** - DirectX 11 runtime +- **`D3DCOMPILER_47.dll`** - DirectX shader compiler +- **`msvcp140.dll`** - MSVC C++ runtime +- **`vcruntime140.dll`** - MSVC runtime +- **`vcruntime140_1.dll`** - MSVC runtime extension \ No newline at end of file diff --git a/python/taichi/CHANGELOG.md b/python/taichi/CHANGELOG.md index 4945d4e304b48..d62073e6631c1 100644 --- a/python/taichi/CHANGELOG.md +++ b/python/taichi/CHANGELOG.md @@ -1,6 +1,7 @@ Highlights: Full changelog: + - fix: resolve Vulkan build and compatibility issues for AMD GPUs (by **Yunus Berndt**) - style(pre-commit): match pinned Black formatting (by **Yunus Berndt**) - style(black): reformat ti_build/vulkan.py (by **Yunus Berndt**) - Better checks if vulkan SDK already exists (by **Yunus Berndt**) diff --git a/taichi/codegen/spirv/spirv_ir_builder.cpp b/taichi/codegen/spirv/spirv_ir_builder.cpp index 30e7274357834..170e551273e04 100644 --- a/taichi/codegen/spirv/spirv_ir_builder.cpp +++ b/taichi/codegen/spirv/spirv_ir_builder.cpp @@ -98,18 +98,29 @@ void IRBuilder::init_header() { if (caps_->get(cap::spirv_has_16bit_storage)) { ib_.begin(spv::OpExtension).add("SPV_KHR_16bit_storage").commit(&header_); - ib_.begin(spv::OpCapability) - .add(spv::CapabilityStorageBuffer16BitAccess) - .commit(&header_); - ib_.begin(spv::OpCapability) - .add(spv::CapabilityUniformAndStorageBuffer16BitAccess) - .commit(&header_); - ib_.begin(spv::OpCapability) - .add(spv::CapabilityStoragePushConstant16) - .commit(&header_); - ib_.begin(spv::OpCapability) - .add(spv::CapabilityStorageInputOutput16) - .commit(&header_); + + // Only enable specific 16-bit storage capabilities that are supported + // This fixes AMD GPU compatibility issues where not all 16-bit features are supported + if (caps_->get(cap::spirv_has_storage_buffer_16bit_access)) { + 
ib_.begin(spv::OpCapability) + .add(spv::CapabilityStorageBuffer16BitAccess) + .commit(&header_); + } + if (caps_->get(cap::spirv_has_uniform_and_storage_buffer_16bit_access)) { + ib_.begin(spv::OpCapability) + .add(spv::CapabilityUniformAndStorageBuffer16BitAccess) + .commit(&header_); + } + if (caps_->get(cap::spirv_has_storage_push_constant_16)) { + ib_.begin(spv::OpCapability) + .add(spv::CapabilityStoragePushConstant16) + .commit(&header_); + } + if (caps_->get(cap::spirv_has_storage_input_output_16)) { + ib_.begin(spv::OpCapability) + .add(spv::CapabilityStorageInputOutput16) + .commit(&header_); + } } // === END: 8/16-bit storage support === diff --git a/taichi/inc/rhi_constants.inc.h b/taichi/inc/rhi_constants.inc.h index bd16f193e432f..f6bcff14052eb 100644 --- a/taichi/inc/rhi_constants.inc.h +++ b/taichi/inc/rhi_constants.inc.h @@ -16,6 +16,11 @@ PER_DEVICE_CAPABILITY(spirv_has_float16) PER_DEVICE_CAPABILITY(spirv_has_float64) PER_DEVICE_CAPABILITY(spirv_has_8bit_storage) PER_DEVICE_CAPABILITY(spirv_has_16bit_storage) +// Granular 16-bit storage capabilities for AMD GPU compatibility +PER_DEVICE_CAPABILITY(spirv_has_storage_buffer_16bit_access) +PER_DEVICE_CAPABILITY(spirv_has_uniform_and_storage_buffer_16bit_access) +PER_DEVICE_CAPABILITY(spirv_has_storage_push_constant_16) +PER_DEVICE_CAPABILITY(spirv_has_storage_input_output_16) PER_DEVICE_CAPABILITY(spirv_has_atomic_int64) PER_DEVICE_CAPABILITY(spirv_has_atomic_float16) // load, store, exchange PER_DEVICE_CAPABILITY(spirv_has_atomic_float16_add) diff --git a/taichi/python/export.cpp b/taichi/python/export.cpp index 2cf3183a1f08d..f4dab06b53ab5 100644 --- a/taichi/python/export.cpp +++ b/taichi/python/export.cpp @@ -6,21 +6,57 @@ #include "taichi/python/export.h" #include "taichi/common/interface.h" #include "taichi/util/io.h" +#include +#include namespace taichi { +static void log_boot(const char *msg) { + const char *temp = std::getenv("TEMP"); + std::string dir = temp ? 
std::string(temp) : std::string("."); + std::string path = join_path(dir, "taichi_pyd_boot.log"); + if (FILE *f = std::fopen(path.c_str(), "a")) { + std::fprintf(f, "%s\n", msg); + std::fclose(f); + } +} + PYBIND11_MODULE(taichi_python, m) { - m.doc() = "taichi_python"; + try { + log_boot("PYD: enter taichi_python module init"); + m.doc() = "taichi_python"; - for (auto &kv : InterfaceHolder::get_instance()->methods) { - kv.second(&m); - } + log_boot("PYD: before InterfaceHolder methods"); + for (auto &kv : InterfaceHolder::get_instance()->methods) { + kv.second(&m); + } + log_boot("PYD: after InterfaceHolder methods"); + + log_boot("PYD: before export_lang"); + export_lang(m); + log_boot("PYD: after export_lang"); - export_lang(m); - export_math(m); - export_misc(m); - export_visual(m); - export_ggui(m); + export_math(m); + log_boot("PYD: after export_math"); + + export_misc(m); + log_boot("PYD: after export_misc"); + + export_visual(m); + log_boot("PYD: after export_visual"); + + export_ggui(m); + log_boot("PYD: after export_ggui"); + + log_boot("PYD: module init completed"); + } catch (const std::exception &e) { + log_boot("PYD: exception during module init"); + log_boot(e.what()); + throw; + } catch (...) { + log_boot("PYD: unknown exception during module init"); + throw; + } } } // namespace taichi diff --git a/taichi/rhi/vulkan/vulkan_device_creator.cpp b/taichi/rhi/vulkan/vulkan_device_creator.cpp index e1771e099a624..e90bad1ddead9 100644 --- a/taichi/rhi/vulkan/vulkan_device_creator.cpp +++ b/taichi/rhi/vulkan/vulkan_device_creator.cpp @@ -845,6 +845,20 @@ void VulkanDeviceCreator::create_logical_device(bool manual_create) { shader_16bit_storage_feature.storageInputOutput16 ? 
VK_TRUE : VK_FALSE; + // Set granular 16-bit storage capabilities for AMD GPU compatibility + if (shader_16bit_storage_feature.storageBuffer16BitAccess == VK_TRUE) { + caps.set(DeviceCapability::spirv_has_storage_buffer_16bit_access, true); + } + if (shader_16bit_storage_feature.uniformAndStorageBuffer16BitAccess == VK_TRUE) { + caps.set(DeviceCapability::spirv_has_uniform_and_storage_buffer_16bit_access, true); + } + if (shader_16bit_storage_feature.storagePushConstant16 == VK_TRUE) { + caps.set(DeviceCapability::spirv_has_storage_push_constant_16, true); + } + if (shader_16bit_storage_feature.storageInputOutput16 == VK_TRUE) { + caps.set(DeviceCapability::spirv_has_storage_input_output_16, true); + } + const bool has_16bit_storage = (shader_16bit_storage_feature.storageBuffer16BitAccess == VK_TRUE) || (shader_16bit_storage_feature.uniformAndStorageBuffer16BitAccess == From d313084a4e0c4e821ed959aa2c25c476e6f99bd0 Mon Sep 17 00:00:00 2001 From: Yunus Berndt Date: Mon, 8 Sep 2025 13:31:05 +1000 Subject: [PATCH 09/25] reverted gitignore to upstream standard --- .gitignore | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/.gitignore b/.gitignore index bd3a77c011d4b..c7070222ce201 100644 --- a/.gitignore +++ b/.gitignore @@ -89,7 +89,4 @@ imgui.ini .cache .do-not-clean *.dylib -*.ply -/taichi_env/ -/wheel_contents/ -/dist_official/ \ No newline at end of file +*.ply \ No newline at end of file From 19f2341c8f8879c8a94862a95a9f1c546123dc50 Mon Sep 17 00:00:00 2001 From: Yunus Berndt Date: Mon, 8 Sep 2025 13:37:57 +1000 Subject: [PATCH 10/25] Refine Vulkan device creation and update build configuration - Improved formatting in `vulkan_device_creator.cpp` for better readability. - Cleaned up whitespace in `entry.py` to adhere to style guidelines. - Updated `BUILD_CONFIGURATION.md` to ensure consistent documentation of CMake flags. These changes enhance code clarity and maintainability while ensuring accurate build instructions. 
--- .github/workflows/scripts/ti_build/entry.py | 6 ++++-- BUILD_CONFIGURATION.md | 4 ++-- taichi/rhi/vulkan/vulkan_device_creator.cpp | 7 +++++-- 3 files changed, 11 insertions(+), 6 deletions(-) diff --git a/.github/workflows/scripts/ti_build/entry.py b/.github/workflows/scripts/ti_build/entry.py index 5613f7a008c31..b48b3de6e2db5 100644 --- a/.github/workflows/scripts/ti_build/entry.py +++ b/.github/workflows/scripts/ti_build/entry.py @@ -45,7 +45,7 @@ def build_wheel(python: Command, pip: Command) -> None: cmake_args["TI_WITH_DX12"] = True cmake_args["TI_BUILD_TESTS"] = True cmake_args["TI_WITH_C_API"] = True - + # NOTE: Official release does NOT include: # - TI_WITH_GGUI=ON (this adds IMM32.dll dependency) # - TI_WITH_LTO=ON (this affects linker behavior) @@ -73,7 +73,9 @@ def build_wheel(python: Command, pip: Command) -> None: extra.extend(["-p", "manylinux_2_27_x86_64"]) python("setup.py", "clean") - python("misc/make_changelog.py", "--ver", "origin/master", "--repo_dir", "./", "--save") + python( + "misc/make_changelog.py", "--ver", "origin/master", "--repo_dir", "./", "--save" + ) with nice(): python("setup.py", *proj_tags, "bdist_wheel", *extra) diff --git a/BUILD_CONFIGURATION.md b/BUILD_CONFIGURATION.md index 0d64d5c8a309c..2b9546bb321c3 100644 --- a/BUILD_CONFIGURATION.md +++ b/BUILD_CONFIGURATION.md @@ -24,7 +24,7 @@ cmd /c 'call "%ProgramFiles(x86)%\Microsoft Visual Studio\2022\BuildTools\VC\Aux ## **Critical CMake Flags (matching official release):** - **`TI_WITH_OPENGL=ON`** - OpenGL backend -- **`TI_WITH_VULKAN=ON`** - Vulkan backend +- **`TI_WITH_VULKAN=ON`** - Vulkan backend - **`TI_WITH_DX11=ON`** - DirectX 11 backend - **`TI_WITH_DX12=ON`** - DirectX 12 backend - **`TI_BUILD_TESTS=ON`** - Build test suite @@ -46,4 +46,4 @@ cmd /c 'call "%ProgramFiles(x86)%\Microsoft Visual Studio\2022\BuildTools\VC\Aux - **`D3DCOMPILER_47.dll`** - DirectX shader compiler - **`msvcp140.dll`** - MSVC C++ runtime - **`vcruntime140.dll`** - MSVC runtime -- 
**`vcruntime140_1.dll`** - MSVC runtime extension \ No newline at end of file +- **`vcruntime140_1.dll`** - MSVC runtime extension diff --git a/taichi/rhi/vulkan/vulkan_device_creator.cpp b/taichi/rhi/vulkan/vulkan_device_creator.cpp index 05e5baa8422b9..b0f9a584cd4ea 100644 --- a/taichi/rhi/vulkan/vulkan_device_creator.cpp +++ b/taichi/rhi/vulkan/vulkan_device_creator.cpp @@ -849,8 +849,11 @@ void VulkanDeviceCreator::create_logical_device(bool manual_create) { if (shader_16bit_storage_feature.storageBuffer16BitAccess == VK_TRUE) { caps.set(DeviceCapability::spirv_has_storage_buffer_16bit_access, true); } - if (shader_16bit_storage_feature.uniformAndStorageBuffer16BitAccess == VK_TRUE) { - caps.set(DeviceCapability::spirv_has_uniform_and_storage_buffer_16bit_access, true); + if (shader_16bit_storage_feature.uniformAndStorageBuffer16BitAccess == + VK_TRUE) { + caps.set( + DeviceCapability::spirv_has_uniform_and_storage_buffer_16bit_access, + true); } if (shader_16bit_storage_feature.storagePushConstant16 == VK_TRUE) { caps.set(DeviceCapability::spirv_has_storage_push_constant_16, true); From 5f166000d7859636015ffe2955340dfc6c589b44 Mon Sep 17 00:00:00 2001 From: Yunus Berndt Date: Mon, 8 Sep 2025 13:41:05 +1000 Subject: [PATCH 11/25] Fix formatting issues: black, clang-format, trailing whitespace, and end-of-file fixes --- .gitignore | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index c7070222ce201..d2a3084363882 100644 --- a/.gitignore +++ b/.gitignore @@ -89,4 +89,4 @@ imgui.ini .cache .do-not-clean *.dylib -*.ply \ No newline at end of file +*.ply From 89ec3798665be569f879e1dc1443812e9da2f0d5 Mon Sep 17 00:00:00 2001 From: Yunus Berndt Date: Mon, 8 Sep 2025 13:52:29 +1000 Subject: [PATCH 12/25] Fix pre-commit formatting issues: Black 25.1.0 and clang-format --- .github/workflows/scripts/ti_build/entry.py | 4 +--- taichi/codegen/spirv/spirv_ir_builder.cpp | 5 +++-- 2 files changed, 4 insertions(+), 5 deletions(-) diff 
--git a/.github/workflows/scripts/ti_build/entry.py b/.github/workflows/scripts/ti_build/entry.py index b48b3de6e2db5..1339338c13255 100644 --- a/.github/workflows/scripts/ti_build/entry.py +++ b/.github/workflows/scripts/ti_build/entry.py @@ -73,9 +73,7 @@ def build_wheel(python: Command, pip: Command) -> None: extra.extend(["-p", "manylinux_2_27_x86_64"]) python("setup.py", "clean") - python( - "misc/make_changelog.py", "--ver", "origin/master", "--repo_dir", "./", "--save" - ) + python("misc/make_changelog.py", "--ver", "origin/master", "--repo_dir", "./", "--save") with nice(): python("setup.py", *proj_tags, "bdist_wheel", *extra) diff --git a/taichi/codegen/spirv/spirv_ir_builder.cpp b/taichi/codegen/spirv/spirv_ir_builder.cpp index 170e551273e04..6a7ce05eff5fb 100644 --- a/taichi/codegen/spirv/spirv_ir_builder.cpp +++ b/taichi/codegen/spirv/spirv_ir_builder.cpp @@ -98,9 +98,10 @@ void IRBuilder::init_header() { if (caps_->get(cap::spirv_has_16bit_storage)) { ib_.begin(spv::OpExtension).add("SPV_KHR_16bit_storage").commit(&header_); - + // Only enable specific 16-bit storage capabilities that are supported - // This fixes AMD GPU compatibility issues where not all 16-bit features are supported + // This fixes AMD GPU compatibility issues where not all 16-bit features are + // supported if (caps_->get(cap::spirv_has_storage_buffer_16bit_access)) { ib_.begin(spv::OpCapability) .add(spv::CapabilityStorageBuffer16BitAccess) From c2c8e3040d37afd883cffe27172803c49afa8b6b Mon Sep 17 00:00:00 2001 From: Yunus Berndt Date: Mon, 8 Sep 2025 14:51:16 +1000 Subject: [PATCH 13/25] Fix Vulkan 1.1 version check for AMD 8-bit/16-bit support - Changed CHECK_VERSION(1, 2) to CHECK_VERSION(1, 1) for VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME - This allows AMD GPUs with Vulkan 1.1.73 to properly detect 8-bit/16-bit arithmetic capabilities - Fixes 'Type u8 not supported' error on AMD Radeon R7 450 --- taichi/rhi/vulkan/vulkan_device_creator.cpp | 2 +- 1 file changed, 1 insertion(+), 
1 deletion(-) diff --git a/taichi/rhi/vulkan/vulkan_device_creator.cpp b/taichi/rhi/vulkan/vulkan_device_creator.cpp index b0f9a584cd4ea..1061ba57068ce 100644 --- a/taichi/rhi/vulkan/vulkan_device_creator.cpp +++ b/taichi/rhi/vulkan/vulkan_device_creator.cpp @@ -784,7 +784,7 @@ void VulkanDeviceCreator::create_logical_device(bool manual_create) { } // F16 / I8 - if (CHECK_VERSION(1, 2) || + if (CHECK_VERSION(1, 1) || CHECK_EXTENSION(VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME)) { features2.pNext = &shader_f16_i8_feature; vkGetPhysicalDeviceFeatures2KHR(physical_device_, &features2); From f2a0879eea27f7b47694c51d159d2ea618af04d2 Mon Sep 17 00:00:00 2001 From: Yunus Berndt Date: Mon, 8 Sep 2025 15:19:02 +1000 Subject: [PATCH 14/25] Add debug output for AMD GPU capability detection --- taichi/rhi/vulkan/vulkan_device_creator.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/taichi/rhi/vulkan/vulkan_device_creator.cpp b/taichi/rhi/vulkan/vulkan_device_creator.cpp index 1061ba57068ce..ad56d77333404 100644 --- a/taichi/rhi/vulkan/vulkan_device_creator.cpp +++ b/taichi/rhi/vulkan/vulkan_device_creator.cpp @@ -786,6 +786,7 @@ void VulkanDeviceCreator::create_logical_device(bool manual_create) { // F16 / I8 if (CHECK_VERSION(1, 1) || CHECK_EXTENSION(VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME)) { + TI_DEBUG("AMD GPU: Checking VK_KHR_SHADER_FLOAT16_INT8 extension"); features2.pNext = &shader_f16_i8_feature; vkGetPhysicalDeviceFeatures2KHR(physical_device_, &features2); @@ -796,6 +797,9 @@ void VulkanDeviceCreator::create_logical_device(bool manual_create) { if (shader_f16_i8_feature.shaderInt8) { caps.set(DeviceCapability::spirv_has_int8, true); shader_f16_i8_enable.shaderInt8 = VK_TRUE; // enable if supported + TI_DEBUG("AMD GPU: shaderInt8 feature detected and enabled"); + } else { + TI_DEBUG("AMD GPU: shaderInt8 feature NOT detected"); } } From b4eeb77db772cc77efdf6b580e4eaae5696a10ad Mon Sep 17 00:00:00 2001 From: Yunus Berndt Date: Tue, 9 Sep 2025 09:27:55 +1000 
Subject: [PATCH 15/25] Add detailed debug output for feature detection values --- taichi/rhi/vulkan/vulkan_device_creator.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/taichi/rhi/vulkan/vulkan_device_creator.cpp b/taichi/rhi/vulkan/vulkan_device_creator.cpp index ad56d77333404..0d022382c593c 100644 --- a/taichi/rhi/vulkan/vulkan_device_creator.cpp +++ b/taichi/rhi/vulkan/vulkan_device_creator.cpp @@ -789,6 +789,8 @@ void VulkanDeviceCreator::create_logical_device(bool manual_create) { TI_DEBUG("AMD GPU: Checking VK_KHR_SHADER_FLOAT16_INT8 extension"); features2.pNext = &shader_f16_i8_feature; vkGetPhysicalDeviceFeatures2KHR(physical_device_, &features2); + TI_DEBUG("AMD GPU: shader_f16_i8_feature.shaderInt8 = %d", shader_f16_i8_feature.shaderInt8); + TI_DEBUG("AMD GPU: shader_f16_i8_feature.shaderFloat16 = %d", shader_f16_i8_feature.shaderFloat16); if (shader_f16_i8_feature.shaderFloat16) { caps.set(DeviceCapability::spirv_has_float16, true); From 48dd4f5d8977197b6cf92ec1d81188502a667168 Mon Sep 17 00:00:00 2001 From: Yunus Berndt Date: Tue, 9 Sep 2025 10:14:19 +1000 Subject: [PATCH 16/25] Fix debug output formatting for shaderInt8/shaderFloat16 values --- taichi/rhi/vulkan/vulkan_device_creator.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/taichi/rhi/vulkan/vulkan_device_creator.cpp b/taichi/rhi/vulkan/vulkan_device_creator.cpp index 0d022382c593c..c22c9b94af5bc 100644 --- a/taichi/rhi/vulkan/vulkan_device_creator.cpp +++ b/taichi/rhi/vulkan/vulkan_device_creator.cpp @@ -789,8 +789,8 @@ void VulkanDeviceCreator::create_logical_device(bool manual_create) { TI_DEBUG("AMD GPU: Checking VK_KHR_SHADER_FLOAT16_INT8 extension"); features2.pNext = &shader_f16_i8_feature; vkGetPhysicalDeviceFeatures2KHR(physical_device_, &features2); - TI_DEBUG("AMD GPU: shader_f16_i8_feature.shaderInt8 = %d", shader_f16_i8_feature.shaderInt8); - TI_DEBUG("AMD GPU: shader_f16_i8_feature.shaderFloat16 = %d", shader_f16_i8_feature.shaderFloat16); + 
TI_DEBUG("AMD GPU: shader_f16_i8_feature.shaderInt8 = " + std::to_string(shader_f16_i8_feature.shaderInt8)); + TI_DEBUG("AMD GPU: shader_f16_i8_feature.shaderFloat16 = " + std::to_string(shader_f16_i8_feature.shaderFloat16)); if (shader_f16_i8_feature.shaderFloat16) { caps.set(DeviceCapability::spirv_has_float16, true); From 07864964f59f1286a4f8fad0d620afe6e8cce3fd Mon Sep 17 00:00:00 2001 From: Yunus Berndt Date: Tue, 9 Sep 2025 10:34:31 +1000 Subject: [PATCH 17/25] Add debug output to check if VK_KHR_SHADER_FLOAT16_INT8 extension is actually enabled --- taichi/rhi/vulkan/vulkan_device_creator.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/taichi/rhi/vulkan/vulkan_device_creator.cpp b/taichi/rhi/vulkan/vulkan_device_creator.cpp index c22c9b94af5bc..57e5aa3ea45b3 100644 --- a/taichi/rhi/vulkan/vulkan_device_creator.cpp +++ b/taichi/rhi/vulkan/vulkan_device_creator.cpp @@ -787,6 +787,8 @@ void VulkanDeviceCreator::create_logical_device(bool manual_create) { if (CHECK_VERSION(1, 1) || CHECK_EXTENSION(VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME)) { TI_DEBUG("AMD GPU: Checking VK_KHR_SHADER_FLOAT16_INT8 extension"); + TI_DEBUG("AMD GPU: Extension enabled: " + std::to_string(CHECK_EXTENSION(VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME))); + TI_DEBUG("AMD GPU: Vulkan version check: " + std::to_string(CHECK_VERSION(1, 1))); features2.pNext = &shader_f16_i8_feature; vkGetPhysicalDeviceFeatures2KHR(physical_device_, &features2); TI_DEBUG("AMD GPU: shader_f16_i8_feature.shaderInt8 = " + std::to_string(shader_f16_i8_feature.shaderInt8)); From 276790c48a27ee96a7ab0b1f6afed8adf4088779 Mon Sep 17 00:00:00 2001 From: Yunus Berndt Date: Tue, 9 Sep 2025 10:53:31 +1000 Subject: [PATCH 18/25] Add debug output to check if VK_KHR_SHADER_FLOAT16_INT8 extension is detected in device extensions --- taichi/rhi/vulkan/vulkan_device_creator.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/taichi/rhi/vulkan/vulkan_device_creator.cpp b/taichi/rhi/vulkan/vulkan_device_creator.cpp 
index 57e5aa3ea45b3..9f4f4608d1763 100644 --- a/taichi/rhi/vulkan/vulkan_device_creator.cpp +++ b/taichi/rhi/vulkan/vulkan_device_creator.cpp @@ -555,6 +555,11 @@ void VulkanDeviceCreator::create_logical_device(bool manual_create) { RHI_LOG_DEBUG(msg_buf); std::string name = std::string(ext.extensionName); + + // Debug: Check if VK_KHR_SHADER_FLOAT16_INT8 is detected + if (name == VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME) { + TI_DEBUG("AMD GPU: Found VK_KHR_SHADER_FLOAT16_INT8 extension in device extensions!"); + } if (name == "VK_KHR_portability_subset") { RHI_LOG_ERROR( From c77a3fe550c1c6e78778e118735d32b8c40c4e23 Mon Sep 17 00:00:00 2001 From: Yunus Berndt Date: Tue, 9 Sep 2025 11:20:44 +1000 Subject: [PATCH 19/25] Add comprehensive debug output for AMD GPU extension detection and device selection --- taichi/rhi/vulkan/vulkan_device_creator.cpp | 40 +++++++++++++++++++++ 1 file changed, 40 insertions(+) diff --git a/taichi/rhi/vulkan/vulkan_device_creator.cpp b/taichi/rhi/vulkan/vulkan_device_creator.cpp index 9f4f4608d1763..c8f1472f834e4 100644 --- a/taichi/rhi/vulkan/vulkan_device_creator.cpp +++ b/taichi/rhi/vulkan/vulkan_device_creator.cpp @@ -437,12 +437,20 @@ void VulkanDeviceCreator::pick_physical_device(VkSurfaceKHR test_surface) { RHI_DEBUG_SNPRINTF(msg_buf, sizeof(msg_buf), "Found Vulkan Device %d (%s)", i, properties.deviceName); RHI_LOG_DEBUG(msg_buf); + + // Debug: Show device properties for AMD GPU debugging + TI_DEBUG("AMD GPU: Device " + std::to_string(i) + " - " + std::string(properties.deviceName)); + TI_DEBUG("AMD GPU: Device " + std::to_string(i) + " - API Version: " + + std::to_string(VK_VERSION_MAJOR(properties.apiVersion)) + "." + + std::to_string(VK_VERSION_MINOR(properties.apiVersion)) + "." 
+ + std::to_string(VK_VERSION_PATCH(properties.apiVersion))); } auto device_id = VulkanLoader::instance().visible_device_id; bool has_visible_device{false}; if (!device_id.empty()) { int id = std::stoi(device_id); + TI_DEBUG("AMD GPU: TI_VISIBLE_DEVICE set to: " + device_id); if (id < 0 || id >= device_count) { char msg_buf[128]; snprintf(msg_buf, sizeof(msg_buf), @@ -452,6 +460,7 @@ void VulkanDeviceCreator::pick_physical_device(VkSurfaceKHR test_surface) { } else if (get_device_score(devices[id], test_surface)) { physical_device_ = devices[id]; has_visible_device = true; + TI_DEBUG("AMD GPU: Selected device " + std::to_string(id) + " via TI_VISIBLE_DEVICE"); } } @@ -469,6 +478,15 @@ void VulkanDeviceCreator::pick_physical_device(VkSurfaceKHR test_surface) { RHI_ASSERT(physical_device_ != VK_NULL_HANDLE && "failed to find a suitable GPU"); + // Debug: Show final selected device + VkPhysicalDeviceProperties final_properties{}; + vkGetPhysicalDeviceProperties(physical_device_, &final_properties); + TI_DEBUG("AMD GPU: Final selected device: " + std::string(final_properties.deviceName)); + TI_DEBUG("AMD GPU: Final device API version: " + + std::to_string(VK_VERSION_MAJOR(final_properties.apiVersion)) + "." + + std::to_string(VK_VERSION_MINOR(final_properties.apiVersion)) + "." + + std::to_string(VK_VERSION_PATCH(final_properties.apiVersion))); + queue_family_indices_ = find_queue_families(physical_device_, test_surface); } @@ -516,6 +534,13 @@ void VulkanDeviceCreator::create_logical_device(bool manual_create) { VK_API_VERSION_PATCH(physical_device_properties.apiVersion)); RHI_LOG_DEBUG(msg_buf); } + + // Debug: Show device properties for AMD GPU debugging + TI_DEBUG("AMD GPU: Logical device creation - Device: " + std::string(physical_device_properties.deviceName)); + TI_DEBUG("AMD GPU: Logical device creation - API Version: " + + std::to_string(VK_VERSION_MAJOR(physical_device_properties.apiVersion)) + "." 
+ + std::to_string(VK_VERSION_MINOR(physical_device_properties.apiVersion)) + "." + + std::to_string(VK_VERSION_PATCH(physical_device_properties.apiVersion))); // (penguinliong) The actual logical device is created with lastest version of // Vulkan but we use the device like it has a lower version (if the user @@ -547,6 +572,9 @@ void VulkanDeviceCreator::create_logical_device(bool manual_create) { [[maybe_unused]] bool portability_subset_enabled = false; + // Debug: Show total number of extensions detected + TI_DEBUG("AMD GPU: Total device extensions detected: " + std::to_string(extension_properties.size())); + for (auto &ext : extension_properties) { char msg_buf[256]; RHI_DEBUG_SNPRINTF(msg_buf, sizeof(msg_buf), @@ -560,6 +588,9 @@ void VulkanDeviceCreator::create_logical_device(bool manual_create) { if (name == VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME) { TI_DEBUG("AMD GPU: Found VK_KHR_SHADER_FLOAT16_INT8 extension in device extensions!"); } + + // Debug: Show all extensions for AMD GPU debugging + TI_DEBUG("AMD GPU: Detected extension: " + name); if (name == "VK_KHR_portability_subset") { RHI_LOG_ERROR( @@ -590,6 +621,7 @@ void VulkanDeviceCreator::create_logical_device(bool manual_create) { } else if (name == VK_KHR_VARIABLE_POINTERS_EXTENSION_NAME) { enabled_extensions.push_back(ext.extensionName); } else if (name == VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME) { + TI_DEBUG("AMD GPU: Enabling VK_KHR_SHADER_FLOAT16_INT8 extension!"); enabled_extensions.push_back(ext.extensionName); } else if (name == VK_KHR_GET_MEMORY_REQUIREMENTS_2_EXTENSION_NAME) { enabled_extensions.push_back(ext.extensionName); @@ -622,6 +654,12 @@ void VulkanDeviceCreator::create_logical_device(bool manual_create) { if (has_swapchain) { ti_device_->vk_caps().present = true; } + + // Debug: Show final enabled extensions list + TI_DEBUG("AMD GPU: Total enabled extensions: " + std::to_string(enabled_extensions.size())); + for (const auto& ext : enabled_extensions) { + TI_DEBUG("AMD GPU: 
Enabled extension: " + std::string(ext)); + } VkPhysicalDeviceFeatures device_features{}; @@ -734,6 +772,8 @@ void VulkanDeviceCreator::create_logical_device(bool manual_create) { enabled_extensions.end() uint32_t vk_api_version = ti_device_->vk_caps().vk_api_version; + TI_DEBUG("AMD GPU: Vulkan API version: " + std::to_string(VK_VERSION_MAJOR(vk_api_version)) + "." + + std::to_string(VK_VERSION_MINOR(vk_api_version)) + "." + std::to_string(VK_VERSION_PATCH(vk_api_version))); #define CHECK_VERSION(major, minor) \ vk_api_version >= VK_MAKE_API_VERSION(0, major, minor, 0) From 5abcc6f16f7c209dc2c66709d496e5e75c49c3d9 Mon Sep 17 00:00:00 2001 From: Yunus Berndt Date: Tue, 9 Sep 2025 11:49:48 +1000 Subject: [PATCH 20/25] Add debug output for Vulkan instance creation and API version fallback --- taichi/rhi/vulkan/vulkan_device_creator.cpp | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/taichi/rhi/vulkan/vulkan_device_creator.cpp b/taichi/rhi/vulkan/vulkan_device_creator.cpp index c8f1472f834e4..b825c159a55ad 100644 --- a/taichi/rhi/vulkan/vulkan_device_creator.cpp +++ b/taichi/rhi/vulkan/vulkan_device_creator.cpp @@ -385,6 +385,12 @@ void VulkanDeviceCreator::create_instance(uint32_t vk_api_version, create_info.enabledExtensionCount = (uint32_t)confirmed_extensions.size(); create_info.ppEnabledExtensionNames = confirmed_extensions.data(); + // Debug: Show what API version we're requesting + TI_DEBUG("AMD GPU: Requesting Vulkan API version: " + + std::to_string(VK_VERSION_MAJOR(vk_api_version)) + "." + + std::to_string(VK_VERSION_MINOR(vk_api_version)) + "." 
+ + std::to_string(VK_VERSION_PATCH(vk_api_version))); + VkResult res = vkCreateInstance(&create_info, kNoVkAllocCallbacks, &instance_); @@ -392,11 +398,18 @@ void VulkanDeviceCreator::create_instance(uint32_t vk_api_version, // https://www.khronos.org/registry/vulkan/specs/1.2-extensions/man/html/VkApplicationInfo.html // Vulkan 1.0 implementation will return this when api version is not 1.0 // Vulkan 1.1+ implementation will work with maximum version set + TI_DEBUG("AMD GPU: VK_ERROR_INCOMPATIBLE_DRIVER - falling back to Vulkan 1.0"); ti_device_->vk_caps().vk_api_version = VK_API_VERSION_1_0; app_info.apiVersion = VK_API_VERSION_1_0; res = vkCreateInstance(&create_info, kNoVkAllocCallbacks, &instance_); + if (res == VK_SUCCESS) { + TI_DEBUG("AMD GPU: Successfully created Vulkan 1.0 instance"); + } else { + TI_DEBUG("AMD GPU: Failed to create Vulkan 1.0 instance with error: " + std::to_string(res)); + } } else { + TI_DEBUG("AMD GPU: Successfully created Vulkan instance with requested version"); ti_device_->vk_caps().vk_api_version = vk_api_version; } From 03dd5ca0a34da76d48e05e8c3e1406adeb890300 Mon Sep 17 00:00:00 2001 From: Yunus Berndt Date: Tue, 9 Sep 2025 11:50:32 +1000 Subject: [PATCH 21/25] Fix Vulkan API version fallback: try 1.1 before 1.0 for AMD GPU compatibility --- taichi/rhi/vulkan/vulkan_device_creator.cpp | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/taichi/rhi/vulkan/vulkan_device_creator.cpp b/taichi/rhi/vulkan/vulkan_device_creator.cpp index b825c159a55ad..2f9167c9bf96e 100644 --- a/taichi/rhi/vulkan/vulkan_device_creator.cpp +++ b/taichi/rhi/vulkan/vulkan_device_creator.cpp @@ -398,15 +398,24 @@ void VulkanDeviceCreator::create_instance(uint32_t vk_api_version, // https://www.khronos.org/registry/vulkan/specs/1.2-extensions/man/html/VkApplicationInfo.html // Vulkan 1.0 implementation will return this when api version is not 1.0 // Vulkan 1.1+ implementation will work with maximum version set - 
TI_DEBUG("AMD GPU: VK_ERROR_INCOMPATIBLE_DRIVER - falling back to Vulkan 1.0"); - ti_device_->vk_caps().vk_api_version = VK_API_VERSION_1_0; - app_info.apiVersion = VK_API_VERSION_1_0; + TI_DEBUG("AMD GPU: VK_ERROR_INCOMPATIBLE_DRIVER - trying Vulkan 1.1 fallback"); + ti_device_->vk_caps().vk_api_version = VK_API_VERSION_1_1; + app_info.apiVersion = VK_API_VERSION_1_1; res = vkCreateInstance(&create_info, kNoVkAllocCallbacks, &instance_); if (res == VK_SUCCESS) { - TI_DEBUG("AMD GPU: Successfully created Vulkan 1.0 instance"); + TI_DEBUG("AMD GPU: Successfully created Vulkan 1.1 instance"); } else { - TI_DEBUG("AMD GPU: Failed to create Vulkan 1.0 instance with error: " + std::to_string(res)); + TI_DEBUG("AMD GPU: Vulkan 1.1 failed, trying Vulkan 1.0 fallback"); + ti_device_->vk_caps().vk_api_version = VK_API_VERSION_1_0; + app_info.apiVersion = VK_API_VERSION_1_0; + + res = vkCreateInstance(&create_info, kNoVkAllocCallbacks, &instance_); + if (res == VK_SUCCESS) { + TI_DEBUG("AMD GPU: Successfully created Vulkan 1.0 instance"); + } else { + TI_DEBUG("AMD GPU: Failed to create Vulkan 1.0 instance with error: " + std::to_string(res)); + } } } else { TI_DEBUG("AMD GPU: Successfully created Vulkan instance with requested version"); From 3f9f23cb398dfa0b9b8f370bd1daf1f406aeff7b Mon Sep 17 00:00:00 2001 From: Yunus Berndt Date: Tue, 9 Sep 2025 13:35:20 +1000 Subject: [PATCH 22/25] Add debug output to check VK_KHR_SHADER_FLOAT16_INT8 at both instance and device levels --- taichi/rhi/vulkan/vulkan_device_creator.cpp | 22 +++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/taichi/rhi/vulkan/vulkan_device_creator.cpp b/taichi/rhi/vulkan/vulkan_device_creator.cpp index 2f9167c9bf96e..8a274bd9a12fb 100644 --- a/taichi/rhi/vulkan/vulkan_device_creator.cpp +++ b/taichi/rhi/vulkan/vulkan_device_creator.cpp @@ -359,8 +359,14 @@ void VulkanDeviceCreator::create_instance(uint32_t vk_api_version, vkEnumerateInstanceExtensionProperties(nullptr, 
&num_instance_extensions, supported_extensions.data()); + // Debug: Check if VK_KHR_SHADER_FLOAT16_INT8 is available at instance level + bool found_shader_float16_int8_instance = false; for (auto &ext : supported_extensions) { std::string name = ext.extensionName; + if (name == VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME) { + found_shader_float16_int8_instance = true; + TI_DEBUG("AMD GPU: Found VK_KHR_SHADER_FLOAT16_INT8 at instance level!"); + } if (name == VK_KHR_SURFACE_EXTENSION_NAME) { extensions.insert(name); ti_device_->vk_caps().surface = true; @@ -375,6 +381,9 @@ void VulkanDeviceCreator::create_instance(uint32_t vk_api_version, extensions.insert(name); } } + if (!found_shader_float16_int8_instance) { + TI_DEBUG("AMD GPU: VK_KHR_SHADER_FLOAT16_INT8 NOT found at instance level"); + } std::vector confirmed_extensions; confirmed_extensions.reserve(extensions.size()); @@ -597,6 +606,19 @@ void VulkanDeviceCreator::create_logical_device(bool manual_create) { // Debug: Show total number of extensions detected TI_DEBUG("AMD GPU: Total device extensions detected: " + std::to_string(extension_properties.size())); + // Debug: Check if VK_KHR_SHADER_FLOAT16_INT8 is in the device extensions + bool found_shader_float16_int8 = false; + for (auto &ext : extension_properties) { + if (std::string(ext.extensionName) == VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME) { + found_shader_float16_int8 = true; + TI_DEBUG("AMD GPU: Found VK_KHR_SHADER_FLOAT16_INT8 in device extensions!"); + break; + } + } + if (!found_shader_float16_int8) { + TI_DEBUG("AMD GPU: VK_KHR_SHADER_FLOAT16_INT8 NOT found in device extensions"); + } + for (auto &ext : extension_properties) { char msg_buf[256]; RHI_DEBUG_SNPRINTF(msg_buf, sizeof(msg_buf), From 4babaee3ace1e03950b5d754ffec5f2a119a9daa Mon Sep 17 00:00:00 2001 From: Yunus Berndt Date: Tue, 9 Sep 2025 15:17:29 +1000 Subject: [PATCH 23/25] Remove debug statements from vulkan_device_creator.cpp - Removed all TI_DEBUG statements added for 
troubleshooting - Cleaned up AMD GPU debugging output - Kept the core functionality and fixes intact - Ready for production build --- taichi/rhi/vulkan/vulkan_device_creator.cpp | 89 +-------------------- 1 file changed, 3 insertions(+), 86 deletions(-) diff --git a/taichi/rhi/vulkan/vulkan_device_creator.cpp b/taichi/rhi/vulkan/vulkan_device_creator.cpp index 8a274bd9a12fb..b4fa908baac78 100644 --- a/taichi/rhi/vulkan/vulkan_device_creator.cpp +++ b/taichi/rhi/vulkan/vulkan_device_creator.cpp @@ -225,6 +225,7 @@ size_t get_device_score(VkPhysicalDevice device, VkSurfaceKHR surface) { 1000; score += VK_API_VERSION_MINOR(properties.apiVersion) * 100; + return score; } @@ -359,14 +360,8 @@ void VulkanDeviceCreator::create_instance(uint32_t vk_api_version, vkEnumerateInstanceExtensionProperties(nullptr, &num_instance_extensions, supported_extensions.data()); - // Debug: Check if VK_KHR_SHADER_FLOAT16_INT8 is available at instance level - bool found_shader_float16_int8_instance = false; for (auto &ext : supported_extensions) { std::string name = ext.extensionName; - if (name == VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME) { - found_shader_float16_int8_instance = true; - TI_DEBUG("AMD GPU: Found VK_KHR_SHADER_FLOAT16_INT8 at instance level!"); - } if (name == VK_KHR_SURFACE_EXTENSION_NAME) { extensions.insert(name); ti_device_->vk_caps().surface = true; @@ -381,9 +376,6 @@ void VulkanDeviceCreator::create_instance(uint32_t vk_api_version, extensions.insert(name); } } - if (!found_shader_float16_int8_instance) { - TI_DEBUG("AMD GPU: VK_KHR_SHADER_FLOAT16_INT8 NOT found at instance level"); - } std::vector confirmed_extensions; confirmed_extensions.reserve(extensions.size()); @@ -394,11 +386,6 @@ void VulkanDeviceCreator::create_instance(uint32_t vk_api_version, create_info.enabledExtensionCount = (uint32_t)confirmed_extensions.size(); create_info.ppEnabledExtensionNames = confirmed_extensions.data(); - // Debug: Show what API version we're requesting - TI_DEBUG("AMD GPU: 
Requesting Vulkan API version: " + - std::to_string(VK_VERSION_MAJOR(vk_api_version)) + "." + - std::to_string(VK_VERSION_MINOR(vk_api_version)) + "." + - std::to_string(VK_VERSION_PATCH(vk_api_version))); VkResult res = vkCreateInstance(&create_info, kNoVkAllocCallbacks, &instance_); @@ -407,27 +394,17 @@ void VulkanDeviceCreator::create_instance(uint32_t vk_api_version, // https://www.khronos.org/registry/vulkan/specs/1.2-extensions/man/html/VkApplicationInfo.html // Vulkan 1.0 implementation will return this when api version is not 1.0 // Vulkan 1.1+ implementation will work with maximum version set - TI_DEBUG("AMD GPU: VK_ERROR_INCOMPATIBLE_DRIVER - trying Vulkan 1.1 fallback"); ti_device_->vk_caps().vk_api_version = VK_API_VERSION_1_1; app_info.apiVersion = VK_API_VERSION_1_1; res = vkCreateInstance(&create_info, kNoVkAllocCallbacks, &instance_); - if (res == VK_SUCCESS) { - TI_DEBUG("AMD GPU: Successfully created Vulkan 1.1 instance"); - } else { - TI_DEBUG("AMD GPU: Vulkan 1.1 failed, trying Vulkan 1.0 fallback"); + if (res != VK_SUCCESS) { ti_device_->vk_caps().vk_api_version = VK_API_VERSION_1_0; app_info.apiVersion = VK_API_VERSION_1_0; res = vkCreateInstance(&create_info, kNoVkAllocCallbacks, &instance_); - if (res == VK_SUCCESS) { - TI_DEBUG("AMD GPU: Successfully created Vulkan 1.0 instance"); - } else { - TI_DEBUG("AMD GPU: Failed to create Vulkan 1.0 instance with error: " + std::to_string(res)); - } } } else { - TI_DEBUG("AMD GPU: Successfully created Vulkan instance with requested version"); ti_device_->vk_caps().vk_api_version = vk_api_version; } @@ -469,19 +446,12 @@ void VulkanDeviceCreator::pick_physical_device(VkSurfaceKHR test_surface) { i, properties.deviceName); RHI_LOG_DEBUG(msg_buf); - // Debug: Show device properties for AMD GPU debugging - TI_DEBUG("AMD GPU: Device " + std::to_string(i) + " - " + std::string(properties.deviceName)); - TI_DEBUG("AMD GPU: Device " + std::to_string(i) + " - API Version: " + - 
std::to_string(VK_VERSION_MAJOR(properties.apiVersion)) + "." + - std::to_string(VK_VERSION_MINOR(properties.apiVersion)) + "." + - std::to_string(VK_VERSION_PATCH(properties.apiVersion))); } auto device_id = VulkanLoader::instance().visible_device_id; bool has_visible_device{false}; if (!device_id.empty()) { int id = std::stoi(device_id); - TI_DEBUG("AMD GPU: TI_VISIBLE_DEVICE set to: " + device_id); if (id < 0 || id >= device_count) { char msg_buf[128]; snprintf(msg_buf, sizeof(msg_buf), @@ -491,7 +461,6 @@ void VulkanDeviceCreator::pick_physical_device(VkSurfaceKHR test_surface) { } else if (get_device_score(devices[id], test_surface)) { physical_device_ = devices[id]; has_visible_device = true; - TI_DEBUG("AMD GPU: Selected device " + std::to_string(id) + " via TI_VISIBLE_DEVICE"); } } @@ -509,14 +478,6 @@ void VulkanDeviceCreator::pick_physical_device(VkSurfaceKHR test_surface) { RHI_ASSERT(physical_device_ != VK_NULL_HANDLE && "failed to find a suitable GPU"); - // Debug: Show final selected device - VkPhysicalDeviceProperties final_properties{}; - vkGetPhysicalDeviceProperties(physical_device_, &final_properties); - TI_DEBUG("AMD GPU: Final selected device: " + std::string(final_properties.deviceName)); - TI_DEBUG("AMD GPU: Final device API version: " + - std::to_string(VK_VERSION_MAJOR(final_properties.apiVersion)) + "." + - std::to_string(VK_VERSION_MINOR(final_properties.apiVersion)) + "." + - std::to_string(VK_VERSION_PATCH(final_properties.apiVersion))); queue_family_indices_ = find_queue_families(physical_device_, test_surface); } @@ -566,12 +527,6 @@ void VulkanDeviceCreator::create_logical_device(bool manual_create) { RHI_LOG_DEBUG(msg_buf); } - // Debug: Show device properties for AMD GPU debugging - TI_DEBUG("AMD GPU: Logical device creation - Device: " + std::string(physical_device_properties.deviceName)); - TI_DEBUG("AMD GPU: Logical device creation - API Version: " + - std::to_string(VK_VERSION_MAJOR(physical_device_properties.apiVersion)) + "." 
+ - std::to_string(VK_VERSION_MINOR(physical_device_properties.apiVersion)) + "." + - std::to_string(VK_VERSION_PATCH(physical_device_properties.apiVersion))); // (penguinliong) The actual logical device is created with lastest version of // Vulkan but we use the device like it has a lower version (if the user @@ -599,25 +554,11 @@ void VulkanDeviceCreator::create_logical_device(bool manual_create) { vkEnumerateDeviceExtensionProperties( physical_device_, nullptr, &extension_count, extension_properties.data()); + bool has_swapchain = false; [[maybe_unused]] bool portability_subset_enabled = false; - // Debug: Show total number of extensions detected - TI_DEBUG("AMD GPU: Total device extensions detected: " + std::to_string(extension_properties.size())); - - // Debug: Check if VK_KHR_SHADER_FLOAT16_INT8 is in the device extensions - bool found_shader_float16_int8 = false; - for (auto &ext : extension_properties) { - if (std::string(ext.extensionName) == VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME) { - found_shader_float16_int8 = true; - TI_DEBUG("AMD GPU: Found VK_KHR_SHADER_FLOAT16_INT8 in device extensions!"); - break; - } - } - if (!found_shader_float16_int8) { - TI_DEBUG("AMD GPU: VK_KHR_SHADER_FLOAT16_INT8 NOT found in device extensions"); - } for (auto &ext : extension_properties) { char msg_buf[256]; @@ -627,14 +568,6 @@ void VulkanDeviceCreator::create_logical_device(bool manual_create) { RHI_LOG_DEBUG(msg_buf); std::string name = std::string(ext.extensionName); - - // Debug: Check if VK_KHR_SHADER_FLOAT16_INT8 is detected - if (name == VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME) { - TI_DEBUG("AMD GPU: Found VK_KHR_SHADER_FLOAT16_INT8 extension in device extensions!"); - } - - // Debug: Show all extensions for AMD GPU debugging - TI_DEBUG("AMD GPU: Detected extension: " + name); if (name == "VK_KHR_portability_subset") { RHI_LOG_ERROR( @@ -665,7 +598,6 @@ void VulkanDeviceCreator::create_logical_device(bool manual_create) { } else if (name == 
VK_KHR_VARIABLE_POINTERS_EXTENSION_NAME) { enabled_extensions.push_back(ext.extensionName); } else if (name == VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME) { - TI_DEBUG("AMD GPU: Enabling VK_KHR_SHADER_FLOAT16_INT8 extension!"); enabled_extensions.push_back(ext.extensionName); } else if (name == VK_KHR_GET_MEMORY_REQUIREMENTS_2_EXTENSION_NAME) { enabled_extensions.push_back(ext.extensionName); @@ -699,11 +631,6 @@ void VulkanDeviceCreator::create_logical_device(bool manual_create) { ti_device_->vk_caps().present = true; } - // Debug: Show final enabled extensions list - TI_DEBUG("AMD GPU: Total enabled extensions: " + std::to_string(enabled_extensions.size())); - for (const auto& ext : enabled_extensions) { - TI_DEBUG("AMD GPU: Enabled extension: " + std::string(ext)); - } VkPhysicalDeviceFeatures device_features{}; @@ -816,8 +743,6 @@ void VulkanDeviceCreator::create_logical_device(bool manual_create) { enabled_extensions.end() uint32_t vk_api_version = ti_device_->vk_caps().vk_api_version; - TI_DEBUG("AMD GPU: Vulkan API version: " + std::to_string(VK_VERSION_MAJOR(vk_api_version)) + "." + - std::to_string(VK_VERSION_MINOR(vk_api_version)) + "." 
+ std::to_string(VK_VERSION_PATCH(vk_api_version))); #define CHECK_VERSION(major, minor) \ vk_api_version >= VK_MAKE_API_VERSION(0, major, minor, 0) @@ -875,13 +800,8 @@ void VulkanDeviceCreator::create_logical_device(bool manual_create) { // F16 / I8 if (CHECK_VERSION(1, 1) || CHECK_EXTENSION(VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME)) { - TI_DEBUG("AMD GPU: Checking VK_KHR_SHADER_FLOAT16_INT8 extension"); - TI_DEBUG("AMD GPU: Extension enabled: " + std::to_string(CHECK_EXTENSION(VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME))); - TI_DEBUG("AMD GPU: Vulkan version check: " + std::to_string(CHECK_VERSION(1, 1))); features2.pNext = &shader_f16_i8_feature; vkGetPhysicalDeviceFeatures2KHR(physical_device_, &features2); - TI_DEBUG("AMD GPU: shader_f16_i8_feature.shaderInt8 = " + std::to_string(shader_f16_i8_feature.shaderInt8)); - TI_DEBUG("AMD GPU: shader_f16_i8_feature.shaderFloat16 = " + std::to_string(shader_f16_i8_feature.shaderFloat16)); if (shader_f16_i8_feature.shaderFloat16) { caps.set(DeviceCapability::spirv_has_float16, true); @@ -890,9 +810,6 @@ void VulkanDeviceCreator::create_logical_device(bool manual_create) { if (shader_f16_i8_feature.shaderInt8) { caps.set(DeviceCapability::spirv_has_int8, true); shader_f16_i8_enable.shaderInt8 = VK_TRUE; // enable if supported - TI_DEBUG("AMD GPU: shaderInt8 feature detected and enabled"); - } else { - TI_DEBUG("AMD GPU: shaderInt8 feature NOT detected"); } } From 5674276bc10954308aa412362e966add3f2cc6b6 Mon Sep 17 00:00:00 2001 From: Yunus Berndt Date: Tue, 9 Sep 2025 15:42:09 +1000 Subject: [PATCH 24/25] changelog messages --- python/taichi/CHANGELOG.md | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/python/taichi/CHANGELOG.md b/python/taichi/CHANGELOG.md index d62073e6631c1..4af465a562ac0 100644 --- a/python/taichi/CHANGELOG.md +++ b/python/taichi/CHANGELOG.md @@ -1,6 +1,27 @@ Highlights: + - Fix Vulkan API version fallback: try 1.1 before 1.0 for AMD GPU compatibility (by **Yunus Berndt**) + 
- Refine Vulkan device creation and update build configuration (by **Yunus Berndt**) + - Better checks if vulkan SDK already exists (by **Yunus Berndt**) + - Vulkan patch for 8bit,16bit storage and 8 Bit arithmetic (by **Yunus Berndt**) Full changelog: + - Add debug output to check VK_KHR_SHADER_FLOAT16_INT8 at both instance and device levels (by **Yunus Berndt**) + - Fix Vulkan API version fallback: try 1.1 before 1.0 for AMD GPU compatibility (by **Yunus Berndt**) + - Add debug output for Vulkan instance creation and API version fallback (by **Yunus Berndt**) + - Add comprehensive debug output for AMD GPU extension detection and device selection (by **Yunus Berndt**) + - Add debug output to check if VK_KHR_SHADER_FLOAT16_INT8 extension is detected in device extensions (by **Yunus Berndt**) + - Add debug output to check if VK_KHR_SHADER_FLOAT16_INT8 extension is actually enabled (by **Yunus Berndt**) + - Fix debug output formatting for shaderInt8/shaderFloat16 values (by **Yunus Berndt**) + - Add detailed debug output for feature detection values (by **Yunus Berndt**) + - Add debug output for AMD GPU capability detection (by **Yunus Berndt**) + - Fix Vulkan 1.1 version check for AMD 8-bit/16-bit support (by **Yunus Berndt**) + - Fix pre-commit formatting issues: Black 25.1.0 and clang-format (by **Yunus Berndt**) + - Fix formatting issues: black, clang-format, trailing whitespace, and end-of-file fixes (by **Yunus Berndt**) + - Refine Vulkan device creation and update build configuration (by **Yunus Berndt**) + - Merge branch 'Vulkan-patch-for-AMD' of https://github.com/yunusberndt/taichi into Vulkan-patch-for-AMD (by **Yunus Berndt**) + - reverted gitignore to upstream standard (by **Yunus Berndt**) + - Enhance Vulkan support for AMD GPUs by refining 16-bit storage capabilities (by **Yunus Berndt**) + - [pre-commit.ci] auto fixes from pre-commit.com hooks (by **pre-commit-ci[bot]**) - fix: resolve Vulkan build and compatibility issues for AMD GPUs (by **Yunus 
Berndt**) - style(pre-commit): match pinned Black formatting (by **Yunus Berndt**) - style(black): reformat ti_build/vulkan.py (by **Yunus Berndt**) From 2424598c5053a47222f2810fda41e0de19cc0c93 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 9 Sep 2025 05:44:25 +0000 Subject: [PATCH 25/25] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- taichi/rhi/vulkan/vulkan_device_creator.cpp | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/taichi/rhi/vulkan/vulkan_device_creator.cpp b/taichi/rhi/vulkan/vulkan_device_creator.cpp index b4fa908baac78..db62ef57dea5d 100644 --- a/taichi/rhi/vulkan/vulkan_device_creator.cpp +++ b/taichi/rhi/vulkan/vulkan_device_creator.cpp @@ -225,7 +225,6 @@ size_t get_device_score(VkPhysicalDevice device, VkSurfaceKHR surface) { 1000; score += VK_API_VERSION_MINOR(properties.apiVersion) * 100; - return score; } @@ -386,7 +385,6 @@ void VulkanDeviceCreator::create_instance(uint32_t vk_api_version, create_info.enabledExtensionCount = (uint32_t)confirmed_extensions.size(); create_info.ppEnabledExtensionNames = confirmed_extensions.data(); - VkResult res = vkCreateInstance(&create_info, kNoVkAllocCallbacks, &instance_); @@ -401,7 +399,7 @@ void VulkanDeviceCreator::create_instance(uint32_t vk_api_version, if (res != VK_SUCCESS) { ti_device_->vk_caps().vk_api_version = VK_API_VERSION_1_0; app_info.apiVersion = VK_API_VERSION_1_0; - + res = vkCreateInstance(&create_info, kNoVkAllocCallbacks, &instance_); } } else { @@ -445,7 +443,6 @@ void VulkanDeviceCreator::pick_physical_device(VkSurfaceKHR test_surface) { RHI_DEBUG_SNPRINTF(msg_buf, sizeof(msg_buf), "Found Vulkan Device %d (%s)", i, properties.deviceName); RHI_LOG_DEBUG(msg_buf); - } auto device_id = VulkanLoader::instance().visible_device_id; @@ -478,7 +475,6 @@ void VulkanDeviceCreator::pick_physical_device(VkSurfaceKHR test_surface) { 
RHI_ASSERT(physical_device_ != VK_NULL_HANDLE && "failed to find a suitable GPU"); - queue_family_indices_ = find_queue_families(physical_device_, test_surface); } @@ -526,7 +522,6 @@ void VulkanDeviceCreator::create_logical_device(bool manual_create) { VK_API_VERSION_PATCH(physical_device_properties.apiVersion)); RHI_LOG_DEBUG(msg_buf); } - // (penguinliong) The actual logical device is created with lastest version of // Vulkan but we use the device like it has a lower version (if the user @@ -554,12 +549,10 @@ void VulkanDeviceCreator::create_logical_device(bool manual_create) { vkEnumerateDeviceExtensionProperties( physical_device_, nullptr, &extension_count, extension_properties.data()); - bool has_swapchain = false; [[maybe_unused]] bool portability_subset_enabled = false; - for (auto &ext : extension_properties) { char msg_buf[256]; RHI_DEBUG_SNPRINTF(msg_buf, sizeof(msg_buf), @@ -630,7 +623,6 @@ void VulkanDeviceCreator::create_logical_device(bool manual_create) { if (has_swapchain) { ti_device_->vk_caps().present = true; } - VkPhysicalDeviceFeatures device_features{};