diff --git a/CHANGELOG.md b/CHANGELOG.md index c9391b2b65..db6a1c23d8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -50,6 +50,7 @@ Bottom level categories: #### General +- Support BLAS compaction in wgpu-hal. By @Vecvec in [#7101](https://github.com/gfx-rs/wgpu/pull/7101). - Avoid using default features in many dependencies, etc. By Brody in [#7031](https://github.com/gfx-rs/wgpu/pull/7031) - Use `hashbrown` to simplify no-std support. By Brody in [#6938](https://github.com/gfx-rs/wgpu/pull/6938) & [#6925](https://github.com/gfx-rs/wgpu/pull/6925). - If you use Binding Arrays in a bind group, you may not use Dynamic Offset Buffers or Uniform Buffers in that bind group. By @cwfitzgerald in [#6811](https://github.com/gfx-rs/wgpu/pull/6811) diff --git a/wgpu-core/src/device/ray_tracing.rs b/wgpu-core/src/device/ray_tracing.rs index e5170ad790..950d53bcf7 100644 --- a/wgpu-core/src/device/ray_tracing.rs +++ b/wgpu-core/src/device/ray_tracing.rs @@ -85,6 +85,8 @@ impl Device { label: blas_desc.label.as_deref(), size: size_info.acceleration_structure_size, format: hal::AccelerationStructureFormat::BottomLevel, + // change this once compaction is implemented in wgpu-core + allow_compaction: false, }) } .map_err(DeviceError::from_hal)?; @@ -136,6 +138,7 @@ impl Device { label: desc.label.as_deref(), size: size_info.acceleration_structure_size, format: hal::AccelerationStructureFormat::TopLevel, + allow_compaction: false, }) } .map_err(DeviceError::from_hal)?; diff --git a/wgpu-hal/examples/ray-traced-triangle/main.rs b/wgpu-hal/examples/ray-traced-triangle/main.rs index 24f9c8b87d..a15aef8412 100644 --- a/wgpu-hal/examples/ray-traced-triangle/main.rs +++ b/wgpu-hal/examples/ray-traced-triangle/main.rs @@ -525,6 +525,7 @@ impl Example { label: Some("blas"), size: blas_sizes.acceleration_structure_size, format: hal::AccelerationStructureFormat::BottomLevel, + allow_compaction: false, }) } .unwrap(); @@ -534,6 +535,7 @@ impl Example { label: Some("tlas"), size: 
tlas_sizes.acceleration_structure_size, format: hal::AccelerationStructureFormat::TopLevel, + allow_compaction: false, }) } .unwrap(); diff --git a/wgpu-hal/src/dx12/command.rs b/wgpu-hal/src/dx12/command.rs index 914eaa62a3..033f932b4f 100644 --- a/wgpu-hal/src/dx12/command.rs +++ b/wgpu-hal/src/dx12/command.rs @@ -383,7 +383,9 @@ impl crate::CommandEncoder for super::CommandEncoder { }, }; self.temp.barriers.push(raw); - } else if barrier.usage.from == wgt::BufferUses::STORAGE_READ_WRITE { + } else if barrier.usage.from == wgt::BufferUses::STORAGE_READ_WRITE + || barrier.usage.from == wgt::BufferUses::ACCELERATION_STRUCTURE_QUERY + { let raw = Direct3D12::D3D12_RESOURCE_BARRIER { Type: Direct3D12::D3D12_RESOURCE_BARRIER_TYPE_UAV, Flags: Direct3D12::D3D12_RESOURCE_BARRIER_FLAG_NONE, @@ -681,6 +683,29 @@ impl crate::CommandEncoder for super::CommandEncoder { ) }; } + unsafe fn read_acceleration_structure_compact_size( + &mut self, + acceleration_structure: &super::AccelerationStructure, + buf: &super::Buffer, + ) { + let list = self + .list + .as_ref() + .unwrap() + .cast::() + .unwrap(); + unsafe { + list.EmitRaytracingAccelerationStructurePostbuildInfo( + &Direct3D12::D3D12_RAYTRACING_ACCELERATION_STRUCTURE_POSTBUILD_INFO_DESC { + DestBuffer: buf.resource.GetGPUVirtualAddress(), + InfoType: Direct3D12::D3D12_RAYTRACING_ACCELERATION_STRUCTURE_POSTBUILD_INFO_COMPACTED_SIZE, + }, + &[ + acceleration_structure.resource.GetGPUVirtualAddress() + ], + ) + } + } unsafe fn reset_queries(&mut self, _set: &super::QuerySet, _range: Range) { // nothing to do here } @@ -1505,4 +1530,25 @@ impl crate::CommandEncoder for super::CommandEncoder { }]) } } + + unsafe fn copy_acceleration_structure_to_acceleration_structure( + &mut self, + src: &super::AccelerationStructure, + dst: &super::AccelerationStructure, + copy: wgt::AccelerationStructureCopy, + ) { + let list = self + .list + .as_ref() + .unwrap() + .cast::() + .unwrap(); + unsafe { + 
list.CopyRaytracingAccelerationStructure( + dst.resource.GetGPUVirtualAddress(), + src.resource.GetGPUVirtualAddress(), + conv::map_acceleration_structure_copy_mode(copy), + ) + } + } } diff --git a/wgpu-hal/src/dx12/conv.rs b/wgpu-hal/src/dx12/conv.rs index 72ed13e793..3342a6ab06 100644 --- a/wgpu-hal/src/dx12/conv.rs +++ b/wgpu-hal/src/dx12/conv.rs @@ -4,7 +4,9 @@ pub fn map_buffer_usage_to_resource_flags( usage: wgt::BufferUses, ) -> Direct3D12::D3D12_RESOURCE_FLAGS { let mut flags = Direct3D12::D3D12_RESOURCE_FLAG_NONE; - if usage.contains(wgt::BufferUses::STORAGE_READ_WRITE) { + if usage.contains(wgt::BufferUses::STORAGE_READ_WRITE) + || usage.contains(wgt::BufferUses::ACCELERATION_STRUCTURE_QUERY) + { flags |= Direct3D12::D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS; } flags @@ -141,6 +143,9 @@ pub fn map_buffer_usage_to_state(usage: wgt::BufferUses) -> Direct3D12::D3D12_RE if usage.intersects(Bu::INDIRECT) { state |= Direct3D12::D3D12_RESOURCE_STATE_INDIRECT_ARGUMENT; } + if usage.intersects(Bu::ACCELERATION_STRUCTURE_QUERY) { + state |= Direct3D12::D3D12_RESOURCE_STATE_UNORDERED_ACCESS; + } state } @@ -398,3 +403,16 @@ pub(crate) fn map_acceleration_structure_geometry_flags( } d3d_flags } + +pub(crate) fn map_acceleration_structure_copy_mode( + mode: wgt::AccelerationStructureCopy, +) -> Direct3D12::D3D12_RAYTRACING_ACCELERATION_STRUCTURE_COPY_MODE { + match mode { + wgt::AccelerationStructureCopy::Clone => { + Direct3D12::D3D12_RAYTRACING_ACCELERATION_STRUCTURE_COPY_MODE_CLONE + } + wgt::AccelerationStructureCopy::Compact => { + Direct3D12::D3D12_RAYTRACING_ACCELERATION_STRUCTURE_COPY_MODE_COMPACT + } + } +} diff --git a/wgpu-hal/src/dynamic/command.rs b/wgpu-hal/src/dynamic/command.rs index 8fb65fa161..cc6dd81fe0 100644 --- a/wgpu-hal/src/dynamic/command.rs +++ b/wgpu-hal/src/dynamic/command.rs @@ -179,6 +179,18 @@ pub trait DynCommandEncoder: DynResource + std::fmt::Debug { &mut self, barrier: AccelerationStructureBarrier, ); + + unsafe fn 
copy_acceleration_structure_to_acceleration_structure( + &mut self, + src: &dyn DynAccelerationStructure, + dst: &dyn DynAccelerationStructure, + copy: wgt::AccelerationStructureCopy, + ); + unsafe fn read_acceleration_structure_compact_size( + &mut self, + acceleration_structure: &dyn DynAccelerationStructure, + buf: &dyn DynBuffer, + ); } impl DynCommandEncoder for C { @@ -611,6 +623,26 @@ impl DynCommandEncoder for C { ) { unsafe { C::place_acceleration_structure_barrier(self, barrier) }; } + + unsafe fn copy_acceleration_structure_to_acceleration_structure( + &mut self, + src: &dyn DynAccelerationStructure, + dst: &dyn DynAccelerationStructure, + copy: wgt::AccelerationStructureCopy, + ) { + let src = src.expect_downcast_ref(); + let dst = dst.expect_downcast_ref(); + unsafe { C::copy_acceleration_structure_to_acceleration_structure(self, src, dst, copy) }; + } + unsafe fn read_acceleration_structure_compact_size( + &mut self, + acceleration_structure: &dyn DynAccelerationStructure, + buf: &dyn DynBuffer, + ) { + let acceleration_structure = acceleration_structure.expect_downcast_ref(); + let buf = buf.expect_downcast_ref(); + unsafe { C::read_acceleration_structure_compact_size(self, acceleration_structure, buf) } + } } impl<'a> PassTimestampWrites<'a, dyn DynQuerySet> { diff --git a/wgpu-hal/src/empty.rs b/wgpu-hal/src/empty.rs index d3d3908ac3..27bd440ff7 100644 --- a/wgpu-hal/src/empty.rs +++ b/wgpu-hal/src/empty.rs @@ -378,6 +378,12 @@ impl crate::CommandEncoder for Encoder { unsafe fn begin_query(&mut self, set: &Resource, index: u32) {} unsafe fn end_query(&mut self, set: &Resource, index: u32) {} unsafe fn write_timestamp(&mut self, set: &Resource, index: u32) {} + unsafe fn read_acceleration_structure_compact_size( + &mut self, + acceleration_structure: &Resource, + buf: &Resource, + ) { + } unsafe fn reset_queries(&mut self, set: &Resource, range: Range) {} unsafe fn copy_query_results( &mut self, @@ -510,4 +516,12 @@ impl crate::CommandEncoder for 
Encoder { _barriers: crate::AccelerationStructureBarrier, ) { } + + unsafe fn copy_acceleration_structure_to_acceleration_structure( + &mut self, + src: &Resource, + dst: &Resource, + copy: wgt::AccelerationStructureCopy, + ) { + } } diff --git a/wgpu-hal/src/gles/command.rs b/wgpu-hal/src/gles/command.rs index b706c116e8..6f047682db 100644 --- a/wgpu-hal/src/gles/command.rs +++ b/wgpu-hal/src/gles/command.rs @@ -1209,4 +1209,21 @@ impl crate::CommandEncoder for super::CommandEncoder { ) { unimplemented!() } + + unsafe fn copy_acceleration_structure_to_acceleration_structure( + &mut self, + _src: &super::AccelerationStructure, + _dst: &super::AccelerationStructure, + _copy: wgt::AccelerationStructureCopy, + ) { + unimplemented!() + } + + unsafe fn read_acceleration_structure_compact_size( + &mut self, + _acceleration_structure: &super::AccelerationStructure, + _buf: &super::Buffer, + ) { + unimplemented!() + } } diff --git a/wgpu-hal/src/lib.rs b/wgpu-hal/src/lib.rs index fd5f272b61..bf7cb36429 100644 --- a/wgpu-hal/src/lib.rs +++ b/wgpu-hal/src/lib.rs @@ -1249,6 +1249,12 @@ pub trait CommandEncoder: WasmNotSendSync + fmt::Debug { ) where T: Iterator; + unsafe fn copy_acceleration_structure_to_acceleration_structure( + &mut self, + src: &::AccelerationStructure, + dst: &::AccelerationStructure, + copy: wgt::AccelerationStructureCopy, + ); // pass common /// Sets the bind group at `index` to `group`. 
@@ -1509,6 +1515,12 @@ pub trait CommandEncoder: WasmNotSendSync + fmt::Debug { &mut self, barrier: AccelerationStructureBarrier, ); + // Modeled on dx12, because the dx12 approach can be polyfilled on Vulkan but not the other way round + unsafe fn read_acceleration_structure_compact_size( + &mut self, + acceleration_structure: &::AccelerationStructure, + buf: &::Buffer, + ); } bitflags!( @@ -2311,6 +2323,7 @@ pub struct AccelerationStructureDescriptor<'a> { pub label: Label<'a>, pub size: wgt::BufferAddress, pub format: AccelerationStructureFormat, + pub allow_compaction: bool, } #[derive(Debug, Clone, Copy, Eq, PartialEq)] @@ -2397,6 +2410,11 @@ pub struct AccelerationStructureAABBs<'a, B: DynBuffer + ?Sized> { pub flags: AccelerationStructureGeometryFlags, } +pub struct AccelerationStructureCopy { + pub copy_flags: wgt::AccelerationStructureCopy, + pub type_flags: wgt::AccelerationStructureType, +} + /// * `offset` - offset in bytes #[derive(Clone, Debug)] pub struct AccelerationStructureInstances<'a, B: DynBuffer + ?Sized> { @@ -2433,6 +2451,12 @@ bitflags::bitflags! 
{ const BUILD_OUTPUT = 1 << 1; // Tlas used in a shader const SHADER_INPUT = 1 << 2; + // BLAS used to query compacted size + const QUERY_INPUT = 1 << 3; + // BLAS used as a src for a copy operation + const COPY_SRC = 1 << 4; + // BLAS used as a dst for a copy operation + const COPY_DST = 1 << 5; } } diff --git a/wgpu-hal/src/metal/command.rs b/wgpu-hal/src/metal/command.rs index c3f2c8cc59..a836765b18 100644 --- a/wgpu-hal/src/metal/command.rs +++ b/wgpu-hal/src/metal/command.rs @@ -392,6 +392,15 @@ impl crate::CommandEncoder for super::CommandEncoder { } } + unsafe fn copy_acceleration_structure_to_acceleration_structure( + &mut self, + _src: &super::AccelerationStructure, + _dst: &super::AccelerationStructure, + _copy: wgt::AccelerationStructureCopy, + ) { + unimplemented!() + } + unsafe fn begin_query(&mut self, set: &super::QuerySet, index: u32) { match set.ty { wgt::QueryType::Occlusion => { @@ -1292,6 +1301,14 @@ impl crate::CommandEncoder for super::CommandEncoder { ) { unimplemented!() } + + unsafe fn read_acceleration_structure_compact_size( + &mut self, + _acceleration_structure: &super::AccelerationStructure, + _buf: &super::Buffer, + ) { + unimplemented!() + } } impl Drop for super::CommandEncoder { diff --git a/wgpu-hal/src/vulkan/command.rs b/wgpu-hal/src/vulkan/command.rs index 8e5f243ee5..ca19d0dcae 100644 --- a/wgpu-hal/src/vulkan/command.rs +++ b/wgpu-hal/src/vulkan/command.rs @@ -388,6 +388,46 @@ impl crate::CommandEncoder for super::CommandEncoder { ) }; } + unsafe fn read_acceleration_structure_compact_size( + &mut self, + acceleration_structure: &super::AccelerationStructure, + buffer: &super::Buffer, + ) { + let ray_tracing_functions = self + .device + .extension_fns + .ray_tracing + .as_ref() + .expect("Feature `RAY_TRACING` not enabled"); + let query_pool = acceleration_structure + .compacted_size_query + .as_ref() + .unwrap(); + unsafe { + self.device + .raw + .cmd_reset_query_pool(self.active, *query_pool, 0, 1); + ray_tracing_functions 
+ .acceleration_structure + .cmd_write_acceleration_structures_properties( + self.active, + &[acceleration_structure.raw], + vk::QueryType::ACCELERATION_STRUCTURE_COMPACTED_SIZE_KHR, + *query_pool, + 0, + ); + self.device.raw.cmd_copy_query_pool_results( + self.active, + *query_pool, + 0, + 1, + buffer.raw, + 0, + wgt::QUERY_SIZE as vk::DeviceSize, + vk::QueryResultFlags::TYPE_64 | vk::QueryResultFlags::WAIT, + ) + }; + } unsafe fn reset_queries(&mut self, set: &super::QuerySet, range: Range) { unsafe { self.device.raw.cmd_reset_query_pool( @@ -1152,6 +1192,43 @@ impl crate::CommandEncoder for super::CommandEncoder { .cmd_dispatch_indirect(self.active, buffer.raw, offset) } } + + unsafe fn copy_acceleration_structure_to_acceleration_structure( + &mut self, + src: &super::AccelerationStructure, + dst: &super::AccelerationStructure, + copy: wgt::AccelerationStructureCopy, + ) { + let ray_tracing_functions = self + .device + .extension_fns + .ray_tracing + .as_ref() + .expect("Feature `RAY_TRACING` not enabled"); + + let mode = match copy { + wgt::AccelerationStructureCopy::Clone => vk::CopyAccelerationStructureModeKHR::CLONE, + wgt::AccelerationStructureCopy::Compact => { + vk::CopyAccelerationStructureModeKHR::COMPACT + } + }; + + unsafe { + ray_tracing_functions + .acceleration_structure + .cmd_copy_acceleration_structure( + self.active, + &vk::CopyAccelerationStructureInfoKHR { + s_type: vk::StructureType::COPY_ACCELERATION_STRUCTURE_INFO_KHR, + p_next: std::ptr::null(), + src: src.raw, + dst: dst.raw, + mode, + _marker: Default::default(), + }, + ); + } + } } #[test] diff --git a/wgpu-hal/src/vulkan/conv.rs b/wgpu-hal/src/vulkan/conv.rs index 194e86b947..24cc3f4564 100644 --- a/wgpu-hal/src/vulkan/conv.rs +++ b/wgpu-hal/src/vulkan/conv.rs @@ -551,6 +551,9 @@ pub fn map_buffer_usage(usage: wgt::BufferUses) -> vk::BufferUsageFlags { flags |= vk::BufferUsageFlags::ACCELERATION_STRUCTURE_BUILD_INPUT_READ_ONLY_KHR | vk::BufferUsageFlags::SHADER_DEVICE_ADDRESS; } + if 
usage.intersects(wgt::BufferUses::ACCELERATION_STRUCTURE_QUERY) { + flags |= vk::BufferUsageFlags::TRANSFER_DST; + } flags } @@ -612,6 +615,10 @@ pub fn map_buffer_usage_to_barrier( access |= vk::AccessFlags::ACCELERATION_STRUCTURE_READ_KHR | vk::AccessFlags::ACCELERATION_STRUCTURE_WRITE_KHR; } + if usage.contains(wgt::BufferUses::ACCELERATION_STRUCTURE_QUERY) { + stages |= vk::PipelineStageFlags::TRANSFER; + access |= vk::AccessFlags::TRANSFER_WRITE; + } (stages, access) } @@ -974,6 +981,10 @@ pub fn map_acceleration_structure_usage_to_barrier( stages |= vk::PipelineStageFlags::ACCELERATION_STRUCTURE_BUILD_KHR; access |= vk::AccessFlags::ACCELERATION_STRUCTURE_READ_KHR; } + if usage.contains(crate::AccelerationStructureUses::QUERY_INPUT) { + stages |= vk::PipelineStageFlags::ACCELERATION_STRUCTURE_BUILD_KHR; + access |= vk::AccessFlags::ACCELERATION_STRUCTURE_READ_KHR; + } if usage.contains(crate::AccelerationStructureUses::BUILD_OUTPUT) { stages |= vk::PipelineStageFlags::ACCELERATION_STRUCTURE_BUILD_KHR; access |= vk::AccessFlags::ACCELERATION_STRUCTURE_WRITE_KHR; @@ -986,6 +997,14 @@ pub fn map_acceleration_structure_usage_to_barrier( | vk::PipelineStageFlags::COMPUTE_SHADER; access |= vk::AccessFlags::ACCELERATION_STRUCTURE_READ_KHR; } + if usage.contains(crate::AccelerationStructureUses::COPY_SRC) { + stages |= vk::PipelineStageFlags::ACCELERATION_STRUCTURE_BUILD_KHR; + access |= vk::AccessFlags::ACCELERATION_STRUCTURE_READ_KHR; + } + if usage.contains(crate::AccelerationStructureUses::COPY_DST) { + stages |= vk::PipelineStageFlags::ACCELERATION_STRUCTURE_BUILD_KHR; + access |= vk::AccessFlags::ACCELERATION_STRUCTURE_WRITE_KHR; + } (stages, access) } diff --git a/wgpu-hal/src/vulkan/device.rs b/wgpu-hal/src/vulkan/device.rs index e17f211b05..6e528d6bb1 100644 --- a/wgpu-hal/src/vulkan/device.rs +++ b/wgpu-hal/src/vulkan/device.rs @@ -2525,10 +2525,26 @@ impl crate::Device for super::Device { .set_object_name(raw_acceleration_structure, label); } + let pool = 
if desc.allow_compaction { + let vk_info = vk::QueryPoolCreateInfo::default() + .query_type(vk::QueryType::ACCELERATION_STRUCTURE_COMPACTED_SIZE_KHR) + .query_count(1); + + let raw = self + .shared + .raw + .create_query_pool(&vk_info, None) + .map_err(super::map_host_oom_and_ioca_err)?; + Some(raw) + } else { + None + }; + Ok(super::AccelerationStructure { raw: raw_acceleration_structure, buffer: raw_buffer, block: Mutex::new(block), + compacted_size_query: pool, }) } } @@ -2554,6 +2570,9 @@ impl crate::Device for super::Device { self.mem_allocator .lock() .dealloc(&*self.shared, acceleration_structure.block.into_inner()); + if let Some(query) = acceleration_structure.compacted_size_query { + self.shared.raw.destroy_query_pool(query, None) + } } } diff --git a/wgpu-hal/src/vulkan/mod.rs b/wgpu-hal/src/vulkan/mod.rs index ed8dfc39d3..8a3c68b6ec 100644 --- a/wgpu-hal/src/vulkan/mod.rs +++ b/wgpu-hal/src/vulkan/mod.rs @@ -786,6 +786,7 @@ pub struct AccelerationStructure { raw: vk::AccelerationStructureKHR, buffer: vk::Buffer, block: Mutex>, + compacted_size_query: Option, } impl crate::DynAccelerationStructure for AccelerationStructure {} diff --git a/wgpu-types/src/lib.rs b/wgpu-types/src/lib.rs index b9ac751a26..98456eb836 100644 --- a/wgpu-types/src/lib.rs +++ b/wgpu-types/src/lib.rs @@ -4736,6 +4736,8 @@ bitflags::bitflags! { const BOTTOM_LEVEL_ACCELERATION_STRUCTURE_INPUT = 1 << 12; /// Buffer used for top level acceleration structure building. const TOP_LEVEL_ACCELERATION_STRUCTURE_INPUT = 1 << 13; + /// A buffer used to store the compacted size of an acceleration structure + const ACCELERATION_STRUCTURE_QUERY = 1 << 14; /// The combination of states that a buffer may be in _at the same time_. 
const INCLUSIVE = Self::MAP_READ.bits() | Self::COPY_SRC.bits() | Self::INDEX.bits() | Self::VERTEX.bits() | Self::UNIFORM.bits() | @@ -7195,6 +7197,26 @@ bitflags::bitflags!( } ); +#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] +/// What a copy between acceleration structures should do +pub enum AccelerationStructureCopy { + /// Directly duplicate one acceleration structure into another + Clone, + /// Duplicate and compact an acceleration structure + Compact, +} + +#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] +/// The type of data an acceleration structure contains +pub enum AccelerationStructureType { + /// The acceleration structure's data is triangles + Triangles, + /// The acceleration structure's data is axis-aligned bounding boxes + AABBs, + /// The acceleration structure's data is instances + Instances, +} + /// Alignment requirement for transform buffers used in acceleration structure builds pub const TRANSFORM_BUFFER_ALIGNMENT: BufferAddress = 16;