diff --git a/CHANGELOG.md b/CHANGELOG.md index 4360ce16004..c8482e5ef27 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -65,6 +65,10 @@ Naga now requires that no type be larger than 1 GB. This limit may be lowered in ### Bug Fixes +#### DX12 + +- Align copies b/w textures and buffers via a single intermediate buffer per copy when `D3D12_FEATURE_DATA_D3D12_OPTIONS13.UnrestrictedBufferTextureCopyPitchSupported` is `false`. By @ErichDonGubler in [#7721](https://github.com/gfx-rs/wgpu/pull/7721). + #### Vulkan Fix `STATUS_HEAP_CORRUPTION` crash when concurrently calling `create_sampler`. By @atlv24 in [#8043](https://github.com/gfx-rs/wgpu/pull/8043). diff --git a/tests/tests/wgpu-gpu/regression/issue_6827.rs b/tests/tests/wgpu-gpu/regression/issue_6827.rs index af29947285e..d873721269b 100644 --- a/tests/tests/wgpu-gpu/regression/issue_6827.rs +++ b/tests/tests/wgpu-gpu/regression/issue_6827.rs @@ -25,17 +25,7 @@ static TEST_SCATTER: GpuTestConfiguration = GpuTestConfiguration::new() .expect_fail(FailureCase::backend_adapter( wgpu::Backends::METAL, "Apple Paravirtual device", // CI on M1 - )) - .expect_fail( - // Unfortunately this depends on if `D3D12_FEATURE_DATA_D3D12_OPTIONS13.UnrestrictedBufferTextureCopyPitchSupported` - // is true, which we have no way to encode. This reproduces in CI though, so not too worried about it. - FailureCase::backend(wgpu::Backends::DX12) - .flaky() - .validation_error( - "D3D12_PLACED_SUBRESOURCE_FOOTPRINT::Offset must be a multiple of 512", - ) - .panic("GraphicsCommandList::close failed: The parameter is incorrect"), - ), + )), ) .run_async(|ctx| async move { run_test(ctx, true).await }); diff --git a/wgpu-hal/src/dx12/adapter.rs b/wgpu-hal/src/dx12/adapter.rs index 22d52be4d7e..1142b3cab39 100644 --- a/wgpu-hal/src/dx12/adapter.rs +++ b/wgpu-hal/src/dx12/adapter.rs @@ -198,6 +198,21 @@ impl super::Adapter { .is_ok() }; + let unrestricted_buffer_texture_copy_pitch_supported = { + let mut features13 = Direct3D12::D3D12_FEATURE_DATA_D3D12_OPTIONS13::default(); + unsafe { + device.CheckFeatureSupport( + Direct3D12::D3D12_FEATURE_D3D12_OPTIONS13, + <*mut _>::cast(&mut features13), + size_of_val(&features13) as u32, + ) + } + .is_ok() + && features13 + .UnrestrictedBufferTextureCopyPitchSupported + .as_bool() + }; + let mut max_sampler_descriptor_heap_size = Direct3D12::D3D12_MAX_SHADER_VISIBLE_SAMPLER_HEAP_SIZE; { @@ -302,6 +317,7 @@ impl super::Adapter { suballocation_supported: !info.name.contains("Iris(R) Xe"), shader_model, max_sampler_descriptor_heap_size, + unrestricted_buffer_texture_copy_pitch_supported, }; // Theoretically vram limited, but in practice 2^20 is the limit diff --git a/wgpu-hal/src/dx12/command.rs b/wgpu-hal/src/dx12/command.rs index edfea952ed2..e798924516a 100644 --- a/wgpu-hal/src/dx12/command.rs +++ b/wgpu-hal/src/dx12/command.rs @@ -14,7 +14,7 @@ use crate::{ dxgi::{name::ObjectExt, result::HResult as _}, }, dx12::borrow_interface_temporarily, - AccelerationStructureEntries, + AccelerationStructureEntries, CommandEncoder as _, }; fn make_box(origin: &wgt::Origin3d, size: &crate::CopyExtent) -> Direct3D12::D3D12_BOX { @@ -312,6 +312,78 @@ impl super::CommandEncoder { } } } + + unsafe fn buf_tex_intermediate( + &mut self, + region: crate::BufferTextureCopy, + tex_fmt: wgt::TextureFormat, + copy_op: impl FnOnce(&mut Self, &super::Buffer, wgt::BufferSize, crate::BufferTextureCopy) -> T, + ) -> Option<(T, super::Buffer)> { + let size = { + let copy_info = region.buffer_layout.get_buffer_texture_copy_info( + tex_fmt, + region.texture_base.aspect.map(), + ®ion.size.into(), + ); + copy_info.unwrap().bytes_in_copy + }; + + let size = wgt::BufferSize::new(size)?; + + let buffer = { + let (resource, allocation) = + super::suballocation::DeviceAllocationContext::from(&*self) + .create_buffer(&crate::BufferDescriptor { + label: None, + size: size.get(), + usage: wgt::BufferUses::COPY_SRC | wgt::BufferUses::COPY_DST, + memory_flags: crate::MemoryFlags::empty(), + }) + .expect(concat!( + "internal error: ", + "failed to allocate intermediate buffer ", + "for offset alignment" + )); + super::Buffer { + resource, + size: size.get(), + allocation, + } + }; + + let mut region = region; + region.buffer_layout.offset = 0; + + unsafe { + self.transition_buffers( + [crate::BufferBarrier { + buffer: &buffer, + usage: crate::StateTransition { + from: wgt::BufferUses::empty(), + to: wgt::BufferUses::COPY_DST, + }, + }] + .into_iter(), + ) + }; + + let t = copy_op(self, &buffer, size, region); + + unsafe { + self.transition_buffers( + [crate::BufferBarrier { + buffer: &buffer, + usage: crate::StateTransition { + from: wgt::BufferUses::COPY_DST, + to: wgt::BufferUses::COPY_SRC, + }, + }] + .into_iter(), + ) + }; + + Some((t, buffer)) + } } impl crate::CommandEncoder for super::CommandEncoder { @@ -366,6 +438,7 @@ impl crate::CommandEncoder for super::CommandEncoder { Ok(super::CommandBuffer { raw }) } unsafe fn reset_all>(&mut self, command_buffers: I) { + self.intermediate_copy_bufs.clear(); for cmd_buf in command_buffers { self.free_lists.push(cmd_buf.raw); } @@ -612,30 +685,61 @@ impl crate::CommandEncoder for super::CommandEncoder { ) where T: Iterator, { - let list = self.list.as_ref().unwrap(); - for r in regions { + let offset_alignment = self.shared.private_caps.texture_data_placement_alignment(); + + for naive_copy_region in regions { + let is_offset_aligned = naive_copy_region.buffer_layout.offset % offset_alignment == 0; + let (final_copy_region, src) = if is_offset_aligned { + (naive_copy_region, src) + } else { + let Some((intermediate_to_dst_region, intermediate_buf)) = (unsafe { + let src_offset = naive_copy_region.buffer_layout.offset; + self.buf_tex_intermediate( + naive_copy_region, + dst.format, + |this, buf, size, intermediate_to_dst_region| { + let layout = crate::BufferCopy { + src_offset, + dst_offset: 0, + size, + }; + this.copy_buffer_to_buffer(src, buf, [layout].into_iter()); + intermediate_to_dst_region + }, + ) + }) else { + continue; + }; + self.intermediate_copy_bufs.push(intermediate_buf); + let intermediate_buf = self.intermediate_copy_bufs.last().unwrap(); + (intermediate_to_dst_region, intermediate_buf) + }; + + let list = self.list.as_ref().unwrap(); + let src_location = Direct3D12::D3D12_TEXTURE_COPY_LOCATION { pResource: unsafe { borrow_interface_temporarily(&src.resource) }, Type: Direct3D12::D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT, Anonymous: Direct3D12::D3D12_TEXTURE_COPY_LOCATION_0 { - PlacedFootprint: r.to_subresource_footprint(dst.format), + PlacedFootprint: final_copy_region.to_subresource_footprint(dst.format), }, }; let dst_location = Direct3D12::D3D12_TEXTURE_COPY_LOCATION { pResource: unsafe { borrow_interface_temporarily(&dst.resource) }, Type: Direct3D12::D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX, Anonymous: Direct3D12::D3D12_TEXTURE_COPY_LOCATION_0 { - SubresourceIndex: dst.calc_subresource_for_copy(&r.texture_base), + SubresourceIndex: dst + .calc_subresource_for_copy(&final_copy_region.texture_base), }, }; - let src_box = make_box(&wgt::Origin3d::ZERO, &r.size); + let src_box = make_box(&wgt::Origin3d::ZERO, &final_copy_region.size); unsafe { list.CopyTextureRegion( &dst_location, - r.texture_base.origin.x, - r.texture_base.origin.y, - r.texture_base.origin.z, + final_copy_region.texture_base.origin.x, + final_copy_region.texture_base.origin.y, + final_copy_region.texture_base.origin.z, &src_location, Some(&src_box), ) @@ -652,8 +756,12 @@ impl crate::CommandEncoder for super::CommandEncoder { ) where T: Iterator, { - let list = self.list.as_ref().unwrap(); - for r in regions { + let copy_aligned = |this: &mut Self, + src: &super::Texture, + dst: &super::Buffer, + r: crate::BufferTextureCopy| { + let list = this.list.as_ref().unwrap(); + let src_location = Direct3D12::D3D12_TEXTURE_COPY_LOCATION { pResource: unsafe { borrow_interface_temporarily(&src.resource) }, Type: Direct3D12::D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX, @@ -673,6 +781,39 @@ impl crate::CommandEncoder for super::CommandEncoder { unsafe { list.CopyTextureRegion(&dst_location, 0, 0, 0, &src_location, Some(&src_box)) }; + }; + + let offset_alignment = self.shared.private_caps.texture_data_placement_alignment(); + + for r in regions { + let is_offset_aligned = r.buffer_layout.offset % offset_alignment == 0; + if is_offset_aligned { + copy_aligned(self, src, dst, r) + } else { + let orig_offset = r.buffer_layout.offset; + let Some((intermediate_to_dst_region, src)) = (unsafe { + self.buf_tex_intermediate( + r, + src.format, + |this, buf, size, intermediate_region| { + copy_aligned(this, src, buf, intermediate_region); + crate::BufferCopy { + src_offset: orig_offset, + dst_offset: 0, + size, + } + }, + ) + }) else { + continue; + }; + + unsafe { + self.copy_buffer_to_buffer(&src, dst, [intermediate_to_dst_region].into_iter()); + } + + self.intermediate_copy_bufs.push(src); + }; } } diff --git a/wgpu-hal/src/dx12/device.rs b/wgpu-hal/src/dx12/device.rs index d8bdd2b1d5f..b6476a60f7a 100644 --- a/wgpu-hal/src/dx12/device.rs +++ b/wgpu-hal/src/dx12/device.rs @@ -778,6 +778,7 @@ impl crate::Device for super::Device { mem_allocator: self.mem_allocator.clone(), rtv_pool: Arc::clone(&self.rtv_pool), temp_rtv_handles: Vec::new(), + intermediate_copy_bufs: Vec::new(), null_rtv_handle: self.null_rtv_handle, list: None, free_lists: Vec::new(), diff --git a/wgpu-hal/src/dx12/mod.rs b/wgpu-hal/src/dx12/mod.rs index a102006acb0..7d368038edb 100644 --- a/wgpu-hal/src/dx12/mod.rs +++ b/wgpu-hal/src/dx12/mod.rs @@ -95,7 +95,11 @@ use windows::{ core::{Free, Interface}, Win32::{ Foundation, - Graphics::{Direct3D, Direct3D12, DirectComposition, Dxgi}, + Graphics::{ + Direct3D, + Direct3D12::{self, D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT}, + DirectComposition, Dxgi, + }, System::Threading, }, }; @@ -581,6 +585,17 @@ struct PrivateCapabilities { suballocation_supported: bool, shader_model: naga::back::hlsl::ShaderModel, max_sampler_descriptor_heap_size: u32, + unrestricted_buffer_texture_copy_pitch_supported: bool, +} + +impl PrivateCapabilities { + fn texture_data_placement_alignment(&self) -> u64 { + if self.unrestricted_buffer_texture_copy_pitch_supported { + 4 + } else { + D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT.into() + } + } } #[derive(Default)] @@ -816,6 +831,8 @@ pub struct CommandEncoder { rtv_pool: Arc>, temp_rtv_handles: Vec, + intermediate_copy_bufs: Vec, + null_rtv_handle: descriptor::Handle, list: Option, free_lists: Vec, diff --git a/wgpu-hal/src/lib.rs b/wgpu-hal/src/lib.rs index e6b4e0b0f89..7e9a357f588 100644 --- a/wgpu-hal/src/lib.rs +++ b/wgpu-hal/src/lib.rs @@ -2430,6 +2430,36 @@ pub struct CopyExtent { pub depth: u32, } +impl From for CopyExtent { + fn from(value: wgt::Extent3d) -> Self { + let wgt::Extent3d { + width, + height, + depth_or_array_layers, + } = value; + Self { + width, + height, + depth: depth_or_array_layers, + } + } +} + +impl From for wgt::Extent3d { + fn from(value: CopyExtent) -> Self { + let CopyExtent { + width, + height, + depth, + } = value; + Self { + width, + height, + depth_or_array_layers: depth, + } + } +} + #[derive(Clone, Debug)] pub struct TextureCopy { pub src_base: TextureCopyBase,