Skip to content

Commit 60d2d96

Browse files
WIP: fix(dx12): align tex. <-> buf. copies via intermediate buffer if !UnrestrictedBufferTextureCopyPitchSupported
TODO: resolve `TODO` comments
1 parent e13a3e1 commit 60d2d96

File tree

5 files changed

+155
-18
lines changed

5 files changed

+155
-18
lines changed

tests/tests/wgpu-gpu/regression/issue_6827.rs

Lines changed: 1 addition & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -19,17 +19,7 @@ static TEST_SCATTER: GpuTestConfiguration = GpuTestConfiguration::new()
1919
.expect_fail(FailureCase::backend_adapter(
2020
wgpu::Backends::METAL,
2121
"Apple Paravirtual device", // CI on M1
22-
))
23-
.expect_fail(
24-
// Unfortunately this depends on if `D3D12_FEATURE_DATA_D3D12_OPTIONS13.UnrestrictedBufferTextureCopyPitchSupported`
25-
// is true, which we have no way to encode. This reproduces in CI though, so not too worried about it.
26-
FailureCase::backend(wgpu::Backends::DX12)
27-
.flaky()
28-
.validation_error(
29-
"D3D12_PLACED_SUBRESOURCE_FOOTPRINT::Offset must be a multiple of 512",
30-
)
31-
.panic("GraphicsCommandList::close failed: The parameter is incorrect"),
32-
),
22+
)),
3323
)
3424
.run_async(|ctx| async move { run_test(ctx, true).await });
3525

wgpu-hal/src/dx12/adapter.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -317,8 +317,7 @@ impl super::Adapter {
317317
suballocation_supported: !info.name.contains("Iris(R) Xe"),
318318
shader_model,
319319
max_sampler_descriptor_heap_size,
320-
_unrestricted_buffer_texture_copy_pitch_supported:
321-
unrestricted_buffer_texture_copy_pitch_supported,
320+
unrestricted_buffer_texture_copy_pitch_supported,
322321
};
323322

324323
// Theoretically vram limited, but in practice 2^20 is the limit
@@ -682,6 +681,7 @@ impl crate::Adapter for super::Adapter {
682681
queue: super::Queue {
683682
raw: queue,
684683
temp_lists: Mutex::new(Vec::new()),
684+
intermediate_copy_bufs: Mutex::new(Vec::new()),
685685
},
686686
})
687687
}

wgpu-hal/src/dx12/command.rs

Lines changed: 128 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ use crate::{
1414
dxgi::{name::ObjectExt, result::HResult as _},
1515
},
1616
dx12::borrow_interface_temporarily,
17-
AccelerationStructureEntries,
17+
AccelerationStructureEntries, CommandEncoder as _,
1818
};
1919

2020
fn make_box(origin: &wgt::Origin3d, size: &crate::CopyExtent) -> Direct3D12::D3D12_BOX {
@@ -312,6 +312,78 @@ impl super::CommandEncoder {
312312
}
313313
}
314314
}
315+
316+
unsafe fn buf_tex_intermediate<T>(
317+
&mut self,
318+
region: crate::BufferTextureCopy,
319+
tex_fmt: wgt::TextureFormat,
320+
copy_op: impl FnOnce(&mut Self, &super::Buffer, wgt::BufferSize, crate::BufferTextureCopy) -> T,
321+
) -> Option<(T, super::Buffer)> {
322+
let size = {
323+
let copy_info = region.buffer_layout.get_buffer_texture_copy_info(
324+
tex_fmt,
325+
region.texture_base.aspect.map(),
326+
&region.size.into(),
327+
);
328+
copy_info.bytes_in_copy
329+
};
330+
331+
let size = wgt::BufferSize::new(size)?;
332+
333+
let buffer = {
334+
let (resource, allocation) =
335+
super::suballocation::DeviceAllocationContext::from(&*self)
336+
.create_buffer(&crate::BufferDescriptor {
337+
label: None,
338+
size: size.get(),
339+
usage: wgt::BufferUses::COPY_SRC | wgt::BufferUses::COPY_DST,
340+
memory_flags: crate::MemoryFlags::empty(),
341+
})
342+
.expect(concat!(
343+
"internal error: ",
344+
"failed to allocate intermediate buffer ",
345+
"for offset alignment"
346+
));
347+
super::Buffer {
348+
resource,
349+
size: size.get(),
350+
allocation,
351+
}
352+
};
353+
354+
let mut region = region;
355+
region.buffer_layout.offset = 0;
356+
357+
unsafe {
358+
self.transition_buffers(
359+
[crate::BufferBarrier {
360+
buffer: &buffer,
361+
usage: crate::StateTransition {
362+
from: wgt::BufferUses::empty(),
363+
to: wgt::BufferUses::COPY_DST,
364+
},
365+
}]
366+
.into_iter(),
367+
)
368+
};
369+
370+
let t = copy_op(self, &buffer, size, region);
371+
372+
unsafe {
373+
self.transition_buffers(
374+
[crate::BufferBarrier {
375+
buffer: &buffer,
376+
usage: crate::StateTransition {
377+
from: wgt::BufferUses::COPY_DST,
378+
to: wgt::BufferUses::COPY_SRC,
379+
},
380+
}]
381+
.into_iter(),
382+
)
383+
};
384+
385+
Some((t, buffer))
386+
}
315387
}
316388

317389
impl crate::CommandEncoder for super::CommandEncoder {
@@ -363,7 +435,10 @@ impl crate::CommandEncoder for super::CommandEncoder {
363435
unsafe fn end_encoding(&mut self) -> Result<super::CommandBuffer, crate::DeviceError> {
364436
let raw = self.list.take().unwrap();
365437
unsafe { raw.Close() }.into_device_result("GraphicsCommandList::close")?;
366-
Ok(super::CommandBuffer { raw })
438+
Ok(super::CommandBuffer {
439+
raw,
440+
intermediate_copy_bufs: mem::take(&mut self.intermediate_copy_bufs).into(),
441+
})
367442
}
368443
unsafe fn reset_all<I: Iterator<Item = super::CommandBuffer>>(&mut self, command_buffers: I) {
369444
for cmd_buf in command_buffers {
@@ -612,7 +687,32 @@ impl crate::CommandEncoder for super::CommandEncoder {
612687
) where
613688
T: Iterator<Item = crate::BufferTextureCopy>,
614689
{
690+
let offset_alignment = self.shared.private_caps.texture_data_placement_alignment();
691+
615692
for r in regions {
693+
let is_offset_aligned = r.buffer_layout.offset % offset_alignment == 0;
694+
let (r, src) = if is_offset_aligned {
695+
(r, src)
696+
} else {
697+
let Some((r, src)) = (unsafe {
698+
let src_offset = r.buffer_layout.offset;
699+
self.buf_tex_intermediate(r, dst.format, |this, buf, size, r| {
700+
let layout = crate::BufferCopy {
701+
src_offset,
702+
dst_offset: 0,
703+
size,
704+
};
705+
this.copy_buffer_to_buffer(src, buf, [layout].into_iter());
706+
r
707+
})
708+
}) else {
709+
continue;
710+
};
711+
self.intermediate_copy_bufs.push(src);
712+
let src = self.intermediate_copy_bufs.last().unwrap();
713+
(r, src)
714+
};
715+
616716
let list = self.list.as_ref().unwrap();
617717

618718
let src_location = Direct3D12::D3D12_TEXTURE_COPY_LOCATION {
@@ -680,8 +780,33 @@ impl crate::CommandEncoder for super::CommandEncoder {
680780
};
681781
};
682782

783+
let offset_alignment = self.shared.private_caps.texture_data_placement_alignment();
784+
683785
for r in regions {
684-
copy_aligned(this, src, dst, r);
786+
let is_offset_aligned = r.buffer_layout.offset % offset_alignment == 0;
787+
if is_offset_aligned {
788+
copy_aligned(self, src, dst, r)
789+
} else {
790+
let orig_offset = r.buffer_layout.offset;
791+
let Some((r, src)) = (unsafe {
792+
self.buf_tex_intermediate(r, src.format, |this, buf, size, r| {
793+
copy_aligned(this, src, buf, r);
794+
crate::BufferCopy {
795+
src_offset: orig_offset,
796+
dst_offset: 0,
797+
size,
798+
}
799+
})
800+
}) else {
801+
continue;
802+
};
803+
804+
unsafe {
805+
self.copy_buffer_to_buffer(&src, dst, [r].into_iter());
806+
}
807+
808+
self.intermediate_copy_bufs.push(src);
809+
};
685810
}
686811
}
687812

wgpu-hal/src/dx12/device.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -732,6 +732,7 @@ impl crate::Device for super::Device {
732732
mem_allocator: self.mem_allocator.clone(),
733733
rtv_pool: Arc::clone(&self.rtv_pool),
734734
temp_rtv_handles: Vec::new(),
735+
intermediate_copy_bufs: Vec::new(),
735736
null_rtv_handle: self.null_rtv_handle,
736737
list: None,
737738
free_lists: Vec::new(),

wgpu-hal/src/dx12/mod.rs

Lines changed: 23 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -94,7 +94,11 @@ use windows::{
9494
core::{Free, Interface},
9595
Win32::{
9696
Foundation,
97-
Graphics::{Direct3D, Direct3D12, DirectComposition, Dxgi},
97+
Graphics::{
98+
Direct3D,
99+
Direct3D12::{self, D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT},
100+
DirectComposition, Dxgi,
101+
},
98102
System::Threading,
99103
},
100104
};
@@ -573,7 +577,17 @@ struct PrivateCapabilities {
573577
suballocation_supported: bool,
574578
shader_model: naga::back::hlsl::ShaderModel,
575579
max_sampler_descriptor_heap_size: u32,
576-
_unrestricted_buffer_texture_copy_pitch_supported: bool,
580+
unrestricted_buffer_texture_copy_pitch_supported: bool,
581+
}
582+
583+
impl PrivateCapabilities {
584+
fn texture_data_placement_alignment(&self) -> u64 {
585+
if self.unrestricted_buffer_texture_copy_pitch_supported {
586+
D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT.into()
587+
} else {
588+
4
589+
}
590+
}
577591
}
578592

579593
#[derive(Default)]
@@ -680,6 +694,7 @@ unsafe impl Sync for Device {}
680694
pub struct Queue {
681695
raw: Direct3D12::ID3D12CommandQueue,
682696
temp_lists: Mutex<Vec<Option<Direct3D12::ID3D12CommandList>>>,
697+
intermediate_copy_bufs: Mutex<Vec<Arc<Vec<Buffer>>>>,
683698
}
684699

685700
impl Queue {
@@ -802,6 +817,8 @@ pub struct CommandEncoder {
802817
rtv_pool: Arc<Mutex<descriptor::CpuPool>>,
803818
temp_rtv_handles: Vec<descriptor::Handle>,
804819

820+
intermediate_copy_bufs: Vec<Buffer>,
821+
805822
null_rtv_handle: descriptor::Handle,
806823
list: Option<Direct3D12::ID3D12GraphicsCommandList>,
807824
free_lists: Vec<Direct3D12::ID3D12GraphicsCommandList>,
@@ -830,6 +847,7 @@ impl fmt::Debug for CommandEncoder {
830847
#[derive(Debug)]
831848
pub struct CommandBuffer {
832849
raw: Direct3D12::ID3D12GraphicsCommandList,
850+
intermediate_copy_bufs: Arc<Vec<Buffer>>,
833851
}
834852

835853
impl crate::DynCommandBuffer for CommandBuffer {}
@@ -1414,8 +1432,11 @@ impl crate::Queue for Queue {
14141432
) -> Result<(), crate::DeviceError> {
14151433
let mut temp_lists = self.temp_lists.lock();
14161434
temp_lists.clear();
1435+
let mut intermediate_copy_bufs = self.intermediate_copy_bufs.lock();
14171436
for cmd_buf in command_buffers {
14181437
temp_lists.push(Some(cmd_buf.raw.clone().into()));
1438+
intermediate_copy_bufs.push(Arc::clone(&cmd_buf.intermediate_copy_bufs));
1439+
// TODO: When to clean accumulated copy buffers up?
14191440
}
14201441

14211442
{

0 commit comments

Comments
 (0)