Skip to content

Commit cba77d6

Browse files
bors[bot] and kvark committed
Merge #2185
2185: Various Metal performance optimizations r=grovesNL a=kvark Helps #2161. I'm now getting 80-85 fps on the test run. Aside from Metal, this also changes HAL to avoid heap allocation for vertex buffer binding. PR checklist: - [x] `make` succeeds (on *nix) - [x] `make reftests` succeeds - [x] tested examples with the following backends: metal Co-authored-by: Dzmitry Malyshau <[email protected]>
2 parents bf3948a + 96871af commit cba77d6

File tree

15 files changed

+152
-106
lines changed

15 files changed

+152
-106
lines changed

Makefile

+4-1
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ else
3636
endif
3737

3838

39-
.PHONY: all check test reftests travis-sdl2
39+
.PHONY: all check quad test reftests travis-sdl2
4040

4141
all: check test
4242

@@ -62,6 +62,9 @@ reftests-ci:
6262
cd src/warden && cargo test --features "gl"
6363
cd src/warden && cargo run --features "gl" -- ci #TODO: "gl-headless"
6464

65+
quad:
66+
cd examples && cargo run --bin quad --features ${FEATURES_HAL}
67+
6568
travis-sdl2:
6669
#TODO
6770
#if [ -e $(SDL2_CONFIG) ]; then exit 1; fi

examples/quad/main.rs

+3-3
Original file line numberDiff line numberDiff line change
@@ -30,8 +30,8 @@ use hal::pso::{PipelineStage, ShaderStageFlags, Specialization};
3030
use hal::queue::Submission;
3131

3232
use std::fs;
33-
use std::io::Cursor;
34-
use std::io::Read;
33+
use std::io::{Cursor, Read};
34+
3535

3636
const ENTRY_NAME: &str = "main";
3737

@@ -480,7 +480,7 @@ fn main() {
480480
cmd_buffer.set_viewports(0, &[viewport.clone()]);
481481
cmd_buffer.set_scissors(0, &[viewport.rect]);
482482
cmd_buffer.bind_graphics_pipeline(&pipeline);
483-
cmd_buffer.bind_vertex_buffers(0, pso::VertexBufferSet(vec![(&vertex_buffer, 0)]));
483+
cmd_buffer.bind_vertex_buffers(0, Some((&vertex_buffer, 0)));
484484
cmd_buffer.bind_graphics_descriptor_sets(&pipeline_layout, 0, Some(&desc_set), &[]); //TODO
485485

486486
{

src/backend/dx11/src/lib.rs

+8-3
Original file line numberDiff line numberDiff line change
@@ -866,9 +866,14 @@ impl hal::command::RawCommandBuffer<Backend> for CommandBuffer {
866866
}
867867
}
868868

869-
fn bind_vertex_buffers(&mut self, first_binding: u32, vbs: pso::VertexBufferSet<Backend>) {
870-
let (buffers, offsets): (Vec<*mut d3d11::ID3D11Buffer>, Vec<u32>) = vbs.0.iter()
871-
.map(|(buf, offset)| (buf.internal.raw, *offset as u32))
869+
fn bind_vertex_buffers<I, T>(&mut self, first_binding: u32, buffers: I)
870+
where
871+
I: IntoIterator<Item = (T, buffer::Offset)>,
872+
T: Borrow<Buffer>,
873+
{
874+
let (buffers, offsets): (Vec<*mut d3d11::ID3D11Buffer>, Vec<u32>) = buffers
875+
.into_iter()
876+
.map(|(buf, offset)| (buf.borrow().internal.raw, offset as u32))
872877
.unzip();
873878

874879
// TODO: strides

src/backend/dx12/src/command.rs

+11-5
Original file line numberDiff line numberDiff line change
@@ -1634,15 +1634,21 @@ impl com::RawCommandBuffer<Backend> for CommandBuffer {
16341634
}
16351635
}
16361636

1637-
fn bind_vertex_buffers(&mut self, first_binding: u32, vbs: pso::VertexBufferSet<Backend>) {
1637+
fn bind_vertex_buffers<I, T>(&mut self, first_binding: u32, buffers: I)
1638+
where
1639+
I: IntoIterator<Item = (T, buffer::Offset)>,
1640+
T: Borrow<n::Buffer>,
1641+
{
16381642
// Only cache the vertex buffer views as we don't know the stride (PSO).
16391643
assert!(first_binding as usize <= MAX_VERTEX_BUFFERS);
1640-
for (&(buffer, offset), view) in vbs.0.iter()
1644+
for ((buffer, offset), view) in buffers
1645+
.into_iter()
16411646
.zip(self.vertex_buffer_views[first_binding as _..].iter_mut())
16421647
{
1643-
let base = unsafe { (*buffer.resource).GetGPUVirtualAddress() };
1644-
view.BufferLocation = base + offset as u64;
1645-
view.SizeInBytes = buffer.size_in_bytes - offset as u32;
1648+
let b = buffer.borrow();
1649+
let base = unsafe { (*b.resource).GetGPUVirtualAddress() };
1650+
view.BufferLocation = base + offset;
1651+
view.SizeInBytes = b.size_in_bytes - offset as u32;
16461652
}
16471653
}
16481654

src/backend/empty/src/lib.rs

+5-1
Original file line numberDiff line numberDiff line change
@@ -499,7 +499,11 @@ impl command::RawCommandBuffer<Backend> for RawCommandBuffer {
499499
unimplemented!()
500500
}
501501

502-
fn bind_vertex_buffers(&mut self, _: u32, _: pso::VertexBufferSet<Backend>) {
502+
fn bind_vertex_buffers<I, T>(&mut self, _: u32, _: I)
503+
where
504+
I: IntoIterator<Item = (T, buffer::Offset)>,
505+
T: Borrow<()>,
506+
{
503507
unimplemented!()
504508
}
505509

src/backend/gl/src/command.rs

+14-11
Original file line numberDiff line numberDiff line change
@@ -698,17 +698,20 @@ impl command::RawCommandBuffer<Backend> for RawCommandBuffer {
698698
self.push_cmd(Command::BindIndexBuffer(ibv.buffer.raw));
699699
}
700700

701-
fn bind_vertex_buffers(&mut self, _first_binding: u32, vbs: hal::pso::VertexBufferSet<Backend>) {
702-
if vbs.0.len() == 0 {
703-
return
704-
}
705-
706-
let needed_length = vbs.0.iter().map(|vb| vb.1).max().unwrap() + 1;
707-
708-
self.cache.vertex_buffers.resize(needed_length as usize, 0);
709-
710-
for vb in vbs.0 {
711-
self.cache.vertex_buffers[vb.1 as usize] = vb.0.raw;
701+
fn bind_vertex_buffers<I, T>(&mut self, first_binding: u32, buffers: I)
702+
where
703+
I: IntoIterator<Item = (T, buffer::Offset)>,
704+
T: Borrow<n::Buffer>,
705+
{
706+
for (i, (buffer, offset)) in buffers.into_iter().enumerate() {
707+
let index = first_binding as usize + i;
708+
if self.cache.vertex_buffers.len() <= index {
709+
self.cache.vertex_buffers.resize(index+1, 0);
710+
}
711+
self.cache.vertex_buffers[index] = buffer.borrow().raw;
712+
if offset != 0 {
713+
error!("Vertex buffer offset {} is not supported", offset);
714+
}
712715
}
713716
}
714717

src/backend/metal/src/command.rs

+56-35
Original file line numberDiff line numberDiff line change
@@ -821,11 +821,11 @@ impl CommandSink {
821821
) where
822822
I: Iterator<Item = soft::RenderCommand<&'a soft::Own>>,
823823
{
824+
//assert!(AutoReleasePool::is_active());
824825
self.stop_encoding();
825826

826827
match *self {
827828
CommandSink::Immediate { ref cmd_buffer, ref mut encoder_state, .. } => {
828-
let _ap = AutoreleasePool::new();
829829
let encoder = cmd_buffer.new_render_command_encoder(descriptor);
830830
for command in init_commands {
831831
exec_render(encoder, command);
@@ -1446,7 +1446,7 @@ impl pool::RawCommandPool<Backend> for CommandPool {
14461446
framebuffer_inner: native::FramebufferInner {
14471447
extent: Extent::default(),
14481448
aspects: Aspects::empty(),
1449-
colors: Vec::new(),
1449+
colors: SmallVec::new(),
14501450
depth_stencil: None,
14511451
}
14521452
},
@@ -1653,6 +1653,8 @@ impl com::RawCommandBuffer<Backend> for CommandBuffer {
16531653
T: IntoIterator,
16541654
T::Item: Borrow<SubresourceRange>,
16551655
{
1656+
let _ap = AutoreleasePool::new();
1657+
16561658
let CommandBufferInner {
16571659
ref mut retained_textures,
16581660
ref mut sink,
@@ -1690,51 +1692,63 @@ impl com::RawCommandBuffer<Backend> for CommandBuffer {
16901692
&*image.raw
16911693
};
16921694

1693-
let clear_color_attachment = sub.aspects.contains(Aspects::COLOR);
1694-
if image.format_desc.aspects.contains(Aspects::COLOR) {
1695+
let color_attachment = if image.format_desc.aspects.contains(Aspects::COLOR) {
16951696
let attachment = descriptor
16961697
.color_attachments()
16971698
.object_at(0)
16981699
.unwrap();
16991700
attachment.set_texture(Some(texture));
17001701
attachment.set_store_action(metal::MTLStoreAction::Store);
1701-
if clear_color_attachment {
1702+
if sub.aspects.contains(Aspects::COLOR) {
17021703
attachment.set_load_action(metal::MTLLoadAction::Clear);
17031704
attachment.set_clear_color(clear_color.clone());
1705+
Some(attachment)
17041706
} else {
17051707
attachment.set_load_action(metal::MTLLoadAction::Load);
1708+
None
17061709
}
1707-
}
1710+
} else {
1711+
assert!(!sub.aspects.contains(Aspects::COLOR));
1712+
None
1713+
};
17081714

1709-
let clear_depth_attachment = sub.aspects.contains(Aspects::DEPTH);
1710-
if image.format_desc.aspects.contains(Aspects::DEPTH) {
1715+
let depth_attachment = if image.format_desc.aspects.contains(Aspects::DEPTH) {
17111716
let attachment = descriptor
17121717
.depth_attachment()
17131718
.unwrap();
17141719
attachment.set_texture(Some(texture));
17151720
attachment.set_store_action(metal::MTLStoreAction::Store);
1716-
if clear_depth_attachment {
1721+
if sub.aspects.contains(Aspects::DEPTH) {
17171722
attachment.set_load_action(metal::MTLLoadAction::Clear);
17181723
attachment.set_clear_depth(depth_stencil.depth as _);
1724+
Some(attachment)
17191725
} else {
17201726
attachment.set_load_action(metal::MTLLoadAction::Load);
1727+
None
17211728
}
1722-
}
1729+
} else {
1730+
assert!(!sub.aspects.contains(Aspects::DEPTH));
1731+
None
1732+
};
17231733

1724-
let clear_stencil_attachment = sub.aspects.contains(Aspects::STENCIL);
1725-
if image.format_desc.aspects.contains(Aspects::STENCIL) {
1734+
let stencil_attachment = if image.format_desc.aspects.contains(Aspects::STENCIL) {
17261735
let attachment = descriptor
17271736
.stencil_attachment()
17281737
.unwrap();
17291738
attachment.set_texture(Some(texture));
17301739
attachment.set_store_action(metal::MTLStoreAction::Store);
1731-
if clear_stencil_attachment {
1740+
if sub.aspects.contains(Aspects::STENCIL) {
17321741
attachment.set_load_action(metal::MTLLoadAction::Clear);
17331742
attachment.set_clear_stencil(depth_stencil.stencil);
1743+
Some(attachment)
17341744
} else {
17351745
attachment.set_load_action(metal::MTLLoadAction::Load);
1746+
None
17361747
}
1737-
}
1748+
} else {
1749+
assert!(!sub.aspects.contains(Aspects::STENCIL));
1750+
None
1751+
};
17381752

17391753
for layer in layers {
17401754
for level in sub.levels.clone() {
@@ -1746,29 +1760,19 @@ impl com::RawCommandBuffer<Backend> for CommandBuffer {
17461760
descriptor.set_render_target_array_length(num_layers);
17471761
};
17481762

1749-
if clear_color_attachment {
1750-
let attachment = descriptor
1751-
.color_attachments()
1752-
.object_at(0)
1753-
.unwrap();
1763+
if let Some(attachment) = color_attachment {
17541764
attachment.set_level(level as _);
17551765
if !CLEAR_IMAGE_ARRAY {
17561766
attachment.set_slice(layer as _);
17571767
}
17581768
}
1759-
if clear_depth_attachment {
1760-
let attachment = descriptor
1761-
.depth_attachment()
1762-
.unwrap();
1769+
if let Some(attachment) = depth_attachment {
17631770
attachment.set_level(level as _);
17641771
if !CLEAR_IMAGE_ARRAY {
17651772
attachment.set_slice(layer as _);
17661773
}
17671774
}
1768-
if clear_stencil_attachment {
1769-
let attachment = descriptor
1770-
.stencil_attachment()
1771-
.unwrap();
1775+
if let Some(attachment) = stencil_attachment {
17721776
attachment.set_level(level as _);
17731777
if !CLEAR_IMAGE_ARRAY {
17741778
attachment.set_slice(layer as _);
@@ -2015,6 +2019,8 @@ impl com::RawCommandBuffer<Backend> for CommandBuffer {
20152019
T: IntoIterator,
20162020
T::Item: Borrow<com::ImageBlit>
20172021
{
2022+
let _ap = AutoreleasePool::new();
2023+
20182024
let vertices = &mut self.temp.blit_vertices;
20192025
vertices.clear();
20202026

@@ -2218,7 +2224,9 @@ impl com::RawCommandBuffer<Backend> for CommandBuffer {
22182224
.chain(&extra)
22192225
.cloned();
22202226

2221-
inner.sink().begin_render_pass(false, &descriptor, commands);
2227+
inner
2228+
.sink()
2229+
.begin_render_pass(false, &descriptor, commands);
22222230
}
22232231
}
22242232

@@ -2233,13 +2241,26 @@ impl com::RawCommandBuffer<Backend> for CommandBuffer {
22332241
});
22342242
}
22352243

2236-
fn bind_vertex_buffers(&mut self, first_binding: u32, buffer_set: pso::VertexBufferSet<Backend>) {
2237-
while self.state.vertex_buffers.len() < first_binding as usize + buffer_set.0.len() {
2238-
self.state.vertex_buffers.push(None);
2239-
}
2240-
for (i, &(buffer, offset)) in buffer_set.0.iter().enumerate() {
2241-
let buffer_ptr = BufferPtr(buffer.raw.as_ptr());
2242-
self.state.vertex_buffers[first_binding as usize + i] = Some((buffer_ptr, buffer.range.start + offset));
2244+
2245+
fn bind_vertex_buffers<I, T>(&mut self, first_binding: u32, buffers: I)
2246+
where
2247+
I: IntoIterator<Item = (T, buffer::Offset)>,
2248+
T: Borrow<native::Buffer>,
2249+
{
2250+
if self.state.vertex_buffers.len() <= first_binding as usize {
2251+
self.state.vertex_buffers.resize(first_binding as usize + 1, None);
2252+
}
2253+
for (i, (buffer, offset)) in buffers.into_iter().enumerate() {
2254+
let b = buffer.borrow();
2255+
let buffer_ptr = BufferPtr(b.raw.as_ptr());
2256+
let index = first_binding as usize + i;
2257+
let value = Some((buffer_ptr, b.range.start + offset));
2258+
if index >= self.state.vertex_buffers.len() {
2259+
debug_assert_eq!(index, self.state.vertex_buffers.len());
2260+
self.state.vertex_buffers.push(value);
2261+
} else {
2262+
self.state.vertex_buffers[index] = value;
2263+
}
22432264
}
22442265

22452266
let mask = self.state.set_vertex_buffers();

0 commit comments

Comments
 (0)