Skip to content

Commit c7718d0

Browse files
bors[bot]kvark
andcommitted
Merge #2264
2264: Render pass descriptor cache for Metal r=grovesNL a=kvark ~~Includes #2260~~ Fixes two of the performance issues in #2161 (RP desc locking and copying costs). Immediate recording FPS doesn't seem to change, maybe slightly lower (touching 90 from below more than from above, as I recall it doing - need to confirm. Edit - confirmed to not be caused by the PR). Deferred recording FPS seem to go from lower 100s to higher, or even from 100 to 110, roughly speaking. There are barely any bottlenecks left for it, outside of the general architecture. Main thread now spends about 14.5% in our code, which is mostly covered by driver interaction. PR checklist: - [ ] `make` succeeds (on *nix) - [x] `make reftests` succeeds - [x] tested examples with the following backends: Metal - [ ] `rustfmt` run on changed code Co-authored-by: Dzmitry Malyshau <kvarkus@gmail.com>
2 parents 35f2348 + 867315c commit c7718d0

4 files changed

Lines changed: 149 additions & 93 deletions

File tree

src/backend/metal/Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,4 +34,4 @@ dispatch = "0.1"
3434
smallvec = "0.6"
3535
spirv_cross = "0.9"
3636
parking_lot = "0.6.3"
37-
storage-map = "0.1"
37+
storage-map = "0.1.1"

src/backend/metal/src/command.rs

Lines changed: 77 additions & 86 deletions
Original file line numberDiff line numberDiff line change
@@ -989,13 +989,7 @@ impl CommandSink {
989989
}
990990
}
991991
CommandSink::Deferred { ref mut is_encoding, ref mut journal } => {
992-
//Note: the original descriptor belongs to the framebuffer,
993-
// and will me mutated afterwards.
994-
let pass = soft::Pass::Render( unsafe {
995-
let desc: metal::RenderPassDescriptor = msg_send![descriptor, copy];
996-
msg_send![desc.as_ptr(), retain];
997-
desc
998-
});
992+
let pass = soft::Pass::Render(descriptor.to_owned());
999993
let mut range = journal.render_commands.len() .. 0;
1000994
journal.render_commands.extend(init_commands.map(soft::RenderCommand::own));
1001995
match door {
@@ -1005,14 +999,9 @@ impl CommandSink {
1005999
journal.passes.push((pass, range))
10061000
}
10071001
CommandSink::Remote { ref queue, ref cmd_buffer, ref mut pass, ref capacity, .. } => {
1008-
let desc = unsafe {
1009-
let desc: metal::RenderPassDescriptor = msg_send![descriptor, copy];
1010-
msg_send![desc.as_ptr(), retain];
1011-
desc
1012-
};
10131002
let mut list = Vec::with_capacity(capacity.render);
10141003
list.extend(init_commands.map(soft::RenderCommand::own));
1015-
let new_pass = EncodePass::Render(list, desc);
1004+
let new_pass = EncodePass::Render(list, descriptor.to_owned());
10161005
match door {
10171006
PassDoor::Open => *pass = Some(new_pass),
10181007
PassDoor::Closed { .. } => new_pass.schedule(queue, cmd_buffer),
@@ -2696,61 +2685,8 @@ impl com::RawCommandBuffer<Backend> for CommandBuffer {
26962685
T::Item: Borrow<com::ClearValueRaw>,
26972686
{
26982687
// FIXME: subpasses
2699-
let _ap = AutoreleasePool::new();
2700-
2701-
// We are going to modify the RP descriptor here, so
2702-
// locking to avoid data races.
2703-
//TODO: if we know that we aren't in the `Immediate` recording mode,
2704-
// we can copy here right away and void the lock entirely.
2705-
let descriptor = framebuffer.descriptor.lock();
2706-
2707-
let mut num_colors = 0;
2708-
let mut full_aspects = Aspects::empty();
2709-
let mut inner = self.inner.borrow_mut();
2710-
2711-
let dummy_value = com::ClearValueRaw {
2712-
color: com:: ClearColorRaw {
2713-
int32: [0; 4],
2714-
},
2715-
};
2716-
let clear_values_iter = clear_values
2717-
.into_iter()
2718-
.map(|c| *c.borrow())
2719-
.chain(iter::repeat(dummy_value));
2720-
2721-
for (rat, clear_value) in render_pass.attachments.iter().zip(clear_values_iter) {
2722-
let (aspects, channel) = match rat.format {
2723-
Some(format) => (format.surface_desc().aspects, Channel::from(format.base_format().1)),
2724-
None => continue,
2725-
};
2726-
full_aspects |= aspects;
2727-
if aspects.contains(Aspects::COLOR) {
2728-
let color_desc = descriptor
2729-
.color_attachments()
2730-
.object_at(num_colors)
2731-
.unwrap();
2732-
if set_operations(color_desc, rat.ops) == AttachmentLoadOp::Clear {
2733-
let mtl_color = channel
2734-
.interpret(unsafe { clear_value.color });
2735-
color_desc.set_clear_color(mtl_color);
2736-
}
2737-
num_colors += 1;
2738-
}
2739-
if aspects.contains(Aspects::DEPTH) {
2740-
let depth_desc = descriptor.depth_attachment().unwrap();
2741-
if set_operations(depth_desc, rat.ops) == AttachmentLoadOp::Clear {
2742-
let mtl_depth = unsafe { clear_value.depth_stencil.depth as f64 };
2743-
depth_desc.set_clear_depth(mtl_depth);
2744-
}
2745-
}
2746-
if aspects.contains(Aspects::STENCIL) {
2747-
let stencil_desc = descriptor.stencil_attachment().unwrap();
2748-
if set_operations(stencil_desc, rat.stencil_ops) == AttachmentLoadOp::Clear {
2749-
let mtl_stencil = unsafe { clear_value.depth_stencil.stencil };
2750-
stencil_desc.set_clear_stencil(mtl_stencil);
2751-
}
2752-
}
2753-
}
2688+
let desc_guard;
2689+
let (rp_key, full_aspects) = render_pass.build_key(clear_values);
27542690

27552691
self.state.render_pso_is_compatible = match self.state.render_pso {
27562692
Some(ref ps) => ps.at_formats.len() == render_pass.attachments.len() &&
@@ -2759,6 +2695,7 @@ impl com::RawCommandBuffer<Backend> for CommandBuffer {
27592695
};
27602696

27612697
self.state.framebuffer_inner = framebuffer.inner.clone();
2698+
27622699
let ds_store = &self.shared.service_pipes.depth_stencil_states;
27632700
let ds_state;
27642701
let com_ds = if full_aspects.intersects(Aspects::DEPTH | Aspects::STENCIL) {
@@ -2776,9 +2713,62 @@ impl com::RawCommandBuffer<Backend> for CommandBuffer {
27762713
.make_render_commands(full_aspects)
27772714
.chain(com_ds);
27782715

2779-
inner
2716+
desc_guard = framebuffer.desc_storage
2717+
.get_or_create_with(&rp_key, || {
2718+
let _ap = AutoreleasePool::new();
2719+
let mut clear_id = 0;
2720+
let mut num_colors = 0;
2721+
let rp_desc = unsafe {
2722+
let desc: metal::RenderPassDescriptor = msg_send![framebuffer.descriptor, copy];
2723+
msg_send![desc.as_ptr(), retain];
2724+
desc
2725+
};
2726+
2727+
for rat in &render_pass.attachments {
2728+
let (aspects, channel) = match rat.format {
2729+
Some(format) => (format.surface_desc().aspects, Channel::from(format.base_format().1)),
2730+
None => continue,
2731+
};
2732+
if aspects.contains(Aspects::COLOR) {
2733+
let color_desc = rp_desc
2734+
.color_attachments()
2735+
.object_at(num_colors)
2736+
.unwrap();
2737+
if set_operations(color_desc, rat.ops) == AttachmentLoadOp::Clear {
2738+
let d = &rp_key.clear_data[clear_id .. clear_id + 4];
2739+
clear_id += 4;
2740+
let raw = com::ClearColorRaw {
2741+
uint32: [d[0], d[1], d[2], d[3]],
2742+
};
2743+
color_desc.set_clear_color(channel.interpret(raw));
2744+
}
2745+
num_colors += 1;
2746+
}
2747+
if aspects.contains(Aspects::DEPTH) {
2748+
let depth_desc = rp_desc.depth_attachment().unwrap();
2749+
if set_operations(depth_desc, rat.ops) == AttachmentLoadOp::Clear {
2750+
let raw = unsafe { *(&rp_key.clear_data[clear_id] as *const _ as *const f32) };
2751+
clear_id += 1;
2752+
depth_desc.set_clear_depth(raw as f64);
2753+
}
2754+
}
2755+
if aspects.contains(Aspects::STENCIL) {
2756+
let stencil_desc = rp_desc.stencil_attachment().unwrap();
2757+
if set_operations(stencil_desc, rat.stencil_ops) == AttachmentLoadOp::Clear {
2758+
let raw = rp_key.clear_data[clear_id];
2759+
clear_id += 1;
2760+
stencil_desc.set_clear_stencil(raw);
2761+
}
2762+
}
2763+
}
2764+
2765+
rp_desc
2766+
});
2767+
2768+
self.inner
2769+
.borrow_mut()
27802770
.sink()
2781-
.begin_render_pass(PassDoor::Open, &*descriptor, init_commands);
2771+
.begin_render_pass(PassDoor::Open, &**desc_guard, init_commands);
27822772
}
27832773

27842774
fn next_subpass(&mut self, _contents: com::SubpassContents) {
@@ -2821,38 +2811,39 @@ impl com::RawCommandBuffer<Backend> for CommandBuffer {
28212811
let mut pre = inner.sink().pre_render();
28222812

28232813
self.state.render_pso_is_compatible = true; //assume good intent :)
2824-
let mut set_pipeline = false;
2825-
match self.state.render_pso {
2814+
let set_pipeline = match self.state.render_pso {
2815+
Some(ref ps) if ps.raw.as_ptr() == pipeline.raw.as_ptr() => {
2816+
false // chill out
2817+
}
28262818
Some(ref mut ps) => {
2827-
// try to avoid extra states or new heap allocations
2828-
if ps.raw.as_ptr() != pipeline.raw.as_ptr() {
2829-
ps.raw = pipeline.raw.to_owned();
2830-
set_pipeline = true;
2831-
}
2832-
ps.ds_desc = pipeline.depth_stencil_desc.clone();
2819+
ps.raw = pipeline.raw.to_owned();
28332820
ps.vbuf_map.clear();
28342821
ps.vbuf_map.extend(&pipeline.vertex_buffer_map);
2822+
ps.ds_desc = pipeline.depth_stencil_desc.clone();
28352823
ps.at_formats.clear();
28362824
ps.at_formats.extend_from_slice(&pipeline.attachment_formats);
2825+
true
28372826
}
28382827
None => {
2839-
set_pipeline = true;
28402828
self.state.render_pso = Some(RenderPipelineState {
28412829
raw: pipeline.raw.to_owned(),
28422830
ds_desc: pipeline.depth_stencil_desc.clone(),
28432831
vbuf_map: pipeline.vertex_buffer_map.clone(),
28442832
at_formats: pipeline.attachment_formats.clone(),
28452833
});
2834+
true
28462835
}
2847-
}
2836+
};
28482837
if set_pipeline {
28492838
pre.issue(soft::RenderCommand::BindPipeline(&*pipeline.raw));
2850-
}
2851-
2852-
self.state.rasterizer_state = pipeline.rasterizer_state.clone();
2853-
self.state.primitive_type = pipeline.primitive_type;
2854-
if let Some(ref rs) = pipeline.rasterizer_state {
2855-
pre.issue(soft::RenderCommand::SetRasterizerState(rs.clone()))
2839+
self.state.rasterizer_state = pipeline.rasterizer_state.clone();
2840+
self.state.primitive_type = pipeline.primitive_type;
2841+
if let Some(ref rs) = pipeline.rasterizer_state {
2842+
pre.issue(soft::RenderCommand::SetRasterizerState(rs.clone()))
2843+
}
2844+
} else {
2845+
debug_assert_eq!(self.state.rasterizer_state, pipeline.rasterizer_state);
2846+
debug_assert_eq!(self.state.primitive_type, pipeline.primitive_type);
28562847
}
28572848

28582849
if let Some(desc) = self.state.build_depth_stencil() {

src/backend/metal/src/device.rs

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ use {
33
Shared, Surface, Swapchain, validate_line_width, BufferPtr, SamplerPtr, TexturePtr,
44
};
55
use {conversions as conv, command, native as n};
6+
use internal::FastStorageMap;
67
use native;
78
use range_alloc::RangeAllocator;
89

@@ -1095,7 +1096,8 @@ impl hal::Device<Backend> for Device {
10951096
}
10961097

10971098
Ok(n::Framebuffer {
1098-
descriptor: Mutex::new(descriptor),
1099+
descriptor,
1100+
desc_storage: FastStorageMap::default(),
10991101
inner,
11001102
})
11011103
}

src/backend/metal/src/native.rs

Lines changed: 68 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,20 @@
11
use {Backend, BufferPtr, SamplerPtr, TexturePtr};
2-
use internal::Channel;
2+
use internal::{Channel, FastStorageMap};
33
use range_alloc::RangeAllocator;
44
use window::SwapchainImage;
55

6+
use std::borrow::Borrow;
67
use std::cell::RefCell;
7-
use std::fmt;
8+
use std::{fmt, iter};
89
use std::ops::Range;
910
use std::os::raw::{c_void, c_long};
1011
use std::sync::Arc;
1112

1213
use hal::{self, image, pso};
1314
use hal::backend::FastHashMap;
15+
use hal::command::{ClearColorRaw, ClearValueRaw};
1416
use hal::format::{Aspects, Format, FormatDesc};
17+
use hal::pass::{Attachment, AttachmentLoadOp, AttachmentOps};
1518

1619
use cocoa::foundation::{NSUInteger};
1720
use foreign_types::ForeignType;
@@ -49,14 +52,73 @@ impl fmt::Debug for ShaderModule {
4952
unsafe impl Send for ShaderModule {}
5053
unsafe impl Sync for ShaderModule {}
5154

55+
#[derive(Clone, Debug, Default, Hash, PartialEq, Eq)]
56+
pub struct RenderPassKey {
57+
// enough room for 4 color targets + depth/stencil
58+
operations: SmallVec<[AttachmentOps; 5]>,
59+
pub clear_data: SmallVec<[u32; 10]>,
60+
}
61+
5262
#[derive(Debug)]
5363
pub struct RenderPass {
54-
pub(crate) attachments: Vec<hal::pass::Attachment>,
64+
pub(crate) attachments: Vec<Attachment>,
5565
}
5666

5767
unsafe impl Send for RenderPass {}
5868
unsafe impl Sync for RenderPass {}
5969

70+
impl RenderPass {
71+
pub fn build_key<T>(&self, clear_values: T) -> (RenderPassKey, Aspects)
72+
where
73+
T: IntoIterator,
74+
T::Item: Borrow<ClearValueRaw>,
75+
{
76+
let mut key = RenderPassKey::default();
77+
let mut full_aspects = Aspects::empty();
78+
79+
let dummy_value = ClearValueRaw {
80+
color: ClearColorRaw {
81+
int32: [0; 4],
82+
},
83+
};
84+
let clear_values_iter = clear_values
85+
.into_iter()
86+
.map(|c| *c.borrow())
87+
.chain(iter::repeat(dummy_value));
88+
89+
for (rat, clear_value) in self.attachments.iter().zip(clear_values_iter) {
90+
//TODO: avoid calling `surface_desc` as often
91+
let aspects = match rat.format {
92+
Some(format) => format.surface_desc().aspects,
93+
None => continue,
94+
};
95+
full_aspects |= aspects;
96+
let cv = clear_value.borrow();
97+
98+
if aspects.contains(Aspects::COLOR) {
99+
key.operations.push(rat.ops);
100+
if rat.ops.load == AttachmentLoadOp::Clear {
101+
key.clear_data.extend_from_slice(unsafe { &cv.color.uint32 });
102+
}
103+
}
104+
if aspects.contains(Aspects::DEPTH) {
105+
key.operations.push(rat.ops);
106+
if rat.ops.load == AttachmentLoadOp::Clear {
107+
key.clear_data.push(unsafe { *(&cv.depth_stencil.depth as *const _ as *const u32) });
108+
}
109+
}
110+
if aspects.contains(Aspects::STENCIL) {
111+
key.operations.push(rat.stencil_ops);
112+
if rat.stencil_ops.load == AttachmentLoadOp::Clear {
113+
key.clear_data.push(unsafe { cv.depth_stencil.stencil });
114+
}
115+
}
116+
}
117+
118+
(key, full_aspects)
119+
}
120+
}
121+
60122
#[derive(Clone, Debug)]
61123
pub struct ColorAttachment {
62124
pub mtl_format: metal::MTLPixelFormat,
@@ -73,7 +135,8 @@ pub struct FramebufferInner {
73135

74136
#[derive(Debug)]
75137
pub struct Framebuffer {
76-
pub(crate) descriptor: Mutex<metal::RenderPassDescriptor>,
138+
pub(crate) descriptor: metal::RenderPassDescriptor,
139+
pub(crate) desc_storage: FastStorageMap<RenderPassKey, metal::RenderPassDescriptor>,
77140
pub(crate) inner: FramebufferInner,
78141
}
79142

@@ -115,7 +178,7 @@ impl PipelineLayout {
115178
}
116179
}
117180

118-
#[derive(Clone, Debug)]
181+
#[derive(Clone, Debug, PartialEq)]
119182
pub struct RasterizerState {
120183
//TODO: more states
121184
pub front_winding: metal::MTLWinding,

0 commit comments

Comments
 (0)