Skip to content

Commit e304151

Browse files
committed
add a retry mechanism for waiting on the last submission in Queue::drop
1 parent f4a65c4 commit e304151

File tree

1 file changed

+66
-28
lines changed

1 file changed

+66
-28
lines changed

wgpu-core/src/device/queue.rs

Lines changed: 66 additions & 28 deletions
Original file line number | Diff line number | Diff line change
@@ -149,37 +149,75 @@ impl Drop for Queue {
149149
.load(Ordering::Acquire);
150150

151151
let fence = self.device.fence.read();
152-
let wait_res = unsafe {
153-
self.device.raw().wait(
154-
fence.as_ref(),
155-
last_successful_submission_index,
156-
#[cfg(not(target_arch = "wasm32"))]
157-
crate::device::CLEANUP_WAIT_MS,
158-
#[cfg(target_arch = "wasm32")]
159-
0, // WebKit and Chromium don't support a non-0 timeout
160-
)
161-
};
162-
drop(fence);
163152

164-
match wait_res {
165-
Ok(true) => {}
166-
// Note: If we don't panic here we are in UB land (destroying resources while they are still in use by the GPU).
167-
Ok(false) => {
168-
// It's fine that we timed out on WebGL; GL objects can be deleted early as they
169-
// will be kept around by the driver if GPU work hasn't finished.
170-
// Moreover, the way we emulate read mappings on WebGL allows us to execute map_buffer earlier than on other
171-
// backends since getBufferSubData is synchronous with respect to the other previously enqueued GL commands.
172-
// TODO: Relying on this behavior breaks the clean abstraction wgpu-hal tries to maintain and
173-
// we should find ways to improve this.
174-
#[cfg(not(target_arch = "wasm32"))]
175-
panic!("We timed out while waiting on the last successful submission to complete!");
176-
}
177-
Err(e) => {
178-
panic!(
179-
"We ran into an error while waiting on the last successful submission to complete! - {e}"
180-
);
153+
// Try waiting on the last submission using the following sequence of timeouts
154+
let timeouts_in_ms = [100, 200, 400, 800, 1600, 3200];
155+
156+
for (i, timeout_ms) in timeouts_in_ms.into_iter().enumerate() {
157+
let is_last_iter = i == timeouts_in_ms.len() - 1;
158+
159+
api_log!(
160+
"Waiting on last submission. try: {}/{}. timeout: {}ms",
161+
i + 1,
162+
timeouts_in_ms.len(),
163+
timeout_ms
164+
);
165+
166+
let wait_res = unsafe {
167+
self.device.raw().wait(
168+
fence.as_ref(),
169+
last_successful_submission_index,
170+
#[cfg(not(target_arch = "wasm32"))]
171+
timeout_ms,
172+
#[cfg(target_arch = "wasm32")]
173+
0, // WebKit and Chromium don't support a non-0 timeout
174+
)
175+
};
176+
// Note: If we don't panic below we are in UB land (destroying resources while they are still in use by the GPU).
177+
match wait_res {
178+
Ok(true) => break,
179+
Ok(false) => {
180+
// It's fine that we timed out on WebGL; GL objects can be deleted early as they
181+
// will be kept around by the driver if GPU work hasn't finished.
182+
// Moreover, the way we emulate read mappings on WebGL allows us to execute map_buffer earlier than on other
183+
// backends since getBufferSubData is synchronous with respect to the other previously enqueued GL commands.
184+
// TODO: Relying on this behavior breaks the clean abstraction wgpu-hal tries to maintain and
185+
// we should find ways to improve this.
186+
#[cfg(target_arch = "wasm32")]
187+
{
188+
break;
189+
}
190+
#[cfg(not(target_arch = "wasm32"))]
191+
{
192+
if is_last_iter {
193+
panic!(
194+
"We timed out while waiting on the last successful submission to complete!"
195+
);
196+
}
197+
}
198+
}
199+
Err(e) => match e {
200+
hal::DeviceError::OutOfMemory => {
201+
if is_last_iter {
202+
panic!(
203+
"We ran into an OOM error while waiting on the last successful submission to complete!"
204+
);
205+
}
206+
}
207+
hal::DeviceError::Lost => {
208+
self.device.handle_hal_error(e); // will lose the device
209+
break;
210+
}
211+
hal::DeviceError::ResourceCreationFailed => unreachable!(),
212+
hal::DeviceError::Unexpected => {
213+
panic!(
214+
"We ran into an unexpected error while waiting on the last successful submission to complete!"
215+
);
216+
}
217+
},
181218
}
182219
}
220+
drop(fence);
183221

184222
let snatch_guard = self.device.snatchable_lock.read();
185223
let (submission_closures, mapping_closures, queue_empty) =

0 commit comments

Comments
 (0)