Skip to content

Commit b359740

Browse files
kettle11, mockersf, alice-i-cecile, daxpedda
authored
Get Bevy building for WebAssembly with multithreading (#12205)
# Objective

This gets Bevy building on Wasm when the `atomics` flag is enabled. This does not yet multithread Bevy itself, but it allows Bevy users to use a crate like `wasm_thread` to spawn their own threads and manually parallelize work. This is a first step towards resolving #4078. Also fixes #9304.

This provides a foothold so that Bevy contributors can begin to think about multithreaded Wasm's constraints and Bevy can work towards changes to get the engine itself multithreaded.

Some flags need to be set on the Rust compiler when compiling for Wasm multithreading. Here's what my build script looks like, with the correct flags set, to test out Bevy examples on web:

```bash
set -e
RUSTFLAGS='-C target-feature=+atomics,+bulk-memory,+mutable-globals' \
  cargo build --example breakout --target wasm32-unknown-unknown -Z build-std=std,panic_abort --release
wasm-bindgen --out-name wasm_example \
  --out-dir examples/wasm/target \
  --target web target/wasm32-unknown-unknown/release/examples/breakout.wasm
devserver --header Cross-Origin-Opener-Policy='same-origin' --header Cross-Origin-Embedder-Policy='require-corp' --path examples/wasm
```

A few notes:

1. `cpal` crashes immediately when the `atomics` flag is set. That is patched in RustAudio/cpal#837, but not yet in the latest crates.io release. That can be temporarily worked around by patching `cpal` like so:

   ```toml
   [patch.crates-io]
   cpal = { git = "https://github.com/RustAudio/cpal" }
   ```

2. When testing out `wasm_thread` you need to enable the `es_modules` feature.

## Solution

The largest obstacle to compiling Bevy with `atomics` on web is that `wgpu` types are _not_ Send and Sync. Longer term Bevy will need an approach to handle that, but in the near term Bevy is already configured to be single-threaded on web. Therefore, it is enough to wrap `wgpu` types in a `send_wrapper::SendWrapper` that _is_ Send / Sync, but panics if accessed off the `wgpu` thread.
---

## Changelog

- `wgpu` types that are not `Send` are wrapped in `send_wrapper::SendWrapper` on Wasm + 'atomics'
- CommandBuffers are not generated in parallel on Wasm + 'atomics'

## Questions

- Bevy should probably add CI checks to make sure this doesn't regress. Should that go in this PR or a separate PR? **Edit:** Added checks to build Wasm with atomics

---------

Co-authored-by: François <[email protected]>
Co-authored-by: Alice Cecile <[email protected]>
Co-authored-by: daxpedda <[email protected]>
Co-authored-by: François <[email protected]>
1 parent 3200331 commit b359740

File tree

7 files changed

+154
-35
lines changed

7 files changed

+154
-35
lines changed

.github/workflows/ci.yml

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -145,6 +145,31 @@ jobs:
145145
- name: Check wasm
146146
run: cargo check --target wasm32-unknown-unknown
147147

148+
build-wasm-atomics:
149+
runs-on: ubuntu-latest
150+
timeout-minutes: 30
151+
needs: build
152+
steps:
153+
- uses: actions/checkout@v4
154+
- uses: actions/cache@v4
155+
with:
156+
path: |
157+
~/.cargo/bin/
158+
~/.cargo/registry/index/
159+
~/.cargo/registry/cache/
160+
~/.cargo/git/db/
161+
target/
162+
key: ubuntu-assets-cargo-build-wasm-nightly-${{ hashFiles('**/Cargo.toml') }}
163+
- uses: dtolnay/rust-toolchain@master
164+
with:
165+
toolchain: ${{ env.NIGHTLY_TOOLCHAIN }}
166+
targets: wasm32-unknown-unknown
167+
components: rust-src
168+
- name: Check wasm
169+
run: cargo check --target wasm32-unknown-unknown -Z build-std=std,panic_abort
170+
env:
171+
RUSTFLAGS: "-C target-feature=+atomics,+bulk-memory"
172+
148173
markdownlint:
149174
runs-on: ubuntu-latest
150175
timeout-minutes: 30

crates/bevy_render/Cargo.toml

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,9 @@ image = { version = "0.24", default-features = false }
6666
# misc
6767
codespan-reporting = "0.11.0"
6868
# `fragile-send-sync-non-atomic-wasm` feature means we can't use WASM threads for rendering
69-
# It is enabled for now to avoid having to do a significant overhaul of the renderer just for wasm
69+
# It is enabled for now to avoid having to do a significant overhaul of the renderer just for wasm.
70+
# When the `atomics` target feature is enabled, `fragile-send-sync-non-atomic-wasm` does nothing
71+
# and Bevy instead wraps `wgpu` types to verify they are not used off their origin thread.
7072
wgpu = { version = "0.19.3", default-features = false, features = [
7173
"wgsl",
7274
"dx12",
@@ -120,6 +122,9 @@ web-sys = { version = "0.3.67", features = [
120122
] }
121123
wasm-bindgen = "0.2"
122124

125+
[target.'cfg(all(target_arch = "wasm32", target_feature = "atomics"))'.dependencies]
126+
send_wrapper = "0.6.0"
127+
123128
[lints]
124129
workspace = true
125130

crates/bevy_render/src/lib.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,7 @@ use globals::GlobalsPlugin;
6262
use renderer::{RenderAdapter, RenderAdapterInfo, RenderDevice, RenderQueue};
6363

6464
use crate::deterministic::DeterministicRenderingConfig;
65+
use crate::renderer::WgpuWrapper;
6566
use crate::{
6667
camera::CameraPlugin,
6768
mesh::{morph::MorphPlugin, Mesh, MeshPlugin},
@@ -305,7 +306,7 @@ impl Plugin for RenderPlugin {
305306
queue,
306307
adapter_info,
307308
render_adapter,
308-
RenderInstance(Arc::new(instance)),
309+
RenderInstance(Arc::new(WgpuWrapper::new(instance))),
309310
));
310311
};
311312
// In wasm, spawn a task and detach it for execution

crates/bevy_render/src/render_resource/resource_macros.rs

Lines changed: 36 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -9,16 +9,25 @@
99
#[macro_export]
1010
macro_rules! render_resource_wrapper {
1111
($wrapper_type:ident, $wgpu_type:ty) => {
12+
#[cfg(not(all(target_arch = "wasm32", target_feature = "atomics")))]
1213
#[derive(Debug)]
1314
// SAFETY: while self is live, self.0 comes from `into_raw` of an Arc<$wgpu_type> with a strong ref.
1415
pub struct $wrapper_type(*const ());
1516

17+
#[cfg(all(target_arch = "wasm32", target_feature = "atomics"))]
18+
#[derive(Debug)]
19+
pub struct $wrapper_type(send_wrapper::SendWrapper<*const ()>);
20+
1621
impl $wrapper_type {
1722
pub fn new(value: $wgpu_type) -> Self {
1823
let arc = std::sync::Arc::new(value);
1924
let value_ptr = std::sync::Arc::into_raw(arc);
2025
let unit_ptr = value_ptr.cast::<()>();
21-
Self(unit_ptr)
26+
27+
#[cfg(not(all(target_arch = "wasm32", target_feature = "atomics")))]
28+
return Self(unit_ptr);
29+
#[cfg(all(target_arch = "wasm32", target_feature = "atomics"))]
30+
return Self(send_wrapper::SendWrapper::new(unit_ptr));
2231
}
2332

2433
pub fn try_unwrap(self) -> Option<$wgpu_type> {
@@ -53,13 +62,16 @@ macro_rules! render_resource_wrapper {
5362
}
5463
}
5564

65+
#[cfg(not(all(target_arch = "wasm32", target_feature = "atomics")))]
5666
// SAFETY: We manually implement Send and Sync, which is valid for Arc<T> when T: Send + Sync.
5767
// We ensure correctness by checking that $wgpu_type does implement Send and Sync.
5868
// If in future there is a case where a wrapper is required for a non-send/sync type
5969
// we can implement a macro variant that omits these manual Send + Sync impls
6070
unsafe impl Send for $wrapper_type {}
71+
#[cfg(not(all(target_arch = "wasm32", target_feature = "atomics")))]
6172
// SAFETY: As explained above, we ensure correctness by checking that $wgpu_type implements Send and Sync.
6273
unsafe impl Sync for $wrapper_type {}
74+
#[cfg(not(all(target_arch = "wasm32", target_feature = "atomics")))]
6375
const _: () = {
6476
trait AssertSendSyncBound: Send + Sync {}
6577
impl AssertSendSyncBound for $wgpu_type {}
@@ -75,7 +87,14 @@ macro_rules! render_resource_wrapper {
7587
std::mem::forget(arc);
7688
let cloned_value_ptr = std::sync::Arc::into_raw(cloned);
7789
let cloned_unit_ptr = cloned_value_ptr.cast::<()>();
78-
Self(cloned_unit_ptr)
90+
91+
#[cfg(not(all(target_arch = "wasm32", target_feature = "atomics")))]
92+
return Self(cloned_unit_ptr);
93+
94+
// Note: this implementation means that this Clone will panic
95+
// when called off the wgpu thread.
96+
#[cfg(all(target_arch = "wasm32", target_feature = "atomics"))]
97+
return Self(send_wrapper::SendWrapper::new(cloned_unit_ptr));
7998
}
8099
}
81100
};
@@ -85,16 +104,28 @@ macro_rules! render_resource_wrapper {
85104
#[macro_export]
86105
macro_rules! render_resource_wrapper {
87106
($wrapper_type:ident, $wgpu_type:ty) => {
107+
#[cfg(not(all(target_arch = "wasm32", target_feature = "atomics")))]
88108
#[derive(Clone, Debug)]
89109
pub struct $wrapper_type(std::sync::Arc<$wgpu_type>);
110+
#[cfg(all(target_arch = "wasm32", target_feature = "atomics"))]
111+
#[derive(Clone, Debug)]
112+
pub struct $wrapper_type(std::sync::Arc<send_wrapper::SendWrapper<$wgpu_type>>);
90113

91114
impl $wrapper_type {
92115
pub fn new(value: $wgpu_type) -> Self {
93-
Self(std::sync::Arc::new(value))
116+
#[cfg(not(all(target_arch = "wasm32", target_feature = "atomics")))]
117+
return Self(std::sync::Arc::new(value));
118+
119+
#[cfg(all(target_arch = "wasm32", target_feature = "atomics"))]
120+
return Self(std::sync::Arc::new(send_wrapper::SendWrapper::new(value)));
94121
}
95122

96123
pub fn try_unwrap(self) -> Option<$wgpu_type> {
97-
std::sync::Arc::try_unwrap(self.0).ok()
124+
#[cfg(not(all(target_arch = "wasm32", target_feature = "atomics")))]
125+
return std::sync::Arc::try_unwrap(self.0).ok();
126+
127+
#[cfg(all(target_arch = "wasm32", target_feature = "atomics"))]
128+
return std::sync::Arc::try_unwrap(self.0).ok().map(|p| p.take());
98129
}
99130
}
100131

@@ -106,6 +137,7 @@ macro_rules! render_resource_wrapper {
106137
}
107138
}
108139

140+
#[cfg(not(all(target_arch = "wasm32", target_feature = "atomics")))]
109141
const _: () = {
110142
trait AssertSendSyncBound: Send + Sync {}
111143
impl AssertSendSyncBound for $wgpu_type {}

crates/bevy_render/src/renderer/mod.rs

Lines changed: 79 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -117,23 +117,54 @@ pub fn render_system(world: &mut World, state: &mut SystemState<Query<Entity, Wi
117117
}
118118
}
119119

120+
/// A wrapper to safely make `wgpu` types Send / Sync on web with atomics enabled.
121+
/// On web with `atomics` enabled the inner value can only be accessed
122+
/// or dropped on the `wgpu` thread or else a panic will occur.
123+
/// On other platforms the wrapper simply contains the wrapped value.
124+
#[cfg(not(all(target_arch = "wasm32", target_feature = "atomics")))]
125+
#[derive(Debug, Clone, Deref, DerefMut)]
126+
pub struct WgpuWrapper<T>(T);
127+
#[cfg(all(target_arch = "wasm32", target_feature = "atomics"))]
128+
#[derive(Debug, Clone, Deref, DerefMut)]
129+
pub struct WgpuWrapper<T>(send_wrapper::SendWrapper<T>);
130+
131+
// SAFETY: SendWrapper is always Send + Sync.
132+
#[cfg(all(target_arch = "wasm32", target_feature = "atomics"))]
133+
unsafe impl<T> Send for WgpuWrapper<T> {}
134+
#[cfg(all(target_arch = "wasm32", target_feature = "atomics"))]
135+
unsafe impl<T> Sync for WgpuWrapper<T> {}
136+
137+
#[cfg(not(all(target_arch = "wasm32", target_feature = "atomics")))]
138+
impl<T> WgpuWrapper<T> {
139+
pub fn new(t: T) -> Self {
140+
Self(t)
141+
}
142+
}
143+
144+
#[cfg(all(target_arch = "wasm32", target_feature = "atomics"))]
145+
impl<T> WgpuWrapper<T> {
146+
pub fn new(t: T) -> Self {
147+
Self(send_wrapper::SendWrapper::new(t))
148+
}
149+
}
150+
120151
/// This queue is used to enqueue tasks for the GPU to execute asynchronously.
121152
#[derive(Resource, Clone, Deref, DerefMut)]
122-
pub struct RenderQueue(pub Arc<Queue>);
153+
pub struct RenderQueue(pub Arc<WgpuWrapper<Queue>>);
123154

124155
/// The handle to the physical device being used for rendering.
125156
/// See [`Adapter`] for more info.
126157
#[derive(Resource, Clone, Debug, Deref, DerefMut)]
127-
pub struct RenderAdapter(pub Arc<Adapter>);
158+
pub struct RenderAdapter(pub Arc<WgpuWrapper<Adapter>>);
128159

129160
/// The GPU instance is used to initialize the [`RenderQueue`] and [`RenderDevice`],
130161
/// as well as to create [`WindowSurfaces`](crate::view::window::WindowSurfaces).
131162
#[derive(Resource, Clone, Deref, DerefMut)]
132-
pub struct RenderInstance(pub Arc<Instance>);
163+
pub struct RenderInstance(pub Arc<WgpuWrapper<Instance>>);
133164

134165
/// The [`AdapterInfo`] of the adapter in use by the renderer.
135166
#[derive(Resource, Clone, Deref, DerefMut)]
136-
pub struct RenderAdapterInfo(pub AdapterInfo);
167+
pub struct RenderAdapterInfo(pub WgpuWrapper<AdapterInfo>);
137168

138169
const GPU_NOT_FOUND_ERROR_MESSAGE: &str = if cfg!(target_os = "linux") {
139170
"Unable to find a GPU! Make sure you have installed required drivers! For extra information, see: https://github.com/bevyengine/bevy/blob/latest/docs/linux_dependencies.md"
@@ -300,12 +331,12 @@ pub async fn initialize_renderer(
300331
)
301332
.await
302333
.unwrap();
303-
let queue = Arc::new(queue);
304-
let adapter = Arc::new(adapter);
334+
let queue = Arc::new(WgpuWrapper::new(queue));
335+
let adapter = Arc::new(WgpuWrapper::new(adapter));
305336
(
306337
RenderDevice::from(device),
307338
RenderQueue(queue),
308-
RenderAdapterInfo(adapter_info),
339+
RenderAdapterInfo(WgpuWrapper::new(adapter_info)),
309340
RenderAdapter(adapter),
310341
)
311342
}
@@ -403,7 +434,10 @@ impl<'w> RenderContext<'w> {
403434
/// buffer.
404435
pub fn add_command_buffer_generation_task(
405436
&mut self,
437+
#[cfg(not(all(target_arch = "wasm32", target_feature = "atomics")))]
406438
task: impl FnOnce(RenderDevice) -> CommandBuffer + 'w + Send,
439+
#[cfg(all(target_arch = "wasm32", target_feature = "atomics"))]
440+
task: impl FnOnce(RenderDevice) -> CommandBuffer + 'w,
407441
) {
408442
self.flush_encoder();
409443

@@ -425,28 +459,46 @@ impl<'w> RenderContext<'w> {
425459
self.flush_encoder();
426460

427461
let mut command_buffers = Vec::with_capacity(self.command_buffer_queue.len());
428-
let mut task_based_command_buffers = ComputeTaskPool::get().scope(|task_pool| {
429-
for (i, queued_command_buffer) in self.command_buffer_queue.into_iter().enumerate() {
430-
match queued_command_buffer {
431-
QueuedCommandBuffer::Ready(command_buffer) => {
432-
command_buffers.push((i, command_buffer));
433-
}
434-
QueuedCommandBuffer::Task(command_buffer_generation_task) => {
435-
let render_device = self.render_device.clone();
436-
if self.force_serial {
437-
command_buffers
438-
.push((i, command_buffer_generation_task(render_device)));
439-
} else {
440-
task_pool.spawn(async move {
441-
(i, command_buffer_generation_task(render_device))
442-
});
462+
463+
#[cfg(not(all(target_arch = "wasm32", target_feature = "atomics")))]
464+
{
465+
let mut task_based_command_buffers = ComputeTaskPool::get().scope(|task_pool| {
466+
for (i, queued_command_buffer) in self.command_buffer_queue.into_iter().enumerate()
467+
{
468+
match queued_command_buffer {
469+
QueuedCommandBuffer::Ready(command_buffer) => {
470+
command_buffers.push((i, command_buffer));
471+
}
472+
QueuedCommandBuffer::Task(command_buffer_generation_task) => {
473+
let render_device = self.render_device.clone();
474+
if self.force_serial {
475+
command_buffers
476+
.push((i, command_buffer_generation_task(render_device)));
477+
} else {
478+
task_pool.spawn(async move {
479+
(i, command_buffer_generation_task(render_device))
480+
});
481+
}
443482
}
444483
}
445484
}
485+
});
486+
command_buffers.append(&mut task_based_command_buffers);
487+
}
488+
489+
#[cfg(all(target_arch = "wasm32", target_feature = "atomics"))]
490+
for (i, queued_command_buffer) in self.command_buffer_queue.into_iter().enumerate() {
491+
match queued_command_buffer {
492+
QueuedCommandBuffer::Ready(command_buffer) => {
493+
command_buffers.push((i, command_buffer));
494+
}
495+
QueuedCommandBuffer::Task(command_buffer_generation_task) => {
496+
let render_device = self.render_device.clone();
497+
command_buffers.push((i, command_buffer_generation_task(render_device)));
498+
}
446499
}
447-
});
500+
}
448501

449-
command_buffers.append(&mut task_based_command_buffers);
450502
command_buffers.sort_unstable_by_key(|(i, _)| *i);
451503

452504
let mut command_buffers = command_buffers
@@ -481,5 +533,8 @@ impl<'w> RenderContext<'w> {
481533

482534
enum QueuedCommandBuffer<'w> {
483535
Ready(CommandBuffer),
536+
#[cfg(not(all(target_arch = "wasm32", target_feature = "atomics")))]
484537
Task(Box<dyn FnOnce(RenderDevice) -> CommandBuffer + 'w + Send>),
538+
#[cfg(all(target_arch = "wasm32", target_feature = "atomics"))]
539+
Task(Box<dyn FnOnce(RenderDevice) -> CommandBuffer + 'w>),
485540
}

crates/bevy_render/src/renderer/render_device.rs

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,19 +11,20 @@ use wgpu::{
1111
use super::RenderQueue;
1212

1313
use crate::render_resource::resource_macros::*;
14+
use crate::WgpuWrapper;
1415

1516
render_resource_wrapper!(ErasedRenderDevice, wgpu::Device);
1617

1718
/// This GPU device is responsible for the creation of most rendering and compute resources.
1819
#[derive(Resource, Clone)]
1920
pub struct RenderDevice {
20-
device: ErasedRenderDevice,
21+
device: WgpuWrapper<ErasedRenderDevice>,
2122
}
2223

2324
impl From<wgpu::Device> for RenderDevice {
2425
fn from(device: wgpu::Device) -> Self {
2526
Self {
26-
device: ErasedRenderDevice::new(device),
27+
device: WgpuWrapper::new(ErasedRenderDevice::new(device)),
2728
}
2829
}
2930
}

crates/bevy_render/src/view/window/mod.rs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ use crate::{
44
},
55
renderer::{RenderAdapter, RenderDevice, RenderInstance},
66
texture::TextureFormatPixelInfo,
7-
Extract, ExtractSchedule, Render, RenderApp, RenderSet,
7+
Extract, ExtractSchedule, Render, RenderApp, RenderSet, WgpuWrapper,
88
};
99
use bevy_app::{App, Plugin};
1010
use bevy_ecs::{entity::EntityHashMap, prelude::*};
@@ -198,7 +198,7 @@ fn extract_windows(
198198

199199
struct SurfaceData {
200200
// TODO: what lifetime should this be?
201-
surface: wgpu::Surface<'static>,
201+
surface: WgpuWrapper<wgpu::Surface<'static>>,
202202
configuration: SurfaceConfiguration,
203203
}
204204

@@ -488,7 +488,7 @@ pub fn create_surfaces(
488488
render_device.configure_surface(&surface, &configuration);
489489

490490
SurfaceData {
491-
surface,
491+
surface: WgpuWrapper::new(surface),
492492
configuration,
493493
}
494494
});

0 commit comments

Comments
 (0)