From 0bc8f1f58004c870ce9fdf8f233620ac910611a1 Mon Sep 17 00:00:00 2001 From: Christopher Biscardi Date: Sun, 28 Dec 2025 19:53:22 -0800 Subject: [PATCH 01/16] compute-shader mesh generation example --- Cargo.toml | 11 + assets/shaders/compute_mesh.wgsl | 82 ++++++++ examples/shader/compute_mesh.rs | 340 +++++++++++++++++++++++++++++++ 3 files changed, 433 insertions(+) create mode 100644 assets/shaders/compute_mesh.wgsl create mode 100644 examples/shader/compute_mesh.rs diff --git a/Cargo.toml b/Cargo.toml index 7dda86c8f1418..65a2f1589b5b5 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -3020,6 +3020,17 @@ description = "A very simple compute shader that writes to a buffer that is read category = "Shaders" wasm = false +[[example]] +name = "compute_mesh" +path = "examples/shader/compute_mesh.rs" +doc-scrape-examples = true + +[package.metadata.example.compute_mesh] +name = "Compute Shader Mesh" +description = "A compute shader that generates a mesh that is controlled by a Handle" +category = "Shaders" +wasm = false + [[example]] name = "array_texture" path = "examples/shader/array_texture.rs" diff --git a/assets/shaders/compute_mesh.wgsl b/assets/shaders/compute_mesh.wgsl new file mode 100644 index 0000000000000..8adfff19c892a --- /dev/null +++ b/assets/shaders/compute_mesh.wgsl @@ -0,0 +1,82 @@ +// This shader is used for the gpu_readback example +// The actual work it does is not important for the example + +// This is the data that lives in the gpu only buffer +@group(0) @binding(0) var vertex_data: array; +@group(0) @binding(1) var index_data: array; + +@compute @workgroup_size(1) +fn main(@builtin(global_invocation_id) global_id: vec3) { + // We use the global_id to index the array to make sure we don't + // access data used in another workgroup + // data[global_id.x] += 1u; + for (var i = 0u; i < 192; i++) { + vertex_data[i] = vertices[i]; + } + for (var i = 0u; i < 36; i++) { + index_data[i] = u32(indices[i]); + } + // data[0] = -min.x; + // data[1] = min.y; + 
// data[2] = max.z; + // data[3] = 0.; + // data[4] = 0.; + // data[5] = 1.; + // data[6] = 0.; + // data[7] = 0.; + // Write the same data to the texture + // textureStore(texture, vec2(i32(global_id.x), 0), vec4(data[global_id.x], 0, 0, 0)); +} + +const half_size = vec3(2.); +const min = -half_size; +const max = half_size; + +// Suppose Y-up right hand, and camera look from +Z to -Z +const vertices = array( + // xyz, normal.xyz, uv.xy + // Front + min.x, min.y, max.z, 0.0, 0.0, 1.0, 0.0, 0.0, + max.x, min.y, max.z, 0.0, 0.0, 1.0, 1.0, 0.0, + max.x, max.y, max.z, 0.0, 0.0, 1.0, 1.0, 1.0, + min.x, max.y, max.z, 0.0, 0.0, 1.0, 0.0, 1.0, + // Back + min.x, max.y, min.z, 0.0, 0.0, -1.0, 1.0, 0.0, + max.x, max.y, min.z, 0.0, 0.0, -1.0, 0.0, 0.0, + max.x, min.y, min.z, 0.0, 0.0, -1.0, 0.0, 1.0, + min.x, min.y, min.z, 0.0, 0.0, -1.0, 1.0, 1.0, + // Right + max.x, min.y, min.z, 1.0, 0.0, 0.0, 0.0, 0.0, + max.x, max.y, min.z, 1.0, 0.0, 0.0, 1.0, 0.0, + max.x, max.y, max.z, 1.0, 0.0, 0.0, 1.0, 1.0, + max.x, min.y, max.z, 1.0, 0.0, 0.0, 0.0, 1.0, + // Left + min.x, min.y, max.z, -1.0, 0.0, 0.0, 1.0, 0.0, + min.x, max.y, max.z, -1.0, 0.0, 0.0, 0.0, 0.0, + min.x, max.y, min.z, -1.0, 0.0, 0.0, 0.0, 1.0, + min.x, min.y, min.z, -1.0, 0.0, 0.0, 1.0, 1.0, + // Top + max.x, max.y, min.z, 0.0, 1.0, 0.0, 1.0, 0.0, + min.x, max.y, min.z, 0.0, 1.0, 0.0, 0.0, 0.0, + min.x, max.y, max.z, 0.0, 1.0, 0.0, 0.0, 1.0, + max.x, max.y, max.z, 0.0, 1.0, 0.0, 1.0, 1.0, + // Bottom + max.x, min.y, max.z, 0.0, -1.0, 0.0, 0.0, 0.0, + min.x, min.y, max.z, 0.0, -1.0, 0.0, 1.0, 0.0, + min.x, min.y, min.z, 0.0, -1.0, 0.0, 1.0, 1.0, + max.x, min.y, min.z, 0.0, -1.0, 0.0, 0.0, 1.0 +); + + +// let positions: Vec<_> = vertices.iter().map(|(p, _, _)| *p).collect(); +// let normals: Vec<_> = vertices.iter().map(|(_, n, _)| *n).collect(); +// let uvs: Vec<_> = vertices.iter().map(|(_, _, uv)| *uv).collect(); + +const indices = array( + 0, 1, 2, 2, 3, 0, // front + 4, 5, 6, 6, 7, 4, // back + 8, 9, 10, 10, 11, 8, 
// right + 12, 13, 14, 14, 15, 12, // left + 16, 17, 18, 18, 19, 16, // top + 20, 21, 22, 22, 23, 20, // bottom +); diff --git a/examples/shader/compute_mesh.rs b/examples/shader/compute_mesh.rs new file mode 100644 index 0000000000000..bc72615ada3c4 --- /dev/null +++ b/examples/shader/compute_mesh.rs @@ -0,0 +1,340 @@ +//! Simple example demonstrating the use of the [`Readback`] component to read back data from the GPU +//! using both a storage buffer and texture. + +use bevy::{ + asset::RenderAssetUsages, + color::palettes::tailwind::RED_400, + prelude::*, + render::{ + extract_resource::{ExtractResource, ExtractResourcePlugin}, + gpu_readback::{Readback, ReadbackComplete}, + render_asset::RenderAssets, + render_graph::{self, RenderGraph, RenderLabel}, + render_resource::{ + binding_types::{storage_buffer, texture_storage_2d}, + *, + }, + renderer::{RenderContext, RenderDevice}, + storage::{GpuShaderStorageBuffer, ShaderStorageBuffer}, + texture::GpuImage, + Render, RenderApp, RenderStartup, RenderSystems, + }, +}; +use bevy_render::{ + extract_component::{ExtractComponent, ExtractComponentPlugin}, + mesh::{allocator::MeshAllocator, RenderMesh}, +}; + +/// This example uses a shader source file from the assets subdirectory +const SHADER_ASSET_PATH: &str = "shaders/compute_mesh.wgsl"; + +// The length of the buffer sent to the gpu +const BUFFER_LEN: usize = 768; + +fn main() { + App::new() + .add_plugins(( + DefaultPlugins, + GpuReadbackPlugin, + ExtractResourcePlugin::::default(), + ExtractComponentPlugin::::default(), + )) + .insert_resource(ClearColor(Color::BLACK)) + .add_systems(Startup, setup) + // .add_systems(Update, kick_meshes) + .run(); +} + +fn kick_meshes(mut query: Query<&mut Mesh3d>) { + for mesh in &mut query {} +} +// We need a plugin to organize all the systems and render node required for this example +struct GpuReadbackPlugin; +impl Plugin for GpuReadbackPlugin { + fn build(&self, app: &mut App) { + let Some(render_app) = 
app.get_sub_app_mut(RenderApp) else { + return; + }; + render_app + .init_resource::() + .add_systems( + RenderStartup, + (init_compute_pipeline, add_compute_render_graph_node), + ) + .add_systems( + Render, + ( + prepare_bind_group + .in_set(RenderSystems::PrepareBindGroups) + // We don't need to recreate the bind group every frame + .run_if(not(resource_exists::)), + prepare_chunks, + ), + ); + } +} + +#[derive(Component, ExtractComponent, Clone)] +struct GenerateMesh(Handle); + +#[derive(Resource, ExtractResource, Clone)] +struct ComputedBuffers { + vertex: Handle, + index: Handle, +} + +fn setup( + mut commands: Commands, + mut images: ResMut>, + mut meshes: ResMut>, + mut materials: ResMut>, + mut buffers: ResMut>, +) { + // a truly empty mesh will error if used in Mesh3d + // so use a sphere for the example + let mut empty_mesh = Cuboid::new(0.1, 0.1, 0.1).mesh().build(); + let num_indices = empty_mesh.indices().unwrap().len(); + info!( + buffer_size=?empty_mesh.get_vertex_buffer_size(), + vertex_size=?empty_mesh.get_vertex_size(), + num_indices=?num_indices + ); + empty_mesh.asset_usage = RenderAssetUsages::RENDER_WORLD; + + // Create a storage buffer with some data + let buffer: Vec = vec![0.; BUFFER_LEN]; + let mut buffer = ShaderStorageBuffer::from(buffer); + // We need to enable the COPY_SRC usage so we can copy the buffer to the cpu + buffer.buffer_description.usage |= BufferUsages::COPY_SRC; + let vertex_buffer = buffers.add(buffer); + + // Create a storage buffer with some data + let buffer: Vec = vec![0; 36 * 32]; + let mut buffer = ShaderStorageBuffer::from(buffer); + // We need to enable the COPY_SRC usage so we can copy the buffer to the cpu + buffer.buffer_description.usage |= BufferUsages::COPY_SRC; + let index_buffer = buffers.add(buffer); + + // Create a storage texture with some data + let size = Extent3d { + width: BUFFER_LEN as u32, + height: 1, + ..default() + }; + + commands.insert_resource(ComputedBuffers { + vertex: vertex_buffer, + 
index: index_buffer, + }); + // let mut empty_mesh = Mesh::new( + // PrimitiveTopology::TriangleList, + // RenderAssetUsages::RENDER_WORLD, + // ); + + let handle = meshes.add(empty_mesh); + commands.spawn(( + GenerateMesh(handle.clone()), + Mesh3d(handle.clone()), + MeshMaterial3d(materials.add(StandardMaterial { + base_color: RED_400.into(), + ..default() + })), + Transform::from_xyz(0., 1., 0.), + )); + + // commands.spawn(( + // Mesh3d(handle), + // MeshMaterial3d(materials.add(StandardMaterial { + // base_color: RED_400.into(), + // ..default() + // })), + // Transform::from_xyz(2., 1., 0.), + // )); + + // // spawn some scene + // commands.spawn(( + // Mesh3d(meshes.add(Circle::new(4.0))), + // MeshMaterial3d(materials.add(Color::WHITE)), + // Transform::from_rotation(Quat::from_rotation_x(-std::f32::consts::FRAC_PI_2)), + // )); + commands.spawn(( + PointLight { + shadows_enabled: true, + ..default() + }, + Transform::from_xyz(4.0, 8.0, 4.0), + )); + // camera + commands.spawn(( + Camera3d::default(), + Transform::from_xyz(-2.5, 4.5, 9.0).looking_at(Vec3::ZERO, Vec3::Y), + )); +} + +fn add_compute_render_graph_node(mut render_graph: ResMut) { + // Add the compute node as a top-level node to the render graph. This means it will only execute + // once per frame. Normally, adding a node would use the `RenderGraphApp::add_render_graph_node` + // method, but it does not allow adding as a top-level node. 
+ render_graph.add_node(ComputeNodeLabel, ComputeNode::default()); +} + +#[derive(Resource)] +struct GpuBufferBindGroup(BindGroup); + +#[derive(Resource, Default)] +struct Chunks(Vec>); + +fn prepare_bind_group( + mut commands: Commands, + pipeline: Res, + render_device: Res, + pipeline_cache: Res, + computed_buffers: Res, + buffers: Res>, +) { + let vertex_buffer = buffers.get(&computed_buffers.vertex).unwrap(); + let index_buffer = buffers.get(&computed_buffers.index).unwrap(); + + let bind_group = render_device.create_bind_group( + None, + &pipeline_cache.get_bind_group_layout(&pipeline.layout), + &BindGroupEntries::sequential(( + vertex_buffer.buffer.as_entire_buffer_binding(), + index_buffer.buffer.as_entire_buffer_binding(), + )), + ); + commands.insert_resource(GpuBufferBindGroup(bind_group)); +} + +fn prepare_chunks( + meshes_to_generate: Query<&GenerateMesh>, + mut chunks: ResMut, + mesh_handles: Res>, +) { + let chunk_data: Vec> = meshes_to_generate + .iter() + // sometimes RenderMesh doesn't exist yet! 
+ .map(|gmesh| gmesh.0.id()) + .collect(); + // dbg!(chunk_data); + chunks.0 = chunk_data; +} + +#[derive(Resource)] +struct ComputePipeline { + layout: BindGroupLayoutDescriptor, + pipeline: CachedComputePipelineId, +} + +// init only happens once +fn init_compute_pipeline( + mut commands: Commands, + asset_server: Res, + pipeline_cache: Res, +) { + let layout = BindGroupLayoutDescriptor::new( + "", + &BindGroupLayoutEntries::sequential( + ShaderStages::COMPUTE, + ( + // vertices + storage_buffer::>(false), + // indices + storage_buffer::>(false), + ), + ), + ); + let shader = asset_server.load(SHADER_ASSET_PATH); + let pipeline = pipeline_cache.queue_compute_pipeline(ComputePipelineDescriptor { + label: Some("Mesh generation compute shader".into()), + layout: vec![layout.clone()], + shader: shader.clone(), + ..default() + }); + commands.insert_resource(ComputePipeline { layout, pipeline }); +} + +/// Label to identify the node in the render graph +#[derive(Debug, Hash, PartialEq, Eq, Clone, RenderLabel)] +struct ComputeNodeLabel; + +/// The node that will execute the compute shader +#[derive(Default)] +struct ComputeNode {} + +impl render_graph::Node for ComputeNode { + fn run( + &self, + _graph: &mut render_graph::RenderGraphContext, + render_context: &mut RenderContext, + world: &World, + ) -> Result<(), render_graph::NodeRunError> { + let Some(chunks) = world.get_resource::() else { + info!("no chunks"); + return Ok(()); + }; + for mesh_id in &chunks.0 { + let pipeline_cache = world.resource::(); + let pipeline = world.resource::(); + let bind_group = world.resource::(); + + if let Some(init_pipeline) = pipeline_cache.get_compute_pipeline(pipeline.pipeline) { + let mut pass = + render_context + .command_encoder() + .begin_compute_pass(&ComputePassDescriptor { + label: Some("Mesh generation compute pass"), + ..default() + }); + + pass.set_bind_group(0, &bind_group.0, &[]); + pass.set_pipeline(init_pipeline); + pass.dispatch_workgroups(1, 1, 1); + } + let 
computed_buffers = world.resource::(); + let buffers = world.resource::>(); + let mesh_allocator = world.resource::(); + + // these can be None, read the mesh allocator docs + // to understand when. + let (vertex, index) = mesh_allocator.mesh_slabs(&mesh_id); + + let vertex_data_from_shader = buffers.get(&computed_buffers.vertex).unwrap(); + let vertex_buffer_slice = mesh_allocator.mesh_vertex_slice(mesh_id).unwrap(); + info_once!( + data_buffer_size=?vertex_data_from_shader.buffer.size(), + range_start=?vertex_buffer_slice.range.start, + range_end=?vertex_buffer_slice.range.end, + "vertex", + ); + + render_context.command_encoder().copy_buffer_to_buffer( + &vertex_data_from_shader.buffer, + 0, + vertex_buffer_slice.buffer, + 0, + // vertex_buffer_slice.range.start as u64, + vertex_data_from_shader.buffer.size(), + ); + + let index_data_from_shader = buffers.get(&computed_buffers.index).unwrap(); + let index_buffer_slice = mesh_allocator.mesh_index_slice(mesh_id).unwrap(); + info_once!( + data_buffer_size=?index_data_from_shader.buffer.size(), + range_start=?index_buffer_slice.range.start, + range_end=?index_buffer_slice.range.end, + "index" + ); + render_context.command_encoder().copy_buffer_to_buffer( + &index_data_from_shader.buffer, + 0, + index_buffer_slice.buffer, + 0, + // index_buffer_slice.range.start as u64, + index_data_from_shader.buffer.size(), + ); + } + + Ok(()) + } +} From c1ad3bff8765a4bcfa695ae2797959b35bbbbd23 Mon Sep 17 00:00:00 2001 From: Christopher Biscardi Date: Sun, 28 Dec 2025 23:14:36 -0800 Subject: [PATCH 02/16] mesh_allocator storage buffers --- assets/shaders/compute_mesh.wgsl | 33 ++---- examples/shader/compute_mesh.rs | 194 ++++++++++--------------------- 2 files changed, 73 insertions(+), 154 deletions(-) diff --git a/assets/shaders/compute_mesh.wgsl b/assets/shaders/compute_mesh.wgsl index 8adfff19c892a..e872f08126d49 100644 --- a/assets/shaders/compute_mesh.wgsl +++ b/assets/shaders/compute_mesh.wgsl @@ -1,33 +1,27 @@ // This 
shader is used for the gpu_readback example // The actual work it does is not important for the example +struct FirstIndex { + first_vertex_index: u32, + first_index_index: u32, +} + // This is the data that lives in the gpu only buffer -@group(0) @binding(0) var vertex_data: array; -@group(0) @binding(1) var index_data: array; +@group(0) @binding(0) var first_index: FirstIndex; +@group(0) @binding(1) var vertex_data: array; +@group(0) @binding(2) var index_data: array; @compute @workgroup_size(1) fn main(@builtin(global_invocation_id) global_id: vec3) { - // We use the global_id to index the array to make sure we don't - // access data used in another workgroup - // data[global_id.x] += 1u; for (var i = 0u; i < 192; i++) { - vertex_data[i] = vertices[i]; + vertex_data[i + first_index.first_vertex_index * 32 ] = vertices[i ]; } for (var i = 0u; i < 36; i++) { - index_data[i] = u32(indices[i]); + index_data[i + first_index.first_index_index * 6] = u32(indices[i]); } - // data[0] = -min.x; - // data[1] = min.y; - // data[2] = max.z; - // data[3] = 0.; - // data[4] = 0.; - // data[5] = 1.; - // data[6] = 0.; - // data[7] = 0.; - // Write the same data to the texture - // textureStore(texture, vec2(i32(global_id.x), 0), vec4(data[global_id.x], 0, 0, 0)); } +// hardcoded compute shader data. 
const half_size = vec3(2.); const min = -half_size; const max = half_size; @@ -67,11 +61,6 @@ const vertices = array( max.x, min.y, min.z, 0.0, -1.0, 0.0, 0.0, 1.0 ); - -// let positions: Vec<_> = vertices.iter().map(|(p, _, _)| *p).collect(); -// let normals: Vec<_> = vertices.iter().map(|(_, n, _)| *n).collect(); -// let uvs: Vec<_> = vertices.iter().map(|(_, _, uv)| *uv).collect(); - const indices = array( 0, 1, 2, 2, 3, 0, // front 4, 5, 6, 6, 7, 4, // back diff --git a/examples/shader/compute_mesh.rs b/examples/shader/compute_mesh.rs index bc72615ada3c4..5fe6d8f7d2288 100644 --- a/examples/shader/compute_mesh.rs +++ b/examples/shader/compute_mesh.rs @@ -4,6 +4,7 @@ use bevy::{ asset::RenderAssetUsages, color::palettes::tailwind::RED_400, + mesh::{Indices, MeshVertexAttribute}, prelude::*, render::{ extract_resource::{ExtractResource, ExtractResourcePlugin}, @@ -23,66 +24,55 @@ use bevy::{ use bevy_render::{ extract_component::{ExtractComponent, ExtractComponentPlugin}, mesh::{allocator::MeshAllocator, RenderMesh}, + render_resource::binding_types::uniform_buffer, + renderer::RenderQueue, }; /// This example uses a shader source file from the assets subdirectory const SHADER_ASSET_PATH: &str = "shaders/compute_mesh.wgsl"; -// The length of the buffer sent to the gpu -const BUFFER_LEN: usize = 768; - fn main() { App::new() .add_plugins(( DefaultPlugins, - GpuReadbackPlugin, - ExtractResourcePlugin::::default(), + ComputeShaderMeshGeneratorPlugin, ExtractComponentPlugin::::default(), )) .insert_resource(ClearColor(Color::BLACK)) .add_systems(Startup, setup) - // .add_systems(Update, kick_meshes) .run(); } -fn kick_meshes(mut query: Query<&mut Mesh3d>) { - for mesh in &mut query {} -} // We need a plugin to organize all the systems and render node required for this example -struct GpuReadbackPlugin; -impl Plugin for GpuReadbackPlugin { +struct ComputeShaderMeshGeneratorPlugin; +impl Plugin for ComputeShaderMeshGeneratorPlugin { fn build(&self, app: &mut App) { let 
Some(render_app) = app.get_sub_app_mut(RenderApp) else { return; }; + render_app .init_resource::() .add_systems( RenderStartup, (init_compute_pipeline, add_compute_render_graph_node), ) - .add_systems( - Render, - ( - prepare_bind_group - .in_set(RenderSystems::PrepareBindGroups) - // We don't need to recreate the bind group every frame - .run_if(not(resource_exists::)), - prepare_chunks, - ), - ); + .add_systems(Render, prepare_chunks); + } + fn finish(&self, app: &mut App) { + let Some(render_app) = app.get_sub_app_mut(RenderApp) else { + return; + }; + render_app + .world_mut() + .resource_mut::() + .extra_buffer_usages = BufferUsages::STORAGE; } } #[derive(Component, ExtractComponent, Clone)] struct GenerateMesh(Handle); -#[derive(Resource, ExtractResource, Clone)] -struct ComputedBuffers { - vertex: Handle, - index: Handle, -} - fn setup( mut commands: Commands, mut images: ResMut>, @@ -92,45 +82,19 @@ fn setup( ) { // a truly empty mesh will error if used in Mesh3d // so use a sphere for the example - let mut empty_mesh = Cuboid::new(0.1, 0.1, 0.1).mesh().build(); - let num_indices = empty_mesh.indices().unwrap().len(); - info!( - buffer_size=?empty_mesh.get_vertex_buffer_size(), - vertex_size=?empty_mesh.get_vertex_size(), - num_indices=?num_indices + let mut empty_mesh = Mesh::new( + PrimitiveTopology::TriangleList, + RenderAssetUsages::RENDER_WORLD, ); + // set up what we want to output from the compute shader. 
+ // We're using 36 indices, 24 vertices which is directly taken from + // the Bevy Cuboid mesh + empty_mesh.insert_attribute(Mesh::ATTRIBUTE_POSITION, vec![[0.; 3]; 24]); + empty_mesh.insert_attribute(Mesh::ATTRIBUTE_NORMAL, vec![[0.; 3]; 24]); + empty_mesh.insert_attribute(Mesh::ATTRIBUTE_UV_0, vec![[0.; 2]; 24]); + empty_mesh.insert_indices(Indices::U32(vec![0; 36])); empty_mesh.asset_usage = RenderAssetUsages::RENDER_WORLD; - // Create a storage buffer with some data - let buffer: Vec = vec![0.; BUFFER_LEN]; - let mut buffer = ShaderStorageBuffer::from(buffer); - // We need to enable the COPY_SRC usage so we can copy the buffer to the cpu - buffer.buffer_description.usage |= BufferUsages::COPY_SRC; - let vertex_buffer = buffers.add(buffer); - - // Create a storage buffer with some data - let buffer: Vec = vec![0; 36 * 32]; - let mut buffer = ShaderStorageBuffer::from(buffer); - // We need to enable the COPY_SRC usage so we can copy the buffer to the cpu - buffer.buffer_description.usage |= BufferUsages::COPY_SRC; - let index_buffer = buffers.add(buffer); - - // Create a storage texture with some data - let size = Extent3d { - width: BUFFER_LEN as u32, - height: 1, - ..default() - }; - - commands.insert_resource(ComputedBuffers { - vertex: vertex_buffer, - index: index_buffer, - }); - // let mut empty_mesh = Mesh::new( - // PrimitiveTopology::TriangleList, - // RenderAssetUsages::RENDER_WORLD, - // ); - let handle = meshes.add(empty_mesh); commands.spawn(( GenerateMesh(handle.clone()), @@ -178,34 +142,9 @@ fn add_compute_render_graph_node(mut render_graph: ResMut) { render_graph.add_node(ComputeNodeLabel, ComputeNode::default()); } -#[derive(Resource)] -struct GpuBufferBindGroup(BindGroup); - #[derive(Resource, Default)] struct Chunks(Vec>); -fn prepare_bind_group( - mut commands: Commands, - pipeline: Res, - render_device: Res, - pipeline_cache: Res, - computed_buffers: Res, - buffers: Res>, -) { - let vertex_buffer = 
buffers.get(&computed_buffers.vertex).unwrap(); - let index_buffer = buffers.get(&computed_buffers.index).unwrap(); - - let bind_group = render_device.create_bind_group( - None, - &pipeline_cache.get_bind_group_layout(&pipeline.layout), - &BindGroupEntries::sequential(( - vertex_buffer.buffer.as_entire_buffer_binding(), - index_buffer.buffer.as_entire_buffer_binding(), - )), - ); - commands.insert_resource(GpuBufferBindGroup(bind_group)); -} - fn prepare_chunks( meshes_to_generate: Query<&GenerateMesh>, mut chunks: ResMut, @@ -237,6 +176,7 @@ fn init_compute_pipeline( &BindGroupLayoutEntries::sequential( ShaderStages::COMPUTE, ( + uniform_buffer::(false), // vertices storage_buffer::>(false), // indices @@ -262,6 +202,12 @@ struct ComputeNodeLabel; #[derive(Default)] struct ComputeNode {} +#[derive(ShaderType)] +struct FirstIndex { + first_vertex_index: u32, + first_index_index: u32, +} + impl render_graph::Node for ComputeNode { fn run( &self, @@ -273,12 +219,38 @@ impl render_graph::Node for ComputeNode { info!("no chunks"); return Ok(()); }; + let mesh_allocator = world.resource::(); + for mesh_id in &chunks.0 { let pipeline_cache = world.resource::(); let pipeline = world.resource::(); - let bind_group = world.resource::(); if let Some(init_pipeline) = pipeline_cache.get_compute_pipeline(pipeline.pipeline) { + let vertex_buffer_slice = mesh_allocator.mesh_vertex_slice(mesh_id).unwrap(); + let index_buffer_slice = mesh_allocator.mesh_index_slice(mesh_id).unwrap(); + + dbg!(&vertex_buffer_slice.range); + dbg!(&index_buffer_slice.range); + + let first = FirstIndex { + first_vertex_index: vertex_buffer_slice.range.start * 4, + first_index_index: index_buffer_slice.range.start * 4, + }; + let mut uniforms = UniformBuffer::from(first); + uniforms.write_buffer( + render_context.render_device(), + world.resource::(), + ); + let bind_group = render_context.render_device().create_bind_group( + None, + &pipeline_cache.get_bind_group_layout(&pipeline.layout), + 
&BindGroupEntries::sequential(( + &uniforms, + vertex_buffer_slice.buffer.as_entire_buffer_binding(), + index_buffer_slice.buffer.as_entire_buffer_binding(), + )), + ); + let mut pass = render_context .command_encoder() @@ -287,52 +259,10 @@ impl render_graph::Node for ComputeNode { ..default() }); - pass.set_bind_group(0, &bind_group.0, &[]); + pass.set_bind_group(0, &bind_group, &[]); pass.set_pipeline(init_pipeline); pass.dispatch_workgroups(1, 1, 1); } - let computed_buffers = world.resource::(); - let buffers = world.resource::>(); - let mesh_allocator = world.resource::(); - - // these can be None, read the mesh allocator docs - // to understand when. - let (vertex, index) = mesh_allocator.mesh_slabs(&mesh_id); - - let vertex_data_from_shader = buffers.get(&computed_buffers.vertex).unwrap(); - let vertex_buffer_slice = mesh_allocator.mesh_vertex_slice(mesh_id).unwrap(); - info_once!( - data_buffer_size=?vertex_data_from_shader.buffer.size(), - range_start=?vertex_buffer_slice.range.start, - range_end=?vertex_buffer_slice.range.end, - "vertex", - ); - - render_context.command_encoder().copy_buffer_to_buffer( - &vertex_data_from_shader.buffer, - 0, - vertex_buffer_slice.buffer, - 0, - // vertex_buffer_slice.range.start as u64, - vertex_data_from_shader.buffer.size(), - ); - - let index_data_from_shader = buffers.get(&computed_buffers.index).unwrap(); - let index_buffer_slice = mesh_allocator.mesh_index_slice(mesh_id).unwrap(); - info_once!( - data_buffer_size=?index_data_from_shader.buffer.size(), - range_start=?index_buffer_slice.range.start, - range_end=?index_buffer_slice.range.end, - "index" - ); - render_context.command_encoder().copy_buffer_to_buffer( - &index_data_from_shader.buffer, - 0, - index_buffer_slice.buffer, - 0, - // index_buffer_slice.range.start as u64, - index_data_from_shader.buffer.size(), - ); } Ok(()) From d1d7d28e429479999169c874299949d0527413ae Mon Sep 17 00:00:00 2001 From: Christopher Biscardi Date: Mon, 29 Dec 2025 09:41:33 -0800 
Subject: [PATCH 03/16] fix offsets --- assets/shaders/compute_mesh.wgsl | 14 ++-- examples/shader/compute_mesh.rs | 113 ++++++++++++++++++++----------- 2 files changed, 84 insertions(+), 43 deletions(-) diff --git a/assets/shaders/compute_mesh.wgsl b/assets/shaders/compute_mesh.wgsl index e872f08126d49..bb2aec9db4152 100644 --- a/assets/shaders/compute_mesh.wgsl +++ b/assets/shaders/compute_mesh.wgsl @@ -1,12 +1,12 @@ -// This shader is used for the gpu_readback example -// The actual work it does is not important for the example +// This shader is used for the compute_mesh example +// The actual work it does is not important for the example and +// has been hardcoded to return a cube mesh struct FirstIndex { first_vertex_index: u32, first_index_index: u32, } -// This is the data that lives in the gpu only buffer @group(0) @binding(0) var first_index: FirstIndex; @group(0) @binding(1) var vertex_data: array; @group(0) @binding(2) var index_data: array; @@ -14,10 +14,14 @@ struct FirstIndex { @compute @workgroup_size(1) fn main(@builtin(global_invocation_id) global_id: vec3) { for (var i = 0u; i < 192; i++) { - vertex_data[i + first_index.first_vertex_index * 32 ] = vertices[i ]; + // buffer is bigger than just our mesh, so we use the first_index.vertex + // to write to the correct range + vertex_data[i + first_index.vertex] = vertices[i]; } for (var i = 0u; i < 36; i++) { - index_data[i + first_index.first_index_index * 6] = u32(indices[i]); + // buffer is bigger than just our mesh, so we use the first_index.vertex_index + // to write to the correct range + index_data[i + first_index.vertex_index] = u32(indices[i]); } } diff --git a/examples/shader/compute_mesh.rs b/examples/shader/compute_mesh.rs index 5fe6d8f7d2288..33174fca3c5a6 100644 --- a/examples/shader/compute_mesh.rs +++ b/examples/shader/compute_mesh.rs @@ -1,5 +1,10 @@ -//! Simple example demonstrating the use of the [`Readback`] component to read back data from the GPU -//! 
using both a storage buffer and texture. +//! This example shows how to initialize an empty mesh with a Handle +//! and a render-world only usage. That buffer is then filled by a +//! compute shader on the GPU without transferring data back +//! to the CPU. +//! +//! The mesh_allocator is used to get references to the relevant slabs +//! that contain the mesh data we're interested in. use bevy::{ asset::RenderAssetUsages, @@ -81,21 +86,28 @@ fn setup( mut buffers: ResMut>, ) { // a truly empty mesh will error if used in Mesh3d - // so use a sphere for the example - let mut empty_mesh = Mesh::new( - PrimitiveTopology::TriangleList, - RenderAssetUsages::RENDER_WORLD, - ); - // set up what we want to output from the compute shader. + // so we set up the data to be what we want the compute shader to output // We're using 36 indices, 24 vertices which is directly taken from - // the Bevy Cuboid mesh - empty_mesh.insert_attribute(Mesh::ATTRIBUTE_POSITION, vec![[0.; 3]; 24]); - empty_mesh.insert_attribute(Mesh::ATTRIBUTE_NORMAL, vec![[0.; 3]; 24]); - empty_mesh.insert_attribute(Mesh::ATTRIBUTE_UV_0, vec![[0.; 2]; 24]); - empty_mesh.insert_indices(Indices::U32(vec![0; 36])); - empty_mesh.asset_usage = RenderAssetUsages::RENDER_WORLD; + // the Bevy Cuboid mesh implementation + let empty_mesh = { + let mut mesh = Mesh::new( + PrimitiveTopology::TriangleList, + RenderAssetUsages::RENDER_WORLD, + ) + .with_inserted_attribute(Mesh::ATTRIBUTE_POSITION, vec![[0.; 3]; 24]) + .with_inserted_attribute(Mesh::ATTRIBUTE_NORMAL, vec![[0.; 3]; 24]) + .with_inserted_attribute(Mesh::ATTRIBUTE_UV_0, vec![[0.; 2]; 24]) + .with_inserted_indices(Indices::U32(vec![0; 36])); + + mesh.asset_usage = RenderAssetUsages::RENDER_WORLD; + mesh + }; let handle = meshes.add(empty_mesh); + + // we spawn two "users" of the mesh handle, + // but only insert `GenerateMesh` on one of them + // to show that the mesh handle works as usual commands.spawn(( GenerateMesh(handle.clone()), Mesh3d(handle.clone()), @@ 
-103,24 +115,27 @@ fn setup( base_color: RED_400.into(), ..default() })), - Transform::from_xyz(0., 1., 0.), + Transform::from_xyz(-2.5, 1., 0.), )); - // commands.spawn(( - // Mesh3d(handle), - // MeshMaterial3d(materials.add(StandardMaterial { - // base_color: RED_400.into(), - // ..default() - // })), - // Transform::from_xyz(2., 1., 0.), - // )); + commands.spawn(( + Mesh3d(handle), + MeshMaterial3d(materials.add(StandardMaterial { + base_color: RED_400.into(), + ..default() + })), + Transform::from_xyz(2.5, 1., 0.), + )); - // // spawn some scene - // commands.spawn(( - // Mesh3d(meshes.add(Circle::new(4.0))), - // MeshMaterial3d(materials.add(Color::WHITE)), - // Transform::from_rotation(Quat::from_rotation_x(-std::f32::consts::FRAC_PI_2)), - // )); + // some additional scene elements. + // This mesh specifically is here so that we don't assume + // mesh_allocator offsets that would only work if we had + // one mesh in the scene. + commands.spawn(( + Mesh3d(meshes.add(Circle::new(4.0))), + MeshMaterial3d(materials.add(Color::WHITE)), + Transform::from_rotation(Quat::from_rotation_x(-std::f32::consts::FRAC_PI_2)), + )); commands.spawn(( PointLight { shadows_enabled: true, @@ -142,6 +157,8 @@ fn add_compute_render_graph_node(mut render_graph: ResMut) { render_graph.add_node(ComputeNodeLabel, ComputeNode::default()); } +/// This is called "Chunks" because this example originated +/// from a use case of generating chunks of landscape or voxels #[derive(Resource, Default)] struct Chunks(Vec>); @@ -150,12 +167,13 @@ fn prepare_chunks( mut chunks: ResMut, mesh_handles: Res>, ) { + // get the AssetId for each Handle + // which we'll use later to get the relevant buffers + // from the mesh_allocator let chunk_data: Vec> = meshes_to_generate .iter() - // sometimes RenderMesh doesn't exist yet! 
.map(|gmesh| gmesh.0.id()) .collect(); - // dbg!(chunk_data); chunks.0 = chunk_data; } @@ -176,6 +194,7 @@ fn init_compute_pipeline( &BindGroupLayoutEntries::sequential( ShaderStages::COMPUTE, ( + // offsets uniform_buffer::(false), // vertices storage_buffer::>(false), @@ -202,10 +221,12 @@ struct ComputeNodeLabel; #[derive(Default)] struct ComputeNode {} +// A uniform that holds the vertex and index offsets +// for the vertex/index mesh_allocator buffer slabs #[derive(ShaderType)] struct FirstIndex { - first_vertex_index: u32, - first_index_index: u32, + vertex: u32, + vertex_index: u32, } impl render_graph::Node for ComputeNode { @@ -226,21 +247,32 @@ impl render_graph::Node for ComputeNode { let pipeline = world.resource::(); if let Some(init_pipeline) = pipeline_cache.get_compute_pipeline(pipeline.pipeline) { + // the mesh_allocator holds slabs of meshes, so the buffers we get here + // can contain more data than just the mesh we're asking for. + // That's why there is a range field. + // You should *not* touch data in these buffers that is outside of the range. let vertex_buffer_slice = mesh_allocator.mesh_vertex_slice(mesh_id).unwrap(); let index_buffer_slice = mesh_allocator.mesh_index_slice(mesh_id).unwrap(); - dbg!(&vertex_buffer_slice.range); - dbg!(&index_buffer_slice.range); - let first = FirstIndex { - first_vertex_index: vertex_buffer_slice.range.start * 4, - first_index_index: index_buffer_slice.range.start * 4, + // there are 8 vertex data values (pos, normal, uv) per vertex + // and the vertex_buffer_slice.range.start is in "vertex elements" + // which includes all of that data, so each index is worth 8 indices + // to our shader code. 
+ vertex: vertex_buffer_slice.range.start * 8, + // but each vertex index is a single value, so the index of the + // vertex indices is exactly what the value is + vertex_index: index_buffer_slice.range.start, }; + let mut uniforms = UniformBuffer::from(first); uniforms.write_buffer( render_context.render_device(), world.resource::(), ); + + // pass in the full mesh_allocator slabs as well as the first index + // offsets for the vertex and index buffers let bind_group = render_context.render_device().create_bind_group( None, &pipeline_cache.get_bind_group_layout(&pipeline.layout), @@ -258,10 +290,15 @@ impl render_graph::Node for ComputeNode { label: Some("Mesh generation compute pass"), ..default() }); + pass.push_debug_group("compute_mesh"); pass.set_bind_group(0, &bind_group, &[]); pass.set_pipeline(init_pipeline); + // we only dispatch 1,1,1 workgroup here, but a real compute shader + // would take advantage of more workgroups pass.dispatch_workgroups(1, 1, 1); + + pass.pop_debug_group(); } } From dacedb20ea8455970aa7312265454c8863cc591c Mon Sep 17 00:00:00 2001 From: Christopher Biscardi Date: Mon, 29 Dec 2025 09:43:02 -0800 Subject: [PATCH 04/16] more comments --- examples/shader/compute_mesh.rs | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/examples/shader/compute_mesh.rs b/examples/shader/compute_mesh.rs index 33174fca3c5a6..4fab8a3faca60 100644 --- a/examples/shader/compute_mesh.rs +++ b/examples/shader/compute_mesh.rs @@ -5,25 +5,22 @@ //! //! The mesh_allocator is used to get references to the relevant slabs //! that contain the mesh data we're interested in. +//! +//! This example does not remove the `GenerateMesh` component after +//! generating the mesh. 
use bevy::{ asset::RenderAssetUsages, color::palettes::tailwind::RED_400, - mesh::{Indices, MeshVertexAttribute}, + mesh::Indices, prelude::*, render::{ - extract_resource::{ExtractResource, ExtractResourcePlugin}, - gpu_readback::{Readback, ReadbackComplete}, render_asset::RenderAssets, render_graph::{self, RenderGraph, RenderLabel}, - render_resource::{ - binding_types::{storage_buffer, texture_storage_2d}, - *, - }, - renderer::{RenderContext, RenderDevice}, - storage::{GpuShaderStorageBuffer, ShaderStorageBuffer}, - texture::GpuImage, - Render, RenderApp, RenderStartup, RenderSystems, + render_resource::{binding_types::storage_buffer, *}, + renderer::RenderContext, + storage::ShaderStorageBuffer, + Render, RenderApp, RenderStartup, }, }; use bevy_render::{ @@ -75,6 +72,8 @@ impl Plugin for ComputeShaderMeshGeneratorPlugin { } } +/// Holds a handle to the empty mesh that should be filled +/// by the compute shader. #[derive(Component, ExtractComponent, Clone)] struct GenerateMesh(Handle); From 41bfb301abc4b4abd52601b04bc569bb9a23d24d Mon Sep 17 00:00:00 2001 From: Christopher Biscardi Date: Mon, 29 Dec 2025 09:49:05 -0800 Subject: [PATCH 05/16] cleanup imports --- assets/shaders/compute_mesh.wgsl | 4 ++-- examples/shader/compute_mesh.rs | 25 ++++++++----------------- 2 files changed, 10 insertions(+), 19 deletions(-) diff --git a/assets/shaders/compute_mesh.wgsl b/assets/shaders/compute_mesh.wgsl index bb2aec9db4152..23b0c52a22bba 100644 --- a/assets/shaders/compute_mesh.wgsl +++ b/assets/shaders/compute_mesh.wgsl @@ -3,8 +3,8 @@ // has been hardcoded to return a cube mesh struct FirstIndex { - first_vertex_index: u32, - first_index_index: u32, + vertex: u32, + vertex_index: u32, } @group(0) @binding(0) var first_index: FirstIndex; diff --git a/examples/shader/compute_mesh.rs b/examples/shader/compute_mesh.rs index 4fab8a3faca60..2fe946a2d01b4 100644 --- a/examples/shader/compute_mesh.rs +++ b/examples/shader/compute_mesh.rs @@ -15,20 +15,17 @@ use bevy::{ 
mesh::Indices, prelude::*, render::{ - render_asset::RenderAssets, + extract_component::{ExtractComponent, ExtractComponentPlugin}, + mesh::allocator::MeshAllocator, render_graph::{self, RenderGraph, RenderLabel}, - render_resource::{binding_types::storage_buffer, *}, - renderer::RenderContext, - storage::ShaderStorageBuffer, + render_resource::{ + binding_types::{storage_buffer, uniform_buffer}, + *, + }, + renderer::{RenderContext, RenderQueue}, Render, RenderApp, RenderStartup, }, }; -use bevy_render::{ - extract_component::{ExtractComponent, ExtractComponentPlugin}, - mesh::{allocator::MeshAllocator, RenderMesh}, - render_resource::binding_types::uniform_buffer, - renderer::RenderQueue, -}; /// This example uses a shader source file from the assets subdirectory const SHADER_ASSET_PATH: &str = "shaders/compute_mesh.wgsl"; @@ -79,10 +76,8 @@ struct GenerateMesh(Handle); fn setup( mut commands: Commands, - mut images: ResMut>, mut meshes: ResMut>, mut materials: ResMut>, - mut buffers: ResMut>, ) { // a truly empty mesh will error if used in Mesh3d // so we set up the data to be what we want the compute shader to output @@ -161,11 +156,7 @@ fn add_compute_render_graph_node(mut render_graph: ResMut) { #[derive(Resource, Default)] struct Chunks(Vec>); -fn prepare_chunks( - meshes_to_generate: Query<&GenerateMesh>, - mut chunks: ResMut, - mesh_handles: Res>, -) { +fn prepare_chunks(meshes_to_generate: Query<&GenerateMesh>, mut chunks: ResMut) { // get the AssetId for each Handle // which we'll use later to get the relevant buffers // from the mesh_allocator From 339d8ab726116d1aea33b96e691e816cca72b098 Mon Sep 17 00:00:00 2001 From: Christopher Biscardi Date: Mon, 29 Dec 2025 10:11:36 -0800 Subject: [PATCH 06/16] example page --- examples/README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/examples/README.md b/examples/README.md index da86eac2fbdc1..6fcb06acc6775 100644 --- a/examples/README.md +++ b/examples/README.md @@ -467,6 +467,7 @@ Example | 
Description [Animated](../examples/shader/animate_shader.rs) | A shader that uses dynamic data like the time since startup [Array Texture](../examples/shader/array_texture.rs) | A shader that shows how to reuse the core bevy PBR shading functionality in a custom material that obtains the base color from an array texture. [Compute - Game of Life](../examples/shader/compute_shader_game_of_life.rs) | A compute shader that simulates Conway's Game of Life +[Compute Shader Mesh](../examples/shader/compute_mesh.rs) | A compute shader that generates a mesh that is controlled by a Handle [Custom Render Phase](../examples/shader_advanced/custom_render_phase.rs) | Shows how to make a complete render phase [Custom Vertex Attribute](../examples/shader_advanced/custom_vertex_attribute.rs) | A shader that reads a mesh's custom vertex attribute [Custom phase item](../examples/shader_advanced/custom_phase_item.rs) | Demonstrates how to enqueue custom draw commands in a render phase From b06c3bee5d6a219564f03e2267caed5d30a7a037 Mon Sep 17 00:00:00 2001 From: Christopher Biscardi Date: Mon, 29 Dec 2025 10:20:46 -0800 Subject: [PATCH 07/16] markdown lint --- examples/shader/compute_mesh.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/shader/compute_mesh.rs b/examples/shader/compute_mesh.rs index 2fe946a2d01b4..34278ffc4a9b6 100644 --- a/examples/shader/compute_mesh.rs +++ b/examples/shader/compute_mesh.rs @@ -3,7 +3,7 @@ //! compute shader on the GPU without transferring data back //! to the CPU. //! -//! The mesh_allocator is used to get references to the relevant slabs +//! The `mesh_allocator` is used to get references to the relevant slabs //! that contain the mesh data we're interested in. //! //! 
This example does not remove the `GenerateMesh` component after From 1990f7a616e201246188c53fe345bdbe5de44fff Mon Sep 17 00:00:00 2001 From: Christopher Biscardi Date: Mon, 29 Dec 2025 15:53:51 -0800 Subject: [PATCH 08/16] comments --- assets/shaders/compute_mesh.wgsl | 24 ++++++++++++++++++++---- examples/shader/compute_mesh.rs | 15 ++++++++++++++- 2 files changed, 34 insertions(+), 5 deletions(-) diff --git a/assets/shaders/compute_mesh.wgsl b/assets/shaders/compute_mesh.wgsl index 23b0c52a22bba..aae768869a140 100644 --- a/assets/shaders/compute_mesh.wgsl +++ b/assets/shaders/compute_mesh.wgsl @@ -2,6 +2,8 @@ // The actual work it does is not important for the example and // has been hardcoded to return a cube mesh +// `vertex` is the starting offset of the mesh data in the *vertex_data* storage buffer +// `vertex_index` is the starting offset of the *index* data in the *index_data* storage buffer struct FirstIndex { vertex: u32, vertex_index: u32, @@ -13,14 +15,28 @@ struct FirstIndex { @compute @workgroup_size(1) fn main(@builtin(global_invocation_id) global_id: vec3) { + // this loop is iterating over the full list of (position, normal, uv) + // data that we have in `vertices`. + // `192` is used because arrayLength on const arrays doesn't work for (var i = 0u; i < 192; i++) { - // buffer is bigger than just our mesh, so we use the first_index.vertex - // to write to the correct range + // The vertex_data buffer is bigger than just the mesh we're + // processing because Bevy stores meshes in the mesh_allocator + // which allocates slabs that each can contain multiple meshes. + // This buffer is one slab, and first_index.vertex is the starting + // offset for the mesh we care about. + // So the 0 starting value in the for loop is added to first_index.vertex + // which means we start writing at the correct offset. 
+ // + // The "end" of the available space to write into is known by us + // ahead of time in this example, but you may wish to also set the + // end of the range in the uniform buffer *because you should not + // write past the end of the range ever*. Doing this can overwrite + // other mesh data*. vertex_data[i + first_index.vertex] = vertices[i]; } + // `36` is the length of the `indices` array for (var i = 0u; i < 36; i++) { - // buffer is bigger than just our mesh, so we use the first_index.vertex_index - // to write to the correct range + // This is doing the same as the vertex_data offset described above index_data[i + first_index.vertex_index] = u32(indices[i]); } } diff --git a/examples/shader/compute_mesh.rs b/examples/shader/compute_mesh.rs index 34278ffc4a9b6..014ec8f0d3b56 100644 --- a/examples/shader/compute_mesh.rs +++ b/examples/shader/compute_mesh.rs @@ -65,6 +65,10 @@ impl Plugin for ComputeShaderMeshGeneratorPlugin { render_app .world_mut() .resource_mut::() + // This allows using the mesh allocator slabs as + // storage buffers directly in the compute shader. + // Which means that we can write from our compute + // shader directly to the allocated mesh slabs. .extra_buffer_usages = BufferUsages::STORAGE; } } @@ -83,6 +87,15 @@ fn setup( // so we set up the data to be what we want the compute shader to output // We're using 36 indices, 24 vertices which is directly taken from // the Bevy Cuboid mesh implementation + // + // It is *very important* that the amount of data allocated here is + // *bigger* than (or exactly equal to) the amount of data we intend to + // write from the compute shader. This amount of data defines how big + // the buffer we get from the mesh_allocator will be, which in turn + // defines how big the buffer is when we're in the compute shader. + // + // If it turns out you don't need all of the space when the compute shader + // is writing data, you can write NaN to the rest of the data. 
let empty_mesh = { let mut mesh = Mesh::new( PrimitiveTopology::TriangleList, @@ -285,7 +298,7 @@ impl render_graph::Node for ComputeNode { pass.set_bind_group(0, &bind_group, &[]); pass.set_pipeline(init_pipeline); // we only dispatch 1,1,1 workgroup here, but a real compute shader - // would take advantage of more workgroups + // would take advantage of more and larger size workgroups pass.dispatch_workgroups(1, 1, 1); pass.pop_debug_group(); From 44fb8e18c541e78837fd89357d092df9b026ab5c Mon Sep 17 00:00:00 2001 From: Christopher Biscardi Date: Mon, 29 Dec 2025 16:07:25 -0800 Subject: [PATCH 09/16] change cube size, move upward --- assets/shaders/compute_mesh.wgsl | 35 +++++++++++++++------------ examples/shader/compute_mesh.rs | 41 ++++++++++++++++++-------------- 2 files changed, 43 insertions(+), 33 deletions(-) diff --git a/assets/shaders/compute_mesh.wgsl b/assets/shaders/compute_mesh.wgsl index aae768869a140..d7748b862a24b 100644 --- a/assets/shaders/compute_mesh.wgsl +++ b/assets/shaders/compute_mesh.wgsl @@ -4,12 +4,14 @@ // `vertex` is the starting offset of the mesh data in the *vertex_data* storage buffer // `vertex_index` is the starting offset of the *index* data in the *index_data* storage buffer -struct FirstIndex { - vertex: u32, - vertex_index: u32, +struct DataRanges { + vertex_start: u32, + vertex_end: u32, + index_start: u32, + index_end: u32, } -@group(0) @binding(0) var first_index: FirstIndex; +@group(0) @binding(0) var data_range: DataRanges; @group(0) @binding(1) var vertex_data: array; @group(0) @binding(2) var index_data: array; @@ -22,27 +24,30 @@ fn main(@builtin(global_invocation_id) global_id: vec3) { // The vertex_data buffer is bigger than just the mesh we're // processing because Bevy stores meshes in the mesh_allocator // which allocates slabs that each can contain multiple meshes. - // This buffer is one slab, and first_index.vertex is the starting - // offset for the mesh we care about. 
- // So the 0 starting value in the for loop is added to first_index.vertex - // which means we start writing at the correct offset. + // This buffer is one slab, and data_range.vertex_start is the + // starting offset for the mesh we care about. + // So the 0 starting value in the for loop is added to + // data_range.vertex_start which means we start writing at the + // correct offset. // // The "end" of the available space to write into is known by us - // ahead of time in this example, but you may wish to also set the - // end of the range in the uniform buffer *because you should not - // write past the end of the range ever*. Doing this can overwrite - // other mesh data*. - vertex_data[i + first_index.vertex] = vertices[i]; + // ahead of time in this example, so we know this has enough space, + // but you may wish to also check to make sure you are not writing + // past the end of the range *because you should not write past the + // end of the range ever*. Doing this can overwrite a different + // mesh's data. + vertex_data[i + data_range.vertex_start] = vertices[i]; } // `36` is the length of the `indices` array for (var i = 0u; i < 36; i++) { // This is doing the same as the vertex_data offset described above - index_data[i + first_index.vertex_index] = u32(indices[i]); + index_data[i + data_range.index_start] = u32(indices[i]); } } // hardcoded compute shader data. 
-const half_size = vec3(2.); +// half_size is half the size of the cube +const half_size = vec3(1.5); const min = -half_size; const max = half_size; diff --git a/examples/shader/compute_mesh.rs b/examples/shader/compute_mesh.rs index 014ec8f0d3b56..d6d336009902a 100644 --- a/examples/shader/compute_mesh.rs +++ b/examples/shader/compute_mesh.rs @@ -11,7 +11,7 @@ use bevy::{ asset::RenderAssetUsages, - color::palettes::tailwind::RED_400, + color::palettes::tailwind::{RED_400, SKY_400}, mesh::Indices, prelude::*, render::{ @@ -85,10 +85,11 @@ fn setup( ) { // a truly empty mesh will error if used in Mesh3d // so we set up the data to be what we want the compute shader to output - // We're using 36 indices, 24 vertices which is directly taken from - // the Bevy Cuboid mesh implementation + // We're using 36 indices and 24 vertices which is directly taken from + // the Bevy Cuboid mesh implementation. // - // It is *very important* that the amount of data allocated here is + // We allocate 50 spots for each attribute here because + // it is *very important* that the amount of data allocated here is // *bigger* than (or exactly equal to) the amount of data we intend to // write from the compute shader. 
This amount of data defines how big // the buffer we get from the mesh_allocator will be, which in turn @@ -101,10 +102,10 @@ fn setup( PrimitiveTopology::TriangleList, RenderAssetUsages::RENDER_WORLD, ) - .with_inserted_attribute(Mesh::ATTRIBUTE_POSITION, vec![[0.; 3]; 24]) - .with_inserted_attribute(Mesh::ATTRIBUTE_NORMAL, vec![[0.; 3]; 24]) - .with_inserted_attribute(Mesh::ATTRIBUTE_UV_0, vec![[0.; 2]; 24]) - .with_inserted_indices(Indices::U32(vec![0; 36])); + .with_inserted_attribute(Mesh::ATTRIBUTE_POSITION, vec![[0.; 3]; 50]) + .with_inserted_attribute(Mesh::ATTRIBUTE_NORMAL, vec![[0.; 3]; 50]) + .with_inserted_attribute(Mesh::ATTRIBUTE_UV_0, vec![[0.; 2]; 50]) + .with_inserted_indices(Indices::U32(vec![0; 50])); mesh.asset_usage = RenderAssetUsages::RENDER_WORLD; mesh @@ -122,16 +123,16 @@ fn setup( base_color: RED_400.into(), ..default() })), - Transform::from_xyz(-2.5, 1., 0.), + Transform::from_xyz(-2.5, 1.5, 0.), )); commands.spawn(( Mesh3d(handle), MeshMaterial3d(materials.add(StandardMaterial { - base_color: RED_400.into(), + base_color: SKY_400.into(), ..default() })), - Transform::from_xyz(2.5, 1., 0.), + Transform::from_xyz(2.5, 1.5, 0.), )); // some additional scene elements. 
@@ -198,7 +199,7 @@ fn init_compute_pipeline( ShaderStages::COMPUTE, ( // offsets - uniform_buffer::(false), + uniform_buffer::(false), // vertices storage_buffer::>(false), // indices @@ -227,9 +228,11 @@ struct ComputeNode {} // A uniform that holds the vertex and index offsets // for the vertex/index mesh_allocator buffer slabs #[derive(ShaderType)] -struct FirstIndex { - vertex: u32, - vertex_index: u32, +struct DataRanges { + vertex_start: u32, + vertex_end: u32, + index_start: u32, + index_end: u32, } impl render_graph::Node for ComputeNode { @@ -257,15 +260,17 @@ impl render_graph::Node for ComputeNode { let vertex_buffer_slice = mesh_allocator.mesh_vertex_slice(mesh_id).unwrap(); let index_buffer_slice = mesh_allocator.mesh_index_slice(mesh_id).unwrap(); - let first = FirstIndex { + let first = DataRanges { // there are 8 vertex data values (pos, normal, uv) per vertex // and the vertex_buffer_slice.range.start is in "vertex elements" // which includes all of that data, so each index is worth 8 indices // to our shader code. 
- vertex: vertex_buffer_slice.range.start * 8, + vertex_start: vertex_buffer_slice.range.start * 8, + vertex_end: vertex_buffer_slice.range.end * 8, // but each vertex index is a single value, so the index of the // vertex indices is exactly what the value is - vertex_index: index_buffer_slice.range.start, + index_start: index_buffer_slice.range.start, + index_end: index_buffer_slice.range.end, }; let mut uniforms = UniformBuffer::from(first); From 3786e2eee96ca0d07ae40ddf0ae3614ae525bbf0 Mon Sep 17 00:00:00 2001 From: Christopher Biscardi Date: Mon, 29 Dec 2025 16:08:19 -0800 Subject: [PATCH 10/16] move to advanced section --- Cargo.toml | 2 +- examples/{shader => shader_advanced}/compute_mesh.rs | 0 2 files changed, 1 insertion(+), 1 deletion(-) rename examples/{shader => shader_advanced}/compute_mesh.rs (100%) diff --git a/Cargo.toml b/Cargo.toml index 65a2f1589b5b5..e0a842921b76e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -3022,7 +3022,7 @@ wasm = false [[example]] name = "compute_mesh" -path = "examples/shader/compute_mesh.rs" +path = "examples/shader_advanced/compute_mesh.rs" doc-scrape-examples = true [package.metadata.example.compute_mesh] diff --git a/examples/shader/compute_mesh.rs b/examples/shader_advanced/compute_mesh.rs similarity index 100% rename from examples/shader/compute_mesh.rs rename to examples/shader_advanced/compute_mesh.rs From ed34521b2aad0c4a06f181c5fb35a9b2803b26e5 Mon Sep 17 00:00:00 2001 From: Christopher Biscardi Date: Mon, 29 Dec 2025 16:13:53 -0800 Subject: [PATCH 11/16] update readme --- examples/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/README.md b/examples/README.md index 6fcb06acc6775..972595e862351 100644 --- a/examples/README.md +++ b/examples/README.md @@ -467,7 +467,7 @@ Example | Description [Animated](../examples/shader/animate_shader.rs) | A shader that uses dynamic data like the time since startup [Array Texture](../examples/shader/array_texture.rs) | A shader that shows how 
to reuse the core bevy PBR shading functionality in a custom material that obtains the base color from an array texture. [Compute - Game of Life](../examples/shader/compute_shader_game_of_life.rs) | A compute shader that simulates Conway's Game of Life -[Compute Shader Mesh](../examples/shader/compute_mesh.rs) | A compute shader that generates a mesh that is controlled by a Handle +[Compute Shader Mesh](../examples/shader_advanced/compute_mesh.rs) | A compute shader that generates a mesh that is controlled by a Handle [Custom Render Phase](../examples/shader_advanced/custom_render_phase.rs) | Shows how to make a complete render phase [Custom Vertex Attribute](../examples/shader_advanced/custom_vertex_attribute.rs) | A shader that reads a mesh's custom vertex attribute [Custom phase item](../examples/shader_advanced/custom_phase_item.rs) | Demonstrates how to enqueue custom draw commands in a render phase From 516c76d6d83302cbdcdc9566ed46ee989546e594 Mon Sep 17 00:00:00 2001 From: Christopher Biscardi Date: Mon, 29 Dec 2025 16:35:10 -0800 Subject: [PATCH 12/16] dont update every frame --- examples/shader_advanced/compute_mesh.rs | 25 ++++++++++++++++++++++-- 1 file changed, 23 insertions(+), 2 deletions(-) diff --git a/examples/shader_advanced/compute_mesh.rs b/examples/shader_advanced/compute_mesh.rs index d6d336009902a..0f36d13b062e4 100644 --- a/examples/shader_advanced/compute_mesh.rs +++ b/examples/shader_advanced/compute_mesh.rs @@ -9,10 +9,13 @@ //! This example does not remove the `GenerateMesh` component after //! generating the mesh. 
+use std::ops::Not; + use bevy::{ asset::RenderAssetUsages, color::palettes::tailwind::{RED_400, SKY_400}, mesh::Indices, + platform::collections::HashSet, prelude::*, render::{ extract_component::{ExtractComponent, ExtractComponentPlugin}, @@ -170,14 +173,31 @@ fn add_compute_render_graph_node(mut render_graph: ResMut) { #[derive(Resource, Default)] struct Chunks(Vec>); -fn prepare_chunks(meshes_to_generate: Query<&GenerateMesh>, mut chunks: ResMut) { +fn prepare_chunks( + meshes_to_generate: Query<&GenerateMesh>, + mut chunks: ResMut, + // This HashSet contains the AssetIds that have been + // processed. We use that to remove asset_ids that have already + // been processed, which means each unique GenerateMesh will result + // in one compute shader mesh generation process instead of generating + // the mesh every frame. + mut processed: Local>>, +) { // get the AssetId for each Handle // which we'll use later to get the relevant buffers // from the mesh_allocator let chunk_data: Vec> = meshes_to_generate .iter() - .map(|gmesh| gmesh.0.id()) + .filter_map(|gmesh| { + let id = gmesh.0.id(); + processed.contains(&id).not().then_some(id) + }) .collect(); + + for id in &chunk_data { + processed.insert(*id); + } + chunks.0 = chunk_data; } @@ -249,6 +269,7 @@ impl render_graph::Node for ComputeNode { let mesh_allocator = world.resource::(); for mesh_id in &chunks.0 { + info!(?mesh_id, "processing mesh"); let pipeline_cache = world.resource::(); let pipeline = world.resource::(); From eb61f0dd1a94ec21164cfe6cea8bf68571811c1f Mon Sep 17 00:00:00 2001 From: Christopher Biscardi Date: Mon, 29 Dec 2025 17:20:04 -0800 Subject: [PATCH 13/16] add node edge --- examples/shader_advanced/compute_mesh.rs | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/examples/shader_advanced/compute_mesh.rs b/examples/shader_advanced/compute_mesh.rs index 0f36d13b062e4..7a92d552fce82 100644 --- a/examples/shader_advanced/compute_mesh.rs +++ 
b/examples/shader_advanced/compute_mesh.rs @@ -162,10 +162,9 @@ fn setup( } fn add_compute_render_graph_node(mut render_graph: ResMut) { - // Add the compute node as a top-level node to the render graph. This means it will only execute - // once per frame. Normally, adding a node would use the `RenderGraphApp::add_render_graph_node` - // method, but it does not allow adding as a top-level node. render_graph.add_node(ComputeNodeLabel, ComputeNode::default()); + // add_node_edge guarantees that ComputeNodeLabel will run before CameraDriverLabel + render_graph.add_node_edge(ComputeNodeLabel, bevy::render::graph::CameraDriverLabel); } /// This is called "Chunks" because this example originated From 334181c3b4571efbfb5fec6eb6f39640328157f5 Mon Sep 17 00:00:00 2001 From: Christopher Biscardi Date: Sun, 4 Jan 2026 15:25:29 -0800 Subject: [PATCH 14/16] wait for pipeline to be ready before considering meshes "processed" --- examples/shader_advanced/compute_mesh.rs | 65 ++++++++++++++---------- 1 file changed, 37 insertions(+), 28 deletions(-) diff --git a/examples/shader_advanced/compute_mesh.rs b/examples/shader_advanced/compute_mesh.rs index 7a92d552fce82..94f079f9d1547 100644 --- a/examples/shader_advanced/compute_mesh.rs +++ b/examples/shader_advanced/compute_mesh.rs @@ -54,7 +54,7 @@ impl Plugin for ComputeShaderMeshGeneratorPlugin { }; render_app - .init_resource::() + .init_resource::() .add_systems( RenderStartup, (init_compute_pipeline, add_compute_render_graph_node), @@ -167,37 +167,49 @@ fn add_compute_render_graph_node(mut render_graph: ResMut) { render_graph.add_node_edge(ComputeNodeLabel, bevy::render::graph::CameraDriverLabel); } -/// This is called "Chunks" because this example originated +/// This is called "ChunksToProcess" because this example originated /// from a use case of generating chunks of landscape or voxels +/// It only exists in the render world. 
#[derive(Resource, Default)] -struct Chunks(Vec>); +struct ChunksToProcess(Vec>); +/// `processed` is a `HashSet` contains the `AssetId`s that have been +/// processed. We use that to remove asset_ids that have already +/// been processed, which means each unique `GenerateMesh` will result +/// in one compute shader mesh generation process instead of generating +/// the mesh every frame. fn prepare_chunks( meshes_to_generate: Query<&GenerateMesh>, - mut chunks: ResMut, - // This HashSet contains the AssetIds that have been - // processed. We use that to remove asset_ids that have already - // been processed, which means each unique GenerateMesh will result - // in one compute shader mesh generation process instead of generating - // the mesh every frame. + mut chunks: ResMut, + pipeline_cache: Res, + pipeline: Res, mut processed: Local>>, ) { - // get the AssetId for each Handle - // which we'll use later to get the relevant buffers - // from the mesh_allocator - let chunk_data: Vec> = meshes_to_generate - .iter() - .filter_map(|gmesh| { - let id = gmesh.0.id(); - processed.contains(&id).not().then_some(id) - }) - .collect(); - - for id in &chunk_data { - processed.insert(*id); - } + // If the pipeline isn't ready, then meshes + // won't be processed. So we want to wait until + // the pipeline is ready before considering any mesh processed. 
+ if pipeline_cache + .get_compute_pipeline(pipeline.pipeline) + .is_some() + { + // get the AssetId for each Handle + // which we'll use later to get the relevant buffers + // from the mesh_allocator + let chunk_data: Vec> = meshes_to_generate + .iter() + .filter_map(|gmesh| { + let id = gmesh.0.id(); + processed.contains(&id).not().then_some(id) + }) + .collect(); + + // Cache any meshes we're going to process this frame + for id in &chunk_data { + processed.insert(*id); + } - chunks.0 = chunk_data; + chunks.0 = chunk_data; + } } #[derive(Resource)] @@ -261,10 +273,7 @@ impl render_graph::Node for ComputeNode { render_context: &mut RenderContext, world: &World, ) -> Result<(), render_graph::NodeRunError> { - let Some(chunks) = world.get_resource::() else { - info!("no chunks"); - return Ok(()); - }; + let chunks = world.resource::(); let mesh_allocator = world.resource::(); for mesh_id in &chunks.0 { From f6916e9a6b5c3d78e8934884dfc78395e7dc5d5a Mon Sep 17 00:00:00 2001 From: Christopher Biscardi Date: Sun, 4 Jan 2026 15:33:54 -0800 Subject: [PATCH 15/16] ci clippy --- examples/shader_advanced/compute_mesh.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/shader_advanced/compute_mesh.rs b/examples/shader_advanced/compute_mesh.rs index 94f079f9d1547..0d7b798f22df5 100644 --- a/examples/shader_advanced/compute_mesh.rs +++ b/examples/shader_advanced/compute_mesh.rs @@ -167,14 +167,14 @@ fn add_compute_render_graph_node(mut render_graph: ResMut) { render_graph.add_node_edge(ComputeNodeLabel, bevy::render::graph::CameraDriverLabel); } -/// This is called "ChunksToProcess" because this example originated +/// This is called `ChunksToProcess` because this example originated /// from a use case of generating chunks of landscape or voxels /// It only exists in the render world. #[derive(Resource, Default)] struct ChunksToProcess(Vec>); /// `processed` is a `HashSet` contains the `AssetId`s that have been -/// processed. 
We use that to remove asset_ids that have already +/// processed. We use that to remove `AssetId`s that have already /// been processed, which means each unique `GenerateMesh` will result /// in one compute shader mesh generation process instead of generating /// the mesh every frame. From 3c5acc413fcc90a3d160daeee8a6ecc4d5aed98f Mon Sep 17 00:00:00 2001 From: Christopher Biscardi Date: Sat, 10 Jan 2026 10:12:57 -0800 Subject: [PATCH 16/16] wgsl comment fix --- assets/shaders/compute_mesh.wgsl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/assets/shaders/compute_mesh.wgsl b/assets/shaders/compute_mesh.wgsl index d7748b862a24b..40d7a63aca885 100644 --- a/assets/shaders/compute_mesh.wgsl +++ b/assets/shaders/compute_mesh.wgsl @@ -2,8 +2,8 @@ // The actual work it does is not important for the example and // has been hardcoded to return a cube mesh -// `vertex` is the starting offset of the mesh data in the *vertex_data* storage buffer -// `vertex_index` is the starting offset of the *index* data in the *index_data* storage buffer +// `vertex_start` is the starting offset of the mesh data in the *vertex_data* storage buffer +// `index_start` is the starting offset of the index data in the *index_data* storage buffer struct DataRanges { vertex_start: u32, vertex_end: u32,