Skip to content

Commit 14b6f0a

Browse files
committed
bevy_pbr: Leverage ComputeTaskPool parallelism for assign_lights_to_clusters
For 5000 point lights on an M1 Max MacBook Pro, this cut the system execution time from >7ms to 5ms. I tested different batch sizes and 32 is in the middle of powers of 2 that work well. I also tried allocating the inner indices_entities within the spawned task using Vec::with_capacity() but it either made things worse or had no impact. And I tried doubling the size of the indices_entities Vec when it was full and we needed to push another value onto it. That similarly had no impact. So, I opted for simplicity. One thing I haven't tested is whether this significantly slows down system operation for small numbers of lights such as those in WebGL2 cases.
1 parent e4dc7ef commit 14b6f0a

File tree

2 files changed

+109
-92
lines changed

2 files changed

+109
-92
lines changed

crates/bevy_pbr/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ bevy_ecs = { path = "../bevy_ecs", version = "0.6.0" }
2121
bevy_math = { path = "../bevy_math", version = "0.6.0" }
2222
bevy_reflect = { path = "../bevy_reflect", version = "0.6.0", features = ["bevy"] }
2323
bevy_render = { path = "../bevy_render", version = "0.6.0" }
24+
bevy_tasks = { path = "../bevy_tasks", version = "0.6.0" }
2425
bevy_transform = { path = "../bevy_transform", version = "0.6.0" }
2526
bevy_utils = { path = "../bevy_utils", version = "0.6.0" }
2627
bevy_window = { path = "../bevy_window", version = "0.6.0" }

crates/bevy_pbr/src/light.rs

Lines changed: 108 additions & 92 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ use bevy_render::{
99
primitives::{Aabb, CubemapFrusta, Frustum, Sphere},
1010
view::{ComputedVisibility, RenderLayers, Visibility, VisibleEntities},
1111
};
12+
use bevy_tasks::ComputeTaskPool;
1213
use bevy_transform::components::GlobalTransform;
1314
use bevy_window::Windows;
1415

@@ -483,6 +484,7 @@ fn cluster_to_index(cluster_dimensions: UVec3, cluster: UVec3) -> usize {
483484
// NOTE: Run this before update_point_light_frusta!
484485
pub fn assign_lights_to_clusters(
485486
mut commands: Commands,
487+
task_pool: Res<ComputeTaskPool>,
486488
mut global_lights: ResMut<VisiblePointLights>,
487489
mut views: Query<(Entity, &GlobalTransform, &Camera, &Frustum, &mut Clusters)>,
488490
lights: Query<(Entity, &GlobalTransform, &PointLight)>,
@@ -506,102 +508,116 @@ pub fn assign_lights_to_clusters(
506508
vec![VisiblePointLights::from_light_count(light_count); cluster_count];
507509
let mut visible_lights_set = HashSet::with_capacity(light_count);
508510

509-
for (light_entity, light_transform, light) in lights.iter() {
510-
let light_sphere = Sphere {
511-
center: light_transform.translation,
512-
radius: light.range,
513-
};
514-
515-
// Check if the light is within the view frustum
516-
if !frustum.intersects_sphere(&light_sphere) {
517-
continue;
518-
}
511+
let lights_vec = lights.iter().collect::<Vec<_>>();
512+
let indices_entities = task_pool.scope(|scope| {
513+
let clusters = &*clusters;
514+
for chunk in lights_vec.chunks(32) {
515+
scope.spawn(async move {
516+
let mut indices_entities = Vec::new();
517+
for (light_entity, light_transform, light) in chunk {
518+
let light_sphere = Sphere {
519+
center: light_transform.translation,
520+
radius: light.range,
521+
};
522+
523+
// Check if the light is within the view frustum
524+
if !frustum.intersects_sphere(&light_sphere) {
525+
continue;
526+
}
519527

520-
// Calculate an AABB for the light in view space, find the corresponding clusters for the min and max
521-
// points of the AABB, then iterate over just those clusters for this light
522-
let light_aabb_view = Aabb {
523-
center: (inverse_view_transform * light_sphere.center.extend(1.0)).xyz(),
524-
half_extents: Vec3::splat(light_sphere.radius),
525-
};
526-
let (light_aabb_view_min, light_aabb_view_max) =
527-
(light_aabb_view.min(), light_aabb_view.max());
528-
// Is there a cheaper way to do this? The problem is that because of perspective
529-
// the point at max z but min xy may be less xy in screenspace, and similar. As
530-
// such, projecting the min and max xy at both the closer and further z and taking
531-
// the min and max of those projected points addresses this.
532-
let (
533-
light_aabb_view_xymin_near,
534-
light_aabb_view_xymin_far,
535-
light_aabb_view_xymax_near,
536-
light_aabb_view_xymax_far,
537-
) = (
538-
light_aabb_view_min,
539-
light_aabb_view_min.xy().extend(light_aabb_view_max.z),
540-
light_aabb_view_max.xy().extend(light_aabb_view_min.z),
541-
light_aabb_view_max,
542-
);
543-
let (
544-
light_aabb_clip_xymin_near,
545-
light_aabb_clip_xymin_far,
546-
light_aabb_clip_xymax_near,
547-
light_aabb_clip_xymax_far,
548-
) = (
549-
camera.projection_matrix * light_aabb_view_xymin_near.extend(1.0),
550-
camera.projection_matrix * light_aabb_view_xymin_far.extend(1.0),
551-
camera.projection_matrix * light_aabb_view_xymax_near.extend(1.0),
552-
camera.projection_matrix * light_aabb_view_xymax_far.extend(1.0),
553-
);
554-
let (
555-
light_aabb_ndc_xymin_near,
556-
light_aabb_ndc_xymin_far,
557-
light_aabb_ndc_xymax_near,
558-
light_aabb_ndc_xymax_far,
559-
) = (
560-
light_aabb_clip_xymin_near.xyz() / light_aabb_clip_xymin_near.w,
561-
light_aabb_clip_xymin_far.xyz() / light_aabb_clip_xymin_far.w,
562-
light_aabb_clip_xymax_near.xyz() / light_aabb_clip_xymax_near.w,
563-
light_aabb_clip_xymax_far.xyz() / light_aabb_clip_xymax_far.w,
564-
);
565-
let (light_aabb_ndc_min, light_aabb_ndc_max) = (
566-
light_aabb_ndc_xymin_near
567-
.min(light_aabb_ndc_xymin_far)
568-
.min(light_aabb_ndc_xymax_near)
569-
.min(light_aabb_ndc_xymax_far),
570-
light_aabb_ndc_xymin_near
571-
.max(light_aabb_ndc_xymin_far)
572-
.max(light_aabb_ndc_xymax_near)
573-
.max(light_aabb_ndc_xymax_far),
574-
);
575-
let min_cluster = ndc_position_to_cluster(
576-
clusters.axis_slices,
577-
cluster_factors,
578-
is_orthographic,
579-
light_aabb_ndc_min,
580-
light_aabb_view_min.z,
581-
);
582-
let max_cluster = ndc_position_to_cluster(
583-
clusters.axis_slices,
584-
cluster_factors,
585-
is_orthographic,
586-
light_aabb_ndc_max,
587-
light_aabb_view_max.z,
588-
);
589-
let (min_cluster, max_cluster) =
590-
(min_cluster.min(max_cluster), min_cluster.max(max_cluster));
591-
for y in min_cluster.y..=max_cluster.y {
592-
for x in min_cluster.x..=max_cluster.x {
593-
for z in min_cluster.z..=max_cluster.z {
594-
let cluster_index =
595-
cluster_to_index(clusters.axis_slices, UVec3::new(x, y, z));
596-
let cluster_aabb = &clusters.aabbs[cluster_index];
597-
if light_sphere.intersects_obb(cluster_aabb, &view_transform) {
598-
global_lights_set.insert(light_entity);
599-
visible_lights_set.insert(light_entity);
600-
clusters_lights[cluster_index].entities.push(light_entity);
528+
// Calculate an AABB for the light in view space, find the corresponding clusters for the min and max
529+
// points of the AABB, then iterate over just those clusters for this light
530+
let light_aabb_view = Aabb {
531+
center: (inverse_view_transform * light_sphere.center.extend(1.0))
532+
.xyz(),
533+
half_extents: Vec3::splat(light_sphere.radius),
534+
};
535+
let (light_aabb_view_min, light_aabb_view_max) =
536+
(light_aabb_view.min(), light_aabb_view.max());
537+
// Is there a cheaper way to do this? The problem is that because of perspective
538+
// the point at max z but min xy may be less xy in screenspace, and similar. As
539+
// such, projecting the min and max xy at both the closer and further z and taking
540+
// the min and max of those projected points addresses this.
541+
let (
542+
light_aabb_view_xymin_near,
543+
light_aabb_view_xymin_far,
544+
light_aabb_view_xymax_near,
545+
light_aabb_view_xymax_far,
546+
) = (
547+
light_aabb_view_min,
548+
light_aabb_view_min.xy().extend(light_aabb_view_max.z),
549+
light_aabb_view_max.xy().extend(light_aabb_view_min.z),
550+
light_aabb_view_max,
551+
);
552+
let (
553+
light_aabb_clip_xymin_near,
554+
light_aabb_clip_xymin_far,
555+
light_aabb_clip_xymax_near,
556+
light_aabb_clip_xymax_far,
557+
) = (
558+
camera.projection_matrix * light_aabb_view_xymin_near.extend(1.0),
559+
camera.projection_matrix * light_aabb_view_xymin_far.extend(1.0),
560+
camera.projection_matrix * light_aabb_view_xymax_near.extend(1.0),
561+
camera.projection_matrix * light_aabb_view_xymax_far.extend(1.0),
562+
);
563+
let (
564+
light_aabb_ndc_xymin_near,
565+
light_aabb_ndc_xymin_far,
566+
light_aabb_ndc_xymax_near,
567+
light_aabb_ndc_xymax_far,
568+
) = (
569+
light_aabb_clip_xymin_near.xyz() / light_aabb_clip_xymin_near.w,
570+
light_aabb_clip_xymin_far.xyz() / light_aabb_clip_xymin_far.w,
571+
light_aabb_clip_xymax_near.xyz() / light_aabb_clip_xymax_near.w,
572+
light_aabb_clip_xymax_far.xyz() / light_aabb_clip_xymax_far.w,
573+
);
574+
let (light_aabb_ndc_min, light_aabb_ndc_max) = (
575+
light_aabb_ndc_xymin_near
576+
.min(light_aabb_ndc_xymin_far)
577+
.min(light_aabb_ndc_xymax_near)
578+
.min(light_aabb_ndc_xymax_far),
579+
light_aabb_ndc_xymin_near
580+
.max(light_aabb_ndc_xymin_far)
581+
.max(light_aabb_ndc_xymax_near)
582+
.max(light_aabb_ndc_xymax_far),
583+
);
584+
let min_cluster = ndc_position_to_cluster(
585+
clusters.axis_slices,
586+
cluster_factors,
587+
is_orthographic,
588+
light_aabb_ndc_min,
589+
light_aabb_view_min.z,
590+
);
591+
let max_cluster = ndc_position_to_cluster(
592+
clusters.axis_slices,
593+
cluster_factors,
594+
is_orthographic,
595+
light_aabb_ndc_max,
596+
light_aabb_view_max.z,
597+
);
598+
let (min_cluster, max_cluster) =
599+
(min_cluster.min(max_cluster), min_cluster.max(max_cluster));
600+
for y in min_cluster.y..=max_cluster.y {
601+
for x in min_cluster.x..=max_cluster.x {
602+
for z in min_cluster.z..=max_cluster.z {
603+
let cluster_index =
604+
cluster_to_index(clusters.axis_slices, UVec3::new(x, y, z));
605+
let cluster_aabb = &clusters.aabbs[cluster_index];
606+
if light_sphere.intersects_obb(cluster_aabb, &view_transform) {
607+
indices_entities.push((cluster_index, *light_entity));
608+
}
609+
}
610+
}
601611
}
602612
}
603-
}
613+
indices_entities
614+
});
604615
}
616+
});
617+
for (cluster_index, light_entity) in indices_entities.into_iter().flatten() {
618+
global_lights_set.insert(light_entity);
619+
visible_lights_set.insert(light_entity);
620+
clusters_lights[cluster_index].entities.push(light_entity);
605621
}
606622

607623
for cluster_lights in &mut clusters_lights {

0 commit comments

Comments
 (0)