Skip to content

Commit

Permalink
Parallelize extract_meshes (#9966)
Browse files Browse the repository at this point in the history
# Objective
`extract_meshes` can easily be one of the most expensive operations in
the blocking extract schedule for 3D apps. It also has no fundamentally
serialized parts and can easily be run across multiple threads. Let's
speed it up by parallelizing it!

## Solution
Use the `ThreadLocal<Cell<Vec<T>>>` approach utilized by #7348 in
conjunction with `Query::par_iter` to build a set of thread-local
queues, and collect them after going wide.

## Performance
Measured using `cargo run --profile stress-test --features trace_tracy --example
many_cubes`. In the capture below, yellow is this PR and red is main.

`extract_meshes`:


![image](https://github.com/bevyengine/bevy/assets/3137680/9d45aa2e-3cfa-4fad-9c08-53498b51a73b)

On average, `extract_meshes` drops from 1.2ms to 770us, a 41.6% improvement.

Note: this does not yet include #9950's changes, so the speedup may be
even larger once that is merged in.
  • Loading branch information
james7132 authored Oct 1, 2023
1 parent 1d7577f commit a1a81e5
Show file tree
Hide file tree
Showing 2 changed files with 52 additions and 40 deletions.
1 change: 1 addition & 0 deletions crates/bevy_pbr/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -33,3 +33,4 @@ bytemuck = { version = "1", features = ["derive"] }
naga_oil = "0.8"
radsort = "0.1"
smallvec = "1.6"
thread_local = "1.0"
91 changes: 51 additions & 40 deletions crates/bevy_pbr/src/render/mesh.rs
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,8 @@ use bevy_render::{
};
use bevy_transform::components::GlobalTransform;
use bevy_utils::{tracing::error, EntityHashMap, HashMap, Hashed};
use std::cell::Cell;
use thread_local::ThreadLocal;

use crate::render::{
morph::{
Expand Down Expand Up @@ -246,6 +248,7 @@ pub fn extract_meshes(
mut commands: Commands,
mut previous_len: Local<usize>,
mut render_mesh_instances: ResMut<RenderMeshInstances>,
mut thread_local_queues: Local<ThreadLocal<Cell<Vec<(Entity, RenderMeshInstance)>>>>,
meshes_query: Extract<
Query<(
Entity,
Expand All @@ -259,50 +262,58 @@ pub fn extract_meshes(
)>,
>,
) {
meshes_query.par_iter().for_each(
|(
entity,
view_visibility,
transform,
previous_transform,
handle,
not_receiver,
not_caster,
no_automatic_batching,
)| {
if !view_visibility.get() {
return;
}
let transform = transform.affine();
let previous_transform = previous_transform.map(|t| t.0).unwrap_or(transform);
let mut flags = if not_receiver.is_some() {
MeshFlags::empty()
} else {
MeshFlags::SHADOW_RECEIVER
};
if transform.matrix3.determinant().is_sign_positive() {
flags |= MeshFlags::SIGN_DETERMINANT_MODEL_3X3;
}
let transforms = MeshTransforms {
transform: (&transform).into(),
previous_transform: (&previous_transform).into(),
flags: flags.bits(),
};
let tls = thread_local_queues.get_or_default();
let mut queue = tls.take();
queue.push((
entity,
RenderMeshInstance {
mesh_asset_id: handle.id(),
transforms,
shadow_caster: not_caster.is_none(),
material_bind_group_id: MaterialBindGroupId::default(),
automatic_batching: !no_automatic_batching,
},
));
tls.set(queue);
},
);

render_mesh_instances.clear();
let mut entities = Vec::with_capacity(*previous_len);

let visible_meshes = meshes_query.iter().filter(|(_, vis, ..)| vis.get());

for (
entity,
_,
transform,
previous_transform,
handle,
not_receiver,
not_caster,
no_automatic_batching,
) in visible_meshes
{
let transform = transform.affine();
let previous_transform = previous_transform.map(|t| t.0).unwrap_or(transform);
let mut flags = if not_receiver.is_some() {
MeshFlags::empty()
} else {
MeshFlags::SHADOW_RECEIVER
};
if transform.matrix3.determinant().is_sign_positive() {
flags |= MeshFlags::SIGN_DETERMINANT_MODEL_3X3;
}
let transforms = MeshTransforms {
transform: (&transform).into(),
previous_transform: (&previous_transform).into(),
flags: flags.bits(),
};
for queue in thread_local_queues.iter_mut() {
// FIXME: Remove this - it is just a workaround to enable rendering to work as
// render commands require an entity to exist at the moment.
entities.push((entity, Mesh3d));
render_mesh_instances.insert(
entity,
RenderMeshInstance {
mesh_asset_id: handle.id(),
transforms,
shadow_caster: not_caster.is_none(),
material_bind_group_id: MaterialBindGroupId::default(),
automatic_batching: !no_automatic_batching,
},
);
entities.extend(queue.get_mut().iter().map(|(e, _)| (*e, Mesh3d)));
render_mesh_instances.extend(queue.get_mut().drain(..));
}
*previous_len = entities.len();
commands.insert_or_spawn_batch(entities);
Expand Down

0 comments on commit a1a81e5

Please sign in to comment.