Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Parallelize batching #12489

Draft
wants to merge 10 commits into
base: main
Choose a base branch
from
14 changes: 12 additions & 2 deletions crates/bevy_pbr/src/prepass/mod.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
mod prepass_bindings;

use bevy_render::batching::{allocate_batch_buffer, reserve_batch_buffer, clear_batch_buffer};
use bevy_render::mesh::MeshVertexBufferLayoutRef;
use bevy_render::render_resource::binding_types::uniform_buffer;
pub use prepass_bindings::*;
Expand Down Expand Up @@ -152,8 +153,17 @@ where
Render,
(
prepare_previous_view_projection_uniforms,
batch_and_prepare_render_phase::<Opaque3dPrepass, MeshPipeline>,
batch_and_prepare_render_phase::<AlphaMask3dPrepass, MeshPipeline>,
(
reserve_batch_buffer::<Opaque3dPrepass, MeshPipeline>,
reserve_batch_buffer::<AlphaMask3dPrepass, MeshPipeline>,
)
.before(allocate_batch_buffer::<MeshPipeline>)
.after(clear_batch_buffer::<MeshPipeline>),
(
batch_and_prepare_render_phase::<Opaque3dPrepass, MeshPipeline>,
batch_and_prepare_render_phase::<AlphaMask3dPrepass, MeshPipeline>,
)
.after(allocate_batch_buffer::<MeshPipeline>),
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This starts to look like we maybe want a batching plugin that is generic over render phase item and the base specialisation pipeline or something.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Agreed there. This is getting unwieldy to manage.

)
.in_set(RenderSet::PrepareResources),
);
Expand Down
30 changes: 22 additions & 8 deletions crates/bevy_pbr/src/render/mesh.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,8 @@ use bevy_ecs::{
use bevy_math::{Affine3, Rect, UVec2, Vec4};
use bevy_render::{
batching::{
batch_and_prepare_render_phase, write_batched_instance_buffer, GetBatchData,
NoAutomaticBatching,
batch_and_prepare_render_phase, GetBatchData,
NoAutomaticBatching, allocate_batch_buffer, clear_batch_buffer, reserve_batch_buffer,
},
mesh::*,
render_asset::RenderAssets,
Expand Down Expand Up @@ -116,14 +116,29 @@ impl Plugin for MeshRenderPlugin {
.init_resource::<SkinIndices>()
.init_resource::<MorphUniform>()
.init_resource::<MorphIndices>()
.allow_ambiguous_resource::<GpuArrayBuffer<MeshUniform>>()
.allow_ambiguous_resource::<GpuArrayBufferPool<MeshUniform>>()
.add_systems(
ExtractSchedule,
(extract_meshes, extract_skins, extract_morphs),
)
.add_systems(
Render,
(
clear_batch_buffer::<MeshPipeline>,
(
reserve_batch_buffer::<Opaque3d, MeshPipeline>,
reserve_batch_buffer::<Transmissive3d, MeshPipeline>,
reserve_batch_buffer::<Transparent3d, MeshPipeline>,
reserve_batch_buffer::<AlphaMask3d, MeshPipeline>,
reserve_batch_buffer::<Shadow, MeshPipeline>,
reserve_batch_buffer::<Opaque3dDeferred, MeshPipeline>,
reserve_batch_buffer::<AlphaMask3dDeferred, MeshPipeline>,
)
.in_set(RenderSet::PrepareResources)
.before(allocate_batch_buffer::<MeshPipeline>)
.after(clear_batch_buffer::<MeshPipeline>),
allocate_batch_buffer::<MeshPipeline>
.in_set(RenderSet::PrepareResources),
(
batch_and_prepare_render_phase::<Opaque3d, MeshPipeline>,
batch_and_prepare_render_phase::<Transmissive3d, MeshPipeline>,
Expand All @@ -133,9 +148,8 @@ impl Plugin for MeshRenderPlugin {
batch_and_prepare_render_phase::<Opaque3dDeferred, MeshPipeline>,
batch_and_prepare_render_phase::<AlphaMask3dDeferred, MeshPipeline>,
)
.in_set(RenderSet::PrepareResources),
write_batched_instance_buffer::<MeshPipeline>
.in_set(RenderSet::PrepareResourcesFlush),
.in_set(RenderSet::PrepareResources)
.after(allocate_batch_buffer::<MeshPipeline>),
prepare_skins.in_set(RenderSet::PrepareResources),
prepare_morphs.in_set(RenderSet::PrepareResources),
prepare_mesh_bind_group.in_set(RenderSet::PrepareBindGroups),
Expand All @@ -159,7 +173,7 @@ impl Plugin for MeshRenderPlugin {
}

render_app
.insert_resource(GpuArrayBuffer::<MeshUniform>::new(
.insert_resource(GpuArrayBufferPool::<MeshUniform>::new(
render_app.world.resource::<RenderDevice>(),
))
.init_resource::<MeshPipeline>();
Expand Down Expand Up @@ -982,7 +996,7 @@ pub fn prepare_mesh_bind_group(
mut groups: ResMut<MeshBindGroups>,
mesh_pipeline: Res<MeshPipeline>,
render_device: Res<RenderDevice>,
mesh_uniforms: Res<GpuArrayBuffer<MeshUniform>>,
mesh_uniforms: Res<GpuArrayBufferPool<MeshUniform>>,
skins_uniform: Res<SkinUniform>,
weights_uniform: Res<MorphUniform>,
render_lightmaps: Res<RenderLightmaps>,
Expand Down
95 changes: 57 additions & 38 deletions crates/bevy_render/src/batching/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ use nonmax::NonMaxU32;

use crate::{
render_phase::{CachedRenderPipelinePhaseItem, DrawFunctionId, RenderPhase},
render_resource::{CachedRenderPipelineId, GpuArrayBuffer, GpuArrayBufferable},
render_resource::{CachedRenderPipelineId, GpuArrayBufferable, GpuArrayBufferPool},
renderer::{RenderDevice, RenderQueue},
};

Expand Down Expand Up @@ -74,53 +74,72 @@ pub trait GetBatchData {
) -> Option<(Self::BufferData, Option<Self::CompareData>)>;
}

/// Render system that calls `clear` on the [`GpuArrayBufferPool`] resource holding
/// `F::BufferData`.
///
/// In the plugin registrations that use this system, the `reserve_batch_buffer` systems are
/// ordered `.after(clear_batch_buffer::<F>)`, so the pool is cleared before ranges are reserved.
// NOTE(review): what `clear` resets is defined by `GpuArrayBufferPool`, which is not visible
// here — confirm against its implementation.
pub fn clear_batch_buffer<F: GetBatchData>(
mut gpu_array_buffer: ResMut<GpuArrayBufferPool<F::BufferData>>,
) {
gpu_array_buffer.clear();
}

/// Reserves a slice of the batch buffer pool for every [`RenderPhase<I>`] in the world.
///
/// For each phase, the number of phase items is turned into a `wgpu::BufferSize`. If that
/// conversion yields a size, the pool's `reserve` is called with it and the returned slice is
/// stored in `RenderPhase::reserved_range`; otherwise `reserved_range` is set to `None`.
pub fn reserve_batch_buffer<I: CachedRenderPipelinePhaseItem, F: GetBatchData>(
    mut gpu_array_buffer: ResMut<GpuArrayBufferPool<F::BufferData>>,
    mut views: Query<&mut RenderPhase<I>>,
) {
    for mut phase in &mut views {
        let item_count = phase.items.len() as u64;
        // `BufferSize::new` returns `None` when there is nothing to reserve for this phase.
        phase.reserved_range = match wgpu::BufferSize::new(item_count) {
            Some(size) => Some(gpu_array_buffer.reserve(size)),
            None => None,
        };
    }
}

/// Render system that calls `allocate` on the [`GpuArrayBufferPool`] resource holding
/// `F::BufferData`, passing it the [`RenderDevice`].
///
/// In the plugin registrations that use this system, it is ordered after the
/// `reserve_batch_buffer` systems and before the `batch_and_prepare_render_phase` systems.
// NOTE(review): presumably this creates or resizes the GPU buffer to fit all reserved ranges —
// confirm against `GpuArrayBufferPool::allocate`.
pub fn allocate_batch_buffer<F: GetBatchData>(
mut gpu_array_buffer: ResMut<GpuArrayBufferPool<F::BufferData>>,
device: Res<RenderDevice>,
) {
gpu_array_buffer.allocate(&device);
}

/// Batch the items in a render phase. This means comparing metadata needed to draw each phase item
/// and trying to combine the draws into a batch.
pub fn batch_and_prepare_render_phase<I: CachedRenderPipelinePhaseItem, F: GetBatchData>(
gpu_array_buffer: ResMut<GpuArrayBuffer<F::BufferData>>,
gpu_array_buffer: Res<GpuArrayBufferPool<F::BufferData>>,
mut views: Query<&mut RenderPhase<I>>,
render_queue: Res<RenderQueue>,
param: StaticSystemParam<F::Param>,
) {
) where for<'w, 's> <F::Param as SystemParam>::Item<'w, 's>: Sync {
let gpu_array_buffer = gpu_array_buffer.into_inner();
let system_param_item = param.into_inner();

let mut process_item = |item: &mut I| {
let (buffer_data, compare_data) = F::get_batch_data(&system_param_item, item.entity())?;
let buffer_index = gpu_array_buffer.push(buffer_data);
views.par_iter_mut()
.for_each(|mut phase| {
let Some(slice) = phase.reserved_range else {
return
};
let mut writer = gpu_array_buffer.get_writer(slice, &render_queue).expect("GPU Array Buffer was not allocated.");

let index = buffer_index.index;
*item.batch_range_mut() = index..index + 1;
*item.dynamic_offset_mut() = buffer_index.dynamic_offset;
let mut process_item = |item: &mut I| {
let (buffer_data, compare_data) = F::get_batch_data(&system_param_item, item.entity())?;
let buffer_index = writer.write(buffer_data);

if I::AUTOMATIC_BATCHING {
compare_data.map(|compare_data| BatchMeta::new(item, compare_data))
} else {
None
}
};
let index = buffer_index.index;
*item.batch_range_mut() = index..index + 1;
*item.dynamic_offset_mut() = buffer_index.dynamic_offset;

for mut phase in &mut views {
let items = phase.items.iter_mut().map(|item| {
let batch_data = process_item(item);
(item.batch_range_mut(), batch_data)
});
items.reduce(|(start_range, prev_batch_meta), (range, batch_meta)| {
if batch_meta.is_some() && prev_batch_meta == batch_meta {
start_range.end = range.end;
(start_range, prev_batch_meta)
} else {
(range, batch_meta)
}
});
}
}
if I::AUTOMATIC_BATCHING {
compare_data.map(|compare_data| BatchMeta::new(item, compare_data))
} else {
None
}
};

pub fn write_batched_instance_buffer<F: GetBatchData>(
render_device: Res<RenderDevice>,
render_queue: Res<RenderQueue>,
gpu_array_buffer: ResMut<GpuArrayBuffer<F::BufferData>>,
) {
let gpu_array_buffer = gpu_array_buffer.into_inner();
gpu_array_buffer.write_buffer(&render_device, &render_queue);
gpu_array_buffer.clear();
let items = phase.items.iter_mut().map(|item| {
let batch_data = process_item(item);
(item.batch_range_mut(), batch_data)
});
items.reduce(|(start_range, prev_batch_meta), (range, batch_meta)| {
if batch_meta.is_some() && prev_batch_meta == batch_meta {
start_range.end = range.end;
(start_range, prev_batch_meta)
} else {
(range, batch_meta)
}
});
});
}
4 changes: 1 addition & 3 deletions crates/bevy_render/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -116,8 +116,6 @@ pub enum RenderSet {
Prepare,
/// A sub-set within [`Prepare`](RenderSet::Prepare) for initializing buffers, textures and uniforms for use in bind groups.
PrepareResources,
/// Flush buffers after [`PrepareResources`](RenderSet::PrepareResources), but before [`PrepareBindGroups`](RenderSet::PrepareBindGroups).
PrepareResourcesFlush,
/// A sub-set within [`Prepare`](RenderSet::Prepare) for constructing bind groups, or other data that relies on render resources prepared in [`PrepareResources`](RenderSet::PrepareResources).
PrepareBindGroups,
/// Actual rendering happens here.
Expand Down Expand Up @@ -156,7 +154,7 @@ impl Render {
schedule.configure_sets((ExtractCommands, PrepareAssets, Prepare).chain());
schedule.configure_sets(QueueMeshes.in_set(Queue).after(prepare_assets::<Mesh>));
schedule.configure_sets(
(PrepareResources, PrepareResourcesFlush, PrepareBindGroups)
(PrepareResources, PrepareBindGroups)
.chain()
.in_set(Prepare),
);
Expand Down
5 changes: 3 additions & 2 deletions crates/bevy_render/src/render_phase/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ pub use draw_state::*;
use nonmax::NonMaxU32;
pub use rangefinder::*;

use crate::render_resource::{CachedRenderPipelineId, PipelineCache};
use crate::render_resource::{CachedRenderPipelineId, PipelineCache, BufferPoolSlice};
use bevy_ecs::{
prelude::*,
system::{lifetimeless::SRes, SystemParamItem},
Expand All @@ -54,11 +54,12 @@ use std::{ops::Range, slice::SliceIndex};
#[derive(Component)]
pub struct RenderPhase<I: PhaseItem> {
/// The phase items belonging to this render phase.
pub items: Vec<I>,
/// Slice of the batch buffer pool reserved for this phase, if any.
///
/// Written by `reserve_batch_buffer` and read by `batch_and_prepare_render_phase`, which
/// returns early for the phase when this is `None`.
pub reserved_range: Option<BufferPoolSlice>,
}

impl<I: PhaseItem> Default for RenderPhase<I> {
fn default() -> Self {
Self { items: Vec::new() }
Self { items: Vec::new(), reserved_range: None }
}
}

Expand Down
Loading
Loading