From 21377825a5cf05c2c665e7dda83eddd40164baaa Mon Sep 17 00:00:00 2001 From: LeshaInc Date: Wed, 12 Jul 2023 18:43:09 +0300 Subject: [PATCH] Add RenderStatistics resource, providing pipeline statistics (such as primitives count) per each render pass --- crates/bevy_render/src/lib.rs | 15 +- .../src/render_phase/draw_state.rs | 8 +- .../bevy_render/src/renderer/graph_runner.rs | 10 +- crates/bevy_render/src/renderer/mod.rs | 36 ++- crates/bevy_render/src/renderer/statistics.rs | 305 ++++++++++++++++++ examples/3d/3d_scene.rs | 5 + 6 files changed, 368 insertions(+), 11 deletions(-) create mode 100644 crates/bevy_render/src/renderer/statistics.rs diff --git a/crates/bevy_render/src/lib.rs b/crates/bevy_render/src/lib.rs index 61e814e9bc2d9..dba7e76d7c260 100644 --- a/crates/bevy_render/src/lib.rs +++ b/crates/bevy_render/src/lib.rs @@ -34,6 +34,7 @@ pub mod prelude { color::Color, mesh::{morph::MorphWeights, shape, Mesh}, render_resource::Shader, + renderer::RenderStatistics, spatial_bundle::SpatialBundle, texture::{Image, ImagePlugin}, view::{ComputedVisibility, Msaa, Visibility, VisibilityBundle}, @@ -43,7 +44,10 @@ pub mod prelude { use bevy_window::{PrimaryWindow, RawHandleWrapper}; use globals::GlobalsPlugin; -use renderer::{RenderAdapter, RenderAdapterInfo, RenderDevice, RenderQueue}; +use renderer::{ + sync_render_statistics, RenderAdapter, RenderAdapterInfo, RenderDevice, RenderQueue, + RenderStatistics, RenderStatisticsMutex, +}; use wgpu::Instance; use crate::{ @@ -54,7 +58,7 @@ use crate::{ settings::WgpuSettings, view::{ViewPlugin, WindowRenderPlugin}, }; -use bevy_app::{App, AppLabel, Plugin, SubApp}; +use bevy_app::{App, AppLabel, Plugin, PreUpdate, SubApp}; use bevy_asset::{AddAsset, AssetServer}; use bevy_ecs::{prelude::*, schedule::ScheduleLabel, system::SystemState}; use bevy_utils::tracing::debug; @@ -270,11 +274,18 @@ impl Plugin for RenderPlugin { let mut extract_schedule = Schedule::new(); extract_schedule.set_apply_final_deferred(false); + let render_statistics = RenderStatisticsMutex::default(); + + app.insert_resource(render_statistics.clone()) + .add_systems(PreUpdate, sync_render_statistics) + .init_resource::(); + render_app .add_schedule(ExtractSchedule, extract_schedule) .add_schedule(Render, Render::base_schedule()) .init_resource::() .insert_resource(app.world.resource::().clone()) + .insert_resource(render_statistics) .add_systems(ExtractSchedule, PipelineCache::extract_shaders) .add_systems( Render, diff --git a/crates/bevy_render/src/render_phase/draw_state.rs b/crates/bevy_render/src/render_phase/draw_state.rs index 9be2d7ed2bbda..30907e15c43fe 100644 --- a/crates/bevy_render/src/render_phase/draw_state.rs +++ b/crates/bevy_render/src/render_phase/draw_state.rs @@ -5,11 +5,11 @@ use crate::{ BindGroup, BindGroupId, Buffer, BufferId, BufferSlice, RenderPipeline, RenderPipelineId, ShaderStages, }, - renderer::RenderDevice, + renderer::{MeasuredRenderPass, RenderDevice}, }; use bevy_utils::{default, detailed_trace}; use std::ops::Range; -use wgpu::{IndexFormat, RenderPass}; +use wgpu::IndexFormat; /// Tracks the state of a [`TrackedRenderPass`]. /// @@ -101,13 +101,13 @@ impl DrawState { /// It is used to set the current [`RenderPipeline`], [`BindGroup`]s and [`Buffer`]s. /// After all requirements are specified, draw calls can be issued. pub struct TrackedRenderPass<'a> { - pass: RenderPass<'a>, + pass: MeasuredRenderPass<'a>, state: DrawState, } impl<'a> TrackedRenderPass<'a> { /// Tracks the supplied render pass. - pub fn new(device: &RenderDevice, pass: RenderPass<'a>) -> Self { + pub fn new(device: &RenderDevice, pass: MeasuredRenderPass<'a>) -> Self { let limits = device.limits(); let max_bind_groups = limits.max_bind_groups as usize; let max_vertex_buffers = limits.max_vertex_buffers as usize; diff --git a/crates/bevy_render/src/renderer/graph_runner.rs b/crates/bevy_render/src/renderer/graph_runner.rs index c046ef11a17be..0d76bece58684 100644 --- a/crates/bevy_render/src/renderer/graph_runner.rs +++ b/crates/bevy_render/src/renderer/graph_runner.rs @@ -16,6 +16,8 @@ use crate::{ renderer::{RenderContext, RenderDevice}, }; +use super::RenderStatisticsMutex; + pub(crate) struct RenderGraphRunner; #[derive(Error, Debug)] @@ -59,7 +61,7 @@ impl RenderGraphRunner { world: &World, finalizer: impl FnOnce(&mut wgpu::CommandEncoder), ) -> Result<(), RenderGraphRunnerError> { - let mut render_context = RenderContext::new(render_device); + let mut render_context = RenderContext::new(render_device, queue); Self::run_graph(graph, None, &mut render_context, world, &[], None)?; finalizer(render_context.command_encoder()); @@ -68,6 +70,12 @@ impl RenderGraphRunner { let _span = info_span!("submit_graph_commands").entered(); queue.submit(render_context.finish()); } + + let render_statistics_mutex = world.resource::().0.clone(); + render_context.download_statistics(&queue, move |statistics| { + *render_statistics_mutex.lock() = Some(statistics); + }); + Ok(()) } diff --git a/crates/bevy_render/src/renderer/mod.rs b/crates/bevy_render/src/renderer/mod.rs index 6a9d9be6e162d..c8d222e93d8c3 100644 --- a/crates/bevy_render/src/renderer/mod.rs +++ b/crates/bevy_render/src/renderer/mod.rs @@ -1,10 +1,12 @@ mod graph_runner; mod render_device; +mod statistics; use bevy_derive::{Deref, DerefMut}; use bevy_utils::tracing::{error, info, info_span}; pub use graph_runner::*; pub use render_device::*; +pub use statistics::*; use crate::{ render_graph::RenderGraph, @@ -291,15 +293,18 @@ pub struct RenderContext { render_device: RenderDevice, command_encoder: Option, command_buffers: Vec, + statistics_recorder: StatisticsRecorder, } impl RenderContext { /// Creates a new [`RenderContext`] from a [`RenderDevice`]. - pub fn new(render_device: RenderDevice) -> Self { + pub fn new(render_device: RenderDevice, queue: &Queue) -> Self { + let statistics_recorder = StatisticsRecorder::new(&render_device, queue); Self { render_device, command_encoder: None, command_buffers: Vec::new(), + statistics_recorder, } } @@ -308,6 +313,11 @@ impl RenderContext { &self.render_device } + /// Gets the underlying [`StatisticsRecorder`]. + pub fn statistics_encoder(&mut self) -> &mut StatisticsRecorder { + &mut self.statistics_recorder + } + /// Gets the current [`CommandEncoder`]. pub fn command_encoder(&mut self) -> &mut CommandEncoder { self.command_encoder.get_or_insert_with(|| { @@ -327,7 +337,8 @@ impl RenderContext { self.render_device .create_command_encoder(&wgpu::CommandEncoderDescriptor::default()) }); - let render_pass = command_encoder.begin_render_pass(&descriptor); + let render_pass = + MeasuredRenderPass::new(command_encoder, &mut self.statistics_recorder, descriptor); TrackedRenderPass::new(&self.render_device, render_pass) } @@ -342,9 +353,26 @@ impl RenderContext { } /// Finalizes the queue and returns the queue of [`CommandBuffer`]s. - pub fn finish(mut self) -> Vec { + pub fn finish(&mut self) -> Vec { + let command_encoder = self.command_encoder.get_or_insert_with(|| { + self.render_device + .create_command_encoder(&wgpu::CommandEncoderDescriptor::default()) + }); + + self.statistics_recorder + .resolve(command_encoder, &self.render_device); + self.flush_encoder(); - self.command_buffers + std::mem::take(&mut self.command_buffers) + } + + pub fn download_statistics( + &mut self, + queue: &Queue, + callback: impl FnOnce(RenderStatistics) + Send + 'static, + ) { + self.statistics_recorder + .download(&self.render_device, queue, callback); } fn flush_encoder(&mut self) { diff --git a/crates/bevy_render/src/renderer/statistics.rs b/crates/bevy_render/src/renderer/statistics.rs new file mode 100644 index 0000000000000..56732663d03ca --- /dev/null +++ b/crates/bevy_render/src/renderer/statistics.rs @@ -0,0 +1,305 @@ +use std::sync::Arc; + +use bevy_derive::{Deref, DerefMut}; +use bevy_ecs::system::{Res, ResMut, Resource}; +use bevy_utils::{Duration, HashMap, Instant}; +use parking_lot::Mutex; +use wgpu::{ + util::DownloadBuffer, Buffer, BufferDescriptor, BufferUsages, CommandEncoder, + PipelineStatisticsTypes, QuerySet, QuerySetDescriptor, QueryType, Queue, RenderPass, + RenderPassDescriptor, +}; + +use super::RenderDevice; + +const MAX_TIMESTAMP_QUERIES: u32 = 256; +const MAX_PIPELINE_STATISTICS: u32 = 128; + +#[derive(Debug, Default, Clone, Resource)] +pub struct RenderStatistics(pub HashMap); + +#[derive(Debug, Default, Clone, Copy, Eq, PartialEq, Hash)] +pub struct RenderPassStatistics { + pub elapsed_cpu: Option, + pub elapsed_gpu: Option, + pub vertex_shader_invocations: Option, + pub clipper_invocations: Option, + pub clipper_primitives_out: Option, + pub fragment_shader_invocations: Option, + pub compute_shader_invocations: Option, +} + +#[derive(Default)] +struct PassRecord { + begin_timestamp_index: Option, + end_timestamp_index: Option, + begin_instant: Option, + end_instant: Option, + pipeline_statistics_index: Option, +} + +pub struct StatisticsRecorder { + timestamp_period: f32, + timestamps_query_set: QuerySet, + num_timestamps: u32, + pipeline_statistics_query_set: QuerySet, + num_pipeline_statistics: u32, + pass_records: HashMap, + buffer: Option, +} + +impl StatisticsRecorder { + pub fn new(device: &RenderDevice, queue: &Queue) -> StatisticsRecorder { + let timestamp_period = queue.get_timestamp_period(); + + let timestamps_query_set = device.wgpu_device().create_query_set(&QuerySetDescriptor { + label: Some("timestamps_query_set"), + ty: QueryType::Timestamp, + count: MAX_TIMESTAMP_QUERIES, + }); + + let pipeline_statistics_query_set = + device.wgpu_device().create_query_set(&QuerySetDescriptor { + label: Some("pipeline_statistics_query_set"), + ty: QueryType::PipelineStatistics(PipelineStatisticsTypes::all()), + count: MAX_PIPELINE_STATISTICS, + }); + + StatisticsRecorder { + timestamp_period, + timestamps_query_set, + num_timestamps: 0, + pipeline_statistics_query_set, + num_pipeline_statistics: 0, + pass_records: HashMap::default(), + buffer: None, + } + } + + fn pass_record(&mut self, name: &str) -> &mut PassRecord { + self.pass_records.entry(name.into()).or_default() + } + + pub fn begin_render_pass(&mut self, pass: &mut RenderPass, name: &str) { + let begin_instant = Instant::now(); + + let begin_timestamp_index = if self.num_timestamps < MAX_TIMESTAMP_QUERIES { + let index = self.num_timestamps; + pass.write_timestamp(&self.timestamps_query_set, index); + self.num_timestamps += 1; + Some(index) + } else { + None + }; + + let pipeline_statistics_index = if self.num_pipeline_statistics < MAX_PIPELINE_STATISTICS { + let index = self.num_pipeline_statistics; + pass.begin_pipeline_statistics_query(&self.pipeline_statistics_query_set, index); + self.num_pipeline_statistics += 1; + Some(index) + } else { + None + }; + + let record = self.pass_record(name); + record.begin_instant = Some(begin_instant); + record.begin_timestamp_index = begin_timestamp_index; + record.pipeline_statistics_index = pipeline_statistics_index; + } + + pub fn end_render_pass(&mut self, pass: &mut RenderPass, name: &str) { + let end_timestamp_index = if self.num_timestamps < MAX_TIMESTAMP_QUERIES { + let index = self.num_timestamps; + pass.write_timestamp(&self.timestamps_query_set, index); + self.num_timestamps += 1; + Some(index) + } else { + None + }; + + let record = self.pass_record(name); + record.end_timestamp_index = end_timestamp_index; + + if record.pipeline_statistics_index.is_some() { + pass.end_pipeline_statistics_query(); + } + + record.end_instant = Some(Instant::now()); + } + + fn buffer_size(&self) -> (u64, u64) { + // timestamps are stored as u64 + let mut buffer_size = u64::from(self.num_timestamps) * 8; + if buffer_size % 256 != 0 { + buffer_size = buffer_size + 256 - buffer_size % 256; + } + + let pipeline_statistics_offset = buffer_size; + + // pipeline statistics are stored as [u64; 5] + buffer_size += u64::from(self.num_pipeline_statistics) * 40; + + (buffer_size, pipeline_statistics_offset) + } + + pub fn resolve(&mut self, encoder: &mut CommandEncoder, device: &RenderDevice) { + let (buffer_size, pipeline_statistics_offset) = self.buffer_size(); + + let buffer = device.wgpu_device().create_buffer(&BufferDescriptor { + label: Some("download_statistics_bufer"), + size: buffer_size, + usage: BufferUsages::COPY_SRC | BufferUsages::QUERY_RESOLVE, + mapped_at_creation: false, + }); + + if self.num_timestamps > 0 { + encoder.resolve_query_set( + &self.timestamps_query_set, + 0..self.num_timestamps, + &buffer, + 0, + ); + } + + if self.num_pipeline_statistics > 0 { + encoder.resolve_query_set( + &self.pipeline_statistics_query_set, + 0..self.num_pipeline_statistics, + &buffer, + pipeline_statistics_offset, + ); + } + + self.buffer = Some(buffer); + } + + pub fn download( + &mut self, + device: &RenderDevice, + queue: &Queue, + callback: impl FnOnce(RenderStatistics) + Send + 'static, + ) { + let (_, pipeline_statistics_offset) = self.buffer_size(); + let timestamp_period = self.timestamp_period; + let num_timestamps = self.num_timestamps; + let num_pipeline_statistics = self.num_pipeline_statistics; + let pass_records = std::mem::take(&mut self.pass_records); + + let Some(buffer) = &self.buffer else { return }; + DownloadBuffer::read_buffer(device.wgpu_device(), queue, &buffer.slice(..), move |res| { + let buffer = match res { + Ok(v) => v, + Err(e) => { + bevy_log::warn!("Failed to download render statistics buffer: {e}"); + return; + } + }; + + let timestamps = buffer[..(num_timestamps * 8) as usize] + .chunks(8) + .map(|v| u64::from_ne_bytes(v.try_into().unwrap())) + .collect::>(); + + let start = pipeline_statistics_offset as usize; + let len = (num_pipeline_statistics as usize) * 40; + let pipeline_statistics = buffer[start..start + len] + .chunks(8) + .map(|v| u64::from_ne_bytes(v.try_into().unwrap())) + .collect::>(); + + let statistics = pass_records.into_iter().map(|(name, record)| { + let mut statistics = RenderPassStatistics::default(); + + statistics.elapsed_cpu = match (record.begin_instant, record.end_instant) { + (Some(begin), Some(end)) => Some(end - begin), + _ => None, + }; + + statistics.elapsed_gpu = + match (record.begin_timestamp_index, record.end_timestamp_index) { + (Some(begin), Some(end)) => { + let begin = timestamps[begin as usize] as f64; + let end = timestamps[end as usize] as f64; + let nanos = ((end - begin) * (timestamp_period as f64)).round() as u64; + Some(Duration::from_nanos(nanos)) + } + _ => None, + }; + + if let Some(index) = record.pipeline_statistics_index { + let index = (index as usize) * 5; + statistics.vertex_shader_invocations = Some(pipeline_statistics[index]); + statistics.clipper_invocations = Some(pipeline_statistics[index + 1]); + statistics.clipper_primitives_out = Some(pipeline_statistics[index + 2]); + statistics.fragment_shader_invocations = Some(pipeline_statistics[index + 3]); + statistics.compute_shader_invocations = Some(pipeline_statistics[index + 4]); + } + + (name, statistics) + }); + + callback(RenderStatistics(statistics.collect())); + }); + } +} + +#[derive(Deref, DerefMut)] +pub struct MeasuredRenderPass<'a> { + #[deref] + render_pass: RenderPass<'a>, + name: Option, + recorder: &'a mut StatisticsRecorder, +} + +impl MeasuredRenderPass<'_> { + pub fn new<'a>( + encoder: &'a mut CommandEncoder, + recorder: &'a mut StatisticsRecorder, + desc: RenderPassDescriptor<'a, '_>, + ) -> MeasuredRenderPass<'a> { + let name = desc.label.map(|v| v.to_owned()); + let mut render_pass = encoder.begin_render_pass(&desc); + + if let Some(name) = &name { + recorder.begin_render_pass(&mut render_pass, name) + } + + MeasuredRenderPass { + render_pass, + name, + recorder, + } + } +} + +impl Drop for MeasuredRenderPass<'_> { + fn drop(&mut self) { + if std::thread::panicking() { + return; + } + + if let Some(name) = &self.name { + self.recorder.end_render_pass(&mut self.render_pass, &name) + } + } +} + +impl std::fmt::Debug for MeasuredRenderPass<'_> { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("MeasuredRenderPass") + .field("render_pass", &self.render_pass) + .finish_non_exhaustive() + } +} + +#[derive(Debug, Default, Clone, Resource)] +pub struct RenderStatisticsMutex(pub Arc>>); + +pub fn sync_render_statistics( + mutex: Res, + mut statistics: ResMut, +) { + if let Some(v) = mutex.0.lock().take() { + *statistics = v; + } +} diff --git a/examples/3d/3d_scene.rs b/examples/3d/3d_scene.rs index 9fbbca077cfb7..d3e936f7cd82d 100644 --- a/examples/3d/3d_scene.rs +++ b/examples/3d/3d_scene.rs @@ -6,6 +6,7 @@ fn main() { App::new() .add_plugins(DefaultPlugins) .add_systems(Startup, setup) + .add_systems(Update, print_render_statistics) .run(); } @@ -44,3 +45,7 @@ fn setup( ..default() }); } + +fn print_render_statistics(statistics: Res) { + dbg!(statistics); +}