From 7eadc1d46780c58ced9bf8ad6d1c99afe5589c34 Mon Sep 17 00:00:00 2001 From: vero Date: Fri, 4 Oct 2024 17:24:44 -0400 Subject: [PATCH] Zero Copy Mesh (#15569) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit # Objective - Another step towards #15558 ## Solution - Instead of allocating a Vec and then having wgpu copy it into a staging buffer, write directly into the staging buffer. - gets rid of another hidden copy, in `pad_to_alignment`. future work: - why is there a gcd implementation in here (and it's subpar, use binary_gcd. it's in the hot path, run twice for every mesh, every frame I think?) make it better and put it in bevy_math - zero-copy custom mesh api to avoid having to write out a Mesh from a custom rep ## Testing - lighting and many_cubes run fine (and slightly faster. haven't benchmarked though) --- ## Showcase - look ma... no copies at least when RenderAssetUsage is GPU only :3 --------- Co-authored-by: Alice Cecile Co-authored-by: Kristoffer Søholm --- crates/bevy_render/src/mesh/allocator.rs | 79 ++++++++++++------------ crates/bevy_render/src/mesh/mesh/mod.rs | 28 +++++++-- 2 files changed, 61 insertions(+), 46 deletions(-) diff --git a/crates/bevy_render/src/mesh/allocator.rs b/crates/bevy_render/src/mesh/allocator.rs index c9d36c5855b51..3493f7dcf1722 100644 --- a/crates/bevy_render/src/mesh/allocator.rs +++ b/crates/bevy_render/src/mesh/allocator.rs @@ -1,9 +1,8 @@ //! Manages mesh vertex and index buffers. 
-use alloc::{borrow::Cow, vec::Vec}; +use alloc::vec::Vec; use core::{ fmt::{self, Display, Formatter}, - iter, ops::Range, }; @@ -21,8 +20,8 @@ use bevy_utils::{ }; use offset_allocator::{Allocation, Allocator}; use wgpu::{ - util::BufferInitDescriptor, BufferDescriptor, BufferUsages, CommandEncoderDescriptor, - DownlevelFlags, COPY_BUFFER_ALIGNMENT, + BufferDescriptor, BufferSize, BufferUsages, CommandEncoderDescriptor, DownlevelFlags, + COPY_BUFFER_ALIGNMENT, }; use crate::{ @@ -427,7 +426,7 @@ impl MeshAllocator { if self.general_vertex_slabs_supported { self.allocate( mesh_id, - mesh.get_vertex_size() * mesh.count_vertices() as u64, + mesh.get_vertex_buffer_size() as u64, vertex_element_layout, &mut slabs_to_grow, mesh_allocator_settings, @@ -474,12 +473,12 @@ impl MeshAllocator { let Some(&slab_id) = self.mesh_id_to_vertex_slab.get(mesh_id) else { return; }; - let vertex_data = mesh.create_packed_vertex_buffer_data(); // Call the generic function. self.copy_element_data( mesh_id, - &vertex_data, + mesh.get_vertex_buffer_size(), + |slice| mesh.write_packed_vertex_buffer_data(slice), BufferUsages::VERTEX, slab_id, render_device, @@ -506,7 +505,8 @@ impl MeshAllocator { // Call the generic function. self.copy_element_data( mesh_id, - index_data, + index_data.len(), + |slice| slice.copy_from_slice(index_data), BufferUsages::INDEX, slab_id, render_device, @@ -519,7 +519,8 @@ impl MeshAllocator { fn copy_element_data( &mut self, mesh_id: &AssetId, - data: &[u8], + len: usize, + fill_data: impl Fn(&mut [u8]), buffer_usages: BufferUsages, slab_id: SlabId, render_device: &RenderDevice, @@ -540,12 +541,18 @@ impl MeshAllocator { let slot_size = general_slab.element_layout.slot_size(); - // Write the data in. 
- render_queue.write_buffer( - buffer, - allocated_range.allocation.offset as u64 * slot_size, - &pad_to_alignment(data, slot_size as usize), - ); + // round up size to a multiple of the slot size to satisfy wgpu alignment requirements + if let Some(size) = BufferSize::new((len as u64).next_multiple_of(slot_size)) { + // Write the data in. + if let Some(mut buffer) = render_queue.write_buffer_with( + buffer, + allocated_range.allocation.offset as u64 * slot_size, + size, + ) { + let slice = &mut buffer.as_mut()[..len]; + fill_data(slice); + } + } // Mark the allocation as resident. general_slab @@ -557,17 +564,22 @@ impl MeshAllocator { debug_assert!(large_object_slab.buffer.is_none()); // Create the buffer and its data in one go. - large_object_slab.buffer = Some(render_device.create_buffer_with_data( - &BufferInitDescriptor { - label: Some(&format!( - "large mesh slab {} ({}buffer)", - slab_id, - buffer_usages_to_str(buffer_usages) - )), - contents: data, - usage: buffer_usages | BufferUsages::COPY_DST, - }, - )); + let buffer = render_device.create_buffer(&BufferDescriptor { + label: Some(&format!( + "large mesh slab {} ({}buffer)", + slab_id, + buffer_usages_to_str(buffer_usages) + )), + size: len as u64, + usage: buffer_usages | BufferUsages::COPY_DST, + mapped_at_creation: true, + }); + { + let slice = &mut buffer.slice(..).get_mapped_range_mut()[..len]; + fill_data(slice); + } + buffer.unmap(); + large_object_slab.buffer = Some(buffer); } } } @@ -1000,21 +1012,6 @@ fn gcd(mut a: u64, mut b: u64) -> u64 { a } -/// Ensures that the size of a buffer is a multiple of the given alignment by -/// padding it with zeroes if necessary. -/// -/// If the buffer already has the required size, then this function doesn't -/// allocate. Otherwise, it copies the buffer into a new one and writes the -/// appropriate number of zeroes to the end. 
-fn pad_to_alignment(buffer: &[u8], align: usize) -> Cow<[u8]> { - if buffer.len() % align == 0 { - return Cow::Borrowed(buffer); - } - let mut buffer = buffer.to_vec(); - buffer.extend(iter::repeat(0).take(align - buffer.len() % align)); - Cow::Owned(buffer) -} - /// Returns a string describing the given buffer usages. fn buffer_usages_to_str(buffer_usages: BufferUsages) -> &'static str { if buffer_usages.contains(BufferUsages::VERTEX) { diff --git a/crates/bevy_render/src/mesh/mesh/mod.rs b/crates/bevy_render/src/mesh/mesh/mod.rs index 1f40cb8b41a4c..6e6fd3f072f0c 100644 --- a/crates/bevy_render/src/mesh/mesh/mod.rs +++ b/crates/bevy_render/src/mesh/mesh/mod.rs @@ -385,6 +385,13 @@ impl Mesh { .sum() } + /// Returns the size required for the vertex buffer in bytes. + pub fn get_vertex_buffer_size(&self) -> usize { + let vertex_size = self.get_vertex_size() as usize; + let vertex_count = self.count_vertices(); + vertex_count * vertex_size + } + /// Computes and returns the index data of the mesh as bytes. /// This is used to transform the index data into a GPU friendly format. pub fn get_index_buffer_bytes(&self) -> Option<&[u8]> { @@ -458,10 +465,24 @@ impl Mesh { /// /// If the vertex attributes have different lengths, they are all truncated to /// the length of the smallest. + /// + /// This is a convenience method which allocates a Vec. + /// Prefer pre-allocating and using [`Mesh::write_packed_vertex_buffer_data`] when possible. pub fn create_packed_vertex_buffer_data(&self) -> Vec { + let mut attributes_interleaved_buffer = vec![0; self.get_vertex_buffer_size()]; + self.write_packed_vertex_buffer_data(&mut attributes_interleaved_buffer); + attributes_interleaved_buffer + } + + /// Computes and write the vertex data of the mesh into a mutable byte slice. + /// The attributes are located in the order of their [`MeshVertexAttribute::id`]. + /// This is used to transform the vertex data into a GPU friendly format. 
+ /// + /// If the vertex attributes have different lengths, they are all truncated to + /// the length of the smallest. + pub fn write_packed_vertex_buffer_data(&self, slice: &mut [u8]) { let vertex_size = self.get_vertex_size() as usize; let vertex_count = self.count_vertices(); - let mut attributes_interleaved_buffer = vec![0; vertex_count * vertex_size]; // bundle into interleaved buffers let mut attribute_offset = 0; for attribute_data in self.attributes.values() { @@ -473,14 +494,11 @@ impl Mesh { .enumerate() { let offset = vertex_index * vertex_size + attribute_offset; - attributes_interleaved_buffer[offset..offset + attribute_size] - .copy_from_slice(attribute_bytes); + slice[offset..offset + attribute_size].copy_from_slice(attribute_bytes); } attribute_offset += attribute_size; } - - attributes_interleaved_buffer } /// Duplicates the vertex attributes so that no vertices are shared.