Zero Copy Mesh (#15569)

# Objective - Another step towards #15558 ## Solution - Instead of allocating a Vec and then having wgpu copy it into a staging buffer, write directly into the staging buffer. - Gets rid of another hidden copy, in `pad_to_alignment`. Future work: - Why is there a gcd implementation in here? (It's also subpar; use binary_gcd. It's in the hot path, run twice for every mesh, every frame, I think.) Make it better and put it in bevy_math. - Zero-copy custom mesh API to avoid having to write out a Mesh from a custom representation. ## Testing - lighting and many_cubes run fine (and slightly faster; haven't benchmarked, though). --- ## Showcase - Look ma... no copies, at least when RenderAssetUsage is GPU only :3 --------- Co-authored-by: Alice Cecile <alice.i.cecile@gmail.com> Co-authored-by: Kristoffer Søholm <k.soeholm@gmail.com>
bevyengine · Oct 4, 2024 · 7eadc1d · 7eadc1d
1 parent 8b0388c
commit 7eadc1d
Show file tree

Hide file tree

Showing 2 changed files with 61 additions and 46 deletions.
diff --git a/crates/bevy_render/src/mesh/allocator.rs b/crates/bevy_render/src/mesh/allocator.rs
@@ -1,9 +1,8 @@
 //! Manages mesh vertex and index buffers.
 
-use alloc::{borrow::Cow, vec::Vec};
+use alloc::vec::Vec;
 use core::{
  fmt::{self, Display, Formatter},
- iter,
  ops::Range,
 };
 
@@ -21,8 +20,8 @@ use bevy_utils::{
 };
 use offset_allocator::{Allocation, Allocator};
 use wgpu::{
- util::BufferInitDescriptor, BufferDescriptor, BufferUsages, CommandEncoderDescriptor,
- DownlevelFlags, COPY_BUFFER_ALIGNMENT,
+ BufferDescriptor, BufferSize, BufferUsages, CommandEncoderDescriptor, DownlevelFlags,
+ COPY_BUFFER_ALIGNMENT,
 };
 
 use crate::{
@@ -427,7 +426,7 @@ impl MeshAllocator {
  if self.general_vertex_slabs_supported {
  self.allocate(
  mesh_id,
- mesh.get_vertex_size() * mesh.count_vertices() as u64,
+ mesh.get_vertex_buffer_size() as u64,
  vertex_element_layout,
  &mut slabs_to_grow,
  mesh_allocator_settings,
@@ -474,12 +473,12 @@ impl MeshAllocator {
  let Some(&slab_id) = self.mesh_id_to_vertex_slab.get(mesh_id) else {
  return;
  };
- let vertex_data = mesh.create_packed_vertex_buffer_data();
 
  // Call the generic function.
  self.copy_element_data(
  mesh_id,
- &vertex_data,
+ mesh.get_vertex_buffer_size(),
+ |slice| mesh.write_packed_vertex_buffer_data(slice),
  BufferUsages::VERTEX,
  slab_id,
  render_device,
@@ -506,7 +505,8 @@ impl MeshAllocator {
  // Call the generic function.
  self.copy_element_data(
  mesh_id,
- index_data,
+ index_data.len(),
+ |slice| slice.copy_from_slice(index_data),
  BufferUsages::INDEX,
  slab_id,
  render_device,
@@ -519,7 +519,8 @@ impl MeshAllocator {
  fn copy_element_data(
  &mut self,
  mesh_id: &AssetId<Mesh>,
- data: &[u8],
+ len: usize,
+ fill_data: impl Fn(&mut [u8]),
  buffer_usages: BufferUsages,
  slab_id: SlabId,
  render_device: &RenderDevice,
@@ -540,12 +541,18 @@ impl MeshAllocator {
 
  let slot_size = general_slab.element_layout.slot_size();
 
- // Write the data in.
- render_queue.write_buffer(
- buffer,
- allocated_range.allocation.offset as u64 * slot_size,
- &pad_to_alignment(data, slot_size as usize),
- );
+ // Round the size up to a multiple of the slot size to satisfy wgpu's alignment requirements.
+ if let Some(size) = BufferSize::new((len as u64).next_multiple_of(slot_size)) {
+ // Write the data in.
+ if let Some(mut buffer) = render_queue.write_buffer_with(
+ buffer,
+ allocated_range.allocation.offset as u64 * slot_size,
+ size,
+ ) {
+ let slice = &mut buffer.as_mut()[..len];
+ fill_data(slice);
+ }
+ }
 
  // Mark the allocation as resident.
  general_slab
@@ -557,17 +564,22 @@ impl MeshAllocator {
  debug_assert!(large_object_slab.buffer.is_none());
 
  // Create the buffer and its data in one go.
- large_object_slab.buffer = Some(render_device.create_buffer_with_data(
- &BufferInitDescriptor {
- label: Some(&format!(
- "large mesh slab {} ({}buffer)",
- slab_id,
- buffer_usages_to_str(buffer_usages)
- )),
- contents: data,
- usage: buffer_usages | BufferUsages::COPY_DST,
- },
- ));
+ let buffer = render_device.create_buffer(&BufferDescriptor {
+ label: Some(&format!(
+ "large mesh slab {} ({}buffer)",
+ slab_id,
+ buffer_usages_to_str(buffer_usages)
+ )),
+ size: len as u64,
+ usage: buffer_usages | BufferUsages::COPY_DST,
+ mapped_at_creation: true,
+ });
+ {
+ let slice = &mut buffer.slice(..).get_mapped_range_mut()[..len];
+ fill_data(slice);
+ }
+ buffer.unmap();
+ large_object_slab.buffer = Some(buffer);
  }
  }
  }
@@ -1000,21 +1012,6 @@ fn gcd(mut a: u64, mut b: u64) -> u64 {
  a
 }
 
-/// Ensures that the size of a buffer is a multiple of the given alignment by
-/// padding it with zeroes if necessary.
-///
-/// If the buffer already has the required size, then this function doesn't
-/// allocate. Otherwise, it copies the buffer into a new one and writes the
-/// appropriate number of zeroes to the end.
-fn pad_to_alignment(buffer: &[u8], align: usize) -> Cow<[u8]> {
- if buffer.len() % align == 0 {
- return Cow::Borrowed(buffer);
- }
- let mut buffer = buffer.to_vec();
- buffer.extend(iter::repeat(0).take(align - buffer.len() % align));
- Cow::Owned(buffer)
-}
-
 /// Returns a string describing the given buffer usages.
 fn buffer_usages_to_str(buffer_usages: BufferUsages) -> &'static str {
  if buffer_usages.contains(BufferUsages::VERTEX) {

diff --git a/crates/bevy_render/src/mesh/mesh/mod.rs b/crates/bevy_render/src/mesh/mesh/mod.rs
@@ -385,6 +385,13 @@ impl Mesh {
  .sum()
  }
 
+ /// Returns the size required for the vertex buffer in bytes.
+ pub fn get_vertex_buffer_size(&self) -> usize {
+ let vertex_size = self.get_vertex_size() as usize;
+ let vertex_count = self.count_vertices();
+ vertex_count * vertex_size
+ }
+
  /// Computes and returns the index data of the mesh as bytes.
  /// This is used to transform the index data into a GPU friendly format.
  pub fn get_index_buffer_bytes(&self) -> Option<&[u8]> {
@@ -458,10 +465,24 @@ impl Mesh {
  ///
  /// If the vertex attributes have different lengths, they are all truncated to
  /// the length of the smallest.
+ ///
+ /// This is a convenience method which allocates a Vec.
+ /// Prefer pre-allocating and using [`Mesh::write_packed_vertex_buffer_data`] when possible.
  pub fn create_packed_vertex_buffer_data(&self) -> Vec<u8> {
+ let mut attributes_interleaved_buffer = vec![0; self.get_vertex_buffer_size()];
+ self.write_packed_vertex_buffer_data(&mut attributes_interleaved_buffer);
+ attributes_interleaved_buffer
+ }
+
+ /// Computes and writes the vertex data of the mesh into a mutable byte slice.
+ /// The attributes are located in the order of their [`MeshVertexAttribute::id`].
+ /// This is used to transform the vertex data into a GPU friendly format.
+ ///
+ /// If the vertex attributes have different lengths, they are all truncated to
+ /// the length of the smallest.
+ pub fn write_packed_vertex_buffer_data(&self, slice: &mut [u8]) {
  let vertex_size = self.get_vertex_size() as usize;
  let vertex_count = self.count_vertices();
- let mut attributes_interleaved_buffer = vec![0; vertex_count * vertex_size];
  // bundle into interleaved buffers
  let mut attribute_offset = 0;
  for attribute_data in self.attributes.values() {
@@ -473,14 +494,11 @@ impl Mesh {
  .enumerate()
  {
  let offset = vertex_index * vertex_size + attribute_offset;
- attributes_interleaved_buffer[offset..offset + attribute_size]
- .copy_from_slice(attribute_bytes);
+ slice[offset..offset + attribute_size].copy_from_slice(attribute_bytes);
  }
 
  attribute_offset += attribute_size;
  }
-
- attributes_interleaved_buffer
  }
 
  /// Duplicates the vertex attributes so that no vertices are shared.