From 675f8ad40380966e3cc9176e95965538df621810 Mon Sep 17 00:00:00 2001 From: ickshonpe Date: Tue, 8 Oct 2024 23:24:27 +0100 Subject: [PATCH] Improved text batching (#14848) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit # Objective The UI text rendering is really slow because it extracts each glyph as a separate ui node even though all the glyphs in a text section have the same texture, color and clipping rects. ## Solution Store the glyphs in a separate contiguous array, queue one transparent ui item per text section which has indices into the glyph array. ## Testing ```cargo run --example many_glyphs --release``` Runs at about 22fps on main and 95fps with this PR on my computer. I'll do some proper comparisons once I work out why tracy 11 is refusing to run. --------- Co-authored-by: Kristoffer Søholm --- crates/bevy_ui/src/render/mod.rs | 612 +++++++++++++++++++------------ 1 file changed, 386 insertions(+), 226 deletions(-) diff --git a/crates/bevy_ui/src/render/mod.rs b/crates/bevy_ui/src/render/mod.rs index 3d6a57cbe57a4..76b6c1d1173bc 100644 --- a/crates/bevy_ui/src/render/mod.rs +++ b/crates/bevy_ui/src/render/mod.rs @@ -116,7 +116,7 @@ pub fn build_ui_render(app: &mut App) { extract_uinode_images.in_set(RenderUiSystem::ExtractImages), extract_uinode_borders.in_set(RenderUiSystem::ExtractBorders), #[cfg(feature = "bevy_text")] - extract_uinode_text.in_set(RenderUiSystem::ExtractText), + extract_text_sections.in_set(RenderUiSystem::ExtractText), ), ) .add_systems( @@ -160,38 +160,65 @@ fn get_ui_graph(render_app: &mut SubApp) -> RenderGraph { ui_graph } -/// The type of UI node. -/// This is used to determine how to render the UI node. 
-#[derive(Clone, Copy, Debug, PartialEq)] -pub enum NodeType { - Rect, - Border, -} - pub struct ExtractedUiNode { pub stack_index: u32, - pub transform: Mat4, pub color: LinearRgba, pub rect: Rect, pub image: AssetId, - pub atlas_scaling: Option, pub clip: Option, - pub flip_x: bool, - pub flip_y: bool, // Camera to render this UI node to. By the time it is extracted, // it is defaulted to a single camera if only one exists. // Nodes with ambiguous camera will be ignored. pub camera_entity: Entity, - /// Border radius of the UI node. - pub border_radius: ResolvedBorderRadius, - /// Border thickness of the UI node. - pub border: BorderRect, - pub node_type: NodeType, + pub item: ExtractedUiItem, +} + +/// The type of UI node. +/// This is used to determine how to render the UI node. +#[derive(Clone, Copy, Debug, PartialEq)] +pub enum NodeType { + Rect, + Border, +} + +pub enum ExtractedUiItem { + Node { + atlas_scaling: Option, + flip_x: bool, + flip_y: bool, + /// Border radius of the UI node. + /// Ordering: top left, top right, bottom right, bottom left. + border_radius: ResolvedBorderRadius, + /// Border thickness of the UI node. + /// Ordering: left, top, right, bottom. 
+ border: BorderRect, + node_type: NodeType, + transform: Mat4, + }, + /// A contiguous sequence of text glyphs from the same section + Glyphs { + atlas_scaling: Vec2, + /// Indices into [`ExtractedUiNodes::glyphs`] + range: Range, + }, +} + +pub struct ExtractedGlyph { + pub transform: Mat4, + pub rect: Rect, } #[derive(Resource, Default)] pub struct ExtractedUiNodes { pub uinodes: EntityHashMap, + pub glyphs: Vec, +} + +impl ExtractedUiNodes { + pub fn clear(&mut self) { + self.uinodes.clear(); + self.glyphs.clear(); + } } #[allow(clippy::too_many_arguments)] @@ -217,7 +244,7 @@ pub fn extract_uinode_background_colors( continue; }; - let Ok(&camera_entity) = mapping.get(camera_entity) else { + let Ok(&render_camera_entity) = mapping.get(camera_entity) else { continue; }; @@ -230,7 +257,6 @@ pub fn extract_uinode_background_colors( commands.spawn(TemporaryRenderEntity).id(), ExtractedUiNode { stack_index: uinode.stack_index, - transform: transform.compute_matrix(), color: background_color.0.into(), rect: Rect { min: Vec2::ZERO, @@ -238,13 +264,16 @@ pub fn extract_uinode_background_colors( }, clip: clip.map(|clip| clip.clip), image: AssetId::default(), - atlas_scaling: None, - flip_x: false, - flip_y: false, - camera_entity: camera_entity.id(), - border: uinode.border(), - border_radius: uinode.border_radius(), - node_type: NodeType::Rect, + camera_entity: render_camera_entity.id(), + item: ExtractedUiItem::Node { + atlas_scaling: None, + transform: transform.compute_matrix(), + flip_x: false, + flip_y: false, + border: uinode.border(), + border_radius: uinode.border_radius(), + node_type: NodeType::Rect, + }, }, ); } @@ -321,18 +350,20 @@ pub fn extract_uinode_images( commands.spawn(TemporaryRenderEntity).id(), ExtractedUiNode { stack_index: uinode.stack_index, - transform: transform.compute_matrix(), color: image.color.into(), rect, clip: clip.map(|clip| clip.clip), image: image.texture.id(), - atlas_scaling, - flip_x: image.flip_x, - flip_y: image.flip_y, 
camera_entity: render_camera_entity.id(), - border: uinode.border, - border_radius: uinode.border_radius, - node_type: NodeType::Rect, + item: ExtractedUiItem::Node { + atlas_scaling, + transform: transform.compute_matrix(), + flip_x: image.flip_x, + flip_y: image.flip_y, + border: uinode.border, + border_radius: uinode.border_radius, + node_type: NodeType::Rect, + }, }, ); } @@ -372,7 +403,7 @@ pub fn extract_uinode_borders( continue; }; - let Ok(&camera_entity) = mapping.get(camera_entity) else { + let Ok(&render_camera_entity) = mapping.get(camera_entity) else { continue; }; @@ -391,21 +422,23 @@ pub fn extract_uinode_borders( commands.spawn(TemporaryRenderEntity).id(), ExtractedUiNode { stack_index: uinode.stack_index, - transform: global_transform.compute_matrix(), color: border_color.0.into(), rect: Rect { max: uinode.size(), ..Default::default() }, image, - atlas_scaling: None, clip: maybe_clip.map(|clip| clip.clip), - flip_x: false, - flip_y: false, - camera_entity: camera_entity.id(), - border_radius: uinode.border_radius(), - border: uinode.border(), - node_type: NodeType::Border, + camera_entity: render_camera_entity.id(), + item: ExtractedUiItem::Node { + atlas_scaling: None, + transform: global_transform.compute_matrix(), + flip_x: false, + flip_y: false, + border: uinode.border(), + border_radius: uinode.border_radius(), + node_type: NodeType::Border, + }, }, ); } @@ -417,21 +450,23 @@ pub fn extract_uinode_borders( commands.spawn(TemporaryRenderEntity).id(), ExtractedUiNode { stack_index: uinode.stack_index, - transform: global_transform.compute_matrix(), color: outline.color.into(), rect: Rect { max: outline_size, ..Default::default() }, image, - atlas_scaling: None, clip: maybe_clip.map(|clip| clip.clip), - flip_x: false, - flip_y: false, - camera_entity: camera_entity.id(), - border: BorderRect::square(uinode.outline_width()), - border_radius: uinode.outline_radius(), - node_type: NodeType::Border, + camera_entity: render_camera_entity.id(), + 
item: ExtractedUiItem::Node { + transform: global_transform.compute_matrix(), + atlas_scaling: None, + flip_x: false, + flip_y: false, + border: BorderRect::square(uinode.outline_width()), + border_radius: uinode.outline_radius(), + node_type: NodeType::Border, + }, }, ); } @@ -544,7 +579,7 @@ pub fn extract_default_ui_camera_view( #[cfg(feature = "bevy_text")] #[allow(clippy::too_many_arguments)] -pub fn extract_uinode_text( +pub fn extract_text_sections( mut commands: Commands, mut extracted_uinodes: ResMut, camera_query: Extract>, @@ -564,6 +599,9 @@ pub fn extract_uinode_text( >, mapping: Extract>, ) { + let mut start = 0; + let mut end = 1; + let default_ui_camera = default_ui_camera.get(); for (uinode, global_transform, view_visibility, clip, camera, text, text_layout_info) in &uinode_query @@ -585,7 +623,7 @@ pub fn extract_uinode_text( * ui_scale.0; let inverse_scale_factor = scale_factor.recip(); - let Ok(&camera_entity) = mapping.get(camera_entity) else { + let Ok(&render_camera_entity) = mapping.get(camera_entity) else { continue; }; // Align the text to the nearest physical pixel: @@ -604,44 +642,58 @@ pub fn extract_uinode_text( transform.translation = transform.translation.round(); transform.translation *= inverse_scale_factor; - let mut color = LinearRgba::WHITE; - let mut current_section = usize::MAX; - for PositionedGlyph { - position, - atlas_info, - section_index, - .. - } in &text_layout_info.glyphs + for ( + i, + PositionedGlyph { + position, + atlas_info, + section_index, + .. 
+ }, + ) in text_layout_info.glyphs.iter().enumerate() { - if *section_index != current_section { - color = LinearRgba::from(text.sections[*section_index].style.color); - current_section = *section_index; - } let atlas = texture_atlases.get(&atlas_info.texture_atlas).unwrap(); let mut rect = atlas.textures[atlas_info.location.glyph_index].as_rect(); rect.min *= inverse_scale_factor; rect.max *= inverse_scale_factor; - let id = commands.spawn(TemporaryRenderEntity).id(); - extracted_uinodes.uinodes.insert( - id, - ExtractedUiNode { - stack_index: uinode.stack_index, - transform: transform - * Mat4::from_translation(position.extend(0.) * inverse_scale_factor), - color, - rect, - image: atlas_info.texture.id(), - atlas_scaling: Some(Vec2::splat(inverse_scale_factor)), - clip: clip.map(|clip| clip.clip), - flip_x: false, - flip_y: false, - camera_entity: camera_entity.id(), - border: BorderRect::ZERO, - border_radius: ResolvedBorderRadius::ZERO, - node_type: NodeType::Rect, - }, - ); + + extracted_uinodes.glyphs.push(ExtractedGlyph { + transform: transform + * Mat4::from_translation(position.extend(0.) 
* inverse_scale_factor), + rect, + }); + + if text_layout_info + .glyphs + .get(i + 1) + .map(|info| { + info.section_index != *section_index + || info.atlas_info.texture != atlas_info.texture + }) + .unwrap_or(true) + { + let id = commands.spawn(TemporaryRenderEntity).id(); + + extracted_uinodes.uinodes.insert( + id, + ExtractedUiNode { + stack_index: uinode.stack_index, + color: LinearRgba::from(text.sections[*section_index].style.color), + image: atlas_info.texture.id(), + clip: clip.map(|clip| clip.clip), + camera_entity: render_camera_entity.id(), + rect, + item: ExtractedUiItem::Glyphs { + atlas_scaling: Vec2::splat(inverse_scale_factor), + range: start..end, + }, + }, + ); + start = end; + } + + end += 1; } } } @@ -870,151 +922,258 @@ pub fn prepare_uinodes( continue; } } + match &extracted_uinode.item { + ExtractedUiItem::Node { + atlas_scaling, + flip_x, + flip_y, + border_radius, + border, + node_type, + transform, + } => { + let mut flags = if extracted_uinode.image != AssetId::default() { + shader_flags::TEXTURED + } else { + shader_flags::UNTEXTURED + }; - let mut flags = if extracted_uinode.image != AssetId::default() { - shader_flags::TEXTURED - } else { - shader_flags::UNTEXTURED - }; - - let mut uinode_rect = extracted_uinode.rect; - - let rect_size = uinode_rect.size().extend(1.0); - - // Specify the corners of the node - let positions = QUAD_VERTEX_POSITIONS.map(|pos| { - (extracted_uinode.transform * (pos * rect_size).extend(1.)).xyz() - }); - - // Calculate the effect of clipping - // Note: this won't work with rotation/scaling, but that's much more complex (may need more that 2 quads) - let mut positions_diff = if let Some(clip) = extracted_uinode.clip { - [ - Vec2::new( - f32::max(clip.min.x - positions[0].x, 0.), - f32::max(clip.min.y - positions[0].y, 0.), - ), - Vec2::new( - f32::min(clip.max.x - positions[1].x, 0.), - f32::max(clip.min.y - positions[1].y, 0.), - ), - Vec2::new( - f32::min(clip.max.x - positions[2].x, 0.), - 
f32::min(clip.max.y - positions[2].y, 0.), - ), - Vec2::new( - f32::max(clip.min.x - positions[3].x, 0.), - f32::min(clip.max.y - positions[3].y, 0.), - ), - ] - } else { - [Vec2::ZERO; 4] - }; - - let positions_clipped = [ - positions[0] + positions_diff[0].extend(0.), - positions[1] + positions_diff[1].extend(0.), - positions[2] + positions_diff[2].extend(0.), - positions[3] + positions_diff[3].extend(0.), - ]; - - let transformed_rect_size = - extracted_uinode.transform.transform_vector3(rect_size); - - // Don't try to cull nodes that have a rotation - // In a rotation around the Z-axis, this value is 0.0 for an angle of 0.0 or π - // In those two cases, the culling check can proceed normally as corners will be on - // horizontal / vertical lines - // For all other angles, bypass the culling check - // This does not properly handles all rotations on all axis - if extracted_uinode.transform.x_axis[1] == 0.0 { - // Cull nodes that are completely clipped - if positions_diff[0].x - positions_diff[1].x >= transformed_rect_size.x - || positions_diff[1].y - positions_diff[2].y >= transformed_rect_size.y - { - continue; - } - } - let uvs = if flags == shader_flags::UNTEXTURED { - [Vec2::ZERO, Vec2::X, Vec2::ONE, Vec2::Y] - } else { - let image = gpu_images - .get(extracted_uinode.image) - .expect("Image was checked during batching and should still exist"); - // Rescale atlases. This is done here because we need texture data that might not be available in Extract. 
- let atlas_extent = extracted_uinode - .atlas_scaling - .map(|scaling| image.size.as_vec2() * scaling) - .unwrap_or(uinode_rect.max); - if extracted_uinode.flip_x { - core::mem::swap(&mut uinode_rect.max.x, &mut uinode_rect.min.x); - positions_diff[0].x *= -1.; - positions_diff[1].x *= -1.; - positions_diff[2].x *= -1.; - positions_diff[3].x *= -1.; - } - if extracted_uinode.flip_y { - core::mem::swap(&mut uinode_rect.max.y, &mut uinode_rect.min.y); - positions_diff[0].y *= -1.; - positions_diff[1].y *= -1.; - positions_diff[2].y *= -1.; - positions_diff[3].y *= -1.; - } - [ - Vec2::new( - uinode_rect.min.x + positions_diff[0].x, - uinode_rect.min.y + positions_diff[0].y, - ), - Vec2::new( - uinode_rect.max.x + positions_diff[1].x, - uinode_rect.min.y + positions_diff[1].y, - ), - Vec2::new( - uinode_rect.max.x + positions_diff[2].x, - uinode_rect.max.y + positions_diff[2].y, - ), - Vec2::new( - uinode_rect.min.x + positions_diff[3].x, - uinode_rect.max.y + positions_diff[3].y, - ), - ] - .map(|pos| pos / atlas_extent) - }; - - let color = extracted_uinode.color.to_f32_array(); - if extracted_uinode.node_type == NodeType::Border { - flags |= shader_flags::BORDER; - } + let mut uinode_rect = extracted_uinode.rect; + + let rect_size = uinode_rect.size().extend(1.0); + + // Specify the corners of the node + let positions = QUAD_VERTEX_POSITIONS + .map(|pos| (*transform * (pos * rect_size).extend(1.)).xyz()); + + // Calculate the effect of clipping + // Note: this won't work with rotation/scaling, but that's much more complex (may need more that 2 quads) + let mut positions_diff = if let Some(clip) = extracted_uinode.clip { + [ + Vec2::new( + f32::max(clip.min.x - positions[0].x, 0.), + f32::max(clip.min.y - positions[0].y, 0.), + ), + Vec2::new( + f32::min(clip.max.x - positions[1].x, 0.), + f32::max(clip.min.y - positions[1].y, 0.), + ), + Vec2::new( + f32::min(clip.max.x - positions[2].x, 0.), + f32::min(clip.max.y - positions[2].y, 0.), + ), + Vec2::new( + 
f32::max(clip.min.x - positions[3].x, 0.), + f32::min(clip.max.y - positions[3].y, 0.), + ), + ] + } else { + [Vec2::ZERO; 4] + }; - for i in 0..4 { - ui_meta.vertices.push(UiVertex { - position: positions_clipped[i].into(), - uv: uvs[i].into(), - color, - flags: flags | shader_flags::CORNERS[i], - radius: [ - extracted_uinode.border_radius.top_left, - extracted_uinode.border_radius.top_right, - extracted_uinode.border_radius.bottom_right, - extracted_uinode.border_radius.bottom_left, - ], - border: [ - extracted_uinode.border.left, - extracted_uinode.border.top, - extracted_uinode.border.right, - extracted_uinode.border.bottom, - ], - size: rect_size.xy().into(), - }); - } + let positions_clipped = [ + positions[0] + positions_diff[0].extend(0.), + positions[1] + positions_diff[1].extend(0.), + positions[2] + positions_diff[2].extend(0.), + positions[3] + positions_diff[3].extend(0.), + ]; + + let transformed_rect_size = transform.transform_vector3(rect_size); + + // Don't try to cull nodes that have a rotation + // In a rotation around the Z-axis, this value is 0.0 for an angle of 0.0 or π + // In those two cases, the culling check can proceed normally as corners will be on + // horizontal / vertical lines + // For all other angles, bypass the culling check + // This does not properly handles all rotations on all axis + if transform.x_axis[1] == 0.0 { + // Cull nodes that are completely clipped + if positions_diff[0].x - positions_diff[1].x + >= transformed_rect_size.x + || positions_diff[1].y - positions_diff[2].y + >= transformed_rect_size.y + { + continue; + } + } + let uvs = if flags == shader_flags::UNTEXTURED { + [Vec2::ZERO, Vec2::X, Vec2::ONE, Vec2::Y] + } else { + let image = gpu_images.get(extracted_uinode.image).expect( + "Image was checked during batching and should still exist", + ); + // Rescale atlases. This is done here because we need texture data that might not be available in Extract. 
+ let atlas_extent = atlas_scaling + .map(|scaling| image.size.as_vec2() * scaling) + .unwrap_or(uinode_rect.max); + if *flip_x { + core::mem::swap(&mut uinode_rect.max.x, &mut uinode_rect.min.x); + positions_diff[0].x *= -1.; + positions_diff[1].x *= -1.; + positions_diff[2].x *= -1.; + positions_diff[3].x *= -1.; + } + if *flip_y { + core::mem::swap(&mut uinode_rect.max.y, &mut uinode_rect.min.y); + positions_diff[0].y *= -1.; + positions_diff[1].y *= -1.; + positions_diff[2].y *= -1.; + positions_diff[3].y *= -1.; + } + [ + Vec2::new( + uinode_rect.min.x + positions_diff[0].x, + uinode_rect.min.y + positions_diff[0].y, + ), + Vec2::new( + uinode_rect.max.x + positions_diff[1].x, + uinode_rect.min.y + positions_diff[1].y, + ), + Vec2::new( + uinode_rect.max.x + positions_diff[2].x, + uinode_rect.max.y + positions_diff[2].y, + ), + Vec2::new( + uinode_rect.min.x + positions_diff[3].x, + uinode_rect.max.y + positions_diff[3].y, + ), + ] + .map(|pos| pos / atlas_extent) + }; - for &i in &QUAD_INDICES { - ui_meta.indices.push(indices_index + i as u32); - } + let color = extracted_uinode.color.to_f32_array(); + if *node_type == NodeType::Border { + flags |= shader_flags::BORDER; + } + + for i in 0..4 { + ui_meta.vertices.push(UiVertex { + position: positions_clipped[i].into(), + uv: uvs[i].into(), + color, + flags: flags | shader_flags::CORNERS[i], + radius: [ + border_radius.top_left, + border_radius.top_right, + border_radius.bottom_right, + border_radius.bottom_left, + ], + border: [border.left, border.top, border.right, border.bottom], + size: rect_size.xy().into(), + }); + } - vertices_index += 6; - indices_index += 4; + for &i in &QUAD_INDICES { + ui_meta.indices.push(indices_index + i as u32); + } + vertices_index += 6; + indices_index += 4; + } + ExtractedUiItem::Glyphs { + atlas_scaling, + range, + } => { + let image = gpu_images + .get(extracted_uinode.image) + .expect("Image was checked during batching and should still exist"); + + let atlas_extent = 
image.size.as_vec2() * *atlas_scaling; + + let color = extracted_uinode.color.to_f32_array(); + for glyph in &extracted_uinodes.glyphs[range.clone()] { + let glyph_rect = glyph.rect; + let size = glyph.rect.size(); + + let rect_size = glyph_rect.size().extend(1.0); + + // Specify the corners of the glyph + let positions = QUAD_VERTEX_POSITIONS.map(|pos| { + (glyph.transform * (pos * rect_size).extend(1.)).xyz() + }); + + let positions_diff = if let Some(clip) = extracted_uinode.clip { + [ + Vec2::new( + f32::max(clip.min.x - positions[0].x, 0.), + f32::max(clip.min.y - positions[0].y, 0.), + ), + Vec2::new( + f32::min(clip.max.x - positions[1].x, 0.), + f32::max(clip.min.y - positions[1].y, 0.), + ), + Vec2::new( + f32::min(clip.max.x - positions[2].x, 0.), + f32::min(clip.max.y - positions[2].y, 0.), + ), + Vec2::new( + f32::max(clip.min.x - positions[3].x, 0.), + f32::min(clip.max.y - positions[3].y, 0.), + ), + ] + } else { + [Vec2::ZERO; 4] + }; + + let positions_clipped = [ + positions[0] + positions_diff[0].extend(0.), + positions[1] + positions_diff[1].extend(0.), + positions[2] + positions_diff[2].extend(0.), + positions[3] + positions_diff[3].extend(0.), + ]; + + // cull nodes that are completely clipped + let transformed_rect_size = + glyph.transform.transform_vector3(rect_size); + if positions_diff[0].x - positions_diff[1].x + >= transformed_rect_size.x + || positions_diff[1].y - positions_diff[2].y + >= transformed_rect_size.y + { + continue; + } + + let uvs = [ + Vec2::new( + glyph.rect.min.x + positions_diff[0].x, + glyph.rect.min.y + positions_diff[0].y, + ), + Vec2::new( + glyph.rect.max.x + positions_diff[1].x, + glyph.rect.min.y + positions_diff[1].y, + ), + Vec2::new( + glyph.rect.max.x + positions_diff[2].x, + glyph.rect.max.y + positions_diff[2].y, + ), + Vec2::new( + glyph.rect.min.x + positions_diff[3].x, + glyph.rect.max.y + positions_diff[3].y, + ), + ] + .map(|pos| pos / atlas_extent); + + for i in 0..4 { + ui_meta.vertices.push(UiVertex { 
+ position: positions_clipped[i].into(), + uv: uvs[i].into(), + color, + flags: shader_flags::TEXTURED, + radius: [0.0; 4], + border: [0.0; 4], + size: size.into(), + }); + } + + for &i in &QUAD_INDICES { + ui_meta.indices.push(indices_index + i as u32); + } + + vertices_index += 6; + indices_index += 4; + } + } + } existing_batch.unwrap().1.range.end = vertices_index; ui_phase.items[batch_item_index].batch_range_mut().end += 1; } else { @@ -1022,10 +1181,11 @@ pub fn prepare_uinodes( } } } + ui_meta.vertices.write_buffer(&render_device, &render_queue); ui_meta.indices.write_buffer(&render_device, &render_queue); *previous_len = batches.len(); commands.insert_or_spawn_batch(batches); } - extracted_uinodes.uinodes.clear(); + extracted_uinodes.clear(); }