From 6cc96f4c1fede790a6d0e3b2f345dee160abc5af Mon Sep 17 00:00:00 2001 From: JMS55 <47158642+JMS55@users.noreply.github.com> Date: Mon, 26 Aug 2024 10:54:34 -0700 Subject: [PATCH] Meshlet software raster + start of cleanup (#14623) # Objective - Faster meshlet rasterization path for small triangles - Avoid having to allocate and write out a triangle buffer - Refactor gpu_scene.rs ## Solution - Replace the 32bit visbuffer texture with a 64bit visbuffer buffer, where the left 32 bits encode depth, and the right 32 bits encode the existing cluster + triangle IDs. Can't use 64bit textures, wgpu/naga doesn't support atomic ops on textures yet. - Instead of writing out a buffer of packed cluster + triangle IDs (per triangle) to raster, the culling pass now writes out a buffer of just cluster IDs (per cluster, so less memory allocated, cheaper to write out). - Clusters for software raster are allocated from the left side - Clusters for hardware raster are allocated in the same buffer, from the right side - The buffer size is fixed at MeshletPlugin build time, and should be set to a reasonable value for your scene (no warning on overflow, and no good way to determine what value you need outside of renderdoc - I plan to fix this in a future PR adding a meshlet stats overlay) - Currently I don't have a heuristic for software vs hardware raster selection for each cluster. The existing code is just a placeholder. I need to profile on a release scene and come up with a heuristic, probably in a future PR. - The culling shader is getting pretty hard to follow at this point, but I don't want to spend time improving it as the entire shader/pass is getting rewritten/replaced in the near future. - Software raster is a compute workgroup per-cluster. Each workgroup loads and transforms the <=64 vertices of the cluster, and then rasterizes the <=64 triangles of the cluster. - Two variants are implemented: Scanline for clusters with any larger triangles (still smaller than hardware is good at), and brute-force for very very tiny triangles - Once the shader determines that a pixel should be filled in, it does an atomicMax() on the visbuffer to store the results, copying how Nanite works - On devices with a low max workgroups per dispatch limit, an extra compute pass is inserted before software raster to convert from a 1d to 2d dispatch (I don't think 3d would ever be necessary). - I haven't implemented the top-left rule or subpixel precision yet, I'm leaving that for a future PR since I get usable results without it for now - Resources used: https://kristoffer-dyrkorn.github.io/triangle-rasterizer and chapters 6-8 of https://fgiesen.wordpress.com/2013/02/17/optimizing-sw-occlusion-culling-index - Hardware raster now spawns 64*3 vertex invocations per meshlet, instead of the actual meshlet vertex count. Extra invocations just early-exit. - While this is slower than the existing system, hardware draws should be rare now that software raster is usable, and it saves a ton of memory using the unified cluster ID buffer. This would be fixed if wgpu had support for mesh shaders. - Instead of writing to a color+depth attachment, the hardware raster pass also does the same atomic visbuffer writes that software raster uses. - We have to bind a dummy render target anyways, as wgpu doesn't currently support render passes without any attachments - Material IDs are no longer written out during the main rasterization passes. - If we had async compute queues, we could overlap the software and hardware raster passes. - New material and depth resolve passes run at the end of the visbuffer node, and write out view depth and material ID depth textures ### Misc changes - Fixed cluster culling importing, but never actually using the previous view uniforms when doing occlusion culling - Fixed incorrectly adding the LOD error twice when building the meshlet mesh - Splitup gpu_scene module into meshlet_mesh_manager, instance_manager, and resource_manager - resource_manager is still too complex and inefficient (extract and prepare are way too expensive). I plan on improving this in a future PR, but for now ResourceManager is mostly a 1:1 port of the leftover MeshletGpuScene bits. - Material draw passes have been renamed to the more accurate material shade pass, as well as some other misc renaming (in the future, these will be compute shaders even, and not actual draw calls) --- ## Migration Guide - TBD (ask me at the end of the release for meshlet changes as a whole) --------- Co-authored-by: vero --- crates/bevy_pbr/src/material.rs | 6 +- crates/bevy_pbr/src/meshlet/asset.rs | 9 +- .../src/meshlet/copy_material_depth.wgsl | 10 - .../bevy_pbr/src/meshlet/cull_clusters.wgsl | 73 +- .../src/meshlet/downsample_depth.wgsl | 49 +- .../src/meshlet/fill_cluster_buffers.wgsl | 6 +- crates/bevy_pbr/src/meshlet/from_mesh.rs | 10 +- crates/bevy_pbr/src/meshlet/gpu_scene.rs | 1050 ----------------- .../bevy_pbr/src/meshlet/instance_manager.rs | 261 ++++ ...repare.rs => material_pipeline_prepare.rs} | 23 +- ..._draw_nodes.rs => material_shade_nodes.rs} | 42 +- .../src/meshlet/meshlet_bindings.wgsl | 34 +- .../src/meshlet/meshlet_mesh_manager.rs | 132 +++ crates/bevy_pbr/src/meshlet/mod.rs | 137 ++- .../src/meshlet/persistent_buffer_impls.rs | 1 + crates/bevy_pbr/src/meshlet/pipelines.rs | 383 ++++-- .../src/meshlet/remap_1d_to_2d_dispatch.wgsl | 20 + .../src/meshlet/resolve_render_targets.wgsl | 39 + .../bevy_pbr/src/meshlet/resource_manager.rs | 809 +++++++++++++ ...=> visibility_buffer_hardware_raster.wgsl} | 68 +- .../meshlet/visibility_buffer_raster_node.rs | 302 +++-- .../meshlet/visibility_buffer_resolve.wgsl | 5 +- .../visibility_buffer_software_raster.wgsl | 196 +++ crates/bevy_pbr/src/prepass/mod.rs | 4 +- crates/bevy_render/src/render_resource/mod.rs | 5 +- examples/3d/meshlet.rs | 6 +- 26 files changed, 2247 insertions(+), 1433 deletions(-) delete mode 100644 crates/bevy_pbr/src/meshlet/copy_material_depth.wgsl delete mode 100644 crates/bevy_pbr/src/meshlet/gpu_scene.rs create mode 100644 crates/bevy_pbr/src/meshlet/instance_manager.rs rename crates/bevy_pbr/src/meshlet/{material_draw_prepare.rs => material_pipeline_prepare.rs} (95%) rename crates/bevy_pbr/src/meshlet/{material_draw_nodes.rs => material_shade_nodes.rs} (91%) create mode 100644 crates/bevy_pbr/src/meshlet/meshlet_mesh_manager.rs create mode 100644 crates/bevy_pbr/src/meshlet/remap_1d_to_2d_dispatch.wgsl create mode 100644 crates/bevy_pbr/src/meshlet/resolve_render_targets.wgsl create mode 100644 crates/bevy_pbr/src/meshlet/resource_manager.rs rename crates/bevy_pbr/src/meshlet/{visibility_buffer_raster.wgsl => visibility_buffer_hardware_raster.wgsl} (56%) create mode 100644 crates/bevy_pbr/src/meshlet/visibility_buffer_software_raster.wgsl diff --git a/crates/bevy_pbr/src/material.rs b/crates/bevy_pbr/src/material.rs index d5b601d6515cc..05bb1d7acc5e2 100644 --- a/crates/bevy_pbr/src/material.rs +++ b/crates/bevy_pbr/src/material.rs @@ -1,7 +1,7 @@ #[cfg(feature = "meshlet")] use crate::meshlet::{ prepare_material_meshlet_meshes_main_opaque_pass, queue_material_meshlet_meshes, - MeshletGpuScene, + InstanceManager, }; use crate::*; use bevy_asset::{Asset, AssetId, AssetServer}; @@ -283,7 +283,7 @@ where Render, queue_material_meshlet_meshes:: .in_set(RenderSet::QueueMeshes) - .run_if(resource_exists::), + .run_if(resource_exists::), ); #[cfg(feature = "meshlet")] @@ -293,7 +293,7 @@ where .in_set(RenderSet::QueueMeshes) .after(prepare_assets::>) .before(queue_material_meshlet_meshes::) - .run_if(resource_exists::), + .run_if(resource_exists::), ); } diff --git a/crates/bevy_pbr/src/meshlet/asset.rs b/crates/bevy_pbr/src/meshlet/asset.rs index 5701e0f288449..108cf981515c7 100644 --- a/crates/bevy_pbr/src/meshlet/asset.rs +++ b/crates/bevy_pbr/src/meshlet/asset.rs @@ -35,8 +35,6 @@ pub const MESHLET_MESH_ASSET_VERSION: u64 = 1; /// See also [`super::MaterialMeshletMeshBundle`] and [`super::MeshletPlugin`]. #[derive(Asset, TypePath, Clone)] pub struct MeshletMesh { - /// The total amount of triangles summed across all LOD 0 meshlets in the mesh. - pub(crate) worst_case_meshlet_triangles: u64, /// Raw vertex data bytes for the overall mesh. pub(crate) vertex_data: Arc<[u8]>, /// Indices into `vertex_data`. @@ -57,6 +55,8 @@ pub struct Meshlet { pub start_vertex_id: u32, /// The offset within the parent mesh's [`MeshletMesh::indices`] buffer where the indices for this meshlet begin. pub start_index_id: u32, + /// The amount of vertices in this meshlet. + pub vertex_count: u32, /// The amount of triangles in this meshlet. pub triangle_count: u32, } @@ -107,9 +107,6 @@ impl AssetSaver for MeshletMeshSaverLoader { .await?; // Compress and write asset data - writer - .write_all(&asset.worst_case_meshlet_triangles.to_le_bytes()) - .await?; let mut writer = FrameEncoder::new(AsyncWriteSyncAdapter(writer)); write_slice(&asset.vertex_data, &mut writer)?; write_slice(&asset.vertex_ids, &mut writer)?; @@ -146,7 +143,6 @@ impl AssetLoader for MeshletMeshSaverLoader { } // Load and decompress asset data - let worst_case_meshlet_triangles = async_read_u64(reader).await?; let reader = &mut FrameDecoder::new(AsyncReadSyncAdapter(reader)); let vertex_data = read_slice(reader)?; let vertex_ids = read_slice(reader)?; @@ -155,7 +151,6 @@ impl AssetLoader for MeshletMeshSaverLoader { let bounding_spheres = read_slice(reader)?; Ok(MeshletMesh { - worst_case_meshlet_triangles, vertex_data, vertex_ids, indices, diff --git a/crates/bevy_pbr/src/meshlet/copy_material_depth.wgsl b/crates/bevy_pbr/src/meshlet/copy_material_depth.wgsl deleted file mode 100644 index 177cbc35a3424..0000000000000 --- a/crates/bevy_pbr/src/meshlet/copy_material_depth.wgsl +++ /dev/null @@ -1,10 +0,0 @@ -#import bevy_core_pipeline::fullscreen_vertex_shader::FullscreenVertexOutput - -@group(0) @binding(0) var material_depth: texture_2d; - -/// This pass copies the R16Uint material depth texture to an actual Depth16Unorm depth texture. - -@fragment -fn copy_material_depth(in: FullscreenVertexOutput) -> @builtin(frag_depth) f32 { - return f32(textureLoad(material_depth, vec2(in.position.xy), 0).r) / 65535.0; -} diff --git a/crates/bevy_pbr/src/meshlet/cull_clusters.wgsl b/crates/bevy_pbr/src/meshlet/cull_clusters.wgsl index b73792aa5d3f3..fe5df60f12082 100644 --- a/crates/bevy_pbr/src/meshlet/cull_clusters.wgsl +++ b/crates/bevy_pbr/src/meshlet/cull_clusters.wgsl @@ -9,9 +9,10 @@ previous_view, should_cull_instance, cluster_is_second_pass_candidate, - meshlets, - draw_indirect_args, - draw_triangle_buffer, + meshlet_software_raster_indirect_args, + meshlet_hardware_raster_indirect_args, + meshlet_raster_clusters, + meshlet_raster_cluster_rightmost_slot, } #import bevy_render::maths::affine3_to_square @@ -25,10 +26,10 @@ fn cull_clusters( @builtin(workgroup_id) workgroup_id: vec3, @builtin(num_workgroups) num_workgroups: vec3, - @builtin(local_invocation_id) local_invocation_id: vec3, + @builtin(local_invocation_index) local_invocation_index: u32, ) { // Calculate the cluster ID for this thread - let cluster_id = local_invocation_id.x + 128u * dot(workgroup_id, vec3(num_workgroups.x * num_workgroups.x, num_workgroups.x, 1u)); + let cluster_id = local_invocation_index + 128u * dot(workgroup_id, vec3(num_workgroups.x * num_workgroups.x, num_workgroups.x, 1u)); if cluster_id >= arrayLength(&meshlet_cluster_meshlet_ids) { return; } #ifdef MESHLET_SECOND_CULLING_PASS @@ -47,8 +48,8 @@ fn cull_clusters( let world_from_local = affine3_to_square(instance_uniform.world_from_local); let world_scale = max(length(world_from_local[0]), max(length(world_from_local[1]), length(world_from_local[2]))); let bounding_spheres = meshlet_bounding_spheres[meshlet_id]; - var culling_bounding_sphere_center = world_from_local * vec4(bounding_spheres.self_culling.center, 1.0); - var culling_bounding_sphere_radius = world_scale * bounding_spheres.self_culling.radius; + let culling_bounding_sphere_center = world_from_local * vec4(bounding_spheres.self_culling.center, 1.0); + let culling_bounding_sphere_radius = world_scale * bounding_spheres.self_culling.radius; #ifdef MESHLET_FIRST_CULLING_PASS // Frustum culling @@ -59,17 +60,17 @@ fn cull_clusters( } } - // Calculate view-space LOD bounding sphere for the meshlet + // Calculate view-space LOD bounding sphere for the cluster let lod_bounding_sphere_center = world_from_local * vec4(bounding_spheres.self_lod.center, 1.0); let lod_bounding_sphere_radius = world_scale * bounding_spheres.self_lod.radius; let lod_bounding_sphere_center_view_space = (view.view_from_world * vec4(lod_bounding_sphere_center.xyz, 1.0)).xyz; - // Calculate view-space LOD bounding sphere for the meshlet's parent + // Calculate view-space LOD bounding sphere for the cluster's parent let parent_lod_bounding_sphere_center = world_from_local * vec4(bounding_spheres.parent_lod.center, 1.0); let parent_lod_bounding_sphere_radius = world_scale * bounding_spheres.parent_lod.radius; let parent_lod_bounding_sphere_center_view_space = (view.view_from_world * vec4(parent_lod_bounding_sphere_center.xyz, 1.0)).xyz; - // Check LOD cut (meshlet error imperceptible, and parent error not imperceptible) + // Check LOD cut (cluster error imperceptible, and parent error not imperceptible) let lod_is_ok = lod_error_is_imperceptible(lod_bounding_sphere_center_view_space, lod_bounding_sphere_radius); let parent_lod_is_ok = lod_error_is_imperceptible(parent_lod_bounding_sphere_center_view_space, parent_lod_bounding_sphere_radius); if !lod_is_ok || parent_lod_is_ok { return; } @@ -79,16 +80,20 @@ fn cull_clusters( #ifdef MESHLET_FIRST_CULLING_PASS let previous_world_from_local = affine3_to_square(instance_uniform.previous_world_from_local); let previous_world_from_local_scale = max(length(previous_world_from_local[0]), max(length(previous_world_from_local[1]), length(previous_world_from_local[2]))); - culling_bounding_sphere_center = previous_world_from_local * vec4(bounding_spheres.self_culling.center, 1.0); - culling_bounding_sphere_radius = previous_world_from_local_scale * bounding_spheres.self_culling.radius; + let occlusion_culling_bounding_sphere_center = previous_world_from_local * vec4(bounding_spheres.self_culling.center, 1.0); + let occlusion_culling_bounding_sphere_radius = previous_world_from_local_scale * bounding_spheres.self_culling.radius; + let occlusion_culling_bounding_sphere_center_view_space = (previous_view.view_from_world * vec4(occlusion_culling_bounding_sphere_center.xyz, 1.0)).xyz; +#else + let occlusion_culling_bounding_sphere_center = culling_bounding_sphere_center; + let occlusion_culling_bounding_sphere_radius = culling_bounding_sphere_radius; + let occlusion_culling_bounding_sphere_center_view_space = (view.view_from_world * vec4(occlusion_culling_bounding_sphere_center.xyz, 1.0)).xyz; #endif - let culling_bounding_sphere_center_view_space = (view.view_from_world * vec4(culling_bounding_sphere_center.xyz, 1.0)).xyz; - let aabb = project_view_space_sphere_to_screen_space_aabb(culling_bounding_sphere_center_view_space, culling_bounding_sphere_radius); + var aabb = project_view_space_sphere_to_screen_space_aabb(occlusion_culling_bounding_sphere_center_view_space, occlusion_culling_bounding_sphere_radius); let depth_pyramid_size_mip_0 = vec2(textureDimensions(depth_pyramid, 0)); - let width = (aabb.z - aabb.x) * depth_pyramid_size_mip_0.x; - let height = (aabb.w - aabb.y) * depth_pyramid_size_mip_0.y; - let depth_level = max(0, i32(ceil(log2(max(width, height))))); // TODO: Naga doesn't like this being a u32 + var aabb_width_pixels = (aabb.z - aabb.x) * depth_pyramid_size_mip_0.x; + var aabb_height_pixels = (aabb.w - aabb.y) * depth_pyramid_size_mip_0.y; + let depth_level = max(0, i32(ceil(log2(max(aabb_width_pixels, aabb_height_pixels))))); // TODO: Naga doesn't like this being a u32 let depth_pyramid_size = vec2(textureDimensions(depth_pyramid, depth_level)); let aabb_top_left = vec2(aabb.xy * depth_pyramid_size); @@ -102,11 +107,11 @@ fn cull_clusters( var cluster_visible: bool; if view.clip_from_view[3][3] == 1.0 { // Orthographic - let sphere_depth = view.clip_from_view[3][2] + (culling_bounding_sphere_center_view_space.z + culling_bounding_sphere_radius) * view.clip_from_view[2][2]; + let sphere_depth = view.clip_from_view[3][2] + (occlusion_culling_bounding_sphere_center_view_space.z + occlusion_culling_bounding_sphere_radius) * view.clip_from_view[2][2]; cluster_visible = sphere_depth >= occluder_depth; } else { // Perspective - let sphere_depth = -view.clip_from_view[3][2] / (culling_bounding_sphere_center_view_space.z + culling_bounding_sphere_radius); + let sphere_depth = -view.clip_from_view[3][2] / (occlusion_culling_bounding_sphere_center_view_space.z + occlusion_culling_bounding_sphere_radius); cluster_visible = sphere_depth >= occluder_depth; } @@ -118,15 +123,29 @@ fn cull_clusters( } #endif - // Append a list of this cluster's triangles to draw if not culled - if cluster_visible { - let meshlet_triangle_count = meshlets[meshlet_id].triangle_count; - let buffer_start = atomicAdd(&draw_indirect_args.vertex_count, meshlet_triangle_count * 3u) / 3u; - let cluster_id_packed = cluster_id << 6u; - for (var triangle_id = 0u; triangle_id < meshlet_triangle_count; triangle_id++) { - draw_triangle_buffer[buffer_start + triangle_id] = cluster_id_packed | triangle_id; - } + // Cluster would be occluded if drawn, so don't setup a draw for it + if !cluster_visible { return; } + + // Check how big the cluster is in screen space +#ifdef MESHLET_FIRST_CULLING_PASS + let culling_bounding_sphere_center_view_space = (view.view_from_world * vec4(culling_bounding_sphere_center.xyz, 1.0)).xyz; + aabb = project_view_space_sphere_to_screen_space_aabb(culling_bounding_sphere_center_view_space, culling_bounding_sphere_radius); + aabb_width_pixels = (aabb.z - aabb.x) * view.viewport.z; + aabb_height_pixels = (aabb.w - aabb.y) * view.viewport.w; +#endif + let cluster_is_small = all(vec2(aabb_width_pixels, aabb_height_pixels) < vec2(32.0)); // TODO: Nanite does something different. Come up with my own heuristic. + + // TODO: Also check if needs depth clipping + var buffer_slot: u32; + if cluster_is_small { + // Append this cluster to the list for software rasterization + buffer_slot = atomicAdd(&meshlet_software_raster_indirect_args.x, 1u); + } else { + // Append this cluster to the list for hardware rasterization + buffer_slot = atomicAdd(&meshlet_hardware_raster_indirect_args.instance_count, 1u); + buffer_slot = meshlet_raster_cluster_rightmost_slot - buffer_slot; } + meshlet_raster_clusters[buffer_slot] = cluster_id; } // https://stackoverflow.com/questions/21648630/radius-of-projected-sphere-in-screen-space/21649403#21649403 diff --git a/crates/bevy_pbr/src/meshlet/downsample_depth.wgsl b/crates/bevy_pbr/src/meshlet/downsample_depth.wgsl index 04e8f3f56af08..80dd7d4baafd4 100644 --- a/crates/bevy_pbr/src/meshlet/downsample_depth.wgsl +++ b/crates/bevy_pbr/src/meshlet/downsample_depth.wgsl @@ -1,4 +1,8 @@ -@group(0) @binding(0) var mip_0: texture_depth_2d; +#ifdef MESHLET_VISIBILITY_BUFFER_RASTER_PASS_OUTPUT +@group(0) @binding(0) var mip_0: array; // Per pixel +#else +@group(0) @binding(0) var mip_0: array; // Per pixel +#endif @group(0) @binding(1) var mip_1: texture_storage_2d; @group(0) @binding(2) var mip_2: texture_storage_2d; @group(0) @binding(3) var mip_3: texture_storage_2d; @@ -12,11 +16,16 @@ @group(0) @binding(11) var mip_11: texture_storage_2d; @group(0) @binding(12) var mip_12: texture_storage_2d; @group(0) @binding(13) var samplr: sampler; -var max_mip_level: u32; +struct Constants { max_mip_level: u32, view_width: u32 } +var constants: Constants; /// Generates a hierarchical depth buffer. /// Based on FidelityFX SPD v2.1 https://github.com/GPUOpen-LibrariesAndSDKs/FidelityFX-SDK/blob/d7531ae47d8b36a5d4025663e731a47a38be882f/sdk/include/FidelityFX/gpu/spd/ffx_spd.h#L528 +// TODO: +// * Subgroup support +// * True single pass downsampling + var intermediate_memory: array, 16>; @compute @@ -70,7 +79,7 @@ fn downsample_mips_0_and_1(x: u32, y: u32, workgroup_id: vec2u, local_invocation v[3] = reduce_load_mip_0(tex); textureStore(mip_1, pix, vec4(v[3])); - if max_mip_level <= 1u { return; } + if constants.max_mip_level <= 1u { return; } for (var i = 0u; i < 4u; i++) { intermediate_memory[x][y] = v[i]; @@ -100,19 +109,19 @@ fn downsample_mips_0_and_1(x: u32, y: u32, workgroup_id: vec2u, local_invocation } fn downsample_mips_2_to_5(x: u32, y: u32, workgroup_id: vec2u, local_invocation_index: u32) { - if max_mip_level <= 2u { return; } + if constants.max_mip_level <= 2u { return; } workgroupBarrier(); downsample_mip_2(x, y, workgroup_id, local_invocation_index); - if max_mip_level <= 3u { return; } + if constants.max_mip_level <= 3u { return; } workgroupBarrier(); downsample_mip_3(x, y, workgroup_id, local_invocation_index); - if max_mip_level <= 4u { return; } + if constants.max_mip_level <= 4u { return; } workgroupBarrier(); downsample_mip_4(x, y, workgroup_id, local_invocation_index); - if max_mip_level <= 5u { return; } + if constants.max_mip_level <= 5u { return; } workgroupBarrier(); downsample_mip_5(workgroup_id, local_invocation_index); } @@ -191,7 +200,7 @@ fn downsample_mips_6_and_7(x: u32, y: u32) { v[3] = reduce_load_mip_6(tex); textureStore(mip_7, pix, vec4(v[3])); - if max_mip_level <= 7u { return; } + if constants.max_mip_level <= 7u { return; } let vr = reduce_4(v); textureStore(mip_8, vec2(x, y), vec4(vr)); @@ -199,19 +208,19 @@ fn downsample_mips_6_and_7(x: u32, y: u32) { } fn downsample_mips_8_to_11(x: u32, y: u32, local_invocation_index: u32) { - if max_mip_level <= 8u { return; } + if constants.max_mip_level <= 8u { return; } workgroupBarrier(); downsample_mip_8(x, y, local_invocation_index); - if max_mip_level <= 9u { return; } + if constants.max_mip_level <= 9u { return; } workgroupBarrier(); downsample_mip_9(x, y, local_invocation_index); - if max_mip_level <= 10u { return; } + if constants.max_mip_level <= 10u { return; } workgroupBarrier(); downsample_mip_10(x, y, local_invocation_index); - if max_mip_level <= 11u { return; } + if constants.max_mip_level <= 11u { return; } workgroupBarrier(); downsample_mip_11(local_invocation_index); } @@ -275,8 +284,11 @@ fn remap_for_wave_reduction(a: u32) -> vec2u { } fn reduce_load_mip_0(tex: vec2u) -> f32 { - let uv = (vec2f(tex) + 0.5) / vec2f(textureDimensions(mip_0)); - return reduce_4(textureGather(mip_0, samplr, uv)); + let a = load_mip_0(tex.x, tex.y); + let b = load_mip_0(tex.x + 1u, tex.y); + let c = load_mip_0(tex.x, tex.y + 1u); + let d = load_mip_0(tex.x + 1u, tex.y + 1u); + return reduce_4(vec4(a, b, c, d)); } fn reduce_load_mip_6(tex: vec2u) -> f32 { @@ -288,6 +300,15 @@ fn reduce_load_mip_6(tex: vec2u) -> f32 { )); } +fn load_mip_0(x: u32, y: u32) -> f32 { + let i = y * constants.view_width + x; +#ifdef MESHLET_VISIBILITY_BUFFER_RASTER_PASS_OUTPUT + return bitcast(u32(mip_0[i] >> 32u)); +#else + return bitcast(mip_0[i]); +#endif +} + fn reduce_4(v: vec4f) -> f32 { return min(min(v.x, v.y), min(v.z, v.w)); } diff --git a/crates/bevy_pbr/src/meshlet/fill_cluster_buffers.wgsl b/crates/bevy_pbr/src/meshlet/fill_cluster_buffers.wgsl index f228ba050875f..04af6c4ad7091 100644 --- a/crates/bevy_pbr/src/meshlet/fill_cluster_buffers.wgsl +++ b/crates/bevy_pbr/src/meshlet/fill_cluster_buffers.wgsl @@ -13,11 +13,11 @@ fn fill_cluster_buffers( @builtin(workgroup_id) workgroup_id: vec3, @builtin(num_workgroups) num_workgroups: vec3, - @builtin(local_invocation_id) local_invocation_id: vec3 + @builtin(local_invocation_index) local_invocation_index: u32, ) { // Calculate the cluster ID for this thread - let cluster_id = local_invocation_id.x + 128u * dot(workgroup_id, vec3(num_workgroups.x * num_workgroups.x, num_workgroups.x, 1u)); - if cluster_id >= cluster_count { return; } + let cluster_id = local_invocation_index + 128u * dot(workgroup_id, vec3(num_workgroups.x * num_workgroups.x, num_workgroups.x, 1u)); + if cluster_id >= cluster_count { return; } // TODO: Could be an arrayLength? // Binary search to find the instance this cluster belongs to var left = 0u; diff --git a/crates/bevy_pbr/src/meshlet/from_mesh.rs b/crates/bevy_pbr/src/meshlet/from_mesh.rs index f62e00b4340bc..047b1a492e1ee 100644 --- a/crates/bevy_pbr/src/meshlet/from_mesh.rs +++ b/crates/bevy_pbr/src/meshlet/from_mesh.rs @@ -49,11 +49,6 @@ impl MeshletMesh { }, }) .collect::>(); - let worst_case_meshlet_triangles = meshlets - .meshlets - .iter() - .map(|m| m.triangle_count as u64) - .sum(); let mesh_scale = simplify_scale(&vertices); // Build further LODs @@ -87,7 +82,7 @@ impl MeshletMesh { // Add the maximum child error to the parent error to make parent error cumulative from LOD 0 // (we're currently building the parent from its children) - group_error += group_meshlets.iter().fold(group_error, |acc, meshlet_id| { + group_error += group_meshlets.iter().fold(0.0f32, |acc, meshlet_id| { acc.max(bounding_spheres[*meshlet_id].self_lod.radius) }); @@ -140,12 +135,12 @@ impl MeshletMesh { .map(|m| Meshlet { start_vertex_id: m.vertex_offset, start_index_id: m.triangle_offset, + vertex_count: m.vertex_count, triangle_count: m.triangle_count, }) .collect(); Ok(Self { - worst_case_meshlet_triangles, vertex_data: vertex_buffer.into(), vertex_ids: meshlets.vertices.into(), indices: meshlets.triangles.into(), @@ -294,6 +289,7 @@ fn simplify_meshlet_groups( let target_error = target_error_relative * mesh_scale; // Simplify the group to ~50% triangle count + // TODO: Simplify using vertex attributes let mut error = 0.0; let simplified_group_indices = simplify( &group_indices, diff --git a/crates/bevy_pbr/src/meshlet/gpu_scene.rs b/crates/bevy_pbr/src/meshlet/gpu_scene.rs deleted file mode 100644 index 1d4bf7ffe6701..0000000000000 --- a/crates/bevy_pbr/src/meshlet/gpu_scene.rs +++ /dev/null @@ -1,1050 +0,0 @@ -use super::{ - asset::{Meshlet, MeshletBoundingSpheres, MeshletMesh}, - persistent_buffer::PersistentGpuBuffer, -}; -use crate::{ - Material, MeshFlags, MeshTransforms, MeshUniform, NotShadowCaster, NotShadowReceiver, - PreviousGlobalTransform, RenderMaterialInstances, ShadowView, -}; -use bevy_asset::{AssetEvent, AssetId, AssetServer, Assets, Handle, UntypedAssetId}; -use bevy_core_pipeline::{ - core_3d::Camera3d, - prepass::{PreviousViewData, PreviousViewUniforms}, -}; -use bevy_ecs::{ - component::Component, - entity::{Entity, EntityHashMap}, - event::EventReader, - query::{AnyOf, Has}, - system::{Commands, Local, Query, Res, ResMut, Resource, SystemState}, - world::{FromWorld, World}, -}; -use bevy_math::{UVec2, Vec4Swizzles}; -use bevy_render::{ - render_resource::{binding_types::*, *}, - renderer::{RenderDevice, RenderQueue}, - texture::{CachedTexture, TextureCache}, - view::{ExtractedView, RenderLayers, ViewDepthTexture, ViewUniform, ViewUniforms}, - MainWorld, -}; -use bevy_transform::components::GlobalTransform; -use bevy_utils::{default, HashMap, HashSet}; -use encase::internal::WriteInto; -use std::{ - array, iter, - mem::size_of, - ops::{DerefMut, Range}, - sync::{atomic::AtomicBool, Arc}, -}; - -/// Create and queue for uploading to the GPU [`MeshUniform`] components for -/// [`MeshletMesh`] entities, as well as queuing uploads for any new meshlet mesh -/// assets that have not already been uploaded to the GPU. -pub fn extract_meshlet_meshes( - mut gpu_scene: ResMut, - // TODO: Replace main_world and system_state when Extract>> is possible - mut main_world: ResMut, - mut system_state: Local< - Option< - SystemState<( - Query<( - Entity, - &Handle, - &GlobalTransform, - Option<&PreviousGlobalTransform>, - Option<&RenderLayers>, - Has, - Has, - )>, - Res, - ResMut>, - EventReader>, - )>, - >, - >, -) { - if system_state.is_none() { - *system_state = Some(SystemState::new(&mut main_world)); - } - let system_state = system_state.as_mut().unwrap(); - - let (instances_query, asset_server, mut assets, mut asset_events) = - system_state.get_mut(&mut main_world); - - // Reset all temporary data for MeshletGpuScene - gpu_scene.reset(); - - // Free GPU buffer space for any modified or dropped MeshletMesh assets - for asset_event in asset_events.read() { - if let AssetEvent::Unused { id } | AssetEvent::Modified { id } = asset_event { - if let Some(( - [vertex_data_slice, vertex_ids_slice, indices_slice, meshlets_slice, meshlet_bounding_spheres_slice], - _, - )) = gpu_scene.meshlet_mesh_slices.remove(id) - { - gpu_scene.vertex_data.mark_slice_unused(vertex_data_slice); - gpu_scene.vertex_ids.mark_slice_unused(vertex_ids_slice); - gpu_scene.indices.mark_slice_unused(indices_slice); - gpu_scene.meshlets.mark_slice_unused(meshlets_slice); - gpu_scene - .meshlet_bounding_spheres - .mark_slice_unused(meshlet_bounding_spheres_slice); - } - } - } - - for ( - instance, - handle, - transform, - previous_transform, - render_layers, - not_shadow_receiver, - not_shadow_caster, - ) in &instances_query - { - // Skip instances with an unloaded MeshletMesh asset - if asset_server.is_managed(handle.id()) - && !asset_server.is_loaded_with_dependencies(handle.id()) - { - continue; - } - - // Upload the instance's MeshletMesh asset data, if not done already, along with other per-frame per-instance data. - gpu_scene.queue_meshlet_mesh_upload( - instance, - render_layers.cloned().unwrap_or(default()), - not_shadow_caster, - handle, - &mut assets, - ); - - // Build a MeshUniform for each instance - let transform = transform.affine(); - let previous_transform = previous_transform.map(|t| t.0).unwrap_or(transform); - let mut flags = if not_shadow_receiver { - MeshFlags::empty() - } else { - MeshFlags::SHADOW_RECEIVER - }; - if transform.matrix3.determinant().is_sign_positive() { - flags |= MeshFlags::SIGN_DETERMINANT_MODEL_3X3; - } - let transforms = MeshTransforms { - world_from_local: (&transform).into(), - previous_world_from_local: (&previous_transform).into(), - flags: flags.bits(), - }; - gpu_scene - .instance_uniforms - .get_mut() - .push(MeshUniform::new(&transforms, 0, None)); - } -} - -/// Upload all newly queued [`MeshletMesh`] asset data from [`extract_meshlet_meshes`] to the GPU. -pub fn perform_pending_meshlet_mesh_writes( - mut gpu_scene: ResMut, - render_queue: Res, - render_device: Res, -) { - gpu_scene - .vertex_data - .perform_writes(&render_queue, &render_device); - gpu_scene - .vertex_ids - .perform_writes(&render_queue, &render_device); - gpu_scene - .indices - .perform_writes(&render_queue, &render_device); - gpu_scene - .meshlets - .perform_writes(&render_queue, &render_device); - gpu_scene - .meshlet_bounding_spheres - .perform_writes(&render_queue, &render_device); -} - -/// For each entity in the scene, record what material ID (for use with depth testing during the meshlet mesh material draw nodes) -/// its material was assigned in the `prepare_material_meshlet_meshes` systems, and note that the material is used by at least one entity in the scene. -pub fn queue_material_meshlet_meshes( - mut gpu_scene: ResMut, - render_material_instances: Res>, -) { - // TODO: Ideally we could parallelize this system, both between different materials, and the loop over instances - let gpu_scene = gpu_scene.deref_mut(); - - for (i, (instance, _, _)) in gpu_scene.instances.iter().enumerate() { - if let Some(material_asset_id) = render_material_instances.get(instance) { - let material_asset_id = material_asset_id.untyped(); - if let Some(material_id) = gpu_scene.material_id_lookup.get(&material_asset_id) { - gpu_scene.material_ids_present_in_scene.insert(*material_id); - gpu_scene.instance_material_ids.get_mut()[i] = *material_id; - } - } - } -} - -// TODO: Try using Queue::write_buffer_with() in queue_meshlet_mesh_upload() to reduce copies -fn upload_storage_buffer( - buffer: &mut StorageBuffer>, - render_device: &RenderDevice, - render_queue: &RenderQueue, -) where - Vec: WriteInto, -{ - let inner = buffer.buffer(); - let capacity = inner.map_or(0, |b| b.size()); - let size = buffer.get().size().get() as BufferAddress; - - if capacity >= size { - let inner = inner.unwrap(); - let bytes = bytemuck::must_cast_slice(buffer.get().as_slice()); - render_queue.write_buffer(inner, 0, bytes); - } else { - buffer.write_buffer(render_device, render_queue); - } -} - -pub fn prepare_meshlet_per_frame_resources( - mut gpu_scene: ResMut, - views: Query<( - Entity, - &ExtractedView, - Option<&RenderLayers>, - AnyOf<(&Camera3d, &ShadowView)>, - )>, - mut texture_cache: ResMut, - render_queue: Res, - render_device: Res, - mut commands: Commands, -) { - if gpu_scene.scene_meshlet_count == 0 { - return; - } - - let gpu_scene = gpu_scene.as_mut(); - - gpu_scene - .instance_uniforms - .write_buffer(&render_device, &render_queue); - upload_storage_buffer( - &mut gpu_scene.instance_material_ids, - &render_device, - &render_queue, - ); - upload_storage_buffer( - &mut gpu_scene.instance_meshlet_counts_prefix_sum, - &render_device, - &render_queue, - ); - upload_storage_buffer( - &mut gpu_scene.instance_meshlet_slice_starts, - &render_device, - &render_queue, - ); - - // Early submission for GPU data uploads to start while the render graph records commands - render_queue.submit([]); - - let needed_buffer_size = 4 * gpu_scene.scene_meshlet_count as u64; - match &mut gpu_scene.cluster_instance_ids { - Some(buffer) if buffer.size() >= needed_buffer_size => buffer.clone(), - slot => { - let buffer = render_device.create_buffer(&BufferDescriptor { - label: Some("meshlet_cluster_instance_ids"), - size: needed_buffer_size, - usage: BufferUsages::STORAGE, - mapped_at_creation: false, - }); - *slot = Some(buffer.clone()); - buffer - } - }; - match &mut gpu_scene.cluster_meshlet_ids { - Some(buffer) if buffer.size() >= needed_buffer_size => buffer.clone(), - slot => { - let buffer = render_device.create_buffer(&BufferDescriptor { - label: Some("meshlet_cluster_meshlet_ids"), - size: needed_buffer_size, - usage: BufferUsages::STORAGE, - mapped_at_creation: false, - }); - *slot = Some(buffer.clone()); - buffer - } - }; - - let needed_buffer_size = 4 * gpu_scene.scene_triangle_count; - let visibility_buffer_draw_triangle_buffer = - match &mut gpu_scene.visibility_buffer_draw_triangle_buffer { - Some(buffer) if buffer.size() >= needed_buffer_size => buffer.clone(), - slot => { - let buffer = render_device.create_buffer(&BufferDescriptor { - label: Some("meshlet_visibility_buffer_draw_triangle_buffer"), - size: needed_buffer_size, - usage: BufferUsages::STORAGE, - mapped_at_creation: false, - }); - *slot = Some(buffer.clone()); - buffer - } - }; - - let needed_buffer_size = - gpu_scene.scene_meshlet_count.div_ceil(u32::BITS) as u64 * size_of::() as u64; - for (view_entity, view, render_layers, (_, shadow_view)) in &views { - let instance_visibility = gpu_scene - .view_instance_visibility - .entry(view_entity) - .or_insert_with(|| { - let mut buffer = StorageBuffer::default(); - buffer.set_label(Some("meshlet_view_instance_visibility")); - buffer - }); - for (instance_index, (_, layers, not_shadow_caster)) in - gpu_scene.instances.iter().enumerate() - { - // If either the layers don't match the view's layers or this is a shadow view - // and the instance is not a shadow caster, hide the instance for this view - if !render_layers.unwrap_or(&default()).intersects(layers) - || (shadow_view.is_some() && *not_shadow_caster) - { - let vec = instance_visibility.get_mut(); - let index = instance_index / 32; - let bit = instance_index - index * 32; - if vec.len() <= index { - vec.extend(iter::repeat(0).take(index - vec.len() + 1)); - } - vec[index] |= 1 << bit; - } - } - upload_storage_buffer(instance_visibility, &render_device, &render_queue); - let instance_visibility = instance_visibility.buffer().unwrap().clone(); - - let second_pass_candidates_buffer = match &mut gpu_scene.second_pass_candidates_buffer { - Some(buffer) if buffer.size() >= needed_buffer_size => buffer.clone(), - slot => { - let buffer = render_device.create_buffer(&BufferDescriptor { - label: Some("meshlet_second_pass_candidates"), - size: needed_buffer_size, - usage: BufferUsages::STORAGE | BufferUsages::COPY_DST, - mapped_at_creation: false, - }); - *slot = Some(buffer.clone()); - buffer - } - }; - - let visibility_buffer = TextureDescriptor { - label: Some("meshlet_visibility_buffer"), - size: Extent3d { - width: view.viewport.z, - height: view.viewport.w, - depth_or_array_layers: 1, - }, - mip_level_count: 1, - sample_count: 1, - dimension: TextureDimension::D2, - format: TextureFormat::R32Uint, - usage: TextureUsages::RENDER_ATTACHMENT | TextureUsages::TEXTURE_BINDING, - view_formats: &[], - }; - - let visibility_buffer_draw_indirect_args_first = - render_device.create_buffer_with_data(&BufferInitDescriptor { - label: Some("meshlet_visibility_buffer_draw_indirect_args_first"), - contents: DrawIndirectArgs { - vertex_count: 0, - instance_count: 1, - first_vertex: 0, - first_instance: 0, - } - .as_bytes(), - usage: BufferUsages::STORAGE | BufferUsages::INDIRECT, - }); - let visibility_buffer_draw_indirect_args_second = - render_device.create_buffer_with_data(&BufferInitDescriptor { - label: Some("meshlet_visibility_buffer_draw_indirect_args_second"), - contents: DrawIndirectArgs { - vertex_count: 0, - instance_count: 1, - first_vertex: 0, - first_instance: 0, - } - .as_bytes(), - usage: BufferUsages::STORAGE | BufferUsages::INDIRECT, - }); - - let depth_pyramid_size = Extent3d { - width: view.viewport.z.div_ceil(2), - height: view.viewport.w.div_ceil(2), - depth_or_array_layers: 1, - }; - let depth_pyramid_mip_count = depth_pyramid_size.max_mips(TextureDimension::D2); - let depth_pyramid = texture_cache.get( - &render_device, - TextureDescriptor { - label: Some("meshlet_depth_pyramid"), - size: depth_pyramid_size, - mip_level_count: depth_pyramid_mip_count, - sample_count: 1, - dimension: TextureDimension::D2, - format: TextureFormat::R32Float, - usage: TextureUsages::STORAGE_BINDING | TextureUsages::TEXTURE_BINDING, - view_formats: &[], - }, - ); - let depth_pyramid_mips = array::from_fn(|i| { - if (i as u32) < depth_pyramid_mip_count { - depth_pyramid.texture.create_view(&TextureViewDescriptor { - label: Some("meshlet_depth_pyramid_texture_view"), - format: Some(TextureFormat::R32Float), - dimension: Some(TextureViewDimension::D2), - aspect: TextureAspect::All, - base_mip_level: i as u32, - mip_level_count: Some(1), - base_array_layer: 0, - array_layer_count: Some(1), - }) - } else { - gpu_scene.depth_pyramid_dummy_texture.clone() - } - }); - let depth_pyramid_all_mips = depth_pyramid.default_view.clone(); - - let previous_depth_pyramid = match gpu_scene.previous_depth_pyramids.get(&view_entity) { - Some(texture_view) => texture_view.clone(), - None => depth_pyramid_all_mips.clone(), - }; - gpu_scene - .previous_depth_pyramids - .insert(view_entity, depth_pyramid_all_mips.clone()); - - let material_depth_color = TextureDescriptor { - label: Some("meshlet_material_depth_color"), - size: Extent3d { - width: view.viewport.z, - height: view.viewport.w, - depth_or_array_layers: 1, - }, - mip_level_count: 1, - sample_count: 1, - dimension: TextureDimension::D2, - format: TextureFormat::R16Uint, - usage: TextureUsages::RENDER_ATTACHMENT | TextureUsages::TEXTURE_BINDING, - view_formats: &[], - }; - - let material_depth = TextureDescriptor { - label: Some("meshlet_material_depth"), - size: Extent3d { - width: view.viewport.z, - height: view.viewport.w, - depth_or_array_layers: 1, - }, - mip_level_count: 1, - sample_count: 1, - dimension: TextureDimension::D2, - format: TextureFormat::Depth16Unorm, - usage: TextureUsages::RENDER_ATTACHMENT, - view_formats: &[], - }; - - let not_shadow_view = shadow_view.is_none(); - commands.entity(view_entity).insert(MeshletViewResources { - scene_meshlet_count: gpu_scene.scene_meshlet_count, - second_pass_candidates_buffer, - instance_visibility, - visibility_buffer: not_shadow_view - .then(|| texture_cache.get(&render_device, visibility_buffer)), - visibility_buffer_draw_indirect_args_first, - visibility_buffer_draw_indirect_args_second, - visibility_buffer_draw_triangle_buffer: visibility_buffer_draw_triangle_buffer.clone(), - depth_pyramid_all_mips, - depth_pyramid_mips, - depth_pyramid_mip_count, - previous_depth_pyramid, - material_depth_color: not_shadow_view - .then(|| texture_cache.get(&render_device, material_depth_color)), - material_depth: not_shadow_view - .then(|| texture_cache.get(&render_device, material_depth)), - view_size: view.viewport.zw(), - }); - } -} - -pub fn prepare_meshlet_view_bind_groups( - gpu_scene: Res, - views: Query<( - Entity, - &MeshletViewResources, - AnyOf<(&ViewDepthTexture, &ShadowView)>, - )>, - view_uniforms: Res, - previous_view_uniforms: Res, - render_device: Res, - mut commands: Commands, -) { - let ( - Some(cluster_instance_ids), - Some(cluster_meshlet_ids), - Some(view_uniforms), - Some(previous_view_uniforms), - ) = ( - gpu_scene.cluster_instance_ids.as_ref(), - gpu_scene.cluster_meshlet_ids.as_ref(), - view_uniforms.uniforms.binding(), - previous_view_uniforms.uniforms.binding(), - ) - else { - return; - }; - - let first_node = Arc::new(AtomicBool::new(true)); - - // TODO: Some of these bind groups can be reused across multiple views - for (view_entity, view_resources, view_depth) in &views { - let entries = BindGroupEntries::sequential(( - gpu_scene - .instance_meshlet_counts_prefix_sum - .binding() - .unwrap(), - gpu_scene.instance_meshlet_slice_starts.binding().unwrap(), - cluster_instance_ids.as_entire_binding(), - cluster_meshlet_ids.as_entire_binding(), - )); - let fill_cluster_buffers = render_device.create_bind_group( - "meshlet_fill_cluster_buffers", - &gpu_scene.fill_cluster_buffers_bind_group_layout, - &entries, - ); - - let entries = BindGroupEntries::sequential(( - cluster_meshlet_ids.as_entire_binding(), - gpu_scene.meshlet_bounding_spheres.binding(), - cluster_instance_ids.as_entire_binding(), - gpu_scene.instance_uniforms.binding().unwrap(), - view_resources.instance_visibility.as_entire_binding(), - view_resources - .second_pass_candidates_buffer - .as_entire_binding(), - gpu_scene.meshlets.binding(), - view_resources - .visibility_buffer_draw_indirect_args_first - .as_entire_binding(), - view_resources - .visibility_buffer_draw_triangle_buffer - .as_entire_binding(), - &view_resources.previous_depth_pyramid, - view_uniforms.clone(), - previous_view_uniforms.clone(), - )); - let culling_first = render_device.create_bind_group( - "meshlet_culling_first_bind_group", - &gpu_scene.culling_bind_group_layout, - &entries, - ); - - let entries = BindGroupEntries::sequential(( - cluster_meshlet_ids.as_entire_binding(), - gpu_scene.meshlet_bounding_spheres.binding(), - cluster_instance_ids.as_entire_binding(), - gpu_scene.instance_uniforms.binding().unwrap(), - view_resources.instance_visibility.as_entire_binding(), - view_resources - .second_pass_candidates_buffer - .as_entire_binding(), - gpu_scene.meshlets.binding(), - view_resources - .visibility_buffer_draw_indirect_args_second - .as_entire_binding(), - view_resources - .visibility_buffer_draw_triangle_buffer - .as_entire_binding(), - &view_resources.depth_pyramid_all_mips, - view_uniforms.clone(), - previous_view_uniforms.clone(), - )); - let culling_second = render_device.create_bind_group( - "meshlet_culling_second_bind_group", - &gpu_scene.culling_bind_group_layout, - &entries, - ); - - let view_depth_texture = match view_depth { - (Some(view_depth), None) => view_depth.view(), - (None, Some(shadow_view)) => &shadow_view.depth_attachment.view, - _ => unreachable!(), - }; - let downsample_depth = render_device.create_bind_group( - "meshlet_downsample_depth_bind_group", - &gpu_scene.downsample_depth_bind_group_layout, - &BindGroupEntries::sequential(( - view_depth_texture, - &view_resources.depth_pyramid_mips[0], - &view_resources.depth_pyramid_mips[1], - &view_resources.depth_pyramid_mips[2], - &view_resources.depth_pyramid_mips[3], - &view_resources.depth_pyramid_mips[4], - &view_resources.depth_pyramid_mips[5], - &view_resources.depth_pyramid_mips[6], - &view_resources.depth_pyramid_mips[7], - &view_resources.depth_pyramid_mips[8], - &view_resources.depth_pyramid_mips[9], - &view_resources.depth_pyramid_mips[10], - &view_resources.depth_pyramid_mips[11], - &gpu_scene.depth_pyramid_sampler, - )), - ); - - let entries = BindGroupEntries::sequential(( - cluster_meshlet_ids.as_entire_binding(), - gpu_scene.meshlets.binding(), - gpu_scene.indices.binding(), - gpu_scene.vertex_ids.binding(), - gpu_scene.vertex_data.binding(), - cluster_instance_ids.as_entire_binding(), - gpu_scene.instance_uniforms.binding().unwrap(), - gpu_scene.instance_material_ids.binding().unwrap(), - view_resources - .visibility_buffer_draw_triangle_buffer - .as_entire_binding(), - view_uniforms.clone(), - )); - let visibility_buffer_raster = render_device.create_bind_group( - "meshlet_visibility_raster_buffer_bind_group", - &gpu_scene.visibility_buffer_raster_bind_group_layout, - &entries, - ); - - let copy_material_depth = - view_resources - .material_depth_color - .as_ref() - .map(|material_depth_color| { - render_device.create_bind_group( - "meshlet_copy_material_depth_bind_group", - &gpu_scene.copy_material_depth_bind_group_layout, - &[BindGroupEntry { - binding: 0, - resource: BindingResource::TextureView( - &material_depth_color.default_view, - ), - }], - ) - }); - - let material_draw = view_resources - .visibility_buffer - .as_ref() - .map(|visibility_buffer| { - let entries = BindGroupEntries::sequential(( - &visibility_buffer.default_view, - cluster_meshlet_ids.as_entire_binding(), - gpu_scene.meshlets.binding(), - gpu_scene.indices.binding(), - gpu_scene.vertex_ids.binding(), - gpu_scene.vertex_data.binding(), - cluster_instance_ids.as_entire_binding(), - gpu_scene.instance_uniforms.binding().unwrap(), - )); - render_device.create_bind_group( - "meshlet_mesh_material_draw_bind_group", - &gpu_scene.material_draw_bind_group_layout, - &entries, - ) - }); - - commands.entity(view_entity).insert(MeshletViewBindGroups { - first_node: Arc::clone(&first_node), - fill_cluster_buffers, - culling_first, - culling_second, - downsample_depth, - visibility_buffer_raster, - copy_material_depth, - material_draw, - }); - } -} - -/// A resource that manages GPU data for rendering [`MeshletMesh`]'s. -#[derive(Resource)] -pub struct MeshletGpuScene { - vertex_data: PersistentGpuBuffer>, - vertex_ids: PersistentGpuBuffer>, - indices: PersistentGpuBuffer>, - meshlets: PersistentGpuBuffer>, - meshlet_bounding_spheres: PersistentGpuBuffer>, - meshlet_mesh_slices: HashMap, ([Range; 5], u64)>, - - scene_meshlet_count: u32, - scene_triangle_count: u64, - next_material_id: u32, - material_id_lookup: HashMap, - material_ids_present_in_scene: HashSet, - /// Per-instance [`Entity`], [`RenderLayers`], and [`NotShadowCaster`] - instances: Vec<(Entity, RenderLayers, bool)>, - /// Per-instance transforms, model matrices, and render flags - instance_uniforms: StorageBuffer>, - /// Per-view per-instance visibility bit. Used for [`RenderLayers`] and [`NotShadowCaster`] support. - view_instance_visibility: EntityHashMap>>, - instance_material_ids: StorageBuffer>, - instance_meshlet_counts_prefix_sum: StorageBuffer>, - instance_meshlet_slice_starts: StorageBuffer>, - cluster_instance_ids: Option, - cluster_meshlet_ids: Option, - second_pass_candidates_buffer: Option, - previous_depth_pyramids: EntityHashMap, - visibility_buffer_draw_triangle_buffer: Option, - - fill_cluster_buffers_bind_group_layout: BindGroupLayout, - culling_bind_group_layout: BindGroupLayout, - visibility_buffer_raster_bind_group_layout: BindGroupLayout, - downsample_depth_bind_group_layout: BindGroupLayout, - copy_material_depth_bind_group_layout: BindGroupLayout, - material_draw_bind_group_layout: BindGroupLayout, - depth_pyramid_sampler: Sampler, - depth_pyramid_dummy_texture: TextureView, -} - -impl FromWorld for MeshletGpuScene { - fn from_world(world: &mut World) -> Self { - let render_device = world.resource::(); - - Self { - vertex_data: PersistentGpuBuffer::new("meshlet_vertex_data", render_device), - vertex_ids: PersistentGpuBuffer::new("meshlet_vertex_ids", render_device), - indices: PersistentGpuBuffer::new("meshlet_indices", render_device), - meshlets: PersistentGpuBuffer::new("meshlets", render_device), - meshlet_bounding_spheres: PersistentGpuBuffer::new( - "meshlet_bounding_spheres", - render_device, - ), - meshlet_mesh_slices: HashMap::new(), - - scene_meshlet_count: 0, - scene_triangle_count: 0, - next_material_id: 0, - material_id_lookup: HashMap::new(), - material_ids_present_in_scene: HashSet::new(), - instances: Vec::new(), - instance_uniforms: { - let mut buffer = StorageBuffer::default(); - buffer.set_label(Some("meshlet_instance_uniforms")); - buffer - }, - view_instance_visibility: EntityHashMap::default(), - instance_material_ids: { - let mut buffer = StorageBuffer::default(); - buffer.set_label(Some("meshlet_instance_material_ids")); - buffer - }, - instance_meshlet_counts_prefix_sum: { - let mut buffer = StorageBuffer::default(); - buffer.set_label(Some("meshlet_instance_meshlet_counts_prefix_sum")); - buffer - }, - instance_meshlet_slice_starts: { - let mut buffer = StorageBuffer::default(); - buffer.set_label(Some("meshlet_instance_meshlet_slice_starts")); - buffer - }, - cluster_instance_ids: None, - cluster_meshlet_ids: None, - second_pass_candidates_buffer: None, - previous_depth_pyramids: EntityHashMap::default(), - visibility_buffer_draw_triangle_buffer: None, - - // TODO: Buffer min sizes - fill_cluster_buffers_bind_group_layout: render_device.create_bind_group_layout( - "meshlet_fill_cluster_buffers_bind_group_layout", - &BindGroupLayoutEntries::sequential( - ShaderStages::COMPUTE, - ( - storage_buffer_read_only_sized(false, None), - storage_buffer_read_only_sized(false, None), - storage_buffer_sized(false, None), - storage_buffer_sized(false, None), - ), - ), - ), - culling_bind_group_layout: render_device.create_bind_group_layout( - "meshlet_culling_bind_group_layout", - &BindGroupLayoutEntries::sequential( - ShaderStages::COMPUTE, - ( - storage_buffer_read_only_sized(false, None), - storage_buffer_read_only_sized(false, None), - storage_buffer_read_only_sized(false, None), - storage_buffer_read_only_sized(false, None), - storage_buffer_read_only_sized(false, None), - storage_buffer_sized(false, None), - storage_buffer_read_only_sized(false, None), - storage_buffer_sized(false, None), - storage_buffer_sized(false, None), - texture_2d(TextureSampleType::Float { filterable: false }), - uniform_buffer::(true), - uniform_buffer::(true), - ), - ), - ), - downsample_depth_bind_group_layout: render_device.create_bind_group_layout( - "meshlet_downsample_depth_bind_group_layout", - &BindGroupLayoutEntries::sequential(ShaderStages::COMPUTE, { - let write_only_r32float = || { - texture_storage_2d(TextureFormat::R32Float, StorageTextureAccess::WriteOnly) - }; - ( - texture_depth_2d(), - write_only_r32float(), - write_only_r32float(), - write_only_r32float(), - write_only_r32float(), - write_only_r32float(), - texture_storage_2d( - TextureFormat::R32Float, - StorageTextureAccess::ReadWrite, - ), - write_only_r32float(), - write_only_r32float(), - write_only_r32float(), - write_only_r32float(), - write_only_r32float(), - write_only_r32float(), - sampler(SamplerBindingType::NonFiltering), - ) - }), - ), - visibility_buffer_raster_bind_group_layout: render_device.create_bind_group_layout( - "meshlet_visibility_buffer_raster_bind_group_layout", - &BindGroupLayoutEntries::sequential( - ShaderStages::VERTEX, - ( - storage_buffer_read_only_sized(false, None), - storage_buffer_read_only_sized(false, None), - storage_buffer_read_only_sized(false, None), - storage_buffer_read_only_sized(false, None), - storage_buffer_read_only_sized(false, None), - storage_buffer_read_only_sized(false, None), - storage_buffer_read_only_sized(false, None), - storage_buffer_read_only_sized(false, None), - storage_buffer_read_only_sized(false, None), - uniform_buffer::(true), - ), - ), - ), - copy_material_depth_bind_group_layout: render_device.create_bind_group_layout( - "meshlet_copy_material_depth_bind_group_layout", - &BindGroupLayoutEntries::single( - ShaderStages::FRAGMENT, - texture_2d(TextureSampleType::Uint), - ), - ), - material_draw_bind_group_layout: render_device.create_bind_group_layout( - "meshlet_mesh_material_draw_bind_group_layout", - &BindGroupLayoutEntries::sequential( - ShaderStages::FRAGMENT, - ( - texture_2d(TextureSampleType::Uint), - storage_buffer_read_only_sized(false, None), - storage_buffer_read_only_sized(false, None), - storage_buffer_read_only_sized(false, None), - storage_buffer_read_only_sized(false, None), - storage_buffer_read_only_sized(false, None), - storage_buffer_read_only_sized(false, None), - storage_buffer_read_only_sized(false, None), - ), - ), - ), - depth_pyramid_sampler: render_device.create_sampler(&SamplerDescriptor { - label: Some("meshlet_depth_pyramid_sampler"), - ..default() - }), - depth_pyramid_dummy_texture: render_device - .create_texture(&TextureDescriptor { - label: Some("meshlet_depth_pyramid_dummy_texture"), - size: Extent3d { - width: 1, - height: 1, - depth_or_array_layers: 1, - }, - mip_level_count: 1, - sample_count: 1, - dimension: TextureDimension::D2, - format: TextureFormat::R32Float, - usage: TextureUsages::STORAGE_BINDING, - view_formats: &[], - }) - .create_view(&TextureViewDescriptor { - label: Some("meshlet_depth_pyramid_dummy_texture_view"), - format: Some(TextureFormat::R32Float), - dimension: Some(TextureViewDimension::D2), - aspect: TextureAspect::All, - base_mip_level: 0, - mip_level_count: Some(1), - base_array_layer: 0, - array_layer_count: Some(1), - }), - } - } -} - -impl MeshletGpuScene { - /// Clear per-frame CPU->GPU upload buffers and reset all per-frame data. - fn reset(&mut self) { - // TODO: Shrink capacity if saturation is low - self.scene_meshlet_count = 0; - self.scene_triangle_count = 0; - self.next_material_id = 0; - self.material_id_lookup.clear(); - self.material_ids_present_in_scene.clear(); - self.instances.clear(); - self.view_instance_visibility - .values_mut() - .for_each(|b| b.get_mut().clear()); - self.instance_uniforms.get_mut().clear(); - self.instance_material_ids.get_mut().clear(); - self.instance_meshlet_counts_prefix_sum.get_mut().clear(); - self.instance_meshlet_slice_starts.get_mut().clear(); - // TODO: Remove unused entries for view_instance_visibility and previous_depth_pyramids - } - - fn queue_meshlet_mesh_upload( - &mut self, - instance: Entity, - render_layers: RenderLayers, - not_shadow_caster: bool, - handle: &Handle, - assets: &mut Assets, - ) { - let queue_meshlet_mesh = |asset_id: &AssetId| { - let meshlet_mesh = assets.remove_untracked(*asset_id).expect( - "MeshletMesh asset was already unloaded but is not registered with MeshletGpuScene", - ); - - let vertex_data_slice = self - .vertex_data - .queue_write(Arc::clone(&meshlet_mesh.vertex_data), ()); - let vertex_ids_slice = self.vertex_ids.queue_write( - Arc::clone(&meshlet_mesh.vertex_ids), - vertex_data_slice.start, - ); - let indices_slice = self - .indices - .queue_write(Arc::clone(&meshlet_mesh.indices), ()); - let meshlets_slice = self.meshlets.queue_write( - Arc::clone(&meshlet_mesh.meshlets), - (vertex_ids_slice.start, indices_slice.start), - ); - let meshlet_bounding_spheres_slice = self - .meshlet_bounding_spheres - .queue_write(Arc::clone(&meshlet_mesh.bounding_spheres), ()); - - ( - [ - vertex_data_slice, - vertex_ids_slice, - indices_slice, - meshlets_slice, - meshlet_bounding_spheres_slice, - ], - meshlet_mesh.worst_case_meshlet_triangles, - ) - }; - - // If the MeshletMesh asset has not been uploaded to the GPU yet, queue it for uploading - let ([_, _, _, meshlets_slice, _], triangle_count) = self - .meshlet_mesh_slices - .entry(handle.id()) - .or_insert_with_key(queue_meshlet_mesh) - .clone(); - - let meshlets_slice = (meshlets_slice.start as u32 / size_of::() as u32) - ..(meshlets_slice.end as u32 / size_of::() as u32); - - // Append instance data for this frame - self.instances - .push((instance, render_layers, not_shadow_caster)); - self.instance_material_ids.get_mut().push(0); - self.instance_meshlet_counts_prefix_sum - .get_mut() - .push(self.scene_meshlet_count); - self.instance_meshlet_slice_starts - .get_mut() - .push(meshlets_slice.start); - - self.scene_meshlet_count += meshlets_slice.end - meshlets_slice.start; - self.scene_triangle_count += triangle_count; - } - - /// Get the depth value for use with the material depth texture for a given [`Material`] asset. - pub fn get_material_id(&mut self, material_id: UntypedAssetId) -> u32 { - *self - .material_id_lookup - .entry(material_id) - .or_insert_with(|| { - self.next_material_id += 1; - self.next_material_id - }) - } - - pub fn material_present_in_scene(&self, material_id: &u32) -> bool { - self.material_ids_present_in_scene.contains(material_id) - } - - pub fn fill_cluster_buffers_bind_group_layout(&self) -> BindGroupLayout { - self.fill_cluster_buffers_bind_group_layout.clone() - } - - pub fn culling_bind_group_layout(&self) -> BindGroupLayout { - self.culling_bind_group_layout.clone() - } - - pub fn downsample_depth_bind_group_layout(&self) -> BindGroupLayout { - self.downsample_depth_bind_group_layout.clone() - } - - pub fn visibility_buffer_raster_bind_group_layout(&self) -> BindGroupLayout { - self.visibility_buffer_raster_bind_group_layout.clone() - } - - pub fn copy_material_depth_bind_group_layout(&self) -> BindGroupLayout { - self.copy_material_depth_bind_group_layout.clone() - } - - pub fn material_draw_bind_group_layout(&self) -> BindGroupLayout { - self.material_draw_bind_group_layout.clone() - } -} - -#[derive(Component)] -pub struct MeshletViewResources { - pub scene_meshlet_count: u32, - pub second_pass_candidates_buffer: Buffer, - instance_visibility: Buffer, - pub visibility_buffer: Option, - pub visibility_buffer_draw_indirect_args_first: Buffer, - pub visibility_buffer_draw_indirect_args_second: Buffer, - visibility_buffer_draw_triangle_buffer: Buffer, - depth_pyramid_all_mips: TextureView, - depth_pyramid_mips: [TextureView; 12], - pub depth_pyramid_mip_count: u32, - previous_depth_pyramid: TextureView, - pub material_depth_color: Option, - pub material_depth: Option, - pub view_size: UVec2, -} - -#[derive(Component)] -pub struct MeshletViewBindGroups { - pub first_node: Arc, - pub fill_cluster_buffers: BindGroup, - pub culling_first: BindGroup, - pub culling_second: BindGroup, - pub downsample_depth: BindGroup, - pub visibility_buffer_raster: BindGroup, - pub copy_material_depth: Option, - pub material_draw: Option, -} diff --git a/crates/bevy_pbr/src/meshlet/instance_manager.rs b/crates/bevy_pbr/src/meshlet/instance_manager.rs new file mode 100644 index 0000000000000..0f370f2200459 --- /dev/null +++ b/crates/bevy_pbr/src/meshlet/instance_manager.rs @@ -0,0 +1,261 @@ +use super::{meshlet_mesh_manager::MeshletMeshManager, MeshletMesh}; +use crate::{ + Material, MeshFlags, MeshTransforms, MeshUniform, NotShadowCaster, NotShadowReceiver, + PreviousGlobalTransform, RenderMaterialInstances, +}; +use bevy_asset::{AssetEvent, AssetServer, Assets, Handle, UntypedAssetId}; +use bevy_ecs::{ + entity::{Entities, Entity, EntityHashMap}, + event::EventReader, + query::Has, + system::{Local, Query, Res, ResMut, Resource, SystemState}, +}; +use bevy_render::{render_resource::StorageBuffer, view::RenderLayers, MainWorld}; +use bevy_transform::components::GlobalTransform; +use bevy_utils::{HashMap, HashSet}; +use std::ops::{DerefMut, Range}; + +/// Manages data for each entity with a [`MeshletMesh`]. +#[derive(Resource)] +pub struct InstanceManager { + /// Amount of clusters in the scene (sum of all meshlet counts across all instances) + pub scene_cluster_count: u32, + + /// Per-instance [`Entity`], [`RenderLayers`], and [`NotShadowCaster`] + pub instances: Vec<(Entity, RenderLayers, bool)>, + /// Per-instance [`MeshUniform`] + pub instance_uniforms: StorageBuffer>, + /// Per-instance material ID + pub instance_material_ids: StorageBuffer>, + /// Prefix-sum of meshlet counts per instance + pub instance_meshlet_counts_prefix_sum: StorageBuffer>, + /// Per-instance index to the start of the instance's slice of the meshlets buffer + pub instance_meshlet_slice_starts: StorageBuffer>, + /// Per-view per-instance visibility bit. Used for [`RenderLayers`] and [`NotShadowCaster`] support. + pub view_instance_visibility: EntityHashMap>>, + + /// Next material ID available for a [`Material`] + next_material_id: u32, + /// Map of [`Material`] to material ID + material_id_lookup: HashMap, + /// Set of material IDs used in the scene + material_ids_present_in_scene: HashSet, +} + +impl InstanceManager { + pub fn new() -> Self { + Self { + scene_cluster_count: 0, + + instances: Vec::new(), + instance_uniforms: { + let mut buffer = StorageBuffer::default(); + buffer.set_label(Some("meshlet_instance_uniforms")); + buffer + }, + instance_material_ids: { + let mut buffer = StorageBuffer::default(); + buffer.set_label(Some("meshlet_instance_material_ids")); + buffer + }, + instance_meshlet_counts_prefix_sum: { + let mut buffer = StorageBuffer::default(); + buffer.set_label(Some("meshlet_instance_meshlet_counts_prefix_sum")); + buffer + }, + instance_meshlet_slice_starts: { + let mut buffer = StorageBuffer::default(); + buffer.set_label(Some("meshlet_instance_meshlet_slice_starts")); + buffer + }, + view_instance_visibility: EntityHashMap::default(), + + next_material_id: 0, + material_id_lookup: HashMap::new(), + material_ids_present_in_scene: HashSet::new(), + } + } + + #[allow(clippy::too_many_arguments)] + pub fn add_instance( + &mut self, + instance: Entity, + meshlets_slice: Range, + transform: &GlobalTransform, + previous_transform: Option<&PreviousGlobalTransform>, + render_layers: Option<&RenderLayers>, + not_shadow_receiver: bool, + not_shadow_caster: bool, + ) { + // Build a MeshUniform for the instance + let transform = transform.affine(); + let previous_transform = previous_transform.map(|t| t.0).unwrap_or(transform); + let mut flags = if not_shadow_receiver { + MeshFlags::empty() + } else { + MeshFlags::SHADOW_RECEIVER + }; + if transform.matrix3.determinant().is_sign_positive() { + flags |= MeshFlags::SIGN_DETERMINANT_MODEL_3X3; + } + let transforms = MeshTransforms { + world_from_local: (&transform).into(), + previous_world_from_local: (&previous_transform).into(), + flags: flags.bits(), + }; + let mesh_uniform = MeshUniform::new(&transforms, 0, None); + + // Append instance data + self.instances.push(( + instance, + render_layers.cloned().unwrap_or(RenderLayers::default()), + not_shadow_caster, + )); + self.instance_uniforms.get_mut().push(mesh_uniform); + self.instance_material_ids.get_mut().push(0); + self.instance_meshlet_counts_prefix_sum + .get_mut() + .push(self.scene_cluster_count); + self.instance_meshlet_slice_starts + .get_mut() + .push(meshlets_slice.start); + + self.scene_cluster_count += meshlets_slice.end - meshlets_slice.start; + } + + /// Get the material ID for a [`crate::Material`]. + pub fn get_material_id(&mut self, material_asset_id: UntypedAssetId) -> u32 { + *self + .material_id_lookup + .entry(material_asset_id) + .or_insert_with(|| { + self.next_material_id += 1; + self.next_material_id + }) + } + + pub fn material_present_in_scene(&self, material_id: &u32) -> bool { + self.material_ids_present_in_scene.contains(material_id) + } + + pub fn reset(&mut self, entities: &Entities) { + self.scene_cluster_count = 0; + + self.instances.clear(); + self.instance_uniforms.get_mut().clear(); + self.instance_material_ids.get_mut().clear(); + self.instance_meshlet_counts_prefix_sum.get_mut().clear(); + self.instance_meshlet_slice_starts.get_mut().clear(); + self.view_instance_visibility + .retain(|view_entity, _| entities.contains(*view_entity)); + self.view_instance_visibility + .values_mut() + .for_each(|b| b.get_mut().clear()); + + self.next_material_id = 0; + self.material_id_lookup.clear(); + self.material_ids_present_in_scene.clear(); + } +} + +pub fn extract_meshlet_mesh_entities( + mut meshlet_mesh_manager: ResMut, + mut instance_manager: ResMut, + // TODO: Replace main_world and system_state when Extract>> is possible + mut main_world: ResMut, + mut system_state: Local< + Option< + SystemState<( + Query<( + Entity, + &Handle, + &GlobalTransform, + Option<&PreviousGlobalTransform>, + Option<&RenderLayers>, + Has, + Has, + )>, + Res, + ResMut>, + EventReader>, + &Entities, + )>, + >, + >, +) { + // Get instances query + if system_state.is_none() { + *system_state = Some(SystemState::new(&mut main_world)); + } + let system_state = system_state.as_mut().unwrap(); + let (instances_query, asset_server, mut assets, mut asset_events, entities) = + system_state.get_mut(&mut main_world); + + // Reset per-frame data + instance_manager.reset(entities); + + // Free GPU buffer space for any modified or dropped MeshletMesh assets + for asset_event in asset_events.read() { + if let AssetEvent::Unused { id } | AssetEvent::Modified { id } = asset_event { + meshlet_mesh_manager.remove(id); + } + } + + // Iterate over every instance + for ( + instance, + meshlet_mesh, + transform, + previous_transform, + render_layers, + not_shadow_receiver, + not_shadow_caster, + ) in &instances_query + { + // Skip instances with an unloaded MeshletMesh asset + // TODO: This is a semi-expensive check + if asset_server.is_managed(meshlet_mesh.id()) + && !asset_server.is_loaded_with_dependencies(meshlet_mesh.id()) + { + continue; + } + + // Upload the instance's MeshletMesh asset data if not done already done + let meshlets_slice = + meshlet_mesh_manager.queue_upload_if_needed(meshlet_mesh.id(), &mut assets); + + // Add the instance's data to the instance manager + instance_manager.add_instance( + instance, + meshlets_slice, + transform, + previous_transform, + render_layers, + not_shadow_receiver, + not_shadow_caster, + ); + } +} + +/// For each entity in the scene, record what material ID its material was assigned in the `prepare_material_meshlet_meshes` systems, +/// and note that the material is used by at least one entity in the scene. +pub fn queue_material_meshlet_meshes( + mut instance_manager: ResMut, + render_material_instances: Res>, +) { + let instance_manager = instance_manager.deref_mut(); + + for (i, (instance, _, _)) in instance_manager.instances.iter().enumerate() { + if let Some(material_asset_id) = render_material_instances.get(instance) { + if let Some(material_id) = instance_manager + .material_id_lookup + .get(&material_asset_id.untyped()) + { + instance_manager + .material_ids_present_in_scene + .insert(*material_id); + instance_manager.instance_material_ids.get_mut()[i] = *material_id; + } + } + } +} diff --git a/crates/bevy_pbr/src/meshlet/material_draw_prepare.rs b/crates/bevy_pbr/src/meshlet/material_pipeline_prepare.rs similarity index 95% rename from crates/bevy_pbr/src/meshlet/material_draw_prepare.rs rename to crates/bevy_pbr/src/meshlet/material_pipeline_prepare.rs index f572edc0c7df4..1a5c3e2d56207 100644 --- a/crates/bevy_pbr/src/meshlet/material_draw_prepare.rs +++ b/crates/bevy_pbr/src/meshlet/material_pipeline_prepare.rs @@ -1,4 +1,7 @@ -use super::{MeshletGpuScene, MESHLET_MESH_MATERIAL_SHADER_HANDLE}; +use super::{ + instance_manager::InstanceManager, resource_manager::ResourceManager, + MESHLET_MESH_MATERIAL_SHADER_HANDLE, +}; use crate::{environment_map::EnvironmentMapLight, irradiance_volume::IrradianceVolume, *}; use bevy_asset::AssetServer; use bevy_core_pipeline::{ @@ -22,10 +25,11 @@ use std::hash::Hash; pub struct MeshletViewMaterialsMainOpaquePass(pub Vec<(u32, CachedRenderPipelineId, BindGroup)>); /// Prepare [`Material`] pipelines for [`super::MeshletMesh`] entities for use in [`super::MeshletMainOpaquePass3dNode`], -/// and register the material with [`MeshletGpuScene`]. +/// and register the material with [`InstanceManager`]. #[allow(clippy::too_many_arguments)] pub fn prepare_material_meshlet_meshes_main_opaque_pass( - mut gpu_scene: ResMut, + resource_manager: ResMut, + mut instance_manager: ResMut, mut cache: Local>, pipeline_cache: Res, material_pipeline: Res>, @@ -167,7 +171,7 @@ pub fn prepare_material_meshlet_meshes_main_opaque_pass( label: material_pipeline_descriptor.label, layout: vec![ mesh_pipeline.get_view_layout(view_key.into()).clone(), - gpu_scene.material_draw_bind_group_layout(), + resource_manager.material_shade_bind_group_layout.clone(), material_pipeline.material_layout.clone(), ], push_constant_ranges: vec![], @@ -198,7 +202,7 @@ pub fn prepare_material_meshlet_meshes_main_opaque_pass( }), }; - let material_id = gpu_scene.get_material_id(material_id.untyped()); + let material_id = instance_manager.get_material_id(material_id.untyped()); let pipeline_id = *cache.entry(view_key).or_insert_with(|| { pipeline_cache.queue_render_pipeline(pipeline_descriptor.clone()) @@ -219,10 +223,11 @@ pub struct MeshletViewMaterialsDeferredGBufferPrepass( ); /// Prepare [`Material`] pipelines for [`super::MeshletMesh`] entities for use in [`super::MeshletPrepassNode`], -/// and [`super::MeshletDeferredGBufferPrepassNode`] and register the material with [`MeshletGpuScene`]. +/// and [`super::MeshletDeferredGBufferPrepassNode`] and register the material with [`InstanceManager`]. #[allow(clippy::too_many_arguments)] pub fn prepare_material_meshlet_meshes_prepass( - mut gpu_scene: ResMut, + resource_manager: ResMut, + mut instance_manager: ResMut, mut cache: Local>, pipeline_cache: Res, prepass_pipeline: Res>, @@ -319,7 +324,7 @@ pub fn prepare_material_meshlet_meshes_prepass( label: material_pipeline_descriptor.label, layout: vec![ view_layout, - gpu_scene.material_draw_bind_group_layout(), + resource_manager.material_shade_bind_group_layout.clone(), prepass_pipeline.material_layout.clone(), ], push_constant_ranges: vec![], @@ -350,7 +355,7 @@ pub fn prepare_material_meshlet_meshes_prepass( }), }; - let material_id = gpu_scene.get_material_id(material_id.untyped()); + let material_id = instance_manager.get_material_id(material_id.untyped()); let pipeline_id = *cache.entry(view_key).or_insert_with(|| { pipeline_cache.queue_render_pipeline(pipeline_descriptor.clone()) diff --git a/crates/bevy_pbr/src/meshlet/material_draw_nodes.rs b/crates/bevy_pbr/src/meshlet/material_shade_nodes.rs similarity index 91% rename from crates/bevy_pbr/src/meshlet/material_draw_nodes.rs rename to crates/bevy_pbr/src/meshlet/material_shade_nodes.rs index e7b71ea25366d..9c2d432d8856a 100644 --- a/crates/bevy_pbr/src/meshlet/material_draw_nodes.rs +++ b/crates/bevy_pbr/src/meshlet/material_shade_nodes.rs @@ -1,10 +1,10 @@ use super::{ - gpu_scene::{MeshletViewBindGroups, MeshletViewResources}, - material_draw_prepare::{ + material_pipeline_prepare::{ MeshletViewMaterialsDeferredGBufferPrepass, MeshletViewMaterialsMainOpaquePass, MeshletViewMaterialsPrepass, }, - MeshletGpuScene, + resource_manager::{MeshletViewBindGroups, MeshletViewResources}, + InstanceManager, }; use crate::{ MeshViewBindGroup, PrepassViewBindGroup, ViewEnvironmentMapUniformOffset, ViewFogUniformOffset, @@ -72,15 +72,15 @@ impl ViewNode for MeshletMainOpaquePass3dNode { } let ( - Some(meshlet_gpu_scene), + Some(instance_manager), Some(pipeline_cache), Some(meshlet_material_depth), - Some(meshlet_material_draw_bind_group), + Some(meshlet_material_shade_bind_group), ) = ( - world.get_resource::(), + world.get_resource::(), world.get_resource::(), meshlet_view_resources.material_depth.as_ref(), - meshlet_view_bind_groups.material_draw.as_ref(), + meshlet_view_bind_groups.material_shade.as_ref(), ) else { return Ok(()); @@ -116,13 +116,13 @@ impl ViewNode for MeshletMainOpaquePass3dNode { **view_environment_map_offset, ], ); - render_pass.set_bind_group(1, meshlet_material_draw_bind_group, &[]); + render_pass.set_bind_group(1, meshlet_material_shade_bind_group, &[]); // 1 fullscreen triangle draw per material for (material_id, material_pipeline_id, material_bind_group) in meshlet_view_materials.iter() { - if meshlet_gpu_scene.material_present_in_scene(material_id) { + if instance_manager.material_present_in_scene(material_id) { if let Some(material_pipeline) = pipeline_cache.get_render_pipeline(*material_pipeline_id) { @@ -175,16 +175,16 @@ impl ViewNode for MeshletPrepassNode { let ( Some(prepass_view_bind_group), - Some(meshlet_gpu_scene), + Some(instance_manager), Some(pipeline_cache), Some(meshlet_material_depth), - Some(meshlet_material_draw_bind_group), + Some(meshlet_material_shade_bind_group), ) = ( world.get_resource::(), - world.get_resource::(), + world.get_resource::(), world.get_resource::(), meshlet_view_resources.material_depth.as_ref(), - meshlet_view_bind_groups.material_draw.as_ref(), + meshlet_view_bind_groups.material_shade.as_ref(), ) else { return Ok(()); @@ -239,13 +239,13 @@ impl ViewNode for MeshletPrepassNode { ); } - render_pass.set_bind_group(1, meshlet_material_draw_bind_group, &[]); + render_pass.set_bind_group(1, meshlet_material_shade_bind_group, &[]); // 1 fullscreen triangle draw per material for (material_id, material_pipeline_id, material_bind_group) in meshlet_view_materials.iter() { - if meshlet_gpu_scene.material_present_in_scene(material_id) { + if instance_manager.material_present_in_scene(material_id) { if let Some(material_pipeline) = pipeline_cache.get_render_pipeline(*material_pipeline_id) { @@ -298,16 +298,16 @@ impl ViewNode for MeshletDeferredGBufferPrepassNode { let ( Some(prepass_view_bind_group), - Some(meshlet_gpu_scene), + Some(instance_manager), Some(pipeline_cache), Some(meshlet_material_depth), - Some(meshlet_material_draw_bind_group), + Some(meshlet_material_shade_bind_group), ) = ( world.get_resource::(), - world.get_resource::(), + world.get_resource::(), world.get_resource::(), meshlet_view_resources.material_depth.as_ref(), - meshlet_view_bind_groups.material_draw.as_ref(), + meshlet_view_bind_groups.material_shade.as_ref(), ) else { return Ok(()); @@ -367,13 +367,13 @@ impl ViewNode for MeshletDeferredGBufferPrepassNode { ); } - render_pass.set_bind_group(1, meshlet_material_draw_bind_group, &[]); + render_pass.set_bind_group(1, meshlet_material_shade_bind_group, &[]); // 1 fullscreen triangle draw per material for (material_id, material_pipeline_id, material_bind_group) in meshlet_view_materials.iter() { - if meshlet_gpu_scene.material_present_in_scene(material_id) { + if instance_manager.material_present_in_scene(material_id) { if let Some(material_pipeline) = pipeline_cache.get_render_pipeline(*material_pipeline_id) { diff --git a/crates/bevy_pbr/src/meshlet/meshlet_bindings.wgsl b/crates/bevy_pbr/src/meshlet/meshlet_bindings.wgsl index a3f18cbc9b29e..f70252b28e328 100644 --- a/crates/bevy_pbr/src/meshlet/meshlet_bindings.wgsl +++ b/crates/bevy_pbr/src/meshlet/meshlet_bindings.wgsl @@ -30,6 +30,7 @@ fn unpack_meshlet_vertex(packed: PackedMeshletVertex) -> MeshletVertex { struct Meshlet { start_vertex_id: u32, start_index_id: u32, + vertex_count: u32, triangle_count: u32, } @@ -44,9 +45,15 @@ struct MeshletBoundingSphere { radius: f32, } +struct DispatchIndirectArgs { + x: atomic, + y: u32, + z: u32, +} + struct DrawIndirectArgs { - vertex_count: atomic, - instance_count: u32, + vertex_count: u32, + instance_count: atomic, first_vertex: u32, first_instance: u32, } @@ -60,15 +67,16 @@ var cluster_count: u32; #endif #ifdef MESHLET_CULLING_PASS +var meshlet_raster_cluster_rightmost_slot: u32; @group(0) @binding(0) var meshlet_cluster_meshlet_ids: array; // Per cluster @group(0) @binding(1) var meshlet_bounding_spheres: array; // Per meshlet @group(0) @binding(2) var meshlet_cluster_instance_ids: array; // Per cluster @group(0) @binding(3) var meshlet_instance_uniforms: array; // Per entity instance @group(0) @binding(4) var meshlet_view_instance_visibility: array; // 1 bit per entity instance, packed as a bitmask @group(0) @binding(5) var meshlet_second_pass_candidates: array>; // 1 bit per cluster , packed as a bitmask -@group(0) @binding(6) var meshlets: array; // Per meshlet -@group(0) @binding(7) var draw_indirect_args: DrawIndirectArgs; // Single object shared between all workgroups/meshlets/triangles -@group(0) @binding(8) var draw_triangle_buffer: array; // Single object shared between all workgroups/meshlets/triangles +@group(0) @binding(6) var meshlet_software_raster_indirect_args: DispatchIndirectArgs; // Single object shared between all workgroups/clusters/triangles +@group(0) @binding(7) var meshlet_hardware_raster_indirect_args: DrawIndirectArgs; // Single object shared between all workgroups/clusters/triangles +@group(0) @binding(8) var meshlet_raster_clusters: array; // Single object shared between all workgroups/clusters/triangles @group(0) @binding(9) var depth_pyramid: texture_2d; // From the end of the last frame for the first culling pass, and from the first raster pass for the second culling pass @group(0) @binding(10) var view: View; @group(0) @binding(11) var previous_view: PreviousViewUniforms; @@ -79,6 +87,7 @@ fn should_cull_instance(instance_id: u32) -> bool { return bool(extractBits(packed_visibility, bit_offset, 1u)); } +// TODO: Load 4x per workgroup instead of once per thread? fn cluster_is_second_pass_candidate(cluster_id: u32) -> bool { let packed_candidates = meshlet_second_pass_candidates[cluster_id / 32u]; let bit_offset = cluster_id % 32u; @@ -94,10 +103,16 @@ fn cluster_is_second_pass_candidate(cluster_id: u32) -> bool { @group(0) @binding(4) var meshlet_vertex_data: array; // Many per meshlet @group(0) @binding(5) var meshlet_cluster_instance_ids: array; // Per cluster @group(0) @binding(6) var meshlet_instance_uniforms: array; // Per entity instance -@group(0) @binding(7) var meshlet_instance_material_ids: array; // Per entity instance -@group(0) @binding(8) var draw_triangle_buffer: array; // Single object shared between all workgroups/meshlets/triangles -@group(0) @binding(9) var view: View; +@group(0) @binding(7) var meshlet_raster_clusters: array; // Single object shared between all workgroups/clusters/triangles +@group(0) @binding(8) var meshlet_software_raster_cluster_count: u32; +#ifdef MESHLET_VISIBILITY_BUFFER_RASTER_PASS_OUTPUT +@group(0) @binding(9) var meshlet_visibility_buffer: array>; // Per pixel +#else +@group(0) @binding(9) var meshlet_visibility_buffer: array>; // Per pixel +#endif +@group(0) @binding(10) var view: View; +// TODO: Load only twice, instead of 3x in cases where you load 3 indices per thread? fn get_meshlet_index(index_id: u32) -> u32 { let packed_index = meshlet_indices[index_id / 4u]; let bit_offset = (index_id % 4u) * 8u; @@ -106,7 +121,7 @@ fn get_meshlet_index(index_id: u32) -> u32 { #endif #ifdef MESHLET_MESH_MATERIAL_PASS -@group(1) @binding(0) var meshlet_visibility_buffer: texture_2d; // Generated from the meshlet raster passes +@group(1) @binding(0) var meshlet_visibility_buffer: array; // Per pixel @group(1) @binding(1) var meshlet_cluster_meshlet_ids: array; // Per cluster @group(1) @binding(2) var meshlets: array; // Per meshlet @group(1) @binding(3) var meshlet_indices: array; // Many per meshlet @@ -115,6 +130,7 @@ fn get_meshlet_index(index_id: u32) -> u32 { @group(1) @binding(6) var meshlet_cluster_instance_ids: array; // Per cluster @group(1) @binding(7) var meshlet_instance_uniforms: array; // Per entity instance +// TODO: Load only twice, instead of 3x in cases where you load 3 indices per thread? fn get_meshlet_index(index_id: u32) -> u32 { let packed_index = meshlet_indices[index_id / 4u]; let bit_offset = (index_id % 4u) * 8u; diff --git a/crates/bevy_pbr/src/meshlet/meshlet_mesh_manager.rs b/crates/bevy_pbr/src/meshlet/meshlet_mesh_manager.rs new file mode 100644 index 0000000000000..03855ec039a53 --- /dev/null +++ b/crates/bevy_pbr/src/meshlet/meshlet_mesh_manager.rs @@ -0,0 +1,132 @@ +use super::{ + asset::{Meshlet, MeshletBoundingSpheres}, + persistent_buffer::PersistentGpuBuffer, + MeshletMesh, +}; +use bevy_asset::{AssetId, Assets}; +use bevy_ecs::{ + system::{Res, ResMut, Resource}, + world::{FromWorld, World}, +}; +use bevy_render::{ + render_resource::BufferAddress, + renderer::{RenderDevice, RenderQueue}, +}; +use bevy_utils::HashMap; +use std::{mem::size_of, ops::Range, sync::Arc}; + +/// Manages uploading [`MeshletMesh`] asset data to the GPU. +#[derive(Resource)] +pub struct MeshletMeshManager { + pub vertex_data: PersistentGpuBuffer>, + pub vertex_ids: PersistentGpuBuffer>, + pub indices: PersistentGpuBuffer>, + pub meshlets: PersistentGpuBuffer>, + pub meshlet_bounding_spheres: PersistentGpuBuffer>, + meshlet_mesh_slices: HashMap, [Range; 5]>, +} + +impl FromWorld for MeshletMeshManager { + fn from_world(world: &mut World) -> Self { + let render_device = world.resource::(); + Self { + vertex_data: PersistentGpuBuffer::new("meshlet_vertex_data", render_device), + vertex_ids: PersistentGpuBuffer::new("meshlet_vertex_ids", render_device), + indices: PersistentGpuBuffer::new("meshlet_indices", render_device), + meshlets: PersistentGpuBuffer::new("meshlets", render_device), + meshlet_bounding_spheres: PersistentGpuBuffer::new( + "meshlet_bounding_spheres", + render_device, + ), + meshlet_mesh_slices: HashMap::new(), + } + } +} + +impl MeshletMeshManager { + pub fn queue_upload_if_needed( + &mut self, + asset_id: AssetId, + assets: &mut Assets, + ) -> Range { + let queue_meshlet_mesh = |asset_id: &AssetId| { + let meshlet_mesh = assets.remove_untracked(*asset_id).expect( + "MeshletMesh asset was already unloaded but is not registered with MeshletMeshManager", + ); + + let vertex_data_slice = self + .vertex_data + .queue_write(Arc::clone(&meshlet_mesh.vertex_data), ()); + let vertex_ids_slice = self.vertex_ids.queue_write( + Arc::clone(&meshlet_mesh.vertex_ids), + vertex_data_slice.start, + ); + let indices_slice = self + .indices + .queue_write(Arc::clone(&meshlet_mesh.indices), ()); + let meshlets_slice = self.meshlets.queue_write( + Arc::clone(&meshlet_mesh.meshlets), + (vertex_ids_slice.start, indices_slice.start), + ); + let meshlet_bounding_spheres_slice = self + .meshlet_bounding_spheres + .queue_write(Arc::clone(&meshlet_mesh.bounding_spheres), ()); + + [ + vertex_data_slice, + vertex_ids_slice, + indices_slice, + meshlets_slice, + meshlet_bounding_spheres_slice, + ] + }; + + // If the MeshletMesh asset has not been uploaded to the GPU yet, queue it for uploading + let [_, _, _, meshlets_slice, _] = self + .meshlet_mesh_slices + .entry(asset_id) + .or_insert_with_key(queue_meshlet_mesh) + .clone(); + + let meshlets_slice_start = meshlets_slice.start as u32 / size_of::() as u32; + let meshlets_slice_end = meshlets_slice.end as u32 / size_of::() as u32; + meshlets_slice_start..meshlets_slice_end + } + + pub fn remove(&mut self, asset_id: &AssetId) { + if let Some( + [vertex_data_slice, vertex_ids_slice, indices_slice, meshlets_slice, meshlet_bounding_spheres_slice], + ) = self.meshlet_mesh_slices.remove(asset_id) + { + self.vertex_data.mark_slice_unused(vertex_data_slice); + self.vertex_ids.mark_slice_unused(vertex_ids_slice); + self.indices.mark_slice_unused(indices_slice); + self.meshlets.mark_slice_unused(meshlets_slice); + self.meshlet_bounding_spheres + .mark_slice_unused(meshlet_bounding_spheres_slice); + } + } +} + +/// Upload all newly queued [`MeshletMesh`] asset data to the GPU. +pub fn perform_pending_meshlet_mesh_writes( + mut meshlet_mesh_manager: ResMut, + render_queue: Res, + render_device: Res, +) { + meshlet_mesh_manager + .vertex_data + .perform_writes(&render_queue, &render_device); + meshlet_mesh_manager + .vertex_ids + .perform_writes(&render_queue, &render_device); + meshlet_mesh_manager + .indices + .perform_writes(&render_queue, &render_device); + meshlet_mesh_manager + .meshlets + .perform_writes(&render_queue, &render_device); + meshlet_mesh_manager + .meshlet_bounding_spheres + .perform_writes(&render_queue, &render_device); +} diff --git a/crates/bevy_pbr/src/meshlet/mod.rs b/crates/bevy_pbr/src/meshlet/mod.rs index a55dc42247a80..d61dc05c5671f 100644 --- a/crates/bevy_pbr/src/meshlet/mod.rs +++ b/crates/bevy_pbr/src/meshlet/mod.rs @@ -3,12 +3,14 @@ mod asset; #[cfg(feature = "meshlet_processor")] mod from_mesh; -mod gpu_scene; -mod material_draw_nodes; -mod material_draw_prepare; +mod instance_manager; +mod material_pipeline_prepare; +mod material_shade_nodes; +mod meshlet_mesh_manager; mod persistent_buffer; mod persistent_buffer_impls; mod pipelines; +mod resource_manager; mod visibility_buffer_raster_node; pub mod graph { @@ -24,8 +26,8 @@ pub mod graph { } pub(crate) use self::{ - gpu_scene::{queue_material_meshlet_meshes, MeshletGpuScene}, - material_draw_prepare::{ + instance_manager::{queue_material_meshlet_meshes, InstanceManager}, + material_pipeline_prepare::{ prepare_material_meshlet_meshes_main_opaque_pass, prepare_material_meshlet_meshes_prepass, }, }; @@ -35,22 +37,19 @@ pub use self::asset::{MeshletMesh, MeshletMeshSaverLoader}; pub use self::from_mesh::MeshToMeshletMeshConversionError; use self::{ - gpu_scene::{ - extract_meshlet_meshes, perform_pending_meshlet_mesh_writes, - prepare_meshlet_per_frame_resources, prepare_meshlet_view_bind_groups, - }, graph::NodeMeshlet, - material_draw_nodes::{ - MeshletDeferredGBufferPrepassNode, MeshletMainOpaquePass3dNode, MeshletPrepassNode, - }, - material_draw_prepare::{ + instance_manager::extract_meshlet_mesh_entities, + material_pipeline_prepare::{ MeshletViewMaterialsDeferredGBufferPrepass, MeshletViewMaterialsMainOpaquePass, MeshletViewMaterialsPrepass, }, - pipelines::{ - MeshletPipelines, MESHLET_COPY_MATERIAL_DEPTH_SHADER_HANDLE, MESHLET_CULLING_SHADER_HANDLE, - MESHLET_DOWNSAMPLE_DEPTH_SHADER_HANDLE, MESHLET_FILL_CLUSTER_BUFFERS_SHADER_HANDLE, - MESHLET_VISIBILITY_BUFFER_RASTER_SHADER_HANDLE, + material_shade_nodes::{ + MeshletDeferredGBufferPrepassNode, MeshletMainOpaquePass3dNode, MeshletPrepassNode, + }, + meshlet_mesh_manager::{perform_pending_meshlet_mesh_writes, MeshletMeshManager}, + pipelines::*, + resource_manager::{ + prepare_meshlet_per_frame_resources, prepare_meshlet_view_bind_groups, ResourceManager, }, visibility_buffer_raster_node::MeshletVisibilityBufferRasterPassNode, }; @@ -58,10 +57,7 @@ use crate::{graph::NodePbr, Material}; use bevy_app::{App, Plugin, PostUpdate}; use bevy_asset::{load_internal_asset, AssetApp, Handle}; use bevy_core_pipeline::{ - core_3d::{ - graph::{Core3d, Node3d}, - Camera3d, - }, + core_3d::graph::{Core3d, Node3d}, prepass::{DeferredPrepass, MotionVectorPrepass, NormalPrepass}, }; use bevy_ecs::{ @@ -74,7 +70,7 @@ use bevy_ecs::{ }; use bevy_render::{ render_graph::{RenderGraphApp, ViewNodeRunner}, - render_resource::{Shader, TextureUsages}, + render_resource::Shader, renderer::RenderDevice, settings::WgpuFeatures, view::{ @@ -84,6 +80,7 @@ use bevy_render::{ ExtractSchedule, Render, RenderApp, RenderSet, }; use bevy_transform::components::{GlobalTransform, Transform}; +use bevy_utils::tracing::error; const MESHLET_BINDINGS_SHADER_HANDLE: Handle = Handle::weak_from_u128(1325134235233421); const MESHLET_MESH_MATERIAL_SHADER_HANDLE: Handle = @@ -96,26 +93,46 @@ const MESHLET_MESH_MATERIAL_SHADER_HANDLE: Handle = /// /// In comparison to Bevy's standard renderer: /// * Much more efficient culling. Meshlets can be culled individually, instead of all or nothing culling for entire meshes at a time. -/// Additionally, occlusion culling can eliminate meshlets that would cause overdraw. -/// * Much more efficient batching. All geometry can be rasterized in a single indirect draw. +/// Additionally, occlusion culling can eliminate meshlets that would cause overdraw. +/// * Much more efficient batching. All geometry can be rasterized in a single draw. /// * Scales better with large amounts of dense geometry and overdraw. Bevy's standard renderer will bottleneck sooner. /// * Near-seamless level of detail (LOD). -/// * Much greater base overhead. Rendering will be slower than Bevy's standard renderer with small amounts of geometry and overdraw. -/// * Much greater memory usage. +/// * Much greater base overhead. Rendering will be slower and use more memory than Bevy's standard renderer +/// with small amounts of geometry and overdraw. /// * Requires preprocessing meshes. See [`MeshletMesh`] for details. /// * Limitations on the kinds of materials you can use. See [`MeshletMesh`] for details. /// +/// This plugin requires a fairly recent GPU that supports [`WgpuFeatures::SHADER_INT64_ATOMIC_MIN_MAX`]. +/// +/// This plugin currently works only on the Vulkan backend. +/// /// This plugin is not compatible with [`Msaa`]. Any camera rendering a [`MeshletMesh`] must have /// [`Msaa`] set to [`Msaa::Off`]. /// -/// This plugin does not work on Wasm. -/// /// Mixing forward+prepass and deferred rendering for opaque materials is not currently supported when using this plugin. /// You must use one or the other by setting [`crate::DefaultOpaqueRendererMethod`]. /// Do not override [`crate::Material::opaque_render_method`] for any material when using this plugin. /// /// ![A render of the Stanford dragon as a `MeshletMesh`](https://raw.githubusercontent.com/bevyengine/bevy/main/crates/bevy_pbr/src/meshlet/meshlet_preview.png) -pub struct MeshletPlugin; +pub struct MeshletPlugin { + /// The maximum amount of clusters that can be processed at once, + /// used to control the size of a pre-allocated GPU buffer. + /// + /// If this number is too low, you'll see rendering artifacts like missing or blinking meshes. + /// + /// Each cluster slot costs 4 bytes of VRAM. + pub cluster_buffer_slots: u32, +} + +impl MeshletPlugin { + /// [`WgpuFeatures`] required for this plugin to function. + pub fn required_wgpu_features() -> WgpuFeatures { + WgpuFeatures::SHADER_INT64_ATOMIC_MIN_MAX + | WgpuFeatures::SHADER_INT64 + | WgpuFeatures::SUBGROUP + | WgpuFeatures::PUSH_CONSTANTS + } +} impl Plugin for MeshletPlugin { fn build(&self, app: &mut App) { @@ -154,8 +171,14 @@ impl Plugin for MeshletPlugin { ); load_internal_asset!( app, - MESHLET_VISIBILITY_BUFFER_RASTER_SHADER_HANDLE, - "visibility_buffer_raster.wgsl", + MESHLET_VISIBILITY_BUFFER_SOFTWARE_RASTER_SHADER_HANDLE, + "visibility_buffer_software_raster.wgsl", + Shader::from_wgsl + ); + load_internal_asset!( + app, + MESHLET_VISIBILITY_BUFFER_HARDWARE_RASTER_SHADER_HANDLE, + "visibility_buffer_hardware_raster.wgsl", Shader::from_wgsl ); load_internal_asset!( @@ -166,8 +189,14 @@ impl Plugin for MeshletPlugin { ); load_internal_asset!( app, - MESHLET_COPY_MATERIAL_DEPTH_SHADER_HANDLE, - "copy_material_depth.wgsl", + MESHLET_RESOLVE_RENDER_TARGETS_SHADER_HANDLE, + "resolve_render_targets.wgsl", + Shader::from_wgsl + ); + load_internal_asset!( + app, + MESHLET_REMAP_1D_TO_2D_DISPATCH_SHADER_HANDLE, + "remap_1d_to_2d_dispatch.wgsl", Shader::from_wgsl ); @@ -184,13 +213,14 @@ impl Plugin for MeshletPlugin { return; }; - if !render_app - .world() - .resource::() - .features() - .contains(WgpuFeatures::PUSH_CONSTANTS) - { - panic!("MeshletPlugin can't be used. GPU lacks support: WgpuFeatures::PUSH_CONSTANTS is not supported."); + let render_device = render_app.world().resource::().clone(); + let features = render_device.features(); + if !features.contains(Self::required_wgpu_features()) { + error!( + "MeshletPlugin can't be used. GPU lacks support for required features: {:?}.", + Self::required_wgpu_features().difference(features) + ); + std::process::exit(1); } render_app @@ -213,24 +243,31 @@ impl Plugin for MeshletPlugin { .add_render_graph_edges( Core3d, ( - // Non-meshlet shading passes _must_ come before meshlet shading passes - NodePbr::ShadowPass, NodeMeshlet::VisibilityBufferRasterPass, + NodePbr::ShadowPass, + // NodeMeshlet::Prepass, Node3d::Prepass, + // NodeMeshlet::DeferredPrepass, Node3d::DeferredPrepass, Node3d::CopyDeferredLightingId, Node3d::EndPrepasses, + // Node3d::StartMainPass, NodeMeshlet::MainOpaquePass, Node3d::MainOpaquePass, Node3d::EndMainPass, ), ) - .init_resource::() + .init_resource::() + .insert_resource(InstanceManager::new()) + .insert_resource(ResourceManager::new( + self.cluster_buffer_slots, + &render_device, + )) .init_resource::() - .add_systems(ExtractSchedule, extract_meshlet_meshes) + .add_systems(ExtractSchedule, extract_meshlet_mesh_entities) .add_systems( Render, ( @@ -281,7 +318,6 @@ pub type WithMeshletMesh = With>; fn configure_meshlet_views( mut views_3d: Query<( Entity, - &mut Camera3d, &Msaa, Has, Has, @@ -289,17 +325,12 @@ fn configure_meshlet_views( )>, mut commands: Commands, ) { - for (entity, mut camera_3d, msaa, normal_prepass, motion_vector_prepass, deferred_prepass) in - &mut views_3d - { + for (entity, msaa, normal_prepass, motion_vector_prepass, deferred_prepass) in &mut views_3d { if *msaa != Msaa::Off { - panic!("MeshletPlugin can't be used. MSAA is not supported."); + error!("MeshletPlugin can't be used with MSAA. Add Msaa::Off to your camera to use this plugin."); + std::process::exit(1); } - let mut usages: TextureUsages = camera_3d.depth_texture_usages.into(); - usages |= TextureUsages::TEXTURE_BINDING; - camera_3d.depth_texture_usages = usages.into(); - if !(normal_prepass || motion_vector_prepass || deferred_prepass) { commands .entity(entity) diff --git a/crates/bevy_pbr/src/meshlet/persistent_buffer_impls.rs b/crates/bevy_pbr/src/meshlet/persistent_buffer_impls.rs index 64b6861260f60..da341c285e895 100644 --- a/crates/bevy_pbr/src/meshlet/persistent_buffer_impls.rs +++ b/crates/bevy_pbr/src/meshlet/persistent_buffer_impls.rs @@ -58,6 +58,7 @@ impl PersistentGpuBufferable for Arc<[Meshlet]> { let bytes = bytemuck::cast::<_, [u8; size_of::()]>(Meshlet { start_vertex_id: meshlet.start_vertex_id + vertex_offset, start_index_id: meshlet.start_index_id + index_offset, + vertex_count: meshlet.vertex_count, triangle_count: meshlet.triangle_count, }); buffer_slice[i..(i + size)].clone_from_slice(&bytes); diff --git a/crates/bevy_pbr/src/meshlet/pipelines.rs b/crates/bevy_pbr/src/meshlet/pipelines.rs index 0112dcbb676cc..69f10e015e279 100644 --- a/crates/bevy_pbr/src/meshlet/pipelines.rs +++ b/crates/bevy_pbr/src/meshlet/pipelines.rs @@ -1,4 +1,4 @@ -use super::gpu_scene::MeshletGpuScene; +use super::resource_manager::ResourceManager; use bevy_asset::Handle; use bevy_core_pipeline::{ core_3d::CORE_3D_DEPTH_FORMAT, fullscreen_vertex_shader::fullscreen_shader_vertex_state, @@ -14,10 +14,14 @@ pub const MESHLET_FILL_CLUSTER_BUFFERS_SHADER_HANDLE: Handle = pub const MESHLET_CULLING_SHADER_HANDLE: Handle = Handle::weak_from_u128(5325134235233421); pub const MESHLET_DOWNSAMPLE_DEPTH_SHADER_HANDLE: Handle = Handle::weak_from_u128(6325134235233421); -pub const MESHLET_VISIBILITY_BUFFER_RASTER_SHADER_HANDLE: Handle = +pub const MESHLET_VISIBILITY_BUFFER_SOFTWARE_RASTER_SHADER_HANDLE: Handle = Handle::weak_from_u128(7325134235233421); -pub const MESHLET_COPY_MATERIAL_DEPTH_SHADER_HANDLE: Handle = +pub const MESHLET_VISIBILITY_BUFFER_HARDWARE_RASTER_SHADER_HANDLE: Handle = Handle::weak_from_u128(8325134235233421); +pub const MESHLET_RESOLVE_RENDER_TARGETS_SHADER_HANDLE: Handle = + Handle::weak_from_u128(9325134235233421); +pub const MESHLET_REMAP_1D_TO_2D_DISPATCH_SHADER_HANDLE: Handle = + Handle::weak_from_u128(9425134235233421); #[derive(Resource)] pub struct MeshletPipelines { @@ -26,21 +30,38 @@ pub struct MeshletPipelines { cull_second: CachedComputePipelineId, downsample_depth_first: CachedComputePipelineId, downsample_depth_second: CachedComputePipelineId, - visibility_buffer_raster: CachedRenderPipelineId, - visibility_buffer_raster_depth_only: CachedRenderPipelineId, - visibility_buffer_raster_depth_only_clamp_ortho: CachedRenderPipelineId, - copy_material_depth: CachedRenderPipelineId, + downsample_depth_first_shadow_view: CachedComputePipelineId, + downsample_depth_second_shadow_view: CachedComputePipelineId, + visibility_buffer_software_raster: CachedComputePipelineId, + visibility_buffer_software_raster_depth_only: CachedComputePipelineId, + visibility_buffer_software_raster_depth_only_clamp_ortho: CachedComputePipelineId, + visibility_buffer_hardware_raster: CachedRenderPipelineId, + visibility_buffer_hardware_raster_depth_only: CachedRenderPipelineId, + visibility_buffer_hardware_raster_depth_only_clamp_ortho: CachedRenderPipelineId, + resolve_depth: CachedRenderPipelineId, + resolve_depth_shadow_view: CachedRenderPipelineId, + resolve_material_depth: CachedRenderPipelineId, + remap_1d_to_2d_dispatch: Option, } impl FromWorld for MeshletPipelines { fn from_world(world: &mut World) -> Self { - let gpu_scene = world.resource::(); - let fill_cluster_buffers_bind_group_layout = - gpu_scene.fill_cluster_buffers_bind_group_layout(); - let cull_layout = gpu_scene.culling_bind_group_layout(); - let downsample_depth_layout = gpu_scene.downsample_depth_bind_group_layout(); - let visibility_buffer_layout = gpu_scene.visibility_buffer_raster_bind_group_layout(); - let copy_material_depth_layout = gpu_scene.copy_material_depth_bind_group_layout(); + let resource_manager = world.resource::(); + let fill_cluster_buffers_bind_group_layout = resource_manager + .fill_cluster_buffers_bind_group_layout + .clone(); + let cull_layout = resource_manager.culling_bind_group_layout.clone(); + let downsample_depth_layout = resource_manager.downsample_depth_bind_group_layout.clone(); + let visibility_buffer_raster_layout = resource_manager + .visibility_buffer_raster_bind_group_layout + .clone(); + let resolve_depth_layout = resource_manager.resolve_depth_bind_group_layout.clone(); + let resolve_material_depth_layout = resource_manager + .resolve_material_depth_bind_group_layout + .clone(); + let remap_1d_to_2d_dispatch_layout = resource_manager + .remap_1d_to_2d_dispatch_bind_group_layout + .clone(); let pipeline_cache = world.resource_mut::(); Self { @@ -61,7 +82,10 @@ impl FromWorld for MeshletPipelines { cull_first: pipeline_cache.queue_compute_pipeline(ComputePipelineDescriptor { label: Some("meshlet_culling_first_pipeline".into()), layout: vec![cull_layout.clone()], - push_constant_ranges: vec![], + push_constant_ranges: vec![PushConstantRange { + stages: ShaderStages::COMPUTE, + range: 0..4, + }], shader: MESHLET_CULLING_SHADER_HANDLE, shader_defs: vec![ "MESHLET_CULLING_PASS".into(), @@ -73,7 +97,10 @@ impl FromWorld for MeshletPipelines { cull_second: pipeline_cache.queue_compute_pipeline(ComputePipelineDescriptor { label: Some("meshlet_culling_second_pipeline".into()), layout: vec![cull_layout], - push_constant_ranges: vec![], + push_constant_ranges: vec![PushConstantRange { + stages: ShaderStages::COMPUTE, + range: 0..4, + }], shader: MESHLET_CULLING_SHADER_HANDLE, shader_defs: vec![ "MESHLET_CULLING_PASS".into(), @@ -88,21 +115,49 @@ impl FromWorld for MeshletPipelines { layout: vec![downsample_depth_layout.clone()], push_constant_ranges: vec![PushConstantRange { stages: ShaderStages::COMPUTE, - range: 0..4, + range: 0..8, }], shader: MESHLET_DOWNSAMPLE_DEPTH_SHADER_HANDLE, - shader_defs: vec![], + shader_defs: vec!["MESHLET_VISIBILITY_BUFFER_RASTER_PASS_OUTPUT".into()], entry_point: "downsample_depth_first".into(), }, ), downsample_depth_second: pipeline_cache.queue_compute_pipeline( + ComputePipelineDescriptor { + label: Some("meshlet_downsample_depth_second_pipeline".into()), + layout: vec![downsample_depth_layout.clone()], + push_constant_ranges: vec![PushConstantRange { + stages: ShaderStages::COMPUTE, + range: 0..8, + }], + shader: MESHLET_DOWNSAMPLE_DEPTH_SHADER_HANDLE, + shader_defs: vec!["MESHLET_VISIBILITY_BUFFER_RASTER_PASS_OUTPUT".into()], + entry_point: "downsample_depth_second".into(), + }, + ), + + downsample_depth_first_shadow_view: pipeline_cache.queue_compute_pipeline( + ComputePipelineDescriptor { + label: Some("meshlet_downsample_depth_first_pipeline".into()), + layout: vec![downsample_depth_layout.clone()], + push_constant_ranges: vec![PushConstantRange { + stages: ShaderStages::COMPUTE, + range: 0..8, + }], + shader: MESHLET_DOWNSAMPLE_DEPTH_SHADER_HANDLE, + shader_defs: vec![], + entry_point: "downsample_depth_first".into(), + }, + ), + + downsample_depth_second_shadow_view: pipeline_cache.queue_compute_pipeline( ComputePipelineDescriptor { label: Some("meshlet_downsample_depth_second_pipeline".into()), layout: vec![downsample_depth_layout], push_constant_ranges: vec![PushConstantRange { stages: ShaderStages::COMPUTE, - range: 0..4, + range: 0..8, }], shader: MESHLET_DOWNSAMPLE_DEPTH_SHADER_HANDLE, shader_defs: vec![], @@ -110,13 +165,79 @@ impl FromWorld for MeshletPipelines { }, ), - visibility_buffer_raster: pipeline_cache.queue_render_pipeline( - RenderPipelineDescriptor { - label: Some("meshlet_visibility_buffer_raster_pipeline".into()), - layout: vec![visibility_buffer_layout.clone()], + visibility_buffer_software_raster: pipeline_cache.queue_compute_pipeline( + ComputePipelineDescriptor { + label: Some("meshlet_visibility_buffer_software_raster_pipeline".into()), + layout: vec![visibility_buffer_raster_layout.clone()], + push_constant_ranges: vec![], + shader: MESHLET_VISIBILITY_BUFFER_SOFTWARE_RASTER_SHADER_HANDLE, + shader_defs: vec![ + "MESHLET_VISIBILITY_BUFFER_RASTER_PASS".into(), + "MESHLET_VISIBILITY_BUFFER_RASTER_PASS_OUTPUT".into(), + if remap_1d_to_2d_dispatch_layout.is_some() { + "MESHLET_2D_DISPATCH" + } else { + "" + } + .into(), + ], + entry_point: "rasterize_cluster".into(), + }, + ), + + visibility_buffer_software_raster_depth_only: pipeline_cache.queue_compute_pipeline( + ComputePipelineDescriptor { + label: Some( + "meshlet_visibility_buffer_software_raster_depth_only_pipeline".into(), + ), + layout: vec![visibility_buffer_raster_layout.clone()], push_constant_ranges: vec![], + shader: MESHLET_VISIBILITY_BUFFER_SOFTWARE_RASTER_SHADER_HANDLE, + shader_defs: vec![ + "MESHLET_VISIBILITY_BUFFER_RASTER_PASS".into(), + if remap_1d_to_2d_dispatch_layout.is_some() { + "MESHLET_2D_DISPATCH" + } else { + "" + } + .into(), + ], + entry_point: "rasterize_cluster".into(), + }, + ), + + visibility_buffer_software_raster_depth_only_clamp_ortho: pipeline_cache + .queue_compute_pipeline(ComputePipelineDescriptor { + label: Some( + "meshlet_visibility_buffer_software_raster_depth_only_clamp_ortho_pipeline" + .into(), + ), + layout: vec![visibility_buffer_raster_layout.clone()], + push_constant_ranges: vec![], + shader: MESHLET_VISIBILITY_BUFFER_SOFTWARE_RASTER_SHADER_HANDLE, + shader_defs: vec![ + "MESHLET_VISIBILITY_BUFFER_RASTER_PASS".into(), + "DEPTH_CLAMP_ORTHO".into(), + if remap_1d_to_2d_dispatch_layout.is_some() { + "MESHLET_2D_DISPATCH" + } else { + "" + } + .into(), + ], + entry_point: "rasterize_cluster".into(), + }), + + visibility_buffer_hardware_raster: pipeline_cache.queue_render_pipeline( + RenderPipelineDescriptor { + label: Some("meshlet_visibility_buffer_hardware_raster_pipeline".into()), + layout: vec![visibility_buffer_raster_layout.clone()], + push_constant_ranges: vec![PushConstantRange { + stages: ShaderStages::VERTEX, + range: 0..4, + }], vertex: VertexState { - shader: MESHLET_VISIBILITY_BUFFER_RASTER_SHADER_HANDLE, + shader: MESHLET_VISIBILITY_BUFFER_HARDWARE_RASTER_SHADER_HANDLE, shader_defs: vec![ "MESHLET_VISIBILITY_BUFFER_RASTER_PASS".into(), "MESHLET_VISIBILITY_BUFFER_RASTER_PASS_OUTPUT".into(), @@ -133,44 +254,36 @@ impl FromWorld for MeshletPipelines { polygon_mode: PolygonMode::Fill, conservative: false, }, - depth_stencil: Some(DepthStencilState { - format: CORE_3D_DEPTH_FORMAT, - depth_write_enabled: true, - depth_compare: CompareFunction::GreaterEqual, - stencil: StencilState::default(), - bias: DepthBiasState::default(), - }), + depth_stencil: None, multisample: MultisampleState::default(), fragment: Some(FragmentState { - shader: MESHLET_VISIBILITY_BUFFER_RASTER_SHADER_HANDLE, + shader: MESHLET_VISIBILITY_BUFFER_HARDWARE_RASTER_SHADER_HANDLE, shader_defs: vec![ "MESHLET_VISIBILITY_BUFFER_RASTER_PASS".into(), "MESHLET_VISIBILITY_BUFFER_RASTER_PASS_OUTPUT".into(), ], entry_point: "fragment".into(), - targets: vec![ - Some(ColorTargetState { - format: TextureFormat::R32Uint, - blend: None, - write_mask: ColorWrites::ALL, - }), - Some(ColorTargetState { - format: TextureFormat::R16Uint, - blend: None, - write_mask: ColorWrites::ALL, - }), - ], + targets: vec![Some(ColorTargetState { + format: TextureFormat::R8Uint, + blend: None, + write_mask: ColorWrites::empty(), + })], }), }, ), - visibility_buffer_raster_depth_only: pipeline_cache.queue_render_pipeline( + visibility_buffer_hardware_raster_depth_only: pipeline_cache.queue_render_pipeline( RenderPipelineDescriptor { - label: Some("meshlet_visibility_buffer_raster_depth_only_pipeline".into()), - layout: vec![visibility_buffer_layout.clone()], - push_constant_ranges: vec![], + label: Some( + "meshlet_visibility_buffer_hardware_raster_depth_only_pipeline".into(), + ), + layout: vec![visibility_buffer_raster_layout.clone()], + push_constant_ranges: vec![PushConstantRange { + stages: ShaderStages::VERTEX, + range: 0..4, + }], vertex: VertexState { - shader: MESHLET_VISIBILITY_BUFFER_RASTER_SHADER_HANDLE, + shader: MESHLET_VISIBILITY_BUFFER_HARDWARE_RASTER_SHADER_HANDLE, shader_defs: vec!["MESHLET_VISIBILITY_BUFFER_RASTER_PASS".into()], entry_point: "vertex".into(), buffers: vec![], @@ -184,27 +297,34 @@ impl FromWorld for MeshletPipelines { polygon_mode: PolygonMode::Fill, conservative: false, }, - depth_stencil: Some(DepthStencilState { - format: CORE_3D_DEPTH_FORMAT, - depth_write_enabled: true, - depth_compare: CompareFunction::GreaterEqual, - stencil: StencilState::default(), - bias: DepthBiasState::default(), - }), + depth_stencil: None, multisample: MultisampleState::default(), - fragment: None, + fragment: Some(FragmentState { + shader: MESHLET_VISIBILITY_BUFFER_HARDWARE_RASTER_SHADER_HANDLE, + shader_defs: vec!["MESHLET_VISIBILITY_BUFFER_RASTER_PASS".into()], + entry_point: "fragment".into(), + targets: vec![Some(ColorTargetState { + format: TextureFormat::R8Uint, + blend: None, + write_mask: ColorWrites::empty(), + })], + }), }, ), - visibility_buffer_raster_depth_only_clamp_ortho: pipeline_cache.queue_render_pipeline( - RenderPipelineDescriptor { + visibility_buffer_hardware_raster_depth_only_clamp_ortho: pipeline_cache + .queue_render_pipeline(RenderPipelineDescriptor { label: Some( - "meshlet_visibility_buffer_raster_depth_only_clamp_ortho_pipeline".into(), + "meshlet_visibility_buffer_hardware_raster_depth_only_clamp_ortho_pipeline" + .into(), ), - layout: vec![visibility_buffer_layout], - push_constant_ranges: vec![], + layout: vec![visibility_buffer_raster_layout], + push_constant_ranges: vec![PushConstantRange { + stages: ShaderStages::VERTEX, + range: 0..4, + }], vertex: VertexState { - shader: MESHLET_VISIBILITY_BUFFER_RASTER_SHADER_HANDLE, + shader: MESHLET_VISIBILITY_BUFFER_HARDWARE_RASTER_SHADER_HANDLE, shader_defs: vec![ "MESHLET_VISIBILITY_BUFFER_RASTER_PASS".into(), "DEPTH_CLAMP_ORTHO".into(), @@ -221,47 +341,112 @@ impl FromWorld for MeshletPipelines { polygon_mode: PolygonMode::Fill, conservative: false, }, - depth_stencil: Some(DepthStencilState { - format: CORE_3D_DEPTH_FORMAT, - depth_write_enabled: true, - depth_compare: CompareFunction::GreaterEqual, - stencil: StencilState::default(), - bias: DepthBiasState::default(), - }), + depth_stencil: None, multisample: MultisampleState::default(), fragment: Some(FragmentState { - shader: MESHLET_VISIBILITY_BUFFER_RASTER_SHADER_HANDLE, + shader: MESHLET_VISIBILITY_BUFFER_HARDWARE_RASTER_SHADER_HANDLE, shader_defs: vec![ "MESHLET_VISIBILITY_BUFFER_RASTER_PASS".into(), "DEPTH_CLAMP_ORTHO".into(), ], entry_point: "fragment".into(), - targets: vec![], + targets: vec![Some(ColorTargetState { + format: TextureFormat::R8Uint, + blend: None, + write_mask: ColorWrites::empty(), + })], }), - }, - ), + }), - copy_material_depth: pipeline_cache.queue_render_pipeline(RenderPipelineDescriptor { - label: Some("meshlet_copy_material_depth_pipeline".into()), - layout: vec![copy_material_depth_layout], - push_constant_ranges: vec![], + resolve_depth: pipeline_cache.queue_render_pipeline(RenderPipelineDescriptor { + label: Some("meshlet_resolve_depth_pipeline".into()), + layout: vec![resolve_depth_layout.clone()], + push_constant_ranges: vec![PushConstantRange { + stages: ShaderStages::FRAGMENT, + range: 0..4, + }], vertex: fullscreen_shader_vertex_state(), primitive: PrimitiveState::default(), depth_stencil: Some(DepthStencilState { - format: TextureFormat::Depth16Unorm, + format: CORE_3D_DEPTH_FORMAT, depth_write_enabled: true, - depth_compare: CompareFunction::Always, + depth_compare: CompareFunction::GreaterEqual, stencil: StencilState::default(), bias: DepthBiasState::default(), }), multisample: MultisampleState::default(), fragment: Some(FragmentState { - shader: MESHLET_COPY_MATERIAL_DEPTH_SHADER_HANDLE, - shader_defs: vec![], - entry_point: "copy_material_depth".into(), + shader: MESHLET_RESOLVE_RENDER_TARGETS_SHADER_HANDLE, + shader_defs: vec!["MESHLET_VISIBILITY_BUFFER_RASTER_PASS_OUTPUT".into()], + entry_point: "resolve_depth".into(), targets: vec![], }), }), + + resolve_depth_shadow_view: pipeline_cache.queue_render_pipeline( + RenderPipelineDescriptor { + label: Some("meshlet_resolve_depth_pipeline".into()), + layout: vec![resolve_depth_layout], + push_constant_ranges: vec![PushConstantRange { + stages: ShaderStages::FRAGMENT, + range: 0..4, + }], + vertex: fullscreen_shader_vertex_state(), + primitive: PrimitiveState::default(), + depth_stencil: Some(DepthStencilState { + format: CORE_3D_DEPTH_FORMAT, + depth_write_enabled: true, + depth_compare: CompareFunction::GreaterEqual, + stencil: StencilState::default(), + bias: DepthBiasState::default(), + }), + multisample: MultisampleState::default(), + fragment: Some(FragmentState { + shader: MESHLET_RESOLVE_RENDER_TARGETS_SHADER_HANDLE, + shader_defs: vec![], + entry_point: "resolve_depth".into(), + targets: vec![], + }), + }, + ), + + resolve_material_depth: pipeline_cache.queue_render_pipeline( + RenderPipelineDescriptor { + label: Some("meshlet_resolve_material_depth_pipeline".into()), + layout: vec![resolve_material_depth_layout], + push_constant_ranges: vec![PushConstantRange { + stages: ShaderStages::FRAGMENT, + range: 0..4, + }], + vertex: fullscreen_shader_vertex_state(), + primitive: PrimitiveState::default(), + depth_stencil: Some(DepthStencilState { + format: TextureFormat::Depth16Unorm, + depth_write_enabled: true, + depth_compare: CompareFunction::Always, + stencil: StencilState::default(), + bias: DepthBiasState::default(), + }), + multisample: MultisampleState::default(), + fragment: Some(FragmentState { + shader: MESHLET_RESOLVE_RENDER_TARGETS_SHADER_HANDLE, + shader_defs: vec!["MESHLET_VISIBILITY_BUFFER_RASTER_PASS_OUTPUT".into()], + entry_point: "resolve_material_depth".into(), + targets: vec![], + }), + }, + ), + + remap_1d_to_2d_dispatch: remap_1d_to_2d_dispatch_layout.map(|layout| { + pipeline_cache.queue_compute_pipeline(ComputePipelineDescriptor { + label: Some("meshlet_remap_1d_to_2d_dispatch_pipeline".into()), + layout: vec![layout], + push_constant_ranges: vec![], + shader: MESHLET_REMAP_1D_TO_2D_DISPATCH_SHADER_HANDLE, + shader_defs: vec![], + entry_point: "remap_dispatch".into(), + }) + }), } } } @@ -275,10 +460,18 @@ impl MeshletPipelines { &ComputePipeline, &ComputePipeline, &ComputePipeline, + &ComputePipeline, + &ComputePipeline, + &ComputePipeline, + &ComputePipeline, + &ComputePipeline, + &RenderPipeline, &RenderPipeline, &RenderPipeline, &RenderPipeline, &RenderPipeline, + &RenderPipeline, + Option<&ComputePipeline>, )> { let pipeline_cache = world.get_resource::()?; let pipeline = world.get_resource::()?; @@ -288,11 +481,27 @@ impl MeshletPipelines { pipeline_cache.get_compute_pipeline(pipeline.cull_second)?, pipeline_cache.get_compute_pipeline(pipeline.downsample_depth_first)?, pipeline_cache.get_compute_pipeline(pipeline.downsample_depth_second)?, - pipeline_cache.get_render_pipeline(pipeline.visibility_buffer_raster)?, - pipeline_cache.get_render_pipeline(pipeline.visibility_buffer_raster_depth_only)?, + pipeline_cache.get_compute_pipeline(pipeline.downsample_depth_first_shadow_view)?, + pipeline_cache.get_compute_pipeline(pipeline.downsample_depth_second_shadow_view)?, + pipeline_cache.get_compute_pipeline(pipeline.visibility_buffer_software_raster)?, + pipeline_cache + .get_compute_pipeline(pipeline.visibility_buffer_software_raster_depth_only)?, + pipeline_cache.get_compute_pipeline( + pipeline.visibility_buffer_software_raster_depth_only_clamp_ortho, + )?, + pipeline_cache.get_render_pipeline(pipeline.visibility_buffer_hardware_raster)?, pipeline_cache - .get_render_pipeline(pipeline.visibility_buffer_raster_depth_only_clamp_ortho)?, - pipeline_cache.get_render_pipeline(pipeline.copy_material_depth)?, + .get_render_pipeline(pipeline.visibility_buffer_hardware_raster_depth_only)?, + pipeline_cache.get_render_pipeline( + pipeline.visibility_buffer_hardware_raster_depth_only_clamp_ortho, + )?, + pipeline_cache.get_render_pipeline(pipeline.resolve_depth)?, + pipeline_cache.get_render_pipeline(pipeline.resolve_depth_shadow_view)?, + pipeline_cache.get_render_pipeline(pipeline.resolve_material_depth)?, + match pipeline.remap_1d_to_2d_dispatch { + Some(id) => Some(pipeline_cache.get_compute_pipeline(id)?), + None => None, + }, )) } } diff --git a/crates/bevy_pbr/src/meshlet/remap_1d_to_2d_dispatch.wgsl b/crates/bevy_pbr/src/meshlet/remap_1d_to_2d_dispatch.wgsl new file mode 100644 index 0000000000000..6ade11b1d87e6 --- /dev/null +++ b/crates/bevy_pbr/src/meshlet/remap_1d_to_2d_dispatch.wgsl @@ -0,0 +1,20 @@ +/// Remaps an indirect 1d to 2d dispatch for devices with low dispatch size limit. + +struct DispatchIndirectArgs { + x: u32, + y: u32, + z: u32, +} + +@group(0) @binding(0) var meshlet_software_raster_indirect_args: DispatchIndirectArgs; +@group(0) @binding(1) var meshlet_software_raster_cluster_count: u32; + +@compute +@workgroup_size(1, 1, 1) +fn remap_dispatch() { + meshlet_software_raster_cluster_count = meshlet_software_raster_indirect_args.x; + + let n = u32(ceil(sqrt(f32(meshlet_software_raster_indirect_args.x)))); + meshlet_software_raster_indirect_args.x = n; + meshlet_software_raster_indirect_args.y = n; +} diff --git a/crates/bevy_pbr/src/meshlet/resolve_render_targets.wgsl b/crates/bevy_pbr/src/meshlet/resolve_render_targets.wgsl new file mode 100644 index 0000000000000..b54dbaec53585 --- /dev/null +++ b/crates/bevy_pbr/src/meshlet/resolve_render_targets.wgsl @@ -0,0 +1,39 @@ +#import bevy_core_pipeline::fullscreen_vertex_shader::FullscreenVertexOutput + +#ifdef MESHLET_VISIBILITY_BUFFER_RASTER_PASS_OUTPUT +@group(0) @binding(0) var meshlet_visibility_buffer: array; // Per pixel +#else +@group(0) @binding(0) var meshlet_visibility_buffer: array; // Per pixel +#endif +@group(0) @binding(1) var meshlet_cluster_instance_ids: array; // Per cluster +@group(0) @binding(2) var meshlet_instance_material_ids: array; // Per entity instance +var view_width: u32; + +/// This pass writes out the depth texture. +@fragment +fn resolve_depth(in: FullscreenVertexOutput) -> @builtin(frag_depth) f32 { + let frag_coord_1d = u32(in.position.y) * view_width + u32(in.position.x); + let visibility = meshlet_visibility_buffer[frag_coord_1d]; +#ifdef MESHLET_VISIBILITY_BUFFER_RASTER_PASS_OUTPUT + return bitcast(u32(visibility >> 32u)); +#else + return bitcast(visibility); +#endif +} + +/// This pass writes out the material depth texture. +#ifdef MESHLET_VISIBILITY_BUFFER_RASTER_PASS_OUTPUT +@fragment +fn resolve_material_depth(in: FullscreenVertexOutput) -> @builtin(frag_depth) f32 { + let frag_coord_1d = u32(in.position.y) * view_width + u32(in.position.x); + let visibility = meshlet_visibility_buffer[frag_coord_1d]; + + let depth = visibility >> 32u; + if depth == 0lu { return 0.0; } + + let cluster_id = u32(visibility) >> 6u; + let instance_id = meshlet_cluster_instance_ids[cluster_id]; + let material_id = meshlet_instance_material_ids[instance_id]; + return f32(material_id) / 65535.0; +} +#endif diff --git a/crates/bevy_pbr/src/meshlet/resource_manager.rs b/crates/bevy_pbr/src/meshlet/resource_manager.rs new file mode 100644 index 0000000000000..1d6ada2a2a19d --- /dev/null +++ b/crates/bevy_pbr/src/meshlet/resource_manager.rs @@ -0,0 +1,809 @@ +use super::{instance_manager::InstanceManager, meshlet_mesh_manager::MeshletMeshManager}; +use crate::ShadowView; +use bevy_core_pipeline::{ + core_3d::Camera3d, + prepass::{PreviousViewData, PreviousViewUniforms}, +}; +use bevy_ecs::{ + component::Component, + entity::{Entity, EntityHashMap}, + query::AnyOf, + system::{Commands, Query, Res, ResMut, Resource}, +}; +use bevy_math::{UVec2, Vec4Swizzles}; +use bevy_render::{ + render_resource::*, + renderer::{RenderDevice, RenderQueue}, + texture::{CachedTexture, TextureCache}, + view::{ExtractedView, RenderLayers, ViewUniform, ViewUniforms}, +}; +use binding_types::*; +use encase::internal::WriteInto; +use std::{ + array, iter, + mem::size_of, + sync::{atomic::AtomicBool, Arc}, +}; + +/// Manages per-view and per-cluster GPU resources for [`super::MeshletPlugin`]. +#[derive(Resource)] +pub struct ResourceManager { + /// Intermediate buffer of cluster IDs for use with rasterizing the visibility buffer + visibility_buffer_raster_clusters: Buffer, + /// Intermediate buffer of count of clusters to software rasterize + software_raster_cluster_count: Buffer, + /// Rightmost slot index of [`Self::visibility_buffer_raster_clusters`] + raster_cluster_rightmost_slot: u32, + + /// Per-cluster instance ID + cluster_instance_ids: Option, + /// Per-cluster meshlet ID + cluster_meshlet_ids: Option, + /// Per-cluster bitmask of whether or not it's a candidate for the second raster pass + second_pass_candidates_buffer: Option, + /// Sampler for a depth pyramid + depth_pyramid_sampler: Sampler, + /// Dummy texture view for binding depth pyramids with less than the maximum amount of mips + depth_pyramid_dummy_texture: TextureView, + + // TODO + previous_depth_pyramids: EntityHashMap, + + // Bind group layouts + pub fill_cluster_buffers_bind_group_layout: BindGroupLayout, + pub culling_bind_group_layout: BindGroupLayout, + pub visibility_buffer_raster_bind_group_layout: BindGroupLayout, + pub downsample_depth_bind_group_layout: BindGroupLayout, + pub resolve_depth_bind_group_layout: BindGroupLayout, + pub resolve_material_depth_bind_group_layout: BindGroupLayout, + pub material_shade_bind_group_layout: BindGroupLayout, + pub remap_1d_to_2d_dispatch_bind_group_layout: Option, +} + +impl ResourceManager { + pub fn new(cluster_buffer_slots: u32, render_device: &RenderDevice) -> Self { + let needs_dispatch_remap = + cluster_buffer_slots < render_device.limits().max_compute_workgroups_per_dimension; + + Self { + visibility_buffer_raster_clusters: render_device.create_buffer(&BufferDescriptor { + label: Some("meshlet_visibility_buffer_raster_clusters"), + size: cluster_buffer_slots as u64 * size_of::() as u64, + usage: BufferUsages::STORAGE, + mapped_at_creation: false, + }), + software_raster_cluster_count: render_device.create_buffer(&BufferDescriptor { + label: Some("meshlet_software_raster_cluster_count"), + size: size_of::() as u64, + usage: BufferUsages::STORAGE, + mapped_at_creation: false, + }), + raster_cluster_rightmost_slot: cluster_buffer_slots - 1, + + cluster_instance_ids: None, + cluster_meshlet_ids: None, + second_pass_candidates_buffer: None, + depth_pyramid_sampler: render_device.create_sampler(&SamplerDescriptor { + label: Some("meshlet_depth_pyramid_sampler"), + ..SamplerDescriptor::default() + }), + depth_pyramid_dummy_texture: render_device + .create_texture(&TextureDescriptor { + label: Some("meshlet_depth_pyramid_dummy_texture"), + size: Extent3d { + width: 1, + height: 1, + depth_or_array_layers: 1, + }, + mip_level_count: 1, + sample_count: 1, + dimension: TextureDimension::D2, + format: TextureFormat::R32Float, + usage: TextureUsages::STORAGE_BINDING, + view_formats: &[], + }) + .create_view(&TextureViewDescriptor { + label: Some("meshlet_depth_pyramid_dummy_texture_view"), + format: Some(TextureFormat::R32Float), + dimension: Some(TextureViewDimension::D2), + aspect: TextureAspect::All, + base_mip_level: 0, + mip_level_count: Some(1), + base_array_layer: 0, + array_layer_count: Some(1), + }), + + previous_depth_pyramids: EntityHashMap::default(), + + // TODO: Buffer min sizes + fill_cluster_buffers_bind_group_layout: render_device.create_bind_group_layout( + "meshlet_fill_cluster_buffers_bind_group_layout", + &BindGroupLayoutEntries::sequential( + ShaderStages::COMPUTE, + ( + storage_buffer_read_only_sized(false, None), + storage_buffer_read_only_sized(false, None), + storage_buffer_sized(false, None), + storage_buffer_sized(false, None), + ), + ), + ), + culling_bind_group_layout: render_device.create_bind_group_layout( + "meshlet_culling_bind_group_layout", + &BindGroupLayoutEntries::sequential( + ShaderStages::COMPUTE, + ( + storage_buffer_read_only_sized(false, None), + storage_buffer_read_only_sized(false, None), + storage_buffer_read_only_sized(false, None), + storage_buffer_read_only_sized(false, None), + storage_buffer_read_only_sized(false, None), + storage_buffer_sized(false, None), + storage_buffer_sized(false, None), + storage_buffer_sized(false, None), + storage_buffer_sized(false, None), + texture_2d(TextureSampleType::Float { filterable: false }), + uniform_buffer::(true), + uniform_buffer::(true), + ), + ), + ), + downsample_depth_bind_group_layout: render_device.create_bind_group_layout( + "meshlet_downsample_depth_bind_group_layout", + &BindGroupLayoutEntries::sequential(ShaderStages::COMPUTE, { + let write_only_r32float = || { + texture_storage_2d(TextureFormat::R32Float, StorageTextureAccess::WriteOnly) + }; + ( + storage_buffer_read_only_sized(false, None), + write_only_r32float(), + write_only_r32float(), + write_only_r32float(), + write_only_r32float(), + write_only_r32float(), + texture_storage_2d( + TextureFormat::R32Float, + StorageTextureAccess::ReadWrite, + ), + write_only_r32float(), + write_only_r32float(), + write_only_r32float(), + write_only_r32float(), + write_only_r32float(), + write_only_r32float(), + sampler(SamplerBindingType::NonFiltering), + ) + }), + ), + visibility_buffer_raster_bind_group_layout: render_device.create_bind_group_layout( + "meshlet_visibility_buffer_raster_bind_group_layout", + &BindGroupLayoutEntries::sequential( + ShaderStages::all(), + ( + storage_buffer_read_only_sized(false, None), + storage_buffer_read_only_sized(false, None), + storage_buffer_read_only_sized(false, None), + storage_buffer_read_only_sized(false, None), + storage_buffer_read_only_sized(false, None), + storage_buffer_read_only_sized(false, None), + storage_buffer_read_only_sized(false, None), + storage_buffer_read_only_sized(false, None), + storage_buffer_read_only_sized(false, None), + storage_buffer_sized(false, None), + uniform_buffer::(true), + ), + ), + ), + resolve_depth_bind_group_layout: render_device.create_bind_group_layout( + "meshlet_resolve_depth_bind_group_layout", + &BindGroupLayoutEntries::single( + ShaderStages::FRAGMENT, + storage_buffer_read_only_sized(false, None), + ), + ), + resolve_material_depth_bind_group_layout: render_device.create_bind_group_layout( + "meshlet_resolve_material_depth_bind_group_layout", + &BindGroupLayoutEntries::sequential( + ShaderStages::FRAGMENT, + ( + storage_buffer_read_only_sized(false, None), + storage_buffer_read_only_sized(false, None), + storage_buffer_read_only_sized(false, None), + ), + ), + ), + material_shade_bind_group_layout: render_device.create_bind_group_layout( + "meshlet_mesh_material_shade_bind_group_layout", + &BindGroupLayoutEntries::sequential( + ShaderStages::FRAGMENT, + ( + storage_buffer_read_only_sized(false, None), + storage_buffer_read_only_sized(false, None), + storage_buffer_read_only_sized(false, None), + storage_buffer_read_only_sized(false, None), + storage_buffer_read_only_sized(false, None), + storage_buffer_read_only_sized(false, None), + storage_buffer_read_only_sized(false, None), + storage_buffer_read_only_sized(false, None), + ), + ), + ), + remap_1d_to_2d_dispatch_bind_group_layout: needs_dispatch_remap.then(|| { + render_device.create_bind_group_layout( + "meshlet_remap_1d_to_2d_dispatch_bind_group_layout", + &BindGroupLayoutEntries::sequential( + ShaderStages::COMPUTE, + ( + storage_buffer_sized(false, None), + storage_buffer_sized(false, None), + ), + ), + ) + }), + } + } +} + +// ------------ TODO: Everything under here needs to be rewritten and cached ------------ + +#[derive(Component)] +pub struct MeshletViewResources { + pub scene_cluster_count: u32, + pub second_pass_candidates_buffer: Buffer, + instance_visibility: Buffer, + pub dummy_render_target: CachedTexture, + pub visibility_buffer: Buffer, + pub visibility_buffer_software_raster_indirect_args_first: Buffer, + pub visibility_buffer_software_raster_indirect_args_second: Buffer, + pub visibility_buffer_hardware_raster_indirect_args_first: Buffer, + pub visibility_buffer_hardware_raster_indirect_args_second: Buffer, + depth_pyramid_all_mips: TextureView, + depth_pyramid_mips: [TextureView; 12], + pub depth_pyramid_mip_count: u32, + previous_depth_pyramid: TextureView, + pub material_depth: Option, + pub view_size: UVec2, + pub raster_cluster_rightmost_slot: u32, +} + +#[derive(Component)] +pub struct MeshletViewBindGroups { + pub first_node: Arc, + pub fill_cluster_buffers: BindGroup, + pub culling_first: BindGroup, + pub culling_second: BindGroup, + pub downsample_depth: BindGroup, + pub visibility_buffer_raster: BindGroup, + pub resolve_depth: BindGroup, + pub resolve_material_depth: Option, + pub material_shade: Option, + pub remap_1d_to_2d_dispatch: Option<(BindGroup, BindGroup)>, +} + +// TODO: Try using Queue::write_buffer_with() in queue_meshlet_mesh_upload() to reduce copies +fn upload_storage_buffer( + buffer: &mut StorageBuffer>, + render_device: &RenderDevice, + render_queue: &RenderQueue, +) where + Vec: WriteInto, +{ + let inner = buffer.buffer(); + let capacity = inner.map_or(0, |b| b.size()); + let size = buffer.get().size().get() as BufferAddress; + + if capacity >= size { + let inner = inner.unwrap(); + let bytes = bytemuck::must_cast_slice(buffer.get().as_slice()); + render_queue.write_buffer(inner, 0, bytes); + } else { + buffer.write_buffer(render_device, render_queue); + } +} + +// TODO: Cache things per-view and skip running this system / optimize this system +pub fn prepare_meshlet_per_frame_resources( + mut resource_manager: ResMut, + mut instance_manager: ResMut, + views: Query<( + Entity, + &ExtractedView, + Option<&RenderLayers>, + AnyOf<(&Camera3d, &ShadowView)>, + )>, + mut texture_cache: ResMut, + render_queue: Res, + render_device: Res, + mut commands: Commands, +) { + if instance_manager.scene_cluster_count == 0 { + return; + } + + let instance_manager = instance_manager.as_mut(); + + // TODO: Move this and the submit to a separate system and remove pub from the fields + instance_manager + .instance_uniforms + .write_buffer(&render_device, &render_queue); + upload_storage_buffer( + &mut instance_manager.instance_material_ids, + &render_device, + &render_queue, + ); + upload_storage_buffer( + &mut instance_manager.instance_meshlet_counts_prefix_sum, + &render_device, + &render_queue, + ); + upload_storage_buffer( + &mut instance_manager.instance_meshlet_slice_starts, + &render_device, + &render_queue, + ); + + // Early submission for GPU data uploads to start while the render graph records commands + render_queue.submit([]); + + let needed_buffer_size = 4 * instance_manager.scene_cluster_count as u64; + match &mut resource_manager.cluster_instance_ids { + Some(buffer) if buffer.size() >= needed_buffer_size => buffer.clone(), + slot => { + let buffer = render_device.create_buffer(&BufferDescriptor { + label: Some("meshlet_cluster_instance_ids"), + size: needed_buffer_size, + usage: BufferUsages::STORAGE, + mapped_at_creation: false, + }); + *slot = Some(buffer.clone()); + buffer + } + }; + match &mut resource_manager.cluster_meshlet_ids { + Some(buffer) if buffer.size() >= needed_buffer_size => buffer.clone(), + slot => { + let buffer = render_device.create_buffer(&BufferDescriptor { + label: Some("meshlet_cluster_meshlet_ids"), + size: needed_buffer_size, + usage: BufferUsages::STORAGE, + mapped_at_creation: false, + }); + *slot = Some(buffer.clone()); + buffer + } + }; + + let needed_buffer_size = + instance_manager.scene_cluster_count.div_ceil(u32::BITS) as u64 * size_of::() as u64; + for (view_entity, view, render_layers, (_, shadow_view)) in &views { + let not_shadow_view = shadow_view.is_none(); + + let instance_visibility = instance_manager + .view_instance_visibility + .entry(view_entity) + .or_insert_with(|| { + let mut buffer = StorageBuffer::default(); + buffer.set_label(Some("meshlet_view_instance_visibility")); + buffer + }); + for (instance_index, (_, layers, not_shadow_caster)) in + instance_manager.instances.iter().enumerate() + { + // If either the layers don't match the view's layers or this is a shadow view + // and the instance is not a shadow caster, hide the instance for this view + if !render_layers + .unwrap_or(&RenderLayers::default()) + .intersects(layers) + || (shadow_view.is_some() && *not_shadow_caster) + { + let vec = instance_visibility.get_mut(); + let index = instance_index / 32; + let bit = instance_index - index * 32; + if vec.len() <= index { + vec.extend(iter::repeat(0).take(index - vec.len() + 1)); + } + vec[index] |= 1 << bit; + } + } + upload_storage_buffer(instance_visibility, &render_device, &render_queue); + let instance_visibility = instance_visibility.buffer().unwrap().clone(); + + let second_pass_candidates_buffer = + match &mut resource_manager.second_pass_candidates_buffer { + Some(buffer) if buffer.size() >= needed_buffer_size => buffer.clone(), + slot => { + let buffer = render_device.create_buffer(&BufferDescriptor { + label: Some("meshlet_second_pass_candidates"), + size: needed_buffer_size, + usage: BufferUsages::STORAGE | BufferUsages::COPY_DST, + mapped_at_creation: false, + }); + *slot = Some(buffer.clone()); + buffer + } + }; + + // TODO: Remove this once wgpu allows render passes with no attachments + let dummy_render_target = texture_cache.get( + &render_device, + TextureDescriptor { + label: Some("meshlet_dummy_render_target"), + size: Extent3d { + width: view.viewport.z, + height: view.viewport.w, + depth_or_array_layers: 1, + }, + mip_level_count: 1, + sample_count: 1, + dimension: TextureDimension::D2, + format: TextureFormat::R8Uint, + usage: TextureUsages::RENDER_ATTACHMENT, + view_formats: &[], + }, + ); + + let type_size = if not_shadow_view { + size_of::() + } else { + size_of::() + } as u64; + // TODO: Cache + let visibility_buffer = render_device.create_buffer(&BufferDescriptor { + label: Some("meshlet_visibility_buffer"), + size: type_size * (view.viewport.z * view.viewport.w) as u64, + usage: BufferUsages::STORAGE, + mapped_at_creation: false, + }); + + let visibility_buffer_software_raster_indirect_args_first = render_device + .create_buffer_with_data(&BufferInitDescriptor { + label: Some("meshlet_visibility_buffer_software_raster_indirect_args_first"), + contents: DispatchIndirectArgs { x: 0, y: 1, z: 1 }.as_bytes(), + usage: BufferUsages::STORAGE | BufferUsages::INDIRECT, + }); + let visibility_buffer_software_raster_indirect_args_second = render_device + .create_buffer_with_data(&BufferInitDescriptor { + label: Some("visibility_buffer_software_raster_indirect_args_second"), + contents: DispatchIndirectArgs { x: 0, y: 1, z: 1 }.as_bytes(), + usage: BufferUsages::STORAGE | BufferUsages::INDIRECT, + }); + + let visibility_buffer_hardware_raster_indirect_args_first = render_device + .create_buffer_with_data(&BufferInitDescriptor { + label: Some("meshlet_visibility_buffer_hardware_raster_indirect_args_first"), + contents: DrawIndirectArgs { + vertex_count: 64 * 3, + instance_count: 0, + first_vertex: 0, + first_instance: 0, + } + .as_bytes(), + usage: BufferUsages::STORAGE | BufferUsages::INDIRECT, + }); + let visibility_buffer_hardware_raster_indirect_args_second = render_device + .create_buffer_with_data(&BufferInitDescriptor { + label: Some("visibility_buffer_hardware_raster_indirect_args_second"), + contents: DrawIndirectArgs { + vertex_count: 64 * 3, + instance_count: 0, + first_vertex: 0, + first_instance: 0, + } + .as_bytes(), + usage: BufferUsages::STORAGE | BufferUsages::INDIRECT, + }); + + let depth_pyramid_size = Extent3d { + width: view.viewport.z.div_ceil(2), + height: view.viewport.w.div_ceil(2), + depth_or_array_layers: 1, + }; + let depth_pyramid_mip_count = depth_pyramid_size.max_mips(TextureDimension::D2); + let depth_pyramid = texture_cache.get( + &render_device, + TextureDescriptor { + label: Some("meshlet_depth_pyramid"), + size: depth_pyramid_size, + mip_level_count: depth_pyramid_mip_count, + sample_count: 1, + dimension: TextureDimension::D2, + format: TextureFormat::R32Float, + usage: TextureUsages::STORAGE_BINDING | TextureUsages::TEXTURE_BINDING, + view_formats: &[], + }, + ); + let depth_pyramid_mips = array::from_fn(|i| { + if (i as u32) < depth_pyramid_mip_count { + depth_pyramid.texture.create_view(&TextureViewDescriptor { + label: Some("meshlet_depth_pyramid_texture_view"), + format: Some(TextureFormat::R32Float), + dimension: Some(TextureViewDimension::D2), + aspect: TextureAspect::All, + base_mip_level: i as u32, + mip_level_count: Some(1), + base_array_layer: 0, + array_layer_count: Some(1), + }) + } else { + resource_manager.depth_pyramid_dummy_texture.clone() + } + }); + let depth_pyramid_all_mips = depth_pyramid.default_view.clone(); + + let previous_depth_pyramid = + match resource_manager.previous_depth_pyramids.get(&view_entity) { + Some(texture_view) => texture_view.clone(), + None => depth_pyramid_all_mips.clone(), + }; + resource_manager + .previous_depth_pyramids + .insert(view_entity, depth_pyramid_all_mips.clone()); + + let material_depth = TextureDescriptor { + label: Some("meshlet_material_depth"), + size: Extent3d { + width: view.viewport.z, + height: view.viewport.w, + depth_or_array_layers: 1, + }, + mip_level_count: 1, + sample_count: 1, + dimension: TextureDimension::D2, + format: TextureFormat::Depth16Unorm, + usage: TextureUsages::RENDER_ATTACHMENT, + view_formats: &[], + }; + + commands.entity(view_entity).insert(MeshletViewResources { + scene_cluster_count: instance_manager.scene_cluster_count, + second_pass_candidates_buffer, + instance_visibility, + dummy_render_target, + visibility_buffer, + visibility_buffer_software_raster_indirect_args_first, + visibility_buffer_software_raster_indirect_args_second, + visibility_buffer_hardware_raster_indirect_args_first, + visibility_buffer_hardware_raster_indirect_args_second, + depth_pyramid_all_mips, + depth_pyramid_mips, + depth_pyramid_mip_count, + previous_depth_pyramid, + material_depth: not_shadow_view + .then(|| texture_cache.get(&render_device, material_depth)), + view_size: view.viewport.zw(), + raster_cluster_rightmost_slot: resource_manager.raster_cluster_rightmost_slot, + }); + } +} + +#[allow(clippy::too_many_arguments)] +pub fn prepare_meshlet_view_bind_groups( + meshlet_mesh_manager: Res, + resource_manager: Res, + instance_manager: Res, + views: Query<(Entity, &MeshletViewResources)>, + view_uniforms: Res, + previous_view_uniforms: Res, + render_device: Res, + mut commands: Commands, +) { + let ( + Some(cluster_instance_ids), + Some(cluster_meshlet_ids), + Some(view_uniforms), + Some(previous_view_uniforms), + ) = ( + resource_manager.cluster_instance_ids.as_ref(), + resource_manager.cluster_meshlet_ids.as_ref(), + view_uniforms.uniforms.binding(), + previous_view_uniforms.uniforms.binding(), + ) + else { + return; + }; + + let first_node = Arc::new(AtomicBool::new(true)); + + // TODO: Some of these bind groups can be reused across multiple views + for (view_entity, view_resources) in &views { + let entries = BindGroupEntries::sequential(( + instance_manager + .instance_meshlet_counts_prefix_sum + .binding() + .unwrap(), + instance_manager + .instance_meshlet_slice_starts + .binding() + .unwrap(), + cluster_instance_ids.as_entire_binding(), + cluster_meshlet_ids.as_entire_binding(), + )); + let fill_cluster_buffers = render_device.create_bind_group( + "meshlet_fill_cluster_buffers", + &resource_manager.fill_cluster_buffers_bind_group_layout, + &entries, + ); + + let entries = BindGroupEntries::sequential(( + cluster_meshlet_ids.as_entire_binding(), + meshlet_mesh_manager.meshlet_bounding_spheres.binding(), + cluster_instance_ids.as_entire_binding(), + instance_manager.instance_uniforms.binding().unwrap(), + view_resources.instance_visibility.as_entire_binding(), + view_resources + .second_pass_candidates_buffer + .as_entire_binding(), + view_resources + .visibility_buffer_software_raster_indirect_args_first + .as_entire_binding(), + view_resources + .visibility_buffer_hardware_raster_indirect_args_first + .as_entire_binding(), + resource_manager + .visibility_buffer_raster_clusters + .as_entire_binding(), + &view_resources.previous_depth_pyramid, + view_uniforms.clone(), + previous_view_uniforms.clone(), + )); + let culling_first = render_device.create_bind_group( + "meshlet_culling_first_bind_group", + &resource_manager.culling_bind_group_layout, + &entries, + ); + + let entries = BindGroupEntries::sequential(( + cluster_meshlet_ids.as_entire_binding(), + meshlet_mesh_manager.meshlet_bounding_spheres.binding(), + cluster_instance_ids.as_entire_binding(), + instance_manager.instance_uniforms.binding().unwrap(), + view_resources.instance_visibility.as_entire_binding(), + view_resources + .second_pass_candidates_buffer + .as_entire_binding(), + view_resources + .visibility_buffer_software_raster_indirect_args_second + .as_entire_binding(), + view_resources + .visibility_buffer_hardware_raster_indirect_args_second + .as_entire_binding(), + resource_manager + .visibility_buffer_raster_clusters + .as_entire_binding(), + &view_resources.depth_pyramid_all_mips, + view_uniforms.clone(), + previous_view_uniforms.clone(), + )); + let culling_second = render_device.create_bind_group( + "meshlet_culling_second_bind_group", + &resource_manager.culling_bind_group_layout, + &entries, + ); + + let downsample_depth = render_device.create_bind_group( + "meshlet_downsample_depth_bind_group", + &resource_manager.downsample_depth_bind_group_layout, + &BindGroupEntries::sequential(( + view_resources.visibility_buffer.as_entire_binding(), + &view_resources.depth_pyramid_mips[0], + &view_resources.depth_pyramid_mips[1], + &view_resources.depth_pyramid_mips[2], + &view_resources.depth_pyramid_mips[3], + &view_resources.depth_pyramid_mips[4], + &view_resources.depth_pyramid_mips[5], + &view_resources.depth_pyramid_mips[6], + &view_resources.depth_pyramid_mips[7], + &view_resources.depth_pyramid_mips[8], + &view_resources.depth_pyramid_mips[9], + &view_resources.depth_pyramid_mips[10], + &view_resources.depth_pyramid_mips[11], + &resource_manager.depth_pyramid_sampler, + )), + ); + + let entries = BindGroupEntries::sequential(( + cluster_meshlet_ids.as_entire_binding(), + meshlet_mesh_manager.meshlets.binding(), + meshlet_mesh_manager.indices.binding(), + meshlet_mesh_manager.vertex_ids.binding(), + meshlet_mesh_manager.vertex_data.binding(), + cluster_instance_ids.as_entire_binding(), + instance_manager.instance_uniforms.binding().unwrap(), + resource_manager + .visibility_buffer_raster_clusters + .as_entire_binding(), + resource_manager + .software_raster_cluster_count + .as_entire_binding(), + view_resources.visibility_buffer.as_entire_binding(), + view_uniforms.clone(), + )); + let visibility_buffer_raster = render_device.create_bind_group( + "meshlet_visibility_raster_buffer_bind_group", + &resource_manager.visibility_buffer_raster_bind_group_layout, + &entries, + ); + + let resolve_depth = render_device.create_bind_group( + "meshlet_resolve_depth_bind_group", + &resource_manager.resolve_depth_bind_group_layout, + &BindGroupEntries::single(view_resources.visibility_buffer.as_entire_binding()), + ); + + let resolve_material_depth = view_resources.material_depth.as_ref().map(|_| { + let entries = BindGroupEntries::sequential(( + view_resources.visibility_buffer.as_entire_binding(), + cluster_instance_ids.as_entire_binding(), + instance_manager.instance_material_ids.binding().unwrap(), + )); + render_device.create_bind_group( + "meshlet_resolve_material_depth_bind_group", + &resource_manager.resolve_material_depth_bind_group_layout, + &entries, + ) + }); + + let material_shade = view_resources.material_depth.as_ref().map(|_| { + let entries = BindGroupEntries::sequential(( + view_resources.visibility_buffer.as_entire_binding(), + cluster_meshlet_ids.as_entire_binding(), + meshlet_mesh_manager.meshlets.binding(), + meshlet_mesh_manager.indices.binding(), + meshlet_mesh_manager.vertex_ids.binding(), + meshlet_mesh_manager.vertex_data.binding(), + cluster_instance_ids.as_entire_binding(), + instance_manager.instance_uniforms.binding().unwrap(), + )); + render_device.create_bind_group( + "meshlet_mesh_material_shade_bind_group", + &resource_manager.material_shade_bind_group_layout, + &entries, + ) + }); + + let remap_1d_to_2d_dispatch = resource_manager + .remap_1d_to_2d_dispatch_bind_group_layout + .as_ref() + .map(|layout| { + ( + render_device.create_bind_group( + "meshlet_remap_1d_to_2d_dispatch_first_bind_group", + layout, + &BindGroupEntries::sequential(( + view_resources + .visibility_buffer_software_raster_indirect_args_first + .as_entire_binding(), + resource_manager + .software_raster_cluster_count + .as_entire_binding(), + )), + ), + render_device.create_bind_group( + "meshlet_remap_1d_to_2d_dispatch_second_bind_group", + layout, + &BindGroupEntries::sequential(( + view_resources + .visibility_buffer_software_raster_indirect_args_second + .as_entire_binding(), + resource_manager + .software_raster_cluster_count + .as_entire_binding(), + )), + ), + ) + }); + + commands.entity(view_entity).insert(MeshletViewBindGroups { + first_node: Arc::clone(&first_node), + fill_cluster_buffers, + culling_first, + culling_second, + downsample_depth, + visibility_buffer_raster, + resolve_depth, + resolve_material_depth, + material_shade, + remap_1d_to_2d_dispatch, + }); + } +} diff --git a/crates/bevy_pbr/src/meshlet/visibility_buffer_raster.wgsl b/crates/bevy_pbr/src/meshlet/visibility_buffer_hardware_raster.wgsl similarity index 56% rename from crates/bevy_pbr/src/meshlet/visibility_buffer_raster.wgsl rename to crates/bevy_pbr/src/meshlet/visibility_buffer_hardware_raster.wgsl index 86b34cd2f0f22..e3cf7a6fb3a22 100644 --- a/crates/bevy_pbr/src/meshlet/visibility_buffer_raster.wgsl +++ b/crates/bevy_pbr/src/meshlet/visibility_buffer_hardware_raster.wgsl @@ -6,8 +6,8 @@ meshlet_vertex_data, meshlet_cluster_instance_ids, meshlet_instance_uniforms, - meshlet_instance_material_ids, - draw_triangle_buffer, + meshlet_raster_clusters, + meshlet_visibility_buffer, view, get_meshlet_index, unpack_meshlet_vertex, @@ -15,38 +15,33 @@ mesh_functions::mesh_position_local_to_world, } #import bevy_render::maths::affine3_to_square +var meshlet_raster_cluster_rightmost_slot: u32; -/// Vertex/fragment shader for rasterizing meshlets into a visibility buffer. +/// Vertex/fragment shader for rasterizing large clusters into a visibility buffer. struct VertexOutput { - @builtin(position) clip_position: vec4, + @builtin(position) position: vec4, #ifdef MESHLET_VISIBILITY_BUFFER_RASTER_PASS_OUTPUT - @location(0) @interpolate(flat) visibility: u32, - @location(1) @interpolate(flat) material_depth: u32, + @location(0) @interpolate(flat) packed_ids: u32, #endif #ifdef DEPTH_CLAMP_ORTHO @location(0) unclamped_clip_depth: f32, #endif } -#ifdef MESHLET_VISIBILITY_BUFFER_RASTER_PASS_OUTPUT -struct FragmentOutput { - @location(0) visibility: vec4, - @location(1) material_depth: vec4, -} -#endif - @vertex -fn vertex(@builtin(vertex_index) vertex_index: u32) -> VertexOutput { - let packed_ids = draw_triangle_buffer[vertex_index / 3u]; - let cluster_id = packed_ids >> 6u; - let triangle_id = extractBits(packed_ids, 0u, 6u); - let index_id = (triangle_id * 3u) + (vertex_index % 3u); +fn vertex(@builtin(instance_index) instance_index: u32, @builtin(vertex_index) vertex_index: u32) -> VertexOutput { + let cluster_id = meshlet_raster_clusters[meshlet_raster_cluster_rightmost_slot - instance_index]; let meshlet_id = meshlet_cluster_meshlet_ids[cluster_id]; let meshlet = meshlets[meshlet_id]; + + let triangle_id = vertex_index / 3u; + if triangle_id >= meshlet.triangle_count { return dummy_vertex(); } + let index_id = (triangle_id * 3u) + (vertex_index % 3u); let index = get_meshlet_index(meshlet.start_index_id + index_id); let vertex_id = meshlet_vertex_ids[meshlet.start_vertex_id + index]; let vertex = unpack_meshlet_vertex(meshlet_vertex_data[vertex_id]); + let instance_id = meshlet_cluster_instance_ids[cluster_id]; let instance_uniform = meshlet_instance_uniforms[instance_id]; @@ -61,8 +56,7 @@ fn vertex(@builtin(vertex_index) vertex_index: u32) -> VertexOutput { return VertexOutput( clip_position, #ifdef MESHLET_VISIBILITY_BUFFER_RASTER_PASS_OUTPUT - packed_ids, - meshlet_instance_material_ids[instance_id], + (cluster_id << 6u) | triangle_id, #endif #ifdef DEPTH_CLAMP_ORTHO unclamped_clip_depth, @@ -70,19 +64,31 @@ fn vertex(@builtin(vertex_index) vertex_index: u32) -> VertexOutput { ); } -#ifdef MESHLET_VISIBILITY_BUFFER_RASTER_PASS_OUTPUT @fragment -fn fragment(vertex_output: VertexOutput) -> FragmentOutput { - return FragmentOutput( - vec4(vertex_output.visibility, 0u, 0u, 0u), - vec4(vertex_output.material_depth, 0u, 0u, 0u), - ); -} +fn fragment(vertex_output: VertexOutput) { + let frag_coord_1d = u32(vertex_output.position.y) * u32(view.viewport.z) + u32(vertex_output.position.x); + +#ifdef MESHLET_VISIBILITY_BUFFER_RASTER_PASS_OUTPUT + let depth = bitcast(vertex_output.position.z); + let visibility = (u64(depth) << 32u) | u64(vertex_output.packed_ids); + atomicMax(&meshlet_visibility_buffer[frag_coord_1d], visibility); +#else ifdef DEPTH_CLAMP_ORTHO + let depth = bitcast(vertex_output.unclamped_clip_depth); + atomicMax(&meshlet_visibility_buffer[frag_coord_1d], depth); +#else + let depth = bitcast(vertex_output.position.z); + atomicMax(&meshlet_visibility_buffer[frag_coord_1d], depth); #endif +} +fn dummy_vertex() -> VertexOutput { + return VertexOutput( + vec4(0.0), +#ifdef MESHLET_VISIBILITY_BUFFER_RASTER_PASS_OUTPUT + 0u, +#endif #ifdef DEPTH_CLAMP_ORTHO -@fragment -fn fragment(vertex_output: VertexOutput) -> @builtin(frag_depth) f32 { - return vertex_output.unclamped_clip_depth; -} + 0.0, #endif + ); +} diff --git a/crates/bevy_pbr/src/meshlet/visibility_buffer_raster_node.rs b/crates/bevy_pbr/src/meshlet/visibility_buffer_raster_node.rs index 5af10769cc8dc..3f2b8883e75f2 100644 --- a/crates/bevy_pbr/src/meshlet/visibility_buffer_raster_node.rs +++ b/crates/bevy_pbr/src/meshlet/visibility_buffer_raster_node.rs @@ -1,6 +1,6 @@ use super::{ - gpu_scene::{MeshletViewBindGroups, MeshletViewResources}, pipelines::MeshletPipelines, + resource_manager::{MeshletViewBindGroups, MeshletViewResources}, }; use crate::{LightEntity, ShadowView, ViewLightEntities}; use bevy_color::LinearRgba; @@ -80,10 +80,18 @@ impl Node for MeshletVisibilityBufferRasterPassNode { culling_second_pipeline, downsample_depth_first_pipeline, downsample_depth_second_pipeline, - visibility_buffer_raster_pipeline, - visibility_buffer_raster_depth_only_pipeline, - visibility_buffer_raster_depth_only_clamp_ortho, - copy_material_depth_pipeline, + downsample_depth_first_shadow_view_pipeline, + downsample_depth_second_shadow_view_pipeline, + visibility_buffer_software_raster_pipeline, + visibility_buffer_software_raster_depth_only_pipeline, + visibility_buffer_software_raster_depth_only_clamp_ortho, + visibility_buffer_hardware_raster_pipeline, + visibility_buffer_hardware_raster_depth_only_pipeline, + visibility_buffer_hardware_raster_depth_only_clamp_ortho, + resolve_depth_pipeline, + resolve_depth_shadow_view_pipeline, + resolve_material_depth_pipeline, + remap_1d_to_2d_dispatch_pipeline, )) = MeshletPipelines::get(world) else { return Ok(()); @@ -94,7 +102,7 @@ impl Node for MeshletVisibilityBufferRasterPassNode { .fetch_and(false, Ordering::SeqCst); let thread_per_cluster_workgroups = - (meshlet_view_resources.scene_meshlet_count.div_ceil(128) as f32) + (meshlet_view_resources.scene_cluster_count.div_ceil(128) as f32) .cbrt() .ceil() as u32; @@ -112,7 +120,7 @@ impl Node for MeshletVisibilityBufferRasterPassNode { &meshlet_view_bind_groups.fill_cluster_buffers, fill_cluster_buffers_pipeline, thread_per_cluster_workgroups, - meshlet_view_resources.scene_meshlet_count, + meshlet_view_resources.scene_cluster_count, ); } cull_pass( @@ -123,17 +131,25 @@ impl Node for MeshletVisibilityBufferRasterPassNode { previous_view_offset, culling_first_pipeline, thread_per_cluster_workgroups, + meshlet_view_resources.raster_cluster_rightmost_slot, + meshlet_view_bind_groups + .remap_1d_to_2d_dispatch + .as_ref() + .map(|(bg1, _)| bg1), + remap_1d_to_2d_dispatch_pipeline, ); raster_pass( true, render_context, - meshlet_view_resources, - &meshlet_view_resources.visibility_buffer_draw_indirect_args_first, - view_depth.get_attachment(StoreOp::Store), + &meshlet_view_resources.visibility_buffer_software_raster_indirect_args_first, + &meshlet_view_resources.visibility_buffer_hardware_raster_indirect_args_first, + &meshlet_view_resources.dummy_render_target.default_view, meshlet_view_bind_groups, view_offset, - visibility_buffer_raster_pipeline, + visibility_buffer_software_raster_pipeline, + visibility_buffer_hardware_raster_pipeline, Some(camera), + meshlet_view_resources.raster_cluster_rightmost_slot, ); downsample_depth( render_context, @@ -150,23 +166,39 @@ impl Node for MeshletVisibilityBufferRasterPassNode { previous_view_offset, culling_second_pipeline, thread_per_cluster_workgroups, + meshlet_view_resources.raster_cluster_rightmost_slot, + meshlet_view_bind_groups + .remap_1d_to_2d_dispatch + .as_ref() + .map(|(_, bg2)| bg2), + remap_1d_to_2d_dispatch_pipeline, ); raster_pass( false, render_context, - meshlet_view_resources, - &meshlet_view_resources.visibility_buffer_draw_indirect_args_second, - view_depth.get_attachment(StoreOp::Store), + &meshlet_view_resources.visibility_buffer_software_raster_indirect_args_second, + &meshlet_view_resources.visibility_buffer_hardware_raster_indirect_args_second, + &meshlet_view_resources.dummy_render_target.default_view, meshlet_view_bind_groups, view_offset, - visibility_buffer_raster_pipeline, + visibility_buffer_software_raster_pipeline, + visibility_buffer_hardware_raster_pipeline, Some(camera), + meshlet_view_resources.raster_cluster_rightmost_slot, ); - copy_material_depth_pass( + resolve_depth( render_context, + view_depth.get_attachment(StoreOp::Store), meshlet_view_resources, meshlet_view_bind_groups, - copy_material_depth_pipeline, + resolve_depth_pipeline, + camera, + ); + resolve_material_depth( + render_context, + meshlet_view_resources, + meshlet_view_bind_groups, + resolve_material_depth_pipeline, camera, ); downsample_depth( @@ -191,9 +223,18 @@ impl Node for MeshletVisibilityBufferRasterPassNode { continue; }; - let shadow_visibility_buffer_pipeline = match light_type { - LightEntity::Directional { .. } => visibility_buffer_raster_depth_only_clamp_ortho, - _ => visibility_buffer_raster_depth_only_pipeline, + let ( + shadow_visibility_buffer_software_raster_pipeline, + shadow_visibility_buffer_hardware_raster_pipeline, + ) = match light_type { + LightEntity::Directional { .. } => ( + visibility_buffer_software_raster_depth_only_clamp_ortho, + visibility_buffer_hardware_raster_depth_only_clamp_ortho, + ), + _ => ( + visibility_buffer_software_raster_depth_only_pipeline, + visibility_buffer_hardware_raster_depth_only_pipeline, + ), }; render_context.command_encoder().push_debug_group(&format!( @@ -213,24 +254,32 @@ impl Node for MeshletVisibilityBufferRasterPassNode { previous_view_offset, culling_first_pipeline, thread_per_cluster_workgroups, + meshlet_view_resources.raster_cluster_rightmost_slot, + meshlet_view_bind_groups + .remap_1d_to_2d_dispatch + .as_ref() + .map(|(bg1, _)| bg1), + remap_1d_to_2d_dispatch_pipeline, ); raster_pass( true, render_context, - meshlet_view_resources, - &meshlet_view_resources.visibility_buffer_draw_indirect_args_first, - shadow_view.depth_attachment.get_attachment(StoreOp::Store), + &meshlet_view_resources.visibility_buffer_software_raster_indirect_args_first, + &meshlet_view_resources.visibility_buffer_hardware_raster_indirect_args_first, + &meshlet_view_resources.dummy_render_target.default_view, meshlet_view_bind_groups, view_offset, - shadow_visibility_buffer_pipeline, + shadow_visibility_buffer_software_raster_pipeline, + shadow_visibility_buffer_hardware_raster_pipeline, None, + meshlet_view_resources.raster_cluster_rightmost_slot, ); downsample_depth( render_context, meshlet_view_resources, meshlet_view_bind_groups, - downsample_depth_first_pipeline, - downsample_depth_second_pipeline, + downsample_depth_first_shadow_view_pipeline, + downsample_depth_second_shadow_view_pipeline, ); cull_pass( "culling_second", @@ -240,24 +289,40 @@ impl Node for MeshletVisibilityBufferRasterPassNode { previous_view_offset, culling_second_pipeline, thread_per_cluster_workgroups, + meshlet_view_resources.raster_cluster_rightmost_slot, + meshlet_view_bind_groups + .remap_1d_to_2d_dispatch + .as_ref() + .map(|(_, bg2)| bg2), + remap_1d_to_2d_dispatch_pipeline, ); raster_pass( false, render_context, - meshlet_view_resources, - &meshlet_view_resources.visibility_buffer_draw_indirect_args_second, - shadow_view.depth_attachment.get_attachment(StoreOp::Store), + &meshlet_view_resources.visibility_buffer_software_raster_indirect_args_second, + &meshlet_view_resources.visibility_buffer_hardware_raster_indirect_args_second, + &meshlet_view_resources.dummy_render_target.default_view, meshlet_view_bind_groups, view_offset, - shadow_visibility_buffer_pipeline, + shadow_visibility_buffer_software_raster_pipeline, + shadow_visibility_buffer_hardware_raster_pipeline, None, + meshlet_view_resources.raster_cluster_rightmost_slot, + ); + resolve_depth( + render_context, + shadow_view.depth_attachment.get_attachment(StoreOp::Store), + meshlet_view_resources, + meshlet_view_bind_groups, + resolve_depth_shadow_view_pipeline, + camera, ); downsample_depth( render_context, meshlet_view_resources, meshlet_view_bind_groups, - downsample_depth_first_pipeline, - downsample_depth_second_pipeline, + downsample_depth_first_shadow_view_pipeline, + downsample_depth_second_shadow_view_pipeline, ); render_context.command_encoder().pop_debug_group(); } @@ -274,20 +339,21 @@ fn fill_cluster_buffers_pass( cluster_count: u32, ) { let command_encoder = render_context.command_encoder(); - let mut cull_pass = command_encoder.begin_compute_pass(&ComputePassDescriptor { + let mut fill_pass = command_encoder.begin_compute_pass(&ComputePassDescriptor { label: Some("fill_cluster_buffers"), timestamp_writes: None, }); - cull_pass.set_pipeline(fill_cluster_buffers_pass_pipeline); - cull_pass.set_push_constants(0, &cluster_count.to_le_bytes()); - cull_pass.set_bind_group(0, fill_cluster_buffers_bind_group, &[]); - cull_pass.dispatch_workgroups( + fill_pass.set_pipeline(fill_cluster_buffers_pass_pipeline); + fill_pass.set_push_constants(0, &cluster_count.to_le_bytes()); + fill_pass.set_bind_group(0, fill_cluster_buffers_bind_group, &[]); + fill_pass.dispatch_workgroups( fill_cluster_buffers_pass_workgroups, fill_cluster_buffers_pass_workgroups, fill_cluster_buffers_pass_workgroups, ); } +#[allow(clippy::too_many_arguments)] fn cull_pass( label: &'static str, render_context: &mut RenderContext, @@ -296,6 +362,9 @@ fn cull_pass( previous_view_offset: &PreviousViewUniformOffset, culling_pipeline: &ComputePipeline, culling_workgroups: u32, + raster_cluster_rightmost_slot: u32, + remap_1d_to_2d_dispatch_bind_group: Option<&BindGroup>, + remap_1d_to_2d_dispatch_pipeline: Option<&ComputePipeline>, ) { let command_encoder = render_context.command_encoder(); let mut cull_pass = command_encoder.begin_compute_pass(&ComputePassDescriptor { @@ -303,82 +372,90 @@ fn cull_pass( timestamp_writes: None, }); cull_pass.set_pipeline(culling_pipeline); + cull_pass.set_push_constants(0, &raster_cluster_rightmost_slot.to_le_bytes()); cull_pass.set_bind_group( 0, culling_bind_group, &[view_offset.offset, previous_view_offset.offset], ); cull_pass.dispatch_workgroups(culling_workgroups, culling_workgroups, culling_workgroups); + + if let (Some(remap_1d_to_2d_dispatch_pipeline), Some(remap_1d_to_2d_dispatch_bind_group)) = ( + remap_1d_to_2d_dispatch_pipeline, + remap_1d_to_2d_dispatch_bind_group, + ) { + cull_pass.set_pipeline(remap_1d_to_2d_dispatch_pipeline); + cull_pass.set_bind_group(0, remap_1d_to_2d_dispatch_bind_group, &[]); + cull_pass.dispatch_workgroups(1, 1, 1); + } } #[allow(clippy::too_many_arguments)] fn raster_pass( first_pass: bool, render_context: &mut RenderContext, - meshlet_view_resources: &MeshletViewResources, - visibility_buffer_draw_indirect_args: &Buffer, - depth_stencil_attachment: RenderPassDepthStencilAttachment, + visibility_buffer_hardware_software_indirect_args: &Buffer, + visibility_buffer_hardware_raster_indirect_args: &Buffer, + dummy_render_target: &TextureView, meshlet_view_bind_groups: &MeshletViewBindGroups, view_offset: &ViewUniformOffset, - visibility_buffer_raster_pipeline: &RenderPipeline, + visibility_buffer_hardware_software_pipeline: &ComputePipeline, + visibility_buffer_hardware_raster_pipeline: &RenderPipeline, camera: Option<&ExtractedCamera>, + raster_cluster_rightmost_slot: u32, ) { - let mut color_attachments_filled = [None, None]; - if let (Some(visibility_buffer), Some(material_depth_color)) = ( - meshlet_view_resources.visibility_buffer.as_ref(), - meshlet_view_resources.material_depth_color.as_ref(), - ) { - let load = if first_pass { - LoadOp::Clear(LinearRgba::BLACK.into()) + let command_encoder = render_context.command_encoder(); + let mut software_pass = command_encoder.begin_compute_pass(&ComputePassDescriptor { + label: Some(if first_pass { + "raster_software_first" } else { - LoadOp::Load - }; - color_attachments_filled = [ - Some(RenderPassColorAttachment { - view: &visibility_buffer.default_view, - resolve_target: None, - ops: Operations { - load, - store: StoreOp::Store, - }, - }), - Some(RenderPassColorAttachment { - view: &material_depth_color.default_view, - resolve_target: None, - ops: Operations { - load, - store: StoreOp::Store, - }, - }), - ]; - } + "raster_software_second" + }), + timestamp_writes: None, + }); + software_pass.set_pipeline(visibility_buffer_hardware_software_pipeline); + software_pass.set_bind_group( + 0, + &meshlet_view_bind_groups.visibility_buffer_raster, + &[view_offset.offset], + ); + software_pass + .dispatch_workgroups_indirect(visibility_buffer_hardware_software_indirect_args, 0); + drop(software_pass); - let mut draw_pass = render_context.begin_tracked_render_pass(RenderPassDescriptor { + let mut hardware_pass = render_context.begin_tracked_render_pass(RenderPassDescriptor { label: Some(if first_pass { - "raster_first" + "raster_hardware_first" } else { - "raster_second" + "raster_hardware_second" }), - color_attachments: if color_attachments_filled[0].is_none() { - &[] - } else { - &color_attachments_filled - }, - depth_stencil_attachment: Some(depth_stencil_attachment), + color_attachments: &[Some(RenderPassColorAttachment { + view: dummy_render_target, + resolve_target: None, + ops: Operations { + load: LoadOp::Clear(LinearRgba::BLACK.into()), + store: StoreOp::Discard, + }, + })], + depth_stencil_attachment: None, timestamp_writes: None, occlusion_query_set: None, }); if let Some(viewport) = camera.and_then(|camera| camera.viewport.as_ref()) { - draw_pass.set_camera_viewport(viewport); + hardware_pass.set_camera_viewport(viewport); } - - draw_pass.set_render_pipeline(visibility_buffer_raster_pipeline); - draw_pass.set_bind_group( + hardware_pass.set_render_pipeline(visibility_buffer_hardware_raster_pipeline); + hardware_pass.set_push_constants( + ShaderStages::VERTEX, + 0, + &raster_cluster_rightmost_slot.to_le_bytes(), + ); + hardware_pass.set_bind_group( 0, &meshlet_view_bind_groups.visibility_buffer_raster, &[view_offset.offset], ); - draw_pass.draw_indirect(visibility_buffer_draw_indirect_args, 0); + hardware_pass.draw_indirect(visibility_buffer_hardware_raster_indirect_args, 0); } fn downsample_depth( @@ -396,7 +473,10 @@ fn downsample_depth( downsample_pass.set_pipeline(downsample_depth_first_pipeline); downsample_pass.set_push_constants( 0, - &meshlet_view_resources.depth_pyramid_mip_count.to_le_bytes(), + bytemuck::cast_slice(&[ + meshlet_view_resources.depth_pyramid_mip_count, + meshlet_view_resources.view_size.x, + ]), ); downsample_pass.set_bind_group(0, &meshlet_view_bind_groups.downsample_depth, &[]); downsample_pass.dispatch_workgroups( @@ -411,19 +491,47 @@ fn downsample_depth( } } -fn copy_material_depth_pass( +fn resolve_depth( + render_context: &mut RenderContext, + depth_stencil_attachment: RenderPassDepthStencilAttachment, + meshlet_view_resources: &MeshletViewResources, + meshlet_view_bind_groups: &MeshletViewBindGroups, + resolve_depth_pipeline: &RenderPipeline, + camera: &ExtractedCamera, +) { + let mut resolve_pass = render_context.begin_tracked_render_pass(RenderPassDescriptor { + label: Some("resolve_depth"), + color_attachments: &[], + depth_stencil_attachment: Some(depth_stencil_attachment), + timestamp_writes: None, + occlusion_query_set: None, + }); + if let Some(viewport) = &camera.viewport { + resolve_pass.set_camera_viewport(viewport); + } + resolve_pass.set_render_pipeline(resolve_depth_pipeline); + resolve_pass.set_push_constants( + ShaderStages::FRAGMENT, + 0, + &meshlet_view_resources.view_size.x.to_le_bytes(), + ); + resolve_pass.set_bind_group(0, &meshlet_view_bind_groups.resolve_depth, &[]); + resolve_pass.draw(0..3, 0..1); +} + +fn resolve_material_depth( render_context: &mut RenderContext, meshlet_view_resources: &MeshletViewResources, meshlet_view_bind_groups: &MeshletViewBindGroups, - copy_material_depth_pipeline: &RenderPipeline, + resolve_material_depth_pipeline: &RenderPipeline, camera: &ExtractedCamera, ) { - if let (Some(material_depth), Some(copy_material_depth_bind_group)) = ( + if let (Some(material_depth), Some(resolve_material_depth_bind_group)) = ( meshlet_view_resources.material_depth.as_ref(), - meshlet_view_bind_groups.copy_material_depth.as_ref(), + meshlet_view_bind_groups.resolve_material_depth.as_ref(), ) { - let mut copy_pass = render_context.begin_tracked_render_pass(RenderPassDescriptor { - label: Some("copy_material_depth"), + let mut resolve_pass = render_context.begin_tracked_render_pass(RenderPassDescriptor { + label: Some("resolve_material_depth"), color_attachments: &[], depth_stencil_attachment: Some(RenderPassDepthStencilAttachment { view: &material_depth.default_view, @@ -437,11 +545,15 @@ fn copy_material_depth_pass( occlusion_query_set: None, }); if let Some(viewport) = &camera.viewport { - copy_pass.set_camera_viewport(viewport); + resolve_pass.set_camera_viewport(viewport); } - - copy_pass.set_render_pipeline(copy_material_depth_pipeline); - copy_pass.set_bind_group(0, copy_material_depth_bind_group, &[]); - copy_pass.draw(0..3, 0..1); + resolve_pass.set_render_pipeline(resolve_material_depth_pipeline); + resolve_pass.set_push_constants( + ShaderStages::FRAGMENT, + 0, + &meshlet_view_resources.view_size.x.to_le_bytes(), + ); + resolve_pass.set_bind_group(0, resolve_material_depth_bind_group, &[]); + resolve_pass.draw(0..3, 0..1); } } diff --git a/crates/bevy_pbr/src/meshlet/visibility_buffer_resolve.wgsl b/crates/bevy_pbr/src/meshlet/visibility_buffer_resolve.wgsl index baf72afcc4cab..bb35c1649734a 100644 --- a/crates/bevy_pbr/src/meshlet/visibility_buffer_resolve.wgsl +++ b/crates/bevy_pbr/src/meshlet/visibility_buffer_resolve.wgsl @@ -95,13 +95,14 @@ struct VertexOutput { /// Load the visibility buffer texture and resolve it into a VertexOutput. fn resolve_vertex_output(frag_coord: vec4) -> VertexOutput { - let packed_ids = textureLoad(meshlet_visibility_buffer, vec2(frag_coord.xy), 0).r; + let frag_coord_1d = u32(frag_coord.y) * u32(view.viewport.z) + u32(frag_coord.x); + let packed_ids = u32(meshlet_visibility_buffer[frag_coord_1d]); // TODO: Might be faster to load the correct u32 directly let cluster_id = packed_ids >> 6u; let meshlet_id = meshlet_cluster_meshlet_ids[cluster_id]; let meshlet = meshlets[meshlet_id]; let triangle_id = extractBits(packed_ids, 0u, 6u); - let index_ids = meshlet.start_index_id + vec3(triangle_id * 3u) + vec3(0u, 1u, 2u); + let index_ids = meshlet.start_index_id + (triangle_id * 3u) + vec3(0u, 1u, 2u); let indices = meshlet.start_vertex_id + vec3(get_meshlet_index(index_ids.x), get_meshlet_index(index_ids.y), get_meshlet_index(index_ids.z)); let vertex_ids = vec3(meshlet_vertex_ids[indices.x], meshlet_vertex_ids[indices.y], meshlet_vertex_ids[indices.z]); let vertex_1 = unpack_meshlet_vertex(meshlet_vertex_data[vertex_ids.x]); diff --git a/crates/bevy_pbr/src/meshlet/visibility_buffer_software_raster.wgsl b/crates/bevy_pbr/src/meshlet/visibility_buffer_software_raster.wgsl new file mode 100644 index 0000000000000..02feaaeaeda4c --- /dev/null +++ b/crates/bevy_pbr/src/meshlet/visibility_buffer_software_raster.wgsl @@ -0,0 +1,196 @@ +#import bevy_pbr::{ + meshlet_bindings::{ + meshlet_cluster_meshlet_ids, + meshlets, + meshlet_vertex_ids, + meshlet_vertex_data, + meshlet_cluster_instance_ids, + meshlet_instance_uniforms, + meshlet_raster_clusters, + meshlet_software_raster_cluster_count, + meshlet_visibility_buffer, + view, + get_meshlet_index, + unpack_meshlet_vertex, + }, + mesh_functions::mesh_position_local_to_world, + view_transformations::ndc_to_uv, +} +#import bevy_render::maths::affine3_to_square + +/// Compute shader for rasterizing small clusters into a visibility buffer. + +// TODO: Subpixel precision and top-left rule + +var viewport_vertices: array; + +@compute +@workgroup_size(64, 1, 1) // 64 threads per workgroup, 1 vertex/triangle per thread, 1 cluster per workgroup +fn rasterize_cluster( + @builtin(workgroup_id) workgroup_id: vec3, + @builtin(local_invocation_index) local_invocation_index: u32, +#ifdef MESHLET_2D_DISPATCH + @builtin(num_workgroups) num_workgroups: vec3, +#endif +) { + var workgroup_id_1d = workgroup_id.x; + +#ifdef MESHLET_2D_DISPATCH + workgroup_id_1d += workgroup_id.y * num_workgroups.x; + if workgroup_id_1d >= meshlet_software_raster_cluster_count { return; } +#endif + + let cluster_id = meshlet_raster_clusters[workgroup_id_1d]; + let meshlet_id = meshlet_cluster_meshlet_ids[cluster_id]; + let meshlet = meshlets[meshlet_id]; + + // Load and project 1 vertex per thread + let vertex_id = local_invocation_index; + if vertex_id < meshlet.vertex_count { + let meshlet_vertex_id = meshlet_vertex_ids[meshlet.start_vertex_id + vertex_id]; + let vertex = unpack_meshlet_vertex(meshlet_vertex_data[meshlet_vertex_id]); + + // Project vertex to viewport space + let instance_id = meshlet_cluster_instance_ids[cluster_id]; + let instance_uniform = meshlet_instance_uniforms[instance_id]; + let world_from_local = affine3_to_square(instance_uniform.world_from_local); + let world_position = mesh_position_local_to_world(world_from_local, vec4(vertex.position, 1.0)); + var clip_position = view.clip_from_world * vec4(world_position.xyz, 1.0); + var ndc_position = clip_position.xyz / clip_position.w; +#ifdef DEPTH_CLAMP_ORTHO + ndc_position.z = 1.0 / clip_position.z; +#endif + let viewport_position_xy = ndc_to_uv(ndc_position.xy) * view.viewport.zw; + + // Write vertex to workgroup shared memory + viewport_vertices[vertex_id] = vec3(viewport_position_xy, ndc_position.z); + } + + workgroupBarrier(); + + // Load 1 triangle's worth of vertex data per thread + let triangle_id = local_invocation_index; + if triangle_id >= meshlet.triangle_count { return; } + let index_ids = meshlet.start_index_id + (triangle_id * 3u) + vec3(0u, 1u, 2u); + let vertex_ids = vec3(get_meshlet_index(index_ids[0]), get_meshlet_index(index_ids[1]), get_meshlet_index(index_ids[2])); + let vertex_0 = viewport_vertices[vertex_ids[2]]; + let vertex_1 = viewport_vertices[vertex_ids[1]]; + let vertex_2 = viewport_vertices[vertex_ids[0]]; + let packed_ids = (cluster_id << 6u) | triangle_id; + + // Compute triangle bounding box + let min_x = u32(min3(vertex_0.x, vertex_1.x, vertex_2.x)); + let min_y = u32(min3(vertex_0.y, vertex_1.y, vertex_2.y)); + var max_x = u32(ceil(max3(vertex_0.x, vertex_1.x, vertex_2.x))); + var max_y = u32(ceil(max3(vertex_0.y, vertex_1.y, vertex_2.y))); + max_x = min(max_x, u32(view.viewport.z) - 1u); + max_y = min(max_y, u32(view.viewport.w) - 1u); + if any(vec2(min_x, min_y) > vec2(max_x, max_y)) { return; } + + // Setup triangle gradients + let w_x = vec3(vertex_1.y - vertex_2.y, vertex_2.y - vertex_0.y, vertex_0.y - vertex_1.y); + let w_y = vec3(vertex_2.x - vertex_1.x, vertex_0.x - vertex_2.x, vertex_1.x - vertex_0.x); + let triangle_double_area = edge_function(vertex_0.xy, vertex_1.xy, vertex_2.xy); // TODO: Reuse earlier calculations and take advantage of summing to 1 + let vertices_z = vec3(vertex_0.z, vertex_1.z, vertex_2.z) / triangle_double_area; + let z_x = dot(vertices_z, w_x); + let z_y = dot(vertices_z, w_y); + + // Setup initial triangle equations + let starting_pixel = vec2(f32(min_x), f32(min_y)) + 0.5; + var w_row = vec3( + // TODO: Reuse earlier calculations and take advantage of summing to 1 + edge_function(vertex_1.xy, vertex_2.xy, starting_pixel), + edge_function(vertex_2.xy, vertex_0.xy, starting_pixel), + edge_function(vertex_0.xy, vertex_1.xy, starting_pixel), + ); + var z_row = dot(vertices_z, w_row); + let view_width = u32(view.viewport.z); + var frag_coord_1d_row = min_y * view_width; + + // Rasterize triangle + if subgroupAny(max_x - min_x > 4u) { + // Scanline setup + let edge_012 = -w_x; + let open_edge = edge_012 < vec3(0.0); + let inverse_edge_012 = select(1.0 / edge_012, vec3(1e8), edge_012 == vec3(0.0)); + let max_x_diff = vec3(max_x - min_x); + for (var y = min_y; y <= max_y; y++) { + // Calculate start and end X interval for pixels in this row within the triangle + let cross_x = w_row * inverse_edge_012; + let min_x2 = select(vec3(0.0), cross_x, open_edge); + let max_x2 = select(cross_x, max_x_diff, open_edge); + var x0 = u32(ceil(max3(min_x2[0], min_x2[1], min_x2[2]))); + var x1 = u32(min3(max_x2[0], max_x2[1], max_x2[2])); + + var w = w_row + w_x * f32(x0); + var z = z_row + z_x * f32(x0); + x0 += min_x; + x1 += min_x; + + // Iterate scanline X interval + for (var x = x0; x <= x1; x++) { + // Check if point at pixel is within triangle (TODO: this shouldn't be needed, but there's bugs without it) + if min3(w[0], w[1], w[2]) >= 0.0 { + write_visibility_buffer_pixel(frag_coord_1d_row + x, z, packed_ids); + } + + // Increment edge functions along the X-axis + w += w_x; + z += z_x; + } + + // Increment edge functions along the Y-axis + w_row += w_y; + z_row += z_y; + frag_coord_1d_row += view_width; + } + } else { + // Iterate over every pixel in the triangle's bounding box + for (var y = min_y; y <= max_y; y++) { + var w = w_row; + var z = z_row; + + for (var x = min_x; x <= max_x; x++) { + // Check if point at pixel is within triangle + if min3(w[0], w[1], w[2]) >= 0.0 { + write_visibility_buffer_pixel(frag_coord_1d_row + x, z, packed_ids); + } + + // Increment edge functions along the X-axis + w += w_x; + z += z_x; + } + + // Increment edge functions along the Y-axis + w_row += w_y; + z_row += z_y; + frag_coord_1d_row += view_width; + } + } +} + +fn write_visibility_buffer_pixel(frag_coord_1d: u32, z: f32, packed_ids: u32) { +#ifdef MESHLET_VISIBILITY_BUFFER_RASTER_PASS_OUTPUT + let depth = bitcast(z); + let visibility = (u64(depth) << 32u) | u64(packed_ids); + atomicMax(&meshlet_visibility_buffer[frag_coord_1d], visibility); +#else ifdef DEPTH_CLAMP_ORTHO + let depth = bitcast(1.0 / z); + atomicMax(&meshlet_visibility_buffer[frag_coord_1d], depth); +#else + let depth = bitcast(z); + atomicMax(&meshlet_visibility_buffer[frag_coord_1d], depth); +#endif +} + +fn edge_function(a: vec2, b: vec2, c: vec2) -> f32 { + return (b.x - a.x) * (c.y - a.y) - (b.y - a.y) * (c.x - a.x); +} + +fn min3(a: f32, b: f32, c: f32) -> f32 { + return min(a, min(b, c)); +} + +fn max3(a: f32, b: f32, c: f32) -> f32 { + return max(a, max(b, c)); +} diff --git a/crates/bevy_pbr/src/prepass/mod.rs b/crates/bevy_pbr/src/prepass/mod.rs index a8ca69a41f176..e545e3df3b21e 100644 --- a/crates/bevy_pbr/src/prepass/mod.rs +++ b/crates/bevy_pbr/src/prepass/mod.rs @@ -31,7 +31,7 @@ use bevy_utils::tracing::error; #[cfg(feature = "meshlet")] use crate::meshlet::{ - prepare_material_meshlet_meshes_prepass, queue_material_meshlet_meshes, MeshletGpuScene, + prepare_material_meshlet_meshes_prepass, queue_material_meshlet_meshes, InstanceManager, MeshletMesh, }; use crate::*; @@ -186,7 +186,7 @@ where .in_set(RenderSet::QueueMeshes) .after(prepare_assets::>) .before(queue_material_meshlet_meshes::) - .run_if(resource_exists::), + .run_if(resource_exists::), ); } } diff --git a/crates/bevy_render/src/render_resource/mod.rs b/crates/bevy_render/src/render_resource/mod.rs index 4b914ef413556..42aed363395d1 100644 --- a/crates/bevy_render/src/render_resource/mod.rs +++ b/crates/bevy_render/src/render_resource/mod.rs @@ -32,7 +32,10 @@ pub use uniform_buffer::*; // TODO: decide where re-exports should go pub use wgpu::{ - util::{BufferInitDescriptor, DrawIndexedIndirectArgs, DrawIndirectArgs, TextureDataOrder}, + util::{ + BufferInitDescriptor, DispatchIndirectArgs, DrawIndexedIndirectArgs, DrawIndirectArgs, + TextureDataOrder, + }, AdapterInfo as WgpuAdapterInfo, AddressMode, AstcBlock, AstcChannel, BindGroupDescriptor, BindGroupEntry, BindGroupLayoutDescriptor, BindGroupLayoutEntry, BindingResource, BindingType, BlendComponent, BlendFactor, BlendOperation, BlendState, BufferAddress, BufferAsyncError, diff --git a/examples/3d/meshlet.rs b/examples/3d/meshlet.rs index 4721e0c9819df..20264b43a15f3 100644 --- a/examples/3d/meshlet.rs +++ b/examples/3d/meshlet.rs @@ -17,7 +17,7 @@ use camera_controller::{CameraController, CameraControllerPlugin}; use std::{f32::consts::PI, path::Path, process::ExitCode}; const ASSET_URL: &str = - "https://raw.githubusercontent.com/JMS55/bevy_meshlet_asset/b6c712cfc87c65de419f856845401aba336a7bcd/bunny.meshlet_mesh"; + "https://raw.githubusercontent.com/JMS55/bevy_meshlet_asset/10bb5471c7beedfe63ad1cf269599c92b0f10aa2/bunny.meshlet_mesh"; fn main() -> ExitCode { if !Path::new("./assets/models/bunny.meshlet_mesh").exists() { @@ -29,7 +29,9 @@ fn main() -> ExitCode { .insert_resource(DirectionalLightShadowMap { size: 4096 }) .add_plugins(( DefaultPlugins, - MeshletPlugin, + MeshletPlugin { + cluster_buffer_slots: 8192, + }, MaterialPlugin::::default(), CameraControllerPlugin, ))