diff --git a/build.bash b/build.bash
index 5888eb7f..948aa6ae 100755
--- a/build.bash
+++ b/build.bash
@@ -21,7 +21,7 @@ case $1 in
     ;;
     serve)
         shift
-        simple-http-server target/generated -c wasm,html,js -i
+        simple-http-server target/generated -c wasm,html,js -i --nocache
     ;;
     ci)
         cargo fmt
diff --git a/examples/Cargo.toml b/examples/Cargo.toml
index 3c4fbbce..584f3d74 100644
--- a/examples/Cargo.toml
+++ b/examples/Cargo.toml
@@ -74,6 +74,7 @@ wgpu-profiler = "0.16.0"
 console_log = "1"
 console_error_panic_hook = "0.1"
 js-sys = "0.3"
+gloo-console = "0.3"
 web-sys = "0.3.67"
 wasm-bindgen = "0.2.83"
 wasm-bindgen-futures = "0.4"
diff --git a/examples/src/cube_no_framework/mod.rs b/examples/src/cube_no_framework/mod.rs
index e764e898..d036ce98 100644
--- a/examples/src/cube_no_framework/mod.rs
+++ b/examples/src/cube_no_framework/mod.rs
@@ -109,13 +109,8 @@ pub fn main() {
     let base_rendergraph = rend3_routine::base::BaseRenderGraph::new(&renderer, &spp);
 
     let mut data_core = renderer.data_core.lock();
-    let pbr_routine = rend3_routine::pbr::PbrRoutine::new(
-        &renderer,
-        &mut data_core,
-        &spp,
-        &base_rendergraph.interfaces,
-        &base_rendergraph.gpu_culler.culling_buffer_map_handle,
-    );
+    let pbr_routine =
+        rend3_routine::pbr::PbrRoutine::new(&renderer, &mut data_core, &spp, &base_rendergraph.interfaces);
     drop(data_core);
     let tonemapping_routine = rend3_routine::tonemapping::TonemappingRoutine::new(
         &renderer,
diff --git a/examples/src/lib.rs b/examples/src/lib.rs
index cba9e27c..9f28a0d3 100644
--- a/examples/src/lib.rs
+++ b/examples/src/lib.rs
@@ -13,7 +13,9 @@ mod static_gltf;
 mod textured_quad;
 
 #[cfg(target_arch = "wasm32")]
-use log::info as println;
+use gloo_console::info as output;
+#[cfg(not(target_arch = "wasm32"))]
+use std::println as output;
 
 #[cfg(test)]
 mod tests;
@@ -35,10 +37,10 @@ const EXAMPLES: &[ExampleDesc] = &[
 ];
 
 fn print_examples() {
-    println!("Usage: cargo run <example_name>\n");
-    println!("Available examples:");
+    output!("Usage: cargo run <example_name>\n");
+    output!("Available examples:");
     for example in EXAMPLES {
-        println!("    {}", example.name);
+        output!("    {}", example.name);
     }
 }
 
@@ -50,7 +52,7 @@ pub fn main_with_name(example_name: Option<String>) {
     };
 
     let Some(example) = EXAMPLES.iter().find(|example| example.name == example_name) else {
-        println!("Unknown example: {}\n", example_name);
+        output!("Unknown example: {}\n", example_name);
         print_examples();
         return;
     };
diff --git a/examples/src/scene_viewer/bistro.png b/examples/src/scene_viewer/bistro.png
index 9ed44c9b..b8d952fe 100644
Binary files a/examples/src/scene_viewer/bistro.png and b/examples/src/scene_viewer/bistro.png differ
diff --git a/examples/src/tests.rs b/examples/src/tests.rs
index 14ab8675..207673d4 100644
--- a/examples/src/tests.rs
+++ b/examples/src/tests.rs
@@ -34,7 +34,6 @@ pub async fn test_app<A: App<T>, T: 'static>(mut config: TestConfiguration<A>) -
             &mut data_core,
             &spp,
             &base_rendergraph.interfaces,
-            &base_rendergraph.gpu_culler.culling_buffer_map_handle,
         )),
         skybox: Mutex::new(rend3_routine::skybox::SkyboxRoutine::new(&renderer, &spp, &base_rendergraph.interfaces)),
         tonemapping: Mutex::new(rend3_routine::tonemapping::TonemappingRoutine::new(
diff --git a/rend3-framework/src/lib.rs b/rend3-framework/src/lib.rs
index dcc12303..8567b7b0 100644
--- a/rend3-framework/src/lib.rs
+++ b/rend3-framework/src/lib.rs
@@ -231,7 +231,6 @@ pub async fn async_start<A: App<T> + 'static, T: 'static>(mut app: A, window_bui
             &mut data_core,
             &spp,
             &base_rendergraph.interfaces,
-            &base_rendergraph.gpu_culler.culling_buffer_map_handle,
         )),
         skybox: Mutex::new(rend3_routine::skybox::SkyboxRoutine::new(&renderer, &spp, &base_rendergraph.interfaces)),
         tonemapping: Mutex::new(rend3_routine::tonemapping::TonemappingRoutine::new(
diff --git a/rend3-routine/shaders/src/cull.wgsl b/rend3-routine/shaders/src/cull.wgsl
deleted file mode 100644
index 295090e7..00000000
--- a/rend3-routine/shaders/src/cull.wgsl
+++ /dev/null
@@ -1,390 +0,0 @@
-{{include "rend3-routine/structures.wgsl"}}
-{{include "rend3-routine/structures_object.wgsl"}}
-
-@group(0) @binding(0)
-var<storage> vertex_buffer: array<u32>;
-@group(0) @binding(1)
-var<storage> object_buffer: array<Object>;
-
-fn vertex_fetch(
-    object_invocation: u32,
-    object_info: ptr<function, ObjectCullingInformation>,
-) -> Triangle {
-    let index_0_index = object_invocation * 3u + 0u;
-    let index_1_index = object_invocation * 3u + 1u;
-    let index_2_index = object_invocation * 3u + 2u;
-
-    let object = object_buffer[(*object_info).object_id];
-
-    let index0 = vertex_buffer[object.first_index + index_0_index];
-    let index1 = vertex_buffer[object.first_index + index_1_index];
-    let index2 = vertex_buffer[object.first_index + index_2_index];
-
-    let position_start_offset = object.vertex_attribute_start_offsets[{{position_attribute_offset}}];
-    let model_position0 = extract_attribute_vec3_f32(position_start_offset, index0);
-    let model_position1 = extract_attribute_vec3_f32(position_start_offset, index1);
-    let model_position2 = extract_attribute_vec3_f32(position_start_offset, index2);
-
-    return Triangle(
-        TriangleVertices(model_position0, model_position1, model_position2),
-        TriangleIndices(index0, index1, index2)
-    );
-}
-
-@group(0) @binding(2)
-var<storage> culling_job: BatchData;
-
-struct DrawCallBuffer {
-    /// We always put the buffer that needs to be present in the next frame first.
-    predicted_object_offset: u32,
-    residual_object_offset: u32,
-    calls: array<IndirectCall>,
-}
-
-@group(0) @binding(3)
-var<storage, read_write> draw_calls: DrawCallBuffer;
-
-fn init_draw_calls(global_invocation: u32, region_id: u32) {
-    // Init the inheritable draw call
-    let predicted_object_draw_index = draw_calls.predicted_object_offset + region_id;
-    draw_calls.calls[predicted_object_draw_index].vertex_offset = 0;
-    draw_calls.calls[predicted_object_draw_index].instance_count = 1u;
-    draw_calls.calls[predicted_object_draw_index].base_instance = 0u;
-    draw_calls.calls[predicted_object_draw_index].base_index = global_invocation * 3u;
-
-    // Init the residual objects draw call
-    let residual_object_draw_index = draw_calls.residual_object_offset + region_id;
-    draw_calls.calls[residual_object_draw_index].vertex_offset = 0;
-    draw_calls.calls[residual_object_draw_index].instance_count = 1u;
-    draw_calls.calls[residual_object_draw_index].base_instance = 0u;
-    draw_calls.calls[residual_object_draw_index].base_index = global_invocation * 3u;
-}
-
-fn add_predicted_triangle_to_draw_call(region_id: u32) -> u32 {
-    let output_region_index = atomicAdd(&draw_calls.calls[draw_calls.predicted_object_offset + region_id].vertex_count, 3u);
-    let output_region_triangle = output_region_index / 3u;
-    return output_region_triangle;
-}
-
-fn add_residual_triangle_to_draw_call(region_id: u32) -> u32 {
-    let output_region_index = atomicAdd(&draw_calls.calls[draw_calls.residual_object_offset + region_id].vertex_count, 3u);
-    let output_region_triangle = output_region_index / 3u;
-    return output_region_triangle;
-}
-
-struct OutputIndexBuffer {
-    /// We always put the buffer that needs to be present in the next frame first.
-    predicted_object_offset: u32,
-    residual_object_offset: u32,
-    indices: array<u32>,
-}
-@group(0) @binding(4)
-var<storage, read_write> output_indices : OutputIndexBuffer;
-
-fn write_predicted_atomic_triangle(
-    batch_object_index: u32,
-    object_info: ptr<function, ObjectCullingInformation>,
-    indices: TriangleIndices,
-) {
-    let region_invocation = add_predicted_triangle_to_draw_call((*object_info).region_id);
-    let batch_invocation = region_invocation + (*object_info).region_base_invocation;
-    let global_invocation = batch_invocation + culling_job.batch_base_invocation;
-
-    let packed_indices = pack_batch_indices(batch_object_index, indices);
-
-    let predicted_object_indices_index = output_indices.predicted_object_offset + global_invocation * 3u;
-    output_indices.indices[predicted_object_indices_index] = packed_indices[0];
-    output_indices.indices[predicted_object_indices_index + 1u] = packed_indices[1];
-    output_indices.indices[predicted_object_indices_index + 2u] = packed_indices[2];
-}
-
-fn write_residual_atomic_triangle(
-    batch_object_index: u32,
-    object_info: ptr<function, ObjectCullingInformation>,
-    indices: TriangleIndices,
-) {
-    let region_invocation = add_residual_triangle_to_draw_call((*object_info).region_id);
-    let batch_invocation = region_invocation + (*object_info).region_base_invocation;
-    let global_invocation = batch_invocation + culling_job.batch_base_invocation;
-
-    let packed_indices = pack_batch_indices(batch_object_index, indices);
-
-    let residual_object_indices_index = output_indices.residual_object_offset + global_invocation * 3u;
-    output_indices.indices[residual_object_indices_index] = packed_indices[0];
-    output_indices.indices[residual_object_indices_index + 1u] = packed_indices[1];
-    output_indices.indices[residual_object_indices_index + 2u] = packed_indices[2];
-}
-
-fn write_residual_nonatomic_triangle(
-    invocation: u32,
-    batch_object_index: u32,
-    object_info: ptr<function, ObjectCullingInformation>,
-    indices: TriangleIndices,
-) {
-    add_residual_triangle_to_draw_call((*object_info).region_id);
-
-    let packed_indices = pack_batch_indices(batch_object_index, indices);
-
-    let residual_object_indices_index = output_indices.residual_object_offset + invocation * 3u;
-    output_indices.indices[residual_object_indices_index] = packed_indices[0];
-    output_indices.indices[residual_object_indices_index + 1u] = packed_indices[1];
-    output_indices.indices[residual_object_indices_index + 2u] = packed_indices[2];
-}
-
-fn write_invalid_residual_nonatomic_triangle(invocation: u32, object_info: ptr<function, ObjectCullingInformation>) {
-    add_residual_triangle_to_draw_call((*object_info).region_id);
-
-    let residual_object_indices_index = output_indices.residual_object_offset + invocation * 3u;
-    output_indices.indices[residual_object_indices_index] = INVALID_VERTEX;
-    output_indices.indices[residual_object_indices_index + 1u] = INVALID_VERTEX;
-    output_indices.indices[residual_object_indices_index + 2u] = INVALID_VERTEX;
-}
-
-struct CullingResults {
-    /// We always put the buffer that needs to be present in the next frame first.
-    output_offset: u32,
-    input_offset: u32,
-    bits: array<u32>,
-}
-@group(0) @binding(5)
-var<storage, read_write> culling_results: CullingResults;
-
-fn get_previous_culling_result(object_info: ptr<function, ObjectCullingInformation>, object_invocation: u32) -> bool {
-    if (*object_info).previous_global_invocation == 0xFFFFFFFFu {
-        return false;
-    }
-
-    let previous_global_invocation = object_invocation + (*object_info).previous_global_invocation;
-    let bitmask = culling_results.bits[culling_results.input_offset + (previous_global_invocation / 32u)];
-    return ((bitmask >> (previous_global_invocation % 32u)) & 0x1u) == 0x1u;
-}
-
-@group(0) @binding(6)
-var<storage> per_camera_uniform: PerCameraUniform;
-
-fn is_shadow_pass() -> bool {
-    return per_camera_uniform.shadow_index != 0xFFFFFFFFu;
-}
-
-@group(0) @binding(7)
-var hirearchical_z_buffer: texture_depth_2d;
-@group(0) @binding(8)
-var nearest_sampler: sampler;
-
-{{include "rend3/vertex_attributes.wgsl"}}
-
-struct ObjectSearchResult {
-    range: ObjectCullingInformation,
-    index_within_region: u32,
-}
-
-fn find_object_info(wid: u32) -> ObjectSearchResult {
-    let target_invocation = wid * 256u;
-    // pulled directly from https://doc.rust-lang.org/src/core/slice/mod.rs.html#2412-2438
-
-    var size = culling_job.total_objects;
-    var left = 0u;
-    var right = size;
-    var object_info: ObjectCullingInformation;
-    while left < right {
-        let mid = left + size / 2u;
-
-        let probe = culling_job.object_culling_information[mid];
-
-        if probe.invocation_end <= target_invocation {
-            left = mid + 1u;
-        } else if probe.invocation_start > target_invocation {
-            right = mid;
-        } else {
-            return ObjectSearchResult(probe, mid);
-        }
-
-        size = right - left;
-    }
-    
-    // This is unreachable, but required for the compiler to be happy
-    return ObjectSearchResult(object_info, 0xFFFFFFFFu);
-}
-
-// 256 workgroup size / 32 bits
-var<workgroup> workgroup_culling_results: array<atomic<u32>, 8>;
-
-fn clear_culling_results(lid: u32) {
-    if lid == 0u {
-        atomicStore(&workgroup_culling_results[0], 0u);
-        atomicStore(&workgroup_culling_results[1], 0u);
-        atomicStore(&workgroup_culling_results[2], 0u);
-        atomicStore(&workgroup_culling_results[3], 0u);
-        atomicStore(&workgroup_culling_results[4], 0u);
-        atomicStore(&workgroup_culling_results[5], 0u);
-        atomicStore(&workgroup_culling_results[6], 0u);
-        atomicStore(&workgroup_culling_results[7], 0u);
-    }
-}
-
-fn compute_culling_results(lid: u32, passed_culling: bool) {
-    atomicOr(&workgroup_culling_results[lid / 32u], u32(passed_culling) << (lid % 32u));
-}
-
-fn save_culling_results(global_invocation: u32, lid: u32) {
-    if lid == 0u {
-        let culling_results_index = culling_results.output_offset + (global_invocation / 32u);
-        culling_results.bits[culling_results_index + 0u] = atomicLoad(&workgroup_culling_results[0]);
-        culling_results.bits[culling_results_index + 1u] = atomicLoad(&workgroup_culling_results[1]);
-        culling_results.bits[culling_results_index + 2u] = atomicLoad(&workgroup_culling_results[2]);
-        culling_results.bits[culling_results_index + 3u] = atomicLoad(&workgroup_culling_results[3]);
-        culling_results.bits[culling_results_index + 4u] = atomicLoad(&workgroup_culling_results[4]);
-        culling_results.bits[culling_results_index + 5u] = atomicLoad(&workgroup_culling_results[5]);
-        culling_results.bits[culling_results_index + 6u] = atomicLoad(&workgroup_culling_results[6]);
-        culling_results.bits[culling_results_index + 7u] = atomicLoad(&workgroup_culling_results[7]);
-    }
-}
-
-fn textureSampleMin(texture: texture_depth_2d, uv: vec2<f32>, mipmap: f32) -> f32 {
-    let int_mipmap = i32(mipmap);
-    let mip_resolution = vec2<f32>(textureDimensions(texture, int_mipmap).xy);
-
-    let pixel_coords = uv * mip_resolution - 0.5;
-
-    let low = vec2<u32>(max(floor(pixel_coords), vec2<f32>(0.0)));
-    let high = vec2<u32>(min(ceil(pixel_coords), mip_resolution - 1.0));
-
-    let top_left = vec2<u32>(low.x, low.y);
-    let top_right = vec2<u32>(high.x, low.y);
-    let bottom_left = vec2<u32>(low.x, high.y);
-    let bottom_right = vec2<u32>(high.x, high.y);
-
-    var minval = textureLoad(texture, top_left, int_mipmap);
-    minval = min(minval, textureLoad(texture, top_right, int_mipmap));
-    minval = min(minval, textureLoad(texture, bottom_left, int_mipmap));
-    minval = min(minval, textureLoad(texture, bottom_right, int_mipmap));
-    return minval;
-}
-
-fn execute_culling(
-    model_view_proj: mat4x4<f32>,
-    vertices: TriangleVertices,
-) -> bool {
-    let position0 = model_view_proj * vec4<f32>(vertices[0], 1.0);
-    let position1 = model_view_proj * vec4<f32>(vertices[1], 1.0);
-    let position2 = model_view_proj * vec4<f32>(vertices[2], 1.0);
-
-    let det = determinant(mat3x3<f32>(position0.xyw, position1.xyw, position2.xyw));
-
-    if (per_camera_uniform.flags & PCU_FLAGS_AREA_VISIBLE_MASK) == PCU_FLAGS_POSITIVE_AREA_VISIBLE && det <= 0.0 {
-        return false;
-    }
-    if (per_camera_uniform.flags & PCU_FLAGS_AREA_VISIBLE_MASK) == PCU_FLAGS_NEGATIVE_AREA_VISIBLE && det >= 0.0 {
-        return false;
-    }
-
-    let ndc0 = position0.xyz / position0.w;
-    let ndc1 = position1.xyz / position1.w;
-    let ndc2 = position2.xyz / position2.w;
-
-    let min_ndc_xy = min(ndc0.xy, min(ndc1.xy, ndc2.xy));
-    let max_ndc_xy = max(ndc0.xy, max(ndc1.xy, ndc2.xy));
-
-    let half_res = per_camera_uniform.resolution / 2.0;
-    let min_screen_xy = (min_ndc_xy + 1.0) * half_res;
-    let max_screen_xy = (max_ndc_xy + 1.0) * half_res;
-
-    if (per_camera_uniform.flags & PCU_FLAGS_MULTISAMPLE_MASK) == PCU_FLAGS_MULTISAMPLE_DISABLED {
-        let misses_pixel_center = any(round(min_screen_xy) == round(max_screen_xy));
-
-        if misses_pixel_center {
-            return false;
-        }
-    }
-
-    // We skip hi-z calculation if we're doing a shadow pass
-    if per_camera_uniform.shadow_index != 0xFFFFFFFFu {
-        return true;
-    }
-
-    var min_tex_coords = (min_ndc_xy + 1.0) / 2.0;
-    var max_tex_coords = (max_ndc_xy + 1.0) / 2.0;
-    min_tex_coords.y = 1.0 - min_tex_coords.y;
-    max_tex_coords.y = 1.0 - max_tex_coords.y;
-
-    let uv = (max_tex_coords + min_tex_coords) / 2.0;
-    let edges = max_screen_xy - min_screen_xy;
-
-    let longest_edge = max(edges.x, edges.y);
-    let mip = ceil(log2(max(longest_edge, 1.0)));
-
-    let depth = max(max(ndc0.z, ndc1.z), ndc2.z);
-    let occlusion_depth = textureSampleMin(hirearchical_z_buffer, uv, mip);
-
-    if depth < occlusion_depth {
-        return false;
-    }
-
-    return true;
-}
-
-@compute @workgroup_size(256)
-fn cs_main(
-    @builtin(workgroup_id) wid: vec3<u32>,
-    @builtin(global_invocation_id) gid: vec3<u32>,
-    @builtin(local_invocation_id) lid: vec3<u32>,
-) {
-    clear_culling_results(lid.x);
-
-    let object_search_results = find_object_info(wid.x);
-    var object_info = object_search_results.range;
-    let batch_object_index = object_search_results.index_within_region;
-    let global_invocation = culling_job.batch_base_invocation + gid.x;
-
-    // We need the workgroupBarrier to be in uniform control flow, so we can't return early here.
-    //
-    // If this is true, continue working on the other side of the barrier.
-    var write_culling_output = true;
-    if gid.x >= object_info.invocation_end {
-        if object_info.atomic_capable == 0u {
-            write_invalid_residual_nonatomic_triangle(global_invocation, &object_info);
-        }
-        write_culling_output = false;
-    } else {
-        let object_invocation = gid.x - object_info.invocation_start;
-
-        // If the first invocation in the region, set the region's draw call
-        if object_info.local_region_id == 0u && object_invocation == 0u {
-            init_draw_calls(global_invocation, object_info.region_id);
-        }
-
-        let triangle = vertex_fetch(object_invocation, &object_info);
-
-        let model_view_proj = per_camera_uniform.objects[object_info.object_id].model_view_proj;
-
-        let passes_culling = execute_culling(model_view_proj, triangle.vertices);
-
-        if object_info.atomic_capable == 1u {
-            if passes_culling {
-                write_predicted_atomic_triangle(batch_object_index, &object_info, triangle.indices);
-
-                if !is_shadow_pass() {
-                    let previously_passed_culling = get_previous_culling_result(&object_info, object_invocation);
-
-                    if !previously_passed_culling {
-                        write_residual_atomic_triangle(batch_object_index, &object_info, triangle.indices);
-                    }
-                }
-            }
-        } else {
-            if passes_culling {
-                write_residual_nonatomic_triangle(global_invocation, batch_object_index, &object_info, triangle.indices);
-            } else {
-                write_invalid_residual_nonatomic_triangle(global_invocation, &object_info);
-            }
-        }
-
-        compute_culling_results(lid.x, passes_culling);
-    }
-
-    workgroupBarrier();
-
-    if write_culling_output {
-        save_culling_results(global_invocation, lid.x);
-    }
-}
diff --git a/rend3-routine/shaders/src/depth.wgsl b/rend3-routine/shaders/src/depth.wgsl
index d7dab49f..d099cf5f 100644
--- a/rend3-routine/shaders/src/depth.wgsl
+++ b/rend3-routine/shaders/src/depth.wgsl
@@ -10,21 +10,19 @@ var<uniform> uniforms: UniformData;
 @group(1) @binding(0)
 var<storage> object_buffer: array<Object>;
 @group(1) @binding(1)
-var<storage> batch_data: BatchData;
-@group(1) @binding(2)
 var<storage> vertex_buffer: array<u32>;
-@group(1) @binding(3)
+@group(1) @binding(2)
 var<storage> per_camera_uniform: PerCameraUniform;
 
 {{#if (eq profile "GpuDriven")}}
-@group(1) @binding(4)
+@group(1) @binding(3)
 var<storage> materials: array<GpuMaterialData>;
 @group(2) @binding(0)
 var textures: binding_array<texture_2d<f32>>;
 {{/if}}
 
 {{#if (eq profile "CpuDriven")}}
-@group(1) @binding(4)
+@group(1) @binding(3)
 var<storage> materials: array<CpuMaterialData>;
 @group(2) @binding(0)
 var albedo_tex: texture_2d<f32>;
@@ -34,7 +32,6 @@ var albedo_tex: texture_2d<f32>;
     vertex_fetch
     
     object_buffer
-    batch_data
 
     position
     texture_coords_0
@@ -49,7 +46,7 @@ struct VertexOutput {
 }
 
 @vertex
-fn vs_main(@builtin(vertex_index) vertex_index: u32) -> VertexOutput {
+fn vs_main(@builtin(instance_index) instance_index: u32, @builtin(vertex_index) vertex_index: u32) -> VertexOutput {
     // If the vertex index is our sentinel invalid value, return a degenerate triangle.
     //
     // This is used by the culling shader to discard triangles when the ordering of the
@@ -59,7 +56,7 @@ fn vs_main(@builtin(vertex_index) vertex_index: u32) -> VertexOutput {
         vs_out.position = vec4<f32>(0.0);
         return vs_out;
     }
-    let indices = unpack_vertex_index(vertex_index);
+    let indices = Indices(instance_index, vertex_index);
     
     let data = object_buffer[indices.object];
     // If the object is disabled, return a degenerate triangle.
@@ -73,7 +70,7 @@ fn vs_main(@builtin(vertex_index) vertex_index: u32) -> VertexOutput {
 
     let vs_in = get_vertices(indices);
 
-    let model_view_proj = per_camera_uniform.objects[indices.object].model_view_proj;
+    let model_view_proj = per_camera_uniform.view_proj * object_buffer[indices.object].transform;
 
     let position_vec4 = vec4<f32>(vs_in.position, 1.0);
 
diff --git a/rend3-routine/shaders/src/hi_z.wgsl b/rend3-routine/shaders/src/hi_z.wgsl
deleted file mode 100644
index ecd1e176..00000000
--- a/rend3-routine/shaders/src/hi_z.wgsl
+++ /dev/null
@@ -1,33 +0,0 @@
-@group(0) @binding(0)
-var source: texture_depth_2d;
-
-struct VertexOutput {
-    @builtin(position) position: vec4<f32>,
-    @location(0) @interpolate(flat) resolution: vec2<u32>,
-}
-
-@vertex
-fn vs_main(@builtin(vertex_index) id: u32) -> VertexOutput {
-    let resolution = vec2<u32>(textureDimensions(source));
-    var output: VertexOutput;
-    output.position = vec4<f32>(f32(id / 2u) * 4.0 - 1.0, f32(id % 2u) * 4.0 - 1.0, 0.0, 1.0);
-    output.resolution = resolution;
-    return output;
-}
-
-@fragment
-fn fs_main(vout: VertexOutput) -> @builtin(frag_depth) f32 {
-    let this_tex_coord = vec2<u32>(vout.position.xy);
-    let previous_base_tex_coord = 2u * this_tex_coord;
-
-    let count_odd = vout.resolution & vec2<u32>(1u);
-
-    var nearest = 1.0;
-    for (var x = 0u; x < 2u + count_odd.x; x += 1u) {
-        for (var y = 0u; y < 2u + count_odd.y; y += 1u) {
-            nearest = min(nearest, textureLoad(source, previous_base_tex_coord + vec2<u32>(x, y), 0));
-        }
-    }
-
-    return nearest;
-}
\ No newline at end of file
diff --git a/rend3-routine/shaders/src/opaque.wgsl b/rend3-routine/shaders/src/opaque.wgsl
index e398d0fc..54a13ab4 100644
--- a/rend3-routine/shaders/src/opaque.wgsl
+++ b/rend3-routine/shaders/src/opaque.wgsl
@@ -24,21 +24,19 @@ var shadows: texture_depth_2d;
 @group(1) @binding(0)
 var<storage> object_buffer: array<Object>;
 @group(1) @binding(1)
-var<storage> batch_data: BatchData;
-@group(1) @binding(2)
 var<storage> vertex_buffer: array<u32>;
-@group(1) @binding(3)
+@group(1) @binding(2)
 var<storage> per_camera_uniform: PerCameraUniform;
 
 {{#if (eq profile "GpuDriven")}}
-@group(1) @binding(4)
+@group(1) @binding(3)
 var<storage> materials: array<GpuMaterialData>;
 @group(2) @binding(0)
 var textures: binding_array<texture_2d<f32>>;
 {{/if}}
 
 {{#if (eq profile "CpuDriven")}}
-@group(1) @binding(4)
+@group(1) @binding(3)
 var<storage> materials: array<CpuMaterialData>;
 @group(2) @binding(0)
 var albedo_tex: texture_2d<f32>;
@@ -66,7 +64,6 @@ var ambient_occlusion_tex: texture_2d<f32>;
     vertex_fetch
     
     object_buffer
-    batch_data
 
     position
     normal
@@ -89,7 +86,7 @@ struct VertexOutput {
 
 
 @vertex
-fn vs_main(@builtin(vertex_index) vertex_index: u32) -> VertexOutput {
+fn vs_main(@builtin(instance_index) instance_index: u32, @builtin(vertex_index) vertex_index: u32) -> VertexOutput {
     // If the vertex index is our sentinel invalid value, return a degenerate triangle.
     //
     // This is used by the culling shader to discard triangles when the ordering of the
@@ -99,7 +96,7 @@ fn vs_main(@builtin(vertex_index) vertex_index: u32) -> VertexOutput {
         vs_out.position = vec4<f32>(0.0);
         return vs_out;
     }
-    let indices = unpack_vertex_index(vertex_index);
+    let indices = Indices(instance_index, vertex_index);
     
     let data = object_buffer[indices.object];
     // If the object is disabled, return a degenerate triangle.
@@ -113,8 +110,8 @@ fn vs_main(@builtin(vertex_index) vertex_index: u32) -> VertexOutput {
 
     let vs_in = get_vertices(indices);
 
-    let model_view = per_camera_uniform.objects[indices.object].model_view;
-    let model_view_proj = per_camera_uniform.objects[indices.object].model_view_proj;
+    let model_view = per_camera_uniform.view * object_buffer[indices.object].transform;
+    let model_view_proj = per_camera_uniform.view_proj * object_buffer[indices.object].transform;
 
     let position_vec4 = vec4<f32>(vs_in.position, 1.0);
     let mv_mat3 = mat3x3<f32>(model_view[0].xyz, model_view[1].xyz, model_view[2].xyz);
diff --git a/rend3-routine/shaders/src/resolve_depth_min.wgsl b/rend3-routine/shaders/src/resolve_depth_min.wgsl
deleted file mode 100644
index 677ef722..00000000
--- a/rend3-routine/shaders/src/resolve_depth_min.wgsl
+++ /dev/null
@@ -1,27 +0,0 @@
-@group(0) @binding(0)
-var source: texture_depth_multisampled_2d;
-
-const SAMPLES: i32 = {{SAMPLES}};
-
-struct VertexOutput {
-    @builtin(position) position: vec4<f32>,
-}
-
-@vertex
-fn vs_main(@builtin(vertex_index) id: u32) -> VertexOutput {
-    let resolution = vec2<u32>(textureDimensions(source));
-    var output: VertexOutput;
-    output.position = vec4<f32>(f32(id / 2u) * 4.0 - 1.0, f32(id % 2u) * 4.0 - 1.0, 0.0, 1.0);
-    return output;
-}
-
-@fragment
-fn fs_main(vout: VertexOutput) -> @builtin(frag_depth) f32 {
-    var nearest: f32 = 1.0;
-
-    for (var sample = 0; sample < SAMPLES; sample += 1) {
-        nearest = min(nearest, textureLoad(source, vec2u(vout.position.xy), sample));
-    }
-
-    return nearest;
-}
diff --git a/rend3-routine/shaders/src/structures.wgsl b/rend3-routine/shaders/src/structures.wgsl
index f10b05a2..7ca845c6 100644
--- a/rend3-routine/shaders/src/structures.wgsl
+++ b/rend3-routine/shaders/src/structures.wgsl
@@ -37,40 +37,15 @@ struct UniformData {
     resolution: vec2<u32>,
 }
 
-struct PerCameraUniformObjectData {
-    // TODO: use less space
-    model_view: mat4x4<f32>,
-    // TODO: use less space
-    model_view_proj: mat4x4<f32>,
-}
-
 struct PerCameraUniform {
     // TODO: use less space
     view: mat4x4<f32>,
     // TODO: use less space
     view_proj: mat4x4<f32>,
-    // The index of which shadow caster we are rendering for.
-    //
-    // This will be u32::MAX if we're rendering for a camera, not a shadow map.
-    shadow_index: u32,
     frustum: Frustum,
-    resolution: vec2<f32>,
-    // Uses PCU_FLAGS_* constants
-    flags: u32,
     object_count: u32,
-    objects: array<PerCameraUniformObjectData>,
 }
 
-// Area visible
-const PCU_FLAGS_AREA_VISIBLE_MASK: u32 = 0x1u;
-const PCU_FLAGS_NEGATIVE_AREA_VISIBLE: u32 = 0x0u;
-const PCU_FLAGS_POSITIVE_AREA_VISIBLE: u32 = 0x1u;
-
-// Multisampled
-const PCU_FLAGS_MULTISAMPLE_MASK: u32 = 0x2u;
-const PCU_FLAGS_MULTISAMPLE_DISABLED: u32 = 0x0u;
-const PCU_FLAGS_MULTISAMPLE_ENABLED: u32 = 0x2u;
-
 struct DirectionalLight {
     /// View/Projection of directional light. Shadow rendering uses viewports
     /// so this always outputs [-1, 1] no matter where in the atlast the shadow is.
diff --git a/rend3-routine/shaders/src/structures_object.wgsl b/rend3-routine/shaders/src/structures_object.wgsl
index 2bb5b32c..4d6db525 100644
--- a/rend3-routine/shaders/src/structures_object.wgsl
+++ b/rend3-routine/shaders/src/structures_object.wgsl
@@ -10,21 +10,3 @@ struct Object {
     // 1 if enabled, 0 if disabled
     enabled: u32,
 }
-
-struct ObjectCullingInformation {
-    invocation_start: u32,
-    invocation_end: u32,
-    object_id: u32,
-    region_id: u32,
-    region_base_invocation: u32,
-    local_region_id: u32,
-    previous_global_invocation: u32,
-    atomic_capable: u32,
-}
-
-struct BatchData {
-    total_objects: u32,
-    total_invocations: u32,
-    batch_base_invocation: u32,
-    object_culling_information: array<ObjectCullingInformation, 256>,
-}
diff --git a/rend3-routine/shaders/src/uniform_prep.wgsl b/rend3-routine/shaders/src/uniform_prep.wgsl
deleted file mode 100644
index 4812e94f..00000000
--- a/rend3-routine/shaders/src/uniform_prep.wgsl
+++ /dev/null
@@ -1,27 +0,0 @@
-{{include "rend3-routine/structures.wgsl"}}
-{{include "rend3-routine/structures_object.wgsl"}}
-
-@group(0) @binding(0)
-var<storage> object_buffer: array<Object>;
-@group(0) @binding(1)
-var<storage, read_write> per_camera_uniform: PerCameraUniform;
-
-@compute @workgroup_size(256)
-fn cs_main(
-    @builtin(global_invocation_id) gid: vec3<u32>,
-) {
-    let idx = gid.x;
-
-    if idx >= per_camera_uniform.object_count {
-        return;
-    }
-    if object_buffer[idx].enabled == 0u {
-        return;
-    }
-
-    let model_view = per_camera_uniform.view * object_buffer[idx].transform;
-    let model_view_proj = per_camera_uniform.view_proj * object_buffer[idx].transform;
-
-    per_camera_uniform.objects[idx].model_view = model_view;
-    per_camera_uniform.objects[idx].model_view_proj = model_view_proj;
-}
diff --git a/rend3-routine/src/base.rs b/rend3-routine/src/base.rs
index 304684f5..1b17861f 100644
--- a/rend3-routine/src/base.rs
+++ b/rend3-routine/src/base.rs
@@ -13,11 +13,10 @@
 //! to, or muck with any of the data in there, you are free to, and the
 //! following routines will behave as you configure.
 
-use std::{iter::zip, sync::Arc};
+use std::sync::Arc;
 
 use glam::{UVec2, Vec4};
 use rend3::{
-    format_sso,
     graph::{
         self, DataHandle, InstructionEvaluationOutput, RenderGraph, RenderPassTargets, RenderTargetDescriptor,
         RenderTargetHandle, ViewportRect,
@@ -25,14 +24,13 @@ use rend3::{
     types::{SampleCount, TextureFormat, TextureUsages},
     Renderer, ShaderPreProcessor, INTERNAL_SHADOW_DEPTH_FORMAT,
 };
-use wgpu::{BindGroup, Buffer};
+use wgpu::BindGroup;
 
 use crate::{
     clear,
     common::{self, CameraSpecifier},
-    culling,
     forward::{self, ForwardRoutineArgs},
-    pbr, skinning, uniforms,
+    skinning, uniforms,
 };
 
 #[derive(Debug, Copy, Clone, PartialEq, Eq)]
@@ -103,7 +101,6 @@ pub struct BaseRenderGraphSettings {
 pub struct BaseRenderGraph {
     pub interfaces: common::WholeFrameInterfaces,
     pub samplers: common::Samplers,
-    pub gpu_culler: culling::GpuCuller,
     pub gpu_skinner: skinning::GpuSkinner,
 }
 
@@ -116,11 +113,10 @@ impl BaseRenderGraph {
         let samplers = common::Samplers::new(&renderer.device);
 
         // TODO: Support more materials
-        let gpu_culler = culling::GpuCuller::new::<pbr::PbrMaterial>(renderer, spp);
 
         let gpu_skinner = skinning::GpuSkinner::new(&renderer.device, spp);
 
-        Self { interfaces, samplers, gpu_culler, gpu_skinner }
+        Self { interfaces, samplers, gpu_skinner }
     }
 
     /// Add this to the rendergraph. This is the function you should start
@@ -144,32 +140,11 @@ impl BaseRenderGraph {
         // Perform compute based skinning.
         state.skinning(self);
 
-        // Upload the uniforms for the objects in the shadow pass.
-        state.shadow_object_uniform_upload(self);
-        // Perform culling for the objects in the shadow pass.
-        state.pbr_shadow_culling(self);
-
         // Render all the shadows to the shadow map.
         state.pbr_shadow_rendering();
 
-        // Upload the uniforms for the objects in the forward pass.
-        state.object_uniform_upload(self);
-
         // Do the first pass, rendering the predicted triangles from last frame.
-        state.pbr_render_opaque_predicted_triangles();
-
-        // Create the hi-z buffer.
-        state.hi_z();
-
-        // Perform culling for the objects in the forward pass.
-        //
-        // The result of culling will be used to predict the visible triangles for
-        // the next frame. It will also render all the triangles that were visible
-        // but were not predicted last frame.
-        state.pbr_culling(self);
-
-        // Do the second pass, rendering the residual triangles.
-        state.pbr_render_opaque_residual_triangles();
+        state.pbr_render();
 
         // Render the skybox.
         state.skybox();
@@ -194,10 +169,6 @@ pub struct BaseRenderGraphIntermediateState<'a, 'node> {
     pub inputs: BaseRenderGraphInputs<'a, 'node>,
     pub settings: BaseRenderGraphSettings,
 
-    pub pre_cull: DataHandle<Buffer>,
-    pub shadow_cull: Vec<DataHandle<Arc<culling::DrawCallSet>>>,
-    pub cull: DataHandle<Arc<culling::DrawCallSet>>,
-
     pub shadow_uniform_bg: DataHandle<BindGroup>,
     pub forward_uniform_bg: DataHandle<BindGroup>,
 
@@ -214,9 +185,6 @@ impl<'a, 'node> BaseRenderGraphIntermediateState<'a, 'node> {
         inputs: BaseRenderGraphInputs<'a, 'node>,
         settings: BaseRenderGraphSettings,
     ) -> Self {
-        // We need to know how many shadows we need to render
-        let shadow_count = inputs.eval_output.shadows.len();
-
         // Create global bind group information
         let shadow_uniform_bg = graph.add_data::<BindGroup>();
         let forward_uniform_bg = graph.add_data::<BindGroup>();
@@ -265,19 +233,11 @@ impl<'a, 'node> BaseRenderGraphIntermediateState<'a, 'node> {
 
         let pre_skinning_buffers = graph.add_data::<skinning::PreSkinningBuffers>();
 
-        let pre_cull = graph.add_data();
-        let mut shadow_cull = Vec::with_capacity(shadow_count);
-        shadow_cull.resize_with(shadow_count, || graph.add_data());
-        let cull = graph.add_data();
         Self {
             graph,
             inputs,
             settings,
 
-            pre_cull,
-            shadow_cull,
-            cull,
-
             shadow_uniform_bg,
             forward_uniform_bg,
 
@@ -312,60 +272,13 @@ impl<'a, 'node> BaseRenderGraphIntermediateState<'a, 'node> {
         );
     }
 
-    pub fn shadow_object_uniform_upload(&mut self, base: &'node BaseRenderGraph) {
-        for (shadow_index, shadow) in self.inputs.eval_output.shadows.iter().enumerate() {
-            base.gpu_culler.add_object_uniform_upload_to_graph::<pbr::PbrMaterial>(
-                self.graph,
-                CameraSpecifier::Shadow(shadow_index as u32),
-                UVec2::splat(shadow.map.size),
-                SampleCount::One,
-                &format_sso!("Shadow Culling S{}", shadow_index),
-            );
-        }
-    }
-
-    /// Does all shadow culling for the PBR materials.
-    pub fn pbr_shadow_culling(&mut self, base: &'node BaseRenderGraph) {
-        for (shadow_index, &shadow_culled) in self.shadow_cull.iter().enumerate() {
-            base.gpu_culler.add_culling_to_graph::<pbr::PbrMaterial>(
-                self.graph,
-                shadow_culled,
-                self.shadow,
-                CameraSpecifier::Shadow(shadow_index as u32),
-                &format_sso!("Shadow Culling S{}", shadow_index),
-            );
-        }
-    }
-
     pub fn skinning(&mut self, base: &'node BaseRenderGraph) {
         skinning::add_skinning_to_graph(self.graph, &base.gpu_skinner);
     }
 
-    pub fn object_uniform_upload(&mut self, base: &'node BaseRenderGraph) {
-        base.gpu_culler.add_object_uniform_upload_to_graph::<pbr::PbrMaterial>(
-            self.graph,
-            CameraSpecifier::Viewport,
-            self.inputs.target.resolution,
-            self.inputs.target.samples,
-            "Uniform Bake",
-        );
-    }
-
-    /// Does all culling for the forward PBR materials.
-    pub fn pbr_culling(&mut self, base: &'node BaseRenderGraph) {
-        base.gpu_culler.add_culling_to_graph::<pbr::PbrMaterial>(
-            self.graph,
-            self.cull,
-            self.depth.single_sample_mipped,
-            CameraSpecifier::Viewport,
-            "Primary Culling",
-        );
-    }
-
     /// Render all shadows for the PBR materials.
     pub fn pbr_shadow_rendering(&mut self) {
-        let iter = zip(&self.shadow_cull, &self.inputs.eval_output.shadows);
-        for (shadow_index, (shadow_cull, desc)) in iter.enumerate() {
+        for (shadow_index, desc) in self.inputs.eval_output.shadows.iter().enumerate() {
             let target = self.shadow.set_viewport(ViewportRect::new(desc.map.offset, UVec2::splat(desc.map.size)));
             let renderpass = graph::RenderPassTargets {
                 targets: vec![],
@@ -387,7 +300,6 @@ impl<'a, 'node> BaseRenderGraphIntermediateState<'a, 'node> {
                         per_material_bgl: &self.inputs.routines.pbr.per_material,
                         extra_bgs: None,
                     },
-                    culling_source: forward::CullingSource::Residual(*shadow_cull),
                     samples: SampleCount::One,
                     renderpass: renderpass.clone(),
                 });
@@ -408,39 +320,18 @@ impl<'a, 'node> BaseRenderGraphIntermediateState<'a, 'node> {
     }
 
     /// Render the PBR materials.
-    pub fn pbr_render_opaque_predicted_triangles(&mut self) {
+    pub fn pbr_render(&mut self) {
         let routines = [&self.inputs.routines.pbr.opaque_routine, &self.inputs.routines.pbr.cutout_routine];
         for routine in routines {
             routine.add_forward_to_graph(ForwardRoutineArgs {
                 graph: self.graph,
-                label: "PBR Forward Pass 1",
+                label: "PBR Forward Pass",
                 camera: CameraSpecifier::Viewport,
                 binding_data: forward::ForwardRoutineBindingData {
                     whole_frame_uniform_bg: self.forward_uniform_bg,
                     per_material_bgl: &self.inputs.routines.pbr.per_material,
                     extra_bgs: None,
                 },
-                culling_source: forward::CullingSource::Predicted,
-                samples: self.inputs.target.samples,
-                renderpass: self.primary_renderpass.clone(),
-            });
-        }
-    }
-
-    /// Render the PBR materials.
-    pub fn pbr_render_opaque_residual_triangles(&mut self) {
-        let routines = [&self.inputs.routines.pbr.opaque_routine, &self.inputs.routines.pbr.cutout_routine];
-        for routine in routines {
-            routine.add_forward_to_graph(ForwardRoutineArgs {
-                graph: self.graph,
-                label: "PBR Forward Pass 2",
-                camera: CameraSpecifier::Viewport,
-                binding_data: forward::ForwardRoutineBindingData {
-                    whole_frame_uniform_bg: self.forward_uniform_bg,
-                    per_material_bgl: &self.inputs.routines.pbr.per_material,
-                    extra_bgs: None,
-                },
-                culling_source: forward::CullingSource::Residual(self.cull),
                 samples: self.inputs.target.samples,
                 renderpass: self.primary_renderpass.clone(),
             });
@@ -458,16 +349,11 @@ impl<'a, 'node> BaseRenderGraphIntermediateState<'a, 'node> {
                 per_material_bgl: &self.inputs.routines.pbr.per_material,
                 extra_bgs: None,
             },
-            culling_source: forward::CullingSource::Residual(self.cull),
             samples: self.inputs.target.samples,
             renderpass: self.primary_renderpass.clone(),
         });
     }
 
-    pub fn hi_z(&mut self) {
-        self.inputs.routines.pbr.hi_z.add_hi_z_to_graph(self.graph, self.depth, self.inputs.target.resolution);
-    }
-
     /// Tonemap onto the given render target.
     pub fn tonemapping(&mut self) {
         self.inputs.routines.tonemapping.add_to_graph(
diff --git a/rend3-routine/src/common/interfaces.rs b/rend3-routine/src/common/interfaces.rs
index 9361ce22..2efe84de 100644
--- a/rend3-routine/src/common/interfaces.rs
+++ b/rend3-routine/src/common/interfaces.rs
@@ -1,5 +1,6 @@
 use std::{marker::PhantomData, mem, num::NonZeroU64};
 
+use encase::ShaderType;
 use glam::{Mat4, Vec3};
 use rend3::{
     managers::{DirectionalLightManager, PointLightManager},
@@ -10,7 +11,10 @@ use wgpu::{
     BindGroupLayout, BindingType, BufferBindingType, Device, ShaderStages, TextureSampleType, TextureViewDimension,
 };
 
-use crate::{common::samplers::Samplers, uniforms::FrameUniforms};
+use crate::{
+    common::samplers::Samplers,
+    uniforms::{FrameUniforms, PerCameraUniform},
+};
 
 /// Interfaces which are used throughout the whole frame.
 ///
@@ -99,16 +103,6 @@ impl<M: Material> PerMaterialArchetypeInterface<M> {
                 },
                 None,
             )
-            // Batch data buffer
-            .append(
-                ShaderStages::VERTEX_FRAGMENT,
-                BindingType::Buffer {
-                    ty: BufferBindingType::Storage { read_only: true },
-                    has_dynamic_offset: true,
-                    min_binding_size: None,
-                },
-                None,
-            )
             // Vertex buffer
             .append(
                 ShaderStages::VERTEX_FRAGMENT,
@@ -125,11 +119,11 @@ impl<M: Material> PerMaterialArchetypeInterface<M> {
                 BindingType::Buffer {
                     ty: BufferBindingType::Storage { read_only: true },
                     has_dynamic_offset: false,
-                    min_binding_size: None,
+                    min_binding_size: Some(PerCameraUniform::min_size()),
                 },
                 None,
             )
-            // Mateiral data
+            // Material data
             .append(
                 ShaderStages::VERTEX_FRAGMENT,
                 BindingType::Buffer {
diff --git a/rend3-routine/src/culling/batching.rs b/rend3-routine/src/culling/batching.rs
deleted file mode 100644
index 9858dee5..00000000
--- a/rend3-routine/src/culling/batching.rs
+++ /dev/null
@@ -1,250 +0,0 @@
-use std::{cmp::Ordering, collections::HashMap};
-
-use encase::ShaderType;
-use ordered_float::OrderedFloat;
-use rend3::{
-    graph::NodeExecutionContext,
-    managers::{CameraState, TextureBindGroupIndex},
-    types::{GraphDataHandle, Material, RawObjectHandle, SortingOrder, SortingReason},
-    util::{math::round_up, typedefs::FastHashMap},
-};
-
-use crate::common::CameraSpecifier;
-
-use super::{BATCH_SIZE, WORKGROUP_SIZE};
-
-#[derive(Debug)]
-pub struct ShaderBatchDatas {
-    pub(super) regions: Vec<JobSubRegion>,
-    pub(super) jobs: Vec<ShaderBatchData>,
-}
-
-#[derive(Debug)]
-pub(super) struct JobSubRegion {
-    pub job_index: u32,
-    pub key: ShaderJobKey,
-}
-
-#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord)]
-pub(super) struct ShaderJobKey {
-    pub material_key: u64,
-    pub bind_group_index: TextureBindGroupIndex,
-}
-
-#[derive(Debug, Clone, Copy, Eq)]
-pub(super) struct ShaderJobSortingKey {
-    pub job_key: ShaderJobKey,
-    pub distance: OrderedFloat<f32>,
-    pub sorting_reason: SortingReason,
-}
-
-impl PartialEq for ShaderJobSortingKey {
-    fn eq(&self, other: &Self) -> bool {
-        self.cmp(other).is_eq()
-    }
-}
-
-impl PartialOrd for ShaderJobSortingKey {
-    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
-        Some(self.cmp(other))
-    }
-}
-
-impl Ord for ShaderJobSortingKey {
-    fn cmp(&self, other: &Self) -> Ordering {
-        // material key always first
-        match self.job_key.material_key.cmp(&other.job_key.material_key) {
-            Ordering::Equal => {}
-            ord => return ord,
-        }
-        match self.sorting_reason.cmp(&other.sorting_reason) {
-            Ordering::Equal => {}
-            ord => return ord,
-        }
-        // The above comparison means that both sides are equal
-        if self.sorting_reason == SortingReason::Requirement {
-            match self.distance.cmp(&other.distance) {
-                Ordering::Equal => {}
-                ord => return ord,
-            }
-            self.job_key.bind_group_index.cmp(&other.job_key.bind_group_index)
-        } else {
-            match self.job_key.bind_group_index.cmp(&other.job_key.bind_group_index) {
-                Ordering::Equal => {}
-                ord => return ord,
-            }
-            self.distance.cmp(&other.distance)
-        }
-    }
-}
-
-#[derive(Debug, ShaderType)]
-pub struct ShaderBatchData {
-    #[align(256)]
-    pub(super) total_objects: u32,
-    pub(super) total_invocations: u32,
-    pub(super) batch_base_invocation: u32,
-    pub(super) object_culling_information: [ShaderObjectCullingInformation; BATCH_SIZE],
-}
-
-#[derive(Debug, Copy, Clone, Default, ShaderType)]
-pub(super) struct ShaderObjectCullingInformation {
-    pub invocation_start: u32,
-    pub invocation_end: u32,
-    pub object_id: u32,
-    pub region_id: u32,
-    pub base_region_invocation: u32,
-    pub local_region_id: u32,
-    pub previous_global_invocation: u32,
-    pub atomic_capable: u32,
-}
-
-/// Map containing the previous invocation of each object.
-pub struct PerCameraPreviousInvocationsMap {
-    inner: FastHashMap<CameraSpecifier, FastHashMap<RawObjectHandle, u32>>,
-}
-impl PerCameraPreviousInvocationsMap {
-    pub fn new() -> Self {
-        Self { inner: HashMap::default() }
-    }
-
-    pub fn get_and_reset_camera(&mut self, camera: CameraSpecifier) -> FastHashMap<RawObjectHandle, u32> {
-        self.inner.remove(&camera).unwrap_or_default()
-    }
-
-    pub fn set_camera(&mut self, camera: CameraSpecifier, previous_invocations: FastHashMap<RawObjectHandle, u32>) {
-        self.inner.insert(camera, previous_invocations);
-    }
-}
-
-pub(super) fn batch_objects<M: Material>(
-    ctx: &mut NodeExecutionContext,
-    previous_invocation_map_handle: &GraphDataHandle<PerCameraPreviousInvocationsMap>,
-    camera: &CameraState,
-    camera_specifier: CameraSpecifier,
-) -> ShaderBatchDatas {
-    profiling::scope!("Batch Objects");
-
-    let mut per_camera_previous_invocation_map = ctx.data_core.graph_storage.get_mut(previous_invocation_map_handle);
-    let previous_invocation_map = per_camera_previous_invocation_map.get_and_reset_camera(camera_specifier);
-    let mut current_invocation_map = FastHashMap::default();
-
-    let mut jobs = ShaderBatchDatas { jobs: Vec::new(), regions: Vec::new() };
-
-    let objects = match ctx.data_core.object_manager.enumerated_objects::<M>() {
-        Some(o) => o,
-        None => return jobs,
-    };
-
-    let material_archetype = ctx.data_core.material_manager.archetype_view::<M>();
-
-    let mut sorted_objects = Vec::with_capacity(objects.len());
-    {
-        profiling::scope!("Sort Key Creation");
-        for (handle, object) in objects {
-            // Frustum culling
-            if !camera.world_frustum().contains_sphere(object.inner.bounding_sphere) {
-                continue;
-            }
-
-            let material = material_archetype.material(*object.material_handle);
-            let bind_group_index = material.bind_group_index.map_gpu(|_| TextureBindGroupIndex::DUMMY).into_common();
-
-            let material_key = material.inner.key();
-            let sorting = material.inner.sorting();
-
-            let mut distance_sq =
-                ctx.data_core.viewport_camera_state.location().distance_squared(object.location.into());
-            if sorting.order == SortingOrder::BackToFront {
-                distance_sq = -distance_sq;
-            }
-            sorted_objects.push((
-                ShaderJobSortingKey {
-                    job_key: ShaderJobKey { material_key, bind_group_index },
-                    distance: OrderedFloat(distance_sq),
-                    sorting_reason: sorting.reason,
-                },
-                handle,
-                object,
-            ))
-        }
-    }
-
-    {
-        profiling::scope!("Sorting");
-        sorted_objects.sort_unstable_by_key(|(k, _, _)| *k);
-    }
-
-    if !sorted_objects.is_empty() {
-        profiling::scope!("Batch Data Creation");
-        let mut current_region_idx = 0_u32;
-        let mut current_region_object_index = 0_u32;
-        let mut current_base_invocation = 0_u32;
-        let mut current_region_invocation = 0_u32;
-        let mut current_invocation = 0_u32;
-        let mut current_object_index = 0_u32;
-        let mut current_ranges = [ShaderObjectCullingInformation::default(); BATCH_SIZE];
-        let mut current_key = sorted_objects.first().unwrap().0.job_key;
-
-        let max_dispatch_count = ctx.renderer.limits.max_compute_workgroups_per_dimension;
-
-        for (ShaderJobSortingKey { job_key: key, sorting_reason, .. }, handle, object) in sorted_objects {
-            let invocation_count = object.inner.index_count / 3;
-
-            let key_difference = key != current_key;
-            let object_limit = current_object_index == BATCH_SIZE as u32;
-            let dispatch_limit = (current_invocation + invocation_count) >= max_dispatch_count * WORKGROUP_SIZE;
-
-            if key_difference || object_limit || dispatch_limit {
-                jobs.regions.push(JobSubRegion { job_index: jobs.jobs.len() as u32, key: current_key });
-                current_region_idx += 1;
-                current_key = key;
-                current_region_object_index = 0;
-                current_region_invocation = current_invocation;
-            }
-            if object_limit || dispatch_limit {
-                jobs.jobs.push(ShaderBatchData {
-                    object_culling_information: current_ranges,
-                    total_objects: current_object_index,
-                    total_invocations: current_invocation,
-                    batch_base_invocation: current_base_invocation,
-                });
-
-                current_base_invocation += current_invocation;
-                current_invocation = 0;
-                current_region_invocation = 0;
-                current_object_index = 0;
-            }
-
-            let range = ShaderObjectCullingInformation {
-                invocation_start: current_invocation,
-                invocation_end: current_invocation + invocation_count,
-                region_id: current_region_idx,
-                object_id: handle.idx as u32,
-                base_region_invocation: current_region_invocation,
-                local_region_id: current_region_object_index,
-                previous_global_invocation: previous_invocation_map.get(&handle).copied().unwrap_or(u32::MAX),
-                atomic_capable: matches!(sorting_reason, SortingReason::Optimization) as u32,
-            };
-
-            current_invocation_map.insert(handle, current_invocation + current_base_invocation);
-
-            current_ranges[current_object_index as usize] = range;
-            current_object_index += 1;
-            current_region_object_index += 1;
-            current_invocation += round_up(invocation_count, WORKGROUP_SIZE);
-        }
-
-        jobs.regions.push(JobSubRegion { job_index: jobs.jobs.len() as u32, key: current_key });
-        jobs.jobs.push(ShaderBatchData {
-            object_culling_information: current_ranges,
-            total_objects: current_object_index,
-            total_invocations: current_invocation,
-            batch_base_invocation: current_base_invocation,
-        });
-    }
-
-    per_camera_previous_invocation_map.set_camera(camera_specifier, current_invocation_map);
-
-    jobs
-}
diff --git a/rend3-routine/src/culling/culler.rs b/rend3-routine/src/culling/culler.rs
deleted file mode 100644
index 58fbce21..00000000
--- a/rend3-routine/src/culling/culler.rs
+++ /dev/null
@@ -1,714 +0,0 @@
-use std::{
-    any::{type_name, TypeId},
-    borrow::Cow,
-    collections::{hash_map::Entry, HashMap},
-    num::NonZeroU64,
-    ops::Range,
-    sync::Arc,
-};
-
-use encase::{ShaderSize, ShaderType, StorageBuffer};
-use glam::{Mat4, UVec2, Vec2};
-use rend3::{
-    format_sso,
-    graph::{DataHandle, DeclaredDependency, NodeExecutionContext, NodeResourceUsage, RenderGraph, RenderTargetHandle},
-    managers::{CameraState, ShaderObject, TextureBindGroupIndex},
-    types::{GraphDataHandle, Material, MaterialArray, SampleCount, VERTEX_ATTRIBUTE_POSITION},
-    util::{frustum::Frustum, math::IntegerExt, typedefs::FastHashMap},
-    Renderer, ShaderPreProcessor, ShaderVertexBufferConfig,
-};
-use wgpu::{
-    self, AddressMode, BindGroupDescriptor, BindGroupEntry, BindGroupLayout, BindGroupLayoutDescriptor,
-    BindGroupLayoutEntry, BindingResource, BindingType, Buffer, BufferBinding, BufferBindingType, BufferDescriptor,
-    BufferUsages, CommandEncoder, ComputePassDescriptor, ComputePipeline, ComputePipelineDescriptor, Device,
-    FilterMode, PipelineLayoutDescriptor, Queue, Sampler, SamplerBindingType, SamplerDescriptor,
-    ShaderModuleDescriptor, ShaderStages, TextureSampleType, TextureViewDimension,
-};
-
-use crate::{
-    common::CameraSpecifier,
-    culling::{
-        batching::{batch_objects, JobSubRegion, PerCameraPreviousInvocationsMap, ShaderBatchData, ShaderBatchDatas},
-        suballoc::InputOutputBuffer,
-        WORKGROUP_SIZE,
-    },
-};
-
-#[derive(Debug)]
-pub struct DrawCallSet {
-    pub culling_data_buffer: Buffer,
-    pub per_camera_uniform: Arc<Buffer>,
-    pub draw_calls: Vec<DrawCall>,
-    /// Range of draw calls in the draw call array corresponding to a given material key.
-    pub material_key_ranges: HashMap<u64, Range<usize>>,
-}
-
-#[derive(Debug, Clone)]
-pub struct DrawCall {
-    pub bind_group_index: TextureBindGroupIndex,
-    pub batch_index: u32,
-}
-
-#[derive(Default)]
-pub struct CullingBufferMap {
-    inner: FastHashMap<CameraSpecifier, CullingBuffers>,
-}
-impl CullingBufferMap {
-    pub fn get_buffers(&self, camera: CameraSpecifier) -> Option<&CullingBuffers> {
-        self.inner.get(&camera)
-    }
-
-    fn get_or_resize_buffers(
-        &mut self,
-        queue: &Queue,
-        device: &Device,
-        encoder: &mut CommandEncoder,
-        camera: CameraSpecifier,
-        sizes: CullingBufferSizes,
-    ) -> &mut CullingBuffers {
-        match self.inner.entry(camera) {
-            Entry::Occupied(b) => {
-                let b = b.into_mut();
-
-                b.update_sizes(queue, device, encoder, sizes);
-
-                b
-            }
-            Entry::Vacant(b) => b.insert(CullingBuffers::new(device, queue, sizes)),
-        }
-    }
-}
-
-#[derive(Debug)]
-struct CullingBufferSizes {
-    invocations: u64,
-    draw_calls: u64,
-}
-
-#[derive(Debug)]
-pub struct CullingBuffers {
-    pub index_buffer: InputOutputBuffer,
-    pub draw_call_buffer: InputOutputBuffer,
-    pub culling_results_buffer: InputOutputBuffer,
-}
-
-impl CullingBuffers {
-    fn new(device: &Device, queue: &Queue, sizes: CullingBufferSizes) -> Self {
-        Self {
-            // One element per triangle/invocation
-            index_buffer: InputOutputBuffer::new(device, queue, sizes.invocations * 3, "Index Buffer", 4, 4, false),
-            draw_call_buffer: InputOutputBuffer::new(device, queue, sizes.draw_calls, "Draw Call Buffer", 20, 4, true),
-            culling_results_buffer: InputOutputBuffer::new(
-                device,
-                queue,
-                // 32 bits in a u32
-                sizes.invocations.div_round_up(u32::BITS as _),
-                "Culling Results Buffer",
-                4,
-                4,
-                false,
-            ),
-        }
-    }
-
-    fn update_sizes(
-        &mut self,
-        queue: &Queue,
-        device: &Device,
-        encoder: &mut CommandEncoder,
-        sizes: CullingBufferSizes,
-    ) {
-        self.index_buffer.swap(queue, device, encoder, sizes.invocations * 3);
-        self.draw_call_buffer.swap(queue, device, encoder, sizes.draw_calls);
-        self.culling_results_buffer.swap(queue, device, encoder, sizes.invocations.div_round_up(32));
-    }
-}
-
-#[derive(Debug, Copy, Clone)]
-pub enum TriangleVisibility {
-    PositiveAreaVisible,
-    NegativeAreaVisible,
-}
-
-impl TriangleVisibility {
-    fn from_winding_and_face(winding: wgpu::FrontFace, culling: wgpu::Face) -> Self {
-        match (winding, culling) {
-            (wgpu::FrontFace::Ccw, wgpu::Face::Back) => TriangleVisibility::PositiveAreaVisible,
-            (wgpu::FrontFace::Ccw, wgpu::Face::Front) => TriangleVisibility::NegativeAreaVisible,
-            (wgpu::FrontFace::Cw, wgpu::Face::Back) => TriangleVisibility::NegativeAreaVisible,
-            (wgpu::FrontFace::Cw, wgpu::Face::Front) => TriangleVisibility::PositiveAreaVisible,
-        }
-    }
-
-    fn is_positive(self) -> bool {
-        match self {
-            TriangleVisibility::PositiveAreaVisible => true,
-            TriangleVisibility::NegativeAreaVisible => false,
-        }
-    }
-}
-
-bitflags::bitflags! {
-    struct PerCameraUniformFlags: u32 {
-        const POSTIIVE_AREA_VISIBLE = 1 << 0;
-        const MULTISAMPLED = 1 << 1;
-    }
-}
-
-#[derive(ShaderType)]
-struct PerCameraUniform {
-    // TODO: use less space
-    view: Mat4,
-    // TODO: use less space
-    view_proj: Mat4,
-    // The index of which shadow caster we are rendering for.
-    //
-    // This will be u32::MAX if we're rendering for a camera, not a shadow map.
-    shadow_index: u32,
-    frustum: Frustum,
-    resolution: Vec2,
-    // Created from PerCameraUniformFlags
-    flags: u32,
-    object_count: u32,
-    #[size(runtime)]
-    objects: Vec<PerCameraUniformObjectData>,
-}
-
-#[derive(ShaderType)]
-struct PerCameraUniformObjectData {
-    // TODO: use less space
-    model_view: Mat4,
-    // TODO: use less space
-    model_view_proj: Mat4,
-}
-
-pub struct GpuCuller {
-    prep_bgl: BindGroupLayout,
-    prep_pipeline: ComputePipeline,
-    culling_bgl: BindGroupLayout,
-    culling_pipeline: ComputePipeline,
-    sampler: Sampler,
-    winding: wgpu::FrontFace,
-    type_id: TypeId,
-    per_material_buffer_handle: GraphDataHandle<HashMap<CameraSpecifier, Arc<Buffer>>>,
-    pub culling_buffer_map_handle: GraphDataHandle<CullingBufferMap>,
-    previous_invocation_map_handle: GraphDataHandle<PerCameraPreviousInvocationsMap>,
-}
-
-impl GpuCuller {
-    pub fn new<M>(renderer: &Arc<Renderer>, spp: &ShaderPreProcessor) -> Self
-    where
-        M: Material,
-    {
-        let type_name = type_name::<M>();
-
-        let prep_source = spp
-            .render_shader(
-                "rend3-routine/uniform_prep.wgsl",
-                &(),
-                Some(&ShaderVertexBufferConfig::from_material::<M>()),
-            )
-            .unwrap();
-
-        let prep_sm = renderer.device.create_shader_module(ShaderModuleDescriptor {
-            label: Some(&format_sso!("UniformPrep {type_name} SM")),
-            source: wgpu::ShaderSource::Wgsl(Cow::Owned(prep_source)),
-        });
-
-        let prep_bgl = renderer.device.create_bind_group_layout(&BindGroupLayoutDescriptor {
-            label: Some(&format_sso!("UniformPrep {type_name} BGL")),
-            entries: &[
-                // Object
-                BindGroupLayoutEntry {
-                    binding: 0,
-                    visibility: ShaderStages::COMPUTE,
-                    ty: BindingType::Buffer {
-                        ty: BufferBindingType::Storage { read_only: true },
-                        has_dynamic_offset: false,
-                        min_binding_size: Some(ShaderObject::<M>::SHADER_SIZE),
-                    },
-                    count: None,
-                },
-                // Object
-                BindGroupLayoutEntry {
-                    binding: 1,
-                    visibility: ShaderStages::COMPUTE,
-                    ty: BindingType::Buffer {
-                        ty: BufferBindingType::Storage { read_only: false },
-                        has_dynamic_offset: false,
-                        min_binding_size: Some(PerCameraUniform::min_size()),
-                    },
-                    count: None,
-                },
-            ],
-        });
-
-        let prep_pll = renderer.device.create_pipeline_layout(&PipelineLayoutDescriptor {
-            label: Some(&format_sso!("UniformPrep {type_name} PLL")),
-            bind_group_layouts: &[&prep_bgl],
-            push_constant_ranges: &[],
-        });
-
-        let prep_pipeline = renderer.device.create_compute_pipeline(&ComputePipelineDescriptor {
-            label: Some(&format_sso!("UniformPrep {type_name} PLL")),
-            layout: Some(&prep_pll),
-            module: &prep_sm,
-            entry_point: "cs_main",
-        });
-
-        let position_offset = M::supported_attributes()
-            .into_iter()
-            .enumerate()
-            .find_map(|(idx, a)| (*a == *VERTEX_ATTRIBUTE_POSITION).then_some(idx))
-            .unwrap();
-
-        let culling_source = spp
-            .render_shader(
-                "rend3-routine/cull.wgsl",
-                &serde_json::json! {{
-                    "position_attribute_offset": position_offset,
-                }},
-                Some(&ShaderVertexBufferConfig::from_material::<M>()),
-            )
-            .unwrap();
-
-        let culling_sm = renderer.device.create_shader_module(ShaderModuleDescriptor {
-            label: Some(&format_sso!("GpuCuller {type_name} SM")),
-            source: wgpu::ShaderSource::Wgsl(Cow::Owned(culling_source)),
-        });
-
-        let culling_bgl = renderer.device.create_bind_group_layout(&BindGroupLayoutDescriptor {
-            label: Some(&format_sso!("GpuCuller {type_name} BGL")),
-            entries: &[
-                // Vertex Buffer
-                BindGroupLayoutEntry {
-                    binding: 0,
-                    visibility: ShaderStages::COMPUTE,
-                    ty: BindingType::Buffer {
-                        ty: BufferBindingType::Storage { read_only: true },
-                        has_dynamic_offset: false,
-                        min_binding_size: NonZeroU64::new(4),
-                    },
-                    count: None,
-                },
-                // Object Buffer
-                BindGroupLayoutEntry {
-                    binding: 1,
-                    visibility: ShaderStages::COMPUTE,
-                    ty: BindingType::Buffer {
-                        ty: BufferBindingType::Storage { read_only: true },
-                        has_dynamic_offset: false,
-                        min_binding_size: Some(ShaderObject::<M>::SHADER_SIZE),
-                    },
-                    count: None,
-                },
-                // Batch data
-                BindGroupLayoutEntry {
-                    binding: 2,
-                    visibility: ShaderStages::COMPUTE,
-                    ty: BindingType::Buffer {
-                        ty: BufferBindingType::Storage { read_only: true },
-                        has_dynamic_offset: true,
-                        min_binding_size: Some(ShaderBatchData::SHADER_SIZE),
-                    },
-                    count: None,
-                },
-                // Draw Calls
-                BindGroupLayoutEntry {
-                    binding: 3,
-                    visibility: ShaderStages::COMPUTE,
-                    ty: BindingType::Buffer {
-                        ty: BufferBindingType::Storage { read_only: false },
-                        has_dynamic_offset: false,
-                        min_binding_size: Some(NonZeroU64::new(20 + 8).unwrap()),
-                    },
-                    count: None,
-                },
-                // Index buffer
-                BindGroupLayoutEntry {
-                    binding: 4,
-                    visibility: ShaderStages::COMPUTE,
-                    ty: BindingType::Buffer {
-                        ty: BufferBindingType::Storage { read_only: false },
-                        has_dynamic_offset: false,
-                        min_binding_size: Some(NonZeroU64::new(4 + 8).unwrap()),
-                    },
-                    count: None,
-                },
-                // Culling Results
-                BindGroupLayoutEntry {
-                    binding: 5,
-                    visibility: ShaderStages::COMPUTE,
-                    ty: BindingType::Buffer {
-                        ty: BufferBindingType::Storage { read_only: false },
-                        has_dynamic_offset: false,
-                        min_binding_size: Some(NonZeroU64::new(4 + 8).unwrap()),
-                    },
-                    count: None,
-                },
-                // per camera uniforms
-                BindGroupLayoutEntry {
-                    binding: 6,
-                    visibility: ShaderStages::COMPUTE,
-                    ty: BindingType::Buffer {
-                        ty: BufferBindingType::Storage { read_only: true },
-                        has_dynamic_offset: false,
-                        min_binding_size: Some(PerCameraUniform::min_size()),
-                    },
-                    count: None,
-                },
-                // hirearchical z buffer
-                BindGroupLayoutEntry {
-                    binding: 7,
-                    visibility: ShaderStages::COMPUTE,
-                    ty: BindingType::Texture {
-                        sample_type: TextureSampleType::Depth,
-                        view_dimension: TextureViewDimension::D2,
-                        multisampled: false,
-                    },
-                    count: None,
-                },
-                // hirearchical z buffer
-                BindGroupLayoutEntry {
-                    binding: 8,
-                    visibility: ShaderStages::COMPUTE,
-                    ty: BindingType::Sampler(SamplerBindingType::NonFiltering),
-                    count: None,
-                },
-            ],
-        });
-
-        let culling_pll = renderer.device.create_pipeline_layout(&PipelineLayoutDescriptor {
-            label: Some(&format_sso!("GpuCuller {type_name} PLL")),
-            bind_group_layouts: &[&culling_bgl],
-            push_constant_ranges: &[],
-        });
-
-        let culling_pipeline = renderer.device.create_compute_pipeline(&ComputePipelineDescriptor {
-            label: Some(&format_sso!("GpuCuller {type_name} PLL")),
-            layout: Some(&culling_pll),
-            module: &culling_sm,
-            entry_point: "cs_main",
-        });
-
-        let sampler = renderer.device.create_sampler(&SamplerDescriptor {
-            label: Some("HiZ Sampler"),
-            address_mode_u: AddressMode::ClampToEdge,
-            address_mode_v: AddressMode::ClampToEdge,
-            address_mode_w: AddressMode::ClampToEdge,
-            mag_filter: FilterMode::Nearest,
-            min_filter: FilterMode::Nearest,
-            mipmap_filter: FilterMode::Nearest,
-            lod_min_clamp: 0.0,
-            lod_max_clamp: 100.0,
-            compare: None,
-            anisotropy_clamp: 1,
-            border_color: None,
-        });
-
-        let per_material_buffer_handle = renderer.add_graph_data(HashMap::default());
-        let culling_buffer_map_handle = renderer.add_graph_data(CullingBufferMap::default());
-        let previous_invocation_map_handle = renderer.add_graph_data(PerCameraPreviousInvocationsMap::new());
-
-        Self {
-            prep_bgl,
-            prep_pipeline,
-            culling_bgl,
-            culling_pipeline,
-            sampler,
-            winding: renderer.handedness.into(),
-            type_id: TypeId::of::<M>(),
-            per_material_buffer_handle,
-            culling_buffer_map_handle,
-            previous_invocation_map_handle,
-        }
-    }
-
-    pub fn object_uniform_upload<M>(
-        &self,
-        ctx: &mut NodeExecutionContext,
-        camera: &CameraState,
-        camera_specifier: CameraSpecifier,
-        resolution: UVec2,
-        samples: SampleCount,
-    ) where
-        M: Material,
-    {
-        profiling::scope!("GpuCuller::object_uniform_upload");
-
-        assert_eq!(TypeId::of::<M>(), self.type_id);
-
-        let type_name = type_name::<M>();
-
-        let encoder = ctx.encoder_or_pass.take_encoder();
-
-        // TODO: Isolate all this into a struct
-        let max_object_count = ctx.data_core.object_manager.buffer::<M>().map(wgpu::Buffer::size).unwrap_or(0)
-            / ShaderObject::<M>::SHADER_SIZE.get();
-
-        if max_object_count == 0 {
-            return;
-        }
-
-        let per_map_buffer_size = ((max_object_count - 1) * PerCameraUniformObjectData::SHADER_SIZE.get())
-            + PerCameraUniform::min_size().get();
-
-        let mut per_mat_buffer_map = ctx.data_core.graph_storage.get_mut(&self.per_material_buffer_handle);
-
-        let new_per_mat_buffer = || {
-            Arc::new(ctx.renderer.device.create_buffer(&BufferDescriptor {
-                label: None,
-                size: per_map_buffer_size,
-                usage: BufferUsages::STORAGE | BufferUsages::COPY_SRC | BufferUsages::COPY_DST,
-                mapped_at_creation: false,
-            }))
-        };
-        let buffer = match per_mat_buffer_map.entry(camera_specifier) {
-            Entry::Occupied(o) => {
-                let r = o.into_mut();
-                if r.size() != per_map_buffer_size {
-                    *r = new_per_mat_buffer();
-                }
-                r
-            }
-            Entry::Vacant(o) => o.insert(new_per_mat_buffer()),
-        };
-
-        let culling = match camera_specifier {
-            CameraSpecifier::Shadow(_) => wgpu::Face::Front,
-            CameraSpecifier::Viewport => wgpu::Face::Back,
-        };
-
-        {
-            // We don't write anything in the objects right now, as this will be filled in by the preparation compute shader
-            profiling::scope!("PerCameraUniform Data Upload");
-            let per_camera_data = PerCameraUniform {
-                view: camera.view(),
-                view_proj: camera.view_proj(),
-                shadow_index: camera_specifier.to_shader_index(),
-                frustum: camera.world_frustum(),
-                resolution: resolution.as_vec2(),
-                flags: {
-                    let mut flags = PerCameraUniformFlags::empty();
-                    flags.set(
-                        PerCameraUniformFlags::POSTIIVE_AREA_VISIBLE,
-                        TriangleVisibility::from_winding_and_face(self.winding, culling).is_positive(),
-                    );
-                    flags.set(PerCameraUniformFlags::MULTISAMPLED, samples != SampleCount::One);
-                    flags.bits()
-                },
-                object_count: max_object_count as u32,
-                objects: Vec::new(),
-            };
-            let mut buffer = ctx.renderer.queue.write_buffer_with(buffer, 0, per_camera_data.size()).unwrap();
-            StorageBuffer::new(&mut *buffer).write(&per_camera_data).unwrap();
-        }
-
-        let Some(object_manager_buffer) = ctx.data_core.object_manager.buffer::<M>() else {
-            return;
-        };
-        let prep_bg = ctx.renderer.device.create_bind_group(&BindGroupDescriptor {
-            label: Some(&format_sso!("UniformPrep {type_name} BG")),
-            layout: &self.prep_bgl,
-            entries: &[
-                BindGroupEntry { binding: 0, resource: object_manager_buffer.as_entire_binding() },
-                BindGroupEntry { binding: 1, resource: buffer.as_entire_binding() },
-            ],
-        });
-
-        profiling::scope!("Command Encoding");
-
-        let mut cpass = encoder.begin_compute_pass(&ComputePassDescriptor {
-            label: Some(&format_sso!("GpuCuller {type_name} uniform bake")),
-            timestamp_writes: None,
-        });
-        cpass.set_pipeline(&self.prep_pipeline);
-        cpass.set_bind_group(0, &prep_bg, &[]);
-        cpass.dispatch_workgroups((max_object_count as u32).div_round_up(WORKGROUP_SIZE), 1, 1);
-        drop(cpass);
-    }
-
-    pub fn cull<M>(
-        &self,
-        ctx: &mut NodeExecutionContext,
-        jobs: ShaderBatchDatas,
-        depth_handle: DeclaredDependency<RenderTargetHandle>,
-        camera_specifier: CameraSpecifier,
-    ) -> DrawCallSet
-    where
-        M: Material,
-    {
-        profiling::scope!("GpuCuller::cull");
-
-        assert_eq!(TypeId::of::<M>(), self.type_id);
-
-        let type_name = type_name::<M>();
-
-        let total_invocations: u32 = jobs
-            .jobs
-            .iter()
-            .map(|j: &ShaderBatchData| {
-                debug_assert_eq!(j.total_invocations % WORKGROUP_SIZE, 0);
-                j.total_invocations
-            })
-            .sum();
-
-        let encoder = ctx.encoder_or_pass.take_encoder();
-
-        let mut culling_buffer_map = ctx.data_core.graph_storage.get_mut(&self.culling_buffer_map_handle);
-        let buffers = culling_buffer_map.get_or_resize_buffers(
-            &ctx.renderer.queue,
-            &ctx.renderer.device,
-            encoder,
-            camera_specifier,
-            CullingBufferSizes { invocations: total_invocations as u64, draw_calls: jobs.regions.len() as u64 },
-        );
-
-        let per_camera_uniform = Arc::clone(
-            ctx.data_core
-                .graph_storage
-                .get_mut(&self.per_material_buffer_handle)
-                .get(&camera_specifier)
-                .unwrap_or_else(|| panic!("No per camera uniform for camera {:?}", camera_specifier)),
-        );
-
-        let culling_data_buffer = {
-            profiling::scope!("Culling Job Data Upload");
-
-            let culling_data_buffer = ctx.renderer.device.create_buffer(&wgpu::BufferDescriptor {
-                label: Some("Culling Data Buffer"),
-                size: jobs.jobs.size().get(),
-                usage: wgpu::BufferUsages::STORAGE,
-                mapped_at_creation: true,
-            });
-
-            let mut mapping = culling_data_buffer.slice(..).get_mapped_range_mut();
-            StorageBuffer::new(&mut *mapping).write(&jobs.jobs).unwrap();
-            drop(mapping);
-            culling_data_buffer.unmap();
-
-            culling_data_buffer
-        };
-
-        let hi_z_buffer = ctx.graph_data.get_render_target(depth_handle);
-
-        let culling_bg = ctx.renderer.device.create_bind_group(&BindGroupDescriptor {
-            label: Some(&format_sso!("GpuCuller {type_name} BG")),
-            layout: &self.culling_bgl,
-            entries: &[
-                BindGroupEntry { binding: 0, resource: ctx.eval_output.mesh_buffer.as_entire_binding() },
-                BindGroupEntry {
-                    binding: 1,
-                    resource: ctx.data_core.object_manager.buffer::<M>().unwrap().as_entire_binding(),
-                },
-                BindGroupEntry {
-                    binding: 2,
-                    resource: BindingResource::Buffer(BufferBinding {
-                        buffer: &culling_data_buffer,
-                        offset: 0,
-                        size: Some(ShaderBatchData::SHADER_SIZE),
-                    }),
-                },
-                BindGroupEntry { binding: 3, resource: buffers.draw_call_buffer.as_entire_binding() },
-                BindGroupEntry { binding: 4, resource: buffers.index_buffer.as_entire_binding() },
-                BindGroupEntry { binding: 5, resource: buffers.culling_results_buffer.as_entire_binding() },
-                BindGroupEntry { binding: 6, resource: per_camera_uniform.as_entire_binding() },
-                BindGroupEntry { binding: 7, resource: BindingResource::TextureView(hi_z_buffer) },
-                BindGroupEntry { binding: 8, resource: BindingResource::Sampler(&self.sampler) },
-            ],
-        });
-
-        profiling::scope!("Command Encoding");
-        let mut draw_calls = Vec::with_capacity(jobs.jobs.len());
-        let mut material_key_ranges = HashMap::new();
-
-        let mut current_material_key_range_start = 0;
-        let mut current_material_key = jobs.regions.first().map(|k| k.key.material_key).unwrap_or(0);
-        for region in jobs.regions {
-            let region: JobSubRegion = region;
-
-            if current_material_key != region.key.material_key {
-                let range_end = draw_calls.len();
-                material_key_ranges.insert(current_material_key, current_material_key_range_start..range_end);
-                current_material_key = region.key.material_key;
-                current_material_key_range_start = range_end;
-            }
-
-            draw_calls.push(DrawCall { bind_group_index: region.key.bind_group_index, batch_index: region.job_index });
-        }
-
-        material_key_ranges.insert(current_material_key, current_material_key_range_start..draw_calls.len());
-
-        encoder.clear_buffer(&buffers.draw_call_buffer, 8, None);
-        let mut cpass = encoder.begin_compute_pass(&ComputePassDescriptor {
-            label: Some(&format_sso!("GpuCuller {type_name} Culling")),
-            timestamp_writes: None,
-        });
-
-        cpass.set_pipeline(&self.culling_pipeline);
-        for (idx, job) in jobs.jobs.iter().enumerate() {
-            // RA can't infer this
-            let job: &ShaderBatchData = job;
-
-            cpass.set_bind_group(0, &culling_bg, &[idx as u32 * ShaderBatchData::SHADER_SIZE.get() as u32]);
-            cpass.dispatch_workgroups(job.total_invocations.div_round_up(WORKGROUP_SIZE), 1, 1);
-        }
-        drop(cpass);
-
-        DrawCallSet { culling_data_buffer, per_camera_uniform, draw_calls, material_key_ranges }
-    }
-
-    pub fn add_object_uniform_upload_to_graph<'node, M: Material>(
-        &'node self,
-        graph: &mut RenderGraph<'node>,
-        camera_specifier: CameraSpecifier,
-        resolution: UVec2,
-        samples: SampleCount,
-        name: &str,
-    ) {
-        let mut node = graph.add_node(name);
-        node.add_side_effect();
-
-        node.build(move |mut ctx| {
-            let camera = match camera_specifier {
-                CameraSpecifier::Shadow(i) => &ctx.eval_output.shadows[i as usize].camera,
-                CameraSpecifier::Viewport => &ctx.data_core.viewport_camera_state,
-            };
-
-            self.object_uniform_upload::<M>(&mut ctx, camera, camera_specifier, resolution, samples);
-        });
-    }
-
-    pub fn add_culling_to_graph<'node, M: Material>(
-        &'node self,
-        graph: &mut RenderGraph<'node>,
-        draw_calls_hdl: DataHandle<Arc<DrawCallSet>>,
-        depth_handle: RenderTargetHandle,
-        camera_specifier: CameraSpecifier,
-        name: &str,
-    ) {
-        let mut node = graph.add_node(name);
-        let output = node.add_data(draw_calls_hdl, NodeResourceUsage::Output);
-        let depth_handle = node.add_render_target(
-            depth_handle,
-            if camera_specifier.is_shadow() { NodeResourceUsage::Reference } else { NodeResourceUsage::Input },
-        );
-
-        node.build(move |mut ctx| {
-            let camera = match camera_specifier {
-                CameraSpecifier::Shadow(i) => &ctx.eval_output.shadows[i as usize].camera,
-                CameraSpecifier::Viewport => &ctx.data_core.viewport_camera_state,
-            };
-
-            let jobs = batch_objects::<M>(&mut ctx, &self.previous_invocation_map_handle, camera, camera_specifier);
-
-            if jobs.jobs.is_empty() {
-                return;
-            }
-
-            let draw_calls = self.cull::<M>(&mut ctx, jobs, depth_handle, camera_specifier);
-
-            ctx.graph_data.set_data(output, Some(Arc::new(draw_calls)));
-        });
-    }
-}
diff --git a/rend3-routine/src/culling/mod.rs b/rend3-routine/src/culling/mod.rs
deleted file mode 100644
index 3ff0b355..00000000
--- a/rend3-routine/src/culling/mod.rs
+++ /dev/null
@@ -1,10 +0,0 @@
-const BATCH_SIZE: usize = 256;
-const WORKGROUP_SIZE: u32 = 256;
-
-mod batching;
-mod culler;
-mod suballoc;
-
-pub use batching::{ShaderBatchData, ShaderBatchDatas};
-pub use culler::{CullingBufferMap, DrawCall, DrawCallSet, GpuCuller};
-pub use suballoc::{InputOutputBuffer, InputOutputPartition};
diff --git a/rend3-routine/src/culling/suballoc.rs b/rend3-routine/src/culling/suballoc.rs
deleted file mode 100644
index 6079dcda..00000000
--- a/rend3-routine/src/culling/suballoc.rs
+++ /dev/null
@@ -1,223 +0,0 @@
-use std::{
-    ops::{Deref, Range},
-    sync::Arc,
-};
-
-use encase::{internal::WriteInto, ShaderType, StorageBuffer};
-use rend3::util::{math::IntegerExt, typedefs::SsoString};
-use wgpu::CommandEncoder;
-
-#[derive(Debug, Copy, Clone, PartialEq, Eq)]
-pub enum InputOutputPartition {
-    Input,
-    Output,
-}
-
-#[derive(Debug)]
-pub struct InputOutputBuffer {
-    /// Label for the buffer
-    label: SsoString,
-    /// Current active buffer
-    buffer: Arc<wgpu::Buffer>,
-    /// Amount of elements reserved in the buffer for data, not including the header.
-    capacity_elements: u64,
-    /// Size of output partition
-    output_partition_elements: u64,
-    /// Size of input partition
-    input_partition_elements: u64,
-    /// When false, output partition is comes first.
-    ///
-    /// When true, input partition comes first.
-    flipped: bool,
-    /// Clear on swap
-    ///
-    /// When true, the data in both partitions will be cleared when the buffer
-    /// is swapped.
-    clear_on_swap: bool,
-    /// The size of each element in the buffer. This allows the user to provide sizes in element counts only.
-    ///
-    /// Must be a multiple of `element_alignment`.
-    element_size: u64,
-    /// Size of the header, including padding.
-    padded_header_size: u64,
-}
-
-impl Deref for InputOutputBuffer {
-    type Target = Arc<wgpu::Buffer>;
-
-    fn deref(&self) -> &Self::Target {
-        &self.buffer
-    }
-}
-
-impl InputOutputBuffer {
-    const USAGES: wgpu::BufferUsages = wgpu::BufferUsages::STORAGE
-        .union(wgpu::BufferUsages::COPY_DST)
-        .union(wgpu::BufferUsages::COPY_SRC)
-        .union(wgpu::BufferUsages::INDEX)
-        .union(wgpu::BufferUsages::INDIRECT);
-
-    // The size of the header, including padding
-    fn padded_header_size(element_alignment: u64) -> u64 {
-        const HEADER_SIZE: u64 = 8;
-        HEADER_SIZE.round_up(element_alignment)
-    }
-
-    fn capacity_elements(input_partition_elements: u64, output_partition_elements: u64) -> u64 {
-        let max = input_partition_elements.max(output_partition_elements);
-        max.next_power_of_two() * 2
-    }
-
-    fn buffer_size(padded_header_size: u64, capacity_elements: u64, element_size: u64) -> u64 {
-        capacity_elements * element_size + padded_header_size
-    }
-
-    pub fn new(
-        device: &wgpu::Device,
-        queue: &wgpu::Queue,
-        partition_elements: u64,
-        label: &str,
-        element_size: u64,
-        element_alignment: u64,
-        clear_on_swap: bool,
-    ) -> Self {
-        let element_size = element_size.round_up(element_alignment);
-        let capacity_elements = Self::capacity_elements(partition_elements, partition_elements);
-        let padded_header_size = Self::padded_header_size(element_alignment);
-        let buffer_length = Self::buffer_size(padded_header_size, capacity_elements, element_size);
-
-        let buffer = Arc::new(device.create_buffer(&wgpu::BufferDescriptor {
-            label: Some(label),
-            size: buffer_length,
-            usage: Self::USAGES,
-            mapped_at_creation: false,
-        }));
-
-        let this = Self {
-            label: SsoString::from(label),
-            buffer,
-            capacity_elements,
-            output_partition_elements: partition_elements,
-            input_partition_elements: partition_elements,
-            flipped: false,
-            clear_on_swap,
-            element_size,
-            padded_header_size,
-        };
-
-        this.write_headers(queue);
-
-        this
-    }
-
-    /// Returns the offset in bytes for a given element in the given partition
-    pub fn element_offset(&self, partition: InputOutputPartition, element: u64) -> u64 {
-        let partition_offset = match partition {
-            InputOutputPartition::Input => self.input_partition_offset(),
-            InputOutputPartition::Output => self.output_partition_offset(),
-        };
-        self.padded_header_size + partition_offset + element * self.element_size
-    }
-
-    pub fn partition_slice(&self, partition: InputOutputPartition) -> Range<u64> {
-        let partition_offset = match partition {
-            InputOutputPartition::Input => self.input_partition_offset(),
-            InputOutputPartition::Output => self.output_partition_offset(),
-        };
-        let partition_elements = match partition {
-            InputOutputPartition::Input => self.input_partition_elements,
-            InputOutputPartition::Output => self.output_partition_elements,
-        };
-        let partition_size = partition_elements * self.element_size;
-        let slice_start = self.padded_header_size + partition_offset;
-        let slice_end: u64 = slice_start + partition_size;
-        slice_start..slice_end
-    }
-
-    pub fn write_to_output<T: ShaderType + WriteInto>(&self, queue: &wgpu::Queue, data: &T) {
-        assert_eq!(data.size().get(), self.output_partition_elements * self.element_size);
-        let mut mapping = queue
-            .write_buffer_with(&self.buffer, self.element_offset(InputOutputPartition::Output, 0), data.size())
-            .unwrap();
-        StorageBuffer::new(&mut *mapping).write(data).unwrap();
-        drop(mapping);
-    }
-
-    /// Returns the offset in bytes to get to the start of the output partition, not including the header.
-    fn output_partition_offset(&self) -> u64 {
-        if self.flipped {
-            (self.capacity_elements * self.element_size) / 2
-        } else {
-            0
-        }
-    }
-
-    /// Returns the offset in bytes to get to the start of the input partition, not including the header.
-    fn input_partition_offset(&self) -> u64 {
-        if self.flipped {
-            0
-        } else {
-            (self.capacity_elements * self.element_size) / 2
-        }
-    }
-
-    pub fn swap(
-        &mut self,
-        queue: &wgpu::Queue,
-        device: &wgpu::Device,
-        encoder: &mut CommandEncoder,
-        new_partition_elements: u64,
-    ) {
-        // Offset of the output partition in the old buffer.
-        let old_output_partition_offset = self.output_partition_offset();
-
-        // The output of last frame is now the input of this frame.
-        self.input_partition_elements = self.output_partition_elements;
-        // The new output is of the given size.
-        self.output_partition_elements = new_partition_elements;
-        // We're now flipped.
-        self.flipped = !self.flipped;
-
-        // Gather a new data capcity
-        let new_capacity_elements =
-            Self::capacity_elements(self.input_partition_elements, self.output_partition_elements);
-
-        if new_capacity_elements != self.capacity_elements {
-            // Set the capacity reserved
-            self.capacity_elements = new_capacity_elements;
-            let new_buffer = device.create_buffer(&wgpu::BufferDescriptor {
-                label: Some(&self.label),
-                size: Self::buffer_size(self.padded_header_size, new_capacity_elements, self.element_size),
-                usage: Self::USAGES,
-                mapped_at_creation: false,
-            });
-            if !self.clear_on_swap {
-                // We copy the old output partition to the input partition of the new buffer.
-                //
-                // Note that we call output_partition_offset before we change any internal parameters,
-                // as we need the old buffer offsets.
-                encoder.copy_buffer_to_buffer(
-                    &self.buffer,
-                    old_output_partition_offset + self.padded_header_size,
-                    &new_buffer,
-                    self.input_partition_offset() + self.padded_header_size,
-                    self.input_partition_elements * self.element_size,
-                );
-            }
-            // We now set the new buffer.
-            self.buffer = Arc::new(new_buffer);
-        } else if self.clear_on_swap {
-            encoder.clear_buffer(&self.buffer, self.padded_header_size, None);
-        }
-
-        self.write_headers(queue)
-    }
-
-    fn write_headers(&self, queue: &wgpu::Queue) {
-        let offsets = [
-            (self.output_partition_offset() / self.element_size) as u32,
-            (self.input_partition_offset() / self.element_size) as u32,
-        ];
-        queue.write_buffer(&self.buffer, 0, bytemuck::cast_slice(&offsets));
-    }
-}
diff --git a/rend3-routine/src/forward.rs b/rend3-routine/src/forward.rs
index e45c9003..3b5f4d6d 100644
--- a/rend3-routine/src/forward.rs
+++ b/rend3-routine/src/forward.rs
@@ -5,11 +5,11 @@
 use std::{marker::PhantomData, sync::Arc};
 
 use arrayvec::ArrayVec;
-use encase::ShaderSize;
+use encase::{ShaderSize, StorageBuffer};
 use rend3::{
-    graph::{self, DataHandle, NodeResourceUsage, RenderGraph, RenderPassTargets},
-    types::{GraphDataHandle, Material, SampleCount},
-    util::{bind_merge::BindGroupBuilder, typedefs::FastHashMap},
+    graph::{DataHandle, NodeResourceUsage, RenderGraph, RenderPassTargets},
+    types::{Material, SampleCount},
+    util::bind_merge::BindGroupBuilder,
     ProfileData, Renderer, RendererDataCore, RendererProfile, ShaderPreProcessor,
 };
 use serde::Serialize;
@@ -20,10 +20,8 @@ use wgpu::{
     VertexState,
 };
 
-use crate::{
-    common::{CameraSpecifier, PerMaterialArchetypeInterface, WholeFrameInterfaces},
-    culling::{self, CullingBufferMap, DrawCall, DrawCallSet, InputOutputPartition},
-};
+use crate::common::{CameraSpecifier, PerMaterialArchetypeInterface, WholeFrameInterfaces};
+use crate::uniforms::PerCameraUniform;
 
 #[derive(Serialize)]
 struct ForwardPreprocessingArguments {
@@ -44,44 +42,6 @@ pub struct ShaderModulePair<'a> {
     pub fs_module: &'a ShaderModule,
 }
 
-enum DeclaredCullingOutput {
-    Predicted,
-    Residual(graph::DeclaredDependency<DataHandle<Arc<culling::DrawCallSet>>>),
-}
-
-impl DeclaredCullingOutput {
-    /// Returns `true` if the declared culling output is [`Residual`].
-    ///
-    /// [`Residual`]: DeclaredCullingOutput::Residual
-    #[must_use]
-    fn is_residual(&self) -> bool {
-        matches!(self, Self::Residual(..))
-    }
-}
-
-pub enum CullingSource {
-    /// We are rendering the first pass with the predicted triangles from last frame.
-    ///
-    /// This is used on the first pass.
-    Predicted,
-    /// We are rendering the second pass with the residual triangles from this frame.
-    ///
-    /// This is used when we are rendering the residual triangles from this frame, as part of the second pass.
-    Residual(DataHandle<Arc<culling::DrawCallSet>>),
-}
-
-impl CullingSource {
-    fn add_inner_data(&self, builder: &mut graph::RenderGraphNodeBuilder<'_, '_>) -> DeclaredCullingOutput {
-        match self {
-            CullingSource::Predicted => DeclaredCullingOutput::Predicted,
-            CullingSource::Residual(handle) => {
-                let handle = builder.add_data(*handle, NodeResourceUsage::Input);
-                DeclaredCullingOutput::Residual(handle)
-            }
-        }
-    }
-}
-
 pub struct ForwardRoutineCreateArgs<'a, M> {
     pub name: &'a str,
 
@@ -96,8 +56,6 @@ pub struct ForwardRoutineCreateArgs<'a, M> {
     pub routine_type: RoutineType,
     pub shaders: ShaderModulePair<'a>,
 
-    pub culling_buffer_map_handle: GraphDataHandle<CullingBufferMap>,
-
     pub extra_bgls: &'a [&'a BindGroupLayout],
     #[allow(clippy::type_complexity)]
     pub descriptor_callback: Option<&'a dyn Fn(&mut RenderPipelineDescriptor<'_>, &mut [Option<ColorTargetState>])>,
@@ -126,7 +84,6 @@ pub struct ForwardRoutineArgs<'a, 'node, M> {
     pub binding_data: ForwardRoutineBindingData<'node, M>,
 
     /// Source of culling information, determines which triangles are rendered this pass.
-    pub culling_source: CullingSource,
     pub samples: SampleCount,
     pub renderpass: RenderPassTargets,
 }
@@ -136,8 +93,6 @@ pub struct ForwardRoutine<M: Material> {
     pipeline_s1: RenderPipeline,
     pipeline_s4: RenderPipeline,
     material_key: u64,
-    culling_buffer_map_handle: GraphDataHandle<CullingBufferMap>,
-    draw_call_set_cache_handle: GraphDataHandle<FastHashMap<CameraSpecifier, Arc<DrawCallSet>>>,
     _phantom: PhantomData<M>,
 }
 impl<M: Material> ForwardRoutine<M> {
@@ -182,8 +137,6 @@ impl<M: Material> ForwardRoutine<M> {
             pipeline_s1: build_forward_pipeline_inner(&pll, &args, SampleCount::One),
             pipeline_s4: build_forward_pipeline_inner(&pll, &args, SampleCount::Four),
             material_key: args.material_key,
-            draw_call_set_cache_handle: args.renderer.add_graph_data(FastHashMap::default()),
-            culling_buffer_map_handle: args.culling_buffer_map_handle,
             _phantom: PhantomData,
         }
     }
@@ -192,73 +145,49 @@ impl<M: Material> ForwardRoutine<M> {
     pub fn add_forward_to_graph<'node>(&'node self, args: ForwardRoutineArgs<'_, 'node, M>) {
         let mut builder = args.graph.add_node(args.label);
 
-        builder.add_side_effect();
-
-        let rpass_handle = builder.add_renderpass(args.renderpass.clone(), NodeResourceUsage::Output);
+        let rpass_handle = builder.add_renderpass(args.renderpass.clone(), NodeResourceUsage::InputOutput);
 
         let whole_frame_uniform_handle =
             builder.add_data(args.binding_data.whole_frame_uniform_bg, NodeResourceUsage::Input);
-        let culling_output_handle = args.culling_source.add_inner_data(&mut builder);
 
         builder.build(move |mut ctx| {
             let rpass = ctx.encoder_or_pass.take_rpass(rpass_handle);
             let whole_frame_uniform_bg = ctx.graph_data.get_data(ctx.temps, whole_frame_uniform_handle).unwrap();
 
-            // We need to store the draw call set in a cache so that next frame's predicted pass can use it.
-            let mut draw_call_set_cache = ctx.data_core.graph_storage.get_mut(&self.draw_call_set_cache_handle);
-
-            let draw_call_set = match culling_output_handle {
-                // We are rendering the second pass with the residual triangles from this frame.
-                DeclaredCullingOutput::Residual(handle) => {
-                    // If there is no draw call set for this camera in the cache, there isn't actually anything to render.
-                    let Some(draw_call_set) = ctx.graph_data.get_data(ctx.temps, handle) else {
-                        return;
-                    };
-
-                    // As we're in the residual, we need to store the draw call set for the next frame.
-                    draw_call_set_cache.insert(args.camera, Arc::clone(draw_call_set));
-
-                    draw_call_set
-                }
-                // We are rendering the first pass with the predicted triangles from last frame.
-                DeclaredCullingOutput::Predicted => {
-                    // If there is no draw call set for this camera in the cache, that means we have yet to actually render anything,
-                    // so either no objects yet exist, or we are in the first frame, so we can bail out.
-                    let Some(draw_call_set) = draw_call_set_cache.get(&args.camera) else {
-                        return;
-                    };
-
-                    draw_call_set
-                }
+            let Some(objects) = ctx.data_core.object_manager.enumerated_objects::<M>() else {
+                return;
             };
-            let residual = culling_output_handle.is_residual() && args.camera.is_viewport();
 
-            let culling_buffer_storage = ctx.data_core.graph_storage.get(&self.culling_buffer_map_handle);
+            let archetype_view = ctx.data_core.material_manager.archetype_view::<M>();
 
-            // If there are no culling buffers in storage yet, we are in the first frame. We depend on culling
-            // to render anything, so just bail at this point.
-            let Some(culling_buffers) = culling_buffer_storage.get_buffers(args.camera) else {
-                return;
+            let camera = match args.camera {
+                CameraSpecifier::Viewport => &ctx.data_core.viewport_camera_state,
+                CameraSpecifier::Shadow(idx) => &ctx.eval_output.shadows[idx as usize].camera,
             };
 
-            // We need to actually clone ownership of the underlying buffers and add them to renderpass temps,
-            // so we can use them in the renderpass.
-            let index_buffer = ctx.temps.add(Arc::clone(&culling_buffers.index_buffer));
-            let draw_call_buffer = ctx.temps.add(Arc::clone(&culling_buffers.draw_call_buffer));
+            let per_camera_uniform_values = PerCameraUniform {
+                view: camera.view(),
+                view_proj: camera.view_proj(),
+                frustum: camera.world_frustum(),
+                object_count: objects.len() as u32,
+            };
 
-            // When we're rendering the residual data, we are post buffer flip. We want to be rendering using the
-            // "input" partition, as this is the partition that all same-frame data is in.
-            let partition = if residual { InputOutputPartition::Input } else { InputOutputPartition::Output };
+            let per_camera_uniform_buffer = ctx.temps.add(ctx.renderer.device.create_buffer(&wgpu::BufferDescriptor {
+                label: Some("Per Camera Uniform"),
+                size: PerCameraUniform::SHADER_SIZE.get(),
+                usage: wgpu::BufferUsages::STORAGE | wgpu::BufferUsages::COPY_DST,
+                mapped_at_creation: true,
+            }));
+            let mut mapping = per_camera_uniform_buffer.slice(..).get_mapped_range_mut();
+            StorageBuffer::new(&mut *mapping).write(&per_camera_uniform_values).unwrap();
+            drop(mapping);
+            per_camera_uniform_buffer.unmap();
 
             let per_material_bg = ctx.temps.add(
                 BindGroupBuilder::new()
                     .append_buffer(ctx.data_core.object_manager.buffer::<M>().unwrap())
-                    .append_buffer_with_size(
-                        &draw_call_set.culling_data_buffer,
-                        culling::ShaderBatchData::SHADER_SIZE.get(),
-                    )
                     .append_buffer(&ctx.eval_output.mesh_buffer)
-                    .append_buffer(&draw_call_set.per_camera_uniform)
+                    .append_buffer(per_camera_uniform_buffer)
                     .append_buffer(ctx.data_core.material_manager.archetype_view::<M>().buffer())
                     .build(&ctx.renderer.device, Some("Per-Material BG"), &args.binding_data.per_material_bgl.bgl),
             );
@@ -267,10 +196,7 @@ impl<M: Material> ForwardRoutine<M> {
                 SampleCount::One => &self.pipeline_s1,
                 SampleCount::Four => &self.pipeline_s4,
             };
-            rpass.set_index_buffer(
-                index_buffer.slice(culling_buffers.index_buffer.partition_slice(partition)),
-                IndexFormat::Uint32,
-            );
+            rpass.set_index_buffer(ctx.eval_output.mesh_buffer.slice(..), IndexFormat::Uint32);
             rpass.set_pipeline(pipeline);
             rpass.set_bind_group(0, whole_frame_uniform_bg, &[]);
             if let Some(v) = args.binding_data.extra_bgs {
@@ -282,34 +208,23 @@ impl<M: Material> ForwardRoutine<M> {
                 rpass.set_bind_group(2, bg, &[]);
             }
 
-            // If there are no draw calls for this material, just bail.
-            let Some(range) = draw_call_set.material_key_ranges.get(&self.material_key) else {
-                return;
-            };
-
-            for (range_relative_idx, call) in draw_call_set.draw_calls[range.clone()].iter().enumerate() {
-                // Help RA out
-                let call: &DrawCall = call;
-                // Add the base of the range to the index to get the actual index
-                let idx = range_relative_idx + range.start;
+            for (idx, object) in objects.into_iter() {
+                let material = archetype_view.material(*object.material_handle);
+                if material.inner.key() != self.material_key {
+                    continue;
+                }
 
                 // If we're in cpu driven mode, we need to update the texture bind group.
                 if ctx.renderer.profile.is_cpu_driven() {
-                    rpass.set_bind_group(
-                        2,
-                        ctx.data_core.material_manager.texture_bind_group(call.bind_group_index),
-                        &[],
-                    );
+                    let texture_bind_group = material.bind_group_index.into_cpu();
+                    rpass.set_bind_group(2, ctx.data_core.material_manager.texture_bind_group(texture_bind_group), &[]);
                 }
-                rpass.set_bind_group(
-                    1,
-                    per_material_bg,
-                    &[call.batch_index * culling::ShaderBatchData::SHADER_SIZE.get() as u32],
-                );
-                rpass.draw_indexed_indirect(
-                    draw_call_buffer,
-                    culling_buffers.draw_call_buffer.element_offset(partition, idx as u64),
-                );
+                rpass.set_bind_group(1, per_material_bg, &[]);
+                rpass.draw_indexed(
+                    object.inner.first_index..object.inner.first_index + object.inner.index_count,
+                    0,
+                    idx.idx as u32..idx.idx as u32 + 1,
+                )
             }
         });
     }
diff --git a/rend3-routine/src/hi_z.rs b/rend3-routine/src/hi_z.rs
deleted file mode 100644
index e75c5f8f..00000000
--- a/rend3-routine/src/hi_z.rs
+++ /dev/null
@@ -1,235 +0,0 @@
-use std::borrow::Cow;
-
-use glam::UVec2;
-use rend3::{
-    graph::{
-        DeclaredDependency, NodeExecutionContext, NodeResourceUsage, RenderGraph, RenderPassDepthTarget,
-        RenderPassHandle, RenderPassTargets, RenderTargetHandle, ViewportRect,
-    },
-    Renderer, ShaderPreProcessor,
-};
-use wgpu::{
-    BindGroupDescriptor, BindGroupEntry, BindGroupLayout, BindGroupLayoutDescriptor, BindGroupLayoutEntry,
-    BindingResource, BindingType, CompareFunction, DepthBiasState, DepthStencilState, Extent3d, FragmentState,
-    MultisampleState, PipelineLayoutDescriptor, PrimitiveState, RenderPipeline, RenderPipelineDescriptor,
-    ShaderModuleDescriptor, ShaderStages, StencilState, TextureDimension, TextureFormat, TextureSampleType,
-    TextureViewDimension, VertexState,
-};
-
-use crate::base::DepthTargets;
-
-pub struct HiZRoutine {
-    multisampled_bgl: BindGroupLayout,
-    single_sampled_bgl: BindGroupLayout,
-    downscale_pipeline: RenderPipeline,
-    resolve_pipeline: RenderPipeline,
-}
-
-impl HiZRoutine {
-    pub fn new(renderer: &Renderer, spp: &ShaderPreProcessor) -> Self {
-        let resolve_source = spp
-            .render_shader("rend3-routine/resolve_depth_min.wgsl", &serde_json::json!({"SAMPLES": 4}), None)
-            .unwrap();
-        let downscale_source = spp.render_shader("rend3-routine/hi_z.wgsl", &(), None).unwrap();
-
-        let resolve_sm = renderer.device.create_shader_module(ShaderModuleDescriptor {
-            label: Some("HiZ Resolver"),
-            source: wgpu::ShaderSource::Wgsl(Cow::Owned(resolve_source)),
-        });
-        let downscale_sm = renderer.device.create_shader_module(ShaderModuleDescriptor {
-            label: Some("HiZ Downscaler"),
-            source: wgpu::ShaderSource::Wgsl(Cow::Owned(downscale_source)),
-        });
-
-        let multisampled_bgl = renderer.device.create_bind_group_layout(&BindGroupLayoutDescriptor {
-            label: Some("Multi Sample HiZ Texture BGL"),
-            entries: &[BindGroupLayoutEntry {
-                binding: 0,
-                visibility: ShaderStages::VERTEX_FRAGMENT,
-                ty: BindingType::Texture {
-                    sample_type: TextureSampleType::Depth,
-                    view_dimension: TextureViewDimension::D2,
-                    multisampled: true,
-                },
-                count: None,
-            }],
-        });
-
-        let single_sampled_bgl = renderer.device.create_bind_group_layout(&BindGroupLayoutDescriptor {
-            label: Some("Single Sample HiZ Texture BGL"),
-            entries: &[BindGroupLayoutEntry {
-                binding: 0,
-                visibility: ShaderStages::VERTEX_FRAGMENT,
-                ty: BindingType::Texture {
-                    sample_type: TextureSampleType::Depth,
-                    view_dimension: TextureViewDimension::D2,
-                    multisampled: false,
-                },
-                count: None,
-            }],
-        });
-
-        let resolve_pipline_layout = renderer.device.create_pipeline_layout(&PipelineLayoutDescriptor {
-            label: Some("HiZ Resolve PLL"),
-            bind_group_layouts: &[&multisampled_bgl],
-            push_constant_ranges: &[],
-        });
-
-        let downscale_pipline_layout = renderer.device.create_pipeline_layout(&PipelineLayoutDescriptor {
-            label: Some("HiZ Downscale PLL"),
-            bind_group_layouts: &[&single_sampled_bgl],
-            push_constant_ranges: &[],
-        });
-
-        let resolve_pipeline = renderer.device.create_render_pipeline(&RenderPipelineDescriptor {
-            label: Some("HiZ Resolve Pipeline"),
-            layout: Some(&resolve_pipline_layout),
-            vertex: VertexState { module: &resolve_sm, entry_point: "vs_main", buffers: &[] },
-            primitive: PrimitiveState::default(),
-            depth_stencil: Some(DepthStencilState {
-                format: TextureFormat::Depth32Float,
-                depth_write_enabled: true,
-                depth_compare: CompareFunction::Always,
-                stencil: StencilState::default(),
-                bias: DepthBiasState::default(),
-            }),
-            multisample: MultisampleState::default(),
-            fragment: Some(FragmentState { module: &resolve_sm, entry_point: "fs_main", targets: &[] }),
-            multiview: None,
-        });
-
-        let downscale_pipeline = renderer.device.create_render_pipeline(&RenderPipelineDescriptor {
-            label: Some("HiZ Downscale Pipeline"),
-            layout: Some(&downscale_pipline_layout),
-            vertex: VertexState { module: &downscale_sm, entry_point: "vs_main", buffers: &[] },
-            primitive: PrimitiveState::default(),
-            depth_stencil: Some(DepthStencilState {
-                format: TextureFormat::Depth32Float,
-                depth_write_enabled: true,
-                depth_compare: CompareFunction::Always,
-                stencil: StencilState::default(),
-                bias: DepthBiasState::default(),
-            }),
-            multisample: MultisampleState::default(),
-            fragment: Some(FragmentState { module: &downscale_sm, entry_point: "fs_main", targets: &[] }),
-            multiview: None,
-        });
-
-        Self { single_sampled_bgl, downscale_pipeline, multisampled_bgl, resolve_pipeline }
-    }
-
-    pub fn resolve<'pass>(
-        &'pass self,
-        mut ctx: NodeExecutionContext<'_, 'pass, '_>,
-        renderpass_handle: DeclaredDependency<RenderPassHandle>,
-        source_handle: DeclaredDependency<RenderTargetHandle>,
-    ) {
-        let rpass = ctx.encoder_or_pass.take_rpass(renderpass_handle);
-        let source = ctx.graph_data.get_render_target(source_handle);
-
-        let bind_group = ctx.temps.add(ctx.renderer.device.create_bind_group(&BindGroupDescriptor {
-            label: Some("HiZ Resolve BG"),
-            layout: &self.multisampled_bgl,
-            entries: &[BindGroupEntry { binding: 0, resource: BindingResource::TextureView(source) }],
-        }));
-
-        rpass.set_pipeline(&self.resolve_pipeline);
-        rpass.set_bind_group(0, bind_group, &[]);
-        rpass.draw(0..3, 0..1);
-    }
-
-    pub fn downscale<'pass>(
-        &'pass self,
-        mut ctx: NodeExecutionContext<'_, 'pass, '_>,
-        renderpass_handle: DeclaredDependency<RenderPassHandle>,
-        source_handle: DeclaredDependency<RenderTargetHandle>,
-    ) {
-        let rpass = ctx.encoder_or_pass.take_rpass(renderpass_handle);
-        let source = ctx.graph_data.get_render_target(source_handle);
-
-        let bind_group = ctx.temps.add(ctx.renderer.device.create_bind_group(&BindGroupDescriptor {
-            label: Some("HiZ Bind Group Layout"),
-            layout: &self.single_sampled_bgl,
-            entries: &[BindGroupEntry { binding: 0, resource: BindingResource::TextureView(source) }],
-        }));
-
-        rpass.set_pipeline(&self.downscale_pipeline);
-        rpass.set_bind_group(0, bind_group, &[]);
-        rpass.draw(0..3, 0..1);
-    }
-
-    pub fn add_hi_z_to_graph<'node>(
-        &'node self,
-        graph: &mut RenderGraph<'node>,
-        depth_targets: DepthTargets,
-        resolution: UVec2,
-    ) {
-        let extent = Extent3d { width: resolution.x, height: resolution.y, depth_or_array_layers: 1 };
-        let mips = extent.max_mips(TextureDimension::D2) as u8;
-
-        // First we need to downscale the depth buffer to a single sample texture
-        // if we are doing multisampling.
-        if let Some(multi_sample) = depth_targets.multi_sample {
-            let mut node = graph.add_node("HiZ Resolve");
-
-            let source = node.add_render_target(multi_sample, NodeResourceUsage::Output);
-
-            let rpass_handle = node.add_renderpass(
-                RenderPassTargets {
-                    targets: vec![],
-                    depth_stencil: Some(RenderPassDepthTarget {
-                        target: depth_targets.single_sample_mipped.set_mips(0..1),
-                        depth_clear: Some(0.0),
-                        stencil_clear: None,
-                    }),
-                },
-                NodeResourceUsage::InputOutput,
-            );
-
-            node.add_side_effect();
-
-            node.build(move |ctx| {
-                self.resolve(ctx, rpass_handle, source);
-            });
-        }
-
-        for dst_mip in 1..mips {
-            let src_mip = dst_mip - 1;
-
-            let mut node = graph.add_node(&format!("HiZ Mip {src_mip} -> {dst_mip}"));
-
-            let dst_extent = extent.mip_level_size(dst_mip as u32, TextureDimension::D2);
-            let src_extent = extent.mip_level_size(src_mip as u32, TextureDimension::D2);
-
-            let dst_target = depth_targets
-                .single_sample_mipped
-                .set_mips(dst_mip..dst_mip + 1)
-                .set_viewport(ViewportRect::from_size(UVec2::new(dst_extent.width, dst_extent.height)));
-            let src_target = node.add_render_target(
-                depth_targets
-                    .single_sample_mipped
-                    .set_mips(src_mip..src_mip + 1)
-                    .set_viewport(ViewportRect::from_size(UVec2::new(src_extent.width, src_extent.height))),
-                NodeResourceUsage::Input,
-            );
-
-            let rpass_handle = node.add_renderpass(
-                RenderPassTargets {
-                    targets: vec![],
-                    depth_stencil: Some(RenderPassDepthTarget {
-                        target: dst_target,
-                        depth_clear: Some(0.0),
-                        stencil_clear: None,
-                    }),
-                },
-                NodeResourceUsage::InputOutput,
-            );
-
-            node.add_side_effect();
-
-            node.build(move |ctx| {
-                self.downscale(ctx, rpass_handle, src_target);
-            });
-        }
-    }
-}
diff --git a/rend3-routine/src/lib.rs b/rend3-routine/src/lib.rs
index 9f13361b..eba4350f 100644
--- a/rend3-routine/src/lib.rs
+++ b/rend3-routine/src/lib.rs
@@ -21,9 +21,7 @@
 pub mod base;
 pub mod clear;
 pub mod common;
-pub mod culling;
 pub mod forward;
-pub mod hi_z;
 pub mod pbr;
 mod shaders;
 pub mod skinning;
diff --git a/rend3-routine/src/pbr/routine.rs b/rend3-routine/src/pbr/routine.rs
index 54b52f97..e6e8df83 100644
--- a/rend3-routine/src/pbr/routine.rs
+++ b/rend3-routine/src/pbr/routine.rs
@@ -1,16 +1,12 @@
 use std::{borrow::Cow, sync::Arc};
 
-use rend3::{
-    types::GraphDataHandle, Renderer, RendererDataCore, RendererProfile, ShaderPreProcessor, ShaderVertexBufferConfig,
-};
+use rend3::{Renderer, RendererDataCore, RendererProfile, ShaderPreProcessor, ShaderVertexBufferConfig};
 use serde::Serialize;
 use wgpu::{BlendState, ShaderModuleDescriptor, ShaderSource};
 
 use crate::{
     common::{PerMaterialArchetypeInterface, WholeFrameInterfaces},
-    culling::CullingBufferMap,
     forward::{ForwardRoutine, ForwardRoutineCreateArgs, RoutineType, ShaderModulePair},
-    hi_z::HiZRoutine,
     pbr::{PbrMaterial, TransparencyType},
 };
 
@@ -27,7 +23,6 @@ pub struct PbrRoutine {
     pub opaque_routine: ForwardRoutine<PbrMaterial>,
     pub cutout_routine: ForwardRoutine<PbrMaterial>,
     pub blend_routine: ForwardRoutine<PbrMaterial>,
-    pub hi_z: HiZRoutine,
     pub per_material: PerMaterialArchetypeInterface<PbrMaterial>,
 }
 
@@ -37,7 +32,6 @@ impl PbrRoutine {
         data_core: &mut RendererDataCore,
         spp: &ShaderPreProcessor,
         interfaces: &WholeFrameInterfaces,
-        culling_buffer_map_handle: &GraphDataHandle<CullingBufferMap>,
     ) -> Self {
         profiling::scope!("PbrRenderRoutine::new");
 
@@ -117,7 +111,6 @@ impl PbrRoutine {
                         targets[0].as_mut().unwrap().blend = Some(BlendState::ALPHA_BLENDING)
                     }
                 }),
-                culling_buffer_map_handle: culling_buffer_map_handle.clone(),
             })
         };
 
@@ -127,7 +120,6 @@ impl PbrRoutine {
             opaque_routine: inner(RoutineType::Forward, &pbr_forward, TransparencyType::Opaque),
             cutout_routine: inner(RoutineType::Forward, &pbr_cutout, TransparencyType::Cutout),
             blend_routine: inner(RoutineType::Forward, &pbr_forward, TransparencyType::Blend),
-            hi_z: HiZRoutine::new(renderer, spp),
             per_material,
         }
     }
diff --git a/rend3-routine/src/uniforms.rs b/rend3-routine/src/uniforms.rs
index b21f9d95..cd6ace6f 100644
--- a/rend3-routine/src/uniforms.rs
+++ b/rend3-routine/src/uniforms.rs
@@ -12,6 +12,16 @@ use wgpu::{BindGroup, BufferUsages};
 
 use crate::common::{Samplers, WholeFrameInterfaces};
 
+#[derive(ShaderType)]
+pub struct PerCameraUniform {
+    // TODO: use less space
+    pub view: Mat4,
+    // TODO: use less space
+    pub view_proj: Mat4,
+    pub frustum: Frustum,
+    pub object_count: u32,
+}
+
 /// Set of uniforms that are useful for the whole frame.
 #[derive(Debug, Copy, Clone, ShaderType)]
 pub struct FrameUniforms {
diff --git a/rend3-test/src/runner.rs b/rend3-test/src/runner.rs
index ad915976..d28bd3d2 100644
--- a/rend3-test/src/runner.rs
+++ b/rend3-test/src/runner.rs
@@ -83,13 +83,7 @@ impl TestRunnerBuilder {
 
         let base_rendergraph = BaseRenderGraph::new(&renderer, &spp);
 
-        let pbr = PbrRoutine::new(
-            &renderer,
-            &mut renderer.data_core.lock(),
-            &spp,
-            &base_rendergraph.interfaces,
-            &base_rendergraph.gpu_culler.culling_buffer_map_handle,
-        );
+        let pbr = PbrRoutine::new(&renderer, &mut renderer.data_core.lock(), &spp, &base_rendergraph.interfaces);
         let tonemapping =
             TonemappingRoutine::new(&renderer, &spp, &base_rendergraph.interfaces, TextureFormat::Rgba8UnormSrgb);
 
diff --git a/rend3/shaders/vertex_attributes.wgsl b/rend3/shaders/vertex_attributes.wgsl
index baa5bf52..58cb2f1b 100644
--- a/rend3/shaders/vertex_attributes.wgsl
+++ b/rend3/shaders/vertex_attributes.wgsl
@@ -5,26 +5,8 @@ struct Indices {
     vertex: u32,
 }
 
-struct BatchIndices {
-    /// Index _within_ the batch
-    local_object: u32,
-    /// Vertex index within the object
-    vertex: u32,
-}
-
 const INVALID_VERTEX: u32 = 0x00FFFFFFu;
 
-fn unpack_batch_index(vertex_index: u32) -> BatchIndices {
-    return BatchIndices(
-        vertex_index >> 24u,
-        vertex_index & 0xFFFFFFu,
-    );
-}
-
-fn pack_batch_index(local_object: u32, index: u32) -> u32 {
-    return (local_object << 24u) | (index & 0xFFFFFFu);
-}
-
 alias TriangleVertices = array<vec3f, 3>;
 alias TriangleIndices = array<u32, 3>;
 struct Triangle {
@@ -32,14 +14,6 @@ struct Triangle {
     indices: TriangleIndices,
 }
 
-fn pack_batch_indices(local_object: u32, indices: TriangleIndices) -> TriangleIndices {
-    return TriangleIndices(
-        pack_batch_index(local_object, indices[0]),
-        pack_batch_index(local_object, indices[1]),
-        pack_batch_index(local_object, indices[2]),
-    );
-}
-
 fn extract_attribute_vec2_f32(byte_base_offset: u32, vertex_index: u32) -> vec2<f32> {
     let first_element_idx = byte_base_offset / 4u + vertex_index * 2u;
     return vec2<f32>(
diff --git a/rend3/src/shader.rs b/rend3/src/shader.rs
index 9c8d51d0..ae2905bb 100644
--- a/rend3/src/shader.rs
+++ b/rend3/src/shader.rs
@@ -214,21 +214,9 @@ impl<'a> HelperDef for ShaderVertexBufferHelper<'a> {
             Some(s) => s,
             _ => Err(RenderErrorReason::Other("Vertex buffer helper's first argument must be a string".to_string()))?,
         };
-        let batch_buffer_value = h
-            .param(1)
-            .ok_or_else(|| {
-                RenderErrorReason::Other(
-                    "Vertex buffer helper must have an argument pointing to the buffer of batch data".to_string(),
-                )
-            })?
-            .relative_path();
-        let batch_buffer = match batch_buffer_value {
-            Some(s) => s,
-            _ => Err(RenderErrorReason::Other("Vertex buffer helper's second argument must be a string".to_string()))?,
-        };
 
         let template = self
-            .generate_template(h, object_buffer, batch_buffer)
+            .generate_template(h, object_buffer)
             .map_err(|_| RenderErrorReason::Other("Failed to writeln vertex template string".to_string()))?;
 
         out.write(&r.render_template(&template, ctx.data())?)?;
@@ -238,30 +226,17 @@ impl<'a> HelperDef for ShaderVertexBufferHelper<'a> {
 }
 
 impl<'a> ShaderVertexBufferHelper<'a> {
-    fn generate_template(
-        &self,
-        h: &Helper,
-        object_buffer: &str,
-        batch_buffer: &str,
-    ) -> Result<String, std::fmt::Error> {
+    fn generate_template(&self, h: &Helper, object_buffer: &str) -> Result<String, std::fmt::Error> {
         let includes = r#"{{include "rend3/vertex_attributes.wgsl"}}"#;
 
-        let unpack_function = format!(
-            "
-            fn unpack_vertex_index(vertex_index: u32) -> Indices {{
-                 let batch_indices = unpack_batch_index(vertex_index);
-                 let object_id = {batch_buffer}.object_culling_information[batch_indices.local_object].object_id;
-                 
-                 return Indices(object_id, batch_indices.vertex);
-            }}"
-        );
+        let unpack_function = String::new();
 
         let mut input_struct = String::new();
         writeln!(input_struct, "struct VertexInput {{")?;
         let mut input_function = String::new();
         writeln!(input_function, "fn get_vertices(indices: Indices) -> VertexInput {{")?;
         writeln!(input_function, "    var verts: VertexInput;")?;
-        for requested_attribute in &h.params()[2..] {
+        for requested_attribute in &h.params()[1..] {
             let (attr_idx, spec) = self
                 .config
                 .specs