From 52bb912aabf3a4430a0d5266a0afaa04846681fa Mon Sep 17 00:00:00 2001 From: Zack Gomez Date: Thu, 17 Oct 2024 03:13:39 -0400 Subject: [PATCH] implement `wgpuComputePassEncoderSetPushConstants` (#437) * wip compute encoder push constants * examples/push_constants * Build and run push_constants example in CI --------- Co-authored-by: Rajesh Malviya --- .github/workflows/ci.yml | 4 + Makefile | 14 ++ examples/CMakeLists.txt | 1 + examples/push_constants/CMakeLists.txt | 24 +++ examples/push_constants/main.c | 253 +++++++++++++++++++++++++ examples/push_constants/shader.wgsl | 15 ++ ffi/wgpu.h | 1 + src/lib.rs | 21 ++ 8 files changed, 333 insertions(+) create mode 100644 examples/push_constants/CMakeLists.txt create mode 100644 examples/push_constants/main.c create mode 100644 examples/push_constants/shader.wgsl diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 831f2527..21bce3b7 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -220,11 +220,13 @@ jobs: make example-triangle make example-enumerate_adapters make example-texture_arrays + make example-push_constants - name: Run examples debug run: | make run-example-capture make run-example-compute make run-example-enumerate_adapters + make run-example-push_constants - name: Build examples release run: | make example-capture-release @@ -232,8 +234,10 @@ jobs: make example-triangle-release make example-enumerate_adapters-release make example-texture_arrays-release + make example-push_constants-release - name: Run examples release run: | make run-example-capture-release make run-example-compute-release make run-example-enumerate_adapters-release + make run-example-push_constants-release diff --git a/Makefile b/Makefile index 6b8127d5..d6002a99 100644 --- a/Makefile +++ b/Makefile @@ -39,6 +39,8 @@ endif .PHONY: check test doc clear \ lib-native lib-native-release \ example-capture example-compute example-triangle \ + example-push_constants example-push_constants-release \ + run-example-push_constants run-example-push_constants-release \ example-capture-release example-compute-release example-triangle-release \ run-example-capture run-example-compute run-example-triangle \ run-example-capture-release run-example-compute-release run-example-triangle-release @@ -118,6 +120,18 @@ examples-debug: lib-native examples-release: lib-native-release cd examples && $(MKDIR_CMD) "build/RelWithDebInfo" && cd build/RelWithDebInfo && cmake -GNinja -DCMAKE_BUILD_TYPE=RelWithDebInfo -DCMAKE_EXPORT_COMPILE_COMMANDS=1 ../.. +example-push_constants: examples-debug + cd examples/build/Debug && cmake --build . --target push_constants + +run-example-push_constants: example-push_constants + cd examples/push_constants && "../build/Debug/push_constants/push_constants" + +example-push_constants-release: examples-release + cd examples/build/RelWithDebInfo && cmake --build . --target push_constants + +run-example-push_constants-release: example-push_constants-release + cd examples/push_constants && "../build/RelWithDebInfo/push_constants/push_constants" + example-capture: examples-debug cd examples/build/Debug && cmake --build . --target capture diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt index 23b87380..d11858fa 100644 --- a/examples/CMakeLists.txt +++ b/examples/CMakeLists.txt @@ -32,6 +32,7 @@ add_subdirectory(framework) add_subdirectory(capture) add_subdirectory(compute) add_subdirectory(enumerate_adapters) +add_subdirectory(push_constants) add_subdirectory(texture_arrays) add_subdirectory(triangle) diff --git a/examples/push_constants/CMakeLists.txt b/examples/push_constants/CMakeLists.txt new file mode 100644 index 00000000..2c00ba2e --- /dev/null +++ b/examples/push_constants/CMakeLists.txt @@ -0,0 +1,24 @@ +cmake_minimum_required(VERSION 3.20) +project(push_constants LANGUAGES C) + +add_executable(push_constants main.c) + +if (MSVC) + add_compile_options(/W4) +else() + add_compile_options(-Wall -Wextra -Wpedantic) +endif() + +include_directories(${CMAKE_SOURCE_DIR}/../ffi) +include_directories(${CMAKE_SOURCE_DIR}/../ffi/webgpu-headers) +include_directories(${CMAKE_SOURCE_DIR}/framework) + +if (WIN32) + set(OS_LIBRARIES d3dcompiler ws2_32 userenv bcrypt ntdll opengl32) +elseif(UNIX AND NOT APPLE) + set(OS_LIBRARIES "-lm -ldl") +elseif(APPLE) + set(OS_LIBRARIES "-framework CoreFoundation -framework QuartzCore -framework Metal") +endif() + +target_link_libraries(push_constants framework ${WGPU_LIBRARY} ${OS_LIBRARIES}) diff --git a/examples/push_constants/main.c b/examples/push_constants/main.c new file mode 100644 index 00000000..6018eaa8 --- /dev/null +++ b/examples/push_constants/main.c @@ -0,0 +1,253 @@ +#include +#include +#include + +#include "framework.h" +#include "webgpu-headers/webgpu.h" + +#define LOG_PREFIX "[push_constants]" + +static void handle_request_adapter(WGPURequestAdapterStatus status, + WGPUAdapter adapter, char const *message, + void *userdata) { + UNUSED(status) + UNUSED(message) + *(WGPUAdapter *)userdata = adapter; +} +static void handle_request_device(WGPURequestDeviceStatus status, + WGPUDevice device, char const *message, + void *userdata) { + UNUSED(status) + UNUSED(message) + *(WGPUDevice *)userdata = device; +} +static void handle_buffer_map(WGPUBufferMapAsyncStatus status, void *userdata) { + UNUSED(userdata) + printf(LOG_PREFIX " buffer_map status=%#.8x\n", status); +} + +int main(int argc, char *argv[]) { + UNUSED(argc) + UNUSED(argv) + frmwrk_setup_logging(WGPULogLevel_Warn); + + uint32_t numbers[] = {0, 0, 0, 0}; + uint32_t numbers_size = sizeof(numbers); + uint32_t numbers_length = numbers_size / sizeof(uint32_t); + + WGPUInstance instance = wgpuCreateInstance(NULL); + assert(instance); + + WGPUAdapter adapter = NULL; + wgpuInstanceRequestAdapter(instance, NULL, handle_request_adapter, + (void *)&adapter); + assert(adapter); + + WGPUSupportedLimitsExtras supported_limits_extras = { + .chain = + { + .sType = WGPUSType_SupportedLimitsExtras, + }, + .limits = + { + .maxPushConstantSize = 0, + }, + }; + WGPUSupportedLimits supported_limits = { + .nextInChain = &supported_limits_extras.chain, + }; + wgpuAdapterGetLimits(adapter, &supported_limits); + + WGPURequiredLimitsExtras required_limits_extras = { + .chain = + { + .sType = WGPUSType_RequiredLimitsExtras, + }, + .limits = supported_limits_extras.limits, + }; + WGPURequiredLimits required_limits = { + .nextInChain = &required_limits_extras.chain, + .limits = supported_limits.limits, + }; + + WGPUFeatureName requiredFeatures[] = { + WGPUNativeFeature_PushConstants, + }; + WGPUDeviceDescriptor device_desc = { + .label = "compute_device", + .requiredFeatures = requiredFeatures, + .requiredFeatureCount = 1, + .requiredLimits = &required_limits, + }; + + WGPUDevice device = NULL; + wgpuAdapterRequestDevice(adapter, &device_desc, handle_request_device, + (void *)&device); + assert(device); + + WGPUQueue queue = wgpuDeviceGetQueue(device); + assert(queue); + + WGPUShaderModule shader_module = + frmwrk_load_shader_module(device, "shader.wgsl"); + assert(shader_module); + + WGPUBuffer storage_buffer = wgpuDeviceCreateBuffer( + device, &(const WGPUBufferDescriptor){ + .label = "storage_buffer", + .usage = WGPUBufferUsage_Storage | WGPUBufferUsage_CopyDst | + WGPUBufferUsage_CopySrc, + .size = numbers_size, + .mappedAtCreation = false, + }); + assert(storage_buffer); + + WGPUBuffer staging_buffer = wgpuDeviceCreateBuffer( + device, &(const WGPUBufferDescriptor){ + .label = "staging_buffer", + .usage = WGPUBufferUsage_MapRead | WGPUBufferUsage_CopyDst, + .size = numbers_size, + .mappedAtCreation = false, + }); + assert(staging_buffer); + + WGPUPushConstantRange push_constant_range = { + .stages = WGPUShaderStage_Compute, + .start = 0, + .end = sizeof(uint32_t), + }; + + WGPUPipelineLayoutExtras pipeline_layout_extras = { + .chain = + { + .sType = WGPUSType_PipelineLayoutExtras, + }, + .pushConstantRangeCount = 1, + .pushConstantRanges = &push_constant_range, + }; + + WGPUBindGroupLayoutEntry bind_group_layout_entries[] = { + { + .binding = 0, + .visibility = WGPUShaderStage_Compute, + .buffer = + { + .type = WGPUBufferBindingType_Storage, + }, + }, + }; + WGPUBindGroupLayoutDescriptor bind_group_layout_desc = { + .label = "bind_group_layout", + .nextInChain = NULL, + .entryCount = 1, + .entries = bind_group_layout_entries, + }; + WGPUBindGroupLayout bind_group_layout = + wgpuDeviceCreateBindGroupLayout(device, &bind_group_layout_desc); + assert(bind_group_layout); + + WGPUPipelineLayoutDescriptor pipeline_layout_desc = { + .label = "pipeline_layout", + .nextInChain = &pipeline_layout_extras.chain, + .bindGroupLayouts = &bind_group_layout, + .bindGroupLayoutCount = 1, + }; + WGPUPipelineLayout pipeline_layout = + wgpuDeviceCreatePipelineLayout(device, &pipeline_layout_desc); + assert(pipeline_layout); + + WGPUComputePipeline compute_pipeline = wgpuDeviceCreateComputePipeline( + device, &(const WGPUComputePipelineDescriptor){ + .label = "compute_pipeline", + .compute = + (const WGPUProgrammableStageDescriptor){ + .module = shader_module, + .entryPoint = "main", + }, + .layout = pipeline_layout, + }); + assert(compute_pipeline); + + WGPUBindGroup bind_group = wgpuDeviceCreateBindGroup( + device, &(const WGPUBindGroupDescriptor){ + .label = "bind_group", + .layout = bind_group_layout, + .entryCount = 1, + .entries = + (const WGPUBindGroupEntry[]){ + (const WGPUBindGroupEntry){ + .binding = 0, + .buffer = storage_buffer, + .offset = 0, + .size = numbers_size, + }, + }, + }); + assert(bind_group); + + WGPUCommandEncoder command_encoder = wgpuDeviceCreateCommandEncoder( + device, &(const WGPUCommandEncoderDescriptor){ + .label = "command_encoder", + }); + assert(command_encoder); + + WGPUComputePassEncoder compute_pass_encoder = + wgpuCommandEncoderBeginComputePass(command_encoder, + &(const WGPUComputePassDescriptor){ + .label = "compute_pass", + }); + assert(compute_pass_encoder); + + wgpuComputePassEncoderSetPipeline(compute_pass_encoder, compute_pipeline); + wgpuComputePassEncoderSetBindGroup(compute_pass_encoder, 0, bind_group, 0, + NULL); + + for (uint32_t i = 0; i < numbers_length; i++) { + uint32_t pushConst = i; + wgpuComputePassEncoderSetPushConstants(compute_pass_encoder, 0, + sizeof(uint32_t), &pushConst); + + wgpuComputePassEncoderDispatchWorkgroups(compute_pass_encoder, + numbers_length, 1, 1); + } + + wgpuComputePassEncoderEnd(compute_pass_encoder); + wgpuComputePassEncoderRelease(compute_pass_encoder); + + wgpuCommandEncoderCopyBufferToBuffer(command_encoder, storage_buffer, 0, + staging_buffer, 0, numbers_size); + + WGPUCommandBuffer command_buffer = wgpuCommandEncoderFinish( + command_encoder, &(const WGPUCommandBufferDescriptor){ + .label = "command_buffer", + }); + assert(command_buffer); + + wgpuQueueWriteBuffer(queue, storage_buffer, 0, &numbers, numbers_size); + wgpuQueueSubmit(queue, 1, &command_buffer); + + wgpuBufferMapAsync(staging_buffer, WGPUMapMode_Read, 0, numbers_size, + handle_buffer_map, NULL); + wgpuDevicePoll(device, true, NULL); + + uint32_t *buf = + (uint32_t *)wgpuBufferGetMappedRange(staging_buffer, 0, numbers_size); + assert(buf); + + printf("times: [%d, %d, %d, %d]\n", buf[0], buf[1], buf[2], buf[3]); + + wgpuBufferUnmap(staging_buffer); + wgpuCommandBufferRelease(command_buffer); + wgpuCommandEncoderRelease(command_encoder); + wgpuBindGroupRelease(bind_group); + wgpuBindGroupLayoutRelease(bind_group_layout); + wgpuComputePipelineRelease(compute_pipeline); + wgpuBufferRelease(storage_buffer); + wgpuBufferRelease(staging_buffer); + wgpuShaderModuleRelease(shader_module); + wgpuQueueRelease(queue); + wgpuDeviceRelease(device); + wgpuAdapterRelease(adapter); + wgpuInstanceRelease(instance); + return EXIT_SUCCESS; +} diff --git a/examples/push_constants/shader.wgsl b/examples/push_constants/shader.wgsl new file mode 100644 index 00000000..e0b86e42 --- /dev/null +++ b/examples/push_constants/shader.wgsl @@ -0,0 +1,15 @@ +@group(0) +@binding(0) +var buffer: array; + +struct PushConstants { + i: u32, +} +var push_constants: PushConstants; + +@compute +@workgroup_size(1) +fn main(@builtin(global_invocation_id) global_id: vec3) { + let i = push_constants.i; + buffer[i] = i * 2; +} diff --git a/ffi/wgpu.h b/ffi/wgpu.h index c31adc5c..7c1cf575 100644 --- a/ffi/wgpu.h +++ b/ffi/wgpu.h @@ -292,6 +292,7 @@ void wgpuSetLogLevel(WGPULogLevel level); uint32_t wgpuGetVersion(void); void wgpuRenderPassEncoderSetPushConstants(WGPURenderPassEncoder encoder, WGPUShaderStageFlags stages, uint32_t offset, uint32_t sizeBytes, void const * data); +void wgpuComputePassEncoderSetPushConstants(WGPUComputePassEncoder encoder, uint32_t offset, uint32_t sizeBytes, void const * data); void wgpuRenderPassEncoderMultiDrawIndirect(WGPURenderPassEncoder encoder, WGPUBuffer buffer, uint64_t offset, uint32_t count); void wgpuRenderPassEncoderMultiDrawIndexedIndirect(WGPURenderPassEncoder encoder, WGPUBuffer buffer, uint64_t offset, uint32_t count); diff --git a/src/lib.rs b/src/lib.rs index 2c72dbc7..3b585fc6 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -4293,6 +4293,27 @@ pub unsafe extern "C" fn wgpuRenderPassEncoderSetPushConstants( } } +#[no_mangle] +pub unsafe extern "C" fn wgpuComputePassEncoderSetPushConstants( + pass: native::WGPUComputePassEncoder, + offset: u32, + size_bytes: u32, + data: *const u8, +) { + let pass = pass.as_ref().expect("invalid compute pass"); + let encoder = pass.encoder.as_mut().unwrap(); + + match encoder.set_push_constants(&pass.context, offset, make_slice(data, size_bytes as usize)) { + Ok(()) => (), + Err(cause) => handle_error( + &pass.error_sink, + cause, + None, + "wgpuComputePassEncoderSetPushConstants", + ), + } +} + #[no_mangle] pub unsafe extern "C" fn wgpuRenderPassEncoderMultiDrawIndirect( pass: native::WGPURenderPassEncoder,