From 52bb912aabf3a4430a0d5266a0afaa04846681fa Mon Sep 17 00:00:00 2001
From: Zack Gomez <zack.gomez@gmail.com>
Date: Thu, 17 Oct 2024 03:13:39 -0400
Subject: [PATCH] implement `wgpuComputePassEncoderSetPushConstants` (#437)

* wip compute encoder push constants

* examples/push_constants

* Build and run push_constants example in CI

---------

Co-authored-by: Rajesh Malviya <rajveer0malviya@gmail.com>
---
 .github/workflows/ci.yml               |   4 +
 Makefile                               |  14 ++
 examples/CMakeLists.txt                |   1 +
 examples/push_constants/CMakeLists.txt |  24 +++
 examples/push_constants/main.c         | 253 +++++++++++++++++++++++++
 examples/push_constants/shader.wgsl    |  15 ++
 ffi/wgpu.h                             |   1 +
 src/lib.rs                             |  21 ++
 8 files changed, 333 insertions(+)
 create mode 100644 examples/push_constants/CMakeLists.txt
 create mode 100644 examples/push_constants/main.c
 create mode 100644 examples/push_constants/shader.wgsl

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 831f2527..21bce3b7 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -220,11 +220,13 @@ jobs:
           make example-triangle
           make example-enumerate_adapters
           make example-texture_arrays
+          make example-push_constants
       - name: Run examples debug
         run: |
           make run-example-capture
           make run-example-compute
           make run-example-enumerate_adapters
+          make run-example-push_constants
       - name: Build examples release
         run: |
           make example-capture-release
@@ -232,8 +234,10 @@ jobs:
           make example-triangle-release
           make example-enumerate_adapters-release
           make example-texture_arrays-release
+          make example-push_constants-release
       - name: Run examples release
         run: |
           make run-example-capture-release
           make run-example-compute-release
           make run-example-enumerate_adapters-release
+          make run-example-push_constants-release
diff --git a/Makefile b/Makefile
index 6b8127d5..d6002a99 100644
--- a/Makefile
+++ b/Makefile
@@ -39,6 +39,8 @@ endif
 .PHONY: check test doc clear \
 	lib-native lib-native-release \
 	example-capture example-compute example-triangle \
+	example-push_constants example-push_constants-release \
+	run-example-push_constants run-example-push_constants-release \
 	example-capture-release example-compute-release example-triangle-release \
 	run-example-capture run-example-compute run-example-triangle \
 	run-example-capture-release run-example-compute-release run-example-triangle-release
@@ -118,6 +120,18 @@ examples-debug: lib-native
 examples-release: lib-native-release
 	cd examples && $(MKDIR_CMD) "build/RelWithDebInfo" && cd build/RelWithDebInfo && cmake -GNinja -DCMAKE_BUILD_TYPE=RelWithDebInfo -DCMAKE_EXPORT_COMPILE_COMMANDS=1 ../..
 
+example-push_constants: examples-debug
+	cd examples/build/Debug && cmake --build . --target push_constants
+
+run-example-push_constants: example-push_constants
+	cd examples/push_constants && "../build/Debug/push_constants/push_constants"
+
+example-push_constants-release: examples-release
+	cd examples/build/RelWithDebInfo && cmake --build . --target push_constants
+
+run-example-push_constants-release: example-push_constants-release
+	cd examples/push_constants && "../build/RelWithDebInfo/push_constants/push_constants"
+
 example-capture: examples-debug
 	cd examples/build/Debug && cmake --build . --target capture
 
diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt
index 23b87380..d11858fa 100644
--- a/examples/CMakeLists.txt
+++ b/examples/CMakeLists.txt
@@ -32,6 +32,7 @@ add_subdirectory(framework)
 add_subdirectory(capture)
 add_subdirectory(compute)
 add_subdirectory(enumerate_adapters)
+add_subdirectory(push_constants)
 add_subdirectory(texture_arrays)
 add_subdirectory(triangle)
 
diff --git a/examples/push_constants/CMakeLists.txt b/examples/push_constants/CMakeLists.txt
new file mode 100644
index 00000000..2c00ba2e
--- /dev/null
+++ b/examples/push_constants/CMakeLists.txt
@@ -0,0 +1,24 @@
+cmake_minimum_required(VERSION 3.20)
+project(push_constants LANGUAGES C)
+
+add_executable(push_constants main.c)
+
+if (MSVC)
+    add_compile_options(/W4)
+else()
+    add_compile_options(-Wall -Wextra -Wpedantic)
+endif()
+
+include_directories(${CMAKE_SOURCE_DIR}/../ffi)
+include_directories(${CMAKE_SOURCE_DIR}/../ffi/webgpu-headers)
+include_directories(${CMAKE_SOURCE_DIR}/framework)
+
+if (WIN32)
+    set(OS_LIBRARIES d3dcompiler ws2_32 userenv bcrypt ntdll opengl32)
+elseif(UNIX AND NOT APPLE)
+    set(OS_LIBRARIES "-lm -ldl")
+elseif(APPLE)
+    set(OS_LIBRARIES "-framework CoreFoundation -framework QuartzCore -framework Metal")
+endif()
+
+target_link_libraries(push_constants framework ${WGPU_LIBRARY} ${OS_LIBRARIES})
diff --git a/examples/push_constants/main.c b/examples/push_constants/main.c
new file mode 100644
index 00000000..6018eaa8
--- /dev/null
+++ b/examples/push_constants/main.c
@@ -0,0 +1,253 @@
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "framework.h"
+#include "webgpu-headers/webgpu.h"
+
+#define LOG_PREFIX "[push_constants]"
+
+static void handle_request_adapter(WGPURequestAdapterStatus status,
+                                   WGPUAdapter adapter, char const *message,
+                                   void *userdata) {
+  UNUSED(status)
+  UNUSED(message)
+  *(WGPUAdapter *)userdata = adapter;
+}
+static void handle_request_device(WGPURequestDeviceStatus status,
+                                  WGPUDevice device, char const *message,
+                                  void *userdata) {
+  UNUSED(status)
+  UNUSED(message)
+  *(WGPUDevice *)userdata = device;
+}
+static void handle_buffer_map(WGPUBufferMapAsyncStatus status, void *userdata) {
+  UNUSED(userdata)
+  printf(LOG_PREFIX " buffer_map status=%#.8x\n", status);
+}
+
+int main(int argc, char *argv[]) {
+  UNUSED(argc)
+  UNUSED(argv)
+  frmwrk_setup_logging(WGPULogLevel_Warn);
+
+  uint32_t numbers[] = {0, 0, 0, 0};
+  uint32_t numbers_size = sizeof(numbers);
+  uint32_t numbers_length = numbers_size / sizeof(uint32_t);
+
+  WGPUInstance instance = wgpuCreateInstance(NULL);
+  assert(instance);
+
+  WGPUAdapter adapter = NULL;
+  wgpuInstanceRequestAdapter(instance, NULL, handle_request_adapter,
+                             (void *)&adapter);
+  assert(adapter);
+
+  WGPUSupportedLimitsExtras supported_limits_extras = {
+      .chain =
+          {
+              .sType = WGPUSType_SupportedLimitsExtras,
+          },
+      .limits =
+          {
+              .maxPushConstantSize = 0,
+          },
+  };
+  WGPUSupportedLimits supported_limits = {
+      .nextInChain = &supported_limits_extras.chain,
+  };
+  wgpuAdapterGetLimits(adapter, &supported_limits);
+
+  WGPURequiredLimitsExtras required_limits_extras = {
+      .chain =
+          {
+              .sType = WGPUSType_RequiredLimitsExtras,
+          },
+      .limits = supported_limits_extras.limits,
+  };
+  WGPURequiredLimits required_limits = {
+      .nextInChain = &required_limits_extras.chain,
+      .limits = supported_limits.limits,
+  };
+
+  WGPUFeatureName requiredFeatures[] = {
+      WGPUNativeFeature_PushConstants,
+  };
+  WGPUDeviceDescriptor device_desc = {
+      .label = "compute_device",
+      .requiredFeatures = requiredFeatures,
+      .requiredFeatureCount = 1,
+      .requiredLimits = &required_limits,
+  };
+
+  WGPUDevice device = NULL;
+  wgpuAdapterRequestDevice(adapter, &device_desc, handle_request_device,
+                           (void *)&device);
+  assert(device);
+
+  WGPUQueue queue = wgpuDeviceGetQueue(device);
+  assert(queue);
+
+  WGPUShaderModule shader_module =
+      frmwrk_load_shader_module(device, "shader.wgsl");
+  assert(shader_module);
+
+  WGPUBuffer storage_buffer = wgpuDeviceCreateBuffer(
+      device, &(const WGPUBufferDescriptor){
+                  .label = "storage_buffer",
+                  .usage = WGPUBufferUsage_Storage | WGPUBufferUsage_CopyDst |
+                           WGPUBufferUsage_CopySrc,
+                  .size = numbers_size,
+                  .mappedAtCreation = false,
+              });
+  assert(storage_buffer);
+
+  WGPUBuffer staging_buffer = wgpuDeviceCreateBuffer(
+      device, &(const WGPUBufferDescriptor){
+                  .label = "staging_buffer",
+                  .usage = WGPUBufferUsage_MapRead | WGPUBufferUsage_CopyDst,
+                  .size = numbers_size,
+                  .mappedAtCreation = false,
+              });
+  assert(staging_buffer);
+
+  WGPUPushConstantRange push_constant_range = {
+      .stages = WGPUShaderStage_Compute,
+      .start = 0,
+      .end = sizeof(uint32_t),
+  };
+
+  WGPUPipelineLayoutExtras pipeline_layout_extras = {
+      .chain =
+          {
+              .sType = WGPUSType_PipelineLayoutExtras,
+          },
+      .pushConstantRangeCount = 1,
+      .pushConstantRanges = &push_constant_range,
+  };
+
+  WGPUBindGroupLayoutEntry bind_group_layout_entries[] = {
+      {
+          .binding = 0,
+          .visibility = WGPUShaderStage_Compute,
+          .buffer =
+              {
+                  .type = WGPUBufferBindingType_Storage,
+              },
+      },
+  };
+  WGPUBindGroupLayoutDescriptor bind_group_layout_desc = {
+      .label = "bind_group_layout",
+      .nextInChain = NULL,
+      .entryCount = 1,
+      .entries = bind_group_layout_entries,
+  };
+  WGPUBindGroupLayout bind_group_layout =
+      wgpuDeviceCreateBindGroupLayout(device, &bind_group_layout_desc);
+  assert(bind_group_layout);
+
+  WGPUPipelineLayoutDescriptor pipeline_layout_desc = {
+      .label = "pipeline_layout",
+      .nextInChain = &pipeline_layout_extras.chain,
+      .bindGroupLayouts = &bind_group_layout,
+      .bindGroupLayoutCount = 1,
+  };
+  WGPUPipelineLayout pipeline_layout =
+      wgpuDeviceCreatePipelineLayout(device, &pipeline_layout_desc);
+  assert(pipeline_layout);
+
+  WGPUComputePipeline compute_pipeline = wgpuDeviceCreateComputePipeline(
+      device, &(const WGPUComputePipelineDescriptor){
+                  .label = "compute_pipeline",
+                  .compute =
+                      (const WGPUProgrammableStageDescriptor){
+                          .module = shader_module,
+                          .entryPoint = "main",
+                      },
+                  .layout = pipeline_layout,
+              });
+  assert(compute_pipeline);
+
+  WGPUBindGroup bind_group = wgpuDeviceCreateBindGroup(
+      device, &(const WGPUBindGroupDescriptor){
+                  .label = "bind_group",
+                  .layout = bind_group_layout,
+                  .entryCount = 1,
+                  .entries =
+                      (const WGPUBindGroupEntry[]){
+                          (const WGPUBindGroupEntry){
+                              .binding = 0,
+                              .buffer = storage_buffer,
+                              .offset = 0,
+                              .size = numbers_size,
+                          },
+                      },
+              });
+  assert(bind_group);
+
+  WGPUCommandEncoder command_encoder = wgpuDeviceCreateCommandEncoder(
+      device, &(const WGPUCommandEncoderDescriptor){
+                  .label = "command_encoder",
+              });
+  assert(command_encoder);
+
+  WGPUComputePassEncoder compute_pass_encoder =
+      wgpuCommandEncoderBeginComputePass(command_encoder,
+                                         &(const WGPUComputePassDescriptor){
+                                             .label = "compute_pass",
+                                         });
+  assert(compute_pass_encoder);
+
+  wgpuComputePassEncoderSetPipeline(compute_pass_encoder, compute_pipeline);
+  wgpuComputePassEncoderSetBindGroup(compute_pass_encoder, 0, bind_group, 0,
+                                     NULL);
+
+  for (uint32_t i = 0; i < numbers_length; i++) {
+    uint32_t pushConst = i;
+    wgpuComputePassEncoderSetPushConstants(compute_pass_encoder, 0,
+                                           sizeof(uint32_t), &pushConst);
+
+    wgpuComputePassEncoderDispatchWorkgroups(compute_pass_encoder,
+                                             numbers_length, 1, 1);
+  }
+
+  wgpuComputePassEncoderEnd(compute_pass_encoder);
+  wgpuComputePassEncoderRelease(compute_pass_encoder);
+
+  wgpuCommandEncoderCopyBufferToBuffer(command_encoder, storage_buffer, 0,
+                                       staging_buffer, 0, numbers_size);
+
+  WGPUCommandBuffer command_buffer = wgpuCommandEncoderFinish(
+      command_encoder, &(const WGPUCommandBufferDescriptor){
+                           .label = "command_buffer",
+                       });
+  assert(command_buffer);
+
+  wgpuQueueWriteBuffer(queue, storage_buffer, 0, &numbers, numbers_size);
+  wgpuQueueSubmit(queue, 1, &command_buffer);
+
+  wgpuBufferMapAsync(staging_buffer, WGPUMapMode_Read, 0, numbers_size,
+                     handle_buffer_map, NULL);
+  wgpuDevicePoll(device, true, NULL);
+
+  uint32_t *buf =
+      (uint32_t *)wgpuBufferGetMappedRange(staging_buffer, 0, numbers_size);
+  assert(buf);
+
+  printf("times: [%d, %d, %d, %d]\n", buf[0], buf[1], buf[2], buf[3]);
+
+  wgpuBufferUnmap(staging_buffer);
+  wgpuCommandBufferRelease(command_buffer);
+  wgpuCommandEncoderRelease(command_encoder);
+  wgpuBindGroupRelease(bind_group);
+  wgpuBindGroupLayoutRelease(bind_group_layout);
+  wgpuComputePipelineRelease(compute_pipeline);
+  wgpuBufferRelease(storage_buffer);
+  wgpuBufferRelease(staging_buffer);
+  wgpuShaderModuleRelease(shader_module);
+  wgpuQueueRelease(queue);
+  wgpuDeviceRelease(device);
+  wgpuAdapterRelease(adapter);
+  wgpuInstanceRelease(instance);
+  return EXIT_SUCCESS;
+}
diff --git a/examples/push_constants/shader.wgsl b/examples/push_constants/shader.wgsl
new file mode 100644
index 00000000..e0b86e42
--- /dev/null
+++ b/examples/push_constants/shader.wgsl
@@ -0,0 +1,15 @@
+@group(0)
+@binding(0)
+var<storage, read_write> buffer: array<u32>;
+
+struct PushConstants {
+    i: u32,
+}
+var<push_constant> push_constants: PushConstants;
+
+@compute
+@workgroup_size(1)
+fn main(@builtin(global_invocation_id) global_id: vec3<u32>) {
+    let i = push_constants.i;
+    buffer[i] = i * 2;
+}
diff --git a/ffi/wgpu.h b/ffi/wgpu.h
index c31adc5c..7c1cf575 100644
--- a/ffi/wgpu.h
+++ b/ffi/wgpu.h
@@ -292,6 +292,7 @@ void wgpuSetLogLevel(WGPULogLevel level);
 uint32_t wgpuGetVersion(void);
 
 void wgpuRenderPassEncoderSetPushConstants(WGPURenderPassEncoder encoder, WGPUShaderStageFlags stages, uint32_t offset, uint32_t sizeBytes, void const * data);
+void wgpuComputePassEncoderSetPushConstants(WGPUComputePassEncoder encoder, uint32_t offset, uint32_t sizeBytes, void const * data);
 
 void wgpuRenderPassEncoderMultiDrawIndirect(WGPURenderPassEncoder encoder, WGPUBuffer buffer, uint64_t offset, uint32_t count);
 void wgpuRenderPassEncoderMultiDrawIndexedIndirect(WGPURenderPassEncoder encoder, WGPUBuffer buffer, uint64_t offset, uint32_t count);
diff --git a/src/lib.rs b/src/lib.rs
index 2c72dbc7..3b585fc6 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -4293,6 +4293,27 @@ pub unsafe extern "C" fn wgpuRenderPassEncoderSetPushConstants(
     }
 }
 
+#[no_mangle]
+pub unsafe extern "C" fn wgpuComputePassEncoderSetPushConstants(
+    pass: native::WGPUComputePassEncoder,
+    offset: u32,
+    size_bytes: u32,
+    data: *const u8,
+) {
+    let pass = pass.as_ref().expect("invalid compute pass");
+    let encoder = pass.encoder.as_mut().unwrap();
+
+    match encoder.set_push_constants(&pass.context, offset, make_slice(data, size_bytes as usize)) {
+        Ok(()) => (),
+        Err(cause) => handle_error(
+            &pass.error_sink,
+            cause,
+            None,
+            "wgpuComputePassEncoderSetPushConstants",
+        ),
+    }
+}
+
 #[no_mangle]
 pub unsafe extern "C" fn wgpuRenderPassEncoderMultiDrawIndirect(
     pass: native::WGPURenderPassEncoder,