Skip to content

Commit

Permalink
feat: add distribute function (#14)
Browse files Browse the repository at this point in the history
# What ❔

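This PR adds the `pn_distribute_values` function, which copies `count` field elements from `src` to `dst`, writing element `i` of `src` to element `i * stride` of `dst`.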

## Checklist

- [x] PR title corresponds to the body of PR (we generate changelog
entries from PRs).
- [x] Documentation comments have been added / updated.
  • Loading branch information
robik75 authored Sep 8, 2024
1 parent b521275 commit 42a59a6
Show file tree
Hide file tree
Showing 4 changed files with 32 additions and 0 deletions.
5 changes: 5 additions & 0 deletions src/bellman-cuda.cu
Original file line number Diff line number Diff line change
Expand Up @@ -280,6 +280,11 @@ bc_error pn_set_values_from_packed_bits(void *values, const void *packet_bits, c
static_cast<cudaStream_t>(stream.handle)));
}

// C API entry point that forwards to pn::distribute_values.
// The untyped src/dst pointers are reinterpreted as fd_q::storage arrays and the
// stream handle as a cudaStream_t; the resulting status is returned as a bc_error.
bc_error pn_distribute_values(const void *src, void *dst, const unsigned count, const unsigned stride, bc_stream stream) {
  const auto *typed_src = static_cast<const fd_q::storage *>(src);
  auto *typed_dst = static_cast<fd_q::storage *>(dst);
  const auto cuda_stream = static_cast<cudaStream_t>(stream.handle);
  const auto status = pn::distribute_values(typed_src, typed_dst, count, stride, cuda_stream);
  return static_cast<bc_error>(status);
}

// Forwards to pn::tear_down() and reports its status as a bc_error.
// (The stray ';' that followed the function body was an empty declaration and has been dropped.)
bc_error pn_tear_down() { return static_cast<bc_error>(pn::tear_down()); }

// Forwards to msm::set_up() and reports its status as a bc_error.
bc_error msm_set_up() {
  const auto status = msm::set_up();
  return static_cast<bc_error>(status);
}
Expand Down
8 changes: 8 additions & 0 deletions src/bellman-cuda.h
Original file line number Diff line number Diff line change
Expand Up @@ -458,6 +458,14 @@ bc_error pn_generate_permutation_polynomials(generate_permutation_polynomials_co
// stream - Stream on which this operation will be scheduled
bc_error pn_set_values_from_packed_bits(void *values, const void *packet_bits, unsigned count, bc_stream stream);

// Distribute field element values with a stride: for every i in [0, count),
// the i-th element of src is written to dst at element index i * stride.
// src - device pointer to the vector of field elements from where the values will be read (count elements are read)
// dst - device pointer to the vector of field elements to where the results will be written;
//       only the elements at indexes i * stride are written, the elements in between are not touched
// count - number of values to distribute
// stride - stride, counted in field elements, with which the values will be distributed
// stream - Stream on which this operation will be scheduled
bc_error pn_distribute_values(const void *src, void *dst, unsigned count, unsigned stride, bc_stream stream);

// Release all resources associated with the internal state for polynomial computations.
// Returns the status of the tear-down as a bc_error.
bc_error pn_tear_down();

Expand Down
17 changes: 17 additions & 0 deletions src/pn_kernels.cu
Original file line number Diff line number Diff line change
Expand Up @@ -141,4 +141,21 @@ cudaError_t set_values_from_packed_bits(fd_q::storage *values, const unsigned *p
return cudaGetLastError();
}

// Scatters src[i] to dst[i * stride] for every i in [0, count).
// Expects a 1D launch with at least `count` threads in total: each thread handles at most
// one element, and threads whose global index is >= count exit immediately.
// src    - input elements, read at indexes [0, count)
// dst    - output elements, written only at indexes i * stride
// count  - number of elements to copy
// stride - distance, in elements, between consecutive written positions in dst
__global__ void distribute_values_kernel(const fd_q::storage *src, fd_q::storage *dst, const unsigned count, const unsigned stride) {
  typedef fd_q::storage storage;
  const unsigned gid = blockIdx.x * blockDim.x + threadIdx.x;
  if (gid >= count)
    return;
  // Widen before multiplying: gid * stride evaluated in 32-bit unsigned arithmetic would
  // wrap around silently once the destination element index reaches 2^32.
  const size_t dst_index = static_cast<size_t>(gid) * stride;
  const auto value = memory::load<storage, memory::ld_modifier::cs>(src + gid);
  memory::store<storage, memory::st_modifier::cs>(dst + dst_index, value);
}

// Launches distribute_values_kernel on `stream` so that src[i] is copied to dst[i * stride]
// for every i in [0, count).
// The launch uses one thread per element, with blocks of at most 128 threads.
// Returns cudaSuccess without launching anything when count == 0; otherwise returns the
// result of cudaGetLastError() taken right after the kernel launch.
cudaError_t distribute_values(const fd_q::storage *src, fd_q::storage *dst, const unsigned count, const unsigned stride, cudaStream_t stream) {
  // Nothing to do for an empty input. Without this guard block_dim.x would be 0 and the
  // grid-size computation below would divide by zero on the host.
  if (count == 0)
    return cudaSuccess;
  const unsigned threads_per_block = 128;
  // Shrink the block for small inputs so no more threads than elements are launched.
  const dim3 block_dim = count < threads_per_block ? count : threads_per_block;
  // Ceiling division: enough blocks to cover all `count` elements.
  const dim3 grid_dim = (count - 1) / block_dim.x + 1;
  distribute_values_kernel<<<grid_dim, block_dim, 0, stream>>>(src, dst, count, stride);
  return cudaGetLastError();
}

} // namespace pn
2 changes: 2 additions & 0 deletions src/pn_kernels.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -18,4 +18,6 @@ cudaError_t generate_permutation_matrix(fd_q::storage *values, const fd_q::stora

cudaError_t set_values_from_packed_bits(fd_q::storage *values, const unsigned *packet_bits, unsigned count, cudaStream_t stream);

// Copies src[i] to dst[i * stride] for every i in [0, count); the kernel is launched on `stream`.
// Returns the cudaGetLastError() status taken right after the kernel launch.
cudaError_t distribute_values(const fd_q::storage *src, fd_q::storage *dst, unsigned count, unsigned stride, cudaStream_t stream);

} // namespace pn

0 comments on commit 42a59a6

Please sign in to comment.