Skip to content

Commit

Permalink
add support for setting round robin scheduling (disabled by default)
Browse files Browse the repository at this point in the history
  • Loading branch information
bashbaug committed Mar 7, 2024
1 parent 54b9366 commit 8d23a8c
Show file tree
Hide file tree
Showing 2 changed files with 27 additions and 1 deletion.
2 changes: 1 addition & 1 deletion samples/99_matrixexperiments/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,6 @@ add_opencl_sample(
TEST
NUMBER 99
TARGET matrixexperiments
VERSION 120
VERSION 200 # for clSetKernelExecInfo
SOURCES main.cpp
KERNELS matrix_helpers.cl matrix_kernels.cl matrix_kernel_tiled.cl)
26 changes: 26 additions & 0 deletions samples/99_matrixexperiments/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ bool validate = false;
// Command-line controlled settings; the switches below are registered with
// the option parser in main().
bool emulate = false;   // --emulate: "Unconditionally Emulate dpas"
bool wallclock = false; // --wallclock: "Measure Wallclock Time"
bool skipinit = false;  // --skipinit: when set, the C buffer is not filled before a test runs
bool roundRobin = false; // --roundrobin: when set, kernels call setRoundRobin() after their args are set
int testIterations = 16; // NOTE(review): presumably the number of timed runs per test -- confirm at use site
float threshold = 0.01f; // --threshold: "Local Error Threshold"

Expand Down Expand Up @@ -75,6 +76,18 @@ static size_t findMinSubGroupSize(cl::Device& device)
return 0;
}

// Asks the OpenCL runtime to use the round-robin thread arbitration policy
// for `kernel` via clSetKernelExecInfo.
//
// The two numeric values are written out here rather than taken from a
// header:
//   0x10025 - CL_KERNEL_EXEC_INFO_THREAD_ARBITRATION_POLICY_INTEL (param name)
//   0x10023 - CL_KERNEL_EXEC_INFO_THREAD_ARBITRATION_POLICY_ROUND_ROBIN_INTEL
//
// The status code returned by clSetKernelExecInfo is not inspected, so a
// runtime that rejects the request is silently ignored.
static void setRoundRobin(cl::Kernel& kernel)
{
    constexpr cl_kernel_exec_info kArbitrationPolicyParam = 0x10025;
    constexpr cl_uint kRoundRobinPolicy = 0x10023;

    // The API takes a pointer to the value plus its size, so the policy needs
    // to live in an addressable object.
    const cl_uint requestedPolicy = kRoundRobinPolicy;
    clSetKernelExecInfo(kernel(), kArbitrationPolicyParam, sizeof(requestedPolicy), &requestedPolicy);
}

template <typename T>
static void fill_matrix(std::vector<T>& M, size_t numRows, size_t numCols)
{
Expand Down Expand Up @@ -440,6 +453,9 @@ static void bfloat16_dpas_blockread_rowmajor(
kernel.setArg(1, A);
kernel.setArg(2, B);
kernel.setArg(3, static_cast<cl_int>(K));
if (roundRobin) {
setRoundRobin(kernel);
}

if (!skipinit) {
queue.enqueueFillBuffer(C, 0, 0, C_ref.size() * sizeof(C_ref[0]));
Expand Down Expand Up @@ -496,6 +512,9 @@ static void bfloat16_dpas_blockread_rowmajor_tiled(
kernel.setArg(1, A);
kernel.setArg(2, B);
kernel.setArg(3, static_cast<cl_int>(K));
if (roundRobin) {
setRoundRobin(kernel);
}

if (!skipinit) {
queue.enqueueFillBuffer(C, 0, 0, C_ref.size() * sizeof(C_ref[0]));
Expand Down Expand Up @@ -546,6 +565,9 @@ static void bfloat16_dpas_blockread_vnni(
kernel.setArg(1, A);
kernel.setArg(2, B);
kernel.setArg(3, static_cast<cl_int>(K));
if (roundRobin) {
setRoundRobin(kernel);
}

if (!skipinit) {
queue.enqueueFillBuffer(C, 0, 0, C_ref.size() * sizeof(C_ref[0]));
Expand Down Expand Up @@ -602,6 +624,9 @@ static void bfloat16_dpas_blockread_vnni_tiled(
kernel.setArg(1, A);
kernel.setArg(2, B);
kernel.setArg(3, static_cast<cl_int>(K));
if (roundRobin) {
setRoundRobin(kernel);
}

if (!skipinit) {
queue.enqueueFillBuffer(C, 0, 0, C_ref.size() * sizeof(C_ref[0]));
Expand Down Expand Up @@ -658,6 +683,7 @@ int main(int argc, char** argv)
op.add<popl::Switch>("", "emulate", "Unconditionally Emulate dpas", &emulate);
op.add<popl::Switch>("", "wallclock", "Measure Wallclock Time", &wallclock);
op.add<popl::Switch>("", "skipinit", "Do Not Initialize Buffers", &skipinit);
op.add<popl::Switch>("", "roundrobin", "Use Round Robin Scheduling", &roundRobin);
op.add<popl::Value<float>>("", "threshold", "Local Error Threshold", threshold, &threshold);
op.add<popl::Value<size_t>, popl::Attribute::advanced>("", "mask", "Test Mask", mask, &mask);
bool printUsage = false;
Expand Down

0 comments on commit 8d23a8c

Please sign in to comment.