Skip to content

Commit

Permalink
VLCLJ-2113 - Add test for timestamp IPC events
Browse files Browse the repository at this point in the history
Signed-off-by: Jemale Lockett <jemale.lockett@intel.com>
  • Loading branch information
Jemale committed Jan 10, 2024
1 parent d8a6aac commit 3f28718
Show file tree
Hide file tree
Showing 6 changed files with 334 additions and 10 deletions.
2 changes: 2 additions & 0 deletions conformance_tests/core/test_ipc/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@ add_lzt_test(
level_zero_tests::net
${ipc_libraries}
${ipc_rt_libraries}
KERNELS
ze_matrix_multiplication_ipc
)

add_lzt_test(
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
/*
*
* Copyright (c) 2023 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/

// Matrix A has M rows and K columns
// Matrix B has K rows and N columns
// Matrix C has M rows and N columns

#define TILE_SIZE 16
__attribute__((reqd_work_group_size(TILE_SIZE, TILE_SIZE, 1))) kernel void
matrix_multiplication(const global float *a, const global float *b,
const int m, const int k, const int n,
global float *c) {
const int2 global_id = {get_global_id(0), get_global_id(1)};
const int2 local_id = {get_local_id(0), get_local_id(1)};

local float a_tile[TILE_SIZE * TILE_SIZE];
local float b_tile[TILE_SIZE * TILE_SIZE];

float sum = 0.0f;
for (int tile_id = 0; tile_id < k / TILE_SIZE; ++tile_id) {
a_tile[local_id.y * TILE_SIZE + local_id.x] =
a[(tile_id * TILE_SIZE + local_id.y) * m + global_id.x];
b_tile[local_id.y * TILE_SIZE + local_id.x] =
b[global_id.y * k + (tile_id * TILE_SIZE + local_id.x)];
barrier(CLK_LOCAL_MEM_FENCE);

for (int i = 0; i < TILE_SIZE; ++i) {
sum += a_tile[i * TILE_SIZE + local_id.x] *
b_tile[local_id.y * TILE_SIZE + i];
}
barrier(CLK_LOCAL_MEM_FENCE);
}

c[global_id.y * m + global_id.x] = sum;
}
Binary file not shown.
166 changes: 162 additions & 4 deletions conformance_tests/core/test_ipc/src/test_ipc_event.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -38,9 +38,28 @@ ze_event_pool_desc_t defaultEventPoolDesc = {
ZE_STRUCTURE_TYPE_EVENT_POOL_DESC, nullptr,
(ZE_EVENT_POOL_FLAG_HOST_VISIBLE | ZE_EVENT_POOL_FLAG_IPC), 10};

ze_event_pool_desc_t timestampEventPoolDesc = {
ZE_STRUCTURE_TYPE_EVENT_POOL_DESC, nullptr,
(ZE_EVENT_POOL_FLAG_HOST_VISIBLE | ZE_EVENT_POOL_FLAG_IPC |
ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP),
10};

ze_event_pool_desc_t mappedTimestampEventPoolDesc = {
ZE_STRUCTURE_TYPE_EVENT_POOL_DESC, nullptr,
(ZE_EVENT_POOL_FLAG_HOST_VISIBLE | ZE_EVENT_POOL_FLAG_IPC |
ZE_EVENT_POOL_FLAG_KERNEL_MAPPED_TIMESTAMP),
10};

typedef enum {
NO_TIMESTAMP,
KERNEL_TIMESTAMP,
MAPPED_KERNEL_TIMESTAMP
} timestamp_type_t;

static ze_event_pool_handle_t get_event_pool(bool multi_device,
bool device_events,
ze_context_handle_t context) {
ze_context_handle_t context,
timestamp_type_t timestamp_type) {
if (device_events) {
defaultEventPoolDesc = {ZE_STRUCTURE_TYPE_EVENT_POOL_DESC, nullptr,
ZE_EVENT_POOL_FLAG_IPC, 10};
Expand All @@ -54,7 +73,16 @@ static ze_event_pool_handle_t get_event_pool(bool multi_device,
lzt::get_default_device(lzt::get_default_driver());
devices.push_back(device);
}
return lzt::create_event_pool(context, defaultEventPoolDesc, devices);
ze_event_pool_desc_t *eventPoolDesc = &defaultEventPoolDesc;
switch (timestamp_type) {
case KERNEL_TIMESTAMP:
eventPoolDesc = &timestampEventPoolDesc;
break;
case MAPPED_KERNEL_TIMESTAMP:
eventPoolDesc = &mappedTimestampEventPoolDesc;
break;
}
return lzt::create_event_pool(context, *eventPoolDesc, devices);
}

static void parent_host_signals(ze_event_handle_t hEvent) {
Expand All @@ -75,6 +103,96 @@ static void parent_device_signals(ze_event_handle_t hEvent,
lzt::destroy_command_bundle(cmdbundle);
}

ze_kernel_handle_t get_matrix_multiplication_kernel(
const ze_context_handle_t &context, ze_device_handle_t device,
ze_group_count_t *tg, void **a_buffer, void **b_buffer, void **c_buffer,
int dimensions = 1024) {
int m, k, n;
m = k = n = dimensions;
std::vector<float> a(m * k, 1);
std::vector<float> b(k * n, 1);
std::vector<float> c(m * n, 0);
*a_buffer = lzt::allocate_host_memory(m * k * sizeof(float));
*b_buffer = lzt::allocate_host_memory(k * n * sizeof(float));
*c_buffer = lzt::allocate_host_memory(m * n * sizeof(float));

std::memcpy(*a_buffer, a.data(), a.size() * sizeof(float));
std::memcpy(*b_buffer, b.data(), b.size() * sizeof(float));

int group_count_x = m / 16;
int group_count_y = n / 16;

tg->groupCountX = group_count_x;
tg->groupCountY = group_count_y;
tg->groupCountZ = 1;

ze_module_handle_t module =
lzt::create_module(context, device, "ze_matrix_multiplication_ipc.spv",
ZE_MODULE_FORMAT_IL_SPIRV, nullptr, nullptr);
ze_kernel_handle_t function =
lzt::create_function(module, "matrix_multiplication");
lzt::set_group_size(function, 16, 16, 1);
lzt::set_argument_value(function, 0, sizeof(*a_buffer), a_buffer);
lzt::set_argument_value(function, 1, sizeof(*b_buffer), b_buffer);
lzt::set_argument_value(function, 2, sizeof(m), &m);
lzt::set_argument_value(function, 3, sizeof(k), &k);
lzt::set_argument_value(function, 4, sizeof(n), &n);
lzt::set_argument_value(function, 5, sizeof(*c_buffer), c_buffer);
return function;
}

static void run_workload(ze_event_handle_t &timestamp_event,
ze_context_handle_t &context, uint64_t &start,
uint64_t &end, bool mapped_timestamp) {

auto driver = lzt::get_default_driver();
auto device = lzt::get_default_device(driver);

auto command_list = lzt::create_command_list(context, device, 0);
auto command_queue = lzt::create_command_queue(
context, device, 0, ZE_COMMAND_QUEUE_MODE_DEFAULT,
ZE_COMMAND_QUEUE_PRIORITY_NORMAL, 0);

auto device_properties = lzt::get_device_properties(device);
const auto max_threads =
device_properties.numSlices * device_properties.numSubslicesPerSlice *
device_properties.numEUsPerSubslice * device_properties.numThreadsPerEU;
auto dimensions = max_threads > 4096 ? 1024 : 16;
ze_group_count_t group_count;
void *a_buffer, *b_buffer, *c_buffer;
auto kernel =
get_matrix_multiplication_kernel(context, device, &group_count, &a_buffer,
&b_buffer, &c_buffer, dimensions);

lzt::append_launch_function(command_list, kernel, &group_count,
timestamp_event, 0, nullptr);
lzt::close_command_list(command_list);
lzt::execute_command_lists(command_queue, 1, &command_list, nullptr);

lzt::synchronize(command_queue, UINT64_MAX);

// get time data
if (mapped_timestamp) {
std::vector<ze_kernel_timestamp_result_t> kernel_timestamp_buffer{};
std::vector<ze_synchronized_timestamp_result_ext_t>
synchronized_timestamp_buffer{};
lzt::get_event_kernel_timestamps_from_mapped_timestamp_event(
timestamp_event, device, kernel_timestamp_buffer,
synchronized_timestamp_buffer);

ASSERT_GT(kernel_timestamp_buffer.size(), 0);
start = synchronized_timestamp_buffer[0].global.kernelStart;
end = synchronized_timestamp_buffer[0].global.kernelEnd;
} else {
auto tsResult = lzt::get_event_kernel_timestamp(timestamp_event);
start = tsResult.global.kernelStart;
end = tsResult.global.kernelEnd;
}

lzt::destroy_command_list(command_list);
lzt::destroy_command_queue(command_queue);
}

static void run_ipc_event_test(parent_test_t parent_test,
child_test_t child_test, bool multi_device,
bool isImmediate) {
Expand Down Expand Up @@ -103,7 +221,15 @@ static void run_ipc_event_test(parent_test_t parent_test,
auto driver = lzt::get_default_driver();
auto context = lzt::create_context(driver);

auto ep = get_event_pool(multi_device, device_events, context);
auto timestamp_type = NO_TIMESTAMP;
if (child_test == CHILD_TEST_HOST_TIMESTAMP_READS ||
child_test == CHILD_TEST_DEVICE_TIMESTAMP_READS) {
timestamp_type = KERNEL_TIMESTAMP;
} else if (child_test == CHILD_TEST_HOST_MAPPED_TIMESTAMP_READS) {
timestamp_type = MAPPED_KERNEL_TIMESTAMP;
}
auto ep =
get_event_pool(multi_device, device_events, context, timestamp_type);
ze_ipc_event_pool_handle_t hIpcEventPool;
EXPECT_EQ(ZE_RESULT_SUCCESS, zeEventPoolGetIpcHandle(ep, &hIpcEventPool));
if (testing::Test::HasFatalFailure())
Expand All @@ -124,21 +250,32 @@ static void run_ipc_event_test(parent_test_t parent_test,
std::memcpy(region.get_address(), &test_data, sizeof(shared_data_t));

lzt::send_ipc_handle(hIpcEventPool);

uint64_t startTime = 0;
uint64_t endTime = 0;
switch (parent_test) {
case PARENT_TEST_HOST_SIGNALS:
parent_host_signals(hEvent);
break;
case PARENT_TEST_DEVICE_SIGNALS:
parent_device_signals(hEvent, context, isImmediate);
break;
case PARENT_TEST_HOST_LAUNCHES_KERNEL:
run_workload(hEvent, context, startTime, endTime,
(timestamp_type == MAPPED_KERNEL_TIMESTAMP));
break;
default:
FAIL() << "Fatal test error";
}

c.wait(); // wait for the process to exit
ASSERT_EQ(c.exit_code(), 0);

if (parent_test == PARENT_TEST_HOST_LAUNCHES_KERNEL) {
// ensure the timestamps match
EXPECT_EQ(test_data.start_time, startTime);
EXPECT_EQ(test_data.end_time, endTime);
}

// cleanup
bipc::shared_memory_object::remove("ipc_event_test");
lzt::destroy_event(hEvent);
Expand Down Expand Up @@ -196,6 +333,27 @@ TEST(
true);
}

TEST(
zeIPCEventTests,
GivenTwoProcessesWhenTimestampEventSignalledThenEventSetInChildFromHostPerspective) {
run_ipc_event_test(PARENT_TEST_HOST_LAUNCHES_KERNEL,
CHILD_TEST_HOST_TIMESTAMP_READS, false, false);
}

TEST(
zeIPCEventTests,
GivenTwoProcessesWhenTimestampEventSignalledThenEventSetInChildFromDevicePerspective) {
run_ipc_event_test(PARENT_TEST_HOST_LAUNCHES_KERNEL,
CHILD_TEST_DEVICE_TIMESTAMP_READS, false, false);
}

TEST(
zeIPCEventTests,
GivenTwoProcessesWhenMappedTimestampEventSignalledThenEventSetInChildFromHostPerspective) {
run_ipc_event_test(PARENT_TEST_HOST_LAUNCHES_KERNEL,
CHILD_TEST_HOST_MAPPED_TIMESTAMP_READS, false, false);
}

TEST(
zeIPCEventMultipleDeviceTests,
GivenTwoProcessesWhenEventSignaledByDeviceInParentThenEventSetinChildFromSecondDevicePerspective) {
Expand Down
10 changes: 8 additions & 2 deletions conformance_tests/core/test_ipc/src/test_ipc_event.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,21 +11,27 @@

typedef enum {
PARENT_TEST_HOST_SIGNALS,
PARENT_TEST_DEVICE_SIGNALS
PARENT_TEST_DEVICE_SIGNALS,
PARENT_TEST_HOST_LAUNCHES_KERNEL
} parent_test_t;

typedef enum {
CHILD_TEST_HOST_READS,
CHILD_TEST_DEVICE_READS,
CHILD_TEST_DEVICE2_READS,
CHILD_TEST_MULTI_DEVICE_READS
CHILD_TEST_MULTI_DEVICE_READS,
CHILD_TEST_HOST_TIMESTAMP_READS,
CHILD_TEST_DEVICE_TIMESTAMP_READS,
CHILD_TEST_HOST_MAPPED_TIMESTAMP_READS
} child_test_t;

typedef struct {
parent_test_t parent_type;
child_test_t child_type;
bool multi_device;
bool is_immediate;
uint64_t start_time;
uint64_t end_time;
} shared_data_t;

#endif
Loading

0 comments on commit 3f28718

Please sign in to comment.