diff --git a/tests/invoke/execution_and_memory_models.cpp b/tests/invoke/execution_and_memory_models.cpp new file mode 100644 index 000000000..d605b0cdf --- /dev/null +++ b/tests/invoke/execution_and_memory_models.cpp @@ -0,0 +1,247 @@ +/******************************************************************************* +// +// SYCL 2020 Conformance Test Suite +// +// Copyright (c) 2023 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +*******************************************************************************/ + +#include "../common/common.h" +#include "../common/disabled_for_test_case.h" + +template +class kernel; + +template +void long_loop(acc_t& loop_acc, size_t buffer_size) { + for (int i = 0; i < 1000000; i++) { + int s = sycl::sqrt(float(i)); + loop_acc[s % buffer_size] = i; + } +} + +TEST_CASE("Execution order of three command groups submitted to the same queue", + "[invoke]") { + const size_t buffer_size = 12; + auto queue = sycl_cts::util::get_cts_object::queue(); + + std::array data1; + std::array data2; + std::array res1; + std::array res2; + + { + sycl::buffer buffer1(data1.data(), sycl::range(buffer_size)); + sycl::buffer buffer2(data2.data(), sycl::range(buffer_size)); + sycl::buffer res_buffer1(res1.data(), sycl::range(buffer_size)); + sycl::buffer res_buffer2(res2.data(), sycl::range(buffer_size)); + sycl::buffer loop_buf(sycl::range<1>{buffer_size}); + + queue.submit([&](sycl::handler& cgh) { + auto acc1 = sycl::accessor(buffer1, cgh, sycl::write_only); + auto loop_acc = sycl::accessor(loop_buf, cgh, sycl::write_only); + cgh.single_task>([=] { + long_loop(loop_acc, buffer_size); + for (int i = buffer_size - 1; i >= 0; i--) acc1[i] = i; + }); + }); + queue.submit([&](sycl::handler& cgh) { + auto acc2 = sycl::accessor(buffer2, cgh, sycl::write_only); + auto loop_acc = sycl::accessor(loop_buf, cgh, sycl::write_only); + cgh.single_task>([=] { + long_loop(loop_acc, buffer_size); + for (int i = buffer_size - 1; i >= 0; i--) acc2[i] = i; + }); + }); + queue.submit([&](sycl::handler& cgh) { + auto acc1 = sycl::accessor(buffer1, cgh, sycl::read_only); + auto acc2 = sycl::accessor(buffer2, cgh, sycl::read_only); + auto res_acc1 = sycl::accessor(res_buffer1, cgh, sycl::write_only); + auto res_acc2 = sycl::accessor(res_buffer2, cgh, sycl::write_only); + cgh.parallel_for>(sycl::range<1>(buffer_size), + [=](sycl::id<1> index) { + res_acc1[index] = acc1[index]; + res_acc2[index] = acc2[index]; + }); + }); + queue.wait_and_throw(); + } + + // requisites for third command were satisfied before it started to execute + INFO( + "Check that all elements in result " + "buffer are equal to expected values"); + CHECK(data1 == res1); + CHECK(data2 == res2); +} + +TEST_CASE( + "Execution order of three command groups submitted to the different queues", + "[invoke]") { + const std::vector devices{sycl::device::get_devices()}; + + if (devices.size() >= 3) { + const size_t buffer_size = 12; + sycl::queue q0(devices[0]); + sycl::queue q1(devices[1]); + sycl::queue q2(devices[2]); + + std::array data1; + std::array data2; + std::array res1; + std::array res2; + + { + sycl::buffer buffer1(data1.data(), sycl::range(buffer_size)); + sycl::buffer buffer2(data2.data(), sycl::range(buffer_size)); + sycl::buffer res_buffer1(res1.data(), sycl::range(buffer_size)); + sycl::buffer res_buffer2(res2.data(), sycl::range(buffer_size)); + sycl::buffer loop_buf(sycl::range<1>{buffer_size}); + + q0.submit([&](sycl::handler& cgh) { + auto acc1 = sycl::accessor(buffer1, cgh, sycl::write_only); + auto loop_acc = sycl::accessor(loop_buf, cgh, sycl::write_only); + cgh.single_task>([=] { + long_loop(loop_acc, buffer_size); + for (int i = buffer_size - 1; i >= 0; i--) acc1[i] = i; + }); + }); + q1.submit([&](sycl::handler& cgh) { + auto acc2 = sycl::accessor(buffer2, cgh, sycl::write_only); + auto loop_acc = sycl::accessor(loop_buf, cgh, sycl::write_only); + cgh.single_task>([=] { + long_loop(loop_acc, buffer_size); + for (int i = buffer_size - 1; i >= 0; i--) acc2[i] = i; + }); + }); + q2.submit([&](sycl::handler& cgh) { + auto acc1 = sycl::accessor(buffer1, cgh, sycl::read_only); + auto acc2 = sycl::accessor(buffer2, cgh, sycl::read_only); + auto res_acc1 = sycl::accessor(res_buffer1, cgh, sycl::write_only); + auto res_acc2 = sycl::accessor(res_buffer2, cgh, sycl::write_only); + cgh.parallel_for>(sycl::range<1>(buffer_size), + [=](sycl::id<1> index) { + res_acc1[index] = acc1[index]; + res_acc2[index] = acc2[index]; + }); + }); + } + + INFO( + "Check that all elements in result buffer are equal to expected values " + "after runing on different queues"); + CHECK(data1 == res1); + CHECK(data2 == res2); + + } else { + SKIP("Test requires 3 or more devices"); + } +} + +DISABLED_FOR_TEST_CASE(hipSYCL) +("Requirements on overlapping sub-buffers", "[invoke]")({ + auto device = sycl_cts::util::get_cts_object::device(); + auto queue = sycl_cts::util::get_cts_object::queue(); + + if (device.has(sycl::aspect::usm_device_allocations)) { + const auto mem_base_addr_align = + device.get_info(); + const size_t buffer_size = mem_base_addr_align * 3; + + bool res = false; + + { + sycl::buffer res_buffer(&res, sycl::range<1>{1}); + sycl::buffer buffer(sycl::range<1>{buffer_size}); + sycl::buffer sub_buf1(buffer, 0, mem_base_addr_align * 2); + sycl::buffer sub_buf2(buffer, mem_base_addr_align, + mem_base_addr_align * 2); + sycl::buffer loop_buf(sycl::range<1>{buffer_size}); + + int* pflag = sycl::malloc_device(1, queue); + // assign pflag value to 0 + queue + .submit([&](sycl::handler& cgh) { + cgh.single_task>([=] { *pflag = 0; }); + }) + .wait(); + queue.submit([&](sycl::handler& cgh) { + auto acc1 = sycl::accessor(sub_buf1, cgh, sycl::write_only); + auto loop_acc = sycl::accessor(loop_buf, cgh, sycl::write_only); + cgh.single_task>([=] { + sycl::atomic_ref + rflag(*pflag); + for (int i = acc1.size() - 1; i >= 0; i--) acc1[i] = i; + long_loop(loop_acc, buffer_size); + rflag = 1; + }); + }); + queue.submit([&](sycl::handler& cgh) { + auto acc2 = sycl::accessor(sub_buf2, cgh, sycl::write_only); + auto res_acc = sycl::accessor(res_buffer, cgh, sycl::write_only); + cgh.single_task>([=] { + sycl::atomic_ref + rflag(*pflag); + res_acc[0] = (rflag == 1); + for (int i = acc2.size() - 1; i >= 0; i--) acc2[i] = i; + }); + }); + queue.wait_and_throw(); + } + + INFO( + "Check that requisites for second command with overlapping sub-buffer " + "were satisfied before it started to execute"); + CHECK(res); + + } else { + SKIP("Device does not support USM device allocations"); + } +}); + +TEST_CASE("Host accessor as a barrier", "[invoke]") { + auto device = sycl_cts::util::get_cts_object::device(); + auto queue = sycl_cts::util::get_cts_object::queue(); + + if (device.has(sycl::aspect::usm_atomic_shared_allocations)) { + const size_t buffer_size = 12; + + { + sycl::buffer buffer(sycl::range<1>{buffer_size}); + sycl::buffer loop_buf(sycl::range<1>{buffer_size}); + int* pflag = sycl::malloc_shared(1, queue); + + queue.submit([&](sycl::handler& cgh) { + auto acc = sycl::accessor(buffer, cgh, sycl::write_only); + auto loop_acc = sycl::accessor(loop_buf, cgh, sycl::write_only); + cgh.single_task>([=] { + long_loop(loop_acc, buffer_size); + for (int i = buffer_size - 1; i >= 0; i--) acc[i] = i; + *pflag = 42; + }); + }); + sycl::host_accessor host_acc(buffer); + + INFO( + "Check that *pflag is equal to expected value, after host_acc " + "creation"); // that means that host_accessor acts as barrier + CHECK(*pflag == 42); + } + } else { + SKIP("Device does not support USM atomic shared allocations"); + } +}