From 7f2c805b5a1f1797ffaa2bdb8cbeb61597de45cb Mon Sep 17 00:00:00 2001 From: nscipione Date: Mon, 22 Jul 2024 16:13:20 +0100 Subject: [PATCH] Add comment to workaround Add comment to specify why portBLAS needs this workaround and point to open issue in intel/llvm --- src/blas/backends/portblas/portblas_level1.cxx | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/blas/backends/portblas/portblas_level1.cxx b/src/blas/backends/portblas/portblas_level1.cxx index 0b5c2eefb..21192a5a9 100644 --- a/src/blas/backends/portblas/portblas_level1.cxx +++ b/src/blas/backends/portblas/portblas_level1.cxx @@ -370,6 +370,11 @@ sycl::event rotmg(sycl::queue &queue, real_t *d1, real_t *d2, real_t *x1, real_t const std::vector &dependencies) { auto y_d = (real_t *)sycl::malloc_device(sizeof(real_t), queue.get_device(), queue.get_context()); + // This memcpy requires a wait to enforce synchronization. The wait works around + // a bug in the OpenCL backend that shows up with the portBLAS implementation. + // Otherwise event dependencies work fine. + // The issue has been reported to the intel/llvm project here: + // https://github.com/intel/llvm/issues/14623 queue.memcpy(y_d, &y1, sizeof(real_t), dependencies).wait(); auto rotmg_event = std::invoke([&]() -> sycl::event { CALL_PORTBLAS_USM_FN(::blas::_rotmg, queue, d1, d2, x1, y_d, param);