From a04b062ecc826fa6c864cc15ada7f8f10454f279 Mon Sep 17 00:00:00 2001 From: Konrad Kusiak Date: Fri, 29 Mar 2024 17:46:07 +0000 Subject: [PATCH] Extended native cpu fill to bigger patterns than 1 byte --- source/adapters/native_cpu/enqueue.cpp | 37 ++++++++++++++++++++++++-- 1 file changed, 35 insertions(+), 2 deletions(-) diff --git a/source/adapters/native_cpu/enqueue.cpp b/source/adapters/native_cpu/enqueue.cpp index 75c2caeac0..784a36d3aa 100644 --- a/source/adapters/native_cpu/enqueue.cpp +++ b/source/adapters/native_cpu/enqueue.cpp @@ -374,8 +374,41 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMFill( UR_ASSERT(size % patternSize == 0 || patternSize > size, UR_RESULT_ERROR_INVALID_SIZE); - memset(ptr, *static_cast(pPattern), size * patternSize); - + switch (patternSize) { + case 1: + memset(ptr, *static_cast(pPattern), size * patternSize); + break; + case 2: { + const auto pattern = *static_cast(pPattern); + auto *start = reinterpret_cast(ptr); + auto *end = + reinterpret_cast(reinterpret_cast(ptr) + size); + std::fill(start, end, pattern); + break; + } + case 4: { + const auto pattern = *static_cast(pPattern); + auto *start = reinterpret_cast(ptr); + auto *end = + reinterpret_cast(reinterpret_cast(ptr) + size); + std::fill(start, end, pattern); + break; + } + case 8: { + const auto pattern = *static_cast(pPattern); + auto *start = reinterpret_cast(ptr); + auto *end = + reinterpret_cast(reinterpret_cast(ptr) + size); + std::fill(start, end, pattern); + break; + } + default: + for (unsigned int step{0}; step < size; ++step) { + auto *dest = reinterpret_cast(reinterpret_cast(ptr) + + step * patternSize); + memcpy(dest, pPattern, patternSize); + } + } return UR_RESULT_SUCCESS; }