diff --git a/source/adapters/cuda/enqueue.cpp b/source/adapters/cuda/enqueue.cpp index c752c3fd14..7d3758e820 100644 --- a/source/adapters/cuda/enqueue.cpp +++ b/source/adapters/cuda/enqueue.cpp @@ -862,7 +862,7 @@ static size_t imageElementByteSize(CUDA_ARRAY_DESCRIPTOR ArrayDesc) { } } -/// General ND memory copy operation for images (where N > 1). +/// General ND memory copy operation for images. /// This function requires the corresponding CUDA context to be at the top of /// the context stack /// If the source and/or destination is an array, SrcPtr and/or DstPtr @@ -877,14 +877,14 @@ static ur_result_t commonEnqueueMemImageNDCopy( UR_ASSERT(DstType == CU_MEMORYTYPE_ARRAY || DstType == CU_MEMORYTYPE_HOST, UR_RESULT_ERROR_INVALID_MEM_OBJECT); - if (ImgType == UR_MEM_TYPE_IMAGE2D) { + if (ImgType == UR_MEM_TYPE_IMAGE1D || ImgType == UR_MEM_TYPE_IMAGE2D) { CUDA_MEMCPY2D CpyDesc; memset(&CpyDesc, 0, sizeof(CpyDesc)); CpyDesc.srcMemoryType = SrcType; if (SrcType == CU_MEMORYTYPE_ARRAY) { CpyDesc.srcArray = *static_cast(SrcPtr); CpyDesc.srcXInBytes = SrcOffset.x; - CpyDesc.srcY = SrcOffset.y; + CpyDesc.srcY = (ImgType == UR_MEM_TYPE_IMAGE1D) ? 0 : SrcOffset.y; } else { CpyDesc.srcHost = SrcPtr; } @@ -892,12 +892,12 @@ static ur_result_t commonEnqueueMemImageNDCopy( if (DstType == CU_MEMORYTYPE_ARRAY) { CpyDesc.dstArray = *static_cast(DstPtr); CpyDesc.dstXInBytes = DstOffset.x; - CpyDesc.dstY = DstOffset.y; + CpyDesc.dstY = (ImgType == UR_MEM_TYPE_IMAGE1D) ? 0 : DstOffset.y; } else { CpyDesc.dstHost = DstPtr; } CpyDesc.WidthInBytes = Region.width; - CpyDesc.Height = Region.height; + CpyDesc.Height = (ImgType == UR_MEM_TYPE_IMAGE1D) ? 1 : Region.height; UR_CHECK_ERROR(cuMemcpy2DAsync(&CpyDesc, CuStream)); return UR_RESULT_SUCCESS; } @@ -1124,21 +1124,17 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemImageCopy( } ur_mem_type_t ImgType = std::get(hImageSrc->Mem).getImageType(); - if (ImgType == UR_MEM_TYPE_IMAGE1D) { - UR_CHECK_ERROR(cuMemcpyAtoA(DstArray, DstByteOffsetX, SrcArray, - SrcByteOffsetX, BytesToCopy)); - } else { - ur_rect_region_t AdjustedRegion = {BytesToCopy, region.height, - region.depth}; - ur_rect_offset_t SrcOffset = {SrcByteOffsetX, srcOrigin.y, srcOrigin.z}; - ur_rect_offset_t DstOffset = {DstByteOffsetX, dstOrigin.y, dstOrigin.z}; - Result = commonEnqueueMemImageNDCopy( - CuStream, ImgType, AdjustedRegion, &SrcArray, CU_MEMORYTYPE_ARRAY, - SrcOffset, &DstArray, CU_MEMORYTYPE_ARRAY, DstOffset); - if (Result != UR_RESULT_SUCCESS) { - return Result; - } + ur_rect_region_t AdjustedRegion = {BytesToCopy, region.height, + region.depth}; + ur_rect_offset_t SrcOffset = {SrcByteOffsetX, srcOrigin.y, srcOrigin.z}; + ur_rect_offset_t DstOffset = {DstByteOffsetX, dstOrigin.y, dstOrigin.z}; + + Result = commonEnqueueMemImageNDCopy( + CuStream, ImgType, AdjustedRegion, &SrcArray, CU_MEMORYTYPE_ARRAY, + SrcOffset, &DstArray, CU_MEMORYTYPE_ARRAY, DstOffset); + if (Result != UR_RESULT_SUCCESS) { + return Result; } if (phEvent) {