Skip to content

Commit

Permalink
Merge pull request #1104 from fabiomestre/fabio/fix_cuda_intermittent
Browse files Browse the repository at this point in the history
[CUDA] Fix synchronization issue in urEnqueueMemImageCopy
  • Loading branch information
kbenzie authored Jan 30, 2024
2 parents 8695b5d + ffe9a51 commit edb281f
Showing 1 changed file with 15 additions and 19 deletions.
34 changes: 15 additions & 19 deletions source/adapters/cuda/enqueue.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -862,7 +862,7 @@ static size_t imageElementByteSize(CUDA_ARRAY_DESCRIPTOR ArrayDesc) {
}
}

/// General ND memory copy operation for images (where N > 1).
/// General ND memory copy operation for images.
/// This function requires the corresponding CUDA context to be at the top of
/// the context stack.
/// If the source and/or destination is an array, SrcPtr and/or DstPtr
Expand All @@ -877,27 +877,27 @@ static ur_result_t commonEnqueueMemImageNDCopy(
UR_ASSERT(DstType == CU_MEMORYTYPE_ARRAY || DstType == CU_MEMORYTYPE_HOST,
UR_RESULT_ERROR_INVALID_MEM_OBJECT);

if (ImgType == UR_MEM_TYPE_IMAGE2D) {
if (ImgType == UR_MEM_TYPE_IMAGE1D || ImgType == UR_MEM_TYPE_IMAGE2D) {
CUDA_MEMCPY2D CpyDesc;
memset(&CpyDesc, 0, sizeof(CpyDesc));
CpyDesc.srcMemoryType = SrcType;
if (SrcType == CU_MEMORYTYPE_ARRAY) {
CpyDesc.srcArray = *static_cast<const CUarray *>(SrcPtr);
CpyDesc.srcXInBytes = SrcOffset.x;
CpyDesc.srcY = SrcOffset.y;
CpyDesc.srcY = (ImgType == UR_MEM_TYPE_IMAGE1D) ? 0 : SrcOffset.y;
} else {
CpyDesc.srcHost = SrcPtr;
}
CpyDesc.dstMemoryType = DstType;
if (DstType == CU_MEMORYTYPE_ARRAY) {
CpyDesc.dstArray = *static_cast<CUarray *>(DstPtr);
CpyDesc.dstXInBytes = DstOffset.x;
CpyDesc.dstY = DstOffset.y;
CpyDesc.dstY = (ImgType == UR_MEM_TYPE_IMAGE1D) ? 0 : DstOffset.y;
} else {
CpyDesc.dstHost = DstPtr;
}
CpyDesc.WidthInBytes = Region.width;
CpyDesc.Height = Region.height;
CpyDesc.Height = (ImgType == UR_MEM_TYPE_IMAGE1D) ? 1 : Region.height;
UR_CHECK_ERROR(cuMemcpy2DAsync(&CpyDesc, CuStream));
return UR_RESULT_SUCCESS;
}
Expand Down Expand Up @@ -1124,21 +1124,17 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemImageCopy(
}

ur_mem_type_t ImgType = std::get<SurfaceMem>(hImageSrc->Mem).getImageType();
if (ImgType == UR_MEM_TYPE_IMAGE1D) {
UR_CHECK_ERROR(cuMemcpyAtoA(DstArray, DstByteOffsetX, SrcArray,
SrcByteOffsetX, BytesToCopy));
} else {
ur_rect_region_t AdjustedRegion = {BytesToCopy, region.height,
region.depth};
ur_rect_offset_t SrcOffset = {SrcByteOffsetX, srcOrigin.y, srcOrigin.z};
ur_rect_offset_t DstOffset = {DstByteOffsetX, dstOrigin.y, dstOrigin.z};

Result = commonEnqueueMemImageNDCopy(
CuStream, ImgType, AdjustedRegion, &SrcArray, CU_MEMORYTYPE_ARRAY,
SrcOffset, &DstArray, CU_MEMORYTYPE_ARRAY, DstOffset);
if (Result != UR_RESULT_SUCCESS) {
return Result;
}
ur_rect_region_t AdjustedRegion = {BytesToCopy, region.height,
region.depth};
ur_rect_offset_t SrcOffset = {SrcByteOffsetX, srcOrigin.y, srcOrigin.z};
ur_rect_offset_t DstOffset = {DstByteOffsetX, dstOrigin.y, dstOrigin.z};

Result = commonEnqueueMemImageNDCopy(
CuStream, ImgType, AdjustedRegion, &SrcArray, CU_MEMORYTYPE_ARRAY,
SrcOffset, &DstArray, CU_MEMORYTYPE_ARRAY, DstOffset);
if (Result != UR_RESULT_SUCCESS) {
return Result;
}

if (phEvent) {
Expand Down

0 comments on commit edb281f

Please sign in to comment.