Skip to content

Commit

Permalink
[Bindless][exp] 1D Image copy fix
Browse files Browse the repository at this point in the history
* Image handles created using memory allocated by
  `malloc_device` crash when copied.
* Identify which type of memory is being copied and use
  the correct CUDA API for it.
  • Loading branch information
cppchedy committed Jan 10, 2024
1 parent 1ee242c commit 0b41fdd
Showing 1 changed file with 48 additions and 8 deletions.
56 changes: 48 additions & 8 deletions source/adapters/cuda/image.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -618,15 +618,37 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImageCopyExp(
ScopedContext Active(hQueue->getContext());
CUstream Stream = hQueue->getNextTransferStream();
enqueueEventsWait(hQueue, Stream, numEventsInWaitList, phEventWaitList);

// We have to use a different copy function for each image dimensionality.

if (imageCopyFlags == UR_EXP_IMAGE_COPY_FLAG_HOST_TO_DEVICE) {
if (pImageDesc->type == UR_MEM_TYPE_IMAGE1D) {
CUmemorytype memType;

// Check what type of memory pDst is. If cuPointerGetAttribute returns
// something other than CUDA_SUCCESS, then we know that the pDst memory
// type is a CUarray. Otherwise, it's CU_MEMORYTYPE_DEVICE.
bool isCudaArray =
cuPointerGetAttribute(&memType, CU_POINTER_ATTRIBUTE_MEMORY_TYPE,
(CUdeviceptr)pDst) != CUDA_SUCCESS;

size_t CopyExtentBytes = PixelSizeBytes * copyExtent.width;
char *SrcWithOffset = (char *)pSrc + (srcOffset.x * PixelSizeBytes);
UR_CHECK_ERROR(
cuMemcpyHtoAAsync((CUarray)pDst, dstOffset.x * PixelSizeBytes,
(void *)SrcWithOffset, CopyExtentBytes, Stream));

if (isCudaArray) {
UR_CHECK_ERROR(cuMemcpyHtoAAsync(
(CUarray)pDst, dstOffset.x * PixelSizeBytes,
(void *)SrcWithOffset, CopyExtentBytes, Stream));
} else if (memType == CU_MEMORYTYPE_DEVICE) {
void *DstWithOffset =
(void *)((char *)pDst + (PixelSizeBytes * dstOffset.x));
UR_CHECK_ERROR(cuMemcpyHtoDAsync((CUdeviceptr)DstWithOffset,
(void *)SrcWithOffset,
CopyExtentBytes, Stream));
} else {
// This should be unreachable.
return UR_RESULT_ERROR_INVALID_VALUE;
}
} else if (pImageDesc->type == UR_MEM_TYPE_IMAGE2D) {
CUDA_MEMCPY2D cpy_desc = {};
cpy_desc.srcMemoryType = CUmemorytype_enum::CU_MEMORYTYPE_HOST;
Expand Down Expand Up @@ -669,13 +691,31 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImageCopyExp(
}
} else if (imageCopyFlags == UR_EXP_IMAGE_COPY_FLAG_DEVICE_TO_HOST) {
if (pImageDesc->type == UR_MEM_TYPE_IMAGE1D) {
CUmemorytype memType;
// Check what type of memory pSrc is. If cuPointerGetAttribute returns
// something other than CUDA_SUCCESS, then we know that the pSrc memory
// type is a CUarray. Otherwise, it's CU_MEMORYTYPE_DEVICE.
bool isCudaArray =
cuPointerGetAttribute(&memType, CU_POINTER_ATTRIBUTE_MEMORY_TYPE,
(CUdeviceptr)pSrc) != CUDA_SUCCESS;

size_t CopyExtentBytes = PixelSizeBytes * copyExtent.width;
size_t src_offset_bytes = PixelSizeBytes * srcOffset.x;
void *dst_with_offset =
void *DstWithOffset =
(void *)((char *)pDst + (PixelSizeBytes * dstOffset.x));
UR_CHECK_ERROR(cuMemcpyAtoHAsync(dst_with_offset, (CUarray)pSrc,
src_offset_bytes, CopyExtentBytes,
Stream));

if (isCudaArray) {
UR_CHECK_ERROR(cuMemcpyAtoHAsync(DstWithOffset, (CUarray)pSrc,
PixelSizeBytes * srcOffset.x,
CopyExtentBytes, Stream));
} else if (memType == CU_MEMORYTYPE_DEVICE) {
char *SrcWithOffset = (char *)pSrc + (srcOffset.x * PixelSizeBytes);
UR_CHECK_ERROR(cuMemcpyDtoHAsync(DstWithOffset,
(CUdeviceptr)SrcWithOffset,
CopyExtentBytes, Stream));
} else {
// This should be unreachable.
return UR_RESULT_ERROR_INVALID_VALUE;
}
} else if (pImageDesc->type == UR_MEM_TYPE_IMAGE2D) {
CUDA_MEMCPY2D cpy_desc = {};
cpy_desc.srcXInBytes = srcOffset.x;
Expand Down

0 comments on commit 0b41fdd

Please sign in to comment.