diff --git a/source/adapters/cuda/enqueue.cpp b/source/adapters/cuda/enqueue.cpp index 1c074025a9..97eb7f4035 100644 --- a/source/adapters/cuda/enqueue.cpp +++ b/source/adapters/cuda/enqueue.cpp @@ -666,6 +666,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueKernelLaunchCustomExp( } return UR_RESULT_SUCCESS; #else + [[maybe_unused]] auto _ = launchPropList; setErrorMessage("This feature requires cuda 11.8 or later.", UR_RESULT_ERROR_ADAPTER_SPECIFIC); return UR_RESULT_ERROR_ADAPTER_SPECIFIC; diff --git a/source/adapters/cuda/image.cpp b/source/adapters/cuda/image.cpp index 427fde70e6..7599d9b9c4 100644 --- a/source/adapters/cuda/image.cpp +++ b/source/adapters/cuda/image.cpp @@ -315,8 +315,8 @@ ur_result_t urTextureCreate(ur_sampler_handle_t hSampler, #if CUDA_VERSION >= 11060 ImageTexDesc.flags |= CU_TRSF_SEAMLESS_CUBEMAP; #else - setErrorMessage("The " UR_EXP_SAMPLER_CUBEMAP_FILTER_MODE_SEAMLESS - " feature requires cuda 11.6 or later.", + setErrorMessage("The UR_EXP_SAMPLER_CUBEMAP_FILTER_MODE_SEAMLESS " + "feature requires cuda 11.6 or later.", UR_RESULT_ERROR_ADAPTER_SPECIFIC); return UR_RESULT_ERROR_ADAPTER_SPECIFIC; #endif @@ -657,6 +657,10 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImageCopyExp( UR_ASSERT(pSrcImageFormat->channelOrder == pDstImageFormat->channelOrder, UR_RESULT_ERROR_INVALID_ARGUMENT); + auto as_CUArray = [](const void *ptr) { + return static_cast<CUarray>(const_cast<void *>(ptr)); + }; + unsigned int NumChannels = 0; size_t PixelSizeBytes = 0; @@ -785,7 +789,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImageCopyExp( if (isCudaArray) { UR_CHECK_ERROR( - cuMemcpyAtoHAsync(DstWithOffset, (CUarray)pSrc, + cuMemcpyAtoHAsync(DstWithOffset, as_CUArray(pSrc), PixelSizeBytes * pCopyRegion->srcOffset.x, CopyExtentBytes, Stream)); } else if (memType == CU_MEMORYTYPE_DEVICE) { @@ -809,7 +813,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImageCopyExp( cpy_desc.dstHost = pDst; if (pSrcImageDesc->rowPitch == 0) { cpy_desc.srcMemoryType 
= CUmemorytype_enum::CU_MEMORYTYPE_ARRAY; - cpy_desc.srcArray = (CUarray)pSrc; + cpy_desc.srcArray = as_CUArray(pSrc); } else { // Pitched memory cpy_desc.srcMemoryType = CUmemorytype_enum::CU_MEMORYTYPE_DEVICE; @@ -831,7 +835,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImageCopyExp( cpy_desc.dstY = pCopyRegion->dstOffset.y; cpy_desc.dstZ = pCopyRegion->dstOffset.z; cpy_desc.srcMemoryType = CUmemorytype_enum::CU_MEMORYTYPE_ARRAY; - cpy_desc.srcArray = (CUarray)pSrc; + cpy_desc.srcArray = as_CUArray(pSrc); cpy_desc.dstMemoryType = CUmemorytype_enum::CU_MEMORYTYPE_HOST; cpy_desc.dstHost = pDst; cpy_desc.dstPitch = pDstImageDesc->width * PixelSizeBytes; @@ -851,7 +855,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImageCopyExp( cpy_desc.dstY = pCopyRegion->dstOffset.y; cpy_desc.dstZ = pCopyRegion->dstOffset.z; cpy_desc.srcMemoryType = CUmemorytype_enum::CU_MEMORYTYPE_ARRAY; - cpy_desc.srcArray = (CUarray)pSrc; + cpy_desc.srcArray = as_CUArray(pSrc); cpy_desc.dstMemoryType = CUmemorytype_enum::CU_MEMORYTYPE_HOST; cpy_desc.dstHost = pDst; cpy_desc.dstPitch = pDstImageDesc->width * PixelSizeBytes; @@ -881,7 +885,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImageCopyExp( cpy_desc.dstXInBytes = pCopyRegion->dstOffset.x * PixelSizeBytes; cpy_desc.dstY = 0; cpy_desc.srcMemoryType = CUmemorytype_enum::CU_MEMORYTYPE_ARRAY; - cpy_desc.srcArray = (CUarray)pSrc; + cpy_desc.srcArray = as_CUArray(pSrc); cpy_desc.dstMemoryType = CUmemorytype_enum::CU_MEMORYTYPE_ARRAY; cpy_desc.dstArray = (CUarray)pDst; cpy_desc.WidthInBytes = PixelSizeBytes * pCopyRegion->copyExtent.width; @@ -894,7 +898,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImageCopyExp( cpy_desc.dstXInBytes = pCopyRegion->dstOffset.x * PixelSizeBytes; cpy_desc.dstY = pCopyRegion->dstOffset.y; cpy_desc.srcMemoryType = CUmemorytype_enum::CU_MEMORYTYPE_ARRAY; - cpy_desc.srcArray = (CUarray)pSrc; + cpy_desc.srcArray = as_CUArray(pSrc); cpy_desc.dstMemoryType = 
CUmemorytype_enum::CU_MEMORYTYPE_ARRAY; cpy_desc.dstArray = (CUarray)pDst; cpy_desc.WidthInBytes = PixelSizeBytes * pCopyRegion->copyExtent.width; @@ -909,7 +913,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImageCopyExp( cpy_desc.dstY = pCopyRegion->dstOffset.y; cpy_desc.dstZ = pCopyRegion->dstOffset.z; cpy_desc.srcMemoryType = CUmemorytype_enum::CU_MEMORYTYPE_ARRAY; - cpy_desc.srcArray = (CUarray)pSrc; + cpy_desc.srcArray = as_CUArray(pSrc); cpy_desc.dstMemoryType = CUmemorytype_enum::CU_MEMORYTYPE_ARRAY; cpy_desc.dstArray = (CUarray)pDst; cpy_desc.WidthInBytes = PixelSizeBytes * pCopyRegion->copyExtent.width; @@ -927,7 +931,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImageCopyExp( cpy_desc.dstY = pCopyRegion->dstOffset.y; cpy_desc.dstZ = pCopyRegion->dstOffset.z; cpy_desc.srcMemoryType = CUmemorytype_enum::CU_MEMORYTYPE_ARRAY; - cpy_desc.srcArray = (CUarray)pSrc; + cpy_desc.srcArray = as_CUArray(pSrc); cpy_desc.dstMemoryType = CUmemorytype_enum::CU_MEMORYTYPE_ARRAY; cpy_desc.dstArray = (CUarray)pDst; cpy_desc.WidthInBytes = PixelSizeBytes * pCopyRegion->copyExtent.width; diff --git a/source/adapters/cuda/kernel.cpp b/source/adapters/cuda/kernel.cpp index 2061893744..c4b5e3aa80 100644 --- a/source/adapters/cuda/kernel.cpp +++ b/source/adapters/cuda/kernel.cpp @@ -13,6 +13,7 @@ #include "memory.hpp" #include "queue.hpp" #include "sampler.hpp" +#include "ur_api.h" UR_APIEXPORT ur_result_t UR_APICALL urKernelCreate(ur_program_handle_t hProgram, const char *pKernelName, @@ -343,7 +344,8 @@ urKernelSetArgMemObj(ur_kernel_handle_t hKernel, uint32_t argIndex, try { auto Device = hKernel->getProgram()->getDevice(); ur_mem_flags_t MemAccess = - Properties ? Properties->memoryAccess : UR_MEM_FLAG_READ_WRITE; + Properties ? 
Properties->memoryAccess + : static_cast<ur_mem_flags_t>(UR_MEM_FLAG_READ_WRITE); hKernel->Args.addMemObjArg(argIndex, hArgValue, MemAccess); if (hArgValue->isImage()) { CUDA_ARRAY3D_DESCRIPTOR arrayDesc; diff --git a/source/adapters/hip/usm.cpp b/source/adapters/hip/usm.cpp index d58a8eb530..ca7472d646 100644 --- a/source/adapters/hip/usm.cpp +++ b/source/adapters/hip/usm.cpp @@ -218,7 +218,7 @@ urUSMGetMemAllocInfo(ur_context_handle_t hContext, const void *pMem, void *Base = nullptr; UR_CHECK_ERROR(hipPointerGetAttribute( &Base, HIP_POINTER_ATTRIBUTE_RANGE_START_ADDR, - (hipDeviceptr_t)pMem)); + reinterpret_cast<hipDeviceptr_t>(const_cast<void *>(pMem)))); return ReturnValue(Base); } } diff --git a/source/loader/ur_lib.cpp b/source/loader/ur_lib.cpp index 7f2d1baa13..e1de6d6237 100644 --- a/source/loader/ur_lib.cpp +++ b/source/loader/ur_lib.cpp @@ -49,10 +49,9 @@ void context_t::parseEnvEnabledLayers() { } } -void context_t::initLayers() const { +void context_t::initLayers() { for (auto &[layer, _] : layers) { - layer->init((ur_dditable_t *)&urDdiTable, enabledLayerNames, - codelocData); + layer->init(&urDdiTable, enabledLayerNames, codelocData); } } diff --git a/source/loader/ur_lib.hpp b/source/loader/ur_lib.hpp index edd0fffe9f..6334ed7b2a 100644 --- a/source/loader/ur_lib.hpp +++ b/source/loader/ur_lib.hpp @@ -110,7 +110,7 @@ class __urdlllocal context_t : public AtomicSingleton<context_t> { codeloc_data codelocData; void parseEnvEnabledLayers(); - void initLayers() const; + void initLayers(); void tearDownLayers() const; };