From f269698dfff5af7eed500aae824f624c0a62a5ae Mon Sep 17 00:00:00 2001 From: Brice Videau Date: Wed, 12 Jul 2023 15:18:26 +0000 Subject: [PATCH 01/16] Add option to disable OpenCL or Level Zero support. --- CMakeLists.txt | 30 ++++++++++++++++++------------ HIP | 2 +- HIPCC | 2 +- bitcode/ROCm-Device-Libs | 2 +- 4 files changed, 21 insertions(+), 15 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 40de0b808..3c195e4d9 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -65,7 +65,9 @@ endforeach() # ============================================================================= # CHIP-SPV CMAKE DEPENDENCIES -if(NOT DEFINED OpenCL_LIBRARY) +option(CHIP_ENABLE_OPENCL "Enable building the OpenCL backend" ON) +option(CHIP_ENABLE_LEVEL0 "Enable building the Level Zero backend" ON) +if(NOT DEFINED OpenCL_LIBRARY AND CHIP_ENABLE_OPENCL) message(STATUS "OpenCL_LIBRARY was not set. Searching for libOpenCL.so in LD_LIBRARY_PATH") find_library(OpenCL_LIBRARY NAMES OpenCL PATHS ENV LD_LIBRARY_PATH ./ NO_CACHE) if(OpenCL_LIBRARY) @@ -76,7 +78,7 @@ if(NOT DEFINED OpenCL_LIBRARY) endif() endif() -if(NOT DEFINED LevelZero_LIBRARY) +if(NOT DEFINED LevelZero_LIBRARY AND CHIP_ENABLE_LEVEL0) message(STATUS "LevelZero_LIBRARY was not set. Searching for ze_loader.so in LD_LIBRARY_PATH") find_library(LevelZero_LIBRARY NAMES ze_loader PATHS ENV LD_LIBRARY_PATH ./ NO_CACHE) if(LevelZero_LIBRARY) @@ -87,10 +89,14 @@ if(NOT DEFINED LevelZero_LIBRARY) endif() endif() -message(STATUS "OpenCL_LIBRARY: ${OpenCL_LIBRARY}") +if(CHIP_ENABLE_OPENCL) + message(STATUS "OpenCL_LIBRARY: ${OpenCL_LIBRARY}") +endif() +if(CHIP_ENABLE_LEVEL0) message(STATUS "LevelZero_LIBRARY: ${LevelZero_LIBRARY}") +endif() -if(NOT OpenCL_LIBRARY AND NOT LevelZero_LIBRARY) +if(NOT (OpenCL_LIBRARY AND CHIP_ENABLE_OPENCL) AND NOT (LevelZero_LIBRARY AND CHIP_ENABLE_LEVEL0)) message(FATAL_ERROR "At least one of OpenCL,Level0 libraries must be available") endif() @@ -128,13 +134,13 @@ set(CHIP_SRC src/SPIRVFuncInfo.cc ) -if(OpenCL_LIBRARY) +if(OpenCL_LIBRARY AND CHIP_ENABLE_OPENCL) list(APPEND CHIP_SRC src/backend/OpenCL/CHIPBackendOpenCL.cc src/backend/OpenCL/SVMemoryRegion.cc) endif() -if(LevelZero_LIBRARY) +if(LevelZero_LIBRARY AND CHIP_ENABLE_LEVEL0) list(APPEND CHIP_SRC src/backend/Level0/CHIPBackendLevel0.cc) endif() @@ -295,12 +301,12 @@ endif() set(CHIP_INTERFACE_LIBS ${PTHREAD_LIBRARY}) -if(OpenCL_LIBRARY) +if(OpenCL_LIBRARY AND CHIP_ENABLE_OPENCL) list(APPEND CHIP_SPV_DEFINITIONS HAVE_OPENCL) list(PREPEND CHIP_INTERFACE_LIBS ${OpenCL_LIBRARY}) endif() -if(LevelZero_LIBRARY) +if(LevelZero_LIBRARY AND CHIP_ENABLE_LEVEL0) list(APPEND CHIP_SPV_DEFINITIONS HAVE_LEVEL0) list(PREPEND CHIP_INTERFACE_LIBS ${LevelZero_LIBRARY}) endif() @@ -422,13 +428,13 @@ set(HIP_OFFLOAD_COMPILE_OPTIONS_BUILD_ list(APPEND HIP_OFFLOAD_LINK_OPTIONS_INSTALL_ "-L${LIB_INSTALL_DIR}" "-lCHIP") list(APPEND HIP_OFFLOAD_LINK_OPTIONS_BUILD_ "-L${CMAKE_BINARY_DIR}" "-lCHIP") -if(OpenCL_LIBRARY) +if(OpenCL_LIBRARY AND CHIP_ENABLE_OPENCL) target_link_options(CHIP PUBLIC -Wl,-rpath,${OpenCL_DIR}) target_link_directories(CHIP PUBLIC ${OpenCL_DIR}) target_link_libraries(CHIP PUBLIC OpenCL) endif() -if(LevelZero_LIBRARY) +if(LevelZero_LIBRARY AND CHIP_ENABLE_LEVEL0) target_link_options(CHIP PUBLIC -Wl,-rpath,${LevelZero_DIR}) target_link_directories(CHIP PUBLIC ${LevelZero_DIR}) target_link_libraries(CHIP PUBLIC ze_loader) @@ -673,10 +679,10 @@ endif() # Short Summary # print if Level Zero or OpenCL are enabbled -if(OpenCL_LIBRARY) +if(OpenCL_LIBRARY AND CHIP_ENABLE_OPENCL) message(STATUS "OpenCL is enabled: ${OpenCL_LIBRARY}") endif() -if(LevelZero_LIBRARY) +if(LevelZero_LIBRARY AND CHIP_ENABLE_LEVEL0) message(STATUS "Level Zero is enabled: ${LevelZero_LIBRARY}") endif() diff --git a/HIP b/HIP index 8a3f87e64..043ec5cd3 160000 --- a/HIP +++ b/HIP @@ -1 +1 @@ -Subproject commit 8a3f87e64ae3b44a58f0502fa3693b0192863c44 +Subproject commit 043ec5cd374fdd7a32a502d5d3fd561abab6b9c4 diff --git a/HIPCC b/HIPCC index f2d253250..76ec795a9 160000 --- a/HIPCC +++ b/HIPCC @@ -1 +1 @@ -Subproject commit f2d253250b1bb491157fbb0aa4e41d1646200a2c +Subproject commit 76ec795a97bcd5c439ecdc5bd8c73c329d8a889b diff --git a/bitcode/ROCm-Device-Libs b/bitcode/ROCm-Device-Libs index 7eca6d212..4db9cd7a5 160000 --- a/bitcode/ROCm-Device-Libs +++ b/bitcode/ROCm-Device-Libs @@ -1 +1 @@ -Subproject commit 7eca6d2125b7e8a1738313326a2f874ce945bb61 +Subproject commit 4db9cd7a5c2328d35eb0794928dbb173e4b7db94 From c1c2a263458e75e42c1885419877734b6af12d41 Mon Sep 17 00:00:00 2001 From: Brice Videau Date: Wed, 12 Jul 2023 21:35:59 +0000 Subject: [PATCH 02/16] Make streams dispatchable. --- src/CHIPBackend.cc | 23 ++--- src/CHIPBackend.hh | 8 +- src/CHIPBindings.cc | 109 ++++++++++++------------ src/CHIPGraph.cc | 14 +-- src/backend/Level0/CHIPBackendLevel0.cc | 19 +++-- src/backend/Level0/CHIPBackendLevel0.hh | 8 +- src/backend/OpenCL/CHIPBackendOpenCL.cc | 18 ++-- src/backend/OpenCL/CHIPBackendOpenCL.hh | 8 +- src/common.hh | 27 +++++- 9 files changed, 135 insertions(+), 99 deletions(-) diff --git a/src/CHIPBackend.cc b/src/CHIPBackend.cc index 14b04766d..0985d5a60 100644 --- a/src/CHIPBackend.cc +++ b/src/CHIPBackend.cc @@ -91,7 +91,7 @@ chipstar::CallbackData::CallbackData(hipStreamCallback_t TheCallbackF, CallbackF(TheCallbackF) {} void chipstar::CallbackData::execute(hipError_t ResultFromDependency) { - CallbackF(ChipQueue, ResultFromDependency, CallbackArgs); + CallbackF(STREAM(ChipQueue), ResultFromDependency, CallbackArgs); } // DeviceVar @@ -480,9 +480,9 @@ void chipstar::ExecItem::copyArgs(void **Args) { } chipstar::ExecItem::ExecItem(dim3 GridDim, dim3 BlockDim, size_t SharedMem, - hipStream_t ChipQueue) + chipstar::Queue *ChipQueue) : SharedMem_(SharedMem), GridDim_(GridDim), BlockDim_(BlockDim), - ChipQueue_(static_cast(ChipQueue)){}; + ChipQueue_(ChipQueue){}; dim3 chipstar::ExecItem::getBlock() { return BlockDim_; } dim3 chipstar::ExecItem::getGrid() { return GridDim_; } @@ -502,11 +502,13 @@ chipstar::Device::~Device() { LOCK(DeviceMtx); // chipstar::Device::ChipQueues_ logDebug("~Device() {}", (void *)this); while (this->ChipQueues_.size() > 0) { - delete ChipQueues_[0]; + ChipQueues_[0]->~Queue(); + free(CHIP_OBJ_TO_HANDLE(ChipQueues_[0], ihipStream_t)); ChipQueues_.erase(ChipQueues_.begin()); } - delete LegacyDefaultQueue; + LegacyDefaultQueue->~Queue(); + free(CHIP_OBJ_TO_HANDLE(LegacyDefaultQueue, ihipStream_t)); LegacyDefaultQueue = nullptr; } chipstar::Queue *chipstar::Device::getLegacyDefaultQueue() { @@ -897,7 +899,8 @@ bool chipstar::Device::removeQueue(chipstar::Queue *ChipQueue) { } ChipQueues_.erase(FoundQueue); - delete ChipQueue; + ChipQueue->~Queue(); + free(CHIP_OBJ_TO_HANDLE(ChipQueue, ihipStream_t)); return true; } @@ -1364,7 +1367,7 @@ void chipstar::Backend::addContext(chipstar::Context *ChipContext) { hipError_t chipstar::Backend::configureCall(dim3 Grid, dim3 Block, size_t SharedMem, - hipStream_t ChipQueue) { + chipstar::Queue *ChipQueue) { logDebug("Backend->configureCall(grid=({},{},{}), block=({},{},{}), " "shared={}, q={}", Grid.x, Grid.y, Grid.z, Block.x, Block.y, Block.z, SharedMem, @@ -1483,9 +1486,9 @@ chipstar::Queue *chipstar::Backend::findQueue(chipstar::Queue *ChipQueue) { auto Dev = ::Backend->getActiveDevice(); LOCK(Dev->DeviceMtx); // chipstar::Device::ChipQueues_ via getQueuesNoLock() - if (ChipQueue == hipStreamPerThread) { + if (ChipQueue == (chipstar::Queue *)hipStreamPerThread) { return Dev->getPerThreadDefaultQueueNoLock(); - } else if (ChipQueue == hipStreamLegacy) { + } else if (ChipQueue == (chipstar::Queue *)hipStreamLegacy) { return Dev->getLegacyDefaultQueue(); } else if (ChipQueue == nullptr) { return Dev->getDefaultQueue(); @@ -1883,4 +1886,4 @@ void chipstar::Queue::addCallback(hipStreamCallback_t Callback, CHIPGraph *chipstar::Queue::getCaptureGraph() const { return static_cast(CaptureGraph_); -} \ No newline at end of file +} diff --git a/src/CHIPBackend.hh b/src/CHIPBackend.hh index be0be6fb3..b297472b7 100644 --- a/src/CHIPBackend.hh +++ b/src/CHIPBackend.hh @@ -1180,7 +1180,7 @@ public: * @param chip_queue_ */ ExecItem(dim3 GirdDim, dim3 BlockDim, size_t SharedMem, - hipStream_t ChipQueue); + chipstar::Queue *ChipQueue); /** * @brief Set the chipstar::Kernel object @@ -1802,7 +1802,7 @@ public: virtual chipstar::ExecItem *createExecItem(dim3 GirdDim, dim3 BlockDim, size_t SharedMem, - hipStream_t ChipQueue) = 0; + chipstar::Queue *ChipQueue) = 0; int getPerThreadQueuesActive(); std::mutex SetActiveMtx; @@ -1956,7 +1956,7 @@ public: * @return hipError_t */ hipError_t configureCall(dim3 GridDim, dim3 BlockDim, size_t SharedMem, - hipStream_t ChipQueue); + chipstar::Queue *ChipQueue); /** * @brief Return a device which meets or exceeds the requirements @@ -2029,7 +2029,7 @@ public: /** * @brief Queue class for submitting kernels to for execution */ -class Queue : public ihipStream_t { +class Queue { protected: hipStreamCaptureStatus CaptureStatus_ = hipStreamCaptureStatusNone; hipStreamCaptureMode CaptureMode_ = hipStreamCaptureModeGlobal; diff --git a/src/CHIPBindings.cc b/src/CHIPBindings.cc index 10c6b2322..c0a9b8ed1 100644 --- a/src/CHIPBindings.cc +++ b/src/CHIPBindings.cc @@ -498,7 +498,7 @@ hipError_t hipGraphInstantiateWithFlags(hipGraphExec_t *pGraphExec, hipError_t hipGraphLaunch(hipGraphExec_t graphExec, hipStream_t stream) { CHIP_TRY CHIPInitialize(); - auto ChipQueue = static_cast(stream); + auto ChipQueue = QUEUE(stream); ChipQueue = Backend->findQueue(ChipQueue); EXEC(graphExec)->launch(ChipQueue); RETURN(hipSuccess); @@ -1208,7 +1208,7 @@ hipError_t hipStreamBeginCapture(hipStream_t stream, hipStreamCaptureMode mode) { CHIP_TRY CHIPInitialize(); - auto ChipQueue = static_cast(stream); + auto ChipQueue = QUEUE(stream); if (ChipQueue == Backend->getActiveDevice()->getLegacyDefaultQueue()) { RETURN(hipErrorInvalidValue); @@ -1224,7 +1224,7 @@ hipError_t hipStreamBeginCapture(hipStream_t stream, hipError_t hipStreamEndCapture(hipStream_t stream, hipGraph_t *pGraph) { CHIP_TRY CHIPInitialize(); - auto ChipQueue = static_cast(stream); + auto ChipQueue = QUEUE(stream); if (ChipQueue == Backend->getActiveDevice()->getLegacyDefaultQueue()) { RETURN(hipErrorInvalidValue); @@ -1293,7 +1293,7 @@ hipError_t hipMemcpyWithStream(void *Dst, const void *Src, size_t SizeBytes, hipMemcpyKind Kind, hipStream_t Stream) { CHIP_TRY CHIPInitialize(); - auto ChipQueue = static_cast(Stream); + auto ChipQueue = QUEUE(Stream); ChipQueue = Backend->findQueue(ChipQueue); if (ChipQueue->getCaptureStatus() != hipStreamCaptureStatusNone) { ChipQueue->setCaptureStatus(hipStreamCaptureStatusInvalidated); @@ -1338,7 +1338,7 @@ static inline hipError_t hipMemcpyAsyncInternal(void *Dst, const void *Src, return hipSuccess; NULLCHECK(Dst, Src); - auto ChipQueue = Backend->findQueue(static_cast(Stream)); + auto ChipQueue = Backend->findQueue(QUEUE(Stream)); if (ChipQueue->captureIntoGraph(Dst, Src, SizeBytes, Kind)) { return hipSuccess; @@ -1376,7 +1376,7 @@ hipMemcpy2DAsyncInternal(void *Dst, size_t DPitch, const void *Src, if (Height * Width == 0) return hipSuccess; - auto ChipQueue = Backend->findQueue(static_cast(Stream)); + auto ChipQueue = Backend->findQueue(QUEUE(Stream)); const hipMemcpy3DParms Params = { /* hipArray_t srcArray */ nullptr, /* struct hipPos srcPos */ make_hipPos(1, 1, 1), @@ -1421,7 +1421,7 @@ hipError_t hipMemcpy2DAsync(void *Dst, size_t DPitch, const void *Src, static inline hipError_t hipMemcpyParam2DAsyncInternal(const hip_Memcpy2D *PCopy, hipStream_t Stream) { - auto ChipQueue = static_cast(Stream); + auto ChipQueue = QUEUE(Stream); ChipQueue = Backend->findQueue(ChipQueue); @@ -1447,7 +1447,7 @@ hipMemcpyParam2DAsyncInternal(const hip_Memcpy2D *PCopy, hipStream_t Stream) { return hipMemcpy2DAsyncInternal(PCopy->dstArray->data, PCopy->WidthInBytes, PCopy->srcHost, PCopy->srcPitch, PCopy->WidthInBytes, PCopy->Height, - hipMemcpyDefault, ChipQueue); + hipMemcpyDefault, STREAM(ChipQueue)); } hipError_t hipMemcpyParam2DAsync(const hip_Memcpy2D *PCopy, @@ -1468,7 +1468,7 @@ hipError_t __hipPushCallConfiguration(dim3 GridDim, dim3 BlockDim, logDebug("__hipPushCallConfiguration()"); CHIP_TRY CHIPInitialize(); - auto ChipQueue = static_cast(Stream); + auto ChipQueue = QUEUE(Stream); ChipQueue = Backend->findQueue(ChipQueue); RETURN(Backend->configureCall(GridDim, BlockDim, SharedMem, ChipQueue)); @@ -1487,7 +1487,7 @@ hipError_t __hipPopCallConfiguration(dim3 *GridDim, dim3 *BlockDim, *GridDim = ExecItem->getGrid(); *BlockDim = ExecItem->getBlock(); *SharedMem = ExecItem->getSharedMem(); - *Stream = ExecItem->getQueue(); + *Stream = STREAM(ExecItem->getQueue()); delete ExecItem; RETURN(hipSuccess); CHIP_CATCH @@ -2030,7 +2030,7 @@ hipStreamCreateWithPriorityInternal(hipStream_t *Stream, unsigned int Flags, auto ClampedPriority = std::min(MinPriority, std::max(MaxPriority, Priority)); chipstar::Queue *ChipQueue = Dev->createQueueAndRegister(FlagsParsed, ClampedPriority); - *Stream = ChipQueue; + *Stream = CHIP_OBJ_TO_HANDLE(ChipQueue, ihipStream_t); return hipSuccess; } @@ -2074,15 +2074,15 @@ hipError_t hipDeviceGetStreamPriorityRange(int *LeastPriority, hipError_t hipStreamDestroy(hipStream_t Stream) { CHIP_TRY CHIPInitialize(); - auto ChipQueue = static_cast(Stream); - if (ChipQueue == hipStreamPerThread) + if (Stream == hipStreamPerThread) CHIPERR_LOG_AND_THROW("Attemped to destroy default per-thread queue", hipErrorTbd); - if (ChipQueue == hipStreamLegacy) + if (Stream == hipStreamLegacy) CHIPERR_LOG_AND_THROW("Attemped to destroy default legacy queue", hipErrorTbd); + auto ChipQueue = QUEUE(Stream); ChipQueue = Backend->findQueue(ChipQueue); if (ChipQueue->getCaptureStatus() != hipStreamCaptureStatusNone) { ChipQueue->setCaptureStatus(hipStreamCaptureStatusInvalidated); @@ -2103,7 +2103,7 @@ hipError_t hipStreamDestroy(hipStream_t Stream) { } static inline hipError_t hipStreamQueryInternal(hipStream_t Stream) { - auto ChipQueue = static_cast(Stream); + auto ChipQueue = QUEUE(Stream); ChipQueue = Backend->findQueue(ChipQueue); if (ChipQueue->getCaptureStatus() != hipStreamCaptureStatusNone) { ChipQueue->setCaptureStatus(hipStreamCaptureStatusInvalidated); @@ -2124,7 +2124,7 @@ hipError_t hipStreamQuery(hipStream_t Stream) { } static inline hipError_t hipStreamSynchronizeInternal(hipStream_t Stream) { - auto ChipQueue = static_cast(Stream); + auto ChipQueue = QUEUE(Stream); ChipQueue = Backend->findQueue(ChipQueue); if (ChipQueue->getCaptureStatus() != hipStreamCaptureStatusNone) { ChipQueue->setCaptureStatus(hipStreamCaptureStatusInvalidated); @@ -2145,7 +2145,7 @@ hipError_t hipStreamSynchronize(hipStream_t Stream) { hipError_t hipStreamWaitEventInternal(hipStream_t Stream, hipEvent_t Event, unsigned int Flags) { - auto ChipQueue = static_cast(Stream); + auto ChipQueue = QUEUE(Stream); auto ChipEvent = static_cast(Event); ChipQueue = Backend->findQueue(ChipQueue); @@ -2180,15 +2180,14 @@ hipError_t hipStreamWaitEvent(hipStream_t Stream, hipEvent_t Event, int hipGetStreamDeviceId(hipStream_t Stream) { CHIP_TRY CHIPInitialize(); - chipstar::Device *Device = - Backend->findQueue(static_cast(Stream))->getDevice(); + chipstar::Device *Device = Backend->findQueue(QUEUE(Stream))->getDevice(); return Device->getDeviceId(); CHIP_CATCH } static inline hipError_t hipStreamGetFlagsInternal(hipStream_t Stream, unsigned int *Flags) { - auto ChipQueue = static_cast(Stream); + auto ChipQueue = QUEUE(Stream); ChipQueue = Backend->findQueue(ChipQueue); if (ChipQueue->getCaptureStatus() != hipStreamCaptureStatusNone) { ChipQueue->setCaptureStatus(hipStreamCaptureStatusInvalidated); @@ -2210,7 +2209,7 @@ hipError_t hipStreamGetFlags(hipStream_t Stream, unsigned int *Flags) { static inline hipError_t hipStreamGetPriorityInternal(hipStream_t Stream, int *Priority) { - auto ChipQueue = static_cast(Stream); + auto ChipQueue = QUEUE(Stream); if (Priority == nullptr) { CHIPERR_LOG_AND_THROW("Priority is nullptr", hipErrorInvalidValue); } @@ -2245,7 +2244,7 @@ hipError_t hipStreamAddCallback(hipStream_t Stream, // TODO: Can't use NULLCHECK for this one if (Callback == nullptr) CHIPERR_LOG_AND_THROW("passed in nullptr", hipErrorInvalidValue); - auto ChipQueue = static_cast(Stream); + auto ChipQueue = QUEUE(Stream); ChipQueue = Backend->findQueue(ChipQueue); if (ChipQueue->getCaptureStatus() != hipStreamCaptureStatusNone) { ChipQueue->setCaptureStatus(hipStreamCaptureStatusInvalidated); @@ -2308,7 +2307,7 @@ hipError_t hipEventCreateWithFlags(hipEvent_t *Event, unsigned Flags) { hipError_t hipEventRecordInternal(hipEvent_t Event, hipStream_t Stream) { auto ChipEvent = static_cast(Event); - auto ChipQueue = static_cast(Stream); + auto ChipQueue = QUEUE(Stream); ChipQueue = Backend->findQueue(ChipQueue); if (ChipQueue->captureIntoGraph(ChipEvent)) { return hipSuccess; @@ -2559,7 +2558,7 @@ hipError_t hipMemPrefetchAsync(const void *Ptr, size_t Count, int DstDevId, UNIMPLEMENTED(hipErrorTbd); CHIPInitialize(); NULLCHECK(Ptr); - auto ChipQueue = static_cast(Stream); + auto ChipQueue = QUEUE(Stream); ChipQueue = Backend->findQueue(ChipQueue); // TODO Graphs - async operation should be supported by graphs but no prefetch // node is defined @@ -3025,7 +3024,7 @@ hipError_t hipMemcpyDtoH(void *Dst, hipDeviceptr_t Src, size_t SizeBytes) { static inline hipError_t hipMemsetAsyncInternal(void *Dst, int Value, size_t SizeBytes, hipStream_t Stream) { - auto ChipQueue = Backend->findQueue(static_cast(Stream)); + auto ChipQueue = Backend->findQueue(QUEUE(Stream)); const hipMemsetParams Params = { /* Dst */ Dst, /* elementSize*/ 1, @@ -3057,7 +3056,7 @@ static inline hipError_t hipMemset2DAsyncInternal(void *Dst, size_t Pitch, int Value, size_t Width, size_t Height, hipStream_t Stream) { - auto ChipQueue = Backend->findQueue(static_cast(Stream)); + auto ChipQueue = Backend->findQueue(QUEUE(Stream)); const hipMemsetParams Params = { /* Dst */ Dst, /* elementSize*/ 1, @@ -3103,8 +3102,8 @@ hipError_t hipMemset2D(void *Dst, size_t Pitch, int Value, size_t Width, NULLCHECK(Dst); auto ChipQueue = Backend->getActiveDevice()->getDefaultQueue(); - auto Res = - hipMemset2DAsyncInternal(Dst, Pitch, Value, Width, Height, ChipQueue); + auto Res = hipMemset2DAsyncInternal(Dst, Pitch, Value, Width, Height, + STREAM(ChipQueue)); if (Res == hipSuccess) ChipQueue->finish(); @@ -3115,7 +3114,7 @@ hipError_t hipMemset2D(void *Dst, size_t Pitch, int Value, size_t Width, static inline hipError_t hipMemset3DAsyncInternal(hipPitchedPtr PitchedDevPtr, int Value, hipExtent Extent, hipStream_t Stream) { - auto ChipQueue = Backend->findQueue(static_cast(Stream)); + auto ChipQueue = Backend->findQueue(QUEUE(Stream)); const hipMemsetParams Params = { /* Dst */ PitchedDevPtr.ptr, /* elementSize*/ 1, @@ -3190,7 +3189,8 @@ hipError_t hipMemset3D(hipPitchedPtr PitchedDevPtr, int Value, NULLCHECK(PitchedDevPtr.ptr); auto ChipQueue = Backend->getActiveDevice()->getDefaultQueue(); - auto Res = hipMemset3DAsyncInternal(PitchedDevPtr, Value, Extent, ChipQueue); + auto Res = + hipMemset3DAsyncInternal(PitchedDevPtr, Value, Extent, STREAM(ChipQueue)); if (Res == hipSuccess) ChipQueue->finish(); @@ -3258,7 +3258,7 @@ hipError_t hipMemsetD8Async(hipDeviceptr_t Dest, unsigned char Value, CHIPInitialize(); NULLCHECK(Dest); - auto ChipQueue = Backend->findQueue(static_cast(Stream)); + auto ChipQueue = Backend->findQueue(QUEUE(Stream)); const hipMemsetParams Params = { /* Dst */ Dest, /* elementSize*/ 1, @@ -3291,7 +3291,7 @@ hipError_t hipMemsetD16Async(hipDeviceptr_t Dest, unsigned short Value, CHIP_TRY CHIPInitialize(); - auto ChipQueue = Backend->findQueue(static_cast(Stream)); + auto ChipQueue = Backend->findQueue(QUEUE(Stream)); const hipMemsetParams Params = { /* Dst */ Dest, /* elementSize*/ 2, @@ -3327,7 +3327,7 @@ hipError_t hipMemsetD32Async(hipDeviceptr_t Dst, int Value, size_t Count, CHIP_TRY CHIPInitialize(); - auto ChipQueue = Backend->findQueue(static_cast(Stream)); + auto ChipQueue = Backend->findQueue(QUEUE(Stream)); const hipMemsetParams Params = { /* Dst */ Dst, /* elementSize*/ 4, @@ -3363,7 +3363,7 @@ hipError_t hipMemcpyParam2D(const hip_Memcpy2D *PCopy) { CHIPInitialize(); NULLCHECK(PCopy); auto ChipQueue = Backend->getActiveDevice()->getDefaultQueue(); - auto Res = hipMemcpyParam2DAsyncInternal(PCopy, ChipQueue); + auto Res = hipMemcpyParam2DAsyncInternal(PCopy, STREAM(ChipQueue)); if (Res == hipSuccess) ChipQueue->finish(); @@ -3383,7 +3383,7 @@ hipError_t hipMemcpy2D(void *Dst, size_t DPitch, const void *Src, size_t SPitch, auto ChipQueue = Backend->getActiveDevice()->getDefaultQueue(); hipError_t Res = hipMemcpy2DAsyncInternal(Dst, DPitch, Src, SPitch, Width, - Height, Kind, ChipQueue); + Height, Kind, STREAM(ChipQueue)); if (Res == hipSuccess) ChipQueue->finish(); @@ -3397,7 +3397,7 @@ hipMemcpy2DToArrayAsyncInternal(hipArray *Dst, size_t WOffset, size_t HOffset, const void *Src, size_t SPitch, size_t Width, size_t Height, hipMemcpyKind Kind, hipStream_t Stream) { - auto ChipQueue = Backend->findQueue(static_cast(Stream)); + auto ChipQueue = Backend->findQueue(QUEUE(Stream)); const hipMemcpy3DParms Params = { /* hipArray_t srcArray */ nullptr, /* struct hipPos srcPos */ make_hipPos(1, 1, 1), @@ -3453,8 +3453,9 @@ hipError_t hipMemcpy2DToArray(hipArray *Dst, size_t WOffset, size_t HOffset, NULLCHECK(Dst, Src); auto ChipQueue = Backend->getActiveDevice()->getDefaultQueue(); - auto Res = hipMemcpy2DToArrayAsyncInternal(Dst, WOffset, HOffset, Src, SPitch, - Width, Height, Kind, ChipQueue); + auto Res = + hipMemcpy2DToArrayAsyncInternal(Dst, WOffset, HOffset, Src, SPitch, Width, + Height, Kind, STREAM(ChipQueue)); if (Res == hipSuccess) ChipQueue->finish(); @@ -3468,7 +3469,7 @@ hipMemcpy2DFromArrayAsyncInternal(void *Dst, size_t DPitch, hipArray_const_t Src, size_t WOffset, size_t HOffset, size_t Width, size_t Height, hipMemcpyKind Kind, hipStream_t Stream) { - auto ChipQueue = Backend->findQueue(static_cast(Stream)); + auto ChipQueue = Backend->findQueue(QUEUE(Stream)); const hipMemcpy3DParms Params = { /* hipArray_t srcArray */ const_cast(Src), /* struct hipPos srcPos */ make_hipPos(WOffset, HOffset, 1), @@ -3544,8 +3545,9 @@ hipError_t hipMemcpy2DFromArray(void *Dst, size_t DPitch, hipArray_const_t Src, NULLCHECK(Dst, Src); auto ChipQueue = Backend->getActiveDevice()->getDefaultQueue(); - auto Res = hipMemcpy2DFromArrayAsyncInternal( - Dst, DPitch, Src, WOffset, HOffset, Width, Height, Kind, ChipQueue); + auto Res = + hipMemcpy2DFromArrayAsyncInternal(Dst, DPitch, Src, WOffset, HOffset, + Width, Height, Kind, STREAM(ChipQueue)); if (Res == hipSuccess) ChipQueue->finish(); @@ -3621,7 +3623,7 @@ hipError_t hipMemcpyHtoA(hipArray *DstArray, size_t DstOffset, hipError_t hipMemcpy3DAsyncInternal(const struct hipMemcpy3DParms *Params, hipStream_t Stream) { - auto ChipQueue = Backend->findQueue(static_cast(Stream)); + auto ChipQueue = Backend->findQueue(QUEUE(Stream)); if (ChipQueue->captureIntoGraph(Params)) { return hipSuccess; } @@ -3738,7 +3740,7 @@ hipError_t hipMemcpy3D(const struct hipMemcpy3DParms *Params) { NULLCHECK(Params); auto ChipQueue = Backend->getActiveDevice()->getDefaultQueue(); - auto Res = hipMemcpy3DAsyncInternal(Params, ChipQueue); + auto Res = hipMemcpy3DAsyncInternal(Params, STREAM(ChipQueue)); if (Res == hipSuccess) ChipQueue->finish(); @@ -3804,7 +3806,7 @@ hipError_t hipMemcpyToSymbolAsyncInternal(const void *Symbol, const void *Src, CHIPERR_LOG_AND_THROW("Invalid memcpy direction!", hipErrorInvalidMemcpyDirection); - auto ChipQueue = Backend->findQueue(static_cast(Stream)); + auto ChipQueue = Backend->findQueue(QUEUE(Stream)); if (ChipQueue->captureIntoGraph( const_cast(Src), Symbol, SizeBytes, Offset, Kind)) { return hipSuccess; @@ -3841,8 +3843,8 @@ hipError_t hipMemcpyToSymbol(const void *Symbol, const void *Src, CHIPInitialize(); auto ChipQueue = Backend->getActiveDevice()->getDefaultQueue(); - hipError_t Res = hipMemcpyToSymbolAsyncInternal(Symbol, Src, SizeBytes, - Offset, Kind, ChipQueue); + hipError_t Res = hipMemcpyToSymbolAsyncInternal( + Symbol, Src, SizeBytes, Offset, Kind, STREAM(ChipQueue)); if (Res == hipSuccess) ChipQueue->finish(); @@ -3864,7 +3866,7 @@ hipError_t hipMemcpyFromSymbolAsyncInternal(void *Dst, const void *Symbol, CHIPERR_LOG_AND_THROW("Invalid memcpy direction!", hipErrorInvalidMemcpyDirection); - auto ChipQueue = Backend->findQueue(static_cast(Stream)); + auto ChipQueue = Backend->findQueue(QUEUE(Stream)); if (ChipQueue->captureIntoGraph( const_cast(Dst), Symbol, SizeBytes, Offset, Kind)) { return hipSuccess; @@ -3899,8 +3901,8 @@ hipError_t hipMemcpyFromSymbol(void *Dst, const void *Symbol, size_t SizeBytes, auto ChipQueue = Backend->getActiveDevice()->getDefaultQueue(); - hipError_t Res = hipMemcpyFromSymbolAsyncInternal(Dst, Symbol, SizeBytes, - Offset, Kind, ChipQueue); + hipError_t Res = hipMemcpyFromSymbolAsyncInternal( + Dst, Symbol, SizeBytes, Offset, Kind, STREAM(ChipQueue)); if (Res == hipSuccess) ChipQueue->finish(); @@ -3949,7 +3951,7 @@ static inline hipError_t hipLaunchKernelInternal(const void *HostFunction, dim3 GridDim, dim3 BlockDim, void **Args, size_t SharedMem, hipStream_t Stream) { - auto ChipQueue = Backend->findQueue(static_cast(Stream)); + auto ChipQueue = Backend->findQueue(QUEUE(Stream)); if (ChipQueue->captureIntoGraph( HostFunction, GridDim, BlockDim, Args, SharedMem)) { return hipSuccess; @@ -4169,7 +4171,7 @@ static inline hipError_t hipModuleLaunchKernelInternal( unsigned int GridDimZ, unsigned int BlockDimX, unsigned int BlockDimY, unsigned int BlockDimZ, unsigned int SharedMemBytes, hipStream_t Stream, void *KernelParams[], void *Extra[]) { - auto ChipQueue = Backend->findQueue(static_cast(Stream)); + auto ChipQueue = Backend->findQueue(QUEUE(Stream)); if (KernelParams == Extra) CHIPERR_LOG_AND_THROW("either kernelParams or extra is required", @@ -4320,7 +4322,7 @@ hipError_t hipConfigureCall(dim3 GridDim, dim3 BlockDim, size_t SharedMem, hipStream_t Stream) { CHIP_TRY CHIPInitialize(); - auto ChipQueue = Backend->findQueue(static_cast(Stream)); + auto ChipQueue = Backend->findQueue(QUEUE(Stream)); logDebug("hipConfigureCall()"); RETURN(Backend->configureCall(GridDim, BlockDim, SharedMem, ChipQueue)); RETURN(hipSuccess); @@ -4819,8 +4821,7 @@ int hipGetBackendNativeHandles(uintptr_t Stream, uintptr_t *NativeHandles, CHIP_TRY CHIPInitialize(); logDebug("hipGetBackendNativeHandles"); - auto ChipQueue = - Backend->findQueue(reinterpret_cast(Stream)); + auto ChipQueue = Backend->findQueue(QUEUE((hipStream_t)Stream)); RETURN(ChipQueue->getBackendHandles(NativeHandles, NumHandles)); CHIP_CATCH } diff --git a/src/CHIPGraph.cc b/src/CHIPGraph.cc index 55fdec4af..8eba85061 100644 --- a/src/CHIPGraph.cc +++ b/src/CHIPGraph.cc @@ -109,7 +109,7 @@ void CHIPGraphNodeMemcpy::execute(chipstar::Queue *Queue) const { CHIPERR_LOG_AND_THROW("Error enountered while executing a graph node", hipErrorTbd); } else { - auto Status = hipMemcpy3DAsyncInternal(&Params_, Queue); + auto Status = hipMemcpy3DAsyncInternal(&Params_, STREAM(Queue)); if (Status != hipSuccess) CHIPERR_LOG_AND_THROW("Error enountered while executing a graph node", hipErrorTbd); @@ -398,7 +398,7 @@ void CHIPGraphExec::ExtractSubGraphs_() { void CHIPGraphNodeEventRecord::execute(chipstar::Queue *Queue) const { NULLCHECK(Event_); - auto Status = hipEventRecordInternal(Event_, Queue); + auto Status = hipEventRecordInternal(Event_, STREAM(Queue)); if (Status != hipSuccess) CHIPERR_LOG_AND_THROW("Error enountered while executing a graph node", hipErrorTbd); @@ -407,8 +407,8 @@ void CHIPGraphNodeEventRecord::execute(chipstar::Queue *Queue) const { void CHIPGraphNodeMemcpyFromSymbol::execute(chipstar::Queue *Queue) const { NULLCHECK(Dst_, Symbol_); auto ChipQueue = Backend->getActiveDevice()->getDefaultQueue(); - auto Status = hipMemcpyFromSymbolAsyncInternal(Dst_, Symbol_, SizeBytes_, - Offset_, Kind_, ChipQueue); + auto Status = hipMemcpyFromSymbolAsyncInternal( + Dst_, Symbol_, SizeBytes_, Offset_, Kind_, STREAM(ChipQueue)); if (Status == hipSuccess) ChipQueue->finish(); if (Status != hipSuccess) @@ -419,8 +419,8 @@ void CHIPGraphNodeMemcpyFromSymbol::execute(chipstar::Queue *Queue) const { void CHIPGraphNodeMemcpyToSymbol::execute(chipstar::Queue *Queue) const { NULLCHECK(Symbol_, Src_); auto ChipQueue = Backend->getActiveDevice()->getDefaultQueue(); - auto Status = hipMemcpyToSymbolAsyncInternal(Symbol_, Src_, SizeBytes_, - Offset_, Kind_, ChipQueue); + auto Status = hipMemcpyToSymbolAsyncInternal( + Symbol_, Src_, SizeBytes_, Offset_, Kind_, STREAM(ChipQueue)); if (Status == hipSuccess) ChipQueue->finish(); if (Status != hipSuccess) @@ -431,7 +431,7 @@ void CHIPGraphNodeMemcpyToSymbol::execute(chipstar::Queue *Queue) const { void CHIPGraphNodeWaitEvent::execute(chipstar::Queue *Queue) const { // current HIP API requires Flags unsigned int Flags = 0; - auto Status = hipStreamWaitEventInternal(Queue, Event_, Flags); + auto Status = hipStreamWaitEventInternal(STREAM(Queue), Event_, Flags); if (Status != hipSuccess) CHIPERR_LOG_AND_THROW("Error enountered while executing a graph node", hipErrorTbd); diff --git a/src/backend/Level0/CHIPBackendLevel0.cc b/src/backend/Level0/CHIPBackendLevel0.cc index 0d0f2ea5f..7d616dce7 100644 --- a/src/backend/Level0/CHIPBackendLevel0.cc +++ b/src/backend/Level0/CHIPBackendLevel0.cc @@ -1475,10 +1475,9 @@ void LZEventPool::returnSlot(int Slot) { // CHIPBackendLevel0 // *********************************************************************** -chipstar::ExecItem *CHIPBackendLevel0::createExecItem(dim3 GirdDim, - dim3 BlockDim, - size_t SharedMem, - hipStream_t ChipQueue) { +chipstar::ExecItem * +CHIPBackendLevel0::createExecItem(dim3 GirdDim, dim3 BlockDim, size_t SharedMem, + chipstar::Queue *ChipQueue) { CHIPExecItemLevel0 *ExecItem = new CHIPExecItemLevel0(GirdDim, BlockDim, SharedMem, ChipQueue); return ExecItem; @@ -1997,7 +1996,9 @@ void CHIPDeviceLevel0::populateDevicePropertiesImpl() { chipstar::Queue *CHIPDeviceLevel0::createQueue(chipstar::QueueFlags Flags, int Priority) { - CHIPQueueLevel0 *NewQ = new CHIPQueueLevel0(this, Flags, Priority); + void *mem = malloc(sizeof(ihipDispatch) + sizeof(CHIPQueueLevel0)); + CHIPQueueLevel0 *NewQ = CHIP_HANDLE_TO_OBJ(mem, CHIPQueueLevel0); + NewQ = new (NewQ) CHIPQueueLevel0(this, Flags, Priority); return NewQ; } @@ -2008,9 +2009,13 @@ chipstar::Queue *CHIPDeviceLevel0::createQueue(const uintptr_t *NativeHandles, if (!CmdQ) { logWarn("initializeFromNative: native queue pointer is null. Creating a " "new queue"); - NewQ = new CHIPQueueLevel0(this, 0, 0); + void *mem = malloc(sizeof(ihipDispatch) + sizeof(CHIPQueueLevel0)); + NewQ = CHIP_HANDLE_TO_OBJ(mem, CHIPQueueLevel0); + NewQ = new (NewQ) CHIPQueueLevel0(this, 0, 0); } else { - NewQ = new CHIPQueueLevel0(this, CmdQ); + void *mem = malloc(sizeof(ihipDispatch) + sizeof(CHIPQueueLevel0)); + NewQ = CHIP_HANDLE_TO_OBJ(mem, CHIPQueueLevel0); + NewQ = new (NewQ) CHIPQueueLevel0(this, CmdQ); // In this case CHIP does not own the queue hence setting right ownership if (NewQ != nullptr) { NewQ->setCmdQueueOwnership(false); diff --git a/src/backend/Level0/CHIPBackendLevel0.hh b/src/backend/Level0/CHIPBackendLevel0.hh index a95329c1f..83e6a1bef 100644 --- a/src/backend/Level0/CHIPBackendLevel0.hh +++ b/src/backend/Level0/CHIPBackendLevel0.hh @@ -59,7 +59,7 @@ public: } CHIPExecItemLevel0(dim3 GirdDim, dim3 BlockDim, size_t SharedMem, - hipStream_t ChipQueue) + chipstar::Queue *ChipQueue) : ExecItem(GirdDim, BlockDim, SharedMem, ChipQueue) {} virtual ~CHIPExecItemLevel0() override {} @@ -535,9 +535,9 @@ public: class CHIPBackendLevel0 : public chipstar::Backend { public: - virtual chipstar::ExecItem *createExecItem(dim3 GirdDim, dim3 BlockDim, - size_t SharedMem, - hipStream_t ChipQueue) override; + virtual chipstar::ExecItem * + createExecItem(dim3 GirdDim, dim3 BlockDim, size_t SharedMem, + chipstar::Queue *ChipQueue) override; virtual void uninitialize() override; std::mutex CommandListsMtx; diff --git a/src/backend/OpenCL/CHIPBackendOpenCL.cc b/src/backend/OpenCL/CHIPBackendOpenCL.cc index b2fddedb2..9ec4b6d8f 100644 --- a/src/backend/OpenCL/CHIPBackendOpenCL.cc +++ b/src/backend/OpenCL/CHIPBackendOpenCL.cc @@ -759,7 +759,9 @@ void CHIPModuleOpenCL::compile(chipstar::Device *ChipDev) { chipstar::Queue *CHIPDeviceOpenCL::createQueue(chipstar::QueueFlags Flags, int Priority) { - CHIPQueueOpenCL *NewQ = new CHIPQueueOpenCL(this, Priority); + void *mem = malloc(sizeof(ihipDispatch) + sizeof(CHIPQueueOpenCL)); + CHIPQueueOpenCL *NewQ = CHIP_HANDLE_TO_OBJ(mem, CHIPQueueOpenCL); + NewQ = new (NewQ) CHIPQueueOpenCL(this, Priority); NewQ->setFlags(Flags); return NewQ; } @@ -767,8 +769,9 @@ chipstar::Queue *CHIPDeviceOpenCL::createQueue(chipstar::QueueFlags Flags, chipstar::Queue *CHIPDeviceOpenCL::createQueue(const uintptr_t *NativeHandles, int NumHandles) { cl_command_queue CmdQ = (cl_command_queue)NativeHandles[3]; - CHIPQueueOpenCL *NewQ = - new CHIPQueueOpenCL(this, OCL_DEFAULT_QUEUE_PRIORITY, CmdQ); + void *mem = malloc(sizeof(ihipDispatch) + sizeof(CHIPQueueOpenCL)); + CHIPQueueOpenCL *NewQ = CHIP_HANDLE_TO_OBJ(mem, CHIPQueueOpenCL); + NewQ = new (mem) CHIPQueueOpenCL(this, OCL_DEFAULT_QUEUE_PRIORITY, CmdQ); return NewQ; } @@ -988,7 +991,7 @@ void CHIPQueueOpenCL::addCallback(hipStreamCallback_t Callback, // finishing the user CB's execution. HipStreamCallbackData *Cb = new HipStreamCallbackData{ - this, hipSuccess, UserData, Callback, CallbackEvent}; + STREAM(this), hipSuccess, UserData, Callback, CallbackEvent}; std::vector> WaitForEventsCBB{CallbackEvent}; auto CallbackCompleted = enqueueBarrier(WaitForEventsCBB); @@ -1402,10 +1405,9 @@ void CHIPExecItemOpenCL::setKernel(chipstar::Kernel *Kernel) { // CHIPBackendOpenCL //************************************************************************* -chipstar::ExecItem *CHIPBackendOpenCL::createExecItem(dim3 GirdDim, - dim3 BlockDim, - size_t SharedMem, - hipStream_t ChipQueue) { +chipstar::ExecItem * +CHIPBackendOpenCL::createExecItem(dim3 GirdDim, dim3 BlockDim, size_t SharedMem, + chipstar::Queue *ChipQueue) { CHIPExecItemOpenCL *ExecItem = new CHIPExecItemOpenCL(GirdDim, BlockDim, SharedMem, ChipQueue); return ExecItem; diff --git a/src/backend/OpenCL/CHIPBackendOpenCL.hh b/src/backend/OpenCL/CHIPBackendOpenCL.hh index d11f928c7..cb0350851 100644 --- a/src/backend/OpenCL/CHIPBackendOpenCL.hh +++ b/src/backend/OpenCL/CHIPBackendOpenCL.hh @@ -312,7 +312,7 @@ public: this->Args_ = Other.Args_; } CHIPExecItemOpenCL(dim3 GirdDim, dim3 BlockDim, size_t SharedMem, - hipStream_t ChipQueue) + chipstar::Queue *ChipQueue) : ExecItem(GirdDim, BlockDim, SharedMem, ChipQueue) {} virtual ~CHIPExecItemOpenCL() override { @@ -333,9 +333,9 @@ public: class CHIPBackendOpenCL : public chipstar::Backend { public: - virtual chipstar::ExecItem *createExecItem(dim3 GirdDim, dim3 BlockDim, - size_t SharedMem, - hipStream_t ChipQueue) override; + virtual chipstar::ExecItem * + createExecItem(dim3 GirdDim, dim3 BlockDim, size_t SharedMem, + chipstar::Queue *ChipQueue) override; virtual void uninitialize() override { waitForThreadExit(); } virtual void initializeImpl(std::string CHIPPlatformStr, diff --git a/src/common.hh b/src/common.hh index afcdd6bd2..811706bd9 100644 --- a/src/common.hh +++ b/src/common.hh @@ -41,11 +41,36 @@ #include #include +/// For multiplexing purposes, the first field of our objects must be a void * +/// pointer +struct ihipDispatch { + void *dispatch; +}; + +#define CHIP_HANDLE_TO_OBJ(handle, type) \ + (reinterpret_cast(reinterpret_cast(handle) + \ + sizeof(ihipDispatch))) + +#define CHIP_OBJ_TO_HANDLE(pobj, type) \ + (reinterpret_cast(reinterpret_cast(pobj) - \ + sizeof(ihipDispatch))) + +#define QUEUE(x) \ + ((!(x) || (x) == hipStreamPerThread || (x) == hipStreamLegacy) \ + ? reinterpret_cast(x) \ + : CHIP_HANDLE_TO_OBJ(x, chipstar::Queue)) + +#define STREAM(x) \ + ((!(x) || (x) == (chipstar::Queue *)hipStreamPerThread || \ + (x) == (chipstar::Queue *)hipStreamLegacy) \ + ? reinterpret_cast(x) \ + : CHIP_OBJ_TO_HANDLE(x, ihipStream_t)) + /// The implementation of ihipEvent_t. The chipstar::Event class inherits this /// so ihipEvent_t pointers may carry chipstar::Event instances. struct ihipEvent_t {}; struct ihipCtx_t {}; -struct ihipStream_t {}; +struct ihipStream_t : ihipDispatch {}; struct ihipModule_t {}; struct ihipModuleSymbol_t {}; struct ihipGraph {}; From 61b7d008e272acbd54810215ad4656ebc04c50d8 Mon Sep 17 00:00:00 2001 From: Brice Videau Date: Thu, 13 Jul 2023 00:25:17 +0000 Subject: [PATCH 03/16] Made events dispatchable. --- src/CHIPBackend.hh | 6 ++++- src/CHIPBindings.cc | 34 ++++++++++++------------- src/CHIPGraph.cc | 5 ++-- src/backend/Level0/CHIPBackendLevel0.cc | 22 ++++++++++------ src/backend/OpenCL/CHIPBackendOpenCL.cc | 17 ++++++++----- src/common.hh | 10 +++++++- 6 files changed, 57 insertions(+), 37 deletions(-) diff --git a/src/CHIPBackend.hh b/src/CHIPBackend.hh index b297472b7..73f2343b9 100644 --- a/src/CHIPBackend.hh +++ b/src/CHIPBackend.hh @@ -626,7 +626,7 @@ public: void markHasInitializer(bool State = true) { HasInitializer_ = State; } }; -class Event : public ihipEvent_t { +class Event { protected: bool TrackCalled_ = false; bool UserEvent_ = false; @@ -654,6 +654,10 @@ protected: virtual ~Event(){}; public: + static void deleter(chipstar::Event *e) { + e->~Event(); + free(CHIP_OBJ_TO_HANDLE(e, ihipEvent_t)); + } void markTracked() { TrackCalled_ = true; } bool isTrackCalled() { return TrackCalled_; } void setTrackCalled(bool Val) { TrackCalled_ = Val; } diff --git a/src/CHIPBindings.cc b/src/CHIPBindings.cc index c0a9b8ed1..7210bcb4f 100644 --- a/src/CHIPBindings.cc +++ b/src/CHIPBindings.cc @@ -1079,8 +1079,7 @@ hipError_t hipGraphAddEventRecordNode(hipGraphNode_t *pGraphNode, hipEvent_t event) { CHIP_TRY CHIPInitialize(); - CHIPGraphNodeEventRecord *Node = - new CHIPGraphNodeEventRecord(static_cast(event)); + CHIPGraphNodeEventRecord *Node = new CHIPGraphNodeEventRecord(EVENT(event)); Node->addDependencies(DECONST_NODES(pDependencies), numDependencies); *pGraphNode = Node; GRAPH(graph)->addNode(Node); @@ -1096,7 +1095,7 @@ hipError_t hipGraphEventRecordNodeGetEvent(hipGraphNode_t node, if (!CastNode) CHIPERR_LOG_AND_THROW("Failed to cast CHIPGraphNodeEventRecord", hipErrorInvalidValue); - *event_out = CastNode->getEvent(); + *event_out = HIPEVENT(CastNode->getEvent()); RETURN(hipSuccess); CHIP_CATCH } @@ -1109,7 +1108,7 @@ hipError_t hipGraphEventRecordNodeSetEvent(hipGraphNode_t node, if (!CastNode) CHIPERR_LOG_AND_THROW("Failed to cast CHIPGraphNodeEventRecord", hipErrorInvalidValue); - CastNode->setEvent(static_cast(event)); + CastNode->setEvent(EVENT(event)); RETURN(hipSuccess); CHIP_CATCH } @@ -1131,7 +1130,7 @@ hipError_t hipGraphExecEventRecordNodeSetEvent(hipGraphExec_t hGraphExec, "Node provided failed to cast to CHIPGraphNodeEventRecord", hipErrorInvalidValue); - CastNode->setEvent(static_cast(event)); + CastNode->setEvent(EVENT(event)); RETURN(hipSuccess); CHIP_CATCH } @@ -1142,8 +1141,7 @@ hipError_t hipGraphAddEventWaitNode(hipGraphNode_t *pGraphNode, size_t numDependencies, hipEvent_t event) { CHIP_TRY CHIPInitialize(); - CHIPGraphNodeWaitEvent *Node = - new CHIPGraphNodeWaitEvent(static_cast(event)); + CHIPGraphNodeWaitEvent *Node = new CHIPGraphNodeWaitEvent(EVENT(event)); *pGraphNode = Node; Node->addDependencies(DECONST_NODES(pDependencies), numDependencies); GRAPH(graph)->addNode(Node); @@ -1162,7 +1160,7 @@ hipError_t hipGraphEventWaitNodeGetEvent(hipGraphNode_t node, "Node provided failed to cast to CHIPGraphNodeWaitEvent", hipErrorInvalidValue); - *event_out = CastNode->getEvent(); + *event_out = HIPEVENT(CastNode->getEvent()); RETURN(hipSuccess); CHIP_CATCH } @@ -1177,7 +1175,7 @@ hipError_t hipGraphEventWaitNodeSetEvent(hipGraphNode_t node, "Node provided failed to cast to CHIPGraphNodeWaitEvent", hipErrorInvalidValue); - CastNode->setEvent(static_cast(event)); + CastNode->setEvent(EVENT(event)); RETURN(hipSuccess); CHIP_CATCH } @@ -1200,7 +1198,7 @@ hipError_t hipGraphExecEventWaitNodeSetEvent(hipGraphExec_t hGraphExec, "Node provided failed to cast to CHIPGraphNodeWaitEvent", hipErrorInvalidValue); - CastNode->setEvent(static_cast(event)); + CastNode->setEvent(EVENT(event)); RETURN(hipSuccess); CHIP_CATCH } @@ -2146,7 +2144,7 @@ hipError_t hipStreamSynchronize(hipStream_t Stream) { hipError_t hipStreamWaitEventInternal(hipStream_t Stream, hipEvent_t Event, unsigned int Flags) { auto ChipQueue = QUEUE(Stream); - auto ChipEvent = static_cast(Event); + auto ChipEvent = EVENT(Event); ChipQueue = Backend->findQueue(ChipQueue); if (ChipQueue->captureIntoGraph(ChipEvent)) { @@ -2285,7 +2283,7 @@ static inline hipError_t hipEventCreateWithFlagsInternal(hipEvent_t *Event, Backend->UserEvents.push_back(ChipEvent); } - *Event = ChipEvent.get(); + *Event = HIPEVENT(ChipEvent.get()); return hipSuccess; } @@ -2306,7 +2304,7 @@ hipError_t hipEventCreateWithFlags(hipEvent_t *Event, unsigned Flags) { } hipError_t hipEventRecordInternal(hipEvent_t Event, hipStream_t Stream) { - auto ChipEvent = static_cast(Event); + auto ChipEvent = EVENT(Event); auto ChipQueue = QUEUE(Stream); ChipQueue = Backend->findQueue(ChipQueue); if (ChipQueue->captureIntoGraph(ChipEvent)) { @@ -2330,7 +2328,7 @@ hipError_t hipEventDestroy(hipEvent_t Event) { CHIP_TRY CHIPInitialize(); NULLCHECK(Event); - chipstar::Event *ChipEvent = static_cast(Event); + chipstar::Event *ChipEvent = EVENT(Event); LOCK(Backend->UserEventsMtx); Backend->UserEvents.erase( @@ -2349,7 +2347,7 @@ hipError_t hipEventSynchronize(hipEvent_t Event) { CHIP_TRY CHIPInitialize(); NULLCHECK(Event); - chipstar::Event *ChipEvent = static_cast(Event); + chipstar::Event *ChipEvent = EVENT(Event); ChipEvent->wait(); RETURN(hipSuccess); @@ -2363,8 +2361,8 @@ hipError_t hipEventElapsedTime(float *Ms, hipEvent_t Start, hipEvent_t Stop) { if (!Ms) CHIPERR_LOG_AND_THROW("Ms pointer is null", hipErrorInvalidValue); NULLCHECK(Start, Stop); - chipstar::Event *ChipEventStart = static_cast(Start); - chipstar::Event *ChipEventStop = static_cast(Stop); + chipstar::Event *ChipEventStart = EVENT(Start); + chipstar::Event *ChipEventStop = EVENT(Stop); if (!ChipEventStart->isRecordingOrRecorded() || !ChipEventStop->isRecordingOrRecorded()) { CHIPERR_LOG_AND_THROW("One of the events was not recorded", @@ -2386,7 +2384,7 @@ hipError_t hipEventQuery(hipEvent_t Event) { CHIP_TRY CHIPInitialize(); NULLCHECK(Event); - chipstar::Event *ChipEvent = static_cast(Event); + chipstar::Event *ChipEvent = EVENT(Event); ChipEvent->updateFinishStatus(); if (ChipEvent->isFinished()) diff --git a/src/CHIPGraph.cc b/src/CHIPGraph.cc index 8eba85061..122b0fc55 100644 --- a/src/CHIPGraph.cc +++ b/src/CHIPGraph.cc @@ -398,7 +398,7 @@ void CHIPGraphExec::ExtractSubGraphs_() { void CHIPGraphNodeEventRecord::execute(chipstar::Queue *Queue) const { NULLCHECK(Event_); - auto Status = hipEventRecordInternal(Event_, STREAM(Queue)); + auto Status = hipEventRecordInternal(HIPEVENT(Event_), STREAM(Queue)); if (Status != hipSuccess) CHIPERR_LOG_AND_THROW("Error enountered while executing a graph node", hipErrorTbd); @@ -431,7 +431,8 @@ void CHIPGraphNodeMemcpyToSymbol::execute(chipstar::Queue *Queue) const { void CHIPGraphNodeWaitEvent::execute(chipstar::Queue *Queue) const { // current HIP API requires Flags unsigned int Flags = 0; - auto Status = hipStreamWaitEventInternal(STREAM(Queue), Event_, Flags); + auto Status = + hipStreamWaitEventInternal(STREAM(Queue), HIPEVENT(Event_), Flags); if (Status != hipSuccess) CHIPERR_LOG_AND_THROW("Error enountered while executing a graph node", hipErrorTbd); diff --git a/src/backend/Level0/CHIPBackendLevel0.cc b/src/backend/Level0/CHIPBackendLevel0.cc index 7d616dce7..67f359b06 100644 --- a/src/backend/Level0/CHIPBackendLevel0.cc +++ b/src/backend/Level0/CHIPBackendLevel0.cc @@ -1415,8 +1415,11 @@ LZEventPool::LZEventPool(CHIPContextLevel0 *Ctx, unsigned int Size) for (unsigned i = 0; i < Size_; i++) { chipstar::EventFlags Flags; - Events_.push_back(std::shared_ptr( - new CHIPEventLevel0(Ctx_, this, i, Flags))); + void *mem = malloc(sizeof(ihipDispatch) + sizeof(CHIPEventLevel0)); + CHIPEventLevel0 *Event = CHIP_HANDLE_TO_OBJ(mem, CHIPEventLevel0); + Event = new (Event) CHIPEventLevel0(Ctx_, this, i, Flags); + Events_.push_back( + std::shared_ptr(Event, chipstar::Event::deleter)); FreeSlots_.push(i); } }; @@ -1488,8 +1491,10 @@ CHIPBackendLevel0::createCHIPEvent(chipstar::Context *ChipCtx, chipstar::EventFlags Flags, bool UserEvent) { std::shared_ptr Event; if (UserEvent) { - Event = std::shared_ptr( - new CHIPEventLevel0((CHIPContextLevel0 *)ChipCtx, Flags)); + void *mem = malloc(sizeof(ihipDispatch) + sizeof(CHIPEventLevel0)); + CHIPEventLevel0 *evt = CHIP_HANDLE_TO_OBJ(mem, CHIPEventLevel0); + evt = new (evt) CHIPEventLevel0((CHIPContextLevel0 *)ChipCtx, Flags); + Event = std::shared_ptr(evt, chipstar::Event::deleter); Event->setUserEvent(true); } else { auto ZeCtx = (CHIPContextLevel0 *)ChipCtx; @@ -1671,14 +1676,15 @@ void CHIPBackendLevel0::initializeFromNative(const uintptr_t *NativeHandles, hipEvent_t CHIPBackendLevel0::getHipEvent(void *NativeEvent) { ze_event_handle_t E = (ze_event_handle_t)NativeEvent; - CHIPEventLevel0 *NewEvent = - new CHIPEventLevel0((CHIPContextLevel0 *)ActiveCtx_, E); + void *mem = malloc(sizeof(ihipDispatch) + sizeof(CHIPEventLevel0)); + CHIPEventLevel0 *NewEvent = CHIP_HANDLE_TO_OBJ(mem, CHIPEventLevel0); + NewEvent = new (NewEvent) CHIPEventLevel0((CHIPContextLevel0 *)ActiveCtx_, E); // NewEvent->increaseRefCount("getHipEvent"); - return NewEvent; + return HIPEVENT(NewEvent); } void *CHIPBackendLevel0::getNativeEvent(hipEvent_t HipEvent) { - CHIPEventLevel0 *E = static_cast(HipEvent); + CHIPEventLevel0 *E = CHIP_HANDLE_TO_OBJ(HipEvent, CHIPEventLevel0); if (!E->isRecordingOrRecorded()) return nullptr; // TODO should we retain here? diff --git a/src/backend/OpenCL/CHIPBackendOpenCL.cc b/src/backend/OpenCL/CHIPBackendOpenCL.cc index 9ec4b6d8f..4cb2f7449 100644 --- a/src/backend/OpenCL/CHIPBackendOpenCL.cc +++ b/src/backend/OpenCL/CHIPBackendOpenCL.cc @@ -584,10 +584,12 @@ CHIPEventOpenCL::~CHIPEventOpenCL() { ClEvent = nullptr; } std::shared_ptr CHIPBackendOpenCL::createCHIPEvent(chipstar::Context *ChipCtx, chipstar::EventFlags Flags, bool UserEvent) { - CHIPEventOpenCL *Event = new CHIPEventOpenCL((CHIPContextOpenCL *)ChipCtx, - nullptr, Flags, UserEvent); + void *mem = malloc(sizeof(ihipDispatch) + sizeof(CHIPEventOpenCL)); + CHIPEventOpenCL *Event = CHIP_HANDLE_TO_OBJ(mem, CHIPEventOpenCL); + Event = new (Event) + CHIPEventOpenCL((CHIPContextOpenCL *)ChipCtx, nullptr, Flags, UserEvent); - return std::shared_ptr(Event); + return std::shared_ptr(Event, chipstar::Event::deleter); } void CHIPEventOpenCL::recordStream(chipstar::Queue *ChipQueue) { @@ -1551,15 +1553,16 @@ void CHIPBackendOpenCL::initializeFromNative(const uintptr_t *NativeHandles, hipEvent_t CHIPBackendOpenCL::getHipEvent(void *NativeEvent) { cl_event E = (cl_event)NativeEvent; + void *mem = malloc(sizeof(ihipDispatch) + sizeof(CHIPEventOpenCL)); + CHIPEventOpenCL *NewEvent = CHIP_HANDLE_TO_OBJ(mem, CHIPEventOpenCL); // this retains cl_event - CHIPEventOpenCL *NewEvent = - new CHIPEventOpenCL((CHIPContextOpenCL *)ActiveCtx_, E); + NewEvent = new (NewEvent) CHIPEventOpenCL((CHIPContextOpenCL *)ActiveCtx_, E); NewEvent->Msg = "fromHipEvent"; - return NewEvent; + return HIPEVENT(NewEvent); } void *CHIPBackendOpenCL::getNativeEvent(hipEvent_t HipEvent) { - CHIPEventOpenCL *E = (CHIPEventOpenCL *)HipEvent; + CHIPEventOpenCL *E = CHIP_HANDLE_TO_OBJ(HipEvent, CHIPEventOpenCL); if (!E->isRecordingOrRecorded()) return nullptr; return (void *)E->ClEvent; diff --git a/src/common.hh b/src/common.hh index 811706bd9..e475c2004 100644 --- a/src/common.hh +++ b/src/common.hh @@ -66,9 +66,17 @@ struct ihipDispatch { ? reinterpret_cast(x) \ : CHIP_OBJ_TO_HANDLE(x, ihipStream_t)) +#define EVENT(x) \ + (!(x) ? reinterpret_cast(x) \ + : CHIP_HANDLE_TO_OBJ(x, chipstar::Event)) + +#define HIPEVENT(x) \ + (!(x) ? reinterpret_cast(x) \ + : CHIP_OBJ_TO_HANDLE(x, ihipEvent_t)) + /// The implementation of ihipEvent_t. The chipstar::Event class inherits this /// so ihipEvent_t pointers may carry chipstar::Event instances. -struct ihipEvent_t {}; +struct ihipEvent_t : ihipDispatch {}; struct ihipCtx_t {}; struct ihipStream_t : ihipDispatch {}; struct ihipModule_t {}; From 78b0b351c8900547eb4259d43ccad7c3c7dc0c81 Mon Sep 17 00:00:00 2001 From: Brice Videau Date: Thu, 13 Jul 2023 20:03:19 +0000 Subject: [PATCH 04/16] Made modules dispatchable. --- src/CHIPBackend.cc | 3 ++- src/CHIPBackend.hh | 2 +- src/CHIPBindings.cc | 8 ++++---- src/backend/Level0/CHIPBackendLevel0.cc | 6 ++++-- src/backend/OpenCL/CHIPBackendOpenCL.hh | 7 +++++-- src/common.hh | 10 +++++++++- 6 files changed, 25 insertions(+), 11 deletions(-) diff --git a/src/CHIPBackend.cc b/src/CHIPBackend.cc index 0985d5a60..5c285b87f 100644 --- a/src/CHIPBackend.cc +++ b/src/CHIPBackend.cc @@ -801,7 +801,8 @@ void chipstar::Device::eraseModule(chipstar::Module *Module) { LOCK(DeviceMtx); // SrcModToCompiledMod_ for (auto &Kv : SrcModToCompiledMod_) if (Kv.second == Module) { - delete Module; + Module->~Module(); + free(CHIP_OBJ_TO_HANDLE(Module, ihipModule_t)); SrcModToCompiledMod_.erase(Kv.first); break; } diff --git a/src/CHIPBackend.hh b/src/CHIPBackend.hh index 73f2343b9..0bb125709 100644 --- a/src/CHIPBackend.hh +++ b/src/CHIPBackend.hh @@ -850,7 +850,7 @@ public: * ROCclr - amd::Program * CUDA - CUmodule */ -class Module : public ihipModule_t { +class Module { /// Flag for the allocation state of the device variables. True if /// all variables have space allocated for this module for the /// device this module is attached to. False implies that diff --git a/src/CHIPBindings.cc b/src/CHIPBindings.cc index 7210bcb4f..eae9fdee6 100644 --- a/src/CHIPBindings.cc +++ b/src/CHIPBindings.cc @@ -3766,7 +3766,7 @@ hipError_t hipModuleGetGlobal(hipDeviceptr_t *Dptr, size_t *Bytes, CHIP_TRY CHIPInitialize(); NULLCHECK(Dptr, Bytes, Hmod, Name); - auto ChipModule = static_cast(Hmod); + auto ChipModule = MODULE(Hmod); chipstar::DeviceVar *Var = ChipModule->getGlobalVar(Name); *Dptr = Var->getDevAddr(); @@ -3922,7 +3922,7 @@ static inline hipError_t hipModuleLoadDataInternal(hipModule_t *ModuleHandle, auto Entry = getSPVRegister().registerSource(ModuleCode); auto *SrcMod = getSPVRegister().getSource(Entry); auto *ChipModule = Backend->getActiveDevice()->getOrCreateModule(*SrcMod); - *ModuleHandle = ChipModule; + *ModuleHandle = HIPMODULE(ChipModule); return hipSuccess; } @@ -4140,7 +4140,7 @@ hipError_t hipModuleUnload(hipModule_t Module) { NULLCHECK(Module); logInfo("hipModuleUnload(Module={}", (void *)Module); - auto *ChipModule = reinterpret_cast(Module); + auto *ChipModule = MODULE(Module); const auto &SrcMod = ChipModule->getSourceModule(); Backend->getActiveDevice()->eraseModule(ChipModule); getSPVRegister().unregisterSource(&SrcMod); @@ -4154,7 +4154,7 @@ hipError_t hipModuleGetFunction(hipFunction_t *Function, hipModule_t Module, CHIP_TRY CHIPInitialize(); NULLCHECK(Function, Module, Name); - auto ChipModule = (chipstar::Module *)Module; + auto ChipModule = MODULE(Module); chipstar::Kernel *Kernel = ChipModule->getKernelByName(Name); ERROR_IF((Kernel == nullptr), hipErrorInvalidDeviceFunction); diff --git a/src/backend/Level0/CHIPBackendLevel0.cc b/src/backend/Level0/CHIPBackendLevel0.cc index 67f359b06..302702248 100644 --- a/src/backend/Level0/CHIPBackendLevel0.cc +++ b/src/backend/Level0/CHIPBackendLevel0.cc @@ -2128,9 +2128,11 @@ chipstar::Texture *CHIPDeviceLevel0::createTexture( } CHIPModuleLevel0 *CHIPDeviceLevel0::compile(const SPVModule &SrcMod) { - auto CompiledModule = std::make_unique(SrcMod); + void *mem = malloc(sizeof(ihipDispatch) + sizeof(CHIPModuleLevel0)); + CHIPModuleLevel0 *CompiledModule = CHIP_HANDLE_TO_OBJ(mem, CHIPModuleLevel0); + CompiledModule = new (CompiledModule) CHIPModuleLevel0(SrcMod); CompiledModule->compile(this); - return CompiledModule.release(); + return CompiledModule; } // Other diff --git a/src/backend/OpenCL/CHIPBackendOpenCL.hh b/src/backend/OpenCL/CHIPBackendOpenCL.hh index cb0350851..b09700d37 100644 --- a/src/backend/OpenCL/CHIPBackendOpenCL.hh +++ b/src/backend/OpenCL/CHIPBackendOpenCL.hh @@ -197,9 +197,12 @@ public: } CHIPModuleOpenCL *compile(const SPVModule &SrcMod) override { - auto CompiledModule = std::make_unique(SrcMod); + void *mem = malloc(sizeof(ihipDispatch) + sizeof(CHIPModuleOpenCL)); + CHIPModuleOpenCL *CompiledModule = + CHIP_HANDLE_TO_OBJ(mem, CHIPModuleOpenCL); + CompiledModule = new (CompiledModule) CHIPModuleOpenCL(SrcMod); CompiledModule->compile(this); - return CompiledModule.release(); + return CompiledModule; } }; diff --git a/src/common.hh b/src/common.hh index e475c2004..f0cfb4127 100644 --- a/src/common.hh +++ b/src/common.hh @@ -74,12 +74,20 @@ struct ihipDispatch { (!(x) ? reinterpret_cast(x) \ : CHIP_OBJ_TO_HANDLE(x, ihipEvent_t)) +#define MODULE(x) \ + (!(x) ? reinterpret_cast(x) \ + : CHIP_HANDLE_TO_OBJ(x, chipstar::Module)) + +#define HIPMODULE(x) \ + (!(x) ? reinterpret_cast(x) \ + : CHIP_OBJ_TO_HANDLE(x, ihipModule_t)) + /// The implementation of ihipEvent_t. The chipstar::Event class inherits this /// so ihipEvent_t pointers may carry chipstar::Event instances. struct ihipEvent_t : ihipDispatch {}; struct ihipCtx_t {}; struct ihipStream_t : ihipDispatch {}; -struct ihipModule_t {}; +struct ihipModule_t : ihipDispatch {}; struct ihipModuleSymbol_t {}; struct ihipGraph {}; struct hipGraphNode {}; From 41b158db87aebffd67d10764e069b40486ef76ee Mon Sep 17 00:00:00 2001 From: Brice Videau Date: Thu, 13 Jul 2023 21:46:20 +0000 Subject: [PATCH 05/16] Made kernels dispatchable. --- src/CHIPBackend.hh | 2 +- src/CHIPBindings.cc | 6 +++--- src/backend/Level0/CHIPBackendLevel0.cc | 6 ++++-- src/backend/Level0/CHIPBackendLevel0.hh | 7 +++++-- src/backend/OpenCL/CHIPBackendOpenCL.cc | 21 +++++++++++++++++---- src/backend/OpenCL/CHIPBackendOpenCL.hh | 14 ++++++++++++-- src/common.hh | 10 +++++++++- 7 files changed, 51 insertions(+), 15 deletions(-) diff --git a/src/CHIPBackend.hh b/src/CHIPBackend.hh index 0bb125709..3de31d9c0 100644 --- a/src/CHIPBackend.hh +++ b/src/CHIPBackend.hh @@ -1010,7 +1010,7 @@ public: /** * @brief Contains information about the function on the host and device */ -class Kernel : public ihipModuleSymbol_t { +class Kernel { protected: /** * @brief hidden default constructor. Only derived type constructor should be diff --git a/src/CHIPBindings.cc b/src/CHIPBindings.cc index eae9fdee6..af02ac59e 100644 --- a/src/CHIPBindings.cc +++ b/src/CHIPBindings.cc @@ -4159,7 +4159,7 @@ hipError_t hipModuleGetFunction(hipFunction_t *Function, hipModule_t Module, ERROR_IF((Kernel == nullptr), hipErrorInvalidDeviceFunction); - *Function = Kernel; + *Function = HIPMODULESYMBOL(Kernel); RETURN(hipSuccess); CHIP_CATCH } @@ -4178,7 +4178,7 @@ static inline hipError_t hipModuleLaunchKernelInternal( dim3 Grid(GridDimX, GridDimY, GridDimZ); dim3 Block(BlockDimX, BlockDimY, BlockDimZ); - auto ChipKernel = static_cast(Kernel); + auto ChipKernel = KERNEL(Kernel); Backend->getActiveDevice()->prepareDeviceVariables( HostPtr(ChipKernel->getHostPtr())); @@ -4207,7 +4207,7 @@ static inline hipError_t hipModuleLaunchKernelInternal( if (!ExtraArgBuf) // Null argument pointer. return hipErrorInvalidValue; - auto ChipKernel = static_cast(Kernel); + auto ChipKernel = KERNEL(Kernel); auto *FuncInfo = ChipKernel->getFuncInfo(); auto ParamBuffer = convertExtraArgsToPointerArray(ExtraArgBuf, *FuncInfo); diff --git a/src/backend/Level0/CHIPBackendLevel0.cc b/src/backend/Level0/CHIPBackendLevel0.cc index 302702248..53be776d0 100644 --- a/src/backend/Level0/CHIPBackendLevel0.cc +++ b/src/backend/Level0/CHIPBackendLevel0.cc @@ -2307,8 +2307,10 @@ void CHIPModuleLevel0::compile(chipstar::Device *ChipDev) { Status = zeKernelCreate(ZeModule_, &KernelDesc, &ZeKernel); CHIPERR_CHECK_LOG_AND_THROW(Status, ZE_RESULT_SUCCESS, hipErrorTbd); logTrace("LZ KERNEL CREATION via calling zeKernelCreate {} ", Status); - CHIPKernelLevel0 *ChipZeKernel = - new CHIPKernelLevel0(ZeKernel, LzDev, HostFName, FuncInfo, this); + void *mem = malloc(sizeof(ihipDispatch) + sizeof(CHIPKernelLevel0)); + CHIPKernelLevel0 *ChipZeKernel = CHIP_HANDLE_TO_OBJ(mem, CHIPKernelLevel0); + ChipZeKernel = new (ChipZeKernel) + CHIPKernelLevel0(ZeKernel, LzDev, HostFName, FuncInfo, this); addKernel(ChipZeKernel); } } diff --git a/src/backend/Level0/CHIPBackendLevel0.hh b/src/backend/Level0/CHIPBackendLevel0.hh index 83e6a1bef..d286da97a 100644 --- a/src/backend/Level0/CHIPBackendLevel0.hh +++ b/src/backend/Level0/CHIPBackendLevel0.hh @@ -347,8 +347,11 @@ public: virtual ~CHIPModuleLevel0() { logTrace("destroy CHIPModuleLevel0 {}", (void *)this); - for (auto *K : ChipKernels_) // Kernels must be destroyed before the module. - delete K; + for (auto *K : + ChipKernels_) { // Kernels must be destroyed before the module. + K->~Kernel(); + free(CHIP_OBJ_TO_HANDLE(K, ihipModuleSymbol_t)); + } ChipKernels_.clear(); if (ZeModule_) { // The application must not call this function from diff --git a/src/backend/OpenCL/CHIPBackendOpenCL.cc b/src/backend/OpenCL/CHIPBackendOpenCL.cc index 4cb2f7449..60ba67742 100644 --- a/src/backend/OpenCL/CHIPBackendOpenCL.cc +++ b/src/backend/OpenCL/CHIPBackendOpenCL.cc @@ -751,8 +751,10 @@ void CHIPModuleOpenCL::compile(chipstar::Device *ChipDev) { // OpenCLFunctionInfoMap", // hipErrorInitializationError); } - CHIPKernelOpenCL *ChipKernel = - new CHIPKernelOpenCL(Krnl, ChipDevOcl, HostFName, FuncInfo, this); + void *mem = malloc(sizeof(ihipDispatch) + sizeof(CHIPKernelOpenCL)); + CHIPKernelOpenCL *ChipKernel = CHIP_HANDLE_TO_OBJ(mem, CHIPKernelOpenCL); + ChipKernel = new (ChipKernel) + CHIPKernelOpenCL(Krnl, ChipDevOcl, HostFName, FuncInfo, this); addKernel(ChipKernel); } @@ -792,8 +794,11 @@ CHIPKernelOpenCL *CHIPKernelOpenCL::clone() { // called on the original cl_kernel. auto Cloned = clCreateKernel(Module->get()->get(), Name_.c_str(), &Err); CHIPERR_CHECK_LOG_AND_THROW(Err, CL_SUCCESS, hipErrorTbd); - return new CHIPKernelOpenCL(cl::Kernel(Cloned, false), Device, Name_, - getFuncInfo(), Module); + void *mem = malloc(sizeof(ihipDispatch) + sizeof(CHIPKernelOpenCL)); + CHIPKernelOpenCL *ChipKernel = CHIP_HANDLE_TO_OBJ(mem, CHIPKernelOpenCL); + ChipKernel = new (ChipKernel) CHIPKernelOpenCL( + cl::Kernel(Cloned, false), Device, Name_, getFuncInfo(), Module); + return ChipKernel; } hipError_t CHIPKernelOpenCL::getAttributes(hipFuncAttributes *Attr) { @@ -1393,6 +1398,14 @@ void CHIPExecItemOpenCL::setupAllArgs() { return; } +void CHIPExecItemOpenCL::KernelDeleter::operator()( + CHIPKernelOpenCL *k) const noexcept { + if (k) { + k->~CHIPKernelOpenCL(); + free(CHIP_OBJ_TO_HANDLE(k, ihipStream_t)); + } +} + void CHIPExecItemOpenCL::setKernel(chipstar::Kernel *Kernel) { assert(Kernel && "Kernel is nullptr!"); // Make a clone of the kernel so the its cl_kernel object is not diff --git a/src/backend/OpenCL/CHIPBackendOpenCL.hh b/src/backend/OpenCL/CHIPBackendOpenCL.hh index b09700d37..c43afd53b 100644 --- a/src/backend/OpenCL/CHIPBackendOpenCL.hh +++ b/src/backend/OpenCL/CHIPBackendOpenCL.hh @@ -114,7 +114,13 @@ protected: public: CHIPModuleOpenCL(const SPVModule &SrcMod); - virtual ~CHIPModuleOpenCL() {} + virtual ~CHIPModuleOpenCL() { + for (auto *K : ChipKernels_) { + K->~Kernel(); + free(CHIP_OBJ_TO_HANDLE(K, ihipModuleSymbol_t)); + } + ChipKernels_.clear(); + } virtual void compile(chipstar::Device *ChipDevice) override; cl::Program *get(); }; @@ -298,8 +304,12 @@ public: }; class CHIPExecItemOpenCL : public chipstar::ExecItem { + struct KernelDeleter { + void operator()(CHIPKernelOpenCL *k) const noexcept; + }; + private: - std::unique_ptr ChipKernel_; + std::unique_ptr ChipKernel_; cl::Kernel *ClKernel_; public: diff --git a/src/common.hh b/src/common.hh index f0cfb4127..e42605524 100644 --- a/src/common.hh +++ b/src/common.hh @@ -82,13 +82,21 @@ struct ihipDispatch { (!(x) ? reinterpret_cast(x) \ : CHIP_OBJ_TO_HANDLE(x, ihipModule_t)) +#define KERNEL(x) \ + (!(x) ? reinterpret_cast(x) \ + : CHIP_HANDLE_TO_OBJ(x, chipstar::Kernel)) + +#define HIPMODULESYMBOL(x) \ + (!(x) ? reinterpret_cast(x) \ + : CHIP_OBJ_TO_HANDLE(x, ihipModuleSymbol_t)) + /// The implementation of ihipEvent_t. The chipstar::Event class inherits this /// so ihipEvent_t pointers may carry chipstar::Event instances. struct ihipEvent_t : ihipDispatch {}; struct ihipCtx_t {}; struct ihipStream_t : ihipDispatch {}; struct ihipModule_t : ihipDispatch {}; -struct ihipModuleSymbol_t {}; +struct ihipModuleSymbol_t : ihipDispatch {}; struct ihipGraph {}; struct hipGraphNode {}; struct hipGraphExec {}; From ec51132757eb1673ed265d5d8ed7d42e725562c3 Mon Sep 17 00:00:00 2001 From: Brice Videau Date: Fri, 14 Jul 2023 22:04:07 +0000 Subject: [PATCH 06/16] Fix stream dispatch --- src/CHIPBackend.cc | 10 +++++++++- src/CHIPBackend.hh | 6 +++++- src/backend/Level0/CHIPBackendLevel0.hh | 5 +++-- src/backend/OpenCL/CHIPBackendOpenCL.cc | 7 +++++-- 4 files changed, 22 insertions(+), 6 deletions(-) diff --git a/src/CHIPBackend.cc b/src/CHIPBackend.cc index 5c285b87f..13644a78a 100644 --- a/src/CHIPBackend.cc +++ b/src/CHIPBackend.cc @@ -542,11 +542,19 @@ chipstar::Queue *chipstar::Device::getPerThreadDefaultQueue() { return getPerThreadDefaultQueueNoLock(); } +void chipstar::Device::QueueDeleter::operator()(chipstar::Queue *q) const noexcept { + if (q) { + q->~Queue(); + free(CHIP_OBJ_TO_HANDLE(q, ihipStream_t)); + } +} + chipstar::Queue *chipstar::Device::getPerThreadDefaultQueueNoLock() { if (!PerThreadDefaultQueue.get()) { logDebug("PerThreadDefaultQueue is null.. Creating a new queue."); PerThreadDefaultQueue = - std::unique_ptr(::Backend->createCHIPQueue(this)); + std::unique_ptr( + ::Backend->createCHIPQueue(this), chipstar::Device::QueueDeleter()); PerThreadStreamUsed_ = true; PerThreadDefaultQueue.get()->PerThreadQueueForDevice = this; } diff --git a/src/CHIPBackend.hh b/src/CHIPBackend.hh index 3de31d9c0..0bf9ac4da 100644 --- a/src/CHIPBackend.hh +++ b/src/CHIPBackend.hh @@ -1294,7 +1294,11 @@ public: std::vector getQueuesNoLock() { return ChipQueues_; } chipstar::Queue *LegacyDefaultQueue; - inline static thread_local std::unique_ptr + struct QueueDeleter { + void operator()(chipstar::Queue *q) const noexcept; + }; + + inline static thread_local std::unique_ptr PerThreadDefaultQueue; /** diff --git a/src/backend/Level0/CHIPBackendLevel0.hh b/src/backend/Level0/CHIPBackendLevel0.hh index d286da97a..1519bea22 100644 --- a/src/backend/Level0/CHIPBackendLevel0.hh +++ b/src/backend/Level0/CHIPBackendLevel0.hh @@ -560,8 +560,9 @@ public: virtual chipstar::Queue *createCHIPQueue(chipstar::Device *ChipDev) override { CHIPDeviceLevel0 *ChipDevLz = (CHIPDeviceLevel0 *)ChipDev; - auto Q = new CHIPQueueLevel0(ChipDevLz); - + void *mem = malloc(sizeof(ihipDispatch) + sizeof(CHIPQueueLevel0)); + chipstar::Queue *Q = CHIP_HANDLE_TO_OBJ(mem, CHIPQueueLevel0); + Q = new (Q) CHIPQueueLevel0(ChipDevLz); return Q; } diff --git a/src/backend/OpenCL/CHIPBackendOpenCL.cc b/src/backend/OpenCL/CHIPBackendOpenCL.cc index 60ba67742..911a5ed7c 100644 --- a/src/backend/OpenCL/CHIPBackendOpenCL.cc +++ b/src/backend/OpenCL/CHIPBackendOpenCL.cc @@ -775,7 +775,7 @@ chipstar::Queue *CHIPDeviceOpenCL::createQueue(const uintptr_t *NativeHandles, cl_command_queue CmdQ = (cl_command_queue)NativeHandles[3]; void *mem = malloc(sizeof(ihipDispatch) + sizeof(CHIPQueueOpenCL)); CHIPQueueOpenCL *NewQ = CHIP_HANDLE_TO_OBJ(mem, CHIPQueueOpenCL); - NewQ = new (mem) CHIPQueueOpenCL(this, OCL_DEFAULT_QUEUE_PRIORITY, CmdQ); + NewQ = new (NewQ) CHIPQueueOpenCL(this, OCL_DEFAULT_QUEUE_PRIORITY, CmdQ); return NewQ; } @@ -1429,7 +1429,10 @@ CHIPBackendOpenCL::createExecItem(dim3 GirdDim, dim3 BlockDim, size_t SharedMem, }; chipstar::Queue *CHIPBackendOpenCL::createCHIPQueue(chipstar::Device *ChipDev) { CHIPDeviceOpenCL *ChipDevCl = (CHIPDeviceOpenCL *)ChipDev; - return new CHIPQueueOpenCL(ChipDevCl, OCL_DEFAULT_QUEUE_PRIORITY); + void *mem = malloc(sizeof(ihipDispatch) + sizeof(CHIPQueueOpenCL)); + CHIPQueueOpenCL *Q = CHIP_HANDLE_TO_OBJ(mem, CHIPQueueOpenCL); + Q = new (Q) CHIPQueueOpenCL(ChipDevCl, OCL_DEFAULT_QUEUE_PRIORITY); + return Q; } chipstar::CallbackData *CHIPBackendOpenCL::createCallbackData( From 0aa3d3b8a02a83c0c4f6d93104c0aed2d18fc08e Mon Sep 17 00:00:00 2001 From: Brice Videau Date: Fri, 14 Jul 2023 22:13:19 +0000 Subject: [PATCH 07/16] Refactored macros --- src/common.hh | 30 ++++++++++++------------------ 1 file changed, 12 insertions(+), 18 deletions(-) diff --git a/src/common.hh b/src/common.hh index e42605524..acb85a882 100644 --- a/src/common.hh +++ b/src/common.hh @@ -66,29 +66,23 @@ struct ihipDispatch { ? reinterpret_cast(x) \ : CHIP_OBJ_TO_HANDLE(x, ihipStream_t)) -#define EVENT(x) \ - (!(x) ? reinterpret_cast(x) \ - : CHIP_HANDLE_TO_OBJ(x, chipstar::Event)) +#define HIPTOCHIP(x, t) \ + (!(x) ? reinterpret_cast(x) : CHIP_HANDLE_TO_OBJ(x, t)) -#define HIPEVENT(x) \ - (!(x) ? reinterpret_cast(x) \ - : CHIP_OBJ_TO_HANDLE(x, ihipEvent_t)) +#define CHIPTOHIP(x, t) \ + (!(x) ? reinterpret_cast(x) : CHIP_OBJ_TO_HANDLE(x, t)) -#define MODULE(x) \ - (!(x) ? reinterpret_cast(x) \ - : CHIP_HANDLE_TO_OBJ(x, chipstar::Module)) +#define EVENT(x) HIPTOCHIP(x, chipstar::Event) -#define HIPMODULE(x) \ - (!(x) ? reinterpret_cast(x) \ - : CHIP_OBJ_TO_HANDLE(x, ihipModule_t)) +#define HIPEVENT(x) CHIPTOHIP(x, ihipEvent_t) -#define KERNEL(x) \ - (!(x) ? reinterpret_cast(x) \ - : CHIP_HANDLE_TO_OBJ(x, chipstar::Kernel)) +#define MODULE(x) HIPTOCHIP(x, chipstar::Module) -#define HIPMODULESYMBOL(x) \ - (!(x) ? reinterpret_cast(x) \ - : CHIP_OBJ_TO_HANDLE(x, ihipModuleSymbol_t)) +#define HIPMODULE(x) CHIPTOHIP(x, ihipModule_t) + +#define KERNEL(x) HIPTOCHIP(x, chipstar::Kernel) + +#define HIPMODULESYMBOL(x) CHIPTOHIP(x, ihipModuleSymbol_t) /// The implementation of ihipEvent_t. The chipstar::Event class inherits this /// so ihipEvent_t pointers may carry chipstar::Event instances. From f001da4516d15da955cda0db5e5b69c33118382d Mon Sep 17 00:00:00 2001 From: Brice Videau Date: Fri, 14 Jul 2023 22:14:10 +0000 Subject: [PATCH 08/16] Made context dispatchable --- src/CHIPBackend.cc | 3 ++- src/CHIPBackend.hh | 2 +- src/backend/Level0/CHIPBackendLevel0.cc | 8 ++++++-- src/backend/OpenCL/CHIPBackendOpenCL.cc | 8 ++++++-- src/common.hh | 6 +++++- src/hipCtx.hh | 14 +++++++------- 6 files changed, 27 insertions(+), 14 deletions(-) diff --git a/src/CHIPBackend.cc b/src/CHIPBackend.cc index 13644a78a..2bc1615e0 100644 --- a/src/CHIPBackend.cc +++ b/src/CHIPBackend.cc @@ -1240,7 +1240,8 @@ chipstar::Backend::~Backend() { UserEvents.clear(); for (auto &Ctx : ChipContexts) { ::Backend->removeContext(Ctx); - delete Ctx; + Ctx->~Context(); + free(CHIP_OBJ_TO_HANDLE(Ctx, ihipCtx_t)); } } diff --git a/src/CHIPBackend.hh b/src/CHIPBackend.hh index 0bf9ac4da..5fbd4e125 100644 --- a/src/CHIPBackend.hh +++ b/src/CHIPBackend.hh @@ -1619,7 +1619,7 @@ protected: * multiple devices. Provides for creation of additional queues, events, and * interaction with devices. */ -class Context : public ihipCtx_t { +class Context { protected: int RefCount_; chipstar::Device *ChipDevice_; diff --git a/src/backend/Level0/CHIPBackendLevel0.cc b/src/backend/Level0/CHIPBackendLevel0.cc index 53be776d0..1117dc5f9 100644 --- a/src/backend/Level0/CHIPBackendLevel0.cc +++ b/src/backend/Level0/CHIPBackendLevel0.cc @@ -1625,7 +1625,9 @@ void CHIPBackendLevel0::initializeImpl(std::string CHIPPlatformStr, Status = zeContextCreateEx(ZeDriver, &CtxDesc, DeviceCount, ZeDevices.data(), &ZeCtx); CHIPERR_CHECK_LOG_AND_THROW(Status, ZE_RESULT_SUCCESS, hipErrorTbd); - CHIPContextLevel0 *ChipL0Ctx = new CHIPContextLevel0(ZeDriver, ZeCtx); + void *mem = malloc(sizeof(ihipDispatch) + sizeof(CHIPContextLevel0)); + CHIPContextLevel0 *ChipL0Ctx = CHIP_HANDLE_TO_OBJ(mem, CHIPContextLevel0); + ChipL0Ctx = new (ChipL0Ctx) CHIPContextLevel0(ZeDriver, ZeCtx); ::Backend->addContext(ChipL0Ctx); // Filter in only devices of selected type and add them to the @@ -1657,7 +1659,9 @@ void CHIPBackendLevel0::initializeFromNative(const uintptr_t *NativeHandles, ze_device_handle_t Dev = (ze_device_handle_t)NativeHandles[1]; ze_context_handle_t Ctx = (ze_context_handle_t)NativeHandles[2]; - CHIPContextLevel0 *ChipCtx = new CHIPContextLevel0(Drv, Ctx); + void *mem = malloc(sizeof(ihipDispatch) + sizeof(CHIPContextLevel0)); + CHIPContextLevel0 *ChipCtx = CHIP_HANDLE_TO_OBJ(mem, CHIPContextLevel0); + ChipCtx = new (ChipCtx) CHIPContextLevel0(Drv, Ctx); ChipCtx->setZeContextOwnership(false); addContext(ChipCtx); diff --git a/src/backend/OpenCL/CHIPBackendOpenCL.cc b/src/backend/OpenCL/CHIPBackendOpenCL.cc index 911a5ed7c..9e0ce3651 100644 --- a/src/backend/OpenCL/CHIPBackendOpenCL.cc +++ b/src/backend/OpenCL/CHIPBackendOpenCL.cc @@ -1531,7 +1531,9 @@ void CHIPBackendOpenCL::initializeImpl(std::string CHIPPlatformStr, // Create queues that have devices each of which has an associated context // TODO Change this to spirv_enabled_devices cl::Context *Ctx = new cl::Context(SpirvDevices); - CHIPContextOpenCL *ChipContext = new CHIPContextOpenCL(Ctx); + void *mem = malloc(sizeof(ihipDispatch) + sizeof(CHIPContextOpenCL)); + CHIPContextOpenCL *ChipContext = CHIP_HANDLE_TO_OBJ(mem, CHIPContextOpenCL); + ChipContext = new (ChipContext) CHIPContextOpenCL(Ctx); ::Backend->addContext(ChipContext); // TODO for now only a single device is supported. @@ -1552,7 +1554,9 @@ void CHIPBackendOpenCL::initializeFromNative(const uintptr_t *NativeHandles, cl_context CtxId = (cl_context)NativeHandles[2]; cl::Context *Ctx = new cl::Context(CtxId); - CHIPContextOpenCL *ChipContext = new CHIPContextOpenCL(Ctx); + void *mem = malloc(sizeof(ihipDispatch) + sizeof(CHIPContextOpenCL)); + CHIPContextOpenCL *ChipContext = CHIP_HANDLE_TO_OBJ(mem, CHIPContextOpenCL); + ChipContext = new (ChipContext) CHIPContextOpenCL(Ctx); addContext(ChipContext); cl::Device *Dev = new cl::Device(DevId); diff --git a/src/common.hh b/src/common.hh index acb85a882..8cdff5252 100644 --- a/src/common.hh +++ b/src/common.hh @@ -84,10 +84,14 @@ struct ihipDispatch { #define HIPMODULESYMBOL(x) CHIPTOHIP(x, ihipModuleSymbol_t) +#define CONTEXT(x) HIPTOCHIP(x, chipstar::Context) + +#define HIPCONTEXT(x) CHIPTOHIP(x, ihipCtx_t) + /// The implementation of ihipEvent_t. The chipstar::Event class inherits this /// so ihipEvent_t pointers may carry chipstar::Event instances. struct ihipEvent_t : ihipDispatch {}; -struct ihipCtx_t {}; +struct ihipCtx_t : ihipDispatch {}; struct ihipStream_t : ihipDispatch {}; struct ihipModule_t : ihipDispatch {}; struct ihipModuleSymbol_t : ihipDispatch {}; diff --git a/src/hipCtx.hh b/src/hipCtx.hh index 076e62b3b..dbc355f38 100644 --- a/src/hipCtx.hh +++ b/src/hipCtx.hh @@ -48,7 +48,7 @@ hipError_t hipCtxCreate(hipCtx_t *ctx, unsigned int flags, hipDevice_t device) { auto ChipCtx = Backend->getDevices()[device]->getContext(); ChipCtx->retain(); - *ctx = ChipCtx; + *ctx = HIPCONTEXT(ChipCtx); RETURN(hipSuccess); CHIP_CATCH @@ -57,7 +57,7 @@ hipError_t hipCtxCreate(hipCtx_t *ctx, unsigned int flags, hipDevice_t device) { hipError_t hipCtxDestroy(hipCtx_t ctx) { CHIP_TRY CHIPInitialize(); - auto ChipCtx = static_cast(ctx); + auto ChipCtx = CONTEXT(ctx); if (ChipCtx == nullptr) { RETURN(hipErrorInvalidValue); } @@ -80,7 +80,7 @@ hipError_t hipCtxPopCurrent(hipCtx_t *ctx) { if (ChipCtxStack.empty()) { *ctx = nullptr; } else { - *ctx = ChipCtxStack.top(); + *ctx = HIPCONTEXT(ChipCtxStack.top()); ChipCtxStack.pop(); } @@ -92,7 +92,7 @@ hipError_t hipCtxPushCurrent(hipCtx_t ctx) { CHIP_TRY CHIPInitialize(); - auto ChipCtx = static_cast(ctx); + auto ChipCtx = CONTEXT(ctx); if (!ChipCtx) { if (!ChipCtxStack.empty()) { ChipCtxStack.pop(); @@ -108,7 +108,7 @@ hipError_t hipCtxPushCurrent(hipCtx_t ctx) { hipError_t hipCtxSetCurrent(hipCtx_t ctx) { CHIP_TRY CHIPInitialize(); - Backend->setActiveContext(static_cast(ctx)); + Backend->setActiveContext(CONTEXT(ctx)); RETURN(hipSuccess); CHIP_CATCH } @@ -116,7 +116,7 @@ hipError_t hipCtxSetCurrent(hipCtx_t ctx) { hipError_t hipCtxGetCurrent(hipCtx_t *ctx) { CHIP_TRY CHIPInitialize(); - *ctx = Backend->getActiveContext(); + *ctx = HIPCONTEXT(Backend->getActiveContext()); RETURN(hipSuccess); CHIP_CATCH } @@ -221,7 +221,7 @@ hipError_t hipDevicePrimaryCtxRetain(hipCtx_t *Context, hipDevice_t Device) { NULLCHECK(Context); ERROR_CHECK_DEVNUM(Device); - *Context = Backend->getDevices()[Device]->getContext(); + *Context = HIPCONTEXT(Backend->getDevices()[Device]->getContext()); RETURN(hipSuccess); CHIP_CATCH From ba85f0805cce69179ef9d67294143e5986f456aa Mon Sep 17 00:00:00 2001 From: Brice Videau Date: Fri, 14 Jul 2023 23:05:59 +0000 Subject: [PATCH 09/16] Made graph dispatchable. --- src/CHIPBackend.cc | 10 ++++++++-- src/CHIPBackend.hh | 2 +- src/CHIPBindings.cc | 25 ++++++++++++++++--------- src/CHIPGraph.hh | 2 +- src/common.hh | 6 +++++- 5 files changed, 31 insertions(+), 14 deletions(-) diff --git a/src/CHIPBackend.cc b/src/CHIPBackend.cc index 2bc1615e0..f2f083227 100644 --- a/src/CHIPBackend.cc +++ b/src/CHIPBackend.cc @@ -542,7 +542,8 @@ chipstar::Queue *chipstar::Device::getPerThreadDefaultQueue() { return getPerThreadDefaultQueueNoLock(); } -void chipstar::Device::QueueDeleter::operator()(chipstar::Queue *q) const noexcept { +void chipstar::Device::QueueDeleter::operator()( + chipstar::Queue *q) const noexcept { if (q) { q->~Queue(); free(CHIP_OBJ_TO_HANDLE(q, ihipStream_t)); @@ -1701,7 +1702,12 @@ void chipstar::Queue::updateLastNode(CHIPGraphNode *NewNode) { LastNode_ = NewNode; } -void chipstar::Queue::initCaptureGraph() { CaptureGraph_ = new CHIPGraph(); } +void chipstar::Queue::initCaptureGraph() { + void *mem = malloc(sizeof(ihipDispatch) + sizeof(CHIPGraph)); + CHIPGraph *G = CHIP_HANDLE_TO_OBJ(mem, CHIPGraph); + G = new (G) CHIPGraph(); + CaptureGraph_ = G; +} std::shared_ptr chipstar::Queue::RegisteredVarCopy(chipstar::ExecItem *ExecItem, diff --git a/src/CHIPBackend.hh b/src/CHIPBackend.hh index 5fbd4e125..311830064 100644 --- a/src/CHIPBackend.hh +++ b/src/CHIPBackend.hh @@ -2041,7 +2041,7 @@ class Queue { protected: hipStreamCaptureStatus CaptureStatus_ = hipStreamCaptureStatusNone; hipStreamCaptureMode CaptureMode_ = hipStreamCaptureModeGlobal; - hipGraph_t CaptureGraph_; + CHIPGraph *CaptureGraph_; std::mutex LastEventMtx; /// @brief node for creating a dependency chain between subsequent record /// events when in graph capture mode diff --git a/src/CHIPBindings.cc b/src/CHIPBindings.cc index af02ac59e..f80a06259 100644 --- a/src/CHIPBindings.cc +++ b/src/CHIPBindings.cc @@ -57,8 +57,6 @@ #define SVM_ALIGNMENT 128 // TODO Pass as CMAKE Define? -#define GRAPH(x) static_cast(x) - #define NODE(x) static_cast(x) #define EXEC(x) static_cast(x) @@ -281,8 +279,10 @@ static void handleAbortRequest(chipstar::Queue &Q, chipstar::Module &M) { hipError_t hipGraphCreate(hipGraph_t *pGraph, unsigned int flags) { CHIP_TRY CHIPInitialize(); - CHIPGraph *Graph = new CHIPGraph(); - *pGraph = Graph; + void *mem = malloc(sizeof(ihipDispatch) + sizeof(CHIPGraph)); + CHIPGraph *Graph = CHIP_HANDLE_TO_OBJ(mem, CHIPGraph); + Graph = new (Graph) CHIPGraph(); + *pGraph = HIPGRAPH(Graph); RETURN(hipSuccess); CHIP_CATCH } @@ -290,7 +290,10 @@ hipError_t hipGraphCreate(hipGraph_t *pGraph, unsigned int flags) { hipError_t hipGraphDestroy(hipGraph_t graph) { CHIP_TRY CHIPInitialize(); - delete graph; + NULLCHECK(graph); + CHIPGraph *G = GRAPH(graph); + G->~CHIPGraph(); + free(graph); RETURN(hipSuccess); CHIP_CATCH } @@ -456,8 +459,10 @@ hipError_t hipGraphDestroyNode(hipGraphNode_t node) { hipError_t hipGraphClone(hipGraph_t *pGraphClone, hipGraph_t originalGraph) { CHIP_TRY CHIPInitialize(); - CHIPGraph *CloneGraph = new CHIPGraph(*GRAPH(originalGraph)); - *pGraphClone = CloneGraph; + void *mem = malloc(sizeof(ihipDispatch) + sizeof(CHIPGraph)); + CHIPGraph *CloneGraph = CHIP_HANDLE_TO_OBJ(mem, CHIPGraph); + CloneGraph = new (CloneGraph) CHIPGraph(*GRAPH(originalGraph)); + *pGraphClone = HIPGRAPH(CloneGraph); RETURN(hipSuccess); CHIP_CATCH } @@ -1044,7 +1049,8 @@ hipError_t hipGraphChildGraphNodeGetGraph(hipGraphNode_t node, hipGraph_t *pGraph) { CHIP_TRY CHIPInitialize(); - *pGraph = static_cast(node)->getGraph(); + CHIPGraph *G = static_cast(node)->getGraph(); + *pGraph = HIPGRAPH(G); RETURN(hipSuccess); CHIP_CATCH } @@ -1233,7 +1239,8 @@ hipError_t hipStreamEndCapture(hipStream_t stream, hipGraph_t *pGraph) { } ChipQueue->setCaptureStatus( hipStreamCaptureStatus::hipStreamCaptureStatusNone); - *pGraph = ChipQueue->getCaptureGraph(); + CHIPGraph *G = ChipQueue->getCaptureGraph(); + *pGraph = HIPGRAPH(G); RETURN(hipSuccess); CHIP_CATCH } diff --git a/src/CHIPGraph.hh b/src/CHIPGraph.hh index 38fdf0cbc..279acf5af 100644 --- a/src/CHIPGraph.hh +++ b/src/CHIPGraph.hh @@ -575,7 +575,7 @@ public: } }; -class CHIPGraph : public ihipGraph { +class CHIPGraph { protected: std::vector Nodes_; // Map the pointers Original -> Clone diff --git a/src/common.hh b/src/common.hh index 8cdff5252..4781602dd 100644 --- a/src/common.hh +++ b/src/common.hh @@ -88,6 +88,10 @@ struct ihipDispatch { #define HIPCONTEXT(x) CHIPTOHIP(x, ihipCtx_t) +#define GRAPH(x) HIPTOCHIP(x, CHIPGraph) + +#define HIPGRAPH(x) CHIPTOHIP(x, ihipGraph) + /// The implementation of ihipEvent_t. The chipstar::Event class inherits this /// so ihipEvent_t pointers may carry chipstar::Event instances. struct ihipEvent_t : ihipDispatch {}; @@ -95,7 +99,7 @@ struct ihipCtx_t : ihipDispatch {}; struct ihipStream_t : ihipDispatch {}; struct ihipModule_t : ihipDispatch {}; struct ihipModuleSymbol_t : ihipDispatch {}; -struct ihipGraph {}; +struct ihipGraph : ihipDispatch {}; struct hipGraphNode {}; struct hipGraphExec {}; From c34ce40dff8e014588439e2e64e4cb65948c0eab Mon Sep 17 00:00:00 2001 From: Brice Videau Date: Mon, 17 Jul 2023 19:17:10 +0000 Subject: [PATCH 10/16] Made graph exec dispatchable. --- src/CHIPBindings.cc | 35 +++++++++++++++++++---------------- src/CHIPGraph.hh | 2 +- src/common.hh | 6 +++++- 3 files changed, 25 insertions(+), 18 deletions(-) diff --git a/src/CHIPBindings.cc b/src/CHIPBindings.cc index f80a06259..7d1fc0e07 100644 --- a/src/CHIPBindings.cc +++ b/src/CHIPBindings.cc @@ -59,8 +59,6 @@ #define NODE(x) static_cast(x) -#define EXEC(x) static_cast(x) - #define NODES(x) reinterpret_cast(x) #define DECONST_NODE(x) \ @@ -483,8 +481,10 @@ hipError_t hipGraphInstantiate(hipGraphExec_t *pGraphExec, hipGraph_t graph, size_t bufferSize) { CHIP_TRY CHIPInitialize(); - CHIPGraphExec *GraphExec = new CHIPGraphExec(GRAPH(graph)); - *pGraphExec = GraphExec; + void *mem = malloc(sizeof(ihipDispatch) + sizeof(CHIPGraphExec)); + CHIPGraphExec *GraphExec = CHIP_HANDLE_TO_OBJ(mem, CHIPGraphExec); + GraphExec = new (GraphExec) CHIPGraphExec(GRAPH(graph)); + *pGraphExec = HIPGRAPHEXEC(GraphExec); RETURN(hipSuccess); CHIP_CATCH @@ -505,7 +505,7 @@ hipError_t hipGraphLaunch(hipGraphExec_t graphExec, hipStream_t stream) { CHIPInitialize(); auto ChipQueue = QUEUE(stream); ChipQueue = Backend->findQueue(ChipQueue); - EXEC(graphExec)->launch(ChipQueue); + GRAPHEXEC(graphExec)->launch(ChipQueue); RETURN(hipSuccess); CHIP_CATCH } @@ -513,7 +513,10 @@ hipError_t hipGraphLaunch(hipGraphExec_t graphExec, hipStream_t stream) { hipError_t hipGraphExecDestroy(hipGraphExec_t graphExec) { CHIP_TRY CHIPInitialize(); - delete graphExec; + NULLCHECK(graphExec); + CHIPGraphExec *GE = GRAPHEXEC(graphExec); + GE->~CHIPGraphExec(); + free(graphExec); RETURN(hipSuccess); CHIP_CATCH } @@ -536,7 +539,7 @@ hipError_t hipGraphExecUpdate(hipGraphExec_t hGraphExec, hipGraph_t hGraph, 4. The dependent nodes of a pair differ, in which case hErrorNode_out is the node from hGraph. */ - auto ExecGraph = EXEC(hGraphExec)->getOriginalGraphPtr(); + auto ExecGraph = GRAPHEXEC(hGraphExec)->getOriginalGraphPtr(); // 1. if (ExecGraph->getNodes().size() != GRAPH(hGraph)->getNodes().size()) { *updateResult_out = hipGraphExecUpdateErrorTopologyChanged; @@ -669,7 +672,7 @@ hipGraphExecKernelNodeSetParams(hipGraphExec_t hGraphExec, hipGraphNode_t node, CHIP_TRY CHIPInitialize(); // Graph obtained from hipGraphExec_t is a clone of the original - CHIPGraph *Graph = EXEC(hGraphExec)->getOriginalGraphPtr(); + CHIPGraph *Graph = GRAPHEXEC(hGraphExec)->getOriginalGraphPtr(); // KernelNode here is a handle to the original CHIPGraphNodeKernel *ExecKernelNode = static_cast( @@ -746,7 +749,7 @@ hipError_t hipGraphExecMemcpyNodeSetParams(hipGraphExec_t hGraphExec, CHIP_TRY CHIPInitialize(); auto ExecNode = - EXEC(hGraphExec)->getOriginalGraphPtr()->nodeLookup(NODE(node)); + GRAPHEXEC(hGraphExec)->getOriginalGraphPtr()->nodeLookup(NODE(node)); if (!ExecNode) CHIPERR_LOG_AND_THROW("Failed to find the node in hipGraphExec_t", hipErrorInvalidValue); @@ -799,7 +802,7 @@ hipError_t hipGraphExecMemcpyNodeSetParams1D(hipGraphExec_t hGraphExec, CHIP_TRY CHIPInitialize(); auto ExecNode = - EXEC(hGraphExec)->getOriginalGraphPtr()->nodeLookup(NODE(node)); + GRAPHEXEC(hGraphExec)->getOriginalGraphPtr()->nodeLookup(NODE(node)); if (!ExecNode) CHIPERR_LOG_AND_THROW("Failed to find the node in hipGraphExec_t", hipErrorInvalidValue); @@ -850,7 +853,7 @@ hipError_t hipGraphExecMemcpyNodeSetParamsFromSymbol( CHIP_TRY CHIPInitialize(); // Graph obtained from hipGraphExec_t is a clone of the original - CHIPGraph *Graph = EXEC(hGraphExec)->getOriginalGraphPtr(); + CHIPGraph *Graph = GRAPHEXEC(hGraphExec)->getOriginalGraphPtr(); // KernelNode here is a handle to the original CHIPGraphNodeMemcpyFromSymbol *KernelNode = ((CHIPGraphNodeMemcpyFromSymbol *)node); @@ -901,7 +904,7 @@ hipError_t hipGraphExecMemcpyNodeSetParamsToSymbol( CHIP_TRY CHIPInitialize(); auto ExecNode = - EXEC(hGraphExec)->getOriginalGraphPtr()->nodeLookup(NODE(node)); + GRAPHEXEC(hGraphExec)->getOriginalGraphPtr()->nodeLookup(NODE(node)); if (!ExecNode) CHIPERR_LOG_AND_THROW("Failed to find the node in hipGraphExec_t", hipErrorInvalidValue); @@ -958,7 +961,7 @@ hipError_t hipGraphExecMemsetNodeSetParams(hipGraphExec_t hGraphExec, CHIP_TRY CHIPInitialize(); auto ExecNode = - EXEC(hGraphExec)->getOriginalGraphPtr()->nodeLookup(NODE(node)); + GRAPHEXEC(hGraphExec)->getOriginalGraphPtr()->nodeLookup(NODE(node)); if (!ExecNode) CHIPERR_LOG_AND_THROW("Failed to find the node in hipGraphExec_t", hipErrorInvalidValue); @@ -1014,7 +1017,7 @@ hipError_t hipGraphExecHostNodeSetParams(hipGraphExec_t hGraphExec, CHIP_TRY CHIPInitialize(); auto ExecNode = - EXEC(hGraphExec)->getOriginalGraphPtr()->nodeLookup(NODE(node)); + GRAPHEXEC(hGraphExec)->getOriginalGraphPtr()->nodeLookup(NODE(node)); if (!ExecNode) CHIPERR_LOG_AND_THROW("Failed to find the node in hipGraphExec_t", hipErrorInvalidValue); @@ -1125,7 +1128,7 @@ hipError_t hipGraphExecEventRecordNodeSetEvent(hipGraphExec_t hGraphExec, CHIP_TRY CHIPInitialize(); auto ExecNode = - EXEC(hGraphExec)->getOriginalGraphPtr()->nodeLookup(NODE(hNode)); + GRAPHEXEC(hGraphExec)->getOriginalGraphPtr()->nodeLookup(NODE(hNode)); if (!ExecNode) CHIPERR_LOG_AND_THROW("Failed to find the node in hipGraphExec_t", hipErrorInvalidValue); @@ -1192,7 +1195,7 @@ hipError_t hipGraphExecEventWaitNodeSetEvent(hipGraphExec_t hGraphExec, CHIP_TRY CHIPInitialize(); auto ExecNode = - EXEC(hGraphExec)->getOriginalGraphPtr()->nodeLookup(NODE(hNode)); + GRAPHEXEC(hGraphExec)->getOriginalGraphPtr()->nodeLookup(NODE(hNode)); if (!ExecNode) CHIPERR_LOG_AND_THROW("Failed to find the node in hipGraphExec_t", hipErrorInvalidValue); diff --git a/src/CHIPGraph.hh b/src/CHIPGraph.hh index 279acf5af..9e413c84d 100644 --- a/src/CHIPGraph.hh +++ b/src/CHIPGraph.hh @@ -645,7 +645,7 @@ public: } }; -class CHIPGraphExec : public hipGraphExec { +class CHIPGraphExec { protected: CHIPGraph *OriginalGraph_; CHIPGraph CompiledGraph_; diff --git a/src/common.hh b/src/common.hh index 4781602dd..d290b26d7 100644 --- a/src/common.hh +++ b/src/common.hh @@ -92,6 +92,10 @@ struct ihipDispatch { #define HIPGRAPH(x) CHIPTOHIP(x, ihipGraph) +#define GRAPHEXEC(x) HIPTOCHIP(x, CHIPGraphExec) + +#define HIPGRAPHEXEC(x) CHIPTOHIP(x, hipGraphExec) + /// The implementation of ihipEvent_t. The chipstar::Event class inherits this /// so ihipEvent_t pointers may carry chipstar::Event instances. struct ihipEvent_t : ihipDispatch {}; @@ -101,7 +105,7 @@ struct ihipModule_t : ihipDispatch {}; struct ihipModuleSymbol_t : ihipDispatch {}; struct ihipGraph : ihipDispatch {}; struct hipGraphNode {}; -struct hipGraphExec {}; +struct hipGraphExec : ihipDispatch {}; bool filterSPIRV(const char *Bytes, size_t NumBytes, std::string &Dst); bool parseSPIR(uint32_t *Stream, size_t NumWords, From 8b433e0e46fa5f84a5978d6e96fd881c185cb639 Mon Sep 17 00:00:00 2001 From: Brice Videau Date: Mon, 17 Jul 2023 20:39:13 +0000 Subject: [PATCH 11/16] Fix hipGraphAddDependencies and hipGraphRemoveDependencies semantics. --- src/CHIPBindings.cc | 40 ++++++++++++++++++++++++++++++---------- 1 file changed, 30 insertions(+), 10 deletions(-) diff --git a/src/CHIPBindings.cc b/src/CHIPBindings.cc index 7d1fc0e07..982a979b1 100644 --- a/src/CHIPBindings.cc +++ b/src/CHIPBindings.cc @@ -301,11 +301,21 @@ hipError_t hipGraphAddDependencies(hipGraph_t graph, const hipGraphNode_t *from, size_t numDependencies) { CHIP_TRY CHIPInitialize(); - CHIPGraphNode *FoundNode = GRAPH(graph)->findNode(NODE(*to)); - if (!FoundNode) - RETURN(hipErrorInvalidValue); - - FoundNode->addDependencies(DECONST_NODES(from), numDependencies); + NULLCHECK(graph); + if (numDependencies) + NULLCHECK(from, to); + CHIPGraph *G = GRAPH(graph); + for (size_t i = 0; i < numDependencies; i++) { + CHIPGraphNode *FoundNode = G->findNode(NODE(to[i])); + if (!FoundNode) + RETURN(hipErrorInvalidValue); + FoundNode = G->findNode(NODE(from[i])); + if (!FoundNode) + RETURN(hipErrorInvalidValue); + } + for (size_t i = 0; i < numDependencies; i++) { + G->findNode(NODE(to[i]))->addDependency(G->findNode(NODE(from[i]))); + } RETURN(hipSuccess); CHIP_CATCH } @@ -316,11 +326,21 @@ hipError_t hipGraphRemoveDependencies(hipGraph_t graph, size_t numDependencies) { CHIP_TRY CHIPInitialize(); - CHIPGraphNode *FoundNode = GRAPH(graph)->findNode(NODE(*to)); - if (!FoundNode) - RETURN(hipErrorInvalidValue); - - FoundNode->removeDependencies(DECONST_NODES(from), numDependencies); + NULLCHECK(graph); + if (numDependencies) + NULLCHECK(from, to); + CHIPGraph *G = GRAPH(graph); + for (size_t i = 0; i < numDependencies; i++) { + CHIPGraphNode *FoundNode = G->findNode(NODE(to[i])); + if (!FoundNode) + RETURN(hipErrorInvalidValue); + FoundNode = G->findNode(NODE(from[i])); + if (!FoundNode) + RETURN(hipErrorInvalidValue); + } + for (size_t i = 0; i < numDependencies; i++) { + G->findNode(NODE(to[i]))->removeDependency(G->findNode(NODE(from[i]))); + } RETURN(hipSuccess); CHIP_CATCH } From f5b6c6e2da8d2f2faec0bf61f8e7c1f362a1a2ec Mon Sep 17 00:00:00 2001 From: Brice Videau Date: Tue, 18 Jul 2023 01:03:55 +0000 Subject: [PATCH 12/16] Made graph nodes dispatchable. --- src/CHIPBindings.cc | 360 +++++++++++++++++++++++++------------------- src/CHIPGraph.hh | 2 +- src/common.hh | 6 +- 3 files changed, 210 insertions(+), 158 deletions(-) diff --git a/src/CHIPBindings.cc b/src/CHIPBindings.cc index 982a979b1..68adcf570 100644 --- a/src/CHIPBindings.cc +++ b/src/CHIPBindings.cc @@ -306,15 +306,16 @@ hipError_t hipGraphAddDependencies(hipGraph_t graph, const hipGraphNode_t *from, NULLCHECK(from, to); CHIPGraph *G = GRAPH(graph); for (size_t i = 0; i < numDependencies; i++) { - CHIPGraphNode *FoundNode = G->findNode(NODE(to[i])); + CHIPGraphNode *FoundNode = G->findNode(GRAPHNODE(to[i])); if (!FoundNode) RETURN(hipErrorInvalidValue); - FoundNode = G->findNode(NODE(from[i])); + FoundNode = G->findNode(GRAPHNODE(from[i])); if (!FoundNode) RETURN(hipErrorInvalidValue); } for (size_t i = 0; i < numDependencies; i++) { - G->findNode(NODE(to[i]))->addDependency(G->findNode(NODE(from[i]))); + G->findNode(GRAPHNODE(to[i])) + ->addDependency(G->findNode(GRAPHNODE(from[i]))); } RETURN(hipSuccess); CHIP_CATCH @@ -331,15 +332,16 @@ hipError_t hipGraphRemoveDependencies(hipGraph_t graph, NULLCHECK(from, to); CHIPGraph *G = GRAPH(graph); for (size_t i = 0; i < numDependencies; i++) { - CHIPGraphNode *FoundNode = G->findNode(NODE(to[i])); + CHIPGraphNode *FoundNode = G->findNode(GRAPHNODE(to[i])); if (!FoundNode) RETURN(hipErrorInvalidValue); - FoundNode = G->findNode(NODE(from[i])); + FoundNode = G->findNode(GRAPHNODE(from[i])); if (!FoundNode) RETURN(hipErrorInvalidValue); } for (size_t i = 0; i < numDependencies; i++) { - G->findNode(NODE(to[i]))->removeDependency(G->findNode(NODE(from[i]))); + G->findNode(GRAPHNODE(to[i])) + ->removeDependency(G->findNode(GRAPHNODE(from[i]))); } RETURN(hipSuccess); CHIP_CATCH @@ -359,8 +361,8 @@ hipError_t hipGraphGetEdges(hipGraph_t graph, hipGraphNode_t *from, auto Edge = Edges[i]; auto FromNode = Edge.first; auto ToNode = Edge.second; - from[i] = FromNode; - to[i] = ToNode; + from[i] = HIPGRAPHNODE(FromNode); + to[i] = HIPGRAPHNODE(ToNode); } RETURN(hipSuccess); CHIP_CATCH @@ -371,8 +373,11 @@ hipError_t hipGraphGetNodes(hipGraph_t graph, hipGraphNode_t *nodes, CHIP_TRY CHIPInitialize(); auto Nodes = GRAPH(graph)->getNodes(); - *nodes = *(Nodes.data()); *numNodes = GRAPH(graph)->getNodes().size(); + if (nodes) { + for (size_t i = 0; i < *numNodes; i++) + nodes[i] = HIPGRAPHNODE(Nodes[i]); + } RETURN(hipSuccess); CHIP_CATCH } @@ -382,8 +387,12 @@ hipError_t hipGraphGetRootNodes(hipGraph_t graph, hipGraphNode_t *pRootNodes, CHIP_TRY CHIPInitialize(); auto Nodes = GRAPH(graph)->getRootNodes(); - *pRootNodes = *(Nodes.data()); *pNumRootNodes = GRAPH(graph)->getNodes().size(); + if (pRootNodes) { + for (size_t i = 0; i < *pNumRootNodes; i++) { + pRootNodes[i] = HIPGRAPHNODE(Nodes[i]); + } + } RETURN(hipSuccess); CHIP_CATCH } @@ -393,12 +402,12 @@ hipError_t hipGraphNodeGetDependencies(hipGraphNode_t node, size_t *pNumDependencies) { CHIP_TRY CHIPInitialize(); - auto Deps = NODE(node)->getDependencies(); + auto Deps = GRAPHNODE(node)->getDependencies(); *pNumDependencies = Deps.size(); if (!pDependencies) RETURN(hipSuccess); for (int i = 0; i < Deps.size(); i++) { - pDependencies[i] = Deps[i]; + pDependencies[i] = HIPGRAPHNODE(Deps[i]); } RETURN(hipSuccess); CHIP_CATCH @@ -409,12 +418,12 @@ hipError_t hipGraphNodeGetDependentNodes(hipGraphNode_t node, size_t *pNumDependentNodes) { CHIP_TRY CHIPInitialize(); - auto Deps = NODE(node)->getDependants(); + auto Deps = GRAPHNODE(node)->getDependants(); *pNumDependentNodes = Deps.size(); if (!pDependentNodes) RETURN(hipSuccess); for (int i = 0; i < Deps.size(); i++) { - pDependentNodes[i] = Deps[i]; + pDependentNodes[i] = HIPGRAPHNODE(Deps[i]); } RETURN(hipSuccess); CHIP_CATCH @@ -423,7 +432,7 @@ hipError_t hipGraphNodeGetDependentNodes(hipGraphNode_t node, hipError_t hipGraphNodeGetType(hipGraphNode_t node, hipGraphNodeType *pType) { CHIP_TRY CHIPInitialize(); - *pType = NODE(node)->getType(); + *pType = GRAPHNODE(node)->getType(); RETURN(hipSuccess); CHIP_CATCH } @@ -431,45 +440,50 @@ hipError_t hipGraphNodeGetType(hipGraphNode_t node, hipGraphNodeType *pType) { hipError_t hipGraphDestroyNode(hipGraphNode_t node) { CHIP_TRY CHIPInitialize(); + NULLCHECK(node); /** * have to resort to these shenanigans to call the proper derived destructor */ - auto NodeType = NODE(node)->getType(); + CHIPGraphNode *N = GRAPHNODE(node); + auto NodeType = N->getType(); switch (NodeType) { case hipGraphNodeTypeKernel: - delete static_cast(node); + static_cast(N)->~CHIPGraphNodeKernel(); break; case hipGraphNodeTypeMemcpy: - delete static_cast(node); + static_cast(N)->~CHIPGraphNodeMemcpy(); break; case hipGraphNodeTypeMemset: - delete static_cast(node); + static_cast(N)->~CHIPGraphNodeMemset(); break; case hipGraphNodeTypeHost: - delete static_cast(node); + static_cast(N)->~CHIPGraphNodeHost(); break; case hipGraphNodeTypeGraph: - delete static_cast(node); + static_cast(N)->~CHIPGraphNodeGraph(); break; case hipGraphNodeTypeEmpty: - delete static_cast(node); + static_cast(N)->~CHIPGraphNodeEmpty(); break; case hipGraphNodeTypeWaitEvent: - delete static_cast(node); + static_cast(N)->~CHIPGraphNodeWaitEvent(); break; case hipGraphNodeTypeEventRecord: - delete static_cast(node); + static_cast(N)->~CHIPGraphNodeEventRecord(); break; case hipGraphNodeTypeMemcpyFromSymbol: - delete static_cast(node); + static_cast(N) + ->~CHIPGraphNodeMemcpyFromSymbol(); break; case hipGraphNodeTypeMemcpyToSymbol: - delete static_cast(node); + static_cast(N) + ->~CHIPGraphNodeMemcpyToSymbol(); break; default: CHIPERR_LOG_AND_THROW("Unknown graph node type", hipErrorTbd); break; } + free(node); RETURN(hipSuccess); CHIP_CATCH } @@ -490,8 +504,9 @@ hipError_t hipGraphNodeFindInClone(hipGraphNode_t *pNode, hipGraph_t clonedGraph) { CHIP_TRY CHIPInitialize(); - auto Node = GRAPH(clonedGraph)->getClonedNodeFromOriginal(NODE(originalNode)); - *pNode = Node; + auto Node = + GRAPH(clonedGraph)->getClonedNodeFromOriginal(GRAPHNODE(originalNode)); + *pNode = HIPGRAPHNODE(Node); RETURN(hipSuccess); CHIP_CATCH } @@ -577,7 +592,7 @@ hipError_t hipGraphExecUpdate(hipGraphExec_t hGraphExec, hipGraph_t hGraph, auto NodeFound = ExecGraph->nodeLookup(Node); if (!NodeFound) { *updateResult_out = hipGraphExecUpdateErrorTopologyChanged; - *hErrorNode_out = Node; + *hErrorNode_out = HIPGRAPHNODE(Node); RETURN(hipErrorGraphExecUpdateFailure); } } @@ -632,7 +647,7 @@ hipError_t hipGraphExecUpdate(hipGraphExec_t hGraphExec, hipGraph_t hGraph, // 3. if (Node->getType() != NodeFound->getType()) { *updateResult_out = hipGraphExecUpdateErrorNodeTypeChanged; - *hErrorNode_out = Node; + *hErrorNode_out = HIPGRAPHNODE(Node); RETURN(hipErrorGraphExecUpdateFailure); } @@ -643,7 +658,7 @@ hipError_t hipGraphExecUpdate(hipGraphExec_t hGraphExec, hipGraph_t hGraph, auto NodeFoundCast = static_cast(NodeFound); if (NodeCast->getParams().func != NodeFoundCast->getParams().func) { *updateResult_out = hipGraphExecUpdateErrorFunctionChanged; - *hErrorNode_out = Node; + *hErrorNode_out = HIPGRAPHNODE(Node); RETURN(hipErrorGraphExecUpdateFailure); } } @@ -653,17 +668,46 @@ hipError_t hipGraphExecUpdate(hipGraphExec_t hGraphExec, hipGraph_t hGraph, CHIP_CATCH } +// graphs test seems wrong - normally we expect hipErrorInvalidHandle +// NULLCHECK(gr, pn, params); +#define CHECK_GRAPH_ADD_NODE_ARGS(pn, gr, deps, num, params) \ + do { \ + if (!gr || !pn || !params) \ + RETURN(hipErrorInvalidValue); \ + CHIPGraph *G = GRAPH(gr); \ + if (!deps && num > 0) \ + RETURN(hipErrorInvalidValue); \ + for (size_t i = 0; i < num; i++) { \ + NULLCHECK(deps[i]); \ + CHIPGraphNode *N = G->findNode(GRAPHNODE(deps[i])); \ + if (!N) \ + RETURN(hipErrorInvalidValue); \ + } \ + } while (0) + +// Error check is shaky here, assume no error (else we leak memory) +#define GRAPH_ADD_NODE_AND_DEPENDENCIES(g, N, deps, num) \ + do { \ + for (size_t i = 0; i < num; i++) { \ + N->addDependency(GRAPHNODE(deps[i])); \ + } \ + GRAPH(g)->addNode(N); \ + } while (0) + hipError_t hipGraphAddKernelNode(hipGraphNode_t *pGraphNode, hipGraph_t graph, const hipGraphNode_t *pDependencies, size_t numDependencies, const hipKernelNodeParams *pNodeParams) { CHIP_TRY CHIPInitialize(); - CHIPGraphNodeKernel *Node = new CHIPGraphNodeKernel{pNodeParams}; - Node->addDependencies(DECONST_NODES(pDependencies), numDependencies); - *pGraphNode = Node; - GRAPH(graph)->addNode(Node); + CHECK_GRAPH_ADD_NODE_ARGS(pGraphNode, graph, pDependencies, numDependencies, + pNodeParams); + void *mem = malloc(sizeof(ihipDispatch) + sizeof(CHIPGraphNodeKernel)); + CHIPGraphNodeKernel *Node = CHIP_HANDLE_TO_OBJ(mem, CHIPGraphNodeKernel); + Node = new (Node) CHIPGraphNodeKernel(pNodeParams); + GRAPH_ADD_NODE_AND_DEPENDENCIES(graph, Node, pDependencies, numDependencies); Node->Msg += "Kernel"; + *pGraphNode = HIPGRAPHNODE(Node); RETURN(hipSuccess); CHIP_CATCH } @@ -672,7 +716,7 @@ hipError_t hipGraphKernelNodeGetParams(hipGraphNode_t node, hipKernelNodeParams *pNodeParams) { CHIP_TRY CHIPInitialize(); - *pNodeParams = ((CHIPGraphNodeKernel *)node)->getParams(); + *pNodeParams = HIPTOCHIP(node, CHIPGraphNodeKernel)->getParams(); RETURN(hipSuccess); CHIP_CATCH } @@ -681,7 +725,7 @@ hipError_t hipGraphKernelNodeSetParams(hipGraphNode_t node, const hipKernelNodeParams *pNodeParams) { CHIP_TRY CHIPInitialize(); - ((CHIPGraphNodeKernel *)node)->setParams(*pNodeParams); + HIPTOCHIP(node, CHIPGraphNodeKernel)->setParams(*pNodeParams); RETURN(hipSuccess); CHIP_CATCH } @@ -696,7 +740,7 @@ hipGraphExecKernelNodeSetParams(hipGraphExec_t hGraphExec, hipGraphNode_t node, // KernelNode here is a handle to the original CHIPGraphNodeKernel *ExecKernelNode = static_cast( - GRAPH(Graph)->getClonedNodeFromOriginal(NODE(node))); + Graph->getClonedNodeFromOriginal(GRAPHNODE(node))); assert(ExecKernelNode); ExecKernelNode->setParams(*pNodeParams); @@ -710,15 +754,8 @@ hipError_t hipGraphAddMemcpyNode(hipGraphNode_t *pGraphNode, hipGraph_t graph, const hipMemcpy3DParms *pCopyParams) { CHIP_TRY CHIPInitialize(); - - // graphs test seems wrong - normally we expect hipErrorInvalidHandle - // NULLCHECK(graph, pGraphNode, pCopyParams); - if (!graph || !pGraphNode || !pCopyParams) - RETURN(hipErrorInvalidValue); - if (pDependencies == nullptr & numDependencies > 0) - CHIPERR_LOG_AND_THROW( - "numDependencies is not 0 while pDependencies is null", - hipErrorInvalidValue); + CHECK_GRAPH_ADD_NODE_ARGS(pGraphNode, graph, pDependencies, numDependencies, + pCopyParams); if (!pCopyParams->srcArray && !pCopyParams->srcPtr.ptr) CHIPERR_LOG_AND_THROW("all src are null", hipErrorInvalidValue); @@ -734,11 +771,12 @@ hipError_t hipGraphAddMemcpyNode(hipGraphNode_t *pGraphNode, hipGraph_t graph, "Passing different element size for hipMemcpy3DParms::srcArray and " "hipMemcpy3DParms::dstArray", hipErrorInvalidValue); - CHIPGraphNodeMemcpy *Node = new CHIPGraphNodeMemcpy(pCopyParams); - Node->addDependencies(DECONST_NODES(pDependencies), numDependencies); - *pGraphNode = Node; - GRAPH(graph)->addNode(Node); + void *mem = malloc(sizeof(ihipDispatch) + sizeof(CHIPGraphNodeMemcpy)); + CHIPGraphNodeMemcpy *Node = CHIP_HANDLE_TO_OBJ(mem, CHIPGraphNodeMemcpy); + Node = new (Node) CHIPGraphNodeMemcpy(pCopyParams); + GRAPH_ADD_NODE_AND_DEPENDENCIES(graph, Node, pDependencies, numDependencies); Node->Msg += "Memcpy"; + *pGraphNode = HIPGRAPHNODE(Node); RETURN(hipSuccess); CHIP_CATCH } @@ -747,9 +785,7 @@ hipError_t hipGraphMemcpyNodeGetParams(hipGraphNode_t node, hipMemcpy3DParms *pNodeParams) { CHIP_TRY CHIPInitialize(); - hipMemcpy3DParms Params = - static_cast(node)->getParams(); - pNodeParams = &Params; + *pNodeParams = HIPTOCHIP(node, CHIPGraphNodeMemcpy)->getParams(); RETURN(hipSuccess); CHIP_CATCH } @@ -758,7 +794,7 @@ hipError_t hipGraphMemcpyNodeSetParams(hipGraphNode_t node, const hipMemcpy3DParms *pNodeParams) { CHIP_TRY CHIPInitialize(); - static_cast(node)->setParams(pNodeParams); + HIPTOCHIP(node, CHIPGraphNodeMemcpy)->setParams(pNodeParams); RETURN(hipSuccess); CHIP_CATCH } @@ -769,14 +805,14 @@ hipError_t hipGraphExecMemcpyNodeSetParams(hipGraphExec_t hGraphExec, CHIP_TRY CHIPInitialize(); auto ExecNode = - GRAPHEXEC(hGraphExec)->getOriginalGraphPtr()->nodeLookup(NODE(node)); + GRAPHEXEC(hGraphExec)->getOriginalGraphPtr()->nodeLookup(GRAPHNODE(node)); if (!ExecNode) CHIPERR_LOG_AND_THROW("Failed to find the node in hipGraphExec_t", hipErrorInvalidValue); - auto CastNode = static_cast(node); + auto CastNode = static_cast(ExecNode); if (!CastNode) - CHIPERR_LOG_AND_THROW("Node provided failed to cast to CHIPGraphNodeMemcpy", + CHIPERR_LOG_AND_THROW("Node found failed to cast to CHIPGraphNodeMemcpy", hipErrorInvalidValue); CastNode->setParams(const_cast(pNodeParams)); @@ -791,11 +827,14 @@ hipError_t hipGraphAddMemcpyNode1D(hipGraphNode_t *pGraphNode, hipGraph_t graph, hipMemcpyKind kind) { CHIP_TRY CHIPInitialize(); - CHIPGraphNodeMemcpy *Node = new CHIPGraphNodeMemcpy(dst, src, count, kind); - *pGraphNode = Node; - Node->addDependencies(DECONST_NODES(pDependencies), numDependencies); - GRAPH(graph)->addNode(Node); + CHECK_GRAPH_ADD_NODE_ARGS(pGraphNode, graph, pDependencies, numDependencies, + 1); + void *mem = malloc(sizeof(ihipDispatch) + sizeof(CHIPGraphNodeMemcpy)); + CHIPGraphNodeMemcpy *Node = CHIP_HANDLE_TO_OBJ(mem, CHIPGraphNodeMemcpy); + Node = new (Node) CHIPGraphNodeMemcpy(dst, src, count, kind); + GRAPH_ADD_NODE_AND_DEPENDENCIES(graph, Node, pDependencies, numDependencies); Node->Msg += "Memcpy1D"; + *pGraphNode = HIPGRAPHNODE(Node); RETURN(hipSuccess); CHIP_CATCH } @@ -805,12 +844,7 @@ hipError_t hipGraphMemcpyNodeSetParams1D(hipGraphNode_t node, void *dst, hipMemcpyKind kind) { CHIP_TRY CHIPInitialize(); - auto CastNode = static_cast(node); - if (!CastNode) - CHIPERR_LOG_AND_THROW("Node provided failed to cast to CHIPGraphNodeMemcpy", - hipErrorInvalidValue); - - CastNode->setParams(dst, src, count, kind); + HIPTOCHIP(node, CHIPGraphNodeMemcpy)->setParams(dst, src, count, kind); RETURN(hipSuccess); CHIP_CATCH } @@ -822,14 +856,14 @@ hipError_t hipGraphExecMemcpyNodeSetParams1D(hipGraphExec_t hGraphExec, CHIP_TRY CHIPInitialize(); auto ExecNode = - GRAPHEXEC(hGraphExec)->getOriginalGraphPtr()->nodeLookup(NODE(node)); + GRAPHEXEC(hGraphExec)->getOriginalGraphPtr()->nodeLookup(GRAPHNODE(node)); if (!ExecNode) CHIPERR_LOG_AND_THROW("Failed to find the node in hipGraphExec_t", hipErrorInvalidValue); - auto CastNode = static_cast(node); + auto CastNode = static_cast(ExecNode); if (!CastNode) - CHIPERR_LOG_AND_THROW("Node provided failed to cast to CHIPGraphNodeMemcpy", + CHIPERR_LOG_AND_THROW("Node found failed to cast to CHIPGraphNodeMemcpy", hipErrorInvalidValue); CastNode->setParams(dst, src, count, kind); @@ -845,12 +879,17 @@ hipError_t hipGraphAddMemcpyNodeFromSymbol(hipGraphNode_t *pGraphNode, size_t offset, hipMemcpyKind kind) { CHIP_TRY CHIPInitialize(); + CHECK_GRAPH_ADD_NODE_ARGS(pGraphNode, graph, pDependencies, numDependencies, + 1); + void *mem = + malloc(sizeof(ihipDispatch) + sizeof(CHIPGraphNodeMemcpyFromSymbol)); CHIPGraphNodeMemcpyFromSymbol *Node = - new CHIPGraphNodeMemcpyFromSymbol(dst, symbol, count, offset, kind); - *pGraphNode = Node; - Node->addDependencies(DECONST_NODES(pDependencies), numDependencies); - GRAPH(graph)->addNode(Node); + CHIP_HANDLE_TO_OBJ(mem, CHIPGraphNodeMemcpyFromSymbol); + Node = new (Node) + CHIPGraphNodeMemcpyFromSymbol(dst, symbol, count, offset, kind); + GRAPH_ADD_NODE_AND_DEPENDENCIES(graph, Node, pDependencies, numDependencies); Node->Msg += "MemcpyNodeFromSymbol"; + *pGraphNode = HIPGRAPHNODE(Node); RETURN(hipSuccess); CHIP_CATCH } @@ -861,8 +900,8 @@ hipError_t hipGraphMemcpyNodeSetParamsFromSymbol(hipGraphNode_t node, void *dst, hipMemcpyKind kind) { CHIP_TRY CHIPInitialize(); - static_cast(node)->setParams( - dst, symbol, count, offset, kind); + HIPTOCHIP(node, CHIPGraphNodeMemcpyFromSymbol) + ->setParams(dst, symbol, count, offset, kind); RETURN(hipSuccess); CHIP_CATCH } @@ -875,11 +914,9 @@ hipError_t hipGraphExecMemcpyNodeSetParamsFromSymbol( // Graph obtained from hipGraphExec_t is a clone of the original CHIPGraph *Graph = GRAPHEXEC(hGraphExec)->getOriginalGraphPtr(); // KernelNode here is a handle to the original - CHIPGraphNodeMemcpyFromSymbol *KernelNode = - ((CHIPGraphNodeMemcpyFromSymbol *)node); CHIPGraphNodeMemcpyFromSymbol *ExecKernelNode = - ((CHIPGraphNodeMemcpyFromSymbol *)GRAPH(Graph)->getClonedNodeFromOriginal( - KernelNode)); + static_cast( + GRAPH(Graph)->getClonedNodeFromOriginal(GRAPHNODE(node))); ExecKernelNode->setParams(dst, symbol, count, offset, kind); RETURN(hipSuccess); @@ -895,12 +932,17 @@ hipError_t hipGraphAddMemcpyNodeToSymbol(hipGraphNode_t *pGraphNode, hipMemcpyKind kind) { CHIP_TRY CHIPInitialize(); - CHIPGraphNodeMemcpyToSymbol *Node = new CHIPGraphNodeMemcpyToSymbol( - const_cast(src), symbol, count, offset, kind); - *pGraphNode = Node; - Node->addDependencies(DECONST_NODES(pDependencies), numDependencies); - GRAPH(graph)->addNode(Node); + CHECK_GRAPH_ADD_NODE_ARGS(pGraphNode, graph, pDependencies, numDependencies, + 1); + void *mem = + malloc(sizeof(ihipDispatch) + sizeof(CHIPGraphNodeMemcpyToSymbol)); + CHIPGraphNodeMemcpyToSymbol *Node = + CHIP_HANDLE_TO_OBJ(mem, CHIPGraphNodeMemcpyToSymbol); + Node = new (Node) CHIPGraphNodeMemcpyToSymbol(const_cast(src), symbol, + count, offset, kind); + GRAPH_ADD_NODE_AND_DEPENDENCIES(graph, Node, pDependencies, numDependencies); Node->Msg += "MemcpyNodeToSymbol"; + *pGraphNode = HIPGRAPHNODE(Node); RETURN(hipSuccess); CHIP_CATCH } @@ -912,8 +954,8 @@ hipError_t hipGraphMemcpyNodeSetParamsToSymbol(hipGraphNode_t node, hipMemcpyKind kind) { CHIP_TRY CHIPInitialize(); - static_cast(node)->setParams( - const_cast(src), symbol, count, offset, kind); + HIPTOCHIP(node, CHIPGraphNodeMemcpyToSymbol) + ->setParams(const_cast(src), symbol, count, offset, kind); RETURN(hipSuccess); CHIP_CATCH } @@ -924,12 +966,12 @@ hipError_t hipGraphExecMemcpyNodeSetParamsToSymbol( CHIP_TRY CHIPInitialize(); auto ExecNode = - GRAPHEXEC(hGraphExec)->getOriginalGraphPtr()->nodeLookup(NODE(node)); + GRAPHEXEC(hGraphExec)->getOriginalGraphPtr()->nodeLookup(GRAPHNODE(node)); if (!ExecNode) CHIPERR_LOG_AND_THROW("Failed to find the node in hipGraphExec_t", hipErrorInvalidValue); - auto CastNode = static_cast(node); + auto CastNode = static_cast(ExecNode); if (!CastNode) CHIPERR_LOG_AND_THROW( "Node provided failed to cast to CHIPGraphNodeMemcpyToSymbol", @@ -946,11 +988,14 @@ hipError_t hipGraphAddMemsetNode(hipGraphNode_t *pGraphNode, hipGraph_t graph, const hipMemsetParams *pMemsetParams) { CHIP_TRY CHIPInitialize(); - CHIPGraphNodeMemset *Node = new CHIPGraphNodeMemset(pMemsetParams); - Node->addDependencies(DECONST_NODES(pDependencies), numDependencies); - GRAPH(graph)->addNode(Node); - *pGraphNode = Node; + CHECK_GRAPH_ADD_NODE_ARGS(pGraphNode, graph, pDependencies, numDependencies, + pMemsetParams); + void *mem = malloc(sizeof(ihipDispatch) + sizeof(CHIPGraphNodeMemset)); + CHIPGraphNodeMemset *Node = CHIP_HANDLE_TO_OBJ(mem, CHIPGraphNodeMemset); + Node = new (Node) CHIPGraphNodeMemset(pMemsetParams); + GRAPH_ADD_NODE_AND_DEPENDENCIES(graph, Node, pDependencies, numDependencies); Node->Msg += "Memset"; + *pGraphNode = HIPGRAPHNODE(Node); RETURN(hipSuccess); CHIP_CATCH } @@ -959,9 +1004,7 @@ hipError_t hipGraphMemsetNodeGetParams(hipGraphNode_t node, hipMemsetParams *pNodeParams) { CHIP_TRY CHIPInitialize(); - hipMemsetParams Params = - static_cast(node)->getParams(); - *pNodeParams = Params; + *pNodeParams = HIPTOCHIP(node, CHIPGraphNodeMemset)->getParams(); RETURN(hipSuccess); CHIP_CATCH } @@ -970,7 +1013,7 @@ hipError_t hipGraphMemsetNodeSetParams(hipGraphNode_t node, const hipMemsetParams *pNodeParams) { CHIP_TRY CHIPInitialize(); - static_cast(node)->setParams(pNodeParams); + HIPTOCHIP(node, CHIPGraphNodeMemset)->setParams(pNodeParams); RETURN(hipSuccess); CHIP_CATCH } @@ -981,12 +1024,12 @@ hipError_t hipGraphExecMemsetNodeSetParams(hipGraphExec_t hGraphExec, CHIP_TRY CHIPInitialize(); auto ExecNode = - GRAPHEXEC(hGraphExec)->getOriginalGraphPtr()->nodeLookup(NODE(node)); + GRAPHEXEC(hGraphExec)->getOriginalGraphPtr()->nodeLookup(GRAPHNODE(node)); if (!ExecNode) CHIPERR_LOG_AND_THROW("Failed to find the node in hipGraphExec_t", hipErrorInvalidValue); - auto CastNode = static_cast(node); + auto CastNode = static_cast(ExecNode); if (!CastNode) CHIPERR_LOG_AND_THROW("Node provided failed to cast to CHIPGraphNodeMemset", hipErrorInvalidValue); @@ -1002,11 +1045,14 @@ hipError_t hipGraphAddHostNode(hipGraphNode_t *pGraphNode, hipGraph_t graph, const hipHostNodeParams *pNodeParams) { CHIP_TRY CHIPInitialize(); - CHIPGraphNodeHost *Node = new CHIPGraphNodeHost(pNodeParams); - Node->addDependencies(DECONST_NODES(pDependencies), numDependencies); - GRAPH(graph)->addNode(Node); - *pGraphNode = Node; + CHECK_GRAPH_ADD_NODE_ARGS(pGraphNode, graph, pDependencies, numDependencies, + pNodeParams); + void *mem = malloc(sizeof(ihipDispatch) + sizeof(CHIPGraphNodeHost)); + CHIPGraphNodeHost *Node = CHIP_HANDLE_TO_OBJ(mem, CHIPGraphNodeHost); + Node = new (Node) CHIPGraphNodeHost(pNodeParams); + GRAPH_ADD_NODE_AND_DEPENDENCIES(graph, Node, pDependencies, numDependencies); Node->Msg += "Host"; + *pGraphNode = HIPGRAPHNODE(Node); RETURN(hipSuccess); CHIP_CATCH } @@ -1015,9 +1061,7 @@ hipError_t hipGraphHostNodeGetParams(hipGraphNode_t node, hipHostNodeParams *pNodeParams) { CHIP_TRY CHIPInitialize(); - hipHostNodeParams Params = - static_cast(node)->getParams(); - *pNodeParams = Params; + *pNodeParams = HIPTOCHIP(node, CHIPGraphNodeHost)->getParams(); RETURN(hipSuccess); CHIP_CATCH } @@ -1026,7 +1070,7 @@ hipError_t hipGraphHostNodeSetParams(hipGraphNode_t node, const hipHostNodeParams *pNodeParams) { CHIP_TRY CHIPInitialize(); - static_cast(node)->setParams(pNodeParams); + HIPTOCHIP(node, CHIPGraphNodeHost)->setParams(pNodeParams); RETURN(hipSuccess); CHIP_CATCH } @@ -1037,7 +1081,7 @@ hipError_t hipGraphExecHostNodeSetParams(hipGraphExec_t hGraphExec, CHIP_TRY CHIPInitialize(); auto ExecNode = - GRAPHEXEC(hGraphExec)->getOriginalGraphPtr()->nodeLookup(NODE(node)); + GRAPHEXEC(hGraphExec)->getOriginalGraphPtr()->nodeLookup(GRAPHNODE(node)); if (!ExecNode) CHIPERR_LOG_AND_THROW("Failed to find the node in hipGraphExec_t", hipErrorInvalidValue); @@ -1059,11 +1103,14 @@ hipError_t hipGraphAddChildGraphNode(hipGraphNode_t *pGraphNode, hipGraph_t childGraph) { CHIP_TRY CHIPInitialize(); - CHIPGraphNodeGraph *Node = new CHIPGraphNodeGraph(GRAPH(childGraph)); - *pGraphNode = Node; - Node->addDependencies(DECONST_NODES(pDependencies), numDependencies); - GRAPH(graph)->addNode(Node); + CHECK_GRAPH_ADD_NODE_ARGS(pGraphNode, graph, pDependencies, numDependencies, + childGraph); + void *mem = malloc(sizeof(ihipDispatch) + sizeof(CHIPGraphNodeGraph)); + CHIPGraphNodeGraph *Node = CHIP_HANDLE_TO_OBJ(mem, CHIPGraphNodeGraph); + Node = new (Node) CHIPGraphNodeGraph(GRAPH(childGraph)); + GRAPH_ADD_NODE_AND_DEPENDENCIES(graph, Node, pDependencies, numDependencies); Node->Msg += "ChildGraph"; + *pGraphNode = HIPGRAPHNODE(Node); RETURN(hipSuccess); CHIP_CATCH } @@ -1072,7 +1119,7 @@ hipError_t hipGraphChildGraphNodeGetGraph(hipGraphNode_t node, hipGraph_t *pGraph) { CHIP_TRY CHIPInitialize(); - CHIPGraph *G = static_cast(node)->getGraph(); + CHIPGraph *G = HIPTOCHIP(node, CHIPGraphNodeGraph)->getGraph(); *pGraph = HIPGRAPH(G); RETURN(hipSuccess); CHIP_CATCH @@ -1083,7 +1130,10 @@ hipError_t hipGraphExecChildGraphNodeSetParams(hipGraphExec_t hGraphExec, hipGraph_t childGraph) { CHIP_TRY CHIPInitialize(); - static_cast(node)->setGraph(GRAPH(childGraph)); + CHIPGraph *Graph = GRAPHEXEC(hGraphExec)->getOriginalGraphPtr(); + CHIPGraphNodeGraph *ExecKernelNode = static_cast( + Graph->getClonedNodeFromOriginal(GRAPHNODE(node))); + ExecKernelNode->setGraph(GRAPH(childGraph)); RETURN(hipSuccess); CHIP_CATCH } @@ -1093,10 +1143,14 @@ hipError_t hipGraphAddEmptyNode(hipGraphNode_t *pGraphNode, hipGraph_t graph, size_t numDependencies) { CHIP_TRY CHIPInitialize(); - CHIPGraphNodeEmpty *Node = new CHIPGraphNodeEmpty(); - Node->addDependencies(DECONST_NODES(pDependencies), numDependencies); - *pGraphNode = Node; - GRAPH(graph)->addNode(Node); + CHECK_GRAPH_ADD_NODE_ARGS(pGraphNode, graph, pDependencies, numDependencies, + 1); + void *mem = malloc(sizeof(ihipDispatch) + sizeof(CHIPGraphNodeEmpty)); + CHIPGraphNodeEmpty *Node = CHIP_HANDLE_TO_OBJ(mem, CHIPGraphNodeEmpty); + Node = new (Node) CHIPGraphNodeEmpty(); + GRAPH_ADD_NODE_AND_DEPENDENCIES(graph, Node, pDependencies, numDependencies); + Node->Msg += "EmptyEvent"; + *pGraphNode = HIPGRAPHNODE(Node); RETURN(hipSuccess); CHIP_CATCH } @@ -1108,10 +1162,15 @@ hipError_t hipGraphAddEventRecordNode(hipGraphNode_t *pGraphNode, hipEvent_t event) { CHIP_TRY CHIPInitialize(); - CHIPGraphNodeEventRecord *Node = new CHIPGraphNodeEventRecord(EVENT(event)); - Node->addDependencies(DECONST_NODES(pDependencies), numDependencies); - *pGraphNode = Node; - GRAPH(graph)->addNode(Node); + CHECK_GRAPH_ADD_NODE_ARGS(pGraphNode, graph, pDependencies, numDependencies, + event); + void *mem = malloc(sizeof(ihipDispatch) + sizeof(CHIPGraphNodeEventRecord)); + CHIPGraphNodeEventRecord *Node = + CHIP_HANDLE_TO_OBJ(mem, CHIPGraphNodeEventRecord); + Node = new (Node) CHIPGraphNodeEventRecord(EVENT(event)); + GRAPH_ADD_NODE_AND_DEPENDENCIES(graph, Node, pDependencies, numDependencies); + Node->Msg += "RecordEvent"; + *pGraphNode = HIPGRAPHNODE(Node); RETURN(hipSuccess); CHIP_CATCH } @@ -1120,11 +1179,9 @@ hipError_t hipGraphEventRecordNodeGetEvent(hipGraphNode_t node, hipEvent_t *event_out) { CHIP_TRY CHIPInitialize(); - auto CastNode = static_cast(node); - if (!CastNode) - CHIPERR_LOG_AND_THROW("Failed to cast CHIPGraphNodeEventRecord", - hipErrorInvalidValue); - *event_out = HIPEVENT(CastNode->getEvent()); + chipstar::Event *Event = + HIPTOCHIP(node, CHIPGraphNodeEventRecord)->getEvent(); + *event_out = HIPEVENT(Event); RETURN(hipSuccess); CHIP_CATCH } @@ -1133,11 +1190,7 @@ hipError_t hipGraphEventRecordNodeSetEvent(hipGraphNode_t node, hipEvent_t event) { CHIP_TRY CHIPInitialize(); - auto CastNode = static_cast(node); - if (!CastNode) - CHIPERR_LOG_AND_THROW("Failed to cast CHIPGraphNodeEventRecord", - hipErrorInvalidValue); - CastNode->setEvent(EVENT(event)); + HIPTOCHIP(node, CHIPGraphNodeEventRecord)->setEvent(EVENT(event)); RETURN(hipSuccess); CHIP_CATCH } @@ -1147,13 +1200,14 @@ hipError_t hipGraphExecEventRecordNodeSetEvent(hipGraphExec_t hGraphExec, hipEvent_t event) { CHIP_TRY CHIPInitialize(); - auto ExecNode = - GRAPHEXEC(hGraphExec)->getOriginalGraphPtr()->nodeLookup(NODE(hNode)); + auto ExecNode = GRAPHEXEC(hGraphExec) + ->getOriginalGraphPtr() + ->nodeLookup(GRAPHNODE(hNode)); if (!ExecNode) CHIPERR_LOG_AND_THROW("Failed to find the node in hipGraphExec_t", hipErrorInvalidValue); - auto CastNode = static_cast(hNode); + auto CastNode = static_cast(ExecNode); if (!CastNode) CHIPERR_LOG_AND_THROW( "Node provided failed to cast to CHIPGraphNodeEventRecord", @@ -1170,11 +1224,15 @@ hipError_t hipGraphAddEventWaitNode(hipGraphNode_t *pGraphNode, size_t numDependencies, hipEvent_t event) { CHIP_TRY CHIPInitialize(); - CHIPGraphNodeWaitEvent *Node = new CHIPGraphNodeWaitEvent(EVENT(event)); - *pGraphNode = Node; - Node->addDependencies(DECONST_NODES(pDependencies), numDependencies); - GRAPH(graph)->addNode(Node); + CHECK_GRAPH_ADD_NODE_ARGS(pGraphNode, graph, pDependencies, numDependencies, + event); + void *mem = malloc(sizeof(ihipDispatch) + sizeof(CHIPGraphNodeWaitEvent)); + CHIPGraphNodeWaitEvent *Node = + CHIP_HANDLE_TO_OBJ(mem, CHIPGraphNodeWaitEvent); + Node = new (Node) CHIPGraphNodeWaitEvent(EVENT(event)); + GRAPH_ADD_NODE_AND_DEPENDENCIES(graph, Node, pDependencies, numDependencies); Node->Msg += "WaitEvent"; + *pGraphNode = HIPGRAPHNODE(Node); RETURN(hipSuccess); CHIP_CATCH } @@ -1183,13 +1241,8 @@ hipError_t hipGraphEventWaitNodeGetEvent(hipGraphNode_t node, hipEvent_t *event_out) { CHIP_TRY CHIPInitialize(); - auto CastNode = static_cast(node); - if (!CastNode) - CHIPERR_LOG_AND_THROW( - "Node provided failed to cast to CHIPGraphNodeWaitEvent", - hipErrorInvalidValue); - - *event_out = HIPEVENT(CastNode->getEvent()); + chipstar::Event *Event = HIPTOCHIP(node, CHIPGraphNodeWaitEvent)->getEvent(); + *event_out = HIPEVENT(Event); RETURN(hipSuccess); CHIP_CATCH } @@ -1198,13 +1251,7 @@ hipError_t hipGraphEventWaitNodeSetEvent(hipGraphNode_t node, hipEvent_t event) { CHIP_TRY CHIPInitialize(); - auto CastNode = static_cast(node); - if (!CastNode) - CHIPERR_LOG_AND_THROW( - "Node provided failed to cast to CHIPGraphNodeWaitEvent", - hipErrorInvalidValue); - - CastNode->setEvent(EVENT(event)); + HIPTOCHIP(node, CHIPGraphNodeWaitEvent)->setEvent(EVENT(event)); RETURN(hipSuccess); CHIP_CATCH } @@ -1214,8 +1261,9 @@ hipError_t hipGraphExecEventWaitNodeSetEvent(hipGraphExec_t hGraphExec, hipEvent_t event) { CHIP_TRY CHIPInitialize(); - auto ExecNode = - GRAPHEXEC(hGraphExec)->getOriginalGraphPtr()->nodeLookup(NODE(hNode)); + auto ExecNode = GRAPHEXEC(hGraphExec) + ->getOriginalGraphPtr() + ->nodeLookup(GRAPHNODE(hNode)); if (!ExecNode) CHIPERR_LOG_AND_THROW("Failed to find the node in hipGraphExec_t", hipErrorInvalidValue); diff --git a/src/CHIPGraph.hh b/src/CHIPGraph.hh index 9e413c84d..e0fe64172 100644 --- a/src/CHIPGraph.hh +++ b/src/CHIPGraph.hh @@ -47,7 +47,7 @@ class ExecItem; class CHIPGraph; -class CHIPGraphNode : public hipGraphNode { +class CHIPGraphNode { protected: hipGraphNodeType Type_; // nodes which depend on this node diff --git a/src/common.hh b/src/common.hh index d290b26d7..7f118a889 100644 --- a/src/common.hh +++ b/src/common.hh @@ -96,6 +96,10 @@ struct ihipDispatch { #define HIPGRAPHEXEC(x) CHIPTOHIP(x, hipGraphExec) +#define GRAPHNODE(x) HIPTOCHIP(x, CHIPGraphNode) + +#define HIPGRAPHNODE(x) CHIPTOHIP(x, hipGraphNode) + /// The implementation of ihipEvent_t. The chipstar::Event class inherits this /// so ihipEvent_t pointers may carry chipstar::Event instances. struct ihipEvent_t : ihipDispatch {}; @@ -104,7 +108,7 @@ struct ihipStream_t : ihipDispatch {}; struct ihipModule_t : ihipDispatch {}; struct ihipModuleSymbol_t : ihipDispatch {}; struct ihipGraph : ihipDispatch {}; -struct hipGraphNode {}; +struct hipGraphNode : ihipDispatch {}; struct hipGraphExec : ihipDispatch {}; bool filterSPIRV(const char *Bytes, size_t NumBytes, std::string &Dst); From ef6c28b3688f2e721df34d732bf3227547c9b8d9 Mon Sep 17 00:00:00 2001 From: Brice Videau Date: Wed, 19 Jul 2023 22:26:10 +0000 Subject: [PATCH 13/16] Made texture objects dispatchable. --- src/CHIPBindings.cc | 6 ++--- src/backend/Level0/CHIPBackendLevel0.cc | 35 ++++++++++++++----------- src/backend/Level0/CHIPBackendLevel0.hh | 3 ++- src/backend/OpenCL/CHIPBackendOpenCL.cc | 32 +++++++++++++--------- src/backend/OpenCL/CHIPBackendOpenCL.hh | 3 ++- src/common.hh | 5 ++++ 6 files changed, 50 insertions(+), 34 deletions(-) diff --git a/src/CHIPBindings.cc b/src/CHIPBindings.cc index 68adcf570..4d56befe8 100644 --- a/src/CHIPBindings.cc +++ b/src/CHIPBindings.cc @@ -4152,7 +4152,7 @@ hipCreateTextureObject(hipTextureObject_t *TexObject, chipstar::Texture *RetObj = Backend->getActiveDevice()->createTexture(ResDesc, TexDesc, ResViewDesc); if (RetObj != nullptr) { - *TexObject = reinterpret_cast(RetObj); + *TexObject = HIPTEXTUREOBJECT(RetObj); RETURN(hipSuccess); } else RETURN(hipErrorInvalidValue); @@ -4165,7 +4165,7 @@ hipError_t hipDestroyTextureObject(hipTextureObject_t TextureObject) { // TODO CRITCAL look into the define for hipTextureObject_t if (TextureObject == nullptr) RETURN(hipSuccess); - chipstar::Texture *ChipTexture = (chipstar::Texture *)TextureObject; + chipstar::Texture *ChipTexture = TEXTURE(TextureObject); Backend->getActiveDevice()->destroyTexture(ChipTexture); RETURN(hipSuccess); CHIP_CATCH @@ -4177,7 +4177,7 @@ hipError_t hipGetTextureObjectResourceDesc(hipResourceDesc *ResDesc, CHIPInitialize(); if (TextureObject == nullptr) RETURN(hipErrorInvalidValue); - chipstar::Texture *ChipTexture = (chipstar::Texture *)TextureObject; + chipstar::Texture *ChipTexture = TEXTURE(TextureObject); *ResDesc = ChipTexture->getResourceDesc(); RETURN(hipSuccess); CHIP_CATCH diff --git a/src/backend/Level0/CHIPBackendLevel0.cc b/src/backend/Level0/CHIPBackendLevel0.cc index 1117dc5f9..556306b3d 100644 --- a/src/backend/Level0/CHIPBackendLevel0.cc +++ b/src/backend/Level0/CHIPBackendLevel0.cc @@ -2075,15 +2075,16 @@ chipstar::Texture *CHIPDeviceLevel0::createTexture( allocateImage(Array->textureType, Array->desc, NormalizedFloat, Width, Height, Depth)); - auto Tex = std::make_unique(*PResDesc, ImageHandle, - SamplerHandle); - logTrace("Created texture: {}", (void *)Tex.get()); + void *mem = malloc(sizeof(ihipDispatch) + sizeof(CHIPTextureLevel0)); + CHIPTextureLevel0 *Tex = CHIP_HANDLE_TO_OBJ(mem, CHIPTextureLevel0); + Tex = new (Tex) CHIPTextureLevel0(*PResDesc, ImageHandle, SamplerHandle); + logTrace("Created texture: {}", (void *)Tex); chipstar::RegionDesc SrcRegion = chipstar::RegionDesc::from(*Array); Q->memCopyToImage(ImageHandle, Array->data, SrcRegion); Q->finish(); // Finish for safety. - return Tex.release(); + return Tex; } if (PResDesc->resType == hipResourceTypeLinear) { @@ -2094,16 +2095,17 @@ chipstar::Texture *CHIPDeviceLevel0::createTexture( ze_image_handle_t ImageHandle = reinterpret_cast( allocateImage(hipTextureType1D, Res.desc, NormalizedFloat, Width)); - auto Tex = std::make_unique(*PResDesc, ImageHandle, - SamplerHandle); - logTrace("Created texture: {}", (void *)Tex.get()); + void *mem = malloc(sizeof(ihipDispatch) + sizeof(CHIPTextureLevel0)); + CHIPTextureLevel0 *Tex = CHIP_HANDLE_TO_OBJ(mem, CHIPTextureLevel0); + Tex = new (Tex) CHIPTextureLevel0(*PResDesc, ImageHandle, SamplerHandle); + logTrace("Created texture: {}", (void *)Tex); // Copy data to image. auto SrcDesc = chipstar::RegionDesc::get1DRegion(Width, TexelByteSize); Q->memCopyToImage(ImageHandle, Res.devPtr, SrcDesc); Q->finish(); // Finish for safety. - return Tex.release(); + return Tex; } if (PResDesc->resType == hipResourceTypePitch2D) { @@ -2115,16 +2117,17 @@ chipstar::Texture *CHIPDeviceLevel0::createTexture( allocateImage(hipTextureType2D, Res.desc, NormalizedFloat, Res.width, Res.height)); - auto Tex = std::make_unique(*PResDesc, ImageHandle, - SamplerHandle); - logTrace("Created texture: {}", (void *)Tex.get()); + void *mem = malloc(sizeof(ihipDispatch) + sizeof(CHIPTextureLevel0)); + CHIPTextureLevel0 *Tex = CHIP_HANDLE_TO_OBJ(mem, CHIPTextureLevel0); + Tex = new (Tex) CHIPTextureLevel0(*PResDesc, ImageHandle, SamplerHandle); + logTrace("Created texture: {}", (void *)Tex); // Copy data to image. auto SrcDesc = chipstar::RegionDesc::from(*PResDesc); Q->memCopyToImage(ImageHandle, Res.devPtr, SrcDesc); Q->finish(); // Finish for safety. - return Tex.release(); + return Tex; } CHIPASSERT(false && "Unsupported/unimplemented texture resource type."); @@ -2344,8 +2347,8 @@ void CHIPExecItemLevel0::setupAllArgs() { hipErrorTbd); case SPVTypeKind::Image: { - auto *TexObj = - *reinterpret_cast(Arg.Data); + hipTextureObject_t HIPTexObj = *(hipTextureObject_t *)(Arg.Data); + CHIPTextureLevel0 *TexObj = HIPTOCHIP(HIPTexObj, CHIPTextureLevel0); ze_image_handle_t ImageHandle = TexObj->getImage(); logTrace("setImageArg {} size {}\n", Arg.Index, sizeof(ze_image_handle_t)); @@ -2354,8 +2357,8 @@ void CHIPExecItemLevel0::setupAllArgs() { break; } case SPVTypeKind::Sampler: { - auto *TexObj = - *reinterpret_cast(Arg.Data); + hipTextureObject_t HIPTexObj = *(hipTextureObject_t *)(Arg.Data); + CHIPTextureLevel0 *TexObj = HIPTOCHIP(HIPTexObj, CHIPTextureLevel0); ze_sampler_handle_t SamplerHandle = TexObj->getSampler(); logTrace("setSamplerArg {} size {}\n", Arg.Index, sizeof(ze_sampler_handle_t)); diff --git a/src/backend/Level0/CHIPBackendLevel0.hh b/src/backend/Level0/CHIPBackendLevel0.hh index 1519bea22..2866a407b 100644 --- a/src/backend/Level0/CHIPBackendLevel0.hh +++ b/src/backend/Level0/CHIPBackendLevel0.hh @@ -529,7 +529,8 @@ public: virtual void destroyTexture(chipstar::Texture *TextureObject) override { logTrace("CHIPDeviceLevel0::destroyTexture"); - delete TextureObject; + TextureObject->~Texture(); + free(CHIP_OBJ_TO_HANDLE(TextureObject, __hip_texture)); } CHIPModuleLevel0 *compile(const SPVModule &Src) override; diff --git a/src/backend/OpenCL/CHIPBackendOpenCL.cc b/src/backend/OpenCL/CHIPBackendOpenCL.cc index 9e0ce3651..b193e4a32 100644 --- a/src/backend/OpenCL/CHIPBackendOpenCL.cc +++ b/src/backend/OpenCL/CHIPBackendOpenCL.cc @@ -316,13 +316,15 @@ CHIPDeviceOpenCL::createTexture(const hipResourceDesc *ResDesc, cl_mem Image = createImage(CLCtx, Array->textureType, Array->desc, NormalizedFloat, Width, Height, Depth); - auto Tex = std::make_unique(*ResDesc, Image, Sampler); - logTrace("Created texture: {}", (void *)Tex.get()); + void *mem = malloc(sizeof(ihipDispatch) + sizeof(CHIPTextureOpenCL)); + CHIPTextureOpenCL *Tex = CHIP_HANDLE_TO_OBJ(mem, CHIPTextureOpenCL); + Tex = new (Tex) CHIPTextureOpenCL(*ResDesc, Image, Sampler); + logTrace("Created texture: {}", (void *)Tex); chipstar::RegionDesc SrcRegion = chipstar::RegionDesc::from(*Array); memCopyToImage(Q->get()->get(), Image, Array->data, SrcRegion); - return Tex.release(); + return Tex; } if (ResDesc->resType == hipResourceTypeLinear) { @@ -333,14 +335,16 @@ CHIPDeviceOpenCL::createTexture(const hipResourceDesc *ResDesc, cl_mem Image = createImage(CLCtx, hipTextureType1D, Res.desc, NormalizedFloat, Width); - auto Tex = std::make_unique(*ResDesc, Image, Sampler); - logTrace("Created texture: {}", (void *)Tex.get()); + void *mem = malloc(sizeof(ihipDispatch) + sizeof(CHIPTextureOpenCL)); + CHIPTextureOpenCL *Tex = CHIP_HANDLE_TO_OBJ(mem, CHIPTextureOpenCL); + Tex = new (Tex) CHIPTextureOpenCL(*ResDesc, Image, Sampler); + logTrace("Created texture: {}", (void *)Tex); // Copy data to image. auto SrcDesc = chipstar::RegionDesc::get1DRegion(Width, TexelByteSize); memCopyToImage(Q->get()->get(), Image, Res.devPtr, SrcDesc); - return Tex.release(); + return Tex; } if (ResDesc->resType == hipResourceTypePitch2D) { @@ -350,14 +354,16 @@ CHIPDeviceOpenCL::createTexture(const hipResourceDesc *ResDesc, cl_mem Image = createImage(CLCtx, hipTextureType2D, Res.desc, NormalizedFloat, Res.width, Res.height); - auto Tex = std::make_unique(*ResDesc, Image, Sampler); - logTrace("Created texture: {}", (void *)Tex.get()); + void *mem = malloc(sizeof(ihipDispatch) + sizeof(CHIPTextureOpenCL)); + CHIPTextureOpenCL *Tex = CHIP_HANDLE_TO_OBJ(mem, CHIPTextureOpenCL); + Tex = new (Tex) CHIPTextureOpenCL(*ResDesc, Image, Sampler); + logTrace("Created texture: {}", (void *)Tex); // Copy data to image. auto SrcDesc = chipstar::RegionDesc::from(*ResDesc); memCopyToImage(Q->get()->get(), Image, Res.devPtr, SrcDesc); - return Tex.release(); + return Tex; } CHIPASSERT(false && "Unsupported/unimplemented texture resource type."); @@ -1315,8 +1321,8 @@ void CHIPExecItemOpenCL::setupAllArgs() { CHIPERR_LOG_AND_THROW("Internal CHIP-SPV error: Unknown argument kind", hipErrorTbd); case SPVTypeKind::Image: { - auto *TexObj = - *reinterpret_cast(Arg.Data); + hipTextureObject_t HIPTexObj = *(hipTextureObject_t *)(Arg.Data); + CHIPTextureOpenCL *TexObj = HIPTOCHIP(HIPTexObj, CHIPTextureOpenCL); cl_mem Image = TexObj->getImage(); logTrace("set image arg {} for tex {}\n", Arg.Index, (void *)TexObj); Err = ::clSetKernelArg(Kernel->get()->get(), Arg.Index, sizeof(cl_mem), @@ -1326,8 +1332,8 @@ void CHIPExecItemOpenCL::setupAllArgs() { break; } case SPVTypeKind::Sampler: { - auto *TexObj = - *reinterpret_cast(Arg.Data); + hipTextureObject_t HIPTexObj = *(hipTextureObject_t *)(Arg.Data); + CHIPTextureOpenCL *TexObj = HIPTOCHIP(HIPTexObj, CHIPTextureOpenCL); cl_sampler Sampler = TexObj->getSampler(); logTrace("set sampler arg {} for tex {}\n", Arg.Index, (void *)TexObj); Err = ::clSetKernelArg(Kernel->get()->get(), Arg.Index, diff --git a/src/backend/OpenCL/CHIPBackendOpenCL.hh b/src/backend/OpenCL/CHIPBackendOpenCL.hh index c43afd53b..5efb792d9 100644 --- a/src/backend/OpenCL/CHIPBackendOpenCL.hh +++ b/src/backend/OpenCL/CHIPBackendOpenCL.hh @@ -199,7 +199,8 @@ public: const struct hipResourceViewDesc *ResViewDesc) override; virtual void destroyTexture(chipstar::Texture *ChipTexture) override { logTrace("CHIPDeviceOpenCL::destroyTexture"); - delete ChipTexture; + ChipTexture->~Texture(); + free(CHIP_OBJ_TO_HANDLE(ChipTexture, __hip_texture)); } CHIPModuleOpenCL *compile(const SPVModule &SrcMod) override { diff --git a/src/common.hh b/src/common.hh index 7f118a889..a2ffb9cc5 100644 --- a/src/common.hh +++ b/src/common.hh @@ -100,6 +100,10 @@ struct ihipDispatch { #define HIPGRAPHNODE(x) CHIPTOHIP(x, hipGraphNode) +#define TEXTURE(x) HIPTOCHIP(x, chipstar::Texture) + +#define HIPTEXTUREOBJECT(x) CHIPTOHIP(x, __hip_texture) + /// The implementation of ihipEvent_t. The chipstar::Event class inherits this /// so ihipEvent_t pointers may carry chipstar::Event instances. struct ihipEvent_t : ihipDispatch {}; @@ -110,6 +114,7 @@ struct ihipModuleSymbol_t : ihipDispatch {}; struct ihipGraph : ihipDispatch {}; struct hipGraphNode : ihipDispatch {}; struct hipGraphExec : ihipDispatch {}; +struct __hip_texture : ihipDispatch {}; bool filterSPIRV(const char *Bytes, size_t NumBytes, std::string &Dst); bool parseSPIR(uint32_t *Stream, size_t NumWords, From 16dd5fa1cf631d042f1cf31fc07390e054add204 Mon Sep 17 00:00:00 2001 From: Brice Videau Date: Thu, 20 Jul 2023 00:04:15 +0000 Subject: [PATCH 14/16] Made hiprtc programs dispachable. --- src/common.hh | 5 +++++ src/spirv_hiprtc.cc | 25 ++++++++++++++----------- 2 files changed, 19 insertions(+), 11 deletions(-) diff --git a/src/common.hh b/src/common.hh index a2ffb9cc5..f81b2610e 100644 --- a/src/common.hh +++ b/src/common.hh @@ -104,6 +104,10 @@ struct ihipDispatch { #define HIPTEXTUREOBJECT(x) CHIPTOHIP(x, __hip_texture) +#define PROGRAM(x) HIPTOCHIP(x, chipstar::Program) + +#define HIPRTCPROGRAM(x) CHIPTOHIP(x, _hiprtcProgram) + /// The implementation of ihipEvent_t. The chipstar::Event class inherits this /// so ihipEvent_t pointers may carry chipstar::Event instances. struct ihipEvent_t : ihipDispatch {}; @@ -115,6 +119,7 @@ struct ihipGraph : ihipDispatch {}; struct hipGraphNode : ihipDispatch {}; struct hipGraphExec : ihipDispatch {}; struct __hip_texture : ihipDispatch {}; +struct _hiprtcProgram : ihipDispatch {}; bool filterSPIRV(const char *Bytes, size_t NumBytes, std::string &Dst); bool parseSPIR(uint32_t *Stream, size_t NumWords, diff --git a/src/spirv_hiprtc.cc b/src/spirv_hiprtc.cc index a2ad7a46e..81776a9b5 100644 --- a/src/spirv_hiprtc.cc +++ b/src/spirv_hiprtc.cc @@ -360,7 +360,7 @@ hiprtcResult hiprtcAddNameExpression(hiprtcProgram Prog, if (!Prog || !NameExpression) return HIPRTC_ERROR_INVALID_INPUT; - auto &Program = *(chipstar::Program *)Prog; + auto &Program = *PROGRAM(Prog); if (Program.isAfterCompilation()) return HIPRTC_ERROR_NO_NAME_EXPRESSIONS_AFTER_COMPILATION; @@ -395,7 +395,7 @@ hiprtcResult hiprtcCompileProgram(hiprtcProgram Prog, int NumOptions, if (!Prog) return HIPRTC_ERROR_INVALID_INPUT; try { - auto &Program = *(chipstar::Program *)Prog; + auto &Program = *PROGRAM(Prog); // Create temporary directory for compilation I/O. auto TmpDir = createTemporaryDirectory(); @@ -436,8 +436,9 @@ hiprtcResult hiprtcCreateProgram(hiprtcProgram *Prog, const char *Src, try { // From NVRTC: 'CUDA program name. name can be NULL; // "default_program" is used when name is NULL or "". '. - auto Program = - std::make_unique(Name ? Name : "default_program"); + void *mem = malloc(sizeof(ihipDispatch) + sizeof(chipstar::Program)); + chipstar::Program *Program = CHIP_HANDLE_TO_OBJ(mem, chipstar::Program); + Program = new (Program) chipstar::Program(Name ? Name : "default_program"); Program->setSource(Src); for (int i = 0; i < NumHeaders; i++) { @@ -465,7 +466,7 @@ hiprtcResult hiprtcCreateProgram(hiprtcProgram *Prog, const char *Src, Program->addHeader(IncludeName, HeaderPtr); } - *Prog = (hiprtcProgram)Program.release(); + *Prog = HIPRTCPROGRAM(Program); return HIPRTC_SUCCESS; } catch (...) { logDebug("Caught an unknown exception\n"); @@ -477,7 +478,9 @@ hiprtcResult hiprtcDestroyProgram(hiprtcProgram *Prog) { if (!Prog || !*Prog) return HIPRTC_ERROR_INVALID_PROGRAM; try { - delete (chipstar::Program *)*Prog; + chipstar::Program *Program = PROGRAM(*Prog); + Program->~Program(); + free(*Prog); *Prog = nullptr; } catch (...) { logDebug("Caught an unknown exception\n"); @@ -492,7 +495,7 @@ hiprtcResult hiprtcGetLoweredName(hiprtcProgram WrappedProg, if (!WrappedProg || !NameExpression || !LoweredName) return HIPRTC_ERROR_INVALID_INPUT; - auto &Prog = *(chipstar::Program *)WrappedProg; + auto &Prog = *PROGRAM(WrappedProg); if (!Prog.isAfterCompilation()) return HIPRTC_ERROR_NO_LOWERED_NAMES_BEFORE_COMPILATION; @@ -510,7 +513,7 @@ hiprtcResult hiprtcGetProgramLog(hiprtcProgram Prog, char *Log) { if (!Prog || !Log) return HIPRTC_ERROR_INVALID_INPUT; try { - const auto &LogSrc = ((chipstar::Program *)Prog)->getProgramLog(); + const auto &LogSrc = PROGRAM(Prog)->getProgramLog(); std::memcpy(Log, LogSrc.c_str(), LogSrc.size()); return HIPRTC_SUCCESS; } catch (...) { @@ -523,7 +526,7 @@ hiprtcResult hiprtcGetProgramLogSize(hiprtcProgram Prog, size_t *LogSizeRet) { if (!Prog || !LogSizeRet) return HIPRTC_ERROR_INVALID_INPUT; try { - *LogSizeRet = ((chipstar::Program *)Prog)->getProgramLog().size(); + *LogSizeRet = PROGRAM(Prog)->getProgramLog().size(); return HIPRTC_SUCCESS; } catch (...) { logDebug("Caught an unknown exception\n"); @@ -537,7 +540,7 @@ hiprtcResult hiprtcGetCode(hiprtcProgram Prog, char *Code) { if (!Code) return HIPRTC_ERROR_INVALID_INPUT; try { - auto &SavedCode = ((chipstar::Program *)Prog)->getCode(); + auto &SavedCode = PROGRAM(Prog)->getCode(); std::memcpy(Code, SavedCode.c_str(), SavedCode.size()); return HIPRTC_SUCCESS; } catch (...) { @@ -552,7 +555,7 @@ hiprtcResult hiprtcGetCodeSize(hiprtcProgram Prog, size_t *CodeSizeRet) { if (!CodeSizeRet) return HIPRTC_ERROR_INVALID_INPUT; try { - *CodeSizeRet = ((chipstar::Program *)Prog)->getCode().size(); + *CodeSizeRet = PROGRAM(Prog)->getCode().size(); return HIPRTC_SUCCESS; } catch (...) { logDebug("Caught an unknown exception\n"); From a44162c5d844d07983ae990d0f3721c37fac9d64 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henry=20Linjam=C3=A4ki?= Date: Tue, 10 Oct 2023 13:29:20 +0300 Subject: [PATCH 15/16] Propose alternate dispatchable object approach ... without need of placement new and manual destructor invocation. New approach, applied on chipstar::Queue as demonstration, discouples the backend from the dispatchable object logic. --- src/CHIPBackend.cc | 10 +++-- src/CHIPBackend.hh | 2 +- src/CHIPBindings.cc | 2 +- src/backend/Level0/CHIPBackendLevel0.cc | 13 ++---- src/backend/OpenCL/CHIPBackendOpenCL.cc | 8 +--- src/common.hh | 53 +++++++++++++++++++------ 6 files changed, 54 insertions(+), 34 deletions(-) diff --git a/src/CHIPBackend.cc b/src/CHIPBackend.cc index f2f083227..38d6d513c 100644 --- a/src/CHIPBackend.cc +++ b/src/CHIPBackend.cc @@ -502,13 +502,11 @@ chipstar::Device::~Device() { LOCK(DeviceMtx); // chipstar::Device::ChipQueues_ logDebug("~Device() {}", (void *)this); while (this->ChipQueues_.size() > 0) { - ChipQueues_[0]->~Queue(); - free(CHIP_OBJ_TO_HANDLE(ChipQueues_[0], ihipStream_t)); + delete ChipQueues_[0]; ChipQueues_.erase(ChipQueues_.begin()); } - LegacyDefaultQueue->~Queue(); - free(CHIP_OBJ_TO_HANDLE(LegacyDefaultQueue, ihipStream_t)); + delete LegacyDefaultQueue; LegacyDefaultQueue = nullptr; } chipstar::Queue *chipstar::Device::getLegacyDefaultQueue() { @@ -1903,3 +1901,7 @@ void chipstar::Queue::addCallback(hipStreamCallback_t Callback, CHIPGraph *chipstar::Queue::getCaptureGraph() const { return static_cast(CaptureGraph_); } + +hipStream_t STREAM(chipstar::Queue *Queue) noexcept { + return reinterpret_cast(Queue->asDispatchableObject()); +} diff --git a/src/CHIPBackend.hh b/src/CHIPBackend.hh index 311830064..1a2540b6e 100644 --- a/src/CHIPBackend.hh +++ b/src/CHIPBackend.hh @@ -2037,7 +2037,7 @@ public: /** * @brief Queue class for submitting kernels to for execution */ -class Queue { +class Queue : public Dispatchable { protected: hipStreamCaptureStatus CaptureStatus_ = hipStreamCaptureStatusNone; hipStreamCaptureMode CaptureMode_ = hipStreamCaptureModeGlobal; diff --git a/src/CHIPBindings.cc b/src/CHIPBindings.cc index 4d56befe8..fea77f321 100644 --- a/src/CHIPBindings.cc +++ b/src/CHIPBindings.cc @@ -2106,7 +2106,7 @@ hipStreamCreateWithPriorityInternal(hipStream_t *Stream, unsigned int Flags, auto ClampedPriority = std::min(MinPriority, std::max(MaxPriority, Priority)); chipstar::Queue *ChipQueue = Dev->createQueueAndRegister(FlagsParsed, ClampedPriority); - *Stream = CHIP_OBJ_TO_HANDLE(ChipQueue, ihipStream_t); + *Stream = STREAM(ChipQueue); return hipSuccess; } diff --git a/src/backend/Level0/CHIPBackendLevel0.cc b/src/backend/Level0/CHIPBackendLevel0.cc index 556306b3d..c1a719c07 100644 --- a/src/backend/Level0/CHIPBackendLevel0.cc +++ b/src/backend/Level0/CHIPBackendLevel0.cc @@ -2006,10 +2006,7 @@ void CHIPDeviceLevel0::populateDevicePropertiesImpl() { chipstar::Queue *CHIPDeviceLevel0::createQueue(chipstar::QueueFlags Flags, int Priority) { - void *mem = malloc(sizeof(ihipDispatch) + sizeof(CHIPQueueLevel0)); - CHIPQueueLevel0 *NewQ = CHIP_HANDLE_TO_OBJ(mem, CHIPQueueLevel0); - NewQ = new (NewQ) CHIPQueueLevel0(this, Flags, Priority); - return NewQ; + return new CHIPQueueLevel0(this, Flags, Priority); } chipstar::Queue *CHIPDeviceLevel0::createQueue(const uintptr_t *NativeHandles, @@ -2019,13 +2016,9 @@ chipstar::Queue *CHIPDeviceLevel0::createQueue(const uintptr_t *NativeHandles, if (!CmdQ) { logWarn("initializeFromNative: native queue pointer is null. Creating a " "new queue"); - void *mem = malloc(sizeof(ihipDispatch) + sizeof(CHIPQueueLevel0)); - NewQ = CHIP_HANDLE_TO_OBJ(mem, CHIPQueueLevel0); - NewQ = new (NewQ) CHIPQueueLevel0(this, 0, 0); + NewQ = new CHIPQueueLevel0(this, 0, 0); } else { - void *mem = malloc(sizeof(ihipDispatch) + sizeof(CHIPQueueLevel0)); - NewQ = CHIP_HANDLE_TO_OBJ(mem, CHIPQueueLevel0); - NewQ = new (NewQ) CHIPQueueLevel0(this, CmdQ); + NewQ = new CHIPQueueLevel0(this, CmdQ); // In this case CHIP does not own the queue hence setting right ownership if (NewQ != nullptr) { NewQ->setCmdQueueOwnership(false); diff --git a/src/backend/OpenCL/CHIPBackendOpenCL.cc b/src/backend/OpenCL/CHIPBackendOpenCL.cc index b193e4a32..e8ebddc8e 100644 --- a/src/backend/OpenCL/CHIPBackendOpenCL.cc +++ b/src/backend/OpenCL/CHIPBackendOpenCL.cc @@ -769,9 +769,7 @@ void CHIPModuleOpenCL::compile(chipstar::Device *ChipDev) { chipstar::Queue *CHIPDeviceOpenCL::createQueue(chipstar::QueueFlags Flags, int Priority) { - void *mem = malloc(sizeof(ihipDispatch) + sizeof(CHIPQueueOpenCL)); - CHIPQueueOpenCL *NewQ = CHIP_HANDLE_TO_OBJ(mem, CHIPQueueOpenCL); - NewQ = new (NewQ) CHIPQueueOpenCL(this, Priority); + auto *NewQ = new CHIPQueueOpenCL(this, Priority); NewQ->setFlags(Flags); return NewQ; } @@ -779,9 +777,7 @@ chipstar::Queue *CHIPDeviceOpenCL::createQueue(chipstar::QueueFlags Flags, chipstar::Queue *CHIPDeviceOpenCL::createQueue(const uintptr_t *NativeHandles, int NumHandles) { cl_command_queue CmdQ = (cl_command_queue)NativeHandles[3]; - void *mem = malloc(sizeof(ihipDispatch) + sizeof(CHIPQueueOpenCL)); - CHIPQueueOpenCL *NewQ = CHIP_HANDLE_TO_OBJ(mem, CHIPQueueOpenCL); - NewQ = new (NewQ) CHIPQueueOpenCL(this, OCL_DEFAULT_QUEUE_PRIORITY, CmdQ); + auto *NewQ = new CHIPQueueOpenCL(this, OCL_DEFAULT_QUEUE_PRIORITY, CmdQ); return NewQ; } diff --git a/src/common.hh b/src/common.hh index f81b2610e..a160602df 100644 --- a/src/common.hh +++ b/src/common.hh @@ -40,6 +40,7 @@ #include #include #include +#include /// For multiplexing purposes, the first field of our objects must be a void * /// pointer @@ -47,6 +48,31 @@ struct ihipDispatch { void *dispatch; }; +class Dispatchable { +private: + struct DispatchImpl { + void *Dispatch; + void *Object; + } DispatchState_ = {nullptr, this}; + + // Need to have at least one virtual function so the base and + // derived classes have equal 'this' pointer. + virtual void forceInheritance() noexcept {}; + +public: + void *asDispatchableObject() noexcept { + return static_cast(&DispatchState_); + } + + template + static T *getObjectAs(void *Ptr) { + assert(Ptr); + auto *State = static_cast(Ptr); + assert(State->Object); + return static_cast(State->Object); + } +}; + #define CHIP_HANDLE_TO_OBJ(handle, type) \ (reinterpret_cast(reinterpret_cast(handle) + \ sizeof(ihipDispatch))) @@ -55,17 +81,6 @@ struct ihipDispatch { (reinterpret_cast(reinterpret_cast(pobj) - \ sizeof(ihipDispatch))) -#define QUEUE(x) \ - ((!(x) || (x) == hipStreamPerThread || (x) == hipStreamLegacy) \ - ? reinterpret_cast(x) \ - : CHIP_HANDLE_TO_OBJ(x, chipstar::Queue)) - -#define STREAM(x) \ - ((!(x) || (x) == (chipstar::Queue *)hipStreamPerThread || \ - (x) == (chipstar::Queue *)hipStreamLegacy) \ - ? reinterpret_cast(x) \ - : CHIP_OBJ_TO_HANDLE(x, ihipStream_t)) - #define HIPTOCHIP(x, t) \ (!(x) ? reinterpret_cast(x) : CHIP_HANDLE_TO_OBJ(x, t)) @@ -112,7 +127,6 @@ struct ihipDispatch { /// so ihipEvent_t pointers may carry chipstar::Event instances. struct ihipEvent_t : ihipDispatch {}; struct ihipCtx_t : ihipDispatch {}; -struct ihipStream_t : ihipDispatch {}; struct ihipModule_t : ihipDispatch {}; struct ihipModuleSymbol_t : ihipDispatch {}; struct ihipGraph : ihipDispatch {}; @@ -121,6 +135,21 @@ struct hipGraphExec : ihipDispatch {}; struct __hip_texture : ihipDispatch {}; struct _hiprtcProgram : ihipDispatch {}; +using hipStream_t = struct ihipStream_t *; + +namespace chipstar { +class Queue; +} + +// TODO: rename to unwrap() +static constexpr chipstar::Queue *QUEUE(hipStream_t Stream) noexcept { + return Stream ? Dispatchable::getObjectAs(Stream) : nullptr; +} + +// Implemented in CHIPBackend.cpp +// TODO: rename to wrap() +hipStream_t STREAM(chipstar::Queue *Queue) noexcept; + bool filterSPIRV(const char *Bytes, size_t NumBytes, std::string &Dst); bool parseSPIR(uint32_t *Stream, size_t NumWords, OpenCLFunctionInfoMap &FuncInfoMap); From 910063ab445f8747f79858420e66069b362eacf5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henry=20Linjam=C3=A4ki?= Date: Tue, 10 Oct 2023 13:58:06 +0300 Subject: [PATCH 16/16] Cover missed ~Queue() + free --> delete --- src/CHIPBackend.cc | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/src/CHIPBackend.cc b/src/CHIPBackend.cc index 38d6d513c..eab50e04b 100644 --- a/src/CHIPBackend.cc +++ b/src/CHIPBackend.cc @@ -542,10 +542,8 @@ chipstar::Queue *chipstar::Device::getPerThreadDefaultQueue() { void chipstar::Device::QueueDeleter::operator()( chipstar::Queue *q) const noexcept { - if (q) { - q->~Queue(); - free(CHIP_OBJ_TO_HANDLE(q, ihipStream_t)); - } + if (q) + delete q; } chipstar::Queue *chipstar::Device::getPerThreadDefaultQueueNoLock() { @@ -907,8 +905,7 @@ bool chipstar::Device::removeQueue(chipstar::Queue *ChipQueue) { } ChipQueues_.erase(FoundQueue); - ChipQueue->~Queue(); - free(CHIP_OBJ_TO_HANDLE(ChipQueue, ihipStream_t)); + delete ChipQueue; return true; }