From 724b4f0f244f585512bebcd216fb72fb879cb1b3 Mon Sep 17 00:00:00 2001 From: Paulius Velesko Date: Tue, 27 Aug 2024 14:41:35 +0300 Subject: [PATCH] WIP --- HIP | 2 +- src/CHIPBindings.cc | 6 ++- src/backend/Level0/zeHipErrorConversion.hh | 8 ++-- src/backend/OpenCL/CHIPBackendOpenCL.cc | 44 ++++++++++------------ src/backend/OpenCL/clHipErrorConversion.hh | 3 +- 5 files changed, 32 insertions(+), 31 deletions(-) diff --git a/HIP b/HIP index 32a64267d..a6300992f 160000 --- a/HIP +++ b/HIP @@ -1 +1 @@ -Subproject commit 32a64267d23f27987be0115e243daec9a98b7613 +Subproject commit a6300992fa4013b9f5fe299b42de4bf1484406f8 diff --git a/src/CHIPBindings.cc b/src/CHIPBindings.cc index 7fc9c45f4..ac517d879 100644 --- a/src/CHIPBindings.cc +++ b/src/CHIPBindings.cc @@ -2777,8 +2777,10 @@ hipError_t hipEventElapsedTime(float *Ms, hipEvent_t Start, hipEvent_t Stop) { hipErrorInvalidHandle); } - ChipEventStart->updateFinishStatus(true); - ChipEventStop->updateFinishStatus(true); + if (!ChipEventStart->getEventStatus() == EVENT_STATUS_RECORDING) + RETURN(hipErrorNotReady); + if (!ChipEventStop->getEventStatus() == EVENT_STATUS_RECORDING) + RETURN(hipErrorNotReady); *Ms = ChipEventStart->getElapsedTime(ChipEventStop); RETURN(hipSuccess); diff --git a/src/backend/Level0/zeHipErrorConversion.hh b/src/backend/Level0/zeHipErrorConversion.hh index 79592d25a..a27015f2f 100644 --- a/src/backend/Level0/zeHipErrorConversion.hh +++ b/src/backend/Level0/zeHipErrorConversion.hh @@ -345,14 +345,16 @@ inline hipError_t hip_convert_error(ze_result_t zeStatus, FuncPtr func) { #undef CHIPERR_CHECK_LOG_AND_THROW_TABLE #define CHIPERR_CHECK_LOG_AND_THROW_TABLE(func, ...) \ do { \ - if (zeStatus != ZE_RESULT_SUCCESS) { \ - hipError_t err = hip_convert_error(zeStatus, func); \ + if (zeStatus != ZE_RESULT_SUCCESS) { \ + hipError_t err = hip_convert_error(zeStatus, func); \ if (err == hipErrorTbd) { \ std::cerr << "Error: Unmapped API or API Error Code encountered at " \ << __FILE__ << ":" << __LINE__ << std::endl; \ + std::cerr << "API call: " << #func << std::endl; \ + std::cerr << "Error code: " << resultToString(zeStatus) << std::endl; \ std::abort(); \ } \ - std::string error_msg = std::string(resultToString(zeStatus)); \ + std::string error_msg = std::string(resultToString(zeStatus)); \ std::string custom_msg = std::string(__VA_ARGS__); \ std::string msg_ = error_msg + " " + custom_msg; \ CHIPERR_LOG_AND_THROW(msg_, err); \ diff --git a/src/backend/OpenCL/CHIPBackendOpenCL.cc b/src/backend/OpenCL/CHIPBackendOpenCL.cc index fbf0d01a6..3c053e5bc 100644 --- a/src/backend/OpenCL/CHIPBackendOpenCL.cc +++ b/src/backend/OpenCL/CHIPBackendOpenCL.cc @@ -666,7 +666,7 @@ CHIPEventOpenCL::CHIPEventOpenCL(CHIPContextOpenCL *ChipContext, uint64_t CHIPEventOpenCL::getFinishTime() { int clStatus; uint64_t Ret; - clStatus = clGetEventProfilingInfo(ClEvent, CL_PROFILING_COMMAND_END, + clStatus = clGetEventProfilingInfo(ClEvent, CL_PROFILING_COMMAND_COMPLETE, sizeof(Ret), &Ret, NULL); if (clStatus != CL_SUCCESS) { @@ -730,7 +730,8 @@ void CHIPEventOpenCL::recordEventCopy( this->ClEvent = Other->ClEvent; this->RecordedEvent = Other; this->Msg = "recordEventCopy: " + Other->Msg; - this->HostTimeStamp = std::chrono::high_resolution_clock::now().time_since_epoch().count(); + this->HostTimeStamp = + std::chrono::high_resolution_clock::now().time_since_epoch().count(); } bool CHIPEventOpenCL::wait() { @@ -749,8 +750,6 @@ bool CHIPEventOpenCL::wait() { bool CHIPEventOpenCL::updateFinishStatus(bool ThrowErrorIfNotReady) { logTrace("CHIPEventOpenCL::updateFinishStatus()"); - - if (ThrowErrorIfNotReady && this->ClEvent == nullptr) CHIPERR_LOG_AND_THROW("OpenCL has not been initialized cl_event is null", hipErrorNotReady); @@ -801,14 +800,20 @@ float CHIPEventOpenCL::getElapsedTime(chipstar::Event *OtherIn) { logTrace("EventElapsedTime: STARTED {} / {} FINISHED {} / {} \n", (void *)this, Started, (void *)Other, Finished); - // apparently fails for Intel NEO, god knows why - // assert(Finished >= Started); int64_t Elapsed; const int64_t NANOSECS = 1000000000; - if (Finished < Started && Other->HostTimeStamp > this->HostTimeStamp) { - std::swap(Started, Finished); - logWarn("Events swapped based on host timestamps\n"); + if (Finished < Started) { + logWarn("Finished < Started\n"); + std::cout << "delta t: " + << std::setprecision(std::numeric_limits::digits10 + 1) + << std::fixed << (Finished - Started) << "\n"; } + + // if (Finished < Started && Other->HostTimeStamp > this->HostTimeStamp) { + // std::swap(Started, Finished); + // logWarn("Events swapped based on host timestamps\n"); + // } + Elapsed = Finished - Started; int64_t MS = (Elapsed / NANOSECS) * 1000; int64_t NS = Elapsed % NANOSECS; @@ -1089,8 +1094,9 @@ void CL_CALLBACK pfn_notify(cl_event Event, cl_int CommandExecStatus, std::static_pointer_cast(Cbo->CallbackFinishEvent) ->ClEvent, CL_COMPLETE); + CHIPERR_CHECK_LOG_AND_THROW_TABLE(clSetUserEventStatus); } - delete Cbo; + // delete Cbo; } void CHIPQueueOpenCL::MemMap(const chipstar::AllocationInfo *AllocInfo, @@ -1181,21 +1187,12 @@ void CHIPQueueOpenCL::addCallback(hipStreamCallback_t Callback, cl::Context *ClContext_ = ((CHIPContextOpenCL *)ChipContext_)->get(); cl_int Err; - std::shared_ptr HoldBackEvent = - static_cast(Backend)->createEventShared( - ChipContext_); - - std::static_pointer_cast(HoldBackEvent)->ClEvent = - clCreateUserEvent(ClContext_->get(), &Err); - - std::vector> WaitForEvents{HoldBackEvent}; - // Enqueue a barrier used to ensure the callback is not called too early, // otherwise it would be (at worst) executed in this host thread when // setting it, blocking the execution, while the clients might expect // parallel execution. std::shared_ptr HoldbackBarrierCompletedEv = - enqueueBarrier(WaitForEvents); + enqueueBarrier(std::vector>{}); // OpenCL event callbacks have undefined execution ordering/finishing // guarantees. We need to enforce CUDA ordering using user events. @@ -1227,13 +1224,12 @@ void CHIPQueueOpenCL::addCallback(hipStreamCallback_t Callback, CHIPERR_CHECK_LOG_AND_THROW_TABLE(clSetEventCallback); updateLastEvent(CallbackCompleted); - get()->flush(); // Now the CB can start executing in the background: clSetUserEventStatus( - std::static_pointer_cast(HoldBackEvent)->ClEvent, + std::static_pointer_cast(HoldbackBarrierCompletedEv)->ClEvent, CL_COMPLETE); - // HoldBackEvent->decreaseRefCount("Notified finished."); + CHIPERR_CHECK_LOG_AND_THROW_TABLE(clSetUserEventStatus); return; }; @@ -1517,7 +1513,7 @@ void CHIPQueueOpenCL::finish() { LOCK(Backend->DubiousLockOpenCL) #endif clStatus = get()->finish(); - // CHIPERR_CHECK_LOG_AND_ABORT(clStatus, CL_SUCCESS, hipErrorTbd); + CHIPERR_CHECK_LOG_AND_THROW_TABLE(clFinish); } std::shared_ptr diff --git a/src/backend/OpenCL/clHipErrorConversion.hh b/src/backend/OpenCL/clHipErrorConversion.hh index 984fd9275..f8aed2afb 100644 --- a/src/backend/OpenCL/clHipErrorConversion.hh +++ b/src/backend/OpenCL/clHipErrorConversion.hh @@ -164,7 +164,8 @@ const std::unordered_map CL_HIP_ERROR_MAPS = { {(void *)&clFinish, {{CL_SUCCESS, hipSuccess}, {CL_INVALID_COMMAND_QUEUE, hipErrorInvalidResourceHandle}, - {CL_OUT_OF_HOST_MEMORY, hipErrorOutOfMemory}}}, + {CL_OUT_OF_HOST_MEMORY, hipErrorOutOfMemory}, + {CL_OUT_OF_RESOURCES, hipErrorOutOfMemory}}}, {(void *)&clFlush, {{CL_SUCCESS, hipSuccess},