Skip to content

Commit

Permalink
Improvements to hipEventElapsedTime
Browse files Browse the repository at this point in the history
  • Loading branch information
pvelesko committed Sep 2, 2024
1 parent 4fff4e3 commit 78cfe90
Show file tree
Hide file tree
Showing 3 changed files with 60 additions and 40 deletions.
21 changes: 15 additions & 6 deletions src/CHIPBindings.cc
Original file line number Diff line number Diff line change
Expand Up @@ -2747,7 +2747,9 @@ hipError_t hipEventSynchronize(hipEvent_t Event) {
NULLCHECK(Event);
chipstar::Event *ChipEvent = static_cast<chipstar::Event *>(Event);

ChipEvent->wait();
if (ChipEvent->isRecordingOrRecorded())

Check warning on line 2750 in src/CHIPBindings.cc

View workflow job for this annotation

GitHub Actions / cpp-linter

src/CHIPBindings.cc:2750:42 [readability-braces-around-statements]

statement should be inside braces
ChipEvent->wait();

RETURN(hipSuccess);

CHIP_CATCH
Expand All @@ -2762,16 +2764,23 @@ hipError_t hipEventElapsedTime(float *Ms, hipEvent_t Start, hipEvent_t Stop) {
NULLCHECK(Start, Stop);
chipstar::Event *ChipEventStart = static_cast<chipstar::Event *>(Start);
chipstar::Event *ChipEventStop = static_cast<chipstar::Event *>(Stop);

if (ChipEventStart->getFlags().isDisableTiming() ||
ChipEventStop->getFlags().isDisableTiming())

Check warning on line 2769 in src/CHIPBindings.cc

View workflow job for this annotation

GitHub Actions / cpp-linter

src/CHIPBindings.cc:2769:51 [readability-braces-around-statements]

statement should be inside braces
CHIPERR_LOG_AND_THROW("One of the events has timings disabled. "
"Unable to return elasped time",
hipErrorInvalidHandle);

if (!ChipEventStart->isRecordingOrRecorded() ||
!ChipEventStop->isRecordingOrRecorded()) {
CHIPERR_LOG_AND_THROW("One of the events was not recorded",
hipErrorInvalidHandle);
}
if (ChipEventStart->getFlags().isDisableTiming() ||
ChipEventStop->getFlags().isDisableTiming())
CHIPERR_LOG_AND_THROW("One of the events has timings disabled. "
"Unable to return elasped time",
hipErrorInvalidResourceHandle);

if (!ChipEventStart->getEventStatus() == EVENT_STATUS_RECORDING)

Check warning on line 2780 in src/CHIPBindings.cc

View workflow job for this annotation

GitHub Actions / cpp-linter

src/CHIPBindings.cc:2780:7 [readability-implicit-bool-conversion]

implicit conversion bool -> 'int'

Check warning on line 2780 in src/CHIPBindings.cc

View workflow job for this annotation

GitHub Actions / cpp-linter

src/CHIPBindings.cc:2780:8 [readability-implicit-bool-conversion]

implicit conversion 'event_status_e' -> bool

Check warning on line 2780 in src/CHIPBindings.cc

View workflow job for this annotation

GitHub Actions / cpp-linter

src/CHIPBindings.cc:2780:67 [readability-braces-around-statements]

statement should be inside braces
RETURN(hipErrorNotReady);
if (!ChipEventStop->getEventStatus() == EVENT_STATUS_RECORDING)

Check warning on line 2782 in src/CHIPBindings.cc

View workflow job for this annotation

GitHub Actions / cpp-linter

src/CHIPBindings.cc:2782:7 [readability-implicit-bool-conversion]

implicit conversion bool -> 'int'

Check warning on line 2782 in src/CHIPBindings.cc

View workflow job for this annotation

GitHub Actions / cpp-linter

src/CHIPBindings.cc:2782:8 [readability-implicit-bool-conversion]

implicit conversion 'event_status_e' -> bool

Check warning on line 2782 in src/CHIPBindings.cc

View workflow job for this annotation

GitHub Actions / cpp-linter

src/CHIPBindings.cc:2782:66 [readability-braces-around-statements]

statement should be inside braces
RETURN(hipErrorNotReady);

*Ms = ChipEventStart->getElapsedTime(ChipEventStop);
RETURN(hipSuccess);
Expand Down
78 changes: 44 additions & 34 deletions src/backend/OpenCL/CHIPBackendOpenCL.cc
Original file line number Diff line number Diff line change
Expand Up @@ -730,6 +730,8 @@ void CHIPEventOpenCL::recordEventCopy(
this->ClEvent = Other->ClEvent;
this->RecordedEvent = Other;
this->Msg = "recordEventCopy: " + Other->Msg;
this->HostTimeStamp =
std::chrono::high_resolution_clock::now().time_since_epoch().count();
}

bool CHIPEventOpenCL::wait() {
Expand Down Expand Up @@ -769,10 +771,6 @@ bool CHIPEventOpenCL::updateFinishStatus(bool ThrowErrorIfNotReady) {
}

float CHIPEventOpenCL::getElapsedTime(chipstar::Event *OtherIn) {
// Why do I need to lock the context mutex?
// Can I lock the mutex of this and the other event?
//

CHIPEventOpenCL *Other = (CHIPEventOpenCL *)OtherIn;

if (this->getContext() != Other->getContext())
Expand All @@ -781,8 +779,10 @@ float CHIPEventOpenCL::getElapsedTime(chipstar::Event *OtherIn) {
"the same context",
hipErrorTbd);

this->updateFinishStatus(true);
Other->updateFinishStatus(true);
if (this->getEventStatus() == EVENT_STATUS_RECORDING)

Check warning on line 782 in src/backend/OpenCL/CHIPBackendOpenCL.cc

View workflow job for this annotation

GitHub Actions / cpp-linter

src/backend/OpenCL/CHIPBackendOpenCL.cc:782:56 [readability-braces-around-statements]

statement should be inside braces
this->updateFinishStatus(false);
if (Other->getEventStatus() == EVENT_STATUS_RECORDING)

Check warning on line 784 in src/backend/OpenCL/CHIPBackendOpenCL.cc

View workflow job for this annotation

GitHub Actions / cpp-linter

src/backend/OpenCL/CHIPBackendOpenCL.cc:784:57 [readability-braces-around-statements]

statement should be inside braces
Other->updateFinishStatus(false);

if (!this->isRecordingOrRecorded() || !Other->isRecordingOrRecorded())
CHIPERR_LOG_AND_THROW("one of the events isn't/hasn't recorded",
Expand All @@ -792,23 +792,40 @@ float CHIPEventOpenCL::getElapsedTime(chipstar::Event *OtherIn) {
CHIPERR_LOG_AND_THROW("one of the events hasn't finished",
hipErrorNotReady);

uint64_t Started = this->getFinishTime();
uint64_t Finished = Other->getFinishTime();
uint64_t BeginGPU = this->getFinishTime();
uint64_t EndGPU = Other->getFinishTime();
uint64_t BeginCPU = this->HostTimeStamp;
uint64_t EndCPU = Other->HostTimeStamp;

logTrace("EventElapsedTime: STARTED {} / {} FINISHED {} / {} \n",
(void *)this, Started, (void *)Other, Finished);
bool ReversedEvents = false;
if (EndCPU < BeginCPU) {
ReversedEvents = true;
std::swap(BeginGPU, EndGPU);
}

// apparently fails for Intel NEO, god knows why
// assert(Finished >= Started);
// Handle overflow
int64_t Elapsed;
const uint64_t MaxValue = std::numeric_limits<uint64_t>::max();
if (EndGPU < BeginGPU) {
logError("Overflow detected in CHIPEventOpenCL::getElapsedTime()");
logError("BeginGPU: {}, EndGPU: {}", BeginGPU, EndGPU);
Elapsed =
(MaxValue - BeginGPU) + EndGPU + 1; // +1 to account for wraparound
} else {
Elapsed = EndGPU - BeginGPU;
}

const int64_t NANOSECS = 1000000000;
if (Finished < Started)
logWarn("Finished < Started\n");
Elapsed = Finished - Started;
int64_t MS = (Elapsed / NANOSECS) * 1000;
int64_t NS = Elapsed % NANOSECS;
float FractInMS = ((float)NS) / 1000000.0f;
return (float)MS + FractInMS;
float Ms = (float)MS + FractInMS;

Ms = std::abs(Ms);
if (ReversedEvents)
Ms = -Ms;

return Ms;
}

void CHIPEventOpenCL::hostSignal() { UNIMPLEMENTED(); }
Expand Down Expand Up @@ -1069,6 +1086,7 @@ struct HipStreamCallbackData {
void *UserData;
hipStreamCallback_t Callback;
std::shared_ptr<chipstar::Event> CallbackFinishEvent;
std::shared_ptr<chipstar::Event> CallbackCompleted;
};

void CL_CALLBACK pfn_notify(cl_event Event, cl_int CommandExecStatus,
Expand All @@ -1084,6 +1102,7 @@ void CL_CALLBACK pfn_notify(cl_event Event, cl_int CommandExecStatus,
std::static_pointer_cast<CHIPEventOpenCL>(Cbo->CallbackFinishEvent)
->ClEvent,
CL_COMPLETE);
CHIPERR_CHECK_LOG_AND_THROW_TABLE(clSetUserEventStatus);
}
delete Cbo;
}
Expand Down Expand Up @@ -1176,21 +1195,12 @@ void CHIPQueueOpenCL::addCallback(hipStreamCallback_t Callback,
cl::Context *ClContext_ = ((CHIPContextOpenCL *)ChipContext_)->get();
cl_int Err;

std::shared_ptr<chipstar::Event> HoldBackEvent =
static_cast<CHIPBackendOpenCL *>(Backend)->createEventShared(
ChipContext_);

std::static_pointer_cast<CHIPEventOpenCL>(HoldBackEvent)->ClEvent =
clCreateUserEvent(ClContext_->get(), &Err);

std::vector<std::shared_ptr<chipstar::Event>> WaitForEvents{HoldBackEvent};

// Enqueue a barrier used to ensure the callback is not called too early,
// otherwise it would be (at worst) executed in this host thread when
// setting it, blocking the execution, while the clients might expect
// parallel execution.
std::shared_ptr<chipstar::Event> HoldbackBarrierCompletedEv =
enqueueBarrier(WaitForEvents);
enqueueBarrier(std::vector<std::shared_ptr<chipstar::Event>>{});

// OpenCL event callbacks have undefined execution ordering/finishing
// guarantees. We need to enforce CUDA ordering using user events.
Expand All @@ -1208,10 +1218,10 @@ void CHIPQueueOpenCL::addCallback(hipStreamCallback_t Callback,
// finishing the user CB's execution.

HipStreamCallbackData *Cb = new HipStreamCallbackData{
this, hipSuccess, UserData, Callback, CallbackEvent};
this, hipSuccess, UserData, Callback, CallbackEvent, nullptr};

std::vector<std::shared_ptr<chipstar::Event>> WaitForEventsCBB{CallbackEvent};
auto CallbackCompleted = enqueueBarrier(WaitForEventsCBB);
Cb->CallbackCompleted = enqueueBarrier(WaitForEventsCBB);

// We know that the callback won't be yet launched since it's depending
// on the barrier which waits for the user event.
Expand All @@ -1221,17 +1231,17 @@ void CHIPQueueOpenCL::addCallback(hipStreamCallback_t Callback,
CL_COMPLETE, pfn_notify, Cb);
CHIPERR_CHECK_LOG_AND_THROW_TABLE(clSetEventCallback);

updateLastEvent(CallbackCompleted);
get()->flush();
updateLastEvent(Cb->CallbackCompleted);

// Now the CB can start executing in the background:
clSetUserEventStatus(
std::static_pointer_cast<CHIPEventOpenCL>(HoldBackEvent)->ClEvent,
std::static_pointer_cast<CHIPEventOpenCL>(HoldbackBarrierCompletedEv)
->ClEvent,
CL_COMPLETE);
// HoldBackEvent->decreaseRefCount("Notified finished.");
CHIPERR_CHECK_LOG_AND_THROW_TABLE(clSetUserEventStatus);

return;
};
}

std::shared_ptr<chipstar::Event> CHIPQueueOpenCL::enqueueMarkerImpl() {
std::shared_ptr<chipstar::Event> MarkerEvent =
Expand Down Expand Up @@ -1512,7 +1522,7 @@ void CHIPQueueOpenCL::finish() {
LOCK(Backend->DubiousLockOpenCL)
#endif
clStatus = get()->finish();
// CHIPERR_CHECK_LOG_AND_ABORT(clStatus, CL_SUCCESS, hipErrorTbd);
CHIPERR_CHECK_LOG_AND_THROW_TABLE(clFinish);
}

std::shared_ptr<chipstar::Event>
Expand Down
1 change: 1 addition & 0 deletions src/backend/OpenCL/CHIPBackendOpenCL.hh
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,7 @@ public:
cl_event ClEvent;
friend class CHIPEventOpenCL;
std::shared_ptr<chipstar::Event> RecordedEvent;
uint64_t HostTimeStamp;

public:
CHIPEventOpenCL(CHIPContextOpenCL *ChipContext, cl_event ClEvent,
Expand Down

0 comments on commit 78cfe90

Please sign in to comment.