-
Notifications
You must be signed in to change notification settings - Fork 116
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[HIP] Implement coarse-grained memory advice for the HIP adapter #1249
Changes from 6 commits
2530ec6
d8fd5f0
a864fb2
7fdaed2
fd305a5
177e351
2a9ded6
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -6277,6 +6277,12 @@ inline std::ostream &operator<<(std::ostream &os, ur_usm_advice_flag_t value) { | |
case UR_USM_ADVICE_FLAG_CLEAR_PREFERRED_LOCATION_HOST: | ||
os << "UR_USM_ADVICE_FLAG_CLEAR_PREFERRED_LOCATION_HOST"; | ||
break; | ||
case UR_USM_ADVICE_FLAG_SET_NON_COHERENT_MEMORY: | ||
os << "UR_USM_ADVICE_FLAG_SET_NON_COHERENT_MEMORY"; | ||
break; | ||
case UR_USM_ADVICE_FLAG_CLEAR_NON_COHERENT_MEMORY: | ||
os << "UR_USM_ADVICE_FLAG_CLEAR_NON_COHERENT_MEMORY"; | ||
break; | ||
default: | ||
os << "unknown enumerator"; | ||
break; | ||
|
@@ -6441,6 +6447,26 @@ inline ur_result_t printFlag<ur_usm_advice_flag_t>(std::ostream &os, uint32_t fl | |
} | ||
os << UR_USM_ADVICE_FLAG_CLEAR_PREFERRED_LOCATION_HOST; | ||
} | ||
|
||
if ((val & UR_USM_ADVICE_FLAG_SET_NON_COHERENT_MEMORY) == (uint32_t)UR_USM_ADVICE_FLAG_SET_NON_COHERENT_MEMORY) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I know you're just copying from the surrounding style here, but if you're doing a bitwise AND of a single bit flag you only need to check against zero. Thus, the equality comparison and uint32_t cast is superfluous and just serves to make the line longer. Not going to block this patch, but it could do with cleaning up later, maybe There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yeah, that could be brought up to UR because I think it was the generate cmake target that produced the code. |
||
val ^= (uint32_t)UR_USM_ADVICE_FLAG_SET_NON_COHERENT_MEMORY; | ||
if (!first) { | ||
os << " | "; | ||
} else { | ||
first = false; | ||
} | ||
os << UR_USM_ADVICE_FLAG_SET_NON_COHERENT_MEMORY; | ||
} | ||
|
||
if ((val & UR_USM_ADVICE_FLAG_CLEAR_NON_COHERENT_MEMORY) == (uint32_t)UR_USM_ADVICE_FLAG_CLEAR_NON_COHERENT_MEMORY) { | ||
val ^= (uint32_t)UR_USM_ADVICE_FLAG_CLEAR_NON_COHERENT_MEMORY; | ||
if (!first) { | ||
os << " | "; | ||
} else { | ||
first = false; | ||
} | ||
os << UR_USM_ADVICE_FLAG_CLEAR_NON_COHERENT_MEMORY; | ||
} | ||
if (val != 0) { | ||
std::bitset<32> bits(val); | ||
if (!first) { | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -92,30 +92,46 @@ ur_result_t setHipMemAdvise(const void *DevPtr, const size_t Size, | |
if (URAdviceFlags & | ||
(UR_USM_ADVICE_FLAG_SET_NON_ATOMIC_MOSTLY | | ||
UR_USM_ADVICE_FLAG_CLEAR_NON_ATOMIC_MOSTLY | | ||
UR_USM_ADVICE_FLAG_BIAS_CACHED | UR_USM_ADVICE_FLAG_BIAS_UNCACHED)) { | ||
UR_USM_ADVICE_FLAG_BIAS_CACHED | UR_USM_ADVICE_FLAG_BIAS_UNCACHED | ||
#if !defined(__HIP_PLATFORM_AMD__) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is it worth it to add There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We don't actively support it but until we have completely dropped it officially there are a bunch of other entry points in the hip adapter that maintain codepaths for hip-nv platforms. Either way, I do not disagree to removing it and simplifying this function a bit this way. |
||
| UR_USM_ADVICE_FLAG_SET_NON_COHERENT_MEMORY | | ||
UR_USM_ADVICE_FLAG_CLEAR_NON_COHERENT_MEMORY | ||
#endif | ||
)) { | ||
return UR_RESULT_ERROR_INVALID_ENUMERATION; | ||
} | ||
|
||
using ur_to_hip_advice_t = std::pair<ur_usm_advice_flags_t, hipMemoryAdvise>; | ||
|
||
static constexpr std::array<ur_to_hip_advice_t, 6> | ||
URToHIPMemAdviseDeviceFlags{ | ||
std::make_pair(UR_USM_ADVICE_FLAG_SET_READ_MOSTLY, | ||
hipMemAdviseSetReadMostly), | ||
std::make_pair(UR_USM_ADVICE_FLAG_CLEAR_READ_MOSTLY, | ||
hipMemAdviseUnsetReadMostly), | ||
std::make_pair(UR_USM_ADVICE_FLAG_SET_PREFERRED_LOCATION, | ||
hipMemAdviseSetPreferredLocation), | ||
std::make_pair(UR_USM_ADVICE_FLAG_CLEAR_PREFERRED_LOCATION, | ||
hipMemAdviseUnsetPreferredLocation), | ||
std::make_pair(UR_USM_ADVICE_FLAG_SET_ACCESSED_BY_DEVICE, | ||
hipMemAdviseSetAccessedBy), | ||
std::make_pair(UR_USM_ADVICE_FLAG_CLEAR_ACCESSED_BY_DEVICE, | ||
hipMemAdviseUnsetAccessedBy), | ||
}; | ||
for (auto &FlagPair : URToHIPMemAdviseDeviceFlags) { | ||
if (URAdviceFlags & FlagPair.first) { | ||
UR_CHECK_ERROR(hipMemAdvise(DevPtr, Size, FlagPair.second, Device)); | ||
#if defined(__HIP_PLATFORM_AMD__) | ||
constexpr size_t DeviceFlagCount = 8; | ||
#else | ||
constexpr size_t DeviceFlagCount = 6; | ||
#endif | ||
static constexpr std::array<ur_to_hip_advice_t, DeviceFlagCount> | ||
URToHIPMemAdviseDeviceFlags { | ||
std::make_pair(UR_USM_ADVICE_FLAG_SET_READ_MOSTLY, | ||
GeorgeWeb marked this conversation as resolved.
Show resolved
Hide resolved
|
||
hipMemAdviseSetReadMostly), | ||
std::make_pair(UR_USM_ADVICE_FLAG_CLEAR_READ_MOSTLY, | ||
hipMemAdviseUnsetReadMostly), | ||
std::make_pair(UR_USM_ADVICE_FLAG_SET_PREFERRED_LOCATION, | ||
hipMemAdviseSetPreferredLocation), | ||
std::make_pair(UR_USM_ADVICE_FLAG_CLEAR_PREFERRED_LOCATION, | ||
hipMemAdviseUnsetPreferredLocation), | ||
std::make_pair(UR_USM_ADVICE_FLAG_SET_ACCESSED_BY_DEVICE, | ||
hipMemAdviseSetAccessedBy), | ||
std::make_pair(UR_USM_ADVICE_FLAG_CLEAR_ACCESSED_BY_DEVICE, | ||
hipMemAdviseUnsetAccessedBy), | ||
#if defined(__HIP_PLATFORM_AMD__) | ||
std::make_pair(UR_USM_ADVICE_FLAG_SET_NON_COHERENT_MEMORY, | ||
hipMemAdviseSetCoarseGrain), | ||
std::make_pair(UR_USM_ADVICE_FLAG_CLEAR_NON_COHERENT_MEMORY, | ||
hipMemAdviseUnsetCoarseGrain), | ||
#endif | ||
}; | ||
for (const auto &[URAdvice, HIPAdvice] : URToHIPMemAdviseDeviceFlags) { | ||
if (URAdviceFlags & URAdvice) { | ||
UR_CHECK_ERROR(hipMemAdvise(DevPtr, Size, HIPAdvice, Device)); | ||
ldrumm marked this conversation as resolved.
Show resolved
Hide resolved
|
||
} | ||
} | ||
|
||
|
@@ -130,10 +146,9 @@ ur_result_t setHipMemAdvise(const void *DevPtr, const size_t Size, | |
hipMemAdviseUnsetAccessedBy), | ||
}; | ||
|
||
for (auto &FlagPair : URToHIPMemAdviseHostFlags) { | ||
if (URAdviceFlags & FlagPair.first) { | ||
UR_CHECK_ERROR( | ||
hipMemAdvise(DevPtr, Size, FlagPair.second, hipCpuDeviceId)); | ||
for (const auto &[URAdvice, HIPAdvice] : URToHIPMemAdviseHostFlags) { | ||
if (URAdviceFlags & URAdvice) { | ||
UR_CHECK_ERROR(hipMemAdvise(DevPtr, Size, HIPAdvice, hipCpuDeviceId)); | ||
GeorgeWeb marked this conversation as resolved.
Show resolved
Hide resolved
|
||
} | ||
} | ||
|
||
|
@@ -1615,6 +1630,10 @@ urEnqueueUSMAdvise(ur_queue_handle_t hQueue, const void *pMem, size_t size, | |
pMem, size, hipMemAdviseUnsetPreferredLocation, DeviceID)); | ||
UR_CHECK_ERROR( | ||
hipMemAdvise(pMem, size, hipMemAdviseUnsetAccessedBy, DeviceID)); | ||
#if defined(__HIP_PLATFORM_AMD__) | ||
UR_CHECK_ERROR( | ||
hipMemAdvise(pMem, size, hipMemAdviseUnsetCoarseGrain, DeviceID)); | ||
#endif | ||
} else { | ||
Result = setHipMemAdvise(HIPDevicePtr, size, advice, DeviceID); | ||
// UR_RESULT_ERROR_INVALID_ENUMERATION is returned when using a valid but | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This now has fewer bits set. Is this right? If so that's a somewhat unusual definition of "mask"
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is also generated from the source generation scripts and initially threw me off too, but I think I got it now though. It is actually the inverse mask that UR uses for validation.