Add two new properties to ur_kernel_group_info_t #1996

Merged · 1 commit · Sep 25, 2024
6 changes: 5 additions & 1 deletion include/ur_api.h
@@ -4862,6 +4862,10 @@ typedef enum ur_kernel_group_info_t {
UR_KERNEL_GROUP_INFO_PREFERRED_WORK_GROUP_SIZE_MULTIPLE = 4, ///< [size_t] Return preferred multiple of Work Group size for launch
UR_KERNEL_GROUP_INFO_PRIVATE_MEM_SIZE = 5, ///< [size_t] Return minimum amount of private memory in bytes used by each
///< work item in the Kernel
UR_KERNEL_GROUP_INFO_COMPILE_MAX_WORK_GROUP_SIZE = 6, ///< [size_t[3]] Return the maximum Work Group size guaranteed by the
///< source code, or (0, 0, 0) if unspecified
UR_KERNEL_GROUP_INFO_COMPILE_MAX_LINEAR_WORK_GROUP_SIZE = 7, ///< [size_t] Return the maximum linearized Work Group size (X * Y * Z)
///< guaranteed by the source code, or 0 if unspecified
/// @cond
UR_KERNEL_GROUP_INFO_FORCE_UINT32 = 0x7fffffff
/// @endcond
@@ -4965,7 +4969,7 @@ urKernelGetInfo(
/// + `NULL == hKernel`
/// + `NULL == hDevice`
/// - ::UR_RESULT_ERROR_INVALID_ENUMERATION
/// + `::UR_KERNEL_GROUP_INFO_PRIVATE_MEM_SIZE < propName`
/// + `::UR_KERNEL_GROUP_INFO_COMPILE_MAX_LINEAR_WORK_GROUP_SIZE < propName`
UR_APIEXPORT ur_result_t UR_APICALL
urKernelGetGroupInfo(
ur_kernel_handle_t hKernel, ///< [in] handle of the Kernel object
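
For context, a minimal usage sketch of the two new queries through the existing urKernelGetGroupInfo entry point (not part of the diff; hKernel and hDevice stand for handles created elsewhere, and error handling is abbreviated):

    size_t maxWGSize[3] = {0, 0, 0};
    size_t maxLinearWGSize = 0;

    // Per-dimension maximum guaranteed by the kernel source; (0, 0, 0) if unspecified.
    ur_result_t res = urKernelGetGroupInfo(
        hKernel, hDevice, UR_KERNEL_GROUP_INFO_COMPILE_MAX_WORK_GROUP_SIZE,
        sizeof(maxWGSize), maxWGSize, NULL);

    // Linearized (X * Y * Z) maximum guaranteed by the kernel source; 0 if unspecified.
    if (res == UR_RESULT_SUCCESS) {
        res = urKernelGetGroupInfo(
            hKernel, hDevice, UR_KERNEL_GROUP_INFO_COMPILE_MAX_LINEAR_WORK_GROUP_SIZE,
            sizeof(maxLinearWGSize), &maxLinearWGSize, NULL);
    }

    // Adapters that do not carry the backing metadata (HIP, Level Zero, Native CPU
    // in this change) report UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION instead.
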
32 changes: 32 additions & 0 deletions include/ur_print.hpp
@@ -7994,6 +7994,12 @@ inline std::ostream &operator<<(std::ostream &os, enum ur_kernel_group_info_t va
case UR_KERNEL_GROUP_INFO_PRIVATE_MEM_SIZE:
os << "UR_KERNEL_GROUP_INFO_PRIVATE_MEM_SIZE";
break;
case UR_KERNEL_GROUP_INFO_COMPILE_MAX_WORK_GROUP_SIZE:
os << "UR_KERNEL_GROUP_INFO_COMPILE_MAX_WORK_GROUP_SIZE";
break;
case UR_KERNEL_GROUP_INFO_COMPILE_MAX_LINEAR_WORK_GROUP_SIZE:
os << "UR_KERNEL_GROUP_INFO_COMPILE_MAX_LINEAR_WORK_GROUP_SIZE";
break;
default:
os << "unknown enumerator";
break;
@@ -8086,6 +8092,32 @@ inline ur_result_t printTagged(std::ostream &os, const void *ptr, ur_kernel_grou

os << ")";
} break;
case UR_KERNEL_GROUP_INFO_COMPILE_MAX_WORK_GROUP_SIZE: {

const size_t *tptr = (const size_t *)ptr;
os << "{";
size_t nelems = size / sizeof(size_t);
for (size_t i = 0; i < nelems; ++i) {
if (i != 0) {
os << ", ";
}

os << tptr[i];
}
os << "}";
} break;
case UR_KERNEL_GROUP_INFO_COMPILE_MAX_LINEAR_WORK_GROUP_SIZE: {
const size_t *tptr = (const size_t *)ptr;
if (sizeof(size_t) > size) {
os << "invalid size (is: " << size << ", expected: >=" << sizeof(size_t) << ")";
return UR_RESULT_ERROR_INVALID_SIZE;
}
os << (const void *)(tptr) << " (";

os << *tptr;

os << ")";
} break;
default:
os << "unknown enumerator";
return UR_RESULT_ERROR_INVALID_ENUMERATION;
8 changes: 8 additions & 0 deletions scripts/core/kernel.yml
@@ -144,6 +144,14 @@ etors:
desc: "[size_t] Return preferred multiple of Work Group size for launch"
- name: PRIVATE_MEM_SIZE
desc: "[size_t] Return minimum amount of private memory in bytes used by each work item in the Kernel"
- name: COMPILE_MAX_WORK_GROUP_SIZE
desc: |
[size_t[3]] Return the maximum Work Group size guaranteed by the
source code, or (0, 0, 0) if unspecified
- name: COMPILE_MAX_LINEAR_WORK_GROUP_SIZE
desc: |
[size_t] Return the maximum linearized Work Group size (X * Y * Z)
guaranteed by the source code, or 0 if unspecified
--- #--------------------------------------------------------------------------
type: enum
desc: "Get Kernel SubGroup information"
11 changes: 11 additions & 0 deletions source/adapters/cuda/enqueue.cpp
@@ -203,6 +203,7 @@ setKernelParams([[maybe_unused]] const ur_context_handle_t Context,
// Set the active context here as guessLocalWorkSize needs an active context
ScopedContext Active(Device);
{
size_t *MaxThreadsPerBlock = Kernel->MaxThreadsPerBlock;
size_t *ReqdThreadsPerBlock = Kernel->ReqdThreadsPerBlock;
MaxWorkGroupSize = Device->getMaxWorkGroupSize();

@@ -212,6 +213,10 @@ setKernelParams([[maybe_unused]] const ur_context_handle_t Context,
LocalWorkSize[Dim] != ReqdThreadsPerBlock[Dim])
return UR_RESULT_ERROR_INVALID_WORK_GROUP_SIZE;

if (MaxThreadsPerBlock[Dim] != 0 &&
LocalWorkSize[Dim] > MaxThreadsPerBlock[Dim])
return UR_RESULT_ERROR_INVALID_WORK_GROUP_SIZE;

if (LocalWorkSize[Dim] > Device->getMaxWorkItemSizes(Dim))
return UR_RESULT_ERROR_INVALID_WORK_GROUP_SIZE;
// Checks that local work sizes are a divisor of the global work sizes
@@ -235,6 +240,12 @@ setKernelParams([[maybe_unused]] const ur_context_handle_t Context,
KernelLocalWorkGroupSize *= LocalWorkSize[Dim];
}

if (size_t MaxLinearThreadsPerBlock = Kernel->MaxLinearThreadsPerBlock;
MaxLinearThreadsPerBlock &&
MaxLinearThreadsPerBlock < KernelLocalWorkGroupSize) {
return UR_RESULT_ERROR_INVALID_WORK_GROUP_SIZE;
}

if (hasExceededMaxRegistersPerBlock(Device, Kernel,
KernelLocalWorkGroupSize)) {
return UR_RESULT_ERROR_OUT_OF_RESOURCES;
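
As a side note, the two guards added above can be read as a single predicate. A hypothetical standalone helper (not part of this change; names invented) that mirrors the same logic, where a cap of 0 means the source specified no limit:

    #include <cstddef>

    // Returns true when the requested local size violates either the per-dimension
    // cap (max_work_group_size) or the linearized cap (max_linear_work_group_size).
    static bool exceedsCompileTimeCaps(const std::size_t *LocalWorkSize,
                                       unsigned WorkDim,
                                       const std::size_t *MaxThreadsPerBlock,
                                       std::size_t MaxLinearThreadsPerBlock) {
      std::size_t Linear = 1;
      for (unsigned Dim = 0; Dim < WorkDim; ++Dim) {
        if (MaxThreadsPerBlock[Dim] != 0 &&
            LocalWorkSize[Dim] > MaxThreadsPerBlock[Dim])
          return true; // per-dimension cap exceeded
        Linear *= LocalWorkSize[Dim];
      }
      return MaxLinearThreadsPerBlock != 0 && Linear > MaxLinearThreadsPerBlock;
    }
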
24 changes: 24 additions & 0 deletions source/adapters/cuda/kernel.cpp
@@ -125,6 +125,30 @@ urKernelGetGroupInfo(ur_kernel_handle_t hKernel, ur_device_handle_t hDevice,
&Bytes, CU_FUNC_ATTRIBUTE_LOCAL_SIZE_BYTES, hKernel->get()));
return ReturnValue(uint64_t(Bytes));
}
case UR_KERNEL_GROUP_INFO_COMPILE_MAX_WORK_GROUP_SIZE: {
size_t MaxGroupSize[3] = {0, 0, 0};
const auto &MaxWGSizeMDMap =
hKernel->getProgram()->KernelMaxWorkGroupSizeMD;
const auto MaxWGSizeMD = MaxWGSizeMDMap.find(hKernel->getName());
if (MaxWGSizeMD != MaxWGSizeMDMap.end()) {
const auto MaxWGSize = MaxWGSizeMD->second;
MaxGroupSize[0] = std::get<0>(MaxWGSize);
MaxGroupSize[1] = std::get<1>(MaxWGSize);
MaxGroupSize[2] = std::get<2>(MaxWGSize);
}
return ReturnValue(MaxGroupSize, 3);
}
case UR_KERNEL_GROUP_INFO_COMPILE_MAX_LINEAR_WORK_GROUP_SIZE: {
size_t MaxLinearGroupSize = 0;
const auto &MaxLinearWGSizeMDMap =
hKernel->getProgram()->KernelMaxLinearWorkGroupSizeMD;
const auto MaxLinearWGSizeMD =
MaxLinearWGSizeMDMap.find(hKernel->getName());
if (MaxLinearWGSizeMD != MaxLinearWGSizeMDMap.end()) {
MaxLinearGroupSize = MaxLinearWGSizeMD->second;
}
return ReturnValue(MaxLinearGroupSize);
}
default:
break;
}
14 changes: 14 additions & 0 deletions source/adapters/cuda/kernel.hpp
@@ -46,6 +46,8 @@ struct ur_kernel_handle_t_ {

static constexpr uint32_t ReqdThreadsPerBlockDimensions = 3u;
size_t ReqdThreadsPerBlock[ReqdThreadsPerBlockDimensions];
size_t MaxThreadsPerBlock[ReqdThreadsPerBlockDimensions];
size_t MaxLinearThreadsPerBlock{0};
int RegsPerThread{0};

/// Structure that holds the arguments to the kernel.
@@ -169,6 +171,18 @@ struct ur_kernel_handle_t_ {
sizeof(ReqdThreadsPerBlock), ReqdThreadsPerBlock, nullptr);
(void)RetError;
assert(RetError == UR_RESULT_SUCCESS);
/// Note: this code assumes that there is only one device per context
RetError = urKernelGetGroupInfo(
this, Program->getDevice(),
UR_KERNEL_GROUP_INFO_COMPILE_MAX_WORK_GROUP_SIZE,
sizeof(MaxThreadsPerBlock), MaxThreadsPerBlock, nullptr);
assert(RetError == UR_RESULT_SUCCESS);
/// Note: this code assumes that there is only one device per context
RetError = urKernelGetGroupInfo(
this, Program->getDevice(),
UR_KERNEL_GROUP_INFO_COMPILE_MAX_LINEAR_WORK_GROUP_SIZE,
sizeof(MaxLinearThreadsPerBlock), &MaxLinearThreadsPerBlock, nullptr);
assert(RetError == UR_RESULT_SUCCESS);
UR_CHECK_ERROR(
cuFuncGetAttribute(&RegsPerThread, CU_FUNC_ATTRIBUTE_NUM_REGS, Func));
}
22 changes: 14 additions & 8 deletions source/adapters/cuda/program.cpp
@@ -54,9 +54,10 @@ ur_program_handle_t_::setMetadata(const ur_program_metadata_t *Metadata,

auto [Prefix, Tag] = splitMetadataName(MetadataElementName);

if (Tag == __SYCL_UR_PROGRAM_METADATA_TAG_REQD_WORK_GROUP_SIZE) {
// If metadata is reqd_work_group_size, record it for the corresponding
// kernel name.
if (Tag == __SYCL_UR_PROGRAM_METADATA_TAG_REQD_WORK_GROUP_SIZE ||
Tag == __SYCL_UR_PROGRAM_METADATA_TAG_MAX_WORK_GROUP_SIZE) {
// If metadata is reqd_work_group_size/max_work_group_size, record it for
// the corresponding kernel name.
size_t MDElemsSize = MetadataElement.size - sizeof(std::uint64_t);

// Expect between 1 and 3 32-bit integer values.
@@ -69,18 +70,23 @@
reinterpret_cast<const char *>(MetadataElement.value.pData) +
sizeof(std::uint64_t);
// Read values and pad with 1's for values not present.
std::uint32_t ReqdWorkGroupElements[] = {1, 1, 1};
std::memcpy(ReqdWorkGroupElements, ValuePtr, MDElemsSize);
KernelReqdWorkGroupSizeMD[Prefix] =
std::make_tuple(ReqdWorkGroupElements[0], ReqdWorkGroupElements[1],
ReqdWorkGroupElements[2]);
std::array<uint32_t, 3> WorkGroupElements = {1, 1, 1};
std::memcpy(WorkGroupElements.data(), ValuePtr, MDElemsSize);
(Tag == __SYCL_UR_PROGRAM_METADATA_TAG_REQD_WORK_GROUP_SIZE
? KernelReqdWorkGroupSizeMD
: KernelMaxWorkGroupSizeMD)[Prefix] =
std::make_tuple(WorkGroupElements[0], WorkGroupElements[1],
WorkGroupElements[2]);
} else if (Tag == __SYCL_UR_PROGRAM_METADATA_GLOBAL_ID_MAPPING) {
const char *MetadataValPtr =
reinterpret_cast<const char *>(MetadataElement.value.pData) +
sizeof(std::uint64_t);
const char *MetadataValPtrEnd =
MetadataValPtr + MetadataElement.size - sizeof(std::uint64_t);
GlobalIDMD[Prefix] = std::string{MetadataValPtr, MetadataValPtrEnd};
} else if (Tag ==
__SYCL_UR_PROGRAM_METADATA_TAG_MAX_LINEAR_WORK_GROUP_SIZE) {
KernelMaxLinearWorkGroupSizeMD[Prefix] = MetadataElement.value.data64;
}
}
return UR_RESULT_SUCCESS;
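
For orientation only, a hypothetical encoder (not part of this change; names and values invented) for the byte layout the parser above expects: a leading 64-bit word that the parser skips, followed by one to three 32-bit dimensions, with missing dimensions defaulting to 1:

    #include <cstdint>
    #include <cstring>
    #include <vector>

    // Builds a max_work_group_size-style payload for dimensions {8, 8, 4}.
    std::vector<char> makeWorkGroupSizePayload() {
      const std::uint64_t Header = 0;          // skipped via `+ sizeof(std::uint64_t)` above
      const std::uint32_t Dims[3] = {8, 8, 4}; // between 1 and 3 values may be present
      std::vector<char> Payload(sizeof(Header) + sizeof(Dims));
      std::memcpy(Payload.data(), &Header, sizeof(Header));
      std::memcpy(Payload.data() + sizeof(Header), Dims, sizeof(Dims));
      return Payload;
    }
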
6 changes: 5 additions & 1 deletion source/adapters/cuda/program.hpp
@@ -36,6 +36,9 @@ struct ur_program_handle_t_ {
std::unordered_map<std::string, std::tuple<uint32_t, uint32_t, uint32_t>>
KernelReqdWorkGroupSizeMD;
std::unordered_map<std::string, std::string> GlobalIDMD;
std::unordered_map<std::string, std::tuple<uint32_t, uint32_t, uint32_t>>
KernelMaxWorkGroupSizeMD;
std::unordered_map<std::string, uint64_t> KernelMaxLinearWorkGroupSizeMD;

constexpr static size_t MaxLogSize = 8192u;

@@ -45,7 +48,8 @@

ur_program_handle_t_(ur_context_handle_t Context, ur_device_handle_t Device)
: Module{nullptr}, Binary{}, BinarySizeInBytes{0}, RefCount{1},
Context{Context}, Device{Device}, KernelReqdWorkGroupSizeMD{} {
Context{Context}, Device{Device}, KernelReqdWorkGroupSizeMD{},
KernelMaxWorkGroupSizeMD{}, KernelMaxLinearWorkGroupSizeMD{} {
urContextRetain(Context);
urDeviceRetain(Device);
}
4 changes: 4 additions & 0 deletions source/adapters/hip/kernel.cpp
@@ -127,6 +127,10 @@ urKernelGetGroupInfo(ur_kernel_handle_t hKernel, ur_device_handle_t hDevice,
&Bytes, HIP_FUNC_ATTRIBUTE_LOCAL_SIZE_BYTES, hKernel->get()));
return ReturnValue(uint64_t(Bytes));
}
case UR_KERNEL_GROUP_INFO_COMPILE_MAX_WORK_GROUP_SIZE:
case UR_KERNEL_GROUP_INFO_COMPILE_MAX_LINEAR_WORK_GROUP_SIZE:
// FIXME: could be added
return UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION;
default:
break;
}
4 changes: 4 additions & 0 deletions source/adapters/level_zero/kernel.cpp
@@ -856,6 +856,10 @@ ur_result_t urKernelGetGroupInfo(
case UR_KERNEL_GROUP_INFO_PRIVATE_MEM_SIZE: {
return ReturnValue(uint32_t{Kernel->ZeKernelProperties->privateMemSize});
}
case UR_KERNEL_GROUP_INFO_COMPILE_MAX_WORK_GROUP_SIZE:
case UR_KERNEL_GROUP_INFO_COMPILE_MAX_LINEAR_WORK_GROUP_SIZE:
// No corresponding enumeration in Level Zero
return UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION;
default: {
logger::error(
"Unknown ParamName in urKernelGetGroupInfo: ParamName={}(0x{})",
4 changes: 4 additions & 0 deletions source/adapters/level_zero/v2/kernel.cpp
@@ -417,6 +417,10 @@ ur_result_t urKernelGetGroupInfo(
auto props = hKernel->getProperties(hDevice);
return returnValue(uint32_t{props.privateMemSize});
}
case UR_KERNEL_GROUP_INFO_COMPILE_MAX_WORK_GROUP_SIZE:
case UR_KERNEL_GROUP_INFO_COMPILE_MAX_LINEAR_WORK_GROUP_SIZE:
// No corresponding enumeration in Level Zero
return UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION;
default: {
logger::error(
"Unknown ParamName in urKernelGetGroupInfo: ParamName={}(0x{})",
19 changes: 15 additions & 4 deletions source/adapters/native_cpu/enqueue.cpp
@@ -81,11 +81,22 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueKernelLaunch(
DIE_NO_IMPLEMENTATION;
}

// Check reqd_work_group_size
if (hKernel->hasReqdWGSize() && pLocalWorkSize != nullptr) {
const auto &Reqd = hKernel->getReqdWGSize();
// Check reqd_work_group_size and other kernel constraints
if (pLocalWorkSize != nullptr) {
uint64_t TotalNumWIs = 1;
for (uint32_t Dim = 0; Dim < workDim; Dim++) {
if (pLocalWorkSize[Dim] != Reqd[Dim]) {
TotalNumWIs *= pLocalWorkSize[Dim];
if (auto Reqd = hKernel->getReqdWGSize();
Reqd && pLocalWorkSize[Dim] != Reqd.value()[Dim]) {
return UR_RESULT_ERROR_INVALID_WORK_GROUP_SIZE;
}
if (auto MaxWG = hKernel->getMaxWGSize();
MaxWG && pLocalWorkSize[Dim] > MaxWG.value()[Dim]) {
return UR_RESULT_ERROR_INVALID_WORK_GROUP_SIZE;
}
}
if (auto MaxLinearWG = hKernel->getMaxLinearWGSize()) {
if (TotalNumWIs > MaxLinearWG) {
return UR_RESULT_ERROR_INVALID_WORK_GROUP_SIZE;
}
}
25 changes: 20 additions & 5 deletions source/adapters/native_cpu/kernel.cpp
@@ -31,14 +31,25 @@ urKernelCreate(ur_program_handle_t hProgram, const char *pKernelName,
ur_kernel_handle_t_ *kernel;

// Set reqd_work_group_size for kernel if needed
std::optional<native_cpu::WGSize_t> ReqdWG;
const auto &ReqdMap = hProgram->KernelReqdWorkGroupSizeMD;
auto ReqdIt = ReqdMap.find(pKernelName);
if (ReqdIt != ReqdMap.end()) {
kernel = new ur_kernel_handle_t_(hProgram, pKernelName, *f, ReqdIt->second);
} else {
kernel = new ur_kernel_handle_t_(hProgram, pKernelName, *f);
if (auto ReqdIt = ReqdMap.find(pKernelName); ReqdIt != ReqdMap.end()) {
ReqdWG = ReqdIt->second;
}

std::optional<native_cpu::WGSize_t> MaxWG;
const auto &MaxMap = hProgram->KernelMaxWorkGroupSizeMD;
if (auto MaxIt = MaxMap.find(pKernelName); MaxIt != MaxMap.end()) {
MaxWG = MaxIt->second;
}
std::optional<uint64_t> MaxLinearWG;
const auto &MaxLinMap = hProgram->KernelMaxLinearWorkGroupSizeMD;
if (auto MaxLIt = MaxLinMap.find(pKernelName); MaxLIt != MaxLinMap.end()) {
MaxLinearWG = MaxLIt->second;
}
kernel = new ur_kernel_handle_t_(hProgram, pKernelName, *f, ReqdWG, MaxWG,
MaxLinearWG);

*phKernel = kernel;

return UR_RESULT_SUCCESS;
@@ -148,6 +159,10 @@ urKernelGetGroupInfo(ur_kernel_handle_t hKernel, ur_device_handle_t hDevice,
int bytes = 0;
return returnValue(static_cast<uint64_t>(bytes));
}
case UR_KERNEL_GROUP_INFO_COMPILE_MAX_WORK_GROUP_SIZE:
case UR_KERNEL_GROUP_INFO_COMPILE_MAX_LINEAR_WORK_GROUP_SIZE:
// FIXME: could be added
return UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION;

default:
break;
25 changes: 16 additions & 9 deletions source/adapters/native_cpu/kernel.hpp
@@ -41,15 +41,14 @@ struct ur_kernel_handle_t_ : RefCounted {

ur_kernel_handle_t_(ur_program_handle_t hProgram, const char *name,
nativecpu_task_t subhandler)
: hProgram(hProgram), _name{name}, _subhandler{std::move(subhandler)},
HasReqdWGSize(false) {}
: hProgram(hProgram), _name{name}, _subhandler{std::move(subhandler)} {}

ur_kernel_handle_t_(const ur_kernel_handle_t_ &other)
: hProgram(other.hProgram), _name(other._name),
_subhandler(other._subhandler), _args(other._args),
_localArgInfo(other._localArgInfo), _localMemPool(other._localMemPool),
_localMemPoolSize(other._localMemPoolSize),
HasReqdWGSize(other.HasReqdWGSize), ReqdWGSize(other.ReqdWGSize) {
ReqdWGSize(other.ReqdWGSize) {
incrementReferenceCount();
}

@@ -60,19 +59,26 @@
}
ur_kernel_handle_t_(ur_program_handle_t hProgram, const char *name,
nativecpu_task_t subhandler,
const native_cpu::ReqdWGSize_t &ReqdWGSize)
std::optional<native_cpu::WGSize_t> ReqdWGSize,
std::optional<native_cpu::WGSize_t> MaxWGSize,
std::optional<uint64_t> MaxLinearWGSize)
: hProgram(hProgram), _name{name}, _subhandler{std::move(subhandler)},
HasReqdWGSize(true), ReqdWGSize(ReqdWGSize) {}
ReqdWGSize(ReqdWGSize), MaxWGSize(MaxWGSize),
MaxLinearWGSize(MaxLinearWGSize) {}

ur_program_handle_t hProgram;
std::string _name;
nativecpu_task_t _subhandler;
std::vector<native_cpu::NativeCPUArgDesc> _args;
std::vector<local_arg_info_t> _localArgInfo;

bool hasReqdWGSize() const { return HasReqdWGSize; }
std::optional<native_cpu::WGSize_t> getReqdWGSize() const {
return ReqdWGSize;
}

std::optional<native_cpu::WGSize_t> getMaxWGSize() const { return MaxWGSize; }

const native_cpu::ReqdWGSize_t &getReqdWGSize() const { return ReqdWGSize; }
std::optional<uint64_t> getMaxLinearWGSize() const { return MaxLinearWGSize; }

void updateMemPool(size_t numParallelThreads) {
// compute requested size.
@@ -103,6 +109,7 @@ struct ur_kernel_handle_t_ : RefCounted {
private:
char *_localMemPool = nullptr;
size_t _localMemPoolSize = 0;
bool HasReqdWGSize;
native_cpu::ReqdWGSize_t ReqdWGSize;
std::optional<native_cpu::WGSize_t> ReqdWGSize = std::nullopt;
std::optional<native_cpu::WGSize_t> MaxWGSize = std::nullopt;
std::optional<uint64_t> MaxLinearWGSize = std::nullopt;
};