Support for NVIDIA MIG in Mesos containerizer #454

Open · wants to merge 3 commits into base: master
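
This change makes the Mesos containerizer's GPU allocator aware of NVIDIA MIG (Multi-Instance GPU). When MIG is enabled on a physical GPU, the allocator now advertises one `gpus` resource per MIG compute instance instead of one per device, and the `Gpu` struct gains the fields needed to identify an instance (nvidia caps major plus GPU/compute instance minors). The `nvml::deviceGetMigMode`, `nvml::deviceGetMigDeviceCount`, `nvml::deviceGetMigDeviceHandleByIndex`, `nvml::deviceGetGpuInstanceMinor` and `nvml::deviceGetComputeInstanceMinor` calls in the diff are Mesos-side wrappers presumably added elsewhere in this series; as a rough sketch only, the counting they enable corresponds to the following use of the raw NVML C API:

// Rough sketch (not part of this patch) of the MIG enumeration this change
// performs through Mesos' nvml:: wrappers, expressed against the raw NVML C
// API from nvml.h. Assumes an NVML/driver version with MIG support; error
// handling is reduced to the minimum.
#include <nvml.h>

#include <iostream>

int main()
{
  if (nvmlInit() != NVML_SUCCESS) {
    return 1;
  }

  unsigned int deviceCount = 0;
  if (nvmlDeviceGetCount(&deviceCount) != NVML_SUCCESS) {
    nvmlShutdown();
    return 1;
  }

  unsigned int schedulable = 0;

  for (unsigned int i = 0; i < deviceCount; ++i) {
    nvmlDevice_t device;
    if (nvmlDeviceGetHandleByIndex(i, &device) != NVML_SUCCESS) {
      continue;
    }

    unsigned int current = NVML_DEVICE_MIG_DISABLE;
    unsigned int pending = NVML_DEVICE_MIG_DISABLE;
    nvmlReturn_t result = nvmlDeviceGetMigMode(device, &current, &pending);

    if (result != NVML_SUCCESS || current != NVML_DEVICE_MIG_ENABLE) {
      // Non-MIG GPU (or MIG not supported): the whole device is one 'gpus' unit.
      schedulable++;
      continue;
    }

    // MIG-enabled GPU: each compute instance is exposed as its own MIG device.
    unsigned int maxMigDevices = 0;
    if (nvmlDeviceGetMaxMigDeviceCount(device, &maxMigDevices) != NVML_SUCCESS) {
      continue;
    }

    for (unsigned int m = 0; m < maxMigDevices; ++m) {
      nvmlDevice_t mig;
      if (nvmlDeviceGetMigDeviceHandleByIndex(device, m, &mig) == NVML_SUCCESS) {
        schedulable++;
      }
    }
  }

  std::cout << "Schedulable GPU instances: " << schedulable << std::endl;

  nvmlShutdown();
  return 0;
}

Once MIG is enabled, the physical GPU itself can no longer run compute work directly, which is why a MIG-enabled device contributes only its instances to the count.
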
147 changes: 131 additions & 16 deletions src/slave/containerizer/mesos/isolators/gpu/allocator.cpp
@@ -85,11 +85,21 @@ static Try<set<Gpu>> enumerateGpus(
if (flags.nvidia_gpu_devices.isSome()) {
indices = flags.nvidia_gpu_devices.get();
} else {
for (size_t i = 0; i < resources.gpus().getOrElse(0); ++i) {
Try<unsigned int> available = nvml::deviceGetCount();
if (available.isError()) {
return Error("Failed to nvml::deviceGetCount: " + available.error());
}

for (unsigned int i = 0; i < available.get(); ++i) {
indices.push_back(i);
}
}

Try<unsigned int> caps_major = nvml::systemGetCapsMajor();
if (caps_major.isError()) {
return Error("Failed to get nvidia caps major: " + caps_major.error());
}

set<Gpu> gpus;

foreach (unsigned int index, indices) {
@@ -103,17 +113,91 @@ static Try<set<Gpu>> enumerateGpus(
return Error("Failed to nvml::deviceGetMinorNumber: " + minor.error());
}

Gpu gpu;
gpu.major = NVIDIA_MAJOR_DEVICE;
gpu.minor = minor.get();
Try<bool> ismig = nvml::deviceGetMigMode(handle.get());
if (ismig.isError()) {
return Error("Failed to nvml::deviceGetMigMode: " + ismig.error());
}

if (!ismig.get()) {
Gpu gpu;
gpu.major = NVIDIA_MAJOR_DEVICE;
gpu.minor = minor.get();

gpus.insert(gpu);

gpus.insert(gpu);
continue;
}

Try<unsigned int> migcount = nvml::deviceGetMigDeviceCount(handle.get());
if (migcount.isError()) {
return Error("Failed to nvml::deviceGetMigDeviceCount: " + migcount.error());
}

for (unsigned int migindex = 0; migindex < migcount.get(); migindex++) {
Try<nvmlDevice_t> mighandle = nvml::deviceGetMigDeviceHandleByIndex(handle.get(), migindex);
if (mighandle.isError()) {
return Error("Failed to nvml::deviceGetMigDeviceHandleByIndex: " + mighandle.error());
}

Try<unsigned int> gi_minor = nvml::deviceGetGpuInstanceMinor(mighandle.get());
if (gi_minor.isError()) {
return Error("Failed to nvml::deviceGetGpuInstanceMinor: " + gi_minor.error());
}

Try<unsigned int> ci_minor = nvml::deviceGetComputeInstanceMinor(mighandle.get());
if (ci_minor.isError()) {
return Error("Failed to nvml::deviceGetComputeInstanceMinor: " + ci_minor.error());
}

Gpu gpu;
gpu.major = NVIDIA_MAJOR_DEVICE;
gpu.minor = minor.get();
gpu.ismig = true;
gpu.caps_major = caps_major.get();
gpu.gi_minor = gi_minor.get();
gpu.ci_minor = ci_minor.get();

gpus.insert(gpu);
}
}

return gpus;
}


static Try<unsigned int> countGpuInstancesForDevices(
const vector<unsigned int>& devices)
{
unsigned int count = 0;

foreach (unsigned int device, devices) {
Try<nvmlDevice_t> handle = nvml::deviceGetHandleByIndex(device);
if (handle.isError()) {
return Error("Failed to nvml::deviceGetHandleByIndex: " + handle.error());
}

Try<bool> ismig = nvml::deviceGetMigMode(handle.get());
if (ismig.isError()) {
return Error("Failed to nvml::deviceGetMigMode: " + ismig.error());
}

if (!ismig.get()) {
count++;
continue;
}

Try<unsigned int> migcount = nvml::deviceGetMigDeviceCount(handle.get());
if (migcount.isError()) {
return Error("Failed to nvml::deviceGetMigDeviceCount: " + migcount.error());
}

count += migcount.get();
}

return count;
}


// To determine the proper number of GPU resources to return, we
// need to check both --resources and --nvidia_gpu_devices.
// There are two cases to consider:
@@ -174,11 +258,6 @@ static Try<Resources> enumerateGpuResources(const Flags& flags)
return Error("Failed to nvml::initialize: " + initialized.error());
}

Try<unsigned int> available = nvml::deviceGetCount();
if (available.isError()) {
return Error("Failed to nvml::deviceGetCount: " + available.error());
}

// The `Resources` wrapper does not allow us to distinguish between
// a user specifying "gpus:0" in the --resources flag and not
// specifying "gpus" at all. To help with this we short circuit
@@ -225,9 +304,11 @@ static Try<Resources> enumerateGpuResources(const Flags& flags)
return Error("'--nvidia_gpu_devices' contains duplicates");
}

if (flags.nvidia_gpu_devices->size() != resources.gpus().get()) {
return Error("'--resources' and '--nvidia_gpu_devices' specify"
" different numbers of GPU devices");
Try<unsigned int> available = countGpuInstancesForDevices(unique);
if (available.isError()) {
return Error("Failed to count all GPU instances for devices"
" specified by --nvidia_gpu_devices: "
+ available.error());
}

if (resources.gpus().get() > available.get()) {
@@ -238,6 +319,22 @@ static Try<Resources> enumerateGpuResources(const Flags& flags)
return resources;
}

Try<unsigned int> available = nvml::deviceGetCount();
if (available.isError()) {
return Error("Failed to nvml::deviceGetCount: " + available.error());
}

vector<unsigned int> indices;
for (unsigned int i = 0; i < available.get(); ++i) {
indices.push_back(i);
}

available = countGpuInstancesForDevices(indices);
if (available.isError()) {
return Error("Failed to count all GPU instances: "
+ available.error());
}

return Resources::parse(
"gpus",
stringify(available.get()),
@@ -378,7 +475,15 @@ Future<Nothing> NvidiaGpuAllocator::deallocate(const set<Gpu>& gpus)
bool operator<(const Gpu& left, const Gpu& right)
{
if (left.major == right.major) {
return left.minor < right.minor;
// If either GPU is not a MIG instance, or the minor numbers differ,
// comparing major/minor is enough.
if (!left.ismig || !right.ismig || (left.minor != right.minor)) {
return left.minor < right.minor;
}

if (left.gi_minor == right.gi_minor) {
return left.ci_minor < right.ci_minor;
}
return left.gi_minor < right.gi_minor;
}
return left.major < right.major;
}
@@ -404,7 +509,14 @@ bool operator>=(const Gpu& left, const Gpu& right)

bool operator==(const Gpu& left, const Gpu& right)
{
return left.major == right.major && left.minor == right.minor;
if (left.ismig != right.ismig)
return false;

if (!left.ismig)
return left.major == right.major && left.minor == right.minor;

return left.major == right.major && left.minor == right.minor
&& left.gi_minor == right.gi_minor && left.ci_minor == right.ci_minor;
}


@@ -416,7 +528,10 @@ bool operator!=(const Gpu& left, const Gpu& right)

ostream& operator<<(ostream& stream, const Gpu& gpu)
{
return stream << gpu.major << '.' << gpu.minor;
if (gpu.ismig)
return stream << gpu.major << '.' << gpu.minor << ':' << gpu.gi_minor << '.' << gpu.ci_minor;
else
return stream << gpu.major << '.' << gpu.minor;
}

} // namespace slave {
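
A small illustration (not part of the patch) of how the updated comparison and streaming operators behave for two MIG compute instances on the same physical GPU. It assumes the `Gpu` struct and operators from this diff, the usual `NVIDIA_MAJOR_DEVICE` value of 195, and a made-up `caps_major`:

#include <cassert>
#include <iostream>

// Assumes the patched allocator.hpp/allocator.cpp are available and that this
// code sits in (or uses) the same slave namespace as the Gpu struct.
void illustrateMigOrdering()
{
  Gpu a, b;
  a.major = b.major = NVIDIA_MAJOR_DEVICE;  // character device major of /dev/nvidia*
  a.minor = b.minor = 0;                    // both instances live on /dev/nvidia0
  a.ismig = b.ismig = true;
  a.caps_major = b.caps_major = 508;        // made-up nvidia caps major
  a.gi_minor = 1; a.ci_minor = 0;           // GPU instance 1, compute instance 0
  b.gi_minor = 1; b.ci_minor = 1;           // GPU instance 1, compute instance 1

  assert(a < b);   // same minor and gi_minor, so ci_minor breaks the tie
  assert(a != b);  // equality now also checks gi_minor/ci_minor

  std::cout << a << " " << b << std::endl;  // prints "195.0:1.0 195.0:1.1"
}

This keeps non-MIG GPUs ordered exactly as before while giving MIG instances a stable order within a physical device.
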
7 changes: 7 additions & 0 deletions src/slave/containerizer/mesos/isolators/gpu/allocator.hpp
@@ -41,8 +41,15 @@ namespace slave {
// abstraction in terms of it.
struct Gpu
{
// Physical GPU character device numbers.
unsigned int major;
unsigned int minor;

// MIG support: `ismig` is set when this entry is a MIG compute instance;
// `caps_major` is the nvidia caps device major, and `gi_minor`/`ci_minor`
// identify the GPU instance and compute instance.
bool ismig;
unsigned int caps_major;
unsigned int gi_minor;
unsigned int ci_minor;
};

