
Commit

CDI: Add `mounts`, `mount-nvidia-executables` and `mount-nvidia-docker-1-directories`

Add three options to `hardware.nvidia-container-toolkit`:

- `mounts`: a list of mounts that allows mounting arbitrary paths in
CDI-enabled containers.

- `mount-nvidia-executables`: allows users to avoid mounting NVIDIA
executables (nvidia-smi, nvidia-ctk, and related tools) in the container.

- `mount-nvidia-docker-1-directories`: allows users to avoid mounting
`/usr/local/nvidia/lib{,64}` in containers.

Remove `cdi.static` and `cdi.dynamic.nvidia.enable` attributes.
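
A minimal sketch of a configuration using the new options (the boolean
values shown are illustrative; both mount groups default to true):

```nix
{
  hardware.nvidia-container-toolkit = {
    enable = true;
    # Keep nvidia-smi, nvidia-ctk, etc. out of the containers.
    mount-nvidia-executables = false;
    # Skip the nvidia-docker 1.0 compatibility mounts
    # (/usr/local/nvidia/lib and /usr/local/nvidia/lib64).
    mount-nvidia-docker-1-directories = false;
  };
}
```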
ereslibre committed Apr 18, 2024
1 parent 6080721 commit 7ad83fc
Showing 6 changed files with 161 additions and 111 deletions.
4 changes: 1 addition & 3 deletions nixos/doc/manual/release-notes/rl-2405.section.md
@@ -42,9 +42,7 @@ Use `services.pipewire.extraConfig` or `services.pipewire.configPackages` for Pi

 - The default dbus implementation has transitioned to dbus-broker from the classic dbus daemon for better performance and reliability. Users can revert to the classic dbus daemon by setting `services.dbus.implementation = "dbus";`. For detailed deviations, refer to [dbus-broker's deviations page](https://github.com/bus1/dbus-broker/wiki/Deviations).
 
-- A new option `virtualisation.containers.cdi` was added. It contains `static` and `dynamic` attributes (corresponding to `/etc/cdi` and `/run/cdi` respectively) to configure the Container Device Interface (CDI).
-
-- `virtualisation.docker.enableNvidia` and `virtualisation.podman.enableNvidia` options are deprecated. `virtualisation.containers.cdi.dynamic.nvidia.enable` should be used instead. This option will expose GPUs on containers with the `--device` CLI option. This is supported by Docker 25, Podman 3.2.0 and Singularity 4. Any container runtime that supports the CDI specification will take advantage of this feature.
+- `virtualisation.docker.enableNvidia` and `virtualisation.podman.enableNvidia` options are deprecated. `hardware.nvidia-container-toolkit.enable` should be used instead. This option will expose GPUs on containers with the `--device` CLI option. This is supported by Docker 25, Podman 3.2.0 and Singularity 4. Any container runtime that supports the CDI specification will take advantage of this feature.
 
 - A new option `system.etc.overlay.enable` was added. If enabled, `/etc` is
   mounted via an overlayfs instead of being created by a custom perl script.
2 changes: 1 addition & 1 deletion nixos/modules/module-list.nix
@@ -557,7 +557,7 @@
 ./services/hardware/kanata.nix
 ./services/hardware/lcd.nix
 ./services/hardware/lirc.nix
-./services/hardware/nvidia-container-toolkit-cdi-generator
+./services/hardware/nvidia-container-toolkit
 ./services/hardware/monado.nix
 ./services/hardware/nvidia-optimus.nix
 ./services/hardware/openrgb.nix

This file was deleted.

35 changes: 35 additions & 0 deletions nixos/modules/services/hardware/nvidia-container-toolkit/cdi-generate.nix
@@ -0,0 +1,35 @@
{
  glibc,
  jq,
  lib,
  mounts,
  nvidia-container-toolkit,
  nvidia-driver,
  runtimeShell,
  writeScriptBin,
}: let
  mkMount = {hostPath, containerPath, mountOptions}: {
    inherit hostPath containerPath;
    options = mountOptions;
  };
  jqAddMountExpression = ".containerEdits.mounts[.containerEdits.mounts | length] |= . +";
  allJqMounts = lib.concatMap
    (mount:
      ["${lib.getExe jq} '${jqAddMountExpression} ${builtins.toJSON (mkMount mount)}'"])
    mounts;
in
  writeScriptBin "nvidia-cdi-generator"
  ''
    #! ${runtimeShell}

    function cdiGenerate {
      ${lib.getExe' nvidia-container-toolkit "nvidia-ctk"} cdi generate \
        --format json \
        --ldconfig-path ${lib.getExe' glibc "ldconfig"} \
        --library-search-path ${lib.getLib nvidia-driver}/lib \
        --nvidia-ctk-path ${lib.getExe' nvidia-container-toolkit "nvidia-ctk"}
    }

    cdiGenerate | \
      ${lib.concatStringsSep " | " allJqMounts} > $RUNTIME_DIRECTORY/nvidia-container-toolkit.json
  ''
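
To illustrate the data flow in this generator (a sketch; the mount values
are illustrative, not taken from the commit): `mkMount` renames
`mountOptions` to the `options` key used by the CDI spec,
`builtins.toJSON` serializes it, and one `jq` invocation per mount appends
the result to `.containerEdits.mounts` of the spec emitted by
`nvidia-ctk cdi generate`:

```nix
let
  # Same helper as in cdi-generate.nix above.
  mkMount = { hostPath, containerPath, mountOptions }: {
    inherit hostPath containerPath;
    options = mountOptions;
  };
in
builtins.toJSON (mkMount {
  hostPath = "/run/opengl-driver";       # illustrative value
  containerPath = "/run/opengl-driver";  # illustrative value
  mountOptions = [ "ro" "nosuid" "nodev" "bind" ];
})
# => "{\"containerPath\":\"/run/opengl-driver\",\"hostPath\":\"/run/opengl-driver\",\"options\":[\"ro\",\"nosuid\",\"nodev\",\"bind\"]}"
```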
122 changes: 122 additions & 0 deletions nixos/modules/services/hardware/nvidia-container-toolkit/default.nix
@@ -0,0 +1,122 @@
{ config, lib, pkgs, ... }:

{
  imports = [
    (lib.mkRenamedOptionModule
      [ "virtualisation" "containers" "cdi" "dynamic" "nvidia" "enable" ]
      [ "hardware" "nvidia-container-toolkit" "enable" ])
  ];

  options = let
    mountType = {
      options = {
        hostPath = lib.mkOption {
          type = lib.types.str;
          description = lib.mdDoc "Host path.";
        };
        containerPath = lib.mkOption {
          type = lib.types.str;
          description = lib.mdDoc "Container path.";
        };
        mountOptions = lib.mkOption {
          default = [ "ro" "nosuid" "nodev" "bind" ];
          type = lib.types.listOf lib.types.str;
          description = lib.mdDoc "Mount options.";
        };
      };
    };
  in {

    hardware.nvidia-container-toolkit = {
      enable = lib.mkOption {
        default = false;
        type = lib.types.bool;
        description = lib.mdDoc ''
          Enable dynamic CDI configuration for NVidia devices by running
          nvidia-container-toolkit on boot.
        '';
      };

      mounts = lib.mkOption {
        type = lib.types.listOf (lib.types.submodule mountType);
        default = [];
        description = lib.mdDoc "Mounts to be added to every container under the Nvidia CDI profile.";
      };

      mount-nvidia-executables = lib.mkOption {
        default = true;
        type = lib.types.bool;
        description = lib.mdDoc ''
          Mount executables nvidia-smi, nvidia-cuda-mps-control, nvidia-cuda-mps-server,
          nvidia-debugdump, nvidia-powerd and nvidia-ctk on containers.
        '';
      };

      mount-nvidia-docker-1-directories = lib.mkOption {
        default = true;
        type = lib.types.bool;
        description = lib.mdDoc ''
          Mount nvidia-docker-1 directories on containers: /usr/local/nvidia/lib and
          /usr/local/nvidia/lib64.
        '';
      };

    };

  };

  config = {

    hardware.nvidia-container-toolkit.mounts = let
      nvidia-driver = config.hardware.nvidia.package;
    in (lib.mkMerge [
      [{ hostPath = pkgs.addDriverRunpath.driverLink;
         containerPath = pkgs.addDriverRunpath.driverLink; }
       { hostPath = "${lib.getLib pkgs.glibc}/lib";
         containerPath = "${lib.getLib pkgs.glibc}/lib"; }
       { hostPath = "${lib.getLib pkgs.glibc}/lib64";
         containerPath = "${lib.getLib pkgs.glibc}/lib64"; }]
      (lib.mkIf config.hardware.nvidia-container-toolkit.mount-nvidia-executables
        [{ hostPath = lib.getExe' nvidia-driver "nvidia-cuda-mps-control";
           containerPath = "/usr/bin/nvidia-cuda-mps-control"; }
         { hostPath = lib.getExe' nvidia-driver "nvidia-cuda-mps-server";
           containerPath = "/usr/bin/nvidia-cuda-mps-server"; }
         { hostPath = lib.getExe' nvidia-driver "nvidia-debugdump";
           containerPath = "/usr/bin/nvidia-debugdump"; }
         { hostPath = lib.getExe' nvidia-driver "nvidia-powerd";
           containerPath = "/usr/bin/nvidia-powerd"; }
         { hostPath = lib.getExe' nvidia-driver "nvidia-smi";
           containerPath = "/usr/bin/nvidia-smi"; }])
      # nvidia-docker 1.0 uses /usr/local/nvidia/lib{,64}
      # e.g.
      # - https://gitlab.com/nvidia/container-images/cuda/-/blob/e3ff10eab3a1424fe394899df0e0f8ca5a410f0f/dist/12.3.1/ubi9/base/Dockerfile#L44
      # - https://github.com/NVIDIA/nvidia-docker/blob/01d2c9436620d7dde4672e414698afe6da4a282f/src/nvidia/volumes.go#L104-L173
      (lib.mkIf config.hardware.nvidia-container-toolkit.mount-nvidia-docker-1-directories
        [{ hostPath = "${lib.getLib nvidia-driver}/lib";
           containerPath = "/usr/local/nvidia/lib"; }
         { hostPath = "${lib.getLib nvidia-driver}/lib";
           containerPath = "/usr/local/nvidia/lib64"; }])
    ]);

    systemd.services.nvidia-container-toolkit-cdi-generator = lib.mkIf config.hardware.nvidia-container-toolkit.enable {
      description = "Container Device Interface (CDI) for Nvidia generator";
      wantedBy = [ "multi-user.target" ];
      after = [ "systemd-udev-settle.service" ];
      serviceConfig = {
        RuntimeDirectory = "cdi";
        RemainAfterExit = true;
        ExecStart =
          let
            script = pkgs.callPackage ./cdi-generate.nix {
              inherit (config.hardware.nvidia-container-toolkit) mounts;
              nvidia-driver = config.hardware.nvidia.package;
            };
          in
            lib.getExe script;
        Type = "oneshot";
      };
    };

  };

}
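
User-supplied mounts are merged with the defaults above via
`lib.mkMerge`. A sketch of adding a custom mount on top of them (the
paths are hypothetical):

```nix
{
  hardware.nvidia-container-toolkit = {
    enable = true;
    mounts = [
      {
        # Hypothetical example: expose a host model cache read-only.
        hostPath = "/var/lib/models";
        containerPath = "/models";
        # Optional; defaults to [ "ro" "nosuid" "nodev" "bind" ].
        mountOptions = [ "ro" "nosuid" "nodev" "bind" ];
      }
    ];
  };
}
```

The generated specification lands in `/run/cdi/nvidia-container-toolkit.json`,
since the service sets `RuntimeDirectory = "cdi"`.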
49 changes: 2 additions & 47 deletions nixos/modules/virtualisation/containers.nix
@@ -28,43 +28,6 @@ in
description = "Enable the OCI seccomp BPF hook";
};

cdi = {
dynamic.nvidia.enable = mkOption {
type = types.bool;
default = false;
description = ''
Enable dynamic CDI configuration for NVidia devices by running nvidia-container-toolkit on boot.
'';
};

static = mkOption {
type = types.attrs;
default = { };
description = ''
Declarative CDI specification. Each key of the attribute set
will be mapped to a file in /etc/cdi. It is required for every
key to be provided in JSON format.
'';
example = {
some-vendor = builtins.fromJSON ''
{
"cdiVersion": "0.5.0",
"kind": "some-vendor.com/foo",
"devices": [],
"containerEdits": []
}
'';

some-other-vendor = {
cdiVersion = "0.5.0";
kind = "some-other-vendor.com/bar";
devices = [];
containerEdits = [];
};
};
};
};

containersConf.settings = mkOption {
type = toml.type;
default = { };
@@ -150,8 +113,6 @@

   config = lib.mkIf cfg.enable {
 
-    hardware.nvidia-container-toolkit-cdi-generator.enable = lib.mkIf cfg.cdi.dynamic.nvidia.enable true;
-
     virtualisation.containers.containersConf.cniPlugins = [ pkgs.cni-plugins ];
 
     virtualisation.containers.containersConf.settings = {
@@ -163,13 +124,7 @@
       };
     };
 
-    environment.etc = let
-      cdiStaticConfigurationFiles = (lib.attrsets.mapAttrs'
-        (name: value:
-          lib.attrsets.nameValuePair "cdi/${name}.json"
-          { text = builtins.toJSON value; })
-        cfg.cdi.static);
-    in {
+    environment.etc = {
       "containers/containers.conf".source =
         toml.generate "containers.conf" cfg.containersConf.settings;
 
@@ -183,7 +138,7 @@ in
"containers/policy.json".source =
if cfg.policy != { } then pkgs.writeText "policy.json" (builtins.toJSON cfg.policy)
else "${pkgs.skopeo.policy}/default-policy.json";
} // cdiStaticConfigurationFiles;
};

};

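Note that `cdi.static` is removed without a renamed replacement.
Configurations that relied on it can write static CDI specifications to
`/etc/cdi` directly, mirroring what the removed code did; a sketch (the
vendor name and spec contents are illustrative):

```nix
{
  environment.etc."cdi/some-vendor.json".text = builtins.toJSON {
    cdiVersion = "0.5.0";
    kind = "some-vendor.com/foo";
    devices = [ ];
    containerEdits = [ ];
  };
}
```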
