Skip to content

Commit

Permalink
Update to CUDA 12.5. (#332)
Browse files Browse the repository at this point in the history
* Update to CUDA 12.5.

* Use cudnn 9.

* Add cuDNN input variable.

* Try to use cuDNN version.

* fix shellcheck lint

* cudnn_version -> cuDNNVersion

* install different packages for cuDNN v8 and v9

* Use CUDNNVERSION=8 by default

---------

Co-authored-by: Paul Taylor <178183+trxcllnt@users.noreply.github.com>
Co-authored-by: ptaylor <paul.e.taylor@me.com>
  • Loading branch information
3 people committed Jul 17, 2024
1 parent fa0901f commit 1bd1bd5
Show file tree
Hide file tree
Showing 18 changed files with 231 additions and 31 deletions.
1 change: 1 addition & 0 deletions .devcontainer/cuda11.8-pip/devcontainer.json
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
"features": {
"./features/src/cuda": {
"version": "11.8",
"cuDNNVersion": "8",
"installcuBLAS": true,
"installcuDNN": true,
"installcuSOLVER": true,
Expand Down
1 change: 1 addition & 0 deletions .devcontainer/cuda12.0-pip/devcontainer.json
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
"features": {
"./features/src/cuda": {
"version": "12.0",
"cuDNNVersion": "8",
"installcuBLAS": true,
"installcuDNN": true,
"installcuSOLVER": true,
Expand Down
1 change: 1 addition & 0 deletions .devcontainer/cuda12.2-pip/devcontainer.json
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
"features": {
"./features/src/cuda": {
"version": "12.2",
"cuDNNVersion": "8",
"installcuBLAS": true,
"installcuDNN": true,
"installcuSOLVER": true,
Expand Down
82 changes: 82 additions & 0 deletions .devcontainer/cuda12.5-conda/devcontainer.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
{
  "build": {
    "context": "${localWorkspaceFolder}/.devcontainer",
    "dockerfile": "${localWorkspaceFolder}/.devcontainer/rapids.Dockerfile",
    "args": {
      "CUDA": "12.5",
      "PYTHON_PACKAGE_MANAGER": "conda",
      "BASE": "rapidsai/devcontainers:24.08-cpp-mambaforge-ubuntu22.04"
    }
  },
  "runArgs": [
    "--rm",
    "--name",
    "${localEnv:USER:anon}-rapids-${localWorkspaceFolderBasename}-24.08-cuda12.5-conda"
  ],
  "hostRequirements": {
    "gpu": "optional"
  },
  "features": {
    "./features/src/utils": {},
    "./features/src/rapids-build-utils": {}
  },
  "overrideFeatureInstallOrder": [
    "./features/src/utils",
    "./features/src/rapids-build-utils"
  ],
  "initializeCommand": [
    "/bin/bash",
    "-c",
    "mkdir -m 0755 -p ${localWorkspaceFolder}/../.{aws,cache,config,conda/pkgs,conda/${localWorkspaceFolderBasename}-cuda12.5-envs,log/devcontainer-utils} ${localWorkspaceFolder}/../{rmm,kvikio,ucxx,cudf,raft,cuvs,cumlprims_mg,cuml,cugraph-ops,wholegraph,cugraph,cuspatial}"
  ],
  "postAttachCommand": [
    "/bin/bash",
    "-c",
    "if [ ${CODESPACES:-false} = 'true' ]; then . devcontainer-utils-post-attach-command; fi"
  ],
  "workspaceFolder": "/home/coder",
  "workspaceMount": "source=${localWorkspaceFolder},target=/home/coder/devcontainers,type=bind,consistency=consistent",
  "mounts": [
    "source=${localWorkspaceFolder}/../rmm,target=/home/coder/rmm,type=bind,consistency=consistent",
    "source=${localWorkspaceFolder}/../kvikio,target=/home/coder/kvikio,type=bind,consistency=consistent",
    "source=${localWorkspaceFolder}/../ucxx,target=/home/coder/ucxx,type=bind,consistency=consistent",
    "source=${localWorkspaceFolder}/../cudf,target=/home/coder/cudf,type=bind,consistency=consistent",
    "source=${localWorkspaceFolder}/../raft,target=/home/coder/raft,type=bind,consistency=consistent",
    "source=${localWorkspaceFolder}/../cuvs,target=/home/coder/cuvs,type=bind,consistency=consistent",
    "source=${localWorkspaceFolder}/../cumlprims_mg,target=/home/coder/cumlprims_mg,type=bind,consistency=consistent",
    "source=${localWorkspaceFolder}/../cuml,target=/home/coder/cuml,type=bind,consistency=consistent",
    "source=${localWorkspaceFolder}/../cugraph-ops,target=/home/coder/cugraph-ops,type=bind,consistency=consistent",
    "source=${localWorkspaceFolder}/../wholegraph,target=/home/coder/wholegraph,type=bind,consistency=consistent",
    "source=${localWorkspaceFolder}/../cugraph,target=/home/coder/cugraph,type=bind,consistency=consistent",
    "source=${localWorkspaceFolder}/../cuspatial,target=/home/coder/cuspatial,type=bind,consistency=consistent",
    "source=${localWorkspaceFolder}/../.aws,target=/home/coder/.aws,type=bind,consistency=consistent",
    "source=${localWorkspaceFolder}/../.cache,target=/home/coder/.cache,type=bind,consistency=consistent",
    "source=${localWorkspaceFolder}/../.config,target=/home/coder/.config,type=bind,consistency=consistent",
    "source=${localWorkspaceFolder}/../.conda/pkgs,target=/home/coder/.conda/pkgs,type=bind,consistency=consistent",
    "source=${localWorkspaceFolder}/../.conda/${localWorkspaceFolderBasename}-cuda12.5-envs,target=/home/coder/.conda/envs,type=bind,consistency=consistent",
    "source=${localWorkspaceFolder}/../.log/devcontainer-utils,target=/var/log/devcontainer-utils,type=bind,consistency=consistent",
    "source=${localWorkspaceFolder}/.devcontainer/cuda12.5-conda/features/src/utils/opt/devcontainer/bin,target=/opt/devcontainer/bin,type=bind,consistency=consistent",
    "source=${localWorkspaceFolder}/.devcontainer/cuda12.5-conda/features/src/rapids-build-utils/opt/rapids-build-utils,target=/opt/rapids-build-utils,type=bind,consistency=consistent"
  ],
  "customizations": {
    "vscode": {
      "extensions": [
        "augustocdias.tasks-shell-input",
        "ms-python.flake8",
        "nvidia.nsight-vscode-edition"
      ],
      "settings": {
        "files.watcherExclude": {
          "**/build/**": true,
          "**/_skbuild/**": true,
          "**/target/**": true,
          "/home/coder/.aws/**/*": true,
          "/home/coder/.cache/**/*": true,
          "/home/coder/.conda/**/*": true,
          "/home/coder/.local/share/**/*": true,
          "/home/coder/.vscode-server/**/*": true
        },
        "search.exclude": {
          "**/build/**": true,
          "**/_skbuild/**": true,
          "**/*.code-search": true,
          "/home/coder/.aws/**/*": true,
          "/home/coder/.cache/**/*": true,
          "/home/coder/.conda/**/*": true,
          "/home/coder/.local/share/**/*": true,
          "/home/coder/.vscode-server/**/*": true
        }
      }
    }
  }
}
1 change: 1 addition & 0 deletions .devcontainer/cuda12.5-conda/features
91 changes: 91 additions & 0 deletions .devcontainer/cuda12.5-pip/devcontainer.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
{
  "build": {
    "context": "${localWorkspaceFolder}/.devcontainer",
    "dockerfile": "${localWorkspaceFolder}/.devcontainer/rapids.Dockerfile",
    "args": {
      "CUDA": "12.5",
      "PYTHON_PACKAGE_MANAGER": "pip",
      "BASE": "rapidsai/devcontainers:24.08-cpp-cuda12.5-ucx1.15.0-openmpi-ubuntu22.04"
    }
  },
  "runArgs": [
    "--rm",
    "--name",
    "${localEnv:USER:anon}-rapids-${localWorkspaceFolderBasename}-24.08-cuda12.5-pip"
  ],
  "hostRequirements": {
    "gpu": "optional"
  },
  "features": {
    "./features/src/cuda": {
      "version": "12.5",
      "cuDNNVersion": "9",
      "installcuBLAS": true,
      "installcuDNN": true,
      "installcuSOLVER": true,
      "installcuRAND": true,
      "installcuSPARSE": true
    },
    "./features/src/utils": {},
    "./features/src/rapids-build-utils": {}
  },
  "overrideFeatureInstallOrder": [
    "./features/src/cuda",
    "./features/src/utils",
    "./features/src/rapids-build-utils"
  ],
  "initializeCommand": [
    "/bin/bash",
    "-c",
    "mkdir -m 0755 -p ${localWorkspaceFolder}/../.{aws,cache,config/pip,local/share/${localWorkspaceFolderBasename}-cuda12.5-venvs,log/devcontainer-utils} ${localWorkspaceFolder}/../{rmm,kvikio,ucxx,cudf,raft,cuvs,cumlprims_mg,cuml,cugraph-ops,wholegraph,cugraph,cuspatial}"
  ],
  "postAttachCommand": [
    "/bin/bash",
    "-c",
    "if [ ${CODESPACES:-false} = 'true' ]; then . devcontainer-utils-post-attach-command; fi"
  ],
  "workspaceFolder": "/home/coder",
  "workspaceMount": "source=${localWorkspaceFolder},target=/home/coder/devcontainers,type=bind,consistency=consistent",
  "mounts": [
    "source=${localWorkspaceFolder}/../rmm,target=/home/coder/rmm,type=bind,consistency=consistent",
    "source=${localWorkspaceFolder}/../kvikio,target=/home/coder/kvikio,type=bind,consistency=consistent",
    "source=${localWorkspaceFolder}/../ucxx,target=/home/coder/ucxx,type=bind,consistency=consistent",
    "source=${localWorkspaceFolder}/../cudf,target=/home/coder/cudf,type=bind,consistency=consistent",
    "source=${localWorkspaceFolder}/../raft,target=/home/coder/raft,type=bind,consistency=consistent",
    "source=${localWorkspaceFolder}/../cuvs,target=/home/coder/cuvs,type=bind,consistency=consistent",
    "source=${localWorkspaceFolder}/../cumlprims_mg,target=/home/coder/cumlprims_mg,type=bind,consistency=consistent",
    "source=${localWorkspaceFolder}/../cuml,target=/home/coder/cuml,type=bind,consistency=consistent",
    "source=${localWorkspaceFolder}/../cugraph-ops,target=/home/coder/cugraph-ops,type=bind,consistency=consistent",
    "source=${localWorkspaceFolder}/../wholegraph,target=/home/coder/wholegraph,type=bind,consistency=consistent",
    "source=${localWorkspaceFolder}/../cugraph,target=/home/coder/cugraph,type=bind,consistency=consistent",
    "source=${localWorkspaceFolder}/../cuspatial,target=/home/coder/cuspatial,type=bind,consistency=consistent",
    "source=${localWorkspaceFolder}/../.aws,target=/home/coder/.aws,type=bind,consistency=consistent",
    "source=${localWorkspaceFolder}/../.cache,target=/home/coder/.cache,type=bind,consistency=consistent",
    "source=${localWorkspaceFolder}/../.config,target=/home/coder/.config,type=bind,consistency=consistent",
    "source=${localWorkspaceFolder}/../.local/share/${localWorkspaceFolderBasename}-cuda12.5-venvs,target=/home/coder/.local/share/venvs,type=bind,consistency=consistent",
    "source=${localWorkspaceFolder}/../.log/devcontainer-utils,target=/var/log/devcontainer-utils,type=bind,consistency=consistent",
    "source=${localWorkspaceFolder}/.devcontainer/cuda12.5-pip/features/src/utils/opt/devcontainer/bin,target=/opt/devcontainer/bin,type=bind,consistency=consistent",
    "source=${localWorkspaceFolder}/.devcontainer/cuda12.5-pip/features/src/rapids-build-utils/opt/rapids-build-utils,target=/opt/rapids-build-utils,type=bind,consistency=consistent"
  ],
  "customizations": {
    "vscode": {
      "extensions": [
        "augustocdias.tasks-shell-input",
        "ms-python.flake8",
        "nvidia.nsight-vscode-edition"
      ],
      "settings": {
        "files.watcherExclude": {
          "**/build/**": true,
          "**/_skbuild/**": true,
          "**/target/**": true,
          "/home/coder/.aws/**/*": true,
          "/home/coder/.cache/**/*": true,
          "/home/coder/.conda/**/*": true,
          "/home/coder/.local/share/**/*": true,
          "/home/coder/.vscode-server/**/*": true
        },
        "search.exclude": {
          "**/build/**": true,
          "**/_skbuild/**": true,
          "**/*.code-search": true,
          "/home/coder/.aws/**/*": true,
          "/home/coder/.cache/**/*": true,
          "/home/coder/.conda/**/*": true,
          "/home/coder/.local/share/**/*": true,
          "/home/coder/.vscode-server/**/*": true
        }
      }
    }
  }
}
1 change: 1 addition & 0 deletions .devcontainer/cuda12.5-pip/features
2 changes: 1 addition & 1 deletion .github/workflows/build-all-rapids-repos.yml
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ jobs:
pull-requests: read
with:
arch: '["amd64"]'
cuda: '["12.0", "12.2"]'
cuda: '["12.0", "12.5"]'
node_type: cpu32
extra-repo-deploy-key: CUMLPRIMS_SSH_PRIVATE_DEPLOY_KEY
extra-repo-deploy-key-2: CUGRAPH_OPS_SSH_PRIVATE_DEPLOY_KEY
Expand Down
3 changes: 2 additions & 1 deletion features/src/cuda/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,8 @@ A feature to install the NVIDIA CUDA Toolkit

| Options Id | Description | Type | Default Value |
|-----|-----|-----|-----|
| version | Version of the CUDA Toolkit to install. | string | 12.4 |
| version | Version of the CUDA Toolkit to install. | string | 12.5 |
| cuDNNVersion | Version of cuDNN to install. | string | 8 |
| installCompilers | Install NVIDIA CUDA Compiler (nvcc) | boolean | true |
| installProfilers | Install NVIDIA NSight Systems Profiler (nsys) | boolean | true |
| installCTKLibraries | Shortcut to install all CUDA Toolkit Libraries | boolean | true |
Expand Down
14 changes: 12 additions & 2 deletions features/src/cuda/devcontainer-feature.json
Original file line number Diff line number Diff line change
@@ -1,12 +1,13 @@
{
"name": "CUDA Toolkit",
"id": "cuda",
"version": "24.8.1",
"version": "24.8.2",
"description": "A feature to install the NVIDIA CUDA Toolkit",
"options": {
"version": {
"type": "string",
"proposals": [
"12.5",
"12.4",
"12.3",
"12.2",
Expand All @@ -21,9 +22,18 @@
"11.2",
"11.1"
],
"default": "12.4",
"default": "12.5",
"description": "Version of the CUDA Toolkit to install."
},
"cuDNNVersion": {
"type": "string",
"proposals": [
"9",
"8"
],
"default": "8",
"description": "Version of cuDNN to install."
},
"installCompilers": {
"type": "boolean",
"default": true,
Expand Down
35 changes: 23 additions & 12 deletions features/src/cuda/install.sh
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ export OSNAME="$(
echo "$ID$((major - (major % 2)))${minor}";
)";

VERSION="${CUDA_VERSION:-${VERSION:-12.4.0}}";
VERSION="${CUDA_VERSION:-${VERSION:-12.5.0}}";

if [[ "$NVARCH" == aarch64 ]]; then
NVARCH="sbsa";
Expand Down Expand Up @@ -75,7 +75,8 @@ echo "Installing dev CUDA toolkit...";
export CUDA_HOME="/usr/local/cuda";

cuda_ver="${VERSION}";
cuda_ver=$(grep -o '^[0-9]*.[0-9]' <<< "${cuda_ver}");
cuda_ver=$(grep -Po '^[0-9]+\.[0-9]+' <<< "${cuda_ver}");
cuda_ver_major=$(grep -Po '^[0-9]+' <<< "${cuda_ver}");

cudapath="${CUDA_HOME}-${cuda_ver}";
cuda_tag="cuda${cuda_ver}";
Expand Down Expand Up @@ -124,13 +125,13 @@ fi

if [ "${INSTALLNVRTC:-false}" = true ]; then
PKGS+=("cuda-nvrtc${dev_tag}-${cuda_ver}");
if test -n "$(apt-cache search libnvjitlink${dev_tag}-${cuda_ver} 2>/dev/null)"; then
if test -n "$(apt-cache search "libnvjitlink${dev_tag}-${cuda_ver}" 2>/dev/null)"; then
PKGS+=("libnvjitlink${dev_tag}-${cuda_ver}");
fi
fi

if [ "${INSTALLOPENCL:-false}" = true ] \
&& test -n "$(apt-cache search cuda-opencl${dev_tag}-${cuda_ver} 2>/dev/null)"; then
&& test -n "$(apt-cache search "cuda-opencl${dev_tag}-${cuda_ver}" 2>/dev/null)"; then
PKGS+=("cuda-opencl${dev_tag}-${cuda_ver}");
fi

Expand All @@ -147,7 +148,7 @@ if [ "${INSTALLCUFFT:-false}" = true ]; then
fi

if [ "${INSTALLCUFILE:-false}" = true ] \
&& test -n "$(apt-cache search libcufile${dev_tag}-${cuda_ver} 2>/dev/null)"; then
&& test -n "$(apt-cache search "libcufile${dev_tag}-${cuda_ver}" 2>/dev/null)"; then
PKGS+=("libcufile${dev_tag}-${cuda_ver}");
fi

Expand All @@ -167,12 +168,21 @@ if [ "${INSTALLNVJPEG:-false}" = true ]; then
PKGS+=("libnvjpeg${dev_tag}-${cuda_ver}");
fi

if [ "${INSTALLCUDNN:-false}" = true ] \
&& test -n "$(apt-cache search libcudnn8 2>/dev/null)" \
&& apt-cache policy libcudnn8 2>/dev/null | grep -q "+${cuda_tag}"; then
PKGS+=("libcudnn8=*+${cuda_tag}");
if [ "${INSTALLDEVPACKAGES:-false}" = true ]; then
PKGS+=("libcudnn8-dev=*+${cuda_tag}");
if [ "${INSTALLCUDNN:-false}" = true ]; then
CUDNNVERSION="${CUDNNVERSION:-8}";
if test "${CUDNNVERSION}" -le 8; then
if test -n "$(apt-cache search "libcudnn${CUDNNVERSION:-8}" 2>/dev/null)" \
&& apt-cache policy "libcudnn${CUDNNVERSION:-8}" 2>/dev/null | grep -q "+${cuda_tag}"; then
PKGS+=("libcudnn${CUDNNVERSION:-8}=*+${cuda_tag}");
if [ "${INSTALLDEVPACKAGES:-false}" = true ]; then
PKGS+=("libcudnn${CUDNNVERSION:-8}-dev=*+${cuda_tag}");
fi
fi
elif test -n "$(apt-cache search "libcudnn${CUDNNVERSION}-cuda-${cuda_ver_major}" 2>/dev/null)"; then
PKGS+=("libcudnn${CUDNNVERSION}-cuda-${cuda_ver_major}");
if [ "${INSTALLDEVPACKAGES:-false}" = true ]; then
PKGS+=("libcudnn${CUDNNVERSION}-dev-cuda-${cuda_ver_major}");
fi
fi
fi

Expand Down Expand Up @@ -206,7 +216,8 @@ if [ "${INSTALLCUTENSOR:-false}" = true ]; then
fi
fi

check_packages ${PKGS[@]};
check_packages "${PKGS[@]}";
apt autoremove -y;

if ! test -L "${CUDA_HOME}"; then
# Create /usr/local/cuda symlink
Expand Down
6 changes: 3 additions & 3 deletions features/test/_global/cpp_llvm_cuda_nvhpc.sh
Original file line number Diff line number Diff line change
Expand Up @@ -39,11 +39,11 @@ source dev-container-features-test-lib;
check "gitlab-cli version" glab --version

# Check CUDA
check "CUDA version" bash -c "echo '$CUDA_VERSION' | grep '12.4.0'";
check "CUDA version" bash -c "echo '$CUDA_VERSION' | grep '12.5.0'";
check "CUDA major version" bash -c "echo '$CUDA_VERSION_MAJOR' | grep '12'";
check "CUDA minor version" bash -c "echo '$CUDA_VERSION_MINOR' | grep '4'";
check "CUDA minor version" bash -c "echo '$CUDA_VERSION_MINOR' | grep '5'";
check "CUDA patch version" bash -c "echo '$CUDA_VERSION_PATCH' | grep '0'";
check "installed" stat /usr/local/cuda-12.4 /usr/local/cuda;
check "installed" stat /usr/local/cuda-12.5 /usr/local/cuda;
check "nvcc exists and is on path" which nvcc;

# Check NVHPC
Expand Down
6 changes: 3 additions & 3 deletions features/test/_global/cuda_rust.sh
Original file line number Diff line number Diff line change
Expand Up @@ -40,11 +40,11 @@ source dev-container-features-test-lib;
>&2 echo "BASH_ENV=$BASH_ENV";

# Check CUDA
check "CUDA version" bash -c "echo '$CUDA_VERSION' | grep '12.4.0'";
check "CUDA version" bash -c "echo '$CUDA_VERSION' | grep '12.5.0'";
check "CUDA major version" bash -c "echo '$CUDA_VERSION_MAJOR' | grep '12'";
check "CUDA minor version" bash -c "echo '$CUDA_VERSION_MINOR' | grep '4'";
check "CUDA minor version" bash -c "echo '$CUDA_VERSION_MINOR' | grep '5'";
check "CUDA patch version" bash -c "echo '$CUDA_VERSION_PATCH' | grep '0'";
check "installed" stat /usr/local/cuda-12.4 /usr/local/cuda;
check "installed" stat /usr/local/cuda-12.5 /usr/local/cuda;
check "nvcc exists and is on path" which nvcc;

# Check Rust
Expand Down
4 changes: 2 additions & 2 deletions features/test/_global/scenarios.json
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
"features": {
"rust": {},
"cuda": {
"version": "12.4"
"version": "12.5"
}
},
"overrideFeatureInstallOrder": [
Expand Down Expand Up @@ -44,7 +44,7 @@
"version": "16"
},
"cuda": {
"version": "12.4"
"version": "12.5"
},
"nvhpc": {
"version": "24.5"
Expand Down
Loading

0 comments on commit 1bd1bd5

Please sign in to comment.