Skip to content

Commit

Permalink
build: Use the correct HPCX_DISTRIBUTION based on architecture
Browse files Browse the repository at this point in the history
  • Loading branch information
Eta0 committed Oct 31, 2024
1 parent 05cfcd3 commit 8d39e97
Show file tree
Hide file tree
Showing 4 changed files with 16 additions and 28 deletions.
26 changes: 4 additions & 22 deletions .github/workflows/ubuntu-20.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,24 +7,6 @@ on:
- .github/workflows/build.yml

jobs:
- cu118:
- uses: ./.github/workflows/build.yml
- secrets:
- ORG_BUILDKIT_CLIENT_TOKEN: ${{ secrets.ORG_BUILDKIT_CLIENT_TOKEN }}
- BUILDKIT_CONSUMER_DOPPLER_PROJECT: ${{ secrets.BUILDKIT_CONSUMER_DOPPLER_PROJECT }}
- BUILDKIT_CONSUMER_DOPPLER_CONFIG: ${{ secrets.BUILDKIT_CONSUMER_DOPPLER_CONFIG }}
- BUILDKIT_CONSUMER_ENDPOINT: ${{ secrets.BUILDKIT_CONSUMER_ENDPOINT }}
- with:
- folder: .
- dockerfile: Dockerfile.ubuntu20
- base-image: nvidia/cuda
- base-tag: 11.8.0-cudnn8-devel-ubuntu20.04
- cuda-version-minor: "11.8.0"
- cuda-version-major: "11.8"
- nccl-version: 2.16.5-1
- cuda-samples-version: "11.6"
- hpcx-distribution: "hpcx-v2.14-gcc-MLNX_OFED_LINUX-5-ubuntu20.04-cuda11-gdrcopy2-nccl2.16-x86_64"
-
cu120:
uses: ./.github/workflows/build.yml
secrets:
Expand All @@ -41,7 +23,7 @@ jobs:
cuda-version-major: "12.0"
nccl-version: 2.19.3-1
cuda-samples-version: "12.0"
- hpcx-distribution: "hpcx-v2.20-gcc-mlnx_ofed-ubuntu20.04-cuda12-x86_64"
+ hpcx-distribution: "hpcx-v2.20-gcc-mlnx_ofed-ubuntu20.04-cuda12"

cu122:
uses: ./.github/workflows/build.yml
Expand All @@ -59,7 +41,7 @@ jobs:
cuda-version-major: "12.2"
nccl-version: 2.21.5-1
cuda-samples-version: "12.2"
- hpcx-distribution: "hpcx-v2.20-gcc-mlnx_ofed-ubuntu20.04-cuda12-x86_64"
+ hpcx-distribution: "hpcx-v2.20-gcc-mlnx_ofed-ubuntu20.04-cuda12"

cu124:
uses: ./.github/workflows/build.yml
Expand All @@ -77,7 +59,7 @@ jobs:
cuda-version-major: "12.4"
nccl-version: 2.23.4-1
cuda-samples-version: "12.4"
- hpcx-distribution: "hpcx-v2.20-gcc-mlnx_ofed-ubuntu20.04-cuda12-x86_64"
+ hpcx-distribution: "hpcx-v2.20-gcc-mlnx_ofed-ubuntu20.04-cuda12"

cu126:
uses: ./.github/workflows/build.yml
Expand All @@ -95,4 +77,4 @@ jobs:
cuda-version-major: "12.6"
nccl-version: 2.23.4-1
cuda-samples-version: "12.5"
- hpcx-distribution: "hpcx-v2.20-gcc-mlnx_ofed-ubuntu20.04-cuda12-x86_64"
+ hpcx-distribution: "hpcx-v2.20-gcc-mlnx_ofed-ubuntu20.04-cuda12"
8 changes: 4 additions & 4 deletions .github/workflows/ubuntu-22.yml
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ jobs:
cuda-version-major: "12.0"
nccl-version: 2.18.5-1
cuda-samples-version: "12.0"
- hpcx-distribution: "hpcx-v2.20-gcc-mlnx_ofed-ubuntu22.04-cuda12-x86_64"
+ hpcx-distribution: "hpcx-v2.20-gcc-mlnx_ofed-ubuntu22.04-cuda12"

cu122:
uses: ./.github/workflows/build.yml
Expand All @@ -41,7 +41,7 @@ jobs:
cuda-version-major: "12.2"
nccl-version: 2.23.4-1
cuda-samples-version: "12.2"
- hpcx-distribution: "hpcx-v2.20-gcc-mlnx_ofed-ubuntu22.04-cuda12-x86_64"
+ hpcx-distribution: "hpcx-v2.20-gcc-mlnx_ofed-ubuntu22.04-cuda12"

cu124:
uses: ./.github/workflows/build.yml
Expand All @@ -59,7 +59,7 @@ jobs:
cuda-version-major: "12.4"
nccl-version: 2.23.4-1
cuda-samples-version: "12.4"
- hpcx-distribution: "hpcx-v2.20-gcc-mlnx_ofed-ubuntu22.04-cuda12-x86_64"
+ hpcx-distribution: "hpcx-v2.20-gcc-mlnx_ofed-ubuntu22.04-cuda12"

cu126:
uses: ./.github/workflows/build.yml
Expand All @@ -77,4 +77,4 @@ jobs:
cuda-version-major: "12.6"
nccl-version: 2.23.4-1
cuda-samples-version: "12.5"
- hpcx-distribution: "hpcx-v2.20-gcc-mlnx_ofed-ubuntu22.04-cuda12-x86_64"
+ hpcx-distribution: "hpcx-v2.20-gcc-mlnx_ofed-ubuntu22.04-cuda12"
5 changes: 4 additions & 1 deletion Dockerfile.ubuntu20
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,10 @@ RUN apt-get -qq update \

# HPC-X
# grep + sed is used as a workaround to update hardcoded pkg-config / libtools archive / CMake prefixes
- ARG HPCX_DISTRIBUTION="hpcx-v2.14-gcc-MLNX_OFED_LINUX-5-ubuntu20.04-cuda11-gdrcopy2-nccl2.16-x86_64"
+ ARG HPCX_DISTRIBUTION="hpcx-v2.20-gcc-mlnx_ofed-ubuntu20.04-cuda12"
+ ARG _HPCX_ARCH="${TARGETARCH/amd64/x86_64}"
+ ARG _HPCX_ARCH="${_HPCX_ARCH/arm64/aarch64}"
+ ARG HPCX_DISTRIBUTION="${HPCX_DISTRIBUTION}-${_HPCX_ARCH}"
RUN cd /tmp && \
export HPCX_DIR="/opt/hpcx" && \
wget -q -O - https://blobstore.object.ord1.coreweave.com/drivers/${HPCX_DISTRIBUTION}.tbz | tar xjf - && \
Expand Down
5 changes: 4 additions & 1 deletion Dockerfile.ubuntu22
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,10 @@ RUN apt-get -qq update \

# HPC-X
# grep + sed is used as a workaround to update hardcoded pkg-config / libtools archive / CMake prefixes
- ARG HPCX_DISTRIBUTION="hpcx-v2.18-gcc-mlnx_ofed-ubuntu22.04-cuda12-x86_64"
+ ARG HPCX_DISTRIBUTION="hpcx-v2.20-gcc-mlnx_ofed-ubuntu22.04-cuda12"
+ ARG _HPCX_ARCH="${TARGETARCH/amd64/x86_64}"
+ ARG _HPCX_ARCH="${_HPCX_ARCH/arm64/aarch64}"
+ ARG HPCX_DISTRIBUTION="${HPCX_DISTRIBUTION}-${_HPCX_ARCH}"
RUN cd /tmp && \
export HPCX_DIR="/opt/hpcx" && \
wget -q -O - https://blobstore.object.ord1.coreweave.com/drivers/${HPCX_DISTRIBUTION}.tbz | tar xjf - && \
Expand Down

0 comments on commit 8d39e97

Please sign in to comment.