Skip to content

Commit

Permalink
Merge pull request #29 from coreweave/eta/ubuntu22.04
Browse files: browse the repository at this point in the history
feat: Restore `ubuntu22.04` builds
  • Loading branch information
wbrown authored Dec 8, 2023
2 parents 3ddac2c + fc66cd4 commit 3e0fbc3
Show file tree
Hide file tree
Showing 5 changed files with 160 additions and 113 deletions.
16 changes: 2 additions & 14 deletions .github/workflows/build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -27,16 +27,7 @@ on:
cuda-samples-version:
required: true
type: string
hpcx-version:
required: true
type: string
hpcx-nccl-version:
required: true
type: string
hpcx-cuda-version:
required: true
type: string
hpcx-mlnx-ofed:
hpcx-distribution:
required: true
type: string

Expand Down Expand Up @@ -79,10 +70,7 @@ jobs:
CUDA_VERSION_MAJOR=${{ inputs.cuda-version-major }}
TARGET_NCCL_VERSION=${{ inputs.nccl-version }}
CUDA_SAMPLES_VERSION=${{ inputs.cuda-samples-version }}
HPCX_VERSION=${{ inputs.hpcx-version }}
HPCX_NCCL_VERSION=${{ inputs.hpcx-nccl-version }}
HPCX_CUDA_VERSION=${{ inputs.hpcx-cuda-version }}
HPCX_MLNX_OFED=${{ inputs.hpcx-mlnx-ofed }}
HPCX_DISTRIBUTION=${{ inputs.hpcx-distribution }}
push: true
tags: ${{ steps.meta.outputs.tags }}
labels: ${{ steps.meta.outputs.labels }}
Expand Down
25 changes: 5 additions & 20 deletions .github/workflows/ubuntu-20.yml
Original file line number Diff line number Diff line change
Expand Up @@ -18,10 +18,7 @@ jobs:
cuda-version-major: "11.8"
nccl-version: 2.16.5-1
cuda-samples-version: "11.6"
hpcx-version: "2.14"
hpcx-nccl-version: "2.16"
hpcx-cuda-version: "11"
hpcx-mlnx-ofed: "MLNX_OFED_LINUX-5"
hpcx-distribution: "hpcx-v2.14-gcc-MLNX_OFED_LINUX-5-ubuntu20.04-cuda11-gdrcopy2-nccl2.16-x86_64"

cu120:
uses: ./.github/workflows/build.yml
Expand All @@ -34,10 +31,7 @@ jobs:
cuda-version-major: "12.0"
nccl-version: 2.19.3-1
cuda-samples-version: "12.0"
hpcx-version: "2.16"
hpcx-nccl-version: "2.18"
hpcx-cuda-version: "12"
hpcx-mlnx-ofed: "mlnx_ofed"
hpcx-distribution: "hpcx-v2.16-gcc-mlnx_ofed-ubuntu20.04-cuda12-gdrcopy2-nccl2.18-x86_64"

cu121:
uses: ./.github/workflows/build.yml
Expand All @@ -50,10 +44,7 @@ jobs:
cuda-version-major: "12.1"
nccl-version: 2.18.3-1
cuda-samples-version: "12.1"
hpcx-version: "2.16"
hpcx-nccl-version: "2.18"
hpcx-cuda-version: "12"
hpcx-mlnx-ofed: "mlnx_ofed"
hpcx-distribution: "hpcx-v2.16-gcc-mlnx_ofed-ubuntu20.04-cuda12-gdrcopy2-nccl2.18-x86_64"

cu122:
uses: ./.github/workflows/build.yml
Expand All @@ -66,10 +57,7 @@ jobs:
cuda-version-major: "12.2"
nccl-version: 2.19.3-1
cuda-samples-version: "12.2"
hpcx-version: "2.16"
hpcx-nccl-version: "2.18"
hpcx-cuda-version: "12"
hpcx-mlnx-ofed: "mlnx_ofed"
hpcx-distribution: "hpcx-v2.16-gcc-mlnx_ofed-ubuntu20.04-cuda12-gdrcopy2-nccl2.18-x86_64"

# cu123:
# uses: ./.github/workflows/build.yml
Expand All @@ -82,7 +70,4 @@ jobs:
# cuda-version-major: "12.3"
# nccl-version: 2.19.3-1
# cuda-samples-version: "12.3"
# hpcx-version: "2.17"
# hpcx-nccl-version: "2.19"
# hpcx-cuda-version: "12"
# hpcx-mlnx-ofed: "mlnx_ofed"
# hpcx-distribution: "hpcx-v2.17-gcc-mlnx_ofed-ubuntu20.04-cuda12-x86_64"
79 changes: 57 additions & 22 deletions .github/workflows/ubuntu-22.yml
Original file line number Diff line number Diff line change
@@ -1,25 +1,60 @@
#
# Ubuntu 22 builds are disabled for now
#
on:
workflow_dispatch:
push:
paths:
- Dockerfile.ubuntu22
- .github/workflows/ubuntu-22.yml
- .github/workflows/build.yml

jobs:
cu120:
uses: ./.github/workflows/build.yml
with:
folder: .
dockerfile: Dockerfile.ubuntu22
base-image: nvidia/cuda
base-tag: 12.0.1-cudnn8-devel-ubuntu22.04
cuda-version-minor: "12.0.1"
cuda-version-major: "12.0"
nccl-version: 2.19.3-1
cuda-samples-version: "12.0"
hpcx-distribution: "hpcx-v2.17-gcc-mlnx_ofed-ubuntu22.04-cuda12-x86_64"

# on:
# workflow_dispatch:
# push:
# paths:
# - Dockerfile.ubuntu22
# - .github/workflows/ubuntu-22.yml
# - .github/workflows/build.yml
cu121:
uses: ./.github/workflows/build.yml
with:
folder: .
dockerfile: Dockerfile.ubuntu22
base-image: nvidia/cuda
base-tag: 12.1.1-cudnn8-devel-ubuntu22.04
cuda-version-minor: "12.1.1"
cuda-version-major: "12.1"
nccl-version: 2.18.3-1
cuda-samples-version: "12.1"
hpcx-distribution: "hpcx-v2.17-gcc-mlnx_ofed-ubuntu22.04-cuda12-x86_64"

# jobs:
# build:
# uses: ./.github/workflows/build.yml
# with:
# folder: .
# dockerfile: Dockerfile.ubuntu22
# base-image: nvidia/cuda
# base-tag: 11.7.1-devel-ubuntu22.04
# cuda-version-minor: 11.7.1
# cuda-version-major: 11.7
# nccl-version: 2.14.3-1
# cuda-samples-version: 11.6
cu122:
uses: ./.github/workflows/build.yml
with:
folder: .
dockerfile: Dockerfile.ubuntu22
base-image: nvidia/cuda
base-tag: 12.2.2-cudnn8-devel-ubuntu22.04
cuda-version-minor: "12.2.2"
cuda-version-major: "12.2"
nccl-version: 2.19.3-1
cuda-samples-version: "12.2"
hpcx-distribution: "hpcx-v2.17-gcc-mlnx_ofed-ubuntu22.04-cuda12-x86_64"

cu123:
uses: ./.github/workflows/build.yml
with:
folder: .
dockerfile: Dockerfile.ubuntu22
base-image: nvidia/cuda
base-tag: 12.3.1-devel-ubuntu22.04
cuda-version-minor: "12.3.1"
cuda-version-major: "12.3"
nccl-version: 2.19.3-1
cuda-samples-version: "12.3"
hpcx-distribution: "hpcx-v2.17-gcc-mlnx_ofed-ubuntu22.04-cuda12-x86_64"
8 changes: 2 additions & 6 deletions Dockerfile.ubuntu20
Original file line number Diff line number Diff line change
Expand Up @@ -60,13 +60,9 @@ RUN mkdir /tmp/build && \

# HPC-X
# grep + sed are used as a workaround to update hardcoded pkg-config / libtool archive / CMake prefixes
ARG HPCX_VERSION=2.14
ARG HPCX_NCCL_VERSION=2.16
ARG HPCX_CUDA_VERSION=11
ARG HPCX_MLNX_OFED="MLNX_OFED_LINUX-5"
ARG HPCX_DISTRIBUTION="hpcx-v2.14-gcc-MLNX_OFED_LINUX-5-ubuntu20.04-cuda11-gdrcopy2-nccl2.16-x86_64"
RUN cd /tmp && \
export HPCX_DISTRIBUTION="hpcx-v${HPCX_VERSION}-gcc-${HPCX_MLNX_OFED}-ubuntu20.04-cuda${HPCX_CUDA_VERSION}-gdrcopy2-nccl${HPCX_NCCL_VERSION}-x86_64" \
HPCX_DIR="/opt/hpcx" && \
export HPCX_DIR="/opt/hpcx" && \
wget -q -O - http://blobstore.object.ord1.coreweave.com/drivers/${HPCX_DISTRIBUTION}.tbz | tar xjf - && \
grep -IrlF "/build-result/${HPCX_DISTRIBUTION}" ${HPCX_DISTRIBUTION} | xargs -rd'\n' sed -i -e "s:/build-result/${HPCX_DISTRIBUTION}:${HPCX_DIR}:g" && \
mv ${HPCX_DISTRIBUTION} ${HPCX_DIR}
Expand Down
Loading

0 comments on commit 3e0fbc3

Please sign in to comment.