diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index 1affc9e..889d049 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -27,16 +27,7 @@ on:
       cuda-samples-version:
         required: true
         type: string
-      hpcx-version:
-        required: true
-        type: string
-      hpcx-nccl-version:
-        required: true
-        type: string
-      hpcx-cuda-version:
-        required: true
-        type: string
-      hpcx-mlnx-ofed:
+      hpcx-distribution:
         required: true
         type: string
 
@@ -79,10 +70,7 @@ jobs:
             CUDA_VERSION_MAJOR=${{ inputs.cuda-version-major }}
             TARGET_NCCL_VERSION=${{ inputs.nccl-version }}
             CUDA_SAMPLES_VERSION=${{ inputs.cuda-samples-version }}
-            HPCX_VERSION=${{ inputs.hpcx-version }}
-            HPCX_NCCL_VERSION=${{ inputs.hpcx-nccl-version }}
-            HPCX_CUDA_VERSION=${{ inputs.hpcx-cuda-version }}
-            HPCX_MLNX_OFED=${{ inputs.hpcx-mlnx-ofed }}
+            HPCX_DISTRIBUTION=${{ inputs.hpcx-distribution }}
           push: true
           tags: ${{ steps.meta.outputs.tags }}
           labels: ${{ steps.meta.outputs.labels }}
diff --git a/.github/workflows/ubuntu-20.yml b/.github/workflows/ubuntu-20.yml
index e6f3a50..54f5ae0 100644
--- a/.github/workflows/ubuntu-20.yml
+++ b/.github/workflows/ubuntu-20.yml
@@ -18,10 +18,7 @@ jobs:
       cuda-version-major: "11.8"
       nccl-version: 2.16.5-1
       cuda-samples-version: "11.6"
-      hpcx-version: "2.14"
-      hpcx-nccl-version: "2.16"
-      hpcx-cuda-version: "11"
-      hpcx-mlnx-ofed: "MLNX_OFED_LINUX-5"
+      hpcx-distribution: "hpcx-v2.14-gcc-MLNX_OFED_LINUX-5-ubuntu20.04-cuda11-gdrcopy2-nccl2.16-x86_64"
 
   cu120:
     uses: ./.github/workflows/build.yml
@@ -34,10 +31,7 @@ jobs:
       cuda-version-major: "12.0"
       nccl-version: 2.19.3-1
       cuda-samples-version: "12.0"
-      hpcx-version: "2.16"
-      hpcx-nccl-version: "2.18"
-      hpcx-cuda-version: "12"
-      hpcx-mlnx-ofed: "mlnx_ofed"
+      hpcx-distribution: "hpcx-v2.16-gcc-mlnx_ofed-ubuntu20.04-cuda12-gdrcopy2-nccl2.18-x86_64"
 
   cu121:
     uses: ./.github/workflows/build.yml
@@ -50,10 +44,7 @@ jobs:
       cuda-version-major: "12.1"
       nccl-version: 2.18.3-1
       cuda-samples-version: "12.1"
-      hpcx-version: "2.16"
-      hpcx-nccl-version: "2.18"
-      hpcx-cuda-version: "12"
-      hpcx-mlnx-ofed: "mlnx_ofed"
+      hpcx-distribution: "hpcx-v2.16-gcc-mlnx_ofed-ubuntu20.04-cuda12-gdrcopy2-nccl2.18-x86_64"
 
   cu122:
     uses: ./.github/workflows/build.yml
@@ -66,10 +57,7 @@ jobs:
       cuda-version-major: "12.2"
       nccl-version: 2.19.3-1
       cuda-samples-version: "12.2"
-      hpcx-version: "2.16"
-      hpcx-nccl-version: "2.18"
-      hpcx-cuda-version: "12"
-      hpcx-mlnx-ofed: "mlnx_ofed"
+      hpcx-distribution: "hpcx-v2.16-gcc-mlnx_ofed-ubuntu20.04-cuda12-gdrcopy2-nccl2.18-x86_64"
 
 #  cu123:
 #    uses: ./.github/workflows/build.yml
@@ -82,7 +70,4 @@ jobs:
 #      cuda-version-major: "12.3"
 #      nccl-version: 2.19.3-1
 #      cuda-samples-version: "12.3"
-#      hpcx-version: "2.17"
-#      hpcx-nccl-version: "2.19"
-#      hpcx-cuda-version: "12"
-#      hpcx-mlnx-ofed: "mlnx_ofed"
+#      hpcx-distribution: "hpcx-v2.17-gcc-mlnx_ofed-ubuntu20.04-cuda12-x86_64"
diff --git a/Dockerfile.ubuntu20 b/Dockerfile.ubuntu20
index d584ac8..6b0c478 100644
--- a/Dockerfile.ubuntu20
+++ b/Dockerfile.ubuntu20
@@ -60,13 +60,9 @@ RUN mkdir /tmp/build && \
 
 # HPC-X
 # grep + sed is used as a workaround to update hardcoded pkg-config / libtools archive / CMake prefixes
-ARG HPCX_VERSION=2.14
-ARG HPCX_NCCL_VERSION=2.16
-ARG HPCX_CUDA_VERSION=11
-ARG HPCX_MLNX_OFED="MLNX_OFED_LINUX-5"
+ARG HPCX_DISTRIBUTION="hpcx-v2.14-gcc-MLNX_OFED_LINUX-5-ubuntu20.04-cuda11-gdrcopy2-nccl2.16-x86_64"
 RUN cd /tmp && \
-    export HPCX_DISTRIBUTION="hpcx-v${HPCX_VERSION}-gcc-${HPCX_MLNX_OFED}-ubuntu20.04-cuda${HPCX_CUDA_VERSION}-gdrcopy2-nccl${HPCX_NCCL_VERSION}-x86_64" \
-      HPCX_DIR="/opt/hpcx" && \
+    export HPCX_DIR="/opt/hpcx" && \
     wget -q -O - http://blobstore.object.ord1.coreweave.com/drivers/${HPCX_DISTRIBUTION}.tbz | tar xjf - && \
     grep -IrlF "/build-result/${HPCX_DISTRIBUTION}" ${HPCX_DISTRIBUTION} | xargs -rd'\n' sed -i -e "s:/build-result/${HPCX_DISTRIBUTION}:${HPCX_DIR}:g" && \
     mv ${HPCX_DISTRIBUTION} ${HPCX_DIR}