From ef3b668a5e1eeb9af543cf3dbf5981ad973c0b23 Mon Sep 17 00:00:00 2001 From: Rob Ballantyne Date: Wed, 17 Apr 2024 12:17:16 +0100 Subject: [PATCH] Update build process. Fix WEB_TOKEN setting on Vast --- .github/workflows/docker-build.yml | 91 ++++++++++--------- .../opt/ai-dock/bin/build/layer0/amd.sh | 5 +- .../opt/ai-dock/bin/build/layer0/nvidia.sh | 9 +- build/COPY_ROOT/opt/ai-dock/bin/init.sh | 4 +- build/Dockerfile | 7 +- docker-compose.yaml | 7 +- 6 files changed, 63 insertions(+), 60 deletions(-) diff --git a/.github/workflows/docker-build.yml b/.github/workflows/docker-build.yml index 3c0c504..f47d861 100644 --- a/.github/workflows/docker-build.yml +++ b/.github/workflows/docker-build.yml @@ -2,19 +2,23 @@ name: Docker Build on: workflow_dispatch: - push: - branches: [ "main" ] + #push: + # branches: [ "main" ] env: UBUNTU_VERSION: 22.04 BUILDX_NO_DEFAULT_ATTESTATIONS: 1 - LATEST_CUDA: "null" - LATEST_ROCM: "null" - LATEST_CPU: "null" jobs: cpu-base: runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + # This is not as silly as it seems... 
We will build for ARM later and this will be useful + build: + - {latest: "false"} + steps: - name: Free Space @@ -51,12 +55,14 @@ jobs: name: Set tags run: | img_path="ghcr.io/${{ env.PACKAGE_NAME }}" - ver_tag="cpu-${{ env.UBUNTU_VERSION }}" - if [[ $ver_tag == ${{ env.LATEST_CPU }} ]]; then - TAGS="${img_path}:latest-cpu, ${img_path}:$ver_tag" + base_tag="cpu-${{ env.UBUNTU_VERSION }}" + + if [[ ${{ matrix.build.latest }} == "true" ]]; then + echo "Marking latest" + TAGS="${img_path}:${base_tag}, ${img_path}:latest-cpu" else - TAGS="${img_path}:$ver_tag" + TAGS="${img_path}:${base_tag}" fi echo "TAGS=${TAGS}" >> ${GITHUB_ENV} - @@ -77,15 +83,16 @@ jobs: strategy: fail-fast: false matrix: - cuda: - - 11.8.0 - - 12.1.0 - - 12.2.0 - - 12.3.1 - level: - - "base" - - "runtime" - - "devel" + build: + - {latest: "false", cuda: "11.8.0-base"} + - {latest: "false", cuda: "11.8.0-runtime"} + - {latest: "false", cuda: "11.8.0-devel"} + - {latest: "false", cuda: "12.1.0-base"} + - {latest: "false", cuda: "12.1.0-runtime"} + - {latest: "false", cuda: "12.1.0-devel"} + - {latest: "false", cuda: "12.1.1-base"} + - {latest: "false", cuda: "12.1.1-runtime"} + - {latest: "false", cuda: "12.1.1-devel"} steps: - name: Free Space @@ -122,12 +129,14 @@ jobs: name: Set tags run: | img_path="ghcr.io/${{ env.PACKAGE_NAME }}" - ver_tag="cuda-${{ matrix.cuda }}-${{ matrix.level }}-${{ env.UBUNTU_VERSION }}" - if [[ $ver_tag == ${{ env.LATEST_CUDA }} ]]; then - TAGS="${img_path}:latest, ${img_path}:latest-cuda, ${img_path}:$ver_tag" + base_tag="cuda-${{ matrix.build.cuda }}-${{ env.UBUNTU_VERSION }}" + + if [[ ${{ matrix.build.latest }} == "true" ]]; then + echo "Marking latest" + TAGS="${img_path}:${base_tag}, ${img_path}:latest-cuda, ${img_path}:latest" else - TAGS="${img_path}:$ver_tag" + TAGS="${img_path}:${base_tag}" fi echo "TAGS=${TAGS}" >> ${GITHUB_ENV} - @@ -136,10 +145,9 @@ jobs: with: context: build build-args: | - IMAGE_BASE=nvidia/cuda:${{ matrix.cuda }}-${{ matrix.level 
}}-ubuntu${{ env.UBUNTU_VERSION }} + IMAGE_BASE=nvidia/cuda:${{ matrix.build.cuda }}-ubuntu${{ env.UBUNTU_VERSION }} XPU_TARGET=NVIDIA_GPU - CUDA_VERSION=${{ matrix.cuda }} - CUDA_LEVEL=${{ matrix.level }} + CUDA_STRING=${{ matrix.build.cuda }} push: true provenance: false tags: ${{ env.TAGS }} @@ -149,15 +157,16 @@ jobs: strategy: fail-fast: false matrix: - rocm: - - "5.4.2" - - "5.6" - - "5.7" - - "6.0.2" - level: - - "core" - - "runtime" - - "devel" + build: + - {latest: "false", rocm: "5.6-core"} + - {latest: "false", rocm: "5.6-runtime"} + - {latest: "false", rocm: "5.6-devel"} + - {latest: "false", rocm: "5.7-core"} + - {latest: "false", rocm: "5.7-runtime"} + - {latest: "false", rocm: "5.7-devel"} + - {latest: "false", rocm: "6.0.2-core"} + - {latest: "false", rocm: "6.0.2-runtime"} + - {latest: "false", rocm: "6.0.2-devel"} steps: - name: Free Space @@ -174,7 +183,6 @@ jobs: name: Env Setter run: | echo "PACKAGE_NAME=${GITHUB_REPOSITORY,,}" >> ${GITHUB_ENV} - echo "TAG=rocm-${{ matrix.rocm }}-${{ matrix.level }}-${{ env.UBUNTU_VERSION }}" >> ${GITHUB_ENV} - name: Checkout uses: actions/checkout@v3 @@ -195,12 +203,14 @@ jobs: name: Set tags run: | img_path="ghcr.io/${{ env.PACKAGE_NAME }}" - ver_tag="rocm-${{ matrix.rocm }}-${{ matrix.level }}-${{ env.UBUNTU_VERSION }}" - if [[ $ver_tag == ${{ env.LATEST_ROCM }} ]]; then - TAGS="${img_path}:latest-rocm, ${img_path}:$ver_tag" + base_tag="rocm-${{ matrix.build.rocm }}-${{ env.UBUNTU_VERSION }}" + + if [[ ${{ matrix.build.latest }} == "true" ]]; then + echo "Marking latest" + TAGS="${img_path}:${base_tag}, ${img_path}:latest-rocm" else - TAGS="${img_path}:$ver_tag" + TAGS="${img_path}:${base_tag}" fi echo "TAGS=${TAGS}" >> ${GITHUB_ENV} - @@ -211,8 +221,7 @@ jobs: build-args: | IMAGE_BASE=ubuntu:${{ env.UBUNTU_VERSION }} XPU_TARGET=AMD_GPU - ROCM_VERSION=${{ matrix.rocm }} - ROCM_LEVEL=${{ matrix.level }} + ROCM_STRING=${{ matrix.build.rocm }} push: true provenance: false tags: ${{ env.TAGS }} \ No newline at 
end of file diff --git a/build/COPY_ROOT/opt/ai-dock/bin/build/layer0/amd.sh b/build/COPY_ROOT/opt/ai-dock/bin/build/layer0/amd.sh index b44f98d..96c7e3c 100755 --- a/build/COPY_ROOT/opt/ai-dock/bin/build/layer0/amd.sh +++ b/build/COPY_ROOT/opt/ai-dock/bin/build/layer0/amd.sh @@ -5,10 +5,11 @@ if [[ -z $ROCM_VERSION ]]; then exit 1 fi -export ROCM_VERSION="${ROCM_VERSION}" +export ROCM_VERSION=$(printf "%s" "$ROCM_STRING" | cut -d'-' -f1) env-store ROCM_VERSION -export ROCM_LEVEL="${ROCM_LEVEL}" +export ROCM_LEVEL=$(printf "%s" "$ROCM_STRING" | cut -d'-' -f2) env-store ROCM_LEVEL + export PATH=/opt/rocm/bin:$PATH env-store PATH diff --git a/build/COPY_ROOT/opt/ai-dock/bin/build/layer0/nvidia.sh b/build/COPY_ROOT/opt/ai-dock/bin/build/layer0/nvidia.sh index 28a39cb..23bf8b6 100755 --- a/build/COPY_ROOT/opt/ai-dock/bin/build/layer0/nvidia.sh +++ b/build/COPY_ROOT/opt/ai-dock/bin/build/layer0/nvidia.sh @@ -1,10 +1,7 @@ #!/bin/false -export CUDA_VERSION="${CUDA_VERSION}" +export CUDA_VERSION=$(printf "%s" "$CUDA_STRING" | cut -d'-' -f1) env-store CUDA_VERSION -export CUDNN_VERSION="${CUDNN_VERSION}" -env-store CUDNN_VERSION -export CUDA_LEVEL="${CUDA_LEVEL}" +export CUDA_LEVEL=$(printf "%s" "$CUDA_STRING" | cut -d'-' -f2) env-store CUDA_LEVEL -export CUDA_STRING="$(cut -d '.' 
-f 1,2 <<< "${CUDA_VERSION}")" -env-store CUDA_STRING \ No newline at end of file + diff --git a/build/COPY_ROOT/opt/ai-dock/bin/init.sh b/build/COPY_ROOT/opt/ai-dock/bin/init.sh index d830d9e..2b67812 100755 --- a/build/COPY_ROOT/opt/ai-dock/bin/init.sh +++ b/build/COPY_ROOT/opt/ai-dock/bin/init.sh @@ -126,7 +126,9 @@ init_set_web_credentials() { # Handle cloud provider auto login # Vast.ai if [[ $(env | grep -i vast) && -n $OPEN_BUTTON_TOKEN ]]; then - export WEB_TOKEN="${OPEN_BUTTON_TOKEN}" + if [[ -z $WEB_TOKEN ]]; then + export WEB_TOKEN="${OPEN_BUTTON_TOKEN}" + fi if [[ $WEB_PASSWORD == "password" ]]; then unset WEB_PASSWORD fi diff --git a/build/Dockerfile b/build/Dockerfile index 45d8113..02261b3 100644 --- a/build/Dockerfile +++ b/build/Dockerfile @@ -60,11 +60,8 @@ ENV IMAGE_SLUG="base-image" # Copy early so we can use scripts in the build - Changes to these files will invalidate the cache and cause a rebuild. COPY --chown=0:1111 ./COPY_ROOT/ / -ARG CUDA_VERSION -ARG CUDNN_VERSION -ARG CUDA_LEVEL -ARG ROCM_VERSION -ARG ROCM_LEVEL +ARG CUDA_STRING +ARG ROCM_STRING # Use build scripts to ensure we can build all targets from one Dockerfile in a single layer. # Don't put anything heavy in here - We can use multi-stage building above if necessary. diff --git a/docker-compose.yaml b/docker-compose.yaml index cf3685a..996821f 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -9,11 +9,8 @@ services: IMAGE_BASE: ${IMAGE_BASE:-nvidia/cuda:11.8.0-base-ubuntu22.04} IMAGE_TAG: ${IMAGE_TAG:-cuda-11.8.0-base-22.04} XPU_TARGET: ${XPU_TARGET:-NVIDIA_GPU} - CUDA_VERSION: ${CUDA_VERSION:-11.8.0} - CUDA_LEVEL: ${CUDA_LEVEL:-base} - CUDNN_VERSION: ${CUDNN_VERSION:-8} - ROCM_VERSION: ${ROCM_VERSION:-5.6} - ROCM_LEVEL: ${ROCM_LEVEL:-runtime} + CUDA_STRING: ${CUDA_STRING:-11.8.0-base} + ROCM_STRING: ${ROCM_STRING:-5.7-runtime} tags: - "ghcr.io/ai-dock/base-image:${IMAGE_TAG:-cuda-11.8.0-base-22.04}"