From 3952e7659647675691ef01ba46325a07366f2ce0 Mon Sep 17 00:00:00 2001 From: Paul Taylor <178183+trxcllnt@users.noreply.github.com> Date: Mon, 16 Sep 2024 09:44:51 -0700 Subject: [PATCH] Better ensure sccache credentials don't expire during builds (#389) * Bump up default TTL for creds from 8 to 12 hours * Regenerates credentials up to 2 hours before they're set to expire * Restarts the sccache server in the `build-*` scripts to ensure the sccache server has the latest credentials --- .../devcontainer-feature.json | 2 +- .../bin/tmpl/cpp.build.tmpl.sh | 3 +++ .../bin/tmpl/python.build.wheel.tmpl.sh | 3 +++ .../bin/tmpl/python.install.tmpl.sh | 3 +++ features/src/sccache/.bashrc | 1 - .../src/sccache/devcontainer-feature.json | 2 +- features/src/utils/devcontainer-feature.json | 2 +- .../bin/vault/s3/creds/generate.sh | 2 +- .../bin/vault/s3/creds/schedule.sh | 21 ++++++++++--------- .../devcontainer/bin/vault/s3/creds/test.sh | 2 +- 10 files changed, 25 insertions(+), 16 deletions(-) diff --git a/features/src/rapids-build-utils/devcontainer-feature.json b/features/src/rapids-build-utils/devcontainer-feature.json index 5ebacc5a..fe3190f2 100644 --- a/features/src/rapids-build-utils/devcontainer-feature.json +++ b/features/src/rapids-build-utils/devcontainer-feature.json @@ -1,7 +1,7 @@ { "name": "NVIDIA RAPIDS devcontainer build utilities", "id": "rapids-build-utils", - "version": "24.10.8", + "version": "24.10.9", "description": "A feature to install the RAPIDS devcontainer build utilities", "containerEnv": { "BASH_ENV": "/etc/bash.bash_env" diff --git a/features/src/rapids-build-utils/opt/rapids-build-utils/bin/tmpl/cpp.build.tmpl.sh b/features/src/rapids-build-utils/opt/rapids-build-utils/bin/tmpl/cpp.build.tmpl.sh index 4c79bffa..7bd7448e 100755 --- a/features/src/rapids-build-utils/opt/rapids-build-utils/bin/tmpl/cpp.build.tmpl.sh +++ b/features/src/rapids-build-utils/opt/rapids-build-utils/bin/tmpl/cpp.build.tmpl.sh @@ -17,6 +17,9 @@ build_${CPP_LIB}_cpp() { 
local -; set -euo pipefail; + # Stop the sccache server in case we need to reload credentials before starting the next build + sccache --stop-server >/dev/null 2>&1 || true; + eval "$(\ PARALLEL_LEVEL=${PARALLEL_LEVEL:-$(nproc --all)} \ MAX_TOTAL_SYSTEM_MEMORY="${MAX_TOTAL_SYSTEM_MEMORY:-${CPP_MAX_TOTAL_SYSTEM_MEMORY}}" \ diff --git a/features/src/rapids-build-utils/opt/rapids-build-utils/bin/tmpl/python.build.wheel.tmpl.sh b/features/src/rapids-build-utils/opt/rapids-build-utils/bin/tmpl/python.build.wheel.tmpl.sh index 86c89ae2..164bfae0 100755 --- a/features/src/rapids-build-utils/opt/rapids-build-utils/bin/tmpl/python.build.wheel.tmpl.sh +++ b/features/src/rapids-build-utils/opt/rapids-build-utils/bin/tmpl/python.build.wheel.tmpl.sh @@ -21,6 +21,9 @@ build_${PY_LIB}_python_wheel() { local -; set -euo pipefail; + # Stop the sccache server in case we need to reload credentials before starting the next build + sccache --stop-server >/dev/null 2>&1 || true; + eval "$( \ PARALLEL_LEVEL=${PARALLEL_LEVEL:-$(nproc --all)} \ rapids-get-num-archs-jobs-and-load "$@" \ diff --git a/features/src/rapids-build-utils/opt/rapids-build-utils/bin/tmpl/python.install.tmpl.sh b/features/src/rapids-build-utils/opt/rapids-build-utils/bin/tmpl/python.install.tmpl.sh index 4e2c94b5..3af8073d 100755 --- a/features/src/rapids-build-utils/opt/rapids-build-utils/bin/tmpl/python.install.tmpl.sh +++ b/features/src/rapids-build-utils/opt/rapids-build-utils/bin/tmpl/python.install.tmpl.sh @@ -21,6 +21,9 @@ install_${PY_LIB}_python() { local -; set -euo pipefail; + # Stop the sccache server in case we need to reload credentials before starting the next build + sccache --stop-server >/dev/null 2>&1 || true; + eval "$( \ PARALLEL_LEVEL=${PARALLEL_LEVEL:-$(nproc --all)} \ rapids-get-num-archs-jobs-and-load "$@" \ diff --git a/features/src/sccache/.bashrc b/features/src/sccache/.bashrc index 799d21b2..fa591c23 100644 --- a/features/src/sccache/.bashrc +++ b/features/src/sccache/.bashrc @@ -1,5 +1,4 
@@ export RUSTC_WRAPPER=${RUSTC_WRAPPER:-/usr/bin/sccache}; -export SCCACHE_IDLE_TIMEOUT=${SCCACHE_IDLE_TIMEOUT:-32768}; export CMAKE_C_COMPILER_LAUNCHER=${CMAKE_C_COMPILER_LAUNCHER:-/usr/bin/sccache}; export CMAKE_CXX_COMPILER_LAUNCHER=${CMAKE_CXX_COMPILER_LAUNCHER:-/usr/bin/sccache}; export CMAKE_CUDA_COMPILER_LAUNCHER=${CMAKE_CUDA_COMPILER_LAUNCHER:-/usr/bin/sccache}; diff --git a/features/src/sccache/devcontainer-feature.json b/features/src/sccache/devcontainer-feature.json index 40d9837d..17f4a3ce 100644 --- a/features/src/sccache/devcontainer-feature.json +++ b/features/src/sccache/devcontainer-feature.json @@ -1,7 +1,7 @@ { "name": "sccache", "id": "sccache", - "version": "24.10.0", + "version": "24.10.1", "description": "A feature to install sccache", "options": { "version": { diff --git a/features/src/utils/devcontainer-feature.json b/features/src/utils/devcontainer-feature.json index 8dcfe7ff..7fb87a98 100644 --- a/features/src/utils/devcontainer-feature.json +++ b/features/src/utils/devcontainer-feature.json @@ -1,7 +1,7 @@ { "name": "devcontainer-utils", "id": "utils", - "version": "24.10.2", + "version": "24.10.3", "description": "A feature to install RAPIDS devcontainer utility scripts", "containerEnv": { "BASH_ENV": "/etc/bash.bash_env" diff --git a/features/src/utils/opt/devcontainer/bin/vault/s3/creds/generate.sh b/features/src/utils/opt/devcontainer/bin/vault/s3/creds/generate.sh index ec21641b..5f6f8185 100755 --- a/features/src/utils/opt/devcontainer/bin/vault/s3/creds/generate.sh +++ b/features/src/utils/opt/devcontainer/bin/vault/s3/creds/generate.sh @@ -75,7 +75,7 @@ Successfully authenticated with vault! 
____EOF local -r generated_at="$(date '+%s')"; - local ttl="${VAULT_S3_TTL:-"28800"}"; + local ttl="${VAULT_S3_TTL:-"43200"}"; local uri="${VAULT_S3_URI:-"v1/aws/creds/devs"}"; if grep -qE '^[0-9]$' <<< "${ttl}"; then diff --git a/features/src/utils/opt/devcontainer/bin/vault/s3/creds/schedule.sh b/features/src/utils/opt/devcontainer/bin/vault/s3/creds/schedule.sh index 2d2b4735..3e1f98b2 100755 --- a/features/src/utils/opt/devcontainer/bin/vault/s3/creds/schedule.sh +++ b/features/src/utils/opt/devcontainer/bin/vault/s3/creds/schedule.sh @@ -8,22 +8,23 @@ schedule_s3_creds_refresh() { . devcontainer-utils-debug-output 'devcontainer_utils_debug' 'vault-s3 vault-s3-creds-schedule'; local -r now="$(date '+%s')"; - local ttl="${VAULT_S3_TTL:-"28800"}"; + local ttl="${VAULT_S3_TTL:-"43200"}"; ttl="${ttl%s}"; local -r stamp="$(cat ~/.aws/stamp 2>/dev/null || echo "${now}")"; - local then="$((ttl - (now - stamp)))"; - then="$((then < ttl ? then : ttl))"; - then="$((((then + 59) / 60) * 60))"; + local ttime="$((ttl - (now - stamp)))"; + ttime="$((ttime < ttl ? ttime : ttl))"; + ttime="$((( (ttime + 59) / 60) * 60))"; - # Regenerate if within 5 minutes of keys expiring - if test "${then}" -le 300; then + # Regenerate creds if within `min(ttl / 5, 2hrs)` of keys expiring + local refresh_window="$(((ttl / 5) > 7200 ? 7200 : ttl / 5))"; + + if test "${ttime}" -le "${refresh_window}"; then if devcontainer-utils-vault-s3-creds-generate; then devcontainer-utils-vault-s3-creds-schedule; fi else - # Regenerate 5 minutes before keys expire - then="$((now + then - 300))"; + ttime="$((now + ttime - refresh_window))"; crontab -u "$(whoami)" -r 2>/dev/null || true; @@ -34,7 +35,7 @@ schedule_s3_creds_refresh() { cat <<________EOF | tee -a /var/log/devcontainer-utils/vault-s3-creds-refresh.log $(date --date="@${now}") -Scheduling cron to regerate S3 creds $(date -u --date="@$((then - now))" '+%T') from now. 
+Scheduling cron to regenerate S3 creds $(date -u --date="@$((ttime - now))" '+%T') from now. ________EOF cat <<________EOF | crontab -u "$(whoami)" - @@ -47,7 +48,7 @@ SCCACHE_BUCKET="${SCCACHE_BUCKET:-}" SCCACHE_REGION="${SCCACHE_REGION:-}" VAULT_GITHUB_ORGS="${VAULT_GITHUB_ORGS:-}" AWS_DEFAULT_REGION="${AWS_DEFAULT_REGION:-}" -$(date --date="@${then}" '+%M %H %d %m %w') \ +$(date --date="@${ttime}" '+%M %H %d %m %w') \ devcontainer-utils-vault-s3-creds-schedule ________EOF diff --git a/features/src/utils/opt/devcontainer/bin/vault/s3/creds/test.sh b/features/src/utils/opt/devcontainer/bin/vault/s3/creds/test.sh index 78ed0682..27e01ba7 100755 --- a/features/src/utils/opt/devcontainer/bin/vault/s3/creds/test.sh +++ b/features/src/utils/opt/devcontainer/bin/vault/s3/creds/test.sh @@ -14,7 +14,7 @@ test_aws_creds() { if test -f ~/.aws/stamp; then local -r now="$(date '+%s')"; local -r stamp="$(cat ~/.aws/stamp)"; - local ttl="${VAULT_S3_TTL:-"28800"}"; + local ttl="${VAULT_S3_TTL:-"43200"}"; if [ $((now - stamp)) -ge "${ttl%s}" ]; then exit 1; fi