Skip to content

Commit

Permalink
adapt to latest workflows
Browse files Browse the repository at this point in the history
  • Loading branch information
msarahan committed Nov 1, 2024
1 parent 70c386e commit eeac5b0
Showing 1 changed file with 142 additions and 113 deletions.
255 changes: 142 additions & 113 deletions .github/workflows/pr.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -10,59 +10,52 @@ concurrency:
cancel-in-progress: true

env:
# TODO: put this in a shared org-wide secret?
OTEL_SERVICE_NAME: 'pr-cuml'
# TODO: this should be set as an org-wide variable
OTEL_EXPORTER_OTLP_ENDPOINT: https://tempo.gha-runners.nvidia.com:4318
# These are the file paths to which the secrets held in GitHub env vars are written. The files
# don't exist unless you explicitly write them in a step.
# The purpose of setting the environment variable is to tell OpenTelemetry tools where to find them.
# We abuse it a bit by also using it as the write destination for the certificate files.
OTEL_EXPORTER_OTLP_CERTIFICATE: "/tmp/certs/ca.crt"
OTEL_EXPORTER_OTLP_CLIENT_CERTIFICATE: "/tmp/certs/client.crt"
OTEL_EXPORTER_OTLP_CLIENT_KEY: "/tmp/certs/client.key"
OTEL_EXPORTER_OTLP_PROTOCOL: "http/protobuf"
OTEL_EXPORTER_OTLP_HEADERS: ${{ secrets.OTEL_EXPORTER_OTLP_HEADERS }}
OTEL_RESOURCE_ATTRIBUTES: "git.repository=${{github.repository}},git.ref=${{github.ref}},git.sha=${{github.sha}},git.job_url=${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}"
# Set these to point the shared-actions clones at a branch on the rapidsai/shared-actions repo
SHARED_ACTIONS_REPO: "rapidsai/shared-actions"
SHARED_ACTIONS_REF: "main"

jobs:
telemetry-setup:
reexports:
# Re-export secrets and env vars to make them available to shared actions and workflows
# You can't use env.SOMETHING in a "with:" section when calling a shared workflow.
runs-on: ubuntu-latest
outputs:
start_time: ${{ steps.timestamp.outputs.START_TIME }}
traceparent: ${{ steps.telemetry-setup.outputs.traceparent }}
endpoint: ${{ steps.var-reexports.outputs.endpoint }}
top_level_service_name: ${{ steps.var-reexports.outputs.service_name }}
endpoint: "${{steps.reexport.outputs.endpoint}}"
service_name: "${{steps.reexport.outputs.service_name}}"
otel_resource_attributes: "${{steps.reexport.outputs.otel_resource_attributes}}"
shared_actions_repo: "${{steps.reexport.outputs.shared_actions_repo}}"
shared_actions_ref: "${{steps.reexport.outputs.shared_actions_ref}}"
steps:
- name: Get starting timestamp
id: timestamp
run:
echo "START_TIME=$(date +%s.%N)" >> ${GITHUB_OUTPUT}
- name: Echo endpoint to make it available to shared workflows
id: var-reexports
run: |
echo endpoint="${OTEL_EXPORTER_OTLP_ENDPOINT}" >> ${GITHUB_OUTPUT}
echo service_name="${OTEL_SERVICE_NAME}" >> ${GITHUB_OUTPUT}
- name: Write certificate files for mTLS
run: |
mkdir -p /tmp/certs
cat << EOF > "${OTEL_EXPORTER_OTLP_CERTIFICATE}"
${{ secrets.OTEL_EXPORTER_OTLP_CA_CERTIFICATE }}
EOF
cat << EOF > "${OTEL_EXPORTER_OTLP_CLIENT_CERTIFICATE}"
${{ secrets.OTEL_EXPORTER_OTLP_CLIENT_CERTIFICATE }}
EOF
cat << EOF > "${OTEL_EXPORTER_OTLP_CLIENT_KEY}"
${{ secrets.OTEL_EXPORTER_OTLP_CLIENT_KEY }}
EOF
- name: Telemetry setup
id: telemetry-setup
uses: rapidsai/shared-actions/telemetry-traceparent@add-telemetry
- name: Start root span
uses: rapidsai/shared-actions/telemetry-create-span@add-telemetry
with:
name: "root span"
traceparent: ${{steps.telemetry-setup.outputs.traceparent}}
start_time: ${{steps.timestamp.outputs.start_time}}
# Copy workflow-level env values into step outputs. The job-level `outputs:`
# map republishes them from `steps.reexport.outputs.*`, and other jobs read
# them as `needs.reexports.outputs.*`.
- name: Re-export env vars as step outputs
  id: reexport
  run: |
    # Group the writes so the output file is opened once, and quote the path
    # so it is never subject to word splitting or globbing.
    {
      echo endpoint="${OTEL_EXPORTER_OTLP_ENDPOINT}"
      echo service_name="${OTEL_SERVICE_NAME}"
      echo otel_resource_attributes="${OTEL_RESOURCE_ATTRIBUTES}"
      echo shared_actions_ref="${SHARED_ACTIONS_REF}"
      echo shared_actions_repo="${SHARED_ACTIONS_REPO}"
    } >> "${GITHUB_OUTPUT}"
# Generates the traceparent that other jobs in this workflow pass to the
# shared workflows through their `traceparent` input.
top-level-telemetry-traceparent:
  needs: [reexports]
  runs-on: ubuntu-latest
  env:
    OTEL_SERVICE_NAME: ${{ needs.reexports.outputs.service_name }}
  outputs:
    traceparent: ${{ steps.traceparent.outputs.traceparent }}
  steps:
    # The shared action is run from a local checkout, so the repo and ref it
    # comes from are controlled by SHARED_ACTIONS_REPO / SHARED_ACTIONS_REF.
    - name: Clone shared-actions repo
      uses: actions/checkout@v4
      with:
        path: ./shared-actions
        repository: ${{ env.SHARED_ACTIONS_REPO }}
        ref: ${{ env.SHARED_ACTIONS_REF }}
    - id: traceparent
      uses: ./shared-actions/telemetry-traceparent
pr-builder:
needs:
- changed-files
Expand All @@ -76,10 +69,11 @@ jobs:
- conda-python-tests-dask
- conda-notebook-tests
- docs-build
- telemetry-setup
- wheel-build-cuml
- wheel-tests-cuml
- devcontainer
- reexports
- top-level-telemetry-traceparent
secrets: inherit
uses: rapidsai/shared-workflows/.github/workflows/pr-builder.yaml@add-telemetry
if: always()
Expand Down Expand Up @@ -120,17 +114,20 @@ jobs:
- '!thirdparty/LICENSES/**'
checks:
secrets: inherit
needs: telemetry-setup
needs: [reexports, top-level-telemetry-traceparent]
uses: rapidsai/shared-workflows/.github/workflows/checks.yaml@add-telemetry
with:
enable_check_generated_files: false
ignored_pr_jobs: >-
optional-job-conda-python-tests-cudf-pandas-integration
final-telemetry-update
default_endpoint: "${{needs.telemetry-setup.outputs.endpoint}}"
traceparent: ${{ needs.telemetry-setup.outputs.traceparent }}
default_endpoint: "${{needs.reexports.outputs.endpoint}}"
otel_resource_attributes: "${{needs.reexports.outputs.otel_resource_attributes}}"
traceparent: ${{ needs.top-level-telemetry-traceparent.outputs.traceparent }}
shared_actions_repo: ${{needs.reexports.outputs.shared_actions_repo}}
shared_actions_ref: ${{needs.reexports.outputs.shared_actions_ref}}
clang-tidy:
needs: [checks, telemetry-setup]
needs: [checks, reexports, top-level-telemetry-traceparent]
secrets: inherit
uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@add-telemetry
with:
Expand All @@ -139,74 +136,95 @@ jobs:
arch: "amd64"
container_image: "rapidsai/ci-conda:cuda11.8.0-ubuntu22.04-py3.10"
run_script: "ci/run_clang_tidy.sh"
default_endpoint: "${{needs.telemetry-setup.outputs.endpoint}}"
traceparent: ${{ needs.telemetry-setup.outputs.traceparent }}
default_endpoint: "${{needs.reexports.outputs.endpoint}}"
otel_resource_attributes: "${{needs.reexports.outputs.otel_resource_attributes}}"
traceparent: ${{ needs.top-level-telemetry-traceparent.outputs.traceparent }}
shared_actions_repo: ${{needs.reexports.outputs.shared_actions_repo}}
shared_actions_ref: ${{needs.reexports.outputs.shared_actions_ref}}
conda-cpp-build:
needs: [checks, telemetry-setup]
needs: [checks, reexports, top-level-telemetry-traceparent]
secrets: inherit
uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-build.yaml@add-telemetry
with:
build_type: pull-request
default_endpoint: "${{needs.telemetry-setup.outputs.endpoint}}"
traceparent: ${{ needs.telemetry-setup.outputs.traceparent }}
default_endpoint: "${{needs.reexports.outputs.endpoint}}"
otel_resource_attributes: "${{needs.reexports.outputs.otel_resource_attributes}}"
traceparent: ${{ needs.top-level-telemetry-traceparent.outputs.traceparent }}
shared_actions_repo: ${{needs.reexports.outputs.shared_actions_repo}}
shared_actions_ref: ${{needs.reexports.outputs.shared_actions_ref}}
conda-cpp-tests:
needs: [conda-cpp-build, changed-files, telemetry-setup]
needs: [conda-cpp-build, changed-files, reexports, top-level-telemetry-traceparent]
secrets: inherit
uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-tests.yaml@add-telemetry
if: fromJSON(needs.changed-files.outputs.changed_file_groups).test_cpp
with:
build_type: pull-request
default_endpoint: "${{needs.telemetry-setup.outputs.endpoint}}"
traceparent: ${{ needs.telemetry-setup.outputs.traceparent }}
default_endpoint: "${{needs.reexports.outputs.endpoint}}"
otel_resource_attributes: "${{needs.reexports.outputs.otel_resource_attributes}}"
traceparent: ${{ needs.top-level-telemetry-traceparent.outputs.traceparent }}
shared_actions_repo: ${{needs.reexports.outputs.shared_actions_repo}}
shared_actions_ref: ${{needs.reexports.outputs.shared_actions_ref}}
conda-cpp-checks:
needs: [conda-cpp-build, telemetry-setup]
needs: [conda-cpp-build, reexports, top-level-telemetry-traceparent]
secrets: inherit
uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-post-build-checks.yaml@add-telemetry
with:
build_type: pull-request
enable_check_symbols: true
symbol_exclusions: raft_cutlass
conda-python-build:
needs: [conda-cpp-build, telemetry-setup]
needs: [conda-cpp-build, reexports, top-level-telemetry-traceparent]
secrets: inherit
uses: rapidsai/shared-workflows/.github/workflows/conda-python-build.yaml@add-telemetry
with:
build_type: pull-request
default_endpoint: "${{needs.telemetry-setup.outputs.endpoint}}"
traceparent: ${{ needs.telemetry-setup.outputs.traceparent }}
default_endpoint: "${{needs.reexports.outputs.endpoint}}"
otel_resource_attributes: "${{needs.reexports.outputs.otel_resource_attributes}}"
traceparent: ${{ needs.top-level-telemetry-traceparent.outputs.traceparent }}
shared_actions_repo: ${{needs.reexports.outputs.shared_actions_repo}}
shared_actions_ref: ${{needs.reexports.outputs.shared_actions_ref}}
conda-python-tests-singlegpu:
needs: [conda-python-build, changed-files, telemetry-setup]
needs: [conda-python-build, changed-files, reexports, top-level-telemetry-traceparent]
secrets: inherit
uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@add-telemetry
if: fromJSON(needs.changed-files.outputs.changed_file_groups).test_python
with:
build_type: pull-request
script: "ci/test_python_singlegpu.sh"
default_endpoint: "${{needs.telemetry-setup.outputs.endpoint}}"
traceparent: ${{ needs.telemetry-setup.outputs.traceparent }}
default_endpoint: "${{needs.reexports.outputs.endpoint}}"
otel_resource_attributes: "${{needs.reexports.outputs.otel_resource_attributes}}"
traceparent: ${{ needs.top-level-telemetry-traceparent.outputs.traceparent }}
shared_actions_repo: ${{needs.reexports.outputs.shared_actions_repo}}
shared_actions_ref: ${{needs.reexports.outputs.shared_actions_ref}}
optional-job-conda-python-tests-cudf-pandas-integration:
needs: [conda-python-build, changed-files, telemetry-setup]
needs: [conda-python-build, changed-files, reexports, top-level-telemetry-traceparent]
secrets: inherit
uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@add-telemetry
if: fromJSON(needs.changed-files.outputs.changed_file_groups).test_python
with:
matrix_filter: map(select(.ARCH == "amd64"))
build_type: pull-request
script: "ci/test_python_integration.sh"
default_endpoint: "${{needs.telemetry-setup.outputs.endpoint}}"
traceparent: ${{ needs.telemetry-setup.outputs.traceparent }}
default_endpoint: "${{needs.reexports.outputs.endpoint}}"
otel_resource_attributes: "${{needs.reexports.outputs.otel_resource_attributes}}"
traceparent: ${{ needs.top-level-telemetry-traceparent.outputs.traceparent }}
shared_actions_repo: ${{needs.reexports.outputs.shared_actions_repo}}
shared_actions_ref: ${{needs.reexports.outputs.shared_actions_ref}}
conda-python-tests-dask:
needs: [conda-python-build, changed-files, telemetry-setup]
needs: [conda-python-build, changed-files, reexports, top-level-telemetry-traceparent]
secrets: inherit
uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@add-telemetry
if: fromJSON(needs.changed-files.outputs.changed_file_groups).test_python
with:
build_type: pull-request
script: "ci/test_python_dask.sh"
default_endpoint: "${{needs.telemetry-setup.outputs.endpoint}}"
traceparent: ${{ needs.telemetry-setup.outputs.traceparent }}
default_endpoint: "${{needs.reexports.outputs.endpoint}}"
otel_resource_attributes: "${{needs.reexports.outputs.otel_resource_attributes}}"
traceparent: ${{ needs.top-level-telemetry-traceparent.outputs.traceparent }}
shared_actions_repo: ${{needs.reexports.outputs.shared_actions_repo}}
shared_actions_ref: ${{needs.reexports.outputs.shared_actions_ref}}
conda-notebook-tests:
needs: [conda-python-build, changed-files, telemetry-setup]
needs: [conda-python-build, changed-files, reexports, top-level-telemetry-traceparent]
secrets: inherit
uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@add-telemetry
if: fromJSON(needs.changed-files.outputs.changed_file_groups).test_notebooks
Expand All @@ -216,10 +234,13 @@ jobs:
arch: "amd64"
container_image: "rapidsai/ci-conda:latest"
run_script: "ci/test_notebooks.sh"
default_endpoint: "${{needs.telemetry-setup.outputs.endpoint}}"
traceparent: ${{ needs.telemetry-setup.outputs.traceparent }}
default_endpoint: "${{needs.reexports.outputs.endpoint}}"
otel_resource_attributes: "${{needs.reexports.outputs.otel_resource_attributes}}"
traceparent: ${{ needs.top-level-telemetry-traceparent.outputs.traceparent }}
shared_actions_repo: ${{needs.reexports.outputs.shared_actions_repo}}
shared_actions_ref: ${{needs.reexports.outputs.shared_actions_ref}}
docs-build:
needs: [conda-python-build, telemetry-setup]
needs: [conda-python-build, reexports, top-level-telemetry-traceparent]
secrets: inherit
uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@add-telemetry
with:
Expand All @@ -228,10 +249,13 @@ jobs:
arch: "amd64"
container_image: "rapidsai/ci-conda:latest"
run_script: "ci/build_docs.sh"
default_endpoint: "${{needs.telemetry-setup.outputs.endpoint}}"
traceparent: ${{ needs.telemetry-setup.outputs.traceparent }}
default_endpoint: "${{needs.reexports.outputs.endpoint}}"
otel_resource_attributes: "${{needs.reexports.outputs.otel_resource_attributes}}"
traceparent: ${{ needs.top-level-telemetry-traceparent.outputs.traceparent }}
shared_actions_repo: ${{needs.reexports.outputs.shared_actions_repo}}
shared_actions_ref: ${{needs.reexports.outputs.shared_actions_ref}}
wheel-build-cuml:
needs: [checks, telemetry-setup]
needs: [checks, reexports, top-level-telemetry-traceparent]
secrets: inherit
uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@add-telemetry
with:
Expand All @@ -240,59 +264,64 @@ jobs:
extra-repo: rapidsai/cumlprims_mg
extra-repo-sha: branch-24.12
extra-repo-deploy-key: CUMLPRIMS_SSH_PRIVATE_DEPLOY_KEY
default_endpoint: "${{needs.telemetry-setup.outputs.endpoint}}"
traceparent: ${{ needs.telemetry-setup.outputs.traceparent }}
default_endpoint: "${{needs.reexports.outputs.endpoint}}"
otel_resource_attributes: "${{needs.reexports.outputs.otel_resource_attributes}}"
traceparent: ${{ needs.top-level-telemetry-traceparent.outputs.traceparent }}
shared_actions_repo: ${{needs.reexports.outputs.shared_actions_repo}}
shared_actions_ref: ${{needs.reexports.outputs.shared_actions_ref}}
wheel-tests-cuml:
needs: [wheel-build-cuml, changed-files, telemetry-setup]
needs: [wheel-build-cuml, changed-files, reexports, top-level-telemetry-traceparent]
secrets: inherit
uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@add-telemetry
if: fromJSON(needs.changed-files.outputs.changed_file_groups).test_python
with:
build_type: pull-request
script: ci/test_wheel.sh
default_endpoint: "${{needs.telemetry-setup.outputs.endpoint}}"
traceparent: ${{ needs.telemetry-setup.outputs.traceparent }}
default_endpoint: "${{needs.reexports.outputs.endpoint}}"
otel_resource_attributes: "${{needs.reexports.outputs.otel_resource_attributes}}"
traceparent: ${{ needs.top-level-telemetry-traceparent.outputs.traceparent }}
shared_actions_repo: ${{needs.reexports.outputs.shared_actions_repo}}
shared_actions_ref: ${{needs.reexports.outputs.shared_actions_ref}}
# Builds the project inside a devcontainer via the shared workflow and passes
# it the same telemetry inputs as the other jobs in this workflow.
devcontainer:
  # The `needs` context only exposes jobs listed here as direct dependencies.
  # Without this entry every `needs.*.outputs.*` expression below would
  # evaluate to an empty string.
  needs: [reexports, top-level-telemetry-traceparent]
  secrets: inherit
  uses: rapidsai/shared-workflows/.github/workflows/build-in-devcontainer.yaml@add-telemetry
  with:
    arch: '["amd64"]'
    cuda: '["12.5"]'
    extra-repo-deploy-key: CUMLPRIMS_SSH_PRIVATE_DEPLOY_KEY
    default_endpoint: "${{needs.reexports.outputs.endpoint}}"
    otel_resource_attributes: "${{needs.reexports.outputs.otel_resource_attributes}}"
    traceparent: ${{ needs.top-level-telemetry-traceparent.outputs.traceparent }}
    shared_actions_repo: ${{needs.reexports.outputs.shared_actions_repo}}
    shared_actions_ref: ${{needs.reexports.outputs.shared_actions_ref}}
    build_command: |
      sccache -z;
      build-all --verbose;
      sccache -s;
final-telemetry-update:
runs-on: ubuntu-latest
needs: [pr-builder, telemetry-setup]
needs:
- reexports
- top-level-telemetry-traceparent
- pr-builder
steps:
- name: Get final timestamp
id: timestamp
run:
echo "FINAL_TIME=$(date +%s.%N)" >> ${GITHUB_OUTPUT}
# Main purpose of this traceparent line here is to ensure that otel-cli is installed.
- name: Get job traceparent
uses: rapidsai/shared-actions/telemetry-traceparent@add-telemetry
- name: Write certificate files for mTLS
run: |
mkdir -p /tmp/certs
cat << EOF > ${OTEL_EXPORTER_OTLP_CERTIFICATE}
${{ secrets.OTEL_EXPORTER_OTLP_CA_CERTIFICATE }}
EOF
cat << EOF > ${OTEL_EXPORTER_OTLP_CLIENT_CERTIFICATE}
${{ secrets.OTEL_EXPORTER_OTLP_CLIENT_CERTIFICATE }}
EOF
cat << EOF > ${OTEL_EXPORTER_OTLP_CLIENT_KEY}
${{ secrets.OTEL_EXPORTER_OTLP_CLIENT_KEY }}
EOF
- name: Update root span with final completion time
if: always()
uses: rapidsai/shared-actions/telemetry-create-span@add-telemetry
with:
service: ${{needs.telemetry-setup.outputs.top_level_service_name}}
name: "end-of-job update"
default_endpoint: "${{needs.telemetry-setup.outputs.endpoint}}"
traceparent: ${{needs.telemetry-setup.outputs.traceparent}}
start_time: ${{needs.telemetry-setup.outputs.start_time}}
end_time: ${{steps.timestamp.outputs.FINAL_TIME}}
# Check out the shared actions so the steps below can run them from
# ./shared-actions. Both the repository and the ref come from the `reexports`
# job, so redirecting SHARED_ACTIONS_REPO / SHARED_ACTIONS_REF applies to this
# job too and never pairs a ref with the wrong repository.
- name: Clone shared-actions repo
  uses: actions/checkout@v4
  with:
    repository: ${{needs.reexports.outputs.shared_actions_repo}}
    ref: ${{needs.reexports.outputs.shared_actions_ref}}
    path: ./shared-actions
# Runs the locally checked-out telemetry-summarize action for the trace
# identified by the top-level traceparent. The three cert inputs are passed
# straight from secrets.
- name: Send summary
  uses: ./shared-actions/telemetry-summarize
  with:
    ca_cert: ${{ secrets.OTEL_EXPORTER_OTLP_CA_CERTIFICATE }}
    client_cert: ${{ secrets.OTEL_EXPORTER_OTLP_CLIENT_CERTIFICATE }}
    client_key: ${{ secrets.OTEL_EXPORTER_OTLP_CLIENT_KEY }}
    traceparent: ${{ needs.top-level-telemetry-traceparent.outputs.traceparent }}
# Runs the locally checked-out telemetry-finalize-root-span action with the
# top-level traceparent and the same cert secrets as the summary step.
- name: Send root span with final time
  uses: ./shared-actions/telemetry-finalize-root-span
  with:
    ca_cert: ${{ secrets.OTEL_EXPORTER_OTLP_CA_CERTIFICATE }}
    client_cert: ${{ secrets.OTEL_EXPORTER_OTLP_CLIENT_CERTIFICATE }}
    client_key: ${{ secrets.OTEL_EXPORTER_OTLP_CLIENT_KEY }}
    traceparent: ${{ needs.top-level-telemetry-traceparent.outputs.traceparent }}

0 comments on commit eeac5b0

Please sign in to comment.