-
Notifications
You must be signed in to change notification settings - Fork 91
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
In this PR, I've restructured Relay's GoCD pipelines to separate Processing and PoPs into distinct pipelines. As we previously discussed, this change aims to improve the efficiency and robustness of Relay's deployment process. Key enhancements include: * Addition of canary deployments for both Processing and PoPs, providing an early warning system for issues in our deployment process. * Introduction of a soak-time stage with Sentry and DataDog checks, allowing us to detect and address issues early on and stop problematic deploys from rolling out further. * Inclusion of dedicated rollback pipelines for both Processing and PoPs, increasing the speed at which we can mitigate issues. #skip-changelog
- Loading branch information
1 parent
d52ae94
commit 41abecb
Showing
19 changed files
with
577 additions
and
4 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
#!/bin/bash

# Run the Datadog monitor status check for the monitors listed in
# DATADOG_MONITOR_IDS. The variable is expanded unquoted on purpose so that
# each whitespace-separated id reaches the script as its own argument.
# shellcheck disable=SC2086
/devinfra/scripts/checks/datadog/monitor_status.py ${DATADOG_MONITOR_IDS} --skip-check=false
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
#!/bin/bash

# Run the Sentry "release error events" check once per configured project.
#
# Environment:
#   SENTRY_PROJECT_IDS      whitespace-separated project ids
#   SENTRY_PROJECTS         whitespace-separated project slugs, in the same
#                           order as SENTRY_PROJECT_IDS
#   GO_REVISION_RELAY_REPO  revision of the relay_repo material; used to build
#                           the release name "relay@<revision>"
#   ERROR_LIMIT, DRY_RUN, SENTRY_SINGLE_TENANT, SENTRY_BASE
#                           forwarded unchanged to the check script
#   SKIP_CANARY_CHECKS      forwarded as --skip-check; defaults to "false"
#                           when unset or empty
#
# Exit status: 1 if the two lists differ in length or if the check fails for
# at least one project, 0 otherwise.

# Word splitting is intentional here: both variables hold whitespace-separated
# lists that are turned into arrays.
# shellcheck disable=SC2206
project_ids=(${SENTRY_PROJECT_IDS})
# shellcheck disable=SC2206
project_slugs=(${SENTRY_PROJECTS})

# ${#array[@]} is the element count. (A doubled '#' would instead expand "$#"
# with a prefix pattern removed, making this comparison always succeed.)
if [ "${#project_ids[@]}" -ne "${#project_slugs[@]}" ]; then
  echo "Error: SENTRY_PROJECT_IDS and SENTRY_PROJECTS must have the same number of elements"
  exit 1
fi

# A for loop only reports the status of its last command, so remember any
# failure explicitly; otherwise a failing project followed by a passing one
# would be reported as success. Every project is still checked.
status=0
for i in "${!project_ids[@]}"; do
  if ! /devinfra/scripts/checks/sentry/release_error_events.py \
    --project-id="${project_ids[i]}" \
    --project-slug="${project_slugs[i]}" \
    --release="relay@${GO_REVISION_RELAY_REPO}" \
    --duration=5 \
    --error-events-limit="${ERROR_LIMIT}" \
    --dry-run="${DRY_RUN}" \
    --single-tenant="${SENTRY_SINGLE_TENANT}" \
    --skip-check="${SKIP_CANARY_CHECKS:-false}" \
    --sentry-base="${SENTRY_BASE}"; then
    status=1
  fi
done

exit "${status}"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,23 @@ | ||
#!/bin/bash

# Run the Sentry "release new issues" check once per configured project.
#
# Environment:
#   SENTRY_PROJECT_IDS      whitespace-separated project ids
#   SENTRY_PROJECTS         whitespace-separated project slugs, in the same
#                           order as SENTRY_PROJECT_IDS
#   GO_REVISION_RELAY_REPO  revision of the relay_repo material; used to build
#                           the release name "relay@<revision>"
#   DRY_RUN, SENTRY_SINGLE_TENANT, SENTRY_BASE
#                           forwarded unchanged to the check script
#   SKIP_CANARY_CHECKS      forwarded as --skip-check; defaults to "false"
#                           when unset or empty
#
# Exit status: 1 if the two lists differ in length or if the check fails for
# at least one project, 0 otherwise.

# Word splitting is intentional here: both variables hold whitespace-separated
# lists that are turned into arrays.
# shellcheck disable=SC2206
project_ids=(${SENTRY_PROJECT_IDS})
# shellcheck disable=SC2206
project_slugs=(${SENTRY_PROJECTS})

# ${#array[@]} is the element count. (A doubled '#' would instead expand "$#"
# with a prefix pattern removed, making this comparison always succeed.)
if [ "${#project_ids[@]}" -ne "${#project_slugs[@]}" ]; then
  echo "Error: SENTRY_PROJECT_IDS and SENTRY_PROJECTS must have the same number of elements"
  exit 1
fi

# A for loop only reports the status of its last command, so remember any
# failure explicitly; otherwise a failing project followed by a passing one
# would be reported as success. Every project is still checked.
status=0
for i in "${!project_ids[@]}"; do
  if ! /devinfra/scripts/checks/sentry/release_new_issues.py \
    --project-id="${project_ids[i]}" \
    --project-slug="${project_slugs[i]}" \
    --release="relay@${GO_REVISION_RELAY_REPO}" \
    --new-issues-limit=0 \
    --dry-run="${DRY_RUN}" \
    --single-tenant="${SENTRY_SINGLE_TENANT}" \
    --skip-check="${SKIP_CANARY_CHECKS:-false}" \
    --sentry-base="${SENTRY_BASE}"; then
    status=1
  fi
done

exit "${status}"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
#!/bin/bash

# Deploy the relay-pop image built from GO_REVISION_RELAY_REPO to the
# workloads labelled service=relay-pop,env=canary in region SENTRY_REGION.

# Evaluate the shell statements printed by project_env_vars.py. The command
# substitution is quoted so the output reaches eval verbatim (line breaks
# kept, no word splitting or globbing) instead of being re-split first.
eval "$(/devinfra/scripts/regions/project_env_vars.py --region="${SENTRY_REGION}")"

/devinfra/scripts/k8s/k8stunnel

/devinfra/scripts/k8s/k8s-deploy.py \
  --label-selector="service=relay-pop,env=canary" \
  --image="us.gcr.io/sentryio/relay-pop:${GO_REVISION_RELAY_REPO}" \
  --container-name="relay"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
#!/bin/bash

# Deploy the relay image built from GO_REVISION_RELAY_REPO to the workloads
# labelled service=relay,env=canary in region SENTRY_REGION.

# Evaluate the shell statements printed by project_env_vars.py. The command
# substitution is quoted so the output reaches eval verbatim (line breaks
# kept, no word splitting or globbing) instead of being re-split first.
eval "$(/devinfra/scripts/regions/project_env_vars.py --region="${SENTRY_REGION}")"

/devinfra/scripts/k8s/k8stunnel

/devinfra/scripts/k8s/k8s-deploy.py \
  --label-selector="service=relay,env=canary" \
  --image="us.gcr.io/sentryio/relay:${GO_REVISION_RELAY_REPO}" \
  --container-name="relay"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
#!/bin/bash

# Deploy the relay image built from GO_REVISION_RELAY_REPO to the workloads
# labelled service=relay in region SENTRY_REGION.

# Evaluate the shell statements printed by project_env_vars.py. The command
# substitution is quoted so the output reaches eval verbatim (line breaks
# kept, no word splitting or globbing) instead of being re-split first.
eval "$(/devinfra/scripts/regions/project_env_vars.py --region="${SENTRY_REGION}")"

/devinfra/scripts/k8s/k8stunnel

/devinfra/scripts/k8s/k8s-deploy.py \
  --label-selector="service=relay" \
  --image="us.gcr.io/sentryio/relay:${GO_REVISION_RELAY_REPO}" \
  --container-name="relay"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
#!/bin/bash

# Invoke gocd-pause-current-pipeline, passing PAUSE_MESSAGE as the pause message.
gocd-pause-current-pipeline --pause-message="${PAUSE_MESSAGE}"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
#!/bin/bash

# Block for five minutes (300 seconds).
wait_seconds=300
sleep "${wait_seconds}"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
#!/bin/bash

# Block for five minutes (300 seconds).
wait_seconds=300
sleep "${wait_seconds}"
File renamed without changes.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,33 @@ | ||
local gocdtasks = import 'github.com/getsentry/gocd-jsonnet/libs/gocd-tasks.libsonnet'; | ||
|
||
{
  // Returns a script-executor plugin task that runs pause-current-pipeline.sh.
  // The task is marked `run_if: 'failed'`.
  pause_on_failure():
    local pause_script = gocdtasks.script(importstr '../bash/pause-current-pipeline.sh');
    {
      plugin: {
        options: pause_script,
        run_if: 'failed',
        configuration: {
          id: 'script-executor',
          version: 1,
        },
      },
    },

  // Returns a one-element stage list: a `checks` stage whose single `checks`
  // job runs github-check-runs.sh with a GitHub token in its environment.
  github_checks():
    local checks_job = {
      environment_variables: {
        GITHUB_TOKEN: '{{SECRET:[devinfra-github][token]}}',
      },
      timeout: 1800,
      elastic_profile_id: 'relay',
      tasks: [
        gocdtasks.script(importstr '../bash/github-check-runs.sh'),
      ],
    };
    [
      {
        checks: {
          fetch_materials: true,
          jobs: { checks: checks_job },
        },
      },
    ],
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,232 @@ | ||
local utils = import '../libs/utils.libsonnet'; | ||
local gocdtasks = import 'github.com/getsentry/gocd-jsonnet/libs/gocd-tasks.libsonnet'; | ||
|
||
// PoPs that receive the canary deployment, keyed by region. These are
// deployed in addition to the region itself.
local canary_region_pops = {
  // TODO: Check that these are right
  us: [
    'us-pop-1',
    'us-pop-regional-1',
  ],
  de: [],
};
|
||
// All PoPs of each region, keyed by region.
local region_pops = {
  // Expands to ['<prefix>-1', ..., '<prefix>-<count>'].
  local numbered(prefix, count) = ['%s-%d' % [prefix, n] for n in std.range(1, count)],

  de: numbered('de-pop', 2),
  us: numbered('us-pop', 4) + numbered('us-pop-regional', 4),
};
|
||
// Soak-time stage: lets a finished deployment run for a while and then runs
// the Sentry and Datadog checks to catch issues it may have introduced.
// Only the 's4s' and 'us' regions get this stage; every other region gets an
// empty stage list.
local soak_time(region) =
  local is_s4s = region == 's4s';
  local soak_env = {
    SENTRY_REGION: region,
    GOCD_ACCESS_TOKEN: '{{SECRET:[devinfra][gocd_access_token]}}',
    SENTRY_AUTH_TOKEN: '{{SECRET:[devinfra-sentryio][token]}}',
    DATADOG_API_KEY: '{{SECRET:[devinfra][sentry_datadog_api_key]}}',
    DATADOG_APP_KEY: '{{SECRET:[devinfra][sentry_datadog_app_key]}}',
    // Datadog monitor IDs for the soak time
    DATADOG_MONITOR_IDS: '137575470 22592147 27804625 22634395 22635255',
    // Slugs and ids are space-separated lists that must line up by position.
    SENTRY_PROJECTS: if is_s4s then 'sentry-for-sentry' else 'pop-relay relay',
    SENTRY_PROJECT_IDS: if is_s4s then '1513938' else '9 4',
    SENTRY_SINGLE_TENANT: if is_s4s then 'true' else 'false',
    SENTRY_BASE: if is_s4s then 'https://sentry.io/api/0' else 'https://sentry.my.sentry.io/api/0',
    // TODO: Set a proper error limit
    ERROR_LIMIT: 500,
    PAUSE_MESSAGE: 'Detecting issues in the deployment. Pausing pipeline.',
    // TODO: Switch dry run to false once we're confident in the soak time
    DRY_RUN: 'true',
  };
  local soak_job = {
    environment_variables: soak_env,
    elastic_profile_id: 'relay-pop',
    tasks: [
      gocdtasks.script(importstr '../bash/wait-soak.sh'),
      gocdtasks.script(importstr '../bash/check-sentry-errors.sh'),
      gocdtasks.script(importstr '../bash/check-sentry-new-errors.sh'),
      gocdtasks.script(importstr '../bash/check-datadog-status.sh'),
      utils.pause_on_failure(),
    ],
  };
  if !(is_s4s || region == 'us') then
    []
  else
    [
      {
        'soak-time': {
          jobs: { soak: soak_job },
        },
      },
    ];
|
||
// Builds the GoCD job for one canary target: run the deploy-pop-canary
// script, wait, run the Sentry and Datadog checks, and finish with the
// pause-on-failure task.
local deploy_pop_canary_job(region) =
  local secrets = {
    GOCD_ACCESS_TOKEN: '{{SECRET:[devinfra][gocd_access_token]}}',
    SENTRY_AUTH_TOKEN: '{{SECRET:[devinfra-sentryio][token]}}',
    DATADOG_API_KEY: '{{SECRET:[devinfra][sentry_datadog_api_key]}}',
    DATADOG_APP_KEY: '{{SECRET:[devinfra][sentry_datadog_app_key]}}',
  };
  local check_settings = {
    // Datadog monitor IDs for the canary deployment
    DATADOG_MONITOR_IDS: '137575470 22592147 27804625 22634395 22635255',
    // Slugs and ids are space-separated lists that must line up by position.
    SENTRY_PROJECTS: 'pop-relay relay',
    SENTRY_PROJECT_IDS: '9 4',
    SENTRY_SINGLE_TENANT: 'false',
    SENTRY_BASE: 'https://sentry.my.sentry.io/api/0',
    // TODO: Set a proper error limit
    ERROR_LIMIT: 500,
    PAUSE_MESSAGE: 'Pausing pipeline due to canary failure.',
    // TODO: Switch dry run to false once we're confident in the canary checks
    DRY_RUN: 'true',
  };
  {
    timeout: 1200,
    elastic_profile_id: 'relay-pop',
    environment_variables: { SENTRY_REGION: region } + secrets + check_settings,
    tasks: [
      gocdtasks.script(importstr '../bash/deploy-pop-canary.sh'),
      gocdtasks.script(importstr '../bash/wait-canary.sh'),
      gocdtasks.script(importstr '../bash/check-sentry-errors.sh'),
      gocdtasks.script(importstr '../bash/check-sentry-new-errors.sh'),
      gocdtasks.script(importstr '../bash/check-datadog-status.sh'),
      utils.pause_on_failure(),
    ],
  };
|
||
// Builds the GoCD job that runs the deploy-pop script with SENTRY_REGION set
// to the given region.
local deploy_pop_job(region) =
  local deploy_task = gocdtasks.script(importstr '../bash/deploy-pop.sh');
  {
    timeout: 1200,
    elastic_profile_id: 'relay-pop',
    environment_variables: { SENTRY_REGION: region },
    tasks: [deploy_task],
  };
|
||
// Builds a jobs object with one entry per element of `regions`. Each key is
// 'deploy-primary' + partition + <element>, and each value is
// deploy_job(<element>).
local deploy_jobs(regions, deploy_job, partition='-') =
  {
    ['deploy-primary%s%s' % [partition, target]]: deploy_job(target)
    for target in regions
  };
|
||
// Canary stage: creates the Sentry release and deploys the canary to the
// region itself plus every canary PoP configured for it. Each canary job
// waits and runs its own checks (see deploy_pop_canary_job).
local deploy_canary_pops_stage(region) =
  local release_job = {
    timeout: 1200,
    elastic_profile_id: 'relay',
    environment_variables: {
      SENTRY_ORG: 'sentry',
      SENTRY_PROJECT: 'pop-relay',
      SENTRY_URL: 'https://sentry.my.sentry.io/',
      // Temporary; self-service encrypted secrets aren't implemented yet.
      // This should really be rotated to an internal integration token.
      SENTRY_AUTH_TOKEN: '{{SECRET:[devinfra-temp][relay_sentry_auth_token]}}',
      SENTRY_ENVIRONMENT: 'canary',
    },
    tasks: [
      gocdtasks.script(importstr '../bash/create-sentry-relay-release.sh'),
    ],
  };
  local canary_targets = [region] + canary_region_pops[region];
  {
    'deploy-canary': {
      fetch_materials: true,
      jobs: { create_sentry_release: release_job } + deploy_jobs(
        canary_targets,
        deploy_pop_canary_job,
        '-canary-',
      ),
    },
  };
|
||
// Primary stage: creates the Sentry release and deploys to the region itself
// plus every PoP listed for it in region_pops.
local deploy_pops_stage(region) =
  local is_s4s = region == 's4s';
  // PoPs have their own Sentry project, which requires separate symbol upload via
  // create-sentry-release. They could be moved into the same project with a different
  // environment to avoid this.
  local release_job = {
    timeout: 1200,
    elastic_profile_id: 'relay',
    environment_variables: {
      SENTRY_ORG: if is_s4s then 'sentry-st' else 'sentry',
      SENTRY_PROJECT: if is_s4s then 'sentry-for-sentry' else 'pop-relay',
      SENTRY_URL: if is_s4s then 'https://sentry-st.sentry.io/' else 'https://sentry.my.sentry.io/',
      // Temporary; self-service encrypted secrets aren't implemented yet.
      // This should really be rotated to an internal integration token.
      SENTRY_AUTH_TOKEN: if is_s4s then '{{SECRET:[devinfra-temp][relay_sentry_st_auth_token]}}' else '{{SECRET:[devinfra-temp][relay_sentry_auth_token]}}',
    },
    tasks: [
      gocdtasks.script(importstr '../bash/create-sentry-relay-pop-release.sh'),
    ],
  };
  local targets = [region] + region_pops[region];
  {
    'deploy-primary': {
      fetch_materials: true,
      jobs: { create_sentry_release: release_job } + deploy_jobs(targets, deploy_pop_job),
    },
  };
|
||
// Primary stage for regions with a single deploy target: one
// 'deploy-primary-<region>' job and no Sentry release job.
local deploy_generic_pops_stage(region) =
  {
    'deploy-primary': {
      fetch_materials: true,
      jobs: deploy_jobs([region], deploy_pop_job),
    },
  };
|
||
// The US and DE regions get a canary stage followed by a primary stage that
// creates a Sentry release and deploys to the region and its PoPs. Every
// other region gets a single primary stage with one deploy job.
local deployment_stages(region) =
  local has_canary = std.member(['us', 'de'], region);
  if has_canary then
    [deploy_canary_pops_stage(region), deploy_pops_stage(region)]
  else
    [deploy_generic_pops_stage(region)];
|
||
|
||
// Pipeline definition for one region: GitHub checks first, then the
// region's deployment stages, then the soak-time stage (where one applies).
function(region)
  local relay_material = {
    git: 'git@github.com:getsentry/relay.git',
    shallow_clone: true,
    branch: 'master',
    destination: 'relay',
  };
  {
    group: 'relay-pops-next',
    lock_behavior: 'unlockWhenFinished',
    environment_variables: { SENTRY_REGION: region },
    materials: { relay_repo: relay_material },
    stages: utils.github_checks() + deployment_stages(region) + soak_time(region),
  }
Oops, something went wrong.