Skip to content

Commit

Permalink
v1.9.3
Browse files Browse the repository at this point in the history
  • Loading branch information
joeyorlando authored Aug 19, 2024
2 parents 6d922f6 + 4872588 commit 39cb6f2
Show file tree
Hide file tree
Showing 23 changed files with 66 additions and 248 deletions.
4 changes: 3 additions & 1 deletion .github/workflows/e2e-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -144,12 +144,14 @@ jobs:
git clone https://x-access-token:${{ steps.generate-token.outputs.token }}@github.com/grafana/ops-devenv.git
git clone https://x-access-token:${{ steps.generate-token.outputs.token }}@github.com/grafana/gops-labels.git
- name: Tilt CI - standard and expensive E2E tests
- name: Tilt CI - Expensive E2E tests
if: inputs.run-expensive-tests
shell: bash
env:
E2E_TESTS_CMD: "cd ../../grafana-plugin && yarn test:e2e-expensive"
GRAFANA_VERSION: ${{ inputs.grafana_version }}
GF_FEATURE_TOGGLES_ENABLE: "externalServiceAccounts"
ONCALL_API_URL: "http://oncall-dev-engine:8080"
GRAFANA_ADMIN_USERNAME: "irm"
GRAFANA_ADMIN_PASSWORD: "irm"
BROWSERS: ${{ inputs.browsers }}
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/expensive-e2e-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,8 @@ jobs:
# - 9.3.16
# - 9.4.13
# - 9.5.7
- 10.0.11
- 10.1.7
- 10.3.3
# TODO: fix issues with running e2e tests against Grafana v10.2.x and latest
# - 10.2.4
# - latest
Expand Down Expand Up @@ -55,7 +55,7 @@ jobs:
#
- uses: slackapi/slack-github-action@v1.24.0
with:
channel-id: gops-oncall-dev
channel-id: gops-irm-dev
# yamllint disable rule:line-length
payload: |
{
Expand Down
13 changes: 10 additions & 3 deletions .github/workflows/linting-and-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -239,9 +239,16 @@ jobs:
end-to-end-tests:
name: Standard e2e tests
uses: ./.github/workflows/e2e-tests.yml
strategy:
matrix:
grafana_version:
- 10.1.7
- 10.3.3
# TODO: fix issues with running e2e tests against Grafana v10.2.x and latest
# - 10.2.4
# - latest
fail-fast: false
with:
# TODO: fix issues with running e2e tests against Grafana v10.2.x and v10.3.x
grafana_version: 10.1.7
# grafana_version: 10.3.3
grafana_version: ${{ matrix.grafana_version }}
run-expensive-tests: false
browsers: "chromium"
11 changes: 9 additions & 2 deletions Tiltfile
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
load('ext://uibutton', 'cmd_button', 'location', 'text_input', 'bool_input')
load("ext://configmap", "configmap_create")

grafana_url = os.getenv("GRAFANA_URL", "http://grafana:3000")
running_under_parent_tiltfile = os.getenv("TILT_PARENT", "false") == "true"
twilio_values=[
"oncall.twilio.accountSid=" + os.getenv("TWILIO_ACCOUNT_SID", ""),
Expand Down Expand Up @@ -29,6 +30,14 @@ def plugin_json():
return plugin_file
return 'NOT_A_PLUGIN'

def extra_env():
    """Return a dict of extra environment variables.

    GF_APP_URL and GF_SERVER_ROOT_URL both take their value from the
    module-level ``grafana_url``; the remaining entries are fixed strings.
    """
    env = {}
    # Both Grafana URL settings share the same value.
    env["GF_APP_URL"] = grafana_url
    env["GF_SERVER_ROOT_URL"] = grafana_url
    env["GF_FEATURE_TOGGLES_ENABLE"] = "externalServiceAccounts"
    env["ONCALL_API_URL"] = "http://oncall-dev-engine:8080"
    return env


allow_k8s_contexts(["kind-kind"])

Expand Down Expand Up @@ -83,8 +92,6 @@ def load_grafana():
# The user/pass that you will login to Grafana with
grafana_admin_user_pass = os.getenv("GRAFANA_ADMIN_USER_PASS", "oncall")
grafana_version = os.getenv("GRAFANA_VERSION", "latest")
grafana_url = os.getenv("GRAFANA_URL", "http://grafana:3000")


if 'plugin' in profiles:
k8s_resource(
Expand Down
2 changes: 1 addition & 1 deletion engine/apps/alerts/models/alert_group.py
Original file line number Diff line number Diff line change
Expand Up @@ -201,7 +201,6 @@ class AlertGroup(AlertGroupSlackRenderingMixin, EscalationSnapshotMixin, models.
resolved_by_user: typing.Optional["User"]
root_alert_group: typing.Optional["AlertGroup"]
silenced_by_user: typing.Optional["User"]
slack_log_message: typing.Optional["SlackMessage"]
slack_messages: "RelatedManager['SlackMessage']"
users: "RelatedManager['User']"
labels: "RelatedManager['AlertGroupAssociatedLabel']"
Expand Down Expand Up @@ -396,6 +395,7 @@ def status(self) -> int:
related_name="wiped_alert_groups",
)

# TODO: drop this column in a future release
slack_log_message = models.OneToOneField(
"slack.SlackMessage",
on_delete=models.SET_NULL,
Expand Down
4 changes: 0 additions & 4 deletions engine/apps/alerts/signals.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,10 +36,6 @@
AlertGroupSlackRepresentative.on_alert_group_action_triggered,
)

alert_group_update_log_report_signal.connect(
AlertGroupSlackRepresentative.on_alert_group_update_log_report,
)

alert_group_update_resolution_note_signal.connect(
AlertGroupSlackRepresentative.on_alert_group_update_resolution_note,
)
Expand Down
40 changes: 1 addition & 39 deletions engine/apps/slack/representatives/alert_group_representative.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,19 +90,7 @@ def on_alert_group_action_triggered_async(log_record_id):
autoretry_for=(Exception,), retry_backoff=True, max_retries=1 if settings.DEBUG else None
)
def on_alert_group_update_log_report_async(alert_group_id):
from apps.alerts.models import AlertGroup

alert_group = AlertGroup.objects.get(pk=alert_group_id)
logger.debug(f"Start on_alert_group_update_log_report for alert_group {alert_group_id}")
organization = alert_group.channel.organization
if alert_group.slack_message and organization.slack_team_identity:
logger.debug(f"Process on_alert_group_update_log_report for alert_group {alert_group_id}")
UpdateLogReportMessageStep = ScenarioStep.get_step("distribute_alerts", "UpdateLogReportMessageStep")
step = UpdateLogReportMessageStep(organization.slack_team_identity, organization)
step.process_signal(alert_group)
else:
logger.debug(f"Drop on_alert_group_update_log_report for alert_group {alert_group_id}")
logger.debug(f"Finish on_alert_group_update_log_report for alert_group {alert_group_id}")
return "Deprecated, will be removed after queue cleanup"


class AlertGroupSlackRepresentative(AlertGroupAbstractRepresentative):
Expand Down Expand Up @@ -173,32 +161,6 @@ def on_alert_group_action_triggered(cls, **kwargs):
logger.debug(f"SLACK on_alert_group_action_triggered: async {log_record_id} {force_sync}")
on_alert_group_action_triggered_async.apply_async((log_record_id,))

@classmethod
def on_alert_group_update_log_report(cls, **kwargs):
# Handles the alert_group_update_log_report signal for the Slack representative.
# Reads kwargs["alert_group"], which is either an AlertGroup instance or a value
# that is used as the alert group's primary key, and queues
# on_alert_group_update_log_report_async with the alert group id unless Slack
# notifications are disabled for that alert group.
# Imported inside the method rather than at module level.
from apps.alerts.models import AlertGroup

alert_group = kwargs["alert_group"]

# Normalize the argument to an id (and, for non-instances, load the instance).
if isinstance(alert_group, AlertGroup):
alert_group_id = alert_group.pk
else:
alert_group_id = alert_group
# NOTE(review): indentation is not preserved in this view; this lookup presumably
# belongs to the else branch above - confirm against the original file.
try:
alert_group = AlertGroup.objects.get(pk=alert_group_id)
except AlertGroup.DoesNotExist as e:
# The missing alert group is logged and the exception is re-raised to the caller.
logger.warning(f"SLACK update log report: alert group {alert_group_id} has been deleted")
raise e

logger.debug(
f"Received alert_group_update_log_report signal in SLACK representative for alert_group {alert_group_id}"
)

# Only an explicit False skips the update; the check uses an identity comparison.
if alert_group.notify_in_slack_enabled is False:
logger.debug(f"Skipping alert_group {alert_group_id} since notify_in_slack is disabled")
return

# Hand the actual work to the async task, passing only the id.
on_alert_group_update_log_report_async.apply_async((alert_group_id,))

@classmethod
def on_alert_group_update_resolution_note(cls, **kwargs):
alert_group = kwargs["alert_group"]
Expand Down
87 changes: 1 addition & 86 deletions engine/apps/slack/scenarios/distribute_alerts.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
from datetime import datetime

from django.core.cache import cache
from django.utils import timezone

from apps.alerts.constants import ActionSource
from apps.alerts.incident_appearance.renderers.constants import DEFAULT_BACKUP_TITLE
Expand All @@ -14,25 +13,17 @@
from apps.slack.chatops_proxy_routing import make_private_metadata, make_value
from apps.slack.constants import CACHE_UPDATE_INCIDENT_SLACK_MESSAGE_LIFETIME
from apps.slack.errors import (
SlackAPICantUpdateMessageError,
SlackAPIChannelArchivedError,
SlackAPIChannelInactiveError,
SlackAPIChannelNotFoundError,
SlackAPIError,
SlackAPIInvalidAuthError,
SlackAPIMessageNotFoundError,
SlackAPIRatelimitError,
SlackAPIRestrictedActionError,
SlackAPITokenError,
)
from apps.slack.scenarios import scenario_step
from apps.slack.scenarios.slack_renderer import AlertGroupLogSlackRenderer
from apps.slack.slack_formatter import SlackFormatter
from apps.slack.tasks import (
post_or_update_log_report_message_task,
send_message_to_thread_if_bot_not_in_channel,
update_incident_slack_message,
)
from apps.slack.tasks import send_message_to_thread_if_bot_not_in_channel, update_incident_slack_message
from apps.slack.types import (
Block,
BlockActionType,
Expand Down Expand Up @@ -95,7 +86,6 @@ def process_signal(self, alert: Alert) -> None:
else:
# check if alert group was posted to slack before posting message to thread
if not alert.group.skip_escalation_in_slack:
self._send_log_report_message(alert.group, channel_id)
self._send_message_to_thread_if_bot_not_in_channel(alert.group, channel_id)
else:
# check if alert group was posted to slack before updating its message
Expand Down Expand Up @@ -208,11 +198,6 @@ def _send_debug_mode_notice(self, alert_group: AlertGroup, channel_id: str) -> N
blocks=blocks,
)

def _send_log_report_message(self, alert_group: AlertGroup, channel_id: str) -> None:
    """Queue post_or_update_log_report_message_task for the given alert group.

    The task receives the alert group's primary key and the primary key of
    this step's Slack team identity. ``channel_id`` is accepted but not used.
    """
    task_args = (alert_group.pk, self.slack_team_identity.pk)
    post_or_update_log_report_message_task.apply_async(task_args)

def _send_message_to_thread_if_bot_not_in_channel(self, alert_group: AlertGroup, channel_id: str) -> None:
send_message_to_thread_if_bot_not_in_channel.apply_async(
(alert_group.pk, self.slack_team_identity.pk, channel_id),
Expand Down Expand Up @@ -895,76 +880,6 @@ def process_signal(self, log_record: AlertGroupLogRecord) -> None:
message.delete()


class UpdateLogReportMessageStep(scenario_step.ScenarioStep):
# Scenario step that refreshes the Slack "Alert Group log" message of an alert group.
# NOTE(review): indentation is not preserved in this view; the comments below describe
# each statement but do not assert how the branches are nested - confirm in the file.
def process_signal(self, alert_group: AlertGroup) -> None:
# Do nothing when escalation in Slack is skipped for this alert group or when its
# channel is flagged as rate limited in Slack; otherwise update the log message.
if alert_group.skip_escalation_in_slack or alert_group.channel.is_rate_limited_in_slack:
return

self.update_log_message(alert_group)

def update_log_message(self, alert_group: AlertGroup) -> None:
# Updates the recorded log message through chat_update, or re-queues
# post_or_update_log_report_message_task when no log message is recorded yet
# and the alert group's Slack message is at most five minutes old.
slack_message = alert_group.slack_message
# Without a SlackMessage there is no channel to address, so log and stop.
if slack_message is None:
logger.info(
f"Cannot update log message for alert_group {alert_group.pk} because SlackMessage doesn't exist"
)
return None

slack_log_message = alert_group.slack_log_message

if slack_log_message is not None:
# prevent too frequent updates
# (skips the update when the last one happened no more than 5 seconds ago)
if timezone.now() <= slack_log_message.last_updated + timezone.timedelta(seconds=5):
return

# Render the log report as Slack attachments for the update call.
attachments = AlertGroupLogSlackRenderer.render_incident_log_report_for_slack(alert_group)
logger.debug(
f"Update log message for alert_group {alert_group.pk}, slack_log_message {slack_log_message.pk}"
)
try:
# The channel comes from the alert group's Slack message, the timestamp from the log message.
self._slack_client.chat_update(
channel=slack_message.channel_id,
text="Alert Group log",
ts=slack_log_message.slack_id,
attachments=attachments,
)
except SlackAPIRatelimitError as e:
# Start the rate-limit message task only if the channel is not already flagged as rate limited.
if not alert_group.channel.is_rate_limited_in_slack:
alert_group.channel.start_send_rate_limit_message_task(e.retry_after)
except SlackAPIMessageNotFoundError:
# Slack reports the message as not found: clear the reference on the alert group.
alert_group.slack_log_message = None
alert_group.save(update_fields=["slack_log_message"])
except (
SlackAPITokenError,
SlackAPIChannelNotFoundError,
SlackAPIChannelArchivedError,
SlackAPIChannelInactiveError,
SlackAPIInvalidAuthError,
SlackAPICantUpdateMessageError,
):
# These Slack API errors are swallowed without any further handling.
pass
else:
# Record the update time; it feeds the 5-second throttle check above.
slack_log_message.last_updated = timezone.now()
slack_log_message.save(update_fields=["last_updated"])
logger.debug(
f"Finished update log message for alert_group {alert_group.pk}, "
f"slack_log_message {slack_log_message.pk}"
)
# check how much time has passed since slack message was created
# to prevent eternal loop of restarting update log message task
elif timezone.now() <= slack_message.created_at + timezone.timedelta(minutes=5):
logger.debug(
f"Update log message failed for alert_group {alert_group.pk}: "
f"log message does not exist yet. Restarting post_or_update_log_report_message_task..."
)
# Re-queue the task with a third positional argument of True, delayed by 3 seconds.
post_or_update_log_report_message_task.apply_async(
(alert_group.pk, self.slack_team_identity.pk, True),
countdown=3,
)
else:
# The Slack message is older than five minutes: log and do not reschedule.
logger.debug(f"Update log message failed for alert_group {alert_group.pk}: " f"log message does not exist.")


STEPS_ROUTING: ScenarioRoute.RoutingSteps = [
{
"payload_type": PayloadType.INTERACTIVE_MESSAGE,
Expand Down
14 changes: 0 additions & 14 deletions engine/apps/slack/scenarios/slack_renderer.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,17 +39,3 @@ def render_alert_group_future_log_report_text(alert_group: "AlertGroup"):
for plan_line in escalation_policies_plan[time]:
result += f"*{humanize.naturaldelta(time)}:* {plan_line}\n"
return result

@staticmethod
def render_incident_log_report_for_slack(alert_group: "AlertGroup"):
    """Render the alert group's log report as a list of Slack attachments.

    The past-log text and the future-log text are concatenated in that order.
    Returns a one-element list ``[{"text": <combined text>}]`` when the
    combined text is non-empty, otherwise an empty list.
    """
    renderer = AlertGroupLogSlackRenderer
    past_text = renderer.render_alert_group_past_log_report_text(alert_group)
    future_text = renderer.render_alert_group_future_log_report_text(alert_group)
    combined = past_text + future_text
    # Nothing to show: return no attachments at all.
    if len(combined) == 0:
        return []
    return [{"text": combined}]
23 changes: 1 addition & 22 deletions engine/apps/slack/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@
SlackAPITokenError,
SlackAPIUsergroupNotFoundError,
)
from apps.slack.scenarios.scenario_step import ScenarioStep
from apps.slack.utils import (
get_cache_key_update_incident_slack_message,
get_populate_slack_channel_task_id_key,
Expand Down Expand Up @@ -289,27 +288,7 @@ def populate_slack_user_identities(organization_pk):
autoretry_for=(Exception,), retry_backoff=True, max_retries=1 if settings.DEBUG else None
)
def post_or_update_log_report_message_task(alert_group_pk, slack_team_identity_pk, update=False):
logger.debug(f"Start post_or_update_log_report_message_task for alert_group {alert_group_pk}")
from apps.alerts.models import AlertGroup
from apps.slack.models import SlackTeamIdentity

UpdateLogReportMessageStep = ScenarioStep.get_step("distribute_alerts", "UpdateLogReportMessageStep")

slack_team_identity = SlackTeamIdentity.objects.get(pk=slack_team_identity_pk)
alert_group = AlertGroup.objects.get(pk=alert_group_pk)
step = UpdateLogReportMessageStep(slack_team_identity, alert_group.channel.organization)

if alert_group.skip_escalation_in_slack or alert_group.channel.is_rate_limited_in_slack:
return

if update: # flag to prevent multiple posting log message to slack
step.update_log_message(alert_group)
else:
# don't post a new message, as it is available from the button
# this is an intermediate step, so we will only update posted messages but not post new ones
# once majority of messages are updated, we can remove this step (https://github.com/grafana/oncall/pull/4686)
pass
logger.debug(f"Finish post_or_update_log_report_message_task for alert_group {alert_group_pk}")
return "Deprecated, will be removed after queue cleanup"


@shared_dedicated_queue_retry_task(
Expand Down
Loading

0 comments on commit 39cb6f2

Please sign in to comment.