diff --git a/Tiltfile b/Tiltfile index a2cd3a5141..f6e07226f1 100644 --- a/Tiltfile +++ b/Tiltfile @@ -83,7 +83,7 @@ if not is_ci: local_resource( "e2e-tests", - labels=["E2eTests"], + labels=["allTests"], cmd=e2e_tests_cmd, trigger_mode=TRIGGER_MODE_MANUAL, auto_init=is_ci, @@ -127,6 +127,34 @@ cmd_button( icon_name="dangerous", ) +# Inspired by https://github.com/grafana/slo/blob/main/Tiltfile#L72 +pod_engine_pytest_script = ''' +set -eu +# get engine k8s pod name from tilt resource name +POD_NAME="$(tilt get kubernetesdiscovery "engine" -ojsonpath='{.status.pods[0].name}')" +kubectl exec "$POD_NAME" -- pytest . $STOP_ON_FIRST_FAILURE $TESTS_FILTER +''' +local_resource( + "pytest-tests", + labels=["allTests"], + cmd=['sh', '-c', pod_engine_pytest_script], + trigger_mode=TRIGGER_MODE_MANUAL, + auto_init=False, + resource_deps=["engine"] +) + +cmd_button( + name="pytest Tests - headless run", + argv=['sh', '-c', pod_engine_pytest_script], + text="Run pytest", + resource="pytest-tests", + icon_name="replay", + inputs=[ + text_input("TESTS_FILTER", "pytest optional arguments (e.g. \"apps/webhooks/tests/test_webhook.py::test_build_url_private_raises\")", "", "Test file names to run"), + bool_input("STOP_ON_FIRST_FAILURE", "Stop on first failure", True, "-x", ""), + ] +) + helm_oncall_values = ["./dev/helm-local.yml", "./dev/helm-local.dev.yml"] if is_ci: helm_oncall_values = helm_oncall_values + ["./.github/helm-ci.yml"] @@ -174,7 +202,10 @@ k8s_resource( resource_deps=["mariadb", "redis-master"], labels=["OnCallBackend"], ) +k8s_resource(workload="engine-migrate", labels=["OnCallBackend"]) + k8s_resource(workload="redis-master", labels=["OnCallDeps"]) +k8s_resource(workload="prometheus-server", labels=["OnCallDeps"]) k8s_resource( workload="mariadb", port_forwards='3307:3306', # : @@ -184,6 +215,9 @@ k8s_resource( # name all tilt resources after the k8s object namespace + name def resource_name(id): + # Remove variable date from job name + if id.name.startswith(HELM_PREFIX + "-engine-migrate"): + return "engine-migrate" return id.name.replace(HELM_PREFIX + "-", "") workload_to_resource_function(resource_name) diff --git a/dev/helm-local.yml b/dev/helm-local.yml index 721bd3c6a8..02a7052718 100644 --- a/dev/helm-local.yml +++ b/dev/helm-local.yml @@ -5,6 +5,14 @@ env: value: "False" - name: FEATURE_PROMETHEUS_EXPORTER_ENABLED value: "True" + - name: DJANGO_SETTINGS_MODULE + value: "settings.dev" + - name: FEATURE_TELEGRAM_INTEGRATION_ENABLED + value: "True" + - name: FEATURE_SLACK_INTEGRATION_ENABLED + value: "True" + - name: SLACK_SLASH_COMMAND_NAME + value: "/oncall" # enabled to be able to test docker.host.internal in the webhook e2e tests - name: DANGEROUS_WEBHOOKS_ENABLED value: "True" @@ -131,6 +139,14 @@ service: nodePort: 30001 prometheus: enabled: true + alertmanager: + enabled: false + kube-state-metrics: + enabled: false + prometheus-node-exporter: + enabled: false + prometheus-pushgateway: + enabled: false server: global: scrape_interval: 10s diff --git a/engine/apps/alerts/incident_appearance/renderers/slack_renderer.py b/engine/apps/alerts/incident_appearance/renderers/slack_renderer.py index 4930746649..c99d59579e 100644 --- a/engine/apps/alerts/incident_appearance/renderers/slack_renderer.py +++ b/engine/apps/alerts/incident_appearance/renderers/slack_renderer.py @@ -135,7 +135,7 @@ def render_alert_group_attachments(self): if self.alert_group.resolved: resolve_attachment = { "fallback": "Resolved...", - "text": self.alert_group.get_resolve_text(mention_user=True), + "text": self.alert_group.get_resolve_text(mention_user=False), "callback_id": "alert", } attachments.append(resolve_attachment) @@ -143,7 +143,7 @@ def render_alert_group_attachments(self): if self.alert_group.acknowledged: ack_attachment = { "fallback": "Acknowledged...", - "text": self.alert_group.get_acknowledge_text(mention_user=True), + "text": self.alert_group.get_acknowledge_text(mention_user=False), "callback_id": "alert", } attachments.append(ack_attachment) diff --git a/engine/apps/alerts/incident_log_builder/incident_log_builder.py b/engine/apps/alerts/incident_log_builder/incident_log_builder.py index 4dcf586a7a..27f484f591 100644 --- a/engine/apps/alerts/incident_log_builder/incident_log_builder.py +++ b/engine/apps/alerts/incident_log_builder/incident_log_builder.py @@ -670,7 +670,7 @@ def _get_notification_plan_for_user( # last passed step order + 1 notification_policy_order = last_user_log.notification_policy.order + 1 - notification_policies = user_to_notify.get_notification_policies_or_use_default_fallback(important=important) + _, notification_policies = user_to_notify.get_notification_policies_or_use_default_fallback(important=important) for notification_policy in notification_policies: future_notification = notification_policy.order >= notification_policy_order diff --git a/engine/apps/alerts/tasks/notify_group.py b/engine/apps/alerts/tasks/notify_group.py index 6787c04056..e38cdeab0d 100644 --- a/engine/apps/alerts/tasks/notify_group.py +++ b/engine/apps/alerts/tasks/notify_group.py @@ -83,7 +83,7 @@ def notify_group_task(alert_group_pk, escalation_policy_snapshot_order=None): continue important = escalation_policy_step == EscalationPolicy.STEP_NOTIFY_GROUP_IMPORTANT - notification_policies = user.get_notification_policies_or_use_default_fallback(important=important) + _, notification_policies = user.get_notification_policies_or_use_default_fallback(important=important) if notification_policies: usergroup_notification_plan += "\n_{} (".format( diff --git a/engine/apps/alerts/tasks/notify_user.py b/engine/apps/alerts/tasks/notify_user.py index 1f7b7c1c63..cdfd35d617 100644 --- a/engine/apps/alerts/tasks/notify_user.py +++ b/engine/apps/alerts/tasks/notify_user.py @@ -1,4 +1,5 @@ import time +import typing from functools import partial from celery.exceptions import Retry @@ -42,7 +43,6 @@ def notify_user_task( countdown = 0 stop_escalation = False - log_message = "" log_record = None with transaction.atomic(): @@ -70,9 +70,13 @@ def notify_user_task( ) user_has_notification = UserHasNotification.objects.filter(pk=user_has_notification.pk).select_for_update()[0] + using_fallback_default_notification_policy_step = False if previous_notification_policy_pk is None: - notification_policies = user.get_notification_policies_or_use_default_fallback(important=important) + ( + using_fallback_default_notification_policy_step, + notification_policies, + ) = user.get_notification_policies_or_use_default_fallback(important=important) if not notification_policies: task_logger.info( f"notify_user_task: Failed to notify. No notification policies. user_id={user_pk} alert_group_id={alert_group_pk} important={important}" @@ -115,16 +119,25 @@ def notify_user_task( ) return reason = None - if notification_policy is None: - stop_escalation = True - log_record = UserNotificationPolicyLogRecord( + + def _create_user_notification_policy_log_record(**kwargs): + return UserNotificationPolicyLogRecord( + **kwargs, + using_fallback_default_notification_policy_step=using_fallback_default_notification_policy_step, + ) + + def _create_notification_finished_user_notification_policy_log_record(): + return _create_user_notification_policy_log_record( author=user, type=UserNotificationPolicyLogRecord.TYPE_PERSONAL_NOTIFICATION_FINISHED, notification_policy=notification_policy, alert_group=alert_group, slack_prevent_posting=prevent_posting_to_thread, ) - log_message += "Personal escalation exceeded" + + if notification_policy is None: + stop_escalation = True + log_record = _create_notification_finished_user_notification_policy_log_record() else: if ( (alert_group.acknowledged and not notify_even_acknowledged) @@ -146,7 +159,7 @@ def notify_user_task( else: delay_in_seconds = 0 countdown = delay_in_seconds - log_record = UserNotificationPolicyLogRecord( + log_record = _create_user_notification_policy_log_record( author=user, type=UserNotificationPolicyLogRecord.TYPE_PERSONAL_NOTIFICATION_TRIGGERED, notification_policy=notification_policy, @@ -160,7 +173,7 @@ def notify_user_task( notification_policy.notify_by == UserNotificationPolicy.NotificationChannel.SLACK ) if user_to_be_notified_in_slack and alert_group.notify_in_slack_enabled is False: - log_record = UserNotificationPolicyLogRecord( + log_record = _create_user_notification_policy_log_record( author=user, type=UserNotificationPolicyLogRecord.TYPE_PERSONAL_NOTIFICATION_FAILED, notification_policy=notification_policy, @@ -172,7 +185,7 @@ def notify_user_task( notification_error_code=UserNotificationPolicyLogRecord.ERROR_NOTIFICATION_POSTING_TO_SLACK_IS_DISABLED, ) else: - log_record = UserNotificationPolicyLogRecord( + log_record = _create_user_notification_policy_log_record( author=user, type=UserNotificationPolicyLogRecord.TYPE_PERSONAL_NOTIFICATION_TRIGGERED, notification_policy=notification_policy, @@ -182,6 +195,7 @@ def notify_user_task( notification_step=notification_policy.step, notification_channel=notification_policy.notify_by, ) + if log_record: # log_record is None if user notification policy step is unspecified # if this is the first notification step, and user hasn't been notified for this alert group - update metric if ( @@ -198,25 +212,43 @@ def notify_user_task( if notify_user_task.request.retries == 0: transaction.on_commit(partial(send_user_notification_signal.apply_async, (log_record.pk,))) - if not stop_escalation: - if notification_policy.step != UserNotificationPolicy.Step.WAIT: + def _create_perform_notification_task(log_record_pk, alert_group_pk, use_default_notification_policy_fallback): + task = perform_notification.apply_async((log_record_pk, use_default_notification_policy_fallback)) + task_logger.info( + f"Created perform_notification task {task} log_record={log_record_pk} " f"alert_group={alert_group_pk}" + ) - def _create_perform_notification_task(log_record_pk, alert_group_pk): - task = perform_notification.apply_async((log_record_pk,)) - task_logger.info( - f"Created perform_notification task {task} log_record={log_record_pk} " - f"alert_group={alert_group_pk}" - ) + def _update_user_has_notification_active_notification_policy_id(active_policy_id: typing.Optional[str]) -> None: + user_has_notification.active_notification_policy_id = active_policy_id + user_has_notification.save(update_fields=["active_notification_policy_id"]) + + def _reset_user_has_notification_active_notification_policy_id() -> None: + _update_user_has_notification_active_notification_policy_id(None) + + create_perform_notification_task = partial( + _create_perform_notification_task, + log_record.pk, + alert_group_pk, + using_fallback_default_notification_policy_step, + ) - transaction.on_commit(partial(_create_perform_notification_task, log_record.pk, alert_group_pk)) + if using_fallback_default_notification_policy_step: + # if we are using default notification policy, we're done escalating.. there's no further notification + # policy steps in this case. Kick off the perform_notification task, create the + # TYPE_PERSONAL_NOTIFICATION_FINISHED log record, and reset the active_notification_policy_id + transaction.on_commit(create_perform_notification_task) + _create_notification_finished_user_notification_policy_log_record() + _reset_user_has_notification_active_notification_policy_id() + elif not stop_escalation: + if notification_policy.step != UserNotificationPolicy.Step.WAIT: + transaction.on_commit(create_perform_notification_task) delay = NEXT_ESCALATION_DELAY if countdown is not None: delay += countdown task_id = celery_uuid() - user_has_notification.active_notification_policy_id = task_id - user_has_notification.save(update_fields=["active_notification_policy_id"]) + _update_user_has_notification_active_notification_policy_id(task_id) transaction.on_commit( partial( @@ -231,10 +263,8 @@ def _create_perform_notification_task(log_record_pk, alert_group_pk): task_id=task_id, ) ) - else: - user_has_notification.active_notification_policy_id = None - user_has_notification.save(update_fields=["active_notification_policy_id"]) + _reset_user_has_notification_active_notification_policy_id() @shared_dedicated_queue_retry_task( @@ -243,7 +273,7 @@ def _create_perform_notification_task(log_record_pk, alert_group_pk): dont_autoretry_for=(Retry,), max_retries=1 if settings.DEBUG else None, ) -def perform_notification(log_record_pk): +def perform_notification(log_record_pk, use_default_notification_policy_fallback): from apps.base.models import UserNotificationPolicy, UserNotificationPolicyLogRecord from apps.telegram.models import TelegramToUserConnector @@ -262,8 +292,13 @@ def perform_notification(log_record_pk): user = log_record.author alert_group = log_record.alert_group - notification_policy = log_record.notification_policy + notification_policy = ( + UserNotificationPolicy.get_default_fallback_policy(user) + if use_default_notification_policy_fallback + else log_record.notification_policy + ) notification_channel = notification_policy.notify_by if notification_policy else None + if user is None or notification_policy is None: UserNotificationPolicyLogRecord( author=user, @@ -300,7 +335,9 @@ def perform_notification(log_record_pk): TelegramToUserConnector.notify_user(user, alert_group, notification_policy) except RetryAfter as e: countdown = getattr(e, "retry_after", 3) - raise perform_notification.retry((log_record_pk,), countdown=countdown, exc=e) + raise perform_notification.retry( + (log_record_pk, use_default_notification_policy_fallback), countdown=countdown, exc=e + ) elif notification_channel == UserNotificationPolicy.NotificationChannel.SLACK: # TODO: refactor checking the possibility of sending a notification in slack @@ -380,7 +417,9 @@ def perform_notification(log_record_pk): f"does not exist. Restarting perform_notification." ) restart_delay_seconds = 60 - perform_notification.apply_async((log_record_pk,), countdown=restart_delay_seconds) + perform_notification.apply_async( + (log_record_pk, use_default_notification_policy_fallback), countdown=restart_delay_seconds + ) else: task_logger.debug( f"send_slack_notification for alert_group {alert_group.pk} failed because slack message " diff --git a/engine/apps/alerts/tests/test_alert_receiver_channel.py b/engine/apps/alerts/tests/test_alert_receiver_channel.py index 1a2923f318..282b8c18b1 100644 --- a/engine/apps/alerts/tests/test_alert_receiver_channel.py +++ b/engine/apps/alerts/tests/test_alert_receiver_channel.py @@ -1,7 +1,9 @@ +import os from unittest import mock from unittest.mock import patch import pytest +from django.conf import settings from django.db import IntegrityError from django.urls import reverse from django.utils import timezone @@ -10,6 +12,7 @@ from common.api_helpers.utils import create_engine_url from common.exceptions import UnableToSendDemoAlert from engine.management.commands import alertmanager_v2_migrate +from settings.base import DatabaseTypes @pytest.mark.django_db @@ -272,6 +275,13 @@ def test_create_missing_direct_paging_integrations( def test_create_duplicate_direct_paging_integrations(make_organization, make_team, make_alert_receive_channel): """Check that it's not possible to have more than one active direct paging integration per team.""" + # MariaDB is not supported for this test + # See comment: https://github.com/grafana/oncall/commit/381a9ecf54bf0dd076f233b207c13d72ed792181#diff-9d96504027309f2bd1e95352bac1433b09b60eb4fafb611b52a6c15ed16cbc48R219-R223 + is_local_dev_env = os.environ.get("DJANGO_SETTINGS_MODULE") == "settings.dev" + is_db_type_mysql = settings.DATABASE_TYPE == DatabaseTypes.MYSQL + if is_local_dev_env and is_db_type_mysql: + pytest.skip("This test is not supported by Mariadb (used by settings.dev)") + organization = make_organization() team = make_team(organization) make_alert_receive_channel(organization, team=team, integration=AlertReceiveChannel.INTEGRATION_DIRECT_PAGING) diff --git a/engine/apps/alerts/tests/test_notify_user.py b/engine/apps/alerts/tests/test_notify_user.py index e0c32d2c14..7dd2b81210 100644 --- a/engine/apps/alerts/tests/test_notify_user.py +++ b/engine/apps/alerts/tests/test_notify_user.py @@ -40,7 +40,7 @@ def test_custom_backend_call( ) with patch("apps.base.tests.messaging_backend.TestOnlyBackend.notify_user") as mock_notify_user: - perform_notification(log_record.pk) + perform_notification(log_record.pk, False) mock_notify_user.assert_called_once_with(user_1, alert_group, user_notification_policy) @@ -72,7 +72,7 @@ def test_custom_backend_error( with patch("apps.alerts.tasks.notify_user.get_messaging_backend_from_id") as mock_get_backend: mock_get_backend.return_value = None - perform_notification(log_record.pk) + perform_notification(log_record.pk, False) error_log_record = UserNotificationPolicyLogRecord.objects.last() assert error_log_record.type == UserNotificationPolicyLogRecord.TYPE_PERSONAL_NOTIFICATION_FAILED @@ -119,7 +119,7 @@ def test_notify_user_missing_data_errors( with patch("apps.alerts.tasks.notify_user.get_messaging_backend_from_id") as mock_get_backend: mock_get_backend.return_value = None - perform_notification(log_record.pk) + perform_notification(log_record.pk, False) error_log_record = UserNotificationPolicyLogRecord.objects.last() assert error_log_record.type == UserNotificationPolicyLogRecord.TYPE_PERSONAL_NOTIFICATION_FAILED @@ -154,7 +154,7 @@ def test_notify_user_perform_notification_error_if_viewer( type=UserNotificationPolicyLogRecord.TYPE_PERSONAL_NOTIFICATION_TRIGGERED, ) - perform_notification(log_record.pk) + perform_notification(log_record.pk, False) error_log_record = UserNotificationPolicyLogRecord.objects.last() assert error_log_record.type == UserNotificationPolicyLogRecord.TYPE_PERSONAL_NOTIFICATION_FAILED @@ -230,7 +230,7 @@ def test_perform_notification_reason_to_skip_escalation_in_slack( if not error_code: make_slack_message(alert_group=alert_group, channel_id="test_channel_id", slack_id="test_slack_id") with patch.object(SlackMessage, "send_slack_notification") as mocked_send_slack_notification: - perform_notification(log_record.pk) + perform_notification(log_record.pk, False) last_log_record = UserNotificationPolicyLogRecord.objects.last() if error_code: @@ -277,7 +277,7 @@ def test_perform_notification_slack_prevent_posting( make_slack_message(alert_group=alert_group, channel_id="test_channel_id", slack_id="test_slack_id") with patch.object(SlackMessage, "send_slack_notification") as mocked_send_slack_notification: - perform_notification(log_record.pk) + perform_notification(log_record.pk, False) mocked_send_slack_notification.assert_not_called() last_log_record = UserNotificationPolicyLogRecord.objects.last() @@ -292,7 +292,7 @@ def test_perform_notification_slack_prevent_posting( @pytest.mark.django_db def test_perform_notification_missing_user_notification_policy_log_record(caplog): invalid_pk = 12345 - perform_notification(invalid_pk) + perform_notification(invalid_pk, False) assert ( f"perform_notification: log_record {invalid_pk} doesn't exist. Skipping remainder of task. " @@ -327,7 +327,45 @@ def test_perform_notification_telegram_retryafter_error( exc = RetryAfter(countdown) with patch.object(TelegramToUserConnector, "notify_user", side_effect=exc) as mock_notify_user: with pytest.raises(RetryAfter): - perform_notification(log_record.pk) + perform_notification(log_record.pk, False) mock_notify_user.assert_called_once_with(user, alert_group, user_notification_policy) assert alert_group.personal_log_records.last() == log_record + + +@patch("apps.base.models.UserNotificationPolicy.get_default_fallback_policy") +@patch("apps.base.tests.messaging_backend.TestOnlyBackend.notify_user") +@pytest.mark.django_db +def test_perform_notification_use_default_notification_policy_fallback( + mock_notify_user, + mock_get_default_fallback_policy, + make_organization, + make_user, + make_alert_receive_channel, + make_alert_group, + make_user_notification_policy_log_record, +): + organization = make_organization() + user = make_user(organization=organization) + fallback_notification_policy = UserNotificationPolicy( + user=user, + step=UserNotificationPolicy.Step.NOTIFY, + notify_by=UserNotificationPolicy.NotificationChannel.TESTONLY, + important=False, + order=0, + ) + + mock_get_default_fallback_policy.return_value = fallback_notification_policy + + alert_receive_channel = make_alert_receive_channel(organization=organization) + alert_group = make_alert_group(alert_receive_channel=alert_receive_channel) + log_record = make_user_notification_policy_log_record( + author=user, + alert_group=alert_group, + notification_policy=None, + type=UserNotificationPolicyLogRecord.TYPE_PERSONAL_NOTIFICATION_TRIGGERED, + ) + + perform_notification(log_record.pk, True) + + mock_notify_user.assert_called_once_with(user, alert_group, fallback_notification_policy) diff --git a/engine/apps/base/models/user_notification_policy.py b/engine/apps/base/models/user_notification_policy.py index 1593b3ca1f..40855dcca2 100644 --- a/engine/apps/base/models/user_notification_policy.py +++ b/engine/apps/base/models/user_notification_policy.py @@ -1,4 +1,5 @@ import datetime +import typing from enum import unique from typing import Tuple @@ -13,6 +14,11 @@ from common.ordered_model.ordered_model import OrderedModel from common.public_primary_keys import generate_public_primary_key, increase_public_primary_key_length +if typing.TYPE_CHECKING: + from django.db.models.manager import RelatedManager + + from apps.base.models import UserNotificationPolicyLogRecord + def generate_public_primary_key_for_notification_policy(): prefix = "N" @@ -66,6 +72,9 @@ def validate_channel_choice(value): class UserNotificationPolicy(OrderedModel): + personal_log_records: "RelatedManager['UserNotificationPolicyLogRecord']" + user: typing.Optional[User] + order_with_respect_to = ("user_id", "important") public_primary_key = models.CharField( @@ -129,6 +138,19 @@ def get_short_verbals_for_user(cls, user: User) -> Tuple[Tuple[str, ...], Tuple[ return default, important + @staticmethod + def get_default_fallback_policy(user: User) -> "UserNotificationPolicy": + return UserNotificationPolicy( + user=user, + step=UserNotificationPolicy.Step.NOTIFY, + notify_by=settings.EMAIL_BACKEND_INTERNAL_ID, + # The important flag doesn't really matter here.. since we're just using this as a transient/fallacbk + # in-memory object (important is really only used for allowing users to group their + # notification policy steps) + important=False, + order=0, + ) + @property def short_verbal(self) -> str: if self.step == UserNotificationPolicy.Step.NOTIFY: diff --git a/engine/apps/base/models/user_notification_policy_log_record.py b/engine/apps/base/models/user_notification_policy_log_record.py index f219857cb2..2d2f8a2ac1 100644 --- a/engine/apps/base/models/user_notification_policy_log_record.py +++ b/engine/apps/base/models/user_notification_policy_log_record.py @@ -1,4 +1,5 @@ import logging +import typing import humanize from django.db import models @@ -15,11 +16,45 @@ from apps.slack.slack_formatter import SlackFormatter from common.utils import clean_markup +if typing.TYPE_CHECKING: + from apps.alerts.models import AlertGroup + from apps.user_management.models import User + logger = logging.getLogger(__name__) logger.setLevel(logging.DEBUG) +def _check_if_notification_policy_is_transient_fallback(kwargs): + """ + If `using_fallback_default_notification_policy_step` is present, and `True`, then the `notification_policy` + field should be set to `None`. This is because we do not persist default notification policies in the + database. It only exists as a transient/in-memory object, and therefore has no foreign key to reference. + """ + using_fallback_default_notification_policy_step = kwargs.pop( + "using_fallback_default_notification_policy_step", False + ) + + if using_fallback_default_notification_policy_step: + kwargs.pop("notification_policy", None) + + +class UserNotificationPolicyLogRecordQuerySet(models.QuerySet): + def create(self, **kwargs): + """ + Needed for when we do something like this: + notification_policy = UserNotificationPolicy.objects.create(arg1="foo", ...) + """ + _check_if_notification_policy_is_transient_fallback(kwargs) + return super().create(**kwargs) + + class UserNotificationPolicyLogRecord(models.Model): + alert_group: "AlertGroup" + author: typing.Optional["User"] + notification_policy: typing.Optional[UserNotificationPolicy] + + objects: models.Manager["UserNotificationPolicyLogRecord"] = UserNotificationPolicyLogRecordQuerySet.as_manager() + ( TYPE_PERSONAL_NOTIFICATION_TRIGGERED, TYPE_PERSONAL_NOTIFICATION_FINISHED, @@ -112,6 +147,15 @@ class UserNotificationPolicyLogRecord(models.Model): notification_step = models.IntegerField(choices=UserNotificationPolicy.Step.choices, null=True, default=None) notification_channel = models.IntegerField(validators=[validate_channel_choice], null=True, default=None) + def __init__(self, *args, **kwargs): + """ + Needed for when we do something like this: + notification_policy = UserNotificationPolicy(arg1="foo", ...) + notification_policy.save() + """ + _check_if_notification_policy_is_transient_fallback(kwargs) + super().__init__(*args, **kwargs) + def rendered_notification_log_line(self, for_slack=False, html=False): timeline = render_relative_timeline(self.created_at, self.alert_group.started_at) diff --git a/engine/apps/email/backend.py b/engine/apps/email/backend.py index 164f0cb82f..e654a5adca 100644 --- a/engine/apps/email/backend.py +++ b/engine/apps/email/backend.py @@ -1,6 +1,13 @@ +import typing + from apps.base.messaging import BaseMessagingBackend from apps.email.tasks import notify_user_async +if typing.TYPE_CHECKING: + from apps.alerts.models import AlertGroup + from apps.base.models import UserNotificationPolicy + from apps.user_management.models import User + class EmailBackend(BaseMessagingBackend): backend_id = "EMAIL" @@ -11,10 +18,18 @@ class EmailBackend(BaseMessagingBackend): templater = "apps.email.alert_rendering.AlertEmailTemplater" template_fields = ("title", "message") - def serialize_user(self, user): + def serialize_user(self, user: "User"): return {"email": user.email} - def notify_user(self, user, alert_group, notification_policy): + def notify_user( + self, user: "User", alert_group: "AlertGroup", notification_policy: typing.Optional["UserNotificationPolicy"] + ): + """ + NOTE: `notification_policy` may be None if the user has no notification policies defined, as + email is the default backend used + """ notify_user_async.delay( - user_pk=user.pk, alert_group_pk=alert_group.pk, notification_policy_pk=notification_policy.pk + user_pk=user.pk, + alert_group_pk=alert_group.pk, + notification_policy_pk=notification_policy.pk if notification_policy else None, ) diff --git a/engine/apps/email/tasks.py b/engine/apps/email/tasks.py index 685eb80806..8aaff7f3f7 100644 --- a/engine/apps/email/tasks.py +++ b/engine/apps/email/tasks.py @@ -43,15 +43,28 @@ def notify_user_async(user_pk, alert_group_pk, notification_policy_pk): logger.warning(f"Alert group {alert_group_pk} does not exist") return - try: - notification_policy = UserNotificationPolicy.objects.get(pk=notification_policy_pk) - except UserNotificationPolicy.DoesNotExist: - logger.warning(f"User notification policy {notification_policy_pk} does not exist") - return + using_fallback_default_notification_policy_step = False + + if notification_policy_pk is None: + # NOTE: `notification_policy_pk` may be None if the user has no notification policies defined, as + # email is the default backend used. see `UserNotificationPolicy.get_default_fallback_policy` for more details + notification_policy = UserNotificationPolicy.get_default_fallback_policy(user) + using_fallback_default_notification_policy_step = True + else: + try: + notification_policy = UserNotificationPolicy.objects.get(pk=notification_policy_pk) + except UserNotificationPolicy.DoesNotExist: + logger.warning(f"User notification policy {notification_policy_pk} does not exist") + return + + def _create_user_notification_policy_log_record(**kwargs): + return UserNotificationPolicyLogRecord.objects.create( + **kwargs, using_fallback_default_notification_policy_step=using_fallback_default_notification_policy_step + ) # create an error log in case EMAIL_HOST is not specified if not live_settings.EMAIL_HOST: - UserNotificationPolicyLogRecord.objects.create( + _create_user_notification_policy_log_record( author=user, type=UserNotificationPolicyLogRecord.TYPE_PERSONAL_NOTIFICATION_FAILED, notification_policy=notification_policy, @@ -65,7 +78,7 @@ def notify_user_async(user_pk, alert_group_pk, notification_policy_pk): emails_left = user.organization.emails_left(user) if emails_left <= 0: - UserNotificationPolicyLogRecord.objects.create( + _create_user_notification_policy_log_record( author=user, type=UserNotificationPolicyLogRecord.TYPE_PERSONAL_NOTIFICATION_FAILED, notification_policy=notification_policy, @@ -111,7 +124,7 @@ def notify_user_async(user_pk, alert_group_pk, notification_policy_pk): except (gaierror, BadHeaderError) as e: # gaierror is raised when EMAIL_HOST is invalid # BadHeaderError is raised when there's newlines in the subject - UserNotificationPolicyLogRecord.objects.create( + _create_user_notification_policy_log_record( author=user, type=UserNotificationPolicyLogRecord.TYPE_PERSONAL_NOTIFICATION_FAILED, notification_policy=notification_policy, @@ -124,7 +137,7 @@ def notify_user_async(user_pk, alert_group_pk, notification_policy_pk): return # record success log - UserNotificationPolicyLogRecord.objects.create( + _create_user_notification_policy_log_record( author=user, type=UserNotificationPolicyLogRecord.TYPE_PERSONAL_NOTIFICATION_SUCCESS, notification_policy=notification_policy, diff --git a/engine/apps/grafana_plugin/tasks/sync.py b/engine/apps/grafana_plugin/tasks/sync.py index e1a4d2f9ae..35754ee9d1 100644 --- a/engine/apps/grafana_plugin/tasks/sync.py +++ b/engine/apps/grafana_plugin/tasks/sync.py @@ -89,7 +89,7 @@ def run_organization_sync(organization_pk, force_sync): logger.info(f"Finish sync Organization {organization_pk}") -@shared_dedicated_queue_retry_task(autoretry_for=(Exception,), max_retries=1) +@shared_dedicated_queue_retry_task(autoretry_for=(Exception,), max_retries=0) def start_cleanup_deleted_organizations(): sync_threshold = timezone.now() - INACTIVE_PERIOD @@ -113,7 +113,7 @@ def start_cleanup_deleted_organizations(): cleanup_organization_async.apply_async((organization_pk,), countdown=countdown) -@shared_dedicated_queue_retry_task(autoretry_for=(Exception,), max_retries=1) +@shared_dedicated_queue_retry_task(autoretry_for=(Exception,), max_retries=0) def cleanup_organization_async(organization_pk): cleanup_organization(organization_pk) @@ -166,9 +166,11 @@ def cleanup_empty_deleted_integrations(organization_pk, dry_run=True): integration.hard_delete() -@shared_dedicated_queue_retry_task(autoretry_for=(Exception,), max_retries=1) +@shared_dedicated_queue_retry_task(autoretry_for=(Exception,), max_retries=0) def start_cleanup_organizations(): - organization_pks = Organization.objects.all().values_list("pk", flat=True) + cleanup_threshold = timezone.now() - INACTIVE_PERIOD + organization_qs = Organization.objects.filter(last_time_synced__lte=cleanup_threshold) + organization_pks = organization_qs.values_list("pk", flat=True) logger.debug(f"Found {len(organization_pks)} organizations") max_countdown = CLEANUP_PERIOD.seconds for idx, organization_pk in enumerate(organization_pks): diff --git a/engine/apps/integrations/tasks.py b/engine/apps/integrations/tasks.py index 3d9a77ad70..0ee5acbb82 100644 --- a/engine/apps/integrations/tasks.py +++ b/engine/apps/integrations/tasks.py @@ -132,7 +132,9 @@ def create_alert( }, countdown=countdown, ) - logger.warning(f"Retrying the task gracefully in {countdown} seconds due to ConcurrentUpdateError") + logger.warning( + f"Retrying the task gracefully in {countdown} seconds due to ConcurrentUpdateError for alert_receive_channel={alert_receive_channel_pk}" + ) @shared_dedicated_queue_retry_task() diff --git a/engine/apps/user_management/models/user.py b/engine/apps/user_management/models/user.py index b7d9920086..c6de3813f9 100644 --- a/engine/apps/user_management/models/user.py +++ b/engine/apps/user_management/models/user.py @@ -31,7 +31,7 @@ from apps.alerts.models import AlertGroup, EscalationPolicy from apps.auth_token.models import ApiAuthToken, ScheduleExportAuthToken, UserScheduleExportAuthToken - from apps.base.models import UserNotificationPolicy + from apps.base.models import UserNotificationPolicy, UserNotificationPolicyLogRecord from apps.slack.models import SlackUserIdentity from apps.social_auth.types import GoogleOauth2Response from apps.user_management.models import Organization, Team @@ -165,6 +165,7 @@ class User(models.Model): last_notified_in_escalation_policies: "RelatedManager['EscalationPolicy']" notification_policies: "RelatedManager['UserNotificationPolicy']" organization: "Organization" + personal_log_records: "RelatedManager['UserNotificationPolicyLogRecord']" resolved_alert_groups: "RelatedManager['AlertGroup']" schedule_export_token: "RelatedManager['ScheduleExportAuthToken']" silenced_alert_groups: "RelatedManager['AlertGroup']" @@ -413,25 +414,31 @@ def build_permissions_query( return PermissionsQuery(permissions__contains=[required_permission]) return RoleInQuery(role__lte=permission.fallback_role.value) + def get_default_fallback_notification_policy(self) -> "UserNotificationPolicy": + from apps.base.models import UserNotificationPolicy + + return UserNotificationPolicy.get_default_fallback_policy(self) + def get_notification_policies_or_use_default_fallback( self, important=False - ) -> typing.List["UserNotificationPolicy"]: + ) -> typing.Tuple[bool, typing.List["UserNotificationPolicy"]]: """ If the user has no notification policies defined, fallback to using e-mail as the notification channel. + + The 1st tuple element is a boolean indicating if we are falling back to using a "fallback"/default + notification policy step (which occurs when the user has no notification policies defined). """ - from apps.base.models import UserNotificationPolicy + notification_polices = self.notification_policies.filter(important=important) - if not self.notification_policies.filter(important=important).exists(): - return [ - UserNotificationPolicy( - user=self, - step=UserNotificationPolicy.Step.NOTIFY, - notify_by=settings.EMAIL_BACKEND_INTERNAL_ID, - important=important, - order=0, - ), - ] - return list(self.notification_policies.filter(important=important).all()) + if not notification_polices.exists(): + return ( + True, + [self.get_default_fallback_notification_policy()], + ) + return ( + False, + list(notification_polices.all()), + ) def update_alert_group_table_selected_columns(self, columns: typing.List[AlertGroupTableColumn]) -> None: if self.alert_group_table_selected_columns != columns: diff --git a/engine/apps/webhooks/tests/test_webhook.py b/engine/apps/webhooks/tests/test_webhook.py index 7e86dc02bc..389c0702b8 100644 --- a/engine/apps/webhooks/tests/test_webhook.py +++ b/engine/apps/webhooks/tests/test_webhook.py @@ -168,6 +168,9 @@ def test_build_url_invalid_url(make_organization, make_custom_webhook): @pytest.mark.django_db def test_build_url_private_raises(make_organization, make_custom_webhook): + if settings.DANGEROUS_WEBHOOKS_ENABLED: + pytest.skip("Dangerous webhooks are enabled") + organization = make_organization() webhook = make_custom_webhook(organization=organization, url="{{foo}}") @@ -241,6 +244,9 @@ def test_make_request(make_organization, make_custom_webhook): @httpretty.activate(verbose=True, allow_net_connect=False) @pytest.mark.django_db def test_make_request_bad_redirect(make_organization, make_custom_webhook): + if settings.DANGEROUS_WEBHOOKS_ENABLED: + pytest.skip("Dangerous webhooks are enabled") + organization = make_organization() webhook = make_custom_webhook(organization=organization, http_method="POST") diff --git a/engine/settings/dev.py b/engine/settings/dev.py index 087f013f39..078b0735d4 100644 --- a/engine/settings/dev.py +++ b/engine/settings/dev.py @@ -66,6 +66,13 @@ TELEGRAM_TOKEN = "0000000000:XXXXXXXXXXXXXXXXXXXXXXXXXXXX-XXXXXX" TWILIO_AUTH_TOKEN = "twilio_auth_token" + # charset/collation related tests don't work without this + TEST_SETTINGS = { + "CHARSET": "utf8mb4", + "COLLATION": "utf8mb4_unicode_ci", + } + DATABASES["default"]["TEST"] = TEST_SETTINGS + INTERNAL_IPS = [ "127.0.0.1", ] diff --git a/engine/settings/prod_without_db.py b/engine/settings/prod_without_db.py index f505c8f988..ced1f3e96c 100644 --- a/engine/settings/prod_without_db.py +++ b/engine/settings/prod_without_db.py @@ -16,11 +16,9 @@ def on_uwsgi_worker_exit(): # Only works under uwsgi web server environment pass - SLACK_SIGNING_SECRET = os.environ.get("SLACK_SIGNING_SECRET") SLACK_SIGNING_SECRET_LIVE = os.environ.get("SLACK_SIGNING_SECRET_LIVE", "") - STATICFILES_DIRS = [ "/etc/app/static", ] diff --git a/grafana-plugin/e2e-tests/utils/userSettings.ts b/grafana-plugin/e2e-tests/utils/userSettings.ts index d24cdfbbf1..a38878bcba 100644 --- a/grafana-plugin/e2e-tests/utils/userSettings.ts +++ b/grafana-plugin/e2e-tests/utils/userSettings.ts @@ -49,14 +49,15 @@ export const verifyUserPhoneNumber = async (page: Page): Promise => { await closeModal(page); }; +const getDefaultNotificationSettingsSectionByTestId = (page: Page): Locator => + page.getByTestId('default-personal-notification-settings'); + /** * gets the first row of our default notification settings * and then gets the notification type dropdown */ const getFirstDefaultNotificationSettingTypeDropdown = async (page: Page): Promise => { - const firstDefaultNotificationSettingRow = page - .getByTestId('default-personal-notification-settings') - .locator('li >> nth=0'); + const firstDefaultNotificationSettingRow = getDefaultNotificationSettingsSectionByTestId(page).locator('li >> nth=0'); // get the notification type dropdown specifically return firstDefaultNotificationSettingRow.locator('div[class*="input-wrapper"] >> nth=1'); @@ -66,7 +67,19 @@ export const configureUserNotificationSettings = async (page: Page, notifyBy: No // open the user settings modal await openUserSettingsModal(page); - // select our notification type + // select our notification type, first check if we have any already defined, if so, click the + // "Add Notification Step" button + const defaultNotificationSettingsSection = getDefaultNotificationSettingsSectionByTestId(page); + const addNotificationStepText = 'Add Notification Step'; + + if (!(await defaultNotificationSettingsSection.locator(`button >> text=${addNotificationStepText}`).isVisible())) { + await clickButton({ + page, + buttonText: addNotificationStepText, + startingLocator: defaultNotificationSettingsSection, + }); + } + const firstDefaultNotificationTypeDropdopdown = await getFirstDefaultNotificationSettingTypeDropdown(page); await selectDropdownValue({ page, diff --git a/tools/migrators/requirements.txt b/tools/migrators/requirements.txt index ce7823fcdc..25eccecbf9 100644 --- a/tools/migrators/requirements.txt +++ b/tools/migrators/requirements.txt @@ -6,7 +6,7 @@ # attrs==23.2.0 # via pytest -certifi==2024.2.2 +certifi==2024.7.4 # via requests charset-normalizer==3.3.2 # via requests