From 6165f0a87d022c64bbdf682f051ddb3dcaf64d47 Mon Sep 17 00:00:00 2001 From: Evan Hicks Date: Mon, 6 Nov 2023 09:51:24 -0500 Subject: [PATCH] fix(slo): Track exception class name on errors (#4981) Add the exception class name to the metrics we emit on failure, so we can get a better idea of what the actual bugs are. get_request_status didn't seem to be correctly classifying invalid requests, so also change that. --- snuba/querylog/__init__.py | 20 ++++++++++++++++---- snuba/querylog/query_metadata.py | 6 +++++- snuba/request/validation.py | 8 ++++++-- 3 files changed, 27 insertions(+), 7 deletions(-) diff --git a/snuba/querylog/__init__.py b/snuba/querylog/__init__.py index e1984d8543..15c8f58527 100644 --- a/snuba/querylog/__init__.py +++ b/snuba/querylog/__init__.py @@ -1,3 +1,5 @@ +from __future__ import annotations + from random import random from typing import Any, Mapping, Optional, Union @@ -172,7 +174,10 @@ def _add_tags( def record_invalid_request( - timer: Timer, request_status: Status, referrer: Optional[str] + timer: Timer, + request_status: Status, + referrer: Optional[str], + exception_name: str | None = None, ) -> None: """ Records a failed request before the request object is created, so @@ -180,19 +185,24 @@ def record_invalid_request( This is for client errors. """ _record_failure_building_request( - QueryStatus.INVALID_REQUEST, request_status, timer, referrer + QueryStatus.INVALID_REQUEST, request_status, timer, referrer, exception_name ) def record_error_building_request( - timer: Timer, request_status: Status, referrer: Optional[str] + timer: Timer, + request_status: Status, + referrer: Optional[str], + exception_name: str | None = None, ) -> None: """ Records a failed request before the request object is created, so it records failures during parsing/validation. This is for system errors during parsing/validation. """ - _record_failure_building_request(QueryStatus.ERROR, request_status, timer, referrer) + _record_failure_building_request( + QueryStatus.ERROR, request_status, timer, referrer, exception_name + ) def _record_failure_building_request( @@ -200,6 +210,7 @@ def _record_failure_building_request( request_status: Status, timer: Timer, referrer: Optional[str], + exception_name: str | None = None, ) -> None: # TODO: Revisit if recording some data for these queries in the querylog # table would be useful. @@ -211,6 +222,7 @@ def _record_failure_building_request( "referrer": referrer or "none", "request_status": request_status.status.value, "slo": request_status.slo.value, + "exception": exception_name or "none", }, ) _add_tags(timer) diff --git a/snuba/querylog/query_metadata.py b/snuba/querylog/query_metadata.py index 347ed838e7..d228a5f766 100644 --- a/snuba/querylog/query_metadata.py +++ b/snuba/querylog/query_metadata.py @@ -10,7 +10,9 @@ from snuba.clickhouse.errors import ClickhouseError from snuba.datasets.storage import StorageNotAvailable +from snuba.query.exceptions import InvalidQueryException from snuba.request import Request +from snuba.request.exceptions import InvalidJsonRequestException from snuba.state.cache.abstract import ExecutionTimeoutError from snuba.state.rate_limit import TABLE_RATE_LIMIT_NAME, RateLimitExceeded from snuba.utils.metrics.timer import Timer @@ -123,7 +125,9 @@ def get_request_status(cause: Exception | None = None) -> Status: slo_status = RequestStatus.CACHE_SET_TIMEOUT elif isinstance(cause, ExecutionTimeoutError): slo_status = RequestStatus.CACHE_WAIT_TIMEOUT - elif isinstance(cause, StorageNotAvailable): + elif isinstance( + cause, (StorageNotAvailable, InvalidJsonRequestException, InvalidQueryException) + ): slo_status = RequestStatus.INVALID_REQUEST else: slo_status = RequestStatus.ERROR diff --git a/snuba/request/validation.py b/snuba/request/validation.py index 560e7ff713..dc61348b00 100644 --- a/snuba/request/validation.py +++ b/snuba/request/validation.py @@ -137,11 +137,15 @@ def build_request( ) except (InvalidJsonRequestException, InvalidQueryException) as exception: request_status = get_request_status(exception) - record_invalid_request(timer, request_status, referrer) + record_invalid_request( + timer, request_status, referrer, str(type(exception).__name__) + ) raise exception except Exception as exception: request_status = get_request_status(exception) - record_error_building_request(timer, request_status, referrer) + record_error_building_request( + timer, request_status, referrer, str(type(exception).__name__) + ) raise exception span.set_data(