Skip to content

Commit

Permalink
Revert "ref: Remove old query status field (#4423)" (#4493)
Browse files · Browse the repository at this point in the history
This reverts commit 6c43e8e.
  • Loading branch information
evanh committed Jul 11, 2023
1 parent e9f6201 commit f53f4f0
Show file tree
Hide file tree
Showing 6 changed files with 63 additions and 11 deletions.
12 changes: 9 additions & 3 deletions snuba/querylog/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
)
from snuba.datasets.storage import StorageNotAvailable
from snuba.query.exceptions import QueryPlanException
from snuba.querylog.query_metadata import SnubaQueryMetadata, Status
from snuba.querylog.query_metadata import QueryStatus, SnubaQueryMetadata, Status
from snuba.request import Request
from snuba.utils.metrics.timer import Timer
from snuba.utils.metrics.wrapper import MetricsWrapper
Expand All @@ -32,6 +32,7 @@ def _record_timer_metrics(
app_id = request.attribution_info.app_id.key or "none"
parent_api = request.attribution_info.parent_api or "none"
tags = {
"status": query_metadata.status.value,
"request_status": query_metadata.request_status.value,
"slo": query_metadata.slo.value,
"referrer": referrer,
Expand All @@ -51,6 +52,7 @@ def _record_timer_metrics(
# As a result, its status and SLO values are not based on its query_list
status = get_request_status(result)
tags = {
"status": QueryStatus.ERROR.value,
"request_status": status.status.value,
"slo": status.slo.value,
"referrer": referrer,
Expand Down Expand Up @@ -153,7 +155,9 @@ def record_invalid_request(
it records failures during parsing/validation.
This is for client errors.
"""
_record_failure_building_request(request_status, timer, referrer)
_record_failure_building_request(
QueryStatus.INVALID_REQUEST, request_status, timer, referrer
)


def record_error_building_request(
Expand All @@ -164,10 +168,11 @@ def record_error_building_request(
it records failures during parsing/validation.
This is for system errors during parsing/validation.
"""
_record_failure_building_request(request_status, timer, referrer)
_record_failure_building_request(QueryStatus.ERROR, request_status, timer, referrer)


def _record_failure_building_request(
status: QueryStatus,
request_status: Status,
timer: Timer,
referrer: Optional[str],
Expand All @@ -178,6 +183,7 @@ def _record_failure_building_request(
timer.send_metrics_to(
metrics,
tags={
"status": status.value,
"referrer": referrer or "none",
"request_status": request_status.status.value,
"slo": request_status.slo.value,
Expand Down
33 changes: 31 additions & 2 deletions snuba/querylog/query_metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,31 @@
from snuba.utils.metrics.timer import Timer


class QueryStatus(Enum):
    """Coarse outcome of a single ClickHouse query, recorded in the querylog.

    NOTE(review): this enum was previously removed and is restored by this
    revert; `RequestStatus` below describes it as the eventual replacement.
    """

    SUCCESS = "success"
    ERROR = "error"  # A system error
    RATE_LIMITED = "rate-limited"
    INVALID_REQUEST = "invalid-request"
    TIMEOUT = "timeout"


# ClickHouse error codes that should be reported as a query timeout rather
# than a generic error. Codes not listed here fall through to None in
# get_query_status_from_error_codes (callers then default to ERROR).
CLICKHOUSE_ERROR_TO_SNUBA_ERROR_MAPPINGS = {
    ErrorCodes.TOO_SLOW: QueryStatus.TIMEOUT,
    ErrorCodes.TIMEOUT_EXCEEDED: QueryStatus.TIMEOUT,
    ErrorCodes.SOCKET_TIMEOUT: QueryStatus.TIMEOUT,
    ErrorCodes.NETWORK_ERROR: QueryStatus.TIMEOUT,
}


def get_query_status_from_error_codes(code: ErrorCodes) -> QueryStatus | None:
    """Translate a ClickHouse error code into a QueryStatus.

    Returns None when the code has no special mapping, leaving the caller
    to pick its own default status.
    """
    try:
        return CLICKHOUSE_ERROR_TO_SNUBA_ERROR_MAPPINGS[code]
    except KeyError:
        return None


class RequestStatus(Enum):
"""
The different statuses we return for a request.
TODO: This will replace QueryStatus, but both exist as we cut over.
"""

# Successfully returned a response
Expand Down Expand Up @@ -165,6 +187,7 @@ class ClickhouseQueryMetadata:
start_timestamp: Optional[datetime]
end_timestamp: Optional[datetime]
stats: Dict[str, Any]
status: QueryStatus
request_status: Status
profile: ClickhouseQueryProfile
trace_id: Optional[str] = None
Expand All @@ -179,7 +202,7 @@ def to_dict(self) -> snuba_queries_v1.QueryMetadata:
"start_timestamp": start,
"end_timestamp": end,
"stats": self.stats,
"status": self.request_status.status.value, # TODO: Remove this from querylog schema
"status": self.status.value,
"request_status": self.request_status.status.value,
"slo": self.request_status.slo.value,
"trace_id": self.trace_id,
Expand Down Expand Up @@ -221,7 +244,7 @@ def to_dict(self) -> snuba_queries_v1.Querylog:
"start_timestamp": start,
"end_timestamp": end,
"query_list": [q.to_dict() for q in self.query_list],
"status": self.request_status.value, # TODO: Remove this from querylog schema
"status": self.status.value,
"request_status": self.request_status.value,
"slo": self.slo.value,
"timing": self.timer.for_json(),
Expand All @@ -234,6 +257,12 @@ def to_dict(self) -> snuba_queries_v1.Querylog:
request_dict["organization"] = org_id
return request_dict

@property
def status(self) -> QueryStatus:
    """Overall status of the request, derived from the last executed query.

    If no query was recorded (and invalid_query was not logged explicitly),
    assume something failed along the way and report ERROR.
    """
    if not self.query_list:
        return QueryStatus.ERROR
    return self.query_list[-1].status

@property
def request_status(self) -> RequestStatus:
# If we do not have any recorded query and we did not specifically log
Expand Down
16 changes: 15 additions & 1 deletion snuba/web/db_query.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,12 +48,14 @@
from snuba.querylog.query_metadata import (
SLO,
ClickhouseQueryMetadata,
QueryStatus,
Status,
get_query_status_from_error_codes,
get_request_status,
)
from snuba.reader import Reader, Result
from snuba.redis import RedisClientKey, get_redis_client
from snuba.state.cache.abstract import Cache
from snuba.state.cache.abstract import Cache, ExecutionTimeoutError
from snuba.state.cache.redis.backend import RESULT_VALUE, RESULT_WAIT, RedisCache
from snuba.state.quota import ResourceQuota
from snuba.state.rate_limit import (
Expand Down Expand Up @@ -125,6 +127,7 @@ def update_query_metadata_and_stats(
query_metadata_list: MutableSequence[ClickhouseQueryMetadata],
query_settings: Mapping[str, Any],
trace_id: Optional[str],
status: QueryStatus,
request_status: Status,
profile_data: Optional[Dict[str, Any]] = None,
error_code: Optional[int] = None,
Expand All @@ -150,6 +153,7 @@ def update_query_metadata_and_stats(
start_timestamp=start,
end_timestamp=end,
stats=dict(stats),
status=status,
request_status=request_status,
profile=generate_profile(query),
trace_id=trace_id,
Expand Down Expand Up @@ -594,16 +598,24 @@ def _raw_query(
except Exception as cause:
error_code = None
trigger_rate_limiter = None
status = None
request_status = get_request_status(cause)
if isinstance(cause, RateLimitExceeded):
status = QueryStatus.RATE_LIMITED
trigger_rate_limiter = cause.extra_data.get("scope", "")
elif isinstance(cause, ClickhouseError):
error_code = cause.code
status = get_query_status_from_error_codes(error_code)

with configure_scope() as scope:
fingerprint = ["{{default}}", str(cause.code), dataset_name]
if error_code not in constants.CLICKHOUSE_SYSTEMATIC_FAILURES:
fingerprint.append(attribution_info.referrer)
scope.fingerprint = fingerprint
elif isinstance(cause, TimeoutError):
status = QueryStatus.TIMEOUT
elif isinstance(cause, ExecutionTimeoutError):
status = QueryStatus.TIMEOUT

if request_status.slo == SLO.AGAINST:
logger.exception("Error running query: %s\n%s", sql, cause)
Expand All @@ -613,6 +625,7 @@ def _raw_query(
sentry_sdk.set_tag("slo_status", request_status.status.value)

stats = update_with_status(
status=status or QueryStatus.ERROR,
request_status=request_status,
error_code=error_code,
triggered_rate_limiter=str(trigger_rate_limiter),
Expand All @@ -630,6 +643,7 @@ def _raw_query(
) from cause
else:
stats = update_with_status(
status=QueryStatus.SUCCESS,
request_status=get_request_status(),
profile_data=result["profile"],
)
Expand Down
9 changes: 6 additions & 3 deletions tests/datasets/test_querylog_processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
ClickhouseQueryMetadata,
ClickhouseQueryProfile,
FilterProfile,
QueryStatus,
RequestStatus,
SnubaQueryMetadata,
Status,
Expand Down Expand Up @@ -78,6 +79,7 @@ def test_simple() -> None:
"error_code": 386,
"triggered_rate_limiter": "test_rate_limiter",
},
status=QueryStatus.SUCCESS,
request_status=Status(RequestStatus.SUCCESS),
profile=ClickhouseQueryProfile(
time_range=10,
Expand Down Expand Up @@ -200,6 +202,7 @@ def test_missing_fields() -> None:
start_timestamp=None,
end_timestamp=None,
stats={"sample": 10},
status=QueryStatus.SUCCESS,
request_status=Status(RequestStatus.SUCCESS),
profile=ClickhouseQueryProfile(
time_range=10,
Expand All @@ -222,15 +225,14 @@ def test_missing_fields() -> None:
).to_dict()

messages = []
first = dict(deepcopy(orig_message))
first = deepcopy(orig_message)
del first["timing"]
del first["status"]
messages.append(first)

second = dict(deepcopy(orig_message))
second = deepcopy(orig_message)
second["timing"] = None
second["status"] = None
messages.append(second)

for message in messages:
processor = (
Expand Down Expand Up @@ -328,6 +330,7 @@ def test_negative_project_id_fields() -> None:
start_timestamp=None,
end_timestamp=None,
stats={"sample": 10},
status=QueryStatus.SUCCESS,
request_status=Status(RequestStatus.SUCCESS),
profile=ClickhouseQueryProfile(
time_range=10,
Expand Down
2 changes: 1 addition & 1 deletion tests/test_snql_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -861,7 +861,7 @@ def test_timing_metrics_tags(self) -> None:
metric_calls = get_recorded_metric_calls("timing", "api.query")
assert metric_calls is not None
assert len(metric_calls) == 1
assert metric_calls[0].tags["request_status"] == "success"
assert metric_calls[0].tags["status"] == "success"
assert metric_calls[0].tags["referrer"] == "test"
assert metric_calls[0].tags["parent_api"] == "some/endpoint"
assert metric_calls[0].tags["final"] == "False"
Expand Down
2 changes: 1 addition & 1 deletion tests/web/test_db_query.py
Original file line number Diff line number Diff line change
Expand Up @@ -265,7 +265,7 @@ def test_db_query_fail() -> None:
)

assert len(query_metadata_list) == 1
assert query_metadata_list[0].request_status.status.value == "error"
assert query_metadata_list[0].status.value == "error"
assert excinfo.value.extra["stats"] == stats
assert excinfo.value.extra["sql"] is not None

Expand Down

0 comments on commit f53f4f0

Please sign in to comment.