Skip to content

Commit

Permalink
feat(snubsplain): Use request context to capture query processors
Browse files Browse the repository at this point in the history
This change is a framework for how meta information about the query execution
will be captured and surfaced through the tool. It starts with one simple data
point, the entity query processors that ran. This can be expanded in subsequent
PRs to add more information.

Since the explain functionality is very similar to what the SnQL-to-SQL tool
already does, the two endpoints were collapsed. The tools are still separate,
and use different parts of the output. In the future the tools can be
consolidated.

The meta module that was added is the main interface for capturing the query
steps. The data is stored as a list of steps, to preserve the order of execution
during the pipeline.

Changes:
- Add a meta module to provide an interface for capturing steps during query
execution.
- Set up the meta capture based on the `dry_run` flag using the existing
  snql-to-sql tool
  • Loading branch information
evanh committed Jun 20, 2023
1 parent d8b2360 commit 039c7f1
Show file tree
Hide file tree
Showing 6 changed files with 97 additions and 13 deletions.
11 changes: 8 additions & 3 deletions snuba/admin/static/api_client.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,10 @@ import { SnQLRequest, SnQLResult, SnubaDatasetName } from "./snql_to_sql/types";

import { KafkaTopicData } from "./kafka/types";
import { QuerylogRequest, QuerylogResult } from "./querylog/types";
import { CardinalityQueryRequest, CardinalityQueryResult } from "./cardinality_analyzer/types";
import {
CardinalityQueryRequest,
CardinalityQueryResult,
} from "./cardinality_analyzer/types";

import { AllocationPolicy } from "./capacity_management/types";

Expand Down Expand Up @@ -57,7 +60,9 @@ interface Client {
getPredefinedQuerylogOptions: () => Promise<[PredefinedQuery]>;
getQuerylogSchema: () => Promise<QuerylogResult>;
executeQuerylogQuery: (req: QuerylogRequest) => Promise<QuerylogResult>;
executeCardinalityQuery: (req: CardinalityQueryRequest) => Promise<CardinalityQueryResult>;
executeCardinalityQuery: (
req: CardinalityQueryRequest
) => Promise<CardinalityQueryResult>;
getAllMigrationGroups: () => Promise<MigrationGroupResult[]>;
runMigration: (req: RunMigrationRequest) => Promise<RunMigrationResult>;
getAllowedTools: () => Promise<AllowedTools>;
Expand Down Expand Up @@ -183,7 +188,7 @@ function Client() {
},

convertSnQLQuery: (query: SnQLRequest) => {
const url = baseUrl + "snql_to_sql";
const url = baseUrl + "snuba_debug";
return fetch(url, {
headers: { "Content-Type": "application/json" },
method: "POST",
Expand Down
1 change: 1 addition & 0 deletions snuba/admin/tool_policies.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ class AdminTools(Enum):
KAFKA = "kafka"
CAPACITY_MANAGEMENT = "capacity-management"
CARDINALITY_ANALYZER = "cardinality-analyzer"
SNUBA_EXPLAIN = "snuba_explain"


DEVELOPER_TOOLS: set[AdminTools] = {AdminTools.SNQL_TO_SQL, AdminTools.QUERY_TRACING}
Expand Down
19 changes: 15 additions & 4 deletions snuba/admin/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@
from snuba.migrations.groups import MigrationGroup
from snuba.migrations.runner import MigrationKey, Runner
from snuba.query.exceptions import InvalidQueryException
from snuba.state.explain_meta import explain_cleanup, get_explain_meta
from snuba.utils.metrics.timer import Timer
from snuba.web.views import dataset_query

Expand Down Expand Up @@ -721,15 +722,23 @@ def snuba_datasets() -> Response:
)


@application.route("/snql_to_sql", methods=["POST"])
@check_tool_perms(tools=[AdminTools.SNQL_TO_SQL])
def snql_to_sql() -> Response:
@application.route("/snuba_debug", methods=["POST"])
@check_tool_perms(tools=[AdminTools.SNQL_TO_SQL, AdminTools.SNUBA_EXPLAIN])
def snuba_debug() -> Response:
body = json.loads(request.data)
body["debug"] = True
body["dry_run"] = True
try:
dataset = get_dataset(body.pop("dataset"))
return dataset_query(dataset, body, Timer("admin"))
response = dataset_query(dataset, body, Timer("admin"))
data = response.get_json()
assert isinstance(data, dict)

meta = get_explain_meta()
if meta:
data["explain"] = asdict(meta)
response.data = json.dumps(data)
return response
except InvalidQueryException as exception:
return Response(
json.dumps({"error": {"message": str(exception)}}, indent=4),
Expand All @@ -742,6 +751,8 @@ def snql_to_sql() -> Response:
400,
{"Content-Type": "application/json"},
)
finally:
explain_cleanup()


@application.route("/storages_with_allocation_policies")
Expand Down
4 changes: 4 additions & 0 deletions snuba/pipeline/processors.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

import sentry_sdk

from snuba.state import explain_meta
from snuba.datasets.entities.factory import get_entity
from snuba.datasets.plans.query_plan import ClickhouseQueryPlan
from snuba.query.logical import Query as LogicalQuery
Expand Down Expand Up @@ -74,4 +75,7 @@ def execute_entity_processors(query: LogicalQuery, settings: QuerySettings) -> N
with sentry_sdk.start_span(
description=type(processor).__name__, op="processor"
):
if settings.get_dry_run():
explain_meta.add_step("entity_processor", type(processor).__name__)

processor.process_query(query, settings)
57 changes: 57 additions & 0 deletions snuba/state/explain_meta.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
from __future__ import annotations

from dataclasses import dataclass, field
from typing import Any, cast

from flask import g


@dataclass
class ExplainStep:
    """A single recorded step of query execution, surfaced by the explain tool.

    Instances are accumulated (in execution order) on an ExplainMeta container
    held in the Flask request context; see add_step() below.
    """

    category: str  # The type of step e.g. "processor"
    name: str  # The specific name for the step e.g. "TimeSeriesProcessor"
    data: dict[str, Any] = field(
        default_factory=dict
    )  # Any extra information about the step


@dataclass
class ExplainMeta:
    """Container for all explain steps captured while processing one query.

    Steps are stored as a list (not a mapping) to preserve the order of
    execution through the query pipeline.
    """

    steps: list[ExplainStep] = field(default_factory=list)

    def add_step(self, step: ExplainStep) -> None:
        # Append-only: order of insertion is the order of execution.
        self.steps.append(step)


def add_step(category: str, name: str, data: dict[str, Any] | None = None) -> None:
    """Record one explain step on the current Flask request context.

    Lazily creates the per-request ExplainMeta container on first use.
    Outside of a Flask context this is a silent no-op, so pipeline code can
    call it unconditionally (e.g. from tests or CLI entry points).

    Args:
        category: The type of step, e.g. "entity_processor".
        name: The specific name of the step, e.g. "TimeSeriesProcessor".
        data: Optional extra information about the step.
    """
    try:
        meta = getattr(g, "explain_meta", None)
        if meta is None:
            meta = ExplainMeta()
            g.explain_meta = meta
        meta.add_step(ExplainStep(category, name, {} if data is None else data))
    except RuntimeError:
        # Code is executing outside of a flask context
        return


def get_explain_meta() -> ExplainMeta | None:
    """Return the ExplainMeta captured for the current request, if any.

    Returns None both when no steps were recorded and when running outside
    of a Flask context.
    """
    try:
        meta = getattr(g, "explain_meta", None)
    except RuntimeError:
        # Code is executing outside of a flask context
        return None
    return None if meta is None else cast(ExplainMeta, meta)


def explain_cleanup() -> None:
    """Discard any explain data stored on the Flask request context.

    Safe to call whether or not anything was recorded, and a no-op outside
    of a Flask context.
    """
    try:
        # Supplying a default makes pop tolerate a missing attribute.
        g.pop("explain_meta", None)
    except RuntimeError:
        # Code is executing outside of a flask context
        pass
18 changes: 12 additions & 6 deletions tests/admin/test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -320,19 +320,19 @@ def test_get_snuba_datasets(admin_api: FlaskClient) -> None:
assert set(data) == set(get_enabled_dataset_names())


def test_convert_SnQL_to_SQL_invalid_dataset(admin_api: FlaskClient) -> None:
def test_snuba_debug_invalid_dataset(admin_api: FlaskClient) -> None:
response = admin_api.post(
"/snql_to_sql", data=json.dumps({"dataset": "", "query": ""})
"/snuba_debug", data=json.dumps({"dataset": "", "query": ""})
)
assert response.status_code == 400
data = json.loads(response.data)
assert data["error"]["message"] == "dataset '' does not exist"


@pytest.mark.redis_db
def test_convert_SnQL_to_SQL_invalid_query(admin_api: FlaskClient) -> None:
def test_snuba_debug_invalid_query(admin_api: FlaskClient) -> None:
response = admin_api.post(
"/snql_to_sql", data=json.dumps({"dataset": "sessions", "query": ""})
"/snuba_debug", data=json.dumps({"dataset": "sessions", "query": ""})
)
assert response.status_code == 400
data = json.loads(response.data)
Expand All @@ -344,7 +344,7 @@ def test_convert_SnQL_to_SQL_invalid_query(admin_api: FlaskClient) -> None:

@pytest.mark.redis_db
@pytest.mark.clickhouse_db
def test_convert_SnQL_to_SQL_valid_query(admin_api: FlaskClient) -> None:
def test_snuba_debug_valid_query(admin_api: FlaskClient) -> None:
snql_query = """
MATCH (sessions)
SELECT sessions_crashed
Expand All @@ -354,11 +354,17 @@ def test_convert_SnQL_to_SQL_valid_query(admin_api: FlaskClient) -> None:
AND started < toDateTime('2022-02-01 00:00:00')
"""
response = admin_api.post(
"/snql_to_sql", data=json.dumps({"dataset": "sessions", "query": snql_query})
"/snuba_debug", data=json.dumps({"dataset": "sessions", "query": snql_query})
)
assert response.status_code == 200
data = json.loads(response.data)
assert data["sql"] != ""
assert len(data["explain"]["steps"]) > 0
assert {
"category": "entity_processor",
"name": "BasicFunctionsProcessor",
"data": {},
} in data["explain"]["steps"]


@pytest.mark.redis_db
Expand Down

0 comments on commit 039c7f1

Please sign in to comment.