Skip to content

Commit

Permalink
fix(admin): fix projects finder not looking in composite query condit…
Browse files Browse the repository at this point in the history
…ions (#6066)

Currently, production queries are broken for joins and subqueries because we weren't looking for project IDs in the condition of the composite query. This change fixes that by also collecting project IDs from the composite query's condition.

* fix projects finder not looking in composite query conditions

* remove comment

* add test for the negative case
  • Loading branch information
volokluev committed Jun 25, 2024
1 parent cff7315 commit 0bb40d8
Show file tree
Hide file tree
Showing 3 changed files with 27 additions and 4 deletions.
4 changes: 2 additions & 2 deletions snuba/admin/production_queries/prod_queries.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,9 @@

from snuba import settings
from snuba.admin.audit_log.query import audit_log
from snuba.clickhouse.query_dsl.accessors import get_object_ids_in_query_ast
from snuba.datasets.dataset import Dataset
from snuba.datasets.factory import get_dataset
from snuba.query.data_source.projects_finder import ProjectsFinder
from snuba.query.exceptions import InvalidQueryException
from snuba.query.query_settings import HTTPQuerySettings
from snuba.query.snql.parser import parse_snql_query
Expand Down Expand Up @@ -45,7 +45,7 @@ def _validate_projects_in_query(body: Dict[str, Any], dataset: Dataset) -> None:

request_parts = RequestSchema.build(HTTPQuerySettings).validate(body)
query = parse_snql_query(request_parts.query["query"], dataset)
project_ids = get_object_ids_in_query_ast(query, "project_id")
project_ids = ProjectsFinder().visit(query)
if project_ids == set():
raise InvalidQueryException("Missing project ID")

Expand Down
12 changes: 10 additions & 2 deletions snuba/query/data_source/projects_finder.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,7 @@
from snuba.clickhouse.query_dsl.accessors import get_object_ids_in_query_ast
from snuba.clickhouse.query_dsl.accessors import (
get_object_ids_in_condition,
get_object_ids_in_query_ast,
)
from snuba.query import ProcessableQuery
from snuba.query.composite import CompositeQuery
from snuba.query.data_source.join import IndividualNode, JoinClause, JoinVisitor
Expand Down Expand Up @@ -28,7 +31,12 @@ def _visit_simple_query(
def _visit_composite_query(
    self, data_source: CompositeQuery[LogicalDataSource]
) -> set[int]:
    """Collect the project IDs referenced by a composite query.

    IDs are gathered from two places and unioned:

    * the query's FROM clause, by visiting it with this same visitor;
    * the composite query's own condition, when it has one.

    Returns:
        The union of the project IDs found in both places.
    """
    # Previously this method returned only the FROM-clause result
    # (`return self.visit(data_source.get_from_clause())`), so project
    # filters written in the composite query's condition were never seen.
    from_clause_project_ids = self.visit(data_source.get_from_clause())

    # Explicit annotation: an empty `set()` gives a type checker nothing to
    # infer the element type from.
    condition_project_ids: set[int] = set()
    condition = data_source.get_condition()
    if condition is not None:
        condition_project_ids = get_object_ids_in_condition(condition, "project_id")

    return from_clause_project_ids | condition_project_ids

def visit_individual_node(
self, node: IndividualNode[LogicalDataSource]
Expand Down
15 changes: 15 additions & 0 deletions tests/admin/test_production_queries.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,3 +22,18 @@ def test_disallowed_project_ids() -> None:
prod_queries._validate_projects_in_query(
body={"query": query, "dataset": "events"}, dataset=get_dataset("events")
)


def test_with_joins() -> None:
    """A join query filtering both sides on project 1 must validate cleanly.

    The test passes as long as `_validate_projects_in_query` does not raise.
    """
    query = """MATCH (si: search_issues) -[attributes]-> (g: group_attributes) SELECT g.group_id, ifNull(multiply(toUInt64(max(si.timestamp)), 1000), 0) AS `score` BY g.group_id WHERE si.project_id IN array(1) AND g.project_id IN array(1) AND si.timestamp >= toDateTime('2024-06-17T22:43:14.617430') AND si.timestamp < toDateTime('2024-06-24T22:43:14.617430') AND g.group_id IN array(5001473500) AND si.project_id=1 AND g.project_id=1"""
    request_body = {"query": query, "dataset": "events"}
    prod_queries._validate_projects_in_query(
        body=request_body, dataset=get_dataset("events")
    )


def test_fail_with_joins() -> None:
    """A join query that also references project 42069 must be rejected.

    The query filters on project 42069 (via `IN array(...)`) as well as on
    project 1; validation is expected to raise `InvalidQueryException`.
    NOTE(review): presumably 42069 is outside the allowed project set used
    by the test environment -- confirm against the admin settings.
    """
    query = """MATCH (si: search_issues) -[attributes]-> (g: group_attributes) SELECT g.group_id, ifNull(multiply(toUInt64(max(si.timestamp)), 1000), 0) AS `score` BY g.group_id WHERE si.project_id IN array(42069) AND g.project_id IN array(42069) AND si.timestamp >= toDateTime('2024-06-17T22:43:14.617430') AND si.timestamp < toDateTime('2024-06-24T22:43:14.617430') AND g.group_id IN array(5001473500) AND si.project_id=1 AND g.project_id=1"""
    request_body = {"query": query, "dataset": "events"}
    with pytest.raises(InvalidQueryException):
        # The dataset lookup stays inside the context manager so the set of
        # calls covered by `pytest.raises` is the same as before.
        prod_queries._validate_projects_in_query(
            body=request_body, dataset=get_dataset("events")
        )

0 comments on commit 0bb40d8

Please sign in to comment.