From 9c94e09d15d0c2386943de3cd1a30514aa8543be Mon Sep 17 00:00:00 2001 From: volokluev <3169433+volokluev@users.noreply.github.com> Date: Tue, 11 Jul 2023 10:41:09 -0700 Subject: [PATCH] feat(capman): add project_id as implicit tenant (#4474) In order to port the project based rate limiters to the capman framework, we have to have project ids as tenants, we currently do not. Add logic in the build_request function to add project_id to AttributionInfo.tenant_ids This logic makes an explicit decision that when there are multiple projects being searched in one request, there is no project tenant, instead the organization_id becomes the tenant. --- snuba/request/validation.py | 9 ++++- tests/request/test_build_request.py | 60 +++++++++++++++++++++++++++++ 2 files changed, 68 insertions(+), 1 deletion(-) diff --git a/snuba/request/validation.py b/snuba/request/validation.py index f5aa9d8fbb..93680ead73 100644 --- a/snuba/request/validation.py +++ b/snuba/request/validation.py @@ -98,8 +98,10 @@ def build_request( ) project_ids = get_object_ids_in_query_ast(query, "project_id") + query_project_id = None if project_ids is not None and len(project_ids) == 1: - sentry_sdk.set_tag("snuba_project_id", project_ids.pop()) + query_project_id = project_ids.pop() + sentry_sdk.set_tag("snuba_project_id", query_project_id) org_ids = get_object_ids_in_query_ast(query, "org_id") if org_ids is not None and len(org_ids) == 1: @@ -113,6 +115,11 @@ def build_request( attribution_info["tenant_ids"] = request_parts.attribution_info[ "tenant_ids" ] + if ( + "project_id" not in attribution_info["tenant_ids"] + and query_project_id is not None + ): + attribution_info["tenant_ids"]["project_id"] = query_project_id request_id = uuid.uuid4().hex request = Request( diff --git a/tests/request/test_build_request.py b/tests/request/test_build_request.py index de1871a7e5..8f76c5ab3a 100644 --- a/tests/request/test_build_request.py +++ b/tests/request/test_build_request.py @@ -1,3 +1,5 @@ +from __future__ import annotations + from datetime import datetime from typing import Any, Dict @@ -35,6 +37,7 @@ "GRANULARITY 60" ), "parent_api": "", + "tenant_ids": {"organization_id": 1, "referrer": "test"}, }, binary_condition( BooleanFunctions.AND, @@ -97,3 +100,60 @@ def test_build_request(body: Dict[str, Any], condition: Expression) -> None: assert dict(request.original_body) == body status, differences = request.query.equals(expected_query) assert status == True, f"Query mismatch: {differences}" + + +TENANT_ID_TESTS = [ + pytest.param( + { + "query": ( + "MATCH (events) " + "SELECT count() AS count BY time " + "WHERE " + "project_id IN tuple(1) AND " + "timestamp >= toDateTime('2011-07-01T19:54:15') AND" + "timestamp < toDateTime('2018-07-06T19:54:15') " + "LIMIT 1000 " + "GRANULARITY 60" + ), + "tenant_ids": {"organization_id": 1, "referrer": "test"}, + }, + {"organization_id": 1, "referrer": "test", "project_id": 1}, + id="one project id in query", + ), + pytest.param( + { + "query": ( + "MATCH (events) " + "SELECT count() AS count BY time " + "WHERE " + "project_id IN tuple(1, 2, 3, 4) AND" + "timestamp >= toDateTime('2011-07-01t19:54:15') AND" + "timestamp < toDateTime('2018-07-06t19:54:15') " + "LIMIT 1000 " + "GRANULARITY 60" + ), + "tenant_ids": {"organization_id": 1, "referrer": "test"}, + }, + {"organization_id": 1, "referrer": "test"}, + id="multiple projects, no project tenant", + ), +] + + +@pytest.mark.parametrize("request_payload, expected_tenant_ids", TENANT_ID_TESTS) +def test_tenant_ids( + request_payload: dict[str, Any], expected_tenant_ids: dict[str, Any] +) -> None: + dataset = get_dataset("events") + schema = RequestSchema.build(HTTPQuerySettings) + + request = build_request( + request_payload, + parse_snql_query, + HTTPQuerySettings, + schema, + dataset, + Timer("test"), + "my_request", + ) + assert request.attribution_info.tenant_ids == expected_tenant_ids