From 29b9ebb03156cb4ec7395c5faaaa590f4203838d Mon Sep 17 00:00:00 2001 From: volokluev <3169433+volokluev@users.noreply.github.com> Date: Thu, 11 Apr 2024 13:36:20 -0700 Subject: [PATCH] chore(capman): set default bytes scanned limit for rejecting policy (#5755) This is the limit that is in production right now and it's much higher than what I put in originally. Putting it in the code so we don't lose this number. Making the policy active by default. (we don't need to enforce this in single tenant at this time --- snuba/datasets/configuration/events/storages/errors.yaml | 1 - .../configuration/events/storages/errors_ro.yaml | 1 - .../bytes_scanned_rejecting_policy.py | 9 ++++++--- .../test_bytes_scanned_rejecting_policy.py | 7 ++++--- 4 files changed, 10 insertions(+), 8 deletions(-) diff --git a/snuba/datasets/configuration/events/storages/errors.yaml b/snuba/datasets/configuration/events/storages/errors.yaml index d6019a2a72..0d07219c0b 100644 --- a/snuba/datasets/configuration/events/storages/errors.yaml +++ b/snuba/datasets/configuration/events/storages/errors.yaml @@ -267,7 +267,6 @@ allocation_policies: - project_id - referrer default_config_overrides: - is_active: 0 is_enforced: 0 query_processors: diff --git a/snuba/datasets/configuration/events/storages/errors_ro.yaml b/snuba/datasets/configuration/events/storages/errors_ro.yaml index cde878b869..72cd07c7cf 100644 --- a/snuba/datasets/configuration/events/storages/errors_ro.yaml +++ b/snuba/datasets/configuration/events/storages/errors_ro.yaml @@ -265,7 +265,6 @@ allocation_policies: - project_id - referrer default_config_overrides: - is_active: 0 is_enforced: 0 diff --git a/snuba/query/allocation_policies/bytes_scanned_rejecting_policy.py b/snuba/query/allocation_policies/bytes_scanned_rejecting_policy.py index 3d06c39995..4e5a204ea7 100644 --- a/snuba/query/allocation_policies/bytes_scanned_rejecting_policy.py +++ b/snuba/query/allocation_policies/bytes_scanned_rejecting_policy.py @@ -38,8 +38,9 @@ get_redis_client(RedisClientKey.RATE_LIMITER) ) DEFAULT_OVERRIDE_LIMIT = -1 -DEFAULT_BYTES_SCANNED_LIMIT = 10000000 -DEFAULT_TIMEOUT_PENALIZATION = DEFAULT_BYTES_SCANNED_LIMIT // 20 +PETABYTE = 10**12 +DEFAULT_BYTES_SCANNED_LIMIT = int(1.28 * PETABYTE) +DEFAULT_TIMEOUT_PENALIZATION = DEFAULT_BYTES_SCANNED_LIMIT // 40 class BytesScannedRejectingPolicy(AllocationPolicy): @@ -191,7 +192,9 @@ def _get_quota_allowance( if granted_quota.granted <= 0: explanation[ "reason" - ] = f"{customer_tenant_key} {customer_tenant_value} is over the bytes scanned limit of {scan_limit} for referrer {referrer}" + ] = f"""{customer_tenant_key} {customer_tenant_value} is over the bytes scanned limit of {scan_limit} for referrer {referrer}. + This policy is exceeded when a customer is abusing a specific feature in a way that puts load on clickhouse. If this is happening to + "many customers, that may mean the feature is written in an inefficient way""" explanation["granted_quota"] = granted_quota.granted explanation["limit"] = scan_limit # This is technically a high cardinality tag value however these rejections diff --git a/tests/query/allocation_policies/test_bytes_scanned_rejecting_policy.py b/tests/query/allocation_policies/test_bytes_scanned_rejecting_policy.py index dde0bab664..b27e84a6b4 100644 --- a/tests/query/allocation_policies/test_bytes_scanned_rejecting_policy.py +++ b/tests/query/allocation_policies/test_bytes_scanned_rejecting_policy.py @@ -90,12 +90,13 @@ def test_consume_quota( ) allowance = policy.get_quota_allowance(tenant_ids, QUERY_ID) assert not allowance.can_run - assert allowance.explanation == { - "reason": reason, + assert { "granted_quota": 0, "limit": limit, "storage_key": "StorageKey.ERRORS", - } + }.items() <= allowance.explanation.items() + + assert reason in str(allowance.explanation["reason"]) new_tenant_ids = {**tenant_ids, "referrer": tenant_ids["referrer"] + "abcd"} # a different referrer should work fine though