From 9d89778c53fbfe45faa68c160285aa499568e385 Mon Sep 17 00:00:00 2001 From: getsentry-bot Date: Mon, 15 Apr 2024 17:04:59 +0000 Subject: [PATCH 1/9] release: 24.4.0 --- CHANGELOG.md | 31 +++++++++++++++++++++++++++++++ docs/source/conf.py | 2 +- setup.py | 2 +- 3 files changed, 33 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 9257781f3d..65e3de0811 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,36 @@ # Changelog +## 24.4.0 + +### Various fixes & improvements + +- chore(on-call): Add metric for concurrent queries by referrer that violate policy (#5767) by @enochtangg +- ref: make auto.offset.reset=earliest everywhere (#5765) by @lynnagara +- feat(trace): Add trace id to transactions (#5768) by @wmak +- fix(spans): Move span id above trace id in the prewhere (#5766) by @wmak +- feat(meta): Add record meta column to gauges (#5760) by @evanh +- feat(meta): Add record meta column to distributions (#5759) by @evanh +- feat(meta): Add record_meta column to sets (#5735) by @evanh +- ref: bump sentry-kafka-schemas to 0.1.68 (#5764) by @getsentry-bot +- chore(capman): set default bytes scanned limit for rejecting policy (#5755) by @volokluev +- ref(ch-upgrades): create dist tables functionality (#5737) by @MeredithAnya +- perf(metrics): Use kafka header optimization (#5756) by @nikhars +- feat(meta): Add updated versions of meta tables for counters (#5734) by @evanh +- chore(rust): Update dependencies (#5751) by @nikhars +- chore: update sdk version (#5754) by @kylemumma +- fix: Fix default auto-offset-reset value (#5753) by @lynnagara +- lower max query size to 128KiB (#5750) by @enochtangg +- clean up old simple pipeline (#5732) by @enochtangg +- feat(capman): Long term rejection allocation policy (#5718) by @volokluev +- ref(fetcher): --tables optional now (#5730) by @MeredithAnya +- fix: parser, bug in pushdown filter (#5731) by @kylemumma +- feat(generic-metrics): Add a killswitch to processor (#5617) by @ayirr7 +- feat(replays): Replace python processor with a rust-based processor (#5380) by @cmanallen +- feat(replay): add ReplayViewedEvent to replay processor (#5712) by @aliu3ntry +- chore(deps): bump h2 from 0.3.22 to 0.3.26 in /rust_snuba (#5727) by @dependabot + +_Plus 52 more_ + ## 24.3.0 ### Various fixes & improvements diff --git a/docs/source/conf.py b/docs/source/conf.py index eea908078d..6641f057d0 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -7,7 +7,7 @@ copyright = "2021, Sentry Team and Contributors" author = "Sentry Team and Contributors" -release = "24.4.0.dev0" +release = "24.4.0" # -- General configuration --------------------------------------------------- diff --git a/setup.py b/setup.py index 3825fd5528..0cc3bd14ee 100644 --- a/setup.py +++ b/setup.py @@ -2,7 +2,7 @@ from setuptools import find_packages, setup -VERSION = "24.4.0.dev0" +VERSION = "24.4.0" def get_requirements() -> Sequence[str]: From 30bb06ca76103798f0348dc62a3593e141812502 Mon Sep 17 00:00:00 2001 From: Evan Hicks Date: Mon, 15 Apr 2024 17:02:29 -0400 Subject: [PATCH 2/9] fix(meta): Remove experimental meta tables (#5733) These tables have some missing features: - The meta table doesn't need tag values - The meta table has the tag key as a String instead of an int - Both tables should use a granularity of 8192 Instead of fixing these with modifications, drop the tables completely and recreate them with a correct schema. 
--- snuba/migrations/group_loader.py | 1 + .../0040_remove_counters_meta_tables.py | 187 ++++++++++++++++++ 2 files changed, 188 insertions(+) create mode 100644 snuba/snuba_migrations/generic_metrics/0040_remove_counters_meta_tables.py diff --git a/snuba/migrations/group_loader.py b/snuba/migrations/group_loader.py index 506e9b47c0..c911e24e8d 100644 --- a/snuba/migrations/group_loader.py +++ b/snuba/migrations/group_loader.py @@ -330,6 +330,7 @@ def get_migrations(self) -> Sequence[str]: "0037_add_record_meta_column_sets", "0038_add_record_meta_column_distributions", "0039_add_record_meta_column_gauges", + "0040_remove_counters_meta_tables", ] diff --git a/snuba/snuba_migrations/generic_metrics/0040_remove_counters_meta_tables.py b/snuba/snuba_migrations/generic_metrics/0040_remove_counters_meta_tables.py new file mode 100644 index 0000000000..f2b4d4b23c --- /dev/null +++ b/snuba/snuba_migrations/generic_metrics/0040_remove_counters_meta_tables.py @@ -0,0 +1,187 @@ +from typing import Sequence + +from snuba.clickhouse.columns import AggregateFunction, Column, DateTime, String, UInt +from snuba.clusters.storage_sets import StorageSetKey +from snuba.migrations import migration, operations, table_engines +from snuba.migrations.columns import MigrationModifiers as Modifiers +from snuba.migrations.operations import OperationTarget +from snuba.utils.schemas import Float + + +class Migration(migration.ClickhouseNodeMigration): + blocking = False + granularity = "2048" + tag_value_view_name = "generic_metric_counters_meta_tag_value_aggregation_mv" + tag_value_local_table_name = ( + "generic_metric_counters_meta_tag_value_aggregated_local" + ) + tag_value_dist_table_name = "generic_metric_counters_meta_tag_value_aggregated_dist" + tag_value_table_columns: Sequence[Column[Modifiers]] = [ + Column("project_id", UInt(64)), + Column("metric_id", UInt(64)), + Column("tag_key", UInt(64)), + Column("tag_value", String()), + Column("timestamp", DateTime(modifiers=Modifiers(codecs=["DoubleDelta"]))), + Column("retention_days", UInt(16)), + Column("count", AggregateFunction("sum", [Float(64)])), + ] + + meta_view_name = "generic_metric_counters_meta_aggregation_mv" + meta_local_table_name = "generic_metric_counters_meta_aggregated_local" + meta_dist_table_name = "generic_metric_counters_meta_aggregated_dist" + meta_table_columns: Sequence[Column[Modifiers]] = [ + Column("org_id", UInt(64)), + Column("project_id", UInt(64)), + Column("use_case_id", String(Modifiers(low_cardinality=True))), + Column("metric_id", UInt(64)), + Column("tag_key", String()), + Column("timestamp", DateTime(modifiers=Modifiers(codecs=["DoubleDelta"]))), + Column("retention_days", UInt(16)), + Column("tag_values", AggregateFunction("groupUniqArray", [String()])), + Column("value", AggregateFunction("sum", [Float(64)])), + ] + + storage_set_key = StorageSetKey.GENERIC_METRICS_COUNTERS + + def forwards_ops(self) -> Sequence[operations.SqlOperation]: + return [ + operations.DropTable( + storage_set=self.storage_set_key, + table_name=self.tag_value_view_name, + target=OperationTarget.LOCAL, + ), + operations.DropTable( + storage_set=self.storage_set_key, + table_name=self.tag_value_dist_table_name, + target=OperationTarget.DISTRIBUTED, + ), + operations.DropTable( + storage_set=self.storage_set_key, + table_name=self.tag_value_local_table_name, + target=OperationTarget.LOCAL, + ), + operations.DropTable( + storage_set=self.storage_set_key, + table_name=self.meta_view_name, + target=OperationTarget.LOCAL, + ), + operations.DropTable( + 
storage_set=self.storage_set_key, + table_name=self.meta_dist_table_name, + target=OperationTarget.DISTRIBUTED, + ), + operations.DropTable( + storage_set=self.storage_set_key, + table_name=self.meta_local_table_name, + target=OperationTarget.LOCAL, + ), + ] + + def backwards_ops(self) -> Sequence[operations.SqlOperation]: + return [ + operations.CreateTable( + storage_set=self.storage_set_key, + table_name=self.tag_value_local_table_name, + engine=table_engines.AggregatingMergeTree( + storage_set=self.storage_set_key, + order_by="(project_id, metric_id, tag_key, tag_value, timestamp)", + primary_key="(project_id, metric_id, tag_key, tag_value, timestamp)", + partition_by="(retention_days, toMonday(timestamp))", + settings={"index_granularity": self.granularity}, + ttl="timestamp + toIntervalDay(retention_days)", + ), + columns=self.tag_value_table_columns, + target=OperationTarget.LOCAL, + ), + operations.CreateTable( + storage_set=self.storage_set_key, + table_name=self.tag_value_dist_table_name, + engine=table_engines.Distributed( + local_table_name=self.tag_value_local_table_name, sharding_key=None + ), + columns=self.tag_value_table_columns, + target=OperationTarget.DISTRIBUTED, + ), + operations.CreateMaterializedView( + storage_set=self.storage_set_key, + view_name=self.tag_value_view_name, + columns=self.tag_value_table_columns, + destination_table_name=self.tag_value_local_table_name, + target=OperationTarget.LOCAL, + query=""" + SELECT + project_id, + metric_id, + tag_key, + tag_value, + toStartOfWeek(timestamp) as timestamp, + retention_days, + sumState(count_value) as count + FROM generic_metric_counters_raw_local + ARRAY JOIN + tags.key AS tag_key, tags.raw_value AS tag_value + WHERE record_meta = 1 + GROUP BY + project_id, + metric_id, + tag_key, + tag_value, + timestamp, + retention_days + """, + ), + operations.CreateTable( + storage_set=self.storage_set_key, + table_name=self.meta_local_table_name, + engine=table_engines.AggregatingMergeTree( + storage_set=self.storage_set_key, + order_by="(org_id, project_id, use_case_id, metric_id, tag_key, timestamp)", + primary_key="(org_id, project_id, use_case_id, metric_id, tag_key, timestamp)", + partition_by="(retention_days, toMonday(timestamp))", + settings={"index_granularity": self.granularity}, + ttl="timestamp + toIntervalDay(retention_days)", + ), + columns=self.meta_table_columns, + target=OperationTarget.LOCAL, + ), + operations.CreateTable( + storage_set=self.storage_set_key, + table_name=self.meta_dist_table_name, + engine=table_engines.Distributed( + local_table_name=self.meta_local_table_name, sharding_key=None + ), + columns=self.meta_table_columns, + target=OperationTarget.DISTRIBUTED, + ), + operations.CreateMaterializedView( + storage_set=self.storage_set_key, + view_name=self.meta_view_name, + columns=self.meta_table_columns, + destination_table_name=self.meta_local_table_name, + target=OperationTarget.LOCAL, + query=""" + SELECT + org_id, + project_id, + use_case_id, + metric_id, + tag_key, + toStartOfWeek(timestamp) as timestamp, + retention_days, + groupUniqArrayState(tag_value) as `tag_values`, + sumState(count_value) as count + FROM generic_metric_counters_raw_local + ARRAY JOIN + tags.key AS tag_key, tags.raw_value AS tag_value + WHERE record_meta = 1 + GROUP BY + org_id, + project_id, + use_case_id, + metric_id, + tag_key, + timestamp, + retention_days + """, + ), + ] From 2248bb554d66e9b69713149874b35af1315beb97 Mon Sep 17 00:00:00 2001 From: getsentry-bot Date: Mon, 15 Apr 2024 22:02:48 +0000 Subject: 
[PATCH 3/9] meta: Bump new development version --- docs/source/conf.py | 2 +- setup.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/source/conf.py b/docs/source/conf.py index 6641f057d0..d75e71bc66 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -7,7 +7,7 @@ copyright = "2021, Sentry Team and Contributors" author = "Sentry Team and Contributors" -release = "24.4.0" +release = "24.5.0.dev0" # -- General configuration --------------------------------------------------- diff --git a/setup.py b/setup.py index 0cc3bd14ee..8c2aa2a2db 100644 --- a/setup.py +++ b/setup.py @@ -2,7 +2,7 @@ from setuptools import find_packages, setup -VERSION = "24.4.0" +VERSION = "24.5.0.dev0" def get_requirements() -> Sequence[str]: From 4e97382e30c12fc18d19f9474d6fa1e563a2ca4d Mon Sep 17 00:00:00 2001 From: Lyn Nagara <1779792+lynnagara@users.noreply.github.com> Date: Mon, 15 Apr 2024 21:01:48 -0700 Subject: [PATCH 4/9] inc-715: rust consumer can stop processing messages at specific timestamp (#5779) --- rust_snuba/benches/processors.rs | 1 + rust_snuba/src/consumer.rs | 4 +++ rust_snuba/src/factory.rs | 5 +++ rust_snuba/src/strategies/processor.rs | 42 ++++++++++++++++++++++++-- snuba/cli/rust_consumer.py | 7 +++++ 5 files changed, 57 insertions(+), 2 deletions(-) diff --git a/rust_snuba/benches/processors.rs b/rust_snuba/benches/processors.rs index 51b5015cc6..b7246b19be 100644 --- a/rust_snuba/benches/processors.rs +++ b/rust_snuba/benches/processors.rs @@ -96,6 +96,7 @@ fn create_factory( logical_topic_name: "shared-resources-usage".to_string(), broker_config: BrokerConfig::default(), }, + None, ); Box::new(factory) } diff --git a/rust_snuba/src/consumer.rs b/rust_snuba/src/consumer.rs index 4330362f39..b0f8d85023 100644 --- a/rust_snuba/src/consumer.rs +++ b/rust_snuba/src/consumer.rs @@ -39,6 +39,7 @@ pub fn consumer( max_poll_interval_ms: usize, python_max_queue_depth: Option, health_check_file: Option<&str>, + stop_at_timestamp: Option, ) { py.allow_threads(|| { consumer_impl( @@ -53,6 +54,7 @@ pub fn consumer( max_poll_interval_ms, python_max_queue_depth, health_check_file, + stop_at_timestamp, ) }); } @@ -70,6 +72,7 @@ pub fn consumer_impl( max_poll_interval_ms: usize, python_max_queue_depth: Option, health_check_file: Option<&str>, + stop_at_timestamp: Option, ) -> usize { setup_logging(); @@ -217,6 +220,7 @@ pub fn consumer_impl( consumer_group.to_owned(), Topic::new(&consumer_config.raw_topic.physical_topic_name), consumer_config.accountant_topic, + stop_at_timestamp, ); let topic = Topic::new(&consumer_config.raw_topic.physical_topic_name); diff --git a/rust_snuba/src/factory.rs b/rust_snuba/src/factory.rs index 48d5bd0ece..c0c2a91cc5 100644 --- a/rust_snuba/src/factory.rs +++ b/rust_snuba/src/factory.rs @@ -54,6 +54,7 @@ pub struct ConsumerStrategyFactory { physical_consumer_group: String, physical_topic_name: Topic, accountant_topic_config: config::TopicConfig, + stop_at_timestamp: Option, } impl ConsumerStrategyFactory { @@ -78,6 +79,7 @@ impl ConsumerStrategyFactory { physical_consumer_group: String, physical_topic_name: Topic, accountant_topic_config: config::TopicConfig, + stop_at_timestamp: Option, ) -> Self { Self { storage_config, @@ -99,6 +101,7 @@ impl ConsumerStrategyFactory { physical_consumer_group, physical_topic_name, accountant_topic_config, + stop_at_timestamp, } } } @@ -220,6 +223,7 @@ impl ProcessingStrategyFactory for ConsumerStrategyFactory { config::ProcessorConfig { env_config: self.env_config.clone(), }, + self.stop_at_timestamp, ); 
} (true, Some(processors::ProcessingFunctionType::ProcessingFunction(func))) => { @@ -232,6 +236,7 @@ impl ProcessingStrategyFactory for ConsumerStrategyFactory { config::ProcessorConfig { env_config: self.env_config.clone(), }, + self.stop_at_timestamp, ) } ( diff --git a/rust_snuba/src/strategies/processor.rs b/rust_snuba/src/strategies/processor.rs index 272cd3266a..ee0e30ed15 100644 --- a/rust_snuba/src/strategies/processor.rs +++ b/rust_snuba/src/strategies/processor.rs @@ -26,6 +26,7 @@ pub fn make_rust_processor( enforce_schema: bool, concurrency: &ConcurrencyConfig, processor_config: ProcessorConfig, + stop_at_timestamp: Option, ) -> Box> { let schema = get_schema(schema_name, enforce_schema); @@ -34,7 +35,18 @@ pub fn make_rust_processor( partition: Partition, offset: u64, timestamp: DateTime, + stop_at_timestamp: Option, ) -> anyhow::Result>> { + // Don't process any more messages + if let Some(stop) = stop_at_timestamp { + if stop < timestamp.timestamp() { + let payload = BytesInsertBatch::default(); + return Ok(Message::new_broker_message( + payload, partition, offset, timestamp, + )); + } + } + let payload = BytesInsertBatch::new( transformed.rows, Some(timestamp), @@ -62,6 +74,7 @@ pub fn make_rust_processor( func, result_to_next_msg, processor_config, + stop_at_timestamp, }; Box::new(RunTaskInThreads::new( @@ -79,6 +92,7 @@ pub fn make_rust_processor_with_replacements( enforce_schema: bool, concurrency: &ConcurrencyConfig, processor_config: ProcessorConfig, + stop_at_timestamp: Option, ) -> Box> { let schema = get_schema(schema_name, enforce_schema); @@ -87,7 +101,21 @@ pub fn make_rust_processor_with_replacements( partition: Partition, offset: u64, timestamp: DateTime, + stop_at_timestamp: Option, ) -> anyhow::Result>>> { + // Don't process any more messages + if let Some(stop) = stop_at_timestamp { + if stop < timestamp.timestamp() { + let payload = BytesInsertBatch::default(); + return Ok(Message::new_broker_message( + InsertOrReplacement::Insert(payload), + partition, + offset, + timestamp, + )); + } + } + let payload = match transformed { InsertOrReplacement::Insert(transformed) => { InsertOrReplacement::Insert(BytesInsertBatch::new( @@ -120,6 +148,7 @@ pub fn make_rust_processor_with_replacements( func, result_to_next_msg, processor_config, + stop_at_timestamp, }; Box::new(RunTaskInThreads::new( @@ -155,9 +184,11 @@ struct MessageProcessor { fn(KafkaPayload, KafkaMessageMetadata, config: &ProcessorConfig) -> anyhow::Result, // Function that return Message to be passed to the next strategy. Gets passed TResult, // as well as the message's partition, offset and timestamp. 
+ #[allow(clippy::type_complexity)] result_to_next_msg: - fn(TResult, Partition, u64, DateTime) -> anyhow::Result>, + fn(TResult, Partition, u64, DateTime, Option) -> anyhow::Result>, processor_config: ProcessorConfig, + stop_at_timestamp: Option, } impl MessageProcessor { @@ -216,7 +247,13 @@ impl MessageProcessor { let transformed = (self.func)(msg.payload, metadata, &self.processor_config)?; - (self.result_to_next_msg)(transformed, msg.partition, msg.offset, msg.timestamp) + (self.result_to_next_msg)( + transformed, + msg.partition, + msg.offset, + msg.timestamp, + self.stop_at_timestamp, + ) } } @@ -329,6 +366,7 @@ mod tests { true, &concurrency, ProcessorConfig::default(), + None, ); let example = "{ diff --git a/snuba/cli/rust_consumer.py b/snuba/cli/rust_consumer.py index 9ad4302ce6..d636966f06 100644 --- a/snuba/cli/rust_consumer.py +++ b/snuba/cli/rust_consumer.py @@ -148,6 +148,11 @@ default=False, help="Enforce schema on the raw events topic.", ) +@click.option( + "--stop-at-timestamp", + type=int, + help="Unix timestamp after which to stop processing messages", +) def rust_consumer( *, storage_names: Sequence[str], @@ -174,6 +179,7 @@ def rust_consumer( python_max_queue_depth: Optional[int], health_check_file: Optional[str], enforce_schema: bool, + stop_at_timestamp: Optional[int], ) -> None: """ Experimental alternative to `snuba consumer` @@ -215,6 +221,7 @@ def rust_consumer( max_poll_interval_ms, python_max_queue_depth, health_check_file, + stop_at_timestamp, ) sys.exit(exitcode) From 64381e52f3ac959376a11b62b0f93d4c584d9338 Mon Sep 17 00:00:00 2001 From: Enoch Tang Date: Tue, 16 Apr 2024 10:45:47 -0400 Subject: [PATCH 5/9] chore(on-call): Add CrossOrgQueryAllocationPolicy to errors (#5774) * add CrossOrgQueryAllocationPolicy to errors * add getsentry.tasks.backfill_grouping_records to cross org policy * fix test * fix more test --- .../configuration/events/storages/errors.yaml | 11 +++++++++++ .../configuration/events/storages/errors_ro.yaml | 11 +++++++++++ tests/admin/test_api.py | 4 ++-- tests/web/test_db_query.py | 5 +++++ 4 files changed, 29 insertions(+), 2 deletions(-) diff --git a/snuba/datasets/configuration/events/storages/errors.yaml b/snuba/datasets/configuration/events/storages/errors.yaml index 0d07219c0b..e0347a8d4a 100644 --- a/snuba/datasets/configuration/events/storages/errors.yaml +++ b/snuba/datasets/configuration/events/storages/errors.yaml @@ -268,6 +268,17 @@ allocation_policies: - referrer default_config_overrides: is_enforced: 0 + - name: CrossOrgQueryAllocationPolicy + args: + required_tenant_types: + - referrer + default_config_overrides: + is_enforced: 0 + is_active: 0 + cross_org_referrer_limits: + getsentry.tasks.backfill_grouping_records: + max_threads: 2 + concurrent_limit: 4 query_processors: - processor: UniqInSelectAndHavingProcessor diff --git a/snuba/datasets/configuration/events/storages/errors_ro.yaml b/snuba/datasets/configuration/events/storages/errors_ro.yaml index 72cd07c7cf..68f18b7e00 100644 --- a/snuba/datasets/configuration/events/storages/errors_ro.yaml +++ b/snuba/datasets/configuration/events/storages/errors_ro.yaml @@ -266,6 +266,17 @@ allocation_policies: - referrer default_config_overrides: is_enforced: 0 + - name: CrossOrgQueryAllocationPolicy + args: + required_tenant_types: + - referrer + default_config_overrides: + is_enforced: 0 + is_active: 0 + cross_org_referrer_limits: + getsentry.tasks.backfill_grouping_records: + max_threads: 2 + concurrent_limit: 4 query_processors: diff --git a/tests/admin/test_api.py 
b/tests/admin/test_api.py index 085e0c0490..58f38ea578 100644 --- a/tests/admin/test_api.py +++ b/tests/admin/test_api.py @@ -502,7 +502,7 @@ def mock_record(user: Any, action: Any, data: Any, notify: Any) -> None: assert response.status_code == 200 # three policies - assert response.json is not None and len(response.json) == 4 + assert response.json is not None and len(response.json) == 5 policy_configs = response.json bytes_scanned_policy = [ policy @@ -542,7 +542,7 @@ def mock_record(user: Any, action: Any, data: Any, notify: Any) -> None: response = admin_api.get("/allocation_policy_configs/errors") assert response.status_code == 200 - assert response.json is not None and len(response.json) == 4 + assert response.json is not None and len(response.json) == 5 assert { "default": -1, "description": "Number of bytes a specific org can scan in a 10 minute " diff --git a/tests/web/test_db_query.py b/tests/web/test_db_query.py index c96207d27f..3b352d7543 100644 --- a/tests/web/test_db_query.py +++ b/tests/web/test_db_query.py @@ -288,6 +288,11 @@ def test_db_query_success() -> None: "explanation": {}, "max_threads": 10, }, + "CrossOrgQueryAllocationPolicy": { + "can_run": True, + "explanation": {}, + "max_threads": 10, + }, } assert len(query_metadata_list) == 1 assert result.extra["stats"] == stats From 656d28ccd0cb6d0cd01f6fa467d9550839a0bc3a Mon Sep 17 00:00:00 2001 From: Lyn Nagara <1779792+lynnagara@users.noreply.github.com> Date: Tue, 16 Apr 2024 09:44:09 -0700 Subject: [PATCH 6/9] ref: Rust consumer should not skip writes by default (#5778) Previous default was for testing, consumer should not skip writes by default. This matches the default value on the python consumer. --- snuba/cli/rust_consumer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/snuba/cli/rust_consumer.py b/snuba/cli/rust_consumer.py index d636966f06..f666285d3c 100644 --- a/snuba/cli/rust_consumer.py +++ b/snuba/cli/rust_consumer.py @@ -105,7 +105,7 @@ "--skip-write/--no-skip-write", "skip_write", help="Skip the write to clickhouse", - default=True, + default=False, ) @click.option( "--concurrency", From 55d2efc26b439d456fd8dfe6f03620cbfb0a72ca Mon Sep 17 00:00:00 2001 From: Josh Ferge Date: Tue, 16 Apr 2024 10:00:07 -0700 Subject: [PATCH 7/9] feat(replays): add replay_id column to merged discover table (#5777) * feat(replays): add replay_id column to merged discover table * dont use legacy migration --- snuba/migrations/group_loader.py | 1 + .../discover/0008_discover_add_replay_id.py | 56 +++++++++++++++++++ 2 files changed, 57 insertions(+) create mode 100644 snuba/snuba_migrations/discover/0008_discover_add_replay_id.py diff --git a/snuba/migrations/group_loader.py b/snuba/migrations/group_loader.py index c911e24e8d..e1625a05c7 100644 --- a/snuba/migrations/group_loader.py +++ b/snuba/migrations/group_loader.py @@ -136,6 +136,7 @@ def get_migrations(self) -> Sequence[str]: "0005_discover_fix_transaction_name", "0006_discover_add_trace_id", "0007_discover_add_span_id", + "0008_discover_add_replay_id", ] diff --git a/snuba/snuba_migrations/discover/0008_discover_add_replay_id.py b/snuba/snuba_migrations/discover/0008_discover_add_replay_id.py new file mode 100644 index 0000000000..673e270630 --- /dev/null +++ b/snuba/snuba_migrations/discover/0008_discover_add_replay_id.py @@ -0,0 +1,56 @@ +from typing import Sequence + +from snuba.clickhouse.columns import UUID, Column +from snuba.clusters.storage_sets import StorageSetKey +from snuba.migrations import migration, operations +from 
snuba.migrations.columns import MigrationModifiers as Modifiers +from snuba.migrations.operations import OperationTarget + + +class Migration(migration.ClickhouseNodeMigration): + """ + Add replay_id to merge table + """ + + blocking = False + + def forwards_ops(self) -> Sequence[operations.SqlOperation]: + return [ + operations.AddColumn( + storage_set=StorageSetKey.DISCOVER, + table_name=table_name, + column=Column("replay_id", UUID(Modifiers(nullable=True))), + after="span_id", + target=target, + ) + for table_name, target in [ + ("discover_local", OperationTarget.LOCAL), + ("discover_dist", OperationTarget.DISTRIBUTED), + ] + ] + + def backwards_ops(self) -> Sequence[operations.SqlOperation]: + return [ + operations.DropColumn( + storage_set=StorageSetKey.DISCOVER, + table_name=table_name, + column_name="replay_id", + target=target, + ) + for table_name, target in [ + ("discover_dist", OperationTarget.DISTRIBUTED), + ("discover_local", OperationTarget.LOCAL), + ] + ] + + def forwards_local(self) -> Sequence[operations.SqlOperation]: + return self.forwards_ops() + + def backwards_local(self) -> Sequence[operations.SqlOperation]: + return self.backwards_ops() + + def forwards_dist(self) -> Sequence[operations.SqlOperation]: + return self.forwards_ops() + + def backwards_dist(self) -> Sequence[operations.SqlOperation]: + return self.backwards_ops() From 1615672dcbbd0b8ff54e9a092a8ba6353c387480 Mon Sep 17 00:00:00 2001 From: Kyle Mumma Date: Tue, 16 Apr 2024 13:53:02 -0700 Subject: [PATCH 8/9] fix: fix mypy --strict vscode (#5781) --- .vscode/settings.json | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.vscode/settings.json b/.vscode/settings.json index 7df4e96f1a..f9e0f3a0ca 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -57,7 +57,6 @@ }, "python.linting.mypyEnabled": true, - "python.linting.mypyArgs": ["--strict"], "python.linting.flake8Enabled": true, "python.formatting.provider": "black", // https://github.com/DonJayamanne/pythonVSCode/issues/992 @@ -65,5 +64,6 @@ // test discovery is sluggish and the UI around running // tests is often in your way and misclicked "python.testing.pytestEnabled": true, - "python.testing.pytestArgs": ["tests"] + "python.testing.pytestArgs": ["tests"], + "mypy-type-checker.args": ["--strict"] } From 2f1509fc0f6117c92650bb8d170924a94aa5bd63 Mon Sep 17 00:00:00 2001 From: getsentry-bot Date: Tue, 16 Apr 2024 23:38:34 +0000 Subject: [PATCH 9/9] Revert "feat(replays): add replay_id column to merged discover table (#5777)" This reverts commit 55d2efc26b439d456fd8dfe6f03620cbfb0a72ca. 
Co-authored-by: dbanda <1960911+dbanda@users.noreply.github.com> --- snuba/migrations/group_loader.py | 1 - .../discover/0008_discover_add_replay_id.py | 56 ------------------- 2 files changed, 57 deletions(-) delete mode 100644 snuba/snuba_migrations/discover/0008_discover_add_replay_id.py diff --git a/snuba/migrations/group_loader.py b/snuba/migrations/group_loader.py index e1625a05c7..c911e24e8d 100644 --- a/snuba/migrations/group_loader.py +++ b/snuba/migrations/group_loader.py @@ -136,7 +136,6 @@ def get_migrations(self) -> Sequence[str]: "0005_discover_fix_transaction_name", "0006_discover_add_trace_id", "0007_discover_add_span_id", - "0008_discover_add_replay_id", ] diff --git a/snuba/snuba_migrations/discover/0008_discover_add_replay_id.py b/snuba/snuba_migrations/discover/0008_discover_add_replay_id.py deleted file mode 100644 index 673e270630..0000000000 --- a/snuba/snuba_migrations/discover/0008_discover_add_replay_id.py +++ /dev/null @@ -1,56 +0,0 @@ -from typing import Sequence - -from snuba.clickhouse.columns import UUID, Column -from snuba.clusters.storage_sets import StorageSetKey -from snuba.migrations import migration, operations -from snuba.migrations.columns import MigrationModifiers as Modifiers -from snuba.migrations.operations import OperationTarget - - -class Migration(migration.ClickhouseNodeMigration): - """ - Add replay_id to merge table - """ - - blocking = False - - def forwards_ops(self) -> Sequence[operations.SqlOperation]: - return [ - operations.AddColumn( - storage_set=StorageSetKey.DISCOVER, - table_name=table_name, - column=Column("replay_id", UUID(Modifiers(nullable=True))), - after="span_id", - target=target, - ) - for table_name, target in [ - ("discover_local", OperationTarget.LOCAL), - ("discover_dist", OperationTarget.DISTRIBUTED), - ] - ] - - def backwards_ops(self) -> Sequence[operations.SqlOperation]: - return [ - operations.DropColumn( - storage_set=StorageSetKey.DISCOVER, - table_name=table_name, - column_name="replay_id", - target=target, - ) - for table_name, target in [ - ("discover_dist", OperationTarget.DISTRIBUTED), - ("discover_local", OperationTarget.LOCAL), - ] - ] - - def forwards_local(self) -> Sequence[operations.SqlOperation]: - return self.forwards_ops() - - def backwards_local(self) -> Sequence[operations.SqlOperation]: - return self.backwards_ops() - - def forwards_dist(self) -> Sequence[operations.SqlOperation]: - return self.forwards_ops() - - def backwards_dist(self) -> Sequence[operations.SqlOperation]: - return self.backwards_ops()
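
Regarding PATCH 4/9: the new `--stop-at-timestamp` option takes a Unix timestamp, and the processor replaces any message whose broker timestamp is strictly later than that cutoff with an empty insert batch instead of processing it. Below is a minimal Python sketch of that cutoff check, for illustration only; the helper name and example values are hypothetical, and the real guard is the `stop_at_timestamp` check added in rust_snuba/src/strategies/processor.rs.

    from datetime import datetime, timezone
    from typing import Optional

    def past_cutoff(stop_at_timestamp: Optional[int], message_ts: datetime) -> bool:
        # Mirrors `if stop < timestamp.timestamp()` in make_rust_processor:
        # only messages strictly later than the cutoff are skipped; None disables the check.
        return stop_at_timestamp is not None and stop_at_timestamp < int(message_ts.timestamp())

    # Example cutoff: stop consuming anything produced after 2024-04-16 00:00 UTC.
    cutoff = int(datetime(2024, 4, 16, tzinfo=timezone.utc).timestamp())
    print(past_cutoff(cutoff, datetime(2024, 4, 16, 0, 1, tzinfo=timezone.utc)))  # True: skipped
    print(past_cutoff(None, datetime(2024, 4, 16, 0, 1, tzinfo=timezone.utc)))    # False: processed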