getsentry · corps · Jul 18, 2024 · Jul 17, 2024 · Jul 18, 2024 · Jul 18, 2024
@@ -48,3 +48,6 @@ formatter = generic
 [formatter_generic]
 format = %(levelname)-5.5s [%(name)s] %(message)s
 datefmt = %H:%M:%S
+
+[exclude]
+matches = grouping_records_p.+,grouping_records_new_p.+
@@ -1,8 +1,10 @@
 import logging
+import re
 from logging.config import fileConfig
 
 from alembic import context
 from flask import current_app
+from sqlalchemy.sql.schema import SchemaItem
 
 # this is the Alembic Config object, which provides
 # access to the values within the .ini file in use.
@@ -94,12 +96,38 @@ def process_revision_directives(context, revision, directives):
     connectable = get_engine()
 
     with connectable.connect() as connection:
-        context.configure(connection=connection, target_metadata=get_metadata(), **conf_args)
+        context.configure(
+            connection=connection,
+            target_metadata=get_metadata(),
+            include_object=include_object,
+            **conf_args,
+        )
 
         with context.begin_transaction():
             context.run_migrations()
 
 
+def get_excludes_from_config(config_, type_="tables"):
+    excludes = config_.get(type_, None)
+    if excludes is not None:
+        excludes = excludes.split(",")
+    return excludes
+
+
+excluded_matches = config.get_section("exclude").get("matches", "").split(",")
+
+
+def include_object(
+    obj: SchemaItem, name: str | None, type_: str, reflected: bool, compare_to: SchemaItem
+):
+    if name:
+        for pattern in excluded_matches:
+            if re.match(pattern, name):
+                return False
+
+    return True
+
+
 if context.is_offline_mode():
     run_migrations_offline()
 else:

@@ -0,0 +1,33 @@
+"""Migration
+
+Revision ID: 95b4ba4f731d
+Revises: 2597db647e9a
+Create Date: 2024-07-17 23:35:18.871569
+
+"""
+
+from alembic import op
+
+# revision identifiers, used by Alembic.
+revision = "95b4ba4f731d"
+down_revision = "2597db647e9a"
+branch_labels = None
+depends_on = None
+
+
+def upgrade():
+    op.execute("ALTER TABLE grouping_records ALTER COLUMN id TYPE BIGINT")
+    op.execute("CREATE SEQUENCE IF NOT EXISTS grouping_records_id_seq")
+    op.execute(
+        "ALTER TABLE grouping_records ALTER COLUMN id SET DEFAULT nextval('grouping_records_id_seq')"
+    )
+    op.execute("ALTER SEQUENCE grouping_records_id_seq OWNED BY grouping_records.id")
+    op.execute(
+        "SELECT setval('grouping_records_id_seq', COALESCE((SELECT MAX(id) FROM grouping_records), 1), true)"
+    )
+
+
+def downgrade():
+    op.execute("ALTER TABLE grouping_records ALTER COLUMN id DROP DEFAULT")
+    op.execute("DROP SEQUENCE grouping_records_id_seq")
+    op.execute("ALTER TABLE grouping_records ALTER COLUMN id TYPE INTEGER")
@@ -1,6 +1,7 @@
 import contextlib
 import datetime
 import json
+from typing import Any
 
 import sqlalchemy
 from flask_migrate import Migrate
@@ -10,15 +11,18 @@
 from sqlalchemy import (
     JSON,
     BigInteger,
+    Connection,
     DateTime,
     ForeignKey,
     Index,
     Integer,
+    Sequence,
     String,
     UniqueConstraint,
     delete,
     func,
     select,
+    text,
 )
 from sqlalchemy.dialects.postgresql import insert
 from sqlalchemy.orm import DeclarativeBase, Mapped, mapped_column, sessionmaker
@@ -231,28 +235,47 @@ class DbPrIdToAutofixRunIdMapping(Base):
     )
 
 
+def create_grouping_partition(target: Any, connection: Connection, **kw: Any) -> None:
+    for i in range(100):
+        connection.execute(
+            text(
+                f"""
+            CREATE TABLE grouping_records_p{i} PARTITION OF grouping_records
+            FOR VALUES WITH (MODULUS 100, REMAINDER {i});
+            """
+            )
+        )
+
+
 class DbGroupingRecord(Base):
+    # ORM model for the "grouping_records" table. The table is declared as
+    # hash-partitioned on project_id, and create_grouping_partition is
+    # registered for its "after_create" event (see __table_args__).
     __tablename__ = "grouping_records"
-    id: Mapped[int] = mapped_column(Integer, primary_key=True)
-    project_id: Mapped[int] = mapped_column(BigInteger, nullable=False)
-    message: Mapped[str] = mapped_column(String, nullable=False)
-    error_type: Mapped[str] = mapped_column(String, nullable=True)
-    stacktrace_embedding: Mapped[Vector] = mapped_column(Vector(768), nullable=False)
-    hash: Mapped[str] = mapped_column(
-        String(32), nullable=False, default="00000000000000000000000000000000"
-    )
-
     __table_args__ = (
         Index(
-            "ix_grouping_records_stacktrace_embedding_hnsw",
+            "ix_grouping_records_new_stacktrace_embedding_hnsw",
             "stacktrace_embedding",
             postgresql_using="hnsw",
             postgresql_with={"m": 16, "ef_construction": 200},
             postgresql_ops={"stacktrace_embedding": "vector_cosine_ops"},
         ),
         Index(
-            "ix_grouping_records_project_id",
+            "ix_grouping_records_new_project_id",
             "project_id",
         ),
-        UniqueConstraint("project_id", "hash", name="u_project_id_hash"),
+        UniqueConstraint("project_id", "hash", name="u_project_id_hash_composite"),
+        # Table-level options: partition by hash of project_id, and register
+        # create_grouping_partition as the "after_create" listener.
+        {
+            "postgresql_partition_by": "HASH (project_id)",
+            "listeners": [("after_create", create_grouping_partition)],
+        },
+    )
+
+    # id takes its values from grouping_records_id_seq (both as a Sequence and
+    # as a server-side nextval default). id and project_id are both flagged
+    # primary_key=True, so together they form a composite primary key.
+    id: Mapped[int] = mapped_column(
+        BigInteger,
+        Sequence("grouping_records_id_seq"),
+        primary_key=True,
+        server_default=text("nextval('grouping_records_id_seq')"),
     )
+    project_id: Mapped[int] = mapped_column(BigInteger, primary_key=True, nullable=False)
+    message: Mapped[str] = mapped_column(String, nullable=False)
+    error_type: Mapped[str] = mapped_column(String, nullable=True)
+    stacktrace_embedding: Mapped[Vector] = mapped_column(Vector(768), nullable=False)
+    hash: Mapped[str] = mapped_column(String(32), nullable=False)