getsentry · corps · Jul 18, 2024 · Jul 17, 2024 · Jul 18, 2024 · Jul 18, 2024
diff --git a/src/migrations/alembic.ini b/src/migrations/alembic.ini
@@ -48,3 +48,6 @@ formatter = generic
 [formatter_generic]
 format = %(levelname)-5.5s [%(name)s] %(message)s
 datefmt = %H:%M:%S
+
+[exclude]
+matches = grouping_records_p.+,grouping_records_new_p.+
diff --git a/src/migrations/env.py b/src/migrations/env.py
@@ -1,8 +1,10 @@
 import logging
+import re
 from logging.config import fileConfig
 
 from alembic import context
 from flask import current_app
+from sqlalchemy.sql.schema import SchemaItem
 
 # this is the Alembic Config object, which provides
 # access to the values within the .ini file in use.
@@ -94,12 +96,38 @@ def process_revision_directives(context, revision, directives):
  connectable = get_engine()
 
  with connectable.connect() as connection:
- context.configure(connection=connection, target_metadata=get_metadata(), **conf_args)
+ context.configure(
+ connection=connection,
+ target_metadata=get_metadata(),
+ include_object=include_object,
+ **conf_args,
+ )
 
  with context.begin_transaction():
  context.run_migrations()
 
 
+def get_excludes_from_config(config_, type_="tables"):
+ excludes = config_.get(type_, None)
+ if excludes is not None:
+ excludes = excludes.split(",")
+ return excludes
+
+
+excluded_matches = config.get_section("exclude").get("matches", "").split(",")
+
+
+def include_object(
+ obj: SchemaItem, name: str | None, type_: str, reflected: bool, compare_to: SchemaItem
+):
+ if name:
+ for pattern in excluded_matches:
+ if re.match(pattern, name):
+ return False
+
+ return True
+
+
 if context.is_offline_mode():
  run_migrations_offline()
 else:

diff --git a/src/seer/db.py b/src/seer/db.py
@@ -1,6 +1,7 @@
 import contextlib
 import datetime
 import json
+from typing import Any
 
 import sqlalchemy
 from flask_migrate import Migrate
@@ -10,6 +11,7 @@
 from sqlalchemy import (
  JSON,
  BigInteger,
+ Connection,
  DateTime,
  ForeignKey,
  Index,
@@ -233,21 +235,19 @@ class DbPrIdToAutofixRunIdMapping(Base):
  )
 
 
-class DbGroupingRecordBase:
- id: Mapped[int] = mapped_column(
- BigInteger,
- Sequence("grouping_records_id_seq"),
- primary_key=True,
- server_default=text("nextval('grouping_records_id_seq')"),
- )
- project_id: Mapped[int] = mapped_column(BigInteger, primary_key=True, nullable=False)
- message: Mapped[str] = mapped_column(String, nullable=False)
- error_type: Mapped[str] = mapped_column(String, nullable=True)
- stacktrace_embedding: Mapped[Vector] = mapped_column(Vector(768), nullable=False)
- hash: Mapped[str] = mapped_column(String(32), nullable=False)
+def create_grouping_partition(target: Any, connection: Connection, **kw: Any) -> None:
+ for i in range(100):
+ connection.execute(
+ text(
+ f"""
+ CREATE TABLE grouping_records_p{i} PARTITION OF grouping_records
+ FOR VALUES WITH (MODULUS 100, REMAINDER {i});
+ """
+ )
+ )
 
 
-class DbGroupingRecord(DbGroupingRecordBase, Base):
+class DbGroupingRecord(Base):
  __tablename__ = "grouping_records"
  __table_args__ = (
  Index(
@@ -262,35 +262,20 @@ class DbGroupingRecord(DbGroupingRecordBase, Base):
  "project_id",
  ),
  UniqueConstraint("project_id", "hash", name="u_project_id_hash_composite"),
- {"postgresql_partition_by": "HASH (project_id)"},
+ {
+ "postgresql_partition_by": "HASH (project_id)",
+ "listeners": [("after_create", create_grouping_partition)],
+ },
  )
 
-
-globals().update(
- {
- f"DbGroupingRecord{i}": type(
- f"DbGroupingRecord{i}",
- (DbGroupingRecordBase, Base),
- dict(
- __tablename__=f"grouping_records_p{i}",
- __table_args__=(
- Index(
- f"grouping_records_new_p{i}_stacktrace_embedding_idx",
- "stacktrace_embedding",
- postgresql_using="hnsw",
- postgresql_with={"m": 16, "ef_construction": 200},
- postgresql_ops={"stacktrace_embedding": "vector_cosine_ops"},
- ),
- Index(
- f"grouping_records_new_p{i}_project_id_idx",
- "project_id",
- ),
- UniqueConstraint(
- "project_id", "hash", name=f"grouping_records_new_p{i}_project_id_hash_key"
- ),
- ),
- ),
- )
- for i in range(100)
- }
-)
+ id: Mapped[int] = mapped_column(
+ BigInteger,
+ Sequence("grouping_records_id_seq"),
+ primary_key=True,
+ server_default=text("nextval('grouping_records_id_seq')"),
+ )
+ project_id: Mapped[int] = mapped_column(BigInteger, primary_key=True, nullable=False)
+ message: Mapped[str] = mapped_column(String, nullable=False)
+ error_type: Mapped[str] = mapped_column(String, nullable=True)
+ stacktrace_embedding: Mapped[Vector] = mapped_column(Vector(768), nullable=False)
+ hash: Mapped[str] = mapped_column(String(32), nullable=False)