From 97c67a1b09b375d2d28945c454caacd8fb358ddf Mon Sep 17 00:00:00 2001
From: Tillman Elser <trillville@users.noreply.github.com>
Date: Wed, 17 Jul 2024 16:54:31 -0700
Subject: [PATCH 1/8] make id bigint sequence

---
 .../versions/95b4ba4f731d_migration.py        | 33 +++++++++++++++++++
 src/seer/db.py                                |  4 +--
 2 files changed, 35 insertions(+), 2 deletions(-)
 create mode 100644 src/migrations/versions/95b4ba4f731d_migration.py

diff --git a/src/migrations/versions/95b4ba4f731d_migration.py b/src/migrations/versions/95b4ba4f731d_migration.py
new file mode 100644
index 000000000..366c90b07
--- /dev/null
+++ b/src/migrations/versions/95b4ba4f731d_migration.py
@@ -0,0 +1,33 @@
+"""Widen grouping_records.id to BIGINT and default it to a BIGINT sequence.
+
+Revision ID: 95b4ba4f731d
+Revises: 2597db647e9a
+Create Date: 2024-07-17 23:35:18.871569
+A pre-existing sequence (e.g. from a SERIAL id) may be INTEGER-typed, so upgrade() widens it.
+"""
+
+from alembic import op
+
+# revision identifiers, used by Alembic.
+revision = "95b4ba4f731d"
+down_revision = "2597db647e9a"
+branch_labels = None
+depends_on = None
+
+
+def upgrade():
+    op.execute("ALTER TABLE grouping_records ALTER COLUMN id TYPE BIGINT")
+    op.execute("CREATE SEQUENCE IF NOT EXISTS grouping_records_id_seq")
+    op.execute(
+        "ALTER TABLE grouping_records ALTER COLUMN id SET DEFAULT nextval('grouping_records_id_seq')"
+    )
+    op.execute("ALTER SEQUENCE grouping_records_id_seq AS BIGINT OWNED BY grouping_records.id")
+    op.execute(
+        "SELECT setval('grouping_records_id_seq', COALESCE((SELECT MAX(id) FROM grouping_records), 1), true)"
+    )
+
+
+def downgrade():
+    op.execute("ALTER TABLE grouping_records ALTER COLUMN id DROP DEFAULT")
+    op.execute("DROP SEQUENCE grouping_records_id_seq")
+    op.execute("ALTER TABLE grouping_records ALTER COLUMN id TYPE INTEGER")
diff --git a/src/seer/db.py b/src/seer/db.py
index 2d1c5e3f3..bd0493729 100644
--- a/src/seer/db.py
+++ b/src/seer/db.py
@@ -233,8 +233,8 @@ class DbPrIdToAutofixRunIdMapping(Base):
 
 class DbGroupingRecord(Base):
     __tablename__ = "grouping_records"
-    id: Mapped[int] = mapped_column(Integer, primary_key=True)
-    project_id: Mapped[int] = mapped_column(BigInteger, nullable=False)
+    id: Mapped[int] = mapped_column(BigInteger, primary_key=True, autoincrement=True)
+    project_id: Mapped[int] = mapped_column(BigInteger, primary_key=True, nullable=False)
     message: Mapped[str] = mapped_column(String, nullable=False)
     error_type: Mapped[str] = mapped_column(String, nullable=True)
     stacktrace_embedding: Mapped[Vector] = mapped_column(Vector(768), nullable=False)

From a444269675b1d8d6dd3f4d4dd08380b6ef86904e Mon Sep 17 00:00:00 2001
From: Tillman Elser <trillville@users.noreply.github.com>
Date: Wed, 17 Jul 2024 17:07:25 -0700
Subject: [PATCH 2/8] start fixing db.py

---
 src/seer/db.py | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/src/seer/db.py b/src/seer/db.py
index bd0493729..21a29d0e1 100644
--- a/src/seer/db.py
+++ b/src/seer/db.py
@@ -233,26 +233,25 @@ class DbPrIdToAutofixRunIdMapping(Base):
 
 class DbGroupingRecord(Base):
     __tablename__ = "grouping_records"
+    __table_args__ = {"postgres_partition_by": "HASH (project_id)"}
     id: Mapped[int] = mapped_column(BigInteger, primary_key=True, autoincrement=True)
     project_id: Mapped[int] = mapped_column(BigInteger, primary_key=True, nullable=False)
     message: Mapped[str] = mapped_column(String, nullable=False)
     error_type: Mapped[str] = mapped_column(String, nullable=True)
     stacktrace_embedding: Mapped[Vector] = mapped_column(Vector(768), nullable=False)
-    hash: Mapped[str] = mapped_column(
-        String(32), nullable=False, default="00000000000000000000000000000000"
-    )
+    hash: Mapped[str] = mapped_column(String(32), nullable=False)
 
     __table_args__ = (
         Index(
-            "ix_grouping_records_stacktrace_embedding_hnsw",
+            "ix_grouping_records_new_stacktrace_embedding_hnsw",
             "stacktrace_embedding",
             postgresql_using="hnsw",
             postgresql_with={"m": 16, "ef_construction": 200},
             postgresql_ops={"stacktrace_embedding": "vector_cosine_ops"},
         ),
         Index(
-            "ix_grouping_records_project_id",
+            "ix_grouping_records_new_project_id",
             "project_id",
         ),
-        UniqueConstraint("project_id", "hash", name="u_project_id_hash"),
+        UniqueConstraint("project_id", "hash", name="u_project_id_hash_composite"),
     )

From 5c1814f612b2a84238eb5ef78c3a81558e8aebab Mon Sep 17 00:00:00 2001
From: Zachary Collins <zachary.collins@sentry.io>
Date: Wed, 17 Jul 2024 17:55:05 -0700
Subject: [PATCH 3/8] Declare grouping_records partition models in db.py

---
 src/seer/db.py | 42 ++++++++++++++++++++++++++++++++++++++----
 1 file changed, 38 insertions(+), 4 deletions(-)

diff --git a/src/seer/db.py b/src/seer/db.py
index 21a29d0e1..77361e792 100644
--- a/src/seer/db.py
+++ b/src/seer/db.py
@@ -231,16 +231,19 @@ class DbPrIdToAutofixRunIdMapping(Base):
     )
 
 
-class DbGroupingRecord(Base):
-    __tablename__ = "grouping_records"
-    __table_args__ = {"postgres_partition_by": "HASH (project_id)"}
-    id: Mapped[int] = mapped_column(BigInteger, primary_key=True, autoincrement=True)
+class DbGroupingRecordBase:
+    id: Mapped[int] = mapped_column(
+        BigInteger, primary_key=True, server_default="nextval('grouping_records_id_seq')"
+    )
     project_id: Mapped[int] = mapped_column(BigInteger, primary_key=True, nullable=False)
     message: Mapped[str] = mapped_column(String, nullable=False)
     error_type: Mapped[str] = mapped_column(String, nullable=True)
     stacktrace_embedding: Mapped[Vector] = mapped_column(Vector(768), nullable=False)
     hash: Mapped[str] = mapped_column(String(32), nullable=False)
 
+
+class DbGroupingRecord(DbGroupingRecordBase, Base):
+    __tablename__ = "grouping_records"
     __table_args__ = (
         Index(
             "ix_grouping_records_new_stacktrace_embedding_hnsw",
@@ -254,4 +257,35 @@ class DbGroupingRecord(Base):
             "project_id",
         ),
         UniqueConstraint("project_id", "hash", name="u_project_id_hash_composite"),
+        {"postgresql_partition_by": "HASH (project_id)"},
     )
+
+
+globals().update(
+    {
+        f"DbGroupingRecord{i}": type(
+            f"DbGroupingRecord{i}",
+            (DbGroupingRecordBase, Base),
+            dict(
+                __tablename__=f"grouping_records_p{i}",
+                __table_args__=(
+                    Index(
+                        f"grouping_records_new_p{i}_stacktrace_embedding_idx",
+                        "stacktrace_embedding",
+                        postgresql_using="hnsw",
+                        postgresql_with={"m": 16, "ef_construction": 200},
+                        postgresql_ops={"stacktrace_embedding": "vector_cosine_ops"},
+                    ),
+                    Index(
+                        f"grouping_records_new_p{i}_project_id_idx",
+                        "project_id",
+                    ),
+                    UniqueConstraint(
+                        "project_id", "hash", name=f"grouping_records_new_p{i}_project_id_hash_key"
+                    ),
+                ),
+            ),
+        )
+        for i in range(100)
+    }
+)

From 3e5702b443d24e003d348784017e1fa46caf7b32 Mon Sep 17 00:00:00 2001
From: Zachary Collins <zachary.collins@sentry.io>
Date: Wed, 17 Jul 2024 18:01:26 -0700
Subject: [PATCH 4/8] Adding check for pending migrations

---
 .github/workflows/test-branches.yml | 3 +++
 .github/workflows/test-prs.yml      | 3 +++
 Makefile                            | 4 ++++
 3 files changed, 10 insertions(+)

diff --git a/.github/workflows/test-branches.yml b/.github/workflows/test-branches.yml
index 4450ddb6c..211611ce8 100644
--- a/.github/workflows/test-branches.yml
+++ b/.github/workflows/test-branches.yml
@@ -42,6 +42,9 @@ jobs:
       - name: Typecheck with mypy
         run: |
           make mypy
+      - name: Validate no pending migrations
+        run: |
+          make check-no-pending-migrations
       - name: Test with pytest
         run: |
           make test
diff --git a/.github/workflows/test-prs.yml b/.github/workflows/test-prs.yml
index 3b7c270fe..7fa13b5fc 100644
--- a/.github/workflows/test-prs.yml
+++ b/.github/workflows/test-prs.yml
@@ -22,6 +22,9 @@ jobs:
       - name: Typecheck with mypy
         run: |
           make mypy
+      - name: Validate no pending migrations
+        run: |
+          make check-no-pending-migrations
       - name: Test with pytest
         run: |
           make test
diff --git a/Makefile b/Makefile
index 885b847bb..fcea2a58e 100644
--- a/Makefile
+++ b/Makefile
@@ -54,6 +54,10 @@ schemas: # Generates json files
 migration: .env
 	docker compose run app flask db migrate -m 'Migration'
 
+.PHONY: check-no-pending-migrations
+check-no-pending-migrations: .env # Checks that the models have no pending (ungenerated) migrations
+	docker compose run app flask db check
+
 .PHONY: merge-migrations
 merge-migrations: .env
 	docker compose run app flask db merge heads

From 8cfcf0e683ce411ddd42eca91e0ff7320ef67055 Mon Sep 17 00:00:00 2001
From: Zachary Collins <zachary.collins@sentry.io>
Date: Wed, 17 Jul 2024 18:10:10 -0700
Subject: [PATCH 5/8] Bind grouping_records.id to its sequence via Sequence and text()

---
 src/seer/db.py | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/src/seer/db.py b/src/seer/db.py
index 77361e792..b07607369 100644
--- a/src/seer/db.py
+++ b/src/seer/db.py
@@ -14,11 +14,13 @@
     ForeignKey,
     Index,
     Integer,
+    Sequence,
     String,
     UniqueConstraint,
     delete,
     func,
     select,
+    text,
 )
 from sqlalchemy.dialects.postgresql import insert
 from sqlalchemy.orm import DeclarativeBase, Mapped, mapped_column, sessionmaker
@@ -233,7 +235,10 @@ class DbPrIdToAutofixRunIdMapping(Base):
 
 class DbGroupingRecordBase:
     id: Mapped[int] = mapped_column(
-        BigInteger, primary_key=True, server_default="nextval('grouping_records_id_seq')"
+        BigInteger,
+        Sequence("grouping_records_id_seq"),
+        primary_key=True,
+        server_default=text("nextval('grouping_records_id_seq')"),
     )
     project_id: Mapped[int] = mapped_column(BigInteger, primary_key=True, nullable=False)
     message: Mapped[str] = mapped_column(String, nullable=False)

From 5b041e2c4c58ed306b0d0f9a12e6fdd8bcfb5a81 Mon Sep 17 00:00:00 2001
From: Zachary Collins <zachary.collins@sentry.io>
Date: Wed, 17 Jul 2024 20:55:03 -0700
Subject: [PATCH 6/8] Create partitions in after_create listener; exclude them from autogenerate

---
 src/migrations/alembic.ini |  3 ++
 src/migrations/env.py      | 30 +++++++++++++++-
 src/seer/db.py             | 71 +++++++++++++++-----------------------
 3 files changed, 60 insertions(+), 44 deletions(-)

diff --git a/src/migrations/alembic.ini b/src/migrations/alembic.ini
index ec9d45c26..502a22590 100644
--- a/src/migrations/alembic.ini
+++ b/src/migrations/alembic.ini
@@ -48,3 +48,6 @@ formatter = generic
 [formatter_generic]
 format = %(levelname)-5.5s [%(name)s] %(message)s
 datefmt = %H:%M:%S
+
+[exclude]
+matches = grouping_records_p.+,grouping_records_new_p.+
diff --git a/src/migrations/env.py b/src/migrations/env.py
index 54179952a..7a8ff2afd 100644
--- a/src/migrations/env.py
+++ b/src/migrations/env.py
@@ -1,8 +1,10 @@
 import logging
+import re
 from logging.config import fileConfig
 
 from alembic import context
 from flask import current_app
+from sqlalchemy.sql.schema import SchemaItem
 
 # this is the Alembic Config object, which provides
 # access to the values within the .ini file in use.
@@ -94,12 +96,38 @@ def process_revision_directives(context, revision, directives):
     connectable = get_engine()
 
     with connectable.connect() as connection:
-        context.configure(connection=connection, target_metadata=get_metadata(), **conf_args)
+        context.configure(
+            connection=connection,
+            target_metadata=get_metadata(),
+            include_object=include_object,
+            **conf_args,
+        )
 
         with context.begin_transaction():
             context.run_migrations()
 
 
+def get_excludes_from_config(config_, type_="tables"):
+    """Return the comma-separated ``type_`` entry of ``config_`` as a list, or None if absent."""
+    raw = config_.get(type_, None)
+    excludes = raw.split(",") if raw is not None else None
+    return excludes
+
+
+excluded_matches = config.get_section("exclude").get("matches", "").split(",")
+
+
+def include_object(
+    obj: SchemaItem, name: str | None, type_: str, reflected: bool, compare_to: SchemaItem
+):
+    if name:
+        for pattern in excluded_matches:
+            if pattern and re.match(pattern, name):  # skip "": it would match every name
+                return False
+
+    return True
+
+
 if context.is_offline_mode():
     run_migrations_offline()
 else:
diff --git a/src/seer/db.py b/src/seer/db.py
index b07607369..516b2eba4 100644
--- a/src/seer/db.py
+++ b/src/seer/db.py
@@ -1,6 +1,7 @@
 import contextlib
 import datetime
 import json
+from typing import Any
 
 import sqlalchemy
 from flask_migrate import Migrate
@@ -10,6 +11,7 @@
 from sqlalchemy import (
     JSON,
     BigInteger,
+    Connection,
     DateTime,
     ForeignKey,
     Index,
@@ -233,21 +235,19 @@ class DbPrIdToAutofixRunIdMapping(Base):
     )
 
 
-class DbGroupingRecordBase:
-    id: Mapped[int] = mapped_column(
-        BigInteger,
-        Sequence("grouping_records_id_seq"),
-        primary_key=True,
-        server_default=text("nextval('grouping_records_id_seq')"),
-    )
-    project_id: Mapped[int] = mapped_column(BigInteger, primary_key=True, nullable=False)
-    message: Mapped[str] = mapped_column(String, nullable=False)
-    error_type: Mapped[str] = mapped_column(String, nullable=True)
-    stacktrace_embedding: Mapped[Vector] = mapped_column(Vector(768), nullable=False)
-    hash: Mapped[str] = mapped_column(String(32), nullable=False)
+def create_grouping_partition(target: Any, connection: Connection, **kw: Any) -> None:
+    """Create hash partitions grouping_records_p0..p99; wired in as an after_create listener."""
+    for remainder in range(100):
+        partition_ddl = text(
+            f"""
+            CREATE TABLE grouping_records_p{remainder} PARTITION OF grouping_records
+            FOR VALUES WITH (MODULUS 100, REMAINDER {remainder});
+            """
+        )
+        connection.execute(partition_ddl)
 
 
-class DbGroupingRecord(DbGroupingRecordBase, Base):
+class DbGroupingRecord(Base):
     __tablename__ = "grouping_records"
     __table_args__ = (
         Index(
@@ -262,35 +262,20 @@ class DbGroupingRecord(DbGroupingRecordBase, Base):
             "project_id",
         ),
         UniqueConstraint("project_id", "hash", name="u_project_id_hash_composite"),
-        {"postgresql_partition_by": "HASH (project_id)"},
+        {
+            "postgresql_partition_by": "HASH (project_id)",
+            "listeners": [("after_create", create_grouping_partition)],
+        },
     )
 
-
-globals().update(
-    {
-        f"DbGroupingRecord{i}": type(
-            f"DbGroupingRecord{i}",
-            (DbGroupingRecordBase, Base),
-            dict(
-                __tablename__=f"grouping_records_p{i}",
-                __table_args__=(
-                    Index(
-                        f"grouping_records_new_p{i}_stacktrace_embedding_idx",
-                        "stacktrace_embedding",
-                        postgresql_using="hnsw",
-                        postgresql_with={"m": 16, "ef_construction": 200},
-                        postgresql_ops={"stacktrace_embedding": "vector_cosine_ops"},
-                    ),
-                    Index(
-                        f"grouping_records_new_p{i}_project_id_idx",
-                        "project_id",
-                    ),
-                    UniqueConstraint(
-                        "project_id", "hash", name=f"grouping_records_new_p{i}_project_id_hash_key"
-                    ),
-                ),
-            ),
-        )
-        for i in range(100)
-    }
-)
+    id: Mapped[int] = mapped_column(
+        BigInteger,
+        Sequence("grouping_records_id_seq"),
+        primary_key=True,
+        server_default=text("nextval('grouping_records_id_seq')"),
+    )
+    project_id: Mapped[int] = mapped_column(BigInteger, primary_key=True, nullable=False)
+    message: Mapped[str] = mapped_column(String, nullable=False)
+    error_type: Mapped[str] = mapped_column(String, nullable=True)
+    stacktrace_embedding: Mapped[Vector] = mapped_column(Vector(768), nullable=False)
+    hash: Mapped[str] = mapped_column(String(32), nullable=False)

From 28ef0f7813877d62b01f7af7a726a9d2c44914b1 Mon Sep 17 00:00:00 2001
From: Zachary Collins <zachary.collins@sentry.io>
Date: Wed, 17 Jul 2024 20:56:05 -0700
Subject: [PATCH 7/8] Codecov comment

---
 codecov.yml | 22 +---------------------
 1 file changed, 1 insertion(+), 21 deletions(-)

diff --git a/codecov.yml b/codecov.yml
index b5d971b2d..e9c19b276 100644
--- a/codecov.yml
+++ b/codecov.yml
@@ -1,22 +1,2 @@
 coverage:
-  status:
-    project:
-      default: false
-    patch:
-      default: false
-      grouping:
-        paths:
-          - 'src/seer/grouping/'
-      automation:
-        paths:
-          - 'src/seer/automation/'
-      severity:
-        paths:
-          - 'src/seer/severity/'
-      trend_detection:
-        paths:
-          - 'src/seer/trend_detection/'
-      app:
-        paths:
-          - 'src/seer/app.py'
-comment: false
+comment: true

From 4e4c9e1164304f0732b022ef28270c6f6e96c7fd Mon Sep 17 00:00:00 2001
From: Zachary Collins <zachary.collins@sentry.io>
Date: Wed, 17 Jul 2024 21:06:25 -0700
Subject: [PATCH 8/8] Typing fixes

---
 src/migrations/env.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/migrations/env.py b/src/migrations/env.py
index 7a8ff2afd..b4a7a4a36 100644
--- a/src/migrations/env.py
+++ b/src/migrations/env.py
@@ -114,11 +114,11 @@ def get_excludes_from_config(config_, type_="tables"):
     return excludes
 
 
-excluded_matches = config.get_section("exclude").get("matches", "").split(",")
+excluded_matches = (config.get_section("exclude") or {}).get("matches", "").split(",")
 
 
 def include_object(
-    obj: SchemaItem, name: str | None, type_: str, reflected: bool, compare_to: SchemaItem
+    obj: SchemaItem, name: str | None, type_: str, reflected: bool, compare_to: SchemaItem | None
 ):
     if name:
         for pattern in excluded_matches: