fix(grouping): make id bigint sequence (#924)
Co-authored-by: Zachary Collins <zachary.collins@sentry.io>
trillville and corps authored Jul 18, 2024
1 parent 507ed7c commit 9d107a9
Showing 8 changed files with 111 additions and 34 deletions.
3 changes: 3 additions & 0 deletions .github/workflows/test-branches.yml
@@ -42,6 +42,9 @@ jobs:
- name: Typecheck with mypy
  run: |
    make mypy
- name: Validate no pending migrations
  run: |
    make check-no-pending-migrations
- name: Test with pytest
  run: |
    make test
3 changes: 3 additions & 0 deletions .github/workflows/test-prs.yml
@@ -22,6 +22,9 @@ jobs:
- name: Typecheck with mypy
  run: |
    make mypy
- name: Validate no pending migrations
  run: |
    make check-no-pending-migrations
- name: Test with pytest
  run: |
    make test
4 changes: 4 additions & 0 deletions Makefile
@@ -54,6 +54,10 @@ schemas: # Generates json files
migration: .env
	docker compose run app flask db migrate -m 'Migration'

.PHONY: check-no-pending-migrations
check-no-pending-migrations: .env
	docker compose run app flask db check

.PHONY: merge-migrations
merge-migrations: .env
	docker compose run app flask db merge heads
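For context, the new check-no-pending-migrations target leans on Flask-Migrate's `flask db check`, which fails when the SQLAlchemy models have drifted from the migration history. A minimal sketch of the same check through Alembic's Python API, assuming Alembic >= 1.9; the ini path is taken from this repo's layout, and env.py here still needs a Flask application context, so the dockerized CLI above remains the practical route:

from alembic import command
from alembic.config import Config

# Sketch only: load the project's Alembic config and run the autogenerate check.
cfg = Config("src/migrations/alembic.ini")
command.check(cfg)  # raises if autogenerate would emit new operations
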
22 changes: 1 addition & 21 deletions codecov.yml
@@ -1,22 +1,2 @@
coverage:
status:
project:
default: false
patch:
default: false
grouping:
paths:
- 'src/seer/grouping/'
automation:
paths:
- 'src/seer/automation/'
severity:
paths:
- 'src/seer/severity/'
trend_detection:
paths:
- 'src/seer/trend_detection/'
app:
paths:
- 'src/seer/app.py'
comment: false
comment: true
3 changes: 3 additions & 0 deletions src/migrations/alembic.ini
@@ -48,3 +48,6 @@ formatter = generic
[formatter_generic]
format = %(levelname)-5.5s [%(name)s] %(message)s
datefmt = %H:%M:%S

[exclude]
matches = grouping_records_p.+,grouping_records_new_p.+
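The new [exclude] section is plain ini data; env.py (below) reads it through Alembic's config object, but a quick hedged sketch with the standard library shows the shape of the value (the path is assumed from this repo's layout):

import configparser

# Sketch only: read back the comma-separated regexes added above.
parser = configparser.RawConfigParser()
parser.read("src/migrations/alembic.ini")
patterns = parser.get("exclude", "matches").split(",")
print(patterns)  # ['grouping_records_p.+', 'grouping_records_new_p.+']
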
30 changes: 29 additions & 1 deletion src/migrations/env.py
@@ -1,8 +1,10 @@
import logging
import re
from logging.config import fileConfig

from alembic import context
from flask import current_app
from sqlalchemy.sql.schema import SchemaItem

# this is the Alembic Config object, which provides
# access to the values within the .ini file in use.
@@ -94,12 +96,38 @@ def process_revision_directives(context, revision, directives):
    connectable = get_engine()

    with connectable.connect() as connection:
        context.configure(connection=connection, target_metadata=get_metadata(), **conf_args)
        context.configure(
            connection=connection,
            target_metadata=get_metadata(),
            include_object=include_object,
            **conf_args,
        )

        with context.begin_transaction():
            context.run_migrations()


def get_excludes_from_config(config_, type_="tables"):
    excludes = config_.get(type_, None)
    if excludes is not None:
        excludes = excludes.split(",")
    return excludes


excluded_matches = (config.get_section("exclude") or {}).get("matches", "").split(",")


def include_object(
    obj: SchemaItem, name: str | None, type_: str, reflected: bool, compare_to: SchemaItem | None
):
    if name:
        for pattern in excluded_matches:
            if re.match(pattern, name):
                return False

    return True


if context.is_offline_mode():
    run_migrations_offline()
else:
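The include_object hook keeps autogenerate from proposing drops for the physical hash partitions, which exist in the database but have no model of their own. A self-contained sketch of the same filtering, using the two patterns from alembic.ini's [exclude] section (table names here are illustrative):

import re

excluded_matches = ["grouping_records_p.+", "grouping_records_new_p.+"]

def is_tracked(name: str) -> bool:
    # Mirrors include_object: anything matching an exclude pattern is ignored.
    return not any(re.match(pattern, name) for pattern in excluded_matches)

assert not is_tracked("grouping_records_p42")  # physical partition: skipped
assert is_tracked("grouping_records")          # parent table: still compared
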
33 changes: 33 additions & 0 deletions src/migrations/versions/95b4ba4f731d_migration.py
@@ -0,0 +1,33 @@
"""Migration
Revision ID: 95b4ba4f731d
Revises: 2597db647e9a
Create Date: 2024-07-17 23:35:18.871569
"""

from alembic import op

# revision identifiers, used by Alembic.
revision = "95b4ba4f731d"
down_revision = "2597db647e9a"
branch_labels = None
depends_on = None


def upgrade():
    op.execute("ALTER TABLE grouping_records ALTER COLUMN id TYPE BIGINT")
    op.execute("CREATE SEQUENCE IF NOT EXISTS grouping_records_id_seq")
    op.execute(
        "ALTER TABLE grouping_records ALTER COLUMN id SET DEFAULT nextval('grouping_records_id_seq')"
    )
    op.execute("ALTER SEQUENCE grouping_records_id_seq OWNED BY grouping_records.id")
    op.execute(
        "SELECT setval('grouping_records_id_seq', COALESCE((SELECT MAX(id) FROM grouping_records), 1), true)"
    )


def downgrade():
    op.execute("ALTER TABLE grouping_records ALTER COLUMN id DROP DEFAULT")
    op.execute("DROP SEQUENCE grouping_records_id_seq")
    op.execute("ALTER TABLE grouping_records ALTER COLUMN id TYPE INTEGER")
47 changes: 35 additions & 12 deletions src/seer/db.py
@@ -1,6 +1,7 @@
import contextlib
import datetime
import json
from typing import Any

import sqlalchemy
from flask_migrate import Migrate
@@ -10,15 +11,18 @@
from sqlalchemy import (
    JSON,
    BigInteger,
    Connection,
    DateTime,
    ForeignKey,
    Index,
    Integer,
    Sequence,
    String,
    UniqueConstraint,
    delete,
    func,
    select,
    text,
)
from sqlalchemy.dialects.postgresql import insert
from sqlalchemy.orm import DeclarativeBase, Mapped, mapped_column, sessionmaker
@@ -231,28 +235,47 @@ class DbPrIdToAutofixRunIdMapping(Base):
)


def create_grouping_partition(target: Any, connection: Connection, **kw: Any) -> None:
    for i in range(100):
        connection.execute(
            text(
                f"""
                CREATE TABLE grouping_records_p{i} PARTITION OF grouping_records
                FOR VALUES WITH (MODULUS 100, REMAINDER {i});
                """
            )
        )


class DbGroupingRecord(Base):
    __tablename__ = "grouping_records"
    id: Mapped[int] = mapped_column(Integer, primary_key=True)
    project_id: Mapped[int] = mapped_column(BigInteger, nullable=False)
    message: Mapped[str] = mapped_column(String, nullable=False)
    error_type: Mapped[str] = mapped_column(String, nullable=True)
    stacktrace_embedding: Mapped[Vector] = mapped_column(Vector(768), nullable=False)
    hash: Mapped[str] = mapped_column(
        String(32), nullable=False, default="00000000000000000000000000000000"
    )

    __table_args__ = (
        Index(
            "ix_grouping_records_stacktrace_embedding_hnsw",
            "ix_grouping_records_new_stacktrace_embedding_hnsw",
            "stacktrace_embedding",
            postgresql_using="hnsw",
            postgresql_with={"m": 16, "ef_construction": 200},
            postgresql_ops={"stacktrace_embedding": "vector_cosine_ops"},
        ),
        Index(
            "ix_grouping_records_project_id",
            "ix_grouping_records_new_project_id",
            "project_id",
        ),
        UniqueConstraint("project_id", "hash", name="u_project_id_hash"),
        UniqueConstraint("project_id", "hash", name="u_project_id_hash_composite"),
        {
            "postgresql_partition_by": "HASH (project_id)",
            "listeners": [("after_create", create_grouping_partition)],
        },
    )

    id: Mapped[int] = mapped_column(
        BigInteger,
        Sequence("grouping_records_id_seq"),
        primary_key=True,
        server_default=text("nextval('grouping_records_id_seq')"),
    )
    project_id: Mapped[int] = mapped_column(BigInteger, primary_key=True, nullable=False)
    message: Mapped[str] = mapped_column(String, nullable=False)
    error_type: Mapped[str] = mapped_column(String, nullable=True)
    stacktrace_embedding: Mapped[Vector] = mapped_column(Vector(768), nullable=False)
    hash: Mapped[str] = mapped_column(String(32), nullable=False)
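In the reworked model, project_id joins the primary key because PostgreSQL requires the partition key of a hash-partitioned table to appear in every unique constraint, and the after_create listener pre-creates the 100 physical partitions when the parent table is created. The "listeners" entry in __table_args__ is SQLAlchemy's shorthand for registering a table event; a sketch of the equivalent explicit registration (not part of the commit):

from sqlalchemy import event

# Equivalent to the ("after_create", create_grouping_partition) listener above:
# run the partition-creating DDL right after the parent table is created.
event.listen(DbGroupingRecord.__table__, "after_create", create_grouping_partition)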
