Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix(grouping): make id bigint sequence #924

Merged
merged 9 commits into from
Jul 18, 2024
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions src/migrations/alembic.ini
Original file line number Diff line number Diff line change
Expand Up @@ -48,3 +48,6 @@ formatter = generic
[formatter_generic]
format = %(levelname)-5.5s [%(name)s] %(message)s
datefmt = %H:%M:%S

[exclude]
matches = grouping_records_p.+,grouping_records_new_p.+
trillville marked this conversation as resolved.
Show resolved Hide resolved
30 changes: 29 additions & 1 deletion src/migrations/env.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
import logging
import re
from logging.config import fileConfig

from alembic import context
from flask import current_app
from sqlalchemy.sql.schema import SchemaItem

# this is the Alembic Config object, which provides
# access to the values within the .ini file in use.
Expand Down Expand Up @@ -94,12 +96,38 @@ def process_revision_directives(context, revision, directives):
connectable = get_engine()

with connectable.connect() as connection:
context.configure(connection=connection, target_metadata=get_metadata(), **conf_args)
context.configure(
connection=connection,
target_metadata=get_metadata(),
include_object=include_object,
**conf_args,
)

with context.begin_transaction():
context.run_migrations()


def get_excludes_from_config(config_, type_="tables"):
    """Read a comma-separated exclusion list from a config mapping.

    Args:
        config_: Mapping of option names to raw string values (e.g. one ini section).
        type_: Name of the option to read.

    Returns:
        ``None`` when the option is absent; otherwise the list of entries with
        surrounding whitespace removed and blank entries dropped.
    """
    raw = config_.get(type_, None)
    if raw is None:
        return None
    # Strip each entry and drop blanks: "a, b" must not yield " b", and an
    # empty value must not yield [""] (an empty pattern matches everything).
    return [entry.strip() for entry in raw.split(",") if entry.strip()]


# Regular expressions, comma-separated in the ``matches`` option of the
# ``[exclude]`` ini section. A missing section or option yields an empty list,
# and blank entries are dropped because an empty pattern matches every name.
excluded_matches = [
    pattern.strip()
    for pattern in (config.get_section("exclude") or {}).get("matches", "").split(",")
    if pattern.strip()
]


def include_object(
    obj: SchemaItem, name: str | None, type_: str, reflected: bool, compare_to: SchemaItem
) -> bool:
    """Decide whether Alembic should consider a schema object.

    Teaches Alembic to ignore objects in the database whose name matches one of
    the ``excluded_matches`` patterns, so that it doesn't try to remove them.

    Patterns are applied with ``re.match``, i.e. anchored at the start of the
    name. Empty patterns are skipped, because an empty regular expression
    matches every name and would exclude all objects.

    Returns:
        ``False`` when ``name`` matches an exclusion pattern, ``True`` otherwise
        (including when the object has no name).
    """
    if not name:
        return True
    return not any(pattern and re.match(pattern, name) for pattern in excluded_matches)


if context.is_offline_mode():
run_migrations_offline()
else:
Expand Down
33 changes: 33 additions & 0 deletions src/migrations/versions/95b4ba4f731d_migration.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
"""Migration

Revision ID: 95b4ba4f731d
Revises: 2597db647e9a
Create Date: 2024-07-17 23:35:18.871569

"""

from alembic import op

# revision identifiers, used by Alembic.
revision = "95b4ba4f731d"
down_revision = "2597db647e9a"
branch_labels = None
depends_on = None


def upgrade():
    """Widen ``grouping_records.id`` to BIGINT and give it its own sequence."""
    # Order matters: the sequence must exist before it can be used as the
    # column default, attached to the column, or positioned with setval.
    statements = (
        "ALTER TABLE grouping_records ALTER COLUMN id TYPE BIGINT",
        "CREATE SEQUENCE IF NOT EXISTS grouping_records_id_seq",
        "ALTER TABLE grouping_records ALTER COLUMN id SET DEFAULT nextval('grouping_records_id_seq')",
        "ALTER SEQUENCE grouping_records_id_seq OWNED BY grouping_records.id",
        # Position the sequence at the current MAX(id), or at 1 for an empty table.
        "SELECT setval('grouping_records_id_seq', COALESCE((SELECT MAX(id) FROM grouping_records), 1), true)",
    )
    for statement in statements:
        op.execute(statement)


def downgrade():
    """Undo ``upgrade``: drop the default and the sequence, then narrow ``id`` to INTEGER."""
    # The default is removed before the sequence it refers to is dropped.
    for statement in (
        "ALTER TABLE grouping_records ALTER COLUMN id DROP DEFAULT",
        "DROP SEQUENCE grouping_records_id_seq",
        "ALTER TABLE grouping_records ALTER COLUMN id TYPE INTEGER",
    ):
        op.execute(statement)
47 changes: 35 additions & 12 deletions src/seer/db.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import contextlib
import datetime
import json
from typing import Any

import sqlalchemy
from flask_migrate import Migrate
Expand All @@ -10,15 +11,18 @@
from sqlalchemy import (
JSON,
BigInteger,
Connection,
DateTime,
ForeignKey,
Index,
Integer,
Sequence,
String,
UniqueConstraint,
delete,
func,
select,
text,
)
from sqlalchemy.dialects.postgresql import insert
from sqlalchemy.orm import DeclarativeBase, Mapped, mapped_column, sessionmaker
Expand Down Expand Up @@ -231,28 +235,47 @@ class DbPrIdToAutofixRunIdMapping(Base):
)


def create_grouping_partition(target: Any, connection: Connection, **kw: Any) -> None:
    """Create the 100 hash partitions ``grouping_records_p0`` .. ``grouping_records_p99``.

    One ``CREATE TABLE ... PARTITION OF grouping_records`` statement is executed
    on ``connection`` per remainder of modulus 100. ``target`` and ``kw`` are
    accepted but not used.
    """
    for i in range(100):
        partition_ddl = text(
            f"""
CREATE TABLE grouping_records_p{i} PARTITION OF grouping_records
FOR VALUES WITH (MODULUS 100, REMAINDER {i});
"""
        )
        connection.execute(partition_ddl)


class DbGroupingRecord(Base):
__tablename__ = "grouping_records"
id: Mapped[int] = mapped_column(Integer, primary_key=True)
project_id: Mapped[int] = mapped_column(BigInteger, nullable=False)
message: Mapped[str] = mapped_column(String, nullable=False)
error_type: Mapped[str] = mapped_column(String, nullable=True)
stacktrace_embedding: Mapped[Vector] = mapped_column(Vector(768), nullable=False)
hash: Mapped[str] = mapped_column(
String(32), nullable=False, default="00000000000000000000000000000000"
)

__table_args__ = (
Index(
"ix_grouping_records_stacktrace_embedding_hnsw",
"ix_grouping_records_new_stacktrace_embedding_hnsw",
"stacktrace_embedding",
postgresql_using="hnsw",
postgresql_with={"m": 16, "ef_construction": 200},
postgresql_ops={"stacktrace_embedding": "vector_cosine_ops"},
),
Index(
"ix_grouping_records_project_id",
"ix_grouping_records_new_project_id",
"project_id",
),
UniqueConstraint("project_id", "hash", name="u_project_id_hash"),
UniqueConstraint("project_id", "hash", name="u_project_id_hash_composite"),
{
"postgresql_partition_by": "HASH (project_id)",
"listeners": [("after_create", create_grouping_partition)],
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Effectively, this will only run for tests. Tests use the declarative form here, but production and development use migrations (Alembic).

},
)

id: Mapped[int] = mapped_column(
BigInteger,
Sequence("grouping_records_id_seq"),
primary_key=True,
server_default=text("nextval('grouping_records_id_seq')"),
)
project_id: Mapped[int] = mapped_column(BigInteger, primary_key=True, nullable=False)
message: Mapped[str] = mapped_column(String, nullable=False)
error_type: Mapped[str] = mapped_column(String, nullable=True)
stacktrace_embedding: Mapped[Vector] = mapped_column(Vector(768), nullable=False)
hash: Mapped[str] = mapped_column(String(32), nullable=False)