Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix(grouping): make id bigint sequence #924

Merged
merged 9 commits into from
Jul 18, 2024
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions src/migrations/alembic.ini
Original file line number Diff line number Diff line change
Expand Up @@ -48,3 +48,6 @@ formatter = generic
[formatter_generic]
format = %(levelname)-5.5s [%(name)s] %(message)s
datefmt = %H:%M:%S

[exclude]
matches = grouping_records_p.+,grouping_records_new_p.+
trillville marked this conversation as resolved.
Show resolved Hide resolved
30 changes: 29 additions & 1 deletion src/migrations/env.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
import logging
import re
from logging.config import fileConfig

from alembic import context
from flask import current_app
from sqlalchemy.sql.schema import SchemaItem

# this is the Alembic Config object, which provides
# access to the values within the .ini file in use.
Expand Down Expand Up @@ -94,12 +96,38 @@ def process_revision_directives(context, revision, directives):
connectable = get_engine()

with connectable.connect() as connection:
context.configure(connection=connection, target_metadata=get_metadata(), **conf_args)
context.configure(
connection=connection,
target_metadata=get_metadata(),
include_object=include_object,
**conf_args,
)

with context.begin_transaction():
context.run_migrations()


def get_excludes_from_config(config_, type_="tables"):
    """Return the comma-separated exclude list stored under ``type_``.

    Args:
        config_: Mapping-like object exposing ``get(key, default)``.
        type_: Key to look up in ``config_``; defaults to ``"tables"``.

    Returns:
        ``None`` when the key is absent; otherwise the list of entries with
        surrounding whitespace stripped and empty entries dropped.
    """
    raw = config_.get(type_, None)
    if raw is None:
        return None
    # Values written as "a, b," would otherwise yield " b" and "" entries,
    # which can never equal a real object name.
    return [entry.strip() for entry in raw.split(",") if entry.strip()]


# Regex patterns read from the optional [exclude] section of the .ini file;
# include_object() rejects any object whose name matches one of them.
# A missing section (get_section returns None), a missing key, or a stray
# comma must not produce an empty pattern: re.match("", name) succeeds for
# every name and would therefore exclude every object.
excluded_matches = [
    pattern.strip()
    for pattern in (config.get_section("exclude") or {})
    .get("matches", "")
    .split(",")
    if pattern.strip()
]


def include_object(
    obj: SchemaItem,
    name: str | None,
    type_: str,
    reflected: bool,
    compare_to: SchemaItem | None,
) -> bool:
    """Decide whether Alembic should consider a schema object.

    Teaches Alembic to ignore objects in the database whose names match one
    of the patterns in ``excluded_matches``, so that it does not try to
    remove them.

    Args:
        obj: The schema object being considered (unused here).
        name: The object's name; unnamed objects are always included.
        type_: The kind of object (unused here).
        reflected: Whether the object was reflected from the database
            (unused here).
        compare_to: The object being compared against, if any (unused here).

    Returns:
        ``False`` when ``name`` matches any exclude pattern (``re.match``, so
        anchored at the start of the name), ``True`` otherwise.
    """
    if not name:
        return True

    # Skip empty patterns: re.match("", name) succeeds for every name, so a
    # blank entry would silently exclude every object.
    return not any(
        pattern and re.match(pattern, name) for pattern in excluded_matches
    )


if context.is_offline_mode():
run_migrations_offline()
else:
Expand Down
71 changes: 28 additions & 43 deletions src/seer/db.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import contextlib
import datetime
import json
from typing import Any

import sqlalchemy
from flask_migrate import Migrate
Expand All @@ -10,6 +11,7 @@
from sqlalchemy import (
JSON,
BigInteger,
Connection,
DateTime,
ForeignKey,
Index,
Expand Down Expand Up @@ -233,21 +235,19 @@ class DbPrIdToAutofixRunIdMapping(Base):
)


class DbGroupingRecordBase:
    """Column definitions shared by the grouping-record table classes."""

    # Half of the composite primary key; values come from the
    # grouping_records_id_seq sequence, both client- and server-side.
    id: Mapped[int] = mapped_column(
        BigInteger,
        Sequence("grouping_records_id_seq"),
        primary_key=True,
        server_default=text("nextval('grouping_records_id_seq')"),
    )
    # Other half of the composite primary key.
    project_id: Mapped[int] = mapped_column(BigInteger, primary_key=True, nullable=False)
    message: Mapped[str] = mapped_column(String, nullable=False)
    # The only nullable column in this class.
    error_type: Mapped[str] = mapped_column(String, nullable=True)
    # 768-dimensional vector column.
    stacktrace_embedding: Mapped[Vector] = mapped_column(Vector(768), nullable=False)
    # String column capped at 32 characters.
    hash: Mapped[str] = mapped_column(String(32), nullable=False)
def create_grouping_partition(target: Any, connection: Connection, **kw: Any) -> None:
    """Create the hash partitions ``grouping_records_p0`` .. ``grouping_records_p99``.

    Executes one ``CREATE TABLE ... PARTITION OF grouping_records`` statement
    per remainder (modulus 100) on ``connection``.

    Args:
        target: Accepted for the listener signature; not used.
        connection: Connection on which each statement is executed.
        **kw: Extra keyword arguments; ignored.
    """
    for remainder in range(100):
        statement = text(
            f"""
CREATE TABLE grouping_records_p{remainder} PARTITION OF grouping_records
FOR VALUES WITH (MODULUS 100, REMAINDER {remainder});
"""
        )
        connection.execute(statement)


class DbGroupingRecord(DbGroupingRecordBase, Base):
class DbGroupingRecord(Base):
__tablename__ = "grouping_records"
__table_args__ = (
Index(
Expand All @@ -262,35 +262,20 @@ class DbGroupingRecord(DbGroupingRecordBase, Base):
"project_id",
),
UniqueConstraint("project_id", "hash", name="u_project_id_hash_composite"),
{"postgresql_partition_by": "HASH (project_id)"},
{
"postgresql_partition_by": "HASH (project_id)",
"listeners": [("after_create", create_grouping_partition)],
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Effectively, this will only run for tests. Tests use the declarative form here, but production and development use migrations (Alembic).

},
)


# Register 100 dynamically built classes, DbGroupingRecord0 through
# DbGroupingRecord99, as module globals. Each one derives from
# (DbGroupingRecordBase, Base) and maps to the table grouping_records_p{i}.
# Note that the index and constraint names use the prefix
# "grouping_records_new_p{i}", which differs from the table name.
globals().update(
    {
        f"DbGroupingRecord{i}": type(
            f"DbGroupingRecord{i}",
            (DbGroupingRecordBase, Base),
            dict(
                __tablename__=f"grouping_records_p{i}",
                __table_args__=(
                    # HNSW index over the embedding column using cosine ops.
                    Index(
                        f"grouping_records_new_p{i}_stacktrace_embedding_idx",
                        "stacktrace_embedding",
                        postgresql_using="hnsw",
                        postgresql_with={"m": 16, "ef_construction": 200},
                        postgresql_ops={"stacktrace_embedding": "vector_cosine_ops"},
                    ),
                    Index(
                        f"grouping_records_new_p{i}_project_id_idx",
                        "project_id",
                    ),
                    # (project_id, hash) must be unique within each table.
                    UniqueConstraint(
                        "project_id", "hash", name=f"grouping_records_new_p{i}_project_id_hash_key"
                    ),
                ),
            ),
        )
        for i in range(100)
    }
)
id: Mapped[int] = mapped_column(
BigInteger,
Sequence("grouping_records_id_seq"),
primary_key=True,
server_default=text("nextval('grouping_records_id_seq')"),
)
project_id: Mapped[int] = mapped_column(BigInteger, primary_key=True, nullable=False)
message: Mapped[str] = mapped_column(String, nullable=False)
error_type: Mapped[str] = mapped_column(String, nullable=True)
stacktrace_embedding: Mapped[Vector] = mapped_column(Vector(768), nullable=False)
hash: Mapped[str] = mapped_column(String(32), nullable=False)