Skip to content

Commit

Permalink
chore(grouping): migration for hnsw index (#870)
Browse files Browse the repository at this point in the history
Based on the analysis [here](https://github.com/getsentry/data-analysis/blob/main/grouping/hnsw_analysis.ipynb).

---------

Co-authored-by: Matt Duncan <14761+mrduncan@users.noreply.github.com>
  • Loading branch information
trillville and mrduncan authored Jul 17, 2024
1 parent b5d10ae commit ce03fd0
Show file tree
Hide file tree
Showing 2 changed files with 69 additions and 1 deletion.
68 changes: 68 additions & 0 deletions src/migrations/versions/d87a6410efe4_migration.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
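"""Update HNSW parameters for grouping_records.

Builds grouping_records_new, a hash-partitioned copy of grouping_records
whose HNSW index is created with m = 16 and ef_construction = 200.

Revision ID: d87a6410efe4
Revises: a0d00121d118
Create Date: 2024-07-09 22:28:26.035785
"""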

from alembic import op

# revision identifiers, used by Alembic.
revision = "d87a6410efe4"
down_revision = "a0d00121d118"
branch_labels = None
depends_on = None


def upgrade():
    """Build ``grouping_records_new`` as a hash-partitioned copy of ``grouping_records``.

    Steps, in order:

    1. Drop any existing ``grouping_records_new`` (with CASCADE).
    2. Create the parent table, partitioned by hash on ``project_id``.
    3. Create the 100 partitions ``grouping_records_new_p0`` .. ``_p99``.
    4. Copy every row from ``grouping_records`` into the new table.
    5. Create the HNSW index on ``stacktrace_embedding``
       (``m = 16``, ``ef_construction = 200``) and a plain index on
       ``project_id``.
    6. Add the ``(project_id, hash)`` unique constraint.
    """
    # Start from a clean slate: remove any existing copy before recreating it.
    op.execute("DROP TABLE IF EXISTS grouping_records_new CASCADE;")

    create_parent_sql = """
        CREATE TABLE grouping_records_new (
            id BIGINT NOT NULL,
            project_id BIGINT NOT NULL,
            hash VARCHAR(32) NOT NULL,
            message VARCHAR NOT NULL,
            error_type VARCHAR,
            stacktrace_embedding VECTOR(768) NOT NULL,
            PRIMARY KEY (id, project_id)
        ) PARTITION BY HASH (project_id);
        """
    op.execute(create_parent_sql)

    # One partition per possible remainder of the MODULUS 100 hash.
    for remainder in range(100):
        create_partition_sql = f"""
            CREATE TABLE grouping_records_new_p{remainder} PARTITION OF grouping_records_new
            FOR VALUES WITH (MODULUS 100, REMAINDER {remainder});
            """
        op.execute(create_partition_sql)

    # Copy the existing rows; the indexes below are created after this copy.
    copy_rows_sql = """
        INSERT INTO grouping_records_new (id, project_id, message, error_type, stacktrace_embedding, hash)
        SELECT id, project_id, message, error_type, stacktrace_embedding, hash
        FROM grouping_records;
        """
    op.execute(copy_rows_sql)

    create_hnsw_index_sql = """
        CREATE INDEX ix_grouping_records_new_stacktrace_embedding_hnsw
        ON grouping_records_new USING hnsw (stacktrace_embedding vector_cosine_ops)
        WITH (m = 16, ef_construction = 200);
        """
    op.execute(create_hnsw_index_sql)

    create_project_index_sql = (
        "CREATE INDEX ix_grouping_records_new_project_id ON grouping_records_new (project_id);"
    )
    op.execute(create_project_index_sql)

    # The unique constraint includes project_id, the partition key.
    unique_columns = ["project_id", "hash"]
    with op.batch_alter_table("grouping_records_new", schema=None) as table_ops:
        table_ops.create_unique_constraint("u_project_id_hash_composite", unique_columns)


def downgrade():
    """Undo ``upgrade`` by dropping ``grouping_records_new`` (with CASCADE).

    ``IF EXISTS`` makes the statement a no-op when the table is already gone.
    """
    drop_sql = "DROP TABLE IF EXISTS grouping_records_new CASCADE;"
    op.execute(drop_sql)
2 changes: 1 addition & 1 deletion src/seer/db.py
Original file line number Diff line number Diff line change
Expand Up @@ -247,7 +247,7 @@ class DbGroupingRecord(Base):
"ix_grouping_records_stacktrace_embedding_hnsw",
"stacktrace_embedding",
postgresql_using="hnsw",
postgresql_with={"m": 16, "ef_construction": 64},
postgresql_with={"m": 16, "ef_construction": 200},
postgresql_ops={"stacktrace_embedding": "vector_cosine_ops"},
),
Index(
Expand Down

0 comments on commit ce03fd0

Please sign in to comment.