Skip to content

Commit

Permalink
chore(similarity): Make message column nullable in grouping records (#1182)
Browse files Browse the repository at this point in the history

Make message column nullable in grouping records
  • Loading branch information
jangjodi authored Sep 19, 2024
1 parent 0104de0 commit 751bae1
Show file tree
Hide file tree
Showing 3 changed files with 34 additions and 10 deletions.
31 changes: 31 additions & 0 deletions src/migrations/versions/09b3ef05f1fe_migration.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
"""Migration
Revision ID: 09b3ef05f1fe
Revises: da0a9c9f1bb4
Create Date: 2024-09-17 17:13:49.660584
"""
import sqlalchemy as sa
from alembic import op

# revision identifiers, used by Alembic.
revision = "09b3ef05f1fe"
down_revision = "da0a9c9f1bb4"
branch_labels = None
depends_on = None


def upgrade():
    """Drop the NOT NULL constraint on ``grouping_records.message``."""
    # ### commands auto generated by Alembic - please adjust! ###
    with op.batch_alter_table("grouping_records", schema=None) as table:
        table.alter_column(
            "message",
            existing_type=sa.VARCHAR(),
            nullable=True,
        )
    # ### end Alembic commands ###


def downgrade():
    """Restore the NOT NULL constraint on ``grouping_records.message``.

    Rows written while the column was nullable may hold NULL, and the
    constraint cannot be re-applied while any such row exists. Those rows
    are therefore backfilled with an empty string before the column is
    altered.
    """
    # Backfill first: setting NOT NULL fails if any existing row has a NULL message.
    op.execute("UPDATE grouping_records SET message = '' WHERE message IS NULL")

    with op.batch_alter_table("grouping_records", schema=None) as batch_op:
        batch_op.alter_column("message", existing_type=sa.VARCHAR(), nullable=False)
2 changes: 1 addition & 1 deletion src/seer/db.py
Original file line number Diff line number Diff line change
Expand Up @@ -256,7 +256,7 @@ class DbGroupingRecord(Base):
server_default=text("nextval('grouping_records_id_seq')"),
)
project_id: Mapped[int] = mapped_column(BigInteger, primary_key=True, nullable=False)
message: Mapped[str] = mapped_column(String, nullable=False)
message: Mapped[Optional[str]] = mapped_column(String, nullable=True)
error_type: Mapped[str] = mapped_column(String, nullable=True)
stacktrace_embedding: Mapped[Vector] = mapped_column(Vector(768), nullable=False)
hash: Mapped[str] = mapped_column(String(32), nullable=False)
Expand Down
11 changes: 2 additions & 9 deletions src/seer/grouping/grouping.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
import difflib
import gc
import logging
from functools import wraps
Expand Down Expand Up @@ -367,9 +366,6 @@ def get_nearest_neighbors(self, issue: GroupingRequest) -> SimilarityResponse:

similarity_response = SimilarityResponse(responses=[])
for record, distance in results:
message_similarity_score = difflib.SequenceMatcher(
None, issue.message, record.message
).ratio()
should_group = distance <= issue.threshold

if should_group:
Expand All @@ -387,7 +383,7 @@ def get_nearest_neighbors(self, issue: GroupingRequest) -> SimilarityResponse:
GroupingResponse(
parent_hash=record.hash,
stacktrace_distance=distance,
message_distance=1.0 - message_similarity_score,
message_distance=0.0,
should_group=should_group,
)
)
Expand Down Expand Up @@ -443,13 +439,10 @@ def insert_batch_grouping_records(

if nearest_neighbor:
neighbor, distance = nearest_neighbor[0][0], nearest_neighbor[0][1]
message_similarity_score = difflib.SequenceMatcher(
None, entry.message, neighbor.message
).ratio()
response = GroupingResponse(
parent_hash=neighbor.hash,
stacktrace_distance=distance,
message_distance=1.0 - message_similarity_score,
message_distance=0.0,
should_group=True,
)
groups_with_neighbor[str(entry.group_id)] = response
Expand Down

0 comments on commit 751bae1

Please sign in to comment.