Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

chore(similarity): Add try catch around insert grouping record #1184

Merged
merged 3 commits into from
Sep 23, 2024
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
75 changes: 45 additions & 30 deletions src/seer/grouping/grouping.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
from pydantic import BaseModel, ValidationInfo, field_validator
from sentence_transformers import SentenceTransformer
from sqlalchemy.dialects.postgresql import insert
from sqlalchemy.exc import IntegrityError
from torch.cuda import OutOfMemoryError

from seer.db import DbGroupingRecord, Session
Expand Down Expand Up @@ -362,8 +363,7 @@ def get_nearest_neighbors(self, issue: GroupingRequest) -> SimilarityResponse:
"stacktrace_length": len(issue.stacktrace),
},
)
self.insert_new_grouping_record(session, issue, embedding)
session.commit()
self.insert_new_grouping_record(issue, embedding)

similarity_response = SimilarityResponse(responses=[])
for record, distance in results:
Expand Down Expand Up @@ -472,43 +472,58 @@ def insert_batch_grouping_records(
return groups_with_neighbor

@sentry_sdk.tracing.trace
def insert_new_grouping_record(self, issue: GroupingRequest, embedding: np.ndarray) -> None:
    """
    Insert a new GroupingRecord into the database if the group hash does not already exist.

    Opens its own Session; commits the insert itself and handles the race where a
    concurrent request inserts the same (hash, project_id) first.

    :param issue: The issue to insert as a new GroupingRecord.
    :param embedding: The embedding of the stacktrace.
    """
    with Session() as session:
        extra = {
            "project_id": issue.project_id,
            "stacktrace_length": len(issue.stacktrace),
            "input_hash": issue.hash,
        }

        existing_record = (
            session.query(DbGroupingRecord)
            .filter_by(hash=issue.hash, project_id=issue.project_id)
            .first()
        )

        # Guard clause (early return) instead of nested branches: if the record
        # already exists there is nothing to insert — just log and bail.
        if existing_record is not None:
            extra["existing_hash"] = existing_record.hash
            logger.info("group_already_exists_in_seer_db", extra=extra)
            return

        new_record = GroupingRecord(
            project_id=issue.project_id,
            message=issue.message,
            stacktrace_embedding=embedding,
            hash=issue.hash,
            error_type=issue.exception_type,
        ).to_db_model()
        session.add(new_record)

        try:
            session.commit()
        except IntegrityError:
            # A concurrent request inserted the same (hash, project_id) between
            # our existence check and the commit. Roll back so the session is in
            # a usable state again, then log the record that won the race.
            session.rollback()
            existing_record = (
                session.query(DbGroupingRecord)
                .filter_by(hash=issue.hash, project_id=issue.project_id)
                .first()
            )
            extra["existing_hash"] = existing_record.hash
            logger.info(
                "group_already_exists_in_seer_db",
                extra=extra,
            )

@sentry_sdk.tracing.trace
def delete_grouping_records_for_project(self, project_id: int) -> bool:
"""
Expand Down
17 changes: 6 additions & 11 deletions tests/seer/grouping/test_grouping.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ def test_get_nearest_neighbors_has_neighbor(self):
message="message",
hash="QYK7aNYNnp5FgSev9Np1soqb1SdtyahD",
)
grouping_lookup().insert_new_grouping_record(session, grouping_request, embedding)
grouping_lookup().insert_new_grouping_record(grouping_request, embedding)
session.commit()

grouping_request = GroupingRequest(
Expand Down Expand Up @@ -122,11 +122,9 @@ def test_insert_new_grouping_record_group_record_exists(self):
hash="QYK7aNYNnp5FgSev9Np1soqb1SdtyahD",
)
# Insert the grouping record
grouping_lookup().insert_new_grouping_record(session, grouping_request, embedding)
session.commit()
grouping_lookup().insert_new_grouping_record(grouping_request, embedding)
# Re-insert the grouping record
grouping_lookup().insert_new_grouping_record(session, grouping_request, embedding)
session.commit()
grouping_lookup().insert_new_grouping_record(grouping_request, embedding)
matching_record = (
session.query(DbGroupingRecord)
.filter_by(hash="QYK7aNYNnp5FgSev9Np1soqb1SdtyahD")
Expand All @@ -150,10 +148,8 @@ def test_insert_new_grouping_record_group_record_cross_project(self):
hash="QYK7aNYNnp5FgSev9Np1soqb1SdtyahD",
)
# Insert the grouping record
grouping_lookup().insert_new_grouping_record(session, grouping_request1, embedding)
session.commit()
grouping_lookup().insert_new_grouping_record(session, grouping_request2, embedding)
session.commit()
grouping_lookup().insert_new_grouping_record(grouping_request1, embedding)
grouping_lookup().insert_new_grouping_record(grouping_request2, embedding)
matching_record = (
session.query(DbGroupingRecord)
.filter_by(hash="QYK7aNYNnp5FgSev9Np1soqb1SdtyahD")
Expand Down Expand Up @@ -293,8 +289,7 @@ def test_bulk_create_and_insert_grouping_records_has_neighbor_in_existing_record
message="message",
hash="QYK7aNYNnp5FgSev9Np1soqb1SdtyahD",
)
grouping_lookup().insert_new_grouping_record(session, grouping_request, embedding)
session.commit()
grouping_lookup().insert_new_grouping_record(grouping_request, embedding)

# Create record data to attempt to be inserted, create 5 with the stacktrace "stacktrace"
hashes = [str(i) * 32 for i in range(10)]
Expand Down
Loading