Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

chore(similarity): Add try catch around insert grouping record #1184

Merged
merged 3 commits into from
Sep 23, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 11 additions & 25 deletions src/seer/grouping/grouping.py
Original file line number Diff line number Diff line change
Expand Up @@ -362,8 +362,7 @@ def get_nearest_neighbors(self, issue: GroupingRequest) -> SimilarityResponse:
"stacktrace_length": len(issue.stacktrace),
},
)
self.insert_new_grouping_record(session, issue, embedding)
session.commit()
self.insert_new_grouping_record(issue, embedding)

similarity_response = SimilarityResponse(responses=[])
for record, distance in results:
Expand Down Expand Up @@ -472,43 +471,30 @@ def insert_batch_grouping_records(
return groups_with_neighbor

@sentry_sdk.tracing.trace
def insert_new_grouping_record(
self, session, issue: GroupingRequest, embedding: np.ndarray
) -> None:
def insert_new_grouping_record(self, issue: GroupingRequest, embedding: np.ndarray) -> None:
"""
Inserts a new GroupingRecord into the database unless a record with the same project_id and hash already exists.
Returns nothing; an already-existing record is left unchanged.

:param session: The database session.
:param issue: The issue to insert as a new GroupingRecord.
:param embedding: The embedding of the stacktrace.
"""
existing_record = (
session.query(DbGroupingRecord)
.filter_by(hash=issue.hash, project_id=issue.project_id)
.first()
)

if existing_record is None:
new_record = GroupingRecord(
with Session() as session:
insert_stmt = insert(DbGroupingRecord).values(
project_id=issue.project_id,
message=issue.message,
stacktrace_embedding=embedding,
hash=issue.hash,
error_type=issue.exception_type,
).to_db_model()
session.add(new_record)
else:
logger.info(
"group_already_exists_in_seer_db",
extra={
"existing_hash": existing_record.hash,
"project_id": issue.project_id,
"stacktrace_length": len(issue.stacktrace),
"input_hash": issue.hash,
},
)

session.execute(
insert_stmt.on_conflict_do_nothing(
index_elements=(DbGroupingRecord.project_id, DbGroupingRecord.hash)
)
)
session.commit()

@sentry_sdk.tracing.trace
def delete_grouping_records_for_project(self, project_id: int) -> bool:
"""
Expand Down
17 changes: 6 additions & 11 deletions tests/seer/grouping/test_grouping.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ def test_get_nearest_neighbors_has_neighbor(self):
message="message",
hash="QYK7aNYNnp5FgSev9Np1soqb1SdtyahD",
)
grouping_lookup().insert_new_grouping_record(session, grouping_request, embedding)
grouping_lookup().insert_new_grouping_record(grouping_request, embedding)
session.commit()

grouping_request = GroupingRequest(
Expand Down Expand Up @@ -122,11 +122,9 @@ def test_insert_new_grouping_record_group_record_exists(self):
hash="QYK7aNYNnp5FgSev9Np1soqb1SdtyahD",
)
# Insert the grouping record
grouping_lookup().insert_new_grouping_record(session, grouping_request, embedding)
session.commit()
grouping_lookup().insert_new_grouping_record(grouping_request, embedding)
# Re-insert the grouping record
grouping_lookup().insert_new_grouping_record(session, grouping_request, embedding)
session.commit()
grouping_lookup().insert_new_grouping_record(grouping_request, embedding)
matching_record = (
session.query(DbGroupingRecord)
.filter_by(hash="QYK7aNYNnp5FgSev9Np1soqb1SdtyahD")
Expand All @@ -150,10 +148,8 @@ def test_insert_new_grouping_record_group_record_cross_project(self):
hash="QYK7aNYNnp5FgSev9Np1soqb1SdtyahD",
)
# Insert the grouping record
grouping_lookup().insert_new_grouping_record(session, grouping_request1, embedding)
session.commit()
grouping_lookup().insert_new_grouping_record(session, grouping_request2, embedding)
session.commit()
grouping_lookup().insert_new_grouping_record(grouping_request1, embedding)
grouping_lookup().insert_new_grouping_record(grouping_request2, embedding)
matching_record = (
session.query(DbGroupingRecord)
.filter_by(hash="QYK7aNYNnp5FgSev9Np1soqb1SdtyahD")
Expand Down Expand Up @@ -293,8 +289,7 @@ def test_bulk_create_and_insert_grouping_records_has_neighbor_in_existing_record
message="message",
hash="QYK7aNYNnp5FgSev9Np1soqb1SdtyahD",
)
grouping_lookup().insert_new_grouping_record(session, grouping_request, embedding)
session.commit()
grouping_lookup().insert_new_grouping_record(grouping_request, embedding)

# Create record data to attempt to be inserted, create 5 with the stacktrace "stacktrace"
hashes = [str(i) * 32 for i in range(10)]
Expand Down
Loading