Skip to content

Commit

Permalink
chore(similarity): Add span to individual record insert (#1159)
Browse files: browse the repository at this point in the history
Add a Sentry span around each individual record insert performed during bulk record insertion
  • Loading branch information
jangjodi committed Sep 13, 2024
1 parent 8a4432a commit b9caa1e
Showing 1 changed file with 41 additions and 37 deletions.
78 changes: 41 additions & 37 deletions src/seer/grouping/grouping.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -424,46 +424,50 @@ def insert_batch_grouping_records(
)
with Session() as session:
for i, entry in enumerate(data.data):
embedding = embeddings[i].astype("float32")
nearest_neighbor = self.query_nearest_k_neighbors(
session,
embedding,
entry.project_id,
entry.hash,
data.threshold,
data.k,
data.hnsw_candidates,
data.hnsw_distance,
data.use_reranking,
)

if nearest_neighbor:
neighbor, distance = nearest_neighbor[0][0], nearest_neighbor[0][1]
message_similarity_score = difflib.SequenceMatcher(
None, entry.message, neighbor.message
).ratio()
response = GroupingResponse(
parent_hash=neighbor.hash,
stacktrace_distance=distance,
message_distance=1.0 - message_similarity_score,
should_group=True,
)
groups_with_neighbor[str(entry.group_id)] = response
else:
insert_stmt = insert(DbGroupingRecord).values(
project_id=entry.project_id,
message=entry.message,
error_type=entry.exception_type,
hash=entry.hash,
stacktrace_embedding=embedding,
with sentry_sdk.start_span(
op="seer.grouping", description="insert single grouping record"
) as span:
span.set_data("stacktrace_len", len(data.stacktrace_list[i]))
embedding = embeddings[i].astype("float32")
nearest_neighbor = self.query_nearest_k_neighbors(
session,
embedding,
entry.project_id,
entry.hash,
data.threshold,
data.k,
data.hnsw_candidates,
data.hnsw_distance,
data.use_reranking,
)

session.execute(
insert_stmt.on_conflict_do_nothing(
index_elements=(DbGroupingRecord.project_id, DbGroupingRecord.hash)
if nearest_neighbor:
neighbor, distance = nearest_neighbor[0][0], nearest_neighbor[0][1]
message_similarity_score = difflib.SequenceMatcher(
None, entry.message, neighbor.message
).ratio()
response = GroupingResponse(
parent_hash=neighbor.hash,
stacktrace_distance=distance,
message_distance=1.0 - message_similarity_score,
should_group=True,
)
)
session.commit()
groups_with_neighbor[str(entry.group_id)] = response
else:
insert_stmt = insert(DbGroupingRecord).values(
project_id=entry.project_id,
message=entry.message,
error_type=entry.exception_type,
hash=entry.hash,
stacktrace_embedding=embedding,
)

session.execute(
insert_stmt.on_conflict_do_nothing(
index_elements=(DbGroupingRecord.project_id, DbGroupingRecord.hash)
)
)
session.commit()

return groups_with_neighbor

Expand Down

0 comments on commit b9caa1e

Please sign in to comment.