From 5d90590c4a569821d6829d23a53bc3d305992f97 Mon Sep 17 00:00:00 2001 From: Jenn Mueng <30991498+jennmueng@users.noreply.github.com> Date: Wed, 14 Aug 2024 01:12:39 +0700 Subject: [PATCH] feat(issue-summary): Initial working issue summary endpoint (#1033) Introduces the `/v1/automation/summarize/issue` endpoint. The AI pipeline works as follows: - Initial pass with unstructured chain-of-thought (CoT) reasoning and an answer - Second pass to extract structured output. --- requirements.txt | 2 +- src/seer/app.py | 7 ++ src/seer/automation/summarize/issue.py | 100 +++++++++++++++++++++++ src/seer/automation/summarize/models.py | 14 ++++ tests/automation/summarize/test_issue.py | 96 ++++++++++++++++++++++ 5 files changed, 218 insertions(+), 1 deletion(-) create mode 100644 src/seer/automation/summarize/issue.py create mode 100644 src/seer/automation/summarize/models.py create mode 100644 tests/automation/summarize/test_issue.py diff --git a/requirements.txt b/requirements.txt index 108741e00..aafbea0eb 100644 --- a/requirements.txt +++ b/requirements.txt @@ -28,7 +28,7 @@ mpmath==1.3.0 networkx==3.1 numpy==1.26.1 onnx==1.16.0 -openai==1.16.2 +openai==1.40.1 openai-multi-tool-use-parallel-patch==0.2.0 optimum==1.16.2 packaging diff --git a/src/seer/app.py b/src/seer/app.py index 15340855c..c71246f18 100644 --- a/src/seer/app.py +++ b/src/seer/app.py @@ -39,6 +39,8 @@ RepoAccessCheckResponse, ) from seer.automation.codebase.repo_client import RepoClient +from seer.automation.summarize.issue import summarize_issue +from seer.automation.summarize.models import SummarizeIssueRequest, SummarizeIssueResponse from seer.automation.utils import raise_if_no_genai_consent from seer.bootup import bootup, module from seer.configuration import AppConfig @@ -208,6 +210,11 @@ def autofix_evaluation_start_endpoint(data: AutofixEvaluationRequest) -> Autofix return AutofixEndpointResponse(started=True, run_id=-1) +@json_api(blueprint, "/v1/automation/summarize/issue") +def summarize_issue_endpoint(data: SummarizeIssueRequest) -> 
SummarizeIssueResponse: + return summarize_issue(data) + + @json_api(blueprint, "/v1/anomaly-detection/detect") def detect_anomalies_endpoint(data: DetectAnomaliesRequest) -> DetectAnomaliesResponse: return anomaly_detection().detect_anomalies(data) diff --git a/src/seer/automation/summarize/issue.py b/src/seer/automation/summarize/issue.py new file mode 100644 index 000000000..3813a046a --- /dev/null +++ b/src/seer/automation/summarize/issue.py @@ -0,0 +1,100 @@ +import textwrap + +from langfuse.decorators import observe +from openai.types.chat.chat_completion_message_param import ChatCompletionMessageParam +from pydantic import BaseModel + +from seer.automation.agent.client import GptClient +from seer.automation.models import EventDetails +from seer.automation.summarize.models import SummarizeIssueRequest, SummarizeIssueResponse +from seer.dependency_injection import inject, injected + + +class IssueSummary(BaseModel): + cause_of_issue: str + impact: str + + +@observe(name="Summarize Issue") +@inject +def summarize_issue(request: SummarizeIssueRequest, gpt_client: GptClient = injected): + event_details = EventDetails.from_event(request.issue.events[0]) + + prompt = textwrap.dedent( + """\ + You are an exceptional developer that understands the issue and can summarize it in 1-2 sentences. + {event_details} + + Analyze the issue, find the root cause, and summarize it in 1-2 sentences. In your answer, make sure to use backticks to highlight code snippets, output two results: + + # Cause of issue + - 1 sentence, be extremely verbose with the exact snippets of code that are causing the issue. + - Be extremely short and specific. + - When talking about pieces of code, try to shorten it, so for example, instead of saying `foo.1.Def.bar` was undefined, say `Def` was undefined. Or saying if `foo.bar.baz.Class` is missing input field `bam.bar.Object` say `Class` is missing input field `Object`. + - A developer that sees this should know exactly what to fix right away. 
+ + # The impact on the system and users + - 1 sentence, be extremely verbose with how this issue affects the system and end users. + - Be extremely short and specific. + + Reason & explain the thought process step-by-step before giving the answers.""" + ).format(event_details=event_details.format_event()) + + message_dicts: list[ChatCompletionMessageParam] = [ + { + "content": prompt, + "role": "user", + }, + ] + + completion = gpt_client.openai_client.chat.completions.create( + model="gpt-4o-mini-2024-07-18", + messages=message_dicts, + temperature=0.0, + max_tokens=2048, + ) + + message = completion.choices[0].message + + if message.refusal: + raise RuntimeError(message.refusal) + + message_dicts.append( + { + "content": message.content, + "role": "assistant", + } + ) + + formatting_prompt = textwrap.dedent( + """\ + Format your answer to the following schema.""" + ) + message_dicts.append( + { + "content": formatting_prompt, + "role": "user", + } + ) + + structured_completion = gpt_client.openai_client.beta.chat.completions.parse( + model="gpt-4o-mini-2024-07-18", + messages=message_dicts, + temperature=0.0, + max_tokens=2048, + response_format=IssueSummary, + ) + + structured_message = structured_completion.choices[0].message + + if structured_message.refusal: + raise RuntimeError(structured_message.refusal) + + if not structured_message.parsed: + raise RuntimeError("Failed to parse message") + + return SummarizeIssueResponse( + group_id=request.group_id, + summary=structured_message.parsed.cause_of_issue, + impact=structured_message.parsed.impact, + ) diff --git a/src/seer/automation/summarize/models.py b/src/seer/automation/summarize/models.py new file mode 100644 index 000000000..d4e3553ae --- /dev/null +++ b/src/seer/automation/summarize/models.py @@ -0,0 +1,14 @@ +from pydantic import BaseModel + +from seer.automation.models import IssueDetails + + +class SummarizeIssueRequest(BaseModel): + group_id: int + issue: IssueDetails + + +class 
SummarizeIssueResponse(BaseModel): + group_id: int + summary: str + impact: str diff --git a/tests/automation/summarize/test_issue.py b/tests/automation/summarize/test_issue.py new file mode 100644 index 000000000..2afbe9d38 --- /dev/null +++ b/tests/automation/summarize/test_issue.py @@ -0,0 +1,96 @@ +from unittest.mock import MagicMock, Mock, patch + +import pytest +from johen import generate + +from seer.automation.models import IssueDetails +from seer.automation.summarize.issue import summarize_issue +from seer.automation.summarize.models import SummarizeIssueRequest, SummarizeIssueResponse + + +class TestSummarizeIssue: + @pytest.fixture + def mock_gpt_client(self): + return Mock() + + @pytest.fixture + def sample_request(self): + return SummarizeIssueRequest(group_id=1, issue=next(generate(IssueDetails))) + + def test_summarize_issue_success(self, mock_gpt_client, sample_request): + mock_completion = MagicMock() + mock_completion.choices[0].message.content = "Test content" + mock_completion.choices[0].message.refusal = None + mock_gpt_client.openai_client.chat.completions.create.return_value = mock_completion + + mock_structured_completion = MagicMock() + mock_structured_completion.choices[0].message.parsed = MagicMock( + cause_of_issue="Test cause", impact="Test impact" + ) + mock_structured_completion.choices[0].message.refusal = None + mock_gpt_client.openai_client.beta.chat.completions.parse.return_value = ( + mock_structured_completion + ) + + result = summarize_issue(sample_request, gpt_client=mock_gpt_client) + + assert isinstance(result, SummarizeIssueResponse) + assert result.group_id == 1 + assert result.summary == "Test cause" + assert result.impact == "Test impact" + + def test_summarize_issue_refusal(self, mock_gpt_client, sample_request): + mock_completion = MagicMock() + mock_completion.choices[0].message.content = "Test content" + mock_completion.choices[0].message.refusal = "Test refusal" + 
mock_gpt_client.openai_client.chat.completions.create.return_value = mock_completion + + with pytest.raises(RuntimeError, match="Test refusal"): + summarize_issue(sample_request, gpt_client=mock_gpt_client) + + def test_summarize_issue_parsing_failure(self, mock_gpt_client, sample_request): + mock_completion = MagicMock() + mock_completion.choices[0].message.content = "Test content" + mock_completion.choices[0].message.refusal = None + mock_gpt_client.openai_client.chat.completions.create.return_value = mock_completion + + mock_structured_completion = MagicMock() + mock_structured_completion.choices[0].message.parsed = None + mock_structured_completion.choices[0].message.refusal = None + mock_gpt_client.openai_client.beta.chat.completions.parse.return_value = ( + mock_structured_completion + ) + + with pytest.raises(RuntimeError, match="Failed to parse message"): + summarize_issue(sample_request, gpt_client=mock_gpt_client) + + @patch("seer.automation.summarize.issue.EventDetails.from_event") + def test_summarize_issue_event_details(self, mock_from_event, mock_gpt_client, sample_request): + mock_event_details = Mock() + mock_event_details.format_event.return_value = "Formatted event details" + mock_from_event.return_value = mock_event_details + + mock_completion = MagicMock() + mock_completion.choices[0].message.content = "Test content" + mock_completion.choices[0].message.refusal = None + mock_gpt_client.openai_client.chat.completions.create.return_value = mock_completion + + mock_structured_completion = MagicMock() + mock_structured_completion.choices[0].message.parsed = MagicMock( + cause_of_issue="Test cause", impact="Test impact" + ) + mock_structured_completion.choices[0].message.refusal = None + mock_gpt_client.openai_client.beta.chat.completions.parse.return_value = ( + mock_structured_completion + ) + + summarize_issue(sample_request, gpt_client=mock_gpt_client) + + mock_from_event.assert_called_once_with(sample_request.issue.events[0]) + 
mock_event_details.format_event.assert_called_once() + assert ( + "Formatted event details" + in mock_gpt_client.openai_client.chat.completions.create.call_args[1]["messages"][0][ + "content" + ] + )