From 5d90590c4a569821d6829d23a53bc3d305992f97 Mon Sep 17 00:00:00 2001
From: Jenn Mueng <30991498+jennmueng@users.noreply.github.com>
Date: Wed, 14 Aug 2024 01:12:39 +0700
Subject: [PATCH] feat(issue-summary): Initial working issue summary endpoint
 (#1033)

Introduces the `/v1/automation/summarize/issue` endpoint.

The AI pipeline runs in two passes:
- First pass: unstructured chain-of-thought (CoT) reasoning followed by an answer.
- Second pass: extracts structured output from the first pass's answer.
---
 requirements.txt                         |   2 +-
 src/seer/app.py                          |   7 ++
 src/seer/automation/summarize/issue.py   | 100 +++++++++++++++++++++++
 src/seer/automation/summarize/models.py  |  14 ++++
 tests/automation/summarize/test_issue.py |  96 ++++++++++++++++++++++
 5 files changed, 218 insertions(+), 1 deletion(-)
 create mode 100644 src/seer/automation/summarize/issue.py
 create mode 100644 src/seer/automation/summarize/models.py
 create mode 100644 tests/automation/summarize/test_issue.py

diff --git a/requirements.txt b/requirements.txt
index 108741e00..aafbea0eb 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -28,7 +28,7 @@ mpmath==1.3.0
 networkx==3.1
 numpy==1.26.1
 onnx==1.16.0
-openai==1.16.2
+openai==1.40.1
 openai-multi-tool-use-parallel-patch==0.2.0
 optimum==1.16.2
 packaging
diff --git a/src/seer/app.py b/src/seer/app.py
index 15340855c..c71246f18 100644
--- a/src/seer/app.py
+++ b/src/seer/app.py
@@ -39,6 +39,8 @@
     RepoAccessCheckResponse,
 )
 from seer.automation.codebase.repo_client import RepoClient
+from seer.automation.summarize.issue import summarize_issue
+from seer.automation.summarize.models import SummarizeIssueRequest, SummarizeIssueResponse
 from seer.automation.utils import raise_if_no_genai_consent
 from seer.bootup import bootup, module
 from seer.configuration import AppConfig
@@ -208,6 +210,11 @@ def autofix_evaluation_start_endpoint(data: AutofixEvaluationRequest) -> Autofix
     return AutofixEndpointResponse(started=True, run_id=-1)
 
 
+@json_api(blueprint, "/v1/automation/summarize/issue")
+def summarize_issue_endpoint(data: SummarizeIssueRequest) -> SummarizeIssueResponse:
+    return summarize_issue(data)  # gpt_client is omitted so its `injected` default applies
+
+
 @json_api(blueprint, "/v1/anomaly-detection/detect")
 def detect_anomalies_endpoint(data: DetectAnomaliesRequest) -> DetectAnomaliesResponse:
     return anomaly_detection().detect_anomalies(data)
diff --git a/src/seer/automation/summarize/issue.py b/src/seer/automation/summarize/issue.py
new file mode 100644
index 000000000..3813a046a
--- /dev/null
+++ b/src/seer/automation/summarize/issue.py
@@ -0,0 +1,100 @@
+import textwrap
+
+from langfuse.decorators import observe
+from openai.types.chat.chat_completion_message_param import ChatCompletionMessageParam
+from pydantic import BaseModel
+
+from seer.automation.agent.client import GptClient
+from seer.automation.models import EventDetails
+from seer.automation.summarize.models import SummarizeIssueRequest, SummarizeIssueResponse
+from seer.dependency_injection import inject, injected
+
+
+class IssueSummary(BaseModel):  # passed as `response_format` to the structured second pass
+    cause_of_issue: str  # surfaces as `SummarizeIssueResponse.summary`
+    impact: str  # surfaces as `SummarizeIssueResponse.impact`
+
+
+@observe(name="Summarize Issue")
+@inject
+def summarize_issue(
+    request: SummarizeIssueRequest, gpt_client: GptClient = injected
+) -> SummarizeIssueResponse:
+    """Summarize the first event of an issue using two chat-completion passes.
+
+    The first pass asks for free-form, step-by-step reasoning plus an answer. The second
+    pass replays that exchange and asks the model to restate it as an `IssueSummary`.
+
+    Raises:
+        ValueError: the issue has no events to summarize.
+        RuntimeError: a pass was refused, returned no text, or produced no parsed result.
+    """
+    if not request.issue.events:
+        raise ValueError("Cannot summarize an issue that has no events")
+
+    model = "gpt-4o-mini-2024-07-18"  # shared by both passes
+    event_details = EventDetails.from_event(request.issue.events[0])
+
+    prompt = textwrap.dedent(
+        """\
+        You are an exceptional developer that understands the issue and can summarize it in 1-2 sentences.
+        {event_details}
+
+        Analyze the issue, find the root cause, and summarize it in 1-2 sentences. In your answer, make sure to use backticks to highlight code snippets, output two results:
+
+        # Cause of issue
+        - 1 sentence, be extremely verbose with the exact snippets of code that are causing the issue.
+        - Be extremely short and specific.
+        - When talking about pieces of code, try to shorten it, so for example, instead of saying `foo.1.Def.bar` was undefined, say `Def` was undefined. Or saying if `foo.bar.baz.Class` is missing input field `bam.bar.Object` say `Class` is missing input field `Object`.
+        - A developer that sees this should know exactly what to fix right away.
+
+        # The impact on the system and users
+        - 1 sentence, be extremely verbose with how this issue affects the system and end users.
+        - Be extremely short and specific.
+
+        Reason & explain the thought process step-by-step before giving the answers."""
+    ).format(event_details=event_details.format_event())
+
+    message_dicts: list[ChatCompletionMessageParam] = [{"content": prompt, "role": "user"}]
+
+    completion = gpt_client.openai_client.chat.completions.create(
+        model=model,
+        messages=message_dicts,
+        temperature=0.0,
+        max_tokens=2048,
+    )
+
+    message = completion.choices[0].message
+
+    if message.refusal:
+        raise RuntimeError(message.refusal)
+
+    if message.content is None:
+        raise RuntimeError("Model returned no content to summarize")
+
+    # Replay the reasoning as conversation history, then request the structured form.
+    message_dicts.append({"content": message.content, "role": "assistant"})
+    formatting_prompt = "Format your answer to the following schema."
+    message_dicts.append({"content": formatting_prompt, "role": "user"})
+
+    structured_completion = gpt_client.openai_client.beta.chat.completions.parse(
+        model=model,
+        messages=message_dicts,
+        temperature=0.0,
+        max_tokens=2048,
+        response_format=IssueSummary,
+    )
+
+    structured_message = structured_completion.choices[0].message
+
+    if structured_message.refusal:
+        raise RuntimeError(structured_message.refusal)
+
+    if not structured_message.parsed:
+        raise RuntimeError("Failed to parse message")
+
+    return SummarizeIssueResponse(
+        group_id=request.group_id,
+        summary=structured_message.parsed.cause_of_issue,
+        impact=structured_message.parsed.impact,
+    )
diff --git a/src/seer/automation/summarize/models.py b/src/seer/automation/summarize/models.py
new file mode 100644
index 000000000..d4e3553ae
--- /dev/null
+++ b/src/seer/automation/summarize/models.py
@@ -0,0 +1,14 @@
+from pydantic import BaseModel
+
+from seer.automation.models import IssueDetails
+
+
+class SummarizeIssueRequest(BaseModel):  # request body for /v1/automation/summarize/issue
+    group_id: int  # echoed back unchanged on the response
+    issue: IssueDetails  # summarize_issue reads the first entry of issue.events
+
+
+class SummarizeIssueResponse(BaseModel):  # response body for /v1/automation/summarize/issue
+    group_id: int  # copied from the request's group_id
+    summary: str  # filled from the parsed `cause_of_issue` field
+    impact: str  # filled from the parsed `impact` field
diff --git a/tests/automation/summarize/test_issue.py b/tests/automation/summarize/test_issue.py
new file mode 100644
index 000000000..2afbe9d38
--- /dev/null
+++ b/tests/automation/summarize/test_issue.py
@@ -0,0 +1,96 @@
+from unittest.mock import MagicMock, Mock, patch
+
+import pytest
+from johen import generate
+
+from seer.automation.models import IssueDetails
+from seer.automation.summarize.issue import summarize_issue
+from seer.automation.summarize.models import SummarizeIssueRequest, SummarizeIssueResponse
+
+
+class TestSummarizeIssue:
+    """Exercises `summarize_issue` against a mocked GPT client for both completion passes."""
+
+    @pytest.fixture
+    def mock_gpt_client(self):
+        return Mock()
+
+    @pytest.fixture
+    def sample_request(self):
+        # `generate` yields IssueDetails instances; the first one becomes the request's issue.
+        issue = next(generate(IssueDetails))
+        return SummarizeIssueRequest(group_id=1, issue=issue)
+
+    @staticmethod
+    def _first_pass(refusal=None):
+        # Stand-in for the `chat.completions.create` result of the reasoning pass.
+        completion = MagicMock()
+        first_message = completion.choices[0].message
+        first_message.content = "Test content"
+        first_message.refusal = refusal
+        return completion
+
+    @staticmethod
+    def _second_pass(parsed):
+        # Stand-in for the `beta.chat.completions.parse` result of the formatting pass.
+        completion = MagicMock()
+        second_message = completion.choices[0].message
+        second_message.parsed = parsed
+        second_message.refusal = None
+        return completion
+
+    @staticmethod
+    def _parsed_summary():
+        return MagicMock(cause_of_issue="Test cause", impact="Test impact")
+
+    def test_summarize_issue_success(self, mock_gpt_client, sample_request):
+        """A clean two-pass run maps the parsed fields onto the response."""
+        openai_client = mock_gpt_client.openai_client
+        openai_client.chat.completions.create.return_value = self._first_pass()
+        openai_client.beta.chat.completions.parse.return_value = self._second_pass(
+            self._parsed_summary()
+        )
+
+        result = summarize_issue(sample_request, gpt_client=mock_gpt_client)
+
+        assert isinstance(result, SummarizeIssueResponse)
+        assert result.group_id == 1
+        assert result.summary == "Test cause"
+        assert result.impact == "Test impact"
+
+    def test_summarize_issue_refusal(self, mock_gpt_client, sample_request):
+        """A refusal on the first pass surfaces as a RuntimeError carrying its text."""
+        openai_client = mock_gpt_client.openai_client
+        openai_client.chat.completions.create.return_value = self._first_pass("Test refusal")
+
+        with pytest.raises(RuntimeError, match="Test refusal"):
+            summarize_issue(sample_request, gpt_client=mock_gpt_client)
+
+    def test_summarize_issue_parsing_failure(self, mock_gpt_client, sample_request):
+        """A `parsed` value of None on the second pass raises a RuntimeError."""
+        openai_client = mock_gpt_client.openai_client
+        openai_client.chat.completions.create.return_value = self._first_pass()
+        openai_client.beta.chat.completions.parse.return_value = self._second_pass(None)
+
+        with pytest.raises(RuntimeError, match="Failed to parse message"):
+            summarize_issue(sample_request, gpt_client=mock_gpt_client)
+
+    @patch("seer.automation.summarize.issue.EventDetails.from_event")
+    def test_summarize_issue_event_details(self, mock_from_event, mock_gpt_client, sample_request):
+        """The first event is formatted once and embedded in the first prompt."""
+        formatted_event = Mock()
+        formatted_event.format_event.return_value = "Formatted event details"
+        mock_from_event.return_value = formatted_event
+
+        openai_client = mock_gpt_client.openai_client
+        openai_client.chat.completions.create.return_value = self._first_pass()
+        openai_client.beta.chat.completions.parse.return_value = self._second_pass(
+            self._parsed_summary()
+        )
+
+        summarize_issue(sample_request, gpt_client=mock_gpt_client)
+
+        mock_from_event.assert_called_once_with(sample_request.issue.events[0])
+        formatted_event.format_event.assert_called_once()
+        sent_messages = openai_client.chat.completions.create.call_args[1]["messages"]
+        assert "Formatted event details" in sent_messages[0]["content"]