feat(autofix): Add first pass at interactive flow #1168

Merged · 16 commits · Sep 19, 2024
4 changes: 2 additions & 2 deletions requirements.txt

@@ -28,7 +28,7 @@ mpmath==1.3.0
 networkx==3.1
 numpy==1.26.1
 onnx==1.16.0
-openai==1.40.1
+openai==1.44.1
 optimum==1.16.2
 packaging
 pandas==2.0.3
@@ -106,4 +106,4 @@ langfuse @ git+https://github.com/jennmueng/langfuse-python.git@9d9350de1e4e84fa
 watchdog
 stumpy==1.13.0
 pytest_alembic==0.11.1
-cryptography==43.0.0
+cryptography==43.0.1
3 changes: 3 additions & 0 deletions src/seer/app.py

@@ -27,6 +27,7 @@
     check_and_mark_if_timed_out,
     get_autofix_state,
     get_autofix_state_from_pr_id,
+    receive_user_message,
     run_autofix_create_pr,
     run_autofix_evaluation,
     run_autofix_execution,
@@ -176,6 +177,8 @@ def autofix_update_endpoint(
         run_autofix_execution(data)
     elif data.payload.type == AutofixUpdateType.CREATE_PR:
         run_autofix_create_pr(data)
+    elif data.payload.type == AutofixUpdateType.USER_MESSAGE:
+        receive_user_message(data)
     return AutofixEndpointResponse(started=True, run_id=data.run_id)
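For orientation, a sketch of what an update carrying the new USER_MESSAGE type might look like. Only run_id and payload.type appear in this diff; the text field and the serialized type value are illustrative assumptions, not taken from the PR:

# Hypothetical request body for the new USER_MESSAGE branch above.
example_update = {
    "run_id": 123,
    "payload": {
        "type": "user_message",  # dispatched as AutofixUpdateType.USER_MESSAGE
        "text": "Focus on the retry logic in the task worker instead.",
    },
}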
51 changes: 40 additions & 11 deletions src/seer/automation/agent/agent.py

@@ -1,6 +1,6 @@
 import logging
 from abc import ABC
-from typing import Optional
+from typing import Optional, cast
 
 from pydantic import BaseModel, Field
 
@@ -15,7 +15,9 @@
 from seer.automation.agent.tools import FunctionTool
 from seer.automation.agent.utils import parse_json_with_keys
 from seer.automation.autofix.autofix_context import AutofixContext
 from seer.automation.utils import extract_text_inside_tags
+from seer.automation.autofix.components.insight_sharing.component import InsightSharingComponent
+from seer.automation.autofix.components.insight_sharing.models import InsightSharingRequest
+from seer.automation.autofix.models import DefaultStep
 from seer.dependency_injection import inject, injected
 
 logger = logging.getLogger("autofix")
@@ -61,26 +63,51 @@ def get_completion(self):
             tools=(self.tools if len(self.tools) > 0 else None),
         )
 
+    def use_user_messages(self, context: AutofixContext):
+        # adds any queued user messages to the memory
+        user_msgs = context.state.get().steps[-1].queued_user_messages
+        if user_msgs:
+            self.memory.append(Message(content="\n".join(user_msgs), role="user"))
+            with context.state.update() as cur:
+                cur.steps[-1].queued_user_messages = []
+            context.event_manager.add_log("Thanks for the input. I'm thinking through it now...")
+
     def run_iteration(self, context: Optional[AutofixContext] = None):
         logger.debug(f"----[{self.name}] Running Iteration {self.iterations}----")
 
         message, usage = self.get_completion()
 
+        # interrupt if user message is queued and awaiting handling
+        if context and context.state.get().steps[-1].queued_user_messages:
+            self.use_user_messages(context)
+            return
+
         self.memory.append(message)
 
+        # log thoughts to the user
         if message.content and context:
             text_before_tag = message.content.split("<")[0]
-            text = text_before_tag
+            logs_inside_tags = extract_text_inside_tags(
+                message.content, "log", strip_newlines=False
+            )
+            text = ""
+            if logs_inside_tags:
+                text = logs_inside_tags
+            elif text_before_tag:
+                text = text_before_tag
             if text:
                 context.event_manager.add_log(text)
+                # call LLM separately with the same memory to generate structured output insight cards
+                insight_sharing = InsightSharingComponent(context)
+                past_insights = context.state.get().get_all_insights()
+                insight_card = insight_sharing.invoke(
+                    InsightSharingRequest(
+                        latest_thought=text,
+                        memory=self.memory,
+                        task_description=context.state.get().get_step_description(),
+                        past_insights=past_insights,
+                    )
+                )
+                if insight_card:
+                    if context.state.get().steps and isinstance(
+                        context.state.get().steps[-1], DefaultStep
+                    ):
+                        step = cast(DefaultStep, context.state.get().steps[-1])
+                        step.insights.append(insight_card)
+                        with context.state.update() as cur:
+                            cur.steps[-1] = step
Comment on lines +93 to +111

Member commented:

    Hmm I think this is worth pulling out of the general agent logic and into a specific AutofixAgent that inherits from this generic Agent, as codecov will be using this too and we don't need to generate insights there.

@roaga (Member, author) replied on Sep 18, 2024:

    Since in many cases we'll want to disable it for Autofix too (e.g. evals, GH Copilot), I'm adding an interactive flag to the AgentConfig so that these kinds of features are easy to enable/disable whenever we want. It defaults to False, so the unit test gen code will not generate any insights. (A sketch of this split follows the diff below.)

         if message.tool_calls:
             for tool_call in message.tool_calls:
@@ -121,6 +148,8 @@ def run(self, prompt: str, context: Optional[AutofixContext] = None):
         self.reset_iterations()
 
         while self.should_continue():
+            if context:
+                self.use_user_messages(context)
             self.run_iteration(context=context)
 
         if self.iterations == self.config.max_iterations:
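Picking up the review thread above: a minimal sketch of what the discussed split could look like. Only the AutofixAgent name and the interactive flag (defaulting to False) come from the discussion; the base-class stub and all other details here are assumptions for illustration, not the PR's actual code.

from typing import Optional

from pydantic import BaseModel


class AgentConfig(BaseModel):
    max_iterations: int = 24  # referenced by the diff as self.config.max_iterations; default assumed
    interactive: bool = False  # per the thread: off by default so evals, GH Copilot, and unit test gen skip insights


class Agent:
    """Stand-in stub for the generic Agent base class in agent.py."""

    def __init__(self, config: AgentConfig):
        self.config = config

    def run_iteration(self, context: Optional[object] = None) -> None:
        ...  # generic completion / tool-call loop


class AutofixAgent(Agent):
    """Autofix-specific subclass: insight cards and queued user messages
    would live here rather than in the generic Agent."""

    def run_iteration(self, context: Optional[object] = None) -> None:
        super().run_iteration(context=context)
        if self.config.interactive and context is not None:
            ...  # insight-sharing + user-interrupt logic from the diff above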
4 changes: 4 additions & 0 deletions src/seer/automation/agent/client.py

@@ -147,6 +147,10 @@ def clean_tool_call_assistant_messages(self, messages: list[Message]) -> list[Message]:
             )
         elif message.role == "tool":
             new_messages.append(Message(role="user", content=message.content, tool_calls=[]))
+        elif message.role == "tool_use":
+            new_messages.append(
+                Message(role="assistant", content=message.content, tool_calls=[])
+            )
Comment on lines +150 to +153

Member commented:

    tool_use is for Anthropic, right? We don't need to use this hack on Anthropic calls, right?

Member (author) replied:

    I believe I did need to use this hack for Claude; I can't recall the exact error that was occurring, though.

         else:
             new_messages.append(message)
     return new_messages
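To make the behavior under discussion easier to scan, here is a condensed, self-contained restatement of the role remapping, including the new tool_use branch. The dataclass is a stand-in for seer.automation.agent.models.Message, and the branch for assistant messages carrying tool calls (elided in the hunk above) is omitted:

from dataclasses import dataclass, field


@dataclass
class Message:  # stand-in for seer.automation.agent.models.Message
    role: str
    content: str
    tool_calls: list = field(default_factory=list)


def clean_roles(messages: list[Message]) -> list[Message]:
    # Sketch of clean_tool_call_assistant_messages: tool results become user
    # messages, and Anthropic-style "tool_use" turns become plain assistant
    # messages, so downstream calls only ever see user/assistant/system roles.
    cleaned: list[Message] = []
    for m in messages:
        if m.role == "tool":
            cleaned.append(Message(role="user", content=m.content))
        elif m.role == "tool_use":  # the new branch added in this PR
            cleaned.append(Message(role="assistant", content=m.content))
        else:
            cleaned.append(m)
    return cleaned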
Empty file.
134 changes: 134 additions & 0 deletions src/seer/automation/autofix/components/insight_sharing/component.py

@@ -0,0 +1,134 @@
import re
import textwrap

from langfuse.decorators import observe
from sentry_sdk.ai.monitoring import ai_track

from seer.automation.agent.client import GptClient
from seer.automation.agent.models import Message, Usage
from seer.automation.autofix.autofix_context import AutofixContext
from seer.automation.autofix.components.insight_sharing.models import (
    InsightContextOutput,
    InsightSharingOutput,
    InsightSharingRequest,
)
from seer.automation.component import BaseComponent
from seer.dependency_injection import inject, injected


class InsightSharingPrompts:
    @staticmethod
    def format_step_one(
        task_description: str,
        latest_thought: str,
        past_insights: list[str],
    ):
        past_insights = [f"{i + 1}. {insight}" for i, insight in enumerate(past_insights)]
        return textwrap.dedent(
            """\
            Given the chain of thought below for {task_description}:
            {insights}

            Write the next under-25-words conclusion in the chain of thought based on the notes below, or if there is no good conclusion to add, return <NO_INSIGHT/>. The criteria for a good conclusion are that it should be a large, novel jump in insights, not similar to any item in the existing chain of thought, it should be a complete conclusion after analysis, it should not be a plan of what to analyze next, and it should be valuable for {task_description}. Every item in the chain of thought should read like a chain that clearly builds off of the previous step. If you can't find a conclusion that meets these criteria, return <NO_INSIGHT/>.

            {latest_thought}"""
        ).format(
            task_description=task_description,
            latest_thought=latest_thought,
            insights="\n".join(past_insights) if past_insights else "None",
        )

    @staticmethod
    def format_step_two(insight: str, latest_thought: str):
        return textwrap.dedent(
            """\
            Return the pieces of context from the issue details or the files in the codebase that are directly relevant to the text below:
            {insight}

            That means choose the most relevant codebase snippets, event logs, stacktraces, or other information, that show specifically what the text mentions. Don't include any repeated information; just include what's needed.

            Also provide a one-line explanation of how the pieces of context directly explain the text.

            To know what's needed, reference these notes:
            {latest_thought}"""
        ).format(
            insight=insight,
            latest_thought=latest_thought,
        )


class InsightSharingComponent(BaseComponent[InsightSharingRequest, InsightSharingOutput]):
    context: AutofixContext

    @observe(name="Sharing Insights")
    @ai_track(description="Sharing Insights")
    @inject
    def invoke(
        self, request: InsightSharingRequest, gpt_client: GptClient = injected
    ) -> InsightSharingOutput | None:
        prompt_one = InsightSharingPrompts.format_step_one(
            task_description=request.task_description,
            latest_thought=request.latest_thought,
            past_insights=request.past_insights,
        )
        completion = gpt_client.openai_client.chat.completions.create(
            model="gpt-4o-mini-2024-07-18",
            messages=[Message(role="user", content=prompt_one).to_message()],
            temperature=0.0,
        )
        with self.context.state.update() as cur:
            usage = Usage(
                completion_tokens=completion.usage.completion_tokens,
                prompt_tokens=completion.usage.prompt_tokens,
                total_tokens=completion.usage.total_tokens,
            )
            cur.usage += usage
        insight = completion.choices[0].message.content
        if insight == "<NO_INSIGHT/>":
            return None

        insight = re.sub(
            r"^\d+\.\s+", "", insight
        )  # since the model often starts the insight with a number, e.g. "3. Insight..."

        prompt_two = InsightSharingPrompts.format_step_two(
            insight=insight,
            latest_thought=request.latest_thought,
        )
        memory = []
        for i, message in enumerate(gpt_client.clean_tool_call_assistant_messages(request.memory)):
            if message.role != "system":
                memory.append(message.to_message())
        memory.append(Message(role="user", content=prompt_two).to_message())

        completion = gpt_client.openai_client.beta.chat.completions.parse(
            model="gpt-4o-mini-2024-07-18",
            messages=memory,
            response_format=InsightContextOutput,
            temperature=0.0,
            max_tokens=2048,
        )
        with self.context.state.update() as cur:
            usage = Usage(
                completion_tokens=completion.usage.completion_tokens,
                prompt_tokens=completion.usage.prompt_tokens,
                total_tokens=completion.usage.total_tokens,
            )
            cur.usage += usage
        structured_message = completion.choices[0].message
        if structured_message.refusal:
            raise RuntimeError(structured_message.refusal)
        if not structured_message.parsed:
            raise RuntimeError("Failed to parse message")

        res = completion.choices[0].message.parsed

        response = InsightSharingOutput(
            insight=insight,
            justification=res.explanation,
            error_message_context=res.error_message_context,
            codebase_context=res.codebase_context,
            stacktrace_context=res.stacktrace_context,
            breadcrumb_context=res.event_log_context,
        )
        return response
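For a sense of how this two-step component is driven from the agent loop shown earlier, a hedged usage sketch; autofix_context and agent_memory are assumed stand-ins, and the string values are illustrative:

# Hypothetical call mirroring the agent.run_iteration call site above.
request = InsightSharingRequest(
    latest_thought="The cache key omits the tenant id, so entries collide.",
    task_description="fixing the reported KeyError",  # get_step_description() output, format assumed
    memory=agent_memory,  # list[Message] accumulated by the agent
    past_insights=["The crash happens only on multi-tenant requests."],  # numbering is added by the prompt
)
insight_card = InsightSharingComponent(autofix_context).invoke(request)
if insight_card is None:
    print("Model returned <NO_INSIGHT/>; no new insight card this turn.")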
52 changes: 52 additions & 0 deletions src/seer/automation/autofix/components/insight_sharing/models.py

@@ -0,0 +1,52 @@
from pydantic import BaseModel

from seer.automation.agent.models import Message
from seer.automation.component import BaseComponentOutput, BaseComponentRequest


class CodeSnippetContext(BaseModel):
    repo_name: str
    file_path: str
    snippet: str


class BreadcrumbContext(BaseModel):
    type: str
    category: str
    body: str
    level: str
    data_as_json: str


class StacktraceContext(BaseModel):
    file_name: str
    repo_name: str
    function: str
    line_no: int
    col_no: int
    code_snippet: str
    vars_as_json: str


class InsightContextOutput(BaseModel):
    explanation: str
    error_message_context: list[str]
    codebase_context: list[CodeSnippetContext]
    stacktrace_context: list[StacktraceContext]
    event_log_context: list[BreadcrumbContext]


class InsightSharingRequest(BaseComponentRequest):
    latest_thought: str
    task_description: str
    memory: list[Message]
    past_insights: list[str]


class InsightSharingOutput(BaseComponentOutput):
    insight: str
    error_message_context: list[str]
    codebase_context: list[CodeSnippetContext]
    stacktrace_context: list[StacktraceContext]
    breadcrumb_context: list[BreadcrumbContext]
    justification: str
16 changes: 6 additions & 10 deletions src/seer/automation/autofix/components/root_cause/component.py

@@ -82,17 +82,13 @@ def invoke(
             parsed = extract_parsed_model(response)
 
             # Assign the ids to be the numerical indices of the causes and relevant code context
-            causes = []
-            for i, cause in enumerate(parsed.causes):
-                cause_model = cause.to_model()
-                cause_model.id = i
-
-                if cause_model.code_context:
-                    for j, snippet in enumerate(cause_model.code_context):
-                        snippet.id = j
-
-                causes.append(cause_model)
+            cause_model = parsed.cause.to_model()
+            cause_model.id = 0
+            if cause_model.code_context:
+                for j, snippet in enumerate(cause_model.code_context):
+                    snippet.id = j
 
+            causes = [cause_model]
             return RootCauseAnalysisOutput(causes=causes)
         finally:
             with self.context.state.update() as cur:
12 changes: 2 additions & 10 deletions src/seer/automation/autofix/components/root_cause/models.py

@@ -1,8 +1,6 @@
 from typing import Annotated, Optional
 
-from johen import gen
-from johen.examples import Examples
-from pydantic import BaseModel, Field, StringConstraints
+from pydantic import BaseModel, StringConstraints
 from pydantic_xml import attr
 
 from seer.automation.component import BaseComponentOutput, BaseComponentRequest
@@ -42,8 +40,6 @@ class RootCauseAnalysisItem(BaseModel):
     title: str
     description: str
     reproduction: str
-    likelihood: Annotated[float, Examples(r.uniform(0, 1) for r in gen)] = Field(..., ge=0, le=1)
-    actionability: Annotated[float, Examples(r.uniform(0, 1) for r in gen)] = Field(..., ge=0, le=1)
     code_context: Optional[list[RootCauseRelevantContext]] = None
 
 
@@ -54,17 +50,13 @@ class RootCauseAnalysisRelevantContext(BaseModel):
 class RootCauseAnalysisItemPrompt(BaseModel):
     title: str
     description: str
-    likelihood: float
-    actionability: float
     reproduction: str
     relevant_code: Optional[RootCauseAnalysisRelevantContext]
 
     @classmethod
     def from_model(cls, model: RootCauseAnalysisItem):
         return cls(
             title=model.title,
-            likelihood=model.likelihood,
-            actionability=model.actionability,
             description=model.description,
             reproduction=model.reproduction,
             relevant_code=(
@@ -96,7 +88,7 @@ def to_model(self):
 
 
 class MultipleRootCauseAnalysisOutputPrompt(BaseModel):
-    causes: list[RootCauseAnalysisItemPrompt]
+    cause: RootCauseAnalysisItemPrompt
 
 
 class RootCauseAnalysisOutputPrompt(BaseModel):