feat(autofix): Add first pass at interactive flow #1168

Merged · 16 commits · Sep 19, 2024
4 changes: 2 additions & 2 deletions requirements.txt

@@ -28,7 +28,7 @@ mpmath==1.3.0
 networkx==3.1
 numpy==1.26.1
 onnx==1.16.0
-openai==1.40.1
+openai==1.44.1
 optimum==1.16.2
 packaging
 pandas==2.0.3
@@ -106,4 +106,4 @@ langfuse @ git+https://github.com/jennmueng/langfuse-python.git@9d9350de1e4e84fa
 watchdog
 stumpy==1.13.0
 pytest_alembic==0.11.1
-cryptography==43.0.0
+cryptography==43.0.1
3 changes: 3 additions & 0 deletions src/seer/app.py

@@ -27,6 +27,7 @@
     check_and_mark_if_timed_out,
     get_autofix_state,
     get_autofix_state_from_pr_id,
+    receive_user_message,
     run_autofix_create_pr,
     run_autofix_evaluation,
     run_autofix_execution,
@@ -176,6 +177,8 @@ def autofix_update_endpoint(
         run_autofix_execution(data)
     elif data.payload.type == AutofixUpdateType.CREATE_PR:
         run_autofix_create_pr(data)
+    elif data.payload.type == AutofixUpdateType.USER_MESSAGE:
+        receive_user_message(data)
     return AutofixEndpointResponse(started=True, run_id=data.run_id)
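For orientation, a sketch of what an update carrying the new USER_MESSAGE type might look like. Only run_id and payload.type appear in this diff; the text field and the serialized type value are illustrative assumptions, not taken from the PR:

# Hypothetical request body for the new USER_MESSAGE branch above.
example_update = {
    "run_id": 123,
    "payload": {
        "type": "user_message",  # dispatched as AutofixUpdateType.USER_MESSAGE
        "text": "Focus on the retry logic in the task worker instead.",
    },
}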
51 changes: 40 additions & 11 deletions src/seer/automation/agent/agent.py

@@ -1,6 +1,6 @@
 import logging
 from abc import ABC
-from typing import Optional
+from typing import Optional, cast
 
 from pydantic import BaseModel, Field
 
@@ -15,7 +15,9 @@
 from seer.automation.agent.tools import FunctionTool
 from seer.automation.agent.utils import parse_json_with_keys
 from seer.automation.autofix.autofix_context import AutofixContext
 from seer.automation.utils import extract_text_inside_tags
+from seer.automation.autofix.components.insight_sharing.component import InsightSharingComponent
+from seer.automation.autofix.components.insight_sharing.models import InsightSharingRequest
+from seer.automation.autofix.models import DefaultStep
 from seer.dependency_injection import inject, injected
 
 logger = logging.getLogger("autofix")
@@ -61,26 +63,51 @@ def get_completion(self):
             tools=(self.tools if len(self.tools) > 0 else None),
         )
 
+    def use_user_messages(self, context: AutofixContext):
+        # adds any queued user messages to the memory
+        user_msgs = context.state.get().steps[-1].queued_user_messages
+        if user_msgs:
+            self.memory.append(Message(content="\n".join(user_msgs), role="user"))
+            with context.state.update() as cur:
+                cur.steps[-1].queued_user_messages = []
+            context.event_manager.add_log("Thanks for the input. I'm thinking through it now...")
+
     def run_iteration(self, context: Optional[AutofixContext] = None):
         logger.debug(f"----[{self.name}] Running Iteration {self.iterations}----")
 
         message, usage = self.get_completion()
 
+        # interrupt if user message is queued and awaiting handling
+        if context and context.state.get().steps[-1].queued_user_messages:
+            self.use_user_messages(context)
+            return
+
         self.memory.append(message)
 
+        # log thoughts to the user
         if message.content and context:
             text_before_tag = message.content.split("<")[0]
-            text = text_before_tag
+            logs_inside_tags = extract_text_inside_tags(
+                message.content, "log", strip_newlines=False
+            )
+            text = ""
+            if logs_inside_tags:
+                text = logs_inside_tags
+            elif text_before_tag:
+                text = text_before_tag
             if text:
                 context.event_manager.add_log(text)
+                # call LLM separately with the same memory to generate structured output insight cards
+                insight_sharing = InsightSharingComponent(context)
+                past_insights = context.state.get().get_all_insights()
+                insight_card = insight_sharing.invoke(
+                    InsightSharingRequest(
+                        latest_thought=text,
+                        memory=self.memory,
+                        task_description=context.state.get().get_step_description(),
+                        past_insights=past_insights,
+                    )
+                )
+                if insight_card:
+                    if context.state.get().steps and isinstance(
+                        context.state.get().steps[-1], DefaultStep
+                    ):
+                        step = cast(DefaultStep, context.state.get().steps[-1])
+                        step.insights.append(insight_card)
+                        with context.state.update() as cur:
+                            cur.steps[-1] = step
Comment on lines +93 to +111

Member commented:

    Hmm I think this is worth pulling out of the general agent logic and into a specific AutofixAgent that inherits from this generic Agent, as codecov will be using this too and we don't need to generate insights there.

@roaga (Member, author) replied on Sep 18, 2024:

    Since in many cases we'll want to disable it for Autofix too (e.g. evals, GH Copilot), I'm adding an interactive flag to the AgentConfig so that these kinds of features are easy to enable/disable whenever we want. It defaults to False, so the unit test gen code will not generate any insights. (A sketch of this split follows the diff below.)

         if message.tool_calls:
             for tool_call in message.tool_calls:
@@ -121,6 +148,8 @@ def run(self, prompt: str, context: Optional[AutofixContext] = None):
         self.reset_iterations()
 
         while self.should_continue():
+            if context:
+                self.use_user_messages(context)
             self.run_iteration(context=context)
 
         if self.iterations == self.config.max_iterations:
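Picking up the review thread above: a minimal sketch of what the discussed split could look like. Only the AutofixAgent name and the interactive flag (defaulting to False) come from the discussion; the base-class stub and all other details here are assumptions for illustration, not the PR's actual code.

from typing import Optional

from pydantic import BaseModel


class AgentConfig(BaseModel):
    max_iterations: int = 24  # referenced by the diff as self.config.max_iterations; default assumed
    interactive: bool = False  # per the thread: off by default so evals, GH Copilot, and unit test gen skip insights


class Agent:
    """Stand-in stub for the generic Agent base class in agent.py."""

    def __init__(self, config: AgentConfig):
        self.config = config

    def run_iteration(self, context: Optional[object] = None) -> None:
        ...  # generic completion / tool-call loop


class AutofixAgent(Agent):
    """Autofix-specific subclass: insight cards and queued user messages
    would live here rather than in the generic Agent."""

    def run_iteration(self, context: Optional[object] = None) -> None:
        super().run_iteration(context=context)
        if self.config.interactive and context is not None:
            ...  # insight-sharing + user-interrupt logic from the diff above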
4 changes: 4 additions & 0 deletions src/seer/automation/agent/client.py

@@ -147,6 +147,10 @@ def clean_tool_call_assistant_messages(self, messages: list[Message]) -> list[Message]:
             )
         elif message.role == "tool":
             new_messages.append(Message(role="user", content=message.content, tool_calls=[]))
+        elif message.role == "tool_use":
+            new_messages.append(
+                Message(role="assistant", content=message.content, tool_calls=[])
+            )
Comment on lines +150 to +153

Member commented:

    tool_use is for Anthropic, right? We don't need to use this hack on Anthropic calls, right?

Member (author) replied:

    I believe I did need to use this hack for Claude; I can't recall the exact error that was occurring, though.

         else:
             new_messages.append(message)
     return new_messages
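To make the behavior under discussion easier to scan, here is a condensed, self-contained restatement of the role remapping, including the new tool_use branch. The dataclass is a stand-in for seer.automation.agent.models.Message, and the branch for assistant messages carrying tool calls (elided in the hunk above) is omitted:

from dataclasses import dataclass, field


@dataclass
class Message:  # stand-in for seer.automation.agent.models.Message
    role: str
    content: str
    tool_calls: list = field(default_factory=list)


def clean_roles(messages: list[Message]) -> list[Message]:
    # Sketch of clean_tool_call_assistant_messages: tool results become user
    # messages, and Anthropic-style "tool_use" turns become plain assistant
    # messages, so downstream calls only ever see user/assistant/system roles.
    cleaned: list[Message] = []
    for m in messages:
        if m.role == "tool":
            cleaned.append(Message(role="user", content=m.content))
        elif m.role == "tool_use":  # the new branch added in this PR
            cleaned.append(Message(role="assistant", content=m.content))
        else:
            cleaned.append(m)
    return cleaned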
Empty file.
134 changes: 134 additions & 0 deletions src/seer/automation/autofix/components/insight_sharing/component.py

@@ -0,0 +1,134 @@
import re
import textwrap

from langfuse.decorators import observe
from sentry_sdk.ai.monitoring import ai_track

from seer.automation.agent.client import GptClient
from seer.automation.agent.models import Message, Usage
from seer.automation.autofix.autofix_context import AutofixContext
from seer.automation.autofix.components.insight_sharing.models import (
    InsightContextOutput,
    InsightSharingOutput,
    InsightSharingRequest,
)
from seer.automation.component import BaseComponent
from seer.dependency_injection import inject, injected


class InsightSharingPrompts:
    @staticmethod
    def format_step_one(
        task_description: str,
        latest_thought: str,
        past_insights: list[str],
    ):
        past_insights = [f"{i + 1}. {insight}" for i, insight in enumerate(past_insights)]
        return textwrap.dedent(
            """\
            Given the chain of thought below for {task_description}:
            {insights}

            Write the next under-25-words conclusion in the chain of thought based on the notes below, or if there is no good conclusion to add, return <NO_INSIGHT/>. The criteria for a good conclusion are that it should be a large, novel jump in insights, not similar to any item in the existing chain of thought, it should be a complete conclusion after analysis, it should not be a plan of what to analyze next, and it should be valuable for {task_description}. Every item in the chain of thought should read like a chain that clearly builds off of the previous step. If you can't find a conclusion that meets these criteria, return <NO_INSIGHT/>.

            {latest_thought}"""
        ).format(
            task_description=task_description,
            latest_thought=latest_thought,
            insights="\n".join(past_insights) if past_insights else "None",
        )

    @staticmethod
    def format_step_two(insight: str, latest_thought: str):
        return textwrap.dedent(
            """\
            Return the pieces of context from the issue details or the files in the codebase that are directly relevant to the text below:
            {insight}

            That means choose the most relevant codebase snippets, event logs, stacktraces, or other information, that show specifically what the text mentions. Don't include any repeated information; just include what's needed.

            Also provide a one-line explanation of how the pieces of context directly explain the text.

            To know what's needed, reference these notes:
            {latest_thought}"""
        ).format(
            insight=insight,
            latest_thought=latest_thought,
        )


class InsightSharingComponent(BaseComponent[InsightSharingRequest, InsightSharingOutput]):
    context: AutofixContext

    @observe(name="Sharing Insights")
    @ai_track(description="Sharing Insights")
    @inject
    def invoke(
        self, request: InsightSharingRequest, gpt_client: GptClient = injected
    ) -> InsightSharingOutput | None:
        prompt_one = InsightSharingPrompts.format_step_one(
            task_description=request.task_description,
            latest_thought=request.latest_thought,
            past_insights=request.past_insights,
        )
        completion = gpt_client.openai_client.chat.completions.create(
            model="gpt-4o-mini-2024-07-18",
            messages=[Message(role="user", content=prompt_one).to_message()],
            temperature=0.0,
        )
        with self.context.state.update() as cur:
            usage = Usage(
                completion_tokens=completion.usage.completion_tokens,
                prompt_tokens=completion.usage.prompt_tokens,
                total_tokens=completion.usage.total_tokens,
            )
            cur.usage += usage
        insight = completion.choices[0].message.content
        if insight == "<NO_INSIGHT/>":
            return None

        insight = re.sub(
            r"^\d+\.\s+", "", insight
        )  # since the model often starts the insight with a number, e.g. "3. Insight..."

        prompt_two = InsightSharingPrompts.format_step_two(
            insight=insight,
            latest_thought=request.latest_thought,
        )
        memory = []
        for i, message in enumerate(gpt_client.clean_tool_call_assistant_messages(request.memory)):
            if message.role != "system":
                memory.append(message.to_message())
        memory.append(Message(role="user", content=prompt_two).to_message())

        completion = gpt_client.openai_client.beta.chat.completions.parse(
            model="gpt-4o-mini-2024-07-18",
            messages=memory,
            response_format=InsightContextOutput,
            temperature=0.0,
            max_tokens=2048,
        )
        with self.context.state.update() as cur:
            usage = Usage(
                completion_tokens=completion.usage.completion_tokens,
                prompt_tokens=completion.usage.prompt_tokens,
                total_tokens=completion.usage.total_tokens,
            )
            cur.usage += usage
        structured_message = completion.choices[0].message
        if structured_message.refusal:
            raise RuntimeError(structured_message.refusal)
        if not structured_message.parsed:
            raise RuntimeError("Failed to parse message")

        res = completion.choices[0].message.parsed

        response = InsightSharingOutput(
            insight=insight,
            justification=res.explanation,
            error_message_context=res.error_message_context,
            codebase_context=res.codebase_context,
            stacktrace_context=res.stacktrace_context,
            breadcrumb_context=res.event_log_context,
        )
        return response
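For a sense of how this two-step component is driven from the agent loop shown earlier, a hedged usage sketch; autofix_context and agent_memory are assumed stand-ins, and the string values are illustrative:

# Hypothetical call mirroring the agent.run_iteration call site above.
request = InsightSharingRequest(
    latest_thought="The cache key omits the tenant id, so entries collide.",
    task_description="fixing the reported KeyError",  # get_step_description() output, format assumed
    memory=agent_memory,  # list[Message] accumulated by the agent
    past_insights=["The crash happens only on multi-tenant requests."],  # numbering is added by the prompt
)
insight_card = InsightSharingComponent(autofix_context).invoke(request)
if insight_card is None:
    print("Model returned <NO_INSIGHT/>; no new insight card this turn.")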
52 changes: 52 additions & 0 deletions src/seer/automation/autofix/components/insight_sharing/models.py

@@ -0,0 +1,52 @@
from pydantic import BaseModel

from seer.automation.agent.models import Message
from seer.automation.component import BaseComponentOutput, BaseComponentRequest


class CodeSnippetContext(BaseModel):
    repo_name: str
    file_path: str
    snippet: str


class BreadcrumbContext(BaseModel):
    type: str
    category: str
    body: str
    level: str
    data_as_json: str


class StacktraceContext(BaseModel):
    file_name: str
    repo_name: str
    function: str
    line_no: int
    col_no: int
    code_snippet: str
    vars_as_json: str


class InsightContextOutput(BaseModel):
    explanation: str
    error_message_context: list[str]
    codebase_context: list[CodeSnippetContext]
    stacktrace_context: list[StacktraceContext]
    event_log_context: list[BreadcrumbContext]


class InsightSharingRequest(BaseComponentRequest):
    latest_thought: str
    task_description: str
    memory: list[Message]
    past_insights: list[str]


class InsightSharingOutput(BaseComponentOutput):
    insight: str
    error_message_context: list[str]
    codebase_context: list[CodeSnippetContext]
    stacktrace_context: list[StacktraceContext]
    breadcrumb_context: list[BreadcrumbContext]
    justification: str
16 changes: 6 additions & 10 deletions src/seer/automation/autofix/components/root_cause/component.py

@@ -82,17 +82,13 @@ def invoke(
             parsed = extract_parsed_model(response)
 
             # Assign the ids to be the numerical indices of the causes and relevant code context
-            causes = []
-            for i, cause in enumerate(parsed.causes):
-                cause_model = cause.to_model()
-                cause_model.id = i
-
-                if cause_model.code_context:
-                    for j, snippet in enumerate(cause_model.code_context):
-                        snippet.id = j
-
-                causes.append(cause_model)
+            cause_model = parsed.cause.to_model()
+            cause_model.id = 0
+            if cause_model.code_context:
+                for j, snippet in enumerate(cause_model.code_context):
+                    snippet.id = j
 
+            causes = [cause_model]
             return RootCauseAnalysisOutput(causes=causes)
         finally:
             with self.context.state.update() as cur:
12 changes: 2 additions & 10 deletions src/seer/automation/autofix/components/root_cause/models.py

@@ -1,8 +1,6 @@
 from typing import Annotated, Optional
 
-from johen import gen
-from johen.examples import Examples
-from pydantic import BaseModel, Field, StringConstraints
+from pydantic import BaseModel, StringConstraints
 from pydantic_xml import attr
 
 from seer.automation.component import BaseComponentOutput, BaseComponentRequest
@@ -42,8 +40,6 @@ class RootCauseAnalysisItem(BaseModel):
     title: str
     description: str
     reproduction: str
-    likelihood: Annotated[float, Examples(r.uniform(0, 1) for r in gen)] = Field(..., ge=0, le=1)
-    actionability: Annotated[float, Examples(r.uniform(0, 1) for r in gen)] = Field(..., ge=0, le=1)
     code_context: Optional[list[RootCauseRelevantContext]] = None
 
 
@@ -54,17 +50,13 @@ class RootCauseAnalysisRelevantContext(BaseModel):
 class RootCauseAnalysisItemPrompt(BaseModel):
     title: str
     description: str
-    likelihood: float
-    actionability: float
     reproduction: str
     relevant_code: Optional[RootCauseAnalysisRelevantContext]
 
     @classmethod
     def from_model(cls, model: RootCauseAnalysisItem):
         return cls(
             title=model.title,
-            likelihood=model.likelihood,
-            actionability=model.actionability,
             description=model.description,
             reproduction=model.reproduction,
             relevant_code=(
@@ -96,7 +88,7 @@ def to_model(self):
 
 
 class MultipleRootCauseAnalysisOutputPrompt(BaseModel):
-    causes: list[RootCauseAnalysisItemPrompt]
+    cause: RootCauseAnalysisItemPrompt
 
 
 class RootCauseAnalysisOutputPrompt(BaseModel):