From a748d7647f6b1456aa71ffd87e859230cb067f4f Mon Sep 17 00:00:00 2001
From: Rohan Agarwal
Date: Mon, 9 Sep 2024 09:45:02 -0700
Subject: [PATCH 01/15] Only one root cause output

---
 .../components/root_cause/component.py        | 20 +++++++++----------
 .../autofix/components/root_cause/models.py   | 15 +++----------
 .../autofix/components/root_cause/prompts.py  | 12 +++++------
 src/seer/automation/autofix/tools.py          |  4 ++--
 4 files changed, 20 insertions(+), 31 deletions(-)

diff --git a/src/seer/automation/autofix/components/root_cause/component.py b/src/seer/automation/autofix/components/root_cause/component.py
index ee6466e6..8b720ebb 100644
--- a/src/seer/automation/autofix/components/root_cause/component.py
+++ b/src/seer/automation/autofix/components/root_cause/component.py
@@ -65,26 +65,24 @@ def invoke(self, request: RootCauseAnalysisRequest) -> RootCauseAnalysisOutput |
             logger.warning("Root Cause Analysis formatter did not return a valid response")
             return None
 
-        extracted_text = extract_text_inside_tags(formatter_response, "potential_root_causes")
+        extracted_text = extract_text_inside_tags(formatter_response, "root_cause_analysis")
 
         xml_response = RootCauseAnalysisOutputPromptXml.from_xml(
-            f"<potential_root_causes>{escape_multi_xml(extracted_text, ['thoughts', 'title', 'description', 'code'])}</potential_root_causes>"
+            f"<root_cause_analysis>{escape_multi_xml(extracted_text, ['thoughts', 'title', 'description', 'code'])}</root_cause_analysis>"
        )
 
-        if not xml_response.potential_root_causes.causes:
+        if not xml_response.potential_root_causes.cause:
             logger.warning("Root Cause Analysis formatter did not return causes")
             return None
 
         # Assign the ids to be the numerical indices of the causes and relevant code context
-        causes = []
-        for i, cause in enumerate(xml_response.potential_root_causes.causes):
-            cause_model = cause.to_model()
-            cause_model.id = i
+        cause_model = xml_response.potential_root_causes.cause.to_model()
+        cause_model.id = 0
 
-            if cause_model.code_context:
-                for j, snippet in enumerate(cause_model.code_context):
-                    snippet.id = j
+        if cause_model.code_context:
+            for j, snippet in enumerate(cause_model.code_context):
+                snippet.id = j
 
-            causes.append(cause_model)
+        causes = [cause_model]
 
         return RootCauseAnalysisOutput(causes=causes)
diff --git a/src/seer/automation/autofix/components/root_cause/models.py b/src/seer/automation/autofix/components/root_cause/models.py
index 84d21f57..1ebdb850 100644
--- a/src/seer/automation/autofix/components/root_cause/models.py
+++ b/src/seer/automation/autofix/components/root_cause/models.py
@@ -127,22 +127,13 @@ def to_model(self):
         )
 
 
-class MultipleRootCauseAnalysisOutputPromptXml(PromptXmlModel, tag="potential_root_causes"):
-    causes: list[RootCauseAnalysisItemPromptXml] = []
+class MultipleRootCauseAnalysisOutputPromptXml(PromptXmlModel, tag="root_cause_analysis"):
+    cause: RootCauseAnalysisItemPromptXml | None = None
 
     @classmethod
     def get_example(cls):
         return cls(
-            causes=[
-                RootCauseAnalysisItemPromptXml.get_example(),
-                RootCauseAnalysisItemPromptXml(
-                    title="Summarize the root cause here in a few words.",
-                    likelihood=0.2,
-                    actionability=1.0,
-                    description="Explain the root cause in full detail here with the full chain of reasoning behind it.",
-                    relevant_code=RootCauseAnalysisRelevantContextPromptXml.get_example(),
-                ),
-            ]
+            cause=RootCauseAnalysisItemPromptXml.get_example(),
         )
diff --git a/src/seer/automation/autofix/components/root_cause/prompts.py b/src/seer/automation/autofix/components/root_cause/prompts.py
index 26f3f054..b1b17c69 100644
--- a/src/seer/automation/autofix/components/root_cause/prompts.py
+++ b/src/seer/automation/autofix/components/root_cause/prompts.py
@@ -25,7 +25,7 @@ def format_system_msg():
     - EVERY TIME before you use a tool, think step-by-step each time before using the tools provided to you.
     - You also MUST think step-by-step before giving the final answer.
 
-    It is important that we find all the potential root causes of the issue, so provide as many possibilities as you can for the root cause, ordered from most likely to least likely."""
+    It is important that we find the potential root causes of the issue."""
     ).format(
         root_cause_output_example_str=MultipleRootCauseAnalysisOutputPromptXml.get_example().to_prompt_str(),
     )
@@ -44,13 +44,13 @@ def format_default_msg(
     {error_str}
 
     {instruction_str}
 
-    When ready with your final answer, detail all the potential root causes of the issue.
+    When ready with your final answer, detail the potential root cause of the issue.
 
     # Guidelines:
-    - Each root cause should be inside its own block.
-    - Include a title and description in each root cause. Your description may be as long as you need to help your team understand the issue, explaining the issue, the root cause, why this is happening, and how you came to your conclusion.
-    - Include float values from 0.0-1.0 of the likelihood and actionability of each root cause.
-    - In each root cause, provide snippets of the original code, each with their own titles and descriptions, to highlight where and why the issue is occurring so that your colleagues fully understand the root cause. Provide as many snippets as you want. Within your snippets, you may highlight specific lines with a comment beginning with ***.
+    - The root cause should be inside its own block.
+    - Include a title and description in the root cause. Your description may be as long as you need to help your team understand the issue, explaining the issue, the root cause, why this is happening, and how you came to your conclusion.
+    - Include float values from 0.0-1.0 of the likelihood and actionability of the root cause.
+    - In the root cause, provide snippets of the original code, each with their own titles and descriptions, to highlight where and why the issue is occurring so that your colleagues fully understand the root cause. Provide as many snippets as you want. Within your snippets, you may highlight specific lines with a comment beginning with ***.
     - You MUST include the EXACT file name and repository name in the code snippets you provide. If you cannot, do not provide a code snippet.
     - EVERY TIME before you use a tool, think step-by-step each time before using the tools provided to you.
- You also MUST think step-by-step before giving the final answer.""" diff --git a/src/seer/automation/autofix/tools.py b/src/seer/automation/autofix/tools.py index f7e523fb..339e2295 100644 --- a/src/seer/automation/autofix/tools.py +++ b/src/seer/automation/autofix/tools.py @@ -32,7 +32,7 @@ def expand_document(self, input: str, repo_name: str | None = None): client = self.context.get_repo_client(repo_name) repo_name = client.repo_name - self.context.event_manager.add_log(f"Looked at `{input}` in `{repo_name}`") + self.context.event_manager.add_log(f"Looking at `{input}` in `{repo_name}`...") if file_contents: return file_contents @@ -173,7 +173,7 @@ def keyword_search( result_str += f"{match_xml.to_prompt_str()}\n\n" self.context.event_manager.add_log( - f"Searched codebase for `{keyword}`, found {len(file_names)} result(s) in {', '.join(file_names)}" + f"Searched codebase for `{keyword}`, found {len(file_names)} result(s) in {', '.join(file_names)}." ) return result_str From 8cb25f030ccb2536bb1319b8ef6a47aebe7ca172 Mon Sep 17 00:00:00 2001 From: Rohan Agarwal Date: Wed, 11 Sep 2024 14:07:41 -0700 Subject: [PATCH 02/15] Generating v0 insight cards --- src/seer/automation/agent/agent.py | 23 ++++- .../components/insight_sharing/__init__.py | 0 .../components/insight_sharing/component.py | 83 +++++++++++++++++++ .../components/insight_sharing/models.py | 41 +++++++++ .../autofix/components/root_cause/prompts.py | 6 +- src/seer/automation/autofix/models.py | 18 +++- 6 files changed, 165 insertions(+), 6 deletions(-) create mode 100644 src/seer/automation/autofix/components/insight_sharing/__init__.py create mode 100644 src/seer/automation/autofix/components/insight_sharing/component.py create mode 100644 src/seer/automation/autofix/components/insight_sharing/models.py diff --git a/src/seer/automation/agent/agent.py b/src/seer/automation/agent/agent.py index 59c3865a..f541dc74 100644 --- a/src/seer/automation/agent/agent.py +++ b/src/seer/automation/agent/agent.py @@ -1,6 +1,6 @@ import logging from abc import ABC -from typing import Optional +from typing import Optional, cast from pydantic import BaseModel, Field @@ -15,6 +15,9 @@ from seer.automation.agent.tools import FunctionTool from seer.automation.agent.utils import parse_json_with_keys from seer.automation.autofix.autofix_context import AutofixContext +from seer.automation.autofix.components.insight_sharing.component import InsightSharingComponent +from seer.automation.autofix.components.insight_sharing.models import InsightSharingRequest +from seer.automation.autofix.models import DefaultStep from seer.automation.utils import extract_text_inside_tags from seer.dependency_injection import inject, injected @@ -81,6 +84,24 @@ def run_iteration(self, context: Optional[AutofixContext] = None): text = text_before_tag if text: context.event_manager.add_log(text) + # TODO call LLM separately with the same memory to generate structured output insight cards + insight_sharing = InsightSharingComponent(context) + insight_card = insight_sharing.invoke( + InsightSharingRequest( + latest_thought=text, + memory=self.memory, + task_description=context.state.get().get_step_description() + ) + ) + print("HELLO insight card") + print(insight_card) + if insight_card: + if context.state.get().steps and isinstance(context.state.get().steps[-1], DefaultStep): + step = cast(DefaultStep, context.state.get().steps[-1]) + step.insights.append(insight_card) + with context.state.update() as cur: + cur.steps[-1] = step + if message.tool_calls: for tool_call in 
message.tool_calls: diff --git a/src/seer/automation/autofix/components/insight_sharing/__init__.py b/src/seer/automation/autofix/components/insight_sharing/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/seer/automation/autofix/components/insight_sharing/component.py b/src/seer/automation/autofix/components/insight_sharing/component.py new file mode 100644 index 00000000..e392da62 --- /dev/null +++ b/src/seer/automation/autofix/components/insight_sharing/component.py @@ -0,0 +1,83 @@ +import textwrap + +from langfuse.decorators import observe +from pydantic import BaseModel +from sentry_sdk.ai.monitoring import ai_track + +from seer.automation.agent.client import GptClient +from seer.automation.agent.models import Message, Usage +from seer.automation.autofix.autofix_context import AutofixContext +from seer.automation.autofix.components.insight_sharing.models import InsightSharingOutput, InsightSharingRequest +from seer.automation.component import BaseComponent, BaseComponentOutput, BaseComponentRequest +from seer.dependency_injection import inject, injected + + +class InsightSharingPrompts: + @staticmethod + def format_default_msg( + task_description: str, + latest_thought: str, + ): + return textwrap.dedent( + """\ + You're an engineer leading the process of {task_description}. + To help your team, whenever you find an important insight needed for the process of {task_description}, you should document it. The only things we want to document are key conclusions in {task_description} that would belong in a final report, not random thoughts, tasks, or work-in-progress plans. + + You can look back on the conversation so far for context, but we're focused on the latest thought you had, which was: + --- + {latest_thought} + --- + + First decide whether or not there is anything about this thought of yours that's important to share with your team and permanently document. If not, respond with no insight and no context items; should_share_insight is false. + + If you think there is something new and critical to know from this thought regarding {task_description}, document it it. When documenting, you should give a clear, concise, and concrete insight (1 short line). Then you should provide a clear, concise justification (1 short line) for your insight using concrete pieces of context, whether it's a snippet from the codebase, a line from the stacktrace, an event log, an error message, or something else. Finally, return the specific context you needed for your justification so your team can connect the dots easily. 
Only include the minimum necessary; leave out anything not critical for understanding your insight.""" + ).format( + task_description=task_description, + latest_thought=latest_thought + ) + + +class InsightSharingComponent(BaseComponent[InsightSharingRequest, InsightSharingOutput]): + context: AutofixContext + + @observe(name="Sharing Insights") + @ai_track(description="Sharing Insights") + @inject + def invoke(self, request: InsightSharingRequest, gpt_client: GptClient = injected) -> InsightSharingOutput | None: + prompt = InsightSharingPrompts.format_default_msg( + task_description=request.task_description, + latest_thought=request.latest_thought, + ) + + memory = [] + for msg in request.memory: + if msg.role == "system": + continue + if msg.role == "tool": + msg.role == "user" + msg.role = "user" if msg.role == "tool" else msg.role + msg.tool_calls = [] + msg.tool_call_id = None + memory.append(msg.to_message()) + memory.append(Message(role="user", content=prompt).to_message()) + + completion = gpt_client.openai_client.beta.chat.completions.parse( + model="gpt-4o-mini-2024-07-18", + messages=memory, + response_format=InsightSharingOutput, + temperature=0.0, + max_tokens=2048, + ) + + with self.context.state.update() as cur: + usage = Usage(completion_tokens=completion.usage.completion_tokens, prompt_tokens=completion.usage.prompt_tokens, total_tokens=completion.usage.total_tokens) + cur.usage += usage + + structured_message = completion.choices[0].message + if structured_message.refusal: + raise RuntimeError(structured_message.refusal) + if not structured_message.parsed: + raise RuntimeError("Failed to parse message") + + res = completion.choices[0].message.parsed + return res diff --git a/src/seer/automation/autofix/components/insight_sharing/models.py b/src/seer/automation/autofix/components/insight_sharing/models.py new file mode 100644 index 00000000..6eca0978 --- /dev/null +++ b/src/seer/automation/autofix/components/insight_sharing/models.py @@ -0,0 +1,41 @@ +from pydantic import BaseModel + +from seer.automation.agent.models import Message +from seer.automation.component import BaseComponentOutput, BaseComponentRequest + + +class CodeSnippetContext(BaseModel): + repo_name: str + file_path: str + snippet: str + +class BreadcrumbContext(BaseModel): + type: str + category: str + message: str + level: str + data_as_json: str + +class StacktraceContext(BaseModel): + file_name: str + repo_name: str + function: str + line_no: int + col_no: int + code_snippet: str + vars_as_json: str + +class InsightSharingRequest(BaseComponentRequest): + latest_thought: str + task_description: str + memory: list[Message] + + +class InsightSharingOutput(BaseComponentOutput): + should_share_insight: bool + insight: str + justification_using_context: str + error_message_context: list[str] + code_snippet_context: list[CodeSnippetContext] + stacktrace_context: list[StacktraceContext] + event_log_context: list[BreadcrumbContext] diff --git a/src/seer/automation/autofix/components/root_cause/prompts.py b/src/seer/automation/autofix/components/root_cause/prompts.py index b1b17c69..006a5e81 100644 --- a/src/seer/automation/autofix/components/root_cause/prompts.py +++ b/src/seer/automation/autofix/components/root_cause/prompts.py @@ -22,8 +22,7 @@ def format_system_msg(): - You are not able to search in or make changes to external libraries. If the error is caused by an external library or the stacktrace only contains frames from external libraries, do not attempt to search in external libraries. 
    - If you are not able to find any potential root causes, return only <NO_ROOT_CAUSES>.
    - If multiple searches turn up no viable results, you should conclude the session.
-    - EVERY TIME before you use a tool, think step-by-step each time before using the tools provided to you.
-    - You also MUST think step-by-step before giving the final answer.
+    - At EVERY step of your investigation, you must think out loud! Share what you're learning and thinking along the way, EVERY TIME YOU SPEAK.
 
     It is important that we find the potential root causes of the issue."""
     ).format(
         root_cause_output_example_str=MultipleRootCauseAnalysisOutputPromptXml.get_example().to_prompt_str(),
     )
@@ -52,8 +51,7 @@ def format_default_msg(
     - Include float values from 0.0-1.0 of the likelihood and actionability of the root cause.
     - In the root cause, provide snippets of the original code, each with their own titles and descriptions, to highlight where and why the issue is occurring so that your colleagues fully understand the root cause. Provide as many snippets as you want. Within your snippets, you may highlight specific lines with a comment beginning with ***.
     - You MUST include the EXACT file name and repository name in the code snippets you provide. If you cannot, do not provide a code snippet.
-    - EVERY TIME before you use a tool, think step-by-step each time before using the tools provided to you.
-    - You also MUST think step-by-step before giving the final answer."""
+    - At EVERY step of your investigation, you must think out loud! Share what you're learning and thinking along the way, EVERY TIME YOU SPEAK."""
     ).format(
         error_str=event,
         repo_names_str=format_repo_names(repo_names),
diff --git a/src/seer/automation/autofix/models.py b/src/seer/automation/autofix/models.py
index ce158e5f..2d2b43ac 100644
--- a/src/seer/automation/autofix/models.py
+++ b/src/seer/automation/autofix/models.py
@@ -9,6 +9,7 @@
 from pydantic import BaseModel, ConfigDict, Field, field_validator
 
 from seer.automation.agent.models import Usage
+from seer.automation.autofix.components.insight_sharing.models import InsightSharingOutput
 from seer.automation.autofix.components.root_cause.models import RootCauseAnalysisItem
 from seer.automation.autofix.config import AUTOFIX_HARD_TIME_OUT_MINS, AUTOFIX_UPDATE_TIMEOUT_SECS
 from seer.automation.models import FileChange, FilePatch, IssueDetails, RepoDefinition
@@ -158,7 +159,7 @@ def is_valid_uuid(uuid_string: str) -> bool:
 
 class DefaultStep(BaseStep):
     type: Literal[StepType.DEFAULT] = StepType.DEFAULT
-
+    insights: list[InsightSharingOutput] = []
 
 class RootCauseStep(BaseStep):
     type: Literal[StepType.ROOT_CAUSE_ANALYSIS] = StepType.ROOT_CAUSE_ANALYSIS
@@ -295,6 +296,21 @@ class AutofixUpdateRequest(BaseModel):
 class AutofixContinuation(AutofixGroupState):
     request: AutofixRequest
 
+    def get_step_description(self) -> str:
+        if not self.steps:
+            return ""
+        step = self.steps[-1]
+        if step.type == StepType.DEFAULT and step.key == "root_cause_analysis_processing":
+            return "finding the root cause of the issue"
+        elif step.type == StepType.DEFAULT and step.key == "plan":
+            return "coming up with a fix for the issue"
+        elif step.type == StepType.ROOT_CAUSE_ANALYSIS:
+            return "selecting the final root cause"
+        elif step.type == StepType.CHANGES:
+            return "writing the code changes to fix the issue"
+        else:
+            return ""
+
     def find_step(self, *, id: str | None = None, key: str | None = None) -> Step | None:
         for step in self.steps[::-1]:
             if step.id == id:

From 73bc67499c08dd40108ce1a7fac43b5af765fe04 Mon Sep 17 00:00:00 2001
From: Rohan Agarwal
Date: Wed, 11 Sep 2024 14:49:39 -0700
Subject: [PATCH 03/15] Improve logging

---
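Reviewer note: the insight-card flow introduced in PATCH 02, which the logging changes below tidy up around, hinges on the OpenAI SDK's structured-output helper, beta.chat.completions.parse, which coerces the completion into a Pydantic model. The snippet below is an illustrative sketch of that call pattern, not code from this series; it assumes openai>=1.40 (where parse first shipped), an OPENAI_API_KEY in the environment, and uses a toy TinyInsight model as a stand-in for the real InsightSharingOutput.

    from openai import OpenAI
    from pydantic import BaseModel

    class TinyInsight(BaseModel):
        should_share_insight: bool
        insight: str
        justification_using_context: str

    client = OpenAI()
    completion = client.beta.chat.completions.parse(
        model="gpt-4o-mini-2024-07-18",
        messages=[{"role": "user", "content": "Document the key insight in: ..."}],
        response_format=TinyInsight,  # the SDK turns this model into a JSON schema
        temperature=0.0,
    )

    message = completion.choices[0].message
    if message.refusal:  # the model can refuse instead of emitting the schema
        raise RuntimeError(message.refusal)
    card = message.parsed  # a validated TinyInsight instance

This is also why invoke() rewrites tool messages into plain user messages before replaying the agent's memory: the replayed transcript has to stand on its own as valid chat input once the original tool-call pairings are dropped (PATCH 07 later consolidates that cleanup into clean_tool_call_assistant_messages).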
src/seer/automation/agent/agent.py | 1 - src/seer/automation/autofix/event_manager.py | 12 ++++++++---- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/src/seer/automation/agent/agent.py b/src/seer/automation/agent/agent.py index f541dc74..20789fe2 100644 --- a/src/seer/automation/agent/agent.py +++ b/src/seer/automation/agent/agent.py @@ -83,7 +83,6 @@ def run_iteration(self, context: Optional[AutofixContext] = None): elif text_before_tag: text = text_before_tag if text: - context.event_manager.add_log(text) # TODO call LLM separately with the same memory to generate structured output insight cards insight_sharing = InsightSharingComponent(context) insight_card = insight_sharing.invoke( diff --git a/src/seer/automation/autofix/event_manager.py b/src/seer/automation/autofix/event_manager.py index 08c103d2..0909d811 100644 --- a/src/seer/automation/autofix/event_manager.py +++ b/src/seer/automation/autofix/event_manager.py @@ -29,28 +29,28 @@ class AutofixEventManager: def root_cause_analysis_processing_step(self) -> DefaultStep: return DefaultStep( key="root_cause_analysis_processing", - title="Analyzing the Issue", + title="Analyzing the issue", ) @property def root_cause_analysis_step(self) -> RootCauseStep: return RootCauseStep( key="root_cause_analysis", - title="Root Cause Analysis", + title="Root cause analysis", ) @property def plan_step(self) -> DefaultStep: return DefaultStep( key="plan", - title="Create Fix", + title="Creating fix", ) @property def changes_step(self) -> ChangesStep: return ChangesStep( key="changes", - title="Changes", + title="Code changes", changes=[], ) @@ -78,6 +78,7 @@ def send_root_cause_analysis_start(self): cur.make_step_latest(root_cause_step) cur.status = AutofixStatus.PROCESSING + self.add_log("I'll start analyzing the issue to figure out a root cause...") def send_root_cause_analysis_result(self, root_cause_output: RootCauseAnalysisOutput | None): with self.state.update() as cur: @@ -89,6 +90,7 @@ def send_root_cause_analysis_result(self, root_cause_output: RootCauseAnalysisOu root_cause_step.causes = root_cause_output.causes cur.status = AutofixStatus.NEED_MORE_INFORMATION + self.add_log("Here's what I think the root cause of the issue is. Feel free to edit it, or provide your own below if you disagree.") else: root_cause_step.status = AutofixStatus.ERROR cur.status = AutofixStatus.ERROR @@ -121,6 +123,7 @@ def send_coding_start(self): plan_step.status = AutofixStatus.PROCESSING cur.status = AutofixStatus.PROCESSING + self.add_log("Now that we've decided on a root cause, I'll start figuring out a fix...") def send_coding_result(self, result: CodingOutput | None): with self.state.update() as cur: @@ -138,6 +141,7 @@ def send_coding_complete(self, codebase_changes: list[CodebaseChange]): changes_step.changes = codebase_changes cur.status = AutofixStatus.COMPLETED + self.add_log("Here are the code changes that I think will fix the issue. 
Feel free to tweak them, or tell me below anything I should change.") def add_log(self, message: str): with self.state.update() as cur: From 77fb80a028a8e6c5bf6900be57fcae5ff16b1e3d Mon Sep 17 00:00:00 2001 From: Rohan Agarwal Date: Wed, 11 Sep 2024 14:51:09 -0700 Subject: [PATCH 04/15] Capitalization --- src/seer/automation/autofix/event_manager.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/seer/automation/autofix/event_manager.py b/src/seer/automation/autofix/event_manager.py index 0909d811..743348b2 100644 --- a/src/seer/automation/autofix/event_manager.py +++ b/src/seer/automation/autofix/event_manager.py @@ -29,28 +29,28 @@ class AutofixEventManager: def root_cause_analysis_processing_step(self) -> DefaultStep: return DefaultStep( key="root_cause_analysis_processing", - title="Analyzing the issue", + title="Analyzing the Issue", ) @property def root_cause_analysis_step(self) -> RootCauseStep: return RootCauseStep( key="root_cause_analysis", - title="Root cause analysis", + title="Root Cause Analysis", ) @property def plan_step(self) -> DefaultStep: return DefaultStep( key="plan", - title="Creating fix", + title="Creating Fix", ) @property def changes_step(self) -> ChangesStep: return ChangesStep( key="changes", - title="Code changes", + title="Code Changes", changes=[], ) From 79837a189ba8ff8808dcfdfd36c08864ce879293 Mon Sep 17 00:00:00 2001 From: Rohan Agarwal Date: Thu, 12 Sep 2024 08:58:20 -0700 Subject: [PATCH 05/15] Updates --- requirements.txt | 2 +- src/seer/automation/agent/agent.py | 16 ++++++++++++---- .../components/insight_sharing/component.py | 6 +++--- .../autofix/components/insight_sharing/models.py | 4 ++-- .../autofix/components/root_cause/prompts.py | 4 ++-- src/seer/automation/autofix/event_manager.py | 4 ---- src/seer/automation/autofix/steps/coding_step.py | 2 ++ .../automation/autofix/steps/root_cause_step.py | 3 +++ src/seer/automation/autofix/tools.py | 16 ++++++++++++---- 9 files changed, 37 insertions(+), 20 deletions(-) diff --git a/requirements.txt b/requirements.txt index 5cdff4d1..6bf9802e 100644 --- a/requirements.txt +++ b/requirements.txt @@ -28,7 +28,7 @@ mpmath==1.3.0 networkx==3.1 numpy==1.26.1 onnx==1.16.0 -openai==1.40.1 +openai==1.44.1 optimum==1.16.2 packaging pandas==2.0.3 diff --git a/src/seer/automation/agent/agent.py b/src/seer/automation/agent/agent.py index 20789fe2..5b81703a 100644 --- a/src/seer/automation/agent/agent.py +++ b/src/seer/automation/agent/agent.py @@ -72,8 +72,12 @@ def run_iteration(self, context: Optional[AutofixContext] = None): self.memory.append(message) # log thoughts to the user + print("BYE") + print(message) # TODO if message.content and context: text_before_tag = message.content.split("<")[0] + print("HELLO") + print(len(text_before_tag)) logs_inside_tags = extract_text_inside_tags( message.content, "log", strip_newlines=False ) @@ -82,8 +86,9 @@ def run_iteration(self, context: Optional[AutofixContext] = None): text = logs_inside_tags elif text_before_tag: text = text_before_tag + print("Asking for insight" if text else "Not asking for insight") if text: - # TODO call LLM separately with the same memory to generate structured output insight cards + # call LLM separately with the same memory to generate structured output insight cards insight_sharing = InsightSharingComponent(context) insight_card = insight_sharing.invoke( InsightSharingRequest( @@ -92,10 +97,13 @@ def run_iteration(self, context: Optional[AutofixContext] = None): 
                        task_description=context.state.get().get_step_description()
                    )
                )
                print("insight step run")
                if insight_card:
                    print("insight card generated")
                    if not insight_card.should_share_insight:
                        print("Shouldn't share")
                    elif context.state.get().steps and isinstance(context.state.get().steps[-1], DefaultStep):
                        print("updating step with new insight card")
                        step = cast(DefaultStep, context.state.get().steps[-1])
                        step.insights.append(insight_card)
                        with context.state.update() as cur:
                            cur.steps[-1] = step
diff --git a/src/seer/automation/autofix/components/insight_sharing/component.py b/src/seer/automation/autofix/components/insight_sharing/component.py
index e392da62..1499a9bc 100644
--- a/src/seer/automation/autofix/components/insight_sharing/component.py
+++ b/src/seer/automation/autofix/components/insight_sharing/component.py
@@ -8,7 +8,7 @@
 from seer.automation.agent.models import Message, Usage
 from seer.automation.autofix.autofix_context import AutofixContext
 from seer.automation.autofix.components.insight_sharing.models import InsightSharingOutput, InsightSharingRequest
-from seer.automation.component import BaseComponent, BaseComponentOutput, BaseComponentRequest
+from seer.automation.component import BaseComponent
 from seer.dependency_injection import inject, injected
@@ -28,9 +28,9 @@ def format_default_msg(
             {latest_thought}
             ---
 
-            First decide whether or not there is anything about this thought of yours that's important to share with your team and permanently document. If not, respond with no insight and no context items; should_share_insight is false.
+            First decide whether or not there is anything about this thought of yours that's important to share with your team and permanently document. If not, respond with should_share_insight is false.
 
-            If you think there is something new and critical to know from this thought regarding {task_description}, document it it. When documenting, you should give a clear, concise, and concrete insight (1 short line). Then you should provide a clear, concise justification (1 short line) for your insight using concrete pieces of context, whether it's a snippet from the codebase, a line from the stacktrace, an event log, an error message, or something else. Finally, return the specific context you needed for your justification so your team can connect the dots easily. Only include the minimum necessary; leave out anything not critical for understanding your insight."""
+            If you think there is something new and critical to know from this thought regarding {task_description}, document it. When documenting, you should give a clear, concise, and concrete insight (1 short line). Then you should provide a clear, concise justification (1 short line) for your insight using concrete pieces of context, whether it's a snippet from the files in the codebase you're looking at, a line from the stacktrace, an event log, or an error message. Finally, return the specific context you needed for your justification so your team can connect the dots easily. Make sure to put the context pieces in the field that corresponds to their original type, and do not make any data up."""
         ).format(
             task_description=task_description,
             latest_thought=latest_thought
         )
diff --git a/src/seer/automation/autofix/components/insight_sharing/models.py b/src/seer/automation/autofix/components/insight_sharing/models.py
index 6eca0978..84edbed9 100644
--- a/src/seer/automation/autofix/components/insight_sharing/models.py
+++ b/src/seer/automation/autofix/components/insight_sharing/models.py
@@ -12,7 +12,7 @@ class CodeSnippetContext(BaseModel):
 class BreadcrumbContext(BaseModel):
     type: str
     category: str
-    message: str
+    body: str
     level: str
     data_as_json: str
 
@@ -36,6 +36,6 @@ class InsightSharingOutput(BaseComponentOutput):
     insight: str
     justification_using_context: str
     error_message_context: list[str]
-    code_snippet_context: list[CodeSnippetContext]
+    codebase_snippet_context: list[CodeSnippetContext]
     stacktrace_context: list[StacktraceContext]
     event_log_context: list[BreadcrumbContext]
diff --git a/src/seer/automation/autofix/components/root_cause/prompts.py b/src/seer/automation/autofix/components/root_cause/prompts.py
index 006a5e81..e7c2c5bd 100644
--- a/src/seer/automation/autofix/components/root_cause/prompts.py
+++ b/src/seer/automation/autofix/components/root_cause/prompts.py
@@ -22,7 +22,7 @@ def format_system_msg():
     - You are not able to search in or make changes to external libraries. If the error is caused by an external library or the stacktrace only contains frames from external libraries, do not attempt to search in external libraries.
     - If you are not able to find any potential root causes, return only <NO_ROOT_CAUSES>.
     - If multiple searches turn up no viable results, you should conclude the session.
-    - At EVERY step of your investigation, you must think out loud! Share what you're learning and thinking along the way, EVERY TIME YOU SPEAK.
+    - At EVERY step of your investigation, you MUST think out loud! Share what you're learning and thinking along the way, EVERY TIME YOU SPEAK.
 
     It is important that we find the potential root causes of the issue."""
     ).format(
@@ -51,7 +51,7 @@ def format_default_msg(
     - Include float values from 0.0-1.0 of the likelihood and actionability of the root cause.
     - In the root cause, provide snippets of the original code, each with their own titles and descriptions, to highlight where and why the issue is occurring so that your colleagues fully understand the root cause. Provide as many snippets as you want. Within your snippets, you may highlight specific lines with a comment beginning with ***.
     - You MUST include the EXACT file name and repository name in the code snippets you provide. If you cannot, do not provide a code snippet.
-    - At EVERY step of your investigation, you must think out loud! 
Share what you're learning and thinking along the way, EVERY TIME YOU SPEAK.""" ).format( error_str=event, repo_names_str=format_repo_names(repo_names), diff --git a/src/seer/automation/autofix/event_manager.py b/src/seer/automation/autofix/event_manager.py index 743348b2..1e30990f 100644 --- a/src/seer/automation/autofix/event_manager.py +++ b/src/seer/automation/autofix/event_manager.py @@ -78,7 +78,6 @@ def send_root_cause_analysis_start(self): cur.make_step_latest(root_cause_step) cur.status = AutofixStatus.PROCESSING - self.add_log("I'll start analyzing the issue to figure out a root cause...") def send_root_cause_analysis_result(self, root_cause_output: RootCauseAnalysisOutput | None): with self.state.update() as cur: @@ -90,7 +89,6 @@ def send_root_cause_analysis_result(self, root_cause_output: RootCauseAnalysisOu root_cause_step.causes = root_cause_output.causes cur.status = AutofixStatus.NEED_MORE_INFORMATION - self.add_log("Here's what I think the root cause of the issue is. Feel free to edit it, or provide your own below if you disagree.") else: root_cause_step.status = AutofixStatus.ERROR cur.status = AutofixStatus.ERROR @@ -123,7 +121,6 @@ def send_coding_start(self): plan_step.status = AutofixStatus.PROCESSING cur.status = AutofixStatus.PROCESSING - self.add_log("Now that we've decided on a root cause, I'll start figuring out a fix...") def send_coding_result(self, result: CodingOutput | None): with self.state.update() as cur: @@ -141,7 +138,6 @@ def send_coding_complete(self, codebase_changes: list[CodebaseChange]): changes_step.changes = codebase_changes cur.status = AutofixStatus.COMPLETED - self.add_log("Here are the code changes that I think will fix the issue. Feel free to tweak them, or tell me below anything I should change.") def add_log(self, message: str): with self.state.update() as cur: diff --git a/src/seer/automation/autofix/steps/coding_step.py b/src/seer/automation/autofix/steps/coding_step.py index e7332cfa..9cc9e7f5 100644 --- a/src/seer/automation/autofix/steps/coding_step.py +++ b/src/seer/automation/autofix/steps/coding_step.py @@ -57,6 +57,7 @@ def _invoke(self, **kwargs): self.logger.info("Executing Autofix - Plan+Code Step") self.context.event_manager.send_coding_start() + self.context.event_manager.add_log("Beginning to figure out a fix for the root cause of this issue...") state = self.context.state.get() root_cause_and_fix = state.get_selected_root_cause_and_fix() @@ -88,3 +89,4 @@ def _invoke(self, **kwargs): ), queue=CeleryQueues.DEFAULT, ) + self.context.event_manager.add_log("Here are some code changes that I think fix the issue. 
If you disagree, feel free to edit them or tell me what I should change.") diff --git a/src/seer/automation/autofix/steps/root_cause_step.py b/src/seer/automation/autofix/steps/root_cause_step.py index aafa89ef..4c1c828c 100644 --- a/src/seer/automation/autofix/steps/root_cause_step.py +++ b/src/seer/automation/autofix/steps/root_cause_step.py @@ -50,6 +50,8 @@ def _instantiate_request(request: dict[str, Any]) -> RootCauseStepRequest: def _invoke(self, **kwargs): self.context.event_manager.send_root_cause_analysis_start() + self.context.event_manager.add_log("Beginning root cause analysis...") + state = self.context.state.get() event_details = EventDetails.from_event(state.request.issue.events[0]) self.context.process_event_paths(event_details) @@ -65,3 +67,4 @@ def _invoke(self, **kwargs): ) self.context.event_manager.send_root_cause_analysis_result(root_cause_output) + self.context.event_manager.add_log("Here's what I think the root cause is. If you disagree, feel free to edit it or provide your own from scratch.") diff --git a/src/seer/automation/autofix/tools.py b/src/seer/automation/autofix/tools.py index 339e2295..df37fa75 100644 --- a/src/seer/automation/autofix/tools.py +++ b/src/seer/automation/autofix/tools.py @@ -69,6 +69,8 @@ def list_directory(self, path: str, repo_name: str | None = None) -> str: # show potential corrected paths if nothing was found here other_paths = self._get_potential_abs_paths(path, repo_name) return f"\n{other_paths}".strip() + + self.context.event_manager.add_log(f"Looking at contents of `{path}` in `{repo_name}`...") joined = self._format_list_directory_output(dirs, files) return f"\n{joined}\n" @@ -158,6 +160,10 @@ def keyword_search( cleanup_dir(tmp_dir) + self.context.event_manager.add_log( + f"Searched codebase for `{keyword}`, found {len(results)} result(s)." + ) + if not results: return "No results found." @@ -172,10 +178,6 @@ def keyword_search( file_names.append(f"`{result.relative_path}`") result_str += f"{match_xml.to_prompt_str()}\n\n" - self.context.event_manager.add_log( - f"Searched codebase for `{keyword}`, found {len(file_names)} result(s) in {', '.join(file_names)}." 
- ) - return result_str @observe(name="File Search") @@ -191,6 +193,9 @@ def file_search( repo_client = self.context.get_repo_client(repo_name=repo_name) all_paths = repo_client.get_index_file_set() found = [path for path in all_paths if os.path.basename(path) == filename] + + self.context.event_manager.add_log(f"Searching for file `{filename}` in `{repo_name}`...") + if len(found) == 0: return f"no file with name {filename} found in repository" @@ -211,6 +216,9 @@ def file_search_wildcard( repo_client = self.context.get_repo_client(repo_name=repo_name) all_paths = repo_client.get_index_file_set() found = [path for path in all_paths if fnmatch.fnmatch(path, pattern)] + + self.context.event_manager.add_log(f"Searching for files with pattern `{pattern}` in `{repo_name}`...") + if len(found) == 0: return f"No files matching pattern '{pattern}' found in repository" From 1dce4a2ebf7095f07ffcc68dd776a69e04b33af6 Mon Sep 17 00:00:00 2001 From: Rohan Agarwal Date: Thu, 12 Sep 2024 09:18:44 -0700 Subject: [PATCH 06/15] Remove import --- .../automation/autofix/components/insight_sharing/component.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/seer/automation/autofix/components/insight_sharing/component.py b/src/seer/automation/autofix/components/insight_sharing/component.py index 1499a9bc..447744a4 100644 --- a/src/seer/automation/autofix/components/insight_sharing/component.py +++ b/src/seer/automation/autofix/components/insight_sharing/component.py @@ -1,7 +1,6 @@ import textwrap from langfuse.decorators import observe -from pydantic import BaseModel from sentry_sdk.ai.monitoring import ai_track from seer.automation.agent.client import GptClient From bd5c109dae435a687528cdea93f779cf634e396f Mon Sep 17 00:00:00 2001 From: Rohan Agarwal Date: Thu, 12 Sep 2024 10:47:20 -0700 Subject: [PATCH 07/15] Fix tool call cleanup, logs --- src/seer/automation/agent/client.py | 2 ++ .../components/insight_sharing/component.py | 14 ++++---------- src/seer/automation/autofix/event_manager.py | 2 -- .../autofix/steps/change_describer_step.py | 1 + src/seer/automation/autofix/steps/coding_step.py | 3 +-- .../automation/autofix/steps/root_cause_step.py | 2 +- 6 files changed, 9 insertions(+), 15 deletions(-) diff --git a/src/seer/automation/agent/client.py b/src/seer/automation/agent/client.py index 8b275e36..d7e74ed5 100644 --- a/src/seer/automation/agent/client.py +++ b/src/seer/automation/agent/client.py @@ -147,6 +147,8 @@ def clean_tool_call_assistant_messages(self, messages: list[Message]) -> list[Me ) elif message.role == "tool": new_messages.append(Message(role="user", content=message.content, tool_calls=[])) + elif message.role == "tool_use": + new_messages.append(Message(role="assistant", content=message.content, tool_calls=[])) else: new_messages.append(message) return new_messages diff --git a/src/seer/automation/autofix/components/insight_sharing/component.py b/src/seer/automation/autofix/components/insight_sharing/component.py index 447744a4..256d15fb 100644 --- a/src/seer/automation/autofix/components/insight_sharing/component.py +++ b/src/seer/automation/autofix/components/insight_sharing/component.py @@ -48,16 +48,10 @@ def invoke(self, request: InsightSharingRequest, gpt_client: GptClient = injecte latest_thought=request.latest_thought, ) - memory = [] - for msg in request.memory: - if msg.role == "system": - continue - if msg.role == "tool": - msg.role == "user" - msg.role = "user" if msg.role == "tool" else msg.role - msg.tool_calls = [] - msg.tool_call_id = None - 
memory.append(msg.to_message()) + memory = [ + message.to_message() + for message in gpt_client.clean_tool_call_assistant_messages(request.memory) if message.role != "system" + ] memory.append(Message(role="user", content=prompt).to_message()) completion = gpt_client.openai_client.beta.chat.completions.parse( diff --git a/src/seer/automation/autofix/event_manager.py b/src/seer/automation/autofix/event_manager.py index 1e30990f..ee1ae1ac 100644 --- a/src/seer/automation/autofix/event_manager.py +++ b/src/seer/automation/autofix/event_manager.py @@ -143,8 +143,6 @@ def add_log(self, message: str): with self.state.update() as cur: if cur.steps: step = cur.steps[-1] - if step.status != AutofixStatus.PROCESSING: - return # If the current step is the planning step, and an execution step is running, we log it there instead. if step.id == self.plan_step.id and step.progress: diff --git a/src/seer/automation/autofix/steps/change_describer_step.py b/src/seer/automation/autofix/steps/change_describer_step.py index 1c008efa..0f9577b9 100644 --- a/src/seer/automation/autofix/steps/change_describer_step.py +++ b/src/seer/automation/autofix/steps/change_describer_step.py @@ -92,3 +92,4 @@ def _invoke(self, **kwargs): codebase_changes.append(change) self.context.event_manager.send_coding_complete(codebase_changes) + self.context.event_manager.add_log("Here are some code changes that I think fix the issue. If you disagree, feel free to edit them or tell me what I should change.") diff --git a/src/seer/automation/autofix/steps/coding_step.py b/src/seer/automation/autofix/steps/coding_step.py index 9cc9e7f5..49018e3c 100644 --- a/src/seer/automation/autofix/steps/coding_step.py +++ b/src/seer/automation/autofix/steps/coding_step.py @@ -57,7 +57,7 @@ def _invoke(self, **kwargs): self.logger.info("Executing Autofix - Plan+Code Step") self.context.event_manager.send_coding_start() - self.context.event_manager.add_log("Beginning to figure out a fix for the root cause of this issue...") + self.context.event_manager.add_log("Figuring out a fix for the root cause of this issue...") state = self.context.state.get() root_cause_and_fix = state.get_selected_root_cause_and_fix() @@ -89,4 +89,3 @@ def _invoke(self, **kwargs): ), queue=CeleryQueues.DEFAULT, ) - self.context.event_manager.add_log("Here are some code changes that I think fix the issue. If you disagree, feel free to edit them or tell me what I should change.") diff --git a/src/seer/automation/autofix/steps/root_cause_step.py b/src/seer/automation/autofix/steps/root_cause_step.py index 4c1c828c..07dddb72 100644 --- a/src/seer/automation/autofix/steps/root_cause_step.py +++ b/src/seer/automation/autofix/steps/root_cause_step.py @@ -67,4 +67,4 @@ def _invoke(self, **kwargs): ) self.context.event_manager.send_root_cause_analysis_result(root_cause_output) - self.context.event_manager.add_log("Here's what I think the root cause is. If you disagree, feel free to edit it or provide your own from scratch.") + self.context.event_manager.add_log("Here's what I think the root cause is. 
If you disagree, feel free to edit it or provide your own idea below.") From 6d66401cbc302484c29625fd202faca6581db7a1 Mon Sep 17 00:00:00 2001 From: Rohan Agarwal Date: Fri, 13 Sep 2024 13:16:30 -0700 Subject: [PATCH 08/15] Working insight cards --- src/seer/automation/agent/agent.py | 22 ++++---------- .../components/insight_sharing/component.py | 29 ++++++++++--------- .../components/insight_sharing/models.py | 8 +++-- .../autofix/components/root_cause/models.py | 6 ---- .../autofix/components/root_cause/prompts.py | 1 - src/seer/automation/autofix/models.py | 11 ++++++- .../autofix/steps/change_describer_step.py | 2 +- .../autofix/steps/root_cause_step.py | 2 +- 8 files changed, 39 insertions(+), 42 deletions(-) diff --git a/src/seer/automation/agent/agent.py b/src/seer/automation/agent/agent.py index 5b81703a..0cafdd48 100644 --- a/src/seer/automation/agent/agent.py +++ b/src/seer/automation/agent/agent.py @@ -72,36 +72,27 @@ def run_iteration(self, context: Optional[AutofixContext] = None): self.memory.append(message) # log thoughts to the user - print("BYE") - print(message) # TODO if message.content and context: text_before_tag = message.content.split("<")[0] - print("HELLO") - print(len(text_before_tag)) - logs_inside_tags = extract_text_inside_tags( - message.content, "log", strip_newlines=False - ) - text = "" - if logs_inside_tags: - text = logs_inside_tags - elif text_before_tag: - text = text_before_tag + text = text_before_tag print("Asking for insight" if text else "Not asking for insight") if text: # call LLM separately with the same memory to generate structured output insight cards insight_sharing = InsightSharingComponent(context) + past_insights = context.state.get().get_all_insights() insight_card = insight_sharing.invoke( InsightSharingRequest( latest_thought=text, memory=self.memory, - task_description=context.state.get().get_step_description() + task_description=context.state.get().get_step_description(), + past_insights=past_insights ) ) print("insight step run") if insight_card: print("insight card generated") - if not insight_card.should_share_insight: - print("Shouldn't share") + if insight_card.is_unimportant_insight or insight_card.repeats_existing_idea or insight_card.is_incomplete_idea: + print(f"Shouldn't share: {insight_card.insight}") elif context.state.get().steps and isinstance(context.state.get().steps[-1], DefaultStep): print("updating step with new insight card") step = cast(DefaultStep, context.state.get().steps[-1]) @@ -109,7 +100,6 @@ def run_iteration(self, context: Optional[AutofixContext] = None): with context.state.update() as cur: cur.steps[-1] = step - if message.tool_calls: for tool_call in message.tool_calls: tool_response = self.call_tool(tool_call) diff --git a/src/seer/automation/autofix/components/insight_sharing/component.py b/src/seer/automation/autofix/components/insight_sharing/component.py index 256d15fb..36df5bae 100644 --- a/src/seer/automation/autofix/components/insight_sharing/component.py +++ b/src/seer/automation/autofix/components/insight_sharing/component.py @@ -1,5 +1,5 @@ import textwrap - +import re from langfuse.decorators import observe from sentry_sdk.ai.monitoring import ai_track @@ -16,23 +16,24 @@ class InsightSharingPrompts: def format_default_msg( task_description: str, latest_thought: str, + past_insights: list[str], ): + past_insights = [f"{i + 1}. {insight}" for i, insight in enumerate(past_insights)] return textwrap.dedent( """\ - You're an engineer leading the process of {task_description}. 
- To help your team, whenever you find an important insight needed for the process of {task_description}, you should document it. The only things we want to document are key conclusions in {task_description} that would belong in a final report, not random thoughts, tasks, or work-in-progress plans. - - You can look back on the conversation so far for context, but we're focused on the latest thought you had, which was: - --- - {latest_thought} - --- - - First decide whether or not there is anything about this thought of yours that's important to share with your team and permanently document. If not, respond with should_share_insight is false. + Consider the last thing you said in the conversation. Is there any key takeaway insight in there that we should use to continue the WIP line of reasoning below? + {insights} + + Make your answer 1 line that will be added onto the current line of reasoning. Separately, give a justification that should use context from the codebase and the issue details to quickly explain your insight. Also answer with snippets of only the most relevant context that helps explain. - If you think there is something new and critical to know from this thought regarding {task_description}, document it it. When documenting, you should give a clear, concise, and concrete insight (1 short line). Then you should provide a clear, concise justification (1 short line) for your insight using concrete pieces of context, whether it's a snippet from the files in the codebase you're looking at, a line from the stacktrace, an event log, or an error message. Finally, return the specific context you needed for your justification so your team can connect the dots easily. Only include the minimum necessary; leave out anything not critical for understanding your insight. Make sure to put the context pieces in the field that corresponds to their original type, and do not make any data up.""" + Finally, check: + - if your insight turns out to be unimportant for {task_description}. + - if your insight just repeats an already covered idea. + - if your insight is an incomplete idea that isn't ready to be added to the list.""" ).format( task_description=task_description, - latest_thought=latest_thought + latest_thought=latest_thought, + insights="\n".join(past_insights) if past_insights else "None", ) @@ -46,6 +47,7 @@ def invoke(self, request: InsightSharingRequest, gpt_client: GptClient = injecte prompt = InsightSharingPrompts.format_default_msg( task_description=request.task_description, latest_thought=request.latest_thought, + past_insights=request.past_insights, ) memory = [ @@ -55,7 +57,7 @@ def invoke(self, request: InsightSharingRequest, gpt_client: GptClient = injecte memory.append(Message(role="user", content=prompt).to_message()) completion = gpt_client.openai_client.beta.chat.completions.parse( - model="gpt-4o-mini-2024-07-18", + model="gpt-4o-mini-2024-07-18",#"gpt-4o-2024-08-06", messages=memory, response_format=InsightSharingOutput, temperature=0.0, @@ -73,4 +75,5 @@ def invoke(self, request: InsightSharingRequest, gpt_client: GptClient = injecte raise RuntimeError("Failed to parse message") res = completion.choices[0].message.parsed + res.insight = re.sub(r'^\d+\.\s+', '', res.insight) # since the model often starts the insight with a number, e.g. "3. Insight..." 
return res diff --git a/src/seer/automation/autofix/components/insight_sharing/models.py b/src/seer/automation/autofix/components/insight_sharing/models.py index 84edbed9..e5ad070d 100644 --- a/src/seer/automation/autofix/components/insight_sharing/models.py +++ b/src/seer/automation/autofix/components/insight_sharing/models.py @@ -29,13 +29,15 @@ class InsightSharingRequest(BaseComponentRequest): latest_thought: str task_description: str memory: list[Message] - + past_insights: list[str] class InsightSharingOutput(BaseComponentOutput): - should_share_insight: bool insight: str justification_using_context: str error_message_context: list[str] - codebase_snippet_context: list[CodeSnippetContext] + codebase_context: list[CodeSnippetContext] stacktrace_context: list[StacktraceContext] event_log_context: list[BreadcrumbContext] + is_unimportant_insight: bool + repeats_existing_idea: bool + is_incomplete_idea: bool diff --git a/src/seer/automation/autofix/components/root_cause/models.py b/src/seer/automation/autofix/components/root_cause/models.py index 9ba764f1..32976b4c 100644 --- a/src/seer/automation/autofix/components/root_cause/models.py +++ b/src/seer/automation/autofix/components/root_cause/models.py @@ -42,8 +42,6 @@ class RootCauseAnalysisItem(BaseModel): title: str description: str reproduction: str - likelihood: Annotated[float, Examples(r.uniform(0, 1) for r in gen)] = Field(..., ge=0, le=1) - actionability: Annotated[float, Examples(r.uniform(0, 1) for r in gen)] = Field(..., ge=0, le=1) code_context: Optional[list[RootCauseRelevantContext]] = None @@ -54,8 +52,6 @@ class RootCauseAnalysisRelevantContext(BaseModel): class RootCauseAnalysisItemPrompt(BaseModel): title: str description: str - likelihood: float - actionability: float reproduction: str relevant_code: Optional[RootCauseAnalysisRelevantContext] @@ -63,8 +59,6 @@ class RootCauseAnalysisItemPrompt(BaseModel): def from_model(cls, model: RootCauseAnalysisItem): return cls( title=model.title, - likelihood=model.likelihood, - actionability=model.actionability, description=model.description, reproduction=model.reproduction, relevant_code=( diff --git a/src/seer/automation/autofix/components/root_cause/prompts.py b/src/seer/automation/autofix/components/root_cause/prompts.py index f97333fe..810b2e16 100644 --- a/src/seer/automation/autofix/components/root_cause/prompts.py +++ b/src/seer/automation/autofix/components/root_cause/prompts.py @@ -43,7 +43,6 @@ def format_default_msg( # Guidelines: - The root cause should be inside its own block. - Include a title and description in the root cause. Your description may be as long as you need to help your team understand the issue, explaining the issue, the root cause, why this is happening, and how you came to your conclusion. - - Include float values from 0.0-1.0 of the likelihood and actionability of the root cause. - In the root cause, provide snippets of the original code, each with their own titles and descriptions, to highlight where and why the issue is occurring so that your colleagues fully understand the root cause. Provide as many snippets as you want. Within your snippets, you may highlight specific lines with a comment beginning with ***. - You MUST include the EXACT file name and repository name in the code snippets you provide. If you cannot, do not provide a code snippet. - At EVERY step of your investigation, you MUST think out loud! 
Share what you're learning and thinking along the way, EVERY TIME YOU SPEAK.""" diff --git a/src/seer/automation/autofix/models.py b/src/seer/automation/autofix/models.py index 2d2b43ac..57040e5e 100644 --- a/src/seer/automation/autofix/models.py +++ b/src/seer/automation/autofix/models.py @@ -1,7 +1,7 @@ import datetime import enum import uuid -from typing import Annotated, Any, Literal, Optional, Union +from typing import Annotated, Any, Literal, Optional, Union, cast from johen import gen from johen.examples import Examples @@ -390,6 +390,15 @@ def clear_file_changes(self): codebase.file_changes = [] self.codebases[key] = codebase + def get_all_insights(self): + insights = [] + step = self.steps[-1] + if step.status != AutofixStatus.ERROR and isinstance(step, DefaultStep): + for insight in cast(DefaultStep, step).insights: + insights.append(insight.insight) + return insights + + @property def is_running(self): return self.status == AutofixStatus.PROCESSING diff --git a/src/seer/automation/autofix/steps/change_describer_step.py b/src/seer/automation/autofix/steps/change_describer_step.py index 0f9577b9..7a1a924b 100644 --- a/src/seer/automation/autofix/steps/change_describer_step.py +++ b/src/seer/automation/autofix/steps/change_describer_step.py @@ -92,4 +92,4 @@ def _invoke(self, **kwargs): codebase_changes.append(change) self.context.event_manager.send_coding_complete(codebase_changes) - self.context.event_manager.add_log("Here are some code changes that I think fix the issue. If you disagree, feel free to edit them or tell me what I should change.") + self.context.event_manager.add_log("Here are some code changes that I think fix the issue. Feel free to edit them or tell me what I should change.") diff --git a/src/seer/automation/autofix/steps/root_cause_step.py b/src/seer/automation/autofix/steps/root_cause_step.py index 07dddb72..90a23240 100644 --- a/src/seer/automation/autofix/steps/root_cause_step.py +++ b/src/seer/automation/autofix/steps/root_cause_step.py @@ -67,4 +67,4 @@ def _invoke(self, **kwargs): ) self.context.event_manager.send_root_cause_analysis_result(root_cause_output) - self.context.event_manager.add_log("Here's what I think the root cause is. If you disagree, feel free to edit it or provide your own idea below.") + self.context.event_manager.add_log("Here's what I think the root cause is. 
Feel free to edit it or provide your own idea below.") From ef7ed8d56c62f727f00ade5bfc0e6e9a92301ea6 Mon Sep 17 00:00:00 2001 From: Rohan Agarwal Date: Mon, 16 Sep 2024 10:43:28 -0700 Subject: [PATCH 09/15] Improve insight cards, add user interjection --- src/seer/app.py | 3 + src/seer/automation/agent/agent.py | 25 ++++-- .../components/insight_sharing/component.py | 88 ++++++++++++++----- .../components/insight_sharing/models.py | 14 +-- src/seer/automation/autofix/models.py | 17 +++- .../autofix/steps/change_describer_step.py | 2 +- .../autofix/steps/root_cause_step.py | 2 +- src/seer/automation/autofix/tasks.py | 8 ++ 8 files changed, 117 insertions(+), 42 deletions(-) diff --git a/src/seer/app.py b/src/seer/app.py index 9ed0a45f..999b98dc 100644 --- a/src/seer/app.py +++ b/src/seer/app.py @@ -27,6 +27,7 @@ check_and_mark_if_timed_out, get_autofix_state, get_autofix_state_from_pr_id, + receive_user_message, run_autofix_create_pr, run_autofix_evaluation, run_autofix_execution, @@ -176,6 +177,8 @@ def autofix_update_endpoint( run_autofix_execution(data) elif data.payload.type == AutofixUpdateType.CREATE_PR: run_autofix_create_pr(data) + elif data.payload.type == AutofixUpdateType.USER_MESSAGE: + receive_user_message(data) return AutofixEndpointResponse(started=True, run_id=data.run_id) diff --git a/src/seer/automation/agent/agent.py b/src/seer/automation/agent/agent.py index 0cafdd48..f7fde4ec 100644 --- a/src/seer/automation/agent/agent.py +++ b/src/seer/automation/agent/agent.py @@ -63,19 +63,32 @@ def get_completion(self): system_prompt=self.config.system_prompt if self.config.system_prompt else None, tools=(self.tools if len(self.tools) > 0 else None), ) + + def use_user_messages(self, context: AutofixContext): + # adds any queued user messages to the memory + user_msgs = context.state.get().steps[-1].queued_user_messages + if user_msgs: + self.memory.append(Message(content="\n".join(user_msgs), role="user")) + with context.state.update() as cur: + cur.steps[-1].queued_user_messages = [] + context.event_manager.add_log("Thanks for the input. 
I'm thinking through it now...") def run_iteration(self, context: Optional[AutofixContext] = None): logger.debug(f"----[{self.name}] Running Iteration {self.iterations}----") - + message, usage = self.get_completion() + # interrupt if user message is queued and awaiting handling + if context and context.state.get().steps[-1].queued_user_messages: + self.use_user_messages(context) + return + self.memory.append(message) # log thoughts to the user if message.content and context: text_before_tag = message.content.split("<")[0] text = text_before_tag - print("Asking for insight" if text else "Not asking for insight") if text: # call LLM separately with the same memory to generate structured output insight cards insight_sharing = InsightSharingComponent(context) @@ -88,13 +101,8 @@ def run_iteration(self, context: Optional[AutofixContext] = None): past_insights=past_insights ) ) - print("insight step run") if insight_card: - print("insight card generated") - if insight_card.is_unimportant_insight or insight_card.repeats_existing_idea or insight_card.is_incomplete_idea: - print(f"Shouldn't share: {insight_card.insight}") - elif context.state.get().steps and isinstance(context.state.get().steps[-1], DefaultStep): - print("updating step with new insight card") + if context.state.get().steps and isinstance(context.state.get().steps[-1], DefaultStep): step = cast(DefaultStep, context.state.get().steps[-1]) step.insights.append(insight_card) with context.state.update() as cur: @@ -139,6 +147,7 @@ def run(self, prompt: str, context: Optional[AutofixContext] = None): self.reset_iterations() while self.should_continue(): + if context: self.use_user_messages(context) self.run_iteration(context=context) if self.iterations == self.config.max_iterations: diff --git a/src/seer/automation/autofix/components/insight_sharing/component.py b/src/seer/automation/autofix/components/insight_sharing/component.py index 36df5bae..7d6b826e 100644 --- a/src/seer/automation/autofix/components/insight_sharing/component.py +++ b/src/seer/automation/autofix/components/insight_sharing/component.py @@ -6,14 +6,13 @@ from seer.automation.agent.client import GptClient from seer.automation.agent.models import Message, Usage from seer.automation.autofix.autofix_context import AutofixContext -from seer.automation.autofix.components.insight_sharing.models import InsightSharingOutput, InsightSharingRequest +from seer.automation.autofix.components.insight_sharing.models import InsightContextOutput, InsightSharingOutput, InsightSharingRequest from seer.automation.component import BaseComponent -from seer.dependency_injection import inject, injected - +from seer.dependency_injection import inject, injected class InsightSharingPrompts: @staticmethod - def format_default_msg( + def format_step_one( task_description: str, latest_thought: str, past_insights: list[str], @@ -21,20 +20,38 @@ def format_default_msg( past_insights = [f"{i + 1}. {insight}" for i, insight in enumerate(past_insights)] return textwrap.dedent( """\ - Consider the last thing you said in the conversation. Is there any key takeaway insight in there that we should use to continue the WIP line of reasoning below? + Given the chain of thought below for {task_description}: {insights} - - Make your answer 1 line that will be added onto the current line of reasoning. Separately, give a justification that should use context from the codebase and the issue details to quickly explain your insight. Also answer with snippets of only the most relevant context that helps explain. 
-
-        Finally, check:
-        - if your insight turns out to be unimportant for {task_description}.
-        - if your insight just repeats an already covered idea.
-        - if your insight is an incomplete idea that isn't ready to be added to the list."""
+
+        Write the next under-25-words conclusion in the chain of thought based on the notes below, or if there is no good conclusion to add, return an empty response. The criteria for a good conclusion are that it should be a large, novel jump in insights, not similar to any item in the existing chain of thought, it should be a complete conclusion after analysis, it should not be a plan of what to analyze next, and it should be valuable for {task_description}. Every item in the chain of thought should read like a chain that clearly builds off of the previous step. If you can't find a conclusion that meets these criteria, return an empty response.
+
+        {latest_thought}"""
        ).format(
            task_description=task_description,
            latest_thought=latest_thought,
            insights="\n".join(past_insights) if past_insights else "None",
        )
+
+    @staticmethod
+    def format_step_two(
+        insight: str,
+        latest_thought: str
+    ):
+        return textwrap.dedent(
+            """\
+            Return the pieces of context from the issue details or the files in the codebase that are directly relevant to the text below:
+            {insight}
+
+            That means choose the most relevant codebase snippets, event logs, stacktraces, or other information, that show specifically what the text mentions. Don't include any repeated information; just include what's needed.
+
+            Also provide a one-line explanation of how the pieces of context directly explain the text.
+
+            To know what's needed, reference these notes:
+            {latest_thought}"""
+        ).format(
+            insight=insight,
+            latest_thought=latest_thought,
+        )

class InsightSharingComponent(BaseComponent[InsightSharingRequest, InsightSharingOutput]):
@@ -44,30 +61,45 @@ class InsightSharingComponent(BaseComponent[InsightSharingRequest, InsightSharin
    @ai_track(description="Sharing Insights")
    @inject
    def invoke(self, request: InsightSharingRequest, gpt_client: GptClient = injected) -> InsightSharingOutput | None:
-        prompt = InsightSharingPrompts.format_default_msg(
+        prompt_one = InsightSharingPrompts.format_step_one(
            task_description=request.task_description,
            latest_thought=request.latest_thought,
            past_insights=request.past_insights,
        )
+        completion = gpt_client.openai_client.chat.completions.create(
+            model="gpt-4o-mini-2024-07-18",
+            messages=[Message(role="user", content=prompt_one).to_message()],
+            temperature=0.0,
+        )
+        with self.context.state.update() as cur:
+            usage = Usage(completion_tokens=completion.usage.completion_tokens, prompt_tokens=completion.usage.prompt_tokens, total_tokens=completion.usage.total_tokens)
+            cur.usage += usage
+        insight = completion.choices[0].message.content
+        if insight == "":
+            return None
+
+        insight = re.sub(r'^\d+\.\s+', '', insight) # since the model often starts the insight with a number, e.g. "3. Insight..."
-        memory = [
-            message.to_message()
-            for message in gpt_client.clean_tool_call_assistant_messages(request.memory) if message.role != "system"
-        ]
-        memory.append(Message(role="user", content=prompt).to_message())
+        prompt_two = InsightSharingPrompts.format_step_two(
+            insight=insight,
+            latest_thought=request.latest_thought,
+        )
+        memory = []
+        for i, message in enumerate(gpt_client.clean_tool_call_assistant_messages(request.memory)):
+            if message.role != "system":
+                memory.append(message.to_message())
+        memory.append(Message(role="user", content=prompt_two).to_message())
        completion = gpt_client.openai_client.beta.chat.completions.parse(
-            model="gpt-4o-mini-2024-07-18",#"gpt-4o-2024-08-06",
+            model="gpt-4o-mini-2024-07-18",
            messages=memory,
-            response_format=InsightSharingOutput,
+            response_format=InsightContextOutput,
            temperature=0.0,
            max_tokens=2048,
        )
-
        with self.context.state.update() as cur:
            usage = Usage(completion_tokens=completion.usage.completion_tokens, prompt_tokens=completion.usage.prompt_tokens, total_tokens=completion.usage.total_tokens)
            cur.usage += usage
-
        structured_message = completion.choices[0].message
        if structured_message.refusal:
            raise RuntimeError(structured_message.refusal)
@@ -75,5 +107,13 @@ def invoke(self, request: InsightSharingRequest, gpt_client: GptClient = injecte
            raise RuntimeError("Failed to parse message")

        res = completion.choices[0].message.parsed
-        res.insight = re.sub(r'^\d+\.\s+', '', res.insight) # since the model often starts the insight with a number, e.g. "3. Insight..."
-        return res
+
+        response = InsightSharingOutput(
+            insight=insight,
+            justification=res.explanation,
+            error_message_context=res.error_message_context,
+            codebase_context=res.codebase_context,
+            stacktrace_context=res.stacktrace_context,
+            breadcrumb_context=res.event_log_context
+        )
+        return response
diff --git a/src/seer/automation/autofix/components/insight_sharing/models.py b/src/seer/automation/autofix/components/insight_sharing/models.py
index e5ad070d..94614e5b 100644
--- a/src/seer/automation/autofix/components/insight_sharing/models.py
+++ b/src/seer/automation/autofix/components/insight_sharing/models.py
@@ -25,6 +25,13 @@ class StacktraceContext(BaseModel):
    code_snippet: str
    vars_as_json: str

+class InsightContextOutput(BaseModel):
+    explanation: str
+    error_message_context: list[str]
+    codebase_context: list[CodeSnippetContext]
+    stacktrace_context: list[StacktraceContext]
+    event_log_context: list[BreadcrumbContext]
+
class InsightSharingRequest(BaseComponentRequest):
    latest_thought: str
    task_description: str
    memory: list[Message]
    past_insights: list[str]

class InsightSharingOutput(BaseComponentOutput):
    insight: str
-    justification_using_context: str
    error_message_context: list[str]
    codebase_context: list[CodeSnippetContext]
    stacktrace_context: list[StacktraceContext]
-    event_log_context: list[BreadcrumbContext]
-    is_unimportant_insight: bool
-    repeats_existing_idea: bool
-    is_incomplete_idea: bool
+    breadcrumb_context: list[BreadcrumbContext]
+    justification: str
diff --git a/src/seer/automation/autofix/models.py b/src/seer/automation/autofix/models.py
index 57040e5e..bf87dd58 100644
--- a/src/seer/automation/autofix/models.py
+++ b/src/seer/automation/autofix/models.py
@@ -122,6 +122,13 @@ class BaseStep(BaseModel):
    progress: list["ProgressItem | Step"] = Field(default_factory=list)
    completedMessage: Optional[str] = None

+    queued_user_messages: list[str] = []
+
+    def receive_user_message(self, message: str):
+
self.queued_user_messages.append(message) + print("HELLO user message") + print(self.queued_user_messages) + def find_child(self, *, id: str) -> "Step | None": for step in self.progress: if isinstance(step, (DefaultStep, RootCauseStep, ChangesStep)) and step.id == id: @@ -199,7 +206,6 @@ class AutofixGroupState(BaseModel): completed_at: datetime.datetime | None = None signals: list[str] = Field(default_factory=list) - class AutofixStateRequest(BaseModel): group_id: int | None = None run_id: int | None = None @@ -272,6 +278,7 @@ def validate_repo_duplicates(cls, repos): class AutofixUpdateType(str, enum.Enum): SELECT_ROOT_CAUSE = "select_root_cause" CREATE_PR = "create_pr" + USER_MESSAGE = "user_message" class AutofixRootCauseUpdatePayload(BaseModel): @@ -285,10 +292,14 @@ class AutofixCreatePrUpdatePayload(BaseModel): repo_external_id: str | None = None repo_id: int | None = None # TODO: Remove this when we won't be breaking LA customers. +class AutofixUserMessagePayload(BaseModel): + type: Literal[AutofixUpdateType.USER_MESSAGE] + text: str + class AutofixUpdateRequest(BaseModel): run_id: int - payload: Union[AutofixRootCauseUpdatePayload, AutofixCreatePrUpdatePayload] = Field( + payload: Union[AutofixRootCauseUpdatePayload, AutofixCreatePrUpdatePayload, AutofixUserMessagePayload] = Field( discriminator="type" ) @@ -301,7 +312,7 @@ def get_step_description(self) -> str: return "" step = self.steps[-1] if step.type == StepType.DEFAULT and step.key == "root_cause_analysis_processing": - return "finding the root cause of the issue" + return "figuring out what is causing the issue (not thinking about solutions yet)" elif step.type == StepType.DEFAULT and step.key == "plan": return "coming up with a fix for the issue" elif step.type == StepType.ROOT_CAUSE_ANALYSIS: diff --git a/src/seer/automation/autofix/steps/change_describer_step.py b/src/seer/automation/autofix/steps/change_describer_step.py index 7a1a924b..c13eda3b 100644 --- a/src/seer/automation/autofix/steps/change_describer_step.py +++ b/src/seer/automation/autofix/steps/change_describer_step.py @@ -92,4 +92,4 @@ def _invoke(self, **kwargs): codebase_changes.append(change) self.context.event_manager.send_coding_complete(codebase_changes) - self.context.event_manager.add_log("Here are some code changes that I think fix the issue. Feel free to edit them or tell me what I should change.") + self.context.event_manager.add_log("Above are some code changes that I think fix the issue. Feel free to edit them or tell me what I should change.") diff --git a/src/seer/automation/autofix/steps/root_cause_step.py b/src/seer/automation/autofix/steps/root_cause_step.py index 90a23240..c89484f9 100644 --- a/src/seer/automation/autofix/steps/root_cause_step.py +++ b/src/seer/automation/autofix/steps/root_cause_step.py @@ -67,4 +67,4 @@ def _invoke(self, **kwargs): ) self.context.event_manager.send_root_cause_analysis_result(root_cause_output) - self.context.event_manager.add_log("Here's what I think the root cause is. Feel free to edit it or provide your own idea below.") + self.context.event_manager.add_log("Above is what I think the root cause is. 
Feel free to edit it or propose your own root cause instead.") diff --git a/src/seer/automation/autofix/tasks.py b/src/seer/automation/autofix/tasks.py index a87ba9b4..6e48ef38 100644 --- a/src/seer/automation/autofix/tasks.py +++ b/src/seer/automation/autofix/tasks.py @@ -28,6 +28,7 @@ AutofixRootCauseUpdatePayload, AutofixStatus, AutofixUpdateRequest, + AutofixUserMessagePayload, ) from seer.automation.autofix.runs import create_initial_autofix_run from seer.automation.autofix.state import ContinuationState @@ -197,6 +198,13 @@ def run_autofix_create_pr(request: AutofixUpdateRequest): repo_external_id=request.payload.repo_external_id, repo_id=request.payload.repo_id ) +def receive_user_message(request: AutofixUpdateRequest): + if not isinstance(request.payload, AutofixUserMessagePayload): + raise ValueError("Invalid payload type for user_message") + + state = ContinuationState.from_id(request.run_id, model=AutofixContinuation) + with state.update() as cur: + cur.steps[-1].receive_user_message(request.payload.text) def run_autofix_evaluation(request: AutofixEvaluationRequest): langfuse = Langfuse() From 281cc25a69a5e47235e6271ee97d4743d962b0b1 Mon Sep 17 00:00:00 2001 From: Rohan Agarwal Date: Mon, 16 Sep 2024 13:15:44 -0700 Subject: [PATCH 10/15] Update copy --- src/seer/automation/autofix/steps/change_describer_step.py | 4 +++- src/seer/automation/autofix/steps/root_cause_step.py | 4 +++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/src/seer/automation/autofix/steps/change_describer_step.py b/src/seer/automation/autofix/steps/change_describer_step.py index c13eda3b..31513640 100644 --- a/src/seer/automation/autofix/steps/change_describer_step.py +++ b/src/seer/automation/autofix/steps/change_describer_step.py @@ -92,4 +92,6 @@ def _invoke(self, **kwargs): codebase_changes.append(change) self.context.event_manager.send_coding_complete(codebase_changes) - self.context.event_manager.add_log("Above are some code changes that I think fix the issue. Feel free to edit them or tell me what I should change.") + self.context.event_manager.add_log( + "Above are some code changes that I think fix the issue." + ) # TODO: add 'Feel free to edit them or tell me what I should change.' once those features are in diff --git a/src/seer/automation/autofix/steps/root_cause_step.py b/src/seer/automation/autofix/steps/root_cause_step.py index c89484f9..cce4a5a8 100644 --- a/src/seer/automation/autofix/steps/root_cause_step.py +++ b/src/seer/automation/autofix/steps/root_cause_step.py @@ -67,4 +67,6 @@ def _invoke(self, **kwargs): ) self.context.event_manager.send_root_cause_analysis_result(root_cause_output) - self.context.event_manager.add_log("Above is what I think the root cause is. Feel free to edit it or propose your own root cause instead.") + self.context.event_manager.add_log( + "Above is what I think the root cause is. Feel free to propose your own root cause instead." 
+ ) # TODO add 'edit it or propose your own' once that feature is in From c62b3461c18d4a468a3cf90fb545d49139826cdf Mon Sep 17 00:00:00 2001 From: Rohan Agarwal Date: Mon, 16 Sep 2024 14:33:33 -0700 Subject: [PATCH 11/15] Add tests --- src/seer/automation/agent/agent.py | 16 +-- src/seer/automation/agent/client.py | 4 +- .../components/insight_sharing/component.py | 47 +++++--- .../components/insight_sharing/models.py | 5 + .../autofix/components/root_cause/models.py | 5 +- src/seer/automation/autofix/models.py | 10 +- src/seer/automation/autofix/tasks.py | 10 +- src/seer/automation/autofix/tools.py | 6 +- tests/automation/agent/test_agent.py | 72 ++++++++++++- .../components/test_insight_sharing.py | 100 ++++++++++++++++++ .../autofix/components/test_root_cause.py | 18 ++-- .../autofix/test_autofix_event_manager.py | 2 +- 12 files changed, 242 insertions(+), 53 deletions(-) create mode 100644 tests/automation/autofix/components/test_insight_sharing.py diff --git a/src/seer/automation/agent/agent.py b/src/seer/automation/agent/agent.py index f7fde4ec..6a885f67 100644 --- a/src/seer/automation/agent/agent.py +++ b/src/seer/automation/agent/agent.py @@ -18,7 +18,6 @@ from seer.automation.autofix.components.insight_sharing.component import InsightSharingComponent from seer.automation.autofix.components.insight_sharing.models import InsightSharingRequest from seer.automation.autofix.models import DefaultStep -from seer.automation.utils import extract_text_inside_tags from seer.dependency_injection import inject, injected logger = logging.getLogger("autofix") @@ -63,7 +62,7 @@ def get_completion(self): system_prompt=self.config.system_prompt if self.config.system_prompt else None, tools=(self.tools if len(self.tools) > 0 else None), ) - + def use_user_messages(self, context: AutofixContext): # adds any queued user messages to the memory user_msgs = context.state.get().steps[-1].queued_user_messages @@ -75,7 +74,7 @@ def use_user_messages(self, context: AutofixContext): def run_iteration(self, context: Optional[AutofixContext] = None): logger.debug(f"----[{self.name}] Running Iteration {self.iterations}----") - + message, usage = self.get_completion() # interrupt if user message is queued and awaiting handling @@ -90,7 +89,7 @@ def run_iteration(self, context: Optional[AutofixContext] = None): text_before_tag = message.content.split("<")[0] text = text_before_tag if text: - # call LLM separately with the same memory to generate structured output insight cards + # call LLM separately with the same memory to generate structured output insight cards insight_sharing = InsightSharingComponent(context) past_insights = context.state.get().get_all_insights() insight_card = insight_sharing.invoke( @@ -98,11 +97,13 @@ def run_iteration(self, context: Optional[AutofixContext] = None): latest_thought=text, memory=self.memory, task_description=context.state.get().get_step_description(), - past_insights=past_insights + past_insights=past_insights, ) ) if insight_card: - if context.state.get().steps and isinstance(context.state.get().steps[-1], DefaultStep): + if context.state.get().steps and isinstance( + context.state.get().steps[-1], DefaultStep + ): step = cast(DefaultStep, context.state.get().steps[-1]) step.insights.append(insight_card) with context.state.update() as cur: @@ -147,7 +148,8 @@ def run(self, prompt: str, context: Optional[AutofixContext] = None): self.reset_iterations() while self.should_continue(): - if context: self.use_user_messages(context) + if context: + self.use_user_messages(context) 
self.run_iteration(context=context) if self.iterations == self.config.max_iterations: diff --git a/src/seer/automation/agent/client.py b/src/seer/automation/agent/client.py index d7e74ed5..406d97be 100644 --- a/src/seer/automation/agent/client.py +++ b/src/seer/automation/agent/client.py @@ -148,7 +148,9 @@ def clean_tool_call_assistant_messages(self, messages: list[Message]) -> list[Me elif message.role == "tool": new_messages.append(Message(role="user", content=message.content, tool_calls=[])) elif message.role == "tool_use": - new_messages.append(Message(role="assistant", content=message.content, tool_calls=[])) + new_messages.append( + Message(role="assistant", content=message.content, tool_calls=[]) + ) else: new_messages.append(message) return new_messages diff --git a/src/seer/automation/autofix/components/insight_sharing/component.py b/src/seer/automation/autofix/components/insight_sharing/component.py index 7d6b826e..6736e87b 100644 --- a/src/seer/automation/autofix/components/insight_sharing/component.py +++ b/src/seer/automation/autofix/components/insight_sharing/component.py @@ -1,14 +1,20 @@ -import textwrap import re +import textwrap + from langfuse.decorators import observe from sentry_sdk.ai.monitoring import ai_track from seer.automation.agent.client import GptClient from seer.automation.agent.models import Message, Usage from seer.automation.autofix.autofix_context import AutofixContext -from seer.automation.autofix.components.insight_sharing.models import InsightContextOutput, InsightSharingOutput, InsightSharingRequest +from seer.automation.autofix.components.insight_sharing.models import ( + InsightContextOutput, + InsightSharingOutput, + InsightSharingRequest, +) from seer.automation.component import BaseComponent -from seer.dependency_injection import inject, injected +from seer.dependency_injection import inject, injected + class InsightSharingPrompts: @staticmethod @@ -31,21 +37,18 @@ def format_step_one( latest_thought=latest_thought, insights="\n".join(past_insights) if past_insights else "None", ) - + @staticmethod - def format_step_two( - insight: str, - latest_thought: str - ): + def format_step_two(insight: str, latest_thought: str): return textwrap.dedent( """\ Return the pieces of context from the issue details or the files in the codebase that are directly relevant to the text below: {insight} That means choose the most relevant codebase snippets, event logs, stacktraces, or other information, that show specifically what the text mentions. Don't include any repeated information; just include what's needed. - + Also provide a one-line explanation of how the pieces of context directly explain the text. 
- + To know what's needed, reference these notes: {latest_thought}""" ).format( @@ -60,7 +63,9 @@ class InsightSharingComponent(BaseComponent[InsightSharingRequest, InsightSharin @observe(name="Sharing Insights") @ai_track(description="Sharing Insights") @inject - def invoke(self, request: InsightSharingRequest, gpt_client: GptClient = injected) -> InsightSharingOutput | None: + def invoke( + self, request: InsightSharingRequest, gpt_client: GptClient = injected + ) -> InsightSharingOutput | None: prompt_one = InsightSharingPrompts.format_step_one( task_description=request.task_description, latest_thought=request.latest_thought, @@ -72,13 +77,19 @@ def invoke(self, request: InsightSharingRequest, gpt_client: GptClient = injecte temperature=0.0, ) with self.context.state.update() as cur: - usage = Usage(completion_tokens=completion.usage.completion_tokens, prompt_tokens=completion.usage.prompt_tokens, total_tokens=completion.usage.total_tokens) + usage = Usage( + completion_tokens=completion.usage.completion_tokens, + prompt_tokens=completion.usage.prompt_tokens, + total_tokens=completion.usage.total_tokens, + ) cur.usage += usage insight = completion.choices[0].message.content if insight == "": return None - - insight = re.sub(r'^\d+\.\s+', '', insight) # since the model often starts the insight with a number, e.g. "3. Insight..." + + insight = re.sub( + r"^\d+\.\s+", "", insight + ) # since the model often starts the insight with a number, e.g. "3. Insight..." prompt_two = InsightSharingPrompts.format_step_two( insight=insight, @@ -98,7 +109,11 @@ def invoke(self, request: InsightSharingRequest, gpt_client: GptClient = injecte max_tokens=2048, ) with self.context.state.update() as cur: - usage = Usage(completion_tokens=completion.usage.completion_tokens, prompt_tokens=completion.usage.prompt_tokens, total_tokens=completion.usage.total_tokens) + usage = Usage( + completion_tokens=completion.usage.completion_tokens, + prompt_tokens=completion.usage.prompt_tokens, + total_tokens=completion.usage.total_tokens, + ) cur.usage += usage structured_message = completion.choices[0].message if structured_message.refusal: @@ -114,6 +129,6 @@ def invoke(self, request: InsightSharingRequest, gpt_client: GptClient = injecte error_message_context=res.error_message_context, codebase_context=res.codebase_context, stacktrace_context=res.stacktrace_context, - breadcrumb_context=res.event_log_context + breadcrumb_context=res.event_log_context, ) return response diff --git a/src/seer/automation/autofix/components/insight_sharing/models.py b/src/seer/automation/autofix/components/insight_sharing/models.py index 94614e5b..6c219ea4 100644 --- a/src/seer/automation/autofix/components/insight_sharing/models.py +++ b/src/seer/automation/autofix/components/insight_sharing/models.py @@ -9,6 +9,7 @@ class CodeSnippetContext(BaseModel): file_path: str snippet: str + class BreadcrumbContext(BaseModel): type: str category: str @@ -16,6 +17,7 @@ class BreadcrumbContext(BaseModel): level: str data_as_json: str + class StacktraceContext(BaseModel): file_name: str repo_name: str @@ -25,6 +27,7 @@ class StacktraceContext(BaseModel): code_snippet: str vars_as_json: str + class InsightContextOutput(BaseModel): explanation: str error_message_context: list[str] @@ -32,12 +35,14 @@ class InsightContextOutput(BaseModel): stacktrace_context: list[StacktraceContext] event_log_context: list[BreadcrumbContext] + class InsightSharingRequest(BaseComponentRequest): latest_thought: str task_description: str memory: list[Message] 
past_insights: list[str] + class InsightSharingOutput(BaseComponentOutput): insight: str error_message_context: list[str] diff --git a/src/seer/automation/autofix/components/root_cause/models.py b/src/seer/automation/autofix/components/root_cause/models.py index 32976b4c..b3aeae86 100644 --- a/src/seer/automation/autofix/components/root_cause/models.py +++ b/src/seer/automation/autofix/components/root_cause/models.py @@ -1,8 +1,6 @@ from typing import Annotated, Optional -from johen import gen -from johen.examples import Examples -from pydantic import BaseModel, Field, StringConstraints +from pydantic import BaseModel, StringConstraints from pydantic_xml import attr from seer.automation.component import BaseComponentOutput, BaseComponentRequest @@ -88,6 +86,7 @@ def to_model(self): } ) + class MultipleRootCauseAnalysisOutputPrompt(BaseModel): cause: RootCauseAnalysisItemPrompt diff --git a/src/seer/automation/autofix/models.py b/src/seer/automation/autofix/models.py index bf87dd58..7fef574e 100644 --- a/src/seer/automation/autofix/models.py +++ b/src/seer/automation/autofix/models.py @@ -168,6 +168,7 @@ class DefaultStep(BaseStep): type: Literal[StepType.DEFAULT] = StepType.DEFAULT insights: list[InsightSharingOutput] = [] + class RootCauseStep(BaseStep): type: Literal[StepType.ROOT_CAUSE_ANALYSIS] = StepType.ROOT_CAUSE_ANALYSIS @@ -206,6 +207,7 @@ class AutofixGroupState(BaseModel): completed_at: datetime.datetime | None = None signals: list[str] = Field(default_factory=list) + class AutofixStateRequest(BaseModel): group_id: int | None = None run_id: int | None = None @@ -292,6 +294,7 @@ class AutofixCreatePrUpdatePayload(BaseModel): repo_external_id: str | None = None repo_id: int | None = None # TODO: Remove this when we won't be breaking LA customers. 
+ class AutofixUserMessagePayload(BaseModel): type: Literal[AutofixUpdateType.USER_MESSAGE] text: str @@ -299,9 +302,9 @@ class AutofixUserMessagePayload(BaseModel): class AutofixUpdateRequest(BaseModel): run_id: int - payload: Union[AutofixRootCauseUpdatePayload, AutofixCreatePrUpdatePayload, AutofixUserMessagePayload] = Field( - discriminator="type" - ) + payload: Union[ + AutofixRootCauseUpdatePayload, AutofixCreatePrUpdatePayload, AutofixUserMessagePayload + ] = Field(discriminator="type") class AutofixContinuation(AutofixGroupState): @@ -409,7 +412,6 @@ def get_all_insights(self): insights.append(insight.insight) return insights - @property def is_running(self): return self.status == AutofixStatus.PROCESSING diff --git a/src/seer/automation/autofix/tasks.py b/src/seer/automation/autofix/tasks.py index 6e48ef38..16cf5caa 100644 --- a/src/seer/automation/autofix/tasks.py +++ b/src/seer/automation/autofix/tasks.py @@ -198,14 +198,16 @@ def run_autofix_create_pr(request: AutofixUpdateRequest): repo_external_id=request.payload.repo_external_id, repo_id=request.payload.repo_id ) + def receive_user_message(request: AutofixUpdateRequest): - if not isinstance(request.payload, AutofixUserMessagePayload): + if not isinstance(request.payload, AutofixUserMessagePayload): raise ValueError("Invalid payload type for user_message") - - state = ContinuationState.from_id(request.run_id, model=AutofixContinuation) - with state.update() as cur: + + state = ContinuationState.from_id(request.run_id, model=AutofixContinuation) + with state.update() as cur: cur.steps[-1].receive_user_message(request.payload.text) + def run_autofix_evaluation(request: AutofixEvaluationRequest): langfuse = Langfuse() diff --git a/src/seer/automation/autofix/tools.py b/src/seer/automation/autofix/tools.py index b70f3f93..3f3b5e53 100644 --- a/src/seer/automation/autofix/tools.py +++ b/src/seer/automation/autofix/tools.py @@ -70,7 +70,7 @@ def list_directory(self, path: str, repo_name: str | None = None) -> str: # show potential corrected paths if nothing was found here other_paths = self._get_potential_abs_paths(path, repo_name) return f"\n{other_paths}".strip() - + self.context.event_manager.add_log(f"Looking at contents of `{path}` in `{repo_name}`...") joined = self._format_list_directory_output(dirs, files) @@ -218,7 +218,9 @@ def file_search_wildcard( all_paths = repo_client.get_index_file_set() found = [path for path in all_paths if fnmatch.fnmatch(path, pattern)] - self.context.event_manager.add_log(f"Searching for files with pattern `{pattern}` in `{repo_name}`...") + self.context.event_manager.add_log( + f"Searching for files with pattern `{pattern}` in `{repo_name}`..." 
+ ) if len(found) == 0: return f"No files matching pattern '{pattern}' found in repository" diff --git a/tests/automation/agent/test_agent.py b/tests/automation/agent/test_agent.py index f1549629..7e4df42e 100644 --- a/tests/automation/agent/test_agent.py +++ b/tests/automation/agent/test_agent.py @@ -13,6 +13,9 @@ from seer.automation.agent.client import ClaudeClient, GptClient, LlmClient, T from seer.automation.agent.models import Message, ToolCall, Usage from seer.automation.agent.tools import FunctionTool +from seer.automation.autofix.autofix_context import AutofixContext +from seer.automation.autofix.components.insight_sharing.models import InsightSharingRequest +from seer.automation.autofix.models import DefaultStep from seer.dependency_injection import resolve @@ -143,18 +146,81 @@ def mock_client(self, agent_and_client_classes): _, client_class = agent_and_client_classes return resolve(client_class) - def test_run_iteration(self, agent, mock_client): + @pytest.fixture + def mock_context(self): + context = MagicMock(spec=AutofixContext) + state = MagicMock() + state.steps = [DefaultStep(title="Test step title")] + state.get_all_insights.return_value = [] + state.get_step_description.return_value = "Test step" + context.state = MagicMock() + context.state.get.return_value = state + return context + + @patch("seer.automation.agent.agent.InsightSharingComponent") + def test_run_iteration(self, mock_insight_sharing, agent, mock_context): + # Mock the message and usage mock_message = Message(role="assistant", content="Test response") mock_usage = Usage(completion_tokens=10, prompt_tokens=20, total_tokens=30) - mock_client.completion = MagicMock(return_value=(mock_message, mock_usage)) + agent.get_completion = MagicMock(return_value=(mock_message, mock_usage)) + + # Mock the insight sharing component + mock_insight_card = MagicMock() + mock_insight_sharing_instance = MagicMock() + mock_insight_sharing_instance.invoke.return_value = mock_insight_card + mock_insight_sharing.return_value = mock_insight_sharing_instance + agent.call_tool = MagicMock(return_value=None) - agent.run_iteration() + # Run the method + agent.run_iteration(context=mock_context) + # Assertions assert agent.iterations == 1 assert len(agent.memory) == 1 assert agent.memory[0] == mock_message assert agent.usage == mock_usage + # Check if insight sharing was called + mock_insight_sharing.assert_called_once_with(mock_context) + mock_insight_sharing_instance.invoke.assert_called_once() + assert isinstance( + mock_insight_sharing_instance.invoke.call_args[0][0], InsightSharingRequest + ) + + # Check if the insight was added to the step + assert mock_context.state.get().steps[-1].insights[-1] == mock_insight_card + + # Check if tool calls were not made + agent.call_tool.assert_not_called() + + def test_run_iteration_with_queued_user_messages(self, agent, mock_client, mock_context): + # Create a mock step with queued_user_messages as a list of strings + mock_step = MagicMock(spec=DefaultStep) + mock_step.queued_user_messages = ["User message 1", "User message 2"] + + # Set the mock step as the last step in the context + mock_context.state.get().steps = [mock_step] + + agent.use_user_messages = MagicMock() + + mock_message = Message(role="assistant", content="Test response") + mock_usage = Usage(completion_tokens=10, prompt_tokens=20, total_tokens=30) + mock_client.completion = MagicMock(return_value=(mock_message, mock_usage)) + + # Run the method + agent.run_iteration(context=mock_context) + + # Assertions + 
agent.use_user_messages.assert_called_once_with(mock_context) + assert agent.iterations == 0 # Should not increment + assert len(agent.memory) == 0 # Should not add to memory + + # Additional assertion to verify the queued_user_messages + assert mock_context.state.get().steps[-1].queued_user_messages == [ + "User message 1", + "User message 2", + ] + def test_get_completion(self, agent, mock_client): mock_message = Message(role="assistant", content="Test response") mock_usage = Usage(completion_tokens=10, prompt_tokens=20, total_tokens=30) diff --git a/tests/automation/autofix/components/test_insight_sharing.py b/tests/automation/autofix/components/test_insight_sharing.py new file mode 100644 index 00000000..73cc4bed --- /dev/null +++ b/tests/automation/autofix/components/test_insight_sharing.py @@ -0,0 +1,100 @@ +from unittest.mock import MagicMock, patch + +import pytest + +from seer.automation.agent.models import Message +from seer.automation.autofix.autofix_context import AutofixContext +from seer.automation.autofix.components.insight_sharing.component import InsightSharingComponent +from seer.automation.autofix.components.insight_sharing.models import ( + InsightSharingOutput, + InsightSharingRequest, +) + + +class TestInsightSharingComponent: + @pytest.fixture + def component(self): + mock_context = MagicMock(spec=AutofixContext) + mock_context.state = MagicMock() + mock_context.skip_loading_codebase = True + return InsightSharingComponent(mock_context) + + @pytest.fixture + def mock_gpt_client(self): + with patch( + "seer.automation.autofix.components.insight_sharing.component.GptClient" + ) as mock: + yield mock + + def test_invoke_with_insight(self, component, mock_gpt_client): + request = InsightSharingRequest( + task_description="Test task", + latest_thought="Latest thought", + past_insights=["Past insight 1", "Past insight 2"], + memory=[Message(role="user", content="Test memory")], + ) + + mock_completion_1 = MagicMock() + mock_completion_1.choices[0].message.content = "New insight" + mock_completion_1.usage = MagicMock(completion_tokens=10, prompt_tokens=20, total_tokens=30) + + mock_completion_2 = MagicMock() + mock_completion_2.choices[0].message.parsed = MagicMock( + explanation="Test explanation", + error_message_context=["Test error context"], + codebase_context=[], + stacktrace_context=[], + event_log_context=[], + ) + mock_completion_2.choices[0].message.refusal = None + + mock_gpt_client.openai_client.chat.completions.create.return_value = mock_completion_1 + mock_gpt_client.openai_client.beta.chat.completions.parse.return_value = mock_completion_2 + + result = component.invoke(request, gpt_client=mock_gpt_client) + + assert isinstance(result, InsightSharingOutput) + assert result.insight == "New insight" + assert result.justification == "Test explanation" + assert result.error_message_context == ["Test error context"] + assert result.codebase_context == [] + assert result.stacktrace_context == [] + assert result.breadcrumb_context == [] + + def test_invoke_with_no_insight(self, component, mock_gpt_client): + request = InsightSharingRequest( + task_description="Test task", + latest_thought="Latest thought", + past_insights=["Past insight 1", "Past insight 2"], + memory=[Message(role="user", content="Test memory")], + ) + + mock_completion = MagicMock() + mock_completion.choices[0].message.content = "" + + mock_gpt_client.openai_client.chat.completions.create.return_value = mock_completion + + result = component.invoke(request, gpt_client=mock_gpt_client) + + assert 
result is None + + def test_invoke_with_error(self, component, mock_gpt_client): + request = InsightSharingRequest( + task_description="Test task", + latest_thought="Latest thought", + past_insights=["Past insight 1"], + memory=[Message(role="user", content="Test memory")], + ) + + mock_completion_1 = MagicMock() + mock_completion_1.choices[0].message.content = "New insight" + mock_completion_1.usage = MagicMock(completion_tokens=10, prompt_tokens=20, total_tokens=30) + + mock_completion_2 = MagicMock() + mock_completion_2.choices[0].message.refusal = "Test refusal" + + mock_gpt_client.openai_client.chat.completions.create.return_value = mock_completion_1 + mock_gpt_client.openai_client.beta.chat.completions.parse.return_value = mock_completion_2 + + with pytest.raises(RuntimeError, match="Test refusal"): + component.invoke(request, gpt_client=mock_gpt_client) diff --git a/tests/automation/autofix/components/test_root_cause.py b/tests/automation/autofix/components/test_root_cause.py index 9e6bad6e..fd170add 100644 --- a/tests/automation/autofix/components/test_root_cause.py +++ b/tests/automation/autofix/components/test_root_cause.py @@ -45,16 +45,12 @@ def test_root_cause_simple_response_parsing(self, component, mock_gpt_agent): function_call=None, tool_calls=None, parsed=MultipleRootCauseAnalysisOutputPrompt( - causes=[ - RootCauseAnalysisItemPrompt( - title="Missing Null Check", - description="The root cause of the issue is ...", - likelihood=0.9, - actionability=1.0, - reproduction="Steps to reproduce", - relevant_code=None, - ) - ] + cause=RootCauseAnalysisItemPrompt( + title="Missing Null Check", + description="The root cause of the issue is ...", + reproduction="Steps to reproduce", + relevant_code=None, + ) ), refusal=None, ), @@ -78,8 +74,6 @@ def test_root_cause_simple_response_parsing(self, component, mock_gpt_agent): assert len(output.causes) == 1 assert output.causes[0].title == "Missing Null Check" assert output.causes[0].description == "The root cause of the issue is ..." 
- assert output.causes[0].likelihood == 0.9 - assert output.causes[0].actionability == 1.0 assert output.causes[0].reproduction == "Steps to reproduce" assert output.causes[0].code_context is None diff --git a/tests/automation/autofix/test_autofix_event_manager.py b/tests/automation/autofix/test_autofix_event_manager.py index 720af1a2..d5647f31 100644 --- a/tests/automation/autofix/test_autofix_event_manager.py +++ b/tests/automation/autofix/test_autofix_event_manager.py @@ -72,7 +72,7 @@ def test_add_log_no_processing_step(self, event_manager, state): event_manager.add_log("Test log message") - assert len(state.get().steps[0].progress) == 0 + assert len(state.get().steps[0].progress) == 1 def test_add_log_empty_steps(self, event_manager, state): state.get().steps = [] From f7294fe0e205b5cb97f8eb221a577405d27c6c22 Mon Sep 17 00:00:00 2001 From: Rohan Agarwal Date: Mon, 16 Sep 2024 15:36:33 -0700 Subject: [PATCH 12/15] Add tests --- .../automation/autofix/test_autofix_tasks.py | 42 +++++++++++++++++++ tests/test_seer.py | 32 +++++++++++++- 2 files changed, 72 insertions(+), 2 deletions(-) diff --git a/tests/automation/autofix/test_autofix_tasks.py b/tests/automation/autofix/test_autofix_tasks.py index b6a06d3c..5fe26c66 100644 --- a/tests/automation/autofix/test_autofix_tasks.py +++ b/tests/automation/autofix/test_autofix_tasks.py @@ -11,11 +11,14 @@ AutofixRootCauseUpdatePayload, AutofixStatus, AutofixUpdateRequest, + AutofixUpdateType, + AutofixUserMessagePayload, ) from seer.automation.autofix.tasks import ( check_and_mark_recent_autofix_runs, get_autofix_state, get_autofix_state_from_pr_id, + receive_user_message, run_autofix_create_pr, run_autofix_execution, run_autofix_root_cause, @@ -215,3 +218,42 @@ def test_check_and_mark_recent_autofix_runs( mock_logger.info.assert_any_call("Got 2 runs") mock_check_and_mark.assert_has_calls([call(mock_run1), call(mock_run2)]) assert mock_check_and_mark.call_count == 2 + + +class TestHandleUserMessages: + @patch("seer.automation.autofix.tasks.ContinuationState") + def test_receive_user_message_success(self, mock_continuation_state): + # Create mock payload and request + mock_payload = AutofixUserMessagePayload( + type=AutofixUpdateType.USER_MESSAGE, text="testing" + ) + mock_request = MagicMock() + mock_request.payload = mock_payload + mock_request.run_id = 123 # Example run_id + + mock_continuation_state.from_id.return_value.update.return_value.__enter__.return_value = ( + MagicMock(steps=[MagicMock()]) + ) + mock_continuation_state.from_id.return_value.update.return_value.__enter__.return_value.steps[ + -1 + ].receive_user_message = MagicMock() + + # Call the function under test + receive_user_message(mock_request) + + # Assertions + mock_continuation_state.from_id.assert_called_once_with(123, model=AutofixContinuation) + mock_continuation_state.from_id.return_value.update.return_value.__enter__.return_value.steps[ + -1 + ].receive_user_message.assert_called_once_with( + "testing" + ) + + def test_receive_user_message_invalid_payload_type(self): + mock_payload = MagicMock() # incorrect payload type + mock_request = MagicMock() + mock_request.payload = mock_payload + + # Test for ValueError + with pytest.raises(ValueError, match="Invalid payload type for user_message"): + receive_user_message(mock_request) diff --git a/tests/test_seer.py b/tests/test_seer.py index 94a52a57..d57fb9d1 100644 --- a/tests/test_seer.py +++ b/tests/test_seer.py @@ -10,8 +10,14 @@ from johen.pytest import parametrize from sqlalchemy import text -from seer.app import app 
-from seer.automation.autofix.models import AutofixContinuation, AutofixEvaluationRequest +from seer.app import app, autofix_update_endpoint +from seer.automation.autofix.models import ( + AutofixContinuation, + AutofixEndpointResponse, + AutofixEvaluationRequest, + AutofixUpdateRequest, + AutofixUpdateType, +) from seer.automation.codebase.models import CodebaseStatusCheckRequest, CodebaseStatusCheckResponse from seer.automation.models import RepoDefinition from seer.automation.state import LocalMemoryState @@ -463,6 +469,28 @@ def test_autofix_evaluation_start_endpoint_with_run_on_item_id( # Assert that run_autofix_evaluation was called with the correct arguments mock_run_autofix_evaluation.assert_called_once_with(test_data) + def test_autofix_update_endpoint(self): + test_cases = [ + (AutofixUpdateType.SELECT_ROOT_CAUSE, "seer.app.run_autofix_execution"), + (AutofixUpdateType.CREATE_PR, "seer.app.run_autofix_create_pr"), + (AutofixUpdateType.USER_MESSAGE, "seer.app.receive_user_message"), + ] + + for autofix_type, expected_func in test_cases: + mock_request = mock.MagicMock(spec=AutofixUpdateRequest) + mock_request.run_id = 123 + mock_request.payload = mock.MagicMock() + mock_request.payload.type = autofix_type + + with mock.patch(expected_func) as mock_func: + response = autofix_update_endpoint(mock_request) + + mock_func.assert_called_once_with(mock_request) + + self.assertIsInstance(response, AutofixEndpointResponse) + self.assertTrue(response.started) + self.assertEqual(response.run_id, mock_request.run_id) + class TestGetCodebaseIndexStatusEndpoint(unittest.TestCase): @mock.patch("seer.app.CodebaseStatusCheckResponse") From 0717cef2c48f8962ec583b14ebbf8e69b02f3a56 Mon Sep 17 00:00:00 2001 From: Rohan Agarwal Date: Mon, 16 Sep 2024 15:46:19 -0700 Subject: [PATCH 13/15] Bump cryptography --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 6bf9802e..fa8016ee 100644 --- a/requirements.txt +++ b/requirements.txt @@ -106,4 +106,4 @@ langfuse @ git+https://github.com/jennmueng/langfuse-python.git@9d9350de1e4e84fa watchdog stumpy==1.13.0 pytest_alembic==0.11.1 -cryptography==43.0.0 +cryptography==43.0.1 From b3d010f564d2cad1c5f2a4be8fa03922275b6566 Mon Sep 17 00:00:00 2001 From: Rohan Agarwal Date: Wed, 18 Sep 2024 14:39:06 -0700 Subject: [PATCH 14/15] Add interactive flag to disable interactivity by default --- src/seer/automation/agent/agent.py | 5 +++-- src/seer/automation/autofix/components/coding/component.py | 2 +- .../automation/autofix/components/root_cause/component.py | 4 +++- tests/automation/agent/test_agent.py | 2 +- 4 files changed, 8 insertions(+), 5 deletions(-) diff --git a/src/seer/automation/agent/agent.py b/src/seer/automation/agent/agent.py index 6a885f67..4baea4fe 100644 --- a/src/seer/automation/agent/agent.py +++ b/src/seer/automation/agent/agent.py @@ -32,6 +32,7 @@ class AgentConfig(BaseModel): stop_message: Optional[str] = Field( default=None, description="Message that signals the agent to stop" ) + interactive: bool = False # enables interactive user-facing features class Config: validate_assignment = True @@ -85,7 +86,7 @@ def run_iteration(self, context: Optional[AutofixContext] = None): self.memory.append(message) # log thoughts to the user - if message.content and context: + if message.content and context and self.config.interactive: text_before_tag = message.content.split("<")[0] text = text_before_tag if text: @@ -148,7 +149,7 @@ def run(self, prompt: str, context: 
Optional[AutofixContext] = None): self.reset_iterations() while self.should_continue(): - if context: + if context and self.config.interactive: self.use_user_messages(context) self.run_iteration(context=context) diff --git a/src/seer/automation/autofix/components/coding/component.py b/src/seer/automation/autofix/components/coding/component.py index 3c8fbb44..a9feca46 100644 --- a/src/seer/automation/autofix/components/coding/component.py +++ b/src/seer/automation/autofix/components/coding/component.py @@ -40,7 +40,7 @@ def invoke(self, request: CodingRequest) -> CodingOutput | None: agent = ClaudeAgent( tools=tools.get_tools(), - config=AgentConfig(system_prompt=CodingPrompts.format_system_msg()), + config=AgentConfig(system_prompt=CodingPrompts.format_system_msg(), interactive=True), ) task_str = ( diff --git a/src/seer/automation/autofix/components/root_cause/component.py b/src/seer/automation/autofix/components/root_cause/component.py index fa224ce0..97d76912 100644 --- a/src/seer/automation/autofix/components/root_cause/component.py +++ b/src/seer/automation/autofix/components/root_cause/component.py @@ -35,7 +35,9 @@ def invoke( agent = GptAgent( tools=tools.get_tools(), config=AgentConfig( - system_prompt=RootCauseAnalysisPrompts.format_system_msg(), max_iterations=24 + system_prompt=RootCauseAnalysisPrompts.format_system_msg(), + max_iterations=24, + interactive=True, ), ) diff --git a/tests/automation/agent/test_agent.py b/tests/automation/agent/test_agent.py index 7e4df42e..8dccd001 100644 --- a/tests/automation/agent/test_agent.py +++ b/tests/automation/agent/test_agent.py @@ -134,7 +134,7 @@ def agent_and_client_classes(self, request): @pytest.fixture def config(self): - return AgentConfig() + return AgentConfig(interactive=True) @pytest.fixture def agent(self, agent_and_client_classes, config): From f879db15f045d1aa349871f7b0082d0524d5ccd6 Mon Sep 17 00:00:00 2001 From: Rohan Agarwal Date: Wed, 18 Sep 2024 15:53:48 -0700 Subject: [PATCH 15/15] Cleanup --- .../autofix/components/insight_sharing/component.py | 10 ++-------- src/seer/automation/autofix/models.py | 2 -- 2 files changed, 2 insertions(+), 10 deletions(-) diff --git a/src/seer/automation/autofix/components/insight_sharing/component.py b/src/seer/automation/autofix/components/insight_sharing/component.py index 6736e87b..cb70695c 100644 --- a/src/seer/automation/autofix/components/insight_sharing/component.py +++ b/src/seer/automation/autofix/components/insight_sharing/component.py @@ -13,6 +13,7 @@ InsightSharingRequest, ) from seer.automation.component import BaseComponent +from seer.automation.utils import extract_parsed_model from seer.dependency_injection import inject, injected @@ -115,14 +116,7 @@ def invoke( total_tokens=completion.usage.total_tokens, ) cur.usage += usage - structured_message = completion.choices[0].message - if structured_message.refusal: - raise RuntimeError(structured_message.refusal) - if not structured_message.parsed: - raise RuntimeError("Failed to parse message") - - res = completion.choices[0].message.parsed - + res = extract_parsed_model(completion) response = InsightSharingOutput( insight=insight, justification=res.explanation, diff --git a/src/seer/automation/autofix/models.py b/src/seer/automation/autofix/models.py index 7fef574e..0a57a844 100644 --- a/src/seer/automation/autofix/models.py +++ b/src/seer/automation/autofix/models.py @@ -126,8 +126,6 @@ class BaseStep(BaseModel): def receive_user_message(self, message: str): self.queued_user_messages.append(message) - print("HELLO 
user message") - print(self.queued_user_messages) def find_child(self, *, id: str) -> "Step | None": for step in self.progress: