From c0d109ad803d8b54eb552e7a780aaf7898a40281 Mon Sep 17 00:00:00 2001 From: Goran Peretin Date: Sat, 8 Jun 2024 07:59:41 +0000 Subject: [PATCH] Various prompt and telemetry fixes. --- core/agents/external_docs.py | 61 +++++++++++-------- core/agents/orchestrator.py | 2 +- core/prompts/developer/breakdown.prompt | 15 +---- ...ries.prompt => create_docs_queries.prompt} | 4 +- .../external-docs/select_docset.prompt | 2 +- core/prompts/partials/doc_snippets.prompt | 14 +++++ core/prompts/troubleshooter/iteration.prompt | 3 + tests/agents/test_external_docs.py | 6 +- 8 files changed, 61 insertions(+), 46 deletions(-) rename core/prompts/external-docs/{create_queries.prompt => create_docs_queries.prompt} (80%) create mode 100644 core/prompts/partials/doc_snippets.prompt diff --git a/core/agents/external_docs.py b/core/agents/external_docs.py index 9ab61acb5..8226a68bc 100644 --- a/core/agents/external_docs.py +++ b/core/agents/external_docs.py @@ -2,17 +2,27 @@ from urllib.parse import urljoin import httpx +from pydantic import BaseModel from core.agents.base import BaseAgent from core.agents.convo import AgentConvo from core.agents.response import AgentResponse from core.config import EXTERNAL_DOCUMENTATION_API +from core.llm.parser import JSONParser from core.log import get_logger from core.telemetry import telemetry log = get_logger(__name__) +class DocQueries(BaseModel): + queries: list[str] + + +class SelectedDocsets(BaseModel): + docsets: list[str] + + class ExternalDocumentation(BaseAgent): """Agent in charge of collecting and storing additional documentation. 
@@ -34,14 +44,9 @@ class ExternalDocumentation(BaseAgent): display_name = "Documentation" async def run(self) -> AgentResponse: - current_task = self.current_state.current_task - if not current_task: - # If we have no active task, there's no docs to collect - return AgentResponse.done(self) - available_docsets = await self._get_available_docsets() selected_docsets = await self._select_docsets(available_docsets) - telemetry.set("docsets_used", selected_docsets) + await telemetry.trace_code_event("docsets_used", selected_docsets) if not selected_docsets: log.info("No documentation selected for this task.") @@ -50,7 +55,7 @@ async def run(self) -> AgentResponse: queries = await self._create_queries(selected_docsets) doc_snippets = await self._fetch_snippets(queries) - telemetry.set("doc_snippets_stored", len(doc_snippets)) + await telemetry.trace_code_event("doc_snippets", {"num_stored": len(doc_snippets)}) await self._store_docs(doc_snippets, available_docsets) return AgentResponse.done(self) @@ -75,19 +80,19 @@ async def _select_docsets(self, available_docsets: list[tuple]) -> dict[str, str return {} llm = self.get_llm() - convo = AgentConvo(self).template( - "select_docset", - current_task=self.current_state.current_task, - available_docsets=available_docsets, + convo = ( + AgentConvo(self) + .template( + "select_docset", + current_task=self.current_state.current_task, + available_docsets=available_docsets, + ) + .require_schema(SelectedDocsets) ) - await self.send_message("Determining if external documentation is needed...") - llm_response: str = await llm(convo) + await self.send_message("Determining if external documentation is needed for the next task...") + llm_response: SelectedDocsets = await llm(convo, parser=JSONParser(spec=SelectedDocsets)) available_docsets = dict(available_docsets) - if llm_response.strip().lower() == "done": - return {} - else: - selected_keys = llm_response.splitlines() - return {k: available_docsets[k] for k in selected_keys} + 
return {k: available_docsets[k] for k in llm_response.docsets} async def _create_queries(self, docsets: dict[str, str]) -> dict[str, list[str]]: """Return queries we have to make to the docs API. @@ -99,16 +104,18 @@ async def _create_queries(self, docsets: dict[str, str]) -> dict[str, list[str]] await self.send_message("Getting relevant documentation for the following topics:") for k, short_desc in docsets.items(): llm = self.get_llm() - convo = AgentConvo(self).template( - "create_queries", - short_description=short_desc, - current_task=self.current_state.current_task, + convo = ( + AgentConvo(self) + .template( + "create_docs_queries", + short_description=short_desc, + current_task=self.current_state.current_task, + ) + .require_schema(DocQueries) ) - llm_response: str = await llm(convo) - if llm_response.strip().lower() == "done": - continue - else: - queries[k] = llm_response.splitlines() + llm_response: DocQueries = await llm(convo, parser=JSONParser(spec=DocQueries)) + if llm_response.queries: + queries[k] = llm_response.queries return queries diff --git a/core/agents/orchestrator.py b/core/agents/orchestrator.py index 0b67cd872..632015429 100644 --- a/core/agents/orchestrator.py +++ b/core/agents/orchestrator.py @@ -191,7 +191,7 @@ def create_agent(self, prev_response: Optional[AgentResponse]) -> BaseAgent: return TechLead(self.state_manager, self.ui, process_manager=self.process_manager) current_task_docs = state.current_task.get("docs") if state.current_task else None - if current_task_docs is None: + if state.current_task and current_task_docs is None: return ExternalDocumentation(self.state_manager, self.ui) # Current task status must be checked before Developer is called because we might want diff --git a/core/prompts/developer/breakdown.prompt b/core/prompts/developer/breakdown.prompt index 5131dce9d..80a41260f 100644 --- a/core/prompts/developer/breakdown.prompt +++ b/core/prompts/developer/breakdown.prompt @@ -19,20 +19,7 @@ You are currently 
working on task #{{ current_task_index + 1 }} with the followi Now, tell me all the code that needs to be written to implement ONLY this task and have it fully working and all commands that need to be run to implement this task. -{% if task.docs %} -We have some some documentation snippets that might be helpful while working on this task, we will now list those. - -{% for d in task.docs %} -Documentation snippets from {{ d.desc }}: -{% for snippet in d.snippets %} -{{ snippet }} - -{% endfor %} - -{% endfor %} - -This concludes the documentation snippets. -{% endif %} +{% include "partials/doc_snippets.prompt" %} **IMPORTANT** {%- if state.epics|length == 1 %} diff --git a/core/prompts/external-docs/create_queries.prompt b/core/prompts/external-docs/create_docs_queries.prompt similarity index 80% rename from core/prompts/external-docs/create_queries.prompt rename to core/prompts/external-docs/create_docs_queries.prompt index 6b0763330..6023d26cd 100644 --- a/core/prompts/external-docs/create_queries.prompt +++ b/core/prompts/external-docs/create_docs_queries.prompt @@ -1,7 +1,9 @@ {% include "partials/project_details.prompt" %} Here is the next task that needs to be implemented: +``` {{ current_task.description }} +``` Here is the list of the libraries, frameworks and APIs for which we have documentation available. The documentation is given in a sequence of pairs, one pair per line. First item in the pair is the documentation key. Second item is the short description of what that documentation contains. Here's an example for React API documentation: @@ -9,4 +11,4 @@ Here's an example for React API documentation: We have additional documentation from "{{ short_description }}" that might be useful for completing this task. -Now, give me a summary of what specifically from the {{ short_description }} you think would be useful for completing this task. Please provide only the topics of interest, no additional text. 
Only return the topics relevant to the actual implementation, NOT the topics related to library installation and setup, environment setup, database setup and similar. Return the topics one item per line, WITHOUT any additional formatting such as backticks, bullets and similar. Return a maximum of 5 topics you think would be most useful. +Now, give me a summary of what specifically from the {{ short_description }} you think would be useful for completing this task. Please provide only the topics of interest, no additional text. Only return the topics relevant to the actual implementation, NOT the topics related to library installation and setup, environment setup, database setup and similar. Return the topics in JSON format, as a list of strings, WITHOUT any additional formatting such as backticks, bullets and similar. Return a maximum of 5 topics you think would be most useful. diff --git a/core/prompts/external-docs/select_docset.prompt b/core/prompts/external-docs/select_docset.prompt index e43a418f6..8293c9080 100644 --- a/core/prompts/external-docs/select_docset.prompt +++ b/core/prompts/external-docs/select_docset.prompt @@ -12,4 +12,4 @@ Here is the list of available documentations: {{ docset[0], docset[1] }} {% endfor %} -Now, give me the list of the additional documentation that you would like to use to complete the task listed above. Return ONLY the keys from the available documentation list, without any formatting like quotes and bullets, DO NOT return anything else. If you don't need any additional documentation, just respond with "DONE". Return only the documentation that is absolutely required for the given task. +Now, give me the list of the additional documentation that you would like to use to complete the task listed above. Return only the documentation that is absolutely required for the given task. If there is no additional documentation in the list that you would like to use, return an empty list. 
diff --git a/core/prompts/partials/doc_snippets.prompt b/core/prompts/partials/doc_snippets.prompt new file mode 100644 index 000000000..0714f4d35 --- /dev/null +++ b/core/prompts/partials/doc_snippets.prompt @@ -0,0 +1,14 @@ +{% if task.docs %} +We have some documentation snippets that might be helpful while working on this task, we will now list those. + +---START_OF_DOCUMENTATION_SNIPPETS--- +{% for d in task.docs %} +Documentation snippets from {{ d.desc }}: +{% for snippet in d.snippets %} +{{ snippet }} + +{% endfor %} + +{% endfor %} +---END_OF_DOCUMENTATION_SNIPPETS--- +{% endif %} diff --git a/core/prompts/troubleshooter/iteration.prompt b/core/prompts/troubleshooter/iteration.prompt index f20b15489..d99c30999 100644 --- a/core/prompts/troubleshooter/iteration.prompt +++ b/core/prompts/troubleshooter/iteration.prompt @@ -39,6 +39,9 @@ Focus on solving this issue in the following way: {{ next_solution_to_try }} ``` {% endif %} +{% with task=current_task %} + {% include "partials/doc_snippets.prompt" %} +{% endwith %} Now, you have to debug this issue and comply with the additional user feedback. 
**IMPORTANT** diff --git a/tests/agents/test_external_docs.py b/tests/agents/test_external_docs.py index f9d88621a..5fc2522f2 100644 --- a/tests/agents/test_external_docs.py +++ b/tests/agents/test_external_docs.py @@ -3,7 +3,7 @@ import pytest from httpx import HTTPError -from core.agents.external_docs import ExternalDocumentation +from core.agents.external_docs import DocQueries, ExternalDocumentation, SelectedDocsets @pytest.mark.asyncio @@ -14,7 +14,9 @@ async def test_stores_documentation_snippets_for_task(agentcontext): await sm.commit() ed = ExternalDocumentation(sm, ui) - ed.get_llm = mock_llm(side_effect=["vuejs-api-ref", "VueJS Options Rendering"]) + ed.get_llm = mock_llm( + side_effect=[SelectedDocsets(docsets=["vuejs-api-ref"]), DocQueries(queries=["VueJS component model"])] + ) await ed.run() assert ed.next_state.current_task["docs"][0]["key"] == "vuejs-api-ref"