From c0d109ad803d8b54eb552e7a780aaf7898a40281 Mon Sep 17 00:00:00 2001
From: Goran Peretin <goran.peretin@gmail.com>
Date: Sat, 8 Jun 2024 07:59:41 +0000
Subject: [PATCH] Various prompt and telemetry fixes.

---
 core/agents/external_docs.py                  | 61 +++++++++++--------
 core/agents/orchestrator.py                   |  2 +-
 core/prompts/developer/breakdown.prompt       | 15 +----
 ...ries.prompt => create_docs_queries.prompt} |  4 +-
 .../external-docs/select_docset.prompt        |  2 +-
 core/prompts/partials/doc_snippets.prompt     | 14 +++++
 core/prompts/troubleshooter/iteration.prompt  |  3 +
 tests/agents/test_external_docs.py            |  6 +-
 8 files changed, 61 insertions(+), 46 deletions(-)
 rename core/prompts/external-docs/{create_queries.prompt => create_docs_queries.prompt} (80%)
 create mode 100644 core/prompts/partials/doc_snippets.prompt

diff --git a/core/agents/external_docs.py b/core/agents/external_docs.py
index 9ab61acb5..8226a68bc 100644
--- a/core/agents/external_docs.py
+++ b/core/agents/external_docs.py
@@ -2,17 +2,27 @@
 from urllib.parse import urljoin
 
 import httpx
+from pydantic import BaseModel
 
 from core.agents.base import BaseAgent
 from core.agents.convo import AgentConvo
 from core.agents.response import AgentResponse
 from core.config import EXTERNAL_DOCUMENTATION_API
+from core.llm.parser import JSONParser
 from core.log import get_logger
 from core.telemetry import telemetry
 
 log = get_logger(__name__)
 
 
+class DocQueries(BaseModel):
+    queries: list[str]
+
+
+class SelectedDocsets(BaseModel):
+    docsets: list[str]
+
+
 class ExternalDocumentation(BaseAgent):
     """Agent in charge of collecting and storing additional documentation.
 
@@ -34,14 +44,9 @@ class ExternalDocumentation(BaseAgent):
     display_name = "Documentation"
 
     async def run(self) -> AgentResponse:
-        current_task = self.current_state.current_task
-        if not current_task:
-            # If we have no active task, there's no docs to collect
-            return AgentResponse.done(self)
-
         available_docsets = await self._get_available_docsets()
         selected_docsets = await self._select_docsets(available_docsets)
-        telemetry.set("docsets_used", selected_docsets)
+        await telemetry.trace_code_event("docsets_used", selected_docsets)
 
         if not selected_docsets:
             log.info("No documentation selected for this task.")
@@ -50,7 +55,7 @@ async def run(self) -> AgentResponse:
 
         queries = await self._create_queries(selected_docsets)
         doc_snippets = await self._fetch_snippets(queries)
-        telemetry.set("doc_snippets_stored", len(doc_snippets))
+        await telemetry.trace_code_event("doc_snippets", {"num_stored": len(doc_snippets)})
 
         await self._store_docs(doc_snippets, available_docsets)
         return AgentResponse.done(self)
@@ -75,19 +80,19 @@ async def _select_docsets(self, available_docsets: list[tuple]) -> dict[str, str
             return {}
 
         llm = self.get_llm()
-        convo = AgentConvo(self).template(
-            "select_docset",
-            current_task=self.current_state.current_task,
-            available_docsets=available_docsets,
+        convo = (
+            AgentConvo(self)
+            .template(
+                "select_docset",
+                current_task=self.current_state.current_task,
+                available_docsets=available_docsets,
+            )
+            .require_schema(SelectedDocsets)
         )
-        await self.send_message("Determining if external documentation is needed...")
-        llm_response: str = await llm(convo)
+        await self.send_message("Determining if external documentation is needed for the next task...")
+        llm_response: SelectedDocsets = await llm(convo, parser=JSONParser(spec=SelectedDocsets))
         available_docsets = dict(available_docsets)
-        if llm_response.strip().lower() == "done":
-            return {}
-        else:
-            selected_keys = llm_response.splitlines()
-            return {k: available_docsets[k] for k in selected_keys}
+        return {k: available_docsets[k] for k in llm_response.docsets}
 
     async def _create_queries(self, docsets: dict[str, str]) -> dict[str, list[str]]:
         """Return queries we have to make to the docs API.
@@ -99,16 +104,18 @@ async def _create_queries(self, docsets: dict[str, str]) -> dict[str, list[str]]
         await self.send_message("Getting relevant documentation for the following topics:")
         for k, short_desc in docsets.items():
             llm = self.get_llm()
-            convo = AgentConvo(self).template(
-                "create_queries",
-                short_description=short_desc,
-                current_task=self.current_state.current_task,
+            convo = (
+                AgentConvo(self)
+                .template(
+                    "create_docs_queries",
+                    short_description=short_desc,
+                    current_task=self.current_state.current_task,
+                )
+                .require_schema(DocQueries)
             )
-            llm_response: str = await llm(convo)
-            if llm_response.strip().lower() == "done":
-                continue
-            else:
-                queries[k] = llm_response.splitlines()
+            llm_response: DocQueries = await llm(convo, parser=JSONParser(spec=DocQueries))
+            if llm_response.queries:
+                queries[k] = llm_response.queries
 
         return queries
 
diff --git a/core/agents/orchestrator.py b/core/agents/orchestrator.py
index 0b67cd872..632015429 100644
--- a/core/agents/orchestrator.py
+++ b/core/agents/orchestrator.py
@@ -191,7 +191,7 @@ def create_agent(self, prev_response: Optional[AgentResponse]) -> BaseAgent:
             return TechLead(self.state_manager, self.ui, process_manager=self.process_manager)
 
         current_task_docs = state.current_task.get("docs") if state.current_task else None
-        if current_task_docs is None:
+        if state.current_task and current_task_docs is None:
             return ExternalDocumentation(self.state_manager, self.ui)
 
         # Current task status must be checked before Developer is called because we might want
diff --git a/core/prompts/developer/breakdown.prompt b/core/prompts/developer/breakdown.prompt
index 5131dce9d..80a41260f 100644
--- a/core/prompts/developer/breakdown.prompt
+++ b/core/prompts/developer/breakdown.prompt
@@ -19,20 +19,7 @@ You are currently working on task #{{ current_task_index + 1 }} with the followi
 
 Now, tell me all the code that needs to be written to implement ONLY this task and have it fully working and all commands that need to be run to implement this task.
 
-{% if task.docs %}
-We have some some documentation snippets that might be helpful while working on this task, we will now list those.
-
-{% for d in task.docs %}
-Documentation snippets from {{ d.desc }}:
-{% for snippet in d.snippets %}
-{{ snippet }}
-
-{% endfor %}
-
-{% endfor %}
-
-This concludes the documentation snippets.
-{% endif %}
+{% include "partials/doc_snippets.prompt" %}
 
 **IMPORTANT**
 {%- if state.epics|length == 1 %}
diff --git a/core/prompts/external-docs/create_queries.prompt b/core/prompts/external-docs/create_docs_queries.prompt
similarity index 80%
rename from core/prompts/external-docs/create_queries.prompt
rename to core/prompts/external-docs/create_docs_queries.prompt
index 6b0763330..6023d26cd 100644
--- a/core/prompts/external-docs/create_queries.prompt
+++ b/core/prompts/external-docs/create_docs_queries.prompt
@@ -1,7 +1,9 @@
 {% include "partials/project_details.prompt" %}
 
 Here is the next task that needs to be implemented:
+```
 {{ current_task.description }}
+```
 
 Here is the list of the libraries, frameworks and APIs for which we have documentation available. The documentation is given in a sequence of pairs, one pair per line. First item in the pair is the documentation key. Second  item is the short description of what that documentation contains.
 Here's an example for React API documentation:
@@ -9,4 +11,4 @@ Here's an example for React API documentation:
 
 We have additional documentation from "{{ short_description }}" that might be useful for completing this task.
 
-Now, give me a summary of what specifically from the {{ short_description }} you think would be useful for completing this task. Please provide only the topics of interest, no additional text. Only return the topics relevant to the actual implementation, NOT the topics related to library installation and setup, environment setup, database setup and similar. Return the topics one item per line, WITHOUT any additional formatting such as backticks, bullets and similar. Return a maximum of 5 topics you think would be most useful.
+Now, give me a summary of what specifically from the {{ short_description }} you think would be useful for completing this task. Please provide only the topics of interest, no additional text. Only return the topics relevant to the actual implementation, NOT the topics related to library installation and setup, environment setup, database setup and similar. Return the topics in JSON format, as a list of strings, WITHOUT any additional formatting such as backticks, bullets and similar. Return a maximum of 5 topics you think would be most useful.
diff --git a/core/prompts/external-docs/select_docset.prompt b/core/prompts/external-docs/select_docset.prompt
index e43a418f6..8293c9080 100644
--- a/core/prompts/external-docs/select_docset.prompt
+++ b/core/prompts/external-docs/select_docset.prompt
@@ -12,4 +12,4 @@ Here is the list of available documentations:
 {{ docset[0], docset[1] }}
 {% endfor %}
 
-Now, give me the list of the additional documentation that you would like to use to complete the task listed above. Return ONLY the keys from the available documentation list, without any formatting like quotes and bullets, DO NOT return anything else. If you don't need any additional documentation, just respond with "DONE". Return only the documentation that is absolutely required for the given task.
+Now, give me the list of the additional documentation that you would like to use to complete the task listed above. Return only the documentation that is absolutely required for the given task. If there is no additional documentation in the list that you would like to use, return an empty list.
diff --git a/core/prompts/partials/doc_snippets.prompt b/core/prompts/partials/doc_snippets.prompt
new file mode 100644
index 000000000..0714f4d35
--- /dev/null
+++ b/core/prompts/partials/doc_snippets.prompt
@@ -0,0 +1,14 @@
+{% if task.docs %}
+We have some documentation snippets that might be helpful while working on this task, we will now list those.
+
+---START_OF_DOCUMENTATION_SNIPPETS---
+{% for d in task.docs %}
+Documentation snippets from {{ d.desc }}:
+{% for snippet in d.snippets %}
+{{ snippet }}
+
+{% endfor %}
+
+{% endfor %}
+---END_OF_DOCUMENTATION_SNIPPETS---
+{% endif %}
diff --git a/core/prompts/troubleshooter/iteration.prompt b/core/prompts/troubleshooter/iteration.prompt
index f20b15489..d99c30999 100644
--- a/core/prompts/troubleshooter/iteration.prompt
+++ b/core/prompts/troubleshooter/iteration.prompt
@@ -39,6 +39,9 @@ Focus on solving this issue in the following way:
 {{ next_solution_to_try }}
 ```
 {% endif %}
+{% with task=current_task %}
+    {% include "partials/doc_snippets.prompt" %}
+{% endwith %}
 Now, you have to debug this issue and comply with the additional user feedback.
 
 **IMPORTANT**
diff --git a/tests/agents/test_external_docs.py b/tests/agents/test_external_docs.py
index f9d88621a..5fc2522f2 100644
--- a/tests/agents/test_external_docs.py
+++ b/tests/agents/test_external_docs.py
@@ -3,7 +3,7 @@
 import pytest
 from httpx import HTTPError
 
-from core.agents.external_docs import ExternalDocumentation
+from core.agents.external_docs import DocQueries, ExternalDocumentation, SelectedDocsets
 
 
 @pytest.mark.asyncio
@@ -14,7 +14,9 @@ async def test_stores_documentation_snippets_for_task(agentcontext):
     await sm.commit()
 
     ed = ExternalDocumentation(sm, ui)
-    ed.get_llm = mock_llm(side_effect=["vuejs-api-ref", "VueJS Options Rendering"])
+    ed.get_llm = mock_llm(
+        side_effect=[SelectedDocsets(docsets=["vuejs-api-ref"]), DocQueries(queries=["VueJS component model"])]
+    )
     await ed.run()
     assert ed.next_state.current_task["docs"][0]["key"] == "vuejs-api-ref"