diff --git a/fern/docs.yml b/fern/docs.yml index 9dae93401..e0a5c423b 100644 --- a/fern/docs.yml +++ b/fern/docs.yml @@ -74,14 +74,16 @@ navigation: path: ./docs/pages/ui/gradio.mdx - page: Alternatives path: ./docs/pages/ui/alternatives.mdx - # Small code snippet or example of usage to help users - tab: recipes layout: - - section: Choice of LLM + - section: Getting started + contents: + - page: Quickstart + path: ./docs/pages/recipes/quickstart.mdx + - section: General use cases contents: - # TODO: add recipes - - page: List of LLMs - path: ./docs/pages/recipes/list-llm.mdx + - page: Summarize + path: ./docs/pages/recipes/summarize.mdx # More advanced usage of PrivateGPT, by API - tab: api-reference layout: diff --git a/fern/docs/pages/recipes/list-llm.mdx b/fern/docs/pages/recipes/list-llm.mdx deleted file mode 100644 index 103867a05..000000000 --- a/fern/docs/pages/recipes/list-llm.mdx +++ /dev/null @@ -1,122 +0,0 @@ -# List of working LLM - -**Do you have any working combination of LLM and embeddings?** - -Please open a PR to add it to the list, and come on our Discord to tell us about it! - -## Prompt style - -LLMs might have been trained with different prompt styles. -The prompt style is the way the prompt is written, and how the system message is injected in the prompt. - -For example, `llama2` looks like this: -```text -[INST] <> -{{ system_prompt }} -<> - -{{ user_message }} [/INST] -``` - -While `default` (the `llama_index` default) looks like this: -```text -system: {{ system_prompt }} -user: {{ user_message }} -assistant: {{ assistant_message }} -``` - -The "`tag`" style looks like this: - -```text -<|system|>: {{ system_prompt }} -<|user|>: {{ user_message }} -<|assistant|>: {{ assistant_message }} -``` - -The "`mistral`" style looks like this: - -```text -[INST] You are an AI assistant. [/INST][INST] Hello, how are you doing? [/INST] -``` - -The "`chatml`" style looks like this: -```text -<|im_start|>system -{{ system_prompt }}<|im_end|> -<|im_start|>user" -{{ user_message }}<|im_end|> -<|im_start|>assistant -{{ assistant_message }} -``` - -Some LLMs will not understand these prompt styles, and will not work (returning nothing). -You can try to change the prompt style to `default` (or `tag`) in the settings, and it will -change the way the messages are formatted to be passed to the LLM. - -## Example of configuration - -You might want to change the prompt depending on the language and model you are using. - -### English, with instructions - -`settings-en.yaml`: -```yml -local: - llm_hf_repo_id: TheBloke/Mistral-7B-Instruct-v0.1-GGUF - llm_hf_model_file: mistral-7b-instruct-v0.1.Q4_K_M.gguf - embedding_hf_model_name: BAAI/bge-small-en-v1.5 - prompt_style: "llama2" -``` - -### French, with instructions - -`settings-fr.yaml`: -```yml -local: - llm_hf_repo_id: TheBloke/Vigogne-2-7B-Instruct-GGUF - llm_hf_model_file: vigogne-2-7b-instruct.Q4_K_M.gguf - embedding_hf_model_name: dangvantuan/sentence-camembert-base - prompt_style: "default" - # prompt_style: "tag" # also works - # The default system prompt is injected only when the `prompt_style` != default, and there are no system message in the discussion - # default_system_prompt: Vous êtes un assistant IA qui répond à la question posée à la fin en utilisant le contexte suivant. Si vous ne connaissez pas la réponse, dites simplement que vous ne savez pas, n'essayez pas d'inventer une réponse. Veuillez répondre exclusivement en français. 
-``` - -You might want to change the prompt as the one above might not directly answer your question. -You can read online about how to write a good prompt, but in a nutshell, make it (extremely) directive. - -You can try and troubleshot your prompt by writing multiline requests in the UI, while -writing your interaction with the model, for example: - -```text -Tu es un programmeur senior qui programme en python et utilise le framework fastapi. Ecrit moi un serveur qui retourne "hello world". -``` - -Another example: -```text -Context: None -Situation: tu es au milieu d'un champ. -Tache: va a la rivière, en bas du champ. -Décrit comment aller a la rivière. -``` - -### Optimised Models -GodziLLa2-70B LLM (English, rank 2 on HuggingFace OpenLLM Leaderboard), bge large Embedding Model (rank 1 on HuggingFace MTEB Leaderboard) -`settings-optimised.yaml`: -```yml -local: - llm_hf_repo_id: TheBloke/GodziLLa2-70B-GGUF - llm_hf_model_file: godzilla2-70b.Q4_K_M.gguf - embedding_hf_model_name: BAAI/bge-large-en - prompt_style: "llama2" -``` -### German speaking model -`settings-de.yaml`: -```yml -local: - llm_hf_repo_id: TheBloke/em_german_leo_mistral-GGUF - llm_hf_model_file: em_german_leo_mistral.Q4_K_M.gguf - embedding_hf_model_name: T-Systems-onsite/german-roberta-sentence-transformer-v2 - #llama, default or tag - prompt_style: "default" -``` diff --git a/fern/docs/pages/recipes/quickstart.mdx b/fern/docs/pages/recipes/quickstart.mdx new file mode 100644 index 000000000..a0f6c877a --- /dev/null +++ b/fern/docs/pages/recipes/quickstart.mdx @@ -0,0 +1,23 @@ +# Recipes + +Recipes are predefined use cases that help users solve very specific tasks using PrivateGPT. +They provide a streamlined approach to achieve common goals with the platform, offering both a starting point and inspiration for further exploration. +The main goal of Recipes is to empower the community to create and share solutions, expanding the capabilities of PrivateGPT. + +## How to Create a New Recipe + +1. **Identify the Task**: Define a specific task or problem that the Recipe will address. +2. **Develop the Solution**: Create a clear and concise guide, including any necessary code snippets or configurations. +3. **Submit a PR**: Fork the PrivateGPT repository, add your Recipe to the appropriate section, and submit a PR for review. + +We encourage you to be creative and think outside the box! Your contributions help shape the future of PrivateGPT. + +## Available Recipes + + + + diff --git a/fern/docs/pages/recipes/summarize.mdx b/fern/docs/pages/recipes/summarize.mdx new file mode 100644 index 000000000..99594bf1b --- /dev/null +++ b/fern/docs/pages/recipes/summarize.mdx @@ -0,0 +1,20 @@ +The Summarize Recipe provides a method to extract concise summaries from ingested documents or texts using PrivateGPT. +This tool is particularly useful for quickly understanding large volumes of information by distilling key points and main ideas. + +## Use Case + +The primary use case for the `Summarize` tool is to automate the summarization of lengthy documents, +making it easier for users to grasp the essential information without reading through entire texts. +This can be applied in various scenarios, such as summarizing research papers, news articles, or business reports. + +## Key Features + +1. **Ingestion-compatible**: The user provides the text to be summarized. The text can be directly inputted or retrieved from ingested documents within the system. +2. 
**Customization**: The summary generation can be influenced by providing specific `instructions` or a `prompt`. These inputs guide the model on how to frame the summary, allowing for customization according to user needs. +3. **Streaming Support**: The tool supports streaming, allowing for real-time summary generation, which can be particularly useful for handling large texts or providing immediate feedback. + +## Contributing + +If you have ideas for improving the Summarize or want to add new features, feel free to contribute! +You can submit your enhancements via a pull request on our [GitHub repository](https://github.com/zylon-ai/private-gpt). + diff --git a/fern/openapi/openapi.json b/fern/openapi/openapi.json index af1646f1f..c17c4d123 100644 --- a/fern/openapi/openapi.json +++ b/fern/openapi/openapi.json @@ -339,6 +339,48 @@ } } }, + "/v1/summarize": { + "post": { + "tags": [ + "Recipes" + ], + "summary": "Summarize", + "description": "Given a text, the model will return a summary.\n\nOptionally include `instructions` to influence the way the summary is generated.\n\nIf `use_context`\nis set to `true`, the model will also use the content coming from the ingested\ndocuments in the summary. The documents being used can\nbe filtered by their metadata using the `context_filter`.\nIngested documents metadata can be found using `/ingest/list` endpoint.\nIf you want all ingested documents to be used, remove `context_filter` altogether.\n\nIf `prompt` is set, it will be used as the prompt for the summarization,\notherwise the default prompt will be used.\n\nWhen using `'stream': true`, the API will return data chunks following [OpenAI's\nstreaming model](https://platform.openai.com/docs/api-reference/chat/streaming):\n```\n{\"id\":\"12345\",\"object\":\"completion.chunk\",\"created\":1694268190,\n\"model\":\"private-gpt\",\"choices\":[{\"index\":0,\"delta\":{\"content\":\"Hello\"},\n\"finish_reason\":null}]}\n```", + "operationId": "summarize_v1_summarize_post", + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/SummarizeBody" + } + } + }, + "required": true + }, + "responses": { + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/SummarizeResponse" + } + } + } + }, + "422": { + "description": "Validation Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + } + } + } + } + }, "/v1/embeddings": { "post": { "tags": [ @@ -500,6 +542,10 @@ "Chunk": { "properties": { "object": { + "type": "string", + "enum": [ + "context.chunk" + ], "const": "context.chunk", "title": "Object" }, @@ -612,10 +658,18 @@ "ChunksResponse": { "properties": { "object": { + "type": "string", + "enum": [ + "list" + ], "const": "list", "title": "Object" }, "model": { + "type": "string", + "enum": [ + "private-gpt" + ], "const": "private-gpt", "title": "Model" }, @@ -728,6 +782,10 @@ "title": "Index" }, "object": { + "type": "string", + "enum": [ + "embedding" + ], "const": "embedding", "title": "Object" }, @@ -779,10 +837,18 @@ "EmbeddingsResponse": { "properties": { "object": { + "type": "string", + "enum": [ + "list" + ], "const": "list", "title": "Object" }, "model": { + "type": "string", + "enum": [ + "private-gpt" + ], "const": "private-gpt", "title": "Model" }, @@ -818,6 +884,10 @@ "HealthResponse": { "properties": { "status": { + "type": "string", + "enum": [ + "ok" + ], "const": "ok", "title": "Status", 
"default": "ok" @@ -829,10 +899,18 @@ "IngestResponse": { "properties": { "object": { + "type": "string", + "enum": [ + "list" + ], "const": "list", "title": "Object" }, "model": { + "type": "string", + "enum": [ + "private-gpt" + ], "const": "private-gpt", "title": "Model" }, @@ -879,6 +957,10 @@ "IngestedDoc": { "properties": { "object": { + "type": "string", + "enum": [ + "ingest.document" + ], "const": "ingest.document", "title": "Object" }, @@ -1001,6 +1083,10 @@ ] }, "model": { + "type": "string", + "enum": [ + "private-gpt" + ], "const": "private-gpt", "title": "Model" }, @@ -1074,6 +1160,78 @@ "title": "OpenAIMessage", "description": "Inference result, with the source of the message.\n\nRole could be the assistant or system\n(providing a default response, not AI generated)." }, + "SummarizeBody": { + "properties": { + "text": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Text" + }, + "use_context": { + "type": "boolean", + "title": "Use Context", + "default": false + }, + "context_filter": { + "anyOf": [ + { + "$ref": "#/components/schemas/ContextFilter" + }, + { + "type": "null" + } + ] + }, + "prompt": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Prompt" + }, + "instructions": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Instructions" + }, + "stream": { + "type": "boolean", + "title": "Stream", + "default": false + } + }, + "type": "object", + "title": "SummarizeBody" + }, + "SummarizeResponse": { + "properties": { + "summary": { + "type": "string", + "title": "Summary" + } + }, + "type": "object", + "required": [ + "summary" + ], + "title": "SummarizeResponse" + }, "ValidationError": { "properties": { "loc": { diff --git a/private_gpt/launcher.py b/private_gpt/launcher.py index 2245d56e4..968baae46 100644 --- a/private_gpt/launcher.py +++ b/private_gpt/launcher.py @@ -15,6 +15,7 @@ from private_gpt.server.embeddings.embeddings_router import embeddings_router from private_gpt.server.health.health_router import health_router from private_gpt.server.ingest.ingest_router import ingest_router +from private_gpt.server.recipes.summarize.summarize_router import summarize_router from private_gpt.settings.settings import Settings logger = logging.getLogger(__name__) @@ -32,12 +33,13 @@ async def bind_injector_to_request(request: Request) -> None: app.include_router(chat_router) app.include_router(chunks_router) app.include_router(ingest_router) + app.include_router(summarize_router) app.include_router(embeddings_router) app.include_router(health_router) # Add LlamaIndex simple observability global_handler = create_global_handler("simple") - if global_handler is not None: + if global_handler: LlamaIndexSettings.callback_manager = CallbackManager([global_handler]) settings = root_injector.get(Settings) diff --git a/private_gpt/server/recipes/summarize/__init__.py b/private_gpt/server/recipes/summarize/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/private_gpt/server/recipes/summarize/summarize_router.py b/private_gpt/server/recipes/summarize/summarize_router.py new file mode 100644 index 000000000..c1770c3c0 --- /dev/null +++ b/private_gpt/server/recipes/summarize/summarize_router.py @@ -0,0 +1,86 @@ +from fastapi import APIRouter, Depends, Request +from pydantic import BaseModel +from starlette.responses import StreamingResponse + +from private_gpt.open_ai.extensions.context_filter import ContextFilter +from private_gpt.open_ai.openai_models 
import ( + to_openai_sse_stream, +) +from private_gpt.server.recipes.summarize.summarize_service import SummarizeService +from private_gpt.server.utils.auth import authenticated + +summarize_router = APIRouter(prefix="/v1", dependencies=[Depends(authenticated)]) + + +class SummarizeBody(BaseModel): + text: str | None = None + use_context: bool = False + context_filter: ContextFilter | None = None + prompt: str | None = None + instructions: str | None = None + stream: bool = False + + +class SummarizeResponse(BaseModel): + summary: str + + +@summarize_router.post( + "/summarize", + response_model=None, + summary="Summarize", + responses={200: {"model": SummarizeResponse}}, + tags=["Recipes"], +) +def summarize( + request: Request, body: SummarizeBody +) -> SummarizeResponse | StreamingResponse: + """Given a text, the model will return a summary. + + Optionally include `instructions` to influence the way the summary is generated. + + If `use_context` + is set to `true`, the model will also use the content coming from the ingested + documents in the summary. The documents being used can + be filtered by their metadata using the `context_filter`. + Ingested documents metadata can be found using `/ingest/list` endpoint. + If you want all ingested documents to be used, remove `context_filter` altogether. + + If `prompt` is set, it will be used as the prompt for the summarization, + otherwise the default prompt will be used. + + When using `'stream': true`, the API will return data chunks following [OpenAI's + streaming model](https://platform.openai.com/docs/api-reference/chat/streaming): + ``` + {"id":"12345","object":"completion.chunk","created":1694268190, + "model":"private-gpt","choices":[{"index":0,"delta":{"content":"Hello"}, + "finish_reason":null}]} + ``` + """ + service: SummarizeService = request.state.injector.get(SummarizeService) + + if body.stream: + completion_gen = service.stream_summarize( + text=body.text, + instructions=body.instructions, + use_context=body.use_context, + context_filter=body.context_filter, + prompt=body.prompt, + ) + return StreamingResponse( + to_openai_sse_stream( + response_generator=completion_gen, + ), + media_type="text/event-stream", + ) + else: + completion = service.summarize( + text=body.text, + instructions=body.instructions, + use_context=body.use_context, + context_filter=body.context_filter, + prompt=body.prompt, + ) + return SummarizeResponse( + summary=completion, + ) diff --git a/private_gpt/server/recipes/summarize/summarize_service.py b/private_gpt/server/recipes/summarize/summarize_service.py new file mode 100644 index 000000000..4bfd18f57 --- /dev/null +++ b/private_gpt/server/recipes/summarize/summarize_service.py @@ -0,0 +1,172 @@ +from itertools import chain + +from injector import inject, singleton +from llama_index.core import ( + Document, + StorageContext, + SummaryIndex, +) +from llama_index.core.base.response.schema import Response, StreamingResponse +from llama_index.core.node_parser import SentenceSplitter +from llama_index.core.response_synthesizers import ResponseMode +from llama_index.core.storage.docstore.types import RefDocInfo +from llama_index.core.types import TokenGen + +from private_gpt.components.embedding.embedding_component import EmbeddingComponent +from private_gpt.components.llm.llm_component import LLMComponent +from private_gpt.components.node_store.node_store_component import NodeStoreComponent +from private_gpt.components.vector_store.vector_store_component import ( + VectorStoreComponent, +) +from 
private_gpt.open_ai.extensions.context_filter import ContextFilter +from private_gpt.settings.settings import Settings + +DEFAULT_SUMMARIZE_PROMPT = ( + "Provide a comprehensive summary of the provided context information. " + "The summary should cover all the key points and main ideas presented in " + "the original text, while also condensing the information into a concise " + "and easy-to-understand format. Please ensure that the summary includes " + "relevant details and examples that support the main ideas, while avoiding " + "any unnecessary information or repetition." +) + + +@singleton +class SummarizeService: + @inject + def __init__( + self, + settings: Settings, + llm_component: LLMComponent, + node_store_component: NodeStoreComponent, + vector_store_component: VectorStoreComponent, + embedding_component: EmbeddingComponent, + ) -> None: + self.settings = settings + self.llm_component = llm_component + self.node_store_component = node_store_component + self.vector_store_component = vector_store_component + self.embedding_component = embedding_component + self.storage_context = StorageContext.from_defaults( + vector_store=vector_store_component.vector_store, + docstore=node_store_component.doc_store, + index_store=node_store_component.index_store, + ) + + @staticmethod + def _filter_ref_docs( + ref_docs: dict[str, RefDocInfo], context_filter: ContextFilter | None + ) -> list[RefDocInfo]: + if context_filter is None or not context_filter.docs_ids: + return list(ref_docs.values()) + + return [ + ref_doc + for doc_id, ref_doc in ref_docs.items() + if doc_id in context_filter.docs_ids + ] + + def _summarize( + self, + use_context: bool = False, + stream: bool = False, + text: str | None = None, + instructions: str | None = None, + context_filter: ContextFilter | None = None, + prompt: str | None = None, + ) -> str | TokenGen: + + nodes_to_summarize = [] + + # Add text to summarize + if text: + text_documents = [Document(text=text)] + nodes_to_summarize += ( + SentenceSplitter.from_defaults().get_nodes_from_documents( + text_documents + ) + ) + + # Add context documents to summarize + if use_context: + # 1. Recover all ref docs + ref_docs: dict[ + str, RefDocInfo + ] | None = self.storage_context.docstore.get_all_ref_doc_info() + if ref_docs is None: + raise ValueError("No documents have been ingested yet.") + + # 2. Filter documents based on context_filter (if provided) + filtered_ref_docs = self._filter_ref_docs(ref_docs, context_filter) + + # 3. 
Get all nodes from the filtered documents + filtered_node_ids = chain.from_iterable( + [ref_doc.node_ids for ref_doc in filtered_ref_docs] + ) + filtered_nodes = self.storage_context.docstore.get_nodes( + node_ids=list(filtered_node_ids), + ) + + nodes_to_summarize += filtered_nodes + + # Create a SummaryIndex to summarize the nodes + summary_index = SummaryIndex( + nodes=nodes_to_summarize, + storage_context=StorageContext.from_defaults(), # In memory SummaryIndex + show_progress=True, + ) + + # Make a tree summarization query + # above the set of all candidate nodes + query_engine = summary_index.as_query_engine( + llm=self.llm_component.llm, + response_mode=ResponseMode.TREE_SUMMARIZE, + streaming=stream, + use_async=self.settings.summarize.use_async, + ) + + prompt = prompt or DEFAULT_SUMMARIZE_PROMPT + + summarize_query = prompt + "\n" + (instructions or "") + + response = query_engine.query(summarize_query) + if isinstance(response, Response): + return response.response or "" + elif isinstance(response, StreamingResponse): + return response.response_gen + else: + raise TypeError(f"The result is not of a supported type: {type(response)}") + + def summarize( + self, + use_context: bool = False, + text: str | None = None, + instructions: str | None = None, + context_filter: ContextFilter | None = None, + prompt: str | None = None, + ) -> str: + return self._summarize( + use_context=use_context, + stream=False, + text=text, + instructions=instructions, + context_filter=context_filter, + prompt=prompt, + ) # type: ignore + + def stream_summarize( + self, + use_context: bool = False, + text: str | None = None, + instructions: str | None = None, + context_filter: ContextFilter | None = None, + prompt: str | None = None, + ) -> TokenGen: + return self._summarize( + use_context=use_context, + stream=True, + text=text, + instructions=instructions, + context_filter=context_filter, + prompt=prompt, + ) # type: ignore diff --git a/private_gpt/settings/settings.py b/private_gpt/settings/settings.py index 8ed7a5a8c..c968f8088 100644 --- a/private_gpt/settings/settings.py +++ b/private_gpt/settings/settings.py @@ -353,6 +353,10 @@ class UISettings(BaseModel): default_query_system_prompt: str = Field( None, description="The default system prompt to use for the query mode." ) + default_summarization_system_prompt: str = Field( + None, + description="The default system prompt to use for the summarization mode.", + ) delete_file_button_enabled: bool = Field( True, description="If the button to delete a file is enabled or not." 
) @@ -388,6 +392,13 @@ class RagSettings(BaseModel): rerank: RerankSettings +class SummarizeSettings(BaseModel): + use_async: bool = Field( + True, + description="If set to True, the summarization will be done asynchronously.", + ) + + class ClickHouseSettings(BaseModel): host: str = Field( "localhost", @@ -577,6 +588,7 @@ class Settings(BaseModel): vectorstore: VectorstoreSettings nodestore: NodeStoreSettings rag: RagSettings + summarize: SummarizeSettings qdrant: QdrantSettings | None = None postgres: PostgresSettings | None = None clickhouse: ClickHouseSettings | None = None diff --git a/private_gpt/ui/ui.py b/private_gpt/ui/ui.py index d621bd283..0bf06d193 100644 --- a/private_gpt/ui/ui.py +++ b/private_gpt/ui/ui.py @@ -3,6 +3,7 @@ import logging import time from collections.abc import Iterable +from enum import Enum from pathlib import Path from typing import Any @@ -11,6 +12,7 @@ from gradio.themes.utils.colors import slate # type: ignore from injector import inject, singleton from llama_index.core.llms import ChatMessage, ChatResponse, MessageRole +from llama_index.core.types import TokenGen from pydantic import BaseModel from private_gpt.constants import PROJECT_ROOT_PATH @@ -19,6 +21,7 @@ from private_gpt.server.chat.chat_service import ChatService, CompletionGen from private_gpt.server.chunks.chunks_service import Chunk, ChunksService from private_gpt.server.ingest.ingest_service import IngestService +from private_gpt.server.recipes.summarize.summarize_service import SummarizeService from private_gpt.settings.settings import settings from private_gpt.ui.images import logo_svg @@ -32,7 +35,20 @@ SOURCES_SEPARATOR = "
Sources: \n" -MODES = ["Query Files", "Search Files", "LLM Chat (no context from files)"] + +class Modes(str, Enum): + RAG_MODE = "RAG" + SEARCH_MODE = "Search" + BASIC_CHAT_MODE = "Basic" + SUMMARIZE_MODE = "Summarize" + + +MODES: list[Modes] = [ + Modes.RAG_MODE, + Modes.SEARCH_MODE, + Modes.BASIC_CHAT_MODE, + Modes.SUMMARIZE_MODE, +] class Source(BaseModel): @@ -70,10 +86,12 @@ def __init__( ingest_service: IngestService, chat_service: ChatService, chunks_service: ChunksService, + summarizeService: SummarizeService, ) -> None: self._ingest_service = ingest_service self._chat_service = chat_service self._chunks_service = chunks_service + self._summarize_service = summarizeService # Cache the UI blocks self._ui_block = None @@ -84,7 +102,9 @@ def __init__( self.mode = MODES[0] self._system_prompt = self._get_default_system_prompt(self.mode) - def _chat(self, message: str, history: list[list[str]], mode: str, *_: Any) -> Any: + def _chat( + self, message: str, history: list[list[str]], mode: Modes, *_: Any + ) -> Any: def yield_deltas(completion_gen: CompletionGen) -> Iterable[str]: full_response: str = "" stream = completion_gen.response @@ -112,6 +132,12 @@ def yield_deltas(completion_gen: CompletionGen) -> Iterable[str]: full_response += sources_text yield full_response + def yield_tokens(token_gen: TokenGen) -> Iterable[str]: + full_response: str = "" + for token in token_gen: + full_response += str(token) + yield full_response + def build_history() -> list[ChatMessage]: history_messages: list[ChatMessage] = [] @@ -143,8 +169,7 @@ def build_history() -> list[ChatMessage]: ), ) match mode: - case "Query Files": - + case Modes.RAG_MODE: # Use only the selected file for the query context_filter = None if self._selected_filename is not None: @@ -163,14 +188,14 @@ def build_history() -> list[ChatMessage]: context_filter=context_filter, ) yield from yield_deltas(query_stream) - case "LLM Chat (no context from files)": + case Modes.BASIC_CHAT_MODE: llm_stream = self._chat_service.stream_chat( messages=all_messages, use_context=False, ) yield from yield_deltas(llm_stream) - case "Search Files": + case Modes.SEARCH_MODE: response = self._chunks_service.retrieve_relevant( text=message, limit=4, prev_next_chunks=0 ) @@ -183,37 +208,76 @@ def build_history() -> list[ChatMessage]: f"{source.text}" for index, source in enumerate(sources, start=1) ) + case Modes.SUMMARIZE_MODE: + # Summarize the given message, optionally using selected files + context_filter = None + if self._selected_filename: + docs_ids = [] + for ingested_document in self._ingest_service.list_ingested(): + if ( + ingested_document.doc_metadata["file_name"] + == self._selected_filename + ): + docs_ids.append(ingested_document.doc_id) + context_filter = ContextFilter(docs_ids=docs_ids) + + summary_stream = self._summarize_service.stream_summarize( + use_context=True, + context_filter=context_filter, + instructions=message, + ) + yield from yield_tokens(summary_stream) # On initialization and on mode change, this function set the system prompt # to the default prompt based on the mode (and user settings). 
@staticmethod - def _get_default_system_prompt(mode: str) -> str: + def _get_default_system_prompt(mode: Modes) -> str: p = "" match mode: # For query chat mode, obtain default system prompt from settings - case "Query Files": + case Modes.RAG_MODE: p = settings().ui.default_query_system_prompt # For chat mode, obtain default system prompt from settings - case "LLM Chat (no context from files)": + case Modes.BASIC_CHAT_MODE: p = settings().ui.default_chat_system_prompt + # For summarization mode, obtain default system prompt from settings + case Modes.SUMMARIZE_MODE: + p = settings().ui.default_summarization_system_prompt # For any other mode, clear the system prompt case _: p = "" return p + @staticmethod + def _get_default_mode_explanation(mode: Modes) -> str: + match mode: + case Modes.RAG_MODE: + return "Get contextualized answers from selected files." + case Modes.SEARCH_MODE: + return "Find relevant chunks of text in selected files." + case Modes.BASIC_CHAT_MODE: + return "Chat with the LLM using its training data. Files are ignored." + case Modes.SUMMARIZE_MODE: + return "Generate a summary of the selected files. Prompt to customize the result." + case _: + return "" + def _set_system_prompt(self, system_prompt_input: str) -> None: logger.info(f"Setting system prompt to: {system_prompt_input}") self._system_prompt = system_prompt_input - def _set_current_mode(self, mode: str) -> Any: + def _set_explanatation_mode(self, explanation_mode: str) -> None: + self._explanation_mode = explanation_mode + + def _set_current_mode(self, mode: Modes) -> Any: self.mode = mode self._set_system_prompt(self._get_default_system_prompt(mode)) - # Update placeholder and allow interaction if default system prompt is set - if self._system_prompt: - return gr.update(placeholder=self._system_prompt, interactive=True) - # Update placeholder and disable interaction if no default system prompt is set - else: - return gr.update(placeholder=self._system_prompt, interactive=False) + self._set_explanatation_mode(self._get_default_mode_explanation(mode)) + interactive = self._system_prompt is not None + return [ + gr.update(placeholder=self._system_prompt, interactive=interactive), + gr.update(value=self._explanation_mode), + ] def _list_ingested_files(self) -> list[list[str]]: files = set() @@ -326,10 +390,17 @@ def _build_ui_blocks(self) -> gr.Blocks: with gr.Row(equal_height=False): with gr.Column(scale=3): + default_mode = MODES[0] mode = gr.Radio( - MODES, + [mode.value for mode in MODES], label="Mode", - value="Query Files", + value=default_mode, + ) + explanation_mode = gr.Textbox( + placeholder=self._get_default_mode_explanation(default_mode), + show_label=False, + max_lines=3, + interactive=False, ) upload_button = gr.components.UploadButton( "Upload File(s)", @@ -413,9 +484,11 @@ def _build_ui_blocks(self) -> gr.Blocks: interactive=True, render=False, ) - # When mode changes, set default system prompt + # When mode changes, set default system prompt, and other stuffs mode.change( - self._set_current_mode, inputs=mode, outputs=system_prompt_input + self._set_current_mode, + inputs=mode, + outputs=[system_prompt_input, explanation_mode], ) # On blur, set system prompt to use in queries system_prompt_input.blur( diff --git a/settings.yaml b/settings.yaml index 2c68bd6bb..a890733e8 100644 --- a/settings.yaml +++ b/settings.yaml @@ -34,6 +34,13 @@ ui: You can only answer questions about the provided context. 
If you know the answer but it is not based in the provided context, don't provide the answer, just state the answer is not in the context provided. + default_summarization_system_prompt: > + Provide a comprehensive summary of the provided context information. + The summary should cover all the key points and main ideas presented in + the original text, while also condensing the information into a concise + and easy-to-understand format. Please ensure that the summary includes + relevant details and examples that support the main ideas, while avoiding + any unnecessary information or repetition. delete_file_button_enabled: true delete_all_files_button_enabled: true @@ -57,6 +64,9 @@ rag: model: cross-encoder/ms-marco-MiniLM-L-2-v2 top_n: 1 +summarize: + use_async: true + clickhouse: host: localhost port: 8443 diff --git a/tests/server/recipes/test_summarize_router.py b/tests/server/recipes/test_summarize_router.py new file mode 100644 index 000000000..8b647b07e --- /dev/null +++ b/tests/server/recipes/test_summarize_router.py @@ -0,0 +1,159 @@ +from fastapi.testclient import TestClient + +from private_gpt.server.recipes.summarize.summarize_router import ( + SummarizeBody, + SummarizeResponse, +) + + +def test_summarize_route_produces_a_stream(test_client: TestClient) -> None: + body = SummarizeBody( + text="Test", + stream=True, + ) + response = test_client.post("/v1/summarize", json=body.model_dump()) + + raw_events = response.text.split("\n\n") + events = [ + item.removeprefix("data: ") for item in raw_events if item.startswith("data: ") + ] + assert response.status_code == 200 + assert "text/event-stream" in response.headers["content-type"] + assert len(events) > 0 + assert events[-1] == "[DONE]" + + +def test_summarize_route_produces_a_single_value(test_client: TestClient) -> None: + body = SummarizeBody( + text="test", + stream=False, + ) + response = test_client.post("/v1/summarize", json=body.model_dump()) + + # No asserts, if it validates it's good + SummarizeResponse.model_validate(response.json()) + assert response.status_code == 200 + + +def test_summarize_with_document_context(test_client: TestClient) -> None: + # Ingest an document + ingest_response = test_client.post( + "/v1/ingest/text", + json={ + "file_name": "file_name", + "text": "Lorem ipsum dolor sit amet", + }, + ) + assert ingest_response.status_code == 200 + ingested_docs = ingest_response.json()["data"] + assert len(ingested_docs) == 1 + + body = SummarizeBody( + use_context=True, + context_filter={"docs_ids": [doc["doc_id"] for doc in ingested_docs]}, + stream=False, + ) + response = test_client.post("/v1/summarize", json=body.model_dump()) + + completion: SummarizeResponse = SummarizeResponse.model_validate(response.json()) + assert response.status_code == 200 + # We can check the content of the completion, because mock LLM used in tests + # always echoes the prompt. In the case of summary, the input context is passed. 
+ assert completion.summary.find("Lorem ipsum dolor sit amet") != -1 + + +def test_summarize_with_non_existent_document_context_not_fails( + test_client: TestClient, +) -> None: + body = SummarizeBody( + use_context=True, + context_filter={ + "docs_ids": ["non-existent-doc-id"], + }, + stream=False, + ) + + response = test_client.post("/v1/summarize", json=body.model_dump()) + + completion: SummarizeResponse = SummarizeResponse.model_validate(response.json()) + assert response.status_code == 200 + # We can check the content of the completion, because mock LLM used in tests + # always echoes the prompt. In the case of summary, the input context is passed. + assert completion.summary.find("Empty Response") != -1 + + +def test_summarize_with_metadata_and_document_context(test_client: TestClient) -> None: + docs = [] + + # Ingest a first document + document_1_content = "Content of document 1" + ingest_response = test_client.post( + "/v1/ingest/text", + json={ + "file_name": "file_name_1", + "text": document_1_content, + }, + ) + assert ingest_response.status_code == 200 + ingested_docs = ingest_response.json()["data"] + assert len(ingested_docs) == 1 + docs += ingested_docs + + # Ingest a second document + document_2_content = "Text of document 2" + ingest_response = test_client.post( + "/v1/ingest/text", + json={ + "file_name": "file_name_2", + "text": document_2_content, + }, + ) + assert ingest_response.status_code == 200 + ingested_docs = ingest_response.json()["data"] + assert len(ingested_docs) == 1 + docs += ingested_docs + + # Completions with the first document's id and the second document's metadata + body = SummarizeBody( + use_context=True, + context_filter={"docs_ids": [doc["doc_id"] for doc in docs]}, + stream=False, + ) + response = test_client.post("/v1/summarize", json=body.model_dump()) + + completion: SummarizeResponse = SummarizeResponse.model_validate(response.json()) + assert response.status_code == 200 + # Assert both documents are part of the used sources + # We can check the content of the completion, because mock LLM used in tests + # always echoes the prompt. In the case of summary, the input context is passed. + assert completion.summary.find(document_1_content) != -1 + assert completion.summary.find(document_2_content) != -1 + + +def test_summarize_with_prompt(test_client: TestClient) -> None: + ingest_response = test_client.post( + "/v1/ingest/text", + json={ + "file_name": "file_name", + "text": "Lorem ipsum dolor sit amet", + }, + ) + assert ingest_response.status_code == 200 + ingested_docs = ingest_response.json()["data"] + assert len(ingested_docs) == 1 + + body = SummarizeBody( + use_context=True, + context_filter={ + "docs_ids": [doc["doc_id"] for doc in ingested_docs], + }, + prompt="This is a custom summary prompt, 54321", + stream=False, + ) + response = test_client.post("/v1/summarize", json=body.model_dump()) + + completion: SummarizeResponse = SummarizeResponse.model_validate(response.json()) + assert response.status_code == 200 + # We can check the content of the completion, because mock LLM used in tests + # always echoes the prompt. In the case of summary, the input context is passed. + assert completion.summary.find("This is a custom summary prompt, 54321") != -1
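
A minimal client sketch for exercising the new `/v1/summarize` endpoint introduced in this change, based on the request/response shapes defined above (`SummarizeBody`, `SummarizeResponse`, and the OpenAI-style SSE chunks emitted when `stream` is `true`). The base URL, the placeholder document id, and the use of the `requests` library are illustrative assumptions only, not part of this patch.

```python
# Sketch of calling the new /v1/summarize endpoint; assumes a running
# PrivateGPT instance with an LLM configured. Adjust BASE_URL to your
# deployment and add an Authorization header if server auth is enabled.
import json

import requests

BASE_URL = "http://localhost:8001"  # assumed default port; change as needed

# Non-streaming request: summarize a raw text with extra instructions.
# The response body matches SummarizeResponse: {"summary": "..."}.
response = requests.post(
    f"{BASE_URL}/v1/summarize",
    json={
        "text": "PrivateGPT is a production-ready AI project ...",
        "instructions": "Keep the summary under three sentences.",
        "stream": False,
    },
    timeout=300,
)
response.raise_for_status()
print(response.json()["summary"])

# Streaming request: summarize previously ingested documents instead of raw
# text. `docs_ids` would come from /v1/ingest/list; the value below is a
# placeholder. Chunks follow OpenAI's streaming format and end with [DONE].
with requests.post(
    f"{BASE_URL}/v1/summarize",
    json={
        "use_context": True,
        "context_filter": {"docs_ids": ["<doc-id-from-ingest-list>"]},
        "prompt": "Summarize the document for a non-technical audience.",
        "stream": True,
    },
    stream=True,
    timeout=300,
) as stream_response:
    stream_response.raise_for_status()
    for line in stream_response.iter_lines():
        if not line:
            continue
        payload = line.decode().removeprefix("data: ")
        if payload == "[DONE]":
            break
        chunk = json.loads(payload)
        delta = chunk["choices"][0]["delta"].get("content") or ""
        print(delta, end="", flush=True)
```

The two calls mirror the streaming and non-streaming test cases above: the first returns a single `SummarizeResponse`, while the second consumes `data:`-prefixed server-sent events until the `[DONE]` sentinel.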