From e18aa0169e80b8af9241e57f0ab66bf463ac8a4f Mon Sep 17 00:00:00 2001
From: sigoden
Date: Mon, 11 Nov 2024 09:46:21 +0800
Subject: [PATCH] refactor: improve RAG (#982)

- set max default_chunk_size to 2000
- set default rag_top_k to 5
- change reciprocal_rank_fusion weights
---
 config.example.yaml |  2 +-
 models.yaml         | 10 +++++-----
 src/config/mod.rs   |  2 +-
 src/rag/mod.rs      |  2 +-
 4 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/config.example.yaml b/config.example.yaml
index b969a84b..706c8c6f 100644
--- a/config.example.yaml
+++ b/config.example.yaml
@@ -37,7 +37,7 @@ summary_prompt: 'This is a summary of the chat history as a recap: '
 # See [RAG-Guide](https://github.com/sigoden/aichat/wiki/RAG-Guide) for more details.
 rag_embedding_model: null                 # Specifies the embedding model to use
 rag_reranker_model: null                  # Specifies the rerank model to use
-rag_top_k: 4                              # Specifies the number of documents to retrieve
+rag_top_k: 5                              # Specifies the number of documents to retrieve
 rag_chunk_size: null                      # Specifies the chunk size
 rag_chunk_overlap: null                   # Specifies the chunk overlap
 rag_min_score_vector_search: 0            # Specifies the minimum relevance score for vector-based searching
diff --git a/models.yaml b/models.yaml
index de82a601..80e3fc7f 100644
--- a/models.yaml
+++ b/models.yaml
@@ -66,13 +66,13 @@
     type: embedding
     input_price: 0.13
     max_tokens_per_chunk: 8191
-    default_chunk_size: 3000
+    default_chunk_size: 2000
     max_batch_size: 100
   - name: text-embedding-3-small
     type: embedding
     input_price: 0.02
     max_tokens_per_chunk: 8191
-    default_chunk_size: 3000
+    default_chunk_size: 2000
     max_batch_size: 100

 # Links:
@@ -1004,12 +1004,12 @@
   - name: text-embedding-3-large
     type: embedding
     max_tokens_per_chunk: 8191
-    default_chunk_size: 3000
+    default_chunk_size: 2000
     max_batch_size: 100
   - name: text-embedding-3-small
     type: embedding
     max_tokens_per_chunk: 8191
-    default_chunk_size: 3000
+    default_chunk_size: 2000
     max_batch_size: 100
   - name: meta-llama-3.1-405b-instruct
     max_input_tokens: 128000
@@ -1714,7 +1714,7 @@
     max_input_tokens: 120000
     input_price: 0.12
     max_tokens_per_chunk: 16000
-    default_chunk_size: 3000
+    default_chunk_size: 2000
     max_batch_size: 128
   - name: rerank-2
     type: reranker
diff --git a/src/config/mod.rs b/src/config/mod.rs
index fa0882bf..24555888 100644
--- a/src/config/mod.rs
+++ b/src/config/mod.rs
@@ -186,7 +186,7 @@ impl Default for Config {

             rag_embedding_model: None,
             rag_reranker_model: None,
-            rag_top_k: 4,
+            rag_top_k: 5,
             rag_chunk_size: None,
             rag_chunk_overlap: None,
             rag_min_score_vector_search: 0.0,
diff --git a/src/rag/mod.rs b/src/rag/mod.rs
index 4ddb0da6..82da7c86 100644
--- a/src/rag/mod.rs
+++ b/src/rag/mod.rs
@@ -488,7 +488,7 @@ impl Rag {
            None => {
                let ids = reciprocal_rank_fusion(
                    vec![vector_search_ids, keyword_search_ids],
-                    vec![1.0, 1.0],
+                    vec![1.125, 1.0],
                    top_k,
                );
                debug!("rrf_ids: {ids:?}");
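
Note on the reciprocal_rank_fusion weight change: weighted RRF scores each document by summing weight / (K + rank) over every ranked list it appears in, so raising the first weight from 1.0 to 1.125 nudges the fused ranking toward vector-search hits while still letting strong keyword matches surface. The code below is a minimal, self-contained sketch of that idea, not aichat's actual implementation; the function signature, the smoothing constant K = 60.0, and the example document ids are assumptions for illustration only.

use std::collections::HashMap;

// Weighted reciprocal rank fusion (sketch): each ranked id list contributes
// weight / (K + rank + 1) to a document's fused score; documents are then
// sorted by fused score and the top_k ids are returned.
fn reciprocal_rank_fusion(id_lists: Vec<Vec<usize>>, weights: Vec<f32>, top_k: usize) -> Vec<usize> {
    const K: f32 = 60.0; // common RRF smoothing constant (assumed, not taken from aichat)
    let mut scores: HashMap<usize, f32> = HashMap::new();
    for (list, &weight) in id_lists.iter().zip(weights.iter()) {
        for (rank, &id) in list.iter().enumerate() {
            // Higher-ranked documents (smaller rank) receive a larger share of the weight.
            *scores.entry(id).or_insert(0.0) += weight / (K + rank as f32 + 1.0);
        }
    }
    let mut fused: Vec<(usize, f32)> = scores.into_iter().collect();
    fused.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap());
    fused.into_iter().take(top_k).map(|(id, _)| id).collect()
}

fn main() {
    // Hypothetical document ids: 7 leads the vector results, 3 leads the keyword results.
    let vector_search_ids = vec![7, 3, 1];
    let keyword_search_ids = vec![3, 9, 7];
    let ids = reciprocal_rank_fusion(
        vec![vector_search_ids, keyword_search_ids],
        vec![1.125, 1.0], // vector-search ranks now count slightly more than keyword ranks
        4,
    );
    println!("{ids:?}");
}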