From e18aa0169e80b8af9241e57f0ab66bf463ac8a4f Mon Sep 17 00:00:00 2001
From: sigoden
Date: Mon, 11 Nov 2024 09:46:21 +0800
Subject: [PATCH] refactor: improve RAG (#982)

- set max default_chunk_size to 2000
- set default rag_top_k to 5
- change reciprocal_rank_fusion weights
---
 config.example.yaml |  2 +-
 models.yaml         | 10 +++++-----
 src/config/mod.rs   |  2 +-
 src/rag/mod.rs      |  2 +-
 4 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/config.example.yaml b/config.example.yaml
index b969a84b..706c8c6f 100644
--- a/config.example.yaml
+++ b/config.example.yaml
@@ -37,7 +37,7 @@ summary_prompt: 'This is a summary of the chat history as a recap: '
 # See [RAG-Guide](https://github.com/sigoden/aichat/wiki/RAG-Guide) for more details.
 rag_embedding_model: null                 # Specifies the embedding model to use
 rag_reranker_model: null                  # Specifies the rerank model to use
-rag_top_k: 4                              # Specifies the number of documents to retrieve
+rag_top_k: 5                              # Specifies the number of documents to retrieve
 rag_chunk_size: null                      # Specifies the chunk size
 rag_chunk_overlap: null                   # Specifies the chunk overlap
 rag_min_score_vector_search: 0            # Specifies the minimum relevance score for vector-based searching
diff --git a/models.yaml b/models.yaml
index de82a601..80e3fc7f 100644
--- a/models.yaml
+++ b/models.yaml
@@ -66,13 +66,13 @@
     type: embedding
     input_price: 0.13
     max_tokens_per_chunk: 8191
-    default_chunk_size: 3000
+    default_chunk_size: 2000
     max_batch_size: 100
   - name: text-embedding-3-small
     type: embedding
     input_price: 0.02
     max_tokens_per_chunk: 8191
-    default_chunk_size: 3000
+    default_chunk_size: 2000
     max_batch_size: 100

 # Links:
@@ -1004,12 +1004,12 @@
   - name: text-embedding-3-large
     type: embedding
     max_tokens_per_chunk: 8191
-    default_chunk_size: 3000
+    default_chunk_size: 2000
     max_batch_size: 100
   - name: text-embedding-3-small
     type: embedding
     max_tokens_per_chunk: 8191
-    default_chunk_size: 3000
+    default_chunk_size: 2000
     max_batch_size: 100
   - name: meta-llama-3.1-405b-instruct
     max_input_tokens: 128000
@@ -1714,7 +1714,7 @@
     max_input_tokens: 120000
     input_price: 0.12
     max_tokens_per_chunk: 16000
-    default_chunk_size: 3000
+    default_chunk_size: 2000
     max_batch_size: 128
   - name: rerank-2
     type: reranker
diff --git a/src/config/mod.rs b/src/config/mod.rs
index fa0882bf..24555888 100644
--- a/src/config/mod.rs
+++ b/src/config/mod.rs
@@ -186,7 +186,7 @@ impl Default for Config {

             rag_embedding_model: None,
             rag_reranker_model: None,
-            rag_top_k: 4,
+            rag_top_k: 5,
             rag_chunk_size: None,
             rag_chunk_overlap: None,
             rag_min_score_vector_search: 0.0,
diff --git a/src/rag/mod.rs b/src/rag/mod.rs
index 4ddb0da6..82da7c86 100644
--- a/src/rag/mod.rs
+++ b/src/rag/mod.rs
@@ -488,7 +488,7 @@ impl Rag {
            None => {
                let ids = reciprocal_rank_fusion(
                    vec![vector_search_ids, keyword_search_ids],
-                    vec![1.0, 1.0],
+                    vec![1.125, 1.0],
                    top_k,
                );
                debug!("rrf_ids: {ids:?}");
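
Note on the reciprocal_rank_fusion weight change: weighted RRF scores each document by summing weight / (K + rank) over every ranked list it appears in, so raising the first weight from 1.0 to 1.125 nudges the fused ranking toward vector-search hits while still letting strong keyword matches surface. The code below is a minimal, self-contained sketch of that idea, not aichat's actual implementation; the function signature, the smoothing constant K = 60.0, and the example document ids are assumptions for illustration only.

use std::collections::HashMap;

// Weighted reciprocal rank fusion (sketch): each ranked id list contributes
// weight / (K + rank + 1) to a document's fused score; documents are then
// sorted by fused score and the top_k ids are returned.
fn reciprocal_rank_fusion(id_lists: Vec<Vec<usize>>, weights: Vec<f32>, top_k: usize) -> Vec<usize> {
    const K: f32 = 60.0; // common RRF smoothing constant (assumed, not taken from aichat)
    let mut scores: HashMap<usize, f32> = HashMap::new();
    for (list, &weight) in id_lists.iter().zip(weights.iter()) {
        for (rank, &id) in list.iter().enumerate() {
            // Higher-ranked documents (smaller rank) receive a larger share of the weight.
            *scores.entry(id).or_insert(0.0) += weight / (K + rank as f32 + 1.0);
        }
    }
    let mut fused: Vec<(usize, f32)> = scores.into_iter().collect();
    fused.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap());
    fused.into_iter().take(top_k).map(|(id, _)| id).collect()
}

fn main() {
    // Hypothetical document ids: 7 leads the vector results, 3 leads the keyword results.
    let vector_search_ids = vec![7, 3, 1];
    let keyword_search_ids = vec![3, 9, 7];
    let ids = reciprocal_rank_fusion(
        vec![vector_search_ids, keyword_search_ids],
        vec![1.125, 1.0], // vector-search ranks now count slightly more than keyword ranks
        4,
    );
    println!("{ids:?}");
}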