From 80b62d50f5cc034c5df2dcd1f1b9c43da5b8289b Mon Sep 17 00:00:00 2001 From: Jyong <76649700+JohnJyong@users.noreply.github.com> Date: Fri, 11 Oct 2024 16:04:23 +0800 Subject: [PATCH] Fix/add es num_candidates (#9225) --- .../rag/datasource/vdb/elasticsearch/elasticsearch_vector.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/api/core/rag/datasource/vdb/elasticsearch/elasticsearch_vector.py b/api/core/rag/datasource/vdb/elasticsearch/elasticsearch_vector.py index 8d578551209e09..f585e12b2e99c3 100644 --- a/api/core/rag/datasource/vdb/elasticsearch/elasticsearch_vector.py +++ b/api/core/rag/datasource/vdb/elasticsearch/elasticsearch_vector.py @@ -1,5 +1,6 @@ import json import logging +import math from typing import Any, Optional from urllib.parse import urlparse @@ -112,7 +113,8 @@ def delete(self) -> None: def search_by_vector(self, query_vector: list[float], **kwargs: Any) -> list[Document]: top_k = kwargs.get("top_k", 10) - knn = {"field": Field.VECTOR.value, "query_vector": query_vector, "k": top_k} + num_candidates = math.ceil(top_k * 1.5) + knn = {"field": Field.VECTOR.value, "query_vector": query_vector, "k": top_k, "num_candidates": num_candidates} results = self._client.search(index=self._collection_name, knn=knn, size=top_k)