From becaef9b139e812e3d6e9b1b791babda4c4d9de1 Mon Sep 17 00:00:00 2001 From: Roy Miara Date: Tue, 24 Oct 2023 17:17:02 +0300 Subject: [PATCH 1/3] Add show_progress_bar flag and set to false --- pyproject.toml | 2 +- src/resin/knowledge_base/knowledge_base.py | 6 ++++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 069889a5..7bdab6d7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -14,7 +14,7 @@ pinecone-client = "^2.2.2" python-dotenv = "^1.0.0" openai = "^0.27.5" tiktoken = "^0.3.3" -pinecone-datasets = "^0.6.1" +pinecone-datasets = "^0.6.2" pydantic = "^1.10.7" pinecone-text = { version = "^0.6.0", extras = ["openai"] } pandas-stubs = "^2.0.3.230814" diff --git a/src/resin/knowledge_base/knowledge_base.py b/src/resin/knowledge_base/knowledge_base.py index a6b68e59..24b4bdc3 100644 --- a/src/resin/knowledge_base/knowledge_base.py +++ b/src/resin/knowledge_base/knowledge_base.py @@ -468,7 +468,8 @@ def _query_index(self, def upsert(self, documents: List[Document], namespace: str = "", - batch_size: int = 100): + batch_size: int = 100, + show_progress_bar: bool = False): """ Upsert documents into the knowledge base. Upsert operation stands for "update or insert". @@ -551,7 +552,8 @@ def upsert(self, dataset.to_pinecone_index(self._index_name, namespace=namespace, should_create_index=False, - batch_size=batch_size) + batch_size=batch_size, + show_progress=show_progress_bar) def delete(self, document_ids: List[str], From 4f0095605ef9cd87c41940bf70175707cf763b43 Mon Sep 17 00:00:00 2001 From: Roy Miara Date: Tue, 24 Oct 2023 18:15:57 +0300 Subject: [PATCH 2/3] add to docstring --- src/resin/knowledge_base/knowledge_base.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/resin/knowledge_base/knowledge_base.py b/src/resin/knowledge_base/knowledge_base.py index 24b4bdc3..277c782b 100644 --- a/src/resin/knowledge_base/knowledge_base.py +++ b/src/resin/knowledge_base/knowledge_base.py @@ -487,6 +487,8 @@ def upsert(self, namespace: The namespace in the underlying index to upsert documents into. batch_size: Refers only to the actual upsert operation to the underlying index. The number of chunks (multiple piecies of text per document) to upsert in each batch. + Defaults to 100. + show_progress_bar: Whether to show a progress bar while upserting the documents. Returns: None From 49a92e990d4705abbe86ea9c777235d84008a2b9 Mon Sep 17 00:00:00 2001 From: Amnon Catav Date: Tue, 24 Oct 2023 18:48:20 +0300 Subject: [PATCH 3/3] lint --- src/resin/knowledge_base/knowledge_base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/resin/knowledge_base/knowledge_base.py b/src/resin/knowledge_base/knowledge_base.py index 277c782b..9cf14a01 100644 --- a/src/resin/knowledge_base/knowledge_base.py +++ b/src/resin/knowledge_base/knowledge_base.py @@ -554,7 +554,7 @@ def upsert(self, dataset.to_pinecone_index(self._index_name, namespace=namespace, should_create_index=False, - batch_size=batch_size, + batch_size=batch_size, show_progress=show_progress_bar) def delete(self,