diff --git a/pyproject.toml b/pyproject.toml index 7e6f5841..64daa0d5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -14,7 +14,7 @@ pinecone-client = "^2.2.2" python-dotenv = "^1.0.0" openai = "^0.27.5" tiktoken = "^0.3.3" -pinecone-datasets = "^0.6.1" +pinecone-datasets = "^0.6.2" pydantic = "^1.10.7" pinecone-text = { version = "^0.6.0", extras = ["openai"] } pandas-stubs = "^2.0.3.230814" diff --git a/src/resin/knowledge_base/knowledge_base.py b/src/resin/knowledge_base/knowledge_base.py index 91239c6a..13a2f76c 100644 --- a/src/resin/knowledge_base/knowledge_base.py +++ b/src/resin/knowledge_base/knowledge_base.py @@ -468,7 +468,8 @@ def _query_index(self, def upsert(self, documents: List[Document], namespace: str = "", - batch_size: int = 100): + batch_size: int = 100, + show_progress_bar: bool = False): """ Upsert documents into the knowledge base. Upsert operation stands for "update or insert". @@ -486,6 +487,8 @@ def upsert(self, namespace: The namespace in the underlying index to upsert documents into. batch_size: Refers only to the actual upsert operation to the underlying index. The number of chunks (multiple piecies of text per document) to upsert in each batch. + Defaults to 100. + show_progress_bar: Whether to show a progress bar while upserting the documents. Returns: None @@ -551,7 +554,8 @@ def upsert(self, dataset.to_pinecone_index(self._index_name, namespace=namespace, should_create_index=False, - batch_size=batch_size) + batch_size=batch_size, + show_progress=show_progress_bar) def delete(self, document_ids: List[str],