This repository has been archived by the owner on Nov 13, 2024. It is now read-only.

Merge branch 'dev' into initial-lib-readme
acatav authored Oct 24, 2023
2 parents 9561db1 + 80a9237 commit bcaf26a
Showing 42 changed files with 572 additions and 279 deletions.
60 changes: 60 additions & 0 deletions .github/workflows/pre-release-CI.yml
@@ -0,0 +1,60 @@
name: Pre Release CI

on:
  workflow_dispatch:

concurrency:
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
  cancel-in-progress: true

jobs:
  build-and-test:
    name: Build & Test on ${{ matrix.os }}-py${{ matrix.python-version }}
    runs-on: ${{ matrix.os }}
    strategy:
      matrix:
        os: [ubuntu-latest, windows-latest, macos-latest]
        python-version: [3.9, '3.10', 3.11]
    defaults:
      run:
        shell: bash

    steps:
      - uses: actions/checkout@v3

      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v4
        with:
          python-version: ${{ matrix.python-version }}

      - name: Install Poetry
        uses: snok/install-poetry@v1
        with:
          version: 1.3.2

      - name: Build wheel
        run: |
          poetry build
      - name: Install the wheel
        run: |
          pip install dist/pinecone_resin*.whl
      - name: Create dev requirements file
        run: |
          poetry export -f requirements.txt --without-hashes --only dev -o only-dev.txt
      - name: Install dev requirements
        run: |
          pip install -r only-dev.txt
      - name: Run tests
        run: pytest --html=report.html --self-contained-html tests/unit

      - name: Upload pytest reports
        if: always()
        uses: actions/upload-artifact@v3
        with:
          name: pytest-report-${{ matrix.os }}-py${{ matrix.python-version }}
          path: .pytest_cache

3 changes: 1 addition & 2 deletions pyproject.toml
@@ -17,13 +17,12 @@ tiktoken = "^0.3.3"
pinecone-datasets = "^0.6.1"
pydantic = "^1.10.7"
pinecone-text = { version = "^0.6.0", extras = ["openai"] }
flake8-pyproject = "^1.2.3"
pandas-stubs = "^2.0.3.230814"
langchain = "^0.0.188"
fastapi = "^0.92.0"
uvicorn = "^0.20.0"
tenacity = "^8.2.1"
sse-starlette = "^1.6.5"
types-tqdm = "^4.61.0"


[tool.poetry.group.dev.dependencies]
4 changes: 4 additions & 0 deletions src/resin/__init__.py
@@ -0,0 +1,4 @@
import importlib.metadata

# Taken from https://stackoverflow.com/a/67097076
__version__ = importlib.metadata.version("pinecone-resin")
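As a quick illustrative sketch (not part of this diff, and assuming the pinecone_resin wheel built by the workflow above is installed), the new attribute can be read directly:

>>> import resin
>>> print(resin.__version__)  # resolved from the installed package metadata via importlib.metadata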
2 changes: 1 addition & 1 deletion src/resin/context_engine/context_builder/base.py
@@ -1,7 +1,7 @@
from abc import ABC, abstractmethod
from typing import List

from resin.knoweldge_base.models import QueryResult
from resin.knowledge_base.models import QueryResult
from resin.models.data_models import Context
from resin.utils.config import ConfigurableMixin

2 changes: 1 addition & 1 deletion src/resin/context_engine/context_builder/stuffing.py
@@ -3,7 +3,7 @@

from resin.context_engine.context_builder.base import ContextBuilder
from resin.context_engine.models import ContextQueryResult, ContextSnippet
from resin.knoweldge_base.models import QueryResult, DocumentWithScore
from resin.knowledge_base.models import QueryResult, DocumentWithScore
from resin.tokenizer import Tokenizer
from resin.models.data_models import Context

4 changes: 2 additions & 2 deletions src/resin/context_engine/context_engine.py
@@ -4,8 +4,8 @@

from resin.context_engine.context_builder import StuffingContextBuilder
from resin.context_engine.context_builder.base import ContextBuilder
from resin.knoweldge_base import KnowledgeBase
from resin.knoweldge_base.base import BaseKnowledgeBase
from resin.knowledge_base import KnowledgeBase
from resin.knowledge_base.base import BaseKnowledgeBase
from resin.models.data_models import Context, Query
from resin.utils.config import ConfigurableMixin

File renamed without changes.
@@ -1,7 +1,7 @@
from abc import ABC, abstractmethod
from typing import List, Optional

from resin.knoweldge_base.models import QueryResult
from resin.knowledge_base.models import QueryResult
from resin.models.data_models import Query, Document
from resin.utils.config import ConfigurableMixin

@@ -1,7 +1,7 @@
from abc import ABC, abstractmethod
from typing import List

from resin.knoweldge_base.models import KBDocChunk
from resin.knowledge_base.models import KBDocChunk
from resin.models.data_models import Document
from resin.utils.config import ConfigurableMixin

@@ -2,7 +2,7 @@

from .langchain_text_splitter import Language, RecursiveCharacterTextSplitter
from .recursive_character import RecursiveCharacterChunker
from resin.knoweldge_base.models import KBDocChunk
from resin.knowledge_base.models import KBDocChunk
from resin.models.data_models import Document


@@ -3,8 +3,8 @@

from .langchain_text_splitter import RecursiveCharacterTextSplitter

from resin.knoweldge_base.chunker.base import Chunker
from resin.knoweldge_base.models import KBDocChunk
from resin.knowledge_base.chunker.base import Chunker
from resin.knowledge_base.models import KBDocChunk
from resin.tokenizer import Tokenizer
from resin.models.data_models import Document

@@ -6,6 +6,7 @@
import pandas as pd
from pinecone import list_indexes, delete_index, create_index, init \
as pinecone_init, whoami as pinecone_whoami
from pinecone import ApiException as PineconeApiException

try:
from pinecone import GRPCIndex as Index
@@ -15,13 +16,13 @@
from pinecone_datasets import Dataset
from pinecone_datasets import DenseModelMetadata, DatasetMetadata

from resin.knoweldge_base.base import BaseKnowledgeBase
from resin.knoweldge_base.chunker import Chunker, MarkdownChunker
from resin.knoweldge_base.record_encoder import (RecordEncoder,
from resin.knowledge_base.base import BaseKnowledgeBase
from resin.knowledge_base.chunker import Chunker, MarkdownChunker
from resin.knowledge_base.record_encoder import (RecordEncoder,
OpenAIRecordEncoder)
from resin.knoweldge_base.models import (KBQueryResult, KBQuery, QueryResult,
from resin.knowledge_base.models import (KBQueryResult, KBQuery, QueryResult,
KBDocChunkWithScore, DocumentWithScore)
from resin.knoweldge_base.reranker import Reranker, TransparentReranker
from resin.knowledge_base.reranker import Reranker, TransparentReranker
from resin.models.data_models import Query, Document


@@ -52,7 +53,7 @@ class KnowledgeBase(BaseKnowledgeBase):
This is a one-time setup process - the index will exist on Pinecone's managed service until it is deleted.
Example:
>>> from resin.knoweldge_base.knowledge_base import KnowledgeBase
>>> from resin.knowledge_base.knowledge_base import KnowledgeBase
>>> from tokenizer import Tokenizer
>>> Tokenizer.initialize()
>>> kb = KnowledgeBase(index_name="my_index")
@@ -89,7 +90,7 @@ def __init__(self,
Example:
create a new index:
>>> from resin.knoweldge_base.knowledge_base import KnowledgeBase
>>> from resin.knowledge_base.knowledge_base import KnowledgeBase
>>> from tokenizer import Tokenizer
>>> Tokenizer.initialize()
>>> kb = KnowledgeBase(index_name="my_index")
@@ -168,7 +169,7 @@ def _connect_pinecone():

def _connect_index(self,
connect_pinecone: bool = True
) -> Index:
) -> None:
if connect_pinecone:
self._connect_pinecone()

@@ -180,13 +181,14 @@ def _connect_index(self,
)

try:
index = Index(index_name=self.index_name)
self._index = Index(index_name=self.index_name)
self.verify_index_connection()
except Exception as e:
self._index = None
raise RuntimeError(
f"Unexpected error while connecting to index {self.index_name}. "
f"Please check your credentials and try again."
) from e
return index

@property
def _connection_error_msg(self) -> str:
@@ -210,8 +212,7 @@ def connect(self) -> None:
RuntimeError: If the knowledge base failed to connect to the underlying Pinecone index.
""" # noqa: E501
if self._index is None:
self._index = self._connect_index()
self.verify_index_connection()
self._connect_index()

def verify_index_connection(self) -> None:
"""
@@ -282,8 +283,15 @@ def create_resin_index(self,
"Please remove it from indexed_fields")

if dimension is None:
if self._encoder.dimension is not None:
dimension = self._encoder.dimension
try:
encoder_dimension = self._encoder.dimension
except Exception as e:
raise RuntimeError(
f"Failed to infer vectors' dimension from encoder due to error: "
f"{e}. Please fix the error or provide the dimension manually"
) from e
if encoder_dimension is not None:
dimension = encoder_dimension
else:
raise ValueError("Could not infer dimension from encoder. "
"Please provide the vectors' dimension")
@@ -307,10 +315,10 @@ def create_resin_index(self,
},
timeout=TIMEOUT_INDEX_CREATE,
**index_params)
except Exception as e:
except (Exception, PineconeApiException) as e:
raise RuntimeError(
f"Unexpected error while creating index {self.index_name}."
f"Please try again."
f"Failed to create index {self.index_name} due to error: "
f"{e.body if isinstance(e, PineconeApiException) else e}"
) from e

# wait for index to be provisioned
@@ -320,7 +328,7 @@ def _wait_for_index_provision(self):
start_time = time.time()
while True:
try:
self._index = self._connect_index(connect_pinecone=False)
self._connect_index(connect_pinecone=False)
break
except RuntimeError:
pass
@@ -387,7 +395,7 @@ def query(self,
A list of QueryResult objects.
Examples:
>>> from resin.knoweldge_base.knowledge_base import KnowledgeBase
>>> from resin.knowledge_base.knowledge_base import KnowledgeBase
>>> from tokenizer import Tokenizer
>>> Tokenizer.initialize()
>>> kb = KnowledgeBase(index_name="my_index")
@@ -432,13 +440,16 @@ def _query_index(self,
metadata_filter.update(global_metadata_filter)
top_k = query.top_k if query.top_k else self._default_top_k

query_params = deepcopy(query.query_params)
_check_return_type = query.query_params.pop('_check_return_type', False)
result = self._index.query(vector=query.values,
sparse_vector=query.sparse_values,
top_k=top_k,
namespace=query.namespace,
metadata_filter=metadata_filter,
include_metadata=True,
**query.query_params)
_check_return_type=_check_return_type,
**query_params)
documents: List[KBDocChunkWithScore] = []
for match in result['matches']:
metadata = match['metadata']
@@ -480,7 +491,7 @@ def upsert(self,
None
Example:
>>> from resin.knoweldge_base.knowledge_base import KnowledgeBase
>>> from resin.knowledge_base.knowledge_base import KnowledgeBase
>>> from tokenizer import Tokenizer
>>> Tokenizer.initialize()
>>> kb = KnowledgeBase(index_name="my_index")
@@ -558,7 +569,7 @@ def delete(self,
None
Example:
>>> from resin.knoweldge_base.knowledge_base import KnowledgeBase
>>> from resin.knowledge_base.knowledge_base import KnowledgeBase
>>> from tokenizer import Tokenizer
>>> Tokenizer.initialize()
>>> kb = KnowledgeBase(index_name="my_index")
File renamed without changes.
@@ -1,7 +1,7 @@
from abc import ABC, abstractmethod
from typing import List, Optional

from resin.knoweldge_base.models import KBEncodedDocChunk, KBQuery, KBDocChunk
from resin.knowledge_base.models import KBEncodedDocChunk, KBQuery, KBDocChunk
from resin.models.data_models import Query
from resin.utils.config import ConfigurableMixin

@@ -3,7 +3,7 @@
from pinecone_text.dense.base_dense_ecoder import BaseDenseEncoder

from .base import RecordEncoder
from resin.knoweldge_base.models import KBQuery, KBEncodedDocChunk, KBDocChunk
from resin.knowledge_base.models import KBQuery, KBEncodedDocChunk, KBDocChunk
from resin.models.data_models import Query


@@ -6,8 +6,8 @@
retry_if_exception_type,
)
from pinecone_text.dense.openai_encoder import OpenAIEncoder
from resin.knoweldge_base.models import KBDocChunk, KBEncodedDocChunk, KBQuery
from resin.knoweldge_base.record_encoder.dense import DenseRecordEncoder
from resin.knowledge_base.models import KBDocChunk, KBEncodedDocChunk, KBQuery
from resin.knowledge_base.record_encoder.dense import DenseRecordEncoder
from resin.models.data_models import Query
from resin.utils.openai_exceptions import OPEN_AI_TRANSIENT_EXCEPTIONS

@@ -1,7 +1,7 @@
from abc import ABC, abstractmethod
from typing import List

from resin.knoweldge_base.models import KBQueryResult
from resin.knowledge_base.models import KBQueryResult
from resin.utils.config import ConfigurableMixin


4 changes: 2 additions & 2 deletions src/resin/llm/openai.py
@@ -27,8 +27,8 @@ def __init__(self,
self.available_models = [k["id"] for k in openai.Model.list().data]
if model_name not in self.available_models:
raise ValueError(
f"Model {model_name} not found. " +
" Available models: {self.available_models}"
f"Model {model_name} not found. "
f" Available models: {self.available_models}"
)

@retry(