From 666fd66148ad52fec5fa4fc16af2d0c1081f482f Mon Sep 17 00:00:00 2001
From: Oliver Rice
Date: Fri, 13 Oct 2023 10:17:43 -0600
Subject: [PATCH] Deployed 20622a0 to 0.4 with MkDocs 1.5.2 and mike 1.1.2
---
0.4/index.html | 4 ++--
0.4/search/search_index.json | 2 +-
0.4/sitemap.xml.gz | Bin 318 -> 318 bytes
3 files changed, 3 insertions(+), 3 deletions(-)
diff --git a/0.4/index.html b/0.4/index.html
index 6d5f967..7aac276 100644
--- a/0.4/index.html
+++ b/0.4/index.html
@@ -128,7 +128,7 @@ vecs
-Documentation: https://supabase.github.io/vecs/api/
+Documentation: https://supabase.github.io/vecs/
Source Code: https://github.com/supabase/vecs
Vecs is a Python client library for managing and querying vector stores in PostgreSQL, leveraging the capabilities of the pgvector extension.
@@ -235,5 +235,5 @@ Usage
diff --git a/0.4/search/search_index.json b/0.4/search/search_index.json
index 5afc2cf..1b6d36c 100644
--- a/0.4/search/search_index.json
+++ b/0.4/search/search_index.json
@@ -1 +1 @@
-{"config":{"indexing":"full","lang":["en"],"min_search_length":3,"prebuild_index":false,"separator":"[\\s\\-]+"},"docs":[{"location":"","text":"vecs Documentation : https://supabase.github.io/vecs/api/ Source Code : https://github.com/supabase/vecs Vecs is a Python client library for managing and querying vector stores in PostgreSQL, leveraging the capabilities of the pgvector extension . Overview Vector Management: create collections to persist and update vectors in a PostgreSQL database. Querying: Query vectors efficiently using measures such as cosine distance, l2 distance, or max inner product. Metadata: Each vector can have associated metadata, which can also be used as filters during queries. Hybrid Data: vecs creates its own schema and can coexist with your existing relational data Visit the quickstart guide for how to get started. TL;DR Install pip install vecs Usage import vecs DB_CONNECTION = \"postgresql://:@:/\" # create vector store client vx = vecs . create_client ( DB_CONNECTION ) # create a collection of vectors with 3 dimensions docs = vx . get_or_create_collection ( name = \"docs\" , dimension = 3 ) # add records to the *docs* collection docs . upsert ( records = [ ( \"vec0\" , # the vector's identifier [ 0.1 , 0.2 , 0.3 ], # the vector. list or np.array { \"year\" : 1973 } # associated metadata ), ( \"vec1\" , [ 0.7 , 0.8 , 0.9 ], { \"year\" : 2012 } ) ] ) # index the collection for fast search performance docs . create_index () # query the collection filtering metadata for \"year\" = 2012 docs . query ( data = [ 0.4 , 0.5 , 0.6 ], # required limit = 1 , # number of records to return filters = { \"year\" : { \"$eq\" : 2012 }}, # metadata filters ) # Returns: [\"vec1\"] # Disconnect from the database vx . 
disconnect ()","title":"Introduction"},{"location":"#vecs","text":"Documentation : https://supabase.github.io/vecs/api/ Source Code : https://github.com/supabase/vecs Vecs is a Python client library for managing and querying vector stores in PostgreSQL, leveraging the capabilities of the pgvector extension .","title":"vecs"},{"location":"#overview","text":"Vector Management: create collections to persist and update vectors in a PostgreSQL database. Querying: Query vectors efficiently using measures such as cosine distance, l2 distance, or max inner product. Metadata: Each vector can have associated metadata, which can also be used as filters during queries. Hybrid Data: vecs creates its own schema and can coexist with your existing relational data Visit the quickstart guide for how to get started.","title":"Overview"},{"location":"#tldr","text":"","title":"TL;DR"},{"location":"#install","text":"pip install vecs","title":"Install"},{"location":"#usage","text":"import vecs DB_CONNECTION = \"postgresql://:@:/\" # create vector store client vx = vecs . create_client ( DB_CONNECTION ) # create a collection of vectors with 3 dimensions docs = vx . get_or_create_collection ( name = \"docs\" , dimension = 3 ) # add records to the *docs* collection docs . upsert ( records = [ ( \"vec0\" , # the vector's identifier [ 0.1 , 0.2 , 0.3 ], # the vector. list or np.array { \"year\" : 1973 } # associated metadata ), ( \"vec1\" , [ 0.7 , 0.8 , 0.9 ], { \"year\" : 2012 } ) ] ) # index the collection for fast search performance docs . create_index () # query the collection filtering metadata for \"year\" = 2012 docs . query ( data = [ 0.4 , 0.5 , 0.6 ], # required limit = 1 , # number of records to return filters = { \"year\" : { \"$eq\" : 2012 }}, # metadata filters ) # Returns: [\"vec1\"] # Disconnect from the database vx . 
disconnect ()","title":"Usage"},{"location":"api/","text":"API vecs is a python client for managing and querying vector stores in PostgreSQL with the pgvector extension . This guide will help you get started with using vecs. If you don't have a Postgres database with the pgvector ready, see hosting for easy options. Installation Requires: Python 3.7+ You can install vecs using pip: pip install vecs Usage Connecting Before you can interact with vecs, create the client to communicate with Postgres. If you haven't started a Postgres instance yet, see hosting . import vecs DB_CONNECTION = \"postgresql://:@:/\" # create vector store client vx = vecs . create_client ( DB_CONNECTION ) Get or Create a Collection You can get a collection (or create if it doesn't exist), specifying the collection's name and the number of dimensions for the vectors you intend to store. docs = vx . get_or_create_collection ( name = \"docs\" , dimension = 3 ) Upserting vectors vecs combines the concepts of \"insert\" and \"update\" into \"upsert\". Upserting records adds them to the collection if the id is not present, or updates the existing record if the id does exist. # add records to the collection docs . upsert ( records = [ ( \"vec0\" , # the vector's identifier [ 0.1 , 0.2 , 0.3 ], # the vector. list or np.array { \"year\" : 1973 } # associated metadata ), ( \"vec1\" , [ 0.7 , 0.8 , 0.9 ], { \"year\" : 2012 } ) ] ) Deleting vectors Deleting records removes them from the collection. To delete records, specify a list of ids to the delete method. The ids of the sucessfully deleted records are returned from the method. Note that attempting to delete non-existent records does not raise an error. docs . delete ( ids = [ \"vec0\" , \"vec1\" ]) Create an index Collections can be queried immediately after being created. However, for good throughput, the collection should be indexed after records have been upserted. Only one index may exist per-collection. 
By default, creating an index will replace any existing index. To create an index: docs . create_index () You may optionally provide a distance measure and index method. Available options for distance measure are: vecs.IndexMeasure.cosine_distance vecs.IndexMeasure.l2_distance vecs.IndexMeasure.max_inner_product which correspond to different methods for comparing query vectors to the vectors in the database. If you aren't sure which to use, the default of cosine_distance is the most widely compatible with off-the-shelf embedding methods. Available options for index method are: vecs.IndexMethod.auto vecs.IndexMethod.hnsw vecs.IndexMethod.ivfflat Where auto selects the best available index method, hnsw uses the HNSW method and ivfflat uses IVFFlat . When using IVFFlat indexes, the index must be created after the collection has been populated with records. Building an IVFFlat index on an empty collection will result in significantly reduced recall. You can continue upserting new documents after the index has been created, but should rebuild the index if the size of the collection more than doubles since the last index operation. HNSW indexes can be created immediately after the collection without populating records. To manually specify method and measure , add them as arguments to create_index for example: docs . create_index ( method = IndexMethod . hnsw , measure = IndexMeasure . cosine_distance , ) Note The time required to create an index grows with the number of records and size of vectors. For a few thousand records expect sub-minute a response in under a minute. It may take a few minutes for larger collections. Query Given a collection docs with several records: Basic The simplest form of search is to provide a query vector. Note Indexes are essential for good performance. See creating an index for more info. If you do not create an index, every query will return a warning query does not have a covering index for cosine_similarity. 
See Collection.create_index that incldues the IndexMeasure you should index. docs . query ( data = [ 0.4 , 0.5 , 0.6 ], # required limit = 5 , # number of records to return filters = {}, # metadata filters measure = \"cosine_distance\" , # distance measure to use include_value = False , # should distance measure values be returned? include_metadata = False , # should record metadata be returned? ) Which returns a list of vector record ids . Metadata Filtering The metadata that is associated with each record can also be filtered during a query. As an example, {\"year\": {\"$eq\": 2005}} filters a year metadata key to be equal to 2005 In context: docs . query ( data = [ 0.4 , 0.5 , 0.6 ], filters = { \"year\" : { \"$eq\" : 2012 }}, # metadata filters ) For a complete reference, see the metadata guide . Disconnect When you're done with a collection, be sure to disconnect the client from the database. vx . disconnect () alternatively, use the client as a context manager and it will automatically close the connection on exit. import vecs DB_CONNECTION = \"postgresql://:@:/\" # create vector store client with vecs . create_client ( DB_CONNECTION ) as vx : # do some work here pass # connections are now closed Adapters Adapters are an optional feature to transform data before adding to or querying from a collection. Adapters make it possible to interact with a collection using only your project's native data type (eg. just raw text), rather than manually handling vectors. For a complete list of available adapters, see built-in adapters . As an example, we'll create a collection with an adapter that chunks text into paragraphs and converts each chunk into an embedding vector using the all-MiniLM-L6-v2 model. First, install vecs with optional dependencies for text embeddings: pip install \"vecs[text_embedding]\" Then create a collection with an adapter to chunk text into paragraphs and embed each paragraph using the all-MiniLM-L6-v2 384 dimensional text embedding model. 
import vecs from vecs.adapter import Adapter , ParagraphChunker , TextEmbedding # create vector store client vx = vecs . Client ( \"postgresql://:@:/\" ) # create a collection with an adapter docs = vx . get_or_create_collection ( name = \"docs\" , adapter = Adapter ( [ ParagraphChunker ( skip_during_query = True ), TextEmbedding ( model = 'all-MiniLM-L6-v2' ), ] ) ) With the adapter registered against the collection, we can upsert records into the collection passing in text rather than vectors. # add records to the collection using text as the media type docs . upsert ( records = [ ( \"vec0\" , \"four score and ....\" , # <- note that we can now pass text here { \"year\" : 1973 } ), ( \"vec1\" , \"hello, world!\" , { \"year\" : \"2012\" } ) ] ) Similarly, we can query the collection using text. # search by text docs . query ( data = \"foo bar\" ) Deprecated Create collection Note Deprecated: use get_or_create_collection You can create a collection to store vectors specifying the collections name and the number of dimensions in the vectors you intend to store. docs = vx . create_collection ( name = \"docs\" , dimension = 3 ) Get an existing collection Note Deprecated: use get_or_create_collection To access a previously created collection, use get_collection to retrieve it by name docs = vx . get_collection ( name = \"docs\" )","title":"API"},{"location":"api/#api","text":"vecs is a python client for managing and querying vector stores in PostgreSQL with the pgvector extension . This guide will help you get started with using vecs. If you don't have a Postgres database with the pgvector ready, see hosting for easy options.","title":"API"},{"location":"api/#installation","text":"Requires: Python 3.7+ You can install vecs using pip: pip install vecs","title":"Installation"},{"location":"api/#usage","text":"","title":"Usage"},{"location":"api/#connecting","text":"Before you can interact with vecs, create the client to communicate with Postgres. 
If you haven't started a Postgres instance yet, see hosting . import vecs DB_CONNECTION = \"postgresql://:@:/\" # create vector store client vx = vecs . create_client ( DB_CONNECTION )","title":"Connecting"},{"location":"api/#get-or-create-a-collection","text":"You can get a collection (or create if it doesn't exist), specifying the collection's name and the number of dimensions for the vectors you intend to store. docs = vx . get_or_create_collection ( name = \"docs\" , dimension = 3 )","title":"Get or Create a Collection"},{"location":"api/#upserting-vectors","text":"vecs combines the concepts of \"insert\" and \"update\" into \"upsert\". Upserting records adds them to the collection if the id is not present, or updates the existing record if the id does exist. # add records to the collection docs . upsert ( records = [ ( \"vec0\" , # the vector's identifier [ 0.1 , 0.2 , 0.3 ], # the vector. list or np.array { \"year\" : 1973 } # associated metadata ), ( \"vec1\" , [ 0.7 , 0.8 , 0.9 ], { \"year\" : 2012 } ) ] )","title":"Upserting vectors"},{"location":"api/#deleting-vectors","text":"Deleting records removes them from the collection. To delete records, specify a list of ids to the delete method. The ids of the sucessfully deleted records are returned from the method. Note that attempting to delete non-existent records does not raise an error. docs . delete ( ids = [ \"vec0\" , \"vec1\" ])","title":"Deleting vectors"},{"location":"api/#create-an-index","text":"Collections can be queried immediately after being created. However, for good throughput, the collection should be indexed after records have been upserted. Only one index may exist per-collection. By default, creating an index will replace any existing index. To create an index: docs . create_index () You may optionally provide a distance measure and index method. 
Available options for distance measure are: vecs.IndexMeasure.cosine_distance vecs.IndexMeasure.l2_distance vecs.IndexMeasure.max_inner_product which correspond to different methods for comparing query vectors to the vectors in the database. If you aren't sure which to use, the default of cosine_distance is the most widely compatible with off-the-shelf embedding methods. Available options for index method are: vecs.IndexMethod.auto vecs.IndexMethod.hnsw vecs.IndexMethod.ivfflat Where auto selects the best available index method, hnsw uses the HNSW method and ivfflat uses IVFFlat . When using IVFFlat indexes, the index must be created after the collection has been populated with records. Building an IVFFlat index on an empty collection will result in significantly reduced recall. You can continue upserting new documents after the index has been created, but should rebuild the index if the size of the collection more than doubles since the last index operation. HNSW indexes can be created immediately after the collection without populating records. To manually specify method and measure , add them as arguments to create_index for example: docs . create_index ( method = IndexMethod . hnsw , measure = IndexMeasure . cosine_distance , ) Note The time required to create an index grows with the number of records and size of vectors. For a few thousand records expect sub-minute a response in under a minute. It may take a few minutes for larger collections.","title":"Create an index"},{"location":"api/#query","text":"Given a collection docs with several records:","title":"Query"},{"location":"api/#basic","text":"The simplest form of search is to provide a query vector. Note Indexes are essential for good performance. See creating an index for more info. If you do not create an index, every query will return a warning query does not have a covering index for cosine_similarity. See Collection.create_index that incldues the IndexMeasure you should index. docs . 
query ( data = [ 0.4 , 0.5 , 0.6 ], # required limit = 5 , # number of records to return filters = {}, # metadata filters measure = \"cosine_distance\" , # distance measure to use include_value = False , # should distance measure values be returned? include_metadata = False , # should record metadata be returned? ) Which returns a list of vector record ids .","title":"Basic"},{"location":"api/#metadata-filtering","text":"The metadata that is associated with each record can also be filtered during a query. As an example, {\"year\": {\"$eq\": 2005}} filters a year metadata key to be equal to 2005 In context: docs . query ( data = [ 0.4 , 0.5 , 0.6 ], filters = { \"year\" : { \"$eq\" : 2012 }}, # metadata filters ) For a complete reference, see the metadata guide .","title":"Metadata Filtering"},{"location":"api/#disconnect","text":"When you're done with a collection, be sure to disconnect the client from the database. vx . disconnect () alternatively, use the client as a context manager and it will automatically close the connection on exit. import vecs DB_CONNECTION = \"postgresql://:@:/\" # create vector store client with vecs . create_client ( DB_CONNECTION ) as vx : # do some work here pass # connections are now closed","title":"Disconnect"},{"location":"api/#adapters","text":"Adapters are an optional feature to transform data before adding to or querying from a collection. Adapters make it possible to interact with a collection using only your project's native data type (eg. just raw text), rather than manually handling vectors. For a complete list of available adapters, see built-in adapters . As an example, we'll create a collection with an adapter that chunks text into paragraphs and converts each chunk into an embedding vector using the all-MiniLM-L6-v2 model. 
First, install vecs with optional dependencies for text embeddings: pip install \"vecs[text_embedding]\" Then create a collection with an adapter to chunk text into paragraphs and embed each paragraph using the all-MiniLM-L6-v2 384 dimensional text embedding model. import vecs from vecs.adapter import Adapter , ParagraphChunker , TextEmbedding # create vector store client vx = vecs . Client ( \"postgresql://:@:/\" ) # create a collection with an adapter docs = vx . get_or_create_collection ( name = \"docs\" , adapter = Adapter ( [ ParagraphChunker ( skip_during_query = True ), TextEmbedding ( model = 'all-MiniLM-L6-v2' ), ] ) ) With the adapter registered against the collection, we can upsert records into the collection passing in text rather than vectors. # add records to the collection using text as the media type docs . upsert ( records = [ ( \"vec0\" , \"four score and ....\" , # <- note that we can now pass text here { \"year\" : 1973 } ), ( \"vec1\" , \"hello, world!\" , { \"year\" : \"2012\" } ) ] ) Similarly, we can query the collection using text. # search by text docs . query ( data = \"foo bar\" )","title":"Adapters"},{"location":"api/#deprecated","text":"","title":"Deprecated"},{"location":"api/#create-collection","text":"Note Deprecated: use get_or_create_collection You can create a collection to store vectors specifying the collections name and the number of dimensions in the vectors you intend to store. docs = vx . create_collection ( name = \"docs\" , dimension = 3 )","title":"Create collection"},{"location":"api/#get-an-existing-collection","text":"Note Deprecated: use get_or_create_collection To access a previously created collection, use get_collection to retrieve it by name docs = vx . get_collection ( name = \"docs\" )","title":"Get an existing collection"},{"location":"concepts_adapters/","text":"Adapters Adapters are an optional feature to transform data before adding to or querying from a collection. 
Adapters provide a customizable and modular way to express data transformations and make interacting with collections more ergonomic. Additionally, adapter transformations are applied lazily and can internally batch operations which can make them more memory and CPU efficient compared to manually executing transforms. Example: As an example, we'll create a collection with an adapter that chunks text into paragraphs and converts each chunk into an embedding vector using the all-MiniLM-L6-v2 model. First, install vecs with optional dependencies for text embeddings: pip install \"vecs[text_embedding]\" Then create a collection with an adapter to chunk text into paragraphs and embed each paragraph using the all-MiniLM-L6-v2 384 dimensional text embedding model. import vecs from vecs.adapter import Adapter , ParagraphChunker , TextEmbedding # create vector store client vx = vecs . Client ( \"postgresql://:@:/\" ) # create a collection with an adapter docs = vx . get_or_create_collection ( name = \"docs\" , adapter = Adapter ( [ ParagraphChunker ( skip_during_query = True ), TextEmbedding ( model = 'all-MiniLM-L6-v2' ), ] ) ) With the adapter registered against the collection, we can upsert records into the collection passing in text rather than vectors. # add records to the collection using text as the media type docs . upsert ( records = [ ( \"vec0\" , \"four score and ....\" , # <- note that we can now pass text here { \"year\" : 1973 } ), ( \"vec1\" , \"hello, world!\" , { \"year\" : \"2012\" } ) ] ) Similarly, we can query the collection using text. # search by text docs . query ( data = \"foo bar\" ) In summary, Adapter s allow you to work with a collection as though they store your prefered data type natively. Built-in Adapters vecs provides several built-in Adapters. ParagraphChunker TextEmbedding Have an idea for a useful adapter? Open an issue requesting it. 
ParagraphChunker The ParagraphChunker AdapterStep splits text media into paragraphs and yields each paragraph as a separate record. That can be a useful preprocessing step when upserting large documents that contain multiple paragraphs. The ParagraphChunker delimits paragraphs by two consecutive line breaks \\n\\n . ParagrphChunker is a pre-preocessing step and must be used in combination with another adapter step like TextEmbedding to transform the chunked text into a vector. from vecs.adapter import Adapter , ParagraphChunker ... vx . get_or_create_collection ( name = \"docs\" , adapter = Adapter ( [ ParagraphChunker ( skip_during_query = True ), ... ] ) ) When querying the collection, you probably do not want to chunk the text. To skip text chunking during queries, set the skip_during_query argument to True . Setting skip_during_query to False will raise an exception if the input text contains more than one paragraph. TextEmbedding The TextEmbedding AdapterStep accepts text and converts it into a vector that can be consumed by the Collection . TextEmbedding supports all models available in the sentence_transformers package. A complete list of supported models is available in vecs.adapter.TextEmbeddingModel . from vecs.adapter import Adapter , TextEmbedding ... vx . get_or_create_collection ( name = \"docs\" , adapter = Adapter ( [ TextEmbedding ( model = 'all-MiniLM-L6-v2' ) ] ) ) # search by text docs . query ( data = \"foo bar\" ) Interface Adapters are objects that take in data in the form of Iterable[Tuple[str, Any, Optional[Dict]]] where Tuple[str, Any, Optional[Dict]]] represents records of (id, media, metadata) . The main use of Adapters is to transform the media part of the records into a form that is ready to be ingested into the collection (like converting text into embeddings). However, Adapters can also modify the id or metadata if required. 
Due to the common interface, adapters may be comprised of multiple adapter steps to create multi-stage preprocessing pipelines. For example, a multi-step adapter might first convert text into chunks and then convert each text chunk into an embedding vector.","title":"Adapters"},{"location":"concepts_adapters/#adapters","text":"Adapters are an optional feature to transform data before adding to or querying from a collection. Adapters provide a customizable and modular way to express data transformations and make interacting with collections more ergonomic. Additionally, adapter transformations are applied lazily and can internally batch operations which can make them more memory and CPU efficient compared to manually executing transforms.","title":"Adapters"},{"location":"concepts_adapters/#example","text":"As an example, we'll create a collection with an adapter that chunks text into paragraphs and converts each chunk into an embedding vector using the all-MiniLM-L6-v2 model. First, install vecs with optional dependencies for text embeddings: pip install \"vecs[text_embedding]\" Then create a collection with an adapter to chunk text into paragraphs and embed each paragraph using the all-MiniLM-L6-v2 384 dimensional text embedding model. import vecs from vecs.adapter import Adapter , ParagraphChunker , TextEmbedding # create vector store client vx = vecs . Client ( \"postgresql://:@:/\" ) # create a collection with an adapter docs = vx . get_or_create_collection ( name = \"docs\" , adapter = Adapter ( [ ParagraphChunker ( skip_during_query = True ), TextEmbedding ( model = 'all-MiniLM-L6-v2' ), ] ) ) With the adapter registered against the collection, we can upsert records into the collection passing in text rather than vectors. # add records to the collection using text as the media type docs . 
upsert ( records = [ ( \"vec0\" , \"four score and ....\" , # <- note that we can now pass text here { \"year\" : 1973 } ), ( \"vec1\" , \"hello, world!\" , { \"year\" : \"2012\" } ) ] ) Similarly, we can query the collection using text. # search by text docs . query ( data = \"foo bar\" ) In summary, Adapter s allow you to work with a collection as though they store your prefered data type natively.","title":"Example:"},{"location":"concepts_adapters/#built-in-adapters","text":"vecs provides several built-in Adapters. ParagraphChunker TextEmbedding Have an idea for a useful adapter? Open an issue requesting it.","title":"Built-in Adapters"},{"location":"concepts_adapters/#paragraphchunker","text":"The ParagraphChunker AdapterStep splits text media into paragraphs and yields each paragraph as a separate record. That can be a useful preprocessing step when upserting large documents that contain multiple paragraphs. The ParagraphChunker delimits paragraphs by two consecutive line breaks \\n\\n . ParagrphChunker is a pre-preocessing step and must be used in combination with another adapter step like TextEmbedding to transform the chunked text into a vector. from vecs.adapter import Adapter , ParagraphChunker ... vx . get_or_create_collection ( name = \"docs\" , adapter = Adapter ( [ ParagraphChunker ( skip_during_query = True ), ... ] ) ) When querying the collection, you probably do not want to chunk the text. To skip text chunking during queries, set the skip_during_query argument to True . Setting skip_during_query to False will raise an exception if the input text contains more than one paragraph.","title":"ParagraphChunker"},{"location":"concepts_adapters/#textembedding","text":"The TextEmbedding AdapterStep accepts text and converts it into a vector that can be consumed by the Collection . TextEmbedding supports all models available in the sentence_transformers package. A complete list of supported models is available in vecs.adapter.TextEmbeddingModel . 
from vecs.adapter import Adapter , TextEmbedding ... vx . get_or_create_collection ( name = \"docs\" , adapter = Adapter ( [ TextEmbedding ( model = 'all-MiniLM-L6-v2' ) ] ) ) # search by text docs . query ( data = \"foo bar\" )","title":"TextEmbedding"},{"location":"concepts_adapters/#interface","text":"Adapters are objects that take in data in the form of Iterable[Tuple[str, Any, Optional[Dict]]] where Tuple[str, Any, Optional[Dict]] represents records of (id, media, metadata) . The main use of Adapters is to transform the media part of the records into a form that is ready to be ingested into the collection (like converting text into embeddings). However, Adapters can also modify the id or metadata if required. Due to the common interface, adapters may be comprised of multiple adapter steps to create multi-stage preprocessing pipelines. For example, a multi-step adapter might first convert text into chunks and then convert each text chunk into an embedding vector.","title":"Interface"},{"location":"concepts_collections/","text":"Collections A collection is a group of vector records. Records can be added to or updated in a collection. Collections can be queried at any time, but should be indexed for scalable query performance. Each vector record has the form: Record ( id : String vec : Numeric [] metadata : JSON ) For example: ( \"vec1\" , [ 0.1 , 0.2 , 0.3 ], { \"year\" : 1990 }) Underneath every vecs collection is a Postgres table create table < collection_name > ( id string primary key , vec vector ( < dimension > ), metadata jsonb ) where rows in the table map 1:1 with vecs vector records. It is safe to select collection tables from outside the vecs client but issuing DDL is not recommended.","title":"Collections"},{"location":"concepts_collections/#collections","text":"A collection is a group of vector records. Records can be added to or updated in a collection. Collections can be queried at any time, but should be indexed for scalable query performance.
Each vector record has the form: Record ( id : String vec : Numeric [] metadata : JSON ) For example: ( \"vec1\" , [ 0.1 , 0.2 , 0.3 ], { \"year\" : 1990 }) Underneath every vecs collection is a Postgres table create table < collection_name > ( id string primary key , vec vector ( < dimension > ), metadata jsonb ) where rows in the table map 1:1 with vecs vector records. It is safe to select collection tables from outside the vecs client but issuing DDL is not recommended.","title":"Collections"},{"location":"concepts_indexes/","text":"Indexes Indexes are tools for optimizing query performance of a collection . Collections can be queried without an index, but that will emit a python warning and should never be done in production. query does not have a covering index for cosine_similarity. See Collection.create_index As each query vector must be checked against every record in the collection. When the number of dimensions and/or number of records becomes large, that becomes extremely slow and computationally expensive. An index is a heuristic data structure that pre-computes distances among key points in the vector space. It is smaller and can be traversed more quickly than the whole collection enabling much more performant searching. Only one index may exist per-collection. An index optimizes a collection for searching according to a selected distance measure. To create an index: docs . create_index () You may optionally provide a distance measure and index method. Available options for distance measure are: vecs.IndexMeasure.cosine_distance vecs.IndexMeasure.l2_distance vecs.IndexMeasure.max_inner_product which correspond to different methods for comparing query vectors to the vectors in the database. If you aren't sure which to use, the default of cosine_distance is the most widely compatible with off-the-shelf embedding methods.
Available options for index method are: vecs.IndexMethod.auto vecs.IndexMethod.hnsw vecs.IndexMethod.ivfflat Where auto selects the best available index method, hnsw uses the HNSW method and ivfflat uses IVFFlat . When using IVFFlat indexes, the index must be created after the collection has been populated with records. Building an IVFFlat index on an empty collection will result in significantly reduced recall. You can continue upserting new documents after the index has been created, but should rebuild the index if the size of the collection more than doubles since the last index operation. HNSW indexes can be created immediately after the collection without populating records. To manually specify method and measure , pass them as arguments to create_index for example: docs . create_index ( method = IndexMethod . hnsw , measure = IndexMeasure . cosine_distance , ) Note The time required to create an index grows with the number of records and size of vectors. For a few thousand records expect a response in under a minute. It may take a few minutes for larger collections.","title":"Indexes"},{"location":"concepts_indexes/#indexes","text":"Indexes are tools for optimizing query performance of a collection . Collections can be queried without an index, but that will emit a python warning and should never be done in production. query does not have a covering index for cosine_similarity. See Collection.create_index As each query vector must be checked against every record in the collection. When the number of dimensions and/or number of records becomes large, that becomes extremely slow and computationally expensive. An index is a heuristic data structure that pre-computes distances among key points in the vector space. It is smaller and can be traversed more quickly than the whole collection enabling much more performant searching. Only one index may exist per-collection. An index optimizes a collection for searching according to a selected distance measure.
To create an index: docs . create_index () You may optionally provide a distance measure and index method. Available options for distance measure are: vecs.IndexMeasure.cosine_distance vecs.IndexMeasure.l2_distance vecs.IndexMeasure.max_inner_product which correspond to different methods for comparing query vectors to the vectors in the database. If you aren't sure which to use, the default of cosine_distance is the most widely compatible with off-the-shelf embedding methods. Available options for index method are: vecs.IndexMethod.auto vecs.IndexMethod.hnsw vecs.IndexMethod.ivfflat Where auto selects the best available index method, hnsw uses the HNSW method and ivfflat uses IVFFlat . When using IVFFlat indexes, the index must be created after the collection has been populated with records. Building an IVFFlat index on an empty collection will result in significantly reduced recall. You can continue upserting new documents after the index has been created, but should rebuild the index if the size of the collection more than doubles since the last index operation. HNSW indexes can be created immediately after the collection without populating records. To manually specify method and measure , pass them as arguments to create_index for example: docs . create_index ( method = IndexMethod . hnsw , measure = IndexMeasure . cosine_distance , ) Note The time required to create an index grows with the number of records and size of vectors. For a few thousand records expect a response in under a minute. It may take a few minutes for larger collections.","title":"Indexes"},{"location":"concepts_metadata/","text":"Metadata vecs allows you to associate key-value pairs of metadata with indexes and ids in your collections. You can then add filters to queries that reference the metadata. Types Metadata is stored as binary JSON. As a result, allowed metadata types are drawn from JSON primitive types.
Boolean String Number The technical limit of a metadata field associated with a vector is 1GB. In practice you should keep metadata fields as small as possible to maximize performance. Metadata Query Language The metadata query language is based loosely on mongodb's selectors . vecs currently supports a subset of those operators. Comparison Operators Comparison operators compare a provided value with a value stored in metadata field of the vector store. Operator Description $eq Matches values that are equal to a specified value $ne Matches values that are not equal to a specified value $gt Matches values that are greater than a specified value $gte Matches values that are greater than or equal to a specified value $lt Matches values that are less than a specified value $lte Matches values that are less than or equal to a specified value $in Matches values that are contained by scalar list of specified values Logical Operators Logical operators compose other operators, and can be nested. Operator Description $and Joins query clauses with a logical AND returns all documents that match the conditions of both clauses. $or Joins query clauses with a logical OR returns all documents that match the conditions of either clause. Performance For best performance, use scalar key-value pairs for metadata and prefer $eq , $and and $or filters where possible. Those variants are most consistently able to make use of indexes. 
Examples year equals 2020 { \"year\" : { \"$eq\" : 2020 }} year equals 2020 or gross greater than or equal to 5000.0 { \"$or\" : [ { \"year\" : { \"$eq\" : 2020 }}, { \"gross\" : { \"$gte\" : 5000.0 }} ] } last_name is less than \"Brown\" and is_priority_customer is true { \"$and\" : [ { \"last_name\" : { \"$lt\" : \"Brown\" }}, { \"is_priority_customer\" : { \"$gte\" : 5000.00 }} ] } priority contained by [\"enterprise\", \"pro\"] { \"priority\" : { \"$in\" : [ \"enterprise\" , \"pro\" ]} }","title":"Metadata"},{"location":"concepts_metadata/#metadata","text":"vecs allows you to associate key-value pairs of metadata with indexes and ids in your collections. You can then add filters to queries that reference the metadata.","title":"Metadata"},{"location":"concepts_metadata/#types","text":"Metadata is stored as binary JSON. As a result, allowed metadata types are drawn from JSON primitive types. Boolean String Number The technical limit of a metadata field associated with a vector is 1GB. In practice you should keep metadata fields as small as possible to maximize performance.","title":"Types"},{"location":"concepts_metadata/#metadata-query-language","text":"The metadata query language is based loosely on mongodb's selectors . vecs currently supports a subset of those operators.","title":"Metadata Query Language"},{"location":"concepts_metadata/#comparison-operators","text":"Comparison operators compare a provided value with a value stored in metadata field of the vector store.
Operator Description $eq Matches values that are equal to a specified value $ne Matches values that are not equal to a specified value $gt Matches values that are greater than a specified value $gte Matches values that are greater than or equal to a specified value $lt Matches values that are less than a specified value $lte Matches values that are less than or equal to a specified value $in Matches values that are contained by scalar list of specified values","title":"Comparison Operators"},{"location":"concepts_metadata/#logical-operators","text":"Logical operators compose other operators, and can be nested. Operator Description $and Joins query clauses with a logical AND returns all documents that match the conditions of both clauses. $or Joins query clauses with a logical OR returns all documents that match the conditions of either clause.","title":"Logical Operators"},{"location":"concepts_metadata/#performance","text":"For best performance, use scalar key-value pairs for metadata and prefer $eq , $and and $or filters where possible. Those variants are most consistently able to make use of indexes.","title":"Performance"},{"location":"concepts_metadata/#examples","text":"year equals 2020 { \"year\" : { \"$eq\" : 2020 }} year equals 2020 or gross greater than or equal to 5000.0 { \"$or\" : [ { \"year\" : { \"$eq\" : 2020 }}, { \"gross\" : { \"$gte\" : 5000.0 }} ] } last_name is less than \"Brown\" and is_priority_customer is true { \"$and\" : [ { \"last_name\" : { \"$lt\" : \"Brown\" }}, { \"is_priority_customer\" : { \"$gte\" : 5000.00 }} ] } priority contained by [\"enterprise\", \"pro\"] { \"priority\" : { \"$in\" : [ \"enterprise\" , \"pro\" ]} }","title":"Examples"},{"location":"hosting/","text":"Deployment vecs is comatible with any Postgres 13+ with the pgvector extension installed. In the following we show we show instructions for hosting a database on Supabase and locally in docker since both are fast and free. 
Supabase Cloud Hosted Create an account Create a supabase account at https://app.supabase.com/sign-up . Create a new project Select New Project Complete the prompts. Be sure to remember or write down your password as we'll need that when connecting with vecs. Connection Info On the project page, navigate to Settings > Database > Database Settings and substitute those fields into the connection string postgresql://:@:/ i.e. postgres://postgres:[YOUR PASSWORD]@db.cvykdyhlwwwojivopztl.supabase.co:5432/postgres Keep that connection string secret and safe. It's your DB_CONNECTION in the quickstart guide , Local You can also use Supabase locally on your machine. Doing so will keep your project setup consistent when deploying to hosted Supabase. Install the CLI To install the CLI, use the relevant system instructions below macOS Windows Linux npm brew install supabase/tap/supabase scoop bucket add supabase https://github.com/supabase/scoop-bucket.git scoop install supabase Linux packages are provided in Releases. To install, download the .apk/.deb/.rpm file depending on your package manager and run one of the following: sudo apk add --allow-untrusted <...>.apk or sudo dpkg -i <...>.deb or sudo rpm -i <...>.rpm npm install supabase --save-dev Start the Project From your project directory, create the supabase/ sub-directory required for supabase projects by running: supabase init next start the application using: supabase start which will download the latest Supabase containers and provide a URL to each service: Seeding data supabase/seed.sql...me... Started supabase local development setup.
API URL: http://localhost:54321 GraphQL URL: http://localhost:54321/graphql/v1 DB URL: postgresql://postgres:postgres@localhost:54322/postgres Studio URL: http://localhost:54323 Inbucket URL: http://localhost:54324 JWT secret: super-secret-jwt-token-with-at-least-32-characters-long anon key: eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJpc3MiOiJzdXBhYmFz service_role key: eyJhbGciOiJIUzI1NiIsInR5cClJzdJsyH-qQwv8Hdp7fsn3W0YpN81IU The service we need for vecs is DB URL . Note it down for use as our DB_CONNECTION postgresql://:@:/ For more info on running a local Supabase project, checkout the Supabase CLI guide Docker Install docker if you don't have it already at Get Docker Start the Postgres Container Next, run docker run --rm -d \\ --name vecs_hosting_guide \\ -p 5019 :5432 \\ -e POSTGRES_DB = vecs_db \\ -e POSTGRES_PASSWORD = password \\ -e POSTGRES_USER = postgres \\ supabase/postgres:15.1.0.74 Connection Info Substitue the values from the previous section into the postgres conenction string postgresql://:@:/ i.e. postgresql://postgres:password@localhost:5019/vecs_db Keep that connection string secret and safe. Its your DB_CONNECTION in the quickstart guide","title":"Hosting"},{"location":"hosting/#deployment","text":"vecs is comatible with any Postgres 13+ with the pgvector extension installed. In the following we show we show instructions for hosting a database on Supabase and locally in docker since both are fast and free.","title":"Deployment"},{"location":"hosting/#supabase","text":"","title":"Supabase"},{"location":"hosting/#cloud-hosted","text":"","title":"Cloud Hosted"},{"location":"hosting/#create-an-account","text":"Create a supabase account at https://app.supabase.com/sign-up .","title":"Create an account"},{"location":"hosting/#create-a-new-project","text":"Select New Project Complete the prompts. 
Be sure to remember or write down your password as we'll need that when connecting with vecs.","title":"Create a new project"},{"location":"hosting/#connection-info","text":"On the project page, navigate to Settings > Database > Database Settings and substitute those fields into the connection string postgresql://:@:/ i.e. postgres://postgres:[YOUR PASSWORD]@db.cvykdyhlwwwojivopztl.supabase.co:5432/postgres Keep that connection string secret and safe. It's your DB_CONNECTION in the quickstart guide ,","title":"Connection Info"},{"location":"hosting/#local","text":"You can also use Supabase locally on your machine. Doing so will keep your project setup consistent when deploying to hosted Supabase.","title":"Local"},{"location":"hosting/#install-the-cli","text":"To install the CLI, use the relevant system instructions below macOS Windows Linux npm brew install supabase/tap/supabase scoop bucket add supabase https://github.com/supabase/scoop-bucket.git scoop install supabase Linux packages are provided in Releases. To install, download the .apk/.deb/.rpm file depending on your package manager and run one of the following: sudo apk add --allow-untrusted <...>.apk or sudo dpkg -i <...>.deb or sudo rpm -i <...>.rpm npm install supabase --save-dev","title":"Install the CLI"},{"location":"hosting/#start-the-project","text":"From your project directory, create the supabase/ sub-directory required for supabase projects by running: supabase init next start the application using: supabase start which will download the latest Supabase containers and provide a URL to each service: Seeding data supabase/seed.sql...me... Started supabase local development setup.
API URL: http://localhost:54321 GraphQL URL: http://localhost:54321/graphql/v1 DB URL: postgresql://postgres:postgres@localhost:54322/postgres Studio URL: http://localhost:54323 Inbucket URL: http://localhost:54324 JWT secret: super-secret-jwt-token-with-at-least-32-characters-long anon key: eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJpc3MiOiJzdXBhYmFz service_role key: eyJhbGciOiJIUzI1NiIsInR5cClJzdJsyH-qQwv8Hdp7fsn3W0YpN81IU The service we need for vecs is DB URL . Note it down for use as our DB_CONNECTION postgresql://:@:/ For more info on running a local Supabase project, checkout the Supabase CLI guide","title":"Start the Project"},{"location":"hosting/#docker","text":"Install docker if you don't have it already at Get Docker","title":"Docker"},{"location":"hosting/#start-the-postgres-container","text":"Next, run docker run --rm -d \\ --name vecs_hosting_guide \\ -p 5019 :5432 \\ -e POSTGRES_DB = vecs_db \\ -e POSTGRES_PASSWORD = password \\ -e POSTGRES_USER = postgres \\ supabase/postgres:15.1.0.74","title":"Start the Postgres Container"},{"location":"hosting/#connection-info_1","text":"Substitue the values from the previous section into the postgres conenction string postgresql://:@:/ i.e. postgresql://postgres:password@localhost:5019/vecs_db Keep that connection string secret and safe. Its your DB_CONNECTION in the quickstart guide","title":"Connection Info"},{"location":"integrations_huggingface_inference_endpoints/","text":"Integration: Hugging Face Inference Endpoints This guide will walk you through an example integration of the Hugging Face Inference API with vecs. We will create embeddings using Hugging Face's sentence-transformers/all-MiniLM-L6-v2 model, insert these embeddings into a PostgreSQL database using vecs, and then query vecs to find the most similar sentences to a given query sentence. Create a Hugging Face Inference Endpoint Head over to Hugging Face's inference endpoints and select New Endpoint . 
Configure your endpoint with your model and provider of choice. In this example we'll use sentence-transformers/all-MiniLM-L6-v2 and AWS . Under \"Advanced Configuration\" select \"Sentence Embeddings\" as the \"Task\". Then click \"Create Endpoint\" Once the endpoint starts up, take note of the Endpoint URL Tip Don't forget to pause or delete your Hugging Face Inference Endpoint when you're not using it Finally, create and copy an API key we can use to authenticate with the inference endpoint. Create an Environment Next, you need to set up your environment. You will need Python 3.7+ with the vecs and requests installed. pip install vecs requests You'll also need a Postgres Database with the pgvector extension Create Embeddings We can use the Hugging Face endpoint to create embeddings for a set of sentences. import requests import json huggingface_endpoint_url = '' huggingface_api_key = '' dataset = [ \"The cat sat on the mat.\" , \"The quick brown fox jumps over the lazy dog.\" , \"Friends, Romans, countrymen, lend me your ears\" , \"To be or not to be, that is the question.\" , ] records = [] for sentence in dataset : response = requests . post ( huggingface_endpoint_url , headers = { \"Authorization\" : f \"Bearer { huggingface_api_key } \" , \"Content-Type\" : \"application/json\" }, json = { \"inputs\" : sentence } ) embedding = response . json ()[ \"embeddings\" ] records . append (( sentence , embedding , {})) Store the Embeddings with vecs Now that we have our embeddings, we can insert them into a PostgreSQL database using vecs subbing in your DB_CONNECTION string. import vecs DB_CONNECTION = \"postgresql://:@:/\" # create vector store client vx = vecs . Client ( DB_CONNECTION ) # create a collection named 'sentences' with 384 dimensional vectors (default dimension for paraphrase-MiniLM-L6-v2) sentences = vx . get_or_create_collection ( name = \"sentences\" , dimension = 384 ) # upsert the embeddings into the 'sentences' collection sentences . 
upsert ( records = records ) # create an index for the 'sentences' collection sentences . create_index () Querying for Most Similar Sentences Finally, we can query vecs to find the most similar sentences to a given query sentence. The query sentence is embedded using the same method as the sentences in the dataset, then we query the sentences collection with vecs. query_sentence = \"A quick animal jumps over a lazy one.\" # create an embedding for the query sentence response = requests . post ( huggingface_endpoint_url , headers = { \"Authorization\" : f \"Bearer { huggingface_api_key } \" , \"Content-Type\" : \"application/json\" }, json = { \"inputs\" : query_sentence } ) query_embedding = response . json ()[ \"embeddings\" ] # query the 'sentences' collection for the most similar sentences results = sentences . query ( data = query_embedding , limit = 3 , include_value = True ) # print the results for result in results : print ( result ) Returns the most similar 3 records and their distance to the query vector. ('The quick brown fox jumps over the lazy dog.', 0.256648302882697) ('The cat sat on the mat.', 0.78635900041167) ('To be or not to be, that is the question.', 1.04114070479544)","title":"HuggingFace Inference Endpoints"},{"location":"integrations_huggingface_inference_endpoints/#integration-hugging-face-inference-endpoints","text":"This guide will walk you through an example integration of the Hugging Face Inference API with vecs. We will create embeddings using Hugging Face's sentence-transformers/all-MiniLM-L6-v2 model, insert these embeddings into a PostgreSQL database using vecs, and then query vecs to find the most similar sentences to a given query sentence.","title":"Integration: Hugging Face Inference Endpoints"},{"location":"integrations_huggingface_inference_endpoints/#create-a-hugging-face-inference-endpoint","text":"Head over to Hugging Face's inference endpoints and select New Endpoint .
Configure your endpoint with your model and provider of choice. In this example we'll use sentence-transformers/all-MiniLM-L6-v2 and AWS . Under \"Advanced Configuration\" select \"Sentence Embeddings\" as the \"Task\". Then click \"Create Endpoint\" Once the endpoint starts up, take note of the Endpoint URL Tip Don't forget to pause or delete your Hugging Face Inference Endpoint when you're not using it Finally, create and copy an API key we can use to authenticate with the inference endpoint.","title":"Create a Hugging Face Inference Endpoint"},{"location":"integrations_huggingface_inference_endpoints/#create-an-environment","text":"Next, you need to set up your environment. You will need Python 3.7+ with the vecs and requests installed. pip install vecs requests You'll also need a Postgres Database with the pgvector extension","title":"Create an Environment"},{"location":"integrations_huggingface_inference_endpoints/#create-embeddings","text":"We can use the Hugging Face endpoint to create embeddings for a set of sentences. import requests import json huggingface_endpoint_url = '' huggingface_api_key = '' dataset = [ \"The cat sat on the mat.\" , \"The quick brown fox jumps over the lazy dog.\" , \"Friends, Romans, countrymen, lend me your ears\" , \"To be or not to be, that is the question.\" , ] records = [] for sentence in dataset : response = requests . post ( huggingface_endpoint_url , headers = { \"Authorization\" : f \"Bearer { huggingface_api_key } \" , \"Content-Type\" : \"application/json\" }, json = { \"inputs\" : sentence } ) embedding = response . json ()[ \"embeddings\" ] records . append (( sentence , embedding , {}))","title":"Create Embeddings"},{"location":"integrations_huggingface_inference_endpoints/#store-the-embeddings-with-vecs","text":"Now that we have our embeddings, we can insert them into a PostgreSQL database using vecs subbing in your DB_CONNECTION string. 
import vecs DB_CONNECTION = \"postgresql://:@:/\" # create vector store client vx = vecs . Client ( DB_CONNECTION ) # create a collection named 'sentences' with 384 dimensional vectors (default dimension for paraphrase-MiniLM-L6-v2) sentences = vx . get_or_create_collection ( name = \"sentences\" , dimension = 384 ) # upsert the embeddings into the 'sentences' collection sentences . upsert ( records = records ) # create an index for the 'sentences' collection sentences . create_index ()","title":"Store the Embeddings with vecs"},{"location":"integrations_huggingface_inference_endpoints/#querying-for-most-similar-sentences","text":"Finally, we can query vecs to find the most similar sentences to a given query sentence. The query sentence is embedded using the same method as the sentences in the dataset, then we query the sentences collection with vecs. query_sentence = \"A quick animal jumps over a lazy one.\" # create an embedding for the query sentence response = requests . post ( huggingface_endpoint_url , headers = { \"Authorization\" : f \"Bearer { huggingface_api_key } \" , \"Content-Type\" : \"application/json\" }, json = { \"inputs\" : query_sentence } ) query_embedding = response . json ()[ \"embeddings\" ] # query the 'sentences' collection for the most similar sentences results = sentences . query ( data = query_embedding , limit = 3 , include_value = True ) # print the results for result in results : print ( result ) Returns the most similar 3 records and theirdistance to the query vector. ('The quick brown fox jumps over the lazy dog.', 0.256648302882697) ('The cat sat on the mat.', 0.78635900041167) ('To be or not to be, that is the question.', 1.04114070479544)","title":"Querying for Most Similar Sentences"},{"location":"integrations_openai/","text":"Integration: Open AI This guide will walk you through an example integration of the OpenAI API with the vecs Python library. 
We will create embeddings using OpenAI's text-embedding-ada-002 model, insert these embeddings into a PostgreSQL database using vecs, and then query vecs to find the most similar sentences to a given query sentence. Create an Environment First, you need to set up your environment. You will need Python 3.7 with the vecs and openai libraries installed. You can install the necessary Python libraries using pip: pip install vecs openai You'll also need: An OpenAI API Key A Postgres Database with the pgvector extension Create Embeddings Next, we will use OpenAI's text-embedding-ada-002 model to create embeddings for a set of sentences. import openai openai . api_key = '' dataset = [ \"The cat sat on the mat.\" , \"The quick brown fox jumps over the lazy dog.\" , \"Friends, Romans, countrymen, lend me your ears\" , \"To be or not to be, that is the question.\" , ] embeddings = [] for sentence in dataset : response = openai . Embedding . create ( model = \"text-embedding-ada-002\" , input = [ sentence ] ) embeddings . append (( sentence , response [ \"data\" ][ 0 ][ \"embedding\" ], {})) Store the Embeddings with vecs Now that we have our embeddings, we can insert them into a PostgreSQL database using vecs. import vecs DB_CONNECTION = \"postgresql://:@:/\" # create vector store client vx = vecs . Client ( DB_CONNECTION ) # create a collection named 'sentences' with 1536 dimensional vectors (default dimension for text-embedding-ada-002) sentences = vx . get_or_create_collection ( name = \"sentences\" , dimension = 1536 ) # upsert the embeddings into the 'sentences' collection sentences . upsert ( records = embeddings ) # create an index for the 'sentences' collection sentences . create_index () Querying for Most Similar Sentences Finally, we can query vecs to find the most similar sentences to a given query sentence. We will first need to create an embedding for the query sentence using the text-embedding-ada-002 model. 
query_sentence = \"A quick animal jumps over a lazy one.\" # create an embedding for the query sentence response = openai . Embedding . create ( model = \"text-embedding-ada-002\" , input = [ query_sentence ] ) query_embedding = response [ \"data\" ][ 0 ][ \"embedding\" ] # query the 'sentences' collection for the most similar sentences results = sentences . query ( data = query_embedding , limit = 3 , include_value = True ) # print the results for result in results : print ( result ) Returns the most similar 3 records and their distance to the query vector. ('The quick brown fox jumps over the lazy dog.', 0.0633971456300456) ('The cat sat on the mat.', 0.16474785399561) ('To be or not to be, that is the question.', 0.24531234467506)","title":"OpenAI"},{"location":"integrations_openai/#integration-open-ai","text":"This guide will walk you through an example integration of the OpenAI API with the vecs Python library. We will create embeddings using OpenAI's text-embedding-ada-002 model, insert these embeddings into a PostgreSQL database using vecs, and then query vecs to find the most similar sentences to a given query sentence.","title":"Integration: Open AI"},{"location":"integrations_openai/#create-an-environment","text":"First, you need to set up your environment. You will need Python 3.7 with the vecs and openai libraries installed. You can install the necessary Python libraries using pip: pip install vecs openai You'll also need: An OpenAI API Key A Postgres Database with the pgvector extension","title":"Create an Environment"},{"location":"integrations_openai/#create-embeddings","text":"Next, we will use OpenAI's text-embedding-ada-002 model to create embeddings for a set of sentences. import openai openai . 
api_key = '' dataset = [ \"The cat sat on the mat.\" , \"The quick brown fox jumps over the lazy dog.\" , \"Friends, Romans, countrymen, lend me your ears\" , \"To be or not to be, that is the question.\" , ] embeddings = [] for sentence in dataset : response = openai . Embedding . create ( model = \"text-embedding-ada-002\" , input = [ sentence ] ) embeddings . append (( sentence , response [ \"data\" ][ 0 ][ \"embedding\" ], {}))","title":"Create Embeddings"},{"location":"integrations_openai/#store-the-embeddings-with-vecs","text":"Now that we have our embeddings, we can insert them into a PostgreSQL database using vecs. import vecs DB_CONNECTION = \"postgresql://:@:/\" # create vector store client vx = vecs . Client ( DB_CONNECTION ) # create a collection named 'sentences' with 1536 dimensional vectors (default dimension for text-embedding-ada-002) sentences = vx . get_or_create_collection ( name = \"sentences\" , dimension = 1536 ) # upsert the embeddings into the 'sentences' collection sentences . upsert ( records = embeddings ) # create an index for the 'sentences' collection sentences . create_index ()","title":"Store the Embeddings with vecs"},{"location":"integrations_openai/#querying-for-most-similar-sentences","text":"Finally, we can query vecs to find the most similar sentences to a given query sentence. We will first need to create an embedding for the query sentence using the text-embedding-ada-002 model. query_sentence = \"A quick animal jumps over a lazy one.\" # create an embedding for the query sentence response = openai . Embedding . create ( model = \"text-embedding-ada-002\" , input = [ query_sentence ] ) query_embedding = response [ \"data\" ][ 0 ][ \"embedding\" ] # query the 'sentences' collection for the most similar sentences results = sentences . 
query ( data = query_embedding , limit = 3 , include_value = True ) # print the results for result in results : print ( result ) Returns the most similar 3 records and their distance to the query vector. ('The quick brown fox jumps over the lazy dog.', 0.0633971456300456) ('The cat sat on the mat.', 0.16474785399561) ('To be or not to be, that is the question.', 0.24531234467506)","title":"Querying for Most Similar Sentences"},{"location":"support_changelog/","text":"Changelog 0.1.0 Initial release 0.2.7 Feature: Added vecs.Collection.disconnect() to drop database connection Feature: vecs.Client can be used as a context maanger to auto-close connections Feature: Uses (indexed) containment operator @> for metadata equality filters where possible Docs: Added docstrings to all methods, functions and modules 0.3.0 Feature: Collections can have adapters allowing upserting/querying by native media t types Breaking Change: Renamed argument Collection.upsert(vectors, ...) to Collection.upsert(records, ...) in support of adapters Breaking Change: Renamed argument Collection.query(query_vector, ...) to Collection.query(data, ...) 
in support of adapters 0.3.1 Feature: Metadata filtering with $in 0.4.0 Feature: pgvector 0.5.0 Feature: HNSW index support 0.4.1 Bugfix: removed errant print statement master","title":"Changelog"},{"location":"support_changelog/#changelog","text":"","title":"Changelog"},{"location":"support_changelog/#010","text":"Initial release","title":"0.1.0"},{"location":"support_changelog/#027","text":"Feature: Added vecs.Collection.disconnect() to drop database connection Feature: vecs.Client can be used as a context maanger to auto-close connections Feature: Uses (indexed) containment operator @> for metadata equality filters where possible Docs: Added docstrings to all methods, functions and modules","title":"0.2.7"},{"location":"support_changelog/#030","text":"Feature: Collections can have adapters allowing upserting/querying by native media t types Breaking Change: Renamed argument Collection.upsert(vectors, ...) to Collection.upsert(records, ...) in support of adapters Breaking Change: Renamed argument Collection.query(query_vector, ...) to Collection.query(data, ...) in support of adapters","title":"0.3.0"},{"location":"support_changelog/#031","text":"Feature: Metadata filtering with $in","title":"0.3.1"},{"location":"support_changelog/#040","text":"Feature: pgvector 0.5.0 Feature: HNSW index support","title":"0.4.0"},{"location":"support_changelog/#041","text":"Bugfix: removed errant print statement","title":"0.4.1"},{"location":"support_changelog/#master","text":"","title":"master"}]}
\ No newline at end of file
+{"config":{"indexing":"full","lang":["en"],"min_search_length":3,"prebuild_index":false,"separator":"[\\s\\-]+"},"docs":[{"location":"","text":"vecs Documentation : https://supabase.github.io/vecs/ Source Code : https://github.com/supabase/vecs Vecs is a Python client library for managing and querying vector stores in PostgreSQL, leveraging the capabilities of the pgvector extension . Overview Vector Management: create collections to persist and update vectors in a PostgreSQL database. Querying: Query vectors efficiently using measures such as cosine distance, l2 distance, or max inner product. Metadata: Each vector can have associated metadata, which can also be used as filters during queries. Hybrid Data: vecs creates its own schema and can coexist with your existing relational data Visit the quickstart guide for how to get started. TL;DR Install pip install vecs Usage import vecs DB_CONNECTION = \"postgresql://:@:/\" # create vector store client vx = vecs . create_client ( DB_CONNECTION ) # create a collection of vectors with 3 dimensions docs = vx . get_or_create_collection ( name = \"docs\" , dimension = 3 ) # add records to the *docs* collection docs . upsert ( records = [ ( \"vec0\" , # the vector's identifier [ 0.1 , 0.2 , 0.3 ], # the vector. list or np.array { \"year\" : 1973 } # associated metadata ), ( \"vec1\" , [ 0.7 , 0.8 , 0.9 ], { \"year\" : 2012 } ) ] ) # index the collection for fast search performance docs . create_index () # query the collection filtering metadata for \"year\" = 2012 docs . query ( data = [ 0.4 , 0.5 , 0.6 ], # required limit = 1 , # number of records to return filters = { \"year\" : { \"$eq\" : 2012 }}, # metadata filters ) # Returns: [\"vec1\"] # Disconnect from the database vx . 
disconnect ()","title":"Introduction"},{"location":"#vecs","text":"Documentation : https://supabase.github.io/vecs/ Source Code : https://github.com/supabase/vecs Vecs is a Python client library for managing and querying vector stores in PostgreSQL, leveraging the capabilities of the pgvector extension .","title":"vecs"},{"location":"#overview","text":"Vector Management: create collections to persist and update vectors in a PostgreSQL database. Querying: Query vectors efficiently using measures such as cosine distance, l2 distance, or max inner product. Metadata: Each vector can have associated metadata, which can also be used as filters during queries. Hybrid Data: vecs creates its own schema and can coexist with your existing relational data Visit the quickstart guide for how to get started.","title":"Overview"},{"location":"#tldr","text":"","title":"TL;DR"},{"location":"#install","text":"pip install vecs","title":"Install"},{"location":"#usage","text":"import vecs DB_CONNECTION = \"postgresql://:@:/\" # create vector store client vx = vecs . create_client ( DB_CONNECTION ) # create a collection of vectors with 3 dimensions docs = vx . get_or_create_collection ( name = \"docs\" , dimension = 3 ) # add records to the *docs* collection docs . upsert ( records = [ ( \"vec0\" , # the vector's identifier [ 0.1 , 0.2 , 0.3 ], # the vector. list or np.array { \"year\" : 1973 } # associated metadata ), ( \"vec1\" , [ 0.7 , 0.8 , 0.9 ], { \"year\" : 2012 } ) ] ) # index the collection for fast search performance docs . create_index () # query the collection filtering metadata for \"year\" = 2012 docs . query ( data = [ 0.4 , 0.5 , 0.6 ], # required limit = 1 , # number of records to return filters = { \"year\" : { \"$eq\" : 2012 }}, # metadata filters ) # Returns: [\"vec1\"] # Disconnect from the database vx . 
disconnect ()","title":"Usage"},{"location":"api/","text":"API vecs is a python client for managing and querying vector stores in PostgreSQL with the pgvector extension . This guide will help you get started with using vecs. If you don't have a Postgres database with the pgvector ready, see hosting for easy options. Installation Requires: Python 3.7+ You can install vecs using pip: pip install vecs Usage Connecting Before you can interact with vecs, create the client to communicate with Postgres. If you haven't started a Postgres instance yet, see hosting . import vecs DB_CONNECTION = \"postgresql://:@:/\" # create vector store client vx = vecs . create_client ( DB_CONNECTION ) Get or Create a Collection You can get a collection (or create if it doesn't exist), specifying the collection's name and the number of dimensions for the vectors you intend to store. docs = vx . get_or_create_collection ( name = \"docs\" , dimension = 3 ) Upserting vectors vecs combines the concepts of \"insert\" and \"update\" into \"upsert\". Upserting records adds them to the collection if the id is not present, or updates the existing record if the id does exist. # add records to the collection docs . upsert ( records = [ ( \"vec0\" , # the vector's identifier [ 0.1 , 0.2 , 0.3 ], # the vector. list or np.array { \"year\" : 1973 } # associated metadata ), ( \"vec1\" , [ 0.7 , 0.8 , 0.9 ], { \"year\" : 2012 } ) ] ) Deleting vectors Deleting records removes them from the collection. To delete records, specify a list of ids to the delete method. The ids of the successfully deleted records are returned from the method. Note that attempting to delete non-existent records does not raise an error. docs . delete ( ids = [ \"vec0\" , \"vec1\" ]) Create an index Collections can be queried immediately after being created. However, for good throughput, the collection should be indexed after records have been upserted. Only one index may exist per-collection. 
By default, creating an index will replace any existing index. To create an index: docs . create_index () You may optionally provide a distance measure and index method. Available options for distance measure are: vecs.IndexMeasure.cosine_distance vecs.IndexMeasure.l2_distance vecs.IndexMeasure.max_inner_product which correspond to different methods for comparing query vectors to the vectors in the database. If you aren't sure which to use, the default of cosine_distance is the most widely compatible with off-the-shelf embedding methods. Available options for index method are: vecs.IndexMethod.auto vecs.IndexMethod.hnsw vecs.IndexMethod.ivfflat Where auto selects the best available index method, hnsw uses the HNSW method and ivfflat uses IVFFlat . When using IVFFlat indexes, the index must be created after the collection has been populated with records. Building an IVFFlat index on an empty collection will result in significantly reduced recall. You can continue upserting new documents after the index has been created, but should rebuild the index if the size of the collection more than doubles since the last index operation. HNSW indexes can be created immediately after the collection without populating records. To manually specify method and measure , add them as arguments to create_index for example: docs . create_index ( method = IndexMethod . hnsw , measure = IndexMeasure . cosine_distance , ) Note The time required to create an index grows with the number of records and size of vectors. For a few thousand records expect a response in under a minute. It may take a few minutes for larger collections. Query Given a collection docs with several records: Basic The simplest form of search is to provide a query vector. Note Indexes are essential for good performance. See creating an index for more info. If you do not create an index, every query will return a warning query does not have a covering index for cosine_similarity. 
See Collection.create_index that includes the IndexMeasure you should index. docs . query ( data = [ 0.4 , 0.5 , 0.6 ], # required limit = 5 , # number of records to return filters = {}, # metadata filters measure = \"cosine_distance\" , # distance measure to use include_value = False , # should distance measure values be returned? include_metadata = False , # should record metadata be returned? ) Which returns a list of vector record ids . Metadata Filtering The metadata that is associated with each record can also be filtered during a query. As an example, {\"year\": {\"$eq\": 2005}} filters a year metadata key to be equal to 2005 In context: docs . query ( data = [ 0.4 , 0.5 , 0.6 ], filters = { \"year\" : { \"$eq\" : 2012 }}, # metadata filters ) For a complete reference, see the metadata guide . Disconnect When you're done with a collection, be sure to disconnect the client from the database. vx . disconnect () alternatively, use the client as a context manager and it will automatically close the connection on exit. import vecs DB_CONNECTION = \"postgresql://:@:/\" # create vector store client with vecs . create_client ( DB_CONNECTION ) as vx : # do some work here pass # connections are now closed Adapters Adapters are an optional feature to transform data before adding to or querying from a collection. Adapters make it possible to interact with a collection using only your project's native data type (eg. just raw text), rather than manually handling vectors. For a complete list of available adapters, see built-in adapters . As an example, we'll create a collection with an adapter that chunks text into paragraphs and converts each chunk into an embedding vector using the all-MiniLM-L6-v2 model. First, install vecs with optional dependencies for text embeddings: pip install \"vecs[text_embedding]\" Then create a collection with an adapter to chunk text into paragraphs and embed each paragraph using the all-MiniLM-L6-v2 384 dimensional text embedding model. 
import vecs from vecs.adapter import Adapter , ParagraphChunker , TextEmbedding # create vector store client vx = vecs . Client ( \"postgresql://:@:/\" ) # create a collection with an adapter docs = vx . get_or_create_collection ( name = \"docs\" , adapter = Adapter ( [ ParagraphChunker ( skip_during_query = True ), TextEmbedding ( model = 'all-MiniLM-L6-v2' ), ] ) ) With the adapter registered against the collection, we can upsert records into the collection passing in text rather than vectors. # add records to the collection using text as the media type docs . upsert ( records = [ ( \"vec0\" , \"four score and ....\" , # <- note that we can now pass text here { \"year\" : 1973 } ), ( \"vec1\" , \"hello, world!\" , { \"year\" : \"2012\" } ) ] ) Similarly, we can query the collection using text. # search by text docs . query ( data = \"foo bar\" ) Deprecated Create collection Note Deprecated: use get_or_create_collection You can create a collection to store vectors specifying the collections name and the number of dimensions in the vectors you intend to store. docs = vx . create_collection ( name = \"docs\" , dimension = 3 ) Get an existing collection Note Deprecated: use get_or_create_collection To access a previously created collection, use get_collection to retrieve it by name docs = vx . get_collection ( name = \"docs\" )","title":"API"},{"location":"api/#api","text":"vecs is a python client for managing and querying vector stores in PostgreSQL with the pgvector extension . This guide will help you get started with using vecs. If you don't have a Postgres database with the pgvector ready, see hosting for easy options.","title":"API"},{"location":"api/#installation","text":"Requires: Python 3.7+ You can install vecs using pip: pip install vecs","title":"Installation"},{"location":"api/#usage","text":"","title":"Usage"},{"location":"api/#connecting","text":"Before you can interact with vecs, create the client to communicate with Postgres. 
If you haven't started a Postgres instance yet, see hosting . import vecs DB_CONNECTION = \"postgresql://:@:/\" # create vector store client vx = vecs . create_client ( DB_CONNECTION )","title":"Connecting"},{"location":"api/#get-or-create-a-collection","text":"You can get a collection (or create if it doesn't exist), specifying the collection's name and the number of dimensions for the vectors you intend to store. docs = vx . get_or_create_collection ( name = \"docs\" , dimension = 3 )","title":"Get or Create a Collection"},{"location":"api/#upserting-vectors","text":"vecs combines the concepts of \"insert\" and \"update\" into \"upsert\". Upserting records adds them to the collection if the id is not present, or updates the existing record if the id does exist. # add records to the collection docs . upsert ( records = [ ( \"vec0\" , # the vector's identifier [ 0.1 , 0.2 , 0.3 ], # the vector. list or np.array { \"year\" : 1973 } # associated metadata ), ( \"vec1\" , [ 0.7 , 0.8 , 0.9 ], { \"year\" : 2012 } ) ] )","title":"Upserting vectors"},{"location":"api/#deleting-vectors","text":"Deleting records removes them from the collection. To delete records, specify a list of ids to the delete method. The ids of the successfully deleted records are returned from the method. Note that attempting to delete non-existent records does not raise an error. docs . delete ( ids = [ \"vec0\" , \"vec1\" ])","title":"Deleting vectors"},{"location":"api/#create-an-index","text":"Collections can be queried immediately after being created. However, for good throughput, the collection should be indexed after records have been upserted. Only one index may exist per-collection. By default, creating an index will replace any existing index. To create an index: docs . create_index () You may optionally provide a distance measure and index method. 
Available options for distance measure are: vecs.IndexMeasure.cosine_distance vecs.IndexMeasure.l2_distance vecs.IndexMeasure.max_inner_product which correspond to different methods for comparing query vectors to the vectors in the database. If you aren't sure which to use, the default of cosine_distance is the most widely compatible with off-the-shelf embedding methods. Available options for index method are: vecs.IndexMethod.auto vecs.IndexMethod.hnsw vecs.IndexMethod.ivfflat Where auto selects the best available index method, hnsw uses the HNSW method and ivfflat uses IVFFlat . When using IVFFlat indexes, the index must be created after the collection has been populated with records. Building an IVFFlat index on an empty collection will result in significantly reduced recall. You can continue upserting new documents after the index has been created, but should rebuild the index if the size of the collection more than doubles since the last index operation. HNSW indexes can be created immediately after the collection without populating records. To manually specify method and measure , add them as arguments to create_index for example: docs . create_index ( method = IndexMethod . hnsw , measure = IndexMeasure . cosine_distance , ) Note The time required to create an index grows with the number of records and size of vectors. For a few thousand records expect a response in under a minute. It may take a few minutes for larger collections.","title":"Create an index"},{"location":"api/#query","text":"Given a collection docs with several records:","title":"Query"},{"location":"api/#basic","text":"The simplest form of search is to provide a query vector. Note Indexes are essential for good performance. See creating an index for more info. If you do not create an index, every query will return a warning query does not have a covering index for cosine_similarity. See Collection.create_index that includes the IndexMeasure you should index. docs . 
query ( data = [ 0.4 , 0.5 , 0.6 ], # required limit = 5 , # number of records to return filters = {}, # metadata filters measure = \"cosine_distance\" , # distance measure to use include_value = False , # should distance measure values be returned? include_metadata = False , # should record metadata be returned? ) Which returns a list of vector record ids .","title":"Basic"},{"location":"api/#metadata-filtering","text":"The metadata that is associated with each record can also be filtered during a query. As an example, {\"year\": {\"$eq\": 2005}} filters a year metadata key to be equal to 2005 In context: docs . query ( data = [ 0.4 , 0.5 , 0.6 ], filters = { \"year\" : { \"$eq\" : 2012 }}, # metadata filters ) For a complete reference, see the metadata guide .","title":"Metadata Filtering"},{"location":"api/#disconnect","text":"When you're done with a collection, be sure to disconnect the client from the database. vx . disconnect () alternatively, use the client as a context manager and it will automatically close the connection on exit. import vecs DB_CONNECTION = \"postgresql://:@:/\" # create vector store client with vecs . create_client ( DB_CONNECTION ) as vx : # do some work here pass # connections are now closed","title":"Disconnect"},{"location":"api/#adapters","text":"Adapters are an optional feature to transform data before adding to or querying from a collection. Adapters make it possible to interact with a collection using only your project's native data type (eg. just raw text), rather than manually handling vectors. For a complete list of available adapters, see built-in adapters . As an example, we'll create a collection with an adapter that chunks text into paragraphs and converts each chunk into an embedding vector using the all-MiniLM-L6-v2 model. 
First, install vecs with optional dependencies for text embeddings: pip install \"vecs[text_embedding]\" Then create a collection with an adapter to chunk text into paragraphs and embed each paragraph using the all-MiniLM-L6-v2 384 dimensional text embedding model. import vecs from vecs.adapter import Adapter , ParagraphChunker , TextEmbedding # create vector store client vx = vecs . Client ( \"postgresql://:@:/\" ) # create a collection with an adapter docs = vx . get_or_create_collection ( name = \"docs\" , adapter = Adapter ( [ ParagraphChunker ( skip_during_query = True ), TextEmbedding ( model = 'all-MiniLM-L6-v2' ), ] ) ) With the adapter registered against the collection, we can upsert records into the collection passing in text rather than vectors. # add records to the collection using text as the media type docs . upsert ( records = [ ( \"vec0\" , \"four score and ....\" , # <- note that we can now pass text here { \"year\" : 1973 } ), ( \"vec1\" , \"hello, world!\" , { \"year\" : \"2012\" } ) ] ) Similarly, we can query the collection using text. # search by text docs . query ( data = \"foo bar\" )","title":"Adapters"},{"location":"api/#deprecated","text":"","title":"Deprecated"},{"location":"api/#create-collection","text":"Note Deprecated: use get_or_create_collection You can create a collection to store vectors specifying the collections name and the number of dimensions in the vectors you intend to store. docs = vx . create_collection ( name = \"docs\" , dimension = 3 )","title":"Create collection"},{"location":"api/#get-an-existing-collection","text":"Note Deprecated: use get_or_create_collection To access a previously created collection, use get_collection to retrieve it by name docs = vx . get_collection ( name = \"docs\" )","title":"Get an existing collection"},{"location":"concepts_adapters/","text":"Adapters Adapters are an optional feature to transform data before adding to or querying from a collection. 
Adapters provide a customizable and modular way to express data transformations and make interacting with collections more ergonomic. Additionally, adapter transformations are applied lazily and can internally batch operations which can make them more memory and CPU efficient compared to manually executing transforms. Example: As an example, we'll create a collection with an adapter that chunks text into paragraphs and converts each chunk into an embedding vector using the all-MiniLM-L6-v2 model. First, install vecs with optional dependencies for text embeddings: pip install \"vecs[text_embedding]\" Then create a collection with an adapter to chunk text into paragraphs and embed each paragraph using the all-MiniLM-L6-v2 384 dimensional text embedding model. import vecs from vecs.adapter import Adapter , ParagraphChunker , TextEmbedding # create vector store client vx = vecs . Client ( \"postgresql://:@:/\" ) # create a collection with an adapter docs = vx . get_or_create_collection ( name = \"docs\" , adapter = Adapter ( [ ParagraphChunker ( skip_during_query = True ), TextEmbedding ( model = 'all-MiniLM-L6-v2' ), ] ) ) With the adapter registered against the collection, we can upsert records into the collection passing in text rather than vectors. # add records to the collection using text as the media type docs . upsert ( records = [ ( \"vec0\" , \"four score and ....\" , # <- note that we can now pass text here { \"year\" : 1973 } ), ( \"vec1\" , \"hello, world!\" , { \"year\" : \"2012\" } ) ] ) Similarly, we can query the collection using text. # search by text docs . query ( data = \"foo bar\" ) In summary, Adapter s allow you to work with a collection as though they store your preferred data type natively. Built-in Adapters vecs provides several built-in Adapters. ParagraphChunker TextEmbedding Have an idea for a useful adapter? Open an issue requesting it. 
ParagraphChunker The ParagraphChunker AdapterStep splits text media into paragraphs and yields each paragraph as a separate record. That can be a useful preprocessing step when upserting large documents that contain multiple paragraphs. The ParagraphChunker delimits paragraphs by two consecutive line breaks \\n\\n . ParagraphChunker is a pre-processing step and must be used in combination with another adapter step like TextEmbedding to transform the chunked text into a vector. from vecs.adapter import Adapter , ParagraphChunker ... vx . get_or_create_collection ( name = \"docs\" , adapter = Adapter ( [ ParagraphChunker ( skip_during_query = True ), ... ] ) ) When querying the collection, you probably do not want to chunk the text. To skip text chunking during queries, set the skip_during_query argument to True . Setting skip_during_query to False will raise an exception if the input text contains more than one paragraph. TextEmbedding The TextEmbedding AdapterStep accepts text and converts it into a vector that can be consumed by the Collection . TextEmbedding supports all models available in the sentence_transformers package. A complete list of supported models is available in vecs.adapter.TextEmbeddingModel . from vecs.adapter import Adapter , TextEmbedding ... vx . get_or_create_collection ( name = \"docs\" , adapter = Adapter ( [ TextEmbedding ( model = 'all-MiniLM-L6-v2' ) ] ) ) # search by text docs . query ( data = \"foo bar\" ) Interface Adapters are objects that take in data in the form of Iterable[Tuple[str, Any, Optional[Dict]]] where Tuple[str, Any, Optional[Dict]]] represents records of (id, media, metadata) . The main use of Adapters is to transform the media part of the records into a form that is ready to be ingested into the collection (like converting text into embeddings). However, Adapters can also modify the id or metadata if required. 
Due to the common interface, adapters may be comprised of multiple adapter steps to create multi-stage preprocessing pipelines. For example, a multi-step adapter might first convert text into chunks and then convert each text chunk into an embedding vector.","title":"Adapters"},{"location":"concepts_adapters/#adapters","text":"Adapters are an optional feature to transform data before adding to or querying from a collection. Adapters provide a customizable and modular way to express data transformations and make interacting with collections more ergonomic. Additionally, adapter transformations are applied lazily and can internally batch operations which can make them more memory and CPU efficient compared to manually executing transforms.","title":"Adapters"},{"location":"concepts_adapters/#example","text":"As an example, we'll create a collection with an adapter that chunks text into paragraphs and converts each chunk into an embedding vector using the all-MiniLM-L6-v2 model. First, install vecs with optional dependencies for text embeddings: pip install \"vecs[text_embedding]\" Then create a collection with an adapter to chunk text into paragraphs and embed each paragraph using the all-MiniLM-L6-v2 384 dimensional text embedding model. import vecs from vecs.adapter import Adapter , ParagraphChunker , TextEmbedding # create vector store client vx = vecs . Client ( \"postgresql://:@:/\" ) # create a collection with an adapter docs = vx . get_or_create_collection ( name = \"docs\" , adapter = Adapter ( [ ParagraphChunker ( skip_during_query = True ), TextEmbedding ( model = 'all-MiniLM-L6-v2' ), ] ) ) With the adapter registered against the collection, we can upsert records into the collection passing in text rather than vectors. # add records to the collection using text as the media type docs . 
upsert ( records = [ ( \"vec0\" , \"four score and ....\" , # <- note that we can now pass text here { \"year\" : 1973 } ), ( \"vec1\" , \"hello, world!\" , { \"year\" : \"2012\" } ) ] ) Similarly, we can query the collection using text. # search by text docs . query ( data = \"foo bar\" ) In summary, Adapter s allow you to work with a collection as though it stores your preferred data type natively.","title":"Example:"},{"location":"concepts_adapters/#built-in-adapters","text":"vecs provides several built-in Adapters. ParagraphChunker TextEmbedding Have an idea for a useful adapter? Open an issue requesting it.","title":"Built-in Adapters"},{"location":"concepts_adapters/#paragraphchunker","text":"The ParagraphChunker AdapterStep splits text media into paragraphs and yields each paragraph as a separate record. That can be a useful preprocessing step when upserting large documents that contain multiple paragraphs. The ParagraphChunker delimits paragraphs by two consecutive line breaks \\n\\n . ParagraphChunker is a pre-processing step and must be used in combination with another adapter step like TextEmbedding to transform the chunked text into a vector. from vecs.adapter import Adapter , ParagraphChunker ... vx . get_or_create_collection ( name = \"docs\" , adapter = Adapter ( [ ParagraphChunker ( skip_during_query = True ), ... ] ) ) When querying the collection, you probably do not want to chunk the text. To skip text chunking during queries, set the skip_during_query argument to True . Setting skip_during_query to False will raise an exception if the input text contains more than one paragraph.","title":"ParagraphChunker"},{"location":"concepts_adapters/#textembedding","text":"The TextEmbedding AdapterStep accepts text and converts it into a vector that can be consumed by the Collection . TextEmbedding supports all models available in the sentence_transformers package. A complete list of supported models is available in vecs.adapter.TextEmbeddingModel . 
from vecs.adapter import Adapter , TextEmbedding ... vx . get_or_create_collection ( name = \"docs\" , adapter = Adapter ( [ TextEmbedding ( model = 'all-MiniLM-L6-v2' ) ] ) ) # search by text docs . query ( data = \"foo bar\" )","title":"TextEmbedding"},{"location":"concepts_adapters/#interface","text":"Adapters are objects that take in data in the form of Iterable[Tuple[str, Any, Optional[Dict]]] where Tuple[str, Any, Optional[Dict]] represents records of (id, media, metadata) . The main use of Adapters is to transform the media part of the records into a form that is ready to be ingested into the collection (like converting text into embeddings). However, Adapters can also modify the id or metadata if required. Due to the common interface, adapters may be comprised of multiple adapter steps to create multi-stage preprocessing pipelines. For example, a multi-step adapter might first convert text into chunks and then convert each text chunk into an embedding vector.","title":"Interface"},{"location":"concepts_collections/","text":"Collections A collection is a group of vector records. Records can be added to or updated in a collection. Collections can be queried at any time, but should be indexed for scalable query performance. Each vector record has the form: Record ( id : String vec : Numeric [] metadata : JSON ) For example: ( \"vec1\" , [ 0.1 , 0.2 , 0.3 ], { \"year\" : 1990 }) Underneath every vecs collection is a Postgres table create table < collection_name > ( id string primary key , vec vector ( < dimension > ), metadata jsonb ) where rows in the table map 1:1 with vecs vector records. It is safe to select collection tables from outside the vecs client but issuing DDL is not recommended.","title":"Collections"},{"location":"concepts_collections/#collections","text":"A collection is a group of vector records. Records can be added to or updated in a collection. Collections can be queried at any time, but should be indexed for scalable query performance. 
Each vector record has the form: Record ( id : String vec : Numeric [] metadata : JSON ) For example: ( \"vec1\" , [ 0.1 , 0.2 , 0.3 ], { \"year\" : 1990 }) Underneath every vecs collection is a Postgres table create table < collection_name > ( id string primary key , vec vector ( < dimension > ), metadata jsonb ) where rows in the table map 1:1 with vecs vector records. It is safe to select collection tables from outside the vecs client but issuing DDL is not recommended.","title":"Collections"},{"location":"concepts_indexes/","text":"Indexes Indexes are tools for optimizing query performance of a collection . Collections can be queried without an index, but that will emit a python warning and should never be done in production. query does not have a covering index for cosine_similarity. See Collection.create_index As each query vector must be checked against every record in the collection. When the number of dimensions and/or number of records becomes large, that becomes extremely slow and computationally expensive. An index is a heuristic datastructure that pre-computes distances among key points in the vector space. It is smaller and can be traversed more quickly than the whole collection enabling much more performant searching. Only one index may exist per-collection. An index optimizes a collection for searching according to a selected distance measure. To create an index: docs . create_index () You may optionally provide a distance measure and index method. Available options for distance measure are: vecs.IndexMeasure.cosine_distance vecs.IndexMeasure.l2_distance vecs.IndexMeasure.max_inner_product which correspond to different methods for comparing query vectors to the vectors in the database. If you aren't sure which to use, the default of cosine_distance is the most widely compatible with off-the-shelf embedding methods. 
Available options for index method are: vecs.IndexMethod.auto vecs.IndexMethod.hnsw vecs.IndexMethod.ivfflat Where auto selects the best available index method, hnsw uses the HNSW method and ivfflat uses IVFFlat . When using IVFFlat indexes, the index must be created after the collection has been populated with records. Building an IVFFlat index on an empty collection will result in significantly reduced recall. You can continue upserting new documents after the index has been created, but should rebuild the index if the size of the collection more than doubles since the last index operation. HNSW indexes can be created immediately after the collection without populating records. To manually specify method and measure , pass them as arguments to create_index for example: docs . create_index ( method = IndexMethod . hnsw , measure = IndexMeasure . cosine_distance , ) Note The time required to create an index grows with the number of records and size of vectors. For a few thousand records expect a response in under a minute. It may take a few minutes for larger collections.","title":"Indexes"},{"location":"concepts_indexes/#indexes","text":"Indexes are tools for optimizing query performance of a collection . Collections can be queried without an index, but that will emit a python warning and should never be done in production. query does not have a covering index for cosine_similarity. See Collection.create_index As each query vector must be checked against every record in the collection. When the number of dimensions and/or number of records becomes large, that becomes extremely slow and computationally expensive. An index is a heuristic datastructure that pre-computes distances among key points in the vector space. It is smaller and can be traversed more quickly than the whole collection enabling much more performant searching. Only one index may exist per-collection. An index optimizes a collection for searching according to a selected distance measure. 
To create an index: docs . create_index () You may optionally provide a distance measure and index method. Available options for distance measure are: vecs.IndexMeasure.cosine_distance vecs.IndexMeasure.l2_distance vecs.IndexMeasure.max_inner_product which correspond to different methods for comparing query vectors to the vectors in the database. If you aren't sure which to use, the default of cosine_distance is the most widely compatible with off-the-shelf embedding methods. Available options for index method are: vecs.IndexMethod.auto vecs.IndexMethod.hnsw vecs.IndexMethod.ivfflat Where auto selects the best available index method, hnsw uses the HNSW method and ivfflat uses IVFFlat . When using IVFFlat indexes, the index must be created after the collection has been populated with records. Building an IVFFlat index on an empty collection will result in significantly reduced recall. You can continue upserting new documents after the index has been created, but should rebuild the index if the size of the collection more than doubles since the last index operation. HNSW indexes can be created immediately after the collection without populating records. To manually specify method and measure , pass them as arguments to create_index for example: docs . create_index ( method = IndexMethod . hnsw , measure = IndexMeasure . cosine_distance , ) Note The time required to create an index grows with the number of records and size of vectors. For a few thousand records expect a response in under a minute. It may take a few minutes for larger collections.","title":"Indexes"},{"location":"concepts_metadata/","text":"Metadata vecs allows you to associate key-value pairs of metadata with indexes and ids in your collections. You can then add filters to queries that reference the metadata. Types Metadata is stored as binary JSON. As a result, allowed metadata types are drawn from JSON primitive types. 
Boolean String Number The technical limit of a metadata field associated with a vector is 1GB. In practice you should keep metadata fields as small as possible to maximize performance. Metadata Query Language The metadata query language is based loosely on mongodb's selectors . vecs currently supports a subset of those operators. Comparison Operators Comparison operators compare a provided value with a value stored in metadata field of the vector store. Operator Description $eq Matches values that are equal to a specified value $ne Matches values that are not equal to a specified value $gt Matches values that are greater than a specified value $gte Matches values that are greater than or equal to a specified value $lt Matches values that are less than a specified value $lte Matches values that are less than or equal to a specified value $in Matches values that are contained by scalar list of specified values Logical Operators Logical operators compose other operators, and can be nested. Operator Description $and Joins query clauses with a logical AND returns all documents that match the conditions of both clauses. $or Joins query clauses with a logical OR returns all documents that match the conditions of either clause. Performance For best performance, use scalar key-value pairs for metadata and prefer $eq , $and and $or filters where possible. Those variants are most consistently able to make use of indexes. 
Examples year equals 2020 { \"year\" : { \"$eq\" : 2020 }} year equals 2020 or gross greater than or equal to 5000.0 { \"$or\" : [ { \"year\" : { \"$eq\" : 2020 }}, { \"gross\" : { \"$gte\" : 5000.0 }} ] } last_name is less than \"Brown\" and is_priority_customer is true { \"$and\" : [ { \"last_name\" : { \"$lt\" : \"Brown\" }}, { \"is_priority_customer\" : { \"$eq\" : true }} ] } priority contained by [\"enterprise\", \"pro\"] { \"priority\" : { \"$in\" : [ \"enterprise\" , \"pro\" ]} }","title":"Metadata"},{"location":"concepts_metadata/#metadata","text":"vecs allows you to associate key-value pairs of metadata with indexes and ids in your collections. You can then add filters to queries that reference the metadata.","title":"Metadata"},{"location":"concepts_metadata/#types","text":"Metadata is stored as binary JSON. As a result, allowed metadata types are drawn from JSON primitive types. Boolean String Number The technical limit of a metadata field associated with a vector is 1GB. In practice you should keep metadata fields as small as possible to maximize performance.","title":"Types"},{"location":"concepts_metadata/#metadata-query-language","text":"The metadata query language is based loosely on mongodb's selectors . vecs currently supports a subset of those operators.","title":"Metadata Query Language"},{"location":"concepts_metadata/#comparison-operators","text":"Comparison operators compare a provided value with a value stored in metadata field of the vector store. 
Operator Description $eq Matches values that are equal to a specified value $ne Matches values that are not equal to a specified value $gt Matches values that are greater than a specified value $gte Matches values that are greater than or equal to a specified value $lt Matches values that are less than a specified value $lte Matches values that are less than or equal to a specified value $in Matches values that are contained by scalar list of specified values","title":"Comparison Operators"},{"location":"concepts_metadata/#logical-operators","text":"Logical operators compose other operators, and can be nested. Operator Description $and Joins query clauses with a logical AND returns all documents that match the conditions of both clauses. $or Joins query clauses with a logical OR returns all documents that match the conditions of either clause.","title":"Logical Operators"},{"location":"concepts_metadata/#performance","text":"For best performance, use scalar key-value pairs for metadata and prefer $eq , $and and $or filters where possible. Those variants are most consistently able to make use of indexes.","title":"Performance"},{"location":"concepts_metadata/#examples","text":"year equals 2020 { \"year\" : { \"$eq\" : 2020 }} year equals 2020 or gross greater than or equal to 5000.0 { \"$or\" : [ { \"year\" : { \"$eq\" : 2020 }}, { \"gross\" : { \"$gte\" : 5000.0 }} ] } last_name is less than \"Brown\" and is_priority_customer is true { \"$and\" : [ { \"last_name\" : { \"$lt\" : \"Brown\" }}, { \"is_priority_customer\" : { \"$eq\" : true }} ] } priority contained by [\"enterprise\", \"pro\"] { \"priority\" : { \"$in\" : [ \"enterprise\" , \"pro\" ]} }","title":"Examples"},{"location":"hosting/","text":"Deployment vecs is compatible with any Postgres 13+ with the pgvector extension installed. In the following we show instructions for hosting a database on Supabase and locally in docker since both are fast and free. 
Supabase Cloud Hosted Create an account Create a supabase account at https://app.supabase.com/sign-up . Create a new project Select New Project Complete the prompts. Be sure to remember or write down your password as we'll need that when connecting with vecs. Connection Info On the project page, navigate to Settings > Database > Database Settings and substitute those fields into the connection string postgresql://:@:/ i.e. postgres://postgres:[YOUR PASSWORD]@db.cvykdyhlwwwojivopztl.supabase.co:5432/postgres Keep that connection string secret and safe. It's your DB_CONNECTION in the quickstart guide . Local You can also use Supabase locally on your machine. Doing so will keep your project setup consistent when deploying to hosted Supabase. Install the CLI To install the CLI, use the relevant system instructions below macOS Windows Linux npm brew install supabase/tap/supabase scoop bucket add supabase https://github.com/supabase/scoop-bucket.git scoop install supabase Linux packages are provided in Releases. To install, download the .apk/.deb/.rpm file depending on your package manager and run one of the following: sudo apk add --allow-untrusted <...>.apk or sudo dpkg -i <...>.deb or sudo rpm -i <...>.rpm npm install supabase --save-dev Start the Project From your project directory, create the supabase/ sub-directory required for supabase projects by running: supabase init next start the application using: supabase start which will download the latest Supabase containers and provide a URL to each service: Seeding data supabase/seed.sql...me... Started supabase local development setup. 
API URL: http://localhost:54321 GraphQL URL: http://localhost:54321/graphql/v1 DB URL: postgresql://postgres:postgres@localhost:54322/postgres Studio URL: http://localhost:54323 Inbucket URL: http://localhost:54324 JWT secret: super-secret-jwt-token-with-at-least-32-characters-long anon key: eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJpc3MiOiJzdXBhYmFz service_role key: eyJhbGciOiJIUzI1NiIsInR5cClJzdJsyH-qQwv8Hdp7fsn3W0YpN81IU The service we need for vecs is DB URL . Note it down for use as our DB_CONNECTION postgresql://:@:/ For more info on running a local Supabase project, check out the Supabase CLI guide Docker Install docker if you don't have it already at Get Docker Start the Postgres Container Next, run docker run --rm -d \\ --name vecs_hosting_guide \\ -p 5019 :5432 \\ -e POSTGRES_DB = vecs_db \\ -e POSTGRES_PASSWORD = password \\ -e POSTGRES_USER = postgres \\ supabase/postgres:15.1.0.74 Connection Info Substitute the values from the previous section into the postgres connection string postgresql://:@:/ i.e. postgresql://postgres:password@localhost:5019/vecs_db Keep that connection string secret and safe. It's your DB_CONNECTION in the quickstart guide","title":"Hosting"},{"location":"hosting/#deployment","text":"vecs is compatible with any Postgres 13+ with the pgvector extension installed. In the following we show instructions for hosting a database on Supabase and locally in docker since both are fast and free.","title":"Deployment"},{"location":"hosting/#supabase","text":"","title":"Supabase"},{"location":"hosting/#cloud-hosted","text":"","title":"Cloud Hosted"},{"location":"hosting/#create-an-account","text":"Create a supabase account at https://app.supabase.com/sign-up .","title":"Create an account"},{"location":"hosting/#create-a-new-project","text":"Select New Project Complete the prompts. 
Be sure to remember or write down your password as we'll need that when connecting with vecs.","title":"Create a new project"},{"location":"hosting/#connection-info","text":"On the project page, navigate to Settings > Database > Database Settings and substitute those fields into the connection string postgresql://:@:/ i.e. postgres://postgres:[YOUR PASSWORD]@db.cvykdyhlwwwojivopztl.supabase.co:5432/postgres Keep that connection string secret and safe. It's your DB_CONNECTION in the quickstart guide .","title":"Connection Info"},{"location":"hosting/#local","text":"You can also use Supabase locally on your machine. Doing so will keep your project setup consistent when deploying to hosted Supabase.","title":"Local"},{"location":"hosting/#install-the-cli","text":"To install the CLI, use the relevant system instructions below macOS Windows Linux npm brew install supabase/tap/supabase scoop bucket add supabase https://github.com/supabase/scoop-bucket.git scoop install supabase Linux packages are provided in Releases. To install, download the .apk/.deb/.rpm file depending on your package manager and run one of the following: sudo apk add --allow-untrusted <...>.apk or sudo dpkg -i <...>.deb or sudo rpm -i <...>.rpm npm install supabase --save-dev","title":"Install the CLI"},{"location":"hosting/#start-the-project","text":"From your project directory, create the supabase/ sub-directory required for supabase projects by running: supabase init next start the application using: supabase start which will download the latest Supabase containers and provide a URL to each service: Seeding data supabase/seed.sql...me... Started supabase local development setup. 
API URL: http://localhost:54321 GraphQL URL: http://localhost:54321/graphql/v1 DB URL: postgresql://postgres:postgres@localhost:54322/postgres Studio URL: http://localhost:54323 Inbucket URL: http://localhost:54324 JWT secret: super-secret-jwt-token-with-at-least-32-characters-long anon key: eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJpc3MiOiJzdXBhYmFz service_role key: eyJhbGciOiJIUzI1NiIsInR5cClJzdJsyH-qQwv8Hdp7fsn3W0YpN81IU The service we need for vecs is DB URL . Note it down for use as our DB_CONNECTION postgresql://:@:/ For more info on running a local Supabase project, check out the Supabase CLI guide","title":"Start the Project"},{"location":"hosting/#docker","text":"Install docker if you don't have it already at Get Docker","title":"Docker"},{"location":"hosting/#start-the-postgres-container","text":"Next, run docker run --rm -d \\ --name vecs_hosting_guide \\ -p 5019 :5432 \\ -e POSTGRES_DB = vecs_db \\ -e POSTGRES_PASSWORD = password \\ -e POSTGRES_USER = postgres \\ supabase/postgres:15.1.0.74","title":"Start the Postgres Container"},{"location":"hosting/#connection-info_1","text":"Substitute the values from the previous section into the postgres connection string postgresql://:@:/ i.e. postgresql://postgres:password@localhost:5019/vecs_db Keep that connection string secret and safe. It's your DB_CONNECTION in the quickstart guide","title":"Connection Info"},{"location":"integrations_huggingface_inference_endpoints/","text":"Integration: Hugging Face Inference Endpoints This guide will walk you through an example integration of the Hugging Face Inference API with vecs. We will create embeddings using Hugging Face's sentence-transformers/all-MiniLM-L6-v2 model, insert these embeddings into a PostgreSQL database using vecs, and then query vecs to find the most similar sentences to a given query sentence. Create a Hugging Face Inference Endpoint Head over to Hugging Face's inference endpoints and select New Endpoint . 
Configure your endpoint with your model and provider of choice. In this example we'll use sentence-transformers/all-MiniLM-L6-v2 and AWS . Under \"Advanced Configuration\" select \"Sentence Embeddings\" as the \"Task\". Then click \"Create Endpoint\" Once the endpoint starts up, take note of the Endpoint URL Tip Don't forget to pause or delete your Hugging Face Inference Endpoint when you're not using it Finally, create and copy an API key we can use to authenticate with the inference endpoint. Create an Environment Next, you need to set up your environment. You will need Python 3.7+ with the vecs and requests libraries installed. pip install vecs requests You'll also need a Postgres Database with the pgvector extension Create Embeddings We can use the Hugging Face endpoint to create embeddings for a set of sentences. import requests import json huggingface_endpoint_url = '' huggingface_api_key = '' dataset = [ \"The cat sat on the mat.\" , \"The quick brown fox jumps over the lazy dog.\" , \"Friends, Romans, countrymen, lend me your ears\" , \"To be or not to be, that is the question.\" , ] records = [] for sentence in dataset : response = requests . post ( huggingface_endpoint_url , headers = { \"Authorization\" : f \"Bearer { huggingface_api_key } \" , \"Content-Type\" : \"application/json\" }, json = { \"inputs\" : sentence } ) embedding = response . json ()[ \"embeddings\" ] records . append (( sentence , embedding , {})) Store the Embeddings with vecs Now that we have our embeddings, we can insert them into a PostgreSQL database using vecs subbing in your DB_CONNECTION string. import vecs DB_CONNECTION = \"postgresql://:@:/\" # create vector store client vx = vecs . Client ( DB_CONNECTION ) # create a collection named 'sentences' with 384 dimensional vectors (default dimension for all-MiniLM-L6-v2) sentences = vx . get_or_create_collection ( name = \"sentences\" , dimension = 384 ) # upsert the embeddings into the 'sentences' collection sentences . 
upsert ( records = records ) # create an index for the 'sentences' collection sentences . create_index () Querying for Most Similar Sentences Finally, we can query vecs to find the most similar sentences to a given query sentence. The query sentence is embedded using the same method as the sentences in the dataset, then we query the sentences collection with vecs. query_sentence = \"A quick animal jumps over a lazy one.\" # create an embedding for the query sentence response = requests . post ( huggingface_endpoint_url , headers = { \"Authorization\" : f \"Bearer { huggingface_api_key } \" , \"Content-Type\" : \"application/json\" }, json = { \"inputs\" : query_sentence } ) query_embedding = response . json ()[ \"embeddings\" ] # query the 'sentences' collection for the most similar sentences results = sentences . query ( data = query_embedding , limit = 3 , include_value = True ) # print the results for result in results : print ( result ) Returns the most similar 3 records and their distance to the query vector. ('The quick brown fox jumps over the lazy dog.', 0.256648302882697) ('The cat sat on the mat.', 0.78635900041167) ('To be or not to be, that is the question.', 1.04114070479544)","title":"HuggingFace Inference Endpoints"},{"location":"integrations_huggingface_inference_endpoints/#integration-hugging-face-inference-endpoints","text":"This guide will walk you through an example integration of the Hugging Face Inference API with vecs. We will create embeddings using Hugging Face's sentence-transformers/all-MiniLM-L6-v2 model, insert these embeddings into a PostgreSQL database using vecs, and then query vecs to find the most similar sentences to a given query sentence.","title":"Integration: Hugging Face Inference Endpoints"},{"location":"integrations_huggingface_inference_endpoints/#create-a-hugging-face-inference-endpoint","text":"Head over to Hugging Face's inference endpoints and select New Endpoint . 
Configure your endpoint with your model and provider of choice. In this example we'll use sentence-transformers/all-MiniLM-L6-v2 and AWS . Under \"Advanced Configuration\" select \"Sentence Embeddings\" as the \"Task\". Then click \"Create Endpoint\" Once the endpoint starts up, take note of the Endpoint URL Tip Don't forget to pause or delete your Hugging Face Inference Endpoint when you're not using it Finally, create and copy an API key we can use to authenticate with the inference endpoint.","title":"Create a Hugging Face Inference Endpoint"},{"location":"integrations_huggingface_inference_endpoints/#create-an-environment","text":"Next, you need to set up your environment. You will need Python 3.7+ with the vecs and requests libraries installed. pip install vecs requests You'll also need a Postgres Database with the pgvector extension","title":"Create an Environment"},{"location":"integrations_huggingface_inference_endpoints/#create-embeddings","text":"We can use the Hugging Face endpoint to create embeddings for a set of sentences. import requests import json huggingface_endpoint_url = '' huggingface_api_key = '' dataset = [ \"The cat sat on the mat.\" , \"The quick brown fox jumps over the lazy dog.\" , \"Friends, Romans, countrymen, lend me your ears\" , \"To be or not to be, that is the question.\" , ] records = [] for sentence in dataset : response = requests . post ( huggingface_endpoint_url , headers = { \"Authorization\" : f \"Bearer { huggingface_api_key } \" , \"Content-Type\" : \"application/json\" }, json = { \"inputs\" : sentence } ) embedding = response . json ()[ \"embeddings\" ] records . append (( sentence , embedding , {}))","title":"Create Embeddings"},{"location":"integrations_huggingface_inference_endpoints/#store-the-embeddings-with-vecs","text":"Now that we have our embeddings, we can insert them into a PostgreSQL database using vecs subbing in your DB_CONNECTION string. 
import vecs DB_CONNECTION = \"postgresql://:@:/\" # create vector store client vx = vecs . Client ( DB_CONNECTION ) # create a collection named 'sentences' with 384 dimensional vectors (default dimension for paraphrase-MiniLM-L6-v2) sentences = vx . get_or_create_collection ( name = \"sentences\" , dimension = 384 ) # upsert the embeddings into the 'sentences' collection sentences . upsert ( records = records ) # create an index for the 'sentences' collection sentences . create_index ()","title":"Store the Embeddings with vecs"},{"location":"integrations_huggingface_inference_endpoints/#querying-for-most-similar-sentences","text":"Finally, we can query vecs to find the most similar sentences to a given query sentence. The query sentence is embedded using the same method as the sentences in the dataset, then we query the sentences collection with vecs. query_sentence = \"A quick animal jumps over a lazy one.\" # create an embedding for the query sentence response = requests . post ( huggingface_endpoint_url , headers = { \"Authorization\" : f \"Bearer { huggingface_api_key } \" , \"Content-Type\" : \"application/json\" }, json = { \"inputs\" : query_sentence } ) query_embedding = response . json ()[ \"embeddings\" ] # query the 'sentences' collection for the most similar sentences results = sentences . query ( data = query_embedding , limit = 3 , include_value = True ) # print the results for result in results : print ( result ) Returns the most similar 3 records and their distance to the query vector. ('The quick brown fox jumps over the lazy dog.', 0.256648302882697) ('The cat sat on the mat.', 0.78635900041167) ('To be or not to be, that is the question.', 1.04114070479544)","title":"Querying for Most Similar Sentences"},{"location":"integrations_openai/","text":"Integration: Open AI This guide will walk you through an example integration of the OpenAI API with the vecs Python library. 
We will create embeddings using OpenAI's text-embedding-ada-002 model, insert these embeddings into a PostgreSQL database using vecs, and then query vecs to find the most similar sentences to a given query sentence. Create an Environment First, you need to set up your environment. You will need Python 3.7 with the vecs and openai libraries installed. You can install the necessary Python libraries using pip: pip install vecs openai You'll also need: An OpenAI API Key A Postgres Database with the pgvector extension Create Embeddings Next, we will use OpenAI's text-embedding-ada-002 model to create embeddings for a set of sentences. import openai openai . api_key = '' dataset = [ \"The cat sat on the mat.\" , \"The quick brown fox jumps over the lazy dog.\" , \"Friends, Romans, countrymen, lend me your ears\" , \"To be or not to be, that is the question.\" , ] embeddings = [] for sentence in dataset : response = openai . Embedding . create ( model = \"text-embedding-ada-002\" , input = [ sentence ] ) embeddings . append (( sentence , response [ \"data\" ][ 0 ][ \"embedding\" ], {})) Store the Embeddings with vecs Now that we have our embeddings, we can insert them into a PostgreSQL database using vecs. import vecs DB_CONNECTION = \"postgresql://:@:/\" # create vector store client vx = vecs . Client ( DB_CONNECTION ) # create a collection named 'sentences' with 1536 dimensional vectors (default dimension for text-embedding-ada-002) sentences = vx . get_or_create_collection ( name = \"sentences\" , dimension = 1536 ) # upsert the embeddings into the 'sentences' collection sentences . upsert ( records = embeddings ) # create an index for the 'sentences' collection sentences . create_index () Querying for Most Similar Sentences Finally, we can query vecs to find the most similar sentences to a given query sentence. We will first need to create an embedding for the query sentence using the text-embedding-ada-002 model. 
query_sentence = \"A quick animal jumps over a lazy one.\" # create an embedding for the query sentence response = openai . Embedding . create ( model = \"text-embedding-ada-002\" , input = [ query_sentence ] ) query_embedding = response [ \"data\" ][ 0 ][ \"embedding\" ] # query the 'sentences' collection for the most similar sentences results = sentences . query ( data = query_embedding , limit = 3 , include_value = True ) # print the results for result in results : print ( result ) Returns the most similar 3 records and their distance to the query vector. ('The quick brown fox jumps over the lazy dog.', 0.0633971456300456) ('The cat sat on the mat.', 0.16474785399561) ('To be or not to be, that is the question.', 0.24531234467506)","title":"OpenAI"},{"location":"integrations_openai/#integration-open-ai","text":"This guide will walk you through an example integration of the OpenAI API with the vecs Python library. We will create embeddings using OpenAI's text-embedding-ada-002 model, insert these embeddings into a PostgreSQL database using vecs, and then query vecs to find the most similar sentences to a given query sentence.","title":"Integration: Open AI"},{"location":"integrations_openai/#create-an-environment","text":"First, you need to set up your environment. You will need Python 3.7 with the vecs and openai libraries installed. You can install the necessary Python libraries using pip: pip install vecs openai You'll also need: An OpenAI API Key A Postgres Database with the pgvector extension","title":"Create an Environment"},{"location":"integrations_openai/#create-embeddings","text":"Next, we will use OpenAI's text-embedding-ada-002 model to create embeddings for a set of sentences. import openai openai . 
api_key = '' dataset = [ \"The cat sat on the mat.\" , \"The quick brown fox jumps over the lazy dog.\" , \"Friends, Romans, countrymen, lend me your ears\" , \"To be or not to be, that is the question.\" , ] embeddings = [] for sentence in dataset : response = openai . Embedding . create ( model = \"text-embedding-ada-002\" , input = [ sentence ] ) embeddings . append (( sentence , response [ \"data\" ][ 0 ][ \"embedding\" ], {}))","title":"Create Embeddings"},{"location":"integrations_openai/#store-the-embeddings-with-vecs","text":"Now that we have our embeddings, we can insert them into a PostgreSQL database using vecs. import vecs DB_CONNECTION = \"postgresql://:@:/\" # create vector store client vx = vecs . Client ( DB_CONNECTION ) # create a collection named 'sentences' with 1536 dimensional vectors (default dimension for text-embedding-ada-002) sentences = vx . get_or_create_collection ( name = \"sentences\" , dimension = 1536 ) # upsert the embeddings into the 'sentences' collection sentences . upsert ( records = embeddings ) # create an index for the 'sentences' collection sentences . create_index ()","title":"Store the Embeddings with vecs"},{"location":"integrations_openai/#querying-for-most-similar-sentences","text":"Finally, we can query vecs to find the most similar sentences to a given query sentence. We will first need to create an embedding for the query sentence using the text-embedding-ada-002 model. query_sentence = \"A quick animal jumps over a lazy one.\" # create an embedding for the query sentence response = openai . Embedding . create ( model = \"text-embedding-ada-002\" , input = [ query_sentence ] ) query_embedding = response [ \"data\" ][ 0 ][ \"embedding\" ] # query the 'sentences' collection for the most similar sentences results = sentences . 
query ( data = query_embedding , limit = 3 , include_value = True ) # print the results for result in results : print ( result ) Returns the most similar 3 records and their distance to the query vector. ('The quick brown fox jumps over the lazy dog.', 0.0633971456300456) ('The cat sat on the mat.', 0.16474785399561) ('To be or not to be, that is the question.', 0.24531234467506)","title":"Querying for Most Similar Sentences"},{"location":"support_changelog/","text":"Changelog 0.1.0 Initial release 0.2.7 Feature: Added vecs.Collection.disconnect() to drop database connection Feature: vecs.Client can be used as a context manager to auto-close connections Feature: Uses (indexed) containment operator @> for metadata equality filters where possible Docs: Added docstrings to all methods, functions and modules 0.3.0 Feature: Collections can have adapters allowing upserting/querying by native media types Breaking Change: Renamed argument Collection.upsert(vectors, ...) to Collection.upsert(records, ...) in support of adapters Breaking Change: Renamed argument Collection.query(query_vector, ...) to Collection.query(data, ...) 
in support of adapters 0.3.1 Feature: Metadata filtering with $in 0.4.0 Feature: pgvector 0.5.0 Feature: HNSW index support 0.4.1 Bugfix: removed errant print statement master","title":"Changelog"},{"location":"support_changelog/#changelog","text":"","title":"Changelog"},{"location":"support_changelog/#010","text":"Initial release","title":"0.1.0"},{"location":"support_changelog/#027","text":"Feature: Added vecs.Collection.disconnect() to drop database connection Feature: vecs.Client can be used as a context manager to auto-close connections Feature: Uses (indexed) containment operator @> for metadata equality filters where possible Docs: Added docstrings to all methods, functions and modules","title":"0.2.7"},{"location":"support_changelog/#030","text":"Feature: Collections can have adapters allowing upserting/querying by native media types Breaking Change: Renamed argument Collection.upsert(vectors, ...) to Collection.upsert(records, ...) in support of adapters Breaking Change: Renamed argument Collection.query(query_vector, ...) to Collection.query(data, ...) in support of adapters","title":"0.3.0"},{"location":"support_changelog/#031","text":"Feature: Metadata filtering with $in","title":"0.3.1"},{"location":"support_changelog/#040","text":"Feature: pgvector 0.5.0 Feature: HNSW index support","title":"0.4.0"},{"location":"support_changelog/#041","text":"Bugfix: removed errant print statement","title":"0.4.1"},{"location":"support_changelog/#master","text":"","title":"master"}]}
\ No newline at end of file
diff --git a/0.4/sitemap.xml.gz b/0.4/sitemap.xml.gz
index 94b9d1bc0c32df87300524a1f1f173d2e8c6e5d5..37b1568d1fdc731b1e8dc9508761af1f838b187d 100644
GIT binary patch
delta 15
WcmdnTw2z5RzMF$%dG1Cw3q}AXFa$*a
delta 15
WcmdnTw2z5RzMF$%d)7ua3q}AXfdo