From e535d9501a3fcd0c57bf1bb5e2b6f05b870c1554 Mon Sep 17 00:00:00 2001 From: Ankith Reddy Chitti Date: Tue, 2 Apr 2024 15:54:01 -0400 Subject: [PATCH 1/5] Fix Makefile for MacOS, remove redundant log from crud test --- Makefile | 2 +- test/expected/pinecone_crud.out | 2 -- test/sql/pinecone_crud.sql | 2 -- 3 files changed, 1 insertion(+), 5 deletions(-) diff --git a/Makefile b/Makefile index f4452634..ba44109f 100644 --- a/Makefile +++ b/Makefile @@ -20,7 +20,7 @@ OPTFLAGS = -march=native -O0 -fno-strict-aliasing -DPINECONE_MOCK -g ifeq ($(shell uname -s), Darwin) ifeq ($(shell uname -p), arm) # no difference with -march=armv8.5-a - OPTFLAGS = + OPTFLAGS = -O0 -fno-strict-aliasing -DPINECONE_MOCK -g endif endif diff --git a/test/expected/pinecone_crud.out b/test/expected/pinecone_crud.out index 4c3f1cea..c360ab97 100644 --- a/test/expected/pinecone_crud.out +++ b/test/expected/pinecone_crud.out @@ -3,8 +3,6 @@ \o /dev/null -- apikey SET pinecone.api_key = 'fake'; --- apikey -SET pinecone.api_key = 'fake'; -- logging level SET client_min_messages = 'notice'; -- flush each vector individually diff --git a/test/sql/pinecone_crud.sql b/test/sql/pinecone_crud.sql index b44ea8fb..42979a8d 100644 --- a/test/sql/pinecone_crud.sql +++ b/test/sql/pinecone_crud.sql @@ -3,8 +3,6 @@ \o /dev/null -- apikey SET pinecone.api_key = 'fake'; --- apikey -SET pinecone.api_key = 'fake'; -- logging level SET client_min_messages = 'notice'; -- flush each vector individually From 874302eef70f9259c28ad585ce9ede9f0636be89 Mon Sep 17 00:00:00 2001 From: Chitti Ankith Date: Tue, 2 Apr 2024 16:07:11 -0400 Subject: [PATCH 2/5] Update README.md --- README.md | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index 2dee4e8d..a30ad8ec 100644 --- a/README.md +++ b/README.md @@ -1,15 +1,15 @@ # pgvector-remote -pgvector-remote is a PostgreSQL extension developed by the Georgia Tech Database Labs. 
It builds upon the functionality provided by pgvector, introducing seamless integration with dedicated vector stores like Pinecone, with plans to support other vendors in the future. +pgvector-remote is a PostgreSQL extension developed by the Georgia Tech Database Labs. It builds upon the functionality provided by pgvector, introducing seamless integration with dedicated remote vector stores like Pinecone, with plans to support other vendors in the future. -This extension simplifies the process of storing and retrieving vectors in vector stores while leveraging the power and familiarity of PostgreSQL. +This extension simplifies the process of storing and retrieving vectors while leveraging the power and familiarity of PostgreSQL, along with the rich functionality and performance of serverless vector stores. Supports: - exact and approximate nearest neighbor search -- Metadata filtering with vector similarity search -- L2 distance, inner product, and cosine distance -- vectors are buffered and batch-inserted into remote stores per user-defined sizes -- Seamless data integration and synchronization between pgvector and Pinecone +- metadata filtering with vector similarity search +- L2 distance, inner product, and cosine distance metrics +- buffering and batch-insertion of vectors into remote stores per user-defined sizes +- seamless data integration and synchronization between pgvector and Pinecone ## Installation @@ -319,6 +319,7 @@ Thanks to: - [k-means++: The Advantage of Careful Seeding](https://theory.stanford.edu/~sergei/papers/kMeansPP-soda.pdf) - [Concept Decompositions for Large Sparse Text Data using Clustering](https://www.cs.utexas.edu/users/inderjit/public_papers/concept_mlj.pdf) - [Efficient and Robust Approximate Nearest Neighbor Search using Hierarchical Navigable Small World Graphs](https://arxiv.org/ftp/arxiv/papers/1603/1603.09320.pdf) +- [Pinecone: Vector database and search service designed for real-time 
applications](https://docs.pinecone.io/introduction) ## Contributing From 342c2e577888c60216af99a88db799be218434cb Mon Sep 17 00:00:00 2001 From: Chitti Ankith Date: Tue, 2 Apr 2024 16:09:12 -0400 Subject: [PATCH 3/5] Update README.md Credits --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index a30ad8ec..36212ba3 100644 --- a/README.md +++ b/README.md @@ -308,9 +308,9 @@ docker pull kslohith17/pgvector-remote:latest This contains postgres along with pgvector-remote configured to run on it. -## Thanks +## Credits -Thanks to: +We give special thanks to these projects, which enabled us to develop our extension: - [pgvector: Open-source vector similarity search for Postgres](https://github.com/pgvector/pgvector) - [PASE: PostgreSQL Ultra-High-Dimensional Approximate Nearest Neighbor Search Extension](https://dl.acm.org/doi/pdf/10.1145/3318464.3386131) From 522283381745d0fd88d69d9f7e761f7049083971 Mon Sep 17 00:00:00 2001 From: Shashank Suman Date: Tue, 2 Apr 2024 17:03:36 -0400 Subject: [PATCH 4/5] added missing curl library in Dockerfile --- Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index eef0c8fd..549acd5f 100644 --- a/Dockerfile +++ b/Dockerfile @@ -6,7 +6,7 @@ COPY . 
/tmp/pgvector RUN apt-get update && \ apt-mark hold locales && \ - apt-get install -y --no-install-recommends build-essential postgresql-server-dev-$PG_MAJOR && \ + apt-get install -y --no-install-recommends build-essential libcurl4-openssl-dev postgresql-server-dev-$PG_MAJOR && \ cd /tmp/pgvector && \ make clean && \ make OPTFLAGS="" && \ From a12f0e843d65710bd861f39a2ee48ead4f8cf99d Mon Sep 17 00:00:00 2001 From: Chitti Ankith Date: Wed, 3 Apr 2024 15:42:52 -0400 Subject: [PATCH 5/5] Update README.md Query Options --- README.md | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 36212ba3..fb2fa629 100644 --- a/README.md +++ b/README.md @@ -215,11 +215,11 @@ SELECT category_id, AVG(embedding) FROM items GROUP BY category_id; ### Query Options -pinecone.top_k: Get the top K relevant results from pinecone. -pinecone.vectors_per_request: Number of vectors per request. -pinecone.requests_per_batch: Number of requests to be sent in one batch. -The buffer size is calculated as pinecone.vectors_per_request * pinecone.requests_per_batch -pinecone.max_buffer_scan: Pinecone max buffer search +* **pinecone.top_k:** Get the top K relevant results from Pinecone. +* **pinecone.vectors_per_request:** Number of vectors per request. +* **pinecone.requests_per_batch:** Number of requests to be sent in one batch. +* The buffer size is calculated as ***pinecone.vectors_per_request * pinecone.requests_per_batch*** +* **pinecone.max_buffer_scan:** Pinecone max buffer search ## Reference