Skip to content

Commit

Permalink
fixed seg fault on NULL vector
Browse files Browse the repository at this point in the history
  • Loading branch information
swetavooda committed Apr 22, 2024
1 parent ec90e20 commit 5a1f375
Show file tree
Hide file tree
Showing 4 changed files with 33 additions and 17 deletions.
2 changes: 1 addition & 1 deletion src/pinecone/pinecone_scan.c
Original file line number Diff line number Diff line change
Expand Up @@ -315,7 +315,7 @@ void load_buffer_into_sort(Relation index, PineconeScanOpaque so, Datum query_da
page = BufferGetPage(buf);

// add all tuples on the page to the sortstate
for (OffsetNumber offno = FirstOffsetNumber; offno <= PageGetMaxOffsetNumber(page) && n_sortedtuple > pinecone_max_buffer_scan; offno = OffsetNumberNext(offno)) {
for (OffsetNumber offno = FirstOffsetNumber; offno <= PageGetMaxOffsetNumber(page) && n_sortedtuple > pinecone_max_buffer_scan; offno = OffsetNumberNext(offno)) {
// get the tid and the vector from the heap tuple
ItemId itemid;
Item item;
Expand Down
8 changes: 7 additions & 1 deletion src/pinecone/pinecone_utils.c
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,13 @@ cJSON* tuple_get_pinecone_vector(TupleDesc tup_desc, Datum *values, bool *isnull
Vector *vector;
cJSON *json_values;
bool isNonZero;

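// Check whether the first Datum is zero, which is treated here as a NULL pointer, i.e. a NULL vector.
// NOTE(review): isnull[0] is presumably the authoritative NULL flag for this column — confirm whether it should be checked instead of (or in addition to) the raw Datum value.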
if(values[0]==0) {
ereport(WARNING, (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("Invalid vector: NULL vector"),
errhint("Pinecone insists that vectors cannot be NULL.")));
return NULL;
}
vector = DatumGetVector(values[0]);
isNonZero = validate_vector_nonzero(vector);
if(!isNonZero) return NULL;
Expand Down
31 changes: 20 additions & 11 deletions test/expected/pinecone_zero_vector_insert.out
Original file line number Diff line number Diff line change
Expand Up @@ -66,14 +66,18 @@ CREATE INDEX i2 ON t USING pinecone (val) WITH (spec = '{"serverless":{"cloud":"
-- insert vectors: throws warning while flushing zero-vector
INSERT INTO t (id, val) VALUES (1, '[100,1,1]');
INSERT INTO t (id, val) VALUES (2, '[0,0,0]');
INSERT INTO t (id, val) VALUES (3, '[10120,76,1]');
INSERT INTO t (id, val) VALUES (3, NULL);
WARNING: Invalid vector: zero vector
HINT: Pinecone insists that dense vectors cannot be zero in all dimensions. I don't know why they do this to you even when your metric isn't cosine.
WARNING: No vectors to flush to pinecone
-- returns only id = 1 as it is flushed to pinecone )zero vector not flushed to pinecone)
INSERT INTO t (id, val) VALUES (4, '[10120,76,1]');
WARNING: Invalid vector: NULL vector
HINT: Pinecone insists that vectors cannot be NULL.
WARNING: No vectors to flush to pinecone
-- returns only id = 1 as it is flushed to pinecone (zero vector not flushed to pinecone)
SELECT * FROM t ORDER BY val <-> '[3,3,3]';
NOTICE: Buffer is too large
HINT: There are 0 tuples in the buffer that have not yet been flushed to pinecone and 2 tuples in pinecone that are not yet live. You may want to consider flushing the buffer.
HINT: There are 0 tuples in the buffer that have not yet been flushed to pinecone and 3 tuples in pinecone that are not yet live. You may want to consider flushing the buffer.
NOTICE: Reached max local scan
id | val
----+-----------
Expand All @@ -85,17 +89,19 @@ SELECT * FROM t;
----+--------------
1 | [100,1,1]
2 | [0,0,0]
3 | [10120,76,1]
(3 rows)
3 |
4 | [10120,76,1]
(4 rows)

DROP INDEX i2;
SELECT * FROM t ORDER BY val <-> '[3,3,3]';
id | val
----+--------------
2 | [0,0,0]
1 | [100,1,1]
3 | [10120,76,1]
(3 rows)
4 | [10120,76,1]
3 |
(4 rows)

DELETE FROM pinecone_mock
WHERE url_prefix = 'https://fakehost/query' AND method = 'POST';
Expand All @@ -108,7 +114,7 @@ VALUES ('https://fakehost/query', 'POST', $${
"values": []
},
{
"id": "000000000003",
"id": "000000000004",
"score": 2,
"values": []
}],
Expand All @@ -121,20 +127,23 @@ VALUES ('https://fakehost/query', 'POST', $${
CREATE INDEX i2 ON t USING pinecone (val) WITH (spec = '{"serverless":{"cloud":"aws","region":"us-west-2"}}');
WARNING: Invalid vector: zero vector
HINT: Pinecone insists that dense vectors cannot be zero in all dimensions. I don't know why they do this to you even when your metric isn't cosine.
WARNING: Invalid vector: NULL vector
HINT: Pinecone insists that vectors cannot be NULL.
SELECT * FROM t ORDER BY val <-> '[3,3,3]';
NOTICE: Reached max local scan
id | val
----+--------------
1 | [100,1,1]
3 | [10120,76,1]
4 | [10120,76,1]
(2 rows)

SELECT * FROM t;
id | val
----+--------------
1 | [100,1,1]
2 | [0,0,0]
3 | [10120,76,1]
(3 rows)
3 |
4 | [10120,76,1]
(4 rows)

DROP TABLE t;
9 changes: 5 additions & 4 deletions test/sql/pinecone_zero_vector_insert.sql
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
-- SETUP
-- suppress output
\o /dev/null
delete from pinecone_mock;
-- logging level
SET client_min_messages = 'notice';
-- flush each vector individually
Expand Down Expand Up @@ -73,9 +72,11 @@ CREATE INDEX i2 ON t USING pinecone (val) WITH (spec = '{"serverless":{"cloud":"
-- insert vectors: throws warning while flushing zero-vector
INSERT INTO t (id, val) VALUES (1, '[100,1,1]');
INSERT INTO t (id, val) VALUES (2, '[0,0,0]');
INSERT INTO t (id, val) VALUES (3, '[10120,76,1]');
INSERT INTO t (id, val) VALUES (3, NULL);
INSERT INTO t (id, val) VALUES (4, '[10120,76,1]');

-- returns only id = 1 as it is flushed to pinecone )zero vector not flushed to pinecone)

-- returns only id = 1 as it is flushed to pinecone (zero vector not flushed to pinecone)
SELECT * FROM t ORDER BY val <-> '[3,3,3]';

SELECT * FROM t;
Expand All @@ -96,7 +97,7 @@ VALUES ('https://fakehost/query', 'POST', $${
"values": []
},
{
"id": "000000000003",
"id": "000000000004",
"score": 2,
"values": []
}],
Expand Down

0 comments on commit 5a1f375

Please sign in to comment.