From 8fbff6956d2b018e66740b447b6181e1e326fbd3 Mon Sep 17 00:00:00 2001
From: Oscar Laird
Date: Sun, 7 Apr 2024 16:46:27 -0400
Subject: [PATCH] medium_create test [skip ci]

---
Review notes (text between the "---" marker and the first diff is not part
of the commit message):
- REGRESS is a whitespace-separated list of test names, so the workflow now
  passes REGRESS="pinecone_crud pinecone_medium_create"; the comma-joined
  form is handed to pg_regress as one test name.
- NOTE(review): leading whitespace and blank-line positions on context lines
  were reconstructed from a whitespace-collapsed copy of this patch; confirm
  they match the target files before applying.

 .github/workflows/build.yml              |  2 +-
 src/pinecone/pinecone.c                  |  2 +-
 src/pinecone/pinecone_build.c            |  2 -
 test/data/cities_coordinates.csv         | 52 +++++++++++++++++++
 test/expected/pinecone_medium_create.out | 65 ++++++++++++++++++++++++
 test/sql/pinecone_medium_create.sql      | 61 ++++++++++++++++++++++
 6 files changed, 180 insertions(+), 4 deletions(-)
 create mode 100644 test/data/cities_coordinates.csv
 create mode 100644 test/expected/pinecone_medium_create.out
 create mode 100644 test/sql/pinecone_medium_create.sql

diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index 47ab5016..3f6ed640 100755
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -52,7 +52,7 @@ jobs:
       - run: psql test -c 'alter database test set enable_seqscan = off'
 
       # setup the database for testing
-      - run: make installcheck REGRESS=pinecone_crud REGRESS_OPTS="--dbname=test --inputdir=./test --use-existing"
+      - run: make installcheck REGRESS="pinecone_crud pinecone_medium_create" REGRESS_OPTS="--dbname=test --inputdir=./test --use-existing"
       - if: ${{ failure() }}
         run: cat regression.diffs
   # mac:
diff --git a/src/pinecone/pinecone.c b/src/pinecone/pinecone.c
index 970aade7..719a4da8 100644
--- a/src/pinecone/pinecone.c
+++ b/src/pinecone/pinecone.c
@@ -52,7 +52,7 @@ void PineconeInit(void)
                             0, NULL, NULL, NULL); // todo: you can have a check_hook that checks that the api key is valid.
     DefineCustomIntVariable("pinecone.top_k", "Pinecone top k", "Pinecone top k",
                             &pinecone_top_k,
-                            10000, 1, 10000,
+                            500, 1, 10000,
                             PGC_USERSET,
                             0, NULL, NULL, NULL);
     DefineCustomIntVariable("pinecone.vectors_per_request", "Pinecone vectors per request", "Pinecone vectors per request",
diff --git a/src/pinecone/pinecone_build.c b/src/pinecone/pinecone_build.c
index 11857864..1bc3f8dc 100644
--- a/src/pinecone/pinecone_build.c
+++ b/src/pinecone/pinecone_build.c
@@ -97,8 +97,6 @@ IndexBuildResult *pinecone_build(Relation heap, Relation index, IndexInfo *index
     // i.e. describe stats is equal to result->index_tuples
     index_stats_response = pinecone_get_index_stats(pinecone_api_key, host);
     while (cJSON_GetObjectItemCaseSensitive(index_stats_response, "totalVectorCount")->valueint < result->index_tuples) {
-        elog(DEBUG1, "Waiting for remote index to process vectors...");
-        elog(DEBUG1, "Got response: %s", cJSON_Print(index_stats_response));
         sleep(1);
         index_stats_response = pinecone_get_index_stats(pinecone_api_key, host);
     }
diff --git a/test/data/cities_coordinates.csv b/test/data/cities_coordinates.csv
new file mode 100644
index 00000000..234ece2a
--- /dev/null
+++ b/test/data/cities_coordinates.csv
@@ -0,0 +1,52 @@
+City,Coordinates
+Tokyo,"[-0.6193558309386907, 0.5254062107056782, 0.5833923794789785]"
+Delhi,"[0.19435560415000272, 0.8560818011736064, 0.47890484319417903]"
+Shanghai,"[-0.44644821198103857, 0.7292884746553274, 0.5184807756839587]"
+São Paulo,"[0.6294715843131387, -0.6664229635353052, -0.3995572026820798]"
+Mumbai,"[0.278244917074267, 0.9031982689844369, 0.3268220510091861]"
+Mexico City,"[-0.14968789524176762, -0.9310777308550825, 0.3326977504039369]"
+Beijing,"[-0.3411764480414554, 0.6870726709775106, 0.6415058659884925]"
+Osaka,"[-0.5864623826768065, 0.5762687036221328, 0.5691891205308738]"
+Cairo,"[0.7401559856285986, 0.44888496431360186, 0.5006709555699573]"
+New York,"[0.2088536810457709, -0.7286473179008321, 0.6522677563926911]"
+Dhaka,"[-0.006586657431747957, 0.91486339779959, 0.4037097711357327]"
+Karachi,"[0.354507054092229, 0.8352108639823966, 0.4204135598248799]"
+Buenos Aires,"[0.4315177558190849, -0.7009176397609634, -0.567896899696398]"
+Kolkata,"[0.026364235705609956, 0.9230174440008866, 0.3838537809449442]"
+Istanbul,"[0.6601396178172725, 0.365596100123556, 0.6561670340412662]"
+Tokyo,"[-0.6193558309386907, 0.5254062107056782, 0.5833923794789785]"
+Lagos,"[0.9917961095142174, 0.05856223284587094, 0.11362632633579398]"
+Manila,"[-0.4981794402215084, 0.8296279535356087, 0.2520609133779023]"
+Rio de Janeiro,"[0.6717797619406024, -0.6302455143763187, -0.38923327588421447]"
+Guangzhou,"[-0.36322752510262685, 0.8448494529068836, 0.3928042348686684]"
+Los Angeles,"[-0.39207823524685376, -0.7298855539254963, 0.5599479758125608]"
+Moscow,"[0.4457345288017871, 0.3434762508502159, 0.8266467171263238]"
+Kinshasa,"[0.9618148272488081, 0.26251480508538944, -0.07744814520261718]"
+Tianjin,"[-0.3554399619399389, 0.6868394730036702, 0.6339668538497699]"
+Paris,"[0.6573914875834973, 0.02700348223283558, 0.7530652322342716]"
+Lima,"[0.21928549762040267, -0.953077442659091, -0.20870375853774067]"
+Bangkok,"[-0.1770382724720343, 0.955045592646577, 0.2377926955279878]"
+London,"[0.6224120058502514, -0.0013883114418182503, 0.7826885508072845]"
+Hong Kong,"[-0.3787616617717799, 0.8439881673099093, 0.3797677935432919]"
+Bangalore,"[0.20934529171928778, 0.9517292892867235, 0.2244680573016001]"
+Bangkok,"[-0.1770382724720343, 0.955045592646577, 0.2377926955279878]"
+Taipei,"[-0.4742987689507612, 0.7720058675658211, 0.42314018742697607]"
+Jakarta,"[-0.2880937837254856, 0.9514752266498772, -0.10815204505832528]"
+Chongqing,"[-0.24777637154613263, 0.8337690305784, 0.4933924131477784]"
+Seoul,"[-0.47678331846746586, 0.6332185611357113, 0.6096818195291462]"
+Wuhan,"[-0.3543093330690614, 0.7845073905881568, 0.5089332476982792]"
+Chennai,"[0.16460709515534913, 0.9600348516560672, 0.22635721289641045]"
+Lahore,"[0.22997772175084677, 0.8205682613791028, 0.5232379725474505]"
+Berlin,"[0.5919066962046825, 0.1410666519079721, 0.7935657897789777]"
+Johannesburg,"[0.7918562140394597, 0.42187630895285566, -0.4415700581237456]"
+Baghdad,"[0.5974032731711619, 0.5843284824610712, 0.5492445300493429]"
+Toronto,"[0.1333030708849013, -0.7111453634564938, 0.6902916509179191]"
+Santiago,"[0.27619576123825035, -0.7873386304102029, -0.5511930537822562]"
+Kuala Lumpur,"[-0.2022594820737603, 0.9777998826332261, 0.05475848275567348]"
+San Francisco,"[-0.4237560024915061, -0.6672331382519411, 0.6125608456070996]"
+Philadelphia,"[0.19626869021140214, -0.7410244858238303, 0.6421536519028982]"
+Quito,"[0.19991762244096672, -0.9798076330348093, -0.003153804730118826]"
+Montreal,"[0.19827347318790306, -0.6722587006119277, 0.7132712452367391]"
+Sydney,"[-0.7276762684093141, 0.39988961696487235, -0.5572930491559926]"
+Singapore,"[-0.2388025339893586, 0.9707814168790945, 0.023596406558605315]"
+Dubai,"[0.5148220270520575, 0.7433934389654809, 0.4269947017997836]"
\ No newline at end of file
diff --git a/test/expected/pinecone_medium_create.out b/test/expected/pinecone_medium_create.out
new file mode 100644
index 00000000..234020b8
--- /dev/null
+++ b/test/expected/pinecone_medium_create.out
@@ -0,0 +1,65 @@
+-- SETUP
+-- suppress output
+\o /dev/null
+-- logging level
+SET client_min_messages = 'notice';
+-- small request sizes: 5 vectors per upsert request, 5 requests per batch
+SET pinecone.vectors_per_request = 5;
+SET pinecone.requests_per_batch = 5;
+-- disable flat scan to force use of the index
+SET enable_seqscan = off;
+-- Testing database is responsible for initializing the mock table with
+-- SELECT pinecone_create_mock_table();
+DELETE FROM pinecone_mock;
+-- CREATE TABLE
+DROP TABLE IF EXISTS cities;
+CREATE TABLE cities (name text, coords vector(3));
+\o
+-- COPY FROM CSV
+-- \copy reads the file on the client, so the path is resolved relative to the
+-- directory the regression run is started from (the repository root when using
+-- make installcheck) instead of one developer's home directory
+\copy cities(name, coords) from 'test/data/cities_coordinates.csv' with (format csv, header true)
+-- CREATE INDEX
+-- mock create index
+INSERT INTO pinecone_mock (url_prefix, method, response)
+VALUES ('https://api.pinecone.io/indexes', 'POST', $${
+    "name": "invalid",
+    "metric": "euclidean",
+    "dimension": 3,
+    "status": {
+        "ready": true,
+        "state": "Ready"
+    },
+    "host": "fakehost",
+    "spec": {
+        "serverless": {
+            "cloud": "aws",
+            "region": "us-west-2"
+        }
+    }
+}$$);
+-- mock describe index stats
+INSERT INTO pinecone_mock (url_prefix, method, response)
+VALUES ('https://fakehost/describe_index_stats', 'GET', '{"namespaces":{},"dimension":3,"indexFullness":0,"totalVectorCount":51}');
+-- mock upsert
+INSERT INTO pinecone_mock (url_prefix, method, response)
+VALUES ('https://fakehost/vectors/upsert', 'POST', '{"upsertedCount":5}');
+-- create index
+CREATE INDEX i2 ON cities USING pinecone (coords) WITH (spec = '{"serverless":{"cloud":"aws","region":"us-west-2"}}');
+-- SELECT
+-- select from table
+INSERT INTO pinecone_mock (url_prefix, method, response)
+VALUES ('https://fakehost/query', 'POST', '{"results":[],"matches":[{"id":"000000000016","score":0.34670651,"values":[]},{"id":"000000000027","score":0.412868381,"values":[]},{"id":"00000000001c","score":0.434622884,"values":[]},{"id":"000000000019","score":0.493869543,"values":[]},{"id":"000000000030","score":0.57345736,"values":[]},{"id":"00000000002a","score":0.619416595,"values":[]},{"id":"00000000000f","score":0.68766582,"values":[]},{"id":"00000000000a","score":0.695464492,"values":[]}]}');
+INSERT INTO pinecone_mock (url_prefix, method, response)
+VALUES ('https://fakehost/vectors/fetch', 'GET', '{"code": 3, "message": "No IDs provided for fetch query", "details": [] }');
+SELECT name,coords<->'[0,0,1]' as dist FROM cities ORDER BY coords <-> '[0, 0, 1]' limit 5;
+   name   |        dist        
+----------+--------------------
+ Moscow   | 0.5888179423173123
+ Berlin   | 0.6425483719914729
+ London   | 0.6592593674715698
+ Paris    | 0.7027585028110576
+ Montreal | 0.7572697534412217
+(5 rows)
+
diff --git a/test/sql/pinecone_medium_create.sql b/test/sql/pinecone_medium_create.sql
new file mode 100644
index 00000000..fd40921d
--- /dev/null
+++ b/test/sql/pinecone_medium_create.sql
@@ -0,0 +1,61 @@
+
+
+-- SETUP
+-- suppress output
+\o /dev/null
+-- logging level
+SET client_min_messages = 'notice';
+-- small request sizes: 5 vectors per upsert request, 5 requests per batch
+SET pinecone.vectors_per_request = 5;
+SET pinecone.requests_per_batch = 5;
+-- disable flat scan to force use of the index
+SET enable_seqscan = off;
+-- Testing database is responsible for initializing the mock table with
+-- SELECT pinecone_create_mock_table();
+DELETE FROM pinecone_mock;
+-- CREATE TABLE
+DROP TABLE IF EXISTS cities;
+CREATE TABLE cities (name text, coords vector(3));
+\o
+
+-- COPY FROM CSV
+-- \copy reads the file on the client, so the path is resolved relative to the
+-- directory the regression run is started from (the repository root when using
+-- make installcheck) instead of one developer's home directory
+\copy cities(name, coords) from 'test/data/cities_coordinates.csv' with (format csv, header true)
+
+-- CREATE INDEX
+-- mock create index
+INSERT INTO pinecone_mock (url_prefix, method, response)
+VALUES ('https://api.pinecone.io/indexes', 'POST', $${
+    "name": "invalid",
+    "metric": "euclidean",
+    "dimension": 3,
+    "status": {
+        "ready": true,
+        "state": "Ready"
+    },
+    "host": "fakehost",
+    "spec": {
+        "serverless": {
+            "cloud": "aws",
+            "region": "us-west-2"
+        }
+    }
+}$$);
+-- mock describe index stats
+INSERT INTO pinecone_mock (url_prefix, method, response)
+VALUES ('https://fakehost/describe_index_stats', 'GET', '{"namespaces":{},"dimension":3,"indexFullness":0,"totalVectorCount":51}');
+-- mock upsert
+INSERT INTO pinecone_mock (url_prefix, method, response)
+VALUES ('https://fakehost/vectors/upsert', 'POST', '{"upsertedCount":5}');
+-- create index
+CREATE INDEX i2 ON cities USING pinecone (coords) WITH (spec = '{"serverless":{"cloud":"aws","region":"us-west-2"}}');
+
+-- SELECT
+-- select from table
+INSERT INTO pinecone_mock (url_prefix, method, response)
+VALUES ('https://fakehost/query', 'POST', '{"results":[],"matches":[{"id":"000000000016","score":0.34670651,"values":[]},{"id":"000000000027","score":0.412868381,"values":[]},{"id":"00000000001c","score":0.434622884,"values":[]},{"id":"000000000019","score":0.493869543,"values":[]},{"id":"000000000030","score":0.57345736,"values":[]},{"id":"00000000002a","score":0.619416595,"values":[]},{"id":"00000000000f","score":0.68766582,"values":[]},{"id":"00000000000a","score":0.695464492,"values":[]}]}');
+INSERT INTO pinecone_mock (url_prefix, method, response)
+VALUES ('https://fakehost/vectors/fetch', 'GET', '{"code": 3, "message": "No IDs provided for fetch query", "details": [] }');
+SELECT name,coords<->'[0,0,1]' as dist FROM cities ORDER BY coords <-> '[0, 0, 1]' limit 5;