Skip to content

Commit

Permalink
Fix #28 prompts.duckdb is not picked up unless I pass --duckdb (#31)
Browse files Browse the repository at this point in the history
  • Loading branch information
Florents-Tselai authored Jul 8, 2024
1 parent d5b2dc3 commit 07c1078
Show file tree
Hide file tree
Showing 4 changed files with 143 additions and 161 deletions.
1 change: 1 addition & 0 deletions .github/FUNDING.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
github: [Florents-Tselai]
94 changes: 23 additions & 71 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,29 +8,20 @@
[![codecov](https://codecov.io/gh/Florents-Tselai/tsellm/branch/main/graph/badge.svg)](https://codecov.io/gh/Florents-Tselai/tsellm)
[![License](https://img.shields.io/badge/BSD%20license-blue.svg)](https://github.com/Florents-Tselai/tsellm/blob/main/LICENSE)

**tsellm** is the easiest way to access LLMs through your SQLite or DuckDB database.
**tsellm** is the easiest way to access LLMs from SQLite or DuckDB.

```shell
pip install tsellm
```

```shell
usage: tsellm [-h] [--sqlite | --duckdb] [-v] [filename] [sql]

Use LLMs in SQLite and DuckDB

positional arguments:
filename SQLite/DuckDB database to open (defaults to SQLite ':memory:').
A new database is created if the file does not previously exist.
sql An SQL query to execute. Any returned rows are printed to
stdout.

options:
-h, --help show this help message and exit
--sqlite SQLite mode
--duckdb DuckDB mode
-v, --version Print tsellm version

```bash
cat <<EOF | tee >(sqlite3 prompts.sqlite3) | duckdb prompts.duckdb
CREATE TABLE prompts ( p TEXT);
INSERT INTO prompts VALUES('hello world!');
INSERT INTO prompts VALUES('how are you?');
INSERT INTO prompts VALUES('is this real life?');
INSERT INTO prompts VALUES('1+1=?');
EOF
```

Behind the scenes, **tsellm** is based on the beautiful [llm](https://llm.datasette.io) library,
Expand All @@ -42,95 +33,56 @@ For example, to access `gpt4all` models

```shell
llm install llm-gpt4all
# Then pick any gpt4all model (it will be downloaded automatically the first time you use it)
tsellm :memory: "select prompt('What is the capital of Greece?', 'orca-mini-3b-gguf2-q4_0')"
tsellm :memory: "select prompt('What is the capital of Greece?', 'orca-2-7b')"
```

```sql
tsellm prompts.duckdb "select prompt(p, 'orca-mini-3b-gguf2-q4_0') from prompts"
tsellm prompts.sqlite3 "select prompt(p, 'orca-2-7b') from prompts"
```

## Embeddings

```shell
llm install llm-sentence-transformers
llm sentence-transformers register all-MiniLM-L12-v2
tsellm :memory: "select embed('Hello', 'sentence-transformers/all-MiniLM-L12-v2')"
```

```sql
tsellm prompts.sqlite3 "select embed(p, 'sentence-transformers/all-MiniLM-L12-v2')"
```

### Embeddings for binary (`BLOB`) columns

```shell
wget https://tselai.com/img/flo.jpg
sqlite3 images.db <<EOF
sqlite3 images.sqlite3 <<EOF
CREATE TABLE images(name TEXT, type TEXT, img BLOB);
INSERT INTO images(name,type,img) VALUES('flo','jpg',readfile('flo.jpg'));
EOF
```

```shell
llm install llm-clip
tsellm images.db "select embed(img, 'clip') from images"
```
## Examples
Things get more interesting if you
combine models in your standard queries.
First, create a db with some data.
You can easily toggle between SQLite and DuckDB,
and **tsellm** will pick this up automatically.
### SQLite
```bash
sqlite3 prompts.db <<EOF
CREATE TABLE prompts (
p TEXT
);
INSERT INTO prompts VALUES('hello world!');
INSERT INTO prompts VALUES('how are you?');
INSERT INTO prompts VALUES('is this real life?');
INSERT INTO prompts VALUES('1+1=?');
EOF
```

With a single query you can access get prompt
responses from different LLMs:
```sql
tsellm prompts.db "
select p,
prompt(p, 'orca-2-7b'),
prompt(p, 'orca-mini-3b-gguf2-q4_0'),
embed(p, 'sentence-transformers/all-MiniLM-L12-v2')
from prompts"
tsellm images.sqlite3 "select embed(img, 'clip') from images"
```

### DuckDB
### Multiple Prompts

```bash
duckdb prompts.duckdb <<EOF
CREATE TABLE prompts (
p TEXT
);
INSERT INTO prompts VALUES('hello world!');
INSERT INTO prompts VALUES('how are you?');
INSERT INTO prompts VALUES('is this real life?');
INSERT INTO prompts VALUES('1+1=?');
EOF
```
With a single query you can access get prompt
With a single query you can easily get prompt
responses from different LLMs:

```sql
tsellm prompts.duckdb "
tsellm prompts.sqlite3 "
select p,
prompt(p, 'orca-2-7b'),
prompt(p, 'orca-mini-3b-gguf2-q4_0'),
embed(p, 'sentence-transformers/all-MiniLM-L12-v2')
from prompts"
```

## Interactive Shell

If you don't provide an SQL query,
Expand Down
80 changes: 43 additions & 37 deletions tests/test_tsellm.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,12 +10,7 @@
from llm import cli as llm_cli

from tsellm.__version__ import __version__
from tsellm.cli import (
cli,
TsellmConsole,
SQLiteConsole,
TsellmConsoleMixin,
)
from tsellm.cli import cli, TsellmConsole, SQLiteConsole, DuckDBConsole, DBSniffer


def new_tempfile():
Expand All @@ -25,17 +20,33 @@ def new_tempfile():
def new_sqlite_file():
    """Create a SQLite database in a fresh temp file and return that file.

    The return value is whatever ``new_tempfile()`` produced; the database is
    initialised by running a trivial query and creating table ``my``.
    """
    from contextlib import closing

    f = new_tempfile()
    # ``with connection:`` only commits/rolls back -- it does not close the
    # connection -- so wrap it in ``closing`` to release the file handle
    # before the path is handed to the caller.
    with closing(sqlite3.connect(f)) as db:
        with db:
            db.execute("SELECT 1")
            db.execute("CREATE TABLE my(x text)")
    return f


def new_duckdb_file():
    """Create a DuckDB database in a fresh temp file and return that file.

    The return value is whatever ``new_tempfile()`` produced; the database is
    initialised by issuing a trivial query and creating table ``my``.
    """
    f = new_tempfile()
    con = duckdb.connect(str(f))
    try:
        con.sql("SELECT 1")
        con.sql("CREATE TABLE my(x text)")
    finally:
        # Close explicitly so the handle is released and pending writes are
        # flushed before another connection opens the same file.
        con.close()
    return f


class TestDBSniffer(unittest.TestCase):
    """Check that ``DBSniffer`` tells SQLite and DuckDB database files apart."""

    def setUp(self):
        # A freshly created database file of each kind for every test.
        self.sqlite_fp = new_sqlite_file()
        self.duckdb_fp = new_duckdb_file()

    def test_sniff_sqlite(self):
        # A SQLite file must be reported as SQLite and not as DuckDB.
        sqlite_sni = DBSniffer(self.sqlite_fp)
        self.assertTrue(sqlite_sni.is_sqlite)
        self.assertFalse(sqlite_sni.is_duckdb)

    def test_sniff_duckdb(self):
        # A DuckDB file must be reported as DuckDB and not as SQLite.
        duckdb_sni = DBSniffer(self.duckdb_fp)
        self.assertFalse(duckdb_sni.is_sqlite)
        self.assertTrue(duckdb_sni.is_duckdb)


class TsellmConsoleTest(unittest.TestCase):
def setUp(self):
super().setUp()
Expand Down Expand Up @@ -69,23 +80,15 @@ def expect_failure(self, *args):
self.assertEqual(out, "")
return err

def test_sniff_sqlite(self):
self.assertTrue(TsellmConsoleMixin().is_sqlite(new_sqlite_file()))

def test_sniff_duckdb(self):
self.assertTrue(TsellmConsoleMixin().is_duckdb(new_duckdb_file()))

def test_console_factory_sqlite(self):
s = new_sqlite_file()
self.assertTrue(TsellmConsoleMixin().is_sqlite(s))
obj = TsellmConsole.create_console(s)
self.assertIsInstance(obj, SQLiteConsole)

# def test_console_factory_duckdb(self):
# s = new_duckdb_file()
# self.assertTrue(TsellmConsole.is_duckdb(s))
# obj = TsellmConsole.create_console(s)
# self.assertIsInstance(obj, DuckDBConsole)
d = new_duckdb_file()
self.assertTrue(TsellmConsole.create_console(d))
obj = TsellmConsole.create_console(d)
self.assertIsInstance(obj, DuckDBConsole)

def test_cli_help(self):
out = self.expect_success("-h")
Expand All @@ -98,11 +101,6 @@ def test_cli_version(self):
def test_choose_db(self):
self.expect_failure("--sqlite", "--duckdb")

def test_deault_sqlite(self):
f = new_tempfile()
self.expect_success(str(f), "select 1")
self.assertTrue(TsellmConsoleMixin().is_sqlite(f))

MEMORY_DB_MSG = "Connected to :memory:"
PS1 = "tsellm> "
PS2 = "... "
Expand All @@ -112,7 +110,7 @@ def run_cli(self, *args, commands=()):
captured_stdin() as stdin,
captured_stdout() as stdout,
captured_stderr() as stderr,
self.assertRaises(SystemExit) as cm
self.assertRaises(SystemExit) as cm,
):
for cmd in commands:
stdin.write(cmd + "\n")
Expand All @@ -121,8 +119,9 @@ def run_cli(self, *args, commands=()):

out = stdout.getvalue()
err = stderr.getvalue()
self.assertEqual(cm.exception.code, 0,
f"Unexpected failure: {args=}\n{out}\n{err}")
self.assertEqual(
cm.exception.code, 0, f"Unexpected failure: {args=}\n{out}\n{err}"
)
return out, err

def test_interact(self):
Expand Down Expand Up @@ -197,13 +196,6 @@ def test_cli_execute_incomplete_sql(self):
stderr = self.expect_failure(*self.path_args, "sel")
self.assertIn("OperationalError (SQLITE_ERROR)", stderr)

def test_cli_on_disk_db(self):
self.addCleanup(unlink, TESTFN)
out = self.expect_success(TESTFN, "create table t(t)")
self.assertEqual(out, "")
out = self.expect_success(TESTFN, "select count(t) from t")
self.assertIn("(0,)", out)

def assertMarkovResult(self, prompt, generated):
# Every word should be one of the original prompt (see https://github.com/simonw/llm-markov/blob/657ca504bcf9f0bfc1c6ee5fe838cde9a8976381/tests/test_llm_markov.py#L20)
for w in prompt.split(" "):
Expand Down Expand Up @@ -256,7 +248,7 @@ class DiskSQLiteTest(InMemorySQLiteTest):

def setUp(self):
super().setUp()
self.db_fp = str(new_tempfile())
self.db_fp = str(new_sqlite_file())
self.path_args = (
"--sqlite",
self.db_fp,
Expand All @@ -265,7 +257,7 @@ def setUp(self):
def test_embed_default_hazo_leaves_valid_db_behind(self):
# This should probably be called for all test cases
super().test_embed_default_hazo()
self.assertTrue(TsellmConsoleMixin().is_sqlite(self.db_fp))
self.assertTrue(DBSniffer(self.db_fp).is_sqlite)


class InMemoryDuckDBTest(InMemorySQLiteTest):
Expand Down Expand Up @@ -299,5 +291,19 @@ def test_embed_hazo_binary(self):
pass


class DiskDuckDBTest(InMemoryDuckDBTest):
    """Run the inherited DuckDB tests against an on-disk DuckDB file.

    Only the database path is passed on the command line (no ``--duckdb``
    flag), so the CLI has to work out the engine from the file on its own.
    """

    db_fp = None
    path_args = ()

    def setUp(self):
        super().setUp()
        duckdb_path = str(new_duckdb_file())
        self.db_fp = duckdb_path
        # Deliberately just the path: no explicit engine flag.
        self.path_args = (duckdb_path,)

    def test_duckdb_is_picked_up(self):
        # Regression test for
        # https://github.com/Florents-Tselai/tsellm/issues/28
        super().test_cli_execute_sql()


if __name__ == "__main__":
unittest.main()
Loading

0 comments on commit 07c1078

Please sign in to comment.