Fix #28 prompts.duckdb is not picked up unless I pass --duckdb (#31)

Florents-Tselai · Jul 8, 2024 · 07c1078 · 07c1078
1 parent d5b2dc3
commit 07c1078
Show file tree

Hide file tree

Showing 4 changed files with 143 additions and 161 deletions.
diff --git a/.github/FUNDING.yml b/.github/FUNDING.yml
@@ -0,0 +1 @@
+github: [Florents-Tselai]
diff --git a/README.md b/README.md
@@ -8,29 +8,20 @@
 [![codecov](https://codecov.io/gh/Florents-Tselai/tsellm/branch/main/graph/badge.svg)](https://codecov.io/gh/Florents-Tselai/tsellm)
 [![License](https://img.shields.io/badge/BSD%20license-blue.svg)](https://github.com/Florents-Tselai/tsellm/blob/main/LICENSE)
 
-**tsellm** is the easiest way to access LLMs through your SQLite or DuckDB database.
+**tsellm** is the easiest way to access LLMs from SQLite or DuckDB.
 
 ```shell
 pip install tsellm
 ```
 
-```shell
-usage: tsellm [-h] [--sqlite | --duckdb] [-v] [filename] [sql]
-
-Use LLMs in SQLite and DuckDB
-
-positional arguments:
-  filename       SQLite/DuckDB database to open (defaults to SQLite ':memory:').
-                 A new database is created if the file does not previously exist.
-  sql            An SQL query to execute. Any returned rows are printed to
-                 stdout.
-
-options:
-  -h, --help     show this help message and exit
-  --sqlite       SQLite mode
-  --duckdb       DuckDB mode
-  -v, --version  Print tsellm version
-
+```bash
+cat <<EOF | tee >(sqlite3 prompts.sqlite3) | duckdb prompts.duckdb
+CREATE TABLE prompts ( p TEXT);
+INSERT INTO prompts VALUES('hello world!');
+INSERT INTO prompts VALUES('how are you?');
+INSERT INTO prompts VALUES('is this real life?');
+INSERT INTO prompts VALUES('1+1=?');
+EOF
 ```
 
 Behind the scenes, **tsellm** is based on the beautiful [llm](https://llm.datasette.io) library,
@@ -42,95 +33,56 @@ For example, to access `gpt4all` models
 
 ```shell
 llm install llm-gpt4all
-# Then pick any gpt4all (it will be downloaded automatically the first time you use any model
-tsellm :memory: "select prompt('What is the capital of Greece?', 'orca-mini-3b-gguf2-q4_0')"
-tsellm :memory: "select prompt('What is the capital of Greece?', 'orca-2-7b')"
+```
+
+```sql
+tsellm prompts.duckdb "select prompt(p, 'orca-mini-3b-gguf2-q4_0') from prompts"
+tsellm prompts.sqlite3 "select prompt(p, 'orca-2-7b') from prompts"
 ```
 
 ## Embeddings
 
 ```shell
 llm install llm-sentence-transformers
 llm sentence-transformers register all-MiniLM-L12-v2
-tsellm :memory: "select embed('Hello', 'sentence-transformers/all-MiniLM-L12-v2')"
+```
+
+```sql
+tsellm prompts.sqlite3 "select embed(p, 'sentence-transformers/all-MiniLM-L12-v2') from prompts"
 ```
 
 ### Embeddings for binary (`BLOB`) columns
 
 ```shell
 wget https://tselai.com/img/flo.jpg
-sqlite3 images.db <<EOF
+sqlite3 images.sqlite3 <<EOF
 CREATE TABLE images(name TEXT, type TEXT, img BLOB);
 INSERT INTO images(name,type,img) VALUES('flo','jpg',readfile('flo.jpg'));
 EOF
 ```
 
 ```shell
 llm install llm-clip
-tsellm images.db "select embed(img, 'clip') from images"
-```
-
-## Examples
-
-Things get more interesting if you
-combine models in your standard queries.
-
-First, create a db with some data.
-You can easily toggle between SQLite and DuckDB,
-and **tsellm** will pick this up automatically.
-
-### SQLite
-```bash
-sqlite3 prompts.db <<EOF
-CREATE TABLE prompts (
-   p TEXT
-);
-INSERT INTO prompts VALUES('hello world!');
-INSERT INTO prompts VALUES('how are you?');
-INSERT INTO prompts VALUES('is this real life?');
-INSERT INTO prompts VALUES('1+1=?');
-EOF
 ```
 
-With a single query you can access get prompt 
-responses from different LLMs:
-
 ```sql
-tsellm prompts.db "
-        select p,
-        prompt(p, 'orca-2-7b'),
-        prompt(p, 'orca-mini-3b-gguf2-q4_0'),
-        embed(p, 'sentence-transformers/all-MiniLM-L12-v2') 
-        from prompts"
+tsellm images.sqlite3 "select embed(img, 'clip') from images"
 ```
 
-### DuckDB
+### Multiple Prompts
 
-```bash
-duckdb prompts.duckdb <<EOF
-CREATE TABLE prompts (
-   p TEXT
-);
-INSERT INTO prompts VALUES('hello world!');
-INSERT INTO prompts VALUES('how are you?');
-INSERT INTO prompts VALUES('is this real life?');
-INSERT INTO prompts VALUES('1+1=?');
-EOF
-```
-
-With a single query you can access get prompt 
+With a single query you can easily get prompt
 responses from different LLMs:
 
 ```sql
-tsellm prompts.duckdb "
+tsellm prompts.sqlite3 "
         select p,
         prompt(p, 'orca-2-7b'),
         prompt(p, 'orca-mini-3b-gguf2-q4_0'),
         embed(p, 'sentence-transformers/all-MiniLM-L12-v2') 
         from prompts"
 ```
 
-
 ## Interactive Shell
 
 If you don't provide an SQL query,

diff --git a/tests/test_tsellm.py b/tests/test_tsellm.py
@@ -10,12 +10,7 @@
 from llm import cli as llm_cli
 
 from tsellm.__version__ import __version__
-from tsellm.cli import (
-    cli,
-    TsellmConsole,
-    SQLiteConsole,
-    TsellmConsoleMixin,
-)
+from tsellm.cli import cli, TsellmConsole, SQLiteConsole, DuckDBConsole, DBSniffer
 
 
 def new_tempfile():
@@ -25,17 +20,33 @@ def new_tempfile():
 def new_sqlite_file():
     f = new_tempfile()
     with sqlite3.connect(f) as db:
-        db.execute("SELECT 1")
+        db.execute("CREATE TABLE my(x text)")
     return f
 
 
 def new_duckdb_file():
     f = new_tempfile()
     con = duckdb.connect(f.__str__())
-    con.sql("SELECT 1")
+    con.sql("CREATE TABLE my(x text)")
     return f
 
 
+class TestDBSniffer(unittest.TestCase):
+    def setUp(self):
+        self.sqlite_fp = new_sqlite_file()
+        self.duckdb_fp = new_duckdb_file()
+
+    def test_sniff_sqlite(self):
+        sqlite_sni = DBSniffer(self.sqlite_fp)
+        self.assertTrue(sqlite_sni.is_sqlite)
+        self.assertFalse(sqlite_sni.is_duckdb)
+
+    def test_snif_duckdb(self):
+        duckdb_sni = DBSniffer(self.duckdb_fp)
+        self.assertFalse(duckdb_sni.is_sqlite)
+        self.assertTrue(duckdb_sni.is_duckdb)
+
+
 class TsellmConsoleTest(unittest.TestCase):
     def setUp(self):
         super().setUp()
@@ -69,23 +80,15 @@ def expect_failure(self, *args):
         self.assertEqual(out, "")
         return err
 
-    def test_sniff_sqlite(self):
-        self.assertTrue(TsellmConsoleMixin().is_sqlite(new_sqlite_file()))
-
-    def test_sniff_duckdb(self):
-        self.assertTrue(TsellmConsoleMixin().is_duckdb(new_duckdb_file()))
-
     def test_console_factory_sqlite(self):
         s = new_sqlite_file()
-        self.assertTrue(TsellmConsoleMixin().is_sqlite(s))
         obj = TsellmConsole.create_console(s)
         self.assertIsInstance(obj, SQLiteConsole)
 
-    # def test_console_factory_duckdb(self):
-    #     s = new_duckdb_file()
-    #     self.assertTrue(TsellmConsole.is_duckdb(s))
-    #     obj = TsellmConsole.create_console(s)
-    #     self.assertIsInstance(obj, DuckDBConsole)
+        d = new_duckdb_file()
+        self.assertTrue(TsellmConsole.create_console(d))
+        obj = TsellmConsole.create_console(d)
+        self.assertIsInstance(obj, DuckDBConsole)
 
     def test_cli_help(self):
         out = self.expect_success("-h")
@@ -98,11 +101,6 @@ def test_cli_version(self):
     def test_choose_db(self):
         self.expect_failure("--sqlite", "--duckdb")
 
-    def test_deault_sqlite(self):
-        f = new_tempfile()
-        self.expect_success(str(f), "select 1")
-        self.assertTrue(TsellmConsoleMixin().is_sqlite(f))
-
     MEMORY_DB_MSG = "Connected to :memory:"
     PS1 = "tsellm> "
     PS2 = "... "
@@ -112,7 +110,7 @@ def run_cli(self, *args, commands=()):
             captured_stdin() as stdin,
             captured_stdout() as stdout,
             captured_stderr() as stderr,
-            self.assertRaises(SystemExit) as cm
+            self.assertRaises(SystemExit) as cm,
         ):
             for cmd in commands:
                 stdin.write(cmd + "\n")
@@ -121,8 +119,9 @@ def run_cli(self, *args, commands=()):
 
         out = stdout.getvalue()
         err = stderr.getvalue()
-        self.assertEqual(cm.exception.code, 0,
-                         f"Unexpected failure: {args=}\n{out}\n{err}")
+        self.assertEqual(
+            cm.exception.code, 0, f"Unexpected failure: {args=}\n{out}\n{err}"
+        )
         return out, err
 
     def test_interact(self):
@@ -197,13 +196,6 @@ def test_cli_execute_incomplete_sql(self):
         stderr = self.expect_failure(*self.path_args, "sel")
         self.assertIn("OperationalError (SQLITE_ERROR)", stderr)
 
-    def test_cli_on_disk_db(self):
-        self.addCleanup(unlink, TESTFN)
-        out = self.expect_success(TESTFN, "create table t(t)")
-        self.assertEqual(out, "")
-        out = self.expect_success(TESTFN, "select count(t) from t")
-        self.assertIn("(0,)", out)
-
     def assertMarkovResult(self, prompt, generated):
         # Every word should be one of the original prompt (see https://github.com/simonw/llm-markov/blob/657ca504bcf9f0bfc1c6ee5fe838cde9a8976381/tests/test_llm_markov.py#L20)
         for w in prompt.split(" "):
@@ -256,7 +248,7 @@ class DiskSQLiteTest(InMemorySQLiteTest):
 
     def setUp(self):
         super().setUp()
-        self.db_fp = str(new_tempfile())
+        self.db_fp = str(new_sqlite_file())
         self.path_args = (
             "--sqlite",
             self.db_fp,
@@ -265,7 +257,7 @@ def setUp(self):
     def test_embed_default_hazo_leaves_valid_db_behind(self):
         # This should probably be called for all test cases
         super().test_embed_default_hazo()
-        self.assertTrue(TsellmConsoleMixin().is_sqlite(self.db_fp))
+        self.assertTrue(DBSniffer(self.db_fp).is_sqlite)
 
 
 class InMemoryDuckDBTest(InMemorySQLiteTest):
@@ -299,5 +291,19 @@ def test_embed_hazo_binary(self):
         pass
 
 
+class DiskDuckDBTest(InMemoryDuckDBTest):
+    db_fp = None
+    path_args = ()
+
+    def setUp(self):
+        super().setUp()
+        self.db_fp = str(new_duckdb_file())
+        self.path_args = (self.db_fp,)
+
+    def test_duckdb_is_picked_up(self):
+        # https://github.com/Florents-Tselai/tsellm/issues/28
+        super().test_cli_execute_sql()
+
+
 if __name__ == "__main__":
     unittest.main()