From 50cd057453e587f877123412bf2d046d4ad243a6 Mon Sep 17 00:00:00 2001 From: Scott Lepper Date: Thu, 17 Oct 2024 12:25:22 -0400 Subject: [PATCH] add option to run in docker (#20) add option to run in docker --- .github/workflows/go.yml | 3 ++ duck/duckdb.go | 88 +++++++++++++++++++++++++++------------- duck/duckdb_test.go | 41 +++++++++++++++++++ 3 files changed, 103 insertions(+), 29 deletions(-) diff --git a/.github/workflows/go.yml b/.github/workflows/go.yml index 73f5c72..0c95a3f 100644 --- a/.github/workflows/go.yml +++ b/.github/workflows/go.yml @@ -27,6 +27,9 @@ jobs: unzip /tmp/duckdb.zip -d /usr/local/bin/ duckdb --version + - name: add duckdb image + run: docker pull datacatering/duckdb:v1.0.0 + - name: Build run: go build -v ./... diff --git a/duck/duckdb.go b/duck/duckdb.go index 03b9448..756556e 100644 --- a/duck/duckdb.go +++ b/duck/duckdb.go @@ -28,6 +28,8 @@ type DuckDB struct { chunk int cacheDuration int cache cache + docker bool + image string } type Opts struct { @@ -36,9 +38,14 @@ type Opts struct { Chunk int Exe string CacheDuration int + Docker bool + Image string } const newline = "\n" +const duckdbImage = "datacatering/duckdb:v1.0.0" + +var tempDir = getTempDir() // NewInMemoryDB creates a new in-memory DuckDB func NewInMemoryDB(opts ...Opts) *DuckDB { @@ -68,10 +75,15 @@ func NewDuckDB(name string, opts ...Opts) *DuckDB { if opt.CacheDuration > 0 { db.cacheDuration = opt.CacheDuration } + db.image = duckdbImage + if opt.Image != "" { + db.image = opt.Image + } + db.docker = opt.Docker } // Find the executable if it is not configured - if db.exe == "" { + if db.exe == "" && !db.docker { db.exe = which.Which("duckdb") if db.exe == "" { db.exe = "/usr/local/bin/duckdb" @@ -83,33 +95,7 @@ func NewDuckDB(name string, opts ...Opts) *DuckDB { // RunCommands runs a series of of sql commands against duckdb func (d *DuckDB) RunCommands(commands []string) (string, error) { - var stdout bytes.Buffer - var stderr bytes.Buffer - - var b bytes.Buffer - b.Write([]byte(fmt.Sprintf(".mode %s %s", d.mode, newline))) - for _, c := range commands { - cmd := fmt.Sprintf("%s %s", c, newline) - b.Write([]byte(cmd)) - } - - cmd := exec.Command(d.exe, d.Name) - cmd.Stdin = &b - cmd.Stdout = &stdout - cmd.Stderr = &stderr - - err := cmd.Run() - if err != nil { - message := err.Error() + stderr.String() - logger.Error("error running command", "cmd", b.String(), "message", message, "error", err) - return "", errors.New(message) - } - if stderr.String() != "" { - logger.Error("error running command", "cmd", b.String(), "error", stderr.String()) - return "", errors.New(stderr.String()) - } - - return stdout.String(), nil + return d.runCommands(commands) } // Query runs a query against the database. For Databases that are NOT in-memory. @@ -242,6 +228,42 @@ func resultsToFrame(name string, res string, f *sdk.Frame, frames []*sdk.Frame) return nil } +func (d *DuckDB) runCommands(commands []string) (string, error) { + var stdout bytes.Buffer + var stderr bytes.Buffer + + var b bytes.Buffer + b.Write([]byte(fmt.Sprintf(".mode %s %s", d.mode, newline))) + for _, c := range commands { + cmd := fmt.Sprintf("%s %s", c, newline) + b.Write([]byte(cmd)) + } + + var cmd *exec.Cmd + if d.docker { + volume := fmt.Sprintf("%s:%s", tempDir, tempDir) + logger.Debug("running command in docker", "volume", volume, "image", duckdbImage) + cmd = exec.Command("docker", "run", "-i", "-v", volume, duckdbImage) + } else { + cmd = exec.Command(d.exe, d.Name) + } + cmd.Stdin = &b + cmd.Stdout = &stdout + cmd.Stderr = &stderr + + err := cmd.Run() + if err != nil { + message := err.Error() + stderr.String() + logger.Error("error running command", "cmd", b.String(), "message", message, "error", err) + return "", errors.New(message) + } + if stderr.String() != "" { + logger.Error("error running command", "cmd", b.String(), "error", stderr.String()) + return "", errors.New(stderr.String()) + } + return stdout.String(), nil +} + // TODO // func applyLabels(resultsFrame sdk.Frame, sourceFrames []*sdk.Frame) { @@ -263,4 +285,12 @@ func resultsToFrame(name string, res string, f *sdk.Frame, frames []*sdk.Frame) // } // } // return nil -// +// } + +func getTempDir() string { + temp := os.Getenv("TMPDIR") + if temp == "" { + temp = "/tmp" + } + return temp +} diff --git a/duck/duckdb_test.go b/duck/duckdb_test.go index 0c9d153..8d78f28 100644 --- a/duck/duckdb_test.go +++ b/duck/duckdb_test.go @@ -26,6 +26,22 @@ func TestCommands(t *testing.T) { assert.Contains(t, res, `[{"i":1,"j":5}]`) } +func TestCommandsDocker(t *testing.T) { + db := NewInMemoryDB(Opts{Docker: true}) + + commands := []string{ + "CREATE TABLE t1 (i INTEGER, j INTEGER);", + "INSERT INTO t1 VALUES (1, 5);", + "SELECT * from t1;", + } + res, err := db.RunCommands(commands) + if err != nil { + t.Fail() + return + } + assert.Contains(t, res, `[{"i":1,"j":5}]`) +} + func TestQuery(t *testing.T) { db := NewDuckDB("foo") @@ -149,6 +165,31 @@ func TestQueryFrameIntoFrame(t *testing.T) { fmt.Printf("GOT: %s", txt) } +func TestQueryFrameIntoFrameDocker(t *testing.T) { + db := NewInMemoryDB(Opts{Docker: true}) + + var values = []string{"2024-02-23 09:01:54"} + frame := data.NewFrame("foo", data.NewField("value", nil, values)) + frame.RefID = "foo" + + var values2 = []string{"2024-02-23 09:02:54"} + frame2 := data.NewFrame("foo", data.NewField("value", nil, values2)) + frame2.RefID = "foo" + + frames := []*data.Frame{frame, frame2} + + model := &data.Frame{} + err := db.QueryFramesInto("foo", "select * from foo order by value desc", frames, model) + assert.Nil(t, err) + + assert.Equal(t, 2, model.Rows()) + + txt, err := model.StringTable(-1, -1) + assert.Nil(t, err) + + fmt.Printf("GOT: %s", txt) +} + func TestQueryFrameIntoFrameMultipleColumns(t *testing.T) { db := NewInMemoryDB()