diff --git a/.github/dependabot.yml b/.github/dependabot.yml new file mode 100644 index 0000000..cd88554 --- /dev/null +++ b/.github/dependabot.yml @@ -0,0 +1,11 @@ +# To get started with Dependabot version updates, you'll need to specify which +# package ecosystems to update and where the package manifests are located. +# Please see the documentation for all configuration options: +# https://docs.github.com/code-security/dependabot/dependabot-version-updates/configuration-options-for-the-dependabot.yml-file + +version: 2 +updates: + - package-ecosystem: "gomod" # See documentation for possible values + directory: "/" # Location of package manifests + schedule: + interval: "weekly" diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 2a963bb..f766796 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -16,7 +16,7 @@ jobs: goarch: [amd64] steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4.1.1 - uses: lmangani/go-release-action@v1.37-ubuntu with: github_token: ${{ secrets.GITHUB_TOKEN }} @@ -30,10 +30,11 @@ jobs: executable_compression: upx compress_assets: OFF build_flags: -buildvcs=false -# ldflags: "-linkmode external -extldflags -static" + #ldflags: "-linkmode external -extldflags -static" + extra_files: LICENSE README.md - name: Log in to the Container registry - uses: docker/login-action@v2.1.0 + uses: docker/login-action@v3.1.0 with: registry: ${{ env.REGISTRY }} username: ${{ github.actor }} @@ -41,12 +42,12 @@ jobs: - name: Extract metadata (tags, labels) for Docker id: meta - uses: docker/metadata-action@v4.3.0 + uses: docker/metadata-action@v5.5.1 with: images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }} - name: Build and push Docker image - uses: docker/build-push-action@v4.0.0 + uses: docker/build-push-action@v5.3.0 with: context: . 
push: true diff --git a/.github/workflows/starlight.yml b/.github/workflows/starlight.yml new file mode 100644 index 0000000..fc1abad --- /dev/null +++ b/.github/workflows/starlight.yml @@ -0,0 +1,16 @@ +name: Starring Partner +on: + issues: + types: [opened, reopened] +jobs: + # This workflow checks if a user has starred a repository and takes actions + starcheck: + runs-on: ubuntu-latest + steps: + - name: Please Star First + uses: qxip/please-star-light@v4 + with: + token: ${{ secrets.GITHUB_TOKEN }} + message: "Thanks for opening an Issue! Please star this repository to motivate developers! :star:" + label: "stargazed" + autoclose: false diff --git a/Dockerfile b/Dockerfile index e275439..8f27ad7 100644 --- a/Dockerfile +++ b/Dockerfile @@ -4,7 +4,7 @@ COPY . . RUN CGO_ENABLED=1 go build -o quackpipe quackpipe.go RUN strip quackpipe -FROM ubuntu:20.04 +FROM debian:12 COPY --from=builder /quackpipe /quackpipe RUN echo "INSTALL httpfs; INSTALL json; INSTALL parquet; INSTALL fts;" | /quackpipe --stdin CMD ["/quackpipe"] diff --git a/README.md b/README.md index 3fd0bb9..2459ee4 100644 --- a/README.md +++ b/README.md @@ -1,17 +1,22 @@ -> _quack, motherducker!_ +> _a pipe for quackheads_ # :baby_chick: quackpipe -_QuackPipe is an OLAP API built on top of DuckDB with a few extra compatibility bits. If you know, you know._ +_QuackPipe is a serverless OLAP API built on top of DuckDB emulating and aliasing the ClickHouse HTTP API_ Play with DuckDB SQL and Cloud storage though a familiar API, without giving up old habits and integrations. 
-### Demos -:hatched_chick: try a [sample s3/parquet query](https://quackpipe.metrico.in/?user=default#U0VMRUNUCiAgICB0b3duLAogICAgZGlzdHJpY3QsCiAgICBjb3VudCgpIEFTIGMsCkZST00gcmVhZF9wYXJxdWV0KCdodHRwczovL2RhdGFzZXRzLWRvY3VtZW50YXRpb24uczMuZXUtd2VzdC0zLmFtYXpvbmF3cy5jb20vaG91c2VfcGFycXVldC9ob3VzZV8wLnBhcnF1ZXQnKQpXSEVSRSByZWFkX3BhcnF1ZXQudG93biA9PSAnTE9ORE9OJwpHUk9VUCBCWQogICAgdG93biwKICAgIGRpc3RyaWN0Ck9SREVSIEJZIGMgREVTQwpMSU1JVCAxMA==) _(deta.space free tier, AWS Lambdas)_
-:hatched_chick: try our [miniature playground](https://quackpipe.fly.dev) _(fly.io free tier, 1x-shared-vcpu, 256Mb)_ +### :hatched_chick: Demos +:hatched_chick: try a [sample s3/parquet query](https://quackpipe.fly.dev/?user=default#U0VMRUNUCiAgICB0b3duLAogICAgZGlzdHJpY3QsCiAgICBjb3VudCgpIEFTIGMsCkZST00gcmVhZF9wYXJxdWV0KCdodHRwczovL2RhdGFzZXRzLWRvY3VtZW50YXRpb24uczMuZXUtd2VzdC0zLmFtYXpvbmF3cy5jb20vaG91c2VfcGFycXVldC9ob3VzZV8wLnBhcnF1ZXQnKQpXSEVSRSByZWFkX3BhcnF1ZXQudG93biA9PSAnTE9ORE9OJwpHUk9VUCBCWQogICAgdG93biwKICAgIGRpc3RyaWN0Ck9SREVSIEJZIGMgREVTQwpMSU1JVCAxMA==) in our [miniature playground](https://quackpipe.fly.dev) _(fly.io free tier, 1x-shared-vcpu, 256Mb)_
+:hatched_chick: launch your own _free instance_ on fly.io + + + + +

@@ -55,7 +60,7 @@ Run with `-h` for a full list of parameters #### :point_right: Playground Execute queries using the embedded playground -![image](https://user-images.githubusercontent.com/1423657/230783859-1c69910b-6bf2-42df-8b1d-876b94fc3419.png) + #### :point_right: API Execute queries using the POST API @@ -73,8 +78,35 @@ hello,v0.7.1 ``` ### :fist_right: Extensions -Several extensions are pre-installed by default in Docker images, including _parquet, json, httpfs_
-When using HTTP API, _httpfs, parquet, json_ extensions are automatically pre-loaded. +Several extensions are pre-installed by default in [Docker images](https://github.com/metrico/quackpipe/blob/main/Dockerfile#L9), including _parquet, json, httpfs_
+When using the HTTP API, _httpfs, parquet, json_ extensions are automatically pre-loaded by the wrapper. + +Users can pre-install extensions and execute quackpipe using custom parameters: +``` +echo "INSTALL httpfs;" | ./quackpipe --stdin --params "?extension_directory=/tmp/" +./quackpipe --port 8123 --host 0.0.0.0 --params "?extension_directory=/tmp/" +``` + + +### ClickHouse UDF + +Quackpipe can be used as an [executable UDF](https://clickhouse.com/docs/en/engines/table-functions/executable) to get DuckDB data IN/OUT of ClickHouse queries: + +```sql +SELECT * +FROM executable('quackpipe -stdin -format TSV', TSV, 'id UInt32, num UInt32', ( + SELECT 'SELECT 1, 2' +)) +Query id: dd878948-bec8-4abe-9e06-2f5813653c3a +┌─id─┬─num─┐ +│ 1 │ 2 │ +└────┴─────┘ +1 rows in set. Elapsed: 0.155 sec. +``` + +🃏 What is this? Think of it as a SELECT within a SELECT with a different syntax.
+🃏 Format confusion? Make DuckDB SQL feel like ClickHouse with the included [ClickHouse Macro Aliases](https://github.com/metrico/quackpipe/blob/main/aliases.sql) +
@@ -85,7 +117,7 @@ When using HTTP API, _httpfs, parquet, json_ extensions are automatically pre-lo - [x] [cgo](https://github.com/marcboeker/go-duckdb) binding - [x] Extension preloading - [ ] Aliases Extension -- [x] REST API [^3] [^4] +- [x] REST API [^3] - [x] CH FORMAT Emulation - [x] CSV, CSVWithNames - [x] TSV, TSVWithNames @@ -93,6 +125,7 @@ When using HTTP API, _httpfs, parquet, json_ extensions are automatically pre-lo - [ ] Native - [x] Web Playground _(from ClickkHouse, Apache2 Licensed)_ [^2] - [x] STDIN Fast Query Execution +- [x] ClickHouse Executable UDF - [x] `:memory:` mode Cloud Storage _(s3/r2/minio, httpfs, etc)_ - [x] `:file:` mode using optional _parameters_ @@ -111,7 +144,6 @@ When using HTTP API, _httpfs, parquet, json_ extensions are automatically pre-lo ###### :black_joker: Disclaimers -[^1]: DuckDB ® is a trademark of MotherDuck. No direct affiliation or endorsement. +[^1]: DuckDB ® is a trademark of DuckDB Foundation. All rights reserved by their respective owners. [^2]: ClickHouse ® is a trademark of ClickHouse Inc. No direct affiliation or endorsement. [^3]: Released under the MIT license. See LICENSE for details. All rights reserved by their respective owners. -[^4]: Elements of this experiments (including potential bugs) were co-authored by ChatGPT. 
diff --git a/aliases.sql b/aliases.sql new file mode 100644 index 0000000..eb46571 --- /dev/null +++ b/aliases.sql @@ -0,0 +1,40 @@ +-- ClickHouse-compatible function aliases implemented as DuckDB macros. +-- *OrZero variants return 0 and *OrNull variants return NULL when the cast fails. +-- DuckDB has no 256-bit integer type, so the toInt256* aliases use HUGEINT (128-bit). +-- intDiv uses the // integer-division operator; match performs a regular-expression search. +CREATE OR REPLACE MACRO toString(expr) AS CAST(expr AS VARCHAR); +CREATE OR REPLACE MACRO toInt8(expr) AS CAST(expr AS TINYINT); +CREATE OR REPLACE MACRO toInt16(expr) AS CAST(expr AS SMALLINT); +CREATE OR REPLACE MACRO toInt32(expr) AS CAST(expr AS INTEGER); +CREATE OR REPLACE MACRO toInt64(expr) AS CAST(expr AS BIGINT); +CREATE OR REPLACE MACRO toInt128(expr) AS CAST(expr AS HUGEINT); +CREATE OR REPLACE MACRO toInt256(expr) AS CAST(expr AS HUGEINT); +CREATE OR REPLACE MACRO toInt8OrZero(expr) AS COALESCE(TRY_CAST(expr AS TINYINT), CAST(0 AS TINYINT)); +CREATE OR REPLACE MACRO toInt16OrZero(expr) AS COALESCE(TRY_CAST(expr AS SMALLINT), CAST(0 AS SMALLINT)); +CREATE OR REPLACE MACRO toInt32OrZero(expr) AS COALESCE(TRY_CAST(expr AS INTEGER), CAST(0 AS INTEGER)); +CREATE OR REPLACE MACRO toInt64OrZero(expr) AS COALESCE(TRY_CAST(expr AS BIGINT), CAST(0 AS BIGINT)); +CREATE OR REPLACE MACRO toInt128OrZero(expr) AS COALESCE(TRY_CAST(expr AS HUGEINT), CAST(0 AS HUGEINT)); +CREATE OR REPLACE MACRO toInt256OrZero(expr) AS COALESCE(TRY_CAST(expr AS HUGEINT), CAST(0 AS HUGEINT)); +CREATE OR REPLACE MACRO toInt8OrNull(expr) AS TRY_CAST(expr AS TINYINT); +CREATE OR REPLACE MACRO toInt16OrNull(expr) AS TRY_CAST(expr AS SMALLINT); +CREATE OR REPLACE MACRO toInt32OrNull(expr) AS TRY_CAST(expr AS INTEGER); +CREATE OR REPLACE MACRO toInt64OrNull(expr) AS TRY_CAST(expr AS BIGINT); +CREATE OR REPLACE MACRO toInt128OrNull(expr) AS TRY_CAST(expr AS HUGEINT); +CREATE OR REPLACE MACRO toInt256OrNull(expr) AS TRY_CAST(expr AS HUGEINT); +CREATE OR REPLACE MACRO toUInt8(expr) AS CAST(expr AS UTINYINT); +CREATE OR REPLACE MACRO toUInt16(expr) AS CAST(expr AS USMALLINT); +CREATE OR REPLACE MACRO toUInt32(expr) AS CAST(expr AS UINTEGER); +CREATE OR REPLACE MACRO toUInt64(expr) AS CAST(expr AS
UBIGINT); +CREATE OR REPLACE MACRO toUInt8OrZero(expr) AS COALESCE(TRY_CAST(expr AS UTINYINT), CAST(0 AS UTINYINT)); +CREATE OR REPLACE MACRO toUInt16OrZero(expr) AS COALESCE(TRY_CAST(expr AS USMALLINT), CAST(0 AS USMALLINT)); +CREATE OR REPLACE MACRO toUInt32OrZero(expr) AS COALESCE(TRY_CAST(expr AS UINTEGER), CAST(0 AS UINTEGER)); +CREATE OR REPLACE MACRO toUInt64OrZero(expr) AS COALESCE(TRY_CAST(expr AS UBIGINT), CAST(0 AS UBIGINT)); +CREATE OR REPLACE MACRO toUInt8OrNull(expr) AS TRY_CAST(expr AS UTINYINT); +CREATE OR REPLACE MACRO toUInt16OrNull(expr) AS TRY_CAST(expr AS USMALLINT); +CREATE OR REPLACE MACRO toUInt32OrNull(expr) AS TRY_CAST(expr AS UINTEGER); +CREATE OR REPLACE MACRO toUInt64OrNull(expr) AS TRY_CAST(expr AS UBIGINT); +CREATE OR REPLACE MACRO toFloat(expr) AS CAST(expr AS DOUBLE); +CREATE OR REPLACE MACRO toFloatOrNull(expr) AS TRY_CAST(expr AS DOUBLE); +CREATE OR REPLACE MACRO toFloatOrZero(expr) AS COALESCE(TRY_CAST(expr AS DOUBLE), CAST(0 AS DOUBLE)); +CREATE OR REPLACE MACRO intDiv(a, b) AS (a // b); +CREATE OR REPLACE MACRO match(string,token) AS regexp_matches(string, token); diff --git a/go.mod b/go.mod index 9f51c51..c378e7a 100644 --- a/go.mod +++ b/go.mod @@ -1,7 +1,20 @@ module quackpipe -go 1.18 +go 1.20 -require github.com/marcboeker/go-duckdb v1.2.2 +require github.com/marcboeker/go-duckdb v1.6.4 -require github.com/mitchellh/mapstructure v1.5.0 // indirect +require ( + github.com/apache/arrow/go/v14 v14.0.2 // indirect + github.com/goccy/go-json v0.10.2 // indirect + github.com/google/flatbuffers v23.5.26+incompatible // indirect + github.com/klauspost/compress v1.16.7 // indirect + github.com/klauspost/cpuid/v2 v2.2.5 // indirect + github.com/mitchellh/mapstructure v1.5.0 // indirect + github.com/pierrec/lz4/v4 v4.1.18 // indirect + github.com/zeebo/xxh3 v1.0.2 // indirect + golang.org/x/mod v0.13.0 // indirect + golang.org/x/sys v0.13.0 // indirect + 
golang.org/x/tools v0.14.0 // indirect + golang.org/x/xerrors v0.0.0-20220907171357-04be3eba64a2 // indirect +) diff --git a/go.sum b/go.sum index e57157d..41cfe77 100644 --- a/go.sum +++ b/go.sum @@ -1,9 +1,36 @@ +github.com/apache/arrow/go/v14 v14.0.2 h1:N8OkaJEOfI3mEZt07BIkvo4sC6XDbL+48MBPWO5IONw= +github.com/apache/arrow/go/v14 v14.0.2/go.mod h1:u3fgh3EdgN/YQ8cVQRguVW3R+seMybFg8QBQ5LU+eBY= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= -github.com/google/uuid v1.3.0 h1:t6JiXgmwXMjEs8VusXIJk2BXHsn+wx8BZdTaoZ5fu7I= -github.com/marcboeker/go-duckdb v1.2.2 h1:Qy5yW83qAcZgsEmGo+pkEZeZvxA2dzuQNPLh7wcorb0= -github.com/marcboeker/go-duckdb v1.2.2/go.mod h1:wm91jO2GNKa6iO9NTcjXIRsW+/ykPoJbQcHSXhdAl28= +github.com/goccy/go-json v0.10.2 h1:CrxCmQqYDkv1z7lO7Wbh2HN93uovUHgrECaO5ZrCXAU= +github.com/goccy/go-json v0.10.2/go.mod h1:6MelG93GURQebXPDq3khkgXZkazVtN9CRI+MGFi0w8I= +github.com/google/flatbuffers v23.5.26+incompatible h1:M9dgRyhJemaM4Sw8+66GHBu8ioaQmyPLg1b8VwK5WJg= +github.com/google/flatbuffers v23.5.26+incompatible/go.mod h1:1AeVuKshWv4vARoZatz6mlQ0JxURH0Kv5+zNeJKJCa8= +github.com/google/uuid v1.3.1 h1:KjJaJ9iWZ3jOFZIf1Lqf4laDRCasjl0BCmnEGxkdLb4= +github.com/klauspost/compress v1.16.7 h1:2mk3MPGNzKyxErAw8YaohYh69+pa4sIQSC0fPGCFR9I= +github.com/klauspost/compress v1.16.7/go.mod h1:ntbaceVETuRiXiv4DpjP66DpAtAGkEQskQzEyD//IeE= +github.com/klauspost/cpuid/v2 v2.2.5 h1:0E5MSMDEoAulmXNFquVs//DdoomxaoTY1kUhbc/qbZg= +github.com/klauspost/cpuid/v2 v2.2.5/go.mod h1:Lcz8mBdAVJIBVzewtcLocK12l3Y+JytZYpaMropDUws= +github.com/marcboeker/go-duckdb v1.6.4 h1:p7iFopIcIWoHZStQhvQ+ffhKL3ExM/oXdXAUI62gBWE= +github.com/marcboeker/go-duckdb v1.6.4/go.mod h1:WtWeqqhZoTke/Nbd7V9lnBx7I2/A/q0SAq/urGzPCMs= github.com/mitchellh/mapstructure v1.5.0 h1:jeMsZIYE/09sWLaz43PL7Gy6RuMjD2eJVyuac5Z2hdY= github.com/mitchellh/mapstructure v1.5.0/go.mod h1:bFUtVrKA4DC2yAKiSyO/QUcy7e+RRV2QTWOzhPopBRo= +github.com/pierrec/lz4/v4 v4.1.18 
h1:xaKrnTkyoqfh1YItXl56+6KJNVYWlEEPuAQW9xsplYQ= +github.com/pierrec/lz4/v4 v4.1.18/go.mod h1:gZWDp/Ze/IJXGXf23ltt2EXimqmTUXEy0GFuRQyBid4= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= -github.com/stretchr/testify v1.8.0 h1:pSgiaMZlXftHpm5L7V1+rVB+AZJydKsMxsQBIJw4PKk= +github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk= +github.com/zeebo/assert v1.3.0 h1:g7C04CbJuIDKNPFHmsk4hwZDO5O+kntRxzaUoNXj+IQ= +github.com/zeebo/xxh3 v1.0.2 h1:xZmwmqxHZA8AI603jOQ0tMqmBr9lPeFwGg6d+xy9DC0= +github.com/zeebo/xxh3 v1.0.2/go.mod h1:5NWz9Sef7zIDm2JHfFlcQvNekmcEl9ekUZQQKCYaDcA= +golang.org/x/exp v0.0.0-20231006140011-7918f672742d h1:jtJma62tbqLibJ5sFQz8bKtEM8rJBtfilJ2qTU199MI= +golang.org/x/mod v0.13.0 h1:I/DsJXRlw/8l/0c24sM9yb0T4z9liZTduXvdAWYiysY= +golang.org/x/mod v0.13.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c= +golang.org/x/sync v0.4.0 h1:zxkM55ReGkDlKSM+Fu41A+zmbZuaPVbGMzvvdUPznYQ= +golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.13.0 h1:Af8nKPmuFypiUBjVoU9V20FiaFXOcuZI21p0ycVYYGE= +golang.org/x/sys v0.13.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/tools v0.14.0 h1:jvNa2pY0M4r62jkRQ6RwEZZyPcymeL9XZMLBbV7U2nc= +golang.org/x/tools v0.14.0/go.mod h1:uYBEerGOWcJyEORxN+Ek8+TT266gXkNlHdJBwexUsBg= +golang.org/x/xerrors v0.0.0-20220907171357-04be3eba64a2 h1:H2TDz8ibqkAF6YGhCdN3jS9O0/s90v0rJh3X/OLHEUk= +golang.org/x/xerrors v0.0.0-20220907171357-04be3eba64a2/go.mod h1:K8+ghG5WaK9qNqU5K3HdILfMLy1f3aNYFI/wnl100a8= +gonum.org/v1/gonum v0.12.0 h1:xKuo6hzt+gMav00meVPUlXwSdoEJP46BR+wdxQEFK2o= gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= diff --git a/play.html b/play.html index 6a9e670..0c060c0 100644 --- a/play.html +++ b/play.html @@ -458,6 +458,16 @@  (Ctrl/Cmd+Enter) + 🌑🌞 @@ -480,6 +490,14 @@ /// This is to avoid race conditions. 
let request_num = 0; + // query presets + var querybox = document.getElementById('query'); + var querypresets = document.getElementById('dropdown'); + querypresets.onchange = function() { + var newquery = querypresets.options[querypresets.selectedIndex].value; + querybox.value = newquery; + } + /// Save query in history only if it is different. let previous_query = ''; diff --git a/quackpipe.go b/quackpipe.go index 99fd09d..b7268a7 100644 --- a/quackpipe.go +++ b/quackpipe.go @@ -22,6 +22,9 @@ import ( //go:embed play.html var staticPlay string +//go:embed aliases.sql +var staticAliases string + // params for Flags type CommandLineFlags struct { Host *string `json:"host"` @@ -54,7 +57,11 @@ func quack(query string, stdin bool, format string, params string) (string, erro if !stdin { check(db.Exec("LOAD httpfs; LOAD json; LOAD parquet;")) } - + + if staticAliases != "" { + check(db.Exec(staticAliases)) + } + startTime := time.Now() rows, err := db.Query(query) if err != nil { @@ -297,7 +304,8 @@ func main() { // handle query parameter if r.URL.Query().Get("query") != "" { - query = r.Form.Get("query") + // query = r.FormValue("query") + query = r.URL.Query().Get("query") } else if r.Body != nil { bodyBytes, err = ioutil.ReadAll(r.Body) if err != nil { @@ -354,7 +362,7 @@ func main() { } }) - fmt.Printf("API Running: %s:%s\n", *appFlags.Host, *appFlags.Port) + fmt.Printf("QuackPipe API Running: %s:%s\n", *appFlags.Host, *appFlags.Port) if err := http.ListenAndServe(*appFlags.Host+":"+*appFlags.Port, nil); err != nil { panic(err) }