diff --git a/Cargo.lock b/Cargo.lock index a37e5c34..85835bf4 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -126,12 +126,6 @@ version = "1.0.80" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5ad32ce52e4161730f7098c077cd2ed6229b5804ccf99e5366be1ab72a98b4e1" -[[package]] -name = "array-init-cursor" -version = "0.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bf7d0a018de4f6aa429b9d33d69edf69072b1c5b1cb8d3e4a5f7ef898fc3eb76" - [[package]] name = "arrayvec" version = "0.7.4" @@ -159,23 +153,23 @@ dependencies = [ [[package]] name = "arrow" -version = "51.0.0" +version = "50.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "219d05930b81663fd3b32e3bde8ce5bff3c4d23052a99f11a8fa50a3b47b2658" +checksum = "aa285343fba4d829d49985bdc541e3789cf6000ed0e84be7c039438df4a4e78c" dependencies = [ - "arrow-arith 51.0.0", - "arrow-array 51.0.0", - "arrow-buffer 51.0.0", - "arrow-cast 51.0.0", + "arrow-arith 50.0.0", + "arrow-array 50.0.0", + "arrow-buffer 50.0.0", + "arrow-cast 50.0.0", "arrow-csv", - "arrow-data 51.0.0", + "arrow-data 50.0.0", "arrow-ipc", "arrow-json", - "arrow-ord 51.0.0", - "arrow-row 51.0.0", - "arrow-schema 51.0.0", - "arrow-select 51.0.0", - "arrow-string 51.0.0", + "arrow-ord 50.0.0", + "arrow-row 50.0.0", + "arrow-schema 50.0.0", + "arrow-select 50.0.0", + "arrow-string 50.0.0", ] [[package]] @@ -195,14 +189,14 @@ dependencies = [ [[package]] name = "arrow-arith" -version = "51.0.0" +version = "50.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0272150200c07a86a390be651abdd320a2d12e84535f0837566ca87ecd8f95e0" +checksum = "753abd0a5290c1bcade7c6623a556f7d1659c5f4148b140b5b63ce7bd1a45705" dependencies = [ - "arrow-array 51.0.0", - "arrow-buffer 51.0.0", - "arrow-data 51.0.0", - "arrow-schema 51.0.0", + "arrow-array 50.0.0", + "arrow-buffer 50.0.0", + "arrow-data 50.0.0", + "arrow-schema 50.0.0", "chrono", "half", "num", @@ -224,6 +218,22 @@ dependencies = [ "num", ] +[[package]] +name = "arrow-array" +version = "50.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d390feeb7f21b78ec997a4081a025baef1e2e0d6069e181939b61864c9779609" +dependencies = [ + "ahash 0.8.8", + "arrow-buffer 50.0.0", + "arrow-data 50.0.0", + "arrow-schema 50.0.0", + "chrono", + "half", + "hashbrown 0.14.3", + "num", +] + [[package]] name = "arrow-array" version = "51.0.0" @@ -251,6 +261,17 @@ dependencies = [ "num", ] +[[package]] +name = "arrow-buffer" +version = "50.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "69615b061701bcdffbc62756bc7e85c827d5290b472b580c972ebbbf690f5aa4" +dependencies = [ + "bytes", + "half", + "num", +] + [[package]] name = "arrow-buffer" version = "51.0.0" @@ -283,36 +304,33 @@ dependencies = [ [[package]] name = "arrow-cast" -version = "51.0.0" +version = "50.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9abc10cd7995e83505cc290df9384d6e5412b207b79ce6bdff89a10505ed2cba" +checksum = "e448e5dd2f4113bf5b74a1f26531708f5edcacc77335b7066f9398f4bcf4cdef" dependencies = [ - "arrow-array 51.0.0", - "arrow-buffer 51.0.0", - "arrow-data 51.0.0", - "arrow-schema 51.0.0", - "arrow-select 51.0.0", - "atoi", - "base64 0.22.0", + "arrow-array 50.0.0", + "arrow-buffer 50.0.0", + "arrow-data 50.0.0", + "arrow-schema 50.0.0", + "arrow-select 50.0.0", + "base64 0.21.7", "chrono", - "comfy-table", "half", "lexical-core", "num", - "ryu", ] [[package]] name = "arrow-csv" -version = "51.0.0" +version = "50.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "95cbcba196b862270bf2a5edb75927380a7f3a163622c61d40cbba416a6305f2" +checksum = "46af72211f0712612f5b18325530b9ad1bfbdc87290d5fbfd32a7da128983781" dependencies = [ - "arrow-array 51.0.0", - "arrow-buffer 51.0.0", - "arrow-cast 51.0.0", - "arrow-data 51.0.0", - "arrow-schema 51.0.0", + "arrow-array 50.0.0", + "arrow-buffer 50.0.0", + "arrow-cast 50.0.0", + "arrow-data 50.0.0", + "arrow-schema 50.0.0", "chrono", "csv", "csv-core", @@ -333,6 +351,18 @@ dependencies = [ "num", ] +[[package]] +name = "arrow-data" +version = "50.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "67d644b91a162f3ad3135ce1184d0a31c28b816a581e08f29e8e9277a574c64e" +dependencies = [ + "arrow-buffer 50.0.0", + "arrow-schema 50.0.0", + "half", + "num", +] + [[package]] name = "arrow-data" version = "51.0.0" @@ -347,29 +377,29 @@ dependencies = [ [[package]] name = "arrow-ipc" -version = "51.0.0" +version = "50.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a42ea853130f7e78b9b9d178cb4cd01dee0f78e64d96c2949dc0a915d6d9e19d" +checksum = "03dea5e79b48de6c2e04f03f62b0afea7105be7b77d134f6c5414868feefb80d" dependencies = [ - "arrow-array 51.0.0", - "arrow-buffer 51.0.0", - "arrow-cast 51.0.0", - "arrow-data 51.0.0", - "arrow-schema 51.0.0", + "arrow-array 50.0.0", + "arrow-buffer 50.0.0", + "arrow-cast 50.0.0", + "arrow-data 50.0.0", + "arrow-schema 50.0.0", "flatbuffers", ] [[package]] name = "arrow-json" -version = "51.0.0" +version = "50.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eaafb5714d4e59feae964714d724f880511500e3569cc2a94d02456b403a2a49" +checksum = "8950719280397a47d37ac01492e3506a8a724b3fb81001900b866637a829ee0f" dependencies = [ - "arrow-array 51.0.0", - "arrow-buffer 51.0.0", - "arrow-cast 51.0.0", - "arrow-data 51.0.0", - "arrow-schema 51.0.0", + "arrow-array 50.0.0", + "arrow-buffer 50.0.0", + "arrow-cast 50.0.0", + "arrow-data 50.0.0", + "arrow-schema 50.0.0", "chrono", "half", "indexmap", @@ -396,15 +426,15 @@ dependencies = [ [[package]] name = "arrow-ord" -version = "51.0.0" +version = "50.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e3e6b61e3dc468f503181dccc2fc705bdcc5f2f146755fa5b56d0a6c5943f412" +checksum = "1ed9630979034077982d8e74a942b7ac228f33dd93a93b615b4d02ad60c260be" dependencies = [ - "arrow-array 51.0.0", - "arrow-buffer 51.0.0", - "arrow-data 51.0.0", - "arrow-schema 51.0.0", - "arrow-select 51.0.0", + "arrow-array 50.0.0", + "arrow-buffer 50.0.0", + "arrow-data 50.0.0", + "arrow-schema 50.0.0", + "arrow-select 50.0.0", "half", "num", ] @@ -426,15 +456,15 @@ dependencies = [ [[package]] name = "arrow-row" -version = "51.0.0" +version = "50.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "848ee52bb92eb459b811fb471175ea3afcf620157674c8794f539838920f9228" +checksum = "007035e17ae09c4e8993e4cb8b5b96edf0afb927cd38e2dff27189b274d83dcf" dependencies = [ "ahash 0.8.8", - "arrow-array 51.0.0", - "arrow-buffer 51.0.0", - "arrow-data 51.0.0", - "arrow-schema 51.0.0", + "arrow-array 50.0.0", + "arrow-buffer 50.0.0", + "arrow-data 50.0.0", + "arrow-schema 50.0.0", "half", "hashbrown 0.14.3", ] @@ -448,6 +478,12 @@ dependencies = [ "bitflags 2.4.2", ] +[[package]] +name = "arrow-schema" +version = "50.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0ff3e9c01f7cd169379d269f926892d0e622a704960350d09d331be3ec9e0029" + [[package]] name = "arrow-schema" version = "51.0.0" @@ -470,15 +506,15 @@ dependencies = [ [[package]] name = "arrow-select" -version = "51.0.0" +version = "50.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "849524fa70e0e3c5ab58394c770cb8f514d0122d20de08475f7b472ed8075830" +checksum = "1ce20973c1912de6514348e064829e50947e35977bb9d7fb637dc99ea9ffd78c" dependencies = [ "ahash 0.8.8", - "arrow-array 51.0.0", - "arrow-buffer 51.0.0", - "arrow-data 51.0.0", - "arrow-schema 51.0.0", + "arrow-array 50.0.0", + "arrow-buffer 50.0.0", + "arrow-data 50.0.0", + "arrow-schema 50.0.0", "num", ] @@ -500,16 +536,15 @@ dependencies = [ [[package]] name = "arrow-string" -version = "51.0.0" +version = "50.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9373cb5a021aee58863498c37eb484998ef13377f69989c6c5ccfbd258236cdb" +checksum = "00f3b37f2aeece31a2636d1b037dabb69ef590e03bdc7eb68519b51ec86932a7" dependencies = [ - "arrow-array 51.0.0", - "arrow-buffer 51.0.0", - "arrow-data 51.0.0", - "arrow-schema 51.0.0", - "arrow-select 51.0.0", - "memchr", + "arrow-array 50.0.0", + "arrow-buffer 50.0.0", + "arrow-data 50.0.0", + "arrow-schema 50.0.0", + "arrow-select 50.0.0", "num", "regex", "regex-syntax", @@ -585,12 +620,6 @@ dependencies = [ "num-traits", ] -[[package]] -name = "atoi_simd" -version = "0.15.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9ae037714f313c1353189ead58ef9eec30a8e8dc101b2622d461418fd59e28a9" - [[package]] name = "atomic-write-file" version = "0.1.2" @@ -766,26 +795,6 @@ dependencies = [ "syn 1.0.109", ] -[[package]] -name = "bytemuck" -version = "1.15.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5d6d68c57235a3a081186990eca2867354726650f42f7516ca50c28d6281fd15" -dependencies = [ - "bytemuck_derive", -] - -[[package]] -name = "bytemuck_derive" -version = "1.6.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4da9a32f3fed317401fa3c862968128267c3106685286e15d5aaa3d7389c2f60" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.50", -] - [[package]] name = "byteorder" version = "1.5.0" @@ -986,25 +995,6 @@ dependencies = [ "cfg-if", ] -[[package]] -name = "crossbeam-deque" -version = "0.8.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "613f8cc01fe9cf1a3eb3d7f488fd2fa8388403e97039e2f73692932e291a770d" -dependencies = [ - "crossbeam-epoch", - "crossbeam-utils", -] - -[[package]] -name = "crossbeam-epoch" -version = "0.9.18" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e" -dependencies = [ - "crossbeam-utils", -] - [[package]] name = "crossbeam-queue" version = "0.3.11" @@ -1215,19 +1205,6 @@ dependencies = [ "cfg-if", ] -[[package]] -name = "env_logger" -version = "0.10.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4cd405aab171cb85d6735e5c8d9db038c17d3ca007a4d2c25f337935c3d90580" -dependencies = [ - "humantime", - "is-terminal", - "log", - "regex", - "termcolor", -] - [[package]] name = "equivalent" version = "1.0.1" @@ -1255,12 +1232,6 @@ dependencies = [ "windows-sys 0.48.0", ] -[[package]] -name = "ethnum" -version = "1.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b90ca2580b73ab6a1f724b76ca11ab632df820fd6040c336200d2c1df7b3c82c" - [[package]] name = "event-listener" version = "2.5.3" @@ -1279,12 +1250,6 @@ version = "0.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7360491ce676a36bf9bb3c56c1aa791658183a54d2744120f27285738d90465a" -[[package]] -name = "fast-float" -version = "0.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "95765f67b4b18863968b4a1bd5bb576f732b29a4a28c7cd84c09fa3e2875f33c" - [[package]] name = "fastrand" version = "2.0.1" @@ -1335,15 +1300,6 @@ dependencies = [ "miniz_oxide", ] -[[package]] -name = "float-cmp" -version = "0.9.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "98de4bbd547a563b716d8dfa9aad1cb19bfab00f4fa09a6a4ed21dbcf44ce9c4" -dependencies = [ - "num-traits", -] - [[package]] name = "flume" version = "0.11.0" @@ -1361,12 +1317,6 @@ version = "1.0.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" -[[package]] -name = "foreign_vec" -version = "0.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ee1b05cbd864bcaecbd3455d6d967862d446e4ebfc3c2e5e5b9841e53cba6673" - [[package]] name = "form_urlencoded" version = "1.2.1" @@ -1612,16 +1562,6 @@ dependencies = [ "num-traits", ] -[[package]] -name = "halfbrown" -version = "0.2.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8588661a8607108a5ca69cab034063441a0413a0b041c13618a7dd348021ef6f" -dependencies = [ - "hashbrown 0.14.3", - "serde", -] - [[package]] name = "hashbrown" version = "0.12.3" @@ -1639,7 +1579,6 @@ checksum = "290f1a1d9242c78d09ce40a5e87e7554ee637af1351968159f4952f028f75604" dependencies = [ "ahash 0.8.8", "allocator-api2", - "rayon", ] [[package]] @@ -1865,17 +1804,6 @@ version = "2.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8f518f335dce6725a761382244631d86cf0ccb2863413590b31338feb467f9c3" -[[package]] -name = "is-terminal" -version = "0.4.12" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f23ff5ef2b80d608d61efee834934d862cd92461afc0560dedf493e4c033738b" -dependencies = [ - "hermit-abi", - "libc", - "windows-sys 0.52.0", -] - [[package]] name = "itertools" version = "0.11.0" @@ -2081,26 +2009,6 @@ version = "0.4.20" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b5e6163cb8c49088c2c36f57875e58ccd8c87c7427f7fbd50ea6710b2f3f2e8f" -[[package]] -name = "lz4" -version = "1.24.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7e9e2dd86df36ce760a60f6ff6ad526f7ba1f14ba0356f8254fb6905e6494df1" -dependencies = [ - "libc", - "lz4-sys", -] - -[[package]] -name = "lz4-sys" -version = "1.9.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "57d27b317e207b10f69f5e75494119e391a96f48861ae870d1da6edac98ca900" -dependencies = [ - "cc", - "libc", -] - [[package]] name = "md-5" version = "0.10.6" @@ -2117,15 +2025,6 @@ version = "2.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "523dc4f511e55ab87b694dc30d0f820d60906ef06413f93d4d7a1385599cc149" -[[package]] -name = "memmap2" -version = "0.7.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f49388d20533534cd19360ad3d6a7dadc885944aa802ba3995040c5ec11288c6" -dependencies = [ - "libc", -] - [[package]] name = "mime" version = "0.3.17" @@ -2174,28 +2073,6 @@ version = "0.8.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e5ce46fe64a9d73be07dcbe690a38ce1b293be448fd8ce1e6c1b8062c9f72c6a" -[[package]] -name = "multiversion" -version = "0.7.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c4851161a11d3ad0bf9402d90ffc3967bf231768bfd7aeb61755ad06dbf1a142" -dependencies = [ - "multiversion-macros", - "target-features", -] - -[[package]] -name = "multiversion-macros" -version = "0.7.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "79a74ddee9e0c27d2578323c13905793e91622148f138ba29738f9dddb835e90" -dependencies = [ - "proc-macro2", - "quote", - "syn 1.0.109", - "target-features", -] - [[package]] name = "nix" version = "0.27.1" @@ -2571,195 +2448,6 @@ version = "0.3.30" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d231b230927b5e4ad203db57bbcbee2802f6bce620b1e4a9024a07d94e2907ec" -[[package]] -name = "planus" -version = "0.3.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fc1691dd09e82f428ce8d6310bd6d5da2557c82ff17694d2a32cad7242aea89f" -dependencies = [ - "array-init-cursor", -] - -[[package]] -name = "polars-arrow" -version = "0.38.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9d43b8f7aa78c9c158b9ca61246c425b4145b44ba96c948b8aea539b308c97e6" -dependencies = [ - "ahash 0.8.8", - "atoi_simd", - "bytemuck", - "chrono", - "dyn-clone", - "either", - "ethnum", - "fast-float", - "foreign_vec", - "getrandom", - "hashbrown 0.14.3", - "itoa", - "lz4", - "multiversion", - "num-traits", - "polars-arrow-format", - "polars-error", - "polars-utils", - "ryu", - "simdutf8", - "streaming-iterator", - "strength_reduce", - "version_check", - "zstd", -] - -[[package]] -name = "polars-arrow-format" -version = "0.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "19b0ef2474af9396b19025b189d96e992311e6a47f90c53cd998b36c4c64b84c" -dependencies = [ - "planus", - "serde", -] - -[[package]] -name = "polars-compute" -version = "0.38.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2f6ac10fd22183c1ff65e4e423499dac1136416af16d608f26519ef29252a1fe" -dependencies = [ - "bytemuck", - "either", - "num-traits", - "polars-arrow", - "polars-error", - "polars-utils", - "strength_reduce", - "version_check", -] - -[[package]] -name = "polars-core" -version = "0.38.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bbd8653734085eaa028976b97d57ff9bbf78e582438b31f1916f828323e27211" -dependencies = [ - "ahash 0.8.8", - "bitflags 2.4.2", - "bytemuck", - "either", - "hashbrown 0.14.3", - "indexmap", - "num-traits", - "once_cell", - "polars-arrow", - "polars-compute", - "polars-error", - "polars-row", - "polars-utils", - "rayon", - "smartstring", - "thiserror", - "version_check", - "xxhash-rust", -] - -[[package]] -name = "polars-error" -version = "0.38.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "402eb509a2feca529eca47c2b6fbb1c50133149ee7b872be4111e8998f872060" -dependencies = [ - "polars-arrow-format", - "simdutf8", - "thiserror", -] - -[[package]] -name = "polars-io" -version = "0.38.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6e4d3a2256f03a14aaf7abcb1c0c3c4bebec35d7a8e4f6582b37aa7608fa3d46" -dependencies = [ - "ahash 0.8.8", - "atoi_simd", - "bytes", - "fast-float", - "flate2", - "home", - "itoa", - "memchr", - "memmap2", - "num-traits", - "once_cell", - "percent-encoding", - "polars-arrow", - "polars-core", - "polars-error", - "polars-json", - "polars-utils", - "rayon", - "regex", - "ryu", - "serde_json", - "simd-json", - "simdutf8", - "smartstring", - "zstd", -] - -[[package]] -name = "polars-json" -version = "0.38.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4f3f0aba0b6b2911bd98b130d53176c43ed27f0f62154b1fa770fbba9adce7e9" -dependencies = [ - "ahash 0.8.8", - "chrono", - "fallible-streaming-iterator", - "hashbrown 0.14.3", - "indexmap", - "itoa", - "num-traits", - "polars-arrow", - "polars-error", - "polars-utils", - "ryu", - "simd-json", - "streaming-iterator", -] - -[[package]] -name = "polars-row" -version = "0.38.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d65278c78db5e2b12acab42b1362a8541bd73c716bb71799fb525a88c4839254" -dependencies = [ - "bytemuck", - "polars-arrow", - "polars-error", - "polars-utils", -] - -[[package]] -name = "polars-utils" -version = "0.38.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "38411c9dce645036c796f473389a0368b2affa4d8d270d663146a02fbcbf9952" -dependencies = [ - "ahash 0.8.8", - "bytemuck", - "hashbrown 0.14.3", - "indexmap", - "num-traits", - "once_cell", - "polars-error", - "raw-cpuid", - "rayon", - "smartstring", - "version_check", -] - [[package]] name = "portable-atomic" version = "1.6.0" @@ -2805,16 +2493,6 @@ dependencies = [ "termtree", ] -[[package]] -name = "pretty_env_logger" -version = "0.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "865724d4dbe39d9f3dd3b52b88d859d66bcb2d6a0acfd5ea68a65fb66d4bdc1c" -dependencies = [ - "env_logger", - "log", -] - [[package]] name = "prettyplease" version = "0.2.16" @@ -2963,7 +2641,6 @@ dependencies = [ "quary_proto", "regex", "serde_yaml", - "snowflake-api", "sqlx", "tempfile", "testcontainers", @@ -3003,6 +2680,7 @@ dependencies = [ name = "quary-databases" version = "0.0.113" dependencies = [ + "arrow-array 50.0.0", "arrow-array 51.0.0", "assert_cmd", "async-trait", @@ -3088,35 +2766,6 @@ dependencies = [ "getrandom", ] -[[package]] -name = "raw-cpuid" -version = "11.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9d86a7c4638d42c44551f4791a20e687dbb4c3de1f33c43dd71e355cd429def1" -dependencies = [ - "bitflags 2.4.2", -] - -[[package]] -name = "rayon" -version = "1.9.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e4963ed1bc86e4f3ee217022bd855b297cef07fb9eac5dfa1f788b220b49b3bd" -dependencies = [ - "either", - "rayon-core", -] - -[[package]] -name = "rayon-core" -version = "1.12.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1465873a3dfdaa8ae7cb14b4383657caab0b3e8a0aa9ae8e04b044854c8dfce2" -dependencies = [ - "crossbeam-deque", - "crossbeam-utils", -] - [[package]] name = "redox_syscall" version = "0.2.16" @@ -3135,26 +2784,6 @@ dependencies = [ "bitflags 1.3.2", ] -[[package]] -name = "ref-cast" -version = "1.0.22" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c4846d4c50d1721b1a3bef8af76924eef20d5e723647333798c1b519b3a9473f" -dependencies = [ - "ref-cast-impl", -] - -[[package]] -name = "ref-cast-impl" -version = "1.0.22" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5fddb4f8d99b0a2ebafc65a87a69a7b9875e4b1ae1f00db265d300ef7f28bccc" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.50", -] - [[package]] name = "regex" version = "1.10.4" @@ -3719,24 +3348,6 @@ dependencies = [ "rand_core", ] -[[package]] -name = "simd-json" -version = "0.13.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2faf8f101b9bc484337a6a6b0409cf76c139f2fb70a9e3aee6b6774be7bfbf76" -dependencies = [ - "ahash 0.8.8", - "getrandom", - "halfbrown", - "lexical-core", - "once_cell", - "ref-cast", - "serde", - "serde_json", - "simdutf8", - "value-trait", -] - [[package]] name = "simdutf8" version = "0.1.4" @@ -3770,17 +3381,6 @@ version = "1.13.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e6ecd384b10a64542d77071bd64bd7b231f4ed5940fba55e98c3de13824cf3d7" -[[package]] -name = "smartstring" -version = "1.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3fb72c633efbaa2dd666986505016c32c3044395ceaf881518399d2f4127ee29" -dependencies = [ - "autocfg", - "static_assertions", - "version_check", -] - [[package]] name = "snafu" version = "0.7.5" @@ -3806,19 +3406,16 @@ dependencies = [ [[package]] name = "snowflake-api" version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f6e20db2ea77690628e34db7a6f63f539557195afbbc3cd349a8cbe293e1fffc" dependencies = [ - "anyhow", - "arrow 51.0.0", + "arrow 50.0.0", "async-trait", - "base64 0.22.0", + "base64 0.21.7", "bytes", - "clap", "futures", "log", "object_store", - "polars-core", - "polars-io", - "pretty_env_logger", "regex", "reqwest", "reqwest-middleware", @@ -3827,7 +3424,6 @@ dependencies = [ "serde_json", "snowflake-jwt", "thiserror", - "tokio", "url", "uuid", ] @@ -4123,18 +3719,6 @@ version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" -[[package]] -name = "streaming-iterator" -version = "0.1.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2b2231b7c3057d5e4ad0156fb3dc807d900806020c5ffa3ee6ff2c8c76fb8520" - -[[package]] -name = "strength_reduce" -version = "0.2.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fe895eb47f22e2ddd4dabc02bce419d2e643c8e3b585c78158b349195bc24d82" - [[package]] name = "stringprep" version = "0.1.4" @@ -4264,12 +3848,6 @@ dependencies = [ "xattr", ] -[[package]] -name = "target-features" -version = "0.1.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c1bbb9f3c5c463a01705937a24fdabc5047929ac764b2d5b9cf681c1f5041ed5" - [[package]] name = "task-local-extensions" version = "0.1.4" @@ -4291,15 +3869,6 @@ dependencies = [ "windows-sys 0.52.0", ] -[[package]] -name = "termcolor" -version = "1.4.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "06794f8f6c5c898b3275aebefa6b8a1cb24cd2c6c79397ab15774837a0bc5755" -dependencies = [ - "winapi-util", -] - [[package]] name = "termtree" version = "0.4.1" @@ -4639,18 +4208,6 @@ dependencies = [ "getrandom", ] -[[package]] -name = "value-trait" -version = "0.8.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dad8db98c1e677797df21ba03fca7d3bf9bec3ca38db930954e4fe6e1ea27eb4" -dependencies = [ - "float-cmp", - "halfbrown", - "itoa", - "ryu", -] - [[package]] name = "vcpkg" version = "0.2.15" @@ -5036,12 +4593,6 @@ dependencies = [ "rustix", ] -[[package]] -name = "xxhash-rust" -version = "0.8.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "927da81e25be1e1a2901d59b81b37dd2efd1fc9c9345a55007f09bf5a2d3ee03" - [[package]] name = "yup-oauth2" version = "8.3.2" @@ -5094,31 +4645,3 @@ name = "zeroize" version = "1.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "525b4ec142c6b68a2d10f01f7bbf6755599ca3f81ea53b8431b7dd348f5fdb2d" - -[[package]] -name = "zstd" -version = "0.13.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bffb3309596d527cfcba7dfc6ed6052f1d39dfbd7c867aa2e865e4a449c10110" -dependencies = [ - "zstd-safe", -] - -[[package]] -name = "zstd-safe" -version = "7.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "43747c7422e2924c11144d5229878b98180ef8b06cca4ab5af37afc8a8d8ea3e" -dependencies = [ - "zstd-sys", -] - -[[package]] -name = "zstd-sys" -version = "2.0.9+zstd.1.5.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9e16efa8a874a0481a574084d34cc26fdb3b99627480f785888deb6386506656" -dependencies = [ - "cc", - "pkg-config", -] diff --git a/rust/cli/Cargo.toml b/rust/cli/Cargo.toml index 7d5ded1c..2b63ec05 100644 --- a/rust/cli/Cargo.toml +++ b/rust/cli/Cargo.toml @@ -20,7 +20,6 @@ indicatif = "0.17" gcp-bigquery-client = "0.18" prost = "0.12" google-cloud-auth = { version = "0.14", default-features = false, features = ["external-account", "rustls-tls"] } -snowflake-api = { path = "../snowflake-api" } arrow-array = "51" duckdb = { version = "0.10", features = ["bundled"] } regex = "1.10.4" diff --git a/rust/quary-databases/Cargo.toml b/rust/quary-databases/Cargo.toml index 4698ff58..3c55c6e9 100644 --- a/rust/quary-databases/Cargo.toml +++ b/rust/quary-databases/Cargo.toml @@ -16,12 +16,16 @@ async-trait = "0.1" gcp-bigquery-client = "0.18" prost = "0.12" google-cloud-auth = { version = "0.14", default-features = false, features = ["external-account", "rustls-tls"] } -snowflake-api = { path = "../snowflake-api" } -arrow-array = "51" -duckdb = { version = "0.10", features = ["bundled"] } regex = "1.10.4" chrono = "0.4" +duckdb = { version = "0.10", features = ["bundled"] } +arrow_array_51 = { package = "arrow-array", version = "51" } + +snowflake-api = "0.7.0" +arrow_array_50 = { package = "arrow-array", version = "50" } + + [dev-dependencies] assert_cmd = "2" tempfile = "3" diff --git a/rust/quary-databases/src/databases_snowflake.rs b/rust/quary-databases/src/databases_snowflake.rs index 9224a9c2..5c57c46a 100644 --- a/rust/quary-databases/src/databases_snowflake.rs +++ b/rust/quary-databases/src/databases_snowflake.rs @@ -1,4 +1,4 @@ -use arrow_array::array; +use arrow_array_50::{array, ArrayRef}; use async_trait::async_trait; use quary_core::database_snowflake::{ validate_snowfalke_account_identifier, DatabaseQueryGeneratorSnowflake, diff --git a/rust/snowflake-api/Cargo.toml b/rust/snowflake-api/Cargo.toml deleted file mode 100644 index 2755314d..00000000 --- a/rust/snowflake-api/Cargo.toml +++ /dev/null @@ -1,52 +0,0 @@ -[package] -authors = ["Andrew Korzhuev "] -categories = ["api-bindings", "database"] -description = "Snowflake API bindings" -documentation = "http://docs.rs/sqlite-api/" -edition = "2021" -keywords = ["api", "database", "snowflake"] -license = "Apache-2.0" -name = "snowflake-api" -readme = "README.md" -repository = "https://github.com/mycelial/snowflake-rs" -version = "0.7.0" - -[features] -default = ["cert-auth"] -all = ["cert-auth", "polars"] -cert-auth = ["dep:snowflake-jwt"] -# support for conversion of arrow and json payloads to dataframes -polars = ["dep:polars-core", "dep:polars-io"] - -[dependencies] -arrow = "51" -async-trait = "0.1" -base64 = "0.22" -bytes = "1" -futures = "0.3" -log = "0.4" -object_store = { version = "0.9", features = ["aws"] } -regex = "1" -reqwest = { version = "0.11", default-features = false, features = [ - "gzip", - "json", - "rustls-tls", -] } -reqwest-middleware = "0.2" -reqwest-retry = "0.3" -serde = { version = "1", features = ["derive"] } -serde_json = "1" -snowflake-jwt = { version = "0.3.0", optional = true } -thiserror = "1" -url = "2" -uuid = { version = "1", features = ["v4"] } -polars-io = { version = ">=0.32", features = ["json", "ipc_streaming"], optional = true} -polars-core = { version = ">=0.32", optional = true} - - -[dev-dependencies] -anyhow = "1" -arrow = { version = "51", features = ["prettyprint"] } -clap = { version = "4", features = ["derive"] } -pretty_env_logger = "0.5" -tokio = { version = "1", features = ["macros", "rt-multi-thread"] } diff --git a/rust/snowflake-api/LICENSE b/rust/snowflake-api/LICENSE deleted file mode 100644 index f49a4e16..00000000 --- a/rust/snowflake-api/LICENSE +++ /dev/null @@ -1,201 +0,0 @@ - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright [yyyy] [name of copyright owner] - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. \ No newline at end of file diff --git a/rust/snowflake-api/README.md b/rust/snowflake-api/README.md deleted file mode 100644 index 83de272b..00000000 --- a/rust/snowflake-api/README.md +++ /dev/null @@ -1,82 +0,0 @@ -# snowflake-rs - -Snowflake library for undocumented public API. If you want to query documented public [SQL REST API](https://docs.snowflake.com/developer-guide/sql-api/intro) use [snowflake-jwt](https://crates.io/crates/snowflake-jwt) together with your favourite request library, see [./jwt/examples](../jwt/examples) for how it's done. - -## Features - -Since it does a lot of I/O the library is async-only, and currently has hard dependency on [tokio](https://tokio.rs/) as a runtime due to use of [reqwest](https://github.com/seanmonstar/reqwest). - -- [x] Single statements [example](./examples/run_sql.rs) -- [ ] Multiple statements -- [ ] Async requests (is it needed if whole library is async?) -- [x] Query results in [Arrow](https://arrow.apache.org/) -- [x] Chunked query results -- [x] Password, certificate, env auth -- [ ] Browser-auth -- [x] Closing session -- [x] Token renewal -- [x] PUT support [example](./examples/filetransfer.rs) -- [ ] GET support -- [x] AWS integration -- [ ] GCloud integration -- [ ] Azure integration -- [ ] Parallel uploading of small files -- [x] Polars support [example](./examples/polars/src/main.rs) -- [x] Tracing / custom reqwest middlware [example](./examples/tracing/src/main.rs) - -## Why - -Snowflake has 2 public APIs, one is [SQL REST API](https://docs.snowflake.com/developer-guide/sql-api/intro), which is limited in its support of [PUT](https://docs.snowflake.com/en/sql-reference/sql/put) and [GET](https://docs.snowflake.com/en/sql-reference/sql/get) statements and another undocumented API, which is used by official [Drivers](https://docs.snowflake.com/en/developer-guide/drivers) with the support for both. - -This implementation emulates [gosnowflake](https://github.com/snowflakedb/gosnowflake) library, as each official driver comes with a different set of internal flags and defaults (which are selected by `CLIENT_APP_ID`) the Go implementation is the only one currently outputting Arrow by-default. - -We've chosen not to generate bindings for C/C++ [libsnowflakeclient](https://github.com/snowflakedb/libsnowflakeclient) library (which backs ODBC driver) as it is in active development and building it under macOS M1 is bigger effort than writing our own API wrapper. - -## Usage - -In your Cargo.toml: - -```toml -[dependencies] -snowflake-api = "0.7.0" -``` - -Check [examples](./examples) for working programs using the library. - -```rust -use anyhow::Result; -use snowflake_api::{QueryResult, SnowflakeApi}; - -async fn run_query(sql: &str) -> Result { - let mut api = SnowflakeApi::with_password_auth( - "ACCOUNT_IDENTIFIER", - Some("WAREHOUSE"), - Some("DATABASE"), - Some("SCHEMA"), - "USERNAME", - Some("ROLE"), - "PASSWORD", - )?; - let res = api.exec(sql).await?; - - Ok(res) -} -``` - -Or using environment variables: - -```rust - use anyhow::Result; -use snowflake_api::{QueryResult, SnowflakeApi}; - -async fn run_query(sql: &str) -> Result { - let mut api = SnowflakeApi::from_env()?; - let res = api.exec(sql).await?; - - Ok(res) -} -``` - -## PUT / GET - -[PUT](https://docs.snowflake.com/en/sql-reference/sql/put)/[GET](https://docs.snowflake.com/en/sql-reference/sql/get) statements allow you to access Snowflake-owned storage instead of provisioning your own when doing [COPY INTO](https://docs.snowflake.com/en/sql-reference/sql/copy-into-table). Storage provider depends on which cloud your Snowflake account was provisioned in, hence the need to support multiple cloud backends. diff --git a/rust/snowflake-api/examples/filetransfer.rs b/rust/snowflake-api/examples/filetransfer.rs deleted file mode 100644 index e54f64c4..00000000 --- a/rust/snowflake-api/examples/filetransfer.rs +++ /dev/null @@ -1,117 +0,0 @@ -use anyhow::Result; -use arrow::util::pretty::pretty_format_batches; -use clap::Parser; -use snowflake_api::{QueryResult, SnowflakeApi}; -use std::fs; - -extern crate snowflake_api; - -#[derive(Parser, Debug)] -#[command(author, version, about, long_about = None)] -struct Args { - /// Path to RSA PEM private key - #[arg(long)] - private_key: Option, - - /// Password if certificate is not present - #[arg(long)] - password: Option, - - /// in Snowflake format, uppercase - #[arg(short, long)] - account_identifier: String, - - /// Database name - #[arg(short, long)] - database: String, - - /// Schema name - #[arg(long)] - schema: String, - - /// Warehouse - #[arg(short, long)] - warehouse: String, - - /// username to whom the private key belongs to - #[arg(short, long)] - username: String, - - /// role which user will assume - #[arg(short, long)] - role: String, - - #[arg(long)] - csv_path: String, -} - -#[tokio::main] -async fn main() -> Result<()> { - pretty_env_logger::init(); - - let args = Args::parse(); - - let mut api = match (&args.private_key, &args.password) { - (Some(pkey), None) => { - let pem = fs::read_to_string(pkey)?; - SnowflakeApi::with_certificate_auth( - &args.account_identifier, - Some(&args.warehouse), - Some(&args.database), - Some(&args.schema), - &args.username, - Some(&args.role), - &pem, - )? - } - (None, Some(pwd)) => SnowflakeApi::with_password_auth( - &args.account_identifier, - Some(&args.warehouse), - Some(&args.database), - Some(&args.schema), - &args.username, - Some(&args.role), - pwd, - )?, - _ => { - panic!("Either private key path or password must be set") - } - }; - - log::info!("Creating table"); - api.exec( - "CREATE OR REPLACE TABLE OSCAR_AGE_MALE(Index integer, Year integer, Age integer, Name varchar, Movie varchar);" - ).await?; - - log::info!("Uploading CSV file"); - api.exec(&format!("PUT file://{} @%OSCAR_AGE_MALE;", &args.csv_path)) - .await?; - - log::info!("Create temporary file format"); - api.exec( - "CREATE OR REPLACE TEMPORARY FILE FORMAT CUSTOM_CSV_FORMAT TYPE = CSV COMPRESSION = NONE FIELD_DELIMITER = ',' FILE_EXTENSION = 'csv' SKIP_HEADER = 1 FIELD_OPTIONALLY_ENCLOSED_BY = '\"' TRIM_SPACE = TRUE SKIP_BLANK_LINES = TRUE;" - ).await?; - - log::info!("Copying into table"); - api.exec("COPY INTO OSCAR_AGE_MALE FILE_FORMAT = CUSTOM_CSV_FORMAT;") - .await?; - - log::info!("Querying for results"); - let res = api.exec("SELECT * FROM OSCAR_AGE_MALE;").await?; - - match res { - QueryResult::Arrow(a) => { - println!("{}", pretty_format_batches(&a).unwrap()); - } - QueryResult::Empty => { - println!("Nothing was returned"); - } - QueryResult::Json(j) => { - println!("{j}"); - } - } - - api.close_session().await?; - - Ok(()) -} diff --git a/rust/snowflake-api/examples/oscar_age_male.csv b/rust/snowflake-api/examples/oscar_age_male.csv deleted file mode 100644 index f79355e0..00000000 --- a/rust/snowflake-api/examples/oscar_age_male.csv +++ /dev/null @@ -1,90 +0,0 @@ -"Index", "Year", "Age", "Name", "Movie" - 1, 1928, 44, "Emil Jannings", "The Last Command, The Way of All Flesh" - 2, 1929, 41, "Warner Baxter", "In Old Arizona" - 3, 1930, 62, "George Arliss", "Disraeli" - 4, 1931, 53, "Lionel Barrymore", "A Free Soul" - 5, 1932, 47, "Wallace Beery", "The Champ" - 6, 1933, 35, "Fredric March", "Dr. Jekyll and Mr. Hyde" - 7, 1934, 34, "Charles Laughton", "The Private Life of Henry VIII" - 8, 1935, 34, "Clark Gable", "It Happened One Night" - 9, 1936, 49, "Victor McLaglen", "The Informer" -10, 1937, 41, "Paul Muni", "The Story of Louis Pasteur" -11, 1938, 37, "Spencer Tracy", "Captains Courageous" -12, 1939, 38, "Spencer Tracy", "Boys Town" -13, 1940, 34, "Robert Donat", "Goodbye, Mr. Chips" -14, 1941, 32, "James Stewart", "The Philadelphia Story" -15, 1942, 40, "Gary Cooper", "Sergeant York" -16, 1943, 43, "James Cagney", "Yankee Doodle Dandy" -17, 1944, 48, "Paul Lukas", "Watch on the Rhine" -18, 1945, 41, "Bing Crosby", "Going My Way" -19, 1946, 39, "Ray Milland", "The Lost Weekend" -20, 1947, 49, "Fredric March", "The Best Years of Our Lives" -21, 1948, 57, "Ronald Colman", "A Double Life" -22, 1949, 41, "Laurence Olivier", "Hamlet" -23, 1950, 38, "Broderick Crawford", "All the King's Men" -24, 1951, 39, "José Ferrer", "Cyrano de Bergerac" -25, 1952, 52, "Humphrey Bogart", "The African Queen" -26, 1953, 51, "Gary Cooper", "High Noon" -27, 1954, 35, "William Holden", "Stalag 17" -28, 1955, 30, "Marlon Brando", "On the Waterfront" -29, 1956, 39, "Ernest Borgnine", "Marty" -30, 1957, 36, "Yul Brynner", "The King and I" -31, 1958, 43, "Alec Guinness", "The Bridge on the River Kwai" -32, 1959, 49, "David Niven", "Separate Tables" -33, 1960, 36, "Charlton Heston", "Ben-Hur" -34, 1961, 47, "Burt Lancaster", "Elmer Gantry" -35, 1962, 31, "Maximilian Schell", "Judgment at Nuremberg" -36, 1963, 47, "Gregory Peck", "To Kill a Mockingbird" -37, 1964, 37, "Sidney Poitier", "Lilies of the Field" -38, 1965, 57, "Rex Harrison", "My Fair Lady" -39, 1966, 42, "Lee Marvin", "Cat Ballou" -40, 1967, 45, "Paul Scofield", "A Man for All Seasons" -41, 1968, 42, "Rod Steiger", "In the Heat of the Night" -42, 1969, 45, "Cliff Robertson", "Charly" -43, 1970, 62, "John Wayne", "True Grit" -44, 1971, 43, "George C. Scott", "Patton" -45, 1972, 42, "Gene Hackman", "The French Connection" -46, 1973, 48, "Marlon Brando", "The Godfather" -47, 1974, 49, "Jack Lemmon", "Save the Tiger" -48, 1975, 56, "Art Carney", "Harry and Tonto" -49, 1976, 38, "Jack Nicholson", "One Flew Over the Cuckoo's Nest" -50, 1977, 60, "Peter Finch", "Network" -51, 1978, 30, "Richard Dreyfuss", "The Goodbye Girl" -52, 1979, 40, "Jon Voight", "Coming Home" -53, 1980, 42, "Dustin Hoffman", "Kramer vs. Kramer" -54, 1981, 37, "Robert De Niro", "Raging Bull" -55, 1982, 76, "Henry Fonda", "On Golden Pond" -56, 1983, 39, "Ben Kingsley", "Gandhi" -57, 1984, 53, "Robert Duvall", "Tender Mercies" -58, 1985, 45, "F. Murray Abraham", "Amadeus" -59, 1986, 36, "William Hurt", "Kiss of the Spider Woman" -60, 1987, 62, "Paul Newman", "The Color of Money" -61, 1988, 43, "Michael Douglas", "Wall Street" -62, 1989, 51, "Dustin Hoffman", "Rain Man" -63, 1990, 32, "Daniel Day-Lewis", "My Left Foot" -64, 1991, 42, "Jeremy Irons", "Reversal of Fortune" -65, 1992, 54, "Anthony Hopkins", "The Silence of the Lambs" -66, 1993, 52, "Al Pacino", "Scent of a Woman" -67, 1994, 37, "Tom Hanks", "Philadelphia" -68, 1995, 38, "Tom Hanks", "Forrest Gump" -69, 1996, 32, "Nicolas Cage", "Leaving Las Vegas" -70, 1997, 45, "Geoffrey Rush", "Shine" -71, 1998, 60, "Jack Nicholson", "As Good as It Gets" -72, 1999, 46, "Roberto Benigni", "Life Is Beautiful" -73, 2000, 40, "Kevin Spacey", "American Beauty" -74, 2001, 36, "Russell Crowe", "Gladiator" -75, 2002, 47, "Denzel Washington", "Training Day" -76, 2003, 29, "Adrien Brody", "The Pianist" -77, 2004, 43, "Sean Penn", "Mystic River" -78, 2005, 37, "Jamie Foxx", "Ray" -79, 2006, 38, "Philip Seymour Hoffman", "Capote" -80, 2007, 45, "Forest Whitaker", "The Last King of Scotland" -81, 2008, 50, "Daniel Day-Lewis", "There Will Be Blood" -82, 2009, 48, "Sean Penn", "Milk" -83, 2010, 60, "Jeff Bridges", "Crazy Heart" -84, 2011, 50, "Colin Firth", "The King's Speech" -85, 2012, 39, "Jean Dujardin", "The Artist" -86, 2013, 55, "Daniel Day-Lewis", "Lincoln" -87, 2014, 44, "Matthew McConaughey", "Dallas Buyers Club" -88, 2015, 33, "Eddie Redmayne", "The Theory of Everything" -89, 2016, 41, "Leonardo DiCaprio", "The Revenant" \ No newline at end of file diff --git a/rust/snowflake-api/examples/polars/Cargo.toml b/rust/snowflake-api/examples/polars/Cargo.toml deleted file mode 100644 index 166e934f..00000000 --- a/rust/snowflake-api/examples/polars/Cargo.toml +++ /dev/null @@ -1,16 +0,0 @@ -[package] -name = "polars-example" -version = "0.1.0" -edition = "2021" - -# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html - -[dependencies] -anyhow = "1.0.79" -dotenv = "0.15.0" -tokio = { version = "1.35.1", features = ["full"] } -tracing = "0.1.40" -tracing-subscriber = "0.3" - -snowflake-api = { path = "../../../snowflake-api", features = ["polars"]} -polars = { version = ">=0.37", features = ["fmt"] } diff --git a/rust/snowflake-api/examples/polars/src/main.rs b/rust/snowflake-api/examples/polars/src/main.rs deleted file mode 100644 index 635212b8..00000000 --- a/rust/snowflake-api/examples/polars/src/main.rs +++ /dev/null @@ -1,44 +0,0 @@ -use anyhow::Result; -use polars::frame::DataFrame; - -use snowflake_api::SnowflakeApi; - -#[tokio::main] -async fn main() -> Result<()> { - dotenv::dotenv().ok(); - tracing_subscriber::fmt::init(); - - let api = SnowflakeApi::from_env()?; - - // run a query that returns a tabular arrow response - run_and_print( - &api, - r" - select - count(query_id) as num_queries, - user_name - from snowflake.account_usage.query_history - where - start_time > current_date - 7 - group by user_name; - ", - ) - .await?; - - // run a query that returns a json response - run_and_print(&api, r"SHOW DATABASES;").await?; - - Ok(()) -} - -async fn run_and_print(api: &SnowflakeApi, sql: &str) -> Result<()> { - let res = api.exec_raw(sql).await?; - - let df = DataFrame::try_from(res)?; - // alternatively, you can use the `try_into` method on the response - // let df: DataFrame = res.try_into()?; - - println!("{:?}", df); - - Ok(()) -} diff --git a/rust/snowflake-api/examples/run_sql.rs b/rust/snowflake-api/examples/run_sql.rs deleted file mode 100644 index 18ec8a92..00000000 --- a/rust/snowflake-api/examples/run_sql.rs +++ /dev/null @@ -1,120 +0,0 @@ -extern crate snowflake_api; - -use anyhow::Result; -use arrow::util::pretty::pretty_format_batches; -use clap::Parser; -use std::fs; - -use snowflake_api::{QueryResult, SnowflakeApi}; - -#[derive(clap::ValueEnum, Clone, Debug)] -enum Output { - Arrow, - Json, - Query, -} - -#[derive(Parser, Debug)] -#[command(author, version, about, long_about = None)] -struct Args { - /// Path to RSA PEM private key - #[arg(long)] - private_key: Option, - - /// Password if certificate is not present - #[arg(long)] - password: Option, - - /// in Snowflake format, uppercase - #[arg(short, long)] - account_identifier: String, - - /// Database name - #[arg(short, long)] - database: Option, - - /// Schema name - #[arg(long)] - schema: Option, - - /// Warehouse - #[arg(short, long)] - warehouse: Option, - - /// username to whom the private key belongs to - #[arg(short, long)] - username: String, - - /// role which user will assume - #[arg(short, long)] - role: Option, - - /// sql statement to execute and print result from - #[arg(long)] - sql: String, - - #[arg(long)] - #[arg(value_enum, default_value_t = Output::Arrow)] - output: Output, -} - -#[tokio::main] -async fn main() -> Result<()> { - pretty_env_logger::init(); - - let args = Args::parse(); - - let mut api = match (&args.private_key, &args.password) { - (Some(pkey), None) => { - let pem = fs::read_to_string(pkey)?; - SnowflakeApi::with_certificate_auth( - &args.account_identifier, - args.warehouse.as_deref(), - args.database.as_deref(), - args.schema.as_deref(), - &args.username, - args.role.as_deref(), - &pem, - )? - } - (None, Some(pwd)) => SnowflakeApi::with_password_auth( - &args.account_identifier, - args.warehouse.as_deref(), - args.database.as_deref(), - args.schema.as_deref(), - &args.username, - args.role.as_deref(), - pwd, - )?, - _ => { - panic!("Either private key path or password must be set") - } - }; - - match args.output { - Output::Arrow => { - let res = api.exec(&args.sql).await?; - match res { - QueryResult::Arrow(a) => { - println!("{}", pretty_format_batches(&a).unwrap()); - } - QueryResult::Json(j) => { - println!("{j}"); - } - QueryResult::Empty => { - println!("Query finished successfully") - } - } - } - Output::Json => { - let res = api.exec_json(&args.sql).await?; - println!("{res}"); - } - Output::Query => { - let res = api.exec_response(&args.sql).await?; - println!("{:?}", res); - } - } - - Ok(()) -} diff --git a/rust/snowflake-api/examples/tracing/Cargo.toml b/rust/snowflake-api/examples/tracing/Cargo.toml deleted file mode 100644 index 01a07ecc..00000000 --- a/rust/snowflake-api/examples/tracing/Cargo.toml +++ /dev/null @@ -1,24 +0,0 @@ -[package] -name = "snowflake-rust-tracing" -version = "0.1.0" -edition = "2021" - -# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html - -[dependencies] -anyhow = "1.0.79" -arrow = { version = "50.0.0", features = ["prettyprint"] } -dotenv = "0.15.0" -snowflake-api = { path = "../../../snowflake-api" } - - -tokio = { version = "1.35.1", features = ["full"] } -tracing = "0.1.40" -tracing-subscriber = "0.3" -tracing-opentelemetry = "0.22" -opentelemetry-otlp = "0.14" -opentelemetry = "0.21" -opentelemetry_sdk = { version = "0.21", features = ["rt-tokio"] } -reqwest-tracing = { version = "0.4", features = ["opentelemetry_0_21"] } -reqwest-middleware = { version = "*" } -opentelemetry-semantic-conventions = "0.13" diff --git a/rust/snowflake-api/examples/tracing/src/main.rs b/rust/snowflake-api/examples/tracing/src/main.rs deleted file mode 100644 index 2c1c0024..00000000 --- a/rust/snowflake-api/examples/tracing/src/main.rs +++ /dev/null @@ -1,71 +0,0 @@ -use anyhow::Result; -use arrow::util::pretty::pretty_format_batches; -use opentelemetry::global; -use opentelemetry::KeyValue; -use opentelemetry_otlp::WithExportConfig; -use opentelemetry_sdk::{runtime, trace as sdktrace, Resource}; -use reqwest_middleware::Extension; -use reqwest_tracing::{OtelName, SpanBackendWithUrl}; -use tracing_subscriber::layer::SubscriberExt; - -use snowflake_api::connection::Connection; -use snowflake_api::{AuthArgs, QueryResult, SnowflakeApiBuilder}; - -#[tokio::main] -async fn main() -> Result<()> { - let tracer = opentelemetry_otlp::new_pipeline() - .tracing() - .with_exporter( - opentelemetry_otlp::new_exporter() - .tonic() - .with_endpoint("http://localhost:4317"), - ) - .with_trace_config( - sdktrace::config().with_resource(Resource::new(vec![KeyValue::new( - opentelemetry_semantic_conventions::resource::SERVICE_NAME, - "snowflake-rust-client-demo", - )])), - ) - .install_batch(runtime::Tokio)?; - - let telemetry = tracing_opentelemetry::layer().with_tracer(tracer.clone()); - let subscriber = tracing_subscriber::Registry::default().with(telemetry); - tracing::subscriber::set_global_default(subscriber)?; - - dotenv::dotenv().ok(); - - let mut client = Connection::default_client_builder()?; - client = client - .with_init(Extension(OtelName(std::borrow::Cow::Borrowed( - "snowflake-api", - )))) - .with(reqwest_tracing::TracingMiddleware::::new()); - - let builder = SnowflakeApiBuilder::new(AuthArgs::from_env()?).with_client(client.build()); - let api = builder.build()?; - - run_in_span(&api).await?; - - global::shutdown_tracer_provider(); - - Ok(()) -} - -#[tracing::instrument(name = "snowflake_api", skip(api))] -async fn run_in_span(api: &snowflake_api::SnowflakeApi) -> anyhow::Result<()> { - let res = api.exec("select 'hello from snowflake' as col1;").await?; - - match res { - QueryResult::Arrow(a) => { - println!("{}", pretty_format_batches(&a).unwrap()); - } - QueryResult::Json(j) => { - println!("{}", j); - } - QueryResult::Empty => { - println!("Query finished successfully") - } - } - - Ok(()) -} diff --git a/rust/snowflake-api/src/connection.rs b/rust/snowflake-api/src/connection.rs deleted file mode 100644 index e7087e15..00000000 --- a/rust/snowflake-api/src/connection.rs +++ /dev/null @@ -1,199 +0,0 @@ -use reqwest::header::{self, HeaderMap, HeaderName, HeaderValue}; -use reqwest_middleware::ClientWithMiddleware; -use reqwest_retry::policies::ExponentialBackoff; -use reqwest_retry::RetryTransientMiddleware; -use std::collections::HashMap; -use std::time::{SystemTime, UNIX_EPOCH}; -use thiserror::Error; -use url::Url; -use uuid::Uuid; - -#[derive(Error, Debug)] -pub enum ConnectionError { - #[error(transparent)] - RequestError(#[from] reqwest::Error), - - #[error(transparent)] - RequestMiddlewareError(#[from] reqwest_middleware::Error), - - #[error(transparent)] - UrlParsing(#[from] url::ParseError), - - #[error(transparent)] - Deserialization(#[from] serde_json::Error), - - #[error(transparent)] - InvalidHeader(#[from] header::InvalidHeaderValue), -} - -/// Container for query parameters -/// This API has different endpoints and MIME types for different requests -struct QueryContext { - path: &'static str, - accept_mime: &'static str, -} - -pub enum QueryType { - LoginRequest, - TokenRequest, - CloseSession, - JsonQuery, - ArrowQuery, -} - -impl QueryType { - const fn query_context(&self) -> QueryContext { - match self { - Self::LoginRequest => QueryContext { - path: "session/v1/login-request", - accept_mime: "application/json", - }, - Self::TokenRequest => QueryContext { - path: "/session/token-request", - accept_mime: "application/snowflake", - }, - Self::CloseSession => QueryContext { - path: "session", - accept_mime: "application/snowflake", - }, - Self::JsonQuery => QueryContext { - path: "queries/v1/query-request", - accept_mime: "application/json", - }, - Self::ArrowQuery => QueryContext { - path: "queries/v1/query-request", - accept_mime: "application/snowflake", - }, - } - } -} - -/// Connection pool -/// Minimal session will have at least 2 requests - login and query -pub struct Connection { - // no need for Arc as it's already inside the reqwest client - client: ClientWithMiddleware, -} - -impl Connection { - pub fn new() -> Result { - let client = Self::default_client_builder()?; - - Ok(Self::new_with_middware(client.build())) - } - - /// Allow a user to provide their own middleware - /// - /// Users can provide their own middleware to the connection like this: - /// ```rust - /// use snowflake_api::connection::Connection; - /// let mut client = Connection::default_client_builder(); - /// // modify the client builder here - /// let connection = Connection::new_with_middware(client.unwrap().build()); - /// ``` - /// This is not intended to be called directly, but is used by `SnowflakeApiBuilder::with_client` - pub fn new_with_middware(client: ClientWithMiddleware) -> Self { - Self { client } - } - - pub fn default_client_builder() -> Result { - let retry_policy = ExponentialBackoff::builder().build_with_max_retries(3); - - let client = reqwest::ClientBuilder::new() - .user_agent("Rust/0.0.1") - .gzip(true) - .referer(false); - - #[cfg(debug_assertions)] - let client = client.connection_verbose(true); - - let client = client.build()?; - - Ok(reqwest_middleware::ClientBuilder::new(client) - .with(RetryTransientMiddleware::new_with_policy(retry_policy))) - } - - /// Perform request of given query type with extra body or parameters - // todo: implement soft error handling - // todo: is there better way to not repeat myself? - pub async fn request( - &self, - query_type: QueryType, - account_identifier: &str, - extra_get_params: &[(&str, &str)], - auth: Option<&str>, - body: impl serde::Serialize, - ) -> Result { - let context = query_type.query_context(); - - let request_id = Uuid::new_v4(); - let request_guid = Uuid::new_v4(); - let client_start_time = SystemTime::now() - .duration_since(UNIX_EPOCH) - .unwrap() - .as_secs() - .to_string(); - // fixme: update uuid's on the retry - let request_id = request_id.to_string(); - let request_guid = request_guid.to_string(); - - let mut get_params = vec![ - ("clientStartTime", client_start_time.as_str()), - ("requestId", request_id.as_str()), - ("request_guid", request_guid.as_str()), - ]; - get_params.extend_from_slice(extra_get_params); - - let url = format!( - "https://{}.snowflakecomputing.com/{}", - &account_identifier, context.path - ); - let url = Url::parse_with_params(&url, get_params)?; - - let mut headers = HeaderMap::new(); - - headers.append( - header::ACCEPT, - HeaderValue::from_static(context.accept_mime), - ); - if let Some(auth) = auth { - let mut auth_val = HeaderValue::from_str(auth)?; - auth_val.set_sensitive(true); - headers.append(header::AUTHORIZATION, auth_val); - } - - // todo: persist client to use connection polling - let resp = self - .client - .post(url) - .headers(headers) - .json(&body) - .send() - .await?; - - Ok(resp.json::().await?) - } - - pub async fn get_chunk( - &self, - url: &str, - headers: &HashMap, - ) -> Result { - let mut header_map = HeaderMap::new(); - for (k, v) in headers { - header_map.insert( - HeaderName::from_bytes(k.as_bytes()).unwrap(), - HeaderValue::from_bytes(v.as_bytes()).unwrap(), - ); - } - let bytes = self - .client - .get(url) - .headers(header_map) - .send() - .await? - .bytes() - .await?; - Ok(bytes) - } -} diff --git a/rust/snowflake-api/src/lib.rs b/rust/snowflake-api/src/lib.rs deleted file mode 100644 index 31f3e00d..00000000 --- a/rust/snowflake-api/src/lib.rs +++ /dev/null @@ -1,553 +0,0 @@ -#![doc( - issue_tracker_base_url = "https://github.com/mycelial/snowflake-rs/issues", - test(no_crate_inject) -)] -#![doc = include_str!("../README.md")] -#![warn(clippy::all, clippy::pedantic)] -#![allow( -clippy::must_use_candidate, -clippy::missing_errors_doc, -clippy::module_name_repetitions, -clippy::struct_field_names, -clippy::future_not_send, // This one seems like something we should eventually fix -clippy::missing_panics_doc -)] - -use std::fmt::{Display, Formatter}; -use std::io; -use std::path::Path; -use std::sync::Arc; - -use arrow::error::ArrowError; -use arrow::ipc::reader::StreamReader; -use arrow::record_batch::RecordBatch; -use base64::Engine; -use bytes::{Buf, Bytes}; -use futures::future::try_join_all; -use object_store::aws::AmazonS3Builder; -use object_store::local::LocalFileSystem; -use object_store::ObjectStore; -use regex::Regex; -use reqwest_middleware::ClientWithMiddleware; -use thiserror::Error; - -use crate::connection::{Connection, ConnectionError}; -use responses::ExecResponse; -use session::{AuthError, Session}; - -use crate::connection::QueryType; -use crate::requests::ExecRequest; -use crate::responses::{ - AwsPutGetStageInfo, ExecResponseRowType, PutGetExecResponse, PutGetStageInfo, SnowflakeType, -}; -use crate::session::AuthError::MissingEnvArgument; - -pub mod connection; -#[cfg(feature = "polars")] -mod polars; -mod requests; -mod responses; -mod session; - -#[derive(Error, Debug)] -pub enum SnowflakeApiError { - #[error(transparent)] - RequestError(#[from] ConnectionError), - - #[error(transparent)] - AuthError(#[from] AuthError), - - #[error(transparent)] - ResponseDeserializationError(#[from] base64::DecodeError), - - #[error(transparent)] - ArrowError(#[from] ArrowError), - - #[error("S3 bucket path in PUT request is invalid: `{0}`")] - InvalidBucketPath(String), - - #[error("Couldn't extract filename from the local path: `{0}`")] - InvalidLocalPath(String), - - #[error(transparent)] - LocalIoError(#[from] io::Error), - - #[error(transparent)] - ObjectStoreError(#[from] object_store::Error), - - #[error(transparent)] - ObjectStorePathError(#[from] object_store::path::Error), - - #[error("Snowflake API error. Code: `{0}`. Message: `{1}`")] - ApiError(String, String), - - #[error("Snowflake API empty response could mean that query wasn't executed correctly or API call was faulty")] - EmptyResponse, - - #[error("No usable rowsets were included in the response")] - BrokenResponse, - - #[error("Following feature is not implemented yet: {0}")] - Unimplemented(String), - - #[error("Unexpected API response")] - UnexpectedResponse, -} - -/// Even if Arrow is specified as a return type non-select queries -/// will return Json array of arrays: `[[42, "answer"], [43, "non-answer"]]`. -pub struct JsonResult { - // todo: can it _only_ be a json array of arrays or something else too? - pub value: serde_json::Value, - /// Field ordering matches the array ordering - pub schema: Vec, -} - -impl Display for JsonResult { - fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { - write!(f, "{}", self.value) - } -} - -/// Based on the [`ExecResponseRowType`] -pub struct FieldSchema { - pub name: String, - // todo: is it a good idea to expose internal response struct to the user? - pub type_: SnowflakeType, - pub scale: Option, - pub precision: Option, - pub nullable: bool, -} - -impl From for FieldSchema { - fn from(value: ExecResponseRowType) -> Self { - FieldSchema { - name: value.name, - type_: value.type_, - scale: value.scale, - precision: value.precision, - nullable: value.nullable, - } - } -} - -/// Container for query result. -/// Arrow is returned by-default for all SELECT statements, -/// unless there is session configuration issue or it's a different statement type. -pub enum QueryResult { - Arrow(Vec), - Json(JsonResult), - Empty, -} - -/// Raw query result -/// Can be transformed into [`QueryResult`] -pub enum RawQueryResult { - /// Arrow IPC chunks - /// see: https://arrow.apache.org/docs/format/Columnar.html#serialization-and-interprocess-communication-ipc - Bytes(Vec), - /// Json payload is deserialized, - /// as it's already a part of REST response - Json(JsonResult), - Empty, -} - -impl RawQueryResult { - pub fn deserialize_arrow(self) -> Result { - match self { - RawQueryResult::Bytes(bytes) => { - Self::flat_bytes_to_batches(bytes).map(QueryResult::Arrow) - } - RawQueryResult::Json(j) => Ok(QueryResult::Json(j)), - RawQueryResult::Empty => Ok(QueryResult::Empty), - } - } - - fn flat_bytes_to_batches(bytes: Vec) -> Result, ArrowError> { - let mut res = vec![]; - for b in bytes { - let mut batches = Self::bytes_to_batches(b)?; - res.append(&mut batches); - } - Ok(res) - } - - fn bytes_to_batches(bytes: Bytes) -> Result, ArrowError> { - let record_batches = StreamReader::try_new_unbuffered(bytes.reader(), None)?; - record_batches.into_iter().collect() - } -} - -pub struct AuthArgs { - pub account_identifier: String, - pub warehouse: Option, - pub database: Option, - pub schema: Option, - pub username: String, - pub role: Option, - pub auth_type: AuthType, -} - -impl AuthArgs { - pub fn from_env() -> Result { - let auth_type = if let Ok(password) = std::env::var("SNOWFLAKE_PASSWORD") { - Ok(AuthType::Password(PasswordArgs { password })) - } else if let Ok(private_key_pem) = std::env::var("SNOWFLAKE_PRIVATE_KEY") { - Ok(AuthType::Certificate(CertificateArgs { private_key_pem })) - } else { - Err(MissingEnvArgument( - "SNOWFLAKE_PASSWORD or SNOWFLAKE_PRIVATE_KEY".to_owned(), - )) - }; - - Ok(AuthArgs { - account_identifier: std::env::var("SNOWFLAKE_ACCOUNT") - .map_err(|_| MissingEnvArgument("SNOWFLAKE_ACCOUNT".to_owned()))?, - warehouse: std::env::var("SNOWLFLAKE_WAREHOUSE").ok(), - database: std::env::var("SNOWFLAKE_DATABASE").ok(), - schema: std::env::var("SNOWFLAKE_SCHEMA").ok(), - username: std::env::var("SNOWFLAKE_USER") - .map_err(|_| MissingEnvArgument("SNOWFLAKE_USER".to_owned()))?, - role: std::env::var("SNOWFLAKE_ROLE").ok(), - auth_type: auth_type?, - }) - } -} - -pub enum AuthType { - Password(PasswordArgs), - Certificate(CertificateArgs), -} - -pub struct PasswordArgs { - pub password: String, -} - -pub struct CertificateArgs { - pub private_key_pem: String, -} - -#[must_use] -pub struct SnowflakeApiBuilder { - pub auth: AuthArgs, - client: Option, -} - -impl SnowflakeApiBuilder { - pub fn new(auth: AuthArgs) -> Self { - Self { auth, client: None } - } - - pub fn with_client(mut self, client: ClientWithMiddleware) -> Self { - self.client = Some(client); - self - } - - pub fn build(self) -> Result { - let connection = match self.client { - Some(client) => Arc::new(Connection::new_with_middware(client)), - None => Arc::new(Connection::new()?), - }; - - let session = match self.auth.auth_type { - AuthType::Password(args) => Session::password_auth( - Arc::clone(&connection), - &self.auth.account_identifier, - self.auth.warehouse.as_deref(), - self.auth.database.as_deref(), - self.auth.schema.as_deref(), - &self.auth.username, - self.auth.role.as_deref(), - &args.password, - ), - AuthType::Certificate(args) => Session::cert_auth( - Arc::clone(&connection), - &self.auth.account_identifier, - self.auth.warehouse.as_deref(), - self.auth.database.as_deref(), - self.auth.schema.as_deref(), - &self.auth.username, - self.auth.role.as_deref(), - &args.private_key_pem, - ), - }; - - let account_identifier = self.auth.account_identifier.to_uppercase(); - - Ok(SnowflakeApi { - connection: Arc::clone(&connection), - session, - account_identifier, - }) - } -} - -/// Snowflake API, keeps connection pool and manages session for you -pub struct SnowflakeApi { - connection: Arc, - session: Session, - account_identifier: String, -} - -impl SnowflakeApi { - /// Initialize object with password auth. Authentication happens on the first request. - pub fn with_password_auth( - account_identifier: &str, - warehouse: Option<&str>, - database: Option<&str>, - schema: Option<&str>, - username: &str, - role: Option<&str>, - password: &str, - ) -> Result { - let connection = Arc::new(Connection::new()?); - - let session = Session::password_auth( - Arc::clone(&connection), - account_identifier, - warehouse, - database, - schema, - username, - role, - password, - ); - - let account_identifier = account_identifier.to_uppercase(); - Ok(Self { - connection: Arc::clone(&connection), - session, - account_identifier, - }) - } - - /// Initialize object with private certificate auth. Authentication happens on the first request. - pub fn with_certificate_auth( - account_identifier: &str, - warehouse: Option<&str>, - database: Option<&str>, - schema: Option<&str>, - username: &str, - role: Option<&str>, - private_key_pem: &str, - ) -> Result { - let connection = Arc::new(Connection::new()?); - - let session = Session::cert_auth( - Arc::clone(&connection), - account_identifier, - warehouse, - database, - schema, - username, - role, - private_key_pem, - ); - - let account_identifier = account_identifier.to_uppercase(); - Ok(Self { - connection: Arc::clone(&connection), - session, - account_identifier, - }) - } - - pub fn from_env() -> Result { - SnowflakeApiBuilder::new(AuthArgs::from_env()?).build() - } - - /// Closes the current session, this is necessary to clean up temporary objects (tables, functions, etc) - /// which are Snowflake session dependent. - /// If another request is made the new session will be initiated. - pub async fn close_session(&mut self) -> Result<(), SnowflakeApiError> { - self.session.close().await?; - Ok(()) - } - - /// Execute a single query against API. - /// If statement is PUT, then file will be uploaded to the Snowflake-managed storage - pub async fn exec(&self, sql: &str) -> Result { - let raw = self.exec_raw(sql).await?; - let res = raw.deserialize_arrow()?; - Ok(res) - } - - /// Executes a single query against API. - /// If statement is PUT, then file will be uploaded to the Snowflake-managed storage - /// Returns raw bytes in the Arrow response - pub async fn exec_raw(&self, sql: &str) -> Result { - let put_re = Regex::new(r"(?i)^(?:/\*.*\*/\s*)*put\s+").unwrap(); - - // put commands go through a different flow and result is side-effect - if put_re.is_match(sql) { - log::info!("Detected PUT query"); - - self.exec_put(sql).await.map(|()| RawQueryResult::Empty) - } else { - self.exec_arrow_raw(sql).await - } - } - - async fn exec_put(&self, sql: &str) -> Result<(), SnowflakeApiError> { - let resp = self - .run_sql::(sql, QueryType::JsonQuery) - .await?; - log::debug!("Got PUT response: {:?}", resp); - - match resp { - ExecResponse::Query(_) => Err(SnowflakeApiError::UnexpectedResponse), - ExecResponse::PutGet(pg) => self.put(pg).await, - ExecResponse::Error(e) => Err(SnowflakeApiError::ApiError( - e.data.error_code, - e.message.unwrap_or_default(), - )), - } - } - - async fn put(&self, resp: PutGetExecResponse) -> Result<(), SnowflakeApiError> { - match resp.data.stage_info { - PutGetStageInfo::Aws(info) => self.put_to_s3(&resp.data.src_locations, info).await, - PutGetStageInfo::Azure(_) => Err(SnowflakeApiError::Unimplemented( - "PUT local file requests for Azure".to_string(), - )), - PutGetStageInfo::Gcs(_) => Err(SnowflakeApiError::Unimplemented( - "PUT local file requests for GCS".to_string(), - )), - } - } - - async fn put_to_s3( - &self, - src_locations: &[String], - info: AwsPutGetStageInfo, - ) -> Result<(), SnowflakeApiError> { - let (bucket_name, bucket_path) = info - .location - .split_once('/') - .ok_or(SnowflakeApiError::InvalidBucketPath(info.location.clone()))?; - - let s3 = AmazonS3Builder::new() - .with_region(info.region) - .with_bucket_name(bucket_name) - .with_access_key_id(info.creds.aws_key_id) - .with_secret_access_key(info.creds.aws_secret_key) - .with_token(info.creds.aws_token) - .build()?; - - // todo: security vulnerability, external system tells you which local files to upload - for src_path in src_locations { - let path = Path::new(src_path); - let filename = path - .file_name() - .ok_or(SnowflakeApiError::InvalidLocalPath(src_path.clone()))?; - - // fixme: unwrap - let dest_path = format!("{}{}", bucket_path, filename.to_str().unwrap()); - let dest_path = object_store::path::Path::parse(dest_path)?; - - let src_path = object_store::path::Path::parse(src_path)?; - - let fs = LocalFileSystem::new().get(&src_path).await?; - - s3.put(&dest_path, fs.bytes().await?).await?; - } - - Ok(()) - } - - /// Useful for debugging to get the straight query response - #[cfg(debug_assertions)] - pub async fn exec_response(&mut self, sql: &str) -> Result { - self.run_sql::(sql, QueryType::ArrowQuery) - .await - } - - /// Useful for debugging to get raw JSON response - #[cfg(debug_assertions)] - pub async fn exec_json(&mut self, sql: &str) -> Result { - self.run_sql::(sql, QueryType::JsonQuery) - .await - } - - async fn exec_arrow_raw(&self, sql: &str) -> Result { - let resp = self - .run_sql::(sql, QueryType::ArrowQuery) - .await?; - log::debug!("Got query response: {:?}", resp); - - let resp = match resp { - // processable response - ExecResponse::Query(qr) => Ok(qr), - ExecResponse::PutGet(_) => Err(SnowflakeApiError::UnexpectedResponse), - ExecResponse::Error(e) => Err(SnowflakeApiError::ApiError( - e.data.error_code, - e.message.unwrap_or_default(), - )), - }?; - - // if response was empty, base64 data is empty string - // todo: still return empty arrow batch with proper schema? (schema always included) - if resp.data.returned == 0 { - log::debug!("Got response with 0 rows"); - Ok(RawQueryResult::Empty) - } else if let Some(value) = resp.data.rowset { - log::debug!("Got JSON response"); - // NOTE: json response could be chunked too. however, go clients should receive arrow by-default, - // unless user sets session variable to return json. This case was added for debugging and status - // information being passed through that fields. - Ok(RawQueryResult::Json(JsonResult { - value, - schema: resp.data.rowtype.into_iter().map(Into::into).collect(), - })) - } else if let Some(base64) = resp.data.rowset_base64 { - // fixme: is it possible to give streaming interface? - let mut chunks = try_join_all(resp.data.chunks.iter().map(|chunk| { - self.connection - .get_chunk(&chunk.url, &resp.data.chunk_headers) - })) - .await?; - - // fixme: should base64 chunk go first? - // fixme: if response is chunked is it both base64 + chunks or just chunks? - if !base64.is_empty() { - log::debug!("Got base64 encoded response"); - let bytes = Bytes::from(base64::engine::general_purpose::STANDARD.decode(base64)?); - chunks.push(bytes); - } - - Ok(RawQueryResult::Bytes(chunks)) - } else { - Err(SnowflakeApiError::BrokenResponse) - } - } - - async fn run_sql( - &self, - sql_text: &str, - query_type: QueryType, - ) -> Result { - log::debug!("Executing: {}", sql_text); - - let parts = self.session.get_token().await?; - - let body = ExecRequest { - sql_text: sql_text.to_string(), - async_exec: false, - sequence_id: parts.sequence_id, - is_internal: false, - }; - - let resp = self - .connection - .request::( - query_type, - &self.account_identifier, - &[], - Some(&parts.session_token_auth_header), - body, - ) - .await?; - - Ok(resp) - } -} diff --git a/rust/snowflake-api/src/polars.rs b/rust/snowflake-api/src/polars.rs deleted file mode 100644 index c7243b7f..00000000 --- a/rust/snowflake-api/src/polars.rs +++ /dev/null @@ -1,91 +0,0 @@ -use std::convert::TryFrom; - -use bytes::{Buf, Bytes}; -use polars_core::frame::DataFrame; -use polars_io::ipc::IpcStreamReader; -use polars_io::json::{JsonFormat, JsonReader}; -use polars_io::SerReader; -use serde::de::Error; -use serde_json::{Map, Value}; -use thiserror::Error; - -use crate::{JsonResult, RawQueryResult}; - -#[derive(Error, Debug)] -pub enum PolarsCastError { - #[error(transparent)] - SerdeError(#[from] serde_json::Error), - - #[error(transparent)] - PolarsError(#[from] polars_core::error::PolarsError), -} - -impl RawQueryResult { - pub fn to_polars(self) -> Result { - match self { - RawQueryResult::Bytes(bytes) => dataframe_from_bytes(bytes), - RawQueryResult::Json(json) => dataframe_from_json(&json), - RawQueryResult::Empty => Ok(DataFrame::empty()), - } - } -} - -fn dataframe_from_json(json_result: &JsonResult) -> Result { - let objects = arrays_to_objects(json_result)?; - // fixme: serializing json again, is it possible to keep bytes? or implement casting? - let json_string = serde_json::to_string(&objects)?; - let reader = std::io::Cursor::new(json_string.as_bytes()); - let df = JsonReader::new(reader) - .with_json_format(JsonFormat::Json) - .infer_schema_len(Some(5)) - .finish()?; - Ok(df) -} - -/// This is required because the polars json reader expects an array of objects, and -/// the snowflake json response is an array of arrays (without real column names). -/// -/// This is apparent if you run a system query (not a select) like `SHOW DATABASES;`. -fn arrays_to_objects(json_result: &JsonResult) -> Result { - let arrays: &Vec = json_result - .value - .as_array() - .ok_or(serde_json::Error::custom("Input must be array an array"))?; - let names: Vec = json_result.schema.iter().map(|s| s.name.clone()).collect(); - - let objects: Result, PolarsCastError> = arrays - .iter() - .map(|array| { - array - .as_array() - .ok_or(serde_json::Error::custom("Input must be array of array")) - .map(|array| { - // fixme: lots of copying - let map: Map = - names.clone().into_iter().zip(array.clone()).collect(); - Value::Object(map) - }) - .map_err(PolarsCastError::SerdeError) - }) - .collect(); - - objects.map(Value::Array) -} - -fn dataframe_from_bytes(bytes: Vec) -> Result { - let mut df = DataFrame::empty(); - for b in bytes { - let df_chunk = IpcStreamReader::new(b.reader()).finish()?; - df.vstack_mut(&df_chunk)?; - } - df.align_chunks(); - Ok(df) -} - -impl TryFrom for DataFrame { - type Error = PolarsCastError; - - fn try_from(value: RawQueryResult) -> Result { - value.to_polars() - } -} diff --git a/rust/snowflake-api/src/requests.rs b/rust/snowflake-api/src/requests.rs deleted file mode 100644 index 77b04344..00000000 --- a/rust/snowflake-api/src/requests.rs +++ /dev/null @@ -1,70 +0,0 @@ -use serde::Serialize; - -#[derive(Serialize, Debug)] -#[serde(rename_all = "camelCase")] -pub struct ExecRequest { - pub sql_text: String, - pub async_exec: bool, - pub sequence_id: u64, - pub is_internal: bool, -} - -#[derive(Serialize, Debug)] -pub struct LoginRequest { - pub data: T, -} - -pub type PasswordLoginRequest = LoginRequest; -#[cfg(feature = "cert-auth")] -pub type CertLoginRequest = LoginRequest; - -#[derive(Serialize, Debug)] -#[serde(rename_all = "SCREAMING_SNAKE_CASE")] -pub struct LoginRequestCommon { - pub client_app_id: String, - pub client_app_version: String, - pub svn_revision: String, - pub account_name: String, - pub login_name: String, - pub session_parameters: SessionParameters, - pub client_environment: ClientEnvironment, -} - -#[derive(Serialize, Debug)] -#[serde(rename_all = "SCREAMING_SNAKE_CASE")] -pub struct SessionParameters { - pub client_validate_default_parameters: bool, -} - -#[derive(Serialize, Debug)] -#[serde(rename_all = "SCREAMING_SNAKE_CASE")] -pub struct ClientEnvironment { - pub application: String, - pub os: String, - pub os_version: String, - pub ocsp_mode: String, -} - -#[derive(Serialize, Debug)] -#[serde(rename_all = "SCREAMING_SNAKE_CASE")] -pub struct PasswordRequestData { - #[serde(flatten)] - pub login_request_common: LoginRequestCommon, - pub password: String, -} - -#[derive(Serialize, Debug)] -#[serde(rename_all = "SCREAMING_SNAKE_CASE")] -pub struct CertRequestData { - #[serde(flatten)] - pub login_request_common: LoginRequestCommon, - pub authenticator: String, - pub token: String, -} - -#[derive(Serialize, Debug)] -#[serde(rename_all = "camelCase")] -pub struct RenewSessionRequest { - pub old_session_token: String, - pub request_type: String, -} diff --git a/rust/snowflake-api/src/responses.rs b/rust/snowflake-api/src/responses.rs deleted file mode 100644 index dee26c32..00000000 --- a/rust/snowflake-api/src/responses.rs +++ /dev/null @@ -1,306 +0,0 @@ -use std::collections::HashMap; - -use serde::Deserialize; - -#[allow(clippy::large_enum_variant)] -#[derive(Deserialize, Debug)] -#[serde(untagged)] -pub enum ExecResponse { - Query(QueryExecResponse), - PutGet(PutGetExecResponse), - Error(ExecErrorResponse), -} - -// todo: add close session response, which should be just empty? -#[allow(clippy::large_enum_variant)] -#[derive(Deserialize, Debug)] -#[serde(untagged)] -pub enum AuthResponse { - Login(LoginResponse), - Auth(AuthenticatorResponse), - Renew(RenewSessionResponse), - Close(CloseSessionResponse), - Error(AuthErrorResponse), -} - -#[derive(Deserialize, Debug)] -pub struct BaseRestResponse { - // null for auth - pub code: Option, - pub message: Option, - pub success: bool, - pub data: D, -} - -pub type PutGetExecResponse = BaseRestResponse; -pub type QueryExecResponse = BaseRestResponse; -pub type ExecErrorResponse = BaseRestResponse; -pub type AuthErrorResponse = BaseRestResponse; -pub type AuthenticatorResponse = BaseRestResponse; -pub type LoginResponse = BaseRestResponse; -pub type RenewSessionResponse = BaseRestResponse; -// Data should be always `null` on successful close session response -pub type CloseSessionResponse = BaseRestResponse>; - -#[derive(Deserialize, Debug)] -#[serde(rename_all = "camelCase")] -pub struct ExecErrorResponseData { - pub age: i64, - pub error_code: String, - pub internal_error: bool, - - // come when query is invalid - pub line: Option, - pub pos: Option, - - // fixme: only valid for exec query response error? present in any exec query response? - pub query_id: String, - pub sql_state: String, -} - -#[derive(Deserialize, Debug)] -#[serde(rename_all = "camelCase")] -pub struct AuthErrorResponseData { - pub authn_method: String, -} - -#[derive(Deserialize, Debug)] -pub struct NameValueParameter { - pub name: String, - pub value: serde_json::Value, -} - -#[derive(Deserialize, Debug)] -#[serde(rename_all = "camelCase")] -pub struct LoginResponseData { - pub session_id: i64, - pub token: String, - pub master_token: String, - pub server_version: String, - #[serde(default)] - pub parameters: Vec, - pub session_info: SessionInfo, - pub master_validity_in_seconds: i64, - pub validity_in_seconds: i64, -} - -#[derive(Deserialize, Debug)] -#[serde(rename_all = "camelCase")] -pub struct SessionInfo { - pub database_name: Option, - pub schema_name: Option, - pub warehouse_name: Option, - pub role_name: String, -} - -#[derive(Deserialize, Debug)] -#[serde(rename_all = "camelCase")] -pub struct AuthenticatorResponseData { - pub token_url: String, - pub sso_url: String, - pub proof_key: String, -} - -#[derive(Deserialize, Debug)] -#[serde(rename_all = "camelCase")] -pub struct RenewSessionResponseData { - pub session_token: String, - pub validity_in_seconds_s_t: i64, - pub master_token: String, - pub validity_in_seconds_m_t: i64, - pub session_id: i64, -} - -#[derive(Deserialize, Debug)] -#[serde(rename_all = "camelCase")] -pub struct QueryExecResponseData { - pub parameters: Vec, - pub rowtype: Vec, - // default for non-SELECT queries - // GET / PUT has their own response format - pub rowset: Option, - // only exists when binary response is given, eg Arrow - // default for all SELECT queries - // is base64-encoded Arrow IPC payload - pub rowset_base64: Option, - pub total: i64, - pub returned: i64, // unused in .NET - pub query_id: String, // unused in .NET - pub database_provider: Option, - pub final_database_name: Option, // unused in .NET - pub final_schema_name: Option, - pub final_warehouse_name: Option, // unused in .NET - pub final_role_name: String, // unused in .NET - // only present on SELECT queries - pub number_of_binds: Option, // unused in .NET - // todo: deserialize into enum - pub statement_type_id: i64, - pub version: i64, - // if response is chunked - #[serde(default)] // soft-default to empty Vec if not present - pub chunks: Vec, - // x-amz-server-side-encryption-customer-key, when chunks are present for download - pub qrmk: Option, - #[serde(default)] // chunks are present - pub chunk_headers: HashMap, - // when async query is run (ping pong request?) - pub get_result_url: Option, - // multi-statement response, comma-separated - pub result_ids: Option, - // `progressDesc`, and `queryAbortAfterSecs` are not used but exist in .NET - // `sendResultTime`, `queryResultFormat`, `queryContext` also exist -} - -#[derive(Deserialize, Debug)] -pub struct ExecResponseRowType { - pub name: String, - #[serde(rename = "byteLength")] - pub byte_length: Option, - // unused in .NET - pub length: Option, - #[serde(rename = "type")] - pub type_: SnowflakeType, - pub scale: Option, - pub precision: Option, - pub nullable: bool, -} - -// fixme: is it good idea to keep this as an enum if more types could be added in future? -#[derive(Deserialize, Debug)] -#[serde(rename_all = "snake_case")] -pub enum SnowflakeType { - Fixed, - Real, - Text, - Date, - Variant, - TimestampLtz, - TimestampNtz, - TimestampTz, - Object, - Binary, - Time, - Boolean, - Array, -} - -#[derive(Deserialize, Debug)] -#[serde(rename_all = "camelCase")] -pub struct ExecResponseChunk { - pub url: String, - pub row_count: i32, - pub uncompressed_size: i64, -} - -#[derive(Deserialize, Debug)] -#[serde(rename_all = "camelCase")] -pub struct PutGetResponseData { - // `kind`, `operation` are present in Go implementation, but not in .NET - pub command: CommandType, - pub local_location: Option, - // inconsistent case naming - #[serde(rename = "src_locations", default)] - pub src_locations: Vec, - // todo: support upload parallelism - // file upload parallelism - pub parallel: i32, - // file size threshold, small ones are should be uploaded with given parallelism - pub threshold: i64, - // doesn't need compression if source is already compressed - pub auto_compress: bool, - pub overwrite: bool, - // maps to one of the predefined compression algos - // todo: support different compression formats? - pub source_compression: String, - pub stage_info: PutGetStageInfo, - pub encryption_material: EncryptionMaterialVariant, - // GCS specific. If you request multiple files? - #[serde(default)] - pub presigned_urls: Vec, - #[serde(default)] - pub parameters: Vec, - pub statement_type_id: Option, -} - -#[derive(Deserialize, Debug)] -#[serde(rename_all = "UPPERCASE")] -pub enum CommandType { - Upload, - Download, -} - -#[derive(Deserialize, Debug)] -#[serde(untagged)] -pub enum PutGetStageInfo { - Aws(AwsPutGetStageInfo), - Azure(AzurePutGetStageInfo), - Gcs(GcsPutGetStageInfo), -} - -#[derive(Deserialize, Debug)] -#[serde(rename_all = "camelCase")] -pub struct AwsPutGetStageInfo { - pub location_type: String, - pub location: String, - pub region: String, - pub creds: AwsCredentials, - // FIPS endpoint - pub end_point: Option, -} - -#[derive(Deserialize, Debug)] -#[serde(rename_all = "SCREAMING_SNAKE_CASE")] -pub struct AwsCredentials { - pub aws_key_id: String, - pub aws_secret_key: String, - pub aws_token: String, - pub aws_id: String, - pub aws_key: String, -} - -#[derive(Deserialize, Debug)] -#[serde(rename_all = "camelCase")] -pub struct GcsPutGetStageInfo { - pub location_type: String, - pub location: String, - pub storage_account: String, - pub creds: GcsCredentials, - pub presigned_url: String, -} - -#[derive(Deserialize, Debug)] -#[serde(rename_all = "SCREAMING_SNAKE_CASE")] -pub struct GcsCredentials { - pub gcs_access_token: String, -} - -#[derive(Deserialize, Debug)] -#[serde(rename_all = "camelCase")] -pub struct AzurePutGetStageInfo { - pub location_type: String, - pub location: String, - pub storage_account: String, - pub creds: AzureCredentials, -} - -#[derive(Deserialize, Debug)] -#[serde(rename_all = "SCREAMING_SNAKE_CASE")] -pub struct AzureCredentials { - pub azure_sas_token: String, -} - -#[derive(Deserialize, Debug)] -#[serde(untagged)] -pub enum EncryptionMaterialVariant { - Single(PutGetEncryptionMaterial), - Multiple(Vec), -} - -#[derive(Deserialize, Debug)] -#[serde(rename_all = "camelCase")] -pub struct PutGetEncryptionMaterial { - // base64 encoded - pub query_stage_master_key: String, - pub query_id: String, - pub smk_id: i64, -} diff --git a/rust/snowflake-api/src/session.rs b/rust/snowflake-api/src/session.rs deleted file mode 100644 index 58fcf7f4..00000000 --- a/rust/snowflake-api/src/session.rs +++ /dev/null @@ -1,426 +0,0 @@ -use std::sync::Arc; -use std::time::{Duration, Instant}; - -use futures::lock::Mutex; -#[cfg(feature = "cert-auth")] -use snowflake_jwt::generate_jwt_token; -use thiserror::Error; - -use crate::connection; -use crate::connection::{Connection, QueryType}; -#[cfg(feature = "cert-auth")] -use crate::requests::{CertLoginRequest, CertRequestData}; -use crate::requests::{ - ClientEnvironment, LoginRequest, LoginRequestCommon, PasswordLoginRequest, PasswordRequestData, - RenewSessionRequest, SessionParameters, -}; -use crate::responses::AuthResponse; - -#[derive(Error, Debug)] -pub enum AuthError { - #[error(transparent)] - #[cfg(feature = "cert-auth")] - JwtError(#[from] snowflake_jwt::JwtError), - - #[error(transparent)] - RequestError(#[from] connection::ConnectionError), - - #[error("Environment variable `{0}` is required, but were not set")] - MissingEnvArgument(String), - - #[error("Password auth was requested, but password wasn't provided")] - MissingPassword, - - #[error("Certificate auth was requested, but certificate wasn't provided")] - MissingCertificate, - - #[error("Unexpected API response")] - UnexpectedResponse, - - // todo: add code mapping to meaningful message and/or refer to docs - // eg https://docs.snowflake.com/en/user-guide/key-pair-auth-troubleshooting - #[error("Failed to authenticate. Error code: {0}. Message: {1}")] - AuthFailed(String, String), - - #[error("Can not renew closed session token")] - OutOfOrderRenew, - - #[error("Failed to exchange or request a new token")] - TokenFetchFailed, - - #[error("Enable the cert-auth feature to use certificate authentication")] - CertAuthNotEnabled, -} - -#[derive(Debug)] -struct AuthTokens { - session_token: AuthToken, - master_token: AuthToken, - /// expected by snowflake api for all requests within session to follow sequence id - sequence_id: u64, -} - -#[derive(Debug, Clone)] -struct AuthToken { - token: String, - valid_for: Duration, - issued_on: Instant, -} - -#[derive(Debug, Clone)] -pub struct AuthParts { - pub session_token_auth_header: String, - pub sequence_id: u64, -} - -impl AuthToken { - pub fn new(token: &str, validity_in_seconds: i64) -> Self { - let token = token.to_string(); - - let valid_for = if validity_in_seconds < 0 { - Duration::from_secs(u64::MAX) - } else { - // Note for reviewer: I beliebe this only fails on negative numbers. I imagine we will - // never get negative numbers, but if we do, is MAX or 0 a more sane default? - Duration::from_secs(u64::try_from(validity_in_seconds).unwrap_or(u64::MAX)) - }; - let issued_on = Instant::now(); - - Self { - token, - valid_for, - issued_on, - } - } - - pub fn is_expired(&self) -> bool { - Instant::now().duration_since(self.issued_on) >= self.valid_for - } - - pub fn auth_header(&self) -> String { - format!("Snowflake Token=\"{}\"", &self.token) - } -} - -enum AuthType { - Certificate, - Password, -} - -/// Requests, caches, and renews authentication tokens. -/// Tokens are given as response to creating new session in Snowflake. Session persists -/// the configuration state and temporary objects (tables, procedures, etc). -// todo: split warehouse-database-schema and username-role-key into its own structs -// todo: close session after object is dropped -pub struct Session { - connection: Arc, - - auth_tokens: Mutex>, - auth_type: AuthType, - account_identifier: String, - - warehouse: Option, - database: Option, - schema: Option, - - username: String, - role: Option, - // This is not used with the certificate auth crate - #[allow(dead_code)] - private_key_pem: Option, - password: Option, -} - -// todo: make builder -impl Session { - /// Authenticate using private certificate and JWT - // fixme: add builder or introduce structs - #[allow(clippy::too_many_arguments)] - pub fn cert_auth( - connection: Arc, - account_identifier: &str, - warehouse: Option<&str>, - database: Option<&str>, - schema: Option<&str>, - username: &str, - role: Option<&str>, - private_key_pem: &str, - ) -> Self { - // uppercase everything as this is the convention - let account_identifier = account_identifier.to_uppercase(); - - let database = database.map(str::to_uppercase); - let schema = schema.map(str::to_uppercase); - - let username = username.to_uppercase(); - let role = role.map(str::to_uppercase); - let private_key_pem = Some(private_key_pem.to_string()); - - Self { - connection, - auth_tokens: Mutex::new(None), - auth_type: AuthType::Certificate, - private_key_pem, - account_identifier, - warehouse: warehouse.map(str::to_uppercase), - database, - username, - role, - schema, - password: None, - } - } - - /// Authenticate using password - // fixme: add builder or introduce structs - #[allow(clippy::too_many_arguments)] - pub fn password_auth( - connection: Arc, - account_identifier: &str, - warehouse: Option<&str>, - database: Option<&str>, - schema: Option<&str>, - username: &str, - role: Option<&str>, - password: &str, - ) -> Self { - let account_identifier = account_identifier.to_uppercase(); - - let database = database.map(str::to_uppercase); - let schema = schema.map(str::to_uppercase); - - let username = username.to_uppercase(); - let password = Some(password.to_string()); - let role = role.map(str::to_uppercase); - - Self { - connection, - auth_tokens: Mutex::new(None), - auth_type: AuthType::Password, - account_identifier, - warehouse: warehouse.map(str::to_uppercase), - database, - username, - role, - password, - schema, - private_key_pem: None, - } - } - - /// Get cached token or request a new one if old one has expired. - pub async fn get_token(&self) -> Result { - let mut auth_tokens = self.auth_tokens.lock().await; - if auth_tokens.is_none() - || auth_tokens - .as_ref() - .is_some_and(|at| at.master_token.is_expired()) - { - // Create new session if tokens are absent or can not be exchange - let tokens = match self.auth_type { - AuthType::Certificate => { - log::info!("Starting session with certificate authentication"); - if cfg!(feature = "cert-auth") { - self.create(self.cert_request_body()?).await - } else { - Err(AuthError::MissingCertificate)? - } - } - AuthType::Password => { - log::info!("Starting session with password authentication"); - self.create(self.passwd_request_body()?).await - } - }?; - *auth_tokens = Some(tokens); - } else if auth_tokens - .as_ref() - .is_some_and(|at| at.session_token.is_expired()) - { - // Renew old session token - let tokens = self.renew().await?; - *auth_tokens = Some(tokens); - } - auth_tokens.as_mut().unwrap().sequence_id += 1; - let session_token_auth_header = auth_tokens.as_ref().unwrap().session_token.auth_header(); - Ok(AuthParts { - session_token_auth_header, - sequence_id: auth_tokens.as_ref().unwrap().sequence_id, - }) - } - - pub async fn close(&mut self) -> Result<(), AuthError> { - if let Some(tokens) = self.auth_tokens.lock().await.take() { - log::debug!("Closing sessions"); - - let resp = self - .connection - .request::( - QueryType::CloseSession, - &self.account_identifier, - &[("delete", "true")], - Some(&tokens.session_token.auth_header()), - serde_json::Value::default(), - ) - .await?; - - match resp { - AuthResponse::Close(_) => Ok(()), - AuthResponse::Error(e) => Err(AuthError::AuthFailed( - e.code.unwrap_or_default(), - e.message.unwrap_or_default(), - )), - _ => Err(AuthError::UnexpectedResponse), - } - } else { - Ok(()) - } - } - - #[cfg(feature = "cert-auth")] - fn cert_request_body(&self) -> Result { - let full_identifier = format!("{}.{}", &self.account_identifier, &self.username); - let private_key_pem = self - .private_key_pem - .as_ref() - .ok_or(AuthError::MissingCertificate)?; - let jwt_token = generate_jwt_token(private_key_pem, &full_identifier)?; - - Ok(CertLoginRequest { - data: CertRequestData { - login_request_common: self.login_request_common(), - authenticator: "SNOWFLAKE_JWT".to_string(), - token: jwt_token, - }, - }) - } - - fn passwd_request_body(&self) -> Result { - let password = self.password.as_ref().ok_or(AuthError::MissingPassword)?; - - Ok(PasswordLoginRequest { - data: PasswordRequestData { - login_request_common: self.login_request_common(), - password: password.to_string(), - }, - }) - } - - /// Start new session, all the Snowflake temporary objects will be scoped towards it, - /// as well as temporary configuration parameters - async fn create( - &self, - body: LoginRequest, - ) -> Result { - let mut get_params = Vec::new(); - if let Some(warehouse) = &self.warehouse { - get_params.push(("warehouse", warehouse.as_str())); - } - - if let Some(database) = &self.database { - get_params.push(("databaseName", database.as_str())); - } - - if let Some(schema) = &self.schema { - get_params.push(("schemaName", schema.as_str())); - } - - if let Some(role) = &self.role { - get_params.push(("roleName", role.as_str())); - } - - let resp = self - .connection - .request::( - QueryType::LoginRequest, - &self.account_identifier, - &get_params, - None, - body, - ) - .await?; - log::debug!("Auth response: {:?}", resp); - - match resp { - AuthResponse::Login(lr) => { - let session_token = AuthToken::new(&lr.data.token, lr.data.validity_in_seconds); - let master_token = - AuthToken::new(&lr.data.master_token, lr.data.master_validity_in_seconds); - - Ok(AuthTokens { - session_token, - master_token, - sequence_id: 0, - }) - } - AuthResponse::Error(e) => Err(AuthError::AuthFailed( - e.code.unwrap_or_default(), - e.message.unwrap_or_default(), - )), - _ => Err(AuthError::UnexpectedResponse), - } - } - - fn login_request_common(&self) -> LoginRequestCommon { - LoginRequestCommon { - client_app_id: "Go".to_string(), - client_app_version: "1.6.22".to_string(), - svn_revision: String::new(), - account_name: self.account_identifier.clone(), - login_name: self.username.clone(), - session_parameters: SessionParameters { - client_validate_default_parameters: true, - }, - client_environment: ClientEnvironment { - application: "Rust".to_string(), - // todo: detect os - os: "darwin".to_string(), - os_version: "gc-arm64".to_string(), - ocsp_mode: "FAIL_OPEN".to_string(), - }, - } - } - - async fn renew(&self) -> Result { - if let Some(token) = self.auth_tokens.lock().await.take() { - log::debug!("Renewing the token"); - let auth = token.master_token.auth_header(); - let body = RenewSessionRequest { - old_session_token: token.session_token.token.clone(), - request_type: "RENEW".to_string(), - }; - - let resp = self - .connection - .request( - QueryType::TokenRequest, - &self.account_identifier, - &[], - Some(&auth), - body, - ) - .await?; - - match resp { - AuthResponse::Renew(rs) => { - let session_token = - AuthToken::new(&rs.data.session_token, rs.data.validity_in_seconds_s_t); - let master_token = - AuthToken::new(&rs.data.master_token, rs.data.validity_in_seconds_m_t); - - Ok(AuthTokens { - session_token, - master_token, - sequence_id: token.sequence_id, - }) - } - AuthResponse::Error(e) => Err(AuthError::AuthFailed( - e.code.unwrap_or_default(), - e.message.unwrap_or_default(), - )), - _ => Err(AuthError::UnexpectedResponse), - } - } else { - Err(AuthError::OutOfOrderRenew) - } - } -}