diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 468d0e9c2..5ddf9d196 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -27,6 +27,7 @@ jobs: - "-p stac-arrow" - "-p stac-async" - "-p stac-cli --no-default-features" + - "-p stac-duckdb" - "-p stac-server --no-default-features" - "-p stac-server --no-default-features -F axum" - "-p stac-server --no-default-features -F memory-item-search" diff --git a/Cargo.toml b/Cargo.toml index cf6baafae..e4de02681 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -7,6 +7,7 @@ members = [ "stac-arrow", "stac-async", "stac-cli", + "stac-duckdb", "stac-server", "stac-validate", ] @@ -16,6 +17,7 @@ default-members = [ "stac-arrow", "stac-async", "stac-cli", + "stac-duckdb", "stac-server", "stac-validate", ] diff --git a/stac-duckdb/Cargo.toml b/stac-duckdb/Cargo.toml new file mode 100644 index 000000000..caad292e5 --- /dev/null +++ b/stac-duckdb/Cargo.toml @@ -0,0 +1,11 @@ +[package] +name = "stac-duckdb" +version = "0.1.0" +edition = "2021" + +[dependencies] +duckdb = { version = "0.10", features = ["parquet"] } +stac = { version = "0.7", path = "../stac" } +stac-api = { version = "0.4", path = "../stac-api" } +stac-arrow = { version = "0.1", path = "../stac-arrow" } +thiserror = "1" diff --git a/stac-duckdb/data/naip.parquet b/stac-duckdb/data/naip.parquet new file mode 100644 index 000000000..e54d47a2e Binary files /dev/null and b/stac-duckdb/data/naip.parquet differ diff --git a/stac-duckdb/src/lib.rs b/stac-duckdb/src/lib.rs new file mode 100644 index 000000000..fdf753266 --- /dev/null +++ b/stac-duckdb/src/lib.rs @@ -0,0 +1,79 @@ +//! Use [STAC](https://stacspec.org/) with [DuckDB](https://duckdb.org). + +use duckdb::Connection; +use stac::Item; +use stac_api::Search; +use thiserror::Error; + +/// Crate-specific error enum. +#[derive(Debug, Error)] +pub enum Error { + /// [duckdb::Error] + #[error(transparent)] + DuckDb(#[from] duckdb::Error), + + /// [stac_arrow::Error] + #[error(transparent)] + StacArrow(#[from] stac_arrow::Error), +} + +pub type Result = std::result::Result; + +/// A STAC-DuckDB client. +pub struct Client { + connection: Connection, + path: String, +} + +impl Client { + /// Creates a new client for the given path. + /// + /// # Examples + /// + /// ``` + /// use stac_duckdb::Client; + /// + /// let client = Client::from_path("data/naip.parquet").unwrap(); + /// ``` + pub fn from_path(path: impl ToString) -> Result { + Ok(Client { + connection: Connection::open_in_memory()?, + path: path.to_string(), + }) + } + + /// Searches this client's data store for items. + /// + /// TODO actually support search. + /// + /// # Examples + /// + /// ``` + /// use stac_duckdb::Client; + /// + /// let client = Client::from_path("data/naip.parquet").unwrap(); + /// let items = client.search(Default::default()).unwrap(); + /// ``` + pub fn search(&self, _: Search) -> Result> { + let mut statement = self + .connection + .prepare(&format!("SELECT * from '{}'", self.path))?; + let mut items = Vec::new(); + for record_batch in statement.query_arrow([])? { + items.extend(stac_arrow::record_batch_to_items(record_batch)?); + } + Ok(items) + } +} + +#[cfg(test)] +mod tests { + use super::Client; + + #[test] + fn select_all() { + let client = Client::from_path("data/naip.parquet").unwrap(); + let items = client.search(Default::default()).unwrap(); + assert_eq!(items.len(), 5); + } +}