-
Notifications
You must be signed in to change notification settings - Fork 12
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
24 changed files
with
1,763 additions
and
38 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1 +1,2 @@ | ||
stac-api-validator | ||
stac-geoparquet |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,30 @@ | ||
#!/usr/bin/env python3 | ||
|
||
import subprocess | ||
from pathlib import Path | ||
from tempfile import TemporaryDirectory | ||
|
||
import pyarrow.parquet | ||
import stac_geoparquet.arrow | ||
|
||
# Round-trip check: have the Rust `stac-cli` convert the NAIP JSON fixture to
# parquet, then read the result back with the Python `stac_geoparquet` package.
# The input path is relative, so it is resolved against the current working
# directory of whoever runs this script.
with TemporaryDirectory() as scratch_directory:
    path = Path(scratch_directory) / "naip.parquet"
    # Build only the parquet feature of the CLI; everything after "--" is
    # handed to the CLI itself rather than to cargo.
    cargo_arguments = ["run", "-p", "stac-cli", "--no-default-features", "-F", "parquet"]
    cli_arguments = ["convert", "stac-arrow/data/naip.json", path]
    # check=True makes a failed build or conversion raise CalledProcessError.
    subprocess.run(["cargo", *cargo_arguments, "--", *cli_arguments], check=True)
    items = list(
        stac_geoparquet.arrow.stac_table_to_items(pyarrow.parquet.read_table(path))
    )
    # The script expects exactly five items to come back from the fixture.
    assert len(items) == 5
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
# Changelog | ||
|
||
All notable changes to this project will be documented in this file. | ||
|
||
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). | ||
|
||
## Unreleased |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,31 @@ | ||
[package] | ||
name = "stac-arrow" | ||
version = "0.1.0" | ||
authors = ["Pete Gadomski <pete.gadomski@gmail.com>"] | ||
edition = "2021" | ||
description = "Read and write STAC data stored in geoarrow and geoparquet formats" | ||
homepage = "https://github.com/stac-utils/stac-rs" | ||
repository = "https://github.com/stac-utils/stac-rs" | ||
keywords = ["geospatial", "stac", "metadata", "geo", "arrow", "parquet"] | ||
categories = ["science", "data-structures"] | ||
|
||
[dependencies] | ||
arrow = { version = "51", default-features = false, features = ["chrono-tz"] } | ||
arrow-json = "51" # TODO port `record_batches_to_json_rows` so we can go to the latest version | ||
geo = "0.28" | ||
geoarrow = "0.2" | ||
geojson = "0.24" | ||
geozero = { version = "0.13", features = ["with-wkb"] } | ||
serde_json = "1" | ||
stac = { version = "0.7", features = ["wkb"], path = "../stac" } | ||
thiserror = "1" | ||
|
||
[dev-dependencies] | ||
criterion = { version = "0.5", features = ["html_reports"] } | ||
geoarrow = { version = "0.2", features = ["parquet"] } | ||
parquet = "51" | ||
stac-validate = { version = "0.1", path = "../stac-validate" } | ||
|
||
[[bench]] | ||
name = "read" | ||
harness = false |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,57 @@ | ||
# stac-arrow | ||
|
||
[![GitHub Workflow Status](https://img.shields.io/github/actions/workflow/status/stac-utils/stac-rs/ci.yml?branch=main&style=for-the-badge)](https://github.com/stac-utils/stac-rs/actions/workflows/ci.yml) | ||
[![docs.rs](https://img.shields.io/docsrs/stac-arrow?style=for-the-badge)](https://docs.rs/stac-arrow/latest/stac_arrow/) | ||
[![Crates.io](https://img.shields.io/crates/v/stac-arrow?style=for-the-badge)](https://crates.io/crates/stac-arrow) | ||
![Crates.io](https://img.shields.io/crates/l/stac-arrow?style=for-the-badge) | ||
[![Contributor Covenant](https://img.shields.io/badge/Contributor%20Covenant-2.1-4baaaa.svg?style=for-the-badge)](./CODE_OF_CONDUCT) | ||
|
||
Read and write [STAC](https://stacspec.org/) data stored in [arrow](https://arrow.apache.org/). | ||
Data are formatted per the [stac-geoparquet spec](https://github.com/stac-utils/stac-geoparquet/blob/main/spec/stac-geoparquet-spec.md). | ||
|
||
## Usage | ||
|
||
To use the library in your project: | ||
|
||
```toml | ||
[dependencies] | ||
stac-arrow = "0.1" | ||
``` | ||
|
||
## Examples | ||
|
||
Reading from a [geoparquet](https://geoparquet.org/) file: | ||
|
||
```rust | ||
use std::fs::File; | ||
use parquet::arrow::arrow_reader::ParquetRecordBatchReaderBuilder; | ||
|
||
let file = File::open("data/naip.parquet").unwrap(); | ||
let reader = ParquetRecordBatchReaderBuilder::try_new(file) | ||
.unwrap() | ||
.build() | ||
.unwrap(); | ||
let mut items = Vec::new(); | ||
for result in reader { | ||
items.extend(stac_arrow::record_batch_to_items(result.unwrap()).unwrap()); | ||
} | ||
assert_eq!(items.len(), 5); | ||
``` | ||
|
||
Writing [geoparquet](https://geoparquet.org/) to an in-memory buffer: | ||
|
||
```rust | ||
use stac::ItemCollection; | ||
use std::io::Cursor; | ||
|
||
let item_collection: ItemCollection = stac::read_json("data/naip.json").unwrap(); | ||
let mut geo_table = stac_arrow::items_to_geo_table(item_collection.items).unwrap(); | ||
let mut cursor = Cursor::new(Vec::new()); | ||
geoarrow::io::parquet::write_geoparquet(&mut geo_table, &mut cursor, None).unwrap(); | ||
``` | ||
|
||
Please see the [documentation](https://docs.rs/stac-arrow) for more usage examples. | ||
|
||
## Other info | ||
|
||
This crate is part of the [stac-rs](https://github.com/stac-utils/stac-rs) monorepo, see its README for contributing and license information. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,41 @@ | ||
//! How fast is `RecordBatch` -> `Vec<Map<String, Value>>` when going through | ||
//! full serialization vs the deprecated `record_batches_to_json_rows`? | ||
use arrow::array::RecordBatch; | ||
use arrow_json::ArrayWriter; | ||
use criterion::{criterion_group, criterion_main, Criterion}; | ||
use parquet::arrow::arrow_reader::ParquetRecordBatchReaderBuilder; | ||
use serde_json::{Map, Value}; | ||
use std::fs::File; | ||
|
||
#[allow(deprecated)] | ||
fn record_batches_to_json_rows(record_batch: &RecordBatch) { | ||
let _ = arrow_json::writer::record_batches_to_json_rows(&[record_batch]).unwrap(); | ||
} | ||
|
||
fn writer(record_batch: &RecordBatch) { | ||
let mut writer = ArrayWriter::new(Vec::new()); | ||
writer.write(record_batch).unwrap(); | ||
writer.finish().unwrap(); | ||
let _: Vec<Map<String, Value>> = | ||
serde_json::from_reader(writer.into_inner().as_slice()).unwrap(); | ||
} | ||
|
||
fn criterion_benchmark(c: &mut Criterion) { | ||
let mut group = c.benchmark_group("read"); | ||
let file = File::open("data/naip.parquet").unwrap(); | ||
let mut reader = ParquetRecordBatchReaderBuilder::try_new(file) | ||
.unwrap() | ||
.build() | ||
.unwrap(); | ||
let mut record_batch = reader.next().unwrap().unwrap(); | ||
let index = record_batch.schema().index_of("geometry").unwrap(); | ||
record_batch.remove_column(index); | ||
group.bench_function("record_batches_to_json_rows", |b| { | ||
b.iter(|| record_batches_to_json_rows(&record_batch)) | ||
}); | ||
group.bench_function("writer", |b| b.iter(|| writer(&record_batch))); | ||
} | ||
|
||
criterion_group!(benches, criterion_benchmark); | ||
criterion_main!(benches); |
Oops, something went wrong.