Skip to content

Commit

Permalink
feat: stac-arrow benchmarks
Browse files Browse the repository at this point in the history
  • Loading branch information
gadomski committed Jun 5, 2024
1 parent 3ab3ea1 commit 189436c
Show file tree
Hide file tree
Showing 3 changed files with 49 additions and 0 deletions.
5 changes: 5 additions & 0 deletions stac-arrow/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -19,5 +19,10 @@ thiserror = "1"
wkb = "0.7"

[dev-dependencies]
criterion = { version = "0.5", features = ["html_reports"] }
parquet = "51"
stac-validate = { version = "0.1", path = "../stac-validate" }

[[bench]]
name = "read"
harness = false
41 changes: 41 additions & 0 deletions stac-arrow/benches/read.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
//! How fast is `RecordBatch` -> `Vec<Map<String, Value>>` when going through
//! full serialization vs the deprecated `record_batches_to_json_rows`?
use arrow::array::RecordBatch;
use arrow_json::ArrayWriter;
use criterion::{criterion_group, criterion_main, Criterion};
use parquet::arrow::arrow_reader::ParquetRecordBatchReaderBuilder;
use serde_json::{Map, Value};
use std::fs::File;

/// Benchmark body: converts one record batch to JSON rows through the
/// deprecated `arrow_json::writer::record_batches_to_json_rows`.
///
/// # Panics
///
/// Panics if the conversion returns an error.
// The deprecated function is the baseline being measured, so the lint is
// silenced on purpose.
#[allow(deprecated)]
fn record_batches_to_json_rows(record_batch: &RecordBatch) {
    let rows = arrow_json::writer::record_batches_to_json_rows(&[record_batch])
        .expect("record batch should convert to JSON rows");
    // Pass the result through `black_box` so the optimizer cannot treat the
    // conversion as dead work and remove it from the measurement.
    drop(std::hint::black_box(rows));
}

/// Benchmark body: serializes one record batch to a JSON array with
/// [`ArrayWriter`], then parses the produced bytes back into
/// `Vec<Map<String, Value>>`.
///
/// # Panics
///
/// Panics if writing, finishing, or parsing the JSON fails.
fn writer(record_batch: &RecordBatch) {
    let mut writer = ArrayWriter::new(Vec::new());
    writer
        .write(record_batch)
        .expect("record batch should serialize to JSON");
    writer.finish().expect("JSON array should be finalized");
    let bytes = writer.into_inner();
    // The JSON is already fully in memory, so parse it with `from_slice`;
    // serde_json documents `from_reader` as the slower option in that case,
    // which would unfairly penalize this side of the comparison.
    let rows: Vec<Map<String, Value>> =
        serde_json::from_slice(&bytes).expect("written JSON should parse as an array of objects");
    // Keep the parsed rows observable so the work is not optimized away.
    drop(std::hint::black_box(rows));
}

/// Registers the `read` benchmark group, which compares the two
/// `RecordBatch` -> JSON rows conversion paths defined in this file.
///
/// Only the first record batch of `data/naip.parquet` is used, and its
/// `geometry` column is removed before either benchmark runs.
///
/// # Panics
///
/// Panics if the fixture cannot be opened or decoded, if it yields no record
/// batch, or if the batch has no `geometry` column.
fn criterion_benchmark(c: &mut Criterion) {
    let mut group = c.benchmark_group("read");
    // The path is relative to the working directory of the bench process.
    let file = File::open("data/naip.parquet")
        .expect("fixture data/naip.parquet should be readable");
    let mut reader = ParquetRecordBatchReaderBuilder::try_new(file)
        .expect("fixture should have valid parquet metadata")
        .build()
        .expect("parquet record batch reader should build");
    let mut record_batch = reader
        .next()
        .expect("fixture should contain at least one record batch")
        .expect("first record batch should decode");
    // Both benchmarks run on the batch without its `geometry` column.
    let index = record_batch
        .schema()
        .index_of("geometry")
        .expect("fixture should have a `geometry` column");
    record_batch.remove_column(index);
    group.bench_function("record_batches_to_json_rows", |b| {
        b.iter(|| record_batches_to_json_rows(&record_batch))
    });
    group.bench_function("writer", |b| b.iter(|| writer(&record_batch)));
    // Finish the group explicitly rather than relying on its drop behavior.
    group.finish();
}

// Collect `criterion_benchmark` into a group named `benches`, then hand that
// group to `criterion_main!`, which provides the entry point for this bench
// binary (the bench target is declared with `harness = false`).
criterion_group!(benches, criterion_benchmark);
criterion_main!(benches);
3 changes: 3 additions & 0 deletions stac-arrow/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -221,3 +221,6 @@ mod readme {

external_doc_test!(include_str!("../README.md"));
}

// NOTE(review): presumably this marks the `criterion` dev-dependency as used
// in test builds of the library, since it is otherwise only referenced from
// `benches/` (e.g. to satisfy an `unused_crate_dependencies` lint) — confirm.
#[cfg(test)]
use criterion as _;

0 comments on commit 189436c

Please sign in to comment.