add decode benchmarks for Rust piece to measure CGO overhead
Daulet Zhanguzin committed Jul 10, 2024
1 parent 8b4f0ce commit 21d1792
Showing 9 changed files with 517 additions and 25 deletions.
427 changes: 405 additions & 22 deletions Cargo.lock

Large diffs are not rendered by default.

10 changes: 7 additions & 3 deletions Cargo.toml
@@ -10,6 +10,10 @@ crate-type = ["staticlib"]
libc = "0.2.140"
tokenizers = {version = "0.19.1" }

[registries.crates-io]
# speed up "Updating crates.io index"
protocol = "sparse"
[dev-dependencies]
criterion = { version = "0.5.1", features = ["html_reports"] }
rand = "0.8.5"

[[bench]]
name = "decode_benchmark"
harness = false
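(`harness = false` opts the bench target out of the default libtest harness so that Criterion's own `main` can drive the runs.)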
19 changes: 19 additions & 0 deletions README.md
@@ -46,6 +46,8 @@ fmt.Println(tk.Decode([]uint32{2829, 4419, 14523, 2058, 1996, 13971, 3899}, true

## Benchmarks

Decoding overhead (due to CGO) ranges from negligible to about 9%, depending on the benchmark.

```bash
go test . -bench=. -benchmem -benchtime=10s

@@ -60,6 +62,23 @@ PASS
ok github.com/daulet/tokenizers 126.681s
```
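For a rough sense of what the Go side measures, here is a minimal sketch of a decode benchmark. The `Decode([]uint32, bool)` call matches the README example above, but `FromFile` and `Close` are assumptions about the library's API rather than a copy of the repo's actual benchmark:

```go
package tokenizers_test

import (
	"testing"

	"github.com/daulet/tokenizers"
)

// BenchmarkDecodeNTimes decodes the same 7-token sequence on every
// iteration, so ns/op includes the full CGO round trip per call.
func BenchmarkDecodeNTimes(b *testing.B) {
	// FromFile and Close are assumed names for this library's API.
	tk, err := tokenizers.FromFile("./test/data/bert-base-uncased.json")
	if err != nil {
		b.Fatal(err)
	}
	defer tk.Close()
	ids := []uint32{2829, 4419, 14523, 2058, 1996, 13971, 3899}
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		_ = tk.Decode(ids, true)
	}
}
```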

Run the equivalent Rust benchmarks with `cargo bench`.

```bash
decode_n_times time: [3.9812 µs 3.9874 µs 3.9939 µs]
change: [-0.4103% -0.1338% +0.1275%] (p = 0.33 > 0.05)
No change in performance detected.
Found 7 outliers among 100 measurements (7.00%)
7 (7.00%) high mild

decode_n_tokens time: [651.72 ns 661.73 ns 675.78 ns]
change: [+0.3504% +2.0016% +3.5507%] (p = 0.01 < 0.05)
Change within noise threshold.
Found 7 outliers among 100 measurements (7.00%)
2 (2.00%) high mild
5 (5.00%) high severe
```
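Comparing medians across the two harnesses: ~4.35 µs (Go) vs ~3.99 µs (Rust) for the 7-token decode is roughly the 9% CGO overhead quoted above, while the per-token figures (~662 ns Rust, ~643 ns Go) are within a few percent of each other, i.e. negligible.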

## Contributing

Please refer to [CONTRIBUTING.md](CONTRIBUTING.md) for information on how to contribute a PR to this project.
36 changes: 36 additions & 0 deletions benches/decode_benchmark.rs
@@ -0,0 +1,36 @@
use criterion::{black_box, criterion_group, criterion_main, Criterion};
use rand::Rng;
use std::time::Instant;
use tokenizers::tokenizer::Tokenizer;

fn decode(tokenizer: &Tokenizer, ids_slice: &[u32], skip_special_tokens: bool) -> String {
tokenizer.decode(ids_slice, skip_special_tokens).expect("failed to decode input")
}

fn bench_decode_n_times(c: &mut Criterion) {
let tokenizer = Tokenizer::from_file("./test/data/bert-base-uncased.json").expect("failed to create tokenizer");
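    // Same 7-token input as the README example and the Go BenchmarkDecodeNTimes,
    // so the Rust and Go runs decode identical input.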
c.bench_function("decode_n_times",
|b| b.iter(||
decode(&tokenizer, black_box(&[2829, 4419, 14523, 2058, 1996, 13971, 3899]), black_box(true))
)
);
}

fn bench_decode_n_tokens(c: &mut Criterion) {
let tokenizer = Tokenizer::from_file("./test/data/bert-base-uncased.json").expect("failed to create tokenizer");
let max_token_id = tokenizer.get_vocab_size(true);
let mut rng = rand::thread_rng();

c.bench_function("decode_n_tokens",
move |b| { b.iter_custom(|iters| {
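        // One random id per requested iteration; timing a single decode over
        // the whole batch makes Criterion report elapsed/iters, i.e. per-token cost.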
let tokens: Vec<u32> = (0..iters).map(|_| rng.gen_range(0..max_token_id) as u32).collect();

let start = Instant::now();
decode(&tokenizer, black_box(&tokens), black_box(true));
start.elapsed()
})}
);
}

criterion_group!(benches, bench_decode_n_times, bench_decode_n_tokens);
criterion_main!(benches);
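Worth noting: `decode_n_times` times each call with the stock `iter`, while `decode_n_tokens` uses `iter_custom` so that generating the random ids stays outside the measured window; in both benches `black_box` prevents the compiler from constant-folding the inputs away.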
36 changes: 36 additions & 0 deletions test/benchmark/go_results.txt
@@ -0,0 +1,36 @@
Run with `go test -bench=Decode -count=10 -run=^\$ > test/benchmark/go_results.txt` then `benchstat test/benchmark/go_results.txt`

goos: darwin
goarch: arm64
pkg: github.com/daulet/tokenizers
BenchmarkDecodeNTimes-10 239250 4343 ns/op
BenchmarkDecodeNTimes-10 271682 4356 ns/op
BenchmarkDecodeNTimes-10 274546 4346 ns/op
BenchmarkDecodeNTimes-10 271051 4368 ns/op
BenchmarkDecodeNTimes-10 272458 4372 ns/op
BenchmarkDecodeNTimes-10 271284 4350 ns/op
BenchmarkDecodeNTimes-10 272586 4350 ns/op
BenchmarkDecodeNTimes-10 271552 4358 ns/op
BenchmarkDecodeNTimes-10 268934 4349 ns/op
BenchmarkDecodeNTimes-10 273238 4364 ns/op
BenchmarkDecodeNTokens-10 1840972 657.1 ns/op
BenchmarkDecodeNTokens-10 1817886 636.0 ns/op
BenchmarkDecodeNTokens-10 1884613 641.0 ns/op
BenchmarkDecodeNTokens-10 1823654 637.8 ns/op
BenchmarkDecodeNTokens-10 1883685 646.7 ns/op
BenchmarkDecodeNTokens-10 1852138 642.2 ns/op
BenchmarkDecodeNTokens-10 1852321 643.3 ns/op
BenchmarkDecodeNTokens-10 1850312 649.7 ns/op
BenchmarkDecodeNTokens-10 1838618 640.8 ns/op
BenchmarkDecodeNTokens-10 1881645 642.7 ns/op
PASS
ok github.com/daulet/tokenizers 31.929s

goos: darwin
goarch: arm64
pkg: github.com/daulet/tokenizers
│ test/benchmark/go_results.txt │
│ sec/op │
DecodeNTimes-10 4.353µ ± 0%
DecodeNTokens-10 642.5n ± 1%
geomean 1.672µ
14 changes: 14 additions & 0 deletions test/benchmark/rust_results.txt
@@ -0,0 +1,14 @@
Run with `cargo bench`

decode_n_times time: [3.9812 µs 3.9874 µs 3.9939 µs]
change: [-0.4103% -0.1338% +0.1275%] (p = 0.33 > 0.05)
No change in performance detected.
Found 7 outliers among 100 measurements (7.00%)
7 (7.00%) high mild

decode_n_tokens time: [651.72 ns 661.73 ns 675.78 ns]
change: [+0.3504% +2.0016% +3.5507%] (p = 0.01 < 0.05)
Change within noise threshold.
Found 7 outliers among 100 measurements (7.00%)
2 (2.00%) high mild
5 (5.00%) high severe
