diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 00000000..40a03c10 --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,48 @@ + +# Capture benchmarks + +Run the following commands to capture benchmarks for your current commit: +``` +make build +go test . -bench=. -benchmem -count=6 -timeout 30m | tee benchmarks/$(git rev-parse HEAD).txt +``` + +Then capture benchmarks the same way for the previous commit in upstream/main, and publish the benchstat comparison along with your PR: +``` +git checkout . +benchstat benchmarks/$(git rev-parse HEAD^1).txt benchmarks/$(git rev-parse HEAD).txt +``` + +It should look something like this: +``` +goos: darwin +goarch: arm64 +pkg: github.com/daulet/tokenizers + │ benchmarks/786da4095f5ca3d598db1236c46401b63874f640.txt │ benchmarks/38a9a14c1c56b113461b0c7350c72de949e23cc2.txt │ + │ sec/op │ sec/op vs base │ +EncodeNTimes-10 13.26µ ± 4% 13.11µ ± 1% -1.09% (p=0.041 n=6) +EncodeNChars-10 3.170n ± 530% 2.989n ± 272% ~ (p=0.937 n=6) +DecodeNTimes-10 4.496µ ± 4% 4.535µ ± 2% ~ (p=0.132 n=6) +DecodeNTokens-10 646.8n ± 6% 656.1n ± 3% ~ (p=0.589 n=6) +geomean 591.2n 584.3n -1.17% + + │ benchmarks/786da4095f5ca3d598db1236c46401b63874f640.txt │ benchmarks/38a9a14c1c56b113461b0c7350c72de949e23cc2.txt │ + │ B/op │ B/op vs base │ +EncodeNTimes-10 232.0 ± 0% 232.0 ± 0% ~ (p=1.000 n=6) ¹ +EncodeNChars-10 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=6) ¹ +DecodeNTimes-10 96.00 ± 0% 96.00 ± 0% ~ (p=1.000 n=6) ¹ +DecodeNTokens-10 7.000 ± 0% 7.000 ± 0% ~ (p=1.000 n=6) ¹ +geomean ² +0.00% ² +¹ all samples are equal +² summaries must be >0 to compute geomean + + │ benchmarks/786da4095f5ca3d598db1236c46401b63874f640.txt │ benchmarks/38a9a14c1c56b113461b0c7350c72de949e23cc2.txt │ + │ allocs/op │ allocs/op vs base │ +EncodeNTimes-10 12.00 ± 0% 12.00 ± 0% ~ (p=1.000 n=6) ¹ +EncodeNChars-10 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=6) ¹ +DecodeNTimes-10 3.000 ± 0% 3.000 ± 0% ~ (p=1.000 n=6) ¹ +DecodeNTokens-10 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=6) ¹ +geomean ² +0.00% ² +¹ all samples are 
equal +² summaries must be >0 to compute geomean +``` diff --git a/README.md b/README.md index fe568833..3dd601b0 100644 --- a/README.md +++ b/README.md @@ -57,3 +57,7 @@ BenchmarkDecodeNTokens-10 65191378 211.0 ns/op 7 B/op 0 a PASS ok github.com/daulet/tokenizers 126.681s ``` + +## Contributing + +Please refer to [CONTRIBUTING.md](CONTRIBUTING.md) for information on how to contribute a PR to this project. diff --git a/benchmarks/3188ded27885d1002698a0e25f0b32306c430e88.txt b/benchmarks/3188ded27885d1002698a0e25f0b32306c430e88.txt new file mode 100644 index 00000000..6318b139 --- /dev/null +++ b/benchmarks/3188ded27885d1002698a0e25f0b32306c430e88.txt @@ -0,0 +1,29 @@ +goos: darwin +goarch: arm64 +pkg: github.com/daulet/tokenizers +BenchmarkEncodeNTimes-10 101848 12317 ns/op 84 B/op 4 allocs/op +BenchmarkEncodeNTimes-10 97996 11903 ns/op 84 B/op 4 allocs/op +BenchmarkEncodeNTimes-10 98641 11991 ns/op 84 B/op 4 allocs/op +BenchmarkEncodeNTimes-10 98586 12234 ns/op 84 B/op 4 allocs/op +BenchmarkEncodeNTimes-10 99187 11781 ns/op 84 B/op 4 allocs/op +BenchmarkEncodeNTimes-10 98481 11984 ns/op 84 B/op 4 allocs/op +BenchmarkEncodeNChars-10 1000000000 2.443 ns/op 0 B/op 0 allocs/op +BenchmarkEncodeNChars-10 1000000000 2.579 ns/op 0 B/op 0 allocs/op +BenchmarkEncodeNChars-10 1000000000 2.723 ns/op 0 B/op 0 allocs/op +BenchmarkEncodeNChars-10 1000000000 2.531 ns/op 0 B/op 0 allocs/op +BenchmarkEncodeNChars-10 1000000000 2.787 ns/op 0 B/op 0 allocs/op +BenchmarkEncodeNChars-10 1000000000 2.590 ns/op 0 B/op 0 allocs/op +BenchmarkDecodeNTimes-10 690247 1707 ns/op 96 B/op 3 allocs/op +BenchmarkDecodeNTimes-10 685672 1696 ns/op 96 B/op 3 allocs/op +BenchmarkDecodeNTimes-10 679148 1705 ns/op 96 B/op 3 allocs/op +BenchmarkDecodeNTimes-10 653304 1745 ns/op 96 B/op 3 allocs/op +BenchmarkDecodeNTimes-10 669532 1687 ns/op 96 B/op 3 allocs/op +BenchmarkDecodeNTimes-10 675759 1696 ns/op 96 B/op 3 allocs/op +BenchmarkDecodeNTokens-10 5312313 213.2 ns/op 7 B/op 0 allocs/op 
+BenchmarkDecodeNTokens-10 5563538 186.9 ns/op 7 B/op 0 allocs/op +BenchmarkDecodeNTokens-10 6347782 195.1 ns/op 7 B/op 0 allocs/op +BenchmarkDecodeNTokens-10 6054649 199.6 ns/op 7 B/op 0 allocs/op +BenchmarkDecodeNTokens-10 6216045 184.5 ns/op 7 B/op 0 allocs/op +BenchmarkDecodeNTokens-10 5972562 192.2 ns/op 7 B/op 0 allocs/op +PASS +ok github.com/daulet/tokenizers 431.946s \ No newline at end of file diff --git a/benchmarks/38a9a14c1c56b113461b0c7350c72de949e23cc2.txt b/benchmarks/38a9a14c1c56b113461b0c7350c72de949e23cc2.txt new file mode 100644 index 00000000..8b8cab1f --- /dev/null +++ b/benchmarks/38a9a14c1c56b113461b0c7350c72de949e23cc2.txt @@ -0,0 +1,29 @@ +goos: darwin +goarch: arm64 +pkg: github.com/daulet/tokenizers +BenchmarkEncodeNTimes-10 89750 13168 ns/op 232 B/op 12 allocs/op +BenchmarkEncodeNTimes-10 89104 13092 ns/op 232 B/op 12 allocs/op +BenchmarkEncodeNTimes-10 91214 13135 ns/op 232 B/op 12 allocs/op +BenchmarkEncodeNTimes-10 91635 13164 ns/op 232 B/op 12 allocs/op +BenchmarkEncodeNTimes-10 91681 13034 ns/op 232 B/op 12 allocs/op +BenchmarkEncodeNTimes-10 91050 13085 ns/op 232 B/op 12 allocs/op +BenchmarkEncodeNChars-10 1000000000 3.680 ns/op 0 B/op 0 allocs/op +BenchmarkEncodeNChars-10 1000000000 2.547 ns/op 0 B/op 0 allocs/op +BenchmarkEncodeNChars-10 1000000000 11.13 ns/op 0 B/op 0 allocs/op +BenchmarkEncodeNChars-10 1000000000 2.496 ns/op 0 B/op 0 allocs/op +BenchmarkEncodeNChars-10 841242856 3.430 ns/op 0 B/op 0 allocs/op +BenchmarkEncodeNChars-10 1000000000 2.540 ns/op 0 B/op 0 allocs/op +BenchmarkDecodeNTimes-10 245875 4610 ns/op 96 B/op 3 allocs/op +BenchmarkDecodeNTimes-10 261669 4544 ns/op 96 B/op 3 allocs/op +BenchmarkDecodeNTimes-10 260374 4525 ns/op 96 B/op 3 allocs/op +BenchmarkDecodeNTimes-10 260748 4514 ns/op 96 B/op 3 allocs/op +BenchmarkDecodeNTimes-10 256246 4492 ns/op 96 B/op 3 allocs/op +BenchmarkDecodeNTimes-10 258206 4560 ns/op 96 B/op 3 allocs/op +BenchmarkDecodeNTokens-10 1756308 674.8 ns/op 7 B/op 0 allocs/op 
+BenchmarkDecodeNTokens-10 1847517 644.9 ns/op 7 B/op 0 allocs/op +BenchmarkDecodeNTokens-10 1813251 657.5 ns/op 7 B/op 0 allocs/op +BenchmarkDecodeNTokens-10 1849479 649.5 ns/op 7 B/op 0 allocs/op +BenchmarkDecodeNTokens-10 1847059 654.6 ns/op 7 B/op 0 allocs/op +BenchmarkDecodeNTokens-10 1726924 661.2 ns/op 7 B/op 0 allocs/op +PASS +ok github.com/daulet/tokenizers 163.493s