Skip to content

Commit

Permalink
Make Page always fully init (#1193)
Browse files Browse the repository at this point in the history
* Make `Page` always fully init

Per discussion on the snapshotting proposal,
this PR changes the type of `Page.row_data` to `[u8; _]`,
where previously it was `[MaybeUninit<u8>; _]`.

This turns out to be shockingly easy,
as our serialization codepaths never write padding bytes into a page.
The only place pages ever became `poison` was the initial allocation;
changing this to `alloc_zeroed` causes the `row_data` to always be valid at `[u8; _]`.

The majority of this diff is replacing `MaybeUninit`-specific operators
with their initialized equivalents,
and updating comments and documentation to reflect the new requirements.

This change also revealed a bug in the benchmarks
introduced when we swapped the order of sum tags and payloads
( #1063 ),
where benchmarks used a hardcoded offset for the tag which had not been updated.

* Update blake3

Blake3 only supports running under Miri as of 1.15.1, the latest version.
Prior versions hard-depended on SIMD intrinsics which Miri doesn't support.

* Address Mazdak's review.

Still pending his agreeing with me that `poison` is a better name than `uninit`.

* "Poison" -> "uninit"

Against my best wishes, for consistency with the broader Rust community's poor choices.

* Remove unnecessary `unsafe` blocks

* More unnecessary `unsafe`; remove forgotten SAFETY comments
  • Loading branch information
gefjon authored May 2, 2024
1 parent e258262 commit 484ba82
Show file tree
Hide file tree
Showing 18 changed files with 225 additions and 263 deletions.
4 changes: 2 additions & 2 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,7 @@ axum-extra = { version = "0.9", features = ["typed-header"] }
backtrace = "0.3.66"
base64 = "0.21.2"
bitflags = "2.3.3"
blake3 = "1.5"
blake3 = "1.5.1"
brotli = "3.5"
byte-unit = "4.0.18"
bytes = "1.2.1"
Expand Down
37 changes: 26 additions & 11 deletions crates/table/benches/page.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
use core::hash::BuildHasher;
use core::mem::{self, MaybeUninit};
use core::mem;
use core::time::Duration;
use criterion::measurement::{Measurement, WallTime};
use criterion::{black_box, criterion_group, criterion_main, Bencher, BenchmarkId, Criterion, Throughput};
Expand All @@ -12,12 +12,11 @@ use spacetimedb_table::bflatn_from::serialize_row_from_page;
use spacetimedb_table::bflatn_to::write_row_to_page;
use spacetimedb_table::blob_store::NullBlobStore;
use spacetimedb_table::eq::eq_row_in_page;
use spacetimedb_table::indexes::{PageOffset, RowHash};
use spacetimedb_table::indexes::{Byte, Bytes, PageOffset, RowHash};
use spacetimedb_table::layout::{row_size_for_type, RowTypeLayout};
use spacetimedb_table::page::Page;
use spacetimedb_table::row_hash::hash_row_in_page;
use spacetimedb_table::row_type_visitor::{row_type_visitor, VarLenVisitorProgram};
use spacetimedb_table::util;
use spacetimedb_table::var_len::{AlignedVarLenOffsets, NullVarLenVisitor, VarLenGranule, VarLenMembers, VarLenRef};

fn time<R>(acc: &mut Duration, body: impl FnOnce() -> R) -> R {
Expand Down Expand Up @@ -51,8 +50,11 @@ fn clear_zero(page: &mut Page) {
unsafe { page.zero_data() };
}

fn as_bytes<T>(t: &T) -> &[MaybeUninit<u8>] {
let ptr = (t as *const T).cast::<MaybeUninit<u8>>();
// Strictly this would be unsafe,
// since it causes UB when applied to types that contain padding/`poison`,
// but it's a benchmark so who cares.
fn as_bytes<T>(t: &T) -> &Bytes {
let ptr = (t as *const T).cast::<Byte>();
unsafe { std::slice::from_raw_parts(ptr, mem::size_of::<T>()) }
}

Expand All @@ -66,12 +68,15 @@ unsafe trait Row {
}

#[allow(clippy::missing_safety_doc)] // It's a benchmark, clippy. Who cares.
/// Apply only to types which:
/// - Contain no padding bytes.
/// - Contain no members which are stored BFLATN as var-len.
unsafe trait FixedLenRow: Row + Sized {
fn as_bytes(&self) -> &[MaybeUninit<u8>] {
fn as_bytes(&self) -> &Bytes {
as_bytes(self)
}

unsafe fn from_bytes(bytes: &[MaybeUninit<u8>]) -> &Self {
unsafe fn from_bytes(bytes: &Bytes) -> &Self {
let ptr = bytes.as_ptr();
debug_assert_eq!(ptr as usize % mem::align_of::<Self>(), 0);
debug_assert_eq!(bytes.len(), mem::size_of::<Self>());
Expand Down Expand Up @@ -241,6 +246,7 @@ fn insert_var_len_clean_page(c: &mut Criterion, visitor: &impl VarLenMembers, vi
|b, &len_in_bytes| {
let mut page = Page::new(row_size_for_type::<VarLenRef>());
unsafe { page.zero_data() };
// `0xa5` is the alternating bit pattern, which makes incorrect accesses obvious.
let data = [0xa5u8].repeat(len_in_bytes);
iter_time_with_page(b, &mut page, clear_zero, |_, _, page| {
fill_with_var_len(page, &data, visitor)
Expand All @@ -263,6 +269,7 @@ fn insert_var_len_dirty_page(c: &mut Criterion, visitor: &impl VarLenMembers, vi
&len_in_bytes,
|b, &len_in_bytes| {
let mut page = Page::new(row_size_for_type::<VarLenRef>());
// `0xa5` is the alternating bit pattern, which makes incorrect accesses obvious.
let data = [0xa5u8].repeat(len_in_bytes);
fill_with_var_len(&mut page, &data, visitor);

Expand Down Expand Up @@ -294,11 +301,13 @@ fn insert_opt_str(c: &mut Criterion) {
assert!(fixed_row_size.len() == 6);
let mut clean_page_group = c.benchmark_group("insert_optional_str/clean_page");

let mut variant_none = util::uninit_array::<u8, 6>();
variant_none[4].write(1);
// `0xa5` is the alternating bit pattern, which makes incorrect accesses obvious.
let mut variant_none = [0xa5u8; 6];
variant_none[0] = 1;

let mut variant_some = util::uninit_array::<u8, 6>();
variant_some[4].write(0);
// `0xa5` is the alternating bit pattern, which makes incorrect accesses obvious.
let mut variant_some = [0xa5u8; 6];
variant_some[0] = 0;

for some_ratio in [0.0, 0.1, 0.25, 0.5, 0.75, 0.9, 1.0] {
for &data_length_in_bytes in if some_ratio == 0.0 { &[0][..] } else { &VL_SIZES } {
Expand All @@ -311,6 +320,7 @@ fn insert_opt_str(c: &mut Criterion) {

clean_page_group.throughput(Throughput::Bytes((rows_per_page * avg_row_useful_size) as u64));

// `0xa5` is the alternating bit pattern, which makes incorrect accesses obvious.
let var_len_data = [0xa5].repeat(data_length_in_bytes);
clean_page_group.bench_with_input(
BenchmarkId::new(
Expand Down Expand Up @@ -391,6 +401,7 @@ fn iter_read_fixed_len<Row: FixedLenRow>(c: &mut Criterion) {
// Construct a page which is approximately `fullness_ratio` full,
// i.e. contains approximately `fullness_ratio * U64S_PER_PAGE` rows.
let mut partial_page = Page::new(row_size_for_type::<Row>());
// `0xa5` is the alternating bit pattern, which makes incorrect accesses obvious.
fill_with_fixed_len::<Row>(&mut partial_page, Row::from_u64(0xa5a5a5a5_a5a5a5a5), &visitor);
// `delete_u64s_to_approx_fullness_ratio` uses a seeded `StdRng`,
// so this should be consistent-ish.
Expand All @@ -414,6 +425,7 @@ fn iter_read_fixed_len<Row: FixedLenRow>(c: &mut Criterion) {
}

let mut full_page = Page::new(row_size_for_type::<Row>());
// `0xa5` is the alternating bit pattern, which makes incorrect accesses obvious.
fill_with_fixed_len(&mut full_page, Row::from_u64(0xa5a5a5a5_a5a5a5a5), &visitor);
group.throughput(Throughput::Bytes((full_page.num_rows() * mem::size_of::<Row>()) as u64));
group.bench_with_input(
Expand All @@ -436,6 +448,7 @@ fn copy_filter_into_fixed_len_keep_ratio<Row: FixedLenRow>(b: &mut Bencher, keep
let mut target_page = Page::new(row_size_for_type::<Row>());

let mut src_page = Page::new(row_size_for_type::<Row>());
// `0xa5` is the alternating bit pattern, which makes incorrect accesses obvious.
fill_with_fixed_len::<Row>(&mut src_page, Row::from_u64(0xa5a5a5a5_a5a5a5a5), &visitor);

let mut rng = StdRng::seed_from_u64(0xa5a5a5a5_a5a5a5a5);
Expand Down Expand Up @@ -525,6 +538,7 @@ fn product_value_test_cases() -> impl Iterator<
(
"U32",
[AlgebraicType::U32].into(),
// `0xa5` is the alternating bit pattern, which makes incorrect accesses obvious.
product![0xa5a5_a5a5u32],
Some(NullVarLenVisitor),
Some(AlignedVarLenOffsets::from_offsets(&[])),
Expand All @@ -539,6 +553,7 @@ fn product_value_test_cases() -> impl Iterator<
(
"Option<U32>/Some",
[AlgebraicType::option(AlgebraicType::U32)].into(),
// `0xa5` is the alternating bit pattern, which makes incorrect accesses obvious.
product![AlgebraicValue::OptionSome(AlgebraicValue::U32(0xa5a5_a5a5))],
Some(NullVarLenVisitor),
Some(AlignedVarLenOffsets::from_offsets(&[])),
Expand Down
20 changes: 14 additions & 6 deletions crates/table/benches/page_manager.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
use core::iter;
use core::mem::{self, MaybeUninit};
use core::mem;
use core::time::Duration;
use criterion::measurement::{Measurement, WallTime};
use criterion::{
Expand All @@ -12,7 +12,8 @@ use spacetimedb_sats::db::def::{TableDef, TableSchema};
use spacetimedb_sats::{AlgebraicType, AlgebraicValue, ProductType, ProductValue};
use spacetimedb_table::blob_store::NullBlobStore;
use spacetimedb_table::btree_index::BTreeIndex;
use spacetimedb_table::indexes::{PageOffset, RowPointer, Size, SquashedOffset, PAGE_DATA_SIZE};
use spacetimedb_table::indexes::Byte;
use spacetimedb_table::indexes::{Bytes, PageOffset, RowPointer, Size, SquashedOffset, PAGE_DATA_SIZE};
use spacetimedb_table::layout::{row_size_for_bytes, row_size_for_type};
use spacetimedb_table::pages::Pages;
use spacetimedb_table::row_type_visitor::{row_type_visitor, VarLenVisitorProgram};
Expand Down Expand Up @@ -45,8 +46,11 @@ fn iter_time_with<P, B, X>(
})
}

fn as_bytes<T>(t: &T) -> &[MaybeUninit<u8>] {
let ptr = (t as *const T).cast::<MaybeUninit<u8>>();
// Strictly this would be unsafe,
// since it causes UB when applied to types that contain padding/`poison`,
// but it's a benchmark so who cares.
fn as_bytes<T>(t: &T) -> &Bytes {
let ptr = (t as *const T).cast::<Byte>();
unsafe { std::slice::from_raw_parts(ptr, mem::size_of::<T>()) }
}

Expand All @@ -64,12 +68,15 @@ unsafe trait Row {
}

#[allow(clippy::missing_safety_doc)] // It's a benchmark, clippy. Who cares.
/// Apply only to types which:
/// - Contain no padding bytes.
/// - Contain no members which are stored BFLATN as var-len.
unsafe trait FixedLenRow: Row + Sized {
fn as_bytes(&self) -> &[MaybeUninit<u8>] {
fn as_bytes(&self) -> &Bytes {
as_bytes(self)
}

unsafe fn from_bytes(bytes: &[MaybeUninit<u8>]) -> &Self {
unsafe fn from_bytes(bytes: &Bytes) -> &Self {
let ptr = bytes.as_ptr();
debug_assert_eq!(ptr as usize % mem::align_of::<Self>(), 0);
debug_assert_eq!(bytes.len(), mem::size_of::<Self>());
Expand Down Expand Up @@ -234,6 +241,7 @@ fn insert_one_page_fixed_len(c: &mut Criterion) {
));
group.bench_function(name, |b| {
let mut pages = Pages::default();
// `0xa5` is the alternating bit pattern, which makes incorrect accesses obvious.
insert_one_page_worth_fixed_len(&mut pages, visitor, &R::from_u64(0xa5a5a5a5_a5a5a5a5));
let pre = |_, pages: &mut Pages| pages.clear();
iter_time_with(b, &mut pages, pre, |_, _, pages| {
Expand Down
22 changes: 11 additions & 11 deletions crates/table/benches/var_len_visitor.rs
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
use core::slice;
use criterion::{black_box, criterion_group, criterion_main, Criterion};
use spacetimedb_sats::{AlgebraicType, ProductType};
use spacetimedb_table::indexes::{Byte, Bytes};
use spacetimedb_table::row_type_visitor::{dump_visitor_program, row_type_visitor, VarLenVisitorProgram};
use spacetimedb_table::util::uninit_array;
use spacetimedb_table::var_len::{AlignedVarLenOffsets, NullVarLenVisitor, VarLenMembers, VarLenRef};
use std::mem::{self, MaybeUninit};
use std::mem;

fn visit_count(row: &[MaybeUninit<u8>], visitor: &impl VarLenMembers) {
fn visit_count(row: &Bytes, visitor: &impl VarLenMembers) {
black_box(unsafe { visitor.visit_var_len(row) }.count());
}

Expand All @@ -23,8 +23,8 @@ fn visitor_program(row_ty: impl Into<ProductType>) -> VarLenVisitorProgram {
}

fn visit_fixed_len(c: &mut C) {
let row = &uninit_array::<u32, 1>();
let row = row.as_ptr().cast::<MaybeUninit<u8>>();
let row = &[0xa5a5_a5a5u32; 1];
let row = row.as_ptr().cast::<Byte>();
let row = unsafe { slice::from_raw_parts(row, mem::size_of::<u32>()) };

let mut group = c.benchmark_group("visit_fixed_len/u64");
Expand All @@ -49,8 +49,8 @@ fn visit_fixed_len(c: &mut C) {
}

fn visit_var_len_product(c: &mut C) {
let row = &uninit_array::<VarLenRef, 1>();
let row = row.as_ptr().cast::<MaybeUninit<u8>>();
let row = &[VarLenRef::NULL; 1];
let row = row.as_ptr().cast::<Byte>();
let row = unsafe { slice::from_raw_parts(row, mem::size_of::<VarLenRef>()) };

let mut group = c.benchmark_group("visit_var_len_product/VarLenRef");
Expand All @@ -73,20 +73,20 @@ fn visit_var_len_sum(c: &mut C) {

let visitor = &visitor_program([AlgebraicType::sum([AlgebraicType::String, AlgebraicType::unit()])]);

let row = &mut uninit_array::<u16, 3>();
let row = row.as_mut_ptr().cast::<MaybeUninit<u8>>();
let row = &mut [0xa5a5u16; 3];
let row = row.as_mut_ptr().cast::<Byte>();
let row = unsafe { slice::from_raw_parts_mut(row, 6) };

group.bench_function("none/VarLenVisitorProgram", |b| {
// None
row[4].write(1);
row[0] = 1;

b.iter(|| visit_count(row, visitor));
});

group.bench_function("some/VarLenVisitorProgram", |b| {
// Some
row[4].write(0);
row[0] = 0;

b.iter(|| visit_count(row, visitor));
});
Expand Down
16 changes: 2 additions & 14 deletions crates/table/src/bflatn_from.rs
Original file line number Diff line number Diff line change
Expand Up @@ -110,8 +110,7 @@ unsafe fn serialize_sum<S: Serializer>(
ty: &SumTypeLayout,
) -> Result<S::Ok, S::Error> {
// Read the tag of the sum value.
// SAFETY: `bytes[curr_offset..]` hold a sum value at `ty`.
let (tag, data_ty) = unsafe { read_tag(bytes, ty, curr_offset.get()) };
let (tag, data_ty) = read_tag(bytes, ty, curr_offset.get());

// Serialize the variant data value.
let data_offset = &Cell::new(curr_offset.get() + ty.offset_of_variant_data(tag));
Expand All @@ -133,20 +132,9 @@ unsafe fn serialize_sum<S: Serializer>(
}

/// Reads the tag of the sum value and selects the data variant type.
///
/// # Safety
///
/// `bytes[curr_offset..]` has a sum value typed at `ty`.
pub unsafe fn read_tag<'ty>(
bytes: &Bytes,
ty: &'ty SumTypeLayout,
curr_offset: usize,
) -> (u8, &'ty AlgebraicTypeLayout) {
pub fn read_tag<'ty>(bytes: &Bytes, ty: &'ty SumTypeLayout, curr_offset: usize) -> (u8, &'ty AlgebraicTypeLayout) {
let tag_offset = ty.offset_of_tag();
let tag = bytes[curr_offset + tag_offset];
// SAFETY: Caller promised that `bytes[curr_offset..]` has a sum value typed at `ty`.
// We can therefore assume that `curr_offset + tag_offset` refers to a valid `u8`.
let tag = unsafe { tag.assume_init() };

// Extract the variant data type depending on the tag.
let data_ty = &ty.variants[tag as usize].ty;
Expand Down
24 changes: 14 additions & 10 deletions crates/table/src/bflatn_to.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,8 @@ use super::{
},
page::{GranuleOffsetIter, Page, VarView},
pages::Pages,
util::{maybe_uninit_write_slice, range_move},
var_len::{visit_var_len_assume_init, VarLenGranule, VarLenMembers, VarLenRef},
util::range_move,
var_len::{VarLenGranule, VarLenMembers, VarLenRef},
};
use spacetimedb_sats::{bsatn::to_writer, buffer::BufWriter, AlgebraicType, AlgebraicValue, ProductValue, SumValue};
use thiserror::Error;
Expand Down Expand Up @@ -151,12 +151,12 @@ impl BflatnSerializedRowBuffer<'_> {
// and `fixed_buf.len()` matches exactly the size of the row type.
// - `fixed_buf`'s `VarLenRef`s are initialized up to `last_allocated_var_len_index`.
// - `visitor` is proper for the row type.
let visitor_iter = unsafe { visit_var_len_assume_init(visitor, self.fixed_buf) };
let visitor_iter = unsafe { visitor.visit_var_len(self.fixed_buf) };
for vlr in visitor_iter.take(self.last_allocated_var_len_index) {
// SAFETY: The `vlr` came from the allocation in `write_var_len_obj`
// which wrote it to the fixed part using `write_var_len_ref`.
// Thus, it points to a valid `VarLenGranule`.
unsafe { self.var_view.free_object_ignore_blob(vlr) };
unsafe { self.var_view.free_object_ignore_blob(*vlr) };
}
}

Expand All @@ -165,7 +165,8 @@ impl BflatnSerializedRowBuffer<'_> {
for (vlr, value) in self.large_blob_insertions {
// SAFETY: `vlr` was given to us by `alloc_for_slice`
// so it is properly aligned for a `VarLenGranule` and in bounds of the page.
// However, as it was added to `self.large_blob_insertion`, it is also uninit.
// However, as it was added to `self.large_blob_insertions`,
// we have not yet written the hash to that granule.
unsafe {
self.var_view.write_large_blob_hash_to_granule(blob_store, &value, vlr);
}
Expand Down Expand Up @@ -250,7 +251,8 @@ impl BflatnSerializedRowBuffer<'_> {
// so we need to check that our `ProductValue` has the same number of elements
// as our `ProductTypeLayout` to be sure it's typed correctly.
// Otherwise, if the value is too long, we'll discard its fields (whatever),
// or if it's too short, we'll leave some fields in the page "uninit"
// or if it's too long, we'll leave some fields in the page "uninit"
// (actually valid-unconstrained) (very bad).
if ty.elements.len() != val.elements.len() {
return Err(Error::WrongType(
ty.algebraic_type(),
Expand Down Expand Up @@ -302,7 +304,9 @@ impl BflatnSerializedRowBuffer<'_> {
} else {
// Write directly to the page.
// SAFETY: `vlr.first_granule` points to a granule
// even though the granule's data is uninit as of yet.
// even though the granule's data is not initialized as of yet.
// Note that the granule stores valid-unconstrained bytes (i.e. they are not uninit),
// but they may be leftovers from a previous allocation.
let iter = unsafe { self.var_view.granule_offset_iter(vlr.first_granule) };
let mut writer = GranuleBufWriter { buf: None, iter };
to_writer(&mut writer, val).unwrap();
Expand Down Expand Up @@ -347,7 +351,7 @@ impl BflatnSerializedRowBuffer<'_> {

// Write to the granule.
for (to, byte) in write_to.iter_mut().zip(extend_with) {
to.write(*byte);
*to = *byte;
}

slice = rest;
Expand Down Expand Up @@ -377,7 +381,7 @@ impl BflatnSerializedRowBuffer<'_> {
/// Write `bytes: &[u8; N]` starting at the current offset
/// and advance the offset by `N`.
fn write_bytes<const N: usize>(&mut self, bytes: &[u8; N]) {
maybe_uninit_write_slice(&mut self.fixed_buf[range_move(0..N, self.curr_offset)], bytes);
self.fixed_buf[range_move(0..N, self.curr_offset)].copy_from_slice(bytes);
self.curr_offset += N;
}

Expand Down Expand Up @@ -458,7 +462,7 @@ pub mod test {
use spacetimedb_sats::proptest::generate_typed_row;

proptest! {
#![proptest_config(ProptestConfig::with_cases(2048))]
#![proptest_config(ProptestConfig::with_cases(if cfg!(miri) { 8 } else { 2048 }))]
#[test]
fn av_serde_round_trip_through_page((ty, val) in generate_typed_row()) {
let ty: RowTypeLayout = ty.into();
Expand Down
Loading

2 comments on commit 484ba82

@github-actions
Copy link

@github-actions github-actions bot commented on 484ba82 May 2, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Criterion benchmark results

Criterion benchmark report

YOU SHOULD PROBABLY IGNORE THESE RESULTS.

Criterion is a wall time based benchmarking system that is extremely noisy when run on CI. We collect these results for longitudinal analysis, but they are not reliable for comparing individual PRs.

Go look at the callgrind report instead.

empty

db on disk new latency old latency new throughput old throughput
sqlite 💿 426.7±3.16ns 429.1±2.26ns - -
sqlite 🧠 417.1±1.79ns 424.5±2.03ns - -
stdb_raw 💿 740.7±3.56ns 749.5±1.23ns - -
stdb_raw 🧠 712.3±1.49ns 718.7±1.20ns - -

insert_1

db on disk schema indices preload new latency old latency new throughput old throughput

insert_bulk

db on disk schema indices preload count new latency old latency new throughput old throughput
sqlite 💿 u32_u64_str btree_each_column 2048 256 511.6±1.12µs 510.7±2.08µs 1954 tx/sec 1958 tx/sec
sqlite 💿 u32_u64_str unique_0 2048 256 137.5±0.69µs 136.9±0.50µs 7.1 Ktx/sec 7.1 Ktx/sec
sqlite 💿 u32_u64_u64 btree_each_column 2048 256 415.2±0.80µs 413.7±0.43µs 2.4 Ktx/sec 2.4 Ktx/sec
sqlite 💿 u32_u64_u64 unique_0 2048 256 122.7±0.41µs 125.3±0.58µs 8.0 Ktx/sec 7.8 Ktx/sec
sqlite 🧠 u32_u64_str btree_each_column 2048 256 442.5±0.58µs 441.5±1.15µs 2.2 Ktx/sec 2.2 Ktx/sec
sqlite 🧠 u32_u64_str unique_0 2048 256 120.7±0.40µs 121.1±0.46µs 8.1 Ktx/sec 8.1 Ktx/sec
sqlite 🧠 u32_u64_u64 btree_each_column 2048 256 362.6±1.07µs 359.5±0.42µs 2.7 Ktx/sec 2.7 Ktx/sec
sqlite 🧠 u32_u64_u64 unique_0 2048 256 106.9±0.97µs 104.5±0.58µs 9.1 Ktx/sec 9.3 Ktx/sec
stdb_raw 💿 u32_u64_str btree_each_column 2048 256 491.5±15.43µs 595.6±18.44µs 2034 tx/sec 1678 tx/sec
stdb_raw 💿 u32_u64_str unique_0 2048 256 471.2±34.61µs 496.7±16.14µs 2.1 Ktx/sec 2013 tx/sec
stdb_raw 💿 u32_u64_u64 btree_each_column 2048 256 357.7±15.27µs 400.9±8.07µs 2.7 Ktx/sec 2.4 Ktx/sec
stdb_raw 💿 u32_u64_u64 unique_0 2048 256 326.4±4.98µs 372.7±6.44µs 3.0 Ktx/sec 2.6 Ktx/sec
stdb_raw 🧠 u32_u64_str btree_each_column 2048 256 330.7±0.35µs 334.3±0.18µs 3.0 Ktx/sec 2.9 Ktx/sec
stdb_raw 🧠 u32_u64_str unique_0 2048 256 257.3±0.14µs 258.2±0.22µs 3.8 Ktx/sec 3.8 Ktx/sec
stdb_raw 🧠 u32_u64_u64 btree_each_column 2048 256 269.0±0.15µs 271.9±0.10µs 3.6 Ktx/sec 3.6 Ktx/sec
stdb_raw 🧠 u32_u64_u64 unique_0 2048 256 244.6±0.15µs 243.1±0.18µs 4.0 Ktx/sec 4.0 Ktx/sec

iterate

db on disk schema indices new latency old latency new throughput old throughput
sqlite 💿 u32_u64_str unique_0 19.9±0.19µs 19.0±0.13µs 49.0 Ktx/sec 51.4 Ktx/sec
sqlite 💿 u32_u64_u64 unique_0 17.9±0.14µs 18.0±0.07µs 54.6 Ktx/sec 54.3 Ktx/sec
sqlite 🧠 u32_u64_str unique_0 19.0±0.28µs 17.8±0.06µs 51.4 Ktx/sec 54.7 Ktx/sec
sqlite 🧠 u32_u64_u64 unique_0 17.1±0.32µs 16.7±0.08µs 57.2 Ktx/sec 58.4 Ktx/sec
stdb_raw 💿 u32_u64_str unique_0 3.7±0.00µs 3.7±0.00µs 263.8 Ktx/sec 264.3 Ktx/sec
stdb_raw 💿 u32_u64_u64 unique_0 3.6±0.00µs 3.6±0.00µs 271.1 Ktx/sec 271.7 Ktx/sec
stdb_raw 🧠 u32_u64_str unique_0 3.7±0.00µs 3.7±0.00µs 265.7 Ktx/sec 266.0 Ktx/sec
stdb_raw 🧠 u32_u64_u64 unique_0 3.6±0.00µs 3.6±0.00µs 273.6 Ktx/sec 274.2 Ktx/sec

find_unique

db on disk key type preload new latency old latency new throughput old throughput

filter

db on disk key type index strategy load count new latency old latency new throughput old throughput
sqlite 💿 string index 2048 256 67.1±0.21µs 65.9±0.25µs 14.6 Ktx/sec 14.8 Ktx/sec
sqlite 💿 u64 index 2048 256 63.6±0.18µs 63.3±0.11µs 15.3 Ktx/sec 15.4 Ktx/sec
sqlite 🧠 string index 2048 256 63.8±0.21µs 64.5±0.17µs 15.3 Ktx/sec 15.1 Ktx/sec
sqlite 🧠 u64 index 2048 256 58.7±0.20µs 59.8±0.07µs 16.6 Ktx/sec 16.3 Ktx/sec
stdb_raw 💿 string index 2048 256 5.2±0.00µs 5.2±0.00µs 189.1 Ktx/sec 189.2 Ktx/sec
stdb_raw 💿 u64 index 2048 256 5.1±0.00µs 5.1±0.00µs 192.0 Ktx/sec 191.9 Ktx/sec
stdb_raw 🧠 string index 2048 256 5.1±0.00µs 5.1±0.00µs 190.3 Ktx/sec 190.4 Ktx/sec
stdb_raw 🧠 u64 index 2048 256 5.1±0.00µs 5.1±0.00µs 193.2 Ktx/sec 193.0 Ktx/sec

serialize

schema format count new latency old latency new throughput old throughput
u32_u64_str bflatn_to_bsatn_fast_path 100 3.7±0.01µs 3.7±0.01µs 25.7 Mtx/sec 25.9 Mtx/sec
u32_u64_str bflatn_to_bsatn_slow_path 100 3.5±0.01µs 3.5±0.03µs 27.0 Mtx/sec 27.3 Mtx/sec
u32_u64_str bsatn 100 2.4±0.00µs 2.4±0.02µs 39.3 Mtx/sec 39.5 Mtx/sec
u32_u64_str json 100 5.0±0.02µs 4.7±0.02µs 19.2 Mtx/sec 20.1 Mtx/sec
u32_u64_str product_value 100 1015.9±0.51ns 1013.4±3.02ns 93.9 Mtx/sec 94.1 Mtx/sec
u32_u64_u64 bflatn_to_bsatn_fast_path 100 1409.7±15.48ns 1332.8±55.64ns 67.7 Mtx/sec 71.6 Mtx/sec
u32_u64_u64 bflatn_to_bsatn_slow_path 100 2.9±0.04µs 2.8±0.00µs 33.0 Mtx/sec 33.5 Mtx/sec
u32_u64_u64 bsatn 100 1716.8±31.70ns 1750.3±31.41ns 55.5 Mtx/sec 54.5 Mtx/sec
u32_u64_u64 json 100 3.2±0.03µs 3.1±0.04µs 29.9 Mtx/sec 30.7 Mtx/sec
u32_u64_u64 product_value 100 1011.3±0.24ns 1010.1±0.70ns 94.3 Mtx/sec 94.4 Mtx/sec
u64_u64_u32 bflatn_to_bsatn_fast_path 100 1157.9±13.67ns 1066.4±1.94ns 82.4 Mtx/sec 89.4 Mtx/sec
u64_u64_u32 bflatn_to_bsatn_slow_path 100 2.9±0.01µs 2.9±0.01µs 33.0 Mtx/sec 33.3 Mtx/sec
u64_u64_u32 bsatn 100 1689.7±26.97ns 1744.7±26.78ns 56.4 Mtx/sec 54.7 Mtx/sec
u64_u64_u32 json 100 3.2±0.03µs 3.2±0.13µs 29.7 Mtx/sec 30.0 Mtx/sec
u64_u64_u32 product_value 100 1010.5±2.88ns 1010.7±0.54ns 94.4 Mtx/sec 94.4 Mtx/sec

stdb_module_large_arguments

arg size new latency old latency new throughput old throughput
64KiB 99.1±5.78µs 90.8±15.82µs - -

stdb_module_print_bulk

line count new latency old latency new throughput old throughput
1 49.4±3.58µs 44.8±5.56µs - -
100 351.5±20.28µs 350.5±5.84µs - -
1000 1959.5±64.11µs 2.1±0.31ms - -

remaining

name new latency old latency new throughput old throughput
sqlite/💿/update_bulk/u32_u64_str/unique_0/load=2048/count=256 45.0±0.12µs 45.6±0.14µs 21.7 Ktx/sec 21.4 Ktx/sec
sqlite/💿/update_bulk/u32_u64_u64/unique_0/load=2048/count=256 39.7±0.09µs 39.6±0.07µs 24.6 Ktx/sec 24.6 Ktx/sec
sqlite/🧠/update_bulk/u32_u64_str/unique_0/load=2048/count=256 38.8±0.17µs 38.3±0.07µs 25.2 Ktx/sec 25.5 Ktx/sec
sqlite/🧠/update_bulk/u32_u64_u64/unique_0/load=2048/count=256 34.0±0.14µs 34.8±0.13µs 28.7 Ktx/sec 28.1 Ktx/sec
stdb_module/💿/update_bulk/u32_u64_str/unique_0/load=2048/count=256 1384.7±22.55µs 1379.7±12.99µs 722 tx/sec 724 tx/sec
stdb_module/💿/update_bulk/u32_u64_u64/unique_0/load=2048/count=256 1067.8±22.27µs 1039.0±5.31µs 936 tx/sec 962 tx/sec
stdb_raw/💿/update_bulk/u32_u64_str/unique_0/load=2048/count=256 591.3±14.94µs 698.7±19.04µs 1691 tx/sec 1431 tx/sec
stdb_raw/💿/update_bulk/u32_u64_u64/unique_0/load=2048/count=256 538.6±9.86µs 534.6±7.87µs 1856 tx/sec 1870 tx/sec
stdb_raw/🧠/update_bulk/u32_u64_str/unique_0/load=2048/count=256 433.7±0.40µs 435.2±0.73µs 2.3 Ktx/sec 2.2 Ktx/sec
stdb_raw/🧠/update_bulk/u32_u64_u64/unique_0/load=2048/count=256 394.8±0.22µs 394.7±0.22µs 2.5 Ktx/sec 2.5 Ktx/sec

@github-actions
Copy link

@github-actions github-actions bot commented on 484ba82 May 2, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Callgrind benchmark results

Callgrind Benchmark Report

These benchmarks were run using callgrind,
an instruction-level profiler. They allow comparisons between sqlite (sqlite), SpacetimeDB running through a module (stdb_module), and the underlying SpacetimeDB data storage engine (stdb_raw). Callgrind emulates a CPU to collect the below estimates.

Measurement changes larger than five percent are in bold.

In-memory benchmarks

callgrind: empty transaction

db total reads + writes old total reads + writes Δrw estimated cycles old estimated cycles Δcycles
stdb_raw 6064 6064 0.00% 6908 6820 1.29%
sqlite 5676 5676 0.00% 6156 6140 0.26%

callgrind: filter

db schema indices count preload _column data_type total reads + writes old total reads + writes Δrw estimated cycles old estimated cycles Δcycles
stdb_raw u32_u64_str no_index 64 128 2 string 120757 120757 0.00% 121575 121535 0.03%
stdb_raw u32_u64_str no_index 64 128 1 u64 78491 78491 0.00% 79239 79083 0.20%
stdb_raw u32_u64_str btree_each_column 64 128 2 string 25218 25219 -0.00% 25816 25709 0.42%
stdb_raw u32_u64_str btree_each_column 64 128 1 u64 24179 24179 0.00% 24583 24683 -0.41%
sqlite u32_u64_str no_index 64 128 2 string 143664 143664 0.00% 145324 145150 0.12%
sqlite u32_u64_str no_index 64 128 1 u64 123020 123005 0.01% 124378 124213 0.13%
sqlite u32_u64_str btree_each_column 64 128 1 u64 130322 130322 0.00% 131922 131708 0.16%
sqlite u32_u64_str btree_each_column 64 128 2 string 133542 133527 0.01% 135312 135111 0.15%

callgrind: insert bulk

db schema indices count preload total reads + writes old total reads + writes Δrw estimated cycles old estimated cycles Δcycles
stdb_raw u32_u64_str unique_0 64 128 946905 815803 16.07% 976591 839121 16.38%
stdb_raw u32_u64_str btree_each_column 64 128 1077980 947014 13.83% 1114798 991402 12.45%
sqlite u32_u64_str unique_0 64 128 396307 396313 -0.00% 414839 411011 0.93%
sqlite u32_u64_str btree_each_column 64 128 969380 969380 0.00% 1006956 1004314 0.26%

callgrind: iterate

db schema indices count total reads + writes old total reads + writes Δrw estimated cycles old estimated cycles Δcycles
stdb_raw u32_u64_str unique_0 1024 147996 147996 0.00% 148072 148138 -0.04%
stdb_raw u32_u64_str unique_0 64 15810 15802 0.05% 15882 15940 -0.36%
sqlite u32_u64_str unique_0 1024 1046901 1046901 0.00% 1050233 1050277 -0.00%
sqlite u32_u64_str unique_0 64 75041 75041 0.00% 75987 76123 -0.18%

callgrind: serialize_product_value

count format total reads + writes old total reads + writes Δrw estimated cycles old estimated cycles Δcycles
64 bsatn 25717 25717 0.00% 28063 28029 0.12%
64 json 47438 47438 0.00% 50060 49992 0.14%
16 bsatn 8118 8118 0.00% 9512 9478 0.36%
16 json 12142 12142 0.00% 14012 13944 0.49%

callgrind: update bulk

db schema indices count preload total reads + writes old total reads + writes Δrw estimated cycles old estimated cycles Δcycles
stdb_raw u32_u64_str unique_0 1024 1024 22446984 22184838 1.18% 23134646 22750220 1.69%
stdb_raw u32_u64_str unique_0 64 128 1423746 1292779 10.13% 1504628 1335317 12.68%
sqlite u32_u64_str unique_0 1024 1024 1802084 1802084 0.00% 1811312 1811248 0.00%
sqlite u32_u64_str unique_0 64 128 128620 128620 0.00% 131530 131468 0.05%
On-disk benchmarks

callgrind: empty transaction

db total reads + writes old total reads + writes Δrw estimated cycles old estimated cycles Δcycles
stdb_raw 6458 6458 0.00% 7294 7206 1.22%
sqlite 5728 5734 -0.10% 6238 6330 -1.45%

callgrind: filter

db schema indices count preload _column data_type total reads + writes old total reads + writes Δrw estimated cycles old estimated cycles Δcycles
stdb_raw u32_u64_str no_index 64 128 2 string 121151 121151 0.00% 122045 121953 0.08%
stdb_raw u32_u64_str no_index 64 128 1 u64 78885 78885 0.00% 79645 79457 0.24%
stdb_raw u32_u64_str btree_each_column 64 128 1 u64 24573 24573 0.00% 25041 25157 -0.46%
stdb_raw u32_u64_str btree_each_column 64 128 2 string 25810 25812 -0.01% 26512 26434 0.30%
sqlite u32_u64_str no_index 64 128 2 string 145585 145585 0.00% 147529 147311 0.15%
sqlite u32_u64_str no_index 64 128 1 u64 124926 124926 0.00% 126732 126422 0.25%
sqlite u32_u64_str btree_each_column 64 128 2 string 135577 135577 0.00% 137597 137455 0.10%
sqlite u32_u64_str btree_each_column 64 128 1 u64 132418 132418 0.00% 134316 134042 0.20%

callgrind: insert bulk

db schema indices count preload total reads + writes old total reads + writes Δrw estimated cycles old estimated cycles Δcycles
stdb_raw u32_u64_str unique_0 64 128 897975 766998 17.08% 958669 793706 20.78%
stdb_raw u32_u64_str btree_each_column 64 128 1025313 894582 14.61% 1091665 942314 15.85%
sqlite u32_u64_str unique_0 64 128 413855 413861 -0.00% 431959 428101 0.90%
sqlite u32_u64_str btree_each_column 64 128 1019955 1019955 0.00% 1056275 1053447 0.27%

callgrind: iterate

db schema indices count total reads + writes old total reads + writes Δrw estimated cycles old estimated cycles Δcycles
stdb_raw u32_u64_str unique_0 1024 148390 148390 0.00% 148458 148564 -0.07%
stdb_raw u32_u64_str unique_0 64 16213 16204 0.06% 16281 16378 -0.59%
sqlite u32_u64_str unique_0 1024 1049963 1049963 0.00% 1053685 1053625 0.01%
sqlite u32_u64_str unique_0 64 76813 76813 0.00% 78143 78055 0.11%

callgrind: serialize_product_value

count format total reads + writes old total reads + writes Δrw estimated cycles old estimated cycles Δcycles
64 bsatn 25717 25717 0.00% 28063 28029 0.12%
64 json 47438 47438 0.00% 50060 49992 0.14%
16 bsatn 8118 8118 0.00% 9512 9478 0.36%
16 json 12142 12142 0.00% 14012 13944 0.49%

callgrind: update bulk

db schema indices count preload total reads + writes old total reads + writes Δrw estimated cycles old estimated cycles Δcycles
stdb_raw u32_u64_str unique_0 1024 1024 21398757 21140824 1.22% 22168471 21789060 1.74%
stdb_raw u32_u64_str unique_0 64 128 1379956 1248342 10.54% 1459054 1296500 12.54%
sqlite u32_u64_str unique_0 1024 1024 1809880 1809880 0.00% 1818508 1818588 -0.00%
sqlite u32_u64_str unique_0 64 128 132768 132768 0.00% 135810 135760 0.04%

Please sign in to comment.