From f0b039c82c96c6e752a5c15033642b1107a77f51 Mon Sep 17 00:00:00 2001 From: Jackson Owens Date: Fri, 25 Oct 2024 13:57:20 -0400 Subject: [PATCH 1/2] colblk: fix Invert to initialize memory Previously, Invert used slices.Grow to grow the b.words slice if necessary. If len(b.words) < cap(b.words), words in the unused capacity were not initialized to zero and were assumed to be zero. Most of the time this was okay, because Reset zeroed b.words up to len(b.words). However if the BitmapBuilder was previously used to Finish fewer rows than were Set, it's possible Finish may have truncated b.words to exclude words with non-zero bits. Since Reset only reset words within len(b.words), the truncated words were left unzeroed. This commit updates Invert to make no assumptions as to the value of words beyond len(b.words). It also removes the zeroing in Reset which obscured this bug in most cases, instead relying on Set, Finish and Invert to properly initialize memory as necessary as it's used. Fix #4103. --- sstable/colblk/bitmap.go | 21 +++++++++++++++---- sstable/colblk/bitmap_test.go | 38 ++++++++++++++++++++++++++++------- 2 files changed, 48 insertions(+), 11 deletions(-) diff --git a/sstable/colblk/bitmap.go b/sstable/colblk/bitmap.go index e305de4da0..cdfc13fea6 100644 --- a/sstable/colblk/bitmap.go +++ b/sstable/colblk/bitmap.go @@ -9,12 +9,12 @@ import ( "io" "math" "math/bits" - "slices" "strings" "unsafe" "github.com/cockroachdb/errors" "github.com/cockroachdb/pebble/internal/binfmt" + "github.com/cockroachdb/pebble/internal/invariants" "github.com/cockroachdb/pebble/internal/treeprinter" ) @@ -280,6 +280,8 @@ func (b *BitmapBuilder) Set(i int) { } w := i >> 6 // divide by 64 for len(b.words) <= w { + // We append zeros because if b.words has additional capacity, it has + // not been zeroed. b.words = append(b.words, 0) } b.words[w] |= 1 << uint(i&63) @@ -294,7 +296,16 @@ func (b *BitmapBuilder) isZero(rows int) bool { // Reset resets the bitmap to the empty state. func (b *BitmapBuilder) Reset() { - clear(b.words) + if invariants.Sometimes(50) { + // Sometimes trash the bitmap with all ones to catch bugs that assume + // b.words is zeroed. + for i := 0; i < len(b.words); i++ { + b.words[i] = ^uint64(0) + } + } + + // NB: We don't zero the contents of b.words. When the BitmapBuilder reuses + // b.words, it must ensure it zeroes the contents as necessary. b.words = b.words[:0] b.minNonZeroRowCount = 0 } @@ -339,8 +350,10 @@ func (b *BitmapBuilder) Invert(nRows int) { b.minNonZeroRowCount = 1 // If the tail of b is sparse, fill in zeroes before inverting. nBitmapWords := (nRows + 63) >> 6 - if len(b.words) < nBitmapWords { - b.words = slices.Grow(b.words, nBitmapWords-len(b.words)) + for len(b.words) < nBitmapWords { + // We append zeros because if b.words has additional capacity, it has + // not been zeroed. + b.words = append(b.words, 0) } b.words = b.words[:nBitmapWords] for i := range b.words { diff --git a/sstable/colblk/bitmap_test.go b/sstable/colblk/bitmap_test.go index 486f787935..750cb2f2df 100644 --- a/sstable/colblk/bitmap_test.go +++ b/sstable/colblk/bitmap_test.go @@ -147,17 +147,31 @@ func TestBitmapRandom(t *testing.T) { seed := uint64(time.Now().UnixNano()) t.Logf("seed: %d", seed) rng := rand.New(rand.NewPCG(0, seed)) - size := rng.IntN(4096) + 1 - testWithProbability := func(t *testing.T, p float64) { - var builder BitmapBuilder - v := make([]bool, size) - for i := 0; i < size; i++ { + var builder BitmapBuilder + testWithProbability := func(t *testing.T, p float64, size int, invert bool) { + defer builder.Reset() + rng := rand.New(rand.NewPCG(0, seed)) + + // Sometimes Set +1 bit to test the common pattern of excluding the last + // row from a data block. + buildSize := size + rng.IntN(5) + v := make([]bool, buildSize) + for i := range v { v[i] = rng.Float64() < p if v[i] { builder.Set(i) } } + + // Sometimes invert the bitmap. + if invert { + builder.Invert(size) + for i := 0; i < size; i++ { + v[i] = !v[i] + } + } + data := make([]byte, builder.Size(size, 0)) _ = builder.Finish(0, size, 0, data) bitmap, endOffset := DecodeBitmap(data, 0, size) @@ -219,16 +233,26 @@ func TestBitmapRandom(t *testing.T) { } } + fixedSizes := []int{1, 2, 3, 4, 16, 63, 64, 65, 128, 129, 256, 257, 1024, 1025, 4096} fixedProbabilities := []float64{0.00001, 0.0001, 0.001, 0.1, 0.5, 0.9999} for _, p := range fixedProbabilities { t.Run(fmt.Sprintf("p=%05f", p), func(t *testing.T) { - testWithProbability(t, p) + for _, sz := range fixedSizes { + t.Run(fmt.Sprintf("size=%d", sz), func(t *testing.T) { + t.Run("invert", func(t *testing.T) { + testWithProbability(t, p, sz, true /* invert */) + }) + t.Run("no-invert", func(t *testing.T) { + testWithProbability(t, p, sz, false /* invert */) + }) + }) + } }) } for i := 0; i < 10; i++ { p := rng.ExpFloat64() * 0.1 t.Run(fmt.Sprintf("p=%05f", p), func(t *testing.T) { - testWithProbability(t, p) + testWithProbability(t, p, rng.IntN(4096)+1, rng.IntN(2) == 1) }) } } From 86c652e3dc6db98dc96f8a07553223fad555ee24 Mon Sep 17 00:00:00 2001 From: Jackson Owens Date: Fri, 25 Oct 2024 14:37:22 -0400 Subject: [PATCH 2/2] colblk: fix summary table bounds Fix an off-by-one on the summary table end bound. This could cause SeekSetBitGE to return the wrong index in bitmaps with more than 4096 represented bits. --- sstable/colblk/bitmap.go | 4 +++- sstable/colblk/bitmap_test.go | 4 ++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/sstable/colblk/bitmap.go b/sstable/colblk/bitmap.go index cdfc13fea6..14f28389a1 100644 --- a/sstable/colblk/bitmap.go +++ b/sstable/colblk/bitmap.go @@ -226,9 +226,11 @@ func (b Bitmap) SeekUnsetBitLE(i int) int { return (wordIdx << 6) + 63 - bits.LeadingZeros64(^word) } +// summaryTableBounds returns the indexes of the bitmap words containing the +// summary table. The summary table's words lie within [startOffset, endOffset). func (b Bitmap) summaryTableBounds() (startOffset, endOffset int) { startOffset = (b.bitCount + 63) >> 6 - endOffset = startOffset + startOffset>>6 + endOffset = startOffset + (startOffset+63)>>6 return startOffset, endOffset } diff --git a/sstable/colblk/bitmap_test.go b/sstable/colblk/bitmap_test.go index 750cb2f2df..e484646865 100644 --- a/sstable/colblk/bitmap_test.go +++ b/sstable/colblk/bitmap_test.go @@ -233,7 +233,7 @@ func TestBitmapRandom(t *testing.T) { } } - fixedSizes := []int{1, 2, 3, 4, 16, 63, 64, 65, 128, 129, 256, 257, 1024, 1025, 4096} + fixedSizes := []int{1, 2, 3, 4, 16, 63, 64, 65, 128, 129, 256, 257, 1024, 1025, 4096, 4097, 8012, 8200} fixedProbabilities := []float64{0.00001, 0.0001, 0.001, 0.1, 0.5, 0.9999} for _, p := range fixedProbabilities { t.Run(fmt.Sprintf("p=%05f", p), func(t *testing.T) { @@ -252,7 +252,7 @@ func TestBitmapRandom(t *testing.T) { for i := 0; i < 10; i++ { p := rng.ExpFloat64() * 0.1 t.Run(fmt.Sprintf("p=%05f", p), func(t *testing.T) { - testWithProbability(t, p, rng.IntN(4096)+1, rng.IntN(2) == 1) + testWithProbability(t, p, rng.IntN(8200)+1, rng.IntN(2) == 1) }) } }