Skip to content

Commit

Permalink
sstable: new layout format
Browse files Browse the repository at this point in the history
This started with a seemingly simple TODO about mixing compressed and
uncompressed offsets but from there it escalated. We now use the
treeprinter in conjunction with the binary formatter.
  • Loading branch information
RaduBerinde committed Oct 15, 2024
1 parent a09523b commit 248af3e
Show file tree
Hide file tree
Showing 44 changed files with 9,861 additions and 9,821 deletions.
18 changes: 13 additions & 5 deletions internal/binfmt/binfmt.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@ import (
"strconv"
"strings"
"unsafe"

"github.com/cockroachdb/pebble/internal/treeprinter"
)

// New constructs a new binary formatter.
Expand Down Expand Up @@ -114,11 +116,6 @@ func (f *Formatter) Byte(format string, args ...interface{}) int {
return 1
}

// CommentLine adds a full-width comment line to the output. The comment text
// is built by applying format to args and then trimming any leading and
// trailing whitespace.
func (f *Formatter) CommentLine(format string, args ...interface{}) {
	comment := fmt.Sprintf(format, args...)
	comment = strings.TrimSpace(comment)
	f.newline("", comment)
}

// HexBytesln formats the next n bytes in hexadecimal format, appending the
// formatted comment string to each line and ending on a newline.
func (f *Formatter) HexBytesln(n int, format string, args ...interface{}) int {
Expand Down Expand Up @@ -208,6 +205,17 @@ func (f *Formatter) String() string {
return f.buf.String()
}

// ToTreePrinter renders the output accumulated so far and attaches every
// rendered line to tp as its own child node. Once the lines have been handed
// over, the accumulated output is cleared; the position within the binary
// buffer is left as it was, so formatting can continue from the same offset.
//
// NOTE(review): strings.Split on an empty string yields a single empty
// element, so if the rendered output is empty one empty child is still added
// to tp — confirm callers never flush an empty formatter.
func (f *Formatter) ToTreePrinter(tp treeprinter.Node) {
	// Drop trailing newlines first so the split does not produce a spurious
	// empty final line.
	rendered := strings.TrimRight(f.String(), "\n")
	lines := strings.Split(rendered, "\n")
	for i := range lines {
		tp.Child(lines[i])
	}

	// Discard the pending output while keeping the allocated storage.
	f.lines = f.lines[:0]
	f.buf.Reset()
}

// Pointer returns a pointer into the original data slice at the specified
// offset.
func (f *Formatter) Pointer(off int) unsafe.Pointer {
Expand Down
3 changes: 1 addition & 2 deletions iterator_histories_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -251,8 +251,7 @@ func TestIterHistories(t *testing.T) {
if err != nil {
return err.Error()
}
l.Describe(&buf, verbose, r, nil)
return buf.String()
return l.Describe(verbose, r, nil)
case "lsm":
return runLSMCmd(td, d)
case "metrics":
Expand Down
8 changes: 6 additions & 2 deletions sstable/colblk/bitmap.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ import (

"github.com/cockroachdb/errors"
"github.com/cockroachdb/pebble/internal/binfmt"
"github.com/cockroachdb/pebble/internal/treeprinter"
)

// Bitmap is a bitmap structure built on a []uint64. A bitmap utilizes ~1
Expand Down Expand Up @@ -407,15 +408,17 @@ func (b *BitmapBuilder) WriteDebug(w io.Writer, rows int) {
fmt.Fprint(w, "bitmap")
}

func bitmapToBinFormatter(f *binfmt.Formatter, rows int) {
func bitmapToBinFormatter(f *binfmt.Formatter, tp treeprinter.Node, rows int) {
encoding := bitmapEncoding(f.PeekUint(1))
f.HexBytesln(1, "bitmap encoding")
if encoding == zeroBitmapEncoding {
f.HexBytesln(1, "zero bitmap encoding")
f.ToTreePrinter(tp)
return
}
if encoding != defaultBitmapEncoding {
panic(fmt.Sprintf("unknown bitmap encoding %d", encoding))
}
f.HexBytesln(1, "default bitmap encoding")
if aligned := align(f.RelativeOffset(), 8); aligned-f.RelativeOffset() != 0 {
f.HexBytesln(aligned-f.RelativeOffset(), "padding to align to 64-bit boundary")
}
Expand All @@ -427,6 +430,7 @@ func bitmapToBinFormatter(f *binfmt.Formatter, rows int) {
for i := 0; i < summaryWords; i++ {
f.Line(8).Append("b ").Binary(8).Done("bitmap summary word %d-%d", i*64, i*64+63)
}
f.ToTreePrinter(tp)
}

// nextBitInWord returns the index of the smallest set bit with an index ≥ bit
Expand Down
6 changes: 4 additions & 2 deletions sstable/colblk/bitmap_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ import (

"github.com/cockroachdb/datadriven"
"github.com/cockroachdb/pebble/internal/binfmt"
"github.com/cockroachdb/pebble/internal/treeprinter"
"github.com/stretchr/testify/require"
)

Expand Down Expand Up @@ -61,8 +62,9 @@ func TestBitmapFixed(t *testing.T) {
if off > 0 {
f.HexBytesln(int(off), "initial offset")
}
bitmapToBinFormatter(f, n)
fmt.Fprint(&buf, f.String())
tp := treeprinter.New()
bitmapToBinFormatter(f, tp.Child("bitmap"), n)
fmt.Fprint(&buf, tp.String())

case "seek-set-ge":
var indexes []int
Expand Down
37 changes: 23 additions & 14 deletions sstable/colblk/block.go
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,7 @@ import (
"github.com/cockroachdb/errors"
"github.com/cockroachdb/pebble/internal/aligned"
"github.com/cockroachdb/pebble/internal/binfmt"
"github.com/cockroachdb/pebble/internal/treeprinter"
)

// Version indicates the version of the columnar block format encoded. The
Expand Down Expand Up @@ -380,33 +381,38 @@ func (d *BlockDecoder) pointer(offset uint32) unsafe.Pointer {
// data.
func (d *BlockDecoder) FormattedString() string {
f := binfmt.New(d.data)
d.headerToBinFormatter(f)
tp := treeprinter.New()
n := tp.Child("block")
d.headerToBinFormatter(f, n)
for i := 0; i < int(d.header.Columns); i++ {
d.columnToBinFormatter(f, i, int(d.header.Rows))
d.columnToBinFormatter(f, n, i, int(d.header.Rows))
}
f.HexBytesln(1, "block trailer padding")
return f.String()
f.ToTreePrinter(n)
return tp.String()
}

func (d *BlockDecoder) headerToBinFormatter(f *binfmt.Formatter) {
f.CommentLine("columnar block header")
func (d *BlockDecoder) headerToBinFormatter(f *binfmt.Formatter, tp treeprinter.Node) {
f.HexBytesln(1, "version %v", Version(f.PeekUint(1)))
f.HexBytesln(2, "%d columns", d.header.Columns)
f.HexBytesln(4, "%d rows", d.header.Rows)
for i := 0; i < int(d.header.Columns); i++ {
f.Byte("col %d: %s", i, d.DataType(i))
f.HexBytesln(4, "col %d: page start %d", i, d.pageStart(i))
}
f.ToTreePrinter(tp.Child("columnar block header"))
}

func (d *BlockDecoder) formatColumn(
f *binfmt.Formatter, col int, fn func(*binfmt.Formatter, DataType),
f *binfmt.Formatter,
tp treeprinter.Node,
col int,
fn func(*binfmt.Formatter, treeprinter.Node, DataType),
) {
f.CommentLine("data for column %d", col)
dataType := d.DataType(col)
colSize := d.pageStart(col+1) - d.pageStart(col)
endOff := f.Offset() + int(colSize)
fn(f, dataType)
fn(f, tp, dataType)

// We expect formatting the column data to have consumed all the bytes
// between the column's pageOffset and the next column's pageOffset.
Expand All @@ -419,17 +425,20 @@ func (d *BlockDecoder) formatColumn(
}
}

func (d *BlockDecoder) columnToBinFormatter(f *binfmt.Formatter, col, rows int) {
d.formatColumn(f, col, func(f *binfmt.Formatter, dataType DataType) {
func (d *BlockDecoder) columnToBinFormatter(
f *binfmt.Formatter, tp treeprinter.Node, col, rows int,
) {
d.formatColumn(f, tp, col, func(f *binfmt.Formatter, tp treeprinter.Node, dataType DataType) {
n := tp.Childf("data for column %d (%s)", col, dataType)
switch dataType {
case DataTypeBool:
bitmapToBinFormatter(f, rows)
bitmapToBinFormatter(f, n, rows)
case DataTypeUint:
uintsToBinFormatter(f, rows, nil)
uintsToBinFormatter(f, n, rows, nil)
case DataTypePrefixBytes:
prefixBytesToBinFormatter(f, rows, nil)
prefixBytesToBinFormatter(f, n, rows, nil)
case DataTypeBytes:
rawBytesToBinFormatter(f, rows, nil)
rawBytesToBinFormatter(f, n, rows, nil)
default:
panic("unimplemented")
}
Expand Down
9 changes: 5 additions & 4 deletions sstable/colblk/data_block.go
Original file line number Diff line number Diff line change
Expand Up @@ -793,7 +793,7 @@ func (d *DataBlockDecoder) Init(schema KeySchema, data []byte) {

// Describe describes the binary format of the data block, assuming f.Offset()
// is positioned at the beginning of the same data block described by d.
func (d *DataBlockDecoder) Describe(f *binfmt.Formatter) {
func (d *DataBlockDecoder) Describe(f *binfmt.Formatter, tp treeprinter.Node) {
// Set the relative offset. When loaded into memory, the beginning of blocks
// are aligned. Padding that ensures alignment is done relative to the
// current offset. Setting the relative offset ensures that if we're
Expand All @@ -802,13 +802,14 @@ func (d *DataBlockDecoder) Describe(f *binfmt.Formatter) {
// aligned.
f.SetAnchorOffset()

f.CommentLine("data block header")
n := tp.Child("data block header")
f.HexBytesln(4, "maximum key length: %d", d.maximumKeyLength)
d.d.headerToBinFormatter(f)
d.d.headerToBinFormatter(f, n)
for i := 0; i < int(d.d.header.Columns); i++ {
d.d.columnToBinFormatter(f, i, int(d.d.header.Rows))
d.d.columnToBinFormatter(f, n, i, int(d.d.header.Rows))
}
f.HexBytesln(1, "block padding byte")
f.ToTreePrinter(n)
}

// Assert that *DataBlockIter implements block.DataBlockIterator.
Expand Down
11 changes: 7 additions & 4 deletions sstable/colblk/data_block_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ import (
"github.com/cockroachdb/pebble/internal/binfmt"
"github.com/cockroachdb/pebble/internal/itertest"
"github.com/cockroachdb/pebble/internal/testkeys"
"github.com/cockroachdb/pebble/internal/treeprinter"
"github.com/cockroachdb/pebble/sstable/block"
)

Expand Down Expand Up @@ -87,20 +88,22 @@ func TestDataBlock(t *testing.T) {
}
r.Init(testKeysSchema, rewrittenBlock)
f := binfmt.New(r.d.data).LineWidth(20)
r.Describe(f)
tp := treeprinter.New()
r.Describe(f, tp)
fmt.Fprintf(&buf, "Start: %s\nEnd: %s\n%s",
start.Pretty(testkeys.Comparer.FormatKey),
end.Pretty(testkeys.Comparer.FormatKey),
f.String())
tp.String())
return buf.String()
case "finish":
rows := w.Rows()
td.MaybeScanArgs(t, "rows", &rows)
block, lastKey := w.Finish(rows, sizes[rows-1])
r.Init(testKeysSchema, block)
f := binfmt.New(r.d.data).LineWidth(20)
r.Describe(f)
fmt.Fprintf(&buf, "LastKey: %s\n%s", lastKey.Pretty(testkeys.Comparer.FormatKey), f.String())
tp := treeprinter.New()
r.Describe(f, tp)
fmt.Fprintf(&buf, "LastKey: %s\n%s", lastKey.Pretty(testkeys.Comparer.FormatKey), tp.String())
return buf.String()
case "iter":
var seqNum uint64
Expand Down
15 changes: 9 additions & 6 deletions sstable/colblk/index_block.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ import (
"github.com/cockroachdb/pebble/internal/base"
"github.com/cockroachdb/pebble/internal/binfmt"
"github.com/cockroachdb/pebble/internal/invariants"
"github.com/cockroachdb/pebble/internal/treeprinter"
"github.com/cockroachdb/pebble/sstable/block"
)

Expand Down Expand Up @@ -154,13 +155,14 @@ func (r *IndexBlockDecoder) Init(data []byte) {
// representation.
func (r *IndexBlockDecoder) DebugString() string {
f := binfmt.New(r.bd.data).LineWidth(20)
r.Describe(f)
return f.String()
tp := treeprinter.New()
r.Describe(f, tp.Child("index-block-decoder"))
return tp.String()
}

// Describe describes the binary format of the index block, assuming f.Offset()
// is positioned at the beginning of the same index block described by r.
func (r *IndexBlockDecoder) Describe(f *binfmt.Formatter) {
func (r *IndexBlockDecoder) Describe(f *binfmt.Formatter, tp treeprinter.Node) {
// Set the relative offset. When loaded into memory, the beginning of blocks
// are aligned. Padding that ensures alignment is done relative to the
// current offset. Setting the relative offset ensures that if we're
Expand All @@ -169,12 +171,13 @@ func (r *IndexBlockDecoder) Describe(f *binfmt.Formatter) {
// aligned.
f.SetAnchorOffset()

f.CommentLine("index block header")
r.bd.headerToBinFormatter(f)
n := tp.Child("index block header")
r.bd.headerToBinFormatter(f, n)
for i := 0; i < indexBlockColumnCount; i++ {
r.bd.columnToBinFormatter(f, i, int(r.bd.header.Rows))
r.bd.columnToBinFormatter(f, n, i, int(r.bd.header.Rows))
}
f.HexBytesln(1, "block padding byte")
f.ToTreePrinter(n)
}

// IndexIter is an iterator over the block entries in an index block.
Expand Down
15 changes: 9 additions & 6 deletions sstable/colblk/keyspan.go
Original file line number Diff line number Diff line change
Expand Up @@ -248,14 +248,15 @@ func (d *KeyspanDecoder) Init(data []byte) {
// representation.
func (d *KeyspanDecoder) DebugString() string {
f := binfmt.New(d.blockDecoder.data).LineWidth(20)
d.Describe(f)
return f.String()
tp := treeprinter.New()
d.Describe(f, tp.Child("keyspan-decoder"))
return tp.String()
}

// Describe describes the binary format of the keyspan block, assuming
// f.Offset() is positioned at the beginning of the same keyspan block described
// by d.
func (d *KeyspanDecoder) Describe(f *binfmt.Formatter) {
func (d *KeyspanDecoder) Describe(f *binfmt.Formatter, tp treeprinter.Node) {
// Set the relative offset. When loaded into memory, the beginning of blocks
// are aligned. Padding that ensures alignment is done relative to the
// current offset. Setting the relative offset ensures that if we're
Expand All @@ -264,9 +265,10 @@ func (d *KeyspanDecoder) Describe(f *binfmt.Formatter) {
// aligned.
f.SetAnchorOffset()

f.CommentLine("keyspan block header")
n := tp.Child("keyspan block header")
f.HexBytesln(4, "user key count: %d", d.boundaryKeysCount)
d.blockDecoder.headerToBinFormatter(f)
f.ToTreePrinter(n)
d.blockDecoder.headerToBinFormatter(f, n)

for i := 0; i < keyspanColumnCount; i++ {
// Not all columns in a keyspan block have the same number of rows; the
Expand All @@ -277,9 +279,10 @@ func (d *KeyspanDecoder) Describe(f *binfmt.Formatter) {
if i == keyspanColBoundaryUserKeys || i == keyspanColBoundaryKeyIndices {
rows = int(d.boundaryKeysCount)
}
d.blockDecoder.columnToBinFormatter(f, i, rows)
d.blockDecoder.columnToBinFormatter(f, n, i, rows)
}
f.HexBytesln(1, "block padding byte")
f.ToTreePrinter(n)
}

// searchBoundaryKeys returns the index of the first boundary key greater than
Expand Down
26 changes: 16 additions & 10 deletions sstable/colblk/prefix_bytes.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ import (
"github.com/cockroachdb/errors"
"github.com/cockroachdb/pebble/internal/binfmt"
"github.com/cockroachdb/pebble/internal/invariants"
"github.com/cockroachdb/pebble/internal/treeprinter"
"github.com/cockroachdb/pebble/sstable/block"
)

Expand Down Expand Up @@ -599,21 +600,25 @@ func (b *PrefixBytes) Search(k []byte) (rowIndex int, isEqual bool) {
return b.rows, false
}

func prefixBytesToBinFormatter(f *binfmt.Formatter, count int, sliceFormatter func([]byte) string) {
func prefixBytesToBinFormatter(
f *binfmt.Formatter, tp treeprinter.Node, count int, sliceFormatter func([]byte) string,
) {
if sliceFormatter == nil {
sliceFormatter = defaultSliceFormatter
}
pb, _ := DecodePrefixBytes(f.RelativeData(), uint32(f.RelativeOffset()), count)
f.CommentLine("PrefixBytes")
f.HexBytesln(1, "bundleSize: %d", 1<<pb.bundleShift)
f.CommentLine("Offsets table")

f.HexBytesln(1, "bundle size: %d", 1<<pb.bundleShift)
f.ToTreePrinter(tp)

n := tp.Child("offsets table")
dataOffset := uint64(f.RelativeOffset()) + uint64(uintptr(pb.rawBytes.data)-uintptr(pb.rawBytes.start))
uintsToBinFormatter(f, pb.rawBytes.slices+1,
func(offsetDelta, offsetBase uint64) string {
// NB: offsetBase will always be zero for PrefixBytes columns.
return fmt.Sprintf("%d [%d overall]", offsetDelta+offsetBase, offsetDelta+offsetBase+dataOffset)
})
f.CommentLine("Data")
uintsToBinFormatter(f, n, pb.rawBytes.slices+1, func(offsetDelta, offsetBase uint64) string {
// NB: offsetBase will always be zero for PrefixBytes columns.
return fmt.Sprintf("%d [%d overall]", offsetDelta+offsetBase, offsetDelta+offsetBase+dataOffset)
})

n = tp.Child("data")

// The first offset encodes the length of the block prefix.
blockPrefixLen := pb.rawBytes.offsets.At(0)
Expand Down Expand Up @@ -647,6 +652,7 @@ func prefixBytesToBinFormatter(f *binfmt.Formatter, count int, sliceFormatter fu
}
startOff = endOff
}
f.ToTreePrinter(n)
}

// PrefixBytesBuilder encodes a column of lexicographically-sorted byte slices,
Expand Down
Loading

0 comments on commit 248af3e

Please sign in to comment.