sstable: new layout format

This started with a seemingly simple TODO about mixing compressed and uncompressed offsets, but it escalated from there. We now use the treeprinter in conjunction with the binary formatter.
cockroachdb · Oct 15, 2024 · 248af3e · 248af3e
1 parent a09523b
commit 248af3e
Show file tree

Hide file tree

Showing 44 changed files with 9,861 additions and 9,821 deletions.
diff --git a/internal/binfmt/binfmt.go b/internal/binfmt/binfmt.go
@@ -14,6 +14,8 @@ import (
 	"strconv"
 	"strings"
 	"unsafe"
+
+	"github.com/cockroachdb/pebble/internal/treeprinter"
 )
 
 // New constructs a new binary formatter.
@@ -114,11 +116,6 @@ func (f *Formatter) Byte(format string, args ...interface{}) int {
 	return 1
 }
 
-// CommentLine adds a full-width comment line to the output.
-func (f *Formatter) CommentLine(format string, args ...interface{}) {
-	f.newline("", strings.TrimSpace(fmt.Sprintf(format, args...)))
-}
-
 // HexBytesln formats the next n bytes in hexadecimal format, appending the
 // formatted comment string to each line and ending on a newline.
 func (f *Formatter) HexBytesln(n int, format string, args ...interface{}) int {
@@ -208,6 +205,17 @@ func (f *Formatter) String() string {
 	return f.buf.String()
 }
 
+// ToTreePrinter formats the current output and creates a treeprinter child node
+// for each line. The current output is reset; the position within the binary
+// buffer is not.
+func (f *Formatter) ToTreePrinter(tp treeprinter.Node) {
+	for _, l := range strings.Split(strings.TrimRight(f.String(), "\n"), "\n") {
+		tp.Child(l)
+	}
+	f.buf.Reset()
+	f.lines = f.lines[:0]
+}
+
 // Pointer returns a pointer into the original data slice at the specified
 // offset.
 func (f *Formatter) Pointer(off int) unsafe.Pointer {

diff --git a/iterator_histories_test.go b/iterator_histories_test.go
@@ -251,8 +251,7 @@ func TestIterHistories(t *testing.T) {
 				if err != nil {
 					return err.Error()
 				}
-				l.Describe(&buf, verbose, r, nil)
-				return buf.String()
+				return l.Describe(verbose, r, nil)
 			case "lsm":
 				return runLSMCmd(td, d)
 			case "metrics":

diff --git a/sstable/colblk/bitmap.go b/sstable/colblk/bitmap.go
@@ -15,6 +15,7 @@ import (
 
 	"github.com/cockroachdb/errors"
 	"github.com/cockroachdb/pebble/internal/binfmt"
+	"github.com/cockroachdb/pebble/internal/treeprinter"
 )
 
 // Bitmap is a bitmap structure built on a []uint64. A bitmap utilizes ~1
@@ -407,15 +408,17 @@ func (b *BitmapBuilder) WriteDebug(w io.Writer, rows int) {
 	fmt.Fprint(w, "bitmap")
 }
 
-func bitmapToBinFormatter(f *binfmt.Formatter, rows int) {
+func bitmapToBinFormatter(f *binfmt.Formatter, tp treeprinter.Node, rows int) {
 	encoding := bitmapEncoding(f.PeekUint(1))
-	f.HexBytesln(1, "bitmap encoding")
 	if encoding == zeroBitmapEncoding {
+		f.HexBytesln(1, "zero bitmap encoding")
+		f.ToTreePrinter(tp)
 		return
 	}
 	if encoding != defaultBitmapEncoding {
 		panic(fmt.Sprintf("unknown bitmap encoding %d", encoding))
 	}
+	f.HexBytesln(1, "default bitmap encoding")
 	if aligned := align(f.RelativeOffset(), 8); aligned-f.RelativeOffset() != 0 {
 		f.HexBytesln(aligned-f.RelativeOffset(), "padding to align to 64-bit boundary")
 	}
@@ -427,6 +430,7 @@ func bitmapToBinFormatter(f *binfmt.Formatter, rows int) {
 	for i := 0; i < summaryWords; i++ {
 		f.Line(8).Append("b ").Binary(8).Done("bitmap summary word %d-%d", i*64, i*64+63)
 	}
+	f.ToTreePrinter(tp)
 }
 
 // nextBitInWord returns the index of the smallest set bit with an index ≥ bit

diff --git a/sstable/colblk/bitmap_test.go b/sstable/colblk/bitmap_test.go
@@ -16,6 +16,7 @@ import (
 
 	"github.com/cockroachdb/datadriven"
 	"github.com/cockroachdb/pebble/internal/binfmt"
+	"github.com/cockroachdb/pebble/internal/treeprinter"
 	"github.com/stretchr/testify/require"
 )
 
@@ -61,8 +62,9 @@ func TestBitmapFixed(t *testing.T) {
 			if off > 0 {
 				f.HexBytesln(int(off), "initial offset")
 			}
-			bitmapToBinFormatter(f, n)
-			fmt.Fprint(&buf, f.String())
+			tp := treeprinter.New()
+			bitmapToBinFormatter(f, tp.Child("bitmap"), n)
+			fmt.Fprint(&buf, tp.String())
 
 		case "seek-set-ge":
 			var indexes []int

diff --git a/sstable/colblk/block.go b/sstable/colblk/block.go
@@ -141,6 +141,7 @@ import (
 	"github.com/cockroachdb/errors"
 	"github.com/cockroachdb/pebble/internal/aligned"
 	"github.com/cockroachdb/pebble/internal/binfmt"
+	"github.com/cockroachdb/pebble/internal/treeprinter"
 )
 
 // Version indicates the version of the columnar block format encoded. The
@@ -380,33 +381,38 @@ func (d *BlockDecoder) pointer(offset uint32) unsafe.Pointer {
 // data.
 func (d *BlockDecoder) FormattedString() string {
 	f := binfmt.New(d.data)
-	d.headerToBinFormatter(f)
+	tp := treeprinter.New()
+	n := tp.Child("block")
+	d.headerToBinFormatter(f, n)
 	for i := 0; i < int(d.header.Columns); i++ {
-		d.columnToBinFormatter(f, i, int(d.header.Rows))
+		d.columnToBinFormatter(f, n, i, int(d.header.Rows))
 	}
 	f.HexBytesln(1, "block trailer padding")
-	return f.String()
+	f.ToTreePrinter(n)
+	return tp.String()
 }
 
-func (d *BlockDecoder) headerToBinFormatter(f *binfmt.Formatter) {
-	f.CommentLine("columnar block header")
+func (d *BlockDecoder) headerToBinFormatter(f *binfmt.Formatter, tp treeprinter.Node) {
 	f.HexBytesln(1, "version %v", Version(f.PeekUint(1)))
 	f.HexBytesln(2, "%d columns", d.header.Columns)
 	f.HexBytesln(4, "%d rows", d.header.Rows)
 	for i := 0; i < int(d.header.Columns); i++ {
 		f.Byte("col %d: %s", i, d.DataType(i))
 		f.HexBytesln(4, "col %d: page start %d", i, d.pageStart(i))
 	}
+	f.ToTreePrinter(tp.Child("columnar block header"))
 }
 
 func (d *BlockDecoder) formatColumn(
-	f *binfmt.Formatter, col int, fn func(*binfmt.Formatter, DataType),
+	f *binfmt.Formatter,
+	tp treeprinter.Node,
+	col int,
+	fn func(*binfmt.Formatter, treeprinter.Node, DataType),
 ) {
-	f.CommentLine("data for column %d", col)
 	dataType := d.DataType(col)
 	colSize := d.pageStart(col+1) - d.pageStart(col)
 	endOff := f.Offset() + int(colSize)
-	fn(f, dataType)
+	fn(f, tp, dataType)
 
 	// We expect formatting the column data to have consumed all the bytes
 	// between the column's pageOffset and the next column's pageOffset.
@@ -419,17 +425,20 @@ func (d *BlockDecoder) formatColumn(
 	}
 }
 
-func (d *BlockDecoder) columnToBinFormatter(f *binfmt.Formatter, col, rows int) {
-	d.formatColumn(f, col, func(f *binfmt.Formatter, dataType DataType) {
+func (d *BlockDecoder) columnToBinFormatter(
+	f *binfmt.Formatter, tp treeprinter.Node, col, rows int,
+) {
+	d.formatColumn(f, tp, col, func(f *binfmt.Formatter, tp treeprinter.Node, dataType DataType) {
+		n := tp.Childf("data for column %d (%s)", col, dataType)
 		switch dataType {
 		case DataTypeBool:
-			bitmapToBinFormatter(f, rows)
+			bitmapToBinFormatter(f, n, rows)
 		case DataTypeUint:
-			uintsToBinFormatter(f, rows, nil)
+			uintsToBinFormatter(f, n, rows, nil)
 		case DataTypePrefixBytes:
-			prefixBytesToBinFormatter(f, rows, nil)
+			prefixBytesToBinFormatter(f, n, rows, nil)
 		case DataTypeBytes:
-			rawBytesToBinFormatter(f, rows, nil)
+			rawBytesToBinFormatter(f, n, rows, nil)
 		default:
 			panic("unimplemented")
 		}

diff --git a/sstable/colblk/data_block.go b/sstable/colblk/data_block.go
@@ -793,7 +793,7 @@ func (d *DataBlockDecoder) Init(schema KeySchema, data []byte) {
 
 // Describe describes the binary format of the data block, assuming f.Offset()
 // is positioned at the beginning of the same data block described by d.
-func (d *DataBlockDecoder) Describe(f *binfmt.Formatter) {
+func (d *DataBlockDecoder) Describe(f *binfmt.Formatter, tp treeprinter.Node) {
 	// Set the relative offset. When loaded into memory, the beginning of blocks
 	// are aligned. Padding that ensures alignment is done relative to the
 	// current offset. Setting the relative offset ensures that if we're
@@ -802,13 +802,14 @@ func (d *DataBlockDecoder) Describe(f *binfmt.Formatter) {
 	// aligned.
 	f.SetAnchorOffset()
 
-	f.CommentLine("data block header")
+	n := tp.Child("data block header")
 	f.HexBytesln(4, "maximum key length: %d", d.maximumKeyLength)
-	d.d.headerToBinFormatter(f)
+	d.d.headerToBinFormatter(f, n)
 	for i := 0; i < int(d.d.header.Columns); i++ {
-		d.d.columnToBinFormatter(f, i, int(d.d.header.Rows))
+		d.d.columnToBinFormatter(f, n, i, int(d.d.header.Rows))
 	}
 	f.HexBytesln(1, "block padding byte")
+	f.ToTreePrinter(n)
 }
 
 // Assert that *DataBlockIter implements block.DataBlockIterator.

diff --git a/sstable/colblk/data_block_test.go b/sstable/colblk/data_block_test.go
@@ -18,6 +18,7 @@ import (
 	"github.com/cockroachdb/pebble/internal/binfmt"
 	"github.com/cockroachdb/pebble/internal/itertest"
 	"github.com/cockroachdb/pebble/internal/testkeys"
+	"github.com/cockroachdb/pebble/internal/treeprinter"
 	"github.com/cockroachdb/pebble/sstable/block"
 )
 
@@ -87,20 +88,22 @@ func TestDataBlock(t *testing.T) {
 				}
 				r.Init(testKeysSchema, rewrittenBlock)
 				f := binfmt.New(r.d.data).LineWidth(20)
-				r.Describe(f)
+				tp := treeprinter.New()
+				r.Describe(f, tp)
 				fmt.Fprintf(&buf, "Start: %s\nEnd: %s\n%s",
 					start.Pretty(testkeys.Comparer.FormatKey),
 					end.Pretty(testkeys.Comparer.FormatKey),
-					f.String())
+					tp.String())
 				return buf.String()
 			case "finish":
 				rows := w.Rows()
 				td.MaybeScanArgs(t, "rows", &rows)
 				block, lastKey := w.Finish(rows, sizes[rows-1])
 				r.Init(testKeysSchema, block)
 				f := binfmt.New(r.d.data).LineWidth(20)
-				r.Describe(f)
-				fmt.Fprintf(&buf, "LastKey: %s\n%s", lastKey.Pretty(testkeys.Comparer.FormatKey), f.String())
+				tp := treeprinter.New()
+				r.Describe(f, tp)
+				fmt.Fprintf(&buf, "LastKey: %s\n%s", lastKey.Pretty(testkeys.Comparer.FormatKey), tp.String())
 				return buf.String()
 			case "iter":
 				var seqNum uint64

diff --git a/sstable/colblk/index_block.go b/sstable/colblk/index_block.go
@@ -11,6 +11,7 @@ import (
 	"github.com/cockroachdb/pebble/internal/base"
 	"github.com/cockroachdb/pebble/internal/binfmt"
 	"github.com/cockroachdb/pebble/internal/invariants"
+	"github.com/cockroachdb/pebble/internal/treeprinter"
 	"github.com/cockroachdb/pebble/sstable/block"
 )
 
@@ -154,13 +155,14 @@ func (r *IndexBlockDecoder) Init(data []byte) {
 // representation.
 func (r *IndexBlockDecoder) DebugString() string {
 	f := binfmt.New(r.bd.data).LineWidth(20)
-	r.Describe(f)
-	return f.String()
+	tp := treeprinter.New()
+	r.Describe(f, tp.Child("index-block-decoder"))
+	return tp.String()
 }
 
 // Describe describes the binary format of the index block, assuming f.Offset()
 // is positioned at the beginning of the same index block described by r.
-func (r *IndexBlockDecoder) Describe(f *binfmt.Formatter) {
+func (r *IndexBlockDecoder) Describe(f *binfmt.Formatter, tp treeprinter.Node) {
 	// Set the relative offset. When loaded into memory, the beginning of blocks
 	// are aligned. Padding that ensures alignment is done relative to the
 	// current offset. Setting the relative offset ensures that if we're
@@ -169,12 +171,13 @@ func (r *IndexBlockDecoder) Describe(f *binfmt.Formatter) {
 	// aligned.
 	f.SetAnchorOffset()
 
-	f.CommentLine("index block header")
-	r.bd.headerToBinFormatter(f)
+	n := tp.Child("index block header")
+	r.bd.headerToBinFormatter(f, n)
 	for i := 0; i < indexBlockColumnCount; i++ {
-		r.bd.columnToBinFormatter(f, i, int(r.bd.header.Rows))
+		r.bd.columnToBinFormatter(f, n, i, int(r.bd.header.Rows))
 	}
 	f.HexBytesln(1, "block padding byte")
+	f.ToTreePrinter(n)
 }
 
 // IndexIter is an iterator over the block entries in an index block.

diff --git a/sstable/colblk/keyspan.go b/sstable/colblk/keyspan.go
@@ -248,14 +248,15 @@ func (d *KeyspanDecoder) Init(data []byte) {
 // representation.
 func (d *KeyspanDecoder) DebugString() string {
 	f := binfmt.New(d.blockDecoder.data).LineWidth(20)
-	d.Describe(f)
-	return f.String()
+	tp := treeprinter.New()
+	d.Describe(f, tp.Child("keyspan-decoder"))
+	return tp.String()
 }
 
 // Describe describes the binary format of the keyspan block, assuming
 // f.Offset() is positioned at the beginning of the same keyspan block described
 // by d.
-func (d *KeyspanDecoder) Describe(f *binfmt.Formatter) {
+func (d *KeyspanDecoder) Describe(f *binfmt.Formatter, tp treeprinter.Node) {
 	// Set the relative offset. When loaded into memory, the beginning of blocks
 	// are aligned. Padding that ensures alignment is done relative to the
 	// current offset. Setting the relative offset ensures that if we're
@@ -264,9 +265,10 @@ func (d *KeyspanDecoder) Describe(f *binfmt.Formatter) {
 	// aligned.
 	f.SetAnchorOffset()
 
-	f.CommentLine("keyspan block header")
+	n := tp.Child("keyspan block header")
 	f.HexBytesln(4, "user key count: %d", d.boundaryKeysCount)
-	d.blockDecoder.headerToBinFormatter(f)
+	f.ToTreePrinter(n)
+	d.blockDecoder.headerToBinFormatter(f, n)
 
 	for i := 0; i < keyspanColumnCount; i++ {
 		// Not all columns in a keyspan block have the same number of rows; the
@@ -277,9 +279,10 @@ func (d *KeyspanDecoder) Describe(f *binfmt.Formatter) {
 		if i == keyspanColBoundaryUserKeys || i == keyspanColBoundaryKeyIndices {
 			rows = int(d.boundaryKeysCount)
 		}
-		d.blockDecoder.columnToBinFormatter(f, i, rows)
+		d.blockDecoder.columnToBinFormatter(f, n, i, rows)
 	}
 	f.HexBytesln(1, "block padding byte")
+	f.ToTreePrinter(n)
 }
 
 // searchBoundaryKeys returns the index of the first boundary key greater than

diff --git a/sstable/colblk/prefix_bytes.go b/sstable/colblk/prefix_bytes.go
@@ -17,6 +17,7 @@ import (
 	"github.com/cockroachdb/errors"
 	"github.com/cockroachdb/pebble/internal/binfmt"
 	"github.com/cockroachdb/pebble/internal/invariants"
+	"github.com/cockroachdb/pebble/internal/treeprinter"
 	"github.com/cockroachdb/pebble/sstable/block"
 )
 
@@ -599,21 +600,25 @@ func (b *PrefixBytes) Search(k []byte) (rowIndex int, isEqual bool) {
 	return b.rows, false
 }
 
-func prefixBytesToBinFormatter(f *binfmt.Formatter, count int, sliceFormatter func([]byte) string) {
+func prefixBytesToBinFormatter(
+	f *binfmt.Formatter, tp treeprinter.Node, count int, sliceFormatter func([]byte) string,
+) {
 	if sliceFormatter == nil {
 		sliceFormatter = defaultSliceFormatter
 	}
 	pb, _ := DecodePrefixBytes(f.RelativeData(), uint32(f.RelativeOffset()), count)
-	f.CommentLine("PrefixBytes")
-	f.HexBytesln(1, "bundleSize: %d", 1<<pb.bundleShift)
-	f.CommentLine("Offsets table")
+
+	f.HexBytesln(1, "bundle size: %d", 1<<pb.bundleShift)
+	f.ToTreePrinter(tp)
+
+	n := tp.Child("offsets table")
 	dataOffset := uint64(f.RelativeOffset()) + uint64(uintptr(pb.rawBytes.data)-uintptr(pb.rawBytes.start))
-	uintsToBinFormatter(f, pb.rawBytes.slices+1,
-		func(offsetDelta, offsetBase uint64) string {
-			// NB: offsetBase will always be zero for PrefixBytes columns.
-			return fmt.Sprintf("%d [%d overall]", offsetDelta+offsetBase, offsetDelta+offsetBase+dataOffset)
-		})
-	f.CommentLine("Data")
+	uintsToBinFormatter(f, n, pb.rawBytes.slices+1, func(offsetDelta, offsetBase uint64) string {
+		// NB: offsetBase will always be zero for PrefixBytes columns.
+		return fmt.Sprintf("%d [%d overall]", offsetDelta+offsetBase, offsetDelta+offsetBase+dataOffset)
+	})
+
+	n = tp.Child("data")
 
 	// The first offset encodes the length of the block prefix.
 	blockPrefixLen := pb.rawBytes.offsets.At(0)
@@ -647,6 +652,7 @@ func prefixBytesToBinFormatter(f *binfmt.Formatter, count int, sliceFormatter fu
 		}
 		startOff = endOff
 	}
+	f.ToTreePrinter(n)
 }
 
 // PrefixBytesBuilder encodes a column of lexicographically-sorted byte slices,