Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

colblk: add DataBlockDecoder.Validate #4108

Merged
merged 1 commit into from
Oct 25, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
47 changes: 47 additions & 0 deletions sstable/colblk/data_block.go
Original file line number Diff line number Diff line change
Expand Up @@ -946,6 +946,53 @@ func (d *DataBlockDecoder) Describe(f *binfmt.Formatter, tp treeprinter.Node) {
f.ToTreePrinter(n)
}

// Validate checks invariants that should hold across all data blocks. It
// returns an assertion-failure error describing the first violation it finds,
// or nil if every row passes.
//
// Rows are visited in order, and every row after the first is checked against
// its predecessor:
//   - Keys must be strictly ordered: the user key must not compare less than
//     the previous user key (per comparer.Compare), and when the user keys are
//     equal the trailer must be strictly smaller than the previous trailer.
//   - If the user key equals the previous user key and the previous key is not
//     a MERGE, the row's obsolete bit must be set.
//   - The row's prefix-changed bit must be set exactly when comparer.Split
//     yields a different prefix than it does for the previous key.
//
// Row 0 has no predecessor, so none of these pairwise checks apply to it. In
// particular it is never compared against the zero-value previous key; doing
// so would spuriously report a first key whose user key compares equal to the
// empty key as out of order with a nonexistent row -1.
func (d *DataBlockDecoder) Validate(comparer *base.Comparer, keySchema *KeySchema) error {
	// TODO(jackson): Consider avoiding these allocations, even if this is only
	// called in invariants builds.
	n := d.d.header.Rows
	meta := &KeySeekerMetadata{}
	keySchema.InitKeySeekerMetadata(meta, d)
	keySeeker := keySchema.KeySeeker(meta)
	// prevKey owns its own buffer (filled via CopyFrom at the end of each
	// iteration) because curKey's buffer is reused when the next row's key is
	// materialized.
	prevKey := base.InternalKey{UserKey: make([]byte, 0, d.maximumKeyLength+1)}
	var curKey PrefixBytesIter
	curKey.Init(int(d.maximumKeyLength), nil)

	for i := 0; i < int(n); i++ {
		k := base.InternalKey{
			UserKey: keySeeker.MaterializeUserKey(&curKey, i-1, i),
			Trailer: base.InternalKeyTrailer(d.trailers.At(i)),
		}
		// All remaining checks relate row i to row i-1, so they only make
		// sense once a previous row exists.
		if i > 0 {
			// Ensure the keys are ordered.
			ucmp := comparer.Compare(k.UserKey, prevKey.UserKey)
			if ucmp < 0 || (ucmp == 0 && k.Trailer >= prevKey.Trailer) {
				return errors.AssertionFailedf("key %s (row %d) and key %s (row %d) are out of order",
					prevKey, i-1, k, i)
			}
			// Ensure the obsolete bit is set if the key is definitively obsolete.
			// Not all sources of obsolescence are evident with only a data block
			// available (range deletions or point keys in previous blocks may cause
			// a key to be obsolete).
			if ucmp == 0 && prevKey.Kind() != base.InternalKeyKindMerge && !d.isObsolete.At(i) {
				return errors.AssertionFailedf("key %s (row %d) is shadowed by previous key %s but is not marked as obsolete",
					k, i, prevKey)
			}
			// Ensure that the prefix-changed bit is set correctly.
			currPrefix := comparer.Split.Prefix(k.UserKey)
			prevPrefix := comparer.Split.Prefix(prevKey.UserKey)
			prefixChanged := !bytes.Equal(prevPrefix, currPrefix)
			if prefixChanged != d.prefixChanged.At(i) {
				return errors.AssertionFailedf("prefix changed bit for key %q (row %d) is %t, expected %t [prev key was %q]",
					k.UserKey, i, d.prefixChanged.At(i), prefixChanged, prevKey.UserKey)
			}
		}

		prevKey.CopyFrom(k)
	}
	return nil
}

// Compile-time assertion that *DataBlockIter implements
// block.DataBlockIterator; the build fails if it does not.
var _ block.DataBlockIterator = (*DataBlockIter)(nil)

Expand Down
8 changes: 8 additions & 0 deletions sstable/colblk/data_block_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ func TestDataBlock(t *testing.T) {
if td.Cmd == "write-block" {
w.Init(&testKeysSchema)
}
var prevKey base.InternalKey
for _, line := range strings.Split(td.Input, "\n") {
line, isObsolete := strings.CutSuffix(line, "obsolete")

Expand All @@ -68,8 +69,12 @@ func TestDataBlock(t *testing.T) {
if strings.HasPrefix(valueString, "valueHandle") {
vp = block.ValueHandlePrefix(kcmp.PrefixEqual(), 0)
}
if kcmp.UserKeyComparison == 0 && prevKey.Kind() != base.InternalKeyKindMerge {
isObsolete = true
}
v := []byte(line[j+1:])
w.Add(ik, v, vp, kcmp, isObsolete)
prevKey = ik
sizes = append(sizes, w.Size())
}
if td.Cmd == "write-block" {
Expand Down Expand Up @@ -105,6 +110,9 @@ func TestDataBlock(t *testing.T) {
tp := treeprinter.New()
r.Describe(f, tp)
fmt.Fprintf(&buf, "LastKey: %s\n%s", lastKey.Pretty(testkeys.Comparer.FormatKey), tp.String())
if err := r.Validate(testkeys.Comparer, &testKeysSchema); err != nil {
fmt.Fprintln(&buf, err)
}
return buf.String()
case "iter":
var seqNum uint64
Expand Down
9 changes: 6 additions & 3 deletions sstable/colblk/testdata/data_block/external_value
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ blockprefix_kiwi@99#0,SET:valueHandle-kiwi99
blockprefix_kiwi@98#0,SET:valueHandle-kiwi98
blockprefix_lemon@92#0,DEL:
----
size=650:
size=673:
0: prefixes: prefixbytes(16): 20 keys
1: suffixes: bytes: 20 rows set; 54 bytes in data
2: trailers: uint: 20 rows
Expand Down Expand Up @@ -260,8 +260,11 @@ data block header
│ ├── 632-640: b 1100111011111011000001110000000000000000000000000000000000000000 # bitmap word 0
│ └── 640-648: b 0000000100000000000000000000000000000000000000000000000000000000 # bitmap summary word 0-63
├── data for column 6 (bool)
│ └── 648-649: x 01 # zero bitmap encoding
└── 649-650: x 00 # block padding byte
│ ├── 648-649: x 00 # default bitmap encoding
│ ├── 649-656: x 00000000000000 # padding to align to 64-bit boundary
│ ├── 656-664: b 0100000000000000000000000000000000000000000000000000000000000000 # bitmap word 0
│ └── 664-672: b 0000000100000000000000000000000000000000000000000000000000000000 # bitmap summary word 0-63
└── 672-673: x 00 # block padding byte

# Scan across the block using next.
iter
Expand Down
8 changes: 5 additions & 3 deletions sstable/colblk/testdata/data_block/next_prefix
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ blockprefix_kiwi@99#0,SET:kiwi99
blockprefix_kiwi@98#0,SET:kiwi98
blockprefix_lemon@92#0,DEL:
----
size=417:
size=433:
0: prefixes: prefixbytes(16): 20 keys
1: suffixes: bytes: 20 rows set; 54 bytes in data
2: trailers: uint: 20 rows
Expand Down Expand Up @@ -235,8 +235,10 @@ data block header
├── data for column 5 (bool)
│ └── 414-415: x 01 # zero bitmap encoding
├── data for column 6 (bool)
│ └── 415-416: x 01 # zero bitmap encoding
└── 416-417: x 00 # block padding byte
│ ├── 415-416: x 00 # default bitmap encoding
│ ├── 416-424: b 0100000000000000000000000000000000000000000000000000000000000000 # bitmap word 0
│ └── 424-432: b 0000000100000000000000000000000000000000000000000000000000000000 # bitmap summary word 0-63
└── 432-433: x 00 # block padding byte

# Scan across the block using next prefix.

Expand Down
8 changes: 8 additions & 0 deletions sstable/colblk_writer.go
Original file line number Diff line number Diff line change
Expand Up @@ -635,6 +635,14 @@ func (w *RawColumnWriter) enqueueDataBlock(
// it's unnecessary.
w.meta.SetLargestPointKey(lastKey.Clone())

if invariants.Enabled {
var dec colblk.DataBlockDecoder
dec.Init(w.opts.KeySchema, serializedBlock)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Doesn't this initialize a KeySeeker that we can use?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah, no, I was thinking of BlockIter.Init

if err := dec.Validate(w.comparer, w.opts.KeySchema); err != nil {
panic(err)
}
}

// Serialize the data block, compress it and send it to the write queue.
cb := compressedBlockPool.Get().(*compressedBlock)
cb.blockBuf.checksummer.Type = w.opts.Checksum
Expand Down
Loading