Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

crdb: fix untyped version seek, add tests #4101

Merged
merged 2 commits into from
Oct 23, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 8 additions & 5 deletions internal/crdbtest/crdb.go
Original file line number Diff line number Diff line change
Expand Up @@ -423,7 +423,7 @@ const (
// hasEmptySuffixes is set if there is at least one key with no suffix in the block.
hasEmptySuffixes
// hasNonMVCCSuffixes is set if there is at least one key with a non-empty,
// non-MVCC sufffix.
// non-MVCC suffix.
hasNonMVCCSuffixes
)

Expand All @@ -435,7 +435,7 @@ func (s suffixTypes) String() string {
if s&hasEmptySuffixes != 0 {
suffixes = append(suffixes, "empty")
}
if s&hasEmptySuffixes != 0 {
if s&hasNonMVCCSuffixes != 0 {
suffixes = append(suffixes, "non-mvcc")
}
if len(suffixes) == 0 {
Expand Down Expand Up @@ -527,7 +527,7 @@ func (kw *cockroachKeyWriter) WriteKey(
switch versionLen {
case 0:
// No-op.
kw.suffixTypes |= hasNonMVCCSuffixes
kw.suffixTypes |= hasEmptySuffixes
case 9:
kw.suffixTypes |= hasMVCCSuffixes
wallTime = binary.BigEndian.Uint64(key[keyPrefixLen : keyPrefixLen+8])
Expand All @@ -539,7 +539,7 @@ func (kw *cockroachKeyWriter) WriteKey(
// longer consulted and can be ignored during decoding.
default:
// Not a MVCC timestamp.
kw.suffixTypes |= hasEmptySuffixes
kw.suffixTypes |= hasNonMVCCSuffixes
untypedVersion = key[keyPrefixLen:]
}
kw.wallTimes.Set(row, wallTime)
Expand Down Expand Up @@ -731,7 +731,7 @@ func (ks *cockroachKeySeeker) seekGEOnSuffix(index int, seekSuffix []byte) (row
for l < u {
h := int(uint(l+u) >> 1) // avoid overflow when computing h
// l ≤ h < u
if bytes.Compare(ks.untypedVersions.At(h), seekSuffix) >= 0 {
if bytes.Compare(ks.untypedVersions.At(h), seekSuffix) <= 0 {
u = h // preserves f(u) == true
} else {
l = h + 1 // preserves f(l-1) == false
Expand Down Expand Up @@ -774,6 +774,9 @@ func (ks *cockroachKeySeeker) seekGEOnSuffix(index int, seekSuffix []byte) (row
func (ks *cockroachKeySeeker) MaterializeUserKey(
ki *colblk.PrefixBytesIter, prevRow, row int,
) []byte {
if invariants.Enabled && (row < 0 || row >= ks.roachKeys.Rows()) {
panic(errors.AssertionFailedf("invalid row number %d", row))
}
if prevRow+1 == row && prevRow >= 0 {
ks.roachKeys.SetNext(ki)
} else {
Expand Down
100 changes: 100 additions & 0 deletions internal/crdbtest/crdb_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -320,3 +320,103 @@ func (g *cockroachKeyGen) randTimestamp() (wallTime uint64, logicalTime uint32)
}
return wallTime, logicalTime
}

// formatUserKey renders a user key as text:
//
//	<roach-key> [ @ <version-hex> ]
//
// The key is divided at the offset returned by Split. The byte just before
// that offset must be the 0 sentinel; it is not included in the output. When
// bytes follow the split point, the final byte must equal the version length
// plus one, and the version bytes before it are printed as upper-case hex.
// Panics if either check fails.
func formatUserKey(key []byte) string {
	prefixLen := Split(key)
	sentinelIdx := prefixLen - 1
	if key[sentinelIdx] != 0 {
		panic("expected sentinel byte")
	}
	roachKey := key[:sentinelIdx]
	if prefixLen == len(key) {
		// Nothing after the sentinel: the key has no version.
		return string(roachKey)
	}
	last := len(key) - 1
	version := key[prefixLen:last]
	if lengthByte := key[last]; lengthByte != byte(len(version)+1) {
		panic("invalid suffix length byte")
	}
	return fmt.Sprintf("%s @ %X", roachKey, version)
}

// formatKey renders an internal key as text:
//
//	<roach-key> [ @ <version-hex> ] #<seq-num>,<kind>
//
// The user key portion is produced by formatUserKey; the sequence number and
// kind come from the key's SeqNum and Kind methods.
func formatKey(key base.InternalKey) string {
	userKey := formatUserKey(key.UserKey)
	seqNum, kind := key.SeqNum(), key.Kind()
	return fmt.Sprintf("%s #%d,%s", userKey, seqNum, kind)
}

// formatKV renders an internal KV as text:
//
//	<roach-key> [ @ <version-hex> ] #<seq-num>,<kind> = value
//
// For example:
//
//	foo @ 0001020304050607 #1,SET = bar
//
// When the value is empty, the " = value" part is omitted and only the
// formatted key is returned. Panics if the value cannot be retrieved.
func formatKV(kv base.InternalKV) string {
	value, _, err := kv.V.Value(nil)
	if err != nil {
		panic(err)
	}
	formatted := formatKey(kv.K)
	if len(value) > 0 {
		formatted = fmt.Sprintf("%s = %s", formatted, value)
	}
	return formatted
}

// parseUserKey parses a cockroach user key in the following format:
//
//	<roach-key> [@ <version-hex>]
//
// For example:
//
//	foo @ 0001020304050607
//
// The result is the roach key followed by a 0 sentinel byte. If a version is
// present, the decoded version bytes are appended, followed by a single byte
// holding the version length plus one. Panics if fmt.Sscanf cannot parse the
// version as hex.
func parseUserKey(userKeyStr string) []byte {
	roachKey, versionStr := splitStringAt(userKeyStr, " @ ")
	// Append sentinel byte.
	userKey := append([]byte(roachKey), 0)
	if versionStr == "" {
		return userKey
	}
	var version []byte
	if _, err := fmt.Sscanf(versionStr, "%X", &version); err != nil {
		// Report the text that failed to parse and the underlying error; the
		// version slice itself is still empty at this point.
		panic(fmt.Sprintf("invalid user key string %q: cannot parse version %q: %v", userKeyStr, versionStr, err))
	}
	userKey = append(userKey, version...)
	return append(userKey, byte(len(version)+1))
}

// parseKey parses a cockroach internal key written as:
//
//	<roach-key> [@ <version-hex>] #<seq-num>,<kind>
//
// For example:
//
//	foo @ 0001020304050607 #1,SET
//
// The text before the first " #" is parsed by parseUserKey; the remainder
// supplies the trailer.
func parseKey(keyStr string) base.InternalKey {
	userKeyStr, trailerStr := splitStringAt(keyStr, " #")
	var ikey base.InternalKey
	ikey.UserKey = parseUserKey(userKeyStr)
	// The trailer is obtained by handing base.ParseInternalKey a key with a
	// placeholder user key ("foo") and keeping only the resulting Trailer.
	ikey.Trailer = base.ParseInternalKey(fmt.Sprintf("foo#%s", trailerStr)).Trailer
	return ikey
}

// parseKV parses a cockroach KV written as:
//
//	<roach-key> [@ <version-hex>] #<seq-num>,<kind> = value
//
// For example:
//
//	foo @ 0001020304050607 #1,SET = bar
//
// The text before the first " = " is parsed by parseKey; everything after it
// is returned verbatim as the value (empty if there is no " = ").
func parseKV(input string) (key base.InternalKey, value []byte) {
	keyStr, valStr := splitStringAt(input, " = ")
	key = parseKey(keyStr)
	value = []byte(valStr)
	return key, value
}

// splitStringAt splits str around the first occurrence of sep, returning the
// text before and after it. If sep does not occur in str, it returns str and
// the empty string. sep is expected to be non-empty.
func splitStringAt(str string, sep string) (before, after string) {
	// strings.Cut already returns (str, "") when sep is absent, so the found
	// flag is not needed.
	before, after, _ = strings.Cut(str, sep)
	return before, after
}
105 changes: 105 additions & 0 deletions internal/crdbtest/key_schema_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
package crdbtest

import (
"bytes"
"fmt"
"strings"
"testing"

"github.com/cockroachdb/crlib/crstrings"
"github.com/cockroachdb/datadriven"
"github.com/cockroachdb/pebble/internal/base"
"github.com/cockroachdb/pebble/internal/binfmt"
"github.com/cockroachdb/pebble/internal/treeprinter"
"github.com/cockroachdb/pebble/sstable/block"
"github.com/cockroachdb/pebble/sstable/colblk"
"github.com/stretchr/testify/require"
)

// TestKeySchema exercises the cockroachKeyWriter and cockroachKeySeeker by
// running each data-driven test file under testdata/ as its own subtest,
// named after the file.
func TestKeySchema(t *testing.T) {
	testFiles := [...]string{"suffix_types", "block_encoding", "seek"}
	for i := range testFiles {
		name := testFiles[i]
		t.Run(name, func(t *testing.T) {
			runDataDrivenTest(t, fmt.Sprintf("testdata/%s", name))
		})
	}
}

// runDataDrivenTest runs the datadriven test file at path against a data block
// built with KeySchema. The encoder, the iterator and the most recently
// finished block (blockData) are shared across all commands in the file.
//
// Supported commands:
//
//   - init: builds a new block from the KVs in the input (one per line, in
//     the format accepted by parseKV) and reports its row count and size.
//   - describe: prints the decoder's description of the current block.
//   - suffix-types: prints the suffixTypes of a cockroachKeySeeker
//     initialized on the current block.
//   - keys: prints every KV in the current block in iteration order,
//     flagging any key that does not sort strictly after its predecessor.
//   - seek: for each user key in the input (one per line, in the format
//     accepted by parseUserKey), prints the result of SeekGE on the block.
func runDataDrivenTest(t *testing.T, path string) {
var blockData []byte
var e colblk.DataBlockEncoder
e.Init(&KeySchema)
var iter colblk.DataBlockIter
iter.InitOnce(&KeySchema, Comparer.Compare, Comparer.Split, nil)

datadriven.RunTest(t, path, func(t *testing.T, td *datadriven.TestData) string {
switch td.Cmd {
case "init":
// Start a fresh block and add every KV from the input to it.
e.Reset()
for _, l := range crstrings.Lines(td.Input) {
key, value := parseKV(l)
// The result of ComparePrev on the key writer is passed through to Add.
kcmp := e.KeyWriter.ComparePrev(key.UserKey)
e.Add(key, value, 0, kcmp, false /* isObsolete */)
}
// Finish the block with all rows and check that the produced data has
// exactly the size the encoder reported.
numRows := e.Rows()
size := e.Size()
blockData, _ = e.Finish(numRows, size)
require.Equal(t, size, len(blockData))
return fmt.Sprintf("%d rows, total size %dB", numRows, size)

case "describe":
// Decode the current block and render its description as a tree.
var d colblk.DataBlockDecoder
d.Init(&KeySchema, blockData)
f := binfmt.New(blockData)
tp := treeprinter.New()
d.Describe(f, tp)
return tp.String()

case "suffix-types":
// Initialize a key seeker on the decoded block and print its suffixTypes.
var d colblk.DataBlockDecoder
d.Init(&KeySchema, blockData)
var ks cockroachKeySeeker
ks.init(&d)
return fmt.Sprintf("suffix-types: %s", ks.suffixTypes)

case "keys":
var d colblk.DataBlockDecoder
d.Init(&KeySchema, blockData)
require.NoError(t, iter.Init(&d, block.IterTransforms{}))
defer iter.Close()
var buf bytes.Buffer
var prevKey base.InternalKey
// Walk the block from the first KV to the last, one output line per KV.
for kv := iter.First(); kv != nil; kv = iter.Next() {
fmt.Fprintf(&buf, "%s", formatKV(*kv))
// Flag any key for which InternalCompare(prevKey, key) is not -1, i.e.
// the previous key does not sort strictly before this one.
if prevKey.UserKey != nil && base.InternalCompare(Comparer.Compare, prevKey, kv.K) != -1 {
buf.WriteString(" !!! OUT OF ORDER KEY !!!")
}
buf.WriteString("\n")
// NOTE(review): the key is cloned presumably because the iterator may
// reuse the key's memory on the next step — confirm.
prevKey = kv.K.Clone()
}
return buf.String()

case "seek":
var d colblk.DataBlockDecoder
d.Init(&KeySchema, blockData)
require.NoError(t, iter.Init(&d, block.IterTransforms{}))
defer iter.Close()
var buf strings.Builder
// Each input line is a user key to seek to; the output line echoes the
// key followed by the KV that SeekGE returned.
for _, l := range crstrings.Lines(td.Input) {
key := parseUserKey(l)
fmt.Fprintf(&buf, "%s: ", formatUserKey(key))
kv := iter.SeekGE(key, base.SeekGEFlagsNone)
require.NoError(t, iter.Error())
if kv == nil {
// "." marks a seek for which SeekGE returned no KV.
buf.WriteString(".\n")
} else {
fmt.Fprintf(&buf, "%s\n", formatKV(*kv))
}
}
return buf.String()

default:
return fmt.Sprintf("unknown command: %s", td.Cmd)
}
})
}
Loading
Loading