Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: use offset as a disambiguator #106

Merged
merged 5 commits into from
Feb 16, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions pkg/appendable/index_file.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ import (
const CurrentVersion = 1

// DataHandler is the interface a data-format handler must satisfy. It embeds
// btree.DataParser and adds synchronization and format reporting.
type DataHandler interface {
// Embedding btree.DataParser means every DataHandler also satisfies
// btree.DataParser and can be used wherever one is expected.
btree.DataParser
// Synchronize receives the index file f and the raw data bytes df and
// reports any failure through the returned error.
// NOTE(review): presumably this brings f's indexes up to date with df;
// confirm against the concrete implementations.
Synchronize(f *IndexFile, df []byte) error
// Format returns the Format value associated with this handler.
Format() Format
}
Expand Down
55 changes: 29 additions & 26 deletions pkg/btree/bptree.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,13 +19,18 @@ type BPTree struct {
tree ReadWriteSeekPager
meta MetaPage

Data []byte
Data []byte
DataParser DataParser
}

// NewBPTree constructs a BPTree that uses tree as its pager and meta as its
// meta page. The Data and DataParser fields are left at their zero values.
func NewBPTree(tree ReadWriteSeekPager, meta MetaPage) *BPTree {
	t := new(BPTree)
	t.tree = tree
	t.meta = meta
	return t
}

// NewBPTreeWithData constructs a BPTree that uses tree as its pager and meta
// as its meta page, and additionally stores data in the Data field and parser
// in the DataParser field.
func NewBPTreeWithData(tree ReadWriteSeekPager, meta MetaPage, data []byte, parser DataParser) *BPTree {
	t := &BPTree{
		tree: tree,
		meta: meta,
	}
	t.Data, t.DataParser = data, parser
	return t
}

func (t *BPTree) root() (*BPTreeNode, MemoryPointer, error) {
mp, err := t.meta.Root()
if err != nil || mp.Length == 0 {
Expand All @@ -38,26 +43,26 @@ func (t *BPTree) root() (*BPTreeNode, MemoryPointer, error) {
return root, mp, nil
}

func (t *BPTree) Find(key []byte) (MemoryPointer, bool, error) {
func (t *BPTree) Find(key ReferencedValue) (ReferencedValue, MemoryPointer, error) {
friendlymatthew marked this conversation as resolved.
Show resolved Hide resolved
root, rootOffset, err := t.root()
if err != nil {
return MemoryPointer{}, false, fmt.Errorf("read root node: %w", err)
return ReferencedValue{}, MemoryPointer{}, fmt.Errorf("read root node: %w", err)
}
if root == nil {
return MemoryPointer{}, false, nil
return ReferencedValue{}, MemoryPointer{}, nil
}
path, err := t.traverse(key, root, rootOffset)
if err != nil {
return MemoryPointer{}, false, err
return ReferencedValue{}, MemoryPointer{}, err
}
return path[0].node.Pointer(path[0].index), path[0].found, nil
return path[0].node.Keys[path[0].index], path[0].node.Pointer(path[0].index), nil
}

func (t *BPTree) readNode(ptr MemoryPointer) (*BPTreeNode, error) {
if _, err := t.tree.Seek(int64(ptr.Offset), io.SeekStart); err != nil {
return nil, err
}
node := &BPTreeNode{Data: t.Data}
node := &BPTreeNode{Data: t.Data, DataParser: t.DataParser}
if _, err := node.ReadFrom(t.tree); err != nil {
return nil, err
}
Expand All @@ -67,27 +72,24 @@ func (t *BPTree) readNode(ptr MemoryPointer) (*BPTreeNode, error) {
type TraversalRecord struct {
node *BPTreeNode
index int
found bool
// the offset is useful so we know which page to free when we split
ptr MemoryPointer
}

// traverse returns the path from root to leaf in reverse order (leaf first)
// the last element is always the node passed in
func (t *BPTree) traverse(key []byte, node *BPTreeNode, ptr MemoryPointer) ([]TraversalRecord, error) {
// binary search node.Keys to find the first key greater than key (or gte if leaf)
index, found := slices.BinarySearchFunc(node.Keys, ReferencedValue{Value: key}, func(e ReferencedValue, t ReferencedValue) int {
if cmp := bytes.Compare(e.Value, t.Value); cmp == 0 && !node.leaf() {
return -1
} else {
return cmp
}
})
func (t *BPTree) traverse(key ReferencedValue, node *BPTreeNode, ptr MemoryPointer) ([]TraversalRecord, error) {
// binary search node.Keys to find the first key greater than key
index, found := slices.BinarySearchFunc(node.Keys, key, CompareReferencedValues)

if node.leaf() {
return []TraversalRecord{{node: node, index: index, found: found, ptr: ptr}}, nil
return []TraversalRecord{{node: node, index: index, ptr: ptr}}, nil
}

if found {
// if the key is found, we need to go to the right child
index++
}
child, err := t.readNode(node.Pointer(index))
if err != nil {
return nil, err
Expand All @@ -96,7 +98,7 @@ func (t *BPTree) traverse(key []byte, node *BPTreeNode, ptr MemoryPointer) ([]Tr
if err != nil {
return nil, err
}
return append(path, TraversalRecord{node: node, index: index, found: found, ptr: ptr}), nil
return append(path, TraversalRecord{node: node, index: index, ptr: ptr}), nil
}

func (t *BPTree) Insert(key ReferencedValue, value MemoryPointer) error {
Expand All @@ -106,7 +108,7 @@ func (t *BPTree) Insert(key ReferencedValue, value MemoryPointer) error {
}
if root == nil {
// special case, create the root as the first node
node := &BPTreeNode{Data: t.Data}
node := &BPTreeNode{Data: t.Data, DataParser: t.DataParser}
node.Keys = []ReferencedValue{key}
node.leafPointers = []MemoryPointer{value}
buf, err := node.MarshalBinary()
Expand All @@ -120,16 +122,17 @@ func (t *BPTree) Insert(key ReferencedValue, value MemoryPointer) error {
return t.meta.SetRoot(MemoryPointer{Offset: uint64(offset), Length: uint32(len(buf))})
}

path, err := t.traverse(key.Value, root, rootOffset)
path, err := t.traverse(key, root, rootOffset)
if err != nil {
return err
}

// insert the key into the leaf
n := path[0].node
j, _ := slices.BinarySearchFunc(n.Keys, key, func(e ReferencedValue, t ReferencedValue) int {
return bytes.Compare(e.Value, t.Value)
})
j, found := slices.BinarySearchFunc(n.Keys, key, CompareReferencedValues)
if found {
return fmt.Errorf("key already exists")
}
if j == len(n.Keys) {
n.Keys = append(n.Keys, key)
n.leafPointers = append(n.leafPointers, value)
Expand All @@ -151,7 +154,7 @@ func (t *BPTree) Insert(key ReferencedValue, value MemoryPointer) error {
midKey := n.Keys[mid]

// n is the left node, m the right node
m := &BPTreeNode{Data: t.Data}
m := &BPTreeNode{Data: t.Data, DataParser: t.DataParser}
if n.leaf() {
m.leafPointers = n.leafPointers[mid:]
m.Keys = n.Keys[mid:]
Expand Down Expand Up @@ -205,7 +208,7 @@ func (t *BPTree) Insert(key ReferencedValue, value MemoryPointer) error {
// the parent will be written to disk in the next iteration
} else {
// the root split, so create a new root
p := &BPTreeNode{Data: t.Data}
p := &BPTreeNode{Data: t.Data, DataParser: t.DataParser}
p.Keys = []ReferencedValue{midKey}
p.internalPointers = []uint64{
uint64(noffset), uint64(moffset),
Expand Down
101 changes: 57 additions & 44 deletions pkg/btree/bptree_test.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package btree

import (
"bytes"
"encoding/binary"
"math/rand"
"testing"
Expand Down Expand Up @@ -30,11 +31,11 @@ func TestBPTree(t *testing.T) {
}
tree := NewBPTree(p, &testMetaPage{})
// find a key that doesn't exist
_, found, err := tree.Find([]byte("hello"))
k, _, err := tree.Find(ReferencedValue{Value: []byte("hello")})
if err != nil {
t.Fatal(err)
}
if found {
if len(k.Value) != 0 {
t.Fatal("expected not found")
}
})
Expand All @@ -49,11 +50,11 @@ func TestBPTree(t *testing.T) {
if err := tree.Insert(ReferencedValue{Value: []byte("hello")}, MemoryPointer{Offset: 1}); err != nil {
t.Fatal(err)
}
v, found, err := tree.Find([]byte("hello"))
k, v, err := tree.Find(ReferencedValue{Value: []byte("hello")})
if err != nil {
t.Fatal(err)
}
if !found {
if !bytes.Equal(k.Value, []byte("hello")) {
t.Fatal("expected to find key")
}
if v.Offset != 1 {
Expand All @@ -74,21 +75,21 @@ func TestBPTree(t *testing.T) {
if err := tree.Insert(ReferencedValue{Value: []byte("world")}, MemoryPointer{Offset: 2}); err != nil {
t.Fatal(err)
}
v1, f1, err := tree.Find([]byte("hello"))
k1, v1, err := tree.Find(ReferencedValue{Value: []byte("hello")})
if err != nil {
t.Fatal(err)
}
if !f1 {
if !bytes.Equal(k1.Value, []byte("hello")) {
t.Fatal("expected to find key")
}
if v1.Offset != 1 {
t.Fatalf("expected value 1, got %d", v1)
}
v2, f2, err := tree.Find([]byte("world"))
k2, v2, err := tree.Find(ReferencedValue{Value: []byte("world")})
if err != nil {
t.Fatal(err)
}
if !f2 {
if !bytes.Equal(k2.Value, []byte("world")) {
t.Fatal("expected to find key")
}
if v2.Offset != 2 {
Expand All @@ -115,41 +116,41 @@ func TestBPTree(t *testing.T) {
if err := tree.Insert(ReferencedValue{Value: []byte("cooow")}, MemoryPointer{Offset: 4}); err != nil {
t.Fatal(err)
}
v1, f1, err := tree.Find([]byte("hello"))
k1, v1, err := tree.Find(ReferencedValue{Value: []byte("hello")})
if err != nil {
t.Fatal(err)
}
if !f1 {
if !bytes.Equal(k1.Value, []byte("hello")) {
t.Fatal("expected to find key")
}
if v1.Offset != 1 {
t.Fatalf("expected value 1, got %d", v1)
}
v2, f2, err := tree.Find([]byte("world"))
k2, v2, err := tree.Find(ReferencedValue{Value: []byte("world")})
if err != nil {
t.Fatal(err)
}
if !f2 {
if !bytes.Equal(k2.Value, []byte("world")) {
t.Fatal("expected to find key")
}
if v2.Offset != 2 {
t.Fatalf("expected value 2, got %d", v2)
}
v3, f3, err := tree.Find([]byte("moooo"))
k3, v3, err := tree.Find(ReferencedValue{Value: []byte("moooo")})
if err != nil {
t.Fatal(err)
}
if !f3 {
if !bytes.Equal(k3.Value, []byte("moooo")) {
t.Fatal("expected to find key")
}
if v3.Offset != 3 {
t.Fatalf("expected value 3, got %d", v3)
}
v4, f4, err := tree.Find([]byte("cooow"))
k4, v4, err := tree.Find(ReferencedValue{Value: []byte("cooow")})
if err != nil {
t.Fatal(err)
}
if !f4 {
if !bytes.Equal(k4.Value, []byte("cooow")) {
t.Fatal("expected to find key")
}
if v4.Offset != 4 {
Expand Down Expand Up @@ -180,37 +181,45 @@ func TestBPTree(t *testing.T) {
t.Fatal(err)
}
})
}

t.Run("insertion test", func(t *testing.T) {
b := buftest.NewSeekableBuffer()
p, err := NewPageFile(b)
func TestBPTree_SequentialInsertionTest(t *testing.T) {
b := buftest.NewSeekableBuffer()
p, err := NewPageFile(b)
if err != nil {
t.Fatal(err)
}
tree := NewBPTree(p, &testMetaPage{})
for i := 0; i < 256; i++ {
buf := make([]byte, 8)
binary.BigEndian.PutUint64(buf, uint64(i))
if err := tree.Insert(ReferencedValue{Value: buf}, MemoryPointer{Offset: uint64(i)}); err != nil {
t.Fatal(err)
}
}
for i := 0; i < 256; i++ {
buf := make([]byte, 8)
binary.BigEndian.PutUint64(buf, uint64(i))
k, v, err := tree.Find(ReferencedValue{Value: buf})
if err != nil {
t.Fatal(err)
}
tree := NewBPTree(p, &testMetaPage{})
for i := 0; i < 16384; i++ {
buf := make([]byte, 8)
binary.BigEndian.PutUint64(buf, uint64(i))
if err := tree.Insert(ReferencedValue{Value: buf}, MemoryPointer{Offset: uint64(i)}); err != nil {
t.Fatal(err)
}
if !bytes.Equal(k.Value, buf) {
t.Fatalf("expected to find key %d", i)
}
for i := 0; i < 16384; i++ {
buf := make([]byte, 8)
binary.BigEndian.PutUint64(buf, uint64(i))
v, found, err := tree.Find(buf)
if err != nil {
t.Fatal(err)
}
if !found {
t.Fatalf("expected to find key %d", i)
}
if v.Offset != uint64(i) {
t.Fatalf("expected value %d, got %d", i, v)
}
if v.Offset != uint64(i) {
t.Fatalf("expected value %d, got %d", i, v)
}
})
}
}

// StubDataParser is a test stand-in for a data parser; it carries no state
// and ignores whatever input it is given.
type StubDataParser struct{}

// Parse disregards value and always returns a freshly allocated slice holding
// the bytes 1 through 8.
func (s *StubDataParser) Parse(value []byte) []byte {
	const width = 8
	out := make([]byte, width)
	for i := range out {
		out[i] = byte(i + 1)
	}
	return out
}

func TestBPTree_RandomTests(t *testing.T) {
t.Run("random insertion test", func(t *testing.T) {
b := buftest.NewSeekableBuffer()
p, err := NewPageFile(b)
Expand All @@ -234,11 +243,11 @@ func TestBPTree(t *testing.T) {
if _, err := s.Read(buf); err != nil {
t.Fatal(err)
}
v, found, err := tree.Find(buf)
k, v, err := tree.Find(ReferencedValue{Value: buf})
if err != nil {
t.Fatal(err)
}
if !found {
if !bytes.Equal(k.Value, buf) {
t.Fatalf("expected to find key %d", i)
}
if v.Offset != uint64(i) {
Expand All @@ -253,9 +262,13 @@ func TestBPTree(t *testing.T) {
if err != nil {
t.Fatal(err)
}
tree := NewBPTree(p, &testMetaPage{})
tree := NewBPTreeWithData(p, &testMetaPage{}, make([]byte, 65536*4+8), &StubDataParser{})
for i := 0; i < 65536*4; i++ {
if err := tree.Insert(ReferencedValue{Value: []byte{1, 2, 3, 4, 5, 6, 7, 8}}, MemoryPointer{Offset: uint64(i)}); err != nil {
if err := tree.Insert(ReferencedValue{
Value: []byte{1, 2, 3, 4, 5, 6, 7, 8},
// DataPointer is used as a disambiguator.
DataPointer: MemoryPointer{Offset: uint64(i), Length: 8},
}, MemoryPointer{Offset: uint64(i)}); err != nil {
t.Fatal(err)
}
}
Expand Down
3 changes: 2 additions & 1 deletion pkg/btree/multi.go
Original file line number Diff line number Diff line change
Expand Up @@ -44,10 +44,11 @@ func (m *LinkedMetaPage) SetRoot(mp MemoryPointer) error {
//
// Generally, passing data is required; however, if the tree
// consists of only inlined values, it is not necessary.
func (m *LinkedMetaPage) BPTree(data []byte) *BPTree {
func (m *LinkedMetaPage) BPTree(data []byte, parser DataParser) *BPTree {
t := NewBPTree(m.rws, m)
if data != nil {
t.Data = data
t.DataParser = parser
}
return t
}
Expand Down
Loading
Loading