Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: use offset as a disambiguator #106

Merged
merged 5 commits into from
Feb 16, 2024
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 21 additions & 18 deletions pkg/btree/bptree.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import (
"bytes"
"fmt"
"io"
"log"
"slices"
)

Expand Down Expand Up @@ -40,19 +41,20 @@ func (t *BPTree) root() (*BPTreeNode, MemoryPointer, error) {
return root, mp, nil
}

func (t *BPTree) Find(key []byte) (MemoryPointer, bool, error) {
func (t *BPTree) Find(key ReferencedValue) (ReferencedValue, MemoryPointer, error) {
friendlymatthew marked this conversation as resolved.
Show resolved Hide resolved
root, rootOffset, err := t.root()
if err != nil {
return MemoryPointer{}, false, fmt.Errorf("read root node: %w", err)
return ReferencedValue{}, MemoryPointer{}, fmt.Errorf("read root node: %w", err)
}
if root == nil {
return MemoryPointer{}, false, nil
return ReferencedValue{}, MemoryPointer{}, nil
}
path, err := t.traverse(key, root, rootOffset)
if err != nil {
return MemoryPointer{}, false, err
return ReferencedValue{}, MemoryPointer{}, err
}
return path[0].node.Pointer(path[0].index), path[0].found, nil
log.Printf("path %#v", path)
return path[0].node.Keys[path[0].index], path[0].node.Pointer(path[0].index), nil
}

func (t *BPTree) readNode(ptr MemoryPointer) (*BPTreeNode, error) {
Expand All @@ -76,15 +78,9 @@ type TraversalRecord struct {

// traverse returns the path from root to leaf in reverse order (leaf first)
// the last element is always the node passed in
func (t *BPTree) traverse(key []byte, node *BPTreeNode, ptr MemoryPointer) ([]TraversalRecord, error) {
// binary search node.Keys to find the first key greater than key (or gte if leaf)
index, found := slices.BinarySearchFunc(node.Keys, ReferencedValue{Value: key}, func(e ReferencedValue, t ReferencedValue) int {
if cmp := bytes.Compare(e.Value, t.Value); cmp == 0 && !node.leaf() {
return -1
} else {
return cmp
}
})
func (t *BPTree) traverse(key ReferencedValue, node *BPTreeNode, ptr MemoryPointer) ([]TraversalRecord, error) {
// binary search node.Keys to find the first key greater than key
index, found := slices.BinarySearchFunc(node.Keys, key, CompareReferencedValues)

if node.leaf() {
return []TraversalRecord{{node: node, index: index, found: found, ptr: ptr}}, nil
Expand All @@ -102,6 +98,7 @@ func (t *BPTree) traverse(key []byte, node *BPTreeNode, ptr MemoryPointer) ([]Tr
}

func (t *BPTree) Insert(key ReferencedValue, value MemoryPointer) error {
fmt.Printf("\ninsert parameters look like \nkey: %v\nvalue: %v\n\n", key, value)
root, rootOffset, err := t.root()
if err != nil {
return fmt.Errorf("read root node: %w", err)
Expand All @@ -122,16 +119,21 @@ func (t *BPTree) Insert(key ReferencedValue, value MemoryPointer) error {
return t.meta.SetRoot(MemoryPointer{Offset: uint64(offset), Length: uint32(len(buf))})
}

path, err := t.traverse(key.Value, root, rootOffset)
path, err := t.traverse(key, root, rootOffset)
if err != nil {
return err
}

log.Printf("path: %v\n", path)

// insert the key into the leaf
n := path[0].node
j, _ := slices.BinarySearchFunc(n.Keys, key, func(e ReferencedValue, t ReferencedValue) int {
return bytes.Compare(e.Value, t.Value)
})
fmt.Printf("keys %v and key %v", n.Keys, key)
j, found := slices.BinarySearchFunc(n.Keys, key, CompareReferencedValues)
fmt.Printf("binary search results: j: %v and found %v", j, found)
if found {
return fmt.Errorf("key already exists")
}
if j == len(n.Keys) {
n.Keys = append(n.Keys, key)
n.leafPointers = append(n.leafPointers, value)
Expand All @@ -147,6 +149,7 @@ func (t *BPTree) Insert(key ReferencedValue, value MemoryPointer) error {
tr := path[i]
n := tr.node
if int(n.Size()) > t.tree.PageSize() {
log.Printf("split!")
// split the node
// mid is the key that will be inserted into the parent
mid := len(n.Keys) / 2
Expand Down
94 changes: 51 additions & 43 deletions pkg/btree/bptree_test.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package btree

import (
"bytes"
"encoding/binary"
"math/rand"
"testing"
Expand Down Expand Up @@ -30,11 +31,11 @@ func TestBPTree(t *testing.T) {
}
tree := NewBPTree(p, &testMetaPage{})
// find a key that doesn't exist
_, found, err := tree.Find([]byte("hello"))
k, _, err := tree.Find(ReferencedValue{Value: []byte("hello")})
if err != nil {
t.Fatal(err)
}
if found {
if len(k.Value) != 0 {
t.Fatal("expected not found")
}
})
Expand All @@ -49,11 +50,11 @@ func TestBPTree(t *testing.T) {
if err := tree.Insert(ReferencedValue{Value: []byte("hello")}, MemoryPointer{Offset: 1}); err != nil {
t.Fatal(err)
}
v, found, err := tree.Find([]byte("hello"))
k, v, err := tree.Find(ReferencedValue{Value: []byte("hello")})
if err != nil {
t.Fatal(err)
}
if !found {
if !bytes.Equal(k.Value, []byte("hello")) {
t.Fatal("expected to find key")
}
if v.Offset != 1 {
Expand All @@ -74,21 +75,21 @@ func TestBPTree(t *testing.T) {
if err := tree.Insert(ReferencedValue{Value: []byte("world")}, MemoryPointer{Offset: 2}); err != nil {
t.Fatal(err)
}
v1, f1, err := tree.Find([]byte("hello"))
k1, v1, err := tree.Find(ReferencedValue{Value: []byte("hello")})
if err != nil {
t.Fatal(err)
}
if !f1 {
if !bytes.Equal(k1.Value, []byte("hello")) {
t.Fatal("expected to find key")
}
if v1.Offset != 1 {
t.Fatalf("expected value 1, got %d", v1)
}
v2, f2, err := tree.Find([]byte("world"))
k2, v2, err := tree.Find(ReferencedValue{Value: []byte("world")})
if err != nil {
t.Fatal(err)
}
if !f2 {
if !bytes.Equal(k2.Value, []byte("world")) {
t.Fatal("expected to find key")
}
if v2.Offset != 2 {
Expand All @@ -115,41 +116,41 @@ func TestBPTree(t *testing.T) {
if err := tree.Insert(ReferencedValue{Value: []byte("cooow")}, MemoryPointer{Offset: 4}); err != nil {
t.Fatal(err)
}
v1, f1, err := tree.Find([]byte("hello"))
k1, v1, err := tree.Find(ReferencedValue{Value: []byte("hello")})
if err != nil {
t.Fatal(err)
}
if !f1 {
if !bytes.Equal(k1.Value, []byte("hello")) {
t.Fatal("expected to find key")
}
if v1.Offset != 1 {
t.Fatalf("expected value 1, got %d", v1)
}
v2, f2, err := tree.Find([]byte("world"))
k2, v2, err := tree.Find(ReferencedValue{Value: []byte("world")})
if err != nil {
t.Fatal(err)
}
if !f2 {
if !bytes.Equal(k2.Value, []byte("world")) {
t.Fatal("expected to find key")
}
if v2.Offset != 2 {
t.Fatalf("expected value 2, got %d", v2)
}
v3, f3, err := tree.Find([]byte("moooo"))
k3, v3, err := tree.Find(ReferencedValue{Value: []byte("moooo")})
if err != nil {
t.Fatal(err)
}
if !f3 {
if !bytes.Equal(k3.Value, []byte("moooo")) {
t.Fatal("expected to find key")
}
if v3.Offset != 3 {
t.Fatalf("expected value 3, got %d", v3)
}
v4, f4, err := tree.Find([]byte("cooow"))
k4, v4, err := tree.Find(ReferencedValue{Value: []byte("cooow")})
if err != nil {
t.Fatal(err)
}
if !f4 {
if !bytes.Equal(k4.Value, []byte("cooow")) {
t.Fatal("expected to find key")
}
if v4.Offset != 4 {
Expand Down Expand Up @@ -180,37 +181,39 @@ func TestBPTree(t *testing.T) {
t.Fatal(err)
}
})
}

t.Run("insertion test", func(t *testing.T) {
b := buftest.NewSeekableBuffer()
p, err := NewPageFile(b)
func TestBPTree_SequentialInsertionTest(t *testing.T) {
b := buftest.NewSeekableBuffer()
p, err := NewPageFile(b)
if err != nil {
t.Fatal(err)
}
tree := NewBPTree(p, &testMetaPage{})
for i := 0; i < 256; i++ {
buf := make([]byte, 8)
binary.BigEndian.PutUint64(buf, uint64(i))
if err := tree.Insert(ReferencedValue{Value: buf}, MemoryPointer{Offset: uint64(i)}); err != nil {
t.Fatal(err)
}
}
for i := 0; i < 256; i++ {
buf := make([]byte, 8)
binary.BigEndian.PutUint64(buf, uint64(i))
k, v, err := tree.Find(ReferencedValue{Value: buf})
if err != nil {
t.Fatal(err)
}
tree := NewBPTree(p, &testMetaPage{})
for i := 0; i < 16384; i++ {
buf := make([]byte, 8)
binary.BigEndian.PutUint64(buf, uint64(i))
if err := tree.Insert(ReferencedValue{Value: buf}, MemoryPointer{Offset: uint64(i)}); err != nil {
t.Fatal(err)
}
if !bytes.Equal(k.Value, buf) {
t.Fatalf("expected to find key %d", i)
}
for i := 0; i < 16384; i++ {
buf := make([]byte, 8)
binary.BigEndian.PutUint64(buf, uint64(i))
v, found, err := tree.Find(buf)
if err != nil {
t.Fatal(err)
}
if !found {
t.Fatalf("expected to find key %d", i)
}
if v.Offset != uint64(i) {
t.Fatalf("expected value %d, got %d", i, v)
}
if v.Offset != uint64(i) {
t.Fatalf("expected value %d, got %d", i, v)
}
})
}
}

func TestBPTree_RandomTests(t *testing.T) {
t.Run("random insertion test", func(t *testing.T) {
b := buftest.NewSeekableBuffer()
p, err := NewPageFile(b)
Expand All @@ -234,11 +237,11 @@ func TestBPTree(t *testing.T) {
if _, err := s.Read(buf); err != nil {
t.Fatal(err)
}
v, found, err := tree.Find(buf)
k, v, err := tree.Find(ReferencedValue{Value: buf})
if err != nil {
t.Fatal(err)
}
if !found {
if !bytes.Equal(k.Value, buf) {
t.Fatalf("expected to find key %d", i)
}
if v.Offset != uint64(i) {
Expand All @@ -254,8 +257,13 @@ func TestBPTree(t *testing.T) {
t.Fatal(err)
}
tree := NewBPTree(p, &testMetaPage{})
tree.Data = make([]byte, 65536*4+8)
for i := 0; i < 65536*4; i++ {
if err := tree.Insert(ReferencedValue{Value: []byte{1, 2, 3, 4, 5, 6, 7, 8}}, MemoryPointer{Offset: uint64(i)}); err != nil {
if err := tree.Insert(ReferencedValue{
Value: []byte{1, 2, 3, 4, 5, 6, 7, 8},
// DataPointer is used as a disambiguator.
DataPointer: MemoryPointer{Offset: uint64(i), Length: 8},
}, MemoryPointer{Offset: uint64(i)}); err != nil {
t.Fatal(err)
}
}
Expand Down
18 changes: 18 additions & 0 deletions pkg/btree/node.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package btree

import (
"bytes"
"encoding/binary"
"fmt"
"io"
Expand All @@ -17,10 +18,27 @@ type ReferencedValue struct {
// value is taken to be unreferenced and is stored directly in the node.
// if it is set, the value is used for comparison but the value is stored
// as a reference to the DataPointer.
//
// caveat: DataPointer is used as a disambiguator for the value. the b+ tree
// implementation does not support duplicate keys and uses the DataPointer
// to disambiguate between keys that compare as equal.
DataPointer MemoryPointer
Value []byte
}

// CompareReferencedValues orders two ReferencedValues for use as a comparison
// function (e.g. with slices.BinarySearchFunc).
//
// Values are ordered by their bytes first. When the bytes are equal, the
// DataPointer acts as a disambiguator: Offset is compared next, then Length.
//
// It returns a negative number when a sorts before b, a positive number when
// a sorts after b, and 0 when both the bytes and the DataPointer are equal.
func CompareReferencedValues(a, b ReferencedValue) int {
	if c := bytes.Compare(a.Value, b.Value); c != 0 {
		return c
	}

	// Offset and Length are unsigned, so the ordering must not be derived from
	// a subtraction: the difference wraps around instead of going negative,
	// and converting the wrapped result to int can report the wrong sign
	// (always for a narrower-than-int Length, and for Offsets that are more
	// than half the range apart). Compare the fields directly instead.
	switch {
	case a.DataPointer.Offset < b.DataPointer.Offset:
		return -1
	case a.DataPointer.Offset > b.DataPointer.Offset:
		return 1
	case a.DataPointer.Length < b.DataPointer.Length:
		return -1
	case a.DataPointer.Length > b.DataPointer.Length:
		return 1
	}
	return 0
}

type BPTreeNode struct {
Data []byte
// contains the offset of the child node or the offset of the record for leaf
Expand Down
Loading
Loading