Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: use offset as a disambiguator #106

Merged
merged 5 commits into from
Feb 16, 2024
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 18 additions & 18 deletions pkg/btree/bptree.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import (
"bytes"
"fmt"
"io"
"log"
"slices"
)

Expand Down Expand Up @@ -40,19 +41,20 @@ func (t *BPTree) root() (*BPTreeNode, MemoryPointer, error) {
return root, mp, nil
}

func (t *BPTree) Find(key []byte) (MemoryPointer, bool, error) {
func (t *BPTree) Find(key ReferencedValue) (ReferencedValue, MemoryPointer, error) {
friendlymatthew marked this conversation as resolved.
Show resolved Hide resolved
root, rootOffset, err := t.root()
if err != nil {
return MemoryPointer{}, false, fmt.Errorf("read root node: %w", err)
return ReferencedValue{}, MemoryPointer{}, fmt.Errorf("read root node: %w", err)
}
if root == nil {
return MemoryPointer{}, false, nil
return ReferencedValue{}, MemoryPointer{}, nil
}
path, err := t.traverse(key, root, rootOffset)
if err != nil {
return MemoryPointer{}, false, err
return ReferencedValue{}, MemoryPointer{}, err
}
return path[0].node.Pointer(path[0].index), path[0].found, nil
log.Printf("path %#v", path)
return path[0].node.Keys[path[0].index], path[0].node.Pointer(path[0].index), nil
}

func (t *BPTree) readNode(ptr MemoryPointer) (*BPTreeNode, error) {
Expand All @@ -76,15 +78,9 @@ type TraversalRecord struct {

// traverse returns the path from root to leaf in reverse order (leaf first)
// the last element is always the node passed in
func (t *BPTree) traverse(key []byte, node *BPTreeNode, ptr MemoryPointer) ([]TraversalRecord, error) {
// binary search node.Keys to find the first key greater than key (or gte if leaf)
index, found := slices.BinarySearchFunc(node.Keys, ReferencedValue{Value: key}, func(e ReferencedValue, t ReferencedValue) int {
if cmp := bytes.Compare(e.Value, t.Value); cmp == 0 && !node.leaf() {
return -1
} else {
return cmp
}
})
func (t *BPTree) traverse(key ReferencedValue, node *BPTreeNode, ptr MemoryPointer) ([]TraversalRecord, error) {
// binary search node.Keys for the first key greater than or equal to key; found reports an exact match
index, found := slices.BinarySearchFunc(node.Keys, key, CompareReferencedValues)

if node.leaf() {
return []TraversalRecord{{node: node, index: index, found: found, ptr: ptr}}, nil
Expand Down Expand Up @@ -122,16 +118,19 @@ func (t *BPTree) Insert(key ReferencedValue, value MemoryPointer) error {
return t.meta.SetRoot(MemoryPointer{Offset: uint64(offset), Length: uint32(len(buf))})
}

path, err := t.traverse(key.Value, root, rootOffset)
path, err := t.traverse(key, root, rootOffset)
if err != nil {
return err
}

log.Printf("path: %v", path)

// insert the key into the leaf
n := path[0].node
j, _ := slices.BinarySearchFunc(n.Keys, key, func(e ReferencedValue, t ReferencedValue) int {
return bytes.Compare(e.Value, t.Value)
})
j, found := slices.BinarySearchFunc(n.Keys, key, CompareReferencedValues)
if found {
return fmt.Errorf("key already exists")
}
if j == len(n.Keys) {
n.Keys = append(n.Keys, key)
n.leafPointers = append(n.leafPointers, value)
Expand All @@ -147,6 +146,7 @@ func (t *BPTree) Insert(key ReferencedValue, value MemoryPointer) error {
tr := path[i]
n := tr.node
if int(n.Size()) > t.tree.PageSize() {
log.Printf("split!")
// split the node
// mid is the key that will be inserted into the parent
mid := len(n.Keys) / 2
Expand Down
94 changes: 51 additions & 43 deletions pkg/btree/bptree_test.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package btree

import (
"bytes"
"encoding/binary"
"math/rand"
"testing"
Expand Down Expand Up @@ -30,11 +31,11 @@ func TestBPTree(t *testing.T) {
}
tree := NewBPTree(p, &testMetaPage{})
// find a key that doesn't exist
_, found, err := tree.Find([]byte("hello"))
k, _, err := tree.Find(ReferencedValue{Value: []byte("hello")})
if err != nil {
t.Fatal(err)
}
if found {
if len(k.Value) != 0 {
t.Fatal("expected not found")
}
})
Expand All @@ -49,11 +50,11 @@ func TestBPTree(t *testing.T) {
if err := tree.Insert(ReferencedValue{Value: []byte("hello")}, MemoryPointer{Offset: 1}); err != nil {
t.Fatal(err)
}
v, found, err := tree.Find([]byte("hello"))
k, v, err := tree.Find(ReferencedValue{Value: []byte("hello")})
if err != nil {
t.Fatal(err)
}
if !found {
if !bytes.Equal(k.Value, []byte("hello")) {
t.Fatal("expected to find key")
}
if v.Offset != 1 {
Expand All @@ -74,21 +75,21 @@ func TestBPTree(t *testing.T) {
if err := tree.Insert(ReferencedValue{Value: []byte("world")}, MemoryPointer{Offset: 2}); err != nil {
t.Fatal(err)
}
v1, f1, err := tree.Find([]byte("hello"))
k1, v1, err := tree.Find(ReferencedValue{Value: []byte("hello")})
if err != nil {
t.Fatal(err)
}
if !f1 {
if !bytes.Equal(k1.Value, []byte("hello")) {
t.Fatal("expected to find key")
}
if v1.Offset != 1 {
t.Fatalf("expected value 1, got %d", v1)
}
v2, f2, err := tree.Find([]byte("world"))
k2, v2, err := tree.Find(ReferencedValue{Value: []byte("world")})
if err != nil {
t.Fatal(err)
}
if !f2 {
if !bytes.Equal(k2.Value, []byte("world")) {
t.Fatal("expected to find key")
}
if v2.Offset != 2 {
Expand All @@ -115,41 +116,41 @@ func TestBPTree(t *testing.T) {
if err := tree.Insert(ReferencedValue{Value: []byte("cooow")}, MemoryPointer{Offset: 4}); err != nil {
t.Fatal(err)
}
v1, f1, err := tree.Find([]byte("hello"))
k1, v1, err := tree.Find(ReferencedValue{Value: []byte("hello")})
if err != nil {
t.Fatal(err)
}
if !f1 {
if !bytes.Equal(k1.Value, []byte("hello")) {
t.Fatal("expected to find key")
}
if v1.Offset != 1 {
t.Fatalf("expected value 1, got %d", v1)
}
v2, f2, err := tree.Find([]byte("world"))
k2, v2, err := tree.Find(ReferencedValue{Value: []byte("world")})
if err != nil {
t.Fatal(err)
}
if !f2 {
if !bytes.Equal(k2.Value, []byte("world")) {
t.Fatal("expected to find key")
}
if v2.Offset != 2 {
t.Fatalf("expected value 2, got %d", v2)
}
v3, f3, err := tree.Find([]byte("moooo"))
k3, v3, err := tree.Find(ReferencedValue{Value: []byte("moooo")})
if err != nil {
t.Fatal(err)
}
if !f3 {
if !bytes.Equal(k3.Value, []byte("moooo")) {
t.Fatal("expected to find key")
}
if v3.Offset != 3 {
t.Fatalf("expected value 3, got %d", v3)
}
v4, f4, err := tree.Find([]byte("cooow"))
k4, v4, err := tree.Find(ReferencedValue{Value: []byte("cooow")})
if err != nil {
t.Fatal(err)
}
if !f4 {
if !bytes.Equal(k4.Value, []byte("cooow")) {
t.Fatal("expected to find key")
}
if v4.Offset != 4 {
Expand Down Expand Up @@ -180,37 +181,39 @@ func TestBPTree(t *testing.T) {
t.Fatal(err)
}
})
}

t.Run("insertion test", func(t *testing.T) {
b := buftest.NewSeekableBuffer()
p, err := NewPageFile(b)
func TestBPTree_SequentialInsertionTest(t *testing.T) {
b := buftest.NewSeekableBuffer()
p, err := NewPageFile(b)
if err != nil {
t.Fatal(err)
}
tree := NewBPTree(p, &testMetaPage{})
for i := 0; i < 256; i++ {
buf := make([]byte, 8)
binary.BigEndian.PutUint64(buf, uint64(i))
if err := tree.Insert(ReferencedValue{Value: buf}, MemoryPointer{Offset: uint64(i)}); err != nil {
t.Fatal(err)
}
}
for i := 0; i < 256; i++ {
buf := make([]byte, 8)
binary.BigEndian.PutUint64(buf, uint64(i))
k, v, err := tree.Find(ReferencedValue{Value: buf})
if err != nil {
t.Fatal(err)
}
tree := NewBPTree(p, &testMetaPage{})
for i := 0; i < 16384; i++ {
buf := make([]byte, 8)
binary.BigEndian.PutUint64(buf, uint64(i))
if err := tree.Insert(ReferencedValue{Value: buf}, MemoryPointer{Offset: uint64(i)}); err != nil {
t.Fatal(err)
}
if !bytes.Equal(k.Value, buf) {
t.Fatalf("expected to find key %d", i)
}
for i := 0; i < 16384; i++ {
buf := make([]byte, 8)
binary.BigEndian.PutUint64(buf, uint64(i))
v, found, err := tree.Find(buf)
if err != nil {
t.Fatal(err)
}
if !found {
t.Fatalf("expected to find key %d", i)
}
if v.Offset != uint64(i) {
t.Fatalf("expected value %d, got %d", i, v)
}
if v.Offset != uint64(i) {
t.Fatalf("expected value %d, got %d", i, v)
}
})
}
}

func TestBPTree_RandomTests(t *testing.T) {
t.Run("random insertion test", func(t *testing.T) {
b := buftest.NewSeekableBuffer()
p, err := NewPageFile(b)
Expand All @@ -234,11 +237,11 @@ func TestBPTree(t *testing.T) {
if _, err := s.Read(buf); err != nil {
t.Fatal(err)
}
v, found, err := tree.Find(buf)
k, v, err := tree.Find(ReferencedValue{Value: buf})
if err != nil {
t.Fatal(err)
}
if !found {
if !bytes.Equal(k.Value, buf) {
t.Fatalf("expected to find key %d", i)
}
if v.Offset != uint64(i) {
Expand All @@ -254,8 +257,13 @@ func TestBPTree(t *testing.T) {
t.Fatal(err)
}
tree := NewBPTree(p, &testMetaPage{})
tree.Data = make([]byte, 65536*4+8)
for i := 0; i < 65536*4; i++ {
if err := tree.Insert(ReferencedValue{Value: []byte{1, 2, 3, 4, 5, 6, 7, 8}}, MemoryPointer{Offset: uint64(i)}); err != nil {
if err := tree.Insert(ReferencedValue{
Value: []byte{1, 2, 3, 4, 5, 6, 7, 8},
// DataPointer is used as a disambiguator.
DataPointer: MemoryPointer{Offset: uint64(i), Length: 8},
}, MemoryPointer{Offset: uint64(i)}); err != nil {
t.Fatal(err)
}
}
Expand Down
16 changes: 16 additions & 0 deletions pkg/btree/node.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package btree

import (
"bytes"
"encoding/binary"
"fmt"
"io"
Expand All @@ -17,10 +18,25 @@ type ReferencedValue struct {
// value is taken to be unreferenced and is stored directly in the node.
// if it is set, the value is used for comparison but the value is stored
// as a reference to the DataPointer.
//
// caveat: DataPointer is used as a disambiguator for the value. the b+ tree
// implementation does not support duplicate keys and uses the DataPointer
// to disambiguate between keys that compare as equal.
DataPointer MemoryPointer
Value []byte
}

// CompareReferencedValues is a three-way comparison of two ReferencedValues,
// suitable as the comparison function for slices.BinarySearchFunc.
//
// The Value bytes are compared first. When they are equal, the tie is broken
// by DataPointer.Offset and then by DataPointer.Length, so entries that share
// the same Value but carry different DataPointers compare as distinct.
//
// It returns a negative number if a orders before b, a positive number if a
// orders after b, and 0 if all three fields are equal.
func CompareReferencedValues(a, b ReferencedValue) int {
	if c := bytes.Compare(a.Value, b.Value); c != 0 {
		return c
	}
	// The tie-breakers are unsigned, so they are compared explicitly instead
	// of subtracted: an unsigned difference wraps around, and converting the
	// wrapped result to int can yield the wrong sign.
	switch {
	case a.DataPointer.Offset < b.DataPointer.Offset:
		return -1
	case a.DataPointer.Offset > b.DataPointer.Offset:
		return 1
	}
	switch {
	case a.DataPointer.Length < b.DataPointer.Length:
		return -1
	case a.DataPointer.Length > b.DataPointer.Length:
		return 1
	}
	return 0
}

type BPTreeNode struct {
Data []byte
// contains the offset of the child node or the offset of the record for leaf
Expand Down
9 changes: 5 additions & 4 deletions pkg/handlers/csv_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
"testing"

"github.com/kevmo314/appendable/pkg/appendable"
"github.com/kevmo314/appendable/pkg/btree"
"github.com/kevmo314/appendable/pkg/buftest"
)

Expand Down Expand Up @@ -98,26 +99,26 @@
t.Errorf("got len(i.Indexes) = %d, want 1", len(collected))
}

mp1, found, err := collected[0].BPTree(r2).Find([]byte("test1"))
mp1, found, err := collected[0].BPTree(r2).Find(btree.ReferencedValue{Value: []byte("test1")})
if err != nil {
t.Fatal(err)
}
if !found {

Check failure on line 106 in pkg/handlers/csv_test.go

View workflow job for this annotation

GitHub Actions / go-test (ubuntu-latest)

invalid operation: operator ! not defined on found (variable of type btree.MemoryPointer)
t.Errorf("got i.Indexes[0].BPTree().Find(test1) = nil, want non-nil")
}
if mp1.Offset != uint64(len("test\n")) || mp1.Length != uint32(len("test1")) {

Check failure on line 109 in pkg/handlers/csv_test.go

View workflow job for this annotation

GitHub Actions / go-test (ubuntu-latest)

mp1.Offset undefined (type btree.ReferencedValue has no field or method Offset)

Check failure on line 109 in pkg/handlers/csv_test.go

View workflow job for this annotation

GitHub Actions / go-test (ubuntu-latest)

mp1.Length undefined (type btree.ReferencedValue has no field or method Length)
t.Errorf("got i.Indexes[0].BPTree().Find(\"test1\") = %+v, want {%d, %d}", mp1, len("test\n"), len("test1"))
}

mp2, found, err := collected[0].BPTree(r2).Find([]byte("test2"))
mp2, found, err := collected[0].BPTree(r2).Find(btree.ReferencedValue{Value: []byte("test2")})
if err != nil {
t.Fatal(err)
}
if !found {

Check failure on line 117 in pkg/handlers/csv_test.go

View workflow job for this annotation

GitHub Actions / go-test (ubuntu-latest)

invalid operation: operator ! not defined on found (variable of type btree.MemoryPointer)
t.Errorf("got i.Indexes[0].BPTree().Find(\"test2\") = nil, want non-nil")
}

if mp2.Offset != uint64(len("test\ntest1\n")) || mp2.Length != uint32(len("test2")) {

Check failure on line 121 in pkg/handlers/csv_test.go

View workflow job for this annotation

GitHub Actions / go-test (ubuntu-latest)

mp2.Offset undefined (type btree.ReferencedValue has no field or method Offset)

Check failure on line 121 in pkg/handlers/csv_test.go

View workflow job for this annotation

GitHub Actions / go-test (ubuntu-latest)

mp2.Length undefined (type btree.ReferencedValue has no field or method Length)
t.Errorf("got i.Indexes[0].BPTree().Find(\"test2\") = %+v, want {%d, %d}", mp2, len("test\ntest1\n"), len("test2"))
}
})
Expand Down Expand Up @@ -157,14 +158,14 @@
t.Errorf("got len(i.Indexes) = %d, want 1", len(collected))
}

mp1, found, err := collected[0].BPTree(r2).Find([]byte("test1"))
mp1, found, err := collected[0].BPTree(r2).Find(btree.ReferencedValue{Value: []byte("test1")})
if err != nil {
t.Fatal(err)
}
if !found {

Check failure on line 165 in pkg/handlers/csv_test.go

View workflow job for this annotation

GitHub Actions / go-test (ubuntu-latest)

invalid operation: operator ! not defined on found (variable of type btree.MemoryPointer)
t.Errorf("got i.Indexes[0].BPTree().Find(\"test1\") = nil, want non-nil")
}
if mp1.Offset != uint64(len("test\n")) || mp1.Length != uint32(len("test1")) {

Check failure on line 168 in pkg/handlers/csv_test.go

View workflow job for this annotation

GitHub Actions / go-test (ubuntu-latest)

mp1.Offset undefined (type btree.ReferencedValue has no field or method Offset)

Check failure on line 168 in pkg/handlers/csv_test.go

View workflow job for this annotation

GitHub Actions / go-test (ubuntu-latest)

mp1.Length undefined (type btree.ReferencedValue has no field or method Length)
t.Errorf("got i.Indexes[0].BPTree().Find(\"test1\") = %+v, want {%d, %d}", mp1, len("test\n"), len("test1"))
}

Expand All @@ -182,11 +183,11 @@

v2 := make([]byte, 8)
binary.BigEndian.PutUint64(v2, math.Float64bits(123))
mp2, found, err := collected[1].BPTree(r2).Find(v2)
mp2, found, err := collected[1].BPTree(r2).Find(btree.ReferencedValue{Value: v2})
if err != nil {
t.Fatal(err)
}
if !found {

Check failure on line 190 in pkg/handlers/csv_test.go

View workflow job for this annotation

GitHub Actions / go-test (ubuntu-latest)

invalid operation: operator ! not defined on found (variable of type btree.MemoryPointer)
t.Errorf("got i.Indexes[1].BPTree().Find(\"test3\") = nil, want non-nil")
}
if mp2.Offset != uint64(len("test\ntest1\n")) || mp2.Length != uint32(len("123")) {
Expand Down
Loading
Loading