Skip to content

Commit

Permalink
manifest: more accurate in-use key ranges
Browse files Browse the repository at this point in the history
We now take into account whether the largest key is exclusive, which yields
more accurate ("tighter") key ranges.
  • Loading branch information
RaduBerinde committed Apr 23, 2024
1 parent 1d37865 commit 28ba805
Show file tree
Hide file tree
Showing 5 changed files with 82 additions and 72 deletions.
12 changes: 6 additions & 6 deletions internal/manifest/l0_sublevels.go
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,10 @@ type intervalKey struct {
isInclusiveEndBound bool
}

// toEndBoundary converts the interval key into a base.UserKeyBoundary that can
// be used as the end of a key range. The boundary is built from k.key, and is
// requested as exclusive exactly when k.isInclusiveEndBound is false.
func (k *intervalKey) toEndBoundary() base.UserKeyBoundary {
	exclusive := !k.isInclusiveEndBound
	return base.UserKeyExclusiveIf(k.key, exclusive)
}

// intervalKeyTemp is used in the sortAndSweep step. It contains additional metadata
// which is used to generate the {min,max}IntervalIndex for files.
type intervalKeyTemp struct {
Expand Down Expand Up @@ -943,19 +947,15 @@ func (s *L0Sublevels) InUseKeyRanges(smallest, largest []byte) []base.UserKeyBou
// maxIdx starts. We must set curr.End now, before making that leap,
// because this iteration may be the last.
i = maxIdx
curr.End.Key = s.orderedIntervals[i+1].startKey.key
// TODO(radu): make the kind more accurate.
curr.End.Kind = base.Inclusive
curr.End = s.orderedIntervals[i+1].startKey.toEndBoundary()
continue
}

// No files overlapping with this interval overlap with the next
// interval. Update the current end to be the next interval's start key.
// Note that curr is not necessarily finished, because there may be an
// abutting non-empty interval.
curr.End.Key = s.orderedIntervals[i+1].startKey.key
// TODO(radu): make the kind more accurate.
curr.End.Kind = base.Inclusive
curr.End = s.orderedIntervals[i+1].startKey.toEndBoundary()
i++
}
return keyRanges
Expand Down
13 changes: 6 additions & 7 deletions internal/manifest/manifest_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -74,18 +74,17 @@ func TestInuseKeyRangesRandomized(t *testing.T) {
// this file extends before or after smallest/largest, truncate
// it to be within [smallest,largest] for the purpose of
// correctness checking.
fileSmallest := f.Smallest.UserKey
fileLargest := f.Largest.UserKey
if cmp(fileSmallest, smallest) < 0 {
fileSmallest = smallest
b := f.UserKeyBounds()
if cmp(b.Start, smallest) < 0 {
b.Start = smallest
}
if cmp(fileLargest, largest) > 0 {
fileLargest = largest
if cmp(b.End.Key, largest) >= 0 {
b.End = base.UserKeyInclusive(largest)
}

var containedWithin bool
for _, kr := range keyRanges {
containedWithin = containedWithin || (cmp(fileSmallest, kr.Start) >= 0 && kr.End.IsUpperBoundFor(cmp, fileLargest))
containedWithin = containedWithin || kr.ContainsBounds(cmp, &b)
}
if !containedWithin {
t.Fatalf("file L%d.%s overlaps [%s, %s] but no in-use key range contains it",
Expand Down
37 changes: 24 additions & 13 deletions internal/manifest/version.go
Original file line number Diff line number Diff line change
Expand Up @@ -1338,7 +1338,7 @@ func (v *Version) CalculateInuseKeyRanges(
// we can seek to the accumulated range's end. Otherwise, we need to
// start at the first overlapping file within the level.
if currAccum != nil && v.cmp.Compare(currAccum.Start, smallest) <= 0 {
currFile = seekGT(&iter, cmp, currAccum.End.Key)
currFile = seekGT(&iter, cmp, currAccum.End)
} else {
currFile = iter.First()
}
Expand All @@ -1355,8 +1355,7 @@ func (v *Version) CalculateInuseKeyRanges(
case currAccum == nil || (currFile != nil && cmp(currFile.Largest.UserKey, currAccum.Start) < 0):
// This file is strictly before the current accumulated range,
// or there are no more accumulated ranges.
// TODO(radu): refine the boundary type.
output = append(output, base.UserKeyBoundsInclusive(currFile.Smallest.UserKey, currFile.Largest.UserKey))
output = append(output, currFile.UserKeyBounds())
currFile = iter.Next()
case currFile == nil || (currAccum != nil && cmp(currAccum.End.Key, currFile.Smallest.UserKey) < 0):
// The current accumulated key range is strictly before the
Expand All @@ -1369,34 +1368,46 @@ func (v *Version) CalculateInuseKeyRanges(
default:
// The current accumulated range and the current file overlap.
// Adjust the accumulated range to be the union.
if cmp(currFile.Smallest.UserKey, currAccum.Start) < 0 {
currAccum.Start = currFile.Smallest.UserKey
fileBounds := currFile.UserKeyBounds()
if cmp(fileBounds.Start, currAccum.Start) < 0 {
currAccum.Start = fileBounds.Start
}
if cmp(currFile.Largest.UserKey, currAccum.End.Key) > 0 {
currAccum.End.Key = currFile.Largest.UserKey
if fileBounds.End.IsUpperBoundFor(cmp, currAccum.End.Key) {
currAccum.End = fileBounds.End
}

// Extending `currAccum`'s end boundary may have caused it to
// overlap with `input` key ranges that we haven't processed
// yet. Merge any such key ranges.
for len(input) > 0 && cmp(input[0].Start, currAccum.End.Key) <= 0 {
if cmp(input[0].End.Key, currAccum.End.Key) > 0 {
if input[0].End.IsUpperBoundFor(cmp, currAccum.End.Key) {
currAccum.End = input[0].End
}
input = input[1:]
}
// Seek the level iterator past our current accumulated end.
currFile = seekGT(&iter, cmp, currAccum.End.Key)
currFile = seekGT(&iter, cmp, currAccum.End)
}
}
}
return output
}

func seekGT(iter *LevelIterator, cmp base.Compare, key []byte) *FileMetadata {
f := iter.SeekGE(cmp, key)
for f != nil && cmp(f.Largest.UserKey, key) == 0 {
f = iter.Next()
// seekGT positions iter at the first file whose end boundary lies after the
// given boundary and returns that file, or nil if the iterator yields none.
// Specifically:
//   - if boundary is inclusive, the returned file's end boundary is strictly
//     greater than boundary.Key;
//   - if boundary is exclusive, the returned file's end boundary is either
//     greater than boundary.Key, or it is inclusive at boundary.Key.
func seekGT(iter *LevelIterator, cmp base.Compare, boundary base.UserKeyBoundary) *FileMetadata {
	file := iter.SeekGE(cmp, boundary.Key)
	if file == nil {
		return nil
	}
	// A largest key equal to boundary.Key is acceptable only when the boundary
	// is exclusive and the file's largest key is not an exclusive sentinel. In
	// every other equal-key case, step to the next file.
	rejectEqual := boundary.Kind == base.Inclusive || file.Largest.IsExclusiveSentinel()
	if rejectEqual && cmp(boundary.Key, file.Largest.UserKey) == 0 {
		return iter.Next()
	}
	return file
}
Expand Down
2 changes: 1 addition & 1 deletion testdata/compaction_elide_tombstone
Original file line number Diff line number Diff line change
Expand Up @@ -205,4 +205,4 @@ elideTombstone("a") = true
elideTombstone("b") = true
elideTombstone("g") = false
elideTombstone("goo") = false
elideTombstone("z") = false
elideTombstone("z") = true
90 changes: 45 additions & 45 deletions testdata/compaction_inuse_key_ranges
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ L0 g h
L1 a b
----
L0 a-b: [a, b]
L0 c-d: [d, e]
L0 c-d: [d, e)
L0 g-h: .
L1 a-b: .

Expand Down Expand Up @@ -113,9 +113,9 @@ L0 a z
L0 a c
L0 g z
----
L0 a-z: [a, b] [c, d] [f, g] [i, j]
L0 a-c: [a, b] [c, d]
L0 g-z: [i, j]
L0 a-z: [a, b) [c, d) [f, g) [i, j)
L0 a-c: [a, b) [c, d)
L0 g-z: [i, j)

define
L1
Expand All @@ -139,7 +139,7 @@ L6:
inuse-key-ranges
L0 a z
----
L0 a-z: [a, j] [k, z]
L0 a-z: [a, j) [k, z)

define
L0
Expand All @@ -164,7 +164,7 @@ L6:
inuse-key-ranges
L0 a z
----
L0 a-z: [a, j] [k, z]
L0 a-z: [a, j) [k, z)

define
L0
Expand Down Expand Up @@ -196,10 +196,10 @@ L0 q r
L0 1 2
L0 ddd dddd
----
L0 a-z: [a, dd] [e, p]
L0 e-p: [e, p]
L0 e-f: [e, m]
L0 b-c: [b, dd]
L0 a-z: [a, dd) [e, p)
L0 e-p: [e, p)
L0 e-f: [e, m)
L0 b-c: [b, dd)
L0 q-r: .
L0 1-2: .
L0 ddd-dddd: .
Expand Down Expand Up @@ -240,13 +240,13 @@ L5 mm zz
L5 l x
L5 l zz
----
L5 a-z: [m, z]
L5 a-z: [m, z)
L5 a-b: .
L5 m-z: [m, z]
L5 m-zz: [m, z]
L5 mm-zz: [m, z]
L5 l-x: [m, z]
L5 l-zz: [m, z]
L5 m-z: [m, z)
L5 m-zz: [m, z)
L5 mm-zz: [m, z)
L5 l-x: [m, z)
L5 l-zz: [m, z)

inuse-key-ranges
L3 a z
Expand All @@ -255,21 +255,21 @@ L3 k m
L3 l ll
L3 b n
----
L3 a-z: [f, k] [m, z]
L3 f-k: [f, k]
L3 k-m: [m, z]
L3 a-z: [f, k) [m, z)
L3 f-k: [f, k)
L3 k-m: [m, z)
L3 l-ll: .
L3 b-n: [f, k] [m, z]
L3 b-n: [f, k) [m, z)

inuse-key-ranges
L2 a z
----
L2 a-z: [b, c] [f, k] [m, z]
L2 a-z: [b, c) [f, k) [m, z)

inuse-key-ranges
L1 a z
----
L1 a-z: [b, d] [f, k] [m, z]
L1 a-z: [b, d) [f, k) [m, z)

inuse-key-ranges
L0 a z
Expand All @@ -278,11 +278,11 @@ L0 a b
L0 bb bc
L0 f k
----
L0 a-z: [a, k] [m, z]
L0 a-k: [a, k]
L0 a-b: [a, c]
L0 bb-bc: [b, c]
L0 f-k: [d, k]
L0 a-z: [a, k) [m, z)
L0 a-k: [a, k)
L0 a-b: [a, c)
L0 bb-bc: [b, c)
L0 f-k: [d, k)

define
L1
Expand Down Expand Up @@ -321,16 +321,16 @@ L0 p z
L0 pp z
L0 oo z
----
L3 a-z: [a, f] [w, z]
L2 a-z: [a, k] [s, z]
L1 a-z: [a, n] [o, z]
L0 a-z: [a, z]
L0 a-n: [a, p]
L0 a-mm: [a, p]
L0 a-nn: [a, p]
L0 p-z: [o, z]
L0 pp-z: [o, z]
L0 oo-z: [m, z]
L3 a-z: [a, f) [w, z)
L2 a-z: [a, k) [s, z)
L1 a-z: [a, n) [o, z)
L0 a-z: [a, z)
L0 a-n: [a, p)
L0 a-mm: [a, p)
L0 a-nn: [a, p)
L0 p-z: [o, z)
L0 pp-z: [o, z)
L0 oo-z: [m, z)

define
L1
Expand All @@ -351,8 +351,8 @@ inuse-key-ranges
L0 a c
L0 a cc
----
L0 a-c: [a, c]
L0 a-cc: [a, c] [cc, cca]
L0 a-c: [a, c)
L0 a-cc: [a, c) [cc, cca)

define
L1
Expand All @@ -379,10 +379,10 @@ L0 a cc
L0 a d
L0 c c
----
L0 a-c: [a, ca]
L0 a-cc: [a, d]
L0 a-d: [a, d]
L0 c-c: [c, ca]
L0 a-c: [a, ca)
L0 a-cc: [a, d)
L0 a-d: [a, d)
L0 c-c: [c, ca)

define
L0
Expand Down Expand Up @@ -415,5 +415,5 @@ inuse-key-ranges
L0 a z
L1 a z
----
L0 a-z: [a, aa] [b, ba] [bb, bba] [c, ca] [d, i]
L1 a-z: [b, ba] [bb, bba] [c, ca] [e, ea]
L0 a-z: [a, aa) [b, ba) [bb, bba) [c, ca) [d, i)
L1 a-z: [b, ba) [bb, bba) [c, ca) [e, ea)

0 comments on commit 28ba805

Please sign in to comment.