Skip to content

Commit

Permalink
sstable: refactor suffix-rewriting logic
Browse files Browse the repository at this point in the history
Refactor the suffix-rewriting logic to pull more logic behind the RawWriter
interface. Much of the existing logic peered into RawRowWriter internals.
Future work will follow up with an implementation fo the rewriteSuffixes method
for RawColumnWriter.
  • Loading branch information
jbowens committed Sep 23, 2024
1 parent 5b8552f commit 785dc8d
Show file tree
Hide file tree
Showing 7 changed files with 193 additions and 207 deletions.
7 changes: 7 additions & 0 deletions sstable/colblk_writer.go
Original file line number Diff line number Diff line change
Expand Up @@ -862,6 +862,13 @@ func (w *RawColumnWriter) Close() (err error) {
return nil
}

// rewriteSuffixes implements RawWriter.
func (w *RawColumnWriter) rewriteSuffixes(
r *Reader, wo WriterOptions, from, to []byte, concurrency int,
) error {
panic("unimplemented")
}

func shouldFlushWithoutLatestKV(
sizeWithKV int,
sizeWithoutKV int,
Expand Down
8 changes: 3 additions & 5 deletions sstable/copier.go
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,9 @@ func CopySpan(
// positives for keys in blocks of the original file that we don't copy, but
// filters can always have false positives, so this is fine.
if r.tableFilter != nil {
if w.filter != nil && r.Properties.FilterPolicyName != w.filter.policyName() {
return 0, errors.New("mismatched filters")
}
filterBlock, err := r.readFilter(ctx, rh, nil, nil)
if err != nil {
return 0, errors.Wrap(err, "reading filter")
Expand All @@ -127,11 +130,6 @@ func CopySpan(
w.props.TopLevelIndexSize = 0
w.props.IndexSize = 0
w.props.IndexType = 0
if w.filter != nil {
if err := checkWriterFilterMatchesReader(r, w); err != nil {
return 0, err
}
}

// Find the blocks that intersect our span.
blocks, err := intersectingIndexEntries(ctx, r, rh, indexH, start, end)
Expand Down
99 changes: 99 additions & 0 deletions sstable/rowblk_writer.go
Original file line number Diff line number Diff line change
Expand Up @@ -1935,6 +1935,105 @@ func newRowWriter(writable objstorage.Writable, o WriterOptions) *RawRowWriter {
return w
}

// rewriteSuffixes implements RawWriter.
func (w *RawRowWriter) rewriteSuffixes(
r *Reader, wo WriterOptions, from, to []byte, concurrency int,
) error {
for _, c := range w.blockPropCollectors {
if !c.SupportsSuffixReplacement() {
return errors.Errorf("block property collector %s does not support suffix replacement", c.Name())
}
}
l, err := r.Layout()
if err != nil {
return errors.Wrap(err, "reading layout")
}

// Copy data blocks in parallel, rewriting suffixes as we go.
blocks, err := rewriteDataBlocksInParallel(r, wo, l.Data, from, to, concurrency, func() blockRewriter {
return rowblk.NewRewriter(r.Comparer, wo.BlockRestartInterval)
})
if err != nil {
return errors.Wrap(err, "rewriting data blocks")
}

// oldShortIDs maps the shortID for the block property collector in the old
// blocks to the shortID in the new blocks. Initialized once for the sstable.
oldShortIDs, err := getShortIDs(r, w.blockPropCollectors)
if err != nil {
return errors.Wrap(err, "getting short IDs")
}
oldProps := make([][]byte, len(w.blockPropCollectors))

for i := range blocks {
// Write the rewritten block to the file.
bh, err := w.layout.WritePrecompressedDataBlock(blocks[i].physical)
if err != nil {
return err
}

// Load any previous values for our prop collectors into oldProps.
for i := range oldProps {
oldProps[i] = nil
}
decoder := makeBlockPropertiesDecoder(len(oldProps), l.Data[i].Props)
for !decoder.Done() {
id, val, err := decoder.Next()
if err != nil {
return err
}
if oldShortIDs[id].IsValid() {
oldProps[oldShortIDs[id]] = val
}
}
for i, p := range w.blockPropCollectors {
if err := p.AddCollectedWithSuffixReplacement(oldProps[i], from, to); err != nil {
return err
}
}

bhp, err := w.maybeAddBlockPropertiesToBlockHandle(bh)
if err != nil {
return err
}
var nextKey InternalKey
if i+1 < len(blocks) {
nextKey = blocks[i+1].start
}
if err = w.addIndexEntrySync(blocks[i].end, nextKey, bhp, w.dataBlockBuf.tmp[:]); err != nil {
return err
}
}
if len(blocks) > 0 {
w.meta.Size = w.layout.offset
w.meta.updateSeqNum(blocks[0].start.SeqNum())
w.props.NumEntries = r.Properties.NumEntries
w.props.RawKeySize = r.Properties.RawKeySize
w.props.RawValueSize = r.Properties.RawValueSize
w.meta.SetSmallestPointKey(blocks[0].start)
w.meta.SetLargestPointKey(blocks[len(blocks)-1].end)
}

// Copy range key block, replacing suffixes if it exists.
if err := rewriteRangeKeyBlockToWriter(r, w, from, to); err != nil {
return errors.Wrap(err, "rewriting range key blocks")
}
// Copy over the filter block if it exists (rewriteDataBlocksToWriter will
// already have ensured this is valid if it exists).
if w.filter != nil {
if filterBlockBH, ok := l.FilterByName(w.filter.metaName()); ok {
filterBlock, _, err := readBlockBuf(r, filterBlockBH, nil)
if err != nil {
return errors.Wrap(err, "reading filter")
}
w.filter = copyFilterWriter{
origPolicyName: w.filter.policyName(), origMetaName: w.filter.metaName(), data: filterBlock,
}
}
}
return nil
}

// SetSnapshotPinnedProperties sets the properties for pinned keys. Should only
// be used internally by Pebble.
func (w *RawRowWriter) SetSnapshotPinnedProperties(
Expand Down
Loading

0 comments on commit 785dc8d

Please sign in to comment.