From 41145ee9d232340747c0c4bf11154bd0ba18cbcf Mon Sep 17 00:00:00 2001 From: Jackson Owens Date: Wed, 2 Oct 2024 16:28:50 -0400 Subject: [PATCH] sstable: populate range deletion, range key props in RawColumnWriter Update RawColumnWriter to update sstable properties when writing range deletions and range keys. The properties are computed differently in columnar sstables, because the old accounting was tightly coupled with the encoding. --- sstable/colblk_writer.go | 28 ++++++ sstable/properties.go | 9 +- .../testdata/columnar_writer/simple_binary | 94 ++++++++++--------- sstable/testdata/writer_v5 | 53 ++++++++++- sstable/writer_test.go | 3 + 5 files changed, 137 insertions(+), 50 deletions(-) diff --git a/sstable/colblk_writer.go b/sstable/colblk_writer.go index bbcc10d812..85de5620d8 100644 --- a/sstable/colblk_writer.go +++ b/sstable/colblk_writer.go @@ -258,6 +258,34 @@ func (w *RawColumnWriter) EncodeSpan(span keyspan.Span) error { blockWriter := &w.rangeKeyBlock if span.Keys[0].Kind() == base.InternalKeyKindRangeDelete { blockWriter = &w.rangeDelBlock + // Update range delete properties. + // NB: These properties are computed differently than the rowblk sstable + // writer because this writer does not flatten them into row key-value + // pairs. + w.props.RawKeySize += uint64(len(span.Start) + len(span.End)) + count := uint64(len(span.Keys)) + w.props.NumEntries += count + w.props.NumDeletions += count + w.props.NumRangeDeletions += count + } else { + // Update range key properties. + // NB: These properties are computed differently than the rowblk sstable + // writer because this writer does not flatten them into row key-value + // pairs. + w.props.RawRangeKeyKeySize += uint64(len(span.Start) + len(span.End)) + for _, k := range span.Keys { + w.props.RawRangeKeyValueSize += uint64(len(k.Value)) + switch k.Kind() { + case base.InternalKeyKindRangeKeyDelete: + w.props.NumRangeKeyDels++ + case base.InternalKeyKindRangeKeySet: + w.props.NumRangeKeySets++ + case base.InternalKeyKindRangeKeyUnset: + w.props.NumRangeKeyUnsets++ + default: + panic(errors.Errorf("pebble: invalid range key type: %s", k.Kind())) + } + } } if !w.disableKeyOrderChecks && blockWriter.KeyCount() > 0 { // Check that spans are being added in fragmented order. If the two diff --git a/sstable/properties.go b/sstable/properties.go index 9e10b82361..45eef946c3 100644 --- a/sstable/properties.go +++ b/sstable/properties.go @@ -11,6 +11,7 @@ import ( "math" "reflect" "sort" + "strings" "unsafe" "github.com/cockroachdb/pebble/internal/intern" @@ -254,7 +255,13 @@ func (p *Properties) String() string { } sort.Strings(keys) for _, key := range keys { - fmt.Fprintf(&buf, "%s: %s\n", key, p.UserProperties[key]) + // If there are characters outside of the printable ASCII range, print + // the value in hexadecimal. + if strings.IndexFunc(p.UserProperties[key], func(r rune) bool { return r < ' ' || r > '~' }) != -1 { + fmt.Fprintf(&buf, "%s: hex:%x\n", key, p.UserProperties[key]) + } else { + fmt.Fprintf(&buf, "%s: %s\n", key, p.UserProperties[key]) + } } return buf.String() } diff --git a/sstable/testdata/columnar_writer/simple_binary b/sstable/testdata/columnar_writer/simple_binary index 92dc67b768..e8abe76427 100644 --- a/sstable/testdata/columnar_writer/simple_binary +++ b/sstable/testdata/columnar_writer/simple_binary @@ -175,7 +175,7 @@ rocksdb.block.based.table.index.type: 0 rocksdb.merge.operator: pebble.concatenate rocksdb.merge.operands: 0 rocksdb.property.collectors: [obsolete-key] -obsolete-key: +obsolete-key: hex:00 build block-size=150 a.SET.1:apple @@ -1108,7 +1108,7 @@ rocksdb.block.based.table.index.type: 0 rocksdb.merge.operator: pebble.concatenate rocksdb.merge.operands: 0 rocksdb.property.collectors: [obsolete-key] -obsolete-key: +obsolete-key: hex:00 layout ---- @@ -1856,18 +1856,18 @@ describe-binary 275-295: x 745f62797465733d303b207a7374645f6d61785f # t_bytes=0; zstd_max_ 295-315: x 747261696e5f62797465733d303b20656e61626c # train_bytes=0; enabl 315-335: x 65643d303b20080901646174612e73697a650009 # ed=0; ...data.size.. -335-355: x 0b01656c657465642e6b65797300080b0166696c # ..eleted.keys....fil +335-355: x 0b01656c657465642e6b65797301080b0166696c # ..eleted.keys....fil 355-375: x 7465722e73697a6500080a01696e6465782e7369 # ter.size....index.si 375-395: x 7a6521080e016d657267652e6f706572616e6473 # ze!...merge.operands 395-415: x 00130312746f72706562626c652e636f6e636174 # ....torpebble.concat 415-435: x 656e617465080f016e756d2e646174612e626c6f # enate...num.data.blo -435-455: x 636b73000c0701656e7472696573000c0f017261 # cks....entries....ra +435-455: x 636b73000c0701656e7472696573010c0f017261 # cks....entries....ra 455-475: x 6e67652d64656c6574696f6e730108130e70726f # nge-deletions....pro 475-495: x 70657274792e636f6c6c6563746f72735b6f6273 # perty.collectors[obs 495-515: x 6f6c6574652d6b65795d080c017261772e6b6579 # olete-key]...raw.key -515-535: x 2e73697a65000c0a0176616c75652e73697a6500 # .size....value.size. +515-535: x 2e73697a65020c0a0176616c75652e73697a6500 # .size....value.size. 535-543: x 0000000001000000 # ........ -543-548: x 00dbad0e95 # properties block trailer +543-548: x 0082ba1e47 # properties block trailer # block 3 meta-index (0548-0607) 548-568: x 001203726f636b7364622e70726f706572746965 # meta-index 568-588: x 735fc003001202726f636b7364622e72616e6765 # (continued...) @@ -1976,43 +1976,47 @@ describe-binary 100-100: x # data[1]: 100-101: x 00 # block padding byte 101-106: x 00e75b3245 # range-key block trailer -# block 2 properties (0106-0554) -106-126: x 000c026f62736f6c6574652d6b65790074002404 # ...obsolete-key.t.$. -126-146: x 726f636b7364622e626c6f636b2e62617365642e # rocksdb.block.based. -146-166: x 7461626c652e696e6465782e7479706500000000 # table.index.type.... -166-186: x 080a18636f6d70617261746f72706562626c652e # ...comparatorpebble. -186-206: x 696e7465726e616c2e746573746b6579730c070d # internal.testkeys... -206-226: x 72657373696f6e4e6f436f6d7072657373696f6e # ressionNoCompression -226-246: x 13085f5f6f7074696f6e7377696e646f775f6269 # ..__optionswindow_bi -246-266: x 74733d2d31343b206c6576656c3d33323736373b # ts=-14; level=32767; -266-286: x 2073747261746567793d303b206d61785f646963 # strategy=0; max_dic -286-306: x 745f62797465733d303b207a7374645f6d61785f # t_bytes=0; zstd_max_ -306-326: x 747261696e5f62797465733d303b20656e61626c # train_bytes=0; enabl -326-346: x 65643d303b20080901646174612e73697a650009 # ed=0; ...data.size.. -346-366: x 0b01656c657465642e6b65797300080b0166696c # ..eleted.keys....fil -366-386: x 7465722e73697a6500080a01696e6465782e7369 # ter.size....index.si -386-406: x 7a6521080e016d657267652e6f706572616e6473 # ze!...merge.operands -406-426: x 00130312746f72706562626c652e636f6e636174 # ....torpebble.concat -426-446: x 656e617465080f016e756d2e646174612e626c6f # enate...num.data.blo -446-466: x 636b73000c0701656e7472696573000c0f017261 # cks....entries....ra -466-486: x 6e67652d64656c6574696f6e730008130e70726f # nge-deletions....pro -486-506: x 70657274792e636f6c6c6563746f72735b6f6273 # perty.collectors[obs -506-526: x 6f6c6574652d6b65795d080c017261772e6b6579 # olete-key]...raw.key -526-546: x 2e73697a65000c0a0176616c75652e73697a6500 # .size....value.size. -546-554: x 0000000001000000 # ........ -554-559: x 00eb027e31 # properties block trailer -# block 3 meta-index (0559-0616) -559-579: x 001002706562626c652e72616e67655f6b657921 # meta-index -579-599: x 44001203726f636b7364622e70726f7065727469 # (continued...) -599-616: x 65736ac003000000001500000002000000 # (continued...) -616-621: x 006d0a7327 # meta-index block trailer +# block 2 properties (0106-0641) +106-126: x 000c026f62736f6c6574652d6b65790074001901 # ...obsolete-key.t... +126-146: x 706562626c652e6e756d2e72616e67652d6b6579 # pebble.num.range-key +146-166: x 2d64656c73011504017365747301150601756e73 # -dels....sets....uns +166-186: x 657473000716017261772e72616e67652d6b6579 # ets....raw.range-key +186-206: x 2e6b65792e73697a6504150a0176616c75652e73 # .key.size....value.s +206-226: x 697a6503002404726f636b7364622e626c6f636b # ize..$.rocksdb.block +226-246: x 2e62617365642e7461626c652e696e6465782e74 # .based.table.index.t +246-266: x 79706500000000080a18636f6d70617261746f72 # ype.......comparator +266-286: x 706562626c652e696e7465726e616c2e74657374 # pebble.internal.test +286-306: x 6b6579730c070d72657373696f6e4e6f436f6d70 # keys...ressionNoComp +306-326: x 72657373696f6e13085f5f6f7074696f6e737769 # ression..__optionswi +326-346: x 6e646f775f626974733d2d31343b206c6576656c # ndow_bits=-14; level +346-366: x 3d33323736373b2073747261746567793d303b20 # =32767; strategy=0; +366-386: x 6d61785f646963745f62797465733d303b207a73 # max_dict_bytes=0; zs +386-406: x 74645f6d61785f747261696e5f62797465733d30 # td_max_train_bytes=0 +406-426: x 3b20656e61626c65643d303b2008090164617461 # ; enabled=0; ...data +426-446: x 2e73697a6500090b01656c657465642e6b657973 # .size....eleted.keys +446-466: x 00080b0166696c7465722e73697a6500080a0169 # ....filter.size....i +466-486: x 6e6465782e73697a6521080e016d657267652e6f # ndex.size!...merge.o +486-506: x 706572616e647300130312746f72706562626c65 # perands....torpebble +506-526: x 2e636f6e636174656e617465080f016e756d2e64 # .concatenate...num.d +526-546: x 6174612e626c6f636b73000c0701656e74726965 # ata.blocks....entrie +546-566: x 73000c0f0172616e67652d64656c6574696f6e73 # s....range-deletions +566-586: x 0008130e70726f70657274792e636f6c6c656374 # ....property.collect +586-606: x 6f72735b6f62736f6c6574652d6b65795d080c01 # ors[obsolete-key]... +606-626: x 7261772e6b65792e73697a65000c0a0176616c75 # raw.key.size....valu +626-641: x 652e73697a65000000000001000000 # e.size......... +641-646: x 0087d3f282 # properties block trailer +# block 3 meta-index (0646-0703) +646-666: x 001002706562626c652e72616e67655f6b657921 # meta-index +666-686: x 44001203726f636b7364622e70726f7065727469 # (continued...) +686-703: x 65736a9704000000001500000002000000 # (continued...) +703-708: x 00e7b6e7c9 # meta-index block trailer # sstable footer -621-622: x 01 # checksum type -622-624: x af04 # uvarint(559): metaindex.Offset -624-625: x 39 # uvarint(57): metaindex.Length -625-626: x 00 # uvarint(0): index.Offset -626-627: x 1c # uvarint(28): index.Length -627-647: x 0000000000000000000000000000000000000000 # padding -647-662: x 000000000000000000000000000000 # (continued...) -662-666: x 05000000 # table version -666-674: x f09faab3f09faab3 # magic +708-709: x 01 # checksum type +709-711: x 8605 # uvarint(646): metaindex.Offset +711-712: x 39 # uvarint(57): metaindex.Length +712-713: x 00 # uvarint(0): index.Offset +713-714: x 1c # uvarint(28): index.Length +714-734: x 0000000000000000000000000000000000000000 # padding +734-749: x 000000000000000000000000000000 # (continued...) +749-753: x 05000000 # table version +753-761: x f09faab3f09faab3 # magic diff --git a/sstable/testdata/writer_v5 b/sstable/testdata/writer_v5 index 1fdcd5523e..316e32f875 100644 --- a/sstable/testdata/writer_v5 +++ b/sstable/testdata/writer_v5 @@ -88,6 +88,26 @@ s-z:{(#1,RANGEDEL)} scan-range-key ---- +props +---- +rocksdb.num.entries: 9 +rocksdb.raw.key.size: 10 +rocksdb.raw.value.size: 0 +rocksdb.deleted.keys: 9 +rocksdb.num.range-deletions: 9 +rocksdb.num.data.blocks: 0 +rocksdb.compression: Snappy +rocksdb.compression_options: window_bits=-14; level=32767; strategy=0; max_dict_bytes=0; zstd_max_train_bytes=0; enabled=0; +rocksdb.comparator: pebble.internal.testkeys +rocksdb.data.size: 0 +rocksdb.filter.size: 0 +rocksdb.index.size: 33 +rocksdb.block.based.table.index.type: 0 +rocksdb.merge.operator: pebble.concatenate +rocksdb.merge.operands: 0 +rocksdb.property.collectors: [obsolete-key] +obsolete-key: hex:0074 + # The range tombstone upper bound is exclusive, so a point operation # on that same key will be the actual boundary. @@ -276,7 +296,32 @@ layout ---- 0 index (28) 33 range-key (84) - 122 properties (441) - 568 meta-index (57) - 630 footer (53) - 683 EOF + 122 properties (528) + 655 meta-index (57) + 717 footer (53) + 770 EOF + +props +---- +rocksdb.num.entries: 0 +rocksdb.raw.key.size: 0 +rocksdb.raw.value.size: 0 +rocksdb.deleted.keys: 0 +rocksdb.num.range-deletions: 0 +pebble.num.range-key-dels: 0 +pebble.num.range-key-sets: 3 +rocksdb.num.data.blocks: 0 +rocksdb.compression: Snappy +rocksdb.compression_options: window_bits=-14; level=32767; strategy=0; max_dict_bytes=0; zstd_max_train_bytes=0; enabled=0; +rocksdb.comparator: pebble.internal.testkeys +rocksdb.data.size: 0 +rocksdb.filter.size: 0 +rocksdb.index.size: 33 +rocksdb.block.based.table.index.type: 0 +rocksdb.merge.operator: pebble.concatenate +rocksdb.merge.operands: 0 +pebble.num.range-key-unsets: 0 +rocksdb.property.collectors: [obsolete-key] +pebble.raw.range-key.key.size: 6 +pebble.raw.range-key.value.size: 9 +obsolete-key: hex:0074 diff --git a/sstable/writer_test.go b/sstable/writer_test.go index 85314eab9a..4d0b6cb7a7 100644 --- a/sstable/writer_test.go +++ b/sstable/writer_test.go @@ -281,6 +281,9 @@ func runDataDriven(t *testing.T, file string, tableFormat TableFormat, paralleli } return formatWriterMetadata(td, meta) + case "props": + return r.Properties.String() + default: return fmt.Sprintf("unknown command: %s", td.Cmd) }