Skip to content

Commit

Permalink
[fix](segment cache) estimate memory consumed by segment
Browse files Browse the repository at this point in the history
The memory consumed in segment cache is 0 after
https://github.com/apache/doris/pull/35432/files.

This PR also tracks memory usage of column readers.
  • Loading branch information
dataroaring committed May 30, 2024
1 parent fad30bf commit dddd55f
Show file tree
Hide file tree
Showing 13 changed files with 79 additions and 7 deletions.
2 changes: 2 additions & 0 deletions be/src/olap/primary_key_index.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,8 @@

namespace doris {

static bvar::Adder<size_t> g_primary_key_index_memory_bytes("doris_primary_key_index_memory_bytes");

Status PrimaryKeyIndexBuilder::init() {
// TODO(liaoxin) using the column type directly if there's only one column in unique key columns
const auto* type_info = get_scalar_type_info<FieldType::OLAP_FIELD_TYPE_VARCHAR>();
Expand Down
8 changes: 7 additions & 1 deletion be/src/olap/rowset/segment_v2/column_reader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,8 @@
namespace doris {
namespace segment_v2 {

static bvar::Adder<size_t> g_column_reader_memory_bytes("doris_column_reader_memory_bytes");

Status ColumnReader::create(const ColumnReaderOptions& opts, const ColumnMetaPB& meta,
uint64_t num_rows, const io::FileReaderSPtr& file_reader,
std::unique_ptr<ColumnReader>* reader) {
Expand Down Expand Up @@ -206,9 +208,13 @@ ColumnReader::ColumnReader(const ColumnReaderOptions& opts, const ColumnMetaPB&
_meta_is_nullable = meta.is_nullable();
_meta_dict_page = meta.dict_page();
_meta_compression = meta.compression();

g_column_reader_memory_bytes << sizeof(*this);
}

ColumnReader::~ColumnReader() = default;
// Removes this reader's fixed footprint from the global column-reader byte
// counter, mirroring the `sizeof(*this)` pushed into the same counter by the
// constructor.
ColumnReader::~ColumnReader() {
    // sizeof yields an unsigned value; negating it wraps modulo 2^N, so pushing
    // the wrapped value into the adder lowers the running sum by `self_bytes`.
    const auto self_bytes = sizeof(*this);
    g_column_reader_memory_bytes << -self_bytes;
}

Status ColumnReader::init(const ColumnMetaPB* meta) {
_type_info = get_type_info(meta);
Expand Down
2 changes: 1 addition & 1 deletion be/src/olap/rowset/segment_v2/column_reader.h
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,7 @@ class ColumnReader {

enum DictEncodingType { UNKNOWN_DICT_ENCODING, PARTIAL_DICT_ENCODING, ALL_DICT_ENCODING };

~ColumnReader();
virtual ~ColumnReader();

// create a new column iterator. Client should delete returned iterator
Status new_iterator(ColumnIterator** iterator);
Expand Down
8 changes: 8 additions & 0 deletions be/src/olap/rowset/segment_v2/indexed_column_reader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,8 @@ static bvar::Adder<uint64_t> g_index_reader_pk_pages("doris_pk", "index_reader_p
static bvar::PerSecond<bvar::Adder<uint64_t>> g_index_reader_pk_bytes_per_second(
"doris_pk", "index_reader_pk_pages_per_second", &g_index_reader_pk_pages, 60);

static bvar::Adder<uint64_t> g_index_reader_memory_bytes("doris_index_reader_memory_bytes");

using strings::Substitute;

Status IndexedColumnReader::load(bool use_page_cache, bool kept_in_memory) {
Expand Down Expand Up @@ -91,6 +93,8 @@ Status IndexedColumnReader::load(bool use_page_cache, bool kept_in_memory) {
}
}
_num_values = _meta.num_values();

g_index_reader_memory_bytes << sizeof(*this);
return Status::OK();
}

Expand Down Expand Up @@ -134,6 +138,10 @@ Status IndexedColumnReader::read_page(const PagePointer& pp, PageHandle* handle,
return st;
}

// Removes this reader's fixed footprint from the global index-reader byte
// counter.
//
// NOTE(review): the matching add is performed just before load() returns
// Status::OK(), whereas this subtraction is unconditional. An instance that is
// destroyed without a successful load() would still subtract here -- confirm
// whether that can happen and whether a "loaded" guard is needed.
IndexedColumnReader::~IndexedColumnReader() {
    // Unsigned negation wraps modulo 2^N, so pushing the wrapped value into the
    // adder lowers the running sum by `self_bytes`.
    const auto self_bytes = sizeof(*this);
    g_index_reader_memory_bytes << -self_bytes;
}

///////////////////////////////////////////////////////////////////////////////

Status IndexedColumnIterator::_read_data_page(const PagePointer& pp) {
Expand Down
2 changes: 2 additions & 0 deletions be/src/olap/rowset/segment_v2/indexed_column_reader.h
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,8 @@ class IndexedColumnReader {
explicit IndexedColumnReader(io::FileReaderSPtr file_reader, const IndexedColumnMetaPB& meta)
: _file_reader(std::move(file_reader)), _meta(meta) {}

~IndexedColumnReader();

Status load(bool use_page_cache, bool kept_in_memory);

// read a page specified by `pp' from `file' into `handle'
Expand Down
13 changes: 13 additions & 0 deletions be/src/olap/rowset/segment_v2/ordinal_page_index.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,9 +29,13 @@
#include "olap/olap_common.h"
#include "olap/rowset/segment_v2/page_handle.h"
#include "olap/rowset/segment_v2/page_io.h"
#include "ordinal_page_index.h"
#include "util/slice.h"

namespace doris {

static bvar::Adder<size_t> g_ordinal_index_memory_bytes("doris_ordinal_index_memory_bytes");

namespace segment_v2 {

void OrdinalIndexWriter::append_entry(ordinal_t ordinal, const PagePointer& data_pp) {
Expand Down Expand Up @@ -122,6 +126,9 @@ Status OrdinalIndexReader::_load(bool use_page_cache, bool kept_in_memory,
_pages[i] = reader.get_value(i);
}
_ordinals[_num_pages] = _num_values;

g_ordinal_index_memory_bytes << sizeof(*this) + _ordinals.size() * sizeof(ordinal_t) +
_pages.size() * sizeof(PagePointer) + sizeof(OrdinalIndexReader);
return Status::OK();
}

Expand All @@ -146,5 +153,11 @@ OrdinalPageIndexIterator OrdinalIndexReader::seek_at_or_before(ordinal_t ordinal
return OrdinalPageIndexIterator(this, left);
}

// Removes this reader's estimated footprint from the global ordinal-index byte
// counter, using the same terms that _load() adds once the index is parsed.
//
// NOTE(review): `sizeof(*this)` and `sizeof(OrdinalIndexReader)` are the same
// quantity inside this member function, so the object itself is counted twice.
// That matches the expression in _load(), so both sides must be changed
// together if the double count is unintended. The subtraction here is also
// unconditional, while the add only happens at the end of _load() -- confirm
// readers are never destroyed unloaded.
OrdinalIndexReader::~OrdinalIndexReader() {
    const auto object_bytes = sizeof(*this) + sizeof(OrdinalIndexReader);
    const auto ordinal_bytes = _ordinals.size() * sizeof(ordinal_t);
    const auto page_bytes = _pages.size() * sizeof(PagePointer);

    // All terms are unsigned; negating the total wraps modulo 2^N, so pushing
    // it into the adder lowers the running sum by `total_bytes`.
    const auto total_bytes = object_bytes + ordinal_bytes + page_bytes;
    g_ordinal_index_memory_bytes << -total_bytes;
}


} // namespace segment_v2
} // namespace doris
2 changes: 2 additions & 0 deletions be/src/olap/rowset/segment_v2/ordinal_page_index.h
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,8 @@ class OrdinalIndexReader {
_meta_pb.reset(new OrdinalIndexPB(meta_pb));
}

~OrdinalIndexReader();

// load and parse the index page into memory
Status load(bool use_page_cache, bool kept_in_memory);

Expand Down
20 changes: 15 additions & 5 deletions be/src/olap/rowset/segment_v2/segment.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@
#include "vec/olap/vgeneric_iterators.h"

namespace doris::segment_v2 {
bvar::Adder<size_t> g_total_segment_num("doris_total_segment_num");
static bvar::Adder<size_t> g_total_segment_num("doris_total_segment_num");
class InvertedIndexIterator;

Status Segment::open(io::FileSystemSPtr fs, const std::string& path, uint32_t segment_id,
Expand Down Expand Up @@ -112,6 +112,17 @@ Status Segment::_open() {
// DCHECK(footer.has_short_key_index_page());
_sk_index_page = _footer_pb->short_key_index_page();
_num_rows = _footer_pb->num_rows();

// An estimated memory usage of a segment
_meta_mem_usage += _footer_pb->ByteSizeLong();
_meta_mem_usage += sizeof(*this);
_meta_mem_usage += _tablet_schema->num_columns() * config::estimated_mem_per_column_reader;

// 1024 comes from SegmentWriterOptions
_meta_mem_usage += (_num_rows + 1023) / 1024 * (36 + 4);
// 0.01 comes from PrimaryKeyIndexBuilder::init
_meta_mem_usage += BloomFilter::optimal_bit_num(_num_rows, 0.01) / 8;

return Status::OK();
}

Expand Down Expand Up @@ -298,7 +309,7 @@ Status Segment::_load_pk_bloom_filter() {
auto status = [this]() {
return _load_pk_bf_once.call([this] {
RETURN_IF_ERROR(_pk_index_reader->parse_bf(_file_reader, *_pk_index_meta));
_meta_mem_usage += _pk_index_reader->get_bf_memory_size();
// _meta_mem_usage += _pk_index_reader->get_bf_memory_size();
return Status::OK();
});
}();
Expand Down Expand Up @@ -335,7 +346,7 @@ Status Segment::_load_index_impl() {
if (_tablet_schema->keys_type() == UNIQUE_KEYS && _pk_index_meta != nullptr) {
_pk_index_reader.reset(new PrimaryKeyIndexReader());
RETURN_IF_ERROR(_pk_index_reader->parse_index(_file_reader, *_pk_index_meta));
_meta_mem_usage += _pk_index_reader->get_memory_size();
// _meta_mem_usage += _pk_index_reader->get_memory_size();
return Status::OK();
} else {
// read and parse short key index page
Expand All @@ -357,7 +368,7 @@ Status Segment::_load_index_impl() {
DCHECK_EQ(footer.type(), SHORT_KEY_PAGE);
DCHECK(footer.has_short_key_page_footer());

_meta_mem_usage += body.get_size();
// _meta_mem_usage += body.get_size();
_sk_index_decoder.reset(new ShortKeyIndexDecoder);
return _sk_index_decoder->parse(body, footer.short_key_page_footer());
}
Expand Down Expand Up @@ -427,7 +438,6 @@ Status Segment::_create_column_readers(const SegmentFooterPB& footer) {
RETURN_IF_ERROR(ColumnReader::create(opts, footer.columns(iter->second), footer.num_rows(),
_file_reader, &reader));
_column_readers.emplace(column.unique_id(), std::move(reader));
_meta_mem_usage += config::estimated_mem_per_column_reader;
}

// init by column path
Expand Down
1 change: 1 addition & 0 deletions be/src/olap/rowset/segment_v2/segment.h
Original file line number Diff line number Diff line change
Expand Up @@ -257,6 +257,7 @@ class Segment : public std::enable_shared_from_this<Segment> {
// used to hold short key index page in memory
PageHandle _sk_index_handle;
// short key index decoder
// all content is in memory
std::unique_ptr<ShortKeyIndexDecoder> _sk_index_decoder;
// primary key index reader
std::unique_ptr<PrimaryKeyIndexReader> _pk_index_reader;
Expand Down
11 changes: 11 additions & 0 deletions be/src/olap/rowset/segment_v2/zone_map_index.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,8 @@
namespace doris {
struct uint24_t;

static bvar::Adder<size_t> g_zone_map_memory_bytes("doris_zone_map_memory_bytes");

namespace segment_v2 {

template <PrimitiveType Type>
Expand Down Expand Up @@ -173,9 +175,18 @@ Status ZoneMapIndexReader::_load(bool use_page_cache, bool kept_in_memory,
return Status::Corruption("Failed to parse zone map");
}
}

g_zone_map_memory_bytes << sizeof(*this) + sizeof(ZoneMapPB) * _page_zone_maps.size() +
sizeof(IndexedColumnMetaPB);

return Status::OK();
}

// Removes this reader's estimated footprint from the global zone-map byte
// counter, using the same terms that _load() adds on its success path.
ZoneMapIndexReader::~ZoneMapIndexReader() {
    // Maybe wrong due to load failures: the add only happens when _load()
    // succeeds, but this subtraction always runs.
    const auto zone_map_bytes = sizeof(ZoneMapPB) * _page_zone_maps.size();
    const auto total_bytes = sizeof(*this) + zone_map_bytes + sizeof(IndexedColumnMetaPB);

    // Unsigned negation wraps modulo 2^N, so pushing the wrapped value into the
    // adder lowers the running sum by `total_bytes`.
    g_zone_map_memory_bytes << -total_bytes;
}
#define APPLY_FOR_PRIMITITYPE(M) \
M(TYPE_TINYINT) \
M(TYPE_SMALLINT) \
Expand Down
2 changes: 2 additions & 0 deletions be/src/olap/rowset/segment_v2/zone_map_index.h
Original file line number Diff line number Diff line change
Expand Up @@ -151,6 +151,8 @@ class ZoneMapIndexReader {
_page_zone_maps_meta.reset(new IndexedColumnMetaPB(page_zone_maps));
}

virtual ~ZoneMapIndexReader();

// load all page zone maps into memory
Status load(bool use_page_cache, bool kept_in_memory);

Expand Down
14 changes: 14 additions & 0 deletions be/src/olap/short_key_index.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,12 +22,15 @@
#include <ostream>

#include "gutil/strings/substitute.h"
#include "short_key_index.h"
#include "util/coding.h"

using strings::Substitute;

namespace doris {

static bvar::Adder<size_t> g_short_key_index_memory_bytes("doris_short_key_index_memory_bytes");

Status ShortKeyIndexBuilder::add_item(const Slice& key) {
put_varint32(&_offset_buf, _key_buf.size());
_key_buf.append(key.data, key.size);
Expand Down Expand Up @@ -85,7 +88,18 @@ Status ShortKeyIndexDecoder::parse(const Slice& body, const segment_v2::ShortKey
return Status::Corruption("Still has data after parse all key offset");
}
_parsed = true;

g_short_key_index_memory_bytes << sizeof(_footer) + _key_data.size +
_offsets.size() * sizeof(uint32_t) + sizeof(*this);

return Status::OK();
}

// Removes this decoder's estimated footprint from the global short-key-index
// byte counter. parse() only adds to the counter after setting `_parsed`, so a
// decoder that never finished parsing has nothing to give back.
ShortKeyIndexDecoder::~ShortKeyIndexDecoder() {
    if (!_parsed) {
        return;
    }

    // Same terms as the add in parse(): footer, key bytes, offset table, object.
    const auto offset_bytes = _offsets.size() * sizeof(uint32_t);
    const auto total_bytes = sizeof(_footer) + _key_data.size + offset_bytes + sizeof(*this);

    // Unsigned negation wraps modulo 2^N, so pushing the wrapped value into the
    // adder lowers the running sum by `total_bytes`.
    g_short_key_index_memory_bytes << -total_bytes;
}

} // namespace doris
1 change: 1 addition & 0 deletions be/src/olap/short_key_index.h
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,7 @@ class ShortKeyIndexIterator {
class ShortKeyIndexDecoder {
public:
ShortKeyIndexDecoder() : _parsed(false) {}
virtual ~ShortKeyIndexDecoder();

// client should assure that body is available when this class is used
Status parse(const Slice& body, const segment_v2::ShortKeyFooterPB& footer);
Expand Down

0 comments on commit dddd55f

Please sign in to comment.