Skip to content

Commit

Permalink
[improvement](segmentcache) limit segment cache by memory or segment num
Browse files Browse the repository at this point in the history
Also enlarge columns per segment.
  • Loading branch information
dataroaring committed Jun 28, 2024
1 parent aeb89db commit 6150cf3
Show file tree
Hide file tree
Showing 5 changed files with 19 additions and 10 deletions.
2 changes: 1 addition & 1 deletion be/src/common/config.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1072,7 +1072,7 @@ DEFINE_mInt32(schema_cache_sweep_time_sec, "100");

// max number of segments in the segment cache; default -1 (kept for backward compatibility) means fd_number * 1 / 5
DEFINE_mInt32(segment_cache_capacity, "-1");
DEFINE_mInt32(estimated_num_columns_per_segment, "30");
DEFINE_mInt32(estimated_num_columns_per_segment, "200");
DEFINE_mInt32(estimated_mem_per_column_reader, "1024");
// The value is calculated by storage_page_cache_limit * index_page_cache_percentage
DEFINE_mInt32(segment_cache_memory_percentage, "2");
Expand Down
4 changes: 4 additions & 0 deletions be/src/olap/lru_cache.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ namespace doris {

DEFINE_GAUGE_METRIC_PROTOTYPE_2ARG(cache_capacity, MetricUnit::BYTES);
DEFINE_GAUGE_METRIC_PROTOTYPE_2ARG(cache_usage, MetricUnit::BYTES);
DEFINE_GAUGE_METRIC_PROTOTYPE_2ARG(cache_element_count, MetricUnit::NOUNIT);
DEFINE_GAUGE_METRIC_PROTOTYPE_2ARG(cache_usage_ratio, MetricUnit::NOUNIT);
DEFINE_COUNTER_METRIC_PROTOTYPE_2ARG(cache_lookup_count, MetricUnit::OPERATIONS);
DEFINE_COUNTER_METRIC_PROTOTYPE_2ARG(cache_hit_count, MetricUnit::OPERATIONS);
Expand Down Expand Up @@ -640,15 +641,18 @@ void ShardedLRUCache::update_cache_metrics() const {
size_t total_usage = 0;
size_t total_lookup_count = 0;
size_t total_hit_count = 0;
size_t total_element_count = 0;
for (int i = 0; i < _num_shards; i++) {
total_capacity += _shards[i]->get_capacity();
total_usage += _shards[i]->get_usage();
total_lookup_count += _shards[i]->get_lookup_count();
total_hit_count += _shards[i]->get_hit_count();
total_element_count += _shards[i]->get_element_count();
}

cache_capacity->set_value(total_capacity);
cache_usage->set_value(total_usage);
cache_element_count->set_value(total_element_count);
cache_lookup_count->set_value(total_lookup_count);
cache_hit_count->set_value(total_hit_count);
cache_usage_ratio->set_value(total_capacity == 0 ? 0 : ((double)total_usage / total_capacity));
Expand Down
2 changes: 2 additions & 0 deletions be/src/olap/lru_cache.h
Original file line number Diff line number Diff line change
Expand Up @@ -349,6 +349,7 @@ class LRUCache {
uint64_t get_hit_count() const { return _hit_count; }
size_t get_usage() const { return _usage; }
size_t get_capacity() const { return _capacity; }
size_t get_element_count() const { return _table.element_count(); }

private:
void _lru_remove(LRUHandle* e);
Expand Down Expand Up @@ -433,6 +434,7 @@ class ShardedLRUCache : public Cache {
std::shared_ptr<MetricEntity> _entity;
IntGauge* cache_capacity = nullptr;
IntGauge* cache_usage = nullptr;
IntGauge* cache_element_count = nullptr;
DoubleGauge* cache_usage_ratio = nullptr;
IntAtomicCounter* cache_lookup_count = nullptr;
IntAtomicCounter* cache_hit_count = nullptr;
Expand Down
13 changes: 8 additions & 5 deletions be/src/olap/segment_loader.h
Original file line number Diff line number Diff line change
Expand Up @@ -80,10 +80,11 @@ class SegmentCache : public LRUCachePolicyTrackingManual {
segment_v2::SegmentSharedPtr segment;
};

SegmentCache(size_t capacity)
: LRUCachePolicyTrackingManual(CachePolicy::CacheType::SEGMENT_CACHE, capacity,
LRUCacheType::SIZE,
config::tablet_rowset_stale_sweep_time_sec) {}
// Constructs the segment cache from two limits: memory_bytes_limit and
// segment_num_limit. Everything is forwarded to the
// LRUCachePolicyTrackingManual base: the SEGMENT_CACHE cache type,
// memory_bytes_limit with LRUCacheType::SIZE,
// config::tablet_rowset_stale_sweep_time_sec, and then two trailing arguments.
// NOTE(review): the trailing arguments are presumably the shard count
// (DEFAULT_LRU_CACHE_NUM_SHARDS * 3) and the element-count capacity
// (segment_num_limit) -- confirm against the base-class constructor.
SegmentCache(size_t memory_bytes_limit, size_t segment_num_limit)
: LRUCachePolicyTrackingManual(CachePolicy::CacheType::SEGMENT_CACHE,
memory_bytes_limit, LRUCacheType::SIZE,
config::tablet_rowset_stale_sweep_time_sec,
DEFAULT_LRU_CACHE_NUM_SHARDS * 3, segment_num_limit) {}

// Lookup the given segment in the cache.
// If the segment is found, the cache entry will be written into handle.
Expand All @@ -110,7 +111,9 @@ class SegmentLoader {
// After the estimation of segment memory usage is provided later, it is recommended
// to use Memory as the capacity limit of the cache.

SegmentLoader(size_t capacity) { _segment_cache = std::make_unique<SegmentCache>(capacity); }
SegmentLoader(size_t memory_limit_bytes, size_t segment_num_count) {
_segment_cache = std::make_unique<SegmentCache>(memory_limit_bytes, segment_num_count);
}

// Load segments of "rowset", return the "cache_handle" which contains segments.
// If use_cache is true, it will be loaded from _cache.
Expand Down
8 changes: 4 additions & 4 deletions be/src/runtime/exec_env_init.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -515,8 +515,8 @@ Status ExecEnv::_init_mem_env() {
// SegmentLoader caches segments in rowset granularity. So the number of
// opened files will be greater than segment_cache_capacity.
int64_t segment_cache_capacity = config::segment_cache_capacity;
if (segment_cache_capacity < 0 || segment_cache_capacity > fd_number * 2 / 5) {
segment_cache_capacity = fd_number * 2 / 5;
if (segment_cache_capacity < 0 || segment_cache_capacity > fd_number * 1 / 5) {
segment_cache_capacity = fd_number * 1 / 5;
}

int64_t segment_cache_mem_limit =
Expand All @@ -526,8 +526,8 @@ Status ExecEnv::_init_mem_env() {
min(segment_cache_mem_limit, segment_cache_capacity *
config::estimated_num_columns_per_segment *
config::estimated_mem_per_column_reader);
_segment_loader = new SegmentLoader(min_segment_cache_mem_limit);
LOG(INFO) << "segment_cache_capacity <= fd_number * 2 / 5, fd_number: " << fd_number
_segment_loader = new SegmentLoader(min_segment_cache_mem_limit, segment_cache_capacity);
LOG(INFO) << "segment_cache_capacity <= fd_number * 1 / 5, fd_number: " << fd_number
<< " segment_cache_capacity: " << segment_cache_capacity
<< " min_segment_cache_mem_limit " << min_segment_cache_mem_limit;

Expand Down

0 comments on commit 6150cf3

Please sign in to comment.