From ac5bef7c74f98fdc8423d8ef5c9d78f1351d79cb Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 17 Feb 2024 06:16:01 +0100 Subject: [PATCH 1/3] A small preparation for better handling of primary key in memory --- src/Processors/QueryPlan/PartsSplitter.cpp | 2 +- src/Storages/MergeTree/IMergeTreeDataPart.cpp | 2 +- src/Storages/MergeTree/IMergeTreeDataPart.h | 21 ++++++++++--------- src/Storages/MergeTree/MergeTreeData.cpp | 4 ++-- .../MergeTree/MergeTreeDataSelectExecutor.cpp | 2 +- .../MergeTree/MergedBlockOutputStream.cpp | 2 +- src/Storages/MergeTree/MutateTask.cpp | 2 +- 7 files changed, 18 insertions(+), 17 deletions(-) diff --git a/src/Processors/QueryPlan/PartsSplitter.cpp b/src/Processors/QueryPlan/PartsSplitter.cpp index 363fdca22c54..0fc6ddd64081 100644 --- a/src/Processors/QueryPlan/PartsSplitter.cpp +++ b/src/Processors/QueryPlan/PartsSplitter.cpp @@ -54,7 +54,7 @@ class IndexAccess Values getValue(size_t part_idx, size_t mark) const { - const auto & index = parts[part_idx].data_part->index; + const auto & index = parts[part_idx].data_part->getIndex(); Values values(index.size()); for (size_t i = 0; i < values.size(); ++i) { diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.cpp b/src/Storages/MergeTree/IMergeTreeDataPart.cpp index 0f82e00edff3..bc64632356f0 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.cpp +++ b/src/Storages/MergeTree/IMergeTreeDataPart.cpp @@ -313,13 +313,13 @@ IMergeTreeDataPart::IMergeTreeDataPart( const IMergeTreeDataPart * parent_part_) : DataPartStorageHolder(data_part_storage_) , storage(storage_) - , mutable_name(name_) , name(mutable_name) , info(info_) , index_granularity_info(storage_, part_type_) , part_type(part_type_) , parent_part(parent_part_) , parent_part_name(parent_part ? parent_part->name : "") + , mutable_name(name_) { if (parent_part) { diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.h b/src/Storages/MergeTree/IMergeTreeDataPart.h index fcf9d5bd17da..878258bddf0d 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.h +++ b/src/Storages/MergeTree/IMergeTreeDataPart.h @@ -75,6 +75,7 @@ class IMergeTreeDataPart : public std::enable_shared_from_this; using NameToNumber = std::unordered_map; + using Index = Columns; using IndexSizeByName = std::unordered_map; using Type = MergeTreeDataPartType; @@ -212,10 +213,6 @@ class IMergeTreeDataPart : public std::enable_shared_from_this & affordable_states) const; - /// Primary key (correspond to primary.idx file). - /// Always loaded in RAM. Contains each index_granularity-th value of primary key tuple. - /// Note that marks (also correspond to primary key) is not always in RAM, but cached. See MarkCache.h. - using Index = Columns; - Index index; - MergeTreePartition partition; /// Amount of rows between marks @@ -369,6 +360,9 @@ class IMergeTreeDataPart : public std::enable_shared_from_this last_removal_attempt_time = 0; protected: + /// Primary key (correspond to primary.idx file). + /// Always loaded in RAM. Contains each index_granularity-th value of primary key tuple. + /// Note that marks (also correspond to primary key) is not always in RAM, but cached. See MarkCache.h. + Index index; /// Total size of all columns, calculated once in calcuateColumnSizesOnDisk ColumnSize total_columns_size; @@ -623,6 +621,9 @@ class IMergeTreeDataPart : public std::enable_shared_from_thisindex[0]; + const auto & primary_key_column = *part->getIndex()[0]; auto & min_column = assert_cast(*partition_minmax_count_columns[pos]); insert(min_column, primary_key_column[0]); } @@ -6835,7 +6835,7 @@ Block MergeTreeData::getMinMaxCountProjectionBlock( { for (const auto & part : real_parts) { - const auto & primary_key_column = *part->index[0]; + const auto & primary_key_column = *part->getIndex()[0]; auto & max_column = assert_cast(*partition_minmax_count_columns[pos]); insert(max_column, primary_key_column[primary_key_column.size() - 1]); } diff --git a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp index a76d370d057f..1ba287136803 100644 --- a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp +++ b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp @@ -1087,7 +1087,7 @@ MarkRanges MergeTreeDataSelectExecutor::markRangesFromPKRange( MarkRanges res; size_t marks_count = part->index_granularity.getMarksCount(); - const auto & index = part->index; + const auto & index = part->getIndex(); if (marks_count == 0) return res; diff --git a/src/Storages/MergeTree/MergedBlockOutputStream.cpp b/src/Storages/MergeTree/MergedBlockOutputStream.cpp index 1d10a1433efd..f2fe2e0f2559 100644 --- a/src/Storages/MergeTree/MergedBlockOutputStream.cpp +++ b/src/Storages/MergeTree/MergedBlockOutputStream.cpp @@ -181,7 +181,7 @@ MergedBlockOutputStream::Finalizer MergedBlockOutputStream::finalizePartAsync( new_part->rows_count = rows_count; new_part->modification_time = time(nullptr); - new_part->index = writer->releaseIndexColumns(); + new_part->setIndex(writer->releaseIndexColumns()); new_part->checksums = checksums; new_part->setBytesOnDisk(checksums.getTotalSizeOnDisk()); new_part->setBytesUncompressedOnDisk(checksums.getTotalSizeUncompressedOnDisk()); diff --git a/src/Storages/MergeTree/MutateTask.cpp b/src/Storages/MergeTree/MutateTask.cpp index 9959688d8894..6882963fd249 100644 --- a/src/Storages/MergeTree/MutateTask.cpp +++ b/src/Storages/MergeTree/MutateTask.cpp @@ -899,7 +899,7 @@ void finalizeMutatedPart( new_data_part->rows_count = source_part->rows_count; new_data_part->index_granularity = source_part->index_granularity; - new_data_part->index = source_part->index; + new_data_part->setIndex(source_part->getIndex()); new_data_part->minmax_idx = source_part->minmax_idx; new_data_part->modification_time = time(nullptr); From fce3a8cafc13b4b454ed53966b9f7fd1d7f845fa Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 17 Feb 2024 06:16:56 +0100 Subject: [PATCH 2/3] A small preparation for better handling of primary key in memory --- src/Storages/MergeTree/IMergeTreeDataPart.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.h b/src/Storages/MergeTree/IMergeTreeDataPart.h index 878258bddf0d..e82dc8fc2a35 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.h +++ b/src/Storages/MergeTree/IMergeTreeDataPart.h @@ -563,7 +563,7 @@ class IMergeTreeDataPart : public std::enable_shared_from_this Date: Sat, 17 Feb 2024 06:20:44 +0100 Subject: [PATCH 3/3] Move methods to .cpp --- src/Storages/MergeTree/IMergeTreeDataPart.cpp | 13 +++++++++++++ src/Storages/MergeTree/IMergeTreeDataPart.h | 4 ++-- 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.cpp b/src/Storages/MergeTree/IMergeTreeDataPart.cpp index bc64632356f0..70a8cee6106d 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.cpp +++ b/src/Storages/MergeTree/IMergeTreeDataPart.cpp @@ -342,6 +342,19 @@ IMergeTreeDataPart::~IMergeTreeDataPart() decrementTypeMetric(part_type); } + +const IMergeTreeDataPart::Index & IMergeTreeDataPart::getIndex() const +{ + return index; +} + + +void IMergeTreeDataPart::setIndex(Columns index_) +{ + index = std::move(index_); +} + + void IMergeTreeDataPart::setName(const String & new_name) { mutable_name = new_name; diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.h b/src/Storages/MergeTree/IMergeTreeDataPart.h index e82dc8fc2a35..f93b1a4c9c17 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.h +++ b/src/Storages/MergeTree/IMergeTreeDataPart.h @@ -360,8 +360,8 @@ class IMergeTreeDataPart : public std::enable_shared_from_this