Skip to content

Commit

Permalink
Merge pull request ClickHouse#60092 from ClickHouse/index-preparation
Browse files Browse the repository at this point in the history
A small preparation for better handling of primary key in memory
  • Loading branch information
alexey-milovidov committed Feb 17, 2024
2 parents 6cae757 + e9cf922 commit 0e944d4
Show file tree
Hide file tree
Showing 7 changed files with 31 additions and 17 deletions.
2 changes: 1 addition & 1 deletion src/Processors/QueryPlan/PartsSplitter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ class IndexAccess

Values getValue(size_t part_idx, size_t mark) const
{
const auto & index = parts[part_idx].data_part->index;
const auto & index = parts[part_idx].data_part->getIndex();
Values values(index.size());
for (size_t i = 0; i < values.size(); ++i)
{
Expand Down
15 changes: 14 additions & 1 deletion src/Storages/MergeTree/IMergeTreeDataPart.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -313,13 +313,13 @@ IMergeTreeDataPart::IMergeTreeDataPart(
const IMergeTreeDataPart * parent_part_)
: DataPartStorageHolder(data_part_storage_)
, storage(storage_)
, mutable_name(name_)
, name(mutable_name)
, info(info_)
, index_granularity_info(storage_, part_type_)
, part_type(part_type_)
, parent_part(parent_part_)
, parent_part_name(parent_part ? parent_part->name : "")
, mutable_name(name_)
{
if (parent_part)
{
Expand All @@ -342,6 +342,19 @@ IMergeTreeDataPart::~IMergeTreeDataPart()
decrementTypeMetric(part_type);
}


/// Read-only accessor for the part's primary key index (the `index` member).
/// Returns a const reference to the member itself, so no copy of the columns is made.
const IMergeTreeDataPart::Index & IMergeTreeDataPart::getIndex() const
{
return index;
}


/// Replaces the part's primary key index (the `index` member) with `index_`.
/// The argument is taken by value and moved into the member, so a caller that
/// passes an rvalue hands over its columns without an extra copy of the container.
void IMergeTreeDataPart::setIndex(Columns index_)
{
index = std::move(index_);
}


void IMergeTreeDataPart::setName(const String & new_name)
{
mutable_name = new_name;
Expand Down
21 changes: 11 additions & 10 deletions src/Storages/MergeTree/IMergeTreeDataPart.h
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,7 @@ class IMergeTreeDataPart : public std::enable_shared_from_this<IMergeTreeDataPar
using ColumnSizeByName = std::unordered_map<std::string, ColumnSize>;
using NameToNumber = std::unordered_map<std::string, size_t>;

using Index = Columns;
using IndexSizeByName = std::unordered_map<std::string, ColumnSize>;

using Type = MergeTreeDataPartType;
Expand Down Expand Up @@ -212,10 +213,6 @@ class IMergeTreeDataPart : public std::enable_shared_from_this<IMergeTreeDataPar

const MergeTreeData & storage;

private:
String mutable_name;
mutable MergeTreeDataPartState state{MergeTreeDataPartState::Temporary};

public:
const String & name; // const ref to private mutable_name
MergeTreePartInfo info;
Expand Down Expand Up @@ -309,12 +306,6 @@ class IMergeTreeDataPart : public std::enable_shared_from_this<IMergeTreeDataPar
/// Throws an exception if state of the part is not in affordable_states
void assertState(const std::initializer_list<MergeTreeDataPartState> & affordable_states) const;

/// Primary key (correspond to primary.idx file).
/// Always loaded in RAM. Contains each index_granularity-th value of primary key tuple.
/// Note that marks (also correspond to primary key) is not always in RAM, but cached. See MarkCache.h.
using Index = Columns;
Index index;

MergeTreePartition partition;

/// Amount of rows between marks
Expand Down Expand Up @@ -369,6 +360,9 @@ class IMergeTreeDataPart : public std::enable_shared_from_this<IMergeTreeDataPar
/// Version of part metadata (columns, pk and so on). Managed properly only for replicated merge tree.
int32_t metadata_version;

const Index & getIndex() const;
void setIndex(Columns index_);

/// For data in RAM ('index')
UInt64 getIndexSizeInBytes() const;
UInt64 getIndexSizeInAllocatedBytes() const;
Expand Down Expand Up @@ -567,6 +561,10 @@ class IMergeTreeDataPart : public std::enable_shared_from_this<IMergeTreeDataPar
mutable std::atomic<time_t> last_removal_attempt_time = 0;

protected:
/// Primary key (corresponds to the primary.idx file).
/// Always loaded in RAM. Contains each index_granularity-th value of the primary key tuple.
/// Note that marks (which also correspond to the primary key) are not always in RAM, but cached. See MarkCache.h.
Index index;

/// Total size of all columns, calculated once in calculateColumnSizesOnDisk
ColumnSize total_columns_size;
Expand Down Expand Up @@ -623,6 +621,9 @@ class IMergeTreeDataPart : public std::enable_shared_from_this<IMergeTreeDataPar
void initializeIndexGranularityInfo();

private:
String mutable_name;
mutable MergeTreeDataPartState state{MergeTreeDataPartState::Temporary};

/// In compact parts order of columns is necessary
NameToNumber column_name_to_position;

Expand Down
4 changes: 2 additions & 2 deletions src/Storages/MergeTree/MergeTreeData.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6824,7 +6824,7 @@ Block MergeTreeData::getMinMaxCountProjectionBlock(
{
for (const auto & part : real_parts)
{
const auto & primary_key_column = *part->index[0];
const auto & primary_key_column = *part->getIndex()[0];
auto & min_column = assert_cast<ColumnAggregateFunction &>(*partition_minmax_count_columns[pos]);
insert(min_column, primary_key_column[0]);
}
Expand All @@ -6835,7 +6835,7 @@ Block MergeTreeData::getMinMaxCountProjectionBlock(
{
for (const auto & part : real_parts)
{
const auto & primary_key_column = *part->index[0];
const auto & primary_key_column = *part->getIndex()[0];
auto & max_column = assert_cast<ColumnAggregateFunction &>(*partition_minmax_count_columns[pos]);
insert(max_column, primary_key_column[primary_key_column.size() - 1]);
}
Expand Down
2 changes: 1 addition & 1 deletion src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1087,7 +1087,7 @@ MarkRanges MergeTreeDataSelectExecutor::markRangesFromPKRange(
MarkRanges res;

size_t marks_count = part->index_granularity.getMarksCount();
const auto & index = part->index;
const auto & index = part->getIndex();
if (marks_count == 0)
return res;

Expand Down
2 changes: 1 addition & 1 deletion src/Storages/MergeTree/MergedBlockOutputStream.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -181,7 +181,7 @@ MergedBlockOutputStream::Finalizer MergedBlockOutputStream::finalizePartAsync(

new_part->rows_count = rows_count;
new_part->modification_time = time(nullptr);
new_part->index = writer->releaseIndexColumns();
new_part->setIndex(writer->releaseIndexColumns());
new_part->checksums = checksums;
new_part->setBytesOnDisk(checksums.getTotalSizeOnDisk());
new_part->setBytesUncompressedOnDisk(checksums.getTotalSizeUncompressedOnDisk());
Expand Down
2 changes: 1 addition & 1 deletion src/Storages/MergeTree/MutateTask.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -899,7 +899,7 @@ void finalizeMutatedPart(

new_data_part->rows_count = source_part->rows_count;
new_data_part->index_granularity = source_part->index_granularity;
new_data_part->index = source_part->index;
new_data_part->setIndex(source_part->getIndex());
new_data_part->minmax_idx = source_part->minmax_idx;
new_data_part->modification_time = time(nullptr);

Expand Down

0 comments on commit 0e944d4

Please sign in to comment.