Skip to content

Commit

Permalink
[Performance](opt) opt the order by performance in permutation (apache#38985)
Browse files Browse the repository at this point in the history

## Proposed changes

Before:
```
select l_quantity from lineitem order by l_quantity limit 10000020;
+--------------+
| ReturnedRows |
+--------------+
| 10000020     |
+--------------+
1 row in set (2 min 24.42 sec)

```

After:
```
mysql [tpch]>select l_quantity from lineitem order by l_quantity limit 10000020;
+--------------+
| ReturnedRows |
+--------------+
| 10000020     |
+--------------+
1 row in set (28.42 sec)
```

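<!--Describe your changes.-->

`get_permutation` now uses `std::partial_sort` only when `limit` is small relative to the row count `s` (about one eighth of `s` or less). Otherwise it sorts the whole permutation with `pdqsort`, because `std::partial_sort` is slower than a full sort when `limit` is close to `s`.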
  • Loading branch information
HappenLee authored Aug 8, 2024
1 parent 7c58c71 commit df55639
Show file tree
Hide file tree
Showing 3 changed files with 23 additions and 14 deletions.
25 changes: 17 additions & 8 deletions be/src/vec/columns/column_decimal.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
#pragma once

#include <glog/logging.h>
#include <pdqsort.h>
#include <stdint.h>
#include <string.h>
#include <sys/types.h>
Expand Down Expand Up @@ -269,14 +270,22 @@ class ColumnDecimal final : public COWHelper<IColumn, ColumnDecimal<T>> {
for (U i = 0; i < s; ++i) res[i] = i;

auto sort_end = res.end();
if (limit && limit < s) sort_end = res.begin() + limit;

if (reverse)
std::partial_sort(res.begin(), sort_end, res.end(),
[this](size_t a, size_t b) { return data[a] > data[b]; });
else
std::partial_sort(res.begin(), sort_end, res.end(),
[this](size_t a, size_t b) { return data[a] < data[b]; });
if (limit && limit < s / 8.0) {
sort_end = res.begin() + limit;
if (reverse)
std::partial_sort(res.begin(), sort_end, res.end(),
[this](size_t a, size_t b) { return data[a] > data[b]; });
else
std::partial_sort(res.begin(), sort_end, res.end(),
[this](size_t a, size_t b) { return data[a] < data[b]; });
} else {
if (reverse)
pdqsort(res.begin(), res.end(),
[this](size_t a, size_t b) { return data[a] > data[b]; });
else
pdqsort(res.begin(), res.end(),
[this](size_t a, size_t b) { return data[a] < data[b]; });
}
}

void ALWAYS_INLINE decimalv2_do_crc(size_t i, uint32_t& hash) const {
Expand Down
9 changes: 4 additions & 5 deletions be/src/vec/columns/column_string.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -483,9 +483,8 @@ void ColumnStr<T>::get_permutation(bool reverse, size_t limit, int /*nan_directi
res[i] = i;
}

if (limit >= s) {
limit = 0;
}
// std::partial_sort only gives a performance benefit when limit << s (limit much smaller than s)
if (limit > (s / 8.0)) limit = 0;

if (limit) {
if (reverse) {
Expand All @@ -495,9 +494,9 @@ void ColumnStr<T>::get_permutation(bool reverse, size_t limit, int /*nan_directi
}
} else {
if (reverse) {
std::sort(res.begin(), res.end(), less<false>(*this));
pdqsort(res.begin(), res.end(), less<false>(*this));
} else {
std::sort(res.begin(), res.end(), less<true>(*this));
pdqsort(res.begin(), res.end(), less<true>(*this));
}
}
}
Expand Down
3 changes: 2 additions & 1 deletion be/src/vec/columns/column_vector.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -255,7 +255,8 @@ void ColumnVector<T>::get_permutation(bool reverse, size_t limit, int nan_direct

if (s == 0) return;

if (limit >= s) limit = 0;
// std::partial_sort only gives a performance benefit when limit << s (limit much smaller than s)
if (limit > (s / 8.0)) limit = 0;

if (limit) {
for (size_t i = 0; i < s; ++i) res[i] = i;
Expand Down

0 comments on commit df55639

Please sign in to comment.