From ec6abb6b4328268cd1b167d0c43b4a1ba6cc2efa Mon Sep 17 00:00:00 2001 From: Jerry Hu Date: Thu, 30 May 2024 10:33:13 +0800 Subject: [PATCH] [fix](set) incorrect result of set operator (#35607) If there are duplicated expressions in the select list, the result will be incorrect. ## Proposed changes Issue Number: close #28438 --- be/src/pipeline/dependency.h | 4 ++-- be/src/pipeline/exec/set_sink_operator.cpp | 2 +- be/src/pipeline/exec/set_source_operator.cpp | 4 ++-- be/src/vec/exec/vset_operation_node.cpp | 6 +++--- be/src/vec/exec/vset_operation_node.h | 4 ++-- .../data/query_p0/operator/test_set_operator.out | 12 ++++++++++++ .../query_p0/operator/test_set_operator.groovy | 8 ++++++++ 7 files changed, 30 insertions(+), 10 deletions(-) diff --git a/be/src/pipeline/dependency.h b/be/src/pipeline/dependency.h index cdc0eec3933a78..d7084f85d5d6ac 100644 --- a/be/src/pipeline/dependency.h +++ b/be/src/pipeline/dependency.h @@ -609,8 +609,8 @@ struct SetSharedState : public BasicSharedState { vectorized::Block build_block; // build to source //record element size in hashtable int64_t valid_element_in_hash_tbl = 0; - //first:column_id, could point to origin column or cast column - //second:idx mapped to column types + //first: idx mapped to column types + //second: column_id, could point to origin column or cast column std::unordered_map build_col_idx; //// shared static states (shared, decided in prepare/open...) diff --git a/be/src/pipeline/exec/set_sink_operator.cpp b/be/src/pipeline/exec/set_sink_operator.cpp index be44d4dbf7d0d6..ff6bdfed1a37aa 100644 --- a/be/src/pipeline/exec/set_sink_operator.cpp +++ b/be/src/pipeline/exec/set_sink_operator.cpp @@ -137,7 +137,7 @@ Status SetSinkOperatorX::_extract_build_column( raw_ptrs[i] = block.get_by_position(result_col_id).column.get(); DCHECK_GE(result_col_id, 0); - local_state._shared_state->build_col_idx.insert({result_col_id, i}); + local_state._shared_state->build_col_idx.insert({i, result_col_id}); } return Status::OK(); } diff --git a/be/src/pipeline/exec/set_source_operator.cpp b/be/src/pipeline/exec/set_source_operator.cpp index 0f118a7818e4d8..0994350430b4a2 100644 --- a/be/src/pipeline/exec/set_source_operator.cpp +++ b/be/src/pipeline/exec/set_source_operator.cpp @@ -151,8 +151,8 @@ void SetSourceOperatorX::_add_result_columns( auto it = value.begin(); for (auto idx = build_col_idx.begin(); idx != build_col_idx.end(); ++idx) { - auto& column = *build_block.get_by_position(idx->first).column; - local_state._mutable_cols[idx->second]->insert_from(column, it->row_num); + auto& column = *build_block.get_by_position(idx->second).column; + local_state._mutable_cols[idx->first]->insert_from(column, it->row_num); } block_size++; } diff --git a/be/src/vec/exec/vset_operation_node.cpp b/be/src/vec/exec/vset_operation_node.cpp index c207fb18f051ac..2b2573d83bc557 100644 --- a/be/src/vec/exec/vset_operation_node.cpp +++ b/be/src/vec/exec/vset_operation_node.cpp @@ -331,8 +331,8 @@ void VSetOperationNode::add_result_columns(RowRefListWithFlags& va int& block_size) { auto it = value.begin(); for (auto idx = _build_col_idx.begin(); idx != _build_col_idx.end(); ++idx) { - const auto& column = *_build_block.get_by_position(idx->first).column; - _mutable_cols[idx->second]->insert_from(column, it->row_num); + const auto& column = *_build_block.get_by_position(idx->second).column; + _mutable_cols[idx->first]->insert_from(column, it->row_num); } block_size++; } @@ -434,7 +434,7 @@ Status VSetOperationNode::extract_build_column(Block& block, Colum } raw_ptrs[i] = block.get_by_position(result_col_id).column.get(); DCHECK_GE(result_col_id, 0); - _build_col_idx.insert({result_col_id, i}); + _build_col_idx.insert({i, result_col_id}); } return Status::OK(); } diff --git a/be/src/vec/exec/vset_operation_node.h b/be/src/vec/exec/vset_operation_node.h index 508f8073689b7e..9f3ba8fba363b5 100644 --- a/be/src/vec/exec/vset_operation_node.h +++ b/be/src/vec/exec/vset_operation_node.h @@ -112,8 +112,8 @@ class VSetOperationNode final : public ExecNode { std::vector _child_expr_lists; //record build column type DataTypes _left_table_data_types; - //first:column_id, could point to origin column or cast column - //second:idx mapped to column types + //first: idx mapped to column types + //second: column_id, could point to origin column or cast column std::unordered_map _build_col_idx; //record insert column id during probe std::vector _probe_column_inserted_id; diff --git a/regression-test/data/query_p0/operator/test_set_operator.out b/regression-test/data/query_p0/operator/test_set_operator.out index 1d8bc5ef93e2f8..48eb4a0c9bac40 100644 --- a/regression-test/data/query_p0/operator/test_set_operator.out +++ b/regression-test/data/query_p0/operator/test_set_operator.out @@ -13,3 +13,15 @@ 9 9 +-- !select_minus -- +3 3 +4 4 +5 5 +7 7 + +-- !select_except -- +3 3 +4 4 +5 5 +7 7 + diff --git a/regression-test/suites/query_p0/operator/test_set_operator.groovy b/regression-test/suites/query_p0/operator/test_set_operator.groovy index 1bc9cc29e4c535..7d6219585e4c4c 100644 --- a/regression-test/suites/query_p0/operator/test_set_operator.groovy +++ b/regression-test/suites/query_p0/operator/test_set_operator.groovy @@ -89,4 +89,12 @@ suite("test_set_operators", "query,p0,arrow_flight_sql") { t3 on t2.col1=t3.col1; """ + + order_qt_select_minus """ + select col1, col1 from t1 minus select col1, col1 from t2; + """ + + order_qt_select_except """ + select col1, col1 from t1 except select col1, col1 from t2; + """ }