From efb73c3cfcadf477156aae52eb221e719a145411 Mon Sep 17 00:00:00 2001 From: Qi Chen Date: Sat, 13 Jul 2024 16:41:07 +0800 Subject: [PATCH] [Fix](multi-catalog) Fix core in orc and parquet reader sometimes after low mem exception. (#36574) ## Proposed changes ### Issue ``` SIGSEGV address not mapped to object (@0x8) received by PID 2264997 (TID 2267559 OR 0x7ff60b2a4640) from PID 8; stack trace: *** 0# doris::signal::(anonymous namespace)::FailureSignalHandler(int, siginfo_t*, void*) at /home/zcp/repo_center/doris_branch-2.1/doris/be/src/common/signal_handler.h:421 1# PosixSignals::chained_handler(int, siginfo*, void*) [clone .part.0] in /usr/lib/jvm/java-17-openjdk-amd64/lib/server/libjvm.so 2# JVM_handle_linux_signal in /usr/lib/jvm/java-17-openjdk-amd64/lib/server/libjvm.so 3# 0x00007FFB96D77520 in /lib/x86_64-linux-gnu/libc.so.6 4# doris::vectorized::Block::clear_column_data(int) at /home/zcp/repo_center/doris_branch-2.1/doris/be/src/vec/core/block.cpp:705 5# doris::vectorized::OrcReader::get_next_block_impl(doris::vectorized::Block*, unsigned long*, bool*) in /mnt/hdd01/PERFORMANCE_ENV/be/lib/doris_be 6# doris::vectorized::OrcReader::get_next_block(doris::vectorized::Block*, unsigned long*, bool*) at /home/zcp/repo_center/doris_branch-2.1/doris/be/src/vec/exec/format/orc/vorc_reader.cpp:1533 7# doris::vectorized::VFileScanner::_get_block_wrapped(doris::RuntimeState*, doris::vectorized::Block*, bool*) at /home/zcp/repo_center/doris_branch-2.1/doris/be/src/vec/exec/scan/vfile_scanner.cpp:355 8# doris::vectorized::VFileScanner::_get_block_impl(doris::RuntimeState*, doris::vectorized::Block*, bool*) at /home/zcp/repo_center/doris_branch-2.1/doris/be/src/vec/exec/scan/vfile_scanner.cpp:298 9# doris::vectorized::VScanner::get_block(doris::RuntimeState*, doris::vectorized::Block*, bool*) in /mnt/hdd01/PERFORMANCE_ENV/be/lib/doris_be 10# doris::vectorized::VScanner::get_block_after_projects(doris::RuntimeState*, doris::vectorized::Block*, bool*) at 
/home/zcp/repo_center/doris_branch-2.1/doris/be/src/vec/exec/scan/vscanner.cpp:96 11# doris::vectorized::ScannerScheduler::_scanner_scan(std::shared_ptr, std::shared_ptr) at /home/zcp/repo_center/doris_branch-2.1/doris/be/src/vec/exec/scan/scanner_scheduler.cpp:258 ``` ### Root cause When conjunct expressions are executed while memory is insufficient, a result column's name and type may already have been inserted into the block while the column itself is still null. Clearing the block afterwards dereferences that null column, which leads to the crash shown above. Specifically, if an exception occurs after `block->insert({nullptr, _data_type, _expr_name});`, the block is left in exactly this state: ``` Status VectorizedFnCall::_do_execute(doris::vectorized::VExprContext* context, doris::vectorized::Block* block, int* result_column_id, std::vector& args) { ... // prepare a column to save result block->insert({nullptr, _data_type, _expr_name}); if (_can_fast_execute) { auto can_fast_execute = fast_execute(*block, args, num_columns_without_result, block->rows(), _function->get_name()); if (can_fast_execute) { *result_column_id = num_columns_without_result; return Status::OK(); } } ``` ### Solution See #37086. `Block::clear_column_data` now skips entries whose column is null instead of dereferencing them. --- be/src/vec/core/block.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/be/src/vec/core/block.cpp b/be/src/vec/core/block.cpp index 22062dc5310aa2..35b8c0eb69a9a4 100644 --- a/be/src/vec/core/block.cpp +++ b/be/src/vec/core/block.cpp @@ -730,8 +730,10 @@ void Block::clear_column_data(int column_size) noexcept { } } for (auto& d : data) { - DCHECK_EQ(d.column->use_count(), 1) << " " << print_use_count(); - (*std::move(d.column)).assume_mutable()->clear(); + if (d.column) { + DCHECK_EQ(d.column->use_count(), 1) << " " << print_use_count(); + (*std::move(d.column)).assume_mutable()->clear(); + } } row_same_bit.clear(); }