Skip to content

Commit

Permalink
GH-43414: [C++][Compute] Fix invalid memory access when resizing var-…
Browse files Browse the repository at this point in the history
…length buffer in row table (#43415)

### Rationale for this change

As #43414 explained. The UT in PR reproduces this issue well (may need ASAN to detect).

### What changes are included in this PR?

Check if `is_fixed_length` before treating the second buffer as offset.

### Are these changes tested?

UT included.

### Are there any user-facing changes?

None.

* GitHub Issue: #43414

Lead-authored-by: Ruoxi Sun <zanmato1984@gmail.com>
Co-authored-by: Felipe Oliveira Carvalho <felipekde@gmail.com>
Signed-off-by: Felipe Oliveira Carvalho <felipekde@gmail.com>
  • Loading branch information
zanmato1984 and felipecrv authored Aug 3, 2024
1 parent 8068554 commit 45b1767
Show file tree
Hide file tree
Showing 3 changed files with 41 additions and 21 deletions.
8 changes: 6 additions & 2 deletions cpp/src/arrow/compute/row/row_internal.cc
Original file line number Diff line number Diff line change
Expand Up @@ -293,8 +293,10 @@ Status RowTableImpl::ResizeFixedLengthBuffers(int64_t num_extra_rows) {
}

Status RowTableImpl::ResizeOptionalVaryingLengthBuffer(int64_t num_extra_bytes) {
DCHECK(!metadata_.is_fixed_length);

int64_t num_bytes = offsets()[num_rows_];
if (bytes_capacity_ >= num_bytes + num_extra_bytes || metadata_.is_fixed_length) {
if (bytes_capacity_ >= num_bytes + num_extra_bytes) {
return Status::OK();
}

Expand Down Expand Up @@ -397,7 +399,9 @@ Status RowTableImpl::AppendSelectionFrom(const RowTableImpl& from,
Status RowTableImpl::AppendEmpty(uint32_t num_rows_to_append,
uint32_t num_extra_bytes_to_append) {
RETURN_NOT_OK(ResizeFixedLengthBuffers(num_rows_to_append));
RETURN_NOT_OK(ResizeOptionalVaryingLengthBuffer(num_extra_bytes_to_append));
if (!metadata_.is_fixed_length) {
RETURN_NOT_OK(ResizeOptionalVaryingLengthBuffer(num_extra_bytes_to_append));
}
num_rows_ += num_rows_to_append;
if (metadata_.row_alignment > 1 || metadata_.string_alignment > 1) {
memset(rows_->mutable_data(), 0, bytes_capacity_);
Expand Down
7 changes: 7 additions & 0 deletions cpp/src/arrow/compute/row/row_internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -220,7 +220,14 @@ class ARROW_EXPORT RowTableImpl {
}

private:
/// \brief Resize the fixed length buffers to store `num_extra_rows` more rows. The
/// fixed length buffers are buffers_[0] for null masks, buffers_[1] for row data if the
/// row is fixed length, or for row offsets otherwise.
Status ResizeFixedLengthBuffers(int64_t num_extra_rows);

/// \brief Resize the optional varying length buffer to store `num_extra_bytes` more
/// bytes.
/// \pre !metadata_.is_fixed_length
Status ResizeOptionalVaryingLengthBuffer(int64_t num_extra_bytes);

// Helper functions to determine the number of bytes needed for each
Expand Down
47 changes: 28 additions & 19 deletions cpp/src/arrow/compute/row/row_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -69,9 +69,14 @@ TEST(RowTableMemoryConsumption, Encode) {
constexpr int64_t num_rows_max = 8192;
constexpr int64_t padding_for_vectors = 64;

ASSERT_OK_AND_ASSIGN(
auto fixed_length_column,
::arrow::gen::Constant(std::make_shared<UInt32Scalar>(0))->Generate(num_rows_max));
std::vector<std::shared_ptr<Array>> fixed_length_columns;
for (const auto& dt : {int8(), uint16(), int32(), uint64(), fixed_size_binary(16),
fixed_size_binary(32)}) {
ASSERT_OK_AND_ASSIGN(auto fixed_length_column,
::arrow::gen::Random(dt)->Generate(num_rows_max));
fixed_length_columns.push_back(std::move(fixed_length_column));
}

ASSERT_OK_AND_ASSIGN(auto var_length_column,
::arrow::gen::Constant(std::make_shared<BinaryScalar>("X"))
->Generate(num_rows_max));
Expand All @@ -81,22 +86,26 @@ TEST(RowTableMemoryConsumption, Encode) {
{
SCOPED_TRACE("encoding fixed length column of " + std::to_string(num_rows) +
" rows");
ASSERT_OK_AND_ASSIGN(auto row_table,
MakeRowTableFromColumn(fixed_length_column, num_rows,
uint32()->byte_width(), 0));
ASSERT_NE(row_table.data(0), NULLPTR);
ASSERT_NE(row_table.data(1), NULLPTR);
ASSERT_EQ(row_table.data(2), NULLPTR);

int64_t actual_null_mask_size =
num_rows * row_table.metadata().null_masks_bytes_per_row;
ASSERT_LE(actual_null_mask_size, row_table.buffer_size(0) - padding_for_vectors);
ASSERT_GT(actual_null_mask_size * 2,
row_table.buffer_size(0) - padding_for_vectors);

int64_t actual_rows_size = num_rows * uint32()->byte_width();
ASSERT_LE(actual_rows_size, row_table.buffer_size(1) - padding_for_vectors);
ASSERT_GT(actual_rows_size * 2, row_table.buffer_size(1) - padding_for_vectors);
for (const auto& col : fixed_length_columns) {
const auto& dt = col->type();
SCOPED_TRACE("encoding fixed length column of type " + dt->ToString());
ASSERT_OK_AND_ASSIGN(auto row_table,
MakeRowTableFromColumn(col, num_rows, dt->byte_width(),
/*string_alignment=*/0));
ASSERT_NE(row_table.data(0), NULLPTR);
ASSERT_NE(row_table.data(1), NULLPTR);
ASSERT_EQ(row_table.data(2), NULLPTR);

int64_t actual_null_mask_size =
num_rows * row_table.metadata().null_masks_bytes_per_row;
ASSERT_LE(actual_null_mask_size, row_table.buffer_size(0) - padding_for_vectors);
ASSERT_GT(actual_null_mask_size * 2,
row_table.buffer_size(0) - padding_for_vectors);

int64_t actual_rows_size = num_rows * dt->byte_width();
ASSERT_LE(actual_rows_size, row_table.buffer_size(1) - padding_for_vectors);
ASSERT_GT(actual_rows_size * 2, row_table.buffer_size(1) - padding_for_vectors);
}
}

// Var length column.
Expand Down

0 comments on commit 45b1767

Please sign in to comment.