Merge branch 'branch-23.10' into processed_bytes_stream_distinct_count_nvbench
harrism authored Sep 25, 2023
2 parents d68426f + f3402c4 commit 7495e0f
Showing 68 changed files with 1,650 additions and 581 deletions.
1 change: 1 addition & 0 deletions conda/environments/all_cuda-118_arch-x86_64.yaml
@@ -9,6 +9,7 @@ channels:
- nvidia
dependencies:
- aiobotocore>=2.2.0
- aws-sdk-cpp<1.11
- benchmark==1.8.0
- boto3>=1.21.21
- botocore>=1.24.21
1 change: 1 addition & 0 deletions conda/environments/all_cuda-120_arch-x86_64.yaml
@@ -9,6 +9,7 @@ channels:
- nvidia
dependencies:
- aiobotocore>=2.2.0
- aws-sdk-cpp<1.11
- benchmark==1.8.0
- boto3>=1.21.21
- botocore>=1.24.21
3 changes: 3 additions & 0 deletions conda/recipes/libcudf/conda_build_config.yaml
@@ -22,6 +22,9 @@ gbench_version:
gtest_version:
- ">=1.13.0"

aws_sdk_cpp_version:
- "<1.11"

libarrow_version:
- "=12"

2 changes: 2 additions & 0 deletions conda/recipes/libcudf/meta.yaml
@@ -74,6 +74,7 @@ requirements:
- gtest {{ gtest_version }}
- gmock {{ gtest_version }}
- zlib {{ zlib_version }}
- aws-sdk-cpp {{ aws_sdk_cpp_version }}

outputs:
- name: libcudf
@@ -107,6 +108,7 @@ outputs:
- dlpack {{ dlpack_version }}
- gtest {{ gtest_version }}
- gmock {{ gtest_version }}
- aws-sdk-cpp {{ aws_sdk_cpp_version }}
test:
commands:
- test -f $PREFIX/lib/libcudf.so
18 changes: 10 additions & 8 deletions cpp/benchmarks/io/cuio_common.cpp
@@ -15,6 +15,7 @@
*/

#include <benchmarks/io/cuio_common.hpp>
#include <cudf/detail/utilities/integer_utils.hpp>
#include <cudf/detail/utilities/logger.hpp>

#include <cstdio>
@@ -141,17 +142,18 @@ std::vector<std::string> select_column_names(std::vector<std::string> const& col
return col_names_to_read;
}

std::vector<cudf::size_type> segments_in_chunk(int num_segments, int num_chunks, int chunk)
std::vector<cudf::size_type> segments_in_chunk(int num_segments, int num_chunks, int chunk_idx)
{
CUDF_EXPECTS(num_segments >= num_chunks,
"Number of chunks cannot be greater than the number of segments in the file");
auto start_segment = [num_segments, num_chunks](int chunk) {
return num_segments * chunk / num_chunks;
};
std::vector<cudf::size_type> selected_segments;
for (auto segment = start_segment(chunk); segment < start_segment(chunk + 1); ++segment) {
selected_segments.push_back(segment);
}
CUDF_EXPECTS(chunk_idx < num_chunks,
"Chunk index must be smaller than the number of chunks in the file");

auto const segments_in_chunk = cudf::util::div_rounding_up_unsafe(num_segments, num_chunks);
auto const begin_segment = std::min(chunk_idx * segments_in_chunk, num_segments);
auto const end_segment = std::min(begin_segment + segments_in_chunk, num_segments);
std::vector<cudf::size_type> selected_segments(end_segment - begin_segment);
std::iota(selected_segments.begin(), selected_segments.end(), begin_segment);

return selected_segments;
}
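
For context, the rewritten segments_in_chunk gives every chunk the same ceil(num_segments / num_chunks) budget and clamps at the end, so all chunks but the last are equal-sized. A minimal standalone sketch of the same arithmetic, with a plain ceil-division helper standing in for cudf::util::div_rounding_up_unsafe and a hypothetical main for illustration:

#include <algorithm>
#include <cstdio>
#include <numeric>
#include <vector>

// Plain ceil-division stand-in for cudf::util::div_rounding_up_unsafe.
int div_rounding_up(int x, int y) { return (x + y - 1) / y; }

// Mirrors the updated segments_in_chunk: chunk k covers segment indices
// [k * per_chunk, min((k + 1) * per_chunk, num_segments)).
std::vector<int> segments_in_chunk(int num_segments, int num_chunks, int chunk_idx)
{
  int const per_chunk = div_rounding_up(num_segments, num_chunks);
  int const begin     = std::min(chunk_idx * per_chunk, num_segments);
  int const end       = std::min(begin + per_chunk, num_segments);
  std::vector<int> segments(end - begin);
  std::iota(segments.begin(), segments.end(), begin);
  return segments;
}

int main()
{
  // 10 segments over 4 chunks -> {0 1 2} {3 4 5} {6 7 8} {9}
  for (int chunk = 0; chunk < 4; ++chunk) {
    for (int segment : segments_in_chunk(10, 4, chunk)) { std::printf("%d ", segment); }
    std::printf("\n");
  }
  return 0;
}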
12 changes: 5 additions & 7 deletions cpp/benchmarks/io/orc/orc_reader_options.cpp
@@ -19,6 +19,7 @@
#include <benchmarks/io/cuio_common.hpp>
#include <benchmarks/io/nvbench_helpers.hpp>

#include <cudf/detail/utilities/integer_utils.hpp>
#include <cudf/io/orc.hpp>
#include <cudf/io/orc_metadata.hpp>
#include <cudf/utilities/default_stream.hpp>
@@ -30,7 +31,7 @@
constexpr int64_t data_size = 512 << 20;
// The number of separate read calls to use when reading files in multiple chunks
// Each call reads roughly equal amounts of data
constexpr int32_t chunked_read_num_chunks = 8;
constexpr int32_t chunked_read_num_chunks = 4;

std::vector<std::string> get_top_level_col_names(cudf::io::source_info const& source)
{
@@ -88,7 +89,7 @@ void BM_orc_read_varying_options(nvbench::state& state,

auto const num_stripes =
cudf::io::read_orc_metadata(source_sink.make_source_info()).num_stripes();
cudf::size_type const chunk_row_cnt = view.num_rows() / num_chunks;
auto const chunk_row_cnt = cudf::util::div_rounding_up_unsafe(view.num_rows(), num_chunks);

auto mem_stats_logger = cudf::memory_stats_logger();
state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::get_default_stream().value()));
@@ -99,7 +100,6 @@
timer.start();
cudf::size_type rows_read = 0;
for (int32_t chunk = 0; chunk < num_chunks; ++chunk) {
auto const is_last_chunk = chunk == (num_chunks - 1);
switch (RowSelection) {
case row_selection::ALL: break;
case row_selection::STRIPES:
@@ -108,7 +108,6 @@
case row_selection::NROWS:
read_options.set_skip_rows(chunk * chunk_row_cnt);
read_options.set_num_rows(chunk_row_cnt);
if (is_last_chunk) read_options.set_num_rows(-1);
break;
default: CUDF_FAIL("Unsupported row selection method");
}
@@ -132,9 +131,6 @@ using col_selections = nvbench::enum_type_list<column_selection::ALL,
column_selection::ALTERNATE,
column_selection::FIRST_HALF,
column_selection::SECOND_HALF>;
using row_selections =
nvbench::enum_type_list<row_selection::ALL, row_selection::STRIPES, row_selection::NROWS>;

NVBENCH_BENCH_TYPES(BM_orc_read_varying_options,
NVBENCH_TYPE_AXES(col_selections,
nvbench::enum_type_list<row_selection::ALL>,
@@ -146,6 +142,8 @@ NVBENCH_BENCH_TYPES(BM_orc_read_varying_options,
{"column_selection", "row_selection", "uses_index", "uses_numpy_dtype", "timestamp_type"})
.set_min_samples(4);

using row_selections =
nvbench::enum_type_list<row_selection::ALL, row_selection::NROWS, row_selection::STRIPES>;
NVBENCH_BENCH_TYPES(BM_orc_read_varying_options,
NVBENCH_TYPE_AXES(nvbench::enum_type_list<column_selection::ALL>,
row_selections,
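
The NROWS path above no longer special-cases the last chunk: with chunk_row_cnt computed by ceil division, the final skip_rows/num_rows window can overshoot the row count, and the benchmark now relies on the reader trimming the request rather than passing num_rows = -1. A hedged sketch of that pattern; the wrapper name and row counts are hypothetical, and the trimming behavior is an assumption about the reader:

#include <cudf/io/orc.hpp>

#include <cstdint>

// Reads an ORC source in num_chunks roughly equal row windows.
cudf::size_type read_in_chunks(cudf::io::source_info const& source,
                               cudf::size_type total_rows,
                               int32_t num_chunks)
{
  // Ceil division, as in the benchmark's use of div_rounding_up_unsafe.
  auto const chunk_row_cnt  = (total_rows + num_chunks - 1) / num_chunks;
  cudf::size_type rows_read = 0;
  for (int32_t chunk = 0; chunk < num_chunks; ++chunk) {
    auto read_options = cudf::io::orc_reader_options::builder(source).build();
    read_options.set_skip_rows(chunk * chunk_row_cnt);
    // May overshoot on the last chunk; assumes the reader clamps to EOF.
    read_options.set_num_rows(chunk_row_cnt);
    rows_read += cudf::io::read_orc(read_options).tbl->num_rows();
  }
  return rows_read;
}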
65 changes: 38 additions & 27 deletions cpp/benchmarks/io/parquet/parquet_reader_options.cpp
@@ -19,28 +19,29 @@
#include <benchmarks/io/cuio_common.hpp>
#include <benchmarks/io/nvbench_helpers.hpp>

#include <cudf/detail/utilities/integer_utils.hpp>
#include <cudf/io/parquet.hpp>
#include <cudf/utilities/default_stream.hpp>

#include <nvbench/nvbench.cuh>

// Size of the data in the benchmark dataframe; chosen to be low enough to allow benchmarks to
// run on most GPUs, but large enough to allow highest throughput
constexpr std::size_t data_size = 512 << 20;
constexpr std::size_t row_group_size = 128 << 20;
constexpr std::size_t data_size = 512 << 20;
// The number of separate read calls to use when reading files in multiple chunks
// Each call reads roughly equal amounts of data
constexpr int32_t chunked_read_num_chunks = 4;

std::vector<std::string> get_top_level_col_names(cudf::io::source_info const& source)
{
cudf::io::parquet_reader_options const read_options =
cudf::io::parquet_reader_options::builder(source);
auto const schema = cudf::io::read_parquet(read_options).metadata.schema_info;

std::vector<std::string> names;
names.reserve(schema.size());
std::transform(schema.cbegin(), schema.cend(), std::back_inserter(names), [](auto const& c) {
return c.name;
});
return names;
auto const top_lvl_cols = cudf::io::read_parquet_metadata(source).schema().root().children();
std::vector<std::string> col_names;
std::transform(top_lvl_cols.cbegin(),
top_lvl_cols.cend(),
std::back_inserter(col_names),
[](auto const& col_meta) { return col_meta.name(); });

return col_names;
}

template <column_selection ColSelection,
Expand All @@ -55,6 +56,8 @@ void BM_parquet_read_options(nvbench::state& state,
nvbench::enum_type<UsesPandasMetadata>,
nvbench::enum_type<Timestamp>>)
{
auto const num_chunks = RowSelection == row_selection::ALL ? 1 : chunked_read_num_chunks;

auto constexpr str_to_categories = ConvertsStrings == converts_strings::YES;
auto constexpr uses_pd_metadata = UsesPandasMetadata == uses_pandas_metadata::YES;

@@ -87,9 +90,8 @@ void BM_parquet_read_options(nvbench::state& state,
.use_pandas_metadata(uses_pd_metadata)
.timestamp_type(ts_type);

// TODO: add read_parquet_metadata to properly calculate #row_groups
auto constexpr num_row_groups = data_size / row_group_size;
auto constexpr num_chunks = 1;
auto const num_row_groups = read_parquet_metadata(source_sink.make_source_info()).num_rowgroups();
auto const chunk_row_cnt = cudf::util::div_rounding_up_unsafe(view.num_rows(), num_chunks);

auto mem_stats_logger = cudf::memory_stats_logger();
state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::get_default_stream().value()));
@@ -100,18 +102,15 @@
timer.start();
cudf::size_type rows_read = 0;
for (int32_t chunk = 0; chunk < num_chunks; ++chunk) {
auto const is_last_chunk = chunk == (num_chunks - 1);
switch (RowSelection) {
case row_selection::ALL: break;
case row_selection::ROW_GROUPS: {
auto row_groups_to_read = segments_in_chunk(num_row_groups, num_chunks, chunk);
if (is_last_chunk) {
// Need to assume that an additional "overflow" row group is present
row_groups_to_read.push_back(num_row_groups);
}
read_options.set_row_groups({row_groups_to_read});
read_options.set_row_groups({segments_in_chunk(num_row_groups, num_chunks, chunk)});
} break;
case row_selection::NROWS: [[fallthrough]];
case row_selection::NROWS:
read_options.set_skip_rows(chunk * chunk_row_cnt);
read_options.set_num_rows(chunk_row_cnt);
break;
default: CUDF_FAIL("Unsupported row selection method");
}

@@ -130,14 +129,26 @@
state.add_buffer_size(source_sink.size(), "encoded_file_size", "encoded_file_size");
}

using row_selections =
nvbench::enum_type_list<row_selection::ALL, row_selection::NROWS, row_selection::ROW_GROUPS>;
NVBENCH_BENCH_TYPES(BM_parquet_read_options,
NVBENCH_TYPE_AXES(nvbench::enum_type_list<column_selection::ALL>,
row_selections,
nvbench::enum_type_list<converts_strings::YES>,
nvbench::enum_type_list<uses_pandas_metadata::YES>,
nvbench::enum_type_list<cudf::type_id::EMPTY>))
.set_name("parquet_read_row_selection")
.set_type_axes_names({"column_selection",
"row_selection",
"str_to_categories",
"uses_pandas_metadata",
"timestamp_type"})
.set_min_samples(4);

using col_selections = nvbench::enum_type_list<column_selection::ALL,
column_selection::ALTERNATE,
column_selection::FIRST_HALF,
column_selection::SECOND_HALF>;

// TODO: row_selection::ROW_GROUPS disabled until we add an API to read metadata from a parquet file
// and determine num row groups. https://github.com/rapidsai/cudf/pull/9963#issuecomment-1004832863

NVBENCH_BENCH_TYPES(BM_parquet_read_options,
NVBENCH_TYPE_AXES(col_selections,
nvbench::enum_type_list<row_selection::ALL>,
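
Both parquet changes above lean on the read_parquet_metadata API that replaced the hard-coded data_size / row_group_size estimate and the old full read_parquet call for schema inspection. A minimal sketch of the two metadata queries the benchmark now performs, assuming the cudf/io/parquet_metadata.hpp header; the file path is hypothetical:

#include <cudf/io/parquet_metadata.hpp>
#include <cudf/io/types.hpp>

#include <string>
#include <vector>

// Returns the top-level column names; mirrors get_top_level_col_names above.
std::vector<std::string> top_level_col_names(std::string const& path)
{
  auto const metadata = cudf::io::read_parquet_metadata(cudf::io::source_info{path});
  std::vector<std::string> names;
  for (auto const& col_meta : metadata.schema().root().children()) {
    names.push_back(col_meta.name());
  }
  return names;
}

// Row-group count straight from the file footer, replacing the old estimate.
auto num_row_groups(std::string const& path)
{
  return cudf::io::read_parquet_metadata(cudf::io::source_info{path}).num_rowgroups();
}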
3 changes: 2 additions & 1 deletion cpp/benchmarks/text/ngrams.cpp
@@ -36,11 +36,12 @@ static void BM_ngrams(benchmark::State& state, ngrams_type nt)
cudf::type_id::STRING, distribution_id::NORMAL, 0, max_str_length);
auto const column = create_random_column(cudf::type_id::STRING, row_count{n_rows}, profile);
cudf::strings_column_view input(column->view());
auto const separator = cudf::string_scalar("_");

for (auto _ : state) {
cuda_event_timer raii(state, true);
switch (nt) {
case ngrams_type::tokens: nvtext::generate_ngrams(input); break;
case ngrams_type::tokens: nvtext::generate_ngrams(input, 2, separator); break;
case ngrams_type::characters: nvtext::generate_character_ngrams(input); break;
}
}
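
This benchmark change reflects nvtext::generate_ngrams losing its defaulted arguments: callers now pass the n-gram count and the separator explicitly. A short usage sketch mirroring the call above; the wrapper name is illustrative:

#include <cudf/column/column.hpp>
#include <cudf/scalar/scalar.hpp>
#include <cudf/strings/strings_column_view.hpp>

#include <nvtext/generate_ngrams.hpp>

#include <memory>

// Builds bigrams joined with "_", matching the benchmark's call above.
std::unique_ptr<cudf::column> make_bigrams(cudf::strings_column_view const& input)
{
  auto const separator = cudf::string_scalar("_");
  return nvtext::generate_ngrams(input, 2, separator);
}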
7 changes: 5 additions & 2 deletions cpp/benchmarks/text/tokenize.cpp
@@ -67,8 +67,11 @@ static void bench_tokenize(nvbench::state& state)
auto result = nvtext::count_tokens(input, cudf::strings_column_view(delimiters));
});
} else if (tokenize_type == "ngrams") {
state.exec(nvbench::exec_tag::sync,
[&](nvbench::launch& launch) { auto result = nvtext::ngrams_tokenize(input); });
auto const delimiter = cudf::string_scalar("");
auto const separator = cudf::string_scalar("_");
state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) {
auto result = nvtext::ngrams_tokenize(input, 2, delimiter, separator);
});
} else if (tokenize_type == "characters") {
state.exec(nvbench::exec_tag::sync,
[&](nvbench::launch& launch) { auto result = nvtext::character_tokenize(input); });
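
Likewise, nvtext::ngrams_tokenize now takes its n-gram count, token delimiter, and n-gram separator explicitly; an empty delimiter requests whitespace tokenization. A sketch under the same assumptions as the previous example:

#include <cudf/column/column.hpp>
#include <cudf/scalar/scalar.hpp>
#include <cudf/strings/strings_column_view.hpp>

#include <nvtext/ngrams_tokenize.hpp>

#include <memory>

// Tokenizes on whitespace (empty delimiter) and emits bigrams joined with "_".
std::unique_ptr<cudf::column> tokenize_bigrams(cudf::strings_column_view const& input)
{
  auto const delimiter = cudf::string_scalar("");
  auto const separator = cudf::string_scalar("_");
  return nvtext::ngrams_tokenize(input, 2, delimiter, separator);
}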
(Diff truncated: 59 of the 68 changed files are not shown.)