From 525c4e940a1bdfe2149546c5f2a7af59ac2a6b8e Mon Sep 17 00:00:00 2001 From: David Wendt Date: Thu, 31 Oct 2024 15:50:25 -0400 Subject: [PATCH 1/3] Move strings to date/time types benchmarks to nvbench --- cpp/benchmarks/CMakeLists.txt | 4 +- cpp/benchmarks/string/convert_datetime.cpp | 86 +++++++------- cpp/benchmarks/string/convert_durations.cpp | 122 ++++++++------------ 3 files changed, 88 insertions(+), 124 deletions(-) diff --git a/cpp/benchmarks/CMakeLists.txt b/cpp/benchmarks/CMakeLists.txt index 68781889c53..f7be8072914 100644 --- a/cpp/benchmarks/CMakeLists.txt +++ b/cpp/benchmarks/CMakeLists.txt @@ -356,8 +356,6 @@ ConfigureNVBench( # * strings benchmark ------------------------------------------------------------------- ConfigureBench( STRINGS_BENCH - string/convert_datetime.cpp - string/convert_durations.cpp string/convert_fixed_point.cpp string/convert_numerics.cpp string/copy.cu @@ -375,6 +373,8 @@ ConfigureNVBench( string/char_types.cpp string/combine.cpp string/contains.cpp + string/convert_datetime.cpp + string/convert_durations.cpp string/copy_if_else.cpp string/copy_range.cpp string/count.cpp diff --git a/cpp/benchmarks/string/convert_datetime.cpp b/cpp/benchmarks/string/convert_datetime.cpp index 5deca3664b7..7ace9e42d73 100644 --- a/cpp/benchmarks/string/convert_datetime.cpp +++ b/cpp/benchmarks/string/convert_datetime.cpp @@ -16,62 +16,58 @@ #include #include -#include -#include #include #include +#include #include -class StringDateTime : public cudf::benchmark {}; +#include -enum class direction { to, from }; +NVBENCH_DECLARE_TYPE_STRINGS(cudf::timestamp_D, "cudf::timestamp_D", "cudf::timestamp_D"); +NVBENCH_DECLARE_TYPE_STRINGS(cudf::timestamp_s, "cudf::timestamp_s", "cudf::timestamp_s"); +NVBENCH_DECLARE_TYPE_STRINGS(cudf::timestamp_ms, "cudf::timestamp_ms", "cudf::timestamp_ms"); +NVBENCH_DECLARE_TYPE_STRINGS(cudf::timestamp_us, "cudf::timestamp_us", "cudf::timestamp_us"); +NVBENCH_DECLARE_TYPE_STRINGS(cudf::timestamp_ns, "cudf::timestamp_ns", "cudf::timestamp_ns"); -template -void BM_convert_datetime(benchmark::State& state, direction dir) +using Types = nvbench::type_list; + +template +void bench_convert_datetime(nvbench::state& state, nvbench::type_list) { - auto const n_rows = static_cast(state.range(0)); - auto const data_type = cudf::data_type(cudf::type_to_id()); + auto const num_rows = static_cast(state.get_int64("num_rows")); + auto const data_type = cudf::data_type(cudf::type_to_id()); + auto const from_ts = state.get_string("dir") == "from"; - auto const column = create_random_column(data_type.id(), row_count{n_rows}); - cudf::column_view input(column->view()); + auto const ts_col = create_random_column(data_type.id(), row_count{num_rows}); + cudf::column_view input(ts_col->view()); - auto source = dir == direction::to ? cudf::strings::from_timestamps(input, "%Y-%m-%d %H:%M:%S") - : make_empty_column(cudf::data_type{cudf::type_id::STRING}); - cudf::strings_column_view source_string(source->view()); + auto format = std::string{"%Y-%m-%d %H:%M:%S"}; + auto stream = cudf::get_default_stream(); + state.set_cuda_stream(nvbench::make_cuda_stream_view(stream.value())); - for (auto _ : state) { - cuda_event_timer raii(state, true); - if (dir == direction::to) - cudf::strings::to_timestamps(source_string, data_type, "%Y-%m-%d %H:%M:%S"); - else - cudf::strings::from_timestamps(input, "%Y-%m-%d %H:%M:%S"); + if (from_ts) { + state.add_global_memory_reads(num_rows); + state.add_global_memory_writes((format.size() + 2) * num_rows); // +2 for 4-digit year + state.exec(nvbench::exec_tag::sync, + [&](nvbench::launch& launch) { cudf::strings::from_timestamps(input, format); }); + } else { + auto source = cudf::strings::from_timestamps(input, format); + auto view = cudf::strings_column_view(source->view()); + state.add_global_memory_reads(view.chars_size(stream)); + state.add_global_memory_writes(num_rows); + state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) { + cudf::strings::to_timestamps(view, data_type, format); + }); } - - auto const bytes = dir == direction::to ? source_string.chars_size(cudf::get_default_stream()) - : n_rows * sizeof(TypeParam); - state.SetBytesProcessed(state.iterations() * bytes); } -#define STR_BENCHMARK_DEFINE(name, type, dir) \ - BENCHMARK_DEFINE_F(StringDateTime, name)(::benchmark::State & state) \ - { \ - BM_convert_datetime(state, dir); \ - } \ - BENCHMARK_REGISTER_F(StringDateTime, name) \ - ->RangeMultiplier(1 << 5) \ - ->Range(1 << 10, 1 << 25) \ - ->UseManualTime() \ - ->Unit(benchmark::kMicrosecond); - -STR_BENCHMARK_DEFINE(from_days, cudf::timestamp_D, direction::from); -STR_BENCHMARK_DEFINE(from_seconds, cudf::timestamp_s, direction::from); -STR_BENCHMARK_DEFINE(from_mseconds, cudf::timestamp_ms, direction::from); -STR_BENCHMARK_DEFINE(from_useconds, cudf::timestamp_us, direction::from); -STR_BENCHMARK_DEFINE(from_nseconds, cudf::timestamp_ns, direction::from); - -STR_BENCHMARK_DEFINE(to_days, cudf::timestamp_D, direction::to); -STR_BENCHMARK_DEFINE(to_seconds, cudf::timestamp_s, direction::to); -STR_BENCHMARK_DEFINE(to_mseconds, cudf::timestamp_ms, direction::to); -STR_BENCHMARK_DEFINE(to_useconds, cudf::timestamp_us, direction::to); -STR_BENCHMARK_DEFINE(to_nseconds, cudf::timestamp_ns, direction::to); +NVBENCH_BENCH_TYPES(bench_convert_datetime, NVBENCH_TYPE_AXES(Types)) + .set_name("datetime") + .set_type_axes_names({"DataType"}) + .add_string_axis("dir", {"to", "from"}) + .add_int64_axis("num_rows", {1 << 10, 1 << 15, 1 << 20, 1 << 25}); diff --git a/cpp/benchmarks/string/convert_durations.cpp b/cpp/benchmarks/string/convert_durations.cpp index f12d292c2e7..9d2377f2d82 100644 --- a/cpp/benchmarks/string/convert_durations.cpp +++ b/cpp/benchmarks/string/convert_durations.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2023, NVIDIA CORPORATION. + * Copyright (c) 2020-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,92 +14,60 @@ * limitations under the License. */ +#include #include -#include - -#include #include #include -#include +#include #include -#include -#include - -class DurationsToString : public cudf::benchmark {}; -template -void BM_convert_from_durations(benchmark::State& state) -{ - cudf::size_type const source_size = state.range(0); - - // Every element is valid - auto data = cudf::detail::make_counting_transform_iterator( - 0, [source_size](auto i) { return TypeParam{i - source_size / 2}; }); +#include - cudf::test::fixed_width_column_wrapper source_durations(data, data + source_size); +NVBENCH_DECLARE_TYPE_STRINGS(cudf::duration_D, "cudf::duration_D", "cudf::duration_D"); +NVBENCH_DECLARE_TYPE_STRINGS(cudf::duration_s, "cudf::duration_s", "cudf::duration_s"); +NVBENCH_DECLARE_TYPE_STRINGS(cudf::duration_ms, "cudf::duration_ms", "cudf::duration_ms"); +NVBENCH_DECLARE_TYPE_STRINGS(cudf::duration_us, "cudf::duration_us", "cudf::duration_us"); +NVBENCH_DECLARE_TYPE_STRINGS(cudf::duration_ns, "cudf::duration_ns", "cudf::duration_ns"); - for (auto _ : state) { - cuda_event_timer raii(state, true); // flush_l2_cache = true, stream = 0 - cudf::strings::from_durations(source_durations, "%D days %H:%M:%S"); - } - - state.SetBytesProcessed(state.iterations() * source_size * sizeof(TypeParam)); -} +using Types = nvbench::type_list; -class StringToDurations : public cudf::benchmark {}; -template -void BM_convert_to_durations(benchmark::State& state) +template +void bench_convert_duration(nvbench::state& state, nvbench::type_list) { - cudf::size_type const source_size = state.range(0); - - // Every element is valid - auto data = cudf::detail::make_counting_transform_iterator( - 0, [source_size](auto i) { return TypeParam{i - source_size / 2}; }); - - cudf::test::fixed_width_column_wrapper source_durations(data, data + source_size); - auto results = cudf::strings::from_durations(source_durations, "%D days %H:%M:%S"); - cudf::strings_column_view source_string(*results); - auto output_type = cudf::data_type(cudf::type_to_id()); - - for (auto _ : state) { - cuda_event_timer raii(state, true); // flush_l2_cache = true, stream = 0 - cudf::strings::to_durations(source_string, output_type, "%D days %H:%M:%S"); + auto const num_rows = static_cast(state.get_int64("num_rows")); + auto const data_type = cudf::data_type(cudf::type_to_id()); + auto const from_dur = state.get_string("dir") == "from"; + + auto const ts_col = create_random_column(data_type.id(), row_count{num_rows}); + cudf::column_view input(ts_col->view()); + + auto format = std::string{"%D days %H:%M:%S"}; + auto stream = cudf::get_default_stream(); + state.set_cuda_stream(nvbench::make_cuda_stream_view(stream.value())); + + if (from_dur) { + state.add_global_memory_reads(num_rows); + state.add_global_memory_writes(format.size() * num_rows); + state.exec(nvbench::exec_tag::sync, + [&](nvbench::launch& launch) { cudf::strings::from_durations(input, format); }); + } else { + auto source = cudf::strings::from_durations(input, format); + auto view = cudf::strings_column_view(source->view()); + state.add_global_memory_reads(view.chars_size(stream)); + state.add_global_memory_writes(num_rows); + state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) { + cudf::strings::to_durations(view, data_type, format); + }); } - - state.SetBytesProcessed(state.iterations() * source_size * sizeof(TypeParam)); } -#define DSBM_BENCHMARK_DEFINE(name, type) \ - BENCHMARK_DEFINE_F(DurationsToString, name)(::benchmark::State & state) \ - { \ - BM_convert_from_durations(state); \ - } \ - BENCHMARK_REGISTER_F(DurationsToString, name) \ - ->RangeMultiplier(1 << 5) \ - ->Range(1 << 10, 1 << 25) \ - ->UseManualTime() \ - ->Unit(benchmark::kMicrosecond); - -#define SDBM_BENCHMARK_DEFINE(name, type) \ - BENCHMARK_DEFINE_F(StringToDurations, name)(::benchmark::State & state) \ - { \ - BM_convert_to_durations(state); \ - } \ - BENCHMARK_REGISTER_F(StringToDurations, name) \ - ->RangeMultiplier(1 << 5) \ - ->Range(1 << 10, 1 << 25) \ - ->UseManualTime() \ - ->Unit(benchmark::kMicrosecond); - -DSBM_BENCHMARK_DEFINE(from_durations_D, cudf::duration_D); -DSBM_BENCHMARK_DEFINE(from_durations_s, cudf::duration_s); -DSBM_BENCHMARK_DEFINE(from_durations_ms, cudf::duration_ms); -DSBM_BENCHMARK_DEFINE(from_durations_us, cudf::duration_us); -DSBM_BENCHMARK_DEFINE(from_durations_ns, cudf::duration_ns); - -SDBM_BENCHMARK_DEFINE(to_durations_D, cudf::duration_D); -SDBM_BENCHMARK_DEFINE(to_durations_s, cudf::duration_s); -SDBM_BENCHMARK_DEFINE(to_durations_ms, cudf::duration_ms); -SDBM_BENCHMARK_DEFINE(to_durations_us, cudf::duration_us); -SDBM_BENCHMARK_DEFINE(to_durations_ns, cudf::duration_ns); +NVBENCH_BENCH_TYPES(bench_convert_duration, NVBENCH_TYPE_AXES(Types)) + .set_name("duration") + .set_type_axes_names({"DataType"}) + .add_string_axis("dir", {"to", "from"}) + .add_int64_axis("num_rows", {1 << 10, 1 << 15, 1 << 20, 1 << 25}); From 033c41f20ca868aaf2876ee33a072f9e6561873f Mon Sep 17 00:00:00 2001 From: David Wendt Date: Wed, 6 Nov 2024 13:28:14 -0500 Subject: [PATCH 2/3] refactor some code for clarity --- cpp/benchmarks/string/convert_datetime.cpp | 27 +++++++++++----------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/cpp/benchmarks/string/convert_datetime.cpp b/cpp/benchmarks/string/convert_datetime.cpp index 7ace9e42d73..288aa6029d3 100644 --- a/cpp/benchmarks/string/convert_datetime.cpp +++ b/cpp/benchmarks/string/convert_datetime.cpp @@ -39,29 +39,30 @@ using Types = nvbench::type_list void bench_convert_datetime(nvbench::state& state, nvbench::type_list) { - auto const num_rows = static_cast(state.get_int64("num_rows")); - auto const data_type = cudf::data_type(cudf::type_to_id()); - auto const from_ts = state.get_string("dir") == "from"; + auto const num_rows = static_cast(state.get_int64("num_rows")); + auto const from_ts = state.get_string("dir") == "from"; - auto const ts_col = create_random_column(data_type.id(), row_count{num_rows}); - cudf::column_view input(ts_col->view()); + auto const data_type = cudf::data_type(cudf::type_to_id()); + auto const ts_col = create_random_column(data_type.id(), row_count{num_rows}); auto format = std::string{"%Y-%m-%d %H:%M:%S"}; + auto s_col = cudf::strings::from_timestamps(ts_col->view(), format); + auto sv = cudf::strings_column_view(s_col->view()); + auto stream = cudf::get_default_stream(); state.set_cuda_stream(nvbench::make_cuda_stream_view(stream.value())); if (from_ts) { state.add_global_memory_reads(num_rows); - state.add_global_memory_writes((format.size() + 2) * num_rows); // +2 for 4-digit year - state.exec(nvbench::exec_tag::sync, - [&](nvbench::launch& launch) { cudf::strings::from_timestamps(input, format); }); + state.add_global_memory_writes(sv.chars_size(stream)); + state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) { + cudf::strings::from_timestamps(ts_col->view(), format); + }); } else { - auto source = cudf::strings::from_timestamps(input, format); - auto view = cudf::strings_column_view(source->view()); - state.add_global_memory_reads(view.chars_size(stream)); + state.add_global_memory_reads(sv.chars_size(stream)); state.add_global_memory_writes(num_rows); state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) { - cudf::strings::to_timestamps(view, data_type, format); + cudf::strings::to_timestamps(sv, data_type, format); }); } } @@ -70,4 +71,4 @@ NVBENCH_BENCH_TYPES(bench_convert_datetime, NVBENCH_TYPE_AXES(Types)) .set_name("datetime") .set_type_axes_names({"DataType"}) .add_string_axis("dir", {"to", "from"}) - .add_int64_axis("num_rows", {1 << 10, 1 << 15, 1 << 20, 1 << 25}); + .add_int64_axis("num_rows", {1 << 16, 1 << 18, 1 << 20, 1 << 22}); From 8d82f6562bcebdad8360b1fd1796ca1b1290025f Mon Sep 17 00:00:00 2001 From: David Wendt Date: Thu, 7 Nov 2024 06:55:45 -0500 Subject: [PATCH 3/3] fix style violation --- cpp/benchmarks/CMakeLists.txt | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/cpp/benchmarks/CMakeLists.txt b/cpp/benchmarks/CMakeLists.txt index bf608326986..e1474172e15 100644 --- a/cpp/benchmarks/CMakeLists.txt +++ b/cpp/benchmarks/CMakeLists.txt @@ -355,14 +355,8 @@ ConfigureNVBench( # ################################################################################################## # * strings benchmark ------------------------------------------------------------------- ConfigureBench( - STRINGS_BENCH - string/copy.cu - string/factory.cu - string/filter.cpp - string/repeat_strings.cpp - string/replace.cpp - string/translate.cpp - string/url_decode.cu + STRINGS_BENCH string/copy.cu string/factory.cu string/filter.cpp string/repeat_strings.cpp + string/replace.cpp string/translate.cpp string/url_decode.cu ) ConfigureNVBench(