Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Move strings to date/time types benchmarks to nvbench #17229

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 4 additions & 9 deletions cpp/benchmarks/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -355,15 +355,8 @@ ConfigureNVBench(
# ##################################################################################################
# * strings benchmark -------------------------------------------------------------------
ConfigureBench(
STRINGS_BENCH
string/convert_datetime.cpp
string/convert_durations.cpp
string/factory.cu
string/filter.cpp
string/repeat_strings.cpp
string/replace.cpp
string/translate.cpp
string/url_decode.cu
STRINGS_BENCH string/factory.cu string/filter.cpp string/repeat_strings.cpp string/replace.cpp
string/translate.cpp string/url_decode.cu
)

ConfigureNVBench(
Expand All @@ -372,6 +365,8 @@ ConfigureNVBench(
string/char_types.cpp
string/combine.cpp
string/contains.cpp
string/convert_datetime.cpp
string/convert_durations.cpp
string/convert_fixed_point.cpp
string/convert_numerics.cpp
string/copy.cpp
Expand Down
87 changes: 42 additions & 45 deletions cpp/benchmarks/string/convert_datetime.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,62 +16,59 @@

#include <benchmarks/common/generate_input.hpp>
#include <benchmarks/fixture/benchmark_fixture.hpp>
#include <benchmarks/synchronization/synchronization.hpp>

#include <cudf/column/column_factories.hpp>
#include <cudf/column/column_view.hpp>
#include <cudf/strings/convert/convert_datetime.hpp>
#include <cudf/strings/strings_column_view.hpp>
#include <cudf/wrappers/timestamps.hpp>

class StringDateTime : public cudf::benchmark {};
#include <nvbench/nvbench.cuh>

enum class direction { to, from };
// Declare the strings nvbench associates with each cudf timestamp type that is
// benchmarked in this file (days, seconds, milliseconds, microseconds, nanoseconds).
// NOTE(review): the two string arguments are presumably the type's short name and
// its description -- confirm against the nvbench documentation.
NVBENCH_DECLARE_TYPE_STRINGS(cudf::timestamp_D, "cudf::timestamp_D", "cudf::timestamp_D");
NVBENCH_DECLARE_TYPE_STRINGS(cudf::timestamp_s, "cudf::timestamp_s", "cudf::timestamp_s");
NVBENCH_DECLARE_TYPE_STRINGS(cudf::timestamp_ms, "cudf::timestamp_ms", "cudf::timestamp_ms");
NVBENCH_DECLARE_TYPE_STRINGS(cudf::timestamp_us, "cudf::timestamp_us", "cudf::timestamp_us");
NVBENCH_DECLARE_TYPE_STRINGS(cudf::timestamp_ns, "cudf::timestamp_ns", "cudf::timestamp_ns");

template <class TypeParam>
void BM_convert_datetime(benchmark::State& state, direction dir)
// Timestamp types supplied as the type axis of the "datetime" benchmark registered
// at the end of this file; the benchmark function is instantiated once per entry.
using Types = nvbench::type_list<cudf::timestamp_D,
cudf::timestamp_s,
cudf::timestamp_ms,
cudf::timestamp_us,
cudf::timestamp_ns>;

template <class DataType>
void bench_convert_datetime(nvbench::state& state, nvbench::type_list<DataType>)
{
auto const n_rows = static_cast<cudf::size_type>(state.range(0));
auto const data_type = cudf::data_type(cudf::type_to_id<TypeParam>());
auto const num_rows = static_cast<cudf::size_type>(state.get_int64("num_rows"));
auto const from_ts = state.get_string("dir") == "from";

auto const column = create_random_column(data_type.id(), row_count{n_rows});
cudf::column_view input(column->view());
auto const data_type = cudf::data_type(cudf::type_to_id<DataType>());
auto const ts_col = create_random_column(data_type.id(), row_count{num_rows});

auto source = dir == direction::to ? cudf::strings::from_timestamps(input, "%Y-%m-%d %H:%M:%S")
: make_empty_column(cudf::data_type{cudf::type_id::STRING});
cudf::strings_column_view source_string(source->view());
auto format = std::string{"%Y-%m-%d %H:%M:%S"};
auto s_col = cudf::strings::from_timestamps(ts_col->view(), format);
auto sv = cudf::strings_column_view(s_col->view());

for (auto _ : state) {
cuda_event_timer raii(state, true);
if (dir == direction::to)
cudf::strings::to_timestamps(source_string, data_type, "%Y-%m-%d %H:%M:%S");
else
cudf::strings::from_timestamps(input, "%Y-%m-%d %H:%M:%S");
}
auto stream = cudf::get_default_stream();
state.set_cuda_stream(nvbench::make_cuda_stream_view(stream.value()));

auto const bytes = dir == direction::to ? source_string.chars_size(cudf::get_default_stream())
: n_rows * sizeof(TypeParam);
state.SetBytesProcessed(state.iterations() * bytes);
if (from_ts) {
state.add_global_memory_reads<DataType>(num_rows);
state.add_global_memory_writes<int8_t>(sv.chars_size(stream));
state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) {
cudf::strings::from_timestamps(ts_col->view(), format);
});
} else {
state.add_global_memory_reads<int8_t>(sv.chars_size(stream));
state.add_global_memory_writes<DataType>(num_rows);
state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) {
cudf::strings::to_timestamps(sv, data_type, format);
});
}
}

#define STR_BENCHMARK_DEFINE(name, type, dir) \
BENCHMARK_DEFINE_F(StringDateTime, name)(::benchmark::State & state) \
{ \
BM_convert_datetime<type>(state, dir); \
} \
BENCHMARK_REGISTER_F(StringDateTime, name) \
->RangeMultiplier(1 << 5) \
->Range(1 << 10, 1 << 25) \
->UseManualTime() \
->Unit(benchmark::kMicrosecond);

STR_BENCHMARK_DEFINE(from_days, cudf::timestamp_D, direction::from);
STR_BENCHMARK_DEFINE(from_seconds, cudf::timestamp_s, direction::from);
STR_BENCHMARK_DEFINE(from_mseconds, cudf::timestamp_ms, direction::from);
STR_BENCHMARK_DEFINE(from_useconds, cudf::timestamp_us, direction::from);
STR_BENCHMARK_DEFINE(from_nseconds, cudf::timestamp_ns, direction::from);

STR_BENCHMARK_DEFINE(to_days, cudf::timestamp_D, direction::to);
STR_BENCHMARK_DEFINE(to_seconds, cudf::timestamp_s, direction::to);
STR_BENCHMARK_DEFINE(to_mseconds, cudf::timestamp_ms, direction::to);
STR_BENCHMARK_DEFINE(to_useconds, cudf::timestamp_us, direction::to);
STR_BENCHMARK_DEFINE(to_nseconds, cudf::timestamp_ns, direction::to);
// Register bench_convert_datetime with nvbench under the name "datetime".
// Axes:
//   - "DataType": type axis taken from Types (the timestamp types).
//   - "dir": "from" benchmarks cudf::strings::from_timestamps; any other value
//     (here "to") benchmarks cudf::strings::to_timestamps.
//   - "num_rows": number of rows in the generated column (2^16, 2^18, 2^20, 2^22).
NVBENCH_BENCH_TYPES(bench_convert_datetime, NVBENCH_TYPE_AXES(Types))
.set_name("datetime")
.set_type_axes_names({"DataType"})
.add_string_axis("dir", {"to", "from"})
.add_int64_axis("num_rows", {1 << 16, 1 << 18, 1 << 20, 1 << 22});
122 changes: 45 additions & 77 deletions cpp/benchmarks/string/convert_durations.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2020-2023, NVIDIA CORPORATION.
* Copyright (c) 2020-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -14,92 +14,60 @@
* limitations under the License.
*/

#include <benchmarks/common/generate_input.hpp>
#include <benchmarks/fixture/benchmark_fixture.hpp>
#include <benchmarks/synchronization/synchronization.hpp>

#include <cudf_test/column_wrapper.hpp>

#include <cudf/column/column_view.hpp>
#include <cudf/strings/convert/convert_durations.hpp>
#include <cudf/types.hpp>
#include <cudf/strings/strings_column_view.hpp>
#include <cudf/wrappers/durations.hpp>

#include <algorithm>
#include <random>

class DurationsToString : public cudf::benchmark {};
template <class TypeParam>
void BM_convert_from_durations(benchmark::State& state)
{
cudf::size_type const source_size = state.range(0);

// Every element is valid
auto data = cudf::detail::make_counting_transform_iterator(
0, [source_size](auto i) { return TypeParam{i - source_size / 2}; });
#include <nvbench/nvbench.cuh>

cudf::test::fixed_width_column_wrapper<TypeParam> source_durations(data, data + source_size);
// Declare the strings nvbench associates with each cudf duration type that is
// benchmarked in this file (days, seconds, milliseconds, microseconds, nanoseconds).
// NOTE(review): the two string arguments are presumably the type's short name and
// its description -- confirm against the nvbench documentation.
NVBENCH_DECLARE_TYPE_STRINGS(cudf::duration_D, "cudf::duration_D", "cudf::duration_D");
NVBENCH_DECLARE_TYPE_STRINGS(cudf::duration_s, "cudf::duration_s", "cudf::duration_s");
NVBENCH_DECLARE_TYPE_STRINGS(cudf::duration_ms, "cudf::duration_ms", "cudf::duration_ms");
NVBENCH_DECLARE_TYPE_STRINGS(cudf::duration_us, "cudf::duration_us", "cudf::duration_us");
NVBENCH_DECLARE_TYPE_STRINGS(cudf::duration_ns, "cudf::duration_ns", "cudf::duration_ns");
Comment on lines +27 to +31
Copy link
Contributor

@karthikeyann karthikeyann Nov 7, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could we move these NVBENCH_DECLARE_TYPE_STRINGS declarations to a common header?
And extend them to the other dtypes too.

Copy link
Contributor Author

@davidwendt davidwendt Nov 7, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I've since seen other usages that I feel would require a separate PR to resolve.
Example: https://github.com/rapidsai/cudf/pull/17194/files#diff-2fa34b25a2e1b46e1e15f633b539e9b930a7728b9159e5ec810dd9d0e17bc50aR159-R219


for (auto _ : state) {
cuda_event_timer raii(state, true); // flush_l2_cache = true, stream = 0
cudf::strings::from_durations(source_durations, "%D days %H:%M:%S");
}

state.SetBytesProcessed(state.iterations() * source_size * sizeof(TypeParam));
}
// Duration types supplied as the type axis of the "duration" benchmark registered
// at the end of this file; the benchmark function is instantiated once per entry.
using Types = nvbench::type_list<cudf::duration_D,
cudf::duration_s,
cudf::duration_ms,
cudf::duration_us,
cudf::duration_ns>;

class StringToDurations : public cudf::benchmark {};
template <class TypeParam>
void BM_convert_to_durations(benchmark::State& state)
template <class DataType>
void bench_convert_duration(nvbench::state& state, nvbench::type_list<DataType>)
{
cudf::size_type const source_size = state.range(0);

// Every element is valid
auto data = cudf::detail::make_counting_transform_iterator(
0, [source_size](auto i) { return TypeParam{i - source_size / 2}; });

cudf::test::fixed_width_column_wrapper<TypeParam> source_durations(data, data + source_size);
auto results = cudf::strings::from_durations(source_durations, "%D days %H:%M:%S");
cudf::strings_column_view source_string(*results);
auto output_type = cudf::data_type(cudf::type_to_id<TypeParam>());

for (auto _ : state) {
cuda_event_timer raii(state, true); // flush_l2_cache = true, stream = 0
cudf::strings::to_durations(source_string, output_type, "%D days %H:%M:%S");
auto const num_rows = static_cast<cudf::size_type>(state.get_int64("num_rows"));
auto const data_type = cudf::data_type(cudf::type_to_id<DataType>());
auto const from_dur = state.get_string("dir") == "from";

auto const ts_col = create_random_column(data_type.id(), row_count{num_rows});
cudf::column_view input(ts_col->view());

auto format = std::string{"%D days %H:%M:%S"};
auto stream = cudf::get_default_stream();
state.set_cuda_stream(nvbench::make_cuda_stream_view(stream.value()));

if (from_dur) {
state.add_global_memory_reads<DataType>(num_rows);
state.add_global_memory_writes<int8_t>(format.size() * num_rows);
state.exec(nvbench::exec_tag::sync,
[&](nvbench::launch& launch) { cudf::strings::from_durations(input, format); });
} else {
auto source = cudf::strings::from_durations(input, format);
auto view = cudf::strings_column_view(source->view());
state.add_global_memory_reads<int8_t>(view.chars_size(stream));
state.add_global_memory_writes<DataType>(num_rows);
state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) {
cudf::strings::to_durations(view, data_type, format);
});
}

state.SetBytesProcessed(state.iterations() * source_size * sizeof(TypeParam));
}

#define DSBM_BENCHMARK_DEFINE(name, type) \
BENCHMARK_DEFINE_F(DurationsToString, name)(::benchmark::State & state) \
{ \
BM_convert_from_durations<type>(state); \
} \
BENCHMARK_REGISTER_F(DurationsToString, name) \
->RangeMultiplier(1 << 5) \
->Range(1 << 10, 1 << 25) \
->UseManualTime() \
->Unit(benchmark::kMicrosecond);

#define SDBM_BENCHMARK_DEFINE(name, type) \
BENCHMARK_DEFINE_F(StringToDurations, name)(::benchmark::State & state) \
{ \
BM_convert_to_durations<type>(state); \
} \
BENCHMARK_REGISTER_F(StringToDurations, name) \
->RangeMultiplier(1 << 5) \
->Range(1 << 10, 1 << 25) \
->UseManualTime() \
->Unit(benchmark::kMicrosecond);

DSBM_BENCHMARK_DEFINE(from_durations_D, cudf::duration_D);
DSBM_BENCHMARK_DEFINE(from_durations_s, cudf::duration_s);
DSBM_BENCHMARK_DEFINE(from_durations_ms, cudf::duration_ms);
DSBM_BENCHMARK_DEFINE(from_durations_us, cudf::duration_us);
DSBM_BENCHMARK_DEFINE(from_durations_ns, cudf::duration_ns);

SDBM_BENCHMARK_DEFINE(to_durations_D, cudf::duration_D);
SDBM_BENCHMARK_DEFINE(to_durations_s, cudf::duration_s);
SDBM_BENCHMARK_DEFINE(to_durations_ms, cudf::duration_ms);
SDBM_BENCHMARK_DEFINE(to_durations_us, cudf::duration_us);
SDBM_BENCHMARK_DEFINE(to_durations_ns, cudf::duration_ns);
// Register bench_convert_duration with nvbench under the name "duration".
// Axes:
//   - "DataType": type axis taken from Types (the duration types).
//   - "dir": "from" benchmarks cudf::strings::from_durations; any other value
//     (here "to") benchmarks cudf::strings::to_durations.
//   - "num_rows": number of rows in the generated column (2^10, 2^15, 2^20, 2^25).
NVBENCH_BENCH_TYPES(bench_convert_duration, NVBENCH_TYPE_AXES(Types))
.set_name("duration")
.set_type_axes_names({"DataType"})
.add_string_axis("dir", {"to", "from"})
.add_int64_axis("num_rows", {1 << 10, 1 << 15, 1 << 20, 1 << 25});
Loading