From f3402c402c2d0be54a6f2060e1bd74e284c1e687 Mon Sep 17 00:00:00 2001 From: Suraj Aralihalli Date: Mon, 25 Sep 2023 14:10:44 -0700 Subject: [PATCH] Add stream parameter to external dict APIs (#14115) This PR adds stream parameter to public dictionary APIs, which include: 1. `cudf::dictionary::encode` 2. `cudf::dictionary::decode` 3. `cudf::dictionary::get_index` 4. `cudf::dictionary::add_keys` 5. `cudf::dictionary::remove_keys` 6. `cudf::dictionary::remove_unused_keys` 7. `cudf::dictionary::set_keys` 8. `cudf::dictionary::match_dictionaries` Reference [13744](https://github.com/rapidsai/cudf/issues/13744) Authors: - Suraj Aralihalli (https://github.com/SurajAralihalli) - Yunsong Wang (https://github.com/PointKernel) Approvers: - Vyas Ramasubramani (https://github.com/vyasr) URL: https://github.com/rapidsai/cudf/pull/14115 --- cpp/include/cudf/dictionary/encode.hpp | 6 +- cpp/include/cudf/dictionary/search.hpp | 6 +- cpp/include/cudf/dictionary/update_keys.hpp | 16 ++- cpp/include/cudf_test/column_wrapper.hpp | 18 +++- cpp/src/dictionary/add_keys.cu | 3 +- cpp/src/dictionary/decode.cu | 5 +- cpp/src/dictionary/encode.cu | 5 +- cpp/src/dictionary/remove_keys.cu | 6 +- cpp/src/dictionary/search.cu | 11 +- cpp/src/dictionary/set_keys.cu | 9 +- cpp/tests/CMakeLists.txt | 1 + cpp/tests/streams/dictionary_test.cpp | 105 ++++++++++++++++++++ 12 files changed, 164 insertions(+), 27 deletions(-) create mode 100644 cpp/tests/streams/dictionary_test.cpp diff --git a/cpp/include/cudf/dictionary/encode.hpp b/cpp/include/cudf/dictionary/encode.hpp index fb13eabe11a..959b785bf87 100644 --- a/cpp/include/cudf/dictionary/encode.hpp +++ b/cpp/include/cudf/dictionary/encode.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2022, NVIDIA CORPORATION. + * Copyright (c) 2020-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -53,12 +53,14 @@ namespace dictionary { * * @param column The column to dictionary encode * @param indices_type The integer type to use for the indices + * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource used to allocate the returned column's device memory * @return Returns a dictionary column */ std::unique_ptr encode( column_view const& column, data_type indices_type = data_type{type_id::UINT32}, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -72,11 +74,13 @@ std::unique_ptr encode( * @endcode * * @param dictionary_column Existing dictionary column + * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource used to allocate the returned column's device memory * @return New column with type matching the dictionary_column's keys */ std::unique_ptr decode( dictionary_column_view const& dictionary_column, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @} */ // end of group diff --git a/cpp/include/cudf/dictionary/search.hpp b/cpp/include/cudf/dictionary/search.hpp index ed7a9c84693..1b72cf42acd 100644 --- a/cpp/include/cudf/dictionary/search.hpp +++ b/cpp/include/cudf/dictionary/search.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2022, NVIDIA CORPORATION. + * Copyright (c) 2020-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -37,12 +37,14 @@ namespace dictionary { * * @param dictionary The dictionary to search for the key. * @param key The value to search for in the dictionary keyset. + * @param stream CUDA stream used for device memory operations and kernel launches. * @param mr Device memory resource used to allocate the returned scalar's device memory. - * @return Numeric scalar index value of the key within the dictionary + * @return Numeric scalar index value of the key within the dictionary. */ std::unique_ptr get_index( dictionary_column_view const& dictionary, scalar const& key, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @} */ // end of group diff --git a/cpp/include/cudf/dictionary/update_keys.hpp b/cpp/include/cudf/dictionary/update_keys.hpp index 2fcfb5e1f7c..81728e1ff73 100644 --- a/cpp/include/cudf/dictionary/update_keys.hpp +++ b/cpp/include/cudf/dictionary/update_keys.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2022, NVIDIA CORPORATION. + * Copyright (c) 2020-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -51,13 +51,15 @@ namespace dictionary { * @throw cudf_logic_error if the new_keys contain nulls. * * @param dictionary_column Existing dictionary column. - * @param new_keys New keys to incorporate into the dictionary_column + * @param new_keys New keys to incorporate into the dictionary_column. + * @param stream CUDA stream used for device memory operations and kernel launches. * @param mr Device memory resource used to allocate the returned column's device memory. * @return New dictionary column. */ std::unique_ptr add_keys( dictionary_column_view const& dictionary_column, column_view const& new_keys, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -81,13 +83,15 @@ std::unique_ptr add_keys( * @throw cudf_logic_error if the keys_to_remove contain nulls. * * @param dictionary_column Existing dictionary column. - * @param keys_to_remove The keys to remove from the dictionary_column + * @param keys_to_remove The keys to remove from the dictionary_column. + * @param stream CUDA stream used for device memory operations and kernel launches. * @param mr Device memory resource used to allocate the returned column's device memory. * @return New dictionary column. */ std::unique_ptr remove_keys( dictionary_column_view const& dictionary_column, column_view const& keys_to_remove, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -103,11 +107,13 @@ std::unique_ptr remove_keys( * @endcode * * @param dictionary_column Existing dictionary column. + * @param stream CUDA stream used for device memory operations and kernel launches. * @param mr Device memory resource used to allocate the returned column's device memory. * @return New dictionary column. */ std::unique_ptr remove_unused_keys( dictionary_column_view const& dictionary_column, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -134,12 +140,14 @@ std::unique_ptr remove_unused_keys( * * @param dictionary_column Existing dictionary column. * @param keys New keys to use for the output column. Must not contain nulls. + * @param stream CUDA stream used for device memory operations and kernel launches. * @param mr Device memory resource used to allocate the returned column's device memory. * @return New dictionary column. */ std::unique_ptr set_keys( dictionary_column_view const& dictionary_column, column_view const& keys, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -149,11 +157,13 @@ std::unique_ptr set_keys( * The result is a vector of new dictionaries with a common set of keys. * * @param input Dictionary columns to match keys. + * @param stream CUDA stream used for device memory operations and kernel launches. * @param mr Device memory resource used to allocate the returned column's device memory. * @return New dictionary columns. */ std::vector> match_dictionaries( cudf::host_span input, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @} */ // end of group diff --git a/cpp/include/cudf_test/column_wrapper.hpp b/cpp/include/cudf_test/column_wrapper.hpp index cc8cac35ef4..c0932b81dc3 100644 --- a/cpp/include/cudf_test/column_wrapper.hpp +++ b/cpp/include/cudf_test/column_wrapper.hpp @@ -944,8 +944,10 @@ class dictionary_column_wrapper : public detail::column_wrapper { template dictionary_column_wrapper(InputIterator begin, InputIterator end) : column_wrapper{} { - wrapped = cudf::dictionary::encode( - fixed_width_column_wrapper(begin, end)); + wrapped = + cudf::dictionary::encode(fixed_width_column_wrapper(begin, end), + cudf::data_type{type_id::UINT32}, + cudf::test::get_default_stream()); } /** @@ -978,7 +980,9 @@ class dictionary_column_wrapper : public detail::column_wrapper { : column_wrapper{} { wrapped = cudf::dictionary::encode( - fixed_width_column_wrapper(begin, end, v)); + fixed_width_column_wrapper(begin, end, v), + cudf::data_type{type_id::UINT32}, + cudf::test::get_default_stream()); } /** @@ -1134,7 +1138,9 @@ class dictionary_column_wrapper : public detail::column_wrapper { template dictionary_column_wrapper(StringsIterator begin, StringsIterator end) : column_wrapper{} { - wrapped = cudf::dictionary::encode(strings_column_wrapper(begin, end)); + wrapped = cudf::dictionary::encode(strings_column_wrapper(begin, end), + cudf::data_type{type_id::UINT32}, + cudf::test::get_default_stream()); } /** @@ -1169,7 +1175,9 @@ class dictionary_column_wrapper : public detail::column_wrapper { dictionary_column_wrapper(StringsIterator begin, StringsIterator end, ValidityIterator v) : column_wrapper{} { - wrapped = cudf::dictionary::encode(strings_column_wrapper(begin, end, v)); + wrapped = cudf::dictionary::encode(strings_column_wrapper(begin, end, v), + cudf::data_type{type_id::UINT32}, + cudf::test::get_default_stream()); } /** diff --git a/cpp/src/dictionary/add_keys.cu b/cpp/src/dictionary/add_keys.cu index ab22c07e4d5..3973100aced 100644 --- a/cpp/src/dictionary/add_keys.cu +++ b/cpp/src/dictionary/add_keys.cu @@ -130,10 +130,11 @@ std::unique_ptr add_keys(dictionary_column_view const& dictionary_column std::unique_ptr add_keys(dictionary_column_view const& dictionary_column, column_view const& keys, + rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::add_keys(dictionary_column, keys, cudf::get_default_stream(), mr); + return detail::add_keys(dictionary_column, keys, stream, mr); } } // namespace dictionary diff --git a/cpp/src/dictionary/decode.cu b/cpp/src/dictionary/decode.cu index 01411d06b62..fdf546b5875 100644 --- a/cpp/src/dictionary/decode.cu +++ b/cpp/src/dictionary/decode.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2022, NVIDIA CORPORATION. + * Copyright (c) 2020-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -65,10 +65,11 @@ std::unique_ptr decode(dictionary_column_view const& source, } // namespace detail std::unique_ptr decode(dictionary_column_view const& source, + rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::decode(source, cudf::get_default_stream(), mr); + return detail::decode(source, stream, mr); } } // namespace dictionary diff --git a/cpp/src/dictionary/encode.cu b/cpp/src/dictionary/encode.cu index fe8e777b694..c92b57f0cac 100644 --- a/cpp/src/dictionary/encode.cu +++ b/cpp/src/dictionary/encode.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2022, NVIDIA CORPORATION. + * Copyright (c) 2020-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -89,10 +89,11 @@ data_type get_indices_type_for_size(size_type keys_size) std::unique_ptr encode(column_view const& input_column, data_type indices_type, + rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::encode(input_column, indices_type, cudf::get_default_stream(), mr); + return detail::encode(input_column, indices_type, stream, mr); } } // namespace dictionary diff --git a/cpp/src/dictionary/remove_keys.cu b/cpp/src/dictionary/remove_keys.cu index 9fe4a63373b..86b70f1119b 100644 --- a/cpp/src/dictionary/remove_keys.cu +++ b/cpp/src/dictionary/remove_keys.cu @@ -195,17 +195,19 @@ std::unique_ptr remove_unused_keys(dictionary_column_view const& diction std::unique_ptr remove_keys(dictionary_column_view const& dictionary_column, column_view const& keys_to_remove, + rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::remove_keys(dictionary_column, keys_to_remove, cudf::get_default_stream(), mr); + return detail::remove_keys(dictionary_column, keys_to_remove, stream, mr); } std::unique_ptr remove_unused_keys(dictionary_column_view const& dictionary_column, + rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::remove_unused_keys(dictionary_column, cudf::get_default_stream(), mr); + return detail::remove_unused_keys(dictionary_column, stream, mr); } } // namespace dictionary diff --git a/cpp/src/dictionary/search.cu b/cpp/src/dictionary/search.cu index 8e97a387780..e35aded1984 100644 --- a/cpp/src/dictionary/search.cu +++ b/cpp/src/dictionary/search.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2022, NVIDIA CORPORATION. + * Copyright (c) 2020-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -79,10 +79,8 @@ struct find_index_fn { using ScalarType = cudf::scalar_type_t; auto find_key = static_cast(key).value(stream); auto keys_view = column_device_view::create(input.keys(), stream); - auto iter = thrust::equal_range(rmm::exec_policy(cudf::get_default_stream()), - keys_view->begin(), - keys_view->end(), - find_key); + auto iter = thrust::equal_range( + rmm::exec_policy(stream), keys_view->begin(), keys_view->end(), find_key); return type_dispatcher(input.indices().type(), dispatch_scalar_index{}, thrust::distance(keys_view->begin(), iter.first), @@ -176,10 +174,11 @@ std::unique_ptr get_insert_index(dictionary_column_view const& dictionar std::unique_ptr get_index(dictionary_column_view const& dictionary, scalar const& key, + rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::get_index(dictionary, key, cudf::get_default_stream(), mr); + return detail::get_index(dictionary, key, stream, mr); } } // namespace dictionary diff --git a/cpp/src/dictionary/set_keys.cu b/cpp/src/dictionary/set_keys.cu index 36f5021d305..b49cf7850b1 100644 --- a/cpp/src/dictionary/set_keys.cu +++ b/cpp/src/dictionary/set_keys.cu @@ -241,17 +241,20 @@ std::pair>, std::vector> match_d std::unique_ptr set_keys(dictionary_column_view const& dictionary_column, column_view const& keys, + rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::set_keys(dictionary_column, keys, cudf::get_default_stream(), mr); + return detail::set_keys(dictionary_column, keys, stream, mr); } std::vector> match_dictionaries( - cudf::host_span input, rmm::mr::device_memory_resource* mr) + cudf::host_span input, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::match_dictionaries(input, cudf::get_default_stream(), mr); + return detail::match_dictionaries(input, stream, mr); } } // namespace dictionary diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index 956bfc7c27d..68ff6c54c99 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -629,6 +629,7 @@ ConfigureTest(STREAM_HASHING_TEST streams/hash_test.cpp STREAM_MODE testing) ConfigureTest(STREAM_INTEROP_TEST streams/interop_test.cpp STREAM_MODE testing) ConfigureTest(STREAM_REPLACE_TEST streams/replace_test.cpp STREAM_MODE testing) ConfigureTest(STREAM_SEARCH_TEST streams/search_test.cpp STREAM_MODE testing) +ConfigureTest(STREAM_DICTIONARY_TEST streams/dictionary_test.cpp STREAM_MODE testing) ConfigureTest( STREAM_STRINGS_TEST streams/strings/case_test.cpp streams/strings/find_test.cpp STREAM_MODE testing diff --git a/cpp/tests/streams/dictionary_test.cpp b/cpp/tests/streams/dictionary_test.cpp new file mode 100644 index 00000000000..f48e64c078e --- /dev/null +++ b/cpp/tests/streams/dictionary_test.cpp @@ -0,0 +1,105 @@ +/* + * Copyright (c) 2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include +#include + +#include +#include +#include + +class DictionaryTest : public cudf::test::BaseFixture {}; + +TEST_F(DictionaryTest, Encode) +{ + cudf::test::fixed_width_column_wrapper col({1, 2, 3, 4, 5}); + cudf::data_type int32_type(cudf::type_id::UINT32); + cudf::column_view col_view = col; + cudf::dictionary::encode(col_view, int32_type, cudf::test::get_default_stream()); +} + +TEST_F(DictionaryTest, Decode) +{ + // keys = {0, 2, 6}, indices = {0, 1, 1, 2, 2} + std::vector elements{0, 2, 2, 6, 6}; + cudf::test::dictionary_column_wrapper dict_col(elements.begin(), elements.end()); + cudf::dictionary_column_view dict_col_view = dict_col; + cudf::dictionary::decode(dict_col_view, cudf::test::get_default_stream()); +} + +TEST_F(DictionaryTest, GetIndex) +{ + std::vector elements{0, 2, 2, 6, 6}; + cudf::test::dictionary_column_wrapper dict_col(elements.begin(), elements.end()); + cudf::dictionary_column_view dict_col_view = dict_col; + cudf::numeric_scalar key_scalar(2, true, cudf::test::get_default_stream()); + cudf::dictionary::get_index(dict_col_view, key_scalar, cudf::test::get_default_stream()); +} + +TEST_F(DictionaryTest, AddKeys) +{ + std::vector elements{0, 2, 2, 6, 6}; + cudf::test::dictionary_column_wrapper dict_col(elements.begin(), elements.end()); + cudf::dictionary_column_view dict_col_view = dict_col; + cudf::test::fixed_width_column_wrapper new_keys_col({8, 9}); + cudf::dictionary::add_keys(dict_col_view, new_keys_col, cudf::test::get_default_stream()); +} + +TEST_F(DictionaryTest, RemoveKeys) +{ + std::vector elements{0, 2, 2, 6, 6}; + cudf::test::dictionary_column_wrapper dict_col(elements.begin(), elements.end()); + cudf::dictionary_column_view dict_col_view = dict_col; + cudf::test::fixed_width_column_wrapper keys_to_remove_col({2}); + cudf::dictionary::remove_keys( + dict_col_view, keys_to_remove_col, cudf::test::get_default_stream()); +} + +TEST_F(DictionaryTest, RemoveUnsedKeys) +{ + std::vector elements{0, 2, 2, 6, 6}; + cudf::test::dictionary_column_wrapper dict_col(elements.begin(), elements.end()); + cudf::dictionary_column_view dict_col_view = dict_col; + cudf::dictionary::remove_unused_keys(dict_col_view, cudf::test::get_default_stream()); +} + +TEST_F(DictionaryTest, SetKeys) +{ + std::vector elements{0, 2, 2, 6, 6}; + cudf::test::dictionary_column_wrapper dict_col(elements.begin(), elements.end()); + cudf::dictionary_column_view dict_col_view = dict_col; + cudf::test::fixed_width_column_wrapper keys_col({2, 6}); + cudf::dictionary::set_keys(dict_col_view, keys_col, cudf::test::get_default_stream()); +} + +TEST_F(DictionaryTest, MatchDictionaries) +{ + std::vector elements_a{0, 2, 2, 6, 6}; + cudf::test::dictionary_column_wrapper dict_col_a(elements_a.begin(), elements_a.end()); + cudf::dictionary_column_view dict_col_view_a = dict_col_a; + + std::vector elements_b{1, 3, 4, 5, 5}; + cudf::test::dictionary_column_wrapper dict_col_b(elements_b.begin(), elements_b.end()); + cudf::dictionary_column_view dict_col_view_b = dict_col_b; + + std::vector dicts = {dict_col_view_a, dict_col_view_b}; + + cudf::test::fixed_width_column_wrapper keys_col({2, 6}); + cudf::dictionary::match_dictionaries(dicts, cudf::test::get_default_stream()); +}