diff --git a/cpp/benchmarks/copying/contiguous_split.cu b/cpp/benchmarks/copying/contiguous_split.cu index 161f67425c1..996519fa0b3 100644 --- a/cpp/benchmarks/copying/contiguous_split.cu +++ b/cpp/benchmarks/copying/contiguous_split.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2024, NVIDIA CORPORATION. + * Copyright (c) 2019-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -36,7 +36,7 @@ void chunked_pack(cudf::table_view const& src_table, std::vector(100L * 1024 * 1024, stream, mr); - auto chunked_pack = cudf::chunked_pack::create(src_table, user_buffer.size(), mr); + auto chunked_pack = cudf::chunked_pack::create(src_table, user_buffer.size()); while (chunked_pack->has_next()) { auto iter_size = chunked_pack->next(user_buffer); } diff --git a/cpp/include/cudf/contiguous_split.hpp b/cpp/include/cudf/contiguous_split.hpp index 41eef9559b8..ba16694e101 100644 --- a/cpp/include/cudf/contiguous_split.hpp +++ b/cpp/include/cudf/contiguous_split.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023-2024, NVIDIA CORPORATION. + * Copyright (c) 2023-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -114,6 +114,7 @@ struct packed_table { * * @param input View of a table to split * @param splits A vector of indices where the view will be split + * @param stream CUDA stream used for device memory operations and kernel launches * @param mr An optional memory resource to use for all returned device allocations * @return The set of requested views of `input` indicated by the `splits` and the viewed memory * buffer @@ -121,6 +122,7 @@ struct packed_table { std::vector contiguous_split( cudf::table_view const& input, std::vector const& splits, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); namespace detail { @@ -198,12 +200,14 @@ class chunked_pack { * @param input source `table_view` to pack * @param user_buffer_size buffer size (in bytes) that will be passed on `next`. Must be * at least 1MB + * @param stream CUDA stream used for device memory operations and kernel launches * @param temp_mr An optional memory resource to be used for temporary and scratch allocations * only */ explicit chunked_pack( cudf::table_view const& input, std::size_t user_buffer_size, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::device_async_resource_ref temp_mr = cudf::get_current_device_resource_ref()); /** @@ -263,12 +267,14 @@ class chunked_pack { * @param input source `table_view` to pack * @param user_buffer_size buffer size (in bytes) that will be passed on `next`. Must be * at least 1MB + * @param stream CUDA stream used for device memory operations and kernel launches * @param temp_mr RMM memory resource to be used for temporary and scratch allocations only * @return a unique_ptr of chunked_pack */ [[nodiscard]] static std::unique_ptr create( cudf::table_view const& input, std::size_t user_buffer_size, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::device_async_resource_ref temp_mr = cudf::get_current_device_resource_ref()); private: @@ -284,11 +290,13 @@ class chunked_pack { * `cudf::unpack` to deserialize. * * @param input View of the table to pack + * @param stream CUDA stream used for device memory operations and kernel launches * @param mr An optional memory resource to use for all returned device allocations * @return packed_columns A struct containing the serialized metadata and data in contiguous host * and device memory respectively */ packed_columns pack(cudf::table_view const& input, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** diff --git a/cpp/src/copying/contiguous_split.cu b/cpp/src/copying/contiguous_split.cu index 3413f75357b..281949af7fb 100644 --- a/cpp/src/copying/contiguous_split.cu +++ b/cpp/src/copying/contiguous_split.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2024, NVIDIA CORPORATION. + * Copyright (c) 2019-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -28,7 +28,6 @@ #include #include #include -#include #include #include @@ -2072,14 +2071,16 @@ std::vector contiguous_split(cudf::table_view const& input, std::vector contiguous_split(cudf::table_view const& input, std::vector const& splits, + rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { CUDF_FUNC_RANGE(); - return detail::contiguous_split(input, splits, cudf::get_default_stream(), mr); + return detail::contiguous_split(input, splits, stream, mr); } chunked_pack::chunked_pack(cudf::table_view const& input, std::size_t user_buffer_size, + rmm::cuda_stream_view stream, rmm::device_async_resource_ref temp_mr) { CUDF_EXPECTS(user_buffer_size >= desired_batch_size, @@ -2087,7 +2088,7 @@ chunked_pack::chunked_pack(cudf::table_view const& input, // We pass `std::nullopt` for the first `mr` in `contiguous_split_state` to indicate // that it does not allocate any user-bound data for the `chunked_pack` case. state = std::make_unique( - input, user_buffer_size, cudf::get_default_stream(), std::nullopt, temp_mr); + input, user_buffer_size, stream, std::nullopt, temp_mr); } // required for the unique_ptr to work with a incomplete type (contiguous_split_state) @@ -2112,9 +2113,10 @@ std::unique_ptr> chunked_pack::build_metadata() const std::unique_ptr chunked_pack::create(cudf::table_view const& input, std::size_t user_buffer_size, + rmm::cuda_stream_view stream, rmm::device_async_resource_ref temp_mr) { - return std::make_unique(input, user_buffer_size, temp_mr); + return std::make_unique(input, user_buffer_size, stream, temp_mr); } }; // namespace cudf diff --git a/cpp/src/copying/pack.cpp b/cpp/src/copying/pack.cpp index 42ea28f5961..0c6b7977752 100644 --- a/cpp/src/copying/pack.cpp +++ b/cpp/src/copying/pack.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2024, NVIDIA CORPORATION. + * Copyright (c) 2021-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -17,7 +17,6 @@ #include #include #include -#include #include @@ -257,10 +256,12 @@ void metadata_builder::clear() { return impl->clear(); } /** * @copydoc cudf::pack */ -packed_columns pack(cudf::table_view const& input, rmm::device_async_resource_ref mr) +packed_columns pack(cudf::table_view const& input, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr) { CUDF_FUNC_RANGE(); - return detail::pack(input, cudf::get_default_stream(), mr); + return detail::pack(input, stream, mr); } /** diff --git a/cpp/tests/copying/split_tests.cpp b/cpp/tests/copying/split_tests.cpp index b56b0f2d3f8..eeed3ec17e3 100644 --- a/cpp/tests/copying/split_tests.cpp +++ b/cpp/tests/copying/split_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2024, NVIDIA CORPORATION. + * Copyright (c) 2019-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -1394,7 +1394,8 @@ std::vector do_chunked_pack(cudf::table_view const& input) auto bounce_buff_span = cudf::device_span(static_cast(bounce_buff.data()), bounce_buff.size()); - auto chunked_pack = cudf::chunked_pack::create(input, bounce_buff_span.size(), mr); + auto chunked_pack = + cudf::chunked_pack::create(input, bounce_buff_span.size(), cudf::get_default_stream(), mr); // right size the final buffer rmm::device_buffer final_buff( diff --git a/cpp/tests/streams/copying_test.cpp b/cpp/tests/streams/copying_test.cpp index cb09331e87d..c845ff611e8 100644 --- a/cpp/tests/streams/copying_test.cpp +++ b/cpp/tests/streams/copying_test.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -20,6 +20,7 @@ #include #include +#include #include #include @@ -337,3 +338,25 @@ TEST_F(CopyingTest, PurgeNonEmptyNulls) cudf::purge_nonempty_nulls(*input, cudf::test::get_default_stream()); } + +TEST_F(CopyingTest, ContiguousSplit) +{ + std::vector splits{ + 2, 16, 31, 35, 64, 97, 158, 190, 638, 899, 900, 901, 996, 4200, 7131, 8111}; + + cudf::size_type size = 10002; + auto iter = cudf::detail::make_counting_transform_iterator( + 0, [](auto i) { return static_cast(i); }); + + std::vector base_strings( + {"banana", "pear", "apple", "pecans", "vanilla", "cat", "mouse", "green"}); + auto string_randomizer = thrust::make_transform_iterator( + thrust::make_counting_iterator(0), + [&base_strings](cudf::size_type i) { return base_strings[rand() % base_strings.size()]; }); + + cudf::test::fixed_width_column_wrapper col(iter, iter + size); + std::vector strings(string_randomizer, string_randomizer + size); + cudf::test::strings_column_wrapper col2(strings.begin(), strings.end()); + cudf::table_view tbl({col, col2}); + auto result = cudf::contiguous_split(tbl, splits, cudf::test::get_default_stream()); +} diff --git a/java/src/main/native/src/TableJni.cpp b/java/src/main/native/src/TableJni.cpp index a6c7ae9ba18..50c6ae842f4 100644 --- a/java/src/main/native/src/TableJni.cpp +++ b/java/src/main/native/src/TableJni.cpp @@ -4181,10 +4181,11 @@ JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_Table_makeChunkedPack( cudf::table_view* n_table = reinterpret_cast(input_table); // `temp_mr` is the memory resource that `cudf::chunked_pack` will use to create temporary // and scratch memory only. - auto temp_mr = memoryResourceHandle != 0 - ? reinterpret_cast(memoryResourceHandle) - : cudf::get_current_device_resource_ref(); - auto chunked_pack = cudf::chunked_pack::create(*n_table, bounce_buffer_size, temp_mr); + auto temp_mr = memoryResourceHandle != 0 + ? reinterpret_cast(memoryResourceHandle) + : cudf::get_current_device_resource(); + auto chunked_pack = + cudf::chunked_pack::create(*n_table, bounce_buffer_size, cudf::get_default_stream(), temp_mr); return reinterpret_cast(chunked_pack.release()); } CATCH_STD(env, 0);