Skip to content

Commit

Permalink
Fastpath single strings column in cudf::sort (rapidsai#7075)
Browse files Browse the repository at this point in the history
Closes rapidsai#7027 

The internal `cudf::strings::detail::sort()` function is faster at sorting a single strings column than `cudf::sort`. Details are in the rapidsai#7027 comments.

Results using the sort gbenchmark:

```
Baseline:
SortStrings/stringssort/1024/manual_time               1.18 ms         1.20 ms          593
SortStrings/stringssort/4096/manual_time               1.98 ms         2.00 ms          352
SortStrings/stringssort/32768/manual_time              2.73 ms         2.75 ms          256
SortStrings/stringssort/262144/manual_time             4.36 ms         4.38 ms          160
SortStrings/stringssort/2097152/manual_time            66.2 ms         66.2 ms           10
SortStrings/stringssort/16777216/manual_time            547 ms          548 ms            1

Calling cudf::strings::detail::sort from cudf::sort:
SortStrings/stringssort/1024/manual_time              0.692 ms        0.711 ms         1002
SortStrings/stringssort/4096/manual_time               1.13 ms         1.15 ms          615
SortStrings/stringssort/32768/manual_time              1.59 ms         1.61 ms          440
SortStrings/stringssort/262144/manual_time             2.82 ms         2.84 ms          247
SortStrings/stringssort/2097152/manual_time            43.1 ms         43.1 ms           16
SortStrings/stringssort/16777216/manual_time            386 ms          386 ms            2

```

Authors:
  - davidwendt <[email protected]>

Approvers:
  - AJ Schmidt (@ajschmidt8)
  - Conor Hoekstra (@codereport)
  - Jake Hemstad (@jrhemstad)
  - Christopher Harris (@cwharris)

URL: rapidsai#7075
  • Loading branch information
davidwendt authored Jan 15, 2021
1 parent e0e2cf8 commit c2e9ffd
Show file tree
Hide file tree
Showing 8 changed files with 132 additions and 157 deletions.
1 change: 0 additions & 1 deletion conda/recipes/libcudf/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -167,7 +167,6 @@ test:
- test -f $PREFIX/include/cudf/strings/padding.hpp
- test -f $PREFIX/include/cudf/strings/replace.hpp
- test -f $PREFIX/include/cudf/strings/replace_re.hpp
- test -f $PREFIX/include/cudf/strings/sorting.hpp
- test -f $PREFIX/include/cudf/strings/split/partition.hpp
- test -f $PREFIX/include/cudf/strings/split/split.hpp
- test -f $PREFIX/include/cudf/strings/strings_column_view.hpp
Expand Down
8 changes: 4 additions & 4 deletions cpp/benchmarks/sort/sort_strings_benchmark.cu
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
#include <cudf/sorting.hpp>
#include <cudf/types.hpp>

class SortStrings : public cudf::benchmark {
class Sort : public cudf::benchmark {
};

static void BM_sort(benchmark::State& state)
Expand All @@ -38,12 +38,12 @@ static void BM_sort(benchmark::State& state)
}

#define SORT_BENCHMARK_DEFINE(name) \
BENCHMARK_DEFINE_F(SortStrings, name) \
BENCHMARK_DEFINE_F(Sort, name) \
(::benchmark::State & st) { BM_sort(st); } \
BENCHMARK_REGISTER_F(SortStrings, name) \
BENCHMARK_REGISTER_F(Sort, name) \
->RangeMultiplier(8) \
->Ranges({{1 << 10, 1 << 24}}) \
->UseManualTime() \
->Unit(benchmark::kMillisecond);

SORT_BENCHMARK_DEFINE(stringssort)
SORT_BENCHMARK_DEFINE(strings)
105 changes: 105 additions & 0 deletions cpp/include/cudf/strings/detail/sorting.cuh
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
/*
* Copyright (c) 2021, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once

#include <cudf/column/column.hpp>
#include <cudf/column/column_device_view.cuh>
#include <cudf/column/column_factories.hpp>
#include <cudf/strings/string_view.cuh>
#include <cudf/strings/strings_column_view.hpp>

#include <rmm/cuda_stream_view.hpp>
#include <rmm/exec_policy.hpp>

#include <thrust/sequence.h>
#include <thrust/sort.h>

namespace cudf {
namespace strings {
namespace detail {

/**
 * @brief Less-than functor used to order the rows of a strings column by row index.
 *
 * `operator()` receives two row indices into `d_column` and returns true when
 * row `lhs` must be placed before row `rhs`.
 *
 * When `has_nulls` is set and at least one of the two rows is null, the string
 * contents are not read; the answer comes only from the two null flags,
 * `ascending` and `null_prec`:
 *  - ascending  + null_order::BEFORE : a null row goes before a non-null row
 *  - ascending  + null_order::AFTER  : a null row goes after a non-null row
 *  - descending                      : the two placements above are reversed
 *  - two null rows are never ordered before one another (returns false)
 *
 * Otherwise the two rows are compared with `string_view::compare`, and the
 * sign that is tested depends on `ascending`.
 */
struct sort_strings_comparator {
  __device__ bool operator()(size_type lhs, size_type rhs)
  {
    if (has_nulls) {
      bool const lhs_is_null = d_column.is_null(lhs);
      bool const rhs_is_null = d_column.is_null(rhs);
      if (lhs_is_null || rhs_is_null) {
        // A descending sort exchanges the roles of the two operands.
        bool const first_is_null  = ascending ? lhs_is_null : rhs_is_null;
        bool const second_is_null = ascending ? rhs_is_null : lhs_is_null;
        if (null_prec == cudf::null_order::BEFORE) { return !second_is_null; }
        return !first_is_null;
      }
    }
    // Both rows hold valid strings at this point (or the column has no nulls).
    auto const left   = d_column.element<string_view>(lhs);
    auto const right  = d_column.element<string_view>(rhs);
    auto const result = left.compare(right);
    if (ascending) { return result < 0; }
    return result > 0;
  }
  column_device_view const d_column;  ///< Device view of the strings column being sorted
  bool has_nulls;                     ///< When false, the per-row null checks are skipped
  bool ascending;                     ///< true: ascending order, false: descending order
  cudf::null_order null_prec;         ///< Placement of null rows relative to non-null rows
};

/**
 * @brief Builds the column of row indices that puts a strings column in sorted order.
 *
 * The result starts as the sequence 0, 1, ..., size-1 and is then sorted with
 * `sort_strings_comparator`, so output element `i` is the index of the input
 * row that belongs at position `i`.
 *
 * @tparam stable When true the indices are sorted with `thrust::stable_sort`,
 *         otherwise with `thrust::sort`.
 *
 * @param strings Strings instance for this operation.
 * @param sort_order Sort strings in ascending or descending order.
 * @param null_precedence Sort nulls to the beginning or the end of the new column.
 * @param stream CUDA stream used for device memory operations and kernel launches.
 * @param mr Device memory resource used to allocate the returned column's device memory.
 * @return Indices of the sorted rows.
 */
template <bool stable = false>
std::unique_ptr<cudf::column> sorted_order(
  strings_column_view const strings,
  cudf::order sort_order              = cudf::order::ASCENDING,
  cudf::null_order null_precedence    = cudf::null_order::BEFORE,
  rmm::cuda_stream_view stream        = rmm::cuda_stream_default,
  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource())
{
  // The owning handle is kept in scope until the sort below has been issued.
  auto const device_view_owner = column_device_view::create(strings.parent(), stream);
  auto const device_view       = *device_view_owner;

  // Output column of size_type indices, allocated without a null mask.
  auto indices = cudf::make_numeric_column(
    data_type(type_to_id<size_type>()), strings.size(), mask_state::UNALLOCATED, stream, mr);
  auto indices_view = indices->mutable_view();
  auto const first  = indices_view.begin<size_type>();
  auto const last   = indices_view.end<size_type>();

  // Fill with 0, 1, 2, ... before reordering.
  thrust::sequence(rmm::exec_policy(stream), first, last, 0);

  bool const is_ascending = (sort_order == cudf::order::ASCENDING);
  sort_strings_comparator comparator{
    device_view, strings.has_nulls(), is_ascending, null_precedence};

  if (!stable) {
    thrust::sort(rmm::exec_policy(stream), first, last, comparator);
  } else {
    thrust::stable_sort(rmm::exec_policy(stream), first, last, comparator);
  }
  return indices;
}

} // namespace detail
} // namespace strings
} // namespace cudf
58 changes: 0 additions & 58 deletions cpp/include/cudf/strings/sorting.hpp

This file was deleted.

6 changes: 3 additions & 3 deletions cpp/src/sort/sort.cu
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019-20, NVIDIA CORPORATION.
* Copyright (c) 2019-2021, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -14,14 +14,14 @@
* limitations under the License.
*/

#include "sort_impl.cuh"

#include <cudf/column/column.hpp>
#include <cudf/detail/nvtx/ranges.hpp>
#include <cudf/detail/sorting.hpp>
#include <cudf/sorting.hpp>
#include <cudf/table/table_view.hpp>

#include <sort/sort_impl.cuh>

#include <rmm/cuda_stream_view.hpp>

namespace cudf {
Expand Down
13 changes: 12 additions & 1 deletion cpp/src/sort/sort_impl.cuh
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2020, NVIDIA CORPORATION.
* Copyright (c) 2020-2021, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -18,6 +18,7 @@

#include <cudf/column/column_factories.hpp>
#include <cudf/detail/gather.hpp>
#include <cudf/strings/detail/sorting.cuh>
#include <cudf/table/row_operators.cuh>
#include <cudf/table/table_device_view.cuh>
#include <cudf/utilities/error.hpp>
Expand Down Expand Up @@ -52,6 +53,16 @@ std::unique_ptr<column> sorted_order(table_view input,
"Mismatch between number of columns and null_precedence size.");
}

// fast-path for single strings column sort
if (input.num_columns() == 1 && input.column(0).type().id() == type_id::STRING) {
return cudf::strings::detail::sorted_order<stable>(
strings_column_view(input.column(0)),
column_order.empty() ? order::ASCENDING : column_order.front(),
null_precedence.empty() ? null_order::BEFORE : null_precedence.front(),
stream,
mr);
}

std::unique_ptr<column> sorted_indices = cudf::make_numeric_column(
data_type(type_to_id<size_type>()), input.num_rows(), mask_state::UNALLOCATED, stream, mr);

Expand Down
82 changes: 0 additions & 82 deletions cpp/src/strings/sorting/sorting.cu

This file was deleted.

16 changes: 8 additions & 8 deletions cpp/tests/strings/array_tests.cu
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019-2020, NVIDIA CORPORATION.
* Copyright (c) 2019-2021, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -18,11 +18,12 @@
#include <cudf/column/column_factories.hpp>
#include <cudf/copying.hpp>
#include <cudf/scalar/scalar.hpp>
#include <cudf/sorting.hpp>
#include <cudf/strings/copying.hpp>
#include <cudf/strings/detail/scatter.cuh>
#include <cudf/strings/detail/utilities.hpp>
#include <cudf/strings/sorting.hpp>
#include <cudf/strings/strings_column_view.hpp>
#include <cudf/table/table_view.hpp>
#include <cudf/utilities/error.hpp>

#include <tests/strings/utilities.h>
Expand All @@ -44,18 +45,17 @@ TEST_F(StringsColumnTest, Sort)
cudf::test::strings_column_wrapper h_expected({"<null>", "", "aa", "bb", "bbb", "eee", "ééé"},
{0, 1, 1, 1, 1, 1, 1});

auto strings_view = cudf::strings_column_view(h_strings);
auto results = cudf::strings::detail::sort(strings_view, cudf::strings::detail::name);
CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, h_expected);
auto results =
cudf::sort(cudf::table_view({h_strings}), {cudf::order::ASCENDING}, {cudf::null_order::BEFORE});
CUDF_TEST_EXPECT_COLUMNS_EQUAL(results->view().column(0), h_expected);
}

TEST_F(StringsColumnTest, SortZeroSizeStringsColumn)
{
cudf::column_view zero_size_strings_column(
cudf::data_type{cudf::type_id::STRING}, 0, nullptr, nullptr, 0);
auto strings_view = cudf::strings_column_view(zero_size_strings_column);
auto results = cudf::strings::detail::sort(strings_view, cudf::strings::detail::name);
cudf::test::expect_strings_empty(results->view());
auto results = cudf::sort(cudf::table_view({zero_size_strings_column}));
cudf::test::expect_strings_empty(results->view().column(0));
}

class SliceParmsTest : public StringsColumnTest,
Expand Down

0 comments on commit c2e9ffd

Please sign in to comment.