forked from NVIDIA/cccl
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* Add transform benchmark requiring a stable address
* Make thrust::transform use cub::DeviceTransform
* Introduce address stability detection and opt-in in libcu++
* Mark lambdas in Thrust BabelStream benchmark address oblivious
* Optimize prefetch cub::DeviceTransform for small problems

Fixes: NVIDIA#2263
- Loading branch information
1 parent
c358bde
commit c97f2e3
Showing
9 changed files
with
326 additions
and
32 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,65 @@ | ||
//===----------------------------------------------------------------------===// | ||
// | ||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | ||
// See https://llvm.org/LICENSE.txt for license information. | ||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
// SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. | ||
// | ||
//===----------------------------------------------------------------------===// | ||
|
||
#ifndef _CUDA___FUNCTIONAL_ADDRESS_STABILITY_H | ||
#define _CUDA___FUNCTIONAL_ADDRESS_STABILITY_H | ||
|
||
#include <cuda/std/detail/__config> | ||
|
||
#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC) | ||
# pragma GCC system_header | ||
#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG) | ||
# pragma clang system_header | ||
#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC) | ||
# pragma system_header | ||
#endif // no system header | ||
|
||
#include <cuda/std/__type_traits/integral_constant.h> | ||
#include <cuda/std/__utility/move.h> | ||
|
||
_LIBCUDACXX_BEGIN_NAMESPACE_CUDA | ||
|
||
//! Trait telling whether a function object type F does not rely on the memory addresses of its arguments. The nested | ||
//! value is true when the addresses of the arguments do not matter and arguments can be provided from arbitrary copies | ||
//! of the respective sources. This trait can be specialized for custom function objects types. | ||
//! @see proclaim_copyable_arguments | ||
template <typename F, typename SFINAE = void> | ||
struct proclaims_copyable_arguments : _CUDA_VSTD::false_type | ||
{}; | ||
|
||
#if !defined(_CCCL_NO_VARIABLE_TEMPLATES) | ||
template <typename F, typename... Args> | ||
_CCCL_INLINE_VAR constexpr bool proclaims_copyable_arguments_v = proclaims_copyable_arguments<F, Args...>::value; | ||
#endif // !_CCCL_NO_VARIABLE_TEMPLATES | ||
|
||
// Wrapper for a callable to mark it as permitting copied arguments | ||
template <typename F> | ||
struct __callable_permitting_copied_arguments : F | ||
{ | ||
using F::operator(); | ||
}; | ||
|
||
template <typename F> | ||
struct proclaims_copyable_arguments<__callable_permitting_copied_arguments<F>> : _CUDA_VSTD::true_type | ||
{}; | ||
|
||
//! Creates a new function object from an existing one, which is marked as permitting its arguments to be copies of | ||
//! whatever source they come from. This implies that the addresses of the arguments are irrelevant to the function | ||
//! object. | ||
//! @see proclaims_copyable_arguments | ||
template <typename F> | ||
_CCCL_NODISCARD _LIBCUDACXX_HIDE_FROM_ABI constexpr auto | ||
proclaim_copyable_arguments(F f) -> __callable_permitting_copied_arguments<F> | ||
{ | ||
return __callable_permitting_copied_arguments<F>{_CUDA_VSTD::move(f)}; | ||
} | ||
|
||
_LIBCUDACXX_END_NAMESPACE_CUDA | ||
|
||
#endif // _CUDA___FUNCTIONAL_ADDRESS_STABILITY_H |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,24 @@ | ||
#include <cuda/__functional/address_stability.h> | ||
|
||
#include <unittest/unittest.h> | ||
|
||
struct my_plus | ||
{ | ||
_CCCL_HOST_DEVICE auto operator()(int a, int b) const -> int | ||
{ | ||
return a + b; | ||
} | ||
}; | ||
|
||
void TestAddressStability() | ||
{ | ||
using ::cuda::proclaim_copyable_arguments; | ||
using ::cuda::proclaims_copyable_arguments; | ||
|
||
static_assert(!proclaims_copyable_arguments<thrust::plus<int>>::value, ""); | ||
static_assert(proclaims_copyable_arguments<decltype(proclaim_copyable_arguments(thrust::plus<int>{}))>::value, ""); | ||
|
||
static_assert(!proclaims_copyable_arguments<my_plus>::value, ""); | ||
static_assert(proclaims_copyable_arguments<decltype(proclaim_copyable_arguments(my_plus{}))>::value, ""); | ||
} | ||
DECLARE_UNITTEST(TestAddressStability); |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.