Skip to content

Commit

Permalink
add accessor with properties
Browse files Browse the repository at this point in the history
  • Loading branch information
fbusato committed Jan 15, 2025
1 parent 43fb061 commit d4b987f
Show file tree
Hide file tree
Showing 8 changed files with 829 additions and 0 deletions.
267 changes: 267 additions & 0 deletions cudax/include/cuda/experimental/__mdspan/accessor_with_properties.cuh
Original file line number Diff line number Diff line change
@@ -0,0 +1,267 @@
//===----------------------------------------------------------------------===//
//
// Part of libcu++, the C++ Standard Library for your entire system,
// under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
// SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES.
//
//===----------------------------------------------------------------------===//

#ifndef _CUDAX__MDSPAN_ACCESSOR_WITH_PROPERTIES_H
#define _CUDAX__MDSPAN_ACCESSOR_WITH_PROPERTIES_H

#include <cuda/std/detail/__config>

#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC)
# pragma GCC system_header
#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG)
# pragma clang system_header
#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC)
# pragma system_header
#endif // no system header

#include <cuda/std/__memory/assume_aligned.h>
#include <cuda/std/__type_traits/conditional.h>
#include <cuda/std/__type_traits/enable_if.h>
#include <cuda/std/__type_traits/integral_constant.h>
#include <cuda/std/__type_traits/is_abstract.h>
#include <cuda/std/__type_traits/is_array.h>
#include <cuda/std/__type_traits/is_convertible.h>
#include <cuda/std/__type_traits/type_list.h>
#include <cuda/std/mdspan>

#include <cuda/experimental/__mdspan/load_store.cuh>
#include <cuda/experimental/__mdspan/properties.cuh>
#include <cuda/experimental/__mdspan/property_traits.cuh>

namespace cuda::experimental
{

namespace detail
{

/***********************************************************************************************************************
* Find Duplicate Utilities
**********************************************************************************************************************/

/// Metafunction class that counts the types of a pack accepted by `_Predicate`.
///
/// `__call<Types...>` is an `integral_constant<int, N>` where `N` is the sum of `_Predicate<T>::value` over every
/// `T` in the pack. An empty pack yields 0. The nested `__call` alias is what lets this struct be used as the
/// callable argument of `::cuda::std::__type_call`.
template <template <typename> class _Predicate>
struct type_count_if
{
  template <class... _Types>
  using __call = ::cuda::std::integral_constant<int, (0 + ... + static_cast<int>(_Predicate<_Types>::value))>;
};

// Each alias below evaluates to an integral_constant<int, N> holding the number of types in `_Properties...`
// that satisfy the corresponding property predicate. They are used to detect duplicated properties.

/// Number of eviction policies among `_Properties...`.
template <class... _Properties>
using count_eviction = ::cuda::std::__type_call<type_count_if<is_eviction_policy>, _Properties...>;

/// Number of prefetch properties among `_Properties...`.
template <class... _Properties>
using count_prefetch = ::cuda::std::__type_call<type_count_if<is_prefetch>, _Properties...>;

/// Number of alignment properties among `_Properties...`.
template <class... _Properties>
using count_alignment = ::cuda::std::__type_call<type_count_if<is_alignment>, _Properties...>;

/// Number of pointer aliasing policies among `_Properties...`.
template <class... _Properties>
using count_aliasing = ::cuda::std::__type_call<type_count_if<is_ptr_aliasing_policy>, _Properties...>;

/***********************************************************************************************************************
* Find Property Utilities
**********************************************************************************************************************/

/// Adapts a unary type trait (`_Predicate<T>::value`) to the metafunction-class protocol (nested `__call` alias)
/// expected by the type-list algorithms. `__call<T>` is `bool_constant<_Predicate<T>::value>`.
template <template <typename> class _Predicate>
struct predicate_call
{
  template <typename _Candidate>
  using __call = ::cuda::std::integral_constant<bool, _Predicate<_Candidate>::value>;
};

/// Selects a property out of `UserProperties...`, falling back to `_DefaultValue`.
///
/// - `ret`  : result of `__type_find_if` over the user properties with `_Predicate`
///            (per libcu++'s type_list utilities, the tail of the list starting at the first match).
/// - `type` : front element of `ret` followed by `_DefaultValue`, i.e. the first element of `ret` if it has one,
///            otherwise `_DefaultValue`.
template <template <typename> class _Predicate, typename _DefaultValue, typename... UserProperties>
struct find_property
{
private:
  using __user_list     = ::cuda::std::__type_list<UserProperties...>;
  using __fallback_list = ::cuda::std::__type_list<_DefaultValue>;

public:
  using ret = ::cuda::std::__type_find_if<__user_list, predicate_call<_Predicate>>;

  // Appending the default guarantees the concatenated list is never empty.
  using type = ::cuda::std::__type_front<::cuda::std::__type_concat<ret, __fallback_list>>;
};

// Property lookups. Each alias names a find_property specialization; its nested `::type` is the matching user
// property, or the default listed here when the user supplied none of that kind.

/// Eviction policy lookup; default: `eviction_none_t`.
template <typename... _Properties>
using find_eviction_policy = find_property<::cuda::experimental::is_eviction_policy, eviction_none_t, _Properties...>;

/// Prefetch lookup; default: `no_prefetch_t`.
template <typename... _Properties>
using find_prefetch_size = find_property<::cuda::experimental::is_prefetch, no_prefetch_t, _Properties...>;

/// Alignment lookup; default: `aligned_size_t<_FallbackAlignment>`.
template <size_t _FallbackAlignment, typename... _Properties>
using find_alignment = find_property<is_alignment, aligned_size_t<_FallbackAlignment>, _Properties...>;

/// Pointer aliasing policy lookup; default: `ptr_no_aliasing_t`.
template <typename... _Properties>
using find_aliasing_policy = find_property<is_ptr_aliasing_policy, ptr_no_aliasing_t, _Properties...>;

} // namespace detail

/***********************************************************************************************************************
* accessor_with_properties Forward Declaration
**********************************************************************************************************************/

// Forward declaration needed by accessor_reference, which befriends the accessor.
// Declared with the `class` key to match both the definition and the friend declaration in this file
// (a mismatched `struct`/`class` key triggers -Wmismatched-tags / MSVC C4099).
template <typename ElementType, typename Restrict, typename Alignment, typename Eviction, typename Prefetch>
class accessor_with_properties;

/***********************************************************************************************************************
* accessor_reference
**********************************************************************************************************************/

/// Proxy reference handed out by accessor_with_properties::access() for non-const element types.
///
/// The proxy wraps a pointer to one element:
///  - converting it to `ElementType` reads the element (device: `load` with the `Eviction`/`Prefetch` properties,
///    host: plain dereference);
///  - assigning to it writes the element (device: `store` with the `Eviction` property, host: plain store).
///
/// On the host path only `eviction_none_t` + `no_prefetch_t` are accepted (enforced with static_assert).
/// Only accessor_with_properties can create a proxy that points at an element.
template <typename ElementType, typename Restrict, typename Alignment, typename Eviction, typename Prefetch>
class accessor_reference
{
  static constexpr bool _is_restrict = ::cuda::std::is_same_v<Restrict, ptr_no_aliasing_t>;

  using pointer_type = ::cuda::std::conditional_t<_is_restrict, ElementType * _CCCL_RESTRICT, ElementType*>;

  pointer_type _ptr;

  friend class accessor_with_properties<ElementType, Restrict, Alignment, Eviction, Prefetch>;

public:
  explicit accessor_reference() noexcept = default;

  accessor_reference(const accessor_reference&) noexcept = default;

  accessor_reference(accessor_reference&&) noexcept = default;

  /// Assignment from an rvalue proxy writes the referenced *value* through, exactly like the copy assignment.
  /// A defaulted move assignment would only rebind `_ptr`, so `a(i) = b(j)` (where `b(j)` is a prvalue proxy and
  /// therefore selects this overload) would silently store nothing.
  _CCCL_HOST_DEVICE _CCCL_FORCEINLINE accessor_reference& operator=(accessor_reference&& value) noexcept
  {
    return operator=(static_cast<ElementType>(value));
  }

  /// Reads the element referenced by `value` and writes it to the element referenced by `*this`.
  _CCCL_HOST_DEVICE _CCCL_FORCEINLINE accessor_reference& operator=(const accessor_reference& value) noexcept
  {
    return operator=(static_cast<ElementType>(value));
  }

  /// Writes `value` to the referenced element and returns `*this`.
  _CCCL_HOST_DEVICE _CCCL_FORCEINLINE accessor_reference& operator=(ElementType value) noexcept
  {
    // The host branch must not `return` the result of the store: `*_ptr = value` is an ElementType&, which cannot
    // bind to the accessor_reference& return type. Both branches fall through to `return *this`.
    NV_IF_ELSE_TARGET(
      NV_IS_HOST,
      (static_assert(
         ::cuda::std::is_same_v<Eviction, eviction_none_t> && ::cuda::std::is_same_v<Prefetch, no_prefetch_t>);
       *_ptr = value;),
      (::cuda::experimental::store(value, _ptr, Eviction{});));
    return *this;
  }

  /// Reads the referenced element.
  _CCCL_NODISCARD _CCCL_HOST_DEVICE _CCCL_FORCEINLINE operator ElementType() const noexcept
  {
    NV_IF_ELSE_TARGET(
      NV_IS_HOST,
      (static_assert(
         ::cuda::std::is_same_v<Eviction, eviction_none_t> && ::cuda::std::is_same_v<Prefetch, no_prefetch_t>);
       return *_ptr;),
      (return ::cuda::experimental::load(_ptr, read_write, Eviction{}, Prefetch{});));
  }

private:
  // Only the befriended accessor binds a proxy to an element.
  _CCCL_HOST_DEVICE explicit accessor_reference(pointer_type ptr) noexcept
      : _ptr{ptr}
  {}
};

/***********************************************************************************************************************
* accessor_with_properties Definition
**********************************************************************************************************************/

/// mdspan accessor policy that attaches memory-access properties to element accesses.
///
/// Template parameters:
///  - ElementType : element type (not an array, not abstract); a const type makes `reference` a plain value.
///  - Restrict    : pointer aliasing policy; `ptr_no_aliasing_t` makes `data_handle_type` a restrict pointer.
///  - Alignment   : alignment policy; `Alignment::align` is the alignment assumed for the *base* data handle.
///  - Eviction    : eviction policy forwarded to `load`/`store` in device code.
///  - Prefetch    : prefetch policy forwarded to `load` in device code.
///
/// In host code only `eviction_none_t` + `no_prefetch_t` are accepted for const elements (static_assert in
/// `access`); for non-const elements the same check lives in accessor_reference.
template <typename ElementType, typename Restrict, typename Alignment, typename Eviction, typename Prefetch>
class accessor_with_properties
{
  static_assert(!::cuda::std::is_array_v<ElementType>,
                "accessor_with_properties: template argument may not be an array type");
  static_assert(!::cuda::std::is_abstract_v<ElementType>,
                "accessor_with_properties: template argument may not be an abstract class");

  static_assert(is_ptr_aliasing_policy_v<Restrict>, "Restrict must be a pointer aliasing policy");
  static_assert(is_eviction_policy_v<Eviction>, "Eviction must be an eviction policy");
  static_assert(is_prefetch_v<Prefetch>, "Prefetch must be a prefetch policy");
  static_assert(is_alignment_v<Alignment>, "Alignment must be an alignment policy");

  static constexpr bool _is_const_elem = ::cuda::std::is_const_v<ElementType>;
  static constexpr bool _is_restrict   = ::cuda::std::is_same_v<Restrict, ptr_no_aliasing_t>;

public:
  // A handle produced by offset() points at `base + i`, which is only guaranteed to have the natural alignment of
  // the element, not `Alignment::align`. The offset policy therefore uses the default alignment (the same default
  // make_accessor_with_properties picks), mirroring std::aligned_accessor whose offset_policy drops the
  // over-alignment. When `Alignment` already is the default, this names this very class.
  using offset_policy = accessor_with_properties<ElementType,
                                                 Restrict,
                                                 typename detail::find_alignment<alignof(ElementType)>::type,
                                                 Eviction,
                                                 Prefetch>;
  using element_type = ElementType;
  using reference    = ::cuda::std::
    conditional_t<_is_const_elem, ElementType, accessor_reference<ElementType, Restrict, Alignment, Eviction, Prefetch>>;
  using data_handle_type = ::cuda::std::conditional_t<_is_restrict, ElementType * _CCCL_RESTRICT, ElementType*>;

  explicit accessor_with_properties() noexcept = default;

  // template <typename _OtherElementType,
  //           typename... _OtherProperties,
  //           ::cuda::std::enable_if_t<::cuda::std::is_convertible_v<_OtherElementType (*)[], ElementType (*)[]>>>
  //_CCCL_HOST_DEVICE constexpr accessor_with_properties(
  //   accessor_with_properties<_OtherElementType, _OtherProperties...>) noexcept
  //{}

  /// Conversion from an accessor that differs only by a stricter (or equal) alignment guarantee.
  /// Relaxing the alignment is always safe; this is what allows building `offset_policy` from `*this`.
  template <typename _OtherAlignment,
            ::cuda::std::enable_if_t<(_OtherAlignment::align >= Alignment::align), int> = 0>
  _CCCL_HOST_DEVICE constexpr accessor_with_properties(
    const accessor_with_properties<ElementType, Restrict, _OtherAlignment, Eviction, Prefetch>&) noexcept
  {}

  accessor_with_properties(const accessor_with_properties&) noexcept = default;

  /// Accesses element `i` relative to the base handle `ptr` (assumed aligned to `Alignment::align`).
  /// Const elements are loaded and returned by value; otherwise a proxy reference to the element is returned.
  _CCCL_NODISCARD _CCCL_HOST_DEVICE _CCCL_FORCEINLINE reference access(data_handle_type ptr, size_t i) const noexcept
  {
    auto ptr1 = ::cuda::std::assume_aligned<Alignment::align>(ptr);
    if constexpr (_is_const_elem)
    {
      NV_IF_ELSE_TARGET(
        NV_IS_HOST,
        (static_assert(
           ::cuda::std::is_same_v<Eviction, eviction_none_t> && ::cuda::std::is_same_v<Prefetch, no_prefetch_t>);
         return ptr1[i];),
        (return ::cuda::experimental::load(ptr1 + i, read_only, Eviction{}, Prefetch{});));
    }
    else
    {
      return reference{ptr1 + i};
    }
  }

  /// Returns the handle of element `i` relative to the base handle `ptr` (assumed aligned to `Alignment::align`).
  /// The result is meant to be used with `offset_policy`, which has the same `data_handle_type`.
  _CCCL_NODISCARD _CCCL_HOST_DEVICE _CCCL_FORCEINLINE data_handle_type
  offset(data_handle_type ptr, size_t i) const noexcept
  {
    return ::cuda::std::assume_aligned<Alignment::align>(ptr) + i;
  }
};

/***********************************************************************************************************************
* make_accessor_with_properties() / add_properties()
**********************************************************************************************************************/

/// Builds an accessor_with_properties for `ElementType` from an unordered list of property objects.
///
/// Only the *types* of the arguments matter. At most one property of each kind (aliasing, alignment, eviction,
/// prefetch) may be given; a kind that is not given takes its default from the matching `detail::find_*` lookup
/// (alignment defaults to `alignof(ElementType)`).
template <typename ElementType, typename... UserProperties>
_CCCL_NODISCARD _CCCL_HOST_DEVICE _CCCL_FORCEINLINE auto make_accessor_with_properties(UserProperties...) noexcept
{
  // Reject duplicated properties first.
  static_assert(detail::count_aliasing<UserProperties...>::value <= 1, "Duplicate aliasing policy found");
  static_assert(detail::count_alignment<UserProperties...>::value <= 1, "Duplicate alignment found");
  static_assert(detail::count_eviction<UserProperties...>::value <= 1, "Duplicate eviction policy found");
  static_assert(detail::count_prefetch<UserProperties...>::value <= 1, "Duplicate prefetch policy found");

  // Resolve each property kind: user-provided type or default.
  using _AliasingPolicy  = typename detail::find_aliasing_policy<UserProperties...>::type;
  using _AlignmentPolicy = typename detail::find_alignment<alignof(ElementType), UserProperties...>::type;
  using _EvictionPolicy  = typename detail::find_eviction_policy<UserProperties...>::type;
  using _PrefetchPolicy  = typename detail::find_prefetch_size<UserProperties...>::type;

  using _Accessor =
    accessor_with_properties<ElementType, _AliasingPolicy, _AlignmentPolicy, _EvictionPolicy, _PrefetchPolicy>;
  return _Accessor();
}

/// Returns a new mdspan over the same data handle and mapping as `mdspan`, whose accessor is the
/// accessor_with_properties built from `properties...` for element type `T`.
/// The input mdspan must use `::cuda::std::default_accessor<T>`.
template <typename E, typename T, typename L, typename A, typename... UserProperties>
_CCCL_NODISCARD _CCCL_HOST_DEVICE _CCCL_FORCEINLINE auto
add_properties(::cuda::std::mdspan<T, E, L, A> mdspan, UserProperties... properties) noexcept
{
  using _DefaultAccessor = ::cuda::std::default_accessor<T>;
  static_assert(::cuda::std::is_same_v<A, _DefaultAccessor>, "requires default_accessor");

  auto __props_accessor = ::cuda::experimental::make_accessor_with_properties<T>(properties...);
  auto __handle         = mdspan.data_handle();
  return ::cuda::std::mdspan{__handle, mdspan.mapping(), __props_accessor};
}

/***********************************************************************************************************************
* Predefined Accessors with Properties
**********************************************************************************************************************/

/// Accessor over `const _Element` built with the `eviction_no_alloc` property (all other properties defaulted).
template <typename _Element>
using streaming_accessor =
  decltype(::cuda::experimental::make_accessor_with_properties<const _Element>(::cuda::experimental::eviction_no_alloc));

/// Accessor over `_Element` built with the `eviction_last` property (all other properties defaulted).
template <typename _Element>
using cache_all_accessor =
  decltype(::cuda::experimental::make_accessor_with_properties<_Element>(::cuda::experimental::eviction_last));

} // namespace cuda::experimental

#endif // _CUDAX__MDSPAN_ACCESSOR_WITH_PROPERTIES_H
118 changes: 118 additions & 0 deletions cudax/include/cuda/experimental/__mdspan/load_store.cuh
Original file line number Diff line number Diff line change
@@ -0,0 +1,118 @@
//===----------------------------------------------------------------------===//
//
// Part of CUDA Experimental in CUDA C++ Core Libraries,
// under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
// SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES.
//
//===----------------------------------------------------------------------===//

#ifndef __CUDAX_CUDA_LOAD_STORE
#define __CUDAX_CUDA_LOAD_STORE

#include <cuda/__cccl_config>

#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC)
# pragma GCC system_header
#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG)
# pragma clang system_header
#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC)
# pragma system_header
#endif // no system header

#include <cuda/ptx>
#include <cuda/std/type_traits>

#include <cuda/experimental/__mdspan/properties.cuh>

// _CUDAX_LOAD_ADD_PREFETCH(LOAD_BEHAVIOR, EVICT_POLICY, PREFETCH)
//
// Expands to a `switch (PREFETCH)` whose cases `return` the result of the ::cuda::ptx load function selected by
// token pasting: `ld_global_` ## LOAD_BEHAVIOR ## EVICT_POLICY, optionally followed by `_L2_64B` / `_L2_128B` /
// `_L2_256B` for the corresponding prefetch case.
//
// Requirements on the expansion site: a variable named `ptr` must be in scope, and the enclosing function must
// return the loaded value. There is no `default` label: if PREFETCH matches none of the four cases, control
// continues after the switch.
//
// NOTE(review): LOAD_BEHAVIOR is pasted directly in front of EVICT_POLICY without a separator (e.g. `nc` gives
// `ld_global_nceviction_normal`) -- confirm this matches the names declared in <cuda/ptx>.
#define _CUDAX_LOAD_ADD_PREFETCH(LOAD_BEHAVIOR, EVICT_POLICY, PREFETCH) \
switch (PREFETCH) \
{ \
case no_prefetch: \
return ::cuda::ptx::ld_global_##LOAD_BEHAVIOR##EVICT_POLICY(ptr); \
case prefetch_64B: \
return ::cuda::ptx::ld_global_##LOAD_BEHAVIOR##EVICT_POLICY##_L2_64B(ptr); \
case prefetch_128B: \
return ::cuda::ptx::ld_global_##LOAD_BEHAVIOR##EVICT_POLICY##_L2_128B(ptr); \
case prefetch_256B: \
return ::cuda::ptx::ld_global_##LOAD_BEHAVIOR##EVICT_POLICY##_L2_256B(ptr); \
}

// _CUDAX_LOAD_ADD_EVICTION_POLICY(LOAD_BEHAVIOR, EVICTION_POLICY, PREFETCH)
//
// Expands to a `switch (EVICTION_POLICY)` that, for each listed eviction policy, expands
// _CUDAX_LOAD_ADD_PREFETCH with that policy's name so the nested switch can return the matching load.
//
// The cases have no `break`: each nested switch returns when PREFETCH matches one of its labels; otherwise
// control falls through into the next case. There is no case for `eviction_none` and no `default` label, so for
// such a value control continues after the switch without returning.
#define _CUDAX_LOAD_ADD_EVICTION_POLICY(LOAD_BEHAVIOR, EVICTION_POLICY, PREFETCH) \
switch (EVICTION_POLICY) \
{ \
case eviction_normal: \
_CUDAX_LOAD_ADD_PREFETCH(LOAD_BEHAVIOR, eviction_normal, PREFETCH); \
case eviction_first: \
_CUDAX_LOAD_ADD_PREFETCH(LOAD_BEHAVIOR, eviction_first, PREFETCH); \
case eviction_last: \
_CUDAX_LOAD_ADD_PREFETCH(LOAD_BEHAVIOR, eviction_last, PREFETCH); \
case eviction_last_use: \
_CUDAX_LOAD_ADD_PREFETCH(LOAD_BEHAVIOR, eviction_last_use, PREFETCH); \
case eviction_no_alloc: \
_CUDAX_LOAD_ADD_PREFETCH(LOAD_BEHAVIOR, eviction_no_alloc, PREFETCH); \
}

namespace cuda::experimental
{

/// Device-side load of `*ptr` with optional cache hints.
///
/// @param ptr             address to read from
/// @param load_behavior   `read_write` selects the regular `ld_global_*` family, anything else the `nc` family
/// @param eviction_policy eviction policy used to select the PTX load
/// @param prefetch        prefetch size used to select the PTX load
/// @return the loaded value
///
/// When PTX loads are disabled (`_CCCL_PTX_LD_ENABLED` is 0), or when the request is `eviction_none` with
/// `no_prefetch`, a plain dereference is used. A plain dereference is also the fallback for combinations the
/// dispatch macros do not cover (e.g. `eviction_none` together with a prefetch size): the hints are dropped
/// instead of flowing off the end of a non-void function, which would be undefined behavior.
template <typename T, LoadBehavior B, EvictionPolicyEnum E, PrefetchSizeEnum P>
_CCCL_NODISCARD _CCCL_DEVICE _CCCL_FORCEINLINE T
load(const T* ptr,
     load_behavior_t<B> load_behavior     = read_write,
     eviction_policy_t<E> eviction_policy = eviction_normal,
     prefetch_t<P> prefetch               = no_prefetch) noexcept
{
#if _CCCL_PTX_LD_ENABLED

  if constexpr (eviction_policy == eviction_none && prefetch == no_prefetch)
  {
    return *ptr; // do not skip NVVM
  }
  else
  {
    if constexpr (load_behavior == read_write)
    {
      _CUDAX_LOAD_ADD_EVICTION_POLICY(, eviction_policy, prefetch);
    }
    else
    {
      _CUDAX_LOAD_ADD_EVICTION_POLICY(nc, eviction_policy, prefetch);
    }
    // Reached only when no case of the dispatch switches matched (they have no eviction_none case and no
    // default label). Fall back to a plain load so every path returns a value.
    return *ptr;
  }
#else
  return *ptr;
#endif
}

/// Device-side store of `value` to `*ptr` with an optional eviction hint.
///
/// @param value           value to write
/// @param ptr             destination address
/// @param eviction_policy eviction policy; `eviction_first`, `eviction_last`, `eviction_last_use` and
///                        `eviction_no_alloc` map to the matching `::cuda::ptx::st_global_*` function, every other
///                        policy (`eviction_none`, `eviction_normal`) is a plain store
///
/// Exactly one store is issued per call. The template parameter list only contains parameters that can be deduced
/// from the call arguments: the former `LoadBehavior` / `PrefetchSizeEnum` parameters appeared in no function
/// parameter, which made every call relying on deduction ill-formed. Note that a defaulted function argument does
/// not take part in deduction, so callers pass the policy explicitly for `E` to be deduced.
template <typename T, EvictionPolicyEnum E>
_CCCL_DEVICE _CCCL_FORCEINLINE void
store(T value, T* ptr, eviction_policy_t<E> eviction_policy = eviction_normal) noexcept
{
  static_assert(!::cuda::std::is_const_v<T>);
#if _CCCL_PTX_ST_ENABLED
  // Compile-time dispatch (same comparison style as load()): selects exactly one store, with no fall-through.
  if constexpr (eviction_policy == eviction_first)
  {
    ::cuda::ptx::st_global_eviction_first(ptr, value);
  }
  else if constexpr (eviction_policy == eviction_last)
  {
    ::cuda::ptx::st_global_eviction_last(ptr, value);
  }
  else if constexpr (eviction_policy == eviction_last_use)
  {
    ::cuda::ptx::st_global_eviction_last_use(ptr, value);
  }
  else if constexpr (eviction_policy == eviction_no_alloc)
  {
    ::cuda::ptx::st_global_eviction_no_alloc(ptr, value);
  }
  else
  {
    // eviction_none / eviction_normal: a regular store (normal eviction priority is the PTX default).
    *ptr = value;
  }
#else
  *ptr = value;
#endif
}

} // namespace cuda::experimental

#undef _CUDAX_LOAD_ADD_PREFETCH
#undef _CUDAX_LOAD_ADD_EVICTION_POLICY

#endif // __CUDAX_CUDA_LOAD_STORE
Loading

0 comments on commit d4b987f

Please sign in to comment.