From bad68138832d592b965657d82c59f0d03934087d Mon Sep 17 00:00:00 2001 From: David Bayer Date: Fri, 10 Jan 2025 20:38:19 +0100 Subject: [PATCH 1/2] refactor `` --- .../include/cuda/std/__cstdlib/malloc.h | 78 +++++++++ libcudacxx/include/cuda/std/cstdlib | 5 +- .../cuda/std/detail/libcxx/include/cstdlib | 148 ------------------ .../cstdlib/aligned_alloc.pass.cpp | 86 ++++++++++ .../language.support/cstdlib/calloc.pass.cpp | 102 ++++++++++++ 5 files changed, 270 insertions(+), 149 deletions(-) create mode 100644 libcudacxx/include/cuda/std/__cstdlib/malloc.h delete mode 100644 libcudacxx/include/cuda/std/detail/libcxx/include/cstdlib create mode 100644 libcudacxx/test/libcudacxx/std/language.support/cstdlib/aligned_alloc.pass.cpp create mode 100644 libcudacxx/test/libcudacxx/std/language.support/cstdlib/calloc.pass.cpp diff --git a/libcudacxx/include/cuda/std/__cstdlib/malloc.h b/libcudacxx/include/cuda/std/__cstdlib/malloc.h new file mode 100644 index 00000000000..90c97c987ed --- /dev/null +++ b/libcudacxx/include/cuda/std/__cstdlib/malloc.h @@ -0,0 +1,78 @@ +// -*- C++ -*- +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCUDACXX___CSTDLIB_MALLOC_H +#define _LIBCUDACXX___CSTDLIB_MALLOC_H + +#include + +#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC) +# pragma GCC system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG) +# pragma clang system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC) +# pragma system_header +#endif // no system header + +#include + +#if !_CCCL_COMPILER(NVRTC) +# include +#endif // !_CCCL_COMPILER(NVRTC) + +#include + +_LIBCUDACXX_BEGIN_NAMESPACE_STD + +using ::free; +using ::malloc; + +_CCCL_NODISCARD _LIBCUDACXX_HIDE_FROM_ABI void* calloc(size_t __n, size_t __size) noexcept +{ + void* __ptr{}; + + NV_IF_ELSE_TARGET( + NV_IS_HOST, (__ptr = ::calloc(__n, __size);), (size_t __nbytes = __n * __size; if (::__umul64hi(__n, __size) == 0) { + __ptr = ::malloc(__nbytes); + if (__ptr != nullptr) + { + ::memset(__ptr, 0, __nbytes); + } + })) + + return __ptr; +} + +#if _CCCL_STD_VER >= 2017 && !_CCCL_COMPILER(MSVC) +# define _LIBCUDACXX_HAS_ALIGNED_ALLOC_HOST 1 +# define _LIBCUDACXX_ALIGNED_ALLOC_HOST _CCCL_HOST +#else +# define _LIBCUDACXX_ALIGNED_ALLOC_HOST +#endif // _CCCL_STD_VER >= 2017 && !_CCCL_COMPILER(MSVC) + +#if _CCCL_HAS_CUDA_COMPILER && !_CCCL_CUDA_COMPILER(CLANG) +# define _LIBCUDACXX_HAS_ALIGNED_ALLOC_DEVICE 1 +# define _LIBCUDACXX_ALIGNED_ALLOC_DEVICE _CCCL_DEVICE +#else +# define _LIBCUDACXX_ALIGNED_ALLOC_DEVICE +#endif // _CCCL_HAS_CUDA_COMPILER && !_CCCL_CUDA_COMPILER(CLANG) + +#define _LIBCUDACXX_ALIGNED_ALLOC_EXSPACE _LIBCUDACXX_ALIGNED_ALLOC_HOST _LIBCUDACXX_ALIGNED_ALLOC_DEVICE + +_CCCL_NODISCARD _CCCL_HIDE_FROM_ABI _LIBCUDACXX_ALIGNED_ALLOC_EXSPACE void* +aligned_alloc(size_t __nbytes, size_t __align) noexcept +{ + NV_IF_TARGET( + NV_IS_HOST, (return ::aligned_alloc(__align, __nbytes);), (return ::__nv_aligned_device_malloc(__nbytes, __align);)) +} + +_LIBCUDACXX_END_NAMESPACE_STD + +#endif // _LIBCUDACXX___CSTDLIB_MALLOC_H diff --git a/libcudacxx/include/cuda/std/cstdlib b/libcudacxx/include/cuda/std/cstdlib index 05574706ea0..48f090d5c05 100644 --- a/libcudacxx/include/cuda/std/cstdlib +++ b/libcudacxx/include/cuda/std/cstdlib @@ -23,7 +23,10 @@ _CCCL_PUSH_MACROS -#include +#include +#include +#include +#include _CCCL_POP_MACROS diff --git a/libcudacxx/include/cuda/std/detail/libcxx/include/cstdlib b/libcudacxx/include/cuda/std/detail/libcxx/include/cstdlib deleted file mode 100644 index cceb2513b22..00000000000 --- a/libcudacxx/include/cuda/std/detail/libcxx/include/cstdlib +++ /dev/null @@ -1,148 +0,0 @@ -// -*- C++ -*- -//===--------------------------- cstdlib ----------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#ifndef _LIBCUDACXX_CSTDLIB -#define _LIBCUDACXX_CSTDLIB - -/* - cstdlib synopsis - -Macros: - - EXIT_FAILURE - EXIT_SUCCESS - MB_CUR_MAX - NULL - RAND_MAX - -namespace std -{ - -Types: - - size_t - div_t - ldiv_t - lldiv_t // C99 - -double atof (const char* nptr); -int atoi (const char* nptr); -long atol (const char* nptr); -long long atoll(const char* nptr); // C99 -double strtod (const char* restrict nptr, char** restrict endptr); -float strtof (const char* restrict nptr, char** restrict endptr); // C99 -long double strtold (const char* restrict nptr, char** restrict endptr); // C99 -long strtol (const char* restrict nptr, char** restrict endptr, int base); -long long strtoll (const char* restrict nptr, char** restrict endptr, int base); // C99 -unsigned long strtoul (const char* restrict nptr, char** restrict endptr, int base); -unsigned long long strtoull(const char* restrict nptr, char** restrict endptr, int base); // C99 -int rand(); -void srand(unsigned int seed); -void* calloc(size_t nmemb, size_t size); -void free(void* ptr); -void* malloc(size_t size); -void* realloc(void* ptr, size_t size); -void abort(); -int atexit(void (*func)(void)); -void exit(int status); -void _Exit(int status); -char* getenv(const char* name); -int system(const char* string); -void* bsearch(const void* key, const void* base, size_t nmemb, size_t size, - int (*compar)(const void *, const void *)); -void qsort(void* base, size_t nmemb, size_t size, - int (*compar)(const void *, const void *)); -int abs( int j); -long abs( long j); -long long abs(long long j); // C++0X -long labs( long j); -long long llabs(long long j); // C99 -div_t div( int numer, int denom); -ldiv_t div( long numer, long denom); -lldiv_t div(long long numer, long long denom); // C++0X -ldiv_t ldiv( long numer, long denom); -lldiv_t lldiv(long long numer, long long denom); // C99 -int mblen(const char* s, size_t n); -int mbtowc(wchar_t* restrict pwc, const char* restrict s, size_t n); -int wctomb(char* s, wchar_t wchar); -size_t mbstowcs(wchar_t* restrict pwcs, const char* restrict s, size_t n); -size_t wcstombs(char* restrict s, const wchar_t* restrict pwcs, size_t n); -int at_quick_exit(void (*func)(void)) // C++11 -void quick_exit(int status); // C++11 -void *aligned_alloc(size_t alignment, size_t size); // C11 - -} // std - -*/ - -#include - -#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC) -# pragma GCC system_header -#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG) -# pragma clang system_header -#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC) -# pragma system_header -#endif // no system header - -#if !_CCCL_COMPILER(NVRTC) -# include -#endif // !_CCCL_COMPILER(NVRTC) - -#include -#include - -_CCCL_PUSH_MACROS - -_LIBCUDACXX_BEGIN_NAMESPACE_STD - -#if !_CCCL_COMPILER(NVRTC) -using ::_Exit; -using ::abort; -using ::atexit; -using ::atof; -using ::atoi; -using ::atol; -using ::atoll; -using ::calloc; -using ::exit; -using ::free; -using ::malloc; -using ::rand; -using ::realloc; -using ::size_t; -using ::srand; -using ::strtod; -using ::strtof; -using ::strtol; -using ::strtold; -using ::strtoll; -using ::strtoul; -using ::strtoull; -# ifndef _LIBCUDACXX_WINDOWS_STORE_APP -using ::getenv; -using ::system; -# endif // _LIBCUDACXX_WINDOWS_STORE_APP -using ::bsearch; -using ::mblen; -using ::mbstowcs; -using ::mbtowc; -using ::qsort; -using ::wcstombs; -using ::wctomb; -# if _CCCL_STD_VER >= 2017 && !_CCCL_COMPILER(MSVC) -using ::aligned_alloc; -# endif // _CCCL_STD_VER >= 2017 && !_CCCL_COMPILER(MSVC) -#endif // !defined(_CCCL_COMPILER_NVRTC) - -_LIBCUDACXX_END_NAMESPACE_STD - -_CCCL_POP_MACROS - -#endif // _LIBCUDACXX_CSTDLIB diff --git a/libcudacxx/test/libcudacxx/std/language.support/cstdlib/aligned_alloc.pass.cpp b/libcudacxx/test/libcudacxx/std/language.support/cstdlib/aligned_alloc.pass.cpp new file mode 100644 index 00000000000..ca4ca74d9de --- /dev/null +++ b/libcudacxx/test/libcudacxx/std/language.support/cstdlib/aligned_alloc.pass.cpp @@ -0,0 +1,86 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. +// +//===----------------------------------------------------------------------===// + +#include +#include +#include +#include + +#include "test_macros.h" + +template +_LIBCUDACXX_ALIGNED_ALLOC_EXSPACE void +test_aligned_alloc_success(cuda::std::size_t n, cuda::std::size_t align = TEST_ALIGNOF(T)) +{ +#if (TEST_STD_VER >= 17 && !_CCCL_COMPILER(MSVC)) || (_CCCL_HAS_CUDA_COMPILER && !_CCCL_CUDA_COMPILER(CLANG)) + static_assert(noexcept(cuda::std::aligned_alloc(n * sizeof(T), align)), ""); + + T* ptr = static_cast(cuda::std::aligned_alloc(n * sizeof(T), align)); + + // check that the memory was allocated + assert(ptr != nullptr); + + // check memory alignment + assert(((align - 1) & reinterpret_cast(ptr)) == 0); + + cuda::std::free(ptr); +#endif // (TEST_STD_VER >= 17 && !_CCCL_COMPILER(MSVC)) || (_CCCL_HAS_CUDA_COMPILER && !_CCCL_CUDA_COMPILER(CLANG)) +} + +template +_LIBCUDACXX_ALIGNED_ALLOC_EXSPACE void +test_aligned_alloc_fail(cuda::std::size_t n, cuda::std::size_t align = TEST_ALIGNOF(T)) +{ +#if (TEST_STD_VER >= 17 && !_CCCL_COMPILER(MSVC)) || (_CCCL_HAS_CUDA_COMPILER && !_CCCL_CUDA_COMPILER(CLANG)) + T* ptr = static_cast(cuda::std::aligned_alloc(n * sizeof(T), align)); + + // check that the memory allocation failed + assert(ptr == nullptr); +#endif // (TEST_STD_VER >= 17 && !_CCCL_COMPILER(MSVC)) || (_CCCL_HAS_CUDA_COMPILER && !_CCCL_CUDA_COMPILER(C +} + +struct BigStruct +{ + int data[32]; +}; + +struct TEST_ALIGNAS(cuda::std::max_align_t) AlignedStruct +{ + char data[32]; +}; + +struct TEST_ALIGNAS(128) OverAlignedStruct +{ + char data[32]; +}; + +_LIBCUDACXX_ALIGNED_ALLOC_EXSPACE void test() +{ + test_aligned_alloc_success(10, 4); + test_aligned_alloc_success(128, 8); + test_aligned_alloc_success(8, 32); + test_aligned_alloc_success(4, 128); + test_aligned_alloc_success(16); + test_aligned_alloc_success(1); + test_aligned_alloc_success(1, 256); + + test_aligned_alloc_fail(10, 3); +} + +int main(int, char**) +{ +#if _LIBCUDACXX_HAS_ALIGNED_ALLOC_HOST + NV_IF_TARGET(NV_IS_HOST, test();) +#endif // _LIBCUDACXX_HAS_ALIGNED_ALLOC_HOST +#if _LIBCUDACXX_HAS_ALIGNED_ALLOC_DEVICE + NV_IF_TARGET(NV_IS_DEVICE, test();) +#endif // _LIBCUDACXX_HAS_ALIGNED_ALLOC_DEVICE + + return 0; +} diff --git a/libcudacxx/test/libcudacxx/std/language.support/cstdlib/calloc.pass.cpp b/libcudacxx/test/libcudacxx/std/language.support/cstdlib/calloc.pass.cpp new file mode 100644 index 00000000000..c17978e7117 --- /dev/null +++ b/libcudacxx/test/libcudacxx/std/language.support/cstdlib/calloc.pass.cpp @@ -0,0 +1,102 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. +// +//===----------------------------------------------------------------------===// + +#include +#include +#include +#include + +#include "test_macros.h" + +template +__host__ __device__ void test_calloc_success(cuda::std::size_t n) +{ + T* ptr = static_cast(cuda::std::calloc(n, sizeof(T))); + + // check that the memory was allocated + assert(ptr != nullptr); + + // check that the memory is zeroed + for (cuda::std::size_t i = 0; i < n; ++i) + { + assert(ptr[i] == T{}); + } + + // check memory alignment + assert(((TEST_ALIGNOF(T) - 1) & reinterpret_cast(ptr)) == 0); + + cuda::std::free(ptr); +} + +template +__host__ __device__ void test_calloc_fail(cuda::std::size_t n) +{ + T* ptr = static_cast(cuda::std::calloc(n, sizeof(T))); + + // check that the memory was not allocated + assert(ptr == nullptr); +} + +struct BigStruct +{ + static constexpr cuda::std::size_t n = 32; + + int data[n]; + + __host__ __device__ bool operator==(const BigStruct& other) const + { + for (cuda::std::size_t i{}; i < n; ++i) + { + if (data[i] != other.data[i]) + { + return false; + } + } + + return true; + } +}; + +struct TEST_ALIGNAS(cuda::std::max_align_t) AlignedStruct +{ + static constexpr cuda::std::size_t n = 32; + + char data[n]; + + __host__ __device__ bool operator==(const AlignedStruct& other) const + { + for (cuda::std::size_t i{}; i < n; ++i) + { + if (data[i] != other.data[i]) + { + return false; + } + } + + return true; + } +}; + +__host__ __device__ void test() +{ + test_calloc_success(10); + test_calloc_success(128); + test_calloc_success(8); + test_calloc_success(4); + test_calloc_success(16); + + test_calloc_fail(cuda::std::numeric_limits::max()); +} + +int main(int, char**) +{ + test(); + + return 0; +} From 5fe1416ecd4ee5d3e1cb668e44756bc4529fd2c5 Mon Sep 17 00:00:00 2001 From: David Bayer Date: Wed, 15 Jan 2025 11:41:24 +0100 Subject: [PATCH 2/2] fix review --- .../include/cuda/std/__cstdlib/malloc.h | 46 ++++++---- .../cstdlib/aligned_alloc.pass.cpp | 92 +++++++++++-------- 2 files changed, 81 insertions(+), 57 deletions(-) diff --git a/libcudacxx/include/cuda/std/__cstdlib/malloc.h b/libcudacxx/include/cuda/std/__cstdlib/malloc.h index 90c97c987ed..b51fb8e48aa 100644 --- a/libcudacxx/include/cuda/std/__cstdlib/malloc.h +++ b/libcudacxx/include/cuda/std/__cstdlib/malloc.h @@ -40,7 +40,7 @@ _CCCL_NODISCARD _LIBCUDACXX_HIDE_FROM_ABI void* calloc(size_t __n, size_t __size NV_IF_ELSE_TARGET( NV_IS_HOST, (__ptr = ::calloc(__n, __size);), (size_t __nbytes = __n * __size; if (::__umul64hi(__n, __size) == 0) { - __ptr = ::malloc(__nbytes); + __ptr = _CUDA_VSTD::malloc(__nbytes); if (__ptr != nullptr) { ::memset(__ptr, 0, __nbytes); @@ -50,27 +50,37 @@ _CCCL_NODISCARD _LIBCUDACXX_HIDE_FROM_ABI void* calloc(size_t __n, size_t __size return __ptr; } -#if _CCCL_STD_VER >= 2017 && !_CCCL_COMPILER(MSVC) -# define _LIBCUDACXX_HAS_ALIGNED_ALLOC_HOST 1 -# define _LIBCUDACXX_ALIGNED_ALLOC_HOST _CCCL_HOST -#else -# define _LIBCUDACXX_ALIGNED_ALLOC_HOST -#endif // _CCCL_STD_VER >= 2017 && !_CCCL_COMPILER(MSVC) - -#if _CCCL_HAS_CUDA_COMPILER && !_CCCL_CUDA_COMPILER(CLANG) -# define _LIBCUDACXX_HAS_ALIGNED_ALLOC_DEVICE 1 -# define _LIBCUDACXX_ALIGNED_ALLOC_DEVICE _CCCL_DEVICE +_CCCL_NODISCARD _CCCL_HIDE_FROM_ABI _CCCL_HOST void* __aligned_alloc_host(size_t __nbytes, size_t __align) noexcept +{ + void* __ptr{}; +#if _CCCL_STD_VER < 2017 + _CCCL_ASSERT(false, "Use of aligned_alloc in host code is only available in C++17 and later"); +#elif _CCCL_COMPILER(MSVC) + _CCCL_ASSERT(false, "Use of aligned_alloc in host code is not supported with MSVC"); #else -# define _LIBCUDACXX_ALIGNED_ALLOC_DEVICE -#endif // _CCCL_HAS_CUDA_COMPILER && !_CCCL_CUDA_COMPILER(CLANG) + __ptr = ::aligned_alloc(__align, __nbytes); +#endif + return __ptr; +} -#define _LIBCUDACXX_ALIGNED_ALLOC_EXSPACE _LIBCUDACXX_ALIGNED_ALLOC_HOST _LIBCUDACXX_ALIGNED_ALLOC_DEVICE +#if _CCCL_HAS_CUDA_COMPILER +_CCCL_NODISCARD _CCCL_HIDE_FROM_ABI _CCCL_DEVICE void* __aligned_alloc_device(size_t __nbytes, size_t __align) noexcept +{ + void* __ptr{}; +# if _CCCL_CUDA_COMPILER(CLANG) + _CCCL_ASSERT(false, "Use of aligned_alloc in device code is not supported with clang-cuda"); +# else // ^^^ _CCCL_CUDA_COMPILER(CLANG) ^^^ / vvv !_CCCL_CUDA_COMPILER(CLANG) + __ptr = ::__nv_aligned_device_malloc(__nbytes, __align); +# endif // ^^^ !_CCCL_CUDA_COMPILER(CLANG) ^^^ + return __ptr; +} +#endif // _CCCL_HAS_CUDA_COMPILER -_CCCL_NODISCARD _CCCL_HIDE_FROM_ABI _LIBCUDACXX_ALIGNED_ALLOC_EXSPACE void* -aligned_alloc(size_t __nbytes, size_t __align) noexcept +_CCCL_NODISCARD _LIBCUDACXX_HIDE_FROM_ABI void* aligned_alloc(size_t __nbytes, size_t __align) noexcept { - NV_IF_TARGET( - NV_IS_HOST, (return ::aligned_alloc(__align, __nbytes);), (return ::__nv_aligned_device_malloc(__nbytes, __align);)) + NV_IF_ELSE_TARGET(NV_IS_HOST, + (return _CUDA_VSTD::__aligned_alloc_host(__nbytes, __align);), + (return _CUDA_VSTD::__aligned_alloc_device(__nbytes, __align);)) } _LIBCUDACXX_END_NAMESPACE_STD diff --git a/libcudacxx/test/libcudacxx/std/language.support/cstdlib/aligned_alloc.pass.cpp b/libcudacxx/test/libcudacxx/std/language.support/cstdlib/aligned_alloc.pass.cpp index ca4ca74d9de..eb2b20374b2 100644 --- a/libcudacxx/test/libcudacxx/std/language.support/cstdlib/aligned_alloc.pass.cpp +++ b/libcudacxx/test/libcudacxx/std/language.support/cstdlib/aligned_alloc.pass.cpp @@ -7,42 +7,43 @@ // //===----------------------------------------------------------------------===// +// Disable CCCL assertions in this test to test the erroneous behavior +#undef CCCL_ENABLE_ASSERTIONS + #include #include #include #include +#include #include "test_macros.h" +#include template -_LIBCUDACXX_ALIGNED_ALLOC_EXSPACE void -test_aligned_alloc_success(cuda::std::size_t n, cuda::std::size_t align = TEST_ALIGNOF(T)) +__host__ __device__ void +test_aligned_alloc(bool expect_success, cuda::std::size_t n, cuda::std::size_t align = TEST_ALIGNOF(T)) { -#if (TEST_STD_VER >= 17 && !_CCCL_COMPILER(MSVC)) || (_CCCL_HAS_CUDA_COMPILER && !_CCCL_CUDA_COMPILER(CLANG)) - static_assert(noexcept(cuda::std::aligned_alloc(n * sizeof(T), align)), ""); + if (expect_success) + { + static_assert(noexcept(cuda::std::aligned_alloc(n * sizeof(T), align)), ""); - T* ptr = static_cast(cuda::std::aligned_alloc(n * sizeof(T), align)); + T* ptr = static_cast(cuda::std::aligned_alloc(n * sizeof(T), align)); - // check that the memory was allocated - assert(ptr != nullptr); + // check that the memory was allocated + assert(ptr != nullptr); - // check memory alignment - assert(((align - 1) & reinterpret_cast(ptr)) == 0); + // check memory alignment + assert(((align - 1) & reinterpret_cast(ptr)) == 0); - cuda::std::free(ptr); -#endif // (TEST_STD_VER >= 17 && !_CCCL_COMPILER(MSVC)) || (_CCCL_HAS_CUDA_COMPILER && !_CCCL_CUDA_COMPILER(CLANG)) -} + cuda::std::free(ptr); + } + else + { + T* ptr = static_cast(cuda::std::aligned_alloc(n * sizeof(T), align)); -template -_LIBCUDACXX_ALIGNED_ALLOC_EXSPACE void -test_aligned_alloc_fail(cuda::std::size_t n, cuda::std::size_t align = TEST_ALIGNOF(T)) -{ -#if (TEST_STD_VER >= 17 && !_CCCL_COMPILER(MSVC)) || (_CCCL_HAS_CUDA_COMPILER && !_CCCL_CUDA_COMPILER(CLANG)) - T* ptr = static_cast(cuda::std::aligned_alloc(n * sizeof(T), align)); - - // check that the memory allocation failed - assert(ptr == nullptr); -#endif // (TEST_STD_VER >= 17 && !_CCCL_COMPILER(MSVC)) || (_CCCL_HAS_CUDA_COMPILER && !_CCCL_CUDA_COMPILER(C + // check that the memory allocation failed + assert(ptr == nullptr); + } } struct BigStruct @@ -60,27 +61,40 @@ struct TEST_ALIGNAS(128) OverAlignedStruct char data[32]; }; -_LIBCUDACXX_ALIGNED_ALLOC_EXSPACE void test() +__host__ __device__ bool should_expect_success() +{ + bool host_has_aligned_alloc = false; +#if TEST_STD_VER >= 2017 && !_CCCL_COMPILER(MSVC) + host_has_aligned_alloc = true; +#endif // ^^^ TEST_STD_VER >= 2017 && !_CCCL_COMPILER(MSVC) ^^^ + + bool device_has_aligned_alloc = false; +#if !_CCCL_CUDA_COMPILER(CLANG) + device_has_aligned_alloc = true; +#endif // ^^^ !_CCCL_CUDA_COMPILER(CLANG) ^^^ + + unused(host_has_aligned_alloc, device_has_aligned_alloc); + + NV_IF_ELSE_TARGET(NV_IS_HOST, (return host_has_aligned_alloc;), (return device_has_aligned_alloc;)) +} + +__host__ __device__ void test() { - test_aligned_alloc_success(10, 4); - test_aligned_alloc_success(128, 8); - test_aligned_alloc_success(8, 32); - test_aligned_alloc_success(4, 128); - test_aligned_alloc_success(16); - test_aligned_alloc_success(1); - test_aligned_alloc_success(1, 256); - - test_aligned_alloc_fail(10, 3); + const bool expect_success = should_expect_success(); + + test_aligned_alloc(expect_success, 10, 4); + test_aligned_alloc(expect_success, 128, 8); + test_aligned_alloc(expect_success, 8, 32); + test_aligned_alloc(expect_success, 4, 128); + test_aligned_alloc(expect_success, 16); + test_aligned_alloc(expect_success, 1); + test_aligned_alloc(expect_success, 1, 256); + + test_aligned_alloc(false, 10, 3); } int main(int, char**) { -#if _LIBCUDACXX_HAS_ALIGNED_ALLOC_HOST - NV_IF_TARGET(NV_IS_HOST, test();) -#endif // _LIBCUDACXX_HAS_ALIGNED_ALLOC_HOST -#if _LIBCUDACXX_HAS_ALIGNED_ALLOC_DEVICE - NV_IF_TARGET(NV_IS_DEVICE, test();) -#endif // _LIBCUDACXX_HAS_ALIGNED_ALLOC_DEVICE - + test(); return 0; }