From bad68138832d592b965657d82c59f0d03934087d Mon Sep 17 00:00:00 2001
From: David Bayer <bayer.david99@gmail.com>
Date: Fri, 10 Jan 2025 20:38:19 +0100
Subject: [PATCH 1/2] refactor `<cuda/std/cstdlib>`

---
 .../include/cuda/std/__cstdlib/malloc.h       |  78 +++++++++
 libcudacxx/include/cuda/std/cstdlib           |   5 +-
 .../cuda/std/detail/libcxx/include/cstdlib    | 148 ------------------
 .../cstdlib/aligned_alloc.pass.cpp            |  86 ++++++++++
 .../language.support/cstdlib/calloc.pass.cpp  | 102 ++++++++++++
 5 files changed, 270 insertions(+), 149 deletions(-)
 create mode 100644 libcudacxx/include/cuda/std/__cstdlib/malloc.h
 delete mode 100644 libcudacxx/include/cuda/std/detail/libcxx/include/cstdlib
 create mode 100644 libcudacxx/test/libcudacxx/std/language.support/cstdlib/aligned_alloc.pass.cpp
 create mode 100644 libcudacxx/test/libcudacxx/std/language.support/cstdlib/calloc.pass.cpp
diff --git a/libcudacxx/include/cuda/std/__cstdlib/malloc.h b/libcudacxx/include/cuda/std/__cstdlib/malloc.h
new file mode 100644
index 00000000000..90c97c987ed
--- /dev/null
+++ b/libcudacxx/include/cuda/std/__cstdlib/malloc.h
@@ -0,0 +1,78 @@
+// -*- C++ -*-
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+// SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _LIBCUDACXX___CSTDLIB_MALLOC_H
+#define _LIBCUDACXX___CSTDLIB_MALLOC_H
+
+#include <cuda/std/detail/__config>
+
+#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC)
+#  pragma GCC system_header
+#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG)
+#  pragma clang system_header
+#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC)
+#  pragma system_header
+#endif // no system header
+
+#include <cuda/std/__cstddef/types.h>
+
+#if !_CCCL_COMPILER(NVRTC)
+#  include <cstdlib>
+#endif // !_CCCL_COMPILER(NVRTC)
+
+#include <nv/target>
+
+_LIBCUDACXX_BEGIN_NAMESPACE_STD
+
+using ::free;
+using ::malloc;
+
+_CCCL_NODISCARD _LIBCUDACXX_HIDE_FROM_ABI void* calloc(size_t __n, size_t __size) noexcept
+{
+  void* __ptr{};
+
+  NV_IF_ELSE_TARGET(
+    NV_IS_HOST, (__ptr = ::calloc(__n, __size);), (size_t __nbytes = __n * __size; if (::__umul64hi(__n, __size) == 0) {
+      __ptr = ::malloc(__nbytes);
+      if (__ptr != nullptr)
+      {
+        ::memset(__ptr, 0, __nbytes);
+      }
+    }))
+
+  return __ptr;
+}
+
+#if _CCCL_STD_VER >= 2017 && !_CCCL_COMPILER(MSVC)
+#  define _LIBCUDACXX_HAS_ALIGNED_ALLOC_HOST 1
+#  define _LIBCUDACXX_ALIGNED_ALLOC_HOST     _CCCL_HOST
+#else
+#  define _LIBCUDACXX_ALIGNED_ALLOC_HOST
+#endif // _CCCL_STD_VER >= 2017 && !_CCCL_COMPILER(MSVC)
+
+#if _CCCL_HAS_CUDA_COMPILER && !_CCCL_CUDA_COMPILER(CLANG)
+#  define _LIBCUDACXX_HAS_ALIGNED_ALLOC_DEVICE 1
+#  define _LIBCUDACXX_ALIGNED_ALLOC_DEVICE     _CCCL_DEVICE
+#else
+#  define _LIBCUDACXX_ALIGNED_ALLOC_DEVICE
+#endif // _CCCL_HAS_CUDA_COMPILER && !_CCCL_CUDA_COMPILER(CLANG)
+
+#define _LIBCUDACXX_ALIGNED_ALLOC_EXSPACE _LIBCUDACXX_ALIGNED_ALLOC_HOST _LIBCUDACXX_ALIGNED_ALLOC_DEVICE
+
+_CCCL_NODISCARD _CCCL_HIDE_FROM_ABI _LIBCUDACXX_ALIGNED_ALLOC_EXSPACE void*
+aligned_alloc(size_t __nbytes, size_t __align) noexcept
+{
+  NV_IF_TARGET(
+    NV_IS_HOST, (return ::aligned_alloc(__align, __nbytes);), (return ::__nv_aligned_device_malloc(__nbytes, __align);))
+}
+
+_LIBCUDACXX_END_NAMESPACE_STD
+
+#endif // _LIBCUDACXX___CSTDLIB_MALLOC_H
diff --git a/libcudacxx/include/cuda/std/cstdlib b/libcudacxx/include/cuda/std/cstdlib
index 05574706ea0..48f090d5c05 100644
--- a/libcudacxx/include/cuda/std/cstdlib
+++ b/libcudacxx/include/cuda/std/cstdlib
@@ -23,7 +23,10 @@
 
 _CCCL_PUSH_MACROS
 
-#include <cuda/std/detail/libcxx/include/cstdlib>
+#include <cuda/std/__cstdlib/abs.h>
+#include <cuda/std/__cstdlib/div.h>
+#include <cuda/std/__cstdlib/malloc.h>
+#include <cuda/std/version>
 
 _CCCL_POP_MACROS
 
diff --git a/libcudacxx/include/cuda/std/detail/libcxx/include/cstdlib b/libcudacxx/include/cuda/std/detail/libcxx/include/cstdlib
deleted file mode 100644
index cceb2513b22..00000000000
--- a/libcudacxx/include/cuda/std/detail/libcxx/include/cstdlib
+++ /dev/null
@@ -1,148 +0,0 @@
-// -*- C++ -*-
-//===--------------------------- cstdlib ----------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef _LIBCUDACXX_CSTDLIB
-#define _LIBCUDACXX_CSTDLIB
-
-/*
-    cstdlib synopsis
-
-Macros:
-
-    EXIT_FAILURE
-    EXIT_SUCCESS
-    MB_CUR_MAX
-    NULL
-    RAND_MAX
-
-namespace std
-{
-
-Types:
-
-    size_t
-    div_t
-    ldiv_t
-    lldiv_t                                                               // C99
-
-double    atof (const char* nptr);
-int       atoi (const char* nptr);
-long      atol (const char* nptr);
-long long atoll(const char* nptr);                                        // C99
-double             strtod  (const char* restrict nptr, char** restrict endptr);
-float              strtof  (const char* restrict nptr, char** restrict endptr); // C99
-long double        strtold (const char* restrict nptr, char** restrict endptr); // C99
-long               strtol  (const char* restrict nptr, char** restrict endptr, int base);
-long long          strtoll (const char* restrict nptr, char** restrict endptr, int base); // C99
-unsigned long      strtoul (const char* restrict nptr, char** restrict endptr, int base);
-unsigned long long strtoull(const char* restrict nptr, char** restrict endptr, int base); // C99
-int rand();
-void srand(unsigned int seed);
-void* calloc(size_t nmemb, size_t size);
-void free(void* ptr);
-void* malloc(size_t size);
-void* realloc(void* ptr, size_t size);
-void abort();
-int atexit(void (*func)(void));
-void exit(int status);
-void _Exit(int status);
-char* getenv(const char* name);
-int system(const char* string);
-void* bsearch(const void* key, const void* base, size_t nmemb, size_t size,
-              int (*compar)(const void *, const void *));
-void qsort(void* base, size_t nmemb, size_t size,
-           int (*compar)(const void *, const void *));
-int         abs(      int j);
-long        abs(     long j);
-long long   abs(long long j);                                             // C++0X
-long       labs(     long j);
-long long llabs(long long j);                                             // C99
-div_t     div(      int numer,       int denom);
-ldiv_t    div(     long numer,      long denom);
-lldiv_t   div(long long numer, long long denom);                          // C++0X
-ldiv_t   ldiv(     long numer,      long denom);
-lldiv_t lldiv(long long numer, long long denom);                          // C99
-int mblen(const char* s, size_t n);
-int mbtowc(wchar_t* restrict pwc, const char* restrict s, size_t n);
-int wctomb(char* s, wchar_t wchar);
-size_t mbstowcs(wchar_t* restrict pwcs, const char* restrict s, size_t n);
-size_t wcstombs(char* restrict s, const wchar_t* restrict pwcs, size_t n);
-int at_quick_exit(void (*func)(void))                                     // C++11
-void quick_exit(int status);                                              // C++11
-void *aligned_alloc(size_t alignment, size_t size);                       // C11
-
-}  // std
-
-*/
-
-#include <cuda/std/detail/__config>
-
-#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC)
-#  pragma GCC system_header
-#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG)
-#  pragma clang system_header
-#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC)
-#  pragma system_header
-#endif // no system header
-
-#if !_CCCL_COMPILER(NVRTC)
-#  include <cstdlib>
-#endif // !_CCCL_COMPILER(NVRTC)
-
-#include <cuda/std/__cstdlib/abs.h>
-#include <cuda/std/__cstdlib/div.h>
-
-_CCCL_PUSH_MACROS
-
-_LIBCUDACXX_BEGIN_NAMESPACE_STD
-
-#if !_CCCL_COMPILER(NVRTC)
-using ::_Exit;
-using ::abort;
-using ::atexit;
-using ::atof;
-using ::atoi;
-using ::atol;
-using ::atoll;
-using ::calloc;
-using ::exit;
-using ::free;
-using ::malloc;
-using ::rand;
-using ::realloc;
-using ::size_t;
-using ::srand;
-using ::strtod;
-using ::strtof;
-using ::strtol;
-using ::strtold;
-using ::strtoll;
-using ::strtoul;
-using ::strtoull;
-#  ifndef _LIBCUDACXX_WINDOWS_STORE_APP
-using ::getenv;
-using ::system;
-#  endif // _LIBCUDACXX_WINDOWS_STORE_APP
-using ::bsearch;
-using ::mblen;
-using ::mbstowcs;
-using ::mbtowc;
-using ::qsort;
-using ::wcstombs;
-using ::wctomb;
-#  if _CCCL_STD_VER >= 2017 && !_CCCL_COMPILER(MSVC)
-using ::aligned_alloc;
-#  endif // _CCCL_STD_VER >= 2017 && !_CCCL_COMPILER(MSVC)
-#endif // !defined(_CCCL_COMPILER_NVRTC)
-
-_LIBCUDACXX_END_NAMESPACE_STD
-
-_CCCL_POP_MACROS
-
-#endif // _LIBCUDACXX_CSTDLIB
diff --git a/libcudacxx/test/libcudacxx/std/language.support/cstdlib/aligned_alloc.pass.cpp b/libcudacxx/test/libcudacxx/std/language.support/cstdlib/aligned_alloc.pass.cpp
new file mode 100644
index 00000000000..ca4ca74d9de
--- /dev/null
+++ b/libcudacxx/test/libcudacxx/std/language.support/cstdlib/aligned_alloc.pass.cpp
@@ -0,0 +1,86 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+// SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES.
+//
+//===----------------------------------------------------------------------===//
+
+#include <cuda/std/cassert>
+#include <cuda/std/cstdint>
+#include <cuda/std/cstdlib>
+#include <cuda/std/limits>
+
+#include "test_macros.h"
+
+template <class T>
+_LIBCUDACXX_ALIGNED_ALLOC_EXSPACE void
+test_aligned_alloc_success(cuda::std::size_t n, cuda::std::size_t align = TEST_ALIGNOF(T))
+{
+#if (TEST_STD_VER >= 17 && !_CCCL_COMPILER(MSVC)) || (_CCCL_HAS_CUDA_COMPILER && !_CCCL_CUDA_COMPILER(CLANG))
+  static_assert(noexcept(cuda::std::aligned_alloc(n * sizeof(T), align)), "");
+
+  T* ptr = static_cast<T*>(cuda::std::aligned_alloc(n * sizeof(T), align));
+
+  // check that the memory was allocated
+  assert(ptr != nullptr);
+
+  // check memory alignment
+  assert(((align - 1) & reinterpret_cast<cuda::std::uintptr_t>(ptr)) == 0);
+
+  cuda::std::free(ptr);
+#endif // (TEST_STD_VER >= 17 && !_CCCL_COMPILER(MSVC)) || (_CCCL_HAS_CUDA_COMPILER && !_CCCL_CUDA_COMPILER(CLANG))
+}
+
+template <class T>
+_LIBCUDACXX_ALIGNED_ALLOC_EXSPACE void
+test_aligned_alloc_fail(cuda::std::size_t n, cuda::std::size_t align = TEST_ALIGNOF(T))
+{
+#if (TEST_STD_VER >= 17 && !_CCCL_COMPILER(MSVC)) || (_CCCL_HAS_CUDA_COMPILER && !_CCCL_CUDA_COMPILER(CLANG))
+  T* ptr = static_cast<T*>(cuda::std::aligned_alloc(n * sizeof(T), align));
+
+  // check that the memory allocation failed
+  assert(ptr == nullptr);
+#endif // (TEST_STD_VER >= 17 && !_CCCL_COMPILER(MSVC)) || (_CCCL_HAS_CUDA_COMPILER && !_CCCL_CUDA_COMPILER(C
+}
+
+struct BigStruct
+{
+  int data[32];
+};
+
+struct TEST_ALIGNAS(cuda::std::max_align_t) AlignedStruct
+{
+  char data[32];
+};
+
+struct TEST_ALIGNAS(128) OverAlignedStruct
+{
+  char data[32];
+};
+
+_LIBCUDACXX_ALIGNED_ALLOC_EXSPACE void test()
+{
+  test_aligned_alloc_success<int>(10, 4);
+  test_aligned_alloc_success<char>(128, 8);
+  test_aligned_alloc_success<double>(8, 32);
+  test_aligned_alloc_success<BigStruct>(4, 128);
+  test_aligned_alloc_success<AlignedStruct>(16);
+  test_aligned_alloc_success<OverAlignedStruct>(1);
+  test_aligned_alloc_success<OverAlignedStruct>(1, 256);
+
+  test_aligned_alloc_fail<int>(10, 3);
+}
+
+int main(int, char**)
+{
+#if _LIBCUDACXX_HAS_ALIGNED_ALLOC_HOST
+  NV_IF_TARGET(NV_IS_HOST, test();)
+#endif // _LIBCUDACXX_HAS_ALIGNED_ALLOC_HOST
+#if _LIBCUDACXX_HAS_ALIGNED_ALLOC_DEVICE
+  NV_IF_TARGET(NV_IS_DEVICE, test();)
+#endif // _LIBCUDACXX_HAS_ALIGNED_ALLOC_DEVICE
+
+  return 0;
+}
diff --git a/libcudacxx/test/libcudacxx/std/language.support/cstdlib/calloc.pass.cpp b/libcudacxx/test/libcudacxx/std/language.support/cstdlib/calloc.pass.cpp
new file mode 100644
index 00000000000..c17978e7117
--- /dev/null
+++ b/libcudacxx/test/libcudacxx/std/language.support/cstdlib/calloc.pass.cpp
@@ -0,0 +1,102 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+// SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES.
+//
+//===----------------------------------------------------------------------===//
+
+#include <cuda/std/cassert>
+#include <cuda/std/cstdint>
+#include <cuda/std/cstdlib>
+#include <cuda/std/limits>
+
+#include "test_macros.h"
+
+template <class T>
+__host__ __device__ void test_calloc_success(cuda::std::size_t n)
+{
+  T* ptr = static_cast<T*>(cuda::std::calloc(n, sizeof(T)));
+
+  // check that the memory was allocated
+  assert(ptr != nullptr);
+
+  // check that the memory is zeroed
+  for (cuda::std::size_t i = 0; i < n; ++i)
+  {
+    assert(ptr[i] == T{});
+  }
+
+  // check memory alignment
+  assert(((TEST_ALIGNOF(T) - 1) & reinterpret_cast<cuda::std::uintptr_t>(ptr)) == 0);
+
+  cuda::std::free(ptr);
+}
+
+template <class T>
+__host__ __device__ void test_calloc_fail(cuda::std::size_t n)
+{
+  T* ptr = static_cast<T*>(cuda::std::calloc(n, sizeof(T)));
+
+  // check that the memory was not allocated
+  assert(ptr == nullptr);
+}
+
+struct BigStruct
+{
+  static constexpr cuda::std::size_t n = 32;
+
+  int data[n];
+
+  __host__ __device__ bool operator==(const BigStruct& other) const
+  {
+    for (cuda::std::size_t i{}; i < n; ++i)
+    {
+      if (data[i] != other.data[i])
+      {
+        return false;
+      }
+    }
+
+    return true;
+  }
+};
+
+struct TEST_ALIGNAS(cuda::std::max_align_t) AlignedStruct
+{
+  static constexpr cuda::std::size_t n = 32;
+
+  char data[n];
+
+  __host__ __device__ bool operator==(const AlignedStruct& other) const
+  {
+    for (cuda::std::size_t i{}; i < n; ++i)
+    {
+      if (data[i] != other.data[i])
+      {
+        return false;
+      }
+    }
+
+    return true;
+  }
+};
+
+__host__ __device__ void test()
+{
+  test_calloc_success<int>(10);
+  test_calloc_success<char>(128);
+  test_calloc_success<double>(8);
+  test_calloc_success<BigStruct>(4);
+  test_calloc_success<AlignedStruct>(16);
+
+  test_calloc_fail<int>(cuda::std::numeric_limits<cuda::std::size_t>::max());
+}
+
+int main(int, char**)
+{
+  test();
+
+  return 0;
+}

From 5fe1416ecd4ee5d3e1cb668e44756bc4529fd2c5 Mon Sep 17 00:00:00 2001
From: David Bayer <bayer.david99@gmail.com>
Date: Wed, 15 Jan 2025 11:41:24 +0100
Subject: [PATCH 2/2] fix review

---
 .../include/cuda/std/__cstdlib/malloc.h       | 46 ++++++----
 .../cstdlib/aligned_alloc.pass.cpp            | 92 +++++++++++--------
 2 files changed, 81 insertions(+), 57 deletions(-)

diff --git a/libcudacxx/include/cuda/std/__cstdlib/malloc.h b/libcudacxx/include/cuda/std/__cstdlib/malloc.h
index 90c97c987ed..b51fb8e48aa 100644
--- a/libcudacxx/include/cuda/std/__cstdlib/malloc.h
+++ b/libcudacxx/include/cuda/std/__cstdlib/malloc.h
@@ -40,7 +40,7 @@ _CCCL_NODISCARD _LIBCUDACXX_HIDE_FROM_ABI void* calloc(size_t __n, size_t __size
 
   NV_IF_ELSE_TARGET(
     NV_IS_HOST, (__ptr = ::calloc(__n, __size);), (size_t __nbytes = __n * __size; if (::__umul64hi(__n, __size) == 0) {
-      __ptr = ::malloc(__nbytes);
+      __ptr = _CUDA_VSTD::malloc(__nbytes);
       if (__ptr != nullptr)
       {
         ::memset(__ptr, 0, __nbytes);
@@ -50,27 +50,37 @@ _CCCL_NODISCARD _LIBCUDACXX_HIDE_FROM_ABI void* calloc(size_t __n, size_t __size
   return __ptr;
 }
 
-#if _CCCL_STD_VER >= 2017 && !_CCCL_COMPILER(MSVC)
-#  define _LIBCUDACXX_HAS_ALIGNED_ALLOC_HOST 1
-#  define _LIBCUDACXX_ALIGNED_ALLOC_HOST     _CCCL_HOST
-#else
-#  define _LIBCUDACXX_ALIGNED_ALLOC_HOST
-#endif // _CCCL_STD_VER >= 2017 && !_CCCL_COMPILER(MSVC)
-
-#if _CCCL_HAS_CUDA_COMPILER && !_CCCL_CUDA_COMPILER(CLANG)
-#  define _LIBCUDACXX_HAS_ALIGNED_ALLOC_DEVICE 1
-#  define _LIBCUDACXX_ALIGNED_ALLOC_DEVICE     _CCCL_DEVICE
+_CCCL_NODISCARD _CCCL_HIDE_FROM_ABI _CCCL_HOST void* __aligned_alloc_host(size_t __nbytes, size_t __align) noexcept
+{
+  void* __ptr{};
+#if _CCCL_STD_VER < 2017
+  _CCCL_ASSERT(false, "Use of aligned_alloc in host code is only available in C++17 and later");
+#elif _CCCL_COMPILER(MSVC)
+  _CCCL_ASSERT(false, "Use of aligned_alloc in host code is not supported with MSVC");
 #else
-#  define _LIBCUDACXX_ALIGNED_ALLOC_DEVICE
-#endif // _CCCL_HAS_CUDA_COMPILER && !_CCCL_CUDA_COMPILER(CLANG)
+  __ptr = ::aligned_alloc(__align, __nbytes);
+#endif
+  return __ptr;
+}
 
-#define _LIBCUDACXX_ALIGNED_ALLOC_EXSPACE _LIBCUDACXX_ALIGNED_ALLOC_HOST _LIBCUDACXX_ALIGNED_ALLOC_DEVICE
+#if _CCCL_HAS_CUDA_COMPILER
+_CCCL_NODISCARD _CCCL_HIDE_FROM_ABI _CCCL_DEVICE void* __aligned_alloc_device(size_t __nbytes, size_t __align) noexcept
+{
+  void* __ptr{};
+#  if _CCCL_CUDA_COMPILER(CLANG)
+  _CCCL_ASSERT(false, "Use of aligned_alloc in device code is not supported with clang-cuda");
+#  else // ^^^ _CCCL_CUDA_COMPILER(CLANG) ^^^ / vvv !_CCCL_CUDA_COMPILER(CLANG)
+  __ptr = ::__nv_aligned_device_malloc(__nbytes, __align);
+#  endif // ^^^ !_CCCL_CUDA_COMPILER(CLANG) ^^^
+  return __ptr;
+}
+#endif // _CCCL_HAS_CUDA_COMPILER
 
-_CCCL_NODISCARD _CCCL_HIDE_FROM_ABI _LIBCUDACXX_ALIGNED_ALLOC_EXSPACE void*
-aligned_alloc(size_t __nbytes, size_t __align) noexcept
+_CCCL_NODISCARD _LIBCUDACXX_HIDE_FROM_ABI void* aligned_alloc(size_t __nbytes, size_t __align) noexcept
 {
-  NV_IF_TARGET(
-    NV_IS_HOST, (return ::aligned_alloc(__align, __nbytes);), (return ::__nv_aligned_device_malloc(__nbytes, __align);))
+  NV_IF_ELSE_TARGET(NV_IS_HOST,
+                    (return _CUDA_VSTD::__aligned_alloc_host(__nbytes, __align);),
+                    (return _CUDA_VSTD::__aligned_alloc_device(__nbytes, __align);))
 }
 
 _LIBCUDACXX_END_NAMESPACE_STD
diff --git a/libcudacxx/test/libcudacxx/std/language.support/cstdlib/aligned_alloc.pass.cpp b/libcudacxx/test/libcudacxx/std/language.support/cstdlib/aligned_alloc.pass.cpp
index ca4ca74d9de..eb2b20374b2 100644
--- a/libcudacxx/test/libcudacxx/std/language.support/cstdlib/aligned_alloc.pass.cpp
+++ b/libcudacxx/test/libcudacxx/std/language.support/cstdlib/aligned_alloc.pass.cpp
@@ -7,42 +7,43 @@
 //
 //===----------------------------------------------------------------------===//
 
+// Disable CCCL assertions in this test to test the erroneous behavior
+#undef CCCL_ENABLE_ASSERTIONS
+
 #include <cuda/std/cassert>
 #include <cuda/std/cstdint>
 #include <cuda/std/cstdlib>
 #include <cuda/std/limits>
+#include <cuda/std/type_traits>
 
 #include "test_macros.h"
+#include <nv/target>
 
 template <class T>
-_LIBCUDACXX_ALIGNED_ALLOC_EXSPACE void
-test_aligned_alloc_success(cuda::std::size_t n, cuda::std::size_t align = TEST_ALIGNOF(T))
+__host__ __device__ void
+test_aligned_alloc(bool expect_success, cuda::std::size_t n, cuda::std::size_t align = TEST_ALIGNOF(T))
 {
-#if (TEST_STD_VER >= 17 && !_CCCL_COMPILER(MSVC)) || (_CCCL_HAS_CUDA_COMPILER && !_CCCL_CUDA_COMPILER(CLANG))
-  static_assert(noexcept(cuda::std::aligned_alloc(n * sizeof(T), align)), "");
+  if (expect_success)
+  {
+    static_assert(noexcept(cuda::std::aligned_alloc(n * sizeof(T), align)), "");
 
-  T* ptr = static_cast<T*>(cuda::std::aligned_alloc(n * sizeof(T), align));
+    T* ptr = static_cast<T*>(cuda::std::aligned_alloc(n * sizeof(T), align));
 
-  // check that the memory was allocated
-  assert(ptr != nullptr);
+    // check that the memory was allocated
+    assert(ptr != nullptr);
 
-  // check memory alignment
-  assert(((align - 1) & reinterpret_cast<cuda::std::uintptr_t>(ptr)) == 0);
+    // check memory alignment
+    assert(((align - 1) & reinterpret_cast<cuda::std::uintptr_t>(ptr)) == 0);
 
-  cuda::std::free(ptr);
-#endif // (TEST_STD_VER >= 17 && !_CCCL_COMPILER(MSVC)) || (_CCCL_HAS_CUDA_COMPILER && !_CCCL_CUDA_COMPILER(CLANG))
-}
+    cuda::std::free(ptr);
+  }
+  else
+  {
+    T* ptr = static_cast<T*>(cuda::std::aligned_alloc(n * sizeof(T), align));
 
-template <class T>
-_LIBCUDACXX_ALIGNED_ALLOC_EXSPACE void
-test_aligned_alloc_fail(cuda::std::size_t n, cuda::std::size_t align = TEST_ALIGNOF(T))
-{
-#if (TEST_STD_VER >= 17 && !_CCCL_COMPILER(MSVC)) || (_CCCL_HAS_CUDA_COMPILER && !_CCCL_CUDA_COMPILER(CLANG))
-  T* ptr = static_cast<T*>(cuda::std::aligned_alloc(n * sizeof(T), align));
-
-  // check that the memory allocation failed
-  assert(ptr == nullptr);
-#endif // (TEST_STD_VER >= 17 && !_CCCL_COMPILER(MSVC)) || (_CCCL_HAS_CUDA_COMPILER && !_CCCL_CUDA_COMPILER(C
+    // check that the memory allocation failed
+    assert(ptr == nullptr);
+  }
 }
 
 struct BigStruct
@@ -60,27 +61,40 @@ struct TEST_ALIGNAS(128) OverAlignedStruct
   char data[32];
 };
 
-_LIBCUDACXX_ALIGNED_ALLOC_EXSPACE void test()
+__host__ __device__ bool should_expect_success()
+{
+  bool host_has_aligned_alloc = false;
+#if TEST_STD_VER >= 2017 && !_CCCL_COMPILER(MSVC)
+  host_has_aligned_alloc = true;
+#endif // ^^^ TEST_STD_VER >= 2017 && !_CCCL_COMPILER(MSVC) ^^^
+
+  bool device_has_aligned_alloc = false;
+#if !_CCCL_CUDA_COMPILER(CLANG)
+  device_has_aligned_alloc = true;
+#endif // ^^^ !_CCCL_CUDA_COMPILER(CLANG) ^^^
+
+  unused(host_has_aligned_alloc, device_has_aligned_alloc);
+
+  NV_IF_ELSE_TARGET(NV_IS_HOST, (return host_has_aligned_alloc;), (return device_has_aligned_alloc;))
+}
+
+__host__ __device__ void test()
 {
-  test_aligned_alloc_success<int>(10, 4);
-  test_aligned_alloc_success<char>(128, 8);
-  test_aligned_alloc_success<double>(8, 32);
-  test_aligned_alloc_success<BigStruct>(4, 128);
-  test_aligned_alloc_success<AlignedStruct>(16);
-  test_aligned_alloc_success<OverAlignedStruct>(1);
-  test_aligned_alloc_success<OverAlignedStruct>(1, 256);
-
-  test_aligned_alloc_fail<int>(10, 3);
+  const bool expect_success = should_expect_success();
+
+  test_aligned_alloc<int>(expect_success, 10, 4);
+  test_aligned_alloc<char>(expect_success, 128, 8);
+  test_aligned_alloc<double>(expect_success, 8, 32);
+  test_aligned_alloc<BigStruct>(expect_success, 4, 128);
+  test_aligned_alloc<AlignedStruct>(expect_success, 16);
+  test_aligned_alloc<OverAlignedStruct>(expect_success, 1);
+  test_aligned_alloc<OverAlignedStruct>(expect_success, 1, 256);
+
+  test_aligned_alloc<int>(false, 10, 3);
 }
 
 int main(int, char**)
 {
-#if _LIBCUDACXX_HAS_ALIGNED_ALLOC_HOST
-  NV_IF_TARGET(NV_IS_HOST, test();)
-#endif // _LIBCUDACXX_HAS_ALIGNED_ALLOC_HOST
-#if _LIBCUDACXX_HAS_ALIGNED_ALLOC_DEVICE
-  NV_IF_TARGET(NV_IS_DEVICE, test();)
-#endif // _LIBCUDACXX_HAS_ALIGNED_ALLOC_DEVICE
-
+  test();
   return 0;
 }