Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

refactor <cuda/std/cstdlib> #3339

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
88 changes: 88 additions & 0 deletions libcudacxx/include/cuda/std/__cstdlib/malloc.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
// -*- C++ -*-
//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
// SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES.
//
//===----------------------------------------------------------------------===//

#ifndef _LIBCUDACXX___CSTDLIB_MALLOC_H
#define _LIBCUDACXX___CSTDLIB_MALLOC_H

#include <cuda/std/detail/__config>

#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC)
# pragma GCC system_header
#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG)
# pragma clang system_header
#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC)
# pragma system_header
#endif // no system header

#include <cuda/std/__cstddef/types.h>

#if !_CCCL_COMPILER(NVRTC)
# include <cstdlib>
#endif // !_CCCL_COMPILER(NVRTC)

#include <nv/target>

_LIBCUDACXX_BEGIN_NAMESPACE_STD

using ::free;
using ::malloc;

// cuda::std::calloc: allocate zero-initialized storage for __n objects of
// __size bytes each. Returns nullptr on allocation failure or (device path)
// when __n * __size would overflow size_t.
_CCCL_NODISCARD _LIBCUDACXX_HIDE_FROM_ABI void* calloc(size_t __n, size_t __size) noexcept
{
  void* __ptr{};

  // Host: defer to the C library's ::calloc, which performs its own overflow
  // checking and zero-initialization.
  // Device: there is no ::calloc, so emulate it with malloc + memset.
  // ::__umul64hi yields the high 64 bits of the full 128-bit product of
  // __n * __size; a nonzero result means the byte count overflowed size_t,
  // in which case __ptr is left as nullptr.
  // NOTE(review): the device branch assumes size_t is 64 bits (true for all
  // current CUDA device targets) — confirm if new targets are added.
  // NOTE(review): ::memset in device code relies on the CUDA-provided
  // builtin declaration rather than an explicit <cstring> include — verify
  // this holds for all supported device compilers.
  NV_IF_ELSE_TARGET(
    NV_IS_HOST, (__ptr = ::calloc(__n, __size);), (size_t __nbytes = __n * __size; if (::__umul64hi(__n, __size) == 0) {
      __ptr = _CUDA_VSTD::malloc(__nbytes);
      if (__ptr != nullptr)
      {
        ::memset(__ptr, 0, __nbytes);
      }
    }))

  return __ptr;
}

// Host-side backend for cuda::std::aligned_alloc.
// __nbytes: allocation size in bytes; __align: requested alignment.
// Returns nullptr when the host toolchain provides no ::aligned_alloc:
//   - before C++17 (aligned_alloc entered <cstdlib> in C++17), or
//   - under MSVC, whose CRT does not implement C11 aligned_alloc.
// In those configurations the _CCCL_ASSERT fires in assertion-enabled builds;
// otherwise the function simply returns the default-initialized nullptr.
// Note the parameter order: this helper takes (size, alignment) while the
// underlying ::aligned_alloc takes (alignment, size).
_CCCL_NODISCARD _CCCL_HIDE_FROM_ABI _CCCL_HOST void* __aligned_alloc_host(size_t __nbytes, size_t __align) noexcept
{
  void* __ptr{};
#if _CCCL_STD_VER < 2017
  _CCCL_ASSERT(false, "Use of aligned_alloc in host code is only available in C++17 and later");
#elif _CCCL_COMPILER(MSVC)
  _CCCL_ASSERT(false, "Use of aligned_alloc in host code is not supported with MSVC");
#else
  // C11/C++17 signature: alignment first, then size.
  __ptr = ::aligned_alloc(__align, __nbytes);
#endif
  return __ptr;
}

#if _CCCL_HAS_CUDA_COMPILER
// Device-side backend for cuda::std::aligned_alloc.
// __nbytes: allocation size in bytes; __align: requested alignment.
// Uses the CUDA runtime's ::__nv_aligned_device_malloc. clang-cuda does not
// expose that entry point, so there the _CCCL_ASSERT fires in
// assertion-enabled builds and the function returns nullptr.
// NOTE(review): alignment/size validity checking is delegated entirely to
// __nv_aligned_device_malloc (e.g. non-power-of-two alignments) — confirm
// its documented failure mode is nullptr, as the test suite assumes.
_CCCL_NODISCARD _CCCL_HIDE_FROM_ABI _CCCL_DEVICE void* __aligned_alloc_device(size_t __nbytes, size_t __align) noexcept
{
  void* __ptr{};
#  if _CCCL_CUDA_COMPILER(CLANG)
  _CCCL_ASSERT(false, "Use of aligned_alloc in device code is not supported with clang-cuda");
#  else // ^^^ _CCCL_CUDA_COMPILER(CLANG) ^^^ / vvv !_CCCL_CUDA_COMPILER(CLANG)
  __ptr = ::__nv_aligned_device_malloc(__nbytes, __align);
#  endif // ^^^ !_CCCL_CUDA_COMPILER(CLANG) ^^^
  return __ptr;
}
#endif // _CCCL_HAS_CUDA_COMPILER

// cuda::std::aligned_alloc: dispatch to the host or device backend depending
// on which side of the fat binary is being compiled/executed.
// Takes (size, alignment), mirroring the helpers above. Returns nullptr when
// the current configuration has no usable aligned allocator.
_CCCL_NODISCARD _LIBCUDACXX_HIDE_FROM_ABI void* aligned_alloc(size_t __nbytes, size_t __align) noexcept
{
  void* __ret{};
  NV_IF_ELSE_TARGET(NV_IS_HOST,
                    (__ret = _CUDA_VSTD::__aligned_alloc_host(__nbytes, __align);),
                    (__ret = _CUDA_VSTD::__aligned_alloc_device(__nbytes, __align);))
  return __ret;
}

_LIBCUDACXX_END_NAMESPACE_STD

#endif // _LIBCUDACXX___CSTDLIB_MALLOC_H
5 changes: 4 additions & 1 deletion libcudacxx/include/cuda/std/cstdlib
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,10 @@

_CCCL_PUSH_MACROS

#include <cuda/std/detail/libcxx/include/cstdlib>
#include <cuda/std/__cstdlib/abs.h>
#include <cuda/std/__cstdlib/div.h>
#include <cuda/std/__cstdlib/malloc.h>
#include <cuda/std/version>

_CCCL_POP_MACROS

Expand Down
148 changes: 0 additions & 148 deletions libcudacxx/include/cuda/std/detail/libcxx/include/cstdlib

This file was deleted.

Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
// SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES.
//
//===----------------------------------------------------------------------===//

// Disable CCCL assertions in this test to test the erroneous behavior
#undef CCCL_ENABLE_ASSERTIONS

#include <cuda/std/cassert>
#include <cuda/std/cstdint>
#include <cuda/std/cstdlib>
#include <cuda/std/limits>
#include <cuda/std/type_traits>

#include "test_macros.h"
#include <nv/target>

// Allocate storage for n objects of T with the given alignment via
// cuda::std::aligned_alloc and validate the outcome.
// expect_success == true: the pointer must be non-null and correctly aligned,
// and is released with cuda::std::free.
// expect_success == false: the allocation must yield nullptr.
// Precondition (for the alignment mask check): align is a power of two
// whenever expect_success is true.
template <class T>
__host__ __device__ void
test_aligned_alloc(bool expect_success, cuda::std::size_t n, cuda::std::size_t align = TEST_ALIGNOF(T))
{
  // aligned_alloc must be declared noexcept (compile-time property, checked
  // regardless of the runtime branch taken).
  static_assert(noexcept(cuda::std::aligned_alloc(n * sizeof(T), align)), "");

  T* allocation = static_cast<T*>(cuda::std::aligned_alloc(n * sizeof(T), align));

  if (!expect_success)
  {
    // Unsupported configuration or invalid request: must report failure.
    assert(allocation == nullptr);
    return;
  }

  // The allocation must have succeeded...
  assert(allocation != nullptr);

  // ...and the address must be a multiple of the requested alignment.
  assert((reinterpret_cast<cuda::std::uintptr_t>(allocation) & (align - 1)) == 0);

  cuda::std::free(allocation);
}

// 128-byte aggregate with default alignment; exercises aligned_alloc with a
// multi-word element type.
struct BigStruct
{
  int data[32];
};

// Type aligned to max_align_t; exercises the default-argument path of
// test_aligned_alloc with the strongest fundamental alignment.
struct TEST_ALIGNAS(cuda::std::max_align_t) AlignedStruct
{
  char data[32];
};

// Over-aligned type (128-byte alignment, stricter than max_align_t);
// exercises extended-alignment requests.
struct TEST_ALIGNAS(128) OverAlignedStruct
{
  char data[32];
};

// Mirrors the availability matrix of cuda::std::aligned_alloc so the test
// knows whether a well-formed request should succeed in this configuration:
//   host:   requires C++17 or later and a non-MSVC toolchain
//   device: unsupported under clang-cuda (no __nv_aligned_device_malloc)
__host__ __device__ bool should_expect_success()
{
  bool host_has_aligned_alloc = false;
#if TEST_STD_VER >= 2017 && !_CCCL_COMPILER(MSVC)
  host_has_aligned_alloc = true;
#endif // ^^^ TEST_STD_VER >= 2017 && !_CCCL_COMPILER(MSVC) ^^^

  bool device_has_aligned_alloc = false;
#if !_CCCL_CUDA_COMPILER(CLANG)
  device_has_aligned_alloc = true;
#endif // ^^^ !_CCCL_CUDA_COMPILER(CLANG) ^^^

  // Only one of the two flags is consumed per compilation pass (the other
  // branch of NV_IF_ELSE_TARGET is discarded); suppress unused warnings.
  unused(host_has_aligned_alloc, device_has_aligned_alloc);

  NV_IF_ELSE_TARGET(NV_IS_HOST, (return host_has_aligned_alloc;), (return device_has_aligned_alloc;))
}

// Drive cuda::std::aligned_alloc across a spread of element sizes and
// alignments: fundamental types, large aggregates, max_align_t-aligned and
// over-aligned types, plus one deliberately invalid alignment.
__host__ __device__ void test()
{
  // Whether well-formed requests should succeed in this host/device config.
  const bool ok = should_expect_success();

  test_aligned_alloc<int>(ok, 10, 4);
  test_aligned_alloc<char>(ok, 128, 8);
  test_aligned_alloc<double>(ok, 8, 32);
  test_aligned_alloc<BigStruct>(ok, 4, 128);
  test_aligned_alloc<AlignedStruct>(ok, 16);
  test_aligned_alloc<OverAlignedStruct>(ok, 1);
  test_aligned_alloc<OverAlignedStruct>(ok, 1, 256);

  // 3 is not a power of two, so this request must fail everywhere.
  test_aligned_alloc<int>(false, 10, 3);
}

// Test entry point: runs the host-side pass (the device pass is launched by
// the test harness via the __host__ __device__ test()).
int main(int, char**)
{
  test();
  return 0;
}
Loading