refactor <cuda/std/cstdlib> #3339

Open
wants to merge 2 commits into base: main

Changes from 1 commit
78 changes: 78 additions & 0 deletions libcudacxx/include/cuda/std/__cstdlib/malloc.h
@@ -0,0 +1,78 @@
// -*- C++ -*-
//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
// SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES.
//
//===----------------------------------------------------------------------===//

#ifndef _LIBCUDACXX___CSTDLIB_MALLOC_H
#define _LIBCUDACXX___CSTDLIB_MALLOC_H

#include <cuda/std/detail/__config>

#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC)
# pragma GCC system_header
#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG)
# pragma clang system_header
#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC)
# pragma system_header
#endif // no system header

#include <cuda/std/__cstddef/types.h>

#if !_CCCL_COMPILER(NVRTC)
# include <cstdlib>
#endif // !_CCCL_COMPILER(NVRTC)

#include <nv/target>

_LIBCUDACXX_BEGIN_NAMESPACE_STD

using ::free;
using ::malloc;

_CCCL_NODISCARD _LIBCUDACXX_HIDE_FROM_ABI void* calloc(size_t __n, size_t __size) noexcept
{
  void* __ptr{};

  NV_IF_ELSE_TARGET(
    NV_IS_HOST,
    (__ptr = ::calloc(__n, __size);),
    (size_t __nbytes = __n * __size;
     // on device: allocate and zero only if __n * __size does not overflow,
     // i.e. the high 64 bits of the 128-bit product are zero
     if (::__umul64hi(__n, __size) == 0) {
       __ptr = ::malloc(__nbytes);
       if (__ptr != nullptr)
       {
         ::memset(__ptr, 0, __nbytes);
       }
     }))

  return __ptr;
}

#if _CCCL_STD_VER >= 2017 && !_CCCL_COMPILER(MSVC)
# define _LIBCUDACXX_HAS_ALIGNED_ALLOC_HOST 1
# define _LIBCUDACXX_ALIGNED_ALLOC_HOST _CCCL_HOST
#else
# define _LIBCUDACXX_ALIGNED_ALLOC_HOST
#endif // _CCCL_STD_VER >= 2017 && !_CCCL_COMPILER(MSVC)

#if _CCCL_HAS_CUDA_COMPILER && !_CCCL_CUDA_COMPILER(CLANG)
# define _LIBCUDACXX_HAS_ALIGNED_ALLOC_DEVICE 1
# define _LIBCUDACXX_ALIGNED_ALLOC_DEVICE _CCCL_DEVICE
#else
# define _LIBCUDACXX_ALIGNED_ALLOC_DEVICE
#endif // _CCCL_HAS_CUDA_COMPILER && !_CCCL_CUDA_COMPILER(CLANG)

#define _LIBCUDACXX_ALIGNED_ALLOC_EXSPACE _LIBCUDACXX_ALIGNED_ALLOC_HOST _LIBCUDACXX_ALIGNED_ALLOC_DEVICE
Collaborator

Can you elaborate what the issue is here?

Shouldn't this rather be a _CCCL_EXEC_CHECK_DISABLE?

Contributor Author

The problem is quite simple: to use aligned_alloc,

  • on host, we need C++17, but MSVC does not implement it at all
  • on device, we need support for __nv_aligned_device_malloc, which I did not find a way to make work with clang-cuda

Using _CCCL_EXEC_CHECK_DISABLE would probably solve the issue, too.
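
For readers following along, here is a minimal standalone sketch of the execution-space-annotation pattern this thread discusses. It assumes a CUDA translation unit, and the EXAMPLE_* macros and the two feature toggles are illustrative placeholders, not CCCL's real configuration logic:

// Minimal sketch (not part of this diff): each execution space gets its own toggle,
// and the allocation function is annotated only for the spaces whose allocator is
// actually usable. Assumes a CUDA translation unit so __host__/__device__ are
// meaningful; the EXAMPLE_* names are hypothetical, not CCCL's real macros.
#include <cstddef>

#if defined(EXAMPLE_HAS_HOST_ALIGNED_ALLOC) // e.g. C++17 standard library, not MSVC
#  define EXAMPLE_ALIGNED_ALLOC_HOST __host__
#else
#  define EXAMPLE_ALIGNED_ALLOC_HOST
#endif

#if defined(EXAMPLE_HAS_DEVICE_ALIGNED_ALLOC) // e.g. nvcc exposing __nv_aligned_device_malloc
#  define EXAMPLE_ALIGNED_ALLOC_DEVICE __device__
#else
#  define EXAMPLE_ALIGNED_ALLOC_DEVICE
#endif

// Expands to __host__, __device__, both, or nothing, mirroring the
// _LIBCUDACXX_ALIGNED_ALLOC_EXSPACE pattern above.
#define EXAMPLE_ALIGNED_ALLOC_EXSPACE EXAMPLE_ALIGNED_ALLOC_HOST EXAMPLE_ALIGNED_ALLOC_DEVICE

EXAMPLE_ALIGNED_ALLOC_EXSPACE void* example_aligned_alloc(std::size_t nbytes, std::size_t align);

Callers then guard their calls with the matching feature macros, which is what the new test below does with _LIBCUDACXX_HAS_ALIGNED_ALLOC_HOST and _LIBCUDACXX_HAS_ALIGNED_ALLOC_DEVICE.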


_CCCL_NODISCARD _CCCL_HIDE_FROM_ABI _LIBCUDACXX_ALIGNED_ALLOC_EXSPACE void*
aligned_alloc(size_t __nbytes, size_t __align) noexcept
{
  NV_IF_TARGET(
    NV_IS_HOST,
    (return ::aligned_alloc(__align, __nbytes);),
    (return ::__nv_aligned_device_malloc(__nbytes, __align);))
}

_LIBCUDACXX_END_NAMESPACE_STD

#endif // _LIBCUDACXX___CSTDLIB_MALLOC_H
5 changes: 4 additions & 1 deletion libcudacxx/include/cuda/std/cstdlib
@@ -23,7 +23,10 @@

_CCCL_PUSH_MACROS

-#include <cuda/std/detail/libcxx/include/cstdlib>
+#include <cuda/std/__cstdlib/abs.h>
+#include <cuda/std/__cstdlib/div.h>
+#include <cuda/std/__cstdlib/malloc.h>
+#include <cuda/std/version>

_CCCL_POP_MACROS

148 changes: 0 additions & 148 deletions libcudacxx/include/cuda/std/detail/libcxx/include/cstdlib

This file was deleted.

New test file:
@@ -0,0 +1,86 @@
//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
// SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES.
//
//===----------------------------------------------------------------------===//

#include <cuda/std/cassert>
#include <cuda/std/cstdint>
#include <cuda/std/cstdlib>
#include <cuda/std/limits>

#include "test_macros.h"

template <class T>
_LIBCUDACXX_ALIGNED_ALLOC_EXSPACE void
test_aligned_alloc_success(cuda::std::size_t n, cuda::std::size_t align = TEST_ALIGNOF(T))
{
#if (TEST_STD_VER >= 17 && !_CCCL_COMPILER(MSVC)) || (_CCCL_HAS_CUDA_COMPILER && !_CCCL_CUDA_COMPILER(CLANG))
  static_assert(noexcept(cuda::std::aligned_alloc(n * sizeof(T), align)), "");

  T* ptr = static_cast<T*>(cuda::std::aligned_alloc(n * sizeof(T), align));

  // check that the memory was allocated
  assert(ptr != nullptr);

  // check memory alignment
  assert(((align - 1) & reinterpret_cast<cuda::std::uintptr_t>(ptr)) == 0);

  cuda::std::free(ptr);
#endif // (TEST_STD_VER >= 17 && !_CCCL_COMPILER(MSVC)) || (_CCCL_HAS_CUDA_COMPILER && !_CCCL_CUDA_COMPILER(CLANG))
}

template <class T>
_LIBCUDACXX_ALIGNED_ALLOC_EXSPACE void
test_aligned_alloc_fail(cuda::std::size_t n, cuda::std::size_t align = TEST_ALIGNOF(T))
{
#if (TEST_STD_VER >= 17 && !_CCCL_COMPILER(MSVC)) || (_CCCL_HAS_CUDA_COMPILER && !_CCCL_CUDA_COMPILER(CLANG))
  T* ptr = static_cast<T*>(cuda::std::aligned_alloc(n * sizeof(T), align));

  // check that the memory allocation failed
  assert(ptr == nullptr);
#endif // (TEST_STD_VER >= 17 && !_CCCL_COMPILER(MSVC)) || (_CCCL_HAS_CUDA_COMPILER && !_CCCL_CUDA_COMPILER(CLANG))
}

struct BigStruct
{
  int data[32];
};

struct TEST_ALIGNAS(cuda::std::max_align_t) AlignedStruct
{
  char data[32];
};

struct TEST_ALIGNAS(128) OverAlignedStruct
{
  char data[32];
};

_LIBCUDACXX_ALIGNED_ALLOC_EXSPACE void test()
{
  test_aligned_alloc_success<int>(10, 4);
  test_aligned_alloc_success<char>(128, 8);
  test_aligned_alloc_success<double>(8, 32);
  test_aligned_alloc_success<BigStruct>(4, 128);
  test_aligned_alloc_success<AlignedStruct>(16);
  test_aligned_alloc_success<OverAlignedStruct>(1);
  test_aligned_alloc_success<OverAlignedStruct>(1, 256);

  test_aligned_alloc_fail<int>(10, 3);
}

int main(int, char**)
{
#if _LIBCUDACXX_HAS_ALIGNED_ALLOC_HOST
  NV_IF_TARGET(NV_IS_HOST, test();)
#endif // _LIBCUDACXX_HAS_ALIGNED_ALLOC_HOST
#if _LIBCUDACXX_HAS_ALIGNED_ALLOC_DEVICE
  NV_IF_TARGET(NV_IS_DEVICE, test();)
#endif // _LIBCUDACXX_HAS_ALIGNED_ALLOC_DEVICE

  return 0;
}