diff --git a/libcudacxx/include/cuda/std/__cccl/builtin.h b/libcudacxx/include/cuda/std/__cccl/builtin.h index 20a4fd3fbb..d7e8723b86 100644 --- a/libcudacxx/include/cuda/std/__cccl/builtin.h +++ b/libcudacxx/include/cuda/std/__cccl/builtin.h @@ -150,6 +150,20 @@ # undef _CCCL_BUILTIN_BSWAP128 #endif // _CCCL_CUDA_COMPILER(NVCC) +#if _CCCL_CHECK_BUILTIN(builtin_cbrt) || _CCCL_COMPILER(GCC) +# define _CCCL_BUILTIN_CBRTF(...) __builtin_cbrtf(__VA_ARGS__) +# define _CCCL_BUILTIN_CBRT(...) __builtin_cbrt(__VA_ARGS__) +# define _CCCL_BUILTIN_CBRTL(...) __builtin_cbrtl(__VA_ARGS__) +#endif // _CCCL_CHECK_BUILTIN(builtin_cbrt) + +// Below 11.7 nvcc treats the builtin as a host only function +// clang-cuda fails with fatal error: error in backend: Undefined external symbol "cbrt" +#if _CCCL_CUDACC_BELOW(11, 7) || _CCCL_CUDA_COMPILER(CLANG) +# undef _CCCL_BUILTIN_CBRTF +# undef _CCCL_BUILTIN_CBRT +# undef _CCCL_BUILTIN_CBRTL +#endif // _CCCL_CUDACC_BELOW(11, 7) || _CCCL_CUDA_COMPILER(CLANG) + #if _CCCL_CHECK_BUILTIN(builtin_ceil) || _CCCL_COMPILER(GCC) # define _CCCL_BUILTIN_CEILF(...) __builtin_ceilf(__VA_ARGS__) # define _CCCL_BUILTIN_CEIL(...) __builtin_ceil(__VA_ARGS__) @@ -576,6 +590,19 @@ # undef _CCCL_BUILTIN_SIGNBIT #endif // _CCCL_CUDACC_BELOW(11, 7) +#if _CCCL_CHECK_BUILTIN(builtin_sqrt) || _CCCL_COMPILER(GCC) +# define _CCCL_BUILTIN_SQRTF(...) __builtin_sqrtf(__VA_ARGS__) +# define _CCCL_BUILTIN_SQRT(...) __builtin_sqrt(__VA_ARGS__) +# define _CCCL_BUILTIN_SQRTL(...) __builtin_sqrtl(__VA_ARGS__) +#endif // _CCCL_CHECK_BUILTIN(builtin_sqrt) + +// Below 11.7 nvcc treats the builtin as a host only function +#if _CCCL_CUDACC_BELOW(11, 7) +# undef _CCCL_BUILTIN_SQRTF +# undef _CCCL_BUILTIN_SQRT +# undef _CCCL_BUILTIN_SQRTL +#endif // _CCCL_CUDACC_BELOW(11, 7) + #if _CCCL_CHECK_BUILTIN(builtin_trunc) || _CCCL_COMPILER(GCC) # define _CCCL_BUILTIN_TRUNCF(...) __builtin_truncf(__VA_ARGS__) # define _CCCL_BUILTIN_TRUNC(...) __builtin_trunc(__VA_ARGS__) diff --git a/libcudacxx/include/cuda/std/__cmath/nvbf16.h b/libcudacxx/include/cuda/std/__cmath/nvbf16.h index 8f116968f8..3f8a26964d 100644 --- a/libcudacxx/include/cuda/std/__cmath/nvbf16.h +++ b/libcudacxx/include/cuda/std/__cmath/nvbf16.h @@ -70,11 +70,6 @@ _LIBCUDACXX_HIDE_FROM_ABI __nv_bfloat16 atan2(__nv_bfloat16 __x, __nv_bfloat16 _ return __float2bfloat16(::atan2f(__bfloat162float(__x), __bfloat162float(__y))); } -_LIBCUDACXX_HIDE_FROM_ABI __nv_bfloat16 sqrt(__nv_bfloat16 __x) -{ - NV_IF_ELSE_TARGET(NV_IS_DEVICE, (return ::hsqrt(__x);), (return __float2bfloat16(::sqrtf(__bfloat162float(__x)));)) -} - // floating point helper _LIBCUDACXX_HIDE_FROM_ABI __nv_bfloat16 __constexpr_copysign(__nv_bfloat16 __x, __nv_bfloat16 __y) noexcept { diff --git a/libcudacxx/include/cuda/std/__cmath/nvfp16.h b/libcudacxx/include/cuda/std/__cmath/nvfp16.h index dbcaebbb4e..2f1fe3d42a 100644 --- a/libcudacxx/include/cuda/std/__cmath/nvfp16.h +++ b/libcudacxx/include/cuda/std/__cmath/nvfp16.h @@ -135,11 +135,6 @@ _LIBCUDACXX_HIDE_FROM_ABI __half atan2(__half __x, __half __y) return __float2half(::atan2f(__half2float(__x), __half2float(__y))); } -_LIBCUDACXX_HIDE_FROM_ABI __half sqrt(__half __x) -{ - NV_IF_ELSE_TARGET(NV_IS_DEVICE, (return ::hsqrt(__x);), (return __float2half(::sqrtf(__half2float(__x)));)) -} - // floating point helper _LIBCUDACXX_HIDE_FROM_ABI __half __constexpr_copysign(__half __x, __half __y) noexcept { diff --git a/libcudacxx/include/cuda/std/__cmath/roots.h b/libcudacxx/include/cuda/std/__cmath/roots.h new file mode 100644 index 0000000000..0d2065dcf5 --- /dev/null +++ b/libcudacxx/include/cuda/std/__cmath/roots.h @@ -0,0 +1,171 @@ +// -*- C++ -*- +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCUDACXX___CMATH_ROOTS_H +#define _LIBCUDACXX___CMATH_ROOTS_H + +#include + +#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC) +# pragma GCC system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG) +# pragma clang system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC) +# pragma system_header +#endif // no system header + +#include +#include +#include + +_LIBCUDACXX_BEGIN_NAMESPACE_STD + +// sqrt + +_CCCL_NODISCARD _LIBCUDACXX_HIDE_FROM_ABI float sqrt(float __x) noexcept +{ +#if defined(_CCCL_BUILTIN_SQRTF) + return _CCCL_BUILTIN_SQRTF(__x); +#else // ^^^ _CCCL_BUILTIN_SQRTF ^^^ // vvv !_CCCL_BUILTIN_SQRTF vvv + return ::sqrtf(__x); +#endif // !_CCCL_BUILTIN_SQRTF +} + +_CCCL_NODISCARD _LIBCUDACXX_HIDE_FROM_ABI float sqrtf(float __x) noexcept +{ +#if defined(_CCCL_BUILTIN_SQRTF) + return _CCCL_BUILTIN_SQRTF(__x); +#else // ^^^ _CCCL_BUILTIN_SQRTF ^^^ // vvv !_CCCL_BUILTIN_SQRTF vvv + return ::sqrtf(__x); +#endif // !_CCCL_BUILTIN_SQRTF +} + +_CCCL_NODISCARD _LIBCUDACXX_HIDE_FROM_ABI double sqrt(double __x) noexcept +{ +#if defined(_CCCL_BUILTIN_SQRT) + return _CCCL_BUILTIN_SQRT(__x); +#else // ^^^ _CCCL_BUILTIN_SQRT ^^^ // vvv !_CCCL_BUILTIN_SQRT vvv + return ::sqrt(__x); +#endif // !_CCCL_BUILTIN_SQRT +} + +#if !defined(_LIBCUDACXX_HAS_NO_LONG_DOUBLE) +_CCCL_NODISCARD _LIBCUDACXX_HIDE_FROM_ABI long double sqrt(long double __x) noexcept +{ +# if defined(_CCCL_BUILTIN_SQRTL) + return _CCCL_BUILTIN_SQRTL(__x); +# else // ^^^ _CCCL_BUILTIN_SQRTL ^^^ // vvv !_CCCL_BUILTIN_SQRTL vvv + return ::sqrtl(__x); +# endif // !_CCCL_BUILTIN_SQRTL +} + +_CCCL_NODISCARD _LIBCUDACXX_HIDE_FROM_ABI long double sqrtl(long double __x) noexcept +{ +# if defined(_CCCL_BUILTIN_SQRTL) + return _CCCL_BUILTIN_SQRTL(__x); +# else // ^^^ _CCCL_BUILTIN_SQRTL ^^^ // vvv !_CCCL_BUILTIN_SQRTL vvv + return ::sqrtl(__x); +# endif // !_CCCL_BUILTIN_SQRTL +} +#endif // !_LIBCUDACXX_HAS_NO_LONG_DOUBLE + +#if defined(_LIBCUDACXX_HAS_NVFP16) +_CCCL_NODISCARD _LIBCUDACXX_HIDE_FROM_ABI __half sqrt(__half __x) noexcept +{ + NV_IF_ELSE_TARGET(NV_IS_DEVICE, (return ::hsqrt(__x);), (return __float2half(_CUDA_VSTD::sqrt(__half2float(__x)));)) +} +#endif // _LIBCUDACXX_HAS_NVFP16 + +#if defined(_LIBCUDACXX_HAS_NVBF16) +_CCCL_NODISCARD _LIBCUDACXX_HIDE_FROM_ABI __nv_bfloat16 sqrt(__nv_bfloat16 __x) noexcept +{ + NV_IF_ELSE_TARGET( + NV_IS_DEVICE, (return ::hsqrt(__x);), (return __float2bfloat16(_CUDA_VSTD::sqrt(__bfloat162float(__x)));)) +} +#endif // _LIBCUDACXX_HAS_NVBF16 + +template = 0> +_CCCL_NODISCARD _LIBCUDACXX_HIDE_FROM_ABI double sqrt(_Integer __x) noexcept +{ + return _CUDA_VSTD::sqrt((double) __x); +} + +// cbrt + +_CCCL_NODISCARD _LIBCUDACXX_HIDE_FROM_ABI float cbrt(float __x) noexcept +{ +#if defined(_CCCL_BUILTIN_CBRTF) + return _CCCL_BUILTIN_CBRTF(__x); +#else // ^^^ _CCCL_BUILTIN_CBRTF ^^^ // vvv !_CCCL_BUILTIN_CBRTF vvv + return ::cbrtf(__x); +#endif // !_CCCL_BUILTIN_CBRTF +} + +_CCCL_NODISCARD _LIBCUDACXX_HIDE_FROM_ABI float cbrtf(float __x) noexcept +{ +#if defined(_CCCL_BUILTIN_CBRTF) + return _CCCL_BUILTIN_CBRTF(__x); +#else // ^^^ _CCCL_BUILTIN_CBRTF ^^^ // vvv !_CCCL_BUILTIN_CBRTF vvv + return ::cbrtf(__x); +#endif // !_CCCL_BUILTIN_CBRTF +} + +_CCCL_NODISCARD _LIBCUDACXX_HIDE_FROM_ABI double cbrt(double __x) noexcept +{ +#if defined(_CCCL_BUILTIN_CBRT) + return _CCCL_BUILTIN_CBRT(__x); +#else // ^^^ _CCCL_BUILTIN_CBRT ^^^ // vvv !_CCCL_BUILTIN_CBRT vvv + return ::cbrt(__x); +#endif // !_CCCL_BUILTIN_CBRT +} + +#if !defined(_LIBCUDACXX_HAS_NO_LONG_DOUBLE) +_CCCL_NODISCARD _LIBCUDACXX_HIDE_FROM_ABI long double cbrt(long double __x) noexcept +{ +# if defined(_CCCL_BUILTIN_CBRTL) + return _CCCL_BUILTIN_CBRTL(__x); +# else // ^^^ _CCCL_BUILTIN_CBRTL ^^^ // vvv !_CCCL_BUILTIN_CBRTL vvv + return ::cbrtl(__x); +# endif // !_CCCL_BUILTIN_CBRTL +} + +_CCCL_NODISCARD _LIBCUDACXX_HIDE_FROM_ABI long double cbrtl(long double __x) noexcept +{ +# if defined(_CCCL_BUILTIN_CBRTL) + return _CCCL_BUILTIN_CBRTL(__x); +# else // ^^^ _CCCL_BUILTIN_CBRTL ^^^ // vvv !_CCCL_BUILTIN_CBRTL vvv + return ::cbrtl(__x); +# endif // !_CCCL_BUILTIN_CBRTL +} +#endif // !_LIBCUDACXX_HAS_NO_LONG_DOUBLE + +#if defined(_LIBCUDACXX_HAS_NVFP16) +_CCCL_NODISCARD _LIBCUDACXX_HIDE_FROM_ABI __half cbrt(__half __x) noexcept +{ + return __float2half(_CUDA_VSTD::cbrt(__half2float(__x))); +} +#endif // _LIBCUDACXX_HAS_NVFP16 + +#if defined(_LIBCUDACXX_HAS_NVBF16) +_CCCL_NODISCARD _LIBCUDACXX_HIDE_FROM_ABI __nv_bfloat16 cbrt(__nv_bfloat16 __x) noexcept +{ + return __float2bfloat16(_CUDA_VSTD::cbrt(__bfloat162float(__x))); +} +#endif // _LIBCUDACXX_HAS_NVBF16 + +template = 0> +_CCCL_NODISCARD _LIBCUDACXX_HIDE_FROM_ABI double cbrt(_Integer __x) noexcept +{ + return _CUDA_VSTD::cbrt((double) __x); +} + +_LIBCUDACXX_END_NAMESPACE_STD + +#endif // _LIBCUDACXX___CMATH_ROOTS_H diff --git a/libcudacxx/include/cuda/std/detail/libcxx/include/cmath b/libcudacxx/include/cuda/std/detail/libcxx/include/cmath index 88fd4d9aed..7ff5c2ea99 100644 --- a/libcudacxx/include/cuda/std/detail/libcxx/include/cmath +++ b/libcudacxx/include/cuda/std/detail/libcxx/include/cmath @@ -323,6 +323,7 @@ long double truncl(long double x); #include #include #include +#include #include #include #include @@ -371,8 +372,6 @@ using ::sinf; using ::sinh; using ::sinhf; -using ::sqrt; -using ::sqrtf; using ::tan; using ::tanf; @@ -413,8 +412,6 @@ using ::sinf; using ::sinh; using ::sinhf; -using ::sqrt; -using ::sqrtf; using ::tan; using ::tanf; @@ -427,8 +424,6 @@ using ::asinh; using ::asinhf; using ::atanh; using ::atanhf; -using ::cbrt; -using ::cbrtf; using ::copysign; using ::copysignf; @@ -476,13 +471,11 @@ using ::modfl; using ::powl; using ::sinhl; using ::sinl; -using ::sqrtl; using ::tanl; using ::acoshl; using ::asinhl; using ::atanhl; -using ::cbrtl; using ::tanhl; using ::copysignl; diff --git a/libcudacxx/test/libcudacxx/std/numerics/c.math/roots.pass.cpp b/libcudacxx/test/libcudacxx/std/numerics/c.math/roots.pass.cpp new file mode 100644 index 0000000000..86abbd98cb --- /dev/null +++ b/libcudacxx/test/libcudacxx/std/numerics/c.math/roots.pass.cpp @@ -0,0 +1,138 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. +// +//===----------------------------------------------------------------------===// + +// + +#include +#include +#include +#include + +#include "fp_compare.h" +#include "test_macros.h" + +#if defined(TEST_COMPILER_MSVC) +# pragma warning(disable : 4244) // conversion from 'double' to 'float', possible loss of data +# pragma warning(disable : 4146) // unary minus operator applied to unsigned type, result still unsigned +#endif // TEST_COMPILER_MSVC + +template +__host__ __device__ bool eq(T lhs, T rhs) noexcept +{ + return lhs == rhs; +} + +template ::value, int> = 0> +__host__ __device__ bool eq(T lhs, U rhs) noexcept +{ + return eq(lhs, T(rhs)); +} + +#ifdef _LIBCUDACXX_HAS_NVFP16 +__host__ __device__ bool eq(__half lhs, __half rhs) noexcept +{ + return ::__heq(lhs, rhs); +} +#endif // _LIBCUDACXX_HAS_NVFP16 +#ifdef _LIBCUDACXX_HAS_NVBF16 +__host__ __device__ bool eq(__nv_bfloat16 lhs, __nv_bfloat16 rhs) noexcept +{ + return ::__heq(lhs, rhs); +} +#endif // _LIBCUDACXX_HAS_NVBF16 + +template +__host__ __device__ void test_sqrt(T val) +{ + using ret = cuda::std::conditional_t::value, double, T>; + static_assert(cuda::std::is_same::value, ""); + + assert(eq(cuda::std::sqrt(val), T(8.0))); + assert(eq(cuda::std::sqrt(T(0.0)), T(0.0))); + assert(eq(cuda::std::sqrt(T(cuda::std::numeric_limits::infinity())), cuda::std::numeric_limits::infinity())); + if (cuda::std::is_same::value) + { + assert(eq(cuda::std::sqrtf(val), T(8.0))); + assert(eq(cuda::std::sqrtf(T(0.0)), T(0.0))); + } +#if !defined(_LIBCUDACXX_HAS_NO_LONG_DOUBLE) + else if (cuda::std::is_same::value) + { + assert(eq(cuda::std::sqrtl(val), T(8))); + assert(eq(cuda::std::sqrtl(T(0.0)), T(0.0))); + } +#endif // !_LIBCUDACXX_HAS_NO_LONG_DOUBLE +} + +template +__host__ __device__ void test_cbrt(T val) +{ + using ret = cuda::std::conditional_t::value, double, T>; + static_assert(cuda::std::is_same::value, ""); + + assert(eq(cuda::std::cbrt(val), T(2))); + assert(eq(cuda::std::cbrt(T(0.0)), T(0.0))); + assert(eq(cuda::std::cbrt(-T(0.0)), -T(0.0))); + assert(eq(cuda::std::cbrt(T(cuda::std::numeric_limits::infinity())), cuda::std::numeric_limits::infinity())); + if (cuda::std::is_same::value) + { + assert(eq(cuda::std::cbrtf(val), T(2))); + assert(eq(cuda::std::cbrtf(T(0.0)), T(0.0))); + } +#if !defined(_LIBCUDACXX_HAS_NO_LONG_DOUBLE) + else if (cuda::std::is_same::value) + { + assert(eq(cuda::std::cbrtl(val), T(2))); + assert(eq(cuda::std::cbrtl(T(0.0)), T(0.0))); + } +#endif // !_LIBCUDACXX_HAS_NO_LONG_DOUBLE +} + +template +__host__ __device__ void test(const T val) +{ + test_sqrt(val); + test_cbrt(val / T(8)); +} + +__host__ __device__ void test(const float val) +{ + test(val); + test(val); +#if !defined(_LIBCUDACXX_HAS_NO_LONG_DOUBLE) + test(); +#endif //!_LIBCUDACXX_HAS_NO_LONG_DOUBLE + +#ifdef _LIBCUDACXX_HAS_NVFP16 + test<__half>(val); +#endif // _LIBCUDACXX_HAS_NVFP16 +#ifdef _LIBCUDACXX_HAS_NVBF16 + test<__nv_bfloat16>(val); +#endif // _LIBCUDACXX_HAS_NVBF16 + + test(static_cast(val)); + test(static_cast(val)); + test(static_cast(val)); + test(static_cast(val)); + test(static_cast(val)); + test(static_cast(val)); + test(static_cast(val)); +} + +__global__ void test_global_kernel(float* val) +{ + test(*val); +} + +int main(int, char**) +{ + volatile float val = 64.f; + test(val); + return 0; +}