From db5bb9a3b68dfe700c3084a3437b75172c9993c3 Mon Sep 17 00:00:00 2001 From: Naoki Shibata Date: Sat, 7 Sep 2024 00:04:45 +0900 Subject: [PATCH] Faster integer division --- CMakeLists.txt | 2 +- src/include/tlfloat/bigint.hpp | 27 +++++++++++++++++---------- src/tester/test_bigint2.cpp | 16 ++++++++++++++-- winbuild-clang.bat | 3 +-- winbuild-msvc.bat | 4 ++-- 5 files changed, 35 insertions(+), 17 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 1db427b..16f4f6c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -229,7 +229,7 @@ elseif (CMAKE_CXX_COMPILER_ID STREQUAL "Clang" AND NOT WIN32) elseif(COMMAND_GOLD) set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -fuse-ld=gold") endif() -elseif (CMAKE_CXX_COMPILER_ID STREQUAL "Clang") +elseif (CMAKE_CXX_COMPILER_ID STREQUAL "Clang" AND WIN32) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /clang:-fconstexpr-steps=1000000000") set(INLINE_CXX_FLAGS "/clang:-mllvm;/clang:-inline-threshold=100000") elseif (CMAKE_CXX_COMPILER_ID STREQUAL "MSVC") diff --git a/src/include/tlfloat/bigint.hpp b/src/include/tlfloat/bigint.hpp index 438855f..03e0c49 100644 --- a/src/include/tlfloat/bigint.hpp +++ b/src/include/tlfloat/bigint.hpp @@ -32,7 +32,7 @@ /*! \endcond */ #ifndef TLFLOAT_DISABLE_ARCH_OPTIMIZATION -#if defined(__GNUC__) || defined(__clang__) +#if defined(__GNUC__) || (defined(__clang__) && !defined(_MSC_VER)) #if defined(__x86_64__) && !defined(__CUDA_ARCH__) #include @@ -57,7 +57,7 @@ #undef TLFLOAT_NOINLINE #define TLFLOAT_NOINLINE __attribute__((noinline)) -#endif // #if defined(__GNUC__) || defined(__clang__) +#endif // #if defined(__GNUC__) || (defined(__clang__) && !defined(_MSC_VER)) #ifdef _MSC_VER #if !defined(__CUDA_ARCH__) @@ -72,8 +72,12 @@ #ifdef TLFLOAT_ENABLE_INLINING #undef TLFLOAT_INLINE +#if !defined(__clang__) #define TLFLOAT_INLINE __forceinline +#else +#define TLFLOAT_INLINE __attribute__((always_inline)) #endif +#endif // #ifdef TLFLOAT_ENABLE_INLINING #endif // #if !defined(__CUDA_ARCH__) #endif // #ifdef _MSC_VER #endif // #ifndef TLFLOAT_DISABLE_ARCH_OPTIMIZATION @@ -687,17 +691,11 @@ namespace tlfloat { } constexpr TLFLOAT_INLINE BigUInt operator/(const BigUInt& rhs) const { - if (rhs == 1) return *this; - BigUInt q = this->mulhi(rhs.reciprocal()); - if (!(rhs > *this - q * rhs)) q++; - return q; + return div(*this, rhs).first; } constexpr TLFLOAT_INLINE BigUInt operator%(const BigUInt& rhs) const { - BigUInt q = this->mulhi(rhs.reciprocal()); - BigUInt m = *this - q * rhs; - if (!(rhs > m)) m -= rhs; - return m; + return div(*this, rhs).second; } /** This method returns ((1 << N) / *this) */ @@ -888,6 +886,11 @@ namespace tlfloat { return xpair(q, m); } + /** This method performs division and modulo at a time. */ + constexpr TLFLOAT_INLINE xpair divmod(const BigUInt& rhs) const { + return div(*this, rhs); + } + /** This method finds the quotient and remainder of (*this << ((1 << N)-1)) divided by (rhs | (1 << ((1 << N)-1))) at a time. Give rhs.reciprocal2() as the second argument. */ @@ -1184,6 +1187,10 @@ namespace tlfloat { constexpr TLFLOAT_INLINE xpair divmod2(const BigUInt& rhs) const { return div(BigUInt<7>(*this) << ((1 << 6)-1), rhs | (1ULL << ((1 << 6)-1))); } + + constexpr TLFLOAT_INLINE xpair divmod(const BigUInt& rhs) const { + return xpair { *this / rhs, *this % rhs }; + } }; /** diff --git a/src/tester/test_bigint2.cpp b/src/tester/test_bigint2.cpp index 757d9cf..9d2bcc2 100644 --- a/src/tester/test_bigint2.cpp +++ b/src/tester/test_bigint2.cpp @@ -25,7 +25,7 @@ static_assert(is_trivially_copyable_v> == true); static_assert(is_trivially_copyable_v> == true); template -xpair, BigUInt> xdivmod(BigUInt n, BigUInt d) { +xpair, BigUInt> xdivmod2(BigUInt n, BigUInt d) { BigUInt xn = BigUInt(n) << ((1 << N) - 1); BigUInt xd = d | (BigUInt(1) << ((1 << N)-1)); return xpair, BigUInt>(BigUInt(xn / xd), BigUInt(xn % xd)); @@ -53,7 +53,7 @@ void doTestRec2(BigUInt d) { template void doTestDivmod2(BigUInt n, BigUInt d) { - auto c = xdivmod(n, d); + auto c = xdivmod2(n, d); auto t = n.divmod2(d, d.reciprocal2()); if (c.first != t.first || c.second != t.second) { @@ -79,6 +79,18 @@ void doTestDivmod2(BigUInt n, BigUInt d) { cout << "c.r = " << toHexString(c.second) << endl; exit(-1); } + + if (d != 0) { + t = n.divmod(d); + if (t.second >= d || t.first * d + t.second != n) { + cout << "N = " << N << endl; + cout << "n = " << toHexString(n) << " " << n << endl; + cout << "d = " << toHexString(d) << " " << d << endl; + cout << "t.q = " << toHexString(t.first ) << endl; + cout << "t.r = " << toHexString(t.second) << endl; + exit(-1); + } + } } template diff --git a/winbuild-clang.bat b/winbuild-clang.bat index 4961a2c..ada3ba7 100644 --- a/winbuild-clang.bat +++ b/winbuild-clang.bat @@ -1,5 +1,6 @@ @echo off set CLANGINSTALLDIR=%VCINSTALLDIR%Tools\Llvm\x64 +set INSTALLDIR=tlfloat_install if NOT exist winbuild-clang.bat exit /b 255 @@ -14,8 +15,6 @@ echo Edit this batch file to set CLANGINSTALLDIR correctly. exit /b 255 ) -set INSTALLDIR=tlfloat_install - if %VSCMD_ARG_HOST_ARCH%==x86 call "%VCINSTALLDIR%Auxiliary\Build\vcvars64.bat" if exist build\ rmdir /S /Q build diff --git a/winbuild-msvc.bat b/winbuild-msvc.bat index a9f1973..cec8b48 100644 --- a/winbuild-msvc.bat +++ b/winbuild-msvc.bat @@ -1,4 +1,6 @@ @echo off +set INSTALLDIR=tlfloat_install + if NOT exist winbuild-msvc.bat exit /b 255 if "%VSCMD_ARG_HOST_ARCH%"=="" ( @@ -6,8 +8,6 @@ echo Run this batch file from Developer Command Prompt for VS 20XX exit /b 255 ) -set INSTALLDIR=tlfloat_install - if %VSCMD_ARG_HOST_ARCH%==x86 call "%VCINSTALLDIR%Auxiliary\Build\vcvars64.bat" if exist build\ rmdir /S /Q build