Skip to content

Commit

Permalink
Add gfx1151 target (#405)
Browse files Browse the repository at this point in the history
* Add gfx1151 target

* Avoid printing 128-bit values on test failure (#366)

The block_radix_sort test suite sometimes calls GTest's comparison macro ASSERT_EQ and streams extra context into the resulting message as part of the call, like this:
ASSERT_EQ(val1, val2) << "at index: " << index;

On Windows, if val1 or val2 is a 128-bit value, this can cause linker errors because GTest's "PrintTo" function has no overload for those types (the values are printed if the test fails).

This change adds a check to see if 128-bit values are being tested, and if so, performs the test in such a way that the values will not be printed in the case where the test fails.

* update changelog

---------

Co-authored-by: Wayne Franz <[email protected]>
  • Loading branch information
amd-garydeng and umfranzw authored Oct 2, 2024
1 parent 1875530 commit 906d157
Show file tree
Hide file tree
Showing 4 changed files with 47 additions and 10 deletions.
6 changes: 6 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,12 @@
Documentation for hipCUB is available at
[https://rocm.docs.amd.com/projects/hipCUB/en/latest/](https://rocm.docs.amd.com/projects/hipCUB/en/latest/).

## (Unreleased) hipCUB-3.2.1 for ROCm 6.2.4

### Added

* GFX1151 Support

## (Unreleased) hipCUB-3.2.0 for ROCm 6.2.0

### Added
Expand Down
2 changes: 1 addition & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ if(NOT (CMAKE_CXX_COMPILER MATCHES ".*nvcc$" OR "${CMAKE_CXX_COMPILER_ID}" STREQ

if(GPU_TARGETS STREQUAL "all")
rocm_check_target_ids(DEFAULT_AMDGPU_TARGETS
TARGETS "gfx803;gfx900:xnack-;gfx906:xnack-;gfx908:xnack-;gfx90a:xnack-;gfx90a:xnack+;gfx940;gfx941;gfx942;gfx1030;gfx1100;gfx1101;gfx1102"
TARGETS "gfx803;gfx900:xnack-;gfx906:xnack-;gfx908:xnack-;gfx90a:xnack-;gfx90a:xnack+;gfx940;gfx941;gfx942;gfx1030;gfx1100;gfx1101;gfx1102;gfx1151"
)
set(GPU_TARGETS "${DEFAULT_AMDGPU_TARGETS}" CACHE STRING "GPU architectures to compile for" FORCE)
endif()
Expand Down
33 changes: 24 additions & 9 deletions test/hipcub/test_hipcub_block_radix_sort.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -291,6 +291,24 @@ __global__ __launch_bounds__(BlockSize) void sort_key_kernel(key_type* device
StoreOp<BlockSize, ItemsPerThread, Striped>{}(keys, device_keys_output);
}

template<class T, class U>
void assert_eq(T a, U b, size_t index)
{
// GTest's ASSERT_EQ prints the values if the test fails. On Windows, GTest doesn't currently provide overloads for
// printing 128 bit types, resulting in linker errors.
// Check if we're testing with 128 bit types. If so, test using bools so GTest doesn't try to print them on failure.
if (test_utils::is_int128<T>::value || test_utils::is_uint128<T>::value ||
test_utils::is_int128<U>::value || test_utils::is_uint128<U>::value)
{
const bool values_equal = (a == b);
ASSERT_EQ(values_equal, true) << "at index: " << index;
}
else
{
ASSERT_EQ(a, b) << "at index: " << index;
}
}

TYPED_TEST(HipcubBlockRadixSort, SortKeys)
{
int device_id = test_common_utils::obtain_device_from_ctest();
Expand Down Expand Up @@ -378,9 +396,8 @@ TYPED_TEST(HipcubBlockRadixSort, SortKeys)
// Verifying results
for(size_t i = 0; i < size; i++)
{
ASSERT_EQ(test_utils::convert_to_native(keys_output[i]),
test_utils::convert_to_native(expected[i]))
<< "at index: " << i;
assert_eq(test_utils::convert_to_native(keys_output[i]),
test_utils::convert_to_native(expected[i]), i);
}

HIP_CHECK(hipFree(device_keys_output));
Expand Down Expand Up @@ -547,12 +564,10 @@ TYPED_TEST(HipcubBlockRadixSort, SortKeysValues)

for(size_t i = 0; i < size; i++)
{
ASSERT_EQ(test_utils::convert_to_native(keys_output[i]),
test_utils::convert_to_native(expected[i].first))
<< "at index: " << i;
ASSERT_EQ(test_utils::convert_to_native(values_output[i]),
test_utils::convert_to_native(expected[i].second))
<< "at index: " << i;
assert_eq(test_utils::convert_to_native(keys_output[i]),
test_utils::convert_to_native(expected[i].first), i);
assert_eq(test_utils::convert_to_native(values_output[i]),
test_utils::convert_to_native(expected[i].second), i);
}

HIP_CHECK(hipFree(device_keys_output));
Expand Down
16 changes: 16 additions & 0 deletions test/hipcub/test_utils_data_generation.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,22 @@ class numeric_limits<float> : public std::numeric_limits<float>
};
// End of extended numeric_limits

// Type traits for detecting 128-bit integer types.
// HIPCUB_IS_INT128_ENABLED is defined outside this section; when it is undefined or zero the
// traits below always report false.
#if HIPCUB_IS_INT128_ENABLED
// `value` is true when T, ignoring cv-qualifiers, is __int128_t.
template<class T>
using is_int128 = std::is_same<__int128_t, typename std::remove_cv<T>::type>;
// `value` is true when T, ignoring cv-qualifiers, is __uint128_t.
template<class T>
using is_uint128 = std::is_same<__uint128_t, typename std::remove_cv<T>::type>;
#else
// Stand-in used when 128-bit integers are unavailable: no type is ever a 128-bit integer.
// Deriving from std::false_type (instead of hand-writing a `value` member) gives the stand-in the
// same trait interface as the std::is_same used in the enabled configuration (`value`,
// `value_type`, `type`, conversion to bool), so generic code behaves the same in both builds.
struct dummy_type : std::false_type
{
};
template<class T>
using is_int128 = dummy_type;
template<class T>
using is_uint128 = dummy_type;
#endif

// `value` is true when T, ignoring cv-qualifiers, is test_utils::half.
template<class T>
using is_half = std::is_same<test_utils::half, typename std::remove_cv<T>::type>;

Expand Down

0 comments on commit 906d157

Please sign in to comment.