Skip to content

Commit

Permalink
Snapshot of kokkos.git from commit ff977240c493f8b5a170cb81c7dc111cd2…
Browse files Browse the repository at this point in the history
…fb1861

From repository at [email protected]:kokkos/kokkos.git

At commit:
commit ff977240c493f8b5a170cb81c7dc111cd2fb1861
Author: Nathan Ellingwood <[email protected]>
Date:   Thu Dec 19 11:22:34 2024 -0700

    update master_history.txt

Signed-off-by: Nathan Ellingwood <[email protected]>
  • Loading branch information
ndellingwood committed Dec 19, 2024
1 parent 0bac065 commit a748c40
Show file tree
Hide file tree
Showing 17 changed files with 182 additions and 31 deletions.
11 changes: 11 additions & 0 deletions packages/kokkos/CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,16 @@
# CHANGELOG

## 4.5.01

[Full Changelog](https://github.com/kokkos/kokkos/compare/4.5.00...4.5.01)

### Bug Fixes

* Fix re-builds after cleaning the binary tree when doing `add_subdirectory` on the Kokkos source [\#7557](https://github.com/kokkos/kokkos/pull/7557)
* Update mdspan to include fix for submdspan and bracket operator with clang 15&16 [\#7559](https://github.com/kokkos/kokkos/pull/7559)
* Fix DynRankView performance regression by re-introducing shortcut operator() impls [\#7606](https://github.com/kokkos/kokkos/pull/7606)
* Add missing MI300A (`GFX942_APU`) option to Makefile build-system

## 4.5.00

[Full Changelog](https://github.com/kokkos/kokkos/compare/4.4.01...4.5.00)
Expand Down
2 changes: 1 addition & 1 deletion packages/kokkos/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -149,7 +149,7 @@ endif()

set(Kokkos_VERSION_MAJOR 4)
set(Kokkos_VERSION_MINOR 5)
set(Kokkos_VERSION_PATCH 0)
set(Kokkos_VERSION_PATCH 1)
set(Kokkos_VERSION "${Kokkos_VERSION_MAJOR}.${Kokkos_VERSION_MINOR}.${Kokkos_VERSION_PATCH}")
message(STATUS "Kokkos version: ${Kokkos_VERSION}")
math(EXPR KOKKOS_VERSION "${Kokkos_VERSION_MAJOR} * 10000 + ${Kokkos_VERSION_MINOR} * 100 + ${Kokkos_VERSION_PATCH}")
Expand Down
11 changes: 9 additions & 2 deletions packages/kokkos/Makefile.kokkos
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

KOKKOS_VERSION_MAJOR = 4
KOKKOS_VERSION_MINOR = 5
KOKKOS_VERSION_PATCH = 0
KOKKOS_VERSION_PATCH = 1
KOKKOS_VERSION = $(shell echo $(KOKKOS_VERSION_MAJOR)*10000+$(KOKKOS_VERSION_MINOR)*100+$(KOKKOS_VERSION_PATCH) | bc)

# Options: Cuda,HIP,SYCL,OpenMPTarget,OpenMP,Threads,Serial
Expand All @@ -13,7 +13,7 @@ KOKKOS_DEVICES ?= "Threads"
# NVIDIA: Kepler,Kepler30,Kepler32,Kepler35,Kepler37,Maxwell,Maxwell50,Maxwell52,Maxwell53,Pascal60,Pascal61,Volta70,Volta72,Turing75,Ampere80,Ampere86,Ada89,Hopper90
# ARM: ARMv80,ARMv81,ARMv8-ThunderX,ARMv8-TX2,A64FX,ARMv9-Grace
# IBM: Power8,Power9
# AMD-GPUS: AMD_GFX906,AMD_GFX908,AMD_GFX90A,AMD_GFX940,AMD_GFX942,AMD_GFX1030,AMD_GFX1100,AMD_GFX1103
# AMD-GPUS: AMD_GFX906,AMD_GFX908,AMD_GFX90A,AMD_GFX940,AMD_GFX942,AMD_GFX942_APU,AMD_GFX1030,AMD_GFX1100,AMD_GFX1103
# AMD-CPUS: AMDAVX,Zen,Zen2,Zen3
# Intel-GPUs: Intel_Gen,Intel_Gen9,Intel_Gen11,Intel_Gen12LP,Intel_DG1,Intel_XeHP,Intel_PVC
KOKKOS_ARCH ?= ""
Expand Down Expand Up @@ -454,6 +454,7 @@ ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AMD_GFX90A), 0)
endif
KOKKOS_INTERNAL_USE_ARCH_AMD_GFX940 := $(call kokkos_has_string,$(KOKKOS_ARCH),AMD_GFX940)
KOKKOS_INTERNAL_USE_ARCH_AMD_GFX942 := $(call kokkos_has_string,$(KOKKOS_ARCH),AMD_GFX942)
KOKKOS_INTERNAL_USE_ARCH_AMD_GFX942_APU := $(call kokkos_has_string,$(KOKKOS_ARCH),AMD_GFX942_APU)
KOKKOS_INTERNAL_USE_ARCH_AMD_GFX1030 := $(call kokkos_has_string,$(KOKKOS_ARCH),AMD_GFX1030)
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AMD_GFX1030), 0)
KOKKOS_INTERNAL_USE_ARCH_AMD_GFX1030 := $(call kokkos_has_string,$(KOKKOS_ARCH),NAVI1030)
Expand All @@ -468,6 +469,7 @@ KOKKOS_INTERNAL_USE_ARCH_AMD := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_AMD_GFX9
+ $(KOKKOS_INTERNAL_USE_ARCH_AMD_GFX90A) \
+ $(KOKKOS_INTERNAL_USE_ARCH_AMD_GFX940) \
+ $(KOKKOS_INTERNAL_USE_ARCH_AMD_GFX942) \
+ $(KOKKOS_INTERNAL_USE_ARCH_AMD_GFX942_APU) \
+ $(KOKKOS_INTERNAL_USE_ARCH_AMD_GFX1030) \
+ $(KOKKOS_INTERNAL_USE_ARCH_AMD_GFX1100) \
+ $(KOKKOS_INTERNAL_USE_ARCH_AMD_GFX1103))
Expand Down Expand Up @@ -1196,6 +1198,11 @@ ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AMD_GFX942), 1)
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_GPU \"gfx942\"")
KOKKOS_INTERNAL_AMD_ARCH_FLAG := --offload-arch=gfx942
endif
# MI300A APU (AMD_GFX942_APU): emits the KOKKOS_ARCH_AMD_GFX942_APU define and
# uses the same "gfx942" GPU string and --offload-arch flag as AMD_GFX942.
# NOTE(review): if kokkos_has_string does substring matching, then
# KOKKOS_ARCH=AMD_GFX942_APU would also set KOKKOS_INTERNAL_USE_ARCH_AMD_GFX942
# to 1 and the AMD arch count would be 2 -- confirm against its definition.
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AMD_GFX942_APU), 1)
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_GFX942_APU")
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_GPU \"gfx942\"")
KOKKOS_INTERNAL_AMD_ARCH_FLAG := --offload-arch=gfx942
endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AMD_GFX1030), 1)
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_GFX1030")
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_GPU \"gfx1030\"")
Expand Down
6 changes: 3 additions & 3 deletions packages/kokkos/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -30,12 +30,12 @@ To start learning about Kokkos:

The latest release of Kokkos can be obtained from the [GitHub releases page](https://github.com/kokkos/kokkos/releases/latest).

The current release is [4.5.00](https://github.com/kokkos/kokkos/releases/tag/4.5.00).
The current release is [4.5.01](https://github.com/kokkos/kokkos/releases/tag/4.5.01).

```bash
curl -OJ -L https://github.com/kokkos/kokkos/releases/download/4.5.00/kokkos-4.5.00.tar.gz
curl -OJ -L https://github.com/kokkos/kokkos/releases/download/4.5.01/kokkos-4.5.01.tar.gz
# Or with wget
wget https://github.com/kokkos/kokkos/releases/download/4.5.00/kokkos-4.5.00.tar.gz
wget https://github.com/kokkos/kokkos/releases/download/4.5.01/kokkos-4.5.01.tar.gz
```

To clone the latest development version of Kokkos from GitHub:
Expand Down
127 changes: 126 additions & 1 deletion packages/kokkos/containers/src/Kokkos_DynRankView.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -524,7 +524,10 @@ class DynRankView : private View<DataType*******, Properties...> {
std::is_same_v<typename traits::array_layout, Kokkos::LayoutStride>,

is_default_map = std::is_void_v<typename traits::specialize> &&
(is_layout_left || is_layout_right || is_layout_stride)
(is_layout_left || is_layout_right || is_layout_stride),

is_default_access =
is_default_map && std::is_same_v<reference_type, element_type&>
};

// Bounds checking macros
Expand Down Expand Up @@ -574,12 +577,134 @@ class DynRankView : private View<DataType*******, Properties...> {
using view_type::stride_7; // FIXME: not tested
using view_type::use_count;

#ifdef KOKKOS_ENABLE_CUDA
// CUDA builds (KOKKOS_ENABLE_CUDA): a single call operator in which all seven
// indices default to 0. It forwards them unchanged to the underlying
// view_type::operator(); no rank-specific shortcut is taken on this path.
KOKKOS_FUNCTION reference_type
operator()(index_type i0 = 0, index_type i1 = 0, index_type i2 = 0,
index_type i3 = 0, index_type i4 = 0, index_type i5 = 0,
index_type i6 = 0) const {
return view_type::operator()(i0, i1, i2, i3, i4, i5, i6);
}
#else
// Adding shortcut operators for rank-0 to rank-3 for default layouts
// and access modalities.
// This removes performance overhead for always using rank-7 mapping.
// See https://github.com/kokkos/kokkos/issues/7604
// When bounds checking is enabled we still go through the underlying
// rank-7 View to leverage the error checks there.

// Rank-0 access: takes no indices and returns a reference to the element at
// offset 0.
KOKKOS_FUNCTION reference_type operator()() const {
#ifdef KOKKOS_ENABLE_DEBUG
// Debug builds abort unless the runtime rank is exactly 0.
if (rank() != 0u)
Kokkos::abort(
"DynRankView rank 0 operator() called with invalid number of "
"arguments.");
#endif
#ifndef KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK
// Shortcut, compiled only when bounds checking is disabled: for default
// access, read data()[0] directly instead of using the 7-index operator.
if constexpr (is_default_access) {
return view_type::data()[0];
} else
#endif
// General path through the underlying view's 7-index operator. When the
// shortcut above is compiled in, this statement is the body of its `else`;
// with bounds checking enabled it is the only return.
return view_type::operator()(0, 0, 0, 0, 0, 0, 0);
}

// One-index access: returns a reference to element i0.
KOKKOS_FUNCTION reference_type operator()(index_type i0) const {
#ifdef KOKKOS_ENABLE_DEBUG
// Debug builds abort only when rank() > 1, so a rank-0 view is currently
// accepted here (see the FIXME below).
// FIXME: Should be equal, only access(...) allows mismatch of rank and
// index args
if (rank() > 1u)
Kokkos::abort(
"DynRankView rank 1 operator() called with invalid number of "
"arguments.");
#endif
#ifndef KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK
// Shortcut, compiled only when bounds checking is disabled: compute the
// offset directly from data() for default access.
if constexpr (is_default_access) {
if constexpr (is_layout_stride) {
// Strided layout: scale the index by the stride of dimension 0.
return view_type::data()[i0 * view_type::stride(0)];
} else {
// Other default layouts: i0 is used directly as the offset.
return view_type::data()[i0];
}
} else
#endif
// General path through the underlying view's 7-index operator; the body of
// the `else` above when the shortcut is compiled in, otherwise the only
// return.
return view_type::operator()(i0, 0, 0, 0, 0, 0, 0);
// Every path above has already returned. The marker below is emitted only for
// the listed compilers -- presumably to silence a spurious missing-return
// diagnostic (TODO confirm).
#if defined KOKKOS_COMPILER_INTEL || \
(defined(KOKKOS_COMPILER_NVCC) && KOKKOS_COMPILER_NVCC >= 1130 && \
!defined(KOKKOS_COMPILER_MSVC))
__builtin_unreachable();
#endif
}

// Two-index access: returns a reference to element (i0, i1).
KOKKOS_FUNCTION reference_type operator()(index_type i0,
index_type i1) const {
#ifdef KOKKOS_ENABLE_DEBUG
// Debug builds abort only when rank() > 2, so lower-rank views are currently
// accepted here (see the FIXME below).
// FIXME: Should be equal, only access(...) allows mismatch of rank and
// index args
if (rank() > 2u)
Kokkos::abort(
"DynRankView rank 2 operator() called with invalid number of "
"arguments.");
#endif
#ifndef KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK
// Shortcut, compiled only when bounds checking is disabled: compute the
// offset directly from data() according to the layout.
if constexpr (is_default_access) {
if constexpr (is_layout_left) {
// LayoutLeft: i0 has unit stride, i1 is scaled by stride(1).
return view_type::data()[i0 + i1 * view_type::stride(1)];
} else if constexpr (is_layout_right) {
// LayoutRight: i1 has unit stride, i0 is scaled by extent(1).
return view_type::data()[i0 * view_type::extent(1) + i1];
} else {
// Remaining default layout (strided): each index uses its own stride.
return view_type::data()[i0 * view_type::stride(0) +
i1 * view_type::stride(1)];
}
} else
#endif
// General path through the underlying view's 7-index operator; the body of
// the `else` above when the shortcut is compiled in, otherwise the only
// return.
return view_type::operator()(i0, i1, 0, 0, 0, 0, 0);
// Every path above has already returned. The marker below is emitted only for
// the listed compilers -- presumably to silence a spurious missing-return
// diagnostic (TODO confirm).
#if defined KOKKOS_COMPILER_INTEL || \
(defined(KOKKOS_COMPILER_NVCC) && KOKKOS_COMPILER_NVCC >= 1130 && \
!defined(KOKKOS_COMPILER_MSVC))
__builtin_unreachable();
#endif
}

// Three-index access: returns a reference to element (i0, i1, i2).
KOKKOS_FUNCTION reference_type operator()(index_type i0, index_type i1,
index_type i2) const {
#ifdef KOKKOS_ENABLE_DEBUG
// Debug builds abort only when rank() > 3, so lower-rank views are currently
// accepted here (see the FIXME below).
// FIXME: Should be equal, only access(...) allows mismatch of rank and
// index args
if (rank() > 3u)
Kokkos::abort(
"DynRankView rank 3 operator() called with invalid number of "
"arguments.");
#endif
#ifndef KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK
// Shortcut, compiled only when bounds checking is disabled: compute the
// offset directly from data() according to the layout.
if constexpr (is_default_access) {
if constexpr (is_layout_left) {
// LayoutLeft: offset = i0 + stride(1) * (i1 + i2 * extent(1)).
return view_type::data()[i0 + view_type::stride(1) *
(i1 + i2 * view_type::extent(1))];
} else if constexpr (is_layout_right) {
// LayoutRight: offset = (i0 * extent(1) + i1) * extent(2) + i2.
return view_type::data()[(i0 * view_type::extent(1) + i1) *
view_type::extent(2) +
i2];
} else {
// Remaining default layout (strided): each index uses its own stride.
return view_type::data()[i0 * view_type::stride(0) +
i1 * view_type::stride(1) +
i2 * view_type::stride(2)];
}
} else
#endif
// General path through the underlying view's 7-index operator; the body of
// the `else` above when the shortcut is compiled in, otherwise the only
// return.
return view_type::operator()(i0, i1, i2, 0, 0, 0, 0);
// Every path above has already returned. The marker below is emitted only for
// the listed compilers -- presumably to silence a spurious missing-return
// diagnostic (TODO confirm).
#if defined KOKKOS_COMPILER_INTEL || \
(defined(KOKKOS_COMPILER_NVCC) && KOKKOS_COMPILER_NVCC >= 1130 && \
!defined(KOKKOS_COMPILER_MSVC))
__builtin_unreachable();
#endif
}

// Access with four or more indices: i0..i3 are required, i4..i6 default to 0.
// No shortcut is taken here; the call always forwards to the underlying
// view_type::operator() with all seven indices.
KOKKOS_FUNCTION reference_type operator()(index_type i0, index_type i1,
index_type i2, index_type i3,
index_type i4 = 0,
index_type i5 = 0,
index_type i6 = 0) const {
return view_type::operator()(i0, i1, i2, i3, i4, i5, i6);
}
#endif

// This is an accommodation for Phalanx, which uses operator[] to access
// all elements in a linear fashion even when the rank is not 1
Expand Down
8 changes: 2 additions & 6 deletions packages/kokkos/core/unit_test/TestAtomicViews.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -1065,13 +1065,9 @@ T AndEqualAtomicViewCheck(const int64_t input_length) {
const int64_t N = input_length;
T result[2] = {1};
for (int64_t i = 0; i < N; ++i) {
if (N % 2 == 0) {
result[0] &= (T)i;
} else {
result[1] &= (T)i;
}
int64_t idx = N % 2;
result[idx] &= (T)i;
}

return (result[0]);
}

Expand Down
6 changes: 6 additions & 0 deletions packages/kokkos/core/unit_test/TestViewBadAlloc.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,12 @@ TEST(TEST_CATEGORY, view_bad_alloc) {
}
#endif

#if defined(_WIN32) && defined(KOKKOS_ENABLE_CUDA)
if (std::is_same_v<ExecutionSpace, Kokkos::Cuda>) {
GTEST_SKIP() << "MSVC/CUDA segfaults when allocating too much memory";
}
#endif

test_view_bad_alloc<MemorySpace>();

constexpr bool execution_space_is_device =
Expand Down
1 change: 1 addition & 0 deletions packages/kokkos/master_history.txt
Original file line number Diff line number Diff line change
Expand Up @@ -40,3 +40,4 @@ tag: 4.3.01 date: 05:07:2024 master: 486cc745 release: 262d2d6e
tag: 4.4.00 date: 08:08:2024 master: 6ecdf605 release: 6068673c
tag: 4.4.01 date: 09:12:2024 master: 08ceff92 release: 2d60c039
tag: 4.5.00 date: 11:11:2024 master: 15dc143e release: 5164f2f6
tag: 4.5.01 date: 12:19:2024 master: 09e775bf release: e0d656f9
2 changes: 1 addition & 1 deletion packages/kokkos/scripts/docker/Dockerfile.gcc
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ RUN echo "deb http://dk.archive.ubuntu.com/ubuntu/ xenial main" >> /etc/apt/sour
apt-get clean && rm -rf /var/lib/apt/lists/*


RUN KEYDUMP_URL=https://cloud.cees.ornl.gov/download && \
RUN KEYDUMP_URL=https://cloud1.cees.ornl.gov/download && \
KEYDUMP_FILE=keydump && \
wget --quiet ${KEYDUMP_URL}/${KEYDUMP_FILE} && \
wget --quiet ${KEYDUMP_URL}/${KEYDUMP_FILE}.sig && \
Expand Down
2 changes: 1 addition & 1 deletion packages/kokkos/scripts/docker/Dockerfile.hipcc
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ RUN apt-get update && apt-get install -y \

ENV PATH=/opt/rocm/bin:$PATH

RUN KEYDUMP_URL=https://cloud.cees.ornl.gov/download && \
RUN KEYDUMP_URL=https://cloud1.cees.ornl.gov/download && \
KEYDUMP_FILE=keydump && \
wget --quiet ${KEYDUMP_URL}/${KEYDUMP_FILE} && \
wget --quiet ${KEYDUMP_URL}/${KEYDUMP_FILE}.sig && \
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ RUN apt-get update && \
apt-get clean && \
rm -rf /var/lib/apt/lists/*

RUN KEYDUMP_URL=https://cloud.cees.ornl.gov/download && \
RUN KEYDUMP_URL=https://cloud1.cees.ornl.gov/download && \
KEYDUMP_FILE=keydump && \
wget --quiet ${KEYDUMP_URL}/${KEYDUMP_FILE} && \
wget --quiet ${KEYDUMP_URL}/${KEYDUMP_FILE}.sig && \
Expand Down
2 changes: 1 addition & 1 deletion packages/kokkos/scripts/docker/Dockerfile.nvcc
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ RUN apt-get update && apt-get install -y \
apt-get clean && \
rm -rf /var/lib/apt/lists/*

RUN KEYDUMP_URL=https://cloud.cees.ornl.gov/download && \
RUN KEYDUMP_URL=https://cloud1.cees.ornl.gov/download && \
KEYDUMP_FILE=keydump && \
wget --quiet ${KEYDUMP_URL}/${KEYDUMP_FILE} && \
wget --quiet ${KEYDUMP_URL}/${KEYDUMP_FILE}.sig && \
Expand Down
2 changes: 1 addition & 1 deletion packages/kokkos/scripts/docker/Dockerfile.nvhpc
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
ARG BASE=nvcr.io/nvidia/nvhpc:23.7-devel-cuda12.2-ubuntu20.04
FROM $BASE

RUN KEYDUMP_URL=https://cloud.cees.ornl.gov/download && \
RUN KEYDUMP_URL=https://cloud1.cees.ornl.gov/download && \
KEYDUMP_FILE=keydump && \
wget --quiet ${KEYDUMP_URL}/${KEYDUMP_FILE} && \
wget --quiet ${KEYDUMP_URL}/${KEYDUMP_FILE}.sig && \
Expand Down
2 changes: 1 addition & 1 deletion packages/kokkos/scripts/docker/Dockerfile.openmptarget
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ RUN apt-get update && apt-get install -y \

ARG NPROC=8

RUN KEYDUMP_URL=https://cloud.cees.ornl.gov/download && \
RUN KEYDUMP_URL=https://cloud1.cees.ornl.gov/download && \
KEYDUMP_FILE=keydump && \
wget --quiet ${KEYDUMP_URL}/${KEYDUMP_FILE} && \
wget --quiet ${KEYDUMP_URL}/${KEYDUMP_FILE}.sig && \
Expand Down
4 changes: 2 additions & 2 deletions packages/kokkos/scripts/docker/Dockerfile.sycl
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ RUN apt-get update && apt-get install -y \
apt-get clean && \
rm -rf /var/lib/apt/lists/*

RUN KEYDUMP_URL=https://cloud.cees.ornl.gov/download && \
RUN KEYDUMP_URL=https://cloud1.cees.ornl.gov/download && \
KEYDUMP_FILE=keydump && \
wget --quiet ${KEYDUMP_URL}/${KEYDUMP_FILE} && \
wget --quiet ${KEYDUMP_URL}/${KEYDUMP_FILE}.sig && \
Expand Down Expand Up @@ -46,7 +46,7 @@ RUN wget https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCT
apt-get clean && \
rm -rf /var/lib/apt/lists/*

RUN wget https://cloud.cees.ornl.gov/download/oneapi-for-nvidia-gpus-2023.0.0-linux.sh && \
RUN wget https://cloud1.cees.ornl.gov/download/oneapi-for-nvidia-gpus-2023.0.0-linux.sh && \
echo "3416721faf83e5858e65795231bae47bb51ff91d4e8738613d498674f1636f72 oneapi-for-nvidia-gpus-2023.0.0-linux.sh" | sha256sum --check && \
chmod +x oneapi-for-nvidia-gpus-2023.0.0-linux.sh && \
./oneapi-for-nvidia-gpus-2023.0.0-linux.sh -y && \
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -240,7 +240,13 @@ static_assert(_MDSPAN_CPLUSPLUS >= MDSPAN_CXX_STD_14, "mdspan requires C++14 or

#ifndef MDSPAN_USE_BRACKET_OPERATOR
# if defined(__cpp_multidimensional_subscript)
# define MDSPAN_USE_BRACKET_OPERATOR 1
// The following if/else is necessary to work around a clang issue
// related to using a parameter pack inside a bracket operator in C++2b/C++23 mode
# if defined(_MDSPAN_COMPILER_CLANG) && ((__clang_major__ == 15) || (__clang_major__ == 16))
# define MDSPAN_USE_BRACKET_OPERATOR 0
# else
# define MDSPAN_USE_BRACKET_OPERATOR 1
# endif
# else
# define MDSPAN_USE_BRACKET_OPERATOR 0
# endif
Expand Down
Loading

0 comments on commit a748c40

Please sign in to comment.