From a495aff480083d62796ed3e86b992584bd62d631 Mon Sep 17 00:00:00 2001 From: Stan Gerald Moore Date: Wed, 15 May 2024 14:14:08 -0600 Subject: [PATCH 1/5] Update Kokkos library in LAMMPS to v4.3.1 --- lib/kokkos/CHANGELOG.md | 15 +++++- lib/kokkos/CMakeLists.txt | 2 +- lib/kokkos/Copyright.txt | 49 +++---------------- lib/kokkos/LICENSE | 10 ---- lib/kokkos/Makefile.kokkos | 34 ++++++------- .../src/sorting/impl/Kokkos_SortImpl.hpp | 5 ++ lib/kokkos/bin/nvcc_wrapper | 2 +- lib/kokkos/cmake/KokkosCore_config.h.in | 1 + lib/kokkos/cmake/kokkos_enable_devices.cmake | 37 +++++++------- lib/kokkos/cmake/kokkos_enable_options.cmake | 1 + .../src/Cuda/Kokkos_Cuda_Parallel_Range.hpp | 5 +- lib/kokkos/core/src/HIP/Kokkos_HIP.cpp | 4 ++ .../HIP/Kokkos_HIP_SharedAllocationRecord.cpp | 4 ++ .../HIP/Kokkos_HIP_SharedAllocationRecord.hpp | 4 ++ lib/kokkos/core/src/HIP/Kokkos_HIP_Space.hpp | 19 ++++++- .../Serial/Kokkos_Serial_Parallel_Team.hpp | 12 ++--- .../unit_test/TestExecSpacePartitioning.hpp | 14 ++++++ lib/kokkos/core/unit_test/TestReduce.hpp | 26 ++++++++++ .../core/unit_test/hip/TestHIP_Spaces.cpp | 16 ++++++ lib/kokkos/master_history.txt | 1 + 20 files changed, 164 insertions(+), 97 deletions(-) diff --git a/lib/kokkos/CHANGELOG.md b/lib/kokkos/CHANGELOG.md index f7b8af7695c..4fbc9002973 100644 --- a/lib/kokkos/CHANGELOG.md +++ b/lib/kokkos/CHANGELOG.md @@ -1,5 +1,18 @@ # CHANGELOG +## [4.3.01](https://github.com/kokkos/kokkos/tree/4.3.01) +[Full Changelog](https://github.com/kokkos/kokkos/compare/4.3.00...4.3.01) + +### Backend and Architecture Enhancements: + +#### HIP: +* MI300 support unified memory support [\#6877](https://github.com/kokkos/kokkos/pull/6877) + +### Bug Fixes +* Serial: Use the provided execution space instance in TeamPolicy [\#6951](https://github.com/kokkos/kokkos/pull/6951) +* `nvcc_wrapper`: bring back support for `--fmad` option [\#6931](https://github.com/kokkos/kokkos/pull/6931) +* Fix CUDA reduction overflow for `RangePolicy` [\#6578](https://github.com/kokkos/kokkos/pull/6578) + ## [4.3.00](https://github.com/kokkos/kokkos/tree/4.3.00) (2024-03-19) [Full Changelog](https://github.com/kokkos/kokkos/compare/4.2.01...4.3.00) @@ -39,7 +52,7 @@ * Make the OpenACC backend asynchronous [\#6772](https://github.com/kokkos/kokkos/pull/6772) #### Threads: -* Add missing broadcast to TeamThreadRange parallel_scan [\#6601](https://github.com/kokkos/kokkos/pull/6446) +* Add missing broadcast to TeamThreadRange parallel_scan [\#6601](https://github.com/kokkos/kokkos/pull/6601) #### OpenMP: * Improve performance of view initializations and filling with zeros [\#6573](https://github.com/kokkos/kokkos/pull/6573) diff --git a/lib/kokkos/CMakeLists.txt b/lib/kokkos/CMakeLists.txt index 93a796f200b..76f2183db8a 100644 --- a/lib/kokkos/CMakeLists.txt +++ b/lib/kokkos/CMakeLists.txt @@ -151,7 +151,7 @@ ENDIF() set(Kokkos_VERSION_MAJOR 4) set(Kokkos_VERSION_MINOR 3) -set(Kokkos_VERSION_PATCH 0) +set(Kokkos_VERSION_PATCH 1) set(Kokkos_VERSION "${Kokkos_VERSION_MAJOR}.${Kokkos_VERSION_MINOR}.${Kokkos_VERSION_PATCH}") message(STATUS "Kokkos version: ${Kokkos_VERSION}") math(EXPR KOKKOS_VERSION "${Kokkos_VERSION_MAJOR} * 10000 + ${Kokkos_VERSION_MINOR} * 100 + ${Kokkos_VERSION_PATCH}") diff --git a/lib/kokkos/Copyright.txt b/lib/kokkos/Copyright.txt index 5e2f8d8647b..cbba3efc7bc 100644 --- a/lib/kokkos/Copyright.txt +++ b/lib/kokkos/Copyright.txt @@ -1,41 +1,8 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 3.0 -// Copyright (2020) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Christian R. Trott (crtrott@sandia.gov) -// -// ************************************************************************ -//@HEADER +************************************************************************ + + Kokkos v. 4.0 + Copyright (2022) National Technology & Engineering + Solutions of Sandia, LLC (NTESS). + +Under the terms of Contract DE-NA0003525 with NTESS, +the U.S. Government retains certain rights in this software. diff --git a/lib/kokkos/LICENSE b/lib/kokkos/LICENSE index 6572cc2db05..4d9d69d7c44 100644 --- a/lib/kokkos/LICENSE +++ b/lib/kokkos/LICENSE @@ -1,13 +1,3 @@ - ************************************************************************ - - Kokkos v. 4.0 - Copyright (2022) National Technology & Engineering - Solutions of Sandia, LLC (NTESS). - - Under the terms of Contract DE-NA0003525 with NTESS, - the U.S. Government retains certain rights in this software. - - ============================================================================== Kokkos is under the Apache License v2.0 with LLVM Exceptions: ============================================================================== diff --git a/lib/kokkos/Makefile.kokkos b/lib/kokkos/Makefile.kokkos index a167ce2070b..14a417b532a 100644 --- a/lib/kokkos/Makefile.kokkos +++ b/lib/kokkos/Makefile.kokkos @@ -12,7 +12,7 @@ endif KOKKOS_VERSION_MAJOR = 4 KOKKOS_VERSION_MINOR = 3 -KOKKOS_VERSION_PATCH = 0 +KOKKOS_VERSION_PATCH = 1 KOKKOS_VERSION = $(shell echo $(KOKKOS_VERSION_MAJOR)*10000+$(KOKKOS_VERSION_MINOR)*100+$(KOKKOS_VERSION_PATCH) | bc) # Options: Cuda,HIP,SYCL,OpenMPTarget,OpenMP,Threads,Serial @@ -1232,6 +1232,22 @@ ifneq ($(KOKKOS_INTERNAL_NEW_CONFIG), 0) tmp := $(call kokkos_update_config_header, KOKKOS_FWD_HPP_, "KokkosCore_Config_FwdBackend.tmp", "KokkosCore_Config_FwdBackend.hpp") tmp := $(call kokkos_update_config_header, KOKKOS_SETUP_HPP_, "KokkosCore_Config_SetupBackend.tmp", "KokkosCore_Config_SetupBackend.hpp") tmp := $(call kokkos_update_config_header, KOKKOS_DECLARE_HPP_, "KokkosCore_Config_DeclareBackend.tmp", "KokkosCore_Config_DeclareBackend.hpp") + ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 1) + tmp := $(call kokkos_append_config_header,"$H""include ","KokkosCore_Config_FwdBackend.hpp") + tmp := $(call kokkos_append_config_header,"$H""include ","KokkosCore_Config_DeclareBackend.hpp") + endif + ifeq ($(KOKKOS_INTERNAL_USE_THREADS), 1) + tmp := $(call kokkos_append_config_header,"$H""include ","KokkosCore_Config_FwdBackend.hpp") + tmp := $(call kokkos_append_config_header,"$H""include ","KokkosCore_Config_DeclareBackend.hpp") + endif + ifeq ($(KOKKOS_INTERNAL_USE_HPX), 1) + tmp := $(call kokkos_append_config_header,"$H""include ","KokkosCore_Config_FwdBackend.hpp") + tmp := $(call kokkos_append_config_header,"$H""include ","KokkosCore_Config_DeclareBackend.hpp") + endif + ifeq ($(KOKKOS_INTERNAL_USE_SERIAL), 1) + tmp := $(call kokkos_append_config_header,"$H""include ","KokkosCore_Config_FwdBackend.hpp") + tmp := $(call kokkos_append_config_header,"$H""include ","KokkosCore_Config_DeclareBackend.hpp") + endif ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1) tmp := $(call kokkos_append_config_header,"$H""include ","KokkosCore_Config_FwdBackend.hpp") tmp := $(call kokkos_append_config_header,"$H""include ","KokkosCore_Config_DeclareBackend.hpp") @@ -1251,26 +1267,10 @@ ifneq ($(KOKKOS_INTERNAL_NEW_CONFIG), 0) tmp := $(call kokkos_append_config_header,"$H""include ","KokkosCore_Config_DeclareBackend.hpp") tmp := $(call kokkos_append_config_header,"$H""include ","KokkosCore_Config_SetupBackend.hpp") endif - ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 1) - tmp := $(call kokkos_append_config_header,"$H""include ","KokkosCore_Config_FwdBackend.hpp") - tmp := $(call kokkos_append_config_header,"$H""include ","KokkosCore_Config_DeclareBackend.hpp") - endif ifeq ($(KOKKOS_INTERNAL_USE_OPENACC), 1) tmp := $(call kokkos_append_config_header,"$H""include ","KokkosCore_Config_FwdBackend.hpp") tmp := $(call kokkos_append_config_header,"$H""include ","KokkosCore_Config_DeclareBackend.hpp") endif - ifeq ($(KOKKOS_INTERNAL_USE_THREADS), 1) - tmp := $(call kokkos_append_config_header,"$H""include ","KokkosCore_Config_FwdBackend.hpp") - tmp := $(call kokkos_append_config_header,"$H""include ","KokkosCore_Config_DeclareBackend.hpp") - endif - ifeq ($(KOKKOS_INTERNAL_USE_HPX), 1) - tmp := $(call kokkos_append_config_header,"$H""include ","KokkosCore_Config_FwdBackend.hpp") - tmp := $(call kokkos_append_config_header,"$H""include ","KokkosCore_Config_DeclareBackend.hpp") - endif - ifeq ($(KOKKOS_INTERNAL_USE_SERIAL), 1) - tmp := $(call kokkos_append_config_header,"$H""include ","KokkosCore_Config_FwdBackend.hpp") - tmp := $(call kokkos_append_config_header,"$H""include ","KokkosCore_Config_DeclareBackend.hpp") - endif endif KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/*.hpp) diff --git a/lib/kokkos/algorithms/src/sorting/impl/Kokkos_SortImpl.hpp b/lib/kokkos/algorithms/src/sorting/impl/Kokkos_SortImpl.hpp index 4c174b5fda9..08946228919 100644 --- a/lib/kokkos/algorithms/src/sorting/impl/Kokkos_SortImpl.hpp +++ b/lib/kokkos/algorithms/src/sorting/impl/Kokkos_SortImpl.hpp @@ -399,9 +399,14 @@ sort_device_view_with_comparator( using ViewType = Kokkos::View; using MemSpace = typename ViewType::memory_space; +// Note with HIP unified memory this code path is still the right thing to do +// if we end up here when RocThrust is not enabled. +// The create_mirror_view_and_copy will do the right thing (no copy). +#ifndef KOKKOS_ENABLE_IMPL_HIP_UNIFIED_MEMORY static_assert(!SpaceAccessibility::accessible, "Impl::sort_device_view_with_comparator: should not be called " "on a view that is already accessible on the host"); +#endif copy_to_host_run_stdsort_copy_back(exec, view, comparator); } diff --git a/lib/kokkos/bin/nvcc_wrapper b/lib/kokkos/bin/nvcc_wrapper index 9b935835d5f..dbfef2267fe 100755 --- a/lib/kokkos/bin/nvcc_wrapper +++ b/lib/kokkos/bin/nvcc_wrapper @@ -229,7 +229,7 @@ do fi ;; #Handle known nvcc args - --dryrun|-dryrun|--verbose|--keep|-keep|--source-in-ptx|-src-in-ptx|--keep-dir*|-keep-dir*|-G|-lineinfo|--generate-line-info|-extended-lambda|-expt-extended-lambda|-expt-relaxed-constexpr|--resource-usage|-res-usage|-fmad=*|--use_fast_math|-use_fast_math|--Wext-lambda-captures-this|-Wext-lambda-captures-this) + --dryrun|-dryrun|--verbose|--keep|-keep|--source-in-ptx|-src-in-ptx|--keep-dir*|-keep-dir*|-G|-lineinfo|--generate-line-info|-extended-lambda|-expt-extended-lambda|-expt-relaxed-constexpr|--resource-usage|-res-usage|--fmad=*|-fmad=*|--use_fast_math|-use_fast_math|--Wext-lambda-captures-this|-Wext-lambda-captures-this) cuda_args="$cuda_args $1" ;; #Handle more known nvcc args diff --git a/lib/kokkos/cmake/KokkosCore_config.h.in b/lib/kokkos/cmake/KokkosCore_config.h.in index 2df0f6c5205..3ab39cd6abf 100644 --- a/lib/kokkos/cmake/KokkosCore_config.h.in +++ b/lib/kokkos/cmake/KokkosCore_config.h.in @@ -39,6 +39,7 @@ #cmakedefine KOKKOS_ENABLE_IMPL_CUDA_MALLOC_ASYNC #cmakedefine KOKKOS_ENABLE_HIP_RELOCATABLE_DEVICE_CODE #cmakedefine KOKKOS_ENABLE_HIP_MULTIPLE_KERNEL_INSTANTIATIONS +#cmakedefine KOKKOS_ENABLE_IMPL_HIP_UNIFIED_MEMORY #cmakedefine KOKKOS_ENABLE_IMPL_HPX_ASYNC_DISPATCH #cmakedefine KOKKOS_ENABLE_DEBUG #cmakedefine KOKKOS_ENABLE_DEBUG_DUALVIEW_MODIFY_CHECK diff --git a/lib/kokkos/cmake/kokkos_enable_devices.cmake b/lib/kokkos/cmake/kokkos_enable_devices.cmake index 9a977520a3a..c7d189285c5 100644 --- a/lib/kokkos/cmake/kokkos_enable_devices.cmake +++ b/lib/kokkos/cmake/kokkos_enable_devices.cmake @@ -40,6 +40,26 @@ ELSE() ENDIF() KOKKOS_DEVICE_OPTION(OPENMP ${OMP_DEFAULT} HOST "Whether to build OpenMP backend") + +# We want this to default to OFF for cache reasons, but if no +# host space is given, then activate serial +IF (KOKKOS_HAS_TRILINOS) + #However, Trilinos always wants Serial ON + SET(SERIAL_DEFAULT ON) +ELSEIF (KOKKOS_HAS_HOST) + SET(SERIAL_DEFAULT OFF) +ELSE() + SET(SERIAL_DEFAULT ON) + IF (NOT DEFINED Kokkos_ENABLE_SERIAL) + MESSAGE(STATUS "SERIAL backend is being turned on to ensure there is at least one Host space. To change this, you must enable another host execution space and configure with -DKokkos_ENABLE_SERIAL=OFF or change CMakeCache.txt") + ENDIF() +ENDIF() +KOKKOS_DEVICE_OPTION(SERIAL ${SERIAL_DEFAULT} HOST "Whether to build serial backend") + +KOKKOS_DEVICE_OPTION(HPX OFF HOST "Whether to build HPX backend (experimental)") + +# Device backends have to come after host backends for header include order reasons +# Without this we can't make e.g. CudaSpace accessible by HostSpace KOKKOS_DEVICE_OPTION(OPENACC OFF DEVICE "Whether to build the OpenACC backend") IF (KOKKOS_ENABLE_OPENACC) COMPILER_SPECIFIC_FLAGS( @@ -90,23 +110,6 @@ IF (KOKKOS_ENABLE_CUDA) LIST(APPEND DEVICE_SETUP_LIST Cuda) ENDIF() -# We want this to default to OFF for cache reasons, but if no -# host space is given, then activate serial -IF (KOKKOS_HAS_TRILINOS) - #However, Trilinos always wants Serial ON - SET(SERIAL_DEFAULT ON) -ELSEIF (KOKKOS_HAS_HOST) - SET(SERIAL_DEFAULT OFF) -ELSE() - SET(SERIAL_DEFAULT ON) - IF (NOT DEFINED Kokkos_ENABLE_SERIAL) - MESSAGE(STATUS "SERIAL backend is being turned on to ensure there is at least one Host space. To change this, you must enable another host execution space and configure with -DKokkos_ENABLE_SERIAL=OFF or change CMakeCache.txt") - ENDIF() -ENDIF() -KOKKOS_DEVICE_OPTION(SERIAL ${SERIAL_DEFAULT} HOST "Whether to build serial backend") - -KOKKOS_DEVICE_OPTION(HPX OFF HOST "Whether to build HPX backend (experimental)") - KOKKOS_DEVICE_OPTION(HIP OFF DEVICE "Whether to build HIP backend") ## HIP has extra setup requirements, turn on Kokkos_Setup_HIP.hpp in macros diff --git a/lib/kokkos/cmake/kokkos_enable_options.cmake b/lib/kokkos/cmake/kokkos_enable_options.cmake index a437f6132aa..32788e7aa0f 100644 --- a/lib/kokkos/cmake/kokkos_enable_options.cmake +++ b/lib/kokkos/cmake/kokkos_enable_options.cmake @@ -70,6 +70,7 @@ KOKKOS_ENABLE_OPTION(TUNING OFF "Whether to create bindings for tu KOKKOS_ENABLE_OPTION(AGGRESSIVE_VECTORIZATION OFF "Whether to aggressively vectorize loops") KOKKOS_ENABLE_OPTION(COMPILE_AS_CMAKE_LANGUAGE OFF "Whether to use native cmake language support") KOKKOS_ENABLE_OPTION(HIP_MULTIPLE_KERNEL_INSTANTIATIONS OFF "Whether multiple kernels are instantiated at compile time - improve performance but increase compile time") +KOKKOS_ENABLE_OPTION(IMPL_HIP_UNIFIED_MEMORY OFF "Whether to leverage unified memory architectures for HIP") # This option will go away eventually, but allows fallback to old implementation when needed. KOKKOS_ENABLE_OPTION(DESUL_ATOMICS_EXTERNAL OFF "Whether to use an external desul installation") diff --git a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Parallel_Range.hpp b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Parallel_Range.hpp index 0f052be3c30..334834938a1 100644 --- a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Parallel_Range.hpp +++ b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Parallel_Range.hpp @@ -312,8 +312,9 @@ class ParallelReduce, // REQUIRED ( 1 , N , 1 ) dim3 block(1, block_size, 1); // Required grid.x <= block.y - dim3 grid(std::min(int(block.y), int((nwork + block.y - 1) / block.y)), 1, - 1); + dim3 grid(std::min(index_type(block.y), + index_type((nwork + block.y - 1) / block.y)), + 1, 1); // TODO @graph We need to effectively insert this in to the graph const int shmem = diff --git a/lib/kokkos/core/src/HIP/Kokkos_HIP.cpp b/lib/kokkos/core/src/HIP/Kokkos_HIP.cpp index 309e07fb3fb..aced2083ffb 100644 --- a/lib/kokkos/core/src/HIP/Kokkos_HIP.cpp +++ b/lib/kokkos/core/src/HIP/Kokkos_HIP.cpp @@ -146,6 +146,10 @@ void HIP::print_configuration(std::ostream& os, bool /*verbose*/) const { #else os << "no\n"; #endif +#ifdef KOKKOS_ENABLE_IMPL_HIP_UNIFIED_MEMORY + os << " KOKKOS_ENABLE_IMPL_HIP_UNIFIED_MEMORY: "; + os << "yes\n"; +#endif os << "\nRuntime Configuration:\n"; diff --git a/lib/kokkos/core/src/HIP/Kokkos_HIP_SharedAllocationRecord.cpp b/lib/kokkos/core/src/HIP/Kokkos_HIP_SharedAllocationRecord.cpp index ab24004f5fc..83f829fddae 100644 --- a/lib/kokkos/core/src/HIP/Kokkos_HIP_SharedAllocationRecord.cpp +++ b/lib/kokkos/core/src/HIP/Kokkos_HIP_SharedAllocationRecord.cpp @@ -23,8 +23,12 @@ #include #include +#ifndef KOKKOS_ENABLE_IMPL_HIP_UNIFIED_MEMORY KOKKOS_IMPL_HOST_INACCESSIBLE_SHARED_ALLOCATION_RECORD_EXPLICIT_INSTANTIATION( Kokkos::HIPSpace); +#else +KOKKOS_IMPL_SHARED_ALLOCATION_RECORD_EXPLICIT_INSTANTIATION(Kokkos::HIPSpace); +#endif KOKKOS_IMPL_SHARED_ALLOCATION_RECORD_EXPLICIT_INSTANTIATION( Kokkos::HIPHostPinnedSpace); KOKKOS_IMPL_SHARED_ALLOCATION_RECORD_EXPLICIT_INSTANTIATION( diff --git a/lib/kokkos/core/src/HIP/Kokkos_HIP_SharedAllocationRecord.hpp b/lib/kokkos/core/src/HIP/Kokkos_HIP_SharedAllocationRecord.hpp index fbae5188344..1ca7bd5cd0e 100644 --- a/lib/kokkos/core/src/HIP/Kokkos_HIP_SharedAllocationRecord.hpp +++ b/lib/kokkos/core/src/HIP/Kokkos_HIP_SharedAllocationRecord.hpp @@ -20,8 +20,12 @@ #include #include +#if defined(KOKKOS_ENABLE_IMPL_HIP_UNIFIED_MEMORY) +KOKKOS_IMPL_SHARED_ALLOCATION_SPECIALIZATION(Kokkos::HIPSpace); +#else KOKKOS_IMPL_HOST_INACCESSIBLE_SHARED_ALLOCATION_SPECIALIZATION( Kokkos::HIPSpace); +#endif KOKKOS_IMPL_SHARED_ALLOCATION_SPECIALIZATION(Kokkos::HIPHostPinnedSpace); KOKKOS_IMPL_SHARED_ALLOCATION_SPECIALIZATION(Kokkos::HIPManagedSpace); diff --git a/lib/kokkos/core/src/HIP/Kokkos_HIP_Space.hpp b/lib/kokkos/core/src/HIP/Kokkos_HIP_Space.hpp index 7f2004e5cbc..e1b4768b877 100644 --- a/lib/kokkos/core/src/HIP/Kokkos_HIP_Space.hpp +++ b/lib/kokkos/core/src/HIP/Kokkos_HIP_Space.hpp @@ -65,6 +65,18 @@ class HIPSpace { ~HIPSpace() = default; /**\brief Allocate untracked memory in the hip space */ +#ifdef KOKKOS_ENABLE_IMPL_HIP_UNIFIED_MEMORY + template + void* allocate(const ExecutionSpace&, const size_t arg_alloc_size) const { + return allocate(arg_alloc_size); + } + template + void* allocate(const ExecutionSpace&, const char* arg_label, + const size_t arg_alloc_size, + const size_t arg_logical_size = 0) const { + return allocate(arg_label, arg_alloc_size, arg_logical_size); + } +#else // FIXME_HIP Use execution space instance void* allocate(const HIP&, const size_t arg_alloc_size) const { return allocate(arg_alloc_size); @@ -74,6 +86,7 @@ class HIPSpace { const size_t arg_logical_size = 0) const { return allocate(arg_label, arg_alloc_size, arg_logical_size); } +#endif void* allocate(const size_t arg_alloc_size) const; void* allocate(const char* arg_label, const size_t arg_alloc_size, const size_t arg_logical_size = 0) const; @@ -267,7 +280,11 @@ static_assert(Kokkos::Impl::MemorySpaceAccess::assignable); template <> struct MemorySpaceAccess { enum : bool { assignable = false }; - enum : bool { accessible = false }; +#if !defined(KOKKOS_ENABLE_IMPL_HIP_UNIFIED_MEMORY) + enum : bool{accessible = false}; +#else + enum : bool { accessible = true }; +#endif enum : bool { deepcopy = true }; }; diff --git a/lib/kokkos/core/src/Serial/Kokkos_Serial_Parallel_Team.hpp b/lib/kokkos/core/src/Serial/Kokkos_Serial_Parallel_Team.hpp index f34a7daaca0..a25b51496ef 100644 --- a/lib/kokkos/core/src/Serial/Kokkos_Serial_Parallel_Team.hpp +++ b/lib/kokkos/core/src/Serial/Kokkos_Serial_Parallel_Team.hpp @@ -37,6 +37,8 @@ class TeamPolicyInternal int m_league_size; int m_chunk_size; + Kokkos::Serial m_space; + public: //! Tag this class as a kokkos execution policy using execution_policy = TeamPolicyInternal; @@ -46,10 +48,7 @@ class TeamPolicyInternal //! Execution space of this execution policy: using execution_space = Kokkos::Serial; - const typename traits::execution_space& space() const { - static typename traits::execution_space m_space; - return m_space; - } + const typename traits::execution_space& space() const { return m_space; } template friend class TeamPolicyInternal; @@ -116,12 +115,13 @@ class TeamPolicyInternal return (level == 0 ? 1024 * 32 : 20 * 1024 * 1024); } /** \brief Specify league size, request team size */ - TeamPolicyInternal(const execution_space&, int league_size_request, + TeamPolicyInternal(const execution_space& space, int league_size_request, int team_size_request, int /* vector_length_request */ = 1) : m_team_scratch_size{0, 0}, m_thread_scratch_size{0, 0}, m_league_size(league_size_request), - m_chunk_size(32) { + m_chunk_size(32), + m_space(space) { if (team_size_request > 1) Kokkos::abort("Kokkos::abort: Requested Team Size is too large!"); } diff --git a/lib/kokkos/core/unit_test/TestExecSpacePartitioning.hpp b/lib/kokkos/core/unit_test/TestExecSpacePartitioning.hpp index 65314d6be7c..f8b570ab64d 100644 --- a/lib/kokkos/core/unit_test/TestExecSpacePartitioning.hpp +++ b/lib/kokkos/core/unit_test/TestExecSpacePartitioning.hpp @@ -28,6 +28,17 @@ struct SumFunctor { void operator()(int i, int& lsum) const { lsum += i; } }; +template +void check_space_member_for_policies(const ExecSpace& exec) { + Kokkos::RangePolicy range_policy(exec, 0, 1); + ASSERT_EQ(range_policy.space(), exec); + Kokkos::MDRangePolicy> mdrange_policy(exec, {0, 0}, + {1, 1}); + ASSERT_EQ(mdrange_policy.space(), exec); + Kokkos::TeamPolicy team_policy(exec, 1, Kokkos::AUTO); + ASSERT_EQ(team_policy.space(), exec); +} + template void check_distinctive([[maybe_unused]] ExecSpace exec1, [[maybe_unused]] ExecSpace exec2) { @@ -89,6 +100,9 @@ void run_threaded_test(const Lambda1 l1, const Lambda2 l2) { void test_partitioning(std::vector& instances) { check_distinctive(instances[0], instances[1]); + check_space_member_for_policies(instances[0]); + check_space_member_for_policies(instances[1]); + int sum1, sum2; int N = 3910; run_threaded_test( diff --git a/lib/kokkos/core/unit_test/TestReduce.hpp b/lib/kokkos/core/unit_test/TestReduce.hpp index e1aa851f102..61b2bfb1505 100644 --- a/lib/kokkos/core/unit_test/TestReduce.hpp +++ b/lib/kokkos/core/unit_test/TestReduce.hpp @@ -625,4 +625,30 @@ TEST(TEST_CATEGORY, int_combined_reduce_mixed) { } #endif #endif + +#if defined(NDEBUG) +// the following test was made for: +// https://github.com/kokkos/kokkos/issues/6517 + +struct FunctorReductionWithLargeIterationCount { + KOKKOS_FUNCTION void operator()(const int64_t /*i*/, double& update) const { + update += 1.0; + } +}; + +TEST(TEST_CATEGORY, reduction_with_large_iteration_count) { + if constexpr (std::is_same_v) { + GTEST_SKIP() << "Disabling for host backends"; + } + + const int64_t N = pow(2LL, 39LL) - pow(2LL, 8LL) + 1; + Kokkos::RangePolicy> p(0, N); + double nu = 0; + EXPECT_NO_THROW(Kokkos::parallel_reduce( + "sample reduction", p, FunctorReductionWithLargeIterationCount(), nu)); + ASSERT_DOUBLE_EQ(nu, double(N)); +} +#endif + } // namespace Test diff --git a/lib/kokkos/core/unit_test/hip/TestHIP_Spaces.cpp b/lib/kokkos/core/unit_test/hip/TestHIP_Spaces.cpp index 8f7499c244b..673c0f0fff3 100644 --- a/lib/kokkos/core/unit_test/hip/TestHIP_Spaces.cpp +++ b/lib/kokkos/core/unit_test/hip/TestHIP_Spaces.cpp @@ -38,8 +38,13 @@ TEST(hip, space_access) { static_assert(!Kokkos::Impl::MemorySpaceAccess::assignable); +#if !defined(KOKKOS_ENABLE_IMPL_HIP_UNIFIED_MEMORY) static_assert(!Kokkos::Impl::MemorySpaceAccess::accessible); +#else + static_assert(Kokkos::Impl::MemorySpaceAccess::accessible); +#endif static_assert( !Kokkos::Impl::MemorySpaceAccess::accessible); +#if !defined(KOKKOS_ENABLE_IMPL_HIP_UNIFIED_MEMORY) static_assert(!Kokkos::SpaceAccessibility::accessible); +#else + static_assert(Kokkos::SpaceAccessibility::accessible); +#endif static_assert( Kokkos::SpaceAccessibility::accessible); +#if !defined(KOKKOS_ENABLE_IMPL_HIP_UNIFIED_MEMORY) static_assert(std::is_same::Space, Kokkos::HostSpace>::value); +#else + static_assert(std::is_same::Space, + Kokkos::Device>::value); +#endif static_assert( std::is_same::Space, diff --git a/lib/kokkos/master_history.txt b/lib/kokkos/master_history.txt index bd122a456bd..31be9253254 100644 --- a/lib/kokkos/master_history.txt +++ b/lib/kokkos/master_history.txt @@ -36,3 +36,4 @@ tag: 4.1.00 date: 06:20:2023 master: 62d2b6c8 release: adde1e6a tag: 4.2.00 date: 11:09:2023 master: 1a3ea28f release: abe01c88 tag: 4.2.01 date: 01:30:2024 master: 71a9bcae release: 221e5f7a tag: 4.3.00 date: 04:03:2024 master: e0dc0128 release: f08217a4 +tag: 4.3.01 date: 05:07:2024 master: 486cc745 release: 262d2d6e From 8fbb959ab34d8c30641df661a854c71e48e9efd4 Mon Sep 17 00:00:00 2001 From: Stan Gerald Moore Date: Wed, 15 May 2024 14:40:41 -0600 Subject: [PATCH 2/5] Add support for MI300A with unified memory --- src/KOKKOS/kokkos_type.h | 7 +- src/MAKE/MACHINES/Makefile.elcapitan_kokkos | 124 ++++++++++++++++++++ src/MAKE/MACHINES/Makefile.frontier_kokkos | 2 +- 3 files changed, 129 insertions(+), 4 deletions(-) create mode 100644 src/MAKE/MACHINES/Makefile.elcapitan_kokkos diff --git a/src/KOKKOS/kokkos_type.h b/src/KOKKOS/kokkos_type.h index cc4e00819fb..6e3cb2a1d99 100644 --- a/src/KOKKOS/kokkos_type.h +++ b/src/KOKKOS/kokkos_type.h @@ -143,12 +143,13 @@ typedef Kokkos::DefaultExecutionSpace LMPDeviceType; typedef Kokkos::HostSpace::execution_space LMPHostType; -// Need to use Cuda UVM memory space for Host execution space +// If unified memory, need to use device memory space for host execution space template class KKDevice { -public: -#if defined(KOKKOS_ENABLE_CUDA) && defined(KOKKOS_ENABLE_CUDA_UVM) + public: +#if ((defined(KOKKOS_ENABLE_CUDA) && defined(KOKKOS_ENABLE_CUDA_UVM)) || \ + (defined(KOKKOS_ENABLE_HIP) && defined(KOKKOS_ENABLE_IMPL_HIP_UNIFIED_MEMORY))) typedef Kokkos::Device value; #else typedef Kokkos::Device value; diff --git a/src/MAKE/MACHINES/Makefile.elcapitan_kokkos b/src/MAKE/MACHINES/Makefile.elcapitan_kokkos new file mode 100644 index 00000000000..bea74e1329c --- /dev/null +++ b/src/MAKE/MACHINES/Makefile.elcapitan_kokkos @@ -0,0 +1,124 @@ +# frontier_kokkos = KOKKOS/HIP, AMD MI250X GPU and AMD EPYC 7A53 "Optimized 3rd Gen EPYC" CPU, Cray MPICH, hipcc compiler, hipFFT + +SHELL = /bin/sh + +# --------------------------------------------------------------------- +# compiler/linker settings +# specify flags and libraries needed for your compiler + +CC = hipcc +CCFLAGS = -g -O3 -DNDEBUG -DKOKKOS_ENABLE_HIP_MULTIPLE_KERNEL_INSTANTIATIONS -DKOKKOS_ENABLE_IMPL_HIP_UNIFIED_MEMORY +SHFLAGS = -fPIC +DEPFLAGS = -M + +LINK = hipcc +LINKFLAGS = -g -O3 +LIB = +SIZE = size + +ARCHIVE = ar +ARFLAGS = -rc +SHLIBFLAGS = -shared +KOKKOS_DEVICES = HIP +KOKKOS_ARCH = AMD_GFX942 + +# --------------------------------------------------------------------- +# LAMMPS-specific settings, all OPTIONAL +# specify settings for LAMMPS features you will use +# if you change any -D setting, do full re-compile after "make clean" + +# LAMMPS ifdef settings +# see possible settings in Section 3.5 of the manual + +LMP_INC = -DLAMMPS_GZIP + +# MPI library +# see discussion in Section 3.4 of the manual +# MPI wrapper compiler/linker can provide this info +# can point to dummy MPI library in src/STUBS as in Makefile.serial +# use -D MPICH and OMPI settings in INC to avoid C++ lib conflicts +# INC = path for mpi.h, MPI compiler settings +# PATH = path for MPI library +# LIB = name of MPI library + +MPI_INC = -DMPICH_SKIP_MPICXX -DOMPI_SKIP_MPICXX=1 -I${MPICH_DIR}/include +MPI_PATH = +MPI_LIB = -L${MPICH_DIR}/lib -lmpi -L${CRAY_MPICH_ROOTDIR}/gtl/lib -lmpi_gtl_hsa + +# FFT library +# see discussion in Section 3.5.2 of manual +# can be left blank to use provided KISS FFT library +# INC = -DFFT setting, e.g. -DFFT_FFTW, FFT compiler settings +# PATH = path for FFT library +# LIB = name of FFT library + +MY_HIP_EXE = $(shell which hipcc) +MY_HIP_PATH = $(dir ${MY_HIP_EXE}) + +FFT_INC = -DFFT_KOKKOS_HIPFFT +FFT_PATH = +FFT_LIB = -L${MY_HIP_PATH}../lib -lhipfft + +# JPEG and/or PNG library +# see discussion in Section 3.5.4 of manual +# only needed if -DLAMMPS_JPEG or -DLAMMPS_PNG listed with LMP_INC +# INC = path(s) for jpeglib.h and/or png.h +# PATH = path(s) for JPEG library and/or PNG library +# LIB = name(s) of JPEG library and/or PNG library + +JPG_INC = +JPG_PATH = +JPG_LIB = + +# --------------------------------------------------------------------- +# build rules and dependencies +# do not edit this section + +include Makefile.package.settings +include Makefile.package + +EXTRA_INC = $(LMP_INC) $(PKG_INC) $(MPI_INC) $(FFT_INC) $(JPG_INC) $(PKG_SYSINC) +EXTRA_PATH = $(PKG_PATH) $(MPI_PATH) $(FFT_PATH) $(JPG_PATH) $(PKG_SYSPATH) +EXTRA_LIB = $(PKG_LIB) $(MPI_LIB) $(FFT_LIB) $(JPG_LIB) $(PKG_SYSLIB) +EXTRA_CPP_DEPENDS = $(PKG_CPP_DEPENDS) +EXTRA_LINK_DEPENDS = $(PKG_LINK_DEPENDS) + +# Path to src files + +vpath %.cpp .. +vpath %.h .. + +# Link target + +$(EXE): main.o $(LMPLIB) $(EXTRA_LINK_DEPENDS) + $(LINK) $(LINKFLAGS) main.o $(EXTRA_PATH) $(LMPLINK) $(EXTRA_LIB) $(LIB) -o $@ + $(SIZE) $@ + +# Library targets + +$(ARLIB): $(OBJ) $(EXTRA_LINK_DEPENDS) + @rm -f ../$(ARLIB) + $(ARCHIVE) $(ARFLAGS) ../$(ARLIB) $(OBJ) + @rm -f $(ARLIB) + @ln -s ../$(ARLIB) $(ARLIB) + +$(SHLIB): $(OBJ) $(EXTRA_LINK_DEPENDS) + $(CC) $(CCFLAGS) $(SHFLAGS) $(SHLIBFLAGS) $(EXTRA_PATH) -o ../$(SHLIB) \ + $(OBJ) $(EXTRA_LIB) $(LIB) + @rm -f $(SHLIB) + @ln -s ../$(SHLIB) $(SHLIB) + +# Compilation rules + +%.o:%.cpp + $(CC) $(CCFLAGS) $(SHFLAGS) $(EXTRA_INC) -c $< + +# Individual dependencies + +depend : fastdep.exe $(SRC) + @./fastdep.exe $(EXTRA_INC) -- $^ > .depend || exit 1 + +fastdep.exe: ../DEPEND/fastdep.c + cc -O -o $@ $< + +sinclude .depend diff --git a/src/MAKE/MACHINES/Makefile.frontier_kokkos b/src/MAKE/MACHINES/Makefile.frontier_kokkos index b58a3d871c2..7be15c5cf62 100644 --- a/src/MAKE/MACHINES/Makefile.frontier_kokkos +++ b/src/MAKE/MACHINES/Makefile.frontier_kokkos @@ -20,7 +20,7 @@ ARCHIVE = ar ARFLAGS = -rc SHLIBFLAGS = -shared KOKKOS_DEVICES = HIP -KOKKOS_ARCH = Vega90A,Zen3 +KOKKOS_ARCH = AMD_GFX90A,ZEN3 # --------------------------------------------------------------------- # LAMMPS-specific settings, all OPTIONAL From 3805a0144894a7458f07b562609b78d7a0843453 Mon Sep 17 00:00:00 2001 From: Stan Gerald Moore Date: Wed, 15 May 2024 14:42:01 -0600 Subject: [PATCH 3/5] Update CMake --- cmake/Modules/Packages/KOKKOS.cmake | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/cmake/Modules/Packages/KOKKOS.cmake b/cmake/Modules/Packages/KOKKOS.cmake index a2f462905a7..3776d18a3e1 100644 --- a/cmake/Modules/Packages/KOKKOS.cmake +++ b/cmake/Modules/Packages/KOKKOS.cmake @@ -45,8 +45,8 @@ if(DOWNLOAD_KOKKOS) list(APPEND KOKKOS_LIB_BUILD_ARGS "-DCMAKE_CXX_EXTENSIONS=${CMAKE_CXX_EXTENSIONS}") list(APPEND KOKKOS_LIB_BUILD_ARGS "-DCMAKE_TOOLCHAIN_FILE=${CMAKE_TOOLCHAIN_FILE}") include(ExternalProject) - set(KOKKOS_URL "https://github.com/kokkos/kokkos/archive/4.3.00.tar.gz" CACHE STRING "URL for KOKKOS tarball") - set(KOKKOS_MD5 "889dcea2b5ced3debdc5b0820044bdc4" CACHE STRING "MD5 checksum of KOKKOS tarball") + set(KOKKOS_URL "https://github.com/kokkos/kokkos/archive/4.3.01.tar.gz" CACHE STRING "URL for KOKKOS tarball") + set(KOKKOS_MD5 "243de871b3dc2cf3990c1c404032df83" CACHE STRING "MD5 checksum of KOKKOS tarball") mark_as_advanced(KOKKOS_URL) mark_as_advanced(KOKKOS_MD5) GetFallbackURL(KOKKOS_URL KOKKOS_FALLBACK) @@ -71,7 +71,7 @@ if(DOWNLOAD_KOKKOS) add_dependencies(LAMMPS::KOKKOSCORE kokkos_build) add_dependencies(LAMMPS::KOKKOSCONTAINERS kokkos_build) elseif(EXTERNAL_KOKKOS) - find_package(Kokkos 4.3.00 REQUIRED CONFIG) + find_package(Kokkos 4.3.01 REQUIRED CONFIG) target_link_libraries(lammps PRIVATE Kokkos::kokkos) else() set(LAMMPS_LIB_KOKKOS_SRC_DIR ${LAMMPS_LIB_SOURCE_DIR}/kokkos) From 852becb32a7f2914dcd311302a392cde8e9082a4 Mon Sep 17 00:00:00 2001 From: Stan Gerald Moore Date: Wed, 15 May 2024 14:44:50 -0600 Subject: [PATCH 4/5] Update Makefile comment --- src/MAKE/MACHINES/Makefile.elcapitan_kokkos | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/MAKE/MACHINES/Makefile.elcapitan_kokkos b/src/MAKE/MACHINES/Makefile.elcapitan_kokkos index bea74e1329c..ede6e2bba31 100644 --- a/src/MAKE/MACHINES/Makefile.elcapitan_kokkos +++ b/src/MAKE/MACHINES/Makefile.elcapitan_kokkos @@ -1,4 +1,4 @@ -# frontier_kokkos = KOKKOS/HIP, AMD MI250X GPU and AMD EPYC 7A53 "Optimized 3rd Gen EPYC" CPU, Cray MPICH, hipcc compiler, hipFFT +# elcapitan_kokkos = KOKKOS/HIP, AMD MI300A APU, Cray MPICH, hipcc compiler, hipFFT SHELL = /bin/sh From c99e5826587a686b74441d87a7a1244336679ca6 Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Wed, 15 May 2024 16:33:49 -0700 Subject: [PATCH 5/5] Need to manually enable rocthrust in Makefile build --- src/MAKE/MACHINES/Makefile.elcapitan_kokkos | 2 +- src/MAKE/MACHINES/Makefile.frontier_kokkos | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/MAKE/MACHINES/Makefile.elcapitan_kokkos b/src/MAKE/MACHINES/Makefile.elcapitan_kokkos index ede6e2bba31..64d55ebc863 100644 --- a/src/MAKE/MACHINES/Makefile.elcapitan_kokkos +++ b/src/MAKE/MACHINES/Makefile.elcapitan_kokkos @@ -7,7 +7,7 @@ SHELL = /bin/sh # specify flags and libraries needed for your compiler CC = hipcc -CCFLAGS = -g -O3 -DNDEBUG -DKOKKOS_ENABLE_HIP_MULTIPLE_KERNEL_INSTANTIATIONS -DKOKKOS_ENABLE_IMPL_HIP_UNIFIED_MEMORY +CCFLAGS = -g -O3 -DNDEBUG -DKOKKOS_ENABLE_HIP_MULTIPLE_KERNEL_INSTANTIATIONS -DKOKKOS_ENABLE_IMPL_HIP_UNIFIED_MEMORY -DKOKKOS_ENABLE_ROCTHRUST SHFLAGS = -fPIC DEPFLAGS = -M diff --git a/src/MAKE/MACHINES/Makefile.frontier_kokkos b/src/MAKE/MACHINES/Makefile.frontier_kokkos index 7be15c5cf62..470f2e14497 100644 --- a/src/MAKE/MACHINES/Makefile.frontier_kokkos +++ b/src/MAKE/MACHINES/Makefile.frontier_kokkos @@ -7,7 +7,7 @@ SHELL = /bin/sh # specify flags and libraries needed for your compiler CC = hipcc -CCFLAGS = -g -O3 -munsafe-fp-atomics -DNDEBUG -DKOKKOS_ENABLE_HIP_MULTIPLE_KERNEL_INSTANTIATIONS +CCFLAGS = -g -O3 -munsafe-fp-atomics -DNDEBUG -DKOKKOS_ENABLE_HIP_MULTIPLE_KERNEL_INSTANTIATIONS -DKOKKOS_ENABLE_ROCTHRUST SHFLAGS = -fPIC DEPFLAGS = -M