diff --git a/.gitlab/pnnl-ci.yml b/.gitlab/pnnl-ci.yml index d25387a59..e4cabf6fa 100644 --- a/.gitlab/pnnl-ci.yml +++ b/.gitlab/pnnl-ci.yml @@ -25,19 +25,19 @@ variables: variables: SLURM_Q: "newell_shared" MY_CLUSTER: "newell" - SLURM_ARGS: --gres=gpu:1 --exclusive + SLURM_ARGS: --gres=gpu:1 -N 1 -n 8 .marianas: variables: SLURM_Q: "dl" MY_CLUSTER: "marianas" - SLURM_ARGS: --gres=gpu:1 --exclusive + SLURM_ARGS: --gres=gpu:1 -N 1 -n 8 .incline: variables: SLURM_Q: "incline" MY_CLUSTER: "incline" - SLURM_ARGS: --exclusive + SLURM_ARGS: -N 1 -n 8 .pnnl_build: extends: @@ -69,7 +69,7 @@ variables: # Extra args for ctest export CTEST_CMD=$CTEST_CMD - sbatch -A EXASGD --exclusive -N 1 -n 8 -p $SLURM_Q -t $TIMELIMIT $SLURM_ARGS -o output -e output $WORKDIR/BUILD.sh $BUILD_SCRIPT_ARGS + sbatch -A EXASGD -p $SLURM_Q -t $TIMELIMIT $SLURM_ARGS -o output -e output $WORKDIR/BUILD.sh $BUILD_SCRIPT_ARGS res=1 set +xv while :; @@ -99,7 +99,6 @@ build_on_incline: extends: - .pnnl_build - .incline - allow_failure: true pnnl_cleanup: needs: [] diff --git a/cmake/FindHiopHipLibraries.cmake b/cmake/FindHiopHipLibraries.cmake index a6f4b16c1..3e822f924 100644 --- a/cmake/FindHiopHipLibraries.cmake +++ b/cmake/FindHiopHipLibraries.cmake @@ -15,6 +15,7 @@ if (NOT DEFINED ROCM_PATH ) set(ROCM_PATH $ENV{ROCM_PATH} CACHE PATH "ROCm path") endif() endif() +message(STATUS "ROCm path: ${ROCM_PATH}") set(CMAKE_MODULE_PATH "${ROCM_PATH}/lib/cmake" ${CMAKE_MODULE_PATH}) # Set GPU Targets and Find all the HIP modules diff --git a/scripts/clang-hip.cmake b/scripts/clang-hip.cmake index 924bf6eb8..5fcd2d9e2 100644 --- a/scripts/clang-hip.cmake +++ b/scripts/clang-hip.cmake @@ -12,6 +12,7 @@ set(HIOP_USE_UMPIRE ON CACHE BOOL "") set(HIOP_USE_GPU ON CACHE BOOL "") set(HIOP_USE_CUDA OFF CACHE BOOL "") set(HIOP_USE_HIP ON CACHE BOOL "") +# TODO - enable sparse testing on Incline set(HIOP_SPARSE OFF CACHE BOOL "") set(HIOP_DEEPCHECKS ON CACHE BOOL "") set(AMDGPU_TARGETS "gfx908" CACHE STRING "") diff 
--git a/scripts/inclineVariables.sh b/scripts/inclineVariables.sh index 542664e8d..cd687bf96 100644 --- a/scripts/inclineVariables.sh +++ b/scripts/inclineVariables.sh @@ -1,23 +1,34 @@ -# NOTE: The following is required when running from Gitlab CI via slurm job -source /etc/profile.d/modules.sh -module use -a /usr/share/Modules/modulefiles -module use -a /share/apps/modules/tools -module use -a /share/apps/modules/compilers -module use -a /share/apps/modules/mpi -module use -a /etc/modulefiles +#!/bin/bash -module load rocm/4.5.1 -module load umpire/6.0.0 -module load raja/0.14.0 +. /etc/profile.d/modules.sh + +module purge + +# MPI module is finicky on incline +modules=$(module list 2>&1) +if echo $modules | grep -q 'openmpi'; then + module load gcc/8.4.0 + module rm openmpi +fi + +# System modules module load gcc/8.4.0 -module load openblas/0.3.18 -module load cmake/3.19.6 -module load magma/2.6.1 +module load openmpi/4.1.4 +module load rocm/5.3.0 +module load cmake/3.21.4 + +# Spack modules +module use -a /vast/projects/exasgd/spack/install/modules/linux-centos7-zen3 +# umpire@6.0.0%clang@15.0.0-rocm5.3.0 cxxflags="--gcc-toolchain=/share/apps/gcc/8.4.0/" +c~cuda~device_alloc~deviceconst~examples~fortran~ipo~numa~openmp+rocm+shared amdgpu_target=gfx908 build_system=cmake build_type=RelWithDebInfo generator=make tests=none arch=linux-centos7-zen +module load umpire-6.0.0-clang-15.0.0-rocm5.3.0-hsuiw34 +# magma@2.6.2%clang@15.0.0-rocm5.3.0 cxxflags="--gcc-toolchain=/share/apps/gcc/8.4.0/" ~cuda+fortran~ipo+rocm+shared amdgpu_target=gfx908 build_system=cmake build_type=Release generator=make arch=linux-centos7-zen +module load magma-2.6.2-clang-15.0.0-rocm5.3.0-failpgu +# raja@0.14.0%clang@15.0.0-rocm5.3.0 cxxflags="--gcc-toolchain=/share/apps/gcc/8.4.0/" ~cuda~examples~exercises~ipo~openmp+rocm+shared~tests amdgpu_target=gfx908 build_system=cmake build_type=Release generator=make arch=linux-centos7-zen +module load raja-0.14.0-clang-15.0.0-rocm5.3.0-x4u3jfh 
-export LD_LIBRARY_PATH=/share/apps/openmpi/4.1.1/gcc/8.1.0/lib -export PATH=/share/apps/openmpi/4.1.1/gcc/8.1.0/bin:$PATH +export CC=$(which clang) +export CXX=$(which clang++) +export FC=$(which gfortran) -# For some reason, OS libstdc++ keeps being found before GCC 8.4.0, so we have -# to force this link directory. GCC 4.8.5 is far too old... -export EXTRA_CMAKE_ARGS="-DHIOP_EXTRA_LINK_FLAGS=-L/share/apps/gcc/8.4.0/lib64;-Wl,-rpath,/share/apps/gcc/8.4.0/lib64" +export EXTRA_CMAKE_ARGS="$EXTRA_CMAKE_ARGS -DAMDGPU_TARGETS='gfx908'" export CMAKE_CACHE_SCRIPT=clang-hip.cmake