Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft: Add working incline CI. #624

Open
wants to merge 3 commits into
base: develop
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 4 additions & 5 deletions .gitlab/pnnl-ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -25,19 +25,19 @@ variables:
variables:
SLURM_Q: "newell_shared"
MY_CLUSTER: "newell"
SLURM_ARGS: --gres=gpu:1 --exclusive
SLURM_ARGS: --gres=gpu:1 -N 1 -n 8

.marianas:
variables:
SLURM_Q: "dl"
MY_CLUSTER: "marianas"
SLURM_ARGS: --gres=gpu:1 --exclusive
SLURM_ARGS: --gres=gpu:1 -N 1 -n 8

.incline:
variables:
SLURM_Q: "incline"
MY_CLUSTER: "incline"
SLURM_ARGS: --exclusive
SLURM_ARGS: -N 1 -n 8

.pnnl_build:
extends:
Expand Down Expand Up @@ -69,7 +69,7 @@ variables:
# Extra args for ctest
export CTEST_CMD=$CTEST_CMD

sbatch -A EXASGD --exclusive -N 1 -n 8 -p $SLURM_Q -t $TIMELIMIT $SLURM_ARGS -o output -e output $WORKDIR/BUILD.sh $BUILD_SCRIPT_ARGS
sbatch -A EXASGD -p $SLURM_Q -t $TIMELIMIT $SLURM_ARGS -o output -e output $WORKDIR/BUILD.sh $BUILD_SCRIPT_ARGS
res=1
set +xv
while :;
Expand Down Expand Up @@ -99,7 +99,6 @@ build_on_incline:
extends:
- .pnnl_build
- .incline
allow_failure: true

pnnl_cleanup:
needs: []
Expand Down
1 change: 1 addition & 0 deletions cmake/FindHiopHipLibraries.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ if (NOT DEFINED ROCM_PATH )
set(ROCM_PATH $ENV{ROCM_PATH} CACHE PATH "ROCm path")
endif()
endif()
message(STATUS "ROCm path: ${ROCM_PATH}")
set(CMAKE_MODULE_PATH "${ROCM_PATH}/lib/cmake" ${CMAKE_MODULE_PATH})

# Set GPU Targets and Find all the HIP modules
Expand Down
1 change: 1 addition & 0 deletions scripts/clang-hip.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ set(HIOP_USE_UMPIRE ON CACHE BOOL "")
set(HIOP_USE_GPU ON CACHE BOOL "")
set(HIOP_USE_CUDA OFF CACHE BOOL "")
set(HIOP_USE_HIP ON CACHE BOOL "")
# TODO - enable sparse testing on Incline
set(HIOP_SPARSE OFF CACHE BOOL "")
set(HIOP_DEEPCHECKS ON CACHE BOOL "")
set(AMDGPU_TARGETS "gfx908" CACHE STRING "")
Expand Down
47 changes: 29 additions & 18 deletions scripts/inclineVariables.sh
Original file line number Diff line number Diff line change
@@ -1,23 +1,34 @@
# NOTE: The following is required when running from Gitlab CI via slurm job
source /etc/profile.d/modules.sh
module use -a /usr/share/Modules/modulefiles
module use -a /share/apps/modules/tools
module use -a /share/apps/modules/compilers
module use -a /share/apps/modules/mpi
module use -a /etc/modulefiles
#!/bin/bash

module load rocm/4.5.1
module load umpire/6.0.0
module load raja/0.14.0
. /etc/profile.d/modules.sh

module purge

# The MPI module is finicky on Incline
modules=$(module list 2>&1)
if echo $modules | grep -q 'openmpi'; then
module load gcc/8.4.0
module rm openmpi
fi

# System modules
module load gcc/8.4.0
module load openblas/0.3.18
module load cmake/3.19.6
module load magma/2.6.1
module load openmpi/4.1.4
module load rocm/5.3.0
module load cmake/3.21.4

# Spack modules
module use -a /vast/projects/exasgd/spack/install/modules/linux-centos7-zen3
# umpire@6.0.0%clang@15.0.0 cxxflags="--gcc-toolchain=/share/apps/gcc/8.4.0/" +c~cuda~device_alloc~deviceconst~examples~fortran~ipo~numa~openmp+rocm+shared amdgpu_target=gfx908 build_system=cmake build_type=RelWithDebInfo generator=make tests=none arch=linux-centos7-zen
module load umpire-6.0.0-clang-15.0.0-rocm5.3.0-hsuiw34
# magma@2.6.2%clang@15.0.0 cxxflags="--gcc-toolchain=/share/apps/gcc/8.4.0/" ~cuda+fortran~ipo+rocm+shared amdgpu_target=gfx908 build_system=cmake build_type=Release generator=make arch=linux-centos7-zen
module load magma-2.6.2-clang-15.0.0-rocm5.3.0-failpgu
# raja@0.14.0%clang@15.0.0 cxxflags="--gcc-toolchain=/share/apps/gcc/8.4.0/" ~cuda~examples~exercises~ipo~openmp+rocm+shared~tests amdgpu_target=gfx908 build_system=cmake build_type=Release generator=make arch=linux-centos7-zen
module load raja-0.14.0-clang-15.0.0-rocm5.3.0-x4u3jfh

export LD_LIBRARY_PATH=/share/apps/openmpi/4.1.1/gcc/8.1.0/lib
export PATH=/share/apps/openmpi/4.1.1/gcc/8.1.0/bin:$PATH
export CC=$(which clang)
export CXX=$(which clang++)
export FC=$(which gfortran)

# For some reason, OS libstdc++ keeps being found before GCC 8.4.0, so we have
# to force this link directory. GCC 4.8.5 is far too old...
export EXTRA_CMAKE_ARGS="-DHIOP_EXTRA_LINK_FLAGS=-L/share/apps/gcc/8.4.0/lib64;-Wl,-rpath,/share/apps/gcc/8.4.0/lib64"
export EXTRA_CMAKE_ARGS="$EXTRA_CMAKE_ARGS -DAMDGPU_TARGETS='gfx908'"
export CMAKE_CACHE_SCRIPT=clang-hip.cmake