Skip to content

Commit

Permalink
Merge remote-tracking branch 'upstream/develop' into omp
Browse files Browse the repository at this point in the history
  • Loading branch information
jxy committed Dec 13, 2023
2 parents de9bed4 + a24bcfa commit 754bece
Show file tree
Hide file tree
Showing 35 changed files with 526 additions and 366 deletions.
8 changes: 5 additions & 3 deletions .github/workflows/rocm-build-ci.yml
Original file line number Diff line number Diff line change
@@ -1,13 +1,15 @@
name: rocm-build-ci
run-name: ${{ github.actor }} is kicking off a ROCm build
on: pull_request
jobs:
rocm-build:
runs-on: [self-hosted, amd]
strategy:
matrix:
rocm: [ 5.6.1, 5.7.2 ]
steps:
- uses: actions/checkout@v3
- run: |
export ROCM_PATH=/opt/rocm-5.5.0
export ROCM_PATH=/opt/rocm-${{ matrix.rocm }}
export PATH=${ROCM_PATH}/bin:${ROCM_PATH}/llvm/bin:${PATH}
SRCROOT=`pwd`
BUILDROOT=`mktemp -d build-XXXXXXXX`
Expand Down Expand Up @@ -40,7 +42,7 @@ jobs:
-DQUDA_DOWNLOAD_EIGEN=ON \
-DQUDA_PRECISION=14 \
-DCMAKE_INSTALL_PREFIX=${INSTALLROOT} \
-DCMAKE_BUILD_TYPE="DEVEL" \
-DCMAKE_BUILD_TYPE="STRICT" \
-DCMAKE_CXX_COMPILER="${ROCM_PATH}/llvm/bin/clang++" \
-DCMAKE_C_COMPILER="${ROCM_PATH}/llvm/bin/clang" \
-DCMAKE_HIP_COMPILER="${ROCM_PATH}/llvm/bin/clang++" \
Expand Down
6 changes: 6 additions & 0 deletions include/color_spinor_field.h
Original file line number Diff line number Diff line change
Expand Up @@ -426,6 +426,12 @@ namespace quda
*/
ColorSpinorField &operator=(ColorSpinorField &&field);

/**
@brief Returns if the object is empty (not initialized)
@return true if the object has not been allocated, otherwise false
*/
bool empty() const { return !init; }

/**
@brief Copy the source field contents into this
@param[in] src Source from which we are copying
Expand Down
11 changes: 11 additions & 0 deletions include/gauge_field.h
Original file line number Diff line number Diff line change
Expand Up @@ -476,6 +476,17 @@ namespace quda {
return reinterpret_cast<T>(gauge_array[d].data());
}

void *raw_pointer() const
{
if (is_pointer_array(order)) {
static void *data_array[8];
for (int i = 0; i < site_dim; i++) data_array[i] = gauge_array[i].data();
return data_array;
} else {
return gauge.data();
}
}

/**
@brief Return array of pointers to the per dimension gauge field allocation(s).
@tparam T Optional type to cast the pointer to (default is
Expand Down
3 changes: 3 additions & 0 deletions include/quda_arch.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,5 +17,8 @@

#elif defined(QUDA_TARGET_OMPTARGET)
#include <omp.h>
#endif

#ifdef QUDA_OPENMP
#include <omp.h>
#endif
2 changes: 1 addition & 1 deletion include/targets/hip/FFT_Plans.h
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

#include <quda_hip_api.h>
#include <quda_internal.h>
#include <hipfft.h>
#include <hipfft/hipfft.h>

#define FFT_FORWARD HIPFFT_FORWARD
#define FFT_INVERSE HIPFFT_BACKWARD
Expand Down
2 changes: 1 addition & 1 deletion include/util_quda.h
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ char *getPrintBuffer();
number of OMP threads for CPU functions recorded in the tune cache.
@return Returns the string
*/
char* getOmpThreadStr();
const char *getOmpThreadStr();

void errorQuda_(const char *func, const char *file, int line, ...);

Expand Down
1 change: 1 addition & 0 deletions lib/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -473,6 +473,7 @@ endif()

if(QUDA_OPENMP)
target_link_libraries(quda PUBLIC OpenMP::OpenMP_CXX)
target_compile_definitions(quda PUBLIC QUDA_OPENMP)
endif()

# set which precisions to enable
Expand Down
2 changes: 1 addition & 1 deletion lib/color_spinor_field.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ namespace quda
{
if (&src != this) {
// if field not already initialized then move the field
if (!init || are_compatible(*this, src)) {
if (!init || are_compatible(*this, src) || src.empty()) {
if (init) destroy();
LatticeField::operator=(std::move(src));
move(std::move(src));
Expand Down
7 changes: 3 additions & 4 deletions lib/interface_quda.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3219,11 +3219,10 @@ void callMultiSrcQuda(void **_hp_x, void **_hp_b, QudaInvertParam *param, // col
// the split topology.
logQuda(QUDA_DEBUG_VERBOSE, "Split grid loading gauge field...\n");
if (!is_staggered) {
loadGaugeQuda(collected_gauge->data(), gauge_param);
loadGaugeQuda(collected_gauge->raw_pointer(), gauge_param);
} else {
// freeGaugeQuda();
loadFatLongGaugeQuda(param, gauge_param, collected_milc_fatlink_field->data(),
collected_milc_longlink_field->data());
loadFatLongGaugeQuda(param, gauge_param, collected_milc_fatlink_field->raw_pointer(),
collected_milc_longlink_field->raw_pointer());
}
logQuda(QUDA_DEBUG_VERBOSE, "Split grid loaded gauge field...\n");

Expand Down
2 changes: 1 addition & 1 deletion lib/targets/hip/blas_lapack_hipblas.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
#include <blas_lapack.h>
#include <timer.h>
#ifdef NATIVE_LAPACK_LIB
#include <hipblas.h>
#include <hipblas/hipblas.h>
#include <malloc_quda.h>
#endif

Expand Down
7 changes: 5 additions & 2 deletions lib/targets/hip/malloc.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -528,12 +528,15 @@ namespace quda
errorQuda("hipPointerGetAttributes returned error: %s\n", hipGetErrorString(error));
}

switch (attr.memoryType) {
switch (attr.type) {
#if HIP_VERSION_MAJOR >= 6
case hipMemoryTypeUnregistered: return QUDA_CPU_FIELD_LOCATION;
#endif // HIP_VERSION_MAJOR >= 6
case hipMemoryTypeHost: return QUDA_CPU_FIELD_LOCATION;
case hipMemoryTypeDevice: return QUDA_CUDA_FIELD_LOCATION;
case hipMemoryTypeArray: return QUDA_CUDA_FIELD_LOCATION;
case hipMemoryTypeUnified: return QUDA_CUDA_FIELD_LOCATION; ///< Not used currently
default: errorQuda("Unknown memory type %d\n", attr.memoryType); return QUDA_INVALID_FIELD_LOCATION;
default: errorQuda("Unknown memory type %d\n", attr.type); return QUDA_INVALID_FIELD_LOCATION;
}
}

Expand Down
14 changes: 7 additions & 7 deletions lib/util_quda.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -134,17 +134,17 @@ void popOutputPrefix()

char *getPrintBuffer() { return buffer_; }

char* getOmpThreadStr() {
static char omp_thread_string[128];
const char *getOmpThreadStr()
{
static std::string omp_thread_string;
static bool init = false;
if (!init) {
strcpy(omp_thread_string,"omp_threads=");
char *omp_threads = getenv("OMP_NUM_THREADS");
strcat(omp_thread_string, omp_threads ? omp_threads : "1");
strcat(omp_thread_string, ",");
#ifdef QUDA_OPENMP
omp_thread_string = std::string("omp_threads=" + std::to_string(omp_get_max_threads()) + ",");
#endif
init = true;
}
return omp_thread_string;
return omp_thread_string.c_str();
}

void errorQuda_(const char *func, const char *file, int line, ...)
Expand Down
47 changes: 47 additions & 0 deletions tests/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,15 @@ target_compile_options(
$<IF:$<CONFIG:RELEASE>,-w,-Wall -Wextra>
$<$<CONFIG:STRICT>:-Werror>
)

# ignore any unkown pragmas if not using OpenMP
if(NOT ${QUDA_OPENMP})
target_compile_options(quda_test PUBLIC $<$<COMPILE_LANGUAGE:CXX>:
$<$<CXX_COMPILER_ID:Clang>:-Wno-unknown-pragmas>
$<$<CXX_COMPILER_ID:GNU>:-Wno-unknown-pragmas>
>)
endif()

if(BUILD_SHARED_LIBS)
install(TARGETS quda_test ${QUDA_EXCLUDE_FROM_INSTALL} DESTINATION ${CMAKE_INSTALL_LIBDIR})
endif()
Expand Down Expand Up @@ -265,6 +274,7 @@ if(QUDA_MPI OR QUDA_QMP)
if(DEFINED ENV{QUDA_TEST_GRID_SIZE})
get_test_ranks($ENV{QUDA_TEST_GRID_SIZE} QUDA_TEST_NUM_PROCS)
endif()
message(STATUS "ctest will run on ${QUDA_TEST_NUM_PROCS} processes")
set(QUDA_CTEST_LAUNCH ${MPIEXEC_EXECUTABLE};${MPIEXEC_NUMPROC_FLAG};${QUDA_TEST_NUM_PROCS};${MPIEXEC_PREFLAGS}
CACHE STRING "CTest Launcher command for QUDA's tests")
endif()
Expand Down Expand Up @@ -375,6 +385,18 @@ foreach(pol IN LISTS DSLASH_POLICIES)
set_tests_properties(dslash_${DIRAC_NAME}_policy${pol2} PROPERTIES ENVIRONMENT QUDA_ENABLE_DSLASH_POLICY=${pol})
endif()

add_test(NAME dslash_${DIRAC_NAME}_splitgrid_policy${pol2}
COMMAND ${QUDA_CTEST_LAUNCH} $<TARGET_FILE:dslash_ctest> ${MPIEXEC_POSTFLAGS}
--dslash-type ${DIRAC_NAME}
--all-partitions 0
--test Dslash
--dim 2 4 6 8
--gtest_output=xml:dslash_${DIRAC_NAME}_splitgrid_test_pol${pol2}.xml)
if(polenv)
set_tests_properties(dslash_${DIRAC_NAME}_splitgrid_policy${pol2} PROPERTIES ENVIRONMENT QUDA_ENABLE_DSLASH_POLICY=${pol})
endif()
set_tests_properties(dslash_${DIRAC_NAME}_splitgrid_policy${pol2} PROPERTIES ENVIRONMENT QUDA_TEST_GRID_PARTITION=$ENV{QUDA_TEST_GRID_SIZE})

add_test(NAME benchmark_dslash_${DIRAC_NAME}_policy${pol2}
COMMAND ${QUDA_CTEST_LAUNCH} $<TARGET_FILE:dslash_ctest> ${MPIEXEC_POSTFLAGS}
--dslash-type ${DIRAC_NAME}
Expand Down Expand Up @@ -830,6 +852,17 @@ endif()
set_tests_properties(dslash_${DIRAC_NAME}_mat_policy${pol2} PROPERTIES ENVIRONMENT QUDA_ENABLE_DSLASH_POLICY=${pol2})
endif()

add_test(NAME dslash_${DIRAC_NAME}_splitgrid_policy${pol2}
COMMAND ${QUDA_CTEST_LAUNCH} $<TARGET_FILE:staggered_dslash_ctest> ${MPIEXEC_POSTFLAGS}
--dslash-type ${DIRAC_NAME}
--test MatPC
--dim 2 4 6 8
--gtest_output=xml:dslash_${DIRAC_NAME}_matpc_test_pol${pol2}.xml)
if(polenv)
set_tests_properties(dslash_${DIRAC_NAME}_splitgrid_policy${pol2} PROPERTIES ENVIRONMENT QUDA_ENABLE_DSLASH_POLICY=${pol2})
endif()
set_tests_properties(dslash_${DIRAC_NAME}_splitgrid_policy${pol2} PROPERTIES ENVIRONMENT QUDA_TEST_GRID_PARTITION=$ENV{QUDA_TEST_GRID_SIZE})

add_test(NAME benchmark_dslash_${DIRAC_NAME}_policy${pol2}
COMMAND ${QUDA_CTEST_LAUNCH} $<TARGET_FILE:staggered_dslash_ctest> ${MPIEXEC_POSTFLAGS}
--dslash-type ${DIRAC_NAME}
Expand Down Expand Up @@ -931,6 +964,20 @@ foreach(prec IN LISTS TEST_PRECS)
--dim 2 4 6 8 --prec ${prec} --tol ${tol} --tolhq ${tol} --niter 1000
--enable-testing true
--gtest_output=xml:invert_test_wilson_${prec}.xml)

if(DEFINED ENV{QUDA_ENABLE_TUNING})
if($ENV{QUDA_ENABLE_TUNING} EQUAL 0)
add_test(NAME invert_test_splitgrid_wilson_${prec}
COMMAND ${QUDA_CTEST_LAUNCH} $<TARGET_FILE:invert_test> ${MPIEXEC_POSTFLAGS}
--dslash-type wilson --ngcrkrylov 8
--dim 2 4 6 8 --prec ${prec} --tol ${tol} --tolhq ${tol} --niter 1000
--nsrc ${QUDA_TEST_NUM_PROCS}
--enable-testing true
--gtest_output=xml:invert_test_splitgrid_wilson_${prec}.xml)

set_tests_properties(invert_test_splitgrid_wilson_${prec} PROPERTIES ENVIRONMENT QUDA_TEST_GRID_PARTITION=$ENV{QUDA_TEST_GRID_SIZE})
endif()
endif()
endif()

if(QUDA_DIRAC_TWISTED_MASS)
Expand Down
18 changes: 10 additions & 8 deletions tests/dslash_ctest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -37,10 +37,6 @@ class DslashTest : public ::testing::TestWithParam<::testing::tuple<int, int, in
return true;
}

// work out if test_split_grid is enabled
bool test_split_grid = (grid_partition[0] * grid_partition[1] * grid_partition[2] * grid_partition[3] > 1);
if (::testing::get<2>(GetParam()) > 0 && test_split_grid) { return true; }

const std::array<bool, 16> partition_enabled {true, true, true, false, true, false, false, false,
true, false, false, false, true, false, true, true};
if (!ctest_all_partitions && !partition_enabled[::testing::get<2>(GetParam())]) return true;
Expand Down Expand Up @@ -68,8 +64,6 @@ class DslashTest : public ::testing::TestWithParam<::testing::tuple<int, int, in
}

public:
DslashTest() : dslash_test_wrapper(dtest_type) { }

virtual void SetUp()
{
int prec = ::testing::get<0>(GetParam());
Expand All @@ -94,12 +88,20 @@ class DslashTest : public ::testing::TestWithParam<::testing::tuple<int, int, in
commDimPartitionedReset();
}

static void SetUpTestCase() { initQuda(device_ordinal); }
static void SetUpTestCase()
{
initQuda(device_ordinal);
DslashTestWrapper::dtest_type = dtest_type;
}

// Per-test-case tear-down.
// Called after the last test in this test case.
// Can be omitted if not needed.
static void TearDownTestCase() { endQuda(); }
static void TearDownTestCase()
{
DslashTestWrapper::destroy();
endQuda();
}
};

TEST_P(DslashTest, verify)
Expand Down
14 changes: 10 additions & 4 deletions tests/dslash_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,6 @@ class DslashTest : public ::testing::Test
}

public:
DslashTest() : dslash_test_wrapper(dtest_type) { }

virtual void SetUp()
{
dslash_test_wrapper.init_test(argc_copy, argv_copy);
Expand All @@ -43,12 +41,20 @@ class DslashTest : public ::testing::Test

virtual void TearDown() { dslash_test_wrapper.end(); }

static void SetUpTestCase() { initQuda(device_ordinal); }
static void SetUpTestCase()
{
initQuda(device_ordinal);
DslashTestWrapper::dtest_type = dtest_type;
}

// Per-test-case tear-down.
// Called after the last test in this test case.
// Can be omitted if not needed.
static void TearDownTestCase() { endQuda(); }
static void TearDownTestCase()
{
DslashTestWrapper::destroy();
endQuda();
}
};

TEST_F(DslashTest, benchmark) { dslash_test_wrapper.run_test(niter, /**show_metrics =*/true); }
Expand Down
Loading

0 comments on commit 754bece

Please sign in to comment.