Skip to content

Commit

Permalink
ifpack2: Remove Kokkos::subview calls from ComputeResidualVector kernel.
Browse files Browse the repository at this point in the history
This fixes a large performance regression seen in SPARC after the
kokkos-3.6.01 promotion; see issue kokkos/kokkos#6533.
  • Loading branch information
vbrunini committed Oct 24, 2023
1 parent dcd4ed5 commit a877537
Showing 1 changed file with 12 additions and 8 deletions.
20 changes: 12 additions & 8 deletions packages/ifpack2/src/Ifpack2_BlockTriDiContainer_impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -3466,10 +3466,12 @@ namespace Ifpack2 {
const local_ordinal_type num_local_rows = lclrow.extent(0);

// subview pattern
auto bb = Kokkos::subview(b, block_range, 0);
auto xx = Kokkos::subview(x, block_range, 0);
auto xx_remote = Kokkos::subview(x_remote, block_range, 0);
auto yy = Kokkos::subview(y_packed_scalar, 0, block_range, 0, 0);
using subview_1D_right_t = decltype(Kokkos::subview(b, block_range, 0));
subview_1D_right_t bb(nullptr, blocksize);
subview_1D_right_t xx(nullptr, blocksize);
subview_1D_right_t xx_remote(nullptr, blocksize);
using subview_1D_stride_t = decltype(Kokkos::subview(y_packed_scalar, 0, block_range, 0, 0));
subview_1D_stride_t yy(nullptr, Kokkos::LayoutStride(blocksize, y_packed_scalar.stride_1()));
auto A_block = ConstUnmanaged<tpetra_block_access_view_type>(NULL, blocksize, blocksize);

const local_ordinal_type lr = lclrow(rowidx);
Expand Down Expand Up @@ -3584,10 +3586,12 @@ namespace Ifpack2 {
const local_ordinal_type num_local_rows = lclrow.extent(0);

// subview pattern
auto bb = Kokkos::subview(b, block_range, 0);
auto xx = bb; //Kokkos::subview(x, block_range, 0);
auto xx_remote = bb; //Kokkos::subview(x_remote, block_range, 0);
auto yy = Kokkos::subview(y_packed_scalar, 0, block_range, 0, 0);
using subview_1D_right_t = decltype(Kokkos::subview(b, block_range, 0));
subview_1D_right_t bb(nullptr, blocksize);
subview_1D_right_t xx(nullptr, blocksize);
subview_1D_right_t xx_remote(nullptr, blocksize);
using subview_1D_stride_t = decltype(Kokkos::subview(y_packed_scalar, 0, block_range, 0, 0));
subview_1D_stride_t yy(nullptr, Kokkos::LayoutStride(blocksize, y_packed_scalar.stride_1()));
auto A_block = ConstUnmanaged<tpetra_block_access_view_type>(NULL, blocksize, blocksize);
auto colindsub_used = (P == 0 ? colindsub : colindsub_remote);
auto rowptr_used = (P == 0 ? rowptr : rowptr_remote);
Expand Down

0 comments on commit a877537

Please sign in to comment.