Skip to content

Commit

Permalink
Merge pull request #329 from EinsteinToolkit/main
Browse files Browse the repository at this point in the history
Updates from main
  • Loading branch information
lucass-carneiro authored Jan 7, 2025
2 parents 291123f + ad15372 commit 7f6ad2e
Show file tree
Hide file tree
Showing 93 changed files with 3,691 additions and 1,723 deletions.
2 changes: 1 addition & 1 deletion Arith/src/rten.hxx
Original file line number Diff line number Diff line change
Expand Up @@ -177,7 +177,7 @@ public:
template <typename F, typename... Args>
friend constexpr ARITH_INLINE ARITH_DEVICE ARITH_HOST void
fmap_(const F &f, const rten &x, const rten<Args, D> &...args) {
fmap_(f, x.args, args.elts...);
fmap_(f, x.elts, args.elts...);
}

template <
Expand Down
222 changes: 148 additions & 74 deletions Derivs/src/derivs.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,7 @@ using namespace Loop;
template <int CI, int CJ, int CK, typename T>
CCTK_ATTRIBUTE_NOINLINE void
calc_copy(const GF3D5<T> &gf, const GF3D5layout layout,
const GridDescBaseDevice &grid, const GF3D2<const T> &gf0,
const vect<T, dim> dx) {
const GridDescBaseDevice &grid, const GF3D2<const T> &gf0) {
using vreal = simd<T>;
using vbool = simdl<T>;
constexpr std::size_t vsize = std::tuple_size_v<vreal>;
Expand All @@ -27,6 +26,24 @@ calc_copy(const GF3D5<T> &gf, const GF3D5layout layout,
});
}

template <int CI, int CJ, int CK, typename T>
CCTK_ATTRIBUTE_NOINLINE void
calc_copy(const vec<GF3D5<T>, dim> &gf, const GF3D5layout layout,
const GridDescBaseDevice &grid, const vec<GF3D2<const T>, dim> &gf0) {
for (int a = 0; a < dim; ++a)
calc_copy<CI, CJ, CK>(gf(a), layout, grid, gf0(a));
};

template <int CI, int CJ, int CK, typename T>
CCTK_ATTRIBUTE_NOINLINE void calc_copy(const smat<GF3D5<T>, dim> &gf,
const GF3D5layout layout,
const GridDescBaseDevice &grid,
const smat<GF3D2<const T>, dim> &gf0) {
for (int a = 0; a < dim; ++a)
for (int b = a; b < dim; ++b)
calc_copy<CI, CJ, CK>(gf(a, b), layout, grid, gf0(a, b));
};

template <int CI, int CJ, int CK, typename T>
CCTK_ATTRIBUTE_NOINLINE void
calc_derivs(const GF3D5<T> &gf, const vec<GF3D5<T>, dim> &dgf,
Expand All @@ -44,9 +61,8 @@ calc_derivs(const GF3D5<T> &gf, const vec<GF3D5<T>, dim> &dgf,
grid.nghostzones,
[=] CCTK_DEVICE(const PointDesc &p) CCTK_ATTRIBUTE_ALWAYS_INLINE {
const vbool mask = mask_for_loop_tail<vbool>(p.i, p.imax);
// Take account of ghost points
const vbool mask1 =
mask_for_loop_tail<vbool>(p.i, p.imax + deriv_order / 2);
// Take ghost points into account
const vbool mask1 = mask_for_loop_tail<vbool>(p.i, p.imax + 2 / 2);
const GF3D5index index(layout, p.I);
const auto val = gf0(mask, p.I);
const auto dval = calc_deriv<2>(gf0, mask1, p.I, dx);
Expand All @@ -60,9 +76,8 @@ calc_derivs(const GF3D5<T> &gf, const vec<GF3D5<T>, dim> &dgf,
grid.nghostzones,
[=] CCTK_DEVICE(const PointDesc &p) CCTK_ATTRIBUTE_ALWAYS_INLINE {
const vbool mask = mask_for_loop_tail<vbool>(p.i, p.imax);
// Take account of ghost points
const vbool mask1 =
mask_for_loop_tail<vbool>(p.i, p.imax + deriv_order / 2);
// Take ghost points into account
const vbool mask1 = mask_for_loop_tail<vbool>(p.i, p.imax + 4 / 2);
const GF3D5index index(layout, p.I);
const auto val = gf0(mask, p.I);
const auto dval = calc_deriv<4>(gf0, mask1, p.I, dx);
Expand All @@ -76,9 +91,8 @@ calc_derivs(const GF3D5<T> &gf, const vec<GF3D5<T>, dim> &dgf,
grid.nghostzones,
[=] CCTK_DEVICE(const PointDesc &p) CCTK_ATTRIBUTE_ALWAYS_INLINE {
const vbool mask = mask_for_loop_tail<vbool>(p.i, p.imax);
// Take account of ghost points
const vbool mask1 =
mask_for_loop_tail<vbool>(p.i, p.imax + deriv_order / 2);
// Take ghost points into account
const vbool mask1 = mask_for_loop_tail<vbool>(p.i, p.imax + 6 / 2);
const GF3D5index index(layout, p.I);
const auto val = gf0(mask, p.I);
const auto dval = calc_deriv<6>(gf0, mask1, p.I, dx);
Expand All @@ -87,11 +101,50 @@ calc_derivs(const GF3D5<T> &gf, const vec<GF3D5<T>, dim> &dgf,
});
break;

case 8:
grid.loop_int_device<CI, CJ, CK, vsize>(
grid.nghostzones,
[=] CCTK_DEVICE(const PointDesc &p) CCTK_ATTRIBUTE_ALWAYS_INLINE {
const vbool mask = mask_for_loop_tail<vbool>(p.i, p.imax);
// Take ghost points into account
const vbool mask1 = mask_for_loop_tail<vbool>(p.i, p.imax + 8 / 2);
const GF3D5index index(layout, p.I);
const auto val = gf0(mask, p.I);
const auto dval = calc_deriv<8>(gf0, mask1, p.I, dx);
gf.store(mask, index, val);
dgf.store(mask, index, dval);
});
break;

default:
CCTK_VERROR("Unsupported derivative order %d", deriv_order);
}
}

template <int CI, int CJ, int CK, typename T>
CCTK_ATTRIBUTE_NOINLINE void
calc_derivs(const vec<GF3D5<T>, dim> &gf,
const vec<vec<GF3D5<T>, dim>, dim> &dgf, const GF3D5layout layout,
const GridDescBaseDevice &grid, const vec<GF3D2<const T>, dim> &gf0,
const vect<T, dim> dx, const int deriv_order) {
for (int a = 0; a < dim; ++a)
calc_derivs<CI, CJ, CK>(gf(a), dgf(a), layout, grid, gf0(a), dx,
deriv_order);
}

template <int CI, int CJ, int CK, typename T>
CCTK_ATTRIBUTE_NOINLINE void
calc_derivs(const smat<GF3D5<T>, dim> &gf,
const smat<vec<GF3D5<T>, dim>, dim> &dgf, const GF3D5layout layout,
const GridDescBaseDevice &grid,
const smat<GF3D2<const T>, dim> &gf0, const vect<T, dim> dx,
const int deriv_order) {
for (int a = 0; a < dim; ++a)
for (int b = a; b < dim; ++b)
calc_derivs<CI, CJ, CK>(gf(a, b), dgf(a, b), layout, grid, gf0(a, b), dx,
deriv_order);
}

template <int CI, int CJ, int CK, typename T>
CCTK_ATTRIBUTE_NOINLINE void
calc_derivs2(const GF3D5<T> &gf, const vec<GF3D5<T>, dim> &dgf,
Expand All @@ -109,9 +162,8 @@ calc_derivs2(const GF3D5<T> &gf, const vec<GF3D5<T>, dim> &dgf,
grid.nghostzones,
[=] CCTK_DEVICE(const PointDesc &p) CCTK_ATTRIBUTE_ALWAYS_INLINE {
const vbool mask = mask_for_loop_tail<vbool>(p.i, p.imax);
// Take account of ghost points
const vbool mask1 =
mask_for_loop_tail<vbool>(p.i, p.imax + deriv_order / 2);
// Take ghost points into account
const vbool mask1 = mask_for_loop_tail<vbool>(p.i, p.imax + 2 / 2);
const GF3D5index index(layout, p.I);
const auto val = gf0(mask, p.I);
const auto dval = calc_deriv<2>(gf0, mask1, p.I, dx);
Expand All @@ -127,9 +179,8 @@ calc_derivs2(const GF3D5<T> &gf, const vec<GF3D5<T>, dim> &dgf,
grid.nghostzones,
[=] CCTK_DEVICE(const PointDesc &p) CCTK_ATTRIBUTE_ALWAYS_INLINE {
const vbool mask = mask_for_loop_tail<vbool>(p.i, p.imax);
// Take account of ghost points
const vbool mask1 =
mask_for_loop_tail<vbool>(p.i, p.imax + deriv_order / 2);
// Take ghost points into account
const vbool mask1 = mask_for_loop_tail<vbool>(p.i, p.imax + 4 / 2);
const GF3D5index index(layout, p.I);
const auto val = gf0(mask, p.I);
const auto dval = calc_deriv<4>(gf0, mask1, p.I, dx);
Expand All @@ -145,9 +196,8 @@ calc_derivs2(const GF3D5<T> &gf, const vec<GF3D5<T>, dim> &dgf,
grid.nghostzones,
[=] CCTK_DEVICE(const PointDesc &p) CCTK_ATTRIBUTE_ALWAYS_INLINE {
const vbool mask = mask_for_loop_tail<vbool>(p.i, p.imax);
// Take account of ghost points
const vbool mask1 =
mask_for_loop_tail<vbool>(p.i, p.imax + deriv_order / 2);
// Take ghost points into account
const vbool mask1 = mask_for_loop_tail<vbool>(p.i, p.imax + 6 / 2);
const GF3D5index index(layout, p.I);
const auto val = gf0(mask, p.I);
const auto dval = calc_deriv<6>(gf0, mask1, p.I, dx);
Expand All @@ -158,84 +208,108 @@ calc_derivs2(const GF3D5<T> &gf, const vec<GF3D5<T>, dim> &dgf,
});
break;

case 8:
grid.loop_int_device<CI, CJ, CK, vsize>(
grid.nghostzones,
[=] CCTK_DEVICE(const PointDesc &p) CCTK_ATTRIBUTE_ALWAYS_INLINE {
const vbool mask = mask_for_loop_tail<vbool>(p.i, p.imax);
// Take ghost points into account
const vbool mask1 = mask_for_loop_tail<vbool>(p.i, p.imax + 8 / 2);
const GF3D5index index(layout, p.I);
const auto val = gf0(mask, p.I);
const auto dval = calc_deriv<8>(gf0, mask1, p.I, dx);
const auto ddval = calc_deriv2<8>(gf0, mask1, p.I, dx);
gf.store(mask, index, val);
dgf.store(mask, index, dval);
ddgf.store(mask, index, ddval);
});
break;

default:
CCTK_VERROR("Unsupported derivative order %d", deriv_order);
}
}

template <int CI, int CJ, int CK, typename T>
CCTK_ATTRIBUTE_NOINLINE void calc_derivs2(
const vec<GF3D5<T>, dim> &gf, const vec<vec<GF3D5<T>, dim>, dim> &dgf,
const vec<smat<GF3D5<T>, dim>, dim> &ddgf, const GF3D5layout layout,
const GridDescBaseDevice &grid, const vec<GF3D2<const T>, dim> &gf0,
const vect<T, dim> dx, const int deriv_order) {
for (int a = 0; a < dim; ++a)
calc_derivs2<CI, CJ, CK>(gf(a), dgf(a), ddgf(a), layout, grid, gf0(a), dx,
deriv_order);
}

template <int CI, int CJ, int CK, typename T>
CCTK_ATTRIBUTE_NOINLINE void calc_derivs2(
const smat<GF3D5<T>, dim> &gf, const smat<vec<GF3D5<T>, dim>, dim> &dgf,
const smat<smat<GF3D5<T>, dim>, dim> &ddgf, const GF3D5layout layout,
const GridDescBaseDevice &grid, const smat<GF3D2<const T>, dim> &gf0,
const vect<T, dim> dx, const int deriv_order) {
for (int a = 0; a < dim; ++a)
for (int b = a; b < dim; ++b)
calc_derivs2<CI, CJ, CK>(gf(a, b), dgf(a, b), ddgf(a, b), layout, grid,
gf0(a, b), dx, deriv_order);
}

////////////////////////////////////////////////////////////////////////////////

// Template instantiations

using T = CCTK_REAL;

template CCTK_DEVICE CCTK_HOST Arith::vec<Arith::simd<T>, Loop::dim>
calc_deriv<2>(const Loop::GF3D2<const T> &gf, const Arith::simdl<T> &mask,
const Arith::vect<int, Loop::dim> &I,
const Arith::vect<T, Loop::dim> &dx);
template CCTK_DEVICE CCTK_HOST Arith::vec<Arith::simd<T>, Loop::dim>
calc_deriv<4>(const Loop::GF3D2<const T> &gf, const Arith::simdl<T> &mask,
const Arith::vect<int, Loop::dim> &I,
const Arith::vect<T, Loop::dim> &dx);
template CCTK_DEVICE CCTK_HOST Arith::vec<Arith::simd<T>, Loop::dim>
calc_deriv<6>(const Loop::GF3D2<const T> &gf, const Arith::simdl<T> &mask,
const Arith::vect<int, Loop::dim> &I,
const Arith::vect<T, Loop::dim> &dx);

template CCTK_DEVICE CCTK_HOST Arith::vec<T, Loop::dim>
calc_deriv<2>(const Loop::GF3D2<const T> &gf,
const Arith::vect<int, Loop::dim> &I,
const Arith::vect<T, Loop::dim> &dx);
template CCTK_DEVICE CCTK_HOST Arith::vec<T, Loop::dim>
calc_deriv<4>(const Loop::GF3D2<const T> &gf,
const Arith::vect<int, Loop::dim> &I,
const Arith::vect<T, Loop::dim> &dx);
template CCTK_DEVICE CCTK_HOST Arith::vec<T, Loop::dim>
calc_deriv<6>(const Loop::GF3D2<const T> &gf,
const Arith::vect<int, Loop::dim> &I,
const Arith::vect<T, Loop::dim> &dx);

template CCTK_DEVICE CCTK_HOST Arith::smat<Arith::simd<T>, Loop::dim>
calc_deriv2<2>(const Loop::GF3D2<const T> &gf, const Arith::simdl<T> &mask,
const Arith::vect<int, Loop::dim> &I,
const Arith::vect<T, Loop::dim> &dx);
template CCTK_DEVICE CCTK_HOST Arith::smat<Arith::simd<T>, Loop::dim>
calc_deriv2<4>(const Loop::GF3D2<const T> &gf, const Arith::simdl<T> &mask,
const Arith::vect<int, Loop::dim> &I,
const Arith::vect<T, Loop::dim> &dx);
template CCTK_DEVICE CCTK_HOST Arith::smat<Arith::simd<T>, Loop::dim>
calc_deriv2<6>(const Loop::GF3D2<const T> &gf, const Arith::simdl<T> &mask,
const Arith::vect<int, Loop::dim> &I,
const Arith::vect<T, Loop::dim> &dx);

template CCTK_DEVICE CCTK_HOST Arith::smat<T, Loop::dim>
calc_deriv2<2>(const Loop::GF3D2<const T> &gf,
const Arith::vect<int, Loop::dim> &I,
const Arith::vect<T, Loop::dim> &dx);
template CCTK_DEVICE CCTK_HOST Arith::smat<T, Loop::dim>
calc_deriv2<4>(const Loop::GF3D2<const T> &gf,
const Arith::vect<int, Loop::dim> &I,
const Arith::vect<T, Loop::dim> &dx);
template CCTK_DEVICE CCTK_HOST Arith::smat<T, Loop::dim>
calc_deriv2<6>(const Loop::GF3D2<const T> &gf,
const Arith::vect<int, Loop::dim> &I,
const Arith::vect<T, Loop::dim> &dx);

template void calc_copy<0, 0, 0>(const GF3D5<T> &gf, const GF3D5layout layout,
const GridDescBaseDevice &grid,
const GF3D2<const T> &gf0,
const vect<T, dim> dx);
const GF3D2<const T> &gf0);

template void calc_copy<0, 0, 0>(const vec<GF3D5<T>, dim> &gf,
const GF3D5layout layout,
const GridDescBaseDevice &grid,
const vec<GF3D2<const T>, dim> &gf0);

template void calc_copy<0, 0, 0>(const smat<GF3D5<T>, dim> &gf,
const GF3D5layout layout,
const GridDescBaseDevice &grid,
const smat<GF3D2<const T>, dim> &gf0);
template void
calc_derivs<0, 0, 0>(const GF3D5<T> &gf, const vec<GF3D5<T>, dim> &dgf,
const GF3D5layout layout, const GridDescBaseDevice &grid,
const GF3D2<const T> &gf0, const vect<T, dim> dx,
const int deriv_order);

template void calc_derivs<0, 0, 0>(const vec<GF3D5<T>, dim> &gf,
const vec<vec<GF3D5<T>, dim>, dim> &dgf,
const GF3D5layout layout,
const GridDescBaseDevice &grid,
const vec<GF3D2<const T>, dim> &gf0,
const vect<T, dim> dx,
const int deriv_order);

template void calc_derivs<0, 0, 0>(const smat<GF3D5<T>, dim> &gf,
const smat<vec<GF3D5<T>, dim>, dim> &dgf,
const GF3D5layout layout,
const GridDescBaseDevice &grid,
const smat<GF3D2<const T>, dim> &gf0,
const vect<T, dim> dx,
const int deriv_order);

template void
calc_derivs2<0, 0, 0>(const GF3D5<T> &gf, const vec<GF3D5<T>, dim> &dgf,
const smat<GF3D5<T>, dim> &ddgf, const GF3D5layout layout,
const GridDescBaseDevice &grid, const GF3D2<const T> &gf0,
const vect<T, dim> dx, const int deriv_order);

template void calc_derivs2<0, 0, 0>(
const vec<GF3D5<T>, dim> &gf, const vec<vec<GF3D5<T>, dim>, dim> &dgf,
const vec<smat<GF3D5<T>, dim>, dim> &ddgf, const GF3D5layout layout,
const GridDescBaseDevice &grid, const vec<GF3D2<const T>, dim> &gf0,
const vect<T, dim> dx, const int deriv_order);

template void calc_derivs2<0, 0, 0>(
const smat<GF3D5<T>, dim> &gf, const smat<vec<GF3D5<T>, dim>, dim> &dgf,
const smat<smat<GF3D5<T>, dim>, dim> &ddgf, const GF3D5layout layout,
const GridDescBaseDevice &grid, const smat<GF3D2<const T>, dim> &gf0,
const vect<T, dim> dx, const int deriv_order);

} // namespace Derivs
Loading

0 comments on commit 7f6ad2e

Please sign in to comment.