Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[#1548][vector crypto] adding register index LMUL alignment checks #1815

Open
wants to merge 10 commits into
base: master
Choose a base branch
from
4 changes: 4 additions & 0 deletions riscv/insns/vghsh_vv.h
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,10 @@ require_zvkg;
require(P.VU.vsew == 32);
require_egw_fits(128);

require_vd_align_lmul;
require_vs2_align_lmul;
require_vs1_align_lmul;

VI_ZVK_VD_VS1_VS2_EGU32x4_NOVM_LOOP(
{},
{
Expand Down
3 changes: 3 additions & 0 deletions riscv/insns/vgmul_vv.h
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,9 @@ require_zvkg;
require(P.VU.vsew == 32);
require_egw_fits(128);

require_vd_align_lmul;
require_vs2_align_lmul;

VI_ZVK_VD_VS2_EGU32x4_NOVM_LOOP(
{},
{
Expand Down
1 change: 0 additions & 1 deletion riscv/insns/vsha2ch_vv.h
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@

#include "zvknh_ext_macros.h"

// Ensures VSEW is 32 or 64, and vd doesn't overlap with either vs1 or vs2.
require_vsha2_common_constraints;

switch (P.VU.vsew) {
Expand Down
1 change: 0 additions & 1 deletion riscv/insns/vsha2cl_vv.h
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@

#include "zvknh_ext_macros.h"

// Ensures VSEW is 32 or 64, and vd doesn't overlap with either vs1 or vs2.
require_vsha2_common_constraints;

switch (P.VU.vsew) {
Expand Down
1 change: 0 additions & 1 deletion riscv/insns/vsha2ms_vv.h
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@

#include "zvknh_ext_macros.h"

// Ensures VSEW is 32 or 64, and vd doesn't overlap with either vs1 or vs2.
require_vsha2_common_constraints;

switch (P.VU.vsew) {
Expand Down
1 change: 1 addition & 0 deletions riscv/insns/vsm3me_vv.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
(ZVKSH_P1((M16) ^ (M9) ^ ZVK_ROL32((M3), 15)) ^ ZVK_ROL32((M13), 7) ^ (M6))

require_vsm3_constraints;
require_vs1_align_lmul;

VI_ZVK_VD_VS1_VS2_EGU32x8_NOVM_LOOP(
{},
Expand Down
3 changes: 3 additions & 0 deletions riscv/insns/vsm4k_vi.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,9 @@ static constexpr uint32_t zvksed_ck[32] = {

require_vsm4_constraints;

require_vd_align_lmul;
require_vs2_align_lmul;

VI_ZVK_VD_VS2_ZIMM5_EGU32x4_NOVM_LOOP(
{},
// The following statements will be executed before the first execution
Expand Down
5 changes: 4 additions & 1 deletion riscv/insns/vsm4r_vs.h
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,10 @@

require_vsm4_constraints;
// No overlap of vd and vs2.
require(insn.rd() != insn.rs2());
require_no_overlap_eglmul(insn.rd(), insn.rs2());
// vd and vs2 are LMUL (resp. EGW / VLEN) aligned
require_vd_align_lmul;
require_vs2_align_eglmul(128);

VI_ZVK_VD_VS2_NOOPERANDS_PRELOOP_EGU32x4_NOVM_LOOP(
{},
Expand Down
3 changes: 3 additions & 0 deletions riscv/insns/vsm4r_vv.h
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,9 @@

require_vsm4_constraints;

require_vd_align_lmul;
require_vs2_align_lmul;

VI_ZVK_VD_VS2_EGU32x4_NOVM_LOOP(
{},
{
Expand Down
35 changes: 35 additions & 0 deletions riscv/zvk_ext_macros.h
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,41 @@
// EGW <= (LMUL * VLEN)
#define require_egw_fits(EGW) \
  require((P.VU.VLEN * P.VU.vflmul) >= (EGW))

// Ensures that a vector register index is aligned to LMUL.
// The alignment check is only performed when LMUL (P.VU.vflmul) is greater
// than one; for LMUL <= 1 any register index is accepted.
// The body is wrapped in do { } while (0) so that the macro expands to a
// single statement and can be followed by ';' anywhere a statement is
// allowed, including as the unbraced body of an if/else.
#define require_vreg_align_lmul(VREG_NUM) \
  do { \
    if (P.VU.vflmul > 1) { \
      require_align((VREG_NUM), P.VU.vflmul); \
    } \
  } while (0)

// Ensures that a vector register index is aligned to EMUL, evaluated as
// EGW / VLEN.
// The check is only enabled if this value is greater than one (no index
// alignment check is required for fractional EMUL).
// EGW is parenthesized so that an expression argument is divided as a whole.
// The quotient is kept in an unsigned integer.
// NOTE(review): assumes P.VU.VLEN has an integer type, so the original
// "float = EGW / VLEN" already held a truncated integer quotient; confirm.
#define require_vreg_align_eglmul(EGW, VREG_NUM) \
  do { \
    const unsigned zvk_eglmul = (EGW) / P.VU.VLEN; \
    if (zvk_eglmul > 1) { \
      require_align((VREG_NUM), zvk_eglmul); \
    } \
  } while (0)

// Shorthands applying the register index alignment checks to the operands
// of the current instruction: insn.rd() (vd), insn.rs2() (vs2) and
// insn.rs1() (vs1). The _lmul variants check alignment against LMUL; the
// _eglmul variant checks vs2 against EGW / VLEN for the given EGW.
#define require_vd_align_lmul require_vreg_align_lmul(insn.rd())
#define require_vs2_align_lmul require_vreg_align_lmul(insn.rs2())
#define require_vs1_align_lmul require_vreg_align_lmul(insn.rs1())
#define require_vs2_align_eglmul(EGW) require_vreg_align_eglmul(EGW, insn.rs2())

// Ensures that vs2 and vd do not overlap, assuming vd encodes an LMUL-wide
// vector register group and vs2 encodes a vector register group of
// vs2_EMUL = ceil(EGW / VLEN) registers.
// Assumption: LMUL >= vs2_EMUL, which is enforced independently through
// require_egw_fits.
// Both indices are divided by max(LMUL, 1) (integer division) and the
// quotients are required to differ, i.e. the two indices must not fall in
// the same LMUL-sized block of registers.
// NOTE(review): this presumably relies on vd being LMUL aligned, which the
// visible callers check separately with require_vd_align_lmul; confirm.
// NOTE(review): could also be implemented by masking the
// log2(max(LMUL, 1)) LSBs of the register indices; need to check whether
// a macro performing this function already exists.
// Macro arguments are parenthesized so expression arguments divide as a whole.
#define require_no_overlap_eglmul(vd, vs2) \
  do { \
    const int zvk_vd_emul = P.VU.vflmul < 1.f ? 1 : (int) P.VU.vflmul; \
    require(((vd) / zvk_vd_emul) != ((vs2) / zvk_vd_emul)); \
  } while (0)

// Checks that the vector unit state (vtype and vl) can be interpreted
// as element groups with EEW=32, EGS=4 (four 32-bits elements per group),
// for an effective element group width of EGW=128 bits.
Expand Down
12 changes: 11 additions & 1 deletion riscv/zvkned_ext_macros.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@
// - Zvkned is enabled
// - EGW (128) <= LMUL * VLEN
// - vd and vs2 cannot overlap
// - vd is LMUL aligned
// - vs2 is ceil(EGW / VLEN) aligned
//
// The constraint that vstart and vl are both EGS (4) aligned
// is checked in the VI_ZVK_..._EGU32x4_..._LOOP macros.
Expand All @@ -18,13 +20,17 @@
require_zvkned; \
require(P.VU.vsew == 32); \
require_egw_fits(128); \
require(insn.rd() != insn.rs2()); \
require_no_overlap_eglmul(insn.rd(), insn.rs2()); \
require_vd_align_lmul; \
require_vs2_align_eglmul(128); \
} while (false)

// vaes*.vv instruction constraints. Those are the same as the .vs ones,
// except that the .vv variants have no overlap constraint and require vs2
// to be LMUL aligned (instead of ceil(EGW / VLEN) aligned).
// - Zvkned is enabled
// - EGW (128) <= LMUL * VLEN
// - vd is LMUL aligned
// - vs2 is LMUL aligned
//
// The constraint that vstart and vl are both EGS (4) aligned
// is checked in the VI_ZVK_..._EGU32x4_..._LOOP macros.
Expand All @@ -33,6 +39,8 @@
require_zvkned; \
require(P.VU.vsew == 32); \
require_egw_fits(128); \
require_vd_align_lmul; \
require_vs2_align_lmul; \
} while (false)

// vaeskf*.vi instruction constraints. Those are the same as the .vv ones.
Expand All @@ -41,6 +49,8 @@
require_zvkned; \
require(P.VU.vsew == 32); \
require_egw_fits(128); \
require_vd_align_lmul; \
require_vs2_align_lmul; \
} while (false)

#define VAES_XTIME(A) (((A) << 1) ^ (((A) & 0x80) ? 0x1b : 0))
Expand Down
6 changes: 6 additions & 0 deletions riscv/zvknh_ext_macros.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,9 @@
// Constraints common to all vsha* instructions, across all VSEW:
// - VSEW is 32 (SHA-256) or 64 (SHA-512)
// - No overlap of vd with vs1 or vs2.
// - vd is LMUL aligned
// - vs1 is LMUL aligned
// - vs2 is LMUL aligned
//
// The constraint that vstart and vl are both EGS (4) aligned
// is checked in the VI_..._EGU32x4_..._LOOP and VI_..._EGU64x4_..._LOOP
Expand All @@ -18,6 +21,9 @@
require(P.VU.vsew == 32 || P.VU.vsew == 64); \
require(insn.rd() != insn.rs1()); \
require(insn.rd() != insn.rs2()); \
require_vd_align_lmul; \
require_vs2_align_lmul; \
require_vs1_align_lmul; \
} while (false)

// Constraints on vsha2 instructions that must be verified when VSEW==32.
Expand Down
4 changes: 4 additions & 0 deletions riscv/zvksh_ext_macros.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@
// - VSEW == 32
// - EGW (256) <= LMUL * VLEN
// - No overlap of vd and vs2.
// - vd is LMUL aligned
// - vs2 is LMUL aligned
//
// The constraint that vstart and vl are both EGS (8) aligned
// is checked in the VI_ZVK_..._EGU32x8_..._LOOP macros.
Expand All @@ -20,6 +22,8 @@
require(P.VU.vsew == 32); \
require_egw_fits(256); \
require(insn.rd() != insn.rs2()); \
require_vd_align_lmul; \
require_vs2_align_lmul; \
} while (false)

#define FF1(X, Y, Z) ((X) ^ (Y) ^ (Z))
Expand Down
Loading