diff --git a/llvm/lib/Target/RISCV/MCA/RISCVCustomBehaviour.cpp b/llvm/lib/Target/RISCV/MCA/RISCVCustomBehaviour.cpp index 1bb129702661b3..aba2511959af03 100644 --- a/llvm/lib/Target/RISCV/MCA/RISCVCustomBehaviour.cpp +++ b/llvm/lib/Target/RISCV/MCA/RISCVCustomBehaviour.cpp @@ -212,9 +212,10 @@ getEEWAndEMULForUnitStrideLoadStore(unsigned Opcode, RISCVII::VLMUL LMUL, llvm_unreachable("Opcode is not a vector unit stride load nor store"); } - uint8_t EMUL = - static_cast<uint8_t>(RISCVVType::getSameRatioLMUL(SEW, LMUL, EEW)); - return std::make_pair(EEW, EMUL); + auto EMUL = RISCVVType::getSameRatioLMUL(SEW, LMUL, EEW); + if (!EMUL) + llvm_unreachable("Invalid SEW or LMUL for new ratio"); + return std::make_pair(EEW, *EMUL); } unsigned RISCVInstrumentManager::getSchedClassID( diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.cpp index ed8ae4a7a4550d..edcca8805dfc01 100644 --- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.cpp +++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.cpp @@ -206,12 +206,14 @@ unsigned RISCVVType::getSEWLMULRatio(unsigned SEW, RISCVII::VLMUL VLMul) { return (SEW * 8) / LMul; } -RISCVII::VLMUL RISCVVType::getSameRatioLMUL(unsigned SEW, RISCVII::VLMUL VLMUL, - unsigned EEW) { +std::optional<RISCVII::VLMUL> +RISCVVType::getSameRatioLMUL(unsigned SEW, RISCVII::VLMUL VLMUL, unsigned EEW) { unsigned Ratio = RISCVVType::getSEWLMULRatio(SEW, VLMUL); unsigned EMULFixedPoint = (EEW * 8) / Ratio; bool Fractional = EMULFixedPoint < 8; unsigned EMUL = Fractional ? 
8 / EMULFixedPoint : EMULFixedPoint / 8; + if (!isValidLMUL(EMUL, Fractional)) + return std::nullopt; return RISCVVType::encodeLMUL(EMUL, Fractional); } diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h index e7181eadd49738..00b4751905f6ae 100644 --- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h +++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h @@ -535,8 +535,8 @@ void printVType(unsigned VType, raw_ostream &OS); unsigned getSEWLMULRatio(unsigned SEW, RISCVII::VLMUL VLMul); -RISCVII::VLMUL getSameRatioLMUL(unsigned SEW, RISCVII::VLMUL VLMUL, - unsigned EEW); +std::optional<RISCVII::VLMUL> +getSameRatioLMUL(unsigned SEW, RISCVII::VLMUL VLMUL, unsigned EEW); } // namespace RISCVVType namespace RISCVRVC { diff --git a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp index 9d2b2c3b3f5926..27f281fff6fc59 100644 --- a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp +++ b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp @@ -539,6 +539,8 @@ class VSETVLIInfo { MaskAgnostic = MA; } + void setVLMul(RISCVII::VLMUL VLMul) { this->VLMul = VLMul; } + unsigned encodeVTYPE() const { assert(isValid() && !isUnknown() && !SEWLMULRatioOnly && "Can't encode VTYPE for uninitialized or unknown"); @@ -1038,6 +1040,17 @@ void RISCVInsertVSETVLI::transferBefore(VSETVLIInfo &Info, if (!RISCVII::hasVLOp(TSFlags)) return; + // If we don't use LMUL or the SEW/LMUL ratio, then adjust LMUL so that we + // maintain the SEW/LMUL ratio. This allows us to eliminate VL toggles in more + // places. 
+ DemandedFields Demanded = getDemanded(MI, MRI, ST); + if (!Demanded.LMUL && !Demanded.SEWLMULRatio && Info.isValid() && + PrevInfo.isValid() && !Info.isUnknown() && !PrevInfo.isUnknown()) { + if (auto NewVLMul = RISCVVType::getSameRatioLMUL( + PrevInfo.getSEW(), PrevInfo.getVLMUL(), Info.getSEW())) + Info.setVLMul(*NewVLMul); + } + // For vmv.s.x and vfmv.s.f, there are only two behaviors, VL = 0 and // VL > 0. We can discard the user requested AVL and just use the last // one if we can prove it equally zero. This removes a vsetvli entirely diff --git a/llvm/test/CodeGen/RISCV/double_reduct.ll b/llvm/test/CodeGen/RISCV/double_reduct.ll index cecaa9d24f8bcc..92f78032e81bfc 100644 --- a/llvm/test/CodeGen/RISCV/double_reduct.ll +++ b/llvm/test/CodeGen/RISCV/double_reduct.ll @@ -90,7 +90,7 @@ define i16 @add_ext_i16(<16 x i8> %a, <16 x i8> %b) { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; CHECK-NEXT: vwaddu.vv v10, v8, v9 -; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; CHECK-NEXT: vmv.s.x v8, zero ; CHECK-NEXT: vredsum.vs v8, v10, v8 ; CHECK-NEXT: vmv.x.s a0, v8 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-deinterleave-load.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-deinterleave-load.ll index 25177734325ce1..83edd49bc96376 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-deinterleave-load.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-deinterleave-load.ll @@ -22,9 +22,9 @@ define {<16 x i1>, <16 x i1>} @vector_deinterleave_load_v16i1_v32i1(ptr %p) { ; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 ; CHECK-NEXT: vadd.vi v12, v11, -16 ; CHECK-NEXT: li a0, -256 -; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, ma +; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; CHECK-NEXT: vmv.s.x v0, a0 -; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, mu +; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, mu ; CHECK-NEXT: vrgather.vv v9, v8, v12, v0.t ; CHECK-NEXT: vmsne.vi v9, v9, 0 ; CHECK-NEXT: 
vadd.vi v12, v11, 1 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert.ll index cbcca9d2696f4b..3cc7371c1ce9ac 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert.ll @@ -550,9 +550,9 @@ define void @insertelt_c6_v8i64_0_add(ptr %x, ptr %y) { ; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; CHECK-NEXT: vle64.v v8, (a0) ; CHECK-NEXT: li a2, 6 -; CHECK-NEXT: vsetivli zero, 8, e64, m1, tu, ma +; CHECK-NEXT: vsetvli zero, zero, e64, m4, tu, ma ; CHECK-NEXT: vmv.s.x v8, a2 -; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma +; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma ; CHECK-NEXT: vle64.v v12, (a1) ; CHECK-NEXT: vadd.vv v8, v8, v12 ; CHECK-NEXT: vse64.v v8, (a0) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll index e9412019a0dec8..85939377c6db08 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll @@ -567,9 +567,9 @@ define void @buildvec_seq_v9i8(ptr %x) { ; CHECK-NEXT: vmv.v.i v8, 3 ; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 ; CHECK-NEXT: li a1, 146 -; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, ma +; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; CHECK-NEXT: vmv.s.x v0, a1 -; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, ma ; CHECK-NEXT: vmerge.vim v8, v8, 2, v0 ; CHECK-NEXT: vsetivli zero, 9, e8, m1, ta, ma ; CHECK-NEXT: vse8.v v8, (a0) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll index e6868abdb5b1d7..c95d144a970895 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll @@ -1101,21 +1101,20 @@ define void @urem_v2i64(ptr %x, ptr %y) { define void @mulhu_v16i8(ptr %x) { ; CHECK-LABEL: mulhu_v16i8: 
; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) ; CHECK-NEXT: lui a1, 3 ; CHECK-NEXT: addi a1, a1, -2044 -; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, ma ; CHECK-NEXT: vmv.s.x v0, a1 -; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, ma ; CHECK-NEXT: vmv.v.i v9, 0 ; CHECK-NEXT: li a1, -128 ; CHECK-NEXT: vmerge.vxm v10, v9, a1, v0 ; CHECK-NEXT: lui a1, 1 ; CHECK-NEXT: addi a2, a1, 32 -; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, ma +; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; CHECK-NEXT: vmv.s.x v0, a2 -; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, ma ; CHECK-NEXT: lui a2, %hi(.LCPI65_0) ; CHECK-NEXT: addi a2, a2, %lo(.LCPI65_0) ; CHECK-NEXT: vle8.v v11, (a2) @@ -1126,21 +1125,21 @@ define void @mulhu_v16i8(ptr %x) { ; CHECK-NEXT: vmulhu.vv v8, v8, v10 ; CHECK-NEXT: vadd.vv v8, v8, v9 ; CHECK-NEXT: li a2, 513 -; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, ma +; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; CHECK-NEXT: vmv.s.x v0, a2 -; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, ma ; CHECK-NEXT: vmv.v.i v9, 4 ; CHECK-NEXT: vmerge.vim v9, v9, 1, v0 ; CHECK-NEXT: addi a1, a1, 78 -; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, ma +; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; CHECK-NEXT: vmv.s.x v0, a1 -; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, ma ; CHECK-NEXT: vmerge.vim v9, v9, 3, v0 ; CHECK-NEXT: lui a1, 8 ; CHECK-NEXT: addi a1, a1, 304 -; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, ma +; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; CHECK-NEXT: vmv.s.x v0, a1 -; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, ma ; CHECK-NEXT: vmerge.vim v9, v9, 2, v0 ; CHECK-NEXT: vsrl.vv v8, v8, v9 ; CHECK-NEXT: vse8.v v8, 
(a0) @@ -1204,9 +1203,9 @@ define void @mulhu_v6i16(ptr %x) { ; CHECK-NEXT: vdivu.vv v9, v10, v9 ; CHECK-NEXT: lui a1, 45217 ; CHECK-NEXT: addi a1, a1, -1785 -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; CHECK-NEXT: vmv.s.x v10, a1 -; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma ; CHECK-NEXT: vsext.vf2 v11, v10 ; CHECK-NEXT: vdivu.vv v8, v8, v11 ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma @@ -1309,10 +1308,10 @@ define void @mulhs_v16i8(ptr %x) { ; CHECK-NEXT: vmv.v.x v9, a1 ; CHECK-NEXT: lui a1, 5 ; CHECK-NEXT: addi a1, a1, -1452 -; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, ma +; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; CHECK-NEXT: vmv.s.x v0, a1 ; CHECK-NEXT: li a1, 57 -; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, ma ; CHECK-NEXT: vmerge.vxm v9, v9, a1, v0 ; CHECK-NEXT: vmulhu.vv v8, v8, v9 ; CHECK-NEXT: vmv.v.i v9, 7 @@ -1367,9 +1366,9 @@ define void @mulhs_v6i16(ptr %x) { ; CHECK-NEXT: vdiv.vv v9, v9, v10 ; CHECK-NEXT: lui a1, 1020016 ; CHECK-NEXT: addi a1, a1, 2041 -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; CHECK-NEXT: vmv.s.x v10, a1 -; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma ; CHECK-NEXT: vsext.vf2 v11, v10 ; CHECK-NEXT: vdiv.vv v8, v8, v11 ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma @@ -4872,21 +4871,21 @@ define void @mulhu_v32i8(ptr %x) { ; LMULMAX2-NEXT: vsetvli zero, a1, e8, m2, ta, ma ; LMULMAX2-NEXT: vle8.v v8, (a0) ; LMULMAX2-NEXT: vmv.v.i v10, 0 -; LMULMAX2-NEXT: lui a2, 163907 -; LMULMAX2-NEXT: addi a2, a2, -2044 -; LMULMAX2-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; LMULMAX2-NEXT: vmv.s.x v0, a2 -; LMULMAX2-NEXT: li a2, -128 -; LMULMAX2-NEXT: vsetvli zero, a1, e8, m2, ta, ma -; LMULMAX2-NEXT: vmerge.vxm v12, v10, a2, v0 -; LMULMAX2-NEXT: lui a2, 66049 
-; LMULMAX2-NEXT: addi a2, a2, 32 -; LMULMAX2-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; LMULMAX2-NEXT: vmv.s.x v0, a2 -; LMULMAX2-NEXT: vsetvli zero, a1, e8, m2, ta, ma -; LMULMAX2-NEXT: lui a2, %hi(.LCPI181_0) -; LMULMAX2-NEXT: addi a2, a2, %lo(.LCPI181_0) -; LMULMAX2-NEXT: vle8.v v14, (a2) +; LMULMAX2-NEXT: lui a1, 163907 +; LMULMAX2-NEXT: addi a1, a1, -2044 +; LMULMAX2-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; LMULMAX2-NEXT: vmv.s.x v0, a1 +; LMULMAX2-NEXT: li a1, -128 +; LMULMAX2-NEXT: vsetvli zero, zero, e8, m2, ta, ma +; LMULMAX2-NEXT: vmerge.vxm v12, v10, a1, v0 +; LMULMAX2-NEXT: lui a1, 66049 +; LMULMAX2-NEXT: addi a1, a1, 32 +; LMULMAX2-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; LMULMAX2-NEXT: vmv.s.x v0, a1 +; LMULMAX2-NEXT: vsetvli zero, zero, e8, m2, ta, ma +; LMULMAX2-NEXT: lui a1, %hi(.LCPI181_0) +; LMULMAX2-NEXT: addi a1, a1, %lo(.LCPI181_0) +; LMULMAX2-NEXT: vle8.v v14, (a1) ; LMULMAX2-NEXT: vmerge.vim v10, v10, 1, v0 ; LMULMAX2-NEXT: vsrl.vv v10, v8, v10 ; LMULMAX2-NEXT: vmulhu.vv v10, v10, v14 @@ -4894,23 +4893,23 @@ define void @mulhu_v32i8(ptr %x) { ; LMULMAX2-NEXT: vmulhu.vv v8, v8, v12 ; LMULMAX2-NEXT: vadd.vv v8, v8, v10 ; LMULMAX2-NEXT: vmv.v.i v10, 4 -; LMULMAX2-NEXT: lui a2, 8208 -; LMULMAX2-NEXT: addi a2, a2, 513 -; LMULMAX2-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; LMULMAX2-NEXT: vmv.s.x v0, a2 -; LMULMAX2-NEXT: vsetvli zero, a1, e8, m2, ta, ma +; LMULMAX2-NEXT: lui a1, 8208 +; LMULMAX2-NEXT: addi a1, a1, 513 +; LMULMAX2-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; LMULMAX2-NEXT: vmv.s.x v0, a1 +; LMULMAX2-NEXT: vsetvli zero, zero, e8, m2, ta, ma ; LMULMAX2-NEXT: vmerge.vim v10, v10, 1, v0 -; LMULMAX2-NEXT: lui a2, 66785 -; LMULMAX2-NEXT: addi a2, a2, 78 -; LMULMAX2-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; LMULMAX2-NEXT: vmv.s.x v0, a2 -; LMULMAX2-NEXT: vsetvli zero, a1, e8, m2, ta, ma +; LMULMAX2-NEXT: lui a1, 66785 +; LMULMAX2-NEXT: addi a1, a1, 78 +; LMULMAX2-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; LMULMAX2-NEXT: 
vmv.s.x v0, a1 +; LMULMAX2-NEXT: vsetvli zero, zero, e8, m2, ta, ma ; LMULMAX2-NEXT: vmerge.vim v10, v10, 3, v0 -; LMULMAX2-NEXT: lui a2, 529160 -; LMULMAX2-NEXT: addi a2, a2, 304 -; LMULMAX2-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; LMULMAX2-NEXT: vmv.s.x v0, a2 -; LMULMAX2-NEXT: vsetvli zero, a1, e8, m2, ta, ma +; LMULMAX2-NEXT: lui a1, 529160 +; LMULMAX2-NEXT: addi a1, a1, 304 +; LMULMAX2-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; LMULMAX2-NEXT: vmv.s.x v0, a1 +; LMULMAX2-NEXT: vsetvli zero, zero, e8, m2, ta, ma ; LMULMAX2-NEXT: vmerge.vim v10, v10, 2, v0 ; LMULMAX2-NEXT: vsrl.vv v8, v8, v10 ; LMULMAX2-NEXT: vse8.v v8, (a0) @@ -5250,11 +5249,11 @@ define void @mulhs_v32i8(ptr %x) { ; LMULMAX2-NEXT: vsetvli zero, a1, e8, m2, ta, ma ; LMULMAX2-NEXT: vle8.v v8, (a0) ; LMULMAX2-NEXT: vmv.v.i v10, 7 -; LMULMAX2-NEXT: lui a2, 304453 -; LMULMAX2-NEXT: addi a2, a2, -1452 -; LMULMAX2-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; LMULMAX2-NEXT: vmv.s.x v0, a2 -; LMULMAX2-NEXT: vsetvli zero, a1, e8, m2, ta, ma +; LMULMAX2-NEXT: lui a1, 304453 +; LMULMAX2-NEXT: addi a1, a1, -1452 +; LMULMAX2-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; LMULMAX2-NEXT: vmv.s.x v0, a1 +; LMULMAX2-NEXT: vsetvli zero, zero, e8, m2, ta, ma ; LMULMAX2-NEXT: vmerge.vim v10, v10, 1, v0 ; LMULMAX2-NEXT: li a1, -123 ; LMULMAX2-NEXT: vmv.v.x v12, a1 @@ -5267,15 +5266,14 @@ define void @mulhs_v32i8(ptr %x) { ; ; LMULMAX1-LABEL: mulhs_v32i8: ; LMULMAX1: # %bb.0: -; LMULMAX1-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; LMULMAX1-NEXT: vsetivli zero, 16, e16, m2, ta, ma ; LMULMAX1-NEXT: vle8.v v8, (a0) ; LMULMAX1-NEXT: addi a1, a0, 16 ; LMULMAX1-NEXT: vle8.v v9, (a1) ; LMULMAX1-NEXT: lui a2, 5 ; LMULMAX1-NEXT: addi a2, a2, -1452 -; LMULMAX1-NEXT: vsetivli zero, 1, e16, mf4, ta, ma ; LMULMAX1-NEXT: vmv.s.x v0, a2 -; LMULMAX1-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; LMULMAX1-NEXT: vsetvli zero, zero, e8, m1, ta, ma ; LMULMAX1-NEXT: vmv.v.i v10, -9 ; LMULMAX1-NEXT: vmerge.vim v10, v10, 9, v0 ; LMULMAX1-NEXT: 
vdivu.vv v9, v9, v10 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll index 6ee0e4525f5ec7..728cf18e1a77d8 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll @@ -536,15 +536,16 @@ define <4 x i8> @mgather_truemask_v4i8(<4 x ptr> %ptrs, <4 x i8> %passthru) { ; RV64ZVE32F-NEXT: .LBB9_5: # %cond.load ; RV64ZVE32F-NEXT: ld a2, 0(a0) ; RV64ZVE32F-NEXT: lbu a2, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf4, tu, ma +; RV64ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, tu, ma ; RV64ZVE32F-NEXT: vmv.s.x v8, a2 ; RV64ZVE32F-NEXT: andi a2, a1, 2 ; RV64ZVE32F-NEXT: beqz a2, .LBB9_2 ; RV64ZVE32F-NEXT: .LBB9_6: # %cond.load1 ; RV64ZVE32F-NEXT: ld a2, 8(a0) ; RV64ZVE32F-NEXT: lbu a2, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, tu, ma +; RV64ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma ; RV64ZVE32F-NEXT: vmv.s.x v9, a2 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1 ; RV64ZVE32F-NEXT: andi a2, a1, 4 ; RV64ZVE32F-NEXT: beqz a2, .LBB9_3 @@ -636,7 +637,7 @@ define <8 x i8> @mgather_v8i8(<8 x ptr> %ptrs, <8 x i1> %m, <8 x i8> %passthru) ; RV64ZVE32F-NEXT: .LBB11_9: # %cond.load ; RV64ZVE32F-NEXT: ld a2, 0(a0) ; RV64ZVE32F-NEXT: lbu a2, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, mf2, tu, ma +; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, mf4, tu, ma ; RV64ZVE32F-NEXT: vmv.s.x v8, a2 ; RV64ZVE32F-NEXT: andi a2, a1, 2 ; RV64ZVE32F-NEXT: beqz a2, .LBB11_2 @@ -728,7 +729,7 @@ define <8 x i8> @mgather_baseidx_v8i8(ptr %base, <8 x i8> %idxs, <8 x i1> %m, <8 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lbu a2, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, mf2, tu, ma +; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, mf4, tu, ma ; RV64ZVE32F-NEXT: vmv.s.x v9, a2 ; RV64ZVE32F-NEXT: .LBB12_2: # %else ; RV64ZVE32F-NEXT: andi a2, a1, 2 
@@ -1259,15 +1260,16 @@ define <4 x i16> @mgather_truemask_v4i16(<4 x ptr> %ptrs, <4 x i16> %passthru) { ; RV64ZVE32F-NEXT: .LBB20_5: # %cond.load ; RV64ZVE32F-NEXT: ld a2, 0(a0) ; RV64ZVE32F-NEXT: lh a2, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, mf2, tu, ma +; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m1, tu, ma ; RV64ZVE32F-NEXT: vmv.s.x v8, a2 ; RV64ZVE32F-NEXT: andi a2, a1, 2 ; RV64ZVE32F-NEXT: beqz a2, .LBB20_2 ; RV64ZVE32F-NEXT: .LBB20_6: # %cond.load1 ; RV64ZVE32F-NEXT: ld a2, 8(a0) ; RV64ZVE32F-NEXT: lh a2, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, tu, ma +; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.s.x v9, a2 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1 ; RV64ZVE32F-NEXT: andi a2, a1, 4 ; RV64ZVE32F-NEXT: beqz a2, .LBB20_3 @@ -1359,7 +1361,7 @@ define <8 x i16> @mgather_v8i16(<8 x ptr> %ptrs, <8 x i1> %m, <8 x i16> %passthr ; RV64ZVE32F-NEXT: .LBB22_9: # %cond.load ; RV64ZVE32F-NEXT: ld a2, 0(a0) ; RV64ZVE32F-NEXT: lh a2, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, tu, ma +; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, mf2, tu, ma ; RV64ZVE32F-NEXT: vmv.s.x v8, a2 ; RV64ZVE32F-NEXT: andi a2, a1, 2 ; RV64ZVE32F-NEXT: beqz a2, .LBB22_2 @@ -1454,7 +1456,7 @@ define <8 x i16> @mgather_baseidx_v8i8_v8i16(ptr %base, <8 x i8> %idxs, <8 x i1> ; RV64ZVE32F-NEXT: slli a2, a2, 1 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lh a2, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, tu, ma +; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, mf2, tu, ma ; RV64ZVE32F-NEXT: vmv.s.x v9, a2 ; RV64ZVE32F-NEXT: .LBB23_2: # %else ; RV64ZVE32F-NEXT: andi a2, a1, 2 @@ -1466,8 +1468,9 @@ define <8 x i16> @mgather_baseidx_v8i8_v8i16(ptr %base, <8 x i8> %idxs, <8 x i1> ; RV64ZVE32F-NEXT: slli a2, a2, 1 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lh a2, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, m1, tu, ma +; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma 
; RV64ZVE32F-NEXT: vmv.s.x v10, a2 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 1 ; RV64ZVE32F-NEXT: .LBB23_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma @@ -1492,8 +1495,9 @@ define <8 x i16> @mgather_baseidx_v8i8_v8i16(ptr %base, <8 x i8> %idxs, <8 x i1> ; RV64ZVE32F-NEXT: slli a2, a2, 1 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lh a2, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 6, e16, m1, tu, ma +; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vmv.s.x v8, a2 +; RV64ZVE32F-NEXT: vsetivli zero, 6, e16, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 5 ; RV64ZVE32F-NEXT: .LBB23_9: # %else14 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma @@ -1511,8 +1515,9 @@ define <8 x i16> @mgather_baseidx_v8i8_v8i16(ptr %base, <8 x i8> %idxs, <8 x i1> ; RV64ZVE32F-NEXT: slli a2, a2, 1 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lh a2, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 3, e16, m1, tu, ma +; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vmv.s.x v11, a2 +; RV64ZVE32F-NEXT: vsetivli zero, 3, e16, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v9, v11, 2 ; RV64ZVE32F-NEXT: andi a2, a1, 8 ; RV64ZVE32F-NEXT: beqz a2, .LBB23_6 @@ -1523,8 +1528,9 @@ define <8 x i16> @mgather_baseidx_v8i8_v8i16(ptr %base, <8 x i8> %idxs, <8 x i1> ; RV64ZVE32F-NEXT: slli a2, a2, 1 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lh a2, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, tu, ma +; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vmv.s.x v8, a2 +; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 3 ; RV64ZVE32F-NEXT: andi a2, a1, 16 ; RV64ZVE32F-NEXT: beqz a2, .LBB23_7 @@ -1545,8 +1551,9 @@ define <8 x i16> @mgather_baseidx_v8i8_v8i16(ptr %base, <8 x i8> %idxs, <8 x i1> ; RV64ZVE32F-NEXT: slli a2, a2, 1 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lh a2, 0(a2) -; 
RV64ZVE32F-NEXT: vsetivli zero, 7, e16, m1, tu, ma +; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vmv.s.x v10, a2 +; RV64ZVE32F-NEXT: vsetivli zero, 7, e16, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 6 ; RV64ZVE32F-NEXT: andi a1, a1, -128 ; RV64ZVE32F-NEXT: beqz a1, .LBB23_11 @@ -1557,8 +1564,9 @@ define <8 x i16> @mgather_baseidx_v8i8_v8i16(ptr %base, <8 x i8> %idxs, <8 x i1> ; RV64ZVE32F-NEXT: slli a1, a1, 1 ; RV64ZVE32F-NEXT: add a0, a0, a1 ; RV64ZVE32F-NEXT: lh a0, 0(a0) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vmv.s.x v8, a0 +; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 7 ; RV64ZVE32F-NEXT: vmv1r.v v8, v9 ; RV64ZVE32F-NEXT: ret @@ -1599,7 +1607,7 @@ define <8 x i16> @mgather_baseidx_sext_v8i8_v8i16(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-NEXT: slli a2, a2, 1 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lh a2, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, tu, ma +; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, mf2, tu, ma ; RV64ZVE32F-NEXT: vmv.s.x v9, a2 ; RV64ZVE32F-NEXT: .LBB24_2: # %else ; RV64ZVE32F-NEXT: andi a2, a1, 2 @@ -1611,8 +1619,9 @@ define <8 x i16> @mgather_baseidx_sext_v8i8_v8i16(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-NEXT: slli a2, a2, 1 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lh a2, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, m1, tu, ma +; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vmv.s.x v10, a2 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 1 ; RV64ZVE32F-NEXT: .LBB24_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma @@ -1637,8 +1646,9 @@ define <8 x i16> @mgather_baseidx_sext_v8i8_v8i16(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-NEXT: slli a2, a2, 1 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lh a2, 0(a2) -; RV64ZVE32F-NEXT: vsetivli 
zero, 6, e16, m1, tu, ma +; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vmv.s.x v8, a2 +; RV64ZVE32F-NEXT: vsetivli zero, 6, e16, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 5 ; RV64ZVE32F-NEXT: .LBB24_9: # %else14 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma @@ -1656,8 +1666,9 @@ define <8 x i16> @mgather_baseidx_sext_v8i8_v8i16(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-NEXT: slli a2, a2, 1 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lh a2, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 3, e16, m1, tu, ma +; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vmv.s.x v11, a2 +; RV64ZVE32F-NEXT: vsetivli zero, 3, e16, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v9, v11, 2 ; RV64ZVE32F-NEXT: andi a2, a1, 8 ; RV64ZVE32F-NEXT: beqz a2, .LBB24_6 @@ -1668,8 +1679,9 @@ define <8 x i16> @mgather_baseidx_sext_v8i8_v8i16(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-NEXT: slli a2, a2, 1 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lh a2, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, tu, ma +; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vmv.s.x v8, a2 +; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 3 ; RV64ZVE32F-NEXT: andi a2, a1, 16 ; RV64ZVE32F-NEXT: beqz a2, .LBB24_7 @@ -1690,8 +1702,9 @@ define <8 x i16> @mgather_baseidx_sext_v8i8_v8i16(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-NEXT: slli a2, a2, 1 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lh a2, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 7, e16, m1, tu, ma +; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vmv.s.x v10, a2 +; RV64ZVE32F-NEXT: vsetivli zero, 7, e16, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 6 ; RV64ZVE32F-NEXT: andi a1, a1, -128 ; RV64ZVE32F-NEXT: beqz a1, .LBB24_11 @@ -1702,8 +1715,9 @@ define <8 x i16> @mgather_baseidx_sext_v8i8_v8i16(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-NEXT: slli a1, a1, 1 ; 
RV64ZVE32F-NEXT: add a0, a0, a1 ; RV64ZVE32F-NEXT: lh a0, 0(a0) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vmv.s.x v8, a0 +; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 7 ; RV64ZVE32F-NEXT: vmv1r.v v8, v9 ; RV64ZVE32F-NEXT: ret @@ -1744,7 +1758,7 @@ define <8 x i16> @mgather_baseidx_zext_v8i8_v8i16(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-NEXT: slli a2, a2, 1 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lh a2, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, tu, ma +; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, mf2, tu, ma ; RV64ZVE32F-NEXT: vmv.s.x v9, a2 ; RV64ZVE32F-NEXT: .LBB25_2: # %else ; RV64ZVE32F-NEXT: andi a2, a1, 2 @@ -1757,8 +1771,9 @@ define <8 x i16> @mgather_baseidx_zext_v8i8_v8i16(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-NEXT: slli a2, a2, 1 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lh a2, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, m1, tu, ma +; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vmv.s.x v10, a2 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 1 ; RV64ZVE32F-NEXT: .LBB25_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma @@ -1784,8 +1799,9 @@ define <8 x i16> @mgather_baseidx_zext_v8i8_v8i16(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-NEXT: slli a2, a2, 1 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lh a2, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 6, e16, m1, tu, ma +; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vmv.s.x v8, a2 +; RV64ZVE32F-NEXT: vsetivli zero, 6, e16, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 5 ; RV64ZVE32F-NEXT: .LBB25_9: # %else14 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma @@ -1804,8 +1820,9 @@ define <8 x i16> @mgather_baseidx_zext_v8i8_v8i16(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-NEXT: slli a2, a2, 1 ; 
RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lh a2, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 3, e16, m1, tu, ma +; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vmv.s.x v11, a2 +; RV64ZVE32F-NEXT: vsetivli zero, 3, e16, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v9, v11, 2 ; RV64ZVE32F-NEXT: andi a2, a1, 8 ; RV64ZVE32F-NEXT: beqz a2, .LBB25_6 @@ -1817,8 +1834,9 @@ define <8 x i16> @mgather_baseidx_zext_v8i8_v8i16(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-NEXT: slli a2, a2, 1 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lh a2, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, tu, ma +; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vmv.s.x v8, a2 +; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 3 ; RV64ZVE32F-NEXT: andi a2, a1, 16 ; RV64ZVE32F-NEXT: beqz a2, .LBB25_7 @@ -1841,8 +1859,9 @@ define <8 x i16> @mgather_baseidx_zext_v8i8_v8i16(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-NEXT: slli a2, a2, 1 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lh a2, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 7, e16, m1, tu, ma +; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vmv.s.x v10, a2 +; RV64ZVE32F-NEXT: vsetivli zero, 7, e16, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 6 ; RV64ZVE32F-NEXT: andi a1, a1, -128 ; RV64ZVE32F-NEXT: beqz a1, .LBB25_11 @@ -1854,8 +1873,9 @@ define <8 x i16> @mgather_baseidx_zext_v8i8_v8i16(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-NEXT: slli a1, a1, 1 ; RV64ZVE32F-NEXT: add a0, a0, a1 ; RV64ZVE32F-NEXT: lh a0, 0(a0) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vmv.s.x v8, a0 +; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 7 ; RV64ZVE32F-NEXT: vmv1r.v v8, v9 ; RV64ZVE32F-NEXT: ret @@ -1891,7 +1911,7 @@ define <8 x i16> @mgather_baseidx_v8i16(ptr %base, <8 x i16> %idxs, 
<8 x i1> %m, ; RV64ZVE32F-NEXT: andi a2, a1, 1 ; RV64ZVE32F-NEXT: beqz a2, .LBB26_2 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.load -; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, tu, ma +; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, mf2, tu, ma ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 ; RV64ZVE32F-NEXT: slli a2, a2, 1 ; RV64ZVE32F-NEXT: add a2, a0, a2 @@ -2324,15 +2344,16 @@ define <4 x i32> @mgather_truemask_v4i32(<4 x ptr> %ptrs, <4 x i32> %passthru) { ; RV64ZVE32F-NEXT: .LBB32_5: # %cond.load ; RV64ZVE32F-NEXT: ld a2, 0(a0) ; RV64ZVE32F-NEXT: lw a2, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, tu, ma +; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vmv.s.x v8, a2 ; RV64ZVE32F-NEXT: andi a2, a1, 2 ; RV64ZVE32F-NEXT: beqz a2, .LBB32_2 ; RV64ZVE32F-NEXT: .LBB32_6: # %cond.load1 ; RV64ZVE32F-NEXT: ld a2, 8(a0) ; RV64ZVE32F-NEXT: lw a2, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma +; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; RV64ZVE32F-NEXT: vmv.s.x v9, a2 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1 ; RV64ZVE32F-NEXT: andi a2, a1, 4 ; RV64ZVE32F-NEXT: beqz a2, .LBB32_3 @@ -7188,15 +7209,16 @@ define <4 x half> @mgather_truemask_v4f16(<4 x ptr> %ptrs, <4 x half> %passthru) ; RV64ZVE32F-NEXT: .LBB61_5: # %cond.load ; RV64ZVE32F-NEXT: ld a2, 0(a0) ; RV64ZVE32F-NEXT: flh fa5, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, mf2, tu, ma +; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m1, tu, ma ; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 ; RV64ZVE32F-NEXT: andi a2, a1, 2 ; RV64ZVE32F-NEXT: beqz a2, .LBB61_2 ; RV64ZVE32F-NEXT: .LBB61_6: # %cond.load1 ; RV64ZVE32F-NEXT: ld a2, 8(a0) ; RV64ZVE32F-NEXT: flh fa5, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, tu, ma +; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1 ; RV64ZVE32F-NEXT: andi a2, a1, 4 ; 
RV64ZVE32F-NEXT: beqz a2, .LBB61_3 @@ -7288,7 +7310,7 @@ define <8 x half> @mgather_v8f16(<8 x ptr> %ptrs, <8 x i1> %m, <8 x half> %passt ; RV64ZVE32F-NEXT: .LBB63_9: # %cond.load ; RV64ZVE32F-NEXT: ld a2, 0(a0) ; RV64ZVE32F-NEXT: flh fa5, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, tu, ma +; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, mf2, tu, ma ; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 ; RV64ZVE32F-NEXT: andi a2, a1, 2 ; RV64ZVE32F-NEXT: beqz a2, .LBB63_2 @@ -7383,7 +7405,7 @@ define <8 x half> @mgather_baseidx_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8 x i1 ; RV64ZVE32F-NEXT: slli a2, a2, 1 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flh fa5, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, tu, ma +; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, mf2, tu, ma ; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5 ; RV64ZVE32F-NEXT: .LBB64_2: # %else ; RV64ZVE32F-NEXT: andi a2, a1, 2 @@ -7395,8 +7417,9 @@ define <8 x half> @mgather_baseidx_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8 x i1 ; RV64ZVE32F-NEXT: slli a2, a2, 1 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flh fa5, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, m1, tu, ma +; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 1 ; RV64ZVE32F-NEXT: .LBB64_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma @@ -7421,8 +7444,9 @@ define <8 x half> @mgather_baseidx_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8 x i1 ; RV64ZVE32F-NEXT: slli a2, a2, 1 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flh fa5, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 6, e16, m1, tu, ma +; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 +; RV64ZVE32F-NEXT: vsetivli zero, 6, e16, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 5 ; RV64ZVE32F-NEXT: .LBB64_9: # %else14 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma @@ -7440,8 +7464,9 @@ define <8 x 
half> @mgather_baseidx_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8 x i1 ; RV64ZVE32F-NEXT: slli a2, a2, 1 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flh fa5, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 3, e16, m1, tu, ma +; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vfmv.s.f v11, fa5 +; RV64ZVE32F-NEXT: vsetivli zero, 3, e16, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v9, v11, 2 ; RV64ZVE32F-NEXT: andi a2, a1, 8 ; RV64ZVE32F-NEXT: beqz a2, .LBB64_6 @@ -7452,8 +7477,9 @@ define <8 x half> @mgather_baseidx_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8 x i1 ; RV64ZVE32F-NEXT: slli a2, a2, 1 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flh fa5, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, tu, ma +; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 +; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 3 ; RV64ZVE32F-NEXT: andi a2, a1, 16 ; RV64ZVE32F-NEXT: beqz a2, .LBB64_7 @@ -7474,8 +7500,9 @@ define <8 x half> @mgather_baseidx_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8 x i1 ; RV64ZVE32F-NEXT: slli a2, a2, 1 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flh fa5, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 7, e16, m1, tu, ma +; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5 +; RV64ZVE32F-NEXT: vsetivli zero, 7, e16, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 6 ; RV64ZVE32F-NEXT: andi a1, a1, -128 ; RV64ZVE32F-NEXT: beqz a1, .LBB64_11 @@ -7486,8 +7513,9 @@ define <8 x half> @mgather_baseidx_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8 x i1 ; RV64ZVE32F-NEXT: slli a1, a1, 1 ; RV64ZVE32F-NEXT: add a0, a0, a1 ; RV64ZVE32F-NEXT: flh fa5, 0(a0) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 +; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 7 ; RV64ZVE32F-NEXT: 
vmv1r.v v8, v9 ; RV64ZVE32F-NEXT: ret @@ -7528,7 +7556,7 @@ define <8 x half> @mgather_baseidx_sext_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-NEXT: slli a2, a2, 1 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flh fa5, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, tu, ma +; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, mf2, tu, ma ; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5 ; RV64ZVE32F-NEXT: .LBB65_2: # %else ; RV64ZVE32F-NEXT: andi a2, a1, 2 @@ -7540,8 +7568,9 @@ define <8 x half> @mgather_baseidx_sext_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-NEXT: slli a2, a2, 1 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flh fa5, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, m1, tu, ma +; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 1 ; RV64ZVE32F-NEXT: .LBB65_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma @@ -7566,8 +7595,9 @@ define <8 x half> @mgather_baseidx_sext_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-NEXT: slli a2, a2, 1 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flh fa5, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 6, e16, m1, tu, ma +; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 +; RV64ZVE32F-NEXT: vsetivli zero, 6, e16, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 5 ; RV64ZVE32F-NEXT: .LBB65_9: # %else14 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma @@ -7585,8 +7615,9 @@ define <8 x half> @mgather_baseidx_sext_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-NEXT: slli a2, a2, 1 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flh fa5, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 3, e16, m1, tu, ma +; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vfmv.s.f v11, fa5 +; RV64ZVE32F-NEXT: vsetivli zero, 3, e16, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v9, v11, 2 ; 
RV64ZVE32F-NEXT: andi a2, a1, 8 ; RV64ZVE32F-NEXT: beqz a2, .LBB65_6 @@ -7597,8 +7628,9 @@ define <8 x half> @mgather_baseidx_sext_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-NEXT: slli a2, a2, 1 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flh fa5, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, tu, ma +; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 +; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 3 ; RV64ZVE32F-NEXT: andi a2, a1, 16 ; RV64ZVE32F-NEXT: beqz a2, .LBB65_7 @@ -7619,8 +7651,9 @@ define <8 x half> @mgather_baseidx_sext_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-NEXT: slli a2, a2, 1 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flh fa5, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 7, e16, m1, tu, ma +; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5 +; RV64ZVE32F-NEXT: vsetivli zero, 7, e16, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 6 ; RV64ZVE32F-NEXT: andi a1, a1, -128 ; RV64ZVE32F-NEXT: beqz a1, .LBB65_11 @@ -7631,8 +7664,9 @@ define <8 x half> @mgather_baseidx_sext_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-NEXT: slli a1, a1, 1 ; RV64ZVE32F-NEXT: add a0, a0, a1 ; RV64ZVE32F-NEXT: flh fa5, 0(a0) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 +; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 7 ; RV64ZVE32F-NEXT: vmv1r.v v8, v9 ; RV64ZVE32F-NEXT: ret @@ -7673,7 +7707,7 @@ define <8 x half> @mgather_baseidx_zext_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-NEXT: slli a2, a2, 1 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flh fa5, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, tu, ma +; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, mf2, tu, ma ; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5 ; RV64ZVE32F-NEXT: .LBB66_2: # 
%else ; RV64ZVE32F-NEXT: andi a2, a1, 2 @@ -7686,8 +7720,9 @@ define <8 x half> @mgather_baseidx_zext_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-NEXT: slli a2, a2, 1 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flh fa5, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, m1, tu, ma +; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 1 ; RV64ZVE32F-NEXT: .LBB66_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma @@ -7713,8 +7748,9 @@ define <8 x half> @mgather_baseidx_zext_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-NEXT: slli a2, a2, 1 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flh fa5, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 6, e16, m1, tu, ma +; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 +; RV64ZVE32F-NEXT: vsetivli zero, 6, e16, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 5 ; RV64ZVE32F-NEXT: .LBB66_9: # %else14 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma @@ -7733,8 +7769,9 @@ define <8 x half> @mgather_baseidx_zext_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-NEXT: slli a2, a2, 1 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flh fa5, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 3, e16, m1, tu, ma +; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vfmv.s.f v11, fa5 +; RV64ZVE32F-NEXT: vsetivli zero, 3, e16, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v9, v11, 2 ; RV64ZVE32F-NEXT: andi a2, a1, 8 ; RV64ZVE32F-NEXT: beqz a2, .LBB66_6 @@ -7746,8 +7783,9 @@ define <8 x half> @mgather_baseidx_zext_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-NEXT: slli a2, a2, 1 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flh fa5, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, tu, ma +; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 +; 
RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 3 ; RV64ZVE32F-NEXT: andi a2, a1, 16 ; RV64ZVE32F-NEXT: beqz a2, .LBB66_7 @@ -7770,8 +7808,9 @@ define <8 x half> @mgather_baseidx_zext_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-NEXT: slli a2, a2, 1 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flh fa5, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 7, e16, m1, tu, ma +; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5 +; RV64ZVE32F-NEXT: vsetivli zero, 7, e16, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 6 ; RV64ZVE32F-NEXT: andi a1, a1, -128 ; RV64ZVE32F-NEXT: beqz a1, .LBB66_11 @@ -7783,8 +7822,9 @@ define <8 x half> @mgather_baseidx_zext_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-NEXT: slli a1, a1, 1 ; RV64ZVE32F-NEXT: add a0, a0, a1 ; RV64ZVE32F-NEXT: flh fa5, 0(a0) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 +; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 7 ; RV64ZVE32F-NEXT: vmv1r.v v8, v9 ; RV64ZVE32F-NEXT: ret @@ -7820,7 +7860,7 @@ define <8 x half> @mgather_baseidx_v8f16(ptr %base, <8 x i16> %idxs, <8 x i1> %m ; RV64ZVE32F-NEXT: andi a2, a1, 1 ; RV64ZVE32F-NEXT: beqz a2, .LBB67_2 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.load -; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, tu, ma +; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, mf2, tu, ma ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 ; RV64ZVE32F-NEXT: slli a2, a2, 1 ; RV64ZVE32F-NEXT: add a2, a0, a2 @@ -8127,15 +8167,16 @@ define <4 x float> @mgather_truemask_v4f32(<4 x ptr> %ptrs, <4 x float> %passthr ; RV64ZVE32F-NEXT: .LBB71_5: # %cond.load ; RV64ZVE32F-NEXT: ld a2, 0(a0) ; RV64ZVE32F-NEXT: flw fa5, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, tu, ma +; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 ; RV64ZVE32F-NEXT: 
andi a2, a1, 2 ; RV64ZVE32F-NEXT: beqz a2, .LBB71_2 ; RV64ZVE32F-NEXT: .LBB71_6: # %cond.load1 ; RV64ZVE32F-NEXT: ld a2, 8(a0) ; RV64ZVE32F-NEXT: flw fa5, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma +; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1 ; RV64ZVE32F-NEXT: andi a2, a1, 4 ; RV64ZVE32F-NEXT: beqz a2, .LBB71_3 @@ -12122,7 +12163,7 @@ define <16 x i8> @mgather_baseidx_v16i8(ptr %base, <16 x i8> %idxs, <16 x i1> %m ; RV64ZVE32F-NEXT: andi a2, a1, 1 ; RV64ZVE32F-NEXT: beqz a2, .LBB97_2 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.load -; RV64ZVE32F-NEXT: vsetivli zero, 16, e8, m1, tu, ma +; RV64ZVE32F-NEXT: vsetivli zero, 16, e8, mf4, tu, ma ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lbu a2, 0(a2) @@ -12382,7 +12423,7 @@ define <32 x i8> @mgather_baseidx_v32i8(ptr %base, <32 x i8> %idxs, <32 x i1> %m ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lbu a2, 0(a2) ; RV64ZVE32F-NEXT: li a3, 32 -; RV64ZVE32F-NEXT: vsetvli zero, a3, e8, m1, tu, ma +; RV64ZVE32F-NEXT: vsetvli zero, a3, e8, mf4, tu, ma ; RV64ZVE32F-NEXT: vmv.s.x v10, a2 ; RV64ZVE32F-NEXT: .LBB98_2: # %else ; RV64ZVE32F-NEXT: andi a2, a1, 2 @@ -12416,7 +12457,7 @@ define <32 x i8> @mgather_baseidx_v32i8(ptr %base, <32 x i8> %idxs, <32 x i1> %m ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lbu a2, 0(a2) ; RV64ZVE32F-NEXT: li a3, 32 -; RV64ZVE32F-NEXT: vsetvli zero, a3, e8, m1, ta, ma +; RV64ZVE32F-NEXT: vsetvli zero, a3, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vmv.s.x v12, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 5, e8, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 4 @@ -12556,7 +12597,7 @@ define <32 x i8> @mgather_baseidx_v32i8(ptr %base, <32 x i8> %idxs, <32 x i1> %m ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lbu a2, 0(a2) ; RV64ZVE32F-NEXT: li a3, 32 -; RV64ZVE32F-NEXT: vsetvli zero, a3, e8, m1, 
ta, ma +; RV64ZVE32F-NEXT: vsetvli zero, a3, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vmv.s.x v12, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 21, e8, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 20 @@ -12706,7 +12747,7 @@ define <32 x i8> @mgather_baseidx_v32i8(ptr %base, <32 x i8> %idxs, <32 x i1> %m ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lbu a2, 0(a2) ; RV64ZVE32F-NEXT: li a3, 32 -; RV64ZVE32F-NEXT: vsetvli zero, a3, e8, m1, ta, ma +; RV64ZVE32F-NEXT: vsetvli zero, a3, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vmv.s.x v13, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 9, e8, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v13, 8 @@ -12741,7 +12782,7 @@ define <32 x i8> @mgather_baseidx_v32i8(ptr %base, <32 x i8> %idxs, <32 x i1> %m ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lbu a2, 0(a2) ; RV64ZVE32F-NEXT: li a3, 32 -; RV64ZVE32F-NEXT: vsetvli zero, a3, e8, m1, ta, ma +; RV64ZVE32F-NEXT: vsetvli zero, a3, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vmv.s.x v12, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 17, e8, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 16 @@ -12799,7 +12840,7 @@ define <32 x i8> @mgather_baseidx_v32i8(ptr %base, <32 x i8> %idxs, <32 x i1> %m ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lbu a2, 0(a2) ; RV64ZVE32F-NEXT: li a3, 32 -; RV64ZVE32F-NEXT: vsetvli zero, a3, e8, m1, ta, ma +; RV64ZVE32F-NEXT: vsetvli zero, a3, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vmv.s.x v12, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 25, e8, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 24 @@ -12834,7 +12875,7 @@ define <32 x i8> @mgather_baseidx_v32i8(ptr %base, <32 x i8> %idxs, <32 x i1> %m ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lbu a2, 0(a2) ; RV64ZVE32F-NEXT: li a3, 32 -; RV64ZVE32F-NEXT: vsetvli zero, a3, e8, m1, ta, ma +; RV64ZVE32F-NEXT: vsetvli zero, a3, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vmv.s.x v12, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 29, e8, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 28 diff --git 
a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp-vp.ll index 7ba248556358f7..9df160bf30f005 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp-vp.ll @@ -185,7 +185,7 @@ define float @vpreduce_fadd_v64f32(float %s, <64 x float> %v, <64 x i1> %m, i32 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: li a1, 32 ; CHECK-NEXT: .LBB8_2: -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vfmv.s.f v25, fa0 ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; CHECK-NEXT: vfredusum.vs v25, v8, v25, v0.t @@ -213,7 +213,7 @@ define float @vpreduce_ord_fadd_v64f32(float %s, <64 x float> %v, <64 x i1> %m, ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: li a1, 32 ; CHECK-NEXT: .LBB9_2: -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vfmv.s.f v25, fa0 ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; CHECK-NEXT: vfredosum.vs v25, v8, v25, v0.t diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp.ll index 4766b3727a4625..3f6aa72bc2e3b2 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp.ll @@ -476,11 +476,10 @@ define float @vreduce_ord_fadd_v8f32(ptr %x, float %s) { define float @vreduce_fwadd_v8f32(ptr %x, float %s) { ; CHECK-LABEL: vreduce_fwadd_v8f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 8, e32, m1, ta, ma ; CHECK-NEXT: vfmv.s.f v9, fa0 -; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; CHECK-NEXT: vfwredusum.vs v8, v8, v9 ; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; CHECK-NEXT: 
vfmv.f.s fa0, v8 @@ -494,11 +493,10 @@ define float @vreduce_fwadd_v8f32(ptr %x, float %s) { define float @vreduce_ord_fwadd_v8f32(ptr %x, float %s) { ; CHECK-LABEL: vreduce_ord_fwadd_v8f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 8, e32, m1, ta, ma ; CHECK-NEXT: vfmv.s.f v9, fa0 -; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; CHECK-NEXT: vfwredosum.vs v8, v8, v9 ; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; CHECK-NEXT: vfmv.f.s fa0, v8 @@ -542,11 +540,10 @@ define float @vreduce_ord_fadd_v16f32(ptr %x, float %s) { define float @vreduce_fwadd_v16f32(ptr %x, float %s) { ; CHECK-LABEL: vreduce_fwadd_v16f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 16, e32, m1, ta, ma ; CHECK-NEXT: vfmv.s.f v10, fa0 -; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; CHECK-NEXT: vfwredusum.vs v8, v8, v10 ; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; CHECK-NEXT: vfmv.f.s fa0, v8 @@ -560,11 +557,10 @@ define float @vreduce_fwadd_v16f32(ptr %x, float %s) { define float @vreduce_ord_fwadd_v16f32(ptr %x, float %s) { ; CHECK-LABEL: vreduce_ord_fwadd_v16f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 16, e32, m1, ta, ma ; CHECK-NEXT: vfmv.s.f v10, fa0 -; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; CHECK-NEXT: vfwredosum.vs v8, v8, v10 ; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; CHECK-NEXT: vfmv.f.s fa0, v8 @@ -611,11 +607,10 @@ define float @vreduce_fwadd_v32f32(ptr %x, float %s) { ; CHECK-LABEL: 
vreduce_fwadd_v32f32: ; CHECK: # %bb.0: ; CHECK-NEXT: li a1, 32 -; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma +; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma ; CHECK-NEXT: vfmv.s.f v12, fa0 -; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma ; CHECK-NEXT: vfwredusum.vs v8, v8, v12 ; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; CHECK-NEXT: vfmv.f.s fa0, v8 @@ -630,11 +625,10 @@ define float @vreduce_ord_fwadd_v32f32(ptr %x, float %s) { ; CHECK-LABEL: vreduce_ord_fwadd_v32f32: ; CHECK: # %bb.0: ; CHECK-NEXT: li a1, 32 -; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma +; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma ; CHECK-NEXT: vfmv.s.f v12, fa0 -; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma ; CHECK-NEXT: vfwredosum.vs v8, v8, v12 ; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; CHECK-NEXT: vfmv.f.s fa0, v8 @@ -694,9 +688,8 @@ define float @vreduce_fwadd_v64f32(ptr %x, float %s) { ; CHECK-NEXT: vslidedown.vx v16, v8, a0 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; CHECK-NEXT: vfwadd.vv v24, v8, v16 -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; CHECK-NEXT: vfmv.s.f v8, fa0 -; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma ; CHECK-NEXT: vfredusum.vs v8, v24, v8 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret @@ -888,11 +881,10 @@ define double @vreduce_ord_fadd_v4f64(ptr %x, double %s) { define double @vreduce_fwadd_v4f64(ptr %x, double %s) { ; CHECK-LABEL: vreduce_fwadd_v4f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 4, e64, m1, ta, ma ; CHECK-NEXT: vfmv.s.f v9, fa0 -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, 
ma +; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; CHECK-NEXT: vfwredusum.vs v8, v8, v9 ; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; CHECK-NEXT: vfmv.f.s fa0, v8 @@ -906,11 +898,10 @@ define double @vreduce_fwadd_v4f64(ptr %x, double %s) { define double @vreduce_ord_fwadd_v4f64(ptr %x, double %s) { ; CHECK-LABEL: vreduce_ord_fwadd_v4f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 4, e64, m1, ta, ma ; CHECK-NEXT: vfmv.s.f v9, fa0 -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; CHECK-NEXT: vfwredosum.vs v8, v8, v9 ; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; CHECK-NEXT: vfmv.f.s fa0, v8 @@ -954,11 +945,10 @@ define double @vreduce_ord_fadd_v8f64(ptr %x, double %s) { define double @vreduce_fwadd_v8f64(ptr %x, double %s) { ; CHECK-LABEL: vreduce_fwadd_v8f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 8, e64, m1, ta, ma ; CHECK-NEXT: vfmv.s.f v10, fa0 -; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vfwredusum.vs v8, v8, v10 ; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; CHECK-NEXT: vfmv.f.s fa0, v8 @@ -972,11 +962,10 @@ define double @vreduce_fwadd_v8f64(ptr %x, double %s) { define double @vreduce_ord_fwadd_v8f64(ptr %x, double %s) { ; CHECK-LABEL: vreduce_ord_fwadd_v8f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 8, e64, m1, ta, ma ; CHECK-NEXT: vfmv.s.f v10, fa0 -; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vfwredosum.vs v8, v8, v10 ; CHECK-NEXT: vsetivli zero, 1, e64, 
m1, ta, ma ; CHECK-NEXT: vfmv.f.s fa0, v8 @@ -1020,11 +1009,10 @@ define double @vreduce_ord_fadd_v16f64(ptr %x, double %s) { define double @vreduce_fwadd_v16f64(ptr %x, double %s) { ; CHECK-LABEL: vreduce_fwadd_v16f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma +; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 16, e64, m1, ta, ma ; CHECK-NEXT: vfmv.s.f v12, fa0 -; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; CHECK-NEXT: vfwredusum.vs v8, v8, v12 ; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; CHECK-NEXT: vfmv.f.s fa0, v8 @@ -1038,11 +1026,10 @@ define double @vreduce_fwadd_v16f64(ptr %x, double %s) { define double @vreduce_ord_fwadd_v16f64(ptr %x, double %s) { ; CHECK-LABEL: vreduce_ord_fwadd_v16f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma +; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 16, e64, m1, ta, ma ; CHECK-NEXT: vfmv.s.f v12, fa0 -; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; CHECK-NEXT: vfwredosum.vs v8, v8, v12 ; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; CHECK-NEXT: vfmv.f.s fa0, v8 @@ -1099,7 +1086,7 @@ define double @vreduce_fwadd_v32f64(ptr %x, double %s) { ; CHECK-NEXT: vslidedown.vi v16, v8, 16 ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma ; CHECK-NEXT: vfwadd.vv v24, v8, v16 -; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vfmv.s.f v8, fa0 ; CHECK-NEXT: vfredusum.vs v8, v24, v8 ; CHECK-NEXT: vfmv.f.s fa0, v8 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int-vp.ll index 6da5ca06a79c23..4e576f12e10768 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int-vp.ll +++ 
b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int-vp.ll @@ -849,7 +849,7 @@ define signext i32 @vpreduce_xor_v64i32(i32 signext %s, <64 x i32> %v, <64 x i1> ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: li a2, 32 ; CHECK-NEXT: .LBB49_2: -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vmv.s.x v25, a0 ; CHECK-NEXT: vsetvli zero, a2, e32, m8, ta, ma ; CHECK-NEXT: vredxor.vs v25, v8, v25, v0.t diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int.ll index f2a1f2752cda00..90ded1d70d5fc1 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int.ll @@ -362,11 +362,10 @@ define i16 @vreduce_add_v16i16(ptr %x) { define i16 @vwreduce_add_v16i16(ptr %x) { ; CHECK-LABEL: vwreduce_add_v16i16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 16, e16, m1, ta, ma ; CHECK-NEXT: vmv.s.x v9, zero -; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, ma ; CHECK-NEXT: vwredsum.vs v8, v8, v9 ; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma ; CHECK-NEXT: vmv.x.s a0, v8 @@ -380,11 +379,10 @@ define i16 @vwreduce_add_v16i16(ptr %x) { define i16 @vwreduce_uadd_v16i16(ptr %x) { ; CHECK-LABEL: vwreduce_uadd_v16i16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 16, e16, m1, ta, ma ; CHECK-NEXT: vmv.s.x v9, zero -; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, ma ; CHECK-NEXT: vwredsumu.vs v8, v8, v9 ; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma ; CHECK-NEXT: vmv.x.s a0, v8 @@ -416,11 +414,10 @@ define i16 @vwreduce_add_v32i16(ptr %x) { ; 
CHECK-LABEL: vwreduce_add_v32i16: ; CHECK: # %bb.0: ; CHECK-NEXT: li a1, 32 -; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma +; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma ; CHECK-NEXT: vmv.s.x v10, zero -; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma +; CHECK-NEXT: vsetvli zero, zero, e8, m2, ta, ma ; CHECK-NEXT: vwredsum.vs v8, v8, v10 ; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma ; CHECK-NEXT: vmv.x.s a0, v8 @@ -435,11 +432,10 @@ define i16 @vwreduce_uadd_v32i16(ptr %x) { ; CHECK-LABEL: vwreduce_uadd_v32i16: ; CHECK: # %bb.0: ; CHECK-NEXT: li a1, 32 -; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma +; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma ; CHECK-NEXT: vmv.s.x v10, zero -; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma +; CHECK-NEXT: vsetvli zero, zero, e8, m2, ta, ma ; CHECK-NEXT: vwredsumu.vs v8, v8, v10 ; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma ; CHECK-NEXT: vmv.x.s a0, v8 @@ -471,11 +467,10 @@ define i16 @vwreduce_add_v64i16(ptr %x) { ; CHECK-LABEL: vwreduce_add_v64i16: ; CHECK: # %bb.0: ; CHECK-NEXT: li a1, 64 -; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma +; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma ; CHECK-NEXT: vmv.s.x v12, zero -; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma +; CHECK-NEXT: vsetvli zero, zero, e8, m4, ta, ma ; CHECK-NEXT: vwredsum.vs v8, v8, v12 ; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma ; CHECK-NEXT: vmv.x.s a0, v8 @@ -490,11 +485,10 @@ define i16 @vwreduce_uadd_v64i16(ptr %x) { ; CHECK-LABEL: vwreduce_uadd_v64i16: ; CHECK: # %bb.0: ; CHECK-NEXT: li a1, 64 -; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma +; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma ; CHECK-NEXT: vmv.s.x v12, zero -; CHECK-NEXT: 
vsetvli zero, a1, e8, m4, ta, ma +; CHECK-NEXT: vsetvli zero, zero, e8, m4, ta, ma ; CHECK-NEXT: vwredsumu.vs v8, v8, v12 ; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma ; CHECK-NEXT: vmv.x.s a0, v8 @@ -536,9 +530,8 @@ define i16 @vwreduce_add_v128i16(ptr %x) { ; CHECK-NEXT: vslidedown.vx v16, v8, a0 ; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, ma ; CHECK-NEXT: vwadd.vv v24, v8, v16 -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, ma ; CHECK-NEXT: vmv.s.x v8, zero -; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma ; CHECK-NEXT: vredsum.vs v8, v24, v8 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -559,9 +552,8 @@ define i16 @vwreduce_uadd_v128i16(ptr %x) { ; CHECK-NEXT: vslidedown.vx v16, v8, a0 ; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, ma ; CHECK-NEXT: vwaddu.vv v24, v8, v16 -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, ma ; CHECK-NEXT: vmv.s.x v8, zero -; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma ; CHECK-NEXT: vredsum.vs v8, v24, v8 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -732,11 +724,10 @@ define i32 @vreduce_add_v8i32(ptr %x) { define i32 @vwreduce_add_v8i32(ptr %x) { ; CHECK-LABEL: vwreduce_add_v8i32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 8, e32, m1, ta, ma ; CHECK-NEXT: vmv.s.x v9, zero -; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; CHECK-NEXT: vwredsum.vs v8, v8, v9 ; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; CHECK-NEXT: vmv.x.s a0, v8 @@ -750,11 +741,10 @@ define i32 @vwreduce_add_v8i32(ptr %x) { define i32 @vwreduce_uadd_v8i32(ptr %x) { ; CHECK-LABEL: vwreduce_uadd_v8i32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 
8, e32, m1, ta, ma ; CHECK-NEXT: vmv.s.x v9, zero -; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; CHECK-NEXT: vwredsumu.vs v8, v8, v9 ; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; CHECK-NEXT: vmv.x.s a0, v8 @@ -784,11 +774,10 @@ define i32 @vreduce_add_v16i32(ptr %x) { define i32 @vwreduce_add_v16i32(ptr %x) { ; CHECK-LABEL: vwreduce_add_v16i32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 16, e32, m1, ta, ma ; CHECK-NEXT: vmv.s.x v10, zero -; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; CHECK-NEXT: vwredsum.vs v8, v8, v10 ; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; CHECK-NEXT: vmv.x.s a0, v8 @@ -802,11 +791,10 @@ define i32 @vwreduce_add_v16i32(ptr %x) { define i32 @vwreduce_uadd_v16i32(ptr %x) { ; CHECK-LABEL: vwreduce_uadd_v16i32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 16, e32, m1, ta, ma ; CHECK-NEXT: vmv.s.x v10, zero -; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; CHECK-NEXT: vwredsumu.vs v8, v8, v10 ; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; CHECK-NEXT: vmv.x.s a0, v8 @@ -838,11 +826,10 @@ define i32 @vwreduce_add_v32i32(ptr %x) { ; CHECK-LABEL: vwreduce_add_v32i32: ; CHECK: # %bb.0: ; CHECK-NEXT: li a1, 32 -; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma +; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma ; CHECK-NEXT: vmv.s.x v12, zero -; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma ; CHECK-NEXT: vwredsum.vs v8, v8, v12 ; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; CHECK-NEXT: 
vmv.x.s a0, v8 @@ -857,11 +844,10 @@ define i32 @vwreduce_uadd_v32i32(ptr %x) { ; CHECK-LABEL: vwreduce_uadd_v32i32: ; CHECK: # %bb.0: ; CHECK-NEXT: li a1, 32 -; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma +; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma ; CHECK-NEXT: vmv.s.x v12, zero -; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma ; CHECK-NEXT: vwredsumu.vs v8, v8, v12 ; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; CHECK-NEXT: vmv.x.s a0, v8 @@ -903,9 +889,8 @@ define i32 @vwreduce_add_v64i32(ptr %x) { ; CHECK-NEXT: vslidedown.vx v16, v8, a0 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; CHECK-NEXT: vwadd.vv v24, v8, v16 -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; CHECK-NEXT: vmv.s.x v8, zero -; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma ; CHECK-NEXT: vredsum.vs v8, v24, v8 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -926,9 +911,8 @@ define i32 @vwreduce_uadd_v64i32(ptr %x) { ; CHECK-NEXT: vslidedown.vx v16, v8, a0 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; CHECK-NEXT: vwaddu.vv v24, v8, v16 -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; CHECK-NEXT: vmv.s.x v8, zero -; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma ; CHECK-NEXT: vredsum.vs v8, v24, v8 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -1135,11 +1119,10 @@ define i64 @vreduce_add_v4i64(ptr %x) { define i64 @vwreduce_add_v4i64(ptr %x) { ; RV32-LABEL: vwreduce_add_v4i64: ; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; RV32-NEXT: vle32.v v8, (a0) -; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV32-NEXT: vmv.s.x v9, zero -; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; RV32-NEXT: vwredsum.vs v8, v8, v9 ; RV32-NEXT: 
vsetivli zero, 1, e64, m1, ta, ma ; RV32-NEXT: vmv.x.s a0, v8 @@ -1150,11 +1133,10 @@ define i64 @vwreduce_add_v4i64(ptr %x) { ; ; RV64-LABEL: vwreduce_add_v4i64: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; RV64-NEXT: vle32.v v8, (a0) -; RV64-NEXT: vsetivli zero, 4, e64, m1, ta, ma ; RV64-NEXT: vmv.s.x v9, zero -; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; RV64-NEXT: vwredsum.vs v8, v8, v9 ; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV64-NEXT: vmv.x.s a0, v8 @@ -1168,11 +1150,10 @@ define i64 @vwreduce_add_v4i64(ptr %x) { define i64 @vwreduce_uadd_v4i64(ptr %x) { ; RV32-LABEL: vwreduce_uadd_v4i64: ; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; RV32-NEXT: vle32.v v8, (a0) -; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV32-NEXT: vmv.s.x v9, zero -; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; RV32-NEXT: vwredsumu.vs v8, v8, v9 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV32-NEXT: vmv.x.s a0, v8 @@ -1183,11 +1164,10 @@ define i64 @vwreduce_uadd_v4i64(ptr %x) { ; ; RV64-LABEL: vwreduce_uadd_v4i64: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; RV64-NEXT: vle32.v v8, (a0) -; RV64-NEXT: vsetivli zero, 4, e64, m1, ta, ma ; RV64-NEXT: vmv.s.x v9, zero -; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; RV64-NEXT: vwredsumu.vs v8, v8, v9 ; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV64-NEXT: vmv.x.s a0, v8 @@ -1230,11 +1210,10 @@ define i64 @vreduce_add_v8i64(ptr %x) { define i64 @vwreduce_add_v8i64(ptr %x) { ; RV32-LABEL: vwreduce_add_v8i64: ; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; RV32-NEXT: vle32.v v8, (a0) -; RV32-NEXT: 
vsetivli zero, 1, e64, m1, ta, ma ; RV32-NEXT: vmv.s.x v10, zero -; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; RV32-NEXT: vwredsum.vs v8, v8, v10 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV32-NEXT: vmv.x.s a0, v8 @@ -1245,11 +1224,10 @@ define i64 @vwreduce_add_v8i64(ptr %x) { ; ; RV64-LABEL: vwreduce_add_v8i64: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; RV64-NEXT: vle32.v v8, (a0) -; RV64-NEXT: vsetivli zero, 8, e64, m1, ta, ma ; RV64-NEXT: vmv.s.x v10, zero -; RV64-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; RV64-NEXT: vwredsum.vs v8, v8, v10 ; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV64-NEXT: vmv.x.s a0, v8 @@ -1263,11 +1241,10 @@ define i64 @vwreduce_add_v8i64(ptr %x) { define i64 @vwreduce_uadd_v8i64(ptr %x) { ; RV32-LABEL: vwreduce_uadd_v8i64: ; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; RV32-NEXT: vle32.v v8, (a0) -; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV32-NEXT: vmv.s.x v10, zero -; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; RV32-NEXT: vwredsumu.vs v8, v8, v10 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV32-NEXT: vmv.x.s a0, v8 @@ -1278,11 +1255,10 @@ define i64 @vwreduce_uadd_v8i64(ptr %x) { ; ; RV64-LABEL: vwreduce_uadd_v8i64: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; RV64-NEXT: vle32.v v8, (a0) -; RV64-NEXT: vsetivli zero, 8, e64, m1, ta, ma ; RV64-NEXT: vmv.s.x v10, zero -; RV64-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; RV64-NEXT: vwredsumu.vs v8, v8, v10 ; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV64-NEXT: vmv.x.s a0, v8 @@ -1325,11 +1301,10 @@ define i64 @vreduce_add_v16i64(ptr %x) { define i64 
@vwreduce_add_v16i64(ptr %x) { ; RV32-LABEL: vwreduce_add_v16i64: ; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma +; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vle32.v v8, (a0) -; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV32-NEXT: vmv.s.x v12, zero -; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma +; RV32-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; RV32-NEXT: vwredsum.vs v8, v8, v12 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV32-NEXT: vmv.x.s a0, v8 @@ -1340,11 +1315,10 @@ define i64 @vwreduce_add_v16i64(ptr %x) { ; ; RV64-LABEL: vwreduce_add_v16i64: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 16, e32, m4, ta, ma +; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV64-NEXT: vle32.v v8, (a0) -; RV64-NEXT: vsetivli zero, 16, e64, m1, ta, ma ; RV64-NEXT: vmv.s.x v12, zero -; RV64-NEXT: vsetivli zero, 16, e32, m4, ta, ma +; RV64-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; RV64-NEXT: vwredsum.vs v8, v8, v12 ; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV64-NEXT: vmv.x.s a0, v8 @@ -1358,11 +1332,10 @@ define i64 @vwreduce_add_v16i64(ptr %x) { define i64 @vwreduce_uadd_v16i64(ptr %x) { ; RV32-LABEL: vwreduce_uadd_v16i64: ; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma +; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vle32.v v8, (a0) -; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV32-NEXT: vmv.s.x v12, zero -; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma +; RV32-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; RV32-NEXT: vwredsumu.vs v8, v8, v12 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV32-NEXT: vmv.x.s a0, v8 @@ -1373,11 +1346,10 @@ define i64 @vwreduce_uadd_v16i64(ptr %x) { ; ; RV64-LABEL: vwreduce_uadd_v16i64: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 16, e32, m4, ta, ma +; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV64-NEXT: vle32.v v8, (a0) -; RV64-NEXT: vsetivli zero, 16, e64, m1, ta, ma ; RV64-NEXT: vmv.s.x v12, zero -; RV64-NEXT: vsetivli zero, 16, e32, m4, ta, ma 
+; RV64-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; RV64-NEXT: vwredsumu.vs v8, v8, v12 ; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV64-NEXT: vmv.x.s a0, v8 @@ -1433,7 +1405,7 @@ define i64 @vwreduce_add_v32i64(ptr %x) { ; RV32-NEXT: vslidedown.vi v16, v8, 16 ; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma ; RV32-NEXT: vwadd.vv v24, v8, v16 -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; RV32-NEXT: vmv.s.x v8, zero ; RV32-NEXT: vredsum.vs v8, v24, v8 ; RV32-NEXT: vmv.x.s a0, v8 @@ -1451,7 +1423,7 @@ define i64 @vwreduce_add_v32i64(ptr %x) { ; RV64-NEXT: vslidedown.vi v16, v8, 16 ; RV64-NEXT: vsetivli zero, 16, e32, m4, ta, ma ; RV64-NEXT: vwadd.vv v24, v8, v16 -; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; RV64-NEXT: vmv.s.x v8, zero ; RV64-NEXT: vredsum.vs v8, v24, v8 ; RV64-NEXT: vmv.x.s a0, v8 @@ -1472,7 +1444,7 @@ define i64 @vwreduce_uadd_v32i64(ptr %x) { ; RV32-NEXT: vslidedown.vi v16, v8, 16 ; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma ; RV32-NEXT: vwaddu.vv v24, v8, v16 -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; RV32-NEXT: vmv.s.x v8, zero ; RV32-NEXT: vredsum.vs v8, v24, v8 ; RV32-NEXT: vmv.x.s a0, v8 @@ -1490,7 +1462,7 @@ define i64 @vwreduce_uadd_v32i64(ptr %x) { ; RV64-NEXT: vslidedown.vi v16, v8, 16 ; RV64-NEXT: vsetivli zero, 16, e32, m4, ta, ma ; RV64-NEXT: vwaddu.vv v24, v8, v16 -; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; RV64-NEXT: vmv.s.x v8, zero ; RV64-NEXT: vredsum.vs v8, v24, v8 ; RV64-NEXT: vmv.x.s a0, v8 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-transpose.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-transpose.ll index a1d2b5106d5a96..d0777962a75651 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-transpose.ll +++ 
b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-transpose.ll @@ -45,9 +45,9 @@ define <16 x i8> @trn1.v16i8(<16 x i8> %v0, <16 x i8> %v1) { ; CHECK-NEXT: vadd.vi v8, v11, -1 ; CHECK-NEXT: lui a0, 11 ; CHECK-NEXT: addi a0, a0, -1366 -; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, ma +; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; CHECK-NEXT: vmv.s.x v0, a0 -; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, mu +; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, mu ; CHECK-NEXT: vrgather.vv v10, v9, v8, v0.t ; CHECK-NEXT: vmv.v.v v8, v10 ; CHECK-NEXT: ret @@ -64,9 +64,9 @@ define <16 x i8> @trn2.v16i8(<16 x i8> %v0, <16 x i8> %v1) { ; CHECK-NEXT: vrgather.vv v10, v8, v12 ; CHECK-NEXT: lui a0, 11 ; CHECK-NEXT: addi a0, a0, -1366 -; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, ma +; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; CHECK-NEXT: vmv.s.x v0, a0 -; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, mu +; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, mu ; CHECK-NEXT: vrgather.vv v10, v9, v11, v0.t ; CHECK-NEXT: vmv.v.v v8, v10 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/shuffle-reverse.ll b/llvm/test/CodeGen/RISCV/rvv/shuffle-reverse.ll index 6984f2b3402a76..032d32109933f3 100644 --- a/llvm/test/CodeGen/RISCV/rvv/shuffle-reverse.ll +++ b/llvm/test/CodeGen/RISCV/rvv/shuffle-reverse.ll @@ -80,9 +80,9 @@ define <16 x i8> @v8i8_2(<8 x i8> %a, <8 x i8> %b) { ; CHECK-NEXT: vrgather.vv v10, v8, v12 ; CHECK-NEXT: vrsub.vi v8, v11, 7 ; CHECK-NEXT: li a0, 255 -; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, ma +; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; CHECK-NEXT: vmv.s.x v0, a0 -; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, mu +; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, mu ; CHECK-NEXT: vrgather.vv v10, v9, v8, v0.t ; CHECK-NEXT: vmv.v.v v8, v10 ; CHECK-NEXT: ret @@ -117,9 +117,9 @@ define <32 x i8> @v16i8_2(<16 x i8> %a, <16 x i8> %b) { ; CHECK-NEXT: vrsub.vi v8, v8, 15 ; CHECK-NEXT: lui a0, 16 ; CHECK-NEXT: addi a0, a0, -1 -; CHECK-NEXT: vsetivli zero, 
1, e32, mf2, ta, ma +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; CHECK-NEXT: vmv.s.x v0, a0 -; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, mu +; CHECK-NEXT: vsetvli zero, zero, e8, m2, ta, mu ; CHECK-NEXT: vrgather.vv v10, v14, v8, v0.t ; CHECK-NEXT: vmv.v.v v8, v10 ; CHECK-NEXT: ret @@ -242,9 +242,9 @@ define <32 x i16> @v16i16_2(<16 x i16> %a, <16 x i16> %b) { ; CHECK-NEXT: vrsub.vi v12, v12, 15 ; CHECK-NEXT: lui a0, 16 ; CHECK-NEXT: addi a0, a0, -1 -; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; CHECK-NEXT: vmv.s.x v0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, mu +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, mu ; CHECK-NEXT: vrgather.vv v8, v16, v12, v0.t ; CHECK-NEXT: ret %v32i16 = shufflevector <16 x i16> %a, <16 x i16> %b, <32 x i32> @@ -376,9 +376,8 @@ define <32 x i32> @v16i32_2(<16 x i32> %a, <16 x i32> %b) { ; CHECK-NEXT: vrsub.vi v16, v16, 15 ; CHECK-NEXT: lui a0, 16 ; CHECK-NEXT: addi a0, a0, -1 -; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu ; CHECK-NEXT: vmv.s.x v0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, mu ; CHECK-NEXT: vrgatherei16.vv v8, v24, v16, v0.t ; CHECK-NEXT: ret %v32i32 = shufflevector <16 x i32> %a, <16 x i32> %b, <32 x i32> diff --git a/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-fixed.ll b/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-fixed.ll index f7ccf2c32cde0c..84e9e2801ff6db 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-fixed.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-fixed.ll @@ -19,9 +19,9 @@ define {<16 x i1>, <16 x i1>} @vector_deinterleave_v16i1_v32i1(<32 x i1> %vec) { ; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 ; CHECK-NEXT: vadd.vi v12, v11, -16 ; CHECK-NEXT: li a0, -256 -; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, ma +; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; CHECK-NEXT: vmv.s.x v0, a0 -; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, mu +; CHECK-NEXT: 
vsetvli zero, zero, e8, m1, ta, mu ; CHECK-NEXT: vrgather.vv v9, v8, v12, v0.t ; CHECK-NEXT: vmsne.vi v9, v9, 0 ; CHECK-NEXT: vadd.vi v12, v11, 1 diff --git a/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-vp.ll index 3b1e19ec4b3d21..bd510d26279c48 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-vp.ll @@ -133,7 +133,7 @@ define half @vpreduce_fadd_nxv4f16(half %s, %v, %v, %v, %v, %v, %v, %v, %v, %v, %v, % ; RV32-NEXT: # %bb.1: ; RV32-NEXT: mv a1, a3 ; RV32-NEXT: .LBB67_2: -; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; RV32-NEXT: vmv.s.x v25, a0 ; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; RV32-NEXT: vredmaxu.vs v25, v8, v25, v0.t @@ -1187,7 +1187,7 @@ define signext i32 @vpreduce_umax_nxv32i32(i32 signext %s, % ; RV64-NEXT: # %bb.1: ; RV64-NEXT: mv a1, a3 ; RV64-NEXT: .LBB67_2: -; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; RV64-NEXT: vmv.s.x v25, a2 ; RV64-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; RV64-NEXT: vredmaxu.vs v25, v8, v25, v0.t diff --git a/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.ll b/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.ll index 44a396ee29a8ab..73f651225da64d 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.ll @@ -810,10 +810,9 @@ for.end: ; preds = %for.body define @cross_block_mutate( %a, %b, ; CHECK-LABEL: cross_block_mutate: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetivli a0, 6, e16, m1, ta, ma -; CHECK-NEXT: vsetvli a1, zero, e32, m1, tu, ma +; CHECK-NEXT: vsetivli a0, 6, e32, m2, tu, ma ; CHECK-NEXT: vmv.s.x v8, a0 -; CHECK-NEXT: vsetivli zero, 6, e32, m2, ta, ma +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vadd.vv v8, v8, v10, v0.t ; CHECK-NEXT: ret %mask) { diff --git 
a/llvm/test/CodeGen/RISCV/rvv/vsetvli-valid-elen-fp.ll b/llvm/test/CodeGen/RISCV/rvv/vsetvli-valid-elen-fp.ll index 7bae1160a8a5fc..f92f5e934f9f47 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vsetvli-valid-elen-fp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vsetvli-valid-elen-fp.ll @@ -7,9 +7,8 @@ define void @foo(half %y, ptr %i64p) { ; CHECK-NO-FELEN64-LABEL: foo: ; CHECK-NO-FELEN64: # %bb.0: # %entry -; CHECK-NO-FELEN64-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; CHECK-NO-FELEN64-NEXT: vsetivli zero, 1, e16, mf4, ta, ma ; CHECK-NO-FELEN64-NEXT: vle64.v v8, (a0) -; CHECK-NO-FELEN64-NEXT: vsetivli zero, 1, e16, m1, ta, ma ; CHECK-NO-FELEN64-NEXT: vfmv.s.f v9, fa0 ; CHECK-NO-FELEN64-NEXT: #APP ; CHECK-NO-FELEN64-NEXT: # use v8 v9