Skip to content

Commit

Permalink
[AArch64] Adds SUBS and ADDS instructions to the MIPeepholeOpt.
Browse files Browse the repository at this point in the history
Implements ADDS/SUBS 24-bit immediate optimization using the
MIPeepholeOpt pass. This follows the pattern:

Optimize ([adds|subs] r, imm) -> ([ADDS|SUBS] ([ADD|SUB] r, #imm0, lsl #12), #imm1),
if imm == (imm0<<12)+imm1, and both imm0 and imm1 are non-zero 12-bit unsigned
integers.

Optimize ([adds|subs] r, imm) -> ([SUBS|ADDS] ([SUB|ADD] r, #imm0, lsl #12), #imm1),
if imm == -(imm0<<12)-imm1, and both imm0 and imm1 are non-zero 12-bit unsigned
integers.

The SplitAndOpcFunc type had to change the return type to an Opcode pair so that
the first add/sub is the regular instruction and the second is the flag setting
instruction. This required updating the code in the AND case.

Testing:

I ran a two stage bootstrap with this code.
Using the second stage compiler, I verified that the negation of an ADDS to SUBS
or vice versa is a valid optimization. Example V == -0x111111.

Reviewed By: dmgreen

Differential Revision: https://reviews.llvm.org/D118663
  • Loading branch information
red1bluelost authored and benshi001 committed Feb 12, 2022
1 parent 9e975e5 commit af45d0f
Show file tree
Hide file tree
Showing 5 changed files with 365 additions and 66 deletions.
42 changes: 10 additions & 32 deletions llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1547,27 +1547,6 @@ findCondCodeUseOperandIdxForBranchOrSelect(const MachineInstr &Instr) {
}
}

namespace {

/// Set of NZCV condition flags, one boolean per flag. A flag is true when it
/// has been recorded as used; every flag starts out false.
struct UsedNZCV {
  bool N = false;
  bool Z = false;
  bool C = false;
  bool V = false;

  UsedNZCV() = default;

  /// Accumulate \p UsedFlags into this set: a flag ends up set if it was set
  /// in either operand. \returns a reference to this object.
  UsedNZCV &operator|=(const UsedNZCV &UsedFlags) {
    N = N || UsedFlags.N;
    Z = Z || UsedFlags.Z;
    C = C || UsedFlags.C;
    V = V || UsedFlags.V;
    return *this;
  }
};

} // end anonymous namespace

/// Find a condition code used by the instruction.
/// Returns AArch64CC::Invalid if either the instruction does not use condition
/// codes or we don't optimize CmpInstr in the presence of such instructions.
Expand Down Expand Up @@ -1622,15 +1601,15 @@ static UsedNZCV getUsedNZCV(AArch64CC::CondCode CC) {
return UsedFlags;
}

/// \returns Conditions flags used after \p CmpInstr in its MachineBB if they
/// are not containing C or V flags and NZCV flags are not alive in successors
/// of the same \p CmpInstr and \p MI parent. \returns None otherwise.
/// \returns Condition flags used after \p CmpInstr in its MachineBB if NZCV
/// flags are not alive in successors of the same \p CmpInstr and \p MI parent.
/// \returns None otherwise.
///
/// Collect instructions using those flags in \p CCUseInstrs if provided.
static Optional<UsedNZCV>
examineCFlagsUse(MachineInstr &MI, MachineInstr &CmpInstr,
const TargetRegisterInfo &TRI,
SmallVectorImpl<MachineInstr *> *CCUseInstrs = nullptr) {
Optional<UsedNZCV>
llvm::examineCFlagsUse(MachineInstr &MI, MachineInstr &CmpInstr,
const TargetRegisterInfo &TRI,
SmallVectorImpl<MachineInstr *> *CCUseInstrs) {
MachineBasicBlock *CmpParent = CmpInstr.getParent();
if (MI.getParent() != CmpParent)
return None;
Expand All @@ -1652,8 +1631,6 @@ examineCFlagsUse(MachineInstr &MI, MachineInstr &CmpInstr,
if (Instr.modifiesRegister(AArch64::NZCV, &TRI))
break;
}
if (NZCVUsedAfterCmp.C || NZCVUsedAfterCmp.V)
return None;
return NZCVUsedAfterCmp;
}

Expand Down Expand Up @@ -1684,7 +1661,8 @@ static bool canInstrSubstituteCmpInstr(MachineInstr &MI, MachineInstr &CmpInstr,
if (!isADDSRegImm(CmpOpcode) && !isSUBSRegImm(CmpOpcode))
return false;

if (!examineCFlagsUse(MI, CmpInstr, TRI))
Optional<UsedNZCV> NZVCUsed = examineCFlagsUse(MI, CmpInstr, TRI);
if (!NZVCUsed || NZVCUsed->C || NZVCUsed->V)
return false;

AccessKind AccessToCheck = AK_Write;
Expand Down Expand Up @@ -1773,7 +1751,7 @@ static bool canCmpInstrBeRemoved(MachineInstr &MI, MachineInstr &CmpInstr,
examineCFlagsUse(MI, CmpInstr, TRI, &CCUseInstrs);
// Condition flags are not used in CmpInstr basic block successors and only
// Z or N flags allowed to be used after CmpInstr within its basic block
if (!NZCVUsedAfterCmp)
if (!NZCVUsedAfterCmp || NZCVUsedAfterCmp->C || NZCVUsedAfterCmp->V)
return false;
// Z or N flag used after CmpInstr must correspond to the flag used in MI
if ((MIUsedNZCV.Z && NZCVUsedAfterCmp->N) ||
Expand Down
27 changes: 27 additions & 0 deletions llvm/lib/Target/AArch64/AArch64InstrInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -356,6 +356,33 @@ class AArch64InstrInfo final : public AArch64GenInstrInfo {
const MachineRegisterInfo *MRI) const;
};

/// Set of NZCV condition flags, one boolean per flag. A flag is true when it
/// has been recorded as used; every flag starts out false.
struct UsedNZCV {
  bool N = false;
  bool Z = false;
  bool C = false;
  bool V = false;

  UsedNZCV() = default;

  /// Accumulate \p UsedFlags into this set: a flag ends up set if it was set
  /// in either operand. \returns a reference to this object.
  UsedNZCV &operator|=(const UsedNZCV &UsedFlags) {
    N = N || UsedFlags.N;
    Z = Z || UsedFlags.Z;
    C = C || UsedFlags.C;
    V = V || UsedFlags.V;
    return *this;
  }
};

/// Examine how the NZCV condition flags are used after \p CmpInstr.
///
/// \returns Condition flags used after \p CmpInstr in its MachineBB if NZCV
/// flags are not alive in successors of the same \p CmpInstr and \p MI parent.
/// \returns None otherwise.
///
/// Collect instructions using those flags in \p CCUseInstrs if provided
/// (the argument is optional and defaults to nullptr).
Optional<UsedNZCV>
examineCFlagsUse(MachineInstr &MI, MachineInstr &CmpInstr,
const TargetRegisterInfo &TRI,
SmallVectorImpl<MachineInstr *> *CCUseInstrs = nullptr);

/// Return true if there is an instruction /after/ \p DefMI and before \p UseMI
/// which either reads or clobbers NZCV.
bool isNZCVTouchedInInstructionRange(const MachineInstr &DefMI,
Expand Down
133 changes: 108 additions & 25 deletions llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -60,12 +60,13 @@ struct AArch64MIPeepholeOpt : public MachineFunctionPass {
MachineLoopInfo *MLI;
MachineRegisterInfo *MRI;

using OpcodePair = std::pair<unsigned, unsigned>;
template <typename T>
using SplitAndOpcFunc =
std::function<Optional<unsigned>(T, unsigned, T &, T &)>;
std::function<Optional<OpcodePair>(T, unsigned, T &, T &)>;
using BuildMIFunc =
std::function<void(MachineInstr &, unsigned, unsigned, unsigned, Register,
Register, Register)>;
std::function<void(MachineInstr &, OpcodePair, unsigned, unsigned,
Register, Register, Register)>;

/// For instructions where an immediate operand could be split into two
/// separate immediate instructions, use the splitTwoPartImm to handle the
Expand Down Expand Up @@ -93,6 +94,10 @@ struct AArch64MIPeepholeOpt : public MachineFunctionPass {
bool visitADDSUB(unsigned PosOpc, unsigned NegOpc, MachineInstr &MI,
SmallSetVector<MachineInstr *, 8> &ToBeRemoved);
template <typename T>
bool visitADDSSUBS(OpcodePair PosOpcs, OpcodePair NegOpcs, MachineInstr &MI,
SmallSetVector<MachineInstr *, 8> &ToBeRemoved);

template <typename T>
bool visitAND(unsigned Opc, MachineInstr &MI,
SmallSetVector<MachineInstr *, 8> &ToBeRemoved);
bool visitORR(MachineInstr &MI,
Expand Down Expand Up @@ -171,20 +176,20 @@ bool AArch64MIPeepholeOpt::visitAND(

return splitTwoPartImm<T>(
MI, ToBeRemoved,
[Opc](T Imm, unsigned RegSize, T &Imm0, T &Imm1) -> Optional<unsigned> {
[Opc](T Imm, unsigned RegSize, T &Imm0, T &Imm1) -> Optional<OpcodePair> {
if (splitBitmaskImm(Imm, RegSize, Imm0, Imm1))
return Opc;
return std::make_pair(Opc, Opc);
return None;
},
[&TII = TII](MachineInstr &MI, unsigned Opcode, unsigned Imm0,
[&TII = TII](MachineInstr &MI, OpcodePair Opcode, unsigned Imm0,
unsigned Imm1, Register SrcReg, Register NewTmpReg,
Register NewDstReg) {
DebugLoc DL = MI.getDebugLoc();
MachineBasicBlock *MBB = MI.getParent();
BuildMI(*MBB, MI, DL, TII->get(Opcode), NewTmpReg)
BuildMI(*MBB, MI, DL, TII->get(Opcode.first), NewTmpReg)
.addReg(SrcReg)
.addImm(Imm0);
BuildMI(*MBB, MI, DL, TII->get(Opcode), NewDstReg)
BuildMI(*MBB, MI, DL, TII->get(Opcode.second), NewDstReg)
.addReg(NewTmpReg)
.addImm(Imm1);
});
Expand Down Expand Up @@ -273,23 +278,64 @@ bool AArch64MIPeepholeOpt::visitADDSUB(
return splitTwoPartImm<T>(
MI, ToBeRemoved,
[PosOpc, NegOpc](T Imm, unsigned RegSize, T &Imm0,
T &Imm1) -> Optional<unsigned> {
T &Imm1) -> Optional<OpcodePair> {
if (splitAddSubImm(Imm, RegSize, Imm0, Imm1))
return PosOpc;
return std::make_pair(PosOpc, PosOpc);
if (splitAddSubImm(-Imm, RegSize, Imm0, Imm1))
return NegOpc;
return std::make_pair(NegOpc, NegOpc);
return None;
},
[&TII = TII](MachineInstr &MI, unsigned Opcode, unsigned Imm0,
[&TII = TII](MachineInstr &MI, OpcodePair Opcode, unsigned Imm0,
unsigned Imm1, Register SrcReg, Register NewTmpReg,
Register NewDstReg) {
DebugLoc DL = MI.getDebugLoc();
MachineBasicBlock *MBB = MI.getParent();
BuildMI(*MBB, MI, DL, TII->get(Opcode.first), NewTmpReg)
.addReg(SrcReg)
.addImm(Imm0)
.addImm(12);
BuildMI(*MBB, MI, DL, TII->get(Opcode.second), NewDstReg)
.addReg(NewTmpReg)
.addImm(Imm1)
.addImm(0);
});
}

template <typename T>
bool AArch64MIPeepholeOpt::visitADDSSUBS(
OpcodePair PosOpcs, OpcodePair NegOpcs, MachineInstr &MI,
SmallSetVector<MachineInstr *, 8> &ToBeRemoved) {
// Try the same transformation as ADDSUB but with additional requirement
// that the condition code usages are only for Equal and Not Equal
return splitTwoPartImm<T>(
MI, ToBeRemoved,
[PosOpcs, NegOpcs, &MI, &TRI = TRI, &MRI = MRI](
T Imm, unsigned RegSize, T &Imm0, T &Imm1) -> Optional<OpcodePair> {
OpcodePair OP;
if (splitAddSubImm(Imm, RegSize, Imm0, Imm1))
OP = PosOpcs;
else if (splitAddSubImm(-Imm, RegSize, Imm0, Imm1))
OP = NegOpcs;
else
return None;
// Check conditional uses last since scanning the following instructions
// is expensive
MachineInstr &SrcMI = *MRI->getUniqueVRegDef(MI.getOperand(1).getReg());
Optional<UsedNZCV> NZCVUsed = examineCFlagsUse(SrcMI, MI, *TRI);
if (!NZCVUsed || NZCVUsed->C || NZCVUsed->V)
return None;
return OP;
},
[&TII = TII](MachineInstr &MI, OpcodePair Opcode, unsigned Imm0,
unsigned Imm1, Register SrcReg, Register NewTmpReg,
Register NewDstReg) {
DebugLoc DL = MI.getDebugLoc();
MachineBasicBlock *MBB = MI.getParent();
BuildMI(*MBB, MI, DL, TII->get(Opcode), NewTmpReg)
BuildMI(*MBB, MI, DL, TII->get(Opcode.first), NewTmpReg)
.addReg(SrcReg)
.addImm(Imm0)
.addImm(12);
BuildMI(*MBB, MI, DL, TII->get(Opcode), NewDstReg)
BuildMI(*MBB, MI, DL, TII->get(Opcode.second), NewDstReg)
.addReg(NewTmpReg)
.addImm(Imm1)
.addImm(0);
Expand Down Expand Up @@ -357,32 +403,49 @@ bool AArch64MIPeepholeOpt::splitTwoPartImm(
// number since it was sign extended when we assign to the 64-bit Imm.
if (SubregToRegMI)
Imm &= 0xFFFFFFFF;
unsigned Opcode;
OpcodePair Opcode;
if (auto R = SplitAndOpc(Imm, RegSize, Imm0, Imm1))
Opcode = R.getValue();
else
return false;

// Create new ADD/SUB MIs.
// Create new MIs using the first and second opcodes. Opcodes might differ for
// flag setting operations that should only set flags on second instruction.
// NewTmpReg = Opcode.first SrcReg Imm0
// NewDstReg = Opcode.second NewTmpReg Imm1

// Determine register classes for destinations and register operands
MachineFunction *MF = MI.getMF();
const TargetRegisterClass *RC =
TII->getRegClass(TII->get(Opcode), 0, TRI, *MF);
const TargetRegisterClass *ORC =
TII->getRegClass(TII->get(Opcode), 1, TRI, *MF);
const TargetRegisterClass *FirstInstrDstRC =
TII->getRegClass(TII->get(Opcode.first), 0, TRI, *MF);
const TargetRegisterClass *FirstInstrOperandRC =
TII->getRegClass(TII->get(Opcode.first), 1, TRI, *MF);
const TargetRegisterClass *SecondInstrDstRC =
(Opcode.first == Opcode.second)
? FirstInstrDstRC
: TII->getRegClass(TII->get(Opcode.second), 0, TRI, *MF);
const TargetRegisterClass *SecondInstrOperandRC =
(Opcode.first == Opcode.second)
? FirstInstrOperandRC
: TII->getRegClass(TII->get(Opcode.second), 1, TRI, *MF);

// Get old registers destinations and new register destinations
Register DstReg = MI.getOperand(0).getReg();
Register SrcReg = MI.getOperand(1).getReg();
Register NewTmpReg = MRI->createVirtualRegister(RC);
Register NewDstReg = MRI->createVirtualRegister(RC);
Register NewTmpReg = MRI->createVirtualRegister(FirstInstrDstRC);
Register NewDstReg = MRI->createVirtualRegister(SecondInstrDstRC);

MRI->constrainRegClass(SrcReg, RC);
MRI->constrainRegClass(NewTmpReg, ORC);
// Constrain registers based on their new uses
MRI->constrainRegClass(SrcReg, FirstInstrOperandRC);
MRI->constrainRegClass(NewTmpReg, SecondInstrOperandRC);
MRI->constrainRegClass(NewDstReg, MRI->getRegClass(DstReg));

// Call the delegating operation to build the instruction
BuildInstr(MI, Opcode, Imm0, Imm1, SrcReg, NewTmpReg, NewDstReg);

MRI->replaceRegWith(DstReg, NewDstReg);
// replaceRegWith changes MI's definition register. Keep it for SSA form until
// deleting MI.
MRI->replaceRegWith(DstReg, NewDstReg);
MI.getOperand(0).setReg(DstReg);

// Record the MIs that need to be removed.
Expand Down Expand Up @@ -439,6 +502,26 @@ bool AArch64MIPeepholeOpt::runOnMachineFunction(MachineFunction &MF) {
Changed = visitADDSUB<uint64_t>(AArch64::SUBXri, AArch64::ADDXri, MI,
ToBeRemoved);
break;
case AArch64::ADDSWrr:
Changed = visitADDSSUBS<uint32_t>({AArch64::ADDWri, AArch64::ADDSWri},
{AArch64::SUBWri, AArch64::SUBSWri},
MI, ToBeRemoved);
break;
case AArch64::SUBSWrr:
Changed = visitADDSSUBS<uint32_t>({AArch64::SUBWri, AArch64::SUBSWri},
{AArch64::ADDWri, AArch64::ADDSWri},
MI, ToBeRemoved);
break;
case AArch64::ADDSXrr:
Changed = visitADDSSUBS<uint64_t>({AArch64::ADDXri, AArch64::ADDSXri},
{AArch64::SUBXri, AArch64::SUBSXri},
MI, ToBeRemoved);
break;
case AArch64::SUBSXrr:
Changed = visitADDSSUBS<uint64_t>({AArch64::SUBXri, AArch64::SUBSXri},
{AArch64::ADDXri, AArch64::ADDSXri},
MI, ToBeRemoved);
break;
}
}
}
Expand Down
Loading

0 comments on commit af45d0f

Please sign in to comment.