From 9028c55ba1089fcf5e8dc76c0258858b4f6276d9 Mon Sep 17 00:00:00 2001 From: Abmcar Date: Sun, 29 Mar 2026 13:22:11 +0800 Subject: [PATCH 1/6] perf(compiler): extend x86 cg peephole rules --- src/compiler/target/x86/x86_cg_peephole.cpp | 959 ++++++++++++++++++-- src/compiler/target/x86/x86_cg_peephole.h | 5 + 2 files changed, 910 insertions(+), 54 deletions(-) diff --git a/src/compiler/target/x86/x86_cg_peephole.cpp b/src/compiler/target/x86/x86_cg_peephole.cpp index bf7cb500c..bd9345fb2 100644 --- a/src/compiler/target/x86/x86_cg_peephole.cpp +++ b/src/compiler/target/x86/x86_cg_peephole.cpp @@ -5,10 +5,673 @@ #include "compiler/cgir/pass/cg_register_info.h" #include "compiler/llvm-prebuild/Target/X86/X86Subtarget.h" #include "compiler/target/x86/x86_constants.h" +#include +#include +#include using namespace llvm; namespace COMPILER { +namespace { + +constexpr unsigned InvalidOpcode = 0; +constexpr size_t MaxCmpFoldFunctionBlocks = 10000; + +bool isTestOpcode(unsigned Opcode) { + switch (Opcode) { + case X86::TEST8rr: + case X86::TEST16rr: + case X86::TEST32rr: + case X86::TEST64rr: + return true; + default: + return false; + } +} + +bool isSelfTest(const CgInstruction &Inst, CgRegister Reg) { + if (!isTestOpcode(Inst.getOpcode())) { + return false; + } + + const auto &LHS = Inst.getOperand(0); + const auto &RHS = Inst.getOperand(1); + return LHS.isReg() && RHS.isReg() && LHS.getReg() == Reg && + RHS.getReg() == Reg; +} + +X86::CondCode getOppositeCond(X86::CondCode CC) { + switch (CC) { + case X86::COND_O: + return X86::COND_NO; + case X86::COND_NO: + return X86::COND_O; + case X86::COND_B: + return X86::COND_AE; + case X86::COND_AE: + return X86::COND_B; + case X86::COND_E: + return X86::COND_NE; + case X86::COND_NE: + return X86::COND_E; + case X86::COND_BE: + return X86::COND_A; + case X86::COND_A: + return X86::COND_BE; + case X86::COND_S: + return X86::COND_NS; + case X86::COND_NS: + return X86::COND_S; + case X86::COND_P: + return X86::COND_NP; + case 
X86::COND_NP: + return X86::COND_P; + case X86::COND_L: + return X86::COND_GE; + case X86::COND_GE: + return X86::COND_L; + case X86::COND_LE: + return X86::COND_G; + case X86::COND_G: + return X86::COND_LE; + default: + return X86::COND_INVALID; + } +} + +bool getFoldedBranchCond(int64_t BranchCond, int64_t SetCond, + int64_t &NewCond) { + if (BranchCond == X86::COND_NE) { + NewCond = SetCond; + return true; + } + if (BranchCond != X86::COND_E) { + return false; + } + + X86::CondCode Opposite = getOppositeCond(static_cast(SetCond)); + if (Opposite == X86::COND_INVALID) { + return false; + } + + NewCond = Opposite; + return true; +} + +bool isImmMoveValue(const CgInstruction &Inst, int64_t Value) { + switch (Inst.getOpcode()) { + case X86::MOV8ri: + case X86::MOV16ri: + case X86::MOV32ri: + case X86::MOV32ri64: + case X86::MOV64ri32: + case X86::MOV64ri: + break; + default: + return false; + } + + return Inst.getNumOperands() >= 2 && Inst.getOperand(1).isImm() && + Inst.getOperand(1).getImm() == Value; +} + +bool getImmMoveDstReg(const CgInstruction &Inst, int64_t Value, + CgRegister &DstReg) { + if (!isImmMoveValue(Inst, Value) || Inst.getNumOperands() < 1 || + !Inst.getOperand(0).isReg()) { + return false; + } + + DstReg = Inst.getOperand(0).getReg(); + return true; +} + +unsigned getAdcImmOpcode(unsigned Opcode) { + switch (Opcode) { + case X86::ADC8rr: + return X86::ADC8ri; + case X86::ADC16rr: + return X86::ADC16ri; + case X86::ADC32rr: + return X86::ADC32ri; + case X86::ADC64rr: + return X86::ADC64ri32; + default: + return InvalidOpcode; + } +} + +bool isAddRegOpcode(unsigned Opcode) { + switch (Opcode) { + case X86::ADD8rr: + case X86::ADD16rr: + case X86::ADD32rr: + case X86::ADD64rr: + return true; + default: + return false; + } +} + +bool isSelfImmZeroLogic(const CgInstruction &Inst, CgRegister Reg) { + if (Inst.getNumOperands() < 3 || !Inst.getOperand(0).isReg() || + !Inst.getOperand(1).isReg() || !Inst.getOperand(2).isImm() || + 
Inst.getOperand(0).getReg() != Reg || + Inst.getOperand(1).getReg() != Reg || Inst.getOperand(2).getImm() != 0) { + return false; + } + + switch (Inst.getOpcode()) { + case X86::OR8ri: + case X86::OR16ri8: + case X86::OR16ri: + case X86::OR32ri8: + case X86::OR32ri: + case X86::OR64ri8: + case X86::OR64ri32: + case X86::AND8ri: + case X86::AND16ri8: + case X86::AND16ri: + case X86::AND32ri8: + case X86::AND32ri: + case X86::AND64ri8: + case X86::AND64ri32: + return true; + default: + return false; + } +} + +bool isSelfAndZeroImm(const CgInstruction &Inst, CgRegister Reg) { + if (!isSelfImmZeroLogic(Inst, Reg)) { + return false; + } + + switch (Inst.getOpcode()) { + case X86::AND8ri: + case X86::AND16ri8: + case X86::AND16ri: + case X86::AND32ri8: + case X86::AND32ri: + case X86::AND64ri8: + case X86::AND64ri32: + return true; + default: + return false; + } +} + +bool isNoOpImmOpcode(unsigned Opcode) { + switch (Opcode) { + case X86::ADD64ri8: + case X86::OR64ri8: + case X86::OR64ri32: + return true; + default: + return false; + } +} + +bool isZeroLogicChainOpcode(const CgInstruction &Inst, CgRegister Reg) { + switch (Inst.getOpcode()) { + case X86::OR64ri8: + case X86::OR64ri32: + case X86::AND64ri8: + case X86::AND64ri32: + break; + default: + return false; + } + + if (Inst.getNumOperands() < 3 || !Inst.getOperand(0).isReg() || + !Inst.getOperand(1).isReg() || !Inst.getOperand(2).isImm()) { + return false; + } + + return Inst.getOperand(0).getReg() == Reg && + Inst.getOperand(1).getReg() == Reg; +} + +bool areFlagsDeadAfter(CgBasicBlock &MBB, CgBasicBlock::iterator MII) { + auto LocalMII = MII; + ++LocalMII; + for (; LocalMII != MBB.end(); ++LocalMII) { + auto &Inst = *LocalMII; + if (Inst.readsRegister(X86::EFLAGS)) { + return false; + } + if (Inst.modifiesRegister(X86::EFLAGS)) { + return true; + } + } + + return false; +} + +bool isFullCopyFromReg(const CgInstruction &Inst, CgRegister SrcReg, + CgRegister &DstReg) { + if (!Inst.isFullCopy() || 
Inst.getNumOperands() < 2 || + !Inst.getOperand(0).isReg() || !Inst.getOperand(1).isReg() || + Inst.getOperand(1).getReg() != SrcReg) { + return false; + } + + DstReg = Inst.getOperand(0).getReg(); + return true; +} + +bool isFullCopyToReg(const CgInstruction &Inst, CgRegister DstReg, + CgRegister &SrcReg) { + if (!Inst.isFullCopy() || Inst.getNumOperands() < 2 || + !Inst.getOperand(0).isReg() || !Inst.getOperand(1).isReg() || + Inst.getOperand(0).getReg() != DstReg) { + return false; + } + + SrcReg = Inst.getOperand(1).getReg(); + return true; +} + +bool isZeroExtendCopyFromReg(const CgInstruction &Inst, CgRegister SrcReg, + CgRegister &DstReg) { + switch (Inst.getOpcode()) { + case X86::MOVZX32rr8: + case X86::MOVZX32rr16: + case X86::MOVZX64rr8: + case X86::MOVZX64rr16: + break; + default: + return false; + } + + if (Inst.getNumOperands() < 2 || !Inst.getOperand(0).isReg() || + !Inst.getOperand(1).isReg() || Inst.getOperand(1).getReg() != SrcReg) { + return false; + } + + DstReg = Inst.getOperand(0).getReg(); + return true; +} + +bool isZeroExtendCopyToReg(const CgInstruction &Inst, CgRegister DstReg, + CgRegister &SrcReg) { + switch (Inst.getOpcode()) { + case X86::MOVZX32rr8: + case X86::MOVZX32rr16: + case X86::MOVZX64rr8: + case X86::MOVZX64rr16: + break; + default: + return false; + } + + if (Inst.getNumOperands() < 2 || !Inst.getOperand(0).isReg() || + !Inst.getOperand(1).isReg() || Inst.getOperand(0).getReg() != DstReg) { + return false; + } + + SrcReg = Inst.getOperand(1).getReg(); + return true; +} + +bool getBoolOrOtherReg(const CgInstruction &Inst, CgRegister BoolReg, + CgRegister &DstReg, CgRegister &OtherReg) { + switch (Inst.getOpcode()) { + case X86::OR32rr: + case X86::OR64rr: + break; + default: + return false; + } + + if (Inst.getNumOperands() < 3 || !Inst.getOperand(0).isReg() || + !Inst.getOperand(1).isReg() || !Inst.getOperand(2).isReg()) { + return false; + } + + const CgRegister LHS = Inst.getOperand(1).getReg(); + const CgRegister RHS = 
Inst.getOperand(2).getReg(); + if (LHS == BoolReg) { + OtherReg = RHS; + } else if (RHS == BoolReg) { + OtherReg = LHS; + } else { + return false; + } + + DstReg = Inst.getOperand(0).getReg(); + return true; +} + +bool collectZeroDefChainBefore(CgBasicBlock &MBB, + CgBasicBlock::iterator StartMII, + CgRegister ZeroReg, + std::vector &ZeroToErase) { + CgRegister CurrentReg = ZeroReg; + for (auto LocalMII = StartMII; LocalMII != MBB.begin();) { + --LocalMII; + auto &Inst = *LocalMII; + if (!Inst.modifiesRegister(CurrentReg)) { + continue; + } + + CgRegister PrevReg = 0; + CgRegister ZeroDstReg = 0; + if (getImmMoveDstReg(Inst, 0, ZeroDstReg) && ZeroDstReg == CurrentReg) { + ZeroToErase.push_back(&Inst); + std::reverse(ZeroToErase.begin(), ZeroToErase.end()); + return true; + } + if (isSelfAndZeroImm(Inst, CurrentReg)) { + ZeroToErase.push_back(&Inst); + std::reverse(ZeroToErase.begin(), ZeroToErase.end()); + return true; + } + if (isSelfImmZeroLogic(Inst, CurrentReg)) { + ZeroToErase.push_back(&Inst); + continue; + } + if (isFullCopyToReg(Inst, CurrentReg, PrevReg) || + isZeroExtendCopyToReg(Inst, CurrentReg, PrevReg)) { + ZeroToErase.push_back(&Inst); + CurrentReg = PrevReg; + continue; + } + + return false; + } + + return false; +} + +CgOperand cloneRegOperand(const CgOperand &Operand) { + ZEN_ASSERT(Operand.isReg()); + CgOperand NewOperand = CgOperand::createRegOperand( + Operand.getReg(), Operand.isDef(), Operand.isImplicit(), + Operand.isUse() && Operand.isKill(), Operand.isDef() && Operand.isDead(), + Operand.isUndef()); + if (Operand.getSubReg() != 0) { + NewOperand.setSubReg(Operand.getSubReg()); + } + if (Operand.isInternalRead()) { + NewOperand.setIsInternalRead(); + } + if (Operand.isDef() && Operand.isEarlyClobber()) { + NewOperand.setIsEarlyClobber(); + } + if (CgRegister::isPhysicalRegister(Operand.getReg()) && + Operand.isRenamable()) { + NewOperand.setIsRenamable(); + } + return NewOperand; +} + +CgInstruction *getImmMoveDef(CgRegister Reg, 
CgRegisterInfo &MRI, + int64_t ImmValue) { + std::vector Visited; + while (Reg.isVirtual()) { + if (std::find(Visited.begin(), Visited.end(), Reg) != Visited.end()) { + return nullptr; + } + Visited.push_back(Reg); + + CgInstruction *Def = MRI.getUniqueVRegDef(Reg); + if (Def == nullptr) { + return nullptr; + } + if (Def->isFullCopy()) { + if (!Def->getOperand(1).isReg()) { + return nullptr; + } + Reg = Def->getOperand(1).getReg(); + continue; + } + + return isImmMoveValue(*Def, ImmValue) ? Def : nullptr; + } + + return nullptr; +} + +bool getSimplifiedSetccAfterTest(int64_t SetCond, bool &UseConst, + int64_t &ValueOrCond) { + UseConst = false; + switch (static_cast(SetCond)) { + case X86::COND_O: + case X86::COND_B: + UseConst = true; + ValueOrCond = 0; + return true; + case X86::COND_NO: + case X86::COND_AE: + UseConst = true; + ValueOrCond = 1; + return true; + case X86::COND_A: + ValueOrCond = X86::COND_NE; + return true; + case X86::COND_BE: + ValueOrCond = X86::COND_E; + return true; + case X86::COND_L: + ValueOrCond = X86::COND_S; + return true; + case X86::COND_GE: + ValueOrCond = X86::COND_NS; + return true; + default: + return false; + } +} + +bool matchDirectBoolBranch(CgBasicBlock &MBB, CgBasicBlock::iterator StartMII, + CgRegister BoolReg, int64_t SetCond, + std::vector &ToErase, + CgInstruction *&BranchInst, int64_t &FoldedCond) { + std::vector LocalToErase; + std::vector ZeroChainToErase; + CgRegister ZeroReg = 0; + for (auto LocalMII = StartMII; LocalMII != MBB.end(); ++LocalMII) { + auto &Inst = *LocalMII; + CgRegister CopiedReg = 0; + if (isFullCopyFromReg(Inst, BoolReg, CopiedReg)) { + LocalToErase.push_back(&Inst); + BoolReg = CopiedReg; + continue; + } + + if (ZeroReg != 0 && isFullCopyFromReg(Inst, ZeroReg, CopiedReg)) { + ZeroChainToErase.push_back(&Inst); + ZeroReg = CopiedReg; + continue; + } + + if (ZeroReg != 0 && isZeroExtendCopyFromReg(Inst, ZeroReg, CopiedReg)) { + ZeroChainToErase.push_back(&Inst); + ZeroReg = CopiedReg; + continue; + 
} + + CgRegister ZeroDstReg = 0; + if (getImmMoveDstReg(Inst, 0, ZeroDstReg)) { + ZeroChainToErase.clear(); + ZeroChainToErase.push_back(&Inst); + ZeroReg = ZeroDstReg; + continue; + } + + CgRegister OrDstReg = 0; + CgRegister OrOtherReg = 0; + if (getBoolOrOtherReg(Inst, BoolReg, OrDstReg, OrOtherReg)) { + std::vector MatchedZeroToErase; + if (ZeroReg != 0 && OrOtherReg == ZeroReg) { + MatchedZeroToErase = ZeroChainToErase; + } else if (!collectZeroDefChainBefore(MBB, LocalMII, OrOtherReg, + MatchedZeroToErase)) { + MatchedZeroToErase.clear(); + } + + if (MatchedZeroToErase.empty()) { + ZeroReg = 0; + ZeroChainToErase.clear(); + } else { + LocalToErase.insert(LocalToErase.end(), MatchedZeroToErase.begin(), + MatchedZeroToErase.end()); + } + + if (!MatchedZeroToErase.empty()) { + ZeroChainToErase.clear(); + LocalToErase.push_back(&Inst); + ZeroReg = 0; + BoolReg = OrDstReg; + continue; + } + } + + if (isSelfTest(Inst, BoolReg)) { + auto BranchMII = std::next(LocalMII); + if (BranchMII == MBB.end()) { + return false; + } + + auto &CandidateBranchInst = *BranchMII; + if (CandidateBranchInst.getOpcode() != X86::JCC_1 || + !getFoldedBranchCond(CandidateBranchInst.getOperand(1).getImm(), + SetCond, FoldedCond)) { + return false; + } + + LocalToErase.push_back(&Inst); + ToErase.insert(ToErase.end(), LocalToErase.begin(), LocalToErase.end()); + BranchInst = &CandidateBranchInst; + return true; + } + + if (ZeroReg != 0 && + (Inst.readsRegister(ZeroReg) || Inst.modifiesRegister(ZeroReg))) { + ZeroReg = 0; + ZeroChainToErase.clear(); + } + + if (Inst.readsRegister(X86::EFLAGS) || Inst.modifiesRegister(X86::EFLAGS) || + Inst.readsRegister(BoolReg) || Inst.modifiesRegister(BoolReg)) { + return false; + } + } + + return false; +} + +bool matchCmovBoolBranch(CgBasicBlock &MBB, CgBasicBlock::iterator StartMII, + CgRegister BoolReg, int64_t SetCond, + std::vector &ToErase, + CgInstruction *&BranchInst, int64_t &FoldedCond) { + std::vector LocalToErase; + CgInstruction *ZeroInst = 
nullptr; + CgInstruction *OneInst = nullptr; + for (auto LocalMII = StartMII; LocalMII != MBB.end(); ++LocalMII) { + auto &Inst = *LocalMII; + if (isImmMoveValue(Inst, 0)) { + ZeroInst = &Inst; + continue; + } + if (isImmMoveValue(Inst, 1)) { + OneInst = &Inst; + continue; + } + + CgRegister CopiedReg = 0; + if (isFullCopyFromReg(Inst, BoolReg, CopiedReg)) { + LocalToErase.push_back(&Inst); + BoolReg = CopiedReg; + continue; + } + + if (!isSelfTest(Inst, BoolReg)) { + if ((ZeroInst != nullptr && ZeroInst->getOperand(0).isReg() && + (Inst.readsRegister(ZeroInst->getOperand(0).getReg()) || + Inst.modifiesRegister(ZeroInst->getOperand(0).getReg()))) || + (OneInst != nullptr && OneInst->getOperand(0).isReg() && + (Inst.readsRegister(OneInst->getOperand(0).getReg()) || + Inst.modifiesRegister(OneInst->getOperand(0).getReg())))) { + return false; + } + if (Inst.readsRegister(X86::EFLAGS) || + Inst.modifiesRegister(X86::EFLAGS) || Inst.readsRegister(BoolReg) || + Inst.modifiesRegister(BoolReg)) { + return false; + } + continue; + } + + if (ZeroInst == nullptr || OneInst == nullptr || + !ZeroInst->getOperand(0).isReg() || !OneInst->getOperand(0).isReg()) { + return false; + } + + auto CmovMII = std::next(LocalMII); + if (CmovMII == MBB.end()) { + return false; + } + CgRegister ZeroReg = ZeroInst->getOperand(0).getReg(); + CgRegister CmovCopiedZeroReg = 0; + if (isFullCopyFromReg(*CmovMII, ZeroReg, CmovCopiedZeroReg)) { + LocalToErase.push_back(&*CmovMII); + ZeroReg = CmovCopiedZeroReg; + ++CmovMII; + if (CmovMII == MBB.end()) { + return false; + } + } + auto &CmovInst = *CmovMII; + if (CmovInst.getOpcode() != X86::CMOV64rr || + CmovInst.getNumOperands() < 4 || !CmovInst.getOperand(0).isReg() || + !CmovInst.getOperand(1).isReg() || !CmovInst.getOperand(2).isReg() || + !CmovInst.getOperand(3).isImm() || + CmovInst.getOperand(1).getReg() != ZeroReg || + CmovInst.getOperand(2).getReg() != OneInst->getOperand(0).getReg()) { + return false; + } + + auto FinalTestMII = 
std::next(CmovMII); + if (FinalTestMII == MBB.end()) { + return false; + } + auto &FinalTestInst = *FinalTestMII; + if (!isSelfTest(FinalTestInst, CmovInst.getOperand(0).getReg())) { + return false; + } + + auto BranchMII = std::next(FinalTestMII); + if (BranchMII == MBB.end()) { + return false; + } + + auto &CandidateBranchInst = *BranchMII; + if (CandidateBranchInst.getOpcode() != X86::JCC_1) { + return false; + } + + int64_t BoolBranchCond = 0; + if (!getFoldedBranchCond(CandidateBranchInst.getOperand(1).getImm(), + CmovInst.getOperand(3).getImm(), BoolBranchCond) || + !getFoldedBranchCond(BoolBranchCond, SetCond, FoldedCond)) { + return false; + } + + LocalToErase.push_back(&Inst); + LocalToErase.push_back(&CmovInst); + LocalToErase.push_back(&FinalTestInst); + ToErase.insert(ToErase.end(), LocalToErase.begin(), LocalToErase.end()); + BranchInst = &CandidateBranchInst; + return true; + } + + return false; +} + +} // namespace + void X86CgPeephole::peepholeOptimizeBB(CgBasicBlock &MBB) { if (MBB.empty()) { return; @@ -23,88 +686,225 @@ void X86CgPeephole::peepholeOptimizeBB(CgBasicBlock &MBB) { void X86CgPeephole::peepholeOptimize(CgBasicBlock &MBB, CgBasicBlock::iterator &MII) { auto &Inst = *MII; + if (isTestOpcode(Inst.getOpcode())) { + optimizeTestSetcc(MBB, MII); + } if (Inst.isCompare()) { optimizeCmp(MBB, MII); } + + optimizeAdcZeroReg(MBB, MII); + optimizeAddZeroReg(MBB, MII); + optimizeNoOpImm(MBB, MII); +} + +void X86CgPeephole::optimizeTestSetcc(CgBasicBlock &MBB, + CgBasicBlock::iterator &MII) { + auto &Inst = *MII; + if (!isTestOpcode(Inst.getOpcode())) { + return; + } + + auto LocalMII = std::next(MII); + if (LocalMII == MBB.end()) { + return; + } + + auto &SetccInst = *LocalMII; + if (SetccInst.getOpcode() != X86::SETCCr || SetccInst.getNumOperands() < 2 || + !SetccInst.getOperand(0).isReg() || !SetccInst.getOperand(1).isImm()) { + return; + } + + bool UseConst = false; + int64_t ValueOrCond = 0; + if 
(!getSimplifiedSetccAfterTest(SetccInst.getOperand(1).getImm(), UseConst, + ValueOrCond)) { + return; + } + + if (!UseConst) { + SetccInst.getOperand(1).setImm(ValueOrCond); + return; + } + + SmallVector Operands{ + cloneRegOperand(SetccInst.getOperand(0)), + CgOperand::createImmOperand(ValueOrCond), + }; + MBB.getParent()->replaceCgInstruction( + &SetccInst, MBB.getParent()->getTargetInstrInfo().get(X86::MOV8ri), + Operands); } + void X86CgPeephole::optimizeCmp(CgBasicBlock &MBB, CgBasicBlock::iterator &MII) { - auto MIE = MBB.end(); - // cmp/test -> setcc cond -> [movzx] -> test -> jne - // optimized to: cmp/test -> jcc cond + if (MBB.getParent()->size() > MaxCmpFoldFunctionBlocks) { + return; + } + auto LocalMII = MII; - LocalMII++; - if (LocalMII == MIE) + ++LocalMII; + if (LocalMII == MBB.end()) { return; - auto &Inst1 = *LocalMII; - if (Inst1.getOpcode() != X86::SETCCr) + } + + auto &SetccInst = *LocalMII; + if (SetccInst.getOpcode() != X86::SETCCr) { return; - const auto &Op1 = Inst1.getOperand(0); - if (!Op1.isReg()) + } + + const auto &SetccDst = SetccInst.getOperand(0); + if (!SetccDst.isReg()) { return; - auto CC = Inst1.getOperand(1).getImm(); - unsigned TestReg = Op1.getReg(); - CgInstruction *MovzxInst = nullptr; + } - LocalMII++; - if (LocalMII == MIE) + const auto SetCond = SetccInst.getOperand(1).getImm(); + std::vector ToErase; + ToErase.push_back(&SetccInst); + CgRegister TestReg = SetccDst.getReg(); + + ++LocalMII; + if (LocalMII == MBB.end()) { return; - auto &Inst2 = *LocalMII; - if (Inst2.getOpcode() == X86::MOVZX32rr8) { - const auto &MovzxDst = Inst2.getOperand(0); - const auto &MovzxSrc = Inst2.getOperand(1); - if (!MovzxDst.isReg() || !MovzxSrc.isReg() || - MovzxSrc.getReg() != Op1.getReg()) + } + + auto &NextInst = *LocalMII; + if (NextInst.getOpcode() == X86::MOVZX32rr8) { + const auto &Dst = NextInst.getOperand(0); + const auto &Src = NextInst.getOperand(1); + if (!Dst.isReg() || !Src.isReg() || Src.getReg() != TestReg) { return; - 
TestReg = MovzxDst.getReg(); - MovzxInst = &Inst2; - LocalMII++; - if (LocalMII == MIE) + } + TestReg = Dst.getReg(); + ToErase.push_back(&NextInst); + ++LocalMII; + if (LocalMII == MBB.end()) { return; + } } - auto &TestInst = *LocalMII; - switch (TestInst.getOpcode()) { - case X86::TEST8rr: - case X86::TEST16rr: - case X86::TEST32rr: - case X86::TEST64rr: - break; - default: + CgInstruction *BranchInst = nullptr; + int64_t FoldedCond = 0; + if (!matchDirectBoolBranch(MBB, LocalMII, TestReg, SetCond, ToErase, + BranchInst, FoldedCond) && + !matchCmovBoolBranch(MBB, LocalMII, TestReg, SetCond, ToErase, BranchInst, + FoldedCond)) { return; } - const auto &TestOp0 = TestInst.getOperand(0); - const auto &TestOp1 = TestInst.getOperand(1); - if (!TestOp0.isReg() || !TestOp1.isReg() || TestOp0.getReg() != TestReg || - TestOp1.getReg() != TestReg) + + for (CgInstruction *EraseInst : ToErase) { + EraseInst->eraseFromParent(); + } + BranchInst->getOperand(1).setImm(FoldedCond); +} + +void X86CgPeephole::optimizeNoOpImm(CgBasicBlock &MBB, + CgBasicBlock::iterator &MII) { + auto &Inst = *MII; + if (!isNoOpImmOpcode(Inst.getOpcode()) || Inst.getNumOperands() < 3) { return; + } - LocalMII++; - if (LocalMII == MIE) + const auto &Dst = Inst.getOperand(0); + const auto &Src = Inst.getOperand(1); + const auto &Imm = Inst.getOperand(2); + if (!Dst.isReg() || !Src.isReg() || !Imm.isImm() || Imm.getImm() != 0 || + Dst.getReg() != Src.getReg()) { return; - auto &Inst3 = *LocalMII; - if (Inst3.getOpcode() != X86::JCC_1) + } + + if (!areFlagsDeadAfter(MBB, MII)) { return; - if (Inst3.getOperand(1).getImm() != X86::CondCode::COND_NE) - return; // TODO, other optimization, use opposite condition code + } + auto NextMII = std::next(MII); + if (Inst.getOpcode() == X86::ADD64ri8) { + if (NextMII == MBB.end() || (NextMII->getOpcode() != X86::MOV64mr && + NextMII->getOpcode() != X86::MOV64rm && + NextMII->getOpcode() != X86::TEST64rr)) { + return; + } + } else if (NextMII == MBB.end() || + 
!isZeroLogicChainOpcode(*NextMII, Dst.getReg())) { + return; + } - // Ensure the SETCC/MOVZX registers have no uses beyond this chain. - // The lowering cache (_expr_reg_map) may share these virtual registers - // with other consumers; erasing them would leave dangling references. - const auto &RegInfo = MBB.getParent()->getRegInfo(); - if (!RegInfo.hasOneNonDBGUse(Op1.getReg())) + eraseCurrentInstruction(MBB, MII); +} + +void X86CgPeephole::optimizeAdcZeroReg(CgBasicBlock &MBB, + CgBasicBlock::iterator &MII) { + auto &Inst = *MII; + const unsigned NewOpcode = getAdcImmOpcode(Inst.getOpcode()); + if (NewOpcode == InvalidOpcode || Inst.getNumOperands() < 3) { return; - if (MovzxInst != nullptr && !RegInfo.hasOneNonDBGUse(TestReg)) + } + + const auto &RHS = Inst.getOperand(2); + if (!RHS.isReg() || !RHS.getReg().isVirtual()) { return; + } - Inst1.eraseFromParent(); - if (MovzxInst != nullptr) { - MovzxInst->eraseFromParent(); + auto *MF = MBB.getParent(); + auto &MRI = MF->getRegInfo(); + if (getImmMoveDef(RHS.getReg(), MRI, 0) == nullptr) { + return; + } + + std::vector Uses; + for (auto &UseInst : MRI.use_instructions(RHS.getReg())) { + if (UseInst.getNumOperands() < 3 || + getAdcImmOpcode(UseInst.getOpcode()) == InvalidOpcode || + !UseInst.getOperand(2).isReg() || + UseInst.getOperand(2).getReg() != RHS.getReg()) { + return; + } + Uses.push_back(&UseInst); + } + + CgInstruction *CurrentInst = &Inst; + for (CgInstruction *UseInst : Uses) { + SmallVector Operands{ + cloneRegOperand(UseInst->getOperand(0)), + cloneRegOperand(UseInst->getOperand(1)), + CgOperand::createImmOperand(0), + }; + CgInstruction *NewInst = MF->replaceCgInstruction( + UseInst, + MF->getTargetInstrInfo().get(getAdcImmOpcode(UseInst->getOpcode())), + Operands); + if (UseInst == CurrentInst) { + MII = CgBasicBlock::iterator(NewInst); + } } - TestInst.eraseFromParent(); - Inst3.getOperand(1).setImm(CC); } -} // namespace COMPILER + +void X86CgPeephole::optimizeAddZeroReg(CgBasicBlock &MBB, + 
CgBasicBlock::iterator &MII) { + auto &Inst = *MII; + if (!isAddRegOpcode(Inst.getOpcode()) || Inst.getNumOperands() < 3) { + return; + } + + const auto &Dst = Inst.getOperand(0); + const auto &LHS = Inst.getOperand(1); + const auto &RHS = Inst.getOperand(2); + if (!Dst.isReg() || !LHS.isReg() || !RHS.isReg() || + Dst.getReg() != LHS.getReg() || !RHS.getReg().isVirtual()) { + return; + } + + if (!areFlagsDeadAfter(MBB, MII)) { + return; + } + + std::vector ZeroToErase; + if (!collectZeroDefChainBefore(MBB, MII, RHS.getReg(), ZeroToErase)) { + return; + } + + eraseCurrentInstruction(MBB, MII); +} void X86CgPeephole::optimizeBranchInBlockEnd(CgBasicBlock &MBB, CgInstruction &MI) { @@ -115,5 +915,56 @@ void X86CgPeephole::optimizeBranchInBlockEnd(CgBasicBlock &MBB, if (TargetMBB->getNumber() == MBB.getNumber() + 1) { // remove the unconditional branch MI.eraseFromParent(); + return; + } + + auto ThisMII = CgBasicBlock::iterator(&MI); + if (ThisMII == MBB.begin()) { + return; + } + + auto PrevMII = std::prev(ThisMII); + CgInstruction &PrevMI = *PrevMII; + if (PrevMI.getOpcode() != X86::JCC_1 || PrevMI.getNumOperands() < 2) { + return; + } + + CgOperand &BranchTarget = PrevMI.getOperand(0); + CgOperand &BranchCond = PrevMI.getOperand(1); + if (!BranchTarget.isMBB() || !BranchCond.isImm()) { + return; + } + + CgBasicBlock *FallthroughMBB = BranchTarget.getMBB(); + if (FallthroughMBB->getNumber() != MBB.getNumber() + 1) { + return; + } + + X86::CondCode Opposite = + getOppositeCond(static_cast(BranchCond.getImm())); + if (Opposite == X86::COND_INVALID) { + return; + } + + BranchTarget.setMBB(TargetMBB); + BranchCond.setImm(Opposite); + MI.eraseFromParent(); +} + +void X86CgPeephole::eraseCurrentInstruction(CgBasicBlock &MBB, + CgBasicBlock::iterator &MII) { + auto Next = MBB.erase(MII); + if (MBB.empty()) { + MII = Next; + return; + } + + if (Next == MBB.begin()) { + MII = Next; + return; } + + MII = std::prev(Next); } + +} // namespace COMPILER diff --git 
a/src/compiler/target/x86/x86_cg_peephole.h b/src/compiler/target/x86/x86_cg_peephole.h index 631b31184..c023e5583 100644 --- a/src/compiler/target/x86/x86_cg_peephole.h +++ b/src/compiler/target/x86/x86_cg_peephole.h @@ -15,7 +15,12 @@ class X86CgPeephole : public CgPeephole { private: void optimizeCmp(CgBasicBlock &MBB, CgBasicBlock::iterator &MII); + void optimizeTestSetcc(CgBasicBlock &MBB, CgBasicBlock::iterator &MII); + void optimizeNoOpImm(CgBasicBlock &MBB, CgBasicBlock::iterator &MII); + void optimizeAdcZeroReg(CgBasicBlock &MBB, CgBasicBlock::iterator &MII); + void optimizeAddZeroReg(CgBasicBlock &MBB, CgBasicBlock::iterator &MII); void optimizeBranchInBlockEnd(CgBasicBlock &MBB, CgInstruction &MI); + void eraseCurrentInstruction(CgBasicBlock &MBB, CgBasicBlock::iterator &MII); }; } // namespace COMPILER From 7aac372ea42a15b18885da7215ee9515e33000a1 Mon Sep 17 00:00:00 2001 From: Abmcar Date: Sun, 29 Mar 2026 14:03:45 +0800 Subject: [PATCH 2/6] ci(ci): rerun flaky workflow From e30ae2dda8d55b3de055f17bf2f9a1b6d1a104fa Mon Sep 17 00:00:00 2001 From: Abmcar Date: Wed, 8 Apr 2026 19:46:41 +0800 Subject: [PATCH 3/6] docs(compiler): add change document for x86 CG peephole rules extension Co-Authored-By: Claude Opus 4.6 (1M context) --- .../README.md | 27 +++++++++++++++++++ 1 file changed, 27 insertions(+) create mode 100644 docs/changes/2026-03-29-x86-cg-peephole-rules/README.md diff --git a/docs/changes/2026-03-29-x86-cg-peephole-rules/README.md b/docs/changes/2026-03-29-x86-cg-peephole-rules/README.md new file mode 100644 index 000000000..97d990aa7 --- /dev/null +++ b/docs/changes/2026-03-29-x86-cg-peephole-rules/README.md @@ -0,0 +1,27 @@ +# Change: Extend x86 CG peephole rules + +- **Status**: Implemented +- **Date**: 2026-03-29 +- **Tier**: Light + +## Overview + +Extend the x86 CG peephole pass beyond the original cmp/setcc/test/jcc fold. 
Add test/setcc simplification, zero-add elimination, adc-zero canonicalization, imm-zero no-op removal, and broader branch folding patterns seen in JIT output. + +## Motivation + +JIT output analysis revealed additional redundant x86 instruction patterns not covered by the initial peephole rules. These patterns are mechanical artifacts of U256 decomposition and lowering. + +## Impact + +- Module: `docs/modules/compiler/` (x86 CG peephole only) +- 2 files changed: `src/compiler/target/x86/x86_cg_peephole.cpp` and `.h` +- Benchmark geomean: +6.9%, largest wins in synth/GAS (~2.4x), memory_grow_mstore/by32 (~1.8x) +- Known regressions in a small set of memory_grow/nogrow/by1 micro cases (within noise) + +## Checklist + +- [x] Implementation complete +- [x] Tests added/updated +- [ ] Module specs in `docs/modules/` updated (if affected) +- [x] Build and tests pass From bd73fdffdab4347eb8b746218d1b0e909f58c0f7 Mon Sep 17 00:00:00 2001 From: Abmcar Date: Thu, 9 Apr 2026 21:24:58 +0800 Subject: [PATCH 4/6] fix(compiler): address review findings in x86 CG peephole pass - Add comments for intentional design decisions (ZeroToErase asymmetry, TEST dual-trigger, NoOpImm whitelist) - Use SmallVector for small peephole collections to avoid heap allocs - Add function-level pattern descriptions for complex match functions Co-Authored-By: Claude Opus 4.6 (1M context) --- src/compiler/target/x86/x86_cg_peephole.cpp | 37 +++++++++++++++------ 1 file changed, 26 insertions(+), 11 deletions(-) diff --git a/src/compiler/target/x86/x86_cg_peephole.cpp b/src/compiler/target/x86/x86_cg_peephole.cpp index bd9345fb2..5bfc31775 100644 --- a/src/compiler/target/x86/x86_cg_peephole.cpp +++ b/src/compiler/target/x86/x86_cg_peephole.cpp @@ -346,7 +346,7 @@ bool getBoolOrOtherReg(const CgInstruction &Inst, CgRegister BoolReg, bool collectZeroDefChainBefore(CgBasicBlock &MBB, CgBasicBlock::iterator StartMII, CgRegister ZeroReg, - std::vector &ZeroToErase) { + SmallVector &ZeroToErase) { CgRegister
CurrentReg = ZeroReg; for (auto LocalMII = StartMII; LocalMII != MBB.begin();) { --LocalMII; @@ -408,7 +408,7 @@ CgOperand cloneRegOperand(const CgOperand &Operand) { CgInstruction *getImmMoveDef(CgRegister Reg, CgRegisterInfo &MRI, int64_t ImmValue) { - std::vector Visited; + SmallVector Visited; while (Reg.isVirtual()) { if (std::find(Visited.begin(), Visited.end(), Reg) != Visited.end()) { return nullptr; @@ -464,12 +464,14 @@ bool getSimplifiedSetccAfterTest(int64_t SetCond, bool &UseConst, } } +/// Match and fold: setcc -> zext -> or-with-zero -> test -> jcc +/// into: cmp -> jcc (eliminating intermediate bool materialization) bool matchDirectBoolBranch(CgBasicBlock &MBB, CgBasicBlock::iterator StartMII, CgRegister BoolReg, int64_t SetCond, - std::vector &ToErase, + SmallVector &ToErase, CgInstruction *&BranchInst, int64_t &FoldedCond) { - std::vector LocalToErase; - std::vector ZeroChainToErase; + SmallVector LocalToErase; + SmallVector ZeroChainToErase; CgRegister ZeroReg = 0; for (auto LocalMII = StartMII; LocalMII != MBB.end(); ++LocalMII) { auto &Inst = *LocalMII; @@ -503,7 +505,7 @@ bool matchDirectBoolBranch(CgBasicBlock &MBB, CgBasicBlock::iterator StartMII, CgRegister OrDstReg = 0; CgRegister OrOtherReg = 0; if (getBoolOrOtherReg(Inst, BoolReg, OrDstReg, OrOtherReg)) { - std::vector MatchedZeroToErase; + SmallVector MatchedZeroToErase; if (ZeroReg != 0 && OrOtherReg == ZeroReg) { MatchedZeroToErase = ZeroChainToErase; } else if (!collectZeroDefChainBefore(MBB, LocalMII, OrOtherReg, @@ -562,11 +564,13 @@ bool matchDirectBoolBranch(CgBasicBlock &MBB, CgBasicBlock::iterator StartMII, return false; } +/// Match and fold: setcc -> mov 0 -> mov 1 -> test -> cmov -> test -> jcc +/// into: cmp -> jcc (eliminating CMOV-based bool select) bool matchCmovBoolBranch(CgBasicBlock &MBB, CgBasicBlock::iterator StartMII, CgRegister BoolReg, int64_t SetCond, - std::vector &ToErase, + SmallVector &ToErase, CgInstruction *&BranchInst, int64_t &FoldedCond) { - std::vector 
LocalToErase; + SmallVector LocalToErase; CgInstruction *ZeroInst = nullptr; CgInstruction *OneInst = nullptr; for (auto LocalMII = StartMII; LocalMII != MBB.end(); ++LocalMII) { @@ -689,6 +693,11 @@ void X86CgPeephole::peepholeOptimize(CgBasicBlock &MBB, if (isTestOpcode(Inst.getOpcode())) { optimizeTestSetcc(MBB, MII); } + // NOTE: x86 TEST instructions have the MCID::Compare flag set, so both + // optimizeTestSetcc (above) and optimizeCmp (below) fire on TEST. + // optimizeTestSetcc runs first to simplify the condition code (e.g., + // SETA -> SETNE) before optimizeCmp attempts the branch fold. + // This composition is intentional and order-dependent. if (Inst.isCompare()) { optimizeCmp(MBB, MII); } @@ -760,7 +769,7 @@ void X86CgPeephole::optimizeCmp(CgBasicBlock &MBB, } const auto SetCond = SetccInst.getOperand(1).getImm(); - std::vector ToErase; + SmallVector ToErase; ToErase.push_back(&SetccInst); CgRegister TestReg = SetccDst.getReg(); @@ -818,6 +827,9 @@ void X86CgPeephole::optimizeNoOpImm(CgBasicBlock &MBB, return; } auto NextMII = std::next(MII); + // Whitelist of observed successor patterns in current JIT output. + // areFlagsDeadAfter already guarantees flags safety; this extra guard + // limits the optimization to patterns we have verified in practice. if (Inst.getOpcode() == X86::ADD64ri8) { if (NextMII == MBB.end() || (NextMII->getOpcode() != X86::MOV64mr && NextMII->getOpcode() != X86::MOV64rm && @@ -851,7 +863,7 @@ void X86CgPeephole::optimizeAdcZeroReg(CgBasicBlock &MBB, return; } - std::vector Uses; + SmallVector Uses; for (auto &UseInst : MRI.use_instructions(RHS.getReg())) { if (UseInst.getNumOperands() < 3 || getAdcImmOpcode(UseInst.getOpcode()) == InvalidOpcode || @@ -898,7 +910,10 @@ void X86CgPeephole::optimizeAddZeroReg(CgBasicBlock &MBB, return; } - std::vector ZeroToErase; + // The zero-def chain is intentionally not erased here: the zero register + // may have other uses beyond this ADD. 
Dead zero-def instructions will + // be cleaned up by subsequent DCE. + SmallVector ZeroToErase; if (!collectZeroDefChainBefore(MBB, MII, RHS.getReg(), ZeroToErase)) { return; } From a03d8381c76816febca9556ad87340226dcfd3ce Mon Sep 17 00:00:00 2001 From: Abmcar Date: Thu, 9 Apr 2026 21:31:29 +0800 Subject: [PATCH 5/6] fix(compiler): refine pattern description comments per codex review - matchDirectBoolBranch: note optional or-with-zero and direct self-test path - matchCmovBoolBranch: note order-independent mov defs, cmp/test output Co-Authored-By: Claude Opus 4.6 (1M context) --- src/compiler/target/x86/x86_cg_peephole.cpp | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/compiler/target/x86/x86_cg_peephole.cpp b/src/compiler/target/x86/x86_cg_peephole.cpp index 5bfc31775..f230107db 100644 --- a/src/compiler/target/x86/x86_cg_peephole.cpp +++ b/src/compiler/target/x86/x86_cg_peephole.cpp @@ -464,8 +464,9 @@ bool getSimplifiedSetccAfterTest(int64_t SetCond, bool &UseConst, } } -/// Match and fold: setcc -> zext -> or-with-zero -> test -> jcc -/// into: cmp -> jcc (eliminating intermediate bool materialization) +/// Match and fold: setcc -> [zext -> or-with-zero ->] test -> jcc +/// into: cmp/test -> jcc (eliminating intermediate bool materialization) +/// The or-with-zero step is optional; direct self-test is also matched. bool matchDirectBoolBranch(CgBasicBlock &MBB, CgBasicBlock::iterator StartMII, CgRegister BoolReg, int64_t SetCond, SmallVector &ToErase, @@ -564,8 +565,9 @@ bool matchDirectBoolBranch(CgBasicBlock &MBB, CgBasicBlock::iterator StartMII, return false; } -/// Match and fold: setcc -> mov 0 -> mov 1 -> test -> cmov -> test -> jcc -/// into: cmp -> jcc (eliminating CMOV-based bool select) +/// Match and fold: setcc -> {mov 0, mov 1} -> test -> cmov -> test -> jcc +/// into: cmp/test -> jcc (eliminating CMOV-based bool select) +/// The two mov defs (0 and 1) may appear in either order. 
bool matchCmovBoolBranch(CgBasicBlock &MBB, CgBasicBlock::iterator StartMII, CgRegister BoolReg, int64_t SetCond, SmallVector &ToErase, From 3f4c8c4a04ee170bb2f6d43cbd3493e22e65e7e9 Mon Sep 17 00:00:00 2001 From: Abmcar Date: Thu, 9 Apr 2026 21:45:01 +0800 Subject: [PATCH 6/6] style(compiler): fix clang-format violations in peephole comments Co-Authored-By: Claude Opus 4.6 (1M context) --- src/compiler/target/x86/x86_cg_peephole.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/compiler/target/x86/x86_cg_peephole.cpp b/src/compiler/target/x86/x86_cg_peephole.cpp index f230107db..600e1ed20 100644 --- a/src/compiler/target/x86/x86_cg_peephole.cpp +++ b/src/compiler/target/x86/x86_cg_peephole.cpp @@ -465,8 +465,9 @@ bool getSimplifiedSetccAfterTest(int64_t SetCond, bool &UseConst, } /// Match and fold: setcc -> [zext -> or-with-zero ->] test -> jcc -/// into: cmp/test -> jcc (eliminating intermediate bool materialization) -/// The or-with-zero step is optional; direct self-test is also matched. +/// into: cmp/test -> jcc (eliminating intermediate bool +/// materialization). The or-with-zero step is optional; direct self-test is +/// also matched. bool matchDirectBoolBranch(CgBasicBlock &MBB, CgBasicBlock::iterator StartMII, CgRegister BoolReg, int64_t SetCond, SmallVector &ToErase,