63#define COMP_EVEX_DESC "Compressing EVEX instrs when possible"
64#define COMP_EVEX_NAME "x86-compress-evex"
66#define DEBUG_TYPE COMP_EVEX_NAME
72#define GET_X86_COMPRESS_EVEX_TABLE
73#include "X86GenInstrMapping.inc"
91char CompressEVEXLegacy::ID = 0;
96 if (
Reg >= X86::XMM16 &&
Reg <= X86::XMM31)
99 if (
Reg >= X86::YMM16 &&
Reg <= X86::YMM31)
115 "ZMM instructions should not be in the EVEX->VEX tables");
126 unsigned Opc =
MI.getOpcode();
128 case X86::VALIGNDZ128rri:
129 case X86::VALIGNDZ128rmi:
130 case X86::VALIGNQZ128rri:
131 case X86::VALIGNQZ128rmi: {
132 assert((NewOpc == X86::VPALIGNRrri || NewOpc == X86::VPALIGNRrmi) &&
133 "Unexpected new opcode!");
135 (
Opc == X86::VALIGNQZ128rri ||
Opc == X86::VALIGNQZ128rmi) ? 8 : 4;
137 Imm.setImm(Imm.getImm() * Scale);
140 case X86::VSHUFF32X4Z256rmi:
141 case X86::VSHUFF32X4Z256rri:
142 case X86::VSHUFF64X2Z256rmi:
143 case X86::VSHUFF64X2Z256rri:
144 case X86::VSHUFI32X4Z256rmi:
145 case X86::VSHUFI32X4Z256rri:
146 case X86::VSHUFI64X2Z256rmi:
147 case X86::VSHUFI64X2Z256rri: {
148 assert((NewOpc == X86::VPERM2F128rri || NewOpc == X86::VPERM2I128rri ||
149 NewOpc == X86::VPERM2F128rmi || NewOpc == X86::VPERM2I128rmi) &&
150 "Unexpected new opcode!");
152 int64_t ImmVal = Imm.getImm();
154 Imm.setImm(0x20 | ((ImmVal & 2) << 3) | (ImmVal & 1));
157 case X86::VRNDSCALEPDZ128rri:
158 case X86::VRNDSCALEPDZ128rmi:
159 case X86::VRNDSCALEPSZ128rri:
160 case X86::VRNDSCALEPSZ128rmi:
161 case X86::VRNDSCALEPDZ256rri:
162 case X86::VRNDSCALEPDZ256rmi:
163 case X86::VRNDSCALEPSZ256rri:
164 case X86::VRNDSCALEPSZ256rmi:
165 case X86::VRNDSCALESDZrri:
166 case X86::VRNDSCALESDZrmi:
167 case X86::VRNDSCALESSZrri:
168 case X86::VRNDSCALESSZrmi:
169 case X86::VRNDSCALESDZrri_Int:
170 case X86::VRNDSCALESDZrmi_Int:
171 case X86::VRNDSCALESSZrri_Int:
172 case X86::VRNDSCALESSZrmi_Int:
174 int64_t ImmVal = Imm.getImm();
176 if ((ImmVal & 0xf) != ImmVal)
185 unsigned VPMOVBits = 0;
187 case X86::VPMOVQ2MZ128kr:
190 case X86::VPMOVQ2MZ256kr:
191 case X86::VPMOVD2MZ128kr:
194 case X86::VPMOVD2MZ256kr:
197 case X86::VPMOVB2MZ128kr:
200 case X86::VPMOVB2MZ256kr:
207 unsigned KMOVSize = 0;
222 return KMOVSize < VPMOVBits;
227 case X86::VBLENDVPSrrr:
229 case X86::VMOVAPSZ128rrk:
230 case X86::VMOVUPSZ128rrk:
231 case X86::VMOVDQA32Z128rrk:
232 case X86::VMOVDQU32Z128rrk:
237 case X86::VBLENDVPSYrrr:
239 case X86::VMOVAPSZ256rrk:
240 case X86::VMOVUPSZ256rrk:
241 case X86::VMOVDQA32Z256rrk:
242 case X86::VMOVDQU32Z256rrk:
247 case X86::VBLENDVPDrrr:
249 case X86::VMOVAPDZ128rrk:
250 case X86::VMOVUPDZ128rrk:
251 case X86::VMOVDQA64Z128rrk:
252 case X86::VMOVDQU64Z128rrk:
257 case X86::VBLENDVPDYrrr:
259 case X86::VMOVAPDZ256rrk:
260 case X86::VMOVUPDZ256rrk:
261 case X86::VMOVDQA64Z256rrk:
262 case X86::VMOVDQU64Z256rrk:
267 case X86::VPBLENDVBrrr:
268 return UseOpc == X86::VMOVDQU8Z128rrk;
269 case X86::VPBLENDVBYrrr:
270 return UseOpc == X86::VMOVDQU8Z256rrk;
289 unsigned Opc =
MI.getOpcode();
290 if (
Opc != X86::VPMOVD2MZ128kr &&
Opc != X86::VPMOVD2MZ256kr &&
291 Opc != X86::VPMOVQ2MZ128kr &&
Opc != X86::VPMOVQ2MZ256kr &&
292 Opc != X86::VPMOVB2MZ128kr &&
Opc != X86::VPMOVB2MZ256kr)
299 Register SrcVecReg =
MI.getOperand(1).getReg();
301 unsigned MovMskOpc = 0;
302 unsigned BlendOpc = 0;
304 case X86::VPMOVD2MZ128kr:
305 MovMskOpc = X86::VMOVMSKPSrr;
306 BlendOpc = X86::VBLENDVPSrrr;
308 case X86::VPMOVD2MZ256kr:
309 MovMskOpc = X86::VMOVMSKPSYrr;
310 BlendOpc = X86::VBLENDVPSYrrr;
312 case X86::VPMOVQ2MZ128kr:
313 MovMskOpc = X86::VMOVMSKPDrr;
314 BlendOpc = X86::VBLENDVPDrrr;
316 case X86::VPMOVQ2MZ256kr:
317 MovMskOpc = X86::VMOVMSKPDYrr;
318 BlendOpc = X86::VBLENDVPDYrrr;
320 case X86::VPMOVB2MZ128kr:
321 MovMskOpc = X86::VPMOVMSKBrr;
322 BlendOpc = X86::VPBLENDVBrrr;
324 case X86::VPMOVB2MZ256kr:
325 MovMskOpc = X86::VPMOVMSKBYrr;
326 BlendOpc = X86::VPBLENDVBYrrr;
337 if (CurMI.readsRegister(MaskReg,
TRI)) {
338 if (KMovMI || BlendMI)
341 unsigned UseOpc = CurMI.getOpcode();
342 bool IsKMOV = UseOpc == X86::KMOVBrk || UseOpc == X86::KMOVWrk ||
343 UseOpc == X86::KMOVDrk;
345 if (IsKMOV && CurMI.getOperand(1).getReg() == MaskReg &&
351 CurMI.getOperand(2).getReg() == MaskReg &&
353 checkPredicate(BlendOpc, &ST)) {
360 if (CurMI.modifiesRegister(MaskReg,
TRI)) {
361 if (!KMovMI && !BlendMI)
371 if (!KMovMI && !BlendMI)
376 if (MO.getParent()->getParent() != &
MBB)
389 }
else if (BlendMI) {
407 assert(NewMI &&
"Expected a compressed instruction");
430 auto IsRedundantNewDataDest = [&](
unsigned &
Opc) {
438 X86::isCFCMOVCC(
MI.getOpcode()))
445 if (!
Desc.isCommutable() ||
Desc.getNumOperands() < 3 ||
446 !
MI.getOperand(2).isReg() ||
MI.getOperand(2).getReg() != Reg0)
449 ST.getInstrInfo()->commuteInstruction(
MI,
false, 1, 2);
450 Opc =
MI.getOpcode();
465 unsigned Opc =
MI.getOpcode();
466 bool IsSetZUCCm =
Opc == X86::SETZUCCm;
470 bool IsNDLike = IsND ||
Opc == X86::MOVBE32rr ||
Opc == X86::MOVBE64rr;
471 bool IsRedundantNDD = IsNDLike ? IsRedundantNewDataDest(
Opc) :
false;
473 auto GetCompressedOpc = [&](
unsigned Opc) ->
unsigned {
476 if (
I == Table.
end() ||
I->OldOpc !=
Opc)
486 if (IsRedundantNDD) {
496 if (
MI.definesRegister(Super,
nullptr))
497 IsRedundantNDD =
false;
505 "Unexpected NDD instruction with relocation!");
506 }
else if (
Opc == X86::ADD32ri_ND ||
Opc == X86::ADD64ri32_ND ||
507 Opc == X86::ADD32rr_ND ||
Opc == X86::ADD64rr_ND) {
512 MI.registerDefIsDead(X86::EFLAGS,
nullptr)) {
515 bool Is32BitReg =
Opc == X86::ADD32ri_ND ||
Opc == X86::ADD32rr_ND;
517 ST.getInstrInfo()->get(Is32BitReg ? X86::LEA64_32r : X86::LEA64r);
523 if (
Opc == X86::ADD32ri_ND ||
Opc == X86::ADD64ri32_ND)
530 MI.removeFromParent();
537 unsigned NewOpc = IsRedundantNDD
539 : ((IsNDLike && ST.hasNF() &&
540 MI.registerDefIsDead(X86::EFLAGS,
nullptr))
542 : GetCompressedOpc(
Opc));
549 "Unexpected to compress NF instructions without ND.");
551 const MCInstrDesc &NewDesc = ST.getInstrInfo()->get(NewOpc);
564 "Unknown EVEX2EVEX compression");
569 MI.setAsmPrinterFlag(AsmComment);
571 MI.tieOperands(0, 1);
580 static std::atomic<bool> TableChecked(
false);
581 if (!TableChecked.load(std::memory_order_relaxed)) {
583 "X86CompressEVEXTable is not sorted!");
584 TableChecked.store(
true, std::memory_order_relaxed);
588 if (!ST.hasAVX512() && !ST.hasEGPR() && !ST.hasNDD() && !ST.hasZU())
601 MI->eraseFromParent();
612 return new CompressEVEXLegacy();
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
const HexagonInstrInfo * TII
Register const TargetRegisterInfo * TRI
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
This file defines the SmallVector class.
static bool tryCompressVPMOVPattern(MachineInstr &MI, MachineBasicBlock &MBB, const X86Subtarget &ST, SmallVectorImpl< MachineInstr * > &ToErase)
static bool performCustomAdjustments(MachineInstr &MI, unsigned NewOpc)
static bool CompressEVEXImpl(MachineInstr &MI, MachineBasicBlock &MBB, const X86Subtarget &ST, SmallVectorImpl< MachineInstr * > &ToErase)
static bool isCompressibleBlendVUse(unsigned BlendOpc, unsigned UseOpc)
cl::opt< bool > X86EnableAPXForRelocation
static bool isKMovNarrowing(unsigned VPMOVOpc, unsigned KMOVOpc)
static bool runOnMF(MachineFunction &MF)
static bool usesExtendedRegister(const MachineInstr &MI)
Represent a constant reference to an array (0 or more elements consecutively in memory),...
Represents analyses that only rely on functions' control flow.
FunctionPass class - This class is used to implement most global optimizations.
Describe properties that are true of each instruction in the target description file.
Wrapper class representing physical registers. Should be passed by value.
MachineInstrBundleIterator< MachineInstr > iterator
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
Properties which a MachineFunction may have at a given point in time.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
const MachineInstrBuilder & addReg(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
Representation of each machine instruction.
bool modifiesRegister(Register Reg, const TargetRegisterInfo *TRI) const
Return true if the MachineInstr modifies (fully or partially defines) the specified register.
void setAsmPrinterFlag(AsmPrinterFlagTy Flag)
Set a flag for the AsmPrinter.
LLVM_ABI void setDesc(const MCInstrDesc &TID)
Replace the instruction descriptor (thus opcode) of the current instruction with a new one.
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
const MachineOperand & getOperand(unsigned i) const
MachineOperand class - Representation of each machine instruction operand.
bool isReg() const
isReg - Tests if this is a MO_Register operand.
LLVM_ABI void setReg(Register Reg)
Change the register this operand corresponds to.
void setIsKill(bool Val=true)
Register getReg() const
getReg - Returns the register number.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
iterator_range< use_iterator > use_operands(Register Reg) const
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
PreservedAnalyses & preserveSet()
Mark an analysis set as preserved.
Wrapper class representing virtual and physical registers.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Represent a constant reference to a string, i.e.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
PreservedAnalyses run(MachineFunction &MF, MachineFunctionAnalysisManager &MFAM)
Pass manager infrastructure for declaring and invalidating analyses.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
bool isZMMReg(MCRegister Reg)
bool hasNewDataDest(uint64_t TSFlags)
@ EVEX
EVEX - Specifies that this instruction uses the EVEX form, which provides syntax support up to 32 512-bit r...
@ VEX
VEX - encoding using 0xC4/0xC5.
@ LEGACY
LEGACY - encoding using REX/REX2 or w/o opcode prefix.
bool isApxExtendedReg(MCRegister Reg)
int getFirstAddrOperandIdx(const MachineInstr &MI)
Return the index of the instruction's first address operand, if it has a memory reference,...
unsigned getNonNDVariant(unsigned Opc)
unsigned getNFVariant(unsigned Opc)
This is an optimization pass for GlobalISel generic memory operations.
FunctionPass * createX86CompressEVEXLegacyPass()
static bool isAddMemInstrWithRelocation(const MachineInstr &MI)
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
MCRegister getX86SubSuperRegister(MCRegister Reg, unsigned Size, bool High=false)
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
AnalysisManager< MachineFunction > MachineFunctionAnalysisManager
LLVM_ABI PreservedAnalyses getMachineFunctionPassPreservedAnalyses()
Returns the minimum set of Analyses that all machine function passes must preserve.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
bool is_sorted(R &&Range, Compare C)
Wrapper function around std::is_sorted to check if elements in a range R are sorted with respect to a...
RegState getRegState(const MachineOperand &RegOp)
Get all register state flags from machine operand RegOp.
auto lower_bound(R &&Range, T &&Value)
Provide wrappers to std::lower_bound which take ranges instead of having to pass begin/end explicitly...
ArrayRef(const T &OneElt) -> ArrayRef< T >