//===-- RISCVMakeCompressible.cpp - Make more instructions compressible ---===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass searches for instructions that are prevented from being compressed
// by one of the following:
//
//   1. The use of a single uncompressed register.
//   2. A base register + offset where the offset is too large to be compressed
//      and the base register may or may not be compressed.
//
//
// For case 1, if a compressed register is available, then the uncompressed
// register is copied to the compressed register and its uses are replaced.
//
// For example, storing zero uses the incompressible zero register:
//   sw zero, 0(a0)   # if zero
//   sw zero, 8(a0)   # if zero
//   sw zero, 4(a0)   # if zero
//   sw zero, 24(a0)  # if zero
//
// If a compressed register (e.g. a1) is available, the above can be transformed
// to the following to improve code size:
//   li a1, 0
//   c.sw a1, 0(a0)
//   c.sw a1, 8(a0)
//   c.sw a1, 4(a0)
//   c.sw a1, 24(a0)
//
//
// For case 2, if a compressed register is available, then the original base
// is copied and adjusted such that:
//
//   new_base_register = base_register + adjustment
//   base_register + large_offset = new_base_register + small_offset
//
// For example, the following offsets are too large for c.sw:
//   lui a2, 983065
//   sw  a1, -236(a2)
//   sw  a1, -240(a2)
//   sw  a1, -244(a2)
//   sw  a1, -248(a2)
//   sw  a1, -252(a2)
//   sw  a0, -256(a2)
//
// If a compressed register is available (e.g. a3), a new base could be created
// such that the addresses can be accessed with a compressible offset, thus
// improving code size:
//   lui a2, 983065
//   addi a3, a2, -256
//   c.sw a1, 20(a3)
//   c.sw a1, 16(a3)
//   c.sw a1, 12(a3)
//   c.sw a1, 8(a3)
//   c.sw a1, 4(a3)
//   c.sw a0, 0(a3)
//
//
// This optimization is only applied if there are enough uses of the copied
// register for code size to be reduced.
//
//===----------------------------------------------------------------------===//

#include "RISCV.h"
#include "RISCVSubtarget.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/Debug.h"

75using namespace llvm;
76
77#define DEBUG_TYPE "riscv-make-compressible"
78#define RISCV_COMPRESS_INSTRS_NAME "RISC-V Make Compressible"
79
80namespace {
81
82struct RISCVMakeCompressibleOpt : public MachineFunctionPass {
83 static char ID;
84
85 bool runOnMachineFunction(MachineFunction &Fn) override;
86
87 RISCVMakeCompressibleOpt() : MachineFunctionPass(ID) {}
88
89 StringRef getPassName() const override { return RISCV_COMPRESS_INSTRS_NAME; }
90};
91} // namespace
92
93char RISCVMakeCompressibleOpt::ID = 0;
94INITIALIZE_PASS(RISCVMakeCompressibleOpt, "riscv-make-compressible",
95 RISCV_COMPRESS_INSTRS_NAME, false, false)
96
97// Return log2(widthInBytes) of load/store done by Opcode.
98static unsigned log2LdstWidth(unsigned Opcode) {
99 switch (Opcode) {
100 default:
101 llvm_unreachable("Unexpected opcode");
102 case RISCV::LBU:
103 case RISCV::SB:
104 case RISCV::QC_E_LBU:
105 case RISCV::QC_E_SB:
106 return 0;
107 case RISCV::LH:
108 case RISCV::LH_INX:
109 case RISCV::LHU:
110 case RISCV::SH:
111 case RISCV::SH_INX:
112 case RISCV::QC_E_LH:
113 case RISCV::QC_E_LHU:
114 case RISCV::QC_E_SH:
115 return 1;
116 case RISCV::LW:
117 case RISCV::LW_INX:
118 case RISCV::SW:
119 case RISCV::SW_INX:
120 case RISCV::FLW:
121 case RISCV::FSW:
122 case RISCV::QC_E_LW:
123 case RISCV::QC_E_SW:
124 return 2;
125 case RISCV::LD:
126 case RISCV::LD_RV32:
127 case RISCV::SD:
128 case RISCV::SD_RV32:
129 case RISCV::FLD:
130 case RISCV::FSD:
131 return 3;
132 }
133}
134
135// Return bit field size of immediate operand of Opcode.
136static unsigned offsetMask(unsigned Opcode) {
137 switch (Opcode) {
138 default:
139 llvm_unreachable("Unexpected opcode");
140 case RISCV::LBU:
141 case RISCV::SB:
142 case RISCV::QC_E_LBU:
143 case RISCV::QC_E_SB:
145 case RISCV::LH:
146 case RISCV::LH_INX:
147 case RISCV::LHU:
148 case RISCV::SH:
149 case RISCV::SH_INX:
150 case RISCV::QC_E_LH:
151 case RISCV::QC_E_LHU:
152 case RISCV::QC_E_SH:
154 case RISCV::LW:
155 case RISCV::LW_INX:
156 case RISCV::SW:
157 case RISCV::SW_INX:
158 case RISCV::FLW:
159 case RISCV::FSW:
160 case RISCV::LD:
161 case RISCV::LD_RV32:
162 case RISCV::SD:
163 case RISCV::SD_RV32:
164 case RISCV::FLD:
165 case RISCV::FSD:
166 case RISCV::QC_E_LW:
167 case RISCV::QC_E_SW:
169 }
170}
171
172// Return a mask for the offset bits of a non-stack-pointer based compressed
173// load/store.
174static uint8_t compressedLDSTOffsetMask(unsigned Opcode) {
175 return offsetMask(Opcode) << log2LdstWidth(Opcode);
176}
177
178// Return true if Offset fits within a compressed stack-pointer based
179// load/store.
180static bool compressibleSPOffset(int64_t Offset, unsigned Opcode) {
181 // Compressed sp-based loads and stores only work for 32/64 bits.
182 switch (log2LdstWidth(Opcode)) {
183 case 2:
185 case 3:
187 }
188 return false;
189}
190
191// Given an offset for a load/store, return the adjustment required to the base
192// register such that the address can be accessed with a compressible offset.
193// This will return 0 if the offset is already compressible.
194static int64_t getBaseAdjustForCompression(int64_t Offset, unsigned Opcode) {
195 // Return the excess bits that do not fit in a compressible offset.
196 return Offset & ~compressedLDSTOffsetMask(Opcode);
197}
198
199// Return true if Reg is in a compressed register class.
201 return RISCV::GPRCRegClass.contains(Reg) ||
202 RISCV::GPRF16CRegClass.contains(Reg) ||
203 RISCV::GPRF32CRegClass.contains(Reg) ||
204 RISCV::FPR32CRegClass.contains(Reg) ||
205 RISCV::FPR64CRegClass.contains(Reg) ||
206 RISCV::GPRPairCRegClass.contains(Reg);
207}
208
209// Return true if MI is a load for which there exists a compressed version.
210static bool isCompressibleLoad(const MachineInstr &MI) {
211 const RISCVSubtarget &STI = MI.getMF()->getSubtarget<RISCVSubtarget>();
212
213 switch (MI.getOpcode()) {
214 default:
215 return false;
216 case RISCV::LBU:
217 case RISCV::LH:
218 case RISCV::LH_INX:
219 case RISCV::LHU:
220 return STI.hasStdExtZcb();
221 case RISCV::LW:
222 case RISCV::LW_INX:
223 case RISCV::LD:
224 return STI.hasStdExtZca();
225 case RISCV::LD_RV32:
226 return STI.hasStdExtZclsd();
227 case RISCV::FLW:
228 return !STI.is64Bit() && STI.hasStdExtCOrZcfOrZce();
229 case RISCV::FLD:
230 return STI.hasStdExtCOrZcd();
231 // For the Xqcilo loads we mark it as compressible only if Xqcilia is also
232 // enabled so that QC_E_ADDI can be used to create the new base.
233 case RISCV::QC_E_LBU:
234 case RISCV::QC_E_LH:
235 case RISCV::QC_E_LHU:
236 return !STI.is64Bit() && STI.hasVendorXqcilo() && STI.hasVendorXqcilia() &&
237 STI.hasStdExtZcb();
238 case RISCV::QC_E_LW:
239 return !STI.is64Bit() && STI.hasVendorXqcilo() && STI.hasVendorXqcilia();
240 }
241}
242
243// Return true if MI is a store for which there exists a compressed version.
245 const RISCVSubtarget &STI = MI.getMF()->getSubtarget<RISCVSubtarget>();
246
247 switch (MI.getOpcode()) {
248 default:
249 return false;
250 case RISCV::SB:
251 case RISCV::SH:
252 case RISCV::SH_INX:
253 return STI.hasStdExtZcb();
254 case RISCV::SW:
255 case RISCV::SW_INX:
256 case RISCV::SD:
257 return STI.hasStdExtZca();
258 case RISCV::SD_RV32:
259 return STI.hasStdExtZclsd();
260 case RISCV::FSW:
261 return !STI.is64Bit() && STI.hasStdExtCOrZcfOrZce();
262 case RISCV::FSD:
263 return STI.hasStdExtCOrZcd();
264 // For the Xqcilo stores we mark it as compressible only if Xqcilia is also
265 // enabled so that QC_E_ADDI can be used to create the new base.
266 case RISCV::QC_E_SB:
267 case RISCV::QC_E_SH:
268 return !STI.is64Bit() && STI.hasVendorXqcilo() && STI.hasVendorXqcilia() &&
269 STI.hasStdExtZcb();
270 case RISCV::QC_E_SW:
271 return !STI.is64Bit() && STI.hasVendorXqcilo() && STI.hasVendorXqcilia();
272 }
273}
274
275// Find a single register and/or large offset which, if compressible, would
276// allow the given instruction to be compressed.
277//
278// Possible return values:
279//
280// {Reg, 0} - Uncompressed Reg needs replacing with a compressed
281// register.
282// {Reg, N} - Reg needs replacing with a compressed register and
283// N needs adding to the new register. (Reg may be
284// compressed or uncompressed).
285// {RISCV::NoRegister, 0} - No suitable optimization found for this
286// instruction.
288 const unsigned Opcode = MI.getOpcode();
289
291 const MachineOperand &MOImm = MI.getOperand(2);
292 if (!MOImm.isImm())
293 return RegImmPair(Register(), 0);
294
295 int64_t Offset = MOImm.getImm();
296 int64_t NewBaseAdjust = getBaseAdjustForCompression(Offset, Opcode);
297 Register Base = MI.getOperand(1).getReg();
298
299 // Memory accesses via the stack pointer do not have a requirement for
300 // either of the registers to be compressible and can take a larger offset.
301 if (RISCV::SPRegClass.contains(Base)) {
302 if (!compressibleSPOffset(Offset, Opcode) && NewBaseAdjust)
303 return RegImmPair(Base, NewBaseAdjust);
304 } else {
305 Register SrcDest = MI.getOperand(0).getReg();
306 bool SrcDestCompressed = isCompressedReg(SrcDest);
307 bool BaseCompressed = isCompressedReg(Base);
308
309 // If only Base and/or offset prevent compression, then return Base and
310 // any adjustment required to make the offset compressible.
311 if ((!BaseCompressed || NewBaseAdjust) && SrcDestCompressed)
312 return RegImmPair(Base, NewBaseAdjust);
313
314 // For loads, we can only change the base register since dest is defined
315 // rather than used.
316 //
317 // For stores, we can change SrcDest (and Base if SrcDest == Base) but
318 // cannot resolve an incompressible offset in this case.
319 if (isCompressibleStore(MI)) {
320 if (!SrcDestCompressed && (BaseCompressed || SrcDest == Base) &&
321 !NewBaseAdjust)
322 return RegImmPair(SrcDest, NewBaseAdjust);
323 }
324 }
325 }
326 return RegImmPair(Register(), 0);
327}
328
329// Check all uses after FirstMI of the given register, keeping a vector of
330// instructions that would be compressible if the given register (and offset if
331// applicable) were compressible.
332//
333// If there are enough uses for this optimization to improve code size and a
334// compressed register is available, return that compressed register.
336 RegImmPair RegImm,
338 MachineBasicBlock &MBB = *FirstMI.getParent();
339 const TargetRegisterInfo *TRI =
340 MBB.getParent()->getSubtarget().getRegisterInfo();
341
343 E = MBB.instr_end();
344 I != E; ++I) {
345 MachineInstr &MI = *I;
346
347 // Determine if this is an instruction which would benefit from using the
348 // new register.
350 if (CandidateRegImm.Reg == RegImm.Reg && CandidateRegImm.Imm == RegImm.Imm)
351 MIs.push_back(&MI);
352
353 // If RegImm.Reg is modified by this instruction, then we cannot optimize
354 // past this instruction. If the register is already compressed, then it may
355 // possible to optimize a large offset in the current instruction - this
356 // will have been detected by the preceding call to
357 // getRegImmPairPreventingCompression.
358 if (MI.modifiesRegister(RegImm.Reg, TRI))
359 break;
360 }
361
362 // Adjusting the base costs one new uncompressed addi and therefore three uses
363 // are required for a code size reduction. If no base adjustment is required,
364 // then copying the register costs one new c.mv (or c.li Rd, 0 for "copying"
365 // the zero register) and therefore two uses are required for a code size
366 // reduction. For GPR pairs, we need 2 ADDIs to copy so we need three users.
367 unsigned CopyCost = RISCV::GPRPairRegClass.contains(RegImm.Reg) ? 2 : 1;
368 assert((RegImm.Imm == 0 || CopyCost == 1) && "GPRPair should have zero imm");
369 if (MIs.size() <= CopyCost || (RegImm.Imm != 0 && MIs.size() <= 2))
370 return Register();
371
372 // Find a compressible register which will be available from the first
373 // instruction we care about to the last.
374 const TargetRegisterClass *RCToScavenge;
375
376 // Work out the compressed register class from which to scavenge.
377 if (RISCV::GPRRegClass.contains(RegImm.Reg))
378 RCToScavenge = &RISCV::GPRCRegClass;
379 else if (RISCV::GPRF16RegClass.contains(RegImm.Reg))
380 RCToScavenge = &RISCV::GPRF16CRegClass;
381 else if (RISCV::GPRF32RegClass.contains(RegImm.Reg))
382 RCToScavenge = &RISCV::GPRF32CRegClass;
383 else if (RISCV::FPR32RegClass.contains(RegImm.Reg))
384 RCToScavenge = &RISCV::FPR32CRegClass;
385 else if (RISCV::FPR64RegClass.contains(RegImm.Reg))
386 RCToScavenge = &RISCV::FPR64CRegClass;
387 else if (RISCV::GPRPairRegClass.contains(RegImm.Reg))
388 RCToScavenge = &RISCV::GPRPairCRegClass;
389 else
390 return Register();
391
392 RegScavenger RS;
393 RS.enterBasicBlockEnd(MBB);
394 RS.backward(std::next(MIs.back()->getIterator()));
395 return RS.scavengeRegisterBackwards(*RCToScavenge, FirstMI.getIterator(),
396 /*RestoreAfter=*/false, /*SPAdj=*/0,
397 /*AllowSpill=*/false);
398}
399
400// Update uses of the old register in the given instruction to the new register.
401static void updateOperands(MachineInstr &MI, RegImmPair OldRegImm,
402 Register NewReg) {
403 unsigned Opcode = MI.getOpcode();
404
405 // If this pass is extended to support more instructions, the check for
406 // definedness may need to be strengthened.
408 "Unsupported instruction for this optimization.");
409
410 int SkipN = 0;
411
412 // Skip the first (value) operand to a store instruction (except if the store
413 // offset is zero) in order to avoid an incorrect transformation.
414 // e.g. sd a0, 808(a0) to addi a2, a0, 768; sd a2, 40(a2)
415 if (isCompressibleStore(MI) && OldRegImm.Imm != 0)
416 SkipN = 1;
417
418 // Update registers
419 for (MachineOperand &MO : drop_begin(MI.operands(), SkipN))
420 if (MO.isReg() && MO.getReg() == OldRegImm.Reg) {
421 // Do not update operands that define the old register.
422 //
423 // The new register was scavenged for the range of instructions that are
424 // being updated, therefore it should not be defined within this range
425 // except possibly in the final instruction.
426 if (MO.isDef()) {
428 continue;
429 }
430 // Update reg
431 MO.setReg(NewReg);
432 }
433
434 // Update offset
435 MachineOperand &MOImm = MI.getOperand(2);
436 int64_t NewOffset = MOImm.getImm() & compressedLDSTOffsetMask(Opcode);
437 MOImm.setImm(NewOffset);
438}
439
440bool RISCVMakeCompressibleOpt::runOnMachineFunction(MachineFunction &Fn) {
441 // This is a size optimization.
442 if (skipFunction(Fn.getFunction()) || !Fn.getFunction().hasMinSize())
443 return false;
444
445 const RISCVSubtarget &STI = Fn.getSubtarget<RISCVSubtarget>();
446 const RISCVInstrInfo &TII = *STI.getInstrInfo();
447
448 // This optimization only makes sense if compressed instructions are emitted.
449 if (!STI.hasStdExtZca())
450 return false;
451
452 for (MachineBasicBlock &MBB : Fn) {
453 LLVM_DEBUG(dbgs() << "MBB: " << MBB.getName() << "\n");
454 for (MachineInstr &MI : MBB) {
455 // Determine if this instruction would otherwise be compressed if not for
456 // an incompressible register or offset.
458 if (!RegImm.Reg && RegImm.Imm == 0)
459 continue;
460
461 // Determine if there is a set of instructions for which replacing this
462 // register with a compressed register (and compressible offset if
463 // applicable) is possible and will allow compression.
464 SmallVector<MachineInstr *, 8> MIs;
465 Register NewReg = analyzeCompressibleUses(MI, RegImm, MIs);
466 if (!NewReg)
467 continue;
468
469 // Create the appropriate copy and/or offset.
470 if (RISCV::GPRRegClass.contains(RegImm.Reg)) {
471 if (isInt<12>(RegImm.Imm)) {
472 BuildMI(MBB, MI, MI.getDebugLoc(), TII.get(RISCV::ADDI), NewReg)
473 .addReg(RegImm.Reg)
474 .addImm(RegImm.Imm);
475 } else {
476 assert(STI.hasVendorXqcilia() && isInt<26>(RegImm.Imm));
477 BuildMI(MBB, MI, MI.getDebugLoc(), TII.get(RISCV::QC_E_ADDI), NewReg)
478 .addReg(RegImm.Reg)
479 .addImm(RegImm.Imm);
480 }
481 } else {
482 assert(RegImm.Imm == 0);
483 TII.copyPhysReg(MBB, MI, MI.getDebugLoc(), NewReg, RegImm.Reg,
484 /*KillSrc*/ false);
485 }
486
487 // Update the set of instructions to use the compressed register and
488 // compressible offset instead. These instructions should now be
489 // compressible.
490 // TODO: Update all uses if RegImm.Imm == 0? Not just those that are
491 // expected to become compressible.
492 for (MachineInstr *UpdateMI : MIs)
493 updateOperands(*UpdateMI, RegImm, NewReg);
494 }
495 }
496 return true;
497}
498
499/// Returns an instance of the Make Compressible Optimization pass.
501 return new RISCVMakeCompressibleOpt();
502}
// (Trailing Doxygen cross-reference hover text from the scraped HTML listing
// removed here; it was auto-generated index material, not part of the source.)