//===- X86ISelDAGToDAG.cpp - A DAG pattern matching inst selector for X86 -===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines a DAG pattern matching instruction selector for X86,
// converting from a legalized dag to a X86 dag.
//
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "x86-isel"
17
#include "X86InstrBuilder.h"
18
#include "X86MachineFunctionInfo.h"
19
#include "X86RegisterInfo.h"
20
#include "X86Subtarget.h"
21
#include "X86TargetMachine.h"
22
#include "llvm/Instructions.h"
23
#include "llvm/Intrinsics.h"
24
#include "llvm/Support/CFG.h"
25
#include "llvm/Type.h"
26
#include "llvm/CodeGen/MachineConstantPool.h"
27
#include "llvm/CodeGen/MachineFunction.h"
28
#include "llvm/CodeGen/MachineFrameInfo.h"
29
#include "llvm/CodeGen/MachineInstrBuilder.h"
30
#include "llvm/CodeGen/MachineRegisterInfo.h"
31
#include "llvm/CodeGen/SelectionDAGISel.h"
32
#include "llvm/Target/TargetMachine.h"
33
#include "llvm/Target/TargetOptions.h"
34
#include "llvm/Support/Debug.h"
35
#include "llvm/Support/ErrorHandling.h"
36
#include "llvm/Support/MathExtras.h"
37
#include "llvm/Support/raw_ostream.h"
38
#include "llvm/ADT/SmallPtrSet.h"
39
#include "llvm/ADT/Statistic.h"
42
STATISTIC(NumLoadMoved, "Number of loads moved below TokenFactor");
44
//===----------------------------------------------------------------------===//
45
// Pattern Matcher Implementation
46
//===----------------------------------------------------------------------===//
49
/// X86ISelAddressMode - This corresponds to X86AddressMode, but uses
50
/// SDValue's instead of register numbers for the leaves of the matched
52
struct X86ISelAddressMode {
58
// This is really a union, discriminated by BaseType!
66
const GlobalValue *GV;
68
const BlockAddress *BlockAddr;
71
unsigned Align; // CP alignment.
72
unsigned char SymbolFlags; // X86II::MO_*
75
: BaseType(RegBase), Base_FrameIndex(0), Scale(1), IndexReg(), Disp(0),
76
Segment(), GV(0), CP(0), BlockAddr(0), ES(0), JT(-1), Align(0),
77
SymbolFlags(X86II::MO_NO_FLAG) {
80
bool hasSymbolicDisplacement() const {
81
return GV != 0 || CP != 0 || ES != 0 || JT != -1 || BlockAddr != 0;
84
bool hasBaseOrIndexReg() const {
85
return IndexReg.getNode() != 0 || Base_Reg.getNode() != 0;
88
/// isRIPRelative - Return true if this addressing mode is already RIP
90
bool isRIPRelative() const {
91
if (BaseType != RegBase) return false;
92
if (RegisterSDNode *RegNode =
93
dyn_cast_or_null<RegisterSDNode>(Base_Reg.getNode()))
94
return RegNode->getReg() == X86::RIP;
98
void setBaseReg(SDValue Reg) {
104
dbgs() << "X86ISelAddressMode " << this << '\n';
105
dbgs() << "Base_Reg ";
106
if (Base_Reg.getNode() != 0)
107
Base_Reg.getNode()->dump();
110
dbgs() << " Base.FrameIndex " << Base_FrameIndex << '\n'
111
<< " Scale" << Scale << '\n'
113
if (IndexReg.getNode() != 0)
114
IndexReg.getNode()->dump();
117
dbgs() << " Disp " << Disp << '\n'
134
dbgs() << " JT" << JT << " Align" << Align << '\n';
140
//===--------------------------------------------------------------------===//
141
/// ISel - X86 specific code to select X86 machine instructions for
142
/// SelectionDAG operations.
144
class X86DAGToDAGISel : public SelectionDAGISel {
145
/// X86Lowering - This object fully describes how to lower LLVM code to an
146
/// X86-specific SelectionDAG.
147
const X86TargetLowering &X86Lowering;
149
/// Subtarget - Keep a pointer to the X86Subtarget around so that we can
150
/// make the right decision when generating code for different targets.
151
const X86Subtarget *Subtarget;
153
/// OptForSize - If true, selector should try to optimize for code size
154
/// instead of performance.
158
explicit X86DAGToDAGISel(X86TargetMachine &tm, CodeGenOpt::Level OptLevel)
159
: SelectionDAGISel(tm, OptLevel),
160
X86Lowering(*tm.getTargetLowering()),
161
Subtarget(&tm.getSubtarget<X86Subtarget>()),
164
virtual const char *getPassName() const {
165
return "X86 DAG->DAG Instruction Selection";
168
virtual void EmitFunctionEntryCode();
170
virtual bool IsProfitableToFold(SDValue N, SDNode *U, SDNode *Root) const;
172
virtual void PreprocessISelDAG();
174
inline bool immSext8(SDNode *N) const {
175
return isInt<8>(cast<ConstantSDNode>(N)->getSExtValue());
178
// i64immSExt32 predicate - True if the 64-bit immediate fits in a 32-bit
179
// sign extended field.
180
inline bool i64immSExt32(SDNode *N) const {
181
uint64_t v = cast<ConstantSDNode>(N)->getZExtValue();
182
return (int64_t)v == (int32_t)v;
185
// Include the pieces autogenerated from the target description.
186
#include "X86GenDAGISel.inc"
189
SDNode *Select(SDNode *N);
190
SDNode *SelectAtomic64(SDNode *Node, unsigned Opc);
191
SDNode *SelectAtomicLoadAdd(SDNode *Node, EVT NVT);
193
bool MatchSegmentBaseAddress(SDValue N, X86ISelAddressMode &AM);
194
bool MatchLoad(SDValue N, X86ISelAddressMode &AM);
195
bool MatchWrapper(SDValue N, X86ISelAddressMode &AM);
196
bool MatchAddress(SDValue N, X86ISelAddressMode &AM);
197
bool MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
199
bool MatchAddressBase(SDValue N, X86ISelAddressMode &AM);
200
bool SelectAddr(SDNode *Op, SDValue N, SDValue &Base,
201
SDValue &Scale, SDValue &Index, SDValue &Disp,
203
bool SelectLEAAddr(SDNode *Op, SDValue N, SDValue &Base,
204
SDValue &Scale, SDValue &Index, SDValue &Disp,
206
bool SelectTLSADDRAddr(SDNode *Op, SDValue N, SDValue &Base,
207
SDValue &Scale, SDValue &Index, SDValue &Disp,
209
bool SelectScalarSSELoad(SDNode *Root, SDValue N,
210
SDValue &Base, SDValue &Scale,
211
SDValue &Index, SDValue &Disp,
213
SDValue &NodeWithChain);
215
bool TryFoldLoad(SDNode *P, SDValue N,
216
SDValue &Base, SDValue &Scale,
217
SDValue &Index, SDValue &Disp,
220
/// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
221
/// inline asm expressions.
222
virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op,
224
std::vector<SDValue> &OutOps);
226
void EmitSpecialCodeForMain(MachineBasicBlock *BB, MachineFrameInfo *MFI);
228
inline void getAddressOperands(X86ISelAddressMode &AM, SDValue &Base,
229
SDValue &Scale, SDValue &Index,
230
SDValue &Disp, SDValue &Segment) {
231
Base = (AM.BaseType == X86ISelAddressMode::FrameIndexBase) ?
232
CurDAG->getTargetFrameIndex(AM.Base_FrameIndex, TLI.getPointerTy()) :
234
Scale = getI8Imm(AM.Scale);
236
// These are 32-bit even in 64-bit mode since RIP relative offset
239
Disp = CurDAG->getTargetGlobalAddress(AM.GV, DebugLoc(),
243
Disp = CurDAG->getTargetConstantPool(AM.CP, MVT::i32,
244
AM.Align, AM.Disp, AM.SymbolFlags);
246
Disp = CurDAG->getTargetExternalSymbol(AM.ES, MVT::i32, AM.SymbolFlags);
247
else if (AM.JT != -1)
248
Disp = CurDAG->getTargetJumpTable(AM.JT, MVT::i32, AM.SymbolFlags);
249
else if (AM.BlockAddr)
250
Disp = CurDAG->getBlockAddress(AM.BlockAddr, MVT::i32,
251
true, AM.SymbolFlags);
253
Disp = CurDAG->getTargetConstant(AM.Disp, MVT::i32);
255
if (AM.Segment.getNode())
256
Segment = AM.Segment;
258
Segment = CurDAG->getRegister(0, MVT::i32);
261
/// getI8Imm - Return a target constant with the specified value, of type
263
inline SDValue getI8Imm(unsigned Imm) {
264
return CurDAG->getTargetConstant(Imm, MVT::i8);
267
/// getI16Imm - Return a target constant with the specified value, of type
269
inline SDValue getI16Imm(unsigned Imm) {
270
return CurDAG->getTargetConstant(Imm, MVT::i16);
273
/// getI32Imm - Return a target constant with the specified value, of type
275
inline SDValue getI32Imm(unsigned Imm) {
276
return CurDAG->getTargetConstant(Imm, MVT::i32);
279
/// getGlobalBaseReg - Return an SDNode that returns the value of
280
/// the global base register. Output instructions required to
281
/// initialize the global base register, if necessary.
283
SDNode *getGlobalBaseReg();
285
/// getTargetMachine - Return a reference to the TargetMachine, casted
286
/// to the target-specific type.
287
const X86TargetMachine &getTargetMachine() {
288
return static_cast<const X86TargetMachine &>(TM);
291
/// getInstrInfo - Return a reference to the TargetInstrInfo, casted
292
/// to the target-specific type.
293
const X86InstrInfo *getInstrInfo() {
294
return getTargetMachine().getInstrInfo();
301
X86DAGToDAGISel::IsProfitableToFold(SDValue N, SDNode *U, SDNode *Root) const {
302
if (OptLevel == CodeGenOpt::None) return false;
307
if (N.getOpcode() != ISD::LOAD)
310
// If N is a load, do additional profitability checks.
312
switch (U->getOpcode()) {
325
SDValue Op1 = U->getOperand(1);
327
// If the other operand is a 8-bit immediate we should fold the immediate
328
// instead. This reduces code size.
330
// movl 4(%esp), %eax
334
// addl 4(%esp), %eax
335
// The former is 2 bytes shorter. In case where the increment is 1, then
336
// the saving can be 4 bytes (by using incl %eax).
337
if (ConstantSDNode *Imm = dyn_cast<ConstantSDNode>(Op1))
338
if (Imm->getAPIntValue().isSignedIntN(8))
341
// If the other operand is a TLS address, we should fold it instead.
344
// leal i@NTPOFF(%eax), %eax
346
// movl $i@NTPOFF, %eax
348
// if the block also has an access to a second TLS address this will save
350
// FIXME: This is probably also true for non TLS addresses.
351
if (Op1.getOpcode() == X86ISD::Wrapper) {
352
SDValue Val = Op1.getOperand(0);
353
if (Val.getOpcode() == ISD::TargetGlobalTLSAddress)
363
/// MoveBelowCallOrigChain - Replace the original chain operand of the call with
364
/// load's chain operand and move load below the call's chain operand.
365
static void MoveBelowOrigChain(SelectionDAG *CurDAG, SDValue Load,
366
SDValue Call, SDValue OrigChain) {
367
SmallVector<SDValue, 8> Ops;
368
SDValue Chain = OrigChain.getOperand(0);
369
if (Chain.getNode() == Load.getNode())
370
Ops.push_back(Load.getOperand(0));
372
assert(Chain.getOpcode() == ISD::TokenFactor &&
373
"Unexpected chain operand");
374
for (unsigned i = 0, e = Chain.getNumOperands(); i != e; ++i)
375
if (Chain.getOperand(i).getNode() == Load.getNode())
376
Ops.push_back(Load.getOperand(0));
378
Ops.push_back(Chain.getOperand(i));
380
CurDAG->getNode(ISD::TokenFactor, Load.getDebugLoc(),
381
MVT::Other, &Ops[0], Ops.size());
383
Ops.push_back(NewChain);
385
for (unsigned i = 1, e = OrigChain.getNumOperands(); i != e; ++i)
386
Ops.push_back(OrigChain.getOperand(i));
387
CurDAG->UpdateNodeOperands(OrigChain.getNode(), &Ops[0], Ops.size());
388
CurDAG->UpdateNodeOperands(Load.getNode(), Call.getOperand(0),
389
Load.getOperand(1), Load.getOperand(2));
391
Ops.push_back(SDValue(Load.getNode(), 1));
392
for (unsigned i = 1, e = Call.getNode()->getNumOperands(); i != e; ++i)
393
Ops.push_back(Call.getOperand(i));
394
CurDAG->UpdateNodeOperands(Call.getNode(), &Ops[0], Ops.size());
397
/// isCalleeLoad - Return true if call address is a load and it can be
398
/// moved below CALLSEQ_START and the chains leading up to the call.
399
/// Return the CALLSEQ_START by reference as a second output.
400
/// In the case of a tail call, there isn't a callseq node between the call
401
/// chain and the load.
402
static bool isCalleeLoad(SDValue Callee, SDValue &Chain, bool HasCallSeq) {
403
if (Callee.getNode() == Chain.getNode() || !Callee.hasOneUse())
405
LoadSDNode *LD = dyn_cast<LoadSDNode>(Callee.getNode());
408
LD->getAddressingMode() != ISD::UNINDEXED ||
409
LD->getExtensionType() != ISD::NON_EXTLOAD)
412
// Now let's find the callseq_start.
413
while (HasCallSeq && Chain.getOpcode() != ISD::CALLSEQ_START) {
414
if (!Chain.hasOneUse())
416
Chain = Chain.getOperand(0);
419
if (!Chain.getNumOperands())
421
if (Chain.getOperand(0).getNode() == Callee.getNode())
423
if (Chain.getOperand(0).getOpcode() == ISD::TokenFactor &&
424
Callee.getValue(1).isOperandOf(Chain.getOperand(0).getNode()) &&
425
Callee.getValue(1).hasOneUse())
430
void X86DAGToDAGISel::PreprocessISelDAG() {
431
// OptForSize is used in pattern predicates that isel is matching.
432
OptForSize = MF->getFunction()->hasFnAttr(Attribute::OptimizeForSize);
434
for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
435
E = CurDAG->allnodes_end(); I != E; ) {
436
SDNode *N = I++; // Preincrement iterator to avoid invalidation issues.
438
if (OptLevel != CodeGenOpt::None &&
439
(N->getOpcode() == X86ISD::CALL ||
440
N->getOpcode() == X86ISD::TC_RETURN)) {
441
/// Also try moving call address load from outside callseq_start to just
442
/// before the call to allow it to be folded.
460
bool HasCallSeq = N->getOpcode() == X86ISD::CALL;
461
SDValue Chain = N->getOperand(0);
462
SDValue Load = N->getOperand(1);
463
if (!isCalleeLoad(Load, Chain, HasCallSeq))
465
MoveBelowOrigChain(CurDAG, Load, SDValue(N, 0), Chain);
470
// Lower fpround and fpextend nodes that target the FP stack to be store and
471
// load to the stack. This is a gross hack. We would like to simply mark
472
// these as being illegal, but when we do that, legalize produces these when
473
// it expands calls, then expands these in the same legalize pass. We would
474
// like dag combine to be able to hack on these between the call expansion
475
// and the node legalization. As such this pass basically does "really
476
// late" legalization of these inline with the X86 isel pass.
477
// FIXME: This should only happen when not compiled with -O0.
478
if (N->getOpcode() != ISD::FP_ROUND && N->getOpcode() != ISD::FP_EXTEND)
481
// If the source and destination are SSE registers, then this is a legal
482
// conversion that should not be lowered.
483
EVT SrcVT = N->getOperand(0).getValueType();
484
EVT DstVT = N->getValueType(0);
485
bool SrcIsSSE = X86Lowering.isScalarFPTypeInSSEReg(SrcVT);
486
bool DstIsSSE = X86Lowering.isScalarFPTypeInSSEReg(DstVT);
487
if (SrcIsSSE && DstIsSSE)
490
if (!SrcIsSSE && !DstIsSSE) {
491
// If this is an FPStack extension, it is a noop.
492
if (N->getOpcode() == ISD::FP_EXTEND)
494
// If this is a value-preserving FPStack truncation, it is a noop.
495
if (N->getConstantOperandVal(1))
499
// Here we could have an FP stack truncation or an FPStack <-> SSE convert.
500
// FPStack has extload and truncstore. SSE can fold direct loads into other
501
// operations. Based on this, decide what we want to do.
503
if (N->getOpcode() == ISD::FP_ROUND)
504
MemVT = DstVT; // FP_ROUND must use DstVT, we can't do a 'trunc load'.
506
MemVT = SrcIsSSE ? SrcVT : DstVT;
508
SDValue MemTmp = CurDAG->CreateStackTemporary(MemVT);
509
DebugLoc dl = N->getDebugLoc();
511
// FIXME: optimize the case where the src/dest is a load or store?
512
SDValue Store = CurDAG->getTruncStore(CurDAG->getEntryNode(), dl,
514
MemTmp, NULL, 0, MemVT,
516
SDValue Result = CurDAG->getExtLoad(ISD::EXTLOAD, DstVT, dl, Store, MemTmp,
517
NULL, 0, MemVT, false, false, 0);
519
// We're about to replace all uses of the FP_ROUND/FP_EXTEND with the
520
// extload we created. This will cause general havok on the dag because
521
// anything below the conversion could be folded into other existing nodes.
522
// To avoid invalidating 'I', back it up to the convert node.
524
CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
526
// Now that we did that, the node is dead. Increment the iterator to the
527
// next node to process, then delete N.
529
CurDAG->DeleteNode(N);
534
/// EmitSpecialCodeForMain - Emit any code that needs to be executed only in
535
/// the main function.
536
void X86DAGToDAGISel::EmitSpecialCodeForMain(MachineBasicBlock *BB,
537
MachineFrameInfo *MFI) {
538
const TargetInstrInfo *TII = TM.getInstrInfo();
539
if (Subtarget->isTargetCygMing())
540
BuildMI(BB, DebugLoc(),
541
TII->get(X86::CALLpcrel32)).addExternalSymbol("__main");
544
void X86DAGToDAGISel::EmitFunctionEntryCode() {
545
// If this is main, emit special code for main.
546
if (const Function *Fn = MF->getFunction())
547
if (Fn->hasExternalLinkage() && Fn->getName() == "main")
548
EmitSpecialCodeForMain(MF->begin(), MF->getFrameInfo());
552
bool X86DAGToDAGISel::MatchSegmentBaseAddress(SDValue N,
553
X86ISelAddressMode &AM) {
554
assert(N.getOpcode() == X86ISD::SegmentBaseAddress);
555
SDValue Segment = N.getOperand(0);
557
if (AM.Segment.getNode() == 0) {
558
AM.Segment = Segment;
565
bool X86DAGToDAGISel::MatchLoad(SDValue N, X86ISelAddressMode &AM) {
566
// This optimization is valid because the GNU TLS model defines that
567
// gs:0 (or fs:0 on X86-64) contains its own address.
568
// For more information see http://people.redhat.com/drepper/tls.pdf
570
SDValue Address = N.getOperand(1);
571
if (Address.getOpcode() == X86ISD::SegmentBaseAddress &&
572
!MatchSegmentBaseAddress (Address, AM))
578
/// MatchWrapper - Try to match X86ISD::Wrapper and X86ISD::WrapperRIP nodes
579
/// into an addressing mode. These wrap things that will resolve down into a
580
/// symbol reference. If no match is possible, this returns true, otherwise it
582
bool X86DAGToDAGISel::MatchWrapper(SDValue N, X86ISelAddressMode &AM) {
583
// If the addressing mode already has a symbol as the displacement, we can
584
// never match another symbol.
585
if (AM.hasSymbolicDisplacement())
588
SDValue N0 = N.getOperand(0);
589
CodeModel::Model M = TM.getCodeModel();
591
// Handle X86-64 rip-relative addresses. We check this before checking direct
592
// folding because RIP is preferable to non-RIP accesses.
593
if (Subtarget->is64Bit() &&
594
// Under X86-64 non-small code model, GV (and friends) are 64-bits, so
595
// they cannot be folded into immediate fields.
596
// FIXME: This can be improved for kernel and other models?
597
(M == CodeModel::Small || M == CodeModel::Kernel) &&
598
// Base and index reg must be 0 in order to use %rip as base and lowering
600
!AM.hasBaseOrIndexReg() && N.getOpcode() == X86ISD::WrapperRIP) {
601
if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(N0)) {
602
int64_t Offset = AM.Disp + G->getOffset();
603
if (!X86::isOffsetSuitableForCodeModel(Offset, M)) return true;
604
AM.GV = G->getGlobal();
606
AM.SymbolFlags = G->getTargetFlags();
607
} else if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(N0)) {
608
int64_t Offset = AM.Disp + CP->getOffset();
609
if (!X86::isOffsetSuitableForCodeModel(Offset, M)) return true;
610
AM.CP = CP->getConstVal();
611
AM.Align = CP->getAlignment();
613
AM.SymbolFlags = CP->getTargetFlags();
614
} else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(N0)) {
615
AM.ES = S->getSymbol();
616
AM.SymbolFlags = S->getTargetFlags();
617
} else if (JumpTableSDNode *J = dyn_cast<JumpTableSDNode>(N0)) {
618
AM.JT = J->getIndex();
619
AM.SymbolFlags = J->getTargetFlags();
621
AM.BlockAddr = cast<BlockAddressSDNode>(N0)->getBlockAddress();
622
AM.SymbolFlags = cast<BlockAddressSDNode>(N0)->getTargetFlags();
625
if (N.getOpcode() == X86ISD::WrapperRIP)
626
AM.setBaseReg(CurDAG->getRegister(X86::RIP, MVT::i64));
630
// Handle the case when globals fit in our immediate field: This is true for
631
// X86-32 always and X86-64 when in -static -mcmodel=small mode. In 64-bit
632
// mode, this results in a non-RIP-relative computation.
633
if (!Subtarget->is64Bit() ||
634
((M == CodeModel::Small || M == CodeModel::Kernel) &&
635
TM.getRelocationModel() == Reloc::Static)) {
636
if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(N0)) {
637
AM.GV = G->getGlobal();
638
AM.Disp += G->getOffset();
639
AM.SymbolFlags = G->getTargetFlags();
640
} else if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(N0)) {
641
AM.CP = CP->getConstVal();
642
AM.Align = CP->getAlignment();
643
AM.Disp += CP->getOffset();
644
AM.SymbolFlags = CP->getTargetFlags();
645
} else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(N0)) {
646
AM.ES = S->getSymbol();
647
AM.SymbolFlags = S->getTargetFlags();
648
} else if (JumpTableSDNode *J = dyn_cast<JumpTableSDNode>(N0)) {
649
AM.JT = J->getIndex();
650
AM.SymbolFlags = J->getTargetFlags();
652
AM.BlockAddr = cast<BlockAddressSDNode>(N0)->getBlockAddress();
653
AM.SymbolFlags = cast<BlockAddressSDNode>(N0)->getTargetFlags();
661
/// MatchAddress - Add the specified node to the specified addressing mode,
662
/// returning true if it cannot be done. This just pattern matches for the
664
bool X86DAGToDAGISel::MatchAddress(SDValue N, X86ISelAddressMode &AM) {
665
if (MatchAddressRecursively(N, AM, 0))
668
// Post-processing: Convert lea(,%reg,2) to lea(%reg,%reg), which has
669
// a smaller encoding and avoids a scaled-index.
671
AM.BaseType == X86ISelAddressMode::RegBase &&
672
AM.Base_Reg.getNode() == 0) {
673
AM.Base_Reg = AM.IndexReg;
677
// Post-processing: Convert foo to foo(%rip), even in non-PIC mode,
678
// because it has a smaller encoding.
679
// TODO: Which other code models can use this?
680
if (TM.getCodeModel() == CodeModel::Small &&
681
Subtarget->is64Bit() &&
683
AM.BaseType == X86ISelAddressMode::RegBase &&
684
AM.Base_Reg.getNode() == 0 &&
685
AM.IndexReg.getNode() == 0 &&
686
AM.SymbolFlags == X86II::MO_NO_FLAG &&
687
AM.hasSymbolicDisplacement())
688
AM.Base_Reg = CurDAG->getRegister(X86::RIP, MVT::i64);
693
/// isLogicallyAddWithConstant - Return true if this node is semantically an
694
/// add of a value with a constantint.
695
static bool isLogicallyAddWithConstant(SDValue V, SelectionDAG *CurDAG) {
696
// Check for (add x, Cst)
697
if (V->getOpcode() == ISD::ADD)
698
return isa<ConstantSDNode>(V->getOperand(1));
700
// Check for (or x, Cst), where Cst & x == 0.
701
if (V->getOpcode() != ISD::OR ||
702
!isa<ConstantSDNode>(V->getOperand(1)))
705
// Handle "X | C" as "X + C" iff X is known to have C bits clear.
706
ConstantSDNode *CN = cast<ConstantSDNode>(V->getOperand(1));
708
// Check to see if the LHS & C is zero.
709
return CurDAG->MaskedValueIsZero(V->getOperand(0), CN->getAPIntValue());
712
bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
714
bool is64Bit = Subtarget->is64Bit();
715
DebugLoc dl = N.getDebugLoc();
717
dbgs() << "MatchAddress: ";
722
return MatchAddressBase(N, AM);
724
CodeModel::Model M = TM.getCodeModel();
726
// If this is already a %rip relative address, we can only merge immediates
727
// into it. Instead of handling this in every case, we handle it here.
728
// RIP relative addressing: %rip + 32-bit displacement!
729
if (AM.isRIPRelative()) {
730
// FIXME: JumpTable and ExternalSymbol address currently don't like
731
// displacements. It isn't very important, but this should be fixed for
733
if (!AM.ES && AM.JT != -1) return true;
735
if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(N)) {
736
int64_t Val = AM.Disp + Cst->getSExtValue();
737
if (X86::isOffsetSuitableForCodeModel(Val, M,
738
AM.hasSymbolicDisplacement())) {
746
switch (N.getOpcode()) {
748
case ISD::Constant: {
749
uint64_t Val = cast<ConstantSDNode>(N)->getSExtValue();
751
X86::isOffsetSuitableForCodeModel(AM.Disp + Val, M,
752
AM.hasSymbolicDisplacement())) {
759
case X86ISD::SegmentBaseAddress:
760
if (!MatchSegmentBaseAddress(N, AM))
764
case X86ISD::Wrapper:
765
case X86ISD::WrapperRIP:
766
if (!MatchWrapper(N, AM))
771
if (!MatchLoad(N, AM))
775
case ISD::FrameIndex:
776
if (AM.BaseType == X86ISelAddressMode::RegBase
777
&& AM.Base_Reg.getNode() == 0) {
778
AM.BaseType = X86ISelAddressMode::FrameIndexBase;
779
AM.Base_FrameIndex = cast<FrameIndexSDNode>(N)->getIndex();
785
if (AM.IndexReg.getNode() != 0 || AM.Scale != 1)
789
*CN = dyn_cast<ConstantSDNode>(N.getNode()->getOperand(1))) {
790
unsigned Val = CN->getZExtValue();
791
// Note that we handle x<<1 as (,x,2) rather than (x,x) here so
792
// that the base operand remains free for further matching. If
793
// the base doesn't end up getting used, a post-processing step
794
// in MatchAddress turns (,x,2) into (x,x), which is cheaper.
795
if (Val == 1 || Val == 2 || Val == 3) {
797
SDValue ShVal = N.getNode()->getOperand(0);
799
// Okay, we know that we have a scale by now. However, if the scaled
800
// value is an add of something and a constant, we can fold the
801
// constant into the disp field here.
802
if (isLogicallyAddWithConstant(ShVal, CurDAG)) {
803
AM.IndexReg = ShVal.getNode()->getOperand(0);
804
ConstantSDNode *AddVal =
805
cast<ConstantSDNode>(ShVal.getNode()->getOperand(1));
806
uint64_t Disp = AM.Disp + (AddVal->getSExtValue() << Val);
808
X86::isOffsetSuitableForCodeModel(Disp, M,
809
AM.hasSymbolicDisplacement()))
823
// A mul_lohi where we need the low part can be folded as a plain multiply.
824
if (N.getResNo() != 0) break;
827
case X86ISD::MUL_IMM:
828
// X*[3,5,9] -> X+X*[2,4,8]
829
if (AM.BaseType == X86ISelAddressMode::RegBase &&
830
AM.Base_Reg.getNode() == 0 &&
831
AM.IndexReg.getNode() == 0) {
833
*CN = dyn_cast<ConstantSDNode>(N.getNode()->getOperand(1)))
834
if (CN->getZExtValue() == 3 || CN->getZExtValue() == 5 ||
835
CN->getZExtValue() == 9) {
836
AM.Scale = unsigned(CN->getZExtValue())-1;
838
SDValue MulVal = N.getNode()->getOperand(0);
841
// Okay, we know that we have a scale by now. However, if the scaled
842
// value is an add of something and a constant, we can fold the
843
// constant into the disp field here.
844
if (MulVal.getNode()->getOpcode() == ISD::ADD && MulVal.hasOneUse() &&
845
isa<ConstantSDNode>(MulVal.getNode()->getOperand(1))) {
846
Reg = MulVal.getNode()->getOperand(0);
847
ConstantSDNode *AddVal =
848
cast<ConstantSDNode>(MulVal.getNode()->getOperand(1));
849
uint64_t Disp = AM.Disp + AddVal->getSExtValue() *
852
X86::isOffsetSuitableForCodeModel(Disp, M,
853
AM.hasSymbolicDisplacement()))
856
Reg = N.getNode()->getOperand(0);
858
Reg = N.getNode()->getOperand(0);
861
AM.IndexReg = AM.Base_Reg = Reg;
868
// Given A-B, if A can be completely folded into the address and
869
// the index field with the index field unused, use -B as the index.
870
// This is a win if a has multiple parts that can be folded into
871
// the address. Also, this saves a mov if the base register has
872
// other uses, since it avoids a two-address sub instruction, however
873
// it costs an additional mov if the index register has other uses.
875
// Add an artificial use to this node so that we can keep track of
876
// it if it gets CSE'd with a different node.
877
HandleSDNode Handle(N);
879
// Test if the LHS of the sub can be folded.
880
X86ISelAddressMode Backup = AM;
881
if (MatchAddressRecursively(N.getNode()->getOperand(0), AM, Depth+1)) {
885
// Test if the index field is free for use.
886
if (AM.IndexReg.getNode() || AM.isRIPRelative()) {
892
SDValue RHS = Handle.getValue().getNode()->getOperand(1);
893
// If the RHS involves a register with multiple uses, this
894
// transformation incurs an extra mov, due to the neg instruction
895
// clobbering its operand.
896
if (!RHS.getNode()->hasOneUse() ||
897
RHS.getNode()->getOpcode() == ISD::CopyFromReg ||
898
RHS.getNode()->getOpcode() == ISD::TRUNCATE ||
899
RHS.getNode()->getOpcode() == ISD::ANY_EXTEND ||
900
(RHS.getNode()->getOpcode() == ISD::ZERO_EXTEND &&
901
RHS.getNode()->getOperand(0).getValueType() == MVT::i32))
903
// If the base is a register with multiple uses, this
904
// transformation may save a mov.
905
if ((AM.BaseType == X86ISelAddressMode::RegBase &&
906
AM.Base_Reg.getNode() &&
907
!AM.Base_Reg.getNode()->hasOneUse()) ||
908
AM.BaseType == X86ISelAddressMode::FrameIndexBase)
910
// If the folded LHS was interesting, this transformation saves
911
// address arithmetic.
912
if ((AM.hasSymbolicDisplacement() && !Backup.hasSymbolicDisplacement()) +
913
((AM.Disp != 0) && (Backup.Disp == 0)) +
914
(AM.Segment.getNode() && !Backup.Segment.getNode()) >= 2)
916
// If it doesn't look like it may be an overall win, don't do it.
922
// Ok, the transformation is legal and appears profitable. Go for it.
923
SDValue Zero = CurDAG->getConstant(0, N.getValueType());
924
SDValue Neg = CurDAG->getNode(ISD::SUB, dl, N.getValueType(), Zero, RHS);
928
// Insert the new nodes into the topological ordering.
929
if (Zero.getNode()->getNodeId() == -1 ||
930
Zero.getNode()->getNodeId() > N.getNode()->getNodeId()) {
931
CurDAG->RepositionNode(N.getNode(), Zero.getNode());
932
Zero.getNode()->setNodeId(N.getNode()->getNodeId());
934
if (Neg.getNode()->getNodeId() == -1 ||
935
Neg.getNode()->getNodeId() > N.getNode()->getNodeId()) {
936
CurDAG->RepositionNode(N.getNode(), Neg.getNode());
937
Neg.getNode()->setNodeId(N.getNode()->getNodeId());
943
// Add an artificial use to this node so that we can keep track of
944
// it if it gets CSE'd with a different node.
945
HandleSDNode Handle(N);
946
SDValue LHS = Handle.getValue().getNode()->getOperand(0);
947
SDValue RHS = Handle.getValue().getNode()->getOperand(1);
949
X86ISelAddressMode Backup = AM;
950
if (!MatchAddressRecursively(LHS, AM, Depth+1) &&
951
!MatchAddressRecursively(RHS, AM, Depth+1))
954
LHS = Handle.getValue().getNode()->getOperand(0);
955
RHS = Handle.getValue().getNode()->getOperand(1);
957
// Try again after commuting the operands.
958
if (!MatchAddressRecursively(RHS, AM, Depth+1) &&
959
!MatchAddressRecursively(LHS, AM, Depth+1))
962
LHS = Handle.getValue().getNode()->getOperand(0);
963
RHS = Handle.getValue().getNode()->getOperand(1);
965
// If we couldn't fold both operands into the address at the same time,
966
// see if we can just put each operand into a register and fold at least
968
if (AM.BaseType == X86ISelAddressMode::RegBase &&
969
!AM.Base_Reg.getNode() &&
970
!AM.IndexReg.getNode()) {
980
// Handle "X | C" as "X + C" iff X is known to have C bits clear.
981
if (isLogicallyAddWithConstant(N, CurDAG)) {
982
X86ISelAddressMode Backup = AM;
983
ConstantSDNode *CN = cast<ConstantSDNode>(N.getOperand(1));
984
uint64_t Offset = CN->getSExtValue();
986
// Start with the LHS as an addr mode.
987
if (!MatchAddressRecursively(N.getOperand(0), AM, Depth+1) &&
988
// Address could not have picked a GV address for the displacement.
990
// On x86-64, the resultant disp must fit in 32-bits.
992
X86::isOffsetSuitableForCodeModel(AM.Disp + Offset, M,
993
AM.hasSymbolicDisplacement()))) {
1002
// Perform some heroic transforms on an and of a constant-count shift
1003
// with a constant to enable use of the scaled offset field.
1005
SDValue Shift = N.getOperand(0);
1006
if (Shift.getNumOperands() != 2) break;
1008
// Scale must not be used already.
1009
if (AM.IndexReg.getNode() != 0 || AM.Scale != 1) break;
1011
SDValue X = Shift.getOperand(0);
1012
ConstantSDNode *C2 = dyn_cast<ConstantSDNode>(N.getOperand(1));
1013
ConstantSDNode *C1 = dyn_cast<ConstantSDNode>(Shift.getOperand(1));
1014
if (!C1 || !C2) break;
1016
// Handle "(X >> (8-C1)) & C2" as "(X >> 8) & 0xff)" if safe. This
1017
// allows us to convert the shift and and into an h-register extract and
1019
if (Shift.getOpcode() == ISD::SRL && Shift.hasOneUse()) {
1020
unsigned ScaleLog = 8 - C1->getZExtValue();
1021
if (ScaleLog > 0 && ScaleLog < 4 &&
1022
C2->getZExtValue() == (UINT64_C(0xff) << ScaleLog)) {
1023
SDValue Eight = CurDAG->getConstant(8, MVT::i8);
1024
SDValue Mask = CurDAG->getConstant(0xff, N.getValueType());
1025
SDValue Srl = CurDAG->getNode(ISD::SRL, dl, N.getValueType(),
1027
SDValue And = CurDAG->getNode(ISD::AND, dl, N.getValueType(),
1029
SDValue ShlCount = CurDAG->getConstant(ScaleLog, MVT::i8);
1030
SDValue Shl = CurDAG->getNode(ISD::SHL, dl, N.getValueType(),
1033
// Insert the new nodes into the topological ordering.
1034
if (Eight.getNode()->getNodeId() == -1 ||
1035
Eight.getNode()->getNodeId() > X.getNode()->getNodeId()) {
1036
CurDAG->RepositionNode(X.getNode(), Eight.getNode());
1037
Eight.getNode()->setNodeId(X.getNode()->getNodeId());
1039
if (Mask.getNode()->getNodeId() == -1 ||
1040
Mask.getNode()->getNodeId() > X.getNode()->getNodeId()) {
1041
CurDAG->RepositionNode(X.getNode(), Mask.getNode());
1042
Mask.getNode()->setNodeId(X.getNode()->getNodeId());
1044
if (Srl.getNode()->getNodeId() == -1 ||
1045
Srl.getNode()->getNodeId() > Shift.getNode()->getNodeId()) {
1046
CurDAG->RepositionNode(Shift.getNode(), Srl.getNode());
1047
Srl.getNode()->setNodeId(Shift.getNode()->getNodeId());
1049
if (And.getNode()->getNodeId() == -1 ||
1050
And.getNode()->getNodeId() > N.getNode()->getNodeId()) {
1051
CurDAG->RepositionNode(N.getNode(), And.getNode());
1052
And.getNode()->setNodeId(N.getNode()->getNodeId());
1054
if (ShlCount.getNode()->getNodeId() == -1 ||
1055
ShlCount.getNode()->getNodeId() > X.getNode()->getNodeId()) {
1056
CurDAG->RepositionNode(X.getNode(), ShlCount.getNode());
1057
ShlCount.getNode()->setNodeId(N.getNode()->getNodeId());
1059
if (Shl.getNode()->getNodeId() == -1 ||
1060
Shl.getNode()->getNodeId() > N.getNode()->getNodeId()) {
1061
CurDAG->RepositionNode(N.getNode(), Shl.getNode());
1062
Shl.getNode()->setNodeId(N.getNode()->getNodeId());
1064
CurDAG->ReplaceAllUsesWith(N, Shl);
1066
AM.Scale = (1 << ScaleLog);
1071
// Handle "(X << C1) & C2" as "(X & (C2>>C1)) << C1" if safe and if this
1072
// allows us to fold the shift into this addressing mode.
1073
if (Shift.getOpcode() != ISD::SHL) break;
1075
// Not likely to be profitable if either the AND or SHIFT node has more
1076
// than one use (unless all uses are for address computation). Besides,
1077
// isel mechanism requires their node ids to be reused.
1078
if (!N.hasOneUse() || !Shift.hasOneUse())
1081
// Verify that the shift amount is something we can fold.
1082
unsigned ShiftCst = C1->getZExtValue();
1083
if (ShiftCst != 1 && ShiftCst != 2 && ShiftCst != 3)
1086
// Get the new AND mask, this folds to a constant.
1087
SDValue NewANDMask = CurDAG->getNode(ISD::SRL, dl, N.getValueType(),
1088
SDValue(C2, 0), SDValue(C1, 0));
1089
SDValue NewAND = CurDAG->getNode(ISD::AND, dl, N.getValueType(), X,
1091
SDValue NewSHIFT = CurDAG->getNode(ISD::SHL, dl, N.getValueType(),
1092
NewAND, SDValue(C1, 0));
1094
// Insert the new nodes into the topological ordering.
1095
if (C1->getNodeId() > X.getNode()->getNodeId()) {
1096
CurDAG->RepositionNode(X.getNode(), C1);
1097
C1->setNodeId(X.getNode()->getNodeId());
1099
if (NewANDMask.getNode()->getNodeId() == -1 ||
1100
NewANDMask.getNode()->getNodeId() > X.getNode()->getNodeId()) {
1101
CurDAG->RepositionNode(X.getNode(), NewANDMask.getNode());
1102
NewANDMask.getNode()->setNodeId(X.getNode()->getNodeId());
1104
if (NewAND.getNode()->getNodeId() == -1 ||
1105
NewAND.getNode()->getNodeId() > Shift.getNode()->getNodeId()) {
1106
CurDAG->RepositionNode(Shift.getNode(), NewAND.getNode());
1107
NewAND.getNode()->setNodeId(Shift.getNode()->getNodeId());
1109
if (NewSHIFT.getNode()->getNodeId() == -1 ||
1110
NewSHIFT.getNode()->getNodeId() > N.getNode()->getNodeId()) {
1111
CurDAG->RepositionNode(N.getNode(), NewSHIFT.getNode());
1112
NewSHIFT.getNode()->setNodeId(N.getNode()->getNodeId());
1115
CurDAG->ReplaceAllUsesWith(N, NewSHIFT);
1117
AM.Scale = 1 << ShiftCst;
1118
AM.IndexReg = NewAND;
1123
return MatchAddressBase(N, AM);
1126
/// MatchAddressBase - Helper for MatchAddress. Add the specified node to the
1127
/// specified addressing mode without any further recursion.
1128
bool X86DAGToDAGISel::MatchAddressBase(SDValue N, X86ISelAddressMode &AM) {
1129
// Is the base register already occupied?
1130
if (AM.BaseType != X86ISelAddressMode::RegBase || AM.Base_Reg.getNode()) {
1131
// If so, check to see if the scale index register is set.
1132
if (AM.IndexReg.getNode() == 0) {
1138
// Otherwise, we cannot select it.
1142
// Default, generate it as a register.
1143
AM.BaseType = X86ISelAddressMode::RegBase;
1148
/// SelectAddr - returns true if it is able pattern match an addressing mode.
1149
/// It returns the operands which make up the maximal addressing mode it can
1150
/// match by reference.
1151
bool X86DAGToDAGISel::SelectAddr(SDNode *Op, SDValue N, SDValue &Base,
1152
SDValue &Scale, SDValue &Index,
1153
SDValue &Disp, SDValue &Segment) {
1154
X86ISelAddressMode AM;
1155
if (MatchAddress(N, AM))
1158
EVT VT = N.getValueType();
1159
if (AM.BaseType == X86ISelAddressMode::RegBase) {
1160
if (!AM.Base_Reg.getNode())
1161
AM.Base_Reg = CurDAG->getRegister(0, VT);
1164
if (!AM.IndexReg.getNode())
1165
AM.IndexReg = CurDAG->getRegister(0, VT);
1167
getAddressOperands(AM, Base, Scale, Index, Disp, Segment);
1171
/// SelectScalarSSELoad - Match a scalar SSE load. In particular, we want to
1172
/// match a load whose top elements are either undef or zeros. The load flavor
1173
/// is derived from the type of N, which is either v4f32 or v2f64.
1176
/// PatternChainNode: this is the matched node that has a chain input and
1178
bool X86DAGToDAGISel::SelectScalarSSELoad(SDNode *Root,
1179
SDValue N, SDValue &Base,
1180
SDValue &Scale, SDValue &Index,
1181
SDValue &Disp, SDValue &Segment,
1182
SDValue &PatternNodeWithChain) {
1183
if (N.getOpcode() == ISD::SCALAR_TO_VECTOR) {
1184
PatternNodeWithChain = N.getOperand(0);
1185
if (ISD::isNON_EXTLoad(PatternNodeWithChain.getNode()) &&
1186
PatternNodeWithChain.hasOneUse() &&
1187
IsProfitableToFold(N.getOperand(0), N.getNode(), Root) &&
1188
IsLegalToFold(N.getOperand(0), N.getNode(), Root, OptLevel)) {
1189
LoadSDNode *LD = cast<LoadSDNode>(PatternNodeWithChain);
1190
if (!SelectAddr(Root, LD->getBasePtr(), Base, Scale, Index, Disp,Segment))
1196
// Also handle the case where we explicitly require zeros in the top
1197
// elements. This is a vector shuffle from the zero vector.
1198
if (N.getOpcode() == X86ISD::VZEXT_MOVL && N.getNode()->hasOneUse() &&
1199
// Check to see if the top elements are all zeros (or bitcast of zeros).
1200
N.getOperand(0).getOpcode() == ISD::SCALAR_TO_VECTOR &&
1201
N.getOperand(0).getNode()->hasOneUse() &&
1202
ISD::isNON_EXTLoad(N.getOperand(0).getOperand(0).getNode()) &&
1203
N.getOperand(0).getOperand(0).hasOneUse() &&
1204
IsProfitableToFold(N.getOperand(0), N.getNode(), Root) &&
1205
IsLegalToFold(N.getOperand(0), N.getNode(), Root, OptLevel)) {
1206
// Okay, this is a zero extending load. Fold it.
1207
LoadSDNode *LD = cast<LoadSDNode>(N.getOperand(0).getOperand(0));
1208
if (!SelectAddr(Root, LD->getBasePtr(), Base, Scale, Index, Disp, Segment))
1210
PatternNodeWithChain = SDValue(LD, 0);
1217
/// SelectLEAAddr - it calls SelectAddr and determines if the maximal addressing
1218
/// mode it matches can be cost effectively emitted as an LEA instruction.
1219
bool X86DAGToDAGISel::SelectLEAAddr(SDNode *Op, SDValue N,
1220
SDValue &Base, SDValue &Scale,
1221
SDValue &Index, SDValue &Disp,
1223
X86ISelAddressMode AM;
1225
// Set AM.Segment to prevent MatchAddress from using one. LEA doesn't support
1227
SDValue Copy = AM.Segment;
1228
SDValue T = CurDAG->getRegister(0, MVT::i32);
1230
if (MatchAddress(N, AM))
1232
assert (T == AM.Segment);
1235
EVT VT = N.getValueType();
1236
unsigned Complexity = 0;
1237
if (AM.BaseType == X86ISelAddressMode::RegBase)
1238
if (AM.Base_Reg.getNode())
1241
AM.Base_Reg = CurDAG->getRegister(0, VT);
1242
else if (AM.BaseType == X86ISelAddressMode::FrameIndexBase)
1245
if (AM.IndexReg.getNode())
1248
AM.IndexReg = CurDAG->getRegister(0, VT);
1250
// Don't match just leal(,%reg,2). It's cheaper to do addl %reg, %reg, or with
1255
// FIXME: We are artificially lowering the criteria to turn ADD %reg, $GA
1256
// to a LEA. This is determined with some expermentation but is by no means
1257
// optimal (especially for code size consideration). LEA is nice because of
1258
// its three-address nature. Tweak the cost function again when we can run
1259
// convertToThreeAddress() at register allocation time.
1260
if (AM.hasSymbolicDisplacement()) {
1261
// For X86-64, we should always use lea to materialize RIP relative
1263
if (Subtarget->is64Bit())
1269
if (AM.Disp && (AM.Base_Reg.getNode() || AM.IndexReg.getNode()))
1272
// If it isn't worth using an LEA, reject it.
1273
if (Complexity <= 2)
1276
getAddressOperands(AM, Base, Scale, Index, Disp, Segment);
1280
/// SelectTLSADDRAddr - This is only run on TargetGlobalTLSAddress nodes.
1281
bool X86DAGToDAGISel::SelectTLSADDRAddr(SDNode *Op, SDValue N, SDValue &Base,
1282
SDValue &Scale, SDValue &Index,
1283
SDValue &Disp, SDValue &Segment) {
1284
assert(N.getOpcode() == ISD::TargetGlobalTLSAddress);
1285
const GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(N);
1287
X86ISelAddressMode AM;
1288
AM.GV = GA->getGlobal();
1289
AM.Disp += GA->getOffset();
1290
AM.Base_Reg = CurDAG->getRegister(0, N.getValueType());
1291
AM.SymbolFlags = GA->getTargetFlags();
1293
if (N.getValueType() == MVT::i32) {
1295
AM.IndexReg = CurDAG->getRegister(X86::EBX, MVT::i32);
1297
AM.IndexReg = CurDAG->getRegister(0, MVT::i64);
1300
getAddressOperands(AM, Base, Scale, Index, Disp, Segment);
1305
bool X86DAGToDAGISel::TryFoldLoad(SDNode *P, SDValue N,
1306
SDValue &Base, SDValue &Scale,
1307
SDValue &Index, SDValue &Disp,
1309
if (!ISD::isNON_EXTLoad(N.getNode()) ||
1310
!IsProfitableToFold(N, P, P) ||
1311
!IsLegalToFold(N, P, P, OptLevel))
1314
return SelectAddr(P, N.getOperand(1), Base, Scale, Index, Disp, Segment);
1317
/// getGlobalBaseReg - Return an SDNode that returns the value of
1318
/// the global base register. Output instructions required to
1319
/// initialize the global base register, if necessary.
1321
SDNode *X86DAGToDAGISel::getGlobalBaseReg() {
1322
unsigned GlobalBaseReg = getInstrInfo()->getGlobalBaseReg(MF);
1323
return CurDAG->getRegister(GlobalBaseReg, TLI.getPointerTy()).getNode();
1326
SDNode *X86DAGToDAGISel::SelectAtomic64(SDNode *Node, unsigned Opc) {
1327
SDValue Chain = Node->getOperand(0);
1328
SDValue In1 = Node->getOperand(1);
1329
SDValue In2L = Node->getOperand(2);
1330
SDValue In2H = Node->getOperand(3);
1331
SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
1332
if (!SelectAddr(In1.getNode(), In1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4))
1334
MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
1335
MemOp[0] = cast<MemSDNode>(Node)->getMemOperand();
1336
const SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, In2L, In2H, Chain};
1337
SDNode *ResNode = CurDAG->getMachineNode(Opc, Node->getDebugLoc(),
1338
MVT::i32, MVT::i32, MVT::Other, Ops,
1339
array_lengthof(Ops));
1340
cast<MachineSDNode>(ResNode)->setMemRefs(MemOp, MemOp + 1);
1344
SDNode *X86DAGToDAGISel::SelectAtomicLoadAdd(SDNode *Node, EVT NVT) {
1345
if (Node->hasAnyUseOfValue(0))
1348
// Optimize common patterns for __sync_add_and_fetch and
1349
// __sync_sub_and_fetch where the result is not used. This allows us
1350
// to use "lock" version of add, sub, inc, dec instructions.
1351
// FIXME: Do not use special instructions but instead add the "lock"
1352
// prefix to the target node somehow. The extra information will then be
1353
// transferred to machine instruction and it denotes the prefix.
1354
SDValue Chain = Node->getOperand(0);
1355
SDValue Ptr = Node->getOperand(1);
1356
SDValue Val = Node->getOperand(2);
1357
SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
1358
if (!SelectAddr(Ptr.getNode(), Ptr, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4))
1361
bool isInc = false, isDec = false, isSub = false, isCN = false;
1362
ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Val);
1365
int64_t CNVal = CN->getSExtValue();
1368
else if (CNVal == -1)
1370
else if (CNVal >= 0)
1371
Val = CurDAG->getTargetConstant(CNVal, NVT);
1374
Val = CurDAG->getTargetConstant(-CNVal, NVT);
1376
} else if (Val.hasOneUse() &&
1377
Val.getOpcode() == ISD::SUB &&
1378
X86::isZeroNode(Val.getOperand(0))) {
1380
Val = Val.getOperand(1);
1384
switch (NVT.getSimpleVT().SimpleTy) {
1388
Opc = X86::LOCK_INC8m;
1390
Opc = X86::LOCK_DEC8m;
1393
Opc = X86::LOCK_SUB8mi;
1395
Opc = X86::LOCK_SUB8mr;
1398
Opc = X86::LOCK_ADD8mi;
1400
Opc = X86::LOCK_ADD8mr;
1405
Opc = X86::LOCK_INC16m;
1407
Opc = X86::LOCK_DEC16m;
1410
if (immSext8(Val.getNode()))
1411
Opc = X86::LOCK_SUB16mi8;
1413
Opc = X86::LOCK_SUB16mi;
1415
Opc = X86::LOCK_SUB16mr;
1418
if (immSext8(Val.getNode()))
1419
Opc = X86::LOCK_ADD16mi8;
1421
Opc = X86::LOCK_ADD16mi;
1423
Opc = X86::LOCK_ADD16mr;
1428
Opc = X86::LOCK_INC32m;
1430
Opc = X86::LOCK_DEC32m;
1433
if (immSext8(Val.getNode()))
1434
Opc = X86::LOCK_SUB32mi8;
1436
Opc = X86::LOCK_SUB32mi;
1438
Opc = X86::LOCK_SUB32mr;
1441
if (immSext8(Val.getNode()))
1442
Opc = X86::LOCK_ADD32mi8;
1444
Opc = X86::LOCK_ADD32mi;
1446
Opc = X86::LOCK_ADD32mr;
1451
Opc = X86::LOCK_INC64m;
1453
Opc = X86::LOCK_DEC64m;
1455
Opc = X86::LOCK_SUB64mr;
1457
if (immSext8(Val.getNode()))
1458
Opc = X86::LOCK_SUB64mi8;
1459
else if (i64immSExt32(Val.getNode()))
1460
Opc = X86::LOCK_SUB64mi32;
1463
Opc = X86::LOCK_ADD64mr;
1465
if (immSext8(Val.getNode()))
1466
Opc = X86::LOCK_ADD64mi8;
1467
else if (i64immSExt32(Val.getNode()))
1468
Opc = X86::LOCK_ADD64mi32;
1474
DebugLoc dl = Node->getDebugLoc();
1475
SDValue Undef = SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
1477
MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
1478
MemOp[0] = cast<MemSDNode>(Node)->getMemOperand();
1479
if (isInc || isDec) {
1480
SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Chain };
1481
SDValue Ret = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops, 6), 0);
1482
cast<MachineSDNode>(Ret)->setMemRefs(MemOp, MemOp + 1);
1483
SDValue RetVals[] = { Undef, Ret };
1484
return CurDAG->getMergeValues(RetVals, 2, dl).getNode();
1486
SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Val, Chain };
1487
SDValue Ret = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops, 7), 0);
1488
cast<MachineSDNode>(Ret)->setMemRefs(MemOp, MemOp + 1);
1489
SDValue RetVals[] = { Undef, Ret };
1490
return CurDAG->getMergeValues(RetVals, 2, dl).getNode();
1494
/// HasNoSignedComparisonUses - Test whether the given X86ISD::CMP node has
1495
/// any uses which require the SF or OF bits to be accurate.
1496
static bool HasNoSignedComparisonUses(SDNode *N) {
1497
// Examine each user of the node.
1498
for (SDNode::use_iterator UI = N->use_begin(),
1499
UE = N->use_end(); UI != UE; ++UI) {
1500
// Only examine CopyToReg uses.
1501
if (UI->getOpcode() != ISD::CopyToReg)
1503
// Only examine CopyToReg uses that copy to EFLAGS.
1504
if (cast<RegisterSDNode>(UI->getOperand(1))->getReg() !=
1507
// Examine each user of the CopyToReg use.
1508
for (SDNode::use_iterator FlagUI = UI->use_begin(),
1509
FlagUE = UI->use_end(); FlagUI != FlagUE; ++FlagUI) {
1510
// Only examine the Flag result.
1511
if (FlagUI.getUse().getResNo() != 1) continue;
1512
// Anything unusual: assume conservatively.
1513
if (!FlagUI->isMachineOpcode()) return false;
1514
// Examine the opcode of the user.
1515
switch (FlagUI->getMachineOpcode()) {
1516
// These comparisons don't treat the most significant bit specially.
1517
case X86::SETAr: case X86::SETAEr: case X86::SETBr: case X86::SETBEr:
1518
case X86::SETEr: case X86::SETNEr: case X86::SETPr: case X86::SETNPr:
1519
case X86::SETAm: case X86::SETAEm: case X86::SETBm: case X86::SETBEm:
1520
case X86::SETEm: case X86::SETNEm: case X86::SETPm: case X86::SETNPm:
1521
case X86::JA_4: case X86::JAE_4: case X86::JB_4: case X86::JBE_4:
1522
case X86::JE_4: case X86::JNE_4: case X86::JP_4: case X86::JNP_4:
1523
case X86::CMOVA16rr: case X86::CMOVA16rm:
1524
case X86::CMOVA32rr: case X86::CMOVA32rm:
1525
case X86::CMOVA64rr: case X86::CMOVA64rm:
1526
case X86::CMOVAE16rr: case X86::CMOVAE16rm:
1527
case X86::CMOVAE32rr: case X86::CMOVAE32rm:
1528
case X86::CMOVAE64rr: case X86::CMOVAE64rm:
1529
case X86::CMOVB16rr: case X86::CMOVB16rm:
1530
case X86::CMOVB32rr: case X86::CMOVB32rm:
1531
case X86::CMOVB64rr: case X86::CMOVB64rm:
1532
case X86::CMOVBE16rr: case X86::CMOVBE16rm:
1533
case X86::CMOVBE32rr: case X86::CMOVBE32rm:
1534
case X86::CMOVBE64rr: case X86::CMOVBE64rm:
1535
case X86::CMOVE16rr: case X86::CMOVE16rm:
1536
case X86::CMOVE32rr: case X86::CMOVE32rm:
1537
case X86::CMOVE64rr: case X86::CMOVE64rm:
1538
case X86::CMOVNE16rr: case X86::CMOVNE16rm:
1539
case X86::CMOVNE32rr: case X86::CMOVNE32rm:
1540
case X86::CMOVNE64rr: case X86::CMOVNE64rm:
1541
case X86::CMOVNP16rr: case X86::CMOVNP16rm:
1542
case X86::CMOVNP32rr: case X86::CMOVNP32rm:
1543
case X86::CMOVNP64rr: case X86::CMOVNP64rm:
1544
case X86::CMOVP16rr: case X86::CMOVP16rm:
1545
case X86::CMOVP32rr: case X86::CMOVP32rm:
1546
case X86::CMOVP64rr: case X86::CMOVP64rm:
1548
// Anything else: assume conservatively.
1549
default: return false;
1556
SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
1557
EVT NVT = Node->getValueType(0);
1559
unsigned Opcode = Node->getOpcode();
1560
DebugLoc dl = Node->getDebugLoc();
1562
DEBUG(dbgs() << "Selecting: "; Node->dump(CurDAG); dbgs() << '\n');
1564
if (Node->isMachineOpcode()) {
1565
DEBUG(dbgs() << "== "; Node->dump(CurDAG); dbgs() << '\n');
1566
return NULL; // Already selected.
1571
case X86ISD::GlobalBaseReg:
1572
return getGlobalBaseReg();
1574
case X86ISD::ATOMOR64_DAG:
1575
return SelectAtomic64(Node, X86::ATOMOR6432);
1576
case X86ISD::ATOMXOR64_DAG:
1577
return SelectAtomic64(Node, X86::ATOMXOR6432);
1578
case X86ISD::ATOMADD64_DAG:
1579
return SelectAtomic64(Node, X86::ATOMADD6432);
1580
case X86ISD::ATOMSUB64_DAG:
1581
return SelectAtomic64(Node, X86::ATOMSUB6432);
1582
case X86ISD::ATOMNAND64_DAG:
1583
return SelectAtomic64(Node, X86::ATOMNAND6432);
1584
case X86ISD::ATOMAND64_DAG:
1585
return SelectAtomic64(Node, X86::ATOMAND6432);
1586
case X86ISD::ATOMSWAP64_DAG:
1587
return SelectAtomic64(Node, X86::ATOMSWAP6432);
1589
case ISD::ATOMIC_LOAD_ADD: {
1590
SDNode *RetVal = SelectAtomicLoadAdd(Node, NVT);
1596
case ISD::SMUL_LOHI:
1597
case ISD::UMUL_LOHI: {
1598
SDValue N0 = Node->getOperand(0);
1599
SDValue N1 = Node->getOperand(1);
1601
bool isSigned = Opcode == ISD::SMUL_LOHI;
1603
switch (NVT.getSimpleVT().SimpleTy) {
1604
default: llvm_unreachable("Unsupported VT!");
1605
case MVT::i8: Opc = X86::MUL8r; MOpc = X86::MUL8m; break;
1606
case MVT::i16: Opc = X86::MUL16r; MOpc = X86::MUL16m; break;
1607
case MVT::i32: Opc = X86::MUL32r; MOpc = X86::MUL32m; break;
1608
case MVT::i64: Opc = X86::MUL64r; MOpc = X86::MUL64m; break;
1611
switch (NVT.getSimpleVT().SimpleTy) {
1612
default: llvm_unreachable("Unsupported VT!");
1613
case MVT::i8: Opc = X86::IMUL8r; MOpc = X86::IMUL8m; break;
1614
case MVT::i16: Opc = X86::IMUL16r; MOpc = X86::IMUL16m; break;
1615
case MVT::i32: Opc = X86::IMUL32r; MOpc = X86::IMUL32m; break;
1616
case MVT::i64: Opc = X86::IMUL64r; MOpc = X86::IMUL64m; break;
1620
unsigned LoReg, HiReg;
1621
switch (NVT.getSimpleVT().SimpleTy) {
1622
default: llvm_unreachable("Unsupported VT!");
1623
case MVT::i8: LoReg = X86::AL; HiReg = X86::AH; break;
1624
case MVT::i16: LoReg = X86::AX; HiReg = X86::DX; break;
1625
case MVT::i32: LoReg = X86::EAX; HiReg = X86::EDX; break;
1626
case MVT::i64: LoReg = X86::RAX; HiReg = X86::RDX; break;
1629
SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
1630
bool foldedLoad = TryFoldLoad(Node, N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4);
1631
// Multiply is commmutative.
1633
foldedLoad = TryFoldLoad(Node, N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4);
1638
SDValue InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, LoReg,
1639
N0, SDValue()).getValue(1);
1642
SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N1.getOperand(0),
1645
CurDAG->getMachineNode(MOpc, dl, MVT::Other, MVT::Flag, Ops,
1646
array_lengthof(Ops));
1647
InFlag = SDValue(CNode, 1);
1648
// Update the chain.
1649
ReplaceUses(N1.getValue(1), SDValue(CNode, 0));
1652
SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Flag, N1, InFlag), 0);
1655
// Prevent use of AH in a REX instruction by referencing AX instead.
1656
if (HiReg == X86::AH && Subtarget->is64Bit() &&
1657
!SDValue(Node, 1).use_empty()) {
1658
SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
1659
X86::AX, MVT::i16, InFlag);
1660
InFlag = Result.getValue(2);
1661
// Get the low part if needed. Don't use getCopyFromReg for aliasing
1663
if (!SDValue(Node, 0).use_empty())
1664
ReplaceUses(SDValue(Node, 1),
1665
CurDAG->getTargetExtractSubreg(X86::sub_8bit, dl, MVT::i8, Result));
1667
// Shift AX down 8 bits.
1668
Result = SDValue(CurDAG->getMachineNode(X86::SHR16ri, dl, MVT::i16,
1670
CurDAG->getTargetConstant(8, MVT::i8)), 0);
1671
// Then truncate it down to i8.
1672
ReplaceUses(SDValue(Node, 1),
1673
CurDAG->getTargetExtractSubreg(X86::sub_8bit, dl, MVT::i8, Result));
1675
// Copy the low half of the result, if it is needed.
1676
if (!SDValue(Node, 0).use_empty()) {
1677
SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
1678
LoReg, NVT, InFlag);
1679
InFlag = Result.getValue(2);
1680
ReplaceUses(SDValue(Node, 0), Result);
1681
DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG); dbgs() << '\n');
1683
// Copy the high half of the result, if it is needed.
1684
if (!SDValue(Node, 1).use_empty()) {
1685
SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
1686
HiReg, NVT, InFlag);
1687
InFlag = Result.getValue(2);
1688
ReplaceUses(SDValue(Node, 1), Result);
1689
DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG); dbgs() << '\n');
1696
case ISD::UDIVREM: {
1697
SDValue N0 = Node->getOperand(0);
1698
SDValue N1 = Node->getOperand(1);
1700
bool isSigned = Opcode == ISD::SDIVREM;
1702
switch (NVT.getSimpleVT().SimpleTy) {
1703
default: llvm_unreachable("Unsupported VT!");
1704
case MVT::i8: Opc = X86::DIV8r; MOpc = X86::DIV8m; break;
1705
case MVT::i16: Opc = X86::DIV16r; MOpc = X86::DIV16m; break;
1706
case MVT::i32: Opc = X86::DIV32r; MOpc = X86::DIV32m; break;
1707
case MVT::i64: Opc = X86::DIV64r; MOpc = X86::DIV64m; break;
1710
switch (NVT.getSimpleVT().SimpleTy) {
1711
default: llvm_unreachable("Unsupported VT!");
1712
case MVT::i8: Opc = X86::IDIV8r; MOpc = X86::IDIV8m; break;
1713
case MVT::i16: Opc = X86::IDIV16r; MOpc = X86::IDIV16m; break;
1714
case MVT::i32: Opc = X86::IDIV32r; MOpc = X86::IDIV32m; break;
1715
case MVT::i64: Opc = X86::IDIV64r; MOpc = X86::IDIV64m; break;
1719
unsigned LoReg, HiReg, ClrReg;
1720
unsigned ClrOpcode, SExtOpcode;
1721
switch (NVT.getSimpleVT().SimpleTy) {
1722
default: llvm_unreachable("Unsupported VT!");
1724
LoReg = X86::AL; ClrReg = HiReg = X86::AH;
1726
SExtOpcode = X86::CBW;
1729
LoReg = X86::AX; HiReg = X86::DX;
1730
ClrOpcode = X86::MOV16r0; ClrReg = X86::DX;
1731
SExtOpcode = X86::CWD;
1734
LoReg = X86::EAX; ClrReg = HiReg = X86::EDX;
1735
ClrOpcode = X86::MOV32r0;
1736
SExtOpcode = X86::CDQ;
1739
LoReg = X86::RAX; ClrReg = HiReg = X86::RDX;
1740
ClrOpcode = X86::MOV64r0;
1741
SExtOpcode = X86::CQO;
1745
SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
1746
bool foldedLoad = TryFoldLoad(Node, N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4);
1747
bool signBitIsZero = CurDAG->SignBitIsZero(N0);
1750
if (NVT == MVT::i8 && (!isSigned || signBitIsZero)) {
1751
// Special case for div8, just use a move with zero extension to AX to
1752
// clear the upper 8 bits (AH).
1753
SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Move, Chain;
1754
if (TryFoldLoad(Node, N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) {
1755
SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N0.getOperand(0) };
1757
SDValue(CurDAG->getMachineNode(X86::MOVZX16rm8, dl, MVT::i16,
1759
array_lengthof(Ops)), 0);
1760
Chain = Move.getValue(1);
1761
ReplaceUses(N0.getValue(1), Chain);
1764
SDValue(CurDAG->getMachineNode(X86::MOVZX16rr8, dl, MVT::i16, N0),0);
1765
Chain = CurDAG->getEntryNode();
1767
Chain = CurDAG->getCopyToReg(Chain, dl, X86::AX, Move, SDValue());
1768
InFlag = Chain.getValue(1);
1771
CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl,
1772
LoReg, N0, SDValue()).getValue(1);
1773
if (isSigned && !signBitIsZero) {
1774
// Sign extend the low part into the high part.
1776
SDValue(CurDAG->getMachineNode(SExtOpcode, dl, MVT::Flag, InFlag),0);
1778
// Zero out the high part, effectively zero extending the input.
1780
SDValue(CurDAG->getMachineNode(ClrOpcode, dl, NVT), 0);
1781
InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, ClrReg,
1782
ClrNode, InFlag).getValue(1);
1787
SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N1.getOperand(0),
1790
CurDAG->getMachineNode(MOpc, dl, MVT::Other, MVT::Flag, Ops,
1791
array_lengthof(Ops));
1792
InFlag = SDValue(CNode, 1);
1793
// Update the chain.
1794
ReplaceUses(N1.getValue(1), SDValue(CNode, 0));
1797
SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Flag, N1, InFlag), 0);
1800
// Prevent use of AH in a REX instruction by referencing AX instead.
1801
// Shift it down 8 bits.
1802
if (HiReg == X86::AH && Subtarget->is64Bit() &&
1803
!SDValue(Node, 1).use_empty()) {
1804
SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
1805
X86::AX, MVT::i16, InFlag);
1806
InFlag = Result.getValue(2);
1808
// If we also need AL (the quotient), get it by extracting a subreg from
1809
// Result. The fast register allocator does not like multiple CopyFromReg
1810
// nodes using aliasing registers.
1811
if (!SDValue(Node, 0).use_empty())
1812
ReplaceUses(SDValue(Node, 0),
1813
CurDAG->getTargetExtractSubreg(X86::sub_8bit, dl, MVT::i8, Result));
1815
// Shift AX right by 8 bits instead of using AH.
1816
Result = SDValue(CurDAG->getMachineNode(X86::SHR16ri, dl, MVT::i16,
1818
CurDAG->getTargetConstant(8, MVT::i8)),
1820
ReplaceUses(SDValue(Node, 1),
1821
CurDAG->getTargetExtractSubreg(X86::sub_8bit, dl, MVT::i8, Result));
1823
// Copy the division (low) result, if it is needed.
1824
if (!SDValue(Node, 0).use_empty()) {
1825
SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
1826
LoReg, NVT, InFlag);
1827
InFlag = Result.getValue(2);
1828
ReplaceUses(SDValue(Node, 0), Result);
1829
DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG); dbgs() << '\n');
1831
// Copy the remainder (high) result, if it is needed.
1832
if (!SDValue(Node, 1).use_empty()) {
1833
SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
1834
HiReg, NVT, InFlag);
1835
InFlag = Result.getValue(2);
1836
ReplaceUses(SDValue(Node, 1), Result);
1837
DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG); dbgs() << '\n');
1843
SDValue N0 = Node->getOperand(0);
1844
SDValue N1 = Node->getOperand(1);
1846
// Look for (X86cmp (and $op, $imm), 0) and see if we can convert it to
1847
// use a smaller encoding.
1848
if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() &&
1849
HasNoSignedComparisonUses(Node))
1850
// Look past the truncate if CMP is the only use of it.
1851
N0 = N0.getOperand(0);
1852
if (N0.getNode()->getOpcode() == ISD::AND && N0.getNode()->hasOneUse() &&
1853
N0.getValueType() != MVT::i8 &&
1854
X86::isZeroNode(N1)) {
1855
ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getNode()->getOperand(1));
1858
// For example, convert "testl %eax, $8" to "testb %al, $8"
1859
if ((C->getZExtValue() & ~UINT64_C(0xff)) == 0 &&
1860
(!(C->getZExtValue() & 0x80) ||
1861
HasNoSignedComparisonUses(Node))) {
1862
SDValue Imm = CurDAG->getTargetConstant(C->getZExtValue(), MVT::i8);
1863
SDValue Reg = N0.getNode()->getOperand(0);
1865
// On x86-32, only the ABCD registers have 8-bit subregisters.
1866
if (!Subtarget->is64Bit()) {
1867
TargetRegisterClass *TRC = 0;
1868
switch (N0.getValueType().getSimpleVT().SimpleTy) {
1869
case MVT::i32: TRC = &X86::GR32_ABCDRegClass; break;
1870
case MVT::i16: TRC = &X86::GR16_ABCDRegClass; break;
1871
default: llvm_unreachable("Unsupported TEST operand type!");
1873
SDValue RC = CurDAG->getTargetConstant(TRC->getID(), MVT::i32);
1874
Reg = SDValue(CurDAG->getMachineNode(X86::COPY_TO_REGCLASS, dl,
1875
Reg.getValueType(), Reg, RC), 0);
1878
// Extract the l-register.
1879
SDValue Subreg = CurDAG->getTargetExtractSubreg(X86::sub_8bit, dl,
1883
return CurDAG->getMachineNode(X86::TEST8ri, dl, MVT::i32, Subreg, Imm);
1886
// For example, "testl %eax, $2048" to "testb %ah, $8".
1887
if ((C->getZExtValue() & ~UINT64_C(0xff00)) == 0 &&
1888
(!(C->getZExtValue() & 0x8000) ||
1889
HasNoSignedComparisonUses(Node))) {
1890
// Shift the immediate right by 8 bits.
1891
SDValue ShiftedImm = CurDAG->getTargetConstant(C->getZExtValue() >> 8,
1893
SDValue Reg = N0.getNode()->getOperand(0);
1895
// Put the value in an ABCD register.
1896
TargetRegisterClass *TRC = 0;
1897
switch (N0.getValueType().getSimpleVT().SimpleTy) {
1898
case MVT::i64: TRC = &X86::GR64_ABCDRegClass; break;
1899
case MVT::i32: TRC = &X86::GR32_ABCDRegClass; break;
1900
case MVT::i16: TRC = &X86::GR16_ABCDRegClass; break;
1901
default: llvm_unreachable("Unsupported TEST operand type!");
1903
SDValue RC = CurDAG->getTargetConstant(TRC->getID(), MVT::i32);
1904
Reg = SDValue(CurDAG->getMachineNode(X86::COPY_TO_REGCLASS, dl,
1905
Reg.getValueType(), Reg, RC), 0);
1907
// Extract the h-register.
1908
SDValue Subreg = CurDAG->getTargetExtractSubreg(X86::sub_8bit_hi, dl,
1911
// Emit a testb. No special NOREX tricks are needed since there's
1912
// only one GPR operand!
1913
return CurDAG->getMachineNode(X86::TEST8ri, dl, MVT::i32,
1914
Subreg, ShiftedImm);
1917
// For example, "testl %eax, $32776" to "testw %ax, $32776".
1918
if ((C->getZExtValue() & ~UINT64_C(0xffff)) == 0 &&
1919
N0.getValueType() != MVT::i16 &&
1920
(!(C->getZExtValue() & 0x8000) ||
1921
HasNoSignedComparisonUses(Node))) {
1922
SDValue Imm = CurDAG->getTargetConstant(C->getZExtValue(), MVT::i16);
1923
SDValue Reg = N0.getNode()->getOperand(0);
1925
// Extract the 16-bit subregister.
1926
SDValue Subreg = CurDAG->getTargetExtractSubreg(X86::sub_16bit, dl,
1930
return CurDAG->getMachineNode(X86::TEST16ri, dl, MVT::i32, Subreg, Imm);
1933
// For example, "testq %rax, $268468232" to "testl %eax, $268468232".
1934
if ((C->getZExtValue() & ~UINT64_C(0xffffffff)) == 0 &&
1935
N0.getValueType() == MVT::i64 &&
1936
(!(C->getZExtValue() & 0x80000000) ||
1937
HasNoSignedComparisonUses(Node))) {
1938
SDValue Imm = CurDAG->getTargetConstant(C->getZExtValue(), MVT::i32);
1939
SDValue Reg = N0.getNode()->getOperand(0);
1941
// Extract the 32-bit subregister.
1942
SDValue Subreg = CurDAG->getTargetExtractSubreg(X86::sub_32bit, dl,
1946
return CurDAG->getMachineNode(X86::TEST32ri, dl, MVT::i32, Subreg, Imm);
1953
SDNode *ResNode = SelectCode(Node);
1955
DEBUG(dbgs() << "=> ";
1956
if (ResNode == NULL || ResNode == Node)
1959
ResNode->dump(CurDAG);
1965
bool X86DAGToDAGISel::
1966
SelectInlineAsmMemoryOperand(const SDValue &Op, char ConstraintCode,
1967
std::vector<SDValue> &OutOps) {
1968
SDValue Op0, Op1, Op2, Op3, Op4;
1969
switch (ConstraintCode) {
1970
case 'o': // offsetable ??
1971
case 'v': // not offsetable ??
1972
default: return true;
1974
if (!SelectAddr(Op.getNode(), Op, Op0, Op1, Op2, Op3, Op4))
1979
OutOps.push_back(Op0);
1980
OutOps.push_back(Op1);
1981
OutOps.push_back(Op2);
1982
OutOps.push_back(Op3);
1983
OutOps.push_back(Op4);
1987
/// createX86ISelDag - This pass converts a legalized DAG into a
1988
/// X86-specific DAG, ready for instruction scheduling.
1990
FunctionPass *llvm::createX86ISelDag(X86TargetMachine &TM,
1991
llvm::CodeGenOpt::Level OptLevel) {
1992
return new X86DAGToDAGISel(TM, OptLevel);