//===-- AArch64ISelDAGToDAG.cpp - A dag to dag inst selector for AArch64 --===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines an instruction selector for the AArch64 target.
//
//===----------------------------------------------------------------------===//

#include "AArch64TargetMachine.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "llvm/ADT/APSInt.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/IR/Function.h" // To access function attributes.
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#define DEBUG_TYPE "aarch64-isel"

//===--------------------------------------------------------------------===//
/// AArch64DAGToDAGISel - AArch64 specific code to select AArch64 machine
/// instructions for SelectionDAG operations.
///
namespace {

class AArch64DAGToDAGISel : public SelectionDAGISel {
  AArch64TargetMachine &TM;

  /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can
  /// make the right decision when generating code for different targets.
  const AArch64Subtarget *Subtarget;

  bool ForCodeSize;

public:
  explicit AArch64DAGToDAGISel(AArch64TargetMachine &tm,
                               CodeGenOpt::Level OptLevel)
      : SelectionDAGISel(tm, OptLevel), TM(tm), Subtarget(nullptr),
        ForCodeSize(false) {}

  const char *getPassName() const override {
    return "AArch64 Instruction Selection";
  }

  bool runOnMachineFunction(MachineFunction &MF) override {
    ForCodeSize =
        MF.getFunction()->hasFnAttribute(Attribute::OptimizeForSize) ||
        MF.getFunction()->hasFnAttribute(Attribute::MinSize);
    Subtarget = &MF.getSubtarget<AArch64Subtarget>();
    return SelectionDAGISel::runOnMachineFunction(MF);
  }

  SDNode *Select(SDNode *Node) override;

  /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
  /// inline asm expressions.
  bool SelectInlineAsmMemoryOperand(const SDValue &Op,
                                    unsigned ConstraintID,
                                    std::vector<SDValue> &OutOps) override;

  SDNode *SelectMLAV64LaneV128(SDNode *N);
  SDNode *SelectMULLV64LaneV128(unsigned IntNo, SDNode *N);
  bool SelectArithExtendedRegister(SDValue N, SDValue &Reg, SDValue &Shift);
  bool SelectArithImmed(SDValue N, SDValue &Val, SDValue &Shift);
  bool SelectNegArithImmed(SDValue N, SDValue &Val, SDValue &Shift);
  bool SelectArithShiftedRegister(SDValue N, SDValue &Reg, SDValue &Shift) {
    return SelectShiftedRegister(N, false, Reg, Shift);
  }
  bool SelectLogicalShiftedRegister(SDValue N, SDValue &Reg, SDValue &Shift) {
    return SelectShiftedRegister(N, true, Reg, Shift);
  }
  bool SelectAddrModeIndexed8(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeIndexed(N, 1, Base, OffImm);
  }
  bool SelectAddrModeIndexed16(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeIndexed(N, 2, Base, OffImm);
  }
  bool SelectAddrModeIndexed32(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeIndexed(N, 4, Base, OffImm);
  }
  bool SelectAddrModeIndexed64(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeIndexed(N, 8, Base, OffImm);
  }
  bool SelectAddrModeIndexed128(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeIndexed(N, 16, Base, OffImm);
  }
  bool SelectAddrModeUnscaled8(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeUnscaled(N, 1, Base, OffImm);
  }
  bool SelectAddrModeUnscaled16(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeUnscaled(N, 2, Base, OffImm);
  }
  bool SelectAddrModeUnscaled32(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeUnscaled(N, 4, Base, OffImm);
  }
  bool SelectAddrModeUnscaled64(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeUnscaled(N, 8, Base, OffImm);
  }
  bool SelectAddrModeUnscaled128(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeUnscaled(N, 16, Base, OffImm);
  }

  template<int Width>
  bool SelectAddrModeWRO(SDValue N, SDValue &Base, SDValue &Offset,
                         SDValue &SignExtend, SDValue &DoShift) {
    return SelectAddrModeWRO(N, Width / 8, Base, Offset, SignExtend, DoShift);
  }

  template<int Width>
  bool SelectAddrModeXRO(SDValue N, SDValue &Base, SDValue &Offset,
                         SDValue &SignExtend, SDValue &DoShift) {
    return SelectAddrModeXRO(N, Width / 8, Base, Offset, SignExtend, DoShift);
  }

  /// Form sequences of consecutive 64/128-bit registers for use in NEON
  /// instructions making use of a vector-list (e.g. ldN, tbl). Vecs must have
  /// between 1 and 4 elements. If it contains a single element, that is
  /// returned unchanged; otherwise a REG_SEQUENCE value is returned.
  SDValue createDTuple(ArrayRef<SDValue> Vecs);
  SDValue createQTuple(ArrayRef<SDValue> Vecs);

  /// Generic helper for the createDTuple/createQTuple
  /// functions. Those should almost always be called instead.
  SDValue createTuple(ArrayRef<SDValue> Vecs, const unsigned RegClassIDs[],
                      const unsigned SubRegs[]);

  SDNode *SelectTable(SDNode *N, unsigned NumVecs, unsigned Opc, bool isExt);

  SDNode *SelectIndexedLoad(SDNode *N, bool &Done);

  SDNode *SelectLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
                     unsigned SubRegIdx);
  SDNode *SelectPostLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
                         unsigned SubRegIdx);
  SDNode *SelectLoadLane(SDNode *N, unsigned NumVecs, unsigned Opc);
  SDNode *SelectPostLoadLane(SDNode *N, unsigned NumVecs, unsigned Opc);

  SDNode *SelectStore(SDNode *N, unsigned NumVecs, unsigned Opc);
  SDNode *SelectPostStore(SDNode *N, unsigned NumVecs, unsigned Opc);
  SDNode *SelectStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc);
  SDNode *SelectPostStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc);

  SDNode *SelectBitfieldExtractOp(SDNode *N);
  SDNode *SelectBitfieldInsertOp(SDNode *N);

  SDNode *SelectLIBM(SDNode *N);

  SDNode *SelectReadRegister(SDNode *N);
  SDNode *SelectWriteRegister(SDNode *N);

// Include the pieces autogenerated from the target description.
#include "AArch64GenDAGISel.inc"

private:
  bool SelectShiftedRegister(SDValue N, bool AllowROR, SDValue &Reg,
                             SDValue &Shift);
  bool SelectAddrModeIndexed(SDValue N, unsigned Size, SDValue &Base,
                             SDValue &OffImm);
  bool SelectAddrModeUnscaled(SDValue N, unsigned Size, SDValue &Base,
                              SDValue &OffImm);
  bool SelectAddrModeWRO(SDValue N, unsigned Size, SDValue &Base,
                         SDValue &Offset, SDValue &SignExtend,
                         SDValue &DoShift);
  bool SelectAddrModeXRO(SDValue N, unsigned Size, SDValue &Base,
                         SDValue &Offset, SDValue &SignExtend,
                         SDValue &DoShift);
  bool isWorthFolding(SDValue V) const;
  bool SelectExtendedSHL(SDValue N, unsigned Size, bool WantExtend,
                         SDValue &Offset, SDValue &SignExtend);

  template<unsigned RegWidth>
  bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos) {
    return SelectCVTFixedPosOperand(N, FixedPos, RegWidth);
  }

  bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos, unsigned Width);
};
} // end anonymous namespace
/// isIntImmediate - This method tests to see if the node is a constant
192
/// operand. If so Imm will receive the 32-bit value.
193
static bool isIntImmediate(const SDNode *N, uint64_t &Imm) {
194
if (const ConstantSDNode *C = dyn_cast<const ConstantSDNode>(N)) {
195
Imm = C->getZExtValue();
201
// isIntImmediate - This method tests to see if a constant operand.
202
// If so Imm will receive the value.
203
static bool isIntImmediate(SDValue N, uint64_t &Imm) {
204
return isIntImmediate(N.getNode(), Imm);
207
// isOpcWithIntImmediate - This method tests to see if the node is a specific
208
// opcode and that it has a immediate integer right operand.
209
// If so Imm will receive the 32 bit value.
210
static bool isOpcWithIntImmediate(const SDNode *N, unsigned Opc,
212
return N->getOpcode() == Opc &&
213
isIntImmediate(N->getOperand(1).getNode(), Imm);
216
bool AArch64DAGToDAGISel::SelectInlineAsmMemoryOperand(
217
const SDValue &Op, unsigned ConstraintID, std::vector<SDValue> &OutOps) {
218
switch(ConstraintID) {
220
llvm_unreachable("Unexpected asm memory constraint");
221
case InlineAsm::Constraint_i:
222
case InlineAsm::Constraint_m:
223
case InlineAsm::Constraint_Q:
224
// Require the address to be in a register. That is safe for all AArch64
225
// variants and it is hard to do anything much smarter without knowing
226
// how the operand is used.
227
OutOps.push_back(Op);
233
/// SelectArithImmed - Select an immediate value that can be represented as
234
/// a 12-bit value shifted left by either 0 or 12. If so, return true with
235
/// Val set to the 12-bit value and Shift set to the shifter operand.
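/// For example, #0x123 is selected as Val=0x123 with "LSL #0" and #0x123000
/// as Val=0x123 with "LSL #12", while #0x123001 fits neither form and is
/// rejected.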
bool AArch64DAGToDAGISel::SelectArithImmed(SDValue N, SDValue &Val,
                                           SDValue &Shift) {
  // This function is called from the addsub_shifted_imm ComplexPattern,
  // which lists [imm] as the list of opcodes it's interested in; however,
  // we still need to check whether the operand is actually an immediate
  // here because the ComplexPattern opcode list is only used in
  // root-level opcode matching.
  if (!isa<ConstantSDNode>(N.getNode()))

  uint64_t Immed = cast<ConstantSDNode>(N.getNode())->getZExtValue();

  if (Immed >> 12 == 0) {
  } else if ((Immed & 0xfff) == 0 && Immed >> 24 == 0) {

  unsigned ShVal = AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt);
  Val = CurDAG->getTargetConstant(Immed, dl, MVT::i32);
  Shift = CurDAG->getTargetConstant(ShVal, dl, MVT::i32);

/// SelectNegArithImmed - As above, but negates the value before trying to
/// select it.
bool AArch64DAGToDAGISel::SelectNegArithImmed(SDValue N, SDValue &Val,
                                              SDValue &Shift) {
  // This function is called from the addsub_shifted_imm ComplexPattern,
  // which lists [imm] as the list of opcodes it's interested in; however,
  // we still need to check whether the operand is actually an immediate
  // here because the ComplexPattern opcode list is only used in
  // root-level opcode matching.
  if (!isa<ConstantSDNode>(N.getNode()))

  // The immediate operand must be a 24-bit zero-extended immediate.
  uint64_t Immed = cast<ConstantSDNode>(N.getNode())->getZExtValue();

  // This negation is almost always valid, but "cmp wN, #0" and "cmn wN, #0"
  // have the opposite effect on the C flag, so this pattern mustn't match under
  // those circumstances.
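  // For example, (add w0, w1, #-16) can be selected as (sub w0, w1, #16),
  // but (cmp w0, #0) must not become (cmn w0, #0).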
  if (N.getValueType() == MVT::i32)
    Immed = ~((uint32_t)Immed) + 1;
  else
    Immed = ~Immed + 1ULL;
  if (Immed & 0xFFFFFFFFFF000000ULL)

  Immed &= 0xFFFFFFULL;
  return SelectArithImmed(CurDAG->getConstant(Immed, SDLoc(N), MVT::i32), Val,
                          Shift);

/// getShiftTypeForNode - Translate a shift node to the corresponding
/// ShiftType value.
static AArch64_AM::ShiftExtendType getShiftTypeForNode(SDValue N) {
  switch (N.getOpcode()) {
    return AArch64_AM::InvalidShiftExtend;
    return AArch64_AM::LSL;
    return AArch64_AM::LSR;
    return AArch64_AM::ASR;
    return AArch64_AM::ROR;

/// \brief Determine whether it is worth folding V into an extended register.
bool AArch64DAGToDAGISel::isWorthFolding(SDValue V) const {
  // Folding hurts if the value is used at least twice, unless we are
  // optimizing for code size.
  if (ForCodeSize || V.hasOneUse())

/// SelectShiftedRegister - Select a "shifted register" operand. If the value
/// is not shifted, set the Shift operand to default of "LSL 0". The logical
/// instructions allow the shifted register to be rotated, but the arithmetic
/// instructions do not. The AllowROR parameter specifies whether ROR is
/// supported.
bool AArch64DAGToDAGISel::SelectShiftedRegister(SDValue N, bool AllowROR,
                                                SDValue &Reg, SDValue &Shift) {
  AArch64_AM::ShiftExtendType ShType = getShiftTypeForNode(N);
  if (ShType == AArch64_AM::InvalidShiftExtend)

  if (!AllowROR && ShType == AArch64_AM::ROR)

  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
    unsigned BitSize = N.getValueType().getSizeInBits();
    unsigned Val = RHS->getZExtValue() & (BitSize - 1);
    unsigned ShVal = AArch64_AM::getShifterImm(ShType, Val);

    Reg = N.getOperand(0);
    Shift = CurDAG->getTargetConstant(ShVal, SDLoc(N), MVT::i32);
    return isWorthFolding(N);

/// getExtendTypeForNode - Translate an extend node to the corresponding
/// ExtendType value.
static AArch64_AM::ShiftExtendType
getExtendTypeForNode(SDValue N, bool IsLoadStore = false) {
  if (N.getOpcode() == ISD::SIGN_EXTEND ||
      N.getOpcode() == ISD::SIGN_EXTEND_INREG) {
    EVT SrcVT;
    if (N.getOpcode() == ISD::SIGN_EXTEND_INREG)
      SrcVT = cast<VTSDNode>(N.getOperand(1))->getVT();
    else
      SrcVT = N.getOperand(0).getValueType();

    if (!IsLoadStore && SrcVT == MVT::i8)
      return AArch64_AM::SXTB;
    else if (!IsLoadStore && SrcVT == MVT::i16)
      return AArch64_AM::SXTH;
    else if (SrcVT == MVT::i32)
      return AArch64_AM::SXTW;
    assert(SrcVT != MVT::i64 && "extend from 64-bits?");

    return AArch64_AM::InvalidShiftExtend;
  } else if (N.getOpcode() == ISD::ZERO_EXTEND ||
             N.getOpcode() == ISD::ANY_EXTEND) {
    EVT SrcVT = N.getOperand(0).getValueType();
    if (!IsLoadStore && SrcVT == MVT::i8)
      return AArch64_AM::UXTB;
    else if (!IsLoadStore && SrcVT == MVT::i16)
      return AArch64_AM::UXTH;
    else if (SrcVT == MVT::i32)
      return AArch64_AM::UXTW;
    assert(SrcVT != MVT::i64 && "extend from 64-bits?");

    return AArch64_AM::InvalidShiftExtend;
  } else if (N.getOpcode() == ISD::AND) {
    ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
      return AArch64_AM::InvalidShiftExtend;
    uint64_t AndMask = CSD->getZExtValue();

      return AArch64_AM::InvalidShiftExtend;
      return !IsLoadStore ? AArch64_AM::UXTB : AArch64_AM::InvalidShiftExtend;
      return !IsLoadStore ? AArch64_AM::UXTH : AArch64_AM::InvalidShiftExtend;
      return AArch64_AM::UXTW;

  return AArch64_AM::InvalidShiftExtend;

// Helper for SelectMLAV64LaneV128 - Recognize high lane extracts.
static bool checkHighLaneIndex(SDNode *DL, SDValue &LaneOp, int &LaneIdx) {
  if (DL->getOpcode() != AArch64ISD::DUPLANE16 &&
      DL->getOpcode() != AArch64ISD::DUPLANE32)

  SDValue SV = DL->getOperand(0);
  if (SV.getOpcode() != ISD::INSERT_SUBVECTOR)

  SDValue EV = SV.getOperand(1);
  if (EV.getOpcode() != ISD::EXTRACT_SUBVECTOR)

  ConstantSDNode *DLidx = cast<ConstantSDNode>(DL->getOperand(1).getNode());
  ConstantSDNode *EVidx = cast<ConstantSDNode>(EV.getOperand(1).getNode());
  LaneIdx = DLidx->getSExtValue() + EVidx->getSExtValue();
  LaneOp = EV.getOperand(0);

// Helper for SelectOpcV64LaneV128 - Recognize operations where one operand is
// a high lane extract.
static bool checkV64LaneV128(SDValue Op0, SDValue Op1, SDValue &StdOp,
                             SDValue &LaneOp, int &LaneIdx) {
  if (!checkHighLaneIndex(Op0.getNode(), LaneOp, LaneIdx)) {
    std::swap(Op0, Op1);
    if (!checkHighLaneIndex(Op0.getNode(), LaneOp, LaneIdx))

/// SelectMLAV64LaneV128 - AArch64 supports vector MLAs where one multiplicand
/// is a lane in the upper half of a 128-bit vector. Recognize and select this
/// so that we don't emit unnecessary lane extracts.
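/// For example, a v2i32 multiply by a value dup'd from lane 3 of a v4i32
/// register can be selected directly to MLAv2i32_indexed on that lane,
/// rather than first extracting the high 64 bits.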
SDNode *AArch64DAGToDAGISel::SelectMLAV64LaneV128(SDNode *N) {
  SDValue Op0 = N->getOperand(0);
  SDValue Op1 = N->getOperand(1);
  SDValue MLAOp1;   // Will hold ordinary multiplicand for MLA.
  SDValue MLAOp2;   // Will hold lane-accessed multiplicand for MLA.
  int LaneIdx = -1; // Will hold the lane index.

  if (Op1.getOpcode() != ISD::MUL ||
      !checkV64LaneV128(Op1.getOperand(0), Op1.getOperand(1), MLAOp1, MLAOp2,
                        LaneIdx)) {
    std::swap(Op0, Op1);
    if (Op1.getOpcode() != ISD::MUL ||
        !checkV64LaneV128(Op1.getOperand(0), Op1.getOperand(1), MLAOp1, MLAOp2,
                          LaneIdx))

  SDValue LaneIdxVal = CurDAG->getTargetConstant(LaneIdx, dl, MVT::i64);

  SDValue Ops[] = { Op0, MLAOp1, MLAOp2, LaneIdxVal };

  unsigned MLAOpc = ~0U;

  switch (N->getSimpleValueType(0).SimpleTy) {
    llvm_unreachable("Unrecognized MLA.");
    MLAOpc = AArch64::MLAv4i16_indexed;
    MLAOpc = AArch64::MLAv8i16_indexed;
    MLAOpc = AArch64::MLAv2i32_indexed;
    MLAOpc = AArch64::MLAv4i32_indexed;

  return CurDAG->getMachineNode(MLAOpc, dl, N->getValueType(0), Ops);

SDNode *AArch64DAGToDAGISel::SelectMULLV64LaneV128(unsigned IntNo, SDNode *N) {
  if (!checkV64LaneV128(N->getOperand(1), N->getOperand(2), SMULLOp0, SMULLOp1,
                        LaneIdx))

  SDValue LaneIdxVal = CurDAG->getTargetConstant(LaneIdx, dl, MVT::i64);

  SDValue Ops[] = { SMULLOp0, SMULLOp1, LaneIdxVal };

  unsigned SMULLOpc = ~0U;

  if (IntNo == Intrinsic::aarch64_neon_smull) {
    switch (N->getSimpleValueType(0).SimpleTy) {
      llvm_unreachable("Unrecognized SMULL.");
      SMULLOpc = AArch64::SMULLv4i16_indexed;
      SMULLOpc = AArch64::SMULLv2i32_indexed;
  } else if (IntNo == Intrinsic::aarch64_neon_umull) {
    switch (N->getSimpleValueType(0).SimpleTy) {
      llvm_unreachable("Unrecognized SMULL.");
      SMULLOpc = AArch64::UMULLv4i16_indexed;
      SMULLOpc = AArch64::UMULLv2i32_indexed;
  } else
    llvm_unreachable("Unrecognized intrinsic.");

  return CurDAG->getMachineNode(SMULLOpc, dl, N->getValueType(0), Ops);

/// Instructions that accept extend modifiers like UXTW expect the register
/// being extended to be a GPR32, but the incoming DAG might be acting on a
/// GPR64 (either via SEXT_INREG or AND). Extract the appropriate low bits if
/// this is the case.
static SDValue narrowIfNeeded(SelectionDAG *CurDAG, SDValue N) {
  if (N.getValueType() == MVT::i32)
    return N;

  SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, dl, MVT::i32);
  MachineSDNode *Node = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                               dl, MVT::i32, N, SubReg);
  return SDValue(Node, 0);

/// SelectArithExtendedRegister - Select an "extended register" operand. This
/// operand folds in an extend followed by an optional left shift.
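/// For example, the last operand of (add x0, x1, w2, sxtw #2) is an extended
/// register: w2 is sign-extended to 64 bits and then shifted left by 2.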
bool AArch64DAGToDAGISel::SelectArithExtendedRegister(SDValue N, SDValue &Reg,
                                                      SDValue &Shift) {
  unsigned ShiftVal = 0;
  AArch64_AM::ShiftExtendType Ext;

  if (N.getOpcode() == ISD::SHL) {
    ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));

    ShiftVal = CSD->getZExtValue();

    Ext = getExtendTypeForNode(N.getOperand(0));
    if (Ext == AArch64_AM::InvalidShiftExtend)

    Reg = N.getOperand(0).getOperand(0);
  } else {
    Ext = getExtendTypeForNode(N);
    if (Ext == AArch64_AM::InvalidShiftExtend)

    Reg = N.getOperand(0);
  }

  // AArch64 mandates that the RHS of the operation must use the smallest
  // register class that could contain the size being extended from. Thus,
  // if we're folding a (sext i8), we need the RHS to be a GPR32, even though
  // there might not be an actual 32-bit value in the program. We can
  // (harmlessly) synthesize one by injecting an EXTRACT_SUBREG here.
  assert(Ext != AArch64_AM::UXTX && Ext != AArch64_AM::SXTX);
  Reg = narrowIfNeeded(CurDAG, Reg);
  Shift = CurDAG->getTargetConstant(getArithExtendImm(Ext, ShiftVal), SDLoc(N),
                                    MVT::i32);
  return isWorthFolding(N);

/// If there's a use of this ADDlow that's not itself a load/store then we'll
/// need to create a real ADD instruction from it anyway and there's no point in
/// folding it into the mem op. Theoretically, it shouldn't matter, but there's
/// a single pseudo-instruction for an ADRP/ADD pair so over-aggressive folding
/// leads to duplicated ADRP instructions.
static bool isWorthFoldingADDlow(SDValue N) {
  for (auto Use : N->uses()) {
    if (Use->getOpcode() != ISD::LOAD && Use->getOpcode() != ISD::STORE &&
        Use->getOpcode() != ISD::ATOMIC_LOAD &&
        Use->getOpcode() != ISD::ATOMIC_STORE)

    // ldar and stlr have much more restrictive addressing modes (just a
    // register).
    if (cast<MemSDNode>(Use)->getOrdering() > Monotonic)

/// SelectAddrModeIndexed - Select a "register plus scaled unsigned 12-bit
/// immediate" address. The "Size" argument is the size in bytes of the memory
/// reference, which determines the scale.
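/// For an 8-byte access, for instance, this matches byte offsets from 0 to
/// 32760 that are multiples of 8 (the 12-bit immediate scaled by the size).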
bool AArch64DAGToDAGISel::SelectAddrModeIndexed(SDValue N, unsigned Size,
                                                SDValue &Base,
                                                SDValue &OffImm) {
  const DataLayout &DL = CurDAG->getDataLayout();
  const TargetLowering *TLI = getTargetLowering();
  if (N.getOpcode() == ISD::FrameIndex) {
    int FI = cast<FrameIndexSDNode>(N)->getIndex();
    Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
    OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);

  if (N.getOpcode() == AArch64ISD::ADDlow && isWorthFoldingADDlow(N)) {
    GlobalAddressSDNode *GAN =
        dyn_cast<GlobalAddressSDNode>(N.getOperand(1).getNode());
    Base = N.getOperand(0);
    OffImm = N.getOperand(1);

    const GlobalValue *GV = GAN->getGlobal();
    unsigned Alignment = GV->getAlignment();
    Type *Ty = GV->getType()->getElementType();
    if (Alignment == 0 && Ty->isSized())
      Alignment = DL.getABITypeAlignment(Ty);

    if (Alignment >= Size)

  if (CurDAG->isBaseWithConstantOffset(N)) {
    if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
      int64_t RHSC = (int64_t)RHS->getZExtValue();
      unsigned Scale = Log2_32(Size);
      if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && RHSC < (0x1000 << Scale)) {
        Base = N.getOperand(0);
        if (Base.getOpcode() == ISD::FrameIndex) {
          int FI = cast<FrameIndexSDNode>(Base)->getIndex();
          Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));

        OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64);

  // Before falling back to our general case, check if the unscaled
  // instructions can handle this. If so, that's preferable.
  if (SelectAddrModeUnscaled(N, Size, Base, OffImm))

  // Base only. The address will be materialized into a register before
  // the memory is accessed.
  //   add x0, Xbase, #offset
  OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);

/// SelectAddrModeUnscaled - Select a "register plus unscaled signed 9-bit
/// immediate" address. This should only match when there is an offset that
/// is not valid for a scaled immediate addressing mode. The "Size" argument
/// is the size in bytes of the memory reference, which is needed here to know
/// what is valid for a scaled immediate.
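/// The unscaled forms (LDUR/STUR) accept any byte offset in [-256, 255], so
/// e.g. an offset of -8 or of 3 on an 8-byte access is matched here.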
bool AArch64DAGToDAGISel::SelectAddrModeUnscaled(SDValue N, unsigned Size,
                                                 SDValue &Base,
                                                 SDValue &OffImm) {
  if (!CurDAG->isBaseWithConstantOffset(N))

  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
    int64_t RHSC = RHS->getSExtValue();
    // If the offset is valid as a scaled immediate, don't match here.
    if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 &&
        RHSC < (0x1000 << Log2_32(Size)))

    if (RHSC >= -256 && RHSC < 256) {
      Base = N.getOperand(0);
      if (Base.getOpcode() == ISD::FrameIndex) {
        int FI = cast<FrameIndexSDNode>(Base)->getIndex();
        const TargetLowering *TLI = getTargetLowering();
        Base = CurDAG->getTargetFrameIndex(
            FI, TLI->getPointerTy(CurDAG->getDataLayout()));

      OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i64);

static SDValue Widen(SelectionDAG *CurDAG, SDValue N) {
  SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, dl, MVT::i32);
  SDValue ImpDef = SDValue(
      CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, MVT::i64), 0);
  MachineSDNode *Node = CurDAG->getMachineNode(
      TargetOpcode::INSERT_SUBREG, dl, MVT::i64, ImpDef, N, SubReg);
  return SDValue(Node, 0);

/// \brief Check if the given SHL node (\p N) can be used to form an
/// extended register for an addressing mode.
bool AArch64DAGToDAGISel::SelectExtendedSHL(SDValue N, unsigned Size,
                                            bool WantExtend, SDValue &Offset,
                                            SDValue &SignExtend) {
  assert(N.getOpcode() == ISD::SHL && "Invalid opcode.");
  ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
  if (!CSD || (CSD->getZExtValue() & 0x7) != CSD->getZExtValue())

  if (WantExtend) {
    AArch64_AM::ShiftExtendType Ext =
        getExtendTypeForNode(N.getOperand(0), true);
    if (Ext == AArch64_AM::InvalidShiftExtend)

    Offset = narrowIfNeeded(CurDAG, N.getOperand(0).getOperand(0));
    SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl,
                                           MVT::i32);
  } else {
    Offset = N.getOperand(0);
    SignExtend = CurDAG->getTargetConstant(0, dl, MVT::i32);
  }

  unsigned LegalShiftVal = Log2_32(Size);
  unsigned ShiftVal = CSD->getZExtValue();

  if (ShiftVal != 0 && ShiftVal != LegalShiftVal)

  if (isWorthFolding(N))

bool AArch64DAGToDAGISel::SelectAddrModeWRO(SDValue N, unsigned Size,
                                            SDValue &Base, SDValue &Offset,
                                            SDValue &SignExtend,
                                            SDValue &DoShift) {
  if (N.getOpcode() != ISD::ADD)

  SDValue LHS = N.getOperand(0);
  SDValue RHS = N.getOperand(1);

  // We don't want to match immediate adds here, because they are better lowered
  // to the register-immediate addressing modes.
  if (isa<ConstantSDNode>(LHS) || isa<ConstantSDNode>(RHS))

  // Check if this particular node is reused in any non-memory related
  // operation. If yes, do not try to fold this node into the address
  // computation, since the computation will be kept.
  const SDNode *Node = N.getNode();
  for (SDNode *UI : Node->uses()) {
    if (!isa<MemSDNode>(*UI))

  // Remember if it is worth folding N when it produces an extended register.
  bool IsExtendedRegisterWorthFolding = isWorthFolding(N);

  // Try to match a shifted extend on the RHS.
  if (IsExtendedRegisterWorthFolding && RHS.getOpcode() == ISD::SHL &&
      SelectExtendedSHL(RHS, Size, true, Offset, SignExtend)) {
    DoShift = CurDAG->getTargetConstant(true, dl, MVT::i32);

  // Try to match a shifted extend on the LHS.
  if (IsExtendedRegisterWorthFolding && LHS.getOpcode() == ISD::SHL &&
      SelectExtendedSHL(LHS, Size, true, Offset, SignExtend)) {
    DoShift = CurDAG->getTargetConstant(true, dl, MVT::i32);

  // There was no shift, whatever else we find.
  DoShift = CurDAG->getTargetConstant(false, dl, MVT::i32);

  AArch64_AM::ShiftExtendType Ext = AArch64_AM::InvalidShiftExtend;
  // Try to match an unshifted extend on the LHS.
  if (IsExtendedRegisterWorthFolding &&
      (Ext = getExtendTypeForNode(LHS, true)) !=
          AArch64_AM::InvalidShiftExtend) {
    Offset = narrowIfNeeded(CurDAG, LHS.getOperand(0));
    SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl,
                                           MVT::i32);
    if (isWorthFolding(LHS))

  // Try to match an unshifted extend on the RHS.
  if (IsExtendedRegisterWorthFolding &&
      (Ext = getExtendTypeForNode(RHS, true)) !=
          AArch64_AM::InvalidShiftExtend) {
    Offset = narrowIfNeeded(CurDAG, RHS.getOperand(0));
    SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl,
                                           MVT::i32);
    if (isWorthFolding(RHS))

// Check if the given immediate is preferred by ADD. If an immediate can be
// encoded in an ADD, or it can be encoded in an "ADD LSL #12" and cannot be
// encoded by one MOVZ, return true.
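// For example, #0x45 and #0x123000 are preferred (a single ADD each), while
// #0x10000 is not, since a single MOVZ materializes it more cheaply.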
static bool isPreferredADD(int64_t ImmOff) {
  // Constant in [0x0, 0xfff] can be encoded in ADD.
  if ((ImmOff & 0xfffffffffffff000LL) == 0x0LL)

  // Check if it can be encoded in an "ADD LSL #12".
  if ((ImmOff & 0xffffffffff000fffLL) == 0x0LL)
    // As a single MOVZ is faster than an "ADD LSL #12", ignore such constants.
    return (ImmOff & 0xffffffffff00ffffLL) != 0x0LL &&
           (ImmOff & 0xffffffffffff0fffLL) != 0x0LL;

bool AArch64DAGToDAGISel::SelectAddrModeXRO(SDValue N, unsigned Size,
                                            SDValue &Base, SDValue &Offset,
                                            SDValue &SignExtend,
                                            SDValue &DoShift) {
  if (N.getOpcode() != ISD::ADD)

  SDValue LHS = N.getOperand(0);
  SDValue RHS = N.getOperand(1);

  // Check if this particular node is reused in any non-memory related
  // operation. If yes, do not try to fold this node into the address
  // computation, since the computation will be kept.
  const SDNode *Node = N.getNode();
  for (SDNode *UI : Node->uses()) {
    if (!isa<MemSDNode>(*UI))

  // Watch out if RHS is a wide immediate: it cannot be selected into the
  // [BaseReg+Imm] addressing mode, and it may not be encodable in an
  // ADD/SUB either. Instead it will use the [BaseReg + 0] address mode and
  // generate instructions like:
  //   MOV  X0, WideImmediate
  //   ADD  X1, BaseReg, X0
  // For such situations, using the [BaseReg, XReg] addressing mode can save
  // one ADD/SUB:
  //   MOV  X0, WideImmediate
  //   LDR  X2, [BaseReg, X0]
  if (isa<ConstantSDNode>(RHS)) {
    int64_t ImmOff = (int64_t)cast<ConstantSDNode>(RHS)->getZExtValue();
    unsigned Scale = Log2_32(Size);
    // Skip an immediate that can be selected by the load/store addressing
    // mode. Also skip an immediate that can be encoded by a single ADD
    // (SUB is also checked by using -ImmOff).
    if ((ImmOff % Size == 0 && ImmOff >= 0 && ImmOff < (0x1000 << Scale)) ||
        isPreferredADD(ImmOff) || isPreferredADD(-ImmOff))

    SDValue Ops[] = { RHS };
    SDNode *MOVI =
        CurDAG->getMachineNode(AArch64::MOVi64imm, DL, MVT::i64, Ops);
    SDValue MOVIV = SDValue(MOVI, 0);
    // This ADD of two X registers will be selected into [Reg+Reg] mode.
    N = CurDAG->getNode(ISD::ADD, DL, MVT::i64, LHS, MOVIV);

  // Remember if it is worth folding N when it produces an extended register.
  bool IsExtendedRegisterWorthFolding = isWorthFolding(N);

  // Try to match a shifted extend on the RHS.
  if (IsExtendedRegisterWorthFolding && RHS.getOpcode() == ISD::SHL &&
      SelectExtendedSHL(RHS, Size, false, Offset, SignExtend)) {
    DoShift = CurDAG->getTargetConstant(true, DL, MVT::i32);

  // Try to match a shifted extend on the LHS.
  if (IsExtendedRegisterWorthFolding && LHS.getOpcode() == ISD::SHL &&
      SelectExtendedSHL(LHS, Size, false, Offset, SignExtend)) {
    DoShift = CurDAG->getTargetConstant(true, DL, MVT::i32);

  // Match any non-shifted, non-extend, non-immediate add expression.
  SignExtend = CurDAG->getTargetConstant(false, DL, MVT::i32);
  DoShift = CurDAG->getTargetConstant(false, DL, MVT::i32);
  // Reg1 + Reg2 is free: no check needed.

SDValue AArch64DAGToDAGISel::createDTuple(ArrayRef<SDValue> Regs) {
  static const unsigned RegClassIDs[] = {
      AArch64::DDRegClassID, AArch64::DDDRegClassID, AArch64::DDDDRegClassID};
  static const unsigned SubRegs[] = {AArch64::dsub0, AArch64::dsub1,
                                     AArch64::dsub2, AArch64::dsub3};

  return createTuple(Regs, RegClassIDs, SubRegs);

SDValue AArch64DAGToDAGISel::createQTuple(ArrayRef<SDValue> Regs) {
  static const unsigned RegClassIDs[] = {
      AArch64::QQRegClassID, AArch64::QQQRegClassID, AArch64::QQQQRegClassID};
  static const unsigned SubRegs[] = {AArch64::qsub0, AArch64::qsub1,
                                     AArch64::qsub2, AArch64::qsub3};

  return createTuple(Regs, RegClassIDs, SubRegs);

SDValue AArch64DAGToDAGISel::createTuple(ArrayRef<SDValue> Regs,
                                         const unsigned RegClassIDs[],
                                         const unsigned SubRegs[]) {
  // There's no special register-class for a vector-list of 1 element: it's just
  if (Regs.size() == 1)

  assert(Regs.size() >= 2 && Regs.size() <= 4);

  SmallVector<SDValue, 4> Ops;

  // First operand of REG_SEQUENCE is the desired RegClass.
  Ops.push_back(
      CurDAG->getTargetConstant(RegClassIDs[Regs.size() - 2], DL, MVT::i32));

  // Then we get pairs of source & subregister-position for the components.
  for (unsigned i = 0; i < Regs.size(); ++i) {
    Ops.push_back(Regs[i]);
    Ops.push_back(CurDAG->getTargetConstant(SubRegs[i], DL, MVT::i32));

  SDNode *N =
      CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL, MVT::Untyped, Ops);
  return SDValue(N, 0);

SDNode *AArch64DAGToDAGISel::SelectTable(SDNode *N, unsigned NumVecs,
                                         unsigned Opc, bool isExt) {
  EVT VT = N->getValueType(0);

  unsigned ExtOff = isExt;

  // Form a REG_SEQUENCE to force register allocation.
  unsigned Vec0Off = ExtOff + 1;
  SmallVector<SDValue, 4> Regs(N->op_begin() + Vec0Off,
                               N->op_begin() + Vec0Off + NumVecs);
  SDValue RegSeq = createQTuple(Regs);

  SmallVector<SDValue, 6> Ops;
  Ops.push_back(N->getOperand(1));
  Ops.push_back(RegSeq);
  Ops.push_back(N->getOperand(NumVecs + ExtOff + 1));
  return CurDAG->getMachineNode(Opc, dl, VT, Ops);

SDNode *AArch64DAGToDAGISel::SelectIndexedLoad(SDNode *N, bool &Done) {
  LoadSDNode *LD = cast<LoadSDNode>(N);
  if (LD->isUnindexed())

  EVT VT = LD->getMemoryVT();
  EVT DstVT = N->getValueType(0);
  ISD::MemIndexedMode AM = LD->getAddressingMode();
  bool IsPre = AM == ISD::PRE_INC || AM == ISD::PRE_DEC;

  // We're not doing validity checking here. That was done when checking
  // if we should mark the load as indexed or not. We're just selecting
  // the right instruction.
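  // For example, a post-indexed i64 load such as "ldr x0, [x1], #8" maps to
  // LDRXpost, while the pre-indexed "ldr x0, [x1, #8]!" maps to LDRXpre.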
  ISD::LoadExtType ExtType = LD->getExtensionType();
  bool InsertTo64 = false;
  if (VT == MVT::i64)
    Opcode = IsPre ? AArch64::LDRXpre : AArch64::LDRXpost;
  else if (VT == MVT::i32) {
    if (ExtType == ISD::NON_EXTLOAD)
      Opcode = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost;
    else if (ExtType == ISD::SEXTLOAD)
      Opcode = IsPre ? AArch64::LDRSWpre : AArch64::LDRSWpost;
    else {
      Opcode = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost;
      // The result of the load is only i32. It's the subreg_to_reg that makes
      // it into an i64.
    }
  } else if (VT == MVT::i16) {
    if (ExtType == ISD::SEXTLOAD) {
      if (DstVT == MVT::i64)
        Opcode = IsPre ? AArch64::LDRSHXpre : AArch64::LDRSHXpost;
      else
        Opcode = IsPre ? AArch64::LDRSHWpre : AArch64::LDRSHWpost;
    } else {
      Opcode = IsPre ? AArch64::LDRHHpre : AArch64::LDRHHpost;
      InsertTo64 = DstVT == MVT::i64;
      // The result of the load is only i32. It's the subreg_to_reg that makes
      // it into an i64.
    }
  } else if (VT == MVT::i8) {
    if (ExtType == ISD::SEXTLOAD) {
      if (DstVT == MVT::i64)
        Opcode = IsPre ? AArch64::LDRSBXpre : AArch64::LDRSBXpost;
      else
        Opcode = IsPre ? AArch64::LDRSBWpre : AArch64::LDRSBWpost;
    } else {
      Opcode = IsPre ? AArch64::LDRBBpre : AArch64::LDRBBpost;
      InsertTo64 = DstVT == MVT::i64;
      // The result of the load is only i32. It's the subreg_to_reg that makes
      // it into an i64.
    }
  } else if (VT == MVT::f32) {
    Opcode = IsPre ? AArch64::LDRSpre : AArch64::LDRSpost;
  } else if (VT == MVT::f64 || VT.is64BitVector()) {
    Opcode = IsPre ? AArch64::LDRDpre : AArch64::LDRDpost;
  } else if (VT.is128BitVector()) {
    Opcode = IsPre ? AArch64::LDRQpre : AArch64::LDRQpost;
  }
  SDValue Chain = LD->getChain();
  SDValue Base = LD->getBasePtr();
  ConstantSDNode *OffsetOp = cast<ConstantSDNode>(LD->getOffset());
  int OffsetVal = (int)OffsetOp->getZExtValue();

  SDValue Offset = CurDAG->getTargetConstant(OffsetVal, dl, MVT::i64);
  SDValue Ops[] = { Base, Offset, Chain };
  SDNode *Res = CurDAG->getMachineNode(Opcode, dl, MVT::i64, DstVT,
                                       MVT::Other, Ops);

  // Either way, we're replacing the node, so tell the caller that.
  SDValue LoadedVal = SDValue(Res, 1);
  if (InsertTo64) {
    SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, dl, MVT::i32);
    LoadedVal =
        SDValue(CurDAG->getMachineNode(
                    AArch64::SUBREG_TO_REG, dl, MVT::i64,
                    CurDAG->getTargetConstant(0, dl, MVT::i64), LoadedVal,
                    SubReg),
                0);
  }

  ReplaceUses(SDValue(N, 0), LoadedVal);
  ReplaceUses(SDValue(N, 1), SDValue(Res, 0));
  ReplaceUses(SDValue(N, 2), SDValue(Res, 2));
SDNode *AArch64DAGToDAGISel::SelectLoad(SDNode *N, unsigned NumVecs,
                                        unsigned Opc, unsigned SubRegIdx) {
  EVT VT = N->getValueType(0);
  SDValue Chain = N->getOperand(0);

  SDValue Ops[] = {N->getOperand(2), // Mem operand;

  const EVT ResTys[] = {MVT::Untyped, MVT::Other};

  SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
  SDValue SuperReg = SDValue(Ld, 0);
  for (unsigned i = 0; i < NumVecs; ++i)
    ReplaceUses(SDValue(N, i),
                CurDAG->getTargetExtractSubreg(SubRegIdx + i, dl, VT, SuperReg));

  ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 1));

SDNode *AArch64DAGToDAGISel::SelectPostLoad(SDNode *N, unsigned NumVecs,
                                            unsigned Opc, unsigned SubRegIdx) {
  EVT VT = N->getValueType(0);
  SDValue Chain = N->getOperand(0);

  SDValue Ops[] = {N->getOperand(1), // Mem operand
                   N->getOperand(2), // Incremental

  const EVT ResTys[] = {MVT::i64, // Type of the write back register
                        MVT::Untyped, MVT::Other};

  SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);

  // Update uses of write back register
  ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 0));

  // Update uses of vector list
  SDValue SuperReg = SDValue(Ld, 1);
  ReplaceUses(SDValue(N, 0), SuperReg);

  for (unsigned i = 0; i < NumVecs; ++i)
    ReplaceUses(SDValue(N, i),
                CurDAG->getTargetExtractSubreg(SubRegIdx + i, dl, VT, SuperReg));

  ReplaceUses(SDValue(N, NumVecs + 1), SDValue(Ld, 2));

SDNode *AArch64DAGToDAGISel::SelectStore(SDNode *N, unsigned NumVecs,
                                         unsigned Opc) {
  EVT VT = N->getOperand(2)->getValueType(0);

  // Form a REG_SEQUENCE to force register allocation.
  bool Is128Bit = VT.getSizeInBits() == 128;
  SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs);
  SDValue RegSeq = Is128Bit ? createQTuple(Regs) : createDTuple(Regs);

  SDValue Ops[] = {RegSeq, N->getOperand(NumVecs + 2), N->getOperand(0)};
  SDNode *St = CurDAG->getMachineNode(Opc, dl, N->getValueType(0), Ops);

SDNode *AArch64DAGToDAGISel::SelectPostStore(SDNode *N, unsigned NumVecs,
                                             unsigned Opc) {
  EVT VT = N->getOperand(2)->getValueType(0);
  const EVT ResTys[] = {MVT::i64,    // Type of the write back register
                        MVT::Other}; // Type for the Chain

  // Form a REG_SEQUENCE to force register allocation.
  bool Is128Bit = VT.getSizeInBits() == 128;
  SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs);
  SDValue RegSeq = Is128Bit ? createQTuple(Regs) : createDTuple(Regs);

  SDValue Ops[] = {RegSeq,
                   N->getOperand(NumVecs + 1), // base register
                   N->getOperand(NumVecs + 2), // Incremental
                   N->getOperand(0)};          // Chain
  SDNode *St = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);

/// WidenVector - Given a value in the V64 register class, produce the
/// equivalent value in the V128 register class.
class WidenVector {
  SelectionDAG &DAG;

public:
  WidenVector(SelectionDAG &DAG) : DAG(DAG) {}

  SDValue operator()(SDValue V64Reg) {
    EVT VT = V64Reg.getValueType();
    unsigned NarrowSize = VT.getVectorNumElements();
    MVT EltTy = VT.getVectorElementType().getSimpleVT();
    MVT WideTy = MVT::getVectorVT(EltTy, 2 * NarrowSize);

    SDValue Undef =
        SDValue(DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, WideTy), 0);
    return DAG.getTargetInsertSubreg(AArch64::dsub, DL, WideTy, Undef, V64Reg);

/// NarrowVector - Given a value in the V128 register class, produce the
/// equivalent value in the V64 register class.
static SDValue NarrowVector(SDValue V128Reg, SelectionDAG &DAG) {
  EVT VT = V128Reg.getValueType();
  unsigned WideSize = VT.getVectorNumElements();
  MVT EltTy = VT.getVectorElementType().getSimpleVT();
  MVT NarrowTy = MVT::getVectorVT(EltTy, WideSize / 2);

  return DAG.getTargetExtractSubreg(AArch64::dsub, SDLoc(V128Reg), NarrowTy,
SDNode *AArch64DAGToDAGISel::SelectLoadLane(SDNode *N, unsigned NumVecs,
                                            unsigned Opc) {
  EVT VT = N->getValueType(0);
  bool Narrow = VT.getSizeInBits() == 64;

  // Form a REG_SEQUENCE to force register allocation.
  SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs);

  if (Narrow)
    std::transform(Regs.begin(), Regs.end(), Regs.begin(),
                   WidenVector(*CurDAG));

  SDValue RegSeq = createQTuple(Regs);

  const EVT ResTys[] = {MVT::Untyped, MVT::Other};

  unsigned LaneNo =
      cast<ConstantSDNode>(N->getOperand(NumVecs + 2))->getZExtValue();

  SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64),
                   N->getOperand(NumVecs + 3), N->getOperand(0)};
  SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
  SDValue SuperReg = SDValue(Ld, 0);

  EVT WideVT = RegSeq.getOperand(1)->getValueType(0);
  static unsigned QSubs[] = { AArch64::qsub0, AArch64::qsub1, AArch64::qsub2,
                              AArch64::qsub3 };
  for (unsigned i = 0; i < NumVecs; ++i) {
    SDValue NV = CurDAG->getTargetExtractSubreg(QSubs[i], dl, WideVT, SuperReg);
    if (Narrow)
      NV = NarrowVector(NV, *CurDAG);
    ReplaceUses(SDValue(N, i), NV);

  ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 1));

SDNode *AArch64DAGToDAGISel::SelectPostLoadLane(SDNode *N, unsigned NumVecs,
                                                unsigned Opc) {
  EVT VT = N->getValueType(0);
  bool Narrow = VT.getSizeInBits() == 64;

  // Form a REG_SEQUENCE to force register allocation.
  SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs);

  if (Narrow)
    std::transform(Regs.begin(), Regs.end(), Regs.begin(),
                   WidenVector(*CurDAG));

  SDValue RegSeq = createQTuple(Regs);

  const EVT ResTys[] = {MVT::i64, // Type of the write back register
                        RegSeq->getValueType(0), MVT::Other};

  unsigned LaneNo =
      cast<ConstantSDNode>(N->getOperand(NumVecs + 1))->getZExtValue();

  SDValue Ops[] = {RegSeq,
                   CurDAG->getTargetConstant(LaneNo, dl,
                                             MVT::i64), // Lane Number
                   N->getOperand(NumVecs + 2),          // Base register
                   N->getOperand(NumVecs + 3),          // Incremental
                   N->getOperand(0)};
  SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);

  // Update uses of the write back register
  ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 0));

  // Update uses of the vector list
  SDValue SuperReg = SDValue(Ld, 1);
  ReplaceUses(SDValue(N, 0),
              Narrow ? NarrowVector(SuperReg, *CurDAG) : SuperReg);

  EVT WideVT = RegSeq.getOperand(1)->getValueType(0);
  static unsigned QSubs[] = { AArch64::qsub0, AArch64::qsub1, AArch64::qsub2,
                              AArch64::qsub3 };
  for (unsigned i = 0; i < NumVecs; ++i) {
    SDValue NV = CurDAG->getTargetExtractSubreg(QSubs[i], dl, WideVT,
                                                SuperReg);
    if (Narrow)
      NV = NarrowVector(NV, *CurDAG);
    ReplaceUses(SDValue(N, i), NV);

  ReplaceUses(SDValue(N, NumVecs + 1), SDValue(Ld, 2));

SDNode *AArch64DAGToDAGISel::SelectStoreLane(SDNode *N, unsigned NumVecs,
                                             unsigned Opc) {
  EVT VT = N->getOperand(2)->getValueType(0);
  bool Narrow = VT.getSizeInBits() == 64;

  // Form a REG_SEQUENCE to force register allocation.
  SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs);

  if (Narrow)
    std::transform(Regs.begin(), Regs.end(), Regs.begin(),
                   WidenVector(*CurDAG));

  SDValue RegSeq = createQTuple(Regs);

  unsigned LaneNo =
      cast<ConstantSDNode>(N->getOperand(NumVecs + 2))->getZExtValue();

  SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64),
                   N->getOperand(NumVecs + 3), N->getOperand(0)};
  SDNode *St = CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops);

  // Transfer memoperands.
  MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
  MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
  cast<MachineSDNode>(St)->setMemRefs(MemOp, MemOp + 1);

SDNode *AArch64DAGToDAGISel::SelectPostStoreLane(SDNode *N, unsigned NumVecs,
                                                 unsigned Opc) {
  EVT VT = N->getOperand(2)->getValueType(0);
  bool Narrow = VT.getSizeInBits() == 64;

  // Form a REG_SEQUENCE to force register allocation.
  SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs);

  if (Narrow)
    std::transform(Regs.begin(), Regs.end(), Regs.begin(),
                   WidenVector(*CurDAG));

  SDValue RegSeq = createQTuple(Regs);

  const EVT ResTys[] = {MVT::i64, // Type of the write back register
                        MVT::Other};

  unsigned LaneNo =
      cast<ConstantSDNode>(N->getOperand(NumVecs + 1))->getZExtValue();

  SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64),
                   N->getOperand(NumVecs + 2), // Base Register
                   N->getOperand(NumVecs + 3), // Incremental
                   N->getOperand(0)};
  SDNode *St = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);

  // Transfer memoperands.
  MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
  MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
  cast<MachineSDNode>(St)->setMemRefs(MemOp, MemOp + 1);
static bool isBitfieldExtractOpFromAnd(SelectionDAG *CurDAG, SDNode *N,
                                       unsigned &Opc, SDValue &Opd0,
                                       unsigned &LSB, unsigned &MSB,
                                       unsigned NumberOfIgnoredLowBits,
                                       bool BiggerPattern) {
  assert(N->getOpcode() == ISD::AND &&
         "N must be an AND operation to call this function");

  EVT VT = N->getValueType(0);

  // Here we can test the type of VT and return false when the type does not
  // match, but since it is done prior to that call in the current context
  // we turned that into an assert to avoid redundant code.
  assert((VT == MVT::i32 || VT == MVT::i64) &&
         "Type checking must have been done before calling this function");

  // FIXME: simplify-demanded-bits in DAGCombine will probably have
  // changed the AND node to a 32-bit mask operation. We'll have to
  // undo that as part of the transform here if we want to catch all
  // the opportunities.
  // Currently the NumberOfIgnoredLowBits argument helps to recover
  // from these situations when matching a bigger pattern (bitfield insert).

  // For unsigned extracts, check for a shift right and mask
  uint64_t And_imm = 0;
  if (!isOpcWithIntImmediate(N, ISD::AND, And_imm))

  const SDNode *Op0 = N->getOperand(0).getNode();

  // Because of simplify-demanded-bits in DAGCombine, the mask may have been
  // simplified. Try to undo that.
  And_imm |= (1 << NumberOfIgnoredLowBits) - 1;

  // The immediate is a mask of the low bits iff imm & (imm+1) == 0
  if (And_imm & (And_imm + 1))

  bool ClampMSB = false;
  uint64_t Srl_imm = 0;
  // Handle the SRL + ANY_EXTEND case.
  if (VT == MVT::i64 && Op0->getOpcode() == ISD::ANY_EXTEND &&
      isOpcWithIntImmediate(Op0->getOperand(0).getNode(), ISD::SRL, Srl_imm)) {
    // Extend the incoming operand of the SRL to 64-bit.
    Opd0 = Widen(CurDAG, Op0->getOperand(0).getOperand(0));
    // Make sure to clamp the MSB so that we preserve the semantics of the
    // original operations.
    ClampMSB = true;
  } else if (VT == MVT::i32 && Op0->getOpcode() == ISD::TRUNCATE &&
             isOpcWithIntImmediate(Op0->getOperand(0).getNode(), ISD::SRL,
                                   Srl_imm)) {
    // If the shift result was truncated, we can still combine them.
    Opd0 = Op0->getOperand(0).getOperand(0);

    // Use the type of SRL node.
    VT = Opd0->getValueType(0);
  } else if (isOpcWithIntImmediate(Op0, ISD::SRL, Srl_imm)) {
    Opd0 = Op0->getOperand(0);
  } else if (BiggerPattern) {
    // Let's pretend a 0 shift right has been performed.
    // The resulting code will be at least as good as the original one
    // plus it may expose more opportunities for bitfield insert patterns.
    // FIXME: Currently we limit this to the bigger pattern, because
    // some optimizations expect AND and not UBFM.
    Opd0 = N->getOperand(0);

  // Bail out on large immediates. This happens when no proper
  // combining/constant folding was performed.
  if (!BiggerPattern && (Srl_imm <= 0 || Srl_imm >= VT.getSizeInBits())) {
    DEBUG((dbgs() << N
           << ": Found large shift immediate, this should not happen\n"));

  MSB = Srl_imm + (VT == MVT::i32 ? countTrailingOnes<uint32_t>(And_imm)
                                  : countTrailingOnes<uint64_t>(And_imm)) -
        1;

  // Since we're moving the extend before the right shift operation, we need
  // to clamp the MSB to make sure we don't shift in undefined bits instead of
  // the zeros which would get shifted in with the original right shift
  // operation.
  if (ClampMSB)
    MSB = MSB > 31 ? 31 : MSB;

  Opc = VT == MVT::i32 ? AArch64::UBFMWri : AArch64::UBFMXri;

static bool isSeveralBitsExtractOpFromShr(SDNode *N, unsigned &Opc,
                                          SDValue &Opd0, unsigned &LSB,
                                          unsigned &MSB) {
  // We are looking for the following pattern which basically extracts several
  // continuous bits from the source value and places them starting at the LSB
  // of the destination value; all other bits of the destination value are set
  // to zero:
  //
  //   Value2 = AND Value, MaskImm
  //   SRL Value2, ShiftImm
  //
  // with MaskImm >> ShiftImm to search for the bit width.
  //
  // This gets selected into a single UBFM:
  //
  //   UBFM Value, ShiftImm, BitWide + Srl_imm - 1
  //
  if (N->getOpcode() != ISD::SRL)

  uint64_t And_mask = 0;
  if (!isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, And_mask))

  Opd0 = N->getOperand(0).getOperand(0);

  uint64_t Srl_imm = 0;
  if (!isIntImmediate(N->getOperand(1), Srl_imm))

  // Check whether we really have several bits extract here.
  unsigned BitWide = 64 - countLeadingOnes(~(And_mask >> Srl_imm));
  if (BitWide && isMask_64(And_mask >> Srl_imm)) {
    if (N->getValueType(0) == MVT::i32)
      Opc = AArch64::UBFMWri;
    else
      Opc = AArch64::UBFMXri;

    LSB = Srl_imm;
    MSB = BitWide + Srl_imm - 1;
static bool isBitfieldExtractOpFromShr(SDNode *N, unsigned &Opc, SDValue &Opd0,
                                       unsigned &Immr, unsigned &Imms,
                                       bool BiggerPattern) {
  assert((N->getOpcode() == ISD::SRA || N->getOpcode() == ISD::SRL) &&
         "N must be a SHR/SRA operation to call this function");

  EVT VT = N->getValueType(0);

  // Here we can test the type of VT and return false when the type does not
  // match, but since it is done prior to that call in the current context
  // we turned that into an assert to avoid redundant code.
  assert((VT == MVT::i32 || VT == MVT::i64) &&
         "Type checking must have been done before calling this function");

  // Check for AND + SRL doing several bits extract.
  if (isSeveralBitsExtractOpFromShr(N, Opc, Opd0, Immr, Imms))

  // We're looking for a shift of a shift.
  uint64_t Shl_imm = 0;
  uint64_t Trunc_bits = 0;
  if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SHL, Shl_imm)) {
    Opd0 = N->getOperand(0).getOperand(0);
  } else if (VT == MVT::i32 && N->getOpcode() == ISD::SRL &&
             N->getOperand(0).getNode()->getOpcode() == ISD::TRUNCATE) {
    // We are looking for a shift of truncate. Truncate from i64 to i32 could
    // be considered as setting high 32 bits as zero. Our strategy here is to
    // always generate a 64-bit UBFM. This consistency will help the CSE pass
    // later find more redundancy.
    Opd0 = N->getOperand(0).getOperand(0);
    Trunc_bits = Opd0->getValueType(0).getSizeInBits() - VT.getSizeInBits();
    VT = Opd0->getValueType(0);
    assert(VT == MVT::i64 && "the promoted type should be i64");
  } else if (BiggerPattern) {
    // Let's pretend a 0 shift left has been performed.
    // FIXME: Currently we limit this to the bigger pattern case,
    // because some optimizations expect AND and not UBFM.
    Opd0 = N->getOperand(0);

  // Missing combines/constant folding may have left us with strange
  // constants.
  if (Shl_imm >= VT.getSizeInBits()) {
    DEBUG((dbgs() << N
           << ": Found large shift immediate, this should not happen\n"));

  uint64_t Srl_imm = 0;
  if (!isIntImmediate(N->getOperand(1), Srl_imm))

  assert(Srl_imm > 0 && Srl_imm < VT.getSizeInBits() &&
         "bad amount in shift node!");
  int immr = Srl_imm - Shl_imm;
  Immr = immr < 0 ? immr + VT.getSizeInBits() : immr;
  Imms = VT.getSizeInBits() - Shl_imm - Trunc_bits - 1;
  // SRA requires a signed extraction
  if (VT == MVT::i32)
    Opc = N->getOpcode() == ISD::SRA ? AArch64::SBFMWri : AArch64::UBFMWri;
  else
    Opc = N->getOpcode() == ISD::SRA ? AArch64::SBFMXri : AArch64::UBFMXri;

static bool isBitfieldExtractOp(SelectionDAG *CurDAG, SDNode *N, unsigned &Opc,
                                SDValue &Opd0, unsigned &Immr, unsigned &Imms,
                                unsigned NumberOfIgnoredLowBits = 0,
                                bool BiggerPattern = false) {
  if (N->getValueType(0) != MVT::i32 && N->getValueType(0) != MVT::i64)

  switch (N->getOpcode()) {
    if (!N->isMachineOpcode())

    return isBitfieldExtractOpFromAnd(CurDAG, N, Opc, Opd0, Immr, Imms,
                                      NumberOfIgnoredLowBits, BiggerPattern);

    return isBitfieldExtractOpFromShr(N, Opc, Opd0, Immr, Imms, BiggerPattern);
  unsigned NOpc = N->getMachineOpcode();
  switch (NOpc) {
  case AArch64::SBFMWri:
  case AArch64::UBFMWri:
  case AArch64::SBFMXri:
  case AArch64::UBFMXri:
    Opd0 = N->getOperand(0);
    Immr = cast<ConstantSDNode>(N->getOperand(1).getNode())->getZExtValue();
    Imms = cast<ConstantSDNode>(N->getOperand(2).getNode())->getZExtValue();

SDNode *AArch64DAGToDAGISel::SelectBitfieldExtractOp(SDNode *N) {
  unsigned Opc, Immr, Imms;
  SDValue Opd0;
  if (!isBitfieldExtractOp(CurDAG, N, Opc, Opd0, Immr, Imms))

  EVT VT = N->getValueType(0);

  // If the bit extract operation is 64-bit but the original type is 32-bit, we
  // need to add one EXTRACT_SUBREG.
  if ((Opc == AArch64::SBFMXri || Opc == AArch64::UBFMXri) && VT == MVT::i32) {
    SDValue Ops64[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, MVT::i64),
                       CurDAG->getTargetConstant(Imms, dl, MVT::i64)};

    SDNode *BFM = CurDAG->getMachineNode(Opc, dl, MVT::i64, Ops64);
    SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, dl, MVT::i32);
    MachineSDNode *Node =
        CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, MVT::i32,
                               SDValue(BFM, 0), SubReg);

  SDValue Ops[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, VT),
                   CurDAG->getTargetConstant(Imms, dl, VT)};
  return CurDAG->SelectNodeTo(N, Opc, VT, Ops);

/// Does DstMask form a complementary pair with the mask provided by
/// BitsToBeInserted, suitable for use in a BFI instruction? Roughly speaking,
/// this asks whether DstMask zeroes precisely those bits that will be set by
/// the other half.
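/// For example, with 32-bit values a DstMask of 0xffffff00 pairs with
/// inserted bits confined to 0x000000ff: ANDed together they give 0,
/// ORed together they cover every bit.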
static bool isBitfieldDstMask(uint64_t DstMask, APInt BitsToBeInserted,
                              unsigned NumberOfIgnoredHighBits, EVT VT) {
  assert((VT == MVT::i32 || VT == MVT::i64) &&
         "i32 or i64 mask type expected!");
  unsigned BitWidth = VT.getSizeInBits() - NumberOfIgnoredHighBits;

  APInt SignificantDstMask = APInt(BitWidth, DstMask);
  APInt SignificantBitsToBeInserted = BitsToBeInserted.zextOrTrunc(BitWidth);

  return (SignificantDstMask & SignificantBitsToBeInserted) == 0 &&
         (SignificantDstMask | SignificantBitsToBeInserted).isAllOnesValue();

// Look for bits that will be useful for later uses.
// A bit is considered useless as soon as it is dropped and never used
// before it has been dropped.
// E.g., looking for useful bits of x:
//   1. y = x & 0x7
//   2. z = y >> 2
// After #1, the useful bits of x are 0x7; they live through y.
// After #2, the useful bits of x are 0x4.
// However, if x is used on an unpredictable instruction, then all its bits
// are useful.
static void getUsefulBits(SDValue Op, APInt &UsefulBits, unsigned Depth = 0);

static void getUsefulBitsFromAndWithImmediate(SDValue Op, APInt &UsefulBits,
                                              unsigned Depth) {
  uint64_t Imm =
      cast<const ConstantSDNode>(Op.getOperand(1).getNode())->getZExtValue();
  Imm = AArch64_AM::decodeLogicalImmediate(Imm, UsefulBits.getBitWidth());
  UsefulBits &= APInt(UsefulBits.getBitWidth(), Imm);
  getUsefulBits(Op, UsefulBits, Depth + 1);

static void getUsefulBitsFromBitfieldMoveOpd(SDValue Op, APInt &UsefulBits,
                                             uint64_t Imm, uint64_t MSB,
                                             unsigned Depth) {
  // inherit the bitwidth value
  APInt OpUsefulBits(UsefulBits);

  OpUsefulBits = OpUsefulBits.shl(MSB - Imm + 1);
  // The interesting part will be in the lower part of the result
  getUsefulBits(Op, OpUsefulBits, Depth + 1);
  // The interesting part was starting at Imm in the argument
  OpUsefulBits = OpUsefulBits.shl(Imm);

  OpUsefulBits = OpUsefulBits.shl(MSB + 1);
  // The interesting part will be shifted in the result
  OpUsefulBits = OpUsefulBits.shl(OpUsefulBits.getBitWidth() - Imm);
  getUsefulBits(Op, OpUsefulBits, Depth + 1);
  // The interesting part was at zero in the argument
  OpUsefulBits = OpUsefulBits.lshr(OpUsefulBits.getBitWidth() - Imm);

  UsefulBits &= OpUsefulBits;

static void getUsefulBitsFromUBFM(SDValue Op, APInt &UsefulBits,
                                  unsigned Depth) {
  uint64_t Imm =
      cast<const ConstantSDNode>(Op.getOperand(1).getNode())->getZExtValue();
  uint64_t MSB =
      cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue();

  getUsefulBitsFromBitfieldMoveOpd(Op, UsefulBits, Imm, MSB, Depth);

static void getUsefulBitsFromOrWithShiftedReg(SDValue Op, APInt &UsefulBits,
                                              unsigned Depth) {
  uint64_t ShiftTypeAndValue =
      cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue();
  APInt Mask(UsefulBits);
  Mask.clearAllBits();

  if (AArch64_AM::getShiftType(ShiftTypeAndValue) == AArch64_AM::LSL) {
    uint64_t ShiftAmt = AArch64_AM::getShiftValue(ShiftTypeAndValue);
    Mask = Mask.shl(ShiftAmt);
    getUsefulBits(Op, Mask, Depth + 1);
    Mask = Mask.lshr(ShiftAmt);
  } else if (AArch64_AM::getShiftType(ShiftTypeAndValue) == AArch64_AM::LSR) {
    // We do not handle AArch64_AM::ASR, because the sign will change the
    // number of useful bits.
    uint64_t ShiftAmt = AArch64_AM::getShiftValue(ShiftTypeAndValue);
    Mask = Mask.lshr(ShiftAmt);
    getUsefulBits(Op, Mask, Depth + 1);
    Mask = Mask.shl(ShiftAmt);

static void getUsefulBitsFromBFM(SDValue Op, SDValue Orig, APInt &UsefulBits,
                                 unsigned Depth) {
  uint64_t Imm =
      cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue();
  uint64_t MSB =
      cast<const ConstantSDNode>(Op.getOperand(3).getNode())->getZExtValue();

  if (Op.getOperand(1) == Orig)
    return getUsefulBitsFromBitfieldMoveOpd(Op, UsefulBits, Imm, MSB, Depth);

  APInt OpUsefulBits(UsefulBits);

  OpUsefulBits = OpUsefulBits.shl(MSB - Imm + 1);

  UsefulBits &= ~OpUsefulBits;
  getUsefulBits(Op, UsefulBits, Depth + 1);

  OpUsefulBits = OpUsefulBits.shl(MSB + 1);

  UsefulBits = ~(OpUsefulBits.shl(OpUsefulBits.getBitWidth() - Imm));
  getUsefulBits(Op, UsefulBits, Depth + 1);

static void getUsefulBitsForUse(SDNode *UserNode, APInt &UsefulBits,
1764
SDValue Orig, unsigned Depth) {
1766
// Users of this node should have already been instruction selected
1767
// FIXME: Can we turn that into an assert?
1768
if (!UserNode->isMachineOpcode())
1771
switch (UserNode->getMachineOpcode()) {
1774
case AArch64::ANDSWri:
1775
case AArch64::ANDSXri:
1776
case AArch64::ANDWri:
1777
case AArch64::ANDXri:
1778
// We increment Depth only when we call the getUsefulBits
1779
return getUsefulBitsFromAndWithImmediate(SDValue(UserNode, 0), UsefulBits,
1781
case AArch64::UBFMWri:
1782
case AArch64::UBFMXri:
1783
return getUsefulBitsFromUBFM(SDValue(UserNode, 0), UsefulBits, Depth);
1785
case AArch64::ORRWrs:
1786
case AArch64::ORRXrs:
1787
if (UserNode->getOperand(1) != Orig)
1789
return getUsefulBitsFromOrWithShiftedReg(SDValue(UserNode, 0), UsefulBits,
1791
case AArch64::BFMWri:
1792
case AArch64::BFMXri:
1793
return getUsefulBitsFromBFM(SDValue(UserNode, 0), Orig, UsefulBits, Depth);
1797

static void getUsefulBits(SDValue Op, APInt &UsefulBits, unsigned Depth) {
  if (Depth >= 6)
    return;
  // Initialize UsefulBits
  if (!Depth) {
    unsigned Bitwidth = Op.getValueType().getScalarType().getSizeInBits();
    // At the beginning, assume every produced bit is useful
    UsefulBits = APInt(Bitwidth, 0);
    UsefulBits.flipAllBits();
  }
  APInt UsersUsefulBits(UsefulBits.getBitWidth(), 0);

  for (SDNode *Node : Op.getNode()->uses()) {
    // A use cannot produce useful bits
    APInt UsefulBitsForUse = APInt(UsefulBits);
    getUsefulBitsForUse(Node, UsefulBitsForUse, Op, Depth);
    UsersUsefulBits |= UsefulBitsForUse;
  }
  // UsefulBits contains the produced bits that are meaningful for the
  // current definition, thus a user cannot make a bit meaningful at
  // this point
  UsefulBits &= UsersUsefulBits;
}
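
// Illustrative example: if the only selected users of a value x are an AND
// keeping 0xff and a UBFM extracting x[23:16], and both of those results are
// fully used, then the useful bits of x computed here are 0x00ff00ff.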

/// Create a machine node performing a notional SHL of Op by ShlAmount. If
/// ShlAmount is negative, do a (logical) right-shift instead. If ShlAmount is
/// 0, return Op unchanged.
static SDValue getLeftShift(SelectionDAG *CurDAG, SDValue Op, int ShlAmount) {
  if (ShlAmount == 0)
    return Op;

  EVT VT = Op.getValueType();
  SDLoc dl(Op);
  unsigned BitWidth = VT.getSizeInBits();
  unsigned UBFMOpc = BitWidth == 32 ? AArch64::UBFMWri : AArch64::UBFMXri;

  SDNode *ShiftNode;
  if (ShlAmount > 0) {
    // LSL wD, wN, #Amt == UBFM wD, wN, #32-Amt, #31-Amt
    ShiftNode = CurDAG->getMachineNode(
        UBFMOpc, dl, VT, Op,
        CurDAG->getTargetConstant(BitWidth - ShlAmount, dl, VT),
        CurDAG->getTargetConstant(BitWidth - 1 - ShlAmount, dl, VT));
  } else {
    // LSR wD, wN, #Amt == UBFM wD, wN, #Amt, #BitWidth-1
    assert(ShlAmount < 0 && "expected right shift");
    int ShrAmount = -ShlAmount;
    ShiftNode = CurDAG->getMachineNode(
        UBFMOpc, dl, VT, Op, CurDAG->getTargetConstant(ShrAmount, dl, VT),
        CurDAG->getTargetConstant(BitWidth - 1, dl, VT));
  }

  return SDValue(ShiftNode, 0);
}
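
// For example, on a 32-bit value a notional left shift by 8 is emitted as
// "UBFMWri wD, wN, #24, #23" (the LSL alias), and a right shift by 8 as
// "UBFMWri wD, wN, #8, #31" (the LSR alias).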

/// Does this tree qualify as an attempt to move a bitfield into position,
/// essentially "(and (shl VAL, N), Mask)".
static bool isBitfieldPositioningOp(SelectionDAG *CurDAG, SDValue Op,
                                    SDValue &Src, int &ShiftAmount,
                                    int &MaskWidth) {
  EVT VT = Op.getValueType();
  unsigned BitWidth = VT.getSizeInBits();
  (void)BitWidth;
  assert(BitWidth == 32 || BitWidth == 64);

  APInt KnownZero, KnownOne;
  CurDAG->computeKnownBits(Op, KnownZero, KnownOne);

  // Non-zero in the sense that they're not provably zero, which is the key
  // point if we want to use this value
  uint64_t NonZeroBits = (~KnownZero).getZExtValue();

  // Discard a constant AND mask if present. It's safe because the node will
  // already have been factored into the computeKnownBits calculation above.
  uint64_t AndImm;
  if (isOpcWithIntImmediate(Op.getNode(), ISD::AND, AndImm)) {
    assert((~APInt(BitWidth, AndImm) & ~KnownZero) == 0);
    Op = Op.getOperand(0);
  }

  uint64_t ShlImm;
  if (!isOpcWithIntImmediate(Op.getNode(), ISD::SHL, ShlImm))
    return false;
  Op = Op.getOperand(0);

  if (!isShiftedMask_64(NonZeroBits))
    return false;

  ShiftAmount = countTrailingZeros(NonZeroBits);
  MaskWidth = countTrailingOnes(NonZeroBits >> ShiftAmount);

  // BFI encompasses sufficiently many nodes that it's worth inserting an extra
  // LSL/LSR if the mask in NonZeroBits doesn't quite match up with the ISD::SHL
  // amount.
  Src = getLeftShift(CurDAG, Op, ShlImm - ShiftAmount);
  return true;
}
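
// Illustrative example: for Op = (and (shl %val, 3), 0xF8) the non-zero bits
// are 0xF8, a shifted mask, so ShiftAmount = 3, MaskWidth = 5 and Src is
// %val itself (the SHL amount already matches the mask position).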

// Given an OR operation, check if we have the following pattern
// ubfm c, b, imm, imm2 (or something that does the same job, see
// isBitfieldExtractOp)
// d = e & mask2 ; where mask2 is a binary sequence of 1..10..0 and
//                 countTrailingZeros(mask2) == imm2 - imm + 1
// f = d | c
// if yes, the given reference arguments will be updated so that one can
// replace the OR instruction with:
// f = Opc Opd0, Opd1, LSB, MSB ; where Opc is a BFM, LSB = imm, and MSB = imm2
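//
// Illustrative i32 instance of the pattern above:
//   %c = lshr i32 %b, 16          ; selected as UBFMWri %b, 16, 31
//   %d = and  i32 %e, 0xFFFF0000  ; mask2 = 0xFFFF0000
//   %f = or   i32 %d, %c
// can be replaced by a single BFMWri %e, %b, 16, 31 (the BFXIL alias with
// lsb 16, width 16), which copies b[31:16] into f[15:0] and keeps e's high
// half unchanged.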
static bool isBitfieldInsertOpFromOr(SDNode *N, unsigned &Opc, SDValue &Dst,
                                     SDValue &Src, unsigned &ImmR,
                                     unsigned &ImmS, SelectionDAG *CurDAG) {
  assert(N->getOpcode() == ISD::OR && "Expect a OR operation");

  // Set Opc
  EVT VT = N->getValueType(0);
  if (VT == MVT::i32)
    Opc = AArch64::BFMWri;
  else if (VT == MVT::i64)
    Opc = AArch64::BFMXri;
  else
    return false;

  // Because of simplify-demanded-bits in DAGCombine, involved masks may not
  // have the expected shape. Try to undo that.
  APInt UsefulBits;
  getUsefulBits(SDValue(N, 0), UsefulBits);

  unsigned NumberOfIgnoredLowBits = UsefulBits.countTrailingZeros();
  unsigned NumberOfIgnoredHighBits = UsefulBits.countLeadingZeros();

  // OR is commutative, check both possibilities (does llvm provide a
  // way to do that directly, e.g., via a code matcher?)
  SDValue OrOpd1Val = N->getOperand(1);
  SDNode *OrOpd0 = N->getOperand(0).getNode();
  SDNode *OrOpd1 = N->getOperand(1).getNode();
  for (int i = 0; i < 2;
       ++i, std::swap(OrOpd0, OrOpd1), OrOpd1Val = N->getOperand(0)) {
    unsigned BFXOpc;
    int DstLSB, Width;
    if (isBitfieldExtractOp(CurDAG, OrOpd0, BFXOpc, Src, ImmR, ImmS,
                            NumberOfIgnoredLowBits, true)) {
      // Check that the returned opcode is compatible with the pattern,
      // i.e., same type and zero extended (U and not S)
      if ((BFXOpc != AArch64::UBFMXri && VT == MVT::i64) ||
          (BFXOpc != AArch64::UBFMWri && VT == MVT::i32))
        continue;

      // Compute the width of the bitfield insertion
      DstLSB = 0;
      Width = ImmS - ImmR + 1;
      // FIXME: This constraint is to catch bitfield insertion; we may
      // want to widen the pattern if we want to grab general bitfield
      // insertion.
      if (Width <= 0)
        continue;

      // If the mask on the insertee is correct, we have a BFXIL operation. We
      // can share the ImmR and ImmS values from the already-computed UBFM.
    } else if (isBitfieldPositioningOp(CurDAG, SDValue(OrOpd0, 0), Src,
                                       DstLSB, Width)) {
      ImmR = (VT.getSizeInBits() - DstLSB) % VT.getSizeInBits();
      ImmS = Width - 1;
    } else
      continue;

    // Check the second part of the pattern
    EVT VT = OrOpd1->getValueType(0);
    assert((VT == MVT::i32 || VT == MVT::i64) && "unexpected OR operand");

    // Compute the Known Zero for the candidate of the first operand.
    // This allows us to catch more general cases than just looking for
    // AND with imm. Indeed, simplify-demanded-bits may have removed
    // the AND instruction because it proves it was useless.
    APInt KnownZero, KnownOne;
    CurDAG->computeKnownBits(OrOpd1Val, KnownZero, KnownOne);

    // Check if there is enough room for the second operand to appear
    // in the first one
    APInt BitsToBeInserted =
        APInt::getBitsSet(KnownZero.getBitWidth(), DstLSB, DstLSB + Width);

    if ((BitsToBeInserted & ~KnownZero) != 0)
      continue;

    // Set the first operand
    uint64_t Imm;
    if (isOpcWithIntImmediate(OrOpd1, ISD::AND, Imm) &&
        isBitfieldDstMask(Imm, BitsToBeInserted, NumberOfIgnoredHighBits, VT))
      // In that case, we can eliminate the AND
      Dst = OrOpd1->getOperand(0);
    else
      // Maybe the AND has been removed by simplify-demanded-bits
      // or is useful because it discards more bits
      Dst = OrOpd1Val;

    // Both parts match.
    return true;
  }

  return false;
}

SDNode *AArch64DAGToDAGISel::SelectBitfieldInsertOp(SDNode *N) {
  if (N->getOpcode() != ISD::OR)
    return nullptr;

  unsigned Opc;
  unsigned LSB, MSB;
  SDValue Opd0, Opd1;

  if (!isBitfieldInsertOpFromOr(N, Opc, Opd0, Opd1, LSB, MSB, CurDAG))
    return nullptr;

  EVT VT = N->getValueType(0);
  SDLoc dl(N);
  SDValue Ops[] = { Opd0,
                    Opd1,
                    CurDAG->getTargetConstant(LSB, dl, VT),
                    CurDAG->getTargetConstant(MSB, dl, VT) };
  return CurDAG->SelectNodeTo(N, Opc, VT, Ops);
}

SDNode *AArch64DAGToDAGISel::SelectLIBM(SDNode *N) {
  EVT VT = N->getValueType(0);
  unsigned Variant;
  unsigned Opc;
  unsigned FRINTXOpcs[] = { AArch64::FRINTXSr, AArch64::FRINTXDr };

  if (VT == MVT::f32) {
    Variant = 0;
  } else if (VT == MVT::f64) {
    Variant = 1;
  } else
    return nullptr; // Unrecognized argument type. Fall back on default codegen.

  // Pick the FRINTX variant needed to set the flags.
  unsigned FRINTXOpc = FRINTXOpcs[Variant];

  switch (N->getOpcode()) {
  default:
    return nullptr; // Unrecognized libm ISD node. Fall back on default codegen.
  case ISD::FCEIL: {
    unsigned FRINTPOpcs[] = { AArch64::FRINTPSr, AArch64::FRINTPDr };
    Opc = FRINTPOpcs[Variant];
    break;
  }
  case ISD::FFLOOR: {
    unsigned FRINTMOpcs[] = { AArch64::FRINTMSr, AArch64::FRINTMDr };
    Opc = FRINTMOpcs[Variant];
    break;
  }
  case ISD::FTRUNC: {
    unsigned FRINTZOpcs[] = { AArch64::FRINTZSr, AArch64::FRINTZDr };
    Opc = FRINTZOpcs[Variant];
    break;
  }
  case ISD::FROUND: {
    unsigned FRINTAOpcs[] = { AArch64::FRINTASr, AArch64::FRINTADr };
    Opc = FRINTAOpcs[Variant];
    break;
  }
  }

  SDLoc dl(N);
  SDValue In = N->getOperand(0);
  SmallVector<SDValue, 2> Ops;
  Ops.push_back(In);

  if (!TM.Options.UnsafeFPMath) {
    SDNode *FRINTX = CurDAG->getMachineNode(FRINTXOpc, dl, VT, MVT::Glue, In);
    Ops.push_back(SDValue(FRINTX, 1));
  }

  return CurDAG->getMachineNode(Opc, dl, VT, Ops);
}

bool
AArch64DAGToDAGISel::SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos,
                                              unsigned RegWidth) {
  APFloat FVal(0.0);
  if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(N))
    FVal = CN->getValueAPF();
  else if (LoadSDNode *LN = dyn_cast<LoadSDNode>(N)) {
    // Some otherwise illegal constants are allowed in this case.
    if (LN->getOperand(1).getOpcode() != AArch64ISD::ADDlow ||
        !isa<ConstantPoolSDNode>(LN->getOperand(1)->getOperand(1)))
      return false;

    ConstantPoolSDNode *CN =
        dyn_cast<ConstantPoolSDNode>(LN->getOperand(1)->getOperand(1));
    FVal = cast<ConstantFP>(CN->getConstVal())->getValueAPF();
  } else
    return false;

  // An FCVT[SU] instruction performs: convertToInt(Val * 2^fbits) where fbits
  // is between 1 and 32 for a destination w-register, or 1 and 64 for an
  // x-register.
  //
  // By this stage, we've detected (fp_to_[su]int (fmul Val, THIS_NODE)) so we
  // want THIS_NODE to be 2^fbits. This is much easier to deal with using
  // integers.
  bool IsExact;

  // fbits is between 1 and 64 in the worst-case, which means the fmul
  // could have 2^64 as an actual operand. Need 65 bits of precision.
  APSInt IntVal(65, true);
  FVal.convertToInteger(IntVal, APFloat::rmTowardZero, &IsExact);

  // N.b. isPowerOf2 also checks for > 0.
  if (!IsExact || !IntVal.isPowerOf2()) return false;
  unsigned FBits = IntVal.logBase2();

  // Checks above should have guaranteed that we haven't lost information in
  // finding FBits, but it must still be in range.
  if (FBits == 0 || FBits > RegWidth) return false;

  FixedPos = CurDAG->getTargetConstant(FBits, SDLoc(N), MVT::i32);
  return true;
}
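
// Illustrative example: for (fp_to_uint (fmul %x, 65536.0)) targeting a
// w-register, FVal converts exactly to 2^16, so FBits == 16 and the
// multiply+convert can later be matched as a fixed-point FCVTZU with #16
// fractional bits.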

// Inspects a register string of the form o0:op1:CRn:CRm:op2, gets the fields
// of the string, obtains the integer values from them, and combines these
// into a single value to be used in the MRS/MSR instruction.
static int getIntOperandFromRegisterString(StringRef RegString) {
  SmallVector<StringRef, 5> Fields;
  RegString.split(Fields, ":");

  if (Fields.size() == 1)
    return -1;

  assert(Fields.size() == 5
            && "Invalid number of fields in read register string");

  SmallVector<int, 5> Ops;
  bool AllIntFields = true;

  for (StringRef Field : Fields) {
    unsigned IntField;
    AllIntFields &= !Field.getAsInteger(10, IntField);
    Ops.push_back(IntField);
  }

  assert(AllIntFields &&
          "Unexpected non-integer value in special register string.");

  // Need to combine the integer fields of the string into a single value
  // based on the bit encoding of MRS/MSR instruction.
  return (Ops[0] << 14) | (Ops[1] << 11) | (Ops[2] << 7) |
         (Ops[3] << 3) | (Ops[4]);
}
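
// For example, the register string "1:3:7:4:2" yields
// (1 << 14) | (3 << 11) | (7 << 7) | (4 << 3) | 2 == 0x5BA2, which becomes
// the immediate operand placed on the generated MRS/MSR machine node.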

// Lower the read_register intrinsic to an MRS instruction node if the special
// register string argument is either of the form detailed in the ACLE (the
// form described in getIntOperandFromRegisterString) or is a named register
// known by the MRS SysReg mapper.
SDNode *AArch64DAGToDAGISel::SelectReadRegister(SDNode *N) {
  const MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(N->getOperand(1));
  const MDString *RegString = dyn_cast<MDString>(MD->getMD()->getOperand(0));
  SDLoc DL(N);

  int Reg = getIntOperandFromRegisterString(RegString->getString());
  if (Reg != -1)
    return CurDAG->getMachineNode(AArch64::MRS, DL, N->getSimpleValueType(0),
                                  MVT::Other,
                                  CurDAG->getTargetConstant(Reg, DL, MVT::i32),
                                  N->getOperand(0));

  // Use the sysreg mapper to map the remaining possible strings to the
  // value for the register to be used for the instruction operand.
  AArch64SysReg::MRSMapper mapper;
  bool IsValidSpecialReg;
  Reg = mapper.fromString(RegString->getString(),
                          Subtarget->getFeatureBits(),
                          IsValidSpecialReg);
  if (IsValidSpecialReg)
    return CurDAG->getMachineNode(AArch64::MRS, DL, N->getSimpleValueType(0),
                                  MVT::Other,
                                  CurDAG->getTargetConstant(Reg, DL, MVT::i32),
                                  N->getOperand(0));

  return nullptr;
}

// Lower the write_register intrinsic to an MSR instruction node if the special
// register string argument is either of the form detailed in the ACLE (the
// form described in getIntOperandFromRegisterString) or is a named register
// known by the MSR SysReg mapper.
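//
// For example (illustrative), IR such as
//   !0 = !{!"1:2:3:4:5"}
//   call void @llvm.write_register.i64(metadata !0, i64 %val)
// reaches this point as an ISD::WRITE_REGISTER node whose register string is
// "1:2:3:4:5", and is selected to an MSR with the encoded system-register
// immediate computed by getIntOperandFromRegisterString.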
SDNode *AArch64DAGToDAGISel::SelectWriteRegister(SDNode *N) {
  const MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(N->getOperand(1));
  const MDString *RegString = dyn_cast<MDString>(MD->getMD()->getOperand(0));
  SDLoc DL(N);

  int Reg = getIntOperandFromRegisterString(RegString->getString());
  if (Reg != -1)
    return CurDAG->getMachineNode(AArch64::MSR, DL, MVT::Other,
                                  CurDAG->getTargetConstant(Reg, DL, MVT::i32),
                                  N->getOperand(2), N->getOperand(0));

  // Check if the register was one of those allowed as the pstatefield value in
  // the MSR (immediate) instruction. To accept the values allowed in the
  // pstatefield for the MSR (immediate) instruction, we also require that an
  // immediate value has been provided as an argument; we know that this is
  // the case, as it has been ensured by semantic checking.
  AArch64PState::PStateMapper PMapper;
  bool IsValidSpecialReg;
  Reg = PMapper.fromString(RegString->getString(),
                           Subtarget->getFeatureBits(),
                           IsValidSpecialReg);
  if (IsValidSpecialReg) {
    assert (isa<ConstantSDNode>(N->getOperand(2))
              && "Expected a constant integer expression.");
    uint64_t Immed = cast<ConstantSDNode>(N->getOperand(2))->getZExtValue();
    return CurDAG->getMachineNode(AArch64::MSRpstate, DL, MVT::Other,
                                  CurDAG->getTargetConstant(Reg, DL, MVT::i32),
                                  CurDAG->getTargetConstant(Immed, DL, MVT::i16),
                                  N->getOperand(0));
  }

  // Use the sysreg mapper to attempt to map the remaining possible strings
  // to the value for the register to be used for the MSR (register)
  // instruction operand.
  AArch64SysReg::MSRMapper Mapper;
  Reg = Mapper.fromString(RegString->getString(),
                          Subtarget->getFeatureBits(),
                          IsValidSpecialReg);
  if (IsValidSpecialReg)
    return CurDAG->getMachineNode(AArch64::MSR, DL, MVT::Other,
                                  CurDAG->getTargetConstant(Reg, DL, MVT::i32),
                                  N->getOperand(2), N->getOperand(0));

  return nullptr;
}

SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) {
  // Dump information about the Node being selected
  DEBUG(errs() << "Selecting: ");
  DEBUG(Node->dump(CurDAG));
  DEBUG(errs() << "\n");

  // If we have a custom node, we already have selected!
  if (Node->isMachineOpcode()) {
    DEBUG(errs() << "== "; Node->dump(CurDAG); errs() << "\n");
    Node->setNodeId(-1);
    return nullptr;
  }

  // Handle a few custom selection cases directly.
  SDNode *ResNode = nullptr;
  EVT VT = Node->getValueType(0);

  switch (Node->getOpcode()) {
  default:
    break;
  case ISD::READ_REGISTER:
    if (SDNode *Res = SelectReadRegister(Node))
      return Res;
    break;
  case ISD::WRITE_REGISTER:
    if (SDNode *Res = SelectWriteRegister(Node))
      return Res;
    break;
  case ISD::ADD:
    if (SDNode *I = SelectMLAV64LaneV128(Node))
      return I;
    break;
  case ISD::LOAD: {
    // Try to select as an indexed load. Fall through to normal processing
    // if we can't.
    bool Done = false;
    SDNode *I = SelectIndexedLoad(Node, Done);
    if (Done)
      return I;
    break;
  }
  case ISD::SRL:
  case ISD::AND:
  case ISD::SRA:
    if (SDNode *I = SelectBitfieldExtractOp(Node))
      return I;
    break;
  case ISD::OR:
    if (SDNode *I = SelectBitfieldInsertOp(Node))
      return I;
    break;
  case ISD::EXTRACT_VECTOR_ELT: {
    // Extracting lane zero is a special case where we can just use a plain
    // EXTRACT_SUBREG instruction, which will become FMOV. This is easier for
    // the rest of the compiler, especially the register allocator and copy
    // propagation, to reason about, so is preferred when it's possible to
    // do so.
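    // For example, "extractelement <2 x double> %v, i32 0" becomes a plain
    // EXTRACT_SUBREG of the dsub sub-register of the q-register holding %v.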
    ConstantSDNode *LaneNode = cast<ConstantSDNode>(Node->getOperand(1));
    // Bail and use the default Select() for non-zero lanes.
    if (LaneNode->getZExtValue() != 0)
      break;

    // If the element type is not the same as the result type, likewise
    // bail and use the default Select(), as there's more to do than just
    // a cross-class COPY. This catches extracts of i8 and i16 elements
    // since they will need an explicit zext.
    if (VT != Node->getOperand(0).getValueType().getVectorElementType())
      break;

    unsigned SubReg;
    switch (Node->getOperand(0)
                .getValueType()
                .getVectorElementType()
                .getSizeInBits()) {
    default:
      llvm_unreachable("Unexpected vector element type!");
    case 64:
      SubReg = AArch64::dsub;
      break;
    case 32:
      SubReg = AArch64::ssub;
      break;
    case 16:
      SubReg = AArch64::hsub;
      break;
    case 8:
      llvm_unreachable("unexpected zext-requiring extract element!");
    }

    SDValue Extract = CurDAG->getTargetExtractSubreg(SubReg, SDLoc(Node), VT,
                                                     Node->getOperand(0));
    DEBUG(dbgs() << "ISEL: Custom selection!\n=> ");
    DEBUG(Extract->dumpr(CurDAG));
    DEBUG(dbgs() << "\n");
    return Extract.getNode();
  }
  case ISD::Constant: {
    // Materialize zero constants as copies from WZR/XZR. This allows
    // the coalescer to propagate these into other instructions.
    ConstantSDNode *ConstNode = cast<ConstantSDNode>(Node);
    if (ConstNode->isNullValue()) {
      if (VT == MVT::i32)
        return CurDAG->getCopyFromReg(CurDAG->getEntryNode(), SDLoc(Node),
                                      AArch64::WZR, MVT::i32).getNode();
      else if (VT == MVT::i64)
        return CurDAG->getCopyFromReg(CurDAG->getEntryNode(), SDLoc(Node),
                                      AArch64::XZR, MVT::i64).getNode();
    }
    break;
  }
  case ISD::FrameIndex: {
    // Selects to ADDXri FI, 0 which in turn will become ADDXri SP, imm.
    int FI = cast<FrameIndexSDNode>(Node)->getIndex();
    unsigned Shifter = AArch64_AM::getShifterImm(AArch64_AM::LSL, 0);
    const TargetLowering *TLI = getTargetLowering();
    SDValue TFI = CurDAG->getTargetFrameIndex(
        FI, TLI->getPointerTy(CurDAG->getDataLayout()));
    SDLoc DL(Node);
    SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, DL, MVT::i32),
                      CurDAG->getTargetConstant(Shifter, DL, MVT::i32) };
    return CurDAG->SelectNodeTo(Node, AArch64::ADDXri, MVT::i64, Ops);
  }
case ISD::INTRINSIC_W_CHAIN: {
2359
unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue();
2363
case Intrinsic::aarch64_ldaxp:
2364
case Intrinsic::aarch64_ldxp: {
2366
IntNo == Intrinsic::aarch64_ldaxp ? AArch64::LDAXPX : AArch64::LDXPX;
2367
SDValue MemAddr = Node->getOperand(2);
2369
SDValue Chain = Node->getOperand(0);
2371
SDNode *Ld = CurDAG->getMachineNode(Op, DL, MVT::i64, MVT::i64,
2372
MVT::Other, MemAddr, Chain);
2374
// Transfer memoperands.
2375
MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
2376
MemOp[0] = cast<MemIntrinsicSDNode>(Node)->getMemOperand();
2377
cast<MachineSDNode>(Ld)->setMemRefs(MemOp, MemOp + 1);
2380
case Intrinsic::aarch64_stlxp:
2381
case Intrinsic::aarch64_stxp: {
2383
IntNo == Intrinsic::aarch64_stlxp ? AArch64::STLXPX : AArch64::STXPX;
2385
SDValue Chain = Node->getOperand(0);
2386
SDValue ValLo = Node->getOperand(2);
2387
SDValue ValHi = Node->getOperand(3);
2388
SDValue MemAddr = Node->getOperand(4);
2390
// Place arguments in the right order.
2391
SDValue Ops[] = {ValLo, ValHi, MemAddr, Chain};
2393
SDNode *St = CurDAG->getMachineNode(Op, DL, MVT::i32, MVT::Other, Ops);
2394
// Transfer memoperands.
2395
MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
2396
MemOp[0] = cast<MemIntrinsicSDNode>(Node)->getMemOperand();
2397
cast<MachineSDNode>(St)->setMemRefs(MemOp, MemOp + 1);
2401
case Intrinsic::aarch64_neon_ld1x2:
2402
if (VT == MVT::v8i8)
2403
return SelectLoad(Node, 2, AArch64::LD1Twov8b, AArch64::dsub0);
2404
else if (VT == MVT::v16i8)
2405
return SelectLoad(Node, 2, AArch64::LD1Twov16b, AArch64::qsub0);
2406
else if (VT == MVT::v4i16 || VT == MVT::v4f16)
2407
return SelectLoad(Node, 2, AArch64::LD1Twov4h, AArch64::dsub0);
2408
else if (VT == MVT::v8i16 || VT == MVT::v8f16)
2409
return SelectLoad(Node, 2, AArch64::LD1Twov8h, AArch64::qsub0);
2410
else if (VT == MVT::v2i32 || VT == MVT::v2f32)
2411
return SelectLoad(Node, 2, AArch64::LD1Twov2s, AArch64::dsub0);
2412
else if (VT == MVT::v4i32 || VT == MVT::v4f32)
2413
return SelectLoad(Node, 2, AArch64::LD1Twov4s, AArch64::qsub0);
2414
else if (VT == MVT::v1i64 || VT == MVT::v1f64)
2415
return SelectLoad(Node, 2, AArch64::LD1Twov1d, AArch64::dsub0);
2416
else if (VT == MVT::v2i64 || VT == MVT::v2f64)
2417
return SelectLoad(Node, 2, AArch64::LD1Twov2d, AArch64::qsub0);
2419
case Intrinsic::aarch64_neon_ld1x3:
2420
if (VT == MVT::v8i8)
2421
return SelectLoad(Node, 3, AArch64::LD1Threev8b, AArch64::dsub0);
2422
else if (VT == MVT::v16i8)
2423
return SelectLoad(Node, 3, AArch64::LD1Threev16b, AArch64::qsub0);
2424
else if (VT == MVT::v4i16 || VT == MVT::v4f16)
2425
return SelectLoad(Node, 3, AArch64::LD1Threev4h, AArch64::dsub0);
2426
else if (VT == MVT::v8i16 || VT == MVT::v8f16)
2427
return SelectLoad(Node, 3, AArch64::LD1Threev8h, AArch64::qsub0);
2428
else if (VT == MVT::v2i32 || VT == MVT::v2f32)
2429
return SelectLoad(Node, 3, AArch64::LD1Threev2s, AArch64::dsub0);
2430
else if (VT == MVT::v4i32 || VT == MVT::v4f32)
2431
return SelectLoad(Node, 3, AArch64::LD1Threev4s, AArch64::qsub0);
2432
else if (VT == MVT::v1i64 || VT == MVT::v1f64)
2433
return SelectLoad(Node, 3, AArch64::LD1Threev1d, AArch64::dsub0);
2434
else if (VT == MVT::v2i64 || VT == MVT::v2f64)
2435
return SelectLoad(Node, 3, AArch64::LD1Threev2d, AArch64::qsub0);
2437
case Intrinsic::aarch64_neon_ld1x4:
2438
if (VT == MVT::v8i8)
2439
return SelectLoad(Node, 4, AArch64::LD1Fourv8b, AArch64::dsub0);
2440
else if (VT == MVT::v16i8)
2441
return SelectLoad(Node, 4, AArch64::LD1Fourv16b, AArch64::qsub0);
2442
else if (VT == MVT::v4i16 || VT == MVT::v4f16)
2443
return SelectLoad(Node, 4, AArch64::LD1Fourv4h, AArch64::dsub0);
2444
else if (VT == MVT::v8i16 || VT == MVT::v8f16)
2445
return SelectLoad(Node, 4, AArch64::LD1Fourv8h, AArch64::qsub0);
2446
else if (VT == MVT::v2i32 || VT == MVT::v2f32)
2447
return SelectLoad(Node, 4, AArch64::LD1Fourv2s, AArch64::dsub0);
2448
else if (VT == MVT::v4i32 || VT == MVT::v4f32)
2449
return SelectLoad(Node, 4, AArch64::LD1Fourv4s, AArch64::qsub0);
2450
else if (VT == MVT::v1i64 || VT == MVT::v1f64)
2451
return SelectLoad(Node, 4, AArch64::LD1Fourv1d, AArch64::dsub0);
2452
else if (VT == MVT::v2i64 || VT == MVT::v2f64)
2453
return SelectLoad(Node, 4, AArch64::LD1Fourv2d, AArch64::qsub0);
2455
case Intrinsic::aarch64_neon_ld2:
2456
if (VT == MVT::v8i8)
2457
return SelectLoad(Node, 2, AArch64::LD2Twov8b, AArch64::dsub0);
2458
else if (VT == MVT::v16i8)
2459
return SelectLoad(Node, 2, AArch64::LD2Twov16b, AArch64::qsub0);
2460
else if (VT == MVT::v4i16 || VT == MVT::v4f16)
2461
return SelectLoad(Node, 2, AArch64::LD2Twov4h, AArch64::dsub0);
2462
else if (VT == MVT::v8i16 || VT == MVT::v8f16)
2463
return SelectLoad(Node, 2, AArch64::LD2Twov8h, AArch64::qsub0);
2464
else if (VT == MVT::v2i32 || VT == MVT::v2f32)
2465
return SelectLoad(Node, 2, AArch64::LD2Twov2s, AArch64::dsub0);
2466
else if (VT == MVT::v4i32 || VT == MVT::v4f32)
2467
return SelectLoad(Node, 2, AArch64::LD2Twov4s, AArch64::qsub0);
2468
else if (VT == MVT::v1i64 || VT == MVT::v1f64)
2469
return SelectLoad(Node, 2, AArch64::LD1Twov1d, AArch64::dsub0);
2470
else if (VT == MVT::v2i64 || VT == MVT::v2f64)
2471
return SelectLoad(Node, 2, AArch64::LD2Twov2d, AArch64::qsub0);
2473
case Intrinsic::aarch64_neon_ld3:
2474
if (VT == MVT::v8i8)
2475
return SelectLoad(Node, 3, AArch64::LD3Threev8b, AArch64::dsub0);
2476
else if (VT == MVT::v16i8)
2477
return SelectLoad(Node, 3, AArch64::LD3Threev16b, AArch64::qsub0);
2478
else if (VT == MVT::v4i16 || VT == MVT::v4f16)
2479
return SelectLoad(Node, 3, AArch64::LD3Threev4h, AArch64::dsub0);
2480
else if (VT == MVT::v8i16 || VT == MVT::v8f16)
2481
return SelectLoad(Node, 3, AArch64::LD3Threev8h, AArch64::qsub0);
2482
else if (VT == MVT::v2i32 || VT == MVT::v2f32)
2483
return SelectLoad(Node, 3, AArch64::LD3Threev2s, AArch64::dsub0);
2484
else if (VT == MVT::v4i32 || VT == MVT::v4f32)
2485
return SelectLoad(Node, 3, AArch64::LD3Threev4s, AArch64::qsub0);
2486
else if (VT == MVT::v1i64 || VT == MVT::v1f64)
2487
return SelectLoad(Node, 3, AArch64::LD1Threev1d, AArch64::dsub0);
2488
else if (VT == MVT::v2i64 || VT == MVT::v2f64)
2489
return SelectLoad(Node, 3, AArch64::LD3Threev2d, AArch64::qsub0);
2491
case Intrinsic::aarch64_neon_ld4:
2492
if (VT == MVT::v8i8)
2493
return SelectLoad(Node, 4, AArch64::LD4Fourv8b, AArch64::dsub0);
2494
else if (VT == MVT::v16i8)
2495
return SelectLoad(Node, 4, AArch64::LD4Fourv16b, AArch64::qsub0);
2496
else if (VT == MVT::v4i16 || VT == MVT::v4f16)
2497
return SelectLoad(Node, 4, AArch64::LD4Fourv4h, AArch64::dsub0);
2498
else if (VT == MVT::v8i16 || VT == MVT::v8f16)
2499
return SelectLoad(Node, 4, AArch64::LD4Fourv8h, AArch64::qsub0);
2500
else if (VT == MVT::v2i32 || VT == MVT::v2f32)
2501
return SelectLoad(Node, 4, AArch64::LD4Fourv2s, AArch64::dsub0);
2502
else if (VT == MVT::v4i32 || VT == MVT::v4f32)
2503
return SelectLoad(Node, 4, AArch64::LD4Fourv4s, AArch64::qsub0);
2504
else if (VT == MVT::v1i64 || VT == MVT::v1f64)
2505
return SelectLoad(Node, 4, AArch64::LD1Fourv1d, AArch64::dsub0);
2506
else if (VT == MVT::v2i64 || VT == MVT::v2f64)
2507
return SelectLoad(Node, 4, AArch64::LD4Fourv2d, AArch64::qsub0);
2509
case Intrinsic::aarch64_neon_ld2r:
2510
if (VT == MVT::v8i8)
2511
return SelectLoad(Node, 2, AArch64::LD2Rv8b, AArch64::dsub0);
2512
else if (VT == MVT::v16i8)
2513
return SelectLoad(Node, 2, AArch64::LD2Rv16b, AArch64::qsub0);
2514
else if (VT == MVT::v4i16 || VT == MVT::v4f16)
2515
return SelectLoad(Node, 2, AArch64::LD2Rv4h, AArch64::dsub0);
2516
else if (VT == MVT::v8i16 || VT == MVT::v8f16)
2517
return SelectLoad(Node, 2, AArch64::LD2Rv8h, AArch64::qsub0);
2518
else if (VT == MVT::v2i32 || VT == MVT::v2f32)
2519
return SelectLoad(Node, 2, AArch64::LD2Rv2s, AArch64::dsub0);
2520
else if (VT == MVT::v4i32 || VT == MVT::v4f32)
2521
return SelectLoad(Node, 2, AArch64::LD2Rv4s, AArch64::qsub0);
2522
else if (VT == MVT::v1i64 || VT == MVT::v1f64)
2523
return SelectLoad(Node, 2, AArch64::LD2Rv1d, AArch64::dsub0);
2524
else if (VT == MVT::v2i64 || VT == MVT::v2f64)
2525
return SelectLoad(Node, 2, AArch64::LD2Rv2d, AArch64::qsub0);
2527
case Intrinsic::aarch64_neon_ld3r:
2528
if (VT == MVT::v8i8)
2529
return SelectLoad(Node, 3, AArch64::LD3Rv8b, AArch64::dsub0);
2530
else if (VT == MVT::v16i8)
2531
return SelectLoad(Node, 3, AArch64::LD3Rv16b, AArch64::qsub0);
2532
else if (VT == MVT::v4i16 || VT == MVT::v4f16)
2533
return SelectLoad(Node, 3, AArch64::LD3Rv4h, AArch64::dsub0);
2534
else if (VT == MVT::v8i16 || VT == MVT::v8f16)
2535
return SelectLoad(Node, 3, AArch64::LD3Rv8h, AArch64::qsub0);
2536
else if (VT == MVT::v2i32 || VT == MVT::v2f32)
2537
return SelectLoad(Node, 3, AArch64::LD3Rv2s, AArch64::dsub0);
2538
else if (VT == MVT::v4i32 || VT == MVT::v4f32)
2539
return SelectLoad(Node, 3, AArch64::LD3Rv4s, AArch64::qsub0);
2540
else if (VT == MVT::v1i64 || VT == MVT::v1f64)
2541
return SelectLoad(Node, 3, AArch64::LD3Rv1d, AArch64::dsub0);
2542
else if (VT == MVT::v2i64 || VT == MVT::v2f64)
2543
return SelectLoad(Node, 3, AArch64::LD3Rv2d, AArch64::qsub0);
2545
case Intrinsic::aarch64_neon_ld4r:
2546
if (VT == MVT::v8i8)
2547
return SelectLoad(Node, 4, AArch64::LD4Rv8b, AArch64::dsub0);
2548
else if (VT == MVT::v16i8)
2549
return SelectLoad(Node, 4, AArch64::LD4Rv16b, AArch64::qsub0);
2550
else if (VT == MVT::v4i16 || VT == MVT::v4f16)
2551
return SelectLoad(Node, 4, AArch64::LD4Rv4h, AArch64::dsub0);
2552
else if (VT == MVT::v8i16 || VT == MVT::v8f16)
2553
return SelectLoad(Node, 4, AArch64::LD4Rv8h, AArch64::qsub0);
2554
else if (VT == MVT::v2i32 || VT == MVT::v2f32)
2555
return SelectLoad(Node, 4, AArch64::LD4Rv2s, AArch64::dsub0);
2556
else if (VT == MVT::v4i32 || VT == MVT::v4f32)
2557
return SelectLoad(Node, 4, AArch64::LD4Rv4s, AArch64::qsub0);
2558
else if (VT == MVT::v1i64 || VT == MVT::v1f64)
2559
return SelectLoad(Node, 4, AArch64::LD4Rv1d, AArch64::dsub0);
2560
else if (VT == MVT::v2i64 || VT == MVT::v2f64)
2561
return SelectLoad(Node, 4, AArch64::LD4Rv2d, AArch64::qsub0);
2563
case Intrinsic::aarch64_neon_ld2lane:
2564
if (VT == MVT::v16i8 || VT == MVT::v8i8)
2565
return SelectLoadLane(Node, 2, AArch64::LD2i8);
2566
else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
2568
return SelectLoadLane(Node, 2, AArch64::LD2i16);
2569
else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
2571
return SelectLoadLane(Node, 2, AArch64::LD2i32);
2572
else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
2574
return SelectLoadLane(Node, 2, AArch64::LD2i64);
2576
case Intrinsic::aarch64_neon_ld3lane:
2577
if (VT == MVT::v16i8 || VT == MVT::v8i8)
2578
return SelectLoadLane(Node, 3, AArch64::LD3i8);
2579
else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
2581
return SelectLoadLane(Node, 3, AArch64::LD3i16);
2582
else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
2584
return SelectLoadLane(Node, 3, AArch64::LD3i32);
2585
else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
2587
return SelectLoadLane(Node, 3, AArch64::LD3i64);
2589
case Intrinsic::aarch64_neon_ld4lane:
2590
if (VT == MVT::v16i8 || VT == MVT::v8i8)
2591
return SelectLoadLane(Node, 4, AArch64::LD4i8);
2592
else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
2594
return SelectLoadLane(Node, 4, AArch64::LD4i16);
2595
else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
2597
return SelectLoadLane(Node, 4, AArch64::LD4i32);
2598
else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
2600
return SelectLoadLane(Node, 4, AArch64::LD4i64);
2604
case ISD::INTRINSIC_WO_CHAIN: {
2605
unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(0))->getZExtValue();
2609
case Intrinsic::aarch64_neon_tbl2:
2610
return SelectTable(Node, 2, VT == MVT::v8i8 ? AArch64::TBLv8i8Two
2611
: AArch64::TBLv16i8Two,
2613
case Intrinsic::aarch64_neon_tbl3:
2614
return SelectTable(Node, 3, VT == MVT::v8i8 ? AArch64::TBLv8i8Three
2615
: AArch64::TBLv16i8Three,
2617
case Intrinsic::aarch64_neon_tbl4:
2618
return SelectTable(Node, 4, VT == MVT::v8i8 ? AArch64::TBLv8i8Four
2619
: AArch64::TBLv16i8Four,
2621
case Intrinsic::aarch64_neon_tbx2:
2622
return SelectTable(Node, 2, VT == MVT::v8i8 ? AArch64::TBXv8i8Two
2623
: AArch64::TBXv16i8Two,
2625
case Intrinsic::aarch64_neon_tbx3:
2626
return SelectTable(Node, 3, VT == MVT::v8i8 ? AArch64::TBXv8i8Three
2627
: AArch64::TBXv16i8Three,
2629
case Intrinsic::aarch64_neon_tbx4:
2630
return SelectTable(Node, 4, VT == MVT::v8i8 ? AArch64::TBXv8i8Four
2631
: AArch64::TBXv16i8Four,
2633
case Intrinsic::aarch64_neon_smull:
2634
case Intrinsic::aarch64_neon_umull:
2635
if (SDNode *N = SelectMULLV64LaneV128(IntNo, Node))
2641
case ISD::INTRINSIC_VOID: {
2642
unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue();
2643
if (Node->getNumOperands() >= 3)
2644
VT = Node->getOperand(2)->getValueType(0);
2648
case Intrinsic::aarch64_neon_st1x2: {
2649
if (VT == MVT::v8i8)
2650
return SelectStore(Node, 2, AArch64::ST1Twov8b);
2651
else if (VT == MVT::v16i8)
2652
return SelectStore(Node, 2, AArch64::ST1Twov16b);
2653
else if (VT == MVT::v4i16 || VT == MVT::v4f16)
2654
return SelectStore(Node, 2, AArch64::ST1Twov4h);
2655
else if (VT == MVT::v8i16 || VT == MVT::v8f16)
2656
return SelectStore(Node, 2, AArch64::ST1Twov8h);
2657
else if (VT == MVT::v2i32 || VT == MVT::v2f32)
2658
return SelectStore(Node, 2, AArch64::ST1Twov2s);
2659
else if (VT == MVT::v4i32 || VT == MVT::v4f32)
2660
return SelectStore(Node, 2, AArch64::ST1Twov4s);
2661
else if (VT == MVT::v2i64 || VT == MVT::v2f64)
2662
return SelectStore(Node, 2, AArch64::ST1Twov2d);
2663
else if (VT == MVT::v1i64 || VT == MVT::v1f64)
2664
return SelectStore(Node, 2, AArch64::ST1Twov1d);
2667
case Intrinsic::aarch64_neon_st1x3: {
2668
if (VT == MVT::v8i8)
2669
return SelectStore(Node, 3, AArch64::ST1Threev8b);
2670
else if (VT == MVT::v16i8)
2671
return SelectStore(Node, 3, AArch64::ST1Threev16b);
2672
else if (VT == MVT::v4i16 || VT == MVT::v4f16)
2673
return SelectStore(Node, 3, AArch64::ST1Threev4h);
2674
else if (VT == MVT::v8i16 || VT == MVT::v8f16)
2675
return SelectStore(Node, 3, AArch64::ST1Threev8h);
2676
else if (VT == MVT::v2i32 || VT == MVT::v2f32)
2677
return SelectStore(Node, 3, AArch64::ST1Threev2s);
2678
else if (VT == MVT::v4i32 || VT == MVT::v4f32)
2679
return SelectStore(Node, 3, AArch64::ST1Threev4s);
2680
else if (VT == MVT::v2i64 || VT == MVT::v2f64)
2681
return SelectStore(Node, 3, AArch64::ST1Threev2d);
2682
else if (VT == MVT::v1i64 || VT == MVT::v1f64)
2683
return SelectStore(Node, 3, AArch64::ST1Threev1d);
2686
case Intrinsic::aarch64_neon_st1x4: {
2687
if (VT == MVT::v8i8)
2688
return SelectStore(Node, 4, AArch64::ST1Fourv8b);
2689
else if (VT == MVT::v16i8)
2690
return SelectStore(Node, 4, AArch64::ST1Fourv16b);
2691
else if (VT == MVT::v4i16 || VT == MVT::v4f16)
2692
return SelectStore(Node, 4, AArch64::ST1Fourv4h);
2693
else if (VT == MVT::v8i16 || VT == MVT::v8f16)
2694
return SelectStore(Node, 4, AArch64::ST1Fourv8h);
2695
else if (VT == MVT::v2i32 || VT == MVT::v2f32)
2696
return SelectStore(Node, 4, AArch64::ST1Fourv2s);
2697
else if (VT == MVT::v4i32 || VT == MVT::v4f32)
2698
return SelectStore(Node, 4, AArch64::ST1Fourv4s);
2699
else if (VT == MVT::v2i64 || VT == MVT::v2f64)
2700
return SelectStore(Node, 4, AArch64::ST1Fourv2d);
2701
else if (VT == MVT::v1i64 || VT == MVT::v1f64)
2702
return SelectStore(Node, 4, AArch64::ST1Fourv1d);
2705
case Intrinsic::aarch64_neon_st2: {
2706
if (VT == MVT::v8i8)
2707
return SelectStore(Node, 2, AArch64::ST2Twov8b);
2708
else if (VT == MVT::v16i8)
2709
return SelectStore(Node, 2, AArch64::ST2Twov16b);
2710
else if (VT == MVT::v4i16 || VT == MVT::v4f16)
2711
return SelectStore(Node, 2, AArch64::ST2Twov4h);
2712
else if (VT == MVT::v8i16 || VT == MVT::v8f16)
2713
return SelectStore(Node, 2, AArch64::ST2Twov8h);
2714
else if (VT == MVT::v2i32 || VT == MVT::v2f32)
2715
return SelectStore(Node, 2, AArch64::ST2Twov2s);
2716
else if (VT == MVT::v4i32 || VT == MVT::v4f32)
2717
return SelectStore(Node, 2, AArch64::ST2Twov4s);
2718
else if (VT == MVT::v2i64 || VT == MVT::v2f64)
2719
return SelectStore(Node, 2, AArch64::ST2Twov2d);
2720
else if (VT == MVT::v1i64 || VT == MVT::v1f64)
2721
return SelectStore(Node, 2, AArch64::ST1Twov1d);
2724
case Intrinsic::aarch64_neon_st3: {
2725
if (VT == MVT::v8i8)
2726
return SelectStore(Node, 3, AArch64::ST3Threev8b);
2727
else if (VT == MVT::v16i8)
2728
return SelectStore(Node, 3, AArch64::ST3Threev16b);
2729
else if (VT == MVT::v4i16 || VT == MVT::v4f16)
2730
return SelectStore(Node, 3, AArch64::ST3Threev4h);
2731
else if (VT == MVT::v8i16 || VT == MVT::v8f16)
2732
return SelectStore(Node, 3, AArch64::ST3Threev8h);
2733
else if (VT == MVT::v2i32 || VT == MVT::v2f32)
2734
return SelectStore(Node, 3, AArch64::ST3Threev2s);
2735
else if (VT == MVT::v4i32 || VT == MVT::v4f32)
2736
return SelectStore(Node, 3, AArch64::ST3Threev4s);
2737
else if (VT == MVT::v2i64 || VT == MVT::v2f64)
2738
return SelectStore(Node, 3, AArch64::ST3Threev2d);
2739
else if (VT == MVT::v1i64 || VT == MVT::v1f64)
2740
return SelectStore(Node, 3, AArch64::ST1Threev1d);
2743
case Intrinsic::aarch64_neon_st4: {
2744
if (VT == MVT::v8i8)
2745
return SelectStore(Node, 4, AArch64::ST4Fourv8b);
2746
else if (VT == MVT::v16i8)
2747
return SelectStore(Node, 4, AArch64::ST4Fourv16b);
2748
else if (VT == MVT::v4i16 || VT == MVT::v4f16)
2749
return SelectStore(Node, 4, AArch64::ST4Fourv4h);
2750
else if (VT == MVT::v8i16 || VT == MVT::v8f16)
2751
return SelectStore(Node, 4, AArch64::ST4Fourv8h);
2752
else if (VT == MVT::v2i32 || VT == MVT::v2f32)
2753
return SelectStore(Node, 4, AArch64::ST4Fourv2s);
2754
else if (VT == MVT::v4i32 || VT == MVT::v4f32)
2755
return SelectStore(Node, 4, AArch64::ST4Fourv4s);
2756
else if (VT == MVT::v2i64 || VT == MVT::v2f64)
2757
return SelectStore(Node, 4, AArch64::ST4Fourv2d);
2758
else if (VT == MVT::v1i64 || VT == MVT::v1f64)
2759
return SelectStore(Node, 4, AArch64::ST1Fourv1d);
2762
case Intrinsic::aarch64_neon_st2lane: {
2763
if (VT == MVT::v16i8 || VT == MVT::v8i8)
2764
return SelectStoreLane(Node, 2, AArch64::ST2i8);
2765
else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
2767
return SelectStoreLane(Node, 2, AArch64::ST2i16);
2768
else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
2770
return SelectStoreLane(Node, 2, AArch64::ST2i32);
2771
else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
2773
return SelectStoreLane(Node, 2, AArch64::ST2i64);
2776
case Intrinsic::aarch64_neon_st3lane: {
2777
if (VT == MVT::v16i8 || VT == MVT::v8i8)
2778
return SelectStoreLane(Node, 3, AArch64::ST3i8);
2779
else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
2781
return SelectStoreLane(Node, 3, AArch64::ST3i16);
2782
else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
2784
return SelectStoreLane(Node, 3, AArch64::ST3i32);
2785
else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
2787
return SelectStoreLane(Node, 3, AArch64::ST3i64);
2790
case Intrinsic::aarch64_neon_st4lane: {
2791
if (VT == MVT::v16i8 || VT == MVT::v8i8)
2792
return SelectStoreLane(Node, 4, AArch64::ST4i8);
2793
else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
2795
return SelectStoreLane(Node, 4, AArch64::ST4i16);
2796
else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
2798
return SelectStoreLane(Node, 4, AArch64::ST4i32);
2799
else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
2801
return SelectStoreLane(Node, 4, AArch64::ST4i64);
2806
case AArch64ISD::LD2post: {
2807
if (VT == MVT::v8i8)
2808
return SelectPostLoad(Node, 2, AArch64::LD2Twov8b_POST, AArch64::dsub0);
2809
else if (VT == MVT::v16i8)
2810
return SelectPostLoad(Node, 2, AArch64::LD2Twov16b_POST, AArch64::qsub0);
2811
else if (VT == MVT::v4i16 || VT == MVT::v4f16)
2812
return SelectPostLoad(Node, 2, AArch64::LD2Twov4h_POST, AArch64::dsub0);
2813
else if (VT == MVT::v8i16 || VT == MVT::v8f16)
2814
return SelectPostLoad(Node, 2, AArch64::LD2Twov8h_POST, AArch64::qsub0);
2815
else if (VT == MVT::v2i32 || VT == MVT::v2f32)
2816
return SelectPostLoad(Node, 2, AArch64::LD2Twov2s_POST, AArch64::dsub0);
2817
else if (VT == MVT::v4i32 || VT == MVT::v4f32)
2818
return SelectPostLoad(Node, 2, AArch64::LD2Twov4s_POST, AArch64::qsub0);
2819
else if (VT == MVT::v1i64 || VT == MVT::v1f64)
2820
return SelectPostLoad(Node, 2, AArch64::LD1Twov1d_POST, AArch64::dsub0);
2821
else if (VT == MVT::v2i64 || VT == MVT::v2f64)
2822
return SelectPostLoad(Node, 2, AArch64::LD2Twov2d_POST, AArch64::qsub0);
2825
case AArch64ISD::LD3post: {
2826
if (VT == MVT::v8i8)
2827
return SelectPostLoad(Node, 3, AArch64::LD3Threev8b_POST, AArch64::dsub0);
2828
else if (VT == MVT::v16i8)
2829
return SelectPostLoad(Node, 3, AArch64::LD3Threev16b_POST, AArch64::qsub0);
2830
else if (VT == MVT::v4i16 || VT == MVT::v4f16)
2831
return SelectPostLoad(Node, 3, AArch64::LD3Threev4h_POST, AArch64::dsub0);
2832
else if (VT == MVT::v8i16 || VT == MVT::v8f16)
2833
return SelectPostLoad(Node, 3, AArch64::LD3Threev8h_POST, AArch64::qsub0);
2834
else if (VT == MVT::v2i32 || VT == MVT::v2f32)
2835
return SelectPostLoad(Node, 3, AArch64::LD3Threev2s_POST, AArch64::dsub0);
2836
else if (VT == MVT::v4i32 || VT == MVT::v4f32)
2837
return SelectPostLoad(Node, 3, AArch64::LD3Threev4s_POST, AArch64::qsub0);
2838
else if (VT == MVT::v1i64 || VT == MVT::v1f64)
2839
return SelectPostLoad(Node, 3, AArch64::LD1Threev1d_POST, AArch64::dsub0);
2840
else if (VT == MVT::v2i64 || VT == MVT::v2f64)
2841
return SelectPostLoad(Node, 3, AArch64::LD3Threev2d_POST, AArch64::qsub0);
2844
case AArch64ISD::LD4post: {
2845
if (VT == MVT::v8i8)
2846
return SelectPostLoad(Node, 4, AArch64::LD4Fourv8b_POST, AArch64::dsub0);
2847
else if (VT == MVT::v16i8)
2848
return SelectPostLoad(Node, 4, AArch64::LD4Fourv16b_POST, AArch64::qsub0);
2849
else if (VT == MVT::v4i16 || VT == MVT::v4f16)
2850
return SelectPostLoad(Node, 4, AArch64::LD4Fourv4h_POST, AArch64::dsub0);
2851
else if (VT == MVT::v8i16 || VT == MVT::v8f16)
2852
return SelectPostLoad(Node, 4, AArch64::LD4Fourv8h_POST, AArch64::qsub0);
2853
else if (VT == MVT::v2i32 || VT == MVT::v2f32)
2854
return SelectPostLoad(Node, 4, AArch64::LD4Fourv2s_POST, AArch64::dsub0);
2855
else if (VT == MVT::v4i32 || VT == MVT::v4f32)
2856
return SelectPostLoad(Node, 4, AArch64::LD4Fourv4s_POST, AArch64::qsub0);
2857
else if (VT == MVT::v1i64 || VT == MVT::v1f64)
2858
return SelectPostLoad(Node, 4, AArch64::LD1Fourv1d_POST, AArch64::dsub0);
2859
else if (VT == MVT::v2i64 || VT == MVT::v2f64)
2860
return SelectPostLoad(Node, 4, AArch64::LD4Fourv2d_POST, AArch64::qsub0);
2863
case AArch64ISD::LD1x2post: {
2864
if (VT == MVT::v8i8)
2865
return SelectPostLoad(Node, 2, AArch64::LD1Twov8b_POST, AArch64::dsub0);
2866
else if (VT == MVT::v16i8)
2867
return SelectPostLoad(Node, 2, AArch64::LD1Twov16b_POST, AArch64::qsub0);
2868
else if (VT == MVT::v4i16 || VT == MVT::v4f16)
2869
return SelectPostLoad(Node, 2, AArch64::LD1Twov4h_POST, AArch64::dsub0);
2870
else if (VT == MVT::v8i16 || VT == MVT::v8f16)
2871
return SelectPostLoad(Node, 2, AArch64::LD1Twov8h_POST, AArch64::qsub0);
2872
else if (VT == MVT::v2i32 || VT == MVT::v2f32)
2873
return SelectPostLoad(Node, 2, AArch64::LD1Twov2s_POST, AArch64::dsub0);
2874
else if (VT == MVT::v4i32 || VT == MVT::v4f32)
2875
return SelectPostLoad(Node, 2, AArch64::LD1Twov4s_POST, AArch64::qsub0);
2876
else if (VT == MVT::v1i64 || VT == MVT::v1f64)
2877
return SelectPostLoad(Node, 2, AArch64::LD1Twov1d_POST, AArch64::dsub0);
2878
else if (VT == MVT::v2i64 || VT == MVT::v2f64)
2879
return SelectPostLoad(Node, 2, AArch64::LD1Twov2d_POST, AArch64::qsub0);
2882
case AArch64ISD::LD1x3post: {
2883
if (VT == MVT::v8i8)
2884
return SelectPostLoad(Node, 3, AArch64::LD1Threev8b_POST, AArch64::dsub0);
2885
else if (VT == MVT::v16i8)
2886
return SelectPostLoad(Node, 3, AArch64::LD1Threev16b_POST, AArch64::qsub0);
2887
else if (VT == MVT::v4i16 || VT == MVT::v4f16)
2888
return SelectPostLoad(Node, 3, AArch64::LD1Threev4h_POST, AArch64::dsub0);
2889
else if (VT == MVT::v8i16 || VT == MVT::v8f16)
2890
return SelectPostLoad(Node, 3, AArch64::LD1Threev8h_POST, AArch64::qsub0);
2891
else if (VT == MVT::v2i32 || VT == MVT::v2f32)
2892
return SelectPostLoad(Node, 3, AArch64::LD1Threev2s_POST, AArch64::dsub0);
2893
else if (VT == MVT::v4i32 || VT == MVT::v4f32)
2894
return SelectPostLoad(Node, 3, AArch64::LD1Threev4s_POST, AArch64::qsub0);
2895
else if (VT == MVT::v1i64 || VT == MVT::v1f64)
2896
return SelectPostLoad(Node, 3, AArch64::LD1Threev1d_POST, AArch64::dsub0);
2897
else if (VT == MVT::v2i64 || VT == MVT::v2f64)
2898
return SelectPostLoad(Node, 3, AArch64::LD1Threev2d_POST, AArch64::qsub0);
2901
case AArch64ISD::LD1x4post: {
2902
if (VT == MVT::v8i8)
2903
return SelectPostLoad(Node, 4, AArch64::LD1Fourv8b_POST, AArch64::dsub0);
2904
else if (VT == MVT::v16i8)
2905
return SelectPostLoad(Node, 4, AArch64::LD1Fourv16b_POST, AArch64::qsub0);
2906
else if (VT == MVT::v4i16 || VT == MVT::v4f16)
2907
return SelectPostLoad(Node, 4, AArch64::LD1Fourv4h_POST, AArch64::dsub0);
2908
else if (VT == MVT::v8i16 || VT == MVT::v8f16)
2909
return SelectPostLoad(Node, 4, AArch64::LD1Fourv8h_POST, AArch64::qsub0);
2910
else if (VT == MVT::v2i32 || VT == MVT::v2f32)
2911
return SelectPostLoad(Node, 4, AArch64::LD1Fourv2s_POST, AArch64::dsub0);
2912
else if (VT == MVT::v4i32 || VT == MVT::v4f32)
2913
return SelectPostLoad(Node, 4, AArch64::LD1Fourv4s_POST, AArch64::qsub0);
2914
else if (VT == MVT::v1i64 || VT == MVT::v1f64)
2915
return SelectPostLoad(Node, 4, AArch64::LD1Fourv1d_POST, AArch64::dsub0);
2916
else if (VT == MVT::v2i64 || VT == MVT::v2f64)
2917
return SelectPostLoad(Node, 4, AArch64::LD1Fourv2d_POST, AArch64::qsub0);
2920
case AArch64ISD::LD1DUPpost: {
2921
if (VT == MVT::v8i8)
2922
return SelectPostLoad(Node, 1, AArch64::LD1Rv8b_POST, AArch64::dsub0);
2923
else if (VT == MVT::v16i8)
2924
return SelectPostLoad(Node, 1, AArch64::LD1Rv16b_POST, AArch64::qsub0);
2925
else if (VT == MVT::v4i16 || VT == MVT::v4f16)
2926
return SelectPostLoad(Node, 1, AArch64::LD1Rv4h_POST, AArch64::dsub0);
2927
else if (VT == MVT::v8i16 || VT == MVT::v8f16)
2928
return SelectPostLoad(Node, 1, AArch64::LD1Rv8h_POST, AArch64::qsub0);
2929
else if (VT == MVT::v2i32 || VT == MVT::v2f32)
2930
return SelectPostLoad(Node, 1, AArch64::LD1Rv2s_POST, AArch64::dsub0);
2931
else if (VT == MVT::v4i32 || VT == MVT::v4f32)
2932
return SelectPostLoad(Node, 1, AArch64::LD1Rv4s_POST, AArch64::qsub0);
2933
else if (VT == MVT::v1i64 || VT == MVT::v1f64)
2934
return SelectPostLoad(Node, 1, AArch64::LD1Rv1d_POST, AArch64::dsub0);
2935
else if (VT == MVT::v2i64 || VT == MVT::v2f64)
2936
return SelectPostLoad(Node, 1, AArch64::LD1Rv2d_POST, AArch64::qsub0);
2939
case AArch64ISD::LD2DUPpost: {
2940
if (VT == MVT::v8i8)
2941
return SelectPostLoad(Node, 2, AArch64::LD2Rv8b_POST, AArch64::dsub0);
2942
else if (VT == MVT::v16i8)
2943
return SelectPostLoad(Node, 2, AArch64::LD2Rv16b_POST, AArch64::qsub0);
2944
else if (VT == MVT::v4i16 || VT == MVT::v4f16)
2945
return SelectPostLoad(Node, 2, AArch64::LD2Rv4h_POST, AArch64::dsub0);
2946
else if (VT == MVT::v8i16 || VT == MVT::v8f16)
2947
return SelectPostLoad(Node, 2, AArch64::LD2Rv8h_POST, AArch64::qsub0);
2948
else if (VT == MVT::v2i32 || VT == MVT::v2f32)
2949
return SelectPostLoad(Node, 2, AArch64::LD2Rv2s_POST, AArch64::dsub0);
2950
else if (VT == MVT::v4i32 || VT == MVT::v4f32)
2951
return SelectPostLoad(Node, 2, AArch64::LD2Rv4s_POST, AArch64::qsub0);
2952
else if (VT == MVT::v1i64 || VT == MVT::v1f64)
2953
return SelectPostLoad(Node, 2, AArch64::LD2Rv1d_POST, AArch64::dsub0);
2954
else if (VT == MVT::v2i64 || VT == MVT::v2f64)
2955
return SelectPostLoad(Node, 2, AArch64::LD2Rv2d_POST, AArch64::qsub0);
2958
case AArch64ISD::LD3DUPpost: {
2959
if (VT == MVT::v8i8)
2960
return SelectPostLoad(Node, 3, AArch64::LD3Rv8b_POST, AArch64::dsub0);
2961
else if (VT == MVT::v16i8)
2962
return SelectPostLoad(Node, 3, AArch64::LD3Rv16b_POST, AArch64::qsub0);
2963
else if (VT == MVT::v4i16 || VT == MVT::v4f16)
2964
return SelectPostLoad(Node, 3, AArch64::LD3Rv4h_POST, AArch64::dsub0);
2965
else if (VT == MVT::v8i16 || VT == MVT::v8f16)
2966
return SelectPostLoad(Node, 3, AArch64::LD3Rv8h_POST, AArch64::qsub0);
2967
else if (VT == MVT::v2i32 || VT == MVT::v2f32)
2968
return SelectPostLoad(Node, 3, AArch64::LD3Rv2s_POST, AArch64::dsub0);
2969
else if (VT == MVT::v4i32 || VT == MVT::v4f32)
2970
return SelectPostLoad(Node, 3, AArch64::LD3Rv4s_POST, AArch64::qsub0);
2971
else if (VT == MVT::v1i64 || VT == MVT::v1f64)
2972
return SelectPostLoad(Node, 3, AArch64::LD3Rv1d_POST, AArch64::dsub0);
2973
else if (VT == MVT::v2i64 || VT == MVT::v2f64)
2974
return SelectPostLoad(Node, 3, AArch64::LD3Rv2d_POST, AArch64::qsub0);
2977
case AArch64ISD::LD4DUPpost: {
2978
if (VT == MVT::v8i8)
2979
return SelectPostLoad(Node, 4, AArch64::LD4Rv8b_POST, AArch64::dsub0);
2980
else if (VT == MVT::v16i8)
2981
return SelectPostLoad(Node, 4, AArch64::LD4Rv16b_POST, AArch64::qsub0);
2982
else if (VT == MVT::v4i16 || VT == MVT::v4f16)
2983
return SelectPostLoad(Node, 4, AArch64::LD4Rv4h_POST, AArch64::dsub0);
2984
else if (VT == MVT::v8i16 || VT == MVT::v8f16)
2985
return SelectPostLoad(Node, 4, AArch64::LD4Rv8h_POST, AArch64::qsub0);
2986
else if (VT == MVT::v2i32 || VT == MVT::v2f32)
2987
return SelectPostLoad(Node, 4, AArch64::LD4Rv2s_POST, AArch64::dsub0);
2988
else if (VT == MVT::v4i32 || VT == MVT::v4f32)
2989
return SelectPostLoad(Node, 4, AArch64::LD4Rv4s_POST, AArch64::qsub0);
2990
else if (VT == MVT::v1i64 || VT == MVT::v1f64)
2991
return SelectPostLoad(Node, 4, AArch64::LD4Rv1d_POST, AArch64::dsub0);
2992
else if (VT == MVT::v2i64 || VT == MVT::v2f64)
2993
return SelectPostLoad(Node, 4, AArch64::LD4Rv2d_POST, AArch64::qsub0);
2996
case AArch64ISD::LD1LANEpost: {
2997
if (VT == MVT::v16i8 || VT == MVT::v8i8)
2998
return SelectPostLoadLane(Node, 1, AArch64::LD1i8_POST);
2999
else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
3001
return SelectPostLoadLane(Node, 1, AArch64::LD1i16_POST);
3002
else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
3004
return SelectPostLoadLane(Node, 1, AArch64::LD1i32_POST);
3005
else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
3007
return SelectPostLoadLane(Node, 1, AArch64::LD1i64_POST);
3010
case AArch64ISD::LD2LANEpost: {
3011
if (VT == MVT::v16i8 || VT == MVT::v8i8)
3012
return SelectPostLoadLane(Node, 2, AArch64::LD2i8_POST);
3013
else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
3015
return SelectPostLoadLane(Node, 2, AArch64::LD2i16_POST);
3016
else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
3018
return SelectPostLoadLane(Node, 2, AArch64::LD2i32_POST);
3019
else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
3021
return SelectPostLoadLane(Node, 2, AArch64::LD2i64_POST);
3024
case AArch64ISD::LD3LANEpost: {
3025
if (VT == MVT::v16i8 || VT == MVT::v8i8)
3026
return SelectPostLoadLane(Node, 3, AArch64::LD3i8_POST);
3027
else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
3029
return SelectPostLoadLane(Node, 3, AArch64::LD3i16_POST);
3030
else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
3032
return SelectPostLoadLane(Node, 3, AArch64::LD3i32_POST);
3033
else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
3035
return SelectPostLoadLane(Node, 3, AArch64::LD3i64_POST);
3038
case AArch64ISD::LD4LANEpost: {
3039
if (VT == MVT::v16i8 || VT == MVT::v8i8)
3040
return SelectPostLoadLane(Node, 4, AArch64::LD4i8_POST);
3041
else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
3043
return SelectPostLoadLane(Node, 4, AArch64::LD4i16_POST);
3044
else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
3046
return SelectPostLoadLane(Node, 4, AArch64::LD4i32_POST);
3047
else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
3049
return SelectPostLoadLane(Node, 4, AArch64::LD4i64_POST);
3052
case AArch64ISD::ST2post: {
3053
VT = Node->getOperand(1).getValueType();
3054
if (VT == MVT::v8i8)
3055
return SelectPostStore(Node, 2, AArch64::ST2Twov8b_POST);
3056
else if (VT == MVT::v16i8)
3057
return SelectPostStore(Node, 2, AArch64::ST2Twov16b_POST);
3058
else if (VT == MVT::v4i16 || VT == MVT::v4f16)
3059
return SelectPostStore(Node, 2, AArch64::ST2Twov4h_POST);
3060
else if (VT == MVT::v8i16 || VT == MVT::v8f16)
3061
return SelectPostStore(Node, 2, AArch64::ST2Twov8h_POST);
3062
else if (VT == MVT::v2i32 || VT == MVT::v2f32)
3063
return SelectPostStore(Node, 2, AArch64::ST2Twov2s_POST);
3064
else if (VT == MVT::v4i32 || VT == MVT::v4f32)
3065
return SelectPostStore(Node, 2, AArch64::ST2Twov4s_POST);
3066
else if (VT == MVT::v2i64 || VT == MVT::v2f64)
3067
return SelectPostStore(Node, 2, AArch64::ST2Twov2d_POST);
3068
else if (VT == MVT::v1i64 || VT == MVT::v1f64)
3069
return SelectPostStore(Node, 2, AArch64::ST1Twov1d_POST);
3072
case AArch64ISD::ST3post: {
3073
VT = Node->getOperand(1).getValueType();
3074
if (VT == MVT::v8i8)
3075
return SelectPostStore(Node, 3, AArch64::ST3Threev8b_POST);
3076
else if (VT == MVT::v16i8)
3077
return SelectPostStore(Node, 3, AArch64::ST3Threev16b_POST);
3078
else if (VT == MVT::v4i16 || VT == MVT::v4f16)
3079
return SelectPostStore(Node, 3, AArch64::ST3Threev4h_POST);
3080
else if (VT == MVT::v8i16 || VT == MVT::v8f16)
3081
return SelectPostStore(Node, 3, AArch64::ST3Threev8h_POST);
3082
else if (VT == MVT::v2i32 || VT == MVT::v2f32)
3083
return SelectPostStore(Node, 3, AArch64::ST3Threev2s_POST);
3084
else if (VT == MVT::v4i32 || VT == MVT::v4f32)
3085
return SelectPostStore(Node, 3, AArch64::ST3Threev4s_POST);
3086
else if (VT == MVT::v2i64 || VT == MVT::v2f64)
3087
return SelectPostStore(Node, 3, AArch64::ST3Threev2d_POST);
3088
else if (VT == MVT::v1i64 || VT == MVT::v1f64)
3089
return SelectPostStore(Node, 3, AArch64::ST1Threev1d_POST);
3092
case AArch64ISD::ST4post: {
3093
VT = Node->getOperand(1).getValueType();
3094
if (VT == MVT::v8i8)
3095
return SelectPostStore(Node, 4, AArch64::ST4Fourv8b_POST);
3096
else if (VT == MVT::v16i8)
3097
return SelectPostStore(Node, 4, AArch64::ST4Fourv16b_POST);
3098
else if (VT == MVT::v4i16 || VT == MVT::v4f16)
3099
return SelectPostStore(Node, 4, AArch64::ST4Fourv4h_POST);
3100
else if (VT == MVT::v8i16 || VT == MVT::v8f16)
3101
return SelectPostStore(Node, 4, AArch64::ST4Fourv8h_POST);
3102
else if (VT == MVT::v2i32 || VT == MVT::v2f32)
3103
return SelectPostStore(Node, 4, AArch64::ST4Fourv2s_POST);
3104
else if (VT == MVT::v4i32 || VT == MVT::v4f32)
3105
return SelectPostStore(Node, 4, AArch64::ST4Fourv4s_POST);
3106
else if (VT == MVT::v2i64 || VT == MVT::v2f64)
3107
return SelectPostStore(Node, 4, AArch64::ST4Fourv2d_POST);
3108
else if (VT == MVT::v1i64 || VT == MVT::v1f64)
3109
return SelectPostStore(Node, 4, AArch64::ST1Fourv1d_POST);
3112
case AArch64ISD::ST1x2post: {
3113
VT = Node->getOperand(1).getValueType();
3114
if (VT == MVT::v8i8)
3115
return SelectPostStore(Node, 2, AArch64::ST1Twov8b_POST);
3116
else if (VT == MVT::v16i8)
3117
return SelectPostStore(Node, 2, AArch64::ST1Twov16b_POST);
3118
else if (VT == MVT::v4i16 || VT == MVT::v4f16)
3119
return SelectPostStore(Node, 2, AArch64::ST1Twov4h_POST);
3120
else if (VT == MVT::v8i16 || VT == MVT::v8f16)
3121
return SelectPostStore(Node, 2, AArch64::ST1Twov8h_POST);
3122
else if (VT == MVT::v2i32 || VT == MVT::v2f32)
3123
return SelectPostStore(Node, 2, AArch64::ST1Twov2s_POST);
3124
else if (VT == MVT::v4i32 || VT == MVT::v4f32)
3125
return SelectPostStore(Node, 2, AArch64::ST1Twov4s_POST);
3126
else if (VT == MVT::v1i64 || VT == MVT::v1f64)
3127
return SelectPostStore(Node, 2, AArch64::ST1Twov1d_POST);
3128
else if (VT == MVT::v2i64 || VT == MVT::v2f64)
3129
return SelectPostStore(Node, 2, AArch64::ST1Twov2d_POST);
3132
case AArch64ISD::ST1x3post: {
3133
VT = Node->getOperand(1).getValueType();
3134
if (VT == MVT::v8i8)
3135
return SelectPostStore(Node, 3, AArch64::ST1Threev8b_POST);
3136
else if (VT == MVT::v16i8)
3137
return SelectPostStore(Node, 3, AArch64::ST1Threev16b_POST);
3138
else if (VT == MVT::v4i16 || VT == MVT::v4f16)
3139
return SelectPostStore(Node, 3, AArch64::ST1Threev4h_POST);
3140
else if (VT == MVT::v8i16 || VT == MVT::v8f16)
3141
return SelectPostStore(Node, 3, AArch64::ST1Threev8h_POST);
3142
else if (VT == MVT::v2i32 || VT == MVT::v2f32)
3143
return SelectPostStore(Node, 3, AArch64::ST1Threev2s_POST);
3144
else if (VT == MVT::v4i32 || VT == MVT::v4f32)
3145
return SelectPostStore(Node, 3, AArch64::ST1Threev4s_POST);
3146
else if (VT == MVT::v1i64 || VT == MVT::v1f64)
3147
return SelectPostStore(Node, 3, AArch64::ST1Threev1d_POST);
3148
else if (VT == MVT::v2i64 || VT == MVT::v2f64)
3149
return SelectPostStore(Node, 3, AArch64::ST1Threev2d_POST);
3152
case AArch64ISD::ST1x4post: {
3153
VT = Node->getOperand(1).getValueType();
3154
if (VT == MVT::v8i8)
3155
return SelectPostStore(Node, 4, AArch64::ST1Fourv8b_POST);
3156
else if (VT == MVT::v16i8)
3157
return SelectPostStore(Node, 4, AArch64::ST1Fourv16b_POST);
3158
else if (VT == MVT::v4i16 || VT == MVT::v4f16)
3159
return SelectPostStore(Node, 4, AArch64::ST1Fourv4h_POST);
3160
else if (VT == MVT::v8i16 || VT == MVT::v8f16)
3161
return SelectPostStore(Node, 4, AArch64::ST1Fourv8h_POST);
3162
else if (VT == MVT::v2i32 || VT == MVT::v2f32)
3163
return SelectPostStore(Node, 4, AArch64::ST1Fourv2s_POST);
3164
else if (VT == MVT::v4i32 || VT == MVT::v4f32)
3165
return SelectPostStore(Node, 4, AArch64::ST1Fourv4s_POST);
3166
else if (VT == MVT::v1i64 || VT == MVT::v1f64)
3167
return SelectPostStore(Node, 4, AArch64::ST1Fourv1d_POST);
3168
else if (VT == MVT::v2i64 || VT == MVT::v2f64)
3169
return SelectPostStore(Node, 4, AArch64::ST1Fourv2d_POST);
3172
case AArch64ISD::ST2LANEpost: {
3173
VT = Node->getOperand(1).getValueType();
3174
if (VT == MVT::v16i8 || VT == MVT::v8i8)
3175
return SelectPostStoreLane(Node, 2, AArch64::ST2i8_POST);
3176
else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
3178
return SelectPostStoreLane(Node, 2, AArch64::ST2i16_POST);
3179
else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
3181
return SelectPostStoreLane(Node, 2, AArch64::ST2i32_POST);
3182
else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
3184
return SelectPostStoreLane(Node, 2, AArch64::ST2i64_POST);
3187
case AArch64ISD::ST3LANEpost: {
3188
VT = Node->getOperand(1).getValueType();
3189
if (VT == MVT::v16i8 || VT == MVT::v8i8)
3190
return SelectPostStoreLane(Node, 3, AArch64::ST3i8_POST);
3191
else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
3193
return SelectPostStoreLane(Node, 3, AArch64::ST3i16_POST);
3194
else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
3196
return SelectPostStoreLane(Node, 3, AArch64::ST3i32_POST);
3197
else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
3199
return SelectPostStoreLane(Node, 3, AArch64::ST3i64_POST);
3202
case AArch64ISD::ST4LANEpost: {
3203
VT = Node->getOperand(1).getValueType();
3204
if (VT == MVT::v16i8 || VT == MVT::v8i8)
3205
return SelectPostStoreLane(Node, 4, AArch64::ST4i8_POST);
3206
else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
3208
return SelectPostStoreLane(Node, 4, AArch64::ST4i16_POST);
3209
else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
3211
return SelectPostStoreLane(Node, 4, AArch64::ST4i32_POST);
3212
else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
3214
return SelectPostStoreLane(Node, 4, AArch64::ST4i64_POST);
    break;
  }
  case ISD::FCEIL:
  case ISD::FFLOOR:
  case ISD::FTRUNC:
  case ISD::FROUND:
    if (SDNode *I = SelectLIBM(Node))
      return I;
    break;
  }

  // Select the default instruction
  ResNode = SelectCode(Node);

  DEBUG(errs() << "=> ");
  if (ResNode == nullptr || ResNode == Node)
    DEBUG(Node->dump(CurDAG));
  else
    DEBUG(ResNode->dump(CurDAG));
  DEBUG(errs() << "\n");

  return ResNode;
}

/// createAArch64ISelDag - This pass converts a legalized DAG into a
/// AArch64-specific DAG, ready for instruction scheduling.
FunctionPass *llvm::createAArch64ISelDag(AArch64TargetMachine &TM,
                                         CodeGenOpt::Level OptLevel) {
  return new AArch64DAGToDAGISel(TM, OptLevel);
}