//===-- SPUISelLowering.cpp - Cell SPU DAG Lowering Implementation --------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the SPUTargetLowering class.
//
//===----------------------------------------------------------------------===//
13
#include "SPUISelLowering.h"
#include "SPUTargetMachine.h"
#include "SPUFrameLowering.h"
#include "SPUMachineFunction.h"
#include "llvm/Constants.h"
#include "llvm/Function.h"
#include "llvm/Intrinsics.h"
#include "llvm/CallingConv.h"
#include "llvm/Type.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
38
// Byte offset of the preferred slot (counted from the MSB)
39
int prefslotOffset(EVT VT) {
41
if (VT==MVT::i1) retval=3;
42
if (VT==MVT::i8) retval=3;
43
if (VT==MVT::i16) retval=2;
48
//! Expand a library call into an actual call DAG node
51
This code is taken from SelectionDAGLegalize, since it is not exposed as
52
part of the LLVM SelectionDAG API.
56
ExpandLibCall(RTLIB::Libcall LC, SDValue Op, SelectionDAG &DAG,
57
bool isSigned, SDValue &Hi, const SPUTargetLowering &TLI) {
58
// The input chain to this libcall is the entry node of the function.
59
// Legalizing the call will automatically add the previous call to the
61
SDValue InChain = DAG.getEntryNode();
63
TargetLowering::ArgListTy Args;
64
TargetLowering::ArgListEntry Entry;
65
for (unsigned i = 0, e = Op.getNumOperands(); i != e; ++i) {
66
EVT ArgVT = Op.getOperand(i).getValueType();
67
Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
68
Entry.Node = Op.getOperand(i);
70
Entry.isSExt = isSigned;
71
Entry.isZExt = !isSigned;
72
Args.push_back(Entry);
74
SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC),
77
// Splice the libcall in wherever FindInputOutputChains tells us to.
79
Op.getNode()->getValueType(0).getTypeForEVT(*DAG.getContext());
80
std::pair<SDValue, SDValue> CallInfo =
81
TLI.LowerCallTo(InChain, RetTy, isSigned, !isSigned, false, false,
82
0, TLI.getLibcallCallingConv(LC),
84
/*doesNotRet=*/false, /*isReturnValueUsed=*/true,
85
Callee, Args, DAG, Op.getDebugLoc());
87
return CallInfo.first;
91
/// Construct the Cell SPU target lowering: registers the SPU register
/// classes and declares, per (operation, type) pair, whether the operation
/// is Legal, needs Custom lowering, gets Promoted, or must be Expanded.
SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
  : TargetLowering(TM, new TargetLoweringObjectFileELF()),
    SPUTM(TM) {

  // Use _setjmp/_longjmp instead of setjmp/longjmp.
  setUseUnderscoreSetJmp(true);
  setUseUnderscoreLongJmp(true);

  // Set RTLIB libcall names as used by SPU:
  setLibcallName(RTLIB::DIV_F64, "__fast_divdf3");

  // Set up the SPU's register classes:
  addRegisterClass(MVT::i8,   SPU::R8CRegisterClass);
  addRegisterClass(MVT::i16,  SPU::R16CRegisterClass);
  addRegisterClass(MVT::i32,  SPU::R32CRegisterClass);
  addRegisterClass(MVT::i64,  SPU::R64CRegisterClass);
  addRegisterClass(MVT::f32,  SPU::R32FPRegisterClass);
  addRegisterClass(MVT::f64,  SPU::R64FPRegisterClass);
  addRegisterClass(MVT::i128, SPU::GPRCRegisterClass);

  // SPU has no sign or zero extended loads for i1, i8, i16:
  setLoadExtAction(ISD::EXTLOAD,  MVT::i1, Promote);
  setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
  setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote);

  setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand);
  setLoadExtAction(ISD::EXTLOAD, MVT::f64, Expand);

  setTruncStoreAction(MVT::i128, MVT::i64, Expand);
  setTruncStoreAction(MVT::i128, MVT::i32, Expand);
  setTruncStoreAction(MVT::i128, MVT::i16, Expand);
  setTruncStoreAction(MVT::i128, MVT::i8,  Expand);

  setTruncStoreAction(MVT::f64, MVT::f32, Expand);

  // SPU constant load actions are custom lowered:
  setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
  setOperationAction(ISD::ConstantFP, MVT::f64, Custom);

  // SPU's loads and stores have to be custom lowered:
  for (unsigned sctype = (unsigned) MVT::i8; sctype < (unsigned) MVT::i128;
       ++sctype) {
    MVT::SimpleValueType VT = (MVT::SimpleValueType)sctype;

    setOperationAction(ISD::LOAD,   VT, Custom);
    setOperationAction(ISD::STORE,  VT, Custom);
    setLoadExtAction(ISD::EXTLOAD,  VT, Custom);
    setLoadExtAction(ISD::ZEXTLOAD, VT, Custom);
    setLoadExtAction(ISD::SEXTLOAD, VT, Custom);

    // Truncating stores to any narrower integer type are expanded.
    for (unsigned stype = sctype - 1; stype >= (unsigned) MVT::i8; --stype) {
      MVT::SimpleValueType StoreVT = (MVT::SimpleValueType) stype;
      setTruncStoreAction(VT, StoreVT, Expand);
    }
  }

  for (unsigned sctype = (unsigned) MVT::f32; sctype < (unsigned) MVT::f64;
       ++sctype) {
    MVT::SimpleValueType VT = (MVT::SimpleValueType) sctype;

    setOperationAction(ISD::LOAD,  VT, Custom);
    setOperationAction(ISD::STORE, VT, Custom);

    for (unsigned stype = sctype - 1; stype >= (unsigned) MVT::f32; --stype) {
      MVT::SimpleValueType StoreVT = (MVT::SimpleValueType) stype;
      setTruncStoreAction(VT, StoreVT, Expand);
    }
  }

  // Expand the jumptable branches
  setOperationAction(ISD::BR_JT, MVT::Other, Expand);
  setOperationAction(ISD::BR_CC, MVT::Other, Expand);

  // Custom lower SELECT_CC for most cases, but expand by default
  setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);
  setOperationAction(ISD::SELECT_CC, MVT::i8,    Custom);
  setOperationAction(ISD::SELECT_CC, MVT::i16,   Custom);
  setOperationAction(ISD::SELECT_CC, MVT::i32,   Custom);
  setOperationAction(ISD::SELECT_CC, MVT::i64,   Custom);

  // SPU has no intrinsics for these particular operations:
  setOperationAction(ISD::MEMBARRIER,   MVT::Other, Expand);
  setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Expand);

  // SPU has no division/remainder instructions
  setOperationAction(ISD::SREM,    MVT::i8,   Expand);
  setOperationAction(ISD::UREM,    MVT::i8,   Expand);
  setOperationAction(ISD::SDIV,    MVT::i8,   Expand);
  setOperationAction(ISD::UDIV,    MVT::i8,   Expand);
  setOperationAction(ISD::SDIVREM, MVT::i8,   Expand);
  setOperationAction(ISD::UDIVREM, MVT::i8,   Expand);
  setOperationAction(ISD::SREM,    MVT::i16,  Expand);
  setOperationAction(ISD::UREM,    MVT::i16,  Expand);
  setOperationAction(ISD::SDIV,    MVT::i16,  Expand);
  setOperationAction(ISD::UDIV,    MVT::i16,  Expand);
  setOperationAction(ISD::SDIVREM, MVT::i16,  Expand);
  setOperationAction(ISD::UDIVREM, MVT::i16,  Expand);
  setOperationAction(ISD::SREM,    MVT::i32,  Expand);
  setOperationAction(ISD::UREM,    MVT::i32,  Expand);
  setOperationAction(ISD::SDIV,    MVT::i32,  Expand);
  setOperationAction(ISD::UDIV,    MVT::i32,  Expand);
  setOperationAction(ISD::SDIVREM, MVT::i32,  Expand);
  setOperationAction(ISD::UDIVREM, MVT::i32,  Expand);
  setOperationAction(ISD::SREM,    MVT::i64,  Expand);
  setOperationAction(ISD::UREM,    MVT::i64,  Expand);
  setOperationAction(ISD::SDIV,    MVT::i64,  Expand);
  setOperationAction(ISD::UDIV,    MVT::i64,  Expand);
  setOperationAction(ISD::SDIVREM, MVT::i64,  Expand);
  setOperationAction(ISD::UDIVREM, MVT::i64,  Expand);
  setOperationAction(ISD::SREM,    MVT::i128, Expand);
  setOperationAction(ISD::UREM,    MVT::i128, Expand);
  setOperationAction(ISD::SDIV,    MVT::i128, Expand);
  setOperationAction(ISD::UDIV,    MVT::i128, Expand);
  setOperationAction(ISD::SDIVREM, MVT::i128, Expand);
  setOperationAction(ISD::UDIVREM, MVT::i128, Expand);

  // We don't support sin/cos/sqrt/fmod
  setOperationAction(ISD::FSIN, MVT::f64, Expand);
  setOperationAction(ISD::FCOS, MVT::f64, Expand);
  setOperationAction(ISD::FREM, MVT::f64, Expand);
  setOperationAction(ISD::FSIN, MVT::f32, Expand);
  setOperationAction(ISD::FCOS, MVT::f32, Expand);
  setOperationAction(ISD::FREM, MVT::f32, Expand);

  // Expand fsqrt to the appropriate libcall (NOTE: should use h/w fsqrt
  // for f32!)
  setOperationAction(ISD::FSQRT, MVT::f64, Expand);
  setOperationAction(ISD::FSQRT, MVT::f32, Expand);

  setOperationAction(ISD::FMA, MVT::f64, Expand);
  setOperationAction(ISD::FMA, MVT::f32, Expand);

  setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
  setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);

  // SPU can do rotate right and left, so legalize it... but customize for i8
  // because instructions don't exist.

  // FIXME: Change from "expand" to appropriate type once ROTR is supported in
  //        custom DAG->DAG scheme.
  setOperationAction(ISD::ROTR, MVT::i32, Expand /*Legal*/);
  setOperationAction(ISD::ROTR, MVT::i16, Expand /*Legal*/);
  setOperationAction(ISD::ROTR, MVT::i8,  Expand /*Custom*/);

  setOperationAction(ISD::ROTL, MVT::i32, Legal);
  setOperationAction(ISD::ROTL, MVT::i16, Legal);
  setOperationAction(ISD::ROTL, MVT::i8,  Custom);

  // SPU has no native version of shift left/right for i8
  setOperationAction(ISD::SHL, MVT::i8, Custom);
  setOperationAction(ISD::SRL, MVT::i8, Custom);
  setOperationAction(ISD::SRA, MVT::i8, Custom);

  // Make these operations legal and handle them during instruction selection:
  setOperationAction(ISD::SHL, MVT::i64, Legal);
  setOperationAction(ISD::SRL, MVT::i64, Legal);
  setOperationAction(ISD::SRA, MVT::i64, Legal);

  // Custom lower i8, i32 and i64 multiplications
  setOperationAction(ISD::MUL, MVT::i8,  Custom);
  setOperationAction(ISD::MUL, MVT::i32, Legal);
  setOperationAction(ISD::MUL, MVT::i64, Legal);

  // Expand double-width multiplication
  // FIXME: It would probably be reasonable to support some of these operations
  setOperationAction(ISD::UMUL_LOHI, MVT::i8,  Expand);
  setOperationAction(ISD::SMUL_LOHI, MVT::i8,  Expand);
  setOperationAction(ISD::MULHU,     MVT::i8,  Expand);
  setOperationAction(ISD::MULHS,     MVT::i8,  Expand);
  setOperationAction(ISD::UMUL_LOHI, MVT::i16, Expand);
  setOperationAction(ISD::SMUL_LOHI, MVT::i16, Expand);
  setOperationAction(ISD::MULHU,     MVT::i16, Expand);
  setOperationAction(ISD::MULHS,     MVT::i16, Expand);
  setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
  setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
  setOperationAction(ISD::MULHU,     MVT::i32, Expand);
  setOperationAction(ISD::MULHS,     MVT::i32, Expand);
  setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
  setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
  setOperationAction(ISD::MULHU,     MVT::i64, Expand);
  setOperationAction(ISD::MULHS,     MVT::i64, Expand);

  // Need to custom handle (some) common i8, i64 math ops
  setOperationAction(ISD::ADD, MVT::i8,  Custom);
  setOperationAction(ISD::ADD, MVT::i64, Legal);
  setOperationAction(ISD::SUB, MVT::i8,  Custom);
  setOperationAction(ISD::SUB, MVT::i64, Legal);

  // SPU does not have BSWAP. It does have i32 support CTLZ.
  // CTPOP has to be custom lowered.
  setOperationAction(ISD::BSWAP, MVT::i32, Expand);
  setOperationAction(ISD::BSWAP, MVT::i64, Expand);

  setOperationAction(ISD::CTPOP, MVT::i8,   Custom);
  setOperationAction(ISD::CTPOP, MVT::i16,  Custom);
  setOperationAction(ISD::CTPOP, MVT::i32,  Custom);
  setOperationAction(ISD::CTPOP, MVT::i64,  Custom);
  setOperationAction(ISD::CTPOP, MVT::i128, Expand);

  setOperationAction(ISD::CTTZ,            MVT::i8,   Expand);
  setOperationAction(ISD::CTTZ,            MVT::i16,  Expand);
  setOperationAction(ISD::CTTZ,            MVT::i32,  Expand);
  setOperationAction(ISD::CTTZ,            MVT::i64,  Expand);
  setOperationAction(ISD::CTTZ,            MVT::i128, Expand);
  setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i8,   Expand);
  setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i16,  Expand);
  setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32,  Expand);
  setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64,  Expand);
  setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i128, Expand);

  setOperationAction(ISD::CTLZ,            MVT::i8,   Promote);
  setOperationAction(ISD::CTLZ,            MVT::i16,  Promote);
  setOperationAction(ISD::CTLZ,            MVT::i32,  Legal);
  setOperationAction(ISD::CTLZ,            MVT::i64,  Expand);
  setOperationAction(ISD::CTLZ,            MVT::i128, Expand);
  setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i8,   Expand);
  setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i16,  Expand);
  setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32,  Expand);
  setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64,  Expand);
  setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i128, Expand);

  // SPU has a version of select that implements (a&~c)|(b&c), just like
  // select ought to work:
  setOperationAction(ISD::SELECT, MVT::i8,  Legal);
  setOperationAction(ISD::SELECT, MVT::i16, Legal);
  setOperationAction(ISD::SELECT, MVT::i32, Legal);
  setOperationAction(ISD::SELECT, MVT::i64, Legal);

  setOperationAction(ISD::SETCC, MVT::i8,  Legal);
  setOperationAction(ISD::SETCC, MVT::i16, Legal);
  setOperationAction(ISD::SETCC, MVT::i32, Legal);
  setOperationAction(ISD::SETCC, MVT::i64, Legal);
  setOperationAction(ISD::SETCC, MVT::f64, Custom);

  // Custom lower i128 -> i64 truncates
  setOperationAction(ISD::TRUNCATE, MVT::i64, Custom);

  // Custom lower i32/i64 -> i128 sign extend
  setOperationAction(ISD::SIGN_EXTEND, MVT::i128, Custom);

  setOperationAction(ISD::FP_TO_SINT, MVT::i8,  Promote);
  setOperationAction(ISD::FP_TO_UINT, MVT::i8,  Promote);
  setOperationAction(ISD::FP_TO_SINT, MVT::i16, Promote);
  setOperationAction(ISD::FP_TO_UINT, MVT::i16, Promote);
  // SPU has a legal FP -> signed INT instruction for f32, but for f64, need
  // to expand to a libcall, hence the custom lowering:
  setOperationAction(ISD::FP_TO_SINT, MVT::i32,  Custom);
  setOperationAction(ISD::FP_TO_UINT, MVT::i32,  Custom);
  setOperationAction(ISD::FP_TO_SINT, MVT::i64,  Expand);
  setOperationAction(ISD::FP_TO_UINT, MVT::i64,  Expand);
  setOperationAction(ISD::FP_TO_SINT, MVT::i128, Expand);
  setOperationAction(ISD::FP_TO_UINT, MVT::i128, Expand);

  // FDIV on SPU requires custom lowering
  setOperationAction(ISD::FDIV, MVT::f64, Expand); // to libcall

  // SPU has [U|S]INT_TO_FP for f32->i32, but not for f64->i32, f64->i64:
  setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
  setOperationAction(ISD::SINT_TO_FP, MVT::i16, Promote);
  setOperationAction(ISD::SINT_TO_FP, MVT::i8,  Promote);
  setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
  setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote);
  setOperationAction(ISD::UINT_TO_FP, MVT::i8,  Promote);
  setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
  setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);

  setOperationAction(ISD::BITCAST, MVT::i32, Legal);
  setOperationAction(ISD::BITCAST, MVT::f32, Legal);
  setOperationAction(ISD::BITCAST, MVT::i64, Legal);
  setOperationAction(ISD::BITCAST, MVT::f64, Legal);

  // We cannot sextinreg(i1).  Expand to shifts.
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);

  // We want to legalize GlobalAddress and ConstantPool nodes into the
  // appropriate instructions to materialize the address.
  for (unsigned sctype = (unsigned) MVT::i8; sctype < (unsigned) MVT::f128;
       ++sctype) {
    MVT::SimpleValueType VT = (MVT::SimpleValueType)sctype;

    setOperationAction(ISD::GlobalAddress, VT, Custom);
    setOperationAction(ISD::ConstantPool,  VT, Custom);
    setOperationAction(ISD::JumpTable,     VT, Custom);
  }

  // VASTART needs to be custom lowered to use the VarArgsFrameIndex
  setOperationAction(ISD::VASTART, MVT::Other, Custom);

  // Use the default implementation.
  setOperationAction(ISD::VAARG,              MVT::Other, Expand);
  setOperationAction(ISD::VACOPY,             MVT::Other, Expand);
  setOperationAction(ISD::VAEND,              MVT::Other, Expand);
  setOperationAction(ISD::STACKSAVE,          MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE,       MVT::Other, Expand);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32,   Expand);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64,   Expand);

  // Cell SPU has instructions for converting between i64 and fp.
  setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
  setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);

  // To take advantage of the above i64 FP_TO_SINT, promote i32 FP_TO_UINT
  setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote);

  // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
  setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);

  // First set operation action for all vector types to expand. Then we
  // will selectively turn on ones that can be effectively codegen'd.
  addRegisterClass(MVT::v16i8, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v8i16, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v4i32, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v2i64, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v4f32, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v2f64, SPU::VECREGRegisterClass);

  for (unsigned i = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
       i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) {
    MVT::SimpleValueType VT = (MVT::SimpleValueType)i;

    // Set operation actions to legal types only.
    if (!isTypeLegal(VT)) continue;

    // add/sub are legal for all supported vector VT's.
    setOperationAction(ISD::ADD, VT, Legal);
    setOperationAction(ISD::SUB, VT, Legal);
    // mul has to be custom lowered.
    setOperationAction(ISD::MUL, VT, Legal);

    setOperationAction(ISD::AND,    VT, Legal);
    setOperationAction(ISD::OR,     VT, Legal);
    setOperationAction(ISD::XOR,    VT, Legal);
    setOperationAction(ISD::LOAD,   VT, Custom);
    setOperationAction(ISD::SELECT, VT, Legal);
    setOperationAction(ISD::STORE,  VT, Custom);

    // These operations need to be expanded:
    setOperationAction(ISD::SDIV, VT, Expand);
    setOperationAction(ISD::SREM, VT, Expand);
    setOperationAction(ISD::UDIV, VT, Expand);
    setOperationAction(ISD::UREM, VT, Expand);

    // Expand all trunc stores
    for (unsigned j = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
         j <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++j) {
      MVT::SimpleValueType TargetVT = (MVT::SimpleValueType)j;
      setTruncStoreAction(VT, TargetVT, Expand);
    }

    // Custom lower build_vector, constant pool spills, insert and
    // extract vector elements:
    setOperationAction(ISD::BUILD_VECTOR,       VT, Custom);
    setOperationAction(ISD::ConstantPool,       VT, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR,   VT, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT,  VT, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE,     VT, Custom);
  }

  setOperationAction(ISD::SHL, MVT::v2i64, Expand);

  setOperationAction(ISD::AND, MVT::v16i8, Custom);
  setOperationAction(ISD::OR,  MVT::v16i8, Custom);
  setOperationAction(ISD::XOR, MVT::v16i8, Custom);
  setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);

  setOperationAction(ISD::FDIV, MVT::v4f32, Legal);

  setBooleanContents(ZeroOrNegativeOneBooleanContent);
  setBooleanVectorContents(ZeroOrNegativeOneBooleanContent); // FIXME: Is this correct?

  setStackPointerRegisterToSaveRestore(SPU::R1);

  // We have target-specific dag combine patterns for the following nodes:
  setTargetDAGCombine(ISD::ADD);
  setTargetDAGCombine(ISD::ZERO_EXTEND);
  setTargetDAGCombine(ISD::SIGN_EXTEND);
  setTargetDAGCombine(ISD::ANY_EXTEND);

  setMinFunctionAlignment(3);

  computeRegisterProperties();

  // Set pre-RA register scheduler default to BURR, which produces slightly
  // better code than the default (could also be TDRR, but TargetLowering.h
  // needs a mod to support that model):
  setSchedulingPreference(Sched::RegPressure);
}
480
const char *SPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
483
case SPUISD::RET_FLAG: return "SPUISD::RET_FLAG";
484
case SPUISD::Hi: return "SPUISD::Hi";
485
case SPUISD::Lo: return "SPUISD::Lo";
486
case SPUISD::PCRelAddr: return "SPUISD::PCRelAddr";
487
case SPUISD::AFormAddr: return "SPUISD::AFormAddr";
488
case SPUISD::IndirectAddr: return "SPUISD::IndirectAddr";
489
case SPUISD::LDRESULT: return "SPUISD::LDRESULT";
490
case SPUISD::CALL: return "SPUISD::CALL";
491
case SPUISD::SHUFB: return "SPUISD::SHUFB";
492
case SPUISD::SHUFFLE_MASK: return "SPUISD::SHUFFLE_MASK";
493
case SPUISD::CNTB: return "SPUISD::CNTB";
494
case SPUISD::PREFSLOT2VEC: return "SPUISD::PREFSLOT2VEC";
495
case SPUISD::VEC2PREFSLOT: return "SPUISD::VEC2PREFSLOT";
496
case SPUISD::SHL_BITS: return "SPUISD::SHL_BITS";
497
case SPUISD::SHL_BYTES: return "SPUISD::SHL_BYTES";
498
case SPUISD::VEC_ROTL: return "SPUISD::VEC_ROTL";
499
case SPUISD::VEC_ROTR: return "SPUISD::VEC_ROTR";
500
case SPUISD::ROTBYTES_LEFT: return "SPUISD::ROTBYTES_LEFT";
501
case SPUISD::ROTBYTES_LEFT_BITS: return "SPUISD::ROTBYTES_LEFT_BITS";
502
case SPUISD::SELECT_MASK: return "SPUISD::SELECT_MASK";
503
case SPUISD::SELB: return "SPUISD::SELB";
504
case SPUISD::ADD64_MARKER: return "SPUISD::ADD64_MARKER";
505
case SPUISD::SUB64_MARKER: return "SPUISD::SUB64_MARKER";
506
case SPUISD::MUL64_MARKER: return "SPUISD::MUL64_MARKER";
510
//===----------------------------------------------------------------------===//
511
// Return the Cell SPU's SETCC result type
512
//===----------------------------------------------------------------------===//
514
EVT SPUTargetLowering::getSetCCResultType(EVT VT) const {
515
// i8, i16 and i32 are valid SETCC result types
516
MVT::SimpleValueType retval;
518
switch(VT.getSimpleVT().SimpleTy){
521
retval = MVT::i8; break;
523
retval = MVT::i16; break;
531
//===----------------------------------------------------------------------===//
532
// Calling convention code:
533
//===----------------------------------------------------------------------===//
535
#include "SPUGenCallingConv.inc"
537
//===----------------------------------------------------------------------===//
538
// LowerOperation implementation
539
//===----------------------------------------------------------------------===//
541
/// Custom lower loads for CellSPU
543
All CellSPU loads and stores are aligned to 16-byte boundaries, so for elements
544
within a 16-byte block, we have to rotate to extract the requested element.
546
For extending loads, we also want to ensure that the following sequence is
547
emitted, e.g. for MVT::f32 extending load to MVT::f64:
551
%2 v16i8,ch = rotate %1
552
%3 v4f8, ch = bitconvert %2
553
%4 f32 = vec2perfslot %3
554
%5 f64 = fp_extend %4
558
LowerLOAD(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
559
LoadSDNode *LN = cast<LoadSDNode>(Op);
560
SDValue the_chain = LN->getChain();
561
EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
562
EVT InVT = LN->getMemoryVT();
563
EVT OutVT = Op.getValueType();
564
ISD::LoadExtType ExtType = LN->getExtensionType();
565
unsigned alignment = LN->getAlignment();
566
int pso = prefslotOffset(InVT);
567
DebugLoc dl = Op.getDebugLoc();
568
EVT vecVT = InVT.isVector()? InVT: EVT::getVectorVT(*DAG.getContext(), InVT,
569
(128 / InVT.getSizeInBits()));
572
assert( LN->getAddressingMode() == ISD::UNINDEXED
573
&& "we should get only UNINDEXED adresses");
574
// clean aligned loads can be selected as-is
575
if (InVT.getSizeInBits() == 128 && (alignment%16) == 0)
578
// Get pointerinfos to the memory chunk(s) that contain the data to load
579
uint64_t mpi_offset = LN->getPointerInfo().Offset;
580
mpi_offset -= mpi_offset%16;
581
MachinePointerInfo lowMemPtr(LN->getPointerInfo().V, mpi_offset);
582
MachinePointerInfo highMemPtr(LN->getPointerInfo().V, mpi_offset+16);
585
SDValue basePtr = LN->getBasePtr();
588
if ((alignment%16) == 0) {
591
// Special cases for a known aligned load to simplify the base pointer
592
// and the rotation amount:
593
if (basePtr.getOpcode() == ISD::ADD
594
&& (CN = dyn_cast<ConstantSDNode > (basePtr.getOperand(1))) != 0) {
595
// Known offset into basePtr
596
int64_t offset = CN->getSExtValue();
597
int64_t rotamt = int64_t((offset & 0xf) - pso);
602
rotate = DAG.getConstant(rotamt, MVT::i16);
604
// Simplify the base pointer for this case:
605
basePtr = basePtr.getOperand(0);
606
if ((offset & ~0xf) > 0) {
607
basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
609
DAG.getConstant((offset & ~0xf), PtrVT));
611
} else if ((basePtr.getOpcode() == SPUISD::AFormAddr)
612
|| (basePtr.getOpcode() == SPUISD::IndirectAddr
613
&& basePtr.getOperand(0).getOpcode() == SPUISD::Hi
614
&& basePtr.getOperand(1).getOpcode() == SPUISD::Lo)) {
615
// Plain aligned a-form address: rotate into preferred slot
616
// Same for (SPUindirect (SPUhi ...), (SPUlo ...))
617
int64_t rotamt = -pso;
620
rotate = DAG.getConstant(rotamt, MVT::i16);
622
// Offset the rotate amount by the basePtr and the preferred slot
624
int64_t rotamt = -pso;
627
rotate = DAG.getNode(ISD::ADD, dl, PtrVT,
629
DAG.getConstant(rotamt, PtrVT));
632
// Unaligned load: must be more pessimistic about addressing modes:
633
if (basePtr.getOpcode() == ISD::ADD) {
634
MachineFunction &MF = DAG.getMachineFunction();
635
MachineRegisterInfo &RegInfo = MF.getRegInfo();
636
unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
639
SDValue Op0 = basePtr.getOperand(0);
640
SDValue Op1 = basePtr.getOperand(1);
642
if (isa<ConstantSDNode>(Op1)) {
643
// Convert the (add <ptr>, <const>) to an indirect address contained
644
// in a register. Note that this is done because we need to avoid
645
// creating a 0(reg) d-form address due to the SPU's block loads.
646
basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
647
the_chain = DAG.getCopyToReg(the_chain, dl, VReg, basePtr, Flag);
648
basePtr = DAG.getCopyFromReg(the_chain, dl, VReg, PtrVT);
650
// Convert the (add <arg1>, <arg2>) to an indirect address, which
651
// will likely be lowered as a reg(reg) x-form address.
652
basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
655
basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
657
DAG.getConstant(0, PtrVT));
660
// Offset the rotate amount by the basePtr and the preferred slot
662
rotate = DAG.getNode(ISD::ADD, dl, PtrVT,
664
DAG.getConstant(-pso, PtrVT));
667
// Do the load as a i128 to allow possible shifting
668
SDValue low = DAG.getLoad(MVT::i128, dl, the_chain, basePtr,
670
LN->isVolatile(), LN->isNonTemporal(), false, 16);
672
// When the size is not greater than alignment we get all data with just
674
if (alignment >= InVT.getSizeInBits()/8) {
676
the_chain = low.getValue(1);
678
// Rotate into the preferred slot:
679
result = DAG.getNode(SPUISD::ROTBYTES_LEFT, dl, MVT::i128,
680
low.getValue(0), rotate);
682
// Convert the loaded v16i8 vector to the appropriate vector type
683
// specified by the operand:
684
EVT vecVT = EVT::getVectorVT(*DAG.getContext(),
685
InVT, (128 / InVT.getSizeInBits()));
686
result = DAG.getNode(SPUISD::VEC2PREFSLOT, dl, InVT,
687
DAG.getNode(ISD::BITCAST, dl, vecVT, result));
689
// When alignment is less than the size, we might need (known only at
690
// run-time) two loads
691
// TODO: if the memory address is composed only from constants, we have
692
// extra kowledge, and might avoid the second load
694
// storage position offset from lower 16 byte aligned memory chunk
695
SDValue offset = DAG.getNode(ISD::AND, dl, MVT::i32,
696
basePtr, DAG.getConstant( 0xf, MVT::i32 ) );
697
// get a registerfull of ones. (this implementation is a workaround: LLVM
698
// cannot handle 128 bit signed int constants)
699
SDValue ones = DAG.getConstant(-1, MVT::v4i32 );
700
ones = DAG.getNode(ISD::BITCAST, dl, MVT::i128, ones);
702
SDValue high = DAG.getLoad(MVT::i128, dl, the_chain,
703
DAG.getNode(ISD::ADD, dl, PtrVT,
705
DAG.getConstant(16, PtrVT)),
707
LN->isVolatile(), LN->isNonTemporal(), false,
710
the_chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, low.getValue(1),
713
// Shift the (possible) high part right to compensate the misalignemnt.
714
// if there is no highpart (i.e. value is i64 and offset is 4), this
715
// will zero out the high value.
716
high = DAG.getNode(SPUISD::SRL_BYTES, dl, MVT::i128, high,
717
DAG.getNode(ISD::SUB, dl, MVT::i32,
718
DAG.getConstant( 16, MVT::i32),
722
// Shift the low similarly
723
// TODO: add SPUISD::SHL_BYTES
724
low = DAG.getNode(SPUISD::SHL_BYTES, dl, MVT::i128, low, offset );
726
// Merge the two parts
727
result = DAG.getNode(ISD::BITCAST, dl, vecVT,
728
DAG.getNode(ISD::OR, dl, MVT::i128, low, high));
730
if (!InVT.isVector()) {
731
result = DAG.getNode(SPUISD::VEC2PREFSLOT, dl, InVT, result );
735
// Handle extending loads by extending the scalar result:
736
if (ExtType == ISD::SEXTLOAD) {
737
result = DAG.getNode(ISD::SIGN_EXTEND, dl, OutVT, result);
738
} else if (ExtType == ISD::ZEXTLOAD) {
739
result = DAG.getNode(ISD::ZERO_EXTEND, dl, OutVT, result);
740
} else if (ExtType == ISD::EXTLOAD) {
741
unsigned NewOpc = ISD::ANY_EXTEND;
743
if (OutVT.isFloatingPoint())
744
NewOpc = ISD::FP_EXTEND;
746
result = DAG.getNode(NewOpc, dl, OutVT, result);
749
SDVTList retvts = DAG.getVTList(OutVT, MVT::Other);
750
SDValue retops[2] = {
755
result = DAG.getNode(SPUISD::LDRESULT, dl, retvts,
756
retops, sizeof(retops) / sizeof(retops[0]));
760
/// Custom lower stores for CellSPU
762
All CellSPU stores are aligned to 16-byte boundaries, so for elements
763
within a 16-byte block, we have to generate a shuffle to insert the
764
requested element into its place, then store the resulting block.
767
LowerSTORE(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
768
StoreSDNode *SN = cast<StoreSDNode>(Op);
769
SDValue Value = SN->getValue();
770
EVT VT = Value.getValueType();
771
EVT StVT = (!SN->isTruncatingStore() ? VT : SN->getMemoryVT());
772
EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
773
DebugLoc dl = Op.getDebugLoc();
774
unsigned alignment = SN->getAlignment();
776
EVT vecVT = StVT.isVector()? StVT: EVT::getVectorVT(*DAG.getContext(), StVT,
777
(128 / StVT.getSizeInBits()));
778
// Get pointerinfos to the memory chunk(s) that contain the data to load
779
uint64_t mpi_offset = SN->getPointerInfo().Offset;
780
mpi_offset -= mpi_offset%16;
781
MachinePointerInfo lowMemPtr(SN->getPointerInfo().V, mpi_offset);
782
MachinePointerInfo highMemPtr(SN->getPointerInfo().V, mpi_offset+16);
786
assert( SN->getAddressingMode() == ISD::UNINDEXED
787
&& "we should get only UNINDEXED adresses");
788
// clean aligned loads can be selected as-is
789
if (StVT.getSizeInBits() == 128 && (alignment%16) == 0)
792
SDValue alignLoadVec;
793
SDValue basePtr = SN->getBasePtr();
794
SDValue the_chain = SN->getChain();
795
SDValue insertEltOffs;
797
if ((alignment%16) == 0) {
799
// Special cases for a known aligned load to simplify the base pointer
800
// and insertion byte:
801
if (basePtr.getOpcode() == ISD::ADD
802
&& (CN = dyn_cast<ConstantSDNode>(basePtr.getOperand(1))) != 0) {
803
// Known offset into basePtr
804
int64_t offset = CN->getSExtValue();
806
// Simplify the base pointer for this case:
807
basePtr = basePtr.getOperand(0);
808
insertEltOffs = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
810
DAG.getConstant((offset & 0xf), PtrVT));
812
if ((offset & ~0xf) > 0) {
813
basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
815
DAG.getConstant((offset & ~0xf), PtrVT));
818
// Otherwise, assume it's at byte 0 of basePtr
819
insertEltOffs = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
821
DAG.getConstant(0, PtrVT));
822
basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
824
DAG.getConstant(0, PtrVT));
827
// Unaligned load: must be more pessimistic about addressing modes:
828
if (basePtr.getOpcode() == ISD::ADD) {
829
MachineFunction &MF = DAG.getMachineFunction();
830
MachineRegisterInfo &RegInfo = MF.getRegInfo();
831
unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
834
SDValue Op0 = basePtr.getOperand(0);
835
SDValue Op1 = basePtr.getOperand(1);
837
if (isa<ConstantSDNode>(Op1)) {
838
// Convert the (add <ptr>, <const>) to an indirect address contained
839
// in a register. Note that this is done because we need to avoid
840
// creating a 0(reg) d-form address due to the SPU's block loads.
841
basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
842
the_chain = DAG.getCopyToReg(the_chain, dl, VReg, basePtr, Flag);
843
basePtr = DAG.getCopyFromReg(the_chain, dl, VReg, PtrVT);
845
// Convert the (add <arg1>, <arg2>) to an indirect address, which
846
// will likely be lowered as a reg(reg) x-form address.
847
basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
850
basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
852
DAG.getConstant(0, PtrVT));
855
// Insertion point is solely determined by basePtr's contents
856
insertEltOffs = DAG.getNode(ISD::ADD, dl, PtrVT,
858
DAG.getConstant(0, PtrVT));
861
// Load the lower part of the memory to which to store.
862
SDValue low = DAG.getLoad(vecVT, dl, the_chain, basePtr,
863
lowMemPtr, SN->isVolatile(), SN->isNonTemporal(),
866
// if we don't need to store over the 16 byte boundary, one store suffices
867
if (alignment >= StVT.getSizeInBits()/8) {
869
the_chain = low.getValue(1);
871
LoadSDNode *LN = cast<LoadSDNode>(low);
872
SDValue theValue = SN->getValue();
875
&& (theValue.getOpcode() == ISD::AssertZext
876
|| theValue.getOpcode() == ISD::AssertSext)) {
877
// Drill down and get the value for zero- and sign-extended
879
theValue = theValue.getOperand(0);
882
// If the base pointer is already a D-form address, then just create
883
// a new D-form address with a slot offset and the orignal base pointer.
884
// Otherwise generate a D-form address with the slot offset relative
885
// to the stack pointer, which is always aligned.
887
if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
888
errs() << "CellSPU LowerSTORE: basePtr = ";
889
basePtr.getNode()->dump(&DAG);
894
SDValue insertEltOp = DAG.getNode(SPUISD::SHUFFLE_MASK, dl, vecVT,
896
SDValue vectorizeOp = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, vecVT,
899
result = DAG.getNode(SPUISD::SHUFB, dl, vecVT,
901
DAG.getNode(ISD::BITCAST, dl,
902
MVT::v4i32, insertEltOp));
904
result = DAG.getStore(the_chain, dl, result, basePtr,
906
LN->isVolatile(), LN->isNonTemporal(),
910
// do the store when it might cross the 16 byte memory access boundary.
912
// TODO issue a warning if SN->isVolatile()== true? This is likely not
913
// what the user wanted.
915
// address offset from nearest lower 16byte alinged address
916
SDValue offset = DAG.getNode(ISD::AND, dl, MVT::i32,
918
DAG.getConstant(0xf, MVT::i32));
920
SDValue offset_compl = DAG.getNode(ISD::SUB, dl, MVT::i32,
921
DAG.getConstant( 16, MVT::i32),
923
// 16 - sizeof(Value)
924
SDValue surplus = DAG.getNode(ISD::SUB, dl, MVT::i32,
925
DAG.getConstant( 16, MVT::i32),
926
DAG.getConstant( VT.getSizeInBits()/8,
928
// get a registerfull of ones
929
SDValue ones = DAG.getConstant(-1, MVT::v4i32);
930
ones = DAG.getNode(ISD::BITCAST, dl, MVT::i128, ones);
932
// Create the 128 bit masks that have ones where the data to store is
934
SDValue lowmask, himask;
935
// if the value to store don't fill up the an entire 128 bits, zero
936
// out the last bits of the mask so that only the value we want to store
938
// this is e.g. in the case of store i32, align 2
940
Value = DAG.getNode(SPUISD::PREFSLOT2VEC, dl, vecVT, Value);
941
lowmask = DAG.getNode(SPUISD::SRL_BYTES, dl, MVT::i128, ones, surplus);
942
lowmask = DAG.getNode(SPUISD::SHL_BYTES, dl, MVT::i128, lowmask,
944
Value = DAG.getNode(ISD::BITCAST, dl, MVT::i128, Value);
945
Value = DAG.getNode(ISD::AND, dl, MVT::i128, Value, lowmask);
950
Value = DAG.getNode(ISD::BITCAST, dl, MVT::i128, Value);
952
// this will zero, if there are no data that goes to the high quad
953
himask = DAG.getNode(SPUISD::SHL_BYTES, dl, MVT::i128, lowmask,
955
lowmask = DAG.getNode(SPUISD::SRL_BYTES, dl, MVT::i128, lowmask,
958
// Load in the old data and zero out the parts that will be overwritten with
959
// the new data to store.
960
SDValue hi = DAG.getLoad(MVT::i128, dl, the_chain,
961
DAG.getNode(ISD::ADD, dl, PtrVT, basePtr,
962
DAG.getConstant( 16, PtrVT)),
964
SN->isVolatile(), SN->isNonTemporal(),
966
the_chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, low.getValue(1),
969
low = DAG.getNode(ISD::AND, dl, MVT::i128,
970
DAG.getNode( ISD::BITCAST, dl, MVT::i128, low),
971
DAG.getNode( ISD::XOR, dl, MVT::i128, lowmask, ones));
972
hi = DAG.getNode(ISD::AND, dl, MVT::i128,
973
DAG.getNode( ISD::BITCAST, dl, MVT::i128, hi),
974
DAG.getNode( ISD::XOR, dl, MVT::i128, himask, ones));
976
// Shift the Value to store into place. rlow contains the parts that go to
977
// the lower memory chunk, rhi has the parts that go to the upper one.
978
SDValue rlow = DAG.getNode(SPUISD::SRL_BYTES, dl, MVT::i128, Value, offset);
979
rlow = DAG.getNode(ISD::AND, dl, MVT::i128, rlow, lowmask);
980
SDValue rhi = DAG.getNode(SPUISD::SHL_BYTES, dl, MVT::i128, Value,
983
// Merge the old data and the new data and store the results
984
// Need to convert vectors here to integer as 'OR'ing floats assert
985
rlow = DAG.getNode(ISD::OR, dl, MVT::i128,
986
DAG.getNode(ISD::BITCAST, dl, MVT::i128, low),
987
DAG.getNode(ISD::BITCAST, dl, MVT::i128, rlow));
988
rhi = DAG.getNode(ISD::OR, dl, MVT::i128,
989
DAG.getNode(ISD::BITCAST, dl, MVT::i128, hi),
990
DAG.getNode(ISD::BITCAST, dl, MVT::i128, rhi));
992
low = DAG.getStore(the_chain, dl, rlow, basePtr,
994
SN->isVolatile(), SN->isNonTemporal(), 16);
995
hi = DAG.getStore(the_chain, dl, rhi,
996
DAG.getNode(ISD::ADD, dl, PtrVT, basePtr,
997
DAG.getConstant( 16, PtrVT)),
999
SN->isVolatile(), SN->isNonTemporal(), 16);
1000
result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, low.getValue(0),
1007
//! Generate the address of a constant pool entry.
1009
LowerConstantPool(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
1010
EVT PtrVT = Op.getValueType();
1011
ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
1012
const Constant *C = CP->getConstVal();
1013
SDValue CPI = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment());
1014
SDValue Zero = DAG.getConstant(0, PtrVT);
1015
const TargetMachine &TM = DAG.getTarget();
1016
// FIXME there is no actual debug info here
1017
DebugLoc dl = Op.getDebugLoc();
1019
if (TM.getRelocationModel() == Reloc::Static) {
1020
if (!ST->usingLargeMem()) {
1021
// Just return the SDValue with the constant pool address in it.
1022
return DAG.getNode(SPUISD::AFormAddr, dl, PtrVT, CPI, Zero);
1024
SDValue Hi = DAG.getNode(SPUISD::Hi, dl, PtrVT, CPI, Zero);
1025
SDValue Lo = DAG.getNode(SPUISD::Lo, dl, PtrVT, CPI, Zero);
1026
return DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Hi, Lo);
1030
llvm_unreachable("LowerConstantPool: Relocation model other than static"
1034
//! Alternate entry point for generating the address of a constant pool entry
1036
SPU::LowerConstantPool(SDValue Op, SelectionDAG &DAG, const SPUTargetMachine &TM) {
1037
return ::LowerConstantPool(Op, DAG, TM.getSubtargetImpl());
1041
LowerJumpTable(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
1042
EVT PtrVT = Op.getValueType();
1043
JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
1044
SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
1045
SDValue Zero = DAG.getConstant(0, PtrVT);
1046
const TargetMachine &TM = DAG.getTarget();
1047
// FIXME there is no actual debug info here
1048
DebugLoc dl = Op.getDebugLoc();
1050
if (TM.getRelocationModel() == Reloc::Static) {
1051
if (!ST->usingLargeMem()) {
1052
return DAG.getNode(SPUISD::AFormAddr, dl, PtrVT, JTI, Zero);
1054
SDValue Hi = DAG.getNode(SPUISD::Hi, dl, PtrVT, JTI, Zero);
1055
SDValue Lo = DAG.getNode(SPUISD::Lo, dl, PtrVT, JTI, Zero);
1056
return DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Hi, Lo);
1060
llvm_unreachable("LowerJumpTable: Relocation model other than static"
1065
LowerGlobalAddress(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
1066
EVT PtrVT = Op.getValueType();
1067
GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
1068
const GlobalValue *GV = GSDN->getGlobal();
1069
SDValue GA = DAG.getTargetGlobalAddress(GV, Op.getDebugLoc(),
1070
PtrVT, GSDN->getOffset());
1071
const TargetMachine &TM = DAG.getTarget();
1072
SDValue Zero = DAG.getConstant(0, PtrVT);
1073
// FIXME there is no actual debug info here
1074
DebugLoc dl = Op.getDebugLoc();
1076
if (TM.getRelocationModel() == Reloc::Static) {
1077
if (!ST->usingLargeMem()) {
1078
return DAG.getNode(SPUISD::AFormAddr, dl, PtrVT, GA, Zero);
1080
SDValue Hi = DAG.getNode(SPUISD::Hi, dl, PtrVT, GA, Zero);
1081
SDValue Lo = DAG.getNode(SPUISD::Lo, dl, PtrVT, GA, Zero);
1082
return DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Hi, Lo);
1085
report_fatal_error("LowerGlobalAddress: Relocation model other than static"
1091
//! Custom lower double precision floating point constants
1093
LowerConstantFP(SDValue Op, SelectionDAG &DAG) {
1094
EVT VT = Op.getValueType();
1095
// FIXME there is no actual debug info here
1096
DebugLoc dl = Op.getDebugLoc();
1098
if (VT == MVT::f64) {
1099
ConstantFPSDNode *FP = cast<ConstantFPSDNode>(Op.getNode());
1102
"LowerConstantFP: Node is not ConstantFPSDNode");
1104
uint64_t dbits = DoubleToBits(FP->getValueAPF().convertToDouble());
1105
SDValue T = DAG.getConstant(dbits, MVT::i64);
1106
SDValue Tvec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64, T, T);
1107
return DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT,
1108
DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, Tvec));
1115
SPUTargetLowering::LowerFormalArguments(SDValue Chain,
1116
CallingConv::ID CallConv, bool isVarArg,
1117
const SmallVectorImpl<ISD::InputArg>
1119
DebugLoc dl, SelectionDAG &DAG,
1120
SmallVectorImpl<SDValue> &InVals)
1123
MachineFunction &MF = DAG.getMachineFunction();
1124
MachineFrameInfo *MFI = MF.getFrameInfo();
1125
MachineRegisterInfo &RegInfo = MF.getRegInfo();
1126
SPUFunctionInfo *FuncInfo = MF.getInfo<SPUFunctionInfo>();
1128
unsigned ArgOffset = SPUFrameLowering::minStackSize();
1129
unsigned ArgRegIdx = 0;
1130
unsigned StackSlotSize = SPUFrameLowering::stackSlotSize();
1132
EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
1134
SmallVector<CCValAssign, 16> ArgLocs;
1135
CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
1136
getTargetMachine(), ArgLocs, *DAG.getContext());
1137
// FIXME: allow for other calling conventions
1138
CCInfo.AnalyzeFormalArguments(Ins, CCC_SPU);
1140
// Add DAG nodes to load the arguments or copy them out of registers.
1141
for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) {
1142
EVT ObjectVT = Ins[ArgNo].VT;
1143
unsigned ObjSize = ObjectVT.getSizeInBits()/8;
1145
CCValAssign &VA = ArgLocs[ArgNo];
1147
if (VA.isRegLoc()) {
1148
const TargetRegisterClass *ArgRegClass;
1150
switch (ObjectVT.getSimpleVT().SimpleTy) {
1152
report_fatal_error("LowerFormalArguments Unhandled argument type: " +
1153
Twine(ObjectVT.getEVTString()));
1155
ArgRegClass = &SPU::R8CRegClass;
1158
ArgRegClass = &SPU::R16CRegClass;
1161
ArgRegClass = &SPU::R32CRegClass;
1164
ArgRegClass = &SPU::R64CRegClass;
1167
ArgRegClass = &SPU::GPRCRegClass;
1170
ArgRegClass = &SPU::R32FPRegClass;
1173
ArgRegClass = &SPU::R64FPRegClass;
1181
ArgRegClass = &SPU::VECREGRegClass;
1185
unsigned VReg = RegInfo.createVirtualRegister(ArgRegClass);
1186
RegInfo.addLiveIn(VA.getLocReg(), VReg);
1187
ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
1190
// We need to load the argument to a virtual register if we determined
1191
// above that we ran out of physical registers of the appropriate type
1192
// or we're forced to do vararg
1193
int FI = MFI->CreateFixedObject(ObjSize, ArgOffset, true);
1194
SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
1195
ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo(),
1196
false, false, false, 0);
1197
ArgOffset += StackSlotSize;
1200
InVals.push_back(ArgVal);
1202
Chain = ArgVal.getOperand(0);
1207
// FIXME: we should be able to query the argument registers from
1208
// tablegen generated code.
1209
static const uint16_t ArgRegs[] = {
1210
SPU::R3, SPU::R4, SPU::R5, SPU::R6, SPU::R7, SPU::R8, SPU::R9,
1211
SPU::R10, SPU::R11, SPU::R12, SPU::R13, SPU::R14, SPU::R15, SPU::R16,
1212
SPU::R17, SPU::R18, SPU::R19, SPU::R20, SPU::R21, SPU::R22, SPU::R23,
1213
SPU::R24, SPU::R25, SPU::R26, SPU::R27, SPU::R28, SPU::R29, SPU::R30,
1214
SPU::R31, SPU::R32, SPU::R33, SPU::R34, SPU::R35, SPU::R36, SPU::R37,
1215
SPU::R38, SPU::R39, SPU::R40, SPU::R41, SPU::R42, SPU::R43, SPU::R44,
1216
SPU::R45, SPU::R46, SPU::R47, SPU::R48, SPU::R49, SPU::R50, SPU::R51,
1217
SPU::R52, SPU::R53, SPU::R54, SPU::R55, SPU::R56, SPU::R57, SPU::R58,
1218
SPU::R59, SPU::R60, SPU::R61, SPU::R62, SPU::R63, SPU::R64, SPU::R65,
1219
SPU::R66, SPU::R67, SPU::R68, SPU::R69, SPU::R70, SPU::R71, SPU::R72,
1220
SPU::R73, SPU::R74, SPU::R75, SPU::R76, SPU::R77, SPU::R78, SPU::R79
1222
// size of ArgRegs array
1223
const unsigned NumArgRegs = 77;
1225
// We will spill (79-3)+1 registers to the stack
1226
SmallVector<SDValue, 79-3+1> MemOps;
1228
// Create the frame slot
1229
for (; ArgRegIdx != NumArgRegs; ++ArgRegIdx) {
1230
FuncInfo->setVarArgsFrameIndex(
1231
MFI->CreateFixedObject(StackSlotSize, ArgOffset, true));
1232
SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
1233
unsigned VReg = MF.addLiveIn(ArgRegs[ArgRegIdx], &SPU::VECREGRegClass);
1234
SDValue ArgVal = DAG.getRegister(VReg, MVT::v16i8);
1235
SDValue Store = DAG.getStore(Chain, dl, ArgVal, FIN, MachinePointerInfo(),
1237
Chain = Store.getOperand(0);
1238
MemOps.push_back(Store);
1240
// Increment address by stack slot size for the next stored argument
1241
ArgOffset += StackSlotSize;
1243
if (!MemOps.empty())
1244
Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
1245
&MemOps[0], MemOps.size());
1251
/// isLSAAddress - Return the immediate to use if the specified
1252
/// value is representable as a LSA address.
1253
static SDNode *isLSAAddress(SDValue Op, SelectionDAG &DAG) {
1254
ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
1257
int Addr = C->getZExtValue();
1258
if ((Addr & 3) != 0 || // Low 2 bits are implicitly zero.
1259
(Addr << 14 >> 14) != Addr)
1260
return 0; // Top 14 bits have to be sext of immediate.
1262
return DAG.getConstant((int)C->getZExtValue() >> 2, MVT::i32).getNode();
1266
SPUTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
1267
CallingConv::ID CallConv, bool isVarArg,
1268
bool doesNotRet, bool &isTailCall,
1269
const SmallVectorImpl<ISD::OutputArg> &Outs,
1270
const SmallVectorImpl<SDValue> &OutVals,
1271
const SmallVectorImpl<ISD::InputArg> &Ins,
1272
DebugLoc dl, SelectionDAG &DAG,
1273
SmallVectorImpl<SDValue> &InVals) const {
1274
// CellSPU target does not yet support tail call optimization.
1277
const SPUSubtarget *ST = SPUTM.getSubtargetImpl();
1278
unsigned NumOps = Outs.size();
1279
unsigned StackSlotSize = SPUFrameLowering::stackSlotSize();
1281
SmallVector<CCValAssign, 16> ArgLocs;
1282
CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
1283
getTargetMachine(), ArgLocs, *DAG.getContext());
1284
// FIXME: allow for other calling conventions
1285
CCInfo.AnalyzeCallOperands(Outs, CCC_SPU);
1287
const unsigned NumArgRegs = ArgLocs.size();
1290
// Handy pointer type
1291
EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
1293
// Set up a copy of the stack pointer for use loading and storing any
1294
// arguments that may not fit in the registers available for argument
1296
SDValue StackPtr = DAG.getRegister(SPU::R1, MVT::i32);
1298
// Figure out which arguments are going to go in registers, and which in
1300
unsigned ArgOffset = SPUFrameLowering::minStackSize(); // Just below [LR]
1301
unsigned ArgRegIdx = 0;
1303
// Keep track of registers passing arguments
1304
std::vector<std::pair<unsigned, SDValue> > RegsToPass;
1305
// And the arguments passed on the stack
1306
SmallVector<SDValue, 8> MemOpChains;
1308
for (; ArgRegIdx != NumOps; ++ArgRegIdx) {
1309
SDValue Arg = OutVals[ArgRegIdx];
1310
CCValAssign &VA = ArgLocs[ArgRegIdx];
1312
// PtrOff will be used to store the current argument to the stack if a
1313
// register cannot be found for it.
1314
SDValue PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType());
1315
PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
1317
switch (Arg.getValueType().getSimpleVT().SimpleTy) {
1318
default: llvm_unreachable("Unexpected ValueType for argument!");
1332
if (ArgRegIdx != NumArgRegs) {
1333
RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
1335
MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff,
1336
MachinePointerInfo(),
1338
ArgOffset += StackSlotSize;
1344
// Accumulate how many bytes are to be pushed on the stack, including the
1345
// linkage area, and parameter passing area. According to the SPU ABI,
1346
// we minimally need space for [LR] and [SP].
1347
unsigned NumStackBytes = ArgOffset - SPUFrameLowering::minStackSize();
1349
// Insert a call sequence start
1350
Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumStackBytes,
1353
if (!MemOpChains.empty()) {
1354
// Adjust the stack pointer for the stack arguments.
1355
Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
1356
&MemOpChains[0], MemOpChains.size());
1359
// Build a sequence of copy-to-reg nodes chained together with token chain
1360
// and flag operands which copy the outgoing args into the appropriate regs.
1362
for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
1363
Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
1364
RegsToPass[i].second, InFlag);
1365
InFlag = Chain.getValue(1);
1368
SmallVector<SDValue, 8> Ops;
1369
unsigned CallOpc = SPUISD::CALL;
1371
// If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
1372
// direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
1373
// node so that legalize doesn't hack it.
1374
if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
1375
const GlobalValue *GV = G->getGlobal();
1376
EVT CalleeVT = Callee.getValueType();
1377
SDValue Zero = DAG.getConstant(0, PtrVT);
1378
SDValue GA = DAG.getTargetGlobalAddress(GV, dl, CalleeVT);
1380
if (!ST->usingLargeMem()) {
1381
// Turn calls to targets that are defined (i.e., have bodies) into BRSL
1382
// style calls, otherwise, external symbols are BRASL calls. This assumes
1383
// that declared/defined symbols are in the same compilation unit and can
1384
// be reached through PC-relative jumps.
1387
// This may be an unsafe assumption for JIT and really large compilation
1389
if (GV->isDeclaration()) {
1390
Callee = DAG.getNode(SPUISD::AFormAddr, dl, CalleeVT, GA, Zero);
1392
Callee = DAG.getNode(SPUISD::PCRelAddr, dl, CalleeVT, GA, Zero);
1395
// "Large memory" mode: Turn all calls into indirect calls with a X-form
1397
Callee = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, GA, Zero);
1399
} else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
1400
EVT CalleeVT = Callee.getValueType();
1401
SDValue Zero = DAG.getConstant(0, PtrVT);
1402
SDValue ExtSym = DAG.getTargetExternalSymbol(S->getSymbol(),
1403
Callee.getValueType());
1405
if (!ST->usingLargeMem()) {
1406
Callee = DAG.getNode(SPUISD::AFormAddr, dl, CalleeVT, ExtSym, Zero);
1408
Callee = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, ExtSym, Zero);
1410
} else if (SDNode *Dest = isLSAAddress(Callee, DAG)) {
1411
// If this is an absolute destination address that appears to be a legal
1412
// local store address, use the munged value.
1413
Callee = SDValue(Dest, 0);
1416
Ops.push_back(Chain);
1417
Ops.push_back(Callee);
1419
// Add argument registers to the end of the list so that they are known live
1421
for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
1422
Ops.push_back(DAG.getRegister(RegsToPass[i].first,
1423
RegsToPass[i].second.getValueType()));
1425
if (InFlag.getNode())
1426
Ops.push_back(InFlag);
1427
// Returns a chain and a flag for retval copy to use.
1428
Chain = DAG.getNode(CallOpc, dl, DAG.getVTList(MVT::Other, MVT::Glue),
1429
&Ops[0], Ops.size());
1430
InFlag = Chain.getValue(1);
1432
Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumStackBytes, true),
1433
DAG.getIntPtrConstant(0, true), InFlag);
1435
InFlag = Chain.getValue(1);
1437
// If the function returns void, just return the chain.
1441
// Now handle the return value(s)
1442
SmallVector<CCValAssign, 16> RVLocs;
1443
CCState CCRetInfo(CallConv, isVarArg, DAG.getMachineFunction(),
1444
getTargetMachine(), RVLocs, *DAG.getContext());
1445
CCRetInfo.AnalyzeCallResult(Ins, CCC_SPU);
1448
// If the call has results, copy the values out of the ret val registers.
1449
for (unsigned i = 0; i != RVLocs.size(); ++i) {
1450
CCValAssign VA = RVLocs[i];
1452
SDValue Val = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), VA.getLocVT(),
1454
Chain = Val.getValue(1);
1455
InFlag = Val.getValue(2);
1456
InVals.push_back(Val);
1463
SPUTargetLowering::LowerReturn(SDValue Chain,
1464
CallingConv::ID CallConv, bool isVarArg,
1465
const SmallVectorImpl<ISD::OutputArg> &Outs,
1466
const SmallVectorImpl<SDValue> &OutVals,
1467
DebugLoc dl, SelectionDAG &DAG) const {
1469
SmallVector<CCValAssign, 16> RVLocs;
1470
CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
1471
getTargetMachine(), RVLocs, *DAG.getContext());
1472
CCInfo.AnalyzeReturn(Outs, RetCC_SPU);
1474
// If this is the first return lowered for this function, add the regs to the
1475
// liveout set for the function.
1476
if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
1477
for (unsigned i = 0; i != RVLocs.size(); ++i)
1478
DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
1483
// Copy the result values into the output registers.
1484
for (unsigned i = 0; i != RVLocs.size(); ++i) {
1485
CCValAssign &VA = RVLocs[i];
1486
assert(VA.isRegLoc() && "Can only return in registers!");
1487
Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
1489
Flag = Chain.getValue(1);
1493
return DAG.getNode(SPUISD::RET_FLAG, dl, MVT::Other, Chain, Flag);
1495
return DAG.getNode(SPUISD::RET_FLAG, dl, MVT::Other, Chain);
1499
//===----------------------------------------------------------------------===//
1500
// Vector related lowering:
1501
//===----------------------------------------------------------------------===//
1503
static ConstantSDNode *
1504
getVecImm(SDNode *N) {
1505
SDValue OpVal(0, 0);
1507
// Check to see if this buildvec has a single non-undef value in its elements.
1508
for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
1509
if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
1510
if (OpVal.getNode() == 0)
1511
OpVal = N->getOperand(i);
1512
else if (OpVal != N->getOperand(i))
1516
if (OpVal.getNode() != 0) {
1517
if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
1525
/// get_vec_i18imm - Test if this vector is a vector filled with the same value
1526
/// and the value fits into an unsigned 18-bit constant, and if so, return the
1528
SDValue SPU::get_vec_u18imm(SDNode *N, SelectionDAG &DAG,
1530
if (ConstantSDNode *CN = getVecImm(N)) {
1531
uint64_t Value = CN->getZExtValue();
1532
if (ValueType == MVT::i64) {
1533
uint64_t UValue = CN->getZExtValue();
1534
uint32_t upper = uint32_t(UValue >> 32);
1535
uint32_t lower = uint32_t(UValue);
1538
Value = Value >> 32;
1540
if (Value <= 0x3ffff)
1541
return DAG.getTargetConstant(Value, ValueType);
1547
/// get_vec_i16imm - Test if this vector is a vector filled with the same value
1548
/// and the value fits into a signed 16-bit constant, and if so, return the
1550
SDValue SPU::get_vec_i16imm(SDNode *N, SelectionDAG &DAG,
1552
if (ConstantSDNode *CN = getVecImm(N)) {
1553
int64_t Value = CN->getSExtValue();
1554
if (ValueType == MVT::i64) {
1555
uint64_t UValue = CN->getZExtValue();
1556
uint32_t upper = uint32_t(UValue >> 32);
1557
uint32_t lower = uint32_t(UValue);
1560
Value = Value >> 32;
1562
if (Value >= -(1 << 15) && Value <= ((1 << 15) - 1)) {
1563
return DAG.getTargetConstant(Value, ValueType);
1570
/// get_vec_i10imm - Test if this vector is a vector filled with the same value
1571
/// and the value fits into a signed 10-bit constant, and if so, return the
1573
SDValue SPU::get_vec_i10imm(SDNode *N, SelectionDAG &DAG,
1575
if (ConstantSDNode *CN = getVecImm(N)) {
1576
int64_t Value = CN->getSExtValue();
1577
if (ValueType == MVT::i64) {
1578
uint64_t UValue = CN->getZExtValue();
1579
uint32_t upper = uint32_t(UValue >> 32);
1580
uint32_t lower = uint32_t(UValue);
1583
Value = Value >> 32;
1585
if (isInt<10>(Value))
1586
return DAG.getTargetConstant(Value, ValueType);
1592
/// get_vec_i8imm - Test if this vector is a vector filled with the same value
1593
/// and the value fits into a signed 8-bit constant, and if so, return the
1596
/// @note: The incoming vector is v16i8 because that's the only way we can load
1597
/// constant vectors. Thus, we test to see if the upper and lower bytes are the
1599
SDValue SPU::get_vec_i8imm(SDNode *N, SelectionDAG &DAG,
1601
if (ConstantSDNode *CN = getVecImm(N)) {
1602
int Value = (int) CN->getZExtValue();
1603
if (ValueType == MVT::i16
1604
&& Value <= 0xffff /* truncated from uint64_t */
1605
&& ((short) Value >> 8) == ((short) Value & 0xff))
1606
return DAG.getTargetConstant(Value & 0xff, ValueType);
1607
else if (ValueType == MVT::i8
1608
&& (Value & 0xff) == Value)
1609
return DAG.getTargetConstant(Value, ValueType);
1615
/// get_ILHUvec_imm - Test if this vector is a vector filled with the same value
1616
/// and the value fits into a signed 16-bit constant, and if so, return the
1618
SDValue SPU::get_ILHUvec_imm(SDNode *N, SelectionDAG &DAG,
1620
if (ConstantSDNode *CN = getVecImm(N)) {
1621
uint64_t Value = CN->getZExtValue();
1622
if ((ValueType == MVT::i32
1623
&& ((unsigned) Value & 0xffff0000) == (unsigned) Value)
1624
|| (ValueType == MVT::i64 && (Value & 0xffff0000) == Value))
1625
return DAG.getTargetConstant(Value >> 16, ValueType);
1631
/// get_v4i32_imm - Catch-all for general 32-bit constant vectors
1632
SDValue SPU::get_v4i32_imm(SDNode *N, SelectionDAG &DAG) {
1633
if (ConstantSDNode *CN = getVecImm(N)) {
1634
return DAG.getTargetConstant((unsigned) CN->getZExtValue(), MVT::i32);
1640
/// get_v4i32_imm - Catch-all for general 64-bit constant vectors
1641
SDValue SPU::get_v2i64_imm(SDNode *N, SelectionDAG &DAG) {
1642
if (ConstantSDNode *CN = getVecImm(N)) {
1643
return DAG.getTargetConstant((unsigned) CN->getZExtValue(), MVT::i64);
1649
//! Lower a BUILD_VECTOR instruction creatively:
1651
LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
1652
EVT VT = Op.getValueType();
1653
EVT EltVT = VT.getVectorElementType();
1654
DebugLoc dl = Op.getDebugLoc();
1655
BuildVectorSDNode *BCN = dyn_cast<BuildVectorSDNode>(Op.getNode());
1656
assert(BCN != 0 && "Expected BuildVectorSDNode in SPU LowerBUILD_VECTOR");
1657
unsigned minSplatBits = EltVT.getSizeInBits();
1659
if (minSplatBits < 16)
1662
APInt APSplatBits, APSplatUndef;
1663
unsigned SplatBitSize;
1666
if (!BCN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
1667
HasAnyUndefs, minSplatBits)
1668
|| minSplatBits < SplatBitSize)
1669
return SDValue(); // Wasn't a constant vector or splat exceeded min
1671
uint64_t SplatBits = APSplatBits.getZExtValue();
1673
switch (VT.getSimpleVT().SimpleTy) {
1675
report_fatal_error("CellSPU: Unhandled VT in LowerBUILD_VECTOR, VT = " +
1676
Twine(VT.getEVTString()));
1679
uint32_t Value32 = uint32_t(SplatBits);
1680
assert(SplatBitSize == 32
1681
&& "LowerBUILD_VECTOR: Unexpected floating point vector element.");
1682
// NOTE: pretend the constant is an integer. LLVM won't load FP constants
1683
SDValue T = DAG.getConstant(Value32, MVT::i32);
1684
return DAG.getNode(ISD::BITCAST, dl, MVT::v4f32,
1685
DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, T,T,T,T));
1688
uint64_t f64val = uint64_t(SplatBits);
1689
assert(SplatBitSize == 64
1690
&& "LowerBUILD_VECTOR: 64-bit float vector size > 8 bytes.");
1691
// NOTE: pretend the constant is an integer. LLVM won't load FP constants
1692
SDValue T = DAG.getConstant(f64val, MVT::i64);
1693
return DAG.getNode(ISD::BITCAST, dl, MVT::v2f64,
1694
DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64, T, T));
1697
// 8-bit constants have to be expanded to 16-bits
1698
unsigned short Value16 = SplatBits /* | (SplatBits << 8) */;
1699
SmallVector<SDValue, 8> Ops;
1701
Ops.assign(8, DAG.getConstant(Value16, MVT::i16));
1702
return DAG.getNode(ISD::BITCAST, dl, VT,
1703
DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8i16, &Ops[0], Ops.size()));
1706
unsigned short Value16 = SplatBits;
1707
SDValue T = DAG.getConstant(Value16, EltVT);
1708
SmallVector<SDValue, 8> Ops;
1711
return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Ops[0], Ops.size());
1714
SDValue T = DAG.getConstant(unsigned(SplatBits), VT.getVectorElementType());
1715
return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, T, T, T, T);
1718
return SPU::LowerV2I64Splat(VT, DAG, SplatBits, dl);
1726
SPU::LowerV2I64Splat(EVT OpVT, SelectionDAG& DAG, uint64_t SplatVal,
1728
uint32_t upper = uint32_t(SplatVal >> 32);
1729
uint32_t lower = uint32_t(SplatVal);
1731
if (upper == lower) {
1732
// Magic constant that can be matched by IL, ILA, et. al.
1733
SDValue Val = DAG.getTargetConstant(upper, MVT::i32);
1734
return DAG.getNode(ISD::BITCAST, dl, OpVT,
1735
DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
1736
Val, Val, Val, Val));
1738
bool upper_special, lower_special;
1740
// NOTE: This code creates common-case shuffle masks that can be easily
1741
// detected as common expressions. It is not attempting to create highly
1742
// specialized masks to replace any and all 0's, 0xff's and 0x80's.
1744
// Detect if the upper or lower half is a special shuffle mask pattern:
1745
upper_special = (upper == 0 || upper == 0xffffffff || upper == 0x80000000);
1746
lower_special = (lower == 0 || lower == 0xffffffff || lower == 0x80000000);
1748
// Both upper and lower are special, lower to a constant pool load:
1749
if (lower_special && upper_special) {
1750
SDValue UpperVal = DAG.getConstant(upper, MVT::i32);
1751
SDValue LowerVal = DAG.getConstant(lower, MVT::i32);
1752
SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
1753
UpperVal, LowerVal, UpperVal, LowerVal);
1754
return DAG.getNode(ISD::BITCAST, dl, OpVT, BV);
1759
SmallVector<SDValue, 16> ShufBytes;
1762
// Create lower vector if not a special pattern
1763
if (!lower_special) {
1764
SDValue LO32C = DAG.getConstant(lower, MVT::i32);
1765
LO32 = DAG.getNode(ISD::BITCAST, dl, OpVT,
1766
DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
1767
LO32C, LO32C, LO32C, LO32C));
1770
// Create upper vector if not a special pattern
1771
if (!upper_special) {
1772
SDValue HI32C = DAG.getConstant(upper, MVT::i32);
1773
HI32 = DAG.getNode(ISD::BITCAST, dl, OpVT,
1774
DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
1775
HI32C, HI32C, HI32C, HI32C));
1778
// If either upper or lower are special, then the two input operands are
1779
// the same (basically, one of them is a "don't care")
1785
for (int i = 0; i < 4; ++i) {
1787
for (int j = 0; j < 4; ++j) {
1789
bool process_upper, process_lower;
1791
process_upper = (upper_special && (i & 1) == 0);
1792
process_lower = (lower_special && (i & 1) == 1);
1794
if (process_upper || process_lower) {
1795
if ((process_upper && upper == 0)
1796
|| (process_lower && lower == 0))
1798
else if ((process_upper && upper == 0xffffffff)
1799
|| (process_lower && lower == 0xffffffff))
1801
else if ((process_upper && upper == 0x80000000)
1802
|| (process_lower && lower == 0x80000000))
1803
val |= (j == 0 ? 0xe0 : 0x80);
1805
val |= i * 4 + j + ((i & 1) * 16);
1808
ShufBytes.push_back(DAG.getConstant(val, MVT::i32));
1811
return DAG.getNode(SPUISD::SHUFB, dl, OpVT, HI32, LO32,
1812
DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
1813
&ShufBytes[0], ShufBytes.size()));
1817
/// LowerVECTOR_SHUFFLE - Lower a vector shuffle (V1, V2, V3) to something on
1818
/// which the Cell can operate. The code inspects V3 to ascertain whether the
1819
/// permutation vector, V3, is monotonically increasing with one "exception"
1820
/// element, e.g., (0, 1, _, 3). If this is the case, then generate a
1821
/// SHUFFLE_MASK synthetic instruction. Otherwise, spill V3 to the constant pool.
1822
/// In either case, the net result is going to eventually invoke SHUFB to
1823
/// permute/shuffle the bytes from V1 and V2.
1825
/// SHUFFLE_MASK is eventually selected as one of the C*D instructions, generate
1826
/// control word for byte/halfword/word insertion. This takes care of a single
1827
/// element move from V2 into V1.
1829
/// SPUISD::SHUFB is eventually selected as Cell's <i>shufb</i> instructions.
1830
// Lower ISD::VECTOR_SHUFFLE: either a single-element insert via SHUFFLE_MASK
// (C*[DX]), a byte rotation (ROTBYTES_LEFT), or a general SHUFB with an
// explicit byte permutation mask built from the shuffle mask.
// NOTE(review): the bare numeric lines below are line-numbering artifacts of
// extraction; several original source lines are missing from this chunk
// (e.g. the `rotate`/`rotamt` declarations and some loop-body statements).
static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
1831
const ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op);
1832
SDValue V1 = Op.getOperand(0);
1833
SDValue V2 = Op.getOperand(1);
1834
DebugLoc dl = Op.getDebugLoc();
1836
// An undef V2 is treated as a duplicate of V1 so the mask logic below only
// ever sees defined inputs.
if (V2.getOpcode() == ISD::UNDEF) V2 = V1;
1838
// If we have a single element being moved from V1 to V2, this can be handled
1839
// using the C*[DX] compute mask instructions, but the vector elements have
1840
// to be monotonically increasing with one exception element, and the source
1841
// slot of the element to move must be the same as the destination.
1842
EVT VecVT = V1.getValueType();
1843
EVT EltVT = VecVT.getVectorElementType();
1844
unsigned EltsFromV2 = 0;
1845
unsigned V2EltOffset = 0;
1846
unsigned V2EltIdx0 = 0;
1847
unsigned CurrElt = 0;
1848
unsigned MaxElts = VecVT.getVectorNumElements();
1849
unsigned PrevElt = 0;
1850
bool monotonic = true;
1853
EVT maskVT; // which of the c?d instructions to use
1855
if (EltVT == MVT::i8) {
1857
maskVT = MVT::v16i8;
1858
} else if (EltVT == MVT::i16) {
1860
maskVT = MVT::v8i16;
1861
} else if (EltVT == MVT::i32 || EltVT == MVT::f32) {
1863
maskVT = MVT::v4i32;
1864
} else if (EltVT == MVT::i64 || EltVT == MVT::f64) {
1866
maskVT = MVT::v2i64;
1868
llvm_unreachable("Unhandled vector type in LowerVECTOR_SHUFFLE");
1870
// Scan the shuffle mask, classifying it as single-element-insert,
// rotation, or general shuffle.
for (unsigned i = 0; i != MaxElts; ++i) {
1871
if (SVN->getMaskElt(i) < 0)
1874
unsigned SrcElt = SVN->getMaskElt(i);
1877
if (SrcElt >= V2EltIdx0) {
1878
// TODO: optimize for the monotonic case when several consecutive
1879
// elements are taken from V2. Do we ever get such a case?
1880
if (EltsFromV2 == 0 && CurrElt == (SrcElt - V2EltIdx0))
1881
V2EltOffset = (SrcElt - V2EltIdx0) * (EltVT.getSizeInBits()/8);
1885
} else if (CurrElt != SrcElt) {
1893
if (PrevElt > 0 && SrcElt < MaxElts) {
1894
if ((PrevElt == SrcElt - 1)
1895
|| (PrevElt == MaxElts - 1 && SrcElt == 0)) {
1900
} else if (i == 0 || (PrevElt==0 && SrcElt==1)) {
1901
// First time or after a "wrap around"
1905
// This isn't a rotation, takes elements from vector 2
1911
if (EltsFromV2 == 1 && monotonic) {
1912
// Compute mask and shuffle
1913
EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
1915
// As SHUFFLE_MASK becomes a c?d instruction, feed it an address
1916
// R1 ($sp) is used here only as it is guaranteed to have last bits zero
1917
SDValue Pointer = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
1918
DAG.getRegister(SPU::R1, PtrVT),
1919
DAG.getConstant(V2EltOffset, MVT::i32));
1920
SDValue ShufMaskOp = DAG.getNode(SPUISD::SHUFFLE_MASK, dl,
1923
// Use shuffle mask in SHUFB synthetic instruction:
1924
return DAG.getNode(SPUISD::SHUFB, dl, V1.getValueType(), V2, V1,
1926
} else if (rotate) {
1929
// Convert element rotate amount to a byte rotate amount for ROTBYTES_LEFT.
rotamt *= EltVT.getSizeInBits()/8;
1930
return DAG.getNode(SPUISD::ROTBYTES_LEFT, dl, V1.getValueType(),
1931
V1, DAG.getConstant(rotamt, MVT::i16));
1933
// Convert the SHUFFLE_VECTOR mask's input element units to the
1935
unsigned BytesPerElement = EltVT.getSizeInBits()/8;
1937
SmallVector<SDValue, 16> ResultMask;
1938
for (unsigned i = 0, e = MaxElts; i != e; ++i) {
1939
unsigned SrcElt = SVN->getMaskElt(i) < 0 ? 0 : SVN->getMaskElt(i);
1941
for (unsigned j = 0; j < BytesPerElement; ++j)
1942
ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j,MVT::i8));
1944
SDValue VPermMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v16i8,
1945
&ResultMask[0], ResultMask.size());
1946
return DAG.getNode(SPUISD::SHUFB, dl, V1.getValueType(), V1, V2, VPermMask);
1950
// Lower ISD::SCALAR_TO_VECTOR: splat a constant scalar into a BUILD_VECTOR,
// or move a non-constant scalar into the preferred slot via PREFSLOT2VEC.
// NOTE(review): bare numeric lines are extraction artifacts; the declarations
// of `n_copies` and `VT` (and some braces) are on lines missing from this
// chunk.
static SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) {
1951
SDValue Op0 = Op.getOperand(0); // Op0 = the scalar
1952
DebugLoc dl = Op.getDebugLoc();
1954
if (Op0.getNode()->getOpcode() == ISD::Constant) {
1955
// For a constant, build the appropriate constant vector, which will
1956
// eventually simplify to a vector register load.
1958
ConstantSDNode *CN = cast<ConstantSDNode>(Op0.getNode());
1959
SmallVector<SDValue, 16> ConstVecValues;
1963
// Create a constant vector:
1964
switch (Op.getValueType().getSimpleVT().SimpleTy) {
1965
default: llvm_unreachable("Unexpected constant value type in "
1966
"LowerSCALAR_TO_VECTOR");
1967
case MVT::v16i8: n_copies = 16; VT = MVT::i8; break;
1968
case MVT::v8i16: n_copies = 8; VT = MVT::i16; break;
1969
case MVT::v4i32: n_copies = 4; VT = MVT::i32; break;
1970
case MVT::v4f32: n_copies = 4; VT = MVT::f32; break;
1971
case MVT::v2i64: n_copies = 2; VT = MVT::i64; break;
1972
case MVT::v2f64: n_copies = 2; VT = MVT::f64; break;
1975
// Replicate the scalar constant n_copies times to fill the vector.
SDValue CValue = DAG.getConstant(CN->getZExtValue(), VT);
1976
for (size_t j = 0; j < n_copies; ++j)
1977
ConstVecValues.push_back(CValue);
1979
return DAG.getNode(ISD::BUILD_VECTOR, dl, Op.getValueType(),
1980
&ConstVecValues[0], ConstVecValues.size());
1982
// Otherwise, copy the value from one register to another:
1983
switch (Op0.getValueType().getSimpleVT().SimpleTy) {
1984
default: llvm_unreachable("Unexpected value type in LowerSCALAR_TO_VECTOR");
1991
return DAG.getNode(SPUISD::PREFSLOT2VEC, dl, Op.getValueType(), Op0, Op0);
1996
// Lower ISD::EXTRACT_VECTOR_ELT. For a constant index, build a SHUFB byte
// mask that moves the requested element into the preferred slot and extract
// it with VEC2PREFSLOT. For a variable index, rotate the element to slot 0
// (SHL_BYTES), replicate it across the vector with SHUFB, and extract.
// NOTE(review): bare numeric lines are extraction artifacts; the `retval`
// declaration, some `case` labels, and closing braces are on lines missing
// from this chunk.
static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
1997
EVT VT = Op.getValueType();
1998
SDValue N = Op.getOperand(0);
1999
SDValue Elt = Op.getOperand(1);
2000
DebugLoc dl = Op.getDebugLoc();
2003
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt)) {
2004
// Constant argument:
2005
int EltNo = (int) C->getZExtValue();
2008
// Sanity-check the extraction slot against the element count for each type.
if (VT == MVT::i8 && EltNo >= 16)
2009
llvm_unreachable("SPU LowerEXTRACT_VECTOR_ELT: i8 extraction slot > 15");
2010
else if (VT == MVT::i16 && EltNo >= 8)
2011
llvm_unreachable("SPU LowerEXTRACT_VECTOR_ELT: i16 extraction slot > 7");
2012
else if (VT == MVT::i32 && EltNo >= 4)
2013
llvm_unreachable("SPU LowerEXTRACT_VECTOR_ELT: i32 extraction slot > 4");
2014
else if (VT == MVT::i64 && EltNo >= 2)
2015
llvm_unreachable("SPU LowerEXTRACT_VECTOR_ELT: i64 extraction slot > 2");
2017
if (EltNo == 0 && (VT == MVT::i32 || VT == MVT::i64)) {
2018
// i32 and i64: Element 0 is the preferred slot
2019
return DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT, N);
2022
// Need to generate shuffle mask and extract:
2023
int prefslot_begin = -1, prefslot_end = -1;
2024
int elt_byte = EltNo * VT.getSizeInBits() / 8;
2026
// Preferred-slot byte range depends on the element width (see
// prefslotOffset at the top of the file for the same layout).
switch (VT.getSimpleVT().SimpleTy) {
2027
default: llvm_unreachable("Invalid value type!");
2029
prefslot_begin = prefslot_end = 3;
2033
prefslot_begin = 2; prefslot_end = 3;
2038
prefslot_begin = 0; prefslot_end = 3;
2043
prefslot_begin = 0; prefslot_end = 7;
2048
assert(prefslot_begin != -1 && prefslot_end != -1 &&
2049
"LowerEXTRACT_VECTOR_ELT: preferred slots uninitialized");
2051
unsigned int ShufBytes[16] = {
2052
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
2054
for (int i = 0; i < 16; ++i) {
2055
// zero fill upper part of preferred slot, don't care about the
2057
unsigned int mask_val;
2058
if (i <= prefslot_end) {
2060
((i < prefslot_begin)
2062
: elt_byte + (i - prefslot_begin));
2064
ShufBytes[i] = mask_val;
2066
// Outside the preferred slot, repeat the preferred-slot pattern.
ShufBytes[i] = ShufBytes[i % (prefslot_end + 1)];
2069
// Pack the 16 mask bytes into four big-endian i32 words for BUILD_VECTOR.
SDValue ShufMask[4];
2070
for (unsigned i = 0; i < sizeof(ShufMask)/sizeof(ShufMask[0]); ++i) {
2071
unsigned bidx = i * 4;
2072
unsigned int bits = ((ShufBytes[bidx] << 24) |
2073
(ShufBytes[bidx+1] << 16) |
2074
(ShufBytes[bidx+2] << 8) |
2076
ShufMask[i] = DAG.getConstant(bits, MVT::i32);
2079
SDValue ShufMaskVec =
2080
DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
2081
&ShufMask[0], sizeof(ShufMask)/sizeof(ShufMask[0]));
2083
retval = DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT,
2084
DAG.getNode(SPUISD::SHUFB, dl, N.getValueType(),
2085
N, N, ShufMaskVec));
2087
// Variable index: Rotate the requested element into slot 0, then replicate
2088
// slot 0 across the vector
2089
EVT VecVT = N.getValueType();
2090
if (!VecVT.isSimple() || !VecVT.isVector()) {
2091
report_fatal_error("LowerEXTRACT_VECTOR_ELT: Must have a simple, 128-bit"
2095
// Make life easier by making sure the index is zero-extended to i32
2096
if (Elt.getValueType() != MVT::i32)
2097
Elt = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Elt);
2099
// Scale the index to a bit/byte shift quantity
2101
APInt(32, uint64_t(16 / N.getValueType().getVectorNumElements()), false);
2102
unsigned scaleShift = scaleFactor.logBase2();
2105
if (scaleShift > 0) {
2106
// Scale the shift factor:
2107
Elt = DAG.getNode(ISD::SHL, dl, MVT::i32, Elt,
2108
DAG.getConstant(scaleShift, MVT::i32));
2111
vecShift = DAG.getNode(SPUISD::SHL_BYTES, dl, VecVT, N, Elt);
2113
// Replicate the bytes starting at byte 0 across the entire vector (for
2114
// consistency with the notion of a unified register set)
2117
switch (VT.getSimpleVT().SimpleTy) {
2119
report_fatal_error("LowerEXTRACT_VECTOR_ELT(varable): Unhandled vector"
2123
// i8: replicate byte 0 into every byte position.
SDValue factor = DAG.getConstant(0x00000000, MVT::i32);
2124
replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
2125
factor, factor, factor, factor);
2129
// i16: replicate bytes {0,1} into every halfword position.
SDValue factor = DAG.getConstant(0x00010001, MVT::i32);
2130
replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
2131
factor, factor, factor, factor);
2136
// i32/f32: replicate bytes {0,1,2,3} into every word position.
SDValue factor = DAG.getConstant(0x00010203, MVT::i32);
2137
replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
2138
factor, factor, factor, factor);
2143
// i64/f64: replicate bytes {0..7} into both doubleword positions.
SDValue loFactor = DAG.getConstant(0x00010203, MVT::i32);
2144
SDValue hiFactor = DAG.getConstant(0x04050607, MVT::i32);
2145
replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
2146
loFactor, hiFactor, loFactor, hiFactor);
2151
retval = DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT,
2152
DAG.getNode(SPUISD::SHUFB, dl, VecVT,
2153
vecShift, vecShift, replicate));
2159
// Lower ISD::INSERT_VECTOR_ELT: compute a C*D-style insertion mask via
// SHUFFLE_MASK (fed with a $sp-relative address whose low bits encode the
// byte offset of the target lane), then merge the scalar into the vector
// with SHUFB.
// NOTE(review): bare numeric lines are extraction artifacts; the `Offset`
// declaration and the final `return` line are on lines missing from this
// chunk.
static SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
2160
SDValue VecOp = Op.getOperand(0);
2161
SDValue ValOp = Op.getOperand(1);
2162
SDValue IdxOp = Op.getOperand(2);
2163
DebugLoc dl = Op.getDebugLoc();
2164
EVT VT = Op.getValueType();
2165
EVT eltVT = ValOp.getValueType();
2167
// use 0 when the lane to insert to is 'undef'
2169
if (IdxOp.getOpcode() != ISD::UNDEF) {
2170
ConstantSDNode *CN = cast<ConstantSDNode>(IdxOp);
2171
assert(CN != 0 && "LowerINSERT_VECTOR_ELT: Index is not constant!");
2172
// Byte offset of the target lane within the 128-bit register.
Offset = (CN->getSExtValue()) * eltVT.getSizeInBits()/8;
2175
EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
2176
// Use $sp ($1) because it's always 16-byte aligned and it's available:
2177
SDValue Pointer = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
2178
DAG.getRegister(SPU::R1, PtrVT),
2179
DAG.getConstant(Offset, PtrVT));
2180
// widen the mask when dealing with half vectors
2181
EVT maskVT = EVT::getVectorVT(*(DAG.getContext()), VT.getVectorElementType(),
2182
128/ VT.getVectorElementType().getSizeInBits());
2183
SDValue ShufMask = DAG.getNode(SPUISD::SHUFFLE_MASK, dl, maskVT, Pointer);
2186
DAG.getNode(SPUISD::SHUFB, dl, VT,
2187
DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, ValOp),
2189
DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, ShufMask));
2194
// LowerI8Math - lower i8 arithmetic by promoting the operands to i16,
// performing the i16 operation (Opc), and truncating back to i8. The sign
// vs. zero extension and the shift-amount-type fixups vary per operation;
// the `case` labels selecting each branch are on lines missing from this
// chunk (bare numeric lines are extraction artifacts of the original
// file's line numbering).
static SDValue LowerI8Math(SDValue Op, SelectionDAG &DAG, unsigned Opc,
2195
const TargetLowering &TLI)
2197
SDValue N0 = Op.getOperand(0); // Everything has at least one operand
2198
DebugLoc dl = Op.getDebugLoc();
2199
EVT ShiftVT = TLI.getShiftAmountTy(N0.getValueType());
2201
assert(Op.getValueType() == MVT::i8);
2204
llvm_unreachable("Unhandled i8 math operator");
2206
// 8-bit addition: Promote the arguments up to 16-bits and truncate
2208
SDValue N1 = Op.getOperand(1);
2209
N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0);
2210
N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N1);
2211
return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
2212
DAG.getNode(Opc, dl, MVT::i16, N0, N1));
2217
// 8-bit subtraction: Promote the arguments up to 16-bits and truncate
2219
SDValue N1 = Op.getOperand(1);
2220
N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0);
2221
N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N1);
2222
return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
2223
DAG.getNode(Opc, dl, MVT::i16, N0, N1));
2227
// Rotate-style op: zero-extend the value and legalize the shift amount.
SDValue N1 = Op.getOperand(1);
2228
EVT N1VT = N1.getValueType();
2230
N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, N0);
2231
if (!N1VT.bitsEq(ShiftVT)) {
2232
unsigned N1Opc = N1.getValueType().bitsLT(ShiftVT)
2235
N1 = DAG.getNode(N1Opc, dl, ShiftVT, N1);
2238
// Replicate lower 8-bits into upper 8:
2240
DAG.getNode(ISD::OR, dl, MVT::i16, N0,
2241
DAG.getNode(ISD::SHL, dl, MVT::i16,
2242
N0, DAG.getConstant(8, MVT::i32)));
2244
// Truncate back down to i8
2245
return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
2246
DAG.getNode(Opc, dl, MVT::i16, ExpandArg, N1));
2250
// Logical-shift-style op: zero-extend value, normalize shift amount type.
SDValue N1 = Op.getOperand(1);
2251
EVT N1VT = N1.getValueType();
2253
N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, N0);
2254
if (!N1VT.bitsEq(ShiftVT)) {
2255
unsigned N1Opc = ISD::ZERO_EXTEND;
2257
if (N1.getValueType().bitsGT(ShiftVT))
2258
N1Opc = ISD::TRUNCATE;
2260
N1 = DAG.getNode(N1Opc, dl, ShiftVT, N1);
2263
return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
2264
DAG.getNode(Opc, dl, MVT::i16, N0, N1));
2267
// Arithmetic-shift-style op: sign-extend value, normalize shift amount type.
SDValue N1 = Op.getOperand(1);
2268
EVT N1VT = N1.getValueType();
2270
N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0);
2271
if (!N1VT.bitsEq(ShiftVT)) {
2272
unsigned N1Opc = ISD::SIGN_EXTEND;
2274
if (N1VT.bitsGT(ShiftVT))
2275
N1Opc = ISD::TRUNCATE;
2276
N1 = DAG.getNode(N1Opc, dl, ShiftVT, N1);
2279
return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
2280
DAG.getNode(Opc, dl, MVT::i16, N0, N1));
2283
// Multiply-style op: sign-extend both operands to i16 and truncate back.
SDValue N1 = Op.getOperand(1);
2285
N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0);
2286
N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N1);
2287
return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
2288
DAG.getNode(Opc, dl, MVT::i16, N0, N1));
2293
//! Lower byte immediate operations for v16i8 vectors:
2295
// LowerByteImmed - lower v16i8 AND/OR/XOR with a constant-splat operand so
// the splatted byte can later be selected as an ANDBI/ORBI/XORBI immediate
// form. Normalizes operand order (constant may be operand 0 or 1, possibly
// behind a BITCAST), then rebuilds the node with an explicit 16-byte splat
// of target constants.
// NOTE(review): bare numeric lines are extraction artifacts; the `static
// SDValue` return-type line, the ConstVec/Arg/HasAnyUndefs declarations,
// and the fallthrough `return` are on lines missing from this chunk.
LowerByteImmed(SDValue Op, SelectionDAG &DAG) {
2298
EVT VT = Op.getValueType();
2299
DebugLoc dl = Op.getDebugLoc();
2301
ConstVec = Op.getOperand(0);
2302
Arg = Op.getOperand(1);
2303
if (ConstVec.getNode()->getOpcode() != ISD::BUILD_VECTOR) {
2304
if (ConstVec.getNode()->getOpcode() == ISD::BITCAST) {
2305
ConstVec = ConstVec.getOperand(0);
2307
// Constant wasn't operand 0; try the swapped operand order.
ConstVec = Op.getOperand(1);
2308
Arg = Op.getOperand(0);
2309
if (ConstVec.getNode()->getOpcode() == ISD::BITCAST) {
2310
ConstVec = ConstVec.getOperand(0);
2315
if (ConstVec.getNode()->getOpcode() == ISD::BUILD_VECTOR) {
2316
BuildVectorSDNode *BCN = dyn_cast<BuildVectorSDNode>(ConstVec.getNode());
2317
assert(BCN != 0 && "Expected BuildVectorSDNode in SPU LowerByteImmed");
2319
APInt APSplatBits, APSplatUndef;
2320
unsigned SplatBitSize;
2322
unsigned minSplatBits = VT.getVectorElementType().getSizeInBits();
2324
if (BCN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
2325
HasAnyUndefs, minSplatBits)
2326
&& minSplatBits <= SplatBitSize) {
2327
uint64_t SplatBits = APSplatBits.getZExtValue();
2328
// Keep only the low byte: the *BI instruction immediates are 8 bits.
SDValue tc = DAG.getTargetConstant(SplatBits & 0xff, MVT::i8);
2330
SmallVector<SDValue, 16> tcVec;
2331
tcVec.assign(16, tc);
2332
return DAG.getNode(Op.getNode()->getOpcode(), dl, VT, Arg,
2333
DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &tcVec[0], tcVec.size()));
2337
// These operations (AND, OR, XOR) are legal, they just couldn't be custom
2338
// lowered. Return the operation, rather than a null SDValue.
2342
//! Custom lowering for CTPOP (count population)
2344
Custom lowering code that counts the number ones in the input
2345
operand. SPU has such an instruction, but it counts the number of
2346
ones per byte, which then have to be accumulated.
2348
// Lower ISD::CTPOP using SPU's CNTB (count ones per byte), then accumulate
// the per-byte counts: i8 needs no accumulation; i16 adds the two byte
// counts via a shift; i32 does a two-stage shift/add reduction. The
// `case MVT::i8/i16/i32` labels and a few intermediate declarations
// (Comp1/Sum1/Comp2/Sum2) are on lines missing from this chunk; the bare
// numeric lines are extraction artifacts.
static SDValue LowerCTPOP(SDValue Op, SelectionDAG &DAG) {
2349
EVT VT = Op.getValueType();
2350
EVT vecVT = EVT::getVectorVT(*DAG.getContext(),
2351
VT, (128 / VT.getSizeInBits()));
2352
DebugLoc dl = Op.getDebugLoc();
2354
switch (VT.getSimpleVT().SimpleTy) {
2355
default: llvm_unreachable("Invalid value type!");
2357
// i8 case: CNTB already yields the full population count of one byte.
SDValue N = Op.getOperand(0);
2358
SDValue Elt0 = DAG.getConstant(0, MVT::i32);
2360
SDValue Promote = DAG.getNode(SPUISD::PREFSLOT2VEC, dl, vecVT, N, N);
2361
SDValue CNTB = DAG.getNode(SPUISD::CNTB, dl, vecVT, Promote);
2363
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i8, CNTB, Elt0);
2367
// i16 case: sum the two byte counts (value >> 8) + value, mask to 0x0f.
MachineFunction &MF = DAG.getMachineFunction();
2368
MachineRegisterInfo &RegInfo = MF.getRegInfo();
2370
unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R16CRegClass);
2372
SDValue N = Op.getOperand(0);
2373
SDValue Elt0 = DAG.getConstant(0, MVT::i16);
2374
SDValue Mask0 = DAG.getConstant(0x0f, MVT::i16);
2375
SDValue Shift1 = DAG.getConstant(8, MVT::i32);
2377
SDValue Promote = DAG.getNode(SPUISD::PREFSLOT2VEC, dl, vecVT, N, N);
2378
SDValue CNTB = DAG.getNode(SPUISD::CNTB, dl, vecVT, Promote);
2380
// CNTB_result becomes the chain to which all of the virtual registers
2381
// CNTB_reg, SUM1_reg become associated:
2382
SDValue CNTB_result =
2383
DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i16, CNTB, Elt0);
2385
SDValue CNTB_rescopy =
2386
DAG.getCopyToReg(CNTB_result, dl, CNTB_reg, CNTB_result);
2388
SDValue Tmp1 = DAG.getCopyFromReg(CNTB_rescopy, dl, CNTB_reg, MVT::i16);
2390
return DAG.getNode(ISD::AND, dl, MVT::i16,
2391
DAG.getNode(ISD::ADD, dl, MVT::i16,
2392
DAG.getNode(ISD::SRL, dl, MVT::i16,
2399
// i32 case: two shift/add stages (by 16, then by 8), mask to 0xff.
MachineFunction &MF = DAG.getMachineFunction();
2400
MachineRegisterInfo &RegInfo = MF.getRegInfo();
2402
unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
2403
unsigned SUM1_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
2405
SDValue N = Op.getOperand(0);
2406
SDValue Elt0 = DAG.getConstant(0, MVT::i32);
2407
SDValue Mask0 = DAG.getConstant(0xff, MVT::i32);
2408
SDValue Shift1 = DAG.getConstant(16, MVT::i32);
2409
SDValue Shift2 = DAG.getConstant(8, MVT::i32);
2411
SDValue Promote = DAG.getNode(SPUISD::PREFSLOT2VEC, dl, vecVT, N, N);
2412
SDValue CNTB = DAG.getNode(SPUISD::CNTB, dl, vecVT, Promote);
2414
// CNTB_result becomes the chain to which all of the virtual registers
2415
// CNTB_reg, SUM1_reg become associated:
2416
SDValue CNTB_result =
2417
DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32, CNTB, Elt0);
2419
SDValue CNTB_rescopy =
2420
DAG.getCopyToReg(CNTB_result, dl, CNTB_reg, CNTB_result);
2423
DAG.getNode(ISD::SRL, dl, MVT::i32,
2424
DAG.getCopyFromReg(CNTB_rescopy, dl, CNTB_reg, MVT::i32),
2428
DAG.getNode(ISD::ADD, dl, MVT::i32, Comp1,
2429
DAG.getCopyFromReg(CNTB_rescopy, dl, CNTB_reg, MVT::i32));
2431
SDValue Sum1_rescopy =
2432
DAG.getCopyToReg(CNTB_result, dl, SUM1_reg, Sum1);
2435
DAG.getNode(ISD::SRL, dl, MVT::i32,
2436
DAG.getCopyFromReg(Sum1_rescopy, dl, SUM1_reg, MVT::i32),
2439
DAG.getNode(ISD::ADD, dl, MVT::i32, Comp2,
2440
DAG.getCopyFromReg(Sum1_rescopy, dl, SUM1_reg, MVT::i32));
2442
return DAG.getNode(ISD::AND, dl, MVT::i32, Sum2, Mask0);
2452
//! Lower ISD::FP_TO_SINT, ISD::FP_TO_UINT for i32
2454
f32->i32 passes through unchanged, whereas f64->i32 expands to a libcall.
2455
All conversions to i64 are expanded to a libcall.
2457
// Lower ISD::FP_TO_SINT / ISD::FP_TO_UINT. f32->i32 is handled natively
// (falls through, on lines missing from this chunk); f64->i32 and any
// ->i64 conversion is expanded to a runtime library call via ExpandLibCall.
// NOTE(review): bare numeric lines are extraction artifacts; the `LC` and
// `Dummy` declarations are on missing lines.
static SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
2458
const SPUTargetLowering &TLI) {
2459
EVT OpVT = Op.getValueType();
2460
SDValue Op0 = Op.getOperand(0);
2461
EVT Op0VT = Op0.getValueType();
2463
if ((OpVT == MVT::i32 && Op0VT == MVT::f64)
2464
|| OpVT == MVT::i64) {
2465
// Convert f32 / f64 to i32 / i64 via libcall.
2467
(Op.getOpcode() == ISD::FP_TO_SINT)
2468
? RTLIB::getFPTOSINT(Op0VT, OpVT)
2469
: RTLIB::getFPTOUINT(Op0VT, OpVT);
2470
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpectd fp-to-int conversion!");
2472
return ExpandLibCall(LC, Op, DAG, false, Dummy, TLI);
2478
//! Lower ISD::SINT_TO_FP, ISD::UINT_TO_FP for i32
2480
i32->f32 passes through unchanged, whereas i32->f64 is expanded to a libcall.
2481
All conversions from i64 are expanded to a libcall.
2483
// Lower ISD::SINT_TO_FP / ISD::UINT_TO_FP. i32->f32 is handled natively
// (falls through, on lines missing from this chunk); i32->f64 and any
// i64-> conversion is expanded to a runtime library call via ExpandLibCall.
// NOTE(review): bare numeric lines are extraction artifacts; the `LC` and
// `Dummy` declarations are on missing lines.
static SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG,
2484
const SPUTargetLowering &TLI) {
2485
EVT OpVT = Op.getValueType();
2486
SDValue Op0 = Op.getOperand(0);
2487
EVT Op0VT = Op0.getValueType();
2489
if ((OpVT == MVT::f64 && Op0VT == MVT::i32)
2490
|| Op0VT == MVT::i64) {
2491
// Convert i32, i64 to f64 via libcall:
2493
(Op.getOpcode() == ISD::SINT_TO_FP)
2494
? RTLIB::getSINTTOFP(Op0VT, OpVT)
2495
: RTLIB::getUINTTOFP(Op0VT, OpVT);
2496
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpectd int-to-fp conversion!");
2498
return ExpandLibCall(LC, Op, DAG, false, Dummy, TLI);
2504
//! Lower ISD::SETCC
2506
This handles MVT::f64 (double floating point) condition lowering
2508
// Lower ISD::SETCC for MVT::f64 operands. SETO/SETUO are answered from the
// lhs alone (NaN tests on the exponent bits). Other conditions bitcast the
// doubles to i64, convert sign-magnitude to two's complement so an integer
// compare gives the right ordering, run the integer SETCC, and for ordered
// predicates AND in NaN checks on both operands.
// NOTE(review): bare numeric lines are extraction artifacts; several
// declarations (lhsHi32, lhsLo32, rhsHi32, lhsSelect/rhsSelect, compareOp,
// result, the `case ISD::SET*` labels, and the final return) are on lines
// missing from this chunk.
static SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG,
2509
const TargetLowering &TLI) {
2510
CondCodeSDNode *CC = dyn_cast<CondCodeSDNode>(Op.getOperand(2));
2511
DebugLoc dl = Op.getDebugLoc();
2512
assert(CC != 0 && "LowerSETCC: CondCodeSDNode should not be null here!\n");
2514
SDValue lhs = Op.getOperand(0);
2515
SDValue rhs = Op.getOperand(1);
2516
EVT lhsVT = lhs.getValueType();
2517
assert(lhsVT == MVT::f64 && "LowerSETCC: type other than MVT::64\n");
2519
EVT ccResultVT = TLI.getSetCCResultType(lhs.getValueType());
2520
APInt ccResultOnes = APInt::getAllOnesValue(ccResultVT.getSizeInBits());
2521
EVT IntVT(MVT::i64);
2523
// Take advantage of the fact that (truncate (sra arg, 32)) is efficiently
2524
// selected to a NOP:
2525
SDValue i64lhs = DAG.getNode(ISD::BITCAST, dl, IntVT, lhs);
2527
DAG.getNode(ISD::TRUNCATE, dl, MVT::i32,
2528
DAG.getNode(ISD::SRL, dl, IntVT,
2529
i64lhs, DAG.getConstant(32, MVT::i32)));
2530
SDValue lhsHi32abs =
2531
// Clear the sign bit: used for NaN detection (|hi32| vs 0x7ff00000).
DAG.getNode(ISD::AND, dl, MVT::i32,
2532
lhsHi32, DAG.getConstant(0x7fffffff, MVT::i32));
2534
DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, i64lhs);
2536
// SETO and SETUO only use the lhs operand:
2537
if (CC->get() == ISD::SETO) {
2538
// Evaluates to true if Op0 is not [SQ]NaN - lowers to the inverse of
2540
APInt ccResultAllOnes = APInt::getAllOnesValue(ccResultVT.getSizeInBits());
2541
return DAG.getNode(ISD::XOR, dl, ccResultVT,
2542
DAG.getSetCC(dl, ccResultVT,
2543
lhs, DAG.getConstantFP(0.0, lhsVT),
2545
DAG.getConstant(ccResultAllOnes, ccResultVT));
2546
} else if (CC->get() == ISD::SETUO) {
2547
// Evaluates to true if Op0 is [SQ]NaN
2548
return DAG.getNode(ISD::AND, dl, ccResultVT,
2549
DAG.getSetCC(dl, ccResultVT,
2551
DAG.getConstant(0x7ff00000, MVT::i32),
2553
DAG.getSetCC(dl, ccResultVT,
2555
DAG.getConstant(0, MVT::i32),
2559
SDValue i64rhs = DAG.getNode(ISD::BITCAST, dl, IntVT, rhs);
2561
DAG.getNode(ISD::TRUNCATE, dl, MVT::i32,
2562
DAG.getNode(ISD::SRL, dl, IntVT,
2563
i64rhs, DAG.getConstant(32, MVT::i32)));
2565
// If a value is negative, subtract from the sign magnitude constant:
2566
SDValue signMag2TC = DAG.getConstant(0x8000000000000000ULL, IntVT);
2568
// Convert the sign-magnitude representation into 2's complement:
2569
SDValue lhsSelectMask = DAG.getNode(ISD::SRA, dl, ccResultVT,
2570
lhsHi32, DAG.getConstant(31, MVT::i32));
2571
SDValue lhsSignMag2TC = DAG.getNode(ISD::SUB, dl, IntVT, signMag2TC, i64lhs);
2573
DAG.getNode(ISD::SELECT, dl, IntVT,
2574
lhsSelectMask, lhsSignMag2TC, i64lhs);
2576
SDValue rhsSelectMask = DAG.getNode(ISD::SRA, dl, ccResultVT,
2577
rhsHi32, DAG.getConstant(31, MVT::i32));
2578
SDValue rhsSignMag2TC = DAG.getNode(ISD::SUB, dl, IntVT, signMag2TC, i64rhs);
2580
DAG.getNode(ISD::SELECT, dl, IntVT,
2581
rhsSelectMask, rhsSignMag2TC, i64rhs);
2585
// Map the FP condition code onto the equivalent integer condition.
switch (CC->get()) {
2588
compareOp = ISD::SETEQ; break;
2591
compareOp = ISD::SETGT; break;
2594
compareOp = ISD::SETGE; break;
2597
compareOp = ISD::SETLT; break;
2600
compareOp = ISD::SETLE; break;
2603
compareOp = ISD::SETNE; break;
2605
report_fatal_error("CellSPU ISel Select: unimplemented f64 condition");
2609
DAG.getSetCC(dl, ccResultVT, lhsSelect, rhsSelect,
2610
(ISD::CondCode) compareOp);
2612
if ((CC->get() & 0x8) == 0) {
2613
// Ordered comparison:
2614
SDValue lhsNaN = DAG.getSetCC(dl, ccResultVT,
2615
lhs, DAG.getConstantFP(0.0, MVT::f64),
2617
SDValue rhsNaN = DAG.getSetCC(dl, ccResultVT,
2618
rhs, DAG.getConstantFP(0.0, MVT::f64),
2620
SDValue ordered = DAG.getNode(ISD::AND, dl, ccResultVT, lhsNaN, rhsNaN);
2622
result = DAG.getNode(ISD::AND, dl, ccResultVT, ordered, result);
2628
//! Lower ISD::SELECT_CC
2630
ISD::SELECT_CC can (generally) be implemented directly on the SPU using the
2633
\note Need to revisit this in the future: if the code path through the true
2634
and false value computations is longer than the latency of a branch (6
2635
cycles), then it would be more advantageous to branch and insert a new basic
2636
block and branch on the condition. However, this code does not make that
2637
assumption, given the simplistic uses so far.
2640
// Lower ISD::SELECT_CC directly to a SETCC feeding SPUISD::SELB (the SPU
// select-bits instruction), avoiding branches. See the doxygen comment
// above for the latency trade-off discussion.
static SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG,
2641
const TargetLowering &TLI) {
2642
EVT VT = Op.getValueType();
2643
SDValue lhs = Op.getOperand(0);
2644
SDValue rhs = Op.getOperand(1);
2645
SDValue trueval = Op.getOperand(2);
2646
SDValue falseval = Op.getOperand(3);
2647
SDValue condition = Op.getOperand(4);
2648
DebugLoc dl = Op.getDebugLoc();
2650
// NOTE: SELB's arguments: $rA, $rB, $mask
2652
// SELB selects bits from $rA where bits in $mask are 0, bits from $rB
2653
// where bits in $mask are 1. CCond will be inverted, having 1s where the
2654
// condition was true and 0s where the condition was false. Hence, the
2655
// arguments to SELB get reversed.
2657
// Note: Really should be ISD::SELECT instead of SPUISD::SELB, but LLVM's
2658
// legalizer insists on combining SETCC/SELECT into SELECT_CC, so we end up
2659
// with another "cannot select select_cc" assert:
2661
SDValue compare = DAG.getNode(ISD::SETCC, dl,
2662
TLI.getSetCCResultType(Op.getValueType()),
2663
lhs, rhs, condition);
2664
return DAG.getNode(SPUISD::SELB, dl, VT, falseval, trueval, compare);
2667
//! Custom lower ISD::TRUNCATE
2668
// Custom lower ISD::TRUNCATE: only i128 -> i64 is handled here, via a SHUFB
// that selects the least-significant doubleword (bytes 8-15) followed by
// VEC2PREFSLOT. Any other truncate is returned unchanged for the default
// legalizer.
static SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG)
2670
// Type to truncate to
2671
EVT VT = Op.getValueType();
2672
MVT simpleVT = VT.getSimpleVT();
2673
EVT VecVT = EVT::getVectorVT(*DAG.getContext(),
2674
VT, (128 / VT.getSizeInBits()));
2675
DebugLoc dl = Op.getDebugLoc();
2677
// Type to truncate from
2678
SDValue Op0 = Op.getOperand(0);
2679
EVT Op0VT = Op0.getValueType();
2681
if (Op0VT == MVT::i128 && simpleVT == MVT::i64) {
2682
// Create shuffle mask, least significant doubleword of quadword
2683
unsigned maskHigh = 0x08090a0b;
2684
unsigned maskLow = 0x0c0d0e0f;
2685
// Use a shuffle to perform the truncation
2686
SDValue shufMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
2687
DAG.getConstant(maskHigh, MVT::i32),
2688
DAG.getConstant(maskLow, MVT::i32),
2689
DAG.getConstant(maskHigh, MVT::i32),
2690
DAG.getConstant(maskLow, MVT::i32));
2692
SDValue truncShuffle = DAG.getNode(SPUISD::SHUFB, dl, VecVT,
2693
Op0, Op0, shufMask);
2695
return DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT, truncShuffle);
2698
return SDValue(); // Leave the truncate unmolested
2702
* Emit the instruction sequence for i64/i32 -> i128 sign extend. The basic
2703
* algorithm is to duplicate the sign bit using rotmai to generate at
2704
* least one byte full of sign bits. Then propagate the "sign-byte" into
2705
* the leftmost words and the i64/i32 into the rightmost words using shufb.
2707
* @param Op The sext operand
2708
* @param DAG The current DAG
2709
* @return The SDValue with the entire instruction sequence
2711
// Lower ISD::SIGN_EXTEND to i128 (see the block comment above): duplicate
// the sign bit with a word-wise arithmetic shift, then use SHUFB to place
// sign bytes in the upper words and the source value in the lower words.
// i8/i16 sources are first widened to i32 with a normal SIGN_EXTEND.
// NOTE(review): bare numeric lines are extraction artifacts; parts of the
// getMachineNode(...) operand list are on lines missing from this chunk.
static SDValue LowerSIGN_EXTEND(SDValue Op, SelectionDAG &DAG)
2713
DebugLoc dl = Op.getDebugLoc();
2715
// Type to extend to
2716
MVT OpVT = Op.getValueType().getSimpleVT();
2718
// Type to extend from
2719
SDValue Op0 = Op.getOperand(0);
2720
MVT Op0VT = Op0.getValueType().getSimpleVT();
2722
// extend i8 & i16 via i32
2723
if (Op0VT == MVT::i8 || Op0VT == MVT::i16) {
2724
Op0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i32, Op0);
2728
// The type to extend to needs to be a i128 and
2729
// the type to extend from needs to be i64 or i32.
2730
assert((OpVT == MVT::i128 && (Op0VT == MVT::i64 || Op0VT == MVT::i32)) &&
2731
"LowerSIGN_EXTEND: input and/or output operand have wrong size");
2734
// Create shuffle mask
2735
unsigned mask1 = 0x10101010; // byte 0 - 3 and 4 - 7
2736
unsigned mask2 = Op0VT == MVT::i64 ? 0x00010203 : 0x10101010; // byte 8 - 11
2737
unsigned mask3 = Op0VT == MVT::i64 ? 0x04050607 : 0x00010203; // byte 12 - 15
2738
SDValue shufMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
2739
DAG.getConstant(mask1, MVT::i32),
2740
DAG.getConstant(mask1, MVT::i32),
2741
DAG.getConstant(mask2, MVT::i32),
2742
DAG.getConstant(mask3, MVT::i32));
2744
// Word wise arithmetic right shift to generate at least one byte
2745
// that contains sign bits.
2746
MVT mvt = Op0VT == MVT::i64 ? MVT::v2i64 : MVT::v4i32;
2747
SDValue sraVal = DAG.getNode(ISD::SRA,
2750
DAG.getNode(SPUISD::PREFSLOT2VEC, dl, mvt, Op0, Op0),
2751
DAG.getConstant(31, MVT::i32));
2753
// reinterpret as a i128 (SHUFB requires it). This gets lowered away.
2754
SDValue extended = SDValue(DAG.getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
2756
DAG.getTargetConstant(
2757
SPU::GPRCRegClass.getID(),
2759
// Shuffle bytes - Copy the sign bits into the upper 64 bits
2760
// and the input value into the lower 64 bits.
2761
SDValue extShuffle = DAG.getNode(SPUISD::SHUFB, dl, mvt,
2762
extended, sraVal, shufMask);
2763
return DAG.getNode(ISD::BITCAST, dl, MVT::i128, extShuffle);
2766
//! Custom (target-specific) lowering entry point
2768
This is where LLVM's DAG selection process calls to do target-specific
2772
// Main custom-lowering dispatch: routes each opcode marked Custom to its
// Lower* helper. The default branch dumps the offending node and aborts.
// NOTE(review): bare numeric lines are extraction artifacts; the `SDValue`
// return type, the opening `switch`, and several `case` labels (LOAD,
// STORE, the i8 math ops, AND/OR/XOR, MUL, CTPOP, SETCC, TRUNCATE) are on
// lines missing from this chunk.
SPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
2774
unsigned Opc = (unsigned) Op.getOpcode();
2775
EVT VT = Op.getValueType();
2780
// Unhandled opcode: report and abort so the missing case is noticed.
errs() << "SPUTargetLowering::LowerOperation(): need to lower this!\n";
2781
errs() << "Op.getOpcode() = " << Opc << "\n";
2782
errs() << "*Op.getNode():\n";
2783
Op.getNode()->dump();
2785
llvm_unreachable(0);
2791
return LowerLOAD(Op, DAG, SPUTM.getSubtargetImpl());
2793
return LowerSTORE(Op, DAG, SPUTM.getSubtargetImpl());
2794
case ISD::ConstantPool:
2795
return LowerConstantPool(Op, DAG, SPUTM.getSubtargetImpl());
2796
case ISD::GlobalAddress:
2797
return LowerGlobalAddress(Op, DAG, SPUTM.getSubtargetImpl());
2798
case ISD::JumpTable:
2799
return LowerJumpTable(Op, DAG, SPUTM.getSubtargetImpl());
2800
case ISD::ConstantFP:
2801
return LowerConstantFP(Op, DAG);
2803
// i8, i64 math ops:
2812
return LowerI8Math(Op, DAG, Opc, *this);
2816
case ISD::FP_TO_SINT:
2817
case ISD::FP_TO_UINT:
2818
return LowerFP_TO_INT(Op, DAG, *this);
2820
case ISD::SINT_TO_FP:
2821
case ISD::UINT_TO_FP:
2822
return LowerINT_TO_FP(Op, DAG, *this);
2824
// Vector-related lowering.
2825
case ISD::BUILD_VECTOR:
2826
return LowerBUILD_VECTOR(Op, DAG);
2827
case ISD::SCALAR_TO_VECTOR:
2828
return LowerSCALAR_TO_VECTOR(Op, DAG);
2829
case ISD::VECTOR_SHUFFLE:
2830
return LowerVECTOR_SHUFFLE(Op, DAG);
2831
case ISD::EXTRACT_VECTOR_ELT:
2832
return LowerEXTRACT_VECTOR_ELT(Op, DAG);
2833
case ISD::INSERT_VECTOR_ELT:
2834
return LowerINSERT_VECTOR_ELT(Op, DAG);
2836
// Look for ANDBI, ORBI and XORBI opportunities and lower appropriately:
2840
return LowerByteImmed(Op, DAG);
2842
// Vector and i8 multiply:
2845
return LowerI8Math(Op, DAG, Opc, *this);
2848
return LowerCTPOP(Op, DAG);
2850
case ISD::SELECT_CC:
2851
return LowerSELECT_CC(Op, DAG, *this);
2854
return LowerSETCC(Op, DAG, *this);
2857
return LowerTRUNCATE(Op, DAG);
2859
case ISD::SIGN_EXTEND:
2860
return LowerSIGN_EXTEND(Op, DAG);
2866
// ReplaceNodeResults - hook for replacing illegal result types. This
// implementation only reports the unexpected node (default case); results
// are otherwise left unchanged.
// NOTE(review): bare numeric lines are extraction artifacts; the opening
// `switch` and closing braces are on lines missing from this chunk.
void SPUTargetLowering::ReplaceNodeResults(SDNode *N,
2867
SmallVectorImpl<SDValue>&Results,
2868
SelectionDAG &DAG) const
2871
unsigned Opc = (unsigned) N->getOpcode();
2872
EVT OpVT = N->getValueType(0);
2876
errs() << "SPUTargetLowering::ReplaceNodeResults(): need to fix this!\n";
2877
errs() << "Op.getOpcode() = " << Opc << "\n";
2878
errs() << "*Op.getNode():\n";
2886
/* Otherwise, return unchanged */
2889
//===----------------------------------------------------------------------===//
// Target Optimization Hooks
//===----------------------------------------------------------------------===//
SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
2897
TargetMachine &TM = getTargetMachine();
2899
const SPUSubtarget *ST = SPUTM.getSubtargetImpl();
2900
SelectionDAG &DAG = DCI.DAG;
2901
SDValue Op0 = N->getOperand(0); // everything has at least one operand
2902
EVT NodeVT = N->getValueType(0); // The node's value type
2903
EVT Op0VT = Op0.getValueType(); // The first operand's result
2904
SDValue Result; // Initially, empty result
2905
DebugLoc dl = N->getDebugLoc();
2907
switch (N->getOpcode()) {
2910
SDValue Op1 = N->getOperand(1);
2912
if (Op0.getOpcode() == SPUISD::IndirectAddr
2913
|| Op1.getOpcode() == SPUISD::IndirectAddr) {
2914
// Normalize the operands to reduce repeated code
2915
SDValue IndirectArg = Op0, AddArg = Op1;
2917
if (Op1.getOpcode() == SPUISD::IndirectAddr) {
2922
if (isa<ConstantSDNode>(AddArg)) {
2923
ConstantSDNode *CN0 = cast<ConstantSDNode > (AddArg);
2924
SDValue IndOp1 = IndirectArg.getOperand(1);
2926
if (CN0->isNullValue()) {
2927
// (add (SPUindirect <arg>, <arg>), 0) ->
2928
// (SPUindirect <arg>, <arg>)
2930
#if !defined(NDEBUG)
2931
if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
2933
<< "Replace: (add (SPUindirect <arg>, <arg>), 0)\n"
2934
<< "With: (SPUindirect <arg>, <arg>)\n";
2939
} else if (isa<ConstantSDNode>(IndOp1)) {
2940
// (add (SPUindirect <arg>, <const>), <const>) ->
2941
// (SPUindirect <arg>, <const + const>)
2942
ConstantSDNode *CN1 = cast<ConstantSDNode > (IndOp1);
2943
int64_t combinedConst = CN0->getSExtValue() + CN1->getSExtValue();
2944
SDValue combinedValue = DAG.getConstant(combinedConst, Op0VT);
2946
#if !defined(NDEBUG)
2947
if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
2949
<< "Replace: (add (SPUindirect <arg>, " << CN1->getSExtValue()
2950
<< "), " << CN0->getSExtValue() << ")\n"
2951
<< "With: (SPUindirect <arg>, "
2952
<< combinedConst << ")\n";
2956
return DAG.getNode(SPUISD::IndirectAddr, dl, Op0VT,
2957
IndirectArg, combinedValue);
2963
case ISD::SIGN_EXTEND:
2964
case ISD::ZERO_EXTEND:
2965
case ISD::ANY_EXTEND: {
2966
if (Op0.getOpcode() == SPUISD::VEC2PREFSLOT && NodeVT == Op0VT) {
2967
// (any_extend (SPUextract_elt0 <arg>)) ->
2968
// (SPUextract_elt0 <arg>)
2969
// Types must match, however...
2970
#if !defined(NDEBUG)
2971
if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
2972
errs() << "\nReplace: ";
2974
errs() << "\nWith: ";
2975
Op0.getNode()->dump(&DAG);
2984
case SPUISD::IndirectAddr: {
2985
if (!ST->usingLargeMem() && Op0.getOpcode() == SPUISD::AFormAddr) {
2986
ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N->getOperand(1));
2987
if (CN != 0 && CN->isNullValue()) {
2988
// (SPUindirect (SPUaform <addr>, 0), 0) ->
2989
// (SPUaform <addr>, 0)
2991
DEBUG(errs() << "Replace: ");
2992
DEBUG(N->dump(&DAG));
2993
DEBUG(errs() << "\nWith: ");
2994
DEBUG(Op0.getNode()->dump(&DAG));
2995
DEBUG(errs() << "\n");
2999
} else if (Op0.getOpcode() == ISD::ADD) {
3000
SDValue Op1 = N->getOperand(1);
3001
if (ConstantSDNode *CN1 = dyn_cast<ConstantSDNode>(Op1)) {
3002
// (SPUindirect (add <arg>, <arg>), 0) ->
3003
// (SPUindirect <arg>, <arg>)
3004
if (CN1->isNullValue()) {
3006
#if !defined(NDEBUG)
3007
if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
3009
<< "Replace: (SPUindirect (add <arg>, <arg>), 0)\n"
3010
<< "With: (SPUindirect <arg>, <arg>)\n";
3014
return DAG.getNode(SPUISD::IndirectAddr, dl, Op0VT,
3015
Op0.getOperand(0), Op0.getOperand(1));
3021
case SPUISD::SHL_BITS:
3022
case SPUISD::SHL_BYTES:
3023
case SPUISD::ROTBYTES_LEFT: {
3024
SDValue Op1 = N->getOperand(1);
3026
// Kill degenerate vector shifts:
3027
if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Op1)) {
3028
if (CN->isNullValue()) {
3034
case SPUISD::PREFSLOT2VEC: {
3035
switch (Op0.getOpcode()) {
3038
case ISD::ANY_EXTEND:
3039
case ISD::ZERO_EXTEND:
3040
case ISD::SIGN_EXTEND: {
3041
// (SPUprefslot2vec (any|zero|sign_extend (SPUvec2prefslot <arg>))) ->
3043
// but only if the SPUprefslot2vec and <arg> types match.
3044
SDValue Op00 = Op0.getOperand(0);
3045
if (Op00.getOpcode() == SPUISD::VEC2PREFSLOT) {
3046
SDValue Op000 = Op00.getOperand(0);
3047
if (Op000.getValueType() == NodeVT) {
3053
case SPUISD::VEC2PREFSLOT: {
3054
// (SPUprefslot2vec (SPUvec2prefslot <arg>)) ->
3056
Result = Op0.getOperand(0);
3064
// Otherwise, return unchanged.
3066
if (Result.getNode()) {
3067
DEBUG(errs() << "\nReplace.SPU: ");
3068
DEBUG(N->dump(&DAG));
3069
DEBUG(errs() << "\nWith: ");
3070
DEBUG(Result.getNode()->dump(&DAG));
3071
DEBUG(errs() << "\n");
3078
//===----------------------------------------------------------------------===//
// Inline Assembly Support
//===----------------------------------------------------------------------===//
/// getConstraintType - Given a constraint letter, return the type of
3083
/// constraint it is for this target.
3084
SPUTargetLowering::ConstraintType
3085
SPUTargetLowering::getConstraintType(const std::string &ConstraintLetter) const {
3086
if (ConstraintLetter.size() == 1) {
3087
switch (ConstraintLetter[0]) {
3094
return C_RegisterClass;
3097
return TargetLowering::getConstraintType(ConstraintLetter);
3100
/// Examine constraint type and operand type and determine a weight value.
3101
/// This object must already have been set up with the operand type
3102
/// and the current alternative constraint selected.
3103
TargetLowering::ConstraintWeight
3104
SPUTargetLowering::getSingleConstraintMatchWeight(
3105
AsmOperandInfo &info, const char *constraint) const {
3106
ConstraintWeight weight = CW_Invalid;
3107
Value *CallOperandVal = info.CallOperandVal;
3108
// If we don't have a value, we can't do a match,
3109
// but allow it at the lowest weight.
3110
if (CallOperandVal == NULL)
3112
// Look at the constraint type.
3113
switch (*constraint) {
3115
weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
3117
//FIXME: Seems like the supported constraint letters were just copied
3118
// from PPC, as the following doesn't correspond to the GCC docs.
3119
// I'm leaving it so until someone adds the corresponding lowering support.
3126
weight = CW_Register;
3132
std::pair<unsigned, const TargetRegisterClass*>
3133
SPUTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
3136
if (Constraint.size() == 1) {
3137
// GCC RS6000 Constraint Letters
3138
switch (Constraint[0]) {
3142
return std::make_pair(0U, SPU::R64CRegisterClass);
3143
return std::make_pair(0U, SPU::R32CRegisterClass);
3146
return std::make_pair(0U, SPU::R32FPRegisterClass);
3147
else if (VT == MVT::f64)
3148
return std::make_pair(0U, SPU::R64FPRegisterClass);
3151
return std::make_pair(0U, SPU::GPRCRegisterClass);
3155
return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
3158
//! Compute used/known bits for a SPU operand
3160
SPUTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
3164
const SelectionDAG &DAG,
3165
unsigned Depth ) const {
3167
const uint64_t uint64_sizebits = sizeof(uint64_t) * CHAR_BIT;
3169
switch (Op.getOpcode()) {
3171
// KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0);
3177
case SPUISD::PREFSLOT2VEC:
3178
case SPUISD::LDRESULT:
3179
case SPUISD::VEC2PREFSLOT:
3180
case SPUISD::SHLQUAD_L_BITS:
3181
case SPUISD::SHLQUAD_L_BYTES:
3182
case SPUISD::VEC_ROTL:
3183
case SPUISD::VEC_ROTR:
3184
case SPUISD::ROTBYTES_LEFT:
3185
case SPUISD::SELECT_MASK:
3192
SPUTargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op,
3193
unsigned Depth) const {
3194
switch (Op.getOpcode()) {
3199
EVT VT = Op.getValueType();
3201
if (VT != MVT::i8 && VT != MVT::i16 && VT != MVT::i32) {
3204
return VT.getSizeInBits();
3209
// LowerAsmOperandForConstraint
3211
SPUTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
3212
std::string &Constraint,
3213
std::vector<SDValue> &Ops,
3214
SelectionDAG &DAG) const {
3215
// Default, for the time being, to the base class handler
3216
TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
3219
/// isLegalAddressImmediate - Return true if the integer value can be used
3220
/// as the offset of the target addressing mode.
3221
bool SPUTargetLowering::isLegalAddressImmediate(int64_t V,
3223
// SPU's addresses are 256K:
3224
return (V > -(1 << 18) && V < (1 << 18) - 1);
3227
/// A GlobalValue is never directly usable as an address-mode immediate on SPU.
bool SPUTargetLowering::isLegalAddressImmediate(GlobalValue* GV) const {
  return false;
}
SPUTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
3233
// The SPU target isn't yet aware of offsets.
3237
// can we compare to Imm without writing it into a register?
3238
bool SPUTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
3239
//ceqi, cgti, etc. all take s10 operand
3240
return isInt<10>(Imm);
3244
SPUTargetLowering::isLegalAddressingMode(const AddrMode &AM,
3247
// A-form: 18bit absolute address.
3248
if (AM.BaseGV && !AM.HasBaseReg && AM.Scale == 0 && AM.BaseOffs == 0)
3251
// D-form: reg + 14bit offset
3252
if (AM.BaseGV ==0 && AM.HasBaseReg && AM.Scale == 0 && isInt<14>(AM.BaseOffs))
3256
if (AM.BaseGV == 0 && AM.HasBaseReg && AM.Scale == 1 && AM.BaseOffs ==0)