//===-- TargetLowering.cpp - Implement the TargetLowering class -----------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This implements the TargetLowering class.
//
//===----------------------------------------------------------------------===//
#include "llvm/Target/TargetLowering.h"
#include "llvm/DerivedTypes.h"
#include "llvm/GlobalVariable.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetSubtarget.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
using namespace llvm;
/// getTLSModel - Select the TLS access model (general-dynamic, local-dynamic,
/// initial-exec or local-exec) for the given thread-local global, based on
/// the relocation model and the global's linkage/visibility.
TLSModel::Model getTLSModel(const GlobalValue *GV, Reloc::Model reloc) {
  bool isLocal = GV->hasLocalLinkage();
  bool isDeclaration = GV->isDeclaration();
  // FIXME: what should we do for protected and internal visibility?
  // For variables, is internal different from hidden?
  bool isHidden = GV->hasHiddenVisibility();

  if (reloc == Reloc::PIC_) {
    // PIC code must use one of the dynamic models; local-dynamic is cheaper
    // when the symbol is known to be resolved within this module.
    if (isLocal || isHidden)
      return TLSModel::LocalDynamic;
    else
      return TLSModel::GeneralDynamic;
  } else {
    // Non-PIC code can use the exec models; local-exec requires the symbol
    // to be defined in the executable itself.
    if (!isDeclaration || isHidden)
      return TLSModel::LocalExec;
    else
      return TLSModel::InitialExec;
  }
}
/// InitLibcallNames - Set default libcall names.
57
static void InitLibcallNames(const char **Names) {
58
Names[RTLIB::SHL_I16] = "__ashlhi3";
59
Names[RTLIB::SHL_I32] = "__ashlsi3";
60
Names[RTLIB::SHL_I64] = "__ashldi3";
61
Names[RTLIB::SHL_I128] = "__ashlti3";
62
Names[RTLIB::SRL_I16] = "__lshrhi3";
63
Names[RTLIB::SRL_I32] = "__lshrsi3";
64
Names[RTLIB::SRL_I64] = "__lshrdi3";
65
Names[RTLIB::SRL_I128] = "__lshrti3";
66
Names[RTLIB::SRA_I16] = "__ashrhi3";
67
Names[RTLIB::SRA_I32] = "__ashrsi3";
68
Names[RTLIB::SRA_I64] = "__ashrdi3";
69
Names[RTLIB::SRA_I128] = "__ashrti3";
70
Names[RTLIB::MUL_I8] = "__mulqi3";
71
Names[RTLIB::MUL_I16] = "__mulhi3";
72
Names[RTLIB::MUL_I32] = "__mulsi3";
73
Names[RTLIB::MUL_I64] = "__muldi3";
74
Names[RTLIB::MUL_I128] = "__multi3";
75
Names[RTLIB::SDIV_I8] = "__divqi3";
76
Names[RTLIB::SDIV_I16] = "__divhi3";
77
Names[RTLIB::SDIV_I32] = "__divsi3";
78
Names[RTLIB::SDIV_I64] = "__divdi3";
79
Names[RTLIB::SDIV_I128] = "__divti3";
80
Names[RTLIB::UDIV_I8] = "__udivqi3";
81
Names[RTLIB::UDIV_I16] = "__udivhi3";
82
Names[RTLIB::UDIV_I32] = "__udivsi3";
83
Names[RTLIB::UDIV_I64] = "__udivdi3";
84
Names[RTLIB::UDIV_I128] = "__udivti3";
85
Names[RTLIB::SREM_I8] = "__modqi3";
86
Names[RTLIB::SREM_I16] = "__modhi3";
87
Names[RTLIB::SREM_I32] = "__modsi3";
88
Names[RTLIB::SREM_I64] = "__moddi3";
89
Names[RTLIB::SREM_I128] = "__modti3";
90
Names[RTLIB::UREM_I8] = "__umodqi3";
91
Names[RTLIB::UREM_I16] = "__umodhi3";
92
Names[RTLIB::UREM_I32] = "__umodsi3";
93
Names[RTLIB::UREM_I64] = "__umoddi3";
94
Names[RTLIB::UREM_I128] = "__umodti3";
95
Names[RTLIB::NEG_I32] = "__negsi2";
96
Names[RTLIB::NEG_I64] = "__negdi2";
97
Names[RTLIB::ADD_F32] = "__addsf3";
98
Names[RTLIB::ADD_F64] = "__adddf3";
99
Names[RTLIB::ADD_F80] = "__addxf3";
100
Names[RTLIB::ADD_PPCF128] = "__gcc_qadd";
101
Names[RTLIB::SUB_F32] = "__subsf3";
102
Names[RTLIB::SUB_F64] = "__subdf3";
103
Names[RTLIB::SUB_F80] = "__subxf3";
104
Names[RTLIB::SUB_PPCF128] = "__gcc_qsub";
105
Names[RTLIB::MUL_F32] = "__mulsf3";
106
Names[RTLIB::MUL_F64] = "__muldf3";
107
Names[RTLIB::MUL_F80] = "__mulxf3";
108
Names[RTLIB::MUL_PPCF128] = "__gcc_qmul";
109
Names[RTLIB::DIV_F32] = "__divsf3";
110
Names[RTLIB::DIV_F64] = "__divdf3";
111
Names[RTLIB::DIV_F80] = "__divxf3";
112
Names[RTLIB::DIV_PPCF128] = "__gcc_qdiv";
113
Names[RTLIB::REM_F32] = "fmodf";
114
Names[RTLIB::REM_F64] = "fmod";
115
Names[RTLIB::REM_F80] = "fmodl";
116
Names[RTLIB::REM_PPCF128] = "fmodl";
117
Names[RTLIB::POWI_F32] = "__powisf2";
118
Names[RTLIB::POWI_F64] = "__powidf2";
119
Names[RTLIB::POWI_F80] = "__powixf2";
120
Names[RTLIB::POWI_PPCF128] = "__powitf2";
121
Names[RTLIB::SQRT_F32] = "sqrtf";
122
Names[RTLIB::SQRT_F64] = "sqrt";
123
Names[RTLIB::SQRT_F80] = "sqrtl";
124
Names[RTLIB::SQRT_PPCF128] = "sqrtl";
125
Names[RTLIB::LOG_F32] = "logf";
126
Names[RTLIB::LOG_F64] = "log";
127
Names[RTLIB::LOG_F80] = "logl";
128
Names[RTLIB::LOG_PPCF128] = "logl";
129
Names[RTLIB::LOG2_F32] = "log2f";
130
Names[RTLIB::LOG2_F64] = "log2";
131
Names[RTLIB::LOG2_F80] = "log2l";
132
Names[RTLIB::LOG2_PPCF128] = "log2l";
133
Names[RTLIB::LOG10_F32] = "log10f";
134
Names[RTLIB::LOG10_F64] = "log10";
135
Names[RTLIB::LOG10_F80] = "log10l";
136
Names[RTLIB::LOG10_PPCF128] = "log10l";
137
Names[RTLIB::EXP_F32] = "expf";
138
Names[RTLIB::EXP_F64] = "exp";
139
Names[RTLIB::EXP_F80] = "expl";
140
Names[RTLIB::EXP_PPCF128] = "expl";
141
Names[RTLIB::EXP2_F32] = "exp2f";
142
Names[RTLIB::EXP2_F64] = "exp2";
143
Names[RTLIB::EXP2_F80] = "exp2l";
144
Names[RTLIB::EXP2_PPCF128] = "exp2l";
145
Names[RTLIB::SIN_F32] = "sinf";
146
Names[RTLIB::SIN_F64] = "sin";
147
Names[RTLIB::SIN_F80] = "sinl";
148
Names[RTLIB::SIN_PPCF128] = "sinl";
149
Names[RTLIB::COS_F32] = "cosf";
150
Names[RTLIB::COS_F64] = "cos";
151
Names[RTLIB::COS_F80] = "cosl";
152
Names[RTLIB::COS_PPCF128] = "cosl";
153
Names[RTLIB::POW_F32] = "powf";
154
Names[RTLIB::POW_F64] = "pow";
155
Names[RTLIB::POW_F80] = "powl";
156
Names[RTLIB::POW_PPCF128] = "powl";
157
Names[RTLIB::CEIL_F32] = "ceilf";
158
Names[RTLIB::CEIL_F64] = "ceil";
159
Names[RTLIB::CEIL_F80] = "ceill";
160
Names[RTLIB::CEIL_PPCF128] = "ceill";
161
Names[RTLIB::TRUNC_F32] = "truncf";
162
Names[RTLIB::TRUNC_F64] = "trunc";
163
Names[RTLIB::TRUNC_F80] = "truncl";
164
Names[RTLIB::TRUNC_PPCF128] = "truncl";
165
Names[RTLIB::RINT_F32] = "rintf";
166
Names[RTLIB::RINT_F64] = "rint";
167
Names[RTLIB::RINT_F80] = "rintl";
168
Names[RTLIB::RINT_PPCF128] = "rintl";
169
Names[RTLIB::NEARBYINT_F32] = "nearbyintf";
170
Names[RTLIB::NEARBYINT_F64] = "nearbyint";
171
Names[RTLIB::NEARBYINT_F80] = "nearbyintl";
172
Names[RTLIB::NEARBYINT_PPCF128] = "nearbyintl";
173
Names[RTLIB::FLOOR_F32] = "floorf";
174
Names[RTLIB::FLOOR_F64] = "floor";
175
Names[RTLIB::FLOOR_F80] = "floorl";
176
Names[RTLIB::FLOOR_PPCF128] = "floorl";
177
Names[RTLIB::FPEXT_F32_F64] = "__extendsfdf2";
178
Names[RTLIB::FPROUND_F64_F32] = "__truncdfsf2";
179
Names[RTLIB::FPROUND_F80_F32] = "__truncxfsf2";
180
Names[RTLIB::FPROUND_PPCF128_F32] = "__trunctfsf2";
181
Names[RTLIB::FPROUND_F80_F64] = "__truncxfdf2";
182
Names[RTLIB::FPROUND_PPCF128_F64] = "__trunctfdf2";
183
Names[RTLIB::FPTOSINT_F32_I8] = "__fixsfi8";
184
Names[RTLIB::FPTOSINT_F32_I16] = "__fixsfi16";
185
Names[RTLIB::FPTOSINT_F32_I32] = "__fixsfsi";
186
Names[RTLIB::FPTOSINT_F32_I64] = "__fixsfdi";
187
Names[RTLIB::FPTOSINT_F32_I128] = "__fixsfti";
188
Names[RTLIB::FPTOSINT_F64_I32] = "__fixdfsi";
189
Names[RTLIB::FPTOSINT_F64_I64] = "__fixdfdi";
190
Names[RTLIB::FPTOSINT_F64_I128] = "__fixdfti";
191
Names[RTLIB::FPTOSINT_F80_I32] = "__fixxfsi";
192
Names[RTLIB::FPTOSINT_F80_I64] = "__fixxfdi";
193
Names[RTLIB::FPTOSINT_F80_I128] = "__fixxfti";
194
Names[RTLIB::FPTOSINT_PPCF128_I32] = "__fixtfsi";
195
Names[RTLIB::FPTOSINT_PPCF128_I64] = "__fixtfdi";
196
Names[RTLIB::FPTOSINT_PPCF128_I128] = "__fixtfti";
197
Names[RTLIB::FPTOUINT_F32_I8] = "__fixunssfi8";
198
Names[RTLIB::FPTOUINT_F32_I16] = "__fixunssfi16";
199
Names[RTLIB::FPTOUINT_F32_I32] = "__fixunssfsi";
200
Names[RTLIB::FPTOUINT_F32_I64] = "__fixunssfdi";
201
Names[RTLIB::FPTOUINT_F32_I128] = "__fixunssfti";
202
Names[RTLIB::FPTOUINT_F64_I32] = "__fixunsdfsi";
203
Names[RTLIB::FPTOUINT_F64_I64] = "__fixunsdfdi";
204
Names[RTLIB::FPTOUINT_F64_I128] = "__fixunsdfti";
205
Names[RTLIB::FPTOUINT_F80_I32] = "__fixunsxfsi";
206
Names[RTLIB::FPTOUINT_F80_I64] = "__fixunsxfdi";
207
Names[RTLIB::FPTOUINT_F80_I128] = "__fixunsxfti";
208
Names[RTLIB::FPTOUINT_PPCF128_I32] = "__fixunstfsi";
209
Names[RTLIB::FPTOUINT_PPCF128_I64] = "__fixunstfdi";
210
Names[RTLIB::FPTOUINT_PPCF128_I128] = "__fixunstfti";
211
Names[RTLIB::SINTTOFP_I32_F32] = "__floatsisf";
212
Names[RTLIB::SINTTOFP_I32_F64] = "__floatsidf";
213
Names[RTLIB::SINTTOFP_I32_F80] = "__floatsixf";
214
Names[RTLIB::SINTTOFP_I32_PPCF128] = "__floatsitf";
215
Names[RTLIB::SINTTOFP_I64_F32] = "__floatdisf";
216
Names[RTLIB::SINTTOFP_I64_F64] = "__floatdidf";
217
Names[RTLIB::SINTTOFP_I64_F80] = "__floatdixf";
218
Names[RTLIB::SINTTOFP_I64_PPCF128] = "__floatditf";
219
Names[RTLIB::SINTTOFP_I128_F32] = "__floattisf";
220
Names[RTLIB::SINTTOFP_I128_F64] = "__floattidf";
221
Names[RTLIB::SINTTOFP_I128_F80] = "__floattixf";
222
Names[RTLIB::SINTTOFP_I128_PPCF128] = "__floattitf";
223
Names[RTLIB::UINTTOFP_I32_F32] = "__floatunsisf";
224
Names[RTLIB::UINTTOFP_I32_F64] = "__floatunsidf";
225
Names[RTLIB::UINTTOFP_I32_F80] = "__floatunsixf";
226
Names[RTLIB::UINTTOFP_I32_PPCF128] = "__floatunsitf";
227
Names[RTLIB::UINTTOFP_I64_F32] = "__floatundisf";
228
Names[RTLIB::UINTTOFP_I64_F64] = "__floatundidf";
229
Names[RTLIB::UINTTOFP_I64_F80] = "__floatundixf";
230
Names[RTLIB::UINTTOFP_I64_PPCF128] = "__floatunditf";
231
Names[RTLIB::UINTTOFP_I128_F32] = "__floatuntisf";
232
Names[RTLIB::UINTTOFP_I128_F64] = "__floatuntidf";
233
Names[RTLIB::UINTTOFP_I128_F80] = "__floatuntixf";
234
Names[RTLIB::UINTTOFP_I128_PPCF128] = "__floatuntitf";
235
Names[RTLIB::OEQ_F32] = "__eqsf2";
236
Names[RTLIB::OEQ_F64] = "__eqdf2";
237
Names[RTLIB::UNE_F32] = "__nesf2";
238
Names[RTLIB::UNE_F64] = "__nedf2";
239
Names[RTLIB::OGE_F32] = "__gesf2";
240
Names[RTLIB::OGE_F64] = "__gedf2";
241
Names[RTLIB::OLT_F32] = "__ltsf2";
242
Names[RTLIB::OLT_F64] = "__ltdf2";
243
Names[RTLIB::OLE_F32] = "__lesf2";
244
Names[RTLIB::OLE_F64] = "__ledf2";
245
Names[RTLIB::OGT_F32] = "__gtsf2";
246
Names[RTLIB::OGT_F64] = "__gtdf2";
247
Names[RTLIB::UO_F32] = "__unordsf2";
248
Names[RTLIB::UO_F64] = "__unorddf2";
249
Names[RTLIB::O_F32] = "__unordsf2";
250
Names[RTLIB::O_F64] = "__unorddf2";
251
Names[RTLIB::MEMCPY] = "memcpy";
252
Names[RTLIB::MEMMOVE] = "memmove";
253
Names[RTLIB::MEMSET] = "memset";
254
Names[RTLIB::UNWIND_RESUME] = "_Unwind_Resume";
257
/// InitLibcallCallingConvs - Set default libcall CallingConvs.
259
static void InitLibcallCallingConvs(CallingConv::ID *CCs) {
260
for (int i = 0; i < RTLIB::UNKNOWN_LIBCALL; ++i) {
261
CCs[i] = CallingConv::C;
265
/// getFPEXT - Return the FPEXT_*_* value for the given types, or
266
/// UNKNOWN_LIBCALL if there is none.
267
RTLIB::Libcall RTLIB::getFPEXT(EVT OpVT, EVT RetVT) {
268
if (OpVT == MVT::f32) {
269
if (RetVT == MVT::f64)
270
return FPEXT_F32_F64;
272
return UNKNOWN_LIBCALL;
275
/// getFPROUND - Return the FPROUND_*_* value for the given types, or
276
/// UNKNOWN_LIBCALL if there is none.
277
RTLIB::Libcall RTLIB::getFPROUND(EVT OpVT, EVT RetVT) {
278
if (RetVT == MVT::f32) {
279
if (OpVT == MVT::f64)
280
return FPROUND_F64_F32;
281
if (OpVT == MVT::f80)
282
return FPROUND_F80_F32;
283
if (OpVT == MVT::ppcf128)
284
return FPROUND_PPCF128_F32;
285
} else if (RetVT == MVT::f64) {
286
if (OpVT == MVT::f80)
287
return FPROUND_F80_F64;
288
if (OpVT == MVT::ppcf128)
289
return FPROUND_PPCF128_F64;
291
return UNKNOWN_LIBCALL;
294
/// getFPTOSINT - Return the FPTOSINT_*_* value for the given types, or
295
/// UNKNOWN_LIBCALL if there is none.
296
RTLIB::Libcall RTLIB::getFPTOSINT(EVT OpVT, EVT RetVT) {
297
if (OpVT == MVT::f32) {
298
if (RetVT == MVT::i8)
299
return FPTOSINT_F32_I8;
300
if (RetVT == MVT::i16)
301
return FPTOSINT_F32_I16;
302
if (RetVT == MVT::i32)
303
return FPTOSINT_F32_I32;
304
if (RetVT == MVT::i64)
305
return FPTOSINT_F32_I64;
306
if (RetVT == MVT::i128)
307
return FPTOSINT_F32_I128;
308
} else if (OpVT == MVT::f64) {
309
if (RetVT == MVT::i32)
310
return FPTOSINT_F64_I32;
311
if (RetVT == MVT::i64)
312
return FPTOSINT_F64_I64;
313
if (RetVT == MVT::i128)
314
return FPTOSINT_F64_I128;
315
} else if (OpVT == MVT::f80) {
316
if (RetVT == MVT::i32)
317
return FPTOSINT_F80_I32;
318
if (RetVT == MVT::i64)
319
return FPTOSINT_F80_I64;
320
if (RetVT == MVT::i128)
321
return FPTOSINT_F80_I128;
322
} else if (OpVT == MVT::ppcf128) {
323
if (RetVT == MVT::i32)
324
return FPTOSINT_PPCF128_I32;
325
if (RetVT == MVT::i64)
326
return FPTOSINT_PPCF128_I64;
327
if (RetVT == MVT::i128)
328
return FPTOSINT_PPCF128_I128;
330
return UNKNOWN_LIBCALL;
333
/// getFPTOUINT - Return the FPTOUINT_*_* value for the given types, or
334
/// UNKNOWN_LIBCALL if there is none.
335
RTLIB::Libcall RTLIB::getFPTOUINT(EVT OpVT, EVT RetVT) {
336
if (OpVT == MVT::f32) {
337
if (RetVT == MVT::i8)
338
return FPTOUINT_F32_I8;
339
if (RetVT == MVT::i16)
340
return FPTOUINT_F32_I16;
341
if (RetVT == MVT::i32)
342
return FPTOUINT_F32_I32;
343
if (RetVT == MVT::i64)
344
return FPTOUINT_F32_I64;
345
if (RetVT == MVT::i128)
346
return FPTOUINT_F32_I128;
347
} else if (OpVT == MVT::f64) {
348
if (RetVT == MVT::i32)
349
return FPTOUINT_F64_I32;
350
if (RetVT == MVT::i64)
351
return FPTOUINT_F64_I64;
352
if (RetVT == MVT::i128)
353
return FPTOUINT_F64_I128;
354
} else if (OpVT == MVT::f80) {
355
if (RetVT == MVT::i32)
356
return FPTOUINT_F80_I32;
357
if (RetVT == MVT::i64)
358
return FPTOUINT_F80_I64;
359
if (RetVT == MVT::i128)
360
return FPTOUINT_F80_I128;
361
} else if (OpVT == MVT::ppcf128) {
362
if (RetVT == MVT::i32)
363
return FPTOUINT_PPCF128_I32;
364
if (RetVT == MVT::i64)
365
return FPTOUINT_PPCF128_I64;
366
if (RetVT == MVT::i128)
367
return FPTOUINT_PPCF128_I128;
369
return UNKNOWN_LIBCALL;
372
/// getSINTTOFP - Return the SINTTOFP_*_* value for the given types, or
373
/// UNKNOWN_LIBCALL if there is none.
374
RTLIB::Libcall RTLIB::getSINTTOFP(EVT OpVT, EVT RetVT) {
375
if (OpVT == MVT::i32) {
376
if (RetVT == MVT::f32)
377
return SINTTOFP_I32_F32;
378
else if (RetVT == MVT::f64)
379
return SINTTOFP_I32_F64;
380
else if (RetVT == MVT::f80)
381
return SINTTOFP_I32_F80;
382
else if (RetVT == MVT::ppcf128)
383
return SINTTOFP_I32_PPCF128;
384
} else if (OpVT == MVT::i64) {
385
if (RetVT == MVT::f32)
386
return SINTTOFP_I64_F32;
387
else if (RetVT == MVT::f64)
388
return SINTTOFP_I64_F64;
389
else if (RetVT == MVT::f80)
390
return SINTTOFP_I64_F80;
391
else if (RetVT == MVT::ppcf128)
392
return SINTTOFP_I64_PPCF128;
393
} else if (OpVT == MVT::i128) {
394
if (RetVT == MVT::f32)
395
return SINTTOFP_I128_F32;
396
else if (RetVT == MVT::f64)
397
return SINTTOFP_I128_F64;
398
else if (RetVT == MVT::f80)
399
return SINTTOFP_I128_F80;
400
else if (RetVT == MVT::ppcf128)
401
return SINTTOFP_I128_PPCF128;
403
return UNKNOWN_LIBCALL;
406
/// getUINTTOFP - Return the UINTTOFP_*_* value for the given types, or
407
/// UNKNOWN_LIBCALL if there is none.
408
RTLIB::Libcall RTLIB::getUINTTOFP(EVT OpVT, EVT RetVT) {
409
if (OpVT == MVT::i32) {
410
if (RetVT == MVT::f32)
411
return UINTTOFP_I32_F32;
412
else if (RetVT == MVT::f64)
413
return UINTTOFP_I32_F64;
414
else if (RetVT == MVT::f80)
415
return UINTTOFP_I32_F80;
416
else if (RetVT == MVT::ppcf128)
417
return UINTTOFP_I32_PPCF128;
418
} else if (OpVT == MVT::i64) {
419
if (RetVT == MVT::f32)
420
return UINTTOFP_I64_F32;
421
else if (RetVT == MVT::f64)
422
return UINTTOFP_I64_F64;
423
else if (RetVT == MVT::f80)
424
return UINTTOFP_I64_F80;
425
else if (RetVT == MVT::ppcf128)
426
return UINTTOFP_I64_PPCF128;
427
} else if (OpVT == MVT::i128) {
428
if (RetVT == MVT::f32)
429
return UINTTOFP_I128_F32;
430
else if (RetVT == MVT::f64)
431
return UINTTOFP_I128_F64;
432
else if (RetVT == MVT::f80)
433
return UINTTOFP_I128_F80;
434
else if (RetVT == MVT::ppcf128)
435
return UINTTOFP_I128_PPCF128;
437
return UNKNOWN_LIBCALL;
440
/// InitCmpLibcallCCs - Set default comparison libcall CC.
442
static void InitCmpLibcallCCs(ISD::CondCode *CCs) {
443
memset(CCs, ISD::SETCC_INVALID, sizeof(ISD::CondCode)*RTLIB::UNKNOWN_LIBCALL);
444
CCs[RTLIB::OEQ_F32] = ISD::SETEQ;
445
CCs[RTLIB::OEQ_F64] = ISD::SETEQ;
446
CCs[RTLIB::UNE_F32] = ISD::SETNE;
447
CCs[RTLIB::UNE_F64] = ISD::SETNE;
448
CCs[RTLIB::OGE_F32] = ISD::SETGE;
449
CCs[RTLIB::OGE_F64] = ISD::SETGE;
450
CCs[RTLIB::OLT_F32] = ISD::SETLT;
451
CCs[RTLIB::OLT_F64] = ISD::SETLT;
452
CCs[RTLIB::OLE_F32] = ISD::SETLE;
453
CCs[RTLIB::OLE_F64] = ISD::SETLE;
454
CCs[RTLIB::OGT_F32] = ISD::SETGT;
455
CCs[RTLIB::OGT_F64] = ISD::SETGT;
456
CCs[RTLIB::UO_F32] = ISD::SETNE;
457
CCs[RTLIB::UO_F64] = ISD::SETNE;
458
CCs[RTLIB::O_F32] = ISD::SETEQ;
459
CCs[RTLIB::O_F64] = ISD::SETEQ;
462
/// NOTE: The constructor takes ownership of TLOF.
463
TargetLowering::TargetLowering(TargetMachine &tm,TargetLoweringObjectFile *tlof)
464
: TM(tm), TD(TM.getTargetData()), TLOF(*tlof) {
465
// All operations default to being supported.
466
memset(OpActions, 0, sizeof(OpActions));
467
memset(LoadExtActions, 0, sizeof(LoadExtActions));
468
memset(TruncStoreActions, 0, sizeof(TruncStoreActions));
469
memset(IndexedModeActions, 0, sizeof(IndexedModeActions));
470
memset(ConvertActions, 0, sizeof(ConvertActions));
471
memset(CondCodeActions, 0, sizeof(CondCodeActions));
473
// Set default actions for various operations.
474
for (unsigned VT = 0; VT != (unsigned)MVT::LAST_VALUETYPE; ++VT) {
475
// Default all indexed load / store to expand.
476
for (unsigned IM = (unsigned)ISD::PRE_INC;
477
IM != (unsigned)ISD::LAST_INDEXED_MODE; ++IM) {
478
setIndexedLoadAction(IM, (MVT::SimpleValueType)VT, Expand);
479
setIndexedStoreAction(IM, (MVT::SimpleValueType)VT, Expand);
482
// These operations default to expand.
483
setOperationAction(ISD::FGETSIGN, (MVT::SimpleValueType)VT, Expand);
484
setOperationAction(ISD::CONCAT_VECTORS, (MVT::SimpleValueType)VT, Expand);
487
// Most targets ignore the @llvm.prefetch intrinsic.
488
setOperationAction(ISD::PREFETCH, MVT::Other, Expand);
490
// ConstantFP nodes default to expand. Targets can either change this to
491
// Legal, in which case all fp constants are legal, or use isFPImmLegal()
492
// to optimize expansions for certain constants.
493
setOperationAction(ISD::ConstantFP, MVT::f32, Expand);
494
setOperationAction(ISD::ConstantFP, MVT::f64, Expand);
495
setOperationAction(ISD::ConstantFP, MVT::f80, Expand);
497
// These library functions default to expand.
498
setOperationAction(ISD::FLOG , MVT::f64, Expand);
499
setOperationAction(ISD::FLOG2, MVT::f64, Expand);
500
setOperationAction(ISD::FLOG10,MVT::f64, Expand);
501
setOperationAction(ISD::FEXP , MVT::f64, Expand);
502
setOperationAction(ISD::FEXP2, MVT::f64, Expand);
503
setOperationAction(ISD::FLOG , MVT::f32, Expand);
504
setOperationAction(ISD::FLOG2, MVT::f32, Expand);
505
setOperationAction(ISD::FLOG10,MVT::f32, Expand);
506
setOperationAction(ISD::FEXP , MVT::f32, Expand);
507
setOperationAction(ISD::FEXP2, MVT::f32, Expand);
509
// Default ISD::TRAP to expand (which turns it into abort).
510
setOperationAction(ISD::TRAP, MVT::Other, Expand);
512
IsLittleEndian = TD->isLittleEndian();
513
ShiftAmountTy = PointerTy = MVT::getIntegerVT(8*TD->getPointerSize());
514
memset(RegClassForVT, 0,MVT::LAST_VALUETYPE*sizeof(TargetRegisterClass*));
515
memset(TargetDAGCombineArray, 0, array_lengthof(TargetDAGCombineArray));
516
maxStoresPerMemset = maxStoresPerMemcpy = maxStoresPerMemmove = 8;
517
benefitFromCodePlacementOpt = false;
518
UseUnderscoreSetJmp = false;
519
UseUnderscoreLongJmp = false;
520
SelectIsExpensive = false;
521
IntDivIsCheap = false;
522
Pow2DivIsCheap = false;
523
StackPointerRegisterToSaveRestore = 0;
524
ExceptionPointerRegister = 0;
525
ExceptionSelectorRegister = 0;
526
BooleanContents = UndefinedBooleanContent;
527
SchedPreferenceInfo = SchedulingForLatency;
529
JumpBufAlignment = 0;
530
IfCvtBlockSizeLimit = 2;
531
IfCvtDupBlockSizeLimit = 0;
532
PrefLoopAlignment = 0;
534
InitLibcallNames(LibcallRoutineNames);
535
InitCmpLibcallCCs(CmpLibcallCCs);
536
InitLibcallCallingConvs(LibcallCallingConvs);
539
TargetLowering::~TargetLowering() {
  // The constructor took ownership of TLOF (see its NOTE); release it here.
  delete &TLOF;
}
/// canOpTrap - Returns true if the operation can trap for the value type.
544
/// VT must be a legal type.
545
bool TargetLowering::canOpTrap(unsigned Op, EVT VT) const {
546
assert(isTypeLegal(VT));
561
static unsigned getVectorTypeBreakdownMVT(MVT VT, MVT &IntermediateVT,
562
unsigned &NumIntermediates,
564
TargetLowering* TLI) {
565
// Figure out the right, legal destination reg to copy into.
566
unsigned NumElts = VT.getVectorNumElements();
567
MVT EltTy = VT.getVectorElementType();
569
unsigned NumVectorRegs = 1;
571
// FIXME: We don't support non-power-of-2-sized vectors for now. Ideally we
572
// could break down into LHS/RHS like LegalizeDAG does.
573
if (!isPowerOf2_32(NumElts)) {
574
NumVectorRegs = NumElts;
578
// Divide the input until we get to a supported size. This will always
579
// end with a scalar if the target doesn't support vectors.
580
while (NumElts > 1 && !TLI->isTypeLegal(MVT::getVectorVT(EltTy, NumElts))) {
585
NumIntermediates = NumVectorRegs;
587
MVT NewVT = MVT::getVectorVT(EltTy, NumElts);
588
if (!TLI->isTypeLegal(NewVT))
590
IntermediateVT = NewVT;
592
EVT DestVT = TLI->getRegisterType(NewVT);
594
if (EVT(DestVT).bitsLT(NewVT)) {
595
// Value is expanded, e.g. i64 -> i16.
596
return NumVectorRegs*(NewVT.getSizeInBits()/DestVT.getSizeInBits());
598
// Otherwise, promotion or legal types use the same number of registers as
599
// the vector decimated to the appropriate level.
600
return NumVectorRegs;
606
/// computeRegisterProperties - Once all of the register classes are added,
607
/// this allows us to compute derived properties we expose.
608
void TargetLowering::computeRegisterProperties() {
609
assert(MVT::LAST_VALUETYPE <= MVT::MAX_ALLOWED_VALUETYPE &&
610
"Too many value types for ValueTypeActions to hold!");
612
// Everything defaults to needing one register.
613
for (unsigned i = 0; i != MVT::LAST_VALUETYPE; ++i) {
614
NumRegistersForVT[i] = 1;
615
RegisterTypeForVT[i] = TransformToType[i] = (MVT::SimpleValueType)i;
617
// ...except isVoid, which doesn't need any registers.
618
NumRegistersForVT[MVT::isVoid] = 0;
620
// Find the largest integer register class.
621
unsigned LargestIntReg = MVT::LAST_INTEGER_VALUETYPE;
622
for (; RegClassForVT[LargestIntReg] == 0; --LargestIntReg)
623
assert(LargestIntReg != MVT::i1 && "No integer registers defined!");
625
// Every integer value type larger than this largest register takes twice as
626
// many registers to represent as the previous ValueType.
627
for (unsigned ExpandedReg = LargestIntReg + 1; ; ++ExpandedReg) {
628
EVT ExpandedVT = (MVT::SimpleValueType)ExpandedReg;
629
if (!ExpandedVT.isInteger())
631
NumRegistersForVT[ExpandedReg] = 2*NumRegistersForVT[ExpandedReg-1];
632
RegisterTypeForVT[ExpandedReg] = (MVT::SimpleValueType)LargestIntReg;
633
TransformToType[ExpandedReg] = (MVT::SimpleValueType)(ExpandedReg - 1);
634
ValueTypeActions.setTypeAction(ExpandedVT, Expand);
637
// Inspect all of the ValueType's smaller than the largest integer
638
// register to see which ones need promotion.
639
unsigned LegalIntReg = LargestIntReg;
640
for (unsigned IntReg = LargestIntReg - 1;
641
IntReg >= (unsigned)MVT::i1; --IntReg) {
642
EVT IVT = (MVT::SimpleValueType)IntReg;
643
if (isTypeLegal(IVT)) {
644
LegalIntReg = IntReg;
646
RegisterTypeForVT[IntReg] = TransformToType[IntReg] =
647
(MVT::SimpleValueType)LegalIntReg;
648
ValueTypeActions.setTypeAction(IVT, Promote);
652
// ppcf128 type is really two f64's.
653
if (!isTypeLegal(MVT::ppcf128)) {
654
NumRegistersForVT[MVT::ppcf128] = 2*NumRegistersForVT[MVT::f64];
655
RegisterTypeForVT[MVT::ppcf128] = MVT::f64;
656
TransformToType[MVT::ppcf128] = MVT::f64;
657
ValueTypeActions.setTypeAction(MVT::ppcf128, Expand);
660
// Decide how to handle f64. If the target does not have native f64 support,
661
// expand it to i64 and we will be generating soft float library calls.
662
if (!isTypeLegal(MVT::f64)) {
663
NumRegistersForVT[MVT::f64] = NumRegistersForVT[MVT::i64];
664
RegisterTypeForVT[MVT::f64] = RegisterTypeForVT[MVT::i64];
665
TransformToType[MVT::f64] = MVT::i64;
666
ValueTypeActions.setTypeAction(MVT::f64, Expand);
669
// Decide how to handle f32. If the target does not have native support for
670
// f32, promote it to f64 if it is legal. Otherwise, expand it to i32.
671
if (!isTypeLegal(MVT::f32)) {
672
if (isTypeLegal(MVT::f64)) {
673
NumRegistersForVT[MVT::f32] = NumRegistersForVT[MVT::f64];
674
RegisterTypeForVT[MVT::f32] = RegisterTypeForVT[MVT::f64];
675
TransformToType[MVT::f32] = MVT::f64;
676
ValueTypeActions.setTypeAction(MVT::f32, Promote);
678
NumRegistersForVT[MVT::f32] = NumRegistersForVT[MVT::i32];
679
RegisterTypeForVT[MVT::f32] = RegisterTypeForVT[MVT::i32];
680
TransformToType[MVT::f32] = MVT::i32;
681
ValueTypeActions.setTypeAction(MVT::f32, Expand);
685
// Loop over all of the vector value types to see which need transformations.
686
for (unsigned i = MVT::FIRST_VECTOR_VALUETYPE;
687
i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) {
688
MVT VT = (MVT::SimpleValueType)i;
689
if (!isTypeLegal(VT)) {
692
unsigned NumIntermediates;
693
NumRegistersForVT[i] =
694
getVectorTypeBreakdownMVT(VT, IntermediateVT, NumIntermediates,
696
RegisterTypeForVT[i] = RegisterVT;
698
// Determine if there is a legal wider type.
699
bool IsLegalWiderType = false;
700
EVT EltVT = VT.getVectorElementType();
701
unsigned NElts = VT.getVectorNumElements();
702
for (unsigned nVT = i+1; nVT <= MVT::LAST_VECTOR_VALUETYPE; ++nVT) {
703
EVT SVT = (MVT::SimpleValueType)nVT;
704
if (isTypeLegal(SVT) && SVT.getVectorElementType() == EltVT &&
705
SVT.getVectorNumElements() > NElts && NElts != 1) {
706
TransformToType[i] = SVT;
707
ValueTypeActions.setTypeAction(VT, Promote);
708
IsLegalWiderType = true;
712
if (!IsLegalWiderType) {
713
EVT NVT = VT.getPow2VectorType();
715
// Type is already a power of 2. The default action is to split.
716
TransformToType[i] = MVT::Other;
717
ValueTypeActions.setTypeAction(VT, Expand);
719
TransformToType[i] = NVT;
720
ValueTypeActions.setTypeAction(VT, Promote);
727
/// getTargetNodeName - Return the name of a target-specific DAG node.  The
/// base implementation knows no target nodes, so it returns NULL; targets
/// with custom ISD opcodes override this.
const char *TargetLowering::getTargetNodeName(unsigned Opcode) const {
  return NULL;
}
/// getSetCCResultType - Return the value type to use for a SETCC result.
/// By default this is the pointer type.
MVT::SimpleValueType TargetLowering::getSetCCResultType(EVT VT) const {
  return PointerTy.SimpleTy;
}
/// getCmpLibcallReturnType - Return the value type that comparison libcalls
/// return; i32 by default.
MVT::SimpleValueType TargetLowering::getCmpLibcallReturnType() const {
  return MVT::i32; // return the default value
}
/// getVectorTypeBreakdown - Vector types are broken down into some number of
741
/// legal first class types. For example, MVT::v8f32 maps to 2 MVT::v4f32
742
/// with Altivec or SSE1, or 8 promoted MVT::f64 values with the X86 FP stack.
743
/// Similarly, MVT::v2i64 turns into 4 MVT::i32 values with both PPC and X86.
745
/// This method returns the number of registers needed, and the VT for each
746
/// register. It also returns the VT and quantity of the intermediate values
747
/// before they are promoted/expanded.
749
unsigned TargetLowering::getVectorTypeBreakdown(LLVMContext &Context, EVT VT,
751
unsigned &NumIntermediates,
752
EVT &RegisterVT) const {
753
// Figure out the right, legal destination reg to copy into.
754
unsigned NumElts = VT.getVectorNumElements();
755
EVT EltTy = VT.getVectorElementType();
757
unsigned NumVectorRegs = 1;
759
// FIXME: We don't support non-power-of-2-sized vectors for now. Ideally we
760
// could break down into LHS/RHS like LegalizeDAG does.
761
if (!isPowerOf2_32(NumElts)) {
762
NumVectorRegs = NumElts;
766
// Divide the input until we get to a supported size. This will always
767
// end with a scalar if the target doesn't support vectors.
768
while (NumElts > 1 && !isTypeLegal(
769
EVT::getVectorVT(Context, EltTy, NumElts))) {
774
NumIntermediates = NumVectorRegs;
776
EVT NewVT = EVT::getVectorVT(Context, EltTy, NumElts);
777
if (!isTypeLegal(NewVT))
779
IntermediateVT = NewVT;
781
EVT DestVT = getRegisterType(Context, NewVT);
783
if (DestVT.bitsLT(NewVT)) {
784
// Value is expanded, e.g. i64 -> i16.
785
return NumVectorRegs*(NewVT.getSizeInBits()/DestVT.getSizeInBits());
787
// Otherwise, promotion or legal types use the same number of registers as
788
// the vector decimated to the appropriate level.
789
return NumVectorRegs;
795
/// getWidenVectorType: given a vector type, returns the type to widen to
796
/// (e.g., v7i8 to v8i8). If the vector type is legal, it returns itself.
797
/// If there is no vector type that we want to widen to, returns MVT::Other
798
/// When and where to widen is target dependent based on the cost of
799
/// scalarizing vs using the wider vector type.
800
EVT TargetLowering::getWidenVectorType(EVT VT) const {
801
assert(VT.isVector());
805
// Default is not to widen until moved to LegalizeTypes
809
/// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
810
/// function arguments in the caller parameter area. This is the actual
811
/// alignment, not its logarithm.
812
unsigned TargetLowering::getByValTypeAlignment(const Type *Ty) const {
813
return TD->getCallFrameTypeAlignment(Ty);
816
/// getJumpTableEncoding - Return the entry encoding for a jump table in the
817
/// current function. The returned value is a member of the
818
/// MachineJumpTableInfo::JTEntryKind enum.
819
unsigned TargetLowering::getJumpTableEncoding() const {
820
// In non-pic modes, just use the address of a block.
821
if (getTargetMachine().getRelocationModel() != Reloc::PIC_)
822
return MachineJumpTableInfo::EK_BlockAddress;
824
// In PIC mode, if the target supports a GPRel32 directive, use it.
825
if (getTargetMachine().getMCAsmInfo()->getGPRel32Directive() != 0)
826
return MachineJumpTableInfo::EK_GPRel32BlockAddress;
828
// Otherwise, use a label difference.
829
return MachineJumpTableInfo::EK_LabelDifference32;
832
/// getPICJumpTableRelocBase - Return the DAG node that the PIC jump table
/// entries are relative to.
SDValue TargetLowering::getPICJumpTableRelocBase(SDValue Table,
                                                 SelectionDAG &DAG) const {
  // If our PIC model is GP relative, use the global offset table as the base.
  if (getJumpTableEncoding() == MachineJumpTableInfo::EK_GPRel32BlockAddress)
    return DAG.getGLOBAL_OFFSET_TABLE(getPointerTy());

  // Otherwise entries are relative to the table itself.
  return Table;
}
/// getPICJumpTableRelocBaseExpr - This returns the relocation base for the
841
/// given PIC jumptable, the same as getPICJumpTableRelocBase, but as an
844
TargetLowering::getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
845
unsigned JTI,MCContext &Ctx) const{
846
// The normal PIC reloc base is the label at the start of the jump table.
847
return MCSymbolRefExpr::Create(MF->getJTISymbol(JTI, Ctx), Ctx);
851
TargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
852
// Assume that everything is safe in static mode.
853
if (getTargetMachine().getRelocationModel() == Reloc::Static)
856
// In dynamic-no-pic mode, assume that known defined values are safe.
857
if (getTargetMachine().getRelocationModel() == Reloc::DynamicNoPIC &&
859
!GA->getGlobal()->isDeclaration() &&
860
!GA->getGlobal()->isWeakForLinker())
863
// Otherwise assume nothing is safe.
867
//===----------------------------------------------------------------------===//
//  Optimization Methods
//===----------------------------------------------------------------------===//
/// ShrinkDemandedConstant - Check to see if the specified operand of the
872
/// specified instruction is a constant integer. If so, check to see if there
873
/// are any bits set in the constant that are not demanded. If so, shrink the
874
/// constant and return true.
875
bool TargetLowering::TargetLoweringOpt::ShrinkDemandedConstant(SDValue Op,
876
const APInt &Demanded) {
877
DebugLoc dl = Op.getDebugLoc();
879
// FIXME: ISD::SELECT, ISD::SELECT_CC
880
switch (Op.getOpcode()) {
885
ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
886
if (!C) return false;
888
if (Op.getOpcode() == ISD::XOR &&
889
(C->getAPIntValue() | (~Demanded)).isAllOnesValue())
892
// if we can expand it to have all bits set, do it
893
if (C->getAPIntValue().intersects(~Demanded)) {
894
EVT VT = Op.getValueType();
895
SDValue New = DAG.getNode(Op.getOpcode(), dl, VT, Op.getOperand(0),
896
DAG.getConstant(Demanded &
899
return CombineTo(Op, New);
909
/// ShrinkDemandedOp - Convert x+y to (VT)((SmallVT)x+(SmallVT)y) if the
910
/// casts are free. This uses isZExtFree and ZERO_EXTEND for the widening
911
/// cast, but it could be generalized for targets with other types of
912
/// implicit widening casts.
914
TargetLowering::TargetLoweringOpt::ShrinkDemandedOp(SDValue Op,
916
const APInt &Demanded,
918
assert(Op.getNumOperands() == 2 &&
919
"ShrinkDemandedOp only supports binary operators!");
920
assert(Op.getNode()->getNumValues() == 1 &&
921
"ShrinkDemandedOp only supports nodes with one result!");
923
// Don't do this if the node has another user, which may require the
925
if (!Op.getNode()->hasOneUse())
928
// Search for the smallest integer type with free casts to and from
929
// Op's type. For expedience, just check power-of-2 integer types.
930
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
931
unsigned SmallVTBits = BitWidth - Demanded.countLeadingZeros();
932
if (!isPowerOf2_32(SmallVTBits))
933
SmallVTBits = NextPowerOf2(SmallVTBits);
934
for (; SmallVTBits < BitWidth; SmallVTBits = NextPowerOf2(SmallVTBits)) {
935
EVT SmallVT = EVT::getIntegerVT(*DAG.getContext(), SmallVTBits);
936
if (TLI.isTruncateFree(Op.getValueType(), SmallVT) &&
937
TLI.isZExtFree(SmallVT, Op.getValueType())) {
938
// We found a type with free casts.
939
SDValue X = DAG.getNode(Op.getOpcode(), dl, SmallVT,
940
DAG.getNode(ISD::TRUNCATE, dl, SmallVT,
941
Op.getNode()->getOperand(0)),
942
DAG.getNode(ISD::TRUNCATE, dl, SmallVT,
943
Op.getNode()->getOperand(1)));
944
SDValue Z = DAG.getNode(ISD::ZERO_EXTEND, dl, Op.getValueType(), X);
945
return CombineTo(Op, Z);
951
// NOTE(review): This function appears to be a corrupted extraction: stray
// numeric artifact lines are interleaved with the code, and the gaps in
// those numbers indicate that many real source lines were dropped —
// including the KnownZero/KnownOne reference parameters, most 'case'
// labels of the big opcode switch, 'return true;' statements after the
// recursive SimplifyDemandedBits calls, 'break;' statements, and closing
// braces.  The text below is preserved byte-for-byte; restore this
// function from the upstream LLVM sources before attempting to build.
/// SimplifyDemandedBits - Look at Op. At this point, we know that only the
952
/// DemandedMask bits of the result of Op are ever used downstream. If we can
953
/// use this information to simplify Op, create a new simplified DAG node and
954
/// return true, returning the original and new nodes in Old and New. Otherwise,
955
/// analyze the expression and return a mask of KnownOne and KnownZero bits for
956
/// the expression (used to simplify the caller). The KnownZero/One bits may
957
/// only be accurate for those bits in the DemandedMask.
958
bool TargetLowering::SimplifyDemandedBits(SDValue Op,
959
const APInt &DemandedMask,
962
TargetLoweringOpt &TLO,
963
unsigned Depth) const {
964
unsigned BitWidth = DemandedMask.getBitWidth();
965
assert(Op.getValueType().getScalarType().getSizeInBits() == BitWidth &&
966
"Mask size mismatches value type size!");
967
APInt NewMask = DemandedMask;
968
DebugLoc dl = Op.getDebugLoc();
970
// Don't know anything.
971
KnownZero = KnownOne = APInt(BitWidth, 0);
973
// Other users may use these bits.
974
if (!Op.getNode()->hasOneUse()) {
976
// If not at the root, Just compute the KnownZero/KnownOne bits to
977
// simplify things downstream.
978
TLO.DAG.ComputeMaskedBits(Op, DemandedMask, KnownZero, KnownOne, Depth);
981
// If this is the root being simplified, allow it to have multiple uses,
982
// just set the NewMask to all bits.
983
NewMask = APInt::getAllOnesValue(BitWidth);
984
} else if (DemandedMask == 0) {
985
// Not demanding any bits from Op.
986
if (Op.getOpcode() != ISD::UNDEF)
987
return TLO.CombineTo(Op, TLO.DAG.getUNDEF(Op.getValueType()));
989
} else if (Depth == 6) { // Limit search depth.
993
APInt KnownZero2, KnownOne2, KnownZeroOut, KnownOneOut;
994
switch (Op.getOpcode()) {
996
// We know all of the bits for a constant!
997
KnownOne = cast<ConstantSDNode>(Op)->getAPIntValue() & NewMask;
998
KnownZero = ~KnownOne & NewMask;
999
return false; // Don't fall through, will infinitely loop.
1001
// If the RHS is a constant, check to see if the LHS would be zero without
1002
// using the bits from the RHS. Below, we use knowledge about the RHS to
1003
// simplify the LHS, here we're using information from the LHS to simplify
1005
if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
1006
APInt LHSZero, LHSOne;
1007
TLO.DAG.ComputeMaskedBits(Op.getOperand(0), NewMask,
1008
LHSZero, LHSOne, Depth+1);
1009
// If the LHS already has zeros where RHSC does, this and is dead.
1010
if ((LHSZero & NewMask) == (~RHSC->getAPIntValue() & NewMask))
1011
return TLO.CombineTo(Op, Op.getOperand(0));
1012
// If any of the set bits in the RHS are known zero on the LHS, shrink
1014
if (TLO.ShrinkDemandedConstant(Op, ~LHSZero & NewMask))
1018
if (SimplifyDemandedBits(Op.getOperand(1), NewMask, KnownZero,
1019
KnownOne, TLO, Depth+1))
1021
assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
1022
if (SimplifyDemandedBits(Op.getOperand(0), ~KnownZero & NewMask,
1023
KnownZero2, KnownOne2, TLO, Depth+1))
1025
assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
1027
// If all of the demanded bits are known one on one side, return the other.
1028
// These bits cannot contribute to the result of the 'and'.
1029
if ((NewMask & ~KnownZero2 & KnownOne) == (~KnownZero2 & NewMask))
1030
return TLO.CombineTo(Op, Op.getOperand(0));
1031
if ((NewMask & ~KnownZero & KnownOne2) == (~KnownZero & NewMask))
1032
return TLO.CombineTo(Op, Op.getOperand(1));
1033
// If all of the demanded bits in the inputs are known zeros, return zero.
1034
if ((NewMask & (KnownZero|KnownZero2)) == NewMask)
1035
return TLO.CombineTo(Op, TLO.DAG.getConstant(0, Op.getValueType()));
1036
// If the RHS is a constant, see if we can simplify it.
1037
if (TLO.ShrinkDemandedConstant(Op, ~KnownZero2 & NewMask))
1039
// If the operation can be done in a smaller type, do so.
1040
if (TLO.ShrinkOps && TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl))
1043
// Output known-1 bits are only known if set in both the LHS & RHS.
1044
KnownOne &= KnownOne2;
1045
// Output known-0 are known to be clear if zero in either the LHS | RHS.
1046
KnownZero |= KnownZero2;
1049
if (SimplifyDemandedBits(Op.getOperand(1), NewMask, KnownZero,
1050
KnownOne, TLO, Depth+1))
1052
assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
1053
if (SimplifyDemandedBits(Op.getOperand(0), ~KnownOne & NewMask,
1054
KnownZero2, KnownOne2, TLO, Depth+1))
1056
assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
1058
// If all of the demanded bits are known zero on one side, return the other.
1059
// These bits cannot contribute to the result of the 'or'.
1060
if ((NewMask & ~KnownOne2 & KnownZero) == (~KnownOne2 & NewMask))
1061
return TLO.CombineTo(Op, Op.getOperand(0));
1062
if ((NewMask & ~KnownOne & KnownZero2) == (~KnownOne & NewMask))
1063
return TLO.CombineTo(Op, Op.getOperand(1));
1064
// If all of the potentially set bits on one side are known to be set on
1065
// the other side, just use the 'other' side.
1066
if ((NewMask & ~KnownZero & KnownOne2) == (~KnownZero & NewMask))
1067
return TLO.CombineTo(Op, Op.getOperand(0));
1068
if ((NewMask & ~KnownZero2 & KnownOne) == (~KnownZero2 & NewMask))
1069
return TLO.CombineTo(Op, Op.getOperand(1));
1070
// If the RHS is a constant, see if we can simplify it.
1071
if (TLO.ShrinkDemandedConstant(Op, NewMask))
1073
// If the operation can be done in a smaller type, do so.
1074
if (TLO.ShrinkOps && TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl))
1077
// Output known-0 bits are only known if clear in both the LHS & RHS.
1078
KnownZero &= KnownZero2;
1079
// Output known-1 are known to be set if set in either the LHS | RHS.
1080
KnownOne |= KnownOne2;
1083
if (SimplifyDemandedBits(Op.getOperand(1), NewMask, KnownZero,
1084
KnownOne, TLO, Depth+1))
1086
assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
1087
if (SimplifyDemandedBits(Op.getOperand(0), NewMask, KnownZero2,
1088
KnownOne2, TLO, Depth+1))
1090
assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
1092
// If all of the demanded bits are known zero on one side, return the other.
1093
// These bits cannot contribute to the result of the 'xor'.
1094
if ((KnownZero & NewMask) == NewMask)
1095
return TLO.CombineTo(Op, Op.getOperand(0));
1096
if ((KnownZero2 & NewMask) == NewMask)
1097
return TLO.CombineTo(Op, Op.getOperand(1));
1098
// If the operation can be done in a smaller type, do so.
1099
if (TLO.ShrinkOps && TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl))
1102
// If all of the unknown bits are known to be zero on one side or the other
1103
// (but not both) turn this into an *inclusive* or.
1104
// e.g. (A & C1)^(B & C2) -> (A & C1)|(B & C2) iff C1&C2 == 0
1105
if ((NewMask & ~KnownZero & ~KnownZero2) == 0)
1106
return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::OR, dl, Op.getValueType(),
1110
// Output known-0 bits are known if clear or set in both the LHS & RHS.
1111
KnownZeroOut = (KnownZero & KnownZero2) | (KnownOne & KnownOne2);
1112
// Output known-1 are known to be set if set in only one of the LHS, RHS.
1113
KnownOneOut = (KnownZero & KnownOne2) | (KnownOne & KnownZero2);
1115
// If all of the demanded bits on one side are known, and all of the set
1116
// bits on that side are also known to be set on the other side, turn this
1117
// into an AND, as we know the bits will be cleared.
1118
// e.g. (X | C1) ^ C2 --> (X | C1) & ~C2 iff (C1&C2) == C2
1119
if ((NewMask & (KnownZero|KnownOne)) == NewMask) { // all known
1120
if ((KnownOne & KnownOne2) == KnownOne) {
1121
EVT VT = Op.getValueType();
1122
SDValue ANDC = TLO.DAG.getConstant(~KnownOne & NewMask, VT);
1123
return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::AND, dl, VT,
1124
Op.getOperand(0), ANDC));
1128
// If the RHS is a constant, see if we can simplify it.
1129
// for XOR, we prefer to force bits to 1 if they will make a -1.
1130
// if we can't force bits, try to shrink constant
1131
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
1132
APInt Expanded = C->getAPIntValue() | (~NewMask);
1133
// if we can expand it to have all bits set, do it
1134
if (Expanded.isAllOnesValue()) {
1135
if (Expanded != C->getAPIntValue()) {
1136
EVT VT = Op.getValueType();
1137
SDValue New = TLO.DAG.getNode(Op.getOpcode(), dl,VT, Op.getOperand(0),
1138
TLO.DAG.getConstant(Expanded, VT));
1139
return TLO.CombineTo(Op, New);
1141
// if it already has all the bits set, nothing to change
1142
// but don't shrink either!
1143
} else if (TLO.ShrinkDemandedConstant(Op, NewMask)) {
1148
KnownZero = KnownZeroOut;
1149
KnownOne = KnownOneOut;
1152
if (SimplifyDemandedBits(Op.getOperand(2), NewMask, KnownZero,
1153
KnownOne, TLO, Depth+1))
1155
if (SimplifyDemandedBits(Op.getOperand(1), NewMask, KnownZero2,
1156
KnownOne2, TLO, Depth+1))
1158
assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
1159
assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
1161
// If the operands are constants, see if we can simplify them.
1162
if (TLO.ShrinkDemandedConstant(Op, NewMask))
1165
// Only known if known in both the LHS and RHS.
1166
KnownOne &= KnownOne2;
1167
KnownZero &= KnownZero2;
1169
case ISD::SELECT_CC:
1170
if (SimplifyDemandedBits(Op.getOperand(3), NewMask, KnownZero,
1171
KnownOne, TLO, Depth+1))
1173
if (SimplifyDemandedBits(Op.getOperand(2), NewMask, KnownZero2,
1174
KnownOne2, TLO, Depth+1))
1176
assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
1177
assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
1179
// If the operands are constants, see if we can simplify them.
1180
if (TLO.ShrinkDemandedConstant(Op, NewMask))
1183
// Only known if known in both the LHS and RHS.
1184
KnownOne &= KnownOne2;
1185
KnownZero &= KnownZero2;
1188
if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
1189
unsigned ShAmt = SA->getZExtValue();
1190
SDValue InOp = Op.getOperand(0);
1192
// If the shift count is an invalid immediate, don't do anything.
1193
if (ShAmt >= BitWidth)
1196
// If this is ((X >>u C1) << ShAmt), see if we can simplify this into a
1197
// single shift. We can do this if the bottom bits (which are shifted
1198
// out) are never demanded.
1199
if (InOp.getOpcode() == ISD::SRL &&
1200
isa<ConstantSDNode>(InOp.getOperand(1))) {
1201
if (ShAmt && (NewMask & APInt::getLowBitsSet(BitWidth, ShAmt)) == 0) {
1202
unsigned C1= cast<ConstantSDNode>(InOp.getOperand(1))->getZExtValue();
1203
unsigned Opc = ISD::SHL;
1204
int Diff = ShAmt-C1;
1211
TLO.DAG.getConstant(Diff, Op.getOperand(1).getValueType());
1212
EVT VT = Op.getValueType();
1213
return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT,
1214
InOp.getOperand(0), NewSA));
1218
if (SimplifyDemandedBits(Op.getOperand(0), NewMask.lshr(ShAmt),
1219
KnownZero, KnownOne, TLO, Depth+1))
1221
KnownZero <<= SA->getZExtValue();
1222
KnownOne <<= SA->getZExtValue();
1223
// low bits known zero.
1224
KnownZero |= APInt::getLowBitsSet(BitWidth, SA->getZExtValue());
1228
if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
1229
EVT VT = Op.getValueType();
1230
unsigned ShAmt = SA->getZExtValue();
1231
unsigned VTSize = VT.getSizeInBits();
1232
SDValue InOp = Op.getOperand(0);
1234
// If the shift count is an invalid immediate, don't do anything.
1235
if (ShAmt >= BitWidth)
1238
// If this is ((X << C1) >>u ShAmt), see if we can simplify this into a
1239
// single shift. We can do this if the top bits (which are shifted out)
1240
// are never demanded.
1241
if (InOp.getOpcode() == ISD::SHL &&
1242
isa<ConstantSDNode>(InOp.getOperand(1))) {
1243
if (ShAmt && (NewMask & APInt::getHighBitsSet(VTSize, ShAmt)) == 0) {
1244
unsigned C1= cast<ConstantSDNode>(InOp.getOperand(1))->getZExtValue();
1245
unsigned Opc = ISD::SRL;
1246
int Diff = ShAmt-C1;
1253
TLO.DAG.getConstant(Diff, Op.getOperand(1).getValueType());
1254
return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT,
1255
InOp.getOperand(0), NewSA));
1259
// Compute the new bits that are at the top now.
1260
if (SimplifyDemandedBits(InOp, (NewMask << ShAmt),
1261
KnownZero, KnownOne, TLO, Depth+1))
1263
assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
1264
KnownZero = KnownZero.lshr(ShAmt);
1265
KnownOne = KnownOne.lshr(ShAmt);
1267
APInt HighBits = APInt::getHighBitsSet(BitWidth, ShAmt);
1268
KnownZero |= HighBits; // High bits known zero.
1272
// If this is an arithmetic shift right and only the low-bit is set, we can
1273
// always convert this into a logical shr, even if the shift amount is
1274
// variable. The low bit of the shift cannot be an input sign bit unless
1275
// the shift amount is >= the size of the datatype, which is undefined.
1276
if (DemandedMask == 1)
1277
return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, Op.getValueType(),
1278
Op.getOperand(0), Op.getOperand(1)));
1280
if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
1281
EVT VT = Op.getValueType();
1282
unsigned ShAmt = SA->getZExtValue();
1284
// If the shift count is an invalid immediate, don't do anything.
1285
if (ShAmt >= BitWidth)
1288
APInt InDemandedMask = (NewMask << ShAmt);
1290
// If any of the demanded bits are produced by the sign extension, we also
1291
// demand the input sign bit.
1292
APInt HighBits = APInt::getHighBitsSet(BitWidth, ShAmt);
1293
if (HighBits.intersects(NewMask))
1294
InDemandedMask |= APInt::getSignBit(VT.getScalarType().getSizeInBits());
1296
if (SimplifyDemandedBits(Op.getOperand(0), InDemandedMask,
1297
KnownZero, KnownOne, TLO, Depth+1))
1299
assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
1300
KnownZero = KnownZero.lshr(ShAmt);
1301
KnownOne = KnownOne.lshr(ShAmt);
1303
// Handle the sign bit, adjusted to where it is now in the mask.
1304
APInt SignBit = APInt::getSignBit(BitWidth).lshr(ShAmt);
1306
// If the input sign bit is known to be zero, or if none of the top bits
1307
// are demanded, turn this into an unsigned shift right.
1308
if (KnownZero.intersects(SignBit) || (HighBits & ~NewMask) == HighBits) {
1309
return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT,
1312
} else if (KnownOne.intersects(SignBit)) { // New bits are known one.
1313
KnownOne |= HighBits;
1317
case ISD::SIGN_EXTEND_INREG: {
1318
EVT EVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
1320
// Sign extension. Compute the demanded bits in the result that are not
1321
// present in the input.
1323
APInt::getHighBitsSet(BitWidth,
1324
BitWidth - EVT.getScalarType().getSizeInBits()) &
1327
// If none of the extended bits are demanded, eliminate the sextinreg.
1329
return TLO.CombineTo(Op, Op.getOperand(0));
1331
APInt InSignBit = APInt::getSignBit(EVT.getScalarType().getSizeInBits());
1332
InSignBit.zext(BitWidth);
1333
APInt InputDemandedBits =
1334
APInt::getLowBitsSet(BitWidth,
1335
EVT.getScalarType().getSizeInBits()) &
1338
// Since the sign extended bits are demanded, we know that the sign
1340
InputDemandedBits |= InSignBit;
1342
if (SimplifyDemandedBits(Op.getOperand(0), InputDemandedBits,
1343
KnownZero, KnownOne, TLO, Depth+1))
1345
assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
1347
// If the sign bit of the input is known set or clear, then we know the
1348
// top bits of the result.
1350
// If the input sign bit is known zero, convert this into a zero extension.
1351
if (KnownZero.intersects(InSignBit))
1352
return TLO.CombineTo(Op,
1353
TLO.DAG.getZeroExtendInReg(Op.getOperand(0),dl,EVT));
1355
if (KnownOne.intersects(InSignBit)) { // Input sign bit known set
1356
KnownOne |= NewBits;
1357
KnownZero &= ~NewBits;
1358
} else { // Input sign bit unknown
1359
KnownZero &= ~NewBits;
1360
KnownOne &= ~NewBits;
1364
case ISD::ZERO_EXTEND: {
1365
unsigned OperandBitWidth =
1366
Op.getOperand(0).getValueType().getScalarType().getSizeInBits();
1367
APInt InMask = NewMask;
1368
InMask.trunc(OperandBitWidth);
1370
// If none of the top bits are demanded, convert this into an any_extend.
1372
APInt::getHighBitsSet(BitWidth, BitWidth - OperandBitWidth) & NewMask;
1373
if (!NewBits.intersects(NewMask))
1374
return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::ANY_EXTEND, dl,
1378
if (SimplifyDemandedBits(Op.getOperand(0), InMask,
1379
KnownZero, KnownOne, TLO, Depth+1))
1381
assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
1382
KnownZero.zext(BitWidth);
1383
KnownOne.zext(BitWidth);
1384
KnownZero |= NewBits;
1387
case ISD::SIGN_EXTEND: {
1388
EVT InVT = Op.getOperand(0).getValueType();
1389
unsigned InBits = InVT.getScalarType().getSizeInBits();
1390
APInt InMask = APInt::getLowBitsSet(BitWidth, InBits);
1391
APInt InSignBit = APInt::getBitsSet(BitWidth, InBits - 1, InBits);
1392
APInt NewBits = ~InMask & NewMask;
1394
// If none of the top bits are demanded, convert this into an any_extend.
1396
return TLO.CombineTo(Op,TLO.DAG.getNode(ISD::ANY_EXTEND, dl,
1400
// Since some of the sign extended bits are demanded, we know that the sign
1402
APInt InDemandedBits = InMask & NewMask;
1403
InDemandedBits |= InSignBit;
1404
InDemandedBits.trunc(InBits);
1406
if (SimplifyDemandedBits(Op.getOperand(0), InDemandedBits, KnownZero,
1407
KnownOne, TLO, Depth+1))
1409
KnownZero.zext(BitWidth);
1410
KnownOne.zext(BitWidth);
1412
// If the sign bit is known zero, convert this to a zero extend.
1413
if (KnownZero.intersects(InSignBit))
1414
return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::ZERO_EXTEND, dl,
1418
// If the sign bit is known one, the top bits match.
1419
if (KnownOne.intersects(InSignBit)) {
1420
KnownOne |= NewBits;
1421
KnownZero &= ~NewBits;
1422
} else { // Otherwise, top bits aren't known.
1423
KnownOne &= ~NewBits;
1424
KnownZero &= ~NewBits;
1428
case ISD::ANY_EXTEND: {
1429
unsigned OperandBitWidth =
1430
Op.getOperand(0).getValueType().getScalarType().getSizeInBits();
1431
APInt InMask = NewMask;
1432
InMask.trunc(OperandBitWidth);
1433
if (SimplifyDemandedBits(Op.getOperand(0), InMask,
1434
KnownZero, KnownOne, TLO, Depth+1))
1436
assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
1437
KnownZero.zext(BitWidth);
1438
KnownOne.zext(BitWidth);
1441
case ISD::TRUNCATE: {
1442
// Simplify the input, using demanded bit information, and compute the known
1443
// zero/one bits live out.
1444
unsigned OperandBitWidth =
1445
Op.getOperand(0).getValueType().getScalarType().getSizeInBits();
1446
APInt TruncMask = NewMask;
1447
TruncMask.zext(OperandBitWidth);
1448
if (SimplifyDemandedBits(Op.getOperand(0), TruncMask,
1449
KnownZero, KnownOne, TLO, Depth+1))
1451
KnownZero.trunc(BitWidth);
1452
KnownOne.trunc(BitWidth);
1454
// If the input is only used by this truncate, see if we can shrink it based
1455
// on the known demanded bits.
1456
if (Op.getOperand(0).getNode()->hasOneUse()) {
1457
SDValue In = Op.getOperand(0);
1458
switch (In.getOpcode()) {
1461
// Shrink SRL by a constant if none of the high bits shifted in are
1463
if (ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(In.getOperand(1))){
1464
APInt HighBits = APInt::getHighBitsSet(OperandBitWidth,
1465
OperandBitWidth - BitWidth);
1466
HighBits = HighBits.lshr(ShAmt->getZExtValue());
1467
HighBits.trunc(BitWidth);
1469
if (ShAmt->getZExtValue() < BitWidth && !(HighBits & NewMask)) {
1470
// None of the shifted in bits are needed. Add a truncate of the
1471
// shift input, then shift it.
1472
SDValue NewTrunc = TLO.DAG.getNode(ISD::TRUNCATE, dl,
1475
return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl,
1485
assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
1488
case ISD::AssertZext: {
1489
EVT VT = cast<VTSDNode>(Op.getOperand(1))->getVT();
1490
APInt InMask = APInt::getLowBitsSet(BitWidth,
1491
VT.getSizeInBits());
1492
if (SimplifyDemandedBits(Op.getOperand(0), InMask & NewMask,
1493
KnownZero, KnownOne, TLO, Depth+1))
1495
assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
1496
KnownZero |= ~InMask & NewMask;
1499
case ISD::BIT_CONVERT:
1501
// If this is an FP->Int bitcast and if the sign bit is the only thing that
1502
// is demanded, turn this into a FGETSIGN.
1503
if (NewMask == EVT::getIntegerVTSignBit(Op.getValueType()) &&
1504
MVT::isFloatingPoint(Op.getOperand(0).getValueType()) &&
1505
!MVT::isVector(Op.getOperand(0).getValueType())) {
1506
// Only do this xform if FGETSIGN is valid or if before legalize.
1507
if (!TLO.AfterLegalize ||
1508
isOperationLegal(ISD::FGETSIGN, Op.getValueType())) {
1509
// Make a FGETSIGN + SHL to move the sign bit into the appropriate
1510
// place. We expect the SHL to be eliminated by other optimizations.
1511
SDValue Sign = TLO.DAG.getNode(ISD::FGETSIGN, Op.getValueType(),
1513
unsigned ShVal = Op.getValueType().getSizeInBits()-1;
1514
SDValue ShAmt = TLO.DAG.getConstant(ShVal, getShiftAmountTy());
1515
return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SHL, Op.getValueType(),
1524
// Add, Sub, and Mul don't demand any bits in positions beyond that
1525
// of the highest bit demanded of them.
1526
APInt LoMask = APInt::getLowBitsSet(BitWidth,
1527
BitWidth - NewMask.countLeadingZeros());
1528
if (SimplifyDemandedBits(Op.getOperand(0), LoMask, KnownZero2,
1529
KnownOne2, TLO, Depth+1))
1531
if (SimplifyDemandedBits(Op.getOperand(1), LoMask, KnownZero2,
1532
KnownOne2, TLO, Depth+1))
1534
// See if the operation should be performed at a smaller bit width.
1535
if (TLO.ShrinkOps && TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl))
1540
// Just use ComputeMaskedBits to compute output bits.
1541
TLO.DAG.ComputeMaskedBits(Op, NewMask, KnownZero, KnownOne, Depth);
1545
// If we know the value of all of the demanded bits, return this as a
1547
if ((NewMask & (KnownZero|KnownOne)) == NewMask)
1548
return TLO.CombineTo(Op, TLO.DAG.getConstant(KnownOne, Op.getValueType()));
1553
/// computeMaskedBitsForTargetNode - Determine which of the bits specified
1554
/// in Mask are known to be either zero or one and return them in the
1555
/// KnownZero/KnownOne bitsets.
1556
void TargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
1560
const SelectionDAG &DAG,
1561
unsigned Depth) const {
1562
assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
1563
Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
1564
Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
1565
Op.getOpcode() == ISD::INTRINSIC_VOID) &&
1566
"Should use MaskedValueIsZero if you don't know whether Op"
1567
" is a target node!");
1568
KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0);
1571
/// ComputeNumSignBitsForTargetNode - This method can be implemented by
1572
/// targets that want to expose additional information about sign bits to the
1574
unsigned TargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op,
1575
unsigned Depth) const {
1576
assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
1577
Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
1578
Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
1579
Op.getOpcode() == ISD::INTRINSIC_VOID) &&
1580
"Should use ComputeNumSignBits if you don't know whether Op"
1581
" is a target node!");
1585
/// ValueHasExactlyOneBitSet - Test if the given value is known to have exactly
1586
/// one bit set. This differs from ComputeMaskedBits in that it doesn't need to
1587
/// determine which bit is set.
1589
static bool ValueHasExactlyOneBitSet(SDValue Val, const SelectionDAG &DAG) {
1590
// A left-shift of a constant one will have exactly one bit set, because
1591
// shifting the bit off the end is undefined.
1592
if (Val.getOpcode() == ISD::SHL)
1593
if (ConstantSDNode *C =
1594
dyn_cast<ConstantSDNode>(Val.getNode()->getOperand(0)))
1595
if (C->getAPIntValue() == 1)
1598
// Similarly, a right-shift of a constant sign-bit will have exactly
1600
if (Val.getOpcode() == ISD::SRL)
1601
if (ConstantSDNode *C =
1602
dyn_cast<ConstantSDNode>(Val.getNode()->getOperand(0)))
1603
if (C->getAPIntValue().isSignBit())
1606
// More could be done here, though the above checks are enough
1607
// to handle some common cases.
1609
// Fall back to ComputeMaskedBits to catch other known cases.
1610
EVT OpVT = Val.getValueType();
1611
unsigned BitWidth = OpVT.getScalarType().getSizeInBits();
1612
APInt Mask = APInt::getAllOnesValue(BitWidth);
1613
APInt KnownZero, KnownOne;
1614
DAG.ComputeMaskedBits(Val, Mask, KnownZero, KnownOne);
1615
return (KnownZero.countPopulation() == BitWidth - 1) &&
1616
(KnownOne.countPopulation() == 1);
1619
/// SimplifySetCC - Try to simplify a setcc built with the specified operands
1620
/// and cc. If it is unable to simplify it, return a null SDValue.
1622
TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
1623
ISD::CondCode Cond, bool foldBooleans,
1624
DAGCombinerInfo &DCI, DebugLoc dl) const {
1625
SelectionDAG &DAG = DCI.DAG;
1626
LLVMContext &Context = *DAG.getContext();
1628
// These setcc operations always fold.
1632
case ISD::SETFALSE2: return DAG.getConstant(0, VT);
1634
case ISD::SETTRUE2: return DAG.getConstant(1, VT);
1637
if (isa<ConstantSDNode>(N0.getNode())) {
1638
// Ensure that the constant occurs on the RHS, and fold constant
1640
return DAG.getSetCC(dl, VT, N1, N0, ISD::getSetCCSwappedOperands(Cond));
1643
if (ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) {
1644
const APInt &C1 = N1C->getAPIntValue();
1646
// If the LHS is '(srl (ctlz x), 5)', the RHS is 0/1, and this is an
1647
// equality comparison, then we're just comparing whether X itself is
1649
if (N0.getOpcode() == ISD::SRL && (C1 == 0 || C1 == 1) &&
1650
N0.getOperand(0).getOpcode() == ISD::CTLZ &&
1651
N0.getOperand(1).getOpcode() == ISD::Constant) {
1653
= cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
1654
if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
1655
ShAmt == Log2_32(N0.getValueType().getSizeInBits())) {
1656
if ((C1 == 0) == (Cond == ISD::SETEQ)) {
1657
// (srl (ctlz x), 5) == 0 -> X != 0
1658
// (srl (ctlz x), 5) != 1 -> X != 0
1661
// (srl (ctlz x), 5) != 0 -> X == 0
1662
// (srl (ctlz x), 5) == 1 -> X == 0
1665
SDValue Zero = DAG.getConstant(0, N0.getValueType());
1666
return DAG.getSetCC(dl, VT, N0.getOperand(0).getOperand(0),
1671
// If the LHS is '(and load, const)', the RHS is 0,
1672
// the test is for equality or unsigned, and all 1 bits of the const are
1673
// in the same partial word, see if we can shorten the load.
1674
if (DCI.isBeforeLegalize() &&
1675
N0.getOpcode() == ISD::AND && C1 == 0 &&
1676
N0.getNode()->hasOneUse() &&
1677
isa<LoadSDNode>(N0.getOperand(0)) &&
1678
N0.getOperand(0).getNode()->hasOneUse() &&
1679
isa<ConstantSDNode>(N0.getOperand(1))) {
1680
LoadSDNode *Lod = cast<LoadSDNode>(N0.getOperand(0));
1682
unsigned bestWidth = 0, bestOffset = 0;
1683
if (!Lod->isVolatile() && Lod->isUnindexed()) {
1684
unsigned origWidth = N0.getValueType().getSizeInBits();
1685
unsigned maskWidth = origWidth;
1686
// We can narrow (e.g.) 16-bit extending loads on 32-bit target to
1687
// 8 bits, but have to be careful...
1688
if (Lod->getExtensionType() != ISD::NON_EXTLOAD)
1689
origWidth = Lod->getMemoryVT().getSizeInBits();
1691
cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
1692
for (unsigned width = origWidth / 2; width>=8; width /= 2) {
1693
APInt newMask = APInt::getLowBitsSet(maskWidth, width);
1694
for (unsigned offset=0; offset<origWidth/width; offset++) {
1695
if ((newMask & Mask) == Mask) {
1696
if (!TD->isLittleEndian())
1697
bestOffset = (origWidth/width - offset - 1) * (width/8);
1699
bestOffset = (uint64_t)offset * (width/8);
1700
bestMask = Mask.lshr(offset * (width/8) * 8);
1704
newMask = newMask << width;
1709
EVT newVT = EVT::getIntegerVT(Context, bestWidth);
1710
if (newVT.isRound()) {
1711
EVT PtrType = Lod->getOperand(1).getValueType();
1712
SDValue Ptr = Lod->getBasePtr();
1713
if (bestOffset != 0)
1714
Ptr = DAG.getNode(ISD::ADD, dl, PtrType, Lod->getBasePtr(),
1715
DAG.getConstant(bestOffset, PtrType));
1716
unsigned NewAlign = MinAlign(Lod->getAlignment(), bestOffset);
1717
SDValue NewLoad = DAG.getLoad(newVT, dl, Lod->getChain(), Ptr,
1719
Lod->getSrcValueOffset() + bestOffset,
1720
false, false, NewAlign);
1721
return DAG.getSetCC(dl, VT,
1722
DAG.getNode(ISD::AND, dl, newVT, NewLoad,
1723
DAG.getConstant(bestMask.trunc(bestWidth),
1725
DAG.getConstant(0LL, newVT), Cond);
1730
// If the LHS is a ZERO_EXTEND, perform the comparison on the input.
1731
if (N0.getOpcode() == ISD::ZERO_EXTEND) {
1732
unsigned InSize = N0.getOperand(0).getValueType().getSizeInBits();
1734
// If the comparison constant has bits in the upper part, the
1735
// zero-extended value could never match.
1736
if (C1.intersects(APInt::getHighBitsSet(C1.getBitWidth(),
1737
C1.getBitWidth() - InSize))) {
1741
case ISD::SETEQ: return DAG.getConstant(0, VT);
1744
case ISD::SETNE: return DAG.getConstant(1, VT);
1747
// True if the sign bit of C1 is set.
1748
return DAG.getConstant(C1.isNegative(), VT);
1751
// True if the sign bit of C1 isn't set.
1752
return DAG.getConstant(C1.isNonNegative(), VT);
1758
// Otherwise, we can perform the comparison with the low bits.
1766
EVT newVT = N0.getOperand(0).getValueType();
1767
if (DCI.isBeforeLegalizeOps() ||
1768
(isOperationLegal(ISD::SETCC, newVT) &&
1769
getCondCodeAction(Cond, newVT)==Legal))
1770
return DAG.getSetCC(dl, VT, N0.getOperand(0),
1771
DAG.getConstant(APInt(C1).trunc(InSize), newVT),
1776
break; // todo, be more careful with signed comparisons
1778
} else if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
1779
(Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
1780
EVT ExtSrcTy = cast<VTSDNode>(N0.getOperand(1))->getVT();
1781
unsigned ExtSrcTyBits = ExtSrcTy.getSizeInBits();
1782
EVT ExtDstTy = N0.getValueType();
1783
unsigned ExtDstTyBits = ExtDstTy.getSizeInBits();
1785
// If the extended part has any inconsistent bits, it cannot ever
1786
// compare equal. In other words, they have to be all ones or all
1789
APInt::getHighBitsSet(ExtDstTyBits, ExtDstTyBits - ExtSrcTyBits);
1790
if ((C1 & ExtBits) != 0 && (C1 & ExtBits) != ExtBits)
1791
return DAG.getConstant(Cond == ISD::SETNE, VT);
1794
EVT Op0Ty = N0.getOperand(0).getValueType();
1795
if (Op0Ty == ExtSrcTy) {
1796
ZextOp = N0.getOperand(0);
1798
APInt Imm = APInt::getLowBitsSet(ExtDstTyBits, ExtSrcTyBits);
1799
ZextOp = DAG.getNode(ISD::AND, dl, Op0Ty, N0.getOperand(0),
1800
DAG.getConstant(Imm, Op0Ty));
1802
if (!DCI.isCalledByLegalizer())
1803
DCI.AddToWorklist(ZextOp.getNode());
1804
// Otherwise, make this a use of a zext.
1805
return DAG.getSetCC(dl, VT, ZextOp,
1806
DAG.getConstant(C1 & APInt::getLowBitsSet(
1811
} else if ((N1C->isNullValue() || N1C->getAPIntValue() == 1) &&
1812
(Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
1813
// SETCC (SETCC), [0|1], [EQ|NE] -> SETCC
1814
if (N0.getOpcode() == ISD::SETCC &&
1815
isTypeLegal(VT) && VT.bitsLE(N0.getValueType())) {
1816
bool TrueWhenTrue = (Cond == ISD::SETEQ) ^ (N1C->getAPIntValue() != 1);
1818
return DAG.getNode(ISD::TRUNCATE, dl, VT, N0);
1819
// Invert the condition.
1820
ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
1821
CC = ISD::getSetCCInverse(CC,
1822
N0.getOperand(0).getValueType().isInteger());
1823
return DAG.getSetCC(dl, VT, N0.getOperand(0), N0.getOperand(1), CC);
1826
if ((N0.getOpcode() == ISD::XOR ||
1827
(N0.getOpcode() == ISD::AND &&
1828
N0.getOperand(0).getOpcode() == ISD::XOR &&
1829
N0.getOperand(1) == N0.getOperand(0).getOperand(1))) &&
1830
isa<ConstantSDNode>(N0.getOperand(1)) &&
1831
cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue() == 1) {
1832
// If this is (X^1) == 0/1, swap the RHS and eliminate the xor. We
1833
// can only do this if the top bits are known zero.
1834
unsigned BitWidth = N0.getValueSizeInBits();
1835
if (DAG.MaskedValueIsZero(N0,
1836
APInt::getHighBitsSet(BitWidth,
1838
// Okay, get the un-inverted input value.
1840
if (N0.getOpcode() == ISD::XOR)
1841
Val = N0.getOperand(0);
1843
assert(N0.getOpcode() == ISD::AND &&
1844
N0.getOperand(0).getOpcode() == ISD::XOR);
1845
// ((X^1)&1)^1 -> X & 1
1846
Val = DAG.getNode(ISD::AND, dl, N0.getValueType(),
1847
N0.getOperand(0).getOperand(0),
1851
return DAG.getSetCC(dl, VT, Val, N1,
1852
Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
1854
} else if (N1C->getAPIntValue() == 1 &&
1856
getBooleanContents() == ZeroOrOneBooleanContent)) {
1858
if (Op0.getOpcode() == ISD::TRUNCATE)
1859
Op0 = Op0.getOperand(0);
1861
if ((Op0.getOpcode() == ISD::XOR) &&
1862
Op0.getOperand(0).getOpcode() == ISD::SETCC &&
1863
Op0.getOperand(1).getOpcode() == ISD::SETCC) {
1864
// (xor (setcc), (setcc)) == / != 1 -> (setcc) != / == (setcc)
1865
Cond = (Cond == ISD::SETEQ) ? ISD::SETNE : ISD::SETEQ;
1866
return DAG.getSetCC(dl, VT, Op0.getOperand(0), Op0.getOperand(1),
1868
} else if (Op0.getOpcode() == ISD::AND &&
1869
isa<ConstantSDNode>(Op0.getOperand(1)) &&
1870
cast<ConstantSDNode>(Op0.getOperand(1))->getAPIntValue() == 1) {
1871
// If this is (X&1) == / != 1, normalize it to (X&1) != / == 0.
1872
if (Op0.getValueType() != VT)
1873
Op0 = DAG.getNode(ISD::AND, dl, VT,
1874
DAG.getNode(ISD::TRUNCATE, dl, VT, Op0.getOperand(0)),
1875
DAG.getConstant(1, VT));
1876
return DAG.getSetCC(dl, VT, Op0,
1877
DAG.getConstant(0, Op0.getValueType()),
1878
Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
1883
APInt MinVal, MaxVal;
1884
unsigned OperandBitSize = N1C->getValueType(0).getSizeInBits();
1885
if (ISD::isSignedIntSetCC(Cond)) {
1886
MinVal = APInt::getSignedMinValue(OperandBitSize);
1887
MaxVal = APInt::getSignedMaxValue(OperandBitSize);
1889
MinVal = APInt::getMinValue(OperandBitSize);
1890
MaxVal = APInt::getMaxValue(OperandBitSize);
1893
// Canonicalize GE/LE comparisons to use GT/LT comparisons.
1894
if (Cond == ISD::SETGE || Cond == ISD::SETUGE) {
1895
if (C1 == MinVal) return DAG.getConstant(1, VT); // X >= MIN --> true
1896
// X >= C0 --> X > (C0-1)
1897
return DAG.getSetCC(dl, VT, N0,
1898
DAG.getConstant(C1-1, N1.getValueType()),
1899
(Cond == ISD::SETGE) ? ISD::SETGT : ISD::SETUGT);
1902
if (Cond == ISD::SETLE || Cond == ISD::SETULE) {
1903
if (C1 == MaxVal) return DAG.getConstant(1, VT); // X <= MAX --> true
1904
// X <= C0 --> X < (C0+1)
1905
return DAG.getSetCC(dl, VT, N0,
1906
DAG.getConstant(C1+1, N1.getValueType()),
1907
(Cond == ISD::SETLE) ? ISD::SETLT : ISD::SETULT);
1910
if ((Cond == ISD::SETLT || Cond == ISD::SETULT) && C1 == MinVal)
1911
return DAG.getConstant(0, VT); // X < MIN --> false
1912
if ((Cond == ISD::SETGE || Cond == ISD::SETUGE) && C1 == MinVal)
1913
return DAG.getConstant(1, VT); // X >= MIN --> true
1914
if ((Cond == ISD::SETGT || Cond == ISD::SETUGT) && C1 == MaxVal)
1915
return DAG.getConstant(0, VT); // X > MAX --> false
1916
if ((Cond == ISD::SETLE || Cond == ISD::SETULE) && C1 == MaxVal)
1917
return DAG.getConstant(1, VT); // X <= MAX --> true
1919
// Canonicalize setgt X, Min --> setne X, Min
1920
if ((Cond == ISD::SETGT || Cond == ISD::SETUGT) && C1 == MinVal)
1921
return DAG.getSetCC(dl, VT, N0, N1, ISD::SETNE);
1922
// Canonicalize setlt X, Max --> setne X, Max
1923
if ((Cond == ISD::SETLT || Cond == ISD::SETULT) && C1 == MaxVal)
1924
return DAG.getSetCC(dl, VT, N0, N1, ISD::SETNE);
1926
// If we have setult X, 1, turn it into seteq X, 0
1927
if ((Cond == ISD::SETLT || Cond == ISD::SETULT) && C1 == MinVal+1)
1928
return DAG.getSetCC(dl, VT, N0,
1929
DAG.getConstant(MinVal, N0.getValueType()),
1931
// If we have setugt X, Max-1, turn it into seteq X, Max
1932
else if ((Cond == ISD::SETGT || Cond == ISD::SETUGT) && C1 == MaxVal-1)
1933
return DAG.getSetCC(dl, VT, N0,
1934
DAG.getConstant(MaxVal, N0.getValueType()),
1937
// If we have "setcc X, C0", check to see if we can shrink the immediate
1940
// SETUGT X, SINTMAX -> SETLT X, 0
1941
if (Cond == ISD::SETUGT &&
1942
C1 == APInt::getSignedMaxValue(OperandBitSize))
1943
return DAG.getSetCC(dl, VT, N0,
1944
DAG.getConstant(0, N1.getValueType()),
1947
// SETULT X, SINTMIN -> SETGT X, -1
1948
if (Cond == ISD::SETULT &&
1949
C1 == APInt::getSignedMinValue(OperandBitSize)) {
1950
SDValue ConstMinusOne =
1951
DAG.getConstant(APInt::getAllOnesValue(OperandBitSize),
1953
return DAG.getSetCC(dl, VT, N0, ConstMinusOne, ISD::SETGT);
1956
// Fold bit comparisons when we can.
1957
if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
1958
(VT == N0.getValueType() ||
1959
(isTypeLegal(VT) && VT.bitsLE(N0.getValueType()))) &&
1960
N0.getOpcode() == ISD::AND)
1961
if (ConstantSDNode *AndRHS =
1962
dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
1963
EVT ShiftTy = DCI.isBeforeLegalize() ?
1964
getPointerTy() : getShiftAmountTy();
1965
if (Cond == ISD::SETNE && C1 == 0) {// (X & 8) != 0 --> (X & 8) >> 3
1966
// Perform the xform if the AND RHS is a single bit.
1967
if (AndRHS->getAPIntValue().isPowerOf2()) {
1968
return DAG.getNode(ISD::TRUNCATE, dl, VT,
1969
DAG.getNode(ISD::SRL, dl, N0.getValueType(), N0,
1970
DAG.getConstant(AndRHS->getAPIntValue().logBase2(), ShiftTy)));
1972
} else if (Cond == ISD::SETEQ && C1 == AndRHS->getAPIntValue()) {
1973
// (X & 8) == 8 --> (X & 8) >> 3
1974
// Perform the xform if C1 is a single bit.
1975
if (C1.isPowerOf2()) {
1976
return DAG.getNode(ISD::TRUNCATE, dl, VT,
1977
DAG.getNode(ISD::SRL, dl, N0.getValueType(), N0,
1978
DAG.getConstant(C1.logBase2(), ShiftTy)));
1984
if (isa<ConstantFPSDNode>(N0.getNode())) {
1985
// Constant fold or commute setcc.
1986
SDValue O = DAG.FoldSetCC(VT, N0, N1, Cond, dl);
1987
if (O.getNode()) return O;
1988
} else if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N1.getNode())) {
1989
// If the RHS of an FP comparison is a constant, simplify it away in
1991
if (CFP->getValueAPF().isNaN()) {
1992
// If an operand is known to be a nan, we can fold it.
1993
switch (ISD::getUnorderedFlavor(Cond)) {
1994
default: llvm_unreachable("Unknown flavor!");
1995
case 0: // Known false.
1996
return DAG.getConstant(0, VT);
1997
case 1: // Known true.
1998
return DAG.getConstant(1, VT);
1999
case 2: // Undefined.
2000
return DAG.getUNDEF(VT);
2004
// Otherwise, we know the RHS is not a NaN. Simplify the node to drop the
2005
// constant if knowing that the operand is non-nan is enough. We prefer to
2006
// have SETO(x,x) instead of SETO(x, 0.0) because this avoids having to
2008
if (Cond == ISD::SETO || Cond == ISD::SETUO)
2009
return DAG.getSetCC(dl, VT, N0, N0, Cond);
2011
// If the condition is not legal, see if we can find an equivalent one
2013
if (!isCondCodeLegal(Cond, N0.getValueType())) {
2014
// If the comparison was an awkward floating-point == or != and one of
2015
// the comparison operands is infinity or negative infinity, convert the
2016
// condition to a less-awkward <= or >=.
2017
if (CFP->getValueAPF().isInfinity()) {
2018
if (CFP->getValueAPF().isNegative()) {
2019
if (Cond == ISD::SETOEQ &&
2020
isCondCodeLegal(ISD::SETOLE, N0.getValueType()))
2021
return DAG.getSetCC(dl, VT, N0, N1, ISD::SETOLE);
2022
if (Cond == ISD::SETUEQ &&
2023
isCondCodeLegal(ISD::SETOLE, N0.getValueType()))
2024
return DAG.getSetCC(dl, VT, N0, N1, ISD::SETULE);
2025
if (Cond == ISD::SETUNE &&
2026
isCondCodeLegal(ISD::SETUGT, N0.getValueType()))
2027
return DAG.getSetCC(dl, VT, N0, N1, ISD::SETUGT);
2028
if (Cond == ISD::SETONE &&
2029
isCondCodeLegal(ISD::SETUGT, N0.getValueType()))
2030
return DAG.getSetCC(dl, VT, N0, N1, ISD::SETOGT);
2032
if (Cond == ISD::SETOEQ &&
2033
isCondCodeLegal(ISD::SETOGE, N0.getValueType()))
2034
return DAG.getSetCC(dl, VT, N0, N1, ISD::SETOGE);
2035
if (Cond == ISD::SETUEQ &&
2036
isCondCodeLegal(ISD::SETOGE, N0.getValueType()))
2037
return DAG.getSetCC(dl, VT, N0, N1, ISD::SETUGE);
2038
if (Cond == ISD::SETUNE &&
2039
isCondCodeLegal(ISD::SETULT, N0.getValueType()))
2040
return DAG.getSetCC(dl, VT, N0, N1, ISD::SETULT);
2041
if (Cond == ISD::SETONE &&
2042
isCondCodeLegal(ISD::SETULT, N0.getValueType()))
2043
return DAG.getSetCC(dl, VT, N0, N1, ISD::SETOLT);
2050
// We can always fold X == X for integer setcc's.
2051
if (N0.getValueType().isInteger())
2052
return DAG.getConstant(ISD::isTrueWhenEqual(Cond), VT);
2053
unsigned UOF = ISD::getUnorderedFlavor(Cond);
2054
if (UOF == 2) // FP operators that are undefined on NaNs.
2055
return DAG.getConstant(ISD::isTrueWhenEqual(Cond), VT);
2056
if (UOF == unsigned(ISD::isTrueWhenEqual(Cond)))
2057
return DAG.getConstant(UOF, VT);
2058
// Otherwise, we can't fold it. However, we can simplify it to SETUO/SETO
2059
// if it is not already.
2060
ISD::CondCode NewCond = UOF == 0 ? ISD::SETO : ISD::SETUO;
2061
if (NewCond != Cond)
2062
return DAG.getSetCC(dl, VT, N0, N1, NewCond);
2065
if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
2066
N0.getValueType().isInteger()) {
2067
if (N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::SUB ||
2068
N0.getOpcode() == ISD::XOR) {
2069
// Simplify (X+Y) == (X+Z) --> Y == Z
2070
if (N0.getOpcode() == N1.getOpcode()) {
2071
if (N0.getOperand(0) == N1.getOperand(0))
2072
return DAG.getSetCC(dl, VT, N0.getOperand(1), N1.getOperand(1), Cond);
2073
if (N0.getOperand(1) == N1.getOperand(1))
2074
return DAG.getSetCC(dl, VT, N0.getOperand(0), N1.getOperand(0), Cond);
2075
if (DAG.isCommutativeBinOp(N0.getOpcode())) {
2076
// If X op Y == Y op X, try other combinations.
2077
if (N0.getOperand(0) == N1.getOperand(1))
2078
return DAG.getSetCC(dl, VT, N0.getOperand(1), N1.getOperand(0),
2080
if (N0.getOperand(1) == N1.getOperand(0))
2081
return DAG.getSetCC(dl, VT, N0.getOperand(0), N1.getOperand(1),
2086
if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(N1)) {
2087
if (ConstantSDNode *LHSR = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
2088
// Turn (X+C1) == C2 --> X == C2-C1
2089
if (N0.getOpcode() == ISD::ADD && N0.getNode()->hasOneUse()) {
2090
return DAG.getSetCC(dl, VT, N0.getOperand(0),
2091
DAG.getConstant(RHSC->getAPIntValue()-
2092
LHSR->getAPIntValue(),
2093
N0.getValueType()), Cond);
2096
// Turn (X^C1) == C2 into X == C1^C2 iff X&~C1 = 0.
2097
if (N0.getOpcode() == ISD::XOR)
2098
// If we know that all of the inverted bits are zero, don't bother
2099
// performing the inversion.
2100
if (DAG.MaskedValueIsZero(N0.getOperand(0), ~LHSR->getAPIntValue()))
2102
DAG.getSetCC(dl, VT, N0.getOperand(0),
2103
DAG.getConstant(LHSR->getAPIntValue() ^
2104
RHSC->getAPIntValue(),
2109
// Turn (C1-X) == C2 --> X == C1-C2
2110
if (ConstantSDNode *SUBC = dyn_cast<ConstantSDNode>(N0.getOperand(0))) {
2111
if (N0.getOpcode() == ISD::SUB && N0.getNode()->hasOneUse()) {
2113
DAG.getSetCC(dl, VT, N0.getOperand(1),
2114
DAG.getConstant(SUBC->getAPIntValue() -
2115
RHSC->getAPIntValue(),
2122
// Simplify (X+Z) == X --> Z == 0
2123
if (N0.getOperand(0) == N1)
2124
return DAG.getSetCC(dl, VT, N0.getOperand(1),
2125
DAG.getConstant(0, N0.getValueType()), Cond);
2126
if (N0.getOperand(1) == N1) {
2127
if (DAG.isCommutativeBinOp(N0.getOpcode()))
2128
return DAG.getSetCC(dl, VT, N0.getOperand(0),
2129
DAG.getConstant(0, N0.getValueType()), Cond);
2130
else if (N0.getNode()->hasOneUse()) {
2131
assert(N0.getOpcode() == ISD::SUB && "Unexpected operation!");
2132
// (Z-X) == X --> Z == X<<1
2133
SDValue SH = DAG.getNode(ISD::SHL, dl, N1.getValueType(),
2135
DAG.getConstant(1, getShiftAmountTy()));
2136
if (!DCI.isCalledByLegalizer())
2137
DCI.AddToWorklist(SH.getNode());
2138
return DAG.getSetCC(dl, VT, N0.getOperand(0), SH, Cond);
2143
if (N1.getOpcode() == ISD::ADD || N1.getOpcode() == ISD::SUB ||
2144
N1.getOpcode() == ISD::XOR) {
2145
// Simplify X == (X+Z) --> Z == 0
2146
if (N1.getOperand(0) == N0) {
2147
return DAG.getSetCC(dl, VT, N1.getOperand(1),
2148
DAG.getConstant(0, N1.getValueType()), Cond);
2149
} else if (N1.getOperand(1) == N0) {
2150
if (DAG.isCommutativeBinOp(N1.getOpcode())) {
2151
return DAG.getSetCC(dl, VT, N1.getOperand(0),
2152
DAG.getConstant(0, N1.getValueType()), Cond);
2153
} else if (N1.getNode()->hasOneUse()) {
2154
assert(N1.getOpcode() == ISD::SUB && "Unexpected operation!");
2155
// X == (Z-X) --> X<<1 == Z
2156
SDValue SH = DAG.getNode(ISD::SHL, dl, N1.getValueType(), N0,
2157
DAG.getConstant(1, getShiftAmountTy()));
2158
if (!DCI.isCalledByLegalizer())
2159
DCI.AddToWorklist(SH.getNode());
2160
return DAG.getSetCC(dl, VT, SH, N1.getOperand(0), Cond);
2165
// Simplify x&y == y to x&y != 0 if y has exactly one bit set.
2166
// Note that where y is variable and is known to have at most
2167
// one bit set (for example, if it is z&1) we cannot do this;
2168
// the expressions are not equivalent when y==0.
2169
if (N0.getOpcode() == ISD::AND)
2170
if (N0.getOperand(0) == N1 || N0.getOperand(1) == N1) {
2171
if (ValueHasExactlyOneBitSet(N1, DAG)) {
2172
Cond = ISD::getSetCCInverse(Cond, /*isInteger=*/true);
2173
SDValue Zero = DAG.getConstant(0, N1.getValueType());
2174
return DAG.getSetCC(dl, VT, N0, Zero, Cond);
2177
if (N1.getOpcode() == ISD::AND)
2178
if (N1.getOperand(0) == N0 || N1.getOperand(1) == N0) {
2179
if (ValueHasExactlyOneBitSet(N0, DAG)) {
2180
Cond = ISD::getSetCCInverse(Cond, /*isInteger=*/true);
2181
SDValue Zero = DAG.getConstant(0, N0.getValueType());
2182
return DAG.getSetCC(dl, VT, N1, Zero, Cond);
2187
// Fold away ALL boolean setcc's.
2189
if (N0.getValueType() == MVT::i1 && foldBooleans) {
2191
default: llvm_unreachable("Unknown integer setcc!");
2192
case ISD::SETEQ: // X == Y -> ~(X^Y)
2193
Temp = DAG.getNode(ISD::XOR, dl, MVT::i1, N0, N1);
2194
N0 = DAG.getNOT(dl, Temp, MVT::i1);
2195
if (!DCI.isCalledByLegalizer())
2196
DCI.AddToWorklist(Temp.getNode());
2198
case ISD::SETNE: // X != Y --> (X^Y)
2199
N0 = DAG.getNode(ISD::XOR, dl, MVT::i1, N0, N1);
2201
case ISD::SETGT: // X >s Y --> X == 0 & Y == 1 --> ~X & Y
2202
case ISD::SETULT: // X <u Y --> X == 0 & Y == 1 --> ~X & Y
2203
Temp = DAG.getNOT(dl, N0, MVT::i1);
2204
N0 = DAG.getNode(ISD::AND, dl, MVT::i1, N1, Temp);
2205
if (!DCI.isCalledByLegalizer())
2206
DCI.AddToWorklist(Temp.getNode());
2208
case ISD::SETLT: // X <s Y --> X == 1 & Y == 0 --> ~Y & X
2209
case ISD::SETUGT: // X >u Y --> X == 1 & Y == 0 --> ~Y & X
2210
Temp = DAG.getNOT(dl, N1, MVT::i1);
2211
N0 = DAG.getNode(ISD::AND, dl, MVT::i1, N0, Temp);
2212
if (!DCI.isCalledByLegalizer())
2213
DCI.AddToWorklist(Temp.getNode());
2215
case ISD::SETULE: // X <=u Y --> X == 0 | Y == 1 --> ~X | Y
2216
case ISD::SETGE: // X >=s Y --> X == 0 | Y == 1 --> ~X | Y
2217
Temp = DAG.getNOT(dl, N0, MVT::i1);
2218
N0 = DAG.getNode(ISD::OR, dl, MVT::i1, N1, Temp);
2219
if (!DCI.isCalledByLegalizer())
2220
DCI.AddToWorklist(Temp.getNode());
2222
case ISD::SETUGE: // X >=u Y --> X == 1 | Y == 0 --> ~Y | X
2223
case ISD::SETLE: // X <=s Y --> X == 1 | Y == 0 --> ~Y | X
2224
Temp = DAG.getNOT(dl, N1, MVT::i1);
2225
N0 = DAG.getNode(ISD::OR, dl, MVT::i1, N0, Temp);
2228
if (VT != MVT::i1) {
2229
if (!DCI.isCalledByLegalizer())
2230
DCI.AddToWorklist(N0.getNode());
2231
// FIXME: If running after legalize, we probably can't do this.
2232
N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, N0);
2237
// Could not fold it.
2241
/// isGAPlusOffset - Returns true (and the GlobalValue and the offset) if the
2242
/// node is a GlobalAddress + offset.
2243
bool TargetLowering::isGAPlusOffset(SDNode *N, GlobalValue* &GA,
2244
int64_t &Offset) const {
2245
if (isa<GlobalAddressSDNode>(N)) {
2246
GlobalAddressSDNode *GASD = cast<GlobalAddressSDNode>(N);
2247
GA = GASD->getGlobal();
2248
Offset += GASD->getOffset();
2252
if (N->getOpcode() == ISD::ADD) {
2253
SDValue N1 = N->getOperand(0);
2254
SDValue N2 = N->getOperand(1);
2255
if (isGAPlusOffset(N1.getNode(), GA, Offset)) {
2256
ConstantSDNode *V = dyn_cast<ConstantSDNode>(N2);
2258
Offset += V->getSExtValue();
2261
} else if (isGAPlusOffset(N2.getNode(), GA, Offset)) {
2262
ConstantSDNode *V = dyn_cast<ConstantSDNode>(N1);
2264
Offset += V->getSExtValue();
2273
/// PerformDAGCombine - Hook for targets to transform nodes during the DAG
/// combining passes.  The default implementation performs no optimization
/// and returns a null SDValue to signal "no change".
SDValue TargetLowering::
PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const {
  // Default implementation: no optimization.
  return SDValue();
}
//===----------------------------------------------------------------------===//
2280
// Inline Assembler Implementation Methods
2281
//===----------------------------------------------------------------------===//
2284
TargetLowering::ConstraintType
2285
TargetLowering::getConstraintType(const std::string &Constraint) const {
2286
// FIXME: lots more standard ones to handle.
2287
if (Constraint.size() == 1) {
2288
switch (Constraint[0]) {
2290
case 'r': return C_RegisterClass;
2292
case 'o': // offsetable
2293
case 'V': // not offsetable
2295
case 'i': // Simple Integer or Relocatable Constant
2296
case 'n': // Simple Integer
2297
case 's': // Relocatable Constant
2298
case 'X': // Allow ANY value.
2299
case 'I': // Target registers.
2311
if (Constraint.size() > 1 && Constraint[0] == '{' &&
2312
Constraint[Constraint.size()-1] == '}')
2317
/// LowerXConstraint - try to replace an X constraint, which matches anything,
2318
/// with another that has more specific requirements based on the type of the
2319
/// corresponding operand.
2320
const char *TargetLowering::LowerXConstraint(EVT ConstraintVT) const{
2321
if (ConstraintVT.isInteger())
2323
if (ConstraintVT.isFloatingPoint())
2324
return "f"; // works for many targets
2328
/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
2329
/// vector. If it is invalid, don't add anything to Ops.
2330
void TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
2331
char ConstraintLetter,
2333
std::vector<SDValue> &Ops,
2334
SelectionDAG &DAG) const {
2335
switch (ConstraintLetter) {
2337
case 'X': // Allows any operand; labels (basic block) use this.
2338
if (Op.getOpcode() == ISD::BasicBlock) {
2343
case 'i': // Simple Integer or Relocatable Constant
2344
case 'n': // Simple Integer
2345
case 's': { // Relocatable Constant
2346
// These operands are interested in values of the form (GV+C), where C may
2347
// be folded in as an offset of GV, or it may be explicitly added. Also, it
2348
// is possible and fine if either GV or C are missing.
2349
ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
2350
GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Op);
2352
// If we have "(add GV, C)", pull out GV/C
2353
if (Op.getOpcode() == ISD::ADD) {
2354
C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
2355
GA = dyn_cast<GlobalAddressSDNode>(Op.getOperand(0));
2356
if (C == 0 || GA == 0) {
2357
C = dyn_cast<ConstantSDNode>(Op.getOperand(0));
2358
GA = dyn_cast<GlobalAddressSDNode>(Op.getOperand(1));
2360
if (C == 0 || GA == 0)
2364
// If we find a valid operand, map to the TargetXXX version so that the
2365
// value itself doesn't get selected.
2366
if (GA) { // Either &GV or &GV+C
2367
if (ConstraintLetter != 'n') {
2368
int64_t Offs = GA->getOffset();
2369
if (C) Offs += C->getZExtValue();
2370
Ops.push_back(DAG.getTargetGlobalAddress(GA->getGlobal(),
2371
Op.getValueType(), Offs));
2375
if (C) { // just C, no GV.
2376
// Simple constants are not allowed for 's'.
2377
if (ConstraintLetter != 's') {
2378
// gcc prints these as sign extended. Sign extend value to 64 bits
2379
// now; without this it would get ZExt'd later in
2380
// ScheduleDAGSDNodes::EmitNode, which is very generic.
2381
Ops.push_back(DAG.getTargetConstant(C->getAPIntValue().getSExtValue(),
2391
std::vector<unsigned> TargetLowering::
2392
getRegClassForInlineAsmConstraint(const std::string &Constraint,
2394
return std::vector<unsigned>();
2398
std::pair<unsigned, const TargetRegisterClass*> TargetLowering::
2399
getRegForInlineAsmConstraint(const std::string &Constraint,
2401
if (Constraint[0] != '{')
2402
return std::pair<unsigned, const TargetRegisterClass*>(0, 0);
2403
assert(*(Constraint.end()-1) == '}' && "Not a brace enclosed constraint?");
2405
// Remove the braces from around the name.
2406
StringRef RegName(Constraint.data()+1, Constraint.size()-2);
2408
// Figure out which register class contains this reg.
2409
const TargetRegisterInfo *RI = TM.getRegisterInfo();
2410
for (TargetRegisterInfo::regclass_iterator RCI = RI->regclass_begin(),
2411
E = RI->regclass_end(); RCI != E; ++RCI) {
2412
const TargetRegisterClass *RC = *RCI;
2414
// If none of the value types for this register class are valid, we
2415
// can't use it. For example, 64-bit reg classes on 32-bit targets.
2416
bool isLegal = false;
2417
for (TargetRegisterClass::vt_iterator I = RC->vt_begin(), E = RC->vt_end();
2419
if (isTypeLegal(*I)) {
2425
if (!isLegal) continue;
2427
for (TargetRegisterClass::iterator I = RC->begin(), E = RC->end();
2429
if (RegName.equals_lower(RI->getName(*I)))
2430
return std::make_pair(*I, RC);
2434
return std::pair<unsigned, const TargetRegisterClass*>(0, 0);
2437
//===----------------------------------------------------------------------===//
2438
// Constraint Selection.
2440
/// isMatchingInputConstraint - Return true of this is an input operand that is
2441
/// a matching constraint like "4".
2442
bool TargetLowering::AsmOperandInfo::isMatchingInputConstraint() const {
2443
assert(!ConstraintCode.empty() && "No known constraint!");
2444
return isdigit(ConstraintCode[0]);
2447
/// getMatchedOperand - If this is an input matching constraint, this method
2448
/// returns the output operand it matches.
2449
unsigned TargetLowering::AsmOperandInfo::getMatchedOperand() const {
2450
assert(!ConstraintCode.empty() && "No known constraint!");
2451
return atoi(ConstraintCode.c_str());
2455
/// getConstraintGenerality - Return an integer indicating how general CT
2457
static unsigned getConstraintGenerality(TargetLowering::ConstraintType CT) {
2459
default: llvm_unreachable("Unknown constraint type!");
2460
case TargetLowering::C_Other:
2461
case TargetLowering::C_Unknown:
2463
case TargetLowering::C_Register:
2465
case TargetLowering::C_RegisterClass:
2467
case TargetLowering::C_Memory:
2472
/// ChooseConstraint - If there are multiple different constraints that we
2473
/// could pick for this operand (e.g. "imr") try to pick the 'best' one.
2474
/// This is somewhat tricky: constraints fall into four classes:
2475
/// Other -> immediates and magic values
2476
/// Register -> one specific register
2477
/// RegisterClass -> a group of regs
2478
/// Memory -> memory
2479
/// Ideally, we would pick the most specific constraint possible: if we have
2480
/// something that fits into a register, we would pick it. The problem here
2481
/// is that if we have something that could either be in a register or in
2482
/// memory that use of the register could cause selection of *other*
2483
/// operands to fail: they might only succeed if we pick memory. Because of
2484
/// this the heuristic we use is:
2486
/// 1) If there is an 'other' constraint, and if the operand is valid for
2487
/// that constraint, use it. This makes us take advantage of 'i'
2488
/// constraints when available.
2489
/// 2) Otherwise, pick the most general constraint present. This prefers
2490
/// 'm' over 'r', for example.
2492
static void ChooseConstraint(TargetLowering::AsmOperandInfo &OpInfo,
2493
bool hasMemory, const TargetLowering &TLI,
2494
SDValue Op, SelectionDAG *DAG) {
2495
assert(OpInfo.Codes.size() > 1 && "Doesn't have multiple constraint options");
2496
unsigned BestIdx = 0;
2497
TargetLowering::ConstraintType BestType = TargetLowering::C_Unknown;
2498
int BestGenerality = -1;
2500
// Loop over the options, keeping track of the most general one.
2501
for (unsigned i = 0, e = OpInfo.Codes.size(); i != e; ++i) {
2502
TargetLowering::ConstraintType CType =
2503
TLI.getConstraintType(OpInfo.Codes[i]);
2505
// If this is an 'other' constraint, see if the operand is valid for it.
2506
// For example, on X86 we might have an 'rI' constraint. If the operand
2507
// is an integer in the range [0..31] we want to use I (saving a load
2508
// of a register), otherwise we must use 'r'.
2509
if (CType == TargetLowering::C_Other && Op.getNode()) {
2510
assert(OpInfo.Codes[i].size() == 1 &&
2511
"Unhandled multi-letter 'other' constraint");
2512
std::vector<SDValue> ResultOps;
2513
TLI.LowerAsmOperandForConstraint(Op, OpInfo.Codes[i][0], hasMemory,
2515
if (!ResultOps.empty()) {
2522
// This constraint letter is more general than the previous one, use it.
2523
int Generality = getConstraintGenerality(CType);
2524
if (Generality > BestGenerality) {
2527
BestGenerality = Generality;
2531
OpInfo.ConstraintCode = OpInfo.Codes[BestIdx];
2532
OpInfo.ConstraintType = BestType;
2535
/// ComputeConstraintToUse - Determines the constraint code and constraint
2536
/// type to use for the specific AsmOperandInfo, setting
2537
/// OpInfo.ConstraintCode and OpInfo.ConstraintType.
2538
void TargetLowering::ComputeConstraintToUse(AsmOperandInfo &OpInfo,
2541
SelectionDAG *DAG) const {
2542
assert(!OpInfo.Codes.empty() && "Must have at least one constraint");
2544
// Single-letter constraints ('r') are very common.
2545
if (OpInfo.Codes.size() == 1) {
2546
OpInfo.ConstraintCode = OpInfo.Codes[0];
2547
OpInfo.ConstraintType = getConstraintType(OpInfo.ConstraintCode);
2549
ChooseConstraint(OpInfo, hasMemory, *this, Op, DAG);
2552
// 'X' matches anything.
2553
if (OpInfo.ConstraintCode == "X" && OpInfo.CallOperandVal) {
2554
// Labels and constants are handled elsewhere ('X' is the only thing
2555
// that matches labels). For Functions, the type here is the type of
2556
// the result, which is not what we want to look at; leave them alone.
2557
Value *v = OpInfo.CallOperandVal;
2558
if (isa<BasicBlock>(v) || isa<ConstantInt>(v) || isa<Function>(v)) {
2559
OpInfo.CallOperandVal = v;
2563
// Otherwise, try to resolve it to something we know about by looking at
2564
// the actual operand type.
2565
if (const char *Repl = LowerXConstraint(OpInfo.ConstraintVT)) {
2566
OpInfo.ConstraintCode = Repl;
2567
OpInfo.ConstraintType = getConstraintType(OpInfo.ConstraintCode);
2572
//===----------------------------------------------------------------------===//
2573
// Loop Strength Reduction hooks
2574
//===----------------------------------------------------------------------===//
2576
/// isLegalAddressingMode - Return true if the addressing mode represented
2577
/// by AM is legal for this target, for a load/store of the specified type.
2578
bool TargetLowering::isLegalAddressingMode(const AddrMode &AM,
2579
const Type *Ty) const {
2580
// The default implementation of this implements a conservative RISCy, r+r and
2583
// Allows a sign-extended 16-bit immediate field.
2584
if (AM.BaseOffs <= -(1LL << 16) || AM.BaseOffs >= (1LL << 16)-1)
2587
// No global is ever allowed as a base.
2591
// Only support r+r,
2593
case 0: // "r+i" or just "i", depending on HasBaseReg.
2596
if (AM.HasBaseReg && AM.BaseOffs) // "r+r+i" is not allowed.
2598
// Otherwise we have r+r or r+i.
2601
if (AM.HasBaseReg || AM.BaseOffs) // 2*r+r or 2*r+i is not allowed.
2603
// Allow 2*r as r+r.
2610
/// BuildSDIVSequence - Given an ISD::SDIV node expressing a divide by constant,
2611
/// return a DAG expression to select that will generate the same value by
2612
/// multiplying by a magic number. See:
2613
/// <http://the.wall.riscom.net/books/proc/ppc/cwg/code2.html>
2614
SDValue TargetLowering::BuildSDIV(SDNode *N, SelectionDAG &DAG,
2615
std::vector<SDNode*>* Created) const {
2616
EVT VT = N->getValueType(0);
2617
DebugLoc dl= N->getDebugLoc();
2619
// Check to see if we can do this.
2620
// FIXME: We should be more aggressive here.
2621
if (!isTypeLegal(VT))
2624
APInt d = cast<ConstantSDNode>(N->getOperand(1))->getAPIntValue();
2625
APInt::ms magics = d.magic();
2627
// Multiply the numerator (operand 0) by the magic value
2628
// FIXME: We should support doing a MUL in a wider type
2630
if (isOperationLegalOrCustom(ISD::MULHS, VT))
2631
Q = DAG.getNode(ISD::MULHS, dl, VT, N->getOperand(0),
2632
DAG.getConstant(magics.m, VT));
2633
else if (isOperationLegalOrCustom(ISD::SMUL_LOHI, VT))
2634
Q = SDValue(DAG.getNode(ISD::SMUL_LOHI, dl, DAG.getVTList(VT, VT),
2636
DAG.getConstant(magics.m, VT)).getNode(), 1);
2638
return SDValue(); // No mulhs or equvialent
2639
// If d > 0 and m < 0, add the numerator
2640
if (d.isStrictlyPositive() && magics.m.isNegative()) {
2641
Q = DAG.getNode(ISD::ADD, dl, VT, Q, N->getOperand(0));
2643
Created->push_back(Q.getNode());
2645
// If d < 0 and m > 0, subtract the numerator.
2646
if (d.isNegative() && magics.m.isStrictlyPositive()) {
2647
Q = DAG.getNode(ISD::SUB, dl, VT, Q, N->getOperand(0));
2649
Created->push_back(Q.getNode());
2651
// Shift right algebraic if shift value is nonzero
2653
Q = DAG.getNode(ISD::SRA, dl, VT, Q,
2654
DAG.getConstant(magics.s, getShiftAmountTy()));
2656
Created->push_back(Q.getNode());
2658
// Extract the sign bit and add it to the quotient
2660
DAG.getNode(ISD::SRL, dl, VT, Q, DAG.getConstant(VT.getSizeInBits()-1,
2661
getShiftAmountTy()));
2663
Created->push_back(T.getNode());
2664
return DAG.getNode(ISD::ADD, dl, VT, Q, T);
2667
/// BuildUDIVSequence - Given an ISD::UDIV node expressing a divide by constant,
2668
/// return a DAG expression to select that will generate the same value by
2669
/// multiplying by a magic number. See:
2670
/// <http://the.wall.riscom.net/books/proc/ppc/cwg/code2.html>
2671
SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG,
2672
std::vector<SDNode*>* Created) const {
2673
EVT VT = N->getValueType(0);
2674
DebugLoc dl = N->getDebugLoc();
2676
// Check to see if we can do this.
2677
// FIXME: We should be more aggressive here.
2678
if (!isTypeLegal(VT))
2681
// FIXME: We should use a narrower constant when the upper
2682
// bits are known to be zero.
2683
ConstantSDNode *N1C = cast<ConstantSDNode>(N->getOperand(1));
2684
APInt::mu magics = N1C->getAPIntValue().magicu();
2686
// Multiply the numerator (operand 0) by the magic value
2687
// FIXME: We should support doing a MUL in a wider type
2689
if (isOperationLegalOrCustom(ISD::MULHU, VT))
2690
Q = DAG.getNode(ISD::MULHU, dl, VT, N->getOperand(0),
2691
DAG.getConstant(magics.m, VT));
2692
else if (isOperationLegalOrCustom(ISD::UMUL_LOHI, VT))
2693
Q = SDValue(DAG.getNode(ISD::UMUL_LOHI, dl, DAG.getVTList(VT, VT),
2695
DAG.getConstant(magics.m, VT)).getNode(), 1);
2697
return SDValue(); // No mulhu or equvialent
2699
Created->push_back(Q.getNode());
2701
if (magics.a == 0) {
2702
assert(magics.s < N1C->getAPIntValue().getBitWidth() &&
2703
"We shouldn't generate an undefined shift!");
2704
return DAG.getNode(ISD::SRL, dl, VT, Q,
2705
DAG.getConstant(magics.s, getShiftAmountTy()));
2707
SDValue NPQ = DAG.getNode(ISD::SUB, dl, VT, N->getOperand(0), Q);
2709
Created->push_back(NPQ.getNode());
2710
NPQ = DAG.getNode(ISD::SRL, dl, VT, NPQ,
2711
DAG.getConstant(1, getShiftAmountTy()));
2713
Created->push_back(NPQ.getNode());
2714
NPQ = DAG.getNode(ISD::ADD, dl, VT, NPQ, Q);
2716
Created->push_back(NPQ.getNode());
2717
return DAG.getNode(ISD::SRL, dl, VT, NPQ,
2718
DAG.getConstant(magics.s-1, getShiftAmountTy()));