2
* Copyright 2011 Christoph Bumiller
4
* Permission is hereby granted, free of charge, to any person obtaining a
5
* copy of this software and associated documentation files (the "Software"),
6
* to deal in the Software without restriction, including without limitation
7
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
8
* and/or sell copies of the Software, and to permit persons to whom the
9
* Software is furnished to do so, subject to the following conditions:
11
* The above copyright notice and this permission notice shall be included in
12
* all copies or substantial portions of the Software.
14
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17
* THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
18
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
19
* OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
24
#include "nv50_ir_target.h"
25
#include "nv50_ir_build_util.h"
28
#include "util/u_math.h"
34
// Runs a sequence of checks on this instruction's opcode, flags and
// definitions; by its name, presumably reports whether the instruction
// has no effect.
// NOTE(review): the statements guarded by the conditions below (presumably
// the returns) are not visible in this listing - confirm each outcome against
// the complete file.
Instruction::isNop() const
36
// First check: CONSTRAINT and PHI opcodes.
if (op == OP_CONSTRAINT || op == OP_PHI)
38
// Second check: terminator or join flag is set.
if (terminator || join) // XXX: should terminator imply flow ?
40
// Third check: an OP_NOP that is not marked fixed.
if (!fixed && op == OP_NOP)
43
// def 0 exists and its representative has a negative register id
// (presumably "no register assigned" - TODO confirm).
if (def[0].exists() && def[0].rep()->reg.data.id < 0) {
44
// Scan the remaining defs; warn if any representative does have an id >= 0,
// i.e. only part of a vector result is unused.
for (int d = 1; defExists(d); ++d)
45
if (def[d].rep()->reg.data.id >= 0)
46
WARN("part of vector result is unused !\n");
50
// For MOV and UNION, compare the representative of def 0 against the sources.
if (op == OP_MOV || op == OP_UNION) {
51
if (!def[0].rep()->equals(getSrc(0)))
54
// NOTE(review): the condition selecting this second comparison (source 1)
// is not visible here.
if (!def[0].rep()->equals(getSrc(1)))
62
// Checks the instruction's definitions and flow properties; by its name,
// presumably reports whether the instruction can be removed.
// NOTE(review): the return statements are not visible in this listing.
bool Instruction::isDead() const
68
// Visit every existing def.
for (int d = 0; defExists(d); ++d)
69
// A def with a non-zero reference count or a non-negative register id
// triggers the (not visible) guarded statement.
if (getDef(d)->refCount() || getDef(d)->reg.data.id >= 0)
72
// Terminators and flow instructions are tested separately.
if (terminator || asFlow())
80
// =============================================================================
82
class CopyPropagation : public Pass
85
virtual bool visit(BasicBlock *);
88
// Propagate all MOVs forward to make subsequent optimization easier, except if
89
// the sources stem from a phi, in which case we don't want to mess up potential
90
// swaps $rX <-> $rY, i.e. do not create live range overlaps of phi src and def.
92
// Walks the instructions of @bb and, for qualifying MOVs, replaces uses of the
// MOV's def with the MOV's source and deletes the MOV.
CopyPropagation::visit(BasicBlock *bb)
94
Instruction *mov, *si, *next;
96
// NOTE(review): the assignment of `next` is not visible in this listing;
// presumably it is saved before `mov` may be deleted below - confirm.
for (mov = bb->getEntry(); mov; mov = next) {
98
// Filter: only non-fixed MOVs whose source 0 is an LValue are of interest.
if (mov->op != OP_MOV || mov->fixed || !mov->getSrc(0)->asLValue())
100
// si: the instruction associated with the MOV's source value.
si = mov->getSrc(0)->getInsn();
101
// Propagate only when the MOV's def has a negative register id and the
// source has a defining instruction that is not a PHI.
if (mov->getDef(0)->reg.data.id < 0 && si && si->op != OP_PHI) {
103
mov->def[0].replace(mov->getSrc(0), false);
104
delete_Instruction(prog, mov);
110
// =============================================================================
112
class LoadPropagation : public Pass
115
virtual bool visit(BasicBlock *);
117
void checkSwapSrc01(Instruction *);
119
bool isCSpaceLoad(Instruction *);
120
bool isImmd32Load(Instruction *);
124
// True when @ld is non-NULL, is an OP_LOAD, and its source 0 lives in
// FILE_MEMORY_CONST.
LoadPropagation::isCSpaceLoad(Instruction *ld)
126
return ld && ld->op == OP_LOAD && ld->src[0].getFile() == FILE_MEMORY_CONST;
130
// Tests whether @ld is a 4-byte MOV whose source 0 is an immediate.
LoadPropagation::isImmd32Load(Instruction *ld)
132
// Reject NULL, non-MOV, or a destination type whose size is not 4.
// NOTE(review): the guarded statement (presumably `return false;`) is not
// visible in this listing.
if (!ld || (ld->op != OP_MOV) || (typeSizeof(ld->dType) != 4))
134
return ld->src[0].getFile() == FILE_IMMEDIATE;
138
// Considers swapping sources 0 and 1 of @insn depending on whether each source
// is produced by a constant-space load or a 32-bit immediate load, and adjusts
// the condition code of SET / SLCT instructions.
// NOTE(review): early-exit statements and else-branches are not visible in
// this listing; the exact control flow between the checks must be confirmed.
LoadPropagation::checkSwapSrc01(Instruction *insn)
140
// Applies to ops the target reports as commutative, plus SET and SLCT.
if (!prog->getTarget()->getOpInfo(insn).commutative)
141
if (insn->op != OP_SET && insn->op != OP_SLCT)
143
// Source 1 is checked for being a GPR.
if (insn->src[1].getFile() != FILE_GPR)
146
// Instructions associated with the two source values (may be NULL; the
// helper predicates below accept NULL).
Instruction *i0 = insn->getSrc(0)->getInsn();
147
Instruction *i1 = insn->getSrc(1)->getInsn();
149
// Source 0 comes from a c[] load: swap unless source 1 does too.
if (isCSpaceLoad(i0)) {
150
if (!isCSpaceLoad(i1))
151
insn->swapSources(0, 1);
155
// Source 0 comes from a 32-bit immediate MOV: swap unless source 1 is a c[]
// load or an immediate MOV itself.
if (isImmd32Load(i0)) {
156
if (!isCSpaceLoad(i1) && !isImmd32Load(i1))
157
insn->swapSources(0, 1);
164
// SET: replace the condition code with reverseCondCode() of itself.
if (insn->op == OP_SET)
165
insn->asCmp()->setCond = reverseCondCode(insn->asCmp()->setCond);
167
// SLCT: replace the condition code with inverseCondCode() of itself.
if (insn->op == OP_SLCT)
168
insn->asCmp()->setCond = inverseCondCode(insn->asCmp()->setCond);
172
// For every instruction in @bb, tries to replace each source that is produced
// by a LOAD or MOV with that producer's own source 0, deleting the producer
// once its def is no longer referenced.
LoadPropagation::visit(BasicBlock *bb)
174
const Target *targ = prog->getTarget();
177
// NOTE(review): the declaration/assignment of `next` is not visible in this
// listing.
for (Instruction *i = bb->getEntry(); i; i = next) {
183
for (int s = 0; i->srcExists(s); ++s) {
184
Instruction *ld = i->getSrc(s)->getInsn();
186
// Candidate producers: non-fixed LOAD or MOV instructions only.
if (!ld || ld->fixed || (ld->op != OP_LOAD && ld->op != OP_MOV))
188
// Ask the target whether @i can take this load directly as source @s.
if (!targ->insnCanLoad(i, s, ld))
192
// Use the producer's source directly.
i->setSrc(s, ld->getSrc(0));
193
// Carry over the indirect address of the producer's source, if any.
if (ld->src[0].isIndirect(0))
194
i->setIndirect(s, 0, ld->getIndirect(0, 0));
196
// Remove the producer when nothing references its result any more.
if (ld->getDef(0)->refCount() == 0)
197
delete_Instruction(prog, ld);
203
// =============================================================================
205
// Evaluate constant expressions.
206
class ConstantFolding : public Pass
209
bool foldAll(Program *);
212
virtual bool visit(BasicBlock *);
214
void expr(Instruction *, ImmediateValue *, ImmediateValue *);
215
void opnd(Instruction *, ImmediateValue *, int s);
217
void unary(Instruction *, const ImmediateValue&);
219
// TGSI 'true' is converted to -1 by F2I(NEG(SET)), track back to SET
220
CmpInstruction *findOriginForTestWithZero(Value *);
222
unsigned int foldCount;
227
// TODO: remember generated immediates and only revisit these
229
ConstantFolding::foldAll(Program *prog)
231
unsigned int iterCount = 0;
236
} while (foldCount && ++iterCount < 2);
241
ConstantFolding::visit(BasicBlock *bb)
243
Instruction *i, *next;
245
for (i = bb->getEntry(); i; i = next) {
247
if (i->op == OP_MOV) // continue early, MOV appears frequently
250
ImmediateValue *src0 = i->src[0].getImmediate();
251
ImmediateValue *src1 = i->src[1].getImmediate();
266
ConstantFolding::findOriginForTestWithZero(Value *value)
270
Instruction *insn = value->getInsn();
272
while (insn && insn->op != OP_SET) {
273
Instruction *next = NULL;
278
next = insn->getSrc(0)->getInsn();
279
if (insn->sType != next->dType)
283
next = insn->getSrc(0)->getInsn();
290
return insn ? insn->asCmp() : NULL;
294
// Applies the modifier bits (ABS, NEG, SAT, NOT) of this Modifier to the
// immediate @imm in place, dispatching on the immediate's register type.
// NOTE(review): most `case` labels and `break`s are not visible in this
// listing; the grouping below is inferred from the data field each section
// touches (f32 / s32 / f64).
Modifier::applyTo(ImmediateValue& imm) const
296
switch (imm.reg.type) {
298
// --- section operating on the f32 field ---
if (bits & NV50_IR_MOD_ABS)
299
imm.reg.data.f32 = fabsf(imm.reg.data.f32);
300
if (bits & NV50_IR_MOD_NEG)
301
imm.reg.data.f32 = -imm.reg.data.f32;
302
// SAT: values below 0 become 0, values above 1 become 1.
if (bits & NV50_IR_MOD_SAT) {
303
if (imm.reg.data.f32 < 0.0f)
304
imm.reg.data.f32 = 0.0f;
306
if (imm.reg.data.f32 > 1.0f)
307
imm.reg.data.f32 = 1.0f;
309
// NOT is asserted to be absent for this section.
assert(!(bits & NV50_IR_MOD_NOT));
312
case TYPE_S8: // NOTE: will be extended
315
case TYPE_U8: // NOTE: treated as signed
318
// --- section operating on the s32 field ---
if (bits & NV50_IR_MOD_ABS)
319
imm.reg.data.s32 = (imm.reg.data.s32 >= 0) ?
320
imm.reg.data.s32 : -imm.reg.data.s32;
321
if (bits & NV50_IR_MOD_NEG)
322
imm.reg.data.s32 = -imm.reg.data.s32;
323
// NOT is a bitwise complement here.
if (bits & NV50_IR_MOD_NOT)
324
imm.reg.data.s32 = ~imm.reg.data.s32;
328
// --- section operating on the f64 field ---
if (bits & NV50_IR_MOD_ABS)
329
imm.reg.data.f64 = fabs(imm.reg.data.f64);
330
if (bits & NV50_IR_MOD_NEG)
331
imm.reg.data.f64 = -imm.reg.data.f64;
332
// SAT: values below 0 become 0, values above 1 become 1.
if (bits & NV50_IR_MOD_SAT) {
333
if (imm.reg.data.f64 < 0.0)
334
imm.reg.data.f64 = 0.0;
336
if (imm.reg.data.f64 > 1.0)
337
imm.reg.data.f64 = 1.0;
339
assert(!(bits & NV50_IR_MOD_NOT));
343
// Fallback (presumably the default case): assert and zero the value.
assert(!"invalid/unhandled type");
344
imm.reg.data.u64 = 0;
350
// Maps a single modifier bit to the corresponding standalone operation:
// ABS -> OP_ABS, NEG -> OP_NEG, SAT -> OP_SAT, NOT -> OP_NOT.
// NOTE(review): the switch header and the handling of other bit patterns are
// not visible in this listing.
Modifier::getOp() const
353
case NV50_IR_MOD_ABS: return OP_ABS;
354
case NV50_IR_MOD_NEG: return OP_NEG;
355
case NV50_IR_MOD_SAT: return OP_SAT;
356
case NV50_IR_MOD_NOT: return OP_NOT;
365
ConstantFolding::expr(Instruction *i,
366
ImmediateValue *src0, ImmediateValue *src1)
368
ImmediateValue imm0(src0, i->sType);
369
ImmediateValue imm1(src1, i->sType);
371
struct Storage *const a = &imm0.reg, *const b = &imm1.reg;
373
i->src[0].mod.applyTo(imm0);
374
i->src[1].mod.applyTo(imm1);
380
if (i->dnz && i->dType == TYPE_F32) {
381
if (!isfinite(a->data.f32))
383
if (!isfinite(b->data.f32))
387
case TYPE_F32: res.data.f32 = a->data.f32 * b->data.f32; break;
388
case TYPE_F64: res.data.f64 = a->data.f64 * b->data.f64; break;
390
case TYPE_U32: res.data.u32 = a->data.u32 * b->data.u32; break;
396
if (b->data.u32 == 0)
399
case TYPE_F32: res.data.f32 = a->data.f32 / b->data.f32; break;
400
case TYPE_F64: res.data.f64 = a->data.f64 / b->data.f64; break;
401
case TYPE_S32: res.data.s32 = a->data.s32 / b->data.s32; break;
402
case TYPE_U32: res.data.u32 = a->data.u32 / b->data.u32; break;
409
case TYPE_F32: res.data.f32 = a->data.f32 + b->data.f32; break;
410
case TYPE_F64: res.data.f64 = a->data.f64 + b->data.f64; break;
412
case TYPE_U32: res.data.u32 = a->data.u32 + b->data.u32; break;
419
case TYPE_F32: res.data.f32 = pow(a->data.f32, b->data.f32); break;
420
case TYPE_F64: res.data.f64 = pow(a->data.f64, b->data.f64); break;
427
case TYPE_F32: res.data.f32 = MAX2(a->data.f32, b->data.f32); break;
428
case TYPE_F64: res.data.f64 = MAX2(a->data.f64, b->data.f64); break;
429
case TYPE_S32: res.data.s32 = MAX2(a->data.s32, b->data.s32); break;
430
case TYPE_U32: res.data.u32 = MAX2(a->data.u32, b->data.u32); break;
437
case TYPE_F32: res.data.f32 = MIN2(a->data.f32, b->data.f32); break;
438
case TYPE_F64: res.data.f64 = MIN2(a->data.f64, b->data.f64); break;
439
case TYPE_S32: res.data.s32 = MIN2(a->data.s32, b->data.s32); break;
440
case TYPE_U32: res.data.u32 = MIN2(a->data.u32, b->data.u32); break;
446
res.data.u64 = a->data.u64 & b->data.u64;
449
res.data.u64 = a->data.u64 | b->data.u64;
452
res.data.u64 = a->data.u64 ^ b->data.u64;
455
res.data.u32 = a->data.u32 << b->data.u32;
459
case TYPE_S32: res.data.s32 = a->data.s32 >> b->data.u32; break;
460
case TYPE_U32: res.data.u32 = a->data.u32 >> b->data.u32; break;
466
if (a->data.u32 != b->data.u32)
468
res.data.u32 = a->data.u32;
475
i->src[0].mod = Modifier(0);
476
i->src[1].mod = Modifier(0);
478
i->setSrc(0, new_ImmediateValue(i->bb->getProgram(), res.data.u32));
481
i->getSrc(0)->reg.data = res.data;
483
if (i->op == OP_MAD || i->op == OP_FMA) {
486
i->setSrc(1, i->getSrc(0));
487
i->setSrc(0, i->getSrc(2));
490
i->src[1].mod = i->src[2].mod;
492
src0 = i->src[0].getImmediate();
494
expr(i, src0, i->getSrc(1)->asImm());
501
// Evaluates a unary f32 operation of @i on the immediate @imm and installs
// the result as a new immediate in source 0 with the source modifier cleared.
// NOTE(review): the switch header, the declaration of `res`, and the final
// opcode update are not visible in this listing.
ConstantFolding::unary(Instruction *i, const ImmediateValue &imm)
505
// The destination type is tested against TYPE_F32 (guarded statement not
// visible; presumably an early return for other types).
if (i->dType != TYPE_F32)
508
case OP_NEG: res.data.f32 = -imm.reg.data.f32; break;
509
case OP_ABS: res.data.f32 = fabsf(imm.reg.data.f32); break;
510
case OP_RCP: res.data.f32 = 1.0f / imm.reg.data.f32; break;
511
case OP_RSQ: res.data.f32 = 1.0f / sqrtf(imm.reg.data.f32); break;
512
case OP_LG2: res.data.f32 = log2f(imm.reg.data.f32); break;
513
case OP_EX2: res.data.f32 = exp2f(imm.reg.data.f32); break;
514
case OP_SIN: res.data.f32 = sinf(imm.reg.data.f32); break;
515
case OP_COS: res.data.f32 = cosf(imm.reg.data.f32); break;
516
case OP_SQRT: res.data.f32 = sqrtf(imm.reg.data.f32); break;
519
// these should be handled in subsequent OP_SIN/COS/EX2
// (the value is passed through unchanged; the case labels this applies to
// are not visible in this listing)
520
res.data.f32 = imm.reg.data.f32;
526
// Replace source 0 by a fresh immediate holding the folded value and drop
// any modifier on it.
i->setSrc(0, new_ImmediateValue(i->bb->getProgram(), res.data.f32));
527
i->src[0].mod = Modifier(0);
531
ConstantFolding::opnd(Instruction *i, ImmediateValue *src, int s)
534
const operation op = i->op;
536
ImmediateValue imm(src, i->sType);
538
i->src[s].mod.applyTo(imm);
542
if (i->dType == TYPE_F32 && i->getSrc(t)->refCount() == 1) {
543
Instruction *si = i->getSrc(t)->getUniqueInsn();
545
if (si && si->op == OP_MUL) {
546
float f = imm.reg.data.f32;
548
if (si->src[1].getImmediate()) {
549
f *= si->src[1].getImmediate()->reg.data.f32;
550
si->setSrc(1, new_ImmediateValue(prog, f));
551
i->def[0].replace(i->getSrc(t), false);
555
if (f == 0.125f) fac = -3;
557
if (f == 0.250f) fac = -2;
559
if (f == 0.500f) fac = -1;
561
if (f == 2.000f) fac = +1;
563
if (f == 4.000f) fac = +2;
565
if (f == 8.000f) fac = +3;
569
// FIXME: allowed & modifier
570
si->postFactor = fac;
571
i->def[0].replace(i->getSrc(t), false);
577
if (imm.isInteger(0)) {
579
i->setSrc(0, i->getSrc(s));
582
if (imm.isInteger(1) || imm.isInteger(-1)) {
583
if (imm.isNegative())
584
i->src[t].mod = i->src[t].mod ^ Modifier(NV50_IR_MOD_NEG);
585
i->op = i->src[t].mod.getOp();
587
i->setSrc(0, i->getSrc(1));
588
i->src[0].mod = i->src[1].mod;
595
if (imm.isInteger(2) || imm.isInteger(-2)) {
596
if (imm.isNegative())
597
i->src[t].mod = i->src[t].mod ^ Modifier(NV50_IR_MOD_NEG);
599
i->setSrc(s, i->getSrc(t));
600
i->src[s].mod = i->src[t].mod;
602
if (!isFloatType(i->sType) && !imm.isNegative() && imm.isPow2()) {
605
i->setSrc(1, new_ImmediateValue(prog, imm.reg.data.u32));
609
if (imm.isInteger(0)) {
611
i->setSrc(0, i->getSrc(1));
612
i->src[0].mod = i->src[1].mod;
615
i->op = i->src[0].mod.getOp();
617
i->src[0].mod = Modifier(0);
622
if (s != 1 || (i->dType != TYPE_S32 && i->dType != TYPE_U32))
624
bld.setPosition(i, false);
625
if (imm.reg.data.u32 == 0) {
628
if (imm.reg.data.u32 == 1) {
632
if (i->dType == TYPE_U32 && imm.isPow2()) {
634
i->setSrc(1, bld.mkImm(util_logbase2(imm.reg.data.u32)));
636
if (i->dType == TYPE_U32) {
639
const uint32_t d = imm.reg.data.u32;
642
uint32_t l = util_logbase2(d);
643
if (((uint32_t)1 << l) < d)
645
m = (((uint64_t)1 << 32) * (((uint64_t)1 << l) - d)) / d + 1;
651
mul = bld.mkOp2(OP_MUL, TYPE_U32, tA, i->getSrc(0),
652
bld.loadImm(NULL, m));
653
mul->subOp = NV50_IR_SUBOP_MUL_HIGH;
654
bld.mkOp2(OP_SUB, TYPE_U32, tB, i->getSrc(0), tA);
657
bld.mkOp2(OP_SHR, TYPE_U32, tA, tB, bld.mkImm(r));
660
tB = s ? bld.getSSA() : i->getDef(0);
661
bld.mkOp2(OP_ADD, TYPE_U32, tB, mul->getDef(0), tA);
663
bld.mkOp2(OP_SHR, TYPE_U32, i->getDef(0), tB, bld.mkImm(s));
665
delete_Instruction(prog, i);
667
if (imm.reg.data.s32 == -1) {
673
const int32_t d = imm.reg.data.s32;
675
int32_t l = util_logbase2(static_cast<unsigned>(abs(d)));
676
if ((1 << l) < abs(d))
680
m = ((uint64_t)1 << (32 + l - 1)) / abs(d) + 1 - ((uint64_t)1 << 32);
684
bld.mkOp3(OP_MAD, TYPE_S32, tA, i->getSrc(0), bld.loadImm(NULL, m),
685
i->getSrc(0))->subOp = NV50_IR_SUBOP_MUL_HIGH;
687
bld.mkOp2(OP_SHR, TYPE_S32, tB, tA, bld.mkImm(l - 1));
691
bld.mkCmp(OP_SET, CC_LT, TYPE_S32, tA, i->getSrc(0), bld.mkImm(0));
692
tD = (d < 0) ? bld.getSSA() : i->getDef(0)->asLValue();
693
bld.mkOp2(OP_SUB, TYPE_U32, tD, tB, tA);
695
bld.mkOp1(OP_NEG, TYPE_S32, i->getDef(0), tB);
697
delete_Instruction(prog, i);
702
if (i->sType == TYPE_U32 && imm.isPow2()) {
703
bld.setPosition(i, false);
705
i->setSrc(1, bld.loadImm(NULL, imm.reg.data.u32 - 1));
709
case OP_SET: // TODO: SET_AND,OR,XOR
711
CmpInstruction *si = findOriginForTestWithZero(i->getSrc(t));
713
if (i->src[t].mod != Modifier(0))
715
if (imm.reg.data.u32 != 0 || !si || si->op != OP_SET)
718
ccZ = (CondCode)((unsigned int)i->asCmp()->setCond & ~CC_U);
720
ccZ = reverseCondCode(ccZ);
722
case CC_LT: cc = CC_FL; break;
723
case CC_GE: cc = CC_TR; break;
724
case CC_EQ: cc = inverseCondCode(cc); break;
725
case CC_LE: cc = inverseCondCode(cc); break;
731
i->asCmp()->setCond = cc;
732
i->setSrc(0, si->src[0]);
733
i->setSrc(1, si->src[1]);
734
i->sType = si->sType;
740
if (s != 1 || i->src[0].mod != Modifier(0))
742
// try to concatenate shifts
743
Instruction *si = i->getSrc(0)->getInsn();
745
si->op != OP_SHL || si->src[1].mod != Modifier(0))
747
ImmediateValue *siImm = si->src[1].getImmediate();
749
bld.setPosition(i, false);
750
i->setSrc(0, si->getSrc(0));
751
i->setSrc(1, bld.loadImm(NULL,
752
imm.reg.data.u32 + siImm->reg.data.u32));
777
// =============================================================================
779
// Merge modifier operations (ABS, NEG, NOT) into ValueRefs where allowed.
780
class ModifierFolding : public Pass
783
virtual bool visit(BasicBlock *);
787
ModifierFolding::visit(BasicBlock *bb)
789
const Target *target = prog->getTarget();
791
Instruction *i, *next, *mi;
794
for (i = bb->getEntry(); i; i = next) {
797
if (0 && i->op == OP_SUB) {
798
// turn "sub" into "add neg" (do we really want this ?)
800
i->src[0].mod = i->src[0].mod ^ Modifier(NV50_IR_MOD_NEG);
803
for (int s = 0; s < 3 && i->srcExists(s); ++s) {
804
mi = i->getSrc(s)->getInsn();
806
mi->predSrc >= 0 || mi->getDef(0)->refCount() > 8)
808
if (i->sType == TYPE_U32 && mi->dType == TYPE_S32) {
809
if ((i->op != OP_ADD &&
815
if (i->sType != mi->dType) {
818
if ((mod = Modifier(mi->op)) == Modifier(0))
820
mod = mod * mi->src[0].mod;
822
if ((i->op == OP_ABS) || i->src[s].mod.abs()) {
823
// abs neg [abs] = abs
824
mod = mod & Modifier(~(NV50_IR_MOD_NEG | NV50_IR_MOD_ABS));
826
if ((i->op == OP_NEG) && mod.neg()) {
828
// neg as both opcode and modifier on same insn is prohibited
829
// neg neg abs = abs, neg neg = identity
830
mod = mod & Modifier(~NV50_IR_MOD_NEG);
832
mod = mod & Modifier(~NV50_IR_MOD_ABS);
833
if (mod == Modifier(0))
837
if (target->isModSupported(i, s, mod)) {
838
i->setSrc(s, mi->getSrc(0));
839
i->src[s].mod = i->src[s].mod * mod;
843
if (i->op == OP_SAT) {
844
mi = i->getSrc(0)->getInsn();
846
mi->getDef(0)->refCount() <= 1 && target->isSatSupported(mi)) {
848
mi->setDef(0, i->getDef(0));
849
delete_Instruction(prog, i);
857
// =============================================================================
859
// MUL + ADD -> MAD/FMA
860
// MIN/MAX(a, a) -> a, etc.
861
// SLCT(a, b, const) -> cc(const) ? a : b
863
// MUL(MUL(a, b), const) -> MUL_Xconst(a, b)
864
class AlgebraicOpt : public Pass
867
virtual bool visit(BasicBlock *);
869
void handleADD(Instruction *);
870
void handleMINMAX(Instruction *);
871
void handleRCP(Instruction *);
872
void handleSLCT(Instruction *);
873
void handleLOGOP(Instruction *);
874
void handleCVT(Instruction *);
878
// Tries to fuse ADD(MUL(a, b), c) into a three-source instruction: picks the
// source of @add that is produced by a single-use MUL in the same block and
// rewrites @add's sources to (a, b, c).
// NOTE(review): early returns, the declarations of `s`, `src`, `mod[]`, and
// the final opcode assignment are not visible in this listing.
AlgebraicOpt::handleADD(Instruction *add)
880
Value *src0 = add->getSrc(0);
881
Value *src1 = add->getSrc(1);
886
// The target must support OP_MAD for the ADD's destination type.
if (!prog->getTarget()->isOpSupported(OP_MAD, add->dType))
889
// Both operands are checked for being GPRs.
if (src0->reg.file != FILE_GPR || src1->reg.file != FILE_GPR)
892
// Candidate selection: a source referenced exactly once whose unique
// defining instruction is a MUL (source 0 is tested first, then source 1).
if (src0->refCount() == 1 &&
893
src0->getUniqueInsn() && src0->getUniqueInsn()->op == OP_MUL)
896
if (src1->refCount() == 1 &&
897
src1->getUniqueInsn() && src1->getUniqueInsn()->op == OP_MUL)
902
// Both producers (where present) are checked for living in @add's block.
if ((src0->getUniqueInsn() && src0->getUniqueInsn()->bb != add->bb) ||
903
(src1->getUniqueInsn() && src1->getUniqueInsn()->bb != add->bb))
906
// src: the chosen MUL result (index s).
src = add->getSrc(s);
908
// Collect the modifiers of both ADD sources and both MUL sources.
mod[0] = add->src[0].mod;
909
mod[1] = add->src[1].mod;
910
mod[2] = src->getUniqueInsn()->src[0].mod;
911
mod[3] = src->getUniqueInsn()->src[1].mod;
913
// Tests whether any modifier bit other than NEG is set on any of them.
if (((mod[0] | mod[1]) | (mod[2] | mod[3])) & Modifier(~NV50_IR_MOD_NEG))
917
add->subOp = src->getInsn()->subOp; // potentially mul-high
919
// The other ADD operand becomes source 2 (the addend).
add->setSrc(2, add->src[s ? 0 : 1]);
921
// The MUL's operands become sources 0 and 1; the modifier of the ADD source
// that referenced the MUL is XOR-combined into source 0's modifier.
add->setSrc(0, src->getInsn()->getSrc(0));
922
add->src[0].mod = mod[2] ^ mod[s];
923
add->setSrc(1, src->getInsn()->getSrc(1));
924
add->src[1].mod = mod[3];
928
// Simplifies MIN/MAX whose two sources are the same GPR value.
// NOTE(review): early returns, the else-branches and the opcode assignment
// for the modifier case are not visible in this listing.
AlgebraicOpt::handleMINMAX(Instruction *minmax)
930
Value *src0 = minmax->getSrc(0);
931
Value *src1 = minmax->getSrc(1);
933
// Only the case "same value on both sides, in a GPR" is handled.
if (src0 != src1 || src0->reg.file != FILE_GPR)
935
// Identical modifiers on both sources: min/max(x, x).
if (minmax->src[0].mod == minmax->src[1].mod) {
936
// With a non-zero modifier, source 1 is dropped (the instruction is
// presumably turned into the modifier's op - confirm).
if (minmax->src[0].mod) {
938
minmax->setSrc(1, NULL);
940
// Otherwise uses of the result are redirected to the source and the
// instruction is removed from its block.
minmax->def[0].replace(minmax->getSrc(0), false);
941
minmax->bb->remove(minmax);
945
// Identities for differing modifiers, listed for reference:
// min(x, -x) = -abs(x)
946
// min(x, -abs(x)) = -abs(x)
947
// min(x, abs(x)) = x
948
// max(x, -abs(x)) = x
949
// max(x, abs(x)) = abs(x)
950
// max(x, -x) = abs(x)
955
// Collapses RCP(RCP(x)): when the unique producer of @rcp's source is itself
// an RCP, @rcp is rewritten to operate directly on that producer's source.
AlgebraicOpt::handleRCP(Instruction *rcp)
957
Instruction *si = rcp->getSrc(0)->getUniqueInsn();
959
if (si && si->op == OP_RCP) {
960
// Combine the modifiers of both RCP sources ...
Modifier mod = rcp->src[0].mod * si->src[0].mod;
961
// ... and let the combined modifier choose the replacement opcode.
rcp->op = mod.getOp();
962
rcp->setSrc(0, si->getSrc(0));
967
// Simplifies SLCT when its selector (source 2) is an immediate, so the choice
// between source 0 and source 1 is known at compile time.
// NOTE(review): the else-branch, early return and opcode assignment are not
// visible in this listing.
AlgebraicOpt::handleSLCT(Instruction *slct)
969
if (slct->getSrc(2)->reg.file == FILE_IMMEDIATE) {
970
// Evaluate the condition code against 0.0f on the immediate; when it holds,
// source 1 is moved into slot 0.
if (slct->getSrc(2)->asImm()->compare(slct->asCmp()->setCond, 0.0f))
971
slct->setSrc(0, slct->getSrc(1));
973
// Second case tested: the two selectable sources differ.
if (slct->getSrc(0) != slct->getSrc(1)) {
977
// Drop the now-unused sources 1 and 2.
slct->setSrc(1, NULL);
978
slct->setSrc(2, NULL);
982
AlgebraicOpt::handleLOGOP(Instruction *logop)
984
Value *src0 = logop->getSrc(0);
985
Value *src1 = logop->getSrc(1);
987
if (src0->reg.file != FILE_GPR || src1->reg.file != FILE_GPR)
991
if (logop->src[0].mod != Modifier(0) ||
992
logop->src[1].mod != Modifier(0))
994
if (logop->op == OP_AND || logop->op == OP_OR) {
995
logop->def[0].replace(logop->getSrc(0), false);
996
delete_Instruction(prog, logop);
999
// try AND(SET, SET) -> SET_AND(SET)
1000
Instruction *set0 = src0->getInsn();
1001
Instruction *set1 = src1->getInsn();
1003
if (!set0 || set0->fixed || !set1 || set1->fixed)
1005
if (set1->op != OP_SET) {
1006
Instruction *xchg = set0;
1009
if (set1->op != OP_SET)
1012
if (set0->op != OP_SET &&
1013
set0->op != OP_SET_AND &&
1014
set0->op != OP_SET_OR &&
1015
set0->op != OP_SET_XOR)
1017
if (set0->getDef(0)->refCount() > 1 &&
1018
set1->getDef(0)->refCount() > 1)
1020
if (set0->getPredicate() || set1->getPredicate())
1022
// check that they don't source each other
1023
for (int s = 0; s < 2; ++s)
1024
if (set0->getSrc(s) == set1->getDef(0) ||
1025
set1->getSrc(s) == set0->getDef(0))
1028
set0 = set0->clone(true);
1029
set1 = set1->clone(false);
1030
logop->bb->insertAfter(logop, set1);
1031
logop->bb->insertAfter(logop, set0);
1033
set0->dType = TYPE_U8;
1034
set0->getDef(0)->reg.file = FILE_PREDICATE;
1035
set0->getDef(0)->reg.size = 1;
1036
set1->setSrc(2, set0->getDef(0));
1037
switch (logop->op) {
1038
case OP_AND: set1->op = OP_SET_AND; break;
1039
case OP_OR: set1->op = OP_SET_OR; break;
1040
case OP_XOR: set1->op = OP_SET_XOR; break;
1045
set1->setDef(0, logop->getDef(0));
1046
delete_Instruction(prog, logop);
1050
// F2I(NEG(SET with result 1.0f/0.0f)) -> SET with result -1/0
1052
// Matches CVT(f32 -> s32) of NEG(f32) of SET(f32) and replaces the CVT by a
// clone of the SET that writes the CVT's destination with type U32.
// NOTE(review): the early returns guarded by the checks are not visible in
// this listing.
AlgebraicOpt::handleCVT(Instruction *cvt)
1054
// The conversion must be F32 -> S32 without a source modifier.
if (cvt->sType != TYPE_F32 ||
1055
cvt->dType != TYPE_S32 || cvt->src[0].mod != Modifier(0))
1057
// Its source must be produced by an f32 NEG ...
Instruction *insn = cvt->getSrc(0)->getInsn();
1058
if (!insn || insn->op != OP_NEG || insn->dType != TYPE_F32)
1060
// ... whose own source carries no modifier ...
if (insn->src[0].mod != Modifier(0))
1062
// ... and is produced by an f32 SET.
insn = insn->getSrc(0)->getInsn();
1063
if (!insn || insn->op != OP_SET || insn->dType != TYPE_F32)
1066
// Clone the SET, retarget it to the CVT's def, insert it after the CVT and
// delete the CVT.
Instruction *bset = insn->clone(false);
1067
bset->dType = TYPE_U32;
1068
bset->setDef(0, cvt->getDef(0));
1069
cvt->bb->insertAfter(cvt, bset);
1070
delete_Instruction(prog, cvt);
1074
AlgebraicOpt::visit(BasicBlock *bb)
1077
for (Instruction *i = bb->getEntry(); i; i = next) {
1109
// =============================================================================
1112
// Sets the offset of @ldst's address operand (source 0) to @offset.
// If that operand is referenced more than once it is cloned (via @fn) first,
// so the write below goes to the clone rather than the shared value.
updateLdStOffset(Instruction *ldst, int32_t offset, Function *fn)
1114
// Nothing to do when the offset already matches.
if (offset != ldst->getSrc(0)->reg.data.offset) {
1115
if (ldst->getSrc(0)->refCount() > 1)
1116
ldst->setSrc(0, ldst->getSrc(0)->clone(fn));
1117
ldst->getSrc(0)->reg.data.offset = offset;
1121
// Combine loads and stores, forward stores to loads where possible.
1122
class MemoryOpt : public Pass
1130
const Value *rel[2];
1138
bool overlaps(const Instruction *ldst) const;
1140
inline void link(Record **);
1141
inline void unlink(Record **);
1142
inline void set(const Instruction *ldst);
1148
Record *loads[DATA_FILE_COUNT];
1149
Record *stores[DATA_FILE_COUNT];
1151
MemoryPool recordPool;
1154
virtual bool visit(BasicBlock *);
1155
bool runOpt(BasicBlock *);
1157
Record **getList(const Instruction *);
1159
Record *findRecord(const Instruction *, bool load, bool& isAdjacent) const;
1161
// merge @insn into load/store instruction from @rec
1162
bool combineLd(Record *rec, Instruction *ld);
1163
bool combineSt(Record *rec, Instruction *st);
1165
bool replaceLdFromLd(Instruction *ld, Record *ldRec);
1166
bool replaceLdFromSt(Instruction *ld, Record *stRec);
1167
bool replaceStFromSt(Instruction *restrict st, Record *stRec);
1169
void addRecord(Instruction *ldst);
1170
void purgeRecords(Instruction *const st, DataFile);
1171
void lockStores(Instruction *const ld);
1178
MemoryOpt::MemoryOpt() : recordPool(sizeof(MemoryOpt::Record), 6)
1180
for (int i = 0; i < DATA_FILE_COUNT; ++i) {
1190
for (unsigned int i = 0; i < DATA_FILE_COUNT; ++i) {
1192
for (it = loads[i]; it; it = next) {
1194
recordPool.release(it);
1197
for (it = stores[i]; it; it = next) {
1199
recordPool.release(it);
1206
// Merges the load @ld into the recorded load @rec->insn: the definitions of
// both are gathered on @rec->insn in address order, its type is widened to the
// combined size, and @ld is deleted.
// Sizes and offsets are in bytes (typeSizeof results; the total is asserted
// to be at most 16).
// NOTE(review): early returns, local declarations (j, d, sz) and the
// else-branch header are not visible in this listing.
MemoryOpt::combineLd(Record *rec, Instruction *ld)
1208
int32_t offRc = rec->offset;
1209
int32_t offLd = ld->getSrc(0)->reg.data.offset;
1210
int sizeRc = rec->size;
1211
int sizeLd = typeSizeof(ld->dType);
1212
int size = sizeRc + sizeLd;
1215
// only VFETCH can do a 96 bit (12 byte) load
1216
if (ld->op != OP_VFETCH && size == 12)
1218
// no unaligned loads
// (8-byte result needs an 8-byte aligned start, 12-byte result a 16-byte
// aligned start)
1219
if (((size == 0x8) && (MIN2(offLd, offRc) & 0x7)) ||
1220
((size == 0xc) && (MIN2(offLd, offRc) & 0xf)))
1223
assert(sizeRc + sizeLd <= 16 && offRc != offLd);
1225
// Count the definitions of rec->insn by consuming sizeRc def by def.
for (j = 0; sizeRc; sizeRc -= rec->insn->getDef(j)->reg.size, ++j);
1227
// @ld covers the lower address: shift the record's defs up to make room and
// move the record's start offset down to @ld's offset.
if (offLd < offRc) {
1229
for (sz = 0, d = 0; sz < sizeLd; sz += ld->getDef(d)->reg.size, ++d);
1230
// d: nr of definitions in ld
1231
// j: nr of definitions in rec->insn, move:
1232
for (d = d + j - 1; j > 0; --j, --d)
1233
rec->insn->setDef(d, rec->insn->getDef(j - 1));
1235
// Clone a shared address symbol before changing its offset.
if (rec->insn->getSrc(0)->refCount() > 1)
1236
rec->insn->setSrc(0, rec->insn->getSrc(0)->clone(func));
1237
rec->offset = rec->insn->getSrc(0)->reg.data.offset = offLd;
1243
// move definitions of @ld to @rec->insn
1244
for (j = 0; sizeLd; ++j, ++d) {
1245
sizeLd -= ld->getDef(j)->reg.size;
1246
rec->insn->setDef(d, ld->getDef(j));
1250
// Widen the recorded load to the combined size and drop the merged load.
rec->insn->setType(typeOfSize(size));
1252
delete_Instruction(prog, ld);
1258
MemoryOpt::combineSt(Record *rec, Instruction *st)
1260
int32_t offRc = rec->offset;
1261
int32_t offSt = st->getSrc(0)->reg.data.offset;
1262
int sizeRc = rec->size;
1263
int sizeSt = typeSizeof(st->dType);
1265
int size = sizeRc + sizeSt;
1267
Value *src[4]; // no modifiers in ValueRef allowed for st
1270
if (size == 12) // XXX: check if EXPORT a[] can do this after all
1272
if (size == 8 && MIN2(offRc, offSt) & 0x7)
1275
st->takeExtraSources(0, extra); // save predicate and indirect address
1277
if (offRc < offSt) {
1278
// save values from @st
1279
for (s = 0; sizeSt; ++s) {
1280
sizeSt -= st->getSrc(s + 1)->reg.size;
1281
src[s] = st->getSrc(s + 1);
1283
// set record's values as low sources of @st
1284
for (j = 1; sizeRc; ++j) {
1285
sizeRc -= st->getSrc(j)->reg.size;
1286
st->setSrc(j, rec->insn->getSrc(j));
1288
// set saved values as high sources of @st
1289
for (k = j, j = 0; j < s; ++j)
1290
st->setSrc(k++, src[j]);
1292
updateLdStOffset(st, offRc, func);
1294
for (j = 1; sizeSt; ++j)
1295
sizeSt -= st->getSrc(j)->reg.size;
1296
for (s = 1; sizeRc; ++j, ++s) {
1297
sizeRc -= rec->insn->getSrc(s)->reg.size;
1298
st->setSrc(j, rec->insn->getSrc(s));
1300
rec->offset = offSt;
1302
st->putExtraSources(0, extra); // restore pointer and predicate
1304
delete_Instruction(prog, rec->insn);
1307
rec->insn->setType(typeOfSize(size));
1312
// Fills this record from the load/store @ldst: file index, both indirect
// address values, offset and base of the memory symbol in source 0, and the
// access size derived from the instruction's source type.
// NOTE(review): any further field assignments (e.g. the instruction pointer)
// are not visible in this listing.
MemoryOpt::Record::set(const Instruction *ldst)
1314
// Source 0 is the memory symbol being accessed.
const Symbol *mem = ldst->getSrc(0)->asSym();
1315
fileIndex = mem->reg.fileIndex;
1316
rel[0] = ldst->getIndirect(0, 0);
1317
rel[1] = ldst->getIndirect(0, 1);
1318
offset = mem->reg.data.offset;
1319
base = mem->getBase();
1320
size = typeSizeof(ldst->sType);
1324
MemoryOpt::Record::link(Record **list)
1334
MemoryOpt::Record::unlink(Record **list)
1344
// Returns the address of the record-list head matching @insn: the `loads`
// list for LOAD / VFETCH, the `stores` list for everything else, indexed by
// the data file of the instruction's source 0.
MemoryOpt::Record **
1345
MemoryOpt::getList(const Instruction *insn)
1347
if (insn->op == OP_LOAD || insn->op == OP_VFETCH)
1348
return &loads[insn->src[0].getFile()];
1349
return &stores[insn->src[0].getFile()];
1353
MemoryOpt::addRecord(Instruction *i)
1355
Record **list = getList(i);
1356
Record *it = reinterpret_cast<Record *>(recordPool.allocate());
1365
// Searches the load list (@load == true) or store list for a record that
// relates to the memory access of @insn, and reports through @isAdj whether
// the match is merely adjacent to the access rather than at the same start
// offset.
// NOTE(review): `continue`/`return`/`else` statements are not visible in this
// listing, so the result of each test below must be confirmed against the
// complete file.
MemoryOpt::findRecord(const Instruction *insn, bool load, bool& isAdj) const
1367
const Symbol *sym = insn->getSrc(0)->asSym();
1368
const int size = typeSizeof(insn->sType);
1370
// Lists are kept per data file.
Record *it = load ? loads[sym->reg.file] : stores[sym->reg.file];
1372
for (; it; it = it->next) {
1373
// Locked records are tested against non-LOAD instructions.
if (it->locked && insn->op != OP_LOAD)
1375
// Mismatch test: different 16-byte window (offset >> 4), different
// indirect address values, or different file index.
if ((it->offset >> 4) != (sym->reg.data.offset >> 4) ||
1376
it->rel[0] != insn->getIndirect(0, 0) ||
1377
it->fileIndex != sym->reg.fileIndex ||
1378
it->rel[1] != insn->getIndirect(0, 1))
1381
// Record starts below the access.
if (it->offset < sym->reg.data.offset) {
1382
// Record reaches at least up to the start of the access.
if (it->offset + it->size >= sym->reg.data.offset) {
1383
// Adjacent when the record ends exactly where the access begins.
isAdj = (it->offset + it->size == sym->reg.data.offset);
1386
// Record start is 8-byte aligned.
if (!(it->offset & 0x7))
1390
// Record starts at or above the access: adjacent unless offsets are equal.
isAdj = it->offset != sym->reg.data.offset;
1391
// Same start and the access fits inside the record.
if (size <= it->size && !isAdj)
1394
// Access start is 8-byte aligned ...
if (!(sym->reg.data.offset & 0x7))
1395
// ... and the access reaches at least up to the record's start.
if (it->offset - size <= sym->reg.data.offset)
1403
// Forwards values of the recorded store @rec->insn to the load @ld: each def
// of @ld is replaced by the store source that covers the same location.
// NOTE(review): early returns, local declarations and the final cleanup are
// not visible in this listing.
MemoryOpt::replaceLdFromSt(Instruction *ld, Record *rec)
1405
Instruction *st = rec->insn;
1406
int32_t offSt = rec->offset;
1407
int32_t offLd = ld->getSrc(0)->reg.data.offset;
1410
// Advance through the store's value sources (starting at index 1; source 0
// is the address) until the running offset reaches the load's offset.
for (s = 1; offSt != offLd && st->srcExists(s); ++s)
1411
offSt += st->getSrc(s)->reg.size;
1415
// Pair load defs with store sources one by one.
for (d = 0; ld->defExists(d) && st->srcExists(s); ++d, ++s) {
1416
// Sizes of the paired def and source are compared.
if (ld->getDef(d)->reg.size != st->getSrc(s)->reg.size)
1418
// The stored value is checked for being a GPR.
if (st->getSrc(s)->reg.file != FILE_GPR)
1420
ld->def[d].replace(st->getSrc(s), false);
1427
// Replaces the load @ldE by results of the earlier recorded load @rec->insn:
// each def of @ldE is replaced by the def of the recorded load covering the
// same location, then @ldE is deleted.
// NOTE(review): early returns and local declarations are not visible in this
// listing.
MemoryOpt::replaceLdFromLd(Instruction *ldE, Record *rec)
1429
Instruction *ldR = rec->insn;
1430
int32_t offR = rec->offset;
1431
int32_t offE = ldE->getSrc(0)->reg.data.offset;
1434
// The recorded load must not start above the load being replaced.
assert(offR <= offE);
1435
// Skip the recorded load's defs that lie below @ldE's offset.
for (dR = 0; offR < offE && ldR->defExists(dR); ++dR)
1436
offR += ldR->getDef(dR)->reg.size;
1440
// Pair the defs of both loads one by one.
for (dE = 0; ldE->defExists(dE) && ldR->defExists(dR); ++dE, ++dR) {
1441
// Sizes of the paired defs are compared.
if (ldE->getDef(dE)->reg.size != ldR->getDef(dR)->reg.size)
1443
ldE->def[dE].replace(ldR->getDef(dR), false);
1446
delete_Instruction(prog, ldE);
1451
MemoryOpt::replaceStFromSt(Instruction *restrict st, Record *rec)
1453
const Instruction *const ri = rec->insn;
1456
int32_t offS = st->getSrc(0)->reg.data.offset;
1457
int32_t offR = rec->offset;
1458
int32_t endS = offS + typeSizeof(st->dType);
1459
int32_t endR = offR + typeSizeof(ri->dType);
1461
rec->size = MAX2(endS, endR) - MIN2(offS, offR);
1463
st->takeExtraSources(0, extra);
1469
// get non-replaced sources of ri
1470
for (s = 1; offR < offS; offR += ri->getSrc(s)->reg.size, ++s)
1471
vals[k++] = ri->getSrc(s);
1473
// get replaced sources of st
1474
for (s = 1; st->srcExists(s); offS += st->getSrc(s)->reg.size, ++s)
1475
vals[k++] = st->getSrc(s);
1476
// skip replaced sources of ri
1477
for (s = n; offR < endS; offR += ri->getSrc(s)->reg.size, ++s);
1478
// get non-replaced sources after values covered by st
1479
for (; offR < endR; offR += ri->getSrc(s)->reg.size, ++s)
1480
vals[k++] = ri->getSrc(s);
1481
for (s = 0; s < k; ++s)
1482
st->setSrc(s + 1, vals[s]);
1483
st->setSrc(0, ri->getSrc(0));
1487
for (j = 1; offR < endS; offR += ri->getSrc(j++)->reg.size);
1488
for (s = 1; offS < endS; offS += st->getSrc(s++)->reg.size);
1489
for (; offR < endR; offR += ri->getSrc(j++)->reg.size)
1490
st->setSrc(s++, ri->getSrc(j));
1492
st->putExtraSources(0, extra);
1494
delete_Instruction(prog, rec->insn);
1497
rec->offset = st->getSrc(0)->reg.data.offset;
1499
st->setType(typeOfSize(rec->size));
1505
// Tests whether the memory range of this record overlaps the access made by
// @ldst.
// NOTE(review): the construction of `that` (presumably a Record filled from
// @ldst) and the `return` keywords of the first and last statements are not
// visible in this listing.
MemoryOpt::Record::overlaps(const Instruction *ldst) const
1510
// Different file indices are tested first.
if (this->fileIndex != that.fileIndex)
1513
// If either side uses an indirect address, the result is decided by
// comparing the base symbols only.
if (this->rel[0] || that.rel[0])
1514
return this->base == that.base;
1516
// Otherwise: half-open interval intersection of
// [offset, offset + size) on both sides.
(this->offset < that.offset + that.size) &&
1517
(this->offset + this->size > that.offset);
1520
// We must not eliminate stores that affect the result of @ld if
1521
// we find later stores to the same location, and we may no longer
1522
// merge them with later stores.
1523
// The stored value can, however, still be used to determine the value
1524
// returned by future loads.
1526
// Visits every store record in the data file accessed by @ld and selects
// those that are not yet locked and overlap @ld.
// NOTE(review): the statement applied to the selected records (presumably
// setting `locked`) is not visible in this listing.
MemoryOpt::lockStores(Instruction *const ld)
1528
for (Record *r = stores[ld->src[0].getFile()]; r; r = r->next)
1529
if (!r->locked && r->overlaps(ld))
1533
// Prior loads from the location of @st are no longer valid.
1534
// Stores to the location of @st may no longer be used to derive
1535
// the value at it nor be coalesced into later stores.
1537
// Unlinks load and store records in data file @f. With a non-NULL @st only
// records overlapping @st are unlinked (and @f is taken from @st's source 0);
// with a NULL @st every record in @f is unlinked.
MemoryOpt::purgeRecords(Instruction *const st, DataFile f)
1540
// NOTE(review): the loops below allow st == NULL, so this dereference must
// be guarded; the guard is not visible in this listing - confirm it exists.
f = st->src[0].getFile();
1542
// Iteration reads r->next after r->unlink(); this relies on unlink()
// leaving the record's own `next` pointer intact - TODO confirm.
for (Record *r = loads[f]; r; r = r->next)
1543
if (!st || r->overlaps(st))
1544
r->unlink(&loads[f]);
1546
for (Record *r = stores[f]; r; r = r->next)
1547
if (!st || r->overlaps(st))
1548
r->unlink(&stores[f]);
1552
MemoryOpt::visit(BasicBlock *bb)
1554
bool ret = runOpt(bb);
1555
// Run again, one pass won't combine 4 32 bit ld/st to a single 128 bit ld/st
1556
// where 96 bit memory operations are forbidden.
1563
MemoryOpt::runOpt(BasicBlock *bb)
1565
Instruction *ldst, *next;
1567
bool isAdjacent = true;
1569
for (ldst = bb->getEntry(); ldst; ldst = next) {
1574
if (ldst->op == OP_LOAD || ldst->op == OP_VFETCH) {
1575
if (ldst->isDead()) {
1576
// might have been produced by earlier optimization
1577
delete_Instruction(prog, ldst);
1581
if (ldst->op == OP_STORE || ldst->op == OP_EXPORT) {
1584
// TODO: maybe have all fixed ops act as barrier ?
1585
if (ldst->op == OP_CALL) {
1586
purgeRecords(NULL, FILE_MEMORY_LOCAL);
1587
purgeRecords(NULL, FILE_MEMORY_GLOBAL);
1588
purgeRecords(NULL, FILE_MEMORY_SHARED);
1589
purgeRecords(NULL, FILE_SHADER_OUTPUT);
1591
if (ldst->op == OP_EMIT || ldst->op == OP_RESTART) {
1592
purgeRecords(NULL, FILE_SHADER_OUTPUT);
1596
if (ldst->getPredicate()) // TODO: handle predicated ld/st
1600
DataFile file = ldst->src[0].getFile();
1602
// if ld l[]/g[] look for previous store to eliminate the reload
1603
if (file == FILE_MEMORY_GLOBAL || file == FILE_MEMORY_LOCAL) {
1604
// TODO: shared memory ?
1605
rec = findRecord(ldst, false, isAdjacent);
1606
if (rec && !isAdjacent)
1607
keep = !replaceLdFromSt(ldst, rec);
1610
// or look for ld from the same location and replace this one
1611
rec = keep ? findRecord(ldst, true, isAdjacent) : NULL;
1614
keep = !replaceLdFromLd(ldst, rec);
1616
// or combine a previous load with this one
1617
keep = !combineLd(rec, ldst);
1622
rec = findRecord(ldst, false, isAdjacent);
1625
keep = !replaceStFromSt(ldst, rec);
1627
keep = !combineSt(rec, ldst);
1630
purgeRecords(ldst, DATA_FILE_COUNT);
1640
// =============================================================================
1642
// Turn control flow into predicated instructions (after register allocation !).
1644
// Could move this to before register allocation on NVC0 and also handle nested
1646
class FlatteningPass : public Pass
1649
virtual bool visit(BasicBlock *);
1651
bool tryPredicateConditional(BasicBlock *);
1652
void predicateInstructions(BasicBlock *, Value *pred, CondCode cc);
1653
void tryPropagateBranch(BasicBlock *);
1654
inline bool isConstantCondition(Value *pred);
1655
inline bool mayPredicate(const Instruction *, const Value *pred) const;
1656
inline void removeFlow(Instruction *);
1660
FlatteningPass::isConstantCondition(Value *pred)
1662
Instruction *insn = pred->getUniqueInsn();
1664
if (insn->op != OP_SET || insn->srcExists(2))
1667
for (int s = 0; s < 2 && insn->srcExists(s); ++s) {
1668
Instruction *ld = insn->getSrc(s)->getUniqueInsn();
1671
if (ld->op != OP_MOV && ld->op != OP_LOAD)
1673
if (ld->src[0].isIndirect(0))
1675
file = ld->src[0].getFile();
1677
file = insn->src[s].getFile();
1678
// catch $r63 on NVC0
1679
if (file == FILE_GPR && insn->getSrc(s)->reg.data.id > prog->maxGPR)
1680
file = FILE_IMMEDIATE;
1682
if (file != FILE_IMMEDIATE && file != FILE_MEMORY_CONST)
1689
FlatteningPass::removeFlow(Instruction *insn)
1691
FlowInstruction *term = insn ? insn->asFlow() : NULL;
1694
Graph::Edge::Type ty = term->bb->cfg.outgoing().getType();
1696
if (term->op == OP_BRA) {
1697
// TODO: this might get more difficult when we get arbitrary BRAs
1698
if (ty == Graph::Edge::CROSS || ty == Graph::Edge::BACK)
1701
if (term->op != OP_JOIN)
1704
delete_Instruction(prog, term);
1706
Value *pred = term->getPredicate();
1708
if (pred && pred->refCount() == 0) {
1709
Instruction *pSet = pred->getUniqueInsn();
1710
pred->join->reg.data.id = -1; // deallocate
1712
delete_Instruction(prog, pSet);
1717
// Attach the predicate (@cc, @pred) to instructions of @bb and then hand the
// block's exit instruction to removeFlow().
// NOTE(review): lines inside the loop are not visible in this excerpt, so
// some instructions may be skipped before the predicate is set -- confirm.
FlatteningPass::predicateInstructions(BasicBlock *bb, Value *pred, CondCode cc)
1719
for (Instruction *i = bb->getEntry(); i; i = i->next) {
1722
// An instruction reaching this point must not carry a predicate already.
assert(!i->getPredicate());
1723
i->setPredicate(cc, pred);
1725
// Pass the block's exit instruction on to removeFlow().
removeFlow(bb->getExit());
1729
// Decide whether @insn may be guarded by predicate @pred. Three checks are
// visible here: whether @insn is a pseudo instruction, what the target's own
// mayPredicate() hook answers, and whether @insn defines a value equal to
// @pred.
FlatteningPass::mayPredicate(const Instruction *insn, const Value *pred) const
1731
if (insn->isPseudo())
1733
// TODO: calls where we don't know which registers are modified
1735
// Ask the target whether it permits predicating @insn with @pred.
if (!prog->getTarget()->mayPredicate(insn, pred))
1737
// Look for a definition of @insn that equals the predicate value itself.
for (int d = 0; insn->defExists(d); ++d)
1738
if (insn->getDef(d)->equals(pred))
1743
// If we conditionally skip over or to a branch instruction, replace it.
1744
// NOTE: We do not update the CFG anymore here !
1746
FlatteningPass::tryPropagateBranch(BasicBlock *bb)
1748
BasicBlock *bf = NULL;
1751
if (bb->cfg.outgoingCount() != 2)
1753
if (!bb->getExit() || bb->getExit()->op != OP_BRA)
1755
Graph::EdgeIterator ei = bb->cfg.outgoing();
1757
for (i = 0; !ei.end(); ++i, ei.next()) {
1758
bf = BasicBlock::get(ei.getNode());
1759
if (bf->getInsnCount() == 1)
1762
if (ei.end() || !bf->getExit())
1764
FlowInstruction *bra = bb->getExit()->asFlow();
1765
FlowInstruction *rep = bf->getExit()->asFlow();
1767
if (rep->getPredicate())
1769
if (rep->op != OP_BRA &&
1770
rep->op != OP_JOIN &&
1775
bra->target.bb = rep->target.bb;
1776
if (i) // 2nd out block means branch not taken
1777
bra->cc = inverseCondCode(bra->cc);
1782
FlatteningPass::visit(BasicBlock *bb)
1784
if (tryPredicateConditional(bb))
1787
// try to attach join to previous instruction
1788
Instruction *insn = bb->getExit();
1789
if (insn && insn->op == OP_JOIN && !insn->getPredicate()) {
1791
if (insn && !insn->getPredicate() && !insn->asFlow() && !insn->isNop()) {
1793
bb->remove(bb->getExit());
1798
tryPropagateBranch(bb);
1804
FlatteningPass::tryPredicateConditional(BasicBlock *bb)
1806
BasicBlock *bL = NULL, *bR = NULL;
1807
unsigned int nL = 0, nR = 0, limit = 12;
1811
mask = bb->initiatesSimpleConditional();
1815
assert(bb->getExit());
1816
Value *pred = bb->getExit()->getPredicate();
1819
if (isConstantCondition(pred))
1822
Graph::EdgeIterator ei = bb->cfg.outgoing();
1825
bL = BasicBlock::get(ei.getNode());
1826
for (insn = bL->getEntry(); insn; insn = insn->next, ++nL)
1827
if (!mayPredicate(insn, pred))
1830
return false; // too long, do a real branch
1835
bR = BasicBlock::get(ei.getNode());
1836
for (insn = bR->getEntry(); insn; insn = insn->next, ++nR)
1837
if (!mayPredicate(insn, pred))
1840
return false; // too long, do a real branch
1844
predicateInstructions(bL, pred, bb->getExit()->cc);
1846
predicateInstructions(bR, pred, inverseCondCode(bb->getExit()->cc));
1849
bb->remove(bb->joinAt);
1852
removeFlow(bb->getExit()); // delete the branch/join at the fork point
1854
// remove potential join operations at the end of the conditional
1855
if (prog->getTarget()->joinAnterior) {
1856
bb = BasicBlock::get((bL ? bL : bR)->cfg.outgoing().getNode());
1857
if (bb->getEntry() && bb->getEntry()->op == OP_JOIN)
1858
removeFlow(bb->getEntry());
1864
// =============================================================================
1866
// Common subexpression elimination. Naive O(n^2) implementation.
1867
class LocalCSE : public Pass
1870
virtual bool visit(BasicBlock *);
1872
inline bool tryReplace(Instruction **, Instruction *);
1874
DLList ops[OP_LAST + 1];
1877
class GlobalCSE : public Pass
1880
virtual bool visit(BasicBlock *);
1884
Instruction::isActionEqual(const Instruction *that) const
1886
if (this->op != that->op ||
1887
this->dType != that->dType ||
1888
this->sType != that->sType)
1890
if (this->cc != that->cc)
1893
if (this->asTex()) {
1894
if (memcmp(&this->asTex()->tex,
1895
&that->asTex()->tex,
1896
sizeof(this->asTex()->tex)))
1899
if (this->asCmp()) {
1900
if (this->asCmp()->setCond != that->asCmp()->setCond)
1903
if (this->asFlow()) {
1906
if (this->atomic != that->atomic ||
1907
this->ipa != that->ipa ||
1908
this->lanes != that->lanes ||
1909
this->perPatch != that->perPatch)
1911
if (this->postFactor != that->postFactor)
1915
if (this->subOp != that->subOp ||
1916
this->saturate != that->saturate ||
1917
this->rnd != that->rnd ||
1918
this->ftz != that->ftz ||
1919
this->dnz != that->dnz ||
1920
this->cache != that->cache)
1927
Instruction::isResultEqual(const Instruction *that) const
1931
// NOTE: location of discard only affects tex with liveOnly and quadops
1932
if (!this->defExists(0) && this->op != OP_DISCARD)
1935
if (!isActionEqual(that))
1938
if (this->predSrc != that->predSrc)
1941
for (d = 0; this->defExists(d); ++d) {
1942
if (!that->defExists(d) ||
1943
!this->getDef(d)->equals(that->getDef(d), false))
1946
if (that->defExists(d))
1949
for (s = 0; this->srcExists(s); ++s) {
1950
if (!that->srcExists(s))
1952
if (this->src[s].mod != that->src[s].mod)
1954
if (!this->getSrc(s)->equals(that->getSrc(s), true))
1957
if (that->srcExists(s))
1960
if (op == OP_LOAD || op == OP_VFETCH) {
1961
switch (src[0].getFile()) {
1962
case FILE_MEMORY_CONST:
1963
case FILE_SHADER_INPUT:
1973
// pull through common expressions from different in-blocks
1975
GlobalCSE::visit(BasicBlock *bb)
1977
Instruction *phi, *next, *ik;
1980
for (phi = bb->getPhi(); phi && phi->op == OP_PHI; phi = next) {
1982
if (phi->getSrc(0)->refCount() > 1)
1984
ik = phi->getSrc(0)->getInsn();
1985
for (s = 1; phi->srcExists(s); ++s) {
1986
if (phi->getSrc(s)->refCount() > 1)
1988
if (!phi->getSrc(s)->getInsn()->isResultEqual(ik))
1991
if (!phi->srcExists(s)) {
1992
Instruction *entry = bb->getEntry();
1994
if (!entry || entry->op != OP_JOIN)
1997
bb->insertAfter(entry, ik);
1998
ik->setDef(0, phi->getDef(0));
1999
delete_Instruction(prog, phi);
2007
// Try to substitute the instruction at *@ptr by the instruction @i.
// @ptr: location of the candidate instruction that may be removed.
// @i:   instruction whose definitions take over the candidate's uses.
LocalCSE::tryReplace(Instruction **ptr, Instruction *i)
2009
Instruction *old = *ptr;
2010
// Replacement is only considered when both instructions are result-equal.
if (!old->isResultEqual(i))
2012
// Redirect each definition of the old instruction to the matching
// definition of @i ...
for (int d = 0; old->defExists(d); ++d)
2013
old->def[d].replace(i->getDef(d), false);
2014
// ... and delete the old instruction.
delete_Instruction(prog, old);
2020
LocalCSE::visit(BasicBlock *bb)
2022
unsigned int replaced;
2025
Instruction *ir, *next;
2029
// will need to know the order of instructions
2031
for (ir = bb->getEntry(); ir; ir = ir->next)
2032
ir->serial = serial++;
2034
for (ir = bb->getEntry(); ir; ir = next) {
2041
ops[ir->op].insert(ir);
2045
for (s = 0; ir->srcExists(s); ++s)
2046
if (ir->getSrc(s)->asLValue())
2047
if (!src || ir->getSrc(s)->refCount() < src->refCount())
2048
src = ir->getSrc(s);
2051
for (ValueRef::Iterator refs = src->uses->iterator(); !refs.end();
2053
Instruction *ik = refs.get()->getInsn();
2054
if (ik->serial < ir->serial && ik->bb == ir->bb)
2055
if (tryReplace(&ir, ik))
2059
DLLIST_FOR_EACH(&ops[ir->op], iter)
2061
Instruction *ik = reinterpret_cast<Instruction *>(iter.get());
2062
if (tryReplace(&ir, ik))
2068
ops[ir->op].insert(ir);
2072
for (unsigned int i = 0; i <= OP_LAST; ++i)
2080
// =============================================================================
2082
// Remove computations of unused values.
2083
class DeadCodeElim : public Pass
2086
bool buryAll(Program *);
2089
virtual bool visit(BasicBlock *);
2091
void checkSplitLoad(Instruction *ld); // for partially dead loads
2093
unsigned int deadCount;
2097
DeadCodeElim::buryAll(Program *prog)
2101
if (!this->run(prog, false, false))
2103
} while (deadCount);
2109
DeadCodeElim::visit(BasicBlock *bb)
2113
for (Instruction *i = bb->getFirst(); i; i = next) {
2117
delete_Instruction(prog, i);
2119
if (i->defExists(1) && (i->op == OP_VFETCH || i->op == OP_LOAD)) {
2127
DeadCodeElim::checkSplitLoad(Instruction *ld1)
2129
Instruction *ld2 = NULL; // can get at most 2 loads
2132
int32_t addr1, addr2;
2133
int32_t size1, size2;
2135
uint32_t mask = 0xffffffff;
2137
for (d = 0; ld1->defExists(d); ++d)
2138
if (!ld1->getDef(d)->refCount() && ld1->getDef(d)->reg.data.id < 0)
2140
if (mask == 0xffffffff)
2143
addr1 = ld1->getSrc(0)->reg.data.offset;
2146
for (d = 0; ld1->defExists(d); ++d) {
2147
if (mask & (1 << d)) {
2148
if (size1 && (addr1 & 0x7))
2150
def1[n1] = ld1->getDef(d);
2151
size1 += def1[n1++]->reg.size;
2154
addr1 += ld1->getDef(d)->reg.size;
2159
for (addr2 = addr1 + size1; ld1->defExists(d); ++d) {
2160
if (mask & (1 << d)) {
2161
def2[n2] = ld1->getDef(d);
2162
size2 += def2[n2++]->reg.size;
2165
addr2 += ld1->getDef(d)->reg.size;
2169
updateLdStOffset(ld1, addr1, func);
2170
ld1->setType(typeOfSize(size1));
2171
for (d = 0; d < 4; ++d)
2172
ld1->setDef(d, (d < n1) ? def1[d] : NULL);
2177
ld2 = ld1->clone(false);
2178
updateLdStOffset(ld2, addr2, func);
2179
ld2->setType(typeOfSize(size2));
2180
for (d = 0; d < 4; ++d)
2181
ld2->setDef(d, (d < n2) ? def2[d] : NULL);
2183
ld1->bb->insertAfter(ld1, ld2);
2186
// =============================================================================
2188
#define RUN_PASS(l, n, f) \
2189
if (level >= (l)) { \
2190
if (dbgFlags & NV50_IR_DEBUG_VERBOSE) \
2191
INFO("PEEPHOLE: %s\n", #n); \
2193
if (!pass.f(this)) \
2198
// Run the optimization passes below in the order listed.
// @level: optimization level; RUN_PASS executes a pass only when @level is at
//         least the value given as its first argument.
// The second RUN_PASS argument is the pass name (printed in verbose debug
// output), the third is the entry point invoked on the pass with this Program.
// DeadCodeElim runs both first and last, and LocalCSE appears twice.
Program::optimizeSSA(int level)
2200
RUN_PASS(1, DeadCodeElim, buryAll);
2201
RUN_PASS(1, CopyPropagation, run);
2202
RUN_PASS(2, GlobalCSE, run);
2203
RUN_PASS(1, LocalCSE, run);
2204
RUN_PASS(2, AlgebraicOpt, run);
2205
RUN_PASS(2, ModifierFolding, run); // before load propagation -> less checks
2206
RUN_PASS(1, ConstantFolding, foldAll);
2207
RUN_PASS(1, LoadPropagation, run);
2208
RUN_PASS(2, MemoryOpt, run);
2209
RUN_PASS(2, LocalCSE, run);
2210
// Minimum level 0: this final dead-code pass runs for any non-negative level.
RUN_PASS(0, DeadCodeElim, buryAll);
2215
Program::optimizePostRA(int level)
2217
RUN_PASS(2, FlatteningPass, run);