2
* Copyright 2011 Christoph Bumiller
4
* Permission is hereby granted, free of charge, to any person obtaining a
5
* copy of this software and associated documentation files (the "Software"),
6
* to deal in the Software without restriction, including without limitation
7
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
8
* and/or sell copies of the Software, and to permit persons to whom the
9
* Software is furnished to do so, subject to the following conditions:
11
* The above copyright notice and this permission notice shall be included in
12
* all copies or substantial portions of the Software.
14
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17
* THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
18
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
19
* OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30
#include "nv50_ir_util.h"
31
#include "nv50_ir_graph.h"
33
#include "nv50_ir_driver.h"
41
OP_UNION, // unify a new definition and several source values
42
OP_SPLIT, // $r0d -> { $r0, $r1 } ($r0d and $r0/$r1 will be coalesced)
43
OP_MERGE, // opposite of split, e.g. combine 2 32 bit into a 64 bit value
44
OP_CONSTRAINT, // copy values into consecutive registers
55
OP_SAD, // abs(src0 - src1) + src2
66
OP_SAT, // CLAMP(f32, 0.0, 1.0)
71
OP_SET_AND, // dst = (src0 CMP src1) & src2
75
OP_SELP, // dst = src2 ? src0 : src1
76
OP_SLCT, // dst = (src2 CMP 0) ? src0 : src1
83
OP_EXP, // exponential (base M_E)
84
OP_LOG, // natural logarithm
97
OP_BRKPT, // breakpoint (not related to loops)
98
OP_JOINAT, // push control flow convergence point
103
OP_VFETCH, // indirection 0 in attribute space, indirection 1 is vertex base
104
OP_PFETCH, // fetch base address of vertex src0 (immediate) [+ src1]
108
OP_EMIT, // emit vertex
109
OP_RESTART, // restart primitive
111
OP_TXB, // texture bias
112
OP_TXL, // texture lod
113
OP_TXF, // texel fetch
114
OP_TXQ, // texture size query
115
OP_TXD, // texture derivatives
116
OP_TXG, // texture gather
118
OP_SULD, // surface load
119
OP_SUST, // surface store
122
OP_RDSV, // read system value
123
OP_WRSV, // write system value
128
OP_POPCNT, // bitcount(src0 & src1)
129
OP_INSBF, // insert first src1[8:15] bits of src0 into src2 at src1[0:7]
134
#define NV50_IR_SUBOP_MUL_HIGH 1
135
#define NV50_IR_SUBOP_EMIT_RESTART 1
136
#define NV50_IR_SUBOP_LDC_IL 1
137
#define NV50_IR_SUBOP_LDC_IS 2
138
#define NV50_IR_SUBOP_LDC_ISL 3
139
#define NV50_IR_SUBOP_SHIFT_WRAP 1
150
TYPE_U64, // 64 bit operations are only lowered after register allocation
162
CC_NEVER = CC_FL, // when used with FILE_FLAGS
165
CC_NOT_P = CC_EQ, // when used with FILE_PREDICATE
193
ROUND_M, // towards -inf
194
ROUND_Z, // towards 0
195
ROUND_P, // towards +inf
196
ROUND_NI, // nearest integer
197
ROUND_MI, // to integer towards -inf
198
ROUND_ZI, // to integer towards 0
199
ROUND_PI, // to integer towards +inf
204
CACHE_CA, // cache at all levels
205
CACHE_WB = CACHE_CA, // cache write back
206
CACHE_CG, // cache at global level
207
CACHE_CS, // cache streaming
208
CACHE_CV, // cache as volatile
209
CACHE_WT = CACHE_CV // cache write-through
216
FILE_PREDICATE, // boolean predicate
217
FILE_FLAGS, // zero/sign/carry/overflow bits
237
TEX_TARGET_1D_SHADOW,
238
TEX_TARGET_2D_SHADOW,
239
TEX_TARGET_CUBE_SHADOW,
242
TEX_TARGET_2D_MS_ARRAY,
243
TEX_TARGET_CUBE_ARRAY,
244
TEX_TARGET_1D_ARRAY_SHADOW,
245
TEX_TARGET_2D_ARRAY_SHADOW,
247
TEX_TARGET_RECT_SHADOW,
248
TEX_TARGET_CUBE_ARRAY_SHADOW,
260
SV_VERTEX_COUNT, // gl_PatchVerticesIn
293
class CmpInstruction;
294
class TexInstruction;
295
class FlowInstruction;
300
class ImmediateValue;
305
int8_t fileIndex; // signed, may be indirect for CONST[]
306
uint8_t size; // this should match the Instruction type's size
307
DataType type; // mainly for pretty printing
309
uint64_t u64; // immediate values
319
int32_t offset; // offset from 0 (base of address space)
320
int32_t id; // register id (< 0 if virtual/unassigned)
328
// precedence: NOT after SAT after NEG after ABS
329
#define NV50_IR_MOD_ABS (1 << 0)
330
#define NV50_IR_MOD_NEG (1 << 1)
331
#define NV50_IR_MOD_SAT (1 << 2)
332
#define NV50_IR_MOD_NOT (1 << 3)
333
#define NV50_IR_MOD_NEG_ABS (NV50_IR_MOD_NEG | NV50_IR_MOD_ABS)
335
#define NV50_IR_INTERP_MODE_MASK 0x3
336
#define NV50_IR_INTERP_LINEAR (0 << 0)
337
#define NV50_IR_INTERP_PERSPECTIVE (1 << 0)
338
#define NV50_IR_INTERP_FLAT (2 << 0)
339
#define NV50_IR_INTERP_SC (3 << 0) // what exactly is that ?
340
#define NV50_IR_INTERP_SAMPLE_MASK 0xc
341
#define NV50_IR_INTERP_DEFAULT (0 << 2)
342
#define NV50_IR_INTERP_CENTROID (1 << 2)
343
#define NV50_IR_INTERP_OFFSET (2 << 2)
344
#define NV50_IR_INTERP_SAMPLEID (3 << 2)
346
// do we really want this to be a class ?
350
Modifier() : bits(0) { }
351
Modifier(unsigned int m) : bits(m) { }
352
Modifier(operation op);
354
// @return new Modifier applying a after b (asserts if unrepresentable)
355
Modifier operator*(const Modifier) const;
356
Modifier operator==(const Modifier m) const { return m.bits == bits; }
357
Modifier operator!=(const Modifier m) const { return m.bits != bits; }
359
// Bitwise AND of both bit sets, returned as a new Modifier.
inline Modifier operator&(const Modifier m) const
{
   return Modifier(bits & m.bits);
}
360
// Bitwise OR of both bit sets, returned as a new Modifier.
inline Modifier operator|(const Modifier m) const
{
   return Modifier(bits | m.bits);
}
361
// Bitwise XOR of both bit sets, returned as a new Modifier.
inline Modifier operator^(const Modifier m) const
{
   return Modifier(bits ^ m.bits);
}
363
operation getOp() const;
365
// 1 if the NV50_IR_MOD_NEG bit is set, otherwise 0.
inline int neg() const { return (bits & NV50_IR_MOD_NEG) != 0; }
366
// 1 if the NV50_IR_MOD_ABS bit is set, otherwise 0.
inline int abs() const { return (bits & NV50_IR_MOD_ABS) != 0; }
368
// A Modifier converts to true when at least one of its bits is set.
inline operator bool() { return bits != 0; }
370
void applyTo(ImmediateValue &imm) const;
372
int print(char *buf, size_t size) const;
384
// Assigning a Value pointer forwards to set() and yields this reference.
inline ValueRef& operator=(Value *val)
{
   set(val);
   return *this;
}
386
inline bool exists() const { return value != NULL; }
389
void set(const ValueRef&);
390
inline Value *get() const { return value; }
391
inline Value *rep() const;
393
inline Instruction *getInsn() const { return insn; }
394
inline void setInsn(Instruction *inst) { insn = inst; }
396
// True when indirect[dim] is non-negative, i.e. an indirect source is
// recorded for dimension dim.
inline bool isIndirect(int dim) const { return !(indirect[dim] < 0); }
397
inline const ValueRef *getIndirect(int dim) const;
399
inline DataFile getFile() const;
400
inline unsigned getSize() const;
402
// SSA: return eventual (traverse MOVs) literal value, if it exists
403
ImmediateValue *getImmediate() const;
408
Iterator(ValueRef *ref) : pos(ref), ini(ref) { }
410
inline ValueRef *get() const { return pos; }
411
inline bool end() const { return pos == NULL; }
412
// Step to the following reference; once the link leads back to the
// starting element the iterator becomes null (end() then reports true).
inline void next()
{
   if (pos->next == ini)
      pos = NULL;
   else
      pos = pos->next;
}
418
inline Iterator iterator() { return Iterator(this); }
422
int8_t indirect[2]; // >= 0 if relative to lvalue in insn->src[indirect[i]]
425
bool usedAsPtr; // for printing
430
ValueRef *next; // to link uses of the value
440
// Assigning a Value pointer forwards to set() and yields this definition.
inline ValueDef& operator=(Value *val)
{
   set(val);
   return *this;
}
442
inline bool exists() const { return value != NULL; }
444
inline Value *get() const { return value; }
445
inline Value *rep() const;
447
void replace(Value *, bool doSet); // replace all uses of the old value
449
inline Instruction *getInsn() const { return insn; }
450
inline void setInsn(Instruction *inst) { insn = inst; }
452
inline DataFile getFile() const;
453
inline unsigned getSize() const;
455
// HACK: save the pre-SSA value in 'prev', in SSA we don't need the def list
456
// but we'll use it again for coalescing in register allocation
457
inline void setSSA(LValue *);
458
inline const LValue *preSSA() const;
459
inline void restoreDefList(); // after having been abused for SSA hack
460
void mergeDefs(ValueDef *);
465
Iterator(ValueDef *def) : pos(def), ini(def) { }
467
inline ValueDef *get() const { return pos; }
468
inline bool end() const { return pos == NULL; }
469
// Step to the following definition; once the link leads back to the
// starting element the iterator becomes NULL (end() then reports true).
inline void next()
{
   if (pos->next == ini)
      pos = NULL;
   else
      pos = pos->next;
}
475
inline Iterator iterator() { return Iterator(this); }
478
Value *value; // should make this LValue * ...
480
ValueDef *next; // circular list of all definitions of the same value
489
virtual Value *clone(Function *) const { return NULL; }
491
virtual int print(char *, size_t, DataType ty = TYPE_NONE) const = 0;
493
virtual bool equals(const Value *, bool strict = false) const;
494
virtual bool interfers(const Value *) const;
496
inline Instruction *getUniqueInsn() const;
497
inline Instruction *getInsn() const; // use when uniqueness is certain
499
inline int refCount() { return refCnt; }
500
// Increment the reference count and return the updated count.
inline int ref()
{
   refCnt += 1;
   return refCnt;
}
501
// Decrement the reference count and return the updated count.
// Asserts that the count has not dropped below zero.
inline int unref()
{
   refCnt -= 1;
   assert(refCnt >= 0);
   return refCnt;
}
503
inline LValue *asLValue();
504
inline Symbol *asSym();
505
inline ImmediateValue *asImm();
506
inline const Symbol *asSym() const;
507
inline const ImmediateValue *asImm() const;
509
bool coalesce(Value *, bool force = false);
511
// True if this value's register file (reg.file) equals f. Declared const
// because it only reads reg.file, so it can be used on a const Value.
inline bool inFile(DataFile f) const { return reg.file == f; }
513
static inline Value *get(Iterator&);
518
friend class ValueDef;
519
friend class ValueRef;
527
// TODO: these should be in LValue:
532
class LValue : public Value
535
LValue(Function *, DataFile file);
536
LValue(Function *, LValue *);
538
virtual Value *clone(Function *) const;
540
virtual int print(char *, size_t, DataType ty = TYPE_NONE) const;
548
class Symbol : public Value
551
Symbol(Program *, DataFile file = FILE_MEMORY_CONST, ubyte fileIdx = 0);
553
virtual Value *clone(Function *) const;
555
virtual bool equals(const Value *that, bool strict) const;
557
virtual int print(char *, size_t, DataType ty = TYPE_NONE) const;
559
// print with indirect values
560
int print(char *, size_t, Value *, Value *, DataType ty = TYPE_NONE) const;
562
inline void setFile(DataFile file, ubyte fileIndex = 0)
565
reg.fileIndex = fileIndex;
568
inline void setOffset(int32_t offset);
569
inline void setAddress(Symbol *base, int32_t offset);
570
inline void setSV(SVSemantic sv, uint32_t idx = 0);
572
inline const Symbol *getBase() const { return baseSym; }
575
Symbol *baseSym; // array base for Symbols representing array elements
578
class ImmediateValue : public Value
581
ImmediateValue(Program *, uint32_t);
582
ImmediateValue(Program *, float);
583
ImmediateValue(Program *, double);
585
// NOTE: values created with the following constructor are not added to the program
586
ImmediateValue(const ImmediateValue *, DataType ty);
588
virtual bool equals(const Value *that, bool strict) const;
590
// these only work if 'type' is valid (we mostly use untyped literals):
591
bool isInteger(const int ival) const; // ival is cast to this' type
592
bool isNegative() const;
597
// for constant folding:
598
ImmediateValue operator+(const ImmediateValue&) const;
599
ImmediateValue operator-(const ImmediateValue&) const;
600
ImmediateValue operator*(const ImmediateValue&) const;
601
ImmediateValue operator/(const ImmediateValue&) const;
603
bool compare(CondCode cc, float fval) const;
605
virtual int print(char *, size_t, DataType ty = TYPE_NONE) const;
609
#define NV50_IR_MAX_DEFS 4
610
#define NV50_IR_MAX_SRCS 8
616
Instruction(Function *, operation, DataType);
617
virtual ~Instruction();
619
virtual Instruction *clone(bool deep) const;
621
inline void setDef(int i, Value *val) { def[i].set(val); }
622
inline void setSrc(int s, Value *val) { src[s].set(val); }
623
void setSrc(int s, ValueRef&);
624
void swapSources(int a, int b);
625
bool setIndirect(int s, int dim, Value *);
627
inline Value *getDef(int d) const { return def[d].get(); }
628
inline Value *getSrc(int s) const { return src[s].get(); }
629
inline Value *getIndirect(int s, int dim) const;
631
inline bool defExists(int d) const { return d < 4 && def[d].exists(); }
632
inline bool srcExists(int s) const { return s < 8 && src[s].exists(); }
634
inline bool constrainedDefs() const { return def[1].exists(); }
636
bool setPredicate(CondCode ccode, Value *);
637
inline Value *getPredicate() const;
638
bool writesPredicate() const;
640
unsigned int defCount(unsigned int mask) const;
641
unsigned int srcCount(unsigned int mask) const;
643
// save & remove / set indirect[0,1] and predicate source
644
void takeExtraSources(int s, Value *[3]);
645
void putExtraSources(int s, Value *[3]);
647
// Use one data type for both the destination (dType) and source (sType).
inline void setType(DataType type)
{
   sType = type;
   dType = type;
}
649
inline void setType(DataType dtype, DataType stype)
655
// True if this instruction's operation is ordered before OP_MOV.
// NOTE(review): presumably every enumerator preceding OP_MOV is a pseudo
// operation — confirm against the full operation enum.
inline bool isPseudo() const { return op < OP_MOV; }
658
bool isCommutationLegal(const Instruction *) const; // must be adjacent !
659
bool isActionEqual(const Instruction *) const;
660
bool isResultEqual(const Instruction *) const;
664
inline CmpInstruction *asCmp();
665
inline TexInstruction *asTex();
666
inline FlowInstruction *asFlow();
667
inline const TexInstruction *asTex() const;
668
inline const CmpInstruction *asCmp() const;
669
inline const FlowInstruction *asFlow() const;
675
int serial; // CFG order
678
DataType dType; // destination or defining type
679
DataType sType; // source or secondary type
684
uint8_t subOp; // quadop, 1 for mul-high, etc.
686
unsigned encSize : 4; // encoding size in bytes
687
unsigned saturate : 1; // to [0.0f, 1.0f]
688
unsigned join : 1; // converge control flow (use OP_JOIN until end)
689
unsigned fixed : 1; // prevent dead code elimination
690
unsigned terminator : 1; // end of basic block
692
unsigned ftz : 1; // flush denormal to zero
693
unsigned dnz : 1; // denormals, NaN are zero
694
unsigned ipa : 4; // interpolation mode
696
unsigned perPatch : 1;
697
unsigned exit : 1; // terminate program after insn
699
int8_t postFactor; // MUL/DIV(if < 0) by 1 << postFactor
705
// NOTE: should make these pointers, saves space and work on shuffling
706
ValueDef def[NV50_IR_MAX_DEFS]; // no gaps !
707
ValueRef src[NV50_IR_MAX_SRCS]; // no gaps !
711
// instruction specific methods:
712
// (don't want to subclass, would need more constructors and memory pools)
714
inline void setInterpolate(unsigned int mode) { ipa = mode; }
716
unsigned int getInterpMode() const { return ipa & 0x3; }
717
unsigned int getSampleMode() const { return ipa & 0xc; }
722
void cloneBase(Instruction *clone, bool deep) const;
736
class TexInstruction : public Instruction
742
Target(TexTarget targ = TEX_TARGET_2D) : target(targ) { }
744
const char *getName() const { return descTable[target].name; }
745
unsigned int getArgCount() const { return descTable[target].argc; }
746
unsigned int getDim() const { return descTable[target].dim; }
747
// 1 if the descriptor table entry for this target has 'array' set, else 0.
int isArray() const
{
   if (descTable[target].array)
      return 1;
   return 0;
}
748
// 1 if the descriptor table entry for this target has 'cube' set, else 0.
int isCube() const
{
   if (descTable[target].cube)
      return 1;
   return 0;
}
749
// 1 if the descriptor table entry for this target has 'shadow' set, else 0.
int isShadow() const
{
   if (descTable[target].shadow)
      return 1;
   return 0;
}
751
Target& operator=(TexTarget targ)
753
assert(targ < TEX_TARGET_COUNT);
757
inline bool operator==(TexTarget targ) const { return target == targ; }
770
static const struct Desc descTable[TEX_TARGET_COUNT];
773
enum TexTarget target;
777
TexInstruction(Function *, operation);
778
virtual ~TexInstruction();
780
virtual Instruction *clone(bool deep) const;
782
inline void setTexture(Target targ, uint8_t r, uint8_t s)
789
inline Value *getIndirectR() const;
790
inline Value *getIndirectS() const;
804
bool liveOnly; // only execute on live pixels of a quad (optimization)
808
int8_t useOffsets; // 0, 1, or 4 for textureGatherOffsets
818
class CmpInstruction : public Instruction
821
CmpInstruction(Function *, operation);
823
virtual Instruction *clone(bool deep) const;
825
void setCondition(CondCode cond) { setCond = cond; }
826
CondCode getCondition() const { return setCond; }
832
class FlowInstruction : public Instruction
835
FlowInstruction(Function *, operation, BasicBlock *target);
838
unsigned allWarp : 1;
839
unsigned absolute : 1;
841
unsigned builtin : 1; // true for calls to emulation code
853
BasicBlock(Function *);
856
inline int getId() const { return id; }
857
inline unsigned int getInsnCount() const { return numInsns; }
858
// True when the block has an exit instruction whose terminator flag is set.
inline bool isTerminated() const
{
   if (!exit)
      return false;
   return exit->terminator;
}
860
bool dominatedBy(BasicBlock *bb);
861
inline bool reachableBy(BasicBlock *by, BasicBlock *term);
863
// returns mask of conditional out blocks
864
// e.g. 3 for IF { .. } ELSE { .. } ENDIF, 1 for IF { .. } ENDIF
865
unsigned int initiatesSimpleConditional() const;
868
Function *getFunction() const { return func; }
869
Program *getProgram() const { return program; }
871
Instruction *getEntry() const { return entry; } // first non-phi instruction
872
Instruction *getPhi() const { return phi; }
873
// First instruction of the block: the phi instruction if there is one,
// otherwise the entry instruction.
Instruction *getFirst() const
{
   if (phi)
      return phi;
   return entry;
}
874
Instruction *getExit() const { return exit; }
876
void insertHead(Instruction *);
877
void insertTail(Instruction *);
878
void insertBefore(Instruction *, Instruction *);
879
void insertAfter(Instruction *, Instruction *);
880
void remove(Instruction *);
881
void permuteAdjacent(Instruction *, Instruction *);
883
BasicBlock *idom() const;
885
DLList& getDF() { return df; }
886
DLList::Iterator iterDF() { return df.iterator(); }
888
static inline BasicBlock *get(Iterator&);
889
static inline BasicBlock *get(Graph::Node *);
892
Graph::Node cfg; // first edge is branch *taken* (the ELSE branch)
900
Instruction *joinAt; // for quick reference
902
bool explicitCont; // loop headers: true if loop contains continue stmts
912
unsigned int numInsns;
922
Function(Program *, const char *name);
925
inline Program *getProgram() const { return prog; }
926
inline const char *getName() const { return name; }
927
inline int getId() const { return id; }
930
void printLiveIntervals() const;
931
void printCFGraph(const char *filePath);
933
bool setEntry(BasicBlock *);
934
bool setExit(BasicBlock *);
936
unsigned int orderInstructions(ArrayList&);
938
inline void add(BasicBlock *bb, int& id) { allBBlocks.insert(bb, id); }
939
inline void add(Instruction *insn, int& id) { allInsns.insert(insn, id); }
940
inline void add(LValue *lval, int& id) { allLValues.insert(lval, id); }
942
inline LValue *getLValue(int id);
948
Graph::Node *cfgExit;
950
Graph::Node call; // node in the call graph
952
BasicBlock **bbArray; // BBs in emission order
955
unsigned int loopNestingBound;
961
ArrayList allBBlocks;
963
ArrayList allLValues;
966
void buildLiveSetsPreSSA(BasicBlock *, const int sequence);
970
const char *const name;
977
CG_STAGE_SSA, // expected directly before register allocation
987
TYPE_TESSELLATION_CONTROL,
988
TYPE_TESSELLATION_EVAL,
994
Program(Type type, Target *targ);
999
Type getType() const { return progType; }
1001
inline void add(Function *fn, int& id) { allFuncs.insert(fn, id); }
1002
inline void add(Value *rval, int& id) { allRValues.insert(rval, id); }
1004
bool makeFromTGSI(struct nv50_ir_prog_info *);
1005
bool makeFromSM4(struct nv50_ir_prog_info *);
1006
bool convertToSSA();
1007
bool optimizeSSA(int level);
1008
bool optimizePostRA(int level);
1009
bool registerAllocation();
1010
bool emitBinary(struct nv50_ir_prog_info *);
1012
const Target *getTarget() const { return target; }
1023
ArrayList allRValues;
1030
MemoryPool mem_Instruction;
1031
MemoryPool mem_CmpInstruction;
1032
MemoryPool mem_TexInstruction;
1033
MemoryPool mem_FlowInstruction;
1034
MemoryPool mem_LValue;
1035
MemoryPool mem_Symbol;
1036
MemoryPool mem_ImmediateValue;
1040
void releaseInstruction(Instruction *);
1041
void releaseValue(Value *);
1044
// TODO: add const version
1048
bool run(Program *, bool ordered = false, bool skipPhi = false);
1049
bool run(Function *, bool ordered = false, bool skipPhi = false);
1052
// return false to continue with next entity on next higher level
1053
virtual bool visit(Function *) { return true; }
1054
virtual bool visit(BasicBlock *) { return true; }
1055
virtual bool visit(Instruction *) { return false; }
1057
bool doRun(Program *, bool ordered, bool skipPhi);
1058
bool doRun(Function *, bool ordered, bool skipPhi);
1066
// =============================================================================
1068
#include "nv50_ir_inlines.h"
1070
} // namespace nv50_ir
1072
#endif // __NV50_IR_H__