2
* Copyright 2014 Red Hat Inc.
4
* Permission is hereby granted, free of charge, to any person obtaining a
5
* copy of this software and associated documentation files (the "Software"),
6
* to deal in the Software without restriction, including without limitation
7
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
8
* and/or sell copies of the Software, and to permit persons to whom the
9
* Software is furnished to do so, subject to the following conditions:
11
* The above copyright notice and this permission notice shall be included in
12
* all copies or substantial portions of the Software.
14
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20
* OTHER DEALINGS IN THE SOFTWARE.
22
* Authors: Ben Skeggs <bskeggs@redhat.com>
25
#include "codegen/nv50_ir_target_gm107.h"
26
#include "codegen/nv50_ir_sched_gm107.h"
28
//#define GM107_DEBUG_SCHED_DATA
32
// Code emitter for the GM107 target. Derives from CodeEmitter and declares
// the field-level and per-opcode encoding helpers defined later in this file.
class CodeEmitterGM107 : public CodeEmitter
35
CodeEmitterGM107(const TargetGM107 *);
37
// Virtual entry points used to drive emission.
virtual bool emitInstruction(Instruction *);
38
virtual uint32_t getMinEncodingSize(const Instruction *) const;
40
virtual void prepareEmission(Program *);
41
virtual void prepareEmission(Function *);
43
inline void setProgramType(Program::Type pType) { progType = pType; }
46
const TargetGM107 *targGM107;
48
Program::Type progType;
50
// Instruction currently being encoded; read by the emit* helpers.
const Instruction *insn;
51
const bool writeIssueDelays;
55
// Bit-field writer; the 3-argument form targets the member `code`.
inline void emitField(uint32_t *, int, int, uint32_t);
56
inline void emitField(int b, int s, uint32_t v) { emitField(code, b, s, v); }
58
// The 1-argument form forwards with the bool argument set to true.
inline void emitInsn(uint32_t, bool);
59
inline void emitInsn(uint32_t o) { emitInsn(o, true); }
60
inline void emitPred();
61
// emitGPR overloads: all funnel into the (int, const Value *) form, passing
// NULL when there is no value to encode.
inline void emitGPR(int, const Value *);
62
inline void emitGPR(int pos) {
63
emitGPR(pos, (const Value *)NULL);
65
inline void emitGPR(int pos, const ValueRef &ref) {
66
emitGPR(pos, ref.get() ? ref.rep() : (const Value *)NULL);
68
inline void emitGPR(int pos, const ValueRef *ref) {
69
emitGPR(pos, ref ? ref->rep() : (const Value *)NULL);
71
inline void emitGPR(int pos, const ValueDef &def) {
72
emitGPR(pos, def.get() ? def.rep() : (const Value *)NULL);
74
inline void emitSYS(int, const Value *);
75
inline void emitSYS(int pos, const ValueRef &ref) {
76
emitSYS(pos, ref.get() ? ref.rep() : (const Value *)NULL);
78
// emitPRED overloads: same NULL-forwarding scheme as emitGPR.
inline void emitPRED(int, const Value *);
79
inline void emitPRED(int pos) {
80
emitPRED(pos, (const Value *)NULL);
82
inline void emitPRED(int pos, const ValueRef &ref) {
83
emitPRED(pos, ref.get() ? ref.rep() : (const Value *)NULL);
85
inline void emitPRED(int pos, const ValueDef &def) {
86
emitPRED(pos, def.get() ? def.rep() : (const Value *)NULL);
88
// Operand helpers: address, constant buffer and immediate encodings.
inline void emitADDR(int, int, int, int, const ValueRef &);
89
inline void emitCBUF(int, int, int, int, int, const ValueRef &);
90
inline bool longIMMD(const ValueRef &);
91
inline void emitIMMD(int, int, const ValueRef &);
93
// Condition-code and modifier helpers; emitCond5 simply reuses emitCond4.
void emitCond3(int, CondCode);
94
void emitCond4(int, CondCode);
95
void emitCond5(int pos, CondCode cc) { emitCond4(pos, cc); }
96
inline void emitO(int);
97
inline void emitP(int);
98
inline void emitSAT(int);
99
inline void emitCC(int);
100
inline void emitX(int);
101
inline void emitABS(int, const ValueRef &);
102
inline void emitNEG(int, const ValueRef &);
103
inline void emitNEG2(int, const ValueRef &, const ValueRef &);
104
inline void emitFMZ(int, int);
105
inline void emitRND(int, RoundMode, int);
106
// Convenience form: uses the instruction's own rounding mode and passes -1
// as the third (rip) position.
inline void emitRND(int pos) {
107
emitRND(pos, insn->rnd, -1);
109
inline void emitPDIV(int);
110
inline void emitINV(int, const ValueRef &);
176
void emitLDSTs(int, DataType);
217
void emitSUHandle(const int s);
223
/*******************************************************************************
224
* general instruction layout/fields
225
******************************************************************************/
228
// Builds the value for a bit field of width `s` at bit offset `b`: `v` is
// masked to `s` bits and shifted into a 64-bit quantity.
// The assert requires the bits of `v` above the field to be either all zero
// or all one, i.e. `v` must fit the field as an unsigned or sign-extended
// value. (The store into `data` is not part of the visible lines.)
CodeEmitterGM107::emitField(uint32_t *data, int b, int s, uint32_t v)
231
uint32_t m = ((1ULL << s) - 1);
232
uint64_t d = (uint64_t)(v & m) << b;
233
assert(!(v & ~m) || (v & ~m) == ~m);
240
// Encodes the instruction's predicate. When predSrc >= 0 the predicate
// register id goes into the 3-bit field at bit 16, and bit 19 is set when
// the instruction's cc is CC_NOT_P.
CodeEmitterGM107::emitPred()
242
if (insn->predSrc >= 0) {
243
emitField(16, 3, insn->getSrc(insn->predSrc)->rep()->reg.data.id);
244
emitField(19, 1, insn->cc == CC_NOT_P);
251
// Starts a new instruction encoding. The visible statement clears the low
// code word; how `hi` and `pred` are consumed is not visible here.
// NOTE(review): presumably `hi` seeds the high word and `pred` gates
// emitPred() — confirm against the full definition.
CodeEmitterGM107::emitInsn(uint32_t hi, bool pred)
253
code[0] = 0x00000000;
260
// Writes an 8-bit register id at bit `pos`. The id 255 is emitted when
// `val` is null or when it lives in FILE_FLAGS.
CodeEmitterGM107::emitGPR(int pos, const Value *val)
262
emitField(pos, 8, val && !val->inFile(FILE_FLAGS) ?
263
val->reg.data.id : 255);
267
// Translates a system value into the numeric id written to the 8-bit field
// at `pos`. SV_TID, SV_CTAID and SV_CLOCK add the value's sv.index to a base
// id; an unrecognised value trips the "invalid system value" assert.
CodeEmitterGM107::emitSYS(int pos, const Value *val)
269
// Starts from the raw register id (or -1 for a null value); the cases below
// overwrite it.
int id = val ? val->reg.data.id : -1;
272
case SV_LANEID : id = 0x00; break;
273
case SV_VERTEX_COUNT : id = 0x10; break;
274
case SV_INVOCATION_ID : id = 0x11; break;
275
case SV_THREAD_KILL : id = 0x13; break;
276
case SV_INVOCATION_INFO: id = 0x1d; break;
277
case SV_COMBINED_TID : id = 0x20; break;
278
case SV_TID : id = 0x21 + val->reg.data.sv.index; break;
279
case SV_CTAID : id = 0x25 + val->reg.data.sv.index; break;
280
case SV_LANEMASK_EQ : id = 0x38; break;
281
case SV_LANEMASK_LT : id = 0x39; break;
282
case SV_LANEMASK_LE : id = 0x3a; break;
283
case SV_LANEMASK_GT : id = 0x3b; break;
284
case SV_LANEMASK_GE : id = 0x3c; break;
285
case SV_CLOCK : id = 0x50 + val->reg.data.sv.index; break;
287
assert(!"invalid system value");
292
emitField(pos, 8, id);
296
// Writes a 3-bit predicate register id at bit `pos`; id 7 is used when
// `val` is null.
CodeEmitterGM107::emitPRED(int pos, const Value *val)
298
emitField(pos, 3, val ? val->reg.data.id : 7);
302
// Emits an address operand taken from `ref`: its indirect register 0 as a
// GPR at bit `gpr`, and its offset shifted right by `shr` in a `len`-bit
// field at bit `off`.
CodeEmitterGM107::emitADDR(int gpr, int off, int len, int shr,
305
const Value *v = ref.get();
306
// The offset must be a multiple of (1 << shr) so the shift loses no bits.
assert(!(v->reg.data.offset & ((1 << shr) - 1)));
308
emitGPR(gpr, ref.getIndirect(0));
309
emitField(off, len, v->reg.data.offset >> shr);
313
// Emits a constant-buffer operand taken from `ref`: the value's fileIndex in
// a 5-bit field at `buf`, its indirect register 0 as a GPR at `gpr`, and the
// symbol offset shifted right by `shr` at bit `off`.
CodeEmitterGM107::emitCBUF(int buf, int gpr, int off, int len, int shr,
316
const Value *v = ref.get();
317
const Symbol *s = v->asSym();
319
// The offset must be a multiple of (1 << shr) so the shift loses no bits.
assert(!(s->reg.data.offset & ((1 << shr) - 1)));
321
emitField(buf, 5, v->reg.fileIndex);
323
emitGPR(gpr, ref.getIndirect(0));
324
// NOTE(review): the field width here is the literal 16, not `len`; every
// call visible in this file passes 16 for `len`.
emitField(off, 16, s->reg.data.offset >> shr);
328
// Classifies an immediate operand. For a float source type the result is
// non-zero when any of the low 12 bits of the bit pattern are set; otherwise
// it is true when the signed value lies outside [-0x80000, 0x7ffff].
// (The result for non-immediate operands is not part of the visible lines.)
CodeEmitterGM107::longIMMD(const ValueRef &ref)
330
if (ref.getFile() == FILE_IMMEDIATE) {
331
const ImmediateValue *imm = ref.get()->asImm();
332
if (isFloatType(insn->sType))
333
return imm->reg.data.u32 & 0xfff;
335
return imm->reg.data.s32 > 0x7ffff || imm->reg.data.s32 < -0x80000;
341
// Emits the immediate held by `ref` into a `len`-bit field at `pos`.
// Visible short-form handling, by source type:
//  - F32/F16: the low 12 bits of the pattern must be zero;
//  - F64: the low 44 bits must be zero and the top 20 bits are kept;
//  - otherwise: bits 19..31 must be all zero or all one.
// The short form stores bit 19 of the value at bit 56 and the low 19 bits at
// `pos`. The final emitField writes `val` unmodified — presumably the branch
// for other lengths (its condition is not visible here).
CodeEmitterGM107::emitIMMD(int pos, int len, const ValueRef &ref)
343
const ImmediateValue *imm = ref.get()->asImm();
344
uint32_t val = imm->reg.data.u32;
347
if (insn->sType == TYPE_F32 || insn->sType == TYPE_F16) {
348
assert(!(val & 0x00000fff));
350
} else if (insn->sType == TYPE_F64) {
351
assert(!(imm->reg.data.u64 & 0x00000fffffffffffULL));
352
val = imm->reg.data.u64 >> 44;
354
assert(!(val & 0xfff80000) || (val & 0xfff80000) == 0xfff80000);
356
emitField( 56, 1, (val & 0x80000) >> 19);
357
emitField(pos, len, (val & 0x7ffff));
359
emitField(pos, len, val);
363
/*******************************************************************************
365
******************************************************************************/
368
// Encodes a condition code into the 3-bit field at `pos` (values 0x00..0x07
// for the visible cases). An unhandled code trips the "invalid cond3" assert.
// Note that the parameter `code` shares its name with the `code` buffer used
// elsewhere in this class.
CodeEmitterGM107::emitCond3(int pos, CondCode code)
373
case CC_FL : data = 0x00; break;
375
case CC_LT : data = 0x01; break;
377
case CC_EQ : data = 0x02; break;
379
case CC_LE : data = 0x03; break;
381
case CC_GT : data = 0x04; break;
383
case CC_NE : data = 0x05; break;
385
case CC_GE : data = 0x06; break;
386
case CC_TR : data = 0x07; break;
388
assert(!"invalid cond3");
392
emitField(pos, 3, data);
396
// Encodes a condition code into the 4-bit field at `pos`. The *U variants
// use the base code plus 8; 0x07 and 0x08 are left unassigned (commented
// out below). An unhandled code trips the "invalid cond4" assert.
CodeEmitterGM107::emitCond4(int pos, CondCode code)
401
case CC_FL: data = 0x00; break;
402
case CC_LT: data = 0x01; break;
403
case CC_EQ: data = 0x02; break;
404
case CC_LE: data = 0x03; break;
405
case CC_GT: data = 0x04; break;
406
case CC_NE: data = 0x05; break;
407
case CC_GE: data = 0x06; break;
408
// case CC_NUM: data = 0x07; break;
409
// case CC_NAN: data = 0x08; break;
410
case CC_LTU: data = 0x09; break;
411
case CC_EQU: data = 0x0a; break;
412
case CC_LEU: data = 0x0b; break;
413
case CC_GTU: data = 0x0c; break;
414
case CC_NEU: data = 0x0d; break;
415
case CC_GEU: data = 0x0e; break;
416
case CC_TR: data = 0x0f; break;
418
assert(!"invalid cond4");
422
emitField(pos, 4, data);
426
// Sets the 1-bit flag at `pos` when source 0 lives in FILE_SHADER_OUTPUT.
CodeEmitterGM107::emitO(int pos)
428
emitField(pos, 1, insn->getSrc(0)->reg.file == FILE_SHADER_OUTPUT);
432
// Writes the instruction's perPatch flag into the 1-bit field at `pos`.
CodeEmitterGM107::emitP(int pos)
434
emitField(pos, 1, insn->perPatch);
438
// Writes the instruction's saturate flag into the 1-bit field at `pos`.
CodeEmitterGM107::emitSAT(int pos)
440
emitField(pos, 1, insn->saturate);
444
// Sets the 1-bit flag at `pos` when the instruction has a flags definition
// (flagsDef >= 0).
CodeEmitterGM107::emitCC(int pos)
446
emitField(pos, 1, insn->flagsDef >= 0);
450
// Sets the 1-bit flag at `pos` when the instruction has a flags source
// (flagsSrc >= 0).
CodeEmitterGM107::emitX(int pos)
452
emitField(pos, 1, insn->flagsSrc >= 0);
456
// Writes the abs modifier of `ref` into the 1-bit field at `pos`.
CodeEmitterGM107::emitABS(int pos, const ValueRef &ref)
458
emitField(pos, 1, ref.mod.abs());
462
// Writes the neg modifier of `ref` into the 1-bit field at `pos`.
CodeEmitterGM107::emitNEG(int pos, const ValueRef &ref)
464
emitField(pos, 1, ref.mod.neg());
468
// Writes a combined negate bit at `pos`: the XOR of the neg modifiers of `a`
// and `b`, so it is set when exactly one of the two operands is negated.
CodeEmitterGM107::emitNEG2(int pos, const ValueRef &a, const ValueRef &b)
470
emitField(pos, 1, a.mod.neg() ^ b.mod.neg());
474
// Writes a `len`-bit field at `pos` holding the instruction's dnz flag in
// bit 1 and its ftz flag in bit 0.
CodeEmitterGM107::emitFMZ(int pos, int len)
476
emitField(pos, len, insn->dnz << 1 | insn->ftz);
480
// Encodes a rounding mode: a 2-bit mode `rm` at bit `rmp` and a 1-bit flag
// `ri` at bit `rip`. Each *I variant sets `ri` and has no `break`, so it
// falls through into the matching base mode to pick up `rm`.
// An unhandled mode trips the "invalid round mode" assert.
CodeEmitterGM107::emitRND(int rmp, RoundMode rnd, int rip)
484
case ROUND_NI: ri = 1;
485
case ROUND_N : rm = 0; break;
486
case ROUND_MI: ri = 1;
487
case ROUND_M : rm = 1; break;
488
case ROUND_PI: ri = 1;
489
case ROUND_P : rm = 2; break;
490
case ROUND_ZI: ri = 1;
491
case ROUND_Z : rm = 3; break;
493
assert(!"invalid round mode");
496
// NOTE(review): callers in this file pass rip == -1 when there is no `ri`
// field; this relies on emitField tolerating a negative bit position —
// confirm against the full emitField definition.
emitField(rip, 1, ri);
497
emitField(rmp, 2, rm);
501
// Encodes the instruction's postFactor (asserted to be in [-3, 3]) into the
// 3-bit field at `pos`: positive factors are stored as 7 - postFactor; the
// second emitField stores 0 - postFactor (presumably the else arm, which is
// not visible here).
CodeEmitterGM107::emitPDIV(int pos)
503
assert(insn->postFactor >= -3 && insn->postFactor <= 3);
504
if (insn->postFactor > 0)
505
emitField(pos, 3, 7 - insn->postFactor);
507
emitField(pos, 3, 0 - insn->postFactor);
511
// Sets the 1-bit flag at `pos` when `ref` carries the NV50_IR_MOD_NOT
// modifier.
CodeEmitterGM107::emitINV(int pos, const ValueRef &ref)
513
emitField(pos, 1, !!(ref.mod & Modifier(NV50_IR_MOD_NOT)));
516
/*******************************************************************************
518
******************************************************************************/
521
// Emits opcode 0xe3000000 with the condition field at bit 0 set to CC_TR.
CodeEmitterGM107::emitEXIT()
523
emitInsn (0xe3000000);
524
emitCond5(0x00, CC_TR);
528
// Encodes a branch/jump. Four opcodes are visible (JMX, BRX, JMP, BRA);
// the indirect flag selects between the first and second pair, and the
// choice within each pair is made on lines not visible here.
// The target is either a 24-bit offset relative to the end of this
// instruction (codeSize + 8), a 32-bit position, or a constant-buffer
// operand with bit 5 set.
CodeEmitterGM107::emitBRA()
530
// Shadows the member `insn` with its FlowInstruction view.
const FlowInstruction *insn = this->insn->asFlow();
533
if (insn->indirect) {
535
emitInsn(0xe2000000); // JMX
537
emitInsn(0xe2500000); // BRX
541
emitInsn(0xe2100000); // JMP
543
emitInsn(0xe2400000); // BRA
544
emitField(0x07, 1, insn->allWarp);
547
emitField(0x06, 1, insn->limit);
548
emitCond5(0x00, CC_TR);
550
if (!insn->srcExists(0) || insn->src(0).getFile() != FILE_MEMORY_CONST) {
551
int32_t pos = insn->target.bb->binPos;
552
// Special case when issue delays are written and the target position is
// 32-byte aligned; the adjustment itself is on a line not visible here.
if (writeIssueDelays && !(pos & 0x1f))
555
emitField(0x14, 24, pos - (codeSize + 8));
557
emitField(0x14, 32, pos);
559
emitCBUF (0x24, gpr, 20, 16, 0, insn->src(0));
560
emitField(0x05, 1, 1);
565
// Encodes a call: JCAL (0xe2200000) when the instruction is absolute,
// otherwise CAL (0xe2600000), both emitted without a predicate.
// Visible target encodings: a 24-bit offset relative to codeSize + 8, a
// builtin whose absolute offset is patched in through two relocations, a
// 32-bit position, or a constant-buffer operand with bit 5 set. How these
// alternatives nest is not fully visible here.
CodeEmitterGM107::emitCAL()
567
const FlowInstruction *insn = this->insn->asFlow();
569
if (insn->absolute) {
570
emitInsn(0xe2200000, false); // JCAL
572
emitInsn(0xe2600000, false); // CAL
575
if (!insn->srcExists(0) || insn->src(0).getFile() != FILE_MEMORY_CONST) {
577
emitField(0x14, 24, insn->target.bb->binPos - (codeSize + 8));
580
int pcAbs = targGM107->getBuiltinOffset(insn->target.builtin);
581
// One relocation per code word (indices 0 and 1), with complementary masks.
addReloc(RelocEntry::TYPE_BUILTIN, 0, pcAbs, 0xfff00000, 20);
582
addReloc(RelocEntry::TYPE_BUILTIN, 1, pcAbs, 0x000fffff, -12);
584
emitField(0x14, 32, insn->target.bb->binPos);
588
emitCBUF (0x24, -1, 20, 16, 0, insn->src(0));
589
emitField(0x05, 1, 1);
594
// Emits opcode 0xe2b00000 without a predicate. The target is a 24-bit
// offset relative to codeSize + 8 unless source 0 exists and is in
// FILE_MEMORY_CONST; the remaining lines emit a constant-buffer operand
// with bit 5 set (presumably the else arm).
CodeEmitterGM107::emitPCNT()
596
const FlowInstruction *insn = this->insn->asFlow();
598
emitInsn(0xe2b00000, false);
600
if (!insn->srcExists(0) || insn->src(0).getFile() != FILE_MEMORY_CONST) {
601
emitField(0x14, 24, insn->target.bb->binPos - (codeSize + 8));
603
emitCBUF (0x24, -1, 20, 16, 0, insn->src(0));
604
emitField(0x05, 1, 1);
609
// Emits opcode 0xe3500000 with the condition field at bit 0 set to CC_TR.
CodeEmitterGM107::emitCONT()
611
emitInsn (0xe3500000);
612
emitCond5(0x00, CC_TR);
616
// Emits opcode 0xe2a00000 without a predicate. The target is a 24-bit
// offset relative to codeSize + 8 unless source 0 exists and is in
// FILE_MEMORY_CONST; the remaining lines emit a constant-buffer operand
// with bit 5 set (presumably the else arm).
CodeEmitterGM107::emitPBK()
618
const FlowInstruction *insn = this->insn->asFlow();
620
emitInsn(0xe2a00000, false);
622
if (!insn->srcExists(0) || insn->src(0).getFile() != FILE_MEMORY_CONST) {
623
emitField(0x14, 24, insn->target.bb->binPos - (codeSize + 8));
625
emitCBUF (0x24, -1, 20, 16, 0, insn->src(0));
626
emitField(0x05, 1, 1);
631
// Emits opcode 0xe3400000 with the condition field at bit 0 set to CC_TR.
CodeEmitterGM107::emitBRK()
633
emitInsn (0xe3400000);
634
emitCond5(0x00, CC_TR);
638
// Emits opcode 0xe2700000 without a predicate. The target is a 24-bit
// offset relative to codeSize + 8 unless source 0 exists and is in
// FILE_MEMORY_CONST; the remaining lines emit a constant-buffer operand
// with bit 5 set (presumably the else arm).
CodeEmitterGM107::emitPRET()
640
const FlowInstruction *insn = this->insn->asFlow();
642
emitInsn(0xe2700000, false);
644
if (!insn->srcExists(0) || insn->src(0).getFile() != FILE_MEMORY_CONST) {
645
emitField(0x14, 24, insn->target.bb->binPos - (codeSize + 8));
647
emitCBUF (0x24, -1, 20, 16, 0, insn->src(0));
648
emitField(0x05, 1, 1);
653
// Emits opcode 0xe3200000 with the condition field at bit 0 set to CC_TR.
CodeEmitterGM107::emitRET()
655
emitInsn (0xe3200000);
656
emitCond5(0x00, CC_TR);
660
// Emits opcode 0xe2900000 without a predicate. The target is a 24-bit
// offset relative to codeSize + 8 unless source 0 exists and is in
// FILE_MEMORY_CONST; the remaining lines emit a constant-buffer operand
// with bit 5 set (presumably the else arm).
CodeEmitterGM107::emitSSY()
662
const FlowInstruction *insn = this->insn->asFlow();
664
emitInsn(0xe2900000, false);
666
if (!insn->srcExists(0) || insn->src(0).getFile() != FILE_MEMORY_CONST) {
667
emitField(0x14, 24, insn->target.bb->binPos - (codeSize + 8));
669
emitCBUF (0x24, -1, 20, 16, 0, insn->src(0));
670
emitField(0x05, 1, 1);
675
// Emits opcode 0xf0f80000 with the condition field at bit 0 set to CC_TR.
CodeEmitterGM107::emitSYNC()
677
emitInsn (0xf0f80000);
678
emitCond5(0x00, CC_TR);
682
// Emits opcode 0xe3700000 without a predicate; no operands are encoded.
CodeEmitterGM107::emitSAM()
684
emitInsn(0xe3700000, false);
688
// Emits opcode 0xe3800000 without a predicate; no operands are encoded.
CodeEmitterGM107::emitRAM()
690
emitInsn(0xe3800000, false);
693
/*******************************************************************************
695
******************************************************************************/
698
// Encodes a two-source predicate logic operation (opcode 0x50900000).
// The 3-bit field at 0x18 selects AND (0), OR (1) or XOR (2); any other
// operation trips the "unexpected operation" assert. Each source contributes
// an invert bit and a predicate id; the result predicate goes at bit 0x03.
CodeEmitterGM107::emitPSETP()
701
emitInsn(0x50900000);
704
case OP_AND: emitField(0x18, 3, 0); break;
705
case OP_OR: emitField(0x18, 3, 1); break;
706
case OP_XOR: emitField(0x18, 3, 2); break;
708
assert(!"unexpected operation");
713
// No third source yet: the field at 0x27 gets the null predicate id.
emitPRED(0x27); // TODO: support 3-arg
714
emitINV (0x20, insn->src(1));
715
emitPRED(0x1d, insn->src(1));
716
emitINV (0x0f, insn->src(0));
717
emitPRED(0x0c, insn->src(0));
718
emitPRED(0x03, insn->def(0));
722
/*******************************************************************************
723
* movement / conversion
724
******************************************************************************/
727
// Encodes a move. For non-immediate sources the opcode depends on the source
// file (a GPR-source form, with a distinct opcode when the destination is a
// predicate; a constant-buffer form; a short-immediate form; a
// predicate-source form), and the 4-bit lane mask is written at 0x27 unless
// either side is a predicate. Immediate sources use opcode 0x01000000 with a
// 32-bit immediate and the lane mask at 0x0c.
// The destination is emitted as a predicate at 0x03 or a GPR at 0x00.
CodeEmitterGM107::emitMOV()
729
if (insn->src(0).getFile() != FILE_IMMEDIATE) {
730
switch (insn->src(0).getFile()) {
732
if (insn->def(0).getFile() == FILE_PREDICATE) {
733
emitInsn(0x5b6a0000);
736
emitInsn(0x5c980000);
738
emitGPR (0x14, insn->src(0));
740
case FILE_MEMORY_CONST:
741
emitInsn(0x4c980000);
742
emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(0));
745
// NOTE(review): the enclosing `if` already excludes FILE_IMMEDIATE, so this
// 19-bit immediate form looks unreachable from here — confirm.
emitInsn(0x38980000);
746
emitIMMD(0x14, 19, insn->src(0));
749
emitInsn(0x50880000);
750
emitPRED(0x0c, insn->src(0));
755
assert(!"bad src file");
758
if (insn->def(0).getFile() != FILE_PREDICATE &&
759
insn->src(0).getFile() != FILE_PREDICATE)
760
emitField(0x27, 4, insn->lanes);
762
emitInsn (0x01000000);
763
emitIMMD (0x14, 32, insn->src(0));
764
emitField(0x0c, 4, insn->lanes);
767
if (insn->def(0).getFile() == FILE_PREDICATE) {
769
emitPRED(0x03, insn->def(0));
772
emitGPR(0x00, insn->def(0));
777
// Emits opcode 0xf0c80000: the system-value id of source 0 at bit 0x14 and
// the destination GPR at bit 0x00.
CodeEmitterGM107::emitS2R()
779
emitInsn(0xf0c80000);
780
emitSYS (0x14, insn->src(0));
781
emitGPR (0x00, insn->def(0));
785
// Emits opcode 0x50c80000: the system-value id of source 0 at bit 0x14 and
// the destination GPR at bit 0x00.
CodeEmitterGM107::emitCS2R()
787
emitInsn(0x50c80000);
788
emitSYS (0x14, insn->src(0));
789
emitGPR (0x00, insn->def(0));
793
// Encodes a float-to-float conversion. OP_FLOOR/OP_CEIL/OP_TRUNC override
// the rounding mode with ROUND_MI/ROUND_PI/ROUND_ZI. The opcode form follows
// the file of source 0 (register 0x5ca80000, constant buffer 0x4ca80000,
// short immediate 0x38a80000). OP_SAT/OP_ABS/OP_NEG are folded into the
// saturate, abs and neg bits alongside the operand's own modifiers.
CodeEmitterGM107::emitF2F()
795
RoundMode rnd = insn->rnd;
798
case OP_FLOOR: rnd = ROUND_MI; break;
799
case OP_CEIL : rnd = ROUND_PI; break;
800
case OP_TRUNC: rnd = ROUND_ZI; break;
805
switch (insn->src(0).getFile()) {
807
emitInsn(0x5ca80000);
808
emitGPR (0x14, insn->src(0));
810
case FILE_MEMORY_CONST:
811
emitInsn(0x4ca80000);
812
emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(0));
815
emitInsn(0x38a80000);
816
emitIMMD(0x14, 19, insn->src(0));
819
assert(!"bad src0 file");
823
emitField(0x32, 1, (insn->op == OP_SAT) || insn->saturate);
824
emitField(0x31, 1, (insn->op == OP_ABS) || insn->src(0).mod.abs());
826
emitField(0x2d, 1, (insn->op == OP_NEG) || insn->src(0).mod.neg());
828
emitField(0x29, 1, insn->subOp);
829
emitRND (0x27, rnd, 0x2a);
830
// Source and destination sizes are encoded as log2 of the byte size.
emitField(0x0a, 2, util_logbase2(typeSizeof(insn->sType)));
831
emitField(0x08, 2, util_logbase2(typeSizeof(insn->dType)));
832
emitGPR (0x00, insn->def(0));
836
// Encodes a float-to-integer conversion. OP_FLOOR/OP_CEIL/OP_TRUNC override
// the rounding mode with ROUND_M/ROUND_P/ROUND_Z. The opcode form follows
// the file of source 0 (register 0x5cb00000, constant buffer 0x4cb00000,
// short immediate 0x38b00000). Bit 0x0c records whether the destination
// type is signed.
CodeEmitterGM107::emitF2I()
838
RoundMode rnd = insn->rnd;
841
case OP_FLOOR: rnd = ROUND_M; break;
842
case OP_CEIL : rnd = ROUND_P; break;
843
case OP_TRUNC: rnd = ROUND_Z; break;
848
switch (insn->src(0).getFile()) {
850
emitInsn(0x5cb00000);
851
emitGPR (0x14, insn->src(0));
853
case FILE_MEMORY_CONST:
854
emitInsn(0x4cb00000);
855
emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(0));
858
emitInsn(0x38b00000);
859
emitIMMD(0x14, 19, insn->src(0));
862
assert(!"bad src0 file");
866
emitField(0x31, 1, (insn->op == OP_ABS) || insn->src(0).mod.abs());
868
emitField(0x2d, 1, (insn->op == OP_NEG) || insn->src(0).mod.neg());
870
emitRND (0x27, rnd, 0x2a);
871
emitField(0x0c, 1, isSignedType(insn->dType));
872
// Source and destination sizes are encoded as log2 of the byte size.
emitField(0x0a, 2, util_logbase2(typeSizeof(insn->sType)));
873
emitField(0x08, 2, util_logbase2(typeSizeof(insn->dType)));
874
emitGPR (0x00, insn->def(0));
878
// Encodes an integer-to-float conversion. OP_FLOOR/OP_CEIL/OP_TRUNC override
// the rounding mode with ROUND_M/ROUND_P/ROUND_Z. The opcode form follows
// the file of source 0 (register 0x5cb80000, constant buffer 0x4cb80000,
// short immediate 0x38b80000). Bit 0x0d records whether the source type is
// signed.
CodeEmitterGM107::emitI2F()
880
RoundMode rnd = insn->rnd;
883
case OP_FLOOR: rnd = ROUND_M; break;
884
case OP_CEIL : rnd = ROUND_P; break;
885
case OP_TRUNC: rnd = ROUND_Z; break;
890
switch (insn->src(0).getFile()) {
892
emitInsn(0x5cb80000);
893
emitGPR (0x14, insn->src(0));
895
case FILE_MEMORY_CONST:
896
emitInsn(0x4cb80000);
897
emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(0));
900
emitInsn(0x38b80000);
901
emitIMMD(0x14, 19, insn->src(0));
904
assert(!"bad src0 file");
908
emitField(0x31, 1, (insn->op == OP_ABS) || insn->src(0).mod.abs());
910
emitField(0x2d, 1, (insn->op == OP_NEG) || insn->src(0).mod.neg());
911
emitField(0x29, 2, insn->subOp);
912
// Only the 2-bit mode is emitted; -1 is passed as the `ri` bit position.
emitRND (0x27, rnd, -1);
913
emitField(0x0d, 1, isSignedType(insn->sType));
914
// Source and destination sizes are encoded as log2 of the byte size.
emitField(0x0a, 2, util_logbase2(typeSizeof(insn->sType)));
915
emitField(0x08, 2, util_logbase2(typeSizeof(insn->dType)));
916
emitGPR (0x00, insn->def(0));
920
// Encodes an integer-to-integer conversion. The opcode form follows the file
// of source 0 (register 0x5ce00000, constant buffer 0x4ce00000, short
// immediate 0x38e00000). Bits 0x0d and 0x0c record the signedness of the
// source and destination types respectively.
CodeEmitterGM107::emitI2I()
922
switch (insn->src(0).getFile()) {
924
emitInsn(0x5ce00000);
925
emitGPR (0x14, insn->src(0));
927
case FILE_MEMORY_CONST:
928
emitInsn(0x4ce00000);
929
emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(0));
932
emitInsn(0x38e00000);
933
emitIMMD(0x14, 19, insn->src(0));
936
assert(!"bad src0 file");
941
emitField(0x31, 1, (insn->op == OP_ABS) || insn->src(0).mod.abs());
943
emitField(0x2d, 1, (insn->op == OP_NEG) || insn->src(0).mod.neg());
944
emitField(0x29, 2, insn->subOp);
945
emitField(0x0d, 1, isSignedType(insn->sType));
946
emitField(0x0c, 1, isSignedType(insn->dType));
947
// Source and destination sizes are encoded as log2 of the byte size.
emitField(0x0a, 2, util_logbase2(typeSizeof(insn->sType)));
948
emitField(0x08, 2, util_logbase2(typeSizeof(insn->dType)));
949
emitGPR (0x00, insn->def(0));
953
// Fixup callback (registered through addInterp in emitSEL). Based on
// entry->ipa it picks a value from `data` (force_persample_interp is the
// visible case) and then sets or clears bit 10 of code[entry->loc + 1].
// NOTE(review): which value leads to set vs. clear is decided on lines not
// visible here.
gm107_selpFlip(const FixupEntry *entry, uint32_t *code, const FixupData& data)
955
int loc = entry->loc;
957
switch (entry->ipa) {
959
val = data.force_persample_interp;
966
code[loc + 1] |= 1 << 10;
968
code[loc + 1] &= ~(1 << 10);
972
// Encodes a select. The opcode form follows the file of source 1 (register
// 0x5ca00000, constant buffer 0x4ca00000, short immediate 0x38a00000);
// source 2 is the selecting predicate (with its invert bit) and source 0
// the other GPR operand. When subOp >= 1 an interpolation fixup using
// gm107_selpFlip is registered with subOp - 1.
CodeEmitterGM107::emitSEL()
974
switch (insn->src(1).getFile()) {
976
emitInsn(0x5ca00000);
977
emitGPR (0x14, insn->src(1));
979
case FILE_MEMORY_CONST:
980
emitInsn(0x4ca00000);
981
emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
984
emitInsn(0x38a00000);
985
emitIMMD(0x14, 19, insn->src(1));
988
assert(!"bad src1 file");
992
emitINV (0x2a, insn->src(2));
993
emitPRED(0x27, insn->src(2));
994
emitGPR (0x08, insn->src(0));
995
emitGPR (0x00, insn->def(0));
997
if (insn->subOp >= 1) {
998
addInterp(insn->subOp - 1, 0, gm107_selpFlip);
1003
// Encodes a shuffle (opcode 0xef100000). Sources 1 and 2 may each be a
// register or an immediate (5-bit at 0x14 and 13-bit at 0x22 respectively);
// any other file trips an assert. The 2-bit `type` field at 0x1c is computed
// on lines not visible here. When a second definition exists it must be a
// predicate and is emitted at 0x30.
CodeEmitterGM107::emitSHFL()
1007
emitInsn (0xef100000);
1009
switch (insn->src(1).getFile()) {
1011
emitGPR(0x14, insn->src(1));
1013
case FILE_IMMEDIATE:
1014
emitIMMD(0x14, 5, insn->src(1));
1018
assert(!"invalid src1 file");
1022
switch (insn->src(2).getFile()) {
1024
emitGPR(0x27, insn->src(2));
1026
case FILE_IMMEDIATE:
1027
emitIMMD(0x22, 13, insn->src(2));
1031
assert(!"invalid src2 file");
1035
if (!insn->defExists(1))
1038
assert(insn->def(1).getFile() == FILE_PREDICATE);
1039
emitPRED(0x30, insn->def(1));
1042
emitField(0x1e, 2, insn->subOp);
1043
emitField(0x1c, 2, type);
1044
emitGPR (0x08, insn->src(0));
1045
emitGPR (0x00, insn->def(0));
1048
/*******************************************************************************
1050
******************************************************************************/
1053
// Encodes a double-precision add. The opcode form follows the file of
// source 1 (register 0x5c700000, constant buffer 0x4c700000, short immediate
// 0x38700000), followed by the abs/neg bits of both sources.
CodeEmitterGM107::emitDADD()
1055
switch (insn->src(1).getFile()) {
1057
emitInsn(0x5c700000);
1058
emitGPR (0x14, insn->src(1));
1060
case FILE_MEMORY_CONST:
1061
emitInsn(0x4c700000);
1062
emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1064
case FILE_IMMEDIATE:
1065
emitInsn(0x38700000);
1066
emitIMMD(0x14, 19, insn->src(1));
1069
assert(!"bad src1 file");
1072
emitABS(0x31, insn->src(1));
1073
emitNEG(0x30, insn->src(0));
1075
emitABS(0x2e, insn->src(0));
1076
emitNEG(0x2d, insn->src(1));
1078
// Subtraction toggles bit 13 of the high word, i.e. overall bit 0x2d —
// the source-1 negate bit written just above.
if (insn->op == OP_SUB)
1079
code[1] ^= 0x00002000;
1081
emitGPR(0x08, insn->src(0));
1082
emitGPR(0x00, insn->def(0));
1086
// Encodes a double-precision multiply. The opcode form follows the file of
// source 1 (register 0x5c800000, constant buffer 0x4c800000, short immediate
// 0x38800000). A single negate bit at 0x30 carries the XOR of both sources'
// neg modifiers.
CodeEmitterGM107::emitDMUL()
1088
switch (insn->src(1).getFile()) {
1090
emitInsn(0x5c800000);
1091
emitGPR (0x14, insn->src(1));
1093
case FILE_MEMORY_CONST:
1094
emitInsn(0x4c800000);
1095
emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1097
case FILE_IMMEDIATE:
1098
emitInsn(0x38800000);
1099
emitIMMD(0x14, 19, insn->src(1));
1102
assert(!"bad src1 file");
1106
emitNEG2(0x30, insn->src(0), insn->src(1));
1109
emitGPR (0x08, insn->src(0));
1110
emitGPR (0x00, insn->def(0));
1114
// Encodes a double-precision fused multiply-add. The outer switch is on the
// file of source 2. In the first (register) case the opcode follows the file
// of source 1 (register 0x5b700000, constant buffer 0x4b700000, short
// immediate 0x36700000) and source 2 goes in the GPR field at 0x27. When
// source 2 is in a constant buffer (0x53700000), source 1 takes the GPR
// field at 0x27 and source 2 the constant-buffer fields.
CodeEmitterGM107::emitDFMA()
1116
switch(insn->src(2).getFile()) {
1118
switch (insn->src(1).getFile()) {
1120
emitInsn(0x5b700000);
1121
emitGPR (0x14, insn->src(1));
1123
case FILE_MEMORY_CONST:
1124
emitInsn(0x4b700000);
1125
emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1127
case FILE_IMMEDIATE:
1128
emitInsn(0x36700000);
1129
emitIMMD(0x14, 19, insn->src(1));
1132
assert(!"bad src1 file");
1135
emitGPR (0x27, insn->src(2));
1137
case FILE_MEMORY_CONST:
1138
emitInsn(0x53700000);
1139
emitGPR (0x27, insn->src(1));
1140
emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(2));
1143
assert(!"bad src2 file");
1148
// Negate bits: the addend's own, and the XOR of the two multiplicands'.
emitNEG (0x31, insn->src(2));
1149
emitNEG2(0x30, insn->src(0), insn->src(1));
1151
emitGPR (0x08, insn->src(0));
1152
emitGPR (0x00, insn->def(0));
1156
// Encodes a double-precision min/max. The opcode form follows the file of
// source 1 (register 0x5c500000, constant buffer 0x4c500000, short immediate
// 0x38500000). Bit 0x2a is set for OP_MAX.
CodeEmitterGM107::emitDMNMX()
1158
switch (insn->src(1).getFile()) {
1160
emitInsn(0x5c500000);
1161
emitGPR (0x14, insn->src(1));
1163
case FILE_MEMORY_CONST:
1164
emitInsn(0x4c500000);
1165
emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1167
case FILE_IMMEDIATE:
1168
emitInsn(0x38500000);
1169
emitIMMD(0x14, 19, insn->src(1));
1172
assert(!"bad src1 file");
1176
emitABS (0x31, insn->src(1));
1177
emitNEG (0x30, insn->src(0));
1179
emitABS (0x2e, insn->src(0));
1180
emitNEG (0x2d, insn->src(1));
1181
emitField(0x2a, 1, insn->op == OP_MAX);
1183
emitGPR (0x08, insn->src(0));
1184
emitGPR (0x00, insn->def(0));
1188
// Encodes a double-precision compare that writes a GPR. The opcode form
// follows the file of source 1 (register 0x59000000, constant buffer
// 0x49000000, short immediate 0x32000000). For ops other than OP_SET, the
// 2-bit field at 0x2d selects AND/OR/XOR combination with the predicate in
// source 2. Bit 0x34 is set when the destination type is TYPE_F32.
CodeEmitterGM107::emitDSET()
1190
const CmpInstruction *insn = this->insn->asCmp();
1192
switch (insn->src(1).getFile()) {
1194
emitInsn(0x59000000);
1195
emitGPR (0x14, insn->src(1));
1197
case FILE_MEMORY_CONST:
1198
emitInsn(0x49000000);
1199
emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1201
case FILE_IMMEDIATE:
1202
emitInsn(0x32000000);
1203
emitIMMD(0x14, 19, insn->src(1));
1206
assert(!"bad src1 file");
1210
if (insn->op != OP_SET) {
1212
case OP_SET_AND: emitField(0x2d, 2, 0); break;
1213
case OP_SET_OR : emitField(0x2d, 2, 1); break;
1214
case OP_SET_XOR: emitField(0x2d, 2, 2); break;
1216
assert(!"invalid set op");
1219
emitPRED(0x27, insn->src(2));
1224
emitABS (0x36, insn->src(0));
1225
emitNEG (0x35, insn->src(1));
1226
emitField(0x34, 1, insn->dType == TYPE_F32);
1227
emitCond4(0x30, insn->setCond);
1229
emitABS (0x2c, insn->src(1));
1230
emitNEG (0x2b, insn->src(0));
1231
emitGPR (0x08, insn->src(0));
1232
emitGPR (0x00, insn->def(0));
1236
// Encodes a double-precision compare that writes predicates. The opcode form
// follows the file of source 1 (register 0x5b800000, constant buffer
// 0x4b800000, short immediate 0x36800000). For ops other than OP_SET, the
// 2-bit field at 0x2d selects AND/OR/XOR combination with the predicate in
// source 2. The first result predicate goes at 0x03 and, if a second
// definition exists, it goes at 0x00.
CodeEmitterGM107::emitDSETP()
1238
const CmpInstruction *insn = this->insn->asCmp();
1240
switch (insn->src(1).getFile()) {
1242
emitInsn(0x5b800000);
1243
emitGPR (0x14, insn->src(1));
1245
case FILE_MEMORY_CONST:
1246
emitInsn(0x4b800000);
1247
emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1249
case FILE_IMMEDIATE:
1250
emitInsn(0x36800000);
1251
emitIMMD(0x14, 19, insn->src(1));
1254
assert(!"bad src1 file");
1258
if (insn->op != OP_SET) {
1260
case OP_SET_AND: emitField(0x2d, 2, 0); break;
1261
case OP_SET_OR : emitField(0x2d, 2, 1); break;
1262
case OP_SET_XOR: emitField(0x2d, 2, 2); break;
1264
assert(!"invalid set op");
1267
emitPRED(0x27, insn->src(2));
1272
emitCond4(0x30, insn->setCond);
1273
emitABS (0x2c, insn->src(1));
1274
emitNEG (0x2b, insn->src(0));
1275
emitGPR (0x08, insn->src(0));
1276
emitABS (0x07, insn->src(0));
1277
emitNEG (0x06, insn->src(1));
1278
emitPRED (0x03, insn->def(0));
1279
if (insn->defExists(1))
1280
emitPRED(0x00, insn->def(1));
1285
/*******************************************************************************
1287
******************************************************************************/
1290
// Encodes a single-precision add. When source 1 is not a long immediate the
// opcode follows its file (register 0x5c580000, constant buffer 0x4c580000,
// short immediate 0x38580000). The second group of statements uses opcode
// 0x08000000 with a 32-bit immediate — presumably the long-immediate branch.
CodeEmitterGM107::emitFADD()
1292
if (!longIMMD(insn->src(1))) {
1293
switch (insn->src(1).getFile()) {
1295
emitInsn(0x5c580000);
1296
emitGPR (0x14, insn->src(1));
1298
case FILE_MEMORY_CONST:
1299
emitInsn(0x4c580000);
1300
emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1302
case FILE_IMMEDIATE:
1303
emitInsn(0x38580000);
1304
emitIMMD(0x14, 19, insn->src(1));
1307
assert(!"bad src1 file");
1311
emitABS(0x31, insn->src(1));
1312
emitNEG(0x30, insn->src(0));
1314
emitABS(0x2e, insn->src(0));
1315
emitNEG(0x2d, insn->src(1));
1318
// Subtraction toggles overall bit 0x2d, the source-1 negate bit.
if (insn->op == OP_SUB)
1319
code[1] ^= 0x00002000;
1321
emitInsn(0x08000000);
1322
emitABS(0x39, insn->src(1));
1323
emitNEG(0x38, insn->src(0));
1325
emitABS(0x36, insn->src(0));
1326
emitNEG(0x35, insn->src(1));
1328
emitIMMD(0x14, 32, insn->src(1));
1330
// Subtraction here toggles overall bit 51, the top bit of the 32-bit
// immediate that starts at bit 0x14.
if (insn->op == OP_SUB)
1331
code[1] ^= 0x00080000;
1334
emitGPR(0x08, insn->src(0));
1335
emitGPR(0x00, insn->def(0));
1339
// Encodes a single-precision multiply. When source 1 is not a long immediate
// the opcode follows its file (register 0x5c680000, constant buffer
// 0x4c680000, short immediate 0x38680000) and a combined negate bit is
// written at 0x30. The statements starting at opcode 0x1e000000 use a 32-bit
// immediate — presumably the long-immediate branch — and express negation by
// flipping the immediate's sign bit instead.
CodeEmitterGM107::emitFMUL()
1341
if (!longIMMD(insn->src(1))) {
1342
switch (insn->src(1).getFile()) {
1344
emitInsn(0x5c680000);
1345
emitGPR (0x14, insn->src(1));
1347
case FILE_MEMORY_CONST:
1348
emitInsn(0x4c680000);
1349
emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1351
case FILE_IMMEDIATE:
1352
emitInsn(0x38680000);
1353
emitIMMD(0x14, 19, insn->src(1));
1356
assert(!"bad src1 file");
1360
emitNEG2(0x30, insn->src(0), insn->src(1));
1366
emitInsn(0x1e000000);
1370
emitIMMD(0x14, 32, insn->src(1));
1371
if (insn->src(0).mod.neg() ^ insn->src(1).mod.neg())
1372
code[1] ^= 0x00080000; /* flip immd sign bit */
1375
emitGPR(0x08, insn->src(0));
1376
emitGPR(0x00, insn->def(0));
1380
// Encodes a single-precision fused multiply-add. The outer switch is on the
// file of source 2. In the first (register) case the opcode follows the file
// of source 1: register 0x59800000, constant buffer 0x49800000, or an
// immediate, which uses 0x0c000000 with a 32-bit immediate when long and
// 0x32800000 with a 19-bit immediate otherwise. When source 2 is in a
// constant buffer (0x51800000), source 1 takes the GPR field at 0x27.
// Two sets of negate positions are visible (0x39/0x38 and 0x31/0x30);
// presumably the first pair belongs to the long-immediate form.
CodeEmitterGM107::emitFFMA()
1382
bool isLongIMMD = false;
1383
switch(insn->src(2).getFile()) {
1385
switch (insn->src(1).getFile()) {
1387
emitInsn(0x59800000);
1388
emitGPR (0x14, insn->src(1));
1390
case FILE_MEMORY_CONST:
1391
emitInsn(0x49800000);
1392
emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1394
case FILE_IMMEDIATE:
1395
if (longIMMD(insn->getSrc(1))) {
1396
// The long-immediate form requires the destination and source 2 to share
// a register id.
assert(insn->getDef(0)->reg.data.id == insn->getSrc(2)->reg.data.id);
1398
emitInsn(0x0c000000);
1399
emitIMMD(0x14, 32, insn->src(1));
1401
emitInsn(0x32800000);
1402
emitIMMD(0x14, 19, insn->src(1));
1406
assert(!"bad src1 file");
1410
emitGPR (0x27, insn->src(2));
1412
case FILE_MEMORY_CONST:
1413
emitInsn(0x51800000);
1414
emitGPR (0x27, insn->src(1));
1415
emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(2));
1418
assert(!"bad src2 file");
1423
emitNEG (0x39, insn->src(2));
1424
emitNEG2(0x38, insn->src(0), insn->src(1));
1430
emitNEG (0x31, insn->src(2));
1431
emitNEG2(0x30, insn->src(0), insn->src(1));
1436
emitGPR(0x08, insn->src(0));
1437
emitGPR(0x00, insn->def(0));
1441
// Encodes a multi-function-unit operation (opcode 0x50800000). The 4-bit
// function selector at 0x14 is derived from the op; for OP_RCP and OP_RSQ
// it also depends on subOp (base 4 or 5 plus 2 * subOp). An unhandled op
// trips the "invalid mufu" assert.
CodeEmitterGM107::emitMUFU()
1446
case OP_COS: mufu = 0; break;
1447
case OP_SIN: mufu = 1; break;
1448
case OP_EX2: mufu = 2; break;
1449
case OP_LG2: mufu = 3; break;
1450
case OP_RCP: mufu = 4 + 2 * insn->subOp; break;
1451
case OP_RSQ: mufu = 5 + 2 * insn->subOp; break;
1452
case OP_SQRT: mufu = 8; break;
1454
assert(!"invalid mufu");
1458
emitInsn (0x50800000);
1460
emitNEG (0x30, insn->src(0));
1461
emitABS (0x2e, insn->src(0));
1462
emitField(0x14, 4, mufu);
1463
emitGPR (0x08, insn->src(0));
1464
emitGPR (0x00, insn->def(0));
1468
// Encodes a single-precision min/max. The opcode form follows the file of
// source 1 (register 0x5c600000, constant buffer 0x4c600000, short immediate
// 0x38600000). Bit 0x2a is set for OP_MAX.
CodeEmitterGM107::emitFMNMX()
1470
switch (insn->src(1).getFile()) {
1472
emitInsn(0x5c600000);
1473
emitGPR (0x14, insn->src(1));
1475
case FILE_MEMORY_CONST:
1476
emitInsn(0x4c600000);
1477
emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1479
case FILE_IMMEDIATE:
1480
emitInsn(0x38600000);
1481
emitIMMD(0x14, 19, insn->src(1));
1484
assert(!"bad src1 file");
1488
emitField(0x2a, 1, insn->op == OP_MAX);
1491
emitABS(0x31, insn->src(1));
1492
emitNEG(0x30, insn->src(0));
1494
emitABS(0x2e, insn->src(0));
1495
emitNEG(0x2d, insn->src(1));
1497
emitGPR(0x08, insn->src(0));
1498
emitGPR(0x00, insn->def(0));
1502
// Encodes a single-source operation whose opcode form follows the file of
// source 0 (register 0x5c900000, constant buffer 0x4c900000, short immediate
// 0x38900000). Bit 0x27 is set when the op is OP_PREEX2.
CodeEmitterGM107::emitRRO()
1504
switch (insn->src(0).getFile()) {
1506
emitInsn(0x5c900000);
1507
emitGPR (0x14, insn->src(0));
1509
case FILE_MEMORY_CONST:
1510
emitInsn(0x4c900000);
1511
emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(0));
1513
case FILE_IMMEDIATE:
1514
emitInsn(0x38900000);
1515
emitIMMD(0x14, 19, insn->src(0));
1518
assert(!"bad src file");
1522
emitABS (0x31, insn->src(0));
1523
emitNEG (0x2d, insn->src(0));
1524
emitField(0x27, 1, insn->op == OP_PREEX2);
1525
emitGPR (0x00, insn->def(0));
1529
// Encodes a float compare-and-select. The outer switch is on the file of
// source 2. In the first (register) case the opcode follows the file of
// source 1 (register 0x5ba00000, constant buffer 0x4ba00000, short immediate
// 0x36a00000). When source 2 is in a constant buffer (0x53a00000), source 1
// takes the GPR field at 0x27. The condition goes in the 4-bit field at 0x30.
CodeEmitterGM107::emitFCMP()
1531
const CmpInstruction *insn = this->insn->asCmp();
1532
CondCode cc = insn->setCond;
1534
// A negated source 2 is handled by reversing the condition code; no negate
// bit is emitted for it in the visible lines.
if (insn->src(2).mod.neg())
1535
cc = reverseCondCode(cc);
1537
switch(insn->src(2).getFile()) {
1539
switch (insn->src(1).getFile()) {
1541
emitInsn(0x5ba00000);
1542
emitGPR (0x14, insn->src(1));
1544
case FILE_MEMORY_CONST:
1545
emitInsn(0x4ba00000);
1546
emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1548
case FILE_IMMEDIATE:
1549
emitInsn(0x36a00000);
1550
emitIMMD(0x14, 19, insn->src(1));
1553
assert(!"bad src1 file");
1556
emitGPR (0x27, insn->src(2));
1558
case FILE_MEMORY_CONST:
1559
emitInsn(0x53a00000);
1560
emitGPR (0x27, insn->src(1));
1561
emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(2));
1564
assert(!"bad src2 file");
1568
emitCond4(0x30, cc);
1570
emitGPR (0x08, insn->src(0));
1571
emitGPR (0x00, insn->def(0));
1575
// Encodes a single-precision compare that writes a GPR. The opcode form
// follows the file of source 1 (register 0x58000000, constant buffer
// 0x48000000, short immediate 0x30000000). For ops other than OP_SET, the
// 2-bit field at 0x2d selects AND/OR/XOR combination with the predicate in
// source 2. Bit 0x34 is set when the destination type is TYPE_F32.
CodeEmitterGM107::emitFSET()
1577
const CmpInstruction *insn = this->insn->asCmp();
1579
switch (insn->src(1).getFile()) {
1581
emitInsn(0x58000000);
1582
emitGPR (0x14, insn->src(1));
1584
case FILE_MEMORY_CONST:
1585
emitInsn(0x48000000);
1586
emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1588
case FILE_IMMEDIATE:
1589
emitInsn(0x30000000);
1590
emitIMMD(0x14, 19, insn->src(1));
1593
assert(!"bad src1 file");
1597
if (insn->op != OP_SET) {
1599
case OP_SET_AND: emitField(0x2d, 2, 0); break;
1600
case OP_SET_OR : emitField(0x2d, 2, 1); break;
1601
case OP_SET_XOR: emitField(0x2d, 2, 2); break;
1603
assert(!"invalid set op");
1606
emitPRED(0x27, insn->src(2));
1612
emitABS (0x36, insn->src(0));
1613
emitNEG (0x35, insn->src(1));
1614
emitField(0x34, 1, insn->dType == TYPE_F32);
1615
emitCond4(0x30, insn->setCond);
1617
emitABS (0x2c, insn->src(1));
1618
emitNEG (0x2b, insn->src(0));
1619
emitGPR (0x08, insn->src(0));
1620
emitGPR (0x00, insn->def(0));
1624
// Encodes a single-precision compare that writes predicates. The opcode form
// follows the file of source 1 (register 0x5bb00000, constant buffer
// 0x4bb00000, short immediate 0x36b00000). For ops other than OP_SET, the
// 2-bit field at 0x2d selects AND/OR/XOR combination with the predicate in
// source 2. The first result predicate goes at 0x03 and, if a second
// definition exists, it goes at 0x00.
CodeEmitterGM107::emitFSETP()
1626
const CmpInstruction *insn = this->insn->asCmp();
1628
switch (insn->src(1).getFile()) {
1630
emitInsn(0x5bb00000);
1631
emitGPR (0x14, insn->src(1));
1633
case FILE_MEMORY_CONST:
1634
emitInsn(0x4bb00000);
1635
emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1637
case FILE_IMMEDIATE:
1638
emitInsn(0x36b00000);
1639
emitIMMD(0x14, 19, insn->src(1));
1642
assert(!"bad src1 file");
1646
if (insn->op != OP_SET) {
1648
case OP_SET_AND: emitField(0x2d, 2, 0); break;
1649
case OP_SET_OR : emitField(0x2d, 2, 1); break;
1650
case OP_SET_XOR: emitField(0x2d, 2, 2); break;
1652
assert(!"invalid set op");
1655
emitPRED(0x27, insn->src(2));
1660
emitCond4(0x30, insn->setCond);
1662
emitABS (0x2c, insn->src(1));
1663
emitNEG (0x2b, insn->src(0));
1664
emitGPR (0x08, insn->src(0));
1665
emitABS (0x07, insn->src(0));
1666
emitNEG (0x06, insn->src(1));
1667
emitPRED (0x03, insn->def(0));
1668
if (insn->defExists(1))
1669
emitPRED(0x00, insn->def(1));
1675
/* Emit the FSWZADD encoding (opcode word 0x50f80000).
 *
 * insn->lanes is stored in the single bit at 0x26 and the 8-bit subOp at
 * 0x1c. src(1) is encoded at 0x14 only when it is not the predicate source
 * (predSrc != 1); the alternative arm is not visible in this excerpt. */
CodeEmitterGM107::emitFSWZADD()
1677
emitInsn (0x50f80000);
1681
emitField(0x26, 1, insn->lanes); /* abused for .ndv */
1682
emitField(0x1c, 8, insn->subOp);
1683
if (insn->predSrc != 1)
1684
emitGPR (0x14, insn->src(1));
1687
emitGPR (0x08, insn->src(0));
1688
emitGPR (0x00, insn->def(0));
1691
/*******************************************************************************
1693
******************************************************************************/
1696
/* Emit a logic op (OP_AND / OP_OR / OP_XOR -> lop 0 / 1 / 2).
 *
 * Two encodings are visible:
 *  - short form, used when src(1) is not a long immediate: opcode chosen by
 *    the file of src(1), lop at 0x29, invert bits at 0x28 (src1) / 0x27 (src0);
 *  - 32-bit immediate form (opcode word 0x04000000): lop at 0x35, invert bits
 *    at 0x38 / 0x37, immediate at 0x14. */
CodeEmitterGM107::emitLOP()
1701
case OP_AND: lop = 0; break;
1702
case OP_OR : lop = 1; break;
1703
case OP_XOR: lop = 2; break;
1705
assert(!"invalid lop");
1709
if (!longIMMD(insn->src(1))) {
1710
switch (insn->src(1).getFile()) {
1712
emitInsn(0x5c400000);
1713
emitGPR (0x14, insn->src(1));
1715
case FILE_MEMORY_CONST:
1716
emitInsn(0x4c400000);
1717
emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1719
case FILE_IMMEDIATE:
1720
emitInsn(0x38400000);
1721
emitIMMD(0x14, 19, insn->src(1));
1724
assert(!"bad src1 file");
1730
emitField(0x29, 2, lop);
1731
emitINV (0x28, insn->src(1));
1732
emitINV (0x27, insn->src(0));
1734
emitInsn (0x04000000);
1736
emitINV (0x38, insn->src(1));
1737
emitINV (0x37, insn->src(0));
1738
emitField(0x35, 2, lop);
1740
emitIMMD (0x14, 32, insn->src(1));
1743
emitGPR (0x08, insn->src(0));
1744
emitGPR (0x00, insn->def(0));
1747
/* special-case of emitLOP(): lop pass_b dst 0 ~src */
1749
/* Emit OP_NOT as a special case of the logic-op encoding.
 *
 * NOT has a single source, src(0), which is placed in the second-operand
 * slot (0x14) of the LOP encoding. The short form picks its opcode word by
 * the file of src(0); the long form (opcode word 0x05600000) carries a
 * 32-bit immediate.
 *
 * Fix: the long-immediate path used to encode insn->src(1), although the
 * path is selected by longIMMD(insn->src(0)) and the instruction's only
 * operand is src(0). It now encodes src(0), matching the short forms.
 * NOTE(review): the assert text below says "src1" although the switch is on
 * src(0); left untouched here. */
CodeEmitterGM107::emitNOT()
1751
if (!longIMMD(insn->src(0))) {
1752
switch (insn->src(0).getFile()) {
1754
emitInsn(0x5c400700);
1755
emitGPR (0x14, insn->src(0));
1757
case FILE_MEMORY_CONST:
1758
emitInsn(0x4c400700);
1759
emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(0));
1761
case FILE_IMMEDIATE:
1762
emitInsn(0x38400700);
1763
emitIMMD(0x14, 19, insn->src(0));
1766
assert(!"bad src1 file");
1771
emitInsn (0x05600000);
1772
/* The immediate being encoded is the value tested by longIMMD() above. */
emitIMMD (0x14, 32, insn->src(0));
1776
emitGPR(0x00, insn->def(0));
1780
/* Emit integer add/sub.
 *
 * Short form (src(1) not a long immediate): opcode word chosen by the file
 * of src(1), negate bits at 0x31 (src0) and 0x30 (src1).
 * Long form: opcode word 0x1c000000, negate of src0 at 0x38 and a 32-bit
 * immediate at 0x14.
 * For OP_SUB, bit 16 of code[1] (overall bit 0x30) is toggled afterwards;
 * that is the position the short form uses for the src(1) negate bit. */
CodeEmitterGM107::emitIADD()
1782
if (!longIMMD(insn->src(1))) {
1783
switch (insn->src(1).getFile()) {
1785
emitInsn(0x5c100000);
1786
emitGPR (0x14, insn->src(1));
1788
case FILE_MEMORY_CONST:
1789
emitInsn(0x4c100000);
1790
emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1792
case FILE_IMMEDIATE:
1793
emitInsn(0x38100000);
1794
emitIMMD(0x14, 19, insn->src(1));
1797
assert(!"bad src1 file");
1801
emitNEG(0x31, insn->src(0));
1802
emitNEG(0x30, insn->src(1));
1806
emitInsn(0x1c000000);
1807
emitNEG (0x38, insn->src(0));
1811
emitIMMD(0x14, 32, insn->src(1));
1814
if (insn->op == OP_SUB)
1815
code[1] ^= 0x00010000;
1817
emitGPR(0x08, insn->src(0));
1818
emitGPR(0x00, insn->def(0));
1822
/* Emit integer multiply.
 *
 * Short form (src(1) not a long immediate): opcode word by file of src(1);
 * signedness of sType / dType at 0x29 / 0x28 and the MUL_HIGH flag at 0x27.
 * Long form (opcode word 0x1f000000): the same three flags at 0x37 / 0x36 /
 * 0x35 and a 32-bit immediate at 0x14. */
CodeEmitterGM107::emitIMUL()
1824
if (!longIMMD(insn->src(1))) {
1825
switch (insn->src(1).getFile()) {
1827
emitInsn(0x5c380000);
1828
emitGPR (0x14, insn->src(1));
1830
case FILE_MEMORY_CONST:
1831
emitInsn(0x4c380000);
1832
emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1834
case FILE_IMMEDIATE:
1835
emitInsn(0x38380000);
1836
emitIMMD(0x14, 19, insn->src(1));
1839
assert(!"bad src1 file");
1843
emitField(0x29, 1, isSignedType(insn->sType));
1844
emitField(0x28, 1, isSignedType(insn->dType));
1845
emitField(0x27, 1, insn->subOp == NV50_IR_SUBOP_MUL_HIGH);
1847
emitInsn (0x1f000000);
1848
emitField(0x37, 1, isSignedType(insn->sType));
1849
emitField(0x36, 1, isSignedType(insn->dType));
1850
emitField(0x35, 1, insn->subOp == NV50_IR_SUBOP_MUL_HIGH);
1852
emitIMMD (0x14, 32, insn->src(1));
1855
emitGPR(0x08, insn->src(0));
1856
emitGPR(0x00, insn->def(0));
1860
/* Emit integer multiply-add.
 *
 * The opcode word depends on the file of src(2) and, in the nested switch,
 * on the file of src(1). When src(2) is in a constant buffer, src(1) is
 * encoded in the 0x27 register slot instead of src(2).
 * Flags: MUL_HIGH at 0x36, sType signedness at 0x35, neg(src2) at 0x34,
 * combined neg of src0/src1 at 0x33, dType signedness at 0x30. */
CodeEmitterGM107::emitIMAD()
1862
/*XXX: imad32i exists, but not using it as third src overlaps dst */
1863
switch(insn->src(2).getFile()) {
1865
switch (insn->src(1).getFile()) {
1867
emitInsn(0x5a000000);
1868
emitGPR (0x14, insn->src(1));
1870
case FILE_MEMORY_CONST:
1871
emitInsn(0x4a000000);
1872
emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1874
case FILE_IMMEDIATE:
1875
emitInsn(0x34000000);
1876
emitIMMD(0x14, 19, insn->src(1));
1879
assert(!"bad src1 file");
1882
emitGPR (0x27, insn->src(2));
1884
case FILE_MEMORY_CONST:
1885
emitInsn(0x52000000);
1886
emitGPR (0x27, insn->src(1));
1887
emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(2));
1890
assert(!"bad src2 file");
1894
emitField(0x36, 1, insn->subOp == NV50_IR_SUBOP_MUL_HIGH);
1895
emitField(0x35, 1, isSignedType(insn->sType));
1896
emitNEG (0x34, insn->src(2));
1897
emitNEG2 (0x33, insn->src(0), insn->src(1));
1900
emitField(0x30, 1, isSignedType(insn->dType));
1902
emitGPR (0x08, insn->src(0));
1903
emitGPR (0x00, insn->def(0));
1907
/* Emit the ISCADD encoding.
 *
 * src(1) must be an immediate (asserted) and is stored as a 5-bit field at
 * 0x27. The opcode word is chosen by the file of src(2), which occupies the
 * 0x14 operand slot. Negate bits: src0 at 0x31, src2 at 0x30.
 * NOTE(review): the assert text says "bad src1 file" although the switch
 * inspects src(2). */
CodeEmitterGM107::emitISCADD()
1909
assert(insn->src(1).get()->asImm());
1911
switch (insn->src(2).getFile()) {
1913
emitInsn(0x5c180000);
1914
emitGPR (0x14, insn->src(2));
1916
case FILE_MEMORY_CONST:
1917
emitInsn(0x4c180000);
1918
emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(2));
1920
case FILE_IMMEDIATE:
1921
emitInsn(0x38180000);
1922
emitIMMD(0x14, 19, insn->src(2));
1925
assert(!"bad src1 file");
1928
emitNEG (0x31, insn->src(0));
1929
emitNEG (0x30, insn->src(2));
1931
emitIMMD(0x27, 5, insn->src(1));
1932
emitGPR (0x08, insn->src(0));
1933
emitGPR (0x00, insn->def(0));
1937
/* Emit the XMAD encoding. src(0) must be a GPR (asserted).
 *
 * Four operand layouts are visible, selected in this order:
 *  1. src(2) in a constant buffer (src(1) must be a GPR)  -> 0x51000000
 *  2. src(1) in a constant buffer (src(2) must be a GPR)  -> 0x4e000000
 *  3. src(1) immediate, 16 bits   (src(2) must be a GPR)  -> 0x36000000
 *  4. otherwise both GPRs                                  -> 0x5b000000
 * The positions of several fields afterwards depend on `constbuf`.
 * NOTE(review): `constbuf`, `psl_mrg` and `immediate` are only initialised
 * in the visible lines; any per-branch assignments are not visible in this
 * excerpt, nor are the conditions guarding the final h1 fields. */
CodeEmitterGM107::emitXMAD()
1939
assert(insn->src(0).getFile() == FILE_GPR);
1941
bool constbuf = false;
1942
bool psl_mrg = true;
1943
bool immediate = false;
1944
if (insn->src(2).getFile() == FILE_MEMORY_CONST) {
1945
assert(insn->src(1).getFile() == FILE_GPR);
1948
emitInsn(0x51000000);
1949
emitGPR(0x27, insn->src(1));
1950
emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(2));
1951
} else if (insn->src(1).getFile() == FILE_MEMORY_CONST) {
1952
assert(insn->src(2).getFile() == FILE_GPR);
1954
emitInsn(0x4e000000);
1955
emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1956
emitGPR(0x27, insn->src(2));
1957
} else if (insn->src(1).getFile() == FILE_IMMEDIATE) {
1958
assert(insn->src(2).getFile() == FILE_GPR);
1959
/* The immediate form only carries 16 bits, so H1 selection on src(1)
 * is rejected. */
assert(!(insn->subOp & NV50_IR_SUBOP_XMAD_H1(1)));
1961
emitInsn(0x36000000);
1962
emitIMMD(0x14, 16, insn->src(1));
1963
emitGPR(0x27, insn->src(2));
1965
assert(insn->src(1).getFile() == FILE_GPR);
1966
assert(insn->src(2).getFile() == FILE_GPR);
1967
emitInsn(0x5b000000);
1968
emitGPR(0x14, insn->src(1));
1969
emitGPR(0x27, insn->src(2));
1973
/* Low two bits of subOp go to 0x37 (constbuf) or 0x24. */
emitField(constbuf ? 0x37 : 0x24, 2, insn->subOp & 0x3);
1975
unsigned cmode = (insn->subOp & NV50_IR_SUBOP_XMAD_CMODE_MASK);
1976
cmode >>= NV50_IR_SUBOP_XMAD_CMODE_SHIFT;
1977
emitField(0x32, constbuf ? 2 : 3, cmode);
1979
emitX(constbuf ? 0x36 : 0x26);
1982
emitGPR(0x0, insn->def(0));
1983
emitGPR(0x8, insn->src(0));
1986
if (isSignedType(insn->sType)) {
1987
uint16_t h1s = insn->subOp & NV50_IR_SUBOP_XMAD_H1_MASK;
1988
emitField(0x30, 2, h1s >> NV50_IR_SUBOP_XMAD_H1_SHIFT);
1990
emitField(0x35, 1, insn->subOp & NV50_IR_SUBOP_XMAD_H1(0) ? 1 : 0);
1992
bool h1 = insn->subOp & NV50_IR_SUBOP_XMAD_H1(1);
1993
emitField(constbuf ? 0x34 : 0x23, 1, h1);
1998
/* Emit integer min/max.
 *
 * Opcode word by file of src(1). dType signedness at 0x30, 2-bit subOp at
 * 0x2b, and bit 0x2a set when the op is OP_MAX. */
CodeEmitterGM107::emitIMNMX()
2000
switch (insn->src(1).getFile()) {
2002
emitInsn(0x5c200000);
2003
emitGPR (0x14, insn->src(1));
2005
case FILE_MEMORY_CONST:
2006
emitInsn(0x4c200000);
2007
emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
2009
case FILE_IMMEDIATE:
2010
emitInsn(0x38200000);
2011
emitIMMD(0x14, 19, insn->src(1));
2014
assert(!"bad src1 file");
2018
emitField(0x30, 1, isSignedType(insn->dType));
2020
emitField(0x2b, 2, insn->subOp);
2021
emitField(0x2a, 1, insn->op == OP_MAX);
2023
emitGPR (0x08, insn->src(0));
2024
emitGPR (0x00, insn->def(0));
2028
/* Emit the ICMP encoding for the current CmpInstruction.
 *
 * Mirrors emitFCMP: opcode word by the files of src(2) / src(1), and a neg
 * modifier on src(2) is folded into the condition code with
 * reverseCondCode(). The condition is a 3-bit code at 0x31 and sType
 * signedness is stored at 0x30. */
CodeEmitterGM107::emitICMP()
2030
const CmpInstruction *insn = this->insn->asCmp();
2031
CondCode cc = insn->setCond;
2033
if (insn->src(2).mod.neg())
2034
cc = reverseCondCode(cc);
2036
switch(insn->src(2).getFile()) {
2038
switch (insn->src(1).getFile()) {
2040
emitInsn(0x5b400000);
2041
emitGPR (0x14, insn->src(1));
2043
case FILE_MEMORY_CONST:
2044
emitInsn(0x4b400000);
2045
emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
2047
case FILE_IMMEDIATE:
2048
emitInsn(0x36400000);
2049
emitIMMD(0x14, 19, insn->src(1));
2052
assert(!"bad src1 file");
2055
emitGPR (0x27, insn->src(2));
2057
case FILE_MEMORY_CONST:
2058
emitInsn(0x53400000);
2059
emitGPR (0x27, insn->src(1));
2060
emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(2));
2063
assert(!"bad src2 file");
2067
emitCond3(0x31, cc);
2068
emitField(0x30, 1, isSignedType(insn->sType));
2069
emitGPR (0x08, insn->src(0));
2070
emitGPR (0x00, insn->def(0));
2074
/* Emit the ISET encoding for the current CmpInstruction.
 *
 * Opcode word by file of src(1). For ops other than OP_SET a 2-bit combine
 * selector is written at 0x2d (AND=0, OR=1, XOR=2) and src(2) is encoded as
 * a predicate at 0x27. 3-bit condition at 0x31, sType signedness at 0x30,
 * and bit 0x2c is set when the destination type is TYPE_F32. */
CodeEmitterGM107::emitISET()
2076
const CmpInstruction *insn = this->insn->asCmp();
2078
switch (insn->src(1).getFile()) {
2080
emitInsn(0x5b500000);
2081
emitGPR (0x14, insn->src(1));
2083
case FILE_MEMORY_CONST:
2084
emitInsn(0x4b500000);
2085
emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
2087
case FILE_IMMEDIATE:
2088
emitInsn(0x36500000);
2089
emitIMMD(0x14, 19, insn->src(1));
2092
assert(!"bad src1 file");
2096
if (insn->op != OP_SET) {
2098
case OP_SET_AND: emitField(0x2d, 2, 0); break;
2099
case OP_SET_OR : emitField(0x2d, 2, 1); break;
2100
case OP_SET_XOR: emitField(0x2d, 2, 2); break;
2102
assert(!"invalid set op");
2105
emitPRED(0x27, insn->src(2));
2110
emitCond3(0x31, insn->setCond);
2111
emitField(0x30, 1, isSignedType(insn->sType));
2113
emitField(0x2c, 1, insn->dType == TYPE_F32);
2115
emitGPR (0x08, insn->src(0));
2116
emitGPR (0x00, insn->def(0));
2120
/* Emit the ISETP encoding for the current CmpInstruction.
 *
 * Same operand handling as emitISET, but the results are predicates:
 * def(0) at 0x03 and, when it exists, def(1) at 0x00. */
CodeEmitterGM107::emitISETP()
2122
const CmpInstruction *insn = this->insn->asCmp();
2124
switch (insn->src(1).getFile()) {
2126
emitInsn(0x5b600000);
2127
emitGPR (0x14, insn->src(1));
2129
case FILE_MEMORY_CONST:
2130
emitInsn(0x4b600000);
2131
emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
2133
case FILE_IMMEDIATE:
2134
emitInsn(0x36600000);
2135
emitIMMD(0x14, 19, insn->src(1));
2138
assert(!"bad src1 file");
2142
if (insn->op != OP_SET) {
2144
case OP_SET_AND: emitField(0x2d, 2, 0); break;
2145
case OP_SET_OR : emitField(0x2d, 2, 1); break;
2146
case OP_SET_XOR: emitField(0x2d, 2, 2); break;
2148
assert(!"invalid set op");
2151
emitPRED(0x27, insn->src(2));
2156
emitCond3(0x31, insn->setCond);
2157
emitField(0x30, 1, isSignedType(insn->sType));
2159
emitGPR (0x08, insn->src(0));
2160
emitPRED (0x03, insn->def(0));
2161
if (insn->defExists(1))
2162
emitPRED(0x00, insn->def(1));
2168
/* Emit shift-left.
 *
 * Opcode word by file of src(1) (the shift amount, slot 0x14). Bit 0x27 is
 * set when subOp equals NV50_IR_SUBOP_SHIFT_WRAP. */
CodeEmitterGM107::emitSHL()
2170
switch (insn->src(1).getFile()) {
2172
emitInsn(0x5c480000);
2173
emitGPR (0x14, insn->src(1));
2175
case FILE_MEMORY_CONST:
2176
emitInsn(0x4c480000);
2177
emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
2179
case FILE_IMMEDIATE:
2180
emitInsn(0x38480000);
2181
emitIMMD(0x14, 19, insn->src(1));
2184
assert(!"bad src1 file");
2190
emitField(0x27, 1, insn->subOp == NV50_IR_SUBOP_SHIFT_WRAP);
2191
emitGPR (0x08, insn->src(0));
2192
emitGPR (0x00, insn->def(0));
2196
/* Emit shift-right.
 *
 * Opcode word by file of src(1). dType signedness is stored at 0x30 and
 * bit 0x27 is set when subOp equals NV50_IR_SUBOP_SHIFT_WRAP. */
CodeEmitterGM107::emitSHR()
2198
switch (insn->src(1).getFile()) {
2200
emitInsn(0x5c280000);
2201
emitGPR (0x14, insn->src(1));
2203
case FILE_MEMORY_CONST:
2204
emitInsn(0x4c280000);
2205
emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
2207
case FILE_IMMEDIATE:
2208
emitInsn(0x38280000);
2209
emitIMMD(0x14, 19, insn->src(1));
2212
assert(!"bad src1 file");
2216
emitField(0x30, 1, isSignedType(insn->dType));
2219
emitField(0x27, 1, insn->subOp == NV50_IR_SUBOP_SHIFT_WRAP);
2220
emitGPR (0x08, insn->src(0));
2221
emitGPR (0x00, insn->def(0));
2225
/* Emit the three-source shift (SHF) encoding.
 *
 * The opcode word depends on both the file of src(1) and on whether the op
 * is OP_SHL. Unlike SHL/SHR, subOp is tested as a bit mask here: the
 * SHIFT_WRAP bit goes to 0x32 and the SHIFT_HIGH bit to 0x30. src(2) is
 * encoded at 0x27 and a 2-bit `type` at 0x25.
 * NOTE(review): `type` is derived from the switch on sType, whose cases are
 * not visible in this excerpt. */
CodeEmitterGM107::emitSHF()
2229
switch (insn->src(1).getFile()) {
2231
emitInsn(insn->op == OP_SHL ? 0x5bf80000 : 0x5cf80000);
2232
emitGPR(0x14, insn->src(1));
2234
case FILE_IMMEDIATE:
2235
emitInsn(insn->op == OP_SHL ? 0x36f80000 : 0x38f80000);
2236
emitIMMD(0x14, 19, insn->src(1));
2239
assert(!"bad src1 file");
2243
switch (insn->sType) {
2255
emitField(0x32, 1, !!(insn->subOp & NV50_IR_SUBOP_SHIFT_WRAP));
2257
emitField(0x30, 1, !!(insn->subOp & NV50_IR_SUBOP_SHIFT_HIGH));
2259
emitGPR (0x27, insn->src(2));
2260
emitField(0x25, 2, type);
2261
emitGPR (0x08, insn->src(0));
2262
emitGPR (0x00, insn->def(0));
2266
/* Emit the POPC encoding.
 *
 * The single source src(0) is placed in the 0x14 operand slot; the opcode
 * word is chosen by its file. The invert modifier of src(0) is stored at
 * 0x28.
 * NOTE(review): the assert text says "bad src1 file" although the switch
 * inspects src(0). */
CodeEmitterGM107::emitPOPC()
2268
switch (insn->src(0).getFile()) {
2270
emitInsn(0x5c080000);
2271
emitGPR (0x14, insn->src(0));
2273
case FILE_MEMORY_CONST:
2274
emitInsn(0x4c080000);
2275
emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(0));
2277
case FILE_IMMEDIATE:
2278
emitInsn(0x38080000);
2279
emitIMMD(0x14, 19, insn->src(0));
2282
assert(!"bad src1 file");
2286
emitINV(0x28, insn->src(0));
2287
emitGPR(0x00, insn->def(0));
2291
/* Emit the BFI encoding.
 *
 * Opcode word by the files of src(2) and, nested, src(1). When src(2) is in
 * a constant buffer, src(1) is encoded in the 0x27 register slot and src(2)
 * takes the constant-buffer slot. */
CodeEmitterGM107::emitBFI()
2293
switch(insn->src(2).getFile()) {
2295
switch (insn->src(1).getFile()) {
2297
emitInsn(0x5bf00000);
2298
emitGPR (0x14, insn->src(1));
2300
case FILE_MEMORY_CONST:
2301
emitInsn(0x4bf00000);
2302
emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
2304
case FILE_IMMEDIATE:
2305
emitInsn(0x36f00000);
2306
emitIMMD(0x14, 19, insn->src(1));
2309
assert(!"bad src1 file");
2312
emitGPR (0x27, insn->src(2));
2314
case FILE_MEMORY_CONST:
2315
emitInsn(0x53f00000);
2316
emitGPR (0x27, insn->src(1));
2317
emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(2));
2320
assert(!"bad src2 file");
2325
emitGPR (0x08, insn->src(0));
2326
emitGPR (0x00, insn->def(0));
2330
/* Emit the BFE encoding.
 *
 * Opcode word by file of src(1). dType signedness at 0x30; bit 0x28 is set
 * when subOp equals NV50_IR_SUBOP_EXTBF_REV. */
CodeEmitterGM107::emitBFE()
2332
switch (insn->src(1).getFile()) {
2334
emitInsn(0x5c000000);
2335
emitGPR (0x14, insn->src(1));
2337
case FILE_MEMORY_CONST:
2338
emitInsn(0x4c000000);
2339
emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
2341
case FILE_IMMEDIATE:
2342
emitInsn(0x38000000);
2343
emitIMMD(0x14, 19, insn->src(1));
2346
assert(!"bad src1 file");
2350
emitField(0x30, 1, isSignedType(insn->dType));
2352
emitField(0x28, 1, insn->subOp == NV50_IR_SUBOP_EXTBF_REV);
2353
emitGPR (0x08, insn->src(0));
2354
emitGPR (0x00, insn->def(0));
2358
/* Emit the FLO encoding.
 *
 * The single source src(0) is placed in the 0x14 operand slot; the opcode
 * word is chosen by its file. dType signedness at 0x30, bit 0x29 set when
 * subOp equals NV50_IR_SUBOP_BFIND_SAMT, invert modifier of src(0) at 0x28.
 * NOTE(review): the assert text says "bad src1 file" although the switch
 * inspects src(0). */
CodeEmitterGM107::emitFLO()
2360
switch (insn->src(0).getFile()) {
2362
emitInsn(0x5c300000);
2363
emitGPR (0x14, insn->src(0));
2365
case FILE_MEMORY_CONST:
2366
emitInsn(0x4c300000);
2367
emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(0));
2369
case FILE_IMMEDIATE:
2370
emitInsn(0x38300000);
2371
emitIMMD(0x14, 19, insn->src(0));
2374
assert(!"bad src1 file");
2378
emitField(0x30, 1, isSignedType(insn->dType));
2380
emitField(0x29, 1, insn->subOp == NV50_IR_SUBOP_BFIND_SAMT);
2381
emitINV (0x28, insn->src(0));
2382
emitGPR (0x00, insn->def(0));
2386
/* Emit the PRMT encoding.
 *
 * Opcode word by file of src(1). The 3-bit subOp is stored at 0x30 and
 * src(2) is always encoded as a GPR at 0x27. */
CodeEmitterGM107::emitPRMT()
2388
switch (insn->src(1).getFile()) {
2390
emitInsn(0x5bc00000);
2391
emitGPR (0x14, insn->src(1));
2393
case FILE_MEMORY_CONST:
2394
emitInsn(0x4bc00000);
2395
emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
2397
case FILE_IMMEDIATE:
2398
emitInsn(0x36c00000);
2399
emitIMMD(0x14, 19, insn->src(1));
2402
assert(!"bad src1 file");
2406
emitField(0x30, 3, insn->subOp);
2407
emitGPR (0x27, insn->src(2));
2408
emitGPR (0x08, insn->src(0));
2409
emitGPR (0x00, insn->def(0));
2412
/*******************************************************************************
2414
******************************************************************************/
2417
/* Write the 3-bit load/store size selector for `type` at bit `pos`.
 *
 * Mapping by typeSizeof(type): 1 byte -> 0 (unsigned) / 1 (signed),
 * 2 bytes -> 2 (unsigned) / 3 (signed), 4 -> 4, 8 -> 5, 16 -> 6.
 * Any other size trips the "bad type" assert. */
CodeEmitterGM107::emitLDSTs(int pos, DataType type)
2421
switch (typeSizeof(type)) {
2422
case 1: data = isSignedType(type) ? 1 : 0; break;
2423
case 2: data = isSignedType(type) ? 3 : 2; break;
2424
case 4: data = 4; break;
2425
case 8: data = 5; break;
2426
case 16: data = 6; break;
2428
assert(!"bad type");
2432
emitField(pos, 3, data);
2436
/* Write the 2-bit cache-mode selector for the current instruction at `pos`.
 *
 * Mapping of insn->cache: CACHE_CA -> 0, CACHE_CG -> 1, CACHE_CS -> 2,
 * CACHE_CV -> 3. Anything else trips the "invalid caching mode" assert. */
CodeEmitterGM107::emitLDSTc(int pos)
2440
switch (insn->cache) {
2441
case CACHE_CA: mode = 0; break;
2442
case CACHE_CG: mode = 1; break;
2443
case CACHE_CS: mode = 2; break;
2444
case CACHE_CV: mode = 3; break;
2446
assert(!"invalid caching mode");
2450
emitField(pos, 2, mode);
2454
/* Emit the LDC encoding (opcode word 0xef900000): size selector from dType
 * at 0x30, 2-bit subOp at 0x2c, constant-buffer source src(0) and the
 * destination GPR at 0x00. */
CodeEmitterGM107::emitLDC()
2456
emitInsn (0xef900000);
2457
emitLDSTs(0x30, insn->dType);
2458
emitField(0x2c, 2, insn->subOp);
2459
emitCBUF (0x24, 0x08, 0x14, 16, 0, insn->src(0));
2460
emitGPR (0x00, insn->def(0));
2464
/* Emit the LDL encoding (opcode word 0xef400000): size selector from dType
 * at 0x30, 24-bit address from src(0), destination GPR at 0x00. */
CodeEmitterGM107::emitLDL()
2466
emitInsn (0xef400000);
2467
emitLDSTs(0x30, insn->dType);
2469
emitADDR (0x08, 0x14, 24, 0, insn->src(0));
2470
emitGPR (0x00, insn->def(0));
2474
/* Emit the LDS encoding (opcode word 0xef480000): size selector from dType
 * at 0x30, 24-bit address from src(0), destination GPR at 0x00. */
CodeEmitterGM107::emitLDS()
2476
emitInsn (0xef480000);
2477
emitLDSTs(0x30, insn->dType);
2478
emitADDR (0x08, 0x14, 24, 0, insn->src(0));
2479
emitGPR (0x00, insn->def(0));
2483
/* Emit the LD encoding (opcode word 0x80000000): size selector from dType
 * at 0x35, 32-bit address from src(0), destination GPR at 0x00.
 * Bit 0x34 is set when the indirect address register of src(0) is 8 bytes
 * wide.
 * NOTE(review): getIndirect(0) is dereferenced without a null check, so this
 * presumably requires src(0) to always carry an indirect - confirm. */
CodeEmitterGM107::emitLD()
2485
emitInsn (0x80000000);
2488
emitLDSTs(0x35, insn->dType);
2489
emitField(0x34, 1, insn->src(0).getIndirect(0)->getSize() == 8);
2490
emitADDR (0x08, 0x14, 32, 0, insn->src(0));
2491
emitGPR (0x00, insn->def(0));
2495
/* Emit the STL encoding (opcode word 0xef500000): size selector from dType
 * at 0x30, 24-bit address from src(0); the stored value src(1) goes in the
 * 0x00 register slot. */
CodeEmitterGM107::emitSTL()
2497
emitInsn (0xef500000);
2498
emitLDSTs(0x30, insn->dType);
2500
emitADDR (0x08, 0x14, 24, 0, insn->src(0));
2501
emitGPR (0x00, insn->src(1));
2505
/* Emit the STS encoding (opcode word 0xef580000): size selector from dType
 * at 0x30, 24-bit address from src(0); the stored value src(1) goes in the
 * 0x00 register slot. */
CodeEmitterGM107::emitSTS()
2507
emitInsn (0xef580000);
2508
emitLDSTs(0x30, insn->dType);
2509
emitADDR (0x08, 0x14, 24, 0, insn->src(0));
2510
emitGPR (0x00, insn->src(1));
2514
/* Emit the ST encoding (opcode word 0xa0000000): size selector from dType
 * at 0x35, 32-bit address from src(0); the stored value src(1) goes in the
 * 0x00 register slot. Bit 0x34 is set when the indirect address register of
 * src(0) is 8 bytes wide.
 * NOTE(review): getIndirect(0) is dereferenced without a null check. */
CodeEmitterGM107::emitST()
2516
emitInsn (0xa0000000);
2519
emitLDSTs(0x35, insn->dType);
2520
emitField(0x34, 1, insn->src(0).getIndirect(0)->getSize() == 8);
2521
emitADDR (0x08, 0x14, 32, 0, insn->src(0));
2522
emitGPR (0x00, insn->src(1));
2526
/* Emit the ALD encoding (opcode word 0xefd80000).
 *
 * The 2-bit field at 0x2f holds the destination size in 32-bit words minus
 * one. The second indirect of src(0) is encoded as a GPR at 0x27, and the
 * 10-bit address of src(0) at bit 20. */
CodeEmitterGM107::emitALD()
2528
emitInsn (0xefd80000);
2529
emitField(0x2f, 2, (insn->getDef(0)->reg.size / 4) - 1);
2530
emitGPR (0x27, insn->src(0).getIndirect(1));
2533
emitADDR (0x08, 20, 10, 0, insn->src(0));
2534
emitGPR (0x00, insn->def(0));
2538
/* Emit the AST encoding (opcode word 0xeff00000).
 *
 * The 2-bit field at 0x2f holds typeSizeof(dType) in 32-bit words minus one.
 * The second indirect of src(0) is encoded as a GPR at 0x27, the 10-bit
 * address of src(0) at bit 20, and the stored value src(1) at 0x00. */
CodeEmitterGM107::emitAST()
2540
emitInsn (0xeff00000);
2541
emitField(0x2f, 2, (typeSizeof(insn->dType) / 4) - 1);
2542
emitGPR (0x27, insn->src(0).getIndirect(1));
2544
emitADDR (0x08, 20, 10, 0, insn->src(0));
2545
emitGPR (0x00, insn->src(1));
2549
/* Emit the ISBERD encoding (opcode word 0xefd00000) with src(0) at 0x08 and
 * the destination GPR at 0x00. */
CodeEmitterGM107::emitISBERD()
2551
emitInsn(0xefd00000);
2552
emitGPR (0x08, insn->src(0));
2553
emitGPR (0x00, insn->def(0));
2557
/* Emit the AL2P encoding (opcode word 0xefa00000).
 *
 * The 2-bit field at 0x2f holds the destination size in 32-bit words minus
 * one; the 11-bit field at 0x14 holds the offset of src(0), and the first
 * indirect of src(0) is encoded as a GPR at 0x08. */
CodeEmitterGM107::emitAL2P()
2559
emitInsn (0xefa00000);
2560
emitField(0x2f, 2, (insn->getDef(0)->reg.size / 4) - 1);
2563
emitField(0x14, 11, insn->src(0).get()->reg.data.offset);
2564
emitGPR (0x08, insn->src(0).getIndirect(0));
2565
emitGPR (0x00, insn->def(0));
2569
/* Fixup callback: patch the interpolation fields of an already-emitted
 * instruction located at code[entry->loc] / code[entry->loc + 1].
 *
 * The interpolation mode recorded in the entry is overridden when
 *  - data.flatshade is set and the mode is NV50_IR_INTERP_SC (becomes FLAT),
 *  - else data.force_persample_interp is set, the sample mode is DEFAULT and
 *    the interpolation mode is not FLAT (CENTROID is OR-ed in).
 * Then bits 20..23 of the second word are rewritten from `ipa` (low two bits
 * to 22..23, bits 2..3 to 20..21) and bits 20..27 of the first word are
 * replaced with entry->reg. */
gm107_interpApply(const FixupEntry *entry, uint32_t *code, const FixupData& data)
2571
int ipa = entry->ipa;
2572
int reg = entry->reg;
2573
int loc = entry->loc;
2575
if (data.flatshade &&
2576
(ipa & NV50_IR_INTERP_MODE_MASK) == NV50_IR_INTERP_SC) {
2577
ipa = NV50_IR_INTERP_FLAT;
2579
} else if (data.force_persample_interp &&
2580
(ipa & NV50_IR_INTERP_SAMPLE_MASK) == NV50_IR_INTERP_DEFAULT &&
2581
(ipa & NV50_IR_INTERP_MODE_MASK) != NV50_IR_INTERP_FLAT) {
2582
ipa |= NV50_IR_INTERP_CENTROID;
2584
code[loc + 1] &= ~(0xf << 0x14);
2585
code[loc + 1] |= (ipa & 0x3) << 0x16;
2586
code[loc + 1] |= (ipa & 0xc) << (0x14 - 2);
2587
code[loc + 0] &= ~(0xff << 0x14);
2588
code[loc + 0] |= reg << 0x14;
2592
/* Emit the IPA (interpolate) encoding, opcode word 0xe0000000.
 *
 * Interpolation mode -> ipam: LINEAR 0, PERSPECTIVE 1, FLAT 2, SC 3.
 * Sample mode        -> ipas: DEFAULT 0, CENTROID 1, OFFSET 2.
 * ipam is stored at 0x36, ipas at 0x34, and the constant 7 at 0x2f.
 * For OP_PINTERP src(1) is encoded at 0x14 (and src(2) at 0x27 in OFFSET
 * mode); otherwise src(1) is used for the offset at 0x27. In both paths an
 * interpolation fixup is registered through addInterp() with
 * gm107_interpApply as the callback.
 * NOTE(review): the statement controlled by the final `if` is not visible in
 * this excerpt. */
CodeEmitterGM107::emitIPA()
2594
int ipam = 0, ipas = 0;
2596
switch (insn->getInterpMode()) {
2597
case NV50_IR_INTERP_LINEAR : ipam = 0; break;
2598
case NV50_IR_INTERP_PERSPECTIVE: ipam = 1; break;
2599
case NV50_IR_INTERP_FLAT : ipam = 2; break;
2600
case NV50_IR_INTERP_SC : ipam = 3; break;
2602
assert(!"invalid ipa mode");
2606
switch (insn->getSampleMode()) {
2607
case NV50_IR_INTERP_DEFAULT : ipas = 0; break;
2608
case NV50_IR_INTERP_CENTROID: ipas = 1; break;
2609
case NV50_IR_INTERP_OFFSET : ipas = 2; break;
2611
assert(!"invalid ipa sample mode");
2615
emitInsn (0xe0000000);
2616
emitField(0x36, 2, ipam);
2617
emitField(0x34, 2, ipas);
2619
emitField(0x2f, 3, 7);
2620
emitADDR (0x08, 0x1c, 10, 0, insn->src(0));
2621
/* If the 8-bit field at 0x08 written by emitADDR() is not 0xff, flag the
 * access as indexed. */
if ((code[0] & 0x0000ff00) != 0x0000ff00)
2622
code[1] |= 0x00000040; /* .idx */
2623
emitGPR(0x00, insn->def(0));
2625
if (insn->op == OP_PINTERP) {
2626
emitGPR(0x14, insn->src(1));
2627
if (insn->getSampleMode() == NV50_IR_INTERP_OFFSET)
2628
emitGPR(0x27, insn->src(2));
2629
addInterp(insn->ipa, insn->getSrc(1)->reg.data.id, gm107_interpApply);
2631
if (insn->getSampleMode() == NV50_IR_INTERP_OFFSET)
2632
emitGPR(0x27, insn->src(1));
2634
addInterp(insn->ipa, 0xff, gm107_interpApply);
2637
if (insn->getSampleMode() != NV50_IR_INTERP_OFFSET)
2642
/* Emit the ATOM encoding.
 *
 * Compare-and-swap (subOp == ATOM_CAS) uses opcode word 0xee000000 and only
 * accepts U32 (0) / U64 (1). Every other sub-operation uses 0xed000000 with
 * the type selector U32 0, S32 1, U64 2, F32 3, B128 4, S64 5. Unexpected
 * types assert and fall back to 0.
 * subOp is stored as 4 bits at 0x34, dType as 3 bits at 0x31, and bit 0x30
 * is set when the indirect address register of src(0) is 8 bytes wide.
 * NOTE(review): the subOp values assigned for CAS and EXCH are on lines not
 * visible in this excerpt. */
CodeEmitterGM107::emitATOM()
2644
unsigned dType, subOp;
2646
if (insn->subOp == NV50_IR_SUBOP_ATOM_CAS) {
2647
switch (insn->dType) {
2648
case TYPE_U32: dType = 0; break;
2649
case TYPE_U64: dType = 1; break;
2650
default: assert(!"unexpected dType"); dType = 0; break;
2654
emitInsn (0xee000000);
2656
switch (insn->dType) {
2657
case TYPE_U32: dType = 0; break;
2658
case TYPE_S32: dType = 1; break;
2659
case TYPE_U64: dType = 2; break;
2660
case TYPE_F32: dType = 3; break;
2661
case TYPE_B128: dType = 4; break;
2662
case TYPE_S64: dType = 5; break;
2663
default: assert(!"unexpected dType"); dType = 0; break;
2665
if (insn->subOp == NV50_IR_SUBOP_ATOM_EXCH)
2668
subOp = insn->subOp;
2670
emitInsn (0xed000000);
2673
emitField(0x34, 4, subOp);
2674
emitField(0x31, 3, dType);
2675
emitField(0x30, 1, insn->src(0).getIndirect(0)->getSize() == 8);
2676
emitGPR (0x14, insn->src(1));
2677
emitADDR (0x08, 0x1c, 20, 0, insn->src(0));
2678
emitGPR (0x00, insn->def(0));
2682
/* Emit the ATOMS encoding.
 *
 * Compare-and-swap (subOp == ATOM_CAS) uses opcode word 0xee000000 with a
 * 1-bit type selector at 0x34 (U32 0, U64 1). Other sub-operations use
 * 0xec000000 with a 3-bit type selector at 0x1c (U32 0, S32 1, U64 2,
 * S64 3). Unexpected types assert and fall back to 0.
 * The address is encoded with emitADDR(0x08, 0x1e, 22, 2, ...).
 * NOTE(review): the subOp values assigned for CAS and EXCH, and the scope of
 * the emitField(0x34, 4, subOp) call, are not visible in this excerpt. */
CodeEmitterGM107::emitATOMS()
2684
unsigned dType, subOp;
2686
if (insn->subOp == NV50_IR_SUBOP_ATOM_CAS) {
2687
switch (insn->dType) {
2688
case TYPE_U32: dType = 0; break;
2689
case TYPE_U64: dType = 1; break;
2690
default: assert(!"unexpected dType"); dType = 0; break;
2694
emitInsn (0xee000000);
2695
emitField(0x34, 1, dType);
2697
switch (insn->dType) {
2698
case TYPE_U32: dType = 0; break;
2699
case TYPE_S32: dType = 1; break;
2700
case TYPE_U64: dType = 2; break;
2701
case TYPE_S64: dType = 3; break;
2702
default: assert(!"unexpected dType"); dType = 0; break;
2705
if (insn->subOp == NV50_IR_SUBOP_ATOM_EXCH)
2708
subOp = insn->subOp;
2710
emitInsn (0xec000000);
2711
emitField(0x1c, 3, dType);
2714
emitField(0x34, 4, subOp);
2715
emitGPR (0x14, insn->src(1));
2716
emitADDR (0x08, 0x1e, 22, 2, insn->src(0));
2717
emitGPR (0x00, insn->def(0));
2721
/* Emit the RED encoding (opcode word 0xebf80000).
 *
 * Type selector (3 bits at 0x14): U32 0, S32 1, U64 2, F32 3, B128 4,
 * S64 5; unexpected types assert and fall back to 0. The 3-bit subOp is
 * stored at 0x17, bit 0x30 is set when the indirect address register of
 * src(0) is 8 bytes wide, and the operand src(1) goes in the 0x00 slot. */
CodeEmitterGM107::emitRED()
2725
switch (insn->dType) {
2726
case TYPE_U32: dType = 0; break;
2727
case TYPE_S32: dType = 1; break;
2728
case TYPE_U64: dType = 2; break;
2729
case TYPE_F32: dType = 3; break;
2730
case TYPE_B128: dType = 4; break;
2731
case TYPE_S64: dType = 5; break;
2732
default: assert(!"unexpected dType"); dType = 0; break;
2735
emitInsn (0xebf80000);
2736
emitField(0x30, 1, insn->src(0).getIndirect(0)->getSize() == 8);
2737
emitField(0x17, 3, insn->subOp);
2738
emitField(0x14, 3, dType);
2739
emitADDR (0x08, 0x1c, 20, 0, insn->src(0));
2740
emitGPR (0x00, insn->src(1));
2744
/* Emit the CCTL encoding.
 *
 * Opcode word 0xef600000 is used when src(0) is in FILE_MEMORY_GLOBAL and
 * 0xef800000 otherwise. Bit 0x34 is set when the indirect address register
 * of src(0) is 8 bytes wide; the 4-bit subOp is stored at 0x00.
 * NOTE(review): `width` is assigned on lines not visible in this excerpt. */
CodeEmitterGM107::emitCCTL()
2747
if (insn->src(0).getFile() == FILE_MEMORY_GLOBAL) {
2748
emitInsn(0xef600000);
2751
emitInsn(0xef800000);
2754
emitField(0x34, 1, insn->src(0).getIndirect(0)->getSize() == 8);
2755
emitADDR (0x08, 0x16, width, 2, insn->src(0));
2756
emitField(0x00, 4, insn->subOp);
2759
/*******************************************************************************
2761
******************************************************************************/
2764
/* Emit the PIXLD encoding (opcode word 0xefe80000): 3-bit subOp at 0x1f,
 * src(0) at 0x08 and the destination GPR at 0x00. */
CodeEmitterGM107::emitPIXLD()
2766
emitInsn (0xefe80000);
2768
emitField(0x1f, 3, insn->subOp);
2769
emitGPR (0x08, insn->src(0));
2770
emitGPR (0x00, insn->def(0));
2773
/*******************************************************************************
2775
******************************************************************************/
2778
/* Encode the second texture source register at `pos`.
 *
 * The source index is 1, or 2 when source 1 is the instruction's predicate
 * (predSrc == 1). The register is only written when that source exists; the
 * fallback arm is not visible in this excerpt. */
CodeEmitterGM107::emitTEXs(int pos)
2780
int src1 = insn->predSrc == 1 ? 2 : 1;
2781
if (insn->srcExists(src1))
2782
emitGPR(pos, insn->src(src1));
2788
/* Translate a 4-bit component write mask into the selector used by the
 * TEXS-style encodings (see the emitField(0x32, 3, ...) callers).
 *
 * Several masks share a selector value (e.g. 0x1 and 0x7 both map to 0x0).
 * NOTE(review): presumably these are disambiguated elsewhere, e.g. by
 * whether a second destination register exists - confirm.
 * Masks 0x5 and 0x6 have no entry among the visible cases; the
 * "invalid mask" assert is reached for unlisted values. */
getTEXSMask(uint8_t mask)
2791
case 0x1: return 0x0;
2792
case 0x2: return 0x1;
2793
case 0x3: return 0x4;
2794
case 0x4: return 0x2;
2795
case 0x7: return 0x0;
2796
case 0x8: return 0x3;
2797
case 0x9: return 0x5;
2798
case 0xa: return 0x6;
2799
case 0xb: return 0x1;
2800
case 0xc: return 0x7;
2801
case 0xd: return 0x2;
2802
case 0xe: return 0x3;
2803
case 0xf: return 0x4;
2805
assert(!"invalid mask");
2811
/* Pick the target selector for a TEXS-style encoding of `tex`.
 *
 * Only OP_TEX and OP_TXL are accepted (asserted). The result depends on the
 * texture target enum, on tex.levelZero and on whether the op is OP_TXL.
 * NOTE(review): the returned constants and some case labels are on lines not
 * visible in this excerpt, so the mapping itself is not documented here. */
getTEXSTarget(const TexInstruction *tex)
2813
assert(tex->op == OP_TEX || tex->op == OP_TXL);
2815
switch (tex->tex.target.getEnum()) {
2817
assert(tex->tex.levelZero);
2820
case TEX_TARGET_RECT:
2821
if (tex->tex.levelZero)
2823
if (tex->op == OP_TXL)
2826
case TEX_TARGET_2D_SHADOW:
2827
case TEX_TARGET_RECT_SHADOW:
2828
if (tex->tex.levelZero)
2830
if (tex->op == OP_TXL)
2833
case TEX_TARGET_2D_ARRAY:
2834
if (tex->tex.levelZero)
2837
case TEX_TARGET_2D_ARRAY_SHADOW:
2838
assert(tex->tex.levelZero);
2841
if (tex->tex.levelZero)
2843
assert(tex->op != OP_TXL);
2845
case TEX_TARGET_CUBE:
2846
assert(!tex->tex.levelZero);
2847
if (tex->op == OP_TXL)
2857
/* Pick the target selector for a TLDS-style encoding of `tex`.
 *
 * The result depends on the texture target enum, tex.levelZero and
 * tex.useOffsets. For TEX_TARGET_RECT (and any case labels sharing its arm)
 * the visible returns are: level zero -> 0x4 with offsets / 0x2 without,
 * otherwise 0xc with offsets / 0x5 without.
 * NOTE(review): the remaining return values and some case labels are on
 * lines not visible in this excerpt. */
getTLDSTarget(const TexInstruction *tex)
2859
switch (tex->tex.target.getEnum()) {
2861
if (tex->tex.levelZero)
2865
case TEX_TARGET_RECT:
2866
if (tex->tex.levelZero)
2867
return tex->tex.useOffsets ? 0x4 : 0x2;
2868
return tex->tex.useOffsets ? 0xc : 0x5;
2869
case TEX_TARGET_2D_MS:
2870
assert(tex->tex.levelZero);
2873
assert(tex->tex.levelZero);
2875
case TEX_TARGET_2D_ARRAY:
2876
assert(tex->tex.levelZero);
2886
/* Emit the TEX / TXB / TXL encoding.
 *
 * When the level is not forced to zero, the LOD mode `lodm` is derived from
 * the op: OP_TEX 0, OP_TXB 2, OP_TXL 3.
 * Indirect form (tex.rIndirectSrc >= 0): opcode word 0xdeb80000, lodm at
 * 0x25, offset flag at 0x24. Direct form: opcode word 0xc0380000, lodm at
 * 0x37, offset flag at 0x36, and the 13-bit tex.r at 0x24.
 * Common fields: shadow 0x32, liveOnly 0x31, derivAll 0x23, write mask 0x1f,
 * dimension 0x1d (3 for cube maps, otherwise getDim() - 1), array 0x1c. */
CodeEmitterGM107::emitTEX()
2888
const TexInstruction *insn = this->insn->asTex();
2891
if (!insn->tex.levelZero) {
2893
case OP_TEX: lodm = 0; break;
2894
case OP_TXB: lodm = 2; break;
2895
case OP_TXL: lodm = 3; break;
2897
assert(!"invalid tex op");
2904
if (insn->tex.rIndirectSrc >= 0) {
2905
emitInsn (0xdeb80000);
2906
emitField(0x25, 2, lodm);
2907
emitField(0x24, 1, insn->tex.useOffsets == 1);
2909
emitInsn (0xc0380000);
2910
emitField(0x37, 2, lodm);
2911
emitField(0x36, 1, insn->tex.useOffsets == 1);
2912
emitField(0x24, 13, insn->tex.r);
2915
emitField(0x32, 1, insn->tex.target.isShadow());
2916
emitField(0x31, 1, insn->tex.liveOnly);
2917
emitField(0x23, 1, insn->tex.derivAll);
2918
emitField(0x1f, 4, insn->tex.mask);
2919
emitField(0x1d, 2, insn->tex.target.isCube() ? 3 :
2920
insn->tex.target.getDim() - 1);
2921
emitField(0x1c, 1, insn->tex.target.isArray());
2923
emitGPR (0x08, insn->src(0));
2924
emitGPR (0x00, insn->def(0));
2928
/* Emit one of the scalar texture encodings; derivAll must be clear
 * (asserted).
 *
 * Three opcode words are visible:
 *  - 0xd8000000: target from getTEXSTarget(), mask from getTEXSMask();
 *  - 0xda000000: target from getTLDSTarget(), mask from getTEXSMask();
 *  - 0xdf000000: gather component at 0x34, offset flag at 0x33, shadow flag
 *    at 0x32 (useOffsets == 4 is rejected by assert).
 * Any other op reaches unreachable(). Common fields: liveOnly at 0x31, the
 * 13-bit tex.r at 0x24, optional second destination at 0x1c and optional
 * second source at 0x14.
 * NOTE(review): the case labels selecting each opcode are not visible in
 * this excerpt. */
CodeEmitterGM107::emitTEXS()
2930
const TexInstruction *insn = this->insn->asTex();
2931
assert(!insn->tex.derivAll);
2936
emitInsn (0xd8000000);
2937
emitField(0x35, 4, getTEXSTarget(insn));
2938
emitField(0x32, 3, getTEXSMask(insn->tex.mask));
2941
emitInsn (0xda000000);
2942
emitField(0x35, 4, getTLDSTarget(insn));
2943
emitField(0x32, 3, getTEXSMask(insn->tex.mask));
2946
assert(insn->tex.useOffsets != 4);
2947
emitInsn (0xdf000000);
2948
emitField(0x34, 2, insn->tex.gatherComp);
2949
emitField(0x33, 1, insn->tex.useOffsets == 1);
2950
emitField(0x32, 1, insn->tex.target.isShadow());
2953
unreachable("unknown op in emitTEXS()");
2957
emitField(0x31, 1, insn->tex.liveOnly);
2958
emitField(0x24, 13, insn->tex.r);
2959
if (insn->defExists(1))
2960
emitGPR(0x1c, insn->def(1));
2963
if (insn->srcExists(1))
2964
emitGPR(0x14, insn->getSrc(1));
2967
emitGPR (0x08, insn->src(0));
2968
emitGPR (0x00, insn->def(0));
2972
/* Emit the TLD encoding.
 *
 * Indirect form (tex.rIndirectSrc >= 0): opcode word 0xdd380000.
 * Direct form: opcode word 0xdc380000 with the 13-bit tex.r at 0x24.
 * Fields: bit 0x37 set when levelZero is 0, multisample flag 0x32, liveOnly
 * 0x31, offset flag 0x23, write mask 0x1f, dimension 0x1d (3 for cube maps,
 * otherwise getDim() - 1), array 0x1c. */
CodeEmitterGM107::emitTLD()
2974
const TexInstruction *insn = this->insn->asTex();
2976
if (insn->tex.rIndirectSrc >= 0) {
2977
emitInsn (0xdd380000);
2979
emitInsn (0xdc380000);
2980
emitField(0x24, 13, insn->tex.r);
2983
emitField(0x37, 1, insn->tex.levelZero == 0);
2984
emitField(0x32, 1, insn->tex.target.isMS());
2985
emitField(0x31, 1, insn->tex.liveOnly);
2986
emitField(0x23, 1, insn->tex.useOffsets == 1);
2987
emitField(0x1f, 4, insn->tex.mask);
2988
emitField(0x1d, 2, insn->tex.target.isCube() ? 3 :
2989
insn->tex.target.getDim() - 1);
2990
emitField(0x1c, 1, insn->tex.target.isArray());
2992
emitGPR (0x08, insn->src(0));
2993
emitGPR (0x00, insn->def(0));
2997
/* Emit the TLD4 (gather) encoding.
 *
 * Indirect form (tex.rIndirectSrc >= 0): opcode word 0xdef80000 with the
 * gather component at 0x26 and the offset-mode flags at 0x25 / 0x24.
 * Direct form: opcode word 0xc8380000 with the gather component at 0x38,
 * offset-mode flags at 0x37 / 0x36 and the 13-bit tex.r at 0x24.
 * NOTE(review): the two offset flags are boolean values written with a field
 * width of 2 at adjacent bit positions, so the declared fields overlap each
 * other and the gather component. The values written are only 0 or 1, so no
 * stray bits result, but a width of 1 looks like what was intended. */
CodeEmitterGM107::emitTLD4()
2999
const TexInstruction *insn = this->insn->asTex();
3001
if (insn->tex.rIndirectSrc >= 0) {
3002
emitInsn (0xdef80000);
3003
emitField(0x26, 2, insn->tex.gatherComp);
3004
emitField(0x25, 2, insn->tex.useOffsets == 4);
3005
emitField(0x24, 2, insn->tex.useOffsets == 1);
3007
emitInsn (0xc8380000);
3008
emitField(0x38, 2, insn->tex.gatherComp);
3009
emitField(0x37, 2, insn->tex.useOffsets == 4);
3010
emitField(0x36, 2, insn->tex.useOffsets == 1);
3011
emitField(0x24, 13, insn->tex.r);
3014
emitField(0x32, 1, insn->tex.target.isShadow());
3015
emitField(0x31, 1, insn->tex.liveOnly);
3016
emitField(0x23, 1, insn->tex.derivAll);
3017
emitField(0x1f, 4, insn->tex.mask);
3018
emitField(0x1d, 2, insn->tex.target.isCube() ? 3 :
3019
insn->tex.target.getDim() - 1);
3020
emitField(0x1c, 1, insn->tex.target.isArray());
3022
emitGPR (0x08, insn->src(0));
3023
emitGPR (0x00, insn->def(0));
3027
/* Emit the TXD encoding.
 *
 * Indirect form (tex.rIndirectSrc >= 0): opcode word 0xde780000.
 * Direct form: opcode word 0xde380000 with the 13-bit tex.r at 0x24.
 * Fields: liveOnly 0x31, offset flag 0x23, write mask 0x1f, dimension 0x1d
 * (3 for cube maps, otherwise getDim() - 1), array 0x1c. */
CodeEmitterGM107::emitTXD()
3029
const TexInstruction *insn = this->insn->asTex();
3031
if (insn->tex.rIndirectSrc >= 0) {
3032
emitInsn (0xde780000);
3034
emitInsn (0xde380000);
3035
emitField(0x24, 13, insn->tex.r);
3038
emitField(0x31, 1, insn->tex.liveOnly);
3039
emitField(0x23, 1, insn->tex.useOffsets == 1);
3040
emitField(0x1f, 4, insn->tex.mask);
3041
emitField(0x1d, 2, insn->tex.target.isCube() ? 3 :
3042
insn->tex.target.getDim() - 1);
3043
emitField(0x1c, 1, insn->tex.target.isArray());
3045
emitGPR (0x08, insn->src(0));
3046
emitGPR (0x00, insn->def(0));
3050
/* Emit the TMML encoding.
 *
 * Indirect form (tex.rIndirectSrc >= 0): opcode word 0xdf600000.
 * Direct form: opcode word 0xdf580000 with the 13-bit tex.r at 0x24.
 * Fields: liveOnly 0x31, derivAll 0x23, write mask 0x1f, dimension 0x1d
 * (3 for cube maps, otherwise getDim() - 1), array 0x1c. */
CodeEmitterGM107::emitTMML()
3052
const TexInstruction *insn = this->insn->asTex();
3054
if (insn->tex.rIndirectSrc >= 0) {
3055
emitInsn (0xdf600000);
3057
emitInsn (0xdf580000);
3058
emitField(0x24, 13, insn->tex.r);
3061
emitField(0x31, 1, insn->tex.liveOnly);
3062
emitField(0x23, 1, insn->tex.derivAll);
3063
emitField(0x1f, 4, insn->tex.mask);
3064
emitField(0x1d, 2, insn->tex.target.isCube() ? 3 :
3065
insn->tex.target.getDim() - 1);
3066
emitField(0x1c, 1, insn->tex.target.isArray());
3068
emitGPR (0x08, insn->src(0));
3069
emitGPR (0x00, insn->def(0));
3073
/* Emit the TXQ (texture query) encoding.
 *
 * Query -> 6-bit type at 0x16: DIMS 0x01, TYPE 0x02, SAMPLE_POSITION 0x05,
 * FILTER 0x10, LOD 0x12, WRAP 0x14, BORDER_COLOUR 0x16; other queries trip
 * the "invalid txq query" assert.
 * Indirect form (tex.rIndirectSrc >= 0): opcode word 0xdf500000.
 * Direct form: opcode word 0xdf480000 with the 13-bit tex.r at 0x24.
 * liveOnly is stored at 0x31 and the write mask at 0x1f. */
CodeEmitterGM107::emitTXQ()
3075
const TexInstruction *insn = this->insn->asTex();
3078
switch (insn->tex.query) {
3079
case TXQ_DIMS : type = 0x01; break;
3080
case TXQ_TYPE : type = 0x02; break;
3081
case TXQ_SAMPLE_POSITION: type = 0x05; break;
3082
case TXQ_FILTER : type = 0x10; break;
3083
case TXQ_LOD : type = 0x12; break;
3084
case TXQ_WRAP : type = 0x14; break;
3085
case TXQ_BORDER_COLOUR : type = 0x16; break;
3087
assert(!"invalid txq query");
3091
if (insn->tex.rIndirectSrc >= 0) {
3092
emitInsn (0xdf500000);
3094
emitInsn (0xdf480000);
3095
emitField(0x24, 13, insn->tex.r);
3098
emitField(0x31, 1, insn->tex.liveOnly);
3099
emitField(0x1f, 4, insn->tex.mask);
3100
emitField(0x16, 6, type);
3101
emitGPR (0x08, insn->src(0));
3102
emitGPR (0x00, insn->def(0));
3106
/* Emit the DEPBAR encoding (opcode word 0xf0f00000).
 *
 * Sets the "le" bit at 0x1d, writes the constant 5 into the 3-bit field at
 * 0x1a, and stores the 6-bit subOp at both 0x14 and 0x00.
 * NOTE(review): the meaning of the constant 5 and of the duplicated subOp
 * field is not established by this excerpt. */
CodeEmitterGM107::emitDEPBAR()
3108
emitInsn (0xf0f00000);
3109
emitField(0x1d, 1, 1); /* le */
3110
emitField(0x1a, 3, 5);
3111
emitField(0x14, 6, insn->subOp);
3112
emitField(0x00, 6, insn->subOp);
3115
/*******************************************************************************
3117
******************************************************************************/
3120
/* Emit the NOP encoding: the opcode word 0x50b00000 with no operands. */
CodeEmitterGM107::emitNOP()
3122
emitInsn(0x50b00000);
3126
/* Emit the KIL encoding (opcode word 0xe3300000) with the always-true
 * condition code CC_TR in the 5-bit condition field at 0x00. */
CodeEmitterGM107::emitKIL()
3128
emitInsn (0xe3300000);
3129
emitCond5(0x00, CC_TR);
3133
/* Emit the OUT encoding used for OP_EMIT / OP_RESTART.
 *
 * `cut` is set for OP_RESTART or whenever subOp is non-zero; `emit` is set
 * for OP_EMIT. They are packed as (cut << 1) | emit into the 2-bit field at
 * 0x27. The opcode word is chosen by the file of src(1). */
CodeEmitterGM107::emitOUT()
3135
const int cut = insn->op == OP_RESTART || insn->subOp;
3136
const int emit = insn->op == OP_EMIT;
3138
switch (insn->src(1).getFile()) {
3140
emitInsn(0xfbe00000);
3141
emitGPR (0x14, insn->src(1));
3143
case FILE_IMMEDIATE:
3144
emitInsn(0xf6e00000);
3145
emitIMMD(0x14, 19, insn->src(1));
3147
case FILE_MEMORY_CONST:
3148
emitInsn(0xebe00000);
3149
emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
3152
assert(!"bad src1 file");
3156
emitField(0x27, 2, (cut << 1) | emit);
3157
emitGPR (0x08, insn->src(0));
3158
emitGPR (0x00, insn->def(0));
3162
// Emit the BAR (barrier) instruction, opcode 0xf0a80000.
//
// Operands, as encoded below:
//   src(0) - encoded at 0x08 as a GPR, or as an 8-bit immediate with flag
//            bit 0x2b set;
//   src(1) - encoded at 0x14 as a GPR, or as a 12-bit immediate with flag
//            bit 0x2c set;
//   src(2) - optional predicate at 0x27 (negation bit at 0x2a), used only
//            when it is not the instruction's own predicate source.
// NOTE(review): the declaration of `subop`, the default label of the switch
// and the else lines are not visible in this view.
CodeEmitterGM107::emitBAR()
3166
emitInsn (0xf0a80000);
3168
// Translate the IR sub-operation into the 8-bit code written at 0x20.
switch (insn->subOp) {
3169
case NV50_IR_SUBOP_BAR_RED_POPC: subop = 0x02; break;
3170
case NV50_IR_SUBOP_BAR_RED_AND: subop = 0x0a; break;
3171
case NV50_IR_SUBOP_BAR_RED_OR: subop = 0x12; break;
3172
case NV50_IR_SUBOP_BAR_ARRIVE: subop = 0x81; break;
3175
// Any sub-operation not listed above must be a plain sync.
assert(insn->subOp == NV50_IR_SUBOP_BAR_SYNC);
3179
emitField(0x20, 8, subop);
3182
// First operand: GPR or immediate taken from source 0.
if (insn->src(0).getFile() == FILE_GPR) {
3183
emitGPR(0x08, insn->src(0));
3185
ImmediateValue *imm = insn->getSrc(0)->asImm();
3187
emitField(0x08, 8, imm->reg.data.u32);
3188
emitField(0x2b, 1, 1);
3192
// Second operand: GPR or immediate taken from source 1. The immediate must
// be read from the same source whose file was just tested; reading source 0
// here would encode the first operand's value a second time (or yield a null
// ImmediateValue when source 0 is a GPR).
if (insn->src(1).getFile() == FILE_GPR) {
3193
emitGPR(0x14, insn->src(1));
3195
ImmediateValue *imm = insn->getSrc(1)->asImm();
3197
emitField(0x14, 12, imm->reg.data.u32);
3198
emitField(0x2c, 1, 1);
3201
// Optional explicit predicate operand; 7 is written at 0x27 below.
// NOTE(review): presumably that write is the else branch (no predicate) -
// the else line is not visible here.
if (insn->srcExists(2) && (insn->predSrc != 2)) {
3202
emitPRED (0x27, insn->src(2));
3203
emitField(0x2a, 1, insn->src(2).mod == Modifier(NV50_IR_MOD_NOT));
3205
emitField(0x27, 3, 7);
3210
// Emit a MEMBAR instruction (opcode 0xef980000). The 2-bit field at 0x08
// receives subOp shifted right by two.
CodeEmitterGM107::emitMEMBAR()
3212
emitInsn (0xef980000);
3213
emitField(0x08, 2, insn->subOp >> 2);
3217
// Emit a VOTE instruction (opcode 0x50d80000). The instruction may define a
// GPR and/or a predicate; the source is either a predicate or an immediate.
// NOTE(review): the declarations of `u32`, `r` and `p`, the assignments
// inside the def loop and the else branches are not visible in this view.
CodeEmitterGM107::emitVOTE()
3219
const ImmediateValue *imm;
3223
// Scan the defs to find which one is a GPR and which one is a predicate.
for (int i = 0; insn->defExists(i); i++) {
3224
if (insn->def(i).getFile() == FILE_GPR)
3226
else if (insn->def(i).getFile() == FILE_PREDICATE)
3230
emitInsn (0x50d80000);
3231
emitField(0x30, 2, insn->subOp);
3233
emitGPR (0x00, insn->def(r));
3237
emitPRED (0x2d, insn->def(p));
3241
switch (insn->src(0).getFile()) {
3242
case FILE_PREDICATE:
3243
// Negation bit at 0x2a, predicate register at 0x27.
emitField(0x2a, 1, insn->src(0).mod == Modifier(NV50_IR_MOD_NOT));
3244
emitPRED (0x27, insn->src(0));
3246
case FILE_IMMEDIATE:
3247
imm = insn->getSrc(0)->asImm();
3249
u32 = imm->reg.data.u32;
3250
// Only the constants 0 and 1 are accepted; 0 sets the negation bit.
assert(u32 == 0 || u32 == 1);
3252
emitField(0x2a, 1, u32 == 0);
3255
// NOTE(review): presumably the default case (label not visible here).
assert(!"Unhandled src");
3261
// Encode the surface target of a surface instruction into the 4-bit field at
// 0x20. The value is selected by insn->tex.target.
// NOTE(review): the declaration of `target` and every assignment to it are
// not visible in this view, so the individual codes cannot be stated here.
CodeEmitterGM107::emitSUTarget()
3263
const TexInstruction *insn = this->insn->asTex();
3266
// Only valid for opcodes in the range OP_SULDB..OP_SUREDP.
assert(insn->op >= OP_SULDB && insn->op <= OP_SUREDP);
3268
if (insn->tex.target == TEX_TARGET_BUFFER) {
3270
} else if (insn->tex.target == TEX_TARGET_1D_ARRAY) {
3272
// 2D and RECT share one branch.
} else if (insn->tex.target == TEX_TARGET_2D ||
3273
insn->tex.target == TEX_TARGET_RECT) {
3275
// 2D arrays, cubes and cube arrays share one branch.
} else if (insn->tex.target == TEX_TARGET_2D_ARRAY ||
3276
insn->tex.target == TEX_TARGET_CUBE ||
3277
insn->tex.target == TEX_TARGET_CUBE_ARRAY) {
3279
} else if (insn->tex.target == TEX_TARGET_3D) {
3282
// The only target left unmatched above is expected to be 1D.
assert(insn->tex.target == TEX_TARGET_1D);
3284
emitField(0x20, 4, target);
3288
// Encode the surface handle operand of a surface instruction.
//   s - index of the source that holds the handle.
// A GPR handle is encoded at 0x27. Otherwise the source is read as an
// immediate: bit 0x33 is set and the value goes to the 13-bit field at 0x24.
// NOTE(review): the else line separating the two cases is not visible here.
CodeEmitterGM107::emitSUHandle(const int s)
3290
const TexInstruction *insn = this->insn->asTex();
3292
// Only valid for opcodes in the range OP_SULDB..OP_SUREDP.
assert(insn->op >= OP_SULDB && insn->op <= OP_SUREDP);
3294
if (insn->src(s).getFile() == FILE_GPR) {
3295
emitGPR(0x27, insn->src(s));
3297
ImmediateValue *imm = insn->getSrc(s)->asImm();
3299
emitField(0x33, 1, 1);
3300
emitField(0x24, 13, imm->reg.data.u32);
3305
// Emit a surface store (opcode 0xeb200000).
// NOTE(review): lines between the visible ones (else branch, further fields)
// are not shown in this view.
CodeEmitterGM107::emitSUSTx()
3307
const TexInstruction *insn = this->insn->asTex();
3309
emitInsn(0xeb200000);
3310
// OP_SUSTB sets bit 0x34.
if (insn->op == OP_SUSTB)
3311
emitField(0x34, 1, 1);
3315
emitField(0x14, 4, 0xf); // rgba
3316
// Source 0 is encoded at 0x08 and source 1 at 0x00.
emitGPR (0x08, insn->src(0));
3317
emitGPR (0x00, insn->src(1));
3323
// Emit a surface load (opcode 0xeb000000).
// NOTE(review): the declaration of `type`, the default label of the switch
// and some lines between the visible ones are not shown in this view.
CodeEmitterGM107::emitSULDx()
3325
const TexInstruction *insn = this->insn->asTex();
3328
emitInsn(0xeb000000);
3329
// OP_SULDB sets bit 0x34.
if (insn->op == OP_SULDB)
3330
emitField(0x34, 1, 1);
3333
// Map the destination type to the 3-bit code written at 0x14.
switch (insn->dType) {
3334
case TYPE_S8: type = 1; break;
3335
case TYPE_U16: type = 2; break;
3336
case TYPE_S16: type = 3; break;
3337
case TYPE_U32: type = 4; break;
3338
case TYPE_U64: type = 5; break;
3339
case TYPE_B128: type = 6; break;
3341
// Any type not listed above is expected to be U8.
assert(insn->dType == TYPE_U8);
3345
emitField(0x14, 3, type);
3346
// Def 0 is encoded at 0x00 and source 0 at 0x08.
emitGPR (0x00, insn->def(0));
3347
emitGPR (0x08, insn->src(0));
3353
// Emit a surface reduction/atomic instruction.
// NOTE(review): else lines, the default label of the type switch and the
// subOp assignments for the CAS and EXCH branches are not visible in this
// view.
CodeEmitterGM107::emitSUREDx()
3355
const TexInstruction *insn = this->insn->asTex();
3356
uint8_t type = 0, subOp;
3358
// Two opcodes appear below; the CAS test selects 0xeac00000.
// NOTE(review): presumably 0xea600000 is the else branch - confirm.
if (insn->subOp == NV50_IR_SUBOP_ATOM_CAS)
3359
emitInsn(0xeac00000);
3361
emitInsn(0xea600000);
3363
// OP_SUREDB sets bit 0x34.
if (insn->op == OP_SUREDB)
3364
emitField(0x34, 1, 1);
3368
// Map the destination type to the 3-bit code written at 0x24; `type` keeps
// its initial value 0 for types not listed.
switch (insn->dType) {
3369
case TYPE_S32: type = 1; break;
3370
case TYPE_U64: type = 2; break;
3371
case TYPE_F32: type = 3; break;
3372
case TYPE_S64: type = 5; break;
3374
// Any type not listed above is expected to be U32.
assert(insn->dType == TYPE_U32);
3379
// Choose the 4-bit sub-operation code written at 0x1d.
if (insn->subOp == NV50_IR_SUBOP_ATOM_CAS) {
3381
} else if (insn->subOp == NV50_IR_SUBOP_ATOM_EXCH) {
3384
// The IR subOp value is used directly here.
// NOTE(review): presumably this is the final else branch - confirm.
subOp = insn->subOp;
3387
emitField(0x24, 3, type);
3388
emitField(0x1d, 4, subOp);
3389
// Source 1 at 0x14, source 0 at 0x08, def 0 at 0x00.
emitGPR (0x14, insn->src(1));
3390
emitGPR (0x08, insn->src(0));
3391
emitGPR (0x00, insn->def(0));
3396
/*******************************************************************************
3397
* assembler front-end
3398
******************************************************************************/
3401
// Assembler front-end: encode one IR instruction into the output buffer.
// The visible steps are: compute the space needed, reject instructions whose
// encSize is not 8, check for room in the output buffer, write scheduling
// data when writeIssueDelays is set, then select an emit* routine from the
// opcode and operand types/files.
// NOTE(review): most of the opcode dispatch (case labels, emit* calls, return
// statements) is not visible in this view; only the conditions remain.
CodeEmitterGM107::emitInstruction(Instruction *i)
3403
// 16 bytes are needed when issue delays are written and codeSize is a
// multiple of 32, otherwise 8.
const unsigned int size = (writeIssueDelays && !(codeSize & 0x1f)) ? 16 : 8;
3408
if (insn->encSize != 8) {
3409
ERROR("skipping undecodable instruction: "); insn->print();
3412
if (codeSize + size > codeSizeLimit) {
3413
ERROR("code emitter output buffer too small\n");
3417
if (writeIssueDelays) {
3418
// Index derived from the position inside the current 32-byte group.
int n = ((codeSize & 0x1f) / 8) - 1;
3421
// NOTE(review): `data` is declared on a line not visible here, and the
// condition under which it is zeroed is not visible either.
data[0] = 0x00000000;
3422
data[1] = 0x00000000;
3428
// The instruction's sched word goes into a 21-bit field at offset n * 21.
emitField(data, n * 21, 21, insn->sched);
3475
if (targGM107->isCS2RSV(insn->getSrc(0)->reg.data.sv.sv))
3487
// Conversions to or from a predicate are tested first.
if (insn->op == OP_CVT && (insn->def(0).getFile() == FILE_PREDICATE ||
3488
insn->src(0).getFile() == FILE_PREDICATE)) {
3490
} else if (isFloatType(insn->dType)) {
3491
if (isFloatType(insn->sType))
3496
if (isFloatType(insn->sType))
3507
// The following groups choose between float/double/integer variants.
if (isFloatType(insn->dType)) {
3508
if (insn->dType == TYPE_F64)
3517
if (isFloatType(insn->dType)) {
3518
if (insn->dType == TYPE_F64)
3528
if (isFloatType(insn->dType)) {
3529
if (insn->dType == TYPE_F64)
3545
if (isFloatType(insn->dType)) {
3546
if (insn->dType == TYPE_F64)
3555
if (typeSizeof(insn->sType) == 8)
3561
if (typeSizeof(insn->sType) == 8)
3582
if (isFloatType(insn->dType))
3591
if (insn->def(0).getFile() != FILE_PREDICATE) {
3592
if (isFloatType(insn->sType))
3593
if (insn->sType == TYPE_F64)
3600
if (isFloatType(insn->sType))
3601
if (insn->sType == TYPE_F64)
3628
// Boolean ops: a GPR destination uses emitLOP, a predicate uses emitPSETP.
switch (insn->def(0).getFile()) {
3629
case FILE_GPR: emitLOP(); break;
3630
case FILE_PREDICATE: emitPSETP(); break;
3632
assert(!"invalid bool op");
3639
// Loads are dispatched on the memory file of source 0.
switch (insn->src(0).getFile()) {
3640
case FILE_MEMORY_CONST : emitLDC(); break;
3641
case FILE_MEMORY_LOCAL : emitLDL(); break;
3642
case FILE_MEMORY_SHARED: emitLDS(); break;
3643
case FILE_MEMORY_GLOBAL: emitLD(); break;
3645
assert(!"invalid load");
3651
// Stores are dispatched on the memory file of source 0.
switch (insn->src(0).getFile()) {
3652
case FILE_MEMORY_LOCAL : emitSTL(); break;
3653
case FILE_MEMORY_SHARED: emitSTS(); break;
3654
case FILE_MEMORY_GLOBAL: emitST(); break;
3656
assert(!"invalid store");
3662
if (insn->src(0).getFile() == FILE_MEMORY_SHARED)
3665
if (!insn->defExists(0) && insn->subOp < NV50_IR_SUBOP_ATOM_CAS)
3694
// Texture ops test tex.scalar to choose between two encodings.
if (insn->asTex()->tex.scalar)
3703
if (insn->asTex()->tex.scalar)
3709
if (insn->asTex()->tex.scalar)
3761
assert(!"invalid opcode");
3777
CodeEmitterGM107::getMinEncodingSize(const Instruction *i) const
3782
/*******************************************************************************
3783
* sched data calculator
3784
******************************************************************************/
3787
SchedDataCalculatorGM107::emitStall(Instruction *insn, uint8_t cnt)
3794
// Set the yield flag (bit 4) in the instruction's scheduling word.
SchedDataCalculatorGM107::emitYield(Instruction *insn)
3796
insn->sched |= 1 << 4;
3800
// Store a write dependency barrier id in bits 5..7 of the scheduling word.
//   id - barrier index to record.
SchedDataCalculatorGM107::emitWrDepBar(Instruction *insn, uint8_t id)
3803
// If the field is all ones (7), clear it before OR-ing the new id in.
if ((insn->sched & 0xe0) == 0xe0)
3804
insn->sched ^= 0xe0;
3805
insn->sched |= id << 5;
3809
// Store a read dependency barrier id in bits 8..10 of the scheduling word.
//   id - barrier index to record.
SchedDataCalculatorGM107::emitRdDepBar(Instruction *insn, uint8_t id)
3812
// If the field is all ones (7), clear it before OR-ing the new id in.
if ((insn->sched & 0x700) == 0x700)
3813
insn->sched ^= 0x700;
3814
insn->sched |= id << 8;
3818
// Mark barrier `id` in the wait mask of the scheduling word; the mask starts
// at bit 11, one bit per barrier.
SchedDataCalculatorGM107::emitWtDepBar(Instruction *insn, uint8_t id)
3821
insn->sched |= 1 << (11 + id);
3825
// Set the reuse flag for operand slot `id` in the scheduling word; the reuse
// flags start at bit 17, one bit per slot.
SchedDataCalculatorGM107::emitReuse(Instruction *insn, uint8_t id)
3828
insn->sched |= 1 << (17 + id);
3832
// Debug helper: decode the scheduling word of `insn` into its fields and log
// them together with the given cycle.
SchedDataCalculatorGM107::printSchedInfo(int cycle,
3833
const Instruction *insn) const
3835
uint8_t st, yl, wr, rd, wt, ru;
3837
// Field layout of insn->sched, lowest bits first:
//   st (bits 0..3), yl (bit 4), wr (bits 5..7), rd (bits 8..10),
//   wt (bits 11..16), ru (bits 17..20).
st = (insn->sched & 0x00000f) >> 0;
3838
yl = (insn->sched & 0x000010) >> 4;
3839
wr = (insn->sched & 0x0000e0) >> 5;
3840
rd = (insn->sched & 0x000700) >> 8;
3841
wt = (insn->sched & 0x01f800) >> 11;
3842
ru = (insn->sched & 0x1e0000) >> 17;
3844
INFO("cycle %i, (st 0x%x, yl 0x%x, wr 0x%x, rd 0x%x, wt 0x%x, ru 0x%x)\n",
3845
cycle, st, yl, wr, rd, wt, ru);
3849
// Return the stall count stored in the low four bits of the scheduling word.
SchedDataCalculatorGM107::getStall(const Instruction *insn) const
3851
return insn->sched & 0xf;
3855
// Return the write dependency barrier field (bits 5..7) of the scheduling
// word.
SchedDataCalculatorGM107::getWrDepBar(const Instruction *insn) const
3857
return (insn->sched & 0x0000e0) >> 5;
3861
// Return the read dependency barrier field (bits 8..10) of the scheduling
// word.
SchedDataCalculatorGM107::getRdDepBar(const Instruction *insn) const
3863
return (insn->sched & 0x000700) >> 8;
3867
// Return the wait-barrier mask (bits 11..16) of the scheduling word, one bit
// per barrier.
SchedDataCalculatorGM107::getWtDepBar(const Instruction *insn) const
3869
return (insn->sched & 0x01f800) >> 11;
3872
// Emit the reuse flag, which makes use of the operand reuse cache, the new
3873
// level of the memory hierarchy introduced with Maxwell.
3875
// It reduces bank conflicts by caching operands. Each time an instruction is
3876
// issued, the flag can tell the hw which operands are going to be
3877
// re-used by the next instruction. Note that the next instruction has to use
3878
// the same GPR id in the same operand slot.
//
// NOTE(review): the `continue`/`return` statements under the conditions
// below and the final emitReuse() call are not visible in this view.
3880
SchedDataCalculatorGM107::setReuseFlag(Instruction *insn)
3882
Instruction *next = insn->next;
3883
BitSet defs(255, true);
3885
if (!targ->isReuseSupported(insn))
3888
// Collect the ids of the GPRs this instruction writes.
for (int d = 0; insn->defExists(d); ++d) {
3889
const Value *def = insn->def(d).rep();
3890
if (insn->def(d).getFile() != FILE_GPR)
3892
// Filter on 4-byte destination type and on register id 255.
if (typeSizeof(insn->dType) != 4 || def->reg.data.id == 255)
3894
defs.set(def->reg.data.id);
3897
// Examine each source as a candidate for reuse.
for (int s = 0; insn->srcExists(s); s++) {
3898
const Value *src = insn->src(s).rep();
3899
if (insn->src(s).getFile() != FILE_GPR)
3901
if (typeSizeof(insn->sType) != 4 || src->reg.data.id == 255)
3903
// Tests whether the source register is also written by this instruction.
if (defs.test(src->reg.data.id))
3905
// Tests that the next instruction has a GPR source in the same slot...
if (!next->srcExists(s) || next->src(s).getFile() != FILE_GPR)
3907
// ...and whether it refers to the same register id.
if (src->reg.data.id != next->getSrc(s)->reg.data.id)
3915
// Record in the score board when a value written by an instruction becomes
// readable.
//   v     - the written value
//   cycle - cycle at which the writing instruction is issued
//   ready - cycle at which the result is available
// NOTE(review): the case labels for the first and last groups and the break
// statements are not visible in this view.
SchedDataCalculatorGM107::recordWr(const Value *v, int cycle, int ready)
3917
int a = v->reg.data.id, b;
3919
switch (v->reg.file) {
3921
// Register range [a, b): one entry per 4 bytes of the value's size.
b = a + v->reg.size / 4;
3922
for (int r = a; r < b; ++r)
3923
score->rd.r[r] = ready;
3925
case FILE_PREDICATE:
3926
// To immediately use a predicate set by any instructions, the minimum
3927
// number of stall counts is 13.
3928
score->rd.p[a] = cycle + 13;
3931
score->rd.c = ready;
3939
// Raise `delay` so that value `v` is readable when its consumer issues.
//   v     - the value about to be read
//   cycle - current cycle
//   delay - in/out: updated to max(delay, ready - cycle)
// NOTE(review): the declaration of `ready`, the case labels for the first and
// last groups and the break statements are not visible in this view.
SchedDataCalculatorGM107::checkRd(const Value *v, int cycle, int &delay) const
3941
int a = v->reg.data.id, b;
3944
switch (v->reg.file) {
3946
// Register range [a, b): take the latest ready time over all of them.
b = a + v->reg.size / 4;
3947
for (int r = a; r < b; ++r)
3948
ready = MAX2(ready, score->rd.r[r]);
3950
case FILE_PREDICATE:
3951
ready = MAX2(ready, score->rd.p[a]);
3954
ready = MAX2(ready, score->rd.c);
3960
delay = MAX2(delay, ready - cycle);
3964
// Commit an instruction issued at `cycle`: record in the score board when
// each of its defs becomes available.
SchedDataCalculatorGM107::commitInsn(const Instruction *insn, int cycle)
3966
// Results are ready after the target-reported latency of the instruction.
const int ready = cycle + targ->getLatency(insn);
3968
for (int d = 0; insn->defExists(d); ++d)
3969
recordWr(insn->getDef(d), cycle, ready);
3971
// Optional score board dump for debugging.
#ifdef GM107_DEBUG_SCHED_DATA
3972
score->print(cycle);
3976
// Lower and upper bounds used by setDelay() when clamping the stall count.
#define GM107_MIN_ISSUE_DELAY 0x1
3977
#define GM107_MAX_ISSUE_DELAY 0xf
3980
// Compute how many cycles `insn` has to wait, at `cycle`, for all of its
// sources to be readable according to the score board.
SchedDataCalculatorGM107::calcDelay(const Instruction *insn, int cycle) const
3982
// In the visible code `ready` is never changed, so the final MAX2 reduces to
// max(delay, 0).
int delay = 0, ready = cycle;
3984
for (int s = 0; insn->srcExists(s); ++s)
3985
checkRd(insn->getSrc(s), cycle, delay);
3987
// TODO: make use of getReadLatency()!
3989
return MAX2(delay, ready - cycle);
3993
// Decide the stall count of `insn` and store it with emitStall().
//   insn  - instruction being scheduled
//   delay - delay computed from data dependencies
//   next  - following instruction, may be NULL
// NOTE(review): else lines, the declarations of `wr`/`rd` and the bodies of
// several branches are not visible in this view.
SchedDataCalculatorGM107::setDelay(Instruction *insn, int delay,
3994
const Instruction *next)
3996
const OpClass cl = targ->getOpClass(insn->op);
3999
// EXIT, BAR and MEMBAR get the maximum stall count.
if (insn->op == OP_EXIT ||
4000
insn->op == OP_BAR ||
4001
insn->op == OP_MEMBAR) {
4002
delay = GM107_MAX_ISSUE_DELAY;
4004
if (insn->op == OP_QUADON ||
4005
insn->op == OP_QUADPOP) {
4008
if (cl == OPCLASS_FLOW || insn->join) {
4012
// Without a dual-issue partner the delay is clamped to the valid range.
// NOTE(review): presumably the 0x0 assignment below is the else branch.
if (!next || !targ->canDualIssue(insn, next)) {
4013
delay = CLAMP(delay, GM107_MIN_ISSUE_DELAY, GM107_MAX_ISSUE_DELAY);
4015
delay = 0x0; // dual-issue
4018
wr = getWrDepBar(insn);
4019
rd = getRdDepBar(insn);
4021
// Only relevant at minimum delay when at least one barrier field is not 7.
if (delay == GM107_MIN_ISSUE_DELAY && (wr & rd) != 7) {
4022
// Barriers take one additional clock cycle to become active on top of
4023
// the clock consumed by the instruction producing it.
4024
if (!next || insn->bb != next->bb) {
4027
// Tests whether the next instruction waits on a barrier set by this one.
int wt = getWtDepBar(next);
4028
if ((wt & (1 << wr)) | (wt & (1 << rd)))
4033
emitStall(insn, delay);
4037
// Return true when the given instruction needs to emit a read dependency
4038
// barrier (for WaR hazards) because it doesn't operate at a fixed latency, and
4039
// setting the maximum number of stall counts is not enough.
//
// NOTE(review): the declarations of `a`/`b`, the bodies of the inner loops
// and the return/continue statements are not visible in this view.
4041
SchedDataCalculatorGM107::needRdDepBar(const Instruction *insn) const
4043
BitSet srcs(255, true), defs(255, true);
4046
if (!targ->isBarrierRequired(insn))
4049
// Do not emit a read dependency barrier when the instruction doesn't use
4050
// any GPR (like st s[0x4] 0x0) as input because it's unnecessary.
4051
for (int s = 0; insn->srcExists(s); ++s) {
4052
const Value *src = insn->src(s).rep();
4053
if (insn->src(s).getFile() != FILE_GPR)
4055
if (src->reg.data.id == 255)
4058
// Register range [a, b) covered by this source.
a = src->reg.data.id;
4059
b = a + src->reg.size / 4;
4060
for (int r = a; r < b; ++r)
4064
if (!srcs.popCount())
4067
// Do not emit a read dependency barrier when the output GPRs are equal to
4068
// the input GPRs (like rcp $r0 $r0) because a write dependency barrier will
4069
// be produced and WaR hazards are prevented.
4070
for (int d = 0; insn->defExists(d); ++d) {
4071
const Value *def = insn->def(d).rep();
4072
if (insn->def(d).getFile() != FILE_GPR)
4074
if (def->reg.data.id == 255)
4077
// Register range [a, b) covered by this def.
a = def->reg.data.id;
4078
b = a + def->reg.size / 4;
4079
for (int r = a; r < b; ++r)
4084
if (!srcs.popCount())
4090
// Return true when the given instruction needs to emit a write dependency
4091
// barrier (for RaW hazards) because it doesn't operate at a fixed latency, and
4092
// setting the maximum number of stall counts is not enough. This is only legal
4093
// if the instruction outputs something.
//
// NOTE(review): the return statements are not visible in this view.
4095
SchedDataCalculatorGM107::needWrDepBar(const Instruction *insn) const
4097
if (!targ->isBarrierRequired(insn))
4100
// Look for a def in the GPR, flags or predicate file.
for (int d = 0; insn->defExists(d); ++d) {
4101
if (insn->def(d).getFile() == FILE_GPR ||
4102
insn->def(d).getFile() == FILE_FLAGS ||
4103
insn->def(d).getFile() == FILE_PREDICATE)
4109
// Helper function for findFirstUse() and findFirstDef().
// Compares every def of `insn` against the registers occupied by `val`.
// NOTE(review): the return/continue statements are not visible in this view;
// presumably the result is true when some def overlaps `val`.
4111
SchedDataCalculatorGM107::doesInsnWriteTo(const Instruction *insn,
4112
const Value *val) const
4114
// Only GPR, predicate and flags values are considered.
if (val->reg.file != FILE_GPR &&
4115
val->reg.file != FILE_PREDICATE &&
4116
val->reg.file != FILE_FLAGS)
4119
for (int d = 0; insn->defExists(d); ++d) {
4120
const Value* def = insn->getDef(d);
4121
// Inclusive register range [minGPR, maxGPR] written by this def.
int minGPR = def->reg.data.id;
4122
int maxGPR = minGPR + def->reg.size / 4 - 1;
4124
if (def->reg.file != val->reg.file)
4127
if (def->reg.file == FILE_GPR) {
4128
// Tests whether val's register range lies entirely outside the def's range.
if (val->reg.data.id + val->reg.size / 4 - 1 < minGPR ||
4129
val->reg.data.id > maxGPR)
4133
// Predicates and flags are compared by id only.
if (def->reg.file == FILE_PREDICATE) {
4134
if (val->reg.data.id != minGPR)
4138
if (def->reg.file == FILE_FLAGS) {
4139
if (val->reg.data.id != minGPR)
4148
// Find the next instruction inside the same basic block which uses (reads or
4149
// writes from) the output of the given instruction in order to avoid RaW and
// WaW hazards.
//
// NOTE(review): the return statements and the update of `next` are not
// visible in this view.
4152
SchedDataCalculatorGM107::findFirstUse(const Instruction *bari) const
4154
Instruction *insn, *next;
4156
// Checks that `bari` has at least one def before searching.
if (!bari->defExists(0))
4159
for (insn = bari->next; insn != NULL; insn = next) {
4162
// Does `insn` read something written by `bari`?
for (int s = 0; insn->srcExists(s); ++s)
4163
if (doesInsnWriteTo(bari, insn->getSrc(s)))
4166
// Does `insn` overwrite something written by `bari`?
for (int d = 0; insn->defExists(d); ++d)
4167
if (doesInsnWriteTo(bari, insn->getDef(d)))
4173
// Find the next instruction inside the same basic block which overwrites, at
4174
// least, one source of the given instruction in order to avoid WaR hazards.
//
// NOTE(review): the return statements and the update of `next` are not
// visible in this view.
4176
SchedDataCalculatorGM107::findFirstDef(const Instruction *bari) const
4178
Instruction *insn, *next;
4180
// Checks that `bari` has at least one source before searching.
if (!bari->srcExists(0))
4183
for (insn = bari->next; insn != NULL; insn = next) {
4186
// Does `insn` write to any source of `bari`?
for (int s = 0; bari->srcExists(s); ++s)
4187
if (doesInsnWriteTo(insn, bari->getSrc(s)))
4193
// Dependency barriers:
4194
// This pass is a bit ugly and could probably be improved by performing a
4195
// better allocation.
4197
// The main idea is to avoid WaR and RaW hazards by emitting read/write
4198
// dependency barriers using the control codes.
//
// The visible code has two loops over the instructions of `bb`: the first
// allocates barriers out of a pool of 6 and records waits on them, the second
// removes waits on barriers that are not alive.
// NOTE(review): else/continue lines, the `next` updates, the `bar_id`,
// `wr`/`rd`/`wt` declarations and the code marking barriers alive are not
// visible in this view.
4200
SchedDataCalculatorGM107::insertBarriers(BasicBlock *bb)
4202
std::list<LiveBarUse> live_uses;
4203
std::list<LiveBarDef> live_defs;
4204
Instruction *insn, *next;
4205
// Allocation state of the 6 barriers.
BitSet bars(6, true);
4208
for (insn = bb->getEntry(); insn != NULL; insn = next) {
4209
Instruction *usei = NULL, *defi = NULL;
4210
bool need_wr_bar, need_rd_bar;
4214
// Expire old barrier uses.
4215
for (std::list<LiveBarUse>::iterator it = live_uses.begin();
4216
it != live_uses.end();) {
4217
// Once the first user is reached, wait on the barrier here and free it.
if (insn->serial >= it->usei->serial) {
4218
int wr = getWrDepBar(it->insn);
4219
emitWtDepBar(insn, wr);
4220
bars.clr(wr); // free barrier
4221
it = live_uses.erase(it);
4227
// Expire old barrier defs.
4228
for (std::list<LiveBarDef>::iterator it = live_defs.begin();
4229
it != live_defs.end();) {
4230
// Once the first overwriting instruction is reached, wait and free.
if (insn->serial >= it->defi->serial) {
4231
int rd = getRdDepBar(it->insn);
4232
emitWtDepBar(insn, rd);
4233
bars.clr(rd); // free barrier
4234
it = live_defs.erase(it);
4240
need_wr_bar = needWrDepBar(insn);
4241
need_rd_bar = needRdDepBar(insn);
4244
// When the instruction requires to emit a write dependency barrier
4245
// (all which write something at a variable latency), find the next
4246
// instruction which reads the outputs (or writes to them, potentially
4247
// completing before this insn).
4248
usei = findFirstUse(insn);
4250
// Allocate and emit a new barrier.
4251
bar_id = bars.findFreeRange(1);
4255
emitWrDepBar(insn, bar_id);
4257
live_uses.push_back(LiveBarUse(insn, usei));
4261
// When the instruction requires to emit a read dependency barrier
4262
// (all which read something at a variable latency), find the next
4263
// instruction which will write the inputs.
4264
defi = findFirstDef(insn);
4266
// Tests whether the first use comes no later than the first overwrite.
// NOTE(review): the statement under this condition is not visible here.
if (usei && defi && usei->serial <= defi->serial)
4269
// Allocate and emit a new barrier.
4270
bar_id = bars.findFreeRange(1);
4274
emitRdDepBar(insn, bar_id);
4276
live_defs.push_back(LiveBarDef(insn, defi));
4280
// Remove unnecessary barrier waits.
4281
BitSet alive_bars(6, true);
4282
for (insn = bb->getEntry(); insn != NULL; insn = next) {
4287
wr = getWrDepBar(insn);
4288
rd = getRdDepBar(insn);
4289
wt = getWtDepBar(insn);
4291
for (int idx = 0; idx < 6; ++idx) {
4292
// Only barriers this instruction waits on are examined.
if (!(wt & (1 << idx)))
4294
// A wait on a barrier that is not alive is dropped from the wait mask.
if (!alive_bars.test(idx)) {
4295
insn->sched &= ~(1 << (11 + idx));
4297
alive_bars.clr(idx);
4311
// Per-function setup: order the function's instructions and prepare one
// cleared score board per CFG node.
// NOTE(review): the declaration of `insns` and the return statement are not
// visible in this view.
SchedDataCalculatorGM107::visit(Function *func)
4315
func->orderInstructions(insns);
4317
scoreBoards.resize(func->cfg.getSize());
4318
for (size_t i = 0; i < scoreBoards.size(); ++i)
4319
scoreBoards[i].wipe();
4324
// Compute scheduling data for every instruction of one basic block: reset
// the sched words, insert dependency barriers, merge the score boards of the
// incoming blocks, assign stall counts instruction by instruction, and
// finally choose the delay of the last instruction from the successor blocks.
// NOTE(review): the declarations of `cycle`/`bbDelay`, several calls (e.g.
// the barrier insertion), else/continue/return lines and #endif lines are
// not visible in this view.
SchedDataCalculatorGM107::visit(BasicBlock *bb)
4326
Instruction *insn, *next = NULL;
4329
// This loop declares its own `insn`, shadowing the one declared above.
for (Instruction *insn = bb->getEntry(); insn; insn = insn->next) {
4331
// Initial sched word: both barrier id fields (bits 5..7, 8..10) set to 7.
insn->sched = 0x7e0;
4334
// Tests the NV50_PROG_SCHED debug option (default true).
if (!debug_get_bool_option("NV50_PROG_SCHED", true))
4337
// Insert read/write dependency barriers for instructions which don't
4338
// operate at a fixed latency.
4341
score = &scoreBoards.at(bb->getId());
4343
for (Graph::EdgeIterator ei = bb->cfg.incident(); !ei.end(); ei.next()) {
4344
// back branches will wait until all target dependencies are satisfied
4345
if (ei.getType() == Graph::Edge::BACK) // sched would be uninitialized
4347
BasicBlock *in = BasicBlock::get(ei.getNode());
4348
score->setMax(&scoreBoards.at(in->getId()));
4351
#ifdef GM107_DEBUG_SCHED_DATA
4352
INFO("=== BB:%i initial scores\n", bb->getId());
4353
score->print(cycle);
4356
// Because barriers are allocated locally (intra-BB), we have to make sure
4357
// that all produced barriers have been consumed before entering inside a
4358
// new basic block. The best way is to do a global allocation pre RA but
4359
// it's really more difficult, especially because of the phi nodes. Anyways,
4360
// it seems like that waiting on a barrier which has already been consumed
4361
// doesn't add any additional cost, it's just not elegant!
4362
Instruction *start = bb->getEntry();
4363
if (start && bb->cfg.incidentCount() > 0) {
4364
// Wait on all 6 barriers at the first instruction of the block.
for (int b = 0; b < 6; b++)
4365
emitWtDepBar(start, b);
4368
// Schedule every instruction that has a successor inside this block.
for (insn = bb->getEntry(); insn && insn->next; insn = insn->next) {
4371
commitInsn(insn, cycle);
4372
int delay = calcDelay(next, cycle);
4373
setDelay(insn, delay, next);
4374
cycle += getStall(insn);
4378
// XXX: The yield flag seems to destroy a bunch of things when it is
4379
// set on every instruction, need investigation.
4382
#ifdef GM107_DEBUG_SCHED_DATA
4383
printSchedInfo(cycle, insn);
4391
// Last instruction of the block.
commitInsn(insn, cycle);
4395
#ifdef GM107_DEBUG_SCHED_DATA
4396
fprintf(stderr, "last instruction is : ");
4398
fprintf(stderr, "cycle=%d\n", cycle);
4401
// Derive the delay of the last instruction from the successor blocks.
for (Graph::EdgeIterator ei = bb->cfg.outgoing(); !ei.end(); ei.next()) {
4402
BasicBlock *out = BasicBlock::get(ei.getNode());
4404
if (ei.getType() != Graph::Edge::BACK) {
4405
// Only test the first instruction of the outgoing block.
4406
next = out->getEntry();
4408
bbDelay = MAX2(bbDelay, calcDelay(next, cycle));
4410
// When the outgoing BB is empty, make sure to set the number of
4411
// stall counts needed by the instruction because we don't know the
4412
// next instruction.
4413
bbDelay = MAX2(bbDelay, targ->getLatency(insn));
4416
// Wait until all dependencies are satisfied.
4417
const int regsFree = score->getLatest();
4418
next = out->getFirst();
4419
for (int c = cycle; next && c < regsFree; next = next->next) {
4420
bbDelay = MAX2(bbDelay, calcDelay(next, c));
4421
c += getStall(next);
4426
// NOTE(review): the statement under this condition is not visible here.
if (bb->cfg.outgoingCount() != 1)
4428
setDelay(insn, bbDelay, next);
4429
cycle += getStall(insn);
4431
score->rebase(cycle); // common base for initializing out blocks' scores
4435
/*******************************************************************************
4437
******************************************************************************/
4440
// Prepare one function for emission: run the base-class preparation, then
// the GM107 scheduling data calculator over the function.
CodeEmitterGM107::prepareEmission(Function *func)
4442
SchedDataCalculatorGM107 sched(targGM107);
4443
CodeEmitter::prepareEmission(func);
4444
sched.run(func, true, true);
4447
// Return size / 24 rounded up, i.e. the number of 24-byte groups needed to
// hold `size` bytes (0 for size 0).
static inline uint32_t sizeToBundlesGM107(uint32_t size)
4449
return (size + 23) / 24;
4453
// Prepare every function of the program for emission and compute binary
// positions and sizes. On targets with software scheduling, each basic block
// grows by 8 bytes per bundle (see sizeToBundlesGM107).
// NOTE(review): the conditions guarding the adjSize adjustment and the
// adjPos update are not visible in this view.
CodeEmitterGM107::prepareEmission(Program *prog)
4455
for (ArrayList::Iterator fi = prog->allFuncs.iterator();
4456
!fi.end(); fi.next()) {
4457
Function *func = reinterpret_cast<Function *>(fi.get());
4458
// The function starts at the current end of the program binary.
func->binPos = prog->binSize;
4459
prepareEmission(func);
4461
// adjust sizes & positions for scheduling info:
4462
if (prog->getTarget()->hasSWSched) {
4463
uint32_t adjPos = func->binPos;
4464
BasicBlock *bb = NULL;
4465
for (int i = 0; i < func->bbCount; ++i) {
4466
bb = func->bbArray[i];
4467
int32_t adjSize = bb->binSize;
4469
// Subtract the bytes remaining up to the next 32-byte boundary.
adjSize -= 32 - adjPos % 32;
4473
// Add 8 bytes for each bundle to the block size.
adjSize = bb->binSize + sizeToBundlesGM107(adjSize) * 8;
4474
bb->binPos = adjPos;
4475
bb->binSize = adjSize;
4479
func->binSize = adjPos - func->binPos;
4482
prog->binSize += func->binSize;
4486
// Construct an emitter for the given GM107 target. The program type defaults
// to TYPE_VERTEX and issue delays are written when the target has software
// scheduling. The output buffer starts out empty (size and limit are 0).
// NOTE(review): some member initializers are not visible in this view.
CodeEmitterGM107::CodeEmitterGM107(const TargetGM107 *target)
4487
: CodeEmitter(target),
4489
progType(Program::TYPE_VERTEX),
4491
writeIssueDelays(target->hasSWSched),
4495
codeSize = codeSizeLimit = 0;
4500
// Factory: allocate a GM107 code emitter bound to this target and set its
// program type.
// NOTE(review): the return statement is not visible in this view; presumably
// the new emitter is returned and owned by the caller - confirm.
TargetGM107::createCodeEmitterGM107(Program::Type type)
4502
CodeEmitterGM107 *emit = new CodeEmitterGM107(this);
4503
emit->setProgramType(type);
4507
} // namespace nv50_ir