/*
 * Copyright 2011 Joakim Sindholt <opensource@zhasha.com>
 * Copyright 2013 Christoph Bumiller
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE. */
24
#include "nine_shader.h"
27
#include "nine_debug.h"
28
#include "nine_state.h"
29
#include "vertexdeclaration9.h"
31
#include "util/macros.h"
32
#include "util/u_memory.h"
33
#include "util/u_inlines.h"
34
#include "pipe/p_shader_tokens.h"
35
#include "tgsi/tgsi_ureg.h"
36
#include "tgsi/tgsi_dump.h"
37
#include "nir/tgsi_to_nir.h"
39
#define DBG_CHANNEL DBG_SHADER
41
#define DUMP(args...) _nine_debug_printf(DBG_CHANNEL, NULL, args)
44
struct shader_translator;
46
typedef HRESULT (*translate_instruction_func)(struct shader_translator *);
48
static inline const char *d3dsio_to_string(unsigned opcode);
51
#define NINED3D_SM1_VS 0xfffe
52
#define NINED3D_SM1_PS 0xffff
54
#define NINE_MAX_COND_DEPTH 64
55
#define NINE_MAX_LOOP_DEPTH 64
57
#define NINED3DSP_END 0x0000ffff
59
#define NINED3DSPTYPE_FLOAT4 0
60
#define NINED3DSPTYPE_INT4 1
61
#define NINED3DSPTYPE_BOOL 2
63
#define NINED3DSPR_IMMEDIATE (D3DSPR_PREDICATE + 1)
65
#define NINED3DSP_WRITEMASK_MASK D3DSP_WRITEMASK_ALL
66
#define NINED3DSP_WRITEMASK_SHIFT 16
68
#define NINED3DSHADER_INST_PREDICATED (1 << 28)
70
#define NINED3DSHADER_REL_OP_GT 1
71
#define NINED3DSHADER_REL_OP_EQ 2
72
#define NINED3DSHADER_REL_OP_GE 3
73
#define NINED3DSHADER_REL_OP_LT 4
74
#define NINED3DSHADER_REL_OP_NE 5
75
#define NINED3DSHADER_REL_OP_LE 6
77
#define NINED3DSIO_OPCODE_FLAGS_SHIFT 16
78
#define NINED3DSIO_OPCODE_FLAGS_MASK (0xff << NINED3DSIO_OPCODE_FLAGS_SHIFT)
80
#define NINED3DSI_TEXLD_PROJECT 0x1
81
#define NINED3DSI_TEXLD_BIAS 0x2
83
#define NINED3DSP_WRITEMASK_0 0x1
84
#define NINED3DSP_WRITEMASK_1 0x2
85
#define NINED3DSP_WRITEMASK_2 0x4
86
#define NINED3DSP_WRITEMASK_3 0x8
87
#define NINED3DSP_WRITEMASK_ALL 0xf
89
#define NINED3DSP_NOSWIZZLE ((0 << 0) | (1 << 2) | (2 << 4) | (3 << 6))
91
#define NINE_SWIZZLE4(x,y,z,w) \
92
TGSI_SWIZZLE_##x, TGSI_SWIZZLE_##y, TGSI_SWIZZLE_##z, TGSI_SWIZZLE_##w
94
#define NINE_APPLY_SWIZZLE(src, s) \
95
ureg_swizzle(src, NINE_SWIZZLE4(s, s, s, s))
97
#define NINED3DSPDM_SATURATE (D3DSPDM_SATURATE >> D3DSP_DSTMOD_SHIFT)
98
#define NINED3DSPDM_PARTIALP (D3DSPDM_PARTIALPRECISION >> D3DSP_DSTMOD_SHIFT)
99
#define NINED3DSPDM_CENTROID (D3DSPDM_MSAMPCENTROID >> D3DSP_DSTMOD_SHIFT)
/*
 * NEG     all, not ps: m3x2, m3x3, m3x4, m4x3, m4x4
 * BIAS    <= PS 1.4 (x-0.5)
 * BIASNEG <= PS 1.4 (-(x-0.5))
 * SIGN    <= PS 1.4 (2(x-0.5))
 * SIGNNEG <= PS 1.4 (-2(x-0.5))
 * COMP    <= PS 1.4 (1-x)
 * X2       = PS 1.4 (2x)
 * X2NEG    = PS 1.4 (-2x)
 * DZ      <= PS 1.4, tex{ld,crd} (.xy/.z), z=0 => .11
 * DW      <= PS 1.4, tex{ld,crd} (.xy/.w), w=0 => .11
 * ABS     >= SM 3.0 (abs(x))
 * ABSNEG  >= SM 3.0 (-abs(x))
 * NOT     >= SM 2.0 predication only
 */
116
#define NINED3DSPSM_NONE (D3DSPSM_NONE >> D3DSP_SRCMOD_SHIFT)
117
#define NINED3DSPSM_NEG (D3DSPSM_NEG >> D3DSP_SRCMOD_SHIFT)
118
#define NINED3DSPSM_BIAS (D3DSPSM_BIAS >> D3DSP_SRCMOD_SHIFT)
119
#define NINED3DSPSM_BIASNEG (D3DSPSM_BIASNEG >> D3DSP_SRCMOD_SHIFT)
120
#define NINED3DSPSM_SIGN (D3DSPSM_SIGN >> D3DSP_SRCMOD_SHIFT)
121
#define NINED3DSPSM_SIGNNEG (D3DSPSM_SIGNNEG >> D3DSP_SRCMOD_SHIFT)
122
#define NINED3DSPSM_COMP (D3DSPSM_COMP >> D3DSP_SRCMOD_SHIFT)
123
#define NINED3DSPSM_X2 (D3DSPSM_X2 >> D3DSP_SRCMOD_SHIFT)
124
#define NINED3DSPSM_X2NEG (D3DSPSM_X2NEG >> D3DSP_SRCMOD_SHIFT)
125
#define NINED3DSPSM_DZ (D3DSPSM_DZ >> D3DSP_SRCMOD_SHIFT)
126
#define NINED3DSPSM_DW (D3DSPSM_DW >> D3DSP_SRCMOD_SHIFT)
127
#define NINED3DSPSM_ABS (D3DSPSM_ABS >> D3DSP_SRCMOD_SHIFT)
128
#define NINED3DSPSM_ABSNEG (D3DSPSM_ABSNEG >> D3DSP_SRCMOD_SHIFT)
129
#define NINED3DSPSM_NOT (D3DSPSM_NOT >> D3DSP_SRCMOD_SHIFT)
131
static const char *sm1_mod_str[] =
133
[NINED3DSPSM_NONE] = "",
134
[NINED3DSPSM_NEG] = "-",
135
[NINED3DSPSM_BIAS] = "bias",
136
[NINED3DSPSM_BIASNEG] = "biasneg",
137
[NINED3DSPSM_SIGN] = "sign",
138
[NINED3DSPSM_SIGNNEG] = "signneg",
139
[NINED3DSPSM_COMP] = "comp",
140
[NINED3DSPSM_X2] = "x2",
141
[NINED3DSPSM_X2NEG] = "x2neg",
142
[NINED3DSPSM_DZ] = "dz",
143
[NINED3DSPSM_DW] = "dw",
144
[NINED3DSPSM_ABS] = "abs",
145
[NINED3DSPSM_ABSNEG] = "-abs",
146
[NINED3DSPSM_NOT] = "not"
150
/* Print a 4-bit destination write mask as "xyzw", with '_' for
 * disabled components (debug output helper). */
static void
sm1_dump_writemask(BYTE mask)
{
    if (mask & 1) DUMP("x"); else DUMP("_");
    if (mask & 2) DUMP("y"); else DUMP("_");
    if (mask & 4) DUMP("z"); else DUMP("_");
    if (mask & 8) DUMP("w"); else DUMP("_");
}
159
/* Print a packed 2-bit-per-component SM1 swizzle as four component
 * letters (debug output helper). */
static void
sm1_dump_swizzle(BYTE s)
{
    char c[4] = { 'x', 'y', 'z', 'w' };
    DUMP("%c%c%c%c",
         c[(s >> 0) & 3], c[(s >> 2) & 3], c[(s >> 4) & 3], c[(s >> 6) & 3]);
}
166
static const char sm1_file_char[] =
169
[D3DSPR_INPUT] = 'v',
170
[D3DSPR_CONST] = 'c',
172
[D3DSPR_RASTOUT] = 'R',
173
[D3DSPR_ATTROUT] = 'D',
174
[D3DSPR_OUTPUT] = 'o',
175
[D3DSPR_CONSTINT] = 'I',
176
[D3DSPR_COLOROUT] = 'C',
177
[D3DSPR_DEPTHOUT] = 'D',
178
[D3DSPR_SAMPLER] = 's',
179
[D3DSPR_CONST2] = 'c',
180
[D3DSPR_CONST3] = 'c',
181
[D3DSPR_CONST4] = 'c',
182
[D3DSPR_CONSTBOOL] = 'B',
184
[D3DSPR_TEMPFLOAT16] = 'h',
185
[D3DSPR_MISCTYPE] = 'M',
186
[D3DSPR_LABEL] = 'X',
187
[D3DSPR_PREDICATE] = 'p'
191
/* Print a register reference; special-cases the files with dedicated
 * names, otherwise falls back to the sm1_file_char mnemonic + index. */
static void
sm1_dump_reg(BYTE file, INT index)
{
    switch (file) {
    case D3DSPR_LOOP:
        DUMP("aL");
        break;
    case D3DSPR_COLOROUT:
        DUMP("oC%i", index);
        break;
    case D3DSPR_DEPTHOUT:
        DUMP("oDepth");
        break;
    case D3DSPR_RASTOUT:
        DUMP("oRast%i", index);
        break;
    case D3DSPR_CONSTINT:
        DUMP("iconst[%i]", index);
        break;
    case D3DSPR_CONSTBOOL:
        DUMP("bconst[%i]", index);
        break;
    default:
        DUMP("%c%i", sm1_file_char[file], index);
        break;
    }
}
221
struct sm1_src_param *rel;
234
sm1_parse_immediate(struct shader_translator *, struct sm1_src_param *);
239
struct sm1_src_param *rel;
243
int8_t shift; /* sint4 */
248
assert_replicate_swizzle(const struct ureg_src *reg)
250
assert(reg->SwizzleY == reg->SwizzleX &&
251
reg->SwizzleZ == reg->SwizzleX &&
252
reg->SwizzleW == reg->SwizzleX);
256
sm1_dump_immediate(const struct sm1_src_param *param)
258
switch (param->type) {
259
case NINED3DSPTYPE_FLOAT4:
260
DUMP("{ %f %f %f %f }",
261
param->imm.f[0], param->imm.f[1],
262
param->imm.f[2], param->imm.f[3]);
264
case NINED3DSPTYPE_INT4:
265
DUMP("{ %i %i %i %i }",
266
param->imm.i[0], param->imm.i[1],
267
param->imm.i[2], param->imm.i[3]);
269
case NINED3DSPTYPE_BOOL:
270
DUMP("%s", param->imm.b ? "TRUE" : "FALSE");
279
sm1_dump_src_param(const struct sm1_src_param *param)
281
if (param->file == NINED3DSPR_IMMEDIATE) {
282
assert(!param->mod &&
284
param->swizzle == NINED3DSP_NOSWIZZLE);
285
sm1_dump_immediate(param);
290
DUMP("%s(", sm1_mod_str[param->mod]);
292
DUMP("%c[", sm1_file_char[param->file]);
293
sm1_dump_src_param(param->rel);
294
DUMP("+%i]", param->idx);
296
sm1_dump_reg(param->file, param->idx);
300
if (param->swizzle != NINED3DSP_NOSWIZZLE) {
302
sm1_dump_swizzle(param->swizzle);
307
sm1_dump_dst_param(const struct sm1_dst_param *param)
309
if (param->mod & NINED3DSPDM_SATURATE)
311
if (param->mod & NINED3DSPDM_PARTIALP)
313
if (param->mod & NINED3DSPDM_CENTROID)
315
if (param->shift < 0)
316
DUMP("/%u ", 1 << -param->shift);
317
if (param->shift > 0)
318
DUMP("*%u ", 1 << param->shift);
321
DUMP("%c[", sm1_file_char[param->file]);
322
sm1_dump_src_param(param->rel);
323
DUMP("+%i]", param->idx);
325
sm1_dump_reg(param->file, param->idx);
327
if (param->mask != NINED3DSP_WRITEMASK_ALL) {
329
sm1_dump_writemask(param->mask);
335
struct sm1_dst_param reg;
343
/* NOTE: 0 is a valid TGSI opcode, but if handler is set, this parameter
344
* should be ignored completely */
346
unsigned opcode; /* TGSI_OPCODE_x */
348
/* versions are still set even handler is set */
352
} vert_version, frag_version;
354
/* number of regs parsed outside of special handler */
358
/* some instructions don't map perfectly, so use a special handler */
359
translate_instruction_func handler;
362
struct sm1_instruction
364
D3DSHADER_INSTRUCTION_OPCODE_TYPE opcode;
370
struct sm1_src_param src[4];
371
struct sm1_src_param src_rel[4];
372
struct sm1_src_param pred;
373
struct sm1_src_param dst_rel[1];
374
struct sm1_dst_param dst[1];
376
const struct sm1_op_info *info;
380
sm1_dump_instruction(struct sm1_instruction *insn, unsigned indent)
384
/* no info stored for these: */
385
if (insn->opcode == D3DSIO_DCL)
387
for (i = 0; i < indent; ++i)
390
if (insn->predicated) {
392
sm1_dump_src_param(&insn->pred);
395
DUMP("%s", d3dsio_to_string(insn->opcode));
397
switch (insn->opcode) {
399
DUMP(insn->flags == NINED3DSI_TEXLD_PROJECT ? "p" : "b");
402
DUMP("_%x", insn->flags);
410
for (i = 0; i < insn->ndst && i < ARRAY_SIZE(insn->dst); ++i) {
411
sm1_dump_dst_param(&insn->dst[i]);
415
for (i = 0; i < insn->nsrc && i < ARRAY_SIZE(insn->src); ++i) {
416
sm1_dump_src_param(&insn->src[i]);
419
if (insn->opcode == D3DSIO_DEF ||
420
insn->opcode == D3DSIO_DEFI ||
421
insn->opcode == D3DSIO_DEFB)
422
sm1_dump_immediate(&insn->src[0]);
427
struct sm1_local_const
431
float f[4]; /* for indirect addressing of float constants */
434
struct shader_translator
436
const DWORD *byte_code;
438
const DWORD *parse_next;
440
struct ureg_program *ureg;
447
unsigned processor; /* PIPE_SHADER_VERTEX/FRAMGENT */
448
unsigned num_constf_allowed;
449
unsigned num_consti_allowed;
450
unsigned num_constb_allowed;
452
boolean native_integers;
453
boolean inline_subroutines;
454
boolean want_texcoord;
456
boolean wpos_is_sysval;
457
boolean face_is_sysval_integer;
458
boolean mul_zero_wins;
459
unsigned texcoord_sn;
461
struct sm1_instruction insn; /* current instruction */
465
struct ureg_dst oPos;
466
struct ureg_dst oPos_out; /* the real output when doing streamout */
467
struct ureg_dst oFog;
468
struct ureg_dst oPts;
469
struct ureg_dst oCol[4];
470
struct ureg_dst o[PIPE_MAX_SHADER_OUTPUTS];
471
struct ureg_dst oDepth;
472
struct ureg_src v[PIPE_MAX_SHADER_INPUTS];
473
struct ureg_src v_consecutive; /* copy in temp array of ps inputs for rel addressing */
474
struct ureg_src vPos;
475
struct ureg_src vFace;
478
struct ureg_dst address;
480
struct ureg_dst predicate;
481
struct ureg_dst predicate_tmp;
482
struct ureg_dst predicate_dst;
483
struct ureg_dst tS[8]; /* texture stage registers */
484
struct ureg_dst tdst; /* scratch dst if we need extra modifiers */
485
struct ureg_dst t[8]; /* scratch TEMPs */
486
struct ureg_src vC[2]; /* PS color in */
487
struct ureg_src vT[8]; /* PS texcoord in */
488
struct ureg_dst rL[NINE_MAX_LOOP_DEPTH]; /* loop ctr */
490
unsigned num_temp; /* ARRAY_SIZE(regs.r) */
491
unsigned num_scratch;
493
unsigned loop_depth_max;
495
unsigned loop_labels[NINE_MAX_LOOP_DEPTH];
496
unsigned cond_labels[NINE_MAX_COND_DEPTH];
497
boolean loop_or_rep[NINE_MAX_LOOP_DEPTH]; /* true: loop, false: rep */
498
boolean predicated_activated;
500
unsigned *inst_labels; /* LABEL op */
501
unsigned num_inst_labels;
503
unsigned sampler_targets[NINE_MAX_SAMPLERS]; /* TGSI_TEXTURE_x */
505
struct sm1_local_const *lconstf;
506
unsigned num_lconstf;
507
struct sm1_local_const *lconsti;
508
unsigned num_lconsti;
509
struct sm1_local_const *lconstb;
510
unsigned num_lconstb;
512
boolean slots_used[NINE_MAX_CONST_ALL];
516
boolean indirect_const_access;
519
struct nine_vs_output_info output_info[16];
522
struct nine_shader_info *info;
524
int16_t op_info_map[D3DSIO_BREAKP + 1];
527
#define IS_VS (tx->processor == PIPE_SHADER_VERTEX)
528
#define IS_PS (tx->processor == PIPE_SHADER_FRAGMENT)
530
#define FAILURE_VOID(cond) if ((cond)) {tx->failure=1;return;}
533
sm1_read_semantic(struct shader_translator *, struct sm1_semantic *);
536
sm1_instruction_check(const struct sm1_instruction *insn)
538
if (insn->opcode == D3DSIO_CRS)
540
if (insn->dst[0].mask & NINED3DSP_WRITEMASK_3)
548
/* Record one VS output declaration (semantic, semantic index, write mask,
 * TGSI output index) so the device can later match PS inputs to it. */
static void
nine_record_outputs(struct shader_translator *tx, BYTE Usage, BYTE UsageIndex,
                    int mask, int output_index)
{
    tx->output_info[tx->num_outputs].output_semantic = Usage;
    tx->output_info[tx->num_outputs].output_semantic_index = UsageIndex;
    tx->output_info[tx->num_outputs].mask = mask;
    tx->output_info[tx->num_outputs].output_index = output_index;
    tx->num_outputs++;
}
558
static struct ureg_src nine_float_constant_src(struct shader_translator *tx, int idx)
563
idx = tx->slot_map[idx];
564
/* vswp constant handling: we use two buffers
565
* to fit all the float constants. The special handling
566
* doesn't need to be elsewhere, because all the instructions
567
* accessing the constants directly are VS1, and swvp
569
if (tx->info->swvp_on && idx >= 4096) {
570
/* TODO: swvp rel is broken if many constants are used */
571
src = ureg_src_register(TGSI_FILE_CONSTANT, idx - 4096);
572
src = ureg_src_dimension(src, 1);
574
src = ureg_src_register(TGSI_FILE_CONSTANT, idx);
575
src = ureg_src_dimension(src, 0);
578
if (!tx->info->swvp_on)
579
tx->slots_used[idx] = TRUE;
580
if (tx->info->const_float_slots < (idx + 1))
581
tx->info->const_float_slots = idx + 1;
582
if (tx->num_slots < (idx + 1))
583
tx->num_slots = idx + 1;
588
static struct ureg_src nine_integer_constant_src(struct shader_translator *tx, int idx)
592
if (tx->info->swvp_on) {
593
src = ureg_src_register(TGSI_FILE_CONSTANT, idx);
594
src = ureg_src_dimension(src, 2);
596
unsigned slot_idx = tx->info->const_i_base + idx;
598
slot_idx = tx->slot_map[slot_idx];
599
src = ureg_src_register(TGSI_FILE_CONSTANT, slot_idx);
600
src = ureg_src_dimension(src, 0);
601
tx->slots_used[slot_idx] = TRUE;
602
tx->info->int_slots_used[idx] = TRUE;
603
if (tx->num_slots < (slot_idx + 1))
604
tx->num_slots = slot_idx + 1;
607
if (tx->info->const_int_slots < (idx + 1))
608
tx->info->const_int_slots = idx + 1;
613
static struct ureg_src nine_boolean_constant_src(struct shader_translator *tx, int idx)
620
if (tx->info->swvp_on) {
621
src = ureg_src_register(TGSI_FILE_CONSTANT, r);
622
src = ureg_src_dimension(src, 3);
624
unsigned slot_idx = tx->info->const_b_base + r;
626
slot_idx = tx->slot_map[slot_idx];
627
src = ureg_src_register(TGSI_FILE_CONSTANT, slot_idx);
628
src = ureg_src_dimension(src, 0);
629
tx->slots_used[slot_idx] = TRUE;
630
tx->info->bool_slots_used[idx] = TRUE;
631
if (tx->num_slots < (slot_idx + 1))
632
tx->num_slots = slot_idx + 1;
634
src = ureg_swizzle(src, s, s, s, s);
636
if (tx->info->const_bool_slots < (idx + 1))
637
tx->info->const_bool_slots = idx + 1;
643
tx_lconstf(struct shader_translator *tx, struct ureg_src *src, INT index)
647
if (index < 0 || index >= tx->num_constf_allowed) {
651
for (i = 0; i < tx->num_lconstf; ++i) {
652
if (tx->lconstf[i].idx == index) {
653
*src = tx->lconstf[i].reg;
660
tx_lconsti(struct shader_translator *tx, struct ureg_src *src, INT index)
664
if (index < 0 || index >= tx->num_consti_allowed) {
668
for (i = 0; i < tx->num_lconsti; ++i) {
669
if (tx->lconsti[i].idx == index) {
670
*src = tx->lconsti[i].reg;
677
tx_lconstb(struct shader_translator *tx, struct ureg_src *src, INT index)
681
if (index < 0 || index >= tx->num_constb_allowed) {
685
for (i = 0; i < tx->num_lconstb; ++i) {
686
if (tx->lconstb[i].idx == index) {
687
*src = tx->lconstb[i].reg;
695
/* Record a DEF'd float constant: reuse the existing entry for this index
 * or append one (array grows in chunks of 8). Also keeps the raw floats
 * for indirect addressing. */
static void
tx_set_lconstf(struct shader_translator *tx, INT index, float f[4])
{
    unsigned n;

    FAILURE_VOID(index < 0 || index >= tx->num_constf_allowed)

    for (n = 0; n < tx->num_lconstf; ++n)
        if (tx->lconstf[n].idx == index)
            break;
    if (n == tx->num_lconstf) {
        if ((n % 8) == 0) {
            tx->lconstf = REALLOC(tx->lconstf,
                                  (n + 0) * sizeof(tx->lconstf[0]),
                                  (n + 8) * sizeof(tx->lconstf[0]));
            assert(tx->lconstf);
        }
        tx->num_lconstf++;
    }
    tx->lconstf[n].idx = index;
    tx->lconstf[n].reg = ureg_imm4f(tx->ureg, f[0], f[1], f[2], f[3]);

    memcpy(tx->lconstf[n].f, f, sizeof(tx->lconstf[n].f));
}
719
/* Record a DEFI'd integer constant: reuse the existing entry for this
 * index or append one (array grows in chunks of 8). Without native
 * integer support the immediate is emitted as floats. */
static void
tx_set_lconsti(struct shader_translator *tx, INT index, int i[4])
{
    unsigned n;

    FAILURE_VOID(index < 0 || index >= tx->num_consti_allowed)

    for (n = 0; n < tx->num_lconsti; ++n)
        if (tx->lconsti[n].idx == index)
            break;
    if (n == tx->num_lconsti) {
        if ((n % 8) == 0) {
            tx->lconsti = REALLOC(tx->lconsti,
                                  (n + 0) * sizeof(tx->lconsti[0]),
                                  (n + 8) * sizeof(tx->lconsti[0]));
            assert(tx->lconsti);
        }
        tx->num_lconsti++;
    }

    tx->lconsti[n].idx = index;
    tx->lconsti[n].reg = tx->native_integers ?
        ureg_imm4i(tx->ureg, i[0], i[1], i[2], i[3]) :
        ureg_imm4f(tx->ureg, i[0], i[1], i[2], i[3]);
}
744
/* Record a DEFB'd boolean constant: reuse the existing entry for this
 * index or append one (array grows in chunks of 8). TRUE is ~0u with
 * native integers, 1.0f otherwise. */
static void
tx_set_lconstb(struct shader_translator *tx, INT index, BOOL b)
{
    unsigned n;

    FAILURE_VOID(index < 0 || index >= tx->num_constb_allowed)

    for (n = 0; n < tx->num_lconstb; ++n)
        if (tx->lconstb[n].idx == index)
            break;
    if (n == tx->num_lconstb) {
        if ((n % 8) == 0) {
            tx->lconstb = REALLOC(tx->lconstb,
                                  (n + 0) * sizeof(tx->lconstb[0]),
                                  (n + 8) * sizeof(tx->lconstb[0]));
            assert(tx->lconstb);
        }
        tx->num_lconstb++;
    }

    tx->lconstb[n].idx = index;
    tx->lconstb[n].reg = tx->native_integers ?
        ureg_imm1u(tx->ureg, b ? 0xffffffff : 0) :
        ureg_imm1f(tx->ureg, b ? 1.0f : 0.0f);
}
769
static inline struct ureg_dst
770
tx_scratch(struct shader_translator *tx)
772
if (tx->num_scratch >= ARRAY_SIZE(tx->regs.t)) {
774
return tx->regs.t[0];
776
if (ureg_dst_is_undef(tx->regs.t[tx->num_scratch]))
777
tx->regs.t[tx->num_scratch] = ureg_DECL_local_temporary(tx->ureg);
778
return tx->regs.t[tx->num_scratch++];
781
static inline struct ureg_dst
782
tx_scratch_scalar(struct shader_translator *tx)
784
return ureg_writemask(tx_scratch(tx), TGSI_WRITEMASK_X);
787
static inline struct ureg_src
788
tx_src_scalar(struct ureg_dst dst)
790
struct ureg_src src = ureg_src(dst);
791
int c = ffs(dst.WriteMask) - 1;
792
if (dst.WriteMask == (1 << c))
793
src = ureg_scalar(src, c);
798
/* Ensure r[idx] exists: grow the TEMP array if needed (new slots start
 * undefined) and lazily declare the ureg temporary for idx. */
static inline void
tx_temp_alloc(struct shader_translator *tx, INT idx)
{
    assert(idx >= 0);
    if (idx >= tx->num_temp) {
        unsigned k = tx->num_temp;
        unsigned n = idx + 1;
        tx->regs.r = REALLOC(tx->regs.r,
                             k * sizeof(tx->regs.r[0]),
                             n * sizeof(tx->regs.r[0]));
        for (; k < n; ++k)
            tx->regs.r[k] = ureg_dst_undef();
        tx->num_temp = n;
    }
    if (ureg_dst_is_undef(tx->regs.r[idx]))
        tx->regs.r[idx] = ureg_DECL_temporary(tx->ureg);
}
816
/* Lazily declare the ADDR register and its backing float temporary a0
 * (a0 holds the value before rounding into the address register). */
static inline void
tx_addr_alloc(struct shader_translator *tx, INT idx)
{
    assert(idx == 0);
    if (ureg_dst_is_undef(tx->regs.address))
        tx->regs.address = ureg_DECL_address(tx->ureg);
    if (ureg_dst_is_undef(tx->regs.a0))
        tx->regs.a0 = ureg_DECL_temporary(tx->ureg);
}
826
TEX_if_fetch4(struct shader_translator *tx, struct ureg_dst dst,
827
unsigned target, struct ureg_src src0,
828
struct ureg_src src1, INT idx)
831
struct ureg_src src_tg4[3] = {src0, ureg_imm1f(tx->ureg, 0.f), src1};
833
if (!(tx->info->fetch4 & (1 << idx)))
836
/* TODO: needs more tests, but this feature is not much used at all */
838
tmp = tx_scratch(tx);
839
ureg_tex_insn(tx->ureg, TGSI_OPCODE_TG4, &tmp, 1, target, TGSI_RETURN_TYPE_FLOAT,
840
NULL, 0, src_tg4, 3);
841
ureg_MOV(tx->ureg, dst, ureg_swizzle(ureg_src(tmp), NINE_SWIZZLE4(Z, X, Y, W)));
845
/* NOTE: It's not very clear on which ps1.1-ps1.3 instructions
846
* the projection should be applied on the texture. It doesn't
848
* The doc is very imprecise here (it says the projection is done
849
* before rasterization, thus in vs, which seems wrong since ps instructions
850
* are affected differently)
851
* For now we only apply to the ps TEX instruction and TEXBEM.
852
* Perhaps some other instructions would need it */
854
/* Divide the texcoord by its projection component for ps1.x. dim encodes
 * the D3DTTFF_PROJECTED count for stage idx (1 means no projection). */
static void
apply_ps1x_projection(struct shader_translator *tx, struct ureg_dst dst,
                      struct ureg_src src, INT idx)
{
    struct ureg_dst tmp;
    unsigned dim = 1 + ((tx->info->projected >> (2 * idx)) & 3);

    /* no projection */
    if (dim == 1) {
        ureg_MOV(tx->ureg, dst, src);
    } else {
        tmp = tx_scratch_scalar(tx);
        ureg_RCP(tx->ureg, tmp, ureg_scalar(src, dim-1));
        ureg_MUL(tx->ureg, dst, tx_src_scalar(tmp), src);
    }
}
871
/* Emit a texture sample for ps1.x honoring the stage's projection mode:
 * plain TEX when no projection applies, TXP when dividing by .w, and a
 * manual divide (apply_ps1x_projection) for the in-between cases. */
static void
TEX_with_ps1x_projection(struct shader_translator *tx, struct ureg_dst dst,
                         unsigned target, struct ureg_src src0,
                         struct ureg_src src1, INT idx)
{
    unsigned dim = 1 + ((tx->info->projected >> (2 * idx)) & 3);
    struct ureg_dst tmp;
    boolean shadow = !!(tx->info->sampler_mask_shadow & (1 << idx));

    /* dim == 1: no projection
     * Looks like must be disabled when it makes no
     * sense according the texture dimensions
     */
    if (dim == 1 || (dim <= target && !shadow)) {
        ureg_TEX(tx->ureg, dst, target, src0, src1);
    } else if (dim == 4) {
        ureg_TXP(tx->ureg, dst, target, src0, src1);
    } else {
        tmp = tx_scratch(tx);
        apply_ps1x_projection(tx, tmp, src0, idx);
        ureg_TEX(tx->ureg, dst, target, ureg_src(tmp), src1);
    }
}
895
/* Lazily declare the PS texcoord input vT[idx] (perspective
 * interpolated, using the configured texcoord semantic). */
static void
tx_texcoord_alloc(struct shader_translator *tx, INT idx)
{
    assert(idx >= 0 && idx < ARRAY_SIZE(tx->regs.vT));
    if (ureg_src_is_undef(tx->regs.vT[idx]))
        tx->regs.vT[idx] = ureg_DECL_fs_input(tx->ureg, tx->texcoord_sn, idx,
                                              TGSI_INTERPOLATE_PERSPECTIVE);
}
904
static inline unsigned *
905
tx_bgnloop(struct shader_translator *tx)
908
if (tx->loop_depth_max < tx->loop_depth)
909
tx->loop_depth_max = tx->loop_depth;
910
assert(tx->loop_depth < NINE_MAX_LOOP_DEPTH);
911
return &tx->loop_labels[tx->loop_depth - 1];
914
static inline unsigned *
915
tx_endloop(struct shader_translator *tx)
917
assert(tx->loop_depth);
919
ureg_fixup_label(tx->ureg, tx->loop_labels[tx->loop_depth],
920
ureg_get_instruction_number(tx->ureg));
921
return &tx->loop_labels[tx->loop_depth];
924
static struct ureg_dst
925
tx_get_loopctr(struct shader_translator *tx, boolean loop_or_rep)
927
const unsigned l = tx->loop_depth - 1;
931
DBG("loop counter requested outside of loop\n");
932
return ureg_dst_undef();
935
if (ureg_dst_is_undef(tx->regs.rL[l])) {
936
/* loop or rep ctr creation */
937
tx->regs.rL[l] = ureg_DECL_local_temporary(tx->ureg);
938
tx->loop_or_rep[l] = loop_or_rep;
940
/* loop - rep - endloop - endrep not allowed */
941
assert(tx->loop_or_rep[l] == loop_or_rep);
943
return tx->regs.rL[l];
946
static struct ureg_src
947
tx_get_loopal(struct shader_translator *tx)
949
int loop_level = tx->loop_depth - 1;
951
while (loop_level >= 0) {
952
/* handle loop - rep - endrep - endloop case */
953
if (tx->loop_or_rep[loop_level])
954
/* the value is in the loop counter y component (nine implementation) */
955
return ureg_scalar(ureg_src(tx->regs.rL[loop_level]), TGSI_SWIZZLE_Y);
959
DBG("aL counter requested outside of loop\n");
960
return ureg_src_undef();
963
static inline unsigned *
964
tx_cond(struct shader_translator *tx)
966
assert(tx->cond_depth <= NINE_MAX_COND_DEPTH);
968
return &tx->cond_labels[tx->cond_depth - 1];
971
static inline unsigned *
972
tx_elsecond(struct shader_translator *tx)
974
assert(tx->cond_depth);
975
return &tx->cond_labels[tx->cond_depth - 1];
979
tx_endcond(struct shader_translator *tx)
981
assert(tx->cond_depth);
983
ureg_fixup_label(tx->ureg, tx->cond_labels[tx->cond_depth],
984
ureg_get_instruction_number(tx->ureg));
987
static inline struct ureg_dst
988
nine_ureg_dst_register(unsigned file, int index)
990
return ureg_dst(ureg_src_register(file, index));
993
static inline struct ureg_src
994
nine_get_position_input(struct shader_translator *tx)
996
struct ureg_program *ureg = tx->ureg;
998
if (tx->wpos_is_sysval)
999
return ureg_DECL_system_value(ureg, TGSI_SEMANTIC_POSITION, 0);
1001
return ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_POSITION,
1002
0, TGSI_INTERPOLATE_LINEAR);
1005
static struct ureg_src
1006
tx_src_param(struct shader_translator *tx, const struct sm1_src_param *param)
1008
struct ureg_program *ureg = tx->ureg;
1009
struct ureg_src src;
1010
struct ureg_dst tmp;
1012
assert(!param->rel || (IS_VS && param->file == D3DSPR_CONST) ||
1013
(param->file == D3DSPR_INPUT && tx->version.major == 3));
1015
switch (param->file)
1018
tx_temp_alloc(tx, param->idx);
1019
src = ureg_src(tx->regs.r[param->idx]);
1021
/* case D3DSPR_TEXTURE: == D3DSPR_ADDR */
1024
assert(param->idx == 0);
1025
/* the address register (vs only) must be
1026
* assigned before use */
1027
assert(!ureg_dst_is_undef(tx->regs.a0));
1028
/* Round to lowest for vs1.1 (contrary to the doc), else
1029
* round to nearest */
1030
if (tx->version.major < 2 && tx->version.minor < 2)
1031
ureg_ARL(ureg, tx->regs.address, ureg_src(tx->regs.a0));
1033
ureg_ARR(ureg, tx->regs.address, ureg_src(tx->regs.a0));
1034
src = ureg_src(tx->regs.address);
1036
if (tx->version.major < 2 && tx->version.minor < 4) {
1037
/* no subroutines, so should be defined */
1038
src = ureg_src(tx->regs.tS[param->idx]);
1040
tx_texcoord_alloc(tx, param->idx);
1041
src = tx->regs.vT[param->idx];
1047
src = ureg_src_register(TGSI_FILE_INPUT, param->idx);
1049
if (tx->version.major < 3) {
1050
src = ureg_DECL_fs_input_centroid(
1051
ureg, TGSI_SEMANTIC_COLOR, param->idx,
1052
TGSI_INTERPOLATE_COLOR,
1053
tx->info->force_color_in_centroid ?
1054
TGSI_INTERPOLATE_LOC_CENTROID : 0,
1058
/* Copy all inputs (non consecutive)
1059
* to temp array (consecutive).
1060
* This is not good for performance.
1061
* A better way would be to have inputs
1062
* consecutive (would need implement alternative
1063
* way to match vs outputs and ps inputs).
1064
* However even with the better way, the temp array
1065
* copy would need to be used if some inputs
1066
* are not GENERIC or if they have different
1067
* interpolation flag. */
1068
if (ureg_src_is_undef(tx->regs.v_consecutive)) {
1070
tx->regs.v_consecutive = ureg_src(ureg_DECL_array_temporary(ureg, 10, 0));
1071
for (i = 0; i < 10; i++) {
1072
if (!ureg_src_is_undef(tx->regs.v[i]))
1073
ureg_MOV(ureg, ureg_dst_array_offset(ureg_dst(tx->regs.v_consecutive), i), tx->regs.v[i]);
1075
ureg_MOV(ureg, ureg_dst_array_offset(ureg_dst(tx->regs.v_consecutive), i), ureg_imm4f(ureg, 0.0f, 0.0f, 0.0f, 1.0f));
1078
src = ureg_src_array_offset(tx->regs.v_consecutive, param->idx);
1080
assert(param->idx < ARRAY_SIZE(tx->regs.v));
1081
src = tx->regs.v[param->idx];
1086
src = ureg_src_indirect(src, tx_src_param(tx, param->rel));
1088
case D3DSPR_PREDICATE:
1089
if (ureg_dst_is_undef(tx->regs.predicate)) {
1090
/* Forbidden to use the predicate register before being set */
1092
tx->regs.predicate = ureg_DECL_temporary(tx->ureg);
1094
src = ureg_src(tx->regs.predicate);
1096
case D3DSPR_SAMPLER:
1097
assert(param->mod == NINED3DSPSM_NONE);
1098
/* assert(param->swizzle == NINED3DSP_NOSWIZZLE); Passed by wine tests */
1099
src = ureg_DECL_sampler(ureg, param->idx);
1102
if (param->rel || !tx_lconstf(tx, &src, param->idx)) {
1103
src = nine_float_constant_src(tx, param->idx);
1105
tx->indirect_const_access = TRUE;
1106
src = ureg_src_indirect(src, tx_src_param(tx, param->rel));
1109
if (!IS_VS && tx->version.major < 2) {
1110
/* ps 1.X clamps constants */
1111
tmp = tx_scratch(tx);
1112
ureg_MIN(ureg, tmp, src, ureg_imm1f(ureg, 1.0f));
1113
ureg_MAX(ureg, tmp, ureg_src(tmp), ureg_imm1f(ureg, -1.0f));
1114
src = ureg_src(tmp);
1120
DBG("CONST2/3/4 should have been collapsed into D3DSPR_CONST !\n");
1121
assert(!"CONST2/3/4");
1122
src = ureg_imm1f(ureg, 0.0f);
1124
case D3DSPR_CONSTINT:
1125
/* relative adressing only possible for float constants in vs */
1126
if (!tx_lconsti(tx, &src, param->idx))
1127
src = nine_integer_constant_src(tx, param->idx);
1129
case D3DSPR_CONSTBOOL:
1130
if (!tx_lconstb(tx, &src, param->idx))
1131
src = nine_boolean_constant_src(tx, param->idx);
1134
if (ureg_dst_is_undef(tx->regs.address))
1135
tx->regs.address = ureg_DECL_address(ureg);
1136
if (!tx->native_integers)
1137
ureg_ARR(ureg, tx->regs.address, tx_get_loopal(tx));
1139
ureg_UARL(ureg, tx->regs.address, tx_get_loopal(tx));
1140
src = ureg_src(tx->regs.address);
1142
case D3DSPR_MISCTYPE:
1143
switch (param->idx) {
1144
case D3DSMO_POSITION:
1145
if (ureg_src_is_undef(tx->regs.vPos))
1146
tx->regs.vPos = nine_get_position_input(tx);
1147
if (tx->shift_wpos) {
1148
/* TODO: do this only once */
1149
struct ureg_dst wpos = tx_scratch(tx);
1150
ureg_ADD(ureg, wpos, tx->regs.vPos,
1151
ureg_imm4f(ureg, -0.5f, -0.5f, 0.0f, 0.0f));
1152
src = ureg_src(wpos);
1154
src = tx->regs.vPos;
1158
if (ureg_src_is_undef(tx->regs.vFace)) {
1159
if (tx->face_is_sysval_integer) {
1160
tmp = ureg_DECL_temporary(ureg);
1162
ureg_DECL_system_value(ureg, TGSI_SEMANTIC_FACE, 0);
1164
/* convert bool to float */
1165
ureg_UCMP(ureg, tmp, ureg_scalar(tx->regs.vFace, TGSI_SWIZZLE_X),
1166
ureg_imm1f(ureg, 1), ureg_imm1f(ureg, -1));
1167
tx->regs.vFace = ureg_src(tmp);
1169
tx->regs.vFace = ureg_DECL_fs_input(ureg,
1170
TGSI_SEMANTIC_FACE, 0,
1171
TGSI_INTERPOLATE_CONSTANT);
1173
tx->regs.vFace = ureg_scalar(tx->regs.vFace, TGSI_SWIZZLE_X);
1175
src = tx->regs.vFace;
1178
assert(!"invalid src D3DSMO");
1182
case D3DSPR_TEMPFLOAT16:
1185
assert(!"invalid src D3DSPR");
1188
switch (param->mod) {
1189
case NINED3DSPSM_DW:
1190
tmp = tx_scratch(tx);
1191
/* NOTE: app is not allowed to read w with this modifier */
1192
ureg_RCP(ureg, ureg_writemask(tmp, NINED3DSP_WRITEMASK_3), ureg_scalar(src, TGSI_SWIZZLE_W));
1193
ureg_MUL(ureg, tmp, src, ureg_swizzle(ureg_src(tmp), NINE_SWIZZLE4(W,W,W,W)));
1194
src = ureg_src(tmp);
1196
case NINED3DSPSM_DZ:
1197
tmp = tx_scratch(tx);
1198
/* NOTE: app is not allowed to read z with this modifier */
1199
ureg_RCP(ureg, ureg_writemask(tmp, NINED3DSP_WRITEMASK_2), ureg_scalar(src, TGSI_SWIZZLE_Z));
1200
ureg_MUL(ureg, tmp, src, ureg_swizzle(ureg_src(tmp), NINE_SWIZZLE4(Z,Z,Z,Z)));
1201
src = ureg_src(tmp);
1207
if (param->swizzle != NINED3DSP_NOSWIZZLE && param->file != D3DSPR_SAMPLER)
1208
src = ureg_swizzle(src,
1209
(param->swizzle >> 0) & 0x3,
1210
(param->swizzle >> 2) & 0x3,
1211
(param->swizzle >> 4) & 0x3,
1212
(param->swizzle >> 6) & 0x3);
1214
switch (param->mod) {
1215
case NINED3DSPSM_ABS:
1216
src = ureg_abs(src);
1218
case NINED3DSPSM_ABSNEG:
1219
src = ureg_negate(ureg_abs(src));
1221
case NINED3DSPSM_NEG:
1222
src = ureg_negate(src);
1224
case NINED3DSPSM_BIAS:
1225
tmp = tx_scratch(tx);
1226
ureg_ADD(ureg, tmp, src, ureg_imm1f(ureg, -0.5f));
1227
src = ureg_src(tmp);
1229
case NINED3DSPSM_BIASNEG:
1230
tmp = tx_scratch(tx);
1231
ureg_ADD(ureg, tmp, ureg_imm1f(ureg, 0.5f), ureg_negate(src));
1232
src = ureg_src(tmp);
1234
case NINED3DSPSM_NOT:
1235
if (tx->native_integers && param->file == D3DSPR_CONSTBOOL) {
1236
tmp = tx_scratch(tx);
1237
ureg_NOT(ureg, tmp, src);
1238
src = ureg_src(tmp);
1240
} else { /* predicate */
1241
tmp = tx_scratch(tx);
1242
ureg_ADD(ureg, tmp, ureg_imm1f(ureg, 1.0f), ureg_negate(src));
1243
src = ureg_src(tmp);
1246
case NINED3DSPSM_COMP:
1247
tmp = tx_scratch(tx);
1248
ureg_ADD(ureg, tmp, ureg_imm1f(ureg, 1.0f), ureg_negate(src));
1249
src = ureg_src(tmp);
1251
case NINED3DSPSM_DZ:
1252
case NINED3DSPSM_DW:
1253
/* Already handled*/
1255
case NINED3DSPSM_SIGN:
1256
tmp = tx_scratch(tx);
1257
ureg_MAD(ureg, tmp, src, ureg_imm1f(ureg, 2.0f), ureg_imm1f(ureg, -1.0f));
1258
src = ureg_src(tmp);
1260
case NINED3DSPSM_SIGNNEG:
1261
tmp = tx_scratch(tx);
1262
ureg_MAD(ureg, tmp, src, ureg_imm1f(ureg, -2.0f), ureg_imm1f(ureg, 1.0f));
1263
src = ureg_src(tmp);
1265
case NINED3DSPSM_X2:
1266
tmp = tx_scratch(tx);
1267
ureg_ADD(ureg, tmp, src, src);
1268
src = ureg_src(tmp);
1270
case NINED3DSPSM_X2NEG:
1271
tmp = tx_scratch(tx);
1272
ureg_ADD(ureg, tmp, src, src);
1273
src = ureg_negate(ureg_src(tmp));
1276
assert(param->mod == NINED3DSPSM_NONE);
1283
static struct ureg_dst
1284
_tx_dst_param(struct shader_translator *tx, const struct sm1_dst_param *param)
1286
struct ureg_dst dst;
1288
switch (param->file)
1291
assert(!param->rel);
1292
tx_temp_alloc(tx, param->idx);
1293
dst = tx->regs.r[param->idx];
1295
/* case D3DSPR_TEXTURE: == D3DSPR_ADDR */
1297
assert(!param->rel);
1298
if (tx->version.major < 2 && !IS_VS) {
1299
if (ureg_dst_is_undef(tx->regs.tS[param->idx]))
1300
tx->regs.tS[param->idx] = ureg_DECL_temporary(tx->ureg);
1301
dst = tx->regs.tS[param->idx];
1303
if (!IS_VS && tx->insn.opcode == D3DSIO_TEXKILL) { /* maybe others, too */
1304
tx_texcoord_alloc(tx, param->idx);
1305
dst = ureg_dst(tx->regs.vT[param->idx]);
1307
tx_addr_alloc(tx, param->idx);
1311
case D3DSPR_RASTOUT:
1312
assert(!param->rel);
1313
switch (param->idx) {
1315
if (ureg_dst_is_undef(tx->regs.oPos))
1317
ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_POSITION, 0);
1318
dst = tx->regs.oPos;
1321
if (ureg_dst_is_undef(tx->regs.oFog))
1323
ureg_saturate(ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_GENERIC, 16));
1324
dst = tx->regs.oFog;
1327
if (ureg_dst_is_undef(tx->regs.oPts))
1328
tx->regs.oPts = ureg_DECL_temporary(tx->ureg);
1329
dst = tx->regs.oPts;
1336
/* case D3DSPR_TEXCRDOUT: == D3DSPR_OUTPUT */
1338
if (tx->version.major < 3) {
1339
assert(!param->rel);
1340
dst = ureg_DECL_output(tx->ureg, tx->texcoord_sn, param->idx);
1342
assert(!param->rel); /* TODO */
1343
assert(param->idx < ARRAY_SIZE(tx->regs.o));
1344
dst = tx->regs.o[param->idx];
1347
case D3DSPR_ATTROUT: /* VS */
1348
case D3DSPR_COLOROUT: /* PS */
1349
assert(param->idx >= 0 && param->idx < 4);
1350
assert(!param->rel);
1351
tx->info->rt_mask |= 1 << param->idx;
1352
if (ureg_dst_is_undef(tx->regs.oCol[param->idx])) {
1353
/* ps < 3: oCol[0] will have fog blending afterward */
1354
if (!IS_VS && tx->version.major < 3 && param->idx == 0) {
1355
tx->regs.oCol[0] = ureg_DECL_temporary(tx->ureg);
1357
tx->regs.oCol[param->idx] =
1358
ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_COLOR, param->idx);
1361
dst = tx->regs.oCol[param->idx];
1362
if (IS_VS && tx->version.major < 3)
1363
dst = ureg_saturate(dst);
1365
case D3DSPR_DEPTHOUT:
1366
assert(!param->rel);
1367
if (ureg_dst_is_undef(tx->regs.oDepth))
1369
ureg_DECL_output_masked(tx->ureg, TGSI_SEMANTIC_POSITION, 0,
1370
TGSI_WRITEMASK_Z, 0, 1);
1371
dst = tx->regs.oDepth; /* XXX: must write .z component */
1373
case D3DSPR_PREDICATE:
1374
if (ureg_dst_is_undef(tx->regs.predicate))
1375
tx->regs.predicate = ureg_DECL_temporary(tx->ureg);
1376
dst = tx->regs.predicate;
1378
case D3DSPR_TEMPFLOAT16:
1379
DBG("unhandled D3DSPR: %u\n", param->file);
1382
assert(!"invalid dst D3DSPR");
1386
dst = ureg_dst_indirect(dst, tx_src_param(tx, param->rel));
1388
if (param->mask != NINED3DSP_WRITEMASK_ALL)
1389
dst = ureg_writemask(dst, param->mask);
1390
if (param->mod & NINED3DSPDM_SATURATE)
1391
dst = ureg_saturate(dst);
1393
if (tx->predicated_activated) {
1394
tx->regs.predicate_dst = dst;
1395
dst = tx->regs.predicate_tmp;
1401
static struct ureg_dst
1402
tx_dst_param(struct shader_translator *tx, const struct sm1_dst_param *param)
1405
tx->regs.tdst = ureg_writemask(tx_scratch(tx), param->mask);
1406
return tx->regs.tdst;
1408
return _tx_dst_param(tx, param);
1412
tx_apply_dst0_modifiers(struct shader_translator *tx)
1414
struct ureg_dst rdst;
1417
if (!tx->insn.ndst || !tx->insn.dst[0].shift || tx->insn.opcode == D3DSIO_TEXKILL)
1419
rdst = _tx_dst_param(tx, &tx->insn.dst[0]);
1421
assert(rdst.File != TGSI_FILE_ADDRESS); /* this probably isn't possible */
1423
if (tx->insn.dst[0].shift < 0)
1424
f = 1.0f / (1 << -tx->insn.dst[0].shift);
1426
f = 1 << tx->insn.dst[0].shift;
1428
ureg_MUL(tx->ureg, rdst, ureg_src(tx->regs.tdst), ureg_imm1f(tx->ureg, f));
1431
static struct ureg_src
1432
tx_dst_param_as_src(struct shader_translator *tx, const struct sm1_dst_param *param)
1434
struct ureg_src src;
1436
assert(!param->shift);
1437
assert(!(param->mod & NINED3DSPDM_SATURATE));
1439
switch (param->file) {
1442
src = ureg_src_register(TGSI_FILE_INPUT, param->idx);
1444
assert(!param->rel);
1445
assert(param->idx < ARRAY_SIZE(tx->regs.v));
1446
src = tx->regs.v[param->idx];
1450
src = ureg_src(tx_dst_param(tx, param));
1454
src = ureg_src_indirect(src, tx_src_param(tx, param->rel));
1457
WARN("mask is 0, using identity swizzle\n");
1459
if (param->mask && param->mask != NINED3DSP_WRITEMASK_ALL) {
1463
for (n = 0, c = 0; c < 4; ++c)
1464
if (param->mask & (1 << c))
1467
for (c = n; c < 4; ++c)
1469
src = ureg_swizzle(src, s[0], s[1], s[2], s[3]);
1475
NineTranslateInstruction_Mkxn(struct shader_translator *tx, const unsigned k, const unsigned n)
1477
struct ureg_program *ureg = tx->ureg;
1478
struct ureg_dst dst;
1479
struct ureg_src src[2];
1480
struct sm1_src_param *src_mat = &tx->insn.src[1];
1483
dst = tx_dst_param(tx, &tx->insn.dst[0]);
1484
src[0] = tx_src_param(tx, &tx->insn.src[0]);
1486
for (i = 0; i < n; i++)
1488
const unsigned m = (1 << i);
1490
src[1] = tx_src_param(tx, src_mat);
1493
if (!(dst.WriteMask & m))
1496
/* XXX: src == dst case ? */
1500
ureg_DP3(ureg, ureg_writemask(dst, m), src[0], src[1]);
1503
ureg_DP4(ureg, ureg_writemask(dst, m), src[0], src[1]);
1506
DBG("invalid operation: M%ux%u\n", m, n);
1514
#define VNOTSUPPORTED 0, 0
1515
#define V(maj, min) (((maj) << 8) | (min))
1517
static inline const char *
1518
d3dsio_to_string( unsigned opcode )
1520
static const char *names[] = {
1620
if (opcode < ARRAY_SIZE(names)) return names[opcode];
1623
case D3DSIO_PHASE: return "PHASE";
1624
case D3DSIO_COMMENT: return "COMMENT";
1625
case D3DSIO_END: return "END";
1631
#define NULL_INSTRUCTION { 0, { 0, 0 }, { 0, 0 }, 0, 0, NULL }
1632
#define IS_VALID_INSTRUCTION(inst) ((inst).vert_version.min | \
1633
(inst).vert_version.max | \
1634
(inst).frag_version.min | \
1635
(inst).frag_version.max)
1637
#define SPECIAL(name) \
1638
NineTranslateInstruction_##name
1640
#define DECL_SPECIAL(name) \
1642
NineTranslateInstruction_##name( struct shader_translator *tx )
1645
NineTranslateInstruction_Generic(struct shader_translator *);
1649
/* Nothing to do. NOP was used to avoid hangs
1650
* with very old d3d drivers. */
1656
struct ureg_program *ureg = tx->ureg;
1657
struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
1658
struct ureg_src src0 = tx_src_param(tx, &tx->insn.src[0]);
1659
struct ureg_src src1 = tx_src_param(tx, &tx->insn.src[1]);
1661
ureg_ADD(ureg, dst, src0, ureg_negate(src1));
1667
struct ureg_program *ureg = tx->ureg;
1668
struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
1669
struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
1671
ureg_MOV(ureg, dst, ureg_abs(src));
1677
struct ureg_program *ureg = tx->ureg;
1678
struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
1679
struct ureg_src src0 = tx_src_param(tx, &tx->insn.src[0]);
1680
struct ureg_src src1 = tx_src_param(tx, &tx->insn.src[1]);
1682
ureg_MUL(ureg, ureg_writemask(dst, TGSI_WRITEMASK_XYZ),
1683
ureg_swizzle(src0, TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Z,
1685
ureg_swizzle(src1, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_X,
1686
TGSI_SWIZZLE_Y, 0));
1687
ureg_MAD(ureg, ureg_writemask(dst, TGSI_WRITEMASK_XYZ),
1688
ureg_swizzle(src0, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_X,
1690
ureg_negate(ureg_swizzle(src1, TGSI_SWIZZLE_Y,
1691
TGSI_SWIZZLE_Z, TGSI_SWIZZLE_X, 0)),
1693
ureg_MOV(ureg, ureg_writemask(dst, TGSI_WRITEMASK_W),
1694
ureg_imm1f(ureg, 1));
1700
return NineTranslateInstruction_Mkxn(tx, 4, 4);
1705
return NineTranslateInstruction_Mkxn(tx, 4, 3);
1710
return NineTranslateInstruction_Mkxn(tx, 3, 4);
1715
return NineTranslateInstruction_Mkxn(tx, 3, 3);
1720
return NineTranslateInstruction_Mkxn(tx, 3, 2);
1725
ureg_CMP(tx->ureg, tx_dst_param(tx, &tx->insn.dst[0]),
1726
tx_src_param(tx, &tx->insn.src[0]),
1727
tx_src_param(tx, &tx->insn.src[2]),
1728
tx_src_param(tx, &tx->insn.src[1]));
1734
struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
1735
struct ureg_dst cgt;
1736
struct ureg_src cnd;
1738
/* the coissue flag was a tip for compilers to advise to
1739
* execute two operations at the same time, in cases
1740
* the two executions had same dst with different channels.
1741
* It has no effect on current hw. However it seems CND
1742
* is affected. The handling of this very specific case
1743
* handled below mimick wine behaviour */
1744
if (tx->insn.coissue && tx->version.major == 1 && tx->version.minor < 4 && tx->insn.dst[0].mask != NINED3DSP_WRITEMASK_3) {
1746
dst, tx_src_param(tx, &tx->insn.src[1]));
1750
cnd = tx_src_param(tx, &tx->insn.src[0]);
1751
cgt = tx_scratch(tx);
1753
if (tx->version.major == 1 && tx->version.minor < 4)
1754
cnd = ureg_scalar(cnd, TGSI_SWIZZLE_W);
1756
ureg_SGT(tx->ureg, cgt, cnd, ureg_imm1f(tx->ureg, 0.5f));
1758
ureg_CMP(tx->ureg, dst, ureg_negate(ureg_src(cgt)),
1759
tx_src_param(tx, &tx->insn.src[1]),
1760
tx_src_param(tx, &tx->insn.src[2]));
1766
assert(tx->insn.src[0].idx < tx->num_inst_labels);
1767
ureg_CAL(tx->ureg, &tx->inst_labels[tx->insn.src[0].idx]);
1771
DECL_SPECIAL(CALLNZ)
1773
struct ureg_program *ureg = tx->ureg;
1774
struct ureg_src src = tx_src_param(tx, &tx->insn.src[1]);
1776
if (!tx->native_integers)
1777
ureg_IF(ureg, src, tx_cond(tx));
1779
ureg_UIF(ureg, src, tx_cond(tx));
1780
ureg_CAL(ureg, &tx->inst_labels[tx->insn.src[0].idx]);
1788
struct ureg_program *ureg = tx->ureg;
1790
struct ureg_src src = tx_src_param(tx, &tx->insn.src[1]);
1791
struct ureg_dst ctr;
1792
struct ureg_dst tmp;
1793
struct ureg_src ctrx;
1795
label = tx_bgnloop(tx);
1796
ctr = tx_get_loopctr(tx, TRUE);
1797
ctrx = ureg_scalar(ureg_src(ctr), TGSI_SWIZZLE_X);
1799
/* src: num_iterations - start_value of al - step for al - 0 */
1800
ureg_MOV(ureg, ctr, src);
1801
ureg_BGNLOOP(tx->ureg, label);
1802
tmp = tx_scratch_scalar(tx);
1803
/* Initially ctr.x contains the number of iterations.
1804
* ctr.y will contain the updated value of al.
1805
* We decrease ctr.x at the end of every iteration,
1806
* and stop when it reaches 0. */
1808
if (!tx->native_integers) {
1809
/* case src and ctr contain floats */
1810
/* to avoid precision issue, we stop when ctr <= 0.5 */
1811
ureg_SGE(ureg, tmp, ureg_imm1f(ureg, 0.5f), ctrx);
1812
ureg_IF(ureg, tx_src_scalar(tmp), tx_cond(tx));
1814
/* case src and ctr contain integers */
1815
ureg_ISGE(ureg, tmp, ureg_imm1i(ureg, 0), ctrx);
1816
ureg_UIF(ureg, tx_src_scalar(tmp), tx_cond(tx));
1826
/* RET as a last instruction could be safely ignored.
1827
* Remove it to prevent crashes/warnings in case underlying
1828
* driver doesn't implement arbitrary returns.
1830
if (*(tx->parse_next) != NINED3DSP_END) {
1836
DECL_SPECIAL(ENDLOOP)
1838
struct ureg_program *ureg = tx->ureg;
1839
struct ureg_dst ctr = tx_get_loopctr(tx, TRUE);
1840
struct ureg_dst dst_ctrx, dst_al;
1841
struct ureg_src src_ctr, al_counter;
1843
dst_ctrx = ureg_writemask(ctr, NINED3DSP_WRITEMASK_0);
1844
dst_al = ureg_writemask(ctr, NINED3DSP_WRITEMASK_1);
1845
src_ctr = ureg_src(ctr);
1846
al_counter = ureg_scalar(src_ctr, TGSI_SWIZZLE_Z);
1849
* ctr.y (aL) += step */
1850
if (!tx->native_integers) {
1851
ureg_ADD(ureg, dst_ctrx, src_ctr, ureg_imm1f(ureg, -1.0f));
1852
ureg_ADD(ureg, dst_al, src_ctr, al_counter);
1854
ureg_UADD(ureg, dst_ctrx, src_ctr, ureg_imm1i(ureg, -1));
1855
ureg_UADD(ureg, dst_al, src_ctr, al_counter);
1857
ureg_ENDLOOP(tx->ureg, tx_endloop(tx));
1863
unsigned k = tx->num_inst_labels;
1864
unsigned n = tx->insn.src[0].idx;
1867
tx->inst_labels = REALLOC(tx->inst_labels,
1868
k * sizeof(tx->inst_labels[0]),
1869
n * sizeof(tx->inst_labels[0]));
1871
tx->inst_labels[n] = ureg_get_instruction_number(tx->ureg);
1875
DECL_SPECIAL(SINCOS)
1877
struct ureg_program *ureg = tx->ureg;
1878
struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
1879
struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
1880
struct ureg_dst tmp = tx_scratch_scalar(tx);
1882
assert(!(dst.WriteMask & 0xc));
1884
/* Copying to a temporary register avoids src/dst aliasing.
1885
* src is supposed to have replicated swizzle. */
1886
ureg_MOV(ureg, tmp, src);
1888
/* z undefined, w untouched */
1889
ureg_COS(ureg, ureg_writemask(dst, TGSI_WRITEMASK_X),
1890
tx_src_scalar(tmp));
1891
ureg_SIN(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Y),
1892
tx_src_scalar(tmp));
1899
tx_dst_param(tx, &tx->insn.dst[0]),
1900
tx_src_param(tx, &tx->insn.src[0]));
1906
struct ureg_program *ureg = tx->ureg;
1908
struct ureg_src rep = tx_src_param(tx, &tx->insn.src[0]);
1909
struct ureg_dst ctr;
1910
struct ureg_dst tmp;
1911
struct ureg_src ctrx;
1913
label = tx_bgnloop(tx);
1914
ctr = ureg_writemask(tx_get_loopctr(tx, FALSE), NINED3DSP_WRITEMASK_0);
1915
ctrx = ureg_scalar(ureg_src(ctr), TGSI_SWIZZLE_X);
1917
/* NOTE: rep must be constant, so we don't have to save the count */
1918
assert(rep.File == TGSI_FILE_CONSTANT || rep.File == TGSI_FILE_IMMEDIATE);
1920
/* rep: num_iterations - 0 - 0 - 0 */
1921
ureg_MOV(ureg, ctr, rep);
1922
ureg_BGNLOOP(ureg, label);
1923
tmp = tx_scratch_scalar(tx);
1924
/* Initially ctr.x contains the number of iterations.
1925
* We decrease ctr.x at the end of every iteration,
1926
* and stop when it reaches 0. */
1928
if (!tx->native_integers) {
1929
/* case src and ctr contain floats */
1930
/* to avoid precision issue, we stop when ctr <= 0.5 */
1931
ureg_SGE(ureg, tmp, ureg_imm1f(ureg, 0.5f), ctrx);
1932
ureg_IF(ureg, tx_src_scalar(tmp), tx_cond(tx));
1934
/* case src and ctr contain integers */
1935
ureg_ISGE(ureg, tmp, ureg_imm1i(ureg, 0), ctrx);
1936
ureg_UIF(ureg, tx_src_scalar(tmp), tx_cond(tx));
1945
DECL_SPECIAL(ENDREP)
1947
struct ureg_program *ureg = tx->ureg;
1948
struct ureg_dst ctr = tx_get_loopctr(tx, FALSE);
1949
struct ureg_dst dst_ctrx = ureg_writemask(ctr, NINED3DSP_WRITEMASK_0);
1950
struct ureg_src src_ctr = ureg_src(ctr);
1953
if (!tx->native_integers)
1954
ureg_ADD(ureg, dst_ctrx, src_ctr, ureg_imm1f(ureg, -1.0f));
1956
ureg_UADD(ureg, dst_ctrx, src_ctr, ureg_imm1i(ureg, -1));
1958
ureg_ENDLOOP(tx->ureg, tx_endloop(tx));
1965
ureg_ENDIF(tx->ureg);
1971
struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
1973
if (tx->native_integers && tx->insn.src[0].file == D3DSPR_CONSTBOOL)
1974
ureg_UIF(tx->ureg, src, tx_cond(tx));
1976
ureg_IF(tx->ureg, src, tx_cond(tx));
1981
static inline unsigned
1982
sm1_insn_flags_to_tgsi_setop(BYTE flags)
1985
case NINED3DSHADER_REL_OP_GT: return TGSI_OPCODE_SGT;
1986
case NINED3DSHADER_REL_OP_EQ: return TGSI_OPCODE_SEQ;
1987
case NINED3DSHADER_REL_OP_GE: return TGSI_OPCODE_SGE;
1988
case NINED3DSHADER_REL_OP_LT: return TGSI_OPCODE_SLT;
1989
case NINED3DSHADER_REL_OP_NE: return TGSI_OPCODE_SNE;
1990
case NINED3DSHADER_REL_OP_LE: return TGSI_OPCODE_SLE;
1992
assert(!"invalid comparison flags");
1993
return TGSI_OPCODE_SGT;
1999
const unsigned cmp_op = sm1_insn_flags_to_tgsi_setop(tx->insn.flags);
2000
struct ureg_src src[2];
2001
struct ureg_dst tmp = ureg_writemask(tx_scratch(tx), TGSI_WRITEMASK_X);
2002
src[0] = tx_src_param(tx, &tx->insn.src[0]);
2003
src[1] = tx_src_param(tx, &tx->insn.src[1]);
2004
ureg_insn(tx->ureg, cmp_op, &tmp, 1, src, 2, 0);
2005
ureg_IF(tx->ureg, ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), tx_cond(tx));
2011
ureg_ELSE(tx->ureg, tx_elsecond(tx));
2015
DECL_SPECIAL(BREAKC)
2017
const unsigned cmp_op = sm1_insn_flags_to_tgsi_setop(tx->insn.flags);
2018
struct ureg_src src[2];
2019
struct ureg_dst tmp = ureg_writemask(tx_scratch(tx), TGSI_WRITEMASK_X);
2020
src[0] = tx_src_param(tx, &tx->insn.src[0]);
2021
src[1] = tx_src_param(tx, &tx->insn.src[1]);
2022
ureg_insn(tx->ureg, cmp_op, &tmp, 1, src, 2, 0);
2023
ureg_IF(tx->ureg, ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), tx_cond(tx));
2026
ureg_ENDIF(tx->ureg);
2030
static const char *sm1_declusage_names[] =
2032
[D3DDECLUSAGE_POSITION] = "POSITION",
2033
[D3DDECLUSAGE_BLENDWEIGHT] = "BLENDWEIGHT",
2034
[D3DDECLUSAGE_BLENDINDICES] = "BLENDINDICES",
2035
[D3DDECLUSAGE_NORMAL] = "NORMAL",
2036
[D3DDECLUSAGE_PSIZE] = "PSIZE",
2037
[D3DDECLUSAGE_TEXCOORD] = "TEXCOORD",
2038
[D3DDECLUSAGE_TANGENT] = "TANGENT",
2039
[D3DDECLUSAGE_BINORMAL] = "BINORMAL",
2040
[D3DDECLUSAGE_TESSFACTOR] = "TESSFACTOR",
2041
[D3DDECLUSAGE_POSITIONT] = "POSITIONT",
2042
[D3DDECLUSAGE_COLOR] = "COLOR",
2043
[D3DDECLUSAGE_FOG] = "FOG",
2044
[D3DDECLUSAGE_DEPTH] = "DEPTH",
2045
[D3DDECLUSAGE_SAMPLE] = "SAMPLE"
2048
static inline unsigned
2049
sm1_to_nine_declusage(struct sm1_semantic *dcl)
2051
return nine_d3d9_to_nine_declusage(dcl->usage, dcl->usage_idx);
2055
sm1_declusage_to_tgsi(struct tgsi_declaration_semantic *sem,
2057
struct sm1_semantic *dcl)
2059
BYTE index = dcl->usage_idx;
2061
/* For everything that is not matching to a TGSI_SEMANTIC_****,
2062
* we match to a TGSI_SEMANTIC_GENERIC with index.
2064
* The index can be anything UINT16 and usage_idx is BYTE,
2065
* so we can fit everything. It doesn't matter if indices
2066
* are close together or low.
2069
* POSITION >= 1: 10 * index + 7
2070
* COLOR >= 2: 10 * (index-1) + 8
2072
* TEXCOORD[0..15]: index
2073
* BLENDWEIGHT: 10 * index + 19
2074
* BLENDINDICES: 10 * index + 20
2075
* NORMAL: 10 * index + 21
2076
* TANGENT: 10 * index + 22
2077
* BINORMAL: 10 * index + 23
2078
* TESSFACTOR: 10 * index + 24
2081
switch (dcl->usage) {
2082
case D3DDECLUSAGE_POSITION:
2083
case D3DDECLUSAGE_POSITIONT:
2084
case D3DDECLUSAGE_DEPTH:
2086
sem->Name = TGSI_SEMANTIC_POSITION;
2089
sem->Name = TGSI_SEMANTIC_GENERIC;
2090
sem->Index = 10 * index + 7;
2093
case D3DDECLUSAGE_COLOR:
2095
sem->Name = TGSI_SEMANTIC_COLOR;
2098
sem->Name = TGSI_SEMANTIC_GENERIC;
2099
sem->Index = 10 * (index-1) + 8;
2102
case D3DDECLUSAGE_FOG:
2104
sem->Name = TGSI_SEMANTIC_GENERIC;
2107
case D3DDECLUSAGE_PSIZE:
2109
sem->Name = TGSI_SEMANTIC_PSIZE;
2112
case D3DDECLUSAGE_TEXCOORD:
2114
if (index < 8 && tc)
2115
sem->Name = TGSI_SEMANTIC_TEXCOORD;
2117
sem->Name = TGSI_SEMANTIC_GENERIC;
2120
case D3DDECLUSAGE_BLENDWEIGHT:
2121
sem->Name = TGSI_SEMANTIC_GENERIC;
2122
sem->Index = 10 * index + 19;
2124
case D3DDECLUSAGE_BLENDINDICES:
2125
sem->Name = TGSI_SEMANTIC_GENERIC;
2126
sem->Index = 10 * index + 20;
2128
case D3DDECLUSAGE_NORMAL:
2129
sem->Name = TGSI_SEMANTIC_GENERIC;
2130
sem->Index = 10 * index + 21;
2132
case D3DDECLUSAGE_TANGENT:
2133
sem->Name = TGSI_SEMANTIC_GENERIC;
2134
sem->Index = 10 * index + 22;
2136
case D3DDECLUSAGE_BINORMAL:
2137
sem->Name = TGSI_SEMANTIC_GENERIC;
2138
sem->Index = 10 * index + 23;
2140
case D3DDECLUSAGE_TESSFACTOR:
2141
sem->Name = TGSI_SEMANTIC_GENERIC;
2142
sem->Index = 10 * index + 24;
2144
case D3DDECLUSAGE_SAMPLE:
2145
sem->Name = TGSI_SEMANTIC_COUNT;
2149
unreachable("Invalid DECLUSAGE.");
2154
#define NINED3DSTT_1D (D3DSTT_1D >> D3DSP_TEXTURETYPE_SHIFT)
2155
#define NINED3DSTT_2D (D3DSTT_2D >> D3DSP_TEXTURETYPE_SHIFT)
2156
#define NINED3DSTT_VOLUME (D3DSTT_VOLUME >> D3DSP_TEXTURETYPE_SHIFT)
2157
#define NINED3DSTT_CUBE (D3DSTT_CUBE >> D3DSP_TEXTURETYPE_SHIFT)
2158
static inline unsigned
2159
d3dstt_to_tgsi_tex(BYTE sampler_type)
2161
switch (sampler_type) {
2162
case NINED3DSTT_1D: return TGSI_TEXTURE_1D;
2163
case NINED3DSTT_2D: return TGSI_TEXTURE_2D;
2164
case NINED3DSTT_VOLUME: return TGSI_TEXTURE_3D;
2165
case NINED3DSTT_CUBE: return TGSI_TEXTURE_CUBE;
2168
return TGSI_TEXTURE_UNKNOWN;
2171
static inline unsigned
2172
d3dstt_to_tgsi_tex_shadow(BYTE sampler_type)
2174
switch (sampler_type) {
2175
case NINED3DSTT_1D: return TGSI_TEXTURE_SHADOW1D;
2176
case NINED3DSTT_2D: return TGSI_TEXTURE_SHADOW2D;
2177
case NINED3DSTT_VOLUME:
2178
case NINED3DSTT_CUBE:
2181
return TGSI_TEXTURE_UNKNOWN;
2184
static inline unsigned
2185
ps1x_sampler_type(const struct nine_shader_info *info, unsigned stage)
2187
boolean shadow = !!(info->sampler_mask_shadow & (1 << stage));
2188
switch ((info->sampler_ps1xtypes >> (stage * 2)) & 0x3) {
2189
case 1: return shadow ? TGSI_TEXTURE_SHADOW1D : TGSI_TEXTURE_1D;
2190
case 0: return shadow ? TGSI_TEXTURE_SHADOW2D : TGSI_TEXTURE_2D;
2191
case 3: return TGSI_TEXTURE_3D;
2193
return TGSI_TEXTURE_CUBE;
2198
/* Name of a D3D sampler texture type, for DCL dumps */
static const char *
sm1_sampler_type_name(BYTE sampler_type)
{
    switch (sampler_type) {
    case NINED3DSTT_1D: return "1D";
    case NINED3DSTT_2D: return "2D";
    case NINED3DSTT_VOLUME: return "VOLUME";
    case NINED3DSTT_CUBE: return "CUBE";
    default:
        return "(D3DSTT_?)";
    }
}
2210
static inline unsigned
2211
nine_tgsi_to_interp_mode(struct tgsi_declaration_semantic *sem)
2213
switch (sem->Name) {
2214
case TGSI_SEMANTIC_POSITION:
2215
case TGSI_SEMANTIC_NORMAL:
2216
return TGSI_INTERPOLATE_LINEAR;
2217
case TGSI_SEMANTIC_BCOLOR:
2218
case TGSI_SEMANTIC_COLOR:
2219
return TGSI_INTERPOLATE_COLOR;
2220
case TGSI_SEMANTIC_FOG:
2221
case TGSI_SEMANTIC_GENERIC:
2222
case TGSI_SEMANTIC_TEXCOORD:
2223
case TGSI_SEMANTIC_CLIPDIST:
2224
case TGSI_SEMANTIC_CLIPVERTEX:
2225
return TGSI_INTERPOLATE_PERSPECTIVE;
2226
case TGSI_SEMANTIC_EDGEFLAG:
2227
case TGSI_SEMANTIC_FACE:
2228
case TGSI_SEMANTIC_INSTANCEID:
2229
case TGSI_SEMANTIC_PCOORD:
2230
case TGSI_SEMANTIC_PRIMID:
2231
case TGSI_SEMANTIC_PSIZE:
2232
case TGSI_SEMANTIC_VERTEXID:
2233
return TGSI_INTERPOLATE_CONSTANT;
2236
return TGSI_INTERPOLATE_CONSTANT;
2242
struct ureg_program *ureg = tx->ureg;
2245
struct tgsi_declaration_semantic tgsi;
2246
struct sm1_semantic sem;
2247
sm1_read_semantic(tx, &sem);
2249
is_input = sem.reg.file == D3DSPR_INPUT;
2251
sem.usage == D3DDECLUSAGE_SAMPLE || sem.reg.file == D3DSPR_SAMPLER;
2254
sm1_dump_dst_param(&sem.reg);
2256
DUMP(" %s\n", sm1_sampler_type_name(sem.sampler_type));
2258
if (tx->version.major >= 3)
2259
DUMP(" %s%i\n", sm1_declusage_names[sem.usage], sem.usage_idx);
2261
if (sem.usage | sem.usage_idx)
2262
DUMP(" %u[%u]\n", sem.usage, sem.usage_idx);
2267
const unsigned m = 1 << sem.reg.idx;
2268
ureg_DECL_sampler(ureg, sem.reg.idx);
2269
tx->info->sampler_mask |= m;
2270
tx->sampler_targets[sem.reg.idx] = (tx->info->sampler_mask_shadow & m) ?
2271
d3dstt_to_tgsi_tex_shadow(sem.sampler_type) :
2272
d3dstt_to_tgsi_tex(sem.sampler_type);
2276
sm1_declusage_to_tgsi(&tgsi, tx->want_texcoord, &sem);
2279
/* linkage outside of shader with vertex declaration */
2280
ureg_DECL_vs_input(ureg, sem.reg.idx);
2281
assert(sem.reg.idx < ARRAY_SIZE(tx->info->input_map));
2282
tx->info->input_map[sem.reg.idx] = sm1_to_nine_declusage(&sem);
2283
tx->info->num_inputs = MAX2(tx->info->num_inputs, sem.reg.idx + 1);
2284
/* NOTE: preserving order in case of indirect access */
2286
if (tx->version.major >= 3) {
2287
/* SM2 output semantic determined by file */
2288
assert(sem.reg.mask != 0);
2289
if (sem.usage == D3DDECLUSAGE_POSITIONT)
2290
tx->info->position_t = TRUE;
2291
assert(sem.reg.idx < ARRAY_SIZE(tx->regs.o));
2292
assert(ureg_dst_is_undef(tx->regs.o[sem.reg.idx]) && "Nine doesn't support yet packing");
2293
tx->regs.o[sem.reg.idx] = ureg_DECL_output_masked(
2294
ureg, tgsi.Name, tgsi.Index, sem.reg.mask, 0, 1);
2295
nine_record_outputs(tx, sem.usage, sem.usage_idx, sem.reg.mask, sem.reg.idx);
2296
if (tx->info->process_vertices && sem.usage == D3DDECLUSAGE_POSITION && sem.usage_idx == 0) {
2297
tx->regs.oPos_out = tx->regs.o[sem.reg.idx];
2298
tx->regs.o[sem.reg.idx] = ureg_DECL_temporary(ureg);
2299
tx->regs.oPos = tx->regs.o[sem.reg.idx];
2302
if (tgsi.Name == TGSI_SEMANTIC_PSIZE) {
2303
tx->regs.o[sem.reg.idx] = ureg_DECL_temporary(ureg);
2304
tx->regs.oPts = tx->regs.o[sem.reg.idx];
2308
if (is_input && tx->version.major >= 3) {
2309
unsigned interp_location = 0;
2310
/* SM3 only, SM2 input semantic determined by file */
2311
assert(sem.reg.idx < ARRAY_SIZE(tx->regs.v));
2312
assert(ureg_src_is_undef(tx->regs.v[sem.reg.idx]) && "Nine doesn't support yet packing");
2313
/* PositionT and tessfactor forbidden */
2314
if (sem.usage == D3DDECLUSAGE_POSITIONT || sem.usage == D3DDECLUSAGE_TESSFACTOR)
2315
return D3DERR_INVALIDCALL;
2317
if (tgsi.Name == TGSI_SEMANTIC_POSITION) {
2318
/* Position0 is forbidden (likely because vPos already does that) */
2319
if (sem.usage == D3DDECLUSAGE_POSITION)
2320
return D3DERR_INVALIDCALL;
2321
/* Following code is for depth */
2322
tx->regs.v[sem.reg.idx] = nine_get_position_input(tx);
2326
if (sem.reg.mod & NINED3DSPDM_CENTROID ||
2327
(tgsi.Name == TGSI_SEMANTIC_COLOR && tx->info->force_color_in_centroid))
2328
interp_location = TGSI_INTERPOLATE_LOC_CENTROID;
2330
tx->regs.v[sem.reg.idx] = ureg_DECL_fs_input_centroid(
2331
ureg, tgsi.Name, tgsi.Index,
2332
nine_tgsi_to_interp_mode(&tgsi),
2333
interp_location, 0, 1);
2335
if (!is_input && 0) { /* declare in COLOROUT/DEPTHOUT case */
2336
/* FragColor or FragDepth */
2337
assert(sem.reg.mask != 0);
2338
ureg_DECL_output_masked(ureg, tgsi.Name, tgsi.Index, sem.reg.mask,
2347
tx_set_lconstf(tx, tx->insn.dst[0].idx, tx->insn.src[0].imm.f);
2353
tx_set_lconstb(tx, tx->insn.dst[0].idx, tx->insn.src[0].imm.b);
2359
tx_set_lconsti(tx, tx->insn.dst[0].idx, tx->insn.src[0].imm.i);
2365
struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2366
struct ureg_src src[2] = {
2367
tx_src_param(tx, &tx->insn.src[0]),
2368
tx_src_param(tx, &tx->insn.src[1])
2370
ureg_POW(tx->ureg, dst, ureg_abs(src[0]), src[1]);
2374
/* Tests results on Win 10:
 * NV (NVIDIA GeForce GT 635M)
 * AMD (AMD Radeon HD 7730M)
 * INTEL (Intel(R) HD Graphics 4000)
 *
 * RCP and RSQ can generate inf on NV and AMD.
 * RCP and RSQ are clamped on INTEL (+- FLT_MAX),
 * NV: log not clamped
 * AMD: log(0) is -FLT_MAX (but log(inf) is inf)
 * INTEL: log(0) is -FLT_MAX and log(inf) is 127
 * All devices have 0*anything = 0
 *
 * INTEL VS2 and VS3: same behaviour.
 * Some differences VS2 and VS3 for constants defined with inf/NaN.
 * While PS3, VS3 and PS2 keep NaN and Inf shader constants without change,
 * VS2 seems to clamp to zero (may be test failure).
 * AMD VS2: unknown, VS3: very likely behaviour of PS3
 * NV VS2 and VS3: very likely behaviour of PS3
 * For both, Inf in VS becomes NaN in PS
 * "Very likely" because the test was less extensive.
 *
 * Thus all clamping can be removed for shaders 2 and 3,
 * as long as 0*anything = 0.
 * Else clamps to enforce 0*anything = 0 (anything being then
 * neither inf or NaN, the user being unlikely to pass them
 * as constants).
 * The status for VS1 and PS1 is unknown.
 */
2405
struct ureg_program *ureg = tx->ureg;
2406
struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2407
struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
2408
struct ureg_dst tmp = tx->mul_zero_wins ? dst : tx_scratch(tx);
2409
ureg_RCP(ureg, tmp, src);
2410
if (!tx->mul_zero_wins) {
2411
/* FLT_MAX has issues with Rayman */
2412
ureg_MIN(ureg, tmp, ureg_imm1f(ureg, FLT_MAX/2.f), ureg_src(tmp));
2413
ureg_MAX(ureg, dst, ureg_imm1f(ureg, -FLT_MAX/2.f), ureg_src(tmp));
2420
struct ureg_program *ureg = tx->ureg;
2421
struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2422
struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
2423
struct ureg_dst tmp = tx->mul_zero_wins ? dst : tx_scratch(tx);
2424
ureg_RSQ(ureg, tmp, ureg_abs(src));
2425
if (!tx->mul_zero_wins)
2426
ureg_MIN(ureg, dst, ureg_imm1f(ureg, FLT_MAX), ureg_src(tmp));
2432
struct ureg_program *ureg = tx->ureg;
2433
struct ureg_dst tmp = tx_scratch_scalar(tx);
2434
struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2435
struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
2436
ureg_LG2(ureg, tmp, ureg_abs(src));
2437
if (tx->mul_zero_wins) {
2438
ureg_MOV(ureg, dst, tx_src_scalar(tmp));
2440
ureg_MAX(ureg, dst, ureg_imm1f(ureg, -FLT_MAX), tx_src_scalar(tmp));
2447
struct ureg_program *ureg = tx->ureg;
2448
struct ureg_dst tmp = tx_scratch(tx);
2449
struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2450
struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
2451
ureg_LIT(ureg, tmp, src);
2452
/* d3d9 LIT is the same than gallium LIT. One difference is that d3d9
2453
* states that dst.z is 0 when src.y <= 0. Gallium definition can assign
2454
* it 0^0 if src.w=0, which value is driver dependent. */
2455
ureg_CMP(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Z),
2456
ureg_negate(ureg_scalar(src, TGSI_SWIZZLE_Y)),
2457
ureg_src(tmp), ureg_imm1f(ureg, 0.0f));
2458
ureg_MOV(ureg, ureg_writemask(dst, TGSI_WRITEMASK_XYW), ureg_src(tmp));
2464
struct ureg_program *ureg = tx->ureg;
2465
struct ureg_dst tmp = tx_scratch_scalar(tx);
2466
struct ureg_src nrm = tx_src_scalar(tmp);
2467
struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2468
struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
2469
ureg_DP3(ureg, tmp, src, src);
2470
ureg_RSQ(ureg, tmp, nrm);
2471
if (!tx->mul_zero_wins)
2472
ureg_MIN(ureg, tmp, ureg_imm1f(ureg, FLT_MAX), nrm);
2473
ureg_MUL(ureg, dst, src, nrm);
2477
DECL_SPECIAL(DP2ADD)
2479
struct ureg_dst tmp = tx_scratch_scalar(tx);
2480
struct ureg_src dp2 = tx_src_scalar(tmp);
2481
struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2482
struct ureg_src src[3];
2484
for (i = 0; i < 3; ++i)
2485
src[i] = tx_src_param(tx, &tx->insn.src[i]);
2486
assert_replicate_swizzle(&src[2]);
2488
ureg_DP2(tx->ureg, tmp, src[0], src[1]);
2489
ureg_ADD(tx->ureg, dst, src[2], dp2);
2494
DECL_SPECIAL(TEXCOORD)
2496
struct ureg_program *ureg = tx->ureg;
2497
const unsigned s = tx->insn.dst[0].idx;
2498
struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2500
tx_texcoord_alloc(tx, s);
2501
ureg_MOV(ureg, ureg_writemask(ureg_saturate(dst), TGSI_WRITEMASK_XYZ), tx->regs.vT[s]);
2502
ureg_MOV(ureg, ureg_writemask(dst, TGSI_WRITEMASK_W), ureg_imm1f(tx->ureg, 1.0f));
2507
DECL_SPECIAL(TEXCOORD_ps14)
2509
struct ureg_program *ureg = tx->ureg;
2510
struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
2511
struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2513
assert(tx->insn.src[0].file == D3DSPR_TEXTURE);
2515
ureg_MOV(ureg, dst, src);
2520
DECL_SPECIAL(TEXKILL)
2522
struct ureg_src reg;
2524
if (tx->version.major > 1 || tx->version.minor > 3) {
2525
reg = tx_dst_param_as_src(tx, &tx->insn.dst[0]);
2527
tx_texcoord_alloc(tx, tx->insn.dst[0].idx);
2528
reg = tx->regs.vT[tx->insn.dst[0].idx];
2530
if (tx->version.major < 2)
2531
reg = ureg_swizzle(reg, NINE_SWIZZLE4(X,Y,Z,Z));
2532
ureg_KILL_IF(tx->ureg, reg);
2537
DECL_SPECIAL(TEXBEM)
2539
struct ureg_program *ureg = tx->ureg;
2540
struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2541
struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]); /* t[n] */
2542
struct ureg_dst tmp, tmp2, texcoord;
2543
struct ureg_src sample, m00, m01, m10, m11, c8m, c16m2;
2544
struct ureg_src bumpenvlscale, bumpenvloffset;
2545
const int m = tx->insn.dst[0].idx;
2547
assert(tx->version.major == 1);
2549
sample = ureg_DECL_sampler(ureg, m);
2550
tx->info->sampler_mask |= 1 << m;
2552
tx_texcoord_alloc(tx, m);
2554
tmp = tx_scratch(tx);
2555
tmp2 = tx_scratch(tx);
2556
texcoord = tx_scratch(tx);
2564
c8m = nine_float_constant_src(tx, 8+m);
2565
c16m2 = nine_float_constant_src(tx, 8+8+m/2);
2567
m00 = NINE_APPLY_SWIZZLE(c8m, X);
2568
m01 = NINE_APPLY_SWIZZLE(c8m, Y);
2569
m10 = NINE_APPLY_SWIZZLE(c8m, Z);
2570
m11 = NINE_APPLY_SWIZZLE(c8m, W);
2572
/* These two attributes are packed as X=scale0 Y=offset0 Z=scale1 W=offset1 etc */
2574
bumpenvlscale = NINE_APPLY_SWIZZLE(c16m2, X);
2575
bumpenvloffset = NINE_APPLY_SWIZZLE(c16m2, Y);
2577
bumpenvlscale = NINE_APPLY_SWIZZLE(c16m2, Z);
2578
bumpenvloffset = NINE_APPLY_SWIZZLE(c16m2, W);
2581
apply_ps1x_projection(tx, texcoord, tx->regs.vT[m], m);
2583
/* u' = TextureCoordinates(stage m)u + D3DTSS_BUMPENVMAT00(stage m)*t(n)R */
2584
ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), m00,
2585
NINE_APPLY_SWIZZLE(src, X), ureg_src(texcoord));
2586
/* u' = u' + D3DTSS_BUMPENVMAT10(stage m)*t(n)G */
2587
ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), m10,
2588
NINE_APPLY_SWIZZLE(src, Y),
2589
NINE_APPLY_SWIZZLE(ureg_src(tmp), X));
2591
/* v' = TextureCoordinates(stage m)v + D3DTSS_BUMPENVMAT01(stage m)*t(n)R */
2592
ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), m01,
2593
NINE_APPLY_SWIZZLE(src, X), ureg_src(texcoord));
2594
/* v' = v' + D3DTSS_BUMPENVMAT11(stage m)*t(n)G*/
2595
ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), m11,
2596
NINE_APPLY_SWIZZLE(src, Y),
2597
NINE_APPLY_SWIZZLE(ureg_src(tmp), Y));
2599
/* Now the texture coordinates are in tmp.xy */
2601
if (tx->insn.opcode == D3DSIO_TEXBEM) {
2602
ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m), ureg_src(tmp), sample);
2603
} else if (tx->insn.opcode == D3DSIO_TEXBEML) {
2604
/* t(m)RGBA = t(m)RGBA * [(t(n)B * D3DTSS_BUMPENVLSCALE(stage m)) + D3DTSS_BUMPENVLOFFSET(stage m)] */
2605
ureg_TEX(ureg, tmp, ps1x_sampler_type(tx->info, m), ureg_src(tmp), sample);
2606
ureg_MAD(ureg, tmp2, NINE_APPLY_SWIZZLE(src, Z),
2607
bumpenvlscale, bumpenvloffset);
2608
ureg_MUL(ureg, dst, ureg_src(tmp), ureg_src(tmp2));
2611
tx->info->bumpenvmat_needed = 1;
2616
DECL_SPECIAL(TEXREG2AR)
2618
struct ureg_program *ureg = tx->ureg;
2619
struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2620
struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]); /* t[n] */
2621
struct ureg_src sample;
2622
const int m = tx->insn.dst[0].idx;
2623
ASSERTED const int n = tx->insn.src[0].idx;
2624
assert(m >= 0 && m > n);
2626
sample = ureg_DECL_sampler(ureg, m);
2627
tx->info->sampler_mask |= 1 << m;
2628
ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m), ureg_swizzle(src, NINE_SWIZZLE4(W,X,X,X)), sample);
2633
DECL_SPECIAL(TEXREG2GB)
2635
struct ureg_program *ureg = tx->ureg;
2636
struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2637
struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]); /* t[n] */
2638
struct ureg_src sample;
2639
const int m = tx->insn.dst[0].idx;
2640
ASSERTED const int n = tx->insn.src[0].idx;
2641
assert(m >= 0 && m > n);
2643
sample = ureg_DECL_sampler(ureg, m);
2644
tx->info->sampler_mask |= 1 << m;
2645
ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m), ureg_swizzle(src, NINE_SWIZZLE4(Y,Z,Z,Z)), sample);
2650
DECL_SPECIAL(TEXM3x2PAD)
2652
return D3D_OK; /* this is just padding */
2655
DECL_SPECIAL(TEXM3x2TEX)
2657
struct ureg_program *ureg = tx->ureg;
2658
struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2659
struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]); /* t[n] */
2660
struct ureg_src sample;
2661
const int m = tx->insn.dst[0].idx - 1;
2662
ASSERTED const int n = tx->insn.src[0].idx;
2663
assert(m >= 0 && m > n);
2665
tx_texcoord_alloc(tx, m);
2666
tx_texcoord_alloc(tx, m+1);
2668
/* performs the matrix multiplication */
2669
ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_X), tx->regs.vT[m], src);
2670
ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Y), tx->regs.vT[m+1], src);
2672
sample = ureg_DECL_sampler(ureg, m + 1);
2673
tx->info->sampler_mask |= 1 << (m + 1);
2674
ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m + 1), ureg_src(dst), sample);
2679
DECL_SPECIAL(TEXM3x3PAD)
2681
return D3D_OK; /* this is just padding */
2684
DECL_SPECIAL(TEXM3x3SPEC)
2686
struct ureg_program *ureg = tx->ureg;
2687
struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2688
struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]); /* t[n] */
2689
struct ureg_src E = tx_src_param(tx, &tx->insn.src[1]);
2690
struct ureg_src sample;
2691
struct ureg_dst tmp;
2692
const int m = tx->insn.dst[0].idx - 2;
2693
ASSERTED const int n = tx->insn.src[0].idx;
2694
assert(m >= 0 && m > n);
2696
tx_texcoord_alloc(tx, m);
2697
tx_texcoord_alloc(tx, m+1);
2698
tx_texcoord_alloc(tx, m+2);
2700
ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_X), tx->regs.vT[m], src);
2701
ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Y), tx->regs.vT[m+1], src);
2702
ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Z), tx->regs.vT[m+2], src);
2704
sample = ureg_DECL_sampler(ureg, m + 2);
2705
tx->info->sampler_mask |= 1 << (m + 2);
2706
tmp = ureg_writemask(tx_scratch(tx), TGSI_WRITEMASK_XYZ);
2708
/* At this step, dst = N = (u', w', z').
2709
* We want dst to be the texture sampled at (u'', w'', z''), with
2710
* (u'', w'', z'') = 2 * (N.E / N.N) * N - E */
2711
ureg_DP3(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_src(dst), ureg_src(dst));
2712
ureg_RCP(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X));
2713
/* at this step tmp.x = 1/N.N */
2714
ureg_DP3(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), ureg_src(dst), E);
2715
/* at this step tmp.y = N.E */
2716
ureg_MUL(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Y));
2717
/* at this step tmp.x = N.E/N.N */
2718
ureg_MUL(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_imm1f(ureg, 2.0f));
2719
ureg_MUL(ureg, tmp, ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_src(dst));
2720
/* at this step tmp.xyz = 2 * (N.E / N.N) * N */
2721
ureg_ADD(ureg, tmp, ureg_src(tmp), ureg_negate(E));
2722
ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m + 2), ureg_src(tmp), sample);
2727
DECL_SPECIAL(TEXREG2RGB)
2729
struct ureg_program *ureg = tx->ureg;
2730
struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2731
struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]); /* t[n] */
2732
struct ureg_src sample;
2733
const int m = tx->insn.dst[0].idx;
2734
ASSERTED const int n = tx->insn.src[0].idx;
2735
assert(m >= 0 && m > n);
2737
sample = ureg_DECL_sampler(ureg, m);
2738
tx->info->sampler_mask |= 1 << m;
2739
ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m), src, sample);
2744
DECL_SPECIAL(TEXDP3TEX)
2746
struct ureg_program *ureg = tx->ureg;
2747
struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2748
struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]); /* t[n] */
2749
struct ureg_dst tmp;
2750
struct ureg_src sample;
2751
const int m = tx->insn.dst[0].idx;
2752
ASSERTED const int n = tx->insn.src[0].idx;
2753
assert(m >= 0 && m > n);
2755
tx_texcoord_alloc(tx, m);
2757
tmp = tx_scratch(tx);
2758
ureg_DP3(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), tx->regs.vT[m], src);
2759
ureg_MOV(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_YZ), ureg_imm1f(ureg, 0.0f));
2761
sample = ureg_DECL_sampler(ureg, m);
2762
tx->info->sampler_mask |= 1 << m;
2763
ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m), ureg_src(tmp), sample);
2768
DECL_SPECIAL(TEXM3x2DEPTH)
2770
struct ureg_program *ureg = tx->ureg;
2771
struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]); /* t[n] */
2772
struct ureg_dst tmp;
2773
const int m = tx->insn.dst[0].idx - 1;
2774
ASSERTED const int n = tx->insn.src[0].idx;
2775
assert(m >= 0 && m > n);
2777
tx_texcoord_alloc(tx, m);
2778
tx_texcoord_alloc(tx, m+1);
2780
tmp = tx_scratch(tx);
2782
/* performs the matrix multiplication */
2783
ureg_DP3(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), tx->regs.vT[m], src);
2784
ureg_DP3(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), tx->regs.vT[m+1], src);
2786
ureg_RCP(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Z), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Y));
2787
/* tmp.x = 'z', tmp.y = 'w', tmp.z = 1/'w'. */
2788
ureg_MUL(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Z));
2789
/* res = 'w' == 0 ? 1.0 : z/w */
2790
ureg_CMP(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_negate(ureg_abs(ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Y))),
2791
ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_imm1f(ureg, 1.0f));
2792
/* replace the depth for depth testing with the result */
2793
tx->regs.oDepth = ureg_DECL_output_masked(ureg, TGSI_SEMANTIC_POSITION, 0,
2794
TGSI_WRITEMASK_Z, 0, 1);
2795
ureg_MOV(ureg, tx->regs.oDepth, ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X));
2796
/* note that we write nothing to the destination, since it's disallowed to use it afterward */
2800
DECL_SPECIAL(TEXDP3)
2802
struct ureg_program *ureg = tx->ureg;
2803
struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2804
struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]); /* t[n] */
2805
const int m = tx->insn.dst[0].idx;
2806
ASSERTED const int n = tx->insn.src[0].idx;
2807
assert(m >= 0 && m > n);
2809
tx_texcoord_alloc(tx, m);
2811
ureg_DP3(ureg, dst, tx->regs.vT[m], src);
2816
DECL_SPECIAL(TEXM3x3)
2818
struct ureg_program *ureg = tx->ureg;
2819
struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2820
struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]); /* t[n] */
2821
struct ureg_src sample;
2822
struct ureg_dst E, tmp;
2823
const int m = tx->insn.dst[0].idx - 2;
2824
ASSERTED const int n = tx->insn.src[0].idx;
2825
assert(m >= 0 && m > n);
2827
tx_texcoord_alloc(tx, m);
2828
tx_texcoord_alloc(tx, m+1);
2829
tx_texcoord_alloc(tx, m+2);
2831
ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_X), tx->regs.vT[m], src);
2832
ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Y), tx->regs.vT[m+1], src);
2833
ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Z), tx->regs.vT[m+2], src);
2835
switch (tx->insn.opcode) {
2836
case D3DSIO_TEXM3x3:
2837
ureg_MOV(ureg, ureg_writemask(dst, TGSI_WRITEMASK_W), ureg_imm1f(ureg, 1.0f));
2839
case D3DSIO_TEXM3x3TEX:
2840
sample = ureg_DECL_sampler(ureg, m + 2);
2841
tx->info->sampler_mask |= 1 << (m + 2);
2842
ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m + 2), ureg_src(dst), sample);
2844
case D3DSIO_TEXM3x3VSPEC:
2845
sample = ureg_DECL_sampler(ureg, m + 2);
2846
tx->info->sampler_mask |= 1 << (m + 2);
2848
tmp = ureg_writemask(tx_scratch(tx), TGSI_WRITEMASK_XYZ);
2849
ureg_MOV(ureg, ureg_writemask(E, TGSI_WRITEMASK_X), ureg_scalar(tx->regs.vT[m], TGSI_SWIZZLE_W));
2850
ureg_MOV(ureg, ureg_writemask(E, TGSI_WRITEMASK_Y), ureg_scalar(tx->regs.vT[m+1], TGSI_SWIZZLE_W));
2851
ureg_MOV(ureg, ureg_writemask(E, TGSI_WRITEMASK_Z), ureg_scalar(tx->regs.vT[m+2], TGSI_SWIZZLE_W));
2852
/* At this step, dst = N = (u', w', z').
2853
* We want dst to be the texture sampled at (u'', w'', z''), with
2854
* (u'', w'', z'') = 2 * (N.E / N.N) * N - E */
2855
ureg_DP3(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_src(dst), ureg_src(dst));
2856
ureg_RCP(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X));
2857
/* at this step tmp.x = 1/N.N */
2858
ureg_DP3(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), ureg_src(dst), ureg_src(E));
2859
/* at this step tmp.y = N.E */
2860
ureg_MUL(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Y));
2861
/* at this step tmp.x = N.E/N.N */
2862
ureg_MUL(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_imm1f(ureg, 2.0f));
2863
ureg_MUL(ureg, tmp, ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_src(dst));
2864
/* at this step tmp.xyz = 2 * (N.E / N.N) * N */
2865
ureg_ADD(ureg, tmp, ureg_src(tmp), ureg_negate(ureg_src(E)));
2866
ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m + 2), ureg_src(tmp), sample);
2869
return D3DERR_INVALIDCALL;
2874
DECL_SPECIAL(TEXDEPTH)
2876
struct ureg_program *ureg = tx->ureg;
2878
struct ureg_src r5r, r5g;
2880
assert(tx->insn.dst[0].idx == 5); /* instruction must get r5 here */
2882
/* we must replace the depth by r5.g == 0 ? 1.0f : r5.r/r5.g.
2883
* r5 won't be used afterward, thus we can use r5.ba */
2885
r5r = ureg_scalar(ureg_src(r5), TGSI_SWIZZLE_X);
2886
r5g = ureg_scalar(ureg_src(r5), TGSI_SWIZZLE_Y);
2888
ureg_RCP(ureg, ureg_writemask(r5, TGSI_WRITEMASK_Z), r5g);
2889
ureg_MUL(ureg, ureg_writemask(r5, TGSI_WRITEMASK_X), r5r, ureg_scalar(ureg_src(r5), TGSI_SWIZZLE_Z));
2891
ureg_CMP(ureg, ureg_writemask(r5, TGSI_WRITEMASK_X), ureg_negate(ureg_abs(r5g)),
2892
r5r, ureg_imm1f(ureg, 1.0f));
2893
/* replace the depth for depth testing with the result */
2894
tx->regs.oDepth = ureg_DECL_output_masked(ureg, TGSI_SEMANTIC_POSITION, 0,
2895
TGSI_WRITEMASK_Z, 0, 1);
2896
ureg_MOV(ureg, tx->regs.oDepth, r5r);
2903
struct ureg_program *ureg = tx->ureg;
2904
struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2905
struct ureg_src src0 = tx_src_param(tx, &tx->insn.src[0]);
2906
struct ureg_src src1 = tx_src_param(tx, &tx->insn.src[1]);
2907
struct ureg_src m00, m01, m10, m11, c8m;
2908
const int m = tx->insn.dst[0].idx;
2909
struct ureg_dst tmp = tx_scratch(tx);
2917
c8m = nine_float_constant_src(tx, 8+m);
2918
m00 = NINE_APPLY_SWIZZLE(c8m, X);
2919
m01 = NINE_APPLY_SWIZZLE(c8m, Y);
2920
m10 = NINE_APPLY_SWIZZLE(c8m, Z);
2921
m11 = NINE_APPLY_SWIZZLE(c8m, W);
2922
/* dest.r = src0.r + D3DTSS_BUMPENVMAT00(stage n) * src1.r */
2923
ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), m00,
2924
NINE_APPLY_SWIZZLE(src1, X), NINE_APPLY_SWIZZLE(src0, X));
2925
/* dest.r = dest.r + D3DTSS_BUMPENVMAT10(stage n) * src1.g; */
2926
ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), m10,
2927
NINE_APPLY_SWIZZLE(src1, Y), NINE_APPLY_SWIZZLE(ureg_src(tmp), X));
2929
/* dest.g = src0.g + D3DTSS_BUMPENVMAT01(stage n) * src1.r */
2930
ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), m01,
2931
NINE_APPLY_SWIZZLE(src1, X), src0);
2932
/* dest.g = dest.g + D3DTSS_BUMPENVMAT11(stage n) * src1.g */
2933
ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), m11,
2934
NINE_APPLY_SWIZZLE(src1, Y), NINE_APPLY_SWIZZLE(ureg_src(tmp), Y));
2935
ureg_MOV(ureg, ureg_writemask(dst, TGSI_WRITEMASK_XY), ureg_src(tmp));
2937
tx->info->bumpenvmat_needed = 1;
2944
struct ureg_program *ureg = tx->ureg;
2946
struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2947
struct ureg_src src[2] = {
2948
tx_src_param(tx, &tx->insn.src[0]),
2949
tx_src_param(tx, &tx->insn.src[1])
2951
assert(tx->insn.src[1].idx >= 0 &&
2952
tx->insn.src[1].idx < ARRAY_SIZE(tx->sampler_targets));
2953
target = tx->sampler_targets[tx->insn.src[1].idx];
2955
if (TEX_if_fetch4(tx, dst, target, src[0], src[1], tx->insn.src[1].idx))
2958
switch (tx->insn.flags) {
2960
ureg_TEX(ureg, dst, target, src[0], src[1]);
2962
case NINED3DSI_TEXLD_PROJECT:
2963
ureg_TXP(ureg, dst, target, src[0], src[1]);
2965
case NINED3DSI_TEXLD_BIAS:
2966
ureg_TXB(ureg, dst, target, src[0], src[1]);
2970
return D3DERR_INVALIDCALL;
2975
DECL_SPECIAL(TEXLD_14)
2977
struct ureg_program *ureg = tx->ureg;
2978
struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2979
struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
2980
const unsigned s = tx->insn.dst[0].idx;
2981
const unsigned t = ps1x_sampler_type(tx->info, s);
2983
tx->info->sampler_mask |= 1 << s;
2984
ureg_TEX(ureg, dst, t, src, ureg_DECL_sampler(ureg, s));
2991
struct ureg_program *ureg = tx->ureg;
2992
const unsigned s = tx->insn.dst[0].idx;
2993
const unsigned t = ps1x_sampler_type(tx->info, s);
2994
struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2995
struct ureg_src src[2];
2997
tx_texcoord_alloc(tx, s);
2999
src[0] = tx->regs.vT[s];
3000
src[1] = ureg_DECL_sampler(ureg, s);
3001
tx->info->sampler_mask |= 1 << s;
3003
TEX_with_ps1x_projection(tx, dst, t, src[0], src[1], s);
3008
DECL_SPECIAL(TEXLDD)
3011
struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
3012
struct ureg_src src[4] = {
3013
tx_src_param(tx, &tx->insn.src[0]),
3014
tx_src_param(tx, &tx->insn.src[1]),
3015
tx_src_param(tx, &tx->insn.src[2]),
3016
tx_src_param(tx, &tx->insn.src[3])
3018
assert(tx->insn.src[1].idx >= 0 &&
3019
tx->insn.src[1].idx < ARRAY_SIZE(tx->sampler_targets));
3020
target = tx->sampler_targets[tx->insn.src[1].idx];
3022
if (TEX_if_fetch4(tx, dst, target, src[0], src[1], tx->insn.src[1].idx))
3025
ureg_TXD(tx->ureg, dst, target, src[0], src[2], src[3], src[1]);
3029
DECL_SPECIAL(TEXLDL)
3032
struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
3033
struct ureg_src src[2] = {
3034
tx_src_param(tx, &tx->insn.src[0]),
3035
tx_src_param(tx, &tx->insn.src[1])
3037
assert(tx->insn.src[1].idx >= 0 &&
3038
tx->insn.src[1].idx < ARRAY_SIZE(tx->sampler_targets));
3039
target = tx->sampler_targets[tx->insn.src[1].idx];
3041
if (TEX_if_fetch4(tx, dst, target, src[0], src[1], tx->insn.src[1].idx))
3044
ureg_TXL(tx->ureg, dst, target, src[0], src[1]);
3050
const unsigned cmp_op = sm1_insn_flags_to_tgsi_setop(tx->insn.flags);
3051
struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
3052
struct ureg_src src[2] = {
3053
tx_src_param(tx, &tx->insn.src[0]),
3054
tx_src_param(tx, &tx->insn.src[1])
3056
ureg_insn(tx->ureg, cmp_op, &dst, 1, src, 2, 0);
3060
DECL_SPECIAL(BREAKP)
3062
struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
3063
ureg_IF(tx->ureg, src, tx_cond(tx));
3066
ureg_ENDIF(tx->ureg);
3072
return D3D_OK; /* we don't care about phase */
3075
DECL_SPECIAL(COMMENT)
3077
return D3D_OK; /* nothing to do */
3081
/* Build one sm1_op_info entry: D3D opcode, TGSI opcode, accepted
 * vs/ps version ranges, dst/src counts, optional special handler. */
#define _OPI(o,t,vv1,vv2,pv1,pv2,d,s,h) \
    { D3DSIO_##o, TGSI_OPCODE_##t, { vv1, vv2 }, { pv1, pv2, }, d, s, h }
static const struct sm1_op_info inst_table[] =
3086
_OPI(NOP, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 0, 0, SPECIAL(NOP)), /* 0 */
3087
_OPI(MOV, MOV, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, NULL),
3088
_OPI(ADD, ADD, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 2 */
3089
_OPI(SUB, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(SUB)), /* 3 */
3090
_OPI(MAD, MAD, V(0,0), V(3,0), V(0,0), V(3,0), 1, 3, NULL), /* 4 */
3091
_OPI(MUL, MUL, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 5 */
3092
_OPI(RCP, RCP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, SPECIAL(RCP)), /* 6 */
3093
_OPI(RSQ, RSQ, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, SPECIAL(RSQ)), /* 7 */
3094
_OPI(DP3, DP3, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 8 */
3095
_OPI(DP4, DP4, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 9 */
3096
_OPI(MIN, MIN, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 10 */
3097
_OPI(MAX, MAX, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 11 */
3098
_OPI(SLT, SLT, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 12 */
3099
_OPI(SGE, SGE, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 13 */
3100
_OPI(EXP, EX2, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, NULL), /* 14 */
3101
_OPI(LOG, LG2, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, SPECIAL(LOG)), /* 15 */
3102
_OPI(LIT, LIT, V(0,0), V(3,0), V(0,0), V(0,0), 1, 1, SPECIAL(LIT)), /* 16 */
3103
_OPI(DST, DST, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 17 */
3104
_OPI(LRP, LRP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 3, NULL), /* 18 */
3105
_OPI(FRC, FRC, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, NULL), /* 19 */
3107
_OPI(M4x4, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(M4x4)),
3108
_OPI(M4x3, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(M4x3)),
3109
_OPI(M3x4, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(M3x4)),
3110
_OPI(M3x3, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(M3x3)),
3111
_OPI(M3x2, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(M3x2)),
3113
_OPI(CALL, CAL, V(2,0), V(3,0), V(2,1), V(3,0), 0, 1, SPECIAL(CALL)),
3114
_OPI(CALLNZ, CAL, V(2,0), V(3,0), V(2,1), V(3,0), 0, 2, SPECIAL(CALLNZ)),
3115
_OPI(LOOP, BGNLOOP, V(2,0), V(3,0), V(3,0), V(3,0), 0, 2, SPECIAL(LOOP)),
3116
_OPI(RET, RET, V(2,0), V(3,0), V(2,1), V(3,0), 0, 0, SPECIAL(RET)),
3117
_OPI(ENDLOOP, ENDLOOP, V(2,0), V(3,0), V(3,0), V(3,0), 0, 0, SPECIAL(ENDLOOP)),
3118
_OPI(LABEL, NOP, V(2,0), V(3,0), V(2,1), V(3,0), 0, 1, SPECIAL(LABEL)),
3120
_OPI(DCL, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 0, 0, SPECIAL(DCL)),
3122
_OPI(POW, POW, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(POW)),
3123
_OPI(CRS, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(XPD)), /* XXX: .w */
3124
_OPI(SGN, SSG, V(2,0), V(3,0), V(0,0), V(0,0), 1, 3, SPECIAL(SGN)), /* ignore src1,2 */
3125
_OPI(ABS, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, SPECIAL(ABS)),
3126
_OPI(NRM, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, SPECIAL(NRM)), /* NRM doesn't fit */
3128
_OPI(SINCOS, NOP, V(2,0), V(2,1), V(2,0), V(2,1), 1, 3, SPECIAL(SINCOS)),
3129
_OPI(SINCOS, NOP, V(3,0), V(3,0), V(3,0), V(3,0), 1, 1, SPECIAL(SINCOS)),
3131
/* More flow control */
3132
_OPI(REP, NOP, V(2,0), V(3,0), V(2,1), V(3,0), 0, 1, SPECIAL(REP)),
3133
_OPI(ENDREP, NOP, V(2,0), V(3,0), V(2,1), V(3,0), 0, 0, SPECIAL(ENDREP)),
3134
_OPI(IF, IF, V(2,0), V(3,0), V(2,1), V(3,0), 0, 1, SPECIAL(IF)),
3135
_OPI(IFC, IF, V(2,1), V(3,0), V(2,1), V(3,0), 0, 2, SPECIAL(IFC)),
3136
_OPI(ELSE, ELSE, V(2,0), V(3,0), V(2,1), V(3,0), 0, 0, SPECIAL(ELSE)),
3137
_OPI(ENDIF, ENDIF, V(2,0), V(3,0), V(2,1), V(3,0), 0, 0, SPECIAL(ENDIF)),
3138
_OPI(BREAK, BRK, V(2,1), V(3,0), V(2,1), V(3,0), 0, 0, NULL),
3139
_OPI(BREAKC, NOP, V(2,1), V(3,0), V(2,1), V(3,0), 0, 2, SPECIAL(BREAKC)),
3140
/* we don't write to the address register, but a normal register (copied
3141
* when needed to the address register), thus we don't use ARR */
3142
_OPI(MOVA, MOV, V(2,0), V(3,0), V(0,0), V(0,0), 1, 1, NULL),
3144
_OPI(DEFB, NOP, V(0,0), V(3,0) , V(0,0), V(3,0) , 1, 0, SPECIAL(DEFB)),
3145
_OPI(DEFI, NOP, V(0,0), V(3,0) , V(0,0), V(3,0) , 1, 0, SPECIAL(DEFI)),
3147
_OPI(TEXCOORD, NOP, V(0,0), V(0,0), V(0,0), V(1,3), 1, 0, SPECIAL(TEXCOORD)),
3148
_OPI(TEXCOORD, MOV, V(0,0), V(0,0), V(1,4), V(1,4), 1, 1, SPECIAL(TEXCOORD_ps14)),
3149
_OPI(TEXKILL, KILL_IF, V(0,0), V(0,0), V(0,0), V(3,0), 1, 0, SPECIAL(TEXKILL)),
3150
_OPI(TEX, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 0, SPECIAL(TEX)),
3151
_OPI(TEX, TEX, V(0,0), V(0,0), V(1,4), V(1,4), 1, 1, SPECIAL(TEXLD_14)),
3152
_OPI(TEX, TEX, V(0,0), V(0,0), V(2,0), V(3,0), 1, 2, SPECIAL(TEXLD)),
3153
_OPI(TEXBEM, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXBEM)),
3154
_OPI(TEXBEML, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXBEM)),
3155
_OPI(TEXREG2AR, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXREG2AR)),
3156
_OPI(TEXREG2GB, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXREG2GB)),
3157
_OPI(TEXM3x2PAD, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXM3x2PAD)),
3158
_OPI(TEXM3x2TEX, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXM3x2TEX)),
3159
_OPI(TEXM3x3PAD, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXM3x3PAD)),
3160
_OPI(TEXM3x3TEX, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXM3x3)),
3161
_OPI(TEXM3x3SPEC, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 2, SPECIAL(TEXM3x3SPEC)),
3162
_OPI(TEXM3x3VSPEC, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXM3x3)),
3164
_OPI(EXPP, EXP, V(0,0), V(1,1), V(0,0), V(0,0), 1, 1, NULL),
3165
_OPI(EXPP, EX2, V(2,0), V(3,0), V(0,0), V(0,0), 1, 1, NULL),
3166
_OPI(LOGP, LG2, V(0,0), V(3,0), V(0,0), V(0,0), 1, 1, SPECIAL(LOG)),
3167
_OPI(CND, NOP, V(0,0), V(0,0), V(0,0), V(1,4), 1, 3, SPECIAL(CND)),
3169
_OPI(DEF, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 0, SPECIAL(DEF)),
3171
/* More tex stuff */
3172
_OPI(TEXREG2RGB, TEX, V(0,0), V(0,0), V(1,2), V(1,3), 1, 1, SPECIAL(TEXREG2RGB)),
3173
_OPI(TEXDP3TEX, TEX, V(0,0), V(0,0), V(1,2), V(1,3), 1, 1, SPECIAL(TEXDP3TEX)),
3174
_OPI(TEXM3x2DEPTH, TEX, V(0,0), V(0,0), V(1,3), V(1,3), 1, 1, SPECIAL(TEXM3x2DEPTH)),
3175
_OPI(TEXDP3, TEX, V(0,0), V(0,0), V(1,2), V(1,3), 1, 1, SPECIAL(TEXDP3)),
3176
_OPI(TEXM3x3, TEX, V(0,0), V(0,0), V(1,2), V(1,3), 1, 1, SPECIAL(TEXM3x3)),
3177
_OPI(TEXDEPTH, TEX, V(0,0), V(0,0), V(1,4), V(1,4), 1, 0, SPECIAL(TEXDEPTH)),
3180
_OPI(CMP, CMP, V(0,0), V(0,0), V(1,2), V(3,0), 1, 3, SPECIAL(CMP)), /* reversed */
3181
_OPI(BEM, NOP, V(0,0), V(0,0), V(1,4), V(1,4), 1, 2, SPECIAL(BEM)),
3182
_OPI(DP2ADD, NOP, V(0,0), V(0,0), V(2,0), V(3,0), 1, 3, SPECIAL(DP2ADD)),
3183
_OPI(DSX, DDX, V(0,0), V(0,0), V(2,1), V(3,0), 1, 1, NULL),
3184
_OPI(DSY, DDY, V(0,0), V(0,0), V(2,1), V(3,0), 1, 1, NULL),
3185
_OPI(TEXLDD, TXD, V(0,0), V(0,0), V(2,1), V(3,0), 1, 4, SPECIAL(TEXLDD)),
3186
_OPI(SETP, NOP, V(0,0), V(3,0), V(2,1), V(3,0), 1, 2, SPECIAL(SETP)),
3187
_OPI(TEXLDL, TXL, V(3,0), V(3,0), V(3,0), V(3,0), 1, 2, SPECIAL(TEXLDL)),
3188
_OPI(BREAKP, BRK, V(0,0), V(3,0), V(2,1), V(3,0), 0, 1, SPECIAL(BREAKP))
3191
static const struct sm1_op_info inst_phase =
3192
_OPI(PHASE, NOP, V(0,0), V(0,0), V(1,4), V(1,4), 0, 0, SPECIAL(PHASE));
3194
static const struct sm1_op_info inst_comment =
3195
_OPI(COMMENT, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 0, 0, SPECIAL(COMMENT));
3198
create_op_info_map(struct shader_translator *tx)
3200
const unsigned version = (tx->version.major << 8) | tx->version.minor;
3203
for (i = 0; i < ARRAY_SIZE(tx->op_info_map); ++i)
3204
tx->op_info_map[i] = -1;
3206
if (tx->processor == PIPE_SHADER_VERTEX) {
3207
for (i = 0; i < ARRAY_SIZE(inst_table); ++i) {
3208
assert(inst_table[i].sio < ARRAY_SIZE(tx->op_info_map));
3209
if (inst_table[i].vert_version.min <= version &&
3210
inst_table[i].vert_version.max >= version)
3211
tx->op_info_map[inst_table[i].sio] = i;
3214
for (i = 0; i < ARRAY_SIZE(inst_table); ++i) {
3215
assert(inst_table[i].sio < ARRAY_SIZE(tx->op_info_map));
3216
if (inst_table[i].frag_version.min <= version &&
3217
inst_table[i].frag_version.max >= version)
3218
tx->op_info_map[inst_table[i].sio] = i;
3223
static inline HRESULT
3224
NineTranslateInstruction_Generic(struct shader_translator *tx)
3226
struct ureg_dst dst[1];
3227
struct ureg_src src[4];
3230
for (i = 0; i < tx->insn.ndst && i < ARRAY_SIZE(dst); ++i)
3231
dst[i] = tx_dst_param(tx, &tx->insn.dst[i]);
3232
for (i = 0; i < tx->insn.nsrc && i < ARRAY_SIZE(src); ++i)
3233
src[i] = tx_src_param(tx, &tx->insn.src[i]);
3235
ureg_insn(tx->ureg, tx->insn.info->opcode,
3237
src, tx->insn.nsrc, 0);
3242
TOKEN_PEEK(struct shader_translator *tx)
3244
return *(tx->parse);
3248
TOKEN_NEXT(struct shader_translator *tx)
3250
return *(tx->parse)++;
3254
TOKEN_JUMP(struct shader_translator *tx)
3256
if (tx->parse_next && tx->parse != tx->parse_next) {
3257
WARN("parse(%p) != parse_next(%p) !\n", tx->parse, tx->parse_next);
3258
tx->parse = tx->parse_next;
3262
static inline boolean
3263
sm1_parse_eof(struct shader_translator *tx)
3265
return TOKEN_PEEK(tx) == NINED3DSP_END;
3269
sm1_read_version(struct shader_translator *tx)
3271
const DWORD tok = TOKEN_NEXT(tx);
3273
tx->version.major = D3DSHADER_VERSION_MAJOR(tok);
3274
tx->version.minor = D3DSHADER_VERSION_MINOR(tok);
3276
switch (tok >> 16) {
3277
case NINED3D_SM1_VS: tx->processor = PIPE_SHADER_VERTEX; break;
3278
case NINED3D_SM1_PS: tx->processor = PIPE_SHADER_FRAGMENT; break;
3280
DBG("Invalid shader type: %x\n", tok);
3286
/* This is just to check if we parsed the instruction properly. */
3288
sm1_parse_get_skip(struct shader_translator *tx)
3290
const DWORD tok = TOKEN_PEEK(tx);
3292
if (tx->version.major >= 2) {
3293
tx->parse_next = tx->parse + 1 /* this */ +
3294
((tok & D3DSI_INSTLENGTH_MASK) >> D3DSI_INSTLENGTH_SHIFT);
3296
tx->parse_next = NULL; /* TODO: determine from param count */
3301
/* Print an embedded shader comment block (currently a stub).
 * NOTE(review): body reconstructed from a mangled extraction — verify
 * against the upstream file. */
static void
sm1_print_comment(const char *comment, UINT size)
{
    if (!size)
        return;
    /* TODO */
}
/* Skip (and optionally print) any D3DSIO_COMMENT blocks at the cursor. */
static void
sm1_parse_comments(struct shader_translator *tx, BOOL print)
{
    DWORD tok = TOKEN_PEEK(tx);

    while ((tok & D3DSI_OPCODE_MASK) == D3DSIO_COMMENT)
    {
        const char *comment = "";
        UINT size = (tok & D3DSI_COMMENTSIZE_MASK) >> D3DSI_COMMENTSIZE_SHIFT;
        tx->parse += size + 1; /* comment token + payload DWORDs */

        if (print)
            sm1_print_comment(comment, size);

        tok = TOKEN_PEEK(tx);
    }
}
/* Read a parameter token; when relatively addressed, also produce the
 * relative-addressing token (synthesized as a0.x for sm1 vs shaders). */
static void
sm1_parse_get_param(struct shader_translator *tx, DWORD *reg, DWORD *rel)
{
    *reg = TOKEN_NEXT(tx);

    if (*reg & D3DSHADER_ADDRMODE_RELATIVE)
    {
        if (tx->version.major < 2)
            /* no explicit token before sm2: address register a0.x implied */
            *rel = (1 << 31) |
                ((D3DSPR_ADDR << D3DSP_REGTYPE_SHIFT2) & D3DSP_REGTYPE_MASK2) |
                ((D3DSPR_ADDR << D3DSP_REGTYPE_SHIFT) & D3DSP_REGTYPE_MASK) |
                D3DSP_NOSWIZZLE;
        else
            *rel = TOKEN_NEXT(tx);
    }
}
/* Decode a destination-parameter token into its register file, index,
 * write mask, modifier and result-shift fields. */
static void
sm1_parse_dst_param(struct sm1_dst_param *dst, DWORD tok)
{
    int8_t shift;
    dst->file =
        (tok & D3DSP_REGTYPE_MASK) >> D3DSP_REGTYPE_SHIFT |
        (tok & D3DSP_REGTYPE_MASK2) >> D3DSP_REGTYPE_SHIFT2;
    dst->type = TGSI_RETURN_TYPE_FLOAT;
    dst->idx = tok & D3DSP_REGNUM_MASK;
    dst->rel = NULL;
    dst->mask = (tok & NINED3DSP_WRITEMASK_MASK) >> NINED3DSP_WRITEMASK_SHIFT;
    dst->mod = (tok & D3DSP_DSTMOD_MASK) >> D3DSP_DSTMOD_SHIFT;
    shift = (tok & D3DSP_DSTSHIFT_MASK) >> D3DSP_DSTSHIFT_SHIFT;
    /* sign-extend the 4-bit shift field */
    dst->shift = (shift & 0x7) - (shift & 0x8);
}
/* Decode a source-parameter token; the extra CONST2/3/4 files are
 * folded into CONST with a biased index. */
static void
sm1_parse_src_param(struct sm1_src_param *src, DWORD tok)
{
    src->file =
        ((tok & D3DSP_REGTYPE_MASK) >> D3DSP_REGTYPE_SHIFT) |
        ((tok & D3DSP_REGTYPE_MASK2) >> D3DSP_REGTYPE_SHIFT2);
    src->type = TGSI_RETURN_TYPE_FLOAT;
    src->idx = tok & D3DSP_REGNUM_MASK;
    src->rel = NULL;
    src->swizzle = (tok & D3DSP_SWIZZLE_MASK) >> D3DSP_SWIZZLE_SHIFT;
    src->mod = (tok & D3DSP_SRCMOD_MASK) >> D3DSP_SRCMOD_SHIFT;

    switch (src->file) {
    case D3DSPR_CONST2: src->file = D3DSPR_CONST; src->idx += 2048; break;
    case D3DSPR_CONST3: src->file = D3DSPR_CONST; src->idx += 4096; break;
    case D3DSPR_CONST4: src->file = D3DSPR_CONST; src->idx += 6144; break;
    default:
        break;
    }
}
sm1_parse_immediate(struct shader_translator *tx,
3382
struct sm1_src_param *imm)
3384
imm->file = NINED3DSPR_IMMEDIATE;
3387
imm->swizzle = NINED3DSP_NOSWIZZLE;
3389
switch (tx->insn.opcode) {
3391
imm->type = NINED3DSPTYPE_FLOAT4;
3392
memcpy(&imm->imm.d[0], tx->parse, 4 * sizeof(DWORD));
3396
imm->type = NINED3DSPTYPE_INT4;
3397
memcpy(&imm->imm.d[0], tx->parse, 4 * sizeof(DWORD));
3401
imm->type = NINED3DSPTYPE_BOOL;
3402
memcpy(&imm->imm.d[0], tx->parse, 1 * sizeof(DWORD));
3412
sm1_read_dst_param(struct shader_translator *tx,
3413
struct sm1_dst_param *dst,
3414
struct sm1_src_param *rel)
3416
DWORD tok_dst, tok_rel = 0;
3418
sm1_parse_get_param(tx, &tok_dst, &tok_rel);
3419
sm1_parse_dst_param(dst, tok_dst);
3420
if (tok_dst & D3DSHADER_ADDRMODE_RELATIVE) {
3421
sm1_parse_src_param(rel, tok_rel);
3427
sm1_read_src_param(struct shader_translator *tx,
3428
struct sm1_src_param *src,
3429
struct sm1_src_param *rel)
3431
DWORD tok_src, tok_rel = 0;
3433
sm1_parse_get_param(tx, &tok_src, &tok_rel);
3434
sm1_parse_src_param(src, tok_src);
3435
if (tok_src & D3DSHADER_ADDRMODE_RELATIVE) {
3437
sm1_parse_src_param(rel, tok_rel);
3443
sm1_read_semantic(struct shader_translator *tx,
3444
struct sm1_semantic *sem)
3446
const DWORD tok_usg = TOKEN_NEXT(tx);
3447
const DWORD tok_dst = TOKEN_NEXT(tx);
3449
sem->sampler_type = (tok_usg & D3DSP_TEXTURETYPE_MASK) >> D3DSP_TEXTURETYPE_SHIFT;
3450
sem->usage = (tok_usg & D3DSP_DCL_USAGE_MASK) >> D3DSP_DCL_USAGE_SHIFT;
3451
sem->usage_idx = (tok_usg & D3DSP_DCL_USAGEINDEX_MASK) >> D3DSP_DCL_USAGEINDEX_SHIFT;
3453
sm1_parse_dst_param(&sem->reg, tok_dst);
3457
sm1_parse_instruction(struct shader_translator *tx)
3459
struct sm1_instruction *insn = &tx->insn;
3462
const struct sm1_op_info *info = NULL;
3465
sm1_parse_comments(tx, TRUE);
3466
sm1_parse_get_skip(tx);
3468
tok = TOKEN_NEXT(tx);
3470
insn->opcode = tok & D3DSI_OPCODE_MASK;
3471
insn->flags = (tok & NINED3DSIO_OPCODE_FLAGS_MASK) >> NINED3DSIO_OPCODE_FLAGS_SHIFT;
3472
insn->coissue = !!(tok & D3DSI_COISSUE);
3473
insn->predicated = !!(tok & NINED3DSHADER_INST_PREDICATED);
3475
if (insn->opcode < ARRAY_SIZE(tx->op_info_map)) {
3476
int k = tx->op_info_map[insn->opcode];
3478
assert(k < ARRAY_SIZE(inst_table));
3479
info = &inst_table[k];
3482
if (insn->opcode == D3DSIO_PHASE) info = &inst_phase;
3483
if (insn->opcode == D3DSIO_COMMENT) info = &inst_comment;
3486
DBG("illegal or unhandled opcode: %08x\n", insn->opcode);
3491
insn->ndst = info->ndst;
3492
insn->nsrc = info->nsrc;
3496
unsigned min = IS_VS ? info->vert_version.min : info->frag_version.min;
3497
unsigned max = IS_VS ? info->vert_version.max : info->frag_version.max;
3498
unsigned ver = (tx->version.major << 8) | tx->version.minor;
3499
if (ver < min || ver > max) {
3500
DBG("opcode not supported in this shader version: %x <= %x <= %x\n",
3506
for (i = 0; i < insn->ndst; ++i)
3507
sm1_read_dst_param(tx, &insn->dst[i], &insn->dst_rel[i]);
3508
if (insn->predicated)
3509
sm1_read_src_param(tx, &insn->pred, NULL);
3510
for (i = 0; i < insn->nsrc; ++i)
3511
sm1_read_src_param(tx, &insn->src[i], &insn->src_rel[i]);
3513
/* parse here so we can dump them before processing */
3514
if (insn->opcode == D3DSIO_DEF ||
3515
insn->opcode == D3DSIO_DEFI ||
3516
insn->opcode == D3DSIO_DEFB)
3517
sm1_parse_immediate(tx, &tx->insn.src[0]);
3519
sm1_dump_instruction(insn, tx->cond_depth + tx->loop_depth);
3520
sm1_instruction_check(insn);
3522
if (insn->predicated) {
3523
tx->predicated_activated = true;
3524
if (ureg_dst_is_undef(tx->regs.predicate_tmp)) {
3525
tx->regs.predicate_tmp = ureg_DECL_temporary(tx->ureg);
3526
tx->regs.predicate_dst = ureg_DECL_temporary(tx->ureg);
3531
hr = info->handler(tx);
3533
hr = NineTranslateInstruction_Generic(tx);
3534
tx_apply_dst0_modifiers(tx);
3536
if (insn->predicated) {
3537
tx->predicated_activated = false;
3538
/* TODO: predicate might be allowed on outputs,
3539
* which cannot be src. Workaround it. */
3540
ureg_CMP(tx->ureg, tx->regs.predicate_dst,
3541
ureg_negate(tx_src_param(tx, &insn->pred)),
3542
ureg_src(tx->regs.predicate_tmp),
3543
ureg_src(tx->regs.predicate_dst));
3548
tx->num_scratch = 0; /* reset */
3553
/* Convenience wrappers to query pipe screen (shader) capabilities;
 * `screen` and `info` are expected in the enclosing scope. */
#define GET_CAP(n) screen->get_param( \
      screen, PIPE_CAP_##n)
#define GET_SHADER_CAP(n) screen->get_shader_param( \
      screen, info->type, PIPE_SHADER_CAP_##n)
tx_ctor(struct shader_translator *tx, struct pipe_screen *screen, struct nine_shader_info *info)
3563
memset(tx, 0, sizeof(*tx));
3567
tx->byte_code = info->byte_code;
3568
tx->parse = info->byte_code;
3570
for (i = 0; i < ARRAY_SIZE(info->input_map); ++i)
3571
info->input_map[i] = NINE_DECLUSAGE_NONE;
3572
info->num_inputs = 0;
3574
info->position_t = FALSE;
3575
info->point_size = FALSE;
3577
memset(tx->slots_used, 0, sizeof(tx->slots_used));
3578
memset(info->int_slots_used, 0, sizeof(info->int_slots_used));
3579
memset(info->bool_slots_used, 0, sizeof(info->bool_slots_used));
3581
tx->info->const_float_slots = 0;
3582
tx->info->const_int_slots = 0;
3583
tx->info->const_bool_slots = 0;
3585
info->sampler_mask = 0x0;
3586
info->rt_mask = 0x0;
3588
info->lconstf.data = NULL;
3589
info->lconstf.ranges = NULL;
3591
info->bumpenvmat_needed = 0;
3593
for (i = 0; i < ARRAY_SIZE(tx->regs.rL); ++i) {
3594
tx->regs.rL[i] = ureg_dst_undef();
3596
tx->regs.address = ureg_dst_undef();
3597
tx->regs.a0 = ureg_dst_undef();
3598
tx->regs.p = ureg_dst_undef();
3599
tx->regs.oDepth = ureg_dst_undef();
3600
tx->regs.vPos = ureg_src_undef();
3601
tx->regs.vFace = ureg_src_undef();
3602
for (i = 0; i < ARRAY_SIZE(tx->regs.o); ++i)
3603
tx->regs.o[i] = ureg_dst_undef();
3604
for (i = 0; i < ARRAY_SIZE(tx->regs.oCol); ++i)
3605
tx->regs.oCol[i] = ureg_dst_undef();
3606
for (i = 0; i < ARRAY_SIZE(tx->regs.vC); ++i)
3607
tx->regs.vC[i] = ureg_src_undef();
3608
for (i = 0; i < ARRAY_SIZE(tx->regs.vT); ++i)
3609
tx->regs.vT[i] = ureg_src_undef();
3611
sm1_read_version(tx);
3613
info->version = (tx->version.major << 4) | tx->version.minor;
3615
tx->num_outputs = 0;
3617
create_op_info_map(tx);
3619
tx->ureg = ureg_create(info->type);
3621
return E_OUTOFMEMORY;
3624
tx->native_integers = GET_SHADER_CAP(INTEGERS);
3625
tx->inline_subroutines = !GET_SHADER_CAP(SUBROUTINES);
3626
tx->want_texcoord = GET_CAP(TGSI_TEXCOORD);
3627
tx->shift_wpos = !GET_CAP(FS_COORD_PIXEL_CENTER_INTEGER);
3628
tx->texcoord_sn = tx->want_texcoord ?
3629
TGSI_SEMANTIC_TEXCOORD : TGSI_SEMANTIC_GENERIC;
3630
tx->wpos_is_sysval = GET_CAP(FS_POSITION_IS_SYSVAL);
3631
tx->face_is_sysval_integer = GET_CAP(FS_FACE_IS_INTEGER_SYSVAL);
3634
tx->num_constf_allowed = NINE_MAX_CONST_F;
3635
} else if (tx->version.major < 2) {/* IS_PS v1 */
3636
tx->num_constf_allowed = 8;
3637
} else if (tx->version.major == 2) {/* IS_PS v2 */
3638
tx->num_constf_allowed = 32;
3639
} else {/* IS_PS v3 */
3640
tx->num_constf_allowed = NINE_MAX_CONST_F_PS3;
3643
if (tx->version.major < 2) {
3644
tx->num_consti_allowed = 0;
3645
tx->num_constb_allowed = 0;
3647
tx->num_consti_allowed = NINE_MAX_CONST_I;
3648
tx->num_constb_allowed = NINE_MAX_CONST_B;
3651
if (info->swvp_on) {
3652
/* TODO: The values tx->version.major == 1 */
3653
tx->num_constf_allowed = 8192;
3654
tx->num_consti_allowed = 2048;
3655
tx->num_constb_allowed = 2048;
3658
/* VS must always write position. Declare it here to make it the 1st output.
3659
* (Some drivers like nv50 are buggy and rely on that.)
3662
tx->regs.oPos = ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_POSITION, 0);
3664
ureg_property(tx->ureg, TGSI_PROPERTY_FS_COORD_ORIGIN, TGSI_FS_COORD_ORIGIN_UPPER_LEFT);
3665
if (!tx->shift_wpos)
3666
ureg_property(tx->ureg, TGSI_PROPERTY_FS_COORD_PIXEL_CENTER, TGSI_FS_COORD_PIXEL_CENTER_INTEGER);
3669
tx->mul_zero_wins = GET_CAP(TGSI_MUL_ZERO_WINS);
3670
if (tx->mul_zero_wins)
3671
ureg_property(tx->ureg, TGSI_PROPERTY_MUL_ZERO_WINS, 1);
3673
/* Add additional definition of constants */
3674
if (info->add_constants_defs.c_combination) {
3677
assert(info->add_constants_defs.int_const_added);
3678
assert(info->add_constants_defs.bool_const_added);
3679
/* We only add constants that are used by the shader
3680
* and that are not defined in the shader */
3681
for (i = 0; i < NINE_MAX_CONST_I; ++i) {
3682
if ((*info->add_constants_defs.int_const_added)[i]) {
3683
DBG("Defining const i%i : { %i %i %i %i }\n", i,
3684
info->add_constants_defs.c_combination->const_i[i][0],
3685
info->add_constants_defs.c_combination->const_i[i][1],
3686
info->add_constants_defs.c_combination->const_i[i][2],
3687
info->add_constants_defs.c_combination->const_i[i][3]);
3688
tx_set_lconsti(tx, i, info->add_constants_defs.c_combination->const_i[i]);
3691
for (i = 0; i < NINE_MAX_CONST_B; ++i) {
3692
if ((*info->add_constants_defs.bool_const_added)[i]) {
3693
DBG("Defining const b%i : %i\n", i, (int)(info->add_constants_defs.c_combination->const_b[i] != 0));
3694
tx_set_lconstb(tx, i, info->add_constants_defs.c_combination->const_b[i]);
3702
tx_dtor(struct shader_translator *tx)
3706
if (tx->num_inst_labels)
3707
FREE(tx->inst_labels);
3713
/* CONST[0].xyz = width/2, -height/2, zmax-zmin
3714
* CONST[1].xyz = x+width/2, y+height/2, zmin */
3716
shader_add_vs_viewport_transform(struct shader_translator *tx)
3718
struct ureg_program *ureg = tx->ureg;
3719
struct ureg_src c0 = ureg_src_register(TGSI_FILE_CONSTANT, 0);
3720
struct ureg_src c1 = ureg_src_register(TGSI_FILE_CONSTANT, 1);
3721
/* struct ureg_dst pos_tmp = ureg_DECL_temporary(ureg);*/
3723
c0 = ureg_src_dimension(c0, 4);
3724
c1 = ureg_src_dimension(c1, 4);
3725
/* TODO: find out when we need to apply the viewport transformation or not.
3726
* Likely will be XYZ vs XYZRHW in vdecl_out
3727
* ureg_MUL(ureg, ureg_writemask(pos_tmp, TGSI_WRITEMASK_XYZ), ureg_src(tx->regs.oPos), c0);
3728
* ureg_ADD(ureg, ureg_writemask(tx->regs.oPos_out, TGSI_WRITEMASK_XYZ), ureg_src(pos_tmp), c1);
3730
ureg_MOV(ureg, ureg_writemask(tx->regs.oPos_out, TGSI_WRITEMASK_XYZ), ureg_src(tx->regs.oPos));
3734
shader_add_ps_fog_stage(struct shader_translator *tx, struct ureg_src src_col)
3736
struct ureg_program *ureg = tx->ureg;
3737
struct ureg_dst oCol0 = ureg_DECL_output(ureg, TGSI_SEMANTIC_COLOR, 0);
3738
struct ureg_src fog_end, fog_coeff, fog_density, fog_params;
3739
struct ureg_src fog_vs, fog_color;
3740
struct ureg_dst fog_factor, depth;
3742
if (!tx->info->fog_enable) {
3743
ureg_MOV(ureg, oCol0, src_col);
3747
if (tx->info->fog_mode != D3DFOG_NONE) {
3748
depth = tx_scratch_scalar(tx);
3749
/* Depth used for fog is perspective interpolated */
3750
ureg_RCP(ureg, depth, ureg_scalar(nine_get_position_input(tx), TGSI_SWIZZLE_W));
3751
ureg_MUL(ureg, depth, ureg_src(depth), ureg_scalar(nine_get_position_input(tx), TGSI_SWIZZLE_Z));
3754
fog_color = nine_float_constant_src(tx, 32);
3755
fog_params = nine_float_constant_src(tx, 33);
3756
fog_factor = tx_scratch_scalar(tx);
3758
if (tx->info->fog_mode == D3DFOG_LINEAR) {
3759
fog_end = NINE_APPLY_SWIZZLE(fog_params, X);
3760
fog_coeff = NINE_APPLY_SWIZZLE(fog_params, Y);
3761
ureg_ADD(ureg, fog_factor, fog_end, ureg_negate(ureg_src(depth)));
3762
ureg_MUL(ureg, ureg_saturate(fog_factor), tx_src_scalar(fog_factor), fog_coeff);
3763
} else if (tx->info->fog_mode == D3DFOG_EXP) {
3764
fog_density = NINE_APPLY_SWIZZLE(fog_params, X);
3765
ureg_MUL(ureg, fog_factor, ureg_src(depth), fog_density);
3766
ureg_MUL(ureg, fog_factor, tx_src_scalar(fog_factor), ureg_imm1f(ureg, -1.442695f));
3767
ureg_EX2(ureg, fog_factor, tx_src_scalar(fog_factor));
3768
} else if (tx->info->fog_mode == D3DFOG_EXP2) {
3769
fog_density = NINE_APPLY_SWIZZLE(fog_params, X);
3770
ureg_MUL(ureg, fog_factor, ureg_src(depth), fog_density);
3771
ureg_MUL(ureg, fog_factor, tx_src_scalar(fog_factor), tx_src_scalar(fog_factor));
3772
ureg_MUL(ureg, fog_factor, tx_src_scalar(fog_factor), ureg_imm1f(ureg, -1.442695f));
3773
ureg_EX2(ureg, fog_factor, tx_src_scalar(fog_factor));
3775
fog_vs = ureg_scalar(ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_GENERIC, 16,
3776
TGSI_INTERPOLATE_PERSPECTIVE),
3778
ureg_MOV(ureg, fog_factor, fog_vs);
3781
ureg_LRP(ureg, ureg_writemask(oCol0, TGSI_WRITEMASK_XYZ),
3782
tx_src_scalar(fog_factor), src_col, fog_color);
3783
ureg_MOV(ureg, ureg_writemask(oCol0, TGSI_WRITEMASK_W), src_col);
3786
static void parse_shader(struct shader_translator *tx)
3788
struct nine_shader_info *info = tx->info;
3790
while (!sm1_parse_eof(tx) && !tx->failure)
3791
sm1_parse_instruction(tx);
3792
tx->parse++; /* for byte_size */
3797
if (IS_PS && tx->version.major < 3) {
3798
if (tx->version.major < 2) {
3799
assert(tx->num_temp); /* there must be color output */
3800
info->rt_mask |= 0x1;
3801
shader_add_ps_fog_stage(tx, ureg_src(tx->regs.r[0]));
3803
shader_add_ps_fog_stage(tx, ureg_src(tx->regs.oCol[0]));
3807
if (IS_VS && tx->version.major < 3 && ureg_dst_is_undef(tx->regs.oFog) && info->fog_enable) {
3808
tx->regs.oFog = ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_GENERIC, 16);
3809
ureg_MOV(tx->ureg, ureg_writemask(tx->regs.oFog, TGSI_WRITEMASK_X), ureg_imm1f(tx->ureg, 0.0f));
3812
if (info->position_t)
3813
ureg_property(tx->ureg, TGSI_PROPERTY_VS_WINDOW_SPACE_POSITION, TRUE);
3815
if (IS_VS && !ureg_dst_is_undef(tx->regs.oPts)) {
3816
struct ureg_dst oPts = ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_PSIZE, 0);
3817
ureg_MAX(tx->ureg, tx->regs.oPts, ureg_src(tx->regs.oPts), ureg_imm1f(tx->ureg, info->point_size_min));
3818
ureg_MIN(tx->ureg, oPts, ureg_src(tx->regs.oPts), ureg_imm1f(tx->ureg, info->point_size_max));
3819
info->point_size = TRUE;
3822
if (info->process_vertices)
3823
shader_add_vs_viewport_transform(tx);
3828
/* Flag bits for the NINE_SHADER debug environment variable
 * (see nine_shader_debug_options below). */
#define NINE_SHADER_DEBUG_OPTION_NIR_VS (1 << 0)
#define NINE_SHADER_DEBUG_OPTION_NIR_PS (1 << 1)
#define NINE_SHADER_DEBUG_OPTION_NO_NIR_VS (1 << 2)
#define NINE_SHADER_DEBUG_OPTION_NO_NIR_PS (1 << 3)
#define NINE_SHADER_DEBUG_OPTION_DUMP_NIR (1 << 4)
#define NINE_SHADER_DEBUG_OPTION_DUMP_TGSI (1 << 5)
3835
static const struct debug_named_value nine_shader_debug_options[] = {
3836
{ "nir_vs", NINE_SHADER_DEBUG_OPTION_NIR_VS, "Use NIR for vertex shaders even if the driver doesn't prefer it." },
3837
{ "nir_ps", NINE_SHADER_DEBUG_OPTION_NIR_PS, "Use NIR for pixel shaders even if the driver doesn't prefer it." },
3838
{ "no_nir_vs", NINE_SHADER_DEBUG_OPTION_NO_NIR_VS, "Never use NIR for vertex shaders even if the driver prefers it." },
3839
{ "no_nir_ps", NINE_SHADER_DEBUG_OPTION_NO_NIR_PS, "Never use NIR for pixel shaders even if the driver prefers it." },
3840
{ "dump_nir", NINE_SHADER_DEBUG_OPTION_DUMP_NIR, "Print translated NIR shaders." },
3841
{ "dump_tgsi", NINE_SHADER_DEBUG_OPTION_DUMP_TGSI, "Print TGSI shaders." },
3842
DEBUG_NAMED_VALUE_END /* must be last */
3845
static inline boolean
3846
nine_shader_get_debug_flag(uint64_t flag)
3848
static uint64_t flags = 0;
3849
static boolean first_run = TRUE;
3851
if (unlikely(first_run)) {
3853
flags = debug_get_flags_option("NINE_SHADER", nine_shader_debug_options, 0);
3855
// Check old TGSI dump envvar too
3856
if (debug_get_bool_option("NINE_TGSI_DUMP", FALSE)) {
3857
flags |= NINE_SHADER_DEBUG_OPTION_DUMP_TGSI;
3861
return !!(flags & flag);
3865
nine_pipe_nir_shader_state_from_tgsi(struct pipe_shader_state *state, const struct tgsi_token *tgsi_tokens,
3866
struct pipe_screen *screen)
3868
struct nir_shader *nir = tgsi_to_nir(tgsi_tokens, screen, screen->get_disk_shader_cache != NULL);
3870
if (unlikely(nine_shader_get_debug_flag(NINE_SHADER_DEBUG_OPTION_DUMP_NIR))) {
3871
nir_print_shader(nir, stdout);
3874
state->type = PIPE_SHADER_IR_NIR;
3875
state->tokens = NULL;
3876
state->ir.nir = nir;
3877
memset(&state->stream_output, 0, sizeof(state->stream_output));
3881
nine_ureg_create_shader(struct ureg_program *ureg,
3882
struct pipe_context *pipe,
3883
const struct pipe_stream_output_info *so)
3885
struct pipe_shader_state state;
3886
const struct tgsi_token *tgsi_tokens;
3887
struct pipe_screen *screen = pipe->screen;
3889
tgsi_tokens = ureg_finalize(ureg);
3893
assert(((struct tgsi_header *) &tgsi_tokens[0])->HeaderSize >= 2);
3894
enum pipe_shader_type shader_type = ((struct tgsi_processor *) &tgsi_tokens[1])->Processor;
3896
int preferred_ir = screen->get_shader_param(screen, shader_type, PIPE_SHADER_CAP_PREFERRED_IR);
3897
bool prefer_nir = (preferred_ir == PIPE_SHADER_IR_NIR);
3898
bool use_nir = prefer_nir ||
3899
((shader_type == PIPE_SHADER_VERTEX) && nine_shader_get_debug_flag(NINE_SHADER_DEBUG_OPTION_NIR_VS)) ||
3900
((shader_type == PIPE_SHADER_FRAGMENT) && nine_shader_get_debug_flag(NINE_SHADER_DEBUG_OPTION_NIR_PS));
3902
/* Allow user to override preferred IR, this is very useful for debugging */
3903
if (unlikely(shader_type == PIPE_SHADER_VERTEX && nine_shader_get_debug_flag(NINE_SHADER_DEBUG_OPTION_NO_NIR_VS)))
3905
if (unlikely(shader_type == PIPE_SHADER_FRAGMENT && nine_shader_get_debug_flag(NINE_SHADER_DEBUG_OPTION_NO_NIR_PS)))
3908
DUMP("shader type: %s, preferred IR: %s, selected IR: %s\n",
3909
shader_type == PIPE_SHADER_VERTEX ? "VS" : "PS",
3910
prefer_nir ? "NIR" : "TGSI",
3911
use_nir ? "NIR" : "TGSI");
3914
nine_pipe_nir_shader_state_from_tgsi(&state, tgsi_tokens, screen);
3916
pipe_shader_state_from_tgsi(&state, tgsi_tokens);
3919
assert(state.tokens || state.ir.nir);
3922
state.stream_output = *so;
3924
switch (shader_type) {
3925
case PIPE_SHADER_VERTEX:
3926
return pipe->create_vs_state(pipe, &state);
3927
case PIPE_SHADER_FRAGMENT:
3928
return pipe->create_fs_state(pipe, &state);
3930
unreachable("unsupported shader type");
3936
/* Creates the pipe CSO from `p` (with optional stream-output info),
 * then destroys the ureg program. Returns the CSO (NULL on failure);
 * ownership of `p` always transfers here. */
void *
nine_create_shader_with_so_and_destroy(struct ureg_program *p,
                                       struct pipe_context *pipe,
                                       const struct pipe_stream_output_info *so)
{
    void *result = nine_ureg_create_shader(p, pipe, so);
    ureg_destroy(p);
    return result;
}
3946
nine_translate_shader(struct NineDevice9 *device, struct nine_shader_info *info, struct pipe_context *pipe)
3948
struct shader_translator *tx;
3949
HRESULT hr = D3D_OK;
3950
const unsigned processor = info->type;
3951
struct pipe_screen *screen = info->process_vertices ? device->screen_sw : device->screen;
3952
unsigned *const_ranges = NULL;
3954
user_assert(processor != ~0, D3DERR_INVALIDCALL);
3956
tx = MALLOC_STRUCT(shader_translator);
3958
return E_OUTOFMEMORY;
3960
if (tx_ctor(tx, screen, info) == E_OUTOFMEMORY) {
3965
assert(IS_VS || !info->swvp_on);
3967
if (((tx->version.major << 16) | tx->version.minor) > 0x00030000) {
3968
hr = D3DERR_INVALIDCALL;
3969
DBG("Unsupported shader version: %u.%u !\n",
3970
tx->version.major, tx->version.minor);
3973
if (tx->processor != processor) {
3974
hr = D3DERR_INVALIDCALL;
3975
DBG("Shader type mismatch: %u / %u !\n", tx->processor, processor);
3978
DUMP("%s%u.%u\n", processor == PIPE_SHADER_VERTEX ? "VS" : "PS",
3979
tx->version.major, tx->version.minor);
3984
/* For VS shaders, we print the warning later,
3985
* we first try with swvp. */
3987
ERR("Encountered buggy shader\n");
3988
ureg_destroy(tx->ureg);
3989
hr = D3DERR_INVALIDCALL;
3993
/* Recompile after compacting constant slots if possible */
3994
if (!tx->indirect_const_access && !info->swvp_on && tx->num_slots > 0) {
3997
int i, j, num_ranges, prev;
3999
DBG("Recompiling shader for constant compaction\n");
4000
ureg_destroy(tx->ureg);
4002
if (tx->num_inst_labels)
4003
FREE(tx->inst_labels);
4009
for (i = 0; i < NINE_MAX_CONST_ALL; i++) {
4010
if (tx->slots_used[i]) {
4016
slot_map = MALLOC(NINE_MAX_CONST_ALL * sizeof(unsigned));
4017
const_ranges = CALLOC(num_ranges + 1, 2 * sizeof(unsigned)); /* ranges stop when last is of size 0 */
4018
if (!slot_map || !const_ranges) {
4025
for (i = 0; i < NINE_MAX_CONST_ALL; i++) {
4026
if (tx->slots_used[i]) {
4029
/* Initialize first slot of the range */
4030
if (!const_ranges[2*j+1])
4031
const_ranges[2*j] = i;
4032
const_ranges[2*j+1]++;
4038
if (tx_ctor(tx, screen, info) == E_OUTOFMEMORY) {
4042
tx->slot_map = slot_map;
4044
assert(!tx->failure);
4045
#if !defined(NDEBUG)
4048
while (const_ranges[i*2+1] != 0) {
4049
j += const_ranges[i*2+1];
4052
assert(j == tx->num_slots);
4056
/* record local constants */
4057
if (tx->num_lconstf && tx->indirect_const_access) {
4058
struct nine_range *ranges;
4065
data = MALLOC(tx->num_lconstf * 4 * sizeof(float));
4068
info->lconstf.data = data;
4070
indices = MALLOC(tx->num_lconstf * sizeof(indices[0]));
4074
/* lazy sort, num_lconstf should be small */
4075
for (n = 0; n < tx->num_lconstf; ++n) {
4076
for (k = 0, i = 0; i < tx->num_lconstf; ++i) {
4077
if (tx->lconstf[i].idx < tx->lconstf[k].idx)
4080
indices[n] = tx->lconstf[k].idx;
4081
memcpy(&data[n * 4], &tx->lconstf[k].f[0], 4 * sizeof(float));
4082
tx->lconstf[k].idx = INT_MAX;
4086
for (n = 1, i = 1; i < tx->num_lconstf; ++i)
4087
if (indices[i] != indices[i - 1] + 1)
4089
ranges = MALLOC(n * sizeof(ranges[0]));
4094
info->lconstf.ranges = ranges;
4097
ranges[k].bgn = indices[0];
4098
for (i = 1; i < tx->num_lconstf; ++i) {
4099
if (indices[i] != indices[i - 1] + 1) {
4100
ranges[k].next = &ranges[k + 1];
4101
ranges[k].end = indices[i - 1] + 1;
4103
ranges[k].bgn = indices[i];
4106
ranges[k].end = indices[i - 1] + 1;
4107
ranges[k].next = NULL;
4108
assert(n == (k + 1));
4115
if (info->const_float_slots > device->max_vs_const_f &&
4116
(info->const_int_slots || info->const_bool_slots) &&
4118
ERR("Overlapping constant slots. The shader is likely to be buggy\n");
4121
if (tx->indirect_const_access) { /* vs only */
4122
info->const_float_slots = device->max_vs_const_f;
4123
tx->num_slots = MAX2(tx->num_slots, device->max_vs_const_f);
4126
if (!info->swvp_on) {
4127
info->const_used_size = sizeof(float[4]) * tx->num_slots;
4129
ureg_DECL_constant2D(tx->ureg, 0, tx->num_slots-1, 0);
4131
ureg_DECL_constant2D(tx->ureg, 0, 4095, 0);
4132
ureg_DECL_constant2D(tx->ureg, 0, 4095, 1);
4133
ureg_DECL_constant2D(tx->ureg, 0, 2047, 2);
4134
ureg_DECL_constant2D(tx->ureg, 0, 511, 3);
4137
if (info->process_vertices)
4138
ureg_DECL_constant2D(tx->ureg, 0, 2, 4); /* Viewport data */
4140
if (unlikely(nine_shader_get_debug_flag(NINE_SHADER_DEBUG_OPTION_DUMP_TGSI))) {
4141
const struct tgsi_token *toks = ureg_get_tokens(tx->ureg, NULL);
4143
ureg_free_tokens(toks);
4146
if (info->process_vertices) {
4147
NineVertexDeclaration9_FillStreamOutputInfo(info->vdecl_out,
4151
info->cso = nine_create_shader_with_so_and_destroy(tx->ureg, pipe, &(info->so));
4153
info->cso = nine_create_shader_with_so_and_destroy(tx->ureg, pipe, NULL);
4155
hr = D3DERR_DRIVERINTERNALERROR;
4156
FREE(info->lconstf.data);
4157
FREE(info->lconstf.ranges);
4161
info->const_ranges = const_ranges;
4162
const_ranges = NULL;
4163
info->byte_size = (tx->parse - tx->byte_code) * sizeof(DWORD);