495
498
unsigned cond_depth;
496
499
unsigned loop_labels[NINE_MAX_LOOP_DEPTH];
497
500
unsigned cond_labels[NINE_MAX_COND_DEPTH];
498
boolean loop_or_rep[NINE_MAX_LOOP_DEPTH]; /* true: loop, false: rep */
499
boolean predicated_activated;
501
bool loop_or_rep[NINE_MAX_LOOP_DEPTH]; /* true: loop, false: rep */
502
bool predicated_activated;
501
504
unsigned *inst_labels; /* LABEL op */
502
505
unsigned num_inst_labels;
599
602
slot_idx = tx->slot_map[slot_idx];
600
603
src = ureg_src_register(TGSI_FILE_CONSTANT, slot_idx);
601
604
src = ureg_src_dimension(src, 0);
602
tx->slots_used[slot_idx] = TRUE;
603
tx->info->int_slots_used[idx] = TRUE;
605
tx->slots_used[slot_idx] = true;
606
tx->info->int_slots_used[idx] = true;
604
607
if (tx->num_slots < (slot_idx + 1))
605
608
tx->num_slots = slot_idx + 1;
627
630
slot_idx = tx->slot_map[slot_idx];
628
631
src = ureg_src_register(TGSI_FILE_CONSTANT, slot_idx);
629
632
src = ureg_src_dimension(src, 0);
630
tx->slots_used[slot_idx] = TRUE;
631
tx->info->bool_slots_used[idx] = TRUE;
633
tx->slots_used[slot_idx] = true;
634
tx->info->bool_slots_used[idx] = true;
632
635
if (tx->num_slots < (slot_idx + 1))
633
636
tx->num_slots = slot_idx + 1;
646
static struct ureg_src nine_special_constant_src(struct shader_translator *tx, int idx)
650
unsigned slot_idx = idx + (IS_PS ? NINE_MAX_CONST_PS_SPE_OFFSET :
651
(tx->info->swvp_on ? NINE_MAX_CONST_SWVP_SPE_OFFSET : NINE_MAX_CONST_VS_SPE_OFFSET));
653
if (!tx->info->swvp_on && tx->slot_map)
654
slot_idx = tx->slot_map[slot_idx];
655
src = ureg_src_register(TGSI_FILE_CONSTANT, slot_idx);
656
src = ureg_src_dimension(src, 0);
658
if (!tx->info->swvp_on)
659
tx->slots_used[slot_idx] = true;
660
if (tx->num_slots < (slot_idx + 1))
661
tx->num_slots = slot_idx + 1;
644
667
tx_lconstf(struct shader_translator *tx, struct ureg_src *src, INT index)
648
671
if (index < 0 || index >= tx->num_constf_allowed) {
652
675
for (i = 0; i < tx->num_lconstf; ++i) {
653
676
if (tx->lconstf[i].idx == index) {
654
677
*src = tx->lconstf[i].reg;
661
684
tx_lconsti(struct shader_translator *tx, struct ureg_src *src, INT index)
665
688
if (index < 0 || index >= tx->num_consti_allowed) {
669
692
for (i = 0; i < tx->num_lconsti; ++i) {
670
693
if (tx->lconsti[i].idx == index) {
671
694
*src = tx->lconsti[i].reg;
678
701
tx_lconstb(struct shader_translator *tx, struct ureg_src *src, INT index)
682
705
if (index < 0 || index >= tx->num_constb_allowed) {
686
709
for (i = 0; i < tx->num_lconstb; ++i) {
687
710
if (tx->lconstb[i].idx == index) {
688
711
*src = tx->lconstb[i].reg;
1355
1382
assert(!param->rel);
1356
1383
tx->info->rt_mask |= 1 << param->idx;
1357
1384
if (ureg_dst_is_undef(tx->regs.oCol[param->idx])) {
1358
/* ps < 3: oCol[0] will have fog blending afterward */
1359
if (!IS_VS && tx->version.major < 3 && param->idx == 0) {
1385
/* ps < 3: oCol[0] will have fog blending afterward
1386
* ps: oCol[0] might have alphatest afterward */
1387
if (!IS_VS && param->idx == 0) {
1360
1388
tx->regs.oCol[0] = ureg_DECL_temporary(tx->ureg);
1362
1390
tx->regs.oCol[param->idx] =
2298
2326
/* SM2 output semantic determined by file */
2299
2327
assert(sem.reg.mask != 0);
2300
2328
if (sem.usage == D3DDECLUSAGE_POSITIONT)
2301
tx->info->position_t = TRUE;
2329
tx->info->position_t = true;
2302
2330
assert(sem.reg.idx < ARRAY_SIZE(tx->regs.o));
2303
2331
assert(ureg_dst_is_undef(tx->regs.o[sem.reg.idx]) && "Nine doesn't support yet packing");
2304
2332
tx->regs.o[sem.reg.idx] = ureg_DECL_output_masked(
2305
2333
ureg, tgsi.Name, tgsi.Index, sem.reg.mask, 0, 1);
2306
2334
nine_record_outputs(tx, sem.usage, sem.usage_idx, sem.reg.mask, sem.reg.idx);
2307
if (tx->info->process_vertices && sem.usage == D3DDECLUSAGE_POSITION && sem.usage_idx == 0) {
2308
tx->regs.oPos_out = tx->regs.o[sem.reg.idx];
2335
if ((tx->info->process_vertices || tx->info->clip_plane_emulation > 0) &&
2336
sem.usage == D3DDECLUSAGE_POSITION && sem.usage_idx == 0) {
2337
tx->regs.oPos_out = tx->regs.o[sem.reg.idx]; /* TODO: declaring it twice is probably not a good idea */
2309
2338
tx->regs.o[sem.reg.idx] = ureg_DECL_temporary(ureg);
2310
2339
tx->regs.oPos = tx->regs.o[sem.reg.idx];
2337
2367
if (sem.reg.mod & NINED3DSPDM_CENTROID ||
2338
2368
(tgsi.Name == TGSI_SEMANTIC_COLOR && tx->info->force_color_in_centroid))
2339
2369
interp_location = TGSI_INTERPOLATE_LOC_CENTROID;
2370
interp_flag = nine_tgsi_to_interp_mode(&tgsi);
2371
/* We replace TGSI_INTERPOLATE_COLOR because some drivers don't support it,
2372
* and those that support it do the same replacement we do */
2373
if (interp_flag == TGSI_INTERPOLATE_COLOR)
2374
interp_flag = tx->info->color_flatshade ? TGSI_INTERPOLATE_CONSTANT : TGSI_INTERPOLATE_PERSPECTIVE;
2341
2376
tx->regs.v[sem.reg.idx] = ureg_DECL_fs_input_centroid(
2342
2377
ureg, tgsi.Name, tgsi.Index,
2343
nine_tgsi_to_interp_mode(&tgsi),
2344
2379
interp_location, 0, 1);
2346
2381
if (!is_input && 0) { /* declare in COLOROUT/DEPTHOUT case */
2378
2413
tx_src_param(tx, &tx->insn.src[0]),
2379
2414
tx_src_param(tx, &tx->insn.src[1])
2381
ureg_POW(tx->ureg, dst, ureg_abs(src[0]), src[1]);
2416
/* Anything^0 is 1, including 0^0.
2417
* Assume mul_zero_wins drivers already have
2418
* this behaviour. Emulate for the others. */
2419
if (tx->mul_zero_wins) {
2420
ureg_POW(tx->ureg, dst, ureg_abs(src[0]), src[1]);
2422
struct ureg_dst tmp = tx_scratch_scalar(tx);
2423
ureg_POW(tx->ureg, tmp, ureg_abs(src[0]), src[1]);
2424
ureg_CMP(tx->ureg, dst,
2425
ureg_negate(ureg_abs(ureg_scalar(src[1], TGSI_SWIZZLE_X))),
2426
tx_src_scalar(tmp), ureg_imm1f(tx->ureg, 1.0f));
2575
c8m = nine_float_constant_src(tx, 8+m);
2576
c16m2 = nine_float_constant_src(tx, 8+8+m/2);
2621
c8m = nine_special_constant_src(tx, m);
2622
c16m2 = nine_special_constant_src(tx, 8+m/2);
2578
2624
m00 = NINE_APPLY_SWIZZLE(c8m, X);
2579
2625
m01 = NINE_APPLY_SWIZZLE(c8m, Y);
3582
3628
info->input_map[i] = NINE_DECLUSAGE_NONE;
3583
3629
info->num_inputs = 0;
3585
info->position_t = FALSE;
3586
info->point_size = FALSE;
3631
info->position_t = false;
3632
info->point_size = false;
3588
3634
memset(tx->slots_used, 0, sizeof(tx->slots_used));
3589
3635
memset(info->int_slots_used, 0, sizeof(info->int_slots_used));
3640
3686
TGSI_SEMANTIC_TEXCOORD : TGSI_SEMANTIC_GENERIC;
3641
3687
tx->wpos_is_sysval = GET_CAP(FS_POSITION_IS_SYSVAL);
3642
3688
tx->face_is_sysval_integer = GET_CAP(FS_FACE_IS_INTEGER_SYSVAL);
3689
tx->no_vs_window_space = !GET_CAP(VS_WINDOW_SPACE_POSITION);
3690
tx->mul_zero_wins = GET_CAP(LEGACY_MATH_RULES);
3692
if (info->emulate_features) {
3693
tx->shift_wpos = true;
3694
tx->no_vs_window_space = true;
3695
tx->mul_zero_wins = false;
3645
3699
tx->num_constf_allowed = NINE_MAX_CONST_F;
3670
3724
* (Some drivers like nv50 are buggy and rely on that.)
3673
tx->regs.oPos = ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_POSITION, 0);
3727
tx->regs.oPos_out = ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_POSITION, 0);
3675
3729
ureg_property(tx->ureg, TGSI_PROPERTY_FS_COORD_ORIGIN, TGSI_FS_COORD_ORIGIN_UPPER_LEFT);
3676
3730
if (!tx->shift_wpos)
3677
3731
ureg_property(tx->ureg, TGSI_PROPERTY_FS_COORD_PIXEL_CENTER, TGSI_FS_COORD_PIXEL_CENTER_INTEGER);
3680
tx->mul_zero_wins = GET_CAP(LEGACY_MATH_RULES);
3681
3734
if (tx->mul_zero_wins)
3682
3735
ureg_property(tx->ureg, TGSI_PROPERTY_LEGACY_MATH_RULES, 1);
3745
shader_add_ps_fog_stage(struct shader_translator *tx, struct ureg_src src_col)
3798
shader_add_ps_fog_stage(struct shader_translator *tx, struct ureg_dst dst_col, struct ureg_src src_col)
3747
3800
struct ureg_program *ureg = tx->ureg;
3748
struct ureg_dst oCol0 = ureg_DECL_output(ureg, TGSI_SEMANTIC_COLOR, 0);
3749
3801
struct ureg_src fog_end, fog_coeff, fog_density, fog_params;
3750
3802
struct ureg_src fog_vs, fog_color;
3751
3803
struct ureg_dst fog_factor, depth;
3753
3805
if (!tx->info->fog_enable) {
3754
ureg_MOV(ureg, oCol0, src_col);
3806
ureg_MOV(ureg, dst_col, src_col);
3758
3810
if (tx->info->fog_mode != D3DFOG_NONE) {
3759
3811
depth = tx_scratch_scalar(tx);
3760
/* Depth used for fog is perspective interpolated */
3761
ureg_RCP(ureg, depth, ureg_scalar(nine_get_position_input(tx), TGSI_SWIZZLE_W));
3762
ureg_MUL(ureg, depth, ureg_src(depth), ureg_scalar(nine_get_position_input(tx), TGSI_SWIZZLE_Z));
3813
ureg_MOV(ureg, depth, ureg_scalar(nine_get_position_input(tx), TGSI_SWIZZLE_Z));
3814
else /* wfog: use w. position's w contains 1/w */
3815
ureg_RCP(ureg, depth, ureg_scalar(nine_get_position_input(tx), TGSI_SWIZZLE_W));
3765
fog_color = nine_float_constant_src(tx, 32);
3766
fog_params = nine_float_constant_src(tx, 33);
3818
fog_color = nine_special_constant_src(tx, 12);
3819
fog_params = nine_special_constant_src(tx, 13);
3767
3820
fog_factor = tx_scratch_scalar(tx);
3769
3822
if (tx->info->fog_mode == D3DFOG_LINEAR) {
3789
3842
ureg_MOV(ureg, fog_factor, fog_vs);
3792
ureg_LRP(ureg, ureg_writemask(oCol0, TGSI_WRITEMASK_XYZ),
3845
ureg_LRP(ureg, ureg_writemask(dst_col, TGSI_WRITEMASK_XYZ),
3793
3846
tx_src_scalar(fog_factor), src_col, fog_color);
3794
ureg_MOV(ureg, ureg_writemask(oCol0, TGSI_WRITEMASK_W), src_col);
3847
ureg_MOV(ureg, ureg_writemask(dst_col, TGSI_WRITEMASK_W), src_col);
3851
shader_add_ps_alpha_test_stage(struct shader_translator *tx, struct ureg_src src_color)
3853
struct ureg_program *ureg = tx->ureg;
3855
struct ureg_src src[2];
3856
struct ureg_dst tmp = tx_scratch(tx);
3857
if (tx->info->alpha_test_emulation == PIPE_FUNC_ALWAYS)
3859
if (tx->info->alpha_test_emulation == PIPE_FUNC_NEVER) {
3863
cmp_op = pipe_comp_to_tgsi_opposite(tx->info->alpha_test_emulation);
3864
src[0] = ureg_scalar(src_color, TGSI_SWIZZLE_W); /* Read color alpha channel */
3865
src[1] = ureg_scalar(nine_special_constant_src(tx, 14), TGSI_SWIZZLE_X); /* Read alphatest */
3866
ureg_insn(tx->ureg, cmp_op, &tmp, 1, src, 2, 0);
3867
ureg_KILL_IF(tx->ureg, ureg_negate(ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X))); /* if opposite test passes, discard */
3797
3870
static void parse_shader(struct shader_translator *tx)
3805
3878
if (tx->failure)
3808
if (IS_PS && tx->version.major < 3) {
3809
if (tx->version.major < 2) {
3810
assert(tx->num_temp); /* there must be color output */
3811
info->rt_mask |= 0x1;
3812
shader_add_ps_fog_stage(tx, ureg_src(tx->regs.r[0]));
3882
struct ureg_dst oCol0 = ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_COLOR, 0);
3883
struct ureg_dst tmp_oCol0;
3884
if (tx->version.major < 3) {
3885
tmp_oCol0 = ureg_DECL_temporary(tx->ureg);
3886
if (tx->version.major < 2) {
3887
assert(tx->num_temp); /* there must be color output */
3888
info->rt_mask |= 0x1;
3889
shader_add_ps_fog_stage(tx, tmp_oCol0, ureg_src(tx->regs.r[0]));
3891
shader_add_ps_fog_stage(tx, tmp_oCol0, ureg_src(tx->regs.oCol[0]));
3814
shader_add_ps_fog_stage(tx, ureg_src(tx->regs.oCol[0]));
3894
assert(!ureg_dst_is_undef(tx->regs.oCol[0]));
3895
tmp_oCol0 = tx->regs.oCol[0];
3897
shader_add_ps_alpha_test_stage(tx, ureg_src(tmp_oCol0));
3898
ureg_MOV(tx->ureg, oCol0, ureg_src(tmp_oCol0));
3818
3901
if (IS_VS && tx->version.major < 3 && ureg_dst_is_undef(tx->regs.oFog) && info->fog_enable) {
3820
3903
ureg_MOV(tx->ureg, ureg_writemask(tx->regs.oFog, TGSI_WRITEMASK_X), ureg_imm1f(tx->ureg, 0.0f));
3823
if (info->position_t)
3824
ureg_property(tx->ureg, TGSI_PROPERTY_VS_WINDOW_SPACE_POSITION, TRUE);
3906
if (info->position_t) {
3907
if (tx->no_vs_window_space) {
3908
ERR("POSITIONT is not yet implemented for your device.\n");
3910
ureg_property(tx->ureg, TGSI_PROPERTY_VS_WINDOW_SPACE_POSITION, true);
3826
3914
if (IS_VS && !ureg_dst_is_undef(tx->regs.oPts)) {
3827
3915
struct ureg_dst oPts = ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_PSIZE, 0);
3828
ureg_MAX(tx->ureg, tx->regs.oPts, ureg_src(tx->regs.oPts), ureg_imm1f(tx->ureg, info->point_size_min));
3829
ureg_MIN(tx->ureg, oPts, ureg_src(tx->regs.oPts), ureg_imm1f(tx->ureg, info->point_size_max));
3830
info->point_size = TRUE;
3916
ureg_MAX(tx->ureg, ureg_writemask(tx->regs.oPts, TGSI_WRITEMASK_X), ureg_src(tx->regs.oPts), ureg_imm1f(tx->ureg, info->point_size_min));
3917
ureg_MIN(tx->ureg, ureg_writemask(oPts, TGSI_WRITEMASK_X), ureg_src(tx->regs.oPts), ureg_imm1f(tx->ureg, info->point_size_max));
3918
info->point_size = true;
3919
} else if (IS_VS && tx->always_output_pointsize) {
3920
struct ureg_dst oPts = ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_PSIZE, 0);
3921
ureg_MOV(tx->ureg, ureg_writemask(oPts, TGSI_WRITEMASK_X), nine_special_constant_src(tx, 8));
3922
info->point_size = true;
3925
if (IS_VS && tx->info->clip_plane_emulation > 0) {
3926
struct ureg_dst clipdist[2] = {ureg_dst_undef(), ureg_dst_undef()};
3927
int num_clipdist = ffs(tx->info->clip_plane_emulation);
3929
/* TODO: handle undefined channels of oPos (w is not always written, I think; the default is 1).
3930
* Note in d3d9 it's not possible to output clipvert, so we don't need to check
3931
* for its existence */
3932
clipdist[0] = ureg_DECL_output_masked(tx->ureg, TGSI_SEMANTIC_CLIPDIST, 0, ((1 << num_clipdist) - 1) & 0xf, 0, 1);
3933
if (num_clipdist >= 5)
3934
clipdist[1] = ureg_DECL_output_masked(tx->ureg, TGSI_SEMANTIC_CLIPDIST, 1, ((1 << (num_clipdist - 4)) - 1) & 0xf, 0, 1);
3935
ureg_property(tx->ureg, TGSI_PROPERTY_NUM_CLIPDIST_ENABLED, num_clipdist);
3936
for (i = 0; i < num_clipdist; i++) {
3937
assert(!ureg_dst_is_undef(clipdist[i>>2]));
3938
if (!(tx->info->clip_plane_emulation & (1 << i)))
3939
ureg_MOV(tx->ureg, ureg_writemask(clipdist[i>>2], 1 << (i & 0x2)), ureg_imm1f(tx->ureg, 0.f));
3941
ureg_DP4(tx->ureg, ureg_writemask(clipdist[i>>2], 1 << (i & 0x2)),
3942
ureg_src(tx->regs.oPos), nine_special_constant_src(tx, i));
3945
ureg_MOV(tx->ureg, tx->regs.oPos_out, ureg_src(tx->regs.oPos));
3833
3948
if (info->process_vertices)
3836
3951
ureg_END(tx->ureg);
3839
#define NINE_SHADER_DEBUG_OPTION_NIR_VS (1 << 0)
3840
#define NINE_SHADER_DEBUG_OPTION_NIR_PS (1 << 1)
3841
3954
#define NINE_SHADER_DEBUG_OPTION_NO_NIR_VS (1 << 2)
3842
3955
#define NINE_SHADER_DEBUG_OPTION_NO_NIR_PS (1 << 3)
3843
3956
#define NINE_SHADER_DEBUG_OPTION_DUMP_NIR (1 << 4)
3844
3957
#define NINE_SHADER_DEBUG_OPTION_DUMP_TGSI (1 << 5)
3846
3959
static const struct debug_named_value nine_shader_debug_options[] = {
3847
{ "nir_vs", NINE_SHADER_DEBUG_OPTION_NIR_VS, "Use NIR for vertex shaders even if the driver doesn't prefer it." },
3848
{ "nir_ps", NINE_SHADER_DEBUG_OPTION_NIR_PS, "Use NIR for pixel shaders even if the driver doesn't prefer it." },
3849
3960
{ "no_nir_vs", NINE_SHADER_DEBUG_OPTION_NO_NIR_VS, "Never use NIR for vertex shaders even if the driver prefers it." },
3850
3961
{ "no_nir_ps", NINE_SHADER_DEBUG_OPTION_NO_NIR_PS, "Never use NIR for pixel shaders even if the driver prefers it." },
3851
3962
{ "dump_nir", NINE_SHADER_DEBUG_OPTION_DUMP_NIR, "Print translated NIR shaders." },
3853
3964
DEBUG_NAMED_VALUE_END /* must be last */
3856
static inline boolean
3857
3968
nine_shader_get_debug_flag(uint64_t flag)
3859
3970
static uint64_t flags = 0;
3860
static boolean first_run = TRUE;
3971
static bool first_run = true;
3862
3973
if (unlikely(first_run)) {
3864
3975
flags = debug_get_flags_option("NINE_SHADER", nine_shader_debug_options, 0);
3866
3977
// Check old TGSI dump envvar too
3867
if (debug_get_bool_option("NINE_TGSI_DUMP", FALSE)) {
3978
if (debug_get_bool_option("NINE_TGSI_DUMP", false)) {
3868
3979
flags |= NINE_SHADER_DEBUG_OPTION_DUMP_TGSI;
3904
4015
assert(((struct tgsi_header *) &tgsi_tokens[0])->HeaderSize >= 2);
3905
4016
enum pipe_shader_type shader_type = ((struct tgsi_processor *) &tgsi_tokens[1])->Processor;
3907
int preferred_ir = screen->get_shader_param(screen, shader_type, PIPE_SHADER_CAP_PREFERRED_IR);
3908
bool prefer_nir = (preferred_ir == PIPE_SHADER_IR_NIR);
3909
bool use_nir = prefer_nir ||
3910
((shader_type == PIPE_SHADER_VERTEX) && nine_shader_get_debug_flag(NINE_SHADER_DEBUG_OPTION_NIR_VS)) ||
3911
((shader_type == PIPE_SHADER_FRAGMENT) && nine_shader_get_debug_flag(NINE_SHADER_DEBUG_OPTION_NIR_PS));
4018
bool use_nir = true;
3913
4020
/* Allow user to override preferred IR, this is very useful for debugging */
3914
4021
if (unlikely(shader_type == PIPE_SHADER_VERTEX && nine_shader_get_debug_flag(NINE_SHADER_DEBUG_OPTION_NO_NIR_VS)))
3916
4023
if (unlikely(shader_type == PIPE_SHADER_FRAGMENT && nine_shader_get_debug_flag(NINE_SHADER_DEBUG_OPTION_NO_NIR_PS)))
3917
4024
use_nir = false;
3919
DUMP("shader type: %s, preferred IR: %s, selected IR: %s\n",
4026
DUMP("shader type: %s, selected IR: %s\n",
3920
4027
shader_type == PIPE_SHADER_VERTEX ? "VS" : "PS",
3921
prefer_nir ? "NIR" : "TGSI",
3922
4028
use_nir ? "NIR" : "TGSI");