422
450
Smart enough to realize that it doesn't need it? */
423
451
int u_temp_i = R200_VSF_MAX_TEMPS - 1;
424
452
struct prog_src_register src[3];
426
/* if (getenv("R300_VP_SAFETY")) {
427
WARN_ONCE("R300_VP_SAFETY enabled.\n");
429
vpi = malloc((mesa_vp->Base.NumInstructions + VSF_MAX_FRAGMENT_TEMPS) * sizeof(struct prog_instruction));
430
memset(vpi, 0, VSF_MAX_FRAGMENT_TEMPS * sizeof(struct prog_instruction));
432
for (i=0; i < VSF_MAX_FRAGMENT_TEMPS; i++) {
433
vpi[i].Opcode = OPCODE_MOV;
434
vpi[i].StringPos = 0;
437
vpi[i].DstReg.File = PROGRAM_TEMPORARY;
438
vpi[i].DstReg.Index = i;
439
vpi[i].DstReg.WriteMask = WRITEMASK_XYZW;
440
vpi[i].DstReg.CondMask = COND_TR;
442
vpi[i].SrcReg[0].File = PROGRAM_STATE_VAR;
443
vpi[i].SrcReg[0].Index = 0;
444
vpi[i].SrcReg[0].Swizzle = MAKE_SWIZZLE4(SWIZZLE_ONE, SWIZZLE_ONE, SWIZZLE_ONE, SWIZZLE_ONE);
447
memcpy(&vpi[i], mesa_vp->Base.Instructions, mesa_vp->Base.NumInstructions * sizeof(struct prog_instruction));
449
free(mesa_vp->Base.Instructions);
451
mesa_vp->Base.Instructions = vpi;
453
mesa_vp->Base.NumInstructions += VSF_MAX_FRAGMENT_TEMPS;
454
vpi = &mesa_vp->Base.Instructions[mesa_vp->Base.NumInstructions-1];
456
assert(vpi->Opcode == OPCODE_END);
453
struct prog_dst_register dst;
458
455
/* FIXME: is changing the prog safe to do here? */
459
if (mesa_vp->IsPositionInvariant) {
456
if (mesa_vp->IsPositionInvariant &&
457
/* make sure we only do this once */
458
!(mesa_vp->Base.OutputsWritten & (1 << VERT_RESULT_HPOS))) {
459
_mesa_insert_mvp_code(ctx, mesa_vp);
462
/* for fogc, can't change mesa_vp, as it would hose swtnl, and exp with
463
base e isn't directly available either. */
464
if (mesa_vp->Base.OutputsWritten & VERT_RESULT_FOGC && !vp->fogpidx) {
460
465
struct gl_program_parameter_list *paramList;
461
GLint tokens[6] = { STATE_MATRIX, STATE_MVP, 0, 0, 0, STATE_MATRIX };
464
tokens[5] = STATE_MATRIX;
466
tokens[5] = STATE_MATRIX_TRANSPOSE;
466
GLint tokens[6] = { STATE_FOG_PARAMS, 0, 0, 0, 0, 0 };
468
467
paramList = mesa_vp->Base.Parameters;
470
vpi = malloc((mesa_vp->Base.NumInstructions + 4) * sizeof(struct prog_instruction));
471
memset(vpi, 0, 4 * sizeof(struct prog_instruction));
473
for (i=0; i < 4; i++) {
475
tokens[3] = tokens[4] = i;
476
idx = _mesa_add_state_reference(paramList, tokens);
478
vpi[i].Opcode = OPCODE_DP4;
479
vpi[i].StringPos = 0;
482
vpi[i].DstReg.File = PROGRAM_OUTPUT;
483
vpi[i].DstReg.Index = VERT_RESULT_HPOS;
484
vpi[i].DstReg.WriteMask = 1 << i;
485
vpi[i].DstReg.CondMask = COND_TR;
487
vpi[i].SrcReg[0].File = PROGRAM_STATE_VAR;
488
vpi[i].SrcReg[0].Index = idx;
489
vpi[i].SrcReg[0].Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W);
491
vpi[i].SrcReg[1].File = PROGRAM_INPUT;
492
vpi[i].SrcReg[1].Index = VERT_ATTRIB_POS;
493
vpi[i].SrcReg[1].Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W);
496
vpi[i].Opcode = OPCODE_MUL;
498
vpi[i].Opcode = OPCODE_MAD;
500
vpi[i].StringPos = 0;
504
vpi[i].DstReg.File = PROGRAM_OUTPUT;
506
vpi[i].DstReg.File = PROGRAM_TEMPORARY;
507
vpi[i].DstReg.Index = 0;
508
vpi[i].DstReg.WriteMask = 0xf;
509
vpi[i].DstReg.CondMask = COND_TR;
511
vpi[i].SrcReg[0].File = PROGRAM_STATE_VAR;
512
vpi[i].SrcReg[0].Index = idx;
513
vpi[i].SrcReg[0].Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W);
515
vpi[i].SrcReg[1].File = PROGRAM_INPUT;
516
vpi[i].SrcReg[1].Index = VERT_ATTRIB_POS;
517
vpi[i].SrcReg[1].Swizzle = MAKE_SWIZZLE4(i, i, i, i);
520
vpi[i].SrcReg[2].File = PROGRAM_TEMPORARY;
521
vpi[i].SrcReg[2].Index = 0;
522
vpi[i].SrcReg[2].Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W);
527
memcpy(&vpi[i], mesa_vp->Base.Instructions, mesa_vp->Base.NumInstructions * sizeof(struct prog_instruction));
529
free(mesa_vp->Base.Instructions);
531
mesa_vp->Base.Instructions = vpi;
533
mesa_vp->Base.NumInstructions += 4;
534
vpi = &mesa_vp->Base.Instructions[mesa_vp->Base.NumInstructions-1];
536
assert(vpi->Opcode == OPCODE_END);
538
mesa_vp->Base.InputsRead |= (1 << VERT_ATTRIB_POS);
539
mesa_vp->Base.OutputsWritten |= (1 << VERT_RESULT_HPOS);
541
//fprintf(stderr, "IsPositionInvariant is set!\n");
542
//_mesa_print_program(&mesa_vp->Base);
468
vp->fogpidx = _mesa_add_state_reference(paramList, tokens);
546
472
mesa_vp->Base.NumNativeInstructions = 0;
547
mesa_vp->Base.NumNativeParameters = mesa_vp->Base.Parameters->NumParameters;
473
if (mesa_vp->Base.Parameters)
474
mesa_vp->Base.NumNativeParameters = mesa_vp->Base.Parameters->NumParameters;
476
mesa_vp->Base.NumNativeParameters = 0;
549
for(i=0; i < VERT_ATTRIB_MAX; i++)
478
for(i = 0; i < VERT_ATTRIB_MAX; i++)
550
479
vp->inputs[i] = -1;
480
free_inputs = 0x2ffd;
551
482
/* fglrx uses fixed inputs as follows for conventional attribs.
552
generic attribs use non-fixed assignment, fglrx will always use the lowest attrib values available.
553
There are 12 generic attribs possible, corresponding to attrib 0, 2-11 and 13 in a hw vertex prog.
554
attr 1 and 12 are not available for generic attribs as those cannot be made vec4 (correspond to
555
vertex normal/weight)
483
generic attribs use non-fixed assignment, fglrx will always use the
484
lowest attrib values available. We'll just do the same.
485
There are 12 generic attribs possible, corresponding to attrib 0, 2-11
486
and 13 in a hw vertex prog.
487
attr 1 and 12 aren't used for generic attribs as those cannot be made vec4
488
(correspond to vertex normal/weight - maybe weight actually could be made vec4).
489
Additionally, not more than 12 arrays in total are possible I think.
556
490
attr 0 is pos, R200_VTX_XY1|R200_VTX_Z1|R200_VTX_W1 in R200_SE_VTX_FMT_0
557
491
attr 2-5 use colors 0-3 (R200_VTX_FP_RGBA << R200_VTX_COLOR_0/1/2/3_SHIFT in R200_SE_VTX_FMT_0)
558
492
attr 6-11 use tex 0-5 (4 << R200_VTX_TEX0/1/2/3/4/5_COMP_CNT_SHIFT in R200_SE_VTX_FMT_1)
559
493
attr 13 uses vtx1 pos (R200_VTX_XY1|R200_VTX_Z1|R200_VTX_W1 in R200_SE_VTX_FMT_0)
560
generic attribs would require some more work (dma regions, renaming). */
562
/* may look different when using idx buf / input_route instead of se_vtx_fmt? */
563
vp->inputs[VERT_ATTRIB_POS] = 0;
564
vp->inputs[VERT_ATTRIB_WEIGHT] = 12;
565
vp->inputs[VERT_ATTRIB_NORMAL] = 1;
566
vp->inputs[VERT_ATTRIB_COLOR0] = 2;
567
vp->inputs[VERT_ATTRIB_COLOR1] = 3;
568
vp->inputs[VERT_ATTRIB_FOG] = 15;
569
vp->inputs[VERT_ATTRIB_TEX0] = 6;
570
vp->inputs[VERT_ATTRIB_TEX1] = 7;
571
vp->inputs[VERT_ATTRIB_TEX2] = 8;
572
vp->inputs[VERT_ATTRIB_TEX3] = 9;
573
vp->inputs[VERT_ATTRIB_TEX4] = 10;
574
vp->inputs[VERT_ATTRIB_TEX5] = 11;
575
496
/* attr 4,5 and 13 are only used with generic attribs.
576
497
Haven't seen attr 14 used, maybe that's for the hw pointsize vec1 (which is
577
498
not possible to use with vertex progs as it is lacking in vert prog specification) */
579
assert(mesa_vp->Base.OutputsWritten & (1 << VERT_RESULT_HPOS));
581
vp->translated = GL_TRUE;
499
/* may look different when using idx buf / input_route instead of se_vtx_fmt? */
500
if (mesa_vp->Base.InputsRead & VERT_BIT_POS) {
501
vp->inputs[VERT_ATTRIB_POS] = 0;
502
free_inputs &= ~(1 << 0);
505
if (mesa_vp->Base.InputsRead & VERT_BIT_WEIGHT) {
506
/* we don't actually handle that later. Then again, we don't have to... */
507
vp->inputs[VERT_ATTRIB_WEIGHT] = 12;
510
if (mesa_vp->Base.InputsRead & VERT_BIT_NORMAL) {
511
vp->inputs[VERT_ATTRIB_NORMAL] = 1;
514
if (mesa_vp->Base.InputsRead & VERT_BIT_COLOR0) {
515
vp->inputs[VERT_ATTRIB_COLOR0] = 2;
516
free_inputs &= ~(1 << 2);
519
if (mesa_vp->Base.InputsRead & VERT_BIT_COLOR1) {
520
vp->inputs[VERT_ATTRIB_COLOR1] = 3;
521
free_inputs &= ~(1 << 3);
524
if (mesa_vp->Base.InputsRead & VERT_BIT_FOG) {
525
vp->inputs[VERT_ATTRIB_FOG] = 15; array_count++;
527
for (i = VERT_ATTRIB_TEX0; i <= VERT_ATTRIB_TEX5; i++) {
528
if (mesa_vp->Base.InputsRead & (1 << i)) {
529
vp->inputs[i] = i - VERT_ATTRIB_TEX0 + 6;
530
free_inputs &= ~(1 << (i - VERT_ATTRIB_TEX0 + 6));
534
free_inputs_conv = free_inputs;
535
/* using VERT_ATTRIB_TEX6/7 would be illegal */
536
/* completely ignore aliasing? */
537
for (i = VERT_ATTRIB_GENERIC0; i < VERT_ATTRIB_MAX; i++) {
539
/* completely ignore aliasing? */
540
if (mesa_vp->Base.InputsRead & (1 << i)) {
542
if (array_count > 12) {
543
if (R200_DEBUG & DEBUG_FALLBACKS) {
544
fprintf(stderr, "more than 12 attribs used in vert prog\n");
548
for (j = 0; j < 14; j++) {
549
/* will always find one due to limited array_count */
550
if (free_inputs & (1 << j)) {
551
free_inputs &= ~(1 << j);
553
vp->rev_inputs[j] = i;
559
vp->gen_inputs_mapped = free_inputs ^ free_inputs_conv;
561
if (!(mesa_vp->Base.OutputsWritten & (1 << VERT_RESULT_HPOS))) {
562
if (R200_DEBUG & DEBUG_FALLBACKS) {
563
fprintf(stderr, "can't handle vert prog without position output\n");
567
if (free_inputs & 1) {
568
if (R200_DEBUG & DEBUG_FALLBACKS) {
569
fprintf(stderr, "can't handle vert prog without position input\n");
583
574
o_inst = vp->instr;
584
for(vpi = mesa_vp->Base.Instructions; vpi->Opcode != OPCODE_END; vpi++, o_inst++){
585
if (u_temp_i < mesa_vp->Base.NumTemporaries) {
586
if (R200_DEBUG & DEBUG_FALLBACKS) {
587
fprintf(stderr, "Ran out of temps, num temps %d, us %d\n", mesa_vp->Base.NumTemporaries, u_temp_i);
591
u_temp_i = R200_VSF_MAX_TEMPS - 1;
592
if(o_inst - vp->instr >= R200_VSF_MAX_INST) {
593
mesa_vp->Base.NumNativeInstructions = 129;
594
if (R200_DEBUG & DEBUG_FALLBACKS) {
595
fprintf(stderr, "more than 128 native instructions\n");
575
for (vpi = mesa_vp->Base.Instructions; vpi->Opcode != OPCODE_END; vpi++, o_inst++){
600
576
operands = op_operands(vpi->Opcode);
601
577
are_srcs_scalar = operands & SCALAR_FLAG;
602
578
operands &= OP_MASK;
604
for(i = 0; i < operands; i++)
580
for(i = 0; i < operands; i++) {
605
581
src[i] = vpi->SrcReg[i];
582
/* hack up default attrib values as per spec as swizzling.
583
normal, fog, secondary color. Crazy?
584
May need more if we don't submit vec4 elements? */
585
if (src[i].File == PROGRAM_INPUT) {
586
if (src[i].Index == VERT_ATTRIB_NORMAL) {
588
for (j = 0; j < 4; j++) {
589
if (GET_SWZ(src[i].Swizzle, j) == SWIZZLE_W) {
590
src[i].Swizzle &= ~(SWIZZLE_W << (j*3));
591
src[i].Swizzle |= SWIZZLE_ONE << (j*3);
595
else if (src[i].Index == VERT_ATTRIB_COLOR1) {
597
for (j = 0; j < 4; j++) {
598
if (GET_SWZ(src[i].Swizzle, j) == SWIZZLE_W) {
599
src[i].Swizzle &= ~(SWIZZLE_W << (j*3));
600
src[i].Swizzle |= SWIZZLE_ZERO << (j*3);
604
else if (src[i].Index == VERT_ATTRIB_FOG) {
606
for (j = 0; j < 4; j++) {
607
if (GET_SWZ(src[i].Swizzle, j) == SWIZZLE_W) {
608
src[i].Swizzle &= ~(SWIZZLE_W << (j*3));
609
src[i].Swizzle |= SWIZZLE_ONE << (j*3);
611
else if ((GET_SWZ(src[i].Swizzle, j) == SWIZZLE_Y) ||
612
GET_SWZ(src[i].Swizzle, j) == SWIZZLE_Z) {
613
src[i].Swizzle &= ~(SWIZZLE_W << (j*3));
614
src[i].Swizzle |= SWIZZLE_ZERO << (j*3);
607
621
if(operands == 3){
608
622
if( CMP_SRCS(src[1], src[2]) || CMP_SRCS(src[0], src[2]) ){
986
if (vp->fogmode == GL_EXP) {
987
o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_MUL,
988
(fog_temp_i << R200_VPI_OUT_REG_INDEX_SHIFT) | R200_VSF_OUT_CLASS_TMP,
990
o_inst->src0 = EASY_VSF_SOURCE(fog_temp_i, X, X, X, X, TMP, NONE);
991
o_inst->src1 = EASY_VSF_SOURCE(vp->fogpidx, X, X, X, X, PARAM, NONE);
992
o_inst->src2 = UNUSED_SRC_1;
994
o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_EXP_E,
995
R200_VSF_OUT_CLASS_RESULT_FOGC,
997
o_inst->src0 = EASY_VSF_SOURCE(fog_temp_i, X, X, X, X, TMP, ALL);
998
o_inst->src1 = UNUSED_SRC_0;
999
o_inst->src2 = UNUSED_SRC_1;
1001
else if (vp->fogmode == GL_EXP2) {
1002
o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_MUL,
1003
(fog_temp_i << R200_VPI_OUT_REG_INDEX_SHIFT) | R200_VSF_OUT_CLASS_TMP,
1005
o_inst->src0 = EASY_VSF_SOURCE(fog_temp_i, X, X, X, X, TMP, NONE);
1006
o_inst->src1 = EASY_VSF_SOURCE(vp->fogpidx, X, X, X, X, PARAM, NONE);
1007
o_inst->src2 = UNUSED_SRC_1;
1009
o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_MUL,
1010
(fog_temp_i << R200_VPI_OUT_REG_INDEX_SHIFT) | R200_VSF_OUT_CLASS_TMP,
1012
o_inst->src0 = EASY_VSF_SOURCE(fog_temp_i, X, X, X, X, TMP, NONE);
1013
o_inst->src1 = EASY_VSF_SOURCE(fog_temp_i, X, X, X, X, TMP, NONE);
1014
o_inst->src2 = UNUSED_SRC_1;
1016
o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_EXP_E,
1017
R200_VSF_OUT_CLASS_RESULT_FOGC,
1019
o_inst->src0 = EASY_VSF_SOURCE(fog_temp_i, X, X, X, X, TMP, ALL);
1020
o_inst->src1 = UNUSED_SRC_0;
1021
o_inst->src2 = UNUSED_SRC_1;
1023
else { /* fogmode == GL_LINEAR */
1024
/* could do that with single op (dot) if using params like
1025
with fixed function pipeline fog */
1026
o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_ADD,
1027
(fog_temp_i << R200_VPI_OUT_REG_INDEX_SHIFT) | R200_VSF_OUT_CLASS_TMP,
1029
o_inst->src0 = EASY_VSF_SOURCE(fog_temp_i, X, X, X, X, TMP, ALL);
1030
o_inst->src1 = EASY_VSF_SOURCE(vp->fogpidx, Z, Z, Z, Z, PARAM, NONE);
1031
o_inst->src2 = UNUSED_SRC_1;
1033
o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_MUL,
1034
R200_VSF_OUT_CLASS_RESULT_FOGC,
1036
o_inst->src0 = EASY_VSF_SOURCE(fog_temp_i, X, X, X, X, TMP, NONE);
1037
o_inst->src1 = EASY_VSF_SOURCE(vp->fogpidx, W, W, W, W, PARAM, NONE);
1038
o_inst->src2 = UNUSED_SRC_1;
1044
if (mesa_vp->Base.NumNativeTemporaries <
1045
(mesa_vp->Base.NumTemporaries + (R200_VSF_MAX_TEMPS - 1 - u_temp_i))) {
1046
mesa_vp->Base.NumNativeTemporaries =
1047
mesa_vp->Base.NumTemporaries + (R200_VSF_MAX_TEMPS - 1 - u_temp_i);
1049
if (u_temp_i < mesa_vp->Base.NumTemporaries) {
1050
if (R200_DEBUG & DEBUG_FALLBACKS) {
1051
fprintf(stderr, "Ran out of temps, num temps %d, us %d\n", mesa_vp->Base.NumTemporaries, u_temp_i);
1055
u_temp_i = R200_VSF_MAX_TEMPS - 1;
1056
if(o_inst - vp->instr >= R200_VSF_MAX_INST) {
1057
mesa_vp->Base.NumNativeInstructions = 129;
1058
if (R200_DEBUG & DEBUG_FALLBACKS) {
1059
fprintf(stderr, "more than 128 native instructions\n");
958
1063
if ((o_inst->op & R200_VSF_OUT_CLASS_MASK) == R200_VSF_OUT_CLASS_RESULT_POS) {
959
1064
vp->pos_end = (o_inst - vp->instr);
963
/* need to test again since some instructions require more than one (up to 3) native inst */
964
if(o_inst - vp->instr > R200_VSF_MAX_INST) {
965
mesa_vp->Base.NumNativeInstructions = 129;
966
if (R200_DEBUG & DEBUG_FALLBACKS) {
967
fprintf(stderr, "more than 128 native instructions\n");
971
1068
vp->native = GL_TRUE;
972
1069
mesa_vp->Base.NumNativeInstructions = (o_inst - vp->instr);