2
* Copyright 2010 Tom Stellard <tstellar@gmail.com>
6
* Permission is hereby granted, free of charge, to any person obtaining
7
* a copy of this software and associated documentation files (the
8
* "Software"), to deal in the Software without restriction, including
9
* without limitation the rights to use, copy, modify, merge, publish,
10
* distribute, sublicense, and/or sell copies of the Software, and to
11
* permit persons to whom the Software is furnished to do so, subject to
12
* the following conditions:
14
* The above copyright notice and this permission notice (including the
15
* next paragraph) shall be included in all copies or substantial
16
* portions of the Software.
18
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21
* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
32
#include "radeon_compiler_util.h"
34
#include "radeon_compiler.h"
35
#include "radeon_dataflow.h"
38
/*
 * Build a mask from a swizzle: for each of the four channels, OR in the
 * bit selected by that channel's swizzle value (1 << GET_SWZ(swz, i)).
 * NOTE(review): some lines of this function are missing from this listing
 * (the declaration of i and the return are not visible); only the visible
 * statements are described.
 */
unsigned int rc_swizzle_to_writemask(unsigned int swz)
40
unsigned int mask = 0;
43
for(i = 0; i < 4; i++) {
44
mask |= 1 << GET_SWZ(swz, i);
51
/*
 * Return the swizzle value stored in channel idx of swz, via GET_SWZ.
 * NOTE(review): the lines between the signature and this return are not
 * visible in this listing, so there may be extra handling of idx before
 * the lookup -- confirm against the full source.
 */
rc_swizzle get_swz(unsigned int swz, rc_swizzle idx)
55
return GET_SWZ(swz, idx);
59
* The purpose of this function is to standardize the number of channels used by
60
* swizzles. All swizzles regardless of what instruction they are a part of
61
* should have 4 channels initialized with values.
62
* @param channels The number of channels in initial_value that have a
64
* @return An initialized swizzle that has all of the unused channels set to RC_SWIZZLE_UNUSED.
67
/*
 * Pad a swizzle out to four channels: every channel index from `channels`
 * up to 3 in initial_value is overwritten with RC_SWIZZLE_UNUSED.
 * NOTE(review): the declaration of i and the return statement are not
 * visible in this listing.
 */
unsigned int rc_init_swizzle(unsigned int initial_value, unsigned int channels)
70
for (i = channels; i < 4; i++) {
71
SET_SWZ(initial_value, i, RC_SWIZZLE_UNUSED);
76
/*
 * Compose a swizzle from four explicit selectors: each of swz_x..swz_w is
 * looked up in src with get_swz() and the results are packed into ret at
 * 3 bits per channel (shifts 0, 3, 6 and 9).
 * NOTE(review): the declaration/initialisation of ret and the return are
 * not visible in this listing.
 */
unsigned int combine_swizzles4(unsigned int src,
77
rc_swizzle swz_x, rc_swizzle swz_y, rc_swizzle swz_z, rc_swizzle swz_w)
81
ret |= get_swz(src, swz_x);
82
ret |= get_swz(src, swz_y) << 3;
83
ret |= get_swz(src, swz_z) << 6;
84
ret |= get_swz(src, swz_w) << 9;
89
/*
 * Compose two swizzles: for each output channel (X, Y, Z, W) the selector
 * stored in swz is read with GET_SWZ and then looked up in src through
 * get_swz(); the four results are packed at 3 bits per channel.
 * NOTE(review): the declaration/initialisation of ret and the return are
 * not visible in this listing.
 */
unsigned int combine_swizzles(unsigned int src, unsigned int swz)
93
ret |= get_swz(src, GET_SWZ(swz, RC_SWIZZLE_X));
94
ret |= get_swz(src, GET_SWZ(swz, RC_SWIZZLE_Y)) << 3;
95
ret |= get_swz(src, GET_SWZ(swz, RC_SWIZZLE_Z)) << 6;
96
ret |= get_swz(src, GET_SWZ(swz, RC_SWIZZLE_W)) << 9;
102
* @param mask Must be either RC_MASK_X, RC_MASK_Y, RC_MASK_Z, or RC_MASK_W
104
/*
 * Map a single-channel mask constant to the matching swizzle constant
 * (RC_MASK_X -> RC_SWIZZLE_X, ... RC_MASK_W -> RC_SWIZZLE_W).  The last
 * visible statement returns RC_SWIZZLE_UNUSED.
 * NOTE(review): the switch header is not visible in this listing;
 * presumably the cases switch on mask -- confirm.
 */
rc_swizzle rc_mask_to_swizzle(unsigned int mask)
107
case RC_MASK_X: return RC_SWIZZLE_X;
108
case RC_MASK_Y: return RC_SWIZZLE_Y;
109
case RC_MASK_Z: return RC_SWIZZLE_Z;
110
case RC_MASK_W: return RC_SWIZZLE_W;
112
return RC_SWIZZLE_UNUSED;
115
/*
 * Reorder mask bits according to swizzle: bit `chan` of the result is
 * taken from bit GET_SWZ(swizzle, chan) of mask.
 * NOTE(review): a line between reading swz and the OR is missing from
 * this listing (possibly a range check on swz), as are the declaration of
 * ret and the return -- confirm against the full source.
 */
116
unsigned swizzle_mask(unsigned swizzle, unsigned mask)
119
for (unsigned chan = 0; chan < 4; ++chan) {
120
unsigned swz = GET_SWZ(swizzle, chan);
122
ret |= GET_BIT(mask, swz) << chan;
127
/*
 * Decide from the opcode description whether source swizzles have to be
 * rewritten.  The visible code tests info->HasTexture and then switches
 * on info->Opcode.
 * NOTE(review): the return values and the case labels are not visible in
 * this listing, so which opcodes yield which answer cannot be stated here.
 */
static unsigned int srcs_need_rewrite(const struct rc_opcode_info * info)
129
if (info->HasTexture) {
132
switch (info->Opcode) {
145
* @return A swizzle that results from converting old_swizzle using
148
/*
 * Move the channels of old_swizzle to new positions.  For each channel i,
 * conversion_swizzle gives the destination channel new_chan, and the
 * value of old_swizzle's channel i is stored there in new_swizzle.
 * NOTE(review): the body of the RC_SWIZZLE_UNUSED test, the declaration
 * of i and the return are not visible in this listing; presumably unused
 * entries are skipped -- confirm.
 */
unsigned int rc_adjust_channels(
149
unsigned int old_swizzle,
150
unsigned int conversion_swizzle)
153
/* Start from a swizzle whose four channels are all RC_SWIZZLE_UNUSED. */
unsigned int new_swizzle = rc_init_swizzle(RC_SWIZZLE_UNUSED, 0);
154
for (i = 0; i < 4; i++) {
155
unsigned int new_chan = get_swz(conversion_swizzle, i);
156
if (new_chan == RC_SWIZZLE_UNUSED) {
159
SET_SWZ(new_swizzle, new_chan, GET_SWZ(old_swizzle, i));
164
/*
 * Translate a writemask through a conversion swizzle.  For each bit i,
 * the visible code tests whether the bit is clear in old_mask or whether
 * conversion_swizzle's channel i is RC_SWIZZLE_UNUSED; otherwise it sets
 * bit GET_SWZ(conversion_swizzle, i) in new_mask.
 * NOTE(review): the body of the test (presumably a skip), the declaration
 * of i and the return are not visible in this listing.
 */
static unsigned int rewrite_writemask(
165
unsigned int old_mask,
166
unsigned int conversion_swizzle)
168
unsigned int new_mask = 0;
171
for (i = 0; i < 4; i++) {
172
if (!GET_BIT(old_mask, i)
173
|| GET_SWZ(conversion_swizzle, i) == RC_SWIZZLE_UNUSED) {
176
new_mask |= (1 << GET_SWZ(conversion_swizzle, i));
183
* This function rewrites the writemask of sub and adjusts the swizzles
184
* of all its source registers based on the conversion_swizzle.
185
* conversion_swizzle represents a mapping of the old writemask to the
186
* new writemask. For a detailed description of how conversion swizzles
187
* work see rc_rewrite_swizzle().
189
/*
 * Rewrite sub->WriteMask with rewrite_writemask() and then, depending on
 * srcs_need_rewrite(info), run rc_adjust_channels() over the swizzle of
 * each of the opcode's NumSrcRegs arguments.
 * NOTE(review): the body of the srcs_need_rewrite() test (presumably an
 * early return), the declaration of i and the second argument of the
 * rc_adjust_channels() call are not visible in this listing.
 */
void rc_pair_rewrite_writemask(
190
struct rc_pair_sub_instruction * sub,
191
unsigned int conversion_swizzle)
193
const struct rc_opcode_info * info = rc_get_opcode_info(sub->Opcode);
196
sub->WriteMask = rewrite_writemask(sub->WriteMask, conversion_swizzle);
198
if (!srcs_need_rewrite(info)) {
202
for (i = 0; i < info->NumSrcRegs; i++) {
203
sub->Arg[i].Swizzle =
204
rc_adjust_channels(sub->Arg[i].Swizzle,
209
/*
 * Per-source callback: interprets userdata as a pointer to a conversion
 * swizzle and replaces src->Swizzle with the result of
 * rc_adjust_channels() for that conversion.
 * NOTE(review): the userdata parameter is used in the body but its
 * declaration line is missing from this listing.
 */
static void normal_rewrite_writemask_cb(
211
struct rc_instruction * inst,
212
struct rc_src_register * src)
214
unsigned int * conversion_swizzle = (unsigned int *)userdata;
215
src->Swizzle = rc_adjust_channels(src->Swizzle, *conversion_swizzle);
219
* This function is the same as rc_pair_rewrite_writemask() except it
220
* operates on normal instructions.
222
/*
 * Rewrite the destination writemask of a normal instruction and adjust
 * what reads from it.  Visible steps:
 *  - DstReg.WriteMask is converted with rewrite_writemask();
 *  - for opcodes with HasTexture set, TexSwizzle is asserted to be
 *    RC_SWIZZLE_XYZW and then, per channel i, channel
 *    GET_SWZ(conversion_swizzle, i) of TexSwizzle is set to i;
 *  - srcs_need_rewrite(info) is tested, and
 *    normal_rewrite_writemask_cb is applied to every source read through
 *    rc_for_all_reads_src().
 * NOTE(review): several lines are missing from this listing (declaration
 * of i, the statement between reading swz and SET_SWZ, and the bodies and
 * closers of the if blocks), so the exact control flow -- e.g. whether
 * the texture path returns early -- must be confirmed in the full source.
 */
void rc_normal_rewrite_writemask(
223
struct rc_instruction * inst,
224
unsigned int conversion_swizzle)
226
struct rc_sub_instruction * sub = &inst->U.I;
227
const struct rc_opcode_info * info = rc_get_opcode_info(sub->Opcode);
228
sub->DstReg.WriteMask =
229
rewrite_writemask(sub->DstReg.WriteMask, conversion_swizzle);
231
if (info->HasTexture) {
233
assert(sub->TexSwizzle == RC_SWIZZLE_XYZW);
234
for (i = 0; i < 4; i++) {
235
unsigned int swz = GET_SWZ(conversion_swizzle, i);
238
SET_SWZ(sub->TexSwizzle, swz, i);
242
if (!srcs_need_rewrite(info)) {
246
rc_for_all_reads_src(inst, normal_rewrite_writemask_cb,
247
&conversion_swizzle);
251
* This function replaces each value 'swz' in swizzle with the value of
252
* GET_SWZ(conversion_swizzle, swz). So, if you want to change all the X's
253
* in swizzle to Y, then conversion_swizzle should be Y___ (0xff9). If you want
254
* to change all the Y's in swizzle to X, then conversion_swizzle should be
255
* _X__ (0xfc7). If you want to change the Y's to X and the X's to Y, then
256
* conversion swizzle should be YX__ (0xfc1).
257
* @param swizzle The swizzle to change
258
* @param conversion_swizzle Describes the conversion to perform on the swizzle
259
* @return A converted swizzle
261
/*
 * Remap the values held in a swizzle.  out_swizzle starts as a copy of
 * swizzle; for each channel the current value swz is read, looked up in
 * conversion_swizzle, and the channel is set to the looked-up value when
 * that value is not RC_SWIZZLE_UNUSED.  Other visible statements write
 * the original swz back to the channel.
 * NOTE(review): the conditions guarding the two "SET_SWZ(..., swz)"
 * statements, the declaration of chan and the return are not visible in
 * this listing -- confirm the branch structure in the full source.
 */
unsigned int rc_rewrite_swizzle(
262
unsigned int swizzle,
263
unsigned int conversion_swizzle)
266
unsigned int out_swizzle = swizzle;
268
for (chan = 0; chan < 4; chan++) {
269
unsigned int swz = GET_SWZ(swizzle, chan);
270
unsigned int new_swz;
272
SET_SWZ(out_swizzle, chan, swz);
274
new_swz = GET_SWZ(conversion_swizzle, swz);
275
if (new_swz != RC_SWIZZLE_UNUSED) {
276
SET_SWZ(out_swizzle, chan, new_swz);
278
SET_SWZ(out_swizzle, chan, swz);
286
* Left multiplication of a register with a swizzle
288
/*
 * Apply `swizzle` on top of a source register's own swizzle.  Working on
 * a copy (tmp) of srcreg, each output channel i takes selector swz from
 * `swizzle`; one visible path ORs in srcreg's swizzle value and negate
 * bit for channel swz, another ORs swz itself into channel i.
 * NOTE(review): the lines that clear tmp.Swizzle/tmp.Negate, the
 * condition choosing between the two paths, the declaration of i and the
 * return are not visible in this listing -- confirm in the full source.
 */
struct rc_src_register lmul_swizzle(unsigned int swizzle, struct rc_src_register srcreg)
290
struct rc_src_register tmp = srcreg;
294
for(i = 0; i < 4; ++i) {
295
rc_swizzle swz = GET_SWZ(swizzle, i);
297
tmp.Swizzle |= GET_SWZ(srcreg.Swizzle, swz) << (i*3);
298
tmp.Negate |= GET_BIT(srcreg.Negate, swz) << i;
300
tmp.Swizzle |= swz << (i*3);
306
/*
 * Reset a source register: zero the whole structure with memset, then
 * set its Swizzle field to RC_SWIZZLE_XYZW.
 */
void reset_srcreg(struct rc_src_register* reg)
308
memset(reg, 0, sizeof(struct rc_src_register));
309
reg->Swizzle = RC_SWIZZLE_XYZW;
312
/*
 * Compute which destination channels a source read touches.  When the
 * source and destination name the same register (same file and index),
 * the result is dst_mask ANDed with the mask derived from src_swz by
 * rc_swizzle_to_writemask().
 * NOTE(review): the body of the "different register" test is not visible
 * in this listing; presumably it returns an empty mask -- confirm.
 */
unsigned int rc_src_reads_dst_mask(
313
rc_register_file src_file,
314
unsigned int src_idx,
315
unsigned int src_swz,
316
rc_register_file dst_file,
317
unsigned int dst_idx,
318
unsigned int dst_mask)
320
if (src_file != dst_file || src_idx != dst_idx) {
323
return dst_mask & rc_swizzle_to_writemask(src_swz);
327
* @return A bit mask specifying whether this swizzle will select from an RGB
328
* source, an Alpha source, or both.
330
/*
 * Classify a swizzle by the components it selects: starting from
 * RC_SOURCE_NONE, each channel that selects W adds RC_SOURCE_ALPHA and
 * each channel that selects X, Y or Z adds RC_SOURCE_RGB.  Any other
 * swizzle value contributes nothing in the visible code.
 * NOTE(review): the declaration of chan and the return are not visible
 * in this listing.
 */
unsigned int rc_source_type_swz(unsigned int swizzle)
333
unsigned int swz = RC_SWIZZLE_UNUSED;
334
unsigned int ret = RC_SOURCE_NONE;
336
for(chan = 0; chan < 4; chan++) {
337
swz = GET_SWZ(swizzle, chan);
338
if (swz == RC_SWIZZLE_W) {
339
ret |= RC_SOURCE_ALPHA;
340
} else if (swz == RC_SWIZZLE_X || swz == RC_SWIZZLE_Y
341
|| swz == RC_SWIZZLE_Z) {
342
ret |= RC_SOURCE_RGB;
348
/*
 * Classify a channel mask: any bit of RC_MASK_XYZ adds RC_SOURCE_RGB and
 * the RC_MASK_W bit adds RC_SOURCE_ALPHA, starting from RC_SOURCE_NONE.
 * NOTE(review): the return statement is not visible in this listing.
 */
unsigned int rc_source_type_mask(unsigned int mask)
350
unsigned int ret = RC_SOURCE_NONE;
352
if (mask & RC_MASK_XYZ)
353
ret |= RC_SOURCE_RGB;
355
if (mask & RC_MASK_W)
356
ret |= RC_SOURCE_ALPHA;
362
/*
 * NOTE(review): these look like members of a structure whose header line
 * is missing from this listing -- presumably struct src_select, since
 * File and SrcType are read on struct src_select elements elsewhere in
 * this file.  Confirm against the full source.
 */
/* Register file of the selected source. */
rc_register_file File;
364
/* Stored from rc_source_type_swz() results, i.e. RC_SOURCE_* bits. */
unsigned int SrcType;
367
/*
 * Scratch state used while checking whether an instruction can take a
 * presubtract operation.
 */
struct can_use_presub_data {
368
/* Source selects collected so far; fixed capacity of 5 entries. */
struct src_select Selects[5];
369
/* Number of entries of Selects currently in use. */
unsigned int SelectCount;
370
/* The source register that is compared against in the read callback. */
const struct rc_src_register * ReplaceReg;
371
/* Set to 1 once a source equal to ReplaceReg has been seen. */
unsigned int ReplaceRemoved;
374
/*
 * Append one source select to data->Selects, taking the next free slot
 * (SelectCount is post-incremented) and filling in its Index and SrcType.
 * NOTE(review): no bounds check against the 5-entry Selects array is
 * visible here.  The index parameter line and (presumably) the store to
 * select->File are missing from this listing -- confirm.
 */
static void can_use_presub_data_add_select(
375
struct can_use_presub_data * data,
376
rc_register_file file,
378
unsigned int src_type)
380
struct src_select * select;
382
select = &data->Selects[data->SelectCount++];
384
select->Index = index;
385
select->SrcType = src_type;
389
* This callback function counts the number of sources in inst that are
390
* different from the source in can_use_presub_data->ReplaceReg.
392
/*
 * Per-source callback used while collecting source selects.  The first
 * time src is the very register pointed to by d->ReplaceReg, the
 * ReplaceRemoved flag is set.  Sources are also tested for
 * RC_FILE_NONE, and a select built from the source's File, Index and
 * rc_source_type_swz(Swizzle) is added to d.
 * NOTE(review): the userdata parameter line and the bodies of the two if
 * statements (presumably early returns) are missing from this listing.
 */
static void can_use_presub_read_cb(
394
struct rc_instruction * inst,
395
struct rc_src_register * src)
397
struct can_use_presub_data * d = userdata;
399
if (!d->ReplaceRemoved && src == d->ReplaceReg) {
400
d->ReplaceRemoved = 1;
404
if (src->File == RC_FILE_NONE)
407
can_use_presub_data_add_select(d, src->File, src->Index,
408
rc_source_type_swz(src->Swizzle));
411
/*
 * Check whether inst can use the given presubtract operation in place of
 * replace_reg.  Visible stages:
 *  1. Tests on presub_op == RC_PRESUB_NONE, on info->HasTexture and on
 *     the instruction already carrying a presubtract opcode.
 *  2. Collect the instruction's source selects into d through
 *     rc_for_all_reads_src() with can_use_presub_read_cb.
 *  3. Add selects for presub_src0 and, when the operation reads more
 *     than one source, for presub_src1; when both presub sources name
 *     the same file and index, their shared RGB / alpha source types
 *     are tested.
 *  4. Walk the collected selects; for each one, clear the source-type
 *     bits that a later select with the same File and Index also has,
 *     then test the remaining RGB and alpha bits.
 *  5. Compare rgb_count and alpha_count against 3.
 * NOTE(review): the bodies of most if statements (returns and counter
 * increments), the declarations of i and j, and the arguments of the
 * can_use_presub_data_add_select() calls are missing from this listing,
 * so the returned values cannot be stated from what is visible here.
 */
unsigned int rc_inst_can_use_presub(
412
struct rc_instruction * inst,
413
rc_presubtract_op presub_op,
414
unsigned int presub_writemask,
415
const struct rc_src_register * replace_reg,
416
const struct rc_src_register * presub_src0,
417
const struct rc_src_register * presub_src1)
419
struct can_use_presub_data d;
420
unsigned int num_presub_srcs;
422
const struct rc_opcode_info * info =
423
rc_get_opcode_info(inst->U.I.Opcode);
424
int rgb_count = 0, alpha_count = 0;
425
unsigned int src_type0, src_type1;
427
if (presub_op == RC_PRESUB_NONE) {
431
if (info->HasTexture) {
435
/* We can't use more than one presubtract value in an
436
* instruction, unless the two prsubtract operations
437
* are the same and read from the same registers.
438
* XXX For now we will limit instructions to only one presubtract
440
if (inst->U.I.PreSub.Opcode != RC_PRESUB_NONE) {
444
memset(&d, 0, sizeof(d));
445
d.ReplaceReg = replace_reg;
447
rc_for_all_reads_src(inst, can_use_presub_read_cb, &d);
449
num_presub_srcs = rc_presubtract_src_reg_count(presub_op);
451
src_type0 = rc_source_type_swz(presub_src0->Swizzle);
452
can_use_presub_data_add_select(&d,
457
if (num_presub_srcs > 1) {
458
src_type1 = rc_source_type_swz(presub_src1->Swizzle);
459
can_use_presub_data_add_select(&d,
464
/* Even if both of the presub sources read from the same
465
* register, we still need to use 2 different source selects
466
* for them, so we need to increment the count to compensate.
468
if (presub_src0->File == presub_src1->File
469
&& presub_src0->Index == presub_src1->Index) {
470
if (src_type0 & src_type1 & RC_SOURCE_RGB) {
473
if (src_type0 & src_type1 & RC_SOURCE_ALPHA) {
479
/* Count the number of source selects for Alpha and RGB. If we
480
* encounter two of the same source selects then we can ignore the
482
for (i = 0; i < d.SelectCount; i++) {
484
unsigned int src_type = d.Selects[i].SrcType;
485
for (j = i + 1; j < d.SelectCount; j++) {
486
if (d.Selects[i].File == d.Selects[j].File
487
&& d.Selects[i].Index == d.Selects[j].Index) {
488
src_type &= ~d.Selects[j].SrcType;
491
if (src_type & RC_SOURCE_RGB) {
495
if (src_type & RC_SOURCE_ALPHA) {
500
if (rgb_count > 3 || alpha_count > 3) {
509
/*
 * NOTE(review): these look like members of a structure whose header line
 * is missing from this listing -- presumably struct max_data, which
 * max_callback and rc_get_max_index use with HasFileType, File and Max
 * members.  Confirm against the full source.
 */
/* Tested as a boolean; initialised to 0 by rc_get_max_index. */
unsigned int HasFileType;
510
/* Register file that callback arguments are compared against. */
rc_register_file File;
513
/*
 * Register-access callback for finding the highest index of one register
 * file.  It reacts only to accesses whose file equals d->File, and then
 * only when nothing has been recorded yet (HasFileType is 0) or when
 * index is larger than the current d->Max.
 * NOTE(review): the userdata/index parameter lines and the body of the
 * if (presumably updating Max and HasFileType) are missing from this
 * listing.
 */
static void max_callback(
515
struct rc_instruction * inst,
516
rc_register_file file,
520
struct max_data * d = (struct max_data*)userdata;
521
if (file == d->File && (!d->HasFileType || index > d->Max)) {
528
* @return The maximum index of the specified register file used by the
531
/*
 * Walk every instruction in c->Program.Instructions (a circular list
 * terminated by the list head) and feed all register reads and writes to
 * max_callback, then test whether any access was recorded.
 * NOTE(review): the initialisation of the other fields of data, the loop
 * increment, the body of the final if and the return statements are
 * missing from this listing, so the value returned when the file is
 * never used cannot be stated from what is visible here.
 */
int rc_get_max_index(
532
struct radeon_compiler * c,
533
rc_register_file file)
535
struct max_data data;
536
struct rc_instruction * inst;
538
data.HasFileType = 0;
540
for (inst = c->Program.Instructions.Next;
541
inst != &c->Program.Instructions;
543
rc_for_all_reads_mask(inst, max_callback, &data);
544
rc_for_all_writes_mask(inst, max_callback, &data);
546
if (!data.HasFileType) {
553
/*
 * Accumulate a mask from the swizzles of a pair sub-instruction's
 * arguments.  Each of the opcode's NumSrcRegs arguments is tested
 * against the requested source index and source type; the swizzle of an
 * argument is converted with rc_swizzle_to_writemask() and ORed into
 * readmask.
 * NOTE(review): the source parameter line, the body of the mismatch test
 * (presumably a skip), the declaration of i and the return are missing
 * from this listing.
 */
static unsigned int get_source_readmask(
554
struct rc_pair_sub_instruction * sub,
556
unsigned int src_type)
559
unsigned int readmask = 0;
560
const struct rc_opcode_info * info = rc_get_opcode_info(sub->Opcode);
562
for (i = 0; i < info->NumSrcRegs; i++) {
563
if (sub->Arg[i].Source != source
564
|| src_type != rc_source_type_swz(sub->Arg[i].Swizzle)) {
567
readmask |= rc_swizzle_to_writemask(sub->Arg[i].Swizzle);
573
* This function attempts to remove a source from a pair instruction.
575
* @param src_type RC_SOURCE_RGB, RC_SOURCE_ALPHA, or both bitwise or'd
576
* @param source The index of the source to remove
577
* @param new_readmask A mask representing the components that are read by
578
* the source that is intended to replace the one you are removing. If you
579
* want to remove a source only and not replace it, this parameter should be
581
* @return 1 if the source was successfully removed, 0 if it was not
583
/*
 * Clear one source slot of a pair instruction.  The read masks of the
 * RGB and Alpha sub-instructions for this source are combined, and the
 * combined mask is checked to be fully contained in new_readmask.  Then
 * the RGB and/or Alpha Src[source] entry is zeroed with memset,
 * according to the bits set in src_type.
 * NOTE(review): the source parameter line, the statement executed when
 * new_readmask does not cover readmask, and the return statements are
 * missing from this listing.
 */
unsigned int rc_pair_remove_src(
584
struct rc_instruction * inst,
585
unsigned int src_type,
587
unsigned int new_readmask)
589
unsigned int readmask = 0;
591
readmask |= get_source_readmask(&inst->U.P.RGB, source, src_type);
592
readmask |= get_source_readmask(&inst->U.P.Alpha, source, src_type);
594
/* Every channel currently read must also be in new_readmask. */
if ((new_readmask & readmask) != readmask)
597
if (src_type & RC_SOURCE_RGB) {
598
memset(&inst->U.P.RGB.Src[source], 0,
599
sizeof(struct rc_pair_instruction_source));
602
if (src_type & RC_SOURCE_ALPHA) {
603
memset(&inst->U.P.Alpha.Src[source], 0,
604
sizeof(struct rc_pair_instruction_source));
611
* @return RC_OPCODE_NOP if inst is not a flow control instruction.
612
* @return The opcode of inst if it is a flow control instruction.
614
/*
 * Look up the opcode description of inst -- from U.I.Opcode for normal
 * instructions, otherwise from the RGB half of the pair instruction --
 * and test its IsFlowControl flag.  On the pair path an assert checks
 * that a flow control opcode comes with an Alpha opcode of
 * RC_OPCODE_NOP.  The last visible statement returns RC_OPCODE_NOP.
 * NOTE(review): the else line and the statement executed when
 * IsFlowControl is set (presumably returning the opcode) are missing
 * from this listing.
 */
rc_opcode rc_get_flow_control_inst(struct rc_instruction * inst)
616
const struct rc_opcode_info * info;
617
if (inst->Type == RC_INSTRUCTION_NORMAL) {
618
info = rc_get_opcode_info(inst->U.I.Opcode);
620
info = rc_get_opcode_info(inst->U.P.RGB.Opcode);
621
/*A flow control instruction shouldn't have an alpha
623
assert(!info->IsFlowControl ||
624
inst->U.P.Alpha.Opcode == RC_OPCODE_NOP);
627
if (info->IsFlowControl)
630
return RC_OPCODE_NOP;
635
* @return The BGNLOOP instruction that starts the loop ended by endloop.
637
/*
 * Scan backwards from endloop through the Prev links (stopping if the
 * walk comes back round to endloop), classifying each instruction with
 * rc_get_flow_control_inst().  ENDLOOP and BGNLOOP opcodes are
 * distinguished, and a BGNLOOP seen while endloop_count is 0 is treated
 * specially.
 * NOTE(review): the statements that adjust endloop_count and the return
 * statements are missing from this listing; presumably the counter tracks
 * nested loops and the matching BGNLOOP is returned -- confirm.
 */
struct rc_instruction * rc_match_endloop(struct rc_instruction * endloop)
639
unsigned int endloop_count = 0;
640
struct rc_instruction * inst;
641
for (inst = endloop->Prev; inst != endloop; inst = inst->Prev) {
642
rc_opcode op = rc_get_flow_control_inst(inst);
643
if (op == RC_OPCODE_ENDLOOP) {
645
} else if (op == RC_OPCODE_BGNLOOP) {
646
if (endloop_count == 0) {
657
* @return The ENDLOOP instruction that ends the loop started by bgnloop.
659
/*
 * Scan forwards from bgnloop through the Next links (stopping if the
 * walk comes back round to bgnloop), classifying each instruction with
 * rc_get_flow_control_inst().  BGNLOOP and ENDLOOP opcodes are
 * distinguished, and an ENDLOOP seen while bgnloop_count is 0 is treated
 * specially.
 * NOTE(review): the statements that adjust bgnloop_count and the return
 * statements are missing from this listing; presumably the counter tracks
 * nested loops and the matching ENDLOOP is returned -- confirm.
 */
struct rc_instruction * rc_match_bgnloop(struct rc_instruction * bgnloop)
661
unsigned int bgnloop_count = 0;
662
struct rc_instruction * inst;
663
for (inst = bgnloop->Next; inst!=bgnloop; inst = inst->Next) {
664
rc_opcode op = rc_get_flow_control_inst(inst);
665
if (op == RC_OPCODE_BGNLOOP) {
667
} else if (op == RC_OPCODE_ENDLOOP) {
668
if (bgnloop_count == 0) {
679
* @return A conversion swizzle for converting from old_mask->new_mask
681
/*
 * Build a conversion swizzle that maps the channels set in old_mask onto
 * the channels set in new_mask.  The result starts with all four
 * channels RC_SWIZZLE_UNUSED.  For each old_idx, the bit in old_mask is
 * tested and new_idx is advanced (it is not reset between iterations of
 * the outer loop) to a bit set in new_mask; that position is stored as
 * the conversion for old_idx.
 * NOTE(review): the body of the old_mask test (presumably a continue)
 * and the statements following SET_SWZ inside the inner loop are missing
 * from this listing -- confirm in the full source.
 */
unsigned int rc_make_conversion_swizzle(
682
unsigned int old_mask,
683
unsigned int new_mask)
685
unsigned int conversion_swizzle = rc_init_swizzle(RC_SWIZZLE_UNUSED, 0);
686
unsigned int old_idx;
687
unsigned int new_idx = 0;
688
for (old_idx = 0; old_idx < 4; old_idx++) {
689
if (!GET_BIT(old_mask, old_idx))
691
for ( ; new_idx < 4; new_idx++) {
692
if (GET_BIT(new_mask, new_idx)) {
693
SET_SWZ(conversion_swizzle, old_idx, new_idx);
699
return conversion_swizzle;
703
* @return 1 if the register contains an immediate value, 0 otherwise.
705
/*
 * True when file is RC_FILE_CONSTANT and the program constant at `index`
 * has Type RC_CONSTANT_IMMEDIATE.  The && short-circuits, so the
 * constants table is only indexed for RC_FILE_CONSTANT.
 * NOTE(review): the file and index parameter lines are missing from this
 * listing, and no bounds check on index is visible here.
 */
unsigned int rc_src_reg_is_immediate(
706
struct radeon_compiler * c,
710
return file == RC_FILE_CONSTANT &&
711
c->Program.Constants.Constants[index].Type == RC_CONSTANT_IMMEDIATE;
715
* @return The immediate value in the specified register.
717
/*
 * Fetch one component of an immediate constant.  The swizzle value for
 * channel chan selects the component; rc_error() is reported when that
 * value is 4 or more or when index is outside the constants table.  The
 * negate bit for chan is tested, and the value is read from
 * Constants[index].u.Immediate[swz].
 * NOTE(review): several parameter lines (index, negate, chan), the
 * statement after rc_error(), the effect of the negate test (presumably a
 * sign flip) and the return are missing from this listing.
 */
float rc_get_constant_value(
718
struct radeon_compiler * c,
720
unsigned int swizzle,
725
int swz = GET_SWZ(swizzle, chan);
726
if(swz >= 4 || index >= c->Program.Constants.Count ){
727
rc_error(c, "get_constant_value: Can't find a value.\n");
730
if(GET_BIT(negate, chan)){
734
c->Program.Constants.Constants[index].u.Immediate[swz];
738
* This function returns the component value (RC_SWIZZLE_*) of the first used
739
* channel in the swizzle. This is only useful for scalar instructions that are
740
* known to use only one channel of the swizzle.
742
/*
 * Scan the four channels of swizzle in order and test each value against
 * RC_SWIZZLE_UNUSED; afterwards assert that swz is not
 * RC_SWIZZLE_UNUSED.
 * NOTE(review): the body of the test inside the loop (presumably a
 * break) and the return are missing from this listing.
 */
unsigned int rc_get_scalar_src_swz(unsigned int swizzle)
744
unsigned int swz, chan;
745
for (chan = 0; chan < 4; chan++) {
746
swz = GET_SWZ(swizzle, chan);
747
if (swz != RC_SWIZZLE_UNUSED) {
751
assert(swz != RC_SWIZZLE_UNUSED);