2
* Copyright © 2010 Intel Corporation
4
* Permission is hereby granted, free of charge, to any person obtaining a
5
* copy of this software and associated documentation files (the "Software"),
6
* to deal in the Software without restriction, including without limitation
7
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
8
* and/or sell copies of the Software, and to permit persons to whom the
9
* Software is furnished to do so, subject to the following conditions:
11
* The above copyright notice and this permission notice (including the next
12
* paragraph) shall be included in all copies or substantial portions of the
15
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21
* DEALINGS IN THE SOFTWARE.
25
* \file opt_copy_propagation_elements.cpp
27
* Replaces usage of recently-copied components of variables with the
28
* previous copy of the variable.
30
* This should reduce the number of MOV instructions in the generated
31
* programs and help triggering other optimizations that live in GLSL
36
#include "ir_rvalue_visitor.h"
37
#include "ir_basic_block.h"
38
#include "ir_optimization.h"
39
#include "compiler/glsl_types.h"
40
#include "util/hash_table.h"
43
static bool debug = false;
50
DECLARE_LINEAR_ZALLOC_CXX_OPERATORS(acp_entry)
52
/* If set, rhs_full indicates that this ACP entry represents a
53
* whole-variable copy. The rhs_element[] array will still be filled,
54
* to allow the swizzling from its components in case the variable
55
* was a vector (and to simplify some of the erase() and write_vector()
59
ir_variable *rhs_full;
60
ir_variable *rhs_element[4];
61
unsigned rhs_channel[4];
63
/* Set of variables that use the variable associated with this acp_entry as
64
* RHS. This has the "reverse references" of rhs_full/rhs_element. It is
65
* used to speed up invalidating those references when the acp_entry
71
class copy_propagation_state {
73
DECLARE_RZALLOC_CXX_OPERATORS(copy_propagation_state);
76
copy_propagation_state* create(void *mem_ctx)
78
return new (mem_ctx) copy_propagation_state(NULL);
81
copy_propagation_state* clone()
83
return new (ralloc_parent(this)) copy_propagation_state(this);
88
/* Individual elements were allocated from a linear allocator, so will
89
* be destroyed when the state is destroyed.
91
_mesa_hash_table_clear(acp, NULL);
95
void erase(ir_variable *var, unsigned write_mask)
97
acp_entry *entry = pull_acp(var);
98
entry->rhs_full = NULL;
100
for (int i = 0; i < 4; i++) {
101
if (!entry->rhs_element[i])
103
if ((write_mask & (1 << i)) == 0)
106
ir_variable *to_remove = entry->rhs_element[i];
107
entry->rhs_element[i] = NULL;
108
remove_unused_var_from_dsts(entry, var, to_remove);
111
/* TODO: Check write mask, and possibly not clear everything. */
113
/* For any usage of our variable on the RHS, clear it out. */
114
set_foreach(entry->dsts, set_entry) {
115
ir_variable *dst_var = (ir_variable *)set_entry->key;
116
acp_entry *dst_entry = pull_acp(dst_var);
117
for (int i = 0; i < 4; i++) {
118
if (dst_entry->rhs_element[i] == var)
119
dst_entry->rhs_element[i] = NULL;
121
if (dst_entry->rhs_full == var)
122
dst_entry->rhs_full = NULL;
123
_mesa_set_remove(entry->dsts, set_entry);
127
acp_entry *read(ir_variable *var)
129
for (copy_propagation_state *s = this; s != NULL; s = s->fallback) {
130
hash_entry *ht_entry = _mesa_hash_table_search(s->acp, var);
132
return (acp_entry *) ht_entry->data;
137
void write_elements(ir_variable *lhs, ir_variable *rhs, unsigned write_mask, int swizzle[4])
139
acp_entry *lhs_entry = pull_acp(lhs);
140
lhs_entry->rhs_full = NULL;
142
for (int i = 0; i < 4; i++) {
143
if ((write_mask & (1 << i)) == 0)
145
ir_variable *to_remove = lhs_entry->rhs_element[i];
146
lhs_entry->rhs_element[i] = rhs;
147
lhs_entry->rhs_channel[i] = swizzle[i];
149
remove_unused_var_from_dsts(lhs_entry, lhs, to_remove);
152
acp_entry *rhs_entry = pull_acp(rhs);
153
_mesa_set_add(rhs_entry->dsts, lhs);
156
void write_full(ir_variable *lhs, ir_variable *rhs)
158
acp_entry *lhs_entry = pull_acp(lhs);
159
if (lhs_entry->rhs_full == rhs)
162
if (lhs_entry->rhs_full) {
163
remove_from_dsts(lhs_entry->rhs_full, lhs);
164
} else if (lhs->type->is_vector()) {
165
for (int i = 0; i < 4; i++) {
166
if (lhs_entry->rhs_element[i])
167
remove_from_dsts(lhs_entry->rhs_element[i], lhs);
171
lhs_entry->rhs_full = rhs;
172
acp_entry *rhs_entry = pull_acp(rhs);
173
_mesa_set_add(rhs_entry->dsts, lhs);
175
if (lhs->type->is_vector()) {
176
for (int i = 0; i < 4; i++) {
177
lhs_entry->rhs_element[i] = rhs;
178
lhs_entry->rhs_channel[i] = i;
183
void remove_unused_var_from_dsts(acp_entry *lhs_entry, ir_variable *lhs, ir_variable *var)
188
/* If lhs still uses var, don't remove anything. */
189
for (int j = 0; j < 4; j++) {
190
if (lhs_entry->rhs_element[j] == var)
194
acp_entry *element = pull_acp(var);
196
_mesa_set_remove_key(element->dsts, lhs);
200
explicit copy_propagation_state(copy_propagation_state *fallback)
202
this->fallback = fallback;
203
/* Use 'this' as context for the table, no explicit destruction
206
acp = _mesa_pointer_hash_table_create(this);
207
lin_ctx = linear_alloc_parent(this, 0);
210
acp_entry *pull_acp(ir_variable *var)
212
hash_entry *ht_entry = _mesa_hash_table_search(acp, var);
214
return (acp_entry *) ht_entry->data;
216
/* If not found, create one and copy data from fallback if available. */
217
acp_entry *entry = new(lin_ctx) acp_entry();
218
_mesa_hash_table_insert(acp, var, entry);
221
for (copy_propagation_state *s = fallback; s != NULL; s = s->fallback) {
222
hash_entry *fallback_ht_entry = _mesa_hash_table_search(s->acp, var);
223
if (fallback_ht_entry) {
224
acp_entry *fallback_entry = (acp_entry *) fallback_ht_entry->data;
225
*entry = *fallback_entry;
226
entry->dsts = _mesa_set_clone(fallback_entry->dsts, this);
233
entry->dsts = _mesa_pointer_set_create(this);
240
remove_from_dsts(ir_variable *var, ir_variable *to_remove)
242
acp_entry *entry = pull_acp(var);
244
_mesa_set_remove_key(entry->dsts, to_remove);
247
/** Available Copy to Propagate table, from variable to the entry
248
* containing the current sources that can be used. */
251
/** When a state is cloned, entries are copied on demand from fallback. */
252
copy_propagation_state *fallback;
257
class kill_entry : public exec_node
260
/* override operator new from exec_node */
261
DECLARE_LINEAR_ZALLOC_CXX_OPERATORS(kill_entry)
263
kill_entry(ir_variable *var, int write_mask)
266
this->write_mask = write_mask;
270
unsigned int write_mask;
273
class ir_copy_propagation_elements_visitor : public ir_rvalue_visitor {
275
ir_copy_propagation_elements_visitor()
277
this->progress = false;
278
this->killed_all = false;
279
this->mem_ctx = ralloc_context(NULL);
280
this->lin_ctx = linear_alloc_parent(this->mem_ctx, 0);
281
this->shader_mem_ctx = NULL;
282
this->kills = new(mem_ctx) exec_list;
283
this->state = copy_propagation_state::create(mem_ctx);
285
~ir_copy_propagation_elements_visitor()
287
ralloc_free(mem_ctx);
290
virtual ir_visitor_status visit(ir_dereference_variable *);
292
void handle_loop(ir_loop *, bool keep_acp);
293
virtual ir_visitor_status visit_enter(class ir_loop *);
294
virtual ir_visitor_status visit_enter(class ir_function_signature *);
295
virtual ir_visitor_status visit_leave(class ir_assignment *);
296
virtual ir_visitor_status visit_enter(class ir_call *);
297
virtual ir_visitor_status visit_enter(class ir_if *);
298
virtual ir_visitor_status visit_leave(class ir_swizzle *);
300
void handle_rvalue(ir_rvalue **rvalue);
302
void add_copy(ir_assignment *ir);
303
void kill(kill_entry *k);
304
void handle_if_block(exec_list *instructions, exec_list *kills, bool *killed_all);
306
copy_propagation_state *state;
309
* List of kill_entry: The variables whose values were killed in this
318
/* Context for our local data structures. */
321
/* Context for allocating new shader nodes. */
322
void *shader_mem_ctx;
325
} /* unnamed namespace */
328
ir_copy_propagation_elements_visitor::visit(ir_dereference_variable *ir)
330
if (this->in_assignee)
331
return visit_continue;
333
const acp_entry *entry = state->read(ir->var);
334
if (entry && entry->rhs_full) {
335
ir->var = (ir_variable *) entry->rhs_full;
339
return visit_continue;
343
ir_copy_propagation_elements_visitor::visit_enter(ir_function_signature *ir)
345
/* Treat entry into a function signature as a completely separate
346
* block. Any instructions at global scope will be shuffled into
347
* main() at link time, so they're irrelevant to us.
349
exec_list *orig_kills = this->kills;
350
bool orig_killed_all = this->killed_all;
352
this->kills = new(mem_ctx) exec_list;
353
this->killed_all = false;
355
copy_propagation_state *orig_state = state;
356
this->state = copy_propagation_state::create(mem_ctx);
358
visit_list_elements(this, &ir->body);
361
this->state = orig_state;
363
ralloc_free(this->kills);
364
this->kills = orig_kills;
365
this->killed_all = orig_killed_all;
367
return visit_continue_with_parent;
371
ir_copy_propagation_elements_visitor::visit_leave(ir_assignment *ir)
373
ir_dereference_variable *lhs = ir->lhs->as_dereference_variable();
374
ir_variable *var = ir->lhs->variable_referenced();
378
if (lhs && var->type->is_vector())
379
k = new(this->lin_ctx) kill_entry(var, ir->write_mask);
381
k = new(this->lin_ctx) kill_entry(var, ~0);
387
return visit_continue;
391
ir_copy_propagation_elements_visitor::visit_leave(ir_swizzle *)
393
/* Don't visit the values of swizzles since they are handled while
394
* visiting the swizzle itself.
396
return visit_continue;
400
* Replaces dereferences of ACP RHS variables with ACP LHS variables.
402
* This is where the actual copy propagation occurs. Note that the
403
* rewriting of ir_dereference means that the ir_dereference instance
404
* must not be shared by multiple IR operations!
407
ir_copy_propagation_elements_visitor::handle_rvalue(ir_rvalue **ir)
410
ir_dereference_variable *deref_var;
411
ir_variable *source[4] = {NULL, NULL, NULL, NULL};
412
int source_chan[4] = {0, 0, 0, 0};
414
bool noop_swizzle = true;
419
ir_swizzle *swizzle = (*ir)->as_swizzle();
421
deref_var = swizzle->val->as_dereference_variable();
425
swizzle_chan[0] = swizzle->mask.x;
426
swizzle_chan[1] = swizzle->mask.y;
427
swizzle_chan[2] = swizzle->mask.z;
428
swizzle_chan[3] = swizzle->mask.w;
429
chans = swizzle->type->vector_elements;
431
deref_var = (*ir)->as_dereference_variable();
439
chans = deref_var->type->vector_elements;
442
if (this->in_assignee)
445
ir_variable *var = deref_var->var;
447
/* Try to find ACP entries covering swizzle_chan[], hoping they're
448
* the same source variable.
451
const acp_entry *entry = state->read(var);
453
for (int c = 0; c < chans; c++) {
454
unsigned index = swizzle_chan[c];
455
ir_variable *src = entry->rhs_element[index];
459
source_chan[c] = entry->rhs_channel[index];
460
if (source_chan[c] != swizzle_chan[c])
461
noop_swizzle = false;
465
/* Make sure all channels are copying from the same source variable. */
468
for (int c = 1; c < chans; c++) {
469
if (source[c] != source[0])
474
shader_mem_ctx = ralloc_parent(deref_var);
476
/* Don't pointlessly replace the rvalue with itself (or a noop swizzle
477
* of itself, which would just be deleted by opt_noop_swizzle).
479
if (source[0] == var && noop_swizzle)
483
printf("Copy propagation from:\n");
487
deref_var = new(shader_mem_ctx) ir_dereference_variable(source[0]);
488
*ir = new(shader_mem_ctx) ir_swizzle(deref_var,
505
ir_copy_propagation_elements_visitor::visit_enter(ir_call *ir)
507
/* Do copy propagation on call parameters, but skip any out params */
508
foreach_two_lists(formal_node, &ir->callee->parameters,
509
actual_node, &ir->actual_parameters) {
510
ir_variable *sig_param = (ir_variable *) formal_node;
511
ir_rvalue *ir = (ir_rvalue *) actual_node;
512
if (sig_param->data.mode != ir_var_function_out
513
&& sig_param->data.mode != ir_var_function_inout) {
518
if (!ir->callee->is_intrinsic()) {
520
this->killed_all = true;
522
if (ir->return_deref) {
523
kill(new(this->lin_ctx) kill_entry(ir->return_deref->var, ~0));
526
foreach_two_lists(formal_node, &ir->callee->parameters,
527
actual_node, &ir->actual_parameters) {
528
ir_variable *sig_param = (ir_variable *) formal_node;
529
if (sig_param->data.mode == ir_var_function_out ||
530
sig_param->data.mode == ir_var_function_inout) {
531
ir_rvalue *ir = (ir_rvalue *) actual_node;
532
ir_variable *var = ir->variable_referenced();
533
kill(new(this->lin_ctx) kill_entry(var, ~0));
538
return visit_continue_with_parent;
542
ir_copy_propagation_elements_visitor::handle_if_block(exec_list *instructions, exec_list *kills, bool *killed_all)
544
exec_list *orig_kills = this->kills;
545
bool orig_killed_all = this->killed_all;
548
this->killed_all = false;
550
/* Populate the initial acp with a copy of the original */
551
copy_propagation_state *orig_state = state;
552
this->state = orig_state->clone();
554
visit_list_elements(this, instructions);
557
this->state = orig_state;
559
*killed_all = this->killed_all;
560
this->kills = orig_kills;
561
this->killed_all = orig_killed_all;
565
ir_copy_propagation_elements_visitor::visit_enter(ir_if *ir)
567
ir->condition->accept(this);
569
exec_list *new_kills = new(mem_ctx) exec_list;
570
bool then_killed_all = false;
571
bool else_killed_all = false;
573
handle_if_block(&ir->then_instructions, new_kills, &then_killed_all);
574
handle_if_block(&ir->else_instructions, new_kills, &else_killed_all);
576
if (then_killed_all || else_killed_all) {
580
foreach_in_list_safe(kill_entry, k, new_kills)
584
ralloc_free(new_kills);
586
/* handle_if_block() already descended into the children. */
587
return visit_continue_with_parent;
591
ir_copy_propagation_elements_visitor::handle_loop(ir_loop *ir, bool keep_acp)
593
exec_list *orig_kills = this->kills;
594
bool orig_killed_all = this->killed_all;
596
this->kills = new(mem_ctx) exec_list;
597
this->killed_all = false;
599
copy_propagation_state *orig_state = state;
602
/* Populate the initial acp with a copy of the original */
603
this->state = orig_state->clone();
605
this->state = copy_propagation_state::create(mem_ctx);
608
visit_list_elements(this, &ir->body_instructions);
611
this->state = orig_state;
613
if (this->killed_all)
614
this->state->erase_all();
616
exec_list *new_kills = this->kills;
617
this->kills = orig_kills;
618
this->killed_all = this->killed_all || orig_killed_all;
620
foreach_in_list_safe(kill_entry, k, new_kills) {
624
ralloc_free(new_kills);
628
ir_copy_propagation_elements_visitor::visit_enter(ir_loop *ir)
630
handle_loop(ir, false);
631
handle_loop(ir, true);
633
/* already descended into the children. */
634
return visit_continue_with_parent;
637
/* Remove any entries currently in the ACP for this kill. */
639
ir_copy_propagation_elements_visitor::kill(kill_entry *k)
641
state->erase(k->var, k->write_mask);
643
/* If we were on a list, remove ourselves before inserting */
647
this->kills->push_tail(k);
651
* Adds directly-copied channels between vector variables to the available
652
* copy propagation list.
655
ir_copy_propagation_elements_visitor::add_copy(ir_assignment *ir)
658
ir_variable *lhs_var = ir->whole_variable_written();
659
ir_dereference_variable *rhs = ir->rhs->as_dereference_variable();
661
if (lhs_var != NULL && rhs && rhs->var != NULL && lhs_var != rhs->var) {
662
if (lhs_var->data.mode == ir_var_shader_storage ||
663
lhs_var->data.mode == ir_var_shader_shared ||
664
rhs->var->data.mode == ir_var_shader_storage ||
665
rhs->var->data.mode == ir_var_shader_shared ||
666
lhs_var->data.precise != rhs->var->data.precise) {
669
state->write_full(lhs_var, rhs->var);
674
int orig_swizzle[4] = {0, 1, 2, 3};
677
ir_dereference_variable *lhs = ir->lhs->as_dereference_variable();
678
if (!lhs || !(lhs->type->is_scalar() || lhs->type->is_vector()))
681
if (lhs->var->data.mode == ir_var_shader_storage ||
682
lhs->var->data.mode == ir_var_shader_shared)
685
ir_dereference_variable *rhs = ir->rhs->as_dereference_variable();
687
ir_swizzle *swiz = ir->rhs->as_swizzle();
691
rhs = swiz->val->as_dereference_variable();
695
orig_swizzle[0] = swiz->mask.x;
696
orig_swizzle[1] = swiz->mask.y;
697
orig_swizzle[2] = swiz->mask.z;
698
orig_swizzle[3] = swiz->mask.w;
701
if (rhs->var->data.mode == ir_var_shader_storage ||
702
rhs->var->data.mode == ir_var_shader_shared)
705
/* Move the swizzle channels out to the positions they match in the
706
* destination. We don't want to have to rewrite the swizzle[]
707
* array every time we clear a bit of the write_mask.
710
for (int i = 0; i < 4; i++) {
711
if (ir->write_mask & (1 << i))
712
swizzle[i] = orig_swizzle[j++];
715
int write_mask = ir->write_mask;
716
if (lhs->var == rhs->var) {
717
/* If this is a copy from the variable to itself, then we need
718
* to be sure not to include the updated channels from this
719
* instruction in the set of new source channels to be
720
* copy-propagated from.
722
for (int i = 0; i < 4; i++) {
723
if (ir->write_mask & (1 << orig_swizzle[i]))
724
write_mask &= ~(1 << i);
728
if (lhs->var->data.precise != rhs->var->data.precise)
731
state->write_elements(lhs->var, rhs->var, write_mask, swizzle);
735
do_copy_propagation_elements(exec_list *instructions)
737
ir_copy_propagation_elements_visitor v;
739
visit_list_elements(&v, instructions);